[Likwid-commit] [likwid] 01/09: Imported Upstream version 3.1.3
Christoph Martin
chrism at debian.org
Mon Mar 2 15:31:05 UTC 2015
This is an automated email from the git hooks/post-receive script.
chrism pushed a commit to branch master
in repository likwid.
commit 7c57191e41356c3f95f7a50a64bf20ab063c652c
Author: Christoph Martin <martin at uni-mainz.de>
Date: Wed Feb 25 17:26:21 2015 +0100
Imported Upstream version 3.1.3
---
INSTALL | 10 +-
Makefile | 82 +-
README | 2 +-
bench/x86-64/branch.ptt | 36 +
bench/x86-64/copy_avx.ptt | 15 +
bench/x86-64/copy_mem_avx.ptt | 14 +
bench/x86-64/copy_mem_sse.ptt | 15 +
bench/x86-64/copy_plain.ptt | 16 +
bench/x86-64/copy_sse.ptt | 15 +
bench/x86-64/load_avx.ptt | 12 +
bench/x86-64/load_plain.ptt | 12 +
bench/x86-64/load_sse.ptt | 12 +
bench/x86-64/peak_avx.ptt | 49 +
bench/x86-64/peak_sse.ptt | 49 +
bench/x86-64/peakflops_avx.ptt | 37 +
bench/x86-64/peakflops_sse.ptt | 37 +
bench/x86-64/store_avx.ptt | 15 +
bench/x86-64/store_mem_avx.ptt | 14 +
bench/x86-64/store_mem_sse.ptt | 14 +
bench/x86-64/store_plain.ptt | 15 +
bench/x86-64/store_sse.ptt | 15 +
bench/x86-64/stream_avx.ptt | 22 +
bench/x86-64/striad_avx.ptt | 23 +
bench/x86-64/striad_mem_avx.ptt | 11 +
bench/x86-64/striad_mem_sse.ptt | 11 +
bench/x86-64/striad_plain.ptt | 23 +
bench/x86-64/striad_sse.ptt | 23 +
bench/x86-64/sum_sse.ptt | 23 +
bench/x86-64/triad_avx.ptt | 12 +
bench/x86-64/triad_split.ptt | 30 +
bench/x86-64/update_avx.ptt | 15 +
bench/x86-64/update_plain.ptt | 15 +
bench/x86-64/update_sse.ptt | 15 +
bench/x86-64/vtriad_avx.ptt | 22 +
bench/x86-64/vtriad_mem_avx.ptt | 10 +
bench/x86-64/vtriad_mem_sse.ptt | 10 +
bench/x86-64/vtriad_plain.ptt | 22 +
bench/x86-64/vtriad_sse.ptt | 22 +
config.mk | 1 -
doc/feedGnuplot.1 | 190 +++
doc/likwid-accessD.1 | 22 +
doc/likwid-bench.1 | 31 +-
doc/likwid-features.1 | 14 +-
doc/likwid-genCfg.1 | 30 +
doc/likwid-memsweeper.1 | 28 +
doc/likwid-mpirun.1 | 81 ++
doc/likwid-perfctr.1 | 100 +-
doc/likwid-perfscope.1 | 55 +
doc/likwid-pin.1 | 24 +-
doc/likwid-powermeter.1 | 17 +-
doc/likwid-setFreq.1 | 24 +
doc/likwid-setFrequencies.1 | 16 +-
doc/likwid-topology.1 | 14 +-
filters/csv | 5 +-
filters/xml | 27 +-
groups/core2/BRANCH.txt | 2 +
groups/core2/CACHE.txt | 2 +
groups/core2/DATA.txt | 2 +
groups/core2/FLOPS_DP.txt | 2 +
groups/core2/FLOPS_SP.txt | 2 +
groups/core2/FLOPS_X87.txt | 2 +
groups/core2/L2.txt | 2 +
groups/core2/L2CACHE.txt | 2 +
groups/core2/MEM.txt | 2 +
groups/core2/TLB.txt | 2 +
groups/haswell/ENERGY.txt | 11 +-
groups/haswell/ICACHE.txt | 25 +
groups/haswell/L2.txt | 33 +
groups/haswell/L2CACHE.txt | 6 +-
groups/haswell/{L2CACHE.txt => L3CACHE.txt} | 24 +-
groups/haswell/TLB.txt | 22 -
groups/haswell/TLB_DATA.txt | 35 +
groups/haswell/TLB_INSTR.txt | 28 +
groups/ivybridge/ENERGY.txt | 4 +
groups/ivybridge/FLOPS_AVX.txt | 4 +-
groups/ivybridge/FLOPS_DP.txt | 2 +-
groups/ivybridge/ICACHE.txt | 25 +
.../{haswell/L2CACHE.txt => ivybridge/L3CACHE.txt} | 24 +-
groups/ivybridge/MEM.txt | 20 +-
groups/ivybridge/MEM_DP.txt | 24 +-
groups/ivybridge/MEM_SP.txt | 22 +-
groups/ivybridge/TLB.txt | 23 -
groups/ivybridge/TLB_DATA.txt | 35 +
groups/ivybridge/TLB_INSTR.txt | 28 +
groups/sandybridge/ENERGY.txt | 6 +-
groups/sandybridge/FLOPS_DP.txt | 2 +-
.../L2CACHE.txt => sandybridge/L3CACHE.txt} | 24 +-
groups/sandybridge/MEM.txt | 20 +-
groups/sandybridge/MEM_DP.txt | 22 +-
groups/sandybridge/MEM_SP.txt | 22 +-
groups/sandybridge/TLB.txt | 23 -
groups/sandybridge/TLB_DATA.txt | 35 +
groups/sandybridge/TLB_INSTR.txt | 28 +
groups/{core2 => silvermont}/BRANCH.txt | 17 +-
groups/{haswell => silvermont}/ENERGY.txt | 4 +-
groups/silvermont/ICACHE.txt | 25 +
groups/silvermont/L1TOL2.txt | 28 +
groups/silvermont/L2TOMEM.txt | 26 +
kernel/Makefile | 12 +
kernel/enable_rdpmc.c | 73 ++
make/include_GCC.mk | 2 +-
make/include_ICC.mk | 6 +-
perl/generatePas.pl | 2 +-
perl/likwid-mpirun | 36 +-
perl/likwid-setFrequencies | 41 +-
perl/set_license.pl | 4 +-
perl/templates/group.tt | 57 +-
src/access-daemon/Makefile | 9 +-
src/access-daemon/accessDaemon.c | 345 ++++--
src/access-daemon/setFreq.c | 100 +-
src/access-daemon/setFreq.c.tmp | 0
src/accessClient.c | 46 +-
src/affinity.c | 179 +--
src/allocator.c | 43 +-
src/applications/likwid-bench.c | 353 +++---
src/applications/likwid-features.c | 48 +-
src/applications/likwid-genCfg.c | 32 +-
src/applications/likwid-memsweeper.c | 43 +-
src/applications/likwid-perfctr.c | 166 ++-
src/applications/likwid-pin.c | 150 ++-
src/applications/likwid-powermeter.c | 291 +++--
src/applications/likwid-topology.c | 86 +-
src/asciiBoxes.c | 7 +-
src/asciiTable.c | 4 +-
src/barrier.c | 8 +-
src/bench.c | 122 +-
src/bitUtil.c | 4 +-
src/cpuFeatures.c | 274 ++--
src/cpuid.c | 108 +-
src/daemon.c | 94 +-
src/ghash.c | 1 -
src/hashTable.c | 10 +-
src/includes/accessClient.h | 4 +-
src/includes/accessClient_types.h | 4 +-
src/includes/affinity.h | 6 +-
src/includes/affinity_types.h | 6 +-
src/includes/allocator.h | 17 +-
src/includes/asciiBoxes.h | 6 +-
src/includes/asciiBoxes_types.h | 4 +-
src/includes/asciiTable.h | 4 +-
src/includes/asciiTable_types.h | 4 +-
src/includes/barrier.h | 4 +-
src/includes/barrier_types.h | 4 +-
src/includes/bitUtil.h | 4 +-
src/includes/cpuFeatures.h | 4 +-
src/includes/cpuFeatures_types.h | 38 +-
src/includes/cpuid.h | 11 +-
src/includes/cpuid_types.h | 5 +-
src/includes/daemon.h | 6 +-
src/includes/error.h | 4 +-
src/includes/ghash.h | 14 +-
src/includes/hashTable.h | 4 +-
src/includes/libperfctr_types.h | 4 +-
src/includes/likwid.h | 4 +-
src/includes/lock.h | 56 +-
src/includes/memsweep.h | 10 +-
src/includes/msr.h | 4 +-
src/includes/multiplex.h | 4 +-
src/includes/multiplex_types.h | 4 +-
src/includes/numa.h | 4 +-
src/includes/numa_types.h | 10 +-
src/includes/pci.h | 4 +-
src/includes/pci_types.h | 4 +-
src/includes/perfmon.h | 49 +-
src/includes/perfmon_atom.h | 4 +-
src/includes/perfmon_atom_events.txt | 4 +-
src/includes/perfmon_core2.h | 13 +-
src/includes/perfmon_core2_counters.h | 13 +-
src/includes/perfmon_core2_events.txt | 9 +-
src/includes/perfmon_haswell.h | 156 ++-
src/includes/perfmon_haswell_counters.h | 13 +-
src/includes/perfmon_haswell_events.txt | 148 +--
src/includes/perfmon_interlagos.h | 14 +-
src/includes/perfmon_interlagos_counters.h | 4 +-
src/includes/perfmon_interlagos_events.txt | 4 +-
src/includes/perfmon_ivybridge.h | 84 +-
src/includes/perfmon_ivybridge_counters.h | 103 +-
src/includes/perfmon_ivybridge_events.txt | 210 +---
src/includes/perfmon_k10.h | 13 +-
src/includes/perfmon_k10_counters.h | 4 +-
src/includes/perfmon_k10_events.txt | 4 +-
src/includes/perfmon_k8.h | 4 +-
src/includes/perfmon_k8_events.txt | 4 +-
src/includes/perfmon_kabini.h | 18 +-
src/includes/perfmon_kabini_counters.h | 4 +-
src/includes/perfmon_kabini_events.txt | 4 +-
src/includes/perfmon_nehalem.h | 44 +-
src/includes/perfmon_nehalemEX.h | 602 ++++++++-
src/includes/perfmon_nehalemEX_events.txt | 619 +++++++++-
src/includes/perfmon_nehalem_counters.h | 4 +-
src/includes/perfmon_nehalem_events.txt | 4 +-
src/includes/perfmon_p6_events.txt | 4 +-
src/includes/perfmon_phi.h | 11 +-
src/includes/perfmon_phi_counters.h | 4 +-
src/includes/perfmon_phi_events.txt | 4 +-
src/includes/perfmon_pm.h | 17 +-
src/includes/perfmon_pm_counters.h | 4 +-
src/includes/perfmon_pm_events.txt | 4 +-
src/includes/perfmon_sandybridge.h | 47 +-
src/includes/perfmon_sandybridge_counters.h | 93 +-
src/includes/perfmon_sandybridge_events.txt | 695 +----------
.../{perfmon_haswell.h => perfmon_silvermont.h} | 161 ++-
...ll_counters.h => perfmon_silvermont_counters.h} | 26 +-
src/includes/perfmon_silvermont_events.txt | 440 +++++++
src/includes/perfmon_types.h | 44 +-
src/includes/perfmon_westmere.h | 4 +-
src/includes/perfmon_westmereEX.h | 522 +++++---
src/includes/perfmon_westmereEX_counters.h | 72 +-
src/includes/perfmon_westmereEX_events.txt | 637 +++++++++-
src/includes/perfmon_westmere_events.txt | 4 +-
src/includes/power.h | 4 +-
src/includes/power_types.h | 4 +-
src/includes/registers.h | 84 +-
src/includes/strUtil.h | 6 +-
src/includes/strUtil_types.h | 4 +-
src/includes/test_types.h | 22 +-
src/includes/textcolor.h | 4 +-
src/includes/thermal.h | 9 +-
src/includes/thermal_types.h | 5 +-
src/includes/threads.h | 7 +-
src/includes/threads_types.h | 26 +-
src/includes/timer.h | 70 +-
src/includes/timer_types.h | 4 +-
src/includes/tree.h | 4 +-
src/includes/tree_types.h | 4 +-
src/includes/types.h | 4 +-
src/libperfctr.c | 79 +-
src/likwid.f90 | 28 +-
src/likwid_f90_interface.c | 8 +-
src/memsweep.c | 40 +-
src/msr.c | 168 ++-
src/multiplex.c | 4 +-
src/numa.c | 38 +-
src/pci.c | 159 ++-
src/perfmon.c | 255 +++-
src/power.c | 54 +-
src/pthread-overload/Makefile | 4 +-
src/pthread-overload/pthread-overload.c | 8 +-
src/strUtil.c | 1305 ++++++++++----------
src/thermal.c | 5 +-
src/threads.c | 13 +-
src/timer.c | 28 +-
src/tree.c | 44 +-
test/accuracy/Makefile | 25 +
test/accuracy/README | 18 +
test/accuracy/likwid-accuracy.py | 533 ++++++++
test/accuracy/likwid-tester | 4 +-
test/accuracy/statistics.py | 643 ++++++++++
test/executable_tests/Makefile | 22 +
test/executable_tests/README | 8 +
test/executable_tests/likwid-bench.txt | 29 +
test/executable_tests/likwid-features.txt | 9 +
test/executable_tests/likwid-genCfg.txt | 5 +
test/executable_tests/likwid-memsweeper.txt | 8 +
test/executable_tests/likwid-perfctr.txt | 38 +
test/executable_tests/likwid-pin.txt | 26 +
test/executable_tests/likwid-powermeter.txt | 14 +
test/executable_tests/likwid-setFreq.txt | 6 +
test/executable_tests/likwid-topology.txt | 11 +
test/executable_tests/tester.sh | 80 ++
260 files changed, 10026 insertions(+), 4091 deletions(-)
diff --git a/INSTALL b/INSTALL
index 4742591..5939aa9 100644
--- a/INSTALL
+++ b/INSTALL
@@ -20,7 +20,7 @@ the WIKI. On 32bit systems you have to pick the GCCX86 compiler target.
All generated files are located in the [GCC|ICC|GCCX86] build directory.
This includes the dependency files, object files and also the
generated source files and the pas and assembly files for likwid-bench.
-If you debug your likwid-bench benchmarks you can look at all
+If you debug your likwid-bench benchmarks you can look at all
intermediate build files and also the final assembly code.
== Known problems ==
@@ -60,7 +60,7 @@ Check if msr device files are there with 'ls /dev/cpu/0/'. If msr device files a
consult your distros documentation how to do so.
Once you have the msr device files avilable:
-3. Adopt access rights on the msr device files for normal user. To allow everybody access you can
+3. Adopt access rights on the msr device files for normal user. To allow everybody access you can
use 'chmod o+rw /dev/cpu/*/msr' . This is only recommended on save single user desktop systems.
As a general access to the msr registers is not desired on security sensitive
@@ -79,7 +79,11 @@ page:
http://code.google.com/p/likwid/wiki/MSRDaemon
A common solution to give access is to use the likwid-accessD and make it suid root.
-You need to carry out the following steps:
+Starting with version 3.1.3, 'make install' carries out the steps below automatically.
+Of course, this only works if you are root when calling 'make install'.
+
+If you are not root and someone else needs to install the daemon, the
+following steps need to be carried out:
1. Go to the directory where you installed the likwid tools.
2. Change to the sbin directory there.
diff --git a/Makefile b/Makefile
index de85f13..eecd4e9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,12 +1,12 @@
# =======================================================================================
-#
+#
# Filename: Makefile
-#
+#
# Description: Central Makefile
-#
-# Version: 3.1.2
-# Released: 2.6.2014
-#
+#
+# Version: 3.1.3
+# Released: 4.11.2014
+#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
#
@@ -87,11 +87,14 @@ endif
LIKWID_LIB = liblikwid
ifeq ($(SHARED_LIBRARY),true)
-CFLAGS += $(SHARED_CFLAGS)
+CFLAGS += $(SHARED_CFLAGS) -ggdb
DYNAMIC_TARGET_LIB := $(LIKWID_LIB).so
+TARGET_LIB := $(DYNAMIC_TARGET_LIB)
LIBS += -L. -llikwid
+SHARED_LFLAGS += -lm -lpthread
else
STATIC_TARGET_LIB := $(LIKWID_LIB).a
+TARGET_LIB := $(STATIC_TARGET_LIB)
endif
ifneq ($(COLOR),NONE)
@@ -155,16 +158,17 @@ PERFMONHEADERS = $(patsubst $(SRC_DIR)/includes/%.txt, $(BUILD_DIR)/%.h,$(wildc
OBJ_BENCH = $(patsubst $(BENCH_DIR)/%.ptt, $(BUILD_DIR)/%.o,$(wildcard $(BENCH_DIR)/*.ptt))
APPS = likwid-perfctr \
- likwid-features \
- likwid-powermeter \
- likwid-memsweeper \
- likwid-topology \
- likwid-genCfg \
- likwid-pin \
- likwid-bench
-
-PERL_APPS = likwid-mpirun \
- likwid-perfscope
+ likwid-features \
+ likwid-powermeter \
+ likwid-memsweeper \
+ likwid-topology \
+ likwid-genCfg \
+ likwid-pin \
+ likwid-bench
+
+PERL_APPS = likwid-mpirun \
+ likwid-setFrequencies \
+ likwid-perfscope
DAEMON_APPS = $(SETFREQ_TARGET) \
$(DAEMON_TARGET)
@@ -185,7 +189,7 @@ FORTRAN_INTERFACE =
FORTRAN_INSTALL =
endif
-all: $(BUILD_DIR) $(GENGROUPLOCK) $(PERFMONHEADERS) $(OBJ) $(OBJ_BENCH) $(STATIC_TARGET_LIB) $(DYNAMIC_TARGET_LIB) $(APPS) $(FORTRAN_INTERFACE) $(PINLIB) $(DAEMON_TARGET) $(SETFREQ_TARGET)
+all: $(BUILD_DIR) $(GENGROUPLOCK) $(PERFMONHEADERS) $(OBJ) $(OBJ_BENCH) $(TARGET_LIB) $(APPS) $(FORTRAN_INTERFACE) $(PINLIB) $(DAEMON_TARGET) $(SETFREQ_TARGET)
tags:
@echo "===> GENERATE TAGS"
@@ -197,11 +201,11 @@ $(APPS): $(addprefix $(SRC_DIR)/applications/,$(addsuffix .c,$(APPS))) $(BUILD
$(STATIC_TARGET_LIB): $(OBJ)
@echo "===> CREATE STATIC LIB $(STATIC_TARGET_LIB)"
- $(Q)${AR} -cq $(STATIC_TARGET_LIB) $(OBJ)
+ $(Q)${AR} -crus $(STATIC_TARGET_LIB) $(OBJ)
$(DYNAMIC_TARGET_LIB): $(OBJ)
@echo "===> CREATE SHARED LIB $(DYNAMIC_TARGET_LIB)"
- $(Q)${CC} $(SHARED_LFLAGS) $(SHARED_CFLAGS) -o $(DYNAMIC_TARGET_LIB) $(OBJ) -lm
+ $(Q)${CC} $(SHARED_CFLAGS) -o $(DYNAMIC_TARGET_LIB) $(OBJ) -lm $(SHARED_LFLAGS)
$(DAEMON_TARGET): $(SRC_DIR)/access-daemon/accessDaemon.c
@echo "===> Build access daemon $(DAEMON_TARGET)"
@@ -216,7 +220,7 @@ $(BUILD_DIR):
$(PINLIB):
@echo "===> CREATE LIB $(PINLIB)"
- $(Q)$(MAKE) -s -C src/pthread-overload/ $(PINLIB)
+ $(Q)$(MAKE) -s -C src/pthread-overload/ $(PINLIB)
$(GENGROUPLOCK): $(foreach directory,$(shell ls $(GROUP_DIR)), $(wildcard $(GROUP_DIR)/$(directory)/*.txt))
@echo "===> GENERATE GROUP HEADERS"
@@ -288,17 +292,21 @@ install:
cp -f $$app $(PREFIX)/bin; \
done
@cp -f perl/feedGnuplot $(PREFIX)/bin
- @sed -e "s+<PREFIX>+$(PREFIX)+g" perl/likwid-setFrequencies > $(PREFIX)/bin/likwid-setFrequencies
@for app in $(PERL_APPS); do \
- cp -f perl/$$app $(PREFIX)/bin; \
+ sed -e "s+<PREFIX>+$(PREFIX)+g" perl/$$app > $(PREFIX)/bin/$$app; \
done
@chmod 755 $(PREFIX)/bin/likwid-*
- @echo "===> INSTALL daemon applications to $(PREFIX)/bin"
+ @echo "===> INSTALL daemon applications to $(PREFIX)/sbin"
@mkdir -p $(PREFIX)/sbin
@for app in $(DAEMON_APPS); do \
cp -f $$app $(PREFIX)/sbin; \
+ if [ $(shell id -u) = "0" ]; then \
+ chown root $(PREFIX)/sbin/$$app; \
+ chmod 4775 $(PREFIX)/sbin/$$app; \
+ else \
+ echo "Only root can adjust the privileges of the daemon applications in $(PREFIX)/sbin"; \
+ fi; \
done
- @chmod 755 $(PREFIX)/sbin/likwid-*
@echo "===> INSTALL man pages to $(MANPREFIX)/man1"
@mkdir -p $(MANPREFIX)/man1
@sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/likwid-topology.1 > $(MANPREFIX)/man1/likwid-topology.1
@@ -308,10 +316,19 @@ install:
@sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/likwid-pin.1 > $(MANPREFIX)/man1/likwid-pin.1
@sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/likwid-setFrequencies.1 > $(MANPREFIX)/man1/likwid-setFrequencies.1
@sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/likwid-bench.1 > $(MANPREFIX)/man1/likwid-bench.1
+ @sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/feedGnuplot.1 > $(MANPREFIX)/man1/feedGnuplot.1
+ @sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/likwid-accessD.1 > $(MANPREFIX)/man1/likwid-accessD.1
+ @sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/likwid-genCfg.1 > $(MANPREFIX)/man1/likwid-genCfg.1
+ @sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/likwid-memsweeper.1 > $(MANPREFIX)/man1/likwid-memsweeper.1
+ @sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/likwid-mpirun.1 > $(MANPREFIX)/man1/likwid-mpirun.1
+ @sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/likwid-perfscope.1 > $(MANPREFIX)/man1/likwid-perfscope.1
+ @sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/likwid-setFreq.1 > $(MANPREFIX)/man1/likwid-setFreq.1
@chmod 644 $(MANPREFIX)/man1/likwid-*
@echo "===> INSTALL headers to $(PREFIX)/include"
- @mkdir -p $(PREFIX)/include
+ @mkdir -p $(PREFIX)/include/likwid
@cp -f src/includes/likwid*.h $(PREFIX)/include/
+ @cp -f src/includes/* $(PREFIX)/include/likwid
+ @cp -f GCC/perfmon_group_types.h $(PREFIX)/include/likwid
$(FORTRAN_INSTALL)
@echo "===> INSTALL libraries to $(PREFIX)/lib"
@mkdir -p $(PREFIX)/lib
@@ -321,23 +338,22 @@ install:
@mkdir -p $(LIKWIDFILTERPATH)
@cp -f filters/* $(LIKWIDFILTERPATH)
@chmod 755 $(LIKWIDFILTERPATH)/*
- @echo
- @echo "Please set suitable permissions and capabilities\nfor the daemon applications in $(PREFIX)/sbin"
uninstall:
@echo "===> REMOVING applications from $(PREFIX)/bin"
- @rm -f $(addprefix $(PREFIX)/bin/,$(APPS))
+ @rm -f $(addprefix $(PREFIX)/bin/,$(APPS))
@rm -f $(addprefix $(PREFIX)/bin/,$(PERL_APPS))
- @rm -f $(PREFIX)/bin/likwid-setFrequencies
@rm -f $(PREFIX)/bin/feedGnuplot
@echo "===> REMOVING daemon applications from $(PREFIX)/sbin"
- @rm -f $(addprefix $(PREFIX)/sbin/,$(DAEMON_APPS))
+ @rm -f $(addprefix $(PREFIX)/sbin/,$(DAEMON_APPS))
@echo "===> REMOVING man pages from $(MANPREFIX)/man1"
- @rm -f $(addprefix $(MANPREFIX)/man1/,$(addsuffix .1,$(APPS)))
+ @rm -f $(MANPREFIX)/man1/likwid-*
+ @rm -f $(MANPREFIX)/man1/feedGnuplot.1
@echo "===> REMOVING headers from $(PREFIX)/include"
@rm -f $(PREFIX)/include/likwid*.h
+ @rm -rf $(PREFIX)/include/likwid
@echo "===> REMOVING libs from $(PREFIX)/lib"
- @rm -f $(PREFIX)/lib/$(LIKWID_LIB)*
+ @rm -f $(PREFIX)/lib/$(LIKWID_LIB)*
@echo "===> REMOVING filter from $(PREFIX)/share"
@rm -rf $(PREFIX)/share/likwid
diff --git a/README b/README
index 7e3e466..f47ac01 100644
--- a/README
+++ b/README
@@ -21,7 +21,7 @@ likwid wiki pages at:
http://code.google.com/p/likwid/wiki/Introduction
-If you have problems or suggestions please let me know on the likwid mailing list:
+If you have problems or suggestions please let us know on the likwid mailing list:
http://groups.google.com/group/likwid-users
diff --git a/bench/x86-64/branch.ptt b/bench/x86-64/branch.ptt
new file mode 100644
index 0000000..e15086d
--- /dev/null
+++ b/bench/x86-64/branch.ptt
@@ -0,0 +1,36 @@
+STREAMS 4
+TYPE DOUBLE_RAND
+FLOPS 2
+BYTES 32
+LOOP 8
+movaps FPR1, [STR1 + GPR1*8]
+movaps FPR2, [STR1 + GPR1*8+16]
+movaps FPR3, [STR1 + GPR1*8+32]
+movaps FPR4, [STR1 + GPR1*8+48]
+cvtsd2si GPR2, FPR1
+cmp GPR2, 0
+jl sub
+mulpd FPR1, [STR2 + GPR1*8]
+addpd FPR1, [STR3 + GPR1*8]
+mulpd FPR2, [STR2 + GPR1*8+16]
+addpd FPR2, [STR3 + GPR1*8+16]
+mulpd FPR3, [STR2 + GPR1*8+32]
+addpd FPR3, [STR3 + GPR1*8+32]
+mulpd FPR4, [STR2 + GPR1*8+48]
+addpd FPR4, [STR3 + GPR1*8+48]
+jmp end
+sub:
+mulpd FPR1, [STR2 + GPR1*8]
+subpd FPR1, [STR3 + GPR1*8]
+mulpd FPR2, [STR2 + GPR1*8+16]
+subpd FPR2, [STR3 + GPR1*8+16]
+mulpd FPR3, [STR2 + GPR1*8+32]
+subpd FPR3, [STR3 + GPR1*8+32]
+mulpd FPR4, [STR2 + GPR1*8+48]
+subpd FPR4, [STR3 + GPR1*8+48]
+end:
+movaps [STR0 + GPR1*8], FPR1
+movaps [STR0 + GPR1*8+16], FPR2
+movaps [STR0 + GPR1*8+32], FPR3
+movaps [STR0 + GPR1*8+48], FPR4
+
diff --git a/bench/x86-64/copy_avx.ptt b/bench/x86-64/copy_avx.ptt
new file mode 100644
index 0000000..814bb78
--- /dev/null
+++ b/bench/x86-64/copy_avx.ptt
@@ -0,0 +1,15 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+LOOP 16
+vmovaps ymm1, [STR0 + GPR1 * 8]
+vmovaps ymm2, [STR0 + GPR1 * 8 + 32]
+vmovaps ymm3, [STR0 + GPR1 * 8 + 64]
+vmovaps ymm4, [STR0 + GPR1 * 8 + 96]
+vmovaps [STR1 + GPR1 * 8] , ymm1
+vmovaps [STR1 + GPR1 * 8 + 32], ymm2
+vmovaps [STR1 + GPR1 * 8 + 64], ymm3
+vmovaps [STR1 + GPR1 * 8 + 96], ymm4
+
+
diff --git a/bench/x86-64/copy_mem_avx.ptt b/bench/x86-64/copy_mem_avx.ptt
new file mode 100644
index 0000000..651a55e
--- /dev/null
+++ b/bench/x86-64/copy_mem_avx.ptt
@@ -0,0 +1,14 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+LOOP 32
+vmovaps ymm1, [STR0 + GPR1 * 8]
+vmovaps ymm2, [STR0 + GPR1 * 8 + 32]
+vmovaps ymm3, [STR0 + GPR1 * 8 + 64]
+vmovaps ymm4, [STR0 + GPR1 * 8 + 96]
+vmovntps [STR1 + GPR1 * 8] , ymm1
+vmovntps [STR1 + GPR1 * 8 + 32], ymm2
+vmovntps [STR1 + GPR1 * 8 + 64], ymm3
+vmovntps [STR1 + GPR1 * 8 + 96], ymm4
+
diff --git a/bench/x86-64/copy_mem_sse.ptt b/bench/x86-64/copy_mem_sse.ptt
new file mode 100644
index 0000000..f803bce
--- /dev/null
+++ b/bench/x86-64/copy_mem_sse.ptt
@@ -0,0 +1,15 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+LOOP 8
+movaps FPR1, [STR0 + GPR1 * 8]
+movaps FPR2, [STR0 + GPR1 * 8 + 16]
+movaps FPR3, [STR0 + GPR1 * 8 + 32]
+movaps FPR4, [STR0 + GPR1 * 8 + 48]
+movntps [STR1 + GPR1 * 8] , FPR1
+movntps [STR1 + GPR1 * 8 + 16], FPR2
+movntps [STR1 + GPR1 * 8 + 32], FPR3
+movntps [STR1 + GPR1 * 8 + 48], FPR4
+
+
diff --git a/bench/x86-64/copy_plain.ptt b/bench/x86-64/copy_plain.ptt
new file mode 100644
index 0000000..4fcbbbc
--- /dev/null
+++ b/bench/x86-64/copy_plain.ptt
@@ -0,0 +1,16 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+LOOP 4
+movsd FPR1, [STR0 + GPR1 * 8]
+movsd FPR2, [STR0 + GPR1 * 8 + 8]
+movsd FPR3, [STR0 + GPR1 * 8 + 16]
+movsd FPR4, [STR0 + GPR1 * 8 + 24]
+movsd [STR1 + GPR1 * 8] , FPR1
+movsd [STR1 + GPR1 * 8 + 8] , FPR2
+movsd [STR1 + GPR1 * 8 + 16], FPR3
+movsd [STR1 + GPR1 * 8 + 24], FPR4
+
+
+
diff --git a/bench/x86-64/copy_sse.ptt b/bench/x86-64/copy_sse.ptt
new file mode 100644
index 0000000..ffca4f5
--- /dev/null
+++ b/bench/x86-64/copy_sse.ptt
@@ -0,0 +1,15 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+LOOP 8
+movaps FPR1, [STR0 + GPR1 * 8]
+movaps FPR2, [STR0 + GPR1 * 8 + 16]
+movaps FPR3, [STR0 + GPR1 * 8 + 32]
+movaps FPR4, [STR0 + GPR1 * 8 + 48]
+movaps [STR1 + GPR1 * 8] , FPR1
+movaps [STR1 + GPR1 * 8 + 16], FPR2
+movaps [STR1 + GPR1 * 8 + 32], FPR3
+movaps [STR1 + GPR1 * 8 + 48], FPR4
+
+
diff --git a/bench/x86-64/load_avx.ptt b/bench/x86-64/load_avx.ptt
new file mode 100644
index 0000000..93b45c7
--- /dev/null
+++ b/bench/x86-64/load_avx.ptt
@@ -0,0 +1,12 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+LOOP 16
+mov GPR12, [STR0 + GPR1 * 8 + 256]
+vmovaps ymm1, [STR0 + GPR1 * 8]
+vmovaps ymm2, [STR0 + GPR1 * 8 + 32]
+vmovaps ymm3, [STR0 + GPR1 * 8 + 64]
+vmovaps ymm4, [STR0 + GPR1 * 8 + 96]
+
+
diff --git a/bench/x86-64/load_plain.ptt b/bench/x86-64/load_plain.ptt
new file mode 100644
index 0000000..be6d21c
--- /dev/null
+++ b/bench/x86-64/load_plain.ptt
@@ -0,0 +1,12 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+LOOP 4
+mov GPR12, [STR0 + GPR1 * 8 + 256]
+movsd FPR1, [STR0 + GPR1 * 8]
+movsd FPR2, [STR0 + GPR1 * 8 + 8]
+movsd FPR3, [STR0 + GPR1 * 8 + 16]
+movsd FPR4, [STR0 + GPR1 * 8 + 24]
+
+
diff --git a/bench/x86-64/load_sse.ptt b/bench/x86-64/load_sse.ptt
new file mode 100644
index 0000000..36aaab1
--- /dev/null
+++ b/bench/x86-64/load_sse.ptt
@@ -0,0 +1,12 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+LOOP 8
+mov GPR12, [STR0 + GPR1 * 8 + 256]
+movaps FPR1, [STR0 + GPR1 * 8]
+movaps FPR2, [STR0 + GPR1 * 8 + 16]
+movaps FPR3, [STR0 + GPR1 * 8 + 32]
+movaps FPR4, [STR0 + GPR1 * 8 + 48]
+
+
diff --git a/bench/x86-64/peak_avx.ptt b/bench/x86-64/peak_avx.ptt
new file mode 100644
index 0000000..047178e
--- /dev/null
+++ b/bench/x86-64/peak_avx.ptt
@@ -0,0 +1,49 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 2
+BYTES 16
+INC 16
+vmovaps ymm1, [SCALAR]
+sub GPR2, 8
+sub STR0, 64
+sub STR1, 64
+mov GPR1, GPR2
+neg GPR1
+.align 32
+1:
+# per iteration: four 32-byte slots of STR0 (offsets 0/32/64/96) are loaded, run through add/mul with ymm1 twice, and stored to STR1 at the matching offset
+vmovaps ymm2, [STR0 + GPR1 * 8 ]
+vaddpd ymm2, ymm2, ymm1
+vmulpd ymm2, ymm2, ymm1
+vmovaps ymm6, [STR0 + GPR1 * 8 ]
+vaddpd ymm2, ymm2, ymm1
+vmulpd ymm2, ymm2, ymm1
+#vpshufd ymm2, ymm1, 0x1
+vmovaps [STR1 + GPR1 * 8], ymm2
+vmovaps ymm3, [STR0 + GPR1 * 8 + 32]
+vaddpd ymm3, ymm3, ymm1
+vmulpd ymm3, ymm3, ymm1
+vmovaps ymm7, [STR0 + GPR1 * 8 + 32 ]
+vaddpd ymm3, ymm3, ymm1
+vmulpd ymm3, ymm3, ymm1
+#vpshufd ymm3, ymm1, 0x1
+vmovaps [STR1 + GPR1 * 8 + 32], ymm3
+vmovaps ymm4, [STR0 + GPR1 * 8 + 64]
+vaddpd ymm4, ymm4, ymm1
+vmulpd ymm4, ymm4, ymm1
+vmovaps ymm8, [STR0 + GPR1 * 8 + 64 ]
+vaddpd ymm4, ymm4, ymm1
+vmulpd ymm4, ymm4, ymm1
+#vpshufd ymm4, ymm1, 0x1
+# ymm4 was loaded from offset 64, so it is written back to offset 64 (not 32, which is the slot written from ymm3)
+vmovaps [STR1 + GPR1 * 8 + 64], ymm4
+vmovaps ymm5, [STR0 + GPR1 * 8 + 96]
+vaddpd ymm5, ymm5, ymm1
+vmulpd ymm5, ymm5, ymm1
+vmovaps ymm9, [STR0 + GPR1 * 8 + 96]
+vaddpd ymm5, ymm5, ymm1
+vmulpd ymm5, ymm5, ymm1
+#vpshufd ymm5, ymm1, 0x1
+vmovaps [STR1 + GPR1 * 8 + 96], ymm5
+add GPR1, 16
+js 1b
diff --git a/bench/x86-64/peak_sse.ptt b/bench/x86-64/peak_sse.ptt
new file mode 100644
index 0000000..c03e2c8
--- /dev/null
+++ b/bench/x86-64/peak_sse.ptt
@@ -0,0 +1,49 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 2
+BYTES 16
+INC 8
+movaps FPR1, [SCALAR]
+sub GPR2, 4
+sub STR0, 32
+sub STR1, 32
+mov GPR1, GPR2
+neg GPR1
+.align 16
+1:
+movaps FPR2, [STR0 + GPR1 * 8 ]
+addpd FPR2, FPR1
+mulpd FPR2, FPR1
+movaps FPR6, [STR0 + GPR1 * 8 ]
+addpd FPR2, FPR1
+mulpd FPR2, FPR1
+pshufd FPR2, FPR1, 0x1
+#movaps [STR1 + GPR1 * 8], FPR2
+movaps FPR3, [STR0 + GPR1 * 8 + 16]
+addpd FPR3, FPR1
+mulpd FPR3, FPR1
+movaps FPR7, [STR0 + GPR1 * 8 + 16 ]
+addpd FPR3, FPR1
+mulpd FPR3, FPR1
+pshufd FPR3, FPR1, 0x1
+#movaps [STR1 + GPR1 * 8 + 16], FPR3
+movaps FPR4, [STR0 + GPR1 * 8 + 32]
+addpd FPR4, FPR1
+mulpd FPR4, FPR1
+movaps FPR8, [STR0 + GPR1 * 8 + 32 ]
+addpd FPR4, FPR1
+mulpd FPR4, FPR1
+pshufd FPR4, FPR1, 0x1
+#movaps [STR1 + GPR1 * 8 + 32], FPR4
+movaps FPR5, [STR0 + GPR1 * 8 + 48]
+addpd FPR5, FPR1
+mulpd FPR5, FPR1
+movaps FPR9, [STR0 + GPR1 * 8 + 48 ]
+addpd FPR5, FPR1
+mulpd FPR5, FPR1
+pshufd FPR5, FPR1, 0x1
+#movaps [STR1 + GPR1 * 8 + 48], FPR5
+add GPR1, 8
+js 1b
+
+
diff --git a/bench/x86-64/peakflops_avx.ptt b/bench/x86-64/peakflops_avx.ptt
new file mode 100644
index 0000000..d9f9885
--- /dev/null
+++ b/bench/x86-64/peakflops_avx.ptt
@@ -0,0 +1,37 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 2
+BYTES 16
+INC 16
+vmovaps ymm1, [SCALAR]
+sub GPR2, 8
+sub STR0, 64
+sub STR1, 64
+mov GPR1, GPR2
+neg GPR1
+.align 32
+1:
+vmovaps ymm2, [STR0 + GPR1 * 8 ]
+vaddpd ymm2, ymm2, ymm1
+vmulpd ymm2, ymm2, ymm1
+vaddpd ymm2, ymm2, ymm1
+vmulpd ymm2, ymm2, ymm1
+vmovaps ymm3, [STR0 + GPR1 * 8 + 32]
+add GPR1, 16
+vaddpd ymm3, ymm3, ymm1
+vmulpd ymm3, ymm3, ymm1
+vaddpd ymm3, ymm3, ymm1
+vmulpd ymm3, ymm3, ymm1
+vmovaps ymm4, [STR0 + GPR1 * 8 - 64]
+vaddpd ymm4, ymm4, ymm1
+vmulpd ymm4, ymm4, ymm1
+vaddpd ymm4, ymm4, ymm1
+vmulpd ymm4, ymm4, ymm1
+vmovaps ymm5, [STR0 + GPR1 * 8 - 32]
+vaddpd ymm5, ymm5, ymm1
+vmulpd ymm5, ymm5, ymm1
+vaddpd ymm5, ymm5, ymm1
+vmulpd ymm5, ymm5, ymm1
+js 1b
+
+
diff --git a/bench/x86-64/peakflops_sse.ptt b/bench/x86-64/peakflops_sse.ptt
new file mode 100644
index 0000000..94c769a
--- /dev/null
+++ b/bench/x86-64/peakflops_sse.ptt
@@ -0,0 +1,37 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 2
+BYTES 16
+INC 8
+movaps FPR1, [SCALAR]
+sub GPR2, 4
+sub STR0, 32
+sub STR1, 32
+mov GPR1, GPR2
+neg GPR1
+.align 32
+1:
+movaps FPR2, [STR0 + GPR1 * 8 ]
+addpd FPR2, FPR1
+mulpd FPR2, FPR1
+addpd FPR2, FPR1
+mulpd FPR2, FPR1
+movaps FPR3, [STR0 + GPR1 * 8 + 16]
+add GPR1, 8
+addpd FPR3, FPR1
+mulpd FPR3, FPR1
+addpd FPR3, FPR1
+mulpd FPR3, FPR1
+movaps FPR4, [STR0 + GPR1 * 8 - 32]
+addpd FPR4, FPR1
+mulpd FPR4, FPR1
+addpd FPR4, FPR1
+mulpd FPR4, FPR1
+movaps FPR5, [STR0 + GPR1 * 8 - 16]
+addpd FPR5, FPR1
+mulpd FPR5, FPR1
+addpd FPR5, FPR1
+mulpd FPR5, FPR1
+js 1b
+
+
diff --git a/bench/x86-64/store_avx.ptt b/bench/x86-64/store_avx.ptt
new file mode 100644
index 0000000..7b589a8
--- /dev/null
+++ b/bench/x86-64/store_avx.ptt
@@ -0,0 +1,15 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+vmovaps ymm1, [SCALAR]
+vmovaps ymm2, [SCALAR]
+vmovaps ymm3, [SCALAR]
+vmovaps ymm4, [SCALAR]
+LOOP 16
+#mov GPR14, [STR0 + GPR1 * 8 + 256]
+vmovaps [STR0 + GPR1 * 8] , ymm1
+vmovaps [STR0 + GPR1 * 8 + 32], ymm2
+vmovaps [STR0 + GPR1 * 8 + 64], ymm3
+vmovaps [STR0 + GPR1 * 8 + 96], ymm4
+
diff --git a/bench/x86-64/store_mem_avx.ptt b/bench/x86-64/store_mem_avx.ptt
new file mode 100644
index 0000000..e023fd0
--- /dev/null
+++ b/bench/x86-64/store_mem_avx.ptt
@@ -0,0 +1,14 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+vmovaps ymm1, [SCALAR]
+vmovaps ymm2, [SCALAR]
+vmovaps ymm3, [SCALAR]
+vmovaps ymm4, [SCALAR]
+LOOP 16
+vmovntpd [STR0 + GPR1 * 8] , ymm1
+vmovntpd [STR0 + GPR1 * 8 + 32], ymm2
+vmovntpd [STR0 + GPR1 * 8 + 64], ymm3
+vmovntpd [STR0 + GPR1 * 8 + 96], ymm4
+
diff --git a/bench/x86-64/store_mem_sse.ptt b/bench/x86-64/store_mem_sse.ptt
new file mode 100644
index 0000000..0a0222d
--- /dev/null
+++ b/bench/x86-64/store_mem_sse.ptt
@@ -0,0 +1,14 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+movaps FPR1, [SCALAR]
+movaps FPR2, [SCALAR]
+movaps FPR3, [SCALAR]
+movaps FPR4, [SCALAR]
+LOOP 8
+movntpd [STR0 + GPR1 * 8] , FPR1
+movntpd [STR0 + GPR1 * 8 + 16], FPR2
+movntpd [STR0 + GPR1 * 8 + 32], FPR3
+movntpd [STR0 + GPR1 * 8 + 48], FPR4
+
diff --git a/bench/x86-64/store_plain.ptt b/bench/x86-64/store_plain.ptt
new file mode 100644
index 0000000..0f667cd
--- /dev/null
+++ b/bench/x86-64/store_plain.ptt
@@ -0,0 +1,15 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+movsd FPR1, [SCALAR]
+movsd FPR2, [SCALAR]
+movsd FPR3, [SCALAR]
+movsd FPR4, [SCALAR]
+LOOP 4
+#mov GPR14, [STR0 + GPR1 * 8 + 256]
+movsd [STR0 + GPR1 * 8] , FPR1
+movsd [STR0 + GPR1 * 8 + 8], FPR2
+movsd [STR0 + GPR1 * 8 + 16], FPR3
+movsd [STR0 + GPR1 * 8 + 24], FPR4
+
diff --git a/bench/x86-64/store_sse.ptt b/bench/x86-64/store_sse.ptt
new file mode 100644
index 0000000..4ef9ab9
--- /dev/null
+++ b/bench/x86-64/store_sse.ptt
@@ -0,0 +1,15 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+movaps FPR1, [SCALAR]
+movaps FPR2, [SCALAR]
+movaps FPR3, [SCALAR]
+movaps FPR4, [SCALAR]
+LOOP 8
+#mov GPR14, [STR0 + GPR1 * 8 + 256]
+movaps [STR0 + GPR1 * 8] , FPR1
+movaps [STR0 + GPR1 * 8 + 16], FPR2
+movaps [STR0 + GPR1 * 8 + 32], FPR3
+movaps [STR0 + GPR1 * 8 + 48], FPR4
+
diff --git a/bench/x86-64/stream_avx.ptt b/bench/x86-64/stream_avx.ptt
new file mode 100644
index 0000000..8fbaf7c
--- /dev/null
+++ b/bench/x86-64/stream_avx.ptt
@@ -0,0 +1,22 @@
+STREAMS 3
+TYPE SINGLE
+FLOPS 4
+BYTES 48
+vbroadcastss ymm1, [SCALAR]
+LOOP 8
+vmovaps ymm2, [STR1 + GPR1*8]
+vmovaps ymm3, [STR1 + GPR1*8+16]
+vmovaps ymm4, [STR1 + GPR1*8+32]
+vmovaps ymm5, [STR1 + GPR1*8+48]
+vmulps ymm2, ymm2, ymm1
+vaddps ymm2, ymm2, [STR2 + GPR1*8]
+vmulps ymm3, ymm3, ymm1
+vaddps ymm3, ymm3, [STR2 + GPR1*8]
+vmulps ymm4, ymm4, ymm1
+vaddps ymm4, ymm4, [STR2 + GPR1*8]
+vmulps ymm5, ymm5, ymm1
+vaddps ymm5, ymm5, [STR2 + GPR1*8]
+vmovaps [STR0 + GPR1*8], ymm2
+vmovaps [STR0 + GPR1*8+16], ymm3
+vmovaps [STR0 + GPR1*8+32], ymm4
+vmovaps [STR0 + GPR1*8+48], ymm5
diff --git a/bench/x86-64/striad_avx.ptt b/bench/x86-64/striad_avx.ptt
new file mode 100644
index 0000000..b3c1317
--- /dev/null
+++ b/bench/x86-64/striad_avx.ptt
@@ -0,0 +1,23 @@
+STREAMS 3
+TYPE DOUBLE
+FLOPS 2
+BYTES 24
+vmovaps ymm5, [SCALAR]
+LOOP 16
+vmovaps ymm1, [STR1 + GPR1*8]
+vmovaps ymm2, [STR1 + GPR1*8+32]
+vmovaps ymm3, [STR1 + GPR1*8+64]
+vmovaps ymm4, [STR1 + GPR1*8+96]
+vmulpd ymm1, ymm1, ymm5
+vaddpd ymm1, ymm1, [STR2 + GPR1*8]
+vmulpd ymm2, ymm2, ymm5
+vaddpd ymm2, ymm2, [STR2 + GPR1*8+32]
+vmulpd ymm3, ymm3, ymm5
+vaddpd ymm3, ymm3, [STR2 + GPR1*8+64]
+vmulpd ymm4, ymm4, ymm5
+vaddpd ymm4, ymm4, [STR2 + GPR1*8+96]
+vmovaps [STR0 + GPR1*8] , ymm1
+vmovaps [STR0 + GPR1*8+32], ymm2
+vmovaps [STR0 + GPR1*8+64], ymm3
+vmovaps [STR0 + GPR1*8+96], ymm4
+
diff --git a/bench/x86-64/striad_mem_avx.ptt b/bench/x86-64/striad_mem_avx.ptt
new file mode 100644
index 0000000..cef2688
--- /dev/null
+++ b/bench/x86-64/striad_mem_avx.ptt
@@ -0,0 +1,11 @@
+STREAMS 3
+TYPE DOUBLE
+FLOPS 2
+BYTES 24
+vmovaps ymm5, [SCALAR]
+LOOP 4
+vmovaps ymm1, [STR2 + GPR1*8]
+vmulpd ymm1, ymm1, ymm5
+vaddpd ymm1, ymm1, [STR1 + GPR1*8]
+vmovntpd [STR0 + GPR1*8], ymm1
+
diff --git a/bench/x86-64/striad_mem_sse.ptt b/bench/x86-64/striad_mem_sse.ptt
new file mode 100644
index 0000000..b8364cc
--- /dev/null
+++ b/bench/x86-64/striad_mem_sse.ptt
@@ -0,0 +1,11 @@
+STREAMS 3
+TYPE DOUBLE
+FLOPS 2
+BYTES 24
+movaps FPR5, [SCALAR]
+LOOP 2
+movaps FPR1, [STR2 + GPR1*8]
+mulpd FPR1, FPR5
+addpd FPR1, [STR1 + GPR1*8]
+movntpd [STR0 + GPR1*8], FPR1
+
diff --git a/bench/x86-64/striad_plain.ptt b/bench/x86-64/striad_plain.ptt
new file mode 100644
index 0000000..7b29664
--- /dev/null
+++ b/bench/x86-64/striad_plain.ptt
@@ -0,0 +1,23 @@
+STREAMS 3
+TYPE DOUBLE
+FLOPS 2
+BYTES 24
+movss FPR5, [SCALAR]
+LOOP 4
+movsd FPR1, [STR1 + GPR1*8]
+movsd FPR2, [STR1 + GPR1*8+8]
+movsd FPR3, [STR1 + GPR1*8+16]
+movsd FPR4, [STR1 + GPR1*8+24]
+mulsd FPR1, FPR5
+addsd FPR1, [STR2 + GPR1*8]
+mulsd FPR2, FPR5
+addsd FPR2, [STR2 + GPR1*8+8]
+mulsd FPR3, FPR5
+addsd FPR3, [STR2 + GPR1*8+16]
+mulsd FPR4, FPR5
+addsd FPR4, [STR2 + GPR1*8+24]
+movsd [STR0 + GPR1*8] , FPR1
+movsd [STR0 + GPR1*8+8] , FPR2
+movsd [STR0 + GPR1*8+16], FPR3
+movsd [STR0 + GPR1*8+24], FPR4
+
diff --git a/bench/x86-64/striad_sse.ptt b/bench/x86-64/striad_sse.ptt
new file mode 100644
index 0000000..7c84c3c
--- /dev/null
+++ b/bench/x86-64/striad_sse.ptt
@@ -0,0 +1,23 @@
+STREAMS 3
+TYPE DOUBLE
+FLOPS 2
+BYTES 24
+movaps FPR5, [SCALAR]
+LOOP 8
+movaps FPR1, [STR1 + GPR1*8]
+movaps FPR2, [STR1 + GPR1*8+16]
+movaps FPR3, [STR1 + GPR1*8+32]
+movaps FPR4, [STR1 + GPR1*8+48]
+mulpd FPR1, FPR5
+addpd FPR1, [STR2 + GPR1*8]
+mulpd FPR2, FPR5
+addpd FPR2, [STR2 + GPR1*8+16]
+mulpd FPR3, FPR5
+addpd FPR3, [STR2 + GPR1*8+32]
+mulpd FPR4, FPR5
+addpd FPR4, [STR2 + GPR1*8+48]
+movaps [STR0 + GPR1*8] , FPR1
+movaps [STR0 + GPR1*8+16], FPR2
+movaps [STR0 + GPR1*8+32], FPR3
+movaps [STR0 + GPR1*8+48], FPR4
+
diff --git a/bench/x86-64/sum_sse.ptt b/bench/x86-64/sum_sse.ptt
new file mode 100644
index 0000000..3e7a2bb
--- /dev/null
+++ b/bench/x86-64/sum_sse.ptt
@@ -0,0 +1,23 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 1
+BYTES 8
+xorpd FPR1, FPR1
+movapd FPR2, FPR1
+movapd FPR3, FPR1
+movapd FPR4, FPR1
+movapd FPR5, FPR1
+movapd FPR6, FPR1
+movapd FPR7, FPR1
+movapd FPR8, FPR1
+LOOP 16
+addpd FPR1, [STR0 + GPR1 * 8]
+addpd FPR2, [STR0 + GPR1 * 8 + 16]
+addpd FPR3, [STR0 + GPR1 * 8 + 32]
+addpd FPR4, [STR0 + GPR1 * 8 + 48]
+addpd FPR5, [STR0 + GPR1 * 8 + 64]
+addpd FPR6, [STR0 + GPR1 * 8 + 80]
+addpd FPR7, [STR0 + GPR1 * 8 + 96]
+addpd FPR8, [STR0 + GPR1 * 8 + 112]
+
+
diff --git a/bench/x86-64/triad_avx.ptt b/bench/x86-64/triad_avx.ptt
new file mode 100644
index 0000000..3514cfd
--- /dev/null
+++ b/bench/x86-64/triad_avx.ptt
@@ -0,0 +1,12 @@
+STREAMS 4
+TYPE DOUBLE
+FLOPS 2
+BYTES 16
+LOOP 32
+vmovapd ymm1, [STR1 + GPR1]
+vmovapd ymm2, [STR2 + GPR1]
+vmovapd ymm3, [STR3 + GPR1]
+vmulpd ymm0, ymm1, ymm2
+vaddpd ymm0, ymm0, ymm3
+vmovapd [STR0 + GPR1], ymm0
+
diff --git a/bench/x86-64/triad_split.ptt b/bench/x86-64/triad_split.ptt
new file mode 100644
index 0000000..7b30e47
--- /dev/null
+++ b/bench/x86-64/triad_split.ptt
@@ -0,0 +1,30 @@
+STREAMS 4
+TYPE DOUBLE
+FLOPS 2
+BYTES 32
+LOOP 8
+movapd FPR1, [STR1 + GPR1*8]
+movapd FPR2, [STR1 + GPR1*8+16]
+movapd FPR3, [STR1 + GPR1*8+32]
+movapd FPR4, [STR1 + GPR1*8+48]
+movapd FPR5, [STR2 + GPR1*8]
+movapd FPR6, [STR3 + GPR1*8]
+movapd FPR7, [STR2 + GPR1*8+16]
+movapd FPR8, [STR3 + GPR1*8+16]
+movapd FPR9, [STR2 + GPR1*8+32]
+movapd FPR10, [STR3 + GPR1*8+32]
+movapd FPR11, [STR2 + GPR1*8+48]
+movapd FPR12, [STR3 + GPR1*8+48]
+mulpd FPR1, FPR5
+addpd FPR1, FPR6
+mulpd FPR2, FPR7
+addpd FPR2, FPR8
+mulpd FPR3, FPR9
+addpd FPR3, FPR10
+mulpd FPR4, FPR11
+addpd FPR4, FPR12
+movapd [STR0 + GPR1*8], FPR1
+movapd [STR0 + GPR1*8+16], FPR2
+movapd [STR0 + GPR1*8+32], FPR3
+movapd [STR0 + GPR1*8+48], FPR4
+
diff --git a/bench/x86-64/update_avx.ptt b/bench/x86-64/update_avx.ptt
new file mode 100644
index 0000000..2e9178e
--- /dev/null
+++ b/bench/x86-64/update_avx.ptt
@@ -0,0 +1,15 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+LOOP 16
+vmovaps ymm1, [STR0 + GPR1 * 8]
+vmovaps [STR0 + GPR1 * 8] , ymm1
+vmovaps ymm2, [STR0 + GPR1 * 8 + 32]
+vmovaps ymm3, [STR0 + GPR1 * 8 + 64]
+vmovaps ymm4, [STR0 + GPR1 * 8 + 96]
+vmovaps [STR0 + GPR1 * 8 + 32], ymm2
+vmovaps [STR0 + GPR1 * 8 + 64], ymm3
+vmovaps [STR0 + GPR1 * 8 + 96], ymm4
+
+
diff --git a/bench/x86-64/update_plain.ptt b/bench/x86-64/update_plain.ptt
new file mode 100644
index 0000000..b5a3e4a
--- /dev/null
+++ b/bench/x86-64/update_plain.ptt
@@ -0,0 +1,15 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+LOOP 4
+movss FPR1, [STR0 + GPR1 * 8]
+movss [STR0 + GPR1 * 8] , FPR1
+movss FPR2, [STR0 + GPR1 * 8 + 8]
+movss FPR3, [STR0 + GPR1 * 8 + 16]
+movss FPR4, [STR0 + GPR1 * 8 + 24]
+movss [STR0 + GPR1 * 8 + 8], FPR2
+movss [STR0 + GPR1 * 8 + 16], FPR3
+movss [STR0 + GPR1 * 8 + 24], FPR4
+
+
diff --git a/bench/x86-64/update_sse.ptt b/bench/x86-64/update_sse.ptt
new file mode 100644
index 0000000..ac1129b
--- /dev/null
+++ b/bench/x86-64/update_sse.ptt
@@ -0,0 +1,15 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+LOOP 8
+movaps FPR1, [STR0 + GPR1 * 8]
+movaps [STR0 + GPR1 * 8] , FPR1
+movaps FPR2, [STR0 + GPR1 * 8 + 16]
+movaps FPR3, [STR0 + GPR1 * 8 + 32]
+movaps FPR4, [STR0 + GPR1 * 8 + 48]
+movaps [STR0 + GPR1 * 8 + 16], FPR2
+movaps [STR0 + GPR1 * 8 + 32], FPR3
+movaps [STR0 + GPR1 * 8 + 48], FPR4
+
+
diff --git a/bench/x86-64/vtriad_avx.ptt b/bench/x86-64/vtriad_avx.ptt
new file mode 100644
index 0000000..4a542d2
--- /dev/null
+++ b/bench/x86-64/vtriad_avx.ptt
@@ -0,0 +1,22 @@
+STREAMS 4
+TYPE DOUBLE
+FLOPS 2
+BYTES 32
+LOOP 16
+vmovaps ymm1, [STR1 + GPR1*8]
+vmovaps ymm2, [STR1 + GPR1*8+32]
+vmovaps ymm3, [STR1 + GPR1*8+64]
+vmovaps ymm4, [STR1 + GPR1*8+96]
+vmulpd ymm1, ymm1, [STR2 + GPR1*8]
+vaddpd ymm1, ymm1, [STR3 + GPR1*8]
+vmulpd ymm2, ymm2, [STR2 + GPR1*8+32]
+vaddpd ymm2, ymm2, [STR3 + GPR1*8+32]
+vmulpd ymm3, ymm3, [STR2 + GPR1*8+64]
+vaddpd ymm3, ymm3, [STR3 + GPR1*8+64]
+vmulpd ymm4, ymm4, [STR2 + GPR1*8+96]
+vaddpd ymm4, ymm4, [STR3 + GPR1*8+96]
+vmovaps [STR0 + GPR1*8], ymm1
+vmovaps [STR0 + GPR1*8+32], ymm2
+vmovaps [STR0 + GPR1*8+64], ymm3
+vmovaps [STR0 + GPR1*8+96], ymm4
+
diff --git a/bench/x86-64/vtriad_mem_avx.ptt b/bench/x86-64/vtriad_mem_avx.ptt
new file mode 100644
index 0000000..315ef14
--- /dev/null
+++ b/bench/x86-64/vtriad_mem_avx.ptt
@@ -0,0 +1,10 @@
+STREAMS 4
+TYPE DOUBLE
+FLOPS 2
+BYTES 32
+LOOP 4
+vmovaps ymm1, [STR1 + GPR1*8]
+vmulpd ymm1, ymm1, [STR2 + GPR1*8]
+vaddpd ymm1, ymm1, [STR3 + GPR1*8]
+vmovntpd [STR0 + GPR1*8], ymm1
+
diff --git a/bench/x86-64/vtriad_mem_sse.ptt b/bench/x86-64/vtriad_mem_sse.ptt
new file mode 100644
index 0000000..7c24748
--- /dev/null
+++ b/bench/x86-64/vtriad_mem_sse.ptt
@@ -0,0 +1,10 @@
+STREAMS 4
+TYPE DOUBLE
+FLOPS 2
+BYTES 32
+LOOP 2
+movaps FPR1, [STR1 + GPR1*8]
+mulpd FPR1, [STR2 + GPR1*8]
+addpd FPR1, [STR3 + GPR1*8]
+movntpd [STR0 + GPR1*8], FPR1
+
diff --git a/bench/x86-64/vtriad_plain.ptt b/bench/x86-64/vtriad_plain.ptt
new file mode 100644
index 0000000..120331c
--- /dev/null
+++ b/bench/x86-64/vtriad_plain.ptt
@@ -0,0 +1,22 @@
+STREAMS 4
+TYPE DOUBLE
+FLOPS 2
+BYTES 32
+LOOP 4
+movsd FPR1, [STR1 + GPR1*8]
+movsd FPR2, [STR1 + GPR1*8+8]
+movsd FPR3, [STR1 + GPR1*8+16]
+movss FPR4, [STR1 + GPR1*8+24]
+mulsd FPR1, [STR2 + GPR1*8]
+addsd FPR1, [STR3 + GPR1*8]
+mulsd FPR2, [STR2 + GPR1*8+8]
+addsd FPR2, [STR3 + GPR1*8+8]
+mulsd FPR3, [STR2 + GPR1*8+16]
+addsd FPR3, [STR3 + GPR1*8+16]
+mulsd FPR4, [STR2 + GPR1*8+24]
+addsd FPR4, [STR3 + GPR1*8+24]
+movsd [STR0 + GPR1*8], FPR1
+movsd [STR0 + GPR1*8+8], FPR2
+movsd [STR0 + GPR1*8+16], FPR3
+movsd [STR0 + GPR1*8+24], FPR4
+
diff --git a/bench/x86-64/vtriad_sse.ptt b/bench/x86-64/vtriad_sse.ptt
new file mode 100644
index 0000000..d521aa0
--- /dev/null
+++ b/bench/x86-64/vtriad_sse.ptt
@@ -0,0 +1,22 @@
+STREAMS 4
+TYPE DOUBLE
+FLOPS 2
+BYTES 32
+LOOP 8
+movaps FPR1, [STR1 + GPR1*8]
+movaps FPR2, [STR1 + GPR1*8+16]
+movaps FPR3, [STR1 + GPR1*8+32]
+movaps FPR4, [STR1 + GPR1*8+48]
+mulpd FPR1, [STR2 + GPR1*8]
+addpd FPR1, [STR3 + GPR1*8]
+mulpd FPR2, [STR2 + GPR1*8+16]
+addpd FPR2, [STR3 + GPR1*8+16]
+mulpd FPR3, [STR2 + GPR1*8+32]
+addpd FPR3, [STR3 + GPR1*8+32]
+mulpd FPR4, [STR2 + GPR1*8+48]
+addpd FPR4, [STR3 + GPR1*8+48]
+movaps [STR0 + GPR1*8], FPR1
+movaps [STR0 + GPR1*8+16], FPR2
+movaps [STR0 + GPR1*8+32], FPR3
+movaps [STR0 + GPR1*8+48], FPR4
+
diff --git a/config.mk b/config.mk
index a1caba3..2c3f3be 100644
--- a/config.mk
+++ b/config.mk
@@ -15,7 +15,6 @@ MANPREFIX = $(PREFIX)/man#NO SPACE
# For the daemon based secure msr/pci access configure
# the absolute path to the msr daemon executable.
# Usually you can leave this to the default.
-# $(PREFIX)/sbin/likwid-accessD
ACCESSDAEMON = $(PREFIX)/sbin/likwid-accessD#NO SPACE
# Set the default mode for MSR access.
diff --git a/doc/feedGnuplot.1 b/doc/feedGnuplot.1
new file mode 100644
index 0000000..3d53986
--- /dev/null
+++ b/doc/feedGnuplot.1
@@ -0,0 +1,190 @@
+.TH feedGnuplot 1 <DATE> likwid\-<VERSION>
+.SH NAME
+feedGnuplot \- General purpose pipe-oriented plotting tool
+.SH SYNOPSIS
+.B feedGnuplot
+.IR [<options>]
+.IR [<file1>
+.IR <file2>...]
+
+.SH DESCRIPTION
+.B feedGnuplot
+is a pipe-oriented plotting frontend for GNUplot that can read intermediate results and create a sort of live plot of the data.
+.B feedGnuplot
+is used by
+.B likwid-perfscope(1)
+to print performance counter data printed out by the timeline daemon mode of
+.B likwid-perfctr(1).
+The Perl script
+.B feedGnuplot
+is not written by the LIKWID Authors, it was written by Dima Kogan and published under GPL. The original web page is https://github.com/dkogan/feedgnuplot
+.SH OPTIONS
+.TP
+.B \-h
+prints a help message to standard output, then exits.
+.TP
+.B \-\-[no]domain
+If enabled, the first element of each line is the domain variable. If not, the point index is used.
+.TP
+.B \-\-[no]dataid
+If enabled, each data point is preceded by the ID of the data set that point corresponds to. This ID is
+interpreted as a string, NOT as just a number. If not enabled, the order of the point is used.
+.TP
+.B \-\-[no]3d
+Do [not] plot in 3D. This only makes sense with
+.B --domain.
+Each domain here is an (x,y) tuple.
+.TP
+.B \-\-colormap
+Show a colormapped xy plot. Requires extra data for the color. zmin/zmax can be used to set the extents of the colors.
+Automatically increments extraValuesPerPoint.
+.TP
+.B \-\-[no]stream
+Do [not] display the data a point at a time, as it comes in.
+.TP
+.B \-\-[no]lines
+Do [not] draw lines to connect consecutive points.
+.TP
+.B \-\-[no]points
+Do [not] draw points.
+.TP
+.B \-\-circles
+Plot with circles. This requires a radius be specified for each point. Automatically increments extraValuesPerPoint.
+.TP
+.B \-\-xlabel " xxx
+Set x-axis label.
+.TP
+.B \-\-ylabel " xxx
+Set y-axis label.
+.TP
+.B \-\-y2label " xxx
+Set y2-axis label. Does not apply to 3d plots.
+.TP
+.B \-\-zlabel " xxx
+Set z-axis label. Only applies to 3d plots.
+.TP
+.B \-\-title " xxx
+Set the title of the plot.
+.TP
+.B \-\-legend " curveID=legend
+Set the label for a curve plot. Use this option multiple times for multiple curves. With
+.B --dataid
+, curveID is the ID. Otherwise, it's the index of the curve, starting at 0.
+.TP
+.B \-\-autolegend
+Use the curve IDs for the legend. Titles given with
+.B --legend
+override these.
+.TP
+.B \-\-xlen " xxx
+When using
+.B --stream
+, sets the size of the x-window to plot. Omit this or set it to 0 to plot ALL the data. Does not make sense with 3d plots. Implies
+.B --monotonic
+.TP
+.B \-\-xmin " xxx
+Set the minimal point in range for the x-axis. These are ignored in a streaming plot.
+.TP
+.B \-\-xmax " xxx
+Set the maximal point in range for the x-axis. These are ignored in a streaming plot.
+.TP
+.B \-\-ymin " xxx
+Set the minimal point in range for the y-axis.
+.TP
+.B \-\-ymax " xxx
+Set the maximal point in range for the y-axis.
+.TP
+.B \-\-y2min " xxx
+Set the minimal point in range for the y2-axis. Does not apply to 3d plots.
+.TP
+.B \-\-y2max " xxx
+Set the maximal point in range for the y2-axis. Does not apply to 3d plots.
+.TP
+.B \-\-zmin " xxx
+Set the minimal point in range for the z-axis. Only applies to 3d plots or colormaps.
+.TP
+.B \-\-zmax " xxx
+Set the maximal point in range for the z-axis. Only applies to 3d plots or colormaps.
+.TP
+.B \-\-y2 " xxx
+Plot the data specified by this curve ID on the y2 axis. Without
+.B --dataid
+, the ID is just an ordered 0-based index. Does not apply to 3d plots.
+.TP
+.B \-\-curvestyle " curveID=style
+Additional styles per curve. With
+.B --dataid
+, curveID is the ID. Otherwise, it's the index of the curve, starting at 0. Use this option multiple times for multiple curves.
+.TP
+.B \-\-curvestyleall " xxx
+Additional styles for ALL curves.
+.TP
+.B \-\-extracmds " xxx
+Additional commands. These could contain extra global styles for instance.
+.TP
+.B \-\-size " xxx
+Gnuplot size option.
+.TP
+.B \-\-square
+Plot data with aspect ratio 1. For 3D plots, this controls the aspect ratio for all 3 axes.
+.TP
+.B \-\-square_xy
+For 3D plots, set square aspect ratio for ONLY the x,y axes.
+.TP
+.B \-\-hardcopy " xxx
+If not streaming, output to a file specified here. Format inferred from filename.
+.TP
+.B \-\-maxcurves " xxx
+The maximum allowed number of curves. This is 100 by default, but can be reset with this option. This exists purely to prevent perl from allocating all of the system's memory when reading bogus data.
+.TP
+.B \-\-monotonic
+If
+.B --domain
+is given, checks to make sure that the x-coordinate in the input data is monotonically increasing. If a given x-variable is in the past, all data currently cached for this curve is purged. Without
+.B --monotonic
+, all data is kept. Does not make sense with 3d plots. No
+.B --monotonic
+by default.
+.TP
+.B \-\-extraValuesPerPoint " xxx
+How many extra values are given for each data point. Normally this is 0, and does not need to be specified, but sometimes we want extra data, like for colors or point sizes or error bars, etc.
+.B feedGnuplot
+options that require this (colormap, circles) automatically set it. This option is ONLY needed if unknown styles are used, with
+.B --curvestyleall
+for instance.
+.TP
+.B \-\-dump
+Instead of printing to gnuplot, print to STDOUT. For debugging.
+
+.SH EXAMPLE
+.IP 1. 4
+Simple real-time plotting example: plot how much data is received on the wlan0 network interface in bytes/second
+.TP
+.B while true; do sleep 1; cat /proc/net/dev; done | gawk '/wlan0/ {if(b) {print $2-b; fflush()} b=$2}' | \\
+.B feedgnuplot --lines --stream --xlen 10 --ylabel 'Bytes/sec' --xlabel seconds
+.PP
+Reads the stats of the network interface 'wlan0' every second, reformats it with
+.B gawk
+and pipes the formatted output into
+.B feedGnuplot
+to create a line plot (
+.B --lines
+) of the streaming input (
+.B --stream
+). Always show the last 10 seconds (
+.B --xlen
+) and use the labels 'seconds' for the x-axis and 'Bytes/sec' for the y-axis.
+.IP 2. 4
+Simple real-time plotting example: plot the 'idle' CPU consumption against time
+.TP
+.B sar 1 -1 | awk '$1 ~ /..:..:../ && $8 ~/^[0-9\.]*$/ {print $1,$8; fflush()}' | \\
+.B feedgnuplot --stream --domain --lines --timefmt '%H:%M:%S' --set 'format x "%H:%M:%S"'
+.PP
+Reads the CPU IDLE consumption and sets the current time as x-axis key.
+
+.SH AUTHOR
+Written by Dima Kogan <dima at secretsauce.net>.
+.SH BUGS
+Report Bugs on <https://github.com/dkogan/feedgnuplot/issues>.
+.SH "SEE ALSO"
+gnuplot(1), awk(1), sar(1), likwid-perfscope(1), likwid-perfctr(1)
diff --git a/doc/likwid-accessD.1 b/doc/likwid-accessD.1
new file mode 100644
index 0000000..7d444af
--- /dev/null
+++ b/doc/likwid-accessD.1
@@ -0,0 +1,22 @@
+.TH LIKWID-ACCESSD 1 <DATE> likwid\-<VERSION>
+.SH NAME
+likwid-accessD \- This tool forwards the access operations from LIKWID PerfMon tools
+to the MSR device files
+.SH DESCRIPTION
+.B likwid-accessD
+is a command line application that opens a UNIX file socket and waits for access
+operations from LIKWID tools that require access to the MSR and PCI device
+files. The MSR and PCI device files are only accessible for users with root
+privileges, therefore
+.B likwid-accessD
+requires the suid-bit set.
+Depending on the current system architecture,
+.B likwid-accessD
+permits only access to registers defined for the architecture.
+
+.SH AUTHOR
+Written by Thomas Roehl <thomas.roehl at gmail.com>.
+.SH BUGS
+Report Bugs on <http://code.google.com/p/likwid/issues/list>.
+.SH "SEE ALSO"
+likwid-perfctr(1), likwid-powermeter(1), likwid-features(1), likwid-pin(1), likwid-topology(1)
diff --git a/doc/likwid-bench.1 b/doc/likwid-bench.1
index b7414a0..45d0f6c 100644
--- a/doc/likwid-bench.1
+++ b/doc/likwid-bench.1
@@ -1,19 +1,20 @@
.TH LIKWID-BENCH 1 <DATE> likwid\-<VERSION>
+.WARN
.SH NAME
likwid-bench \- low-level benchmark suite and microbenchmarking framework
.SH SYNOPSIS
.B likwid-bench
.RB [\-hap]
.RB [ \-l
-.IR testname ]
+.IR <testname> ]
.RB [ \-i
-.IR iterations ]
+.IR <iterations> ]
.RB [ \-g
-.IR number_of_workgroups ]
+.IR <number_of_workgroups> ]
.RB [ \-t
-.IR testname ]
+.IR <testname> ]
.RB [ \-w
-.IR workgroup_expression ]
+.IR <workgroup_expression> ]
.SH DESCRIPTION
.B likwid-bench
is a benchmark suite for low-level (assembly) benchmarks to measure bandwidths and instruction throughput for specific instruction code on x86 systems. The currently included benchmark codes include common data access patterns like load and store but also calculations like vector triad and sum.
@@ -38,25 +39,25 @@ list available benchmark codes for the current system.
.B \-\^p
list available thread domains.
.TP
-.B \-\^l " testname"
+.B \-\^l " <testname>"
list properties of a benchmark code.
.TP
-.B \-\^i " iterations"
+.B \-\^i " <iterations>"
number of iterations to perform inside the benchmark code.
.TP
-.B \-\^t " testname"
+.B \-\^t " <testname>"
Name of the benchmark code to run (mandatory).
.TP
-.B \-\^g " number_of_workgroups"
+.B \-\^g " <number_of_workgroups>"
specify the number of workgroups to perform the benchmark code on (mandatory).
.TP
-.B \-\^w " workgroup_expression"
+.B \-\^w " <workgroup_expression>"
Specify the affinity domain, thread count and data set size for the current benchmarking run (mandatory).
.SH WORKGROUP SYNTAX
-.B <thread_domain>:<size>[:<num_threads>[:<chunk_size>:<stride>]][-<streamId>:<domain_id>]
-with size in kB, MB or GB.
-Where thread domain is where threads are placed. Size is the total data set size for the benchmark. num_threads specifies how many threads are used. Threads are always placed using a compact policy in
+
+.B <thread_domain>:<size> [:<num_threads>[:<chunk_size>:<stride>]] [-<streamId>:<domain_id>]
+with size in kB, MB or GB. Where thread domain is where threads are placed. Size is the total data set size for the benchmark. num_threads specifies how many threads are used. Threads are always placed using a compact policy in
.B likwid-bench.
This means that per default all SMT threads are used. Optionally similar a the expression based syntax in
.B likwid-pin
@@ -95,8 +96,8 @@ The option INSTRUMENT_BENCH in config.mk needs to be true at compile time to use
.PP
.B likwid-perfctr
will configure and start the performance counters on socket 0 with 4 threads prior to the execution of
-.B likwid-bench
-. The performance counters are read right before and after running the benchmarking code to
+.B likwid-bench.
+The performance counters are read right before and after running the benchmarking code to
minimize the interferences of the measurement.
.IP 5. 4
Run the copy benchmark and place the data on other socket
diff --git a/doc/likwid-features.1 b/doc/likwid-features.1
index 589b2eb..e67cf44 100644
--- a/doc/likwid-features.1
+++ b/doc/likwid-features.1
@@ -5,9 +5,11 @@ likwid-features \- print and toggle the flags of the MSR_IA32_MISC_ENABLE model
.B likwid-features
.RB [ \-vh ]
.RB [ \-c
-.IR coreId ]
-.RB [ \-su
-.IR prefetcher_tag ]
+.IR <coreId> ]
+.RB [ \-s
+.IR <prefetcher_tag> ]
+.RB [ \-u
+.IR <prefetcher_tag> ]
.SH DESCRIPTION
.B likwid-features
is a command line application to print the flags in the model
@@ -41,13 +43,13 @@ prints version information to standard output, then exits.
.B \-\^h
prints a help message to standard output, then exits.
.TP
-.B \-\^c " coreId"
+.B \-\^c " <coreId>"
set on which processor core the MSR should be read
.TP
-.B \-\^u " HW_PREFETCHER | CL_PREFETCHER | DCU_PREFETCHER | IP_PREFETCHER"
+.B \-\^u " <HW_PREFETCHER | CL_PREFETCHER | DCU_PREFETCHER | IP_PREFETCHER>"
specify which prefetcher to unset
.TP
-.B \-\^s " HW_PREFETCHER | CL_PREFETCHER | DCU_PREFETCHER | IP_PREFETCHER"
+.B \-\^s " <HW_PREFETCHER | CL_PREFETCHER | DCU_PREFETCHER | IP_PREFETCHER>"
specify which prefetcher to set
.SH AUTHOR
diff --git a/doc/likwid-genCfg.1 b/doc/likwid-genCfg.1
new file mode 100644
index 0000000..8b7632f
--- /dev/null
+++ b/doc/likwid-genCfg.1
@@ -0,0 +1,30 @@
+.TH LIKWID-GENCFG 1 <DATE> likwid\-<VERSION>
+.SH NAME
+likwid-genCfg \- Get the system topology and write it to a file for faster LIKWID startup
+.SH SYNOPSIS
+.B likwid-genCfg
+.RB [\-hv]
+.RB [ \-o
+.IR <filename>]
+.SH DESCRIPTION
+.B likwid-genCfg
+is a command line application that stores the system's CPU and NUMA topology to
+file. LIKWID applications use this file to read in the topology fast instead of
+re-gathering all values. The default output path is /etc/likwid.cfg.
+.SH OPTIONS
+.TP
+.B \-h
+prints a help message to standard output, then exits.
+.TP
+.B \-v
+prints a version message to standard output, then exits.
+.TP
+.B \-\^o " <filename>
+sets output file path (optional)
+
+.SH AUTHOR
+Written by Thomas Roehl <thomas.roehl at gmail.com>.
+.SH BUGS
+Report Bugs on <http://code.google.com/p/likwid/issues/list>.
+.SH "SEE ALSO"
+likwid-topology(1), likwid-perfctr(1), likwid-features(1), likwid-pin(1), likwid-powermeter(1)
diff --git a/doc/likwid-memsweeper.1 b/doc/likwid-memsweeper.1
new file mode 100644
index 0000000..f474360
--- /dev/null
+++ b/doc/likwid-memsweeper.1
@@ -0,0 +1,28 @@
+.TH LIKWID-MEMSWEEPER 1 <DATE> likwid\-<VERSION>
+.SH NAME
+likwid-memsweeper \- A tool to clean up NUMA memory domains and last level caches.
+.SH SYNOPSIS
+.B likwid-memsweeper
+.RB [\-hv]
+.RB [ \-c
+.IR <NUMA_ID> ]
+.SH DESCRIPTION
+.B likwid-memsweeper
+is a command line application to shrink the file buffer cache by filling the NUMA domain with random pages. Moreover the tool invalidates all cachelines in the LLC for 64 bit x86 systems. If no NUMA domain is specified, all are swept.
+.SH OPTIONS
+.TP
+.B \-h
+prints a help message to standard output, then exits.
+.TP
+.B \-v
+prints a version message to standard output, then exits.
+.TP
+.B \-\^c " <NUMA_ID>
+set the NUMA domain for sweeping.
+
+.SH AUTHOR
+Written by Thomas Roehl <thomas.roehl at gmail.com>.
+.SH BUGS
+Report Bugs on <http://code.google.com/p/likwid/issues/list>.
+.SH "SEE ALSO"
+likwid-perfctr(1), likwid-features(1), likwid-pin(1), likwid-powermeter(1), likwid-topology(1)
diff --git a/doc/likwid-mpirun.1 b/doc/likwid-mpirun.1
new file mode 100644
index 0000000..765b0c8
--- /dev/null
+++ b/doc/likwid-mpirun.1
@@ -0,0 +1,81 @@
+.TH LIKWID-MPIRUN 1 <DATE> likwid\-<VERSION>
+.SH NAME
+likwid-mpirun \- A tool to start and monitor MPI applications with LIKWID
+.SH SYNOPSIS
+.B likwid-mpirun
+.RB [\-hd]
+.RB [ \-hostfile
+.IR filename ]
+.RB [ \-nperdomain
+.IR number_of_processes_in_domain ]
+.RB [ \-pin
+.IR expression ]
+.RB [ \-omp
+.IR expression ]
+.RB [ \-mpi
+.IR expression ]
+.RB [\-\-]
+.SH DESCRIPTION
+.B likwid-mpirun
+is a command line application that wraps the vendor-specific mpirun tool and adds calls to
+.B likwid-perfctr(1)
+to the execution string. The user-given application is run and measured, and the results are returned to the starting node.
+.SH OPTIONS
+.TP
+.B \-h
+prints a help message to standard output, then exits.
+.TP
+.B \-d
+prints debug messages to standard output.
+.TP
+.B \-\^hostfile " filename
+specifies the nodes to schedule the MPI processes on
+.TP
+.B \-\^nperdomain " number_of_processes_in_domain
+specifies the processes per affinity domain (see
+.B likwid-pin
+for info about affinity domains)
+.TP
+.B \-\^pin " expression
+specifies the pinning for hybrid execution (see
+.B likwid-pin
+for info about affinity domains)
+.TP
+.B \-\^omp " expression
+enables hybrid setup. Can only be used in combination with
+.B -pin.
+The only possible value is: intel
+.TP
+.B \-\^mpi " expression
+specifies the MPI implementation that should be used by the wrapper. Possible values are intelmpi, openmpi and mvapich2
+.TP
+.B \-\-
+stops parsing arguments for likwid-mpirun, in order to set options for underlying MPI implementation after \-\-.
+
+.SH EXAMPLE
+.IP 1. 4
+For standard application:
+.TP
+.B likwid-mpirun -np 32 ./myApp
+.PP
+Will run 32 MPI processes, each host is filled with as many processes as written in ppn
+.IP 2. 4
+With pinning:
+.TP
+.B likwid-mpirun -np 32 -nperdomain S:2 ./myApp
+.PP
+Will start 32 MPI processes with 2 processes per socket.
+.IP 3. 4
+For hybrid runs:
+.TP
+.B likwid-mpirun -np 32 -pin M0:0-3_M1:0-3 ./myApp
+.PP
+Will start 32 MPI processes with 2 processes per node. Threads of the first process are pinned to the cores 0-3 in NUMA domain 0 (M0). The OpenMP threads of the second process are pinned to the first four cores in NUMA domain 1 (M1)
+
+
+.SH AUTHOR
+Written by Thomas Roehl <thomas.roehl at gmail.com>.
+.SH BUGS
+Report Bugs on <http://code.google.com/p/likwid/issues/list>.
+.SH "SEE ALSO"
+likwid-pin(1), likwid-perfctr(1), likwid-features(1), likwid-powermeter(1), likwid-topology(1)
diff --git a/doc/likwid-perfctr.1 b/doc/likwid-perfctr.1
index 0156136..ea3e4f3 100644
--- a/doc/likwid-perfctr.1
+++ b/doc/likwid-perfctr.1
@@ -5,19 +5,19 @@ likwid-perfctr \- configure and read out hardware performance counters on x86 cp
.B likwid-perfctr
.RB [\-vhHVmaeiMoO]
.RB [ \-c/\-C
-.IR core_list ]
+.IR <core_list> ]
.RB [ \-g
-.IR performance_group
+.IR <performance_group>
or
-.IR performance_event_string ]
+.IR <performance_event_string> ]
.RB [ \-t
-.IR frequency ]
+.IR <frequency> ]
.RB [ \-S
-.IR time ]
+.IR <time> ]
.RB [ \-s
-.IR skip_mask ]
+.IR <skip_mask> ]
.RB [ \-o
-.IR output_file ]
+.IR <output_file> ]
.SH DESCRIPTION
.B likwid-perfctr
is a lightweight command line application to configure and read out hardware performance monitoring data
@@ -29,46 +29,66 @@ The following x86 processors are supported:
.IP \[bu]
.B Intel Core 2:
all variants. Counters:
-.I PMC0, PMC1, FIXC0, FIXC1, FIXC2
+.I PMC[0-1], FIXC[0-2]
.IP \[bu]
.B Intel Nehalem:
-all variants. Counters:
-.I PMC0, PMC1, PMC2, PMC3, UPMC0 - UPMC7, FIXC0, FIXC1, FIXC2
+Counters:
+.I PMC[0-3], FIXC[0-2], UPMC[0-7]
.IP \[bu]
.B Intel Nehalem EX:
-all variants, no uncore for the moment. Counters:
-.I PMC0, PMC1, PMC2, PMC3, FIXC0, FIXC1, FIXC2
+Counters:
+.I PMC[0-3], FIXC[0-2], MBOX[0-1]C[0-5], BBOX[0-1]C[0-3], RBOX[0-1]C[0-7], WBOX[0-5], UBOX0, SBOX[0-1]C[0-3], CBOX[0-9]C[0-4]
.IP \[bu]
.B Intel Westmere:
-all variants. Counters:
-.I PMC0, PMC1, PMC2, PMC3, UPMC0 - UPMC7, FIXC0, FIXC1, FIXC2
+Counters:
+.I PMC[0-3], FIXC[0-2], UPMC[0-7]
+.IP \[bu]
+.B Intel Westmere EX:
+Counters:
+.I PMC[0-3], FIXC[0-2], MBOX[0-1]C[0-5], BBOX[0-1]C[0-3], RBOX[0-1]C[0-7], WBOX[0-5], UBOX0, SBOX[0-1]C[0-3], CBOX[0-9]C[0-4]
.IP \[bu]
.B Intel Sandy Bridge:
-all variants, partial support for uncore, full RAPL support. Counters:
-.I PMC0, PMC1, PMC2, PMC3, FIXC0, FIXC1, FIXC2
+full RAPL support. Counters:
+.I PMC[0-3], FIXC[0-2], PWR[0-3]
+.IP \[bu]
+.B Intel Sandy Bridge EP:
+partial support for uncore, full RAPL support. Counters:
+.I PMC[0-3], FIXC[0-2], PWR[0-3], MBOX[0-3]C[0-3]
.IP \[bu]
.B Intel Ivy Bridge:
-all variants, partial support for uncore, full RAPL support. Counters:
-.I PMC0, PMC1, PMC2, PMC3, FIXC0, FIXC1, FIXC2
+full RAPL support. Counters:
+.I PMC[0-3], FIXC[0-2], PWR[0-3]
+.IP \[bu]
+.B Intel Ivy Bridge EP:
+partial support for uncore, full RAPL support. Counters:
+.I PMC[0-3], FIXC[0-2], PWR[0-3], CBOX[0-9]C[0-3], MBOX[0-3]C[0-3], MBOX[0-3]FIX
.IP \[bu]
.B Intel Haswell:
-only desktop variants, full RAPL support. Counters:
-.I PMC0, PMC1, PMC2, PMC3, FIXC0, FIXC1, FIXC2
+full RAPL support. Counters:
+.I PMC[0-3], FIXC[0-2], PWR[0-3]
+.IP \[bu]
+.B Intel Haswell EP:
+no uncore support, full RAPL support. Counters:
+.I PMC[0-3], FIXC[0-2], PWR[0-3]
+.IP \[bu]
+.B Intel Atom Silvermont:
+full RAPL support. Counters:
+.I PMC[0-1], FIXC[0-2], PWR[0-1]
.IP \[bu]
.B Intel Pentium M:
Banias and Dothan variants. Counters:
-.I PMC0, PMC1
+.I PMC[0-1]
.IP \[bu]
.B Intel P6:
Tested on P3.
.IP \[bu]
.B AMD K8:
all variants. Counters:
-.I PMC0, PMC1, PMC2, PMC3
+.I PMC[0-3]
.IP \[bu]
.B AMD K10:
Barcelona, Shanghai, Istanbul, MagnyCours based processors. Counters:
-.I PMC0, PMC1, PMC2, PMC3
+.I PMC[0-3]
.SH OPTIONS
.TP
@@ -93,7 +113,7 @@ print available performance groups for current processor.
.B \-\^e
print available counters and performance events of current processor.
.TP
-.B \-\^o
+.B \-\^o " <filename>
store all ouput to a file instead of stdout. For the filename the following placeholders are supported:
%j for PBS_JOBID, %r for MPI RANK (only Intel MPI at the moment), %h hostname and %p for process pid.
The placeholders must be separated by underscore as, e.g., -o test_%h_%p. You must specify a suffix to
@@ -106,26 +126,26 @@ Do not print tables for results, use easily parseable CSV instead.
.B \-\^i
print cpuid information about processor and on Intel Performance Monitoring features, then exit.
.TP
-.B \-\^c " processor_list"
+.B \-\^c " <processor_list>"
specify a numerical list of processors. The list may contain multiple
items, separated by comma, and ranges. For example 0,3,9-11.
.TP
-.B \-\^C " processor_list"
+.B \-\^C " <processor_list>"
specify a numerical list of processors. The list may contain multiple
items, separated by comma, and ranges. For example 0,3,9-11. This variant will
also pin the threads to the cores. Also logical numberings can be used.
.TP
-.B \-\^g " performance group or performance event set string"
+.B \-\^g " <performance group> or <performance event set string>"
specify which performance group to measure. This can be one of the tags output with the -a flag.
Also a custom event set can be specified by a comma separated list of events. Each event has the format
eventId:register with the the register being one of a architecture supported performance counter registers.
.TP
-.B \-\^t " frequency of measurements in seconds"
-timeline mode for time resolved measurements. The output has the format:
+.B \-\^t " <frequency of measurements>"
+timeline mode for time resolved measurements, possible suffixes 's' and 'ms' like 100ms. The output has the format:
.TP
.B <Event> <Timestamp> <Result thread0> <Result thread1> ...
.TP
-.B \-\^S " time_in_seconds"
+.B \-\^S " <time_in_seconds>"
stethoscope mode with duration in senconds. Can be used to measure an application from the outside.
.SH EXAMPLE
@@ -137,13 +157,13 @@ or use the builtin pin functionality.
.IP 1. 4
As wrapper with performance group:
.TP
-.B likwid-perfctr -C 0-2 -g TLB ./cacheBench -n 2 -l 1048576 -i 100 -t Stream
+.B likwid-perfctr -C 0-2 -g TLB ./cacheBench -n 2 -l 1048576 -i 100 -t Stream
.PP
The parent process is pinned to processor 0, Thread 0 to processor 1 and Thread 1 to processor 2.
.IP 2. 4
As wrapper with custom event set on AMD:
.TP
-.B likwid-perfctr -C 0-4 -g INSTRUCTIONS_RETIRED_SSE:PMC0,CPU_CLOCKS_UNHALTED:PMC3 ./cacheBench
+.B likwid-perfctr -C 0-4 -g INSTRUCTIONS_RETIRED_SSE:PMC0,CPU_CLOCKS_UNHALTED:PMC3 ./myApp
.PP
It is specified that the event
.B INSTRUCTIONS_RETIRED_SSE
@@ -160,7 +180,7 @@ event. If you want this you have to include this event in your custom event stri
.IP 3. 4
As wrapper with custom event set on Intel:
.TP
-.B likwid-perfctr -C 0 -g INSTR_RETIRED_ANY:FIXC0,CPU_CLK_UNHALTED_CORE:FIXC1,UNC_L3_LINES_IN_ANY:UPMC0 ./stream-icc
+.B likwid-perfctr -C 0 -g INSTR_RETIRED_ANY:FIXC0,CPU_CLK_UNHALTED_CORE:FIXC1 ./myApp
.PP
On Intel processors fixed events are measured on dedicated counters. These are
.B INSTR_RETIRED_ANY
@@ -175,7 +195,7 @@ will calculate the runtime and CPI metrics for your run.
.IP 4. 4
Using the marker API to measure only parts of your code (this can be used both with groups or custom event sets):
.TP
-.B likwid-perfctr -m -C 0-4 -g INSTRUCTIONS_RETIRED_SSE:PMC0,CPU_CLOCKS_UNHALTED:PMC3 ./cacheBench
+.B likwid-perfctr -m -C 0-4 -g INSTRUCTIONS_RETIRED_SSE:PMC0,CPU_CLOCKS_UNHALTED:PMC3 ./cacheBench
.PP
You have to link your code against liblikwid.a/.so and use the marker API calls.
The following code snippet shows the necessary calls:
@@ -211,18 +231,22 @@ if (threadId == 0)
.IP 5. 4
Using likwid in timeline mode:
.TP
-.B likwid-perfctr -c 0-3 -g FLOPS_DP -t 300ms ./cacheBench > out.txt
+.B likwid-perfctr -c 0-3 -g FLOPS_DP -t 300ms ./myApp > out.txt
.PP
This will read out the counters every 300ms on physical cores 0-3 and write the results to out.txt.
For timeline mode there is a frontend application likwid-perfscope, which enables live plotting of selected events.
-For more code examples have a look at the likwid WIKI pages.
+For more code examples have a look at the likwid WIKI pages. The processes are
+.B not
+pinned to the CPUs 0-3.
.IP 6. 4
Using likwid in stethoscope mode:
.TP
-.B likwid-perfctr -c 0-3 -g FLOPS_DP -S 2s
+.B likwid-perfctr -c 0-3 -g FLOPS_DP -S 2s
.PP
-This will start the counters and read them out after 2s on physical cores 0-3 and write the results to stdout.
+This will start the counters and read them out after 2s on physical cores 0-3 and write the results to stdout. The processes are
+.B not
+pinned to the CPUs 0-3.
.SH AUTHOR
Written by Jan Treibig <jan.treibig at gmail.com>.
diff --git a/doc/likwid-perfscope.1 b/doc/likwid-perfscope.1
new file mode 100644
index 0000000..2d48e21
--- /dev/null
+++ b/doc/likwid-perfscope.1
@@ -0,0 +1,55 @@
+.TH LIKWID-PERFSCOPE 1 <DATE> likwid\-<VERSION>
+.SH NAME
+likwid-perfscope \- Frontend for the timeline mode of
+.B likwid-perfctr(1)
+that on-the-fly generates pictures from the measurements
+.SH SYNOPSIS
+.B likwid-perfscope
+.RB [\-h]
+.RB [ \-cores
+.IR <cpu_list> ]
+.RB [ \-freq
+.IR <frequency> ]
+.RB [ \-group
+.IR <eventset> ]
+.SH DESCRIPTION
+.B likwid-perfscope
+is a command line application written in Perl that uses the timeline daemon mode of
+.B likwid-perfctr(1)
+to create on-the-fly pictures with the current measurements. It uses the
+.B feedGnuplot(1)
+script to send the current data to gnuplot.
+.SH OPTIONS
+.TP
+.B \-h
+prints a help message to standard output, then exits.
+.TP
+.B \-\^cores " <cpu_list>
+measures the given group on given CPUs in <cpu_list>
+.TP
+.B \-\^freq " <frequency>
+reads the current performance values every <frequency>. Available suffixes are 's' and 'ms', e.g. 500ms. Default value is 1s
+.TP
+.B \-\^group " <eventset>
+defines the events and counters that should be read. Possible values can be gathered from
+.B likwid-perfctr(1).
+Default is group 'FLOPS_DP'
+
+.SH EXAMPLE
+.IP 1. 4
+Monitor double precision floating-point operations:
+.TP
+.B likwid-perfscope -group FLOPS_DP -cores 0-3 -freq 500ms
+.PP
+Executes
+.B likwid-perfctr
+on the first four cores. The values are read every 500ms and forwarded to gnuplot using the
+.B feedGnuplot
+script.
+
+.SH AUTHOR
+Written by Jan Treibig <jan.treibig at gmail.com>.
+.SH BUGS
+Report Bugs on <http://code.google.com/p/likwid/issues/list>.
+.SH "SEE ALSO"
+likwid-perfctr(1), feedGnuplot(1), likwid-pin(1), likwid-powermeter(1), likwid-setFrequencies(1)
diff --git a/doc/likwid-pin.1 b/doc/likwid-pin.1
index 559f47f..efea873 100644
--- a/doc/likwid-pin.1
+++ b/doc/likwid-pin.1
@@ -3,20 +3,18 @@
likwid-pin \- pin a sequential or threaded application to dedicated processors
.SH SYNOPSIS
.B likwid-pin
-.RB [\-vhqip]
+.RB [\-vhqipS]
.RB [ \-c
-.IR core_list ]
+.IR <core_list> ]
.RB [ \-s
-.IR skip_mask ]
-.RB [ \-S
-.IR Sweep_memory_before_run]
+.IR <skip_mask> ]
.RB [ \-d
-.IR delimiter ]
+.IR <delimiter> ]
.SH DESCRIPTION
.B likwid-pin
is a command line application to pin a sequential or multithreaded
application to dedicated processors. It can be used as a replacement for
-.B taskset(1).
+.B taskset(1).
Opposite to taskset no affinity mask but single processors are specified.
For multithreaded applications based on the pthread library the
.I pthread_create
@@ -27,8 +25,8 @@ to a dedicated processor as specified in
.PP
Per default every generated thread is pinned to the core in the order of calls
to
-.I pthread_create
-. It is possible to skip single threads using -s commandline option.
+.I pthread_create.
+It is possible to skip single threads using -s commandline option.
.PP
For OpenMP implementations gcc and icc compilers are explicitly supported. Others may also work.
.B likwid-pin
@@ -67,7 +65,7 @@ prints version information to standard output, then exits.
.B \-\^h
prints a help message to standard output, then exits.
.TP
-.B \-\^c " processor_list OR thread expression OR scatter policy "
+.B \-\^c " <processor_list> OR <thread_expression> OR <scatter policy> "
specify a numerical list of processors. The list may contain multiple
items, separated by comma, and ranges. For example 0,3,9-11. You can also use
logical numberings, either within a node (N), a socket (S<id>) or a numa domain (M<id>).
@@ -75,10 +73,10 @@ likwid-pin also supports logical pinning within a cpuset with a L prefix. If you
likwid-pin will pin the threads to the processors on the node with physical cores first.
See below for details on using a thread expression or scatter policy
.TP
-.B \-\^s " skip_mask
+.B \-\^s " <skip_mask>
Specify skip mask as HEX number. For each set bit the corresponding thread is skipped.
.TP
-.B \-\^S " enable memory sweeper
+.B \-\^S
All ccNUMA memory domains belonging to the specified threadlist will be cleaned before the run. Can solve file buffer cache problems on Linux.
.TP
.B \-\^p
@@ -90,7 +88,7 @@ set numa memory policy to interleave spanning all numa nodes involved in pinning
.B \-\^q
silent execution without output
.TP
-.B \-\^d
+.B \-\^d " <delimiter>
set delimiter used to output the physical processor list (-p & -c)
diff --git a/doc/likwid-powermeter.1 b/doc/likwid-powermeter.1
index 14dce68..f4a3ba2 100644
--- a/doc/likwid-powermeter.1
+++ b/doc/likwid-powermeter.1
@@ -1,15 +1,15 @@
.TH LIKWID-POWERMETER 1 <DATE> likwid\-<VERSION>
.SH NAME
-likwid-powermeter \- A tool to print Power and Clocking information on Intel CPUS
+likwid-powermeter \- A tool to print power and clocking information on Intel CPUs
.SH SYNOPSIS
.B likwid-powermeter
.RB [ \-vhip ]
.RB [ \-c
-.IR socket_list ]
+.IR <socket_list> ]
.RB [ \-s
-.IR duration_in_seconds ]
+.IR <duration_in_seconds> ]
.RB [ \-M
-.IR access mode (0=direct, 1=accessDaemon) ]
+.IR <access_mode>]
.SH DESCRIPTION
.B likwid-powermeter
is a command line application to get the energy consumption of Intel RAPL capable processors.
@@ -17,7 +17,8 @@ It also prints information about TDP and Turbo Mode steps supported.
The Turbo Mode information works on all Turbo mode enabled Intel processors. The tool can be either used
in stethoscope mode for a specified duration or as a wrapper to your application measuring your complete
run. RAPL works on a per package (socket) base.
-Please note that the RAPL counters are also accessible as normal events within likwid-perfctr.
+Please note that the RAPL counters are also accessible as normal events within
+.B likwid-perfctr.
.SH OPTIONS
.TP
.B \-\^v
@@ -26,7 +27,7 @@ prints version information to standard output, then exits.
.B \-\^h
prints a help message to standard output, then exits.
.TP
-.B \-\^c " socket_list"
+.B \-\^c " <socket_list>"
set on which sockets the RAPL interface is accessed. comma-separated list of socket IDs
.TP
.B \-\^p
@@ -35,10 +36,10 @@ prints out information about dynamic clocks and CPI information on the socket me
.B \-\^i
prints out information about TDP and Turbo mode steps
.TP
-.B \-\^M
+.B \-\^M " <access_mode>"
set the access method. 0 for direct access to MSR/RAPL registers, 1 for using the accessDaemon.
.TP
-.B \-\^s
+.B \-\^s " <duration_in_seconds>
measure the power for a specific time (default 2s)
diff --git a/doc/likwid-setFreq.1 b/doc/likwid-setFreq.1
new file mode 100644
index 0000000..87054c7
--- /dev/null
+++ b/doc/likwid-setFreq.1
@@ -0,0 +1,24 @@
+.TH LIKWID-SETFREQ 1 <DATE> likwid\-<VERSION>
+.SH NAME
+likwid-setFreq \- Mediator for
+.B likwid-setFrequencies(1)
+that performs the actual setting of CPU cores' frequency and governor.
+.SH SYNOPSIS
+.B likwid-setFreq
+.IR <coreId>
+.IR <frequency>
+.IR [<governor>]
+
+.SH DESCRIPTION
+.B likwid-setFreq
+is a command line application that mediates the request from
+.B likwid-setFrequencies(1)
+because setting a CPU core's frequency and/or governor requires root privileges. This executable must be suid-root.
+
+
+.SH AUTHOR
+Written by Jan Treibig <jan.treibig at gmail.com>.
+.SH BUGS
+Report Bugs on <http://code.google.com/p/likwid/issues/list>.
+.SH "SEE ALSO"
+likwid-setFrequencies(1), likwid-perfctr(1), feedGnuplot(1), likwid-pin(1), likwid-powermeter(1)
diff --git a/doc/likwid-setFrequencies.1 b/doc/likwid-setFrequencies.1
index 50d70a9..b268280 100644
--- a/doc/likwid-setFrequencies.1
+++ b/doc/likwid-setFrequencies.1
@@ -5,16 +5,16 @@ likwid-setFrequencies \- print and manage the clock frequency of CPU cores
.B likwid-setFrequencies
.RB [\-hpl]
.RB [ \-c
-.IR cpu_list,_socket_list_or_expression ]
+.IR <cpu_list,_socket_list_or_expression> ]
.RB [ \-g
-.IR governor ]
+.IR <governor> ]
.RB [ \-f
-.IR frequency ]
+.IR <frequency> ]
.SH DESCRIPTION
.B likwid-setFrequencies
is a command line application to set the clock frequency of CPU cores. Since only privileged users are allowed to change the frequency of CPU cores, the application works in combination with a daemon
-.B likwid-setFreq
-. The daemon needs the suid permission bit to be set in order to manipulate the sysfs entries. With
+.B likwid-setFreq.
+The daemon needs the suid permission bit to be set in order to manipulate the sysfs entries. With
.B likwid-setFrequencies
the clock of all cores inside the cpu_list or affinity domain can be set to a specific frequency or governor at once.
.SH OPTIONS
@@ -28,14 +28,14 @@ prints the current frequencies for all CPU cores
.B \-l
prints all configurable frequencies
.TP
-.B \-c
+.B \-\^c " <cpu_list,_socket_list_or_expression>
set the affinity domain where to set the frequencies. Common are N (Node), SX (Socket X), CX (Cache Group X) and MX (Memory Group X). For detailed information about affinity domains see
.B likwid-pin(1)
.TP
-.B \-g
+.B \-\^g " <governor>
set the governor of all CPU cores inside the affinity domain. Current governors are ondemand, performance, turbo. Default is ondemand
.TP
-.B \-f
+.B \-\^f " <frequency>
set a fixed frequency at all CPU cores inside the affinity domain. Implicitly sets userspace governor for the cores.
.SH AUTHOR
diff --git a/doc/likwid-topology.1 b/doc/likwid-topology.1
index c3a0316..64bc8b4 100644
--- a/doc/likwid-topology.1
+++ b/doc/likwid-topology.1
@@ -5,14 +5,13 @@ likwid-topology \- print thread and cache topology
.B likwid-topology
.RB [\-hvgcC]
.RB [ \-o
-.IR output_file ]
+.IR <filename> ]
.SH DESCRIPTION
.B likwid-topology
-is a command line application to print the thread and cache
-topology on multicore x86 processors. Used with mono spaced fonts it can
+is a command line application to print the thread and cache topology on multicore x86 processors. Used with mono spaced fonts it can
draw the processor topology of a machine in ASCII art. Beyond topology
-likwid-topology determines the clock of a processor and prints detailed
-informations about the caches hierarchy and NUMA structure.
+.B likwid-topology
+determines the clock of a processor and prints detailed information about the cache hierarchy and NUMA structure.
.SH OPTIONS
.TP
.B \-v
@@ -28,9 +27,10 @@ prints topology information in ASCII art. Best viewed with monospaced font.
prints detailed informations about cache hierarchy
.TP
.B \-C
-measures and output the processor clock. This involves a longer runtime of likwid-topology.
+measures and outputs the processor clock. This involves a longer runtime of
+.B likwid-topology.
.TP
-.B \-o
+.B \-\^o " <filename>
Specify output file for topology information. According to the file suffix, the information
is converted using converter scripts installed at <PREFIX>/share/likwid
diff --git a/filters/csv b/filters/csv
index 626916b..654f204 100755
--- a/filters/csv
+++ b/filters/csv
@@ -67,8 +67,9 @@ if ($fileType eq 'topology') {
} elsif ($region eq 'numa') {
if (/Domain ([0-9]*)/) {
print OUTFILE 'Domain ID'.$SEP.$1.$NL;
- } elsif (/Memory:.*total ([0-9.]+) MB/) {
- print OUTFILE 'Memory [MB]'.$SEP.$1.$NL;
+ } elsif (/Memory: ([0-9.]+) MB free of total ([0-9.]+) MB/) {
+ print OUTFILE 'Free Memory [MB]'.$SEP.$1.$NL;
+ print OUTFILE 'Total Memory [MB]'.$SEP.$2.$NL;
} elsif (/(.*):\t*[ ]*(.*)/) {
print OUTFILE $1.$SEP.$2.$NL;
}
diff --git a/filters/xml b/filters/xml
index 23eaf8e..b72c430 100755
--- a/filters/xml
+++ b/filters/xml
@@ -22,6 +22,7 @@ open OUTFILE,"> $filename";
if ($fileType eq 'topology') {
my $region = 'topo';
+ my $indomain = 0;
print OUTFILE '<node>'.$NL;
while (<INFILE>) {
@@ -36,13 +37,15 @@ if ($fileType eq 'topology') {
}
if ($region eq 'topo') {
- if (/(CPU type):\t(.*)/) {
+ if (/(CPU type):\t([\w ]*)/) {
print OUTFILE '<cpu>'.$2.'</cpu>'.$NL;
- } elsif (/(Sockets):\t(.*)/) {
+ } elsif (/CPU clock:\t([\d.]) GHz/) {
+ print OUTFILE '<clock>'.$1.'</clock>'.$NL;
+ } elsif (/(Sockets):\t(\d*)/) {
print OUTFILE '<socketsPerNode>'.$2.'</socketsPerNode>'.$NL;
- } elsif (/(Cores per socket):\t(.*)/) {
+ } elsif (/(Cores per socket):\t(\d*)/) {
print OUTFILE '<coresPerSocket>'.$2.'</coresPerSocket>'.$NL;
- } elsif (/(Threads per core):\t(.*)/) {
+ } elsif (/(Threads per core):\t(\d*)/) {
print OUTFILE '<threadsPerCore>'.$2.'</threadsPerCore>'.$NL;
} elsif (/([0-9]*)\t\t([0-9]*)\t\t([0-9]*)\t\t([0-9]*)/) {
#TODO Build tree for XML output from table!
@@ -68,15 +71,25 @@ if ($fileType eq 'topology') {
}
} elsif ($region eq 'numa') {
if (/Domain ([0-9]*)/) {
+ if ($indomain )
+ {
+ print OUTFILE '</domain>'.$NL;
+ }
print OUTFILE '<domain>'.$NL;
print OUTFILE '<id>'.$1.'</id>'.$NL;
- } elsif (/Memory:.*total ([0-9.]+) MB/) {
- print OUTFILE '<memory>'.$1.'</memory>'.$NL;
+ $indomain = 1
+ } elsif (/Memory: ([0-9.]+) MB free of total ([0-9.]+) MB/) {
+ print OUTFILE '<freememory>'.$1.'</freememory>'.$NL;
+ print OUTFILE '<totalmemory>'.$2.'</totalmemory>'.$NL;
} elsif (/Processors:[ ]+([0-9. ]+)/) {
print OUTFILE '<processors>'.$1.'</processors>'.$NL;
}
}
}
+ if ($indomain)
+ {
+ print OUTFILE '</domain>'.$NL;
+ }
print OUTFILE '</numa>'.$NL;
print OUTFILE '</node>'.$NL;
@@ -117,7 +130,7 @@ if ($fileType eq 'topology') {
die "Filter failed! Unknown application type $fileType!\n";
}
-unlink($infile);
+#unlink($infile);
close INFILE;
close OUTFILE;
diff --git a/groups/core2/BRANCH.txt b/groups/core2/BRANCH.txt
index 15a9ae0..2515d6c 100644
--- a/groups/core2/BRANCH.txt
+++ b/groups/core2/BRANCH.txt
@@ -3,12 +3,14 @@ SHORT Branch prediction miss rate/ratio
EVENTSET
FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
PMC0 BR_INST_RETIRED_ANY
PMC1 BR_INST_RETIRED_MISPRED
METRICS
Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Branch rate PMC0/FIXC0
Branch misprediction rate PMC1/FIXC0
diff --git a/groups/core2/CACHE.txt b/groups/core2/CACHE.txt
index 26e310c..fd2af0c 100644
--- a/groups/core2/CACHE.txt
+++ b/groups/core2/CACHE.txt
@@ -3,12 +3,14 @@ SHORT Data cache miss rate/ratio
EVENTSET
FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
PMC0 L1D_REPL
PMC1 L1D_ALL_CACHE_REF
METRICS
Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Data cache misses PMC0
Data cache request rate PMC1/FIXC0
diff --git a/groups/core2/DATA.txt b/groups/core2/DATA.txt
index af77c1e..c48ad99 100644
--- a/groups/core2/DATA.txt
+++ b/groups/core2/DATA.txt
@@ -3,12 +3,14 @@ SHORT Load to store ratio
EVENTSET
FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
PMC0 INST_RETIRED_LOADS
PMC1 INST_RETIRED_STORES
METRICS
Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Load to Store ratio PMC0/PMC1
diff --git a/groups/core2/FLOPS_DP.txt b/groups/core2/FLOPS_DP.txt
index 81e30b3..8e72f07 100644
--- a/groups/core2/FLOPS_DP.txt
+++ b/groups/core2/FLOPS_DP.txt
@@ -3,12 +3,14 @@ SHORT Double Precision MFlops/s
EVENTSET
FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
PMC0 SIMD_COMP_INST_RETIRED_PACKED_DOUBLE
PMC1 SIMD_COMP_INST_RETIRED_SCALAR_DOUBLE
METRICS
Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
DP MFlops/s 1.0E-06*(PMC0*2.0+PMC1)/time
diff --git a/groups/core2/FLOPS_SP.txt b/groups/core2/FLOPS_SP.txt
index 92c95bb..acd2df7 100644
--- a/groups/core2/FLOPS_SP.txt
+++ b/groups/core2/FLOPS_SP.txt
@@ -3,12 +3,14 @@ SHORT Single Precision MFlops/s
EVENTSET
FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
PMC0 SIMD_COMP_INST_RETIRED_PACKED_SINGLE
PMC1 SIMD_COMP_INST_RETIRED_SCALAR_SINGLE
METRICS
Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
SP MFlops/s 1.0E-06*(PMC0*4.0+PMC1)/time
diff --git a/groups/core2/FLOPS_X87.txt b/groups/core2/FLOPS_X87.txt
index 1bcd4d6..052356e 100644
--- a/groups/core2/FLOPS_X87.txt
+++ b/groups/core2/FLOPS_X87.txt
@@ -3,11 +3,13 @@ SHORT X87 MFlops/s
EVENTSET
FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
PMC0 X87_OPS_RETIRED_ANY
METRICS
Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
X87 MFlops/s 1.0E-06*PMC0/time
diff --git a/groups/core2/L2.txt b/groups/core2/L2.txt
index 8436400..88c75c5 100644
--- a/groups/core2/L2.txt
+++ b/groups/core2/L2.txt
@@ -3,12 +3,14 @@ SHORT L2 cache bandwidth in MBytes/s
EVENTSET
FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
PMC0 L1D_REPL
PMC1 L1D_M_EVICT
METRICS
Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
L2 load [MBytes/s] 1.0E-06*PMC0*64.0/time
L2 evict [MBytes/s] 1.0E-06*PMC1*64.0/time
diff --git a/groups/core2/L2CACHE.txt b/groups/core2/L2CACHE.txt
index dbbed5d..34c607a 100644
--- a/groups/core2/L2CACHE.txt
+++ b/groups/core2/L2CACHE.txt
@@ -3,12 +3,14 @@ SHORT L2 cache miss rate/ratio
EVENTSET
FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
PMC0 L2_RQSTS_THIS_CORE_ALL_MESI
PMC1 L2_RQSTS_SELF_I_STATE
METRICS
Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
L2 request rate PMC0/FIXC0
L2 miss rate PMC1/FIXC0
diff --git a/groups/core2/MEM.txt b/groups/core2/MEM.txt
index 8f193d6..b205dc4 100644
--- a/groups/core2/MEM.txt
+++ b/groups/core2/MEM.txt
@@ -3,11 +3,13 @@ SHORT Main memory bandwidth in MBytes/s
EVENTSET
FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
PMC0 BUS_TRANS_MEM_THIS_CORE_THIS_A
METRICS
Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Memory bandwidth [MBytes/s] 1.0E-06*PMC0*64.0/time
Memory data volume [GBytes] 1.0E-09*PMC0*64.0
diff --git a/groups/core2/TLB.txt b/groups/core2/TLB.txt
index f36abfe..d536d88 100644
--- a/groups/core2/TLB.txt
+++ b/groups/core2/TLB.txt
@@ -3,12 +3,14 @@ SHORT TLB miss rate/ratio
EVENTSET
FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
PMC0 DTLB_MISSES_ANY
PMC1 L1D_ALL_CACHE_REF
METRICS
Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
L1 DTLB request rate PMC1/FIXC0
DTLB miss rate PMC0/FIXC0
diff --git a/groups/haswell/ENERGY.txt b/groups/haswell/ENERGY.txt
index 039563c..15b1c45 100644
--- a/groups/haswell/ENERGY.txt
+++ b/groups/haswell/ENERGY.txt
@@ -6,6 +6,8 @@ FIXC1 CPU_CLK_UNHALTED_CORE
FIXC2 CPU_CLK_UNHALTED_REF
TMP0 TEMP_CORE
PWR0 PWR_PKG_ENERGY
+PWR1 PWR_PP0_ENERGY
+PWR3 PWR_DRAM_ENERGY
METRICS
Runtime (RDTSC) [s] time
@@ -15,11 +17,18 @@ CPI FIXC1/FIXC0
Temperature [C] TMP0
Energy [J] PWR0
Power [W] PWR0/time
+Energy PP0 [J] PWR1
+Power PP0 [W] PWR1/time
+Energy DRAM [J] PWR3
+Power DRAM [W] PWR3/time
LONG
Formula:
Power = PWR_PKG_ENERGY / time
+Power PP0 = PWR_PP0_ENERGY / time
+Power DRAM = PWR_DRAM_ENERGY / time
-
Haswell implements the new RAPL interface. This interface enables to
-monitor the consumed energy on the package (socket) level.
+monitor the consumed energy on the package (socket) and DRAM level.
+The PP0 energy domain is often refered to an integrated GPU.
diff --git a/groups/haswell/ICACHE.txt b/groups/haswell/ICACHE.txt
new file mode 100644
index 0000000..6ce3ce8
--- /dev/null
+++ b/groups/haswell/ICACHE.txt
@@ -0,0 +1,25 @@
+SHORT Instruction cache miss rate/ratio
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 ICACHE_ACCESSES
+PMC1 ICACHE_MISSES
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI FIXC1/FIXC0
+L1I request rate PMC0/FIXC0
+L1I miss rate PMC1/FIXC0
+L1I miss ratio PMC1/PMC0
+
+LONG
+Formulas:
+L1I request rate = ICACHE_ACCESSES / INSTR_RETIRED_ANY
+L1I miss rate = ICACHE_MISSES / INSTR_RETIRED_ANY
+L1I miss ratio = ICACHE_MISSES / ICACHE_ACCESSES
+-
+This group measures some L1 instruction cache metrics.
diff --git a/groups/haswell/L2.txt b/groups/haswell/L2.txt
new file mode 100644
index 0000000..47d8ec7
--- /dev/null
+++ b/groups/haswell/L2.txt
@@ -0,0 +1,33 @@
+SHORT L2 cache bandwidth in MBytes/s
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 L1D_REPLACEMENT
+PMC1 L2_TRANS_L1D_WB
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI FIXC1/FIXC0
+L2D load bandwidth [MBytes/s] 1.0E-06*PMC0*64.0/time
+L2D load data volume [GBytes] 1.0E-09*PMC0*64.0
+L2D evict bandwidth [MBytes/s] 1.0E-06*PMC1*64.0/time
+L2D evict data volume [GBytes] 1.0E-09*PMC1*64.0
+L2 bandwidth [MBytes/s] 1.0E-06*(PMC0+PMC1+PMC2)*64.0/time
+L2 data volume [GBytes] 1.0E-09*(PMC0+PMC1+PMC2)*64.0
+
+LONG
+Formulas:
+L2D load bandwidth [MBytes/s] 1.0E-06*L1D_REPLACEMENT*64.0/time
+L2D load data volume [GBytes] 1.0E-09*L1D_REPLACEMENT*64.0
+L2D evict bandwidth [MBytes/s] 1.0E-06*L2_TRANS_L1D_WB*64.0/time
+L2D evict data volume [GBytes] 1.0E-09*L2_TRANS_L1D_WB*64.0
+L2 bandwidth [MBytes/s] 1.0E-06*(L1D_REPLACEMENT+L2_TRANS_L1D_WB+ICACHE_MISSES)*64.0/time
+L2 data volume [GBytes] 1.0E-09*(L1D_REPLACEMENT+L2_TRANS_L1D_WB+ICACHE_MISSES)*64.0
+-
+Profiling group to measure L2 cache bandwidth. The bandwidth is computed by the
+number of cacheline loaded from the L2 to the L1 data cache and the writebacks from
+the L1 data cache to the L2 cache.
diff --git a/groups/haswell/L2CACHE.txt b/groups/haswell/L2CACHE.txt
index 3d7c36e..8186f69 100644
--- a/groups/haswell/L2CACHE.txt
+++ b/groups/haswell/L2CACHE.txt
@@ -4,7 +4,7 @@ EVENTSET
FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
FIXC2 CPU_CLK_UNHALTED_REF
-PMC0 L2_TRANS_ALL_REQUESTS
+PMC0 L2_RQSTS_REFERENCES
PMC1 L2_RQSTS_MISS
METRICS
@@ -18,9 +18,9 @@ L2 miss ratio PMC1/PMC0
LONG
Formulas:
-L2 request rate = L2_TRANS_ALL_REQUESTS / INSTR_RETIRED_ANY
+L2 request rate = L2_RQSTS_REFERENCES / INSTR_RETIRED_ANY
L2 miss rate = L2_RQSTS_MISS / INSTR_RETIRED_ANY
-L2 miss ratio = L2_RQSTS_MISS / L2_TRANS_ALL_REQUESTS
+L2 miss ratio = L2_RQSTS_MISS / L2_RQSTS_REFERENCES
-
This group measures the locality of your data accesses with regard to the
L2 Cache. L2 request rate tells you how data intensive your code is
diff --git a/groups/haswell/L2CACHE.txt b/groups/haswell/L3CACHE.txt
similarity index 53%
copy from groups/haswell/L2CACHE.txt
copy to groups/haswell/L3CACHE.txt
index 3d7c36e..d4fd89e 100644
--- a/groups/haswell/L2CACHE.txt
+++ b/groups/haswell/L3CACHE.txt
@@ -1,32 +1,32 @@
-SHORT L2 cache miss rate/ratio
+SHORT L3 cache miss rate/ratio
EVENTSET
FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
FIXC2 CPU_CLK_UNHALTED_REF
-PMC0 L2_TRANS_ALL_REQUESTS
-PMC1 L2_RQSTS_MISS
+PMC0 MEM_LOAD_UOPS_RETIRED_L3_ALL
+PMC1 MEM_LOAD_UOPS_RETIRED_L3_MISS
METRICS
Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
-L2 request rate PMC0/FIXC0
-L2 miss rate PMC1/FIXC0
-L2 miss ratio PMC1/PMC0
+L3 request rate (PMC0)/FIXC0
+L3 miss rate PMC1/FIXC0
+L3 miss ratio PMC1/PMC0
LONG
Formulas:
-L2 request rate = L2_TRANS_ALL_REQUESTS / INSTR_RETIRED_ANY
-L2 miss rate = L2_RQSTS_MISS / INSTR_RETIRED_ANY
-L2 miss ratio = L2_RQSTS_MISS / L2_TRANS_ALL_REQUESTS
+L3 request rate = MEM_LOAD_UOPS_RETIRED_L3_ALL / INSTR_RETIRED_ANY
+L3 miss rate = MEM_LOAD_UOPS_RETIRED_L3_MISS / INSTR_RETIRED_ANY
+L3 miss ratio = MEM_LOAD_UOPS_RETIRED_L3_MISS / MEM_LOAD_UOPS_RETIRED_L3_ALL
-
This group measures the locality of your data accesses with regard to the
-L2 Cache. L2 request rate tells you how data intensive your code is
+L3 Cache. L3 request rate tells you how data intensive your code is
or how many Data accesses you have in average per instruction.
-The L2 miss rate gives a measure how often it was necessary to get
-cachelines from memory. And finally L2 miss ratio tells you how many of your
+The L3 miss rate gives a measure how often it was necessary to get
+cachelines from memory. And finally L3 miss ratio tells you how many of your
memory references required a cacheline to be loaded from a higher level.
While the Data cache miss rate might be given by your algorithm you should
try to get Data cache miss ratio as low as possible by increasing your cache reuse.
diff --git a/groups/haswell/TLB.txt b/groups/haswell/TLB.txt
deleted file mode 100644
index 78bf096..0000000
--- a/groups/haswell/TLB.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-SHORT TLB miss rate/ratio
-
-EVENTSET
-FIXC0 INSTR_RETIRED_ANY
-FIXC1 CPU_CLK_UNHALTED_CORE
-FIXC2 CPU_CLK_UNHALTED_REF
-PMC0 DTLB_LOAD_MISSES_CAUSES_A_WALK
-
-METRICS
-Runtime (RDTSC) [s] time
-Runtime unhalted [s] FIXC1*inverseClock
-Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
-CPI FIXC1/FIXC0
-L1 DTLB miss rate PMC0/FIXC0
-
-LONG
-Formulas:
-DTLB miss rate LOAD_MISSES_CAUSES_A_WALK / INSTR_RETIRED_ANY
--
-The DTLB miss rate gives a measure how often a TLB miss occured
-per instruction.
-
diff --git a/groups/haswell/TLB_DATA.txt b/groups/haswell/TLB_DATA.txt
new file mode 100644
index 0000000..2f59772
--- /dev/null
+++ b/groups/haswell/TLB_DATA.txt
@@ -0,0 +1,35 @@
+SHORT L1 Data TLB miss rate/ratio
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 DTLB_LOAD_MISSES_CAUSES_A_WALK
+PMC1 DTLB_STORE_MISSES_CAUSES_A_WALK
+PMC2 DTLB_LOAD_MISSES_WALK_DURATION
+PMC3 DTLB_STORE_MISSES_WALK_DURATION
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI FIXC1/FIXC0
+L1 DTLB load misses PMC0
+L1 DTLB load miss rate PMC0/FIXC0
+L1 DTLB load miss duration PMC2
+L1 DTLB store misses PMC1
+L1 DTLB store miss rate PMC1/FIXC0
+L1 DTLB store miss duration PMC3
+
+LONG
+Formulas:
+L1 DTLB load misses DTLB_LOAD_MISSES_CAUSES_A_WALK
+L1 DTLB load miss rate DTLB_LOAD_MISSES_CAUSES_A_WALK / INSTR_RETIRED_ANY
+L1 DTLB load miss duration DTLB_LOAD_MISSES_WALK_DURATION
+L1 DTLB store misses DTLB_STORE_MISSES_CAUSES_A_WALK
+L1 DTLB store miss rate DTLB_STORE_MISSES_CAUSES_A_WALK / INSTR_RETIRED_ANY
+L1 DTLB store miss duration DTLB_STORE_MISSES_WALK_DURATION
+-
+The DTLB load and store miss rates give a measure of how often a TLB miss occurred
+per instruction. The duration measures, in cycles, how long a page walk took.
+
diff --git a/groups/haswell/TLB_INSTR.txt b/groups/haswell/TLB_INSTR.txt
new file mode 100644
index 0000000..f95f78a
--- /dev/null
+++ b/groups/haswell/TLB_INSTR.txt
@@ -0,0 +1,28 @@
+SHORT L1 Instruction TLB miss rate/ratio
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 ITLB_MISSES_CAUSES_A_WALK
+PMC1 ITLB_MISSES_WALK_DURATION
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI FIXC1/FIXC0
+L1 ITLB misses PMC0
+L1 ITLB miss rate PMC0/FIXC0
+L1 ITLB miss duration PMC1
+
+
+LONG
+Formulas:
+L1 ITLB misses ITLB_MISSES_CAUSES_A_WALK
+L1 ITLB miss rate ITLB_MISSES_CAUSES_A_WALK / INSTR_RETIRED_ANY
+L1 ITLB miss duration ITLB_MISSES_WALK_DURATION
+-
+The ITLB miss rate gives a measure of how often a TLB miss occurred
+per instruction. The duration measures, in cycles, how long a page walk took.
+
diff --git a/groups/ivybridge/ENERGY.txt b/groups/ivybridge/ENERGY.txt
index 4646bf5..3f70077 100644
--- a/groups/ivybridge/ENERGY.txt
+++ b/groups/ivybridge/ENERGY.txt
@@ -6,6 +6,7 @@ FIXC1 CPU_CLK_UNHALTED_CORE
FIXC2 CPU_CLK_UNHALTED_REF
TMP0 TEMP_CORE
PWR0 PWR_PKG_ENERGY
+PWR1 PWR_PP0_ENERGY
PWR3 PWR_DRAM_ENERGY
METRICS
@@ -16,12 +17,15 @@ CPI FIXC1/FIXC0
Temperature [C] TMP0
Energy [J] PWR0
Power [W] PWR0/time
+Energy PP0 [J] PWR1
+Power PP0 [W] PWR1/time
Energy DRAM [J] PWR3
Power DRAM [W] PWR3/time
LONG
Formula:
Power = PWR_PKG_ENERGY / time
+Power PP0 = PWR_PP0_ENERGY / time
Power DRAM = PWR_DRAM_ENERGY / time
-
IvyBridge implements the new RAPL interface. This interface enables to
diff --git a/groups/ivybridge/FLOPS_AVX.txt b/groups/ivybridge/FLOPS_AVX.txt
index 2bc99ea..e8074c1 100644
--- a/groups/ivybridge/FLOPS_AVX.txt
+++ b/groups/ivybridge/FLOPS_AVX.txt
@@ -12,8 +12,8 @@ Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
-SP 32b packed MFlops/s 1.0E-06*(PMC0*8.0)/time
-DP 32b packed MFlops/s 1.0E-06*(PMC1*4.0)/time
+32b packed SP MFlops/s 1.0E-06*(PMC0*8.0)/time
+32b packed DP MFlops/s 1.0E-06*(PMC1*4.0)/time
LONG
Formula:
diff --git a/groups/ivybridge/FLOPS_DP.txt b/groups/ivybridge/FLOPS_DP.txt
index 88509c9..1e47b50 100644
--- a/groups/ivybridge/FLOPS_DP.txt
+++ b/groups/ivybridge/FLOPS_DP.txt
@@ -13,7 +13,7 @@ Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
-MFlops/s 1.0E-06*(PMC0*2.0+PMC1)/time
+MFlops/s 1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
AVX MFlops/s 1.0E-06*(PMC2*4.0)/time
Packed MUOPS/s 1.0E-06*(PMC0+PMC2)/time
Scalar MUOPS/s 1.0E-06*PMC1/time
diff --git a/groups/ivybridge/ICACHE.txt b/groups/ivybridge/ICACHE.txt
new file mode 100644
index 0000000..6ce3ce8
--- /dev/null
+++ b/groups/ivybridge/ICACHE.txt
@@ -0,0 +1,25 @@
+SHORT Instruction cache miss rate/ratio
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 ICACHE_ACCESSES
+PMC1 ICACHE_MISSES
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI FIXC1/FIXC0
+L1I request rate PMC0/FIXC0
+L1I miss rate PMC1/FIXC0
+L1I miss ratio PMC1/PMC0
+
+LONG
+Formulas:
+L1I request rate = ICACHE_ACCESSES / INSTR_RETIRED_ANY
+L1I miss rate = ICACHE_MISSES / INSTR_RETIRED_ANY
+L1I miss ratio = ICACHE_MISSES / ICACHE_ACCESSES
+-
+This group measures some L1 instruction cache metrics.
diff --git a/groups/haswell/L2CACHE.txt b/groups/ivybridge/L3CACHE.txt
similarity index 53%
copy from groups/haswell/L2CACHE.txt
copy to groups/ivybridge/L3CACHE.txt
index 3d7c36e..d4fd89e 100644
--- a/groups/haswell/L2CACHE.txt
+++ b/groups/ivybridge/L3CACHE.txt
@@ -1,32 +1,32 @@
-SHORT L2 cache miss rate/ratio
+SHORT L3 cache miss rate/ratio
EVENTSET
FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
FIXC2 CPU_CLK_UNHALTED_REF
-PMC0 L2_TRANS_ALL_REQUESTS
-PMC1 L2_RQSTS_MISS
+PMC0 MEM_LOAD_UOPS_RETIRED_L3_ALL
+PMC1 MEM_LOAD_UOPS_RETIRED_L3_MISS
METRICS
Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
-L2 request rate PMC0/FIXC0
-L2 miss rate PMC1/FIXC0
-L2 miss ratio PMC1/PMC0
+L3 request rate (PMC0)/FIXC0
+L3 miss rate PMC1/FIXC0
+L3 miss ratio PMC1/PMC0
LONG
Formulas:
-L2 request rate = L2_TRANS_ALL_REQUESTS / INSTR_RETIRED_ANY
-L2 miss rate = L2_RQSTS_MISS / INSTR_RETIRED_ANY
-L2 miss ratio = L2_RQSTS_MISS / L2_TRANS_ALL_REQUESTS
+L3 request rate = MEM_LOAD_UOPS_RETIRED_L3_ALL / INSTR_RETIRED_ANY
+L3 miss rate = MEM_LOAD_UOPS_RETIRED_L3_MISS / INSTR_RETIRED_ANY
+L3 miss ratio = MEM_LOAD_UOPS_RETIRED_L3_MISS / MEM_LOAD_UOPS_RETIRED_L3_ALL
-
This group measures the locality of your data accesses with regard to the
-L2 Cache. L2 request rate tells you how data intensive your code is
+L3 Cache. L3 request rate tells you how data intensive your code is
or how many Data accesses you have in average per instruction.
-The L2 miss rate gives a measure how often it was necessary to get
-cachelines from memory. And finally L2 miss ratio tells you how many of your
+The L3 miss rate gives a measure how often it was necessary to get
+cachelines from memory. And finally L3 miss ratio tells you how many of your
memory references required a cacheline to be loaded from a higher level.
While the Data cache miss rate might be given by your algorithm you should
try to get Data cache miss ratio as low as possible by increasing your cache reuse.
diff --git a/groups/ivybridge/MEM.txt b/groups/ivybridge/MEM.txt
index 6632cd4..1f9ff4a 100644
--- a/groups/ivybridge/MEM.txt
+++ b/groups/ivybridge/MEM.txt
@@ -5,23 +5,23 @@ FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
FIXC2 CPU_CLK_UNHALTED_REF
MBOX0C0 CAS_COUNT_RD
-MBOX1C0 CAS_COUNT_WR
-MBOX0C1 CAS_COUNT_RD
+MBOX0C1 CAS_COUNT_WR
+MBOX1C0 CAS_COUNT_RD
MBOX1C1 CAS_COUNT_WR
-MBOX0C2 CAS_COUNT_RD
-MBOX1C2 CAS_COUNT_WR
-MBOX0C3 CAS_COUNT_RD
-MBOX1C3 CAS_COUNT_WR
+MBOX2C0 CAS_COUNT_RD
+MBOX2C1 CAS_COUNT_WR
+MBOX3C0 CAS_COUNT_RD
+MBOX3C1 CAS_COUNT_WR
METRICS
Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
-Memory Read BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3)*64.0/time
-Memory Write BW [MBytes/s] 1.0E-06*(MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0/time
-Memory BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3+MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0/time
-Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3+MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0
+Memory Read BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0)*64.0/time
+Memory Write BW [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1)*64.0/time
+Memory BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX1C0+MBOX1C1+MBOX2C0+MBOX2C1+MBOX3C0+MBOX3C1)*64.0/time
+Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX0C1+MBOX1C0+MBOX1C1+MBOX2C0+MBOX2C1+MBOX3C0+MBOX3C1)*64.0
LONG
Profiling group to measure main memory bandwidth drawn by all cores of
diff --git a/groups/ivybridge/MEM_DP.txt b/groups/ivybridge/MEM_DP.txt
index 2e4138e..7bc76cd 100644
--- a/groups/ivybridge/MEM_DP.txt
+++ b/groups/ivybridge/MEM_DP.txt
@@ -4,25 +4,27 @@ EVENTSET
FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
FIXC2 CPU_CLK_UNHALTED_REF
+TMP0 TEMP_CORE
PWR0 PWR_PKG_ENERGY
PWR3 PWR_DRAM_ENERGY
PMC0 FP_COMP_OPS_EXE_SSE_FP_PACKED_DOUBLE
PMC1 FP_COMP_OPS_EXE_SSE_FP_SCALAR_DOUBLE
PMC2 SIMD_FP_256_PACKED_DOUBLE
MBOX0C0 CAS_COUNT_RD
-MBOX1C0 CAS_COUNT_WR
-MBOX0C1 CAS_COUNT_RD
+MBOX0C1 CAS_COUNT_WR
+MBOX1C0 CAS_COUNT_RD
MBOX1C1 CAS_COUNT_WR
-MBOX0C2 CAS_COUNT_RD
-MBOX1C2 CAS_COUNT_WR
-MBOX0C3 CAS_COUNT_RD
-MBOX1C3 CAS_COUNT_WR
+MBOX2C0 CAS_COUNT_RD
+MBOX2C1 CAS_COUNT_WR
+MBOX3C0 CAS_COUNT_RD
+MBOX3C1 CAS_COUNT_WR
METRICS
Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
+Temperature TMP0
Energy [J] PWR0
Power [W] PWR0/time
Energy DRAM [J] PWR3
@@ -31,10 +33,10 @@ AVX MFlops/s 1.0E-06*(4.0*PMC2)/time
MFlops/s 1.0E-06*(PMC0*2.0+PMC1)/time
Packed MUOPS/s 1.0E-06*PMC0/time
Scalar MUOPS/s 1.0E-06*PMC1/time
-Memory Read BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3)*64.0/time
-Memory Write BW [MBytes/s] 1.0E-06*(MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0/time
-Memory BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3+MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0/time
-Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3+MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0
+Memory Read BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0)*64.0/time
+Memory Write BW [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1)*64.0/time
+Memory BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX1C0+MBOX1C1+MBOX2C0+MBOX2C1+MBOX3C0+MBOX3C1)*64.0/time
+Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX0C1+MBOX1C0+MBOX1C1+MBOX2C0+MBOX2C1+MBOX3C0+MBOX3C1)*64.0
LONG
Formula:
@@ -42,6 +44,8 @@ Power = PWR_PKG_ENERGY / runtime
Power DRAM = PWR_DRAM_ENERGY / runtime
MFlops/s = (FP_COMP_OPS_EXE_SSE_FP_PACKED*2 + FP_COMP_OPS_EXE_SSE_FP_SCALAR)/ runtime
AVX MFlops/s = (SIMD_FP_256_PACKED_DOUBLE*4)/ runtime
+Memory Read BW [MBytes/s] = 1.0E-06*(CAS_COUNT_RD+CAS_COUNT_RD+CAS_COUNT_RD+CAS_COUNT_RD)*64.0/time
+Memory Write BW [MBytes/s] = 1.0E-06*(CAS_COUNT_WR+CAS_COUNT_WR+CAS_COUNT_WR+CAS_COUNT_WR)*64.0/time
--
Profiling group to measure memory bandwidth drawn by all cores of a socket.
Since this group is based on uncore events it is only possible to measure on
diff --git a/groups/ivybridge/MEM_SP.txt b/groups/ivybridge/MEM_SP.txt
index d06f263..4388cc4 100644
--- a/groups/ivybridge/MEM_SP.txt
+++ b/groups/ivybridge/MEM_SP.txt
@@ -11,13 +11,13 @@ PMC0 FP_COMP_OPS_EXE_SSE_FP_PACKED_SINGLE
PMC1 FP_COMP_OPS_EXE_SSE_FP_SCALAR_SINGLE
PMC2 SIMD_FP_256_PACKED_SINGLE
MBOX0C0 CAS_COUNT_RD
-MBOX1C0 CAS_COUNT_WR
-MBOX0C1 CAS_COUNT_RD
+MBOX0C1 CAS_COUNT_WR
+MBOX1C0 CAS_COUNT_RD
MBOX1C1 CAS_COUNT_WR
-MBOX0C2 CAS_COUNT_RD
-MBOX1C2 CAS_COUNT_WR
-MBOX0C3 CAS_COUNT_RD
-MBOX1C3 CAS_COUNT_WR
+MBOX2C0 CAS_COUNT_RD
+MBOX2C1 CAS_COUNT_WR
+MBOX3C0 CAS_COUNT_RD
+MBOX3C1 CAS_COUNT_WR
METRICS
Runtime (RDTSC) [s] time
@@ -33,10 +33,10 @@ AVX MFlops/s 1.0E-06*(8.0*PMC2)/time
MFlops/s 1.0E-06*(PMC0*4.0+PMC1)/time
Packed MUOPS/s 1.0E-06*PMC0/time
Scalar MUOPS/s 1.0E-06*PMC1/time
-Memory Read BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3)*64.0/time
-Memory Write BW [MBytes/s] 1.0E-06*(MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0/time
-Memory BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3+MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0/time
-Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3+MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0
+Memory Read BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0)*64.0/time
+Memory Write BW [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1)*64.0/time
+Memory BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX1C0+MBOX1C1+MBOX2C0+MBOX2C1+MBOX3C0+MBOX3C1)*64.0/time
+Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX0C1+MBOX1C0+MBOX1C1+MBOX2C0+MBOX2C1+MBOX3C0+MBOX3C1)*64.0
LONG
Formula:
@@ -44,6 +44,8 @@ Power = PWR_PKG_ENERGY / runtime
Power DRAM = PWR_DRAM_ENERGY / runtime
MFlops/s = (FP_COMP_OPS_EXE_SSE_FP_PACKED_SINGLE * 4 + FP_COMP_OPS_EXE_SSE_FP_SCALAR_SINGLE) / runtime
AVX MFlops/s = (SIMD_FP_256_PACKED_SINGLE * 8) / runtime
+Memory Read BW [MBytes/s] = 1.0E-06*(CAS_COUNT_RD+CAS_COUNT_RD+CAS_COUNT_RD+CAS_COUNT_RD)*64.0/time
+Memory Write BW [MBytes/s] = 1.0E-06*(CAS_COUNT_WR+CAS_COUNT_WR+CAS_COUNT_WR+CAS_COUNT_WR)*64.0/time
--
Profiling group to measure memory bandwidth drawn by all cores of a socket.
Since this group is based on uncore events it is only possible to measure on
diff --git a/groups/ivybridge/TLB.txt b/groups/ivybridge/TLB.txt
deleted file mode 100644
index 83f0e24..0000000
--- a/groups/ivybridge/TLB.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-SHORT TLB miss rate/ratio
-
-EVENTSET
-FIXC0 INSTR_RETIRED_ANY
-FIXC1 CPU_CLK_UNHALTED_CORE
-FIXC2 CPU_CLK_UNHALTED_REF
-PMC0 DTLB_LOAD_MISSES_CAUSES_A_WALK
-
-METRICS
-Runtime (RDTSC) [s] time
-Runtime unhalted [s] FIXC1*inverseClock
-Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
-CPI FIXC1/FIXC0
-L1 DTLB miss rate PMC0/FIXC0
-
-LONG
-Formulas:
-DTLB miss rate LOAD_MISSES_CAUSES_A_WALK / INSTR_RETIRED_ANY
--
-The DTLB miss rate gives a measure how often a TLB miss occured per instruction
-in average. TLB misses increase if many pages (4kB data chunks) are accessed in
-a short time period.
-
diff --git a/groups/ivybridge/TLB_DATA.txt b/groups/ivybridge/TLB_DATA.txt
new file mode 100644
index 0000000..2f59772
--- /dev/null
+++ b/groups/ivybridge/TLB_DATA.txt
@@ -0,0 +1,35 @@
+SHORT L1 Data TLB miss rate/ratio
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 DTLB_LOAD_MISSES_CAUSES_A_WALK
+PMC1 DTLB_STORE_MISSES_CAUSES_A_WALK
+PMC2 DTLB_LOAD_MISSES_WALK_DURATION
+PMC3 DTLB_STORE_MISSES_WALK_DURATION
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI FIXC1/FIXC0
+L1 DTLB load misses PMC0
+L1 DTLB load miss rate PMC0/FIXC0
+L1 DTLB load miss duration PMC2
+L1 DTLB store misses PMC1
+L1 DTLB store miss rate PMC1/FIXC0
+L1 DTLB store miss duration PMC3
+
+LONG
+Formulas:
+L1 DTLB load misses DTLB_LOAD_MISSES_CAUSES_A_WALK
+L1 DTLB load miss rate DTLB_LOAD_MISSES_CAUSES_A_WALK / INSTR_RETIRED_ANY
+L1 DTLB load miss duration DTLB_LOAD_MISSES_WALK_DURATION
+L1 DTLB store misses DTLB_STORE_MISSES_CAUSES_A_WALK
+L1 DTLB store miss rate DTLB_STORE_MISSES_CAUSES_A_WALK / INSTR_RETIRED_ANY
+L1 DTLB store miss duration DTLB_STORE_MISSES_WALK_DURATION
+-
+The DTLB load and store miss rates give a measure of how often a TLB miss occurred
+per instruction. The duration measures, in cycles, how long the page walks took.
+
diff --git a/groups/ivybridge/TLB_INSTR.txt b/groups/ivybridge/TLB_INSTR.txt
new file mode 100644
index 0000000..f95f78a
--- /dev/null
+++ b/groups/ivybridge/TLB_INSTR.txt
@@ -0,0 +1,28 @@
+SHORT L1 Instruction TLB miss rate/ratio
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 ITLB_MISSES_CAUSES_A_WALK
+PMC1 ITLB_MISSES_WALK_DURATION
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI FIXC1/FIXC0
+L1 ITLB misses PMC0
+L1 ITLB miss rate PMC0/FIXC0
+L1 ITLB miss duration PMC1
+
+
+LONG
+Formulas:
+L1 ITLB misses ITLB_MISSES_CAUSES_A_WALK
+L1 ITLB miss rate ITLB_MISSES_CAUSES_A_WALK / INSTR_RETIRED_ANY
+L1 ITLB miss duration ITLB_MISSES_WALK_DURATION
+-
+The ITLB miss rate gives a measure of how often a TLB miss occurred
+per instruction. The duration measures, in cycles, how long the page walks took.
+
diff --git a/groups/sandybridge/ENERGY.txt b/groups/sandybridge/ENERGY.txt
index b9a0491..9261934 100644
--- a/groups/sandybridge/ENERGY.txt
+++ b/groups/sandybridge/ENERGY.txt
@@ -6,6 +6,7 @@ FIXC1 CPU_CLK_UNHALTED_CORE
FIXC2 CPU_CLK_UNHALTED_REF
TMP0 TEMP_CORE
PWR0 PWR_PKG_ENERGY
+PWR1 PWR_PP0_ENERGY
PWR3 PWR_DRAM_ENERGY
METRICS
@@ -15,13 +16,16 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Temperature [C] TMP0
Energy [J] PWR0
-Energy DRAM [J] PWR3
Power [W] PWR0/time
+Energy PP0 [J] PWR1
+Power PP0 [W] PWR1/time
+Energy DRAM [J] PWR3
Power DRAM [W] PWR3/time
LONG
Formula:
Power = PWR_PKG_ENERGY / time
+Power PP0 = PWR_PP0_ENERGY / time
Power DRAM = PWR_DRAM_ENERGY / time
-
SandyBridge implements the new RAPL interface. This interface enables to
diff --git a/groups/sandybridge/FLOPS_DP.txt b/groups/sandybridge/FLOPS_DP.txt
index ef1a0e8..cda580a 100644
--- a/groups/sandybridge/FLOPS_DP.txt
+++ b/groups/sandybridge/FLOPS_DP.txt
@@ -13,7 +13,7 @@ Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
-MFlops/s 1.0E-06*(PMC0*2.0+PMC1)/time
+MFlops/s 1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
32b AVX MFlops/s 1.0E-06*(PMC2*4.0)/time
Packed MUOPS/s 1.0E-06*(PMC0+PMC2)/time
Scalar MUOPS/s 1.0E-06*PMC1/time
diff --git a/groups/haswell/L2CACHE.txt b/groups/sandybridge/L3CACHE.txt
similarity index 53%
copy from groups/haswell/L2CACHE.txt
copy to groups/sandybridge/L3CACHE.txt
index 3d7c36e..d4fd89e 100644
--- a/groups/haswell/L2CACHE.txt
+++ b/groups/sandybridge/L3CACHE.txt
@@ -1,32 +1,32 @@
-SHORT L2 cache miss rate/ratio
+SHORT L3 cache miss rate/ratio
EVENTSET
FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
FIXC2 CPU_CLK_UNHALTED_REF
-PMC0 L2_TRANS_ALL_REQUESTS
-PMC1 L2_RQSTS_MISS
+PMC0 MEM_LOAD_UOPS_RETIRED_L3_ALL
+PMC1 MEM_LOAD_UOPS_RETIRED_L3_MISS
METRICS
Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
-L2 request rate PMC0/FIXC0
-L2 miss rate PMC1/FIXC0
-L2 miss ratio PMC1/PMC0
+L3 request rate (PMC0)/FIXC0
+L3 miss rate PMC1/FIXC0
+L3 miss ratio PMC1/PMC0
LONG
Formulas:
-L2 request rate = L2_TRANS_ALL_REQUESTS / INSTR_RETIRED_ANY
-L2 miss rate = L2_RQSTS_MISS / INSTR_RETIRED_ANY
-L2 miss ratio = L2_RQSTS_MISS / L2_TRANS_ALL_REQUESTS
+L3 request rate = MEM_LOAD_UOPS_RETIRED_L3_ALL / INSTR_RETIRED_ANY
+L3 miss rate = MEM_LOAD_UOPS_RETIRED_L3_MISS / INSTR_RETIRED_ANY
+L3 miss ratio = MEM_LOAD_UOPS_RETIRED_L3_MISS / MEM_LOAD_UOPS_RETIRED_L3_ALL
-
This group measures the locality of your data accesses with regard to the
-L2 Cache. L2 request rate tells you how data intensive your code is
+L3 Cache. L3 request rate tells you how data intensive your code is
or how many Data accesses you have in average per instruction.
-The L2 miss rate gives a measure how often it was necessary to get
-cachelines from memory. And finally L2 miss ratio tells you how many of your
+The L3 miss rate gives a measure how often it was necessary to get
+cachelines from memory. And finally L3 miss ratio tells you how many of your
memory references required a cacheline to be loaded from a higher level.
While the Data cache miss rate might be given by your algorithm you should
try to get Data cache miss ratio as low as possible by increasing your cache reuse.
diff --git a/groups/sandybridge/MEM.txt b/groups/sandybridge/MEM.txt
index 6632cd4..1f9ff4a 100644
--- a/groups/sandybridge/MEM.txt
+++ b/groups/sandybridge/MEM.txt
@@ -5,23 +5,23 @@ FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
FIXC2 CPU_CLK_UNHALTED_REF
MBOX0C0 CAS_COUNT_RD
-MBOX1C0 CAS_COUNT_WR
-MBOX0C1 CAS_COUNT_RD
+MBOX0C1 CAS_COUNT_WR
+MBOX1C0 CAS_COUNT_RD
MBOX1C1 CAS_COUNT_WR
-MBOX0C2 CAS_COUNT_RD
-MBOX1C2 CAS_COUNT_WR
-MBOX0C3 CAS_COUNT_RD
-MBOX1C3 CAS_COUNT_WR
+MBOX2C0 CAS_COUNT_RD
+MBOX2C1 CAS_COUNT_WR
+MBOX3C0 CAS_COUNT_RD
+MBOX3C1 CAS_COUNT_WR
METRICS
Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
-Memory Read BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3)*64.0/time
-Memory Write BW [MBytes/s] 1.0E-06*(MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0/time
-Memory BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3+MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0/time
-Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3+MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0
+Memory Read BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0)*64.0/time
+Memory Write BW [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1)*64.0/time
+Memory BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX1C0+MBOX1C1+MBOX2C0+MBOX2C1+MBOX3C0+MBOX3C1)*64.0/time
+Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX0C1+MBOX1C0+MBOX1C1+MBOX2C0+MBOX2C1+MBOX3C0+MBOX3C1)*64.0
LONG
Profiling group to measure main memory bandwidth drawn by all cores of
diff --git a/groups/sandybridge/MEM_DP.txt b/groups/sandybridge/MEM_DP.txt
index 2891a45..78fbd18 100644
--- a/groups/sandybridge/MEM_DP.txt
+++ b/groups/sandybridge/MEM_DP.txt
@@ -11,13 +11,13 @@ PMC0 FP_COMP_OPS_EXE_SSE_FP_PACKED_DOUBLE
PMC1 FP_COMP_OPS_EXE_SSE_FP_SCALAR_DOUBLE
PMC2 SIMD_FP_256_PACKED_DOUBLE
MBOX0C0 CAS_COUNT_RD
-MBOX1C0 CAS_COUNT_WR
-MBOX0C1 CAS_COUNT_RD
+MBOX0C1 CAS_COUNT_WR
+MBOX1C0 CAS_COUNT_RD
MBOX1C1 CAS_COUNT_WR
-MBOX0C2 CAS_COUNT_RD
-MBOX1C2 CAS_COUNT_WR
-MBOX0C3 CAS_COUNT_RD
-MBOX1C3 CAS_COUNT_WR
+MBOX2C0 CAS_COUNT_RD
+MBOX2C1 CAS_COUNT_WR
+MBOX3C0 CAS_COUNT_RD
+MBOX3C1 CAS_COUNT_WR
METRICS
Runtime (RDTSC) [s] time
@@ -33,10 +33,10 @@ MFlops/s 1.0E-06*(PMC0*2.0+PMC1)/time
32b AVX MFlops/s 1.0E-06*(PMC2*4.0)/time
Packed MUOPS/s 1.0E-06*(PMC0+PMC2)/time
Scalar MUOPS/s 1.0E-06*PMC1/time
-Memory Read BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3)*64.0/time
-Memory Write BW [MBytes/s] 1.0E-06*(MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0/time
-Memory BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3+MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0/time
-Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3+MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0
+Memory Read BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0)*64.0/time
+Memory Write BW [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1)*64.0/time
+Memory BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX1C0+MBOX1C1+MBOX2C0+MBOX2C1+MBOX3C0+MBOX3C1)*64.0/time
+Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX0C1+MBOX1C0+MBOX1C1+MBOX2C0+MBOX2C1+MBOX3C0+MBOX3C1)*64.0
LONG
Formula:
@@ -44,6 +44,8 @@ Power = PWR_PKG_ENERGY / runtime
Power DRAM = PWR_DRAM_ENERGY / runtime
MFlops/s = (FP_COMP_OPS_EXE_SSE_FP_PACKED*2 + FP_COMP_OPS_EXE_SSE_FP_SCALAR)/ runtime
AVX MFlops/s = (SIMD_FP_256_PACKED_DOUBLE*4)/ runtime
+Memory Read BW [MBytes/s] = 1.0E-06*(CAS_COUNT_RD+CAS_COUNT_RD+CAS_COUNT_RD+CAS_COUNT_RD)*64.0/time
+Memory Write BW [MBytes/s] = 1.0E-06*(CAS_COUNT_WR+CAS_COUNT_WR+CAS_COUNT_WR+CAS_COUNT_WR)*64.0/time
--
Profiling group to measure memory bandwidth drawn by all cores of a socket.
Since this group is based on uncore events it is only possible to measure on
diff --git a/groups/sandybridge/MEM_SP.txt b/groups/sandybridge/MEM_SP.txt
index 9ac34d0..1ede713 100644
--- a/groups/sandybridge/MEM_SP.txt
+++ b/groups/sandybridge/MEM_SP.txt
@@ -11,13 +11,13 @@ PMC0 FP_COMP_OPS_EXE_SSE_FP_PACKED_SINGLE
PMC1 FP_COMP_OPS_EXE_SSE_FP_SCALAR_SINGLE
PMC2 SIMD_FP_256_PACKED_DOUBLE
MBOX0C0 CAS_COUNT_RD
-MBOX1C0 CAS_COUNT_WR
-MBOX0C1 CAS_COUNT_RD
+MBOX0C1 CAS_COUNT_WR
+MBOX1C0 CAS_COUNT_RD
MBOX1C1 CAS_COUNT_WR
-MBOX0C2 CAS_COUNT_RD
-MBOX1C2 CAS_COUNT_WR
-MBOX0C3 CAS_COUNT_RD
-MBOX1C3 CAS_COUNT_WR
+MBOX2C0 CAS_COUNT_RD
+MBOX2C1 CAS_COUNT_WR
+MBOX3C0 CAS_COUNT_RD
+MBOX3C1 CAS_COUNT_WR
METRICS
Runtime (RDTSC) [s] time
@@ -33,10 +33,10 @@ MFlops/s 1.0E-06*(PMC0*4.0+PMC1)/time
32b AVX MFlops/s 1.0E-06*(PMC2*8.0)/time
Packed MUOPS/s 1.0E-06*(PMC0+PMC2)/time
Scalar MUOPS/s 1.0E-06*PMC1/time
-Memory Read BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3)*64.0/time
-Memory Write BW [MBytes/s] 1.0E-06*(MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0/time
-Memory BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3+MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0/time
-Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX0C1+MBOX0C2+MBOX0C3+MBOX1C0+MBOX1C1+MBOX1C2+MBOX1C3)*64.0
+Memory Read BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0)*64.0/time
+Memory Write BW [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1)*64.0/time
+Memory BW [MBytes/s] 1.0E-06*(MBOX0C0+MBOX0C1+MBOX1C0+MBOX1C1+MBOX2C0+MBOX2C1+MBOX3C0+MBOX3C1)*64.0/time
+Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX0C1+MBOX1C0+MBOX1C1+MBOX2C0+MBOX2C1+MBOX3C0+MBOX3C1)*64.0
LONG
Formula:
@@ -44,6 +44,8 @@ Power = PWR_PKG_ENERGY / runtime
Power DRAM = PWR_DRAM_ENERGY / runtime
MFlops/s = (FP_COMP_OPS_EXE_SSE_FP_PACKED_SINGLE * 4 + FP_COMP_OPS_EXE_SSE_FP_SCALAR_SINGLE) / runtime
AVX MFlops/s = (SIMD_FP_256_PACKED_SINGLE * 8) / runtime
+Memory Read BW [MBytes/s] = 1.0E-06*(CAS_COUNT_RD+CAS_COUNT_RD+CAS_COUNT_RD+CAS_COUNT_RD)*64.0/time
+Memory Write BW [MBytes/s] = 1.0E-06*(CAS_COUNT_WR+CAS_COUNT_WR+CAS_COUNT_WR+CAS_COUNT_WR)*64.0/time
--
Profiling group to measure memory bandwidth drawn by all cores of a socket.
Since this group is based on uncore events it is only possible to measure on
diff --git a/groups/sandybridge/TLB.txt b/groups/sandybridge/TLB.txt
deleted file mode 100644
index 83f0e24..0000000
--- a/groups/sandybridge/TLB.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-SHORT TLB miss rate/ratio
-
-EVENTSET
-FIXC0 INSTR_RETIRED_ANY
-FIXC1 CPU_CLK_UNHALTED_CORE
-FIXC2 CPU_CLK_UNHALTED_REF
-PMC0 DTLB_LOAD_MISSES_CAUSES_A_WALK
-
-METRICS
-Runtime (RDTSC) [s] time
-Runtime unhalted [s] FIXC1*inverseClock
-Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
-CPI FIXC1/FIXC0
-L1 DTLB miss rate PMC0/FIXC0
-
-LONG
-Formulas:
-DTLB miss rate LOAD_MISSES_CAUSES_A_WALK / INSTR_RETIRED_ANY
--
-The DTLB miss rate gives a measure how often a TLB miss occured per instruction
-in average. TLB misses increase if many pages (4kB data chunks) are accessed in
-a short time period.
-
diff --git a/groups/sandybridge/TLB_DATA.txt b/groups/sandybridge/TLB_DATA.txt
new file mode 100644
index 0000000..2f59772
--- /dev/null
+++ b/groups/sandybridge/TLB_DATA.txt
@@ -0,0 +1,35 @@
+SHORT L1 Data TLB miss rate/ratio
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 DTLB_LOAD_MISSES_CAUSES_A_WALK
+PMC1 DTLB_STORE_MISSES_CAUSES_A_WALK
+PMC2 DTLB_LOAD_MISSES_WALK_DURATION
+PMC3 DTLB_STORE_MISSES_WALK_DURATION
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI FIXC1/FIXC0
+L1 DTLB load misses PMC0
+L1 DTLB load miss rate PMC0/FIXC0
+L1 DTLB load miss duration PMC2
+L1 DTLB store misses PMC1
+L1 DTLB store miss rate PMC1/FIXC0
+L1 DTLB store miss duration PMC3
+
+LONG
+Formulas:
+L1 DTLB load misses DTLB_LOAD_MISSES_CAUSES_A_WALK
+L1 DTLB load miss rate DTLB_LOAD_MISSES_CAUSES_A_WALK / INSTR_RETIRED_ANY
+L1 DTLB load miss duration DTLB_LOAD_MISSES_WALK_DURATION
+L1 DTLB store misses DTLB_STORE_MISSES_CAUSES_A_WALK
+L1 DTLB store miss rate DTLB_STORE_MISSES_CAUSES_A_WALK / INSTR_RETIRED_ANY
+L1 DTLB store miss duration DTLB_STORE_MISSES_WALK_DURATION
+-
+The DTLB load and store miss rates give a measure of how often a TLB miss occurred
+per instruction. The duration measures, in cycles, how long the page walks took.
+
diff --git a/groups/sandybridge/TLB_INSTR.txt b/groups/sandybridge/TLB_INSTR.txt
new file mode 100644
index 0000000..f95f78a
--- /dev/null
+++ b/groups/sandybridge/TLB_INSTR.txt
@@ -0,0 +1,28 @@
+SHORT L1 Instruction TLB miss rate/ratio
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 ITLB_MISSES_CAUSES_A_WALK
+PMC1 ITLB_MISSES_WALK_DURATION
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI FIXC1/FIXC0
+L1 ITLB misses PMC0
+L1 ITLB miss rate PMC0/FIXC0
+L1 ITLB miss duration PMC1
+
+
+LONG
+Formulas:
+L1 ITLB misses ITLB_MISSES_CAUSES_A_WALK
+L1 ITLB miss rate ITLB_MISSES_CAUSES_A_WALK / INSTR_RETIRED_ANY
+L1 ITLB miss duration ITLB_MISSES_WALK_DURATION
+-
+The ITLB miss rate gives a measure of how often a TLB miss occurred
+per instruction. The duration measures, in cycles, how long the page walks took.
+
diff --git a/groups/core2/BRANCH.txt b/groups/silvermont/BRANCH.txt
similarity index 51%
copy from groups/core2/BRANCH.txt
copy to groups/silvermont/BRANCH.txt
index 15a9ae0..cbaf834 100644
--- a/groups/core2/BRANCH.txt
+++ b/groups/silvermont/BRANCH.txt
@@ -3,12 +3,14 @@ SHORT Branch prediction miss rate/ratio
EVENTSET
FIXC0 INSTR_RETIRED_ANY
FIXC1 CPU_CLK_UNHALTED_CORE
-PMC0 BR_INST_RETIRED_ANY
-PMC1 BR_INST_RETIRED_MISPRED
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 BR_INST_RETIRED_ALL_BRANCHES
+PMC1 BR_MISP_RETIRED_ALL_BRANCHES
METRICS
Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Branch rate PMC0/FIXC0
Branch misprediction rate PMC1/FIXC0
@@ -17,12 +19,13 @@ Instructions per branch FIXC0/PMC0
LONG
Formulas:
-Branch rate = BR_INST_RETIRED_ANY / INSTR_RETIRED_ANY
-Branch misprediction rate = BR_INST_RETIRED_MISPRED / INSTR_RETIRED_ANY
-Branch misprediction ratio = BR_INST_RETIRED_MISPRED / BR_INST_RETIRED_ANY
-Instructions per branch = INSTR_RETIRED_ANY / BR_INST_RETIRED_ANY
+Branch rate = BR_INST_RETIRED_ALL_BRANCHES / INSTR_RETIRED_ANY
+Branch misprediction rate = BR_MISP_RETIRED_ALL_BRANCHES / INSTR_RETIRED_ANY
+Branch misprediction ratio = BR_MISP_RETIRED_ALL_BRANCHES / BR_INST_RETIRED_ALL_BRANCHES
+Instructions per branch = INSTR_RETIRED_ANY / BR_INST_RETIRED_ALL_BRANCHES
-
The rates state how often in average a branch or a mispredicted branch occured
per instruction retired in total. The Branch misprediction ratio sets directly
-into relation what ration of all branch instruction where mispredicted.
+into relation what ratio of all branch instructions were mispredicted.
Instructions per branch is 1/Branch rate.
+
diff --git a/groups/haswell/ENERGY.txt b/groups/silvermont/ENERGY.txt
similarity index 81%
copy from groups/haswell/ENERGY.txt
copy to groups/silvermont/ENERGY.txt
index 039563c..5646a9a 100644
--- a/groups/haswell/ENERGY.txt
+++ b/groups/silvermont/ENERGY.txt
@@ -8,7 +8,7 @@ TMP0 TEMP_CORE
PWR0 PWR_PKG_ENERGY
METRICS
-Runtime (RDTSC) [s] time
+Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
@@ -20,6 +20,6 @@ LONG
Formula:
Power = PWR_PKG_ENERGY / time
-
-Haswell implements the new RAPL interface. This interface enables to
+Silvermont implements the new RAPL interface. This interface enables to
monitor the consumed energy on the package (socket) level.
diff --git a/groups/silvermont/ICACHE.txt b/groups/silvermont/ICACHE.txt
new file mode 100644
index 0000000..6ce3ce8
--- /dev/null
+++ b/groups/silvermont/ICACHE.txt
@@ -0,0 +1,25 @@
+SHORT Instruction cache miss rate/ratio
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 ICACHE_ACCESSES
+PMC1 ICACHE_MISSES
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI FIXC1/FIXC0
+L1I request rate PMC0/FIXC0
+L1I miss rate PMC1/FIXC0
+L1I miss ratio PMC1/PMC0
+
+LONG
+Formulas:
+L1I request rate = ICACHE_ACCESSES / INSTR_RETIRED_ANY
+L1I miss rate = ICACHE_MISSES / INSTR_RETIRED_ANY
+L1I miss ratio = ICACHE_MISSES / ICACHE_ACCESSES
+-
+This group measures some L1 instruction cache metrics.
diff --git a/groups/silvermont/L1TOL2.txt b/groups/silvermont/L1TOL2.txt
new file mode 100644
index 0000000..225533d
--- /dev/null
+++ b/groups/silvermont/L1TOL2.txt
@@ -0,0 +1,28 @@
+SHORT L2 load cache bandwidth in MBytes/s
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 MEM_UOPS_RETIRED_L1_MISS_LOADS
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI FIXC1/FIXC0
+L2 Load [MBytes/s] 1.0E-06*PMC0*64.0/time
+L2 bandwidth [MBytes/s] 1.0E-06*(PMC0)*64.0/time
+L2 data volume [GBytes] 1.0E-09*(PMC0)*64.0
+
+LONG
+Formulas:
+L2 Load [MBytes/s] = 1.0E-06*MEM_UOPS_RETIRED_L1_MISS_LOADS*64/time
+L2 bandwidth [MBytes/s] = 1.0E-06*(MEM_UOPS_RETIRED_L1_MISS_LOADS)*64/time
+L2 data volume [GBytes] = 1.0E-09*(MEM_UOPS_RETIRED_L1_MISS_LOADS)*64
+-
+Profiling group to measure L2 load cache bandwidth. The bandwidth is computed from the
+number of cache lines allocated in the L1 cache. Since there is no possibility to retrieve
+the evicted cache lines, this group measures only the load cache bandwidth.
+The group also outputs the total loaded data volume transferred between L2 and L1.
+
diff --git a/groups/silvermont/L2TOMEM.txt b/groups/silvermont/L2TOMEM.txt
new file mode 100644
index 0000000..bc4cbed
--- /dev/null
+++ b/groups/silvermont/L2TOMEM.txt
@@ -0,0 +1,26 @@
+SHORT L2 to Mem load cache bandwidth in MBytes/s
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 MEM_UOPS_RETIRED_L2_MISS_LOADS
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI FIXC1/FIXC0
+L2 to MEM load bandwidth [MBytes/s] 1.0E-06*(PMC0)*64.0/time
+L2 to MEM load data volume [GBytes] 1.0E-09*(PMC0)*64.0
+
+LONG
+Formulas:
+L2 to MEM load bandwidth [MBytes/s] = 1.0E-06*(MEM_UOPS_RETIRED_L2_MISS_LOADS)*64/time
+L2 to MEM load data volume [GBytes] = 1.0E-09*(MEM_UOPS_RETIRED_L2_MISS_LOADS)*64
+-
+Profiling group to measure L2 to MEM load cache bandwidth. The bandwidth is computed from the
+number of cache lines allocated in the L2 cache. Since there is no possibility to retrieve
+the evicted cache lines, this group measures only the load cache bandwidth.
+The group also outputs the total loaded data volume transferred between memory and L2.
+
diff --git a/kernel/Makefile b/kernel/Makefile
new file mode 100644
index 0000000..fd0ffdf
--- /dev/null
+++ b/kernel/Makefile
@@ -0,0 +1,12 @@
+obj-m := enable_rdpmc.o
+KERNELDIR ?= /lib/modules/$(shell uname -r)/build
+PWD := $(shell pwd)
+
+all:
+ $(MAKE) -Wpacked -C $(KERNELDIR) M=$(PWD) modules
+
+modules_install:
+ install -m 666 enable_rdpmc.ko /lib/modules/$(shell uname -r)/extra/
+
+clean:
+ rm -f *.ko *.o modules.order Module.symvers enable_rdpmc.mod.c
diff --git a/kernel/enable_rdpmc.c b/kernel/enable_rdpmc.c
new file mode 100644
index 0000000..0ecc86d
--- /dev/null
+++ b/kernel/enable_rdpmc.c
@@ -0,0 +1,73 @@
+/*
+ * Enable the RDPMC instruction for userspace by setting CR4 bit 8 on each CPU.
+ */
+#include <linux/module.h> /* Needed by all modules */
+#include <linux/kernel.h> /* Needed for KERN_INFO */
+
+#define MODULE_PARAM(type, name, value, desc) \
+ type name = value; \
+ module_param(name, type, 0664); \
+ MODULE_PARM_DESC(name, desc)
+
+MODULE_PARAM(int, debug, 0, "Debug output");
+
+
+static uint64_t printc4(void) {
+ uint64_t output;
+ // Read back CR4 to check the bit.
+ __asm__("\t mov %%cr4,%0" : "=r"(output));
+ return output;
+}
+
+static void setc4b8(void * info) {
+ // Set CR4, Bit 8 (9th bit from the right) to enable RDPMC for userspace
+ __asm__("push %rax\n\t"
+ "mov %cr4,%rax;\n\t"
+ "or $(1 << 8),%rax;\n\t"
+ "mov %rax,%cr4;\n\t"
+ "wbinvd\n\t"
+ "pop %rax"
+ );
+
+ if (debug) {
+ printk(KERN_INFO "Processor %d, RDPMC_ENABLE_BIT=%llu\n", smp_processor_id(), printc4());
+ }
+}
+
+static void clearc4b8(void * info) {
+ printc4();
+ __asm__("push %rax\n\t"
+ "push %rbx\n\t"
+ "mov %cr4,%rax;\n\t"
+ "mov $(1 << 8), %rbx\n\t"
+ "not %rbx\n\t"
+ "and %rbx, %rax;\n\t"
+ "mov %rax,%cr4;\n\t"
+ "wbinvd\n\t"
+ "pop %rbx\n\t"
+ "pop %rax\n\t"
+ );
+
+ if (debug) {
+ printk(KERN_INFO "Processor %d, RDPMC_ENABLE_BIT=%llu\n", smp_processor_id(), printc4());
+ }
+}
+
+
+
+int start_module(void)
+{
+ on_each_cpu(setc4b8, NULL, 0);
+ return 0;
+}
+void stop_module(void)
+{
+ on_each_cpu(clearc4b8, NULL, 0);
+}
+
+module_init(start_module);
+module_exit(stop_module)
+
+MODULE_AUTHOR("Thomas Roehl <Thomas.Roehl at fau.de>");
+MODULE_DESCRIPTION("Enable RDPMC for userspace");
+MODULE_LICENSE("GPL");
diff --git a/make/include_GCC.mk b/make/include_GCC.mk
index 38606c1..1ccfd88 100644
--- a/make/include_GCC.mk
+++ b/make/include_GCC.mk
@@ -12,7 +12,7 @@ GEN_PMHEADER = ./perl/gen_events.pl
#ANSI_CFLAGS += -Wextra
#ANSI_CFLAGS += -Wall
-CFLAGS = -O2 -Wno-format -std=c99
+CFLAGS = -O2 -Wno-format -Wno-nonnull -std=c99
FCFLAGS = -module ./ # ifort
#FCFLAGS = -J ./ -fsyntax-only #gfortran
PASFLAGS = x86-64
diff --git a/make/include_ICC.mk b/make/include_ICC.mk
index b379daa..ce49bfe 100644
--- a/make/include_ICC.mk
+++ b/make/include_ICC.mk
@@ -7,11 +7,10 @@ GEN_PAS = ./perl/generatePas.pl
GEN_GROUPS = ./perl/generateGroups.pl
GEN_PMHEADER = ./perl/gen_events.pl
-ANSI_CFLAGS = -strict-ansi
ANSI_CFLAGS += -std=c99
-CFLAGS = -O1 -Wno-format -vec-report=0
-FCFLAGS = -module ./
+CFLAGS = -O1 -Wno-format
+FCFLAGS = -module ./
ASFLAGS = -gdwarf-2
PASFLAGS = x86-64
CPPFLAGS =
@@ -21,7 +20,6 @@ SHARED_CFLAGS = -fpic
SHARED_LFLAGS = -shared
DEFINES = -D_GNU_SOURCE
-DEFINES += -DMAX_NUM_THREADS=128
DEFINES += -DPAGE_ALIGNMENT=4096
#enable this option to build likwid-bench with marker API for likwid-perfctr
#DEFINES += -DPERFMON
diff --git a/perl/generatePas.pl b/perl/generatePas.pl
index 520cbc6..9c1dcd1 100755
--- a/perl/generatePas.pl
+++ b/perl/generatePas.pl
@@ -98,7 +98,7 @@ while (defined(my $file = readdir(DIR))) {
}
} elsif ($line =~ /TYPE[ ]+(SINGLE|DOUBLE)/) {
$type = $1;
- } elsif ($line =~ /FLOPS[ ]+([0-9]+)/) {
+ } elsif ($line =~ /FLOPS[ ]+([0-9.]+)/) {
$flops = $1;
} elsif ($line =~ /BYTES[ ]+([0-9]+)/) {
$bytes = $1;
diff --git a/perl/likwid-mpirun b/perl/likwid-mpirun
index fb8daf1..b922359 100755
--- a/perl/likwid-mpirun
+++ b/perl/likwid-mpirun
@@ -1,11 +1,39 @@
#!/usr/bin/perl
+# =======================================================================================
+#
+# Filename: likwid-mpirun
+#
+# Description: Wrapper application to mpi startup mechanisms. Builds on
+# likwid to control affinity and has integrated perfctr support.
+#
+# Version: <VERSION>
+# Released: <DATE>
+#
+# Author: Jan Treibig (jt), jan.treibig at gmail.com
+# Project: likwid
+#
+# Copyright (C) 2014 Jan Treibig
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =======================================================================================
use Getopt::Long;
##############################
# CONFIGURATION #
-##############################
-my $LIKWIDPIN = 'likwid-pin';
-my $LIKWIDPERF = 'likwid-perfctr';
+##############################
+my $LIKWIDPIN = '<PREFIX>/bin/likwid-pin';
+my $LIKWIDPERF = '<PREFIX>/bin/likwid-perfctr';
my $MPIROOT_openmpi = $ENV{'MPIHOME'};
my $MPIROOT_intelmpi = $ENV{'MPIHOME'};
my $MPIEXEC_openmpi = "$MPIROOT_openmpi/bin/mpiexec";
@@ -425,4 +453,4 @@ if (-e $WrapperScript and not $debug) {
unlink ($Hostfilename);
}
-# vim: foldmethod=marker foldmarker=#<#,#>#
+# vim: foldmethod=marker foldmarker=#<#,#>#
diff --git a/perl/likwid-setFrequencies b/perl/likwid-setFrequencies
index 8cc2a97..5834441 100755
--- a/perl/likwid-setFrequencies
+++ b/perl/likwid-setFrequencies
@@ -1,4 +1,31 @@
#!/usr/bin/perl
+# =======================================================================================
+#
+# Filename: likwid-setFrequencies
+#
+# Description: Application allowing to change core frequencies
+#
+# Version: <VERSION>
+# Released: <DATE>
+#
+# Author: Jan Treibig (jt), jan.treibig at gmail.com
+# Project: likwid
+#
+# Copyright (C) 2014 Jan Treibig
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =======================================================================================
use Getopt::Std;
@@ -30,7 +57,7 @@ sub usage
This script allows to switch governors and set fixed
frequencies on Linux system.
-usage: $0 [-hlp] [-g governor] [-d domain] [-f frequency]
+usage: $0 [-hlp] [-g governor] [-c domain] [-f frequency]
-h : this (help) message
-p : print current frequencies
-l : list available frequencies
@@ -145,10 +172,14 @@ if ($opt{g} eq 'turbo') {
if ($opt{g}) {
$governor = $opt{g};
- print "Set governor in domain $domain to $governor \n";
- foreach my $processID (@processors) {
-# print "$SYSCMD $processID 0 $governor\n";
- system("$SYSCMD $processID 0 $governor");
+ if (($governor ne "ondemand") and ($governor ne "performance")) {
+ print "Governor $governor not valid\n";
+ } else {
+ print "Set governor in domain $domain to $governor \n";
+ foreach my $processID (@processors) {
+ system("$SYSCMD $processID 0 $governor");
+ }
}
}
+# vim: foldmethod=marker foldmarker=#<#,#>#
diff --git a/perl/set_license.pl b/perl/set_license.pl
index 9ce5fda..f80326d 100755
--- a/perl/set_license.pl
+++ b/perl/set_license.pl
@@ -11,8 +11,8 @@ my $fc = '!';
#my $VERSION = '<VERSION>';
#my $DATE = '<DATE>';
-my $VERSION = '3.1.2';
-my $DATE = '2.6.2014';
+my $VERSION = '3.1.3';
+my $DATE = '4.11.2014';
my $YEAR = '2014';
my $AUTHOR = 'Jan Treibig';
my $LICENSE = 'gpl';
diff --git a/perl/templates/group.tt b/perl/templates/group.tt
index 43ae7c3..2122caf 100644
--- a/perl/templates/group.tt
+++ b/perl/templates/group.tt
@@ -2,14 +2,65 @@
#define NUM_GROUPS_[% arch FILTER upper %] [% numGroups %]
+[% FOREACH group IN groups %]
+static const char* group_names_[% arch FILTER ucfirst %]_[% group.name %] [] = {[% FOREACH metric IN group.metrics %] "[% metric.label %]", [% END %] NULL};
+[% END %]
+
static PerfmonGroupMap [% arch %]_group_map[NUM_GROUPS_[% arch FILTER upper %]] = {
[% FOREACH group IN groups %]
- {"[% group.name %]",[% group.name %],[% group.isUncore %],"[% group.shortHelp %]","[% group.eventSet %]"},
+ {"[% group.name %]",[% group.name %],[% group.isUncore %],"[% group.shortHelp %]","[% group.eventSet %]", 0 [% FOREACH metric IN group.metrics %] +1 [% END %], group_names_[% arch FILTER ucfirst %]_[% group.name %]
+ },
[% END %]
};
+/* Generated once per architecture from this template.
+ * For the given group, evaluates every derived metric of that group for each
+ * thread (0 .. perfmon_numThreads-1) and stores, per metric index:
+ *   values[i]  - sum over all threads divided by perfmon_numThreads
+ *   out_min[i] - smallest per-thread value
+ *   out_max[i] - largest per-thread value (never below 0, see the max init)
+ * NaN results of a metric rule are counted as 0.0.
+ * The caller must supply arrays with at least one slot per metric of the
+ * group. An unknown group prints an error to stderr and terminates the
+ * process. */
+void perfmon_getDerivedCounterValues[% arch FILTER ucfirst %](PerfmonGroup group, float * values, float * out_max, float * out_min){
+ /* 'time' and 'inverseClock' are not used by the code below directly;
+  * presumably the expanded metric rules refer to them -- TODO confirm. */
+ double time = rdtscTime;
+ double inverseClock = 1.0 /(double) timer_getCpuClock();
+
+ /* Slot 0 is pre-filled with the runtime. NOTE(review): 'counter' starts at 0
+  * below, so for a group with at least one metric these three values are
+  * overwritten by the first metric -- confirm that this is intended. */
+ values[0] = time;
+ out_min[0] = time;
+ out_max[0] = time;
+
+ switch ( group ) {
+ [% FOREACH group IN groups %]
+ case [% group.name %]:{
+ int threadId;
+ int counter = 0;
+ double sum,min,max;
+
+ [% FOREACH metric IN group.metrics %]
+ /* Reset the accumulators for this metric. max starts at 0, so a metric
+  * that is negative on all threads still reports a maximum of 0. */
+ sum = 0;
+ min = 1e300;
+ max = 0;
+
+ for(threadId=0; threadId < perfmon_numThreads; threadId++)
+ {
+ double cur = [% metric.rule %];
+ /* Treat NaN (e.g. from a 0/0 in the rule) as zero. */
+ cur = isnan(cur) ? 0.0 : cur;
+ sum += cur;
+ max = max > cur ? max : cur;
+ min = min < cur ? min : cur;
+ }
+
+ /* The cast binds to 'sum' only: the division is done in float. */
+ values[counter] = (float) sum / perfmon_numThreads;
+ out_min[counter] = (float) min;
+ out_max[counter] = (float) max;
+ counter++;
+ [% END %]
+ return;
+ }
+ [% END %]
+
+ default:
+ fprintf (stderr, "perfmon_getDerivedCounterValues[% arch %]: Unknown group! Exiting!\n" );
+ exit (EXIT_FAILURE);
+ break;
+ }
+}
+
+
void
-perfmon_printDerivedMetrics[% arch FILTER ucfirst %](PerfmonGroup group)
+perfmon_printDerivedMetrics[% arch FILTER ucfirst %](PerfmonGroup groupId)
{
int threadId;
double time = rdtscTime;
@@ -25,7 +76,7 @@ perfmon_printDerivedMetrics[% arch FILTER ucfirst %](PerfmonGroup group)
uint64_t cpi_cyc = 0;
int cpi_index = 0;
- switch ( group )
+ switch ( groupId )
{
[% FOREACH group IN groups %]
case [% group.name %]:
diff --git a/src/access-daemon/Makefile b/src/access-daemon/Makefile
index 1f2b3d9..afd751b 100644
--- a/src/access-daemon/Makefile
+++ b/src/access-daemon/Makefile
@@ -4,8 +4,8 @@
#
# Description: accessDaemon Makefile
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
@@ -38,12 +38,13 @@ ifeq ($(COMPILER),GCC)
CFLAGS += -pedantic -Wall -Wextra -std=c99
endif
CPPFLAGS := $(DEFINES) $(INCLUDES)
+Q=
all: $(DAEMON_TARGET) $(SETFREQ_TARGET)
$(DAEMON_TARGET): accessDaemon.c
- $(CC) $(CFLAGS) $(CPPFLAGS) -o ../../$(DAEMON_TARGET) accessDaemon.c
+ $(CC) $(ANSI_CFLAGS) $(CFLAGS) $(CPPFLAGS) -o ../../$(DAEMON_TARGET) accessDaemon.c
$(SETFREQ_TARGET): setFreq.c
- $(CC) $(CFLAGS) $(CPPFLAGS) -o ../../$(SETFREQ_TARGET) setFreq.c
+ $(CC) $(ANSI_CFLAGS) $(CFLAGS) $(CPPFLAGS) -o ../../$(SETFREQ_TARGET) setFreq.c
diff --git a/src/access-daemon/accessDaemon.c b/src/access-daemon/accessDaemon.c
index a1903ab..5679a92 100644
--- a/src/access-daemon/accessDaemon.c
+++ b/src/access-daemon/accessDaemon.c
@@ -5,8 +5,8 @@
*
* Description: Implementation of access daemon.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Authors: Michael Meier, michael.meier at rrze.fau.de
* Jan Treibig (jt), jan.treibig at gmail.com
@@ -55,34 +55,88 @@
#define str(x) #x
#define CHECK_ERROR(func, msg) \
- if ((func) < 0) { syslog(LOG_ERR, "ERROR - [%s:%d] " str(msg) " - %s \n", __FILE__, __LINE__, strerror(errno)); }
+ if ((func) < 0) { \
+ syslog(LOG_ERR, "ERROR - [%s:%d] " str(msg) " - %s \n", __FILE__, __LINE__, strerror(errno)); \
+ }
#define CHECK_FILE_ERROR(func, msg) \
- if ((func) == 0) { syslog(LOG_ERR, "ERROR - [%s:%d] " str(msg) " - %s \n", __FILE__, __LINE__, strerror(errno)); }
+ if ((func) == 0) { \
+ syslog(LOG_ERR, "ERROR - [%s:%d] " str(msg) " - %s \n", __FILE__, __LINE__, strerror(errno)); \
+ }
#define EXIT_IF_ERROR(func, msg) \
- if ((func) < 0) { syslog(LOG_ERR, "ERROR - [%s:%d] " str(msg) " - %s \n", __FILE__, __LINE__, strerror(errno)); stop_daemon(); exit(EXIT_FAILURE); }
+ if ((func) < 0) { \
+ syslog(LOG_ERR, "ERROR - [%s:%d] " str(msg) " - %s \n", __FILE__, __LINE__, strerror(errno)); \
+ stop_daemon(); \
+ exit(EXIT_FAILURE); \
+ }
-#define CPUID \
- __asm__ volatile ("cpuid" \
- : "=a" (eax), \
- "=b" (ebx) \
+#define CPUID \
+ __asm__ volatile ("cpuid" \
+ : "=a" (eax), "=b" (ebx) \
: "0" (eax))
-#define P6_FAMILY 0x6U
-#define K8_FAMILY 0xFU
-#define K10_FAMILY 0x10U
-#define K15_FAMILY 0x15U
-#define K16_FAMILY 0x16U
-
+/* Intel P6 */
+#define PENTIUM_M_BANIAS 0x09U
+#define PENTIUM_M_DOTHAN 0x0DU
+#define CORE_DUO 0x0EU
+#define CORE2_65 0x0FU
+#define CORE2_45 0x17U
+#define ATOM 0x1CU
+#define ATOM_45 0x26U
+#define ATOM_32 0x36U
+#define ATOM_22 0x27U
+#define ATOM_SILVERMONT 0x4DU
+#define NEHALEM 0x1AU
+#define NEHALEM_BLOOMFIELD 0x1AU
+#define NEHALEM_LYNNFIELD 0x1EU
+#define NEHALEM_LYNNFIELD_M 0x1FU
+#define NEHALEM_WESTMERE 0x2CU
+#define NEHALEM_WESTMERE_M 0x25U
#define SANDYBRIDGE 0x2AU
#define SANDYBRIDGE_EP 0x2DU
+#define HASWELL 0x3CU
+#define HASWELL_EX 0x3FU
+#define HASWELL_M1 0x45U
+#define HASWELL_M2 0x46U
#define IVYBRIDGE 0x3AU
#define IVYBRIDGE_EP 0x3EU
-#define HASWELL 0x3CU
+#define NEHALEM_EX 0x2EU
+#define WESTMERE_EX 0x2FU
+#define XEON_MP 0x1DU
+
+/* Intel MIC */
+#define XEON_PHI 0x01U
+
+/* AMD K10 */
+#define BARCELONA 0x02U
+#define SHANGHAI 0x04U
+#define ISTANBUL 0x08U
+#define MAGNYCOURS 0x09U
+
+/* AMD K8 */
+#define OPTERON_SC_1MB 0x05U
+#define OPTERON_DC_E 0x21U
+#define OPTERON_DC_F 0x41U
+#define ATHLON64_X2 0x43U
+#define ATHLON64_X2_F 0x4BU
+#define ATHLON64_F1 0x4FU
+#define ATHLON64_F2 0x5FU
+#define ATHLON64_X2_G 0x6BU
+#define ATHLON64_G1 0x6FU
+#define ATHLON64_G2 0x7FU
+
+
+#define P6_FAMILY 0x6U
+#define MIC_FAMILY 0xBU
+#define NETBURST_FAMILY 0xFFU
+#define K15_FAMILY 0x15U
+#define K16_FAMILY 0x16U
+#define K10_FAMILY 0x10U
+#define K8_FAMILY 0xFU
#define PCI_ROOT_PATH "/proc/bus/pci/"
#define MAX_PATH_LENGTH 60
@@ -159,6 +213,44 @@ static int allowed_intel(uint32_t reg)
}
}
+/* Register whitelist used when the CPU model is ATOM_SILVERMONT.
+ *
+ * reg: register address requested by a client.
+ * Returns 1 if the daemon may access the register, 0 otherwise.
+ *
+ * NOTE(review): the range checks only look at the masked bits, so addresses
+ * with additional higher bits set (e.g. 0x1C0 for the first range, 0x1300
+ * for the 0x300 block) are accepted as well -- confirm that this is wanted. */
+static int allowed_silvermont(uint32_t reg)
+{
+    /* Whole address ranges, selected by masking. */
+    if ( ((reg & 0x0F8U) == 0x0C0U) ||
+         ((reg & 0xFF0U) == 0x180U) )
+    {
+        return 1;
+    }
+
+    switch (reg & 0xF00U)
+    {
+        case 0x300U:
+        case 0x600U:
+        case 0xC00U:
+        case 0xD00U:
+            return 1;
+        default:
+            break;
+    }
+
+    /* Individually permitted registers (0x1A6 was listed twice before). */
+    switch (reg)
+    {
+        case 0x0CE:
+        case 0x19C:
+        case 0x1A0:
+        case 0x1A2:
+        case 0x1A6:
+        case 0x1A7:
+        case 0x1AD:
+            return 1;
+        default:
+            return 0;
+    }
+}
+
+/* Register whitelist used when the CPU model is WESTMERE_EX: everything
+ * allowed_intel() accepts, plus every address whose bits 8..11 are all set.
+ *
+ * reg: register address requested by a client.
+ * Returns 1 if access is permitted, 0 otherwise. */
+static int allowed_westmereEX(uint32_t reg)
+{
+    const int base_ok  = (allowed_intel(reg) == 1);
+    const int extra_ok = ((reg & 0xF00) == 0xF00);
+
+    return (base_ok || extra_ok) ? 1 : 0;
+}
+
static int allowed_sandybridge(uint32_t reg)
{
if ( ((reg & 0x0F8U) == 0x0C0U) ||
@@ -182,6 +274,30 @@ static int allowed_sandybridge(uint32_t reg)
}
}
+/* Register whitelist used for the Haswell models (HASWELL, HASWELL_M1,
+ * HASWELL_M2, HASWELL_EX).
+ *
+ * reg: register address requested by a client.
+ * Returns 1 if the daemon may access the register, 0 otherwise. */
+static int allowed_haswell(uint32_t reg)
+{
+    /* Ranges selected on the low byte / low twelve bits. */
+    if ((reg & 0x0F8U) == 0x0C0U)
+    {
+        return 1;
+    }
+    if ((reg & 0xFF0U) == 0x180U)
+    {
+        return 1;
+    }
+
+    /* Whole 0x100-sized blocks, selected on bits 8..11. */
+    switch (reg & 0xF00U)
+    {
+        case 0x300U:
+        case 0x600U:
+        case 0x700U:
+        case 0xC00U:
+        case 0xD00U:
+        case 0xE00U:
+            return 1;
+        default:
+            break;
+    }
+
+    /* Individually permitted registers. */
+    switch (reg)
+    {
+        case 0x0CE:
+        case 0x19C:
+        case 0x1A0:
+        case 0x1A2:
+        case 0x1A6:
+        case 0x1AD:
+            return 1;
+        default:
+            return 0;
+    }
+}
static int allowed_amd(uint32_t reg)
{
@@ -231,6 +347,11 @@ static void msr_read(AccessDataRecord * dRecord)
dRecord->errorcode = ERR_NOERROR;
dRecord->data = 0;
+ if (FD_MSR[cpu] == -2)
+ {
+ dRecord->errorcode = ERR_NODEV;
+ return;
+ }
if (!allowed(reg))
{
syslog(LOG_ERR, "attempt to read from restricted register 0x%x", reg);
@@ -256,6 +377,12 @@ static void msr_write(AccessDataRecord * dRecord)
dRecord->errorcode = ERR_NOERROR;
+ if (FD_MSR[cpu] == -2)
+ {
+ dRecord->errorcode = ERR_NODEV;
+ return;
+ }
+
if (!allowed(reg))
{
syslog(LOG_ERR, "attempt to write to restricted register %x", reg);
@@ -282,10 +409,10 @@ static void pci_read(AccessDataRecord* dRecord)
dRecord->data = 0;
if (FD_PCI[socketId][device] == -2)
- {
- dRecord->errorcode = ERR_NODEV;
- return;
- }
+ {
+ dRecord->errorcode = ERR_NODEV;
+ return;
+ }
else if ( !FD_PCI[socketId][device] )
{
strncpy(pci_filepath, PCI_ROOT_PATH, 30);
@@ -302,7 +429,7 @@ static void pci_read(AccessDataRecord* dRecord)
}
}
- if ( pread(FD_PCI[socketId][device], &data, sizeof(data), reg) != sizeof(data))
+ if ( pread(FD_PCI[socketId][device], &data, sizeof(data), reg) != sizeof(data))
{
syslog(LOG_ERR, "Failed to read data from pci device file on socket %u device %u",
socketId, device);
@@ -323,11 +450,11 @@ static void pci_write(AccessDataRecord* dRecord)
uint32_t data = (uint32_t) dRecord->data;
dRecord->errorcode = ERR_NOERROR;
- if (FD_PCI[socketId][device] == -2)
- {
- dRecord->errorcode = ERR_NODEV;
- return;
- }
+ if (FD_PCI[socketId][device] == -2)
+ {
+ dRecord->errorcode = ERR_NODEV;
+ return;
+ }
else if ( !FD_PCI[socketId][device] )
{
strncpy(pci_filepath, PCI_ROOT_PATH, 30);
@@ -344,7 +471,7 @@ static void pci_write(AccessDataRecord* dRecord)
}
}
- if (pwrite(FD_PCI[socketId][device], &data, sizeof data, reg) != sizeof data)
+ if (pwrite(FD_PCI[socketId][device], &data, sizeof data, reg) != sizeof data)
{
syslog(LOG_ERR, "Failed to write data to pci device file on socket %u", socketId);
dRecord->errorcode = ERR_RWFAIL;
@@ -432,7 +559,7 @@ static void daemonize(int* parentPid)
/* Change the current working directory. This prevents the current
directory from being locked; hence not being able to remove it. */
- if ((chdir("/")) < 0)
+ if ((chdir("/")) < 0)
{
syslog(LOG_ERR, "chdir failed: %s", strerror(errno));
exit(EXIT_FAILURE);
@@ -458,6 +585,7 @@ int main(void)
mode_t oldumask;
uint32_t numHWThreads = sysconf(_SC_NPROCESSORS_CONF);
uint32_t model;
+ int isIntel = 1;
if (!lock_check())
{
@@ -465,61 +593,77 @@ int main(void)
exit(EXIT_FAILURE);
}
+ for ( uint32_t i=0; i < numHWThreads; i++ )
{
- uint32_t eax = 0x00;
- uint32_t ebx = 0x00;
- int isIntel = 1;
- CPUID;
- if (ebx == 0x68747541U)
- {
- isIntel = 0;
- }
+ FD_MSR[i] = -1;
+ }
- eax = 0x01;
- CPUID;
- uint32_t family = ((eax >> 8) & 0xFU) + ((eax >> 20) & 0xFFU);
- model = (((eax >> 16) & 0xFU) << 4) + ((eax >> 4) & 0xFU);
+ uint32_t eax = 0x00;
+ uint32_t ebx = 0x00;
+
+ CPUID;
+ if (ebx == 0x68747541U)
+ {
+ isIntel = 0;
+ }
- switch (family)
- {
- case P6_FAMILY:
- allowed = allowed_intel;
+ eax = 0x01;
+ CPUID;
+ uint32_t family = ((eax >> 8) & 0xFU) + ((eax >> 20) & 0xFFU);
+ model = (((eax >> 16) & 0xFU) << 4) + ((eax >> 4) & 0xFU);
- if ((model == SANDYBRIDGE) ||
- (model == SANDYBRIDGE_EP) ||
- (model == IVYBRIDGE) ||
- (model == IVYBRIDGE_EP) )
- {
- allowed = allowed_sandybridge;
- isPCIUncore = 1;
- }
- else if (model == HASWELL)
- {
- allowed = allowed_sandybridge;
- }
- break;
- case K8_FAMILY:
- if (isIntel)
- {
- fprintf(stderr,
- "ERROR - [%s:%d] - Netburst architecture is not supported! Exiting! \n",
- __FILE__,__LINE__);
- exit(EXIT_FAILURE);
- }
- case K10_FAMILY:
+ switch (family)
+ {
+ case P6_FAMILY:
+ allowed = allowed_intel;
+
+ if (isIntel && ((model == SANDYBRIDGE) ||
+ (model == SANDYBRIDGE_EP) ||
+ (model == IVYBRIDGE) ||
+ (model == IVYBRIDGE_EP) ))
+ {
+ allowed = allowed_sandybridge;
+ isPCIUncore = 1;
+ }
+ else if (isIntel && ((model == HASWELL) ||
+ (model == HASWELL_M1) ||
+ (model == HASWELL_M2) ||
+ (model == HASWELL_EX)))
+ {
+ allowed = allowed_haswell;
+ }
+ else if (isIntel && (model == ATOM_SILVERMONT))
+ {
+ allowed = allowed_silvermont;
+ }
+ else if (isIntel && (model == WESTMERE_EX))
+ {
+ allowed = allowed_westmereEX;
+ }
+ break;
+ case K8_FAMILY:
+ case K10_FAMILY:
+ if (!isIntel)
+ {
allowed = allowed_amd;
- break;
- case K15_FAMILY:
+ }
+ break;
+ case K15_FAMILY:
+ if (!isIntel)
+ {
allowed = allowed_amd15;
- break;
- case K16_FAMILY:
+ }
+ break;
+ case K16_FAMILY:
+ if (!isIntel)
+ {
allowed = allowed_amd16;
- break;
- default:
- fprintf(stderr, "ERROR - [%s:%d] - Unsupported processor. Exiting! \n",
- __FILE__, __LINE__);
- exit(EXIT_FAILURE);
- }
+ }
+ break;
+ default:
+ fprintf(stderr, "ERROR - [%s:%d] - Unsupported processor. Exiting!\n",
+ __FILE__, __LINE__);
+ exit(EXIT_FAILURE);
}
openlog(ident, 0, LOG_USER);
@@ -593,12 +737,21 @@ int main(void)
* NOTICE: This assumes consecutive processor Ids! */
for ( uint32_t i=0; i < numHWThreads; i++ )
{
+#ifdef __MIC
+ sprintf(msr_file_name,"/dev/msr%d",i);
+ if (access(msr_file_name, F_OK))
+ {
+ sprintf(msr_file_name,"/dev/cpu/%d/msr",i);
+ }
+#else
sprintf(msr_file_name,"/dev/cpu/%d/msr",i);
+#endif
FD_MSR[i] = open(msr_file_name, O_RDWR);
if ( FD_MSR[i] < 0 )
{
syslog(LOG_ERR, "Failed to open device file %s.",msr_file_name);
+ FD_MSR[i] = -2;
}
}
@@ -608,7 +761,7 @@ int main(void)
{
for (int j=0; j<MAX_NUM_NODES; j++)
{
- socket_bus[j] = "N-A";
+ socket_bus[j] = "N-A";
for (int i=0; i<MAX_NUM_DEVICES; i++)
{
FD_PCI[j][i] = -2;
@@ -661,25 +814,25 @@ int main(void)
}
else
{
- socket_count = cntr;
-
- for (int j=0; j<socket_count; j++)
- {
- for (int i=0; i<MAX_NUM_DEVICES; i++)
- {
- sprintf(pci_filepath, "%s%s%s",PCI_ROOT_PATH,socket_bus[j],pci_DevicePath[i]);
-
- if (!access(pci_filepath,F_OK))
- {
- FD_PCI[j][i] = 0;
- }
- else
- {
- syslog(LOG_NOTICE, "Device %s not found, excluded it from device list\n",pci_filepath);
- }
- }
- }
- }
+ socket_count = cntr;
+
+ for (int j=0; j<socket_count; j++)
+ {
+ for (int i=0; i<MAX_NUM_DEVICES; i++)
+ {
+ sprintf(pci_filepath, "%s%s%s",PCI_ROOT_PATH,socket_bus[j],pci_DevicePath[i]);
+
+ if (!access(pci_filepath,F_OK))
+ {
+ FD_PCI[j][i] = 0;
+ }
+ else
+ {
+ syslog(LOG_NOTICE, "Device %s not found, excluded it from device list\n",pci_filepath);
+ }
+ }
+ }
+ }
}
}
diff --git a/src/access-daemon/setFreq.c b/src/access-daemon/setFreq.c
index e23335c..967dbbf 100644
--- a/src/access-daemon/setFreq.c
+++ b/src/access-daemon/setFreq.c
@@ -1,36 +1,101 @@
+/*
+ * =======================================================================================
+ *
+ * Filename: setFreq.c
+ *
+ * Description: Wrapper for accessing setfreq kernel FS files
+ *
+ * Version: 3.1.3
+ * Released: 4.11.2014
+ *
+ * Authors: Michael Meier, michael.meier at rrze.fau.de
+ * Jan Treibig (jt), jan.treibig at gmail.com
+ * Project: likwid
+ *
+ * Copyright (C) 2014 Jan Treibig
+ *
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation, either version 3 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * =======================================================================================
+ */
+
#include <stdlib.h>
#include <stdio.h>
-
#include <string.h>
+/* Counts the lines of /proc/cpuinfo that start with "processor".
+ *
+ * Returns that count, or 0 if /proc/cpuinfo cannot be opened. */
+static int get_numCPUs(void)
+{
+    int cpucount = 0;
+    char line[1024];
+    FILE* fp = fopen("/proc/cpuinfo","r");
+
+    if (fp == NULL)
+    {
+        return 0;
+    }
+
+    while( fgets(line, sizeof(line), fp) )
+    {
+        if (strncmp(line, "processor", 9) == 0)
+        {
+            cpucount++;
+        }
+    }
+
+    /* The stream was previously left open. */
+    fclose(fp);
+    return cpucount;
+}
+
int main (int argn, char** argv)
{
int cpuid;
int freq;
+ int numCPUs = 0;
char* gov;
char* gpath = malloc(100);
char* fpath = malloc(100);
+ FILE* f;
- if (argn < 3)
+ if (argn < 3 || argn > 4)
{
fprintf(stderr, "Usage: %s <processorID> <frequency> [<governor>] \n",argv[0]);
+ exit(EXIT_FAILURE);
}
cpuid = atoi(argv[1]);
+ numCPUs = get_numCPUs();
+ if (cpuid < 0 || cpuid > numCPUs)
+ {
+ fprintf(stderr, "CPU %d not a valid CPU ID. Range from 0 to %d.\n",cpuid,numCPUs);
+ exit(EXIT_FAILURE);
+ }
freq = atoi(argv[2]);
+ if (freq < 0)
+ {
+ fprintf(stderr, "Frequency must be greater than 0.\n");
+ exit(EXIT_FAILURE);
+ }
+ snprintf(gpath, 60, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", cpuid);
+ snprintf(fpath, 60, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_setspeed", cpuid);
if (argn == 4)
{
gov = argv[3];
- if ((strncmp(gov,"ondemand",12)) && (strncmp(gov,"performance",12))) {
+ if ((strncmp(gov,"ondemand",12)) && (strncmp(gov,"performance",12)))
+ {
fprintf(stderr, "Invalid governor %s!\n",gov);
return (EXIT_FAILURE);
}
- snprintf(gpath, 60, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", cpuid);
- FILE* f = fopen(gpath, "w");
- if (f == NULL) {
+ f = fopen(gpath, "w");
+ if (f == NULL)
+ {
fprintf(stderr, "Unable to open path for writing\n");
return (EXIT_FAILURE);
}
@@ -38,20 +103,21 @@ int main (int argn, char** argv)
fclose(f);
return(EXIT_SUCCESS);
}
-
- snprintf(gpath, 60, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", cpuid);
- snprintf(fpath, 60, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_setspeed", cpuid);
-
- FILE* f = fopen(gpath, "w");
- if (f == NULL) {
- fprintf(stderr, "Unable to open path for writing\n");
- return (EXIT_FAILURE);
+ else
+ {
+ f = fopen(gpath, "w");
+ if (f == NULL)
+ {
+ fprintf(stderr, "Unable to open path for writing\n");
+ return (EXIT_FAILURE);
+ }
+ fprintf(f,"userspace");
+ fclose(f);
}
- fprintf(f,"userspace");
- fclose(f);
f = fopen(fpath, "w");
- if (f == NULL) {
+ if (f == NULL)
+ {
fprintf(stderr, "Unable to open path for writing\n");
return (EXIT_FAILURE);
}
diff --git a/src/access-daemon/setFreq.c.tmp b/src/access-daemon/setFreq.c.tmp
deleted file mode 100644
index e69de29..0000000
diff --git a/src/accessClient.c b/src/accessClient.c
index 4c1cd20..ba4cb59 100644
--- a/src/accessClient.c
+++ b/src/accessClient.c
@@ -5,11 +5,11 @@
*
* Description: Implementation of client to the access daemon.
* Provides API to read and write values to MSR or
- * PCI Cfg Adresses. This module is used by the
+ * PCI Cfg Adresses. This module is used by the
* msr and pci modules.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -72,7 +72,7 @@ static char* accessClient_strerror(AccessErrorType det)
case ERR_DAEMONBUSY: return "daemon already has a same/higher priority client";
case ERR_LOCKED: return "access to HPM is locked";
case ERR_UNSUPPORTED: return "unsupported processor";
- case ERR_NODEV: return "no such device";
+ case ERR_NODEV: return "no such device";
default: return "UNKNOWN errorcode";
}
}
@@ -93,25 +93,25 @@ static int startDaemon(void)
if (accessClient_mode == DAEMON_AM_ACCESS_D)
{
- if (access(exeprog, F_OK))
- {
- fprintf(stderr, "Daemon '%s' cannot be found\n", exeprog);
- exit(EXIT_FAILURE);
- }
- if (access(exeprog, X_OK))
- {
- fprintf(stderr, "Daemon '%s' not executable\n", exeprog);
- exit(EXIT_FAILURE);
- }
+ if (access(exeprog, F_OK))
+ {
+ fprintf(stderr, "Daemon '%s' cannot be found\n", exeprog);
+ exit(EXIT_FAILURE);
+ }
+ if (access(exeprog, X_OK))
+ {
+ fprintf(stderr, "Daemon '%s' not executable\n", exeprog);
+ exit(EXIT_FAILURE);
+ }
pid = fork();
if (pid == 0)
- {
- ret = execve (exeprog, newargv, newenv);
- ERRNO_PRINT;
- fprintf(stderr, "Failed to execute the daemon '%s' (see error above)\n", exeprog);
- exit(EXIT_FAILURE);
- }
+ {
+ ret = execve (exeprog, newargv, newenv);
+ ERRNO_PRINT;
+ fprintf(stderr, "Failed to execute the daemon '%s' (see error above)\n", exeprog);
+ exit(EXIT_FAILURE);
+ }
else if (pid < 0)
{
ERROR_PLAIN_PRINT(Failed to fork);
@@ -215,9 +215,9 @@ uint64_t accessClient_read(
if (data.errorcode != ERR_NOERROR)
{
fprintf(stderr, "Failed to read data through daemon: "
- "daemon returned error %d '%s' for cpu %d reg %x\n",
+ "daemon returned error %d '%s' for cpu %d reg 0x%x\n",
data.errorcode, accessClient_strerror(data.errorcode), cpu, reg);
- exit(EXIT_FAILURE);
+ //exit(EXIT_FAILURE);
}
return data.data;
@@ -245,7 +245,7 @@ void accessClient_write(
fprintf(stderr, "Failed to write data through daemon: "
"daemon returned error %d '%s' for cpu %d reg 0x%x\n",
data.errorcode, accessClient_strerror(data.errorcode), cpu, reg);
- exit(EXIT_FAILURE);
+ //exit(EXIT_FAILURE);
}
if (data.data != 0x00ULL)
diff --git a/src/affinity.c b/src/affinity.c
index 3b5f508..59b05da 100644
--- a/src/affinity.c
+++ b/src/affinity.c
@@ -5,8 +5,8 @@
*
* Description: Implementation of affinity module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -33,6 +33,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
+#include <math.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/time.h>
@@ -88,52 +89,50 @@ treeFillNextEntries(
int offset,
int numberOfEntries )
{
- int counter = numberOfEntries;
- TreeNode* node = tree;
- TreeNode* thread;
+ int counter = numberOfEntries;
+ TreeNode* node = tree;
+ TreeNode* thread;
+ node = tree_getChildNode(node);
- node = tree_getChildNode(node);
-
- /* get socket node */
- for (int i=0; i<socketId; i++)
- {
- node = tree_getNextNode(node);
-
- if ( node == NULL )
+ /* get socket node */
+ for (int i=0; i<socketId; i++)
{
- printf("ERROR: Socket %d not existing!",i);
- exit(EXIT_FAILURE);
- }
- }
+ node = tree_getNextNode(node);
- node = tree_getChildNode(node);
- /* skip offset cores */
- for (int i=0; i<offset; i++)
- {
- node = tree_getNextNode(node);
+ if ( node == NULL )
+ {
+ printf("ERROR: Socket %d not existing!",i);
+ exit(EXIT_FAILURE);
+ }
+ }
- if ( node == NULL )
+ node = tree_getChildNode(node);
+ /* skip offset cores */
+ for (int i=0; i<offset; i++)
{
- printf("ERROR: Core %d not existing!",i);
- exit(EXIT_FAILURE);
- }
- }
+ node = tree_getNextNode(node);
- /* Traverse horizontal */
- while ( node != NULL )
- {
- if ( !counter ) break;
+ if ( node == NULL )
+ {
+ printf("ERROR: Core %d on socket %d not existing!",i,socketId);
+ exit(EXIT_FAILURE);
+ }
+ }
+ /* Traverse horizontal */
+ while ( node != NULL )
+ {
+ if ( !counter ) break;
- thread = tree_getChildNode(node);
+ thread = tree_getChildNode(node);
- while ( thread != NULL )
- {
- processorIds[numberOfEntries-counter] = thread->id;
- thread = tree_getNextNode(thread);
- counter--;
+ while ( thread != NULL )
+ {
+ processorIds[numberOfEntries-counter] = thread->id;
+ thread = tree_getNextNode(thread);
+ counter--;
+ }
+ node = tree_getNextNode(node);
}
- node = tree_getNextNode(node);
- }
}
/* ##### FUNCTION DEFINITIONS - EXPORTED FUNCTIONS ################## */
@@ -166,9 +165,20 @@ affinity_init()
(cpuid_topology.numCoresPerSocket/numberOfCoresPerCache);
/* determine total number of domains */
- numberOfDomains += numberOfSocketDomains + numberOfCacheDomains + numberOfNumaDomains;
-
+ if ( numberOfNumaDomains > 1 )
+ {
+ numberOfDomains += numberOfSocketDomains + numberOfCacheDomains + numberOfNumaDomains;
+ }
+ else
+ {
+ numberOfDomains += numberOfSocketDomains + numberOfCacheDomains;
+ }
domains = (AffinityDomain*) malloc(numberOfDomains * sizeof(AffinityDomain));
+ if (!domains)
+ {
+ fprintf(stderr, "Cannot allocate affinity domain memory\n");
+ return;
+ }
/* Node domain */
domains[0].numberOfProcessors = cpuid_topology.numHWThreads;
@@ -228,37 +238,40 @@ affinity_init()
}
}
- /* Memory domains */
- currentDomain += numberOfCacheDomains;
- subCounter = 0;
-
- for (int i=0; i < numberOfSocketDomains; i++ )
+ if ( numberOfNumaDomains > 1 )
{
- offset = 0;
-
- for ( int j=0; j < (numberOfNumaDomains/numberOfSocketDomains); j++ )
- {
- domains[currentDomain + subCounter].numberOfProcessors = numberOfProcessorsPerCache;
- domains[currentDomain + subCounter].numberOfCores = numberOfCoresPerCache;
- domains[currentDomain + subCounter].processorList = (int*) malloc(numa_info.nodes[subCounter].numberOfProcessors*sizeof(int));
- domains[currentDomain + subCounter].tag = bformat("M%d", subCounter);
-
- treeFillNextEntries(
- cpuid_topology.topologyTree,
- domains[currentDomain + subCounter].processorList,
- i, offset, domains[currentDomain + subCounter].numberOfProcessors);
+ /* Memory domains */
+ currentDomain += numberOfCacheDomains;
+ subCounter = 0;
- offset += numberOfCoresPerCache;
- subCounter++;
- }
- }
+ for (int i=0; i < numberOfSocketDomains; i++ )
+ {
+ offset = 0;
+ for ( int j=0; j < (int)ceil((double)numberOfNumaDomains/numberOfSocketDomains); j++ )
+ {
+ domains[currentDomain + subCounter].numberOfProcessors = numa_info.nodes[subCounter].numberOfProcessors;
+ domains[currentDomain + subCounter].numberOfCores = numberOfCoresPerCache;
+ domains[currentDomain + subCounter].processorList = (int*) malloc(numa_info.nodes[subCounter].numberOfProcessors*sizeof(int));
+ domains[currentDomain + subCounter].tag = bformat("M%d", subCounter);
+
+ treeFillNextEntries(
+ cpuid_topology.topologyTree,
+ domains[currentDomain + subCounter].processorList,
+ i, offset, domains[currentDomain + subCounter].numberOfProcessors);
+
+ offset += domains[currentDomain + subCounter].numberOfCores;
+
+ subCounter++;
+ }
+ }
- /* This is redundant ;-). Create thread to node lookup */
- for ( uint32_t i = 0; i < numa_info.numberOfNodes; i++ )
- {
- for ( int j = 0; j < numa_info.nodes[i].numberOfProcessors; j++ )
+ /* This is redundant ;-). Create thread to node lookup */
+ for ( uint32_t i = 0; i < numa_info.numberOfNodes; i++ )
{
- affinity_core2node_lookup[numa_info.nodes[i].processors[j]] = i;
+ for ( int j = 0; j < numa_info.nodes[i].numberOfProcessors; j++ )
+ {
+ affinity_core2node_lookup[numa_info.nodes[i].processors[j]] = i;
+ }
}
}
@@ -308,7 +321,7 @@ affinity_threadGetProcessorId()
void
affinity_pinThread(int processorId)
{
- cpu_set_t cpuset;
+ cpu_set_t cpuset;
pthread_t thread;
thread = pthread_self();
@@ -327,11 +340,11 @@ affinity_pinThread(int processorId)
void
affinity_pinProcess(int processorId)
{
- cpu_set_t cpuset;
+ cpu_set_t cpuset;
- CPU_ZERO(&cpuset);
- CPU_SET(processorId, &cpuset);
- sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
+ CPU_ZERO(&cpuset);
+ CPU_SET(processorId, &cpuset);
+ sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
}
@@ -351,18 +364,22 @@ affinity_getDomain(bstring domain)
}
void
-affinity_printDomains()
+affinity_printDomains(FILE* OUTSTREAM)
{
- for ( int i=0; i < affinity_numberOfDomains; i++ )
+ if (OUTSTREAM)
{
- printf("Domain %d:\n",i);
- printf("\tTag %s:",bdata(domains[i].tag));
-
- for ( uint32_t j=0; j < domains[i].numberOfProcessors; j++ )
+ for ( int i=0; i < affinity_numberOfDomains; i++ )
{
- printf(" %d",domains[i].processorList[j]);
+ fprintf(OUTSTREAM, "Domain %d:\n", i);
+ fprintf(OUTSTREAM, "\tTag %s:", bdata(domains[i].tag));
+
+ for ( uint32_t j=0; j < domains[i].numberOfProcessors; j++ )
+ {
+ fprintf(OUTSTREAM, " %d", domains[i].processorList[j]);
+ }
+ fprintf(OUTSTREAM, "\n");
+ fflush(OUTSTREAM);
}
- printf("\n");
}
}
diff --git a/src/allocator.c b/src/allocator.c
index 811cc1c..83e8164 100644
--- a/src/allocator.c
+++ b/src/allocator.c
@@ -5,8 +5,8 @@
*
* Description: Implementation of allocator module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -76,6 +76,7 @@ allocator_finalize()
void
allocator_allocateVector(
+ FILE* OUTSTREAM,
void** ptr,
int alignment,
uint64_t size,
@@ -90,9 +91,11 @@ allocator_allocateVector(
switch ( type )
{
case SINGLE:
+ case SINGLE_RAND:
bytesize = (size+offset) * sizeof(float);
break;
+ case DOUBLE_RAND:
case DOUBLE:
bytesize = (size+offset) * sizeof(double);
break;
@@ -128,10 +131,13 @@ allocator_allocateVector(
domain = affinity_getDomain(domainString);
affinity_pinProcess(domain->processorList[0]);
- printf("Allocate: Process running on core %d - Vector length %llu Offset %d\n",
+ if (OUTSTREAM)
+ {
+ fprintf(OUTSTREAM, "Allocate: Process running on core %d - Vector length %llu Offset %d\n",
affinity_processGetProcessorId(),
LLU_CAST size,
offset);
+ }
switch ( type )
{
@@ -142,7 +148,7 @@ allocator_allocateVector(
for ( uint64_t i=0; i < size; i++ )
{
- sptr[i] = 0.0;
+ sptr[i] = 1.0;
}
*ptr = (void*) sptr;
@@ -156,11 +162,38 @@ allocator_allocateVector(
for ( uint64_t i=0; i < size; i++ )
{
- dptr[i] = 0.0;
+ dptr[i] = 1.0;
+ }
+ *ptr = (void*) dptr;
+ }
+ break;
+ case SINGLE_RAND:
+ {
+ srand((uint64_t)ptr);
+ float* sptr = (float*) (*ptr);
+ sptr += offset;
+
+ for ( uint64_t i=0; i < size; i++ )
+ {
+ sptr[i] = rand()/((float)RAND_MAX)*2.0-1.0;
+ }
+ *ptr = (void*) sptr;
+ }
+ break;
+ case DOUBLE_RAND:
+ {
+ srand((uint64_t)ptr);
+ double* dptr = (double*) (*ptr);
+ dptr += offset;
+
+ for ( uint64_t i=0; i < size; i++ )
+ {
+ dptr[i] = rand()/((double)RAND_MAX)*2.0-1.0;
}
*ptr = (void*) dptr;
}
break;
+
}
}
diff --git a/src/applications/likwid-bench.c b/src/applications/likwid-bench.c
index 001874f..15f6f0d 100644
--- a/src/applications/likwid-bench.c
+++ b/src/applications/likwid-bench.c
@@ -5,8 +5,8 @@
*
* Description: A flexible and extensible benchmarking toolbox
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -49,31 +49,39 @@
#include <allocator.h>
#include <likwid.h>
+#ifdef PAPI
+#include <papi.h>
+#include <omp.h>
+#endif
extern void* runTest(void* arg);
/* ##### MACROS - LOCAL TO THIS SOURCE FILE ######################### */
#define HELP_MSG \
- printf("Threaded Memory Hierarchy Benchmark -- Version %d.%d \n\n",VERSION,RELEASE); \
-printf("\n"); \
-printf("Supported Options:\n"); \
-printf("-h\t Help message\n"); \
-printf("-a\t list available benchmarks \n"); \
-printf("-p\t list available thread domains\n"); \
-printf("-l <TEST>\t list properties of benchmark \n"); \
-printf("-i <INT>\t number of iterations \n"); \
-printf("-g <INT>\t number of workgroups (mandatory)\n"); \
-printf("-t <TEST>\t type of test \n"); \
-printf("-w\t <thread_domain>:<size>[:<num_threads>[:<chunk size>:<stride>][-<streamId>:<domain_id>[:<offset>]], size in kB, MB or GB (mandatory)\n"); \
-printf("Processors are in compact ordering. Optionally every stream can be placed. Either no stream or all streams must be placed. Multiple streams are separated by commas.\n"); \
-printf("Usage: likwid-bench -t copy -i 1000 -g 1 -w S0:100kB:10:1:2 \n"); \
-printf("\tRun 10 threads on socket 0 using physical cores only (presuming SMT2 system).\n"); \
-printf("Example with data placement: likwid-bench -t copy -i 1000 -g 1 -w S0:100kB:20-0:S1,1:S1 \n"); \
-printf("\tRun 20 threads on socket 0 and place both arrays of the copy test case on socket 1.\n")
+ fprintf(stdout, "Threaded Memory Hierarchy Benchmark -- Version %d.%d \n\n",VERSION,RELEASE); \
+ fprintf(stdout, "\n"); \
+ fprintf(stdout, "Supported Options:\n"); \
+ fprintf(stdout, "-h\t Help message\n"); \
+ fprintf(stdout, "-v\t Version information\n"); \
+ fprintf(stdout, "-q\t Silent without output\n"); \
+ fprintf(stdout, "-a\t list available benchmarks \n"); \
+ fprintf(stdout, "-p\t list available thread domains\n"); \
+ fprintf(stdout, "-l <TEST>\t list properties of benchmark \n"); \
+ fprintf(stdout, "-i <INT>\t number of iterations \n"); \
+ fprintf(stdout, "-g <INT>\t number of workgroups (mandatory)\n"); \
+ fprintf(stdout, "-t <TEST>\t type of test \n"); \
+ fprintf(stdout, "-w\t <thread_domain>:<size>[:<num_threads>[:<chunk size>:<stride>][-<streamId>:<domain_id>[:<offset>]], size in kB, MB or GB (mandatory)\n"); \
+ fprintf(stdout, "Processors are in compact ordering. Optionally every stream can be placed. Either no stream or all streams must be placed. Multiple streams are separated by commas.\n"); \
+ fprintf(stdout, "Usage: likwid-bench -t copy -i 1000 -g 1 -w S0:100kB:10:1:2 \n"); \
+ fprintf(stdout, "\tRun 10 threads on socket 0 using physical cores only (presuming SMT2 system).\n"); \
+ fprintf(stdout, "Example with data placement: likwid-bench -t copy -i 1000 -g 1 -w S0:100kB:20-0:S1,1:S1 \n"); \
+ fprintf(stdout, "\tRun 20 threads on socket 0 and place both arrays of the copy test case on socket 1.\n"); \
+ fflush(stdout);
#define VERSION_MSG \
- printf("likwid-bench %d.%d \n\n",VERSION,RELEASE)
+ fprintf(stdout, "likwid-bench %d.%d \n\n",VERSION,RELEASE); \
+ fflush(stdout);
/* ##### FUNCTION DEFINITIONS - LOCAL TO THIS SOURCE FILE ############ */
@@ -116,6 +124,7 @@ int main(int argc, char** argv)
const TestCase* test = NULL;
Workgroup* currentWorkgroup = NULL;
Workgroup* groups = NULL;
+ FILE* OUTSTREAM = stdout;
if (cpuid_init() == EXIT_FAILURE)
{
@@ -132,45 +141,52 @@ int main(int argc, char** argv)
exit(EXIT_SUCCESS);
}
opterr = 0;
- while ((c = getopt (argc, argv, "g:w:t:i:l:aphv")) != -1) {
+ while ((c = getopt (argc, argv, "g:w:t:i:l:aphvq")) != -1) {
switch (c)
{
case 'h':
HELP_MSG;
affinity_finalize();
- if (groups)
- {
- free(groups);
- }
+ if (groups)
+ {
+ free(groups);
+ }
exit (EXIT_SUCCESS);
case 'v':
VERSION_MSG;
affinity_finalize();
if (groups)
{
- free(groups);
+ free(groups);
}
exit (EXIT_SUCCESS);
case 'a':
- printf(TESTS"\n");
+ if (OUTSTREAM)
+ {
+ fprintf(OUTSTREAM, TESTS"\n");
+ fflush(OUTSTREAM);
+ }
affinity_finalize();
if (groups)
{
- free(groups);
+ free(groups);
}
exit (EXIT_SUCCESS);
+ case 'q':
+ OUTSTREAM = NULL;
+ break;
case 'w':
tmp--;
if (tmp == -1)
{
fprintf (stderr, "More workgroups configured than allocated!\n"
- "Did you forget to set the number of workgroups with -g?\n");
+ "Did you forget to set the number of workgroups with -g?\n");
affinity_finalize();
if (groups)
- {
- free(groups);
- }
+ {
+ free(groups);
+ }
return EXIT_FAILURE;
}
if (!test)
@@ -178,9 +194,9 @@ int main(int argc, char** argv)
fprintf (stderr, "You need to specify a test case first!\n");
affinity_finalize();
if (groups)
- {
- free(groups);
- }
+ {
+ free(groups);
+ }
return EXIT_FAILURE;
}
testcase = bfromcstr(optarg);
@@ -195,13 +211,14 @@ int main(int argc, char** argv)
fprintf (stderr, "Stream %d: offset is not a multiple of stride!\n",i);
affinity_finalize();
if (groups)
- {
- free(groups);
- }
+ {
+ free(groups);
+ }
return EXIT_FAILURE;
}
- allocator_allocateVector(&(currentWorkgroup->streams[i].ptr),
+ allocator_allocateVector(OUTSTREAM,
+ &(currentWorkgroup->streams[i].ptr),
PAGE_ALIGNMENT,
currentWorkgroup->size,
currentWorkgroup->streams[i].offset,
@@ -212,6 +229,11 @@ int main(int argc, char** argv)
break;
case 'i':
iter = atoi(optarg);
+ if (iter <= 0)
+ {
+ fprintf(stderr, "Iterations must be greater than 0.\n");
+ exit(EXIT_FAILURE);
+ }
break;
case 'l':
testcase = bfromcstr(optarg);
@@ -227,38 +249,46 @@ int main(int argc, char** argv)
if (biseqcstr(testcase,"none") || !test)
{
fprintf (stderr, "Unknown test case %s\n",optarg);
- printf("Available test cases:\n");
- printf(TESTS"\n");
+ if (OUTSTREAM)
+ {
+ fprintf(OUTSTREAM, "Available test cases:\n");
+ fprintf(OUTSTREAM, TESTS"\n");
+ fflush(OUTSTREAM);
+ }
affinity_finalize();
if (groups)
- {
- free(groups);
- }
+ {
+ free(groups);
+ }
return EXIT_FAILURE;
}
else
{
- printf("Name: %s\n",test->name);
- printf("Number of streams: %d\n",test->streams);
- printf("Loop stride: %d\n",test->stride);
- printf("Flops: %d\n",test->flops);
- printf("Bytes: %d\n",test->bytes);
- switch (test->type)
+ if (OUTSTREAM)
{
- case SINGLE:
- printf("Data Type: Single precision float\n");
- break;
- case DOUBLE:
- printf("Data Type: Double precision float\n");
- break;
+ fprintf(OUTSTREAM, "Name: %s\n",test->name);
+ fprintf(OUTSTREAM, "Number of streams: %d\n",test->streams);
+ fprintf(OUTSTREAM, "Loop stride: %d\n",test->stride);
+ fprintf(OUTSTREAM, "Flops: %d\n", (int) test->flops);
+ fprintf(OUTSTREAM, "Bytes: %d\n",test->bytes);
+ switch (test->type)
+ {
+ case SINGLE:
+ fprintf(OUTSTREAM, "Data Type: Single precision float\n");
+ break;
+ case DOUBLE:
+ fprintf(OUTSTREAM, "Data Type: Double precision float\n");
+ break;
+ }
+ fflush(OUTSTREAM);
}
}
bdestroy(testcase);
affinity_finalize();
if (groups)
- {
- free(groups);
- }
+ {
+ free(groups);
+ }
exit (EXIT_SUCCESS);
break;
@@ -267,6 +297,11 @@ int main(int argc, char** argv)
break;
case 'g':
numberOfWorkgroups = atoi(optarg);
+ if (numberOfWorkgroups <= 0)
+ {
+ fprintf(stderr, "Number of Workgroups must be 1 or greater.\n");
+ exit(EXIT_FAILURE);
+ }
allocator_init(numberOfWorkgroups * MAX_STREAMS);
tmp = numberOfWorkgroups;
groups = (Workgroup*) malloc(numberOfWorkgroups*sizeof(Workgroup));
@@ -287,9 +322,9 @@ int main(int argc, char** argv)
fprintf (stderr, "Unknown test case %s\n",optarg);
affinity_finalize();
if (groups)
- {
- free(groups);
- }
+ {
+ free(groups);
+ }
return EXIT_FAILURE;
}
bdestroy(testcase);
@@ -306,57 +341,68 @@ int main(int argc, char** argv)
optopt);
affinity_finalize();
if (groups)
- {
- free(groups);
- }
+ {
+ free(groups);
+ }
return EXIT_FAILURE;
default:
HELP_MSG;
}
}
- if (tmp > 0)
- {
- fprintf(stderr, "%d workgroups requested but only %d given on commandline\n",numberOfWorkgroups,numberOfWorkgroups-tmp);
- affinity_finalize();
- allocator_finalize();
- if (groups)
+ if (numberOfWorkgroups == 0 && !optPrintDomains)
+ {
+ fprintf(stderr, "Number of Workgroups must be 1 or greater.\n");
+ affinity_finalize();
+ allocator_finalize();
+ if (groups)
{
- free(groups);
+ free(groups);
}
- exit(EXIT_FAILURE);
- }
- if (iter <= 0)
- {
- fprintf(stderr,"Iterations must be greater than 0\n");
- affinity_finalize();
- allocator_finalize();
- if (groups)
+ exit(EXIT_FAILURE);
+ }
+ if (tmp > 0 && iter > 0)
+ {
+ fprintf(stderr, "%d workgroups requested but only %d given on commandline\n",numberOfWorkgroups,numberOfWorkgroups-tmp);
+ affinity_finalize();
+ allocator_finalize();
+ if (groups)
{
- free(groups);
+ free(groups);
}
- exit(EXIT_FAILURE);
- }
- if (test && !(currentWorkgroup || groups))
- {
- fprintf(stderr, "Workgroups must be set on commandline\n");
- affinity_finalize();
- allocator_finalize();
- if (groups)
+ exit(EXIT_FAILURE);
+ }
+ if (iter <= 0)
+ {
+ fprintf(stderr,"Iterations must be greater than 0\n");
+ affinity_finalize();
+ allocator_finalize();
+ if (groups)
+ {
+ free(groups);
+ }
+ exit(EXIT_FAILURE);
+ }
+ if (test && !(currentWorkgroup || groups))
+ {
+ fprintf(stderr, "Workgroups must be set on commandline\n");
+ affinity_finalize();
+ allocator_finalize();
+ if (groups)
{
- free(groups);
+ free(groups);
}
- exit(EXIT_FAILURE);
- }
+ exit(EXIT_FAILURE);
+ }
if (optPrintDomains)
{
- affinity_printDomains();
+ affinity_printDomains(OUTSTREAM);
affinity_finalize();
- allocator_finalize();
- if (groups)
+ allocator_finalize();
+ if (groups)
{
- free(groups);
+ free(groups);
}
exit (EXIT_SUCCESS);
}
@@ -369,7 +415,7 @@ int main(int argc, char** argv)
globalNumberOfThreads += groups[i].numberOfThreads;
}
- threads_init(globalNumberOfThreads);
+ threads_init(OUTSTREAM, globalNumberOfThreads);
threads_createGroups(numberOfWorkgroups);
/* we configure global barriers only */
@@ -377,9 +423,21 @@ int main(int argc, char** argv)
barrier_registerGroup(globalNumberOfThreads);
#ifdef PERFMON
- printf("Using likwid\n");
+ if (OUTSTREAM)
+ {
+ fprintf(OUTSTREAM, "Using likwid\n");
+ fflush(OUTSTREAM);
+ }
likwid_markerInit();
#endif
+#ifdef PAPI
+ if (OUTSTREAM)
+ {
+ fprintf(OUTSTREAM, "Using PAPI\n");
+ }
+ PAPI_library_init (PAPI_VER_CURRENT);
+ PAPI_thread_init((unsigned long (*)(void))(omp_get_thread_num));
+#endif
/* initialize data structures for threads */
@@ -407,61 +465,68 @@ int main(int argc, char** argv)
free(myData.streams);
}
- printf(HLINE);
- printf("LIKWID MICRO BENCHMARK\n");
- printf("Test: %s\n",test->name);
- printf(HLINE);
- printf("Using %d work groups\n",numberOfWorkgroups);
- printf("Using %d threads\n",globalNumberOfThreads);
- printf(HLINE);
+ if (OUTSTREAM)
+ {
+ fprintf(OUTSTREAM, HLINE);
+ fprintf(OUTSTREAM, "LIKWID MICRO BENCHMARK\n");
+ fprintf(OUTSTREAM, "Test: %s\n",test->name);
+ fprintf(OUTSTREAM, HLINE);
+ fprintf(OUTSTREAM, "Using %d work groups\n",numberOfWorkgroups);
+ fprintf(OUTSTREAM, "Using %d threads\n",globalNumberOfThreads);
+ fprintf(OUTSTREAM, HLINE);
+ fflush(OUTSTREAM);
+ }
threads_create(runTest);
threads_join();
allocator_finalize();
-
- uint32_t realSize = 0;
- uint64_t realCycles = 0;
- int current_id = 0;
-
- printf(HLINE);
- for(j=0;j<numberOfWorkgroups;j++)
+
+ uint32_t realSize = 0;
+ uint64_t realCycles = 0;
+ int current_id = 0;
+
+ if (OUTSTREAM)
{
- current_id = j*groups[j].numberOfThreads;
- realCycles += threads_data[current_id].cycles;
- realSize += groups[j].numberOfThreads * threads_data[current_id].data.size;
- }
- time = (double) realCycles / (double) timer_getCpuClock();
- printf("Cycles: %llu \n", LLU_CAST realCycles);
- printf("Iterations: %llu \n", LLU_CAST iter);
- printf("Size %d \n", realSize );
- printf("Vectorlength: %llu \n", LLU_CAST threads_data[current_id].data.size);
- printf("Time: %e sec\n", time);
- printf("Number of Flops: %llu \n", LLU_CAST (iter * realSize * test->flops));
- printf("MFlops/s: %.2f\n",
- 1.0E-06 * ((double) iter * realSize * test->flops/ time));
- printf("MByte/s: %.2f\n",
- 1.0E-06 * ( (double) iter * realSize * test->bytes/ time));
- printf("Cycles per update: %f\n",
- ((double) realCycles / (double) (iter * numberOfWorkgroups * threads_data[current_id].numberOfThreads * threads_data[current_id].data.size)));
-
- switch ( test->type )
- {
- case SINGLE:
- printf("Cycles per cacheline: %f\n",
- (16.0 * (double) realCycles / (double) (iter * realSize)));
- break;
- case DOUBLE:
- printf("Cycles per cacheline: %f\n",
- (8.0 * (double) realCycles / (double) (iter * realSize)));
- break;
- }
-
-
- printf(HLINE);
+ fprintf(OUTSTREAM, HLINE);
+ for(j=0;j<numberOfWorkgroups;j++)
+ {
+ current_id = j*groups[j].numberOfThreads;
+ realCycles += threads_data[current_id].cycles;
+ realSize += groups[j].numberOfThreads * threads_data[current_id].data.size;
+ }
+ time = (double) realCycles / (double) timer_getCpuClock();
+ fprintf(OUTSTREAM, "Cycles: %llu \n", LLU_CAST realCycles);
+ fprintf(OUTSTREAM, "Iterations: %llu \n", LLU_CAST iter);
+ fprintf(OUTSTREAM, "Size %d \n", realSize );
+ fprintf(OUTSTREAM, "Vectorlength: %llu \n", LLU_CAST threads_data[current_id].data.size);
+ fprintf(OUTSTREAM, "Time: %e sec\n", time);
+ fprintf(OUTSTREAM, "Number of Flops: %llu \n", LLU_CAST (iter * realSize * test->flops));
+ fprintf(OUTSTREAM, "MFlops/s: %.2f\n",
+ 1.0E-06 * ((double) iter * realSize * test->flops/ time));
+ fprintf(OUTSTREAM, "MByte/s: %.2f\n",
+ 1.0E-06 * ( (double) iter * realSize * test->bytes/ time));
+ fprintf(OUTSTREAM, "Cycles per update: %f\n",
+ ((double) realCycles / (double) (iter * numberOfWorkgroups * threads_data[current_id].numberOfThreads * threads_data[current_id].data.size)));
+
+ switch ( test->type )
+ {
+ case SINGLE:
+ fprintf(OUTSTREAM, "Cycles per cacheline: %f\n",
+ (16.0 * (double) realCycles / (double) (iter * realSize)));
+ break;
+ case DOUBLE:
+ fprintf(OUTSTREAM, "Cycles per cacheline: %f\n",
+ (8.0 * (double) realCycles / (double) (iter * realSize)));
+ break;
+ }
+
+ fprintf(OUTSTREAM, HLINE);
+ fflush(OUTSTREAM);
+ }
threads_destroy(numberOfWorkgroups);
barrier_destroy();
- affinity_finalize();
+ affinity_finalize();
#ifdef PERFMON
likwid_markerClose();
#endif
diff --git a/src/applications/likwid-features.c b/src/applications/likwid-features.c
index 679561e..6fe5477 100644
--- a/src/applications/likwid-features.c
+++ b/src/applications/likwid-features.c
@@ -6,8 +6,8 @@
* Description: An application to read out and set the feature flag
* register on Intel Core 2 processors.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -45,18 +45,20 @@
#include <cpuFeatures.h>
#define HELP_MSG \
-printf("\nlikwid-features -- Version %d.%d \n\n",VERSION,RELEASE); \
-printf("A tool to print and toggle the feature flag msr on Intel CPUS.\n"); \
-printf("Supported Features: HW_PREFETCHER, CL_PREFETCHER, DCU_PREFETCHER, IP_PREFETCHER.\n\n"); \
-printf("Options:\n"); \
-printf("-h\t Help message\n"); \
-printf("-v\t Version information\n"); \
-printf("-s <FEATURE>\t set cpu feature \n"); \
-printf("-u <FEATURE>\t unset cpu feature \n"); \
-printf("-c <ID>\t core id\n\n")
+ fprintf(stdout, "\nlikwid-features -- Version %d.%d \n\n",VERSION,RELEASE); \
+ fprintf(stdout, "A tool to print and toggle the feature flag msr on Intel CPUS.\n"); \
+ fprintf(stdout, "Supported Features: HW_PREFETCHER, CL_PREFETCHER, DCU_PREFETCHER, IP_PREFETCHER.\n\n"); \
+ fprintf(stdout, "Options:\n"); \
+ fprintf(stdout, "-h\t Help message\n"); \
+ fprintf(stdout, "-v\t Version information\n"); \
+ fprintf(stdout, "-s <FEATURE>\t set cpu feature \n"); \
+ fprintf(stdout, "-u <FEATURE>\t unset cpu feature \n"); \
+ fprintf(stdout, "-c <ID>\t core id\n\n"); \
+ fflush(stdout);
#define VERSION_MSG \
-printf("likwid-features %d.%d \n\n",VERSION,RELEASE)
+ fprintf(stdout, "likwid-features %d.%d \n\n",VERSION,RELEASE); \
+ fflush(stdout);
int main (int argc, char** argv)
{
@@ -80,7 +82,7 @@ int main (int argc, char** argv)
case 'u':
optSetFeature = 2;
case 's':
- if (! (argString = bSecureInput(20,optarg)))
+ if (! (argString = bSecureInput(40,optarg)))
{
fprintf(stderr,"Failed to read argument string!\n");
exit(EXIT_FAILURE);
@@ -115,7 +117,7 @@ int main (int argc, char** argv)
}
break;
case 'c':
- if (! (argString = bSecureInput(10,optarg)))
+ if (! (argString = bSecureInput(20,optarg)))
{
fprintf(stderr,"Failed to read argument string!\n");
exit(EXIT_FAILURE);
@@ -147,9 +149,10 @@ int main (int argc, char** argv)
ERROR_PLAIN_PRINT(Unsupported processor!);
}
- printf(HLINE);
- printf("CPU name:\t%s \n",cpuid_info.name);
- printf("CPU core id:\t%d \n", cpuId);
+ fprintf(stdout, HLINE);
+ fprintf(stdout, "CPU name:\t%s \n",cpuid_info.name);
+ fprintf(stdout, "CPU core id:\t%d \n", cpuId);
+ fflush(stdout);
if (cpuid_info.family != P6_FAMILY)
{
@@ -165,21 +168,22 @@ int main (int argc, char** argv)
accessClient_init(&socket_fd);
msr_init(socket_fd);
- cpuFeatures_init(cpuId);
+ cpuFeatures_init(cpuId);
cpuFeatures_print(cpuId);
if (optSetFeature == 1)
{
- printf(SLINE);
+ fprintf(stdout, SLINE);
cpuFeatures_enable(cpuId, feature);
- printf(SLINE);
+ fprintf(stdout, SLINE);
}
else if (optSetFeature == 2)
{
- printf(SLINE);
+ fprintf(stdout, SLINE);
cpuFeatures_disable(cpuId, feature);
- printf(SLINE);
+ fprintf(stdout, SLINE);
}
+ fflush(stdout);
msr_finalize();
return EXIT_SUCCESS;
diff --git a/src/applications/likwid-genCfg.c b/src/applications/likwid-genCfg.c
index c8d3216..97147fd 100644
--- a/src/applications/likwid-genCfg.c
+++ b/src/applications/likwid-genCfg.c
@@ -6,8 +6,8 @@
* Description: An application to dump the cpu topology information to
* a config file.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -43,21 +43,24 @@
/* ##### MACROS - LOCAL TO THIS SOURCE FILE ######################### */
#define HELP_MSG \
-printf("\nlikwid-genCfg -- Version %d.%d \n\n",VERSION,RELEASE); \
-printf("A tool to dump node topology information into a file.\n"); \
-printf("Options:\n"); \
-printf("-h\t Help message\n"); \
-printf("-v\t Version information\n"); \
-printf("-o\t output file path (optional)\n\n");
+ fprintf(stdout, "\nlikwid-genCfg -- Version %d.%d \n\n",VERSION,RELEASE); \
+ fprintf(stdout, "A tool to dump node topology information into a file.\n"); \
+ fprintf(stdout, "Options:\n"); \
+ fprintf(stdout, "-h\t Help message\n"); \
+ fprintf(stdout, "-v\t Version information\n"); \
+ fprintf(stdout, "-o\t output file path (optional)\n\n"); \
+ fflush(stdout);
#define VERSION_MSG \
-printf("likwid-powermeter %d.%d \n\n",VERSION,RELEASE)
+ fprintf(stdout, "likwid-genCfg %d.%d \n\n",VERSION,RELEASE); \
+ fflush(stdout);
int main (int argc, char** argv)
{
FILE *file;
char *filepath = TOSTRING(CFGFILE);
+ size_t size;
int c;
while ((c = getopt (argc, argv, "ho:v")) != -1)
@@ -92,17 +95,18 @@ int main (int argc, char** argv)
}
cpuid_init();
- printf(HLINE);
- printf("CPU name:\t%s \n",cpuid_info.name);
+ fprintf(stdout, HLINE);
+ fprintf(stdout, "CPU name:\t%s \n",cpuid_info.name);
+ fflush(stdout);
if ((file = fopen(filepath, "wb")) != NULL)
{
- (void) fwrite((void*) &cpuid_topology, sizeof(CpuTopology), 1, file);
+ size = fwrite((void*) &cpuid_topology, sizeof(CpuTopology), 1, file);
- (void) fwrite((void*) cpuid_topology.threadPool,
+ size = fwrite((void*) cpuid_topology.threadPool,
sizeof(HWThread), cpuid_topology.numHWThreads, file);
- (void) fwrite((void*) cpuid_topology.cacheLevels,
+ size = fwrite((void*) cpuid_topology.cacheLevels,
sizeof(CacheLevel), cpuid_topology.numCacheLevels, file);
fclose(file);
diff --git a/src/applications/likwid-memsweeper.c b/src/applications/likwid-memsweeper.c
index 925aa79..4806763 100644
--- a/src/applications/likwid-memsweeper.c
+++ b/src/applications/likwid-memsweeper.c
@@ -5,8 +5,8 @@
*
* Description: An application to clean up NUMA memory domains.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -46,26 +46,32 @@
/* ##### MACROS - LOCAL TO THIS SOURCE FILE ######################### */
#define HELP_MSG \
-printf("\nlikwid-memsweeper -- Version %d.%d \n\n",VERSION,RELEASE); \
-printf("A tool clean up NUMA memory domains and last level caches.\n"); \
-printf("Options:\n"); \
-printf("-h\t Help message\n"); \
-printf("-v\t Version information\n"); \
-printf("-c\t specify NUMA domain ID to clean up\n"); \
-printf("Usage: likwid-memsweeper \n"); \
-printf("To clean specific domain: likwid-memsweeper -c 2 \n");
+ fprintf(stdout, "\nlikwid-memsweeper -- Version %d.%d \n\n",VERSION,RELEASE); \
+ fprintf(stdout, "A tool clean up NUMA memory domains and last level caches.\n"); \
+ fprintf(stdout, "Options:\n"); \
+ fprintf(stdout, "-h\t Help message\n"); \
+ fprintf(stdout, "-v\t Version information\n"); \
+ fprintf(stdout, "-q\t Silent without output\n"); \
+ fprintf(stdout, "-c\t Specify NUMA domain ID to clean up\n"); \
+ fprintf(stdout, "\t If no specific domain is set, all domains are swept.\n"); \
+ fprintf(stdout, "Usage:\n"); \
+ fprintf(stdout, "To clean specific domain: likwid-memsweeper -c 2 \n"); \
+ fflush(stdout);
#define VERSION_MSG \
-printf("likwid-memsweeper %d.%d \n\n",VERSION,RELEASE)
+ fprintf(stdout, "likwid-memsweeper %d.%d \n\n",VERSION,RELEASE); \
+ fflush(stdout);
int main (int argc, char** argv)
{
int domainId = -1;
int c;
+ int optSilent = 0;
bstring argString;
+ FILE* OUTSTREAM = stdout;
- while ((c = getopt (argc, argv, "+c:hv")) != -1)
+ while ((c = getopt (argc, argv, "+c:hvq")) != -1)
{
switch (c)
{
@@ -75,6 +81,10 @@ int main (int argc, char** argv)
case 'v':
VERSION_MSG;
exit (EXIT_SUCCESS);
+ case 'q':
+ optSilent = 1;
+ OUTSTREAM = NULL;
+ break;
case 'c':
if (! (argString = bSecureInput(10,optarg)))
{
@@ -111,11 +121,16 @@ int main (int argc, char** argv)
if (domainId < 0)
{
- memsweep_node();
+ memsweep_node(OUTSTREAM);
+ }
+ else if (domainId < numa_info.numberOfNodes)
+ {
+ memsweep_domain(OUTSTREAM, domainId);
}
else
{
- memsweep_domain(domainId);
+ fprintf(stderr, "Unknown NUMA domain %d\n", domainId);
+ exit(EXIT_FAILURE);
}
return EXIT_SUCCESS;
diff --git a/src/applications/likwid-perfctr.c b/src/applications/likwid-perfctr.c
index fce52ea..6c9f98f 100644
--- a/src/applications/likwid-perfctr.c
+++ b/src/applications/likwid-perfctr.c
@@ -6,8 +6,8 @@
* Description: An application to read out performance counter registers
* on x86 processors
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -58,31 +58,35 @@
/* ##### MACROS - LOCAL TO THIS SOURCE FILE ######################### */
#define HELP_MSG \
-printf("likwid-perfctr -- Version %d.%d \n\n",VERSION,RELEASE); \
-printf("\n"); \
-printf("Example Usage: likwid-perfctr -C 2 ./a.out \n"); \
-printf("Supported Options:\n"); \
-printf("-h\t Help message\n"); \
-printf("-v\t Version information\n"); \
-printf("-V\t verbose output\n"); \
-printf("-g\t performance group or event set string\n"); \
-printf("-H\t Get group help (together with -g switch) \n"); \
-printf("-t\t timeline mode with frequency in s or ms, e.g. 300ms\n"); \
-printf("-S\t stethoscope mode with duration in s\n"); \
-printf("-m\t use markers inside code \n"); \
-printf("-s\t bitmask with threads to skip\n"); \
-printf("-o\t Store output to file, with output conversation according to file suffix\n"); \
-printf("\t Conversation scripts can be supplied in %s\n",TOSTRING(LIKWIDFILTERPATH)); \
-printf("-O\t Output easily parseable CSV instead of fancy tables\n"); \
-printf("-M\t set how MSR registers are accessed: 0=direct, 1=msrd\n"); \
-printf("-a\t list available performance groups\n"); \
-printf("-e\t list available counters and events\n"); \
-printf("-i\t print cpu info\n"); \
-printf("-c\t processor ids to measure (required), e.g. 1,2-4,8\n"); \
-printf("-C\t processor ids to measure (this variant also cares for pinning of process/threads), e.g. 1,2-4,8\n");
+fprintf(stdout, "likwid-perfctr -- Version %d.%d \n\n",VERSION,RELEASE); \
+fprintf(stdout, "\n"); \
+fprintf(stdout, "Example Usage: likwid-perfctr -C 2 ./a.out \n"); \
+fprintf(stdout, "Supported Options:\n"); \
+fprintf(stdout, "-h\t Help message\n"); \
+fprintf(stdout, "-v\t Version information\n"); \
+fprintf(stdout, "-V\t verbose output\n"); \
+fprintf(stdout, "-g\t performance group or event set string\n"); \
+fprintf(stdout, "-H\t Get group help (together with -g switch) \n"); \
+fprintf(stdout, "-t\t timeline mode with frequency in s or ms, e.g. 300ms\n"); \
+fprintf(stdout, "-S\t stethoscope mode with duration in s\n"); \
+fprintf(stdout, "-m\t use markers inside code \n"); \
+fprintf(stdout, "-s\t bitmask with threads to skip\n"); \
+fprintf(stdout, "-o\t Store output to file, with output conversation according to file suffix\n"); \
+fprintf(stdout, "\t Conversation scripts can be supplied in %s\n",TOSTRING(LIKWIDFILTERPATH)); \
+fprintf(stdout, "-O\t Output easily parseable CSV instead of fancy tables\n"); \
+fprintf(stdout, "-M\t set how MSR registers are accessed: 0=direct, 1=msrd\n"); \
+fprintf(stdout, "-a\t list available performance groups\n"); \
+fprintf(stdout, "-e\t list available counters and events\n"); \
+fprintf(stdout, "-i\t print cpu info\n"); \
+fprintf(stdout, "-c\t processor ids to measure (required), e.g 0,3-4,8\n"); \
+fprintf(stdout, "-C\t processor ids to measure (this variant also cares for pinning of process/threads)\n"); \
+fprintf(stdout, "\t\t for -c and -C, see likwid-pin -h for details\n"); \
+fflush(stdout);
+
#define VERSION_MSG \
-printf("likwid-perfctr %d.%d \n\n",VERSION,RELEASE);
+fprintf(stdout, "likwid-perfctr %d.%d \n\n",VERSION,RELEASE); \
+fflush(stdout);
/* To be able to give useful error messages instead of just dieing without a
* comment. Mainly happens because we get a SIGPIPE if the daemon drops us. */
@@ -157,7 +161,6 @@ int main (int argc, char** argv)
case 'c':
CHECK_OPTION_STRING;
numThreads = bstr_to_cpuset(threads, argString);
-
if(!numThreads)
{
ERROR_PLAIN_PRINT(Failed to parse cpu list.);
@@ -165,7 +168,8 @@ int main (int argc, char** argv)
break;
case 'd':
- printf("Option -d for daemon mode is deprecated. Daemon mode has be renamed to timeline mode (Option -t)!\n");
+ fprintf(stdout, "Option -d for daemon mode is deprecated. Daemon mode has be renamed to timeline mode (Option -t)!\n");
+ fflush(stdout);
break;
case 'e':
numThreads=1; /*to get over the error message */
@@ -219,6 +223,12 @@ int main (int argc, char** argv)
case 'S':
CHECK_OPTION_STRING;
optStethoscope = str2int((char*) argString->data);
+ if (optStethoscope <= 0)
+ {
+ fprintf(stderr, "The measurement time must be larger than 0\n\n");
+ HELP_MSG;
+ exit(EXIT_FAILURE);
+ }
break;
case 't':
CHECK_OPTION_STRING;
@@ -234,11 +244,11 @@ int main (int argc, char** argv)
perfmon_verbose = 1;
break;
case '?':
- if (optopt == 'S'||optopt == 't'||optopt == 'c'||optopt == 'C'||
- optopt == 'o'||optopt == 'M'||optopt == 'g')
- {
-
- }
+ if (optopt == 'S'||optopt == 't'||optopt == 'c'||optopt == 'C'||
+ optopt == 'o'||optopt == 'M'||optopt == 'g')
+ {
+
+ }
else if (isprint (optopt))
{
fprintf (stderr, "Unknown option `-%c'.\n", optopt);
@@ -261,6 +271,7 @@ int main (int argc, char** argv)
if (!numThreads)
{
fprintf (stderr, "ERROR: Required -c. You must specify at least one processor.\n");
+ HELP_MSG;
exit(EXIT_FAILURE);
}
@@ -285,12 +296,12 @@ int main (int argc, char** argv)
}
bformata(pinString,",%d",threads[0]);
-
- if (skipMask > 0)
- {
- skipString = bformat("%d",skipMask);
- setenv("LIKWID_SKIP",(char*) skipString->data , 1);
- }
+
+ if (skipMask > 0)
+ {
+ skipString = bformat("%d",skipMask);
+ setenv("LIKWID_SKIP",(char*) skipString->data , 1);
+ }
setenv("KMP_AFFINITY", "disabled", 1);
setenv("LIKWID_PIN",(char*) pinString->data , 1);
@@ -317,7 +328,12 @@ int main (int argc, char** argv)
{
if(i != j && threads[i] == threads[j])
{
- fprintf (stderr, "ERROR: Processor list is not unique.\n");
+ fprintf (stderr, "ERROR: Processor list (%d",threads[0]);
+ for (c=1;c<numThreads;c++)
+ {
+ fprintf (stderr, ",%d",threads[c]);
+ }
+ fprintf (stderr, ") is not unique.\n");
exit(EXIT_FAILURE);
}
}
@@ -340,7 +356,7 @@ int main (int argc, char** argv)
fprintf(OUTSTREAM,"CPU stepping:\t%u \n", cpuid_info.stepping);
fprintf(OUTSTREAM,"CPU features:\t%s \n", cpuid_info.features);
- if( cpuid_info.family == P6_FAMILY && cpuid_info.perf_version)
+ if( cpuid_info.family == P6_FAMILY && cpuid_info.perf_version)
{
fprintf(OUTSTREAM,HLINE);
fprintf(OUTSTREAM,"PERFMON version:\t%u \n",cpuid_info.perf_version);
@@ -350,6 +366,7 @@ int main (int argc, char** argv)
}
}
fprintf(OUTSTREAM,HLINE);
+ fflush(OUTSTREAM);
if (optInfo)
{
@@ -371,11 +388,24 @@ int main (int argc, char** argv)
perfmon_printEvents();
exit (EXIT_SUCCESS);
}
- if ((!optTimeline && !optStethoscope) && (optind == argc))
+ if ((!optTimeline && !optStethoscope) && (optind == argc))
{
fprintf(OUTSTREAM,"NOTICE: You have to specify a program to measure as argument!\n");
exit (EXIT_SUCCESS);
}
+ argv += optind;
+ bstring exeString = bfromcstr(argv[0]);
+ for (i=1; i<(argc-optind); i++)
+ {
+ bconchar(exeString, ' ');
+ bcatcstr(exeString, argv[i]);
+ }
+ if (blength(exeString) == 0 && !optStethoscope)
+ {
+ fprintf(OUTSTREAM, "Executable must be given on commandline\n");
+ fflush(OUTSTREAM);
+ exit(EXIT_FAILURE);
+ }
if (biseqcstr(eventString,"_NOGROUP"))
{
fprintf(OUTSTREAM,"NOTICE: You have to specify a group or event set to measure using the -g option.\n");
@@ -388,11 +418,20 @@ int main (int argc, char** argv)
fprintf(OUTSTREAM,HLINE);
fprintf(OUTSTREAM,"CPU type:\t%s \n",cpuid_info.name);
fprintf(OUTSTREAM,"CPU clock:\t%3.2f GHz \n", (float) timer_getCpuClock() * 1.E-09);
+ fflush(OUTSTREAM);
- perfmon_setupEventSet(eventString, &counterMask);
fprintf(OUTSTREAM,HLINE);
+ fflush(OUTSTREAM);
- if (optTimeline)
+ if (optStethoscope)
+ {
+ perfmon_setupEventSet(eventString, &counterMask);
+ perfmon_startCounters();
+ sleep(optStethoscope);
+ perfmon_stopCounters();
+ perfmon_printCounterResults();
+ }
+ else if (optTimeline)
{
fprintf(OUTSTREAM,"CORES: %d", threads[0]);
for (int i=1; i<numThreads; i++)
@@ -400,43 +439,38 @@ int main (int argc, char** argv)
fprintf(OUTSTREAM," %d", threads[i]);
}
fprintf(OUTSTREAM," \n");
+ fflush(OUTSTREAM);
- daemon_init(eventString);
- daemon_start(interval);
- }
-
- argv += optind;
- bstring exeString = bfromcstr(argv[0]);
-
- if (optStethoscope)
- {
- perfmon_startCounters();
- sleep(optStethoscope);
- perfmon_stopCounters();
- perfmon_printCounterResults();
+ daemon_start(eventString, interval);
+ if (system(bdata(exeString)) == EOF)
+ {
+ fprintf(stderr, "Failed to execute %s!\n", bdata(exeString));
+ exit(EXIT_FAILURE);
+ }
+ daemon_stop(SIGINT);
}
else
{
- for (i=1; i<(argc-optind); i++)
+ if (perfmon_verbose)
{
- bconchar(exeString, ' ');
- bcatcstr(exeString, argv[i]);
+ fprintf(OUTSTREAM,"Executing: %s \n",bdata(exeString));
+ fflush(OUTSTREAM);
}
- if (perfmon_verbose) fprintf(OUTSTREAM,"Executing: %s \n",bdata(exeString));
if (optReport)
{
// multiplex_start();
}
- else if (!optUseMarker)
+ else if (!optUseMarker && !optTimeline)
{
+ perfmon_setupEventSet(eventString, &counterMask);
perfmon_startCounters();
}
else
{
if (getenv("LIKWID_FILEPATH") == NULL)
setenv("LIKWID_FILEPATH",(char*) filepath->data, 1);
-
+ perfmon_setupEventSet(eventString, &counterMask);
char* modeStr = (char*) malloc(40 * sizeof(char));
sprintf(modeStr,"%d",accessClient_mode);
setenv("LIKWID_MODE", modeStr, 1);
@@ -447,8 +481,6 @@ int main (int argc, char** argv)
perfmon_startCounters();
}
- fprintf(OUTSTREAM,"%s\n",bdata(exeString));
-
if (system(bdata(exeString)) == EOF)
{
fprintf(stderr, "Failed to execute %s!\n", bdata(exeString));
@@ -462,15 +494,15 @@ int main (int argc, char** argv)
}
else
{
- if (!optUseMarker)
+ if (optUseMarker)
{
perfmon_stopCounters();
- perfmon_printCounterResults();
+ perfmon_printMarkerResults(filepath);
}
else
{
perfmon_stopCounters();
- perfmon_printMarkerResults(filepath);
+ perfmon_printCounterResults();
}
}
}
diff --git a/src/applications/likwid-pin.c b/src/applications/likwid-pin.c
index e046df0..3d9e85b 100644
--- a/src/applications/likwid-pin.c
+++ b/src/applications/likwid-pin.c
@@ -5,8 +5,8 @@
*
* Description: An application to pin a program including threads
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -52,46 +52,55 @@
/* ##### MACROS - LOCAL TO THIS SOURCE FILE ######################### */
#define HELP_MSG \
-printf("likwid-pin -- Version %d.%d \n\n",VERSION,RELEASE); \
-printf("\n"); \
-printf("Supported Options:\n"); \
-printf("-h\t Help message\n"); \
-printf("-v\t Version information\n"); \
-printf("-i\t Set numa interleave policy with all involved numa nodes\n"); \
-printf("-S\t Sweep memory in involved numa nodes\n"); \
-printf("-c\t comma separated list of processor ids or expression\n"); \
-printf("-s\t bitmask with threads to skip\n"); \
-printf("-p\t Print available domains with mapping on physical ids\n"); \
-printf(" \t If used together with -c option outputs a physical processor ids.\n"); \
-printf("-d\t Delimiter used for using -p to output physical processor list, default is comma.\n\n"); \
-printf("-q\t Silent without output\n\n"); \
-printf("There are three possibilities to provide a thread to processor list:\n\n"); \
-printf("1. Thread list with physical or logical thread numberings and physical cores first.\n"); \
-printf("Example usage thread list: likwid-pin -c N:0,4-6 ./myApp\n"); \
-printf("You can pin with the following numberings:\n"); \
-printf("\t1. Physical numbering of OS.\n"); \
-printf("\t2. Logical numbering inside node. e.g. -c N:0-3\n"); \
-printf("\t3. Logical numbering inside socket. e.g. -c S0:0-3\n"); \
-printf("\t4. Logical numbering inside last level cache group. e.g. -c C0:0-3\n"); \
-printf("\t5. Logical numbering inside NUMA domain. e.g. -c M0:0-3\n"); \
-printf("\tYou can also mix domains separated by @, e.g. -c S0:0-3@S1:0-3 \n\n"); \
-printf("2. Expressions based thread list generation with compact processor numbering.\n"); \
-printf("Example usage expression: likwid-pin -c E:N:8 ./myApp\n"); \
-printf("This will generate a compact list of thread to processor mapping for the node domain with eight threads.\n"); \
-printf("The following syntax variants are available:\n"); \
-printf("\t1. -c E:<thread domain>:<number of threads>\n"); \
-printf("\t2. -c E:<thread domain>:<number of threads>:<chunk size>:<stride>\n"); \
-printf("\t For two SMT threads per core on a SMT 4 machine use e.g. -c E:N:122:2:4\n\n"); \
-printf("3. Scatter policy among thread domain type.\n"); \
-printf("Example usage scatter: likwid-pin -c M:scatter ./myApp\n"); \
-printf("This will generate a thread to processor mapping scattered among all memory domains with physical cores first.\n\n"); \
-printf("If you ommit the -c option likwid will use all processors available on the node\n"); \
-printf("with physical cores first. likwid-pin will also set OMP_NUM_THREADS with as many\n"); \
-printf("threads as specified in your pin expression if OMP_NUM_THREADS is not present\n"); \
-printf("in your environment.\n\n")
+ fprintf(stdout, "likwid-pin -- Version %d.%d \n\n",VERSION,RELEASE); \
+ fprintf(stdout, "\n"); \
+ fprintf(stdout, "Supported Options:\n"); \
+ fprintf(stdout, "-h\t Help message\n"); \
+ fprintf(stdout, "-v\t Version information\n"); \
+ fprintf(stdout, "-i\t Set numa interleave policy with all involved numa nodes\n"); \
+ fprintf(stdout, "-S\t Sweep memory in involved numa nodes\n"); \
+ fprintf(stdout, "-c\t comma separated list of processor ids or expression\n"); \
+ fprintf(stdout, "-s\t bitmask with threads to skip\n"); \
+ fprintf(stdout, "-p\t Print available domains with mapping on physical ids\n"); \
+ fprintf(stdout, " \t If used together with -c option outputs a physical processor ids.\n"); \
+ fprintf(stdout, "-d\t Delimiter used for using -p to output physical processor list, default is comma.\n\n"); \
+ fprintf(stdout, "-q\t Silent without output\n\n"); \
+ fprintf(stdout, "There are three possibilities to provide a thread to processor list:\n\n"); \
+ fprintf(stdout, "1. Thread list with physical or logical thread numberings and physical cores first.\n"); \
+ fprintf(stdout, "Example usage thread list: likwid-pin -c N:0,4-6 ./myApp\n"); \
+ fprintf(stdout, "You can pin with the following numberings:\n"); \
+ fprintf(stdout, "\t1. Physical numbering of OS.\n"); \
+ fprintf(stdout, "\t2. Logical numbering inside node. e.g. -c N:0-3\n"); \
+ fprintf(stdout, "\t3. Logical numbering inside socket. e.g. -c S0:0-3\n"); \
+ fprintf(stdout, "\t4. Logical numbering inside last level cache group. e.g. -c C0:0-3\n"); \
+ fprintf(stdout, "\t5. Logical numbering inside NUMA domain. e.g. -c M0:0-3\n"); \
+ fprintf(stdout, "\tYou can also mix domains separated by @, e.g. -c S0:0-3@S1:0-3 \n\n"); \
+ fprintf(stdout, "2. Expressions based thread list generation with compact processor numbering.\n"); \
+ fprintf(stdout, "Example usage expression: likwid-pin -c E:N:8 ./myApp\n"); \
+ fprintf(stdout, "This will generate a compact list of thread to processor mapping for the node domain with eight threads.\n"); \
+ fprintf(stdout, "The following syntax variants are available:\n"); \
+ fprintf(stdout, "\t1. -c E:<thread domain>:<number of threads>\n"); \
+ fprintf(stdout, "\t2. -c E:<thread domain>:<number of threads>:<chunk size>:<stride>\n"); \
+ fprintf(stdout, "\t For two SMT threads per core on a SMT 4 machine use e.g. -c E:N:122:2:4\n\n"); \
+ fprintf(stdout, "3. Scatter policy among thread domain type.\n"); \
+ fprintf(stdout, "Example usage scatter: likwid-pin -c M:scatter ./myApp\n"); \
+ fprintf(stdout, "This will generate a thread to processor mapping scattered among all memory domains with physical cores first.\n\n"); \
+ fprintf(stdout, "4. Logical pinning.\n"); \
+ fprintf(stdout, "Example usage logical pinning: likwid-pin -c L:0,3,4 ./myApp\n"); \
+ fprintf(stdout, "This will generate a mapping containing the processors with index 0, 3 and 4 in the currently available processor list.\n"); \
+ fprintf(stdout, "If you are running inside a cpuset (taskset, cgroup) the sorted list of allowed processors is taken as processor list.\n"); \
+ fprintf(stdout, "Example usage logical pinning inside cpuset:\n"); \
+ fprintf(stdout, "taskset -c 4,7,2,1,5 likwid-pin -c L:0,2,4 ./myApp\n"); \
+ fprintf(stdout, "This maps the application to the processors 1,4,7.\n\n"); \
+ fprintf(stdout, "If you ommit the -c option likwid will use all processors available on the node\n"); \
+ fprintf(stdout, "with physical cores first. likwid-pin will also set OMP_NUM_THREADS with as many\n"); \
+ fprintf(stdout, "threads as specified in your pin expression if OMP_NUM_THREADS is not present\n"); \
+ fprintf(stdout, "in your environment.\n\n"); \
+ fflush(stdout);
#define VERSION_MSG \
- printf("likwid-pin %d.%d \n\n",VERSION,RELEASE)
+ fprintf(stdout, "likwid-pin %d.%d \n\n",VERSION,RELEASE); \
+ fflush(stdout);
/* ##### FUNCTION DEFINITIONS - LOCAL TO THIS SOURCE FILE ########### */
static void
@@ -105,22 +114,23 @@ pinPid(int cpuid, int silent)
status = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
- if (status == -1)
+ if (status == -1)
{
- printf("sched_setaffinity failed : %s \n",strerror(errno));
+ fprintf(stderr, "sched_setaffinity failed : %s \n",strerror(errno));
}
- else
+ else
{
if(!silent)
{
#ifdef COLOR
color_on(BRIGHT, COLOR);
#endif
- printf("[likwid-pin] Main PID -> core %d - OK", cpuid);
+ fprintf(stdout, "[likwid-pin] Main PID -> core %d - OK", cpuid);
#ifdef COLOR
color_reset();
#endif
- printf("\n");
+ fprintf(stdout, "\n");
+ fflush(stdout);
}
}
}
@@ -144,6 +154,7 @@ int main (int argc, char** argv)
int numThreads=0;
int threads[MAX_NUM_THREADS];
char delimiter = ',';
+ FILE* OUTSTREAM = stdout;
threads[0] = 0;
if (argc == 1) {
@@ -190,13 +201,14 @@ int main (int argc, char** argv)
case 'p':
if (!hasAffinity)
{
- printf("Option -p is not supported for unknown processor!\n");
+ fprintf(stderr, "Option -p is not supported for unknown processor!\n");
exit(EXIT_SUCCESS);
}
optPrintDomains = 1;
break;
case 'q':
optSilent = 1;
+ OUTSTREAM = NULL;
setenv("LIKWID_SILENT","true", 1);
break;
case 's':
@@ -206,7 +218,7 @@ int main (int argc, char** argv)
case 'S':
if (!hasAffinity)
{
- printf("Option -S is not supported for unknown processor!\n");
+ fprintf(stderr, "Option -S is not supported for unknown processor!\n");
exit(EXIT_SUCCESS);
}
optMemSweep = 1;
@@ -221,24 +233,28 @@ int main (int argc, char** argv)
}
if (optind == argc && !optPrintDomains)
{
- fprintf(stderr,"Executable must be given on commandline\n");
- exit(EXIT_FAILURE);
+ fprintf(stderr,"Executable must be given on commandline\n");
+ exit(EXIT_FAILURE);
}
if (optPrintDomains && numThreads)
{
- printf("%d",threads[0]);
-
- for ( i=1; i< numThreads; i++)
+ if ((!optSilent) && (OUTSTREAM))
{
- printf("%c%d",delimiter,threads[i]);
+ fprintf(OUTSTREAM, "%d",threads[0]);
+
+ for ( i=1; i< numThreads; i++)
+ {
+ fprintf(OUTSTREAM, "%c%d",delimiter,threads[i]);
+ }
+ fprintf(OUTSTREAM, "\n");
+ fflush(OUTSTREAM);
}
- printf("\n");
exit (EXIT_SUCCESS);
}
else if ( optPrintDomains )
{
- affinity_printDomains();
+ affinity_printDomains(OUTSTREAM);
exit (EXIT_SUCCESS);
}
@@ -258,14 +274,22 @@ int main (int argc, char** argv)
if (optInterleaved)
{
- printf("Set mem_policy to interleaved\n");
+ if ((!optSilent) && (OUTSTREAM))
+ {
+ fprintf(OUTSTREAM, "Set mem_policy to interleaved\n");
+ fflush(OUTSTREAM);
+ }
numa_setInterleaved(threads, numThreads);
}
if (optMemSweep)
{
- printf("Sweeping memory\n");
- memsweep_threadGroup(threads, numThreads);
+ if ((!optSilent) && (OUTSTREAM))
+ {
+ fprintf(OUTSTREAM, "Sweeping memory\n");
+ fflush(OUTSTREAM);
+ }
+ memsweep_threadGroup(OUTSTREAM, threads, numThreads);
}
if ( getenv("OMP_NUM_THREADS") == NULL )
@@ -287,11 +311,11 @@ int main (int argc, char** argv)
bformata(pinString,",%d",threads[0]);
- if (skipMask >= 0)
- {
- skipString = bformat("%d",skipMask);
- setenv("LIKWID_SKIP",(char*) bdata(skipString) , 1);
- }
+ if (skipMask >= 0)
+ {
+ skipString = bformat("%d",skipMask);
+ setenv("LIKWID_SKIP",(char*) bdata(skipString) , 1);
+ }
setenv("KMP_AFFINITY", "disabled", 1);
setenv("LIKWID_PIN",(char*) bdata(pinString) , 1);
diff --git a/src/applications/likwid-powermeter.c b/src/applications/likwid-powermeter.c
index 4843fa8..4daa393 100644
--- a/src/applications/likwid-powermeter.c
+++ b/src/applications/likwid-powermeter.c
@@ -6,8 +6,8 @@
* Description: An application to get information about power
* consumption on architectures implementing the RAPL interface.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -49,25 +49,28 @@
#include <perfmon.h>
#include <power.h>
#include <thermal.h>
+#include <bstrlib.h>
/* ##### MACROS - LOCAL TO THIS SOURCE FILE ######################### */
#define HELP_MSG \
-printf("\nlikwid-powermeter -- Version %d.%d \n\n",VERSION,RELEASE); \
-printf("A tool to print Power and Clocking information on Intel SandyBridge CPUS.\n"); \
-printf("Options:\n"); \
-printf("-h\t\t Help message\n"); \
-printf("-v\t\t Version information\n"); \
-printf("-M <0|1>\t set how MSR registers are accessed: 0=direct, 1=msrd \n"); \
-printf("-c <list>\t specify sockets to measure\n"); \
-printf("-i\t\t print information from MSR_PKG_POWER_INFO register and Turbo Mode\n"); \
-printf("-s <duration>\t set measure duration in sec. (default 2s) \n"); \
-printf("-p\t\t print dynamic clocking and CPI values (requires executable)\n\n"); \
-printf("Usage: likwid-powermeter -s 4 -c 1 \n"); \
-printf("Alternative as wrapper: likwid-powermeter -c 1 ./a.out\n")
+fprintf(stdout, "\nlikwid-powermeter -- Version %d.%d \n\n",VERSION,RELEASE); \
+fprintf(stdout, "A tool to print Power and Clocking information on Intel SandyBridge CPUS.\n"); \
+fprintf(stdout, "Options:\n"); \
+fprintf(stdout, "-h\t\t Help message\n"); \
+fprintf(stdout, "-v\t\t Version information\n"); \
+fprintf(stdout, "-M <0|1>\t set how MSR registers are accessed: 0=direct, 1=msrd \n"); \
+fprintf(stdout, "-c <list>\t specify sockets to measure\n"); \
+fprintf(stdout, "-i\t\t print information from MSR_PKG_POWER_INFO register and Turbo Mode\n"); \
+fprintf(stdout, "-s <duration>\t set measure duration in sec. (default 2s) \n"); \
+fprintf(stdout, "-p\t\t print dynamic clocking and CPI values (requires executable)\n\n"); \
+fprintf(stdout, "Usage: likwid-powermeter -s 4 -c 1 \n"); \
+fprintf(stdout, "Alternative as wrapper: likwid-powermeter -c 1 ./a.out\n"); \
+fflush(stdout);
#define VERSION_MSG \
-printf("likwid-powermeter %d.%d \n\n",VERSION,RELEASE)
+fprintf(stdout, "likwid-powermeter %d.%d \n\n",VERSION,RELEASE); \
+fflush(stdout);
int main (int argc, char** argv)
@@ -77,25 +80,28 @@ int main (int argc, char** argv)
int optClock = 0;
int optStethoscope = 0;
int optSockets = 0;
+ int optTemp = 0;
double runtime;
int hasDRAM = 0;
- int c;
+ int hasPP0 = 0;
+ int hasPP1 = 0;
+ int c, i;
bstring argString;
bstring eventString = bfromcstr("CLOCK");
int numSockets=1;
int numThreads=0;
int threadsSockets[MAX_NUM_NODES*2];
int threads[MAX_NUM_THREADS];
-
+ const AffinityDomain* socketDomains[MAX_NUM_NODES*2];
threadsSockets[0] = 0;
-
+
if (argc == 1)
{
- HELP_MSG;
- exit (EXIT_SUCCESS);
+ HELP_MSG;
+ exit (EXIT_SUCCESS);
}
- while ((c = getopt (argc, argv, "+c:hiM:ps:v")) != -1)
+ while ((c = getopt (argc, argv, "+c:hiM:ps:vt")) != -1)
{
switch (c)
{
@@ -128,11 +134,14 @@ int main (int argc, char** argv)
case 'v':
VERSION_MSG;
exit (EXIT_SUCCESS);
+ case 't':
+ optTemp = 1;
+ break;
case '?':
- if (optopt == 's' || optopt == 'M' || optopt == 'c')
- {
- HELP_MSG;
- }
+ if (optopt == 's' || optopt == 'M' || optopt == 'c')
+ {
+ HELP_MSG;
+ }
else if (isprint (optopt))
{
fprintf (stderr, "Unknown option `-%c'.\n", optopt);
@@ -155,16 +164,20 @@ int main (int argc, char** argv)
fprintf(stderr,"Access to performance counters is locked.\n");
exit(EXIT_FAILURE);
}
-
if (optClock && optind == argc)
{
- fprintf(stderr,"Commandline option -p requires an executable.\n");
- exit(EXIT_FAILURE);
+ fprintf(stderr,"Commandline option -p requires an executable.\n");
+ exit(EXIT_FAILURE);
}
if (optSockets && !optStethoscope && optind == argc)
{
- fprintf(stderr,"Commandline option -c requires an executable if not used in combination with -s.\n");
- exit(EXIT_FAILURE);
+ fprintf(stderr,"Commandline option -c requires an executable if not used in combination with -s.\n");
+ exit(EXIT_FAILURE);
+ }
+ if (optStethoscope == 0 && optind == argc && !optInfo)
+ {
+ fprintf(stderr,"Either -s <seconds> or executable must be given on commandline.\n");
+ exit(EXIT_FAILURE);
}
if (cpuid_init() == EXIT_FAILURE)
@@ -172,15 +185,27 @@ int main (int argc, char** argv)
fprintf(stderr, "CPU not supported\n");
exit(EXIT_FAILURE);
}
-
if (numSockets > cpuid_topology.numSockets)
{
- fprintf(stderr, "System has only %d sockets but %d are given on commandline\n",
- cpuid_topology.numSockets, numSockets);
- exit(EXIT_FAILURE);
+ fprintf(stderr, "System has only %d sockets but %d are given on commandline.\n",
+ cpuid_topology.numSockets, numSockets);
+ exit(EXIT_FAILURE);
+ }
+
+ numa_init();
+ affinity_init();
+
+ for (c = 0; c < numSockets; c++)
+ {
+ if (threadsSockets[c] >= cpuid_topology.numSockets)
+ {
+ fprintf(stderr, "System has no socket %d\n", threadsSockets[c]);
+ exit(EXIT_FAILURE);
+ }
+ bstring socketStr = bformat("S%d",threadsSockets[c]);
+ socketDomains[threadsSockets[c]] = affinity_getDomain(socketStr);
}
- numa_init(); /* consider NUMA node as power unit for the moment */
accessClient_init(&socket_fd);
msr_init(socket_fd);
timer_init();
@@ -191,49 +216,93 @@ int main (int argc, char** argv)
(cpuid_info.model == IVYBRIDGE) ||
(cpuid_info.model == IVYBRIDGE_EP) ||
(cpuid_info.model == HASWELL) ||
+ (cpuid_info.model == HASWELL_EX) ||
(cpuid_info.model == NEHALEM_BLOOMFIELD) ||
(cpuid_info.model == NEHALEM_LYNNFIELD) ||
- (cpuid_info.model == NEHALEM_WESTMERE))
+ (cpuid_info.model == NEHALEM_WESTMERE) ||
+ (cpuid_info.model == ATOM_SILVERMONT_C) ||
+ (cpuid_info.model == ATOM_SILVERMONT_E) ||
+ (cpuid_info.model == ATOM_SILVERMONT_F1) ||
+ (cpuid_info.model == ATOM_SILVERMONT_F2) ||
+ (cpuid_info.model == ATOM_SILVERMONT_F3))
{
- power_init(numa_info.nodes[0].processors[0]);
+ if (numSockets == 0)
+ {
+ numSockets = numa_info.numberOfNodes;
+ }
+ for(int i=0; i<numSockets; i++)
+ {
+ power_init(socketDomains[threadsSockets[i]]->processorList[0]);
+ }
}
else
{
- fprintf (stderr, "Query Turbo Mode only supported on Intel Nehalem/Westmere/SandyBridge/IvyBridge/Haswell processors!\n");
+ fprintf (stderr, "Query Turbo Mode only supported on Intel Nehalem/Westmere/SandyBridge/IvyBridge/Haswell/Silvermont processors!\n");
exit(EXIT_FAILURE);
}
double clock = (double) timer_getCpuClock();
- printf(HLINE);
- printf("CPU name:\t%s \n",cpuid_info.name);
- printf("CPU clock:\t%3.2f GHz \n", (float) clock * 1.E-09);
- printf(HLINE);
+ fprintf(stdout, HLINE);
+ fprintf(stdout, "CPU name:\t%s \n",cpuid_info.name);
+ fprintf(stdout, "CPU clock:\t%3.2f GHz \n", (float) clock * 1.E-09);
+ fprintf(stdout, HLINE);
+ fflush(stdout);
if (optInfo)
{
if (power_info.turbo.numSteps != 0)
{
- printf("Base clock:\t%.2f MHz \n", power_info.baseFrequency );
- printf("Minimal clock:\t%.2f MHz \n", power_info.minFrequency );
- printf("Turbo Boost Steps:\n");
+ fprintf(stdout, "Base clock:\t%.2f MHz \n", power_info.baseFrequency );
+ fprintf(stdout, "Minimal clock:\t%.2f MHz \n", power_info.minFrequency );
+ fprintf(stdout, "Turbo Boost Steps:\n");
for (int i=0; i < power_info.turbo.numSteps; i++ )
{
- printf("C%d %.2f MHz \n",i+1, power_info.turbo.steps[i] );
+ fprintf(stdout, "C%d %.2f MHz \n",i+1, power_info.turbo.steps[i] );
}
}
- printf(HLINE);
+ fprintf(stdout, HLINE);
+ fflush(stdout);
}
- if (cpuid_info.model == SANDYBRIDGE_EP)
+ if ((cpuid_info.model == SANDYBRIDGE_EP) ||
+ (cpuid_info.model == IVYBRIDGE_EP) ||
+ (cpuid_info.model == HASWELL_EX) ||
+ (cpuid_info.model == HASWELL))
{
hasDRAM = 1;
}
- else if ((cpuid_info.model != SANDYBRIDGE) &&
- (cpuid_info.model != SANDYBRIDGE_EP) &&
- (cpuid_info.model != IVYBRIDGE) &&
- (cpuid_info.model != IVYBRIDGE_EP) &&
- (cpuid_info.model != HASWELL))
+ if ((cpuid_info.model == SANDYBRIDGE_EP) ||
+ (cpuid_info.model == SANDYBRIDGE) ||
+ (cpuid_info.model == IVYBRIDGE_EP) ||
+ (cpuid_info.model == IVYBRIDGE) ||
+ (cpuid_info.model == HASWELL) ||
+ (cpuid_info.model == ATOM_SILVERMONT_E) ||
+ (cpuid_info.model == ATOM_SILVERMONT_F1) ||
+ (cpuid_info.model == ATOM_SILVERMONT_F2) ||
+ (cpuid_info.model == ATOM_SILVERMONT_F3))
+ {
+ hasPP0 = 1;
+ }
+ if ((cpuid_info.model == HASWELL) ||
+ (cpuid_info.model == SANDYBRIDGE) ||
+ (cpuid_info.model == IVYBRIDGE))
+ {
+ hasPP1 = 1;
+ }
+ if ((cpuid_info.model != SANDYBRIDGE) &&
+ (cpuid_info.model != SANDYBRIDGE_EP) &&
+ (cpuid_info.model != IVYBRIDGE) &&
+ (cpuid_info.model != IVYBRIDGE_EP) &&
+ (cpuid_info.model != HASWELL) &&
+ (cpuid_info.model != HASWELL_M1) &&
+ (cpuid_info.model != HASWELL_M2) &&
+ (cpuid_info.model != HASWELL_EX) &&
+ (cpuid_info.model != ATOM_SILVERMONT_C) &&
+ (cpuid_info.model != ATOM_SILVERMONT_E) &&
+ (cpuid_info.model != ATOM_SILVERMONT_F1) &&
+ (cpuid_info.model != ATOM_SILVERMONT_F2) &&
+ (cpuid_info.model != ATOM_SILVERMONT_F3))
{
fprintf (stderr, "RAPL not supported on this processor!\n");
exit(EXIT_FAILURE);
@@ -241,21 +310,24 @@ int main (int argc, char** argv)
if (optInfo)
{
- printf("Thermal Spec Power: %g Watts \n", power_info.tdp );
- printf("Minimum Power: %g Watts \n", power_info.minPower);
- printf("Maximum Power: %g Watts \n", power_info.maxPower);
- printf("Maximum Time Window: %g micro sec \n", power_info.maxTimeWindow);
- printf(HLINE);
+ fprintf(stdout, "Thermal Spec Power: %g Watts \n", power_info.tdp );
+ fprintf(stdout, "Minimum Power: %g Watts \n", power_info.minPower);
+ fprintf(stdout, "Maximum Power: %g Watts \n", power_info.maxPower);
+ fprintf(stdout, "Maximum Time Window: %g micro sec \n", power_info.maxTimeWindow);
+ fprintf(stdout, HLINE);
+ fflush(stdout);
exit(EXIT_SUCCESS);
}
if (optClock)
{
affinity_init();
- argString = bformat("S%u:0-%u", threadsSockets[0], cpuid_topology.numCoresPerSocket-1);
+ argString = bformat("S%u:0-%u", threadsSockets[0],
+ socketDomains[threadsSockets[0]]->numberOfProcessors-1);
for (int i=1; i<numSockets; i++)
{
- bstring tExpr = bformat("@S%u:0-%u", threadsSockets[i], cpuid_topology.numCoresPerSocket-1);
+ bstring tExpr = bformat("@S%u:0-%u", threadsSockets[i],
+ socketDomains[threadsSockets[i]]->numberOfProcessors-1);
bconcat(argString, tExpr);
}
numThreads = bstr_to_cpuset(threads, argString);
@@ -267,12 +339,15 @@ int main (int argc, char** argv)
{
PowerData pDataPkg[MAX_NUM_NODES*2];
PowerData pDataDram[MAX_NUM_NODES*2];
- printf("Measure on sockets: %d", threadsSockets[0]);
+ PowerData pDataPP0[MAX_NUM_NODES*2];
+ PowerData pDataPP1[MAX_NUM_NODES*2];
+ fprintf(stdout, "Measure on sockets: %d", threadsSockets[0]);
for (int i=1; i<numSockets; i++)
{
- printf(", %d", threadsSockets[i]);
+ fprintf(stdout, ", %d", threadsSockets[i]);
}
- printf("\n");
+ fprintf(stdout, "\n");
+ fflush(stdout);
if (optStethoscope)
{
@@ -284,9 +359,11 @@ int main (int argc, char** argv)
{
for (int i=0; i<numSockets; i++)
{
- int cpuId = numa_info.nodes[threadsSockets[i]].processors[0];
- if (hasDRAM) power_start(pDataDram+i, cpuId, DRAM);
- power_start(pDataPkg+i, cpuId, PKG);
+ int cpuId = socketDomains[threadsSockets[i]]->processorList[0];
+ if (hasDRAM) power_start(&(pDataDram[i]), cpuId, DRAM);
+ if (hasPP0) power_start(&(pDataPP0[i]), cpuId, PP0);
+ if (hasPP1) power_start(&(pDataPP1[i]), cpuId, PP1);
+ power_start(&(pDataPkg[i]), cpuId, PKG);
}
}
sleep(optStethoscope);
@@ -301,9 +378,11 @@ int main (int argc, char** argv)
{
for (int i=0; i<numSockets; i++)
{
- int cpuId = numa_info.nodes[threadsSockets[i]].processors[0];
- power_stop(pDataPkg+i, cpuId, PKG);
- if (hasDRAM) power_stop(pDataDram+i, cpuId, DRAM);
+ int cpuId = socketDomains[threadsSockets[i]]->processorList[0];
+ power_stop(&(pDataPkg[i]), cpuId, PKG);
+ if (hasPP1) power_stop(&(pDataPP1[i]), cpuId, PP1);
+ if (hasPP0) power_stop(&(pDataPP0[i]), cpuId, PP0);
+ if (hasDRAM) power_stop(&(pDataDram[i]), cpuId, DRAM);
}
}
runtime = (double) optStethoscope;
@@ -319,7 +398,8 @@ int main (int argc, char** argv)
bconchar(exeString, ' ');
bcatcstr(exeString, argv[i]);
}
- printf("%s\n",bdata(exeString));
+ fprintf(stdout, "Executing: %s\n",bdata(exeString));
+ fflush(stdout);
if (optClock)
@@ -330,9 +410,11 @@ int main (int argc, char** argv)
{
for (int i=0; i<numSockets; i++)
{
- int cpuId = numa_info.nodes[threadsSockets[i]].processors[0];
- if (hasDRAM) power_start(pDataDram+i, cpuId, DRAM);
- power_start(pDataPkg+i, cpuId, PKG);
+ int cpuId = socketDomains[threadsSockets[i]]->processorList[0];
+ if (hasDRAM) power_start(&(pDataDram[i]), cpuId, DRAM);
+ if (hasPP0) power_start(&(pDataPP0[i]), cpuId, PP0);
+ if (hasPP1) power_start(&(pDataPP1[i]), cpuId, PP1);
+ power_start(&(pDataPkg[i]), cpuId, PKG);
}
timer_start(&time);
@@ -356,9 +438,11 @@ int main (int argc, char** argv)
for (int i=0; i<numSockets; i++)
{
- int cpuId = numa_info.nodes[threadsSockets[i]].processors[0];
- power_stop(pDataPkg+i, cpuId, PKG);
- if (hasDRAM) power_stop(pDataDram+i, cpuId, DRAM);
+ int cpuId = socketDomains[threadsSockets[i]]->processorList[0];
+ power_stop(&(pDataPkg[i]), cpuId, PKG);
+ if (hasDRAM) power_stop(&(pDataDram[i]), cpuId, DRAM);
+ if (hasPP0) power_stop(&(pDataPP0[i]), cpuId, PP0);
+ if (hasPP1) power_stop(&(pDataPP1[i]), cpuId, PP1);
}
runtime = timer_print(&time);
}
@@ -366,39 +450,56 @@ int main (int argc, char** argv)
if (!optClock)
{
- printf("Runtime: %g second \n",runtime);
- printf(HLINE);
+ fprintf(stdout, "Runtime: %g second \n",runtime);
+ fprintf(stdout, HLINE);
for (int i=0; i<numSockets; i++)
{
- printf("Socket %d\n",threadsSockets[i]);
- printf("Domain: PKG \n");
- printf("Energy consumed: %g Joules \n", power_printEnergy(pDataPkg+i));
- printf("Power consumed: %g Watts \n", power_printEnergy(pDataPkg+i) / runtime );
+ fprintf(stdout, "Socket %d (Measured on CPU %d)\n",threadsSockets[i],
+ socketDomains[threadsSockets[i]]->processorList[0]);
+ fprintf(stdout, "Domain: PKG \n");
+ fprintf(stdout, "Energy consumed: %g Joules \n", power_printEnergy(&(pDataPkg[i])));
+ fprintf(stdout, "Power consumed: %g Watts \n", power_printEnergy(&(pDataPkg[i])) / runtime );
if (hasDRAM)
{
- printf("Domain: DRAM \n");
- printf("Energy consumed: %g Joules \n", power_printEnergy(pDataDram+i));
- printf("Power consumed: %g Watts \n", power_printEnergy(pDataDram+i) / runtime );
+ fprintf(stdout, "Domain: DRAM \n");
+ fprintf(stdout, "Energy consumed: %g Joules \n", power_printEnergy(&(pDataDram[i])));
+ fprintf(stdout, "Power consumed: %g Watts \n", power_printEnergy(&(pDataDram[i])) / runtime );
}
- printf("\n");
+ if (hasPP0)
+ {
+ fprintf(stdout, "Domain: PP0 \n");
+ fprintf(stdout, "Energy consumed: %g Joules \n", power_printEnergy(&(pDataPP0[i])));
+ fprintf(stdout, "Power consumed: %g Watts \n", power_printEnergy(&(pDataPP0[i])) / runtime );
+ }
+ if (hasPP1)
+ {
+ fprintf(stdout, "Domain: PP1 \n");
+ fprintf(stdout, "Energy consumed: %g Joules \n", power_printEnergy(&(pDataPP1[i])));
+ fprintf(stdout, "Power consumed: %g Watts \n", power_printEnergy(&(pDataPP1[i])) / runtime );
+ }
+ fprintf(stdout, "\n");
}
+ fflush(stdout);
}
}
-#if 0
- if ( cpuid_hasFeature(TM2) )
+
+ if ( optTemp && cpuid_hasFeature(TM2))
{
- thermal_init(0);
printf("Current core temperatures:\n");
-
- for (uint32_t i = 0; i < cpuid_topology.numCoresPerSocket; i++ )
+ for (i = 0; i < numSockets; i++)
{
- printf("Core %d: %u C\n",
- numa_info.nodes[socketId].processors[i],
- thermal_read(numa_info.nodes[socketId].processors[i]));
+ printf("Socket %d\n",threadsSockets[i]);
+ for (c = 0; c < socketDomains[threadsSockets[i]]->numberOfProcessors; c++ )
+ {
+ thermal_init(i);
+ printf("Core %d: %u C\n",
+ socketDomains[threadsSockets[i]]->processorList[c],
+ thermal_read(socketDomains[threadsSockets[i]]->processorList[c]));
+ }
}
}
-#endif
+
msr_finalize();
return EXIT_SUCCESS;
diff --git a/src/applications/likwid-topology.c b/src/applications/likwid-topology.c
index d381ef8..7ba0e33 100644
--- a/src/applications/likwid-topology.c
+++ b/src/applications/likwid-topology.c
@@ -6,8 +6,8 @@
* Description: A application to determine the thread and cache topology
* on x86 processors.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -51,24 +51,26 @@
/* ##### MACROS - LOCAL TO THIS SOURCE FILE ######################### */
#define HELP_MSG \
-printf("\nlikwid-topology -- Version %d.%d \n\n",VERSION,RELEASE); \
-printf("A tool to print the thread and cache topology on x86 CPUs.\n"); \
-printf("Options:\n"); \
-printf("-h\t Help message\n"); \
-printf("-v\t Version information\n"); \
-printf("-c\t list cache information\n"); \
-printf("-C\t measure processor clock\n"); \
-printf("-o\t Store output to file, with output conversation according to file suffix\n"); \
-printf("\t Conversation scripts can be supplied in %s\n",TOSTRING(LIKWIDFILTERPATH)); \
-printf("-g\t graphical output\n\n")
+ fprintf(OUTSTREAM, "\nlikwid-topology -- Version %d.%d \n\n",VERSION,RELEASE); \
+ fprintf(OUTSTREAM, "A tool to print the thread and cache topology on x86 CPUs.\n"); \
+ fprintf(OUTSTREAM, "Options:\n"); \
+ fprintf(OUTSTREAM, "-h\t Help message\n"); \
+ fprintf(OUTSTREAM, "-v\t Version information\n"); \
+ fprintf(OUTSTREAM, "-c\t list cache information\n"); \
+ fprintf(OUTSTREAM, "-C\t measure processor clock\n"); \
+ fprintf(OUTSTREAM, "-o\t Store output to file, with output conversion according to file suffix\n"); \
+ fprintf(OUTSTREAM, "\t Conversion scripts can be supplied in %s\n",TOSTRING(LIKWIDFILTERPATH)); \
+ fprintf(OUTSTREAM, "-g\t graphical output\n\n"); \
+ fflush(OUTSTREAM);
#define VERSION_MSG \
-printf("likwid-topology %d.%d \n\n",VERSION,RELEASE)
+ fprintf(OUTSTREAM, "likwid-topology %d.%d \n\n",VERSION,RELEASE); \
+ fflush(OUTSTREAM);
/* ##### FUNCTION DEFINITIONS - EXPORTED FUNCTIONS ################## */
int main (int argc, char** argv)
-{
+{
int optGraphical = 0;
int optCaches = 0;
int optClock = 0;
@@ -140,12 +142,12 @@ int main (int argc, char** argv)
numa_init();
fprintf(OUTSTREAM, HLINE);
- fprintf(OUTSTREAM, "CPU type:\t%s \n",cpuid_info.name);
+ fprintf(OUTSTREAM, "CPU type:\t%s\n",cpuid_info.name);
if (optClock)
{
timer_init();
- fprintf(OUTSTREAM, "CPU clock:\t%3.2f GHz \n", (float) timer_getCpuClock() * 1.E-09);
+ fprintf(OUTSTREAM, "CPU clock:\t%3.2f GHz\n", (float) timer_getCpuClock() * 1.E-09);
}
/*----------------------------------------------------------------------
@@ -190,6 +192,7 @@ int main (int argc, char** argv)
fprintf(OUTSTREAM, ")\n");
}
fprintf(OUTSTREAM, HLINE"\n");
+ fflush(OUTSTREAM);
/*----------------------------------------------------------------------
* Cache Topology
@@ -208,7 +211,7 @@ int main (int argc, char** argv)
fprintf(OUTSTREAM, "Size:\t%d kB\n",
cpuid_topology.cacheLevels[i].size/1024);
}
- else
+ else
{
fprintf(OUTSTREAM, "Size:\t%d MB\n",
cpuid_topology.cacheLevels[i].size/1048576);
@@ -236,7 +239,7 @@ int main (int argc, char** argv)
cpuid_topology.cacheLevels[i].associativity);
fprintf(OUTSTREAM, "Number of sets:\t%d\n",
cpuid_topology.cacheLevels[i].sets);
- fprintf(OUTSTREAM, "Cache line size:%d\n",
+ fprintf(OUTSTREAM, "Cache line size:\t%d\n",
cpuid_topology.cacheLevels[i].lineSize);
if(cpuid_topology.cacheLevels[i].inclusive)
{
@@ -289,6 +292,7 @@ int main (int argc, char** argv)
}
fprintf(OUTSTREAM, "\n");
+ fflush(OUTSTREAM);
/*----------------------------------------------------------------------
* NUMA Topology
@@ -331,6 +335,7 @@ int main (int argc, char** argv)
}
}
fprintf(OUTSTREAM, "\n");
+ fflush(OUTSTREAM);
/*----------------------------------------------------------------------
* Graphical topology
@@ -345,7 +350,7 @@ int main (int argc, char** argv)
fprintf(OUTSTREAM, SLINE);
/* Allocate without instruction cache */
- if ( cpuid_info.family == P6_FAMILY || cpuid_info.family == MIC_FAMILY )
+ if ( cpuid_info.family == P6_FAMILY || cpuid_info.family == MIC_FAMILY )
{
container = asciiBoxes_allocateContainer(
cpuid_topology.numCacheLevels,
@@ -384,7 +389,7 @@ int main (int argc, char** argv)
tmp++;
threadNode = tree_getNextNode(threadNode);
}
- asciiBoxes_addBox(container, 0, j, boxLabel);
+ asciiBoxes_addBox(container, 0, j, boxLabel);
j++;
coreNode = tree_getNextNode(coreNode);
}
@@ -422,7 +427,7 @@ int main (int argc, char** argv)
boxLabel = bformat("%dkB",
cpuid_topology.cacheLevels[i].size/1024);
}
- else
+ else
{
boxLabel = bformat("%dMB",
cpuid_topology.cacheLevels[i].size/1048576);
@@ -443,17 +448,17 @@ int main (int argc, char** argv)
lineCursor,
columnCursor,
columnCursor+cacheWidth,
- boxLabel);
+ boxLabel);
columnCursor += sharedCores;
}
- else
+ else
{
asciiBoxes_addBox(
container,
lineCursor,
columnCursor,
- boxLabel);
+ boxLabel);
columnCursor++;
}
@@ -464,7 +469,7 @@ int main (int argc, char** argv)
}
}
- asciiBoxes_print(container);
+ asciiBoxes_print(OUTSTREAM, container);
socketNode = tree_getNextNode(socketNode);
}
bdestroy(boxLabel);
@@ -475,22 +480,23 @@ int main (int argc, char** argv)
/* call filterscript if specified */
if (!biseqcstr(filterScript,"NO"))
{
- struct bstrList* tokens;
- tokens = bsplit(filterScript,' ');
- if (access(bdata(tokens->entry[0]), F_OK))
- {
- fprintf(stderr, "Cannot find filter %s!\n", bdata(tokens->entry[0]));
- bstrListDestroy(tokens);
- exit(EXIT_FAILURE);
- }
- if (access(bdata(tokens->entry[0]), X_OK))
- {
- fprintf(stderr, "Cannot execute filter %s!\n", bdata(tokens->entry[0]));
- bstrListDestroy(tokens);
- exit(EXIT_FAILURE);
- }
- bstrListDestroy(tokens);
+ struct bstrList* tokens;
+ tokens = bsplit(filterScript,' ');
+ if (access(bdata(tokens->entry[0]), F_OK))
+ {
+ fprintf(stderr, "Cannot find filter %s!\n", bdata(tokens->entry[0]));
+ bstrListDestroy(tokens);
+ exit(EXIT_FAILURE);
+ }
+ if (access(bdata(tokens->entry[0]), X_OK))
+ {
+ fprintf(stderr, "Cannot execute filter %s!\n", bdata(tokens->entry[0]));
+ bstrListDestroy(tokens);
+ exit(EXIT_FAILURE);
+ }
+ bstrListDestroy(tokens);
bcatcstr(filterScript, " topology");
+
if (system(bdata(filterScript)) == EOF)
{
fprintf(stderr, "Failed to execute filter %s!\n", bdata(filterScript));
diff --git a/src/asciiBoxes.c b/src/asciiBoxes.c
index c6560e3..a22dab5 100644
--- a/src/asciiBoxes.c
+++ b/src/asciiBoxes.c
@@ -5,8 +5,8 @@
*
* Description: Module implementing output of nested ascii art boxes
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -108,7 +108,7 @@ asciiBoxes_addJoinedBox(
}
void
-asciiBoxes_print(BoxContainer* container)
+asciiBoxes_print(FILE* OUTSTREAM, BoxContainer* container)
{
int width;
int boxwidth=0; /* box width is inner width of box */
@@ -251,5 +251,6 @@ asciiBoxes_print(BoxContainer* container)
printf("-");
}
printf("+\n");
+ fflush(stdout);
}
diff --git a/src/asciiTable.c b/src/asciiTable.c
index 3e4b508..29b615a 100644
--- a/src/asciiTable.c
+++ b/src/asciiTable.c
@@ -5,8 +5,8 @@
*
* Description: Module implementing output of ascii table.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/barrier.c b/src/barrier.c
index c5faad4..3a93f92 100644
--- a/src/barrier.c
+++ b/src/barrier.c
@@ -5,8 +5,8 @@
*
* Description: Implementation of threaded spin loop barrier
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -122,7 +122,7 @@ barrier_registerThread(BarrierData* barr, int groupId, int threadId)
void
barrier_init(int numberOfGroups)
-{
+{
maxGroupId = numberOfGroups-1;
groups = (BarrierGroup*) malloc(numberOfGroups * sizeof(BarrierGroup));
}
@@ -151,5 +151,5 @@ barrier_synchronize(BarrierData* barr)
void barrier_destroy(void)
{
- free(groups);
+ free(groups);
}
diff --git a/src/bench.c b/src/bench.c
index 4460552..3a0b81b 100644
--- a/src/bench.c
+++ b/src/bench.c
@@ -5,8 +5,8 @@
*
* Description: Benchmarking framework for likwid-bench
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -43,26 +43,38 @@
#include <affinity.h>
#include <barrier.h>
#include <likwid.h>
+#ifdef PAPI
+#include <papi.h>
+#endif
/* ##### EXPORTED VARIABLES ########################################### */
/* ##### MACROS - LOCAL TO THIS SOURCE FILE ######################### */
-//#define BARRIER pthread_barrier_wait(&threads_barrier)
+//#define BARRIER pthread_barrier_wait(&threads_barrier)
#define BARRIER barrier_synchronize(&barr)
#ifdef PERFMON
#define START_PERFMON likwid_markerStartRegion("bench");
#define STOP_PERFMON likwid_markerStopRegion("bench");
#define LIKWID_THREAD_INIT likwid_markerThreadInit();
+#define EXECUTE EXECUTE_LIKWID
+#else
+#ifdef PAPI
+#define START_PERFMON(event_set) PAPI_start(event_set);
+#define STOP_PERFMON(event_set, result) PAPI_stop ( event_set ,result );
+#define LIKWID_THREAD_INIT
+#define EXECUTE EXECUTE_PAPI
#else
#define START_PERFMON
#define STOP_PERFMON
#define LIKWID_THREAD_INIT
+#define EXECUTE EXECUTE_LIKWID
+#endif
#endif
-#define EXECUTE(func) \
+#define EXECUTE_LIKWID(func) \
BARRIER; \
if (data->threadId == 0) \
{ \
@@ -80,9 +92,27 @@
timer_stop(&time); \
data->cycles = timer_printCycles(&time); \
} \
- BARRIER
-
+ BARRIER
+/*
+ * EXECUTE_PAPI(func): timed benchmark loop with counter measurement.
+ *
+ * Sequence of the expansion:
+ *   1. all threads meet at BARRIER,
+ *   2. thread 0 (data->threadId == 0) starts the timer,
+ *   3. START_PERFMON(event_set) is invoked (PAPI_start in the PAPI
+ *      configuration of this file),
+ *   4. func is executed data->data.iter times,
+ *   5. all threads meet at BARRIER again,
+ *   6. STOP_PERFMON(event_set, &(result[0])) is invoked (PAPI_stop in the
+ *      PAPI configuration),
+ *   7. thread 0 stops the timer and stores the cycle count in data->cycles,
+ *   8. a final BARRIER closes the sequence.
+ *
+ * The expansion refers to the identifiers data, time, i, event_set and
+ * result, so these must be in scope wherever the macro is used.
+ * The last line carries no semicolon; the caller supplies it.
+ */
+#define EXECUTE_PAPI(func) \
+ BARRIER; \
+ if (data->threadId == 0) \
+ { \
+ timer_start(&time); \
+ } \
+ START_PERFMON(event_set) \
+ for (i=0; i< data->data.iter; i++) \
+ { \
+ func; \
+ } \
+ BARRIER; \
+ STOP_PERFMON(event_set, &(result[0])) \
+ if (data->threadId == 0) \
+ { \
+ timer_stop(&time); \
+ data->cycles = timer_printCycles(&time); \
+ } \
+ BARRIER
/* ##### FUNCTION DEFINITIONS - EXPORTED FUNCTIONS ################## */
void* runTest(void* arg)
@@ -96,11 +126,49 @@ void* runTest(void* arg)
ThreadUserData* myData;
TimerData time;
FuncPrototype func;
+ FILE* OUTSTREAM;
+#ifdef PAPI
+    /* PAPI measurement state of this thread: the event set handle and the
+     * buffer that receives the counter values when EXECUTE_PAPI stops the
+     * measurement. */
+    int event_set = PAPI_NULL;
+    char groupname[50] = "";
+    char* group_ptr = getenv("PAPI_BENCH");
+    long long int result[4] = {0,0,0,0};
+
+    /* getenv() returns NULL when PAPI_BENCH is not set. Fall back to the
+     * empty group name so that the strncmp() calls on group_ptr never see
+     * a NULL pointer; no group matches then and only the cycle counter is
+     * added to the event set. */
+    if (group_ptr == NULL)
+    {
+        group_ptr = groupname;
+    }
+
+    PAPI_create_eventset(&event_set);
+    /* Total cycles are always measured and end up in result[0]. */
+    PAPI_add_event(event_set, PAPI_TOT_CYC);
+    // L3 group
+    if (strncmp(group_ptr,"L3",2) == 0)
+    {
+        PAPI_add_event(event_set, PAPI_L3_TCA);
+    }
+    // L2 group
+    else if (strncmp(group_ptr,"L2",2) == 0)
+    {
+        PAPI_add_event(event_set, PAPI_L2_TCA);
+    }
+    // FLOPS_AVX
+    else if (strncmp(group_ptr,"FLOPS_AVX",9) == 0)
+    {
+        PAPI_add_event(event_set, PAPI_VEC_SP);
+        PAPI_add_event(event_set, PAPI_VEC_DP);
+        PAPI_add_event(event_set, PAPI_FP_INS);
+    }
+    // FLOPS_DP
+    else if (strncmp(group_ptr,"FLOPS_DP",8) == 0)
+    {
+        PAPI_add_event(event_set, PAPI_DP_OPS);
+    }
+    // FLOPS_SP
+    else if (strncmp(group_ptr,"FLOPS_SP",8) == 0)
+    {
+        PAPI_add_event(event_set, PAPI_SP_OPS);
+    }
+#endif
data = (ThreadData*) arg;
myData = &(data->data);
func = myData->test->kernel;
threadId = data->threadId;
+ OUTSTREAM = data->output;
barrier_registerThread(&barr, 0, data->globalThreadId);
/* Prepare ptrs for thread */
@@ -111,6 +179,7 @@ void* runTest(void* arg)
switch ( myData->test->type )
{
+ case SINGLE_RAND:
case SINGLE:
{
float* sptr;
@@ -123,6 +192,7 @@ void* runTest(void* arg)
}
}
break;
+ case DOUBLE_RAND:
case DOUBLE:
{
double* dptr;
@@ -143,21 +213,24 @@ void* runTest(void* arg)
sleep(1);
LIKWID_THREAD_INIT;
BARRIER;
- printf("Group: %d Thread %d Global Thread %d running on core %d - Vector length %llu Offset %d\n",
- data->groupId,
- threadId,
- data->globalThreadId,
- affinity_threadGetProcessorId(),
- LLU_CAST size,
- offset);
+ if (OUTSTREAM)
+ {
+ fprintf(OUTSTREAM, "Group: %d Thread %d Global Thread %d running on core %d - Vector length %llu Offset %d\n",
+ data->groupId,
+ threadId,
+ data->globalThreadId,
+ affinity_threadGetProcessorId(),
+ LLU_CAST size,
+ offset);
+ }
BARRIER;
/* Up to 10 streams the following registers are used for Array ptr:
* Size rdi
- * in Registers: rsi rdx rcx r8 r9
+ * in Registers: rsi rdx rcx r8 r9
* passed on stack, then: r10 r11 r12 r13 r14 r15
* If more than 10 streams are used first 5 streams are in register, above 5 a macro must be used to
- * load them from stack
+ * load them from stack
* */
switch ( myData->test->streams ) {
@@ -440,7 +513,24 @@ void* runTest(void* arg)
default:
break;
}
-
+#ifdef PAPI
+ double papi_result = 0.0;
+ // L2 & L3 group
+ if (strncmp(group_ptr,"L3",2) == 0 ||
+ strncmp(group_ptr,"L2",2) == 0)
+ {
+ papi_result = ((double)result[1]) * 64.0;
+ }
+ // FLOPS_AVX
+ else if (strncmp(group_ptr,"FLOPS",5) == 0)
+ {
+ papi_result = (double) result[1]+ (double) result[2];
+ }
+ if (OUTSTREAM)
+ {
+ fprintf(OUTSTREAM, "Thread %d Result %f\n",threadId, papi_result);
+ }
+#endif
pthread_exit(NULL);
}
diff --git a/src/bitUtil.c b/src/bitUtil.c
index 3df4d62..cdce490 100644
--- a/src/bitUtil.c
+++ b/src/bitUtil.c
@@ -5,8 +5,8 @@
*
* Description: Utility routines manipulating bit arrays.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/cpuFeatures.c b/src/cpuFeatures.c
index 96b54d2..4733a82 100644
--- a/src/cpuFeatures.c
+++ b/src/cpuFeatures.c
@@ -9,8 +9,8 @@
* Allows to turn on and off the Hardware prefetcher
* available.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -60,11 +60,11 @@ CpuFeatureFlags cpuFeatureFlags;
#define TEST_FLAG(feature,flag) \
if (flags & (1ULL<<(flag))) \
{ \
- cpuFeatureFlags.feature = 1; \
+ cpuFeatureFlags.feature = 1; \
} \
else \
{ \
- cpuFeatureFlags.feature = 0; \
+ cpuFeatureFlags.feature = 0; \
}
@@ -75,15 +75,15 @@ cpuFeatures_init(int cpu)
{
uint64_t flags = msr_read(cpu, MSR_IA32_MISC_ENABLE);
- TEST_FLAG(fastStrings,0);
- TEST_FLAG(thermalControl,3);
- TEST_FLAG(perfMonitoring,7);
- TEST_FLAG(branchTraceStorage,11);
- TEST_FLAG(pebs,12);
- TEST_FLAG(speedstep,16);
- TEST_FLAG(monitor,18);
- TEST_FLAG(cpuidMaxVal,22);
- TEST_FLAG(xdBit,34);
+ TEST_FLAG(fastStrings,0);
+ TEST_FLAG(thermalControl,3);
+ TEST_FLAG(perfMonitoring,7);
+ TEST_FLAG(branchTraceStorage,11);
+ TEST_FLAG(pebs,12);
+ TEST_FLAG(speedstep,16);
+ TEST_FLAG(monitor,18);
+ TEST_FLAG(cpuidMaxVal,22);
+ TEST_FLAG(xdBit,34);
if ((cpuid_info.model == NEHALEM) ||
(cpuid_info.model == NEHALEM_BLOOMFIELD) ||
@@ -154,7 +154,7 @@ cpuFeatures_print(int cpu)
}
printf("Branch Trace Storage: \t\t");
- if (flags & (1ULL<<11))
+ if (flags & (1ULL<<11))
{
PRINT_VALUE(RED,notsupported);
}
@@ -164,7 +164,7 @@ cpuFeatures_print(int cpu)
}
printf("PEBS: \t\t\t\t");
- if (flags & (1ULL<<12))
+ if (flags & (1ULL<<12))
{
PRINT_VALUE(RED,notsupported);
}
@@ -174,7 +174,7 @@ cpuFeatures_print(int cpu)
}
printf("Intel Enhanced SpeedStep: \t");
- if (flags & (1ULL<<16))
+ if (flags & (1ULL<<16))
{
PRINT_VALUE(GREEN,enabled);
}
@@ -184,7 +184,7 @@ cpuFeatures_print(int cpu)
}
printf("MONITOR/MWAIT: \t\t\t");
- if (flags & (1ULL<<18))
+ if (flags & (1ULL<<18))
{
PRINT_VALUE(GREEN,supported);
}
@@ -194,7 +194,7 @@ cpuFeatures_print(int cpu)
}
printf("Limit CPUID Maxval: \t\t");
- if (flags & (1ULL<<22))
+ if (flags & (1ULL<<22))
{
PRINT_VALUE(RED,enabled);
}
@@ -204,7 +204,7 @@ cpuFeatures_print(int cpu)
}
printf("XD Bit Disable: \t\t");
- if (flags & (1ULL<<34))
+ if (flags & (1ULL<<34))
{
PRINT_VALUE(RED,disabled);
}
@@ -212,45 +212,53 @@ cpuFeatures_print(int cpu)
{
PRINT_VALUE(GREEN,enabled);
}
-
- printf("IP Prefetcher: \t\t\t");
- if (flags & (1ULL<<39))
- {
- PRINT_VALUE(RED,disabled);
- }
- else
- {
- PRINT_VALUE(GREEN,enabled);
- }
-
- printf("Hardware Prefetcher: \t\t");
- if (flags & (1ULL<<9))
- {
- PRINT_VALUE(RED,disabled);
- }
- else
+ if ((cpuid_info.model == NEHALEM) ||
+ (cpuid_info.model == NEHALEM_BLOOMFIELD) ||
+ (cpuid_info.model == NEHALEM_LYNNFIELD) ||
+ (cpuid_info.model == NEHALEM_WESTMERE) ||
+ (cpuid_info.model == NEHALEM_WESTMERE_M) ||
+ (cpuid_info.model == NEHALEM_EX) ||
+ (cpuid_info.model == CORE2_45) ||
+ (cpuid_info.model == CORE2_65))
{
- PRINT_VALUE(GREEN,enabled);
- }
+ printf("IP Prefetcher: \t\t\t");
+ if (flags & (1ULL<<39))
+ {
+ PRINT_VALUE(RED,disabled);
+ }
+ else
+ {
+ PRINT_VALUE(GREEN,enabled);
+ }
- printf("Adjacent Cache Line Prefetch: \t");
- if (flags & (1ULL<<19))
- {
- PRINT_VALUE(RED,disabled);
- }
- else
- {
- PRINT_VALUE(GREEN,enabled);
- }
+ printf("Hardware Prefetcher: \t\t");
+ if (flags & (1ULL<<9))
+ {
+ PRINT_VALUE(RED,disabled);
+ }
+ else
+ {
+ PRINT_VALUE(GREEN,enabled);
+ }
+ printf("Adjacent Cache Line Prefetch: \t");
+ if (flags & (1ULL<<19))
+ {
+ PRINT_VALUE(RED,disabled);
+ }
+ else
+ {
+ PRINT_VALUE(GREEN,enabled);
+ }
- printf("DCU Prefetcher: \t\t");
- if (flags & (1ULL<<37))
- {
- PRINT_VALUE(RED,disabled);
- }
- else
- {
- PRINT_VALUE(GREEN,enabled);
+ printf("DCU Prefetcher: \t\t");
+ if (flags & (1ULL<<37))
+ {
+ PRINT_VALUE(RED,disabled);
+ }
+ else
+ {
+ PRINT_VALUE(GREEN,enabled);
+ }
}
if ((cpuid_info.model == NEHALEM) ||
@@ -260,12 +268,12 @@ cpuFeatures_print(int cpu)
(cpuid_info.model == NEHALEM_WESTMERE_M) ||
(cpuid_info.model == NEHALEM_EX))
{
- printf("Intel Turbo Mode: \t");
- if (flags & (1ULL<<38))
+ printf("Intel Turbo Mode: \t\t");
+ if (flags & (1ULL<<38))
{
PRINT_VALUE(RED,disabled);
}
- else
+ else
{
PRINT_VALUE(GREEN,enabled);
}
@@ -275,11 +283,11 @@ cpuFeatures_print(int cpu)
{
printf("Intel Dynamic Acceleration: \t");
- if (flags & (1ULL<<38))
+ if (flags & (1ULL<<38))
{
PRINT_VALUE(RED,disabled);
}
- else
+ else
{
PRINT_VALUE(GREEN,enabled);
}
@@ -288,78 +296,104 @@ cpuFeatures_print(int cpu)
printf(HLINE);
}
-void
+void
cpuFeatures_enable(int cpu, CpuFeature type)
{
- uint64_t flags = msr_read(cpu, MSR_IA32_MISC_ENABLE);
-
- switch ( type )
+ if ((cpuid_info.model == NEHALEM) ||
+ (cpuid_info.model == NEHALEM_BLOOMFIELD) ||
+ (cpuid_info.model == NEHALEM_LYNNFIELD) ||
+ (cpuid_info.model == NEHALEM_WESTMERE) ||
+ (cpuid_info.model == NEHALEM_WESTMERE_M) ||
+ (cpuid_info.model == NEHALEM_EX) ||
+ (cpuid_info.model == CORE2_45) ||
+ (cpuid_info.model == CORE2_65))
{
- case HW_PREFETCHER:
- printf("HW_PREFETCHER:\t");
- flags &= ~(1ULL<<9);
- break;
-
- case CL_PREFETCHER:
- printf("CL_PREFETCHER:\t");
- flags &= ~(1ULL<<19);
- break;
-
- case DCU_PREFETCHER:
- printf("DCU_PREFETCHER:\t");
- flags &= ~(1ULL<<37);
- break;
-
- case IP_PREFETCHER:
- printf("IP_PREFETCHER:\t");
- flags &= ~(1ULL<<39);
- break;
-
- default:
- printf("ERROR: CpuFeature not supported!\n");
- break;
+ uint64_t flags = msr_read(cpu, MSR_IA32_MISC_ENABLE);
+ switch ( type )
+ {
+ case HW_PREFETCHER:
+ printf("HW_PREFETCHER:\t");
+ flags &= ~(1ULL<<9);
+ break;
+
+ case CL_PREFETCHER:
+ printf("CL_PREFETCHER:\t");
+ flags &= ~(1ULL<<19);
+ break;
+
+ case DCU_PREFETCHER:
+ printf("DCU_PREFETCHER:\t");
+ flags &= ~(1ULL<<37);
+ break;
+
+ case IP_PREFETCHER:
+ printf("IP_PREFETCHER:\t");
+ flags &= ~(1ULL<<39);
+ break;
+
+ default:
+ printf("ERROR: CpuFeature not supported!\n");
+ break;
+ }
+ PRINT_VALUE(GREEN,enabled);
+ printf("\n");
+ msr_write(cpu, MSR_IA32_MISC_ENABLE, flags);
+ }
+ else
+ {
+ printf("ERROR: Architecture does not support the manipulation of prefetchers\n");
}
- PRINT_VALUE(GREEN,enabled);
- printf("\n");
-
- msr_write(cpu, MSR_IA32_MISC_ENABLE, flags);
}
void
cpuFeatures_disable(int cpu, CpuFeature type)
{
- uint64_t flags = msr_read(cpu, MSR_IA32_MISC_ENABLE);
+ if ((cpuid_info.model == NEHALEM) ||
+ (cpuid_info.model == NEHALEM_BLOOMFIELD) ||
+ (cpuid_info.model == NEHALEM_LYNNFIELD) ||
+ (cpuid_info.model == NEHALEM_WESTMERE) ||
+ (cpuid_info.model == NEHALEM_WESTMERE_M) ||
+ (cpuid_info.model == NEHALEM_EX) ||
+ (cpuid_info.model == CORE2_45) ||
+ (cpuid_info.model == CORE2_65))
+ {
+ uint64_t flags = msr_read(cpu, MSR_IA32_MISC_ENABLE);
- switch ( type )
+ switch ( type )
+ {
+ case HW_PREFETCHER:
+ printf("HW_PREFETCHER:\t");
+ flags |= (1ULL<<9);
+ break;
+
+ case CL_PREFETCHER:
+ printf("CL_PREFETCHER:\t");
+ flags |= (1ULL<<19);
+ break;
+
+ case DCU_PREFETCHER:
+ printf("DCU_PREFETCHER:\t");
+ flags |= (1ULL<<37);
+ break;
+
+ case IP_PREFETCHER:
+ printf("IP_PREFETCHER:\t");
+ flags |= (1ULL<<39);
+ break;
+
+ default:
+ printf("ERROR: CpuFeature not supported!\n");
+ break;
+ }
+ PRINT_VALUE(RED,disabled);
+ printf("\n");
+
+ msr_write(cpu, MSR_IA32_MISC_ENABLE, flags);
+ }
+ else
{
- case HW_PREFETCHER:
- printf("HW_PREFETCHER:\t");
- flags |= (1ULL<<9);
- break;
-
- case CL_PREFETCHER:
- printf("CL_PREFETCHER:\t");
- flags |= (1ULL<<19);
- break;
-
- case DCU_PREFETCHER:
- printf("DCU_PREFETCHER:\t");
- flags |= (1ULL<<37);
- break;
-
- case IP_PREFETCHER:
- printf("IP_PREFETCHER:\t");
- flags |= (1ULL<<39);
- break;
-
- default:
- printf("ERROR: CpuFeature not supported!\n");
- break;
+ printf("ERROR: Architecture does not support the manipulation of prefetchers\n");
}
- PRINT_VALUE(RED,disabled);
- printf("\n");
-
- msr_write(cpu, MSR_IA32_MISC_ENABLE, flags);
}
diff --git a/src/cpuid.c b/src/cpuid.c
index fec599b..6a9ac47 100644
--- a/src/cpuid.c
+++ b/src/cpuid.c
@@ -6,8 +6,8 @@
* Description: Implementation of cpuid module.
* Provides API to extract cpuid info on x86 processors.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -39,6 +39,7 @@
#include <unistd.h>
#include <sched.h>
#include <time.h>
+#include <math.h>
#include <error.h>
#include <cpuid.h>
@@ -78,6 +79,8 @@ static char* core_2b_str = "Intel Core 2 45nm processor";
static char* atom_45_str = "Intel Atom 45nm processor";
static char* atom_32_str = "Intel Atom 32nm processor";
static char* atom_22_str = "Intel Atom 22nm processor";
+static char* atom_silvermont_str = "Intel Atom (Silvermont) 22nm processor";
+static char* atom_saltwell_str = "Intel Atom (Saltwell) 32nm processor";
static char* nehalem_bloom_str = "Intel Core Bloomfield processor";
static char* nehalem_lynn_str = "Intel Core Lynnfield processor";
static char* nehalem_west_str = "Intel Core Westmere processor";
@@ -86,6 +89,7 @@ static char* ivybridge_str = "Intel Core IvyBridge processor";
static char* ivybridge_ep_str = "Intel Core IvyBridge EP processor";
static char* sandybridge_ep_str = "Intel Core SandyBridge EP processor";
static char* haswell_str = "Intel Core Haswell processor";
+static char* haswell_ex_str = "Intel Core Haswell EX processor";
static char* nehalem_ex_str = "Intel Nehalem EX processor";
static char* westmere_ex_str = "Intel Westmere EX processor";
static char* xeon_mp_string = "Intel Xeon MP processor";
@@ -224,6 +228,9 @@ static int intelCpuidFunc_4(CacheLevel** cachePool)
int maxNumLevels=0;
uint32_t valid=1;
CacheLevel* pool;
+ int threadsPerCpu = 0;
+ int numThreadsPerSocket = cpuid_topology.numCoresPerSocket *
+ cpuid_topology.numThreadsPerCore;
while (valid)
{
@@ -257,6 +264,7 @@ static int intelCpuidFunc_4(CacheLevel** cachePool)
pool[i].associativity *
pool[i].lineSize;
pool[i].threads = extractBitField(eax,10,14)+1;
+ pool[i].inclusive = edx&0x2;
/* WORKAROUND cpuid reports wrong number of threads on SMT processor with SMT
* turned off */
@@ -285,21 +293,60 @@ static int intelCpuidFunc_4(CacheLevel** cachePool)
}
/* :WORKAROUND:08/13/2009 08:34:15 AM:jt: For L3 caches the value is sometimes
- * too large in here. Ask Intel what is wrong here!
- * Limit threads per Socket then to the maximum possible value.*/
- if(pool[i].threads > (int)
- (cpuid_topology.numCoresPerSocket*
- cpuid_topology.numThreadsPerCore))
+ * too large in here.
+ * See Documentation: Threads contains maximum number of threads supported
+ * by the cache.
+ * Limit threads per Socket then to the maximum possible value. If the number
+ * of threads supported by the cache does not divide the threads on the socket
+ * without remainder, the threads are adjusted to fit the multiple caches.
+ */
+ if(pool[i].threads > numThreadsPerSocket)
{
- pool[i].threads = cpuid_topology.numCoresPerSocket*
- cpuid_topology.numThreadsPerCore;
+ pool[i].threads = numThreadsPerSocket;
+ }
+ else if (((double)numThreadsPerSocket)/((double)pool[i].threads) !=
+ (double)(numThreadsPerSocket/pool[i].threads))
+ {
+ pool[i].threads = numThreadsPerSocket/
+ (int)ceil(((double)numThreadsPerSocket)/((double)pool[i].threads));
+ }
+ /* For Intel Silvermont this is not enough. It returns 4 threads and 8 cores
+ * for the L2 cache. But according to the data sheet, each 1MB L2 cache slice
+ * is shared by 2 threads/cores.
+ */
+ else if (pool[i].level == 2 &&
+ ((cpuid_info.model == ATOM_SILVERMONT_C) ||
+ (cpuid_info.model == ATOM_SILVERMONT_E) ||
+ (cpuid_info.model == ATOM_SILVERMONT_F1) ||
+ (cpuid_info.model == ATOM_SILVERMONT_F2) ||
+ (cpuid_info.model == ATOM_SILVERMONT_F3)))
+ {
+ pool[i].threads = 2;
}
- pool[i].inclusive = edx&0x2;
}
+
+
return maxNumLevels;
}
+/*
+ * Count the processors listed by the kernel by counting the lines of
+ * /proc/cpuinfo that begin with "processor".
+ *
+ * Returns the number of such lines, or 0 if /proc/cpuinfo cannot be opened.
+ */
+static int recheck_numHWThreads()
+{
+    int cpucount = 0;
+    char line[1024];
+    FILE* fp = fopen("/proc/cpuinfo","r");
+    if (fp != NULL)
+    {
+        while( fgets(line,sizeof(line),fp) )
+        {
+            if (strncmp(line, "processor", 9) == 0)
+            {
+                cpucount++;
+            }
+        }
+        /* Release the stream; the function may be called more than once
+         * and must not leak a file handle on each call. */
+        fclose(fp);
+    }
+    return cpucount;
+}
/* ##### FUNCTION DEFINITIONS - EXPORTED FUNCTIONS ################## */
@@ -383,15 +430,16 @@ int cpuid_init (void)
case HASWELL:
- case HASWELL_EX:
-
case HASWELL_M1:
case HASWELL_M2:
-
cpuid_info.name = haswell_str;
break;
+ case HASWELL_EX:
+ cpuid_info.name = haswell_ex_str;
+ break;
+
case NEHALEM_EX:
cpuid_info.name = nehalem_ex_str;
break;
@@ -418,6 +466,14 @@ int cpuid_init (void)
cpuid_info.name = atom_22_str;
break;
+ case ATOM_SILVERMONT_C:
+ case ATOM_SILVERMONT_E:
+ case ATOM_SILVERMONT_F1:
+ case ATOM_SILVERMONT_F2:
+ case ATOM_SILVERMONT_F3:
+ cpuid_info.name = atom_silvermont_str;
+ break;
+
default:
cpuid_info.name = unknown_intel_str;
break;
@@ -585,20 +641,19 @@ int cpuid_init (void)
strcat(cpuid_info.features, "RDRAND ");
cpuid_info.featureFlags |= (1<<RDRAND);
}
-
if (edx & (1<<22))
{
- strcpy(cpuid_info.features, "ACPI ");
+ strcat(cpuid_info.features, "ACPI ");
cpuid_info.featureFlags |= (1<<ACPI);
}
if (edx & (1<<23))
{
- strcpy(cpuid_info.features, "MMX ");
+ strcat(cpuid_info.features, "MMX ");
cpuid_info.featureFlags |= (1<<MMX);
}
if (edx & (1<<25))
{
- strcpy(cpuid_info.features, "SSE ");
+ strcat(cpuid_info.features, "SSE ");
cpuid_info.featureFlags |= (1<<SSE);
}
if (edx & (1<<26))
@@ -647,13 +702,17 @@ int cpuid_init (void)
if ((file = fopen(filepath, "rb")) != NULL)
{
- printf("Read config from file\n");
+ //printf("Read config from file\n");
initTopology(file);
fclose(file);
}
else
{
cpuid_topology.numHWThreads = sysconf(_SC_NPROCESSORS_CONF);
+ if (recheck_numHWThreads() != cpuid_topology.numHWThreads)
+ {
+ cpuid_topology.numHWThreads = recheck_numHWThreads();
+ }
cpu_set_t cpuSet;
CPU_ZERO(&cpuSet);
sched_getaffinity(0,sizeof(cpu_set_t), &cpuSet);
@@ -679,13 +738,16 @@ void cpuid_print (void)
printf("\t%s\n",nehalem_bloom_str);
printf("\t%s\n",nehalem_lynn_str);
printf("\t%s\n",nehalem_west_str);
- printf("\t%s\n",nehalem_ex_str);
- printf("\t%s\n",westmere_ex_str);
+ printf("\t%s (with Uncore support)\n",nehalem_ex_str);
+ printf("\t%s (with Uncore support)\n",westmere_ex_str);
printf("\t%s\n",sandybridge_str);
- printf("\t%s\n",sandybridge_ep_str);
+ printf("\t%s (with Uncore support)\n",sandybridge_ep_str);
printf("\t%s\n",ivybridge_str);
- printf("\t%s\n",ivybridge_ep_str);
- printf("\t%s\n",haswell_str);
+ printf("\t%s (with Uncore support)\n",ivybridge_ep_str);
+ printf("\t%s (with Uncore support)\n",haswell_str);
+ printf("\t%s (no Uncore support)\n",haswell_ex_str);
+ printf("\t%s\n",atom_silvermont_str);
+ printf("\t%s\n",atom_saltwell_str);
printf("\t%s\n\n",xeon_phi_string);
printf("Supported AMD processors:\n");
diff --git a/src/daemon.c b/src/daemon.c
index 8cf4150..de5bfa5 100644
--- a/src/daemon.c
+++ b/src/daemon.c
@@ -5,8 +5,8 @@
*
* Description: C Module implementing a daemon time loop
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -41,64 +41,82 @@
#include <perfmon.h>
#include <daemon.h>
-static int daemon_run = 0;
+static volatile int daemon_run = 0;
static bstring eventString;
static TimerData timeData;
+static pid_t daemonpid = 0;
void
-daemon_init(bstring str)
+daemon_start(bstring str, struct timespec interval)
{
- eventString = bstrcpy(str);
- signal(SIGINT, daemon_stop);
- signal(SIGUSR1, daemon_interrupt);
-
-}
-
-void
-daemon_start(struct timespec interval)
-{
- daemon_run = 1;
- perfmon_startCounters();
- timer_start(&timeData);
-
- while (1)
+ daemonpid = fork();
+ if (daemonpid == 0)
{
- if (daemon_run)
+ eventString = bstrcpy(str);
+ signal(SIGINT, daemon_interrupt);
+ signal(SIGUSR1, daemon_interrupt);
+ daemon_run = 1;
+ perfmon_setupEventSet(eventString, NULL);
+ perfmon_startCounters();
+ timer_start(&timeData);
+
+ while (1)
{
- timer_stop(&timeData);
- perfmon_readCounters();
- perfmon_logCounterResults( timer_print(&timeData) );
- timer_start(&timeData);
+ if (daemon_run)
+ {
+ timer_stop(&timeData);
+ perfmon_readCounters();
+ perfmon_logCounterResults( timer_print(&timeData) );
+ timer_start(&timeData);
+ }
+ else
+ {
+ break;
+ }
+ nanosleep( &interval, NULL);
}
- nanosleep( &interval, NULL);
+ signal(SIGINT, SIG_DFL);
+ signal(SIGUSR1, SIG_DFL);
+ exit(EXIT_SUCCESS);
}
}
void
daemon_stop(int sig)
{
- printf("DAEMON: EXIT on %d\n", sig);
- perfmon_stopCounters();
- signal(SIGINT, SIG_DFL);
- kill(getpid(), SIGINT);
+ if (daemonpid > 0)
+ {
+ printf("PARENT: KILL daemon with signal %d\n", sig);
+ kill(daemonpid, sig);
+ //perfmon_stopCounters();
+ }
}
void
daemon_interrupt(int sig)
{
- if (daemon_run)
+ if (sig == SIGUSR1)
{
- perfmon_stopCounters();
- daemon_run = 0;
- printf("DAEMON: STOP on %d\n",sig);
- }
- else
+ if (daemon_run)
+ {
+ perfmon_stopCounters();
+ daemon_run = 0;
+ printf("DAEMON: STOP on %d\n",sig);
+ exit(EXIT_SUCCESS);
+ }
+ else
+ {
+ perfmon_setupEventSet(eventString, NULL);
+ perfmon_startCounters();
+ daemon_run = 1;
+ printf("DAEMON: START with events %s\n",bdata(eventString));
+ }
+ } else
{
- perfmon_setupEventSet(eventString, NULL);
- perfmon_startCounters();
- daemon_run = 1;
- printf("DAEMON: START\n");
+ printf("DAEMON: EXIT on %d\n", sig);
+ daemon_run = 0;
+ exit(EXIT_SUCCESS);
}
}
diff --git a/src/ghash.c b/src/ghash.c
index 91a61bc..87e0ed0 100644
--- a/src/ghash.c
+++ b/src/ghash.c
@@ -1,6 +1,5 @@
/*
* =======================================================================================
- * Copyright (C) 2014 Jan Treibig
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/hashTable.c b/src/hashTable.c
index b5711cb..bf6c3d8 100644
--- a/src/hashTable.c
+++ b/src/hashTable.c
@@ -6,8 +6,8 @@
* Description: Hashtable implementation based on SGLIB.
* Used for Marker API result handling.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -86,7 +86,11 @@ int hashTable_get(bstring label, LikwidThreadResults** resEntry)
(*resEntry)->label = bstrcpy (label);
(*resEntry)->time = 0.0;
(*resEntry)->count = 0;
- for (int i=0; i< NUM_PMC; i++) (*resEntry)->PMcounters[i] = 0.0;
+ for (int i=0; i< NUM_PMC; i++)
+ {
+ (*resEntry)->PMcounters[i] = 0.0;
+ (*resEntry)->StartPMcounters[i] = 0.0;
+ }
g_hash_table_insert(
resPtr->hashTable,
diff --git a/src/includes/accessClient.h b/src/includes/accessClient.h
index 1c4fefe..0058182 100644
--- a/src/includes/accessClient.h
+++ b/src/includes/accessClient.h
@@ -5,8 +5,8 @@
*
* Description: Header File accessClient Module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/accessClient_types.h b/src/includes/accessClient_types.h
index 98610d7..a0c7a84 100644
--- a/src/includes/accessClient_types.h
+++ b/src/includes/accessClient_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for accessClient module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/affinity.h b/src/includes/affinity.h
index 93814e7..f347e64 100644
--- a/src/includes/affinity.h
+++ b/src/includes/affinity.h
@@ -5,8 +5,8 @@
*
* Description: Header File affinity Module
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -42,7 +42,7 @@ extern int affinity_threadGetProcessorId();
extern void affinity_pinProcess(int processorId);
extern void affinity_pinThread(int processorId);
extern const AffinityDomain* affinity_getDomain(bstring domain);
-extern void affinity_printDomains();
+extern void affinity_printDomains(FILE* OUTSTREAM);
#endif /*AFFINITY_H*/
diff --git a/src/includes/affinity_types.h b/src/includes/affinity_types.h
index 3527b75..2b08bfe 100644
--- a/src/includes/affinity_types.h
+++ b/src/includes/affinity_types.h
@@ -5,8 +5,8 @@
*
* Description: Type Definitions for affinity Module
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -35,7 +35,7 @@ typedef struct {
bstring tag;
uint32_t numberOfProcessors;
uint32_t numberOfCores;
- int* processorList;
+ int* processorList;
} AffinityDomain;
diff --git a/src/includes/allocator.h b/src/includes/allocator.h
index 47acb8d..a21555c 100644
--- a/src/includes/allocator.h
+++ b/src/includes/allocator.h
@@ -5,8 +5,8 @@
*
* Description: Header File allocator Module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: none
@@ -36,12 +36,13 @@
extern void allocator_init(int numVectors);
extern void allocator_finalize();
-extern void allocator_allocateVector(void** ptr,
- int alignment,
- uint64_t size,
- int offset,
- DataType type,
- bstring domain);
+extern void allocator_allocateVector(FILE* OUTSTREAM,
+ void** ptr,
+ int alignment,
+ uint64_t size,
+ int offset,
+ DataType type,
+ bstring domain);
#endif /*ALLOCATOR_H*/
diff --git a/src/includes/asciiBoxes.h b/src/includes/asciiBoxes.h
index 3e82632..dd37a05 100644
--- a/src/includes/asciiBoxes.h
+++ b/src/includes/asciiBoxes.h
@@ -5,8 +5,8 @@
*
* Description: Module to draw nested ascii art boxes.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -37,6 +37,6 @@
extern BoxContainer* asciiBoxes_allocateContainer(int numLines,int numColumns);
extern void asciiBoxes_addBox(BoxContainer* container, int line, int column, bstring label);
extern void asciiBoxes_addJoinedBox(BoxContainer* container, int line, int startColumn, int endColumn, bstring label);
-extern void asciiBoxes_print(BoxContainer* container);
+extern void asciiBoxes_print(FILE* OUTSTREAM, BoxContainer* container);
#endif /*ASCIIBOXES_H*/
diff --git a/src/includes/asciiBoxes_types.h b/src/includes/asciiBoxes_types.h
index 42347a1..f09c4b3 100644
--- a/src/includes/asciiBoxes_types.h
+++ b/src/includes/asciiBoxes_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for asciiBoxes module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/asciiTable.h b/src/includes/asciiTable.h
index 399a3fe..6096c4a 100644
--- a/src/includes/asciiTable.h
+++ b/src/includes/asciiTable.h
@@ -5,8 +5,8 @@
*
* Description: Module to create and print a ascii table
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/asciiTable_types.h b/src/includes/asciiTable_types.h
index 639fd08..986a8a2 100644
--- a/src/includes/asciiTable_types.h
+++ b/src/includes/asciiTable_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for asciiTable module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/barrier.h b/src/includes/barrier.h
index cf07624..5f4142d 100644
--- a/src/includes/barrier.h
+++ b/src/includes/barrier.h
@@ -5,8 +5,8 @@
*
* Description: Header File barrier Module
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/barrier_types.h b/src/includes/barrier_types.h
index 87327ff..d0abb55 100644
--- a/src/includes/barrier_types.h
+++ b/src/includes/barrier_types.h
@@ -5,8 +5,8 @@
*
* Description: Type Definitions for barrier Module
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/bitUtil.h b/src/includes/bitUtil.h
index 8e87e69..c876eea 100644
--- a/src/includes/bitUtil.h
+++ b/src/includes/bitUtil.h
@@ -6,8 +6,8 @@
* Description: Header File bitUtil Module.
* Helper routines for dealing with bit manipulations
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/cpuFeatures.h b/src/includes/cpuFeatures.h
index bca9211..9274e40 100644
--- a/src/includes/cpuFeatures.h
+++ b/src/includes/cpuFeatures.h
@@ -5,8 +5,8 @@
*
* Description: Header File of Module cpuFeatures.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/cpuFeatures_types.h b/src/includes/cpuFeatures_types.h
index a6f30d9..3e7ec5d 100644
--- a/src/includes/cpuFeatures_types.h
+++ b/src/includes/cpuFeatures_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for CpuFeature module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -38,23 +38,23 @@ typedef enum {
IP_PREFETCHER} CpuFeature;
typedef struct {
- unsigned int fastStrings:1;
- unsigned int thermalControl:1;
- unsigned int perfMonitoring:1;
- unsigned int hardwarePrefetcher:1;
- unsigned int ferrMultiplex:1;
- unsigned int branchTraceStorage:1;
- unsigned int pebs:1;
- unsigned int speedstep:1;
- unsigned int monitor:1;
- unsigned int clPrefetcher:1;
- unsigned int speedstepLock:1;
- unsigned int cpuidMaxVal:1;
- unsigned int xdBit:1;
- unsigned int dcuPrefetcher:1;
- unsigned int dynamicAcceleration:1;
- unsigned int turboMode:1;
- unsigned int ipPrefetcher:1;
+ unsigned int fastStrings:1;
+ unsigned int thermalControl:1;
+ unsigned int perfMonitoring:1;
+ unsigned int hardwarePrefetcher:1;
+ unsigned int ferrMultiplex:1;
+ unsigned int branchTraceStorage:1;
+ unsigned int pebs:1;
+ unsigned int speedstep:1;
+ unsigned int monitor:1;
+ unsigned int clPrefetcher:1;
+ unsigned int speedstepLock:1;
+ unsigned int cpuidMaxVal:1;
+ unsigned int xdBit:1;
+ unsigned int dcuPrefetcher:1;
+ unsigned int dynamicAcceleration:1;
+ unsigned int turboMode:1;
+ unsigned int ipPrefetcher:1;
} CpuFeatureFlags;
diff --git a/src/includes/cpuid.h b/src/includes/cpuid.h
index c8db288..80c426a 100644
--- a/src/includes/cpuid.h
+++ b/src/includes/cpuid.h
@@ -7,8 +7,8 @@
* Reads out cpuid information and initilaizes a global
* data structure cpuid_info.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -45,6 +45,11 @@
#define ATOM_45 0x26U
#define ATOM_32 0x36U
#define ATOM_22 0x27U
+#define ATOM_SILVERMONT_E 0x37U
+#define ATOM_SILVERMONT_C 0x4DU
+#define ATOM_SILVERMONT_F1 0x4AU
+#define ATOM_SILVERMONT_F2 0x5AU
+#define ATOM_SILVERMONT_F3 0x5DU
#define NEHALEM 0x1AU
#define NEHALEM_BLOOMFIELD 0x1AU
#define NEHALEM_LYNNFIELD 0x1EU
@@ -118,7 +123,7 @@ extern int cpuid_isInCpuset(void);
static inline int cpuid_hasFeature(FeatureBit bit)
{
- return (cpuid_info.featureFlags & (1<<bit));
+ return (cpuid_info.featureFlags & (1<<bit));
}
diff --git a/src/includes/cpuid_types.h b/src/includes/cpuid_types.h
index 4c59ccd..cccc22d 100644
--- a/src/includes/cpuid_types.h
+++ b/src/includes/cpuid_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for cpuid module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -79,6 +79,7 @@ typedef struct {
uint32_t perf_num_ctr;
uint32_t perf_width_ctr;
uint32_t perf_num_fixed_ctr;
+ int supportUncore;
} CpuInfo;
typedef struct {
diff --git a/src/includes/daemon.h b/src/includes/daemon.h
index 753507f..3272636 100644
--- a/src/includes/daemon.h
+++ b/src/includes/daemon.h
@@ -5,8 +5,8 @@
*
* Description: Header File daemon Module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -35,7 +35,7 @@
#include <time.h>
extern void daemon_init();
-extern void daemon_start(struct timespec interval);
+extern void daemon_start(bstring str, struct timespec interval);
extern void daemon_stop(int sig);
extern void daemon_interrupt(int sig);
diff --git a/src/includes/error.h b/src/includes/error.h
index c726a7c..3c1526f 100644
--- a/src/includes/error.h
+++ b/src/includes/error.h
@@ -5,8 +5,8 @@
*
* Description: Central error handling macros
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/ghash.h b/src/includes/ghash.h
index f0c233a..f33e9fb 100644
--- a/src/includes/ghash.h
+++ b/src/includes/ghash.h
@@ -59,13 +59,13 @@ typedef struct _GHashTableIter GHashTableIter;
struct _GHashTableIter
{
- /*< private >*/
- gpointer dummy1;
- gpointer dummy2;
- gpointer dummy3;
- int dummy4;
- gboolean dummy5;
- gpointer dummy6;
+ /*< private >*/
+ gpointer dummy1;
+ gpointer dummy2;
+ gpointer dummy3;
+ int dummy4;
+ gboolean dummy5;
+ gpointer dummy6;
};
char* g_strdup (const char *str);
diff --git a/src/includes/hashTable.h b/src/includes/hashTable.h
index 713a3bd..078fff9 100644
--- a/src/includes/hashTable.h
+++ b/src/includes/hashTable.h
@@ -7,8 +7,8 @@
* Wrapper for HAshTable data structure holding thread
* specific region information.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/libperfctr_types.h b/src/includes/libperfctr_types.h
index f757d0f..99a38dc 100644
--- a/src/includes/libperfctr_types.h
+++ b/src/includes/libperfctr_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for libperfctr module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/likwid.h b/src/includes/likwid.h
index 287582e..dd4cdfd 100644
--- a/src/includes/likwid.h
+++ b/src/includes/likwid.h
@@ -5,8 +5,8 @@
*
* Description: Header File of likwid marker API
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/lock.h b/src/includes/lock.h
index 3684f4c..87d1593 100644
--- a/src/includes/lock.h
+++ b/src/includes/lock.h
@@ -5,8 +5,8 @@
*
* Description: Header File Locking primitive Module
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -60,40 +60,40 @@ static int lock_check(void)
if ((lock_handle = open(filepath, O_RDONLY )) == -1 )
{
- if (errno == ENOENT)
- {
- /* There is no lock file. Proceed. */
- result = 1;
- }
- else if (errno == EACCES)
- {
- /* There is a lock file. We cannot open it. */
- result = 0;
- }
- else
- {
- /* Another error occured. Proceed. */
- result = 1;
- }
+ if (errno == ENOENT)
+ {
+ /* There is no lock file. Proceed. */
+ result = 1;
+ }
+ else if (errno == EACCES)
+ {
+ /* There is a lock file. We cannot open it. */
+ result = 0;
+ }
+ else
+ {
+ /* Another error occured. Proceed. */
+ result = 1;
+ }
}
else
{
- /* There is a lock file and we can open it. Check if we own it. */
- stat(filepath, &buf);
+ /* There is a lock file and we can open it. Check if we own it. */
+ stat(filepath, &buf);
- if ( buf.st_uid == getuid() ) /* Succeed, we own the lock */
- {
- result = 1;
- }
- else /* we are not the owner */
- {
- result = 0;
- }
+ if ( buf.st_uid == getuid() ) /* Succeed, we own the lock */
+ {
+ result = 1;
+ }
+ else /* we are not the owner */
+ {
+ result = 0;
+ }
}
if (lock_handle)
{
- close(lock_handle);
+ close(lock_handle);
}
return result;
diff --git a/src/includes/memsweep.h b/src/includes/memsweep.h
index 3dfa486..e29d4d8 100644
--- a/src/includes/memsweep.h
+++ b/src/includes/memsweep.h
@@ -5,8 +5,8 @@
*
* Description: Header File memsweep Module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -34,9 +34,9 @@
#include <types.h>
extern void memsweep_setMemoryFraction(uint64_t fraction);
-extern void memsweep_node(void);
-extern void memsweep_domain(int domainId);
-extern void memsweep_threadGroup(int* processorList, int numberOfProcessors);
+extern void memsweep_node(FILE* OUTSTREAM);
+extern void memsweep_domain(FILE* OUTSTREAM, int domainId);
+extern void memsweep_threadGroup(FILE* OUTSTREAM, int* processorList, int numberOfProcessors);
#endif /* MEMSWEEP_H */
diff --git a/src/includes/msr.h b/src/includes/msr.h
index a9ab911..45f8069 100644
--- a/src/includes/msr.h
+++ b/src/includes/msr.h
@@ -5,8 +5,8 @@
*
* Description: Header File msr Module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/multiplex.h b/src/includes/multiplex.h
index d40d0eb..c34cac8 100644
--- a/src/includes/multiplex.h
+++ b/src/includes/multiplex.h
@@ -5,8 +5,8 @@
*
* Description: Header File multiplex Module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/multiplex_types.h b/src/includes/multiplex_types.h
index 492d61c..8578a8f 100644
--- a/src/includes/multiplex_types.h
+++ b/src/includes/multiplex_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for multiplex module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/numa.h b/src/includes/numa.h
index ab253fc..3a2d0f1 100644
--- a/src/includes/numa.h
+++ b/src/includes/numa.h
@@ -5,8 +5,8 @@
*
* Description: Header File numa Module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/numa_types.h b/src/includes/numa_types.h
index c065d50..bd4afda 100644
--- a/src/includes/numa_types.h
+++ b/src/includes/numa_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for numa module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -37,10 +37,10 @@ typedef struct {
uint64_t totalMemory;
uint64_t freeMemory;
int numberOfProcessors;
- uint32_t* processors;
- uint32_t* processorsCompact;
+ uint32_t* processors;
+ uint32_t* processorsCompact;
int numberOfDistances;
- uint32_t* distances;
+ uint32_t* distances;
} NumaNode;
typedef struct {
diff --git a/src/includes/pci.h b/src/includes/pci.h
index 62aa69b..1672f1c 100644
--- a/src/includes/pci.h
+++ b/src/includes/pci.h
@@ -5,8 +5,8 @@
*
* Description: Header File pci Module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/pci_types.h b/src/includes/pci_types.h
index 2a542aa..cfb9657 100644
--- a/src/includes/pci_types.h
+++ b/src/includes/pci_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for pci module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/perfmon.h b/src/includes/perfmon.h
index 96653d1..6e9d9f9 100644
--- a/src/includes/perfmon.h
+++ b/src/includes/perfmon.h
@@ -7,8 +7,8 @@
* Configures and reads out performance counters
* on x86 based architectures. Supports multi threading.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -41,8 +41,7 @@ extern int perfmon_verbose;
extern void (*perfmon_startCountersThread) (int thread_id);
extern void (*perfmon_stopCountersThread) (int thread_id);
extern int (*perfmon_getIndex) (bstring reg, PerfmonCounterIndex* index);
-extern void (*perfmon_setupCounterThread) (int thread_id,
- PerfmonEvent* event , PerfmonCounterIndex index);
+extern void (*perfmon_setupCounterThread) (int thread_id, PerfmonEvent* event , PerfmonCounterIndex index);
extern void perfmon_initEventSet(StrUtilEventSet* eventSetConfig, PerfmonEventSet* set);
extern void perfmon_setCSVMode(int v);
@@ -53,6 +52,46 @@ extern void perfmon_finalize(void);
extern void perfmon_setupEventSet(bstring eventString, BitMask* mask);
extern double perfmon_getEventResult(int thread, int index);
extern int perfmon_setupEventSetC(char* eventCString, const char*** eventnames);
+
+
+/*
+The following structure and set of functions provide an efficient and easy interface to
+access counters from different groups and switch between them.
+
+TODO: The internals need some cleanup, but the interface should remain rather stable.
+
+Usage:
+setup = perfmon_prepareEventSetup("VIEW"), etc..
+Whenever you want to use one of the prepared setups call:
+perfmon_setupCountersForEventSet(setup)
+
+then you can startCounters, stopCounters and then
+perfmon_getEventCounterValues() and/or
+perfmon_getDerivedCounterValues()
+ */
+typedef struct {
+ const char* groupName;
+ int numberOfEvents;
+ const char** eventNames;
+ int numberOfDerivedCounters;
+ const char** derivedNames;
+
+ // Internal structures DO NOT ACCESS THEM, they need cleanup.
+ StrUtilEventSet* eventSetConfig;
+ PerfmonEventSet* perfmon_set;
+ PerfmonGroup groupSet;
+ int groupIndex;
+} EventSetup;
+
+
+extern EventSetup perfmon_prepareEventSetup(char* eventGroupString);
+extern void perfmon_setupCountersForEventSet(EventSetup * setup);
+
+// obtain values for all cores, average, min and max for the cores.
+extern void perfmon_getEventCounterValues(uint64_t* avg_values, uint64_t* max, uint64_t* min);
+extern void perfmon_getDerivedCounterValues(float* avg_values, float* max, float* min);
+/////////////////////////
+
extern void perfmon_setupCounters(void);
extern void perfmon_startCounters(void);
extern void perfmon_stopCounters(void);
@@ -61,6 +100,8 @@ extern double perfmon_getResult(int threadId, char* counterString);
extern void perfmon_printMarkerResults(bstring filepath);
extern void perfmon_logCounterResults(double time);
extern void perfmon_printCounterResults(void);
+
+
extern void perfmon_printCounters(void);
extern void perfmon_printEvents(void);
diff --git a/src/includes/perfmon_atom.h b/src/includes/perfmon_atom.h
index 5477742..201cea6 100644
--- a/src/includes/perfmon_atom.h
+++ b/src/includes/perfmon_atom.h
@@ -5,8 +5,8 @@
*
* Description: Header file of perfmon module for Atom
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/perfmon_atom_events.txt b/src/includes/perfmon_atom_events.txt
index ec92314..4ca18e4 100644
--- a/src/includes/perfmon_atom_events.txt
+++ b/src/includes/perfmon_atom_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for Intel Atom
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
diff --git a/src/includes/perfmon_core2.h b/src/includes/perfmon_core2.h
index 193f630..f737dda 100644
--- a/src/includes/perfmon_core2.h
+++ b/src/includes/perfmon_core2.h
@@ -5,8 +5,8 @@
*
* Description: Header file of perfmon module for Core 2
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -73,12 +73,12 @@ void perfmon_setupCounterThread_core2(
uint64_t flags;
uint64_t reg = core2_counter_map[index].configRegister;
int cpu_id = perfmon_threadData[thread_id].processorId;
+ uint64_t fixed_flags = msr_read(cpu_id, MSR_PERF_FIXED_CTR_CTRL);
+ perfmon_threadData[thread_id].counters[index].init = TRUE;
if ( core2_counter_map[index].type == PMC )
{
- perfmon_threadData[thread_id].counters[index].init = TRUE;
- flags = msr_read(cpu_id,reg);
- flags &= ~(0xFFFFU);
+ flags = (1<<16)|(1<<19)|(1<<22);
/* Intel with standard 8 bit event mask: [7:0] */
flags |= (event->umask<<8) + event->eventId;
@@ -101,7 +101,8 @@ void perfmon_setupCounterThread_core2(
}
else if (core2_counter_map[index].type == FIXED)
{
- perfmon_threadData[thread_id].counters[index].init = TRUE;
+ fixed_flags |= (0x2 << (index*4));
+ msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, fixed_flags);
}
}
diff --git a/src/includes/perfmon_core2_counters.h b/src/includes/perfmon_core2_counters.h
index cbade24..d6c33fb 100644
--- a/src/includes/perfmon_core2_counters.h
+++ b/src/includes/perfmon_core2_counters.h
@@ -5,8 +5,8 @@
*
* Description: Counter header file of perfmon module for Core 2
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -28,15 +28,16 @@
* =======================================================================================
*/
-#define NUM_COUNTERS_CORE2 4
-#define NUM_COUNTERS_CORE_CORE2 4
+#define NUM_COUNTERS_CORE2 5
+#define NUM_COUNTERS_CORE_CORE2 5
static PerfmonCounterMap core2_counter_map[NUM_COUNTERS_CORE2] = {
/* Fixed Counters: instructions retired, cycles unhalted core */
{"FIXC0", PMC0, FIXED, MSR_PERF_FIXED_CTR_CTRL, MSR_PERF_FIXED_CTR0, 0, 0},
{"FIXC1", PMC1, FIXED, MSR_PERF_FIXED_CTR_CTRL, MSR_PERF_FIXED_CTR1, 0, 0},
+ {"FIXC2", PMC2, FIXED, MSR_PERF_FIXED_CTR_CTRL, MSR_PERF_FIXED_CTR2, 0, 0},
/* PMC Counters: 2 40bit wide */
- {"PMC0", PMC2, PMC, MSR_PERFEVTSEL0, MSR_PMC0, 0, 0},
- {"PMC1", PMC3, PMC, MSR_PERFEVTSEL1, MSR_PMC1, 0, 0}
+ {"PMC0", PMC3, PMC, MSR_PERFEVTSEL0, MSR_PMC0, 0, 0},
+ {"PMC1", PMC4, PMC, MSR_PERFEVTSEL1, MSR_PMC1, 0, 0}
};
diff --git a/src/includes/perfmon_core2_events.txt b/src/includes/perfmon_core2_events.txt
index f8dc59f..60c6211 100644
--- a/src/includes/perfmon_core2_events.txt
+++ b/src/includes/perfmon_core2_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for Intel Core 2
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
@@ -29,9 +29,12 @@
EVENT_INSTR_RETIRED 0x00 FIXC0
UMASK_INSTR_RETIRED_ANY 0x00
-EVENT_CPU_CLK_UNHALTED 0x00 FIXC1
+EVENT_CPU_CLK_UNHALTED_CORE 0x00 FIXC1
UMASK_CPU_CLK_UNHALTED_CORE 0x00
+EVENT_CPU_CLK_UNHALTED_REF 0x00 FIXC2
+UMASK_CPU_CLK_UNHALTED_REF 0x00
+
EVENT_LOAD_BLOCK 0x03 PMC
UMASK_LOAD_BLOCK_STA 0x02
UMASK_LOAD_BLOCK_STD 0x04
diff --git a/src/includes/perfmon_haswell.h b/src/includes/perfmon_haswell.h
index 0352476..57f12af 100644
--- a/src/includes/perfmon_haswell.h
+++ b/src/includes/perfmon_haswell.h
@@ -5,8 +5,8 @@
*
* Description: Header File of perfmon module for Haswell.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -61,39 +61,78 @@ void perfmon_init_haswell(PerfmonThread *thread)
msr_write(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, 0x0ULL);
msr_write(cpu_id, MSR_PEBS_ENABLE, 0x0ULL);
- /* initialize fixed counters
- * FIXED 0: Instructions retired
- * FIXED 1: Clocks unhalted core
- * FIXED 2: Clocks unhalted ref */
- msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, 0x222ULL);
-
- /* Preinit of PERFEVSEL registers */
- flags |= (1<<22); /* enable flag */
- flags |= (1<<16); /* user mode flag */
+ lock_acquire((int*) &socket_lock[affinity_core2node_lookup[cpu_id]], cpu_id);
+ if (cpuid_info.model != HASWELL_EX && cpuid_info.supportUncore)
+ {
+ msr_write(cpu_id, MSR_UNC_CBO_0_PERFEVTSEL0, 0xAA);
+ flags = msr_read(cpu_id, MSR_UNC_CBO_0_PERFEVTSEL0);
+ if (flags != 0xAA)
+ {
+ fprintf(stdout, "The current system does not support Uncore MSRs, deactivating Uncore support\n");
+ cpuid_info.supportUncore = 0;
+ }
+ }
- msr_write(cpu_id, MSR_PERFEVTSEL0, flags);
- msr_write(cpu_id, MSR_PERFEVTSEL1, flags);
- msr_write(cpu_id, MSR_PERFEVTSEL2, flags);
- msr_write(cpu_id, MSR_PERFEVTSEL3, flags);
- if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id) ||
- lock_acquire((int*) &socket_lock[affinity_core2node_lookup[cpu_id]], cpu_id))
+ if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id) && (cpuid_info.supportUncore))
{
-
+ flags = 0x0ULL;
+ flags = (1ULL<<22)|(1ULL<<20);
+ msr_write(cpu_id, MSR_UNC_CBO_0_PERFEVTSEL0, flags);
+ msr_write(cpu_id, MSR_UNC_CBO_0_PERFEVTSEL1, flags);
+ msr_write(cpu_id, MSR_UNC_CBO_1_PERFEVTSEL0, flags);
+ msr_write(cpu_id, MSR_UNC_CBO_1_PERFEVTSEL1, flags);
+ msr_write(cpu_id, MSR_UNC_CBO_2_PERFEVTSEL0, flags);
+ msr_write(cpu_id, MSR_UNC_CBO_2_PERFEVTSEL1, flags);
+ msr_write(cpu_id, MSR_UNC_CBO_3_PERFEVTSEL0, flags);
+ msr_write(cpu_id, MSR_UNC_CBO_3_PERFEVTSEL1, flags);
+
+ msr_write(cpu_id, MSR_UNC_ARB_PERFEVTSEL0, flags);
+ msr_write(cpu_id, MSR_UNC_ARB_PERFEVTSEL1, flags);
+
+ msr_write(cpu_id, MSR_UNC_PERF_FIXED_CTRL, flags);
+
+ msr_write(cpu_id, MSR_UNC_CBO_0_CTR0, 0x0ULL);
+ msr_write(cpu_id, MSR_UNC_CBO_0_CTR1, 0x0ULL);
+ msr_write(cpu_id, MSR_UNC_CBO_1_CTR0, 0x0ULL);
+ msr_write(cpu_id, MSR_UNC_CBO_1_CTR1, 0x0ULL);
+ msr_write(cpu_id, MSR_UNC_CBO_2_CTR0, 0x0ULL);
+ msr_write(cpu_id, MSR_UNC_CBO_2_CTR1, 0x0ULL);
+ msr_write(cpu_id, MSR_UNC_CBO_3_CTR0, 0x0ULL);
+ msr_write(cpu_id, MSR_UNC_CBO_3_CTR1, 0x0ULL);
+
+ msr_write(cpu_id, MSR_UNC_ARB_CTR0, 0x0ULL);
+ msr_write(cpu_id, MSR_UNC_ARB_CTR1, 0x0ULL);
+
+ msr_write(cpu_id, MSR_UNC_PERF_FIXED_CTR, 0x0ULL);
}
-
}
+#define HAS_SETUP_BOX \
+ if (haveLock) \
+ { \
+ flags = (1ULL<<22)|(1ULL<<20); \
+ flags |= (event->umask<<8) + event->eventId; \
+ if (event->cfgBits != 0) /* set custom cfg and cmask */ \
+ { \
+ flags &= ~(0xFFFFU<<16); /* clear upper 16bits */ \
+ flags |= ((event->cmask<<8) + event->cfgBits)<<16; \
+ } \
+ msr_write(cpu_id, reg , flags); \
+ }
+
void perfmon_setupCounterThread_haswell(
int thread_id,
PerfmonEvent* event,
PerfmonCounterIndex index)
{
int haveLock = 0;
- uint64_t flags;
+ uint64_t flags = 0x0ULL;
uint32_t uflags;
uint64_t reg = haswell_counter_map[index].configRegister;
int cpu_id = perfmon_threadData[thread_id].processorId;
+ uint64_t fixed_flags = msr_read(cpu_id, MSR_PERF_FIXED_CTR_CTRL);
+ uint64_t orig_fixed_flags = fixed_flags;
perfmon_threadData[thread_id].counters[index].init = TRUE;
if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id))
@@ -105,8 +144,7 @@ void perfmon_setupCounterThread_haswell(
{
case PMC:
- flags = msr_read(cpu_id,reg);
- flags &= ~(0xFFFFU); /* clear lower 16bits */
+ flags = (1<<22)|(1<<16);
/* Intel with standard 8 bit event mask: [7:0] */
flags |= (event->umask<<8) + event->eventId;
@@ -124,20 +162,35 @@ void perfmon_setupCounterThread_haswell(
LLU_CAST reg,
LLU_CAST flags);
}
-
msr_write(cpu_id, reg , flags);
break;
case FIXED:
+ fixed_flags |= (0x2 << (index*4));
break;
case POWER:
break;
+ case CBOX0:
+ case CBOX1:
+ case CBOX2:
+ case CBOX3:
+ case UBOX:
+ if (cpuid_info.supportUncore)
+ {
+ HAS_SETUP_BOX;
+ }
+ break;
+
default:
/* should never be reached */
break;
}
+ if (fixed_flags != orig_fixed_flags)
+ {
+ msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, fixed_flags);
+ }
}
void perfmon_startCountersThread_haswell(int thread_id)
@@ -146,6 +199,7 @@ void perfmon_startCountersThread_haswell(int thread_id)
uint64_t flags = 0x0ULL;
uint32_t uflags = 0x10000UL; /* Clear freeze bit */
int cpu_id = perfmon_threadData[thread_id].processorId;
+ int start_uncore = 0;
if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id))
{
@@ -176,7 +230,14 @@ void perfmon_startCountersThread_haswell(int thread_id)
perfmon_threadData[thread_id].counters[i].counterData =
power_read(cpu_id, haswell_counter_map[i].counterRegister);
}
+ break;
+ case CBOX0:
+ case CBOX1:
+ case CBOX2:
+ case CBOX3:
+ case UBOX:
+ start_uncore = 1;
break;
default:
@@ -186,6 +247,11 @@ void perfmon_startCountersThread_haswell(int thread_id)
}
}
+ if (haveLock && start_uncore && cpuid_info.supportUncore)
+ {
+ msr_write(cpu_id, MSR_UNC_PERF_GLOBAL_CTRL, (1ULL<<29));
+ }
+
if (perfmon_verbose)
{
printf("perfmon_start_counters: Write Register 0x%X , \
@@ -193,7 +259,6 @@ void perfmon_startCountersThread_haswell(int thread_id)
printf("perfmon_start_counters: Write Register 0x%X , \
Flags: 0x%llX \n",MSR_UNCORE_PERF_GLOBAL_CTRL, LLU_CAST uflags);
}
-
msr_write(cpu_id, MSR_PERF_GLOBAL_CTRL, flags);
msr_write(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, 0x30000000FULL);
}
@@ -201,6 +266,7 @@ void perfmon_startCountersThread_haswell(int thread_id)
void perfmon_stopCountersThread_haswell(int thread_id)
{
uint64_t flags;
+ uint64_t tmp;
uint32_t uflags = 0x10100UL; /* Set freeze bit */
uint64_t counter_result = 0x0ULL;
int haveLock = 0;
@@ -212,6 +278,10 @@ void perfmon_stopCountersThread_haswell(int thread_id)
}
msr_write(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL);
+ if (haveLock && cpuid_info.supportUncore)
+ {
+ msr_write(cpu_id, MSR_UNC_PERF_GLOBAL_CTRL, 0x0ULL);
+ }
for ( int i=0; i < perfmon_numCountersHaswell; i++ )
{
@@ -241,6 +311,18 @@ void perfmon_stopCountersThread_haswell(int thread_id)
thermal_read(cpu_id);
break;
+ case CBOX0:
+ case CBOX1:
+ case CBOX2:
+ case CBOX3:
+ case UBOX:
+ if(haveLock && cpuid_info.supportUncore)
+ {
+ perfmon_threadData[thread_id].counters[i].counterData =
+ msr_read(cpu_id, haswell_counter_map[i].counterRegister);
+ }
+ break;
+
default:
/* should never be reached */
break;
@@ -261,12 +343,22 @@ void perfmon_readCountersThread_haswell(int thread_id)
uint64_t counter_result = 0x0ULL;
int haveLock = 0;
int cpu_id = perfmon_threadData[thread_id].processorId;
+ uint64_t core_flags = 0x0ULL;
+ uint64_t uncore_flags = 0x0ULL;
if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id))
{
haveLock = 1;
}
+ core_flags = msr_read(cpu_id, MSR_PERF_GLOBAL_CTRL);
+ msr_write(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL);
+ if (cpuid_info.supportUncore)
+ {
+ uncore_flags = msr_read(cpu_id, MSR_UNC_PERF_GLOBAL_CTRL);
+ msr_write(cpu_id, MSR_UNC_PERF_GLOBAL_CTRL, 0x0ULL);
+ }
+
for ( int i=0; i<perfmon_numCountersHaswell; i++ )
{
if (perfmon_threadData[thread_id].counters[i].init == TRUE)
@@ -289,6 +381,17 @@ void perfmon_readCountersThread_haswell(int thread_id)
power_read(cpu_id, haswell_counter_map[i].counterRegister);
break;
+ case CBOX0:
+ case CBOX1:
+ case CBOX2:
+ case CBOX3:
+ case UBOX:
+ if(haveLock)
+ {
+ perfmon_threadData[thread_id].counters[i].counterData =
+ msr_read(cpu_id, haswell_counter_map[i].counterRegister);
+ }
+ break;
default:
/* should never be reached */
break;
@@ -297,5 +400,10 @@ void perfmon_readCountersThread_haswell(int thread_id)
}
}
}
+ if (cpuid_info.supportUncore && uncore_flags > 0x0ULL)
+ {
+ msr_write(cpu_id, MSR_UNC_PERF_GLOBAL_CTRL, uncore_flags);
+ }
+ msr_write(cpu_id, MSR_PERF_GLOBAL_CTRL, core_flags);
}
diff --git a/src/includes/perfmon_haswell_counters.h b/src/includes/perfmon_haswell_counters.h
index 4302efe..3dc7247 100644
--- a/src/includes/perfmon_haswell_counters.h
+++ b/src/includes/perfmon_haswell_counters.h
@@ -5,8 +5,8 @@
*
* Description: Counter Header File of perfmon module for Haswell.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -29,7 +29,8 @@
*/
#define NUM_COUNTERS_HASWELL 12
-#define NUM_COUNTERS_CORE_HASWELL 7
+#define NUM_COUNTERS_UNCORE_HASWELL 4
+#define NUM_COUNTERS_CORE_HASWELL 8
static PerfmonCounterMap haswell_counter_map[NUM_COUNTERS_HASWELL] = {
/* Fixed Counters: instructions retired, cycles unhalted core */
@@ -45,8 +46,8 @@ static PerfmonCounterMap haswell_counter_map[NUM_COUNTERS_HASWELL] = {
{"TMP0", PMC7, THERMAL, 0, 0, 0, 0},
/* RAPL counters */
{"PWR0", PMC8, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0},
- {"PWR1", PMC9, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0},
- {"PWR2", PMC10, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0},
- {"PWR3", PMC11, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0},
+ {"PWR1", PMC9, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0},
+ {"PWR2", PMC10, POWER, 0, MSR_PP1_ENERGY_STATUS, 0, 0},
+ {"PWR3", PMC11, POWER, 0, MSR_DRAM_ENERGY_STATUS, 0, 0},
};
diff --git a/src/includes/perfmon_haswell_events.txt b/src/includes/perfmon_haswell_events.txt
index bb9d56d..f958a3a 100644
--- a/src/includes/perfmon_haswell_events.txt
+++ b/src/includes/perfmon_haswell_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for Intel Ivy Bridge
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
@@ -35,6 +35,9 @@ UMASK_PWR_PKG_ENERGY 0x00
EVENT_PWR_PP0_ENERGY 0x00 PWR1
UMASK_PWR_PP0_ENERGY 0x00
+EVENT_PWR_PP1_ENERGY 0x00 PWR2
+UMASK_PWR_PP1_ENERGY 0x00
+
EVENT_PWR_DRAM_ENERGY 0x00 PWR3
UMASK_PWR_DRAM_ENERGY 0x00
@@ -49,6 +52,7 @@ UMASK_CPU_CLK_UNHALTED_REF 0x00
EVENT_LD_BLOCKS 0x03 PMC
UMASK_LD_BLOCKS_STORE_FORWARD 0x02
+UMASK_LD_BLOCKS_NO_SR 0x08
EVENT_MISALIGN_MEM_REF 0x05 PMC
UMASK_MISALIGN_MEM_REF_LOADS 0x01
@@ -56,7 +60,7 @@ UMASK_MISALIGN_MEM_REF_STORES 0x02
UMASK_MISALIGN_MEM_REF_ANY 0x03
EVENT_LD_BLOCKS_PARTIAL 0x07 PMC
-UMASK_LD_BLOCKS_PARTIAL_ADDRESS_ALIAS 0x01 PMC
+UMASK_LD_BLOCKS_PARTIAL_ADDRESS_ALIAS 0x01
EVENT_DTLB_LOAD_MISSES 0x08 PMC
UMASK_DTLB_LOAD_MISSES_CAUSES_A_WALK 0x01
@@ -69,8 +73,8 @@ UMASK_DTLB_LOAD_MISSES_STLB_HIT_2M 0x40
UMASK_DTLB_LOAD_MISSES_STLB_HIT 0x60
UMASK_DTLB_LOAD_MISSES_PDE_CACHE_MISS 0x80
-EVENT_RECOVERY_CYCLES 0x0D PMC
-UMASK_UOPS_ISSUED_ANY 0x03
+EVENT_INT_MISC 0x0D PMC
+UMASK_INT_MISC_RECOVERY_CYCLES 0x03 0x01
EVENT_UOPS_ISSUED 0x0E PMC
UMASK_UOPS_ISSUED_ANY 0x01
@@ -84,7 +88,7 @@ UMASK_L2_RQSTS_ALL_DEMAND_DATA_RD_HIT 0x41
UMASK_L2_RQSTS_ALL_DEMAND_DATA_RD 0xE1
UMASK_L2_RQSTS_RFO_HIT 0x42
UMASK_L2_RQSTS_RFO_MISS 0x22
-UMASK_L2_RQSTS_RFO_ANY 0xE2
+UMASK_L2_RQSTS_ALL_RFO 0xE2
UMASK_L2_RQSTS_CODE_RD_HIT 0x44
UMASK_L2_RQSTS_CODE_RD_MISS 0x24
UMASK_L2_RQSTS_ALL_DEMAND_MISS 0x27
@@ -105,7 +109,7 @@ UMASK_LONGEST_LAT_CACHE_MISS 0x41
EVENT_CPU_CLOCK_UNHALTED 0x3C PMC
UMASK_CPU_CLOCK_UNHALTED_THREAD_P 0x00
-UMASK_CPU_CLOCK_UNHALTED_REF_P 0x01
+UMASK_CPU_CLOCK_UNHALTED_REF_XCLK 0x01
EVENT_L1D_PEND_MISS 0x48 PMC1
UMASK_L1D_PEND_MISS_PENDING 0x01
@@ -127,6 +131,9 @@ UMASK_LOAD_HIT_PRE_HW_PF 0x02
EVENT_L1D 0x51 PMC
UMASK_L1D_REPLACEMENT 0x01
+UMASK_L1D_ALLOCATED_IN_M 0x02
+UMASK_L1D_M_EVICT 0x04
+UMASK_L1D_ALL_M_REPLACEMENT 0x08
EVENT_MOVE_ELIMINATION 0x58 PMC
UMASK_MOVE_ELIMINATION_INT_NOT_ELIMINATED 0x04
@@ -154,18 +161,27 @@ UMASK_CACHE_LOCK_CYCLES_CACHE_LOCK_DURATION 0x02
EVENT_IDQ 0x79 PMC
UMASK_IDQ_EMPTY 0x02
UMASK_IDQ_MITE_UOPS 0x04
+UMASK_IDQ_MITE_UOPS_CYCLES 0x04 0x00 0x01
UMASK_IDQ_DSB_UOPS 0x08
+UMASK_IDQ_DSB_UOPS_CYCLES 0x08 0x00 0x01
UMASK_IDQ_MS_DSB_UOPS 0x10
+UMASK_IDQ_MS_DSB_UOPS_CYCLES 0x10 0x00 0x01
UMASK_IDQ_MS_MITE_UOPS 0x20
+UMASK_IDQ_MS_MITE_UOPS_CYCLES 0x20 0x00 0x01
UMASK_IDQ_MS_UOPS 0x30
+UMASK_IDQ_MS_UOPS_CYCLES 0x30 0x00 0x01
+UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS 0x18 0x00 0x01
UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS 0x18 0x00 0x01
UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS 0x18 0x00 0x04
-UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS 0x24 0x00 0x01
+UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS 0x24 0x00 0x01
UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS 0x24 0x00 0x04
-UMASK_IDQ_ALL_MITE_ALL_UOPS 0x3C
+UMASK_IDQ_MITE_ALL_UOPS 0x3C
EVENT_ICACHE 0x80 PMC
+UMASK_ICACHE_HITS 0x01
UMASK_ICACHE_MISSES 0x02
+UMASK_ICACHE_ACCESSES 0x03
+UMASK_ICACHE_IFETCH_STALL 0x04
EVENT_ITLB_MISSES 0x85 PMC
UMASK_ITLB_MISSES_CAUSES_A_WALK 0x01
@@ -185,28 +201,19 @@ EVENT_BR_INST_EXEC 0x88 PMC
UMASK_BR_INST_EXEC_COND_TAKEN 0x81
UMASK_BR_INST_EXEC_COND_NON_TAKEN 0x41
UMASK_BR_INST_EXEC_DIRECT_JMP_TAKEN 0x82
-UMASK_BR_INST_EXEC_DIRECT_JMP_NON_TAKEN 0x42
UMASK_BR_INST_EXEC_INDIRECT_JMP_NON_CALL_RET_TAKEN 0x84
-UMASK_BR_INST_EXEC_INDIRECT_JMP_NON_CALL_RET_NON_TAKEN 0x44
UMASK_BR_INST_EXEC_RETURN_NEAR_TAKEN 0x88
-UMASK_BR_INST_EXEC_RETURN_NEAR_NON_TAKEN 0x48
UMASK_BR_INST_EXEC_DIRECT_NEAR_CALL_TAKEN 0x90
-UMASK_BR_INST_EXEC_DIRECT_NEAR_CALL_NON_TAKEN 0x50
-UMASK_BR_INST_EXEC_INDIRECT_NEAR_CALL_TAKEN 0xA0
-UMASK_BR_INST_EXEC_INDIRECT_NEAR_CALL_NON_TAKEN 0x60
-UMASK_BR_INST_EXEC_ALL_BRANCHES 0xFF
+UMASK_BR_INST_EXEC_INDIRECT_NEAR_CALL_TAKEN 0xA0
+UMASK_BR_INST_EXEC_ALL_BRANCHES 0xFF
EVENT_BR_MISP_EXEC 0x89 PMC
UMASK_BR_MISP_EXEC_COND_TAKEN 0x81
UMASK_BR_MISP_EXEC_COND_NON_TAKEN 0x41
UMASK_BR_MISP_EXEC_INDIRECT_JMP_NON_CALL_RET_TAKEN 0x84
-UMASK_BR_MISP_EXEC_INDIRECT_JMP_NON_CALL_RET_NON_TAKEN 0x44
UMASK_BR_MISP_EXEC_RETURN_NEAR_TAKEN 0x88
-UMASK_BR_MISP_EXEC_RETURN_NEAR_NON_TAKEN 0x48
UMASK_BR_MISP_EXEC_DIRECT_NEAR_CALL_TAKEN 0x90
-UMASK_BR_MISP_EXEC_DIRECT_NEAR_CALL_NON_TAKEN 0x50
UMASK_BR_MISP_EXEC_INDIRECT_NEAR_CALL_TAKEN 0xA0
-UMASK_BR_MISP_EXEC_INDIRECT_NEAR_CALL_NON_TAKEN 0x60
UMASK_BR_MISP_EXEC_ALL_BRANCHES 0xFF
EVENT_IDQ_UOPS_NOT_DELIVERED 0x9C PMC
@@ -230,9 +237,18 @@ UMASK_RESOURCE_STALLS_ROB 0x10
EVENT_CYCLE_ACTIVITY 0xA3 PMC
UMASK_CYCLE_ACTIVITY_CYCLES_L2_PENDING 0x01
+UMASK_CYCLE_ACTIVITY_CYCLES_L2_PENDING_CYCLES 0x01 0x00 0x02
UMASK_CYCLE_ACTIVITY_CYCLES_LDM_PENDING 0x02
-UMASK_CYCLE_ACTIVITY_STALL_L2_PENDING 0x05
-UMASK_CYCLE_ACTIVITY_L1D_PENDING 0x08
+UMASK_CYCLE_ACTIVITY_CYCLES_LDM_PENDING_CYCLES 0x01 0x00 0x02
+UMASK_CYCLE_ACTIVITY_STALLS_L2_PENDING 0x05
+
+EVENT_CYCLE_ACTIVITY_CYCLES 0xA3 PMC2
+UMASK_CYCLE_ACTIVITY_CYCLES_L1D_PENDING 0x08
+UMASK_CYCLE_ACTIVITY_CYCLES_L1D_PENDING_CYCLES 0x08 0x00 0x08
+
+EVENT_CYCLE_ACTIVITY_STALLS 0xA3 PMC2
+UMASK_CYCLE_ACTIVITY_STALLS_L1D_PENDING 0x0C
+UMASK_CYCLE_ACTIVITY_STALLS_L1D_PENDING_CYCLES 0x0C 0x00 0x0C
EVENT_LSD_UOPS 0xA8 PMC
UMASK_LSD_UOPS 0x01
@@ -265,7 +281,6 @@ UMASK_TLB_FLUSH_STLB_ANY 0x20
EVENT_INST_RETIRED 0xC0 PMC1
UMASK_INST_RETIRED_ANY_P 0x00
-UMASK_INST_RETIRED_ALL 0x01
EVENT_OTHER_ASSISTS 0xC1 PMC
UMASK_OTHER_ASSISTS_AVX_TO_SSE 0x08
@@ -292,11 +307,11 @@ UMASK_BR_INST_RETIRED_NEAR_TAKEN 0x20
UMASK_BR_INST_RETIRED_FAR_BRANCH 0x40
EVENT_BR_MISP_RETIRED 0xC5 PMC
-UMASK_BR_MISP_RETIRED_ALL_BRANCHES 0x00
+UMASK_BR_MISP_RETIRED_ALL_BRANCHES_1 0x00
UMASK_BR_MISP_RETIRED_CONDITIONAL 0x01
-UMASK_BR_MISP_RETIRED_ALL_BRANCHES 0x04
-UMASK_BR_MISP_RETIRED_NOT_TAKEN 0x10
-UMASK_BR_MISP_RETIRED_TAKEN 0x20
+UMASK_BR_MISP_RETIRED_ALL_BRANCHES_2 0x04
+UMASK_BR_MISP_RETIRED_NEAR_NOT_TAKEN 0x10
+UMASK_BR_MISP_RETIRED_NEAR_TAKEN 0x20
EVENT_FP_ASSIST 0xCA PMC
UMASK_FP_ASSIST_X87_OUTPUT 0x02
@@ -309,7 +324,7 @@ EVENT_ROB_MISC_EVENT_LBR_INSERTS 0xCC PMC
UMASK_ROB_MISC_EVENT_LBR_INSERTS 0x20
EVENT_MEM_TRANS_RETIRED_LOAD_LAT 0xCD PMC
-UMASK_MEM_TRANS_RETIRED_LOAD_LAT 0x01
+UMASK_MEM_TRANS_RETIRED_LOAD_LATENCY 0x01
EVENT_MEM_UOP_RETIRED 0xD0 PMC
UMASK_MEM_UOP_RETIRED_LOADS 0x81
@@ -321,18 +336,23 @@ UMASK_MEM_UOP_RETIRED_STORES_LOCK 0x22
UMASK_MEM_UOP_RETIRED_LOADS_SPLIT 0x41
UMASK_MEM_UOP_RETIRED_STORES_SPLIT 0x42
-EVENT_MEMLOAD_UOPS_RETIRED 0xD1 PMC
-UMASK_MEMLOAD_UOPS_RETIRED_L1_HIT 0x01
-UMASK_MEMLOAD_UOPS_RETIRED_L2_HIT 0x02
-UMASK_MEMLOAD_UOPS_RETIRED_LLC_HIT 0x04
-UMASK_MEMLOAD_UOPS_RETIRED_L2_MISS 0x10
-UMASK_MEMLOAD_UOPS_RETIRED_HIT_LFB 0x40
-
-EVENT_MEM_LOAD_UOPS_LLC_HIT_RETIRED 0xD2 PMC
-UMASK_MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_MISS 0x01
-UMASK_MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_HIT 0x02
-UMASK_MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_HITM 0x04
-UMASK_MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_NONE 0x08
+EVENT_MEM_LOAD_UOPS_RETIRED 0xD1 PMC
+UMASK_MEM_LOAD_UOPS_RETIRED_L1_HIT 0x01
+UMASK_MEM_LOAD_UOPS_RETIRED_L1_MISS 0x08
+UMASK_MEM_LOAD_UOPS_RETIRED_L1_ALL 0x09
+UMASK_MEM_LOAD_UOPS_RETIRED_L2_HIT 0x02
+UMASK_MEM_LOAD_UOPS_RETIRED_L2_MISS 0x10
+UMASK_MEM_LOAD_UOPS_RETIRED_L2_ALL 0x12
+UMASK_MEM_LOAD_UOPS_RETIRED_L3_HIT 0x04
+UMASK_MEM_LOAD_UOPS_RETIRED_L3_MISS 0x20
+UMASK_MEM_LOAD_UOPS_RETIRED_L3_ALL 0x24
+UMASK_MEM_LOAD_UOPS_RETIRED_HIT_LFB 0x40
+
+EVENT_MEM_LOAD_UOPS_L3_HIT_RETIRED 0xD2 PMC
+UMASK_MEM_LOAD_UOPS_L3_HIT_RETIRED_XSNP_MISS 0x01
+UMASK_MEM_LOAD_UOPS_L3_HIT_RETIRED_XSNP_HIT 0x02
+UMASK_MEM_LOAD_UOPS_L3_HIT_RETIRED_XSNP_HITM 0x04
+UMASK_MEM_LOAD_UOPS_L3_HIT_RETIRED_XSNP_NONE 0x08
EVENT_MEM_LOAD_UOPS_LLC_MISS_RETIRED 0xD3 PMC
UMASK_MEM_LOAD_UOPS_LLC_MISS_RETIRED_LOCAL_DRAM 0x01
@@ -374,6 +394,8 @@ UMASK_TX_EXEC_MISC1 0x01
UMASK_TX_EXEC_MISC2 0x02
UMASK_TX_EXEC_MISC3 0x04
UMASK_TX_EXEC_MISC4 0x08
+UMASK_TX_EXEC_MISC5 0x10
+
EVENT_HLE_RETIRED 0xC8 PMC
UMASK_HLE_RETIRED_START 0x01
@@ -394,49 +416,3 @@ UMASK_RTM_RETIRED_ABORTED_MISC2 0x10
UMASK_RTM_RETIRED_ABORTED_MISC3 0x20
UMASK_RTM_RETIRED_ABORTED_MISC4 0x40
UMASK_RTM_RETIRED_ABORTED_MISC5 0x80
-
-EVENT_UNC_CBO_XSNP_RESPONSE 0x22 UPMC
-UMASK_UNC_CBO_XSNP_RESPONSE_EXT_FILTER_MISS 0x21
-UMASK_UNC_CBO_XSNP_RESPONSE_XCORE_FILTER_MISS 0x41
-UMASK_UNC_CBO_XSNP_RESPONSE_EVICTION_FILTER_MISS 0x81
-UMASK_UNC_CBO_XSNP_RESPONSE_EXT_FILTER_INVAL 0x22
-UMASK_UNC_CBO_XSNP_RESPONSE_XCORE_FILTER_INVAL 0x42
-UMASK_UNC_CBO_XSNP_RESPONSE_EVICTION_FILTER_INVAL 0x82
-UMASK_UNC_CBO_XSNP_RESPONSE_EXT_FILTER_HIT 0x24
-UMASK_UNC_CBO_XSNP_RESPONSE_XCORE_FILTER_HIT 0x44
-UMASK_UNC_CBO_XSNP_RESPONSE_EVICTION_FILTER_HIT 0x84
-UMASK_UNC_CBO_XSNP_RESPONSE_EXT_FILTER_HITM 0x28
-UMASK_UNC_CBO_XSNP_RESPONSE_XCORE_FILTER_HITM 0x48
-UMASK_UNC_CBO_XSNP_RESPONSE_EVICTION_FILTER_HITM 0x88
-UMASK_UNC_CBO_XSNP_RESPONSE_EXT_FILTER_INVAL_M 0x20
-UMASK_UNC_CBO_XSNP_RESPONSE_XCORE_FILTER_INVAL_M 0x50
-UMASK_UNC_CBO_XSNP_RESPONSE_EVICTION_FILTER_INVAL_M 0x90
-
-EVENT_UNC_CBO_CACHE_LOOKUP 0x34 UPMC
-UMASK_UNC_CBO_CACHE_LOOKUP_READ_FILTER_M 0x11
-UMASK_UNC_CBO_CACHE_LOOKUP_WRITE_FILTER_M 0x26
-UMASK_UNC_CBO_CACHE_LOOKUP_EXTSNP_FILTER_M 0x48
-UMASK_UNC_CBO_CACHE_LOOKUP_ANY_FILTER_M 0x81
-UMASK_UNC_CBO_CACHE_LOOKUP_READ_FILTER_ES 0x16
-UMASK_UNC_CBO_CACHE_LOOKUP_WRITE_FILTER_ES 0x28
-UMASK_UNC_CBO_CACHE_LOOKUP_EXTSNP_FILTER_ES 0x41
-UMASK_UNC_CBO_CACHE_LOOKUP_ANY_FILTER_ES 0x86
-UMASK_UNC_CBO_CACHE_LOOKUP_READ_FILTER_I 0x18
-UMASK_UNC_CBO_CACHE_LOOKUP_WRITE_FILTER_I 0x21
-UMASK_UNC_CBO_CACHE_LOOKUP_EXTSNP_FILTER_I 0x46
-UMASK_UNC_CBO_CACHE_LOOKUP_ANY_FILTER_I 0x88
-
-EVENT_UNC_ARB_TRK_OCCUPANCY_ALL 0x80 UPMC
-UMASK_UNC_ARB_TRK_OCCUPANCY_ALL 0x01
-
-EVENT_UNC_ARB_TRK_REQUEST 0x81 UPMC
-UMASK_UNC_ARB_TRK_REQUEST_ALL 0x01
-UMASK_UNC_ARB_TRK_REQUEST_WRITES 0x20
-UMASK_UNC_ARB_TRK_REQUEST_EVICTIONS 0x80
-
-EVENT_UNC_ARB_COH_TRK_OCCUPANCY_ALL 0x83 UPMC
-UMASK_UNC_ARB_COH_TRK_OCCUPANCY_ALL 0x01
-
-EVENT_UNC_ARB_COH_TRK_REQUEST_ALL 0x84 UPMC
-UMASK_UNC_ARB_COH_TRK_REQUEST_ALL 0x01
-
diff --git a/src/includes/perfmon_interlagos.h b/src/includes/perfmon_interlagos.h
index b96b944..d28bb18 100644
--- a/src/includes/perfmon_interlagos.h
+++ b/src/includes/perfmon_interlagos.h
@@ -5,8 +5,8 @@
*
* Description: Header file of perfmon module for AMD Interlagos
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -60,13 +60,13 @@ void perfmon_init_interlagos(PerfmonThread *thread)
msr_write(cpu_id, MSR_AMD15_NB_PERFEVTSEL3, 0x0ULL);
}
- flags |= (1<<16); /* user mode flag */
- msr_write(cpu_id, MSR_AMD15_PERFEVTSEL0, flags);
+ //flags |= (1<<16); /* user mode flag */
+ /*msr_write(cpu_id, MSR_AMD15_PERFEVTSEL0, flags);
msr_write(cpu_id, MSR_AMD15_PERFEVTSEL1, flags);
msr_write(cpu_id, MSR_AMD15_PERFEVTSEL2, flags);
msr_write(cpu_id, MSR_AMD15_PERFEVTSEL3, flags);
msr_write(cpu_id, MSR_AMD15_PERFEVTSEL4, flags);
- msr_write(cpu_id, MSR_AMD15_PERFEVTSEL5, flags);
+ msr_write(cpu_id, MSR_AMD15_PERFEVTSEL5, flags);*/
}
@@ -87,9 +87,7 @@ void perfmon_setupCounterThread_interlagos(
return;
}
- flags = msr_read(cpu_id,reg);
- flags &= ~(0xFFFFU);
-
+ flags = (1<<16);
/* AMD uses a 12 bit Event mask: [35:32][7:0] */
flags |= ((uint64_t)(event->eventId>>8)<<32) + (event->umask<<8) + (event->eventId & ~(0xF00U));
diff --git a/src/includes/perfmon_interlagos_counters.h b/src/includes/perfmon_interlagos_counters.h
index 136d0f7..a593f5a 100644
--- a/src/includes/perfmon_interlagos_counters.h
+++ b/src/includes/perfmon_interlagos_counters.h
@@ -5,8 +5,8 @@
*
* Description: Counter Header File of perfmon module for AMD Interlagos
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/perfmon_interlagos_events.txt b/src/includes/perfmon_interlagos_events.txt
index 16f0a9b..1fa0a44 100644
--- a/src/includes/perfmon_interlagos_events.txt
+++ b/src/includes/perfmon_interlagos_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for AMD Interlagos
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
diff --git a/src/includes/perfmon_ivybridge.h b/src/includes/perfmon_ivybridge.h
index 9de9f6d..0615c27 100644
--- a/src/includes/perfmon_ivybridge.h
+++ b/src/includes/perfmon_ivybridge.h
@@ -5,8 +5,8 @@
*
* Description: Header File of perfmon module for Ivy Bridge.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -65,16 +65,16 @@ void perfmon_init_ivybridge(PerfmonThread *thread)
* FIXED 0: Instructions retired
* FIXED 1: Clocks unhalted core
* FIXED 2: Clocks unhalted ref */
- msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, 0x222ULL);
+ //msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, 0x222ULL);
/* Preinit of PERFEVSEL registers */
- flags |= (1<<22); /* enable flag */
- flags |= (1<<16); /* user mode flag */
+ //flags |= (1<<22); /* enable flag */
+ //flags |= (1<<16); /* user mode flag */
- msr_write(cpu_id, MSR_PERFEVTSEL0, flags);
+ /*msr_write(cpu_id, MSR_PERFEVTSEL0, flags);
msr_write(cpu_id, MSR_PERFEVTSEL1, flags);
msr_write(cpu_id, MSR_PERFEVTSEL2, flags);
- msr_write(cpu_id, MSR_PERFEVTSEL3, flags);
+ msr_write(cpu_id, MSR_PERFEVTSEL3, flags);*/
/* TODO Robust implementation which also works if stuff is not there */
if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id) ||
@@ -150,11 +150,12 @@ void perfmon_init_ivybridge(PerfmonThread *thread)
pci_write(cpu_id, PCI_IMC_DEVICE_CH_3, PCI_UNC_MC_PMON_CTR_3_A, 0x0U);
pci_write(cpu_id, PCI_IMC_DEVICE_CH_3, PCI_UNC_MC_PMON_CTR_3_B, 0x0U);
+#if 0
/* FIXME: Not yet tested/ working due to BIOS issues on test
* machines */
/* QPI registers can be zeroed with single write */
- uflags = 0x0113UL; /*enable freeze (bit 16), freeze (bit 8), reset */
+ uflags = 0x0103UL; /* freeze (bit 8), reset */
pci_write(cpu_id, PCI_QPI_DEVICE_PORT_0, PCI_UNC_QPI_PMON_BOX_CTL, uflags);
pci_write(cpu_id, PCI_QPI_DEVICE_PORT_1, PCI_UNC_QPI_PMON_BOX_CTL, uflags);
uflags = 0x0UL;
@@ -168,7 +169,7 @@ void perfmon_init_ivybridge(PerfmonThread *thread)
pci_write(cpu_id, PCI_QPI_DEVICE_PORT_1, PCI_UNC_QPI_PMON_CTL_2, uflags);
pci_write(cpu_id, PCI_QPI_DEVICE_PORT_1, PCI_UNC_QPI_PMON_CTL_3, uflags);
-#if 0
+
/* Cbo counters */
uflags = 0xF0103UL; /*enable freeze (bit 8), reset */
msr_write(cpu_id, MSR_UNC_C0_PMON_BOX_CTL, uflags);
@@ -197,19 +198,22 @@ void perfmon_init_ivybridge(PerfmonThread *thread)
}
}
-#define BOX_GATE_SNB(channel,label) \
+#define BOX_GATE_IVB(channel,label) \
if (perfmon_verbose) { \
- printf("[%d] perfmon_setup_counter (label): Write Register 0x%llX , Flags: 0x%llX \n", \
+ printf("[%d] perfmon_setup_counter (##label): Write Register 0x%llX , Flags: 0x%llX \n", \
cpu_id, \
LLU_CAST reg, \
LLU_CAST flags); \
} \
-if(haveLock) { \
- uflags = pci_read(cpu_id, channel, reg); \
- uflags &= ~(0xFFFFU); \
- uflags |= (event->umask<<8) + event->eventId; \
- pci_write(cpu_id, channel, reg, uflags); \
-}
+ if(haveLock) { \
+ uflags = (1UL<<22);\
+ uflags |= (event->umask<<8) + event->eventId; \
+ if (event->cfgBits == 0xFF) \
+ { \
+ uflags |= (1<<21); \
+ } \
+ pci_write(cpu_id, channel, reg, uflags); \
+ }
void perfmon_setupCounterThread_ivybridge(
@@ -222,6 +226,8 @@ void perfmon_setupCounterThread_ivybridge(
uint32_t uflags;
uint64_t reg = ivybridge_counter_map[index].configRegister;
int cpu_id = perfmon_threadData[thread_id].processorId;
+ uint64_t fixed_flags = msr_read(cpu_id, MSR_PERF_FIXED_CTR_CTRL);
+ uint64_t orig_fixed_flags = fixed_flags;
perfmon_threadData[thread_id].counters[index].init = TRUE;
if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id))
@@ -233,9 +239,10 @@ void perfmon_setupCounterThread_ivybridge(
{
case PMC:
- perfmon_threadData[thread_id].counters[index].init = TRUE;
- flags = msr_read(cpu_id,reg);
- flags &= ~(0xFFFFU); /* clear lower 16bits */
+
+ //flags = msr_read(cpu_id,reg);
+ //flags &= ~(0xFFFFU); /* clear lower 16bits */
+ flags = (1<<22)|(1<<16);
/* Intel with standard 8 bit event mask: [7:0] */
flags |= (event->umask<<8) + event->eventId;
@@ -258,25 +265,26 @@ void perfmon_setupCounterThread_ivybridge(
break;
case FIXED:
+ fixed_flags |= (0x2ULL<<(index*4));
break;
case POWER:
break;
case MBOX0:
- BOX_GATE_SNB(PCI_IMC_DEVICE_CH_0,MBOX0);
+ BOX_GATE_IVB(PCI_IMC_DEVICE_CH_0,MBOX0);
break;
case MBOX1:
- BOX_GATE_SNB(PCI_IMC_DEVICE_CH_1,MBOX1);
+ BOX_GATE_IVB(PCI_IMC_DEVICE_CH_1,MBOX1);
break;
case MBOX2:
- BOX_GATE_SNB(PCI_IMC_DEVICE_CH_2,MBOX2);
+ BOX_GATE_IVB(PCI_IMC_DEVICE_CH_2,MBOX2);
break;
case MBOX3:
- BOX_GATE_SNB(PCI_IMC_DEVICE_CH_3,MBOX3);
+ BOX_GATE_IVB(PCI_IMC_DEVICE_CH_3,MBOX3);
break;
case SBOX0:
@@ -286,8 +294,9 @@ void perfmon_setupCounterThread_ivybridge(
{
if(haveLock)
{
- uflags = pci_read(cpu_id, PCI_QPI_DEVICE_PORT_0, reg);
- uflags &= ~(0xFFFFU);
+ //uflags = pci_read(cpu_id, PCI_QPI_DEVICE_PORT_0, reg);
+ //uflags &= ~(0xFFFFU);
+ uflags = (1UL<<22);
uflags |= (1UL<<21) + event->eventId; /* Set extension bit */
printf("UFLAGS 0x%x \n",uflags);
pci_write(cpu_id, PCI_QPI_DEVICE_PORT_0, reg, uflags);
@@ -307,7 +316,7 @@ void perfmon_setupCounterThread_ivybridge(
}
else
{
- BOX_GATE_SNB(PCI_QPI_DEVICE_PORT_0,SBOX0);
+ BOX_GATE_IVB(PCI_QPI_DEVICE_PORT_0,SBOX0);
}
break;
@@ -319,8 +328,9 @@ void perfmon_setupCounterThread_ivybridge(
{
if(haveLock)
{
- uflags = pci_read(cpu_id, PCI_QPI_DEVICE_PORT_1, reg);
- uflags &= ~(0xFFFFU);
+ //uflags = pci_read(cpu_id, PCI_QPI_DEVICE_PORT_1, reg);
+ //uflags &= ~(0xFFFFU);
+ uflags = (1UL<<22);
uflags |= (1UL<<21) + event->eventId; /* Set extension bit */
pci_write(cpu_id, PCI_QPI_DEVICE_PORT_1, reg, uflags);
@@ -337,7 +347,7 @@ void perfmon_setupCounterThread_ivybridge(
}
else
{
- BOX_GATE_SNB(PCI_QPI_DEVICE_PORT_0,SBOX0);
+ BOX_GATE_IVB(PCI_QPI_DEVICE_PORT_0,SBOX0);
}
break;
@@ -379,6 +389,10 @@ void perfmon_setupCounterThread_ivybridge(
/* should never be reached */
break;
}
+ if (fixed_flags != orig_fixed_flags)
+ {
+ msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, fixed_flags);
+ }
}
#define CBOX_START(NUM) \
@@ -454,14 +468,14 @@ void perfmon_startCountersThread_ivybridge(int thread_id)
case SBOX0:
if(haveLock)
{
- pci_write(cpu_id, PCI_QPI_DEVICE_PORT_0, PCI_UNC_QPI_PMON_BOX_CTL, uflags);
+ pci_write(cpu_id, PCI_QPI_DEVICE_PORT_0, PCI_UNC_QPI_PMON_BOX_CTL, 0x0ULL);
}
break;
case SBOX1:
if(haveLock)
{
- pci_write(cpu_id, PCI_QPI_DEVICE_PORT_1, PCI_UNC_QPI_PMON_BOX_CTL, uflags);
+ pci_write(cpu_id, PCI_QPI_DEVICE_PORT_1, PCI_UNC_QPI_PMON_BOX_CTL, 0x0ULL);
}
break;
@@ -549,7 +563,7 @@ if(haveLock) { \
#define SBOX_STOP(NUM) \
if(haveLock) { \
- pci_write(cpu_id, PCI_QPI_DEVICE_PORT_##NUM , PCI_UNC_QPI_PMON_BOX_CTL, uflags); \
+ pci_write(cpu_id, PCI_QPI_DEVICE_PORT_##NUM , PCI_UNC_QPI_PMON_BOX_CTL, (1<<8)); \
counter_result = pci_read(cpu_id, PCI_QPI_DEVICE_PORT_##NUM , ivybridge_counter_map[i].counterRegister); \
counter_result = (counter_result<<32) + pci_read(cpu_id, PCI_QPI_DEVICE_PORT_##NUM , ivybridge_counter_map[i].counterRegister2); \
perfmon_threadData[thread_id].counters[i].counterData = counter_result; \
@@ -713,13 +727,11 @@ void perfmon_readCountersThread_ivybridge(int thread_id)
{
haveLock = 1;
}
-
for ( int i=0; i<NUM_COUNTERS_IVYBRIDGE; i++ )
{
if (perfmon_threadData[thread_id].counters[i].init == TRUE)
{
- if ((ivybridge_counter_map[i].type == PMC) ||
- (ivybridge_counter_map[i].type == FIXED))
+ if ((ivybridge_counter_map[i].type == PMC) || (ivybridge_counter_map[i].type == FIXED))
{
perfmon_threadData[thread_id].counters[i].counterData =
msr_read(cpu_id, ivybridge_counter_map[i].counterRegister);
diff --git a/src/includes/perfmon_ivybridge_counters.h b/src/includes/perfmon_ivybridge_counters.h
index d4fa25a..e63dfb0 100644
--- a/src/includes/perfmon_ivybridge_counters.h
+++ b/src/includes/perfmon_ivybridge_counters.h
@@ -5,8 +5,8 @@
*
* Description: Counter header file of perfmon module for Ivy Bridge.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -29,8 +29,8 @@
*/
#define NUM_COUNTERS_CORE_IVYBRIDGE 8
-#define NUM_COUNTERS_UNCORE_IVYBRIDGE 60
-#define NUM_COUNTERS_IVYBRIDGE 85
+#define NUM_COUNTERS_UNCORE_IVYBRIDGE 12
+#define NUM_COUNTERS_IVYBRIDGE 32
static PerfmonCounterMap ivybridge_counter_map[NUM_COUNTERS_IVYBRIDGE] = {
/* Fixed Counters: instructions retired, cycles unhalted core */
@@ -49,82 +49,27 @@ static PerfmonCounterMap ivybridge_counter_map[NUM_COUNTERS_IVYBRIDGE] = {
{"PWR1", PMC9, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0},
{"PWR2", PMC10, POWER, 0, MSR_PP1_ENERGY_STATUS, 0, 0},
{"PWR3", PMC11, POWER, 0, MSR_DRAM_ENERGY_STATUS, 0, 0},
- /* CBOX counters */
- {"CBOX0C0", PMC12, CBOX0, MSR_UNC_C0_PMON_CTL0, MSR_UNC_C0_PMON_CTR0, 0, 0},
- {"CBOX0C1", PMC13, CBOX0, MSR_UNC_C0_PMON_CTL1, MSR_UNC_C0_PMON_CTR1, 0, 0},
- {"CBOX0C2", PMC14, CBOX0, MSR_UNC_C0_PMON_CTL2, MSR_UNC_C0_PMON_CTR2, 0, 0},
- {"CBOX0C3", PMC15, CBOX0, MSR_UNC_C0_PMON_CTL3, MSR_UNC_C0_PMON_CTR3, 0, 0},
- {"CBOX1C0", PMC16, CBOX1, MSR_UNC_C1_PMON_CTL0, MSR_UNC_C1_PMON_CTR0, 0, 0},
- {"CBOX1C1", PMC17, CBOX1, MSR_UNC_C1_PMON_CTL1, MSR_UNC_C1_PMON_CTR1, 0, 0},
- {"CBOX1C2", PMC18, CBOX1, MSR_UNC_C1_PMON_CTL2, MSR_UNC_C1_PMON_CTR2, 0, 0},
- {"CBOX1C3", PMC19, CBOX1, MSR_UNC_C1_PMON_CTL3, MSR_UNC_C1_PMON_CTR3, 0, 0},
- {"CBOX2C0", PMC20, CBOX2, MSR_UNC_C2_PMON_CTL0, MSR_UNC_C2_PMON_CTR0, 0, 0},
- {"CBOX2C1", PMC21, CBOX2, MSR_UNC_C2_PMON_CTL1, MSR_UNC_C2_PMON_CTR1, 0, 0},
- {"CBOX2C2", PMC22, CBOX2, MSR_UNC_C2_PMON_CTL2, MSR_UNC_C2_PMON_CTR2, 0, 0},
- {"CBOX2C3", PMC23, CBOX2, MSR_UNC_C2_PMON_CTL3, MSR_UNC_C2_PMON_CTR3, 0, 0},
- {"CBOX3C0", PMC24, CBOX3, MSR_UNC_C3_PMON_CTL0, MSR_UNC_C3_PMON_CTR0, 0, 0},
- {"CBOX3C1", PMC25, CBOX3, MSR_UNC_C3_PMON_CTL1, MSR_UNC_C3_PMON_CTR1, 0, 0},
- {"CBOX3C2", PMC26, CBOX3, MSR_UNC_C3_PMON_CTL2, MSR_UNC_C3_PMON_CTR2, 0, 0},
- {"CBOX3C3", PMC27, CBOX3, MSR_UNC_C3_PMON_CTL3, MSR_UNC_C3_PMON_CTR3, 0, 0},
- {"CBOX4C0", PMC28, CBOX4, MSR_UNC_C4_PMON_CTL0, MSR_UNC_C4_PMON_CTR0, 0, 0},
- {"CBOX4C1", PMC29, CBOX4, MSR_UNC_C4_PMON_CTL1, MSR_UNC_C4_PMON_CTR1, 0, 0},
- {"CBOX4C2", PMC30, CBOX4, MSR_UNC_C4_PMON_CTL2, MSR_UNC_C4_PMON_CTR2, 0, 0},
- {"CBOX4C3", PMC31, CBOX4, MSR_UNC_C4_PMON_CTL3, MSR_UNC_C4_PMON_CTR3, 0, 0},
- {"CBOX5C0", PMC32, CBOX5, MSR_UNC_C5_PMON_CTL0, MSR_UNC_C5_PMON_CTR0, 0, 0},
- {"CBOX5C1", PMC33, CBOX5, MSR_UNC_C5_PMON_CTL1, MSR_UNC_C5_PMON_CTR1, 0, 0},
- {"CBOX5C2", PMC34, CBOX5, MSR_UNC_C5_PMON_CTL2, MSR_UNC_C5_PMON_CTR2, 0, 0},
- {"CBOX5C3", PMC35, CBOX5, MSR_UNC_C5_PMON_CTL3, MSR_UNC_C5_PMON_CTR3, 0, 0},
- {"CBOX6C0", PMC36, CBOX6, MSR_UNC_C6_PMON_CTL0, MSR_UNC_C6_PMON_CTR0, 0, 0},
- {"CBOX6C1", PMC37, CBOX6, MSR_UNC_C6_PMON_CTL1, MSR_UNC_C6_PMON_CTR1, 0, 0},
- {"CBOX6C2", PMC38, CBOX6, MSR_UNC_C6_PMON_CTL2, MSR_UNC_C6_PMON_CTR2, 0, 0},
- {"CBOX6C3", PMC39, CBOX6, MSR_UNC_C6_PMON_CTL3, MSR_UNC_C6_PMON_CTR3, 0, 0},
- {"CBOX7C0", PMC40, CBOX7, MSR_UNC_C7_PMON_CTL0, MSR_UNC_C7_PMON_CTR0, 0, 0},
- {"CBOX7C1", PMC41, CBOX7, MSR_UNC_C7_PMON_CTL1, MSR_UNC_C7_PMON_CTR1, 0, 0},
- {"CBOX7C2", PMC42, CBOX7, MSR_UNC_C7_PMON_CTL2, MSR_UNC_C7_PMON_CTR2, 0, 0},
- {"CBOX7C3", PMC43, CBOX7, MSR_UNC_C7_PMON_CTL3, MSR_UNC_C7_PMON_CTR3, 0, 0},
- {"CBOX8C0", PMC44, CBOX8, MSR_UNC_C8_PMON_CTL0, MSR_UNC_C8_PMON_CTR0, 0, 0},
- {"CBOX8C1", PMC45, CBOX8, MSR_UNC_C8_PMON_CTL1, MSR_UNC_C8_PMON_CTR1, 0, 0},
- {"CBOX8C2", PMC46, CBOX8, MSR_UNC_C8_PMON_CTL2, MSR_UNC_C8_PMON_CTR2, 0, 0},
- {"CBOX8C3", PMC47, CBOX8, MSR_UNC_C8_PMON_CTL3, MSR_UNC_C8_PMON_CTR3, 0, 0},
- {"CBOX9C0", PMC48, CBOX9, MSR_UNC_C9_PMON_CTL0, MSR_UNC_C9_PMON_CTR0, 0, 0},
- {"CBOX9C1", PMC49, CBOX9, MSR_UNC_C9_PMON_CTL1, MSR_UNC_C9_PMON_CTR1, 0, 0},
- {"CBOX9C2", PMC50, CBOX9, MSR_UNC_C9_PMON_CTL2, MSR_UNC_C9_PMON_CTR2, 0, 0},
- {"CBOX9C3", PMC51, CBOX9, MSR_UNC_C9_PMON_CTL3, MSR_UNC_C9_PMON_CTR3, 0, 0},
- {"CBOX9C0", PMC52, CBOX10, MSR_UNC_C10_PMON_CTL0, MSR_UNC_C10_PMON_CTR0, 0, 0},
- {"CBOX9C1", PMC53, CBOX10, MSR_UNC_C10_PMON_CTL1, MSR_UNC_C10_PMON_CTR1, 0, 0},
- {"CBOX9C2", PMC54, CBOX10, MSR_UNC_C10_PMON_CTL2, MSR_UNC_C10_PMON_CTR2, 0, 0},
- {"CBOX9C3", PMC55, CBOX10, MSR_UNC_C10_PMON_CTL3, MSR_UNC_C10_PMON_CTR3, 0, 0},
- {"CBOX9C0", PMC56, CBOX11, MSR_UNC_C11_PMON_CTL0, MSR_UNC_C11_PMON_CTR0, 0, 0},
- {"CBOX9C1", PMC57, CBOX11, MSR_UNC_C11_PMON_CTL1, MSR_UNC_C11_PMON_CTR1, 0, 0},
- {"CBOX9C2", PMC58, CBOX11, MSR_UNC_C11_PMON_CTL2, MSR_UNC_C11_PMON_CTR2, 0, 0},
- {"CBOX9C3", PMC59, CBOX11, MSR_UNC_C11_PMON_CTL3, MSR_UNC_C11_PMON_CTR3, 0, 0},
/* IMC Counters: 4 48bit wide per memory channel, split in two reads */
- {"MBOX0C0",PMC60, MBOX0, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_CH_0},
- {"MBOX1C0",PMC61, MBOX0, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_CH_0},
- {"MBOX2C0",PMC62, MBOX0, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_CH_0},
- {"MBOX3C0",PMC63, MBOX0, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_CH_0},
- {"MBOX0C1",PMC64, MBOX1, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_CH_1},
- {"MBOX1C1",PMC65, MBOX1, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_CH_1},
- {"MBOX2C1",PMC66, MBOX1, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_CH_1},
- {"MBOX3C1",PMC67, MBOX1, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_CH_1},
- {"MBOX0C2",PMC68, MBOX2, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_CH_2},
- {"MBOX1C2",PMC69, MBOX2, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_CH_2},
- {"MBOX2C2",PMC70, MBOX2, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_CH_2},
- {"MBOX3C2",PMC71, MBOX2, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_CH_2},
- {"MBOX0C3",PMC72, MBOX3, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_CH_3},
- {"MBOX1C3",PMC73, MBOX3, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_CH_3},
- {"MBOX2C3",PMC74, MBOX3, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_CH_3},
- {"MBOX3C3",PMC75, MBOX3, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_CH_3},
- {"MBOXFIX",PMC76, MBOXFIX, 0, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_UNC_MC_PMON_FIXED_CTL},
- /* QPI counters four 48bit wide per port, split in two reads */
- {"SBOX0P0",PMC77, SBOX0, PCI_UNC_QPI_PMON_CTL_0, PCI_UNC_QPI_PMON_CTR_0_A, PCI_UNC_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_0},
- {"SBOX1P0",PMC78, SBOX0, PCI_UNC_QPI_PMON_CTL_1, PCI_UNC_QPI_PMON_CTR_1_A, PCI_UNC_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_0},
- {"SBOX2P0",PMC79, SBOX0, PCI_UNC_QPI_PMON_CTL_2, PCI_UNC_QPI_PMON_CTR_2_A, PCI_UNC_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_0},
- {"SBOX3P0",PMC80, SBOX0, PCI_UNC_QPI_PMON_CTL_3, PCI_UNC_QPI_PMON_CTR_3_A, PCI_UNC_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_0},
- {"SBOX0P1",PMC81, SBOX1, PCI_UNC_QPI_PMON_CTL_0, PCI_UNC_QPI_PMON_CTR_0_A, PCI_UNC_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_1},
- {"SBOX1P1",PMC82, SBOX1, PCI_UNC_QPI_PMON_CTL_1, PCI_UNC_QPI_PMON_CTR_1_A, PCI_UNC_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_1},
- {"SBOX2P1",PMC83, SBOX1, PCI_UNC_QPI_PMON_CTL_2, PCI_UNC_QPI_PMON_CTR_2_A, PCI_UNC_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_1},
- {"SBOX3P1",PMC84, SBOX1, PCI_UNC_QPI_PMON_CTL_3, PCI_UNC_QPI_PMON_CTR_3_A, PCI_UNC_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_1}
+ {"MBOX0C0",PMC12, MBOX0, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_CH_2},
+ {"MBOX0C1",PMC13, MBOX0, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_CH_2},
+ {"MBOX0C2",PMC14, MBOX0, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_CH_2},
+ {"MBOX0C3",PMC15, MBOX0, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_CH_2},
+ {"MBOX0FIX",PMC16, MBOXFIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_CH_2},
+ {"MBOX1C0",PMC17, MBOX1, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_CH_3},
+ {"MBOX1C1",PMC18, MBOX1, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_CH_3},
+ {"MBOX1C2",PMC19, MBOX1, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_CH_3},
+ {"MBOX1C3",PMC20, MBOX1, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_CH_3},
+ {"MBOX1FIX",PMC21, MBOXFIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_CH_3},
+ {"MBOX2C0",PMC22, MBOX2, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_CH_0},
+ {"MBOX2C1",PMC23, MBOX2, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_CH_0},
+ {"MBOX2C2",PMC24, MBOX2, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_CH_0},
+ {"MBOX2C3",PMC25, MBOX2, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_CH_0},
+ {"MBOX2FIX",PMC26, MBOXFIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_CH_0},
+ {"MBOX3C0",PMC27, MBOX3, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_CH_1},
+ {"MBOX3C1",PMC28, MBOX3, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_CH_1},
+ {"MBOX3C2",PMC29, MBOX3, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_CH_1},
+ {"MBOX3C3",PMC30, MBOX3, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_CH_1},
+ {"MBOX3FIX",PMC31, MBOXFIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_CH_1},
};
diff --git a/src/includes/perfmon_ivybridge_events.txt b/src/includes/perfmon_ivybridge_events.txt
index f2cb185..5318ce6 100644
--- a/src/includes/perfmon_ivybridge_events.txt
+++ b/src/includes/perfmon_ivybridge_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for Intel Ivy Bridge
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
@@ -131,7 +131,9 @@ UMASK_LOAD_HIT_PRE_HW_PF 0x02
EVENT_L1D 0x51 PMC
UMASK_L1D_REPLACEMENT 0x01
-UMASK_L1D_M_EVICT 0x04
+UMASK_L1D_ALLOCATED_IN_M 0x02
+UMASK_L1D_M_EVICT 0x04
+UMASK_L1D_ALL_M_REPLACEMENT 0x08
EVENT_MOVE_ELIMINATION 0x58 PMC
UMASK_MOVE_ELIMINATION_INT_NOT_ELIMINATED 0x04
@@ -173,7 +175,10 @@ UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS 0x24 0x00 0x04
UMASK_IDQ_ALL_MITE_ALL_UOPS 0x3C
EVENT_ICACHE 0x80 PMC
+UMASK_ICACHE_HITS 0x01
UMASK_ICACHE_MISSES 0x02
+UMASK_ICACHE_ACCESSES 0x03
+UMASK_ICACHE_IFETCH_STALL 0x04
EVENT_ITLB_MISSES 0x85 PMC
UMASK_ITLB_MISSES_CAUSES_A_WALK 0x01
@@ -320,12 +325,17 @@ UMASK_MEM_UOP_RETIRED_STORES_LOCK 0x22
UMASK_MEM_UOP_RETIRED_LOADS_SPLIT 0x41
UMASK_MEM_UOP_RETIRED_STORES_SPLIT 0x42
-EVENT_MEMLOAD_UOPS_RETIRED 0xD1 PMC
-UMASK_MEMLOAD_UOPS_RETIRED_L1_HIT 0x01
-UMASK_MEMLOAD_UOPS_RETIRED_L2_HIT 0x02
-UMASK_MEMLOAD_UOPS_RETIRED_LLC_HIT 0x04
-UMASK_MEMLOAD_UOPS_RETIRED_LLC_MISS 0x20
-UMASK_MEMLOAD_UOPS_RETIRED_HIT_LFB 0x40
+EVENT_MEM_LOAD_UOPS_RETIRED 0xD1 PMC
+UMASK_MEM_LOAD_UOPS_RETIRED_L1_HIT 0x01
+UMASK_MEM_LOAD_UOPS_RETIRED_L1_MISS 0x08
+UMASK_MEM_LOAD_UOPS_RETIRED_L1_ALL 0x09
+UMASK_MEM_LOAD_UOPS_RETIRED_L2_HIT 0x02
+UMASK_MEM_LOAD_UOPS_RETIRED_L2_MISS 0x10
+UMASK_MEM_LOAD_UOPS_RETIRED_L2_ALL 0x12
+UMASK_MEM_LOAD_UOPS_RETIRED_L3_HIT 0x04
+UMASK_MEM_LOAD_UOPS_RETIRED_L3_MISS 0x20
+UMASK_MEM_LOAD_UOPS_RETIRED_L3_ALL 0x24
+UMASK_MEM_LOAD_UOPS_RETIRED_HIT_LFB 0x40
EVENT_MEM_LOAD_UOPS_LLC_HIT_RETIRED 0xD2 PMC
UMASK_MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_MISS 0x01
@@ -368,178 +378,6 @@ UMASK_MEM_LOAD_UOPS_LLC_MISS_RETIRED_REMOTE_DRAM 0x0C
UMASK_MEM_LOAD_UOPS_LLC_MISS_RETIRED_REMOTE_HITM 0x10
UMASK_MEM_LOAD_UOPS_LLC_MISS_RETIRED_REMOTE_FWD 0x20
-EVENT_CBO_CLOCKTICKS 0x00 CBOX
-UMASK_CBO_CLOCKTICKS 0x00
-
-EVENT_COUNTER0_OCCUPANCY 0x1F CBOX
-UMASK_COUNTER0_OCCUPANCY 0x00
-
-EVENT_LLC_LOOKUP 0x34 CBOX0|CBOX1
-UMASK_LLC_LOOKUP_DATA_READ 0x03
-UMASK_LLC_LOOKUP_WRITE 0x05
-UMASK_LLC_LOOKUP_REMOTE_SNOOP 0x09
-UMASK_LLC_LOOKUP_ANY 0x11
-UMASK_LLC_LOOKUP_NID 0x41
-
-EVENT_LLC_VICTIMS 0x37 CBOX0|CBOX1
-UMASK_LLC_VICTIMS_M_STATE 0x01
-UMASK_LLC_VICTIMS_E_STATE 0x02
-UMASK_LLC_VICTIMS_S_STATE 0x04
-UMASK_LLC_VICTIMS_MISS 0x08
-UMASK_LLC_VICTIMS_NID 0x40
-
-EVENT_CBO_MISC 0x39 CBOX0|CBOX1
-UMASK_CBO_MISC_RSPI_WAS_FSE 0x01
-UMASK_CBO_MISC_WC_ALIASING 0x02
-UMASK_CBO_MISC_STARTED 0x04
-UMASK_CBO_MISC_RFO_HIT_S 0x08
-
-EVENT_RING_AD_USED 0x1B CBOX2|CBOX3
-UMASK_RING_AD_USED_0_UP_EVEN 0x01
-UMASK_RING_AD_USED_0_UP_ODD 0x02
-UMASK_RING_AD_USED_0_DOWN_EVEN 0x04
-UMASK_RING_AD_USED_0_DOWN_ODD 0x08
-UMASK_RING_AD_USED_1_UP_EVEN 0x10
-UMASK_RING_AD_USED_1_UP_ODD 0x20
-UMASK_RING_AD_USED_1_DOWN_EVEN 0x40
-UMASK_RING_AD_USED_1_DOWN_ODD 0x80
-UMASK_RING_AD_USED_DOWN 0xCC
-UMASK_RING_AD_USED_UP 0x33
-
-EVENT_RING_AK_USED 0x1C CBOX2|CBOX3
-UMASK_RING_AK_USED_0_UP_EVEN 0x01
-UMASK_RING_AK_USED_0_UP_ODD 0x02
-UMASK_RING_AK_USED_0_DOWN_EVEN 0x04
-UMASK_RING_AK_USED_0_DOWN_ODD 0x08
-UMASK_RING_AK_USED_1_UP_EVEN 0x10
-UMASK_RING_AK_USED_1_UP_ODD 0x20
-UMASK_RING_AK_USED_1_DOWN_EVEN 0x40
-UMASK_RING_AK_USED_1_DOWN_ODD 0x80
-UMASK_RING_AK_USED_DOWN 0xCC
-UMASK_RING_AK_USED_UP 0x33
-
-EVENT_RING_BL_USED 0x1D CBOX2|CBOX3
-UMASK_RING_BL_USED_0_UP_EVEN 0x01
-UMASK_RING_BL_USED_0_UP_ODD 0x02
-UMASK_RING_BL_USED_0_DOWN_EVEN 0x04
-UMASK_RING_BL_USED_0_DOWN_ODD 0x08
-UMASK_RING_BL_USED_1_UP_EVEN 0x10
-UMASK_RING_BL_USED_1_UP_ODD 0x20
-UMASK_RING_BL_USED_1_DOWN_EVEN 0x40
-UMASK_RING_BL_USED_1_DOWN_ODD 0x80
-UMASK_RING_BL_USED_DOWN 0xCC
-UMASK_RING_BL_USED_UP 0x33
-
-EVENT_RING_BOUNCES 0x05 CBOX0|CBOX1
-UMASK_RING_BOUNCES_AK_IRQ 0x02
-UMASK_RING_BOUNCES_AK_CORE 0x04
-UMASK_RING_BOUNCES_BL_CORE 0x08
-UMASK_RING_BOUNCES_IV_CORE 0x01
-
-EVENT_RING_IV_USED 0x1E CBOX2|CBOX3
-UMASK_RING_IV_USED_ANY 0x0F
-UMASK_RING_IV_USED_UP 0x33
-UMASK_RING_IV_USED_DOWN 0xCC
-
-EVENT_RING_SRC_THRTL 0x07 CBOX0|CBOX1
-UMASK_RING_SRC_THRTL 0x00
-
-EVENT_RXR_EXT_STARVED 0x12 CBOX0|CBOX1
-UMASK_RXR_EXT_STARVED_IRQ 0x01
-UMASK_RXR_EXT_STARVED_IPQ 0x02
-UMASK_RXR_EXT_STARVED_PRQ 0x04
-UMASK_RXR_EXT_STARVED_ISMQ_BIDS 0x08
-
-EVENT_RXR_INSERTS 0x13 CBOX0|CBOX1
-UMASK_RXR_INSERTS_IRQ 0x01
-UMASK_RXR_INSERTS_IRQ_REJECTED 0x02
-UMASK_RXR_INSERTS_IPQ 0x04
-UMASK_RXR_INSERTS_VFIFO 0x10
-
-EVENT_RXR_IPQ_RETRY 0x31 CBOX0|CBOX1
-UMASK_RXR_IPQ_RETRY_ANY 0x01
-UMASK_RXR_IPQ_RETRY_FULL 0x02
-UMASK_RXR_IPQ_RETRY_ADDR_CONFLICT 0x04
-UMASK_RXR_IPQ_RETRY_QPI_CREDITS 0x10
-
-EVENT_RXR_IRQ_RETRY 0x32 CBOX0|CBOX1
-UMASK_RXR_IRQ_RETRY_ANY 0x01
-UMASK_RXR_IRQ_RETRY_FULL 0x02
-UMASK_RXR_IRQ_RETRY_ADDR_CONFLICT 0x04
-UMASK_RXR_IRQ_RETRY_RTID 0x08
-UMASK_RXR_IRQ_RETRY_QPI_CREDITS 0x10
-UMASK_RXR_IRQ_RETRY_HO_CREDITS 0x20
-
-EVENT_RXR_ISMQ_RETRY 0x33 CBOX0|CBOX1
-UMASK_RXR_ISMQ_RETRY_ANY 0x01
-UMASK_RXR_ISMQ_RETRY_FULL 0x02
-UMASK_RXR_ISMQ_RETRY_RTID 0x08
-UMASK_RXR_ISMQ_RETRY_QPI_CREDITS 0x10
-UMASK_RXR_ISMQ_RETRY_HO_CREDITS 0x20
-UMASK_RXR_ISMQ_RETRY_WB_CREDITS 0x80
-
-EVENT_RXR_OCCUPANCY 0x11 CBOX0
-UMASK_RXR_OCCUPANCY_IRQ 0x01
-UMASK_RXR_OCCUPANCY_IRQ_REJECTED 0x02
-UMASK_RXR_OCCUPANCY_IPQ 0x04
-UMASK_RXR_OCCUPANCY_VIFO 0x10
-
-EVENT_TOR_INSERTS 0x35 CBOX0|CBOX1
-UMASK_TOR_INSERTS_OPCODE 0x01
-UMASK_TOR_INSERTS_MISS_OPCODE 0x03
-UMASK_TOR_INSERTS_EVICTION 0x04
-UMASK_TOR_INSERTS_ALL 0x08
-UMASK_TOR_INSERTS_WB 0x10
-UMASK_TOR_INSERTS_MISS_ALL 0x0A
-UMASK_TOR_INSERTS_MISS_LOCAL 0x2A
-UMASK_TOR_INSERTS_MISS_LOCAL_OPCODE 0x23
-UMASK_TOR_INSERTS_NID_OPCODE 0x41
-UMASK_TOR_INSERTS_NID_EVICTION 0x44
-UMASK_TOR_INSERTS_NID_ALL 0x48
-UMASK_TOR_INSERTS_NID_WB 0x50
-UMASK_TOR_INSERTS_NID_MISS_OPCODE 0x43
-UMASK_TOR_INSERTS_NID_MISS_ALL 0x4A
-UMASK_TOR_INSERTS_REMOTE_OPCODE 0x81
-UMASK_TOR_INSERTS_MISS_REMOTE_OPCODE 0x83
-UMASK_TOR_INSERTS_REMOTE 0x88
-UMASK_TOR_INSERTS_MISS_REMOTE 0x8A
-
-EVENT_TOR_OCCUPANCY 0x36 CBOX0
-UMASK_TOR_OCCUPANCY_OPCODE 0x01
-UMASK_TOR_OCCUPANCY_MISS_OPCODE 0x03
-UMASK_TOR_OCCUPANCY_EVICTION 0x04
-UMASK_TOR_OCCUPANCY_ALL 0x08
-UMASK_TOR_OCCUPANCY_MISS_ALL 0x0A
-UMASK_TOR_OCCUPANCY_WB 0x10
-UMASK_TOR_OCCUPANCY_LOCAL_OPCODE 0x21
-UMASK_TOR_OCCUPANCY_MISS_LOCAL_OPCODE 0x23
-UMASK_TOR_OCCUPANCY_LOCAL 0x28
-UMASK_TOR_OCCUPANCY_MISS_LOCAL 0x2A
-UMASK_TOR_OCCUPANCY_NID_OPCODE 0x41
-UMASK_TOR_OCCUPANCY_NID_EVICTION 0x44
-UMASK_TOR_OCCUPANCY_NID_ALL 0x48
-UMASK_TOR_OCCUPANCY_NID_MISS_OPCODE 0x43
-UMASK_TOR_OCCUPANCY_NID_MISS_ALL 0x4A
-UMASK_TOR_OCCUPANCY_NID_WB 0x50
-UMASK_TOR_OCCUPANCY_REMOTE_OPCODE 0x81
-UMASK_TOR_OCCUPANCY_MISS_REMOTE_OPCODE 0x83
-UMASK_TOR_OCCUPANCY_REMOTE 0x88
-UMASK_TOR_OCCUPANCY_MISS_REMOTE 0x8A
-
-EVENT_TXR_ADS_USED 0x04 CBOX0|CBOX1
-UMASK_TXR_ADS_USED_AD 0x01
-UMASK_TXR_ADS_USED_AK 0x02
-UMASK_TXR_ADS_USED_BL 0x04
-
-EVENT_TXR_INSERTS 0x02 CBOX0|CBOX1
-UMASK_TXR_INSERTS_AD_CACHE 0x01
-UMASK_TXR_INSERTS_AK_CACHE 0x02
-UMASK_TXR_INSERTS_BL_CACHE 0x04
-UMASK_TXR_INSERTS_IV_CACHE 0x08
-UMASK_TXR_INSERTS_AD_CORE 0x10
-UMASK_TXR_INSERTS_AK_CORE 0x20
-UMASK_TXR_INSERTS_BL_CORE 0x40
-
EVENT_DRAM_CLOCKTICKS 0x00 MBOX
UMASK_DRAM_CLOCKTICKS 0x00
@@ -821,13 +659,3 @@ UMASK_WR_CAS_RANK7_BANK4 0x10
UMASK_WR_CAS_RANK7_BANK5 0x20
UMASK_WR_CAS_RANK7_BANK6 0x40
UMASK_WR_CAS_RANK7_BANK7 0x80
-
-
-
-
-
-
-
-
-
-
diff --git a/src/includes/perfmon_k10.h b/src/includes/perfmon_k10.h
index 45274fd..cc614af 100644
--- a/src/includes/perfmon_k10.h
+++ b/src/includes/perfmon_k10.h
@@ -5,8 +5,8 @@
*
* Description: Header file of perfmon module for K10
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -46,12 +46,12 @@ void perfmon_init_k10(PerfmonThread *thread)
msr_write(cpu_id, MSR_AMD_PERFEVTSEL2, 0x0ULL);
msr_write(cpu_id, MSR_AMD_PERFEVTSEL3, 0x0ULL);
- flags |= (1<<16); /* user mode flag */
+ //flags |= (1<<16); /* user mode flag */
- msr_write(cpu_id, MSR_AMD_PERFEVTSEL0, flags);
+ /*msr_write(cpu_id, MSR_AMD_PERFEVTSEL0, flags);
msr_write(cpu_id, MSR_AMD_PERFEVTSEL1, flags);
msr_write(cpu_id, MSR_AMD_PERFEVTSEL2, flags);
- msr_write(cpu_id, MSR_AMD_PERFEVTSEL3, flags);
+ msr_write(cpu_id, MSR_AMD_PERFEVTSEL3, flags);*/
}
@@ -65,8 +65,7 @@ void perfmon_setupCounterThread_k10(
int cpu_id = perfmon_threadData[thread_id].processorId;
perfmon_threadData[thread_id].counters[index].init = TRUE;
- flags = msr_read(cpu_id,reg);
- flags &= ~(0xFFFFU);
+ flags |= (1<<16);
/* AMD uses a 12 bit Event mask: [35:32][7:0] */
flags |= ((uint64_t)(event->eventId>>8)<<32) + (event->umask<<8) + (event->eventId & ~(0xF00U));
diff --git a/src/includes/perfmon_k10_counters.h b/src/includes/perfmon_k10_counters.h
index e07c23a..d01be3d 100644
--- a/src/includes/perfmon_k10_counters.h
+++ b/src/includes/perfmon_k10_counters.h
@@ -5,8 +5,8 @@
*
* Description: AMD K10 specific subroutines
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/perfmon_k10_events.txt b/src/includes/perfmon_k10_events.txt
index c4a89e2..64c20e9 100644
--- a/src/includes/perfmon_k10_events.txt
+++ b/src/includes/perfmon_k10_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for AMD K10
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
diff --git a/src/includes/perfmon_k8.h b/src/includes/perfmon_k8.h
index 2f393f8..9313168 100644
--- a/src/includes/perfmon_k8.h
+++ b/src/includes/perfmon_k8.h
@@ -7,8 +7,8 @@
* Configures and reads out performance counters
* on x86 based architectures. Supports multi threading.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/perfmon_k8_events.txt b/src/includes/perfmon_k8_events.txt
index 7f93db2..127b56f 100644
--- a/src/includes/perfmon_k8_events.txt
+++ b/src/includes/perfmon_k8_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for AMD K8
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
diff --git a/src/includes/perfmon_kabini.h b/src/includes/perfmon_kabini.h
index 476636a..018eb04 100644
--- a/src/includes/perfmon_kabini.h
+++ b/src/includes/perfmon_kabini.h
@@ -5,8 +5,8 @@
*
* Description: Header file of perfmon module for AMD Family16
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -57,11 +57,11 @@ void perfmon_init_kabini(PerfmonThread *thread)
msr_write(cpu_id, MSR_AMD16_NB_PERFEVTSEL3, 0x0ULL);
}
- flags |= (1<<16); /* user mode flag */
- msr_write(cpu_id, MSR_AMD16_PERFEVTSEL0, flags);
+ //flags |= (1<<16); /* user mode flag */
+ /*msr_write(cpu_id, MSR_AMD16_PERFEVTSEL0, flags);
msr_write(cpu_id, MSR_AMD16_PERFEVTSEL1, flags);
msr_write(cpu_id, MSR_AMD16_PERFEVTSEL2, flags);
- msr_write(cpu_id, MSR_AMD16_PERFEVTSEL3, flags);
+ msr_write(cpu_id, MSR_AMD16_PERFEVTSEL3, flags);*/
}
@@ -70,7 +70,7 @@ void perfmon_setupCounterThread_kabini(
PerfmonEvent* event,
PerfmonCounterIndex index)
{
- uint64_t flags;
+ uint64_t flags = 0x0ULL;
uint64_t reg = kabini_counter_map[index].configRegister;
int cpu_id = perfmon_threadData[thread_id].processorId;
perfmon_threadData[thread_id].counters[index].init = TRUE;
@@ -82,8 +82,10 @@ void perfmon_setupCounterThread_kabini(
return;
}
- flags = msr_read(cpu_id,reg);
- flags &= ~(0xFFFFU);
+ if (kabini_counter_map[index].type == PMC)
+ {
+ flags |= (1<<16);
+ }
/* AMD uses a 12 bit Event mask: [35:32][7:0] */
flags |= ((uint64_t)(event->eventId>>8)<<32) + (event->umask<<8) + (event->eventId & ~(0xF00U));
diff --git a/src/includes/perfmon_kabini_counters.h b/src/includes/perfmon_kabini_counters.h
index 9cea474..8662522 100644
--- a/src/includes/perfmon_kabini_counters.h
+++ b/src/includes/perfmon_kabini_counters.h
@@ -5,8 +5,8 @@
*
* Description: Counter Header File of perfmon module for AMD Family16
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/perfmon_kabini_events.txt b/src/includes/perfmon_kabini_events.txt
index 4f28024..9ccc726 100644
--- a/src/includes/perfmon_kabini_events.txt
+++ b/src/includes/perfmon_kabini_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for AMD Kabini
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: saravanan.ekanathan at amd.com
# Project: likwid
diff --git a/src/includes/perfmon_nehalem.h b/src/includes/perfmon_nehalem.h
index 99f2422..b3e7907 100644
--- a/src/includes/perfmon_nehalem.h
+++ b/src/includes/perfmon_nehalem.h
@@ -5,8 +5,8 @@
*
* Description: Header File of perfmon module for Nehalem.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -64,17 +64,17 @@ void perfmon_init_nehalem(PerfmonThread *thread)
* FIXED 0: Instructions retired
* FIXED 1: Clocks unhalted core
* FIXED 2: Clocks unhalted ref */
- msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, 0x222ULL);
+ //msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, 0x222ULL);
// flags |= (1<<22); /* enable flag */
// flags |= (1<<16); /* user mode flag */
- setBit(flags,16); /* set user mode flag */
- setBit(flags,22); /* set enable flag */
+ //setBit(flags,16); /* set user mode flag */
+ //setBit(flags,22); /* set enable flag */
- msr_write(cpu_id, MSR_PERFEVTSEL0, flags);
+ /*msr_write(cpu_id, MSR_PERFEVTSEL0, flags);
msr_write(cpu_id, MSR_PERFEVTSEL1, flags);
msr_write(cpu_id, MSR_PERFEVTSEL2, flags);
- msr_write(cpu_id, MSR_PERFEVTSEL3, flags);
+ msr_write(cpu_id, MSR_PERFEVTSEL3, flags);*/
if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id) ||
@@ -105,19 +105,19 @@ void perfmon_init_nehalem(PerfmonThread *thread)
msr_write(cpu_id, MSR_UNCORE_PERF_GLOBAL_CTRL, 0x0ULL);
msr_write(cpu_id, MSR_UNCORE_PERF_GLOBAL_OVF_CTRL, 0x0ULL);
msr_write(cpu_id, MSR_UNCORE_ADDR_OPCODE_MATCH, 0x0ULL);
- msr_write(cpu_id, MSR_OFFCORE_RSP0, 0x0ULL);
+ msr_write(cpu_id, MSR_OFFCORE_RESP0, 0x0ULL);
/* Preinit of PERFEVSEL registers */
- clearBit(flags,16); /* set enable flag */
+ //clearBit(flags,16); /* set enable flag */
- msr_write(cpu_id, MSR_UNCORE_PERFEVTSEL0, flags);
+ /*msr_write(cpu_id, MSR_UNCORE_PERFEVTSEL0, flags);
msr_write(cpu_id, MSR_UNCORE_PERFEVTSEL1, flags);
msr_write(cpu_id, MSR_UNCORE_PERFEVTSEL2, flags);
msr_write(cpu_id, MSR_UNCORE_PERFEVTSEL3, flags);
msr_write(cpu_id, MSR_UNCORE_PERFEVTSEL4, flags);
msr_write(cpu_id, MSR_UNCORE_PERFEVTSEL5, flags);
msr_write(cpu_id, MSR_UNCORE_PERFEVTSEL6, flags);
- msr_write(cpu_id, MSR_UNCORE_PERFEVTSEL7, flags);
+ msr_write(cpu_id, MSR_UNCORE_PERFEVTSEL7, flags);*/
}
}
@@ -128,20 +128,21 @@ void perfmon_setupCounterThread_nehalem(
PerfmonCounterIndex index)
{
int haveLock = 0;
- uint64_t flags;
+ uint64_t flags = 0x0ULL;
uint64_t reg = nehalem_counter_map[index].configRegister;
int cpu_id = perfmon_threadData[thread_id].processorId;
+ uint64_t fixed_flags = msr_read(cpu_id, MSR_PERF_FIXED_CTR_CTRL);
if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id))
{
haveLock = 1;
}
+ perfmon_threadData[thread_id].counters[index].init = TRUE;
+
if ( nehalem_counter_map[index].type == PMC )
{
- perfmon_threadData[thread_id].counters[index].init = TRUE;
- flags = msr_read(cpu_id,reg);
- flags &= ~(0xFFFFU); /* clear lower 16bits */
+ flags = (1<<16)|(1<<22);
/* Intel with standard 8 bit event mask: [7:0] */
flags |= (event->umask<<8) + event->eventId;
@@ -166,9 +167,7 @@ void perfmon_setupCounterThread_nehalem(
{
if(haveLock)
{
- perfmon_threadData[thread_id].counters[index].init = TRUE;
- flags = msr_read(cpu_id,reg);
- flags &= ~(0xFFFFU); /* clear lower 16bits */
+ flags = (1<<22);
/* Intel with standard 8 bit event mask: [7:0] */
flags |= (event->umask<<8) + event->eventId;
@@ -193,7 +192,8 @@ void perfmon_setupCounterThread_nehalem(
}
else if (nehalem_counter_map[index].type == FIXED)
{
- perfmon_threadData[thread_id].counters[index].init = TRUE;
+ fixed_flags |= (0x2 <<(index*4));
+ msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, fixed_flags);
}
}
@@ -214,7 +214,7 @@ void perfmon_startCountersThread_nehalem(int thread_id)
uflags = 0x100000000ULL;
}
- for ( int i=0; i<NUM_PMC; i++ )
+ for ( int i=0; i<NUM_PMC; i++ )
{
if (perfmon_threadData[thread_id].counters[i].init == TRUE)
{
@@ -287,11 +287,11 @@ void perfmon_stopCountersThread_nehalem(int thread_id)
}
flags = msr_read(cpu_id,MSR_PERF_GLOBAL_STATUS);
- printf ("Status: 0x%llX \n", LLU_CAST flags);
- if((flags & 0x3) || (flags & (0x3ULL<<32)) )
+ if((flags & 0x3) || (flags & (0x3ULL<<32)) )
{
printf ("Overflow occured \n");
+ printf ("Status: 0x%llX \n", LLU_CAST flags);
}
}
diff --git a/src/includes/perfmon_nehalemEX.h b/src/includes/perfmon_nehalemEX.h
index 84457ae..ea632cf 100644
--- a/src/includes/perfmon_nehalemEX.h
+++ b/src/includes/perfmon_nehalemEX.h
@@ -5,8 +5,8 @@
*
* Description: Header File of perfmon module for Nehalem EX.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -38,12 +38,299 @@ static int perfmon_numGroupsNehalemEX = NUM_GROUPS_NEHALEMEX;
static int perfmon_numArchEventsNehalemEX = NUM_ARCH_EVENTS_NEHALEMEX;
/* This SUCKS: There are only subtle difference between NehalemEX
- * and Westmere EX Uncore. Still one of them is that one field is
- * 1 bit shifted. Thank you Intel for this mess!!! Do you want
+ * and Westmere EX Uncore. Still one of them is that one field is
+ * 1 bit shifted. Thank you Intel for this mess!!! Do you want
* to change the register definitions for every architecture?*/
-/* MBOX macros */
+void perfmon_init_nehalemEX(PerfmonThread *thread)
+{ /* Zeroes the core PMU MSRs of thread->processorId, records per-box counter ids and box registers, and, if this CPU holds or acquires the socket lock, clears the uncore event selects and resets the uncore. */
+ uint64_t flags = 0x0ULL;
+ int cpu_id = thread->processorId;
+ perfmon_verbose = 1; /* NOTE(review): switches verbose output on unconditionally - confirm this is not debug residue */
+ msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, 0x0ULL);
+ msr_write(cpu_id, MSR_PERFEVTSEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_PERFEVTSEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_PERFEVTSEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_PERFEVTSEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_PMC0, 0x0ULL);
+ msr_write(cpu_id, MSR_PMC1, 0x0ULL);
+ msr_write(cpu_id, MSR_PMC2, 0x0ULL);
+ msr_write(cpu_id, MSR_PMC3, 0x0ULL);
+ msr_write(cpu_id, MSR_PERF_FIXED_CTR0, 0x0ULL);
+ msr_write(cpu_id, MSR_PERF_FIXED_CTR1, 0x0ULL);
+ msr_write(cpu_id, MSR_PERF_FIXED_CTR2, 0x0ULL);
+ msr_write(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL);
+ msr_write(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, 0x0ULL);
+ msr_write(cpu_id, MSR_PEBS_ENABLE, 0x0ULL);
+
+ /* initialize fixed counters
+ * FIXED 0: Instructions retired
+ * FIXED 1: Clocks unhalted core
+ * FIXED 2: Clocks unhalted ref */
+ //msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, 0x222ULL);
+
+ /* Preinit of PERFEVSEL registers */
+ //flags |= (1<<22); /* enable flag */
+ //flags |= (1<<16); /* user mode flag */
+
+ /*msr_write(cpu_id, MSR_PERFEVTSEL0, flags);
+ msr_write(cpu_id, MSR_PERFEVTSEL1, flags);
+ msr_write(cpu_id, MSR_PERFEVTSEL2, flags);
+ msr_write(cpu_id, MSR_PERFEVTSEL3, flags);*/
+
+ /* Initialize uncore */
+ /* MBOX */
+ thread->counters[PMC7].id = 0;
+ thread->counters[PMC8].id = 1;
+ thread->counters[PMC9].id = 2;
+ thread->counters[PMC10].id = 3;
+ thread->counters[PMC11].id = 4;
+ thread->counters[PMC12].id = 5;
+ westmereEX_PMunits[MBOX0].ctrlRegister = MSR_M0_PMON_BOX_CTRL;
+ westmereEX_PMunits[MBOX0].statusRegister = MSR_M0_PMON_BOX_STATUS;
+ westmereEX_PMunits[MBOX0].ovflRegister = MSR_M0_PMON_BOX_OVF_CTRL;
+
+ thread->counters[PMC13].id = 0;
+ thread->counters[PMC14].id = 1;
+ thread->counters[PMC15].id = 2;
+ thread->counters[PMC16].id = 3;
+ thread->counters[PMC17].id = 4;
+ thread->counters[PMC18].id = 5;
+ westmereEX_PMunits[MBOX1].ctrlRegister = MSR_M1_PMON_BOX_CTRL;
+ westmereEX_PMunits[MBOX1].statusRegister = MSR_M1_PMON_BOX_STATUS;
+ westmereEX_PMunits[MBOX1].ovflRegister = MSR_M1_PMON_BOX_OVF_CTRL;
+
+ /* BBOX */
+ thread->counters[PMC19].id = 0;
+ thread->counters[PMC20].id = 1;
+ thread->counters[PMC21].id = 2;
+ thread->counters[PMC22].id = 3;
+ westmereEX_PMunits[BBOX0].ctrlRegister = MSR_B0_PMON_BOX_CTRL;
+ westmereEX_PMunits[BBOX0].statusRegister = MSR_B0_PMON_BOX_STATUS;
+ westmereEX_PMunits[BBOX0].ovflRegister = MSR_B0_PMON_BOX_OVF_CTRL;
+
+ thread->counters[PMC23].id = 0;
+ thread->counters[PMC24].id = 1;
+ thread->counters[PMC25].id = 2;
+ thread->counters[PMC26].id = 3;
+ westmereEX_PMunits[BBOX1].ctrlRegister = MSR_B1_PMON_BOX_CTRL;
+ westmereEX_PMunits[BBOX1].statusRegister = MSR_B1_PMON_BOX_STATUS;
+ westmereEX_PMunits[BBOX1].ovflRegister = MSR_B1_PMON_BOX_OVF_CTRL;
+
+ /* RBOX */
+ thread->counters[PMC27].id = 0;
+ thread->counters[PMC28].id = 1;
+ thread->counters[PMC29].id = 2;
+ thread->counters[PMC30].id = 3;
+ thread->counters[PMC31].id = 4;
+ thread->counters[PMC32].id = 5;
+ thread->counters[PMC33].id = 6;
+ thread->counters[PMC34].id = 7;
+ westmereEX_PMunits[RBOX0].ctrlRegister = MSR_R0_PMON_BOX_CTRL;
+ westmereEX_PMunits[RBOX0].statusRegister = MSR_R0_PMON_BOX_STATUS;
+ westmereEX_PMunits[RBOX0].ovflRegister = MSR_R0_PMON_BOX_OVF_CTRL;
+
+ thread->counters[PMC35].id = 0;
+ thread->counters[PMC36].id = 1;
+ thread->counters[PMC37].id = 2;
+ thread->counters[PMC38].id = 3;
+ thread->counters[PMC39].id = 4;
+ thread->counters[PMC40].id = 5;
+ thread->counters[PMC41].id = 6;
+ thread->counters[PMC42].id = 7;
+ westmereEX_PMunits[RBOX1].ctrlRegister = MSR_R1_PMON_BOX_CTRL;
+ westmereEX_PMunits[RBOX1].statusRegister = MSR_R1_PMON_BOX_STATUS;
+ westmereEX_PMunits[RBOX1].ovflRegister = MSR_R1_PMON_BOX_OVF_CTRL;
+
+ /* WBOX */
+ thread->counters[PMC43].id = 0;
+ thread->counters[PMC44].id = 1;
+ thread->counters[PMC45].id = 2;
+ thread->counters[PMC46].id = 3;
+ thread->counters[PMC47].id = 31; /* NOTE(review): presumably the bit position of the WBOX fixed counter - confirm */
+ westmereEX_PMunits[WBOX].ctrlRegister = MSR_W_PMON_BOX_CTRL;
+ westmereEX_PMunits[WBOX].statusRegister = MSR_W_PMON_BOX_STATUS;
+ westmereEX_PMunits[WBOX].ovflRegister = MSR_W_PMON_BOX_OVF_CTRL;
+
+ thread->counters[PMC48].id = 0; /* UBOX */
+ westmereEX_PMunits[UBOX].ctrlRegister = MSR_U_PMON_GLOBAL_CTRL;
+ westmereEX_PMunits[UBOX].statusRegister = MSR_U_PMON_GLOBAL_STATUS;
+ westmereEX_PMunits[UBOX].ovflRegister = MSR_U_PMON_GLOBAL_OVF_CTRL;
+
+ /* Set IDs for all CBOXes: i walks the 8 boxes in steps of 5 counters, j is the counter within the box */
+ for (int i=PMC49; i<=PMC88; i+= 5)
+ {
+ for(int j=0; j<5; j++)
+ {
+ thread->counters[i+j].id = j;
+ }
+ }
+ westmereEX_PMunits[CBOX0].ctrlRegister = MSR_C0_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX0].statusRegister = MSR_C0_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX0].ovflRegister = MSR_C0_PMON_BOX_OVF_CTRL;
+ westmereEX_PMunits[CBOX1].ctrlRegister = MSR_C1_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX1].statusRegister = MSR_C1_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX1].ovflRegister = MSR_C1_PMON_BOX_OVF_CTRL;
+ westmereEX_PMunits[CBOX2].ctrlRegister = MSR_C2_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX2].statusRegister = MSR_C2_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX2].ovflRegister = MSR_C2_PMON_BOX_OVF_CTRL;
+ westmereEX_PMunits[CBOX3].ctrlRegister = MSR_C3_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX3].statusRegister = MSR_C3_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX3].ovflRegister = MSR_C3_PMON_BOX_OVF_CTRL;
+ westmereEX_PMunits[CBOX4].ctrlRegister = MSR_C4_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX4].statusRegister = MSR_C4_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX4].ovflRegister = MSR_C4_PMON_BOX_OVF_CTRL;
+ westmereEX_PMunits[CBOX5].ctrlRegister = MSR_C5_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX5].statusRegister = MSR_C5_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX5].ovflRegister = MSR_C5_PMON_BOX_OVF_CTRL;
+ westmereEX_PMunits[CBOX6].ctrlRegister = MSR_C6_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX6].statusRegister = MSR_C6_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX6].ovflRegister = MSR_C6_PMON_BOX_OVF_CTRL;
+ westmereEX_PMunits[CBOX7].ctrlRegister = MSR_C7_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX7].statusRegister = MSR_C7_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX7].ovflRegister = MSR_C7_PMON_BOX_OVF_CTRL;
+
+ thread->counters[PMC99].id = 0;
+ thread->counters[PMC100].id = 1;
+ thread->counters[PMC101].id = 2;
+ thread->counters[PMC102].id = 3;
+ westmereEX_PMunits[SBOX0].ctrlRegister = MSR_S0_PMON_BOX_CTRL;
+ westmereEX_PMunits[SBOX0].statusRegister = MSR_S0_PMON_BOX_STATUS;
+ westmereEX_PMunits[SBOX0].ovflRegister = MSR_S0_PMON_BOX_OVF_CTRL;
+ thread->counters[PMC103].id = 0;
+ thread->counters[PMC104].id = 1;
+ thread->counters[PMC105].id = 2;
+ thread->counters[PMC106].id = 3;
+ westmereEX_PMunits[SBOX1].ctrlRegister = MSR_S1_PMON_BOX_CTRL;
+ westmereEX_PMunits[SBOX1].statusRegister = MSR_S1_PMON_BOX_STATUS;
+ westmereEX_PMunits[SBOX1].ovflRegister = MSR_S1_PMON_BOX_OVF_CTRL;
+
+ if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id) ||
+ lock_acquire((int*) &socket_lock[affinity_core2node_lookup[cpu_id]], cpu_id))
+ { /* only the CPU that owns (or just acquired) the socket lock touches the uncore MSRs */
+ msr_write(cpu_id, MSR_W_PMON_BOX_CTRL, 0x0ULL);
+ msr_write(cpu_id, MSR_W_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_W_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_W_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_W_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_W_PMON_FIXED_CTR, 0x0ULL);
+
+ msr_write(cpu_id, MSR_M0_PMON_BOX_CTRL, 0x0ULL);
+ msr_write(cpu_id, MSR_M0_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_M0_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_M0_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_M0_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_M0_PMON_EVNT_SEL4, 0x0ULL);
+ msr_write(cpu_id, MSR_M0_PMON_EVNT_SEL5, 0x0ULL);
+
+ msr_write(cpu_id, MSR_M1_PMON_BOX_CTRL, 0x0ULL);
+ msr_write(cpu_id, MSR_M1_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_M1_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_M1_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_M1_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_M1_PMON_EVNT_SEL4, 0x0ULL);
+ msr_write(cpu_id, MSR_M1_PMON_EVNT_SEL5, 0x0ULL);
+
+ msr_write(cpu_id, MSR_B0_PMON_BOX_CTRL, 0x0ULL);
+ msr_write(cpu_id, MSR_B0_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_B0_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_B0_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_B0_PMON_EVNT_SEL3, 0x0ULL);
+
+ msr_write(cpu_id, MSR_B1_PMON_BOX_CTRL, 0x0ULL);
+ msr_write(cpu_id, MSR_B1_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_B1_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_B1_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_B1_PMON_EVNT_SEL3, 0x0ULL);
+
+ msr_write(cpu_id, MSR_R0_PMON_BOX_CTRL, 0x0ULL);
+ msr_write(cpu_id, MSR_R0_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_R0_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_R0_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_R0_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_R0_PMON_EVNT_SEL4, 0x0ULL);
+ msr_write(cpu_id, MSR_R0_PMON_EVNT_SEL5, 0x0ULL);
+ msr_write(cpu_id, MSR_R0_PMON_EVNT_SEL6, 0x0ULL);
+ msr_write(cpu_id, MSR_R0_PMON_EVNT_SEL7, 0x0ULL);
+
+ msr_write(cpu_id, MSR_R1_PMON_BOX_CTRL, 0x0ULL);
+ msr_write(cpu_id, MSR_R1_PMON_EVNT_SEL8, 0x0ULL);
+ msr_write(cpu_id, MSR_R1_PMON_EVNT_SEL9, 0x0ULL);
+ msr_write(cpu_id, MSR_R1_PMON_EVNT_SEL10, 0x0ULL);
+ msr_write(cpu_id, MSR_R1_PMON_EVNT_SEL11, 0x0ULL);
+ msr_write(cpu_id, MSR_R1_PMON_EVNT_SEL12, 0x0ULL);
+ msr_write(cpu_id, MSR_R1_PMON_EVNT_SEL13, 0x0ULL);
+ msr_write(cpu_id, MSR_R1_PMON_EVNT_SEL14, 0x0ULL);
+ msr_write(cpu_id, MSR_R1_PMON_EVNT_SEL15, 0x0ULL);
+
+ msr_write(cpu_id, MSR_U_PMON_GLOBAL_EVNT_SEL, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C0_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C0_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C0_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C0_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C0_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C1_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C1_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C1_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C1_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C1_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C2_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C2_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C2_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C2_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C2_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C3_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C3_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C3_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C3_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C3_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C4_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C4_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C4_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C4_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C4_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C5_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C5_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C5_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C5_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C5_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C6_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C6_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C6_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C6_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C6_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C7_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C7_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C7_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C7_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C7_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_S0_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_S0_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_S0_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_S0_PMON_EVNT_SEL3, 0x0ULL);
+
+ msr_write(cpu_id, MSR_S1_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_S1_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_S1_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_S1_PMON_EVNT_SEL3, 0x0ULL);
+
+ flags = 0x0UL;
+ flags |= (1<<29); /* reset all */
+ msr_write(cpu_id, MSR_U_PMON_GLOBAL_CTRL, flags );
+ }
+}
+
+/* MBOX macros */
#define MBOX_GATE_NEHEX(NUM) \
flags = 0x41ULL; \
switch (event->cfgBits) \
@@ -249,16 +536,22 @@ void perfmon_setupCounterThread_nehalemEX(
PerfmonEvent* event,
PerfmonCounterIndex index)
{
- uint64_t flags = 0x0ULL;;
- uint64_t reg = westmereEX_counter_map[index].configRegister;
+ uint64_t flags = 0x0ULL;
+ int haveLock = 0;
+ uint64_t reg = counter_map[index].configRegister;
int cpu_id = perfmon_threadData[thread_id].processorId;
+ uint64_t fixed_flags = msr_read(cpu_id, MSR_PERF_FIXED_CTR_CTRL);
perfmon_threadData[thread_id].counters[index].init = TRUE;
- switch (westmereEX_counter_map[index].type)
+ if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id))
+ {
+ haveLock = 1;
+ }
+
+ switch (counter_map[index].type)
{
case PMC:
- flags = msr_read(cpu_id,reg);
- flags &= ~(0xFFFFU); /* clear lower 16bits */
+ flags = (1<<22)|(1<<16);
/* Intel with standard 8 bit event mask: [7:0] */
flags |= (event->umask<<8) + event->eventId;
@@ -274,53 +567,115 @@ void perfmon_setupCounterThread_nehalemEX(
break;
case FIXED:
+ fixed_flags |= (0x2<<(index*4));
+ msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, fixed_flags);
break;
case MBOX0:
- MBOX_GATE_NEHEX(0);
- msr_write(cpu_id, reg , flags);
- VERBOSEPRINTREG(cpu_id, reg, flags, MBOX0_CTRL)
+ if (haveLock)
+ {
+ MBOX_GATE_NEHEX(0);
+ msr_write(cpu_id, reg , flags);
+ VERBOSEPRINTREG(cpu_id, reg, flags, MBOX0_CTRL)
+ }
break;
case MBOX1:
- MBOX_GATE_NEHEX(1);
- msr_write(cpu_id, reg , flags);
- VERBOSEPRINTREG(cpu_id, reg, flags, MBOX1_CTRL)
+ if (haveLock)
+ {
+ MBOX_GATE_NEHEX(1);
+ msr_write(cpu_id, reg , flags);
+ VERBOSEPRINTREG(cpu_id, reg, flags, MBOX1_CTRL)
+ }
break;
case BBOX0:
-
case BBOX1:
- flags = 0x1ULL; /* set enable bit */
- flags |= (event->eventId<<1);
- msr_write(cpu_id, reg , flags);
- VERBOSEPRINTREG(cpu_id, reg, flags, BBOX_CTRL)
+ if (haveLock)
+ {
+ flags = 0x1ULL; /* set enable bit */
+ flags |= (event->eventId<<1);
+ msr_write(cpu_id, reg , flags);
+ VERBOSEPRINTREG(cpu_id, reg, flags, BBOX_CTRL)
+ }
break;
case RBOX0:
- RBOX_GATE(0);
- msr_write(cpu_id, reg , flags);
- VERBOSEPRINTREG(cpu_id, reg, flags, RBOX0_CTRL)
+ if (haveLock)
+ {
+ RBOX_GATE(0);
+ msr_write(cpu_id, reg , flags);
+ VERBOSEPRINTREG(cpu_id, reg, flags, RBOX0_CTRL)
+ }
break;
case RBOX1:
- RBOX_GATE(1);
- msr_write(cpu_id, reg , flags);
- VERBOSEPRINTREG(cpu_id, reg, flags, RBOX1_CTRL)
+ if (haveLock)
+ {
+ RBOX_GATE(1);
+ msr_write(cpu_id, reg , flags);
+ VERBOSEPRINTREG(cpu_id, reg, flags, RBOX1_CTRL)
+ }
break;
case WBOX:
- if (event->eventId == 0xFF) /* Fixed Counter */
+ if (haveLock)
{
- flags = 0x1ULL; /* set enable bit */
+ if (event->eventId == 0xFF) /* Fixed Counter */
+ {
+ flags = 0x1ULL; /* set enable bit */
+ }
+ else
+ {
+ flags |= (1<<22); /* set enable bit */
+ flags |= (event->umask<<8) + event->eventId;
+ }
+ msr_write(cpu_id, reg , flags);
+ VERBOSEPRINTREG(cpu_id, reg, flags, WBOX_CTRL)
}
- else
+ break;
+
+ case UBOX:
+ if (haveLock)
+ {
+ flags = 0x0ULL;
+ flags |= (1<<22);
+ flags |= event->eventId;
+ fprintf(stderr, "Setup UBOX with value 0x%llx in register 0x%llx, event 0x%x \n", LLU_CAST flags, LLU_CAST reg,event->eventId);
+ msr_write(cpu_id, reg , flags);
+ VERBOSEPRINTREG(cpu_id, reg, flags, UBOX_CTRL)
+ }
+ break;
+
+ case CBOX0:
+ case CBOX1:
+ case CBOX2:
+ case CBOX3:
+ case CBOX4:
+ case CBOX5:
+ case CBOX6:
+ case CBOX7:
+ if (haveLock)
{
- flags |= (1<<22); /* set enable bit */
+ flags = 0x0ULL;
+ flags |= (1<<22);
flags |= (event->umask<<8) + event->eventId;
+ fprintf(stderr, "Setup CBOX with value 0x%llx in register 0x%llx, event 0x%x umask 0x%x \n", LLU_CAST flags, LLU_CAST reg,event->eventId, event->umask);
+ msr_write(cpu_id, reg , flags);
+ VERBOSEPRINTREG(cpu_id, reg, flags, CBOX_CTRL)
+ }
+ break;
+ case SBOX0:
+ case SBOX1:
+ if (haveLock)
+ {
+ flags = 0x0ULL;
+ flags |= (1<<22);
+ flags |= (event->umask<<8);
+ flags |= (event->eventId);
+ msr_write(cpu_id, reg , flags);
+ VERBOSEPRINTREG(cpu_id, reg, flags, SBOX_CTRL)
}
- msr_write(cpu_id, reg , flags);
- VERBOSEPRINTREG(cpu_id, reg, flags, WBOX_CTRL)
break;
default:
@@ -329,3 +684,184 @@ void perfmon_setupCounterThread_nehalemEX(
}
}
+
+/* Actions for Performance Monitoring Session:
+ *
+ * Core Counters (counter is always enabled in PERFEVTSEL register):
+ * 1) Disable counters in global ctrl Register MSR_PERF_GLOBAL_CTRL
+ * 2) Zero according counter registers
+ * 3) Set enable bit in global register flag
+ * 4) Write global register flag
+ *
+ * Uncore Counters (only one core per socket):
+ * 1) Set reset flag in global U Box control register
+ *    (NOTE: the corresponding write is currently commented out below)
+ * 2) Zero according counter registers
+ * 3) Set enable bit in Box control register
+ * 4) Write according uncore Box ctrl register
+ * 5) Set enable bit in global U Box control register
+ * */
+
+/* Zero and enable every initialised counter of the given perfmon thread.
+ * thread_id indexes perfmon_threadData; the MSRs are written on that
+ * entry's processorId. Uncore registers are only touched when this cpu
+ * is the one recorded in socket_lock for its node. */
+void perfmon_startCountersThread_nehalemEX(int thread_id)
+{
+ int haveLock = 0;
+ uint64_t flags = 0x0ULL; /* value written to MSR_PERF_GLOBAL_CTRL at the end */
+ uint32_t uflags[NUM_UNITS]; /* per-unit enable mask, indexed by counter type */
+ int enable_ubox = 0; /* set when at least one UBOX counter is in use */
+ int cpu_id = perfmon_threadData[thread_id].processorId;
+
+ /* Disable all core counters while they are being zeroed */
+ msr_write(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL);
+
+ if (socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id)
+ {
+ /* ubflags is prepared here but never used: the reset write is disabled */
+ uint32_t ubflags = 0x0UL;
+ ubflags |= (1<<29); /* reset all */
+ haveLock = 1;
+ // msr_write(cpu_id, MSR_U_PMON_GLOBAL_CTRL, ubflags );
+ // VERBOSEPRINTREG(cpu_id, MSR_U_PMON_GLOBAL_CTRL, ubflags, UBOX_GLOBAL_CTRL)
+ }
+
+ for ( int i=0; i<NUM_UNITS; i++ )
+ {
+ uflags[i] = 0x0UL;
+ }
+
+ /* Zero each initialised counter register and collect its enable bit.
+ * NOTE(review): this loop is bounded by NUM_PMC while the stop/read
+ * functions use NUM_COUNTERS_WESTMEREEX - confirm both cover the same range. */
+ for ( int i=0; i<NUM_PMC; i++ )
+ {
+ if (perfmon_threadData[thread_id].counters[i].init == TRUE) {
+ if (westmereEX_counter_map[i].type == PMC)
+ {
+ msr_write(cpu_id, westmereEX_counter_map[i].counterRegister , 0x0ULL);
+ flags |= (1<<(i-OFFSET_PMC)); /* enable counter */
+ }
+ else if (westmereEX_counter_map[i].type == FIXED)
+ {
+ msr_write(cpu_id, westmereEX_counter_map[i].counterRegister , 0x0ULL);
+ flags |= (1ULL<<(i+32)); /* enable fixed counter */
+ }
+ /* presumably every type value ordered after UNCORE is an uncore box - confirm against the enum */
+ else if (westmereEX_counter_map[i].type > UNCORE)
+ {
+ if(haveLock)
+ {
+ msr_write(cpu_id, westmereEX_counter_map[i].counterRegister , 0x0ULL);
+ uflags[westmereEX_counter_map[i].type] |=
+ (1<<(perfmon_threadData[thread_id].counters[i].id)); /* enable uncore counter */
+ if (westmereEX_counter_map[i].type == UBOX)
+ {
+ enable_ubox = 1;
+ }
+ }
+ }
+ }
+ }
+
+ /* Only reports the value here; the actual write happens at the end */
+ VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST flags, GLOBAL_CTRL);
+
+ if (haveLock)
+ {
+ for ( int i=0; i<NUM_UNITS; i++ )
+ {
+ /* if counters are enabled write the according box ctrl register */
+ if (uflags[i])
+ {
+ msr_write(cpu_id, westmereEX_PMunits[i].ctrlRegister, uflags[i]);
+ VERBOSEPRINTREG(cpu_id, westmereEX_PMunits[i].ctrlRegister, LLU_CAST uflags[i], BOXCTRL);
+ }
+ }
+
+ /* set global enable flag in U BOX ctrl register */
+ uint32_t ubflags = 0x0UL;
+ ubflags |= (1<<28); /* enable all */
+ if (enable_ubox)
+ {
+ /* bit 0 is added only when a UBOX counter was set up above */
+ ubflags |= (1<<0);
+ }
+ VERBOSEPRINTREG(cpu_id, MSR_U_PMON_GLOBAL_CTRL, LLU_CAST ubflags, UBOX_GLOBAL_CTRL);
+ msr_write(cpu_id, MSR_U_PMON_GLOBAL_CTRL, ubflags );
+ }
+ /* Finally enable counters */
+ msr_write(cpu_id, MSR_PERF_GLOBAL_CTRL, flags);
+ /* NOTE(review): 0x30000000F presumably clears the overflow bits of PMC0-3 and two fixed counters - confirm */
+ msr_write(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, 0x30000000FULL);
+}
+
+/* Stop counting for the given perfmon thread and store the current value
+ * of every initialised counter in its counterData field. Uncore counters
+ * are only read when this cpu is the one recorded in socket_lock for its
+ * node. */
+void perfmon_stopCountersThread_nehalemEX(int thread_id)
+{
+ int haveLock = 0;
+ int cpu_id = perfmon_threadData[thread_id].processorId;
+
+ /* Disable all core counters before reading them */
+ msr_write(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL);
+
+ if (socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id)
+ {
+ uint32_t ubflags = 0x0UL;
+ haveLock = 1;
+ // ubflags |= (1<<29); /* reset all */
+ /* writes zero, i.e. clears the global U Box control register */
+ msr_write(cpu_id, MSR_U_PMON_GLOBAL_CTRL, ubflags );
+ }
+
+ for ( int i=0; i<NUM_COUNTERS_WESTMEREEX; i++ )
+ {
+ if (perfmon_threadData[thread_id].counters[i].init == TRUE)
+ {
+ /* uncore counters: skipped entirely on cpus that do not hold the lock */
+ if (westmereEX_counter_map[i].type > UNCORE)
+ {
+ if(haveLock)
+ {
+ perfmon_threadData[thread_id].counters[i].counterData =
+ msr_read(cpu_id, westmereEX_counter_map[i].counterRegister);
+
+ VERBOSEPRINTREG(cpu_id, westmereEX_counter_map[i].counterRegister,
+ LLU_CAST perfmon_threadData[thread_id].counters[i].counterData, READ_UNCORE);
+ }
+ }
+ else
+ {
+ perfmon_threadData[thread_id].counters[i].counterData =
+ msr_read(cpu_id, westmereEX_counter_map[i].counterRegister);
+
+ VERBOSEPRINTREG(cpu_id, westmereEX_counter_map[i].counterRegister,
+ LLU_CAST perfmon_threadData[thread_id].counters[i].counterData, READ_CORE);
+ }
+ }
+ }
+
+ /* Disabled overflow check. NOTE: `flags` is not declared in this function,
+ * so a declaration must be added before this block can be re-enabled. */
+#if 0
+ flags = msr_read(cpu_id,MSR_PERF_GLOBAL_STATUS);
+ printf ("Status: 0x%llX \n", LLU_CAST flags);
+ if((flags & 0x3) || (flags & (0x3ULL<<32)) )
+ {
+ printf ("Overflow occured \n");
+ }
+#endif
+}
+
+/* Store the current value of every initialised counter of the given
+ * perfmon thread in its counterData field. Unlike the stop function this
+ * writes no control register. Uncore counters are only read when this cpu
+ * is the one recorded in socket_lock for its node. */
+void perfmon_readCountersThread_nehalemEX(int thread_id)
+{
+ int haveLock = 0;
+ int cpu_id = perfmon_threadData[thread_id].processorId;
+
+ if (socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id)
+ {
+ haveLock = 1;
+ }
+
+ for ( int i=0; i<NUM_COUNTERS_WESTMEREEX; i++ )
+ {
+ if (perfmon_threadData[thread_id].counters[i].init == TRUE)
+ {
+ /* uncore counters: counterData is left untouched without the lock */
+ if (westmereEX_counter_map[i].type > UNCORE)
+ {
+ if(haveLock)
+ {
+ perfmon_threadData[thread_id].counters[i].counterData =
+ msr_read(cpu_id, westmereEX_counter_map[i].counterRegister);
+ }
+ }
+ else
+ {
+ perfmon_threadData[thread_id].counters[i].counterData =
+ msr_read(cpu_id, westmereEX_counter_map[i].counterRegister);
+ }
+ }
+ }
+}
+
diff --git a/src/includes/perfmon_nehalemEX_events.txt b/src/includes/perfmon_nehalemEX_events.txt
index 1aa2fa1..565f5ca 100644
--- a/src/includes/perfmon_nehalemEX_events.txt
+++ b/src/includes/perfmon_nehalemEX_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for Intel NehalemEX
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
@@ -534,26 +534,26 @@ UMASK_C_CYCLES_TURBO_C7 0x80
UMASK_C_CYCLES_TURBO_C_ALL 0xFF
EVENT_C_C0_THROTTLE_DIE 0x01 WBOX
-UMASK_C_C0_THROTTLE_DIE_C0 0x01
-UMASK_C_C0_THROTTLE_DIE_C1 0x02
-UMASK_C_C0_THROTTLE_DIE_C2 0x04
-UMASK_C_C0_THROTTLE_DIE_C3 0x08
-UMASK_C_C0_THROTTLE_DIE_C4 0x10
-UMASK_C_C0_THROTTLE_DIE_C5 0x20
-UMASK_C_C0_THROTTLE_DIE_C6 0x40
-UMASK_C_C0_THROTTLE_DIE_C7 0x80
-UMASK_C_C0_THROTTLE_DIE_C_ALL 0xFF
+UMASK_C_C0_THROTTLE_DIE_C0 0x01
+UMASK_C_C0_THROTTLE_DIE_C1 0x02
+UMASK_C_C0_THROTTLE_DIE_C2 0x04
+UMASK_C_C0_THROTTLE_DIE_C3 0x08
+UMASK_C_C0_THROTTLE_DIE_C4 0x10
+UMASK_C_C0_THROTTLE_DIE_C5 0x20
+UMASK_C_C0_THROTTLE_DIE_C6 0x40
+UMASK_C_C0_THROTTLE_DIE_C7 0x80
+UMASK_C_C0_THROTTLE_DIE_C_ALL 0xFF
EVENT_C_C0_THROTTLE_PROCHOT 0x03 WBOX
-UMASK_C_C0_THROTTLE_PROCHOT_C0 0x01
-UMASK_C_C0_THROTTLE_PROCHOT_C1 0x02
-UMASK_C_C0_THROTTLE_PROCHOT_C2 0x04
-UMASK_C_C0_THROTTLE_PROCHOT_C3 0x08
-UMASK_C_C0_THROTTLE_PROCHOT_C4 0x10
-UMASK_C_C0_THROTTLE_PROCHOT_C5 0x20
-UMASK_C_C0_THROTTLE_PROCHOT_C6 0x40
-UMASK_C_C0_THROTTLE_PROCHOT_C7 0x80
-UMASK_C_C0_THROTTLE_PROCHOT_C_ALL 0xFF
+UMASK_C_C0_THROTTLE_PROCHOT_C0 0x01
+UMASK_C_C0_THROTTLE_PROCHOT_C1 0x02
+UMASK_C_C0_THROTTLE_PROCHOT_C2 0x04
+UMASK_C_C0_THROTTLE_PROCHOT_C3 0x08
+UMASK_C_C0_THROTTLE_PROCHOT_C4 0x10
+UMASK_C_C0_THROTTLE_PROCHOT_C5 0x20
+UMASK_C_C0_THROTTLE_PROCHOT_C6 0x40
+UMASK_C_C0_THROTTLE_PROCHOT_C7 0x80
+UMASK_C_C0_THROTTLE_PROCHOT_C_ALL 0xFF
EVENT_C_C0_THROTTLE_TMP 0x00 WBOX
UMASK_C_C0_THROTTLE_TMP_C0 0x01
@@ -1463,7 +1463,7 @@ UMASK_GLOBAL_ARB_BID_PORT6_QLX1 0x0F 0x02 0x00
UMASK_GLOBAL_ARB_BID_PORT7_QLX0 0x14 0x02 0x00
UMASK_GLOBAL_ARB_BID_PORT7_QLX1 0x15 0x02 0x00
-EVENT_GLOBAL_ARB_BID_FAIL 0x01 RBO0A
+EVENT_GLOBAL_ARB_BID_FAIL 0x01 RBOX0
UMASK_GLOBAL_ARB_BID_FAIL_PORT0_QLX0_VN0_HOM 0x02 0x05 0x00
UMASK_GLOBAL_ARB_BID_FAIL_PORT0_QLX0_VN0_SNP 0x02 0x05 0x01
UMASK_GLOBAL_ARB_BID_FAIL_PORT0_QLX0_VN0_NDR 0x02 0x05 0x02
@@ -2751,3 +2751,580 @@ UMASK_TARGET_AVAILABLE_PORT7_QLX1_VN1_NCB 0x15 0x09 0x0D
UMASK_TARGET_AVAILABLE_PORT7_QLX1_VN1_VSM 0x15 0x09 0x0E
UMASK_TARGET_AVAILABLE_PORT7_QLX1_VN1_VLG 0x15 0x09 0x0F
+EVENT_LLC_MISSES 0x14 CBOX
+UMASK_LLC_MISSES_SHARED 0x01
+UMASK_LLC_MISSES_FORWARD 0x02
+UMASK_LLC_MISSES_INVALID 0x04
+UMASK_LLC_MISSES_ALL 0x07
+
+EVENT_LLC_HITS 0x15 CBOX
+UMASK_LLC_HITS_MODIFIED 0x01
+UMASK_LLC_HITS_EXCLUSIVE 0x02
+UMASK_LLC_HITS_SHARED 0x04
+UMASK_LLC_HITS_FORWARD 0x08
+UMASK_LLC_HITS_ALL 0x0F
+
+EVENT_LLC_S_FILLS 0x16 CBOX
+UMASK_LLC_S_FILLS_MODIFIED 0x01
+UMASK_LLC_S_FILLS_EXCLUSIVE 0x02
+UMASK_LLC_S_FILLS_SHARED 0x04
+UMASK_LLC_S_FILLS_FORWARD 0x08
+UMASK_LLC_S_FILLS_ALL 0x0F
+
+EVENT_LLC_VICTIMS 0x17 CBOX
+UMASK_LLC_VICTIMS_MODIFIED 0x01
+UMASK_LLC_VICTIMS_EXCLUSIVE 0x02
+UMASK_LLC_VICTIMS_SHARED 0x04
+UMASK_LLC_VICTIMS_FORWARD 0x08
+UMASK_LLC_VICTIMS_ALL 0x1F
+UMASK_LLC_VICTIMS_FILL_WITHOUT_VICTIMS 0x10
+
+EVENT_ARB_LOSSES 0x0A CBOX
+UMASK_ARB_LOSSES_AD_SB 0x01
+UMASK_ARB_LOSSES_AD_NSB 0x02
+UMASK_ARB_LOSSES_AD_ALL 0x03
+UMASK_ARB_LOSSES_AK_SB 0x04
+UMASK_ARB_LOSSES_AK_NSB 0x08
+UMASK_ARB_LOSSES_AK_ALL 0x0C
+UMASK_ARB_LOSSES_BL_SB 0x10
+UMASK_ARB_LOSSES_BL_NSB 0x20
+UMASK_ARB_LOSSES_BL_ALL 0x30
+UMASK_ARB_LOSSES_IV 0x40
+UMASK_ARB_LOSSES_ALL 0x7F
+
+EVENT_ARB_WINS 0x09 CBOX
+UMASK_ARB_WINS_AD_SB 0x01
+UMASK_ARB_WINS_AD_NSB 0x02
+UMASK_ARB_WINS_AD_ALL 0x03
+UMASK_ARB_WINS_AK_SB 0x04
+UMASK_ARB_WINS_AK_NSB 0x08
+UMASK_ARB_WINS_AK_ALL 0x0C
+UMASK_ARB_WINS_BL_SB 0x10
+UMASK_ARB_WINS_BL_NSB 0x20
+UMASK_ARB_WINS_BL_ALL 0x30
+UMASK_ARB_WINS_IV 0x40
+UMASK_ARB_WINS_ALL 0x7F
+
+EVENT_BOUNCES_C2P_AK 0x02 CBOX
+UMASK_BOUNCES_C2P_AK_SB 0x01
+UMASK_BOUNCES_C2P_AK_NSB 0x02
+UMASK_BOUNCES_C2P_AK_ALL 0x03
+
+EVENT_BOUNCES_C2P_BL 0x03 CBOX
+UMASK_BOUNCES_C2P_BL_SB 0x01
+UMASK_BOUNCES_C2P_BL_NSB 0x02
+UMASK_BOUNCES_C2P_BL_ALL 0x03
+
+EVENT_BOUNCES_C2P_IV 0x04 CBOX
+UMASK_BOUNCES_C2P_IV 0x00
+
+EVENT_BOUNCES_P2C_AD 0x01 CBOX
+UMASK_BOUNCES_P2C_AD_SB 0x01
+UMASK_BOUNCES_P2C_AD_NSB 0x02
+UMASK_BOUNCES_P2C_AD_ALL 0x03
+
+EVENT_EGRESS_BYPASS_WINS 0x0C CBOX
+UMASK_EGRESS_BYPASS_WINS_AD_BYP0 0x01
+UMASK_EGRESS_BYPASS_WINS_AD_BYP1 0x02
+UMASK_EGRESS_BYPASS_WINS_AK_BYP0 0x04
+UMASK_EGRESS_BYPASS_WINS_AK_BYP1 0x08
+UMASK_EGRESS_BYPASS_WINS_BL_BYP0 0x10
+UMASK_EGRESS_BYPASS_WINS_BL_BYP1 0x20
+UMASK_EGRESS_BYPASS_WINS_IV_BYP0 0x40
+UMASK_EGRESS_BYPASS_WINS_IV_BYP1 0x80
+
+EVENT_INGRESS_BYPASS_WINS_AD 0x0E CBOX
+UMASK_INGRESS_BYPASS_WINS_AD_IRQ_BYP0 0x01
+UMASK_INGRESS_BYPASS_WINS_AD_IRQ_BYP1 0x02
+UMASK_INGRESS_BYPASS_WINS_AD_IPQ_BYP0 0x04
+UMASK_INGRESS_BYPASS_WINS_AD_IPQ_BYP1 0x08
+
+EVENT_MAF_ACK 0x10 CBOX
+UMASK_MAF_ACK 0x00
+
+EVENT_MAF_NACK1 0x11 CBOX
+UMASK_MAF_NACK1_GO_PENDING 0x01
+UMASK_MAF_NACK1_VIC_PENDING 0x02
+UMASK_MAF_NACK1_SNP_PENDING 0x04
+UMASK_MAF_NACK1_AC_PENDING 0x08
+UMASK_MAF_NACK1_IDX_BLOCK 0x10
+UMASK_MAF_NACK1_PA_BLOCK 0x20
+UMASK_MAF_NACK1_IDLE_QPI 0x40
+UMASK_MAF_NACK1_ALL_MAF_NACK1 0x80
+UMASK_MAF_NACK1_TOTAL_MAF_NACKS 0xFF
+
+EVENT_MAF_NACK2 0x12 CBOX
+UMASK_MAF_NACK2_MAF_FULL 0x01
+UMASK_MAF_NACK2_EGRESS_FULL 0x02
+UMASK_MAF_NACK2_VIQ_FULL 0x04
+UMASK_MAF_NACK2_NO_TRACKER_CREDITS 0x08
+UMASK_MAF_NACK2_NO_S_FIFO_CREDITS 0x10
+UMASK_MAF_NACK2_NO_S_REQTBL_ENTRIES 0x20
+UMASK_MAF_NACK2_WB_PENDING 0x40
+UMASK_MAF_NACK2_NACK2_ELSE 0x80
+
+EVENT_OCCUPANCY_IPQ 0x1A CBOX
+UMASK_OCCUPANCY_IPQ 0x00
+
+EVENT_OCCUPANCY_IRQ 0x18 CBOX
+UMASK_OCCUPANCY_IRQ 0x00
+
+EVENT_OCCUPANCY_MAF 0x1E CBOX
+UMASK_OCCUPANCY_MAF 0x00
+
+EVENT_OCCUPANCY_RSPF 0x22 CBOX
+UMASK_OCCUPANCY_RSPF 0x00
+
+EVENT_OCCUPANCY_RWRF 0x20 CBOX
+UMASK_OCCUPANCY_RWRF 0x00
+
+EVENT_OCCUPANCY_VIQ 0x1C CBOX
+UMASK_OCCUPANCY_VIQ 0x00
+
+EVENT_SINKS_C2P 0x06 CBOX
+UMASK_SINKS_C2P_IV 0x01
+UMASK_SINKS_C2P_AK 0x02
+UMASK_SINKS_C2P_BL 0x04
+
+EVENT_SINKS_P2C 0x05 CBOX
+UMASK_SINKS_P2C_IV 0x01
+UMASK_SINKS_P2C_AK 0x02
+UMASK_SINKS_P2C_BL 0x04
+
+EVENT_SINKS_S2C 0x07 CBOX
+UMASK_SINKS_S2C_AD 0x01
+UMASK_SINKS_S2C_AK 0x02
+UMASK_SINKS_S2C_BL 0x04
+
+EVENT_SINKS_S2P_BL 0x08 CBOX
+UMASK_SINKS_S2P_BL 0x00
+
+EVENT_SNP_HITS 0x28 CBOX
+UMASK_SNP_HITS_REMOTE_RD_HITM 0x01
+UMASK_SNP_HITS_REMOTE_RD_HITE 0x02
+UMASK_SNP_HITS_REMOTE_RD_HITS 0x04
+UMASK_SNP_HITS_REMOTE_RD_HITF 0x08
+UMASK_SNP_HITS_REMOTE_RFO_HITM 0x10
+UMASK_SNP_HITS_REMOTE_RFO_HITE 0x20
+UMASK_SNP_HITS_REMOTE_RFO_HITS 0x40
+UMASK_SNP_HITS_REMOTE_RFO_HITF 0x80
+UMASK_SNP_HITS_REMOTE_HITM 0x11
+UMASK_SNP_HITS_REMOTE_HITE 0x22
+UMASK_SNP_HITS_REMOTE_HITS 0x44
+UMASK_SNP_HITS_REMOTE_HITF 0x88
+UMASK_SNP_HITS_REMOTE_ANY 0xFF
+
+EVENT_SNPS 0x27 CBOX
+UMASK_SNPS_REMOTE_RD 0x01
+UMASK_SNPS_REMOTE_RFO 0x02
+UMASK_SNPS_REMOTE_ANY 0x03
+
+EVENT_STARVED_EGRESS 0x0B CBOX
+UMASK_STARVED_EGRESS_P2C_AD_SB 0x01
+UMASK_STARVED_EGRESS_C2P_AD_SB 0x02
+UMASK_STARVED_EGRESS_AD_SB 0x03
+UMASK_STARVED_EGRESS_AD_NSB 0x04
+UMASK_STARVED_EGRESS_AD 0x07
+UMASK_STARVED_EGRESS_AK_SB 0x08
+UMASK_STARVED_EGRESS_AK_NSB 0x10
+UMASK_STARVED_EGRESS_AK 0x18
+UMASK_STARVED_EGRESS_BL_SB 0x20
+UMASK_STARVED_EGRESS_BL_NSB 0x40
+UMASK_STARVED_EGRESS_BL 0x60
+UMASK_STARVED_EGRESS_IV 0x80
+
+EVENT_TRANS_IPQ 0x1B CBOX
+UMASK_TRANS_IPQ 0x00
+
+EVENT_TRANS_IRQ 0x19 CBOX
+UMASK_TRANS_IRQ 0x00
+
+EVENT_TRANS_MAF 0x1F CBOX
+UMASK_TRANS_MAF 0x00
+
+EVENT_TRANS_RSPF 0x23 CBOX
+UMASK_TRANS_RSPF 0x00
+
+EVENT_TRANS_RWRF 0x21 CBOX
+UMASK_TRANS_RWRF 0x00
+
+EVENT_TRANS_VIQ 0x1D CBOX
+UMASK_TRANS_VIQ 0x00
+
+EVENT_BUF_VALID_LOCAL_INT 0x00 UBOX
+UMASK_BUF_VALID_LOCAL_INT 0x00
+
+EVENT_BUF_VALID_REMOTE_INT 0x01 UBOX
+UMASK_BUF_VALID_REMOTE_INT 0x00
+
+EVENT_BUF_VALID_LOCK 0x02 UBOX
+UMASK_BUF_VALID_LOCK 0x00
+
+EVENT_BUF_VALID_STST 0x03 UBOX
+UMASK_BUF_VALID_STST 0x00
+
+EVENT_BUF_VALID_SPC_CYCLES 0x04 UBOX
+UMASK_BUF_VALID_SPC_CYCLES 0x00
+
+EVENT_CORRECTED_ERR 0x1E4 UBOX
+UMASK_CORRECTED_ERR 0x00
+
+EVENT_FATAL_ERR 0x1E6 UBOX
+UMASK_FATAL_ERR 0x00
+
+EVENT_IPIS_SENT 0xF9 UBOX
+UMASK_IPIS_SENT 0x00
+
+EVENT_RECOV 0x1DF UBOX
+UMASK_RECOV 0x00
+
+EVENT_U2R_REQUESTS 0x050 UBOX
+UMASK_U2R_REQUESTS 0x00
+
+EVENT_U2R_REQUEST_CYCLES 0x051 UBOX
+UMASK_U2R_REQUEST_CYCLES 0x00
+
+EVENT_WOKEN 0xF8 UBOX
+UMASK_WOKEN 0x00
+
+EVENT_TO_R_B_HOM_MSGQ_CYCLES_FULL 0x03 SBOX
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_FULL_RBOX 0x01
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_FULL_BBOX 0x02
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_FULL_ALL 0x03
+
+EVENT_TO_R_B_HOM_MSGQ_CYCLES_NE 0x06 SBOX
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_NE_RBOX 0x01
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_NE_BBOX 0x02
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_NE_ALL 0x03
+
+EVENT_TO_R_B_HOM_MSGQ_OCCUPANCY 0x07 SBOX
+UMASK_TO_R_B_HOM_MSGQ_OCCUPANCY_RBOX 0x01
+UMASK_TO_R_B_HOM_MSGQ_OCCUPANCY_BBOX 0x02
+UMASK_TO_R_B_HOM_MSGQ_OCCUPANCY_ALL 0x03
+
+EVENT_B2S_DRS_BYPASS 0x53 SBOX
+UMASK_B2S_DRS_BYPASS 0x00
+
+EVENT_BBOX_CREDITS 0x77 SBOX
+UMASK_BBOX_CREDITS 0x00
+
+EVENT_BBOX_CREDIT_RETURNS 0x6B SBOX
+UMASK_BBOX_CREDIT_RETURNS 0x00
+
+EVENT_BBOX_HOM_BYPASS 0x54 SBOX
+UMASK_BBOX_HOM_BYPASS 0x00
+
+EVENT_EGRESS_BYPASS 0x40 SBOX
+UMASK_EGRESS_BYPASS_AD_CW 0x01
+UMASK_EGRESS_BYPASS_AD_CCW 0x02
+UMASK_EGRESS_BYPASS_AD 0x03
+UMASK_EGRESS_BYPASS_AK_CW 0x04
+UMASK_EGRESS_BYPASS_AK_CCW 0x08
+UMASK_EGRESS_BYPASS_AK 0x0C
+UMASK_EGRESS_BYPASS_BL_CW 0x10
+UMASK_EGRESS_BYPASS_BL_CCW 0x20
+UMASK_EGRESS_BYPASS_BL 0x30
+
+EVENT_EGRESS_ARB_WINS 0x41 SBOX
+UMASK_EGRESS_ARB_WINS_AD_CW 0x01
+UMASK_EGRESS_ARB_WINS_AD_CCW 0x02
+UMASK_EGRESS_ARB_WINS_AD 0x03
+UMASK_EGRESS_ARB_WINS_AK_CW 0x04
+UMASK_EGRESS_ARB_WINS_AK_CCW 0x08
+UMASK_EGRESS_ARB_WINS_AK 0x0C
+UMASK_EGRESS_ARB_WINS_BL_CW 0x10
+UMASK_EGRESS_ARB_WINS_BL_CCW 0x20
+UMASK_EGRESS_ARB_WINS_BL 0x30
+
+EVENT_EGRESS_ARB_LOSSES 0x42 SBOX
+UMASK_EGRESS_ARB_LOSSES_AD_CW 0x01
+UMASK_EGRESS_ARB_LOSSES_AD_CCW 0x02
+UMASK_EGRESS_ARB_LOSSES_AD 0x03
+UMASK_EGRESS_ARB_LOSSES_AK_CW 0x04
+UMASK_EGRESS_ARB_LOSSES_AK_CCW 0x08
+UMASK_EGRESS_ARB_LOSSES_AK 0x0C
+UMASK_EGRESS_ARB_LOSSES_BL_CW 0x10
+UMASK_EGRESS_ARB_LOSSES_BL_CCW 0x20
+UMASK_EGRESS_ARB_LOSSES_BL 0x30
+
+EVENT_EGRESS_STARVED 0x43 SBOX
+UMASK_EGRESS_STARVED_AD_CW 0x01
+UMASK_EGRESS_STARVED_AD_CCW 0x02
+UMASK_EGRESS_STARVED_AD 0x03
+UMASK_EGRESS_STARVED_AK_CW 0x04
+UMASK_EGRESS_STARVED_AK_CCW 0x08
+UMASK_EGRESS_STARVED_AK 0x0C
+UMASK_EGRESS_STARVED_BL_CW 0x10
+UMASK_EGRESS_STARVED_BL_CCW 0x20
+UMASK_EGRESS_STARVED_BL 0x30
+
+EVENT_FLITS_SENT_DRS 0x65 SBOX
+UMASK_FLITS_SENT_DRS 0x00
+
+EVENT_FLITS_SENT_NCB 0x69 SBOX
+UMASK_FLITS_SENT_NCB 0x00
+
+EVENT_FLITS_SENT_NCS 0x67 SBOX
+UMASK_FLITS_SENT_NCS 0x00
+
+EVENT_HALFLINE_BYPASS 0x30 SBOX
+UMASK_HALFLINE_BYPASS 0x00
+
+EVENT_NO_CREDIT_AD 0x87 SBOX
+UMASK_NO_CREDIT_AD 0x00
+
+EVENT_NO_CREDIT_AK 0x88 SBOX
+UMASK_NO_CREDIT_AK 0x00
+
+EVENT_NO_CREDIT_BL 0x89 SBOX
+UMASK_NO_CREDIT_BL 0x00
+
+EVENT_NO_CREDIT_HOM 0x80 SBOX
+UMASK_NO_CREDIT_HOM 0x00
+
+EVENT_NO_CREDIT_SNP 0x81 SBOX
+UMASK_NO_CREDIT_SNP 0x00
+
+EVENT_NO_CREDIT_DRS 0x82 SBOX
+UMASK_NO_CREDIT_DRS 0x00
+
+EVENT_NO_CREDIT_NCS 0x83 SBOX
+UMASK_NO_CREDIT_NCS 0x00
+
+EVENT_NO_CREDIT_NCB 0x84 SBOX
+UMASK_NO_CREDIT_NCB 0x00
+
+EVENT_NO_CREDIT_NDR 0x85 SBOX
+UMASK_NO_CREDIT_NDR 0x00
+
+EVENT_NO_CREDIT_IPQ 0x8A SBOX
+UMASK_NO_CREDIT_IPQ 0x00
+
+EVENT_NO_CREDIT_VNA 0x86 SBOX
+UMASK_NO_CREDIT_VNA_RBOX 0x01
+UMASK_NO_CREDIT_VNA_BBOX 0x02
+UMASK_NO_CREDIT_VNA_ALL 0x03
+
+EVENT_PKTS_RCVD_DRS_FROM_R 0x72 SBOX
+UMASK_PKTS_RCVD_DRS_FROM_R 0x00
+
+EVENT_PKTS_RCVD_DRS_FROM_B 0x73 SBOX
+UMASK_PKTS_RCVD_DRS_FROM_B 0x00
+
+EVENT_PKTS_RCVD_NCB 0x75 SBOX
+UMASK_PKTS_RCVD_NCB 0x00
+
+EVENT_PKTS_RCVD_NCS 0x74 SBOX
+UMASK_PKTS_RCVD_NCS 0x00
+
+EVENT_PKTS_RCVD_NDR 0x70 SBOX
+UMASK_PKTS_RCVD_NDR 0x00
+
+EVENT_PKTS_RCVD_SNP 0x71 SBOX
+UMASK_PKTS_RCVD_SNP 0x00
+
+EVENT_PKTS_SENT_DRS 0x64 SBOX
+UMASK_PKTS_SENT_DRS_CBOX0_4 0x01
+UMASK_PKTS_SENT_DRS_CBOX1_5 0x02
+UMASK_PKTS_SENT_DRS_CBOX2_6 0x04
+UMASK_PKTS_SENT_DRS_CBOX3_7 0x08
+UMASK_PKTS_SENT_DRS_ALL 0x0F
+
+EVENT_PKTS_SENT_HOM 0x60 SBOX
+UMASK_PKTS_SENT_HOM_RBOX 0x01
+UMASK_PKTS_SENT_HOM_BBOX 0x02
+UMASK_PKTS_SENT_HOM_ALL 0x03
+
+EVENT_PKTS_SENT_NCB 0x68 SBOX
+UMASK_PKTS_SENT_NCB_CBOX0_4 0x01
+UMASK_PKTS_SENT_NCB_CBOX1_5 0x02
+UMASK_PKTS_SENT_NCB_CBOX2_6 0x04
+UMASK_PKTS_SENT_NCB_CBOX3_7 0x08
+UMASK_PKTS_SENT_NCB_ALL 0x0F
+
+EVENT_PKTS_SENT_NCS 0x66 SBOX
+UMASK_PKTS_SENT_NCS_CBOX0_4 0x01
+UMASK_PKTS_SENT_NCS_CBOX1_5 0x02
+UMASK_PKTS_SENT_NCS_CBOX2_6 0x04
+UMASK_PKTS_SENT_NCS_CBOX3_7 0x08
+UMASK_PKTS_SENT_NCS_ALL 0x0F
+
+EVENT_PKTS_SENT_NDR 0x63 SBOX
+UMASK_PKTS_SENT_NDR 0x00
+
+EVENT_PKTS_SENT_SNP 0x62 SBOX
+UMASK_PKTS_SENT_SNP 0x00
+
+EVENT_RBOX_CREDIT_RETURNS 0x6A SBOX
+UMASK_RBOX_CREDIT_RETURNS 0x00
+
+EVENT_RBOX_CREDIT_CARRIERS 0x76 SBOX
+UMASK_RBOX_CREDIT_CARRIERS 0x00
+
+EVENT_RBOX_HOM_BYPASS 0x50 SBOX
+UMASK_RBOX_HOM_BYPASS 0x00
+
+EVENT_RBOX_SNP_BYPASS 0x51 SBOX
+UMASK_RBOX_SNP_BYPASS_SNP 0x01
+UMASK_RBOX_SNP_BYPASS_BIG_SNP 0x02
+UMASK_RBOX_SNP_BYPASS_ALL 0x03
+
+EVENT_REQ_TBL_OCCUPANCY 0x31 SBOX
+UMASK_REQ_TBL_OCCUPANCY_LOCAL 0x01
+UMASK_REQ_TBL_OCCUPANCY_REMOTE 0x02
+UMASK_REQ_TBL_OCCUPANCY_ALL 0x03
+
+EVENT_S2B_HOM_BYPASS 0x52 SBOX
+UMASK_S2B_HOM_BYPASS 0x00
+
+EVENT_TO_RING_B2S_MSGQ_CYCLES_FULL 0x2B SBOX
+UMASK_TO_RING_B2S_MSGQ_CYCLES_FULL 0x00
+
+EVENT_TO_RING_B2S_MSGQ_CYCLES_NE 0x2D SBOX
+UMASK_TO_RING_B2S_MSGQ_CYCLES_NE 0x00
+
+EVENT_TO_RING_B2S_MSGQ_OCCUPANCY 0x2F SBOX
+UMASK_TO_RING_B2S_MSGQ_OCCUPANCY 0x00
+
+EVENT_TO_RING_MSGQ_OCCUPANCY 0x26 SBOX
+UMASK_TO_RING_MSGQ_OCCUPANCY_SNP 0x01
+UMASK_TO_RING_MSGQ_OCCUPANCY_NCS 0x02
+UMASK_TO_RING_MSGQ_OCCUPANCY_NCB 0x04
+UMASK_TO_RING_MSGQ_OCCUPANCY_ALL 0x07
+
+EVENT_TO_RING_NCB_MSGQ_CYCLES_FULL 0x21 SBOX
+UMASK_TO_RING_NCB_MSGQ_CYCLES_FULL 0x00
+
+EVENT_TO_RING_NCB_MSGQ_CYCLES_NE 0x24 SBOX
+UMASK_TO_RING_NCB_MSGQ_CYCLES_NE 0x00
+
+EVENT_TO_RING_NCS_MSGQ_CYCLES_FULL 0x22 SBOX
+UMASK_TO_RING_NCS_MSGQ_CYCLES_FULL 0x00
+
+EVENT_TO_RING_NDR_MSGQ_CYCLES_FULL 0x27 SBOX
+UMASK_TO_RING_NDR_MSGQ_CYCLES_FULL 0x00
+
+EVENT_TO_RING_NDR_MSGQ_CYCLES_NE 0x28 SBOX
+UMASK_TO_RING_NDR_MSGQ_CYCLES_NE 0x00
+
+EVENT_TO_RING_NDR_MSGQ_OCCUPANCY 0x29 SBOX
+UMASK_TO_RING_NDR_MSGQ_OCCUPANCY 0x00
+
+EVENT_TO_RING_R2S_MSGQ_CYCLES_FULL 0x2A SBOX
+UMASK_TO_RING_R2S_MSGQ_CYCLES_FULL 0x00
+
+EVENT_TO_RING_R2S_MSGQ_CYCLES_NE 0x2C SBOX
+UMASK_TO_RING_R2S_MSGQ_CYCLES_NE 0x00
+
+EVENT_TO_RING_R2S_MSGQ_OCCUPANCY 0x2E SBOX
+UMASK_TO_RING_R2S_MSGQ_OCCUPANCY 0x00
+
+EVENT_TO_RING_SNP_MSGQ_CYCLES_FULL 0x20 SBOX
+UMASK_TO_RING_SNP_MSGQ_CYCLES_FULL 0x00
+
+EVENT_TO_RING_SNP_MSGQ_CYCLES_NE 0x23 SBOX
+UMASK_TO_RING_SNP_MSGQ_CYCLES_NE 0x00
+
+EVENT_TO_R_DRS_MSGQ_CYCLES_FULL 0x0E SBOX
+UMASK_TO_R_DRS_MSGQ_CYCLES_FULL_CBOX0_4 0x01
+UMASK_TO_R_DRS_MSGQ_CYCLES_FULL_CBOX1_5 0x02
+UMASK_TO_R_DRS_MSGQ_CYCLES_FULL_CBOX2_6 0x04
+UMASK_TO_R_DRS_MSGQ_CYCLES_FULL_CBOX3_7 0x08
+UMASK_TO_R_DRS_MSGQ_CYCLES_FULL_ALL 0x0F
+
+EVENT_TO_R_DRS_MSGQ_CYCLES_NE 0x0F SBOX
+UMASK_TO_R_DRS_MSGQ_CYCLES_NE_CBOX0_4 0x01
+UMASK_TO_R_DRS_MSGQ_CYCLES_NE_CBOX1_5 0x02
+UMASK_TO_R_DRS_MSGQ_CYCLES_NE_CBOX2_6 0x04
+UMASK_TO_R_DRS_MSGQ_CYCLES_NE_CBOX3_7 0x08
+UMASK_TO_R_DRS_MSGQ_CYCLES_NE_ALL 0x0F
+
+EVENT_TO_R_DRS_MSGQ_OCCUPANCY 0x10 SBOX
+UMASK_TO_R_DRS_MSGQ_OCCUPANCY_CBOX0_4 0x01
+UMASK_TO_R_DRS_MSGQ_OCCUPANCY_CBOX1_5 0x02
+UMASK_TO_R_DRS_MSGQ_OCCUPANCY_CBOX2_6 0x04
+UMASK_TO_R_DRS_MSGQ_OCCUPANCY_CBOX3_7 0x08
+UMASK_TO_R_DRS_MSGQ_OCCUPANCY_ALL 0x0F
+
+EVENT_TO_R_B_HOM_MSGQ_CYCLES_FULL 0x03 SBOX
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_FULL_RBOX 0x01
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_FULL_BBOX 0x02
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_FULL_ALL 0x03
+
+EVENT_TO_R_B_HOM_MSGQ_CYCLES_NE 0x06 SBOX
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_NE_RBOX 0x01
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_NE_BBOX 0x02
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_NE_ALL 0x03
+
+EVENT_TO_R_B_HOM_MSGQ_OCCUPANCY 0x07 SBOX
+UMASK_TO_R_B_HOM_MSGQ_OCCUPANCY_RBOX 0x01
+UMASK_TO_R_B_HOM_MSGQ_OCCUPANCY_BBOX 0x02
+UMASK_TO_R_B_HOM_MSGQ_OCCUPANCY_ALL 0x03
+
+EVENT_TO_R_NCB_MSGQ_CYCLES_FULL 0x11 SBOX
+UMASK_TO_R_NCB_MSGQ_CYCLES_FULL_CBOX0_4 0x01
+UMASK_TO_R_NCB_MSGQ_CYCLES_FULL_CBOX1_5 0x02
+UMASK_TO_R_NCB_MSGQ_CYCLES_FULL_CBOX2_6 0x04
+UMASK_TO_R_NCB_MSGQ_CYCLES_FULL_CBOX3_7 0x08
+UMASK_TO_R_NCB_MSGQ_CYCLES_FULL_ALL 0x0F
+
+EVENT_TO_R_NCB_MSGQ_CYCLES_NE 0x12 SBOX
+UMASK_TO_R_NCB_MSGQ_CYCLES_NE_CBOX0_4 0x01
+UMASK_TO_R_NCB_MSGQ_CYCLES_NE_CBOX1_5 0x02
+UMASK_TO_R_NCB_MSGQ_CYCLES_NE_CBOX2_6 0x04
+UMASK_TO_R_NCB_MSGQ_CYCLES_NE_CBOX3_7 0x08
+UMASK_TO_R_NCB_MSGQ_CYCLES_NE_ALL 0x0F
+
+EVENT_TO_R_NCB_MSGQ_OCCUPANCY 0x13 SBOX
+UMASK_TO_R_NCB_MSGQ_OCCUPANCY_CBOX0_4 0x01
+UMASK_TO_R_NCB_MSGQ_OCCUPANCY_CBOX1_5 0x02
+UMASK_TO_R_NCB_MSGQ_OCCUPANCY_CBOX2_6 0x04
+UMASK_TO_R_NCB_MSGQ_OCCUPANCY_CBOX3_7 0x08
+UMASK_TO_R_NCB_MSGQ_OCCUPANCY_ALL 0x0F
+
+EVENT_TO_R_NCS_MSGQ_CYCLES_FULL 0x14 SBOX
+UMASK_TO_R_NCS_MSGQ_CYCLES_FULL_CBOX0_4 0x01
+UMASK_TO_R_NCS_MSGQ_CYCLES_FULL_CBOX1_5 0x02
+UMASK_TO_R_NCS_MSGQ_CYCLES_FULL_CBOX2_6 0x04
+UMASK_TO_R_NCS_MSGQ_CYCLES_FULL_CBOX3_7 0x08
+UMASK_TO_R_NCS_MSGQ_CYCLES_FULL_ALL 0x0F
+
+EVENT_TO_R_NCS_MSGQ_CYCLES_NE 0x15 SBOX
+UMASK_TO_R_NCS_MSGQ_CYCLES_NE_CBOX0_4 0x01
+UMASK_TO_R_NCS_MSGQ_CYCLES_NE_CBOX1_5 0x02
+UMASK_TO_R_NCS_MSGQ_CYCLES_NE_CBOX2_6 0x04
+UMASK_TO_R_NCS_MSGQ_CYCLES_NE_CBOX3_7 0x08
+UMASK_TO_R_NCS_MSGQ_CYCLES_NE_ALL 0x0F
+
+EVENT_TO_R_NCS_MSGQ_OCCUPANCY 0x16 SBOX
+UMASK_TO_R_NCS_MSGQ_OCCUPANCY_CBOX0_4 0x01
+UMASK_TO_R_NCS_MSGQ_OCCUPANCY_CBOX1_5 0x02
+UMASK_TO_R_NCS_MSGQ_OCCUPANCY_CBOX2_6 0x04
+UMASK_TO_R_NCS_MSGQ_OCCUPANCY_CBOX3_7 0x08
+UMASK_TO_R_NCS_MSGQ_OCCUPANCY_ALL 0x0F
+
+EVENT_TO_R_NDR_MSGQ_CYCLES_FULL 0x0B SBOX
+UMASK_TO_R_NDR_MSGQ_CYCLES_FULL 0x00
+
+EVENT_TO_R_NDR_MSGQ_CYCLES_NE 0x0C SBOX
+UMASK_TO_R_NDR_MSGQ_CYCLES_NE 0x00
+
+EVENT_TO_R_NDR_MSGQ_OCCUPANCY 0x0D SBOX
+UMASK_TO_R_NDR_MSGQ_OCCUPANCY 0x00
+
+EVENT_TO_R_PROG_EV 0x00 SBOX
+UMASK_TO_R_PROG_EV 0x00
+
+EVENT_TO_R_B_REQUESTS 0x6C SBOX
+UMASK_TO_R_B_REQUESTS_LOCAL 0x01
+UMASK_TO_R_B_REQUESTS_REMOTE 0x02
+UMASK_TO_R_B_REQUESTS_ALL 0x03
+
+EVENT_TO_R_SNP_MSGQ_CYCLES_FULL 0x08 SBOX
+UMASK_TO_R_SNP_MSGQ_CYCLES_FULL 0x00
+
+EVENT_TO_R_SNP_MSGQ_CYCLES_NE 0x09 SBOX
+UMASK_TO_R_SNP_MSGQ_CYCLES_NE 0x00
+
+EVENT_TO_R_SNP_MSGQ_OCCUPANCY 0x0A SBOX
+UMASK_TO_R_SNP_MSGQ_OCCUPANCY 0x00
diff --git a/src/includes/perfmon_nehalem_counters.h b/src/includes/perfmon_nehalem_counters.h
index da61ea4..d3831c1 100644
--- a/src/includes/perfmon_nehalem_counters.h
+++ b/src/includes/perfmon_nehalem_counters.h
@@ -5,8 +5,8 @@
*
* Description: Counter Header File of perfmon module for Nehalem.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/perfmon_nehalem_events.txt b/src/includes/perfmon_nehalem_events.txt
index 2abe611..0eeed50 100644
--- a/src/includes/perfmon_nehalem_events.txt
+++ b/src/includes/perfmon_nehalem_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for Intel Nehalem
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
diff --git a/src/includes/perfmon_p6_events.txt b/src/includes/perfmon_p6_events.txt
index 4ebe03b..0db8338 100644
--- a/src/includes/perfmon_p6_events.txt
+++ b/src/includes/perfmon_p6_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for Pentium 3
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
diff --git a/src/includes/perfmon_phi.h b/src/includes/perfmon_phi.h
index 4dfddec..0f5dd54 100644
--- a/src/includes/perfmon_phi.h
+++ b/src/includes/perfmon_phi.h
@@ -5,8 +5,8 @@
*
* Description: Header File of perfmon module for Xeon Phi.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -61,15 +61,14 @@ void perfmon_setupCounterThread_phi(
PerfmonEvent* event,
PerfmonCounterIndex index)
{
- uint64_t flags;
+ uint64_t flags = 0x0ULL;
uint64_t reg = phi_counter_map[index].configRegister;
int cpu_id = perfmon_threadData[thread_id].processorId;
+ perfmon_threadData[thread_id].counters[index].init = TRUE;
if (phi_counter_map[index].type == PMC)
{
- perfmon_threadData[thread_id].counters[index].init = TRUE;
- flags = msr_read(cpu_id,reg);
- flags &= ~(0xFFFFU);
+ flags = (1<<22)|(1<<16);
/* Intel with standard 8 bit event mask: [7:0] */
flags |= (event->umask<<8) + event->eventId;
diff --git a/src/includes/perfmon_phi_counters.h b/src/includes/perfmon_phi_counters.h
index 7203cfa..edf0658 100644
--- a/src/includes/perfmon_phi_counters.h
+++ b/src/includes/perfmon_phi_counters.h
@@ -5,8 +5,8 @@
*
* Description: Counter Header File of perfmon module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/perfmon_phi_events.txt b/src/includes/perfmon_phi_events.txt
index 36f4167..d6393ba 100644
--- a/src/includes/perfmon_phi_events.txt
+++ b/src/includes/perfmon_phi_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for Intel Xeon Phi
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
diff --git a/src/includes/perfmon_pm.h b/src/includes/perfmon_pm.h
index 1821d0a..88346d1 100644
--- a/src/includes/perfmon_pm.h
+++ b/src/includes/perfmon_pm.h
@@ -5,8 +5,8 @@
*
* Description: Header File of perfmon module Pentium M.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -58,12 +58,12 @@ void perfmon_init_pm(PerfmonThread *thread)
msr_write(cpu_id, MSR_PERFEVTSEL1, 0x0ULL);
/* Preinit of two PMC counters */
- flags |= (1<<16); /* user mode flag */
- flags |= (1<<19); /* pin control flag */
+ //flags |= (1<<16); /* user mode flag */
+ //flags |= (1<<19); /* pin control flag */
// flags |= (1<<22); /* enable flag */
- msr_write(cpu_id, MSR_PERFEVTSEL0, flags);
- msr_write(cpu_id, MSR_PERFEVTSEL1, flags);
+ /*msr_write(cpu_id, MSR_PERFEVTSEL0, flags);
+ msr_write(cpu_id, MSR_PERFEVTSEL1, flags);*/
}
void perfmon_setupCounterThread_pm(
@@ -76,8 +76,7 @@ void perfmon_setupCounterThread_pm(
int cpu_id = perfmon_threadData[thread_id].processorId;
perfmon_threadData[thread_id].counters[index].init = TRUE;
- flags = msr_read(cpu_id,reg);
- flags &= ~(0xFFFFU);
+ flags = (1<<16)|(1<<19);
/* Intel with standard 8 bit event mask: [7:0] */
flags |= (event->umask<<8) + event->eventId;
@@ -134,7 +133,7 @@ void perfmon_stopCountersThread_pm(int thread_id)
if (perfmon_threadData[thread_id].counters[i].init == TRUE)
{
perfmon_threadData[thread_id].counters[i].counterData =
- msr_read(cpu_id, pm_counter_map[i].counterRegister);
+ msr_read(cpu_id, pm_counter_map[i].counterRegister);
}
}
}
diff --git a/src/includes/perfmon_pm_counters.h b/src/includes/perfmon_pm_counters.h
index 4d14f96..9119096 100644
--- a/src/includes/perfmon_pm_counters.h
+++ b/src/includes/perfmon_pm_counters.h
@@ -5,8 +5,8 @@
*
* Description: Counter Header File of perfmon module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/perfmon_pm_events.txt b/src/includes/perfmon_pm_events.txt
index 5765f05..9ed83a8 100644
--- a/src/includes/perfmon_pm_events.txt
+++ b/src/includes/perfmon_pm_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for Intel Pentium M
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
diff --git a/src/includes/perfmon_sandybridge.h b/src/includes/perfmon_sandybridge.h
index ec9687a..f11714a 100644
--- a/src/includes/perfmon_sandybridge.h
+++ b/src/includes/perfmon_sandybridge.h
@@ -5,8 +5,8 @@
*
* Description: Header File of perfmon module for Sandy Bridge.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -67,13 +67,13 @@ void perfmon_init_sandybridge(PerfmonThread *thread)
msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, 0x222ULL);
/* Preinit of PERFEVSEL registers */
- flags |= (1<<22); /* enable flag */
- flags |= (1<<16); /* user mode flag */
+ //flags |= (1<<22); /* enable flag */
+ //flags |= (1<<16); /* user mode flag */
- msr_write(cpu_id, MSR_PERFEVTSEL0, flags);
+ /*msr_write(cpu_id, MSR_PERFEVTSEL0, flags);
msr_write(cpu_id, MSR_PERFEVTSEL1, flags);
msr_write(cpu_id, MSR_PERFEVTSEL2, flags);
- msr_write(cpu_id, MSR_PERFEVTSEL3, flags);
+ msr_write(cpu_id, MSR_PERFEVTSEL3, flags);*/
/* TODO Robust implementation which also works if stuff is not there */
if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id) ||
@@ -180,12 +180,11 @@ void perfmon_init_sandybridge(PerfmonThread *thread)
LLU_CAST reg, \
LLU_CAST flags); \
} \
-if(haveLock) { \
- uflags = pci_read(cpu_id, channel, reg); \
- uflags &= ~(0xFFFFU); \
- uflags |= (event->umask<<8) + event->eventId; \
- pci_write(cpu_id, channel, reg, uflags); \
-}
+ if(haveLock) { \
+ uflags = (1<<22); \
+ uflags |= (event->umask<<8) + event->eventId; \
+ pci_write(cpu_id, channel, reg, uflags); \
+ }
void perfmon_setupCounterThread_sandybridge(
@@ -198,6 +197,8 @@ void perfmon_setupCounterThread_sandybridge(
uint32_t uflags;
uint64_t reg = sandybridge_counter_map[index].configRegister;
int cpu_id = perfmon_threadData[thread_id].processorId;
+ uint64_t fixed_flags = msr_read(cpu_id, MSR_PERF_FIXED_CTR_CTRL);
+ uint64_t orig_fixed_flags = fixed_flags;
perfmon_threadData[thread_id].counters[index].init = TRUE;
if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id))
@@ -209,8 +210,9 @@ void perfmon_setupCounterThread_sandybridge(
{
case PMC:
- flags = msr_read(cpu_id,reg);
- flags &= ~(0xFFFFU); /* clear lower 16bits */
+ //flags = msr_read(cpu_id,reg);
+ //flags &= ~(0xFFFFU); /* clear lower 16bits */
+ flags = (1<<22)|(1<<16);
/* Intel with standard 8 bit event mask: [7:0] */
flags |= (event->umask<<8) + event->eventId;
@@ -233,6 +235,7 @@ void perfmon_setupCounterThread_sandybridge(
break;
case FIXED:
+ fixed_flags |= (0x2 << (index*4));
break;
case POWER:
@@ -261,8 +264,9 @@ void perfmon_setupCounterThread_sandybridge(
{
if(haveLock)
{
- uflags = pci_read(cpu_id, PCI_QPI_DEVICE_PORT_0, reg);
- uflags &= ~(0xFFFFU);
+ //uflags = pci_read(cpu_id, PCI_QPI_DEVICE_PORT_0, reg);
+ //uflags &= ~(0xFFFFU);
+ uflags = (1<<22);
uflags |= (1UL<<21) + event->eventId; /* Set extension bit */
printf("UFLAGS 0x%x \n",uflags);
pci_write(cpu_id, PCI_QPI_DEVICE_PORT_0, reg, uflags);
@@ -294,8 +298,9 @@ void perfmon_setupCounterThread_sandybridge(
{
if(haveLock)
{
- uflags = pci_read(cpu_id, PCI_QPI_DEVICE_PORT_1, reg);
- uflags &= ~(0xFFFFU);
+ //uflags = pci_read(cpu_id, PCI_QPI_DEVICE_PORT_1, reg);
+ //uflags &= ~(0xFFFFU);
+ uflags = (1<<22);
uflags |= (1UL<<21) + event->eventId; /* Set extension bit */
pci_write(cpu_id, PCI_QPI_DEVICE_PORT_1, reg, uflags);
@@ -320,6 +325,10 @@ void perfmon_setupCounterThread_sandybridge(
/* should never be reached */
break;
}
+ if (fixed_flags != orig_fixed_flags)
+ {
+ msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, fixed_flags);
+ }
}
void perfmon_startCountersThread_sandybridge(int thread_id)
@@ -392,7 +401,7 @@ void perfmon_startCountersThread_sandybridge(int thread_id)
case MBOXFIX:
if(haveLock)
{
- pci_write(cpu_id, PCI_IMC_DEVICE_CH_0, PCI_UNC_MC_PMON_FIXED_CTL, 0x48000UL);
+ pci_write(cpu_id, counter_map[i].device, PCI_UNC_MC_PMON_FIXED_CTL, 0x48000UL);
}
break;
diff --git a/src/includes/perfmon_sandybridge_counters.h b/src/includes/perfmon_sandybridge_counters.h
index 8f709ba..afe9c04 100644
--- a/src/includes/perfmon_sandybridge_counters.h
+++ b/src/includes/perfmon_sandybridge_counters.h
@@ -5,8 +5,8 @@
*
* Description: Counter header file of perfmon module for Sandy Bridge.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -28,8 +28,8 @@
* =======================================================================================
*/
-#define NUM_COUNTERS_SANDYBRIDGE 69
-#define NUM_COUNTERS_UNCORE_SANDYBRIDGE 44
+#define NUM_COUNTERS_SANDYBRIDGE 32
+#define NUM_COUNTERS_UNCORE_SANDYBRIDGE 12
#define NUM_COUNTERS_CORE_SANDYBRIDGE 8
static PerfmonCounterMap sandybridge_counter_map[NUM_COUNTERS_SANDYBRIDGE] = {
@@ -46,69 +46,30 @@ static PerfmonCounterMap sandybridge_counter_map[NUM_COUNTERS_SANDYBRIDGE] = {
{"TMP0", PMC7, THERMAL, 0, 0, 0, 0},
/* RAPL counters */
{"PWR0", PMC8, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0},
- {"PWR1", PMC9, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0},
- {"PWR2", PMC10, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0},
- {"PWR3", PMC11, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0},
- /* CBOX counters */
- {"CBOX0C0", PMC12, CBOX0, MSR_UNC_C0_PMON_CTL0, MSR_UNC_C0_PMON_CTR0, 0, 0},
- {"CBOX0C1", PMC13, CBOX0, MSR_UNC_C0_PMON_CTL1, MSR_UNC_C0_PMON_CTR1, 0, 0},
- {"CBOX0C2", PMC14, CBOX0, MSR_UNC_C0_PMON_CTL2, MSR_UNC_C0_PMON_CTR2, 0, 0},
- {"CBOX0C3", PMC15, CBOX0, MSR_UNC_C0_PMON_CTL3, MSR_UNC_C0_PMON_CTR3, 0, 0},
- {"CBOX1C0", PMC16, CBOX1, MSR_UNC_C1_PMON_CTL0, MSR_UNC_C1_PMON_CTR0, 0, 0},
- {"CBOX1C1", PMC17, CBOX1, MSR_UNC_C1_PMON_CTL1, MSR_UNC_C1_PMON_CTR1, 0, 0},
- {"CBOX1C2", PMC18, CBOX1, MSR_UNC_C1_PMON_CTL2, MSR_UNC_C1_PMON_CTR2, 0, 0},
- {"CBOX1C3", PMC19, CBOX1, MSR_UNC_C1_PMON_CTL3, MSR_UNC_C1_PMON_CTR3, 0, 0},
- {"CBOX2C0", PMC20, CBOX2, MSR_UNC_C2_PMON_CTL0, MSR_UNC_C2_PMON_CTR0, 0, 0},
- {"CBOX2C1", PMC21, CBOX2, MSR_UNC_C2_PMON_CTL1, MSR_UNC_C2_PMON_CTR1, 0, 0},
- {"CBOX2C2", PMC22, CBOX2, MSR_UNC_C2_PMON_CTL2, MSR_UNC_C2_PMON_CTR2, 0, 0},
- {"CBOX2C3", PMC23, CBOX2, MSR_UNC_C2_PMON_CTL3, MSR_UNC_C2_PMON_CTR3, 0, 0},
- {"CBOX3C0", PMC24, CBOX3, MSR_UNC_C3_PMON_CTL0, MSR_UNC_C3_PMON_CTR0, 0, 0},
- {"CBOX3C1", PMC25, CBOX3, MSR_UNC_C3_PMON_CTL1, MSR_UNC_C3_PMON_CTR1, 0, 0},
- {"CBOX3C2", PMC26, CBOX3, MSR_UNC_C3_PMON_CTL2, MSR_UNC_C3_PMON_CTR2, 0, 0},
- {"CBOX3C3", PMC27, CBOX3, MSR_UNC_C3_PMON_CTL3, MSR_UNC_C3_PMON_CTR3, 0, 0},
- {"CBOX4C0", PMC28, CBOX4, MSR_UNC_C4_PMON_CTL0, MSR_UNC_C4_PMON_CTR0, 0, 0},
- {"CBOX4C1", PMC29, CBOX4, MSR_UNC_C4_PMON_CTL1, MSR_UNC_C4_PMON_CTR1, 0, 0},
- {"CBOX4C2", PMC30, CBOX4, MSR_UNC_C4_PMON_CTL2, MSR_UNC_C4_PMON_CTR2, 0, 0},
- {"CBOX4C3", PMC31, CBOX4, MSR_UNC_C4_PMON_CTL3, MSR_UNC_C4_PMON_CTR3, 0, 0},
- {"CBOX5C0", PMC32, CBOX5, MSR_UNC_C5_PMON_CTL0, MSR_UNC_C5_PMON_CTR0, 0, 0},
- {"CBOX5C1", PMC33, CBOX5, MSR_UNC_C5_PMON_CTL1, MSR_UNC_C5_PMON_CTR1, 0, 0},
- {"CBOX5C2", PMC34, CBOX5, MSR_UNC_C5_PMON_CTL2, MSR_UNC_C5_PMON_CTR2, 0, 0},
- {"CBOX5C3", PMC35, CBOX5, MSR_UNC_C5_PMON_CTL3, MSR_UNC_C5_PMON_CTR3, 0, 0},
- {"CBOX6C0", PMC36, CBOX6, MSR_UNC_C6_PMON_CTL0, MSR_UNC_C6_PMON_CTR0, 0, 0},
- {"CBOX6C1", PMC37, CBOX6, MSR_UNC_C6_PMON_CTL1, MSR_UNC_C6_PMON_CTR1, 0, 0},
- {"CBOX6C2", PMC38, CBOX6, MSR_UNC_C6_PMON_CTL2, MSR_UNC_C6_PMON_CTR2, 0, 0},
- {"CBOX6C3", PMC39, CBOX6, MSR_UNC_C6_PMON_CTL3, MSR_UNC_C6_PMON_CTR3, 0, 0},
- {"CBOX7C0", PMC40, CBOX7, MSR_UNC_C7_PMON_CTL0, MSR_UNC_C7_PMON_CTR0, 0, 0},
- {"CBOX7C1", PMC41, CBOX7, MSR_UNC_C7_PMON_CTL1, MSR_UNC_C7_PMON_CTR1, 0, 0},
- {"CBOX7C2", PMC42, CBOX7, MSR_UNC_C7_PMON_CTL2, MSR_UNC_C7_PMON_CTR2, 0, 0},
- {"CBOX7C3", PMC43, CBOX7, MSR_UNC_C7_PMON_CTL3, MSR_UNC_C7_PMON_CTR3, 0, 0},
+ {"PWR1", PMC9, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0},
+ {"PWR2", PMC10, POWER, 0, MSR_PP1_ENERGY_STATUS, 0, 0},
+ {"PWR3", PMC11, POWER, 0, MSR_DRAM_ENERGY_STATUS, 0, 0},
/* IMC Counters: 4 48bit wide per memory channel, split in two reads */
- {"MBOX0C0",PMC44, MBOX0, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_CH_0},
- {"MBOX1C0",PMC45, MBOX0, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_CH_0},
- {"MBOX2C0",PMC46, MBOX0, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_CH_0},
- {"MBOX3C0",PMC47, MBOX0, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_CH_0},
- {"MBOX0C1",PMC48, MBOX1, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_CH_1},
- {"MBOX1C1",PMC49, MBOX1, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_CH_1},
- {"MBOX2C1",PMC50, MBOX1, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_CH_1},
- {"MBOX3C1",PMC51, MBOX1, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_CH_1},
- {"MBOX0C2",PMC52, MBOX2, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_CH_2},
- {"MBOX1C2",PMC53, MBOX2, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_CH_2},
- {"MBOX2C2",PMC54, MBOX2, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_CH_2},
- {"MBOX3C2",PMC55, MBOX2, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_CH_2},
- {"MBOX0C3",PMC56, MBOX3, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_CH_3},
- {"MBOX1C3",PMC57, MBOX3, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_CH_3},
- {"MBOX2C3",PMC58, MBOX3, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_CH_3},
- {"MBOX3C3",PMC59, MBOX3, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_CH_3},
- {"MBOXFIX",PMC60, MBOXFIX, 0, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_UNC_MC_PMON_FIXED_CTL},
- /* QPI counters four 48bit wide per port, split in two reads */
- {"SBOX0P0",PMC61, SBOX0, PCI_UNC_QPI_PMON_CTL_0, PCI_UNC_QPI_PMON_CTR_0_A, PCI_UNC_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_0},
- {"SBOX1P0",PMC62, SBOX0, PCI_UNC_QPI_PMON_CTL_1, PCI_UNC_QPI_PMON_CTR_1_A, PCI_UNC_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_0},
- {"SBOX2P0",PMC63, SBOX0, PCI_UNC_QPI_PMON_CTL_2, PCI_UNC_QPI_PMON_CTR_2_A, PCI_UNC_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_0},
- {"SBOX3P0",PMC64, SBOX0, PCI_UNC_QPI_PMON_CTL_3, PCI_UNC_QPI_PMON_CTR_3_A, PCI_UNC_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_0},
- {"SBOX0P1",PMC65, SBOX1, PCI_UNC_QPI_PMON_CTL_0, PCI_UNC_QPI_PMON_CTR_0_A, PCI_UNC_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_1},
- {"SBOX1P1",PMC66, SBOX1, PCI_UNC_QPI_PMON_CTL_1, PCI_UNC_QPI_PMON_CTR_1_A, PCI_UNC_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_1},
- {"SBOX2P1",PMC67, SBOX1, PCI_UNC_QPI_PMON_CTL_2, PCI_UNC_QPI_PMON_CTR_2_A, PCI_UNC_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_1},
- {"SBOX3P1",PMC68, SBOX1, PCI_UNC_QPI_PMON_CTL_3, PCI_UNC_QPI_PMON_CTR_3_A, PCI_UNC_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_1}
+ {"MBOX0C0",PMC12, MBOX0, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_CH_0},
+ {"MBOX0C1",PMC13, MBOX0, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_CH_0},
+ {"MBOX0C2",PMC14, MBOX0, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_CH_0},
+ {"MBOX0C3",PMC15, MBOX0, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_CH_0},
+ {"MBOX1C0",PMC16, MBOX1, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_CH_1},
+ {"MBOX1C1",PMC17, MBOX1, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_CH_1},
+ {"MBOX1C2",PMC18, MBOX1, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_CH_1},
+ {"MBOX1C3",PMC19, MBOX1, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_CH_1},
+ {"MBOX2C0",PMC20, MBOX2, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_CH_2},
+ {"MBOX2C1",PMC21, MBOX2, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_CH_2},
+ {"MBOX2C2",PMC22, MBOX2, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_CH_2},
+ {"MBOX2C3",PMC23, MBOX2, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_CH_2},
+ {"MBOX3C0",PMC24, MBOX3, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_CH_3},
+ {"MBOX3C1",PMC25, MBOX3, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_CH_3},
+ {"MBOX3C2",PMC26, MBOX3, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_CH_3},
+ {"MBOX3C3",PMC27, MBOX3, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_CH_3},
+ {"MBOX0FIX",PMC28, MBOXFIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_CH_0},
+ {"MBOX1FIX",PMC29, MBOXFIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_CH_1},
+ {"MBOX2FIX",PMC30, MBOXFIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_CH_2},
+ {"MBOX3FIX",PMC31, MBOXFIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_CH_3},
};
diff --git a/src/includes/perfmon_sandybridge_events.txt b/src/includes/perfmon_sandybridge_events.txt
index 72f6009..ec4d397 100644
--- a/src/includes/perfmon_sandybridge_events.txt
+++ b/src/includes/perfmon_sandybridge_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for Intel SandyBridge
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
@@ -183,7 +183,10 @@ UMASK_IDQ_MS_MITE_UOPS 0x20
UMASK_IDQ_MS_UOPS 0x30
EVENT_ICACHE 0x80 PMC
+UMASK_ICACHE_HITS 0x01
UMASK_ICACHE_MISSES 0x02
+UMASK_ICACHE_ACCESSES 0x03
+UMASK_ICACHE_IFETCH_STALL 0x04
EVENT_ITLB_MISSES 0x85 PMC
UMASK_ITLB_MISSES_CAUSES_A_WALK 0x01
@@ -342,11 +345,17 @@ UMASK_MEM_UOP_RETIRED_STORES_LOCK 0x22
UMASK_MEM_UOP_RETIRED_LOADS_SPLIT 0x41
UMASK_MEM_UOP_RETIRED_STORES_SPLIT 0x42
-EVENT_MEMLOAD_UOPS_RETIRED 0xD1 PMC
-UMASK_MEMLOAD_UOPS_RETIRED_L1_HIT 0x01
-UMASK_MEMLOAD_UOPS_RETIRED_L2_HIT 0x02
-UMASK_MEMLOAD_UOPS_RETIRED_LLC_HIT 0x04
-UMASK_MEMLOAD_UOPS_RETIRED_HIT_LFB 0x40
+EVENT_MEM_LOAD_UOPS_RETIRED 0xD1 PMC
+UMASK_MEM_LOAD_UOPS_RETIRED_L1_HIT 0x01
+UMASK_MEM_LOAD_UOPS_RETIRED_L1_MISS 0x08
+UMASK_MEM_LOAD_UOPS_RETIRED_L1_ALL 0x09
+UMASK_MEM_LOAD_UOPS_RETIRED_L2_HIT 0x02
+UMASK_MEM_LOAD_UOPS_RETIRED_L2_MISS 0x10
+UMASK_MEM_LOAD_UOPS_RETIRED_L2_ALL 0x12
+UMASK_MEM_LOAD_UOPS_RETIRED_L3_HIT 0x04
+UMASK_MEM_LOAD_UOPS_RETIRED_L3_MISS 0x20
+UMASK_MEM_LOAD_UOPS_RETIRED_L3_ALL 0x24
+UMASK_MEM_LOAD_UOPS_RETIRED_HIT_LFB 0x40
EVENT_MEM_LOAD_UOPS_LLC_HIT_RETIRED 0xD2 PMC
UMASK_MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_MISS 0x01
@@ -406,243 +415,7 @@ EVENT_MEM_LOAD_UOPS_LLC_MISS_RETIRED 0xD3 PMC
UMASK_MEM_LOAD_UOPS_LLC_MISS_RETIRED_LOCAL_DRAM 0x01
UMASK_MEM_LOAD_UOPS_LLC_MISS_RETIRED_REMOTE_DRAM 0x04
-EVENT_CBO_CLOCKTICKS 0x00 CBOX
-UMASK_CBO_CLOCKTICKS 0x00
-
-EVENT_COUNTER0_OCCUPANCY 0x1F CBOX0
-UMASK_COUNTER0_OCCUPANCY 0x00
-
-EVENT_ISMQ_DRD_MISS_OCC 0x21 CBOX0|CBOX1
-UMASK_ISMQ_DRD_MISS_OCC 0x00
-
-EVENT_LLC_LOOKUP 0x34 CBOX0|CBOX1
-UMASK_LLC_LOOKUP_DATA_READ 0x03
-UMASK_LLC_LOOKUP_WRITE 0x05
-UMASK_LLC_LOOKUP_REMOTE_SNOOP 0x09
-UMASK_LLC_LOOKUP_NID 0x41
-
-EVENT_LLC_VICTIMS 0x37 CBOX0|CBOX1
-UMASK_LLC_VICTIMS_M_STATE 0x01
-UMASK_LLC_VICTIMS_E_STATE 0x02
-UMASK_LLC_VICTIMS_S_STATE 0x04
-UMASK_LLC_VICTIMS_MISS 0x08
-UMASK_LLC_VICTIMS_NID 0x40
-
-EVENT_CBO_MISC 0x39 CBOX0|CBOX1
-UMASK_CBO_MISC_RSPI_WAS_FSE 0x01
-UMASK_CBO_MISC_WC_ALIASING 0x02
-UMASK_CBO_MISC_STARTED 0x04
-UMASK_CBO_MISC_RFO_HIT_S 0x08
-
-EVENT_RING_AD_USED 0x1B CBOX2|CBOX3
-UMASK_RING_AD_USED_UP_EVEN 0x01
-UMASK_RING_AD_USED_UP_ODD 0x02
-UMASK_RING_AD_USED_DOWN_EVEN 0x04
-UMASK_RING_AD_USED_DOWN_ODD 0x08
-
-EVENT_RING_AK_USED 0x1C CBOX2|CBOX3
-UMASK_RING_AK_USED_UP_EVEN 0x01
-UMASK_RING_AK_USED_UP_ODD 0x02
-UMASK_RING_AK_USED_DOWN_EVEN 0x04
-UMASK_RING_AK_USED_DOWN_ODD 0x08
-
-EVENT_RING_BL_USED 0x1D CBOX2|CBOX3
-UMASK_RING_BL_USED_UP_EVEN 0x01
-UMASK_RING_BL_USED_UP_ODD 0x02
-UMASK_RING_BL_USED_DOWN_EVEN 0x04
-UMASK_RING_BL_USED_DOWN_ODD 0x08
-
-EVENT_RING_BOUNCES 0x05 CBOX0|CBOX1
-UMASK_RING_BOUNCES_AK_CORE 0x02
-UMASK_RING_BOUNCES_BL_CORE 0x04
-UMASK_RING_BOUNCES_IV_CORE 0x08
-
-EVENT_RING_IV_USED 0x1E CBOX2|CBOX3
-UMASK_RING_IV_USED_ANY 0x0F
-
-EVENT_RING_SRC_THRTL 0x05 CBOX0|CBOX1
-UMASK_RING_SRC_THRTL 0x07
-
-EVENT_RXR_EXT_STARVED 0x12 CBOX0|CBOX1
-UMASK_RXR_EXT_STARVED_IRQ 0x01
-UMASK_RXR_EXT_STARVED_IPQ 0x02
-UMASK_RXR_EXT_STARVED_ISMQ 0x04
-UMASK_RXR_EXT_STARVED_ISMQ_BIDS 0x08
-
-EVENT_RXR_INSERTS 0x13 CBOX0|CBOX1
-UMASK_RXR_INSERTS_IRQ 0x01
-UMASK_RXR_INSERTS_IRQ_REJECTED 0x02
-UMASK_RXR_INSERTS_IPQ 0x04
-UMASK_RXR_INSERTS_VFIFO 0x10
-
-EVENT_RXR_IPQ_RETRY 0x31 CBOX0|CBOX1
-UMASK_RXR_IPQ_RETRY_ANY 0x01
-UMASK_RXR_IPQ_RETRY_FULL 0x02
-UMASK_RXR_IPQ_RETRY_ADDR_CONFLICT 0x04
-UMASK_RXR_IPQ_RETRY_QPI_CREDITS 0x10
-
-EVENT_RXR_IRQ_RETRY 0x32 CBOX0|CBOX1
-UMASK_RXR_IRQ_RETRY_ANY 0x01
-UMASK_RXR_IRQ_RETRY_FULL 0x02
-UMASK_RXR_IRQ_RETRY_ADDR_CONFLICT 0x04
-UMASK_RXR_IRQ_RETRY_RTID 0x08
-UMASK_RXR_IRQ_RETRY_QPI_CREDITS 0x10
-
-EVENT_RXR_ISMQ_RETRY 0x33 CBOX0|CBOX1
-UMASK_RXR_ISMQ_RETRY_ANY 0x01
-UMASK_RXR_ISMQ_RETRY_FULL 0x02
-UMASK_RXR_ISMQ_RETRY_ADDR_CONFLICT 0x04
-UMASK_RXR_ISMQ_RETRY_RTID 0x08
-UMASK_RXR_ISMQ_RETRY_QPI_CREDITS 0x10
-
-EVENT_RXR_OCCUPANCY 0x11 CBOX0
-UMASK_RXR_OCCUPANCY_IRQ 0x01
-UMASK_RXR_OCCUPANCY_IRQ_REJECTED 0x02
-UMASK_RXR_OCCUPANCY_IPQ 0x04
-UMASK_RXR_OCCUPANCY_VIFO 0x10
-
-EVENT_TOR_INSERTS 0x35 CBOX1
-UMASK_TOR_INSERTS_OPCODE 0x01
-UMASK_TOR_INSERTS_EVICTION 0x04
-UMASK_TOR_INSERTS_WB 0x10
-UMASK_TOR_INSERTS_MISS_OPCODE 0x03
-UMASK_TOR_INSERTS_MISS_ALL 0x0A
-UMASK_TOR_INSERTS_NID_OPCODE 0x41
-UMASK_TOR_INSERTS_NID_EVICTION 0x44
-UMASK_TOR_INSERTS_NID_ALL 0x48
-UMASK_TOR_INSERTS_NID_WB 0x50
-UMASK_TOR_INSERTS_NID_MISS_OPCODE 0x43
-UMASK_TOR_INSERTS_NID_MISS_ALL 0x4A
-
-EVENT_TOR_OCCUPANCY 0x36 CBOX0
-UMASK_TOR_OCCUPANCY_OPCODE 0x01
-UMASK_TOR_OCCUPANCY_EVICTION 0x04
-UMASK_TOR_OCCUPANCY_ALL 0x08
-UMASK_TOR_OCCUPANCY_MISS_OPCODE 0x03
-UMASK_TOR_OCCUPANCY_MISS_ALL 0x0A
-UMASK_TOR_OCCUPANCY_NID_OPCODE 0x41
-UMASK_TOR_OCCUPANCY_NID_EVICTION 0x44
-UMASK_TOR_OCCUPANCY_NID_ALL 0x48
-UMASK_TOR_OCCUPANCY_NID_MISS_OPCODE 0x43
-UMASK_TOR_OCCUPANCY_NID_MISS_ALL 0x4A
-
-EVENT_TXT_ADS_USED 0x04 CBOX0|CBOX1
-UMASK_TXT_ADS_USED 0x00
-
-EVENT_TXT_INSERTS 0x02 CBOX0|CBOX1
-UMASK_TXT_INSERTS_AD_CACHE 0x01
-UMASK_TXT_INSERTS_AK_CACHE 0x02
-UMASK_TXT_INSERTS_BL_CACHE 0x04
-UMASK_TXT_INSERTS_IV_CACHE 0x08
-UMASK_TXT_INSERTS_AD_CORE 0x10
-UMASK_TXT_INSERTS_AK_CORE 0x20
-UMASK_TXT_INSERTS_BL_CORE 0x40
-
-EVENT_HA_CLOCKTICKS 0x00 BBOX
-UMASK_HA_CLOCKTICKS 0x00
-
-EVENT_CONFLICT_CYCLES 0x0B BBOX
-UMASK_CONFLICT_CYCLES_NO_CONFLICT 0x01
-UMASK_CONFLICT_CYCLES_CONFLICT 0x02
-
-EVENT_DIRECT2CORE_COUNT 0x11 BBOX
-UMASK_DIRECT2CORE_COUNT 0x00
-
-EVENT_DIRECT2CORE_CYCLES_DISABLED 0x12 BBOX
-UMASK_DIRECT2CORE_CYCLES_DISABLED 0x00
-
-EVENT_DIRECT2CORE_TXN_OVERRIDE 0x13 BBOX
-UMASK_DIRECT2CORE_TXN_OVERRIDE 0x00
-
-EVENT_DIRECTORY_LOOKUP 0x0C BBOX
-UMASK_DIRECTORY_LOOKUP_SNP 0x01
-UMASK_DIRECTORY_LOOKUP_NO_SNP 0x02
-
-EVENT_DIRECTORY_UPDATE 0x0D BBOX
-UMASK_DIRECTORY_UPDATE_SET 0x01
-UMASK_DIRECTORY_UPDATE_CLEAR 0x02
-UMASK_DIRECTORY_UPDATE_ANY 0x03
-
-EVENT_IGR_NO_CREDIT_CYCLES 0x22 BBOX
-UMASK_IGR_NO_CREDIT_CYCLES_AD_QPI0 0x01
-UMASK_IGR_NO_CREDIT_CYCLES_AD_QPI1 0x02
-UMASK_IGR_NO_CREDIT_CYCLES_BL_QPI0 0x04
-UMASK_IGR_NO_CREDIT_CYCLES_BL_QPI1 0x08
-
-EVENT_IMC_RETRY 0x1E BBOX
-UMASK_IMC_RETRY 0x00
-
-EVENT_IMC_WRITES 0x1A BBOX
-UMASK_IMC_WRITES_FULL 0x01
-UMASK_IMC_WRITES_PARTIAL 0x02
-UMASK_IMC_WRITES_FULL_ISOCH 0x04
-UMASK_IMC_WRITES_PARTIAL_ISOCH 0x08
-UMASK_IMC_WRITES_ALL 0x0F
-
-EVENT_REQUESTS 0x01 BBOX
-UMASK_REQUESTS_READS 0x03
-UMASK_REQUESTS_WRITES 0x0C
-
-EVENT_RPQ_CYCLES_NO_REG_CREDITS 0x15 BBOX
-UMASK_RPQ_CYCLES_NO_REG_CREDITS_CHN0 0x01
-UMASK_RPQ_CYCLES_NO_REG_CREDITS_CHN1 0x02
-UMASK_RPQ_CYCLES_NO_REG_CREDITS_CHN2 0x04
-UMASK_RPQ_CYCLES_NO_REG_CREDITS_CHN3 0x08
-UMASK_RPQ_CYCLES_NO_REG_CREDITS_ALL 0x0F
-
-EVENT_TAD_REQUESTS_G0 0x1B BBOX
-UMASK_TAD_REQUESTS_G0_REGION_0 0x01
-UMASK_TAD_REQUESTS_G0_REGION_1 0x02
-UMASK_TAD_REQUESTS_G0_REGION_2 0x04
-UMASK_TAD_REQUESTS_G0_REGION_3 0x08
-UMASK_TAD_REQUESTS_G0_REGION_4 0x10
-UMASK_TAD_REQUESTS_G0_REGION_5 0x20
-UMASK_TAD_REQUESTS_G0_REGION_6 0x40
-UMASK_TAD_REQUESTS_G0_REGION_7 0x80
-
-EVENT_TAD_REQUESTS_G1 0x1C BBOX
-UMASK_TAD_REQUESTS_G1_REGION_8 0x01
-UMASK_TAD_REQUESTS_G1_REGION_9 0x02
-UMASK_TAD_REQUESTS_G1_REGION_10 0x04
-UMASK_TAD_REQUESTS_G1_REGION_11 0x08
-
-EVENT_TRACKER_INSERTS 0x06 BBOX
-UMASK_TRACKER_INSERTS_ALL 0x03
-
-EVENT_TXR_AD 0x0F BBOX
-UMASK_TXR_AD_NDR 0x01
-UMASK_TXR_AD_SNP 0x02
-
-EVENT_TXR_AD_CYCLES_FULL 0x2A BBOX
-UMASK_TXR_AD_CYCLES_FULL_SCHED0 0x01
-UMASK_TXR_AD_CYCLES_FULL_SCHED1 0x02
-UMASK_TXR_AD_CYCLES_FULL_ALL 0x03
-
-EVENT_TXR_AK_CYCLES_FULL 0x32 BBOX
-UMASK_TXR_AK_CYCLES_FULL_SCHED0 0x01
-UMASK_TXR_AK_CYCLES_FULL_SCHED1 0x02
-UMASK_TXR_AK_CYCLES_FULL_ALL 0x03
-
-EVENT_TXR_AK_NDR 0x0E BBOX
-UMASK_TXR_AK_NDR 0x00
-
-EVENT_TXR_BL 0x10 BBOX
-UMASK_TXR_BL_DRS_CACHE 0x01
-UMASK_TXR_BL_DRS_CORE 0x02
-UMASK_TXR_BL_DRS_QPI 0x04
-
-EVENT_TXR_BL_CYCLES_FULL 0x36 BBOX
-UMASK_TXR_BL_CYCLES_FULL_SCHED0 0x01
-UMASK_TXR_BL_CYCLES_FULL_SCHED1 0x02
-UMASK_TXR_BL_CYCLES_FULL_ALL 0x03
-
-EVENT_WPQ_CYCLES_NO_REG_CREDITS 0x18 BBOX
-UMASK_WPQ_CYCLES_NO_REG_CREDITS_CHN0 0x01
-UMASK_WPQ_CYCLES_NO_REG_CREDITS_CHN1 0x02
-UMASK_WPQ_CYCLES_NO_REG_CREDITS_CHN2 0x04
-UMASK_WPQ_CYCLES_NO_REG_CREDITS_CHN3 0x08
-
-EVENT_DRAM_CLOCKTICKS 0x00 MBOXFIX
+EVENT_DRAM_CLOCKTICKS 0x00 MBOX0FIX|MBOX1FIX|MBOX2FIX|MBOX3FIX
UMASK_DRAM_CLOCKTICKS 0x00
EVENT_ACT_COUNT 0x01 MBOX
@@ -742,437 +515,3 @@ UMASK_WPQ_READ_HIT 0x00
EVENT_WPQ_WRITE_HIT 0x24 MBOX
UMASK_WPQ_WRITE_HIT 0x00
-
-EVENT_PCU_CLOCKTICKS 0x00 WBOX
-UMASK_PCU_CLOCKTICKS 0x00
-
-EVENT_CORE0_TRANSITION_CYCLES 0x03 WBOX
-UMASK_CORE0_TRANSITION_CYCLES 0x00
-
-EVENT_CORE1_TRANSITION_CYCLES 0x04 WBOX
-UMASK_CORE1_TRANSITION_CYCLES 0x00
-
-EVENT_CORE2_TRANSITION_CYCLES 0x05 WBOX
-UMASK_CORE2_TRANSITION_CYCLES 0x00
-
-EVENT_CORE3_TRANSITION_CYCLES 0x06 WBOX
-UMASK_CORE3_TRANSITION_CYCLES 0x00
-
-EVENT_CORE4_TRANSITION_CYCLES 0x07 WBOX
-UMASK_CORE4_TRANSITION_CYCLES 0x00
-
-EVENT_CORE5_TRANSITION_CYCLES 0x08 WBOX
-UMASK_CORE5_TRANSITION_CYCLES 0x00
-
-EVENT_CORE6_TRANSITION_CYCLES 0x09 WBOX
-UMASK_CORE6_TRANSITION_CYCLES 0x00
-
-EVENT_CORE7_TRANSITION_CYCLES 0x0A WBOX
-UMASK_CORE7_TRANSITION_CYCLES 0x00
-
-EVENT_DEMOTIONS_CORE0 0x1E WBOX
-UMASK_DEMOTIONS_CORE0 0x00
-
-EVENT_DEMOTIONS_CORE1 0x1F WBOX
-UMASK_DEMOTIONS_CORE1 0x00
-
-EVENT_DEMOTIONS_CORE2 0x20 WBOX
-UMASK_DEMOTIONS_CORE2 0x00
-
-EVENT_DEMOTIONS_CORE3 0x21 WBOX
-UMASK_DEMOTIONS_CORE3 0x00
-
-EVENT_DEMOTIONS_CORE4 0x22 WBOX
-UMASK_DEMOTIONS_CORE4 0x00
-
-EVENT_DEMOTIONS_CORE5 0x23 WBOX
-UMASK_DEMOTIONS_CORE5 0x00
-
-EVENT_DEMOTIONS_CORE6 0x24 WBOX
-UMASK_DEMOTIONS_CORE6 0x00
-
-EVENT_DEMOTIONS_CORE7 0x25 WBOX
-UMASK_DEMOTIONS_CORE7 0x00
-
-EVENT_FREQ_BAND0_CYCLES 0x0B WBOX
-UMASK_FREQ_BAND0_CYCLES 0x00
-
-EVENT_FREQ_BAND1_CYCLES 0x0C WBOX
-UMASK_FREQ_BAND1_CYCLES 0x00
-
-EVENT_FREQ_BAND2_CYCLES 0x0D WBOX
-UMASK_FREQ_BAND2_CYCLES 0x00
-
-EVENT_FREQ_BAND3_CYCLES 0x0E WBOX
-UMASK_FREQ_BAND3_CYCLES 0x00
-
-EVENT_FREQ_MAX_CURRENT_CYCLES 0x07 WBOX
-UMASK_FREQ_MAX_CURRENT_CYCLES 0x00
-
-EVENT_FREQ_MAX_LIMIT_THERMAL_CYCLES 0x04 WBOX
-UMASK_FREQ_MAX_LIMIT_THERMAL_CYCLES 0x00
-
-EVENT_FREQ_MAX_POWER_CYCLES 0x05 WBOX
-UMASK_FREQ_MAX_POWER_CYCLES 0x00
-
-EVENT_FREQ_MAX_OS_CYCLES 0x06 WBOX
-UMASK_FREQ_MAX_OS_CYCLES 0x00
-
-EVENT_FREQ_MIN_IO_P_CYCLES 0x01 WBOX
-UMASK_FREQ_MIN_IO_P_CYCLES 0x00
-
-EVENT_FREQ_MIN_PERF_P_CYCLES 0x02 WBOX
-UMASK_FREQ_MIN_PERF_P_CYCLES 0x00
-
-EVENT_FREQ_TRANS_CYCLES 0x00 WBOX
-UMASK_FREQ_TRANS_CYCLES 0x00
-
-EVENT_MEMORY_PHASE_SHEDDING_CYCLES 0x2F WBOX
-UMASK_MEMORY_PHASE_SHEDDING_CYCLES 0x00
-
-EVENT_POWER_STATE_OCCUPANCY 0x80 WBOX
-UMASK_POWER_STATE_OCCUPANCY_CORES_C0 0x40
-UMASK_POWER_STATE_OCCUPANCY_CORES_C3 0x80
-UMASK_POWER_STATE_OCCUPANCY_CORES_C6 0xC0
-
-EVENT_PROCHOT_EXTERNAL_CYCLES 0x0A WBOX
-UMASK_PROCHOT_EXTERNAL_CYCLES 0x00
-
-EVENT_PROCHOT_INTERNAL_CYCLES 0x09 WBOX
-UMASK_PROCHOT_INTERNAL_CYCLES 0x00
-
-EVENT_TOTAL_TRANSITION_CYCLES 0x0B WBOX
-UMASK_TOTAL_TRANSITION_CYCLES 0x00
-
-EVENT_VOLT_TRANS_CYCLES_CHANGE 0x03 WBOX
-UMASK_VOLT_TRANS_CYCLES_CHANGE 0x00
-
-EVENT_VOLT_TRANS_CYCLES_DECREASE 0x02 WBOX
-UMASK_VOLT_TRANS_CYCLES_DECREASE 0x00
-
-EVENT_VOLT_TRANS_CYCLES_INCREASE 0x01 WBOX
-UMASK_VOLT_TRANS_CYCLES_INCREASE 0x00
-
-EVENT_VR_HOT_CYCLES 0x32 WBOX
-UMASK_VR_HOT_CYCLES 0x00
-
-EVENT_QPI_LL_RATE 0xFF SBOX
-UMASK_QPI_LL_RATE 0x00
-
-
-EVENT_QPI_LL_CLOCKTICKS 0x14 SBOX
-UMASK_QPI_LL_CLOCKTICKS 0x00
-
-EVENT_CTO_COUNT 0x38 SBOX
-UMASK_CTO_COUNT_ANY_DATAC_TO_NODE0 0x1C 0xF8 0x00
-UMASK_CTO_COUNT_ANY_DATAC_TO_NODE1 0x1C 0xF8 0x01
-UMASK_CTO_COUNT_ANY_DATAC_TO_NODE2 0x1C 0xF8 0x02
-UMASK_CTO_COUNT_ANY_DATAC_TO_NODE3 0x1C 0xF8 0x03
-UMASK_CTO_COUNT_WRITE_TO_NODE0 0x1C 0xE0 0x00
-UMASK_CTO_COUNT_WRITE_TO_NODE1 0x1C 0xE0 0x01
-UMASK_CTO_COUNT_WRITE_TO_NODE2 0x1C 0xE0 0x02
-UMASK_CTO_COUNT_WRITE_TO_NODE3 0x1C 0xE0 0x03
-UMASK_CTO_COUNT_NCB_ANY_TO_NODE0 0x18 0xE0 0x00
-UMASK_CTO_COUNT_NCB_ANY_TO_NODE1 0x18 0xE0 0x01
-UMASK_CTO_COUNT_NCB_ANY_TO_NODE2 0x18 0xE0 0x02
-UMASK_CTO_COUNT_NCB_ANY_TO_NODE3 0x18 0xE0 0x03
-UMASK_CTO_COUNT_NCB_INTR_TO_NODE0 0x19 0xF8 0x00
-UMASK_CTO_COUNT_NCB_INTR_TO_NODE1 0x19 0xF8 0x01
-UMASK_CTO_COUNT_NCB_INTR_TO_NODE2 0x19 0xF8 0x02
-UMASK_CTO_COUNT_NCB_INTR_TO_NODE3 0x19 0xF8 0x03
-
-EVENT_DIRECT2CORE 0x13 SBOX
-UMASK_DIRECT2CORE_SUCCESS 0x01
-UMASK_DIRECT2CORE_FAILURE_CREDITS 0x02
-UMASK_DIRECT2CORE_FAILURE_RBT 0x04
-UMASK_DIRECT2COREFAILURE_CREDIRTS_RBT 0x08
-
-EVENT_L1_POWER_CYCLES 0x12 SBOX
-UMASK_L1_POWER_CYCLES 0x00
-
-EVENT_RXL0P_POWER_CYCLES 0x10 SBOX
-UMASK_RXL0P_POWER_CYCLES 0x00
-
-EVENT_RXL0_POWER_CYCLES 0x0F SBOX
-UMASK_RXL0_POWER_CYCLES 0x00
-
-EVENT_RXL_BYPASSED 0x09 SBOX
-UMASK_RXL_BYPASSED 0x00
-
-EVENT_RXL_CREDITS_CONSUMED_VN0 0x1E SBOX
-UMASK_RXL_CREDITS_CONSUMED_VN0_DRS 0x01
-UMASK_RXL_CREDITS_CONSUMED_VN0_NCB 0x02
-UMASK_RXL_CREDITS_CONSUMED_VN0_NCS 0x04
-UMASK_RXL_CREDITS_CONSUMED_VN0_HOM 0x08
-UMASK_RXL_CREDITS_CONSUMED_VN0_SNP 0x10
-UMASK_RXL_CREDITS_CONSUMED_VN0_NDR 0x20
-
-EVENT_RXL_CREDITS_CONSUMED_VNA 0x1D SBOX
-UMASK_RXL_CREDITS_CONSUMED_VNA 0x00
-
-EVENT_RXL_FLITS_G0 0x01 SBOX
-UMASK_RXL_FLITS_G0_IDLE 0x01
-UMASK_RXL_FLITS_G0_DATA 0x02
-UMASK_RXL_FLITS_G0_NON_DATA 0x04
-
-EVENT_RXL_FLITS_G1 0x02 SBOX
-UMASK_RXL_FLITS_G1_SNP 0x01
-UMASK_RXL_FLITS_G1_HOM_REQ 0x02
-UMASK_RXL_FLITS_G1_HOM_NONREQ 0x04
-UMASK_RXL_FLITS_G1_HOM 0x06
-UMASK_RXL_FLITS_G1_DRS_DATA 0x08
-UMASK_RXL_FLITS_G1_DRS_NONDATA 0x10
-UMASK_RXL_FLITS_G1_DRS 0x60
-
-EVENT_RXL_FLITS_G2 0x03 SBOX
-UMASK_RXL_FLITS_G2_NDR_AD 0x01
-UMASK_RXL_FLITS_G2_NDR_AK 0x02
-UMASK_RXL_FLITS_G2_NCB_DATA 0x04
-UMASK_RXL_FLITS_G2_NCB_NODATA 0x08
-UMASK_RXL_FLITS_G2_NCB 0x06
-UMASK_RXL_FLITS_G2_NCS 0x10
-
-EVENT_RXL_INSERTS 0x08 SBOX
-UMASK_RXL_INSERTS 0x00
-
-EVENT_RXL_INSERTS_DRS 0x09 SBOX
-UMASK_RXL_INSERTS_DRS 0x00
-
-EVENT_RXL_INSERTS_HOM 0x0C SBOX
-UMASK_RXL_INSERTS_HOM 0x00
-
-EVENT_RXL_INSERTS_NCB 0x0A SBOX
-UMASK_RXL_INSERTS_NCB 0x00
-
-EVENT_RXL_INSERTS_NCS 0x0B SBOX
-UMASK_RXL_INSERTS_NCS 0x00
-
-EVENT_RXL_INSERTS_NDR 0x0E SBOX
-UMASK_RXL_INSERTS_NDR 0x00
-
-EVENT_RXL_INSERTS_SNP 0x0D SBOX
-UMASK_RXL_INSERTS_SNP 0x00
-
-EVENT_RXL_OCCUPANCY 0x0B SBOX
-UMASK_RXL_OCCUPANCY 0x00
-
-EVENT_RXL_OCCUPANCY_DRS 0x15 SBOX
-UMASK_RXL_OCCUPANCY_DRS 0x00
-
-EVENT_RXL_OCCUPANCY_HOM 0x18 SBOX
-UMASK_RXL_OCCUPANCY_HOM 0x00
-
-EVENT_RXL_OCCUPANCY_NCB 0x16 SBOX
-UMASK_RXL_OCCUPANCY_NCB 0x00
-
-EVENT_RXL_OCCUPANCY_NCS 0x17 SBOX
-UMASK_RXL_OCCUPANCY_NCS 0x00
-
-EVENT_RXL_OCCUPANCY_NDR 0x1A SBOX
-UMASK_RXL_OCCUPANCY_NDR 0x00
-
-EVENT_RXL_OCCUPANCY_SNP 0x19 SBOX
-UMASK_RXL_OCCUPANCY_SNP 0x00
-
-EVENT_TXL0P_POWER_CYCLES 0x0D SBOX
-UMASK_TXL0P_POWER_CYCLES 0x00
-
-EVENT_TXL0_POWER_CYCLES 0x0C SBOX
-UMASK_TXL0_POWER_CYCLES 0x00
-
-EVENT_TXL_BYPASSED 0x05 SBOX
-UMASK_TXL_BYPASSED 0x00
-
-EVENT_TXL_CYCLES_NE 0x06 SBOX
-UMASK_TXL_CYCLES_NE 0x00
-
-EVENT_TXL_FLITS_G0 0x00 SBOX
-UMASK_TXL_FLITS_G0_IDLE 0x01
-UMASK_TXL_FLITS_G0_DATA 0x02
-UMASK_TXL_FLITS_G0_NON_DATA 0x04
-
-EVENT_TXL_FLITS_G1 0x00 SBOX
-UMASK_TXL_FLITS_G1_SNP 0x01
-UMASK_TXL_FLITS_G1_HOM_REQ 0x02
-UMASK_TXL_FLITS_G1_HOM_NONREQ 0x04
-UMASK_TXL_FLITS_G1_HOM 0x06
-UMASK_TXL_FLITS_G1_DRS_DATA 0x08
-UMASK_TXL_FLITS_G1_DRS_NONDATA 0x10
-UMASK_TXL_FLITS_G1_DRS 0x60
-
-EVENT_TXL_FLITS_G2 0x01 SBOX
-UMASK_TXL_FLITS_G2_NDR_AD 0x01
-UMASK_TXL_FLITS_G2_NDR_AK 0x02
-UMASK_TXL_FLITS_G2_NCB_DATA 0x04
-UMASK_TXL_FLITS_G2_NCB_NODATA 0x08
-UMASK_TXL_FLITS_G2_NCB 0x06
-UMASK_TXL_FLITS_G2_NCS 0x10
-
-EVENT_TXL_INSERTS 0x04 SBOX
-UMASK_TXL_INSERTS 0x00
-
-EVENT_TXL_OCCUPANCY 0x07 SBOX
-UMASK_TXL_OCCUPANCY 0x00
-
-EVENT_CREDIT_RETURNS 0x1C SBOX
-UMASK_CREDIT_RETURNS 0x00
-
-EVENT_CREDIT_RETURN_OCCUPANCY 0x1B SBOX
-UMASK_CREDIT_RETURN_OCCUPANCY 0x00
-
-EVENT_R2PCIE_CLOCKTICKS 0x01 PBOX
-UMASK_R2PCIE_CLOCKTICKS 0x00
-
-EVENT_RING_AD_USED 0x07 PBOX
-UMASK_RING_AD_USED_CW_EVEN 0x01
-UMASK_RING_AD_USED_CW_ODD 0x02
-UMASK_RING_AD_USED_CCW_EVEN 0x04
-UMASK_RING_AD_USED_CCW_EVEN 0x08
-
-EVENT_RING_AK_USED 0x08 PBOX
-UMASK_RING_AK_USED_CW_EVEN 0x01
-UMASK_RING_AK_USED_CW_ODD 0x02
-UMASK_RING_AK_USED_CCW_EVEN 0x04
-UMASK_RING_AK_USED_CCW_EVEN 0x08
-
-EVENT_RING_BL_USED 0x09 PBOX
-UMASK_RING_BL_USED_CW_EVEN 0x01
-UMASK_RING_BL_USED_CW_ODD 0x02
-UMASK_RING_BL_USED_CCW_EVEN 0x04
-UMASK_RING_BL_USED_CCW_EVEN 0x08
-
-EVENT_RING_IV_USED 0x0A PBOX
-UMASK_RING_IV_USED_ANY 0x0F
-
-EVENT_RXR_AK_BOUNCES 0x12 PBOX0
-UMASK_RXR_AK_BOUNCES 0x00
-
-EVENT_RXR_CYCLES_NE 0x10 PBOX0|PBOX1
-UMASK_RXR_CYCLES_NE_DRS 0x08
-UMASK_RXR_CYCLES_NE_NCB 0x10
-UMASK_RXR_CYCLES_NE_NCS 0x20
-
-EVENT_TXR_CYCLES_FULL 0x25 PBOX0
-UMASK_TXR_CYCLES_FULL_AD 0x01
-UMASK_TXR_CYCLES_FULL_AK 0x02
-UMASK_TXR_CYCLES_FULL_BL 0x04
-
-EVENT_TXR_CYCLES_NE 0x23 PBOX0
-UMASK_TXR_CYCLES_NE_AD 0x01
-UMASK_TXR_CYCLES_NE_AK 0x02
-UMASK_TXR_CYCLES_NE_BL 0x04
-
-EVENT_TXR_INSERTS 0x24 PBOX0
-UMASK_TXR_INSERTS 0x00
-
-EVENT_R3QPI_CLOCKTICKS 0x01 RBOX
-UMASK_R3QPI_CLOCKTICKS 0x00
-
-EVENT_IIO_CREDITS_ACQUIRED 0x20 RBOX
-UMASK_IIO_CREDITS_ACQUIRED_DRS 0x08
-UMASK_IIO_CREDITS_ACQUIRED_NCB 0x10
-UMASK_IIO_CREDITS_ACQUIRED_NCS 0x20
-
-EVENT_IIO_CREDITS_REJECT 0x21 RBOX
-UMASK_IIO_CREDITS_REJECT_DRS 0x08
-UMASK_IIO_CREDITS_REJECT_NCB 0x10
-UMASK_IIO_CREDITS_REJECT_NCS 0x20
-
-EVENT_IIO_CREDITS_USED 0x22 RBOX
-UMASK_IIO_CREDITS_USED_DRS 0x08
-UMASK_IIO_CREDITS_USED_NCB 0x10
-UMASK_IIO_CREDITS_USED_NCS 0x20
-
-EVENT_RING_AD_USED 0x07 RBOX
-UMASK_RING_AD_USED_CW_EVEN 0x01
-UMASK_RING_AD_USED_CW_ODD 0x02
-UMASK_RING_AD_USED_CCW_EVEN 0x04
-UMASK_RING_AD_USED_CCW_ODD 0x08
-
-EVENT_RING_AK_USED 0x08 RBOX
-UMASK_RING_AK_USED_CW_EVEN 0x01
-UMASK_RING_AK_USED_CW_ODD 0x02
-UMASK_RING_AK_USED_CCW_EVEN 0x04
-UMASK_RING_AK_USED_CCW_ODD 0x08
-
-EVENT_RING_BL_USED 0x09 RBOX
-UMASK_RING_BL_USED_CW_EVEN 0x01
-UMASK_RING_BL_USED_CW_ODD 0x02
-UMASK_RING_BL_USED_CCW_EVEN 0x04
-UMASK_RING_BL_USED_CCW_ODD 0x08
-
-EVENT_RING_IV_USED 0x0A RBOX
-UMASK_RING_IV_USED_ANY 0x0F
-
-EVENT_RXR_BYPASSED 0x12 RBOX
-UMASK_RXR_BYPASSED 0x00
-
-EVENT_RXR_CYCLES_NE 0x10 RBOX
-UMASK_RXR_CYCLES_NE_HOM 0x01
-UMASK_RXR_CYCLES_NE_SNP 0x02
-UMASK_RXR_CYCLES_NE_NDR 0x04
-UMASK_RXR_CYCLES_NE_DRS 0x08
-UMASK_RXR_CYCLES_NE_NCB 0x10
-UMASK_RXR_CYCLES_NE_NCS 0x20
-
-EVENT_RXR_INSERTS 0x10 RBOX
-UMASK_RXR_INSERTS_HOM 0x01
-UMASK_RXR_INSERTS_SNP 0x02
-UMASK_RXR_INSERTS_NDR 0x04
-UMASK_RXR_INSERTS_DRS 0x08
-UMASK_RXR_INSERTS_NCB 0x10
-UMASK_RXR_INSERTS_NCS 0x20
-
-EVENT_RXR_OCCUPANCY 0x13 RBOX
-UMASK_RXR_OCCUPANCY_HOM 0x01
-UMASK_RXR_OCCUPANCY_SNP 0x02
-UMASK_RXR_OCCUPANCY_NDR 0x04
-UMASK_RXR_OCCUPANCY_DRS 0x08
-UMASK_RXR_OCCUPANCY_NCB 0x10
-UMASK_RXR_OCCUPANCY_NCS 0x20
-
-EVENT_TXR_CYCLES_FULL 0x25 RBOX
-UMASK_TXR_CYCLES_FULL 0x00
-
-EVENT_TXR_CYCLES_NE 0x23 RBOX
-UMASK_TXR_CYCLES_NE 0x00
-
-EVENT_TXR_INSERTS 0x24 RBOX
-UMASK_TXR_INSERTS 0x00
-
-EVENT_TXR_NACK 0x26 RBOX
-UMASK_TXR_NACK 0x00
-
-EVENT_VN0_CREDITS_REJECT 0x37 RBOX
-UMASK_VN0_CREDITS_REJECT_HOM 0x01
-UMASK_VN0_CREDITS_REJECT_SNP 0x02
-UMASK_VN0_CREDITS_REJECT_NDR 0x04
-UMASK_VN0_CREDITS_REJECT_DRS 0x08
-UMASK_VN0_CREDITS_REJECT_NCB 0x10
-UMASK_VN0_CREDITS_REJECT_NCS 0x20
-
-EVENT_VN0_CREDITS_USED 0x36 RBOX
-UMASK_VN0_CREDITS_USED_HOM 0x01
-UMASK_VN0_CREDITS_USED_SNP 0x02
-UMASK_VN0_CREDITS_USED_NDR 0x04
-UMASK_VN0_CREDITS_USED_DRS 0x08
-UMASK_VN0_CREDITS_USED_NCB 0x10
-UMASK_VN0_CREDITS_USED_NCS 0x20
-
-EVENT_VNA_CREDITS_ACQUIRED 0x33 RBOX
-UMASK_VNA_CREDITS_ACQUIRED 0x00
-
-EVENT_VNA_CREDITS_REJECT 0x34 RBOX
-UMASK_VNA_CREDITS_REJECT_HOM 0x01
-UMASK_VNA_CREDITS_REJECT_SNP 0x02
-UMASK_VNA_CREDITS_REJECT_NDR 0x04
-UMASK_VNA_CREDITS_REJECT_DRS 0x08
-UMASK_VNA_CREDITS_REJECT_NCB 0x10
-UMASK_VNA_CREDITS_REJECT_NCS 0x20
-
-EVENT_VNA_CREDITS_CYCLES_OUT 0x31 RBOX
-UMASK_VNA_CREDITS_CYCLES_OUT 0x00
-
-EVENT_VNA_CREDITS_CYCLES_USED 0x32 RBOX
-UMASK_VNA_CREDITS_CYCLESUSED 0x00
-
diff --git a/src/includes/perfmon_haswell.h b/src/includes/perfmon_silvermont.h
similarity index 63%
copy from src/includes/perfmon_haswell.h
copy to src/includes/perfmon_silvermont.h
index 0352476..9cfd6f1 100644
--- a/src/includes/perfmon_haswell.h
+++ b/src/includes/perfmon_silvermont.h
@@ -1,12 +1,12 @@
/*
* =======================================================================================
*
- * Filename: perfmon_haswell.h
+ * Filename: perfmon_silvermont.h
*
- * Description: Header File of perfmon module for Haswell.
+ * Description: Header file of perfmon module for Intel Atom Silvermont
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -27,95 +27,58 @@
*
* =======================================================================================
*/
+
+#include <perfmon_silvermont_events.h>
+#include <perfmon_silvermont_groups.h>
+#include <perfmon_silvermont_counters.h>
-#include <perfmon_haswell_events.h>
-#include <perfmon_haswell_groups.h>
-#include <perfmon_haswell_counters.h>
+static int perfmon_numCountersSilvermont = NUM_COUNTERS_SILVERMONT;
+static int perfmon_numGroupsSilvermont = NUM_GROUPS_SILVERMONT;
+static int perfmon_numArchEventsSilvermont = NUM_ARCH_EVENTS_SILVERMONT;
-static int perfmon_numCountersHaswell = NUM_COUNTERS_HASWELL;
-static int perfmon_numGroupsHaswell = NUM_GROUPS_HASWELL;
-static int perfmon_numArchEventsHaswell = NUM_ARCH_EVENTS_HASWELL;
-
-#define OFFSET_PMC 3
-
-void perfmon_init_haswell(PerfmonThread *thread)
+void perfmon_init_silvermont(PerfmonThread *thread)
{
uint64_t flags = 0x0ULL;
int cpu_id = thread->processorId;
+ lock_acquire((int*) &socket_lock[affinity_core2node_lookup[cpu_id]], cpu_id);
/* Initialize registers */
- msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, 0x0ULL);
msr_write(cpu_id, MSR_PERFEVTSEL0, 0x0ULL);
msr_write(cpu_id, MSR_PERFEVTSEL1, 0x0ULL);
- msr_write(cpu_id, MSR_PERFEVTSEL2, 0x0ULL);
- msr_write(cpu_id, MSR_PERFEVTSEL3, 0x0ULL);
- msr_write(cpu_id, MSR_PMC0, 0x0ULL);
- msr_write(cpu_id, MSR_PMC1, 0x0ULL);
- msr_write(cpu_id, MSR_PMC2, 0x0ULL);
- msr_write(cpu_id, MSR_PMC3, 0x0ULL);
- msr_write(cpu_id, MSR_PERF_FIXED_CTR0, 0x0ULL);
- msr_write(cpu_id, MSR_PERF_FIXED_CTR1, 0x0ULL);
- msr_write(cpu_id, MSR_PERF_FIXED_CTR2, 0x0ULL);
+ msr_write(cpu_id, MSR_OFFCORE_RESP0, 0x0ULL);
+ msr_write(cpu_id, MSR_OFFCORE_RESP1, 0x0ULL);
+
+ msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, 0x0ULL);
msr_write(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL);
- msr_write(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, 0x0ULL);
msr_write(cpu_id, MSR_PEBS_ENABLE, 0x0ULL);
-
- /* initialize fixed counters
- * FIXED 0: Instructions retired
- * FIXED 1: Clocks unhalted core
- * FIXED 2: Clocks unhalted ref */
- msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, 0x222ULL);
-
- /* Preinit of PERFEVSEL registers */
- flags |= (1<<22); /* enable flag */
- flags |= (1<<16); /* user mode flag */
-
- msr_write(cpu_id, MSR_PERFEVTSEL0, flags);
- msr_write(cpu_id, MSR_PERFEVTSEL1, flags);
- msr_write(cpu_id, MSR_PERFEVTSEL2, flags);
- msr_write(cpu_id, MSR_PERFEVTSEL3, flags);
-
- if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id) ||
- lock_acquire((int*) &socket_lock[affinity_core2node_lookup[cpu_id]], cpu_id))
- {
-
- }
-
}
-void perfmon_setupCounterThread_haswell(
+void perfmon_setupCounterThread_silvermont(
int thread_id,
PerfmonEvent* event,
PerfmonCounterIndex index)
{
int haveLock = 0;
- uint64_t flags;
+ uint64_t flags = 0x0ULL;
uint32_t uflags;
- uint64_t reg = haswell_counter_map[index].configRegister;
+ uint64_t reg = silvermont_counter_map[index].configRegister;
int cpu_id = perfmon_threadData[thread_id].processorId;
+ uint64_t fixed_flags = msr_read(cpu_id, MSR_PERF_FIXED_CTR_CTRL);
+ uint64_t orig_fixed_flags = fixed_flags;
perfmon_threadData[thread_id].counters[index].init = TRUE;
- if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id))
- {
- haveLock = 1;
- }
-
- switch (haswell_counter_map[index].type)
+ switch (silvermont_counter_map[index].type)
{
case PMC:
- flags = msr_read(cpu_id,reg);
+ flags = (1<<16)|(1<<22);
flags &= ~(0xFFFFU); /* clear lower 16bits */
/* Intel with standard 8 bit event mask: [7:0] */
flags |= (event->umask<<8) + event->eventId;
- if (event->cfgBits != 0) /* set custom cfg and cmask */
- {
- flags &= ~(0xFFFFU<<16); /* clear upper 16bits */
- flags |= ((event->cmask<<8) + event->cfgBits)<<16;
- }
+
if (perfmon_verbose)
{
@@ -124,11 +87,32 @@ void perfmon_setupCounterThread_haswell(
LLU_CAST reg,
LLU_CAST flags);
}
-
msr_write(cpu_id, reg , flags);
+
+ // Offcore response event (0xB7): in addition to the PERFEVTSELx
+ // write above, one of the MSR_OFFCORE_RESPx registers is programmed.
+ // umask 0x01 selects MSR_OFFCORE_RESP0, umask 0x02 MSR_OFFCORE_RESP1.
+ // cfgBits contain offset of "request type" bit
+ // cmask contain offset of "response type" bit
+ if (event->eventId == 0xB7)
+ {
+ if (event->umask == 0x01)
+ {
+ reg = MSR_OFFCORE_RESP0;
+ }
+ else if (event->umask == 0x02)
+ {
+ reg = MSR_OFFCORE_RESP1;
+ }
+ /* 64-bit shifts: response offsets reach 0x26 (bit 38), beyond int */
+ flags = (1ULL<<event->cfgBits)|(1ULL<<event->cmask);
+ msr_write(cpu_id, reg , flags);
+ }
+
break;
case FIXED:
+ fixed_flags |= (2ULL<<(index*4));
break;
case POWER:
@@ -138,13 +122,19 @@ void perfmon_setupCounterThread_haswell(
/* should never be reached */
break;
}
+ if (fixed_flags != orig_fixed_flags)
+ {
+ msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, fixed_flags);
+ }
}
-void perfmon_startCountersThread_haswell(int thread_id)
+
+void perfmon_startCountersThread_silvermont(int thread_id)
{
int haveLock = 0;
uint64_t flags = 0x0ULL;
uint32_t uflags = 0x10000UL; /* Clear freeze bit */
+ uint64_t fixed_flags = 0x0ULL;
int cpu_id = perfmon_threadData[thread_id].processorId;
if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id))
@@ -154,19 +144,19 @@ void perfmon_startCountersThread_haswell(int thread_id)
msr_write(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL);
- for ( int i=0; i<perfmon_numCountersHaswell; i++ )
+ for ( int i=0; i<perfmon_numCountersSilvermont; i++ )
{
if (perfmon_threadData[thread_id].counters[i].init == TRUE)
{
- switch (haswell_counter_map[i].type)
+ switch (silvermont_counter_map[i].type)
{
case PMC:
- msr_write(cpu_id, haswell_counter_map[i].counterRegister, 0x0ULL);
+ msr_write(cpu_id, silvermont_counter_map[i].counterRegister, 0x0ULL);
flags |= (1<<(i-OFFSET_PMC)); /* enable counter */
break;
case FIXED:
- msr_write(cpu_id, haswell_counter_map[i].counterRegister, 0x0ULL);
+ msr_write(cpu_id, silvermont_counter_map[i].counterRegister, 0x0ULL);
flags |= (1ULL<<(i+32)); /* enable fixed counter */
break;
@@ -174,7 +164,7 @@ void perfmon_startCountersThread_haswell(int thread_id)
if(haveLock)
{
perfmon_threadData[thread_id].counters[i].counterData =
- power_read(cpu_id, haswell_counter_map[i].counterRegister);
+ power_read(cpu_id, silvermont_counter_map[i].counterRegister);
}
break;
@@ -193,12 +183,14 @@ void perfmon_startCountersThread_haswell(int thread_id)
printf("perfmon_start_counters: Write Register 0x%X , \
Flags: 0x%llX \n",MSR_UNCORE_PERF_GLOBAL_CTRL, LLU_CAST uflags);
}
-
- msr_write(cpu_id, MSR_PERF_GLOBAL_CTRL, flags);
- msr_write(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, 0x30000000FULL);
+ if (flags != 0x0ULL)
+ {
+ msr_write(cpu_id, MSR_PERF_GLOBAL_CTRL, flags);
+ }
}
-void perfmon_stopCountersThread_haswell(int thread_id)
+
+void perfmon_stopCountersThread_silvermont(int thread_id)
{
uint64_t flags;
uint32_t uflags = 0x10100UL; /* Set freeze bit */
@@ -213,17 +205,17 @@ void perfmon_stopCountersThread_haswell(int thread_id)
msr_write(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL);
- for ( int i=0; i < perfmon_numCountersHaswell; i++ )
+ for ( int i=0; i < perfmon_numCountersSilvermont; i++ )
{
if (perfmon_threadData[thread_id].counters[i].init == TRUE)
{
- switch (haswell_counter_map[i].type)
+ switch (silvermont_counter_map[i].type)
{
case PMC:
case FIXED:
perfmon_threadData[thread_id].counters[i].counterData =
- msr_read(cpu_id, haswell_counter_map[i].counterRegister);
+ (double)msr_read(cpu_id, silvermont_counter_map[i].counterRegister);
break;
case POWER:
@@ -231,7 +223,7 @@ void perfmon_stopCountersThread_haswell(int thread_id)
{
perfmon_threadData[thread_id].counters[i].counterData =
power_info.energyUnit *
- ( power_read(cpu_id, haswell_counter_map[i].counterRegister) -
+ ( power_read(cpu_id, silvermont_counter_map[i].counterRegister) -
perfmon_threadData[thread_id].counters[i].counterData);
}
break;
@@ -256,7 +248,7 @@ void perfmon_stopCountersThread_haswell(int thread_id)
}
}
-void perfmon_readCountersThread_haswell(int thread_id)
+void perfmon_readCountersThread_silvermont(int thread_id)
{
uint64_t counter_result = 0x0ULL;
int haveLock = 0;
@@ -267,26 +259,26 @@ void perfmon_readCountersThread_haswell(int thread_id)
haveLock = 1;
}
- for ( int i=0; i<perfmon_numCountersHaswell; i++ )
+ for ( int i=0; i<perfmon_numCountersSilvermont; i++ )
{
if (perfmon_threadData[thread_id].counters[i].init == TRUE)
{
- if ((haswell_counter_map[i].type == PMC) ||
- (haswell_counter_map[i].type == FIXED))
+ if ((silvermont_counter_map[i].type == PMC) ||
+ (silvermont_counter_map[i].type == FIXED))
{
perfmon_threadData[thread_id].counters[i].counterData =
- msr_read(cpu_id, haswell_counter_map[i].counterRegister);
+ msr_read(cpu_id, silvermont_counter_map[i].counterRegister);
}
else
{
if(haveLock)
{
- switch (haswell_counter_map[i].type)
+ switch (silvermont_counter_map[i].type)
{
case POWER:
perfmon_threadData[thread_id].counters[i].counterData =
power_info.energyUnit *
- power_read(cpu_id, haswell_counter_map[i].counterRegister);
+ power_read(cpu_id, silvermont_counter_map[i].counterRegister);
break;
default:
@@ -298,4 +290,3 @@ void perfmon_readCountersThread_haswell(int thread_id)
}
}
}
-
diff --git a/src/includes/perfmon_haswell_counters.h b/src/includes/perfmon_silvermont_counters.h
similarity index 68%
copy from src/includes/perfmon_haswell_counters.h
copy to src/includes/perfmon_silvermont_counters.h
index 4302efe..266ee4b 100644
--- a/src/includes/perfmon_haswell_counters.h
+++ b/src/includes/perfmon_silvermont_counters.h
@@ -1,12 +1,12 @@
/*
* =======================================================================================
*
- * Filename: perfmon_haswell_counters.h
+ * Filename: perfmon_silvermont_counters.h
*
- * Description: Counter Header File of perfmon module for Haswell.
+ * Description: Counter header file of perfmon module for Silvermont.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -28,10 +28,11 @@
* =======================================================================================
*/
-#define NUM_COUNTERS_HASWELL 12
-#define NUM_COUNTERS_CORE_HASWELL 7
+#define NUM_COUNTERS_CORE_SILVERMONT 6
+#define NUM_COUNTERS_UNCORE_SILVERMONT 0
+#define NUM_COUNTERS_SILVERMONT 8
-static PerfmonCounterMap haswell_counter_map[NUM_COUNTERS_HASWELL] = {
+static PerfmonCounterMap silvermont_counter_map[NUM_COUNTERS_SILVERMONT] = {
/* Fixed Counters: instructions retired, cycles unhalted core */
{"FIXC0", PMC0, FIXED, MSR_PERF_FIXED_CTR_CTRL, MSR_PERF_FIXED_CTR0, 0, 0},
{"FIXC1", PMC1, FIXED, MSR_PERF_FIXED_CTR_CTRL, MSR_PERF_FIXED_CTR1, 0, 0},
@@ -39,14 +40,11 @@ static PerfmonCounterMap haswell_counter_map[NUM_COUNTERS_HASWELL] = {
/* PMC Counters: 4 48bit wide */
{"PMC0", PMC3, PMC, MSR_PERFEVTSEL0, MSR_PMC0, 0, 0},
{"PMC1", PMC4, PMC, MSR_PERFEVTSEL1, MSR_PMC1, 0, 0},
- {"PMC2", PMC5, PMC, MSR_PERFEVTSEL2, MSR_PMC2, 0, 0},
- {"PMC3", PMC6, PMC, MSR_PERFEVTSEL3, MSR_PMC3, 0, 0},
/* Temperature Sensor*/
- {"TMP0", PMC7, THERMAL, 0, 0, 0, 0},
+ {"TMP0", PMC5, THERMAL, 0, 0, 0, 0},
/* RAPL counters */
- {"PWR0", PMC8, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0},
- {"PWR1", PMC9, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0},
- {"PWR2", PMC10, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0},
- {"PWR3", PMC11, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0},
+ {"PWR0", PMC6, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0},
+ {"PWR1", PMC7, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0},
};
+
diff --git a/src/includes/perfmon_silvermont_events.txt b/src/includes/perfmon_silvermont_events.txt
new file mode 100644
index 0000000..b8a088d
--- /dev/null
+++ b/src/includes/perfmon_silvermont_events.txt
@@ -0,0 +1,440 @@
+# =======================================================================================
+#
+# Filename: perfmon_silvermont_events.txt
+#
+# Description: Event list for Intel Atom (Silvermont)
+#
+# Version: 3.1.3
+# Released: 4.11.2014
+#
+# Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
+# Project: likwid
+#
+# Copyright (C) 2014 Jan Treibig
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =======================================================================================
+
+EVENT_TEMP_CORE 0x00 TMP0
+UMASK_TEMP_CORE 0x00
+
+EVENT_PWR_PKG_ENERGY 0x00 PWR0
+UMASK_PWR_PKG_ENERGY 0x00
+
+EVENT_PWR_PP0_ENERGY 0x00 PWR1
+UMASK_PWR_PP0_ENERGY 0x00
+
+EVENT_INSTR_RETIRED 0x00 FIXC0
+UMASK_INSTR_RETIRED_ANY 0x00
+
+EVENT_CPU_CLK_UNHALTED 0x00 FIXC1
+UMASK_CPU_CLK_UNHALTED_CORE 0x00
+
+EVENT_CPU_CLK_UNHALTED 0x00 FIXC2
+UMASK_CPU_CLK_UNHALTED_REF 0x00
+
+EVENT_REHABQ 0x03 PMC
+UMASK_REHABQ_LD_BLOCK_ST_FORWARD 0x01
+UMASK_REHABQ_LD_BLOCK_STD_NOTREADY 0x02
+UMASK_REHABQ_ST_SPLITS 0x04
+UMASK_REHABQ_LD_SPLITS 0x08
+UMASK_REHABQ_LOCK 0x10
+UMASK_REHABQ_STA_FULL 0x20
+UMASK_REHABQ_ANY_LD 0x40
+UMASK_REHABQ_ANY_ST 0x80
+
+EVENT_MEM_UOPS_RETIRED 0x04 PMC
+UMASK_MEM_UOPS_RETIRED_L1_MISS_LOADS 0x01
+UMASK_MEM_UOPS_RETIRED_L2_HIT_LOADS 0x02
+UMASK_MEM_UOPS_RETIRED_L2_MISS_LOADS 0x04
+UMASK_MEM_UOPS_RETIRED_DTLB_MISS_LOADS 0x08
+UMASK_MEM_UOPS_RETIRED_UTLB_MISS 0x10
+UMASK_MEM_UOPS_RETIRED_HITM 0x20
+UMASK_MEM_UOPS_RETIRED_ALL_LOADS 0x40
+UMASK_MEM_UOPS_RETIRED_ALL_STORES 0x80
+
+EVENT_PAGE_WALKS 0x05 PMC
+UMASK_PAGE_WALKS_D_SIDE_CYCLES 0x01
+UMASK_PAGE_WALKS_I_SIDE_CYCLES 0x02
+UMASK_PAGE_WALKS_WALKS 0x03
+
+EVENT_LONGEST_LAT_CACHE 0x2E PMC
+UMASK_LONGEST_LAT_CACHE_MISS 0x41
+UMASK_LONGEST_LAT_CACHE_REFERENCE 0x4F
+
+EVENT_L2_REJECT_XQ 0x30 PMC
+UMASK_L2_REJECT_XQ_ALL 0x00
+
+EVENT_CORE_REJECT_L2Q 0x31 PMC
+UMASK_CORE_REJECT_L2Q_ALL 0x00
+
+EVENT_CPU_CLK_UNHALTED 0x3C PMC
+UMASK_CPU_CLK_UNHALTED_CORE_P 0x00
+UMASK_CPU_CLK_UNHALTED_REF_P 0x01
+
+EVENT_ICACHE 0x80 PMC
+UMASK_ICACHE_HIT 0x01
+UMASK_ICACHE_MISSES 0x02
+UMASK_ICACHE_ACCESSES 0x03
+UMASK_ICACHE_IFETCH_STALL 0x04
+
+EVENT_NIP_STALL 0xB6 PMC
+UMASK_NIP_STALL_ICACHE_MISS 0x04
+
+EVENT_OFFCORE_RESPONSE 0xB7 PMC
+UMASK_OFFCORE_RESPONSE_0_DMND_DATA_RD_ANY 0x01 0x00 0x10
+UMASK_OFFCORE_RESPONSE_0_DMND_DATA_RD_L2_HIT 0x01 0x00 0x12
+UMASK_OFFCORE_RESPONSE_0_DMND_DATA_RD_SNP_NONE 0x01 0x00 0x1F
+UMASK_OFFCORE_RESPONSE_0_DMND_DATA_RD_SNOOP_MISS 0x01 0x00 0x21
+UMASK_OFFCORE_RESPONSE_0_DMND_DATA_RD_SNOOP_HIT 0x01 0x00 0x22
+UMASK_OFFCORE_RESPONSE_0_DMND_DATA_RD_HITM 0x01 0x00 0x24
+UMASK_OFFCORE_RESPONSE_0_DMND_DATA_RD_NON_DRAM 0x01 0x00 0x25
+UMASK_OFFCORE_RESPONSE_0_DMND_DATA_RD_AVG_LAT 0x01 0x00 0x26
+
+UMASK_OFFCORE_RESPONSE_0_DMND_RFO_ANY 0x01 0x01 0x10
+UMASK_OFFCORE_RESPONSE_0_DMND_RFO_L2_HIT 0x01 0x01 0x12
+UMASK_OFFCORE_RESPONSE_0_DMND_RFO_SNP_NONE 0x01 0x01 0x1F
+UMASK_OFFCORE_RESPONSE_0_DMND_RFO_SNOOP_MISS 0x01 0x01 0x21
+UMASK_OFFCORE_RESPONSE_0_DMND_RFO_SNOOP_HIT 0x01 0x01 0x22
+UMASK_OFFCORE_RESPONSE_0_DMND_RFO_HITM 0x01 0x01 0x24
+UMASK_OFFCORE_RESPONSE_0_DMND_RFO_NON_DRAM 0x01 0x01 0x25
+UMASK_OFFCORE_RESPONSE_0_DMND_RFO_AVG_LAT 0x01 0x01 0x26
+
+UMASK_OFFCORE_RESPONSE_0_DMND_IFETCH_ANY 0x01 0x02 0x10
+UMASK_OFFCORE_RESPONSE_0_DMND_IFETCH_L2_HIT 0x01 0x02 0x12
+UMASK_OFFCORE_RESPONSE_0_DMND_IFETCH_SNP_NONE 0x01 0x02 0x1F
+UMASK_OFFCORE_RESPONSE_0_DMND_IFETCH_SNOOP_MISS 0x01 0x02 0x21
+UMASK_OFFCORE_RESPONSE_0_DMND_IFETCH_SNOOP_HIT 0x01 0x02 0x22
+UMASK_OFFCORE_RESPONSE_0_DMND_IFETCH_HITM 0x01 0x02 0x24
+UMASK_OFFCORE_RESPONSE_0_DMND_IFETCH_NON_DRAM 0x01 0x02 0x25
+UMASK_OFFCORE_RESPONSE_0_DMND_IFETCH_AVG_LAT 0x01 0x02 0x26
+
+UMASK_OFFCORE_RESPONSE_0_WB_ANY 0x01 0x03 0x10
+UMASK_OFFCORE_RESPONSE_0_WB_L2_HIT 0x01 0x03 0x12
+UMASK_OFFCORE_RESPONSE_0_WB_SNP_NONE 0x01 0x03 0x1F
+UMASK_OFFCORE_RESPONSE_0_WB_SNOOP_MISS 0x01 0x03 0x21
+UMASK_OFFCORE_RESPONSE_0_WB_SNOOP_HIT 0x01 0x03 0x22
+UMASK_OFFCORE_RESPONSE_0_WB_HITM 0x01 0x03 0x24
+UMASK_OFFCORE_RESPONSE_0_WB_NON_DRAM 0x01 0x03 0x25
+UMASK_OFFCORE_RESPONSE_0_WB_AVG_LAT 0x01 0x03 0x26
+
+UMASK_OFFCORE_RESPONSE_0_PF_DATA_RD_ANY 0x01 0x04 0x10
+UMASK_OFFCORE_RESPONSE_0_PF_DATA_RD_L2_HIT 0x01 0x04 0x12
+UMASK_OFFCORE_RESPONSE_0_PF_DATA_RD_SNP_NONE 0x01 0x04 0x1F
+UMASK_OFFCORE_RESPONSE_0_PF_DATA_RD_SNOOP_MISS 0x01 0x04 0x21
+UMASK_OFFCORE_RESPONSE_0_PF_DATA_RD_SNOOP_HIT 0x01 0x04 0x22
+UMASK_OFFCORE_RESPONSE_0_PF_DATA_RD_HITM 0x01 0x04 0x24
+UMASK_OFFCORE_RESPONSE_0_PF_DATA_RD_NON_DRAM 0x01 0x04 0x25
+UMASK_OFFCORE_RESPONSE_0_PF_DATA_RD_AVG_LAT 0x01 0x04 0x26
+
+UMASK_OFFCORE_RESPONSE_0_PF_RFO_ANY 0x01 0x05 0x10
+UMASK_OFFCORE_RESPONSE_0_PF_RFO_L2_HIT 0x01 0x05 0x12
+UMASK_OFFCORE_RESPONSE_0_PF_RFO_SNP_NONE 0x01 0x05 0x1F
+UMASK_OFFCORE_RESPONSE_0_PF_RFO_SNOOP_MISS 0x01 0x05 0x21
+UMASK_OFFCORE_RESPONSE_0_PF_RFO_SNOOP_HIT 0x01 0x05 0x22
+UMASK_OFFCORE_RESPONSE_0_PF_RFO_HITM 0x01 0x05 0x24
+UMASK_OFFCORE_RESPONSE_0_PF_RFO_NON_DRAM 0x01 0x05 0x25
+UMASK_OFFCORE_RESPONSE_0_PF_RFO_AVG_LAT 0x01 0x05 0x26
+
+UMASK_OFFCORE_RESPONSE_0_PF_IFETCH_ANY 0x01 0x06 0x10
+UMASK_OFFCORE_RESPONSE_0_PF_IFETCH_L2_HIT 0x01 0x06 0x12
+UMASK_OFFCORE_RESPONSE_0_PF_IFETCH_SNP_NONE 0x01 0x06 0x1F
+UMASK_OFFCORE_RESPONSE_0_PF_IFETCH_SNOOP_MISS 0x01 0x06 0x21
+UMASK_OFFCORE_RESPONSE_0_PF_IFETCH_SNOOP_HIT 0x01 0x06 0x22
+UMASK_OFFCORE_RESPONSE_0_PF_IFETCH_HITM 0x01 0x06 0x24
+UMASK_OFFCORE_RESPONSE_0_PF_IFETCH_NON_DRAM 0x01 0x06 0x25
+UMASK_OFFCORE_RESPONSE_0_PF_IFETCH_AVG_LAT 0x01 0x06 0x26
+
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_READ_ANY 0x01 0x07 0x10
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_READ_L2_HIT 0x01 0x07 0x12
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_READ_SNP_NONE 0x01 0x07 0x1F
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_READ_SNOOP_MISS 0x01 0x07 0x21
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_READ_SNOOP_HIT 0x01 0x07 0x22
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_READ_HITM 0x01 0x07 0x24
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_READ_NON_DRAM 0x01 0x07 0x25
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_READ_AVG_LAT 0x01 0x07 0x26
+
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_WRITE_ANY 0x01 0x08 0x10
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_WRITE_L2_HIT 0x01 0x08 0x12
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_WRITE_SNP_NONE 0x01 0x08 0x1F
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_WRITE_SNOOP_MISS 0x01 0x08 0x21
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_WRITE_SNOOP_HIT 0x01 0x08 0x22
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_WRITE_HITM 0x01 0x08 0x24
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_WRITE_NON_DRAM 0x01 0x08 0x25
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_WRITE_AVG_LAT 0x01 0x08 0x26
+
+UMASK_OFFCORE_RESPONSE_0_UC_IFETCH_ANY 0x01 0x09 0x10
+UMASK_OFFCORE_RESPONSE_0_UC_IFETCH_L2_HIT 0x01 0x09 0x12
+UMASK_OFFCORE_RESPONSE_0_UC_IFETCH_SNP_NONE 0x01 0x09 0x1F
+UMASK_OFFCORE_RESPONSE_0_UC_IFETCH_SNOOP_MISS 0x01 0x09 0x21
+UMASK_OFFCORE_RESPONSE_0_UC_IFETCH_SNOOP_HIT 0x01 0x09 0x22
+UMASK_OFFCORE_RESPONSE_0_UC_IFETCH_HITM 0x01 0x09 0x24
+UMASK_OFFCORE_RESPONSE_0_UC_IFETCH_NON_DRAM 0x01 0x09 0x25
+UMASK_OFFCORE_RESPONSE_0_UC_IFETCH_AVG_LAT 0x01 0x09 0x26
+
+UMASK_OFFCORE_RESPONSE_0_BUS_LOCKS_ANY 0x01 0x0A 0x10
+UMASK_OFFCORE_RESPONSE_0_BUS_LOCKS_L2_HIT 0x01 0x0A 0x12
+UMASK_OFFCORE_RESPONSE_0_BUS_LOCKS_SNP_NONE 0x01 0x0A 0x1F
+UMASK_OFFCORE_RESPONSE_0_BUS_LOCKS_SNOOP_MISS 0x01 0x0A 0x21
+UMASK_OFFCORE_RESPONSE_0_BUS_LOCKS_SNOOP_HIT 0x01 0x0A 0x22
+UMASK_OFFCORE_RESPONSE_0_BUS_LOCKS_HITM 0x01 0x0A 0x24
+UMASK_OFFCORE_RESPONSE_0_BUS_LOCKS_NON_DRAM 0x01 0x0A 0x25
+UMASK_OFFCORE_RESPONSE_0_BUS_LOCKS_AVG_LAT 0x01 0x0A 0x26
+
+UMASK_OFFCORE_RESPONSE_0_STRM_ST_ANY 0x01 0x0B 0x10
+UMASK_OFFCORE_RESPONSE_0_STRM_ST_L2_HIT 0x01 0x0B 0x12
+UMASK_OFFCORE_RESPONSE_0_STRM_ST_SNP_NONE 0x01 0x0B 0x1F
+UMASK_OFFCORE_RESPONSE_0_STRM_ST_SNOOP_MISS 0x01 0x0B 0x21
+UMASK_OFFCORE_RESPONSE_0_STRM_ST_SNOOP_HIT 0x01 0x0B 0x22
+UMASK_OFFCORE_RESPONSE_0_STRM_ST_HITM 0x01 0x0B 0x24
+UMASK_OFFCORE_RESPONSE_0_STRM_ST_NON_DRAM 0x01 0x0B 0x25
+UMASK_OFFCORE_RESPONSE_0_STRM_ST_AVG_LAT 0x01 0x0B 0x26
+
+UMASK_OFFCORE_RESPONSE_0_SW_PREFETCH_ANY 0x01 0x0C 0x10
+UMASK_OFFCORE_RESPONSE_0_SW_PREFETCH_L2_HIT 0x01 0x0C 0x12
+UMASK_OFFCORE_RESPONSE_0_SW_PREFETCH_SNP_NONE 0x01 0x0C 0x1F
+UMASK_OFFCORE_RESPONSE_0_SW_PREFETCH_SNOOP_MISS 0x01 0x0C 0x21
+UMASK_OFFCORE_RESPONSE_0_SW_PREFETCH_SNOOP_HIT 0x01 0x0C 0x22
+UMASK_OFFCORE_RESPONSE_0_SW_PREFETCH_HITM 0x01 0x0C 0x24
+UMASK_OFFCORE_RESPONSE_0_SW_PREFETCH_NON_DRAM 0x01 0x0C 0x25
+UMASK_OFFCORE_RESPONSE_0_SW_PREFETCH_AVG_LAT 0x01 0x0C 0x26
+
+UMASK_OFFCORE_RESPONSE_0_DCU_PF_DATA_RD_ANY 0x01 0x0D 0x10
+UMASK_OFFCORE_RESPONSE_0_DCU_PF_DATA_RD_L2_HIT 0x01 0x0D 0x12
+UMASK_OFFCORE_RESPONSE_0_DCU_PF_DATA_RD_SNP_NONE 0x01 0x0D 0x1F
+UMASK_OFFCORE_RESPONSE_0_DCU_PF_DATA_RD_SNOOP_MISS 0x01 0x0D 0x21
+UMASK_OFFCORE_RESPONSE_0_DCU_PF_DATA_RD_SNOOP_HIT 0x01 0x0D 0x22
+UMASK_OFFCORE_RESPONSE_0_DCU_PF_DATA_RD_HITM 0x01 0x0D 0x24
+UMASK_OFFCORE_RESPONSE_0_DCU_PF_DATA_RD_NON_DRAM 0x01 0x0D 0x25
+UMASK_OFFCORE_RESPONSE_0_DCU_PF_DATA_RD_AVG_LAT 0x01 0x0D 0x26
+
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_STRM_ST_ANY 0x01 0x0E 0x10
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_STRM_ST_L2_HIT 0x01 0x0E 0x12
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_STRM_ST_SNP_NONE 0x01 0x0E 0x1F
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_STRM_ST_SNOOP_MISS 0x01 0x0E 0x21
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_STRM_ST_SNOOP_HIT 0x01 0x0E 0x22
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_STRM_ST_HITM 0x01 0x0E 0x24
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_STRM_ST_NON_DRAM 0x01 0x0E 0x25
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_STRM_ST_AVG_LAT 0x01 0x0E 0x26
+
+UMASK_OFFCORE_RESPONSE_0_ANY_ANY 0x01 0x0F 0x10
+UMASK_OFFCORE_RESPONSE_0_ANY_L2_HIT 0x01 0x0F 0x12
+UMASK_OFFCORE_RESPONSE_0_ANY_SNP_NONE 0x01 0x0F 0x1F
+UMASK_OFFCORE_RESPONSE_0_ANY_SNOOP_MISS 0x01 0x0F 0x21
+UMASK_OFFCORE_RESPONSE_0_ANY_SNOOP_HIT 0x01 0x0F 0x22
+UMASK_OFFCORE_RESPONSE_0_ANY_HITM 0x01 0x0F 0x24
+UMASK_OFFCORE_RESPONSE_0_ANY_NON_DRAM 0x01 0x0F 0x25
+UMASK_OFFCORE_RESPONSE_0_ANY_AVG_LAT 0x01 0x0F 0x26
+
+UMASK_OFFCORE_RESPONSE_1_DMND_DATA_RD_ANY 0x02 0x00 0x10
+UMASK_OFFCORE_RESPONSE_1_DMND_DATA_RD_L2_HIT 0x02 0x00 0x12
+UMASK_OFFCORE_RESPONSE_1_DMND_DATA_RD_SNP_NONE 0x02 0x00 0x1F
+UMASK_OFFCORE_RESPONSE_1_DMND_DATA_RD_SNOOP_MISS 0x02 0x00 0x21
+UMASK_OFFCORE_RESPONSE_1_DMND_DATA_RD_SNOOP_HIT 0x02 0x00 0x22
+UMASK_OFFCORE_RESPONSE_1_DMND_DATA_RD_HITM 0x02 0x00 0x24
+UMASK_OFFCORE_RESPONSE_1_DMND_DATA_RD_NON_DRAM 0x02 0x00 0x25
+UMASK_OFFCORE_RESPONSE_1_DMND_DATA_RD_AVG_LAT 0x02 0x00 0x26
+
+UMASK_OFFCORE_RESPONSE_1_DMND_RFO_ANY 0x02 0x01 0x10
+UMASK_OFFCORE_RESPONSE_1_DMND_RFO_L2_HIT 0x02 0x01 0x12
+UMASK_OFFCORE_RESPONSE_1_DMND_RFO_SNP_NONE 0x02 0x01 0x1F
+UMASK_OFFCORE_RESPONSE_1_DMND_RFO_SNOOP_MISS 0x02 0x01 0x21
+UMASK_OFFCORE_RESPONSE_1_DMND_RFO_SNOOP_HIT 0x02 0x01 0x22
+UMASK_OFFCORE_RESPONSE_1_DMND_RFO_HITM 0x02 0x01 0x24
+UMASK_OFFCORE_RESPONSE_1_DMND_RFO_NON_DRAM 0x02 0x01 0x25
+UMASK_OFFCORE_RESPONSE_1_DMND_RFO_AVG_LAT 0x02 0x01 0x26
+
+UMASK_OFFCORE_RESPONSE_1_DMND_IFETCH_ANY 0x02 0x02 0x10
+UMASK_OFFCORE_RESPONSE_1_DMND_IFETCH_L2_HIT 0x02 0x02 0x12
+UMASK_OFFCORE_RESPONSE_1_DMND_IFETCH_SNP_NONE 0x02 0x02 0x1F
+UMASK_OFFCORE_RESPONSE_1_DMND_IFETCH_SNOOP_MISS 0x02 0x02 0x21
+UMASK_OFFCORE_RESPONSE_1_DMND_IFETCH_SNOOP_HIT 0x02 0x02 0x22
+UMASK_OFFCORE_RESPONSE_1_DMND_IFETCH_HITM 0x02 0x02 0x24
+UMASK_OFFCORE_RESPONSE_1_DMND_IFETCH_NON_DRAM 0x02 0x02 0x25
+UMASK_OFFCORE_RESPONSE_1_DMND_IFETCH_AVG_LAT 0x02 0x02 0x26
+
+UMASK_OFFCORE_RESPONSE_1_WB_ANY 0x02 0x03 0x10
+UMASK_OFFCORE_RESPONSE_1_WB_L2_HIT 0x02 0x03 0x12
+UMASK_OFFCORE_RESPONSE_1_WB_SNP_NONE 0x02 0x03 0x1F
+UMASK_OFFCORE_RESPONSE_1_WB_SNOOP_MISS 0x02 0x03 0x21
+UMASK_OFFCORE_RESPONSE_1_WB_SNOOP_HIT 0x02 0x03 0x22
+UMASK_OFFCORE_RESPONSE_1_WB_HITM 0x02 0x03 0x24
+UMASK_OFFCORE_RESPONSE_1_WB_NON_DRAM 0x02 0x03 0x25
+UMASK_OFFCORE_RESPONSE_1_WB_AVG_LAT 0x02 0x03 0x26
+
+UMASK_OFFCORE_RESPONSE_1_PF_DATA_RD_ANY 0x02 0x04 0x10
+UMASK_OFFCORE_RESPONSE_1_PF_DATA_RD_L2_HIT 0x02 0x04 0x12
+UMASK_OFFCORE_RESPONSE_1_PF_DATA_RD_SNP_NONE 0x02 0x04 0x1F
+UMASK_OFFCORE_RESPONSE_1_PF_DATA_RD_SNOOP_MISS 0x02 0x04 0x21
+UMASK_OFFCORE_RESPONSE_1_PF_DATA_RD_SNOOP_HIT 0x02 0x04 0x22
+UMASK_OFFCORE_RESPONSE_1_PF_DATA_RD_HITM 0x02 0x04 0x24
+UMASK_OFFCORE_RESPONSE_1_PF_DATA_RD_NON_DRAM 0x02 0x04 0x25
+UMASK_OFFCORE_RESPONSE_1_PF_DATA_RD_AVG_LAT 0x02 0x04 0x26
+
+UMASK_OFFCORE_RESPONSE_1_PF_RFO_ANY 0x02 0x05 0x10
+UMASK_OFFCORE_RESPONSE_1_PF_RFO_L2_HIT 0x02 0x05 0x12
+UMASK_OFFCORE_RESPONSE_1_PF_RFO_SNP_NONE 0x02 0x05 0x1F
+UMASK_OFFCORE_RESPONSE_1_PF_RFO_SNOOP_MISS 0x02 0x05 0x21
+UMASK_OFFCORE_RESPONSE_1_PF_RFO_SNOOP_HIT 0x02 0x05 0x22
+UMASK_OFFCORE_RESPONSE_1_PF_RFO_HITM 0x02 0x05 0x24
+UMASK_OFFCORE_RESPONSE_1_PF_RFO_NON_DRAM 0x02 0x05 0x25
+UMASK_OFFCORE_RESPONSE_1_PF_RFO_AVG_LAT 0x02 0x05 0x26
+
+UMASK_OFFCORE_RESPONSE_1_PF_IFETCH_ANY 0x02 0x06 0x10
+UMASK_OFFCORE_RESPONSE_1_PF_IFETCH_L2_HIT 0x02 0x06 0x12
+UMASK_OFFCORE_RESPONSE_1_PF_IFETCH_SNP_NONE 0x02 0x06 0x1F
+UMASK_OFFCORE_RESPONSE_1_PF_IFETCH_SNOOP_MISS 0x02 0x06 0x21
+UMASK_OFFCORE_RESPONSE_1_PF_IFETCH_SNOOP_HIT 0x02 0x06 0x22
+UMASK_OFFCORE_RESPONSE_1_PF_IFETCH_HITM 0x02 0x06 0x24
+UMASK_OFFCORE_RESPONSE_1_PF_IFETCH_NON_DRAM 0x02 0x06 0x25
+UMASK_OFFCORE_RESPONSE_1_PF_IFETCH_AVG_LAT 0x02 0x06 0x26
+
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_READ_ANY 0x02 0x07 0x10
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_READ_L2_HIT 0x02 0x07 0x12
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_READ_SNP_NONE 0x02 0x07 0x1F
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_READ_SNOOP_MISS 0x02 0x07 0x21
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_READ_SNOOP_HIT 0x02 0x07 0x22
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_READ_HITM 0x02 0x07 0x24
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_READ_NON_DRAM 0x02 0x07 0x25
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_READ_AVG_LAT 0x02 0x07 0x26
+
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_WRITE_ANY 0x02 0x08 0x10
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_WRITE_L2_HIT 0x02 0x08 0x12
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_WRITE_SNP_NONE 0x02 0x08 0x1F
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_WRITE_SNOOP_MISS 0x02 0x08 0x21
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_WRITE_SNOOP_HIT 0x02 0x08 0x22
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_WRITE_HITM 0x02 0x08 0x24
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_WRITE_NON_DRAM 0x02 0x08 0x25
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_WRITE_AVG_LAT 0x02 0x08 0x26
+
+UMASK_OFFCORE_RESPONSE_1_UC_IFETCH_ANY 0x02 0x09 0x10
+UMASK_OFFCORE_RESPONSE_1_UC_IFETCH_L2_HIT 0x02 0x09 0x12
+UMASK_OFFCORE_RESPONSE_1_UC_IFETCH_SNP_NONE 0x02 0x09 0x1F
+UMASK_OFFCORE_RESPONSE_1_UC_IFETCH_SNOOP_MISS 0x02 0x09 0x21
+UMASK_OFFCORE_RESPONSE_1_UC_IFETCH_SNOOP_HIT 0x02 0x09 0x22
+UMASK_OFFCORE_RESPONSE_1_UC_IFETCH_HITM 0x02 0x09 0x24
+UMASK_OFFCORE_RESPONSE_1_UC_IFETCH_NON_DRAM 0x02 0x09 0x25
+UMASK_OFFCORE_RESPONSE_1_UC_IFETCH_AVG_LAT 0x02 0x09 0x26
+
+UMASK_OFFCORE_RESPONSE_1_BUS_LOCKS_ANY 0x02 0x0A 0x10
+UMASK_OFFCORE_RESPONSE_1_BUS_LOCKS_L2_HIT 0x02 0x0A 0x12
+UMASK_OFFCORE_RESPONSE_1_BUS_LOCKS_SNP_NONE 0x02 0x0A 0x1F
+UMASK_OFFCORE_RESPONSE_1_BUS_LOCKS_SNOOP_MISS 0x02 0x0A 0x21
+UMASK_OFFCORE_RESPONSE_1_BUS_LOCKS_SNOOP_HIT 0x02 0x0A 0x22
+UMASK_OFFCORE_RESPONSE_1_BUS_LOCKS_HITM 0x02 0x0A 0x24
+UMASK_OFFCORE_RESPONSE_1_BUS_LOCKS_NON_DRAM 0x02 0x0A 0x25
+UMASK_OFFCORE_RESPONSE_1_BUS_LOCKS_AVG_LAT 0x02 0x0A 0x26
+
+UMASK_OFFCORE_RESPONSE_1_STRM_ST_ANY 0x02 0x0B 0x10
+UMASK_OFFCORE_RESPONSE_1_STRM_ST_L2_HIT 0x02 0x0B 0x12
+UMASK_OFFCORE_RESPONSE_1_STRM_ST_SNP_NONE 0x02 0x0B 0x1F
+UMASK_OFFCORE_RESPONSE_1_STRM_ST_SNOOP_MISS 0x02 0x0B 0x21
+UMASK_OFFCORE_RESPONSE_1_STRM_ST_SNOOP_HIT 0x02 0x0B 0x22
+UMASK_OFFCORE_RESPONSE_1_STRM_ST_HITM 0x02 0x0B 0x24
+UMASK_OFFCORE_RESPONSE_1_STRM_ST_NON_DRAM 0x02 0x0B 0x25
+UMASK_OFFCORE_RESPONSE_1_STRM_ST_AVG_LAT 0x02 0x0B 0x26
+
+UMASK_OFFCORE_RESPONSE_1_SW_PREFETCH_ANY 0x02 0x0C 0x10
+UMASK_OFFCORE_RESPONSE_1_SW_PREFETCH_L2_HIT 0x02 0x0C 0x12
+UMASK_OFFCORE_RESPONSE_1_SW_PREFETCH_SNP_NONE 0x02 0x0C 0x1F
+UMASK_OFFCORE_RESPONSE_1_SW_PREFETCH_SNOOP_MISS 0x02 0x0C 0x21
+UMASK_OFFCORE_RESPONSE_1_SW_PREFETCH_SNOOP_HIT 0x02 0x0C 0x22
+UMASK_OFFCORE_RESPONSE_1_SW_PREFETCH_HITM 0x02 0x0C 0x24
+UMASK_OFFCORE_RESPONSE_1_SW_PREFETCH_NON_DRAM 0x02 0x0C 0x25
+UMASK_OFFCORE_RESPONSE_1_SW_PREFETCH_AVG_LAT 0x02 0x0C 0x26
+
+UMASK_OFFCORE_RESPONSE_1_DCU_PF_DATA_RD_ANY 0x02 0x0D 0x10
+UMASK_OFFCORE_RESPONSE_1_DCU_PF_DATA_RD_L2_HIT 0x02 0x0D 0x12
+UMASK_OFFCORE_RESPONSE_1_DCU_PF_DATA_RD_SNP_NONE 0x02 0x0D 0x1F
+UMASK_OFFCORE_RESPONSE_1_DCU_PF_DATA_RD_SNOOP_MISS 0x02 0x0D 0x21
+UMASK_OFFCORE_RESPONSE_1_DCU_PF_DATA_RD_SNOOP_HIT 0x02 0x0D 0x22
+UMASK_OFFCORE_RESPONSE_1_DCU_PF_DATA_RD_HITM 0x02 0x0D 0x24
+UMASK_OFFCORE_RESPONSE_1_DCU_PF_DATA_RD_NON_DRAM 0x02 0x0D 0x25
+UMASK_OFFCORE_RESPONSE_1_DCU_PF_DATA_RD_AVG_LAT 0x02 0x0D 0x26
+
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_STRM_ST_ANY 0x02 0x0E 0x10
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_STRM_ST_L2_HIT 0x02 0x0E 0x12
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_STRM_ST_SNP_NONE 0x02 0x0E 0x1F
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_STRM_ST_SNOOP_MISS 0x02 0x0E 0x21
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_STRM_ST_SNOOP_HIT 0x02 0x0E 0x22
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_STRM_ST_HITM 0x02 0x0E 0x24
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_STRM_ST_NON_DRAM 0x02 0x0E 0x25
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_STRM_ST_AVG_LAT 0x02 0x0E 0x26
+
+UMASK_OFFCORE_RESPONSE_1_ANY_ANY 0x02 0x0F 0x10
+UMASK_OFFCORE_RESPONSE_1_ANY_L2_HIT 0x02 0x0F 0x12
+UMASK_OFFCORE_RESPONSE_1_ANY_SNP_NONE 0x02 0x0F 0x1F
+UMASK_OFFCORE_RESPONSE_1_ANY_SNOOP_MISS 0x02 0x0F 0x21
+UMASK_OFFCORE_RESPONSE_1_ANY_SNOOP_HIT 0x02 0x0F 0x22
+UMASK_OFFCORE_RESPONSE_1_ANY_HITM 0x02 0x0F 0x24
+UMASK_OFFCORE_RESPONSE_1_ANY_NON_DRAM 0x02 0x0F 0x25
+UMASK_OFFCORE_RESPONSE_1_ANY_AVG_LAT 0x02 0x0F 0x26
+
+
+EVENT_INST_RETIRED 0xC0 PMC
+UMASK_INST_RETIRED_ANY_P 0x00
+
+EVENT_UOPS_RETIRED 0xC2 PMC
+UMASK_UOPS_RETIRED_MS 0x01
+UMASK_UOPS_RETIRED_ALL 0x10
+
+EVENT_MACHINE_CLEARS 0xC3 PMC
+UMASK_MACHINE_CLEARS_SMC 0x01
+UMASK_MACHINE_CLEARS_MEMORY_ORDERING 0x02
+UMASK_MACHINE_CLEARS_FP_ASSIST 0x04
+UMASK_MACHINE_CLEARS_ALL 0x08
+
+EVENT_BR_INST_RETIRED 0xC4 PMC
+UMASK_BR_INST_RETIRED_ALL_BRANCHES 0x00
+UMASK_BR_INST_RETIRED_JCC 0x7E
+UMASK_BR_INST_RETIRED_FAR_BRANCH 0xBF
+UMASK_BR_INST_RETIRED_NON_RETURN_IND 0xEB
+UMASK_BR_INST_RETIRED_RETURN 0xF7
+UMASK_BR_INST_RETIRED_CALL 0xF9
+UMASK_BR_INST_RETIRED_IND_CALL 0xFB
+UMASK_BR_INST_RETIRED_REL_CALL 0xFD
+UMASK_BR_INST_RETIRED_TAKEN_JCC 0xFE
+
+EVENT_BR_MISP_RETIRED 0xC5 PMC
+UMASK_BR_MISP_RETIRED_ALL_BRANCHES 0x00
+UMASK_BR_MISP_RETIRED_JCC 0x7E
+UMASK_BR_MISP_RETIRED_FAR_BRANCH 0xBF
+UMASK_BR_MISP_RETIRED_NON_RETURN_IND 0xEB
+UMASK_BR_MISP_RETIRED_RETURN 0xF7
+UMASK_BR_MISP_RETIRED_CALL 0xF9
+UMASK_BR_MISP_RETIRED_IND_CALL 0xFB
+UMASK_BR_MISP_RETIRED_REL_CALL 0xFD
+UMASK_BR_MISP_RETIRED_TAKEN_JCC 0xFE
+
+EVENT_NO_ALLOC_CYCLES 0xCA PMC
+UMASK_NO_ALLOC_CYCLES_ROB_FULL 0x01
+UMASK_NO_ALLOC_CYCLES_RAT_STALL 0x20
+UMASK_NO_ALLOC_CYCLES_ALL 0x3F
+UMASK_NO_ALLOC_CYCLES_NOT_DELIVERED 0x50
+
+EVENT_RS_FULL_STALL 0xCB PMC
+UMASK_RS_FULL_STALL_MEC 0x01
+UMASK_RS_FULL_STALL_ALL 0x1F
+
+EVENT_CYCLES_DIV_BUSY 0xCD PMC
+UMASK_CYCLES_DIV_BUSY_ANY 0x01
+
+EVENT_BACLEARS 0xE6 PMC
+UMASK_BACLEARS_ALL 0x01
+UMASK_BACLEARS_RETURN 0x08
+UMASK_BACLEARS_COND 0x10
+
+EVENT_MS_DECODED 0xE7 PMC
+UMASK_MS_DECODED_MS_ENTRY 0x01
+
diff --git a/src/includes/perfmon_types.h b/src/includes/perfmon_types.h
index 1b47e95..1f0663a 100644
--- a/src/includes/perfmon_types.h
+++ b/src/includes/perfmon_types.h
@@ -7,8 +7,8 @@
* Configures and reads out performance counters
* on x86 based architectures. Supports multi threading.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -54,6 +54,10 @@ typedef enum {
PMC67, PMC68, PMC69, PMC70, PMC71, PMC72,
PMC73, PMC74, PMC75, PMC76, PMC77, PMC78,
PMC79, PMC80, PMC81, PMC82, PMC83, PMC84,
+ PMC85, PMC86, PMC87, PMC88, PMC89, PMC90,
+ PMC91, PMC92, PMC93, PMC94, PMC95, PMC96,
+ PMC97, PMC98, PMC99, PMC100, PMC101, PMC102,
+ PMC103, PMC104, PMC105, PMC106, PMC107, PMC108,
NUM_PMC} PerfmonCounterIndex;
typedef enum {
@@ -86,26 +90,32 @@ typedef enum {
CBOX9,
CBOX10,
CBOX11,
+ CBOX12,
+ CBOX13,
+ CBOX14,
PBOX,
POWER,
+ UBOX,
NUM_UNITS} PerfmonType;
typedef struct {
char* key;
PerfmonCounterIndex index;
- PerfmonType type;
- uint64_t configRegister;
- uint64_t counterRegister;
- uint64_t counterRegister2;
+ PerfmonType type;
+ uint64_t configRegister;
+ uint64_t counterRegister;
+ uint64_t counterRegister2;
PciDeviceIndex device;
} PerfmonCounterMap;
typedef struct {
- char* key;
+ const char* key;
PerfmonGroup index;
int isUncore;
- char* info;
- char* config;
+ const char* info;
+ const char* config;
+ int derivedCounters;
+ const char ** derivedCounterNames;
} PerfmonGroupMap;
typedef struct {
@@ -115,15 +125,15 @@ typedef struct {
/* only used in westmereEX at the moment */
typedef struct {
- uint32_t ctrlRegister;
- uint32_t statusRegister;
- uint32_t ovflRegister;
+ uint32_t ctrlRegister;
+ uint32_t statusRegister;
+ uint32_t ovflRegister;
} PerfmonUnit;
typedef struct {
- int init;
- int id; /* TODO id is only used for EX type processors */
- uint64_t counterData;
+ int init;
+ int id; /* TODO id is only used for EX type processors */
+ double counterData;
} PerfmonCounter;
typedef struct {
@@ -132,8 +142,8 @@ typedef struct {
} PerfmonThread;
typedef struct {
- const char* name;
- const char* limit;
+ const char* name;
+ const char* limit;
uint16_t eventId;
uint8_t umask;
uint8_t cfgBits;
diff --git a/src/includes/perfmon_westmere.h b/src/includes/perfmon_westmere.h
index 80c9b91..c469766 100644
--- a/src/includes/perfmon_westmere.h
+++ b/src/includes/perfmon_westmere.h
@@ -5,8 +5,8 @@
*
* Description: Header File of perfmon module for Westmere.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/perfmon_westmereEX.h b/src/includes/perfmon_westmereEX.h
index c58a1fd..8cbc921 100644
--- a/src/includes/perfmon_westmereEX.h
+++ b/src/includes/perfmon_westmereEX.h
@@ -5,8 +5,8 @@
*
* Description: Header File of perfmon module for Westmere EX.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -66,16 +66,16 @@ void perfmon_init_westmereEX(PerfmonThread *thread)
* FIXED 0: Instructions retired
* FIXED 1: Clocks unhalted core
* FIXED 2: Clocks unhalted ref */
- msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, 0x222ULL);
+ //msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, 0x222ULL);
/* Preinit of PERFEVSEL registers */
- flags |= (1<<22); /* enable flag */
- flags |= (1<<16); /* user mode flag */
+ //flags |= (1<<22); /* enable flag */
+ //flags |= (1<<16); /* user mode flag */
- msr_write(cpu_id, MSR_PERFEVTSEL0, flags);
+ /*msr_write(cpu_id, MSR_PERFEVTSEL0, flags);
msr_write(cpu_id, MSR_PERFEVTSEL1, flags);
msr_write(cpu_id, MSR_PERFEVTSEL2, flags);
- msr_write(cpu_id, MSR_PERFEVTSEL3, flags);
+ msr_write(cpu_id, MSR_PERFEVTSEL3, flags);*/
/* Initialize uncore */
/* MBOX */
@@ -151,6 +151,64 @@ void perfmon_init_westmereEX(PerfmonThread *thread)
westmereEX_PMunits[WBOX].statusRegister = MSR_W_PMON_BOX_STATUS;
westmereEX_PMunits[WBOX].ovflRegister = MSR_W_PMON_BOX_OVF_CTRL;
+ thread->counters[PMC48].id = 0;
+ westmereEX_PMunits[UBOX].ctrlRegister = MSR_U_PMON_GLOBAL_CTRL;
+ westmereEX_PMunits[UBOX].statusRegister = MSR_U_PMON_GLOBAL_STATUS;
+ westmereEX_PMunits[UBOX].ovflRegister = MSR_U_PMON_GLOBAL_OVF_CTRL;
+
+ /* Set IDs for all CBOXes */
+ int walker = 0;
+ for (int i=PMC49; i<=PMC98; i++)
+ {
+ thread->counters[i].id = walker;
+ walker = (walker == 4 ? 0 : walker + 1);
+ }
+ westmereEX_PMunits[CBOX0].ctrlRegister = MSR_C0_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX0].statusRegister = MSR_C0_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX0].ovflRegister = MSR_C0_PMON_BOX_OVF_CTRL;
+ westmereEX_PMunits[CBOX1].ctrlRegister = MSR_C1_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX1].statusRegister = MSR_C1_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX1].ovflRegister = MSR_C1_PMON_BOX_OVF_CTRL;
+ westmereEX_PMunits[CBOX2].ctrlRegister = MSR_C2_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX2].statusRegister = MSR_C2_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX2].ovflRegister = MSR_C2_PMON_BOX_OVF_CTRL;
+ westmereEX_PMunits[CBOX3].ctrlRegister = MSR_C3_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX3].statusRegister = MSR_C3_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX3].ovflRegister = MSR_C3_PMON_BOX_OVF_CTRL;
+ westmereEX_PMunits[CBOX4].ctrlRegister = MSR_C4_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX4].statusRegister = MSR_C4_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX4].ovflRegister = MSR_C4_PMON_BOX_OVF_CTRL;
+ westmereEX_PMunits[CBOX5].ctrlRegister = MSR_C5_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX5].statusRegister = MSR_C5_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX5].ovflRegister = MSR_C5_PMON_BOX_OVF_CTRL;
+ westmereEX_PMunits[CBOX6].ctrlRegister = MSR_C6_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX6].statusRegister = MSR_C6_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX6].ovflRegister = MSR_C6_PMON_BOX_OVF_CTRL;
+ westmereEX_PMunits[CBOX7].ctrlRegister = MSR_C7_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX7].statusRegister = MSR_C7_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX7].ovflRegister = MSR_C7_PMON_BOX_OVF_CTRL;
+ westmereEX_PMunits[CBOX8].ctrlRegister = MSR_C8_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX8].statusRegister = MSR_C8_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX8].ovflRegister = MSR_C8_PMON_BOX_OVF_CTRL;
+ westmereEX_PMunits[CBOX9].ctrlRegister = MSR_C9_PMON_BOX_CTRL;
+ westmereEX_PMunits[CBOX9].statusRegister = MSR_C9_PMON_BOX_STATUS;
+ westmereEX_PMunits[CBOX9].ovflRegister = MSR_C9_PMON_BOX_OVF_CTRL;
+
+ thread->counters[PMC99].id = 0;
+ thread->counters[PMC100].id = 1;
+ thread->counters[PMC101].id = 2;
+ thread->counters[PMC102].id = 3;
+ westmereEX_PMunits[SBOX0].ctrlRegister = MSR_S0_PMON_BOX_CTRL;
+ westmereEX_PMunits[SBOX0].statusRegister = MSR_S0_PMON_BOX_STATUS;
+ westmereEX_PMunits[SBOX0].ovflRegister = MSR_S0_PMON_BOX_OVF_CTRL;
+ thread->counters[PMC103].id = 0;
+ thread->counters[PMC104].id = 1;
+ thread->counters[PMC105].id = 2;
+ thread->counters[PMC106].id = 3;
+ westmereEX_PMunits[SBOX1].ctrlRegister = MSR_S1_PMON_BOX_CTRL;
+ westmereEX_PMunits[SBOX1].statusRegister = MSR_S1_PMON_BOX_STATUS;
+ westmereEX_PMunits[SBOX1].ovflRegister = MSR_S1_PMON_BOX_OVF_CTRL;
+
if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id) ||
lock_acquire((int*) &socket_lock[affinity_core2node_lookup[cpu_id]], cpu_id))
{
@@ -209,6 +267,78 @@ void perfmon_init_westmereEX(PerfmonThread *thread)
msr_write(cpu_id, MSR_R1_PMON_EVNT_SEL14, 0x0ULL);
msr_write(cpu_id, MSR_R1_PMON_EVNT_SEL15, 0x0ULL);
+ msr_write(cpu_id, MSR_U_PMON_GLOBAL_EVNT_SEL, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C0_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C0_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C0_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C0_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C0_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C1_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C1_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C1_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C1_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C1_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C2_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C2_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C2_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C2_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C2_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C3_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C3_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C3_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C3_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C3_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C4_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C4_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C4_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C4_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C4_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C5_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C5_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C5_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C5_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C5_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C6_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C6_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C6_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C6_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C6_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C7_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C7_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C7_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C7_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C7_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C8_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C8_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C8_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C8_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C8_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_C9_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_C9_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_C9_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_C9_PMON_EVNT_SEL3, 0x0ULL);
+ msr_write(cpu_id, MSR_C9_PMON_EVNT_SEL4, 0x0ULL);
+
+ msr_write(cpu_id, MSR_S0_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_S0_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_S0_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_S0_PMON_EVNT_SEL3, 0x0ULL);
+
+ msr_write(cpu_id, MSR_S1_PMON_EVNT_SEL0, 0x0ULL);
+ msr_write(cpu_id, MSR_S1_PMON_EVNT_SEL1, 0x0ULL);
+ msr_write(cpu_id, MSR_S1_PMON_EVNT_SEL2, 0x0ULL);
+ msr_write(cpu_id, MSR_S1_PMON_EVNT_SEL3, 0x0ULL);
+
{
uint32_t ubflags = 0x0UL;
ubflags |= (1<<29); /* reset all */
@@ -217,205 +347,203 @@ void perfmon_init_westmereEX(PerfmonThread *thread)
}
}
-/* MBOX macros */
-
#define MBOX_GATE(NUM) \
flags = 0x41ULL; \
switch (event->cfgBits) \
{ \
case 0x00: /* primary Event */ \
- flags |= (event->eventId<<9); \
- break; \
+ flags |= (event->eventId<<9); \
+ break; \
case 0x01: /* secondary Events */ \
- /* TODO fvid index is missing defaults to 0 */ \
- flags |= (1<<7); /* toggle flag mode */ \
- flags |= (event->eventId<<19); \
- switch (event->eventId) \
- { \
- case 0x00: /* CYCLES_DSP_FILL: DSP */ \
- { \
- uint64_t dsp_flags = 0x0ULL; \
- dsp_flags |= (event->umask<<7); \
- msr_write(cpu_id, MSR_M##NUM##_PMON_DSP, dsp_flags); \
- } \
- break; \
- case 0x01: /* CYCLES_SCHED_MODE: ISS */ \
- { \
- uint32_t iss_flags = 0x0UL; \
- iss_flags |= (event->umask<<4); \
- msr_write(cpu_id, MSR_M##NUM##_PMON_ISS, iss_flags); \
- } \
- break; \
- case 0x05: /* CYCLES_PGT_STATE: PGT */ \
- { \
- uint32_t pgt_flags = 0x0UL; \
- pgt_flags |= (event->umask<<6); \
- msr_write(cpu_id, MSR_M##NUM##_PMON_PGT, pgt_flags); \
- } \
- break; \
- case 0x06: /* BCMD_SCHEDQ_OCCUPANCY: MAP */ \
- { \
- uint32_t map_flags = 0x0UL; \
- map_flags |= (event->umask<<6); \
- msr_write(cpu_id, MSR_M##NUM##_PMON_MAP, map_flags); \
- } \
+ /* TODO fvid index is missing defaults to 0 */ \
+ flags |= (1<<7); /* toggle flag mode */ \
+ flags |= (event->eventId<<19); \
+ switch (event->eventId) \
+ { \
+ case 0x00: /* CYCLES_DSP_FILL: DSP */ \
+ { \
+ uint64_t dsp_flags = 0x0ULL; \
+ dsp_flags |= (event->umask<<7); \
+ msr_write(cpu_id, MSR_M##NUM##_PMON_DSP, dsp_flags); \
+ } \
+ break; \
+ case 0x01: /* CYCLES_SCHED_MODE: ISS */ \
+ { \
+ uint32_t iss_flags = 0x0UL; \
+ iss_flags |= (event->umask<<4); \
+ msr_write(cpu_id, MSR_M##NUM##_PMON_ISS, iss_flags); \
+ } \
+ break; \
+ case 0x05: /* CYCLES_PGT_STATE: PGT */ \
+ { \
+ uint32_t pgt_flags = 0x0UL; \
+ pgt_flags |= (event->umask<<6); \
+ msr_write(cpu_id, MSR_M##NUM##_PMON_PGT, pgt_flags); \
+ } \
+ break; \
+ case 0x06: /* BCMD_SCHEDQ_OCCUPANCY: MAP */ \
+ { \
+ uint32_t map_flags = 0x0UL; \
+ map_flags |= (event->umask<<6); \
+ msr_write(cpu_id, MSR_M##NUM##_PMON_MAP, map_flags); \
+ } \
+ break; \
+ } \
break; \
- } \
- break; \
case 0x02: /* DRAM_CMD: PLD/ISS */ \
- flags |= (event->eventId<<9); \
- { \
- uint32_t pld_flags = 0x0UL; \
- uint32_t iss_flags = 0x0UL; \
- pld_flags |= (event->umask<<8); \
- if (event->cmask != 0) \
+ flags |= (event->eventId<<9); \
{ \
- iss_flags |= (event->cmask<<7); \
- pld_flags |= 1; /* toggle cmd flag */ \
+ uint32_t pld_flags = 0x0UL; \
+ uint32_t iss_flags = 0x0UL; \
+ pld_flags |= (event->umask<<8); \
+ if (event->cmask != 0) \
+ { \
+ iss_flags |= (event->cmask<<7); \
+ pld_flags |= 1; /* toggle cmd flag */ \
+ } \
+ msr_write(cpu_id, MSR_M##NUM##_PMON_PLD, pld_flags); \
+ msr_write(cpu_id, MSR_M##NUM##_PMON_ISS, iss_flags); \
} \
- msr_write(cpu_id, MSR_M##NUM##_PMON_PLD, pld_flags); \
- msr_write(cpu_id, MSR_M##NUM##_PMON_ISS, iss_flags); \
- } \
- break; \
+ break; \
case 0x03: /* DSP_FILL: DSP */ \
- flags |= (event->eventId<<9); \
- { \
- uint64_t dsp_flags = 0x0ULL; \
- dsp_flags |= (event->umask<<7); \
- msr_write(cpu_id, MSR_M##NUM##_PMON_DSP, dsp_flags); \
- } \
- break; \
+ flags |= (event->eventId<<9); \
+ { \
+ uint64_t dsp_flags = 0x0ULL; \
+ dsp_flags |= (event->umask<<7); \
+ msr_write(cpu_id, MSR_M##NUM##_PMON_DSP, dsp_flags); \
+ } \
+ break; \
case 0x04: /* DRAM_MISC: PLD */ \
- flags |= (event->eventId<<9); \
- { \
- uint64_t pld_flags = 0x0ULL; \
- switch (event->cmask) \
+ flags |= (event->eventId<<9); \
{ \
- case 0x0: \
- pld_flags |= (1<<16); \
- pld_flags |= (event->umask<<19); \
- break; \
- case 0x1: \
- pld_flags |= (event->umask<<18); \
- break; \
- case 0x2: \
- pld_flags |= (event->umask<<17); \
- break; \
- case 0x3: \
- pld_flags |= (event->umask<<7); \
- break; \
+ uint64_t pld_flags = 0x0ULL; \
+ switch (event->cmask) \
+ { \
+ case 0x0: \
+ pld_flags |= (1<<16); \
+ pld_flags |= (event->umask<<19); \
+ break; \
+ case 0x1: \
+ pld_flags |= (event->umask<<18); \
+ break; \
+ case 0x2: \
+ pld_flags |= (event->umask<<17); \
+ break; \
+ case 0x3: \
+ pld_flags |= (event->umask<<7); \
+ break; \
+ } \
+ msr_write(cpu_id, MSR_M##NUM##_PMON_PLD, pld_flags); \
} \
- msr_write(cpu_id, MSR_M##NUM##_PMON_PLD, pld_flags); \
- } \
- break; \
+ break; \
case 0x05: /* FRM_TYPE: ISS */ \
- flags |= (event->eventId<<9); \
- { \
- uint32_t iss_flags = 0x0UL; \
- iss_flags |= event->umask; \
- msr_write(cpu_id, MSR_M##NUM##_PMON_ISS, iss_flags); \
- } \
- break; \
- case 0x06: /* FVC_EV0: FVC */ \
- flags |= (event->eventId<<9); \
- { \
- uint32_t fvc_flags = 0x0UL; \
- fvc_flags |= (event->umask<<12); \
- if (event->umask == 0x5) \
+ flags |= (event->eventId<<9); \
{ \
- fvc_flags |= (event->cmask<<6); \
+ uint32_t iss_flags = 0x0UL; \
+ iss_flags |= event->umask; \
+ msr_write(cpu_id, MSR_M##NUM##_PMON_ISS, iss_flags); \
} \
- else \
+ break; \
+ case 0x06: /* FVC_EV0: FVC */ \
+ flags |= (event->eventId<<9); \
{ \
- fvc_flags |= (event->cmask<<9); \
+ uint32_t fvc_flags = 0x0UL; \
+ fvc_flags |= (event->umask<<12); \
+ if (event->umask == 0x5) \
+ { \
+ fvc_flags |= (event->cmask<<6); \
+ } \
+ else \
+ { \
+ fvc_flags |= (event->cmask<<9); \
+ } \
+ msr_write(cpu_id, MSR_M##NUM##_PMON_ZDP, fvc_flags); \
+ VERBOSEPRINTREG(cpu_id, MSR_M##NUM##_PMON_ZDP, fvc_flags, FVC_EV0) \
} \
- msr_write(cpu_id, MSR_M##NUM##_PMON_ZDP, fvc_flags); \
- VERBOSEPRINTREG(cpu_id, MSR_M##NUM##_PMON_ZDP, fvc_flags, FVC_EV0) \
- } \
- break; \
+ break; \
case 0x07: /* FVC_EV1: FVC */ \
- flags |= (event->eventId<<9); \
- { \
- uint32_t fvc_flags = 0x0UL; \
- fvc_flags |= (event->umask<<15); \
- if (event->umask == 0x5) \
+ flags |= (event->eventId<<9); \
{ \
- fvc_flags |= (event->cmask<<6); \
+ uint32_t fvc_flags = 0x0UL; \
+ fvc_flags |= (event->umask<<15); \
+ if (event->umask == 0x5) \
+ { \
+ fvc_flags |= (event->cmask<<6); \
+ } \
+ else \
+ { \
+ fvc_flags |= (event->cmask<<9); \
+ } \
+ msr_write(cpu_id, MSR_M##NUM##_PMON_ZDP, fvc_flags); \
+ VERBOSEPRINTREG(cpu_id, MSR_M##NUM##_PMON_ZDP, fvc_flags, FVC_EV1) \
} \
- else \
- { \
- fvc_flags |= (event->cmask<<9); \
- } \
- msr_write(cpu_id, MSR_M##NUM##_PMON_ZDP, fvc_flags); \
- VERBOSEPRINTREG(cpu_id, MSR_M##NUM##_PMON_ZDP, fvc_flags, FVC_EV1) \
- } \
- break; \
+ break; \
case 0x08: /* FVC_EV2: FVC */ \
- flags |= (event->eventId<<9); \
- { \
- uint32_t fvc_flags = 0x0UL; \
- fvc_flags |= (event->umask<<18); \
- if (event->umask == 0x5) \
- { \
- fvc_flags |= (event->cmask<<6); \
- } \
- else \
+ flags |= (event->eventId<<9); \
{ \
- fvc_flags |= (event->cmask<<9); \
+ uint32_t fvc_flags = 0x0UL; \
+ fvc_flags |= (event->umask<<18); \
+ if (event->umask == 0x5) \
+ { \
+ fvc_flags |= (event->cmask<<6); \
+ } \
+ else \
+ { \
+ fvc_flags |= (event->cmask<<9); \
+ } \
+ msr_write(cpu_id, MSR_M##NUM##_PMON_ZDP, fvc_flags); \
+ VERBOSEPRINTREG(cpu_id, MSR_M##NUM##_PMON_ZDP, fvc_flags, FVC_EV2) \
} \
- msr_write(cpu_id, MSR_M##NUM##_PMON_ZDP, fvc_flags); \
- VERBOSEPRINTREG(cpu_id, MSR_M##NUM##_PMON_ZDP, fvc_flags, FVC_EV2) \
- } \
- break; \
+ break; \
case 0x09: /* FVC_EV3: FVC(ZDP) */ \
- flags |= (event->eventId<<9); \
- { \
- uint32_t fvc_flags = 0x0UL; \
- fvc_flags |= (event->umask<<21); \
- if (event->umask == 0x5) \
+ flags |= (event->eventId<<9); \
{ \
- fvc_flags |= (event->cmask<<6); \
+ uint32_t fvc_flags = 0x0UL; \
+ fvc_flags |= (event->umask<<21); \
+ if (event->umask == 0x5) \
+ { \
+ fvc_flags |= (event->cmask<<6); \
+ } \
+ else \
+ { \
+ fvc_flags |= (event->cmask<<9); \
+ } \
+ msr_write(cpu_id, MSR_M##NUM##_PMON_ZDP, fvc_flags); \
} \
- else \
+ break; \
+ case 0x0A: /* ISS_SCHED: ISS */ \
+ flags |= (event->eventId<<9); \
{ \
- fvc_flags |= (event->cmask<<9); \
+ uint32_t iss_flags = 0x0UL; \
+ iss_flags |= (event->umask<<10); \
+ msr_write(cpu_id, MSR_M##NUM##_PMON_ISS, iss_flags); \
} \
- msr_write(cpu_id, MSR_M##NUM##_PMON_ZDP, fvc_flags); \
- } \
- break; \
- case 0x0A: /* ISS_SCHED: ISS */ \
- flags |= (event->eventId<<9); \
- { \
- uint32_t iss_flags = 0x0UL; \
- iss_flags |= (event->umask<<10); \
- msr_write(cpu_id, MSR_M##NUM##_PMON_ISS, iss_flags); \
- } \
- break; \
+ break; \
case 0x0B: /* PGT_PAGE_EV: PGT */ \
- flags |= (event->eventId<<9); \
- { \
- uint32_t pgt_flags = 0x0UL; \
- pgt_flags |= event->umask; \
- msr_write(cpu_id, MSR_M##NUM##_PMON_PGT, pgt_flags); \
- } \
- break; \
+ flags |= (event->eventId<<9); \
+ { \
+ uint32_t pgt_flags = 0x0UL; \
+ pgt_flags |= event->umask; \
+ msr_write(cpu_id, MSR_M##NUM##_PMON_PGT, pgt_flags); \
+ } \
+ break; \
case 0x0C: /* PGT_PAGE_EV2: PGT */ \
- flags |= (event->eventId<<9); \
- { \
- uint32_t pgt_flags = 0x0UL; \
- pgt_flags |= (event->umask<<11); \
- msr_write(cpu_id, MSR_M##NUM##_PMON_PGT, pgt_flags); \
- } \
- break; \
+ flags |= (event->eventId<<9); \
+ { \
+ uint32_t pgt_flags = 0x0UL; \
+ pgt_flags |= (event->umask<<11); \
+ msr_write(cpu_id, MSR_M##NUM##_PMON_PGT, pgt_flags); \
+ } \
+ break; \
case 0x0D: /* THERM_TRP_DN: THR */ \
- flags |= (event->eventId<<9); \
- { \
- uint32_t thr_flags = 0x0UL; \
- thr_flags |= (1<<3); \
- thr_flags |= (event->umask<<9); \
- msr_write(cpu_id, MSR_M##NUM##_PMON_PGT, thr_flags); \
- } \
- break; \
+ flags |= (event->eventId<<9); \
+ { \
+ uint32_t thr_flags = 0x0UL; \
+ thr_flags |= (1<<3); \
+ thr_flags |= (event->umask<<9); \
+ msr_write(cpu_id, MSR_M##NUM##_PMON_PGT, thr_flags); /* NOTE(review): case is labelled THR but writes the PGT register; every other case writes the register named in its label -- confirm this is intended */ \
+ } \
+ break; \
}
/* RBOX macros */
@@ -497,9 +625,10 @@ void perfmon_setupCounterThread_westmereEX(
PerfmonCounterIndex index)
{
int haveLock = 0;
- uint64_t flags = 0x0ULL;;
+ uint64_t flags = 0x0ULL;
uint64_t reg = westmereEX_counter_map[index].configRegister;
int cpu_id = perfmon_threadData[thread_id].processorId;
+ uint64_t fixed_flags = msr_read(cpu_id, MSR_PERF_FIXED_CTR_CTRL);
perfmon_threadData[thread_id].counters[index].init = TRUE;
if ((socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id))
@@ -510,8 +639,7 @@ void perfmon_setupCounterThread_westmereEX(
switch (westmereEX_counter_map[index].type)
{
case PMC:
- flags = msr_read(cpu_id,reg);
- flags &= ~(0xFFFFU); /* clear lower 16bits */
+ flags = (1<<22)|(1<<16);
/* Intel with standard 8 bit event mask: [7:0] */
flags |= (event->umask<<8) + event->eventId;
@@ -527,6 +655,8 @@ void perfmon_setupCounterThread_westmereEX(
break;
case FIXED:
+ fixed_flags |= (0x2 <<(index*4));
+ msr_write(cpu_id, MSR_PERF_FIXED_CTR_CTRL, fixed_flags);
break;
case MBOX0:
@@ -594,6 +724,37 @@ void perfmon_setupCounterThread_westmereEX(
}
break;
+ case UBOX: /* this case programs the config register with the event id only (no umask) */
+ if (haveLock)
+ {
+ flags = 0x0ULL;
+ flags |= (1<<22); /* presumably the enable bit, as in the other box setups -- confirm */
+ flags |= (event->eventId);
+ msr_write(cpu_id, reg , flags);
+ }
+ break; /* stop here: falling through into the CBOX/SBOX case would rewrite reg with event->umask<<8 OR'ed in */
+ case CBOX0:
+ case CBOX1:
+ case CBOX2:
+ case CBOX3:
+ case CBOX4:
+ case CBOX5:
+ case CBOX6:
+ case CBOX7:
+ case CBOX8:
+ case CBOX9:
+ case SBOX0:
+ case SBOX1:
+ if (haveLock)
+ {
+ flags = 0x0ULL;
+ flags |= (1<<22);
+ flags |= (event->umask<<8);
+ flags |= (event->eventId);
+ msr_write(cpu_id, reg , flags);
+ }
+ break;
+
default:
/* should never be reached */
break;
@@ -621,6 +782,7 @@ void perfmon_startCountersThread_westmereEX(int thread_id)
int haveLock = 0;
uint64_t flags = 0x0ULL;
uint32_t uflags[NUM_UNITS];
+ int enable_ubox = 0;
int cpu_id = perfmon_threadData[thread_id].processorId;
msr_write(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL);
@@ -659,6 +821,10 @@ void perfmon_startCountersThread_westmereEX(int thread_id)
msr_write(cpu_id, westmereEX_counter_map[i].counterRegister , 0x0ULL);
uflags[westmereEX_counter_map[i].type] |=
(1<<(perfmon_threadData[thread_id].counters[i].id)); /* enable uncore counter */
+ if (westmereEX_counter_map[i].type == UBOX)
+ {
+ enable_ubox = 1;
+ }
}
}
}
@@ -681,6 +847,10 @@ void perfmon_startCountersThread_westmereEX(int thread_id)
/* set global enable flag in U BOX ctrl register */
uint32_t ubflags = 0x0UL;
ubflags |= (1<<28); /* enable all */
+ if (enable_ubox)
+ {
+ ubflags |= (1<<0);
+ }
VERBOSEPRINTREG(cpu_id, MSR_U_PMON_GLOBAL_CTRL, LLU_CAST ubflags, UBOX_GLOBAL_CTRL);
msr_write(cpu_id, MSR_U_PMON_GLOBAL_CTRL, ubflags );
}
diff --git a/src/includes/perfmon_westmereEX_counters.h b/src/includes/perfmon_westmereEX_counters.h
index 5d04b75..fd65746 100644
--- a/src/includes/perfmon_westmereEX_counters.h
+++ b/src/includes/perfmon_westmereEX_counters.h
@@ -5,8 +5,8 @@
*
* Description: Counter Header File of perfmon module for Westmere EX.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -29,8 +29,8 @@
*/
#define NUM_COUNTERS_CORE_WESTMEREEX 7
-#define NUM_COUNTERS_UNCORE_WESTMEREEX 48
-#define NUM_COUNTERS_WESTMEREEX 48
+#define NUM_COUNTERS_UNCORE_WESTMEREEX 107
+#define NUM_COUNTERS_WESTMEREEX 107
static PerfmonCounterMap westmereEX_counter_map[NUM_COUNTERS_WESTMEREEX] = {
/* Fixed Counters: instructions retired, cycles unhalted core */
@@ -86,6 +86,68 @@ static PerfmonCounterMap westmereEX_counter_map[NUM_COUNTERS_WESTMEREEX] = {
{"WBOX1",PMC44, WBOX, MSR_W_PMON_EVNT_SEL1, MSR_W_PMON_CTR1, 0, 0},
{"WBOX2",PMC45, WBOX, MSR_W_PMON_EVNT_SEL2, MSR_W_PMON_CTR2, 0, 0},
{"WBOX3",PMC46, WBOX, MSR_W_PMON_EVNT_SEL3, MSR_W_PMON_CTR3, 0, 0},
- {"WBOX4",PMC47, WBOX, MSR_W_PMON_FIXED_CTR_CTL, MSR_W_PMON_FIXED_CTR, 0, 0}
+ {"WBOX4",PMC47, WBOX, MSR_W_PMON_FIXED_CTR_CTL, MSR_W_PMON_FIXED_CTR, 0, 0},
+ /* UBOX */
+ {"UBOX0",PMC48, UBOX, MSR_U_PMON_GLOBAL_EVNT_SEL, MSR_U_PMON_GLOBAL_CTR, 0, 0},
+ /* CBOXes */
+ {"CBOX0C0",PMC49, CBOX0, MSR_C0_PMON_EVNT_SEL0, MSR_C0_PMON_CTR0, 0, 0},
+ {"CBOX0C1",PMC50, CBOX0, MSR_C0_PMON_EVNT_SEL1, MSR_C0_PMON_CTR1, 0, 0},
+ {"CBOX0C2",PMC51, CBOX0, MSR_C0_PMON_EVNT_SEL2, MSR_C0_PMON_CTR2, 0, 0},
+ {"CBOX0C3",PMC52, CBOX0, MSR_C0_PMON_EVNT_SEL3, MSR_C0_PMON_CTR3, 0, 0},
+ {"CBOX0C4",PMC53, CBOX0, MSR_C0_PMON_EVNT_SEL4, MSR_C0_PMON_CTR4, 0, 0},
+ {"CBOX1C0",PMC54, CBOX1, MSR_C1_PMON_EVNT_SEL0, MSR_C1_PMON_CTR0, 0, 0},
+ {"CBOX1C1",PMC55, CBOX1, MSR_C1_PMON_EVNT_SEL1, MSR_C1_PMON_CTR1, 0, 0},
+ {"CBOX1C2",PMC56, CBOX1, MSR_C1_PMON_EVNT_SEL2, MSR_C1_PMON_CTR2, 0, 0},
+ {"CBOX1C3",PMC57, CBOX1, MSR_C1_PMON_EVNT_SEL3, MSR_C1_PMON_CTR3, 0, 0},
+ {"CBOX1C4",PMC58, CBOX1, MSR_C1_PMON_EVNT_SEL4, MSR_C1_PMON_CTR4, 0, 0},
+ {"CBOX2C0",PMC59, CBOX2, MSR_C2_PMON_EVNT_SEL0, MSR_C2_PMON_CTR0, 0, 0},
+ {"CBOX2C1",PMC60, CBOX2, MSR_C2_PMON_EVNT_SEL1, MSR_C2_PMON_CTR1, 0, 0},
+ {"CBOX2C2",PMC61, CBOX2, MSR_C2_PMON_EVNT_SEL2, MSR_C2_PMON_CTR2, 0, 0},
+ {"CBOX2C3",PMC62, CBOX2, MSR_C2_PMON_EVNT_SEL3, MSR_C2_PMON_CTR3, 0, 0},
+ {"CBOX2C4",PMC63, CBOX2, MSR_C2_PMON_EVNT_SEL4, MSR_C2_PMON_CTR4, 0, 0},
+ {"CBOX3C0",PMC64, CBOX3, MSR_C3_PMON_EVNT_SEL0, MSR_C3_PMON_CTR0, 0, 0},
+ {"CBOX3C1",PMC65, CBOX3, MSR_C3_PMON_EVNT_SEL1, MSR_C3_PMON_CTR1, 0, 0},
+ {"CBOX3C2",PMC66, CBOX3, MSR_C3_PMON_EVNT_SEL2, MSR_C3_PMON_CTR2, 0, 0},
+ {"CBOX3C3",PMC67, CBOX3, MSR_C3_PMON_EVNT_SEL3, MSR_C3_PMON_CTR3, 0, 0},
+ {"CBOX3C4",PMC68, CBOX3, MSR_C3_PMON_EVNT_SEL4, MSR_C3_PMON_CTR4, 0, 0},
+ {"CBOX4C0",PMC69, CBOX4, MSR_C4_PMON_EVNT_SEL0, MSR_C4_PMON_CTR0, 0, 0},
+ {"CBOX4C1",PMC70, CBOX4, MSR_C4_PMON_EVNT_SEL1, MSR_C4_PMON_CTR1, 0, 0},
+ {"CBOX4C2",PMC71, CBOX4, MSR_C4_PMON_EVNT_SEL2, MSR_C4_PMON_CTR2, 0, 0},
+ {"CBOX4C3",PMC72, CBOX4, MSR_C4_PMON_EVNT_SEL3, MSR_C4_PMON_CTR3, 0, 0},
+ {"CBOX4C4",PMC73, CBOX4, MSR_C4_PMON_EVNT_SEL4, MSR_C4_PMON_CTR4, 0, 0},
+ {"CBOX5C0",PMC74, CBOX5, MSR_C5_PMON_EVNT_SEL0, MSR_C5_PMON_CTR0, 0, 0},
+ {"CBOX5C1",PMC75, CBOX5, MSR_C5_PMON_EVNT_SEL1, MSR_C5_PMON_CTR1, 0, 0},
+ {"CBOX5C2",PMC76, CBOX5, MSR_C5_PMON_EVNT_SEL2, MSR_C5_PMON_CTR2, 0, 0},
+ {"CBOX5C3",PMC77, CBOX5, MSR_C5_PMON_EVNT_SEL3, MSR_C5_PMON_CTR3, 0, 0},
+ {"CBOX5C4",PMC78, CBOX5, MSR_C5_PMON_EVNT_SEL4, MSR_C5_PMON_CTR4, 0, 0},
+ {"CBOX6C0",PMC79, CBOX6, MSR_C6_PMON_EVNT_SEL0, MSR_C6_PMON_CTR0, 0, 0},
+ {"CBOX6C1",PMC80, CBOX6, MSR_C6_PMON_EVNT_SEL1, MSR_C6_PMON_CTR1, 0, 0},
+ {"CBOX6C2",PMC81, CBOX6, MSR_C6_PMON_EVNT_SEL2, MSR_C6_PMON_CTR2, 0, 0},
+ {"CBOX6C3",PMC82, CBOX6, MSR_C6_PMON_EVNT_SEL3, MSR_C6_PMON_CTR3, 0, 0},
+ {"CBOX6C4",PMC83, CBOX6, MSR_C6_PMON_EVNT_SEL4, MSR_C6_PMON_CTR4, 0, 0},
+ {"CBOX7C0",PMC84, CBOX7, MSR_C7_PMON_EVNT_SEL0, MSR_C7_PMON_CTR0, 0, 0},
+ {"CBOX7C1",PMC85, CBOX7, MSR_C7_PMON_EVNT_SEL1, MSR_C7_PMON_CTR1, 0, 0},
+ {"CBOX7C2",PMC86, CBOX7, MSR_C7_PMON_EVNT_SEL2, MSR_C7_PMON_CTR2, 0, 0},
+ {"CBOX7C3",PMC87, CBOX7, MSR_C7_PMON_EVNT_SEL3, MSR_C7_PMON_CTR3, 0, 0},
+ {"CBOX7C4",PMC88, CBOX7, MSR_C7_PMON_EVNT_SEL4, MSR_C7_PMON_CTR4, 0, 0},
+ {"CBOX8C0",PMC89, CBOX8, MSR_C8_PMON_EVNT_SEL0, MSR_C8_PMON_CTR0, 0, 0},
+ {"CBOX8C1",PMC90, CBOX8, MSR_C8_PMON_EVNT_SEL1, MSR_C8_PMON_CTR1, 0, 0},
+ {"CBOX8C2",PMC91, CBOX8, MSR_C8_PMON_EVNT_SEL2, MSR_C8_PMON_CTR2, 0, 0},
+ {"CBOX8C3",PMC92, CBOX8, MSR_C8_PMON_EVNT_SEL3, MSR_C8_PMON_CTR3, 0, 0},
+ {"CBOX8C4",PMC93, CBOX8, MSR_C8_PMON_EVNT_SEL4, MSR_C8_PMON_CTR4, 0, 0},
+ {"CBOX9C0",PMC94, CBOX9, MSR_C9_PMON_EVNT_SEL0, MSR_C9_PMON_CTR0, 0, 0},
+ {"CBOX9C1",PMC95, CBOX9, MSR_C9_PMON_EVNT_SEL1, MSR_C9_PMON_CTR1, 0, 0},
+ {"CBOX9C2",PMC96, CBOX9, MSR_C9_PMON_EVNT_SEL2, MSR_C9_PMON_CTR2, 0, 0},
+ {"CBOX9C3",PMC97, CBOX9, MSR_C9_PMON_EVNT_SEL3, MSR_C9_PMON_CTR3, 0, 0},
+ {"CBOX9C4",PMC98, CBOX9, MSR_C9_PMON_EVNT_SEL4, MSR_C9_PMON_CTR4, 0, 0},
+ /* SBOXes */
+ {"SBOX0C0",PMC99 , SBOX0, MSR_S0_PMON_EVNT_SEL0, MSR_S0_PMON_CTR0, 0, 0},
+ {"SBOX0C1",PMC100, SBOX0, MSR_S0_PMON_EVNT_SEL1, MSR_S0_PMON_CTR1, 0, 0},
+ {"SBOX0C2",PMC101, SBOX0, MSR_S0_PMON_EVNT_SEL2, MSR_S0_PMON_CTR2, 0, 0},
+ {"SBOX0C3",PMC102, SBOX0, MSR_S0_PMON_EVNT_SEL3, MSR_S0_PMON_CTR3, 0, 0},
+ {"SBOX1C0",PMC103, SBOX1, MSR_S1_PMON_EVNT_SEL0, MSR_S1_PMON_CTR0, 0, 0},
+ {"SBOX1C1",PMC104, SBOX1, MSR_S1_PMON_EVNT_SEL1, MSR_S1_PMON_CTR1, 0, 0},
+ {"SBOX1C2",PMC105, SBOX1, MSR_S1_PMON_EVNT_SEL2, MSR_S1_PMON_CTR2, 0, 0},
+ {"SBOX1C3",PMC106, SBOX1, MSR_S1_PMON_EVNT_SEL3, MSR_S1_PMON_CTR3, 0, 0}
};
diff --git a/src/includes/perfmon_westmereEX_events.txt b/src/includes/perfmon_westmereEX_events.txt
index aa17ce2..2aabf8d 100644
--- a/src/includes/perfmon_westmereEX_events.txt
+++ b/src/includes/perfmon_westmereEX_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for Intel WestmereEX
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
@@ -2751,6 +2751,639 @@ UMASK_CORRECTED_ERR 0x00
EVENT_UNCORRECTED_ERR 0x1E5 UBOX
UMASK_UNCORRECTED_ERR 0x00
+EVENT_LLC_MISSES 0x14 CBOX
+UMASK_LLC_MISSES_SHARED 0x01
+UMASK_LLC_MISSES_FORWARD 0x02
+UMASK_LLC_MISSES_INVALID 0x04
+UMASK_LLC_MISSES_ALL 0x07
+
+EVENT_LLC_HITS 0x15 CBOX
+UMASK_LLC_HITS_MODIFIED 0x01
+UMASK_LLC_HITS_EXCLUSIVE 0x02
+UMASK_LLC_HITS_SHARED 0x04
+UMASK_LLC_HITS_FORWARD 0x08
+UMASK_LLC_HITS_ALL 0x0F
+
+EVENT_LLC_S_FILLS 0x16 CBOX
+UMASK_LLC_S_FILLS_MODIFIED 0x01
+UMASK_LLC_S_FILLS_EXCLUSIVE 0x02
+UMASK_LLC_S_FILLS_SHARED 0x04
+UMASK_LLC_S_FILLS_FORWARD 0x08
+UMASK_LLC_S_FILLS_ALL 0x0F
+
+EVENT_LLC_VICTIMS 0x17 CBOX
+UMASK_LLC_VICTIMS_MODIFIED 0x01
+UMASK_LLC_VICTIMS_EXCLUSIVE 0x02
+UMASK_LLC_VICTIMS_SHARED 0x04
+UMASK_LLC_VICTIMS_FORWARD 0x08
+UMASK_LLC_VICTIMS_ALL 0x0F
+UMASK_LLC_VICTIMS_FILL_WITHOUT_VICTIMS 0x10
+
+EVENT_ARB_LOSSES 0x0A CBOX
+UMASK_ARB_LOSSES_AD_SB 0x01
+UMASK_ARB_LOSSES_AD_NSB 0x02
+UMASK_ARB_LOSSES_AD_ALL 0x03
+UMASK_ARB_LOSSES_AK_SB 0x04
+UMASK_ARB_LOSSES_AK_NSB 0x08
+UMASK_ARB_LOSSES_AK_ALL 0x0C
+UMASK_ARB_LOSSES_BL_SB 0x10
+UMASK_ARB_LOSSES_BL_NSB 0x20
+UMASK_ARB_LOSSES_BL_ALL 0x30
+UMASK_ARB_LOSSES_IV 0x40
+UMASK_ARB_LOSSES_ALL 0x7F
+
+EVENT_ARB_WINS 0x0A CBOX
+UMASK_ARB_WINS_AD_SB 0x01
+UMASK_ARB_WINS_AD_NSB 0x02
+UMASK_ARB_WINS_AD_ALL 0x03
+UMASK_ARB_WINS_AK_SB 0x04
+UMASK_ARB_WINS_AK_NSB 0x08
+UMASK_ARB_WINS_AK_ALL 0x0C
+UMASK_ARB_WINS_BL_SB 0x10
+UMASK_ARB_WINS_BL_NSB 0x20
+UMASK_ARB_WINS_BL_ALL 0x30
+UMASK_ARB_WINS_IV 0x40
+UMASK_ARB_WINS_ALL 0x7F
+
+EVENT_ARB_WINS_P2C_NSB 0x34 CBOX
+UMASK_ARB_WINS_P2C_NSB 0x00
+
+EVENT_ARB_WINS_P2C_SB 0x33 CBOX
+UMASK_ARB_WINS_P2C_SB 0x00
+
+EVENT_BOUNCE_ASSERT 0x38 CBOX
+UMASK_BOUNCE_ASSERT 0x00
+
+EVENT_BOUNCE_DEASSERT 0x39 CBOX
+UMASK_BOUNCE_DEASSERT 0x00
+
+EVENT_BOUNCES_C2P_AK 0x02 CBOX
+UMASK_BOUNCES_C2P_AK_SB 0x01
+UMASK_BOUNCES_C2P_AK_NSB 0x02
+UMASK_BOUNCES_C2P_AK_ALL 0x03
+
+EVENT_BOUNCES_C2P_BL 0x03 CBOX
+UMASK_BOUNCES_C2P_BL_SB 0x01
+UMASK_BOUNCES_C2P_BL_NSB 0x02
+UMASK_BOUNCES_C2P_BL_ALL 0x03
+
+EVENT_BOUNCES_C2P_AD 0x01 CBOX
+UMASK_BOUNCES_C2P_AD_SB 0x01
+UMASK_BOUNCES_C2P_AD_NSB 0x02
+UMASK_BOUNCES_C2P_AD_ALL 0x03
+
+EVENT_BOUNCES_C2P_IV 0x04 CBOX
+UMASK_BOUNCES_C2P_IV 0x00
+
+EVENT_EGRESS_BYPASS_WINS 0x0C CBOX
+UMASK_EGRESS_BYPASS_WINS_AD_BYP0 0x01
+UMASK_EGRESS_BYPASS_WINS_AD_BYP1 0x02
+UMASK_EGRESS_BYPASS_WINS_AK_BYP0 0x04
+UMASK_EGRESS_BYPASS_WINS_AK_BYP1 0x08
+UMASK_EGRESS_BYPASS_WINS_BL_BYP0 0x10
+UMASK_EGRESS_BYPASS_WINS_BL_BYP1 0x20
+UMASK_EGRESS_BYPASS_WINS_IV_BYP0 0x40
+UMASK_EGRESS_BYPASS_WINS_IV_BYP1 0x80
+
+EVENT_INGRESS_BYPASS_WINS_AD 0x0E CBOX
+UMASK_INGRESS_BYPASS_WINS_AD_IRQ_BYP0 0x01
+UMASK_INGRESS_BYPASS_WINS_AD_IRQ_BYP1 0x02
+UMASK_INGRESS_BYPASS_WINS_AD_IPQ_BYP0 0x04
+UMASK_INGRESS_BYPASS_WINS_AD_IPQ_BYP1 0x08
+
+EVENT_IDF_NONZERO_NO_BL_CRD 0x36 CBOX
+UMASK_IDF_NONZERO_NO_BL_CRD 0x00
+
+EVENT_IDF_NONZERO_NO_VLD 0x37 CBOX
+UMASK_IDF_NONZERO_NO_VLD 0x00
+
+EVENT_IGR_BID_BLOCKED 0x3C CBOX
+UMASK_IGR_BID_BLOCKED 0x00
+
+EVENT_IGR_OP_SRAM 0x31 CBOX
+UMASK_IGR_OP_SRAM 0x00
+
+EVENT_IGR_OP_UC 0x32 CBOX
+UMASK_IGR_OP_UC 0x00
+
+EVENT_MAF_ACK 0x10 CBOX
+UMASK_MAF_ACK 0x00
+
+EVENT_MAF_NACK1 0x11 CBOX
+UMASK_MAF_NACK1_GO_PENDING 0x01
+UMASK_MAF_NACK1_VIC_PENDING 0x02
+UMASK_MAF_NACK1_SNP_PENDING 0x04
+UMASK_MAF_NACK1_AC_PENDING 0x08
+UMASK_MAF_NACK1_IDX_BLOCK 0x10
+UMASK_MAF_NACK1_PA_BLOCK 0x20
+UMASK_MAF_NACK1_IDLE_QPI 0x40
+UMASK_MAF_NACK1_ALL_MAF_NACK1 0x80
+UMASK_MAF_NACK1_TOTAL_MAF_NACKS 0xFF
+
+EVENT_MAF_NACK2 0x12 CBOX
+UMASK_MAF_NACK2_MAF_FULL 0x01
+UMASK_MAF_NACK2_EGRESS_FULL 0x02
+UMASK_MAF_NACK2_VIQ_FULL 0x04
+UMASK_MAF_NACK2_NO_TRACKER_CREDITS 0x08
+UMASK_MAF_NACK2_NO_S_FIFO_CREDITS 0x10
+UMASK_MAF_NACK2_NO_S_REQTBL_ENTRIES 0x20
+UMASK_MAF_NACK2_WB_PENDING 0x40
+UMASK_MAF_NACK2_NACK2_ELSE 0x80
+
+EVENT_OCCUPANCY_IPQ 0x1A CBOX
+UMASK_OCCUPANCY_IPQ 0x00
+
+EVENT_OCCUPANCY_IRQ 0x18 CBOX
+UMASK_OCCUPANCY_IRQ 0x00
+
+EVENT_OCCUPANCY_MAF 0x1E CBOX
+UMASK_OCCUPANCY_MAF 0x00
+
+EVENT_OCCUPANCY_RSPF 0x22 CBOX
+UMASK_OCCUPANCY_RSPF 0x00
+
+EVENT_OCCUPANCY_RWRF 0x20 CBOX
+UMASK_OCCUPANCY_RWRF 0x00
+
+EVENT_OCCUPANCY_VIQ 0x1C CBOX
+UMASK_OCCUPANCY_VIQ 0x00
+
+EVENT_SINKS_C2P 0x06 CBOX
+UMASK_SINKS_C2P_IV 0x01
+UMASK_SINKS_C2P_AK 0x02
+UMASK_SINKS_C2P_BL 0x04
+
+EVENT_SINKS_P2C 0x05 CBOX
+UMASK_SINKS_P2C_IV 0x01
+UMASK_SINKS_P2C_AK 0x02
+UMASK_SINKS_P2C_BL 0x04
+
+EVENT_SINKS_S2C 0x07 CBOX
+UMASK_SINKS_S2C_AD 0x01
+UMASK_SINKS_S2C_AK 0x02
+UMASK_SINKS_S2C_BL 0x04
+
+EVENT_SINKS_S2P_BL 0x08 CBOX
+UMASK_SINKS_S2P_BL 0x00
+
+EVENT_SNP_HITS 0x28 CBOX
+UMASK_SNP_HITS_REMOTE_RD_HITM 0x01
+UMASK_SNP_HITS_REMOTE_RD_HITE 0x02
+UMASK_SNP_HITS_REMOTE_RD_HITS 0x04
+UMASK_SNP_HITS_REMOTE_RD_HITF 0x08
+UMASK_SNP_HITS_REMOTE_RFO_HITM 0x10
+UMASK_SNP_HITS_REMOTE_RFO_HITE 0x20
+UMASK_SNP_HITS_REMOTE_RFO_HITS 0x40
+UMASK_SNP_HITS_REMOTE_RFO_HITF 0x80
+UMASK_SNP_HITS_REMOTE_HITM 0x11
+UMASK_SNP_HITS_REMOTE_HITE 0x22
+UMASK_SNP_HITS_REMOTE_HITS 0x44
+UMASK_SNP_HITS_REMOTE_HITF 0x88
+UMASK_SNP_HITS_REMOTE_ANY 0xFF
+
+EVENT_SNPS 0x27 CBOX
+UMASK_SNPS_REMOTE_RD 0x01
+UMASK_SNPS_REMOTE_RFO 0x02
+UMASK_SNPS_REMOTE_ANY 0x03
+
+EVENT_SPL_ARB_PRI_SW 0x2A CBOX
+UMASK_SPL_ARB_PRI_SW 0x00
+
+EVENT_SPL_CO_SB 0x2C CBOX
+UMASK_SPL_CO_SB 0x00
+
+EVENT_SPL_CO_NSB 0x2D CBOX
+UMASK_SPL_CO_NSB 0x00
+
+EVENT_SPL_DEAD 0x29 CBOX
+UMASK_SPL_DEAD 0x00
+
+EVENT_SPL_EGR_SB 0x2F CBOX
+UMASK_SPL_EGR_SB 0x00
+
+EVENT_SPL_EGR_NSB 0x30 CBOX
+UMASK_SPL_EGR_NSB 0x00
+
+EVENT_SPL_IN_FULL_IRQ 0x2E CBOX
+UMASK_SPL_IN_FULL_IRQ 0x00
+
+EVENT_SPL_NOT_CO 0x2B CBOX
+UMASK_SPL_NOT_CO 0x00
+
+EVENT_SPOOF_ASSERT 0x3A CBOX
+UMASK_SPOOF_ASSERT 0x00
+
+EVENT_SPOOF_DEASSERT 0x3B CBOX
+UMASK_SPOOF_DEASSERT 0x00
+
+EVENT_SPOOF_CRD_EMPTY 0x35 CBOX
+UMASK_SPOOF_CRD_EMPTY 0x00
+
+EVENT_STARVED_EGRESS 0x0B CBOX
+UMASK_STARVED_EGRESS_P2C_AD_SB 0x01
+UMASK_STARVED_EGRESS_C2P_AD_SB 0x02
+UMASK_STARVED_EGRESS_AD_SB 0x03
+UMASK_STARVED_EGRESS_AD_NSB 0x04
+UMASK_STARVED_EGRESS_AD 0x07
+UMASK_STARVED_EGRESS_AK_SB 0x08
+UMASK_STARVED_EGRESS_AK_NSB 0x10
+UMASK_STARVED_EGRESS_AK 0x18
+UMASK_STARVED_EGRESS_BL_SB 0x20
+UMASK_STARVED_EGRESS_BL_NSB 0x40
+UMASK_STARVED_EGRESS_BL 0x60
+UMASK_STARVED_EGRESS_IV 0x80
+
+EVENT_TRANS_IPQ 0x1B CBOX
+UMASK_TRANS_IPQ 0x00
+
+EVENT_TRANS_IRQ 0x19 CBOX
+UMASK_TRANS_IRQ 0x00
+
+EVENT_TRANS_MAF 0x1F CBOX
+UMASK_TRANS_MAF 0x00
+
+EVENT_TRANS_RSPF 0x23 CBOX
+UMASK_TRANS_RSPF 0x00
+
+EVENT_TRANS_RWRF 0x21 CBOX
+UMASK_TRANS_RWRF 0x00
+
+EVENT_TRANS_VIQ 0x1D CBOX
+UMASK_TRANS_VIQ 0x00
+
+EVENT_TO_R_PROG_EV 0x00 SBOX
+UMASK_TO_R_PROG_EV 0x00
+
+EVENT_TO_R_B_HOM_MSGQ_CYCLES_FULL 0x03 SBOX
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_FULL_RBOX 0x01
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_FULL_BBOX 0x02
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_FULL_ALL 0x03
+
+EVENT_TO_R_B_HOM_MSGQ_CYCLES_NE 0x06 SBOX
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_NE_RBOX 0x01
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_NE_BBOX 0x02
+UMASK_TO_R_B_HOM_MSGQ_CYCLES_NE_ALL 0x03
+
+EVENT_TO_R_B_HOM_MSGQ_OCCUPANCY 0x07 SBOX
+UMASK_TO_R_B_HOM_MSGQ_OCCUPANCY_RBOX 0x01
+UMASK_TO_R_B_HOM_MSGQ_OCCUPANCY_BBOX 0x02
+UMASK_TO_R_B_HOM_MSGQ_OCCUPANCY_ALL 0x03
+
+EVENT_TO_R_SNP_MSGQ_CYCLES_FULL 0x08 SBOX
+UMASK_TO_R_SNP_MSGQ_CYCLES_FULL 0x00
+
+EVENT_TO_R_SNP_MSGQ_CYCLES_NE 0x09 SBOX
+UMASK_TO_R_SNP_MSGQ_CYCLES_NE 0x00
+
+EVENT_TO_R_SNP_MSGQ_OCCUPANCY 0x0A SBOX
+UMASK_TO_R_SNP_MSGQ_OCCUPANCY 0x00
+
+EVENT_TO_R_NDR_MSGQ_CYCLES_FULL 0x0B SBOX
+UMASK_TO_R_NDR_MSGQ_CYCLES_FULL 0x00
+
+EVENT_TO_R_NDR_MSGQ_CYCLES_NE 0x0C SBOX
+UMASK_TO_R_NDR_MSGQ_CYCLES_NE 0x00
+
+EVENT_TO_R_NDR_MSGQ_OCCUPANCY 0x0D SBOX
+UMASK_TO_R_NDR_MSGQ_OCCUPANCY 0x00
+
+EVENT_TO_R_DRS_MSGQ_CYCLES_FULL 0x0E SBOX
+UMASK_TO_R_DRS_MSGQ_CYCLES_FULL_CBOX0_5 0x01
+UMASK_TO_R_DRS_MSGQ_CYCLES_FULL_CBOX1_6 0x02
+UMASK_TO_R_DRS_MSGQ_CYCLES_FULL_CBOX2_7 0x04
+UMASK_TO_R_DRS_MSGQ_CYCLES_FULL_CBOX3_8 0x08
+UMASK_TO_R_DRS_MSGQ_CYCLES_FULL_CBOX4_9 0x10
+UMASK_TO_R_DRS_MSGQ_CYCLES_FULL_ALL 0x1F
+
+EVENT_TO_R_DRS_MSGQ_CYCLES_NE 0x0F SBOX
+UMASK_TO_R_DRS_MSGQ_CYCLES_NE_CBOX0_5 0x01
+UMASK_TO_R_DRS_MSGQ_CYCLES_NE_CBOX1_6 0x02
+UMASK_TO_R_DRS_MSGQ_CYCLES_NE_CBOX2_7 0x04
+UMASK_TO_R_DRS_MSGQ_CYCLES_NE_CBOX3_8 0x08
+UMASK_TO_R_DRS_MSGQ_CYCLES_NE_CBOX4_9 0x10
+UMASK_TO_R_DRS_MSGQ_CYCLES_NE_ALL 0x1F
+
+EVENT_TO_R_DRS_MSGQ_OCCUPANCY 0x10 SBOX
+UMASK_TO_R_DRS_MSGQ_OCCUPANCY_CBOX0_5 0x01
+UMASK_TO_R_DRS_MSGQ_OCCUPANCY_CBOX1_6 0x02
+UMASK_TO_R_DRS_MSGQ_OCCUPANCY_CBOX2_7 0x04
+UMASK_TO_R_DRS_MSGQ_OCCUPANCY_CBOX3_8 0x08
+UMASK_TO_R_DRS_MSGQ_OCCUPANCY_CBOX4_9 0x10
+UMASK_TO_R_DRS_MSGQ_OCCUPANCY_ALL 0x1F
+
+EVENT_TO_R_NCB_MSGQ_CYCLES_FULL 0x11 SBOX
+UMASK_TO_R_NCB_MSGQ_CYCLES_FULL_CBOX0_5 0x01
+UMASK_TO_R_NCB_MSGQ_CYCLES_FULL_CBOX1_6 0x02
+UMASK_TO_R_NCB_MSGQ_CYCLES_FULL_CBOX2_7 0x04
+UMASK_TO_R_NCB_MSGQ_CYCLES_FULL_CBOX3_8 0x08
+UMASK_TO_R_NCB_MSGQ_CYCLES_FULL_CBOX4_9 0x10
+UMASK_TO_R_NCB_MSGQ_CYCLES_FULL_ALL 0x1F
+
+EVENT_TO_R_NCB_MSGQ_CYCLES_NE 0x12 SBOX
+UMASK_TO_R_NCB_MSGQ_CYCLES_NE_CBOX0_5 0x01
+UMASK_TO_R_NCB_MSGQ_CYCLES_NE_CBOX1_6 0x02
+UMASK_TO_R_NCB_MSGQ_CYCLES_NE_CBOX2_7 0x04
+UMASK_TO_R_NCB_MSGQ_CYCLES_NE_CBOX3_8 0x08
+UMASK_TO_R_NCB_MSGQ_CYCLES_NE_CBOX4_9 0x10
+UMASK_TO_R_NCB_MSGQ_CYCLES_NE_ALL 0x1F
+
+EVENT_TO_R_NCB_MSGQ_OCCUPANCY 0x13 SBOX
+UMASK_TO_R_NCB_MSGQ_OCCUPANCY_CBOX0_5 0x01
+UMASK_TO_R_NCB_MSGQ_OCCUPANCY_CBOX1_6 0x02
+UMASK_TO_R_NCB_MSGQ_OCCUPANCY_CBOX2_7 0x04
+UMASK_TO_R_NCB_MSGQ_OCCUPANCY_CBOX3_8 0x08
+UMASK_TO_R_NCB_MSGQ_OCCUPANCY_CBOX4_9 0x10
+UMASK_TO_R_NCB_MSGQ_OCCUPANCY_ALL 0x1F
+
+EVENT_TO_R_NCS_MSGQ_CYCLES_FULL 0x14 SBOX
+UMASK_TO_R_NCS_MSGQ_CYCLES_FULL_CBOX0_5 0x01
+UMASK_TO_R_NCS_MSGQ_CYCLES_FULL_CBOX1_6 0x02
+UMASK_TO_R_NCS_MSGQ_CYCLES_FULL_CBOX2_7 0x04
+UMASK_TO_R_NCS_MSGQ_CYCLES_FULL_CBOX3_8 0x08
+UMASK_TO_R_NCS_MSGQ_CYCLES_FULL_CBOX4_9 0x10
+UMASK_TO_R_NCS_MSGQ_CYCLES_FULL_ALL 0x1F
+
+EVENT_TO_R_NCS_MSGQ_CYCLES_NE 0x15 SBOX
+UMASK_TO_R_NCS_MSGQ_CYCLES_NE_CBOX0_5 0x01
+UMASK_TO_R_NCS_MSGQ_CYCLES_NE_CBOX1_6 0x02
+UMASK_TO_R_NCS_MSGQ_CYCLES_NE_CBOX2_7 0x04
+UMASK_TO_R_NCS_MSGQ_CYCLES_NE_CBOX3_8 0x08
+UMASK_TO_R_NCS_MSGQ_CYCLES_NE_CBOX4_9 0x10
+UMASK_TO_R_NCS_MSGQ_CYCLES_NE_ALL 0x1F
+
+EVENT_TO_R_NCS_MSGQ_OCCUPANCY 0x16 SBOX
+UMASK_TO_R_NCS_MSGQ_OCCUPANCY_CBOX0_5 0x01
+UMASK_TO_R_NCS_MSGQ_OCCUPANCY_CBOX1_6 0x02
+UMASK_TO_R_NCS_MSGQ_OCCUPANCY_CBOX2_7 0x04
+UMASK_TO_R_NCS_MSGQ_OCCUPANCY_CBOX3_8 0x08
+UMASK_TO_R_NCS_MSGQ_OCCUPANCY_CBOX4_9 0x10
+UMASK_TO_R_NCS_MSGQ_OCCUPANCY_ALL 0x1F
+
+EVENT_TO_RING_SNP_MSGQ_CYCLES_FULL 0x20 SBOX
+UMASK_TO_RING_SNP_MSGQ_CYCLES_FULL 0x00
+
+EVENT_TO_RING_NCB_MSGQ_CYCLES_FULL 0x21 SBOX
+UMASK_TO_RING_NCB_MSGQ_CYCLES_FULL 0x00
+
+EVENT_TO_RING_NCS_MSGQ_CYCLES_FULL 0x22 SBOX
+UMASK_TO_RING_NCS_MSGQ_CYCLES_FULL 0x00
+
+EVENT_TO_RING_SNP_MSGQ_CYCLES_NE 0x23 SBOX
+UMASK_TO_RING_SNP_MSGQ_CYCLES_NE 0x00
+
+EVENT_TO_RING_NCB_MSGQ_CYCLES_NE 0x24 SBOX
+UMASK_TO_RING_NCB_MSGQ_CYCLES_NE 0x00
+
+EVENT_TO_RING_NCS_MSGQ_CYCLES_NE 0x25 SBOX
+UMASK_TO_RING_NCS_MSGQ_CYCLES_NE 0x00
+
+EVENT_TO_RING_MSGQ_OCCUPANCY 0x26 SBOX
+UMASK_TO_RING_MSGQ_OCCUPANCY_SNP 0x01
+UMASK_TO_RING_MSGQ_OCCUPANCY_NCS 0x02
+UMASK_TO_RING_MSGQ_OCCUPANCY_NCB 0x04
+UMASK_TO_RING_MSGQ_OCCUPANCY_ALL 0x07
+
+EVENT_TO_RING_NDR_MSGQ_CYCLES_FULL 0x27 SBOX
+UMASK_TO_RING_NDR_MSGQ_CYCLES_FULL 0x00
+
+EVENT_TO_RING_NDR_MSGQ_CYCLES_NE 0x28 SBOX
+UMASK_TO_RING_NDR_MSGQ_CYCLES_NE 0x00
+
+EVENT_TO_RING_NDR_MSGQ_OCCUPANCY 0x29 SBOX
+UMASK_TO_RING_NDR_MSGQ_OCCUPANCY 0x00
+
+EVENT_TO_RING_R2S_MSGQ_CYCLES_FULL 0x2A SBOX
+UMASK_TO_RING_R2S_MSGQ_CYCLES_FULL 0x00
+
+EVENT_TO_RING_R2S_MSGQ_CYCLES_NE 0x2B SBOX
+UMASK_TO_RING_R2S_MSGQ_CYCLES_NE 0x00
+
+EVENT_TO_RING_R2S_MSGQ_OCCUPANCY 0x2C SBOX
+UMASK_TO_RING_R2S_MSGQ_OCCUPANCY 0x00
+
+EVENT_TO_RING_B2S_MSGQ_CYCLES_FULL 0x2D SBOX
+UMASK_TO_RING_B2S_MSGQ_CYCLES_FULL 0x00
+
+EVENT_TO_RING_B2S_MSGQ_CYCLES_NE 0x2E SBOX
+UMASK_TO_RING_B2S_MSGQ_CYCLES_NE 0x00
+
+EVENT_TO_RING_B2S_MSGQ_OCCUPANCY 0x2F SBOX
+UMASK_TO_RING_B2S_MSGQ_OCCUPANCY 0x00
+
+EVENT_HALFLINE_BYPASS 0x30 SBOX
+UMASK_HALFLINE_BYPASS 0x00
+
+EVENT_REQ_TBL_OCCUPANCY 0x31 SBOX
+UMASK_REQ_TBL_OCCUPANCY_LOCAL 0x01
+UMASK_REQ_TBL_OCCUPANCY_REMOTE 0x02
+UMASK_REQ_TBL_OCCUPANCY_ALL 0x03
+
+EVENT_EGRESS_BYPASS 0x40 SBOX
+UMASK_EGRESS_BYPASS_AD_CW 0x01
+UMASK_EGRESS_BYPASS_AD_CCW 0x02
+UMASK_EGRESS_BYPASS_AD 0x03
+UMASK_EGRESS_BYPASS_AK_CW 0x04
+UMASK_EGRESS_BYPASS_AK_CCW 0x08
+UMASK_EGRESS_BYPASS_AK 0x0C
+UMASK_EGRESS_BYPASS_BL_CW 0x10
+UMASK_EGRESS_BYPASS_BL_CCW 0x20
+UMASK_EGRESS_BYPASS_BL 0x30
+
+EVENT_EGRESS_ARB_WINS 0x41 SBOX
+UMASK_EGRESS_ARB_WINS_AD_CW 0x01
+UMASK_EGRESS_ARB_WINS_AD_CCW 0x02
+UMASK_EGRESS_ARB_WINS_AD 0x03
+UMASK_EGRESS_ARB_WINS_AK_CW 0x04
+UMASK_EGRESS_ARB_WINS_AK_CCW 0x08
+UMASK_EGRESS_ARB_WINS_AK 0x0C
+UMASK_EGRESS_ARB_WINS_BL_CW 0x10
+UMASK_EGRESS_ARB_WINS_BL_CCW 0x20
+UMASK_EGRESS_ARB_WINS_BL 0x30
+
+EVENT_EGRESS_ARB_LOSSES 0x42 SBOX
+UMASK_EGRESS_ARB_LOSSES_AD_CW 0x01
+UMASK_EGRESS_ARB_LOSSES_AD_CCW 0x02
+UMASK_EGRESS_ARB_LOSSES_AD 0x03
+UMASK_EGRESS_ARB_LOSSES_AK_CW 0x04
+UMASK_EGRESS_ARB_LOSSES_AK_CCW 0x08
+UMASK_EGRESS_ARB_LOSSES_AK 0x0C
+UMASK_EGRESS_ARB_LOSSES_BL_CW 0x10
+UMASK_EGRESS_ARB_LOSSES_BL_CCW 0x20
+UMASK_EGRESS_ARB_LOSSES_BL 0x30
+
+EVENT_EGRESS_STARVED 0x43 SBOX
+UMASK_EGRESS_STARVED_AD_CW 0x01
+UMASK_EGRESS_STARVED_AD_CCW 0x02
+UMASK_EGRESS_STARVED_AD 0x03
+UMASK_EGRESS_STARVED_AK_CW 0x04
+UMASK_EGRESS_STARVED_AK_CCW 0x08
+UMASK_EGRESS_STARVED_AK 0x0C
+UMASK_EGRESS_STARVED_BL_CW 0x10
+UMASK_EGRESS_STARVED_BL_CCW 0x20
+UMASK_EGRESS_STARVED_BL 0x30
+
+EVENT_RBOX_HOM_BYPASS 0x50 SBOX
+UMASK_RBOX_HOM_BYPASS 0x00
+
+EVENT_RBOX_SNP_BYPASS 0x51 SBOX
+UMASK_RBOX_SNP_BYPASS_SNP 0x01
+UMASK_RBOX_SNP_BYPASS_BIG_SNP 0x02
+UMASK_RBOX_SNP_BYPASS_ALL 0x03
+
+EVENT_S2B_HOM_BYPASS 0x52 SBOX
+UMASK_S2B_HOM_BYPASS 0x00
+
+EVENT_B2S_DRS_BYPASS 0x53 SBOX
+UMASK_B2S_DRS_BYPASS 0x00
+
+EVENT_BBOX_HOM_BYPASS 0x54 SBOX
+UMASK_BBOX_HOM_BYPASS 0x00
+
+EVENT_PKTS_SENT_HOM 0x60 SBOX
+UMASK_PKTS_SENT_HOM_RBOX 0x01
+UMASK_PKTS_SENT_HOM_BBOX 0x02
+UMASK_PKTS_SENT_HOM_ALL 0x03
+
+EVENT_PKTS_SENT_SNP 0x62 SBOX
+UMASK_PKTS_SENT_SNP 0x00
+
+EVENT_PKTS_RCVD_SNP 0x71 SBOX
+UMASK_PKTS_RCVD_SNP 0x00
+
+EVENT_PKTS_SENT_NDR 0x63 SBOX
+UMASK_PKTS_SENT_NDR 0x00
+
+EVENT_PKTS_RCVD_NDR 0x70 SBOX
+UMASK_PKTS_RCVD_NDR 0x00
+
+EVENT_PKTS_SENT_DRS 0x64 SBOX
+UMASK_PKTS_SENT_DRS_CBOX0_5 0x01
+UMASK_PKTS_SENT_DRS_CBOX1_6 0x02
+UMASK_PKTS_SENT_DRS_CBOX2_7 0x04
+UMASK_PKTS_SENT_DRS_CBOX3_8 0x08
+UMASK_PKTS_SENT_DRS_CBOX4_9 0x10
+UMASK_PKTS_SENT_DRS_ALL 0x1F
+
+EVENT_FLITS_SENT_DRS 0x65 SBOX
+UMASK_FLITS_SENT_DRS 0x00
+
+EVENT_PKTS_RCVD_DRS_FROM_R 0x72 SBOX
+UMASK_PKTS_RCVD_DRS_FROM_R 0x00
+
+EVENT_PKTS_RCVD_DRS_FROM_B 0x73 SBOX
+UMASK_PKTS_RCVD_DRS_FROM_B 0x00
+
+EVENT_PKTS_SENT_NCS 0x66 SBOX
+UMASK_PKTS_SENT_NCS_CBOX0_5 0x01
+UMASK_PKTS_SENT_NCS_CBOX1_6 0x02
+UMASK_PKTS_SENT_NCS_CBOX2_7 0x04
+UMASK_PKTS_SENT_NCS_CBOX3_8 0x08
+UMASK_PKTS_SENT_NCS_CBOX4_9 0x10
+UMASK_PKTS_SENT_NCS_ALL 0x1F
+
+EVENT_FLITS_SENT_NCS 0x67 SBOX
+UMASK_FLITS_SENT_NCS 0x00
+
+EVENT_PKTS_RCVD_NCS 0x74 SBOX
+UMASK_PKTS_RCVD_NCS 0x00
+
+EVENT_PKTS_SENT_NCB 0x68 SBOX
+UMASK_PKTS_SENT_NCB_CBOX0_5 0x01
+UMASK_PKTS_SENT_NCB_CBOX1_6 0x02
+UMASK_PKTS_SENT_NCB_CBOX2_7 0x04
+UMASK_PKTS_SENT_NCB_CBOX3_8 0x08
+UMASK_PKTS_SENT_NCB_CBOX4_9 0x10
+UMASK_PKTS_SENT_NCB_ALL 0x1F
+
+EVENT_FLITS_SENT_NCB 0x69 SBOX
+UMASK_FLITS_SENT_NCB 0x00
+
+EVENT_PKTS_RCVD_NCB 0x75 SBOX
+UMASK_PKTS_RCVD_NCB 0x00
+
+EVENT_FLITS_SENT_LOC_NCS 0x90 SBOX
+UMASK_FLITS_SENT_LOC_NCS 0x00
+
+EVENT_PKTS_RCVD_LOC_NCS 0x8F SBOX
+UMASK_PKTS_RCVD_LOC_NCS 0x00
+
+EVENT_RBOX_CREDIT_RETURNS 0x6A SBOX
+UMASK_RBOX_CREDIT_RETURNS 0x00
+
+EVENT_BBOX_CREDIT_RETURNS 0x6B SBOX
+UMASK_BBOX_CREDIT_RETURNS 0x00
+
+EVENT_TO_R_B_REQUESTS 0x6C SBOX
+UMASK_TO_R_B_REQUESTS_LOCAL 0x01
+UMASK_TO_R_B_REQUESTS_REMOTE 0x02
+UMASK_TO_R_B_REQUESTS_ALL 0x03
+EVENT_RBOX_CREDITS 0x76 SBOX
+UMASK_RBOX_CREDITS 0x00
+
+EVENT_BBOX_CREDITS 0x77 SBOX
+UMASK_BBOX_CREDITS 0x00
+
+EVENT_NO_CREDIT_HOM 0x80 SBOX
+UMASK_NO_CREDIT_HOM 0x00
+
+EVENT_NO_CREDIT_SNP 0x81 SBOX
+UMASK_NO_CREDIT_SNP 0x00
+
+EVENT_NO_CREDIT_DRS 0x82 SBOX
+UMASK_NO_CREDIT_DRS 0x00
+
+EVENT_NO_CREDIT_NCS 0x83 SBOX
+UMASK_NO_CREDIT_NCS 0x00
+
+EVENT_NO_CREDIT_NCB 0x84 SBOX
+UMASK_NO_CREDIT_NCB 0x00
+
+EVENT_NO_CREDIT_NDR 0x85 SBOX
+UMASK_NO_CREDIT_NDR 0x00
+
+EVENT_NO_CREDIT_VNA 0x86 SBOX
+UMASK_NO_CREDIT_VNA_RBOX 0x01
+UMASK_NO_CREDIT_VNA_BBOX 0x02
+UMASK_NO_CREDIT_VNA_ALL 0x03
+
+EVENT_NO_CREDIT_AD 0x87 SBOX
+UMASK_NO_CREDIT_AD 0x00
+
+EVENT_NO_CREDIT_AK 0x88 SBOX
+UMASK_NO_CREDIT_AK 0x00
+
+EVENT_NO_CREDIT_BL 0x89 SBOX
+UMASK_NO_CREDIT_BL 0x00
+
+EVENT_NO_CREDIT_IPQ 0x8A SBOX
+UMASK_NO_CREDIT_IPQ 0x00
+
+EVENT_NO_CREDIT_LOC_NCS 0x8B SBOX
+UMASK_NO_CREDIT_LOC_NCS 0x00
+
+EVENT_TO_R_LOC_NCS_MSGQ_CYCLES_FULL 0x8C SBOX
+UMASK_TO_R_LOC_NCS_MSGQ_CYCLES_FULL_CBOX0_5 0x01
+UMASK_TO_R_LOC_NCS_MSGQ_CYCLES_FULL_CBOX1_6 0x02
+UMASK_TO_R_LOC_NCS_MSGQ_CYCLES_FULL_CBOX2_7 0x04
+UMASK_TO_R_LOC_NCS_MSGQ_CYCLES_FULL_CBOX3_8 0x08
+UMASK_TO_R_LOC_NCS_MSGQ_CYCLES_FULL_CBOX4_9 0x10
+UMASK_TO_R_LOC_NCS_MSGQ_CYCLES_FULL_ALL 0x1F
+
+EVENT_TO_R_LOC_NCS_MSGQ_CYCLES_NE 0x8D SBOX
+UMASK_TO_R_LOC_NCS_MSGQ_CYCLES_NE_CBOX0_5 0x01
+UMASK_TO_R_LOC_NCS_MSGQ_CYCLES_NE_CBOX1_6 0x02
+UMASK_TO_R_LOC_NCS_MSGQ_CYCLES_NE_CBOX2_7 0x04
+UMASK_TO_R_LOC_NCS_MSGQ_CYCLES_NE_CBOX3_8 0x08
+UMASK_TO_R_LOC_NCS_MSGQ_CYCLES_NE_CBOX4_9 0x10
+UMASK_TO_R_LOC_NCS_MSGQ_CYCLES_NE_ALL 0x1F
+
+EVENT_TO_R_LOC_NCS_MSGQ_OCCUPANCY 0x8E SBOX
+UMASK_TO_R_LOC_NCS_MSGQ_OCCUPANCY_CBOX0_5 0x01
+UMASK_TO_R_LOC_NCS_MSGQ_OCCUPANCY_CBOX1_6 0x02
+UMASK_TO_R_LOC_NCS_MSGQ_OCCUPANCY_CBOX2_7 0x04
+UMASK_TO_R_LOC_NCS_MSGQ_OCCUPANCY_CBOX3_8 0x08
+UMASK_TO_R_LOC_NCS_MSGQ_OCCUPANCY_CBOX4_9 0x10
+UMASK_TO_R_LOC_NCS_MSGQ_OCCUPANCY_ALL 0x1F
diff --git a/src/includes/perfmon_westmere_events.txt b/src/includes/perfmon_westmere_events.txt
index 94a4dea..3c3e66f 100644
--- a/src/includes/perfmon_westmere_events.txt
+++ b/src/includes/perfmon_westmere_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for Intel Westmere
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
diff --git a/src/includes/power.h b/src/includes/power.h
index efe84dc..6cb5fd3 100644
--- a/src/includes/power.h
+++ b/src/includes/power.h
@@ -6,8 +6,8 @@
* Description: Header File Power Module
* Implements Intel RAPL Interface.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/power_types.h b/src/includes/power_types.h
index c503d06..b53ce85 100644
--- a/src/includes/power_types.h
+++ b/src/includes/power_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for power module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/registers.h b/src/includes/registers.h
index 2b812d0..ae80e28 100644
--- a/src/includes/registers.h
+++ b/src/includes/registers.h
@@ -5,8 +5,8 @@
*
* Description: Register Defines for the perfmon module
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -61,13 +61,14 @@
#define MSR_PERF_GLOBAL_OVF_CTRL 0x390
#define MSR_PEBS_ENABLE 0x3F1
/* Perfmon V3 */
-#define MSR_OFFCORE_RSP0 0x1A6
+#define MSR_OFFCORE_RESP0 0x1A6
+#define MSR_OFFCORE_RESP1 0x1A7
#define MSR_UNCORE_PERF_GLOBAL_CTRL 0x391
#define MSR_UNCORE_PERF_GLOBAL_STATUS 0x392
#define MSR_UNCORE_PERF_GLOBAL_OVF_CTRL 0x393
#define MSR_UNCORE_FIXED_CTR0 0x394 /* Uncore clock cycles */
#define MSR_UNCORE_FIXED_CTR_CTRL 0x395 /*FIXME: Is this correct? */
-#define MSR_UNCORE_ADDR_OPCODE_MATCH 0x396
+#define MSR_UNCORE_ADDR_OPCODE_MATCH 0x396
#define MSR_UNCORE_PERFEVTSEL0 0x3C0
#define MSR_UNCORE_PERFEVTSEL1 0x3C1
#define MSR_UNCORE_PERFEVTSEL2 0x3C2
@@ -84,7 +85,36 @@
#define MSR_UNCORE_PMC5 0x3B5
#define MSR_UNCORE_PMC6 0x3B6
#define MSR_UNCORE_PMC7 0x3B7
-
+/*
+ * Perfmon V4 (starting with Haswell; according to the Intel
+ * Software Developer's Manual it also applies to SandyBridge,
+ * while IvyBridge is not mentioned in that section)
+ */
+#define MSR_UNC_PERF_GLOBAL_CTRL MSR_UNCORE_PERF_GLOBAL_CTRL
+#define MSR_UNC_PERF_GLOBAL_STATUS MSR_UNCORE_PERF_GLOBAL_STATUS
+#define MSR_UNC_PERF_FIXED_CTRL MSR_UNCORE_FIXED_CTR0
+#define MSR_UNC_PERF_FIXED_CTR MSR_UNCORE_FIXED_CTR_CTRL
+#define MSR_UNC_ARB_PERFEVTSEL0 MSR_UNCORE_PMC2
+#define MSR_UNC_ARB_PERFEVTSEL1 MSR_UNCORE_PMC3
+#define MSR_UNC_ARB_CTR0 MSR_UNCORE_PMC0
+#define MSR_UNC_ARB_CTR1 MSR_UNCORE_PMC1
+#define MSR_UNC_CBO_CONFIG 0x396
+#define MSR_UNC_CBO_0_PERFEVTSEL0 0x700
+#define MSR_UNC_CBO_0_PERFEVTSEL1 0x701
+#define MSR_UNC_CBO_0_CTR0 0x706
+#define MSR_UNC_CBO_0_CTR1 0x707
+#define MSR_UNC_CBO_1_PERFEVTSEL0 0x710
+#define MSR_UNC_CBO_1_PERFEVTSEL1 0x711
+#define MSR_UNC_CBO_1_CTR0 0x716
+#define MSR_UNC_CBO_1_CTR1 0x717
+#define MSR_UNC_CBO_2_PERFEVTSEL0 0x720
+#define MSR_UNC_CBO_2_PERFEVTSEL1 0x721
+#define MSR_UNC_CBO_2_CTR0 0x726
+#define MSR_UNC_CBO_2_CTR1 0x727
+#define MSR_UNC_CBO_3_PERFEVTSEL0 0x730
+#define MSR_UNC_CBO_3_PERFEVTSEL1 0x731
+#define MSR_UNC_CBO_3_CTR0 0x736
+#define MSR_UNC_CBO_3_CTR1 0x737
/* Xeon Phi */
#define MSR_MIC_TSC 0x010
#define MSR_MIC_PERFEVTSEL0 0x028
@@ -92,9 +122,9 @@
#define MSR_MIC_PMC0 0x020
#define MSR_MIC_PMC1 0x021
#define MSR_MIC_SPFLT_CONTROL 0x02C
-#define MSR_MIC_PERF_GLOBAL_STATUS 0x02D
-#define MSR_MIC_PERF_GLOBAL_OVF_CTRL 0x02E
-#define MSR_MIC_PERF_GLOBAL_CTRL 0x02F
+#define MSR_MIC_PERF_GLOBAL_STATUS 0x02D
+#define MSR_MIC_PERF_GLOBAL_OVF_CTRL 0x02E
+#define MSR_MIC_PERF_GLOBAL_CTRL 0x02F
/* Core v1/v2 type uncore
@@ -304,6 +334,10 @@
#define MSR_UNC_U_PMON_CTL1 0xC11
#define MSR_UNC_U_UCLK_FIXED_CTR 0xC09
#define MSR_UNC_U_UCLK_FIXED_CTL 0xC08
+#define MSR_UNC_U_PMON_BOX_STATUS 0xC15
+#define MSR_UNC_U_PMON_GLOBAL_STATUS 0xC01
+#define MSR_UNC_U_PMON_GLOBAL_CTL 0xC00
+#define MSR_UNC_U_PMON_GLOBAL_CONFIG 0xC06
/* HA Box Performance Monitoring */
@@ -641,6 +675,38 @@
#define MSR_C7_PMON_CTR4 0xDF9
#define MSR_C7_PMON_EVNT_SEL5 0xDFA
#define MSR_C7_PMON_CTR5 0xDFB
+/* C box 8 - Coherence Engine core 8 */
+#define MSR_C8_PMON_BOX_CTRL 0xF40
+#define MSR_C8_PMON_BOX_STATUS 0xF41
+#define MSR_C8_PMON_BOX_OVF_CTRL 0xF42
+#define MSR_C8_PMON_EVNT_SEL0 0xF50
+#define MSR_C8_PMON_CTR0 0xF51
+#define MSR_C8_PMON_EVNT_SEL1 0xF52
+#define MSR_C8_PMON_CTR1 0xF53
+#define MSR_C8_PMON_EVNT_SEL2 0xF54
+#define MSR_C8_PMON_CTR2 0xF55
+#define MSR_C8_PMON_EVNT_SEL3 0xF56
+#define MSR_C8_PMON_CTR3 0xF57
+#define MSR_C8_PMON_EVNT_SEL4 0xF58
+#define MSR_C8_PMON_CTR4 0xF59
+#define MSR_C8_PMON_EVNT_SEL5 0xF5A
+#define MSR_C8_PMON_CTR5 0xF5B
+/* C box 9 - Coherence Engine core 9 */
+#define MSR_C9_PMON_BOX_CTRL 0xFC0
+#define MSR_C9_PMON_BOX_STATUS 0xFC1
+#define MSR_C9_PMON_BOX_OVF_CTRL 0xFC2
+#define MSR_C9_PMON_EVNT_SEL0 0xFD0
+#define MSR_C9_PMON_CTR0 0xFD1
+#define MSR_C9_PMON_EVNT_SEL1 0xFD2
+#define MSR_C9_PMON_CTR1 0xFD3
+#define MSR_C9_PMON_EVNT_SEL2 0xFD4
+#define MSR_C9_PMON_CTR2 0xFD5
+#define MSR_C9_PMON_EVNT_SEL3 0xFD6
+#define MSR_C9_PMON_CTR3 0xFD7
+#define MSR_C9_PMON_EVNT_SEL4 0xFD8
+#define MSR_C9_PMON_CTR4 0xFD9
+#define MSR_C9_PMON_EVNT_SEL5 0xFDA
+#define MSR_C9_PMON_CTR5 0xFDB
/* R box 0 - Router 0 */
#define MSR_R0_PMON_BOX_CTRL 0xE00
#define MSR_R0_PMON_BOX_STATUS 0xE01
@@ -749,6 +815,8 @@
#define MSR_TURBO_POWER_CURRENT_LIMIT 0x1AC
#define MSR_TURBO_RATIO_LIMIT 0x1AD
+/* Intel Silvermont's RAPL registers */
+#define MSR_PKG_POWER_INFO_SILVERMONT 0x66E
/*
* AMD
*/
diff --git a/src/includes/strUtil.h b/src/includes/strUtil.h
index 5fad5df..18236b6 100644
--- a/src/includes/strUtil.h
+++ b/src/includes/strUtil.h
@@ -6,8 +6,8 @@
* Description: Header File strUtil Module.
* Helper routines for bstrlib and command line parsing
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -37,7 +37,7 @@
#include <time.h>
#define CHECK_OPTION_STRING \
-if (! (argString = bSecureInput(200,optarg))) { \
+if (! (argString = bSecureInput(400,optarg))) { \
ERROR_PLAIN_PRINT(Failed to read argument string!); \
}
diff --git a/src/includes/strUtil_types.h b/src/includes/strUtil_types.h
index 4dec99e..25766ff 100644
--- a/src/includes/strUtil_types.h
+++ b/src/includes/strUtil_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for strUtil module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/test_types.h b/src/includes/test_types.h
index e08fffa..45c0932 100644
--- a/src/includes/test_types.h
+++ b/src/includes/test_types.h
@@ -5,8 +5,8 @@
*
* Description: Type definitions for benchmarking framework
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -38,7 +38,10 @@ typedef void (*FuncPrototype)();
typedef enum {
SINGLE = 0,
- DOUBLE} DataType;
+ DOUBLE,
+ SINGLE_RAND,
+ DOUBLE_RAND
+} DataType;
typedef enum {
STREAM_1 = 1,
@@ -79,7 +82,8 @@ typedef enum {
STREAM_36,
STREAM_37,
STREAM_38,
- MAX_STREAMS} Pattern;
+ MAX_STREAMS
+} Pattern;
typedef struct {
char* name;
@@ -87,15 +91,15 @@ typedef struct {
DataType type ;
int stride;
FuncPrototype kernel;
- int flops;
- int bytes;
+ double flops;
+ int bytes;
} TestCase;
typedef struct {
- uint64_t size;
- uint32_t iter;
+ uint64_t size;
+ uint32_t iter;
const TestCase* test;
- uint64_t cycles;
+ uint64_t cycles;
uint32_t numberOfThreads;
int* processors;
void** streams;
diff --git a/src/includes/textcolor.h b/src/includes/textcolor.h
index 976a0a0..4c1b7b1 100644
--- a/src/includes/textcolor.h
+++ b/src/includes/textcolor.h
@@ -7,8 +7,8 @@
* Allows toggling of terminal escape sequences for
* colored text.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/thermal.h b/src/includes/thermal.h
index f104aa1..3153386 100644
--- a/src/includes/thermal.h
+++ b/src/includes/thermal.h
@@ -6,8 +6,8 @@
* Description: Header File Thermal Module.
* Implements Intel TM/TM2 Interface.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -44,7 +44,10 @@ static inline uint32_t thermal_read(int cpuId);
static uint32_t
thermal_read(int cpuId)
{
- return (thermal_info.activationT - extractBitField(msr_read(cpuId, IA32_THERM_STATUS),7,16));
+ uint32_t readout = extractBitField(msr_read(cpuId, IA32_THERM_STATUS),7,16);
+ return (readout == 0 ?
+ thermal_info.activationT - thermal_info.offset :
+ (thermal_info.activationT-thermal_info.offset) - readout );
}
#endif /*THERMAL_H*/
diff --git a/src/includes/thermal_types.h b/src/includes/thermal_types.h
index 71da84d..a619180 100644
--- a/src/includes/thermal_types.h
+++ b/src/includes/thermal_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for thermal module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -38,6 +38,7 @@ typedef struct {
uint16_t highT;
uint32_t resolution;
uint32_t activationT;
+ uint32_t offset;
} ThermalInfo;
diff --git a/src/includes/threads.h b/src/includes/threads.h
index 2db402c..6e00191 100644
--- a/src/includes/threads.h
+++ b/src/includes/threads.h
@@ -5,8 +5,8 @@
*
* Description: Header file of pthread interface module
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -34,6 +34,7 @@
#include <types.h>
#include <pthread.h>
#include <threads_types.h>
+#include <stdio.h>
#define THREADS_BARRIER pthread_barrier_wait(&threads_barrier)
@@ -46,7 +47,7 @@ extern ThreadGroup* threads_groups;
* @brief Initialization of the thread module
* @param numberOfThreads The total number of threads
*/
-extern void threads_init(int numberOfThreads);
+extern void threads_init(FILE* OUTSTREAM, int numberOfThreads);
/**
* @brief Create all threads
diff --git a/src/includes/threads_types.h b/src/includes/threads_types.h
index 783cbfd..dfa13f3 100644
--- a/src/includes/threads_types.h
+++ b/src/includes/threads_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for threads module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -31,23 +31,25 @@
#ifndef THREADS_TYPES_H
#define THREADS_TYPES_H
+#include <stdio.h>
#include <stdint.h>
typedef struct {
- int globalNumberOfThreads;
- int numberOfThreads;
- int globalThreadId;
- int threadId;
- int numberOfGroups;
- int groupId;
- double time;
- uint64_t cycles;
+ int globalNumberOfThreads;
+ int numberOfThreads;
+ int globalThreadId;
+ int threadId;
+ int numberOfGroups;
+ int groupId;
+ double time;
+ uint64_t cycles;
+ FILE* output;
ThreadUserData data;
} ThreadData;
typedef struct {
- int numberOfThreads;
- int* threadIds;
+ int numberOfThreads;
+ int* threadIds;
} ThreadGroup;
typedef void (*threads_copyDataFunc)(ThreadUserData* src,ThreadUserData* dst);
diff --git a/src/includes/timer.h b/src/includes/timer.h
index 77da7c3..b97f4ac 100644
--- a/src/includes/timer.h
+++ b/src/includes/timer.h
@@ -10,8 +10,8 @@
* with rdtsc of 100 cycles in the worst case. Therefore sensible
* measurements should be over 1000 cycles.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -38,30 +38,28 @@
#include <types.h>
#define RDTSC(cpu_c) \
-__asm__ volatile("xor %%eax,%%eax\n\t" \
-"cpuid\n\t" \
-"rdtsc\n\t" \
-"movl %%eax, %0\n\t" \
-"movl %%edx, %1\n\t" \
-: "=r" ((cpu_c).int32.lo), "=r" ((cpu_c).int32.hi) \
-: : "%eax","%ebx","%ecx","%edx")
+ __asm__ volatile("xor %%eax,%%eax\n\t" \
+ "cpuid\n\t" \
+ "rdtsc\n\t" \
+ "movl %%eax, %0\n\t" \
+ "movl %%edx, %1\n\t" \
+ : "=r" ((cpu_c).int32.lo), "=r" ((cpu_c).int32.hi) \
+ : : "%eax","%ebx","%ecx","%edx")
#define RDTSC_CR(cpu_c) \
-__asm__ volatile( \
-"rdtsc\n\t" \
-"movl %%eax, %0\n\t" \
-"movl %%edx, %1\n\t" \
-: "=r" ((cpu_c).int32.lo), "=r" ((cpu_c).int32.hi) \
-: : "%eax","%ebx","%ecx","%edx")
+ __asm__ volatile("rdtsc\n\t" \
+ "movl %%eax, %0\n\t" \
+ "movl %%edx, %1\n\t" \
+ : "=r" ((cpu_c).int32.lo), "=r" ((cpu_c).int32.hi) \
+ : : "%eax","%ebx","%ecx","%edx")
#define RDTSCP(cpu_c) \
-__asm__ volatile( \
-"rdtscp\n\t" \
-"movl %%eax, %0\n\t" \
-"movl %%edx, %1\n\t" \
-"cpuid\n\t" \
-: "=r" ((cpu_c).int32.lo), "=r" ((cpu_c).int32.hi) \
-: : "%eax","%ebx","%ecx","%edx")
+ __asm__ volatile("rdtscp\n\t" \
+ "movl %%eax, %0\n\t" \
+ "movl %%edx, %1\n\t" \
+ "cpuid\n\t" \
+ : "=r" ((cpu_c).int32.lo), "=r" ((cpu_c).int32.hi) \
+ : : "%eax","%ebx","%ecx","%edx")
#ifdef HAS_RDTSCP
#define RDTSC_STOP(cpu_c) RDTSCP(cpu_c);
@@ -85,15 +83,15 @@ void timer_start( TimerData* time )
RDTSC(time->start);
#endif
#ifdef _ARCH_PPC
- uint32_t tbl, tbu0, tbu1;
+ uint32_t tbl, tbu0, tbu1;
- do {
- __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0));
- __asm__ __volatile__ ("mftb %0" : "=r"(tbl));
- __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1));
- } while (tbu0 != tbu1);
+ do {
+ __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0));
+ __asm__ __volatile__ ("mftb %0" : "=r"(tbl));
+ __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1));
+ } while (tbu0 != tbu1);
- time->start.int64 = (((uint64_t)tbu0) << 32) | tbl;
+ time->start.int64 = (((uint64_t)tbu0) << 32) | tbl;
#endif
}
@@ -103,14 +101,14 @@ void timer_stop( TimerData* time )
RDTSC_STOP(time->stop)
#endif
#ifdef _ARCH_PPC
- uint32_t tbl, tbu0, tbu1;
- do {
- __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0));
- __asm__ __volatile__ ("mftb %0" : "=r"(tbl));
- __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1));
- } while (tbu0 != tbu1);
+ uint32_t tbl, tbu0, tbu1;
+ do {
+ __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0));
+ __asm__ __volatile__ ("mftb %0" : "=r"(tbl));
+ __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1));
+ } while (tbu0 != tbu1);
- time->stop.int64 = (((uint64_t)tbu0) << 32) | tbl;
+ time->stop.int64 = (((uint64_t)tbu0) << 32) | tbl;
#endif
}
diff --git a/src/includes/timer_types.h b/src/includes/timer_types.h
index 4437881..265d5c9 100644
--- a/src/includes/timer_types.h
+++ b/src/includes/timer_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for timer module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/tree.h b/src/includes/tree.h
index 428e1ca..9816cf7 100644
--- a/src/includes/tree.h
+++ b/src/includes/tree.h
@@ -6,8 +6,8 @@
* Description: Header File tree Module.
* Implements a simple tree data structure.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/tree_types.h b/src/includes/tree_types.h
index 6593a91..b449e39 100644
--- a/src/includes/tree_types.h
+++ b/src/includes/tree_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for tree module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/types.h b/src/includes/types.h
index 6d99813..2b0745a 100644
--- a/src/includes/types.h
+++ b/src/includes/types.h
@@ -5,8 +5,8 @@
*
* Description: Global Types file
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/libperfctr.c b/src/libperfctr.c
index 97b3e4d..a4b2158 100644
--- a/src/libperfctr.c
+++ b/src/libperfctr.c
@@ -5,8 +5,8 @@
*
* Description: Marker API interface of module perfmon
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -69,6 +69,7 @@
#include <perfmon_sandybridge_counters.h>
#include <perfmon_ivybridge_counters.h>
#include <perfmon_westmereEX_counters.h>
+#include <perfmon_silvermont_counters.h>
/* ##### VARIABLES - LOCAL TO THIS SOURCE FILE ###################### */
@@ -91,32 +92,31 @@ static BitMask counterMask;
void str2BitMask(const char* str, BitMask* mask)
{
- char* endptr;
- errno = 0;
- struct bstrList* tokens;
- bstring q = bfromcstralloc (60, str);
- tokens = bsplit(q,' ');
-
- for (int i=0; i<tokens->qty; i++)
- {
- uint64_t val = strtoull((char*) tokens->entry[i]->data, &endptr, 16);
-
- if ((errno == ERANGE && val == LONG_MAX )
- || (errno != 0 && val == 0))
- {
- ERROR;
- }
-
- if (endptr == str)
- {
- ERROR_PLAIN_PRINT(No digits were found);
- }
-
- mask->mask[i] = val;
- }
-
- bstrListDestroy(tokens);
- bdestroy(q);
+ char* endptr;
+ errno = 0;
+ struct bstrList* tokens;
+ bstring q = bfromcstralloc (60, str);
+ tokens = bsplit(q,' ');
+
+ for (int i=0; i<tokens->qty; i++)
+ {
+ uint64_t val = strtoull((char*) tokens->entry[i]->data, &endptr, 16);
+
+ if ((errno == ERANGE && val == LONG_MAX ) || (errno != 0 && val == 0))
+ {
+ ERROR;
+ }
+
+ if (endptr == str)
+ {
+ ERROR_PLAIN_PRINT(No digits were found);
+ }
+
+ mask->mask[i] = val;
+ }
+
+ bstrListDestroy(tokens);
+ bdestroy(q);
}
static int getProcessorID(cpu_set_t* cpu_set)
@@ -204,6 +204,17 @@ void likwid_markerInit(void)
perfmon_numCountersCore = NUM_COUNTERS_CORE_CORE2;
break;
+ case ATOM_SILVERMONT_C:
+ case ATOM_SILVERMONT_E:
+ case ATOM_SILVERMONT_F1:
+ case ATOM_SILVERMONT_F2:
+ case ATOM_SILVERMONT_F3:
+ power_init(0);
+ perfmon_counter_map = silvermont_counter_map;
+ perfmon_numCounters = NUM_COUNTERS_SILVERMONT;
+ perfmon_numCountersCore = NUM_COUNTERS_CORE_SILVERMONT;
+ break;
+
case CORE_DUO:
ERROR_PLAIN_PRINT(Unsupported Processor);
break;
@@ -681,8 +692,16 @@ void likwid_markerStopRegion(const char* regionTag)
{
if ( perfmon_counter_map[i].type == POWER )
{
- results->PMcounters[i] += power_info.energyUnit *
- (PMcounters[i] - results->StartPMcounters[i]);
+ if (PMcounters[i] >= results->StartPMcounters[i])
+ {
+ results->PMcounters[i] += power_info.energyUnit *
+ (PMcounters[i] - results->StartPMcounters[i]);
+ }
+ else
+ {
+ results->PMcounters[i] += power_info.energyUnit *
+ (((double)0xFFFFFFFF) - results->StartPMcounters[i] + PMcounters[i]);
+ }
}
else
{
diff --git a/src/likwid.f90 b/src/likwid.f90
index b4a3c12..1215dd4 100644
--- a/src/likwid.f90
+++ b/src/likwid.f90
@@ -4,8 +4,8 @@
!
! Description: Marker API f90 module
!
-! Version: 3.1.2
-! Released: 2.6.2014
+! Version: 3.1.3
+! Released: 4.11.2014
!
! Author: Jan Treibig (jt), jan.treibig at gmail.com
! Project: likwid
@@ -32,22 +32,22 @@ module likwid
interface
- subroutine likwid_markerInit()
- end subroutine likwid_markerInit
+ subroutine likwid_markerInit()
+ end subroutine likwid_markerInit
- subroutine likwid_markerThreadInit()
- end subroutine likwid_markerThreadInit
+ subroutine likwid_markerThreadInit()
+ end subroutine likwid_markerThreadInit
- subroutine likwid_markerClose()
- end subroutine likwid_markerClose
+ subroutine likwid_markerClose()
+ end subroutine likwid_markerClose
- subroutine likwid_markerStartRegion( regionTag )
- character(*) :: regionTag
- end subroutine likwid_markerStartRegion
+ subroutine likwid_markerStartRegion( regionTag )
+ character(*) :: regionTag
+ end subroutine likwid_markerStartRegion
- subroutine likwid_markerStopRegion( regionTag )
- character(*) :: regionTag
- end subroutine likwid_markerStopRegion
+ subroutine likwid_markerStopRegion( regionTag )
+ character(*) :: regionTag
+ end subroutine likwid_markerStopRegion
end interface
diff --git a/src/likwid_f90_interface.c b/src/likwid_f90_interface.c
index cc6ea5d..31bad92 100644
--- a/src/likwid_f90_interface.c
+++ b/src/likwid_f90_interface.c
@@ -5,8 +5,8 @@
*
* Description: F90 interface for marker API
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -62,7 +62,7 @@ void likwid_markerstartregion_(char* regionTag, int len)
}
likwid_markerStartRegion( tmp );
- free(tmp);
+ free(tmp);
}
void likwid_markerstopregion_(char* regionTag, int len)
@@ -79,6 +79,6 @@ void likwid_markerstopregion_(char* regionTag, int len)
}
likwid_markerStopRegion( tmp );
- free(tmp);
+ free(tmp);
}
diff --git a/src/memsweep.c b/src/memsweep.c
index 1af4b5e..8abf796 100644
--- a/src/memsweep.c
+++ b/src/memsweep.c
@@ -5,8 +5,8 @@
*
* Description: Implementation of sweeper module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -60,11 +60,11 @@ static uint64_t memoryFraction = 80ULL;
static void*
allocateOnNode(size_t size, int domainId)
{
- char *ptr;
+ char *ptr;
- ptr = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+ ptr = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
- if (ptr == (char *)-1)
+ if (ptr == (char *)-1)
{
ERROR;
}
@@ -101,11 +101,14 @@ findProcessor(uint32_t nodeId, uint32_t coreId)
}
/* evict all dirty cachelines from last level cache */
-static void cleanupCache(char* ptr)
+static void cleanupCache(FILE* OUTSTREAM, char* ptr)
{
#ifdef __x86_64
uint32_t cachesize = 2 * cpuid_topology.cacheLevels[cpuid_topology.numCacheLevels-1].size;
- printf("Cleanup LLC using %u MB\n", cachesize / (1000000));
+ if (OUTSTREAM != NULL)
+ {
+ fprintf(OUTSTREAM, "Cleanup LLC using %u MB\n", cachesize / (1000000));
+ }
_loadData(cachesize,ptr);
#else
ERROR_PLAIN_PRINT(Cleanup cache is currently only available on 64bit X86 systems.);
@@ -122,32 +125,35 @@ memsweep_setMemoryFraction(uint64_t fraction)
void
-memsweep_node(void)
+memsweep_node(FILE* OUTSTREAM)
{
for ( uint32_t i=0; i < numa_info.numberOfNodes; i++)
{
- memsweep_domain(i);
+ memsweep_domain(OUTSTREAM, i);
}
}
void
-memsweep_domain(int domainId)
+memsweep_domain(FILE* OUTSTREAM, int domainId)
{
char* ptr = NULL;
size_t size = numa_info.nodes[domainId].totalMemory * 1024ULL * memoryFraction / 100ULL;
- printf("Sweeping domain %d: Using %g MB of %g MB\n",
- domainId,
- size / (1000.0 * 1000.0),
- numa_info.nodes[domainId].totalMemory/ 1000.0);
+ if (OUTSTREAM != NULL)
+ {
+ fprintf(OUTSTREAM, "Sweeping domain %d: Using %g MB of %g MB\n",
+ domainId,
+ size / (1000.0 * 1000.0),
+ numa_info.nodes[domainId].totalMemory/ 1000.0);
+ }
ptr = (char*) allocateOnNode(size, domainId);
initMemory(size, ptr, domainId);
- cleanupCache(ptr);
+ cleanupCache(OUTSTREAM, ptr);
munmap(ptr, size);
}
void
-memsweep_threadGroup(int* processorList, int numberOfProcessors)
+memsweep_threadGroup(FILE* OUTSTREAM, int* processorList, int numberOfProcessors)
{
for (uint32_t i=0; i<numa_info.numberOfNodes; i++)
{
@@ -155,7 +161,7 @@ memsweep_threadGroup(int* processorList, int numberOfProcessors)
{
if (findProcessor(i,processorList[j]))
{
- memsweep_domain(i);
+ memsweep_domain(OUTSTREAM, i);
break;
}
}
diff --git a/src/msr.c b/src/msr.c
index 448185b..cb867f2 100644
--- a/src/msr.c
+++ b/src/msr.c
@@ -9,8 +9,8 @@
* sys interface of the Linux 2.6 kernel. This module
* is based on the msr-util tools.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -45,12 +45,14 @@
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/un.h>
+#include <sys/wait.h>
#include <types.h>
#include <error.h>
#include <cpuid.h>
#include <accessClient.h>
#include <msr.h>
+#include <registers.h>
/* ##### MACROS - LOCAL TO THIS SOURCE FILE ######################### */
#define MAX_LENGTH_MSR_DEV_NAME 20
@@ -60,9 +62,61 @@
/* ##### VARIABLES - LOCAL TO THIS SOURCE FILE ###################### */
static int FD[MAX_NUM_THREADS];
static int socket_fd = -1;
+static int rdpmc_works = 0;
/* ##### FUNCTION DEFINITIONS - LOCAL TO THIS SOURCE FILE ########### */
+static inline int __rdpmc(int counter, uint64_t* value)
+{
+ unsigned low, high;
+ __asm__ volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
+ *value = ((low) | ((uint64_t )(high) << 32));
+ return 0;
+}
+//Needed for rdpmc check
+void segfault_sigaction(int signal, siginfo_t *si, void *arg)
+{
+ exit(1);
+}
+int test_rdpmc(int flag)
+{
+ int ret, waiting;
+ int pid;
+ int status = 0;
+ uint64_t tmp;
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(struct sigaction));
+ sigemptyset(&sa.sa_mask);
+ sa.sa_sigaction = segfault_sigaction;
+ sa.sa_flags = SA_SIGINFO;
+
+ pid = fork();
+
+ if (pid < 0)
+ {
+ return -1;
+ }
+ if (!pid)
+ {
+ sigaction(SIGSEGV, &sa, NULL);
+ if (flag == 0)
+ {
+ __rdpmc(0, &tmp);
+ }
+ exit(0);
+ } else {
+
+ waiting = waitpid(pid, &status, 0);
+ if (waiting < 0 || status)
+ {
+ ret = 0;
+ } else
+ {
+ ret = 1;
+ }
+ }
+ return ret;
+}
/* ##### FUNCTION DEFINITIONS - EXPORTED FUNCTIONS ################## */
@@ -72,55 +126,42 @@ msr_init(int initSocket_fd)
{
if (accessClient_mode == DAEMON_AM_DIRECT)
{
+ char* msr_file_name = (char*) malloc(MAX_LENGTH_MSR_DEV_NAME * sizeof(char));
- int fd;
-#ifdef __MIC
- char* msr_file_name = "/dev/msr0";
- if( access( msr_file_name, F_OK ) == -1 )
- {
- msr_file_name = "/dev/cpu/0/msr";
- }
-#else
- char* msr_file_name = "/dev/cpu/0/msr";
-#endif
-
- fd = open(msr_file_name, O_RDWR);
+ sprintf(msr_file_name,"/dev/msr0");
+ if( access( msr_file_name, F_OK ) == -1 )
+ {
+ sprintf(msr_file_name,"/dev/cpu/0/msr");
+ }
- if (fd < 0)
+ if (access(msr_file_name, R_OK|W_OK))
{
- fprintf(stderr, "ERROR\n");
- fprintf(stderr, "rdmsr: failed to open '%s': %s!\n",
- msr_file_name , strerror(errno));
- fprintf(stderr, " Please check if the msr module \
- is loaded and the device file has correct permissions.\n");
- fprintf(stderr, " Alternatively you might want to \
- look into (sys)daemonmode.\n\n");
+ ERROR_PRINT(Cannot access MSR device file %s: %s.\n
+ Please check if 'msr' module is loaded and device files have correct permissions\n
+ Alternatively you might want to look into (sys)daemonmode\n,msr_file_name , strerror(errno));
+ free(msr_file_name);
exit(127);
}
-
- close(fd);
+ rdpmc_works = test_rdpmc(0);
/* NOTICE: This assumes consecutive processor Ids! */
for ( uint32_t i=0; i < cpuid_topology.numHWThreads; i++ )
{
- char* msr_file_name = (char*) malloc(MAX_LENGTH_MSR_DEV_NAME * sizeof(char));
-#ifdef __MIC
- sprintf(msr_file_name,"/dev/msr%d",i);
- if( access( msr_file_name, F_OK ) == -1 )
- {
- sprintf(msr_file_name,"/dev/cpu/%d/msr",i);
- }
-#else
- sprintf(msr_file_name,"/dev/cpu/%d/msr",i);
-#endif
-
+ sprintf(msr_file_name,"/dev/msr%d",i);
+ if( access( msr_file_name, F_OK ) == -1 )
+ {
+ sprintf(msr_file_name,"/dev/cpu/%d/msr",i);
+ }
FD[i] = open(msr_file_name, O_RDWR);
-
if ( FD[i] < 0 )
{
+ ERROR_PRINT(Cannot access MSR device file %s: %s\n,
+ msr_file_name , strerror(errno));
+ free(msr_file_name);
ERROR;
}
}
+ free(msr_file_name);
}
else
{
@@ -137,6 +178,7 @@ msr_finalize(void)
{
close(FD[i]);
}
+ rdpmc_works = 0;
}
else
{
@@ -152,9 +194,29 @@ msr_tread(const int tsocket_fd, const int cpu, uint32_t reg)
{
uint64_t data;
- if ( pread(FD[cpu], &data, sizeof(data), reg) != sizeof(data) )
+ if (rdpmc_works && reg >= MSR_PMC0 && reg <=MSR_PMC3)
+ {
+ if (__rdpmc(reg - MSR_PMC0, &data) )
+ {
+ ERROR_PRINT(Cannot read MSR reg 0x%x with RDPMC instruction on CPU %d\n,
+ reg,cpu);
+ }
+ }
+ else if (rdpmc_works && reg >= MSR_PERF_FIXED_CTR0 && reg <= MSR_PERF_FIXED_CTR2)
{
- ERROR_PRINT("cpu %d reg %x",cpu, reg);
+ if (__rdpmc(0x4000000ULL + (reg - MSR_PERF_FIXED_CTR0), &data) )
+ {
+ ERROR_PRINT(Cannot read MSR reg 0x%x with RDPMC instruction on CPU %d\n,
+ reg,cpu);
+ }
+ }
+ else
+ {
+ if ( pread(FD[cpu], &data, sizeof(data), reg) != sizeof(data) )
+ {
+ ERROR_PRINT(Cannot read MSR reg 0x%x with RDMSR instruction on CPU %d\n,
+ reg, cpu);
+ }
}
return data;
@@ -173,7 +235,8 @@ msr_twrite(const int tsocket_fd, const int cpu, uint32_t reg, uint64_t data)
{
if (pwrite(FD[cpu], &data, sizeof(data), reg) != sizeof(data))
{
- ERROR_PRINT("cpu %d reg %x",cpu, reg);
+ ERROR_PRINT(Cannot write MSR reg 0x%x with WRMSR instruction on CPU %d\n,
+ reg, cpu);
}
}
else
@@ -190,9 +253,29 @@ msr_read( const int cpu, uint32_t reg)
{
uint64_t data;
- if ( pread(FD[cpu], &data, sizeof(data), reg) != sizeof(data) )
+ if (rdpmc_works && reg >= MSR_PMC0 && reg <=MSR_PMC3)
{
- ERROR_PRINT("cpu %d reg %x",cpu, reg);
+ if (__rdpmc(reg - MSR_PMC0, &data) )
+ {
+ ERROR_PRINT(Cannot read MSR reg 0x%x with RDPMC instruction on CPU %d\n,
+ reg,cpu);
+ }
+ }
+ else if (rdpmc_works && reg >= MSR_PERF_FIXED_CTR0 && reg <= MSR_PERF_FIXED_CTR2)
+ {
+ if (__rdpmc(0x4000000ULL + (reg - MSR_PERF_FIXED_CTR0), &data) )
+ {
+ ERROR_PRINT(Cannot read MSR reg 0x%x with RDPMC instruction on CPU %d\n,
+ reg,cpu);
+ }
+ }
+ else
+ {
+ if ( pread(FD[cpu], &data, sizeof(data), reg) != sizeof(data) )
+ {
+ ERROR_PRINT(Cannot read MSR reg 0x%x with RDMSR instruction on CPU %d\n,
+ reg, cpu);
+ }
}
return data;
@@ -211,7 +294,8 @@ msr_write( const int cpu, uint32_t reg, uint64_t data)
{
if (pwrite(FD[cpu], &data, sizeof(data), reg) != sizeof(data))
{
- ERROR_PRINT("cpu %d reg %x",cpu, reg);
+ ERROR_PRINT(Cannot write MSR reg 0x%x with WRMSR instruction on CPU %d\n,
+ reg, cpu);
}
}
else
diff --git a/src/multiplex.c b/src/multiplex.c
index b3d927d..68a6b88 100644
--- a/src/multiplex.c
+++ b/src/multiplex.c
@@ -5,8 +5,8 @@
*
* Description:
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/numa.c b/src/numa.c
index c3a52cd..2f72765 100644
--- a/src/numa.c
+++ b/src/numa.c
@@ -5,8 +5,8 @@
*
* Description: Implementation of Linux NUMA interface
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -69,18 +69,18 @@ static int maxIdConfiguredNode = 0;
static void
setConfiguredNodes(void)
{
- DIR *dir;
- struct dirent *de;
+ DIR *dir;
+ struct dirent *de;
- dir = opendir("/sys/devices/system/node");
+ dir = opendir("/sys/devices/system/node");
- if (!dir)
+ if (!dir)
{
- maxIdConfiguredNode = 0;
- }
+ maxIdConfiguredNode = 0;
+ }
else
{
- while ((de = readdir(dir)) != NULL)
+ while ((de = readdir(dir)) != NULL)
{
int nd;
if (strncmp(de->d_name, "node", 4))
@@ -95,25 +95,25 @@ setConfiguredNodes(void)
maxIdConfiguredNode = nd;
}
}
- closedir(dir);
- }
+ closedir(dir);
+ }
}
static void
nodeMeminfo(int node, uint64_t* totalMemory, uint64_t* freeMemory)
{
- FILE *fp;
+ FILE *fp;
bstring filename;
bstring totalString = bformat("MemTotal:");
bstring freeString = bformat("MemFree:");
int i;
- filename = bformat("/sys/devices/system/node/node%d/meminfo", node);
+ filename = bformat("/sys/devices/system/node/node%d/meminfo", node);
- if (NULL != (fp = fopen (bdata(filename), "r")))
- {
- bstring src = bread ((bNread) fread, fp);
+ if (NULL != (fp = fopen (bdata(filename), "r")))
+ {
+ bstring src = bread ((bNread) fread, fp);
struct bstrList* tokens = bsplit(src,(char) '\n');
for (i=0;i<tokens->qty;i++)
@@ -133,13 +133,13 @@ nodeMeminfo(int node, uint64_t* totalMemory, uint64_t* freeMemory)
*freeMemory = str2int(bdata(subtokens->entry[0]));
}
}
- }
+ }
else
{
ERROR;
}
- fclose(fp);
+ fclose(fp);
}
static int
@@ -211,7 +211,6 @@ nodeProcessorList(int node, uint32_t** list)
/* FIXME: CPU list here is not physical cores first but numerical sorted */
-
return count;
}
@@ -303,6 +302,7 @@ numa_init()
for (i=0; i<numa_info.numberOfNodes; i++)
{
nodeMeminfo(i, &numa_info.nodes[i].totalMemory, &numa_info.nodes[i].freeMemory);
+ numa_info.nodes[i].id = i;
numa_info.nodes[i].numberOfProcessors = nodeProcessorList(i,&numa_info.nodes[i].processors);
numa_info.nodes[i].numberOfDistances = nodeDistanceList(i, numa_info.numberOfNodes, &numa_info.nodes[i].distances);
}
diff --git a/src/pci.c b/src/pci.c
index 01d0887..2e8a22f 100644
--- a/src/pci.c
+++ b/src/pci.c
@@ -8,8 +8,8 @@
* performance monitoring registers in PCI Cfg space
* for Intel Sandy Bridge Processors.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -156,52 +156,51 @@ pci_init(int initSocket_fd)
return;
}
- socket_count = cntr;
+ socket_count = cntr;
- bstring filepath = bfromcstr ( PCI_ROOT_PATH );
- bcatcstr(filepath, socket_bus[0]);
- bcatcstr(filepath, pci_DevicePath[0] );
+ bstring filepath = bfromcstr ( PCI_ROOT_PATH );
+ bcatcstr(filepath, socket_bus[0]);
+ bcatcstr(filepath, pci_DevicePath[0] );
if (access(bdata(filepath),F_OK))
{
fprintf(stderr, "INFO\n");
- fprintf(stderr, " This system has no support for PCI based Uncore counters.\n");
- fprintf(stderr, " This means you cannot use performance groups as MEM, which require Uncore counters.\n\n");
+ fprintf(stderr, "This system has no support for PCI based Uncore counters.\n");
+ fprintf(stderr, "This means you cannot use performance groups as MEM, which require Uncore counters.\n\n");
return;
}
bdestroy(filepath);
- if (accessClient_mode == DAEMON_AM_DIRECT)
+ for (int j=0; j<socket_count; j++)
{
- if(geteuid() != 0)
+ for (int i=0; i<MAX_NUM_DEVICES; i++)
{
- fprintf(stderr, "WARNING\n");
- fprintf(stderr, " Direct access to the PCI Cfg Adressspace is only allowed for uid root!\n");
- fprintf(stderr, " This means you can use performance groups as MEM only as root in direct mode.\n");
- fprintf(stderr, " Alternatively you might want to look into (sys)daemonmode.\n\n");
- }
- for (int j=0; j<socket_count; j++)
- {
- for (int i=0; i<MAX_NUM_DEVICES; i++)
- {
+ bstring filepath = bfromcstr ( PCI_ROOT_PATH );
+ bcatcstr(filepath, socket_bus[j]);
+ bcatcstr(filepath, pci_DevicePath[i] );
- bstring filepath = bfromcstr ( PCI_ROOT_PATH );
- bcatcstr(filepath, socket_bus[j]);
- bcatcstr(filepath, pci_DevicePath[i] );
-
- if (!access(bdata(filepath),R_OK|W_OK))
- {
- FD[j][i] = 0;
- }
- else
- {
- //fprintf(stderr, "Device %s not found, excluded it from device list\n",bdata(filepath));
- FD[j][i] = -2;
- }
- bdestroy(filepath);
+ if (!access(bdata(filepath),F_OK))
+ {
+ FD[j][i] = 0;
}
+ else
+ {
+ FD[j][i] = -2;
+ }
+ bdestroy(filepath);
+ }
+ }
+
+ if (accessClient_mode == DAEMON_AM_DIRECT)
+ {
+ if(geteuid() != 0)
+ {
+ fprintf(stderr, "WARNING\n");
+ fprintf(stderr, "Direct access to the PCI Cfg Adressspace is only allowed for uid root!\n");
+ fprintf(stderr, "This means you can use performance groups as MEM only as root in direct mode.\n");
+ fprintf(stderr, "Alternatively you might want to look into (sys)daemonmode.\n\n");
}
}
else /* daemon or sysdaemon-mode */
@@ -214,20 +213,18 @@ pci_init(int initSocket_fd)
void
pci_finalize()
{
- if (accessClient_mode == DAEMON_AM_DIRECT)
+ for (int j=0; j<socket_count; j++)
{
- for (int j=0; j<socket_count; j++)
+ for (int i=0; i<MAX_NUM_DEVICES; i++)
{
- for (int i=0; i<MAX_NUM_DEVICES; i++)
+ if (FD[j][i] > 0)
{
- if (FD[j][i] > 0)
- {
- close(FD[j][i]);
- }
+ close(FD[j][i]);
}
}
}
- else
+
+ if (accessClient_mode != DAEMON_AM_DIRECT)
{
socket_fd = -1;
}
@@ -238,16 +235,16 @@ uint32_t
pci_read(int cpu, PciDeviceIndex device, uint32_t reg)
{
int socketId = affinity_core2node_lookup[cpu];
+ if ( FD[socketId][device] == -2)
+ {
+ fprintf(stderr, "Trying to access non-existent PCI device (%s) for reading\n", pci_DevicePath[device]);
+ return 0;
+ }
if (accessClient_mode == DAEMON_AM_DIRECT)
{
uint32_t data = 0;
- if ( FD[socketId][device] == -2)
- {
- fprintf(stderr, "Accessing non-existent device %s%s%s\n",PCI_ROOT_PATH,socket_bus[socketId],pci_DevicePath[device]);
- return data;
- }
- else if ( !FD[socketId][device] )
+ if ( !FD[socketId][device] )
{
bstring filepath = bfromcstr ( PCI_ROOT_PATH );
bcatcstr(filepath, socket_bus[socketId]);
@@ -256,17 +253,16 @@ pci_read(int cpu, PciDeviceIndex device, uint32_t reg)
if ( FD[socketId][device] < 0)
{
- fprintf(stderr, "ERROR in pci_read:\n failed to open pci device %s: %s!\n",
+ fprintf(stderr, "ERROR in pci_read: failed to open pci device %s: %s!\n",
bdata(filepath), strerror(errno));
- // exit(127);
}
bdestroy(filepath);
}
if ( FD[socketId][device] > 0 &&
- pread(FD[socketId][device], &data, sizeof data, reg) != sizeof data )
+ pread(FD[socketId][device], &data, sizeof data, reg) != sizeof data )
{
- ERROR_PRINT("cpu %d reg %x",cpu, reg);
+ ERROR_PRINT("ERROR in pci_read: failed on CPU %d Register 0x%x", cpu, reg);
}
return data;
@@ -284,14 +280,14 @@ pci_write(int cpu, PciDeviceIndex device, uint32_t reg, uint32_t data)
{
int socketId = affinity_core2node_lookup[cpu];
+ if ( FD[socketId][device] == -2)
+ {
+ fprintf(stderr, "Trying to access non-existent PCI device (%s) for writing\n", pci_DevicePath[device]);
+ return;
+ }
if (accessClient_mode == DAEMON_AM_DIRECT)
{
- if ( FD[socketId][device] == -2)
- {
- fprintf(stderr, "Accessing non-existent device %s%s%s\n",PCI_ROOT_PATH,socket_bus[socketId],pci_DevicePath[device]);
- return;
- }
- else if ( !FD[socketId][device] )
+ if ( !FD[socketId][device] )
{
bstring filepath = bfromcstr ( PCI_ROOT_PATH );
bcatcstr(filepath, socket_bus[socketId]);
@@ -300,20 +296,17 @@ pci_write(int cpu, PciDeviceIndex device, uint32_t reg, uint32_t data)
if ( FD[socketId][device] < 0)
{
- fprintf(stderr, "ERROR in pci_write:\n failed to open pci device %s: %s!\n",
+ fprintf(stderr, "ERROR in pci_write: failed to open pci device %s: %s!\n",
bdata(filepath), strerror(errno));
- // exit(127);
}
bdestroy(filepath);
}
if ( FD[socketId][device] > 0 &&
- pwrite(FD[socketId][device], &data, sizeof data, reg) != sizeof data)
+ pwrite(FD[socketId][device], &data, sizeof data, reg) != sizeof data)
{
- ERROR_PRINT("cpu %d reg %x",cpu, reg);
+ ERROR_PRINT("ERROR in pci_write: failed on CPU %d Register 0x%x", cpu, reg);
}
-
- // printf("WRITE Device %s cpu %d reg 0x%x data 0x%x \n",bdata(filepath), cpu, reg, data);
}
else
{ /* daemon or sysdaemon-mode */
@@ -325,21 +318,19 @@ uint32_t
pci_tread(const int tsocket_fd, const int cpu, PciDeviceIndex device, uint32_t reg)
{
int socketId = affinity_core2node_lookup[cpu];
+ if ( FD[socketId][device] == -2)
+ {
+ return 0;
+ }
if (accessClient_mode == DAEMON_AM_DIRECT)
{
uint32_t data = 0;
- if ( FD[socketId][device] == -2)
- {
- fprintf(stderr, "Accessing non-existent device %s%s%s\n",PCI_ROOT_PATH,socket_bus[socketId],pci_DevicePath[device]);
- return data;
- }
- else if ( !FD[socketId][device] )
+ if ( !FD[socketId][device] )
{
bstring filepath = bfromcstr ( PCI_ROOT_PATH );
bcatcstr(filepath, socket_bus[socketId]);
bcatcstr(filepath, pci_DevicePath[device] );
- // printf("Generate PATH = %s \n",bdata(filepath));
FD[socketId][device] = open( bdata(filepath), O_RDWR);
@@ -347,17 +338,15 @@ pci_tread(const int tsocket_fd, const int cpu, PciDeviceIndex device, uint32_t r
{
fprintf(stderr, "ERROR in pci_tread:\n failed to open pci device %s: %s!\n",
bdata(filepath), strerror(errno));
- // exit(127);
}
bdestroy(filepath);
}
if ( FD[socketId][device] > 0 &&
- pread(FD[socketId][device], &data, sizeof data, reg) != sizeof data )
+ pread(FD[socketId][device], &data, sizeof data, reg) != sizeof data )
{
- ERROR_PRINT("cpu %d reg %x",cpu, reg);
+ ERROR_PRINT("ERROR in pci_tread: failed on CPU %d Register 0x%x", cpu, reg);
}
- // printf("READ Device %s cpu %d reg 0x%x data 0x%x \n",bdata(filepath), cpu, reg, data);
return data;
}
@@ -371,39 +360,33 @@ void
pci_twrite( const int tsocket_fd, const int cpu, PciDeviceIndex device, uint32_t reg, uint32_t data)
{
int socketId = affinity_core2node_lookup[cpu];
-
+ if ( FD[socketId][device] == -2)
+ {
+ return;
+ }
if (accessClient_mode == DAEMON_AM_DIRECT)
{
- if ( FD[socketId][device] == -2)
- {
- fprintf(stderr, "Accessing non-existent device %s%s%s\n",PCI_ROOT_PATH,socket_bus[socketId],pci_DevicePath[device]);
- return;
- }
- else if ( !FD[socketId][device] )
+ if ( !FD[socketId][device] )
{
bstring filepath = bfromcstr ( PCI_ROOT_PATH );
bcatcstr(filepath, socket_bus[socketId]);
bcatcstr(filepath, pci_DevicePath[device] );
- // printf("Generate PATH = %s \n",bdata(filepath));
FD[socketId][device] = open( bdata(filepath), O_RDWR);
if ( FD[socketId][device] < 0)
{
- fprintf(stderr, "ERROR in pci_twrite:\n failed to open pci device %s: %s!\n",
+ fprintf(stderr, "ERROR in pci_twrite: failed to open pci device %s: %s!\n",
bdata(filepath), strerror(errno));
- //exit(127);
}
bdestroy(filepath);
}
if ( FD[socketId][device] > 0 &&
- pwrite(FD[socketId][device], &data, sizeof data, reg) != sizeof data)
+ pwrite(FD[socketId][device], &data, sizeof data, reg) != sizeof data)
{
- ERROR_PRINT("cpu %d reg %x",cpu, reg);
+ ERROR_PRINT("ERROR in pci_twrite: failed on CPU %d Register 0x%x", cpu, reg);
}
-
- // printf("WRITE Device %s cpu %d reg 0x%x data 0x%x \n",bdata(filepath), cpu, reg, data);
}
else
{ /* daemon or sysdaemon-mode */
diff --git a/src/perfmon.c b/src/perfmon.c
index 6d1630f..30cacba 100644
--- a/src/perfmon.c
+++ b/src/perfmon.c
@@ -5,8 +5,8 @@
*
* Description: Implementation of perfmon Module.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -37,6 +37,7 @@
#include <float.h>
#include <unistd.h>
#include <sys/types.h>
+#include <assert.h>
#include <types.h>
#include <bitUtil.h>
@@ -71,6 +72,7 @@ static PerfmonEvent* eventHash;
static PerfmonCounterMap* counter_map;
static PerfmonGroupMap* group_map;
static PerfmonGroupHelp* group_help;
+static EventSetup * eventSetup;
static TimerData timeData;
static double rdtscTime;
@@ -123,7 +125,7 @@ static void initThread(int , int );
for (i=0; i<numRows; i++) \
{ \
fc->entry[1+i] = \
- bfromcstr(perfmon_set.events[i].event.name); }
+ bfromcstr(perfmon_set.events[i].event.name); }
#define INIT_BASIC \
fc = bstrListCreate(); \
@@ -145,6 +147,7 @@ static void initThread(int , int );
#include <perfmon_k10.h>
#include <perfmon_interlagos.h>
#include <perfmon_kabini.h>
+#include <perfmon_silvermont.h>
/* ##### EXPORTED FUNCTION POINTERS ################################### */
void (*perfmon_startCountersThread) (int thread_id);
@@ -154,6 +157,8 @@ void (*perfmon_setupCounterThread) (int thread_id,
PerfmonEvent* event, PerfmonCounterIndex index);
void (*printDerivedMetrics) (PerfmonGroup group);
void (*logDerivedMetrics) (PerfmonGroup group, double time, double timeStamp);
+void (*perfmon_getDerivedCounterValuesArch)(PerfmonGroup group, float * values, float * out_max, float * out_min);
+
/* ##### FUNCTION POINTERS - LOCAL TO THIS SOURCE FILE ################ */
@@ -163,18 +168,39 @@ static void (*initThreadArch) (PerfmonThread *thread);
static int getIndex (bstring reg, PerfmonCounterIndex* index)
{
+ int ret = FALSE;
+ int err = 0;
+ uint64_t tmp;
for (int i=0; i< perfmon_numCounters; i++)
{
if (biseqcstr(reg, counter_map[i].key))
{
*index = counter_map[i].index;
- return TRUE;
+ ret = TRUE;
+ }
+ }
+ if ((ret) && (counter_map[*index].type != THERMAL) && (counter_map[*index].type != POWER))
+ {
+ if (counter_map[*index].device == 0)
+ {
+ tmp = msr_read(0, counter_map[*index].configRegister);
+ msr_write(0, counter_map[*index].configRegister,0x0ULL);
}
+ else
+ {
+ tmp = pci_read(0, counter_map[*index].device, counter_map[*index].configRegister);
+ pci_write(0, counter_map[*index].device, counter_map[*index].configRegister, 0x0U);
+ }
+ }
+ else if ((ret) && (counter_map[*index].type == POWER))
+ {
+ tmp = msr_read(0, counter_map[*index].counterRegister);
}
- return FALSE;
+ return ret;
}
+
static int
getEvent(bstring event_str, PerfmonEvent* event)
{
@@ -214,9 +240,9 @@ initThread(int thread_id, int cpu_id)
}
struct cbsScan{
- /* Parse state */
- bstring src;
- int line;
+ /* Parse state */
+ bstring src;
+ int line;
LikwidResults* results;
};
@@ -242,7 +268,8 @@ static int lineCb (void* parm, int ofs, int len)
ERROR_PLAIN_PRINT(Failed to read marker file);
}
ret = sscanf (bdata(strList->entry[0]), "%d", &id); CHECKERROR;
- st->results[id].tag = bstrcpy(strList->entry[1]);
+ st->results[id].tag = bstrcpy(line);
+ bdelete(st->results[id].tag, 0, blength(strList->entry[0])+1);
}
else
{
@@ -291,14 +318,15 @@ readMarkerFile(bstring filename, LikwidResults** resultsRef)
ret = sscanf (bdata(src), "%d %d", &numberOfThreads, &perfmon_numRegions); CHECKERROR;
results = (LikwidResults*) malloc(perfmon_numRegions * sizeof(LikwidResults));
- if (numberOfThreads != perfmon_numThreads)
+ if (perfmon_numRegions == 0)
{
- fprintf(OUTSTREAM,"ERROR: \
- Is the number of threads for likwid-perfctr equal \
- to the number in the measured application?\n");
-
- fprintf(OUTSTREAM,"likwid_markerInit and likwid_markerClose \
- must be called in serial region.\n");
+ fprintf(OUTSTREAM,"ERROR: No region results are listed in marker file\n");
+ ERROR_PLAIN_PRINT(No region results in marker file);
+ }
+ else if (numberOfThreads != perfmon_numThreads)
+ {
+ fprintf(OUTSTREAM,"ERROR: Is the number of threads for likwid-perfctr equal to the number in the measured application?\n");
+ fprintf(OUTSTREAM,"likwid_markerInit and likwid_markerClose must be called in serial region.\n");
ERROR_PRINT(Number of threads %d in marker file unequal to number of threads in likwid-perfCtr %d,numberOfThreads,perfmon_numThreads);
}
@@ -352,22 +380,22 @@ readMarkerFile(bstring filename, LikwidResults** resultsRef)
static void
printResultTable(PerfmonResultTable * tableData)
{
- if (perfmon_csvoutput)
+ if (perfmon_csvoutput)
{
int r, c;
- for (c = 0; c < tableData->header->qty; c++)
+ for (c = 0; c < tableData->header->qty; c++)
{
fprintf(OUTSTREAM, "%s%s", ((c == 0) ? "\n" : ","), tableData->header->entry[c]->data);
}
fprintf(OUTSTREAM, "%s", "\n");
- for (r = 0; r < tableData->numRows; r++)
+ for (r = 0; r < tableData->numRows; r++)
{
fprintf(OUTSTREAM, "%s", tableData->rows[r].label->data);
- for (c = 0; c < tableData->numColumns; c++)
+ for (c = 0; c < tableData->numColumns; c++)
{
- if (!isnan(tableData->rows[r].value[c]))
+ if (!isnan(tableData->rows[r].value[c]))
{
fprintf(OUTSTREAM, ",%lf", tableData->rows[r].value[c]);
}
@@ -424,7 +452,7 @@ getGroupId(bstring groupStr,PerfmonGroup* group)
for (int i=0; i<perfmon_numGroups; i++)
{
- if (biseqcstr(groupStr,group_map[i].key))
+ if (biseqcstr(groupStr,group_map[i].key))
{
*group = group_map[i].index;
return i;
@@ -478,7 +506,7 @@ freeResultTable(PerfmonResultTable* tableData)
free(tableData->rows);
}
-static void
+static void
initResultTable(PerfmonResultTable* tableData,
bstrList* firstColumn,
int numRows,
@@ -505,17 +533,13 @@ initResultTable(PerfmonResultTable* tableData,
for (i=0; i<numRows; i++)
{
-// tableData->rows[i].label =
-// bfromcstr(perfmon_set.events[i].event.name);
-
tableData->rows[i].label = firstColumn->entry[1+i];
-
tableData->rows[i].value =
(double*) malloc((numColumns)*sizeof(double));
}
}
-static void
+static void
initStatisticTable(PerfmonResultTable* tableData,
bstrList* firstColumn,
int numRows)
@@ -545,12 +569,8 @@ initStatisticTable(PerfmonResultTable* tableData,
for (i=0; i<numRows; i++)
{
-// tableData->rows[i].label =
-// bfromcstr(perfmon_set.events[i].event.name);
-
tableData->rows[i].label = firstColumn->entry[1+i];
bcatcstr(tableData->rows[i].label," STAT");
-
tableData->rows[i].value =
(double*) malloc((numColumns)*sizeof(double));
}
@@ -802,7 +822,7 @@ perfmon_printMarkerResults(bstring filepath)
bstrListDestroy(regionLabels);
}
-void
+void
perfmon_logCounterResults(double time)
{
int i;
@@ -818,11 +838,8 @@ perfmon_logCounterResults(double time)
for (j=0; j<perfmon_numThreads; j++)
{
fprintf(OUTSTREAM, "%e ",
- (double) (perfmon_threadData[j].counters[perfmon_set.events[i].index].counterData) - perfmon_threadState[j][i]);
- tmp =perfmon_threadData[j].counters[perfmon_set.events[i].index].counterData;
- perfmon_threadData[j].counters[perfmon_set.events[i].index].counterData -=
- perfmon_threadState[j][i];
- perfmon_threadState[j][i] = tmp;
+ (double) (perfmon_threadData[j].counters[perfmon_set.events[i].index].counterData) - perfmon_threadState[j][perfmon_set.events[i].index]);
+ perfmon_threadState[j][perfmon_set.events[i].index] = perfmon_threadData[j].counters[perfmon_set.events[i].index].counterData;
}
fprintf(OUTSTREAM,"\n");
}
@@ -835,7 +852,7 @@ perfmon_logCounterResults(double time)
fflush(OUTSTREAM);
}
-void
+void
perfmon_printCounterResults()
{
int i;
@@ -911,6 +928,79 @@ perfmon_getEventResult(int thread, int index)
return (double) perfmon_threadData[thread].counters[perfmon_set.events[index].index].counterData;
}
+/*
+ * Build an EventSetup descriptor from a group name or a custom event string.
+ *
+ * eventGroupString is looked up with getGroupId(). If no group matches
+ * (groupSet == _NOGROUP) the string itself is parsed as an event set,
+ * otherwise the matching group's config string is parsed instead.
+ *
+ * The returned struct carries heap allocations made here (eventSetConfig,
+ * perfmon_set, groupName, eventNames, derivedNames); no release routine is
+ * visible in this patch, so ownership passes to the caller.
+ * eventNames/derivedNames only point at strings owned by perfmon_set and
+ * group_map.
+ */
+EventSetup perfmon_prepareEventSetup(char* eventGroupString){
+    EventSetup setup;
+    bstring eventString = bfromcstr(eventGroupString);
+
+    /* size of the pointed-to structs, not of the pointers themselves */
+    setup.eventSetConfig = malloc(sizeof(*setup.eventSetConfig));
+    setup.perfmon_set = malloc(sizeof(*setup.perfmon_set));
+
+    int groupId = getGroupId(eventString, & setup.groupSet);
+    setup.groupName = strdup(eventGroupString);
+    setup.groupIndex = groupId;
+    if (setup.groupSet == _NOGROUP)
+    {
+        /* eventString is a custom eventSet */
+        bstr_to_eventset(setup.eventSetConfig, eventString);
+    }
+    else
+    {
+        /* eventString is a group: release the name before replacing it
+         * with the group's event list, otherwise the first bstring leaks */
+        bdestroy(eventString);
+        eventString = bfromcstr(group_map[groupId].config);
+        bstr_to_eventset(setup.eventSetConfig, eventString);
+    }
+
+    perfmon_initEventSet(setup.eventSetConfig, setup.perfmon_set);
+    bdestroy(eventString);
+
+    setup.eventNames = (const char**) malloc(setup.perfmon_set->numberOfEvents * sizeof(const char*));
+
+    setup.numberOfEvents = setup.perfmon_set->numberOfEvents;
+    for (int i=0; i< setup.perfmon_set->numberOfEvents; i++)
+    {
+        setup.eventNames[i] = setup.perfmon_set->events[i].event.name;
+    }
+
+    if (setup.groupSet == _NOGROUP)
+    {
+        /* A custom event set has no group entry, so group_map[groupId]
+         * must not be read.
+         * NOTE(review): presumably getGroupId() returns an out-of-range id
+         * when nothing matches - confirm. */
+        setup.numberOfDerivedCounters = 0;
+        setup.derivedNames = NULL;
+    }
+    else
+    {
+        setup.numberOfDerivedCounters = group_map[groupId].derivedCounters;
+        setup.derivedNames = (const char**) malloc(setup.numberOfDerivedCounters * sizeof(const char*));
+
+        for(int i=0; i < group_map[groupId].derivedCounters; i++){
+            setup.derivedNames[i] = group_map[groupId].derivedCounterNames[i];
+        }
+    }
+
+    return setup;
+}
+
+
+/*
+ * Install `setup` as the current global configuration (eventSetup,
+ * groupSet and a copy of its event set in perfmon_set) and then call
+ * perfmon_setupCounters().
+ */
+void perfmon_setupCountersForEventSet(EventSetup * setup){
+    /* publish the descriptor and its group first ... */
+    eventSetup = setup;
+    groupSet = setup->groupSet;
+    /* ... then copy the event set by value into the global */
+    perfmon_set = *(setup->perfmon_set);
+
+    perfmon_setupCounters();
+}
+
+/*
+ * Reduce the counts of every event in the active event set (perfmon_set)
+ * over all threads.
+ *
+ * For event e:
+ *   values[e]  - sum over all threads divided by perfmon_numThreads
+ *                (integer division)
+ *   out_max[e] - largest per-thread count
+ *   out_min[e] - smallest per-thread count
+ * Each output array needs at least perfmon_set.numberOfEvents entries.
+ * With no threads all three outputs are set to 0.
+ */
+void perfmon_getEventCounterValues(uint64_t * values, uint64_t * out_max, uint64_t * out_min){
+
+    for(int e = 0; e < perfmon_set.numberOfEvents; e++ ){
+        uint64_t sum = 0;
+        uint64_t min = (uint64_t) -1;
+        uint64_t max = 0;
+
+        /* nothing to reduce; also keeps the division below well-defined */
+        if (perfmon_numThreads <= 0){
+            values[e] = 0;
+            out_min[e] = 0;
+            out_max[e] = 0;
+            continue;
+        }
+
+        for(int i = 0; i < perfmon_numThreads; i++){
+            /* counters[] is addressed by the counter index stored in the
+             * event entry (same lookup as perfmon_getEventResult), not by
+             * the event's position in the set */
+            uint64_t cur = perfmon_threadData[i].counters[perfmon_set.events[e].index].counterData;
+            sum += cur;
+            max = max > cur ? max : cur;
+            min = min < cur ? min : cur;
+        }
+        values[e] = sum / perfmon_numThreads;
+        out_min[e] = min;
+        out_max[e] = max;
+    }
+}
+
+/*
+ * Forward to the architecture-specific handler stored in
+ * perfmon_getDerivedCounterValuesArch, passing the group of the EventSetup
+ * currently referenced by the global eventSetup.
+ *
+ * NOTE(review): eventSetup is assigned in perfmon_setupCountersForEventSet,
+ * so that presumably has to run first - confirm. Some perfmon_init cases in
+ * this patch (e.g. the Silvermont one) do not assign the handler; verify it
+ * cannot be unset when this is called.
+ */
+void perfmon_getDerivedCounterValues(float * values, float * out_max, float * out_min){
+ perfmon_getDerivedCounterValuesArch(eventSetup->groupSet, values, out_max, out_min);
+}
int
perfmon_setupEventSetC(char* eventCString, const char*** eventnames)
@@ -955,10 +1045,9 @@ perfmon_setupEventSet(bstring eventString, BitMask* counterMask)
StrUtilEventSet eventSetConfig;
PerfmonEvent eventSet;
struct bstrList* subStr;
-
groupId = getGroupId(eventString, &groupSet);
-
+
if (groupSet == _NOGROUP)
{
subStr = bstrListCreate();
@@ -966,15 +1055,25 @@ perfmon_setupEventSet(bstring eventString, BitMask* counterMask)
eventBool = getEvent(subStr->entry[0], &eventSet);
bstrListDestroy(subStr);
}
-
+
if (groupSet == _NOGROUP && eventBool != FALSE)
{
/* eventString is a custom eventSet */
/* append fixed counters for Intel processors */
if ( cpuid_info.family == P6_FAMILY )
{
- bcatcstr(eventString,
- ",INSTR_RETIRED_ANY:FIXC0,CPU_CLK_UNHALTED_CORE:FIXC1,CPU_CLK_UNHALTED_REF:FIXC2");
+ if (cpuid_info.perf_num_fixed_ctr > 0)
+ {
+ bcatcstr(eventString,",INSTR_RETIRED_ANY:FIXC0");
+ }
+ if (cpuid_info.perf_num_fixed_ctr > 1)
+ {
+ bcatcstr(eventString,",CPU_CLK_UNHALTED_CORE:FIXC1");
+ }
+ if (cpuid_info.perf_num_fixed_ctr > 2)
+ {
+ bcatcstr(eventString,",CPU_CLK_UNHALTED_REF:FIXC2");
+ }
}
bstr_to_eventset(&eventSetConfig, eventString);
}
@@ -988,7 +1087,9 @@ perfmon_setupEventSet(bstring eventString, BitMask* counterMask)
if ( group_map[groupId].isUncore )
{
if ( (cpuid_info.model != SANDYBRIDGE_EP) &&
- (cpuid_info.model != IVYBRIDGE_EP))
+ (cpuid_info.model != IVYBRIDGE_EP) &&
+ (cpuid_info.model != WESTMERE_EX) &&
+ (cpuid_info.model != NEHALEM_EX))
{
ERROR_PLAIN_PRINT(Uncore not supported on Desktop processors!);
exit(EXIT_FAILURE);
@@ -1083,7 +1184,9 @@ perfmon_printAvailableGroups()
if ( group_map[i].isUncore )
{
if ( (cpuid_info.model == SANDYBRIDGE_EP) ||
- (cpuid_info.model == IVYBRIDGE_EP))
+ (cpuid_info.model == IVYBRIDGE_EP) ||
+ (cpuid_info.model == WESTMERE_EX) ||
+ (cpuid_info.model == NEHALEM_EX))
{
fprintf(OUTSTREAM,"%s: %s\n",group_map[i].key,
group_map[i].info);
@@ -1136,12 +1239,16 @@ perfmon_init(int numThreads_local, int threads[], FILE* outstream)
{
perfmon_threadState[i] = (double*)
malloc(NUM_PMC * sizeof(double));
+ for(int j=0; j<NUM_PMC;j++)
+ {
+ perfmon_threadState[i][j] = 0.0;
+ }
}
- for(int i=0; i<MAX_NUM_NODES; i++) socket_lock[i] = LOCK_INIT;
-
OUTSTREAM = outstream;
+ for(int i=0; i<MAX_NUM_NODES; i++) socket_lock[i] = LOCK_INIT;
+
if (accessClient_mode != DAEMON_AM_DIRECT)
{
accessClient_init(&socket_fd);
@@ -1171,6 +1278,7 @@ perfmon_init(int numThreads_local, int threads[], FILE* outstream)
initThreadArch = perfmon_init_pm;
printDerivedMetrics = perfmon_printDerivedMetrics_pm;
+ assert(FALSE && "NOT SUPPORTED");
perfmon_startCountersThread = perfmon_startCountersThread_pm;
perfmon_stopCountersThread = perfmon_stopCountersThread_pm;
perfmon_setupCounterThread = perfmon_setupCounterThread_pm;
@@ -1196,11 +1304,35 @@ perfmon_init(int numThreads_local, int threads[], FILE* outstream)
initThreadArch = perfmon_init_core2;
printDerivedMetrics = perfmon_printDerivedMetricsAtom;
+ perfmon_getDerivedCounterValuesArch = perfmon_getDerivedCounterValuesAtom;
perfmon_startCountersThread = perfmon_startCountersThread_core2;
perfmon_stopCountersThread = perfmon_stopCountersThread_core2;
perfmon_setupCounterThread = perfmon_setupCounterThread_core2;
break;
+ case ATOM_SILVERMONT_C:
+ case ATOM_SILVERMONT_E:
+ case ATOM_SILVERMONT_F1:
+ case ATOM_SILVERMONT_F2:
+ case ATOM_SILVERMONT_F3:
+ power_init(0);
+ thermal_init(0);
+ eventHash = silvermont_arch_events;
+ perfmon_numArchEvents = perfmon_numArchEventsSilvermont;
+
+ group_map = silvermont_group_map;
+ group_help = silvermont_group_help;
+ perfmon_numGroups = perfmon_numGroupsSilvermont;
+
+ counter_map = silvermont_counter_map;
+ perfmon_numCounters = perfmon_numCountersSilvermont;
+
+ initThreadArch = perfmon_init_silvermont;
+ printDerivedMetrics = perfmon_printDerivedMetricsSilvermont;
+ perfmon_startCountersThread = perfmon_startCountersThread_silvermont;
+ perfmon_stopCountersThread = perfmon_stopCountersThread_silvermont;
+ perfmon_setupCounterThread = perfmon_setupCounterThread_silvermont;
+ break;
case CORE_DUO:
ERROR_PLAIN_PRINT(Unsupported Processor);
@@ -1224,6 +1356,8 @@ perfmon_init(int numThreads_local, int threads[], FILE* outstream)
initThreadArch = perfmon_init_core2;
printDerivedMetrics = perfmon_printDerivedMetricsCore2;
+ perfmon_getDerivedCounterValuesArch = perfmon_getDerivedCounterValuesCore2;
+
logDerivedMetrics = perfmon_logDerivedMetricsCore2;
perfmon_startCountersThread = perfmon_startCountersThread_core2;
perfmon_stopCountersThread = perfmon_stopCountersThread_core2;
@@ -1243,12 +1377,13 @@ perfmon_init(int numThreads_local, int threads[], FILE* outstream)
counter_map = westmereEX_counter_map;
perfmon_numCounters = perfmon_numCountersWestmereEX;
- initThreadArch = perfmon_init_westmereEX;
+ initThreadArch = perfmon_init_nehalemEX;
printDerivedMetrics = perfmon_printDerivedMetricsNehalemEX;
+ perfmon_getDerivedCounterValuesArch = perfmon_getDerivedCounterValuesNehalemEX;
logDerivedMetrics = perfmon_logDerivedMetricsNehalemEX;
- perfmon_startCountersThread = perfmon_startCountersThread_westmereEX;
- perfmon_stopCountersThread = perfmon_stopCountersThread_westmereEX;
- perfmon_readCountersThread = perfmon_readCountersThread_westmereEX;
+ perfmon_startCountersThread = perfmon_startCountersThread_nehalemEX;
+ perfmon_stopCountersThread = perfmon_stopCountersThread_nehalemEX;
+ perfmon_readCountersThread = perfmon_readCountersThread_nehalemEX;
perfmon_setupCounterThread = perfmon_setupCounterThread_nehalemEX;
break;
@@ -1266,6 +1401,7 @@ perfmon_init(int numThreads_local, int threads[], FILE* outstream)
initThreadArch = perfmon_init_westmereEX;
printDerivedMetrics = perfmon_printDerivedMetricsWestmereEX;
+ perfmon_getDerivedCounterValuesArch = perfmon_getDerivedCounterValuesWestmereEX;
logDerivedMetrics = perfmon_logDerivedMetricsWestmereEX;
perfmon_startCountersThread = perfmon_startCountersThread_westmereEX;
perfmon_stopCountersThread = perfmon_stopCountersThread_westmereEX;
@@ -1291,6 +1427,8 @@ perfmon_init(int numThreads_local, int threads[], FILE* outstream)
initThreadArch = perfmon_init_nehalem;
printDerivedMetrics = perfmon_printDerivedMetricsNehalem;
+ perfmon_getDerivedCounterValuesArch = perfmon_getDerivedCounterValuesNehalem;
+
logDerivedMetrics = perfmon_logDerivedMetricsNehalem;
perfmon_startCountersThread = perfmon_startCountersThread_nehalem;
perfmon_stopCountersThread = perfmon_stopCountersThread_nehalem;
@@ -1316,6 +1454,8 @@ perfmon_init(int numThreads_local, int threads[], FILE* outstream)
initThreadArch = perfmon_init_nehalem;
printDerivedMetrics = perfmon_printDerivedMetricsWestmere;
+ perfmon_getDerivedCounterValuesArch = perfmon_getDerivedCounterValuesWestmere;
+
logDerivedMetrics = perfmon_logDerivedMetricsWestmere;
perfmon_startCountersThread = perfmon_startCountersThread_nehalem;
perfmon_stopCountersThread = perfmon_stopCountersThread_nehalem;
@@ -1329,7 +1469,7 @@ perfmon_init(int numThreads_local, int threads[], FILE* outstream)
power_init(0); /* FIXME Static coreId is dangerous */
thermal_init(0);
- pci_init(socket_fd);
+ pci_init(socket_fd);
eventHash = ivybridge_arch_events;
perfmon_numArchEvents = perfmon_numArchEventsIvybridge;
@@ -1343,6 +1483,8 @@ perfmon_init(int numThreads_local, int threads[], FILE* outstream)
initThreadArch = perfmon_init_ivybridge;
printDerivedMetrics = perfmon_printDerivedMetricsIvybridge;
+ perfmon_getDerivedCounterValuesArch = perfmon_getDerivedCounterValuesIvybridge;
+
logDerivedMetrics = perfmon_logDerivedMetricsIvybridge;
perfmon_startCountersThread = perfmon_startCountersThread_ivybridge;
perfmon_stopCountersThread = perfmon_stopCountersThread_ivybridge;
@@ -1373,6 +1515,7 @@ perfmon_init(int numThreads_local, int threads[], FILE* outstream)
initThreadArch = perfmon_init_haswell;
printDerivedMetrics = perfmon_printDerivedMetricsHaswell;
+ perfmon_getDerivedCounterValuesArch = perfmon_getDerivedCounterValuesHaswell;
logDerivedMetrics = perfmon_logDerivedMetricsHaswell;
perfmon_startCountersThread = perfmon_startCountersThread_haswell;
perfmon_stopCountersThread = perfmon_stopCountersThread_haswell;
@@ -1400,6 +1543,7 @@ perfmon_init(int numThreads_local, int threads[], FILE* outstream)
initThreadArch = perfmon_init_sandybridge;
printDerivedMetrics = perfmon_printDerivedMetricsSandybridge;
+ perfmon_getDerivedCounterValuesArch = perfmon_getDerivedCounterValuesSandybridge;
logDerivedMetrics = perfmon_logDerivedMetricsSandybridge;
perfmon_startCountersThread = perfmon_startCountersThread_sandybridge;
perfmon_stopCountersThread = perfmon_stopCountersThread_sandybridge;
@@ -1431,6 +1575,7 @@ perfmon_init(int numThreads_local, int threads[], FILE* outstream)
initThreadArch = perfmon_init_phi;
printDerivedMetrics = perfmon_printDerivedMetricsPhi;
+ perfmon_getDerivedCounterValuesArch = perfmon_getDerivedCounterValuesPhi;
logDerivedMetrics = perfmon_logDerivedMetricsPhi;
perfmon_startCountersThread = perfmon_startCountersThread_phi;
perfmon_stopCountersThread = perfmon_stopCountersThread_phi;
@@ -1457,6 +1602,7 @@ perfmon_init(int numThreads_local, int threads[], FILE* outstream)
initThreadArch = perfmon_init_k10;
printDerivedMetrics = perfmon_printDerivedMetricsK8;
+ perfmon_getDerivedCounterValuesArch = perfmon_getDerivedCounterValuesK8;
logDerivedMetrics = perfmon_logDerivedMetricsK8;
perfmon_startCountersThread = perfmon_startCountersThread_k10;
perfmon_stopCountersThread = perfmon_stopCountersThread_k10;
@@ -1477,6 +1623,7 @@ perfmon_init(int numThreads_local, int threads[], FILE* outstream)
initThreadArch = perfmon_init_k10;
printDerivedMetrics = perfmon_printDerivedMetricsK10;
+ perfmon_getDerivedCounterValuesArch = perfmon_getDerivedCounterValuesK10;
logDerivedMetrics = perfmon_logDerivedMetricsK10;
perfmon_startCountersThread = perfmon_startCountersThread_k10;
perfmon_stopCountersThread = perfmon_stopCountersThread_k10;
@@ -1497,6 +1644,7 @@ perfmon_init(int numThreads_local, int threads[], FILE* outstream)
initThreadArch = perfmon_init_interlagos;
printDerivedMetrics = perfmon_printDerivedMetricsInterlagos;
+ perfmon_getDerivedCounterValuesArch = perfmon_getDerivedCounterValuesInterlagos;
logDerivedMetrics = perfmon_logDerivedMetricsInterlagos;
perfmon_startCountersThread = perfmon_startCountersThread_interlagos;
perfmon_stopCountersThread = perfmon_stopCountersThread_interlagos;
@@ -1517,6 +1665,7 @@ perfmon_init(int numThreads_local, int threads[], FILE* outstream)
initThreadArch = perfmon_init_kabini;
printDerivedMetrics = perfmon_printDerivedMetricsKabini;
+ perfmon_getDerivedCounterValuesArch = perfmon_getDerivedCounterValuesKabini;
logDerivedMetrics = perfmon_logDerivedMetricsKabini;
perfmon_startCountersThread = perfmon_startCountersThread_kabini;
perfmon_stopCountersThread = perfmon_stopCountersThread_kabini;
diff --git a/src/power.c b/src/power.c
index 8f55cb2..3f4118c 100644
--- a/src/power.c
+++ b/src/power.c
@@ -5,8 +5,8 @@
*
* Description: Module implementing Intel RAPL interface
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -54,6 +54,7 @@ power_init(int cpuId)
{
uint64_t flags;
int hasRAPL = 0;
+ uint32_t info_register = 0x0;
/* determine Turbo Mode features */
double busSpeed;
@@ -61,10 +62,24 @@ power_init(int cpuId)
if ((cpuid_info.model == SANDYBRIDGE_EP) ||
(cpuid_info.model == SANDYBRIDGE) ||
(cpuid_info.model == HASWELL) ||
+ (cpuid_info.model == HASWELL_EX) ||
(cpuid_info.model == IVYBRIDGE_EP) ||
(cpuid_info.model == IVYBRIDGE))
{
hasRAPL = 1;
+ info_register = MSR_PKG_POWER_INFO;
+ }
+ else if (cpuid_info.model == ATOM_SILVERMONT_C)
+ {
+ hasRAPL = 1;
+ info_register = MSR_PKG_POWER_INFO_SILVERMONT;
+ }
+ else if ((cpuid_info.model == ATOM_SILVERMONT_E) ||
+ (cpuid_info.model == ATOM_SILVERMONT_F1) ||
+ (cpuid_info.model == ATOM_SILVERMONT_F2) ||
+ (cpuid_info.model == ATOM_SILVERMONT_F3))
+ {
+ hasRAPL = 1;
}
if (cpuid_info.turbo)
@@ -114,15 +129,40 @@ power_init(int cpuId)
power_info.energyUnit = pow(0.5,(double) extractBitField(flags,5,8));
power_info.timeUnit = pow(0.5,(double) extractBitField(flags,4,16));
- flags = msr_read(cpuId, MSR_PKG_POWER_INFO);
- power_info.tdp = (double) extractBitField(flags,15,0) * power_info.powerUnit;
- power_info.minPower = (double) extractBitField(flags,15,16) * power_info.powerUnit;
- power_info.maxPower = (double) extractBitField(flags,15,32) * power_info.powerUnit;
- power_info.maxTimeWindow = (double) extractBitField(flags,7,48) * power_info.timeUnit;
+ if (info_register != 0x0)
+ {
+ flags = msr_read(cpuId, info_register);
+ power_info.tdp = (double) extractBitField(flags,15,0) * power_info.powerUnit;
+ if (cpuid_info.model != ATOM_SILVERMONT_C)
+ {
+ power_info.minPower = (double) extractBitField(flags,15,16) * power_info.powerUnit;
+ power_info.maxPower = (double) extractBitField(flags,15,32) * power_info.powerUnit;
+ power_info.maxTimeWindow = (double) extractBitField(flags,7,48) * power_info.timeUnit;
+ }
+ else
+ {
+ power_info.minPower = 0.0;
+ power_info.maxPower = 0.0;
+ power_info.maxTimeWindow = 0.0;
+ }
+ }
+ else
+ {
+ power_info.tdp = 0;
+ power_info.minPower = 0.0;
+ power_info.maxPower = 0.0;
+ power_info.maxTimeWindow = 0.0;
+ }
}
else
{
power_info.powerUnit = 0.0;
+ power_info.energyUnit = 0.0;
+ power_info.timeUnit = 0.0;
+ power_info.tdp = 0;
+ power_info.minPower = 0.0;
+ power_info.maxPower = 0.0;
+ power_info.maxTimeWindow = 0.0;
}
}
diff --git a/src/pthread-overload/Makefile b/src/pthread-overload/Makefile
index bb61f96..5f460a5 100644
--- a/src/pthread-overload/Makefile
+++ b/src/pthread-overload/Makefile
@@ -4,8 +4,8 @@
#
# Description: pthread-overload Makefile
#
-# Version: 3.1.2
-# Released: 2.6.2014
+# Version: 3.1.3
+# Released: 4.11.2014
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
diff --git a/src/pthread-overload/pthread-overload.c b/src/pthread-overload/pthread-overload.c
index c53c884..e9d5dcc 100644
--- a/src/pthread-overload/pthread-overload.c
+++ b/src/pthread-overload/pthread-overload.c
@@ -3,11 +3,11 @@
*
* Filename: pthread-overload.c
*
- * Description: Overloaded library for pthread_create call.
+ * Description: Overloaded library for pthread_create call.
* Implements pinning of threads together with likwid-pin.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -171,7 +171,7 @@ pthread_create(pthread_t* thread,
{
break;
}
- if (sosearchpaths[reallpthrindex] != NULL)
+ if (sosearchpaths[reallpthrindex] != NULL)
{
reallpthrindex++;
}
diff --git a/src/strUtil.c b/src/strUtil.c
index 91a7083..cf37920 100644
--- a/src/strUtil.c
+++ b/src/strUtil.c
@@ -5,8 +5,8 @@
*
* Description: Utility routines for strings. Depends on bstring lib.
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -46,161 +46,160 @@
+/*
+ * Count the bits that are set in a cpu_set_t by walking every __cpu_mask
+ * word of set->__bits and testing each bit position.
+ * Returns the number of set bits.
+ */
static int
cpu_count(cpu_set_t* set)
{
- uint32_t i;
- int s = 0;
- const __cpu_mask *p = set->__bits;
- const __cpu_mask *end = &set->__bits[sizeof(cpu_set_t) / sizeof (__cpu_mask)];
+ uint32_t i;
+ int s = 0;
+ const __cpu_mask *p = set->__bits;
+ /* one past the last mask word of the set */
+ const __cpu_mask *end = &set->__bits[sizeof(cpu_set_t) / sizeof (__cpu_mask)];
- while (p < end)
- {
- __cpu_mask l = *p++;
-
- if (l == 0)
+ while (p < end)
{
- continue;
- }
+ __cpu_mask l = *p++;
- for (i=0; i< (sizeof(__cpu_mask)*8); i++)
- {
- if (l&(1UL<<i))
- {
- s++;
- }
+ /* empty word: nothing to count */
+ if (l == 0)
+ {
+ continue;
+ }
+
+ for (i=0; i< (sizeof(__cpu_mask)*8); i++)
+ {
+ if (l&(1UL<<i))
+ {
+ s++;
+ }
+ }
}
- }
- return s;
+ return s;
}
/* ##### FUNCTION DEFINITIONS - EXPORTED FUNCTIONS ################## */
+/*
+ * Parse str as an unsigned decimal number with strtoul() and return the
+ * result converted to int. Conversion failures are reported through the
+ * ERROR / ERROR_PRINT macros.
+ */
int str2int(const char* str)
{
- char* endptr;
- errno = 0;
- unsigned long val;
- val = strtoul(str, &endptr, 10);
- if ((errno == ERANGE && val == LONG_MAX )
- || (errno != 0 && val == 0))
- {
- ERROR;
- }
-
- if (endptr == str)
- {
- ERROR_PRINT(Cannot parse string %s to digits, str);
- }
-
- return (int) val;
+ char* endptr;
+ errno = 0;
+ unsigned long val;
+ val = strtoul(str, &endptr, 10);
+ /* strtoul reports overflow as ULONG_MAX with errno == ERANGE; comparing
+ * against LONG_MAX (the strtol value) would never detect it */
+ if ((errno == ERANGE && val == ULONG_MAX )
+ || (errno != 0 && val == 0))
+ {
+ ERROR;
+ }
+
+ /* no digits were consumed at all */
+ if (endptr == str)
+ {
+ ERROR_PRINT(Cannot parse string %s to digits, str);
+ }
+
+ return (int) val;
}
+/*
+ * Parse a ','-separated list whose items are either a single id or a
+ * 'begin-end' range into threads[] and return the number of ids requested.
+ *
+ * At most MAX_NUM_THREADS entries are stored; ids beyond that are only
+ * counted so that the limit check after the loop can report the real number
+ * without threads[] having been written past the limit first.
+ * NOTE(review): assumes threads[] holds at least MAX_NUM_THREADS entries, as
+ * tmpThreads in bstr_to_cpuset_logical does - confirm for other callers.
+ */
uint32_t
bstr_to_cpuset_physical(uint32_t* threads, const_bstring q)
{
- int i;
- unsigned int rangeBegin;
- unsigned int rangeEnd;
- uint32_t numThreads=0;
- struct bstrList* tokens;
- struct bstrList* subtokens;
-
- tokens = bsplit(q,',');
+ int i;
+ unsigned int rangeBegin;
+ unsigned int rangeEnd;
+ uint32_t numThreads=0;
+ struct bstrList* tokens;
+ struct bstrList* subtokens;
- for (i=0;i<tokens->qty;i++)
- {
- subtokens = bsplit(tokens->entry[i],'-');
+ tokens = bsplit(q,',');
- if (numThreads > MAX_NUM_THREADS)
+ for (i=0;i<tokens->qty;i++)
{
- ERROR_PRINT(Number Of threads %d too large, numThreads);
- }
+ subtokens = bsplit(tokens->entry[i],'-');
- if( subtokens->qty == 1 )
- {
- threads[numThreads] = str2int((char *) bdata(subtokens->entry[0]));
- numThreads++;
- }
- else if ( subtokens->qty == 2 )
- {
- rangeBegin = str2int((char*) bdata(subtokens->entry[0]));
- rangeEnd = str2int((char*) bdata(subtokens->entry[1]));
+ if( subtokens->qty == 1 )
+ {
+ /* store only while there is room, but keep counting */
+ if (numThreads < MAX_NUM_THREADS)
+ {
+ threads[numThreads] = str2int((char *) bdata(subtokens->entry[0]));
+ }
+ numThreads++;
+ }
+ else if ( subtokens->qty == 2 )
+ {
+ rangeBegin = str2int((char*) bdata(subtokens->entry[0]));
+ rangeEnd = str2int((char*) bdata(subtokens->entry[1]));
- if (!(rangeBegin <= rangeEnd))
- {
- ERROR_PRINT(Range End %d bigger than begin %d,rangeEnd,rangeBegin);
- }
+ /* the error case is an end that lies before the begin */
+ if (!(rangeBegin <= rangeEnd))
+ {
+ ERROR_PRINT(Range End %d smaller than begin %d, rangeEnd, rangeBegin);
+ }
- while (rangeBegin <= rangeEnd) {
- threads[numThreads] = rangeBegin;
- numThreads++;
- rangeBegin++;
- }
+ while (rangeBegin <= rangeEnd) {
+ if (numThreads < MAX_NUM_THREADS)
+ {
+ threads[numThreads] = rangeBegin;
+ }
+ numThreads++;
+ rangeBegin++;
+ }
+ }
+ else
+ {
+ ERROR_PLAIN_PRINT(Parse Error);
+ }
+ bstrListDestroy(subtokens);
}
- else
+ if (numThreads > MAX_NUM_THREADS)
{
- ERROR_PLAIN_PRINT(Parse Error);
+ ERROR_PRINT(Number Of threads %d too large, numThreads);
}
- bstrListDestroy(subtokens);
- }
- bstrListDestroy(tokens);
+ bstrListDestroy(tokens);
- return numThreads;
+ return numThreads;
}
+/*
+ * Translate a logical pin expression into processor ids.
+ *
+ * q is split on '@'; every part must have the form <domain>:<id list>.
+ * The domain is looked up with affinity_getDomain(), the id list is parsed
+ * with bstr_to_cpuset_physical() and every parsed id is mapped to an entry
+ * of domain->processorList, which is appended to threads[].
+ * Returns the total number of entries written to threads[].
+ * NOTE(review): nothing here bounds globalNumThreads against the size of
+ * threads[] - confirm the callers' buffer size.
+ */
uint32_t
bstr_to_cpuset_logical(uint32_t* threads, const_bstring q)
{
- int i;
- uint32_t j;
- int id;
- uint32_t tmpThreads[MAX_NUM_THREADS];
- int globalNumThreads=0;
- uint32_t numThreads=0;
- struct bstrList* tokens;
- struct bstrList* subtokens;
- const AffinityDomain* domain;
-
- tokens = bsplit(q,'@');
-
- for (i=0;i<tokens->qty;i++)
- {
- subtokens = bsplit(tokens->entry[i],':');
+ int i;
+ uint32_t j;
+ int id;
+ uint32_t tmpThreads[MAX_NUM_THREADS];
+ int globalNumThreads=0;
+ uint32_t numThreads=0;
+ struct bstrList* tokens;
+ struct bstrList* subtokens;
+ const AffinityDomain* domain;
- if ( subtokens->qty == 2 )
+ tokens = bsplit(q,'@');
+
+ for (i=0;i<tokens->qty;i++)
{
- domain = affinity_getDomain(subtokens->entry[0]);
+ subtokens = bsplit(tokens->entry[i],':');
- if (!domain)
- {
- ERROR_PRINT(Unknown domain ##%s##,bdata(subtokens->entry[0]));
- }
+ if ( subtokens->qty == 2 )
+ {
+ domain = affinity_getDomain(subtokens->entry[0]);
- numThreads = bstr_to_cpuset_physical(tmpThreads, subtokens->entry[1]);
+ if (!domain)
+ {
+ ERROR_PRINT(Unknown domain ##%s##,bdata(subtokens->entry[0]));
+ }
- for (j=0; j<numThreads; j++)
- {
- if (! (tmpThreads[j] >= domain->numberOfProcessors))
- {
- id = (tmpThreads[j]/domain->numberOfCores) +
- (tmpThreads[j]%domain->numberOfCores) * cpuid_topology.numThreadsPerCore;
- threads[globalNumThreads++] = domain->processorList[id];
+ numThreads = bstr_to_cpuset_physical(tmpThreads, subtokens->entry[1]);
+
+ for (j=0; j<numThreads; j++)
+ {
+ /* only ids below the domain's processor count are accepted */
+ if (! (tmpThreads[j] >= domain->numberOfProcessors))
+ {
+ /* position in processorList: quotient by numberOfCores plus
+ * remainder scaled by numThreadsPerCore */
+ id = (tmpThreads[j]/domain->numberOfCores) +
+ (tmpThreads[j]%domain->numberOfCores) * cpuid_topology.numThreadsPerCore;
+ threads[globalNumThreads++] = domain->processorList[id];
+ }
+ else
+ {
+ ERROR_PRINT(Too many threads requested. Avaialable 0-%d,domain->numberOfProcessors-1);
+ }
+ }
}
else
{
- ERROR_PRINT(Too many threads requested. Avaialable 0-%d,domain->numberOfProcessors-1);
+ ERROR_PLAIN_PRINT(Parse Error);
}
- }
+ bstrListDestroy(subtokens);
}
- else
- {
- ERROR_PLAIN_PRINT(Parse Error);
- }
- bstrListDestroy(subtokens);
- }
- bstrListDestroy(tokens);
+ bstrListDestroy(tokens);
- return globalNumThreads;
+ return globalNumThreads;
}
#define PRINT_EXPR_ERR printf("SYNTAX ERROR: Expression must have the format E:<thread domain>:<num threads>[:chunk size>:<stride>]\n")
@@ -208,94 +207,92 @@ bstr_to_cpuset_logical(uint32_t* threads, const_bstring q)
+/*
+ * Expand an expression of the form
+ *   E:<thread domain>:<num threads>[:<chunk size>:<stride>]
+ * (several may be joined with '@') into processor ids written to threads[].
+ *
+ * The first two characters of the string are deleted in place before
+ * parsing, i.e. the caller's bstring is modified although it is passed as
+ * const_bstring.
+ * Two fields: the first <num threads> entries of the domain's
+ * processorList are taken. Four fields: entries are taken in chunks of
+ * <chunk size>, the start position advancing by <stride> per chunk and
+ * restarting at 0 once it reaches the domain's processor count.
+ * Returns the number of entries written to threads[].
+ * NOTE(review): start position + chunk offset is not checked against the
+ * length of processorList - confirm it cannot run past the end.
+ */
uint32_t
bstr_to_cpuset_expression(uint32_t* threads, const_bstring qi)
{
- int i;
- uint32_t j;
- bstring q = (bstring) qi;
- int globalNumThreads=0;
- uint32_t numThreads=0;
- struct bstrList* tokens;
- struct bstrList* subtokens;
- const AffinityDomain* domain;
-
- bdelete (q, 0, 2);
- tokens = bsplit(q,'@');
-
- for (i=0;i<tokens->qty;i++)
- {
- subtokens = bsplit(tokens->entry[i],':');
+ int i;
+ uint32_t j;
+ bstring q = (bstring) qi;
+ int globalNumThreads=0;
+ uint32_t numThreads=0;
+ struct bstrList* tokens;
+ struct bstrList* subtokens;
+ const AffinityDomain* domain;
- if ( subtokens->qty == 2 )
+ bdelete (q, 0, 2);
+ tokens = bsplit(q,'@');
+
+ for (i=0;i<tokens->qty;i++)
{
- domain = affinity_getDomain(subtokens->entry[0]);
+ subtokens = bsplit(tokens->entry[i],':');
- if (!domain)
- {
- ERROR_PRINT(Unknown domain ##%s##,bdata(subtokens->entry[0]));
- }
+ if ( subtokens->qty == 2 )
+ {
+ domain = affinity_getDomain(subtokens->entry[0]);
- numThreads = str2int(bdata(subtokens->entry[1]));
+ if (!domain)
+ {
+ ERROR_PRINT(Unknown domain ##%s##,bdata(subtokens->entry[0]));
+ }
- if (numThreads > domain->numberOfProcessors)
- {
- ERROR_PRINT(Invalid processor id requested. Avaialable 0-%d,domain->numberOfProcessors-1);
- }
+ numThreads = str2int(bdata(subtokens->entry[1]));
- for (j=0; j<numThreads; j++)
- {
- threads[globalNumThreads++] = domain->processorList[j];
- }
- }
- else if ( subtokens->qty == 4 )
- {
- int counter;
- int currentId = 0;
- int startId = 0;
- int chunksize = str2int(bdata(subtokens->entry[2]));
- int stride = str2int(bdata(subtokens->entry[3]));
- domain = affinity_getDomain(subtokens->entry[0]);
+ if (numThreads > domain->numberOfProcessors)
+ {
+ ERROR_PRINT(Invalid processor id requested. Avaialable 0-%d,
+ domain->numberOfProcessors-1);
+ }
- if (!domain)
- {
- ERROR_PRINT(Unknown domain ##%s##,bdata(subtokens->entry[0]));
- }
+ for (j=0; j<numThreads; j++)
+ {
+ threads[globalNumThreads++] = domain->processorList[j];
+ }
+ }
+ else if ( subtokens->qty == 4 )
+ {
+ int counter;
+ int k;
+ int chunksize = str2int(bdata(subtokens->entry[2]));
+ int stride = str2int(bdata(subtokens->entry[3]));
+ domain = affinity_getDomain(subtokens->entry[0]);
+
+ if (!domain)
+ {
+ ERROR_PRINT(Unknown domain ##%s##,bdata(subtokens->entry[0]));
+ }
- numThreads = str2int(bdata(subtokens->entry[1]));
+ numThreads = str2int(bdata(subtokens->entry[1]));
- if (numThreads > domain->numberOfProcessors)
- {
- ERROR_PRINT(Invalid processor id requested. Avaialable 0-%d,domain->numberOfProcessors-1);
- }
+ if (numThreads > domain->numberOfProcessors)
+ {
+ ERROR_PRINT(Invalid number of processors requested. Available 0-%d,
+ domain->numberOfProcessors-1);
+ }
+
+ /* a chunk size of 0 (or less) would never advance j below */
+ if (chunksize <= 0)
+ {
+ PRINT_EXPR_ERR;
+ ERROR_PLAIN_PRINT(Parse Error);
+ }
- counter = chunksize;
- for (j=0; j<numThreads; j++)
- {
- if (counter)
- {
- threads[globalNumThreads++] = domain->processorList[currentId++];
+ counter = 0;
+ for (j=0; j<numThreads; j+=chunksize)
+ {
+ /* own index for the chunk: i is the index of the enclosing
+ * token loop and must not be overwritten here */
+ for(k=0;k<chunksize && j+k<numThreads ;k++)
+ {
+ threads[globalNumThreads++] = domain->processorList[counter+k];
+ }
+ counter += stride;
+ if (counter >= domain->numberOfProcessors)
+ {
+ counter = 0;
+ }
+ }
}
else
{
- startId += stride;
- if (startId >= numThreads) startId -= numThreads;
- currentId = startId;
- threads[globalNumThreads++] = domain->processorList[currentId++];
- counter = chunksize;
+ PRINT_EXPR_ERR;
+ ERROR_PLAIN_PRINT(Parse Error);
}
- counter--;
- }
+ bstrListDestroy(subtokens);
}
- else
- {
- PRINT_EXPR_ERR;
- ERROR_PLAIN_PRINT(Parse Error);
- }
- bstrListDestroy(subtokens);
- }
- bstrListDestroy(tokens);
+ bstrListDestroy(tokens);
- return globalNumThreads;
+ return globalNumThreads;
}
uint32_t
@@ -365,310 +362,319 @@ bstr_to_cpuset_scatter(uint32_t* threads, const_bstring qi)
#define CPUSET_ERROR \
- if (cpuid_isInCpuset()) { \
- ERROR_PLAIN_PRINT(You are running inside a cpuset. \
- In cpusets only logical pinning inside set is allowed!); \
- }
+ if (cpuid_isInCpuset()) { \
+ ERROR_PLAIN_PRINT(You are running inside a cpuset. In cpusets only logical pinning inside set is allowed!); \
+ }
int
bstr_to_cpuset(int* threadsIN, const_bstring q)
{
- uint32_t i;
- int num=0;
- int cpuMapping[cpuid_topology.numHWThreads];
- cpu_set_t cpu_set;
- uint32_t numThreads;
- bstring domainStr = bformat("NSCM");
- const_bstring scatter = bformat("scatter");
- struct bstrList* tokens;
- CPU_ZERO(&cpu_set);
- sched_getaffinity(0,sizeof(cpu_set_t), &cpu_set);
- uint32_t* threads = (uint32_t*) threadsIN;
-
- if (binchr (q, 0, domainStr) != BSTR_ERR)
- {
- CPUSET_ERROR;
-
- if (binstr (q, 0 , scatter ) != BSTR_ERR)
- {
- numThreads = bstr_to_cpuset_scatter(threads,q);
+ uint32_t i;
+ int num=0;
+ int cpuMapping[cpuid_topology.numHWThreads];
+ cpu_set_t cpu_set;
+ uint32_t numThreads;
+ bstring domainStr = bformat("NSCM");
+ const_bstring scatter = bformat("scatter");
+ struct bstrList* tokens;
+ CPU_ZERO(&cpu_set);
+ sched_getaffinity(0,sizeof(cpu_set_t), &cpu_set);
+ uint32_t* threads = (uint32_t*) threadsIN;
+
+ if (binchr (q, 0, domainStr) != BSTR_ERR)
+ {
+ CPUSET_ERROR;
+
+ if (binstr (q, 0 , scatter ) != BSTR_ERR)
+ {
+ numThreads = bstr_to_cpuset_scatter(threads,q);
+ }
+ else if (bstrchr (q, 'E') != BSTR_ERR)
+ {
+ numThreads = bstr_to_cpuset_expression(threads,q);
+ }
+ else
+ {
+ numThreads = bstr_to_cpuset_logical(threads,q);
+ }
}
- else if (bstrchr (q, 'E') != BSTR_ERR)
+ else if (bstrchr (q, 'L') != BSTR_ERR)
{
- numThreads = bstr_to_cpuset_expression(threads,q);
+ uint32_t count = cpu_count(&cpu_set);
+
+ tokens = bsplit(q,':');
+ numThreads = bstr_to_cpuset_physical(threads,tokens->entry[1]);
+
+ for (i=0; i < cpuid_topology.numHWThreads; i++)
+ {
+ if (CPU_ISSET(i,&cpu_set))
+ {
+ cpuMapping[num++]=i;
+ }
+ }
+
+ for (i=0; i < numThreads; i++)
+ {
+ if (!(threads[i] >= count))
+ {
+ threads[i] = cpuMapping[threads[i]];
+ }
+ else
+ {
+ fprintf(stderr, "Available CPUs: ");
+ for (int j=0; j< num-1;j++)
+ {
+ fprintf(stderr, "%d,", cpuMapping[j]);
+ }
+ fprintf(stderr, "%d\n", cpuMapping[num-1]);
+ ERROR_PRINT(Index %d out of range.,threads[i]);
+ }
+ }
+ bstrListDestroy(tokens);
}
else
{
- numThreads = bstr_to_cpuset_logical(threads,q);
+ CPUSET_ERROR;
+ numThreads = bstr_to_cpuset_physical(threads,q);
}
- }
- else if (bstrchr (q, 'L') != BSTR_ERR)
- {
- uint32_t count = cpu_count(&cpu_set);
- printf("Using logical numbering within cpuset %d\n",count);
- tokens = bsplit(q,':');
- numThreads = bstr_to_cpuset_physical(threads,tokens->entry[1]);
- for (i=0; i < cpuid_topology.numHWThreads; i++)
- {
- if (CPU_ISSET(i,&cpu_set))
- {
- cpuMapping[num++]=i;
- }
- }
-
- for (i=0; i < numThreads; i++)
- {
- if (!(threads[i] > count))
- {
- threads[i] = cpuMapping[threads[i]];
- }
- else
- {
- ERROR_PRINT(Request cpu out of range of max %d,count);
- }
- }
- bstrListDestroy(tokens);
- }
- else
- {
- CPUSET_ERROR;
- numThreads = bstr_to_cpuset_physical(threads,q);
- }
-
- bdestroy(domainStr);
- return (int) numThreads;
+ bdestroy(domainStr);
+ return (int) numThreads;
}
void
bstr_to_eventset(StrUtilEventSet* set, const_bstring q)
{
- int i;
- struct bstrList* tokens;
- struct bstrList* subtokens;
+ int i;
+ struct bstrList* tokens;
+ struct bstrList* subtokens;
- tokens = bsplit(q,',');
- set->numberOfEvents = tokens->qty;
- set->events = (StrUtilEvent*)
+ tokens = bsplit(q,',');
+ set->numberOfEvents = tokens->qty;
+ set->events = (StrUtilEvent*)
malloc(set->numberOfEvents * sizeof(StrUtilEvent));
- for (i=0;i<tokens->qty;i++)
- {
- subtokens = bsplit(tokens->entry[i],':');
-
- if ( subtokens->qty != 2 )
- {
-
- fprintf(stderr, "Cannot parse event string %s, probably missing counter name\n"
- ,bdata(tokens->entry[i]));
- fprintf(stderr, "Format: <eventName>:<counter>,...\n");
- msr_finalize();
- pci_finalize();
- exit(EXIT_FAILURE);
- //ERROR_PLAIN_PRINT(Error in parsing event string);
- }
- else
+ for (i=0;i<tokens->qty;i++)
{
- set->events[i].eventName = bstrcpy(subtokens->entry[0]);
- set->events[i].counterName = bstrcpy(subtokens->entry[1]);
- }
+ subtokens = bsplit(tokens->entry[i],':');
- bstrListDestroy(subtokens);
- }
+ if ( subtokens->qty != 2 )
+ {
+
+ fprintf(stderr, "Cannot parse event string %s, probably missing counter name\n"
+ ,bdata(tokens->entry[i]));
+ fprintf(stderr, "Format: <eventName>:<counter>,...\n");
+ msr_finalize();
+ pci_finalize();
+ exit(EXIT_FAILURE);
- bstrListDestroy(tokens);
+ }
+ else
+ {
+ set->events[i].eventName = bstrcpy(subtokens->entry[0]);
+ set->events[i].counterName = bstrcpy(subtokens->entry[1]);
+ }
+
+ bstrListDestroy(subtokens);
+ }
+
+ bstrListDestroy(tokens);
}
FILE*
bstr_to_outstream(const_bstring argString, bstring filter)
{
- int i;
- char* cstr;
- FILE* STREAM;
- struct bstrList* tokens;
- bstring base;
- bstring suffix = bfromcstr(".");
- bstring filename;
-
- /* configure filter */
- {
+ int i;
+ char* cstr;
+ FILE* STREAM;
+ struct bstrList* tokens;
+ bstring base;
+ bstring suffix = bfromcstr(".");
+ bstring filename;
+
+ /* configure filter */
tokens = bsplit(argString,'.');
if (tokens->qty < 2)
{
- fprintf(stderr, "Outputfile has no filetype suffix!\n");
- fprintf(stderr, "Add suffix .txt for raw output or any supported filter suffix.\n");
- exit(EXIT_FAILURE);
+ fprintf(stderr, "Outputfile has no filetype suffix!\n");
+ fprintf(stderr, "Add suffix .txt for raw output or any supported filter suffix.\n");
+ exit(EXIT_FAILURE);
}
base = bstrcpy(tokens->entry[0]);
if (biseqcstr(tokens->entry[1],"txt"))
{
- bassigncstr(filter, "NO");
+ bassigncstr(filter, "NO");
}
else
{
- bassigncstr(filter, TOSTRING(LIKWIDFILTERPATH));
- bconchar(filter,'/');
- bconcat(filter,tokens->entry[1]);
+ bassigncstr(filter, TOSTRING(LIKWIDFILTERPATH));
+ bconchar(filter,'/');
+ bconcat(filter,tokens->entry[1]);
}
bconcat(suffix,tokens->entry[1]);
bstrListDestroy(tokens);
- }
-
- tokens = bsplit(base,'_');
-
- if (tokens->qty < 1)
- {
- ERROR_PLAIN_PRINT(Error in parsing file string);
- }
-
- filename = bstrcpy(tokens->entry[0]);
-
- for (i=1; i<tokens->qty; i++)
- {
- if (biseqcstr(tokens->entry[i],"%j"))
- {
- cstr = getenv("PBS_JOBID");
- if (cstr != NULL)
- {
- bcatcstr(filename, "_");
- bcatcstr(filename, cstr);
- }
- }
- else if (biseqcstr(tokens->entry[i],"%r"))
- {
- cstr = getenv("PMI_RANK");
- if (cstr == NULL)
- {
- cstr = getenv("OMPI_COMM_WORLD_RANK");
- }
- if (cstr != NULL)
- {
- bcatcstr(filename, "_");
- bcatcstr(filename, cstr);
- }
- }
- else if (biseqcstr(tokens->entry[i],"%h"))
- {
- cstr = (char*) malloc(HOST_NAME_MAX * sizeof(char));
- gethostname(cstr,HOST_NAME_MAX);
- bcatcstr(filename, "_");
- bcatcstr(filename, cstr);
- free(cstr);
- }
- else if (biseqcstr(tokens->entry[i],"%p"))
- {
- bstring pid = bformat("_%d",getpid());
- bconcat(filename, pid);
- bdestroy(pid);
- }
- else
- {
- ERROR_PLAIN_PRINT(Unsupported placeholder in filename!);
- }
- }
-
- if (biseqcstr(filter,"NO"))
- {
- bconcat(filename, suffix);
- }
- else
- {
- bcatcstr(filter, " ");
- bcatcstr(filename, ".tmp");
- bconcat(filter, filename);
- }
-
- bstrListDestroy(tokens);
- STREAM = fopen(bdata(filename),"w");
- bdestroy(filename);
- bdestroy(suffix);
- bdestroy(base);
-
- return STREAM;
+
+ tokens = bsplit(base,'_');
+
+ if (tokens->qty < 1)
+ {
+ ERROR_PLAIN_PRINT(Error in parsing file string);
+ }
+
+ filename = bstrcpy(tokens->entry[0]);
+
+ for (i=1; i<tokens->qty; i++)
+ {
+ if (biseqcstr(tokens->entry[i],"%j"))
+ {
+ cstr = getenv("PBS_JOBID");
+ if (cstr != NULL)
+ {
+ bcatcstr(filename, "_");
+ bcatcstr(filename, cstr);
+ }
+ }
+ else if (biseqcstr(tokens->entry[i],"%r"))
+ {
+ cstr = getenv("PMI_RANK");
+ if (cstr == NULL)
+ {
+ cstr = getenv("OMPI_COMM_WORLD_RANK");
+ }
+ if (cstr != NULL)
+ {
+ bcatcstr(filename, "_");
+ bcatcstr(filename, cstr);
+ }
+ }
+ else if (biseqcstr(tokens->entry[i],"%h"))
+ {
+ cstr = (char*) malloc(HOST_NAME_MAX * sizeof(char));
+ gethostname(cstr,HOST_NAME_MAX);
+ bcatcstr(filename, "_");
+ bcatcstr(filename, cstr);
+ free(cstr);
+ }
+ else if (biseqcstr(tokens->entry[i],"%p"))
+ {
+ bstring pid = bformat("_%d",getpid());
+ bconcat(filename, pid);
+ bdestroy(pid);
+ }
+ else
+ {
+ ERROR_PLAIN_PRINT(Unsupported placeholder in filename!);
+ }
+ }
+
+ if (biseqcstr(filter,"NO"))
+ {
+ bconcat(filename, suffix);
+ }
+ else
+ {
+ bcatcstr(filter, " ");
+ bcatcstr(filename, ".tmp");
+ bconcat(filter, filename);
+ }
+
+ bstrListDestroy(tokens);
+ STREAM = fopen(bdata(filename),"w");
+ bdestroy(filename);
+ bdestroy(suffix);
+ bdestroy(base);
+
+ return STREAM;
}
uint64_t
bstr_to_doubleSize(const_bstring str, DataType type)
{
- bstring unit = bmidstr(str, blength(str)-2, 2);
- bstring sizeStr = bmidstr(str, 0, blength(str)-2);
- uint64_t sizeU = str2int(bdata(sizeStr));
- uint64_t junk = 0;
- uint64_t bytesize = 0;
-
- switch (type)
- {
- case SINGLE:
- bytesize = sizeof(float);
- break;
-
- case DOUBLE:
- bytesize = sizeof(double);
- break;
- }
-
- if (biseqcstr(unit, "kB")) {
- junk = (sizeU *1024)/bytesize;
- } else if (biseqcstr(unit, "MB")) {
- junk = (sizeU *1024*1024)/bytesize;
- } else if (biseqcstr(unit, "GB")) {
- junk = (sizeU *1024*1024*1024)/bytesize;
- }
-
- return junk;
+ bstring unit = bmidstr(str, blength(str)-2, 2);
+ bstring sizeStr = bmidstr(str, 0, blength(str)-2);
+ uint64_t sizeU = str2int(bdata(sizeStr));
+ uint64_t junk = 0;
+ uint64_t bytesize = 0;
+
+ switch (type)
+ {
+ case SINGLE:
+ case SINGLE_RAND:
+ bytesize = sizeof(float);
+ break;
+
+ case DOUBLE:
+ case DOUBLE_RAND:
+ bytesize = sizeof(double);
+ break;
+ }
+
+ if (biseqcstr(unit, "kB")) {
+ junk = (sizeU *1024)/bytesize;
+ } else if (biseqcstr(unit, "MB")) {
+ junk = (sizeU *1024*1024)/bytesize;
+ } else if (biseqcstr(unit, "GB")) {
+ junk = (sizeU *1024*1024*1024)/bytesize;
+ }
+
+ return junk;
}
void
bstr_to_interval(const_bstring str, struct timespec* interval)
{
- int size;
- int pos;
- bstring ms = bformat("ms");
-
- if ((pos = bstrrchr (str, 's')) != BSTR_ERR)
- {
- if (pos != (blength(str)-1))
- {
- ERROR_PLAIN_PRINT(Parsing of daemon interval failed);
- }
-
- /* unit is ms */
- if (binstrr (str, blength(str), ms) != BSTR_ERR)
- {
- bstring sizeStr = bmidstr(str, 0, blength(str)-2);
- size = str2int(bdata(sizeStr));
- if (size >= 1000)
- {
- interval->tv_sec = size/1000;
- interval->tv_nsec = (size%1000) * 1.E06;
- }
- else
- {
- interval->tv_sec = 0L;
- interval->tv_nsec = size * 1.E06;
- }
- }
- /* unit is s */
- else
- {
- bstring sizeStr = bmidstr(str, 0, blength(str)-1);
- size = str2int(bdata(sizeStr));
- interval->tv_sec = size;
- interval->tv_nsec = 0L;
- }
- }
- else
- {
- ERROR_PLAIN_PRINT(Parsing of daemon interval failed);
- }
+ int size;
+ int pos;
+ bstring ms = bformat("ms");
+
+ if ((pos = bstrrchr (str, 's')) != BSTR_ERR)
+ {
+ if (pos != (blength(str)-1))
+ {
+ fprintf(stderr, "You need to specify a time unit s or ms like 200ms\n");
+ msr_finalize();
+ exit(EXIT_FAILURE);
+ }
+
+ /* unit is ms */
+ if (binstrr (str, blength(str), ms) != BSTR_ERR)
+ {
+ bstring sizeStr = bmidstr(str, 0, blength(str)-2);
+ size = str2int(bdata(sizeStr));
+ if (size >= 1000)
+ {
+ interval->tv_sec = size/1000;
+ interval->tv_nsec = (size%1000) * 1.E06;
+ }
+ else
+ {
+ interval->tv_sec = 0L;
+ interval->tv_nsec = size * 1.E06;
+ }
+ }
+ /* unit is s */
+ else
+ {
+ bstring sizeStr = bmidstr(str, 0, blength(str)-1);
+ size = str2int(bdata(sizeStr));
+ interval->tv_sec = size;
+ interval->tv_nsec = 0L;
+ }
+ }
+ else
+ {
+ fprintf(stderr, "You need to specify a time unit s or ms like 200ms\n");
+ msr_finalize();
+ exit(EXIT_FAILURE);
+ }
}
@@ -678,199 +684,200 @@ bstr_to_workgroup(Workgroup* group,
DataType type,
int numberOfStreams)
{
- uint32_t i;
- int parseStreams = 0;
- bstring threadInfo;
- bstring streams= bformat("0");
- struct bstrList* tokens;
- struct bstrList* subtokens;
- const AffinityDomain* domain;
-
- /* split the workgroup into the thread and the streams part */
- tokens = bsplit(str,'-');
-
- if (tokens->qty == 2)
- {
- threadInfo = bstrcpy(tokens->entry[0]);
- streams = bstrcpy(tokens->entry[1]);
- parseStreams = 1;
- }
- else if (tokens->qty == 1)
- {
- threadInfo = bstrcpy(tokens->entry[0]);
- }
- else
- {
- ERROR_PLAIN_PRINT(Error in parsing workgroup string);
- }
-
- bstrListDestroy (tokens);
- tokens = bsplit(threadInfo,':');
-
- if (tokens->qty == 5)
- {
- uint32_t maxNumThreads;
- int chunksize;
- int stride;
- int counter;
- int currentId = 0;
- int startId = 0;
+ uint32_t i;
+ int parseStreams = 0;
+ bstring threadInfo;
+ bstring streams= bformat("0");
+ struct bstrList* tokens;
+ struct bstrList* subtokens;
+ const AffinityDomain* domain;
- domain = affinity_getDomain(tokens->entry[0]);
+ /* split the workgroup into the thread and the streams part */
+ tokens = bsplit(str,'-');
- if (domain == NULL)
+ if (tokens->qty == 2)
{
- fprintf(stderr, "Error: Domain %s not available on current machine.\nTry likwid-bench -p for supported domains.",
- bdata(tokens->entry[0]));
- exit(EXIT_FAILURE);
+ threadInfo = bstrcpy(tokens->entry[0]);
+ streams = bstrcpy(tokens->entry[1]);
+ parseStreams = 1;
+ }
+ else if (tokens->qty == 1)
+ {
+ threadInfo = bstrcpy(tokens->entry[0]);
+ }
+ else
+ {
+ ERROR_PLAIN_PRINT(Error in parsing workgroup string);
}
- group->size = bstr_to_doubleSize(tokens->entry[1], type);
- group->numberOfThreads = str2int(bdata(tokens->entry[2]));
- chunksize = str2int(bdata(tokens->entry[3]));
- stride = str2int(bdata(tokens->entry[4]));
- maxNumThreads = (domain->numberOfProcessors / stride) * chunksize;
+ bstrListDestroy (tokens);
+ tokens = bsplit(threadInfo,':');
- if (group->numberOfThreads > maxNumThreads)
+ if (tokens->qty == 5)
{
- fprintf(stderr, "Error: Domain %s supports only up to %d threads with used expression.\n",
- bdata(tokens->entry[0]), maxNumThreads);
- exit(EXIT_FAILURE);
- }
+ uint32_t maxNumThreads;
+ int chunksize;
+ int stride;
+ int counter;
+ int currentId = 0;
+ int startId = 0;
+
+ domain = affinity_getDomain(tokens->entry[0]);
- group->processorIds = (int*) malloc(group->numberOfThreads * sizeof(int));
+ if (domain == NULL)
+ {
+ fprintf(stderr, "Error: Domain %s not available on current machine.\nTry likwid-bench -p for supported domains.",
+ bdata(tokens->entry[0]));
+ exit(EXIT_FAILURE);
+ }
- counter = chunksize;
+ group->size = bstr_to_doubleSize(tokens->entry[1], type);
+ group->numberOfThreads = str2int(bdata(tokens->entry[2]));
+ chunksize = str2int(bdata(tokens->entry[3]));
+ stride = str2int(bdata(tokens->entry[4]));
+ maxNumThreads = (domain->numberOfProcessors / stride) * chunksize;
- for (i=0; i<group->numberOfThreads; i++)
- {
- if (counter)
- {
- group->processorIds[i] = domain->processorList[currentId++];
- }
- else
- {
- startId += stride;
- currentId = startId;
- group->processorIds[i] = domain->processorList[currentId++];
- counter = chunksize;
- }
- counter--;
- }
- }
- else if (tokens->qty == 3)
- {
- domain = affinity_getDomain(tokens->entry[0]);
+ if (group->numberOfThreads > maxNumThreads)
+ {
+ fprintf(stderr, "Error: Domain %s supports only up to %d threads with used expression.\n",
+ bdata(tokens->entry[0]), maxNumThreads);
+ exit(EXIT_FAILURE);
+ }
- if (domain == NULL)
- {
- fprintf(stderr, "Error: Domain %s not available on current machine.\nTry likwid-bench -p for supported domains.",
- bdata(tokens->entry[0]));
- exit(EXIT_FAILURE);
- }
+ group->processorIds = (int*) malloc(group->numberOfThreads * sizeof(int));
- group->size = bstr_to_doubleSize(tokens->entry[1], type);
- group->numberOfThreads = str2int(bdata(tokens->entry[2]));
+ counter = chunksize;
- if (group->numberOfThreads > domain->numberOfProcessors)
- {
- fprintf(stderr, "Error: Domain %s supports only up to %d threads.\n",
- bdata(tokens->entry[0]),domain->numberOfProcessors);
- exit(EXIT_FAILURE);
+ for (i=0; i<group->numberOfThreads; i++)
+ {
+ if (counter)
+ {
+ group->processorIds[i] = domain->processorList[currentId++];
+ }
+ else
+ {
+ startId += stride;
+ currentId = startId;
+ group->processorIds[i] = domain->processorList[currentId++];
+ counter = chunksize;
+ }
+ counter--;
+ }
}
+ else if (tokens->qty == 3)
+ {
+ domain = affinity_getDomain(tokens->entry[0]);
- group->processorIds = (int*) malloc(group->numberOfThreads * sizeof(int));
+ if (domain == NULL)
+ {
+ fprintf(stderr, "Error: Domain %s not available on current machine.\n", bdata(tokens->entry[0]));
+ fprintf(stderr, "Try likwid-bench -p for supported domains.\n");
+ exit(EXIT_FAILURE);
+ }
- for (i=0; i<group->numberOfThreads; i++)
- {
- group->processorIds[i] = domain->processorList[i];
- }
- }
- else if (tokens->qty == 2)
- {
- domain = affinity_getDomain(tokens->entry[0]);
+ group->size = bstr_to_doubleSize(tokens->entry[1], type);
+ group->numberOfThreads = str2int(bdata(tokens->entry[2]));
- if (domain == NULL)
- {
- fprintf(stderr, "Error: Domain %s not available on current machine.\nTry likwid-bench -p for supported domains.",
- bdata(tokens->entry[0]));
- exit(EXIT_FAILURE);
- }
+ if (group->numberOfThreads > domain->numberOfProcessors)
+ {
+ fprintf(stderr, "Error: Domain %s supports only up to %d threads.\n",
+ bdata(tokens->entry[0]),domain->numberOfProcessors);
+ exit(EXIT_FAILURE);
+ }
- group->size = bstr_to_doubleSize(tokens->entry[1], type);
- group->numberOfThreads = domain->numberOfProcessors;
- group->processorIds = (int*) malloc(group->numberOfThreads * sizeof(int));
+ group->processorIds = (int*) malloc(group->numberOfThreads * sizeof(int));
- for (i=0; i<group->numberOfThreads; i++)
- {
- group->processorIds[i] = domain->processorList[i];
+ for (i=0; i<group->numberOfThreads; i++)
+ {
+ group->processorIds[i] = domain->processorList[i];
+ }
}
- }
- else
- {
- ERROR_PLAIN_PRINT(Error in parsing workgroup string);
- }
+ else if (tokens->qty == 2)
+ {
+ domain = affinity_getDomain(tokens->entry[0]);
- bstrListDestroy(tokens);
+ if (domain == NULL)
+ {
+ fprintf(stderr, "Error: Domain %s not available on current machine.\n",
+ bdata(tokens->entry[0]));
+ fprintf(stderr, "Try likwid-bench -p for supported domains.\n");
+ exit(EXIT_FAILURE);
+ }
- /* parse stream list */
- if (parseStreams)
- {
- tokens = bsplit(streams,',');
+ group->size = bstr_to_doubleSize(tokens->entry[1], type);
+ group->numberOfThreads = domain->numberOfProcessors;
+ group->processorIds = (int*) malloc(group->numberOfThreads * sizeof(int));
- if (tokens->qty < numberOfStreams)
+ for (i=0; i<group->numberOfThreads; i++)
+ {
+ group->processorIds[i] = domain->processorList[i];
+ }
+ }
+ else
{
- ERROR_PRINT(Testcase requires at least %d streams, numberOfStreams);
+ ERROR_PLAIN_PRINT(Error in parsing workgroup string);
}
- group->streams = (Stream*) malloc(numberOfStreams * sizeof(Stream));
+ bstrListDestroy(tokens);
- for (i=0;i<(uint32_t) tokens->qty;i++)
+ /* parse stream list */
+ if (parseStreams)
{
- subtokens = bsplit(tokens->entry[i],':');
+ tokens = bsplit(streams,',');
- if ( subtokens->qty == 3 )
- {
- int index = str2int(bdata(subtokens->entry[0]));
- if (index >= numberOfStreams)
+ if (tokens->qty < numberOfStreams)
{
- ERROR_PRINT(Stream Index %d out of range,index);
+ ERROR_PRINT(Testcase requires at least %d streams, numberOfStreams);
}
- group->streams[index].domain = bstrcpy(subtokens->entry[1]);
- group->streams[index].offset = str2int(bdata(subtokens->entry[2]));
- }
- else if ( subtokens->qty == 2 )
- {
- int index = str2int(bdata(subtokens->entry[0]));
- if (index >= numberOfStreams)
+
+ group->streams = (Stream*) malloc(numberOfStreams * sizeof(Stream));
+
+ for (i=0;i<(uint32_t) tokens->qty;i++)
{
- ERROR_PRINT(Stream Index %d out of range,index);
- }
- group->streams[index].domain = bstrcpy(subtokens->entry[1]);
- group->streams[index].offset = 0;
- }
- else
- {
- ERROR_PLAIN_PRINT(Error in parsing event string);
- }
+ subtokens = bsplit(tokens->entry[i],':');
- bstrListDestroy(subtokens);
- }
+ if ( subtokens->qty == 3 )
+ {
+ int index = str2int(bdata(subtokens->entry[0]));
+ if (index >= numberOfStreams)
+ {
+ ERROR_PRINT(Stream Index %d out of range,index);
+ }
+ group->streams[index].domain = bstrcpy(subtokens->entry[1]);
+ group->streams[index].offset = str2int(bdata(subtokens->entry[2]));
+ }
+ else if ( subtokens->qty == 2 )
+ {
+ int index = str2int(bdata(subtokens->entry[0]));
+ if (index >= numberOfStreams)
+ {
+ ERROR_PRINT(Stream Index %d out of range,index);
+ }
+ group->streams[index].domain = bstrcpy(subtokens->entry[1]);
+ group->streams[index].offset = 0;
+ }
+ else
+ {
+ ERROR_PLAIN_PRINT(Error in parsing event string);
+ }
- bstrListDestroy(tokens);
- }
- else
- {
- group->streams = (Stream*) malloc(numberOfStreams * sizeof(Stream));
+ bstrListDestroy(subtokens);
+ }
- for (i=0; i< (uint32_t)numberOfStreams; i++)
+ bstrListDestroy(tokens);
+ }
+ else
{
- group->streams[i].domain = domain->tag;
- group->streams[i].offset = 0;
+ group->streams = (Stream*) malloc(numberOfStreams * sizeof(Stream));
+
+ for (i=0; i< (uint32_t)numberOfStreams; i++)
+ {
+ group->streams[i].domain = domain->tag;
+ group->streams[i].offset = 0;
+ }
}
- }
- group->size /= numberOfStreams;
+ group->size /= numberOfStreams;
}
@@ -878,91 +885,91 @@ bstr_to_workgroup(Workgroup* group,
bstring
bSecureInput (int maxlen, char* vgcCtx) {
- int i, m, c = 1;
- bstring b, t;
- int termchar = 0;
+ int i, m, c = 1;
+ bstring b, t;
+ int termchar = 0;
- if (!vgcCtx) return NULL;
+ if (!vgcCtx) return NULL;
- b = bfromcstralloc (INIT_SECURE_INPUT_LENGTH, "");
+ b = bfromcstralloc (INIT_SECURE_INPUT_LENGTH, "");
- for (i=0; ; i++)
- {
- if (termchar == c)
- {
- break;
- }
- else if ((maxlen > 0) && (i >= maxlen))
- {
- b = NULL;
- return b;
- }
- else
+ for (i=0; ; i++)
{
- c = *(vgcCtx++);
- }
+ if (termchar == c)
+ {
+ break;
+ }
+ else if ((maxlen > 0) && (i >= maxlen))
+ {
+ b = NULL;
+ return b;
+ }
+ else
+ {
+ c = *(vgcCtx++);
+ }
- if (EOF == c)
- {
- break;
- }
+ if (EOF == c)
+ {
+ break;
+ }
- if (i+1 >= b->mlen) {
+ if (i+1 >= b->mlen) {
- /* Double size, but deal with unusual case of numeric
- overflows */
+ /* Double size, but deal with unusual case of numeric
+ overflows */
- if ((m = b->mlen << 1) <= b->mlen &&
- (m = b->mlen + 1024) <= b->mlen &&
- (m = b->mlen + 16) <= b->mlen &&
- (m = b->mlen + 1) <= b->mlen)
- {
- t = NULL;
- }
- else
- {
- t = bfromcstralloc (m, "");
- }
+ if ((m = b->mlen << 1) <= b->mlen &&
+ (m = b->mlen + 1024) <= b->mlen &&
+ (m = b->mlen + 16) <= b->mlen &&
+ (m = b->mlen + 1) <= b->mlen)
+ {
+ t = NULL;
+ }
+ else
+ {
+ t = bfromcstralloc (m, "");
+ }
- if (t)
- {
- memcpy (t->data, b->data, i);
- }
+ if (t)
+ {
+ memcpy (t->data, b->data, i);
+ }
- bdestroy (b); /* Clean previous buffer */
- b = t;
- if (!b)
- {
- return b;
- }
- }
+ bdestroy (b); /* Clean previous buffer */
+ b = t;
+ if (!b)
+ {
+ return b;
+ }
+ }
- b->data[i] = (unsigned char) c;
- }
+ b->data[i] = (unsigned char) c;
+ }
- i--;
- b->slen = i;
- b->data[i] = (unsigned char) '\0';
- return b;
+ i--;
+ b->slen = i;
+ b->data[i] = (unsigned char) '\0';
+ return b;
}
int
bJustifyCenter (bstring b, int width)
{
- unsigned char space = ' ';
- int alignSpace = (width - b->slen) / 2;
- int restSpace = (width - b->slen) % 2;
- if (width <= 0) return -__LINE__;
+ unsigned char space = ' ';
+ int alignSpace = (width - b->slen) / 2;
+ int restSpace = (width - b->slen) % 2;
+ if (width <= 0) return -__LINE__;
- if (b->slen <= width)
- {
- binsertch (b, 0, alignSpace, space);
- }
+ if (b->slen <= width)
+ {
+ binsertch (b, 0, alignSpace, space);
+ }
- binsertch (b, b->slen , alignSpace+restSpace, space);
+ binsertch (b, b->slen , alignSpace+restSpace, space);
- return BSTR_OK;
+ return BSTR_OK;
}
diff --git a/src/thermal.c b/src/thermal.c
index 45e7d27..0812086 100644
--- a/src/thermal.c
+++ b/src/thermal.c
@@ -5,8 +5,8 @@
*
* Description: Module implementing Intel TM/TM2 interface
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -68,6 +68,7 @@ void thermal_init(int cpuId)
flags = 0ULL;
flags = msr_read(cpuId, MSR_TEMPERATURE_TARGET);
thermal_info.activationT = extractBitField(flags,8,16);
+ thermal_info.offset = extractBitField(flags,6,24);
}
}
diff --git a/src/threads.c b/src/threads.c
index f96f550..87fa2b2 100644
--- a/src/threads.c
+++ b/src/threads.c
@@ -5,8 +5,8 @@
*
* Description: High level interface to pthreads
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -54,7 +54,7 @@ static int numThreads = 0;
/* ##### FUNCTION DEFINITIONS - EXPORTED FUNCTIONS ################## */
void
-threads_init(int numberOfThreads)
+threads_init(FILE* OUTSTREAM, int numberOfThreads)
{
int i;
numThreads = numberOfThreads;
@@ -68,6 +68,7 @@ threads_init(int numberOfThreads)
threads_data[i].globalNumberOfThreads = numThreads;
threads_data[i].globalThreadId = i;
threads_data[i].threadId = i;
+ threads_data[i].output = OUTSTREAM;
}
pthread_barrier_init(&threads_barrier, NULL, numThreads);
@@ -102,7 +103,7 @@ threads_createGroups(int numberOfGroups)
{
ERROR_PRINT(Not enough threads %d to create %d groups,numThreads,numberOfGroups);
}
- else
+ else
{
numThreadsPerGroup = numThreads / numberOfGroups;
}
@@ -205,11 +206,11 @@ threads_join(void)
void
threads_destroy(int numberOfGroups)
{
- int i;
+ int i;
free(threads_data);
for(i=0;i<numberOfGroups;i++)
{
- free(threads_groups[i].threadIds);
+ free(threads_groups[i].threadIds);
}
free(threads_groups);
free(threads);
diff --git a/src/timer.c b/src/timer.c
index 32a97d4..337c13d 100644
--- a/src/timer.c
+++ b/src/timer.c
@@ -5,8 +5,8 @@
*
* Description: Implementation of timer module
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -79,19 +79,19 @@ getCpuSpeed(void)
((uint64_t)tv1.tv_sec * 1000000 + tv1.tv_usec));
#endif
#ifdef _ARCH_PPC
- FILE *fpipe;
- char *command="grep timebase /proc/cpuinfo | awk '{ print $3 }'";
- char buff[256];
-
- if ( !(fpipe = (FILE*)popen(command,"r")) )
- { // If fpipe is NULL
- perror("Problems with pipe");
- exit(1);
- }
+ FILE *fpipe;
+ char *command="grep timebase /proc/cpuinfo | awk '{ print $3 }'";
+ char buff[256];
+
+ if ( !(fpipe = (FILE*)popen(command,"r")) )
+ { // If fpipe is NULL
+ perror("Problems with pipe");
+ exit(1);
+ }
- fgets(buff, 256, fpipe);
+ fgets(buff, 256, fpipe);
- return (uint64_t) atoi(buff);
+ return (uint64_t) atoi(buff);
#endif
}
@@ -120,7 +120,7 @@ double timer_print( TimerData* time )
uint64_t cycles;
/* clamp to zero if something goes wrong */
- if ((time->stop.int64-baseline) < time->start.int64)
+ if ((time->stop.int64-baseline) < time->start.int64)
{
cycles = 0ULL;
}
diff --git a/src/tree.c b/src/tree.c
index e93ecc4..795dd17 100644
--- a/src/tree.c
+++ b/src/tree.c
@@ -5,8 +5,8 @@
*
* Description: Module implementing a tree data structure
*
- * Version: 3.1.2
- * Released: 2.6.2014
+ * Version: 3.1.3
+ * Released: 4.11.2014
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -48,33 +48,33 @@ tree_init(TreeNode** root, int id)
void
tree_print(TreeNode* nodePtr)
{
- int level = 0;
+ int level = 0;
- if (nodePtr != NULL)
- {
+ if (nodePtr != NULL)
+ {
- TreeNode* digger;
- TreeNode* walker;
+ TreeNode* digger;
+ TreeNode* walker;
- digger = nodePtr->llink;
+ digger = nodePtr->llink;
- while (digger != NULL)
- {
- printf("\n Level %d:\n", level++);
- printf("%d ", digger->id);
- walker = digger->rlink;
+ while (digger != NULL)
+ {
+ printf("\n Level %d:\n", level++);
+ printf("%d ", digger->id);
+ walker = digger->rlink;
- while (walker != NULL)
- {
- printf("%d ", walker->id);
- walker = walker->rlink;
- }
+ while (walker != NULL)
+ {
+ printf("%d ", walker->id);
+ walker = walker->rlink;
+ }
- digger = digger->llink;
- }
+ digger = digger->llink;
+ }
- printf("\n ");
- }
+ printf("\n ");
+ }
}
void
diff --git a/test/accuracy/Makefile b/test/accuracy/Makefile
new file mode 100644
index 0000000..f84b1cd
--- /dev/null
+++ b/test/accuracy/Makefile
@@ -0,0 +1,25 @@
+LIKWID_PATH=../..
+LIKWID_APP=likwid-bench
+HOST=$(shell hostname -s)
+
+
+all: plain marker
+
+plain:
+ sed -i -e s/'INSTRUMENT_BENCH = .*#'/'INSTRUMENT_BENCH = false#'/g $(LIKWID_PATH)/config.mk
+ sed -i -e s/'CPPFLAGS := -DPAPI '/'CPPFLAGS := '/g $(LIKWID_PATH)/Makefile
+ cd $(LIKWID_PATH) && make distclean && make
+ cp $(LIKWID_PATH)/$(LIKWID_APP) $(LIKWID_APP)-plain
+
+marker:
+ sed -i -e s/'INSTRUMENT_BENCH = .*#'/'INSTRUMENT_BENCH = true#'/g $(LIKWID_PATH)/config.mk
+ sed -i -e s/'CPPFLAGS := -DPAPI '/'CPPFLAGS := '/g $(LIKWID_PATH)/Makefile
+ cd $(LIKWID_PATH) && make distclean && make
+ cp $(LIKWID_PATH)/$(LIKWID_APP) $(LIKWID_APP)-marker
+papi:
+ sed -i -e s/'INSTRUMENT_BENCH = .*#'/'INSTRUMENT_BENCH = false#'/g $(LIKWID_PATH)/config.mk
+ cp $(LIKWID_PATH)/Makefile $(LIKWID_PATH)/Makefile.orig
+ sed -i -e s/'CPPFLAGS := '/'CPPFLAGS := -DPAPI '/g $(LIKWID_PATH)/Makefile
+ cd $(LIKWID_PATH) && make distclean && make
+ cp $(LIKWID_PATH)/$(LIKWID_APP) $(LIKWID_APP)-papi
+ mv $(LIKWID_PATH)/Makefile.orig $(LIKWID_PATH)/Makefile
diff --git a/test/accuracy/README b/test/accuracy/README
new file mode 100644
index 0000000..9dd8a78
--- /dev/null
+++ b/test/accuracy/README
@@ -0,0 +1,18 @@
+LIKWID accuracy tester
+
+likwid-tester and likwid-tester-plot are test applications written in Perl. The likwid-accuracy.py application does the same but is written in Python.
+
+Usage:
+make #build non-instrumented and LIKWID-instrumented versions of
+likwid-bench.
+Adjust test files in TESTS.
+Adjust test set file SET.txt or use the -s/--sets switch on commandline.
+likwid-accuracy.py #Runs the tests of all sets and saves results in folder RESULTS/<hostname>
+
+Options for likwid-accuracy.py:
+--pgf: Create a TeX file containing the definition of a PGF plot with suffix .tex -> .pdf
+--grace: Create grace batch file for further manipulation with XMgrace or create plot with gracebat .agr/.bat -> .png
+--gnuplot: Create GNUplot script .plot -> .jpg
+--script: Create a Bash script containing all commands to create all plots using pdflatex, gracebat and gnuplot.
+--scriptname: Set name for Bash script, default is $CWD/create_plots.sh
+--wiki/--only_wiki: Create a Wiki page for the Google Code Wiki including the .png pics found in Google Code Wiki picture path (http://<project>.googlecode.com/svn/wiki/images).
diff --git a/test/accuracy/likwid-accuracy.py b/test/accuracy/likwid-accuracy.py
new file mode 100755
index 0000000..3d2d63c
--- /dev/null
+++ b/test/accuracy/likwid-accuracy.py
@@ -0,0 +1,533 @@
+#!/usr/bin/env python
+
+import os, sys, os.path
+import re
+import subprocess
+import socket
+import stat
+import getopt
+
+# Needed for Wiki page output
+import glob
+import statistics
+
+# likwid-bench variants (expected in the current directory)
+bench_plain = "./likwid-bench-plain"
+bench_marker = "./likwid-bench-marker"
+bench_papi = "./likwid-bench-papi"
+
+# LIKWID tools, addressed relative to this directory
+perfctr = "../../likwid-perfctr"
+topology = "../../likwid-topology"
+
+# Patterns applied to single lines of the likwid-topology output
+topology_type = re.compile(r"^CPU type:\s+(.*)")
+topology_sockets = re.compile(r"^Sockets:\s+(\d+)")
+topology_corespersocket = re.compile(r"^Cores per socket:\s+(\d+)")
+topology_threadspercore = re.compile(r"^Threads per core:\s+(\d+)")
+
+# Input and output locations
+testlist = "SET.txt"
+testfolder = "TESTS"
+resultfolder = "RESULTS"
+hostname = socket.gethostname()
+picture_base = "http://likwid.googlecode.com/svn/wiki/images"
+
+# Line colors and point types of the three GNUplot line styles
+gnu_colors = ["red","blue","green"]#,"black","brown", "gray","violet", "cyan", "magenta","orange","#4B0082","#800000","turquoise","#006400","yellow"]
+gnu_marks = [5,13,9]#,2,3,4,6,7,8,9,10,11,12,14,15]
+
+# Defaults of the command line switches
+wiki = False
+papi = False
+only_wiki = False
+sets = []
+out_pgf = False
+out_gnuplot = False
+out_grace = False
+scriptfilename = "create_plots.sh"
+out_script = False
+
+def usage():
+    """Print the command line help text to stdout."""
+    print "Execute and evaluate accuracy tests for LIKWID with likwid-bench and likwid-perfctr"
+    print
+    print "-h/--help:\tPrint this help text"
+    print "-s/--sets:\tSpecify testgroups (comma separated). Can also be set in SET.txt"
+    # --papi is accepted by the option parser, so it is listed here as well
+    print "--papi:\t\tAdditionally run the PAPI instrumented likwid-bench"
+    print "--wiki:\t\tBesides testing write out results in Google code wiki syntax"
+    print "--only_wiki:\tDo not run benchmarks, read results from file and write out results in Google code wiki syntax"
+    print "Picture options:"
+    print "--pgf:\t\tCreate TeX document for each test with PGFPlot"
+    print "--gnuplot:\tCreate GNUPlot script for each test"
+    print "--grace:\tCreate Grace script that can be evaluated with gracebat"
+    print "--script:\tActivate recording of commands in a bash script"
+    print "--scriptname:\tRecord commands to create pictures in file (default: %s)" % (os.path.join(os.path.join(resultfolder,hostname),scriptfilename))
+
+def get_system_info():
+    """Query likwid-topology for the CPU name and the thread topology.
+
+    Returns the tuple (name, sockets, corespersocket, threadspercore). If
+    likwid-topology exits with an error the tuple is
+    ("Unknown system", 0, 0, 0), so callers can always unpack four values.
+    Values that do not show up in the output keep their defaults (None / 0).
+    """
+    name = None
+    sockets = 0
+    corespersocket = 0
+    threadspercore = 0
+
+    p = subprocess.Popen(topology, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    # communicate() drains the pipe while waiting for the child; calling
+    # wait() first can block forever once the pipe buffer is full.
+    output = p.communicate()[0]
+    if p.returncode != 0:
+        return "Unknown system", sockets, corespersocket, threadspercore
+    for line in output.split("\n"):
+        if not line.strip() or line.startswith("*") or line.startswith("-"): continue
+        # The patterns are anchored at the line start, so a successful match
+        # replaces the startswith() test and never dereferences None.
+        match = topology_type.match(line)
+        if match:
+            name = match.group(1).strip()
+        match = topology_sockets.match(line)
+        if match:
+            sockets = int(match.group(1))
+        match = topology_corespersocket.match(line)
+        if match:
+            corespersocket = int(match.group(1))
+        match = topology_threadspercore.match(line)
+        if match:
+            threadspercore = int(match.group(1))
+        if name and sockets > 0 and corespersocket > 0 and threadspercore > 0:
+            break
+    return name, sockets, corespersocket, threadspercore
+
+def get_groups():
+    """Return a dict mapping each performance group name to its description.
+
+    The groups are parsed from the output of "likwid-perfctr -a". An empty
+    dict is returned if the command exits with an error.
+    """
+    groups = {}
+    p = subprocess.Popen(perfctr+" -a", shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    # communicate() reads the output while waiting and so cannot deadlock on
+    # a full pipe like wait() followed by read().
+    output = p.communicate()[0]
+    if p.returncode != 0:
+        return groups
+    for line in output.split("\n"):
+        if line.startswith("-") or not line.strip(): continue
+        if line.startswith("Available"): continue
+        # Lines without a colon are no group entries; split only at the first
+        # colon because the description may contain colons itself.
+        if ":" not in line: continue
+        name, description = line.split(":", 1)
+        groups[name.strip()] = description.strip()
+    return groups
+
+def get_test_groups(groupdict):
+    """Return a dict mapping each test group to the list of its benchmark names.
+
+    The group names come from the -s/--sets option if it was given, otherwise
+    from the test list file (testlist). For every group the file
+    <testfolder>/<group>.txt is scanned for TEST lines; groups without such a
+    file are skipped. groupdict is not used; it is kept so that existing
+    callers keep working.
+    """
+    groups = {}
+    if len(sets) > 0:
+        setlist = sets
+    else:
+        setfp = open(testlist,'r')
+        setlist = setfp.read().strip().split("\n")
+        setfp.close()
+
+    for name in setlist:
+        # Same rules as the main test list parser: comment and blank lines are
+        # no groups, and the stripped name is used as key.
+        if not name.strip() or name.startswith("#"): continue
+        name = name.strip()
+        tests = []
+        path = os.path.join(testfolder, name) + ".txt"
+        if not os.path.exists(path): continue
+        fp = open(path,'r')
+        finput = fp.read().strip().split("\n")
+        fp.close()
+        for line in finput:
+            if line.startswith("TEST"):
+                fields = line.split(" ")
+                # A TEST line without a name carries no benchmark
+                if len(fields) > 1:
+                    tests.append(fields[1])
+        groups[name] = tests
+
+    return groups
+
+def get_values_from_file(file, lineoffset, linecount):
+    """Read the second column of a block of lines from a result file as floats.
+
+    file is the path of a data file with space separated columns, lineoffset
+    the index of the first line to read and linecount the number of lines.
+    If a line cannot be parsed, a message and the requested lines are printed
+    and the values collected up to that line are returned.
+    """
+    results = []
+    fp = open(file,'r')
+    try:
+        finput = fp.read().strip().split("\n")
+    finally:
+        fp.close()
+    selected = finput[lineoffset:lineoffset+linecount]
+    try:
+        for line in selected:
+            results.append(float(line.split(" ")[1]))
+    except (IndexError, ValueError):
+        # Only parse problems are reported here; a bare except would also
+        # swallow KeyboardInterrupt and SystemExit.
+        print "Cannot read file %s from %d to %d" % (file, lineoffset,lineoffset+linecount, )
+        for line in selected:
+            print line
+    return results
+
+def write_pgf(group, test, plain_file, marker_file, papi_file=None, execute=False, script=None):
+    """Write a LaTeX document with a PGFPlots figure for the results of one test.
+
+    The document <resultfolder>/<hostname>/<group>_<test>.tex references the
+    data files by basename. The PAPI curve is only added if the global papi
+    flag is set and papi_file is given. With execute=True pdflatex is run
+    inside the result folder; if script is an open file, the pdflatex call is
+    appended to it. Returns the path of the written .tex file.
+    """
+    filename = os.path.join(os.path.join(resultfolder,hostname),group+"_"+test+".tex")
+    fp = open(filename,'w')
+    # Every backslash is escaped explicitly. Sequences like "\d", "\u" or "\_"
+    # only yield a backslash as long as Python does not treat them as escapes.
+    fp.write("\\documentclass{article}\n")
+    fp.write("\\usepackage{pgfplots}\n")
+    fp.write("\\begin{document}\n")
+    fp.write("% cut from here\n")
+    fp.write("\\begin{tikzpicture}\n")
+    fp.write("\\begin{axis}[xlabel={Run}, ylabel={MFlops/s / MBytes/s},title={%s\\_%s},legend pos=south east,xtick=data,width=.75\\textwidth]\n" % (group.replace("_","\\_"),test.replace("_","\\_"),))
+    fp.write("\\addplot+[red,mark=square*,mark options={draw=red, fill=red}] table {%s};\n" % (os.path.basename(plain_file),))
+    fp.write("\\addlegendentry{plain};\n")
+    fp.write("\\addplot+[blue,mark=diamond*,mark options={draw=blue, fill=blue}] table {%s};\n" % (os.path.basename(marker_file),))
+    fp.write("\\addlegendentry{marker};\n")
+    if papi and papi_file:
+        fp.write("\\addplot+[green,mark=triangle*,mark options={draw=green, fill=green}] table {%s};\n" % (os.path.basename(papi_file),))
+        fp.write("\\addlegendentry{papi};\n")
+    fp.write("\\end{axis}\n")
+    fp.write("\\end{tikzpicture}\n")
+    fp.write("% stop cutting here\n")
+    fp.write("\\end{document}\n")
+    fp.close()
+    if execute:
+        cmd = "cd %s && pdflatex %s && cd -" % (os.path.dirname(filename), os.path.basename(filename),)
+        # stderr is merged into stdout and both are drained by communicate();
+        # an unread stderr pipe combined with wait() can block the child.
+        p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+        output = p.communicate()[0]
+        if p.returncode != 0:
+            print output
+    if script:
+        script.write("pdflatex %s\n" % (os.path.basename(filename),))
+    return filename
+
+def write_gnuplot(group, test, plain_file, marker_file, papi_file=None, execute=False, script=None):
+    """Write a GNUplot script that plots the results of one test to a JPEG file.
+
+    The script <resultfolder>/<hostname>/<group>_<test>.plot references the
+    data files and the output picture by basename. The PAPI curve is only
+    added if the global papi flag is set and papi_file is given (papi_file
+    defaults to None like in write_pgf and write_grace). With execute=True
+    gnuplot is run inside the result folder; if script is an open file, the
+    gnuplot call is appended to it. Returns the path of the written script.
+    """
+    filename = os.path.join(os.path.join(resultfolder,hostname),group+"_"+test+".plot")
+    fp = open(filename,'w')
+    # One line style per curve: plain, marker, papi
+    for i,color in enumerate(gnu_colors):
+        fp.write("set style line %d linetype 1 linecolor rgb '%s' lw 2 pt %s\n" % (i+1, color,gnu_marks[i]))
+    fp.write("set terminal jpeg\n")
+    fp.write("set title '%s_%s'\n" % (group, test,))
+    fp.write("set output '%s'\n" % (os.path.basename(os.path.join(os.path.join(resultfolder,hostname),group+"_"+test+".jpg")),))
+    fp.write("set xlabel 'Run'\n")
+    fp.write("set ylabel 'MFlops/s / MBytes/s'\n")
+    #fp.write("set xtics 1\n")
+    plot_string = "plot '%s' using 1:2 title 'plain' with linespoints ls 1, \\\n '%s' using 1:2 title 'marker' with linespoints ls 2" % (os.path.basename(plain_file), os.path.basename(marker_file),)
+    if papi and papi_file:
+        plot_string += ", \\\n '%s' using 1:2 title 'papi' with linespoints ls 3\n" % (os.path.basename(papi_file),)
+    fp.write(plot_string+"\n")
+    fp.close()
+    if execute:
+        cmd = "cd %s && gnuplot %s && cd -" % (os.path.dirname(filename), os.path.basename(filename),)
+        print cmd
+        # stderr is merged into stdout so that the messages of a failed run
+        # are part of the printed output; communicate() drains the pipe while
+        # waiting for the child.
+        p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+        output = p.communicate()[0]
+        if p.returncode != 0:
+            print output
+    if script:
+        script.write("gnuplot %s\n" % (os.path.basename(filename),))
+    return filename
+
+def write_grace(group, test, plain_file, marker_file, papi_file=None, execute=False, script=None):
+    """Write a Grace parameter file (.bat) that styles the plot of one test.
+
+    The gracebat command line loads the data files with -nxy, applies the
+    written parameter file with -param, prints to <group>_<test>.png and
+    saves <group>_<test>.agr. With execute=True gracebat is run inside the
+    result folder; if script is an open file, the gracebat call is appended
+    to it. The PAPI data set is only added if the global papi flag is set
+    and papi_file is given. Returns the path of the written .bat file.
+    """
+    folder = os.path.join(resultfolder,hostname)
+    filename = os.path.join(folder,group+"_"+test+".bat")
+    agrname = os.path.join(folder,group+"_"+test+".agr")
+    pngname = os.path.join(folder,group+"_"+test+".png")
+    if execute or script:
+        # The command line only uses names without directory part
+        plain_file = os.path.basename(plain_file)
+        marker_file = os.path.basename(marker_file)
+        if papi_file: papi_file = os.path.basename(papi_file)
+        pngname = os.path.basename(pngname)
+        agrname = os.path.basename(agrname)
+    cmd_options = "-autoscale xy -nxy %s -nxy %s "% (plain_file,marker_file,)
+    if papi and papi_file:
+        cmd_options += "-nxy %s " % (papi_file,)
+    out_options = "-hdevice PNG -printfile %s " % (pngname,)
+    out_options += "-saveall %s" % (agrname,)
+
+    # Data sets s0, s1, s2 in plot order: (legend, symbol, color)
+    datasets = [("plain", 2, 2), ("marker", 3, 4)]
+    if papi and papi_file:
+        datasets.append(("papi", 4, 3))
+    # Settings written for every data set; all sets share the same layout
+    # and differ only in legend, symbol and color.
+    set_settings = [
+        "legend \"%(legend)s\"",
+        "symbol %(symbol)d",
+        "symbol size 1",
+        "symbol color %(color)d",
+        "symbol pattern 1",
+        "symbol fill color %(color)d",
+        "symbol fill pattern 1",
+        "symbol linewidth 2",
+        "symbol linestyle 1",
+        "line type 1",
+        "line color %(color)d",
+        "line linestyle 1",
+        "line linewidth 2",
+        "line pattern 1",
+    ]
+
+    fp = open(filename,'w')
+    fp.write("title \"%s_%s\"\n" % (group, test,))
+    fp.write("xaxis label \"Run\"\n")
+    fp.write("xaxis label char size 1.2\n")
+    fp.write("xaxis ticklabel char size 1.2\n")
+    fp.write("yaxis label \"MFlops/s / MBytes/s\"\n")
+    fp.write("yaxis label char size 1.2\n")
+    fp.write("yaxis ticklabel char size 1.2\n")
+    fp.write("legend 0.8,0.7\n")
+    for idx, (legend, symbol, color) in enumerate(datasets):
+        values = {"legend": legend, "symbol": symbol, "color": color}
+        for setting in set_settings:
+            fp.write("s%d %s\n" % (idx, setting % values))
+    fp.close()
+    if execute:
+        cmd = "cd %s && gracebat %s -param %s %s && cd -" % (os.path.dirname(filename), cmd_options, os.path.basename(filename),out_options,)
+        p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+        # communicate() drains the pipe while waiting for the child; wait()
+        # followed by read() can block once the pipe buffer is full.
+        output = p.communicate()[0]
+        if p.returncode != 0:
+            print output
+    if script:
+        script.write("gracebat %s -param %s %s\n" % (cmd_options, os.path.basename(filename),out_options,))
+    return filename
+
+# Parse the command line. Unknown options end the program with exit code 2,
+# a call without any option prints the help text and ends with exit code 1.
+try:
+    opts, args = getopt.getopt(sys.argv[1:], "hs:", ["help", "sets=","script","scriptname=","wiki","only_wiki","pgf","gnuplot","grace","papi"])
+except getopt.GetoptError as err:
+    print str(err)
+    usage()
+    sys.exit(2)
+
+if not opts:
+    usage()
+    sys.exit(1)
+
+# Transfer the options into the module level switches
+for o, a in opts:
+    if o in ("-h","--help"):
+        usage()
+        sys.exit(0)
+    elif o == "--wiki":
+        wiki = True
+    elif o == "--only_wiki":
+        only_wiki = True
+    elif o == "--papi":
+        papi = True
+    elif o == "--pgf":
+        out_pgf = True
+    elif o == "--gnuplot":
+        out_gnuplot = True
+    elif o == "--grace":
+        out_grace = True
+    elif o in ("-s","--sets"):
+        sets = a.split(",")
+    elif o == "--script":
+        out_script = True
+    elif o == "--scriptname":
+        scriptfilename = a
+
+# Both the list of test groups and the folder with the group definitions are
+# required; without them the program ends with exit code 1.
+if not os.path.exists(testlist):
+ print "Cannot find file %s containing list of testgroups" % (testlist,)
+ sys.exit(1)
+if not os.path.exists(testfolder):
+ print "Cannot find folder %s containing the testgroups" % (testfolder,)
+ sys.exit(1)
+
+# test_set[group] holds the compiled REGEX_* patterns of the group and, per
+# TEST name, a dict with "RUNS", the ordered list "variants" and one entry per
+# variant name that maps to the third field of its VARIANT line.
+# plain_set, marker_set and papi_set mirror the group/test/variant layout and
+# start with an empty result list per variant.
+test_set = {}
+plain_set = {}
+marker_set = {}
+papi_set = {}
+fp = open(testlist,'r')
+for line in fp.read().split("\n"):
+ # Skip blank lines and lines starting with '#'
+ if not line.strip() or line.startswith("#"): continue
+ # Groups without a definition file in the test folder are ignored
+ if os.path.exists("%s/%s.txt" % (testfolder,line.strip(),)):
+ test_set[line.strip()] = {}
+ plain_set[line.strip()] = {}
+ marker_set[line.strip()] = {}
+ papi_set[line.strip()] = {}
+ testfp = open("%s/%s.txt" % (testfolder,line.strip(),),'r')
+ # Name of the TEST section currently being read, None outside a section
+ test = None
+ for i,testline in enumerate(testfp.read().split("\n")):
+ # A blank line ends the current TEST section
+ if test and not testline.strip(): test = None
+ # The pattern is everything after the first space of a REGEX_* line
+ if testline.startswith("REGEX_BENCH"):
+ test_set[line.strip()]["REGEX_BENCH"] = re.compile(" ".join(testline.split(" ")[1:]))
+ if testline.startswith("REGEX_PERF"):
+ test_set[line.strip()]["REGEX_PERF"] = re.compile(" ".join(testline.split(" ")[1:]))
+ if testline.startswith("REGEX_PAPI"):
+ test_set[line.strip()]["REGEX_PAPI"] = re.compile(" ".join(testline.split(" ")[1:]))
+ if testline.startswith("TEST"):
+ test = testline.split(" ")[1]
+ test_set[line.strip()][test] = {}
+ plain_set[line.strip()][test] = {}
+ marker_set[line.strip()][test] = {}
+ papi_set[line.strip()][test] = {}
+ # RUNS and VARIANT lines only count inside a TEST section
+ if testline.startswith("RUNS") and test:
+ test_set[line.strip()][test]["RUNS"] = int(testline.split(" ")[1])
+ if testline.startswith("VARIANT") and test:
+ # Fields: VARIANT <name> <value>, separated by arbitrary whitespace
+ linelist = re.split("\s+",testline);
+ variant = linelist[1]
+ if not test_set[line.strip()][test].has_key("variants"):
+ test_set[line.strip()][test]["variants"] = []
+ test_set[line.strip()][test][variant] = linelist[2]
+ test_set[line.strip()][test]["variants"].append(linelist[1])
+ plain_set[line.strip()][test][variant] = []
+ marker_set[line.strip()][test][variant] = []
+ papi_set[line.strip()][test][variant] = []
+ testfp.close()
+fp.close()
+
+
+# Without at least one known test group there is nothing to do
+if not test_set:
+    print "Cannot find any group in %s" % (testlist)
+    sys.exit(1)
+
+# Create the result folder and its per-host subfolder on first use
+for _folder in (resultfolder, os.path.join(resultfolder,hostname)):
+    if not os.path.exists(_folder):
+        os.mkdir(_folder)
+
+# Run all tests of all groups unless only the wiki page was requested.
+# For every test each variant is executed RUNS times with the plain
+# likwid-bench, optionally the PAPI build, and the marker build under
+# likwid-perfctr. The value extracted by the group's regular expression is
+# appended to the in-memory result lists and to the .dat files; every output
+# line is copied to the .raw files.
+# NOTE(review): the plot script file is created regardless of --script
+# (out_script is not read here) and the write_* helpers are called without
+# execute=True, so the plot commands are only recorded - confirm this is
+# intended.
+if not only_wiki:
+ scriptfile = os.path.join(os.path.join(resultfolder,hostname),scriptfilename)
+ script = open(scriptfile,'w')
+ script.write("#!/bin/bash\n")
+
+ for group in test_set.keys():
+ perfctr_string = "%s -c S0:0 -g %s -m " % (perfctr,group,)
+ for test in test_set[group].keys():
+ # The REGEX_* entries share the dict with the tests but are no tests
+ if test.startswith("REGEX"): continue
+ file_plain = os.path.join(os.path.join(resultfolder,hostname),group+"_"+test+"_plain.dat")
+ raw_plain = os.path.join(os.path.join(resultfolder,hostname),group+"_"+test+"_plain.raw")
+ file_marker = os.path.join(os.path.join(resultfolder,hostname),group+"_"+test+"_marker.dat")
+ raw_marker = os.path.join(os.path.join(resultfolder,hostname),group+"_"+test+"_marker.raw")
+ outfp_plain = open(file_plain,'w')
+ rawfp_plain = open(raw_plain,'w')
+ outfp_marker = open(file_marker,'w')
+ rawfp_marker = open(raw_marker,'w')
+ if papi:
+ file_papi = os.path.join(os.path.join(resultfolder,hostname),group+"_"+test+"_papi.dat")
+ raw_papi = os.path.join(os.path.join(resultfolder,hostname),group+"_"+test+"_papi.raw")
+ outfp_papi = open(file_papi,'w')
+ rawfp_papi = open(raw_papi,'w')
+ else:
+ file_papi = None
+ raw_papi = None
+ # Number written as first column of the .dat files
+ counter = 1
+ for size in test_set[group][test]["variants"]:
+ if size.startswith("RUNS"): continue
+ # The variant name is passed as working set size (-w), its value as -i
+ bench_options = "-t %s -i %s -g 1 -w N:%s:1" % (test, test_set[group][test][size], size,)
+ for i in range(0,test_set[group][test]["RUNS"]):
+ # Run with plain likwid-bench
+ p = subprocess.Popen(bench_plain+" "+bench_options, shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
+ try:
+ p.wait()
+ stdout = p.stdout.read()
+ p.stdout.close()
+ except:
+ sys.exit(1)
+ for line in stdout.split("\n"):
+ # The output is only echoed to the terminal if the run failed
+ if p.returncode != 0: print line
+ match = test_set[group]["REGEX_BENCH"].match(line)
+ if match:
+ plain_set[group][test][size].append(match.group(1))
+ outfp_plain.write(str(counter)+" "+match.group(1)+"\n")
+ rawfp_plain.write(line+"\n")
+ # Run with papi instrumented likwid-bench
+ if papi:
+ # The group name is handed to the PAPI build via the environment
+ os.environ["PAPI_BENCH"] = str(group)
+ p = subprocess.Popen(bench_papi+" "+bench_options, shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
+ try:
+ p.wait()
+ stdout = p.stdout.read()
+ p.stdout.close()
+ except:
+ sys.exit(1)
+ for line in stdout.split("\n"):
+ if p.returncode != 0: print line
+ match = test_set[group]["REGEX_PAPI"].match(line)
+ if match:
+ papi_set[group][test][size].append(match.group(1))
+ outfp_papi.write(str(counter)+" "+match.group(1)+"\n")
+ rawfp_papi.write(line+"\n")
+ # Run with LIKWID instrumented likwid-bench and likwid-perfctr
+ p = subprocess.Popen(perfctr_string+" "+bench_marker+" "+bench_options, shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
+ stdout = ""
+ try:
+ p.wait()
+ stdout = p.stdout.read()
+ p.stdout.close()
+ except:
+ sys.exit(1)
+ for line in stdout.split("\n"):
+ if p.returncode != 0: print line
+ match = test_set[group]["REGEX_PERF"].match(line)
+ if match:
+ # Unlike the plain and papi values, the marker value is converted to float
+ marker_set[group][test][size].append(float(match.group(1)))
+ outfp_marker.write(str(counter)+" "+str(float(match.group(1)))+"\n")
+ rawfp_marker.write(line+"\n")
+ counter += 1
+ outfp_plain.close()
+ rawfp_plain.close()
+ outfp_marker.close()
+ rawfp_marker.close()
+ if papi:
+ outfp_papi.close()
+ rawfp_papi.close()
+ # Write the requested plot definitions and record their commands in the script
+ if out_pgf: pgf_file = write_pgf(group, test, file_plain, file_marker, file_papi, script=script)
+ if out_gnuplot: plot_file = write_gnuplot(group, test, file_plain, file_marker, file_papi, script=script)
+ if out_grace: grace_file = write_grace(group, test, file_plain, file_marker, file_papi, script=script)
+
+
+ script.close()
+ # Make the recorded script readable, writable and executable for the owner only
+ os.chmod(scriptfile, stat.S_IRWXU)
+#if only_wiki:
+# for group in test_set.keys():
+# for test in test_set[group].keys():
+# if test.startswith("REGEX"): continue
+# filename = os.path.join(os.path.join(resultfolder,hostname),group+"_"+test+"_plain.dat")
+# for i,size in enumerate(test_set[group][test]["variants"]):
+# start = i*test_set[group][test]["RUNS"]
+# end = (i+1)*test_set[group][test]["RUNS"]
+# runs = test_set[group][test]["RUNS"]
+# print "Read file %s for size %s from %d to %d" % (filename,size, start, end,)
+# plain_set[group][test][size] = get_values_from_file(filename, start, runs)
+# if len(plain_set[group][test][size]) == 0: plain_set[group][test][size].append(0)
+# filename = os.path.join(os.path.join(resultfolder,hostname),group+"_"+test+"_marker.dat")
+# for i,size in enumerate(test_set[group][test]["variants"]):
+# start = i*test_set[group][test]["RUNS"]
+# end = (i+1)*test_set[group][test]["RUNS"]
+# runs = test_set[group][test]["RUNS"]
+# print "Read file %s for size %s from %d to %d" % (filename,size, start, end,)
+# marker_set[group][test][size] = get_values_from_file(filename, start, runs)
+# if len(marker_set[group][test][size]) == 0: marker_set[group][test][size].append(0)
+
+
+# Print a page in Google Code wiki syntax to stdout: hardware description,
+# available performance groups, testable groups and, per group and test, the
+# variant table, the picture URL and a min/max/average comparison of the
+# plain and the LIKWID results read back from the .dat files.
+if wiki or only_wiki:
+ name, sockets, corespersocket, threadspercore = get_system_info();
+ groups = get_groups()
+ testable_groups = get_test_groups(groups)
+ #print groups
+ #print testable_groups
+ #if testable_groups.has_key("FLOPS_DP"): del testable_groups["FLOPS_DP"]
+
+ print "#summary Accuracy Tests for %s\n" % (name,)
+ print "= Hardware description ="
+ print "Sockets: %d<br>" % (sockets,)
+ print "Cores per socket: %d<br>" % (corespersocket,)
+ print "Threads per core: %d<br>" % (threadspercore,)
+ print "Total number of processing units: %d<br>" % (sockets * corespersocket * threadspercore)
+ print
+ print "= Available groups ="
+ print "Each architecture defines a different set of groups. Here all the groups available for the %s are listed:<br>" % (name,)
+ for grp in groups.keys():
+ print "%s: %s<br>" % (grp, groups[grp],)
+ print
+ print "= Available verification tests ="
+ print "Not all groups can be tested for accuracy. Here only the groups are listed that can be verified. Each group is followed by the low-level benchmarks that are performed for comparison.<br>"
+ #print testable_groups
+ for grp in testable_groups.keys():
+ print "%s: %s<br>" % (grp, ", ".join (testable_groups[grp]))
+ print
+ print "= Accuracy comparison ="
+ print "For each varification group, the tests are performed twice. Once in a plain manner without measuring but calculating the resulting values and once through an instumented code with LIKWID.<br>"
+
+
+ for grp in testable_groups.keys():
+ print "== Verification of Group %s ==" % (grp,)
+ for test in testable_groups[grp]:
+ #print grp, test, test_set[grp][test]
+ print "=== Verification of Group %s with Test %s ===" % (grp, test,)
+ print "|| *Stream size* || *Iterations* ||"
+ for variant in test_set[grp][test]["variants"]:
+ print "|| %s || %s ||" % (variant, test_set[grp][test][variant], )
+ print
+ print "Each data size is tested %d times, hence the first %d entries on the x-axis correspond to the %d runs for the first data size of %s and so on.<br>" % (test_set[grp][test]["RUNS"],test_set[grp][test]["RUNS"],test_set[grp][test]["RUNS"],test_set[grp][test]["variants"][0],)
+ # Location of the plot below the wiki picture path
+ print "%s/accuracy/%s/%s_%s.png" % (picture_base,hostname, grp, test,)
+ print
+ file_plain = os.path.join(os.path.join(resultfolder,hostname),grp+"_"+test+"_plain.dat")
+ file_marker = os.path.join(os.path.join(resultfolder,hostname),grp+"_"+test+"_marker.dat")
+ print "|| Variant || Plain (Min) || LIKWID (Min) || Plain (Max) || LIKWID (Max) || Plain (Avg) || LIKWID (Avg) ||"
+ for i, variant in enumerate(test_set[grp][test]["variants"]):
+ # The i-th variant is read from the i-th block of RUNS lines in the result files
+ results_plain = get_values_from_file(file_plain, i*test_set[grp][test]["RUNS"], test_set[grp][test]["RUNS"])
+ results_marker = get_values_from_file(file_marker, i*test_set[grp][test]["RUNS"], test_set[grp][test]["RUNS"])
+ # Missing results are shown as 0 so that min, max and mean stay defined
+ if results_plain == []: results_plain.append(0)
+ if results_marker == []: results_marker.append(0)
+ print "|| %s || %d || %d || %d || %d || %d || %d ||" % (variant, min(results_plain), min(results_marker), max(results_plain), max(results_marker), int(statistics.mean(results_plain)), int(statistics.mean(results_marker)),)
+ print
+ print
diff --git a/test/accuracy/likwid-tester b/test/accuracy/likwid-tester
index 286b759..ea264ae 100755
--- a/test/accuracy/likwid-tester
+++ b/test/accuracy/likwid-tester
@@ -127,10 +127,12 @@ foreach my $test ( keys %$TESTS ) {
foreach ( 0 ... $runs ) {
print DATAFILE1 "$globalrun ";
print DATAFILE2 "$globalrun ";
+ #print "$BENCH_PLAIN -g 1 -t $benchmark -i $variant->{iter} -w $domain:$variant->{size}:1 > out-$hostname.txt";
system ("$BENCH_PLAIN -g 1 -t $benchmark -i $variant->{iter} -w $domain:$variant->{size}:1 > out-$hostname.txt");
my $result = extract_result('plain',$TESTS->{$test}->{REGEX_BENCH},$TESTS->{$test}->{REGEX_PERF});
print DATAFILE1 "$result\n";
- system ("$PERFCTR -c". $domain .":0 -m -g $test $BENCH_MARKER -g 1 -t $benchmark -i $variant->{iter} -w $domain:$variant->{size}:1 > out-$hostname.txt");
+ #print "$PERFCTR -C E:". $domain .":0 -m -g $test $BENCH_MARKER -g 1 -t $benchmark -i $variant->{iter} -w $domain:$variant->{size}:1 > out-$hostname.txt";
+ system ("$PERFCTR -C E:". $domain .":1 -m -g $test $BENCH_MARKER -g 1 -t $benchmark -i $variant->{iter} -w $domain:$variant->{size}:1 > out-$hostname.txt");
$result = extract_result('marker',$TESTS->{$test}->{REGEX_BENCH},$TESTS->{$test}->{REGEX_PERF});
print DATAFILE2 "$result\n";
$globalrun++;
diff --git a/test/accuracy/statistics.py b/test/accuracy/statistics.py
new file mode 100755
index 0000000..15dfdf2
--- /dev/null
+++ b/test/accuracy/statistics.py
@@ -0,0 +1,643 @@
+## Module statistics.py
+##
+## Copyright (c) 2013 Steven D'Aprano <steve+python at pearwood.info>.
+##
+## Licensed under the Apache License, Version 2.0 (the "License");
+## you may not use this file except in compliance with the License.
+## You may obtain a copy of the License at
+##
+## http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing, software
+## distributed under the License is distributed on an "AS IS" BASIS,
+## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+## See the License for the specific language governing permissions and
+## limitations under the License.
+
+
+"""
+Basic statistics module.
+
+This module provides functions for calculating statistics of data, including
+averages, variance, and standard deviation.
+
+Calculating averages
+--------------------
+
+================== =============================================
+Function Description
+================== =============================================
+mean Arithmetic mean (average) of data.
+median Median (middle value) of data.
+median_low Low median of data.
+median_high High median of data.
+median_grouped Median, or 50th percentile, of grouped data.
+mode Mode (most common value) of data.
+================== =============================================
+
+Calculate the arithmetic mean ("the average") of data:
+
+>>> mean([-1.0, 2.5, 3.25, 5.75])
+2.625
+
+
+Calculate the standard median of discrete data:
+
+>>> median([2, 3, 4, 5])
+3.5
+
+
+Calculate the median, or 50th percentile, of data grouped into class intervals
+centred on the data values provided. E.g. if your data points are rounded to
+the nearest whole number:
+
+>>> median_grouped([2, 2, 3, 3, 3, 4]) #doctest: +ELLIPSIS
+2.8333333333...
+
+This should be interpreted in this way: you have two data points in the class
+interval 1.5-2.5, three data points in the class interval 2.5-3.5, and one in
+the class interval 3.5-4.5. The median of these data points is 2.8333...
+
+
+Calculating variability or spread
+---------------------------------
+
+================== =============================================
+Function Description
+================== =============================================
+pvariance Population variance of data.
+variance Sample variance of data.
+pstdev Population standard deviation of data.
+stdev Sample standard deviation of data.
+================== =============================================
+
+Calculate the standard deviation of sample data:
+
+>>> stdev([2.5, 3.25, 5.5, 11.25, 11.75]) #doctest: +ELLIPSIS
+4.38961843444...
+
+If you have previously calculated the mean, you can pass it as the optional
+second argument to the four "spread" functions to avoid recalculating it:
+
+>>> data = [1, 2, 2, 4, 4, 4, 5, 6]
+>>> mu = mean(data)
+>>> pvariance(data, mu)
+2.5
+
+
+Exceptions
+----------
+
+A single exception is defined: StatisticsError is a subclass of ValueError.
+
+"""
+
+__all__ = [ 'StatisticsError',
+ 'pstdev', 'pvariance', 'stdev', 'variance',
+ 'median', 'median_low', 'median_high', 'median_grouped',
+ 'mean', 'mode',
+ ]
+
+
+import collections
+import math
+
+from fractions import Fraction
+from decimal import Decimal
+
+
+# === Exceptions ===
+
+class StatisticsError(ValueError):
+    """Error raised by the functions of this module, e.g. for empty data."""
+
+
+# === Private utilities ===
+
+def _sum(data, start=0):
+    """_sum(data [, start]) -> value
+
+    Return a high-precision sum of the given numeric data. If optional
+    argument ``start`` is given, it is added to the total. If ``data`` is
+    empty, ``start`` (defaulting to 0) is returned.
+
+
+    Examples
+    --------
+
+    >>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75)
+    11.0
+
+    Some sources of round-off error will be avoided:
+
+    >>> _sum([1e50, 1, -1e50] * 1000)  # Built-in sum returns zero.
+    1000.0
+
+    Fractions and Decimals are also supported:
+
+    >>> from fractions import Fraction as F
+    >>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)])
+    Fraction(63, 20)
+
+    >>> from decimal import Decimal as D
+    >>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")]
+    >>> _sum(data)
+    Decimal('0.6963')
+
+    Mixed types are currently treated as an error, except that int is
+    allowed.
+    """
+    # We fail as soon as we reach a value that is not an int or the type of
+    # the first value which is not an int. E.g. _sum([int, int, float, int])
+    # is okay, but sum([int, int, float, Fraction]) is not.
+    allowed_types = set([int, type(start)])
+    n, d = _exact_ratio(start)
+    partials = {d: n}  # map {denominator: sum of numerators}
+    # Micro-optimizations.
+    exact_ratio = _exact_ratio
+    partials_get = partials.get
+    # Add numerators for each denominator.
+    for x in data:
+        _check_type(type(x), allowed_types)
+        n, d = exact_ratio(x)
+        partials[d] = partials_get(d, 0) + n
+    # Find the expected result type. If allowed_types has only one item, it
+    # will be int; if it has two, use the one which isn't int.
+    assert len(allowed_types) in (1, 2)
+    if len(allowed_types) == 1:
+        assert allowed_types.pop() is int
+        T = int
+    else:
+        T = (allowed_types - set([int])).pop()
+    if None in partials:
+        # The None key collects the INF and NAN values.
+        assert issubclass(T, (float, Decimal))
+        special = partials[None]
+        # math.isfinite() is not available on Python 2; "not finite" is
+        # spelled as "infinite or NAN", which every Python version supports.
+        assert math.isinf(special) or math.isnan(special)
+        return T(special)
+    total = Fraction()
+    for d, n in sorted(partials.items()):
+        total += Fraction(n, d)
+    if issubclass(T, int):
+        assert total.denominator == 1
+        return T(total.numerator)
+    if issubclass(T, Decimal):
+        return T(total.numerator)/total.denominator
+    return T(total)
+
+
+def _check_type(T, allowed):
+    """Check that type T may be combined with the types in the set ``allowed``.
+
+    A type that is already in ``allowed`` is accepted. An unknown type is
+    added to ``allowed`` (in place) if the set holds exactly one type;
+    otherwise TypeError is raised.
+    """
+    if T in allowed:
+        return
+    if len(allowed) != 1:
+        names = [t.__name__ for t in allowed]
+        names.append(T.__name__)
+        raise TypeError("unsupported mixed types: %s" % ', '.join(names))
+    allowed.add(T)
+
+
+def _exact_ratio(x):
+    """Convert Real number x exactly to (numerator, denominator) pair.
+
+    >>> _exact_ratio(0.25)
+    (1, 4)
+
+    x is expected to be an int, Fraction, Decimal or float. For INF and NAN
+    the pair (x, None) is returned. Other types raise TypeError.
+    """
+    try:
+        try:
+            # int, Fraction
+            return (x.numerator, x.denominator)
+        except AttributeError:
+            # float
+            try:
+                return x.as_integer_ratio()
+            except AttributeError:
+                # Decimal
+                try:
+                    return _decimal_to_ratio(x)
+                except AttributeError:
+                    msg = "can't convert type '{}' to numerator/denominator"
+                    exc = TypeError(msg.format(type(x).__name__))
+                    exc.__cause__ = None
+                    raise exc
+    except (OverflowError, ValueError):
+        # INF or NAN
+        if __debug__:
+            # Decimal signalling NANs cannot be converted to float :-(
+            if isinstance(x, Decimal):
+                assert not x.is_finite()
+            else:
+                # math.isfinite() is not available on Python 2; "not finite"
+                # is spelled as "infinite or NAN" instead.
+                assert math.isinf(x) or math.isnan(x)
+        return (x, None)
+
+
+# FIXME This is faster than Fraction.from_decimal, but still too slow.
+def _decimal_to_ratio(d):
+    """Return Decimal d as an exact (numerator, denominator) pair of integers.
+
+    The denominator is a power of ten and the pair is not reduced:
+
+    >>> from decimal import Decimal
+    >>> _decimal_to_ratio(Decimal("2.6"))
+    (26, 10)
+
+    ValueError is raised for INF, NAN and sNAN.
+    """
+    sign, digits, exp = d.as_tuple()
+    if exp in ('F', 'n', 'N'):  # INF, NAN, sNAN
+        assert not d.is_finite()
+        raise ValueError
+    # Build the coefficient from its decimal digits, lowest place first
+    num = 0
+    for place, digit in enumerate(reversed(digits)):
+        num += digit * 10**place
+    den = 1
+    if exp < 0:
+        den = 10**-exp
+    else:
+        num *= 10**exp
+    return (-num if sign else num, den)
+
+
+def _counts(data):
+    """Return the (value, frequency) pairs of the most frequent values in data.
+
+    All values that share the highest frequency are returned; the list is
+    empty for empty data.
+    """
+    ranked = collections.Counter(iter(data)).most_common()
+    if not ranked:
+        return ranked
+    # most_common() sorts by descending frequency, so the entries with the
+    # top frequency are exactly the leading ones.
+    top = ranked[0][1]
+    return [entry for entry in ranked if entry[1] == top]
+
+
+# === Measures of central tendency (averages) ===
+
+def mean(data):
+    """Return the sample arithmetic mean of data.
+
+    >>> mean([1, 2, 3, 4, 4])
+    2.8
+
+    >>> from fractions import Fraction as F
+    >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)])
+    Fraction(13, 21)
+
+    >>> from decimal import Decimal as D
+    >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")])
+    Decimal('0.5625')
+
+    If ``data`` is empty, StatisticsError will be raised.
+    """
+    # An iterator can be consumed only once, so it is stored in a list first
+    if iter(data) is data:
+        data = list(data)
+    n = len(data)
+    if n < 1:
+        raise StatisticsError('mean requires at least one data point')
+    # Data consisting of ints only is summed with a float start value, which
+    # makes _sum() return a float.
+    if all(type(item) is int for item in data):
+        return _sum(data,0.0)/n
+    return _sum(data)/n
+
+def sort_and_convert(data):
+    """Return the items of data converted to float as a new, sorted list."""
+    return sorted([float(item) for item in data])
+
+# FIXME: investigate ways to calculate medians without sorting? Quickselect?
+def median(data):
+    """Return the median (middle value) of numeric data.
+
+    For an odd number of data points the middle one is returned. For an even
+    number the median is the average of the two middle values, computed as
+    float:
+
+    >>> median([1, 3, 5])
+    3
+    >>> median([1, 3, 5, 7])
+    4.0
+
+    StatisticsError is raised for empty data.
+    """
+    ordered = sorted(data)
+    count = len(ordered)
+    if count == 0:
+        raise StatisticsError("no median for empty data")
+    middle, odd = divmod(count, 2)
+    if odd:
+        return ordered[middle]
+    return (float(ordered[middle - 1]) + ordered[middle])/2
+
+
def median_low(data):
    """Return the low median of numeric data.

    The middle element of the sorted data is returned when the number of
    data points is odd; for an even number the smaller of the two middle
    elements is returned.

    >>> median_low([1, 3, 5])
    3
    >>> median_low([1, 3, 5, 7])
    3

    StatisticsError is raised when ``data`` is empty.
    """
    ordered = sorted(data)
    count = len(ordered)
    if count == 0:
        raise StatisticsError("no median for empty data")
    # (count - 1)//2 is the middle index for odd counts and the lower of the
    # two middle indices for even counts.
    return ordered[(count - 1)//2]
+
+
def median_high(data):
    """Return the high median of data.

    The middle element of the sorted data is returned when the number of
    data points is odd; for an even number the larger of the two middle
    elements is returned.

    >>> median_high([1, 3, 5])
    3
    >>> median_high([1, 3, 5, 7])
    5

    StatisticsError is raised when ``data`` is empty.
    """
    ordered = sorted(data)
    if not ordered:
        raise StatisticsError("no median for empty data")
    return ordered[len(ordered)//2]
+
+
def median_grouped(data, interval=1):
    """Return the 50th percentile (median) of grouped continuous data.

    >>> median_grouped([1, 2, 2, 3, 4, 4, 4, 4, 4, 5])
    3.7
    >>> median_grouped([52, 52, 53, 54])
    52.5

    This calculates the median as the 50th percentile, and should be
    used when your data is continuous and grouped. In the above example,
    the values 1, 2, 3, etc. actually represent the midpoint of classes
    0.5-1.5, 1.5-2.5, 2.5-3.5, etc. The middle value falls somewhere in
    class 3.5-4.5, and interpolation is used to estimate it.

    Optional argument ``interval`` represents the class interval, and
    defaults to 1. Changing the class interval naturally will change the
    interpolated 50th percentile value:

    >>> median_grouped([1, 3, 3, 5, 7], interval=1)
    3.25
    >>> median_grouped([1, 3, 3, 5, 7], interval=2)
    3.5

    A single data point is returned unchanged.  StatisticsError is raised
    for empty data and TypeError when the midpoint value or ``interval`` is
    a ``str``/``bytes`` object.

    This function does not check whether the data points are at least
    ``interval`` apart.
    """
    # True division regardless of interpreter version: with the plain ``/``
    # operator and integer operands, Python 2 floors ``interval/2`` and
    # ``n/2``, which shifts the class limit and breaks the examples above.
    from operator import truediv

    data = sorted(data)
    n = len(data)
    if n == 0:
        raise StatisticsError("no median for empty data")
    elif n == 1:
        return data[0]
    # Find the value at the midpoint. Remember this corresponds to the
    # centre of the class interval.
    x = data[n//2]
    for obj in (x, interval):
        if isinstance(obj, (str, bytes)):
            raise TypeError('expected number but got %r' % obj)
    try:
        L = x - truediv(interval, 2)  # The lower limit of the median interval.
    except TypeError:
        # Mixed type. For now we just coerce to float.
        L = float(x) - float(interval)/2
    cf = data.index(x)  # Number of values below the median interval.
    # FIXME The following line could be more efficient for big lists.
    f = data.count(x)  # Number of data points in the median interval.
    return L + truediv(interval*(truediv(n, 2) - cf), f)
+
+
def mode(data):
    """Return the single most common data point of discrete or nominal data.

    ``mode`` treats the data as discrete and yields exactly one value, which
    is the usual school-book definition of the mode:

    >>> mode([1, 1, 2, 3, 3, 3, 3, 4])
    3

    Nominal (non-numeric) data is handled as well:

    >>> mode(["red", "blue", "blue", "red", "green", "red", "red"])
    'red'

    StatisticsError is raised when the data is empty or when several values
    are tied for the highest frequency.
    """
    # Only the entries tied for the highest frequency are returned here.
    candidates = _counts(data)
    if not candidates:
        raise StatisticsError('no mode for empty data')
    if len(candidates) != 1:
        raise StatisticsError(
            'no unique mode; found %d equally common values' % len(candidates)
        )
    return candidates[0][0]
+
+
+# === Measures of spread ===
+
+# See http://mathworld.wolfram.com/Variance.html
+# http://mathworld.wolfram.com/SampleVariance.html
+# http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
+#
+# Under no circumstances use the so-called "computational formula for
+# variance", as that is only suitable for hand calculations with a small
+# amount of low-precision data. It has terrible numeric properties.
+#
+# See a comparison of three computational methods here:
+# http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/
+
def _ss(data, c=None):
    """Return the sum of squared deviations of the sequence ``data``.

    With ``c`` left as None the mean is computed in a first pass and the
    deviations from it in a second pass.  A caller-supplied ``c`` is used
    as the centre as-is; be careful with that variant, because a poorly
    chosen centre can produce garbage results.
    """
    centre = mean(data) if c is None else c
    squared = _sum((x - centre)**2 for x in data)
    # Mathematically the plain deviations add up to zero; in practice
    # rounding error may leave a remainder, which is compensated for here.
    remainder = _sum((x - centre) for x in data)
    total = squared - remainder**2/len(data)
    assert not total < 0, 'negative sum of square deviations: %f' % total
    return total
+
+
def variance(data, xbar=None):
    """Return the sample variance of ``data``.

    ``data`` is an iterable of Real-valued numbers holding at least two
    values.  ``xbar``, when supplied, should be the mean of the data; when
    it is omitted or None the mean is computed automatically.

    This is the right function for data that is a sample of a population.
    For the variance of an entire population see ``pvariance``.

    Examples:

    >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5]
    >>> variance(data)
    1.3720238095238095

    A mean that has already been computed can be handed over as the second
    argument ``xbar`` so that it is not calculated again:

    >>> m = mean(data)
    >>> variance(data, m)
    1.3720238095238095

    No check is made that ``xbar`` really is the mean of ``data``; an
    arbitrary ``xbar`` may give invalid or impossible results.

    Decimals and Fractions are supported:

    >>> from decimal import Decimal as D
    >>> variance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")])
    Decimal('31.01875')

    >>> from fractions import Fraction as F
    >>> variance([F(1, 6), F(1, 2), F(5, 3)])
    Fraction(67, 108)

    StatisticsError is raised for fewer than two data points.
    """
    # Iterators are materialised so that len() and a second pass are possible.
    values = list(data) if iter(data) is data else data
    count = len(values)
    if count < 2:
        raise StatisticsError('variance requires at least two data points')
    return _ss(values, xbar)/(count - 1)
+
+
def pvariance(data, mu=None):
    """Return the population variance of ``data``.

    ``data`` is an iterable of Real-valued numbers holding at least one
    value.  ``mu``, when supplied, should be the mean of the data; when it
    is omitted or None the mean is computed automatically.

    This is the right function for data covering an entire population.
    To estimate the variance from a sample, ``variance`` is usually the
    better choice.

    Examples:

    >>> data = [0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25]
    >>> pvariance(data)
    1.25

    A mean that has already been computed can be handed over as the second
    argument so that it is not calculated again:

    >>> mu = mean(data)
    >>> pvariance(data, mu)
    1.25

    No check is made that ``mu`` really is the mean of ``data``; an
    arbitrary ``mu`` may give invalid or impossible results.

    Decimals and Fractions are supported:

    >>> from decimal import Decimal as D
    >>> pvariance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")])
    Decimal('24.815')

    >>> from fractions import Fraction as F
    >>> pvariance([F(1, 4), F(5, 4), F(1, 2)])
    Fraction(13, 72)

    StatisticsError is raised for empty data.
    """
    # Iterators are materialised so that len() and a second pass are possible.
    values = list(data) if iter(data) is data else data
    count = len(values)
    if count < 1:
        raise StatisticsError('pvariance requires at least one data point')
    return _ss(values, mu)/count
+
+
def stdev(data, xbar=None):
    """Return the square root of the sample variance.

    Arguments and further details are described under ``variance``.

    >>> stdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
    1.0810874155219827

    """
    spread = variance(data, xbar)
    try:
        # Prefer the value's own sqrt() method when it offers one.
        root = spread.sqrt()
    except AttributeError:
        root = math.sqrt(spread)
    return root
+
+
def pstdev(data, mu=None):
    """Return the square root of the population variance.

    Arguments and further details are described under ``pvariance``.

    >>> pstdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
    0.986893273527251

    """
    spread = pvariance(data, mu)
    try:
        # Prefer the value's own sqrt() method when it offers one.
        root = spread.sqrt()
    except AttributeError:
        root = math.sqrt(spread)
    return root
+
def percentile(data, percentile):
    """Return the element of ``data`` found at the given percentile rank.

    ``data`` may be any iterable of mutually comparable values; it is sorted
    before the lookup.  ``percentile`` is taken as a fraction (``0.25``)
    unless it is greater than 1, in which case it is read as a percentage
    (``25``) and divided by 100.

    The element at index ``int(n * fraction)`` of the sorted data is
    returned, with two adjustments: index 0 is moved up to 1, and an index
    equal to ``n`` is pulled back to the last element.  A single data point
    is therefore always returned as-is.

    >>> percentile([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 50)
    6
    >>> percentile([5], 0.5)
    5

    Raises IndexError if ``data`` is empty.
    """
    ordered = sorted(data)
    # Use the sorted copy for the length so that iterators work as well.
    n = len(ordered)
    if n == 0:
        raise IndexError('percentile requires at least one data point')
    if percentile > 1:
        percentile /= 100.0
    index = int(n*percentile)
    if index == 0:
        index = 1
    # Checked after the bump above so that one-element data stays in range.
    if index == n:
        index = n - 1
    return ordered[index]
+
def percentile_10(data):
    """Shortcut for ``percentile(data, 0.1)``."""
    return percentile(data, 0.1)


def percentile_20(data):
    """Shortcut for ``percentile(data, 0.2)``."""
    return percentile(data, 0.2)


def percentile_25(data):
    """Shortcut for ``percentile(data, 0.25)``."""
    return percentile(data, 0.25)


def percentile_30(data):
    """Shortcut for ``percentile(data, 0.3)``."""
    return percentile(data, 0.3)


def percentile_40(data):
    """Shortcut for ``percentile(data, 0.4)``."""
    return percentile(data, 0.4)


def percentile_50(data):
    """Shortcut for ``percentile(data, 0.5)``."""
    return percentile(data, 0.5)


def percentile_60(data):
    """Shortcut for ``percentile(data, 0.6)``."""
    return percentile(data, 0.6)


def percentile_70(data):
    """Shortcut for ``percentile(data, 0.7)``."""
    return percentile(data, 0.7)


def percentile_75(data):
    """Shortcut for ``percentile(data, 0.75)``."""
    return percentile(data, 0.75)


def percentile_80(data):
    """Shortcut for ``percentile(data, 0.8)``."""
    return percentile(data, 0.8)


def percentile_90(data):
    """Shortcut for ``percentile(data, 0.9)``."""
    return percentile(data, 0.9)
+
diff --git a/test/executable_tests/Makefile b/test/executable_tests/Makefile
new file mode 100644
index 0000000..08acc2a
--- /dev/null
+++ b/test/executable_tests/Makefile
@@ -0,0 +1,22 @@
+
+
# Command-line argument tests for the LIKWID executables.
# Every target hands the matching <executable>.txt specification to tester.sh;
# "make" (or "make all") runs the tests for all executables.

# None of the targets produce a file of the same name.  Declaring them phony
# keeps a stray file or directory called e.g. "bench" from disabling a test.
.PHONY: all topology pin perfctr memsweeper powermeter features bench genCfg setFreq

all: topology pin perfctr memsweeper powermeter features bench genCfg setFreq

topology:
	./tester.sh likwid-topology
pin:
	./tester.sh likwid-pin
perfctr:
	./tester.sh likwid-perfctr
memsweeper:
	./tester.sh likwid-memsweeper
powermeter:
	./tester.sh likwid-powermeter
features:
	./tester.sh likwid-features
bench:
	./tester.sh likwid-bench
genCfg:
	./tester.sh likwid-genCfg
setFreq:
	./tester.sh likwid-setFreq
diff --git a/test/executable_tests/README b/test/executable_tests/README
new file mode 100644
index 0000000..99ab560
--- /dev/null
+++ b/test/executable_tests/README
@@ -0,0 +1,8 @@
+Simple commandline argument evaluation tool
+
+Usage: ./tester.sh <executable>
+
+To batch-test all executables, simply run `make`.
+
+All lines in the <executable>.txt file are executed and the output evaluated.
+Only simple checks are made using grep.
diff --git a/test/executable_tests/likwid-bench.txt b/test/executable_tests/likwid-bench.txt
new file mode 100644
index 0000000..474b160
--- /dev/null
+++ b/test/executable_tests/likwid-bench.txt
@@ -0,0 +1,29 @@
+| EXIT 0 | GREP Help message
+-h | EXIT 0 | GREP Help message
+-v | EXIT 0 | GREP likwid-bench
+-p | EXIT 0 | GREP Domain
+-a | EXIT 0 | GREP sum
+-i | EXIT 1 | GREP requires an argument
+-i 0 | EXIT 1 | GREP Iterations must be greater than 0
+-i 100 | EXIT 1 | GREP Number of Workgroups must be 1 or greater
+-l | EXIT 1 | GREP requires an argument
+-l sum | EXIT 0 | GREP Name: sum
+-l XXX | EXIT 0 | GREP Unknown test case XXX
+-t | EXIT 1 | GREP requires an argument
+-t sum | EXIT 1 | GREP Number of Workgroups must be 1 or greater
+-t XXX | EXIT 1 | GREP Number of Workgroups must be 1 or greater
+-g | EXIT 1 | GREP requires an argument
+-g 0 | EXIT 1 | GREP Number of Workgroups must be 1 or greater
+-g 1 | EXIT 1 | GREP workgroups requested but only 0 given on commandline
+-g X | EXIT 1 | GREP Number of Workgroups must be 1 or greater
+-w | EXIT 1 | GREP requires an argument
+-g 1 -w X | EXIT 1 | GREP You need to specify a test case first
+-t sum -g 1 -w X | EXIT 1 | GREP Error in parsing workgroup string
+-t sum -g 1 -w N:1 | EXIT 1 | GREP Cannot parse string
+-t XXX -g 1 -w N:1MB:1 | EXIT 1 | GREP You need to specify a test case first
+-g 1 -w N:100kB:1 | EXIT 1 | GREP You need to specify a test case first
+-i 100 -t sum -g 1 -w N:100kB:1 | EXIT 0 | GREP Number of Flops
+-i 100 -t sum -g 2 -w N:100kB:1 | EXIT 1 | GREP workgroups requested but only 1 given on commandline
+-i 100 -t sum -g 2 -w N:100kB:1 -w N:100kB:1 | EXIT 0 | GREP Number of Flops
+-i 100 -t sum -g 1 -w N:100kB:2:1 | EXIT 1 | GREP Error in parsing workgroup string
+-i 100 -t sum -g 1 -w N:100kB:2:1:2 | EXIT 0 | GREP Number of Flops
diff --git a/test/executable_tests/likwid-features.txt b/test/executable_tests/likwid-features.txt
new file mode 100644
index 0000000..ce95592
--- /dev/null
+++ b/test/executable_tests/likwid-features.txt
@@ -0,0 +1,9 @@
+| EXIT 0 | GREP Performance monitoring | GREP CPU core id
+-h | EXIT 0 | GREP Help message
+-v | EXIT 0 | GREP likwid-features
+-c | EXIT 1 | GREP option requires an argument
+-s | EXIT 1 | GREP option requires an argument
+-u | EXIT 1 | GREP option requires an argument
+-c 0 | EXIT 0 | GREP Performance monitoring | GREP CPU core id
+-s HW_PREFETCHER | EXIT 0 | GREP Performance monitoring | GREP CPU core id
+-u HW_PREFETCHER | EXIT 0 | GREP Performance monitoring | GREP CPU core id
diff --git a/test/executable_tests/likwid-genCfg.txt b/test/executable_tests/likwid-genCfg.txt
new file mode 100644
index 0000000..6369b70
--- /dev/null
+++ b/test/executable_tests/likwid-genCfg.txt
@@ -0,0 +1,5 @@
+| EXIT 1 | GREP Permission denied
+-h | EXIT 0 | GREP Help message
+-v | EXIT 0 | GREP likwid-genCfg
+-o | EXIT 1 | GREP option requires an argument
+-o /tmp/topo.txt | EXIT 0 | GREP CPU name
diff --git a/test/executable_tests/likwid-memsweeper.txt b/test/executable_tests/likwid-memsweeper.txt
new file mode 100644
index 0000000..6c4cd0e
--- /dev/null
+++ b/test/executable_tests/likwid-memsweeper.txt
@@ -0,0 +1,8 @@
+| EXIT 0 | GREP Sweeping domain
+-h | EXIT 0 | GREP Help message
+-v | EXIT 0 | GREP likwid-memsweeper
+-c | EXIT 1 | GREP option requires an argument
+-c - | EXIT 1 | GREP Cannot parse string
+-c -1 | EXIT 0 | GREP Sweeping domain
+-c 0 | EXIT 0 | GREP Sweeping domain
+-c 10 | EXIT 1 | GREP ERROR | GREP numa
diff --git a/test/executable_tests/likwid-perfctr.txt b/test/executable_tests/likwid-perfctr.txt
new file mode 100644
index 0000000..80ac60d
--- /dev/null
+++ b/test/executable_tests/likwid-perfctr.txt
@@ -0,0 +1,38 @@
+| EXIT 0 | GREP Help message
+-h | EXIT 0 | GREP Help message
+-v | EXIT 0 | GREP likwid-perfctr
+-i | EXIT 0 | GREP CPU family
+-V -c 0 hostname | EXIT 0 | GREP NOTICE
+-V | EXIT 1 | GREP You must specify at least one processor
+-g | EXIT 1 | GREP option requires an argument
+-g BRANCH -H | EXIT 0 | GREP Group BRANCH:
+-a | EXIT 0 | GREP Available groups
+-V -e | EXIT 0 | GREP This architecture
+-t 200ms | EXIT 1 | GREP You must specify at least one processor
+-c | EXIT 1 | GREP option requires an argument
+-c 0 | EXIT 1 | GREP You have to specify a program to measure as argument
+-t 200ms -c 0 | EXIT 1 | GREP Executable must be given on commandline
+-S | EXIT 1 | GREP option requires an argument
+-o | EXIT 1 | GREP option requires an argument
+-o /tmp/test | EXIT 1 | GREP Outputfile has no filetype suffix
+-o /tmp/test.txt | EXIT 1 | GREP You must specify at least one processor
+-S 1 | EXIT 1 | GREP You must specify at least one processor
+-S 1 -c 0 | EXIT 1 | GREP You have to specify a group or event set to measure using the -g option.
+-S 1 -C 0 | EXIT 1 | GREP You have to specify a group or event set to measure using the -g option.
+-S 1 -c 0 -g BRANCH | EXIT 0 | GREP Measuring group BRANCH | GREP Branch
+-S 1 -C 0 -g BRANCH | EXIT 0 | GREP Measuring group BRANCH | GREP Branch
+-S 1 -c 0,1 -g BRANCH | EXIT 0 | GREP Measuring group BRANCH | GREP core 0 | GREP core 1 | GREP Branch
+-S 1 -c 0-1 -g BRANCH | EXIT 0 | GREP Measuring group BRANCH | GREP core 0 | GREP core 1 | GREP Branch
+-S 1 -c 0,1-1 -g BRANCH | EXIT 0 | GREP Measuring group BRANCH | GREP core 0 | GREP core 1 | GREP Branch
+-S 1 -C 0,1 -g BRANCH | EXIT 0 | GREP Measuring group BRANCH | GREP core 0 | GREP core 1 | GREP Branch
+-S 1 -C 0-1 -g BRANCH | EXIT 0 | GREP Measuring group BRANCH | GREP core 0 | GREP core 1 | GREP Branch
+-S 1 -C 0,1-1 -g BRANCH | EXIT 0 | GREP Measuring group BRANCH | GREP core 0 | GREP core 1 | GREP Branch
+-S 1 -c E:N:2 -g BRANCH | EXIT 0 | GREP Measuring group BRANCH | GREP core 0 | GREP Branch
+-S 1 -c E:N:2:1:2 -g BRANCH | EXIT 0 | GREP Measuring group BRANCH | GREP core 0 | GREP Branch
+-S 1 -c M:scatter -g BRANCH | EXIT 0 | GREP Measuring group BRANCH | GREP core 0 | GREP Branch
+-S 1 -C E:N:2 -g BRANCH | EXIT 0 | GREP Measuring group BRANCH | GREP core 0 | GREP Branch
+-S 1 -C E:N:2:1:2 -g BRANCH | EXIT 0 | GREP Measuring group BRANCH | GREP core 0 | GREP Branch
+-S 1 -C M:scatter -g BRANCH | EXIT 0 | GREP Measuring group BRANCH | GREP core 0 | GREP Branch
+-c 0 -g BRANCH hostname | EXIT 0 | GREP Measuring group BRANCH | GREP core 0 | GREP Branch
+-C 0 -g BRANCH hostname | EXIT 0 | GREP Measuring group BRANCH | GREP core 0 | GREP Branch
+-C 0 -g BRANCH -m hostname | EXIT 1 | GREP The marker result file could not be found
diff --git a/test/executable_tests/likwid-pin.txt b/test/executable_tests/likwid-pin.txt
new file mode 100644
index 0000000..801f79c
--- /dev/null
+++ b/test/executable_tests/likwid-pin.txt
@@ -0,0 +1,26 @@
+-h | EXIT 0 | GREP Help message
+-v | EXIT 0 | GREP likwid-pin
+-i | EXIT 1 | GREP Executable must be given on commandline
+-i hostname | EXIT 0 | GREP Set mem_policy to interleaved
+-S | EXIT 1 |GREP Executable must be given on commandline
+-S hostname | EXIT 0 | GREP Sweeping memory
+-c | EXIT 1 |GREP option requires an argument
+-p | EXIT 0 | GREP Domain | GREP Tag
+-c 0 | EXIT 1 | GREP Executable must be given on commandline
+-c 0 -p | EXIT 0 | GREP 0
+-c N:0 -p | EXIT 0 | GREP 0
+-c S0:0-1 -p | EXIT 0 | GREP 0,1
+-c N:0 at N:1 -p | EXIT 0 | GREP 0,1
+-c N:0 at N:1 at N:2 -p | EXIT 0 | GREP 0,1,2
+-c C0:1-0 -p | EXIT 1 | GREP Range End
+-c E:N:1 -p | EXIT 0 | GREP 0
+-c E:N:2 -p | EXIT 0 | LISTLEN , 2
+-c E:N:2:1:2 -p | EXIT 0 | LISTLEN , 2
+-c E:N:2:1:2 -d . -p | EXIT 0 | LISTLEN . 2
+-c M:scatter -p | EXIT 0
+-s | EXIT 1 | GREP option requires an argument
+-s 0x1 | EXIT 1 | GREP Executable must be given on commandline
+-s 0x1 hostname | EXIT 0 | GREP Main PID
+-q | EXIT 1 | GREP Executable must be given on commandline
+-q hostname | EXIT 1 | NGREP Main PID
+
diff --git a/test/executable_tests/likwid-powermeter.txt b/test/executable_tests/likwid-powermeter.txt
new file mode 100644
index 0000000..f733b06
--- /dev/null
+++ b/test/executable_tests/likwid-powermeter.txt
@@ -0,0 +1,14 @@
+| EXIT 0 | GREP Help message
+-h | EXIT 0 | GREP Help message
+-v | EXIT 0 | GREP likwid-powermeter
+-i | EXIT 0 | GREP Base clock | GREP Power
+-c | EXIT 1 | GREP option requires an argument | GREP Help message
+-s | EXIT 1 | GREP option requires an argument | GREP Help message
+-M | EXIT 1 | GREP option requires an argument | GREP Help message
+-s 1 | EXIT 0 | GREP consumed
+-c 0 | EXIT 1 | GREP Commandline option -c requires an executable if not used in combination with -s
+-p | EXIT 1 | GREP Commandline option -p requires an executable
+-c 0 -s 1 | EXIT 0 | GREP consumed | GREP Socket 0
+-p hostname | EXIT 0 | GREP Measuring group CLOCK
+-c 0 hostname | EXIT 0 | GREP consumed | GREP Socket 0
+-M 1 | EXIT 1 | GREP Either -s <seconds> or executable must be given on commandline
diff --git a/test/executable_tests/likwid-setFreq.txt b/test/executable_tests/likwid-setFreq.txt
new file mode 100644
index 0000000..56c495b
--- /dev/null
+++ b/test/executable_tests/likwid-setFreq.txt
@@ -0,0 +1,6 @@
+| EXIT 1 | GREP Usage
+0 | EXIT 1 | GREP Usage
+0 0 | EXIT 1 | GREP Frequency must be greater than 0
+0 -1 | EXIT 1 | GREP Frequency must be greater than 0
+-1 -1 | EXIT 1 | GREP not a valid CPU ID. Range from 0 to
+100 0 | EXIT 1 | GREP not a valid CPU ID. Range from 0 to
diff --git a/test/executable_tests/likwid-topology.txt b/test/executable_tests/likwid-topology.txt
new file mode 100644
index 0000000..810b1e9
--- /dev/null
+++ b/test/executable_tests/likwid-topology.txt
@@ -0,0 +1,11 @@
+-h | EXIT 0 | GREP Help message
+-v | EXIT 0 | GREP likwid-topology
+-c | EXIT 0 | GREP Cache line size
+-C | EXIT 0 | GREP CPU clock
+-g | EXIT 0 | GREP +--------
+-g -v | EXIT 0 | GREP likwid-topology
+-c -g | EXIT 0 | GREP +-------- | GREP Cache line size
+-c -g -C | EXIT 0 | GREP +-------- | GREP Cache line size | GREP CPU clock
+-o | EXIT 1
+-o /tmp/out | EXIT 1 | GREP filter suffix
+-o /tmp/out.txt | EXIT 0
diff --git a/test/executable_tests/tester.sh b/test/executable_tests/tester.sh
new file mode 100755
index 0000000..71342df
--- /dev/null
+++ b/test/executable_tests/tester.sh
@@ -0,0 +1,80 @@
#!/bin/bash
#
# Simple command-line argument evaluation tool.
#
# Usage: ./tester.sh <executable>
#
# Runs ${EXECPATH}/<executable> once for every line of <executable>.txt and
# evaluates the exit code and the captured output.

if [ $# -ne 1 ]; then
  echo "You need to give application to test on commandline"
  exit 1
fi

EXECPATH=../..
EXEC=$1

# Private scratch file for the output of the program under test.  A fixed
# name in /tmp is predictable and is shared by concurrent runs (make -j),
# which then overwrite each other's output.
TMPFILE=$(mktemp "${TMPDIR:-/tmp}/testout.XXXXXX") || {
  echo "Cannot create temporary file" >&2
  exit 1
}
# Remove the scratch file on every exit path.
trap 'rm -f -- "${TMPFILE}"' EXIT
+
#######################################
# Check that the captured program output contains a pattern.
# Globals:   TMPFILE (read) - file holding the output of the last test run
# Arguments: $1 - grep (basic regular expression) pattern to look for
# Returns:   0 if at least one line matches, 1 otherwise
#######################################
f_grep() {
  local pattern="$1"
  # '--' keeps a pattern that starts with '-' from being parsed as options.
  if grep -q -- "${pattern}" "${TMPFILE}"; then
    return 0
  fi
  return 1
}
+
#######################################
# Check that the captured program output does NOT contain a pattern.
# Globals:   TMPFILE (read) - file holding the output of the last test run
# Arguments: $1 - grep (basic regular expression) pattern that must be absent
# Returns:   0 if no line matches, 1 if at least one line matches
#######################################
f_ngrep() {
  local pattern="$1"
  # '--' keeps a pattern that starts with '-' from being parsed as options,
  # which would otherwise make grep fail and the pattern look absent.
  if grep -q -- "${pattern}" "${TMPFILE}"; then
    return 1
  fi
  return 0
}
+
#######################################
# Check the length of a delimiter-separated list in the captured output.
# The length is the number of delimiter characters found anywhere in the
# output plus one.
# Globals:   TMPFILE (read) - file holding the output of the last test run
# Arguments: $1 - "<delimiter> <expected length>", e.g. ", 2"
# Returns:   0 if the list has the expected length, 1 otherwise
#######################################
f_listlen() {
  local list delim count chars length
  list=$(<"${TMPFILE}")
  # Split the argument without word-splitting through echo, so delimiters
  # such as '*' or '?' are not expanded to file names.
  read -r delim count _ <<< "$1"
  # Keep only the delimiter characters.
  chars=${list//[^${delim}]}
  length=$(( ${#chars} + 1 ))
  if [[ "${length}" != "${count}" ]]; then
    return 1
  fi
  return 0
}
+
# The test specification lives next to this script as <executable>.txt.
if [[ ! -e "${EXEC}.txt" ]]; then
  echo "Cannot find testfile ${EXEC}.txt"
  exit 1
fi

# Only lines that start with '#' are comments; a '#' inside the options or
# inside a pattern must not disable the test.
COMMENT_RE='^[[:space:]]*#'

# Every remaining line has the form
#   <options> | <CHECK> [args] | <CHECK> [args] ...
# where <CHECK> is one of EXIT, GREP, NGREP or LISTLEN.
while read -r LINE || [[ -n "${LINE}" ]]; do
  [[ -z "${LINE}" ]] && continue
  [[ "${LINE}" =~ ${COMMENT_RE} ]] && continue

  OPTIONS=${LINE%%|*}   # text before the first '|'
  RESULTS=${LINE#*|}    # everything after the first '|'
  IFS='|' read -r -a CHECKS <<< "${RESULTS}"

  # Split the options into separate arguments without pathname expansion.
  read -r -a ARGS <<< "${OPTIONS}"
  # stdin comes from /dev/null so the program under test cannot consume the
  # remaining lines of the specification file this loop is reading.
  "${EXECPATH}/${EXEC}" "${ARGS[@]}" > "${TMPFILE}" 2>&1 < /dev/null
  EXITCODE=$?

  # A line passes only if ALL of its checks pass; a later successful check
  # must not hide an earlier failure.
  STATE=0
  for CHECK in "${CHECKS[@]}"; do
    # First word is the check keyword, the rest are its arguments.
    read -r -a WORDS <<< "${CHECK}"
    RESULT_CMD=${WORDS[0]:-}
    RESULT_OPTS="${WORDS[*]:1}"
    case "${RESULT_CMD}" in
      EXIT)
        [[ "${RESULT_OPTS}" == "${EXITCODE}" ]] || STATE=1
        ;;
      GREP)
        f_grep "${RESULT_OPTS}" || STATE=1
        ;;
      NGREP)
        f_ngrep "${RESULT_OPTS}" || STATE=1
        ;;
      LISTLEN)
        f_listlen "${RESULT_OPTS}" || STATE=1
        ;;
      '')
        # Empty field, nothing to check.
        ;;
      *)
        # Most likely a missing keyword in the specification; it is still
        # ignored for the verdict, but no longer silently.
        echo "WARNING: ${EXEC}.txt: unknown check '${RESULT_CMD}' ignored" >&2
        ;;
    esac
  done

  if [[ ${STATE} -eq 0 ]]; then
    echo "SUCCESS : ${EXEC}" "${OPTIONS}"
  else
    echo "FAIL : ${EXEC}" "${OPTIONS}"
  fi
done < "${EXEC}.txt"
+
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/likwid/likwid.git
More information about the Likwid-commit
mailing list