[Likwid-commit] [likwid] 01/06: Imported Upstream version 4.2.0

Christoph Martin chrism at debian.org
Wed Jan 11 15:24:14 UTC 2017


This is an automated email from the git hooks/post-receive script.

chrism pushed a commit to branch master
in repository likwid.

commit e8dbb7cc2d752051c6c4d935d81a521e69d7bcf4
Author: Christoph Martin <martin at uni-mainz.de>
Date:   Wed Jan 11 11:14:31 2017 +0100

    Imported Upstream version 4.2.0
---
 CHANGELOG                                          |   35 +-
 INSTALL                                            |   52 +-
 Makefile                                           |   63 +-
 README.md                                          |   10 +-
 bench/Makefile                                     |   13 +-
 bench/includes/allocator.h                         |   20 +-
 bench/includes/allocator_types.h                   |    6 +-
 bench/includes/barrier.h                           |    6 +-
 bench/includes/barrier_types.h                     |    5 +-
 bench/includes/strUtil.h                           |    5 +-
 bench/includes/test_types.h                        |    5 +-
 bench/includes/threads.h                           |    7 +-
 bench/includes/threads_types.h                     |    5 +-
 bench/likwid-bench.c                               |  107 +-
 bench/perl/AsmGen.pl                               |  142 ++-
 bench/perl/gas.pm                                  |  319 ++---
 bench/perl/generatePas.pl                          |   53 +-
 bench/perl/isax86.pm                               |   28 +-
 bench/perl/isax86_64.pm                            |   29 +-
 bench/perl/templates/group.tt                      |    8 +-
 bench/src/allocator.c                              |   57 +-
 bench/src/barrier.c                                |   17 +-
 bench/src/bench.c                                  |   42 +-
 bench/src/strUtil.c                                |   48 +-
 bench/src/threads.c                                |   31 +-
 bench/x86-64/copy_avx512.ptt                       |   21 +
 bench/x86-64/daxpy_avx512.ptt                      |   25 +
 bench/x86-64/daxpy_sp_sse.ptt                      |   36 +-
 bench/x86-64/ddot_avx512.ptt                       |   23 +
 bench/x86-64/load_avx512.ptt                       |   18 +
 bench/x86-64/store_avx512.ptt                      |   20 +
 bench/x86-64/stream_avx512.ptt                     |   29 +
 bench/x86-64/sum_avx.ptt                           |   33 +-
 bench/x86-64/sum_avx512.ptt                        |   30 +
 bench/x86-64/triad_avx512.ptt                      |   28 +
 bench/x86-64/update_avx512.ptt                     |   21 +
 config.mk                                          |   17 +-
 doc/applications/likwid-bench.md                   |    2 +-
 doc/archs/phi_knl.md                               |  688 +++++++++++
 doc/likwid-doxygen.md                              |    3 +
 doc/likwid-perfctr.1                               |    4 +
 doc/likwid-perfscope.1                             |    2 +-
 doc/likwid-pin.1                                   |    2 +-
 doc/likwid-setFreq.1                               |   16 +-
 doc/likwid-setFrequencies.1                        |   10 +
 doc/lua-doxygen.md                                 |  277 ++++-
 examples/C-likwidAPI.c                             |    6 +-
 examples/C-markerAPI.c                             |    6 +-
 examples/F-markerAPI.F90                           |    6 +-
 examples/Lua-likwidAPI.lua                         |    6 +-
 ext/hwloc/Makefile                                 |    4 -
 filters/README                                     |   14 +
 groups/atom/BRANCH.txt                             |    2 -
 groups/atom/DATA.txt                               |    2 -
 groups/atom/FLOPS_DP.txt                           |   10 +-
 groups/atom/FLOPS_SP.txt                           |   10 +-
 groups/atom/FLOPS_X87.txt                          |    7 +-
 groups/atom/MEM.txt                                |    2 -
 groups/atom/TLB.txt                                |    7 +-
 groups/broadwell/CLOCK.txt                         |    3 +
 groups/broadwell/CYCLE_ACTIVITY.txt                |   27 +
 groups/broadwell/FLOPS_DP.txt                      |    8 +-
 groups/broadwell/FLOPS_SP.txt                      |    8 +-
 groups/broadwell/PORT_USAGE.txt                    |   50 +
 groups/{sandybridge => broadwell}/UOPS.txt         |    0
 groups/broadwellD/CACHES.txt                       |   58 +-
 groups/broadwellD/CLOCK.txt                        |    3 +
 groups/broadwellD/CYCLE_ACTIVITY.txt               |   27 +
 groups/broadwellD/FALSE_SHARE.txt                  |    8 +-
 groups/broadwellD/FLOPS_DP.txt                     |    8 +-
 groups/broadwellD/FLOPS_SP.txt                     |    8 +-
 groups/broadwellD/MEM_DP.txt                       |    7 +
 groups/broadwellD/MEM_SP.txt                       |    5 +
 groups/broadwellD/PORT_USAGE.txt                   |   50 +
 groups/{sandybridge => broadwellD}/UOPS.txt        |    0
 groups/broadwellEP/CACHES.txt                      |   58 +-
 groups/broadwellEP/CLOCK.txt                       |    3 +
 groups/broadwellEP/CYCLE_ACTIVITY.txt              |   27 +
 groups/broadwellEP/FLOPS_DP.txt                    |    8 +-
 groups/broadwellEP/FLOPS_SP.txt                    |    8 +-
 groups/broadwellEP/MEM_DP.txt                      |    7 +
 groups/broadwellEP/MEM_SP.txt                      |    5 +
 groups/broadwellEP/PORT_USAGE.txt                  |   50 +
 groups/{sandybridge => broadwellEP}/UOPS.txt       |    0
 groups/core2/CACHE.txt                             |    6 +-
 groups/core2/FLOPS_DP.txt                          |    4 +-
 groups/core2/FLOPS_SP.txt                          |    4 +-
 groups/core2/TLB.txt                               |    6 +-
 groups/haswell/CACHES.txt                          |    2 +-
 groups/haswell/CLOCK.txt                           |    3 +
 groups/haswell/CYCLE_ACTIVITY.txt                  |   29 +
 groups/haswell/PORT_USAGE.txt                      |   46 +
 groups/haswellEP/CACHES.txt                        |    2 +-
 groups/haswellEP/CLOCK.txt                         |    3 +
 groups/haswellEP/CYCLE_ACTIVITY.txt                |   29 +
 groups/haswellEP/PORT_USAGE.txt                    |   46 +
 groups/ivybridge/CLOCK.txt                         |    3 +
 groups/ivybridge/CYCLE_ACTIVITY.txt                |   32 +
 groups/ivybridge/FLOPS_DP.txt                      |    8 +-
 groups/ivybridge/FLOPS_SP.txt                      |    8 +-
 groups/ivybridge/PORT_USAGE.txt                    |   40 +
 groups/ivybridgeEP/CACHES.txt                      |    2 +-
 groups/ivybridgeEP/CLOCK.txt                       |    3 +
 groups/ivybridgeEP/CYCLE_ACTIVITY.txt              |   32 +
 groups/ivybridgeEP/FLOPS_DP.txt                    |    8 +-
 groups/ivybridgeEP/FLOPS_SP.txt                    |    8 +-
 groups/ivybridgeEP/MEM_DP.txt                      |    7 +
 groups/ivybridgeEP/MEM_SP.txt                      |    6 +-
 groups/ivybridgeEP/PORT_USAGE.txt                  |   40 +
 groups/ivybridgeEP/UNCORECLOCK.txt                 |   16 +-
 groups/{atom => knl}/BRANCH.txt                    |   12 +-
 groups/{broadwellEP => knl}/CLOCK.txt              |    2 +-
 groups/{atom => knl}/DATA.txt                      |    8 +-
 groups/{broadwellD/CLOCK.txt => knl/ENERGY.txt}    |   14 +-
 groups/knl/FLOPS_DP.txt                            |   34 +
 groups/knl/FLOPS_SP.txt                            |   34 +
 groups/knl/FRONTEND_STALLS.txt                     |   24 +
 groups/knl/HBM.txt                                 |   46 +
 groups/knl/HBM_OFFCORE.txt                         |   32 +
 groups/knl/ICACHE.txt                              |   25 +
 groups/knl/L2CACHE.txt                             |   34 +
 groups/knl/MEM.txt                                 |   51 +
 groups/knl/TLB_DATA.txt                            |   27 +
 groups/knl/TLB_INSTR.txt                           |   27 +
 groups/knl/UOPS_STALLS.txt                         |   25 +
 groups/nehalem/FLOPS_DP.txt                        |    4 +-
 groups/nehalem/FLOPS_SP.txt                        |    4 +-
 groups/nehalem/MEM.txt                             |    8 +-
 groups/nehalemEX/FLOPS_DP.txt                      |    4 +-
 groups/nehalemEX/FLOPS_SP.txt                      |    4 +-
 groups/nehalemEX/MEM.txt                           |   15 +-
 groups/pentiumm/BRANCH.txt                         |    4 +-
 groups/pentiumm/FLOPS_DP.txt                       |    4 +-
 groups/pentiumm/FLOPS_SP.txt                       |    4 +-
 groups/sandybridge/CLOCK.txt                       |    3 +
 groups/sandybridge/CYCLE_ACTIVITY.txt              |   29 +
 groups/sandybridge/FLOPS_DP.txt                    |    8 +-
 groups/sandybridge/FLOPS_SP.txt                    |    8 +-
 groups/sandybridge/L3CACHE.txt                     |   21 +-
 groups/sandybridge/PORT_USAGE.txt                  |   40 +
 groups/sandybridge/UOPS.txt                        |    3 -
 groups/sandybridgeEP/CACHES.txt                    |    2 +-
 groups/sandybridgeEP/CLOCK.txt                     |    3 +
 groups/sandybridgeEP/CYCLE_ACTIVITY.txt            |   29 +
 groups/sandybridgeEP/FLOPS_DP.txt                  |    8 +-
 groups/sandybridgeEP/FLOPS_SP.txt                  |    8 +-
 groups/sandybridgeEP/L3CACHE.txt                   |   26 +-
 groups/sandybridgeEP/MEM_DP.txt                    |    7 +
 groups/sandybridgeEP/MEM_SP.txt                    |    5 +
 groups/sandybridgeEP/PORT_USAGE.txt                |   40 +
 groups/sandybridgeEP/UOPS.txt                      |    3 -
 groups/silvermont/MEM_LAT.txt                      |   23 -
 groups/skylake/CLOCK.txt                           |    3 +
 groups/skylake/CYCLE_ACTIVITY.txt                  |   29 +
 groups/skylake/FLOPS_DP.txt                        |    8 +-
 groups/skylake/FLOPS_SP.txt                        |    8 +-
 groups/skylake/PORT_USAGE.txt                      |   46 +
 groups/westmere/FLOPS_DP.txt                       |    6 +-
 groups/westmere/FLOPS_SP.txt                       |    6 +-
 groups/westmere/MEM.txt                            |    8 +-
 groups/westmere/UOPS.txt                           |    4 +-
 groups/westmereEX/FLOPS_DP.txt                     |    4 +-
 groups/westmereEX/FLOPS_SP.txt                     |    4 +-
 groups/westmereEX/UOPS.txt                         |    7 +-
 make/config_checks.mk                              |    7 +-
 make/config_defines.mk                             |   36 +-
 make/include_ICC.mk                                |   10 +-
 monitoring/groups/ivybridge/CYCLE_ACTIVITY.txt     |   26 +
 monitoring/groups/ivybridgeEP/CYCLE_ACTIVITY.txt   |   26 +
 monitoring/groups/sandybridge/CYCLE_ACTIVITY.txt   |   23 +
 monitoring/groups/sandybridgeEP/CYCLE_ACTIVITY.txt |   23 +
 perl/gen_events.pl                                 |    2 +-
 perl/set_license.pl                                |   13 +-
 perl/xmgrace.pm                                    |  222 ++--
 src/access-daemon/Makefile                         |    7 +-
 src/access-daemon/accessDaemon.c                   |  315 ++++-
 src/access-daemon/setFreq.c                        |  322 +++--
 src/access.c                                       |   39 +-
 src/access_client.c                                |   60 +-
 src/access_x86.c                                   |   26 +-
 src/access_x86_msr.c                               |   48 +-
 src/access_x86_pci.c                               |   30 +-
 src/affinity.c                                     |   34 +-
 src/applications/likwid-agent.lua                  |    5 +-
 src/applications/likwid-features.lua               |    4 +-
 src/applications/likwid-genTopoCfg.lua             |   10 +-
 src/applications/likwid-memsweeper.lua             |    4 +-
 src/applications/likwid-mpirun.lua                 |   55 +-
 src/applications/likwid-perfctr.lua                |  154 +--
 src/applications/likwid-perfscope.lua              |    6 +-
 src/applications/likwid-pin.lua                    |    4 +-
 src/applications/likwid-powermeter.lua             |   17 +-
 src/applications/likwid-setFrequencies.lua         |  174 ++-
 src/applications/likwid-topology.lua               |   45 +-
 src/applications/likwid.lua                        |   91 +-
 src/bitUtil.c                                      |   18 +-
 src/calculator.c                                   |  580 +++++----
 src/calculator_stack.c                             |    7 +-
 src/configuration.c                                |   59 +-
 src/cpuFeatures.c                                  |   14 +-
 src/cpustring.c                                    |  170 ++-
 src/frequency.c                                    |  438 +++++++
 src/hashTable.c                                    |   85 +-
 src/includes/access.h                              |    8 +-
 src/includes/access_client.h                       |   11 +-
 src/includes/access_client_types.h                 |    8 +-
 src/includes/access_x86.h                          |   11 +-
 src/includes/access_x86_msr.h                      |   10 +-
 src/includes/access_x86_pci.h                      |   10 +-
 src/includes/affinity.h                            |   10 +-
 src/includes/bitUtil.h                             |    8 +-
 src/includes/bstrlib.h                             |    8 +-
 src/includes/calculator.h                          |   37 +-
 src/includes/calculator_stack.h                    |   10 +-
 src/includes/configuration.h                       |   10 +-
 src/includes/cpuFeatures.h                         |    6 +-
 src/includes/cpuFeatures_types.h                   |   43 +-
 src/includes/cpuid.h                               |    6 +-
 src/includes/error.h                               |   15 +-
 src/includes/frequency.h                           |   12 +
 src/includes/hashTable.h                           |    6 +-
 src/includes/libperfctr_types.h                    |    6 +-
 src/includes/likwid.h                              |  137 ++-
 src/includes/lock.h                                |   66 +-
 src/includes/memsweep.h                            |    6 +-
 src/includes/numa.h                                |   14 +-
 src/includes/numa_hwloc.h                          |    7 +-
 src/includes/numa_proc.h                           |    8 +-
 src/includes/pci_hwloc.h                           |    6 +-
 src/includes/pci_proc.h                            |    6 +-
 src/includes/pci_types.h                           |   30 +-
 src/includes/perfgroup.h                           |   15 +-
 src/includes/perfmon.h                             |    8 +-
 src/includes/perfmon_atom.h                        |    4 +-
 src/includes/perfmon_atom_events.txt               |    8 +-
 src/includes/perfmon_broadwell.h                   |  159 ++-
 src/includes/perfmon_broadwellEP_counters.h        |  433 +++----
 src/includes/perfmon_broadwellEP_events.txt        |  232 +++-
 src/includes/perfmon_broadwell_counters.h          |   47 +-
 src/includes/perfmon_broadwell_events.txt          |  229 +++-
 src/includes/perfmon_broadwelld_counters.h         |  283 ++---
 src/includes/perfmon_broadwelld_events.txt         |  230 +++-
 src/includes/perfmon_core2.h                       |   37 +-
 src/includes/perfmon_core2_counters.h              |    6 +-
 src/includes/perfmon_core2_events.txt              |    8 +-
 src/includes/perfmon_goldmont.h                    |   45 +-
 src/includes/perfmon_goldmont_counters.h           |    4 +-
 src/includes/perfmon_goldmont_events.txt           |    4 +-
 src/includes/perfmon_haswell.h                     |  107 +-
 src/includes/perfmon_haswellEP_counters.h          |  375 +++---
 src/includes/perfmon_haswellEP_events.txt          |  135 +-
 src/includes/perfmon_haswell_counters.h            |   47 +-
 src/includes/perfmon_haswell_events.txt            |  137 ++-
 src/includes/perfmon_interlagos.h                  |   16 +-
 src/includes/perfmon_interlagos_counters.h         |    6 +-
 src/includes/perfmon_interlagos_events.txt         |    6 +-
 src/includes/perfmon_ivybridge.h                   |  191 +--
 src/includes/perfmon_ivybridgeEP_counters.h        |  323 ++---
 src/includes/perfmon_ivybridgeEP_events.txt        |  184 ++-
 src/includes/perfmon_ivybridge_counters.h          |   47 +-
 src/includes/perfmon_ivybridge_events.txt          |  184 ++-
 src/includes/perfmon_k10.h                         |   16 +-
 src/includes/perfmon_k10_counters.h                |    6 +-
 src/includes/perfmon_k10_events.txt                |    6 +-
 src/includes/perfmon_k8.h                          |    6 +-
 src/includes/perfmon_k8_events.txt                 |    6 +-
 src/includes/perfmon_kabini.h                      |   16 +-
 src/includes/perfmon_kabini_counters.h             |    6 +-
 src/includes/perfmon_kabini_events.txt             |    6 +-
 src/includes/perfmon_knl.h                         | 1299 ++++++++++++++++++++
 src/includes/perfmon_knl_counters.h                |  475 +++++++
 src/includes/perfmon_knl_events.txt                | 1033 ++++++++++++++++
 src/includes/perfmon_nehalem.h                     |   42 +-
 src/includes/perfmon_nehalemEX.h                   |   65 +-
 src/includes/perfmon_nehalemEX_counters.h          |    6 +-
 src/includes/perfmon_nehalemEX_events.txt          |    6 +-
 src/includes/perfmon_nehalemEX_westmereEX_common.h |    6 +-
 src/includes/perfmon_nehalem_counters.h            |    6 +-
 src/includes/perfmon_nehalem_events.txt            |    6 +-
 src/includes/perfmon_p6_events.txt                 |    6 +-
 src/includes/perfmon_perf.h                        |    6 +-
 src/includes/perfmon_perfevent.h                   |  423 +++++++
 src/includes/perfmon_phi.h                         |   16 +-
 src/includes/perfmon_phi_counters.h                |    6 +-
 src/includes/perfmon_phi_events.txt                |    6 +-
 src/includes/perfmon_pm.h                          |   16 +-
 src/includes/perfmon_pm_counters.h                 |    6 +-
 src/includes/perfmon_pm_events.txt                 |    6 +-
 src/includes/perfmon_sandybridge.h                 |  177 +--
 src/includes/perfmon_sandybridgeEP_counters.h      |  195 +--
 src/includes/perfmon_sandybridgeEP_events.txt      |  192 ++-
 src/includes/perfmon_sandybridge_counters.h        |   47 +-
 src/includes/perfmon_sandybridge_events.txt        |  173 ++-
 src/includes/perfmon_silvermont.h                  |   33 +-
 src/includes/perfmon_silvermont_counters.h         |    6 +-
 src/includes/perfmon_silvermont_events.txt         |    6 +-
 src/includes/perfmon_skylake.h                     |   49 +-
 src/includes/perfmon_skylake_counters.h            |   49 +-
 src/includes/perfmon_skylake_events.txt            |  322 ++++-
 src/includes/perfmon_types.h                       |   19 +-
 src/includes/perfmon_westmere.h                    |    6 +-
 src/includes/perfmon_westmereEX.h                  |   57 +-
 src/includes/perfmon_westmereEX_counters.h         |    6 +-
 src/includes/perfmon_westmereEX_events.txt         |   12 +-
 src/includes/perfmon_westmere_events.txt           |    6 +-
 src/includes/power.h                               |    7 +-
 src/includes/power_types.h                         |    7 +-
 src/includes/registers.h                           |  604 ++++++++-
 src/includes/registers_types.h                     |  120 +-
 src/includes/textcolor.h                           |   15 +-
 src/includes/thermal.h                             |    6 +-
 src/includes/thermal_types.h                       |    6 +-
 src/includes/timer.h                               |   13 +-
 src/includes/timer_types.h                         |    6 +-
 src/includes/tlb-info.h                            |   11 +-
 src/includes/topology.h                            |   21 +-
 src/includes/topology_cpuid.h                      |   12 +-
 src/includes/topology_hwloc.h                      |   16 +-
 src/includes/topology_proc.h                       |   12 +-
 src/includes/topology_types.h                      |    7 +-
 src/includes/tree.h                                |    7 +-
 src/includes/tree_types.h                          |    6 +-
 src/includes/types.h                               |    7 +-
 src/libperfctr.c                                   |  128 +-
 src/likwid.f90                                     |   68 +-
 src/likwid_f90_interface.c                         |   38 +-
 src/luawid.c                                       |  684 ++++++++---
 src/memsweep.c                                     |   35 +-
 src/numa.c                                         |   42 +-
 src/numa_hwloc.c                                   |   85 +-
 src/numa_proc.c                                    |   67 +-
 src/pci_hwloc.c                                    |   11 +-
 src/pci_proc.c                                     |   17 +-
 src/perfgroup.c                                    |  331 +++--
 src/perfmon.c                                      |  653 ++++++++--
 src/perfmon_perf.c                                 |   58 +-
 src/power.c                                        |  118 +-
 src/pthread-overload/Makefile                      |    9 +-
 src/pthread-overload/pthread-overload.c            |    5 +-
 src/thermal.c                                      |   16 +-
 src/timer.c                                        |   89 +-
 src/topology.c                                     |  145 ++-
 src/topology_cpuid.c                               |   93 +-
 src/topology_hwloc.c                               |  141 ++-
 src/topology_proc.c                                |   45 +-
 src/tree.c                                         |   10 +-
 test/Makefile                                      |   28 +-
 test/accuracy/TESTS/HBM.txt                        |   58 +
 test/check_group_files.py                          |  319 +++++
 test/executable_tests/likwid-perfctr.txt           |    2 +-
 test/executable_tests/likwid-pin.txt               |    2 +-
 test/executable_tests/tester.sh                    |    6 +-
 test/stream.c                                      |  448 ++++---
 test/test-likwidAPI.c                              |   74 +-
 354 files changed, 16096 insertions(+), 4878 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index a1cf597..3907744 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,37 @@
+# Changelog 4.2.0
+- Support for Intel Xeon Phi (Knights Landing): Core, Uncore, RAPL
+- Support for Uncore counters of some desktop chips (SandyBridge, IvyBridge,
+  Haswell, Broadwell and Skylake)
+- Basic support for Linux perf_event interface instead of native access.
+  Currently only core-local counters working, Uncore is experimental
+- Support to build against an existing Lua installation (5.1 - 5.3 tested)
+- Support for CPU frequency manipulation, Lua interface updated
+- Access module checks for LLNL's msr_safe kernel module
+- Support for counter registers that are only available when
+  HyperThreading is off
+- Fix for non-HyperThreading counters (PMC4-7) on Intel Broadwell
+- Socket measurements can be used for all cores on the socket in
+  metric formulas.
+- likwid-perfctr: Timeline mode without executable runs until user presses Ctrl+c
+- likwid-perfctr: New CYCLE_ACTIVITY groups
+- likwid-perfctr: New PORT_USAGE groups (only with deactivated HyperThreading)
+- likwid-perfctr: Regions are sorted in output as they are executed by the code
+- likwid-powermeter: Read Uncore frequency settings and performance energy bias
+- likwid-powermeter: Update of energy unit for DRAM domain for Intel
+                     Broadwell D/EP and Intel Xeon Phi (Knights Landing)
+- likwid-bench: Fix for 'cycles per update' metric
+- likwid-bench: Vector lengths are sanitized for thread count and loop stride
+- likwid-topology: Increase robustness
+- likwid-mpirun: Some fixes
+
+# Changelog 4.1.2
+- Fix for likwid-powermeter: Use proper energy unit
+- Fix for performance groups for Intel Broadwell (D/EP): DATA and FALSE_SHARE
+- Reduce number of started access daemons
+- Clean Uncore unit local control registers (needed for simultaneous use of LIKWID 3 and 4)
+- Clean config, filter and counter registers at *_finalize function
+- Fix for likwid-features and likwid-perfctr
+
 # Changelog 4.1.1
 - Fix for Uncore handling for EP/EN/EX systems
 - Minor fix for Uncore handling for Intel desktop systems
@@ -5,7 +39,6 @@
 - Support for Intel Goldmont (untested)
 - Fixes for likwid-mpirun
 
-
 # Changelog 4.1.0
 - Support for Intel Skylake (Core + Uncore)
 - Support for Intel Broadwell (Core + Uncore)
diff --git a/INSTALL b/INSTALL
index c4bfb05..80c874e 100644
--- a/INSTALL
+++ b/INSTALL
@@ -3,6 +3,7 @@
 1. Edit config.mk. Follow the comments there.
    Optionally you can change compiler settings in include_[GCC|CLANG|ICC|MIC].mk.
    Please note that only the default compiler flags GCC are supported and tested.
+   For 32 bit builds the only supported compiler setting is GCCX86.
 2. make (Builds hwloc, lua, Likwid libraries, access daemons and likwid-bench)
 3. make install (this is required for likwid-pin and if you use the accessDaemon)
 
@@ -16,18 +17,18 @@ compiler in make/include_[GCC|CLANG|ICC|MIC].mk.
 
 *NOTICE*
 
-All generated files are located in the [GCC|ICC|CLANG|MIC] build directory.
+All generated files are located in the [GCC|ICC|CLANG|MIC|GCCX86] build directory.
 This includes the dependency files, object files. The
 generated source files and the pas and assembly files for likwid-bench are build
-in bench/[GCC|ICC|CLANG|MIC].
+in bench/[GCC|ICC|CLANG|MIC|GCCX86].
 If you debug your likwid-bench benchmarks you can look at all
 intermediate build files and also the final assembly code.
 
-== Build on Xeon Phi ==
-For builds for the Xeon Phi coprocessor, the accessDaemon and the frequency
-daemon are disabled. Moreover, the access mode is set to 'direct'. This was made
-because it is important to run as few processes as possible on the Xeon Phi and
-the accessDaemon would start one process per hardware thread.
+== Build on Xeon Phi (Knights Corner) ==
+For builds for the Xeon Phi (Knights Corner) coprocessor, the accessDaemon and
+the frequency daemon are disabled. Moreover, the access mode is set to 'direct'.
+This was made because it is important to run as few processes as possible on the
+Xeon Phi and the accessDaemon would start one process per hardware thread.
 In order to build Likwid for the Xeon Phi processor, you have to change the
 RPATHS variable in make/include_MIC.mk to point to the folder with the Intel
 libraries like libimf.so. This is crucial because when using an suid-root
@@ -140,13 +141,40 @@ You register the necessary capability by calling
 
 sudo setcap cap_sys_rawio+ep EXECUTABLE
 
-on the executables. This is only possible on local file systems.
-The only feasable way is to register the likwid-accessD and proxy all access over it.
+on the executables. Not all file systems support capabilities.
+The only feasible way is to register the likwid-accessD and proxy all access
+over it.
 
 If you have still problems please let me know on the likwid mailing list:
 http://groups.google.com/group/likwid-users
 
-
-
-
+== Building LIKWID with perf_event kernel backend ==
+When setting USE_PERF_EVENT in config.mk, the perf_event kernel interface is
+used to program the counters instead of going through the msr kernel module
+and PCI devices. The event and counter lists are the same.
+The perf_event interface handles the counter allocation itself, thus the counters
+provided to LIKWID might not be the ones perf_event uses internally.
+The support is tested for the core-local fixed- and general-purpose counters. The
+paranoid setting must be 1 or less.
+It can also be used for Uncore counters, but that support is experimental
+and requires a paranoid setting of 0 or less.
+Currently no counter options are supported.
+
+paranoid setting: /proc/sys/kernel/perf_event_paranoid
+
+
+== Linking applications with LIKWID lib ==
+Commonly the LIKWID C library can be used as all other libraries by setting the
+include (-I) and library path (-L) and add -llikwid to the command line.
+
+==== Internal hwloc library ====
+The included hwloc library shouldn't cause a problem as all hwloc functions are
+prefixed with 'likwid_'.
+
+==== Internal Lua library and interpreter ====
+The included Lua library might cause problems because the functions are not
+renamed and might interfere with the other Lua library functions.
+Maybe it works if the versions of Lua are the same (5.3.2).
+The Lua interpreter is renamed at installation to likwid-lua, so it does not
+overwrite other interpreters.
 
diff --git a/Makefile b/Makefile
index 43e9fac..bc126b7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,3 @@
-#
 # =======================================================================================
 #
 #      Filename:  Makefile
@@ -11,7 +10,7 @@
 #      Author:  Jan Treibig (jt), jan.treibig at gmail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2013 Jan Treibig
+#      Copyright (C) 2016 Jan Treibig
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
@@ -33,6 +32,7 @@ GROUP_DIR   = ./groups
 FILTER_DIR  = ./filters
 MAKE_DIR    = ./make
 
+Q         ?= @
 
 #DO NOT EDIT BELOW
 
@@ -46,9 +46,13 @@ include $(MAKE_DIR)/include_$(COMPILER).mk
 include $(MAKE_DIR)/config_checks.mk
 include $(MAKE_DIR)/config_defines.mk
 
-INCLUDES  += -I./src/includes -I$(LUA_FOLDER)/includes -I$(HWLOC_FOLDER)/include -I$(BUILD_DIR)
+INCLUDES  += -I./src/includes -I$(LUA_INCLUDE_DIR) -I$(HWLOC_FOLDER)/include -I$(BUILD_DIR)
 LIBS      += -ldl
 
+ifeq ($(LUA_INTERNAL),false)
+LIBS      += -l$(LUA_LIB_NAME)
+endif
+
 #CONFIGURE BUILD SYSTEM
 BUILD_DIR  = ./$(COMPILER)
 Q         ?= @
@@ -69,6 +73,7 @@ endif
 PERFMONHEADERS  = $(patsubst $(SRC_DIR)/includes/%.txt, $(BUILD_DIR)/%.h,$(wildcard $(SRC_DIR)/includes/*.txt))
 OBJ_LUA    =  $(wildcard ./ext/lua/$(COMPILER)/*.o)
 OBJ_HWLOC  =  $(wildcard ./ext/hwloc/$(COMPILER)/*.o)
+FILTERS := $(filter-out ./filters/README,$(wildcard ./filters/*))
 
 
 L_APPS      =   likwid-perfctr \
@@ -113,6 +118,9 @@ $(L_APPS):  $(addprefix $(SRC_DIR)/applications/,$(addsuffix  .lua,$(L_APPS)))
 		-e s/'<VERSION>'/$(VERSION).$(RELEASE)/g \
 		-e s/'<DATE>'/$(DATE)/g \
 		$(addprefix $(SRC_DIR)/applications/,$(addsuffix  .lua,$@)) > $@
+	@if [ "$(LUA_INTERNAL)" = "false" ]; then \
+		sed -i -e s+"$(subst /,\\/,$(INSTALLED_BINPREFIX))/likwid-lua"+"$(LUA_BIN)/$(LUA_LIB_NAME)"+ $@; \
+	fi
 	@if [ "$(ACCESSMODE)" = "direct" ]; then sed -i -e s/"access_mode = 0"/"access_mode = 1"/g $(SRC_DIR)/applications/$@.lua;fi
 
 $(L_HELPER):
@@ -130,25 +138,24 @@ $(STATIC_TARGET_LIB): $(BUILD_DIR) $(PERFMONHEADERS) $(OBJ) $(TARGET_HWLOC_LIB)
 	@echo "===>  CREATE STATIC LIB  $(TARGET_LIB)"
 	$(Q)${AR} -crus $(STATIC_TARGET_LIB) $(OBJ) $(TARGET_HWLOC_LIB) $(TARGET_LUA_LIB)
 
-
 $(DYNAMIC_TARGET_LIB): $(BUILD_DIR) $(PERFMONHEADERS) $(OBJ) $(TARGET_HWLOC_LIB) $(TARGET_LUA_LIB)
 	@echo "===>  CREATE SHARED LIB  $(TARGET_LIB)"
 	$(Q)${CC} $(DEBUG_FLAGS) $(SHARED_LFLAGS) -Wl,-soname,$(TARGET_LIB).$(VERSION).$(RELEASE) $(SHARED_CFLAGS) -o $(DYNAMIC_TARGET_LIB) $(OBJ) $(LIBS) $(TARGET_HWLOC_LIB) $(TARGET_LUA_LIB) $(RPATHS)
 
 $(DAEMON_TARGET): $(SRC_DIR)/access-daemon/accessDaemon.c
 	@echo "===>  BUILD access daemon likwid-accessD"
-	$(Q)$(MAKE) -s -C  $(SRC_DIR)/access-daemon likwid-accessD
+	$(Q)$(MAKE) -C  $(SRC_DIR)/access-daemon likwid-accessD
 
 $(FREQ_TARGET): $(SRC_DIR)/access-daemon/setFreq.c
 	@echo "===>  BUILD frequency daemon likwid-setFreq"
-	$(Q)$(MAKE) -s -C  $(SRC_DIR)/access-daemon likwid-setFreq
+	$(Q)$(MAKE) -C  $(SRC_DIR)/access-daemon likwid-setFreq
 
 $(BUILD_DIR):
 	@mkdir $(BUILD_DIR)
 
 $(PINLIB):
 	@echo "===>  CREATE LIB  $(PINLIB)"
-	$(Q)$(MAKE) -s -C src/pthread-overload/ $(PINLIB)
+	$(Q)$(MAKE) -C src/pthread-overload/ $(PINLIB)
 
 $(GENGROUPLOCK): $(foreach directory,$(shell ls $(GROUP_DIR)), $(wildcard $(GROUP_DIR)/$(directory)/*.txt))
 	@echo "===>  GENERATE GROUP HEADERS"
@@ -160,17 +167,22 @@ $(FORTRAN_IF): $(SRC_DIR)/likwid.f90
 	$(Q)$(FC) -c  $(FCFLAGS) $<
 	@rm -f likwid.o
 
+ifeq ($(LUA_INTERNAL),true)
 $(TARGET_LUA_LIB):
 	@echo "===>  ENTER  $(LUA_FOLDER)"
-	$(Q)$(MAKE) -s --no-print-directory -C $(LUA_FOLDER) $(MAKECMDGOALS)
+	$(Q)$(MAKE) --no-print-directory -C $(LUA_FOLDER) $(MAKECMDGOALS)
+else
+$(TARGET_LUA_LIB):
+	@echo "===>  EXTERNAL LUA"
+endif
 
 $(TARGET_HWLOC_LIB):
 	@echo "===>  ENTER  $(HWLOC_FOLDER)"
-	$(Q)$(MAKE) -s --no-print-directory -C $(HWLOC_FOLDER) $(MAKECMDGOALS)
+	$(Q)$(MAKE) --no-print-directory -C $(HWLOC_FOLDER) $(MAKECMDGOALS)
 
 $(BENCH_TARGET):
 	@echo "===>  ENTER  $(BENCH_FOLDER)"
-	$(Q)$(MAKE) -s --no-print-directory -C $(BENCH_FOLDER) $(MAKECMDGOALS)
+	$(Q)$(MAKE) --no-print-directory -C $(BENCH_FOLDER) $(MAKECMDGOALS)
 
 #PATTERN RULES
 $(BUILD_DIR)/%.o:  %.c
@@ -189,24 +201,20 @@ $(BUILD_DIR)/%.o:  %.S
 	$(Q)$(AS) $(ASFLAGS) $@.tmp -o $@
 	@rm $@.tmp
 
-
 $(BUILD_DIR)/%.h:  $(SRC_DIR)/includes/%.txt
 	@echo "===>  GENERATE HEADER $@"
 	$(Q)$(GEN_PMHEADER) $< $@
 
-
 ifeq ($(findstring $(MAKECMDGOALS),clean),)
 -include $(OBJ:.o=.d)
 endif
 
 .PHONY: clean distclean install uninstall help $(TARGET_LUA_LIB) $(TARGET_HWLOC_LIB) $(BENCH_TARGET)
 
-
 .PRECIOUS: $(BUILD_DIR)/%.pas
 
 .NOTPARALLEL:
 
-
 clean: $(TARGET_LUA_LIB) $(TARGET_HWLOC_LIB) $(BENCH_TARGET)
 	@echo "===>  CLEAN"
 	@for APP in $(L_APPS); do \
@@ -319,28 +327,34 @@ install: install_daemon install_freq
 	@for APP in $(C_APPS); do \
 		install -m 755 $$APP  $(BINPREFIX); \
 	done
-	@install -m 755 ext/lua/lua $(BINPREFIX)/likwid-lua
+	@if [ "$(LUA_INTERNAL)" = "true" ]; then \
+		install -m 755 ext/lua/lua $(BINPREFIX)/$(LUA_LIB_NAME); \
+	fi
 	@echo "===> INSTALL helper applications to $(BINPREFIX)"
 	@install -m 755 perl/feedGnuplot $(BINPREFIX)
 	@echo "===> INSTALL lua to likwid interface to $(PREFIX)/share/lua"
 	@mkdir -p $(PREFIX)/share/lua
 	@chmod 775 $(PREFIX)/share/lua
-	@install -m 755 likwid.lua $(PREFIX)/share/lua
+	@install -m 644 likwid.lua $(PREFIX)/share/lua
 	@echo "===> INSTALL libraries to $(LIBPREFIX)"
 	@mkdir -p $(LIBPREFIX)
 	@chmod 775 $(LIBPREFIX)
 	@install -m 755 $(TARGET_LIB) $(LIBPREFIX)/$(TARGET_LIB).$(VERSION).$(RELEASE)
 	@install -m 755 liblikwidpin.so $(LIBPREFIX)/liblikwidpin.so.$(VERSION).$(RELEASE)
 	@install -m 755 $(TARGET_HWLOC_LIB) $(LIBPREFIX)/$(shell basename $(TARGET_HWLOC_LIB)).$(VERSION).$(RELEASE)
-	@install -m 755 $(TARGET_LUA_LIB) $(LIBPREFIX)/$(shell basename $(TARGET_LUA_LIB)).$(VERSION).$(RELEASE)
+	@if [ "$(LUA_INTERNAL)" = "true" ]; then \
+		install -m 755 $(TARGET_LUA_LIB) $(LIBPREFIX)/$(shell basename $(TARGET_LUA_LIB)).$(VERSION).$(RELEASE); \
+	fi
 	@cd $(LIBPREFIX) && ln -fs $(TARGET_LIB).$(VERSION).$(RELEASE) $(TARGET_LIB)
 	@cd $(LIBPREFIX) && ln -fs $(TARGET_LIB).$(VERSION).$(RELEASE) $(TARGET_LIB).$(VERSION)
 	@cd $(LIBPREFIX) && ln -fs $(PINLIB).$(VERSION).$(RELEASE) $(PINLIB)
 	@cd $(LIBPREFIX) && ln -fs $(PINLIB).$(VERSION).$(RELEASE) $(PINLIB).$(VERSION)
 	@cd $(LIBPREFIX) && ln -fs $(shell basename $(TARGET_HWLOC_LIB)).$(VERSION).$(RELEASE) $(shell basename $(TARGET_HWLOC_LIB))
 	@cd $(LIBPREFIX) && ln -fs $(shell basename $(TARGET_HWLOC_LIB)).$(VERSION).$(RELEASE) $(shell basename $(TARGET_HWLOC_LIB)).$(VERSION)
-	@cd $(LIBPREFIX) && ln -fs $(shell basename $(TARGET_LUA_LIB)).$(VERSION).$(RELEASE) $(shell basename $(TARGET_LUA_LIB))
-	@cd $(LIBPREFIX) && ln -fs $(shell basename $(TARGET_LUA_LIB)).$(VERSION).$(RELEASE) $(shell basename $(TARGET_LUA_LIB)).$(VERSION)
+	@if [ "$(LUA_INTERNAL)" = "true" ]; then \
+		cd $(LIBPREFIX) && ln -fs $(shell basename $(TARGET_LUA_LIB)).$(VERSION).$(RELEASE) $(shell basename $(TARGET_LUA_LIB)); \
+		cd $(LIBPREFIX) && ln -fs $(shell basename $(TARGET_LUA_LIB)).$(VERSION).$(RELEASE) $(shell basename $(TARGET_LUA_LIB)).$(VERSION); \
+	fi
 	@echo "===> INSTALL man pages to $(MANPREFIX)/man1"
 	@mkdir -p $(MANPREFIX)/man1
 	@chmod 775 $(MANPREFIX)/man1
@@ -392,9 +406,9 @@ install: install_daemon install_freq
 	@echo "===> INSTALL filters to $(abspath $(PREFIX)/share/likwid/filter)"
 	@mkdir -p $(abspath $(PREFIX)/share/likwid/filter)
 	@chmod 755 $(abspath $(PREFIX)/share/likwid/filter)
-	@cp -f filters/*  $(abspath $(PREFIX)/share/likwid/filter)
-	@chmod 755 $(abspath $(PREFIX)/share/likwid/filter)/*
-
+	@for F in $(FILTERS); do \
+		install -m 755 $$F  $(abspath $(PREFIX)/share/likwid/filter); \
+	done
 
 move: move_daemon move_freq
 	@echo "===> MOVE applications from $(BINPREFIX) to $(INSTALLED_BINPREFIX)"
@@ -412,7 +426,7 @@ move: move_daemon move_freq
 	@echo "===> MOVE lua to likwid interface from $(PREFIX)/share/lua to $(INSTALLED_PREFIX)/share/lua"
 	@mkdir -p $(INSTALLED_PREFIX)/share/lua
 	@chmod 775 $(INSTALLED_PREFIX)/share/lua
-	@install -m 755 $(PREFIX)/share/lua/likwid.lua $(INSTALLED_PREFIX)/share/lua
+	@install -m 644 $(PREFIX)/share/lua/likwid.lua $(INSTALLED_PREFIX)/share/lua
 	@echo "===> MOVE libraries from $(LIBPREFIX) to $(INSTALLED_LIBPREFIX)"
 	@mkdir -p $(INSTALLED_LIBPREFIX)
 	@chmod 775 $(INSTALLED_LIBPREFIX)
@@ -466,7 +480,6 @@ move: move_daemon move_freq
 	@cp -f $(abspath $(PREFIX)/share/likwid/filter)/* $(LIKWIDFILTERPATH)
 	@chmod 755 $(LIKWIDFILTERPATH)/*
 
-
 uninstall: uninstall_daemon uninstall_freq
 	@echo "===> REMOVING applications from $(PREFIX)/bin"
 	@rm -f $(addprefix $(BINPREFIX)/,$(addsuffix  .lua,$(L_APPS)))
@@ -586,4 +599,4 @@ help:
 	@echo "The common configuration is INSTALLED_PREFIX = PREFIX, so changing PREFIX is enough."
 	@echo "If PREFIX and INSTALLED_PREFIX differ, you have to move anything after 'make install' to"
 	@echo "the INSTALLED_PREFIX. You can also use 'make move' which does the job for you."
-	
+
diff --git a/README.md b/README.md
index 6626763..ed8c7a5 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,8 @@ Download, Build and Install
 You can get the releases of LIKWID at:
 http://ftp.fau.de/pub/likwid/
 
-For build and installation hints see INSTALL file
+For build and installation hints, see the INSTALL file or check the build instructions
+page in the wiki: https://github.com/RRZE-HPC/likwid/wiki/Build
 
 --------------------------------------------------------------------------------
 Documentation
@@ -57,3 +58,10 @@ https://github.com/jlewandowski/likwid-java-api
 - For Python you can find an interface to the LIKWID API here:
 https://github.com/TomTheBear/likwid-python-api
 
+--------------------------------------------------------------------------------
+Survey
+--------------------------------------------------------------------------------
+We opened a survey on the user mailing list to get a feeling for who uses LIKWID and how.
+Moreover, we would like to know whether you are missing a feature or what annoys you when using LIKWID.
+Link to the survey:
+https://groups.google.com/forum/#!topic/likwid-users/F7TDho3k7ps
diff --git a/bench/Makefile b/bench/Makefile
index da883ef..e5385b3 100644
--- a/bench/Makefile
+++ b/bench/Makefile
@@ -11,7 +11,7 @@
 #      Author:  Jan Treibig (jt), jan.treibig at gmail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2013 Jan Treibig
+#      Copyright (C) 2016 Jan Treibig
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
@@ -31,8 +31,6 @@ SRC_DIR     = ./src
 MAKE_DIR    = ../make
 
 #DO NOT EDIT BELOW
-
-
 # Dependency chains:
 # *.[ch] -> *.o -> executables
 # *.ptt -> *.pas -> *.s -> *.o -> executables
@@ -62,8 +60,8 @@ BENCH_DIR   = ./x86-64
 endif
 endif
 
-SHARED_TARGET_LIB := -L.. -L../ext/hwloc/ -L../ext/lua -llikwid -llikwid-hwloc -llikwid-lua
-STATIC_TARGET_LIB := ../liblikwid.a ../ext/hwloc/liblikwid-hwloc.a ../ext/lua/liblikwid-lua.a
+SHARED_TARGET_LIB := -L.. -L../ext/hwloc/ -L$(LUA_LIB_DIR) -llikwid -llikwid-hwloc -l$(LUA_LIB_NAME)
+STATIC_TARGET_LIB := ../liblikwid.a ../ext/hwloc/liblikwid-hwloc.a $(LUA_LIB_DIR)/lib$(LUA_LIB_NAME).a
 TARGET_LIB = $(SHARED_TARGET_LIB)
 
 BENCH_LIBS :=
@@ -78,7 +76,6 @@ else
 DEBUG_FLAGS =
 endif
 
-
 VPATH     = $(SRC_DIR)
 OBJ       = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c))
 ifeq ($(SHARED_LIBRARY),false)
@@ -126,12 +123,10 @@ endif
 
 .PHONY: clean distclean install uninstall
 
-
 .PRECIOUS: $(BUILD_DIR)/%.pas
 
 .NOTPARALLEL:
 
-
 clean:
 	@rm -rf likwid-bench
 
@@ -153,5 +148,3 @@ uninstall:
 	@echo "===> REMOVING man pages from $(MANPREFIX)/man1"
 	@rm -f $(MANPREFIX)/man1/likwid-bench.1
 
-
-
diff --git a/bench/includes/allocator.h b/bench/includes/allocator.h
index 76df396..bb1da23 100644
--- a/bench/includes/allocator.h
+++ b/bench/includes/allocator.h
@@ -3,10 +3,10 @@
  *
  *      Filename:  allocator.h
  *
- *      Description:  Header File allocator Module. 
+ *      Description:  Header File allocator Module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  none
@@ -27,7 +27,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef ALLOCATOR_H
 #define ALLOCATOR_H
 
@@ -39,12 +38,13 @@
 
 extern void allocator_init(int numVectors);
 extern void allocator_finalize();
+extern size_t allocator_dataTypeLength(DataType type);
 extern void allocator_allocateVector(void** ptr,
-        int alignment,
-        uint64_t size,
-        int offset,
-        DataType type,
-        bstring domain);
+                int alignment,
+                uint64_t size,
+                int offset,
+                DataType type,
+                int stride,
+                bstring domain);
 
 #endif /*ALLOCATOR_H*/
-
diff --git a/bench/includes/allocator_types.h b/bench/includes/allocator_types.h
index c73a125..0f3aae9 100644
--- a/bench/includes/allocator_types.h
+++ b/bench/includes/allocator_types.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File types of allocator Module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  none
@@ -41,6 +41,4 @@ typedef struct {
     DataType type;
 } allocation;
 
-
-
 #endif
diff --git a/bench/includes/barrier.h b/bench/includes/barrier.h
index 41abafa..bb9b969 100644
--- a/bench/includes/barrier.h
+++ b/bench/includes/barrier.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File barrier Module
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -27,7 +27,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef BARRIER_H
 #define BARRIER_H
 
@@ -54,5 +53,4 @@ extern void barrier_registerThread(BarrierData* barr, int groupsId, int threadId
 extern void  barrier_synchronize(BarrierData* barr);
 extern void  barrier_destroy(BarrierData* barr);
 
-
 #endif /*BARRIER_H*/
diff --git a/bench/includes/barrier_types.h b/bench/includes/barrier_types.h
index 2434299..8ec038b 100644
--- a/bench/includes/barrier_types.h
+++ b/bench/includes/barrier_types.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Type Definitions for barrier Module
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -27,7 +27,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef BARRIER_TYPES_H
 #define BARRIER_TYPES_H
 
diff --git a/bench/includes/strUtil.h b/bench/includes/strUtil.h
index 4b02ea8..6672237 100644
--- a/bench/includes/strUtil.h
+++ b/bench/includes/strUtil.h
@@ -4,8 +4,8 @@
  *
  *      Description:  Some sting functions
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -53,7 +53,6 @@ typedef struct {
     Stream* streams;
 } Workgroup;
 
-
 extern int bstr_to_workgroup(Workgroup* group, const_bstring str, DataType type, int numberOfStreams);
 extern void workgroups_destroy(Workgroup** groupList, int numberOfGroups, int numberOfStreams);
 
diff --git a/bench/includes/test_types.h b/bench/includes/test_types.h
index 9d4da1b..b4080d1 100644
--- a/bench/includes/test_types.h
+++ b/bench/includes/test_types.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Type definitions for benchmarking framework
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -27,7 +27,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef TEST_TYPES_H
 #define TEST_TYPES_H
 
diff --git a/bench/includes/threads.h b/bench/includes/threads.h
index 7693be5..f0953b5 100644
--- a/bench/includes/threads.h
+++ b/bench/includes/threads.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header file of pthread interface module
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -27,7 +27,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef THREADS_H
 #define THREADS_H
 
@@ -71,7 +70,7 @@ extern void threads_registerDataAll(
 
 /**
  * @brief  Register User thread data for one thread
- * @param  threadId thread Id 
+ * @param  threadId thread Id
  * @param  data  Reference to the user data structo
  * @param  func  Optional function pointer to copy data
  */
diff --git a/bench/includes/threads_types.h b/bench/includes/threads_types.h
index aa51ca1..5ddab9b 100644
--- a/bench/includes/threads_types.h
+++ b/bench/includes/threads_types.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Types file for threads module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -27,7 +27,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef THREADS_TYPES_H
 #define THREADS_TYPES_H
 
diff --git a/bench/likwid-bench.c b/bench/likwid-bench.c
index 37d40e6..28d40a9 100644
--- a/bench/likwid-bench.c
+++ b/bench/likwid-bench.c
@@ -5,8 +5,8 @@
  *
  *      Description:  A flexible and extensible benchmarking toolbox
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -27,6 +27,9 @@
  *
  * =======================================================================================
  */
+
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <errno.h>
 #include <stdio.h>
@@ -35,6 +38,7 @@
 #include <unistd.h>
 #include <ctype.h>
 #include <inttypes.h>
+#include <math.h>
 
 #include <bstrlib.h>
 #include <errno.h>
@@ -118,6 +122,7 @@ int main(int argc, char** argv)
     int tmp = 0;
     double time;
     double cycPerUp = 0.0;
+    double cycPerCL = 0.0;
     const TestCase* test = NULL;
     uint64_t realSize = 0;
     uint64_t realIter = 0;
@@ -188,10 +193,9 @@ int main(int argc, char** argv)
                 else
                 {
                     ownprintf("Name: %s\n",test->name);
+                    ownprintf("Description: %s\n",test->desc);
                     ownprintf("Number of streams: %d\n",test->streams);
                     ownprintf("Loop stride: %d\n",test->stride);
-                    ownprintf("Flops: %d\n",test->flops);
-                    ownprintf("Bytes: %d\n",test->bytes);
                     switch (test->type)
                     {
                         case INT:
@@ -204,6 +208,23 @@ int main(int argc, char** argv)
                             ownprintf("Data Type: Double precision float\n");
                             break;
                     }
+                    ownprintf("Flops per element: %d\n",test->flops);
+                    ownprintf("Bytes per element: %d\n",test->bytes);
+                    if (test->loads > 0 && test->stores > 0)
+                    {
+                        double ratio = (double)test->loads/(double)(test->stores+test->loads);
+                        double load_bytes = ((double)test->bytes) * ratio;
+                        ownprintf("Load bytes per element: %.0f\n", load_bytes);
+                        ownprintf("Store bytes per element: %.0f\n",((double)test->bytes) - load_bytes);
+                    }
+                    else if (test->loads >= 0 && test->stores == 0)
+                    {
+                        ownprintf("Load bytes per element: %d\n",test->bytes);
+                    }
+                    else if (test->loads == 0 && test->stores > 0)
+                    {
+                        ownprintf("Store bytes per element: %d\n",test->bytes);
+                    }
                     if (test->loads >= 0)
                     {
                         ownprintf("Load Ops: %d\n",test->loads);
@@ -224,6 +245,10 @@ int main(int argc, char** argv)
                     {
                         ownprintf("Loop instructions: %d\n",test->instr_loop);
                     }
+                    if (test->uops >= 0)
+                    {
+                        ownprintf("Loop micro Ops (\u03BCOPs): %d\n",test->uops);
+                    }
                 }
                 bdestroy(testcase);
                 exit (EXIT_SUCCESS);
@@ -326,8 +351,13 @@ int main(int argc, char** argv)
                 bstring groupstr = bfromcstr(optarg);
                 i = bstr_to_workgroup(currentWorkgroup, groupstr, test->type, test->streams);
                 bdestroy(groupstr);
+                size_t newsize = 0;
+                size_t stride = test->stride;
+                int nrThreads = currentWorkgroup->numberOfThreads;
+                size_t orig_size = currentWorkgroup->size;
                 if (i == 0)
                 {
+                    int warn_once = 1;
                     for (i=0; i<  test->streams; i++)
                     {
                         if (currentWorkgroup->streams[i].offset%test->stride)
@@ -335,12 +365,26 @@ int main(int argc, char** argv)
                             fprintf (stderr, "Error: Stream %d: offset is not a multiple of stride!\n",i);
                             return EXIT_FAILURE;
                         }
+                        if ((int)(floor(orig_size/currentWorkgroup->numberOfThreads)) % test->stride)
+                        {
+                            newsize = (((int)(floor(orig_size/nrThreads))/stride)*(stride))*nrThreads;
+                            if (warn_once)
+                            {
+                                fprintf (stderr, "Warning: Sanitizing vector length to a multiple of the loop stride %d and thread count %d from %d elements (%d bytes) to %d elements (%d bytes)\n",stride, nrThreads, orig_size, orig_size*test->bytes, newsize, newsize*test->bytes);
+                                warn_once = 0;
+                            }
+                        }
+                        else
+                        {
+                            newsize = orig_size;
+                        }
                         allocator_allocateVector(&(currentWorkgroup->streams[i].ptr),
-                                PAGE_ALIGNMENT,
-                                currentWorkgroup->size,
-                                currentWorkgroup->streams[i].offset,
-                                test->type,
-                                currentWorkgroup->streams[i].domain);
+                                                    PAGE_ALIGNMENT,
+                                                    newsize,
+                                                    currentWorkgroup->streams[i].offset,
+                                                    test->type,
+                                                    test->stride,
+                                                    currentWorkgroup->streams[i].domain);
                     }
                     tmp++;
                 }
@@ -348,6 +392,8 @@ int main(int argc, char** argv)
                 {
                     exit(EXIT_FAILURE);
                 }
+                if (newsize != currentWorkgroup->size)
+                    currentWorkgroup->size = newsize;
                 break;
             default:
                 continue;
@@ -453,8 +499,6 @@ int main(int argc, char** argv)
         }
     }
 
-
-
     time = (double) maxCycles / (double) cyclesClock;
     ownprintf(bdata(HLINE));
     ownprintf("Cycles:\t\t\t%" PRIu64 "\n", maxCycles);
@@ -463,43 +507,62 @@ int main(int argc, char** argv)
     ownprintf("Time:\t\t\t%e sec\n", time);
     ownprintf("Iterations:\t\t%" PRIu64 "\n", realIter);
     ownprintf("Iterations per thread:\t%" PRIu64 "\n",threads_data[0].data.iter);
-    ownprintf("Inner loop executions:\t%.0f\n", ((double)realSize)/((double)test->stride));
-    ownprintf("Size:\t\t\t%" PRIu64 "\n",  realSize*test->bytes );
+    ownprintf("Inner loop executions:\t%d\n", (int)(((double)realSize)/((double)test->stride*globalNumberOfThreads)));
+    ownprintf("Size (Byte):\t\t%" PRIu64 "\n",  realSize*test->bytes );
     ownprintf("Size per thread:\t%" PRIu64 "\n", threads_data[0].data.size*test->bytes);
     ownprintf("Number of Flops:\t%" PRIu64 "\n", (threads_data[0].data.iter * realSize *  test->flops));
     ownprintf("MFlops/s:\t\t%.2f\n",
             1.0E-06 * ((double) threads_data[0].data.iter * realSize *  test->flops/  time));
-    
-    ownprintf("Data volume (Byte):\t%llu\n", LLU_CAST (threads_data[0].data.iter * realSize *  test->bytes));
+    ownprintf("Data volume (Byte):\t%llu\n",
+            LLU_CAST (threads_data[0].data.iter * realSize *  test->bytes));
     ownprintf("MByte/s:\t\t%.2f\n",
             1.0E-06 * ( (double) threads_data[0].data.iter * realSize *  test->bytes/ time));
 
-    cycPerUp = ((double) maxCycles / (double) (threads_data[0].data.iter * realSize));
-    ownprintf("Cycles per update:\t%f\n", cycPerUp);
-
+    cycPerCL = ((double) maxCycles / (double) (threads_data[0].data.iter*(realSize/test->streams)* 8 / 64.0));
     switch ( test->type )
     {
         case INT:
         case SINGLE:
-            ownprintf("Cycles per cacheline:\t%f\n", (16.0 * cycPerUp));
+            cycPerUp = cycPerCL/(16.0*test->streams);
             break;
         case DOUBLE:
-            ownprintf("Cycles per cacheline:\t%f\n", (8.0 * cycPerUp));
+            cycPerUp = cycPerCL/(8.0*test->streams);
             break;
     }
+    ownprintf("Cycles per update:\t%f\n", cycPerUp);
+    ownprintf("Cycles per cacheline:\t%f\n", cycPerCL);
     ownprintf("Loads per update:\t%ld\n", test->loads );
     ownprintf("Stores per update:\t%ld\n", test->stores );
+    if (test->loads > 0 && test->stores > 0)
+    {
+        double ratio = (double)test->loads/(double)(test->stores+test->loads);
+        double load_bytes = ((double)test->bytes) * ratio;
+        ownprintf("Load bytes per element:\t%.0f\n", load_bytes);
+        ownprintf("Store bytes per elem.:\t%.0f\n",((double)test->bytes) - load_bytes);
+    }
+    else if (test->loads >= 0 && test->stores == 0)
+    {
+        ownprintf("Load bytes per element:\t%d\n",test->bytes);
+        ownprintf("Store bytes per elem.:\t0\n");
+    }
+    else if (test->loads == 0 && test->stores > 0)
+    {
+        ownprintf("Load bytes per element:\t0\n");
+        ownprintf("Store bytes per elem.:\t%d\n",test->bytes);
+    }
     if ((test->loads > 0) && (test->stores > 0))
     {
         ownprintf("Load/store ratio:\t%.2f\n", ((double)test->loads)/((double)test->stores) );
     }
     if ((test->instr_loop > 0) && (test->instr_const > 0))
     {
-        ownprintf("Instructions:\t\t%" PRIu64 "\n", LLU_CAST ((double)realSize/test->stride)*test->instr_loop*threads_data[0].data.iter + test->instr_const );
+        ownprintf("Instructions:\t\t%" PRIu64 "\n",
+                LLU_CAST ((double)realSize/test->stride)*test->instr_loop*threads_data[0].data.iter + test->instr_const );
     }
     if (test->uops > 0)
     {
-        ownprintf("UOPs:\t\t\t%" PRIu64 "\n", LLU_CAST ((double)realSize/test->stride)*test->uops*threads_data[0].data.iter);
+        ownprintf("UOPs:\t\t\t%" PRIu64 "\n",
+                LLU_CAST ((double)realSize/test->stride)*test->uops*threads_data[0].data.iter);
     }
 
     ownprintf(bdata(HLINE));
diff --git a/bench/perl/AsmGen.pl b/bench/perl/AsmGen.pl
index 7fee506..3c1bc85 100755
--- a/bench/perl/AsmGen.pl
+++ b/bench/perl/AsmGen.pl
@@ -1,4 +1,32 @@
 #!/usr/bin/perl -w
+# =======================================================================================
+#
+#      Filename:  AsmGen.pl
+#
+#      Description:  Parser for internal high level assembly syntax.
+#
+#      Version:   <VERSION>
+#      Released:  <DATE>
+#
+#      Author:  Jan Treibig (jt), jan.treibig at gmail.com
+#      Project:  likwid
+#
+#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#
+#      This program is free software: you can redistribute it and/or modify it under
+#      the terms of the GNU General Public License as published by the Free Software
+#      Foundation, either version 3 of the License, or (at your option) any later
+#      version.
+#
+#      This program is distributed in the hope that it will be useful, but WITHOUT ANY
+#      WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+#      PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+#
+#      You should have received a copy of the GNU General Public License along with
+#      this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# =======================================================================================
+
 use strict;
 no strict "refs";
 use warnings;
@@ -30,42 +58,42 @@ $::RD_AUTOACTION = q { [@item[0..$#item]] };
 
 sub init
 {
-	getopts( "$OPT_STRING", \%OPT ) or usage();
-	if ($OPT{h}) { usage(); };
-	if ($OPT{v}) { $VERBOSE = 1;}
-	if ($OPT{d}) { $DEBUG = 1;}
-
-	if (! $ARGV[0]) {
-		die "ERROR: Please specify a input file!\n\nCall script with argument -h for help.\n";
-	}
-
-	$INPUTFILE = $ARGV[0];
-	$CPP_ARGS = $ARGV[1] if ($ARGV[1]);
-
-	if ($INPUTFILE =~ /.pas$/) {
-		$INPUTFILE =~ s/\.pas//; 
-	} else {
-		die "ERROR: Input file must have pas ending!\n";
-	}
-	if ($OPT{o}) { 
-		$OUTPUTFILE = $OPT{o};
-	}else {
-		$OUTPUTFILE = "$INPUTFILE.s";
-	}
-	if ($OPT{i}) { 
-		$ISA = $OPT{i};
-		print "INFO: Using isa $ISA.\n\n" if ($VERBOSE);
-	} else {
-		print "INFO: No isa specified.\n Using default $ISA.\n\n" if ($VERBOSE);
-	}
-	if ($OPT{a}) { 
-		$AS = $OPT{a};
-		print "INFO: Using as $AS.\n\n" if ($VERBOSE);
-	} else {
-		print "INFO: No as specified.\n Using default $AS.\n\n" if ($VERBOSE);
-	}
-
-  as::isa_init();
+    getopts( "$OPT_STRING", \%OPT ) or usage();
+    if ($OPT{h}) { usage(); };
+    if ($OPT{v}) { $VERBOSE = 1;}
+    if ($OPT{d}) { $DEBUG = 1;}
+
+    if (! $ARGV[0]) {
+        die "ERROR: Please specify a input file!\n\nCall script with argument -h for help.\n";
+    }
+
+    $INPUTFILE = $ARGV[0];
+    $CPP_ARGS = $ARGV[1] if ($ARGV[1]);
+
+    if ($INPUTFILE =~ /.pas$/) {
+        $INPUTFILE =~ s/\.pas//; 
+    } else {
+        die "ERROR: Input file must have pas ending!\n";
+    }
+    if ($OPT{o}) { 
+        $OUTPUTFILE = $OPT{o};
+    }else {
+        $OUTPUTFILE = "$INPUTFILE.s";
+    }
+    if ($OPT{i}) { 
+        $ISA = $OPT{i};
+        print "INFO: Using isa $ISA.\n\n" if ($VERBOSE);
+    } else {
+        print "INFO: No isa specified.\n Using default $ISA.\n\n" if ($VERBOSE);
+    }
+    if ($OPT{a}) { 
+        $AS = $OPT{a};
+        print "INFO: Using as $AS.\n\n" if ($VERBOSE);
+    } else {
+        print "INFO: No as specified.\n Using default $AS.\n\n" if ($VERBOSE);
+    }
+
+    as::isa_init();
 }
 
 sub usage
@@ -219,7 +247,7 @@ expression:  align
             |loop
             |timer
             |mode
-			|ASMCODE
+            |ASMCODE
 { $item[1] }
 
 instruction : define_data
@@ -244,9 +272,9 @@ print "INFO: Calling cpp with arguments $CPP_ARGS.\n" if ($VERBOSE);
 my $text = `cpp -x assembler-with-cpp $CPP_ARGS $INPUTFILE.pas`;
 
 if ($OPT{p}) {
-	open FILE,">$INPUTFILE.Pas";
-	print FILE $text;
-	close FILE;
+    open FILE,">$INPUTFILE.Pas";
+    print FILE $text;
+    close FILE;
 }
 
 open STDOUT,">$OUTPUTFILE";
@@ -257,28 +285,28 @@ my $parse_tree = $parser->startrule($text) or print STDERR "ERROR: Syntax Error\
 tree_exec($parse_tree);
 
 if ($DEBUG) {
-	open FILE,'>parse_tree.txt';
-	print FILE Dumper $parse_tree,"\n";
-	close FILE;
+    open FILE,'>parse_tree.txt';
+    print FILE Dumper $parse_tree,"\n";
+    close FILE;
 }
 
 print "$as::AS->{FOOTER}\n";
 
-sub tree_exec 
+sub tree_exec
 {
-	my $tree = shift;
-
-	foreach my $node (@$tree) {
-		if ($node !~ /^skip|^instruction|^expression|^loop/) {
-			if (ref($node) eq 'ARRAY')  {
-				tree_exec($node);
-			}else {
-				if (ref($node) eq 'HASH') {
-					&{$node->{FUNC}}(@{$node->{ARGS}});
-				}
-			}
-		}
-	}
+    my $tree = shift;
+
+    foreach my $node (@$tree) {
+        if ($node !~ /^skip|^instruction|^expression|^loop/) {
+            if (ref($node) eq 'ARRAY')  {
+                tree_exec($node);
+            }else {
+                if (ref($node) eq 'HASH') {
+                    &{$node->{FUNC}}(@{$node->{ARGS}});
+                }
+            }
+        }
+    }
 }
 
 
diff --git a/bench/perl/gas.pm b/bench/perl/gas.pm
index c9f3f81..453c086 100644
--- a/bench/perl/gas.pm
+++ b/bench/perl/gas.pm
@@ -1,4 +1,31 @@
-#!/usr/bin/perl 
+#!/usr/bin/perl
+# =======================================================================================
+#
+#      Filename:  gas.pm
+#
+#      Description:  Implements gas callbacks for likwid asm parser.
+#
+#      Version:   <VERSION>
+#      Released:  <DATE>
+#
+#      Author:  Jan Treibig (jt), jan.treibig at gmail.com
+#      Project:  likwid
+#
+#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#
+#      This program is free software: you can redistribute it and/or modify it under
+#      the terms of the GNU General Public License as published by the Free Software
+#      Foundation, either version 3 of the License, or (at your option) any later
+#      version.
+#
+#      This program is distributed in the hope that it will be useful, but WITHOUT ANY
+#      WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+#      PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+#
+#      You should have received a copy of the GNU General Public License along with
+#      this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# =======================================================================================
 
 package as;
 use Data::Dumper;
@@ -6,7 +33,7 @@ use isax86;
 use isax86_64;
 
 $AS = { HEADER     => '.intel_syntax noprefix',
-	    FOOTER     => ''};
+        FOOTER     => ''};
 
 $LOCAL = {};
 $MODE = 'GLOBAL';
@@ -20,191 +47,191 @@ my $ARG;
 
 sub emit_code
 {
-	my $code = shift;
-	$code =~ s/([GF]PR[0-9]+)/$REG->{$1}/g;
-	$code =~ s/(ARG[0-9]+)/$ARG->{$1}/g;
-	$code =~ s/(LOCAL[0-9]+)/$LOCAL->{$1}/g;
-	print "$code\n";
+    my $code = shift;
+    $code =~ s/([GF]PR[0-9]+)/$REG->{$1}/g;
+    $code =~ s/(ARG[0-9]+)/$ARG->{$1}/g;
+    $code =~ s/(LOCAL[0-9]+)/$LOCAL->{$1}/g;
+    print "$code\n";
 }
 
 sub align
 {
-	my $number = shift;
-	print ".align $number\n";
+    my $number = shift;
+    print ".align $number\n";
 
 }
 
 sub mode
 {
-	$cmd = shift;
+    $cmd = shift;
 
-	if ($cmd eq 'START') {
-		$MODE = 'LOCAL';
-	} elsif ($cmd eq 'STOP') {
-		$MODE = 'GLOBAL';
-	}
+    if ($cmd eq 'START') {
+        $MODE = 'LOCAL';
+    } elsif ($cmd eq 'STOP') {
+        $MODE = 'GLOBAL';
+    }
 }
 
 sub function_entry
 {
-	my $symbolname = shift;
-	my $allocate = shift;
-	my $distance;
-
-	foreach ( (0 .. $allocate) ) {
-		$distance =  $_ * $WORDLENGTH;
-		$LOCAL->{"LOCAL$_"} = "[$BASEPTR-$distance]";
-	}
-
-	if($CURRENT_SECTION ne 'text') {
-		$CURRENT_SECTION = 'text';
-		print ".text\n";
-	}
-
-	print ".globl $symbolname\n";
-	print ".type $symbolname, \@function\n";
-	print "$symbolname :\n";
-
-	if ($main::ISA eq 'x86') {
-		print "push ebp\n";
-		print "mov ebp, esp\n";
-		$distance = $allocate * $WORDLENGTH;
-		print "sub  esp, $distance\n" if ($allocate);
-		print "push ebx\n";
-		print "push esi\n";
-		print "push edi\n";
-	} elsif ($main::ISA eq 'x86-64') {
-		print "push rbp\n";
-		print "mov rbp, rsp\n";
-		$distance = $allocate * $WORDLENGTH;
-		print "sub  rsp, $distance\n" if ($allocate);
-		print "push rbx\n";
-		print "push r12\n";
-		print "push r13\n";
-		print "push r14\n";
-		print "push r15\n";
-	}
+    my $symbolname = shift;
+    my $allocate = shift;
+    my $distance;
+
+    foreach ( (0 .. $allocate) ) {
+        $distance =  $_ * $WORDLENGTH;
+        $LOCAL->{"LOCAL$_"} = "[$BASEPTR-$distance]";
+    }
+
+    if($CURRENT_SECTION ne 'text') {
+        $CURRENT_SECTION = 'text';
+        print ".text\n";
+    }
+
+    print ".globl $symbolname\n";
+    print ".type $symbolname, \@function\n";
+    print "$symbolname :\n";
+
+    if ($main::ISA eq 'x86') {
+        print "push ebp\n";
+        print "mov ebp, esp\n";
+        $distance = $allocate * $WORDLENGTH;
+        print "sub  esp, $distance\n" if ($allocate);
+        print "push ebx\n";
+        print "push esi\n";
+        print "push edi\n";
+    } elsif ($main::ISA eq 'x86-64') {
+        print "push rbp\n";
+        print "mov rbp, rsp\n";
+        $distance = $allocate * $WORDLENGTH;
+        print "sub  rsp, $distance\n" if ($allocate);
+        print "push rbx\n";
+        print "push r12\n";
+        print "push r13\n";
+        print "push r14\n";
+        print "push r15\n";
+    }
 }
 
 sub function_exit
 {
-	my $symbolname = shift;
-
-	$LOCAL = {};
-
-	if ($main::ISA eq 'x86') {
-		print "pop edi\n";
-		print "pop esi\n";
-		print "pop ebx\n";
-		print "mov  esp, ebp\n";
-		print "pop ebp\n";
-	} elsif ($main::ISA eq 'x86-64') {
-		print "pop r15\n";
-		print "pop r14\n";
-		print "pop r13\n";
-		print "pop r12\n";
-		print "pop rbx\n";
-		print "mov  rsp, rbp\n";
-		print "pop rbp\n";
-	}
-	print "ret\n";
-	print ".size $symbolname, .-$symbolname\n";
-	print "\n";
+    my $symbolname = shift;
+
+    $LOCAL = {};
+
+    if ($main::ISA eq 'x86') {
+        print "pop edi\n";
+        print "pop esi\n";
+        print "pop ebx\n";
+        print "mov  esp, ebp\n";
+        print "pop ebp\n";
+    } elsif ($main::ISA eq 'x86-64') {
+        print "pop r15\n";
+        print "pop r14\n";
+        print "pop r13\n";
+        print "pop r12\n";
+        print "pop rbx\n";
+        print "mov  rsp, rbp\n";
+        print "pop rbp\n";
+    }
+    print "ret\n";
+    print ".size $symbolname, .-$symbolname\n";
+    print "\n";
 }
 
 sub define_data
 {
-	my $symbolname = shift;
-	my $type = shift;
-	my $value = shift;
-
-	if($CURRENT_SECTION ne 'data') {
-		$CURRENT_SECTION = 'data';
-		print ".data\n";
-	}
-	print ".align 64\n";
-	print "$symbolname:\n";
-	if ($type eq 'DOUBLE') {
-		print ".double $value, $value, $value, $value, $value, $value, $value, $value\n"
-	} elsif ($type eq 'SINGLE') {
-		print ".single $value, $value, $value, $value, $value, $value, $value, $value\n"
-	} elsif ($type eq 'INT') {
-		print ".int $value, $value\n"
-	}
+    my $symbolname = shift;
+    my $type = shift;
+    my $value = shift;
+
+    if($CURRENT_SECTION ne 'data') {
+        $CURRENT_SECTION = 'data';
+        print ".data\n";
+    }
+    print ".align 64\n";
+    print "$symbolname:\n";
+    if ($type eq 'DOUBLE') {
+        print ".double $value, $value, $value, $value, $value, $value, $value, $value\n"
+    } elsif ($type eq 'SINGLE') {
+        print ".single $value, $value, $value, $value, $value, $value, $value, $value\n"
+    } elsif ($type eq 'INT') {
+        print ".int $value, $value\n"
+    }
 }
 
 sub define_offset
 {
-	my $symbolname = shift;
-	my $type = shift;
-	my $value = shift;
-
-	if($CURRENT_SECTION ne 'data') {
-		$CURRENT_SECTION = 'data';
-		print ".data\n";
-	}
-	print ".align 16\n";
-	print "$symbolname:\n";
-  print ".int $value\n";
+    my $symbolname = shift;
+    my $type = shift;
+    my $value = shift;
+
+    if($CURRENT_SECTION ne 'data') {
+        $CURRENT_SECTION = 'data';
+        print ".data\n";
+    }
+    print ".align 16\n";
+    print "$symbolname:\n";
+    print ".int $value\n";
 }
 
 
 sub loop_entry
 {
-  my $symbolname = shift;
-  my $stopping_criterion = shift;
-  $stopping_criterion = $REG->{$stopping_criterion} if( exists $REG->{$stopping_criterion});
-
-  if ($main::ISA eq 'x86') {
-    print "xor   eax, eax\n";
-  } elsif ($main::ISA eq 'x86-64') {
-    print "xor   rax, rax\n";
-  }
-  print ".align 16\n";
-  if ($MODE eq 'GLOBAL') {
-    print "$symbolname :\n";
-  }else {
-    print "1:\n";
-  }
+    my $symbolname = shift;
+    my $stopping_criterion = shift;
+    $stopping_criterion = $REG->{$stopping_criterion} if( exists $REG->{$stopping_criterion});
+
+    if ($main::ISA eq 'x86') {
+        print "xor   eax, eax\n";
+    } elsif ($main::ISA eq 'x86-64') {
+        print "xor   rax, rax\n";
+    }
+    print ".align 16\n";
+    if ($MODE eq 'GLOBAL') {
+        print "$symbolname :\n";
+    }else {
+        print "1:\n";
+    }
 
 }
 
 
 sub loop_exit
 {
-  my $symbolname = shift;
-  my $step = shift;
-
-  if ($main::ISA eq 'x86') {
-    print "add eax, $step\n";
-    print "cmp eax, edi\n";
-  } elsif ($main::ISA eq 'x86-64') {
-    print "addq rax, $step\n";
-    print "cmpq rax, rdi\n";
-  }
-  if ($MODE eq 'GLOBAL') {
-    print "jl $symbolname\n";
-  }else {
-    print "jl 1b\n";
-  }
-  print "\n";
+    my $symbolname = shift;
+    my $step = shift;
+
+    if ($main::ISA eq 'x86') {
+        print "add eax, $step\n";
+        print "cmp eax, edi\n";
+    } elsif ($main::ISA eq 'x86-64') {
+        print "addq rax, $step\n";
+        print "cmpq rax, rdi\n";
+    }
+    if ($MODE eq 'GLOBAL') {
+        print "jl $symbolname\n";
+    }else {
+        print "jl 1b\n";
+    }
+    print "\n";
 }
 
 sub isa_init
 {
-  if ($main::ISA eq 'x86') {
-    $WORDLENGTH = $isax86::WORDLENGTH_X86 ;
-    $STACKPTR = $isax86::STACKPTR_X86 ;
-    $BASEPTR = $isax86::BASEPTR_X86 ;
-    $REG = $isax86::REG_X86;
-    $ARG = $isax86::ARG_X86 ;
-  } elsif ($main::ISA eq 'x86-64') {
-    $WORDLENGTH = $isax86_64::WORDLENGTH_X86_64;
-    $STACKPTR = $isax86_64::STACKPTR_X86_64 ;
-    $BASEPTR = $isax86_64::BASEPTR_X86_64 ;
-    $REG = $isax86_64::REG_X86_64;
-    $ARG = $isax86_64::ARG_X86_64 ;
-  }
+    if ($main::ISA eq 'x86') {
+        $WORDLENGTH = $isax86::WORDLENGTH_X86 ;
+        $STACKPTR = $isax86::STACKPTR_X86 ;
+        $BASEPTR = $isax86::BASEPTR_X86 ;
+        $REG = $isax86::REG_X86;
+        $ARG = $isax86::ARG_X86 ;
+    } elsif ($main::ISA eq 'x86-64') {
+        $WORDLENGTH = $isax86_64::WORDLENGTH_X86_64;
+        $STACKPTR = $isax86_64::STACKPTR_X86_64 ;
+        $BASEPTR = $isax86_64::BASEPTR_X86_64 ;
+        $REG = $isax86_64::REG_X86_64;
+        $ARG = $isax86_64::ARG_X86_64 ;
+    }
 }
 
 
diff --git a/bench/perl/generatePas.pl b/bench/perl/generatePas.pl
index 2dcd530..b625b6b 100755
--- a/bench/perl/generatePas.pl
+++ b/bench/perl/generatePas.pl
@@ -1,4 +1,31 @@
 #!/usr/bin/perl
+# =======================================================================================
+#
+#      Filename:  generatePas.pl
+#
+#      Description:  Converter from ptt to pas file format.
+#
+#      Version:   <VERSION>
+#      Released:  <DATE>
+#
+#      Author:  Jan Treibig (jt), jan.treibig at gmail.com
+#      Project:  likwid
+#
+#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#
+#      This program is free software: you can redistribute it and/or modify it under
+#      the terms of the GNU General Public License as published by the Free Software
+#      Foundation, either version 3 of the License, or (at your option) any later
+#      version.
+#
+#      This program is distributed in the hope that it will be useful, but WITHOUT ANY
+#      WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+#      PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+#
+#      You should have received a copy of the GNU General Public License along with
+#      this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# =======================================================================================
 
 use lib 'util';
 use strict;
@@ -79,7 +106,7 @@ my $stream_lookup = {
 opendir (DIR, "./$BenchRoot") or die "Cannot open bench directory: $!\n";
 my $tpl = Template->new({
         INCLUDE_PATH => ["$TemplateRoot"]
-        });
+    });
 
 while (defined(my $file = readdir(DIR))) {
     if ($file !~ /^\./) {
@@ -172,18 +199,18 @@ while (defined(my $file = readdir(DIR))) {
 
         $tpl->process('bench.tt', $Vars, "$OutputDirectory/$name.pas");
         push(@Testcases,{name    => $name,
-                         streams => $streams,
-                         type    => $type,
-                         stride  => $increment,
-                         flops   => $flops,
-                         bytes   => $bytes,
-                         desc    => $desc,
-                         loads    => $loads,
-                         stores    => $stores,
-                         branches    => $branches,
-                         instr_const    => $instr,
-                         instr_loop    => $loop_instr,
-                         uops    => $uops});
+                streams => $streams,
+                type    => $type,
+                stride  => $increment,
+                flops   => $flops,
+                bytes   => $bytes,
+                desc    => $desc,
+                loads    => $loads,
+                stores    => $stores,
+                branches    => $branches,
+                instr_const    => $instr,
+                instr_loop    => $loop_instr,
+                uops    => $uops});
     }
 }
 #print Dumper(@Testcases);
diff --git a/bench/perl/isax86.pm b/bench/perl/isax86.pm
index 7575f37..d586fac 100644
--- a/bench/perl/isax86.pm
+++ b/bench/perl/isax86.pm
@@ -1,4 +1,31 @@
 #!/usr/bin/perl
+# =======================================================================================
+#
+#      Filename:  isax86.pm
+#
+#      Description:  Configuration for x86 ISA for ptt to pas converter.
+#
+#      Version:   <VERSION>
+#      Released:  <DATE>
+#
+#      Author:  Jan Treibig (jt), jan.treibig at gmail.com
+#      Project:  likwid
+#
+#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#
+#      This program is free software: you can redistribute it and/or modify it under
+#      the terms of the GNU General Public License as published by the Free Software
+#      Foundation, either version 3 of the License, or (at your option) any later
+#      version.
+#
+#      This program is distributed in the hope that it will be useful, but WITHOUT ANY
+#      WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+#      PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+#
+#      You should have received a copy of the GNU General Public License along with
+#      this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# =======================================================================================
 
 package isax86;
 
@@ -41,5 +68,4 @@ $ARG_X86 = {
     ARG17 => '[ebp+72]',
     ARG18 => '[ebp+76]'};
 
-
 1;
diff --git a/bench/perl/isax86_64.pm b/bench/perl/isax86_64.pm
index 7c57279..a18a847 100644
--- a/bench/perl/isax86_64.pm
+++ b/bench/perl/isax86_64.pm
@@ -1,4 +1,31 @@
-#!/usr/bin/perl 
+#!/usr/bin/perl
+# =======================================================================================
+#
+#      Filename:  isax86_64.pm
+#
+#      Description:  Configuration for x86_64 ISA for ptt to pas converter.
+#
+#      Version:   <VERSION>
+#      Released:  <DATE>
+#
+#      Author:  Jan Treibig (jt), jan.treibig at gmail.com
+#      Project:  likwid
+#
+#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#
+#      This program is free software: you can redistribute it and/or modify it under
+#      the terms of the GNU General Public License as published by the Free Software
+#      Foundation, either version 3 of the License, or (at your option) any later
+#      version.
+#
+#      This program is distributed in the hope that it will be useful, but WITHOUT ANY
+#      WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+#      PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+#
+#      You should have received a copy of the GNU General Public License along with
+#      this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# =======================================================================================
 
 package isax86_64;
 
diff --git a/bench/perl/templates/group.tt b/bench/perl/templates/group.tt
index 5676318..0e3860a 100644
--- a/bench/perl/templates/group.tt
+++ b/bench/perl/templates/group.tt
@@ -25,7 +25,7 @@ perfmon_printDerivedMetrics[% arch FILTER ucfirst %](PerfmonGroup group)
     uint64_t cpi_cyc  = 0;
     int cpi_index = 0;
 
-    switch ( group ) 
+    switch ( group )
     {
 [% FOREACH group IN groups %]
         case [% group.name %]:
@@ -72,7 +72,7 @@ perfmon_printDerivedMetrics[% arch FILTER ucfirst %](PerfmonGroup group)
             {
                 stat[cpi_index][0] = (double) cpi_cyc / (double) cpi_instr;
             }
-                
+
             break;
 [% END %]
 
@@ -85,7 +85,7 @@ perfmon_printDerivedMetrics[% arch FILTER ucfirst %](PerfmonGroup group)
     printResultTable(&tableData);
     freeResultTable(&tableData);
 
-    // for threaded results print sum, max, min and avg 
+    // for threaded results print sum, max, min and avg
     if (perfmon_numThreads > 1)
     {
         initStatisticTable(&tableData, fc, numRows);
@@ -116,7 +116,7 @@ perfmon_logDerivedMetrics[% arch FILTER ucfirst %](PerfmonGroup group, double ti
     double tmpValue;
     double inverseClock = 1.0 /(double) timer_getCpuClock();
 
-    switch ( group ) 
+    switch ( group )
     {
         [% FOREACH group IN groups %]
         case [% group.name %]:
diff --git a/bench/src/allocator.c b/bench/src/allocator.c
index 290a6b1..3c37755 100644
--- a/bench/src/allocator.c
+++ b/bench/src/allocator.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Implementation of allocator module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -37,21 +37,12 @@
 #include <allocator.h>
 #include <likwid.h>
 
-/* #####   EXPORTED VARIABLES   ########################################### */
-
-
-/* #####   MACROS  -  LOCAL TO THIS SOURCE FILE   ######################### */
-
-
 /* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
 
 static int numberOfAllocatedVectors = 0;
 static allocation* allocList;
 static AffinityDomains_t domains = NULL;
 
-/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
-
-
 /* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
 
 void
@@ -77,6 +68,26 @@ allocator_finalize()
     numberOfAllocatedVectors = 0;
 }
 
+size_t
+allocator_dataTypeLength(DataType type)
+{
+    switch (type)
+    {
+        case INT:
+            return sizeof(int);
+            break;
+        case SINGLE:
+            return sizeof(float);
+            break;
+        case DOUBLE:
+            return sizeof(double);
+            break;
+        default:
+            return 0;
+    }
+    return 0;
+}
+
 void
 allocator_allocateVector(
         void** ptr,
@@ -84,6 +95,7 @@ allocator_allocateVector(
         uint64_t size,
         int offset,
         DataType type,
+        int stride,
         bstring domainString)
 {
     int i;
@@ -92,23 +104,9 @@ allocator_allocateVector(
     int errorCode;
     int elements = 0;
 
-    switch ( type )
-    {
-        case INT:
-            bytesize = (size+offset) * sizeof(int);
-            elements = alignment / sizeof(int);
-            break;
-
-        case SINGLE:
-            bytesize = (size+offset) * sizeof(float);
-            elements = alignment / sizeof(float);
-            break;
-
-        case DOUBLE:
-            bytesize = (size+offset) * sizeof(double);
-            elements = alignment / sizeof(double);
-            break;
-    }
+    size_t typesize = allocator_dataTypeLength(type);
+    bytesize = (size+offset) * typesize;
+    elements = typesize ? alignment / typesize : 0;
 
     for (i=0;i<domains->numberOfAffinityDomains;i++)
     {
@@ -155,9 +153,10 @@ allocator_allocateVector(
     numberOfAllocatedVectors++;
 
     affinity_pinProcess(domain->processorList[0]);
-    printf("Allocate: Process running on core %d (Domain %s) - Vector length %llu Offset %d Alignment %llu\n",
+    printf("Allocate: Process running on core %d (Domain %s) - Vector length %llu/%llu Offset %d Alignment %llu\n",
             affinity_processGetProcessorId(),
             bdata(domain->tag),
+            LLU_CAST size,
             LLU_CAST bytesize,
             offset,
             LLU_CAST elements);
diff --git a/bench/src/barrier.c b/bench/src/barrier.c
index b536ff3..4abe827 100644
--- a/bench/src/barrier.c
+++ b/bench/src/barrier.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Implementation of threaded spin loop barrier
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -27,7 +27,9 @@
  *
  * =======================================================================================
  */
+
 /* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
@@ -35,9 +37,6 @@
 #include <errno.h>
 #include <barrier.h>
 
-/* #####   EXPORTED VARIABLES   ########################################### */
-
-
 /* #####   MACROS  -  LOCAL TO THIS SOURCE FILE   ######################### */
 
 #define CACHELINE_SIZE 64
@@ -48,9 +47,6 @@ static BarrierGroup* groups;
 static int currentGroupId = 0;
 static int maxGroupId = 0;
 
-/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
-
-
 /* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
 
 int
@@ -66,7 +62,7 @@ barrier_registerGroup(int numThreads)
     groups[currentGroupId].numberOfThreads = numThreads;
     ret = posix_memalign(
             (void**) &groups[currentGroupId].groupBval,
-            CACHELINE_SIZE, 
+            CACHELINE_SIZE,
             numThreads * 32 * sizeof(int));
 
     if (ret < 0)
@@ -75,7 +71,6 @@ barrier_registerGroup(int numThreads)
         exit(EXIT_FAILURE);
     }
 
-
     return currentGroupId++;
 }
 
@@ -109,7 +104,6 @@ barrier_registerThread(BarrierData* barr, int groupId, int threadId)
         exit(EXIT_FAILURE);
     }
 
-
     barr->index[0] = threadId;
 
     for (i = 0; i < barr->numberOfThreads; i++)
@@ -165,3 +159,4 @@ void barrier_destroy(BarrierData* barr)
     free(barr->index);
     free(groups[currentGroupId].groupBval);
 }
+
diff --git a/bench/src/bench.c b/bench/src/bench.c
index 3cbfb54..83b89d7 100644
--- a/bench/src/bench.c
+++ b/bench/src/bench.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Benchmarking framework for likwid-bench
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Jan Treibig (jt), jan.treibig at gmail.com
  *               Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -28,6 +28,7 @@
  *
  * =======================================================================================
  */
+
 /* #####   HEADER FILE INCLUDES   ######################################### */
 
 #include <pthread.h>
@@ -43,14 +44,10 @@
 #include <barrier.h>
 #include <likwid.h>
 
-/* #####   EXPORTED VARIABLES   ########################################### */
-
-
 /* #####   MACROS  -  LOCAL TO THIS SOURCE FILE   ######################### */
 
 #define BARRIER   barrier_synchronize(&barr)
 
-
 #define EXECUTE(func)   \
     BARRIER; \
     LIKWID_MARKER_START("bench");  \
@@ -66,11 +63,10 @@
     BARRIER
 
 
-
-
 /* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
 
-void* runTest(void* arg)
+void*
+runTest(void* arg)
 {
     int threadId;
     int offset;
@@ -90,12 +86,15 @@ void* runTest(void* arg)
     barrier_registerThread(&barr, 0, data->globalThreadId);
 
     /* Prepare ptrs for thread */
-    vecsize = myData->size;
+    vecsize = myData->size / data->numberOfThreads;
     size = myData->size / data->numberOfThreads;
-    myData->size = size;
+    
     size -= (size % myData->test->stride);
+    myData->size = size;
     offset = data->threadId * size;
-    
+    //printf("Orig size %lu Size %lu\n", myData->size / data->numberOfThreads, size);
+    if (size != vecsize && data->threadId == 0)
+        printf("Sanitizing vector length to a multiple of the loop stride from %llu elements (%llu bytes) to %llu elements (%llu bytes)\n", LLU_CAST vecsize, LLU_CAST (vecsize*myData->test->bytes), LLU_CAST size, LLU_CAST (size*myData->test->bytes));
 
     switch ( myData->test->type )
     {
@@ -134,7 +133,6 @@ void* runTest(void* arg)
             break;
     }
 
-
     /* pin the thread */
     likwid_pinThread(myData->processors[threadId]);
     printf("Group: %d Thread %d Global Thread %d running on core %d - Vector length %llu Offset %d\n",
@@ -142,7 +140,7 @@ void* runTest(void* arg)
             threadId,
             data->globalThreadId,
             affinity_threadGetProcessorId(),
-            LLU_CAST vecsize,
+            LLU_CAST size,
             offset);
     BARRIER;
 
@@ -438,7 +436,6 @@ void* runTest(void* arg)
     pthread_exit(NULL);
 }
 
-
 #define MEASURE(func) \
     iterations = 8; \
     while (1) \
@@ -455,12 +452,12 @@ void* runTest(void* arg)
             break; \
     } \
 
-
-void* getIterSingle(void* arg)
+void*
+getIterSingle(void* arg)
 {
     int threadId = 0;
     int offset = 0;
-    size_t size = 0;
+    size_t size = 0, vecsize = 0;
     size_t i;
     ThreadData* data;
     ThreadUserData* myData;
@@ -473,7 +470,13 @@ void* getIterSingle(void* arg)
     func = myData->test->kernel;
     threadId = data->threadId;
 
-    size = myData->size - (myData->size % myData->test->stride);
+    //size = myData->size - (myData->size % myData->test->stride);
+    vecsize = myData->size;
+    size = myData->size / data->numberOfThreads;
+    
+    size -= (size % myData->test->stride);
+    offset = data->threadId * size;
+
     likwid_pinThread(myData->processors[threadId]);
 
 #ifdef DEBUG_LIKWID
@@ -768,3 +771,4 @@ void* getIterSingle(void* arg)
 #endif
     return NULL;
 }
+
diff --git a/bench/src/strUtil.c b/bench/src/strUtil.c
index 93d4630..c9e1c7a 100644
--- a/bench/src/strUtil.c
+++ b/bench/src/strUtil.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Utility string routines building upon bstrlib
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com.
  *      Project:  likwid
@@ -27,11 +27,18 @@
  *
  * =======================================================================================
  */
+
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <strUtil.h>
 #include <math.h>
 #include <likwid.h>
+#include <allocator.h>
+
+/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE  ################## */
 
-static int str2int(const char* str)
+static int
+str2int(const char* str)
 {
     char* endptr;
     errno = 0;
@@ -54,7 +61,10 @@ static int str2int(const char* str)
     return (int) val;
 }
 
-uint64_t bstr_to_doubleSize(const_bstring str, DataType type)
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
+
+uint64_t
+bstr_to_doubleSize(const_bstring str, DataType type)
 {
     int ret;
     bstring unit = bmidstr(str, blength(str)-2, 2);
@@ -76,20 +86,7 @@ uint64_t bstr_to_doubleSize(const_bstring str, DataType type)
         return 0;
     }
 
-    switch (type)
-    {
-        case SINGLE:
-            bytesize = sizeof(float);
-            break;
-
-        case DOUBLE:
-            bytesize = sizeof(double);
-            break;
-
-        case INT:
-            bytesize = sizeof(int);
-            break;
-    }
+    bytesize = allocator_dataTypeLength(type);
 
     if ((biseqcstr(unit, "kB"))||(biseqcstr(unit, "KB")))
     {
@@ -112,8 +109,8 @@ uint64_t bstr_to_doubleSize(const_bstring str, DataType type)
     return junk;
 }
 
-
-bstring parse_workgroup(Workgroup* group, const_bstring str, DataType type)
+bstring
+parse_workgroup(Workgroup* group, const_bstring str, DataType type)
 {
     CpuTopology_t topo;
     struct bstrList* tokens;
@@ -121,7 +118,6 @@ bstring parse_workgroup(Workgroup* group, const_bstring str, DataType type)
     int numThreads = 0;
     bstring domain;
 
-
     tokens = bsplit(str,':');
     if (tokens->qty == 2)
     {
@@ -188,7 +184,8 @@ bstring parse_workgroup(Workgroup* group, const_bstring str, DataType type)
     return domain;
 }
 
-int parse_streams(Workgroup* group, const_bstring str, int numberOfStreams)
+int
+parse_streams(Workgroup* group, const_bstring str, int numberOfStreams)
 {
     struct bstrList* tokens;
     struct bstrList* subtokens;
@@ -249,7 +246,8 @@ int parse_streams(Workgroup* group, const_bstring str, int numberOfStreams)
     return 0;
 }
 
-int bstr_to_workgroup(Workgroup* group, const_bstring str, DataType type, int numberOfStreams)
+int
+bstr_to_workgroup(Workgroup* group, const_bstring str, DataType type, int numberOfStreams)
 {
     int parseStreams = 0;
     struct bstrList* tokens;
@@ -298,7 +296,8 @@ int bstr_to_workgroup(Workgroup* group, const_bstring str, DataType type, int nu
     return 0;
 }
 
-void workgroups_destroy(Workgroup** groupList, int numberOfGroups, int numberOfStreams)
+void
+workgroups_destroy(Workgroup** groupList, int numberOfGroups, int numberOfStreams)
 {
     int i = 0, j = 0;
     if (groupList == NULL)
@@ -317,3 +316,4 @@ void workgroups_destroy(Workgroup** groupList, int numberOfGroups, int numberOfS
     }
     free(list);
 }
+
diff --git a/bench/src/threads.c b/bench/src/threads.c
index df506e9..8e99a77 100644
--- a/bench/src/threads.c
+++ b/bench/src/threads.c
@@ -5,8 +5,8 @@
  *
  *      Description:  High level interface to pthreads
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -38,8 +38,6 @@
 #include <errno.h>
 #include <threads.h>
 
-
-
 /* #####   EXPORTED VARIABLES   ########################################### */
 
 pthread_barrier_t threads_barrier;
@@ -53,7 +51,9 @@ static pthread_attr_t attr;
 static int numThreads = 0;
 
 /* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE  ################## */
-static int count_characters(const char *str, char character)
+
+static int
+count_characters(const char *str, char character)
 {
     if (str == 0)
         return 0;
@@ -68,15 +68,16 @@ static int count_characters(const char *str, char character)
     return count;
 }
 
-void* dummy_function(void* arg)
+void*
+dummy_function(void* arg)
 {
     return 0;
 }
-/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
-
 
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
 
-int threads_test()
+int
+threads_test()
 {
     int cnt = 0;
     int err;
@@ -119,7 +120,7 @@ threads_init(int numberOfThreads)
 }
 
 
-void 
+void
 threads_create(void *(*startRoutine)(void*))
 {
     int i;
@@ -133,7 +134,7 @@ threads_create(void *(*startRoutine)(void*))
     }
 }
 
-void 
+void
 threads_createGroups(int numberOfGroups)
 {
     int i;
@@ -145,7 +146,7 @@ threads_createGroups(int numberOfGroups)
     {
         fprintf(stderr, "ERROR: Not enough threads %d to create %d groups\n",numThreads,numberOfGroups);
     }
-    else 
+    else
     {
         numThreadsPerGroup = numThreads / numberOfGroups;
     }
@@ -179,7 +180,7 @@ threads_createGroups(int numberOfGroups)
 }
 
 
-void 
+void
 threads_registerDataAll(ThreadUserData* data, threads_copyDataFunc func)
 {
     int i;
@@ -277,8 +278,7 @@ threads_destroy(int numberOfGroups, int numberOfStreams)
     int i = 0, j = 0;
     pthread_attr_destroy(&attr);
     pthread_barrier_destroy(&threads_barrier);
-    
-    
+
     for(i=0;i<numberOfGroups;i++)
     {
         for (j = 0; j < threads_groups[i].numberOfThreads; j++)
@@ -291,3 +291,4 @@ threads_destroy(int numberOfGroups, int numberOfStreams)
     free(threads_groups);
     free(threads);
 }
+
diff --git a/bench/x86-64/copy_avx512.ptt b/bench/x86-64/copy_avx512.ptt
new file mode 100644
index 0000000..5de150b
--- /dev/null
+++ b/bench/x86-64/copy_avx512.ptt
@@ -0,0 +1,21 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+DESC Double-precision vector copy, optimized for AVX-512
+LOADS 1
+STORES 1
+INSTR_CONST 16
+INSTR_LOOP 11
+UOPS 14
+LOOP 32
+vmovapd    zmm1, [STR0 + GPR1 * 8]
+vmovapd    zmm2, [STR0 + GPR1 * 8 + 64]
+vmovapd    zmm3, [STR0 + GPR1 * 8 + 128]
+vmovapd    zmm4, [STR0 + GPR1 * 8 + 192]
+vmovapd    [STR1 + GPR1 * 8]     , zmm1
+vmovapd    [STR1 + GPR1 * 8 + 64], zmm2
+vmovapd    [STR1 + GPR1 * 8 + 128], zmm3
+vmovapd    [STR1 + GPR1 * 8 + 192], zmm4
+
+
diff --git a/bench/x86-64/daxpy_avx512.ptt b/bench/x86-64/daxpy_avx512.ptt
new file mode 100644
index 0000000..331b96f
--- /dev/null
+++ b/bench/x86-64/daxpy_avx512.ptt
@@ -0,0 +1,25 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 2
+BYTES 24
+DESC Double-precision linear combination of two vectors, optimized for AVX-512
+LOADS 2
+STORES 1
+INSTR_CONST 17
+INSTR_LOOP 21
+UOPS 38
+vmovapd zmm7, [rip+SCALAR]
+LOOP 32
+vmulpd    zmm1, zmm7, [STR0 + GPR1*8]
+vaddpd    zmm1, zmm1, [STR1 + GPR1*8]
+vmulpd    zmm2, zmm7, [STR0 + GPR1*8+64]
+vaddpd    zmm2, zmm2, [STR1 + GPR1*8+64]
+vmovapd    [STR1 + GPR1*8], zmm1
+vmovapd    [STR1 + GPR1*8+64], zmm2
+vmulpd    zmm3, zmm7, [STR0 + GPR1*8+128]
+vaddpd    zmm3, zmm3, [STR1 + GPR1*8+128]
+vmulpd    zmm4, zmm7, [STR0 + GPR1*8+192]
+vaddpd    zmm4, zmm4, [STR1 + GPR1*8+192]
+vmovapd    [STR1 + GPR1*8+128], zmm3
+vmovapd    [STR1 + GPR1*8+192], zmm4
+
diff --git a/bench/x86-64/daxpy_sp_sse.ptt b/bench/x86-64/daxpy_sp_sse.ptt
index fc20441..0a926a7 100644
--- a/bench/x86-64/daxpy_sp_sse.ptt
+++ b/bench/x86-64/daxpy_sp_sse.ptt
@@ -8,21 +8,21 @@ STORES 1
 INSTR_CONST 17
 INSTR_LOOP 19
 UOPS 26
-movaps FPR7, [rip+SCALAR]
-LOOP 8
-movaps   FPR1, [STR0 + GPR1*4]
-movaps   FPR2, [STR0 + GPR1*4+16]
-movaps   FPR3, [STR0 + GPR1*4+32]
-movaps   FPR4, [STR0 + GPR1*4+48]
-mulps    FPR1, FPR7
-addps    FPR1, [STR1 + GPR1*4]
-mulps    FPR2, FPR7
-addps    FPR2, [STR1 + GPR1*4+16]
-mulps    FPR3, FPR7
-addps    FPR3, [STR1 + GPR1*4+32]
-mulps    FPR4, FPR7
-addps    FPR4, [STR1 + GPR1*4+48]
-movaps   [STR1 + GPR1*4], FPR1
-movaps   [STR1 + GPR1*4+16], FPR2
-movaps   [STR1 + GPR1*4+32], FPR3
-movaps   [STR1 + GPR1*4+48], FPR4
+vmovaps FPR7, [rip+SCALAR]
+LOOP 16
+vmovaps   FPR1, [STR0 + GPR1*4]
+vmovaps   FPR2, [STR0 + GPR1*4+16]
+vmovaps   FPR3, [STR0 + GPR1*4+32]
+vmovaps   FPR4, [STR0 + GPR1*4+48]
+vmulps    FPR1, FPR1, FPR7
+vaddps    FPR1, FPR1, [STR1 + GPR1*4]
+vmulps    FPR2, FPR2, FPR7
+vaddps    FPR2, FPR2, [STR1 + GPR1*4+16]
+vmulps    FPR3, FPR3, FPR7
+vaddps    FPR3, FPR3, [STR1 + GPR1*4+32]
+vmulps    FPR4, FPR4, FPR7
+vaddps    FPR4, FPR4, [STR1 + GPR1*4+48]
+vmovaps   [STR1 + GPR1*4], FPR1
+vmovaps   [STR1 + GPR1*4+16], FPR2
+vmovaps   [STR1 + GPR1*4+32], FPR3
+vmovaps   [STR1 + GPR1*4+48], FPR4
diff --git a/bench/x86-64/ddot_avx512.ptt b/bench/x86-64/ddot_avx512.ptt
new file mode 100644
index 0000000..3cd274a
--- /dev/null
+++ b/bench/x86-64/ddot_avx512.ptt
@@ -0,0 +1,23 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 2
+BYTES 16
+DESC Double-precision dot product of two vectors, optimized for AVX-512
+LOADS 2
+STORES 0
+INSTR_CONST 20
+INSTR_LOOP 15
+UOPS 18
+LOOP 32
+vmovapd zmm1,       [STR0 + GPR1 * 8]
+vmulpd  zmm1, zmm1, [STR1 + GPR1 * 8]
+vaddpd  zmm0, zmm0, zmm1
+vmovapd zmm2,       [STR0 + GPR1 * 8 + 64]
+vmulpd  zmm2, zmm2, [STR1 + GPR1 * 8 + 64]
+vaddpd  zmm5, zmm5, zmm2
+vmovapd zmm3,       [STR0 + GPR1 * 8 + 128]
+vmulpd  zmm3, zmm3, [STR1 + GPR1 * 8 + 128]
+vaddpd  zmm6, zmm6, zmm3
+vmovapd zmm4,       [STR0 + GPR1 * 8 + 192]
+vmulpd  zmm4, zmm4, [STR1 + GPR1 * 8 + 192]
+vaddpd  zmm7, zmm7, zmm4
diff --git a/bench/x86-64/load_avx512.ptt b/bench/x86-64/load_avx512.ptt
new file mode 100644
index 0000000..55e4e75
--- /dev/null
+++ b/bench/x86-64/load_avx512.ptt
@@ -0,0 +1,18 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+DESC Double-precision load, optimized for AVX-512
+LOADS 1
+STORES 0
+INSTR_CONST 16
+INSTR_LOOP 7
+UOPS 6
+LOOP 32
+#mov        GPR12, [STR0 + GPR1 * 8 + 256]
+vmovapd    zmm1, [STR0 + GPR1 * 8]
+vmovapd    zmm2, [STR0 + GPR1 * 8 + 64]
+vmovapd    zmm3, [STR0 + GPR1 * 8 + 128]
+vmovapd    zmm4, [STR0 + GPR1 * 8 + 192]
+
+
diff --git a/bench/x86-64/store_avx512.ptt b/bench/x86-64/store_avx512.ptt
new file mode 100644
index 0000000..2b070e7
--- /dev/null
+++ b/bench/x86-64/store_avx512.ptt
@@ -0,0 +1,20 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+DESC Double-precision store, optimized for AVX-512
+LOADS 0
+STORES 1
+INSTR_CONST 20
+INSTR_LOOP 7
+UOPS 10
+vmovapd zmm1, [rip+SCALAR]
+vmovapd zmm2, [rip+SCALAR]
+vmovapd zmm3, [rip+SCALAR]
+vmovapd zmm4, [rip+SCALAR]
+LOOP 32
+vmovapd    [STR0 + GPR1 * 8]     , zmm1
+vmovapd    [STR0 + GPR1 * 8 + 64], zmm2
+vmovapd    [STR0 + GPR1 * 8 + 128], zmm3
+vmovapd    [STR0 + GPR1 * 8 + 192], zmm4
+
diff --git a/bench/x86-64/stream_avx512.ptt b/bench/x86-64/stream_avx512.ptt
new file mode 100644
index 0000000..610a5d8
--- /dev/null
+++ b/bench/x86-64/stream_avx512.ptt
@@ -0,0 +1,29 @@
+STREAMS 3
+TYPE DOUBLE
+FLOPS 2
+BYTES 24
+DESC Double-precision stream triad A(i) = B(i)*c + C(i), optimized for AVX-512
+LOADS 2
+STORES 1
+INSTR_CONST 17
+INSTR_LOOP 19
+UOPS 26
+vmovapd zmm5, [rip+SCALAR]
+LOOP 32
+vmovapd    zmm1, [STR1 + GPR1*8]
+vmovapd    zmm2, [STR1 + GPR1*8+64]
+vmovapd    zmm3, [STR1 + GPR1*8+128]
+vmovapd    zmm4, [STR1 + GPR1*8+192]
+vmulpd     zmm1, zmm1, zmm5
+vaddpd     zmm1, zmm1, [STR2 + GPR1*8]
+vmulpd     zmm2, zmm2, zmm5
+vaddpd     zmm2, zmm2, [STR2 + GPR1*8+64]
+vmulpd     zmm3, zmm3, zmm5
+vaddpd     zmm3, zmm3, [STR2 + GPR1*8+128]
+vmulpd     zmm4, zmm4, zmm5
+vaddpd     zmm4, zmm4, [STR2 + GPR1*8+192]
+vmovapd    [STR0 + GPR1*8]   , zmm1
+vmovapd    [STR0 + GPR1*8+64], zmm2
+vmovapd    [STR0 + GPR1*8+128], zmm3
+vmovapd    [STR0 + GPR1*8+192], zmm4
+
diff --git a/bench/x86-64/sum_avx.ptt b/bench/x86-64/sum_avx.ptt
index 29d8ff0..dee6031 100644
--- a/bench/x86-64/sum_avx.ptt
+++ b/bench/x86-64/sum_avx.ptt
@@ -8,23 +8,22 @@ STORES 0
 INSTR_CONST 24
 INSTR_LOOP 11
 UOPS 18
-vxorpd FPR9, FPR9, FPR9
-vxorpd FPR1, FPR1, FPR1
-vmovapd FPR2, FPR1
-vmovapd FPR3, FPR1
-vmovapd FPR4, FPR1
-vmovapd FPR5, FPR1
-vmovapd FPR6, FPR1
-vmovapd FPR7, FPR1
-vmovapd FPR8, FPR1
+vxorpd ymm1, ymm1, ymm1
+vmovapd ymm2, ymm1
+vmovapd ymm3, ymm1
+vmovapd ymm4, ymm1
+vmovapd ymm5, ymm1
+vmovapd ymm6, ymm1
+vmovapd ymm7, ymm1
+vmovapd ymm8, ymm1
 LOOP 32
-vaddpd    FPR1, FPR1, [STR0 + GPR1 * 8]
-vaddpd    FPR2, FPR2, [STR0 + GPR1 * 8 + 32]
-vaddpd    FPR3, FPR3, [STR0 + GPR1 * 8 + 64]
-vaddpd    FPR4, FPR4, [STR0 + GPR1 * 8 + 96]
-vaddpd    FPR5, FPR5, [STR0 + GPR1 * 8 + 128]
-vaddpd    FPR6, FPR6, [STR0 + GPR1 * 8 + 160]
-vaddpd    FPR7, FPR7, [STR0 + GPR1 * 8 + 192]
-vaddpd    FPR8, FPR8, [STR0 + GPR1 * 8 + 224]
+vaddpd    ymm1, ymm1, [STR0 + GPR1 * 8]
+vaddpd    ymm2, ymm2, [STR0 + GPR1 * 8 + 32]
+vaddpd    ymm3, ymm3, [STR0 + GPR1 * 8 + 64]
+vaddpd    ymm4, ymm4, [STR0 + GPR1 * 8 + 96]
+vaddpd    ymm5, ymm5, [STR0 + GPR1 * 8 + 128]
+vaddpd    ymm6, ymm6, [STR0 + GPR1 * 8 + 160]
+vaddpd    ymm7, ymm7, [STR0 + GPR1 * 8 + 192]
+vaddpd    ymm8, ymm8, [STR0 + GPR1 * 8 + 224]
 
 
diff --git a/bench/x86-64/sum_avx512.ptt b/bench/x86-64/sum_avx512.ptt
new file mode 100644
index 0000000..081701b
--- /dev/null
+++ b/bench/x86-64/sum_avx512.ptt
@@ -0,0 +1,30 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 1
+BYTES 8
+DESC Double-precision sum of a vector, optimized for AVX-512
+LOADS 1
+STORES 0
+INSTR_CONST 24
+INSTR_LOOP 11
+UOPS 18
+vxorpd FPR9, FPR9, FPR9
+vxorpd FPR1, FPR1, FPR1
+vmovapd FPR2, FPR1
+vmovapd FPR3, FPR1
+vmovapd FPR4, FPR1
+vmovapd FPR5, FPR1
+vmovapd FPR6, FPR1
+vmovapd FPR7, FPR1
+vmovapd FPR8, FPR1
+LOOP 64
+vaddpd    FPR1, FPR1, [STR0 + GPR1 * 8]
+vaddpd    FPR2, FPR2, [STR0 + GPR1 * 8 + 64]
+vaddpd    FPR3, FPR3, [STR0 + GPR1 * 8 + 128]
+vaddpd    FPR4, FPR4, [STR0 + GPR1 * 8 + 192]
+vaddpd    FPR5, FPR5, [STR0 + GPR1 * 8 + 256]
+vaddpd    FPR6, FPR6, [STR0 + GPR1 * 8 + 320]
+vaddpd    FPR7, FPR7, [STR0 + GPR1 * 8 + 384]
+vaddpd    FPR8, FPR8, [STR0 + GPR1 * 8 + 448]
+
+
diff --git a/bench/x86-64/triad_avx512.ptt b/bench/x86-64/triad_avx512.ptt
new file mode 100644
index 0000000..202abac
--- /dev/null
+++ b/bench/x86-64/triad_avx512.ptt
@@ -0,0 +1,28 @@
+STREAMS 4
+TYPE DOUBLE
+FLOPS 2
+BYTES 32
+DESC Double-precision triad A(i) = B(i) * C(i) + D(i), optimized for AVX-512
+LOADS 3
+STORES 1
+INSTR_CONST 16
+INSTR_LOOP 19
+UOPS 30
+LOOP 32
+vmovapd    zmm1, [STR1 + GPR1*8]
+vmovapd    zmm2, [STR1 + GPR1*8+64]
+vmovapd    zmm3, [STR1 + GPR1*8+128]
+vmovapd    zmm4, [STR1 + GPR1*8+192]
+vmulpd    zmm1, zmm1, [STR2 + GPR1*8]
+vaddpd    zmm1, zmm1, [STR3 + GPR1*8]
+vmulpd    zmm2, zmm2, [STR2 + GPR1*8+64]
+vaddpd    zmm2, zmm2, [STR3 + GPR1*8+64]
+vmulpd    zmm3, zmm3, [STR2 + GPR1*8+128]
+vaddpd    zmm3, zmm3, [STR3 + GPR1*8+128]
+vmulpd    zmm4, zmm4, [STR2 + GPR1*8+192]
+vaddpd    zmm4, zmm4, [STR3 + GPR1*8+192]
+vmovapd    [STR0 + GPR1*8], zmm1
+vmovapd    [STR0 + GPR1*8+64], zmm2
+vmovapd    [STR0 + GPR1*8+128], zmm3
+vmovapd    [STR0 + GPR1*8+192], zmm4
+
diff --git a/bench/x86-64/update_avx512.ptt b/bench/x86-64/update_avx512.ptt
new file mode 100644
index 0000000..e4eac9e
--- /dev/null
+++ b/bench/x86-64/update_avx512.ptt
@@ -0,0 +1,21 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+DESC Double-precision vector update, optimized for AVX-512
+LOADS 1
+STORES 1
+INSTR_CONST 16
+INSTR_LOOP 11
+UOPS 14
+LOOP 32
+vmovapd    zmm1, [STR0 + GPR1 * 8]
+vmovapd    [STR0 + GPR1 * 8]     , zmm1
+vmovapd    zmm2, [STR0 + GPR1 * 8 + 64]
+vmovapd    zmm3, [STR0 + GPR1 * 8 + 128]
+vmovapd    zmm4, [STR0 + GPR1 * 8 + 192]
+vmovapd    [STR0 + GPR1 * 8 + 64], zmm2
+vmovapd    [STR0 + GPR1 * 8 + 128], zmm3
+vmovapd    [STR0 + GPR1 * 8 + 192], zmm4
+
+
diff --git a/config.mk b/config.mk
index 9c5d7cd..d12c19c 100644
--- a/config.mk
+++ b/config.mk
@@ -11,6 +11,13 @@ COLOR = BLUE#NO SPACE
 # Path were to install likwid
 PREFIX = /usr/local#NO SPACE
 
+# Uncomment to optionally use an external Lua installation (version 5.3).
+# By default, the internally provided Lua is used.
+#LUA_INCLUDE_DIR = /usr/include/lua5.2#NO SPACE
+#LUA_LIB_DIR = /usr/lib/x86_64-linux-gnu#NO SPACE
+#LUA_LIB_NAME = lua5.2#NO SPACE, executable is assumed to have the same name
+#LUA_BIN = /usr/bin#NO SPACE
+
 #################################################################
 # Common users do not need to change values below this comment! #
 #################################################################
@@ -64,6 +71,10 @@ INSTRUMENT_BENCH = false#NO SPACE
 # Use recommended Portable Hardware Locality (hwloc) instead of CPUID
 USE_HWLOC = true#NO SPACE
 
+# Use Linux perf_event interface for measurements. Does not support thermal or
+# energy (RAPL) readings.
+USE_PERF_EVENT = false#NO SPACE
+
 # Build LIKWID with debug flags
 DEBUG = false#NO SPACE
 
@@ -76,10 +87,12 @@ TOPO_FILE_PATH = /etc/likwid_topo.cfg
 
 # Versioning Information
 VERSION = 4
-RELEASE = 1
-DATE    = 08.08.2016
+RELEASE = 2
+DATE    = 22.12.2016
 
 RPATHS = -Wl,-rpath=$(INSTALLED_LIBPREFIX)
+LIKWIDLOCKPATH = /var/run/likwid.lock
+LIKWIDSOCKETBASE = /tmp/likwid  # -%d will be added automatically to the socket name
 LIBLIKWIDPIN = $(abspath $(INSTALLED_PREFIX)/lib/liblikwidpin.so.$(VERSION).$(RELEASE))
 LIKWIDFILTERPATH = $(abspath $(INSTALLED_PREFIX)/share/likwid/filter)
 LIKWIDGROUPPATH = $(abspath $(INSTALLED_PREFIX)/share/likwid/perfgroups)
diff --git a/doc/applications/likwid-bench.md b/doc/applications/likwid-bench.md
index 1d371ee..17bbf7a 100644
--- a/doc/applications/likwid-bench.md
+++ b/doc/applications/likwid-bench.md
@@ -85,7 +85,7 @@ Run test <CODE>triad</CODE> using <CODE>2</CODE> threads in affinity domain <COD
 Run test <CODE>update</CODE> using all threads in affinity domain <CODE>S0</CODE> and <CODE>S1</CODE>. The threads scheduled on <CODE>S0</CODE> use stream that sum up to <CODE>100kB</CODE>. Similar to <CODE>S1</CODE> the threads are placed there working only on their socket-local streams. The results of both workgroups are combined.
 </LI>
 <LI><CODE>likwid-perfctr -c E:S0:4 -g MEM -m likwid-bench -t update -w S0:100kB:4</CODE><BR>
-Run test <CODE>update</CODE> using <CODE>4</CODE> threads in affinity domain <CODE>S0</CODE>. The input and output stream of the <CODE>copy</CODE> benchmark sum up to <CODE>100kB</CODE> placed in affinity domain <CODE>S0</CODE>. The benchmark execution is measured using the \ref Marker_API. It measures the <CODE>MEM</CODE> performance group on the first four CPUs of the <CODE>S0</CODE> affinity domain. For further information about hardware performance counters see \ref likwid-perfctr<BR [...]
+Run test <CODE>update</CODE> using <CODE>4</CODE> threads in affinity domain <CODE>S0</CODE>. The input and output stream of the <CODE>copy</CODE> benchmark sum up to <CODE>100kB</CODE> placed in affinity domain <CODE>S0</CODE>. The benchmark execution is measured using the \ref Marker_API. It measures the <CODE>MEM</CODE> performance group on the first four CPUs of the <CODE>S0</CODE> affinity domain. For further information about hardware performance counters see \ref likwid-perfctr <B [...]
 </LI>
 <LI><CODE>likwid-bench -t copy -w S0:1GB:2:1:2-0:S1,1:S1</CODE><BR>
 Run test <CODE>copy</CODE> using <CODE>2</CODE> threads in affinity domain <CODE>S0</CODE> skipping one thread during selection. The two streams used in the <CODE>copy</CODE> benchmark have the IDs 0 and 1 and a summed up size of <CODE>1GB</CODE>. Both streams are placed in affinity domain <CODE>S1</CODE>.
diff --git a/doc/archs/phi_knl.md b/doc/archs/phi_knl.md
new file mode 100644
index 0000000..b1d3f8d
--- /dev/null
+++ b/doc/archs/phi_knl.md
@@ -0,0 +1,688 @@
+/*! \page phi_knl Intel® Xeon Phi (Knights Landing)
+
+<H1>Available performance monitors for the Intel® Xeon Phi (Knights Landing) microarchitecture</H1>
+<UL>
+<LI>\ref KNL_FIXED "Fixed-purpose counters"</LI>
+<LI>\ref KNL_PMC "General-purpose counters"</LI>
+<LI>\ref KNL_THERMAL "Thermal counters"</LI>
+<LI>\ref KNL_POWER "Power measurement counters"</LI>
+<LI>\ref KNL_UBOX "Uncore management counters"</LI>
+<LI>\ref KNL_CBOX "Last level cache counters"</LI>
+<LI>\ref KNL_WBOX "Power control unit general-purpose counters"</LI>
+<LI>\ref KNL_MBOX "Memory controller (iMC) counters"</LI>
+<LI>\ref KNL_EBOX "Embedded DRAM controller (EDC) counters"</LI>
+<LI>\ref KNL_PBOX "Ring-to-PCIe counters"</LI>
+<LI>\ref KNL_IBOX "IRP box counters"</LI>
+
+</UL>
+
+<H1>Counters available for each hardware thread</H1>
+\anchor KNL_FIXED
+<H2>Fixed-purpose counters</H2>
+<P>Since the Core2 microarchitecture, Intel® provides a set of fixed-purpose counters. Each can measure only one specific event.</P>
+<H3>Counter and events</H3>
+<TABLE>
+<TR>
+  <TH>Counter name</TH>
+  <TH>Event name</TH>
+</TR>
+<TR>
+  <TD>FIXC0</TD>
+  <TD>INSTR_RETIRED_ANY</TD>
+</TR>
+<TR>
+  <TD>FIXC1</TD>
+  <TD>CPU_CLK_UNHALTED_CORE</TD>
+</TR>
+<TR>
+  <TD>FIXC2</TD>
+  <TD>CPU_CLK_UNHALTED_REF</TD>
+</TR>
+</TABLE>
+<H3>Available Options</H3>
+<TABLE>
+<TR>
+  <TH>Option</TH>
+  <TH>Argument</TH>
+  <TH>Description</TH>
+  <TH>Comment</TH>
+</TR>
+<TR>
+  <TD>anythread</TD>
+  <TD>N</TD>
+  <TD>Set bit 2+(index*4) in config register</TD>
+  <TD></TD>
+</TR>
+<TR>
+  <TD>kernel</TD>
+  <TD>N</TD>
+  <TD>Set bit (index*4) in config register</TD>
+  <TD></TD>
+</TR>
+</TABLE>
+
+\anchor KNL_PMC
+<H2>General-purpose counters</H2>
+<P>The Intel® Xeon Phi (Knights Landing) microarchitecture provides 2 general-purpose counters consisting of a config and a counter register.</P>
+<H3>Counter and events</H3>
+<TABLE>
+<TR>
+  <TH>Counter name</TH>
+  <TH>Event name</TH>
+</TR>
+<TR>
+  <TD>PMC0</TD>
+  <TD>*</TD>
+</TR>
+<TR>
+  <TD>PMC1</TD>
+  <TD>*</TD>
+</TR>
+</TABLE>
+<H3>Available Options</H3>
+<TABLE>
+<TR>
+  <TH>Option</TH>
+  <TH>Argument</TH>
+  <TH>Description</TH>
+  <TH>Comment</TH>
+</TR>
+<TR>
+  <TD>edgedetect</TD>
+  <TD>N</TD>
+  <TD>Set bit 18 in config register</TD>
+  <TD></TD>
+</TR>
+<TR>
+  <TD>kernel</TD>
+  <TD>N</TD>
+  <TD>Set bit 17 in config register</TD>
+  <TD></TD>
+</TR>
+<TR>
+  <TD>threshold</TD>
+  <TD>8 bit hex value</TD>
+  <TD>Set bits 24-31 in config register</TD>
+  <TD></TD>
+</TR>
+<TR>
+  <TD>invert</TD>
+  <TD>N</TD>
+  <TD>Set bit 23 in config register</TD>
+  <TD></TD>
+</TR>
+</TABLE>
+
+<H3>Special handling for events</H3>
+<P>The Intel® Xeon Phi (Knights Landing) microarchitecture provides measuring of offcore events in PMC counters. Therefore the stream of offcore events must be filtered using the OFFCORE_RESPONSE registers. The Intel® Xeon Phi (Knights Landing) microarchitecture has two of those registers. LIKWID defines some events that perform the filtering according to the event name. Although there are many bitmasks possible, LIKWID natively provides only the ones with response type ANY. Own  [...]
+</P>
+<TABLE>
+<TR>
+  <TH>Option</TH>
+  <TH>Argument</TH>
+  <TH>Description</TH>
+  <TH>Comment</TH>
+</TR>
+<TR>
+  <TD>match0</TD>
+  <TD>16 bit hex value</TD>
+  <TD>Input value masked with 0xFFFF and written to bits 0-15 in the OFFCORE_RESPONSE register</TD>
+  <TD>Check the <A HREF="http://www.Intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html">Intel® Software Developer System Programming Manual, Vol. 3, Chapter Performance Monitoring</A> and <A HREF="https://download.01.org/perfmon/SLM">https://download.01.org/perfmon/SLM</A>.</TD>
+</TR>
+<TR>
+  <TD>match1</TD>
+  <TD>22 bit hex value</TD>
+  <TD>Input value is written to bits 16-38 in the OFFCORE_RESPONSE register</TD>
+  <TD>Check the <A HREF="http://www.Intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html">Intel® Software Developer System Programming Manual, Vol. 3, Chapter Performance Monitoring</A> and <A HREF="https://download.01.org/perfmon/SLM">https://download.01.org/perfmon/SLM</A>.</TD>
+</TR>
+</TABLE>
+
+\anchor KNL_THERMAL
+<H2>Thermal counter</H2>
+<P>The Intel® Xeon Phi (Knights Landing) microarchitecture provides one register for the current core temperature.</P>
+<H3>Counter and events</H3>
+<TABLE>
+<TR>
+  <TH>Counter name</TH>
+  <TH>Event name</TH>
+</TR>
+<TR>
+  <TD>TMP0</TD>
+  <TD>TEMP_CORE</TD>
+</TR>
+</TABLE>
+
+<H1>Counters available for one hardware thread per socket</H1>
+\anchor KNL_POWER
+<H2>Power counters</H2>
+<P>The Intel® Xeon Phi (Knights Landing) microarchitecture provides measurements of the current power consumption through the RAPL interface.</P>
+<H3>Counter and events</H3>
+<TABLE>
+<TR>
+  <TH>Counter name</TH>
+  <TH>Event name</TH>
+</TR>
+<TR>
+  <TD>PWR0</TD>
+  <TD>PWR_PKG_ENERGY</TD>
+</TR>
+<TR>
+  <TD>PWR1</TD>
+  <TD>PWR_PP0_ENERGY</TD>
+</TR>
+<TR>
+  <TD>PWR3</TD>
+  <TD>PWR_DRAM_ENERGY</TD>
+</TR>
+</TABLE>
+
+\anchor KNL_UBOX
+<H2>Uncore management counters</H2>
+<P>The Intel® Xeon Phi (Knights Landing) microarchitecture provides measurements of the management box in the Uncore. The description from Intel®:<BR>
+The Uncore management performance counters are exposed to the operating system through the MSR interface. The name UBOX originates from the Nehalem EX Uncore monitoring where those functional units are called UBOX.
+</P>
+<H3>Counter and events</H3>
+<TABLE>
+<TR>
+  <TH>Counter name</TH>
+  <TH>Event name</TH>
+</TR>
+<TR>
+  <TD>UBOX0</TD>
+  <TD>*</TD>
+</TR>
+<TR>
+  <TD>UBOX1</TD>
+  <TD>*</TD>
+</TR>
+<TR>
+  <TD>UBOXFIX</TD>
+  <TD>UBOX_CLOCKTICKS</TD>
+</TR>
+</TABLE>
+
+<H3>Available Options (Only for UBOX<0,1> counters)</H3>
+<TABLE>
+<TR>
+  <TH>Option</TH>
+  <TH>Argument</TH>
+  <TH>Operation</TH>
+  <TH>Comment</TH>
+</TR>
+<TR>
+  <TD>edgedetect</TD>
+  <TD>N</TD>
+  <TD>Set bit 18 in config register</TD>
+  <TD></TD>
+</TR>
+<TR>
+  <TD>threshold</TD>
+  <TD>5 bit hex value</TD>
+  <TD>Set bits 24-28 in config register</TD>
+  <TD></TD>
+</TR>
+<TR>
+  <TD>invert</TD>
+  <TD>N</TD>
+  <TD>Set bit 23 in config register</TD>
+  <TD></TD>
+</TR>
+</TABLE>
+
+\anchor KNL_CBOX
+<H2>Last level cache counters</H2>
+<p>The Intel® Xeon Phi (KNL) microarchitecture provides measurements for the last level cache segments.</p>
+<H3>Counter and events</H3>
+<table>
+<tr>
+  <th>Counter name</th>
+  <th>Event name</th>
+</tr>
+<tr>
+  <td>CBOX<0-37>C0</td>
+  <td>*</td>
+</tr>
+<tr>
+  <td>CBOX<0-37>C1</td>
+  <td>*</td>
+</tr>
+<tr>
+  <td>CBOX<0-37>C2</td>
+  <td>*</td>
+</tr>
+<tr>
+  <td>CBOX<0-37>C3</td>
+  <td>*</td>
+</tr>
+</table>
+<H3>Available Options</H3>
+<table>
+<tr>
+  <th>Option</th>
+  <th>Argument</th>
+  <th>Description</th>
+  <th>Comment</th>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#edgedetect">edgedetect</a></td>
+  <td>N</td>
+  <td>Set bit 18 in config register</td>
+  <td></td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#invert">invert</a></td>
+  <td>N</td>
+  <td>Set bit 23 in config register</td>
+  <td></td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#threshold0xxxxx">threshold</a></td>
+  <td>8 bit hex value</td>
+  <td>Set bits 24-31 in config register</td>
+  <td></td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#opcode0xxxxx">opcode</a></td>
+  <td>9 bit hex value</td>
+  <td>Set bits 9-28 in PERF_UNIT_CTL_1_CHA_<0-37> register</td>
+  <td>A list of valid opcodes can be found in the <a href="https://software.intel.com/en-us/articles/intel-xeon-phi-x200-family-processor-performance-monitoring-reference-manual">Intel® Xeon® Phi Processor Performance Monitoring Reference Manual</a>.</td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#state0xxxxx">state</a></td>
+  <td>10 bit hex value</td>
+  <td>Set bits 17-26 in PERF_UNIT_CTL_CHA_<0-37> register</td>
+  <td>H: 0x08,<br>E: 0x04,<br>S: 0x02<br>All other bits reserved.</td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#tid0xxxxx">tid</a></td>
+  <td>9 bit hex value</td>
+  <td>Set bits 0-8 in PERF_UNIT_CTL_CHA_<0-37> register and enable TID filtering with bit 19 in config register</td>
+  <td>0-2 ThreadID, 3-8 CoreID</td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#nid0xxxxx">nid</a></td>
+  <td>2 bit hex value</td>
+  <td>Set bits 0-1 in PERF_UNIT_CTL_1_CHA_<0-37> register</td>
+  <td>Remote: 0x1<br>Local: 0x2</td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#match0-30xxxxx">match0</a></td>
+  <td>3 bit hex address</td>
+  <td>Set bits 29-31 in PERF_UNIT_CTL_1_CHA_<0-37> register</td>
+  <td>C6Opcode: 0x1<br>NonCohOpcode: 0x2<br>IsocOpcode: 0x3</td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#match0-30xxxxx">match1</a></td>
+  <td>2 bit hex address</td>
+  <td>Set bits 4-5 in PERF_UNIT_CTL_1_CHA_<0-37> register</td>
+  <td>Count near memory cache events: 0x1<br>Count non-near memory cache events: 0x2</td>
+</tr>
+</table>
+
+<H3>Special handling for events</H3>
+<p>The Intel® Xeon Phi (KNL) microarchitecture provides an event LLC_LOOKUP which can be filtered with the 'state' option. If no 'state' is set, LIKWID sets the state to 0xE, the default value to measure all lookups.<br>
+If the match1 option is not used, bits 4 and 5 in PERF_UNIT_CTL_1_CHA_<0-37> are set.<br>
+If no opcode option is set, the bit 3 in PERF_UNIT_CTL_1_CHA_<0-37> is set.</p>
+
+
+\anchor KNL_WBOX
+<H2>Power control unit general-purpose counters</H2>
+<p>The Intel® Xeon Phi (KNL) microarchitecture provides measurements of the power control unit (PCU) in the uncore.</p>
+
+<p>The PCU performance counters are exposed to the operating system through the MSR interface. The name WBOX originates from the Nehalem EX uncore monitoring.</p>
+<H3>Counter and events</H3>
+<table>
+<tr>
+  <th>Counter name</th>
+  <th>Event name</th>
+</tr>
+<tr>
+  <td>WBOX0</td>
+  <td>*</td>
+</tr>
+<tr>
+  <td>WBOX1</td>
+  <td>*</td>
+</tr>
+<tr>
+  <td>WBOX2</td>
+  <td>*</td>
+</tr>
+<tr>
+  <td>WBOX3</td>
+  <td>*</td>
+</tr>
+</table>
+<H3>Available Options</H3>
+<table>
+<tr>
+  <th>Option</th>
+  <th>Argument</th>
+  <th>Operation</th>
+  <th>Comment</th>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#edgedetect">edgedetect</a></td>
+  <td>N</td>
+  <td>Set bit 18 in config register</td>
+  <td></td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#invert">invert</a></td>
+  <td>N</td>
+  <td>Set bit 23 in config register</td>
+  <td></td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#threshold0xxxxx">threshold</a></td>
+  <td>5 bit hex value</td>
+  <td>Set bits 24-28 in config register</td>
+  <td></td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#match0-30xxxxx">match0</a></td>
+  <td>32 bit hex value</td>
+  <td>Set bits 0-31 in<br>MSR_UNC_PCU_PMON_BOX_FILTER register</td>
+  <td>Band0: bits 0-7,<br>Band1: bits 8-15,<br>Band2: bits 16-23,<br>Band3: bits 24-31</td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#occupancy0xxxxx">occupancy</a></td>
+  <td>2 bit hex value</td>
+  <td>Set bits 14-15 in config register</td>
+  <td>Cores<br>in C0: 0x1,<br>in C3: 0x2,<br>in C6: 0x3</td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#occ_edgedetect">occ_edgedetect</a></td>
+  <td>N</td>
+  <td>Set bit 31 in config register</td>
+  <td></td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#occ_invert">occ_invert</a></td>
+  <td>N</td>
+  <td>Set bit 30 in config register</td>
+  <td></td>
+</tr>
+</table>
+
+\anchor KNL_MBOX
+<H2>Memory controller (iMC) counters</H2>
+<p>The Intel® Xeon Phi (KNL) microarchitecture provides measurements of the integrated Memory Controllers (iMC) in the uncore. The description from Intel®:<br>
+<i>The processor implements two Memory Controllers on the processor die. Each memory
+controller is capable of controlling three DDR4 memory channels. The MC design is
+derived from the EDC (Near-Memory (MCDRAM) controller) and is a sub-set of EDC in
+functionality. The main difference from EDC is that the physical interface for MC will be
+DDR4 IOs. The processor MC will interface with the rest of the Untile via the mesh
+interface (R2Mem -> Ring-to-MC interface). Therefore, the MC agent is broken into
+three regions: The front-end ring/mesh interface called the "R2Mem", the core "EDC
+controller" logic, and three individual "DDR channel controllers/schedulers."
+</i><br></p>
+<p>The integrated Memory Controllers performance counters are exposed to the operating system through PCI interfaces. There may be two memory controllers in the system. There are four different PCI devices per memory controller, three for each memory channel and one for the controller. Each device has four different general-purpose counters. The three channels of the first memory controller are MBOX0-2, the memory controller itself is MBOX3. The three channels of the second memory contro [...]
+<H3>Counter and events</H3>
+<table>
+<tr>
+  <th>Counter name</th>
+  <th>Event name</th>
+</tr>
+<tr>
+  <td>MBOX<0-2,4-6>C0</td>
+  <td>MC_DCLK, MC_CAS*</td>
+</tr>
+<tr>
+  <td>MBOX<0-2,4-6>C1</td>
+  <td>MC_DCLK, MC_CAS*</td>
+</tr>
+<tr>
+  <td>MBOX<0-2,4-6>C2</td>
+  <td>MC_DCLK, MC_CAS*</td>
+</tr>
+<tr>
+  <td>MBOX<0-2,4-6>C3</td>
+  <td>MC_DCLK, MC_CAS*</td>
+</tr>
+<tr>
+  <td>MBOX<3,7>C0</td>
+  <td>MC_UCLK</td>
+</tr>
+<tr>
+  <td>MBOX<3,7>C1</td>
+  <td>MC_UCLK</td>
+</tr>
+<tr>
+  <td>MBOX<3,7>C2</td>
+  <td>MC_UCLK</td>
+</tr>
+<tr>
+  <td>MBOX<3,7>C3</td>
+  <td>MC_UCLK</td>
+</tr>
+<tr>
+  <td>MBOX<0-7>FIX</td>
+  <td>DRAM_CLOCKTICKS</td>
+</tr>
+</table>
+
+<H3>Available Options</H3>
+<table>
+<tr>
+  <th>Option</th>
+  <th>Argument</th>
+  <th>Operation</th>
+  <th>Comment</th>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#edgedetect">edgedetect</a></td>
+  <td>N</td>
+  <td>Set bit 18 in config register</td>
+  <td></td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#invert">invert</a></td>
+  <td>N</td>
+  <td>Set bit 23 in config register</td>
+  <td></td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#threshold0xxxxx">threshold</a></td>
+  <td>8 bit hex value</td>
+  <td>Set bits 24-31 in config register</td>
+  <td></td>
+</tr>
+</table>
+
+\anchor KNL_EBOX
+<H2>Embedded DRAM controller (EDC) counters</H2>
+<p>The Intel® Xeon Phi (KNL) microarchitecture provides measurements of the Embedded DRAM Controllers (EDC) in the uncore, the interface to the MCDRAM. The description from Intel®:<br>
+<i>The EDC is the high bandwidth near-memory controller for the processor. EDC refers
+to "Embedded DRAM Controller" (i.e. DRAM that is embedded in the processor
+package). The technology that is used to implement the embedded DRAM for the
+processor is MCDRAM (Multi-Chip (Stacked) DRAM). Eight channels of MCDRAM are
+supported by 8 MCDRAM Controllers (EDC). The EDC's are connected to the other
+components (clusters) within the processor by the internal mesh interconnect fabric.
+</i><br></p>
+
+<p>The Embedded DRAM Controllers (EDC) performance counters are exposed to the operating system through PCI interfaces. There are eight embedded memory controllers in the system. There are two different PCI devices per memory controller, one for the mesh side (EUBOX<0-7>C<0-3>) and one on the DRAM side (EDBOX<0-7>C<0-3>). Each device has four different general-purpose counters.</p>
+<p>The fixed-purpose counters are exposed to the operating system through PCI interfaces. There are eight embedded memory controllers in the system. There are two different PCI devices per memory controller, one for the mesh side (EUBOX<0-7>FIX) and one on the DRAM side (EDBOX<0-7>FIX).</p>
+<H3>Counter and events</H3>
+<table>
+<tr>
+  <th>Counter name</th>
+  <th>Event name</th>
+</tr>
+<tr>
+  <td>EUBOX<0-7>C0</td>
+  <td>EDC_UCLK, EDC_HIT_*, EDC_MISS_*</td>
+</tr>
+<tr>
+  <td>EUBOX<0-7>C1</td>
+  <td>EDC_UCLK, EDC_HIT_*, EDC_MISS_*</td>
+</tr>
+<tr>
+  <td>EUBOX<0-7>C2</td>
+  <td>EDC_UCLK, EDC_HIT_*, EDC_MISS_*</td>
+</tr>
+<tr>
+  <td>EUBOX<0-7>C3</td>
+  <td>EDC_UCLK, EDC_HIT_*, EDC_MISS_*</td>
+</tr>
+<tr>
+  <td>EUBOX<0-7>FIX</td>
+  <td>EDC_CLOCKTICKS</td>
+</tr>
+<tr>
+  <td>EDBOX<0-7>C0</td>
+  <td>EDC_ECLK, EDC_WPQ_INSERTS, EDC_RPQ_INSERTS</td>
+</tr>
+<tr>
+  <td>EDBOX<0-7>C1</td>
+  <td>EDC_ECLK, EDC_WPQ_INSERTS, EDC_RPQ_INSERTS</td>
+</tr>
+<tr>
+  <td>EDBOX<0-7>C2</td>
+  <td>EDC_ECLK, EDC_WPQ_INSERTS, EDC_RPQ_INSERTS</td>
+</tr>
+<tr>
+  <td>EDBOX<0-7>C3</td>
+  <td>EDC_ECLK, EDC_WPQ_INSERTS, EDC_RPQ_INSERTS</td>
+</tr>
+<tr>
+  <td>EDBOX<0-7>FIX</td>
+  <td>MCDRAM_CLOCKTICKS</td>
+</tr>
+</table>
+<H3>Available Options</H3>
+<table>
+<tr>
+  <th>Option</th>
+  <th>Argument</th>
+  <th>Operation</th>
+  <th>Comment</th>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#edgedetect">edgedetect</a></td>
+  <td>N</td>
+  <td>Set bit 18 in config register</td>
+  <td></td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#invert">invert</a></td>
+  <td>N</td>
+  <td>Set bit 23 in config register</td>
+  <td></td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#threshold0xxxxx">threshold</a></td>
+  <td>8 bit hex value</td>
+  <td>Set bits 24-31 in config register</td>
+  <td></td>
+</tr>
+</table>
+
+
+\anchor KNL_PBOX
+<H2>Ring-to-PCIe counters</H2>
+<p>The Intel® Xeon Phi (KNL) microarchitecture provides measurements of the Ring-to-PCIe (R2PCIe) interface in the uncore. The description from Intel®:<br>
+<i>The M2PCI is the logic which interfaces the IIO modules to the mesh and includes the mesh stop.</i></p>
+
+<p>The Ring-to-PCIe performance counters are exposed to the operating system through a PCI interface. Independent of the system's configuration, there is only one Ring-to-PCIe interface per CPU socket. </p>
+<H3>Counter and events</H3>
+<table>
+<tr>
+  <th>Counter name</th>
+  <th>Event name</th>
+</tr>
+<tr>
+  <td>PBOX0</td>
+  <td>*</td>
+</tr>
+<tr>
+  <td>PBOX1</td>
+  <td>*</td>
+</tr>
+<tr>
+  <td>PBOX2</td>
+  <td>*</td>
+</tr>
+<tr>
+  <td>PBOX3</td>
+  <td>*</td>
+</tr>
+</table>
+<H3>Available Options</H3>
+<table>
+<tr>
+  <th>Option</th>
+  <th>Argument</th>
+  <th>Operation</th>
+  <th>Comment</th>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#edgedetect">edgedetect</a></td>
+  <td>N</td>
+  <td>Set bit 18 in config register</td>
+  <td></td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#invert">invert</a></td>
+  <td>N</td>
+  <td>Set bit 23 in config register</td>
+  <td></td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#threshold0xxxxx">threshold</a></td>
+  <td>8 bit hex value</td>
+  <td>Set bits 24-31 in config register</td>
+  <td></td>
+</tr>
+</table>
+
+\anchor KNL_IBOX
+<H2>IRP box counters</H2>
+<p>The Intel® Xeon Phi (KNL) microarchitecture provides measurements of the IRP box in the uncore. The description from Intel®:<br>
+<i>IRP is responsible for maintaining coherency for IIO traffic that needs to be coherent (e.g. cross-socket P2P).
+</i></p>
+
+<p>The IRP box counters are exposed to the operating system through the PCI interface. The IBOX was introduced with the Intel® IvyBridge EP/EN/EX microarchitecture.</p>
+<H3>Counter and events</H3>
+<table>
+<tr>
+  <th>Counter name</th>
+  <th>Event name</th>
+</tr>
+<tr>
+  <td>IBOX0</td>
+  <td>*</td>
+</tr>
+<tr>
+  <td>IBOX1</td>
+  <td>*</td>
+</tr>
+</table>
+<H3>Available Options</H3>
+<table>
+<tr>
+  <th>Option</th>
+  <th>Argument</th>
+  <th>Operation</th>
+  <th>Comment</th>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#edgedetect">edgedetect</a></td>
+  <td>N</td>
+  <td>Set bit 18 in config register</td>
+  <td></td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#invert">invert</a></td>
+  <td>N</td>
+  <td>Set bit 23 in config register</td>
+  <td></td>
+</tr>
+<tr>
+  <td><a href="https://github.com/RRZE-HPC/likwid/wiki/DescOptions#threshold0xxxxx">threshold</a></td>
+  <td>8 bit hex value</td>
+  <td>Set bits 24-31 in config register</td>
+  <td></td>
+</tr>
+</table>
+*/
diff --git a/doc/likwid-doxygen.md b/doc/likwid-doxygen.md
index 32704f1..557f951 100644
--- a/doc/likwid-doxygen.md
+++ b/doc/likwid-doxygen.md
@@ -48,6 +48,7 @@ Optionally, a global configuration file \ref likwid.cfg can be given to modify s
 - \ref TimerMon
 - \ref MemSweep
 - \ref CpuFeatures
+- \ref CpuFreq
 
 \subsection Lua_Interface Lua Interface
 - \ref lua_Info
@@ -63,6 +64,7 @@ Optionally, a global configuration file \ref likwid.cfg can be given to modify s
 - \ref lua_Timer
 - \ref lua_MemSweep
 - \ref lua_cpuFeatures
+- \ref lua_CpuFreq
 - \ref lua_Misc (Some functionality not provided by Lua natively)
 
 \subsection Fortran90_Interface Fortran90 Interface
@@ -89,6 +91,7 @@ Optionally, a global configuration file \ref likwid.cfg can be given to modify s
 - \subpage broadwelld
 - \subpage broadwellep
 - \subpage skylake
+- \subpage phi_knl
 
 \subsection Architectures_AMD AMD®
 - \subpage k8
diff --git a/doc/likwid-perfctr.1 b/doc/likwid-perfctr.1
index 321da34..de857de 100644
--- a/doc/likwid-perfctr.1
+++ b/doc/likwid-perfctr.1
@@ -28,6 +28,7 @@ or
 .IR skip_mask ]
 .RB [ \-E
 .IR search_str ]
+.RB [ \-\-stats ]
 .SH DESCRIPTION
 .B likwid-perfctr
 is a lightweight command line application to configure and read out hardware performance monitoring data
@@ -105,6 +106,9 @@ Force writing of registers even if they are in use.
 .TP
 .B \-\^E <search_str>
 Print only events and corresponding counters matching <search_str>
+.TP
+.B \-\-\^stats
+Always print statistics table
 
 .SH EXAMPLE
 Because 
diff --git a/doc/likwid-perfscope.1 b/doc/likwid-perfscope.1
index 19886a8..f251fcd 100644
--- a/doc/likwid-perfscope.1
+++ b/doc/likwid-perfscope.1
@@ -71,7 +71,7 @@ see section
 .B EVENTSETS
 .TP
 .B \-\^r,\-\-\^range " <value>
-Plot only the last <value> values. Often refered to as sliding window.
+Plot only the last <value> values. Often referred to as sliding window.
 .TP
 .B \-\^p,\-\-\^plotdump
 Use the dumping feature of feedGnuplot to print out the plot configuration and its data at each timestep.
diff --git a/doc/likwid-pin.1 b/doc/likwid-pin.1
index 4822f8d..264d5cc 100644
--- a/doc/likwid-pin.1
+++ b/doc/likwid-pin.1
@@ -84,7 +84,7 @@ silent execution without output
 
 .SH CPU EXPRESSION
 .IP 1. 4
-The most intuitive CPU selection method is a comma-separated list of phyiscal CPU IDs. An example for this is
+The most intuitive CPU selection method is a comma-separated list of physical CPU IDs. An example for this is
 .B 0,2
 which schedules the threads on CPU cores 
 .B 0
diff --git a/doc/likwid-setFreq.1 b/doc/likwid-setFreq.1
index 1ef598c..087d584 100644
--- a/doc/likwid-setFreq.1
+++ b/doc/likwid-setFreq.1
@@ -6,8 +6,20 @@ that performs the actual setting of CPU cores' frequency and governor.
 .SH SYNOPSIS
 .B likwid-setFreq
 .IR <coreId>
-.IR <frequency>
-.IR [<governor>]
+.IR <command>
+.IR <frequency|governor>
+
+.SH OPTIONS
+.TP
+.B <command>
+Valid commands are
+.B cur
+,
+.B min
+,
+.B max
+and
+.B gov
 
 .SH DESCRIPTION
 .B likwid-setFreq
diff --git a/doc/likwid-setFrequencies.1 b/doc/likwid-setFrequencies.1
index b45fcbe..f5ad3cd 100644
--- a/doc/likwid-setFrequencies.1
+++ b/doc/likwid-setFrequencies.1
@@ -10,6 +10,10 @@ likwid-setFrequencies \- print and manage the clock frequency of CPU cores
 .IR <governor> ]
 .RB [ \-f,\-\-\^freq
 .IR <frequency> ]
+.RB [ \-x,\-\-\^min
+.IR <min_freq> ]
+.RB [ \-y,\-\-\^max
+.IR <max_freq> ]
 .SH DESCRIPTION
 .B likwid-setFrequencies
 is a command line application to set the clock frequency of CPU cores. Since only priviledged users are allowed to change the frequency of CPU cores, the application works in combination with a daemon
@@ -51,6 +55,12 @@ set the governor of all CPU cores inside the affinity domain. Current governors
 .TP
 .B \-\^f, \-\-\^freq <frequency>
 set a fixed frequency at all CPU cores inside the affinity domain. Implicitly sets userspace governor for the cores.
+.TP
+.B \-\^x, \-\-\^min <min_freq>
+set a fixed minimal frequency at all CPU cores inside the affinity domain. Can be used in combination with a dynamic governor.
+.TP
+.B \-\^y, \-\-\^max <max_freq>
+set a fixed maximal frequency at all CPU cores inside the affinity domain. Can be used in combination with a dynamic governor.
 
 .SH AUTHOR
 Written by Thomas Roehl <thomas.roehl at googlemail.com>.
diff --git a/doc/lua-doxygen.md b/doc/lua-doxygen.md
index e662a46..ad4881f 100644
--- a/doc/lua-doxygen.md
+++ b/doc/lua-doxygen.md
@@ -1,7 +1,7 @@
 /*! \page lua_Info Information about LIKWID's Lua API
 <H1>How to include Lua API into own Lua applications</H1>
 <CODE>
-package.path = package.path .. ';<PREFIX>/share/lua/?.lua'<BR>
+package.path = package.path .. ';/usr/local/share/lua/?.lua'<BR>
 local likwid = require("likwid")<BR>
 </CODE>
 <P></P>
@@ -51,7 +51,7 @@ or<BR>
 <H1>Data type definition for Lua config file module in the Lua API</H1>
 \anchor lua_config
 <H2>Config file read</H2>
-<P>This structure is returned by \ref getConfiguration function<BR>The config file can be created with \ref likwid-genTopoCfg executable. It searches the files /etc/likwid.cfg and <PREFIX>/etc/likwid.cfg. Other configuration file paths can be set in config.mk before building LIKWID.</P>
+<P>This structure is returned by \ref getConfiguration function<BR>The config file can be created with \ref likwid-genTopoCfg executable. It searches the files /etc/likwid.cfg and /usr/local/etc/likwid.cfg. Other configuration file paths can be set in config.mk before building LIKWID.</P>
 <TABLE>
 <TR>
   <TH>Membername</TH>
@@ -3014,6 +3014,279 @@ or<BR>
 </TR>
 </TABLE>
 
+/*! \page lua_CpuFreq CPU frequency manipulation module
+<H1>Data type definition for Lua CPU frequency manipulation module in the Lua API</H1>
+<H1>Function definitions for Lua CPU frequency manipulation module in the Lua API</H1>
+\anchor getCpuClockCurrent
+<H2>getCpuClockCurrent(cpuID)</H2>
+<P>Get the current CPU clock frequency</P>
+<TABLE>
+<TR>
+  <TH>Direction</TH>
+  <TH>Data type(s)</TH>
+</TR>
+<TR>
+  <TD>Input Parameter</TD>
+  <TD><TABLE>
+    <TR>
+      <TD>\a cpuID</TD>
+      <TD>CPU to get clock speed</TD>
+    </TR>
+  </TABLE></TD>
+</TR>
+<TR>
+  <TD>Returns</TD>
+  <TD>Frequency for success, 0 in case of errors.</TD>
+</TR>
+</TABLE>
+
+\anchor setCpuClockCurrent
+<H2>setCpuClockCurrent(cpuID, freq)</H2>
+<P>Set the current CPU clock frequency</P>
+<TABLE>
+<TR>
+  <TH>Direction</TH>
+  <TH>Data type(s)</TH>
+</TR>
+<TR>
+  <TD>Input Parameter</TD>
+  <TD><TABLE>
+    <TR>
+      <TD>\a cpuID</TD>
+      <TD>CPU to set clock speed</TD>
+    </TR>
+    <TR>
+      <TD>\a freq</TD>
+      <TD>CPU frequency in kHz</TD>
+    </TR>
+  </TABLE></TD>
+</TR>
+<TR>
+  <TD>Returns</TD>
+  <TD>Frequency for success, 0 in case of errors.</TD>
+</TR>
+</TABLE>
+
+\anchor getCpuClockMin
+<H2>getCpuClockMin(cpuID)</H2>
+<P>Get the minimal CPU clock frequency</P>
+<TABLE>
+<TR>
+  <TH>Direction</TH>
+  <TH>Data type(s)</TH>
+</TR>
+<TR>
+  <TD>Input Parameter</TD>
+  <TD><TABLE>
+    <TR>
+      <TD>\a cpuID</TD>
+      <TD>CPU to get minimal clock speed</TD>
+    </TR>
+  </TABLE></TD>
+</TR>
+<TR>
+  <TD>Returns</TD>
+  <TD>Frequency for success, 0 in case of errors.</TD>
+</TR>
+</TABLE>
+
+\anchor setCpuClockMin
+<H2>setCpuClockMin(cpuID, freq)</H2>
+<P>Set the minimal CPU clock frequency</P>
+<TABLE>
+<TR>
+  <TH>Direction</TH>
+  <TH>Data type(s)</TH>
+</TR>
+<TR>
+  <TD>Input Parameter</TD>
+  <TD><TABLE>
+    <TR>
+      <TD>\a cpuID</TD>
+      <TD>CPU to set minimal clock speed</TD>
+    </TR>
+    <TR>
+      <TD>\a freq</TD>
+      <TD>CPU frequency in kHz</TD>
+    </TR>
+  </TABLE></TD>
+</TR>
+<TR>
+  <TD>Returns</TD>
+  <TD>Frequency for success, 0 in case of errors.</TD>
+</TR>
+</TABLE>
+
+\anchor getCpuClockMax
+<H2>getCpuClockMax(cpuID)</H2>
+<P>Get the maximal CPU clock frequency</P>
+<TABLE>
+<TR>
+  <TH>Direction</TH>
+  <TH>Data type(s)</TH>
+</TR>
+<TR>
+  <TD>Input Parameter</TD>
+  <TD><TABLE>
+    <TR>
+      <TD>\a cpuID</TD>
+      <TD>CPU to get maximal clock speed</TD>
+    </TR>
+  </TABLE></TD>
+</TR>
+<TR>
+  <TD>Returns</TD>
+  <TD>Frequency for success, 0 in case of errors.</TD>
+</TR>
+</TABLE>
+
+\anchor setCpuClockMax
+<H2>setCpuClockMax(cpuID, freq)</H2>
+<P>Set the maximal CPU clock frequency</P>
+<TABLE>
+<TR>
+  <TH>Direction</TH>
+  <TH>Data type(s)</TH>
+</TR>
+<TR>
+  <TD>Input Parameter</TD>
+  <TD><TABLE>
+    <TR>
+      <TD>\a cpuID</TD>
+      <TD>CPU to set maximal clock speed</TD>
+    </TR>
+    <TR>
+      <TD>\a freq</TD>
+      <TD>CPU frequency in kHz</TD>
+    </TR>
+  </TABLE></TD>
+</TR>
+<TR>
+  <TD>Returns</TD>
+  <TD>Frequency for success, 0 in case of errors.</TD>
+</TR>
+</TABLE>
+
+\anchor getGovernor
+<H2>getGovernor(cpuID)</H2>
+<P>Get the current CPU frequency governor</P>
+<TABLE>
+<TR>
+  <TH>Direction</TH>
+  <TH>Data type(s)</TH>
+</TR>
+<TR>
+  <TD>Input Parameter</TD>
+  <TD><TABLE>
+    <TR>
+      <TD>\a cpuID</TD>
+      <TD>CPU to get the current CPU frequency governor</TD>
+    </TR>
+  </TABLE></TD>
+</TR>
+<TR>
+  <TD>Returns</TD>
+  <TD>Governor for success, nil in case of errors.</TD>
+</TR>
+</TABLE>
+
+\anchor setGovernor
+<H2>setGovernor(cpuID, gov)</H2>
+<P>Set the current CPU frequency governor</P>
+<TABLE>
+<TR>
+  <TH>Direction</TH>
+  <TH>Data type(s)</TH>
+</TR>
+<TR>
+  <TD>Input Parameter</TD>
+  <TD><TABLE>
+    <TR>
+      <TD>\a cpuID</TD>
+      <TD>CPU to set the current CPU frequency governor</TD>
+    </TR>
+    <TR>
+      <TD>\a gov</TD>
+      <TD>Governor name</TD>
+    </TR>
+  </TABLE></TD>
+</TR>
+<TR>
+  <TD>Returns</TD>
+  <TD>1 for success, 0 in case of errors.</TD>
+</TR>
+</TABLE>
+
+\anchor getDriver
+<H2>getDriver(cpuID)</H2>
+<P>Get the current cpufreq driver</P>
+<TABLE>
+<TR>
+  <TH>Direction</TH>
+  <TH>Data type(s)</TH>
+</TR>
+<TR>
+  <TD>Input Parameter</TD>
+  <TD><TABLE>
+    <TR>
+      <TD>\a cpuID</TD>
+      <TD>CPU to get the current cpufreq driver</TD>
+    </TR>
+  </TABLE></TD>
+</TR>
+<TR>
+  <TD>Returns</TD>
+  <TD>Driver for success, nil in case of errors.</TD>
+</TR>
+</TABLE>
+
+\anchor getAvailFreq
+<H2>getAvailFreq(cpuID)</H2>
+<P>Get all available CPU frequency settings</P>
+<TABLE>
+<TR>
+  <TH>Direction</TH>
+  <TH>Data type(s)</TH>
+</TR>
+<TR>
+  <TD>Input Parameter</TD>
+  <TD><TABLE>
+    <TR>
+      <TD>\a cpuID</TD>
+      <TD>CPU to get the CPU frequency settings</TD>
+    </TR>
+  </TABLE></TD>
+</TR>
+<TR>
+  <TD>Returns</TD>
+  <TD>frequency list + turbo value at success, {} and 0 in case of errors.</TD>
+</TR>
+</TABLE>
+
+\anchor getAvailGovs
+<H2>getAvailGovs(cpuID)</H2>
+<P>Get all available CPU frequency governors</P>
+<TABLE>
+<TR>
+  <TH>Direction</TH>
+  <TH>Data type(s)</TH>
+</TR>
+<TR>
+  <TD>Input Parameter</TD>
+  <TD><TABLE>
+    <TR>
+      <TD>\a cpuID</TD>
+      <TD>CPU to get the CPU frequency governors</TD>
+    </TR>
+  </TABLE></TD>
+</TR>
+<TR>
+  <TD>Returns</TD>
+  <TD>governor list at success, {} in case of errors.</TD>
+</TR>
+</TABLE>
+
+*/
 
 /*! \page lua_InputOutput Input and output functions module
 <H1>Data type definition for Lua output functions module in the Lua API</H1>
diff --git a/examples/C-likwidAPI.c b/examples/C-likwidAPI.c
index fcdd13e..d33b1cf 100644
--- a/examples/C-likwidAPI.c
+++ b/examples/C-likwidAPI.c
@@ -5,13 +5,13 @@
  *
  *      Description:  Example how to use the LIKWID API in C/C++ applications
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/examples/C-markerAPI.c b/examples/C-markerAPI.c
index fec66c1..3faaccc 100644
--- a/examples/C-markerAPI.c
+++ b/examples/C-markerAPI.c
@@ -5,13 +5,13 @@
  *
  *      Description:  Example how to use the C/C++ Marker API
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/examples/F-markerAPI.F90 b/examples/F-markerAPI.F90
index 3cafd52..30a0457 100644
--- a/examples/F-markerAPI.F90
+++ b/examples/F-markerAPI.F90
@@ -4,13 +4,13 @@
 !
 !      Description:  Example how to use the Fortran90 Marker API
 !
-!      Version:   4.1
-!      Released:  8.8.2016
+!      Version:   <VERSION>
+!      Released:  <DATE>
 !
 !      Author:  Thomas Roehl (tr), thomas.roehl at googlemail.com
 !      Project:  likwid
 !
-!      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+!      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 !
 !      This program is free software: you can redistribute it and/or modify it under
 !      the terms of the GNU General Public License as published by the Free Software
diff --git a/examples/Lua-likwidAPI.lua b/examples/Lua-likwidAPI.lua
index f5c9fc3..d5d4ca2 100644
--- a/examples/Lua-likwidAPI.lua
+++ b/examples/Lua-likwidAPI.lua
@@ -7,13 +7,13 @@
  *
  *      Description:  Example how to use the LIKWID API in Lua scripts
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/ext/hwloc/Makefile b/ext/hwloc/Makefile
index 1fd564c..5905df5 100644
--- a/ext/hwloc/Makefile
+++ b/ext/hwloc/Makefile
@@ -24,10 +24,6 @@ ifeq ($(COMPILER),MIC)
 CFLAGS += -mmic
 LFLAGS += -mmic
 endif
-ifeq ($(COMPILER),GCC)
-CFLAGS += -Wno-unused-result
-LFLAGS += -Wno-unused-result
-endif
 
 #CONFIGURE BUILD SYSTEM
 BUILD_DIR  = ./$(COMPILER)
diff --git a/filters/README b/filters/README
new file mode 100644
index 0000000..09e24f8
--- /dev/null
+++ b/filters/README
@@ -0,0 +1,14 @@
+== Filter scripts ==
+A filter script is used by LIKWID to transform the output of likwid-perfctr and
+likwid-topology from CSV (RFC 4180) to the desired format.
+
+The filter scripts have to follow some rules to be integrated into LIKWID:
+ - The name of the script must be equal to the resulting file suffix, e.g. xml
+   for transformation to XML.
+ - No output to stdout. All output to stdout is interpreted by the LIKWID
+   scripts as failure. If you want to print something, use stderr.
+ - A script must handle both likwid-perfctr and likwid-topology. The LIKWID
+   scripts call the filter script like this: <script> <input file> <perfctr|topology>
+
+For implementing a new filter script written in Perl you can use the code
+skeleton <SRC>/filters/template.
diff --git a/groups/atom/BRANCH.txt b/groups/atom/BRANCH.txt
index 4213114..7b2bb20 100644
--- a/groups/atom/BRANCH.txt
+++ b/groups/atom/BRANCH.txt
@@ -3,14 +3,12 @@ SHORT Branch prediction miss rate/ratio
 EVENTSET
 FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
-FIXC2 CPU_CLK_UNHALTED_REF
 PMC0  BR_INST_RETIRED_ANY
 PMC1  BR_INST_RETIRED_MISPRED
 
 METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
-Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
 Branch rate   PMC0/FIXC0
 Branch misprediction rate  PMC1/FIXC0
diff --git a/groups/atom/DATA.txt b/groups/atom/DATA.txt
index 9349354..b2d007f 100644
--- a/groups/atom/DATA.txt
+++ b/groups/atom/DATA.txt
@@ -3,14 +3,12 @@ SHORT Load to store ratio
 EVENTSET
 FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
-FIXC2 CPU_CLK_UNHALTED_REF
 PMC0  L1D_CACHE_LD
 PMC1  L1D_CACHE_ST
 
 METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
-Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
 Load to store ratio PMC0/PMC1
 
diff --git a/groups/atom/FLOPS_DP.txt b/groups/atom/FLOPS_DP.txt
index 8d966cc..891a15e 100644
--- a/groups/atom/FLOPS_DP.txt
+++ b/groups/atom/FLOPS_DP.txt
@@ -7,11 +7,19 @@ PMC0  SIMD_COMP_INST_RETIRED_PACKED_DOUBLE
 PMC1  SIMD_COMP_INST_RETIRED_SCALAR_DOUBLE
 
 METRICS
-Runtime [s] FIXC1*inverseClock
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
 CPI  FIXC1/FIXC0
 DP MFLOP/s    1.0E-06*(PMC0*2.0+PMC1)/time
+Packed MUOPS/s   1.0E-06*PMC0/time
+Scalar MUOPS/s 1.0E-06*PMC1/time
 
 
 LONG
+Formulas:
+DP MFLOP/s = 1.0E-06*(SIMD_COMP_INST_RETIRED_PACKED_DOUBLE*2.0+SIMD_COMP_INST_RETIRED_SCALAR_DOUBLE)/runtime
+Packed MUOPS/s = 1.0E-06*SIMD_COMP_INST_RETIRED_PACKED_DOUBLE/runtime
+Scalar MUOPS/s = 1.0E-06*SIMD_COMP_INST_RETIRED_SCALAR_DOUBLE/runtime
+--
 Double Precision MFLOP/s Double Precision MFLOP/s
 
diff --git a/groups/atom/FLOPS_SP.txt b/groups/atom/FLOPS_SP.txt
index 49ca1f3..3ebce6c 100644
--- a/groups/atom/FLOPS_SP.txt
+++ b/groups/atom/FLOPS_SP.txt
@@ -7,10 +7,18 @@ PMC0  SIMD_COMP_INST_RETIRED_PACKED_SINGLE
 PMC1  SIMD_COMP_INST_RETIRED_SCALAR_SINGLE
 
 METRICS
-Runtime [s] FIXC1*inverseClock
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
 CPI  FIXC1/FIXC0
 SP MFLOP/s (SP assumed) 1.0E-06*(PMC0*4.0+PMC1)/time
+Packed MUOPS/s   1.0E-06*(PMC0)/time
+Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
+Formulas:
+SP MFLOP/s = 1.0E-06*(SIMD_COMP_INST_RETIRED_PACKED_SINGLE*4.0+SIMD_COMP_INST_RETIRED_SCALAR_SINGLE)/runtime
+Packed MUOPS/s = 1.0E-06*SIMD_COMP_INST_RETIRED_PACKED_SINGLE/runtime
+Scalar MUOPS/s = 1.0E-06*SIMD_COMP_INST_RETIRED_SCALAR_SINGLE/runtime
+--
 Single Precision MFLOP/s Double Precision MFLOP/s
 
diff --git a/groups/atom/FLOPS_X87.txt b/groups/atom/FLOPS_X87.txt
index 57d2d81..204a61e 100644
--- a/groups/atom/FLOPS_X87.txt
+++ b/groups/atom/FLOPS_X87.txt
@@ -6,10 +6,13 @@ FIXC1 CPU_CLK_UNHALTED_CORE
 PMC0  X87_COMP_OPS_EXE_ANY_AR
 
 METRICS
-Runtime [s] FIXC1*inverseClock
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
 CPI  FIXC1/FIXC0
 X87 MFLOP/s  1.0E-06*PMC0/time
 
 LONG
-X87 MFLOP/s
+X87 MFLOP/s = 1.0E-06*X87_COMP_OPS_EXE_ANY_AR/runtime
+--
+The MFLOP/s made with X87 instructions
 
diff --git a/groups/atom/MEM.txt b/groups/atom/MEM.txt
index db580e5..355b7fd 100644
--- a/groups/atom/MEM.txt
+++ b/groups/atom/MEM.txt
@@ -3,13 +3,11 @@ SHORT Main memory bandwidth in MBytes/s
 EVENTSET
 FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
-FIXC2 CPU_CLK_UNHALTED_REF
 PMC0  BUS_TRANS_MEM_THIS_CORE_THIS_A
 
 METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
-Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
 Memory bandwidth [MBytes/s] 1.0E-06*PMC0*64.0/time
 Memory data volume [GBytes] 1.0E-09*PMC0*64.0
diff --git a/groups/atom/TLB.txt b/groups/atom/TLB.txt
index 4952e6c..5d0aa1b 100644
--- a/groups/atom/TLB.txt
+++ b/groups/atom/TLB.txt
@@ -6,11 +6,16 @@ FIXC1 CPU_CLK_UNHALTED_CORE
 PMC0  DATA_TLB_MISSES_DTLB_MISS
 
 METRICS
-Runtime [s] FIXC1*inverseClock
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
 CPI  FIXC1/FIXC0
 DTLB misses       PMC0
 DTLB miss rate    PMC0/FIXC0
 
 LONG
+Formulas:
+DTLB misses = DATA_TLB_MISSES_DTLB_MISS
+DTLB miss rate = DATA_TLB_MISSES_DTLB_MISS/INSTR_RETIRED_ANY
+--
 The DTLB miss rate gives a measure how often a TLB miss occurred per instruction.
 
diff --git a/groups/broadwell/CLOCK.txt b/groups/broadwell/CLOCK.txt
index 595d3a1..5ff9f69 100644
--- a/groups/broadwell/CLOCK.txt
+++ b/groups/broadwell/CLOCK.txt
@@ -5,11 +5,13 @@ FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
 PWR0  PWR_PKG_ENERGY
+UBOXFIX UNCORE_CLOCK
 
 METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+Uncore Clock [MHz] 1.E-06*UBOXFIX/time
 CPI  FIXC1/FIXC0
 Energy [J]  PWR0
 Power [W] PWR0/time
@@ -17,6 +19,7 @@ Power [W] PWR0/time
 LONG
 Formula:
 Power =  PWR_PKG_ENERGY / time
+Uncore Clock [MHz] = 1.E-06 * UNCORE_CLOCK / time
 -
 Broadwell implements the new RAPL interface. This interface enables to
 monitor the consumed energy on the package (socket) level.
diff --git a/groups/broadwell/CYCLE_ACTIVITY.txt b/groups/broadwell/CYCLE_ACTIVITY.txt
new file mode 100644
index 0000000..5b3c289
--- /dev/null
+++ b/groups/broadwell/CYCLE_ACTIVITY.txt
@@ -0,0 +1,27 @@
+SHORT Cycle Activities
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 CYCLE_ACTIVITY_STALLS_L2_PENDING
+PMC1 CYCLE_ACTIVITY_STALLS_LDM_PENDING
+PMC3 CYCLE_ACTIVITY_CYCLES_NO_EXECUTE
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Cycles without execution [%] PMC3/FIXC1*100
+Cycles without execution due to L2 [%] PMC0/FIXC1*100
+Cycles without execution due to memory [%] PMC1/FIXC1*100
+
+LONG
+Cycles without execution [%] = CYCLE_ACTIVITY_CYCLES_NO_EXECUTE/CPU_CLK_UNHALTED_CORE*100
+Cycles without execution due to L2 [%] = CYCLE_ACTIVITY_STALLS_L2_PENDING/CPU_CLK_UNHALTED_CORE*100
+Cycles without execution due to memory [%] = CYCLE_ACTIVITY_STALLS_LDM_PENDING/CPU_CLK_UNHALTED_CORE*100
+--
+This performance group measures the stalls caused by data traffic in the cache
+hierarchy. Execution stalls due to L1D misses are not reported on Broadwell
+because the corresponding event shows high overcounting.
diff --git a/groups/broadwell/FLOPS_DP.txt b/groups/broadwell/FLOPS_DP.txt
index 60b5d5a..69f2e80 100644
--- a/groups/broadwell/FLOPS_DP.txt
+++ b/groups/broadwell/FLOPS_DP.txt
@@ -13,15 +13,15 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
-AVX MFLOP/s  1.0E-06*(PMC2*4.0)/time
+DP MFLOP/s  1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
+AVX DP MFLOP/s  1.0E-06*(PMC2*4.0)/time
 Packed MUOPS/s   1.0E-06*(PMC0+PMC2)/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE*2+FP_ARITH_INST_RETIRED_SCALAR_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
-AVX MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
+DP MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE*2+FP_ARITH_INST_RETIRED_SCALAR_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
+AVX DP MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE)/runtime
 Scalar MUOPS/s = 1.0E-06*FP_ARITH_INST_RETIRED_SCALAR_DOUBLE/runtime
 -
diff --git a/groups/broadwell/FLOPS_SP.txt b/groups/broadwell/FLOPS_SP.txt
index 2818d94..bc0087e 100644
--- a/groups/broadwell/FLOPS_SP.txt
+++ b/groups/broadwell/FLOPS_SP.txt
@@ -13,15 +13,15 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*4.0+PMC1+PMC2*8.0)/time
-AVX MFLOP/s  1.0E-06*(PMC2*8.0)/time
+SP MFLOP/s  1.0E-06*(PMC0*4.0+PMC1+PMC2*8.0)/time
+AVX SP MFLOP/s  1.0E-06*(PMC2*8.0)/time
 Packed MUOPS/s   1.0E-06*(PMC0+PMC2)/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_SINGLE*4+FP_ARITH_INST_RETIRED_SCALAR_SINGLE+FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/runtime
-AVX MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/runtime
+SP MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_SINGLE*4+FP_ARITH_INST_RETIRED_SCALAR_SINGLE+FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/runtime
+AVX SP MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_SINGLE+FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE)/runtime
 Scalar MUOPS/s = 1.0E-06*FP_ARITH_INST_RETIRED_SCALAR_SINGLE/runtime
 -
diff --git a/groups/broadwell/PORT_USAGE.txt b/groups/broadwell/PORT_USAGE.txt
new file mode 100644
index 0000000..459f7f6
--- /dev/null
+++ b/groups/broadwell/PORT_USAGE.txt
@@ -0,0 +1,50 @@
+SHORT  Execution port utilization
+
+REQUIRE_NOHT
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  UOPS_EXECUTED_PORT_PORT_0
+PMC1  UOPS_EXECUTED_PORT_PORT_1
+PMC2  UOPS_EXECUTED_PORT_PORT_2
+PMC3  UOPS_EXECUTED_PORT_PORT_3
+PMC4  UOPS_EXECUTED_PORT_PORT_4
+PMC5  UOPS_EXECUTED_PORT_PORT_5
+PMC6  UOPS_EXECUTED_PORT_PORT_6
+PMC7  UOPS_EXECUTED_PORT_PORT_7
+
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Port0 usage ratio PMC0/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port1 usage ratio PMC1/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port2 usage ratio PMC2/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port3 usage ratio PMC3/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port4 usage ratio PMC4/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port5 usage ratio PMC5/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port6 usage ratio PMC6/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port7 usage ratio PMC7/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+
+LONG
+Formulas:
+Port0 usage ratio = UOPS_EXECUTED_PORT_PORT_0/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port1 usage ratio = UOPS_EXECUTED_PORT_PORT_1/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port2 usage ratio = UOPS_EXECUTED_PORT_PORT_2/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port3 usage ratio = UOPS_EXECUTED_PORT_PORT_3/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port4 usage ratio = UOPS_EXECUTED_PORT_PORT_4/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port5 usage ratio = UOPS_EXECUTED_PORT_PORT_5/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port6 usage ratio = UOPS_EXECUTED_PORT_PORT_6/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port7 usage ratio = UOPS_EXECUTED_PORT_PORT_7/SUM(UOPS_EXECUTED_PORT_PORT_*)
+-
+This group measures the execution port utilization in a CPU core. The group can
+only be measured when HyperThreading is disabled because only then each CPU core
+can program eight counters.
+Please be aware that the counters PMC4-7 are broken on Intel Broadwell. They
+don't increment if either user- or kernel-level filtering is applied. User-level
+filtering is default in LIKWID, hence kernel-level filtering is added
+automatically for PMC4-7. The returned counts can be much higher.
diff --git a/groups/sandybridge/UOPS.txt b/groups/broadwell/UOPS.txt
similarity index 100%
copy from groups/sandybridge/UOPS.txt
copy to groups/broadwell/UOPS.txt
diff --git a/groups/broadwellD/CACHES.txt b/groups/broadwellD/CACHES.txt
index 3c13a52..275e30f 100644
--- a/groups/broadwellD/CACHES.txt
+++ b/groups/broadwellD/CACHES.txt
@@ -5,9 +5,25 @@ FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
 PMC0  L1D_REPLACEMENT
-PMC1  L1D_M_EVICT
+PMC1  L2_TRANS_L1D_WB
 PMC2  L2_LINES_IN_ALL
 PMC3  L2_TRANS_L2_WB
+CBOX0C1 LLC_VICTIMS_M
+CBOX1C1 LLC_VICTIMS_M
+CBOX2C1 LLC_VICTIMS_M
+CBOX3C1 LLC_VICTIMS_M
+CBOX4C1 LLC_VICTIMS_M
+CBOX5C1 LLC_VICTIMS_M
+CBOX6C1 LLC_VICTIMS_M
+CBOX7C1 LLC_VICTIMS_M
+CBOX8C1 LLC_VICTIMS_M
+CBOX9C1 LLC_VICTIMS_M
+CBOX10C1 LLC_VICTIMS_M
+CBOX11C1 LLC_VICTIMS_M
+CBOX12C1 LLC_VICTIMS_M
+CBOX13C1 LLC_VICTIMS_M
+CBOX14C1 LLC_VICTIMS_M
+CBOX15C1 LLC_VICTIMS_M
 CBOX0C0 LLC_LOOKUP_DATA_READ
 CBOX1C0 LLC_LOOKUP_DATA_READ
 CBOX2C0 LLC_LOOKUP_DATA_READ
@@ -24,22 +40,6 @@ CBOX12C0 LLC_LOOKUP_DATA_READ
 CBOX13C0 LLC_LOOKUP_DATA_READ
 CBOX14C0 LLC_LOOKUP_DATA_READ
 CBOX15C0 LLC_LOOKUP_DATA_READ
-CBOX16C0 LLC_LOOKUP_DATA_READ
-CBOX17C0 LLC_LOOKUP_DATA_READ
-CBOX0C1 LLC_VICTIMS_M
-CBOX1C1 LLC_VICTIMS_M
-CBOX2C1 LLC_VICTIMS_M
-CBOX3C1 LLC_VICTIMS_M
-CBOX4C1 LLC_VICTIMS_M
-CBOX5C1 LLC_VICTIMS_M
-CBOX6C1 LLC_VICTIMS_M
-CBOX7C1 LLC_VICTIMS_M
-CBOX8C1 LLC_VICTIMS_M
-CBOX9C1 LLC_VICTIMS_M
-CBOX10C1 LLC_VICTIMS_M
-CBOX11C1 LLC_VICTIMS_M
-CBOX12C1 LLC_VICTIMS_M
-CBOX13C1 LLC_VICTIMS_M
 MBOX0C0 CAS_COUNT_RD
 MBOX0C1 CAS_COUNT_WR
 MBOX1C0 CAS_COUNT_RD
@@ -73,15 +73,15 @@ L1 to/from L2 data volume [GBytes] 1.0E-09*(PMC0+PMC1)*64.0
 L3 to L2 load bandwidth [MBytes/s]  1.0E-06*PMC2*64.0/time
 L3 to L2 load data volume [GBytes]  1.0E-09*PMC2*64.0
 L2 to L3 evict bandwidth [MBytes/s]  1.0E-06*PMC3*64.0/time
-L2 to L3 evict data volume [GBytes]  1.0E-06*PMC3*64.0
+L2 to L3 evict data volume [GBytes]  1.0E-09*PMC3*64.0
 L2 to/from L3 bandwidth [MBytes/s] 1.0E-06*(PMC2+PMC3)*64.0/time
 L2 to/from L3 data volume [GBytes] 1.0E-09*(PMC2+PMC3)*64.0
-System to L3 bandwidth [MBytes/s] 1.0E-06*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0)*64.0/time
-System to L3 data volume [GBytes] 1.0E-09*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0)*64.0
-L3 to system bandwidth [MBytes/s] 1.0E-06*(CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1)*64/time
-L3 to system data volume [GBytes] 1.0E-09*(CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1)*64
-L3 to/from system bandwidth [MBytes/s] 1.0E-06*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1)*64.0/time
-L3 to/from system data volume [GBytes] 1.0E-09*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1)*64.0
+System to L3 bandwidth [MBytes/s] 1.0E-06*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX14C0+CBOX15C0)*64.0/time
+System to L3 data volume [GBytes] 1.0E-09*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX14C0+CBOX15C0)*64.0
+L3 to system bandwidth [MBytes/s] 1.0E-06*(CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1+CBOX14C1+CBOX15C1)*64/time
+L3 to system data volume [GBytes] 1.0E-09*(CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1+CBOX14C1+CBOX15C1)*64
+L3 to/from system bandwidth [MBytes/s] 1.0E-06*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX14C0+CBOX15C0+CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1+CBOX14C1+CBOX15C1)*64.0/time
+L3 to/from system data volume [GBytes] 1.0E-09*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX14C0+CBOX15C0+CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1+CBOX14C1+CBOX15C1)*64.0
 Memory read bandwidth [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0)*64.0/time
 Memory read data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0)*64.0
 Memory write bandwidth [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0/time
@@ -93,10 +93,10 @@ LONG
 Formulas:
 L2 to L1 load bandwidth [MBytes/s] = 1.0E-06*L1D_REPLACEMENT*64/time
 L2 to L1 load data volume [GBytes] = 1.0E-09*L1D_REPLACEMENT*64
-L1 to L2 evict bandwidth [MBytes/s] = 1.0E-06*L1D_M_EVICT*64/time
-L1 to L2 evict data volume [GBytes] = 1.0E-09*L1D_M_EVICT*64
-L1 to/from L2 bandwidth [MBytes/s] = 1.0E-06*(L1D_REPLACEMENT+L1D_M_EVICT)*64/time
-L1 to/from L2 data volume [GBytes] = 1.0E-09*(L1D_REPLACEMENT+L1D_M_EVICT)*64
+L1 to L2 evict bandwidth [MBytes/s] = 1.0E-06*L2_TRANS_L1D_WB*64/time
+L1 to L2 evict data volume [GBytes] = 1.0E-09*L2_TRANS_L1D_WB*64
+L1 to/from L2 bandwidth [MBytes/s] = 1.0E-06*(L1D_REPLACEMENT+L2_TRANS_L1D_WB)*64/time
+L1 to/from L2 data volume [GBytes] = 1.0E-09*(L1D_REPLACEMENT+L2_TRANS_L1D_WB)*64
 L3 to L2 load bandwidth [MBytes/s] = 1.0E-06*L2_LINES_IN_ALL*64/time
 L3 to L2 load data volume [GBytes] = 1.0E-09*L2_LINES_IN_ALL*64
 L2 to L3 evict bandwidth [MBytes/s] = 1.0E-06*L2_TRANS_L2_WB*64/time
@@ -111,7 +111,7 @@ L3 to/from system bandwidth [MBytes/s] = 1.0E-06*(SUM(LLC_LOOKUP_DATA_READ)+SUM(
 L3 to/from system data volume [GBytes] = 1.0E-09*(SUM(LLC_LOOKUP_DATA_READ)+SUM(LLC_VICTIMS_M))*64
 Memory read bandwidth [MBytes/s] = 1.0E-06*(SUM(CAS_COUNT_RD))*64.0/time
 Memory read data volume [GBytes] = 1.0E-09*(SUM(CAS_COUNT_RD))*64.0
-Memory write bandwidth [MBytes/s] 1.0E-06*(SUM(CAS_COUNT_WR))*64.0/time
+Memory write bandwidth [MBytes/s] = 1.0E-06*(SUM(CAS_COUNT_WR))*64.0/time
 Memory write data volume [GBytes] = 1.0E-09*(SUM(CAS_COUNT_WR))*64.0
 Memory bandwidth [MBytes/s] = 1.0E-06*(SUM(CAS_COUNT_RD)+SUM(CAS_COUNT_WR))*64.0/time
 Memory data volume [GBytes] = 1.0E-09*(SUM(CAS_COUNT_RD)+SUM(CAS_COUNT_WR))*64.0
diff --git a/groups/broadwellD/CLOCK.txt b/groups/broadwellD/CLOCK.txt
index 595d3a1..5ff9f69 100644
--- a/groups/broadwellD/CLOCK.txt
+++ b/groups/broadwellD/CLOCK.txt
@@ -5,11 +5,13 @@ FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
 PWR0  PWR_PKG_ENERGY
+UBOXFIX UNCORE_CLOCK
 
 METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+Uncore Clock [MHz] 1.E-06*UBOXFIX/time
 CPI  FIXC1/FIXC0
 Energy [J]  PWR0
 Power [W] PWR0/time
@@ -17,6 +19,7 @@ Power [W] PWR0/time
 LONG
 Formula:
 Power =  PWR_PKG_ENERGY / time
+Uncore Clock [MHz] = 1.E-06 * UNCORE_CLOCK / time
 -
 Broadwell implements the new RAPL interface. This interface enables to
 monitor the consumed energy on the package (socket) level.
diff --git a/groups/broadwellD/CYCLE_ACTIVITY.txt b/groups/broadwellD/CYCLE_ACTIVITY.txt
new file mode 100644
index 0000000..5b3c289
--- /dev/null
+++ b/groups/broadwellD/CYCLE_ACTIVITY.txt
@@ -0,0 +1,27 @@
+SHORT Cycle Activities
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 CYCLE_ACTIVITY_STALLS_L2_PENDING
+PMC1 CYCLE_ACTIVITY_STALLS_LDM_PENDING
+PMC3 CYCLE_ACTIVITY_CYCLES_NO_EXECUTE
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Cycles without execution [%] PMC3/FIXC1*100
+Cycles without execution due to L2 [%] PMC0/FIXC1*100
+Cycles without execution due to memory [%] PMC1/FIXC1*100
+
+LONG
+Cycles without execution [%] = CYCLE_ACTIVITY_CYCLES_NO_EXECUTE/CPU_CLK_UNHALTED_CORE*100
+Cycles without execution due to L2 [%] = CYCLE_ACTIVITY_STALLS_L2_PENDING/CPU_CLK_UNHALTED_CORE*100
+Cycles without execution due to memory [%] = CYCLE_ACTIVITY_STALLS_LDM_PENDING/CPU_CLK_UNHALTED_CORE*100
+--
+This performance group measures the stalls caused by data traffic in the cache
+hierarchy. Execution stalls due to L1D misses are not reported on Broadwell
+because the corresponding event shows high overcounting.
diff --git a/groups/broadwellD/FALSE_SHARE.txt b/groups/broadwellD/FALSE_SHARE.txt
index a87f7d4..dd2a44b 100644
--- a/groups/broadwellD/FALSE_SHARE.txt
+++ b/groups/broadwellD/FALSE_SHARE.txt
@@ -4,8 +4,8 @@ EVENTSET
 FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
-PMC0 MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_HITM
-PMC2 MEM_LOAD_UOPS_RETIRED_ALL
+PMC0 MEM_LOAD_UOPS_L3_HIT_RETIRED_XSNP_HITM
+PMC2 MEM_UOPS_RETIRED_LOADS_ALL
 
 METRICS
 Runtime (RDTSC) [s] time
@@ -17,8 +17,8 @@ Local LLC false sharing rate PMC0/PMC2
 
 LONG
 Formula:
-Local LLC false sharing [MByte] = 1.E-06*MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_HITM*64
-Local LLC false sharing rate = MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_HITM/MEM_LOAD_UOPS_RETIRED_ALL
+Local LLC false sharing [MByte] = 1.E-06*MEM_LOAD_UOPS_L3_HIT_RETIRED_XSNP_HITM*64
+Local LLC false sharing rate = MEM_LOAD_UOPS_L3_HIT_RETIRED_XSNP_HITM/MEM_UOPS_RETIRED_LOADS_ALL
 -
 False-sharing of cache lines can dramatically reduce the performance of an
 application. This performance group measures the L3 traffic induced by false-sharing.
diff --git a/groups/broadwellD/FLOPS_DP.txt b/groups/broadwellD/FLOPS_DP.txt
index 60b5d5a..69f2e80 100644
--- a/groups/broadwellD/FLOPS_DP.txt
+++ b/groups/broadwellD/FLOPS_DP.txt
@@ -13,15 +13,15 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
-AVX MFLOP/s  1.0E-06*(PMC2*4.0)/time
+DP MFLOP/s  1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
+AVX DP MFLOP/s  1.0E-06*(PMC2*4.0)/time
 Packed MUOPS/s   1.0E-06*(PMC0+PMC2)/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE*2+FP_ARITH_INST_RETIRED_SCALAR_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
-AVX MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
+DP MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE*2+FP_ARITH_INST_RETIRED_SCALAR_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
+AVX DP MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE)/runtime
 Scalar MUOPS/s = 1.0E-06*FP_ARITH_INST_RETIRED_SCALAR_DOUBLE/runtime
 -
diff --git a/groups/broadwellD/FLOPS_SP.txt b/groups/broadwellD/FLOPS_SP.txt
index 2818d94..bc0087e 100644
--- a/groups/broadwellD/FLOPS_SP.txt
+++ b/groups/broadwellD/FLOPS_SP.txt
@@ -13,15 +13,15 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*4.0+PMC1+PMC2*8.0)/time
-AVX MFLOP/s  1.0E-06*(PMC2*8.0)/time
+SP MFLOP/s  1.0E-06*(PMC0*4.0+PMC1+PMC2*8.0)/time
+AVX SP MFLOP/s  1.0E-06*(PMC2*8.0)/time
 Packed MUOPS/s   1.0E-06*(PMC0+PMC2)/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_SINGLE*4+FP_ARITH_INST_RETIRED_SCALAR_SINGLE+FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/runtime
-AVX MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/runtime
+SP MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_SINGLE*4+FP_ARITH_INST_RETIRED_SCALAR_SINGLE+FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/runtime
+AVX SP MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_SINGLE+FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE)/runtime
 Scalar MUOPS/s = 1.0E-06*FP_ARITH_INST_RETIRED_SCALAR_SINGLE/runtime
 -
diff --git a/groups/broadwellD/MEM_DP.txt b/groups/broadwellD/MEM_DP.txt
index bfea358..33dac2b 100644
--- a/groups/broadwellD/MEM_DP.txt
+++ b/groups/broadwellD/MEM_DP.txt
@@ -45,9 +45,12 @@ Memory write bandwidth [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4
 Memory write data volume [GBytes] 1.0E-09*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0
 Memory bandwidth [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0/time
 Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0
+Operational intensity (PMC0*2.0+PMC1+PMC2*4.0)/((MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0)
 
 LONG
 Formula:
+Power [W] = PWR_PKG_ENERGY/runtime
+Power DRAM [W] = PWR_DRAM_ENERGY/runtime
 MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE*2+FP_ARITH_INST_RETIRED_SCALAR_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
 AVX MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE)/runtime
@@ -58,9 +61,13 @@ Memory write bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC1))*64.0/runtime
 Memory write data volume [GBytes] = 1.0E-09*(SUM(MBOXxC1))*64.0
 Memory bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0/runtime
 Memory data volume [GBytes] = 1.0E-09*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0
+Operational intensity = (FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE*2+FP_ARITH_INST_RETIRED_SCALAR_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/((SUM(MBOXxC0)+SUM(MBOXxC1))*64.0)
 --
 Profiling group to measure memory bandwidth drawn by all cores of a socket.
 Since this group is based on Uncore events it is only possible to measure on
 a per socket base. Also outputs total data volume transferred from main memory.
 SSE scalar and packed double precision FLOP rates. Also reports on packed AVX
 32b instructions.
+The operational intensity is calculated using the FP values of the cores and the
+memory data volume of the whole socket. The actual operational intensity for
+multiple CPUs can be found in the statistics table in the Sum column.
diff --git a/groups/broadwellD/MEM_SP.txt b/groups/broadwellD/MEM_SP.txt
index e7d4642..1f86149 100644
--- a/groups/broadwellD/MEM_SP.txt
+++ b/groups/broadwellD/MEM_SP.txt
@@ -45,6 +45,7 @@ Memory write bandwidth [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4
 Memory write data volume [GBytes] 1.0E-09*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0
 Memory bandwidth [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0/time
 Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0
+Operational intensity (PMC0*4.0+PMC1+PMC2*8.0)/((MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0)
 
 LONG
 Formula:
@@ -60,9 +61,13 @@ Memory write bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC1))*64.0/runtime
 Memory write data volume [GBytes] = 1.0E-09*(SUM(MBOXxC1))*64.0
 Memory bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0/runtime
 Memory data volume [GBytes] = 1.0E-09*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0
+Operational intensity = (FP_ARITH_INST_RETIRED_128B_PACKED_SINGLE*4+FP_ARITH_INST_RETIRED_SCALAR_SINGLE+FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/((SUM(MBOXxC0)+SUM(MBOXxC1))*64.0)
 --
 Profiling group to measure memory bandwidth drawn by all cores of a socket.
 Since this group is based on Uncore events it is only possible to measure on
 a per socket base. Also outputs total data volume transferred from main memory.
 SSE scalar and packed single precision FLOP rates. Also reports on packed AVX
 32b instructions.
+The operational intensity is calculated using the FP values of the cores and the
+memory data volume of the whole socket. The actual operational intensity for
+multiple CPUs can be found in the statistics table in the Sum column.
diff --git a/groups/broadwellD/PORT_USAGE.txt b/groups/broadwellD/PORT_USAGE.txt
new file mode 100644
index 0000000..459f7f6
--- /dev/null
+++ b/groups/broadwellD/PORT_USAGE.txt
@@ -0,0 +1,50 @@
+SHORT  Execution port utilization
+
+REQUIRE_NOHT
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  UOPS_EXECUTED_PORT_PORT_0
+PMC1  UOPS_EXECUTED_PORT_PORT_1
+PMC2  UOPS_EXECUTED_PORT_PORT_2
+PMC3  UOPS_EXECUTED_PORT_PORT_3
+PMC4  UOPS_EXECUTED_PORT_PORT_4
+PMC5  UOPS_EXECUTED_PORT_PORT_5
+PMC6  UOPS_EXECUTED_PORT_PORT_6
+PMC7  UOPS_EXECUTED_PORT_PORT_7
+
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Port0 usage ratio PMC0/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port1 usage ratio PMC1/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port2 usage ratio PMC2/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port3 usage ratio PMC3/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port4 usage ratio PMC4/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port5 usage ratio PMC5/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port6 usage ratio PMC6/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port7 usage ratio PMC7/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+
+LONG
+Formulas:
+Port0 usage ratio = UOPS_EXECUTED_PORT_PORT_0/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port1 usage ratio = UOPS_EXECUTED_PORT_PORT_1/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port2 usage ratio = UOPS_EXECUTED_PORT_PORT_2/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port3 usage ratio = UOPS_EXECUTED_PORT_PORT_3/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port4 usage ratio = UOPS_EXECUTED_PORT_PORT_4/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port5 usage ratio = UOPS_EXECUTED_PORT_PORT_5/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port6 usage ratio = UOPS_EXECUTED_PORT_PORT_6/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port7 usage ratio = UOPS_EXECUTED_PORT_PORT_7/SUM(UOPS_EXECUTED_PORT_PORT_*)
+-
+This group measures the execution port utilization in a CPU core. The group can
+only be measured when HyperThreading is disabled because only then each CPU core
+can program eight counters.
+Please be aware that the counters PMC4-7 are broken on Intel Broadwell. They
+don't increment if either user- or kernel-level filtering is applied. User-level
+filtering is default in LIKWID, hence kernel-level filtering is added
+automatically for PMC4-7. The returned counts can be much higher.
diff --git a/groups/sandybridge/UOPS.txt b/groups/broadwellD/UOPS.txt
similarity index 100%
copy from groups/sandybridge/UOPS.txt
copy to groups/broadwellD/UOPS.txt
diff --git a/groups/broadwellEP/CACHES.txt b/groups/broadwellEP/CACHES.txt
index 3c13a52..6a14e52 100644
--- a/groups/broadwellEP/CACHES.txt
+++ b/groups/broadwellEP/CACHES.txt
@@ -5,9 +5,31 @@ FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
 PMC0  L1D_REPLACEMENT
-PMC1  L1D_M_EVICT
+PMC1  L2_TRANS_L1D_WB
 PMC2  L2_LINES_IN_ALL
 PMC3  L2_TRANS_L2_WB
+CBOX0C1 LLC_VICTIMS_M
+CBOX1C1 LLC_VICTIMS_M
+CBOX2C1 LLC_VICTIMS_M
+CBOX3C1 LLC_VICTIMS_M
+CBOX4C1 LLC_VICTIMS_M
+CBOX5C1 LLC_VICTIMS_M
+CBOX6C1 LLC_VICTIMS_M
+CBOX7C1 LLC_VICTIMS_M
+CBOX8C1 LLC_VICTIMS_M
+CBOX9C1 LLC_VICTIMS_M
+CBOX10C1 LLC_VICTIMS_M
+CBOX11C1 LLC_VICTIMS_M
+CBOX12C1 LLC_VICTIMS_M
+CBOX13C1 LLC_VICTIMS_M
+CBOX14C1 LLC_VICTIMS_M
+CBOX15C1 LLC_VICTIMS_M
+CBOX16C1 LLC_VICTIMS_M
+CBOX17C1 LLC_VICTIMS_M
+CBOX18C1 LLC_VICTIMS_M
+CBOX19C1 LLC_VICTIMS_M
+CBOX20C1 LLC_VICTIMS_M
+CBOX21C1 LLC_VICTIMS_M
 CBOX0C0 LLC_LOOKUP_DATA_READ
 CBOX1C0 LLC_LOOKUP_DATA_READ
 CBOX2C0 LLC_LOOKUP_DATA_READ
@@ -26,20 +48,10 @@ CBOX14C0 LLC_LOOKUP_DATA_READ
 CBOX15C0 LLC_LOOKUP_DATA_READ
 CBOX16C0 LLC_LOOKUP_DATA_READ
 CBOX17C0 LLC_LOOKUP_DATA_READ
-CBOX0C1 LLC_VICTIMS_M
-CBOX1C1 LLC_VICTIMS_M
-CBOX2C1 LLC_VICTIMS_M
-CBOX3C1 LLC_VICTIMS_M
-CBOX4C1 LLC_VICTIMS_M
-CBOX5C1 LLC_VICTIMS_M
-CBOX6C1 LLC_VICTIMS_M
-CBOX7C1 LLC_VICTIMS_M
-CBOX8C1 LLC_VICTIMS_M
-CBOX9C1 LLC_VICTIMS_M
-CBOX10C1 LLC_VICTIMS_M
-CBOX11C1 LLC_VICTIMS_M
-CBOX12C1 LLC_VICTIMS_M
-CBOX13C1 LLC_VICTIMS_M
+CBOX18C0 LLC_LOOKUP_DATA_READ
+CBOX19C0 LLC_LOOKUP_DATA_READ
+CBOX20C0 LLC_LOOKUP_DATA_READ
+CBOX21C0 LLC_LOOKUP_DATA_READ
 MBOX0C0 CAS_COUNT_RD
 MBOX0C1 CAS_COUNT_WR
 MBOX1C0 CAS_COUNT_RD
@@ -73,15 +85,15 @@ L1 to/from L2 data volume [GBytes] 1.0E-09*(PMC0+PMC1)*64.0
 L3 to L2 load bandwidth [MBytes/s]  1.0E-06*PMC2*64.0/time
 L3 to L2 load data volume [GBytes]  1.0E-09*PMC2*64.0
 L2 to L3 evict bandwidth [MBytes/s]  1.0E-06*PMC3*64.0/time
-L2 to L3 evict data volume [GBytes]  1.0E-06*PMC3*64.0
+L2 to L3 evict data volume [GBytes]  1.0E-09*PMC3*64.0
 L2 to/from L3 bandwidth [MBytes/s] 1.0E-06*(PMC2+PMC3)*64.0/time
 L2 to/from L3 data volume [GBytes] 1.0E-09*(PMC2+PMC3)*64.0
-System to L3 bandwidth [MBytes/s] 1.0E-06*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0)*64.0/time
-System to L3 data volume [GBytes] 1.0E-09*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0)*64.0
-L3 to system bandwidth [MBytes/s] 1.0E-06*(CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1)*64/time
-L3 to system data volume [GBytes] 1.0E-09*(CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1)*64
-L3 to/from system bandwidth [MBytes/s] 1.0E-06*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1)*64.0/time
-L3 to/from system data volume [GBytes] 1.0E-09*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1)*64.0
+System to L3 bandwidth [MBytes/s] 1.0E-06*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX14C0+CBOX15C0+CBOX16C0+CBOX17C0+CBOX18C0+CBOX19C0+CBOX20C0+CBOX21C0)*64.0/time
+System to L3 data volume [GBytes] 1.0E-09*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX14C0+CBOX15C0+CBOX16C0+CBOX17C0+CBOX18C0+CBOX19C0+CBOX20C0+CBOX21C0)*64.0
+L3 to system bandwidth [MBytes/s] 1.0E-06*(CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1+CBOX14C1+CBOX15C1+CBOX16C1+CBOX17C1+CBOX18C1+CBOX19C1+CBOX20C1+CBOX21C1)*64/time
+L3 to system data volume [GBytes] 1.0E-09*(CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1+CBOX14C1+CBOX15C1+CBOX16C1+CBOX17C1+CBOX18C1+CBOX19C1+CBOX20C1+CBOX21C1)*64
+L3 to/from system bandwidth [MBytes/s] 1.0E-06*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX14C0+CBOX15C0+CBOX16C0+CBOX17C0+CBOX18C0+CBOX19C0+CBOX20C0+CBOX21C0+CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1+CBOX14C1+CBOX15C1+CBOX16C1+CBOX17C1+CBOX18C1+CBOX19C1+CBOX20C1+CBOX21C1)*64.0/time
+L3 to/from system data volume [GBytes] 1.0E-09*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX14C0+CBOX15C0+CBOX16C0+CBOX17C0+CBOX18C0+CBOX19C0+CBOX20C0+CBOX21C0+CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1+CBOX14C1+CBOX15C1+CBOX16C1+CBOX17C1+CBOX18C1+CBOX19C1+CBOX20C1+CBOX21C1)*64.0
 Memory read bandwidth [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0)*64.0/time
 Memory read data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0)*64.0
 Memory write bandwidth [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0/time
@@ -111,7 +123,7 @@ L3 to/from system bandwidth [MBytes/s] = 1.0E-06*(SUM(LLC_LOOKUP_DATA_READ)+SUM(
 L3 to/from system data volume [GBytes] = 1.0E-09*(SUM(LLC_LOOKUP_DATA_READ)+SUM(LLC_VICTIMS_M))*64
 Memory read bandwidth [MBytes/s] = 1.0E-06*(SUM(CAS_COUNT_RD))*64.0/time
 Memory read data volume [GBytes] = 1.0E-09*(SUM(CAS_COUNT_RD))*64.0
-Memory write bandwidth [MBytes/s] 1.0E-06*(SUM(CAS_COUNT_WR))*64.0/time
+Memory write bandwidth [MBytes/s] = 1.0E-06*(SUM(CAS_COUNT_WR))*64.0/time
 Memory write data volume [GBytes] = 1.0E-09*(SUM(CAS_COUNT_WR))*64.0
 Memory bandwidth [MBytes/s] = 1.0E-06*(SUM(CAS_COUNT_RD)+SUM(CAS_COUNT_WR))*64.0/time
 Memory data volume [GBytes] = 1.0E-09*(SUM(CAS_COUNT_RD)+SUM(CAS_COUNT_WR))*64.0
diff --git a/groups/broadwellEP/CLOCK.txt b/groups/broadwellEP/CLOCK.txt
index 595d3a1..5ff9f69 100644
--- a/groups/broadwellEP/CLOCK.txt
+++ b/groups/broadwellEP/CLOCK.txt
@@ -5,11 +5,13 @@ FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
 PWR0  PWR_PKG_ENERGY
+UBOXFIX UNCORE_CLOCK
 
 METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+Uncore Clock [MHz] 1.E-06*UBOXFIX/time
 CPI  FIXC1/FIXC0
 Energy [J]  PWR0
 Power [W] PWR0/time
@@ -17,6 +19,7 @@ Power [W] PWR0/time
 LONG
 Formula:
 Power =  PWR_PKG_ENERGY / time
+Uncore Clock [MHz] = 1.E-06 * UNCORE_CLOCK / time
 -
 Broadwell implements the new RAPL interface. This interface enables to
 monitor the consumed energy on the package (socket) level.
diff --git a/groups/broadwellEP/CYCLE_ACTIVITY.txt b/groups/broadwellEP/CYCLE_ACTIVITY.txt
new file mode 100644
index 0000000..5b3c289
--- /dev/null
+++ b/groups/broadwellEP/CYCLE_ACTIVITY.txt
@@ -0,0 +1,27 @@
+SHORT Cycle Activities
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 CYCLE_ACTIVITY_STALLS_L2_PENDING
+PMC1 CYCLE_ACTIVITY_STALLS_LDM_PENDING
+PMC3 CYCLE_ACTIVITY_CYCLES_NO_EXECUTE
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Cycles without execution [%] PMC3/FIXC1*100
+Cycles without execution due to L2 [%] PMC0/FIXC1*100
+Cycles without execution due to memory [%] PMC1/FIXC1*100
+
+LONG
+Cycles without execution [%] = CYCLE_ACTIVITY_CYCLES_NO_EXECUTE/CPU_CLK_UNHALTED_CORE*100
+Cycles without execution due to L2 [%] = CYCLE_ACTIVITY_STALLS_L2_PENDING/CPU_CLK_UNHALTED_CORE*100
+Cycles without execution due to memory [%] = CYCLE_ACTIVITY_STALLS_LDM_PENDING/CPU_CLK_UNHALTED_CORE*100
+--
+This performance group measures the stalls caused by data traffic in the cache
+hierarchy. Execution stalls due to L1D misses are not reported on Broadwell
+because the corresponding event shows high overcounting.
diff --git a/groups/broadwellEP/FLOPS_DP.txt b/groups/broadwellEP/FLOPS_DP.txt
index 60b5d5a..69f2e80 100644
--- a/groups/broadwellEP/FLOPS_DP.txt
+++ b/groups/broadwellEP/FLOPS_DP.txt
@@ -13,15 +13,15 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
-AVX MFLOP/s  1.0E-06*(PMC2*4.0)/time
+DP MFLOP/s  1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
+AVX DP MFLOP/s  1.0E-06*(PMC2*4.0)/time
 Packed MUOPS/s   1.0E-06*(PMC0+PMC2)/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE*2+FP_ARITH_INST_RETIRED_SCALAR_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
-AVX MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
+DP MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE*2+FP_ARITH_INST_RETIRED_SCALAR_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
+AVX DP MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE)/runtime
 Scalar MUOPS/s = 1.0E-06*FP_ARITH_INST_RETIRED_SCALAR_DOUBLE/runtime
 -
diff --git a/groups/broadwellEP/FLOPS_SP.txt b/groups/broadwellEP/FLOPS_SP.txt
index 2818d94..bc0087e 100644
--- a/groups/broadwellEP/FLOPS_SP.txt
+++ b/groups/broadwellEP/FLOPS_SP.txt
@@ -13,15 +13,15 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*4.0+PMC1+PMC2*8.0)/time
-AVX MFLOP/s  1.0E-06*(PMC2*8.0)/time
+SP MFLOP/s  1.0E-06*(PMC0*4.0+PMC1+PMC2*8.0)/time
+AVX SP MFLOP/s  1.0E-06*(PMC2*8.0)/time
 Packed MUOPS/s   1.0E-06*(PMC0+PMC2)/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_SINGLE*4+FP_ARITH_INST_RETIRED_SCALAR_SINGLE+FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/runtime
-AVX MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/runtime
+SP MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_SINGLE*4+FP_ARITH_INST_RETIRED_SCALAR_SINGLE+FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/runtime
+AVX SP MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_SINGLE+FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE)/runtime
 Scalar MUOPS/s = 1.0E-06*FP_ARITH_INST_RETIRED_SCALAR_SINGLE/runtime
 -
diff --git a/groups/broadwellEP/MEM_DP.txt b/groups/broadwellEP/MEM_DP.txt
index bfea358..cdecadf 100644
--- a/groups/broadwellEP/MEM_DP.txt
+++ b/groups/broadwellEP/MEM_DP.txt
@@ -45,9 +45,12 @@ Memory write bandwidth [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4
 Memory write data volume [GBytes] 1.0E-09*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0
 Memory bandwidth [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0/time
 Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0
+Operational intensity (PMC0*2.0+PMC1+PMC2*4.0)/((MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0)
 
 LONG
 Formula:
+Power [W] = PWR_PKG_ENERGY/runtime
+Power DRAM [W] = PWR_DRAM_ENERGY/runtime
 MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE*2+FP_ARITH_INST_RETIRED_SCALAR_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
 AVX MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE)/runtime
@@ -58,9 +61,13 @@ Memory write bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC1))*64.0/runtime
 Memory write data volume [GBytes] = 1.0E-09*(SUM(MBOXxC1))*64.0
 Memory bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0/runtime
 Memory data volume [GBytes] = 1.0E-09*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0
+Operational intensity = (FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE*2+FP_ARITH_INST_RETIRED_SCALAR_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/((SUM(MBOXxC0)+SUM(MBOXxC1))*64.0)
 --
 Profiling group to measure memory bandwidth drawn by all cores of a socket.
 Since this group is based on Uncore events it is only possible to measure on
 a per socket base. Also outputs total data volume transferred from main memory.
 SSE scalar and packed double precision FLOP rates. Also reports on packed AVX
 32b instructions.
+The operational intensity is calculated using the FP values of the cores and the
+memory data volume of the whole socket. The actual operational intensity for
+multiple CPUs can be found in the statistics table in the Sum column.
diff --git a/groups/broadwellEP/MEM_SP.txt b/groups/broadwellEP/MEM_SP.txt
index e7d4642..c887bf6 100644
--- a/groups/broadwellEP/MEM_SP.txt
+++ b/groups/broadwellEP/MEM_SP.txt
@@ -45,6 +45,7 @@ Memory write bandwidth [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4
 Memory write data volume [GBytes] 1.0E-09*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0
 Memory bandwidth [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0/time
 Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0
+Operational intensity (PMC0*4.0+PMC1+PMC2*8.0)/((MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0)
 
 LONG
 Formula:
@@ -60,9 +61,13 @@ Memory write bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC1))*64.0/runtime
 Memory write data volume [GBytes] = 1.0E-09*(SUM(MBOXxC1))*64.0
 Memory bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0/runtime
 Memory data volume [GBytes] = 1.0E-09*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0
+Operational intensity = (FP_ARITH_INST_RETIRED_128B_PACKED_SINGLE*4+FP_ARITH_INST_RETIRED_SCALAR_SINGLE+FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/((SUM(MBOXxC0)+SUM(MBOXxC1))*64.0)
 --
 Profiling group to measure memory bandwidth drawn by all cores of a socket.
 Since this group is based on Uncore events it is only possible to measure on
 a per socket base. Also outputs total data volume transferred from main memory.
 SSE scalar and packed single precision FLOP rates. Also reports on packed AVX
 32b instructions.
+The operational intensity is calculated using the FP values of the cores and the
+memory data volume of the whole socket. The actual operational intensity for
+multiple CPUs can be found in the statistics table in the Sum column.
diff --git a/groups/broadwellEP/PORT_USAGE.txt b/groups/broadwellEP/PORT_USAGE.txt
new file mode 100644
index 0000000..459f7f6
--- /dev/null
+++ b/groups/broadwellEP/PORT_USAGE.txt
@@ -0,0 +1,50 @@
+SHORT  Execution port utilization
+
+REQUIRE_NOHT
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  UOPS_EXECUTED_PORT_PORT_0
+PMC1  UOPS_EXECUTED_PORT_PORT_1
+PMC2  UOPS_EXECUTED_PORT_PORT_2
+PMC3  UOPS_EXECUTED_PORT_PORT_3
+PMC4  UOPS_EXECUTED_PORT_PORT_4
+PMC5  UOPS_EXECUTED_PORT_PORT_5
+PMC6  UOPS_EXECUTED_PORT_PORT_6
+PMC7  UOPS_EXECUTED_PORT_PORT_7
+
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Port0 usage ratio PMC0/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port1 usage ratio PMC1/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port2 usage ratio PMC2/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port3 usage ratio PMC3/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port4 usage ratio PMC4/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port5 usage ratio PMC5/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port6 usage ratio PMC6/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port7 usage ratio PMC7/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+
+LONG
+Formulas:
+Port0 usage ratio = UOPS_EXECUTED_PORT_PORT_0/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port1 usage ratio = UOPS_EXECUTED_PORT_PORT_1/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port2 usage ratio = UOPS_EXECUTED_PORT_PORT_2/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port3 usage ratio = UOPS_EXECUTED_PORT_PORT_3/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port4 usage ratio = UOPS_EXECUTED_PORT_PORT_4/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port5 usage ratio = UOPS_EXECUTED_PORT_PORT_5/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port6 usage ratio = UOPS_EXECUTED_PORT_PORT_6/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port7 usage ratio = UOPS_EXECUTED_PORT_PORT_7/SUM(UOPS_EXECUTED_PORT_PORT_*)
+-
+This group measures the execution port utilization in a CPU core. The group can
+only be measured when HyperThreading is disabled because only then each CPU core
+can program eight counters.
+Please be aware that the counters PMC4-7 are broken on Intel Broadwell. They
+don't increment if either user- or kernel-level filtering is applied. User-level
+filtering is default in LIKWID, hence kernel-level filtering is added
+automatically for PMC4-7. The returned counts can be much higher.
diff --git a/groups/sandybridge/UOPS.txt b/groups/broadwellEP/UOPS.txt
similarity index 100%
copy from groups/sandybridge/UOPS.txt
copy to groups/broadwellEP/UOPS.txt
diff --git a/groups/core2/CACHE.txt b/groups/core2/CACHE.txt
index 1f446b8..6eda059 100644
--- a/groups/core2/CACHE.txt
+++ b/groups/core2/CACHE.txt
@@ -5,7 +5,7 @@ FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
 PMC0  L1D_REPL
-PMC1  L1D_ALL_CACHE_REF
+PMC1  L1D_ALL_REF
 
 METRICS
 Runtime (RDTSC) [s] time
@@ -18,9 +18,9 @@ data cache miss ratio PMC0/PMC1
 
 LONG
 Formulas:
-data cache request rate =  L1D_ALL_CACHE_REF / INSTR_RETIRED_ANY
+data cache request rate =  L1D_ALL_REF / INSTR_RETIRED_ANY
 data cache miss rate = L1D_REPL / INSTR_RETIRED_ANY
-data cache miss ratio =  L1D_REPL / L1D_ALL_CACHE_REF
+data cache miss ratio =  L1D_REPL / L1D_ALL_REF
 -
 This group measures the locality of your data accesses with regard to the
 L1 cache. Data cache request rate tells you how data intensive your code is
diff --git a/groups/core2/FLOPS_DP.txt b/groups/core2/FLOPS_DP.txt
index 8164fd3..330cffd 100644
--- a/groups/core2/FLOPS_DP.txt
+++ b/groups/core2/FLOPS_DP.txt
@@ -11,11 +11,11 @@ METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s    1.0E-06*(PMC0*2.0+PMC1)/time
+DP MFLOP/s    1.0E-06*(PMC0*2.0+PMC1)/time
 
 LONG
 Formulas:
-MFLOP/s = 1.0E-06*(SIMD_COMP_INST_RETIRED_PACKED_DOUBLE*2+SIMD_COMP_INST_RETIRED_SCALAR_DOUBLE)/time
+DP MFLOP/s = 1.0E-06*(SIMD_COMP_INST_RETIRED_PACKED_DOUBLE*2+SIMD_COMP_INST_RETIRED_SCALAR_DOUBLE)/time
 -
 Profiling group to measure double SSE FLOPs. Don't forget that your code might also execute X87 FLOPs.
 On the number of SIMD_COMP_INST_RETIRED_PACKED_DOUBLE you can see how well your code was vectorized.
diff --git a/groups/core2/FLOPS_SP.txt b/groups/core2/FLOPS_SP.txt
index 181be78..c84abb8 100644
--- a/groups/core2/FLOPS_SP.txt
+++ b/groups/core2/FLOPS_SP.txt
@@ -11,11 +11,11 @@ METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s 1.0E-06*(PMC0*4.0+PMC1)/time
+SP MFLOP/s 1.0E-06*(PMC0*4.0+PMC1)/time
 
 LONG
 Formulas:
-MFLOP/s = 1.0E-06*(SIMD_COMP_INST_RETIRED_PACKED_SINGLE*4+SIMD_COMP_INST_RETIRED_SCALAR_SINGLE)/time
+SP MFLOP/s = 1.0E-06*(SIMD_COMP_INST_RETIRED_PACKED_SINGLE*4+SIMD_COMP_INST_RETIRED_SCALAR_SINGLE)/time
 -
 Profiling group to measure single precision SSE FLOPs. Don't forget that your code might also execute X87 FLOPs.
 On the number of SIMD_COMP_INST_RETIRED_PACKED_SINGLE you can see how well your code was vectorized.
diff --git a/groups/core2/TLB.txt b/groups/core2/TLB.txt
index 80742f4..a46cc4b 100644
--- a/groups/core2/TLB.txt
+++ b/groups/core2/TLB.txt
@@ -5,7 +5,7 @@ FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
 PMC0  DTLB_MISSES_ANY
-PMC1  L1D_ALL_CACHE_REF
+PMC1  L1D_ALL_REF
 
 METRICS
 Runtime (RDTSC) [s] time
@@ -17,9 +17,9 @@ L1 DTLB miss ratio   PMC0/PMC1
 
 LONG
 Formulas:
-L1 DTLB request rate =  L1D_ALL_CACHE_REF / INSTR_RETIRED_ANY
+L1 DTLB request rate =  L1D_ALL_REF / INSTR_RETIRED_ANY
 DTLB miss rate  = DTLB_MISSES_ANY / INSTR_RETIRED_ANY
-L1 DTLB miss ratio  =  DTLB_MISSES_ANY / L1D_ALL_CACHE_REF
+L1 DTLB miss ratio  =  DTLB_MISSES_ANY / L1D_ALL_REF
 -
 L1 DTLB request rate tells you how data intensive your code is
 or how many data accesses you have on average per instruction.
diff --git a/groups/haswell/CACHES.txt b/groups/haswell/CACHES.txt
index d0d6f33..e39e861 100644
--- a/groups/haswell/CACHES.txt
+++ b/groups/haswell/CACHES.txt
@@ -33,7 +33,7 @@ L1 to/from L2 data volume [GBytes] 1.0E-09*(PMC0+PMC1)*64.0
 L3 to L2 load bandwidth [MBytes/s]  1.0E-06*PMC2*64.0/time
 L3 to L2 load data volume [GBytes]  1.0E-09*PMC2*64.0
 L2 to L3 evict bandwidth [MBytes/s]  1.0E-06*PMC3*64.0/time
-L2 to L3 evict data volume [GBytes]  1.0E-06*PMC3*64.0
+L2 to L3 evict data volume [GBytes]  1.0E-09*PMC3*64.0
 L2 to/from L3 bandwidth [MBytes/s] 1.0E-06*(PMC2+PMC3)*64.0/time
 L2 to/from L3 data volume [GBytes] 1.0E-09*(PMC2+PMC3)*64.0
 System to L3 bandwidth [MBytes/s] 1.0E-06*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0)*64.0/time
diff --git a/groups/haswell/CLOCK.txt b/groups/haswell/CLOCK.txt
index a2556b4..ef2dda1 100644
--- a/groups/haswell/CLOCK.txt
+++ b/groups/haswell/CLOCK.txt
@@ -5,11 +5,13 @@ FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
 PWR0  PWR_PKG_ENERGY
+UBOXFIX UNCORE_CLOCK
 
 METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+Uncore Clock [MHz] 1.E-06*UBOXFIX/time
 CPI  FIXC1/FIXC0
 Energy [J]  PWR0
 Power [W] PWR0/time
@@ -17,6 +19,7 @@ Power [W] PWR0/time
 LONG
 Formula:
 Power =  PWR_PKG_ENERGY / time
+Uncore Clock [MHz] = 1.E-06 * UNCORE_CLOCK / time
 -
 Haswell implements the new RAPL interface. This interface enables to
 monitor the consumed energy on the package (socket) level.
diff --git a/groups/haswell/CYCLE_ACTIVITY.txt b/groups/haswell/CYCLE_ACTIVITY.txt
new file mode 100644
index 0000000..820a10c
--- /dev/null
+++ b/groups/haswell/CYCLE_ACTIVITY.txt
@@ -0,0 +1,29 @@
+SHORT Cycle Activities
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 CYCLE_ACTIVITY_STALLS_L2_PENDING
+PMC1 CYCLE_ACTIVITY_STALLS_LDM_PENDING
+PMC2 CYCLE_ACTIVITY_STALLS_L1D_PENDING
+PMC3 CYCLE_ACTIVITY_CYCLES_NO_EXECUTE
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Cycles without execution [%] PMC3/FIXC1*100
+Cycles without execution due to L1D [%] PMC2/FIXC1*100
+Cycles without execution due to L2 [%] PMC0/FIXC1*100
+Cycles without execution due to memory [%] PMC1/FIXC1*100
+
+LONG
+Cycles without execution [%] = CYCLE_ACTIVITY_CYCLES_NO_EXECUTE/CPU_CLK_UNHALTED_CORE*100
+Cycles without execution due to L1D [%] = CYCLE_ACTIVITY_STALLS_L1D_PENDING/CPU_CLK_UNHALTED_CORE*100
+Cycles without execution due to L2 [%] = CYCLE_ACTIVITY_STALLS_L2_PENDING/CPU_CLK_UNHALTED_CORE*100
+Cycles without execution due to memory [%] = CYCLE_ACTIVITY_STALLS_LDM_PENDING/CPU_CLK_UNHALTED_CORE*100
+--
+This performance group measures the stalls caused by data traffic in the cache
+hierarchy.
diff --git a/groups/haswell/PORT_USAGE.txt b/groups/haswell/PORT_USAGE.txt
new file mode 100644
index 0000000..898cdd1
--- /dev/null
+++ b/groups/haswell/PORT_USAGE.txt
@@ -0,0 +1,46 @@
+SHORT  Execution port utilization
+
+REQUIRE_NOHT
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  UOPS_EXECUTED_PORT_PORT_0
+PMC1  UOPS_EXECUTED_PORT_PORT_1
+PMC2  UOPS_EXECUTED_PORT_PORT_2
+PMC3  UOPS_EXECUTED_PORT_PORT_3
+PMC4  UOPS_EXECUTED_PORT_PORT_4
+PMC5  UOPS_EXECUTED_PORT_PORT_5
+PMC6  UOPS_EXECUTED_PORT_PORT_6
+PMC7  UOPS_EXECUTED_PORT_PORT_7
+
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Port0 usage ratio PMC0/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port1 usage ratio PMC1/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port2 usage ratio PMC2/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port3 usage ratio PMC3/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port4 usage ratio PMC4/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port5 usage ratio PMC5/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port6 usage ratio PMC6/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port7 usage ratio PMC7/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+
+LONG
+Formulas:
+Port0 usage ratio UOPS_EXECUTED_PORT_PORT_0/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port1 usage ratio UOPS_EXECUTED_PORT_PORT_1/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port2 usage ratio UOPS_EXECUTED_PORT_PORT_2/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port3 usage ratio UOPS_EXECUTED_PORT_PORT_3/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port4 usage ratio UOPS_EXECUTED_PORT_PORT_4/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port5 usage ratio UOPS_EXECUTED_PORT_PORT_5/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port6 usage ratio UOPS_EXECUTED_PORT_PORT_6/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port7 usage ratio UOPS_EXECUTED_PORT_PORT_7/SUM(UOPS_EXECUTED_PORT_PORT_*)
+-
+This group measures the execution port utilization in a CPU core. The group can
+only be measured when HyperThreading is disabled because only then each CPU core
+can program eight counters.
diff --git a/groups/haswellEP/CACHES.txt b/groups/haswellEP/CACHES.txt
index 3c13a52..7721ad5 100644
--- a/groups/haswellEP/CACHES.txt
+++ b/groups/haswellEP/CACHES.txt
@@ -73,7 +73,7 @@ L1 to/from L2 data volume [GBytes] 1.0E-09*(PMC0+PMC1)*64.0
 L3 to L2 load bandwidth [MBytes/s]  1.0E-06*PMC2*64.0/time
 L3 to L2 load data volume [GBytes]  1.0E-09*PMC2*64.0
 L2 to L3 evict bandwidth [MBytes/s]  1.0E-06*PMC3*64.0/time
-L2 to L3 evict data volume [GBytes]  1.0E-06*PMC3*64.0
+L2 to L3 evict data volume [GBytes]  1.0E-09*PMC3*64.0
 L2 to/from L3 bandwidth [MBytes/s] 1.0E-06*(PMC2+PMC3)*64.0/time
 L2 to/from L3 data volume [GBytes] 1.0E-09*(PMC2+PMC3)*64.0
 System to L3 bandwidth [MBytes/s] 1.0E-06*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0)*64.0/time
diff --git a/groups/haswellEP/CLOCK.txt b/groups/haswellEP/CLOCK.txt
index a2556b4..ef2dda1 100644
--- a/groups/haswellEP/CLOCK.txt
+++ b/groups/haswellEP/CLOCK.txt
@@ -5,11 +5,13 @@ FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
 PWR0  PWR_PKG_ENERGY
+UBOXFIX UNCORE_CLOCK
 
 METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+Uncore Clock [MHz] 1.E-06*UBOXFIX/time
 CPI  FIXC1/FIXC0
 Energy [J]  PWR0
 Power [W] PWR0/time
@@ -17,6 +19,7 @@ Power [W] PWR0/time
 LONG
 Formula:
 Power =  PWR_PKG_ENERGY / time
+Uncore Clock [MHz] = 1.E-06 * UNCORE_CLOCK / time
 -
 Haswell implements the new RAPL interface. This interface enables to
 monitor the consumed energy on the package (socket) level.
diff --git a/groups/haswellEP/CYCLE_ACTIVITY.txt b/groups/haswellEP/CYCLE_ACTIVITY.txt
new file mode 100644
index 0000000..820a10c
--- /dev/null
+++ b/groups/haswellEP/CYCLE_ACTIVITY.txt
@@ -0,0 +1,29 @@
+SHORT Cycle Activities
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 CYCLE_ACTIVITY_STALLS_L2_PENDING
+PMC1 CYCLE_ACTIVITY_STALLS_LDM_PENDING
+PMC2 CYCLE_ACTIVITY_STALLS_L1D_PENDING
+PMC3 CYCLE_ACTIVITY_CYCLES_NO_EXECUTE
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Cycles without execution [%] PMC3/FIXC1*100
+Cycles without execution due to L1D [%] PMC2/FIXC1*100
+Cycles without execution due to L2 [%] PMC0/FIXC1*100
+Cycles without execution due to memory [%] PMC1/FIXC1*100
+
+LONG
+Cycles without execution [%] = CYCLE_ACTIVITY_CYCLES_NO_EXECUTE/CPU_CLK_UNHALTED_CORE*100
+Cycles without execution due to L1D [%] = CYCLE_ACTIVITY_STALLS_L1D_PENDING/CPU_CLK_UNHALTED_CORE*100
+Cycles without execution due to L2 [%] = CYCLE_ACTIVITY_STALLS_L2_PENDING/CPU_CLK_UNHALTED_CORE*100
+Cycles without execution due to memory [%] = CYCLE_ACTIVITY_STALLS_LDM_PENDING/CPU_CLK_UNHALTED_CORE*100
+--
+This performance group measures the stalls caused by data traffic in the cache
+hierarchy.
diff --git a/groups/haswellEP/PORT_USAGE.txt b/groups/haswellEP/PORT_USAGE.txt
new file mode 100644
index 0000000..898cdd1
--- /dev/null
+++ b/groups/haswellEP/PORT_USAGE.txt
@@ -0,0 +1,46 @@
+SHORT  Execution port utilization
+
+REQUIRE_NOHT
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  UOPS_EXECUTED_PORT_PORT_0
+PMC1  UOPS_EXECUTED_PORT_PORT_1
+PMC2  UOPS_EXECUTED_PORT_PORT_2
+PMC3  UOPS_EXECUTED_PORT_PORT_3
+PMC4  UOPS_EXECUTED_PORT_PORT_4
+PMC5  UOPS_EXECUTED_PORT_PORT_5
+PMC6  UOPS_EXECUTED_PORT_PORT_6
+PMC7  UOPS_EXECUTED_PORT_PORT_7
+
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Port0 usage ratio PMC0/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port1 usage ratio PMC1/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port2 usage ratio PMC2/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port3 usage ratio PMC3/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port4 usage ratio PMC4/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port5 usage ratio PMC5/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port6 usage ratio PMC6/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port7 usage ratio PMC7/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+
+LONG
+Formulas:
+Port0 usage ratio UOPS_EXECUTED_PORT_PORT_0/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port1 usage ratio UOPS_EXECUTED_PORT_PORT_1/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port2 usage ratio UOPS_EXECUTED_PORT_PORT_2/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port3 usage ratio UOPS_EXECUTED_PORT_PORT_3/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port4 usage ratio UOPS_EXECUTED_PORT_PORT_4/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port5 usage ratio UOPS_EXECUTED_PORT_PORT_5/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port6 usage ratio UOPS_EXECUTED_PORT_PORT_6/SUM(UOPS_EXECUTED_PORT_PORT_*)
+Port7 usage ratio UOPS_EXECUTED_PORT_PORT_7/SUM(UOPS_EXECUTED_PORT_PORT_*)
+-
+This group measures the execution port utilization in a CPU core. The group can
+only be measured when HyperThreading is disabled because only then each CPU core
+can program eight counters.
diff --git a/groups/ivybridge/CLOCK.txt b/groups/ivybridge/CLOCK.txt
index 278821e..d5e288a 100644
--- a/groups/ivybridge/CLOCK.txt
+++ b/groups/ivybridge/CLOCK.txt
@@ -5,11 +5,13 @@ FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
 PWR0  PWR_PKG_ENERGY
+UBOXFIX UNCORE_CLOCK
 
 METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+Uncore Clock [MHz] 1.E-06*UBOXFIX/time
 CPI  FIXC1/FIXC0
 Energy [J]  PWR0
 Power [W] PWR0/time
@@ -17,6 +19,7 @@ Power [W] PWR0/time
 LONG
 Formula:
 Power =  PWR_PKG_ENERGY / time
+Uncore Clock [MHz] = 1.E-06 * UNCORE_CLOCK / time
 -
 IvyBridge implements the new RAPL interface. This interface enables to
 monitor the consumed energy on the package (socket) level.
diff --git a/groups/ivybridge/CYCLE_ACTIVITY.txt b/groups/ivybridge/CYCLE_ACTIVITY.txt
new file mode 100644
index 0000000..0c22519
--- /dev/null
+++ b/groups/ivybridge/CYCLE_ACTIVITY.txt
@@ -0,0 +1,32 @@
+SHORT Cycle Activities
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 CYCLE_ACTIVITY_STALLS_LDM_PENDING
+PMC1 CYCLE_ACTIVITY_STALLS_L2_PENDING
+PMC2 CYCLE_ACTIVITY_STALLS_L1D_PENDING
+PMC3 CYCLE_ACTIVITY_CYCLES_NO_EXECUTE
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Cycles without execution [%] PMC3/FIXC1*100
+Cycles with stalls due to L1D [%] PMC2/FIXC1*100
+Cycles with stalls due to L2 [%] PMC1/FIXC1*100
+Cycles with stalls due to LDM [%] PMC0/FIXC1*100
+
+LONG
+Formulas:
+Cycles without execution [%] = 100.0 * CYCLE_ACTIVITY_CYCLES_NO_EXECUTE/CPU_CLK_UNHALTED_CORE
+Cycles with stalls due to L1D [%] = 100.0 * CYCLE_ACTIVITY_STALLS_L1D_PENDING/CPU_CLK_UNHALTED_CORE
+Cycles with stalls due to L2 [%] = 100.0 * CYCLE_ACTIVITY_STALLS_L2_PENDING/CPU_CLK_UNHALTED_CORE
+Cycles with stalls due to LDM [%] = 100.0 * CYCLE_ACTIVITY_STALLS_LDM_PENDING/CPU_CLK_UNHALTED_CORE
+--
+This performance group measures the execution stalls due to load misses in
+the L1D and L2 cache. The group measures only the cycles where the CPU cannot
+execute any instruction, it does not measure the duration of stalls in the cache
+layers.
diff --git a/groups/ivybridge/FLOPS_DP.txt b/groups/ivybridge/FLOPS_DP.txt
index b5e8273..37c5124 100644
--- a/groups/ivybridge/FLOPS_DP.txt
+++ b/groups/ivybridge/FLOPS_DP.txt
@@ -13,15 +13,15 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
-AVX MFLOP/s  1.0E-06*(PMC2*4.0)/time
+DP MFLOP/s  1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
+AVX DP MFLOP/s  1.0E-06*(PMC2*4.0)/time
 Packed MUOPS/s   1.0E-06*(PMC0+PMC2)/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_DOUBLE*4)/runtime
-AVX MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_DOUBLE*4)/runtime
+DP MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_DOUBLE*4)/runtime
+AVX DP MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_DOUBLE*4)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED_DOUBLE+SIMD_FP_256_PACKED_DOUBLE)/runtime
 Scalar MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_SCALAR_DOUBLE/runtime
 -
diff --git a/groups/ivybridge/FLOPS_SP.txt b/groups/ivybridge/FLOPS_SP.txt
index 819b81c..3539ba0 100644
--- a/groups/ivybridge/FLOPS_SP.txt
+++ b/groups/ivybridge/FLOPS_SP.txt
@@ -13,15 +13,15 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*4.0+PMC1+PMC2*8.0)/time
-AVX MFLOP/s  1.0E-06*(PMC2*8.0)/time
+SP MFLOP/s  1.0E-06*(PMC0*4.0+PMC1+PMC2*8.0)/time
+AVX SP MFLOP/s  1.0E-06*(PMC2*8.0)/time
 Packed MUOPS/s   1.0E-06*(PMC0+PMC2)/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_SINGLE*8)/runtime
-AVX MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_SINGLE*8)/runtime
+SP MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_SINGLE*8)/runtime
+AVX SP MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_SINGLE*8)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED_SINGLE+SIMD_FP_256_PACKED_SINGLE)/runtime
 Scalar MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_SCALAR_SINGLE/runtime
 -
diff --git a/groups/ivybridge/PORT_USAGE.txt b/groups/ivybridge/PORT_USAGE.txt
new file mode 100644
index 0000000..68d6630
--- /dev/null
+++ b/groups/ivybridge/PORT_USAGE.txt
@@ -0,0 +1,40 @@
+SHORT  Execution port utilization
+
+REQUIRE_NOHT
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  UOPS_DISPATCHED_PORT_PORT_0
+PMC1  UOPS_DISPATCHED_PORT_PORT_1
+PMC2  UOPS_DISPATCHED_PORT_PORT_2
+PMC3  UOPS_DISPATCHED_PORT_PORT_3
+PMC4  UOPS_DISPATCHED_PORT_PORT_4
+PMC5  UOPS_DISPATCHED_PORT_PORT_5
+
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Port0 usage ratio PMC0/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port1 usage ratio PMC1/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port2 usage ratio PMC2/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port3 usage ratio PMC3/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port4 usage ratio PMC4/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port5 usage ratio PMC5/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+
+LONG
+Formulas:
+Port0 usage ratio UOPS_DISPATCHED_PORT_PORT_0/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port1 usage ratio UOPS_DISPATCHED_PORT_PORT_1/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port2 usage ratio UOPS_DISPATCHED_PORT_PORT_2/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port3 usage ratio UOPS_DISPATCHED_PORT_PORT_3/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port4 usage ratio UOPS_DISPATCHED_PORT_PORT_4/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port5 usage ratio UOPS_DISPATCHED_PORT_PORT_5/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+-
+This group measures the execution port utilization in a CPU core. The group can
+only be measured when HyperThreading is disabled because only then each CPU core
+can program eight counters.
diff --git a/groups/ivybridgeEP/CACHES.txt b/groups/ivybridgeEP/CACHES.txt
index ad63925..c31b38c 100644
--- a/groups/ivybridgeEP/CACHES.txt
+++ b/groups/ivybridgeEP/CACHES.txt
@@ -71,7 +71,7 @@ L1 to/from L2 data volume [GBytes] 1.0E-09*(PMC0+PMC1)*64.0
 L3 to L2 load bandwidth [MBytes/s]  1.0E-06*PMC2*64.0/time
 L3 to L2 load data volume [GBytes]  1.0E-09*PMC2*64.0
 L2 to L3 evict bandwidth [MBytes/s]  1.0E-06*PMC3*64.0/time
-L2 to L3 evict data volume [GBytes]  1.0E-06*PMC3*64.0
+L2 to L3 evict data volume [GBytes]  1.0E-09*PMC3*64.0
 L2 to/from L3 bandwidth [MBytes/s] 1.0E-06*(PMC2+PMC3)*64.0/time
 L2 to/from L3 data volume [GBytes] 1.0E-09*(PMC2+PMC3)*64.0
 System to L3 bandwidth [MBytes/s] 1.0E-06*(CBOX0C0:STATE=0x3F+CBOX1C0:STATE=0x3F+CBOX2C0:STATE=0x3F+CBOX3C0:STATE=0x3F+CBOX4C0:STATE=0x3F+CBOX5C0:STATE=0x3F+CBOX6C0:STATE=0x3F+CBOX7C0:STATE=0x3F+CBOX8C0:STATE=0x3F+CBOX9C0:STATE=0x3F+CBOX10C0:STATE=0x3F+CBOX11C0:STATE=0x3F+CBOX12C0:STATE=0x3F+CBOX13C0:STATE=0x3F+CBOX14C0:STATE=0x3F)*64.0/time
diff --git a/groups/ivybridgeEP/CLOCK.txt b/groups/ivybridgeEP/CLOCK.txt
index 278821e..d5e288a 100644
--- a/groups/ivybridgeEP/CLOCK.txt
+++ b/groups/ivybridgeEP/CLOCK.txt
@@ -5,11 +5,13 @@ FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
 PWR0  PWR_PKG_ENERGY
+UBOXFIX UNCORE_CLOCK
 
 METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+Uncore Clock [MHz] 1.E-06*UBOXFIX/time
 CPI  FIXC1/FIXC0
 Energy [J]  PWR0
 Power [W] PWR0/time
@@ -17,6 +19,7 @@ Power [W] PWR0/time
 LONG
 Formula:
 Power =  PWR_PKG_ENERGY / time
+Uncore Clock [MHz] = 1.E-06 * UNCORE_CLOCK / time
 -
 IvyBridge implements the new RAPL interface. This interface enables to
 monitor the consumed energy on the package (socket) level.
diff --git a/groups/ivybridgeEP/CYCLE_ACTIVITY.txt b/groups/ivybridgeEP/CYCLE_ACTIVITY.txt
new file mode 100644
index 0000000..0c22519
--- /dev/null
+++ b/groups/ivybridgeEP/CYCLE_ACTIVITY.txt
@@ -0,0 +1,32 @@
+SHORT Cycle Activities
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 CYCLE_ACTIVITY_STALLS_LDM_PENDING
+PMC1 CYCLE_ACTIVITY_STALLS_L2_PENDING
+PMC2 CYCLE_ACTIVITY_STALLS_L1D_PENDING
+PMC3 CYCLE_ACTIVITY_CYCLES_NO_EXECUTE
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Cycles without execution [%] PMC3/FIXC1*100
+Cycles with stalls due to L1D [%] PMC2/FIXC1*100
+Cycles with stalls due to L2 [%] PMC1/FIXC1*100
+Cycles with stalls due to LDM [%] PMC0/FIXC1*100
+
+LONG
+Formulas:
+Cycles without execution [%] = 100.0 * CYCLE_ACTIVITY_CYCLES_NO_EXECUTE/CPU_CLK_UNHALTED_CORE
+Cycles with stalls due to L1D [%] = 100.0 * CYCLE_ACTIVITY_STALLS_L1D_PENDING/CPU_CLK_UNHALTED_CORE
+Cycles with stalls due to L2 [%] = 100.0 * CYCLE_ACTIVITY_STALLS_L2_PENDING/CPU_CLK_UNHALTED_CORE
+Cycles with stalls due to LDM [%] = 100.0 * CYCLE_ACTIVITY_STALLS_LDM_PENDING/CPU_CLK_UNHALTED_CORE
+--
+This performance group measures the execution stalls due to load misses in
+the L1D and L2 cache. The group measures only the cycles where the CPU cannot
+execute any instruction, it does not measure the duration of stalls in the cache
+layers.
diff --git a/groups/ivybridgeEP/FLOPS_DP.txt b/groups/ivybridgeEP/FLOPS_DP.txt
index b5e8273..37c5124 100644
--- a/groups/ivybridgeEP/FLOPS_DP.txt
+++ b/groups/ivybridgeEP/FLOPS_DP.txt
@@ -13,15 +13,15 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
-AVX MFLOP/s  1.0E-06*(PMC2*4.0)/time
+DP MFLOP/s  1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
+AVX DP MFLOP/s  1.0E-06*(PMC2*4.0)/time
 Packed MUOPS/s   1.0E-06*(PMC0+PMC2)/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_DOUBLE*4)/runtime
-AVX MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_DOUBLE*4)/runtime
+DP MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_DOUBLE*4)/runtime
+AVX DP MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_DOUBLE*4)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED_DOUBLE+SIMD_FP_256_PACKED_DOUBLE)/runtime
 Scalar MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_SCALAR_DOUBLE/runtime
 -
diff --git a/groups/ivybridgeEP/FLOPS_SP.txt b/groups/ivybridgeEP/FLOPS_SP.txt
index 819b81c..3539ba0 100644
--- a/groups/ivybridgeEP/FLOPS_SP.txt
+++ b/groups/ivybridgeEP/FLOPS_SP.txt
@@ -13,15 +13,15 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*4.0+PMC1+PMC2*8.0)/time
-AVX MFLOP/s  1.0E-06*(PMC2*8.0)/time
+SP MFLOP/s  1.0E-06*(PMC0*4.0+PMC1+PMC2*8.0)/time
+AVX SP MFLOP/s  1.0E-06*(PMC2*8.0)/time
 Packed MUOPS/s   1.0E-06*(PMC0+PMC2)/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_SINGLE*8)/runtime
-AVX MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_SINGLE*8)/runtime
+SP MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_SINGLE*8)/runtime
+AVX SP MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_SINGLE*8)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED_SINGLE+SIMD_FP_256_PACKED_SINGLE)/runtime
 Scalar MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_SCALAR_SINGLE/runtime
 -
diff --git a/groups/ivybridgeEP/MEM_DP.txt b/groups/ivybridgeEP/MEM_DP.txt
index da40bb9..b49887c 100644
--- a/groups/ivybridgeEP/MEM_DP.txt
+++ b/groups/ivybridgeEP/MEM_DP.txt
@@ -45,9 +45,12 @@ Memory write bandwidth [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4
 Memory write data volume [GBytes] 1.0E-09*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0
 Memory bandwidth [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0/time
 Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0
+Operational intensity (PMC0*2.0+PMC1+PMC2*4.0)/((MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0)
 
 LONG
 Formula:
+Power [W] = PWR_PKG_ENERGY/runtime
+Power DRAM [W] = PWR_DRAM_ENERGY/runtime
 MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_DOUBLE*4)/runtime
 AVX MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_DOUBLE*4)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED_DOUBLE+SIMD_FP_256_PACKED_DOUBLE)/runtime
@@ -58,6 +61,7 @@ Memory write bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC1))*64.0/time
 Memory write data volume [GBytes] = 1.0E-09*(SUM(MBOXxC1))*64.0
 Memory bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0/time
 Memory data volume [GBytes] = 1.0E-09*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0
+Operational intensity = (FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_DOUBLE*4)/((SUM(MBOXxC0)+SUM(MBOXxC1))*64.0)
 --
 Profiling group to measure memory bandwidth drawn by all cores of a socket.
 Since this group is based on Uncore events it is only possible to measure on
@@ -65,4 +69,7 @@ a per socket base. Also outputs total data volume transferred from main memory.
 SSE scalar and packed double precision FLOP rates. Also reports on packed AVX
 32b instructions.  Please note that the current FLOP measurements on SandyBridge
 are potentially wrong. So you cannot trust these counters at the moment!
+The operational intensity is calculated using the FP values of the cores and the
+memory data volume of the whole socket. The actual operational intensity for
+multiple CPUs can be found in the statistics table in the Sum column.
 
diff --git a/groups/ivybridgeEP/MEM_SP.txt b/groups/ivybridgeEP/MEM_SP.txt
index 7fe9ea9..70622dd 100644
--- a/groups/ivybridgeEP/MEM_SP.txt
+++ b/groups/ivybridgeEP/MEM_SP.txt
@@ -45,6 +45,7 @@ Memory write bandwidth [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4
 Memory write data volume [GBytes] 1.0E-09*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0
 Memory bandwidth [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0/time
 Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0
+Operational intensity (PMC0*4.0+PMC1+PMC2*8.0)/((MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0)
 
 LONG
 Formula:
@@ -60,6 +61,7 @@ Memory write bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC1))*64.0/time
 Memory write data volume [GBytes] = 1.0E-09*(SUM(MBOXxC1))*64.0
 Memory bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0/time
 Memory data volume [GBytes] = 1.0E-09*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0
+Operational intensity = (FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_SINGLE*8)/((SUM(MBOXxC0)+SUM(MBOXxC1))*64.0)
 --
 Profiling group to measure memory bandwidth drawn by all cores of a socket.
 Since this group is based on Uncore events it is only possible to measure on
@@ -67,4 +69,6 @@ a per socket base. Also outputs total data volume transferred from main memory.
 SSE scalar and packed single precision FLOP rates. Also reports on packed AVX
 32b instructions. Please note that the current FLOP measurements on IvyBridge
 are potentially wrong. So you cannot trust these counters at the moment!
-
+The operational intensity is calculated using the FP values of the cores and the
+memory data volume of the whole socket. The actual operational intensity for
+multiple CPUs can be found in the statistics table in the Sum column.
diff --git a/groups/ivybridgeEP/PORT_USAGE.txt b/groups/ivybridgeEP/PORT_USAGE.txt
new file mode 100644
index 0000000..68d6630
--- /dev/null
+++ b/groups/ivybridgeEP/PORT_USAGE.txt
@@ -0,0 +1,40 @@
+SHORT  Execution port utilization
+
+REQUIRE_NOHT
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  UOPS_DISPATCHED_PORT_PORT_0
+PMC1  UOPS_DISPATCHED_PORT_PORT_1
+PMC2  UOPS_DISPATCHED_PORT_PORT_2
+PMC3  UOPS_DISPATCHED_PORT_PORT_3
+PMC4  UOPS_DISPATCHED_PORT_PORT_4
+PMC5  UOPS_DISPATCHED_PORT_PORT_5
+
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Port0 usage ratio PMC0/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port1 usage ratio PMC1/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port2 usage ratio PMC2/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port3 usage ratio PMC3/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port4 usage ratio PMC4/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port5 usage ratio PMC5/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+
+LONG
+Formulas:
+Port0 usage ratio UOPS_DISPATCHED_PORT_PORT_0/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port1 usage ratio UOPS_DISPATCHED_PORT_PORT_1/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port2 usage ratio UOPS_DISPATCHED_PORT_PORT_2/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port3 usage ratio UOPS_DISPATCHED_PORT_PORT_3/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port4 usage ratio UOPS_DISPATCHED_PORT_PORT_4/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port5 usage ratio UOPS_DISPATCHED_PORT_PORT_5/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+-
+This group measures the execution port utilization in a CPU core. The group can
+only be measured when HyperThreading is disabled because only then each CPU core
+can program eight counters.
diff --git a/groups/ivybridgeEP/UNCORECLOCK.txt b/groups/ivybridgeEP/UNCORECLOCK.txt
index f8859fe..1cc1f98 100644
--- a/groups/ivybridgeEP/UNCORECLOCK.txt
+++ b/groups/ivybridgeEP/UNCORECLOCK.txt
@@ -29,7 +29,7 @@ MBOX3FIX DRAM_CLOCKTICKS
 SBOX0C0 SBOX_CLOCKTICKS
 SBOX1C0 SBOX_CLOCKTICKS
 SBOX2C0 SBOX_CLOCKTICKS
-UBOXFIX UBOX_CLOCKTICKS
+UBOXFIX UNCORE_CLOCK
 BBOX0C0 BBOX_CLOCKTICKS
 BBOX1C0 BBOX_CLOCKTICKS
 WBOX0 WBOX_CLOCKTICKS
@@ -43,6 +43,7 @@ METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+UBOX Frequency [GHz] 1.E-09*UBOXFIX/(FIXC1*inverseClock)
 CBOX0 Frequency [GHz] 1.E-09*CBOX0C0/(FIXC1*inverseClock)
 CBOX1 Frequency [GHz] 1.E-09*CBOX1C0/(FIXC1*inverseClock)
 CBOX2 Frequency [GHz] 1.E-09*CBOX2C0/(FIXC1*inverseClock)
@@ -69,7 +70,6 @@ MBOX3FIX Frequency [GHz] 1.E-09*MBOX3FIX/(FIXC1*inverseClock)
 SBOX0 Frequency [GHz] 1.E-09*SBOX0C0/(FIXC1*inverseClock)
 SBOX1 Frequency [GHz] 1.E-09*SBOX1C0/(FIXC1*inverseClock)
 SBOX2 Frequency [GHz] 1.E-09*SBOX2C0/(FIXC1*inverseClock)
-UBOX Frequency [GHz] 1.E-09*UBOXFIX/(FIXC1*inverseClock)
 BBOX0 Frequency [GHz] 1.E-09*BBOX0C0/(FIXC1*inverseClock)
 BBOX1 Frequency [GHz] 1.E-09*BBOX1C0/(FIXC1*inverseClock)
 WBOX Frequency [GHz] 1.E-09*WBOX0/(FIXC1*inverseClock)
@@ -82,3 +82,15 @@ IBOX Frequency [GHz] 1.E-09*IBOX0/(FIXC1*inverseClock)
 
 LONG
 Formulas:
+UBOX Frequency [GHz] = 1.E-09*UNCORE_CLOCK/(CPU_CLK_UNHALTED_CORE*inverseClock)
+CBOX[0-14] Frequency [GHz] = 1.E-09*CBOX_CLOCKTICKS/(CPU_CLK_UNHALTED_CORE*inverseClock)
+MBOX[0-3] Frequency [GHz] = 1.E-09*DRAM_CLOCKTICKS/(CPU_CLK_UNHALTED_CORE*inverseClock)
+MBOX[0-3]FIX Frequency [GHz] = 1.E-09*DRAM_CLOCKTICKS/(CPU_CLK_UNHALTED_CORE*inverseClock)
+SBOX[0-2] Frequency [GHz] = 1.E-09*SBOX_CLOCKTICKS/(CPU_CLK_UNHALTED_CORE*inverseClock)
+BBOX[0-1] Frequency [GHz] = 1.E-09*BBOX_CLOCKTICKS/(CPU_CLK_UNHALTED_CORE*inverseClock)
+RBOX[0-2] Frequency [GHz] = 1.E-09*RBOX_CLOCKTICKS/(CPU_CLK_UNHALTED_CORE*inverseClock)
+WBOX Frequency [GHz] = 1.E-09*WBOX_CLOCKTICKS/(CPU_CLK_UNHALTED_CORE*inverseClock)
+PBOX Frequency [GHz] = 1.E-09*PBOX_CLOCKTICKS/(CPU_CLK_UNHALTED_CORE*inverseClock)
+IBOX Frequency [GHz] = 1.E-09*IBOX_CLOCKTICKS/(CPU_CLK_UNHALTED_CORE*inverseClock)
+--
+An overview of the frequencies of all Uncore units.
diff --git a/groups/atom/BRANCH.txt b/groups/knl/BRANCH.txt
similarity index 65%
copy from groups/atom/BRANCH.txt
copy to groups/knl/BRANCH.txt
index 4213114..b8d41b2 100644
--- a/groups/atom/BRANCH.txt
+++ b/groups/knl/BRANCH.txt
@@ -4,8 +4,8 @@ EVENTSET
 FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
-PMC0  BR_INST_RETIRED_ANY
-PMC1  BR_INST_RETIRED_MISPRED
+PMC0  BR_INST_RETIRED_ALL_BRANCHES
+PMC1  BR_MISP_RETIRED_ALL_BRANCHES
 
 METRICS
 Runtime (RDTSC) [s] time
@@ -19,10 +19,10 @@ Instructions per branch  FIXC0/PMC0
 
 LONG
 Formulas:
-Branch rate = BR_INST_RETIRED_ANY/INSTR_RETIRED_ANY
-Branch misprediction rate = BR_INST_RETIRED_MISPRED/INSTR_RETIRED_ANY
-Branch misprediction ratio = BR_INST_RETIRED_MISPRED/BR_INST_RETIRED_ANY
-Instructions per branch = INSTR_RETIRED_ANY/BR_INST_RETIRED_ANY
+Branch rate = BR_INST_RETIRED_ALL_BRANCHES/INSTR_RETIRED_ANY
+Branch misprediction rate =  BR_MISP_RETIRED_ALL_BRANCHES/INSTR_RETIRED_ANY
+Branch misprediction ratio = BR_MISP_RETIRED_ALL_BRANCHES/BR_INST_RETIRED_ALL_BRANCHES
+Instructions per branch = INSTR_RETIRED_ANY/BR_INST_RETIRED_ALL_BRANCHES
 -
 The rates state how often on average a branch or a mispredicted branch occurred
 per instruction retired in total. The branch misprediction ratio sets directly
diff --git a/groups/broadwellEP/CLOCK.txt b/groups/knl/CLOCK.txt
similarity index 82%
copy from groups/broadwellEP/CLOCK.txt
copy to groups/knl/CLOCK.txt
index 595d3a1..2ddd921 100644
--- a/groups/broadwellEP/CLOCK.txt
+++ b/groups/knl/CLOCK.txt
@@ -18,6 +18,6 @@ LONG
 Formula:
 Power =  PWR_PKG_ENERGY / time
 -
-Broadwell implements the new RAPL interface. This interface enables to
+The Xeon Phi (Knights Landing) implements the new RAPL interface. This interface enables to
 monitor the consumed energy on the package (socket) level.
 
diff --git a/groups/atom/DATA.txt b/groups/knl/DATA.txt
similarity index 60%
copy from groups/atom/DATA.txt
copy to groups/knl/DATA.txt
index 9349354..61a915b 100644
--- a/groups/atom/DATA.txt
+++ b/groups/knl/DATA.txt
@@ -4,8 +4,8 @@ EVENTSET
 FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
-PMC0  L1D_CACHE_LD
-PMC1  L1D_CACHE_ST
+PMC0  MEM_UOPS_RETIRED_ALL_LOADS
+PMC1  MEM_UOPS_RETIRED_ALL_STORES
 
 METRICS
 Runtime (RDTSC) [s] time
@@ -16,7 +16,7 @@ Load to store ratio PMC0/PMC1
 
 LONG
 Formulas:
-Load to store ratio = L1D_CACHE_LD/L1D_CACHE_ST
+Load to store ratio = MEM_UOPS_RETIRED_ALL_LOADS/MEM_UOPS_RETIRED_ALL_STORES
 -
-This is a simple metric to determine your load to store ratio.
+This is a metric to determine your load to store ratio.
 
diff --git a/groups/broadwellD/CLOCK.txt b/groups/knl/ENERGY.txt
similarity index 53%
copy from groups/broadwellD/CLOCK.txt
copy to groups/knl/ENERGY.txt
index 595d3a1..9fd5045 100644
--- a/groups/broadwellD/CLOCK.txt
+++ b/groups/knl/ENERGY.txt
@@ -4,20 +4,30 @@ EVENTSET
 FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
+TMP0  TEMP_CORE
 PWR0  PWR_PKG_ENERGY
+PWR1  PWR_PP0_ENERGY
+PWR3  PWR_DRAM_ENERGY
 
 METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
+Temperature [C]  TMP0
 Energy [J]  PWR0
 Power [W] PWR0/time
+Energy PP0 [J]  PWR1
+Power PP0 [W] PWR1/time
+Energy DRAM [J]  PWR3
+Power DRAM [W] PWR3/time
 
 LONG
 Formula:
-Power =  PWR_PKG_ENERGY / time
+Power = PWR_PKG_ENERGY / time
+Power PP0 = PWR_PP0_ENERGY / time
+Power DRAM = PWR_DRAM_ENERGY / time
 -
-Broadwell implements the new RAPL interface. This interface enables to
+Knights Landing implements the new RAPL interface. This interface enables to
 monitor the consumed energy on the package (socket) level.
 
diff --git a/groups/knl/FLOPS_DP.txt b/groups/knl/FLOPS_DP.txt
new file mode 100644
index 0000000..af2e248
--- /dev/null
+++ b/groups/knl/FLOPS_DP.txt
@@ -0,0 +1,34 @@
+SHORT Double Precision MFLOP/s
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  UOPS_RETIRED_SCALAR_SIMD
+PMC1  UOPS_RETIRED_PACKED_SIMD
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+DP MFLOP/s (SSE assumed) 1.0E-06*((PMC1*2.0)+PMC0)/time
+DP MFLOP/s (AVX assumed) 1.0E-06*((PMC1*4.0)+PMC0)/time
+DP MFLOP/s (AVX512 assumed) 1.0E-06*((PMC1*8.0)+PMC0)/time
+Packed MUOPS/s   1.0E-06*(PMC1)/time
+Scalar MUOPS/s 1.0E-06*PMC0/time
+
+LONG
+Formula:
+DP MFLOP/s (SSE assumed) = 1.0E-06*(UOPS_RETIRED_PACKED_SIMD*2+UOPS_RETIRED_SCALAR_SIMD)/runtime
+DP MFLOP/s (AVX assumed) = 1.0E-06*(UOPS_RETIRED_PACKED_SIMD*4+UOPS_RETIRED_SCALAR_SIMD)/runtime
+DP MFLOP/s (AVX512 assumed) = 1.0E-06*(UOPS_RETIRED_PACKED_SIMD*8+UOPS_RETIRED_SCALAR_SIMD)/runtime
+Packed MUOPS/s = 1.0E-06*(UOPS_RETIRED_PACKED_SIMD)/runtime
+Scalar MUOPS/s = 1.0E-06*UOPS_RETIRED_SCALAR_SIMD/runtime
+-
+AVX/SSE scalar and packed double precision FLOP rates. The Xeon Phi (Knights Landing) provides
+no possibility to differentiate between double and single precision FLOP/s. Therefore, we only
+assume that the printed MFLOP/s value is for double-precision code. Moreover, there is no way
+to distinguish between SSE, AVX or AVX512 packed SIMD operations. Therefore, this group prints
+out the MFLOP/s for different SIMD techniques.
+WARNING: The events also count for integer arithmetics
diff --git a/groups/knl/FLOPS_SP.txt b/groups/knl/FLOPS_SP.txt
new file mode 100644
index 0000000..750c808
--- /dev/null
+++ b/groups/knl/FLOPS_SP.txt
@@ -0,0 +1,34 @@
+SHORT Single Precision MFLOP/s
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  UOPS_RETIRED_SCALAR_SIMD
+PMC1  UOPS_RETIRED_PACKED_SIMD
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+SP MFLOP/s (SSE assumed) 1.0E-06*(PMC1*4.0+PMC0)/time
+SP MFLOP/s (AVX assumed) 1.0E-06*(PMC1*8.0+PMC0)/time
+SP MFLOP/s (AVX512 assumed) 1.0E-06*(PMC1*16.0+PMC0)/time
+Packed MUOPS/s   1.0E-06*(PMC1)/time
+Scalar MUOPS/s 1.0E-06*PMC0/time
+
+LONG
+Formula:
+SP MFLOP/s (SSE assumed) = 1.0E-06*(UOPS_RETIRED_PACKED_SIMD*4+UOPS_RETIRED_SCALAR_SIMD)/runtime
+SP MFLOP/s (AVX assumed) = 1.0E-06*(UOPS_RETIRED_PACKED_SIMD*8+UOPS_RETIRED_SCALAR_SIMD)/runtime
+SP MFLOP/s (AVX512 assumed) = 1.0E-06*(UOPS_RETIRED_PACKED_SIMD*16+UOPS_RETIRED_SCALAR_SIMD)/runtime
+Packed MUOPS/s = 1.0E-06*(UOPS_RETIRED_PACKED_SIMD)/runtime
+Scalar MUOPS/s = 1.0E-06*UOPS_RETIRED_SCALAR_SIMD/runtime
+-
+AVX/SSE scalar and packed single precision FLOP rates. The Xeon Phi (Knights Landing) provides
+no possibility to differentiate between double and single precision FLOP/s. Therefore, we only
+assume that the printed MFLOP/s value is for single-precision code. Moreover, there is no way
+to distinguish between SSE, AVX or AVX512 packed SIMD operations. Therefore, this group prints
+out the MFLOP/s for different SIMD techniques.
+WARNING: The events also count for integer arithmetics
diff --git a/groups/knl/FRONTEND_STALLS.txt b/groups/knl/FRONTEND_STALLS.txt
new file mode 100644
index 0000000..9c725db
--- /dev/null
+++ b/groups/knl/FRONTEND_STALLS.txt
@@ -0,0 +1,24 @@
+SHORT Frontend stalls 
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  NO_ALLOC_CYCLES_ALL
+PMC1  NO_ALLOC_CYCLES_ALL_COUNT
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Frontend stalls PMC1
+Avg. frontend stall duration [cyc] PMC0/PMC1
+Frontend stall ratio PMC0/FIXC1
+
+LONG
+Formulas:
+-
+Frontend stalls = NO_ALLOC_CYCLES_ALL_COUNT
+Avg. frontend stall duration [cyc] = NO_ALLOC_CYCLES_ALL/NO_ALLOC_CYCLES_ALL_COUNT
+Frontend stall ratio = NO_ALLOC_CYCLES_ALL/CPU_CLK_UNHALTED_CORE
diff --git a/groups/knl/HBM.txt b/groups/knl/HBM.txt
new file mode 100644
index 0000000..ac44418
--- /dev/null
+++ b/groups/knl/HBM.txt
@@ -0,0 +1,46 @@
+SHORT Memory bandwidth in MBytes/s for High Bandwidth Memory (HBM)
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+EDBOX0C0 EDC_RPQ_INSERTS
+EDBOX1C0 EDC_RPQ_INSERTS
+EDBOX2C0 EDC_RPQ_INSERTS
+EDBOX3C0 EDC_RPQ_INSERTS
+EDBOX4C0 EDC_RPQ_INSERTS
+EDBOX5C0 EDC_RPQ_INSERTS
+EDBOX6C0 EDC_RPQ_INSERTS
+EDBOX7C0 EDC_RPQ_INSERTS
+EDBOX0C1 EDC_WPQ_INSERTS
+EDBOX1C1 EDC_WPQ_INSERTS
+EDBOX2C1 EDC_WPQ_INSERTS
+EDBOX3C1 EDC_WPQ_INSERTS
+EDBOX4C1 EDC_WPQ_INSERTS
+EDBOX5C1 EDC_WPQ_INSERTS
+EDBOX6C1 EDC_WPQ_INSERTS
+EDBOX7C1 EDC_WPQ_INSERTS
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Memory read bandwidth [MBytes/s] 1.0E-06*(EDBOX0C0+EDBOX1C0+EDBOX2C0+EDBOX3C0+EDBOX4C0+EDBOX5C0+EDBOX6C0+EDBOX7C0)*64.0/time
+Memory read data volume [GBytes] 1.0E-09*(EDBOX0C0+EDBOX1C0+EDBOX2C0+EDBOX3C0+EDBOX4C0+EDBOX5C0+EDBOX6C0+EDBOX7C0)*64.0
+Memory writeback bandwidth [MBytes/s] 1.0E-06*(EDBOX0C1+EDBOX1C1+EDBOX2C1+EDBOX3C1+EDBOX4C1+EDBOX5C1+EDBOX6C1+EDBOX7C1)*64.0/time
+Memory writeback data volume [GBytes] 1.0E-09*(EDBOX0C1+EDBOX1C1+EDBOX2C1+EDBOX3C1+EDBOX4C1+EDBOX5C1+EDBOX6C1+EDBOX7C1)*64.0
+Memory bandwidth [MBytes/s] 1.0E-06*(EDBOX0C0+EDBOX1C0+EDBOX2C0+EDBOX3C0+EDBOX4C0+EDBOX5C0+EDBOX6C0+EDBOX7C0+EDBOX0C1+EDBOX1C1+EDBOX2C1+EDBOX3C1+EDBOX4C1+EDBOX5C1+EDBOX6C1+EDBOX7C1)*64.0/time
+Memory data volume [GBytes] 1.0E-09*(EDBOX0C0+EDBOX1C0+EDBOX2C0+EDBOX3C0+EDBOX4C0+EDBOX5C0+EDBOX6C0+EDBOX7C0+EDBOX0C1+EDBOX1C1+EDBOX2C1+EDBOX3C1+EDBOX4C1+EDBOX5C1+EDBOX6C1+EDBOX7C1)*64.0
+
+LONG
+Formulas:
+Memory read bandwidth [MBytes/s] = 1.0E-06*(sum(EDC_RPQ_INSERTS))*64/time
+Memory read data volume [GBytes] = 1.0E-09*(sum(EDC_RPQ_INSERTS))*64
+Memory writeback bandwidth [MBytes/s] = 1.0E-06*(sum(EDC_WPQ_INSERTS))*64/time
+Memory writeback data volume [GBytes] = 1.0E-09*(sum(EDC_WPQ_INSERTS))*64
+Memory bandwidth [MBytes/s] = 1.0E-06*(sum(EDC_RPQ_INSERTS)+sum(EDC_WPQ_INSERTS))*64/time
+Memory data volume [GBytes] = 1.0E-09*(sum(EDC_RPQ_INSERTS)+sum(EDC_WPQ_INSERTS))*64
+-
+Profiling group to measure data transfers from and to the high bandwidth memory (HBM).
+
diff --git a/groups/knl/HBM_OFFCORE.txt b/groups/knl/HBM_OFFCORE.txt
new file mode 100644
index 0000000..268344d
--- /dev/null
+++ b/groups/knl/HBM_OFFCORE.txt
@@ -0,0 +1,32 @@
+SHORT Memory bandwidth in MBytes/s for High Bandwidth Memory (HBM)
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0:MATCH0=0x4908:MATCH1=0x3F8060 OFFCORE_RESPONSE_0_OPTIONS
+PMC1:MATCH0=0x32F7:MATCH1=0x3F8060 OFFCORE_RESPONSE_1_OPTIONS
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Memory read bandwidth [MBytes/s] 1.0E-06*(PMC1:MATCH0=0x32F7:MATCH1=0x3F8060)*64.0/time
+Memory read data volume [GBytes] 1.0E-09*(PMC1:MATCH0=0x32F7:MATCH1=0x3F8060)*64.0
+Memory writeback bandwidth [MBytes/s] 1.0E-06*(PMC0:MATCH0=0x4908:MATCH1=0x3F8060)*64.0/time
+Memory writeback data volume [GBytes] 1.0E-09*(PMC0:MATCH0=0x4908:MATCH1=0x3F8060)*64.0
+Memory bandwidth [MBytes/s] 1.0E-06*(PMC0:MATCH0=0x4908:MATCH1=0x3F8060+PMC1:MATCH0=0x32F7:MATCH1=0x3F8060)*64.0/time
+Memory data volume [GBytes] 1.0E-09*(PMC0:MATCH0=0x4908:MATCH1=0x3F8060+PMC1:MATCH0=0x32F7:MATCH1=0x3F8060)*64.0
+
+LONG
+Formulas:
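+Memory read bandwidth [MBytes/s] = 1.0E-06*(OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0x32F7:MATCH1=0x3F8060)*64/time
+Memory read data volume [GBytes] = 1.0E-09*(OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0x32F7:MATCH1=0x3F8060)*64
+Memory writeback bandwidth [MBytes/s] = 1.0E-06*(OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x4908:MATCH1=0x3F8060)*64/time
+Memory writeback data volume [GBytes] = 1.0E-09*(OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x4908:MATCH1=0x3F8060)*64
+Memory bandwidth [MBytes/s] = 1.0E-06*(OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x4908:MATCH1=0x3F8060+OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0x32F7:MATCH1=0x3F8060)*64/time
+Memory data volume [GBytes] = 1.0E-09*(OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x4908:MATCH1=0x3F8060+OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0x32F7:MATCH1=0x3F8060)*64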
+-
+Profiling group to measure data transfers from and to the high bandwidth memory (HBM).
+
diff --git a/groups/knl/ICACHE.txt b/groups/knl/ICACHE.txt
new file mode 100644
index 0000000..5f11ad6
--- /dev/null
+++ b/groups/knl/ICACHE.txt
@@ -0,0 +1,25 @@
+SHORT  Instruction cache miss rate/ratio
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  ICACHE_ACCESSES
+PMC1  ICACHE_MISSES
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+L1I request rate PMC0/FIXC0
+L1I miss rate PMC1/FIXC0
+L1I miss ratio PMC1/PMC0
+
+LONG
+Formulas:
+L1I request rate = ICACHE_ACCESSES / INSTR_RETIRED_ANY
+L1I miss rate = ICACHE_MISSES / INSTR_RETIRED_ANY
+L1I miss ratio = ICACHE_MISSES / ICACHE_ACCESSES
+-
+This group measures some L1 instruction cache metrics.
diff --git a/groups/knl/L2CACHE.txt b/groups/knl/L2CACHE.txt
new file mode 100644
index 0000000..e6de92f
--- /dev/null
+++ b/groups/knl/L2CACHE.txt
@@ -0,0 +1,34 @@
+SHORT L2 cache miss rate/ratio
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  MEM_UOPS_RETIRED_L2_HIT_LOADS
+PMC1  MEM_UOPS_RETIRED_L2_MISS_LOADS
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+L2 request rate (PMC0+PMC1)/FIXC0
+L2 miss rate PMC1/FIXC0
+L2 miss ratio PMC1/(PMC0+PMC1)
+
+LONG
+Formulas:
+L2 request rate = (MEM_UOPS_RETIRED_L2_HIT_LOADS+MEM_UOPS_RETIRED_L2_MISS_LOADS)/INSTR_RETIRED_ANY
+L2 miss rate = MEM_UOPS_RETIRED_L2_MISS_LOADS/INSTR_RETIRED_ANY
+L2 miss ratio = MEM_UOPS_RETIRED_L2_MISS_LOADS/(MEM_UOPS_RETIRED_L2_HIT_LOADS+MEM_UOPS_RETIRED_L2_MISS_LOADS)
+-
+This group measures the locality of your data accesses with regard to the
+L2 cache. L2 request rate tells you how data intensive your code is
+or how many data accesses you have on average per instruction.
+The L2 miss rate gives a measure how often it was necessary to get
+cache lines from memory. And finally L2 miss ratio tells you how many of your
+memory references required a cache line to be loaded from a higher level.
+While the data cache miss rate might be given by your algorithm you should
+try to get data cache miss ratio as low as possible by increasing your cache
+reuse.
+
diff --git a/groups/knl/MEM.txt b/groups/knl/MEM.txt
new file mode 100644
index 0000000..dc7188d
--- /dev/null
+++ b/groups/knl/MEM.txt
@@ -0,0 +1,51 @@
+SHORT Memory bandwidth in MBytes/s
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+MBOX0C0 MC_CAS_READS
+MBOX0C1 MC_CAS_WRITES
+MBOX1C0 MC_CAS_READS
+MBOX1C1 MC_CAS_WRITES
+MBOX2C0 MC_CAS_READS
+MBOX2C1 MC_CAS_WRITES
+MBOX3C0 MC_CAS_READS
+MBOX3C1 MC_CAS_WRITES
+MBOX4C0 MC_CAS_READS
+MBOX4C1 MC_CAS_WRITES
+MBOX5C0 MC_CAS_READS
+MBOX5C1 MC_CAS_WRITES
+MBOX6C0 MC_CAS_READS
+MBOX6C1 MC_CAS_WRITES
+MBOX7C0 MC_CAS_READS
+MBOX7C1 MC_CAS_WRITES
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Memory read bandwidth [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0)*64.0/time
+Memory read data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0)*64.0
+Memory writeback bandwidth [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0/time
+Memory writeback data volume [GBytes] 1.0E-09*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0
+Memory bandwidth [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0/time
+Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0
+
+LONG
+Formulas:
+Memory read bandwidth [MBytes/s] = 1.0E-06*(sum(MC_CAS_READS))*64/time
+Memory read data volume [GBytes] = 1.0E-09*(sum(MC_CAS_READS))*64
+Memory writeback bandwidth [MBytes/s] = 1.0E-06*(sum(MC_CAS_WRITES))*64/time
+Memory writeback data volume [GBytes] = 1.0E-09*(sum(MC_CAS_WRITES))*64
+Memory bandwidth [MBytes/s] = 1.0E-06*(sum(MC_CAS_READS)+sum(MC_CAS_WRITES))*64/time
+Memory data volume [GBytes] = 1.0E-09*(sum(MC_CAS_READS)+sum(MC_CAS_WRITES))*64
+-
+Profiling group to measure L2 to MEM load cache bandwidth. The bandwidth is computed by the
+number of cache line allocated in the L2 cache. Since there is no possibility to retrieve
+the evicted cache lines, this group measures only the load cache bandwidth. The
+writeback metrics count only modified cache lines that are written back to go to
+exclusive state.
+The group also outputs the total load and writeback data volume transferred between memory and L2.
+
diff --git a/groups/knl/TLB_DATA.txt b/groups/knl/TLB_DATA.txt
new file mode 100644
index 0000000..5f2617f
--- /dev/null
+++ b/groups/knl/TLB_DATA.txt
@@ -0,0 +1,27 @@
+SHORT  L2 data TLB miss rate/ratio
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  PAGE_WALKS_DTLB_COUNT
+PMC1  PAGE_WALKS_DTLB_CYCLES
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+L1 DTLB misses     PMC0
+L1 DTLB miss rate  PMC0/FIXC0
+L1 DTLB miss duration [Cyc] PMC1/PMC0
+
+LONG
+Formulas:
+L1 DTLB misses = PAGE_WALKS_DTLB_COUNT
+L1 DTLB miss rate = PAGE_WALKS_DTLB_COUNT / INSTR_RETIRED_ANY
+L1 DTLB miss duration [Cyc] = PAGE_WALKS_DTLB_CYCLES / PAGE_WALKS_DTLB_COUNT
+-
+The DTLB load and store miss rates give a measure of how often a TLB miss occurred
+per instruction. The duration measures, in cycles, how long a page walk took.
+
diff --git a/groups/knl/TLB_INSTR.txt b/groups/knl/TLB_INSTR.txt
new file mode 100644
index 0000000..f3dd3ec
--- /dev/null
+++ b/groups/knl/TLB_INSTR.txt
@@ -0,0 +1,27 @@
+SHORT  L1 Instruction TLB miss rate/ratio
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  PAGE_WALKS_ITLB_COUNT
+PMC1  PAGE_WALKS_ITLB_CYCLES
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+L1 ITLB misses     PMC0
+L1 ITLB miss rate  PMC0/FIXC0
+L1 ITLB miss duration [Cyc] PMC1/PMC0
+
+
+LONG
+Formulas:
+L1 ITLB misses = PAGE_WALKS_ITLB_COUNT
+L1 ITLB miss rate = PAGE_WALKS_ITLB_COUNT / INSTR_RETIRED_ANY
+L1 ITLB miss duration [Cyc] = PAGE_WALKS_ITLB_CYCLES / PAGE_WALKS_ITLB_COUNT
+-
+The ITLB miss rates give a measure of how often a TLB miss occurred
+per instruction. The duration measures, in cycles, how long a page walk took.
diff --git a/groups/knl/UOPS_STALLS.txt b/groups/knl/UOPS_STALLS.txt
new file mode 100644
index 0000000..0252857
--- /dev/null
+++ b/groups/knl/UOPS_STALLS.txt
@@ -0,0 +1,25 @@
+SHORT UOP retirement stalls 
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  UOPS_RETIRED_STALLED_CYCLES
+PMC1  UOPS_RETIRED_STALLS
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Number of stalls PMC1
+Avg. stall duration [cyc] PMC0/PMC1
+Stall ratio PMC0/FIXC1
+
+LONG
+Formulas:
+Number of stalls = UOPS_RETIRED_STALLS
+Avg. stall duration [cyc] = UOPS_RETIRED_STALLED_CYCLES/UOPS_RETIRED_STALLS
+Stall ratio = UOPS_RETIRED_STALLED_CYCLES/CPU_CLK_UNHALTED_CORE
+-
+This group measures stalls in the UOP retirement. 
diff --git a/groups/nehalem/FLOPS_DP.txt b/groups/nehalem/FLOPS_DP.txt
index 3e75cad..01160e6 100644
--- a/groups/nehalem/FLOPS_DP.txt
+++ b/groups/nehalem/FLOPS_DP.txt
@@ -14,7 +14,7 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*2.0+PMC1)/time
+DP MFLOP/s  1.0E-06*(PMC0*2.0+PMC1)/time
 Packed MUOPS/s   1.0E-06*PMC0/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 SP MUOPS/s 1.0E-06*PMC2/time
@@ -22,7 +22,7 @@ DP MUOPS/s 1.0E-06*PMC3/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR)/runtime
+DP MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR)/runtime
 Packed MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_PACKED/runtime
 Scalar MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_SCALAR/runtime
 SP MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_SINGLE_PRECISION/runtime
diff --git a/groups/nehalem/FLOPS_SP.txt b/groups/nehalem/FLOPS_SP.txt
index 9768109..cae4705 100644
--- a/groups/nehalem/FLOPS_SP.txt
+++ b/groups/nehalem/FLOPS_SP.txt
@@ -14,7 +14,7 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s 1.0E-06*(PMC0*4.0+PMC1)/time
+SP MFLOP/s 1.0E-06*(PMC0*4.0+PMC1)/time
 Packed MUOPS/s   1.0E-06*PMC0/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 SP MUOPS/s 1.0E-06*PMC2/time
@@ -22,7 +22,7 @@ DP MUOPS/s 1.0E-06*PMC3/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR)/runtime
+SP MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR)/runtime
 Packed MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_PACKED/runtime
 Scalar MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_SCALAR/runtime
 SP MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_SINGLE_PRECISION/runtime
diff --git a/groups/nehalem/MEM.txt b/groups/nehalem/MEM.txt
index d2083f5..b528670 100644
--- a/groups/nehalem/MEM.txt
+++ b/groups/nehalem/MEM.txt
@@ -15,9 +15,9 @@ Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
 Memory read bandwidth [MBytes/s] 1.0E-06*UPMC0*64.0/time
-Memory data volume [GBytes] 1.0E-09*UPMC0*64.0
+Memory read data volume [GBytes] 1.0E-09*UPMC0*64.0
 Memory write bandwidth [MBytes/s] 1.0E-06*UPMC1*64.0/time
-Memory data volume [GBytes] 1.0E-09*UPMC1*64.0
+Memory write data volume [GBytes] 1.0E-09*UPMC1*64.0
 Memory bandwidth [MBytes/s] 1.0E-06*(UPMC0+UPMC1)*64.0/time
 Memory data volume [GBytes] 1.0E-09*(UPMC0+UPMC1)*64.0
 Remote memory read bandwidth [MBytes/s] 1.0E-06*UPMC2*64.0/time
@@ -30,9 +30,9 @@ Remote memory data volume [GBytes] 1.0E-09*(UPMC2+UPMC3)*64.0
 LONG
 Formulas:
 Memory read bandwidth [MBytes/s] = 1.0E-06*UNC_QMC_NORMAL_READS_ANY*64.0/time
-Memory data volume [GBytes] = 1.0E-09*UNC_QMC_NORMAL_READS_ANY*64.0
+Memory read data volume [GBytes] = 1.0E-09*UNC_QMC_NORMAL_READS_ANY*64.0
 Memory write bandwidth [MBytes/s] = 1.0E-06*UNC_QMC_WRITES_FULL_ANY*64.0/time
-Memory data volume [GBytes] = 1.0E-09*UNC_QMC_WRITES_FULL_ANY*64.0
+Memory write data volume [GBytes] = 1.0E-09*UNC_QMC_WRITES_FULL_ANY*64.0
 Memory bandwidth [MBytes/s] = 1.0E-06*(UNC_QMC_NORMAL_READS_ANY+UNC_QMC_WRITES_FULL_ANY)*64.0/time
 Memory data volume [GBytes] = 1.0E-09*(UNC_QMC_NORMAL_READS_ANY+UNC_QMC_WRITES_FULL_ANY)*64.0
 Remote memory read bandwidth [MBytes/s] = 1.0E-06*UNC_QHL_REQUESTS_REMOTE_READS*64.0/time
diff --git a/groups/nehalemEX/FLOPS_DP.txt b/groups/nehalemEX/FLOPS_DP.txt
index 3e75cad..01160e6 100644
--- a/groups/nehalemEX/FLOPS_DP.txt
+++ b/groups/nehalemEX/FLOPS_DP.txt
@@ -14,7 +14,7 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*2.0+PMC1)/time
+DP MFLOP/s  1.0E-06*(PMC0*2.0+PMC1)/time
 Packed MUOPS/s   1.0E-06*PMC0/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 SP MUOPS/s 1.0E-06*PMC2/time
@@ -22,7 +22,7 @@ DP MUOPS/s 1.0E-06*PMC3/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR)/runtime
+DP MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR)/runtime
 Packed MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_PACKED/runtime
 Scalar MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_SCALAR/runtime
 SP MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_SINGLE_PRECISION/runtime
diff --git a/groups/nehalemEX/FLOPS_SP.txt b/groups/nehalemEX/FLOPS_SP.txt
index 9768109..cae4705 100644
--- a/groups/nehalemEX/FLOPS_SP.txt
+++ b/groups/nehalemEX/FLOPS_SP.txt
@@ -14,7 +14,7 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s 1.0E-06*(PMC0*4.0+PMC1)/time
+SP MFLOP/s 1.0E-06*(PMC0*4.0+PMC1)/time
 Packed MUOPS/s   1.0E-06*PMC0/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 SP MUOPS/s 1.0E-06*PMC2/time
@@ -22,7 +22,7 @@ DP MUOPS/s 1.0E-06*PMC3/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR)/runtime
+SP MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR)/runtime
 Packed MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_PACKED/runtime
 Scalar MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_SCALAR/runtime
 SP MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_SINGLE_PRECISION/runtime
diff --git a/groups/nehalemEX/MEM.txt b/groups/nehalemEX/MEM.txt
index 510f27b..d3d2522 100644
--- a/groups/nehalemEX/MEM.txt
+++ b/groups/nehalemEX/MEM.txt
@@ -4,7 +4,7 @@ EVENTSET
 FIXC0   INSTR_RETIRED_ANY
 FIXC1   CPU_CLK_UNHALTED_CORE
 FIXC2   CPU_CLK_UNHALTED_REF
-WBOXFIX UNCORE_CLOCKTICKS
+WBOXFIX UNCORE_CLOCK
 MBOX0C0 FVC_EV0_BBOX_CMDS_READS
 MBOX0C1 DRAM_CMD_CAS_WR_OPN
 MBOX1C0 FVC_EV0_BBOX_CMDS_READS
@@ -26,12 +26,13 @@ Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX1C0+MBOX0C1+MBOX1C1)*64
 
 LONG
 Formulas:
-Memory read bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC0))*64.0/time
-Memory read data volume [GBytes] = 1.0E-09*(SUM(MBOXxC0))*64.0
-Memory write bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC1))*64.0/time
-Memory write data volume [GBytes] = 1.0E-09*(SUM(MBOXxC1))*64.0
-Memory bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0/time
-Memory data volume [GBytes] = 1.0E-09*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0
+Uncore Clock [MHz] = 1.E-06*(UNCORE_CLOCK)/time
+Memory read bandwidth [MBytes/s] = 1.0E-06*(SUM(FVC_EV0_BBOX_CMDS_READS))*64.0/time
+Memory read data volume [GBytes] = 1.0E-09*(SUM(FVC_EV0_BBOX_CMDS_READS))*64.0
+Memory write bandwidth [MBytes/s] = 1.0E-06*(SUM(DRAM_CMD_CAS_WR_OPN))*64.0/time
+Memory write data volume [GBytes] = 1.0E-09*(SUM(DRAM_CMD_CAS_WR_OPN))*64.0
+Memory bandwidth [MBytes/s] = 1.0E-06*(SUM(FVC_EV0_BBOX_CMDS_READS)+SUM(DRAM_CMD_CAS_WR_OPN))*64.0/time
+Memory data volume [GBytes] = 1.0E-09*(SUM(FVC_EV0_BBOX_CMDS_READS)+SUM(DRAM_CMD_CAS_WR_OPN))*64.0
 -
 Profiling group to measure memory bandwidth drawn by all cores of a socket.
 On Nehalem EX it is not possible to measure the write operations with the
diff --git a/groups/pentiumm/BRANCH.txt b/groups/pentiumm/BRANCH.txt
index 157c331..269a500 100644
--- a/groups/pentiumm/BRANCH.txt
+++ b/groups/pentiumm/BRANCH.txt
@@ -2,7 +2,7 @@ SHORT Branch prediction miss rate/ratio
 
 EVENTSET
 PMC0  BR_INST_EXEC
-PMC1  BR_INST_MISSP_EXEC
+PMC1  BR_MISSP_EXEC
 
 METRICS
 Runtime (RDTSC) [s] time
@@ -10,7 +10,7 @@ Branch misprediction ratio  PMC1/PMC0
 
 LONG
 Formulas:
-Branch misprediction ratio = BR_INST_MISSP_EXEC / BR_INST_EXEC
+Branch misprediction ratio = BR_MISSP_EXEC / BR_INST_EXEC
 -
 The rates state how often on average a branch or a mispredicted branch occurred
 per instruction retired in total. The branch misprediction ratio sets directly
diff --git a/groups/pentiumm/FLOPS_DP.txt b/groups/pentiumm/FLOPS_DP.txt
index 976c44c..6e8568a 100644
--- a/groups/pentiumm/FLOPS_DP.txt
+++ b/groups/pentiumm/FLOPS_DP.txt
@@ -6,13 +6,13 @@ PMC1 EMON_SSE_SSE2_COMP_INST_RETIRED_SCALAR_DP
 
 METRICS
 Runtime (RDTSC) [s] time
-MFLOP/s  1.0E-06*(PMC0*2.0+PMC1)/time
+DP MFLOP/s  1.0E-06*(PMC0*2.0+PMC1)/time
 Packed MUOPS/s   1.0E-06*(PMC0)/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
 Formula:
-MFLOP/s =  (EMON_SSE_SSE2_COMP_INST_RETIRED_PACKED_DP*2 + EMON_SSE_SSE2_COMP_INST_RETIRED_SCALAR_DP )/ runtime
+DP MFLOP/s =  (EMON_SSE_SSE2_COMP_INST_RETIRED_PACKED_DP*2 + EMON_SSE_SSE2_COMP_INST_RETIRED_SCALAR_DP )/ runtime
 Packed MUOPS/s = 1.0E-06*(EMON_SSE_SSE2_COMP_INST_RETIRED_PACKED_DP)/time
 Scalar MUOPS/s = 1.0E-06*EMON_SSE_SSE2_COMP_INST_RETIRED_SCALAR_DP/time
 -
diff --git a/groups/pentiumm/FLOPS_SP.txt b/groups/pentiumm/FLOPS_SP.txt
index 83b73f2..f3ae15a 100644
--- a/groups/pentiumm/FLOPS_SP.txt
+++ b/groups/pentiumm/FLOPS_SP.txt
@@ -6,12 +6,12 @@ PMC1 EMON_SSE_SSE2_COMP_INST_RETIRED_SCALAR_SP
 
 METRICS
 Runtime (RDTSC) [s] time
-MFLOP/s  1.0E-06*(PMC0)/time
+SP MFLOP/s  1.0E-06*(PMC0)/time
 Scalar MUOPS/s 1.0E-06*(PMC1)/time
 
 LONG
 Formula:
-MFLOP/s =  (EMON_SSE_SSE2_COMP_INST_RETIRED_ALL_SP)/ runtime
+SP MFLOP/s =  (EMON_SSE_SSE2_COMP_INST_RETIRED_ALL_SP)/ runtime
 Scalar MUOPS/s =  (EMON_SSE_SSE2_COMP_INST_RETIRED_SCALAR_SP)/ runtime
 -
 SSE scalar and packed single precision FLOP rates.
diff --git a/groups/sandybridge/CLOCK.txt b/groups/sandybridge/CLOCK.txt
index 7a5e87d..68ed016 100644
--- a/groups/sandybridge/CLOCK.txt
+++ b/groups/sandybridge/CLOCK.txt
@@ -6,11 +6,13 @@ FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
 PWR0  PWR_PKG_ENERGY
 PWR3  PWR_DRAM_ENERGY
+UBOXFIX UNCORE_CLOCK
 
 METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+Uncore Clock [MHz] 1.E-06*UBOXFIX/time
 CPI  FIXC1/FIXC0
 Energy [J]  PWR0
 Power [W] PWR0/time
@@ -21,6 +23,7 @@ LONG
 Formula:
 Power =  PWR_PKG_ENERGY / time
 Power DRAM =  PWR_DRAM_ENERGY / time
+Uncore Clock [MHz] = 1.E-06 * UNCORE_CLOCK / time
 -
 SandyBridge implements the new RAPL interface. This interface enables to
 monitor the consumed energy on the package (socket) and DRAM level.
diff --git a/groups/sandybridge/CYCLE_ACTIVITY.txt b/groups/sandybridge/CYCLE_ACTIVITY.txt
new file mode 100644
index 0000000..76c05d4
--- /dev/null
+++ b/groups/sandybridge/CYCLE_ACTIVITY.txt
@@ -0,0 +1,29 @@
+SHORT Cycle Activities
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC1 CYCLE_ACTIVITY_STALLS_L2_PENDING
+PMC2 CYCLE_ACTIVITY_STALLS_L1D_PENDING
+PMC3 CYCLE_ACTIVITY_CYCLES_NO_DISPATCH
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Cycles without execution [%] PMC3/FIXC1*100
+Cycles with stalls due to L1D [%] PMC2/FIXC1*100
+Cycles with stalls due to L2 [%] PMC1/FIXC1*100
+
+LONG
+Formulas:
+Cycles without execution [%] = 100.0 * CYCLE_ACTIVITY_CYCLES_NO_DISPATCH/CPU_CLK_UNHALTED_CORE
+Cycles with stalls due to L1D [%] = 100.0 * CYCLE_ACTIVITY_STALLS_L1D_PENDING/CPU_CLK_UNHALTED_CORE
+Cycles with stalls due to L2 [%] = 100.0 * CYCLE_ACTIVITY_STALLS_L2_PENDING/CPU_CLK_UNHALTED_CORE
+--
+This performance group measures the execution stalls due to load misses in
+the L1D and L2 cache. The group measures only the cycles where the CPU cannot
+execute any instruction, it does not measure the duration of stalls in the cache
+layers.
diff --git a/groups/sandybridge/FLOPS_DP.txt b/groups/sandybridge/FLOPS_DP.txt
index 244e5ce..1ac2d97 100644
--- a/groups/sandybridge/FLOPS_DP.txt
+++ b/groups/sandybridge/FLOPS_DP.txt
@@ -13,15 +13,15 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
-AVX MFLOP/s  1.0E-06*(PMC2*4.0)/time
+DP MFLOP/s  1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
+AVX DP MFLOP/s  1.0E-06*(PMC2*4.0)/time
 Packed MUOPS/s   1.0E-06*(PMC0+PMC2)/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_DOUBLE*4)/runtime
-AVX MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_DOUBLE*4)/runtime
+DP MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_DOUBLE*4)/runtime
+AVX DP MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_DOUBLE*4)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED_DOUBLE+SIMD_FP_256_PACKED_DOUBLE)/runtime
 Scalar MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_SCALAR_DOUBLE/runtime
 -
diff --git a/groups/sandybridge/FLOPS_SP.txt b/groups/sandybridge/FLOPS_SP.txt
index 8cd8de2..0088847 100644
--- a/groups/sandybridge/FLOPS_SP.txt
+++ b/groups/sandybridge/FLOPS_SP.txt
@@ -13,15 +13,15 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*4.0+PMC1+PMC2*8.0)/time
-AVX MFLOP/s  1.0E-06*(PMC2*8.0)/time
+SP MFLOP/s  1.0E-06*(PMC0*4.0+PMC1+PMC2*8.0)/time
+AVX SP MFLOP/s  1.0E-06*(PMC2*8.0)/time
 Packed MUOPS/s   1.0E-06*(PMC0+PMC2)/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_SINGLE*8)/runtime
-AVX MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_SINGLE*8)/runtime
+SP MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_SINGLE*8)/runtime
+AVX SP MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_SINGLE*8)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED_SINGLE+SIMD_FP_256_PACKED_SINGLE)/runtime
 Scalar MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_SCALAR_SINGLE/runtime
 -
diff --git a/groups/sandybridge/L3CACHE.txt b/groups/sandybridge/L3CACHE.txt
index c1cd039..d15e6e3 100644
--- a/groups/sandybridge/L3CACHE.txt
+++ b/groups/sandybridge/L3CACHE.txt
@@ -4,30 +4,31 @@ EVENTSET
 FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
-PMC0  MEM_LOAD_UOPS_RETIRED_L3_ALL
-PMC1  MEM_LOAD_UOPS_RETIRED_L3_MISS
-PMC2  UOPS_RETIRED_ALL
+PMC0:MATCH0=0x0081:MATCH1=0x3fffc0 OFFCORE_RESPONSE_0_OPTIONS
+PMC1:MATCH0=0x0081:MATCH1=0x1 OFFCORE_RESPONSE_1_OPTIONS
+PMC2 L1D_REPLACEMENT
 
 METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-L3 request rate PMC0/PMC2
-L3 miss rate PMC1/PMC2
-L3 miss ratio PMC1/PMC0
+L3 request rate PMC1:MATCH0=0x0081:MATCH1=0x1/FIXC0
+L3 miss rate PMC0:MATCH0=0x0081:MATCH1=0x3fffc0/FIXC0
+L3 miss ratio PMC0:MATCH0=0x0081:MATCH1=0x3fffc0/PMC1:MATCH0=0x0081:MATCH1=0x1
 
 LONG
 Formulas:
-L3 request rate = MEM_LOAD_UOPS_RETIRED_L3_ALL/UOPS_RETIRED_ALL
-L3 miss rate = MEM_LOAD_UOPS_RETIRED_L3_MISS/UOPS_RETIRED_ALL
-L3 miss ratio = MEM_LOAD_UOPS_RETIRED_L3_MISS/MEM_LOAD_UOPS_RETIRED_L3_ALL
+L3 request rate = OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0x0081:MATCH1=0x1/INSTR_RETIRED_ANY
+L3 miss rate = OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x0081:MATCH1=0x3fffc0/INSTR_RETIRED_ANY
+L3 miss ratio = OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x0081:MATCH1=0x3fffc0/OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0x0081:MATCH1=0x1
 -
 This group measures the locality of your data accesses with regard to the
 L3 cache. L3 request rate tells you how data intensive your code is
 or how many data accesses you have on average per instruction.
 The L3 miss rate gives a measure how often it was necessary to get
-cache lines from memory. And finally L3 miss ratio tells you how many of your
+cache lines from L3 compared to all loaded cache lines in L1.
+And finally L3 miss ratio tells you how many of your
 memory references required a cache line to be loaded from a higher level.
 While the data cache miss rate might be given by your algorithm you should
 try to get data cache miss ratio as low as possible by increasing your cache reuse.
diff --git a/groups/sandybridge/PORT_USAGE.txt b/groups/sandybridge/PORT_USAGE.txt
new file mode 100644
index 0000000..68d6630
--- /dev/null
+++ b/groups/sandybridge/PORT_USAGE.txt
@@ -0,0 +1,40 @@
+SHORT  Execution port utilization
+
+REQUIRE_NOHT
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  UOPS_DISPATCHED_PORT_PORT_0
+PMC1  UOPS_DISPATCHED_PORT_PORT_1
+PMC2  UOPS_DISPATCHED_PORT_PORT_2
+PMC3  UOPS_DISPATCHED_PORT_PORT_3
+PMC4  UOPS_DISPATCHED_PORT_PORT_4
+PMC5  UOPS_DISPATCHED_PORT_PORT_5
+
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Port0 usage ratio PMC0/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port1 usage ratio PMC1/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port2 usage ratio PMC2/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port3 usage ratio PMC3/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port4 usage ratio PMC4/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port5 usage ratio PMC5/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+
+LONG
+Formulas:
+Port0 usage ratio = UOPS_DISPATCHED_PORT_PORT_0/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port1 usage ratio = UOPS_DISPATCHED_PORT_PORT_1/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port2 usage ratio = UOPS_DISPATCHED_PORT_PORT_2/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port3 usage ratio = UOPS_DISPATCHED_PORT_PORT_3/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port4 usage ratio = UOPS_DISPATCHED_PORT_PORT_4/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port5 usage ratio = UOPS_DISPATCHED_PORT_PORT_5/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+-
+This group measures the execution port utilization in a CPU core. The group can
+only be measured when HyperThreading is disabled because only then each CPU core
+can program eight counters.
diff --git a/groups/sandybridge/UOPS.txt b/groups/sandybridge/UOPS.txt
index 178aec5..8f697ff 100644
--- a/groups/sandybridge/UOPS.txt
+++ b/groups/sandybridge/UOPS.txt
@@ -7,7 +7,6 @@ FIXC2 CPU_CLK_UNHALTED_REF
 PMC0  UOPS_ISSUED_ANY
 PMC1  UOPS_EXECUTED_THREAD
 PMC2  UOPS_RETIRED_ALL
-PMC3  UOPS_ISSUED_FLAGS_MERGE
 
 
 
@@ -17,14 +16,12 @@ Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
 Issued UOPs PMC0
-Merged UOPs PMC3
 Executed UOPs PMC1
 Retired UOPs PMC2
 
 LONG
 Formula:
 Issued UOPs = UOPS_ISSUED_ANY
-Merged UOPs = UOPS_ISSUED_FLAGS_MERGE
 Executed UOPs = UOPS_EXECUTED_THREAD
 Retired UOPs = UOPS_RETIRED_ALL
 -
diff --git a/groups/sandybridgeEP/CACHES.txt b/groups/sandybridgeEP/CACHES.txt
index 889cca8..60ecce5 100644
--- a/groups/sandybridgeEP/CACHES.txt
+++ b/groups/sandybridgeEP/CACHES.txt
@@ -47,7 +47,7 @@ L1 to/from L2 data volume [GBytes] 1.0E-09*(PMC0+PMC1)*64.0
 L3 to L2 load bandwidth [MBytes/s]  1.0E-06*PMC2*64.0/time
 L3 to L2 load data volume [GBytes]  1.0E-09*PMC2*64.0
 L2 to L3 evict bandwidth [MBytes/s]  1.0E-06*PMC3*64.0/time
-L2 to L3 evict data volume [GBytes]  1.0E-06*PMC3*64.0
+L2 to L3 evict data volume [GBytes]  1.0E-09*PMC3*64.0
 L2 to/from L3 bandwidth [MBytes/s] 1.0E-06*(PMC2+PMC3)*64.0/time
 L2 to/from L3 data volume [GBytes] 1.0E-09*(PMC2+PMC3)*64.0
 System to L3 bandwidth [MBytes/s] 1.0E-06*(CBOX0C0:STATE=0x3F+CBOX1C0:STATE=0x3F+CBOX2C0:STATE=0x3F+CBOX3C0:STATE=0x3F+CBOX4C0:STATE=0x3F+CBOX5C0:STATE=0x3F+CBOX6C0:STATE=0x3F+CBOX7C0:STATE=0x3F)*64.0/time
diff --git a/groups/sandybridgeEP/CLOCK.txt b/groups/sandybridgeEP/CLOCK.txt
index 7a5e87d..68ed016 100644
--- a/groups/sandybridgeEP/CLOCK.txt
+++ b/groups/sandybridgeEP/CLOCK.txt
@@ -6,11 +6,13 @@ FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
 PWR0  PWR_PKG_ENERGY
 PWR3  PWR_DRAM_ENERGY
+UBOXFIX UNCORE_CLOCK
 
 METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+Uncore Clock [MHz] 1.E-06*UBOXFIX/time
 CPI  FIXC1/FIXC0
 Energy [J]  PWR0
 Power [W] PWR0/time
@@ -21,6 +23,7 @@ LONG
 Formula:
 Power =  PWR_PKG_ENERGY / time
 Power DRAM =  PWR_DRAM_ENERGY / time
+Uncore Clock [MHz] = 1.E-06 * UNCORE_CLOCK / time
 -
 SandyBridge implements the new RAPL interface. This interface enables to
 monitor the consumed energy on the package (socket) and DRAM level.
diff --git a/groups/sandybridgeEP/CYCLE_ACTIVITY.txt b/groups/sandybridgeEP/CYCLE_ACTIVITY.txt
new file mode 100644
index 0000000..76c05d4
--- /dev/null
+++ b/groups/sandybridgeEP/CYCLE_ACTIVITY.txt
@@ -0,0 +1,29 @@
+SHORT Cycle Activities
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC1 CYCLE_ACTIVITY_STALLS_L2_PENDING
+PMC2 CYCLE_ACTIVITY_STALLS_L1D_PENDING
+PMC3 CYCLE_ACTIVITY_CYCLES_NO_DISPATCH
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Cycles without execution [%] PMC3/FIXC1*100
+Cycles with stalls due to L1D [%] PMC2/FIXC1*100
+Cycles with stalls due to L2 [%] PMC1/FIXC1*100
+
+LONG
+Formulas:
+Cycles without execution [%] = 100.0 * CYCLE_ACTIVITY_CYCLES_NO_DISPATCH/CPU_CLK_UNHALTED_CORE
+Cycles with stalls due to L1D [%] = 100.0 * CYCLE_ACTIVITY_STALLS_L1D_PENDING/CPU_CLK_UNHALTED_CORE
+Cycles with stalls due to L2 [%] = 100.0 * CYCLE_ACTIVITY_STALLS_L2_PENDING/CPU_CLK_UNHALTED_CORE
+--
+This performance group measures the execution stalls due to load misses in
+the L1D and L2 cache. The group measures only the cycles where the CPU cannot
+execute any instruction, it does not measure the duration of stalls in the cache
+layers.
diff --git a/groups/sandybridgeEP/FLOPS_DP.txt b/groups/sandybridgeEP/FLOPS_DP.txt
index 244e5ce..1ac2d97 100644
--- a/groups/sandybridgeEP/FLOPS_DP.txt
+++ b/groups/sandybridgeEP/FLOPS_DP.txt
@@ -13,15 +13,15 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
-AVX MFLOP/s  1.0E-06*(PMC2*4.0)/time
+DP MFLOP/s  1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
+AVX DP MFLOP/s  1.0E-06*(PMC2*4.0)/time
 Packed MUOPS/s   1.0E-06*(PMC0+PMC2)/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_DOUBLE*4)/runtime
-AVX MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_DOUBLE*4)/runtime
+DP MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_DOUBLE*4)/runtime
+AVX DP MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_DOUBLE*4)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED_DOUBLE+SIMD_FP_256_PACKED_DOUBLE)/runtime
 Scalar MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_SCALAR_DOUBLE/runtime
 -
diff --git a/groups/sandybridgeEP/FLOPS_SP.txt b/groups/sandybridgeEP/FLOPS_SP.txt
index 8cd8de2..0088847 100644
--- a/groups/sandybridgeEP/FLOPS_SP.txt
+++ b/groups/sandybridgeEP/FLOPS_SP.txt
@@ -13,15 +13,15 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*4.0+PMC1+PMC2*8.0)/time
-AVX MFLOP/s  1.0E-06*(PMC2*8.0)/time
+SP MFLOP/s  1.0E-06*(PMC0*4.0+PMC1+PMC2*8.0)/time
+AVX SP MFLOP/s  1.0E-06*(PMC2*8.0)/time
 Packed MUOPS/s   1.0E-06*(PMC0+PMC2)/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_SINGLE*8)/runtime
-AVX MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_SINGLE*8)/runtime
+SP MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_SINGLE*8)/runtime
+AVX SP MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_SINGLE*8)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED_SINGLE+SIMD_FP_256_PACKED_SINGLE)/runtime
 Scalar MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_SCALAR_SINGLE/runtime
 -
diff --git a/groups/sandybridgeEP/L3CACHE.txt b/groups/sandybridgeEP/L3CACHE.txt
index 28766be..d15e6e3 100644
--- a/groups/sandybridgeEP/L3CACHE.txt
+++ b/groups/sandybridgeEP/L3CACHE.txt
@@ -4,32 +4,32 @@ EVENTSET
 FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
-PMC0  MEM_LOAD_UOPS_RETIRED_L3_ALL
-PMC1  MEM_LOAD_UOPS_RETIRED_L3_MISS
-PMC2  UOPS_RETIRED_ALL
+PMC0:MATCH0=0x0081:MATCH1=0x3fffc0 OFFCORE_RESPONSE_0_OPTIONS
+PMC1:MATCH0=0x0081:MATCH1=0x1 OFFCORE_RESPONSE_1_OPTIONS
+PMC2 L1D_REPLACEMENT
 
 METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-L3 request rate PMC0/PMC2
-L3 miss rate PMC1/PMC2
-L3 miss ratio PMC1/PMC0
+L3 request rate PMC1:MATCH0=0x0081:MATCH1=0x1/FIXC0
+L3 miss rate PMC0:MATCH0=0x0081:MATCH1=0x3fffc0/FIXC0
+L3 miss ratio PMC0:MATCH0=0x0081:MATCH1=0x3fffc0/PMC1:MATCH0=0x0081:MATCH1=0x1
 
 LONG
 Formulas:
-L3 request rate = MEM_LOAD_UOPS_RETIRED_L3_ALL/UOPS_RETIRED_ALL
-L3 miss rate = MEM_LOAD_UOPS_RETIRED_L3_MISS/UOPS_RETIRED_ALL
-L3 miss ratio = MEM_LOAD_UOPS_RETIRED_L3_MISS/MEM_LOAD_UOPS_RETIRED_L3_ALL
+L3 request rate = OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0x0081:MATCH1=0x1/INSTR_RETIRED_ANY
+L3 miss rate = OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x0081:MATCH1=0x3fffc0/INSTR_RETIRED_ANY
+L3 miss ratio = OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x0081:MATCH1=0x3fffc0/OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0x0081:MATCH1=0x1
 -
 This group measures the locality of your data accesses with regard to the
-L3 cache. The L3 request rate tells you how data intensive your code is
+L3 cache. L3 request rate tells you how data intensive your code is
 or how many data accesses you have on average per instruction.
 The L3 miss rate gives a measure how often it was necessary to get
-cache lines from memory. And finally L3 miss ratio tells you how many of your
-memory references required a cache line to be loaded from a higher level as they were not
-stored in the L3 cache.
+cache lines from L3 compared to all loaded cache lines in L1.
+And finally L3 miss ratio tells you how many of your
+memory references required a cache line to be loaded from a higher level.
 While the data cache miss rate might be given by your algorithm you should
 try to get data cache miss ratio as low as possible by increasing your cache reuse.
 
diff --git a/groups/sandybridgeEP/MEM_DP.txt b/groups/sandybridgeEP/MEM_DP.txt
index 0193575..0406226 100644
--- a/groups/sandybridgeEP/MEM_DP.txt
+++ b/groups/sandybridgeEP/MEM_DP.txt
@@ -37,9 +37,12 @@ Memory write bandwidth [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1)*64.0
 Memory write data volume [GBytes] 1.0E-09*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1)*64.0
 Memory bandwidth [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1)*64.0/time
 Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1)*64.0
+Operational intensity (PMC0*2.0+PMC1+PMC2*4.0)/((MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1)*64.0)
 
 LONG
 Formula:
+Power [W] = PWR_PKG_ENERGY/runtime
+Power DRAM [W] = PWR_DRAM_ENERGY/runtime
 MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_DOUBLE*4)/runtime
 AVX MFLOP/s = 1.0E-06*(SIMD_FP_256_PACKED_DOUBLE*4)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED_DOUBLE+SIMD_FP_256_PACKED_DOUBLE)/runtime
@@ -50,6 +53,7 @@ Memory write bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC1))*64.0/time
 Memory write data volume [GBytes] = 1.0E-09*(SUM(MBOXxC1))*64.0
 Memory bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0/time
 Memory data volume [GBytes] = 1.0E-09*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0
+Operational intensity = (FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_DOUBLE*4)/((SUM(MBOXxC0)+SUM(MBOXxC1))*64.0)
 --
 Profiling group to measure memory bandwidth drawn by all cores of a socket.
 Since this group is based on Uncore events it is only possible to measure on
@@ -57,3 +61,6 @@ a per socket base. Also outputs total data volume transferred from main memory.
 SSE scalar and packed double precision FLOP rates. Also reports on packed AVX
 32b instructions.  Please note that the current FLOP measurements on SandyBridge
 are potentially wrong. So you cannot trust these counters at the moment!
+The operational intensity is calculated using the FP values of the cores and the
+memory data volume of the whole socket. The actual operational intensity for
+multiple CPUs can be found in the statistics table in the Sum column.
diff --git a/groups/sandybridgeEP/MEM_SP.txt b/groups/sandybridgeEP/MEM_SP.txt
index 9e651fa..f78f56a 100644
--- a/groups/sandybridgeEP/MEM_SP.txt
+++ b/groups/sandybridgeEP/MEM_SP.txt
@@ -37,6 +37,7 @@ Memory write bandwidth [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1)*64.0
 Memory write data volume [GBytes] 1.0E-09*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1)*64.0
 Memory bandwidth [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1)*64.0/time
 Memory data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1)*64.0
+Operational intensity (PMC0*4.0+PMC1+PMC2*8.0)/((MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1)*64.0)
 
 LONG
 Formula:
@@ -52,6 +53,7 @@ Memory write bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC1))*64.0/time
 Memory write data volume [GBytes] = 1.0E-09*(SUM(MBOXxC1))*64.0
 Memory bandwidth [MBytes/s] = 1.0E-06*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0/time
 Memory data volume [GBytes] = 1.0E-09*(SUM(MBOXxC0)+SUM(MBOXxC1))*64.0
+Operational intensity = (FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR+SIMD_FP_256_PACKED_SINGLE*8)/((SUM(MBOXxC0)+SUM(MBOXxC1))*64.0)
 --
 Profiling group to measure memory bandwidth drawn by all cores of a socket.
 Since this group is based on Uncore events it is only possible to measure on
@@ -59,3 +61,6 @@ a per socket base. Also outputs total data volume transferred from main memory.
 SSE scalar and packed single precision FLOP rates. Also reports on packed AVX
 32b instructions. Please note that the current FLOP measurements on SandyBridge
 are potentially wrong. So you cannot trust these counters at the moment!
+The operational intensity is calculated using the FP values of the cores and the
+memory data volume of the whole socket. The actual operational intensity for
+multiple CPUs can be found in the statistics table in the Sum column.
diff --git a/groups/sandybridgeEP/PORT_USAGE.txt b/groups/sandybridgeEP/PORT_USAGE.txt
new file mode 100644
index 0000000..68d6630
--- /dev/null
+++ b/groups/sandybridgeEP/PORT_USAGE.txt
@@ -0,0 +1,40 @@
+SHORT  Execution port utilization
+
+REQUIRE_NOHT
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  UOPS_DISPATCHED_PORT_PORT_0
+PMC1  UOPS_DISPATCHED_PORT_PORT_1
+PMC2  UOPS_DISPATCHED_PORT_PORT_2
+PMC3  UOPS_DISPATCHED_PORT_PORT_3
+PMC4  UOPS_DISPATCHED_PORT_PORT_4
+PMC5  UOPS_DISPATCHED_PORT_PORT_5
+
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Port0 usage ratio PMC0/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port1 usage ratio PMC1/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port2 usage ratio PMC2/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port3 usage ratio PMC3/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port4 usage ratio PMC4/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+Port5 usage ratio PMC5/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5)
+
+LONG
+Formulas:
+Port0 usage ratio = UOPS_DISPATCHED_PORT_PORT_0/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port1 usage ratio = UOPS_DISPATCHED_PORT_PORT_1/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port2 usage ratio = UOPS_DISPATCHED_PORT_PORT_2/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port3 usage ratio = UOPS_DISPATCHED_PORT_PORT_3/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port4 usage ratio = UOPS_DISPATCHED_PORT_PORT_4/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port5 usage ratio = UOPS_DISPATCHED_PORT_PORT_5/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+-
+This group measures the execution port utilization in a CPU core. The group can
+only be measured when HyperThreading is disabled because only then each CPU core
+can program eight counters.
diff --git a/groups/sandybridgeEP/UOPS.txt b/groups/sandybridgeEP/UOPS.txt
index 178aec5..8f697ff 100644
--- a/groups/sandybridgeEP/UOPS.txt
+++ b/groups/sandybridgeEP/UOPS.txt
@@ -7,7 +7,6 @@ FIXC2 CPU_CLK_UNHALTED_REF
 PMC0  UOPS_ISSUED_ANY
 PMC1  UOPS_EXECUTED_THREAD
 PMC2  UOPS_RETIRED_ALL
-PMC3  UOPS_ISSUED_FLAGS_MERGE
 
 
 
@@ -17,14 +16,12 @@ Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
 Issued UOPs PMC0
-Merged UOPs PMC3
 Executed UOPs PMC1
 Retired UOPs PMC2
 
 LONG
 Formula:
 Issued UOPs = UOPS_ISSUED_ANY
-Merged UOPs = UOPS_ISSUED_FLAGS_MERGE
 Executed UOPs = UOPS_EXECUTED_THREAD
 Retired UOPs = UOPS_RETIRED_ALL
 -
diff --git a/groups/silvermont/MEM_LAT.txt b/groups/silvermont/MEM_LAT.txt
deleted file mode 100644
index 516b135..0000000
--- a/groups/silvermont/MEM_LAT.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-SHORT Average data read latency
-
-EVENTSET
-FIXC0 INSTR_RETIRED_ANY
-FIXC1 CPU_CLK_UNHALTED_CORE
-FIXC2 CPU_CLK_UNHALTED_REF
-PMC0  OFFCORE_RESPONSE_0_DMND_DATA_RD_AVG_LAT
-PMC1  OFFCORE_RESPONSE_1_DMND_DATA_RD_ANY
-
-METRICS
-Runtime (RDTSC) [s] time
-Runtime unhalted [s] FIXC1*inverseClock
-Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
-CPI  FIXC1/FIXC0
-Average data read latency [cyc/read] PMC0/PMC1
-
-LONG
-Formulas:
-Average data read latency [cyc/read] = OFFCORE_RESPONSE_0_DMND_DATA_RD_AVG_LAT/OFFCORE_RESPONSE_1_DMND_DATA_RD_ANY
--
-The Offcore request facility of Intel Silvermont processors can be used to determine
-the average data read latency. It includes all operations done to read data like
-snoops and hits in upper cache levels.
diff --git a/groups/skylake/CLOCK.txt b/groups/skylake/CLOCK.txt
index 79a4480..591451d 100644
--- a/groups/skylake/CLOCK.txt
+++ b/groups/skylake/CLOCK.txt
@@ -6,11 +6,13 @@ FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
 PWR0  PWR_PKG_ENERGY
 PWR3  PWR_DRAM_ENERGY
+UBOXFIX UNCORE_CLOCK
 
 METRICS
 Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+Uncore Clock [MHz] 1.E-06*UBOXFIX/time
 CPI  FIXC1/FIXC0
 Energy [J]  PWR0
 Power [W] PWR0/time
@@ -21,6 +23,7 @@ LONG
 Formula:
 Power =  PWR_PKG_ENERGY / time
 Power DRAM =  PWR_DRAM_ENERGY / time
+Uncore Clock [MHz] = 1.E-06 * UNCORE_CLOCK / time
 -
 Skylake implements the RAPL interface. This interface enables to
 monitor the consumed energy on the package (socket) and DRAM level.
diff --git a/groups/skylake/CYCLE_ACTIVITY.txt b/groups/skylake/CYCLE_ACTIVITY.txt
new file mode 100644
index 0000000..820a10c
--- /dev/null
+++ b/groups/skylake/CYCLE_ACTIVITY.txt
@@ -0,0 +1,29 @@
+SHORT Cycle Activities
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0 CYCLE_ACTIVITY_STALLS_L2_PENDING
+PMC1 CYCLE_ACTIVITY_STALLS_LDM_PENDING
+PMC2 CYCLE_ACTIVITY_STALLS_L1D_PENDING
+PMC3 CYCLE_ACTIVITY_CYCLES_NO_EXECUTE
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Cycles without execution [%] PMC3/FIXC1*100
+Cycles without execution due to L1D [%] PMC2/FIXC1*100
+Cycles without execution due to L2 [%] PMC0/FIXC1*100
+Cycles without execution due to memory [%] PMC1/FIXC1*100
+
+LONG
+Cycles without execution [%] = CYCLE_ACTIVITY_CYCLES_NO_EXECUTE/CPU_CLK_UNHALTED_CORE*100
+Cycles without execution due to L1D [%] = CYCLE_ACTIVITY_STALLS_L1D_PENDING/CPU_CLK_UNHALTED_CORE*100
+Cycles without execution due to L2 [%] = CYCLE_ACTIVITY_STALLS_L2_PENDING/CPU_CLK_UNHALTED_CORE*100
+Cycles without execution due to memory [%] = CYCLE_ACTIVITY_STALLS_LDM_PENDING/CPU_CLK_UNHALTED_CORE*100
+--
+This performance group measures the stalls caused by data traffic in the cache
+hierarchy.
diff --git a/groups/skylake/FLOPS_DP.txt b/groups/skylake/FLOPS_DP.txt
index c99d2c1..cb6c227 100644
--- a/groups/skylake/FLOPS_DP.txt
+++ b/groups/skylake/FLOPS_DP.txt
@@ -13,15 +13,15 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
-AVX MFLOP/s  1.0E-06*(PMC2*4.0)/time
+DP MFLOP/s  1.0E-06*(PMC0*2.0+PMC1+PMC2*4.0)/time
+AVX DP MFLOP/s  1.0E-06*(PMC2*4.0)/time
 Packed MUOPS/s   1.0E-06*(PMC0+PMC2)/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE*2+FP_ARITH_INST_RETIRED_SCALAR_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
-AVX MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
+DP MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE*2+FP_ARITH_INST_RETIRED_SCALAR_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
+AVX DP MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE*4)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE+FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE)/runtime
 Scalar MUOPS/s = 1.0E-06*FP_ARITH_INST_RETIRED_SCALAR_DOUBLE/runtime
 -
diff --git a/groups/skylake/FLOPS_SP.txt b/groups/skylake/FLOPS_SP.txt
index a273e84..1064a82 100644
--- a/groups/skylake/FLOPS_SP.txt
+++ b/groups/skylake/FLOPS_SP.txt
@@ -13,15 +13,15 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*4.0+PMC1+PMC2*8.0)/time
-AVX MFLOP/s  1.0E-06*(PMC2*8.0)/time
+SP MFLOP/s  1.0E-06*(PMC0*4.0+PMC1+PMC2*8.0)/time
+AVX SP MFLOP/s  1.0E-06*(PMC2*8.0)/time
 Packed MUOPS/s   1.0E-06*(PMC0+PMC2)/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_SINGLE*4+FP_ARITH_INST_RETIRED_SCALAR_SINGLE+FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/runtime
-AVX MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/runtime
+SP MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_SINGLE*4+FP_ARITH_INST_RETIRED_SCALAR_SINGLE+FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/runtime
+AVX SP MFLOP/s = 1.0E-06*(FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE*8)/runtime
 Packed MUOPS/s = 1.0E-06*(FP_ARITH_INST_RETIRED_128B_PACKED_SINGLE+FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE)/runtime
 Scalar MUOPS/s = 1.0E-06*FP_ARITH_INST_RETIRED_SCALAR_SINGLE/runtime
 -
diff --git a/groups/skylake/PORT_USAGE.txt b/groups/skylake/PORT_USAGE.txt
new file mode 100644
index 0000000..1cfe431
--- /dev/null
+++ b/groups/skylake/PORT_USAGE.txt
@@ -0,0 +1,46 @@
+SHORT  Execution port utilization
+
+REQUIRE_NOHT
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+PMC0  UOPS_DISPATCHED_PORT_PORT_0
+PMC1  UOPS_DISPATCHED_PORT_PORT_1
+PMC2  UOPS_DISPATCHED_PORT_PORT_2
+PMC3  UOPS_DISPATCHED_PORT_PORT_3
+PMC4  UOPS_DISPATCHED_PORT_PORT_4
+PMC5  UOPS_DISPATCHED_PORT_PORT_5
+PMC6  UOPS_DISPATCHED_PORT_PORT_6
+PMC7  UOPS_DISPATCHED_PORT_PORT_7
+
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Port0 usage ratio PMC0/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port1 usage ratio PMC1/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port2 usage ratio PMC2/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port3 usage ratio PMC3/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port4 usage ratio PMC4/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port5 usage ratio PMC5/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port6 usage ratio PMC6/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+Port7 usage ratio PMC7/(PMC0+PMC1+PMC2+PMC3+PMC4+PMC5+PMC6+PMC7)
+
+LONG
+Formulas:
+Port0 usage ratio = UOPS_DISPATCHED_PORT_PORT_0/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port1 usage ratio = UOPS_DISPATCHED_PORT_PORT_1/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port2 usage ratio = UOPS_DISPATCHED_PORT_PORT_2/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port3 usage ratio = UOPS_DISPATCHED_PORT_PORT_3/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port4 usage ratio = UOPS_DISPATCHED_PORT_PORT_4/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port5 usage ratio = UOPS_DISPATCHED_PORT_PORT_5/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port6 usage ratio = UOPS_DISPATCHED_PORT_PORT_6/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+Port7 usage ratio = UOPS_DISPATCHED_PORT_PORT_7/SUM(UOPS_DISPATCHED_PORT_PORT_*)
+-
+This group measures the execution port utilization in a CPU core. The group can
+only be measured when HyperThreading is disabled because only then each CPU core
+can program eight counters.
diff --git a/groups/westmere/FLOPS_DP.txt b/groups/westmere/FLOPS_DP.txt
index 2773f06..3ee6ebc 100644
--- a/groups/westmere/FLOPS_DP.txt
+++ b/groups/westmere/FLOPS_DP.txt
@@ -14,7 +14,7 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*2.0+PMC1)/time
+DP MFLOP/s  1.0E-06*(PMC0*2.0+PMC1)/time
 Packed MUOPS/s   1.0E-06*PMC0/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 SP MUOPS/s 1.0E-06*PMC2/time
@@ -22,13 +22,13 @@ DP MUOPS/s 1.0E-06*PMC3/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR)/runtime
+DP MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR)/runtime
 Packed MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_PACKED/runtime
 Scalar MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_SCALAR/runtime
 SP MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_SINGLE_PRECISION/runtime
 DP MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_DOUBLE_PRECISION/runtime
 -
-The Nehalem has no possibility to measure MFLOPs if mixed precision calculations are done.
+Westmere has no possibility to measure MFLOPs if mixed precision calculations are done.
 Therefore both single as well as double precision are measured to ensure the correctness
 of the measurements. You can check if your code was vectorized on the number of
 FP_COMP_OPS_EXE_SSE_FP_PACKED versus the  FP_COMP_OPS_EXE_SSE_FP_SCALAR.
diff --git a/groups/westmere/FLOPS_SP.txt b/groups/westmere/FLOPS_SP.txt
index 8254fd9..2b0f6a3 100644
--- a/groups/westmere/FLOPS_SP.txt
+++ b/groups/westmere/FLOPS_SP.txt
@@ -14,7 +14,7 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s 1.0E-06*(PMC0*4.0+PMC1)/time
+SP MFLOP/s 1.0E-06*(PMC0*4.0+PMC1)/time
 Packed MUOPS/s   1.0E-06*PMC0/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 SP MUOPS/s 1.0E-06*PMC2/time
@@ -22,13 +22,13 @@ DP MUOPS/s 1.0E-06*PMC3/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR)/runtime
+SP MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR)/runtime
 Packed MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_PACKED/runtime
 Scalar MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_SCALAR/runtime
 SP MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_SINGLE_PRECISION/runtime
 DP MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_DOUBLE_PRECISION/runtime
 -
-The Nehalem has no possibility to measure MFLOPs if mixed precision calculations are done.
+Westmere has no possibility to measure MFLOPs if mixed precision calculations are done.
 Therefore both single as well as double precision are measured to ensure the correctness
 of the measurements. You can check if your code was vectorized on the number of
 FP_COMP_OPS_EXE_SSE_FP_PACKED versus the  FP_COMP_OPS_EXE_SSE_FP_SCALAR.
diff --git a/groups/westmere/MEM.txt b/groups/westmere/MEM.txt
index 513ec60..b5165e1 100644
--- a/groups/westmere/MEM.txt
+++ b/groups/westmere/MEM.txt
@@ -15,9 +15,9 @@ Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
 Memory read bandwidth [MBytes/s] 1.0E-06*UPMC0*64.0/time
-Memory data volume [GBytes] 1.0E-09*UPMC0*64.0
+Memory read data volume [GBytes] 1.0E-09*UPMC0*64.0
 Memory write bandwidth [MBytes/s] 1.0E-06*UPMC1*64.0/time
-Memory data volume [GBytes] 1.0E-09*UPMC1*64.0
+Memory write data volume [GBytes] 1.0E-09*UPMC1*64.0
 Memory bandwidth [MBytes/s] 1.0E-06*(UPMC0+UPMC1)*64.0/time
 Memory data volume [GBytes] 1.0E-09*(UPMC0+UPMC1)*64.0
 Remote memory read bandwidth [MBytes/s] 1.0E-06*UPMC2*64.0/time
@@ -30,9 +30,9 @@ Remote memory data volume [GBytes] 1.0E-09*(UPMC2+UPMC3)*64.0
 LONG
 Formulas:
 Memory read bandwidth [MBytes/s] = 1.0E-06*UNC_QMC_NORMAL_READS_ANY*64.0/time
-Memory data volume [GBytes] = 1.0E-09*UNC_QMC_NORMAL_READS_ANY*64.0
+Memory read data volume [GBytes] = 1.0E-09*UNC_QMC_NORMAL_READS_ANY*64.0
 Memory write bandwidth [MBytes/s] = 1.0E-06*UNC_QMC_WRITES_FULL_ANY*64.0/time
-Memory data volume [GBytes] = 1.0E-09*UNC_QMC_WRITES_FULL_ANY*64.0
+Memory write data volume [GBytes] = 1.0E-09*UNC_QMC_WRITES_FULL_ANY*64.0
 Memory bandwidth [MBytes/s] = 1.0E-06*(UNC_QMC_NORMAL_READS_ANY+UNC_QMC_WRITES_FULL_ANY)*64.0/time
 Memory data volume [GBytes] = 1.0E-09*(UNC_QMC_NORMAL_READS_ANY+UNC_QMC_WRITES_FULL_ANY)*64.0
 Remote memory read bandwidth [MBytes/s] = 1.0E-06*UNC_QHL_REQUESTS_REMOTE_READS*64.0/time
diff --git a/groups/westmere/UOPS.txt b/groups/westmere/UOPS.txt
index 9d738d0..2567704 100644
--- a/groups/westmere/UOPS.txt
+++ b/groups/westmere/UOPS.txt
@@ -6,7 +6,7 @@ FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
 PMC0  UOPS_ISSUED_ANY
 PMC1  UOPS_EXECUTED_THREAD
-PMC2  UOPS_RETIRED_ALL
+PMC2  UOPS_RETIRED_ANY
 PMC3  UOPS_ISSUED_FUSED
 
 
@@ -26,7 +26,7 @@ Formula:
 Issued UOPs = UOPS_ISSUED_ANY
 Merged UOPs = UOPS_ISSUED_FUSED
 Executed UOPs = UOPS_EXECUTED_THREAD
-Retired UOPs = UOPS_RETIRED_ALL
+Retired UOPs = UOPS_RETIRED_ANY
 -
 This group returns information about the instruction pipeline. It measures the
 issued, executed and retired uOPs and returns the number of uOPs which were issued
diff --git a/groups/westmereEX/FLOPS_DP.txt b/groups/westmereEX/FLOPS_DP.txt
index 3e75cad..01160e6 100644
--- a/groups/westmereEX/FLOPS_DP.txt
+++ b/groups/westmereEX/FLOPS_DP.txt
@@ -14,7 +14,7 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s  1.0E-06*(PMC0*2.0+PMC1)/time
+DP MFLOP/s  1.0E-06*(PMC0*2.0+PMC1)/time
 Packed MUOPS/s   1.0E-06*PMC0/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 SP MUOPS/s 1.0E-06*PMC2/time
@@ -22,7 +22,7 @@ DP MUOPS/s 1.0E-06*PMC3/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR)/runtime
+DP MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*2+FP_COMP_OPS_EXE_SSE_FP_SCALAR)/runtime
 Packed MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_PACKED/runtime
 Scalar MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_SCALAR/runtime
 SP MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_SINGLE_PRECISION/runtime
diff --git a/groups/westmereEX/FLOPS_SP.txt b/groups/westmereEX/FLOPS_SP.txt
index 601027b..f8e3922 100644
--- a/groups/westmereEX/FLOPS_SP.txt
+++ b/groups/westmereEX/FLOPS_SP.txt
@@ -14,7 +14,7 @@ Runtime (RDTSC) [s] time
 Runtime unhalted [s] FIXC1*inverseClock
 Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
-MFLOP/s 1.0E-06*(PMC0*4.0+PMC1)/time
+SP MFLOP/s 1.0E-06*(PMC0*4.0+PMC1)/time
 Packed MUOPS/s   1.0E-06*PMC0/time
 Scalar MUOPS/s 1.0E-06*PMC1/time
 SP MUOPS/s 1.0E-06*PMC2/time
@@ -22,7 +22,7 @@ DP MUOPS/s 1.0E-06*PMC3/time
 
 LONG
 Formula:
-MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR)/runtime
+SP MFLOP/s = 1.0E-06*(FP_COMP_OPS_EXE_SSE_FP_PACKED*4+FP_COMP_OPS_EXE_SSE_FP_SCALAR)/runtime
 Packed MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_PACKED/runtime
 Scalar MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_FP_SCALAR/runtime
 SP MUOPS/s = 1.0E-06*FP_COMP_OPS_EXE_SSE_SINGLE_PRECISION/runtime
diff --git a/groups/westmereEX/UOPS.txt b/groups/westmereEX/UOPS.txt
index 9d738d0..59d2e09 100644
--- a/groups/westmereEX/UOPS.txt
+++ b/groups/westmereEX/UOPS.txt
@@ -5,8 +5,7 @@ FIXC0 INSTR_RETIRED_ANY
 FIXC1 CPU_CLK_UNHALTED_CORE
 FIXC2 CPU_CLK_UNHALTED_REF
 PMC0  UOPS_ISSUED_ANY
-PMC1  UOPS_EXECUTED_THREAD
-PMC2  UOPS_RETIRED_ALL
+PMC2  UOPS_RETIRED_ANY
 PMC3  UOPS_ISSUED_FUSED
 
 
@@ -18,15 +17,13 @@ Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
 CPI  FIXC1/FIXC0
 Issued UOPs PMC0
 Merged UOPs PMC3
-Executed UOPs PMC1
 Retired UOPs PMC2
 
 LONG
 Formula:
 Issued UOPs = UOPS_ISSUED_ANY
 Merged UOPs = UOPS_ISSUED_FUSED
-Executed UOPs = UOPS_EXECUTED_THREAD
-Retired UOPs = UOPS_RETIRED_ALL
+Retired UOPs = UOPS_RETIRED_ANY
 -
 This group returns information about the instruction pipeline. It measures the
 issued, executed and retired uOPs and returns the number of uOPs which were issued
diff --git a/make/config_checks.mk b/make/config_checks.mk
index ab266cf..8403570 100644
--- a/make/config_checks.mk
+++ b/make/config_checks.mk
@@ -16,6 +16,11 @@ GLIBC_VERSION := $(shell ldd --version | grep ldd |  awk '{ print $$NF }' | awk
 HAS_SCHEDAFFINITY = $(shell if [ $(GLIBC_VERSION) -lt 4 ]; then \
                echo 0;  else echo 1; \
 			   fi; )
+ENOUGH_CPUS = $(shell [ $(shell grep processor /proc/cpuinfo | wc -l) -le $(MAX_NUM_THREADS) ] && echo True )
+
+ifneq ($(ENOUGH_CPUS), True)
+$(info Warning: The MAX_NUM_THREADS variable must be larger or equal to the available CPUs. Currently, LIKWID is configured for $(MAX_NUM_THREADS) CPUs, but there are $(INSTALLED_CPUS) CPUs in the system)
+endif
 
 INST_PREFIX := $(INSTALLED_PREFIX)
 ifneq "$(PREFIX)" "$(INST_PREFIX)"
@@ -24,7 +29,7 @@ endif
 
 FORTRAN_IF_NAME := likwid.mod
 ifneq ($(FORTRAN_INTERFACE),false)
-HAS_FORTRAN_COMPILER = $(shell $(FC) --version 2>/dev/null || echo 'NOFORTRAN' )
+HAS_FORTRAN_COMPILER := $(shell $(FC) --version 2>/dev/null || echo 'NOFORTRAN' )
 ifeq ($(HAS_FORTRAN_COMPILER),NOFORTRAN)
 FORTRAN_IF=
 $(info Warning: You have selected the fortran interface in config.mk, but there seems to be no fortran compiler $(FC) - not compiling it!)
diff --git a/make/config_defines.mk b/make/config_defines.mk
index f2b632c..e277d53 100644
--- a/make/config_defines.mk
+++ b/make/config_defines.mk
@@ -7,14 +7,26 @@ DEFINES   += -DVERSION=$(VERSION)         \
 		 -DMAX_NUM_NODES=$(MAX_NUM_NODES)     \
 		 -DACCESSDAEMON=$(INSTALLED_ACCESSDAEMON) \
 		 -DGROUPPATH=$(LIKWIDGROUPPATH) \
+		 -DLIKWIDLOCK=$(LIKWIDLOCKPATH) \
+		 -DLIKWIDSOCKETBASE=$(LIKWIDSOCKETBASE) \
 		 -D_GNU_SOURCE
 
 DYNAMIC_TARGET_LIB := liblikwid.so
 STATIC_TARGET_LIB := liblikwid.a
-
-LUA_FOLDER := ext/lua
-SHARED_LIBLUA := liblikwid-lua.so
-STATIC_LIBLUA := liblikwid-lua.a
+PWD ?= $(shell pwd)
+# LUA:
+ifdef LUA_INCLUDE_DIR
+LUA_INTERNAL := false#NO SPACE
+else
+LUA_FOLDER := $(PWD)/ext/lua#NO SPACE
+LUA_INCLUDE_DIR := $(LUA_FOLDER)/includes#NO SPACE
+LUA_LIB_DIR := $(LUA_FOLDER)#NO SPACE
+LUA_LIB_NAME := likwid-lua#NO SPACE
+LUA_INTERNAL := true#NO SPACE
+endif
+SHARED_LIBLUA := lib$(LUA_LIB_NAME).so
+STATIC_LIBLUA := lib$(LUA_LIB_NAME).a
+# HWLOC:
 HWLOC_FOLDER := ext/hwloc
 STATIC_LIBHWLOC := liblikwid-hwloc.a
 SHARED_LIBHWLOC := liblikwid-hwloc.so
@@ -56,11 +68,11 @@ CFLAGS += $(SHARED_CFLAGS)
 LIBS += -L. -pthread -lm -ldl
 TARGET_LIB := $(DYNAMIC_TARGET_LIB)
 TARGET_HWLOC_LIB=$(HWLOC_FOLDER)/$(SHARED_LIBHWLOC)
-TARGET_LUA_LIB=$(LUA_FOLDER)/$(SHARED_LIBLUA)
+TARGET_LUA_LIB=$(LUA_LIB_DIR)/$(SHARED_LIBLUA)
 else
 TARGET_LIB := $(STATIC_TARGET_LIB)
 TARGET_HWLOC_LIB=$(HWLOC_FOLDER)/$(STATIC_LIBHWLOC)
-TARGET_LUA_LIB=$(LUA_FOLDER)/$(STATIC_LIBLUA)
+TARGET_LUA_LIB=$(LUA_LIB_DIR)/$(STATIC_LIBLUA)
 endif
 
 ifeq ($(HAS_SCHEDAFFINITY),1)
@@ -111,7 +123,19 @@ endif
 
 ifeq ($(DEBUG),true)
 DEBUG_FLAGS = -g
+DEBUG_CFLAGS := $(filter-out -O0, $(CFLAGS))
+DEBUG_CFLAGS := $(filter-out -O1, $(DEBUG_CFLAGS))
+DEBUG_CFLAGS := $(filter-out -O2, $(DEBUG_CFLAGS))
+DEBUG_CFLAGS := $(filter-out -O3, $(DEBUG_CFLAGS))
+CFLAGS = -O0 $(DEBUG_CFLAGS)
 DEFINES += -DDEBUG_LIKWID
 else
 DEBUG_FLAGS =
 endif
+
+ifeq ($(USE_PERF_EVENT),true)
+$(info Info: Compiling for perf_event interface. Features like power consumption or thermal stuff is disabled);
+$(info Info: Currently Uncore support is experimental);
+DEFINES += -DLIKWID_USE_PERFEVENT
+endif
+
diff --git a/make/include_ICC.mk b/make/include_ICC.mk
index 9dfe66b..d03305d 100644
--- a/make/include_ICC.mk
+++ b/make/include_ICC.mk
@@ -9,8 +9,8 @@ GEN_PMHEADER = ./perl/gen_events.pl
 
 ANSI_CFLAGS  = -std=c99 #-strict-ansi
 
-CFLAGS   =  -O1 -Wno-format -vec-report=0 -fPIC -pthread
-FCFLAGS  = -module ./ 
+CFLAGS   =  -Ofast -fPIC -pthread
+FCFLAGS  = -module ./
 ASFLAGS  = -gdwarf-2
 PASFLAGS  = x86-64
 CPPFLAGS =
@@ -25,4 +25,8 @@ DEFINES  += -DPAGE_ALIGNMENT=4096
 INCLUDES =
 LIBS     = -lrt
 
-
+# Colon-separated list of paths to search for libs at runtime on Xeon Phi file system (empty: add no rpath)
+ICC_LIB_RPATHS =
+ifneq ($(strip $(ICC_LIB_RPATHS)),)
+RPATHS += -Wl,-rpath=$(ICC_LIB_RPATHS)
+endif
diff --git a/monitoring/groups/ivybridge/CYCLE_ACTIVITY.txt b/monitoring/groups/ivybridge/CYCLE_ACTIVITY.txt
new file mode 100644
index 0000000..a4bf45d
--- /dev/null
+++ b/monitoring/groups/ivybridge/CYCLE_ACTIVITY.txt
@@ -0,0 +1,26 @@
+SHORT Cycle Activities
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+#PMC2 CYCLE_ACTIVITY_CYCLES_L1D_PENDING
+PMC2 CYCLE_ACTIVITY_STALLS_L1D_PENDING
+#PMC0 CYCLE_ACTIVITY_CYCLES_L2_PENDING
+PMC1 CYCLE_ACTIVITY_STALLS_L2_PENDING
+#PMC1 CYCLE_ACTIVITY_CYCLES_LDM_PENDING
+PMC0 CYCLE_ACTIVITY_STALLS_LDM_PENDING
+PMC3 CYCLE_ACTIVITY_CYCLES_NO_EXECUTE
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Cycles without execution [%] PMC3/FIXC1*100
+Cycles with stalls due to L1D [%] PMC2/FIXC1*100
+Cycles with stalls due to L2 [%] PMC1/FIXC1*100
+Cycles with stalls due to LDM [%] PMC0/FIXC1*100
+
+LONG
+Formulas
diff --git a/monitoring/groups/ivybridgeEP/CYCLE_ACTIVITY.txt b/monitoring/groups/ivybridgeEP/CYCLE_ACTIVITY.txt
new file mode 100644
index 0000000..a4bf45d
--- /dev/null
+++ b/monitoring/groups/ivybridgeEP/CYCLE_ACTIVITY.txt
@@ -0,0 +1,26 @@
+SHORT Cycle Activities
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+#PMC2 CYCLE_ACTIVITY_CYCLES_L1D_PENDING
+PMC2 CYCLE_ACTIVITY_STALLS_L1D_PENDING
+#PMC0 CYCLE_ACTIVITY_CYCLES_L2_PENDING
+PMC1 CYCLE_ACTIVITY_STALLS_L2_PENDING
+#PMC1 CYCLE_ACTIVITY_CYCLES_LDM_PENDING
+PMC0 CYCLE_ACTIVITY_STALLS_LDM_PENDING
+PMC3 CYCLE_ACTIVITY_CYCLES_NO_EXECUTE
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Cycles without execution [%] PMC3/FIXC1*100
+Cycles with stalls due to L1D [%] PMC2/FIXC1*100
+Cycles with stalls due to L2 [%] PMC1/FIXC1*100
+Cycles with stalls due to LDM [%] PMC0/FIXC1*100
+
+LONG
+Formulas
diff --git a/monitoring/groups/sandybridge/CYCLE_ACTIVITY.txt b/monitoring/groups/sandybridge/CYCLE_ACTIVITY.txt
new file mode 100644
index 0000000..40abcb6
--- /dev/null
+++ b/monitoring/groups/sandybridge/CYCLE_ACTIVITY.txt
@@ -0,0 +1,23 @@
+SHORT Cycle Activities
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+#PMC2 CYCLE_ACTIVITY_CYCLES_L1D_PENDING
+PMC2 CYCLE_ACTIVITY_STALLS_L1D_PENDING
+#PMC0 CYCLE_ACTIVITY_CYCLES_L2_PENDING
+PMC1 CYCLE_ACTIVITY_STALLS_L2_PENDING
+PMC3 CYCLE_ACTIVITY_CYCLES_NO_DISPATCH
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Cycles without execution [%] PMC3/FIXC1*100
+Cycles with stalls due to L1D [%] PMC2/FIXC1*100
+Cycles with stalls due to L2 [%] PMC1/FIXC1*100
+
+LONG
+Formulas
diff --git a/monitoring/groups/sandybridgeEP/CYCLE_ACTIVITY.txt b/monitoring/groups/sandybridgeEP/CYCLE_ACTIVITY.txt
new file mode 100644
index 0000000..40abcb6
--- /dev/null
+++ b/monitoring/groups/sandybridgeEP/CYCLE_ACTIVITY.txt
@@ -0,0 +1,23 @@
+SHORT Cycle Activities
+
+EVENTSET
+FIXC0 INSTR_RETIRED_ANY
+FIXC1 CPU_CLK_UNHALTED_CORE
+FIXC2 CPU_CLK_UNHALTED_REF
+#PMC2 CYCLE_ACTIVITY_CYCLES_L1D_PENDING
+PMC2 CYCLE_ACTIVITY_STALLS_L1D_PENDING
+#PMC0 CYCLE_ACTIVITY_CYCLES_L2_PENDING
+PMC1 CYCLE_ACTIVITY_STALLS_L2_PENDING
+PMC3 CYCLE_ACTIVITY_CYCLES_NO_DISPATCH
+
+METRICS
+Runtime (RDTSC) [s] time
+Runtime unhalted [s] FIXC1*inverseClock
+Clock [MHz]  1.E-06*(FIXC1/FIXC2)/inverseClock
+CPI  FIXC1/FIXC0
+Cycles without execution [%] PMC3/FIXC1*100
+Cycles with stalls due to L1D [%] PMC2/FIXC1*100
+Cycles with stalls due to L2 [%] PMC1/FIXC1*100
+
+LONG
+Formulas
diff --git a/perl/gen_events.pl b/perl/gen_events.pl
index 4833ccc..d6c49e8 100755
--- a/perl/gen_events.pl
+++ b/perl/gen_events.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl 
+#!/usr/bin/perl
 
 use strict;
 use warnings;
diff --git a/perl/set_license.pl b/perl/set_license.pl
index 88155a2..fb099a6 100755
--- a/perl/set_license.pl
+++ b/perl/set_license.pl
@@ -10,16 +10,15 @@ my $cc = ' *';
 my $fc = '!';
 my $lc = ' *';
 
-#my $VERSION   = '<VERSION>';
-#my $DATE   = '<DATE>';
-my $VERSION   = '4.1';
-my $DATE   = '8.8.2016';
-my $YEAR  = '2016';
+my $VERSION   = '<VERSION>';
+my $DATE   = '<DATE>';
+#my $VERSION   = '4.0';
+#my $DATE   = '16.6.2015';
+my $YEAR  = '2015';
 my $AUTHOR = 'RRZE, University Erlangen-Nuremberg';
 my $LICENSE = 'gpl';
 
-my @SKIPLIST = ('ghash.c','ghash.h','loadData.S','bstrlib.c','bstrlib.h',
-    'calculator_stack.h', 'calculator_stack.c', 'calculator.c');
+my @SKIPLIST = ('ghash.c','ghash.h','loadData.S','bstrlib.c','bstrlib.h', 'calculator_stack.h', 'calculator_stack.c');
 
 sub print_copyright
 {
diff --git a/perl/xmgrace.pm b/perl/xmgrace.pm
index 1c36949..c5d202a 100644
--- a/perl/xmgrace.pm
+++ b/perl/xmgrace.pm
@@ -14,12 +14,12 @@
 #   This program is free software; you can redistribute it and/or modify
 #   it under the terms of the GNU General Public License, v2, as
 #   published by the Free Software Foundation
-#  
+#
 #   This program is distributed in the hope that it will be useful,
 #   but WITHOUT ANY WARRANTY; without even the implied warranty of
 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 #   GNU General Public License for more details.
-#  
+#
 #   You should have received a copy of the GNU General Public License
 #   along with this program; if not, write to the Free Software
 #   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
@@ -38,106 +38,106 @@ my $Y_MAX=0;
 
 sub xmgrace
 {
-	my ($global_opts, $datasets, $outputs);
-	my $global_options = shift;
-	my $data_sets = shift;
-	my $world = determine_world_coords($data_sets);
-
-	my $x_ticks = {'axis' => 'x', 'major' => 0, 'minor ticks' => 0};
-	my $y_ticks = {'axis' => 'y', 'major' => 0, 'minor ticks' => 0};
-
-	determine_ticks($X_MAX,$x_ticks);
-	determine_ticks($Y_MAX,$y_ticks);
-
-	$outputs = "-hdevice $global_options->{'device'} -printfile $global_options->{'output file'} ";
-	$outputs .= "-saveall $global_options->{'grace output file'}";
-	$global_opts = "-autoscale none -world $world";
-	open FILE,'>tmp.bat';
-	print FILE "title \"$global_options->{'title'}\"\n";
-	print FILE "subtitle \"$global_options->{'subtitle'}\"\n";
-	print FILE "xaxis label  \"$global_options->{'xaxis label'}\"\n";
-	print FILE "xaxis tick major  $x_ticks->{'major'}\n";
-	print FILE "xaxis tick minor ticks $x_ticks->{'minor ticks'}\n";
-	print FILE "yaxis label  \"$global_options->{'yaxis label'}\"\n";
-	print FILE "yaxis tick major  $y_ticks->{'major'}\n";
-	print FILE "yaxis tick minor ticks $y_ticks->{'minor ticks'}\n";
-	print FILE "legend  $global_options->{'legend'}\n";
-	$datasets = ' ';
-
-	my $num_graphs=0;
-	foreach my $dataset (@{$data_sets}) {
-		my $tag = "s$num_graphs";
-		$datasets .= "-nxy $dataset->{'data file'} ";
-		print FILE "$tag legend \"$dataset->{'title'}\"\n";
-		print FILE "$tag symbol $dataset->{'symbol'}->{'type'}\n";
-		print FILE "$tag symbol size $dataset->{'symbol'}->{'size'}\n";
-		print FILE "$tag symbol color $dataset->{'symbol'}->{'color'}\n";
-		print FILE "$tag symbol pattern $dataset->{'symbol'}->{'pattern'}\n";
-		print FILE "$tag symbol fill color $dataset->{'symbol'}->{'fill color'}\n";
-		print FILE "$tag symbol fill pattern $dataset->{'symbol'}->{'fill pattern'}\n";
-		print FILE "$tag symbol linewidth $dataset->{'symbol'}->{'linewidth'}\n";
-		print FILE "$tag symbol linestyle $dataset->{'symbol'}->{'linestyle'}\n";
-		print FILE "$tag line type $dataset->{'line'}->{'type'}\n";
-		print FILE "$tag line color $dataset->{'line'}->{'color'}\n";
-		print FILE "$tag line linestyle $dataset->{'line'}->{'linestyle'}\n";
-		print FILE "$tag line linewidth $dataset->{'line'}->{'linewidth'}\n";
-		print FILE "$tag line pattern $dataset->{'line'}->{'pattern'}\n";
-		$num_graphs++;
-	}
+    my ($global_opts, $datasets, $outputs);
+    my $global_options = shift;
+    my $data_sets = shift;
+    my $world = determine_world_coords($data_sets);
+
+    my $x_ticks = {'axis' => 'x', 'major' => 0, 'minor ticks' => 0};
+    my $y_ticks = {'axis' => 'y', 'major' => 0, 'minor ticks' => 0};
+
+    determine_ticks($X_MAX,$x_ticks);
+    determine_ticks($Y_MAX,$y_ticks);
+
+    $outputs = "-hdevice $global_options->{'device'} -printfile $global_options->{'output file'} ";
+    $outputs .= "-saveall $global_options->{'grace output file'}";
+    $global_opts = "-autoscale none -world $world";
+    open FILE,'>tmp.bat';
+    print FILE "title \"$global_options->{'title'}\"\n";
+    print FILE "subtitle \"$global_options->{'subtitle'}\"\n";
+    print FILE "xaxis label  \"$global_options->{'xaxis label'}\"\n";
+    print FILE "xaxis tick major  $x_ticks->{'major'}\n";
+    print FILE "xaxis tick minor ticks $x_ticks->{'minor ticks'}\n";
+    print FILE "yaxis label  \"$global_options->{'yaxis label'}\"\n";
+    print FILE "yaxis tick major  $y_ticks->{'major'}\n";
+    print FILE "yaxis tick minor ticks $y_ticks->{'minor ticks'}\n";
+    print FILE "legend  $global_options->{'legend'}\n";
+    $datasets = ' ';
+
+    my $num_graphs=0;
+    foreach my $dataset (@{$data_sets}) {
+        my $tag = "s$num_graphs";
+        $datasets .= "-nxy $dataset->{'data file'} ";
+        print FILE "$tag legend \"$dataset->{'title'}\"\n";
+        print FILE "$tag symbol $dataset->{'symbol'}->{'type'}\n";
+        print FILE "$tag symbol size $dataset->{'symbol'}->{'size'}\n";
+        print FILE "$tag symbol color $dataset->{'symbol'}->{'color'}\n";
+        print FILE "$tag symbol pattern $dataset->{'symbol'}->{'pattern'}\n";
+        print FILE "$tag symbol fill color $dataset->{'symbol'}->{'fill color'}\n";
+        print FILE "$tag symbol fill pattern $dataset->{'symbol'}->{'fill pattern'}\n";
+        print FILE "$tag symbol linewidth $dataset->{'symbol'}->{'linewidth'}\n";
+        print FILE "$tag symbol linestyle $dataset->{'symbol'}->{'linestyle'}\n";
+        print FILE "$tag line type $dataset->{'line'}->{'type'}\n";
+        print FILE "$tag line color $dataset->{'line'}->{'color'}\n";
+        print FILE "$tag line linestyle $dataset->{'line'}->{'linestyle'}\n";
+        print FILE "$tag line linewidth $dataset->{'line'}->{'linewidth'}\n";
+        print FILE "$tag line pattern $dataset->{'line'}->{'pattern'}\n";
+        $num_graphs++;
+    }
 
 #     print "EXE LINE: gracebat $global_opts $datasets -param tmp.bat $outputs\n"; 
-	close FILE;
-	system ("gracebat $global_opts $datasets -param tmp.bat $outputs");
-	unlink 'tmp.bat';
+    close FILE;
+    system ("gracebat $global_opts $datasets -param tmp.bat $outputs");
+    unlink 'tmp.bat';
 }
 
 sub determine_world_coords
 {
-	my $data_sets = shift;
-	my $x_min = 0;
-	my $y_min = 0;
-	my $x_max = 0;
-	my $y_max = 0.;
-	my @x, @y;
+    my $data_sets = shift;
+    my $x_min = 0;
+    my $y_min = 0;
+    my $x_max = 0;
+    my $y_max = 0.;
+    my @x, @y;
 
-	foreach my $dataset (@{$data_sets}) {
-		open FILE, "<$dataset->{'data file'}";
+    foreach my $dataset (@{$data_sets}) {
+        open FILE, "<$dataset->{'data file'}";
 
-		@x = ();
-		@y = ();
+        @x = ();
+        @y = ();
 
-		while (<FILE>) {
-			/([\d\.]+)[ ]+([\d\.]+)/;
+        while (<FILE>) {
+            /([\d\.]+)[ ]+([\d\.]+)/;
 
-			push @x, $1;
-			push @y, $2;
-		}
-		close FILE;
+            push @x, $1;
+            push @y, $2;
+        }
+        close FILE;
 
-		@x = sort { $a <=> $b } @x;
-		@y = sort { $a <=> $b } @y;
+        @x = sort { $a <=> $b } @x;
+        @y = sort { $a <=> $b } @y;
 
-		$x_max = $x[-1] if ($x[-1] > $x_max);
-		$y_max = $y[-1] if ($y[-1] > $y_max);
-	}
+        $x_max = $x[-1] if ($x[-1] > $x_max);
+        $y_max = $y[-1] if ($y[-1] > $y_max);
+    }
 
-	$x_max += $x_max * 0.1;
-	$y_max += $y_max * 0.1;
+    $x_max += $x_max * 0.1;
+    $y_max += $y_max * 0.1;
 
-	$X_MAX = $x_max; $Y_MAX = $y_max;
-	# We base all axes on zero for the moment
-	return "0 0 $x_max $y_max";
+    $X_MAX = $x_max; $Y_MAX = $y_max;
+    # We base all axes on zero for the moment
+    return "0 0 $x_max $y_max";
 }
 
-sub determine_ticks 
+sub determine_ticks
 {
-	my $range_max = shift;
-	my $tick_ptr = shift;
+    my $range_max = shift;
+    my $tick_ptr = shift;
 
-	if ($tick_ptr->{'axis'} eq 'x') {
-			$tick_ptr->{'major'} = 25;
-			$tick_ptr->{'minor ticks'} = 5;
-	}
+    if ($tick_ptr->{'axis'} eq 'x') {
+        $tick_ptr->{'major'} = 25;
+        $tick_ptr->{'minor ticks'} = 5;
+    }
 
     if ($tick_ptr->{'axis'} eq 'y') {
         if ($range_max < 10000) {
@@ -177,33 +177,33 @@ This module is roughly based on the Chart::Graph::Xmgrace module.
     use xmgrace;
 
     xmgrace ({"title"         => "$plot->{title}",
-	    "subtitle"        => "$plot->{subtitle}",
-	    "legend"          => "0.7,0.25",
-	    "output file"     => "$RESULT_TARGET/plot/eps/$plot->{title}.eps",
-	    "grace output file" => "$RESULT_TARGET/plot/agr/$plot->{title}.agr",
-	    "xaxis label"     => "number of processors",
-	    "yaxis label"     => "$PLOT_CONFIG->{$plot->{title}}->{YAXIS}"
-	},
-	[ { "title"     =>  "$SYSTEM",
-	    "data file" =>  "$RESULT_TARGET/plot/data/$plot->{title}.dat",
-	    "line" => {
-		"type"      => "1",
-		"color"     => "1",
-		"linewidth" => "2",
-		"linestyle" => "1",
-		"pattern"   => "1",
-	    },
-	    "symbol" => {
-		"type"      => "2",
-		"color"     => "1",
-		"pattern"   => "1",
-		"linewidth" => "2",
-		"linestyle" => "1",
-		"size"      => "1",
-		"fill pattern" => "1",
-		"fill color"=> "1",
-	    }
-	}]);
+        "subtitle"        => "$plot->{subtitle}",
+        "legend"          => "0.7,0.25",
+        "output file"     => "$RESULT_TARGET/plot/eps/$plot->{title}.eps",
+        "grace output file" => "$RESULT_TARGET/plot/agr/$plot->{title}.agr",
+        "xaxis label"     => "number of processors",
+        "yaxis label"     => "$PLOT_CONFIG->{$plot->{title}}->{YAXIS}"
+    },
+    [ { "title"     =>  "$SYSTEM",
+        "data file" =>  "$RESULT_TARGET/plot/data/$plot->{title}.dat",
+        "line" => {
+        "type"      => "1",
+        "color"     => "1",
+        "linewidth" => "2",
+        "linestyle" => "1",
+        "pattern"   => "1",
+        },
+        "symbol" => {
+        "type"      => "2",
+        "color"     => "1",
+        "pattern"   => "1",
+        "linewidth" => "2",
+        "linestyle" => "1",
+        "size"      => "1",
+        "fill pattern" => "1",
+        "fill color"=> "1",
+        }
+    }]);
 
 =head1 OPTION TABLES
 
diff --git a/src/access-daemon/Makefile b/src/access-daemon/Makefile
index 0bb2818..cb196a1 100644
--- a/src/access-daemon/Makefile
+++ b/src/access-daemon/Makefile
@@ -4,8 +4,8 @@
 #
 #      Description:  accessDaemon Makefile
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:  Jan Treibig (jt), jan.treibig at gmail.com
 #      Project:  likwid
@@ -33,7 +33,7 @@ DAEMON_TARGET = likwid-accessD
 SETFREQ_TARGET = likwid-setFreq
 Q         ?= @
 
-DEFINES   += -D_GNU_SOURCE -DMAX_NUM_THREADS=$(MAX_NUM_THREADS) -DMAX_NUM_NODES=$(MAX_NUM_NODES)
+DEFINES   += -D_GNU_SOURCE -DMAX_NUM_THREADS=$(MAX_NUM_THREADS) -DMAX_NUM_NODES=$(MAX_NUM_NODES) -DLIKWIDLOCK=$(LIKWIDLOCKPATH) -DLIKWIDSOCKETBASE=$(LIKWIDSOCKETBASE)
 INCLUDES  = -I../includes
 CFLAGS    += -std=c99 -fPIC -pie -fPIE -fstack-protector
 ifeq ($(COMPILER),GCCX86)
@@ -49,4 +49,3 @@ $(DAEMON_TARGET): accessDaemon.c
 $(SETFREQ_TARGET): setFreq.c
 	$(Q)$(CC) $(CFLAGS) $(CPPFLAGS) -o ../../$(SETFREQ_TARGET) setFreq.c
 
-
diff --git a/src/access-daemon/accessDaemon.c b/src/access-daemon/accessDaemon.c
index 5c48688..caa173b 100644
--- a/src/access-daemon/accessDaemon.c
+++ b/src/access-daemon/accessDaemon.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Implementation of access daemon.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Authors:  Michael Meier, michael.meier at rrze.fau.de
  *                Jan Treibig (jt), jan.treibig at gmail.com,
@@ -29,7 +29,9 @@
  *
  * =======================================================================================
  */
+
 /* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -54,22 +56,24 @@
 #include <perfmon_sandybridgeEP_counters.h>
 #include <perfmon_broadwelld_counters.h>
 #include <perfmon_broadwellEP_counters.h>
+#include <perfmon_knl_counters.h>
 #include <topology.h>
 #include <cpuid.h>
 #include <lock.h>
 
-
 /* #####   MACROS  -  LOCAL TO THIS SOURCE FILE   ######################### */
+
 #define SA struct sockaddr
 #define str(x) #x
 
 #define CHECK_FILE_ERROR(func, msg)  \
     if ((func) == 0) { syslog(LOG_ERR, "ERROR - [%s:%d] " str(msg) " - %s \n", __FILE__, __LINE__, strerror(errno)); }
 
-
-
-
-
+#define LOG_AND_EXIT_IF_ERROR(func, msg)  \
+    if ((func) < 0) {  \
+        syslog(LOG_ERR, "ERROR - [%s:%d] " str(msg) " - %s \n", __FILE__, __LINE__, strerror(errno)); \
+        exit(EXIT_FAILURE); \
+    }
 
 #define PCI_ROOT_PATH    "/proc/bus/pci/"
 #define MAX_PATH_LENGTH   80
@@ -80,10 +84,12 @@
  * with an external monitoring system. */
 
 /* #####   TYPE DEFINITIONS   ########### */
+
 typedef int (*AllowedPrototype)(uint32_t);
 typedef int (*AllowedPciPrototype)(PciDeviceType, uint32_t);
 
 /* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
+
 static int sockfd = -1;
 static int connfd = -1; /* temporary in to make it compile */
 static char* filepath;
@@ -95,6 +101,7 @@ static int FD_PCI[MAX_NUM_NODES][MAX_NUM_PCI_DEVICES];
 static int isPCIUncore = 0;
 static PciDevice* pci_devices_daemon = NULL;
 static char pci_filepath[MAX_PATH_LENGTH];
+static int num_pmc_counters = 0;
 
 /* Socket to bus mapping -- will be determined at runtime;
  * typical mappings are:
@@ -106,13 +113,14 @@ static char pci_filepath[MAX_PATH_LENGTH];
  */
 static char* socket_bus[MAX_NUM_NODES] = { [0 ... (MAX_NUM_NODES-1)] = NULL};
 
-
 /* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
 
-static int allowed_intel(uint32_t reg)
+static int
+allowed_intel(uint32_t reg)
 {
-    if ( ((reg & 0x0F8U) == 0x0C0U) ||
-            ((reg & 0xFF0U) == 0x180U) ||
+    if ( ((reg & 0x0F0U) == 0x0C0U) ||
+            ((reg & 0x190U) == 0x180U) ||
+            ((reg & 0x190U) == 0x190U && num_pmc_counters > 4) ||
             ((reg & 0xF00U) == 0x300U) ||
             ((reg & 0xF00U) == 0xC00U) ||
             ((reg & 0xF00U) == 0xD00U) ||
@@ -124,8 +132,15 @@ static int allowed_intel(uint32_t reg)
             (reg == 0x19C)  ||
             (reg == 0x1A2)  ||
             (reg == 0x1AD)  ||
+            (reg == 0x1AE)  ||
+            (reg == 0x1AF)  ||
+            (reg == 0x1AC)  ||
             (reg == 0x1A6)  ||
-            (reg == 0x1A7))
+            (reg == 0x1A7)  ||
+            (reg == 0x620)  ||
+            (reg == 0xCD)   ||
+            (reg == 0x1B0)  ||
+            (reg == 0x1B1))
     {
         return 1;
     }
@@ -135,17 +150,21 @@ static int allowed_intel(uint32_t reg)
     }
 }
 
-static int allowed_sandybridge(uint32_t reg)
+static int
+allowed_sandybridge(uint32_t reg)
 {
     if ((allowed_intel(reg)) ||
-        (((reg & 0xF00U) == 0x600U)))
+        (((reg & 0xF00U) == 0x600U)) ||
+        (((reg & 0xF00U) == 0x700U)) ||
+        (reg == MSR_ALT_PEBS))
     {
         return 1;
     }
     return 0;
 }
 
-static int allowed_pci_sandybridge(PciDeviceType type, uint32_t reg)
+static int
+allowed_pci_sandybridge(PciDeviceType type, uint32_t reg)
 {
     switch (type)
     {
@@ -283,7 +302,8 @@ static int allowed_pci_sandybridge(PciDeviceType type, uint32_t reg)
     return 0;
 }
 
-static int allowed_haswell(uint32_t reg)
+static int
+allowed_haswell(uint32_t reg)
 {
     if ((allowed_intel(reg)) ||
         (allowed_sandybridge(reg)) ||
@@ -297,7 +317,8 @@ static int allowed_haswell(uint32_t reg)
     }
 }
 
-static int allowed_pci_haswell(PciDeviceType type, uint32_t reg)
+static int
+allowed_pci_haswell(PciDeviceType type, uint32_t reg)
 {
     switch (type)
     {
@@ -425,7 +446,8 @@ static int allowed_pci_haswell(PciDeviceType type, uint32_t reg)
     return 0;
 }
 
-static int allowed_silvermont(uint32_t reg)
+static int
+allowed_silvermont(uint32_t reg)
 {
 
     if ( ((reg & 0x0F8U) == 0x0C0U) ||
@@ -450,6 +472,139 @@ static int allowed_silvermont(uint32_t reg)
     }
 }
 
+static int allowed_knl(uint32_t reg)
+{
+    if (allowed_silvermont(reg))
+        return 1;
+    else
+    {
+        if (((reg & 0xF00U) == 0x700U) ||
+            ((reg & 0xF00U) == 0xE00U) ||
+            ((reg & 0xF00U) == 0xF00U))
+            return 1;
+    }
+    return 0;
+}
+
+static int allowed_pci_knl(PciDeviceType type, uint32_t reg)
+{
+    switch(type)
+    {
+	case EDC:
+	    if ((reg == PCI_MIC2_EDC_U_CTR0_A) ||
+		(reg == PCI_MIC2_EDC_U_CTR0_B) ||
+	        (reg == PCI_MIC2_EDC_U_CTR1_A) ||
+		(reg == PCI_MIC2_EDC_U_CTR1_B) ||
+	        (reg == PCI_MIC2_EDC_U_CTR2_A) ||
+		(reg == PCI_MIC2_EDC_U_CTR2_B) ||
+	        (reg == PCI_MIC2_EDC_U_CTR3_A) ||
+		(reg == PCI_MIC2_EDC_U_CTR3_B) ||
+		(reg == PCI_MIC2_EDC_U_CTRL0) ||
+		(reg == PCI_MIC2_EDC_U_CTRL1) ||
+		(reg == PCI_MIC2_EDC_U_CTRL2) ||
+		(reg == PCI_MIC2_EDC_U_CTRL3) ||
+		(reg == PCI_MIC2_EDC_U_BOX_CTRL) ||
+		(reg == PCI_MIC2_EDC_U_BOX_STATUS) ||
+		(reg == PCI_MIC2_EDC_U_FIXED_CTR_A) ||
+		(reg == PCI_MIC2_EDC_U_FIXED_CTR_B) ||
+		(reg == PCI_MIC2_EDC_U_FIXED_CTRL) ||
+	        (reg == PCI_MIC2_EDC_D_CTR0_A) ||
+		(reg == PCI_MIC2_EDC_D_CTR0_B) ||
+	        (reg == PCI_MIC2_EDC_D_CTR1_A) ||
+		(reg == PCI_MIC2_EDC_D_CTR1_B) ||
+	        (reg == PCI_MIC2_EDC_D_CTR2_A) ||
+		(reg == PCI_MIC2_EDC_D_CTR2_B) ||
+	        (reg == PCI_MIC2_EDC_D_CTR3_A) ||
+		(reg == PCI_MIC2_EDC_D_CTR3_B) ||
+		(reg == PCI_MIC2_EDC_D_CTRL0) ||
+		(reg == PCI_MIC2_EDC_D_CTRL1) ||
+		(reg == PCI_MIC2_EDC_D_CTRL2) ||
+		(reg == PCI_MIC2_EDC_D_CTRL3) ||
+		(reg == PCI_MIC2_EDC_D_BOX_CTRL) ||
+		(reg == PCI_MIC2_EDC_D_BOX_STATUS) ||
+		(reg == PCI_MIC2_EDC_D_FIXED_CTR_A) ||
+		(reg == PCI_MIC2_EDC_D_FIXED_CTR_B) ||
+		(reg == PCI_MIC2_EDC_D_FIXED_CTRL))
+	    {
+		return 1;
+	    }
+	    break;
+	case IMC:
+	    if ((reg == PCI_MIC2_MC_U_CTR0_A) ||
+		(reg == PCI_MIC2_MC_U_CTR0_B) ||
+	        (reg == PCI_MIC2_MC_U_CTR1_A) ||
+		(reg == PCI_MIC2_MC_U_CTR1_B) ||
+	        (reg == PCI_MIC2_MC_U_CTR2_A) ||
+		(reg == PCI_MIC2_MC_U_CTR2_B) ||
+	        (reg == PCI_MIC2_MC_U_CTR3_A) ||
+		(reg == PCI_MIC2_MC_U_CTR3_B) ||
+		(reg == PCI_MIC2_MC_U_CTRL0) ||
+		(reg == PCI_MIC2_MC_U_CTRL1) ||
+		(reg == PCI_MIC2_MC_U_CTRL2) ||
+		(reg == PCI_MIC2_MC_U_CTRL3) ||
+		(reg == PCI_MIC2_MC_U_BOX_CTRL) ||
+		(reg == PCI_MIC2_MC_U_BOX_STATUS) ||
+		(reg == PCI_MIC2_MC_U_FIXED_CTR_A) ||
+		(reg == PCI_MIC2_MC_U_FIXED_CTR_B) ||
+		(reg == PCI_MIC2_MC_U_FIXED_CTRL) ||
+	        (reg == PCI_MIC2_MC_D_CTR0_A) ||
+		(reg == PCI_MIC2_MC_D_CTR0_B) ||
+	        (reg == PCI_MIC2_MC_D_CTR1_A) ||
+		(reg == PCI_MIC2_MC_D_CTR1_B) ||
+	        (reg == PCI_MIC2_MC_D_CTR2_A) ||
+		(reg == PCI_MIC2_MC_D_CTR2_B) ||
+	        (reg == PCI_MIC2_MC_D_CTR3_A) ||
+		(reg == PCI_MIC2_MC_D_CTR3_B) ||
+		(reg == PCI_MIC2_MC_D_CTRL0) ||
+		(reg == PCI_MIC2_MC_D_CTRL1) ||
+		(reg == PCI_MIC2_MC_D_CTRL2) ||
+		(reg == PCI_MIC2_MC_D_CTRL3) ||
+		(reg == PCI_MIC2_MC_D_BOX_CTRL) ||
+		(reg == PCI_MIC2_MC_D_BOX_STATUS) ||
+		(reg == PCI_MIC2_MC_D_FIXED_CTR_A) ||
+		(reg == PCI_MIC2_MC_D_FIXED_CTR_B) ||
+		(reg == PCI_MIC2_MC_D_FIXED_CTRL))
+	    {
+		return 1;
+	    }
+	    break;
+	case R2PCIE:
+	    if ((reg == PCI_MIC2_M2PCIE_CTR0_A) ||
+		(reg == PCI_MIC2_M2PCIE_CTR0_B) ||
+		(reg == PCI_MIC2_M2PCIE_CTR1_A) ||
+                (reg == PCI_MIC2_M2PCIE_CTR1_B) ||
+		(reg == PCI_MIC2_M2PCIE_CTR2_A) ||
+                (reg == PCI_MIC2_M2PCIE_CTR2_B) ||
+		(reg == PCI_MIC2_M2PCIE_CTR3_A) ||
+                (reg == PCI_MIC2_M2PCIE_CTR3_B) ||
+		(reg == PCI_MIC2_M2PCIE_CTRL0) ||
+		(reg == PCI_MIC2_M2PCIE_CTRL1) ||
+		(reg == PCI_MIC2_M2PCIE_CTRL2) ||
+		(reg == PCI_MIC2_M2PCIE_CTRL3) ||
+		(reg == PCI_MIC2_M2PCIE_BOX_CTRL) ||
+		(reg == PCI_MIC2_M2PCIE_BOX_STATUS))
+	    {
+		return 1;
+	    }
+	    break;
+	case IRP:
+	    if ((reg == PCI_MIC2_IRP_CTR0) ||
+		(reg == PCI_MIC2_IRP_CTR1) ||
+		(reg == PCI_MIC2_IRP_CTRL0) ||
+		(reg == PCI_MIC2_IRP_CTRL1) ||
+		(reg == PCI_MIC2_IRP_BOX_CTRL) ||
+		(reg == PCI_MIC2_IRP_BOX_STATUS))
+	    {
+		return 1;
+	    }
+	    break;
+	default:
+	    break;
+
+    }
+    return 0;
+}
+
 static int allowed_amd(uint32_t reg)
 {
     if ( (reg & 0xFFFFFFF0U) == 0xC0010000U)
@@ -462,7 +617,8 @@ static int allowed_amd(uint32_t reg)
     }
 }
 
-static int allowed_amd15(uint32_t reg)
+static int
+allowed_amd15(uint32_t reg)
 {
     if ( ((reg & 0xFFFFFFF0U) == 0xC0010000U) ||
             ((reg & 0xFFFFFFF0U) == 0xC0010200U) ||
@@ -476,7 +632,8 @@ static int allowed_amd15(uint32_t reg)
     }
 }
 
-static int allowed_amd16(uint32_t reg)
+static int
+allowed_amd16(uint32_t reg)
 {
     if ( ((reg & 0xFFFFFFF0U) == 0xC0010000U) ||
             ((reg & 0xFFFFFFF8U) == 0xC0010240U))
@@ -489,7 +646,8 @@ static int allowed_amd16(uint32_t reg)
     }
 }
 
-static void msr_read(AccessDataRecord * dRecord)
+static void
+msr_read(AccessDataRecord * dRecord)
 {
     uint64_t data;
     uint32_t cpu = dRecord->cpu;
@@ -498,6 +656,13 @@ static void msr_read(AccessDataRecord * dRecord)
     dRecord->errorcode = ERR_NOERROR;
     dRecord->data = 0;
 
+    if (!lock_check())
+    {
+        syslog(LOG_ERR,"Access to performance counters is locked.\n");
+        dRecord->errorcode = ERR_LOCKED;
+        return;
+    }
+
     if (FD_MSR[cpu] <= 0)
     {
         dRecord->errorcode = ERR_NODEV;
@@ -506,28 +671,39 @@ static void msr_read(AccessDataRecord * dRecord)
 
     if (!allowed(reg))
     {
+        syslog(LOG_ERR, "Access to register 0x%X not allowed\n", reg);
         dRecord->errorcode = ERR_RESTREG;
         return;
     }
 
     if (pread(FD_MSR[cpu], &data, sizeof(data), reg) != sizeof(data))
     {
-        syslog(LOG_ERR, "Failed to read data to register 0x%x on core %u", reg, cpu);
+        syslog(LOG_ERR, "Failed to read data from register 0x%x on core %u", reg, cpu);
+#ifdef DEBUG_LIKWID
         syslog(LOG_ERR, "%s", strerror(errno));
+#endif
         dRecord->errorcode = ERR_RWFAIL;
         return;
     }
     dRecord->data = data;
 }
 
-static void msr_write(AccessDataRecord * dRecord)
+static void
+msr_write(AccessDataRecord * dRecord)
 {
     uint32_t cpu = dRecord->cpu;
     uint32_t reg = dRecord->reg;
     uint64_t data = dRecord->data;
 
     dRecord->errorcode = ERR_NOERROR;
-    
+
+    if (!lock_check())
+    {
+        syslog(LOG_ERR,"Access to performance counters is locked.\n");
+        dRecord->errorcode = ERR_LOCKED;
+        return;
+    }
+
     if (FD_MSR[cpu] <= 0)
     {
         dRecord->errorcode = ERR_NODEV;
@@ -544,13 +720,16 @@ static void msr_write(AccessDataRecord * dRecord)
     if (pwrite(FD_MSR[cpu], &data, sizeof(data), reg) != sizeof(data))
     {
         syslog(LOG_ERR, "Failed to write data to register 0x%x on core %u", reg, cpu);
+#ifdef DEBUG_LIKWID
         syslog(LOG_ERR, "%s", strerror(errno));
+#endif
         dRecord->errorcode = ERR_RWFAIL;
         return;
     }
 }
 
-static void msr_check(AccessDataRecord * dRecord)
+static void
+msr_check(AccessDataRecord * dRecord)
 {
     uint32_t cpu = dRecord->cpu;
     dRecord->errorcode = ERR_NOERROR;
@@ -563,7 +742,8 @@ static void msr_check(AccessDataRecord * dRecord)
     return;
 }
 
-static void pci_read(AccessDataRecord* dRecord)
+static void
+pci_read(AccessDataRecord* dRecord)
 {
     uint32_t socketId = dRecord->cpu;
     uint32_t reg = dRecord->reg;
@@ -573,6 +753,13 @@ static void pci_read(AccessDataRecord* dRecord)
     dRecord->errorcode = ERR_NOERROR;
     dRecord->data = 0;
 
+    if (!lock_check())
+    {
+        syslog(LOG_ERR,"Access to performance counters is locked.\n");
+        dRecord->errorcode = ERR_LOCKED;
+        return;
+    }
+
     if (FD_PCI[socketId][device] == -2)
     {
         dRecord->errorcode = ERR_NODEV;
@@ -599,8 +786,10 @@ static void pci_read(AccessDataRecord* dRecord)
             dRecord->errorcode = ERR_OPENFAIL;
             return;
         }
+#ifdef DEBUG_LIKWID
         syslog(LOG_ERR, "Open device file %s for device %s (%s) on socket %u", pci_filepath,
                     pci_types[pci_devices_daemon[device].type].name, pci_devices_daemon[device].name, socketId);
+#endif
     }
 
     if (FD_PCI[socketId][device] > 0 && pread(FD_PCI[socketId][device], &data, sizeof(data), reg) != sizeof(data))
@@ -614,9 +803,8 @@ static void pci_read(AccessDataRecord* dRecord)
     dRecord->data = (uint64_t) data;
 }
 
-
-
-static void pci_write(AccessDataRecord* dRecord)
+static void
+pci_write(AccessDataRecord* dRecord)
 {
     uint32_t socketId = dRecord->cpu;
     uint32_t reg = dRecord->reg;
@@ -625,6 +813,13 @@ static void pci_write(AccessDataRecord* dRecord)
 
     dRecord->errorcode = ERR_NOERROR;
 
+    if (!lock_check())
+    {
+        syslog(LOG_ERR,"Access to performance counters is locked.\n");
+        dRecord->errorcode = ERR_LOCKED;
+        return;
+    }
+
     if (FD_PCI[socketId][device] == -2)
     {
         dRecord->errorcode = ERR_NODEV;
@@ -653,8 +848,10 @@ static void pci_write(AccessDataRecord* dRecord)
             dRecord->errorcode = ERR_OPENFAIL;
             return;
         }
+#ifdef DEBUG_LIKWID
         syslog(LOG_ERR, "Open device file %s for device %s (%s) on socket %u", pci_filepath,
                     pci_types[pci_devices_daemon[device].type].name, pci_devices_daemon[device].name, socketId);
+#endif
     }
 
     if (FD_PCI[socketId][device] > 0 && pwrite(FD_PCI[socketId][device], &data, sizeof data, reg) != sizeof data)
@@ -666,8 +863,8 @@ static void pci_write(AccessDataRecord* dRecord)
     }
 }
 
-
-static void pci_check(AccessDataRecord* dRecord)
+static void
+pci_check(AccessDataRecord* dRecord)
 {
     uint32_t socketId = dRecord->cpu;
     uint32_t device = dRecord->device;
@@ -681,7 +878,8 @@ static void pci_check(AccessDataRecord* dRecord)
     return;
 }
 
-static void kill_client(void)
+static void
+kill_client(void)
 {
     if (connfd != -1)
     {
@@ -691,7 +889,8 @@ static void kill_client(void)
     connfd = -1;
 }
 
-static void stop_daemon(void)
+static void
+stop_daemon(void)
 {
     kill_client();
     for (int i=0;i<MAX_NUM_NODES;i++)
@@ -712,7 +911,8 @@ static void stop_daemon(void)
     exit(EXIT_SUCCESS);
 }
 
-int getBusFromSocket(const uint32_t socket)
+static int
+getBusFromSocket(const uint32_t socket)
 {
     int cur_bus = 0;
     uint32_t cur_socket = 0;
@@ -747,7 +947,8 @@ int getBusFromSocket(const uint32_t socket)
     return -1;
 }
 
-static void Signal_Handler(int sig)
+static void
+Signal_Handler(int sig)
 {
     if (sig == SIGPIPE)
     {
@@ -762,7 +963,8 @@ static void Signal_Handler(int sig)
     }
 }
 
-static void daemonize(int* parentPid)
+static void
+daemonize(int* parentPid)
 {
     pid_t pid, sid;
 
@@ -839,6 +1041,7 @@ int main(void)
     }
 
     daemonize(&pid);
+    syslog(LOG_INFO, "AccessDaemon runs with UID %d, eUID %d\n", getuid(), geteuid());
 
     {
         uint32_t  eax = 0x00;
@@ -856,6 +1059,9 @@ int main(void)
         CPUID(eax, ebx, ecx, edx);
         uint32_t family = ((eax >> 8) & 0xFU) + ((eax >> 20) & 0xFFU);
         model  = (((eax >> 16) & 0xFU) << 4) + ((eax >> 4) & 0xFU);
+        eax = 0x0A;
+        CPUID(eax, ebx, ecx, edx);
+        num_pmc_counters = (int)((eax>>8)&0xFFU);
 
         switch (family)
         {
@@ -877,26 +1083,28 @@ int main(void)
                          (model == HASWELL_M2) ||
                          (model == BROADWELL) ||
                          (model == SKYLAKE1) ||
-                         (model == SKYLAKE2))
+                         (model == SKYLAKE2) ||
+                         (model == KABYLAKE1) ||
+                         (model == KABYLAKE2))
                 {
-                    allowed = allowed_haswell;
+                    allowed = allowed_sandybridge;
                 }
                 else if (model == BROADWELL_D)
                 {
-                    allowed = allowed_haswell;
+                    allowed = allowed_sandybridge;
                     isPCIUncore = 1;
                     allowedPci = allowed_pci_haswell;
                 }
                 else if (model == HASWELL_EP)
                 {
                     isPCIUncore = 1;
-                    allowed = allowed_haswell;
+                    allowed = allowed_sandybridge;
                     allowedPci = allowed_pci_haswell;
                 }
                 else if (model == BROADWELL_E)
                 {
                     isPCIUncore = 1;
-                    allowed = allowed_haswell;
+                    allowed = allowed_sandybridge;
                     allowedPci = allowed_pci_haswell;
                 }
                 else if ((model == ATOM_SILVERMONT_C) ||
@@ -908,6 +1116,12 @@ int main(void)
                 {
                     allowed = allowed_silvermont;
                 }
+                else if (model == XEON_PHI_KNL)
+                {
+                    allowed = allowed_knl;
+                    isPCIUncore = 1;
+                    allowedPci = allowed_pci_knl;
+                }
                 break;
             case K8_FAMILY:
             case K10_FAMILY:
@@ -928,10 +1142,10 @@ int main(void)
 
     /* setup filename for socket */
     filepath = (char*) calloc(sizeof(addr1.sun_path), 1);
-    snprintf(filepath, sizeof(addr1.sun_path), "/tmp/likwid-%d", pid);
+    snprintf(filepath, sizeof(addr1.sun_path), TOSTRING(LIKWIDSOCKETBASE) "-%d", pid);
 
     /* get a socket */
-    EXIT_IF_ERROR(sockfd = socket(AF_LOCAL, SOCK_STREAM, 0), socket failed);
+    LOG_AND_EXIT_IF_ERROR(sockfd = socket(AF_LOCAL, SOCK_STREAM, 0), socket failed);
 
     /* initialize socket data structure */
     bzero(&addr1, sizeof(addr1));
@@ -944,9 +1158,9 @@ int main(void)
     CHECK_ERROR(setfsuid(getuid()), setfsuid failed);
 
     /* bind and listen on socket */
-    EXIT_IF_ERROR(bind(sockfd, (SA*) &addr1, sizeof(addr1)), bind failed);
-    EXIT_IF_ERROR(listen(sockfd, 1), listen failed);
-    EXIT_IF_ERROR(chmod(filepath, S_IRUSR|S_IWUSR), chmod failed);
+    LOG_AND_EXIT_IF_ERROR(bind(sockfd, (SA*) &addr1, sizeof(addr1)), bind failed);
+    LOG_AND_EXIT_IF_ERROR(listen(sockfd, 1), listen failed);
+    LOG_AND_EXIT_IF_ERROR(chmod(filepath, S_IRUSR|S_IWUSR), chmod failed);
 
     socklen = sizeof(addr1);
 
@@ -1036,6 +1250,10 @@ int main(void)
                 //testDevice = 0x80862f30;
                 pci_devices_daemon = broadwellEP_pci_devices;
             }
+            else if (model == XEON_PHI_KNL)
+            {
+                pci_devices_daemon = knl_pci_devices;
+            }
             else
             {
                 //testDevice = 0;
@@ -1086,10 +1304,12 @@ int main(void)
                                 pci_devices_daemon[i].online = 1;
                                 close(fd);
                             }
+#ifdef DEBUG_LIKWID
                             else if (j==0)
                             {
                                 syslog(LOG_NOTICE, "Device %s for socket %d not found at path %s, excluded it from device list: %s\n",pci_devices_daemon[i].name,j, pci_filepath, strerror(errno));
                             }
+#endif
                         }
                     }
                 }
@@ -1162,9 +1382,10 @@ LOOP:
             dRecord.errorcode = ERR_UNKNOWN;
         }
 
-        EXIT_IF_ERROR(write(connfd, (void*) &dRecord, sizeof(AccessDataRecord)), write failed);
+        LOG_AND_EXIT_IF_ERROR(write(connfd, (void*) &dRecord, sizeof(AccessDataRecord)), write failed);
     }
 
     /* never reached */
     return EXIT_SUCCESS;
 }
+
diff --git a/src/access-daemon/setFreq.c b/src/access-daemon/setFreq.c
index 43adc74..091758e 100644
--- a/src/access-daemon/setFreq.c
+++ b/src/access-daemon/setFreq.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Implementation of frequency daemon
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Authors:  Jan Treibig (jt), jan.treibig at gmail.com,
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -28,7 +28,9 @@
  *
  * =======================================================================================
  */
+
 /* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
@@ -36,160 +38,284 @@
 char setfiles[3][100] = {"scaling_min_freq", "scaling_max_freq", "scaling_setspeed"};
 char getfiles[3][100] = {"cpuinfo_min_freq", "cpuinfo_max_freq", "cpuinfo_cur_freq"};
 
+enum cmds {
+    SET_MIN = 0,
+    SET_MAX = 1,
+    SET_CURRENT = 2,
+    SET_GOV
+};
+
 /* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
-static int get_numCPUs()
+
+static void
+help(char *execname)
+{
+    fprintf(stderr, "Usage: %s <processorID> <cmd> <frequency|governor> \n",execname);
+    fprintf(stderr, "       Valid values for <cmd>:\n");
+    fprintf(stderr, "       - cur: change current frequency\n");
+    fprintf(stderr, "       - min: change minimal frequency\n");
+    fprintf(stderr, "       - max: change maximal frequency\n");
+    fprintf(stderr, "       - gov: change governor\n");
+}
+
+static int
+get_numCPUs()
 {
     int cpucount = 0;
     char line[1024];
     FILE* fp = fopen("/proc/cpuinfo","r");
     if (fp != NULL)
     {
-        while( fgets(line,1024,fp) )
+        while( fgets(line, 1024, fp) )
         {
             if (strncmp(line, "processor", 9) == 0)
             {
                 cpucount++;
             }
         }
+        fclose(fp);
     }
     return cpucount;
 }
 
+static unsigned long
+read_freq(char* fstr)
+{
+    unsigned long freq = strtoul(fstr, NULL, 10);
+    if (freq <= 0)
+    {
+        fprintf(stderr, "Frequency must be greater than 0.\n");
+        exit(EXIT_FAILURE);
+    }
+    return freq;
+}
+
+static int
+valid_freq(unsigned long freq)
+{
+    FILE *f = NULL;
+    const char fname[] = "/sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies";
+    char delimiter[] = " ";
+    char buff[1024];
+    char freqstr[25];
+    char *ptr = NULL, *eptr = NULL;
+    
+    snprintf(freqstr, 24, "%lu", freq);
+    f = fopen(fname, "r");
+    if (f == NULL)
+    {
+        fprintf(stderr, "Cannot open file %s for reading!\n", fname);
+        return 0;
+    }
+    eptr = fgets(buff, 1024, f);
+    if (eptr == NULL)
+    {
+        fprintf(stderr, "Cannot read content of file %s!\n", fname);
+        fclose(f);
+        return 0;
+    }
+    ptr = strtok(buff, delimiter);
+    while (ptr != NULL)
+    {
+        if (strncmp(ptr, freqstr, strlen(ptr)) == 0)
+        {
+            fclose(f);
+            return 1;
+        }
+        ptr = strtok(NULL, delimiter);
+    }
+    fclose(f);
+    return 0;
+}
+
+static int
+valid_gov(char* gov)
+{
+    FILE *f = NULL;
+    const char fname[] = "/sys/devices/system/cpu/cpu0/cpufreq/scaling_available_governors";
+    char delimiter[] = " ";
+    char buff[1024];
+    char *ptr = NULL, *eptr = NULL;
+    
+    f = fopen(fname, "r");
+    if (f == NULL)
+    {
+        fprintf(stderr, "Cannot open file %s for reading!\n", fname);
+        return 0;
+    }
+    eptr = fgets(buff, 1024, f);
+    if (eptr == NULL)
+    {
+        fprintf(stderr, "Cannot read content of file %s!\n", fname);
+        fclose(f);
+        return 0;
+    }
+    ptr = strtok(buff, delimiter);
+    while (ptr != NULL)
+    {
+        if (strncmp(ptr, gov, strlen(ptr)) == 0)
+        {
+            fclose(f);
+            return 1;
+        }
+        ptr = strtok(NULL, delimiter);
+    }
+    fclose(f);
+    return 0;
+}
+
 /* #####  MAIN FUNCTION DEFINITION   ################## */
-int main (int argn, char** argv)
+
+int
+main (int argn, char** argv)
 {
     int i = 0;
-    int tmp;
-    int cpuid;
-    int freq = 0;
+    int cpuid = 0;
+    int set_id = -1;
+    unsigned long freq = 0;
     int numCPUs = 0;
-    char* gov;
-    char* gpath = malloc(100);
-    char* fpath = malloc(100);
+    enum cmds cmd;
+    char* gov = NULL;
+    char* fpath = NULL;
+    FILE* f = NULL;
 
     if (argn < 3 || argn > 4)
     {
-        fprintf(stderr, "Usage: %s <processorID> <frequency> [<governor>] \n",argv[0]);
-        free(gpath);
-        free(fpath);
+        help(argv[0]);
         exit(EXIT_FAILURE);
     }
 
+    /* Check for valid CPU */
     cpuid = atoi(argv[1]);
     numCPUs = get_numCPUs();
     if (cpuid < 0 || cpuid > numCPUs)
     {
         fprintf(stderr, "CPU %d not a valid CPU ID. Range from 0 to %d.\n", cpuid, numCPUs);
-        free(gpath);
-        free(fpath);
         exit(EXIT_FAILURE);
     }
-    freq  = atoi(argv[2]);
-    if (freq <= 0)
+
+    /* Read in command and argument */
+    if (strncmp(argv[2], "cur", 3) == 0)
     {
-        fprintf(stderr, "Frequency must be greater than 0.\n");
-        free(gpath);
-        free(fpath);
+        cmd = SET_CURRENT;
+        freq = read_freq(argv[3]);
+        if (!valid_freq(freq))
+        {
+            fprintf(stderr, "Invalid frequency %lu!\n\n",freq);
+            help(argv[0]);
+            exit(EXIT_FAILURE);
+        }
+    }
+    else if (strncmp(argv[2], "min", 3) == 0)
+    {
+        cmd = SET_MIN;
+        freq = read_freq(argv[3]);
+        if (!valid_freq(freq))
+        {
+            fprintf(stderr, "Invalid frequency %lu!\n\n",freq);
+            help(argv[0]);
+            exit(EXIT_FAILURE);
+        }
+    }
+    else if (strncmp(argv[2], "max", 3) == 0)
+    {
+        cmd = SET_MAX;
+        freq = read_freq(argv[3]);
+        if (!valid_freq(freq))
+        {
+            fprintf(stderr, "Invalid frequency %lu!\n\n",freq);
+            help(argv[0]);
+            exit(EXIT_FAILURE);
+        }
+    }
+    else if (strncmp(argv[2], "gov", 3) == 0)
+    {
+        cmd = SET_GOV;
+        gov = argv[3];
+        /* Only allow specific governors */
+        if (!valid_gov(gov))
+        {
+            fprintf(stderr, "Invalid governor %s!\n\n",gov);
+            help(argv[0]);
+            exit(EXIT_FAILURE);
+        }
+    }
+    else
+    {
+        fprintf(stderr, "Unknown command %s!\n\n", argv[2]);
+        help(argv[0]);
         exit(EXIT_FAILURE);
     }
 
-    if (argn == 4)
+    fpath = malloc(100 * sizeof(char));
+    if (!fpath)
     {
-        FILE* f;
-        gov = argv[3];
+        fprintf(stderr, "Unable to allocate space!\n\n");
+        exit(EXIT_FAILURE);
+    }
 
-        if ((strncmp(gov,"ondemand",8) != 0) &&
-            (strncmp(gov,"performance",11) != 0) &&
-            (strncmp(gov,"conservative",12) != 0) &&
-            (strncmp(gov,"powersave",9) != 0)) {
-            fprintf(stderr, "Invalid governor %s!\n",gov);
-            free(gpath);
+    /* If the current frequency should be set we have to make sure that the governor is
+     * 'userspace'. Minimal and maximal frequency are possible for other governors but
+     * they dynamically adjust the current clock speed.
+     */
+    if (cmd == SET_CURRENT)
+    {
+        int tmp = 0;
+        char testgov[1024];
+        snprintf(fpath, 99, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", cpuid);
+        f = fopen(fpath, "r");
+        if (f == NULL) {
+            fprintf(stderr, "Unable to open path %s for reading\n",fpath);
             free(fpath);
             return (EXIT_FAILURE);
         }
-        
-        for (i=0; i<2; i++)
+        tmp = fread(testgov, 100, sizeof(char), f);
+        if (strncmp(testgov, "userspace", 9) != 0)
         {
-            snprintf(fpath, 99, "/sys/devices/system/cpu/cpu%d/cpufreq/%s", cpuid, getfiles[i]);
-            f = fopen(fpath, "r");
+            fclose(f);
+            snprintf(fpath, 99, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", cpuid);
+            f = fopen(fpath, "w");
             if (f == NULL) {
                 fprintf(stderr, "Unable to open path %s for writing\n", fpath);
-                free(gpath);
                 free(fpath);
                 return (EXIT_FAILURE);
             }
-            tmp = fread(fpath, 100, sizeof(char), f);
-            freq = atoi(fpath);
-            fclose(f);
-            snprintf(fpath, 99, "/sys/devices/system/cpu/cpu%d/cpufreq/%s", cpuid, setfiles[i]);
+            fprintf(f,"userspace");
+        }
+        fclose(f);
+    }
+
+    switch(cmd)
+    {
+        case SET_CURRENT:
+        case SET_MIN:
+        case SET_MAX:
+            /* The cmd is also used as index in the setfiles array */
+            snprintf(fpath, 99, "/sys/devices/system/cpu/cpu%d/cpufreq/%s", cpuid, setfiles[cmd]);
             f = fopen(fpath, "w");
             if (f == NULL) {
                 fprintf(stderr, "Unable to open path %s for writing\n",fpath);
-                free(gpath);
                 free(fpath);
                 return (EXIT_FAILURE);
             }
             fprintf(f,"%d",freq);
             fclose(f);
-
-        }
-        snprintf(gpath, 99, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", cpuid);
-
-        f = fopen(gpath, "w");
-        if (f == NULL) {
-            fprintf(stderr, "Unable to open path %s for writing\n", gpath);
-            free(gpath);
-            free(fpath);
-            return (EXIT_FAILURE);
-        }
-        fprintf(f,"%s",gov);
-        fclose(f);
-        free(gpath);
-        free(fpath);
-        return(EXIT_SUCCESS);
-    }
-
-    snprintf(gpath, 99, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", cpuid);
-
-    FILE* f = fopen(gpath, "w");
-    if (f == NULL) {
-        fprintf(stderr, "Unable to open path %s for writing\n", gpath);
-        free(gpath);
-        free(fpath);
-        return (EXIT_FAILURE);
-    }
-    if ((argn == 4) &&
-        ((strncmp(argv[3],"ondemand",8) == 0) ||
-        (strncmp(argv[3],"performance",11) == 0) ||
-        (strncmp(argv[3],"conservative",12) == 0) ||
-        (strncmp(argv[3],"powersave",9) == 0)))
-    {
-        fprintf(f, "%s", argv[3]);
-        tmp = 1;
-    }
-    else
-    {
-        fprintf(f, "%s", "userspace");
-        tmp = 3;
-    }
-    fclose(f);
-
-    for (i=0;i<tmp;i++)
-    {
-        snprintf(fpath, 99, "/sys/devices/system/cpu/cpu%d/cpufreq/%s", cpuid, setfiles[i]);
-        f = fopen(fpath, "w");
-        if (f == NULL) {
-            fprintf(stderr, "Unable to open path %s for writing\n",fpath);
-            free(gpath);
-            free(fpath);
-            return (EXIT_FAILURE);
-        }
-        fprintf(f,"%d",freq);
-        fclose(f);
+            break;
+        case SET_GOV:
+            snprintf(fpath, 99, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", cpuid);
+            f = fopen(fpath, "w");
+            if (f == NULL) {
+                fprintf(stderr, "Unable to open path %s for writing\n", fpath);
+                free(fpath);
+                return (EXIT_FAILURE);
+            }
+            fprintf(f,"%s",gov);
+            fclose(f);
+            break;
     }
-    free(gpath);
+    
     free(fpath);
-    return(EXIT_SUCCESS);
-}
+    return EXIT_SUCCESS;
 
+}
 
diff --git a/src/access.c b/src/access.c
index 350cd9d..6ea0b24 100644
--- a/src/access.c
+++ b/src/access.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Interface for the different register access modules.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -28,6 +28,8 @@
  * =======================================================================================
  */
 
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -52,18 +54,20 @@
 #include <access_x86.h>
 
 
+/* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
 
 static int registeredCpus = 0;
 static int registeredCpuList[MAX_NUM_THREADS] = { [0 ... (MAX_NUM_THREADS-1)] = 0 };
-
-
 static int (*access_read)(PciDeviceIndex dev, const int cpu, uint32_t reg, uint64_t *data) = NULL;
 static int (*access_write)(PciDeviceIndex dev, const int cpu, uint32_t reg, uint64_t data) = NULL;
 static int (*access_init) (int cpu_id) = NULL;
 static void (*access_finalize) (int cpu_id) = NULL;
 static int (*access_check) (PciDeviceIndex dev, int cpu_id) = NULL;
 
-void HPMmode(int mode)
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
+
+void
+HPMmode(int mode)
 {
     if ((mode == ACCESSMODE_DIRECT) || (mode == ACCESSMODE_DAEMON))
     {
@@ -71,7 +75,8 @@ void HPMmode(int mode)
     }
 }
 
-int HPMinit(void)
+int
+HPMinit(void)
 {
     int ret = 0;
     if (access_init == NULL)
@@ -101,17 +106,18 @@ int HPMinit(void)
         }
 #endif
     }
-    
+
     return 0;
 }
 
-
-int HPMinitialized(void)
+int
+HPMinitialized(void)
 {
     return registeredCpus;
 }
 
-int HPMaddThread(int cpu_id)
+int
+HPMaddThread(int cpu_id)
 {
     int ret;
     if (registeredCpuList[cpu_id] == 0)
@@ -138,7 +144,8 @@ int HPMaddThread(int cpu_id)
     return 0;
 }
 
-void HPMfinalize()
+void
+HPMfinalize()
 {
     if (registeredCpus != 0)
     {
@@ -169,7 +176,8 @@ void HPMfinalize()
     return;
 }
 
-int HPMread(int cpu_id, PciDeviceIndex dev, uint32_t reg, uint64_t* data)
+int
+HPMread(int cpu_id, PciDeviceIndex dev, uint32_t reg, uint64_t* data)
 {
     uint64_t tmp = 0x0ULL;
     *data = 0x0ULL;
@@ -191,7 +199,8 @@ int HPMread(int cpu_id, PciDeviceIndex dev, uint32_t reg, uint64_t* data)
     return err;
 }
 
-int HPMwrite(int cpu_id, PciDeviceIndex dev, uint32_t reg, uint64_t data)
+int
+HPMwrite(int cpu_id, PciDeviceIndex dev, uint32_t reg, uint64_t data)
 {
     int err = 0;
     if (dev >= MAX_NUM_PCI_DEVICES)
@@ -211,7 +220,8 @@ int HPMwrite(int cpu_id, PciDeviceIndex dev, uint32_t reg, uint64_t data)
     return err;
 }
 
-int HPMcheck(PciDeviceIndex dev, int cpu_id)
+int
+HPMcheck(PciDeviceIndex dev, int cpu_id)
 {
     if (registeredCpuList[cpu_id] == 0)
     {
@@ -219,3 +229,4 @@ int HPMcheck(PciDeviceIndex dev, int cpu_id)
     }
     return access_check(dev, cpu_id);
 }
+
diff --git a/src/access_client.c b/src/access_client.c
index 62a7e7c..be28287 100644
--- a/src/access_client.c
+++ b/src/access_client.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Interface to the access daemon for the access module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -27,6 +27,9 @@
  *
  * =======================================================================================
  */
+
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -52,6 +55,7 @@
 #define gettid() syscall(SYS_gettid)
 
 /* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
+
 static int globalSocket = -1;
 static pid_t masterPid = 0;
 static int cpuSockets_open = 0;
@@ -60,6 +64,7 @@ static pthread_mutex_t globalLock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_mutex_t cpuLocks[MAX_NUM_THREADS] = { [0 ... MAX_NUM_THREADS-1] = PTHREAD_MUTEX_INITIALIZER };
 
 /* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
+
 static char*
 access_client_strerror(AccessErrorType det)
 {
@@ -72,6 +77,7 @@ access_client_strerror(AccessErrorType det)
         case ERR_RWFAIL:     return "failed to read/write register";
         case ERR_DAEMONBUSY: return "daemon already has a same/higher priority client";
         case ERR_NODEV:      return "no such pci device";
+        case ERR_LOCKED:     return "access to registers is locked";
         default:             return "UNKNOWN errorcode";
     }
 }
@@ -122,7 +128,7 @@ access_client_startDaemon(int cpu_id)
         ERROR_PRINT(Failed to find the daemon '%s'\n, exeprog);
         exit(EXIT_FAILURE);
     }
-
+    DEBUG_PRINT(DEBUGLEV_INFO, Starting daemon %s, exeprog);
     pid = fork();
 
     if (pid == 0)
@@ -145,14 +151,15 @@ access_client_startDaemon(int cpu_id)
     }
     else if (pid < 0)
     {
-        ERROR_PLAIN_PRINT(Failed to fork);
+        ERROR_PRINT(Failed to fork access daemon for CPU %d, cpu_id);
+        return pid;
     }
 
     EXIT_IF_ERROR(socket_fd = socket(AF_LOCAL, SOCK_STREAM, 0), socket() failed);
 
     address.sun_family = AF_LOCAL;
     address_length = sizeof(address);
-    snprintf(address.sun_path, sizeof(address.sun_path), "/tmp/likwid-%d", pid);
+    snprintf(address.sun_path, sizeof(address.sun_path), TOSTRING(LIKWIDSOCKETBASE) "-%d", pid);
     filepath = strdup(address.sun_path);
 
     while (timeout > 0)
@@ -167,17 +174,16 @@ access_client_startDaemon(int cpu_id)
         }
 
         timeout--;
-        DEBUG_PRINT(DEBUGLEV_INFO, Still waiting for socket %s ..., filepath);
+        DEBUG_PRINT(DEBUGLEV_INFO, Still waiting for socket %s for CPU %d..., filepath, cpu_id);
     }
-    
+
     if (timeout <= 0)
     {
         ERRNO_PRINT;  /* should hopefully still work, as we make no syscalls in between. */
-        fprintf(stderr, "Exiting due to timeout: The socket file at '%s' \
-                could not be opened within 10 seconds.\n", filepath);
-        fprintf(stderr, "Consult the error message above this to find out why.\n");
-        fprintf(stderr, "If the error is 'no such file or directoy', \
-                it usually means that likwid-accessD just failed to start.\n");
+        fprintf(stderr, "Exiting due to timeout: The socket file at '%s' could not be\n", filepath);
+        fprintf(stderr, "opened within 10 seconds. Consult the error message above\n");
+        fprintf(stderr, "this to find out why. If the error is 'no such file or directoy',\n");
+        fprintf(stderr, "it usually means that likwid-accessD just failed to start.\n");
         exit(EXIT_FAILURE);
     }
     DEBUG_PRINT(DEBUGLEV_INFO, Successfully opened socket %s to daemon for CPU %d, filepath, cpu_id);
@@ -188,9 +194,9 @@ access_client_startDaemon(int cpu_id)
 
 /* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
 
-int access_client_init(int cpu_id)
+int
+access_client_init(int cpu_id)
 {
-    int ret = 0;
     if (masterPid != 0 && gettid() == masterPid)
     {
         return 0;
@@ -199,6 +205,12 @@ int access_client_init(int cpu_id)
     {
         pthread_mutex_lock(&cpuLocks[cpu_id]);
         cpuSockets[cpu_id] = access_client_startDaemon(cpu_id);
+        if (cpuSockets[cpu_id] < 0)
+        {
+            //ERROR_PRINT(Start of access daemon failed for CPU %d, cpu_id);
+            pthread_mutex_unlock(&cpuLocks[cpu_id]);
+            return cpuSockets[cpu_id];
+        }
         cpuSockets_open++;
         pthread_mutex_unlock(&cpuLocks[cpu_id]);
         if (globalSocket == -1)
@@ -208,11 +220,13 @@ int access_client_init(int cpu_id)
             masterPid = gettid();
             pthread_mutex_unlock(&globalLock);
         }
+        return 0;
     }
-    return ret;
+    return -1;
 }
 
-int access_client_read(PciDeviceIndex dev, const int cpu_id, uint32_t reg, uint64_t *data)
+int
+access_client_read(PciDeviceIndex dev, const int cpu_id, uint32_t reg, uint64_t *data)
 {
     int ret;
     int socket = globalSocket;
@@ -220,6 +234,7 @@ int access_client_read(PciDeviceIndex dev, const int cpu_id, uint32_t reg, uint6
     AccessDataRecord record;
     record.cpu = cpu_id;
     record.device = MSR_DEV;
+    record.errorcode = ERR_OPENFAIL;
 
     if (cpuSockets_open == 0)
     {
@@ -281,7 +296,8 @@ int access_client_read(PciDeviceIndex dev, const int cpu_id, uint32_t reg, uint6
     return 0;
 }
 
-int access_client_write(PciDeviceIndex dev, const int cpu_id, uint32_t reg, uint64_t data)
+int
+access_client_write(PciDeviceIndex dev, const int cpu_id, uint32_t reg, uint64_t data)
 {
     int socket = globalSocket;
     int ret;
@@ -289,6 +305,7 @@ int access_client_write(PciDeviceIndex dev, const int cpu_id, uint32_t reg, uint
     record.cpu = cpu_id;
     record.device = MSR_DEV;
     pthread_mutex_t* lockptr = &globalLock;
+    record.errorcode = ERR_OPENFAIL;
 
     if (cpuSockets_open == 0)
     {
@@ -347,7 +364,8 @@ int access_client_write(PciDeviceIndex dev, const int cpu_id, uint32_t reg, uint
     return 0;
 }
 
-void access_client_finalize(int cpu_id)
+void
+access_client_finalize(int cpu_id)
 {
     AccessDataRecord record;
     if (cpuSockets[cpu_id] > 0)
@@ -362,9 +380,11 @@ void access_client_finalize(int cpu_id)
     {
         globalSocket = -1;
     }
+    masterPid = 0;
 }
 
-int access_client_check(PciDeviceIndex dev, int cpu_id)
+int
+access_client_check(PciDeviceIndex dev, int cpu_id)
 {
     int socket = globalSocket;
     pthread_mutex_t* lockptr = &globalLock;
@@ -373,6 +393,7 @@ int access_client_check(PciDeviceIndex dev, int cpu_id)
     record.cpu = cpu_id;
     record.device = dev;
     record.type = DAEMON_CHECK;
+    record.errorcode = ERR_OPENFAIL;
     if (dev != MSR_DEV)
     {
         record.cpu = affinity_core2node_lookup[cpu_id];
@@ -395,3 +416,4 @@ int access_client_check(PciDeviceIndex dev, int cpu_id)
     }
     return 0;
 }
+
diff --git a/src/access_x86.c b/src/access_x86.c
index dfb3ed5..1e73055 100644
--- a/src/access_x86.c
+++ b/src/access_x86.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Interface to x86 related functions for the access module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -27,6 +27,9 @@
  *
  * =======================================================================================
  */
+
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -35,7 +38,6 @@
 #include <unistd.h>
 #include <signal.h>
 
-
 #include <types.h>
 #include <error.h>
 #include <topology.h>
@@ -45,9 +47,10 @@
 #include <access_x86_pci.h>
 #include <affinity.h>
 
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
 
-
-int access_x86_init(int cpu_id)
+int
+access_x86_init(int cpu_id)
 {
     int ret = access_x86_msr_init(cpu_id);
     if (ret == 0)
@@ -60,7 +63,8 @@ int access_x86_init(int cpu_id)
     return ret;
 }
 
-int access_x86_read(PciDeviceIndex dev, const int cpu_id, uint32_t reg, uint64_t *data)
+int
+access_x86_read(PciDeviceIndex dev, const int cpu_id, uint32_t reg, uint64_t *data)
 {
     int err;
     uint64_t tmp = 0x0ULL;
@@ -80,7 +84,8 @@ int access_x86_read(PciDeviceIndex dev, const int cpu_id, uint32_t reg, uint64_t
     return err;
 }
 
-int access_x86_write(PciDeviceIndex dev, const int cpu_id, uint32_t reg, uint64_t data)
+int
+access_x86_write(PciDeviceIndex dev, const int cpu_id, uint32_t reg, uint64_t data)
 {
     int err;
     if (dev == MSR_DEV)
@@ -97,7 +102,8 @@ int access_x86_write(PciDeviceIndex dev, const int cpu_id, uint32_t reg, uint64_
     return err;
 }
 
-void access_x86_finalize(int cpu_id)
+void
+access_x86_finalize(int cpu_id)
 {
     access_x86_msr_finalize(cpu_id);
     if (cpuid_info.supportUncore)
@@ -106,7 +112,8 @@ void access_x86_finalize(int cpu_id)
     }
 }
 
-int access_x86_check(PciDeviceIndex dev, int cpu_id)
+int
+access_x86_check(PciDeviceIndex dev, int cpu_id)
 {
     if (dev == MSR_DEV)
     {
@@ -118,3 +125,4 @@ int access_x86_check(PciDeviceIndex dev, int cpu_id)
     }
     return 0;
 }
+
diff --git a/src/access_x86_msr.c b/src/access_x86_msr.c
index 1ce7aec..e198389 100644
--- a/src/access_x86_msr.c
+++ b/src/access_x86_msr.c
@@ -9,8 +9,8 @@
  *                   sys interface of the Linux 2.6 kernel. This module
  *                   is based on the msr-util tools.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com.
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -56,19 +56,23 @@
 #ifdef LIKWID_PROFILE_COUNTER_READ
 #include <timer.h>
 #endif
+
 /* #####   MACROS  -  LOCAL TO THIS SOURCE FILE   ######################### */
+
 #define MAX_LENGTH_MSR_DEV_NAME  20
 #define STRINGIFY(x) #x
 #define TOSTRING(x) STRINGIFY(x)
 
 /* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
+
 static int FD[MAX_NUM_THREADS] = { [0 ... MAX_NUM_THREADS-1] = -1 };
 static int rdpmc_works_pmc = -1;
 static int rdpmc_works_fixed = -1;
 
 /* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
 
-static inline int __rdpmc(int cpu_id, int counter, uint64_t* value)
+static inline int
+__rdpmc(int cpu_id, int counter, uint64_t* value)
 {
     unsigned low, high;
     cpu_set_t cpuset, current;
@@ -82,18 +86,21 @@ static inline int __rdpmc(int cpu_id, int counter, uint64_t* value)
     return 0;
 }
 
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
+
 //Needed for rdpmc check
-void segfault_sigaction(int signal, siginfo_t *si, void *arg)
+void
+segfault_sigaction(int signal, siginfo_t *si, void *arg)
 {
     exit(1);
 }
 
-int test_rdpmc(int cpu_id, uint64_t value, int flag)
+int
+test_rdpmc(int cpu_id, uint64_t value, int flag)
 {
     int ret;
     int pid;
 
-
     pid = fork();
 
     if (pid < 0)
@@ -115,14 +122,17 @@ int test_rdpmc(int cpu_id, uint64_t value, int flag)
             usleep(100);
         }
         exit(0);
-    } else {
+    }
+    else
+    {
         int status = 0;
         int waiting = 0;
         waiting = waitpid(pid, &status, 0);
         if ((waiting < 0) || (WEXITSTATUS(status) != 0))
         {
             ret = 0;
-        } else 
+        }
+        else
         {
             ret = 1;
         }
@@ -130,9 +140,6 @@ int test_rdpmc(int cpu_id, uint64_t value, int flag)
     return ret;
 }
 
-/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
-
-
 int
 access_x86_msr_init(const int cpu_id)
 {
@@ -146,7 +153,7 @@ access_x86_msr_init(const int cpu_id)
     }
     msr_file_name = (char*) malloc(MAX_LENGTH_MSR_DEV_NAME * sizeof(char));
     if (!msr_file_name)
-    {    
+    {
         return -ENOMEM;
     }
 
@@ -154,7 +161,16 @@ access_x86_msr_init(const int cpu_id)
     fd = open(msr_file_name, O_RDWR);
     if (fd < 0)
     {
-        sprintf(msr_file_name,"/dev/cpu/%d/msr", cpu_id);
+        sprintf(msr_file_name,"/dev/cpu/%d/msr_safe", cpu_id);
+        fd = open(msr_file_name, O_RDWR);
+        if (fd < 0)
+        {
+            sprintf(msr_file_name,"/dev/cpu/%d/msr", cpu_id);
+        }
+        else
+        {
+            close(fd);
+        }
     }
     else
     {
@@ -185,7 +201,7 @@ access_x86_msr_init(const int cpu_id)
     }
 
     sprintf(msr_file_name,"/dev/msr%d",cpu_id);
-    fd = open(msr_file_name, O_RDWR); 
+    fd = open(msr_file_name, O_RDWR);
     if (fd < 0)
     {
         sprintf(msr_file_name,"/dev/cpu/%d/msr",cpu_id);
@@ -219,7 +235,6 @@ access_x86_msr_finalize(const int cpu_id)
     }
 }
 
-
 int
 access_x86_msr_read( const int cpu_id, uint32_t reg, uint64_t *data)
 {
@@ -265,7 +280,7 @@ access_x86_msr_write( const int cpu_id, uint32_t reg, uint64_t data)
     int ret;
     if (FD[cpu_id] > 0)
     {
-        DEBUG_PRINT(DEBUGLEV_DEVELOP, Write MSR counter 0x%X with WRMSR instruction on CPU %d data 0x%X, reg, cpu_id, data);
+        DEBUG_PRINT(DEBUGLEV_DEVELOP, Write MSR counter 0x%X with WRMSR instruction on CPU %d data 0x%lX, reg, cpu_id, data);
         ret = pwrite(FD[cpu_id], &data, sizeof(data), reg);
         if (ret != sizeof(data))
         {
@@ -286,3 +301,4 @@ int access_x86_msr_check(PciDeviceIndex dev, int cpu_id)
     }
     return 0;
 }
+
diff --git a/src/access_x86_pci.c b/src/access_x86_pci.c
index 81ea0ae..b6ff223 100644
--- a/src/access_x86_pci.c
+++ b/src/access_x86_pci.c
@@ -8,8 +8,8 @@
  *                   performance monitoring registers in PCI Cfg space
  *                   for Intel Sandy Bridge Processors.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com,
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -60,6 +60,7 @@
 #endif
 
 /* #####   MACROS  -  LOCAL TO THIS SOURCE FILE   ######################### */
+
 #define STRINGIFY(x) #x
 #define TOSTRING(x) STRINGIFY(x)
 
@@ -83,6 +84,7 @@ static int nr_sockets = 0;
 static char* socket_bus[MAX_NUM_NODES] = { [0 ... (MAX_NUM_NODES-1)] = "N-A"};
 
 /* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
+
 /* Dirty hack to avoid nonull warnings */
 int (*ownaccess)(const char*, int);
 int (*ownopen)(const char*, int, ...);
@@ -121,6 +123,12 @@ access_x86_pci_init(const int socket)
             case BROADWELL_D:
                 testDevice = 0x6f30;
                 break;
+            case BROADWELL_E:
+                testDevice = 0x6f30;
+                break;
+            case XEON_PHI_KNL:
+                testDevice = 0x7843;
+                break;
             default:
                 DEBUG_PRINT(DEBUGLEV_INFO,CPU model %s does not support PCI based Uncore performance monitoring, cpuid_info.name);
                 return -ENODEV;
@@ -162,7 +170,6 @@ access_x86_pci_init(const int socket)
 #endif
     }
 
-
     for(int j=1;j<MAX_NUM_PCI_DEVICES;j++)
     {
         if ((pci_devices[j].path != NULL) && (FD[socket][j] == -2))
@@ -176,7 +183,8 @@ access_x86_pci_init(const int socket)
                 pci_devices[j].online = 1;
                 if (access_x86_initialized == 0)
                 {
-                    DEBUG_PRINT(DEBUGLEV_DETAIL, PCI device %s (%d) online for socket %d at path %s, pci_devices[j].name,j, socket,bdata(filepath));
+                    DEBUG_PRINT(DEBUGLEV_DETAIL,
+                            PCI device %s (%d) online for socket %d at path %s, pci_devices[j].name,j, socket,bdata(filepath));
                     if (ownaccess(bdata(filepath),R_OK|W_OK))
                     {
                         ERROR_PRINT(PCI device %s (%d) online for socket %d at path %s but not accessible, pci_devices[j].name,j, socket,bdata(filepath));
@@ -194,7 +202,6 @@ access_x86_pci_init(const int socket)
     return 0;
 }
 
-
 void
 access_x86_pci_finalize(const int socket)
 {
@@ -207,7 +214,6 @@ access_x86_pci_finalize(const int socket)
     }
 }
 
-
 int
 access_x86_pci_read(PciDeviceIndex dev, const int socket, uint32_t reg, uint64_t *data)
 {
@@ -234,7 +240,7 @@ access_x86_pci_read(PciDeviceIndex dev, const int socket, uint32_t reg, uint64_t
 
         if ( FD[socket][dev] < 0)
         {
-            ERROR_PRINT(Failed to open PCI device %s at path %s\n, 
+            ERROR_PRINT(Failed to open PCI device %s at path %s\n,
                             pci_devices[dev].name,
                             bdata(filepath));
             *data = 0ULL;
@@ -244,7 +250,7 @@ access_x86_pci_read(PciDeviceIndex dev, const int socket, uint32_t reg, uint64_t
     }
 
     if ( FD[socket][dev] > 0 &&
-         pread(FD[socket][dev], &tmp, sizeof(tmp), reg) != sizeof(tmp) ) 
+         pread(FD[socket][dev], &tmp, sizeof(tmp), reg) != sizeof(tmp) )
     {
         ERROR_PRINT(Read from PCI device %s at register 0x%x failed, pci_devices[dev].name, reg);
         *data = 0ULL;
@@ -254,8 +260,6 @@ access_x86_pci_read(PciDeviceIndex dev, const int socket, uint32_t reg, uint64_t
     return 0;
 }
 
-
-
 int
 access_x86_pci_write(PciDeviceIndex dev, const int socket, uint32_t reg, uint64_t data)
 {
@@ -276,12 +280,11 @@ access_x86_pci_write(PciDeviceIndex dev, const int socket, uint32_t reg, uint64_
         filepath = bfromcstr ( PCI_ROOT_PATH );
         bcatcstr(filepath, socket_bus[socket]);
         bcatcstr(filepath, pci_devices[dev].path );
-        
         FD[socket][dev] = ownopen( bdata(filepath), O_RDWR);
 
         if ( FD[socket][dev] < 0)
         {
-            ERROR_PRINT(Failed to open PCI device %s at path %s\n, 
+            ERROR_PRINT(Failed to open PCI device %s at path %s\n,
                                 pci_devices[dev].name,
                                 bdata(filepath));
             return -EACCES;
@@ -298,7 +301,8 @@ access_x86_pci_write(PciDeviceIndex dev, const int socket, uint32_t reg, uint64_
     return 0;
 }
 
-int access_x86_pci_check(PciDeviceIndex dev, int socket)
+int
+access_x86_pci_check(PciDeviceIndex dev, int socket)
 {
     if (dev == MSR_DEV)
     {
diff --git a/src/affinity.c b/src/affinity.c
index ec27643..30e0033 100644
--- a/src/affinity.c
+++ b/src/affinity.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Implementation of affinity module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com,
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -55,12 +55,12 @@
 /* #####   EXPORTED VARIABLES   ########################################### */
 
 int affinity_core2node_lookup[MAX_NUM_THREADS];
+int affinity_thread2core_lookup[MAX_NUM_THREADS];
 
 /* #####   MACROS  -  LOCAL TO THIS SOURCE FILE   ######################### */
 
 #define gettid() syscall(SYS_gettid)
 
-
 /* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
 
 static int  affinity_numberOfDomains = 0;
@@ -284,6 +284,7 @@ affinity_init()
                                       domains[currentDomain + subCounter].processorList,
                                       i, offset,
                                       domains[currentDomain + subCounter].numberOfProcessors);
+
             domains[currentDomain + subCounter].numberOfProcessors = tmp;
             offset += (tmp < numberOfCoresPerCache ? tmp : numberOfCoresPerCache);
             subCounter++;
@@ -301,12 +302,20 @@ affinity_init()
             {
                 domains[currentDomain + subCounter].numberOfProcessors =
                                 numa_info.nodes[subCounter].numberOfProcessors;
+
                 domains[currentDomain + subCounter].numberOfCores =
                                 numa_info.nodes[subCounter].numberOfProcessors/cpuid_topology.numThreadsPerCore;
+
                 domains[currentDomain + subCounter].tag = bformat("M%d", subCounter);
-                DEBUG_PRINT(DEBUGLEV_DEVELOP, Affinity domain M%d: %d HW threads on %d cores, subCounter, domains[currentDomain + subCounter].numberOfProcessors, domains[currentDomain + subCounter].numberOfCores);
+
+                DEBUG_PRINT(DEBUGLEV_DEVELOP,
+                        Affinity domain M%d: %d HW threads on %d cores,
+                        subCounter, domains[currentDomain + subCounter].numberOfProcessors,
+                        domains[currentDomain + subCounter].numberOfCores);
+
                 domains[currentDomain + subCounter].processorList =
                                 (int*) malloc(numa_info.nodes[subCounter].numberOfProcessors*sizeof(int));
+
                 if (!domains[currentDomain + subCounter].processorList)
                 {
                     fprintf(stderr,"No more memory for %ld bytes for processor list of affinity domain %s\n",
@@ -332,12 +341,18 @@ affinity_init()
         domains[currentDomain + subCounter].numberOfProcessors = NUMAthreads;
         domains[currentDomain + subCounter].numberOfCores =  NUMAthreads/cpuid_topology.numThreadsPerCore;
         domains[currentDomain + subCounter].tag = bformat("M%d", subCounter);
-        DEBUG_PRINT(DEBUGLEV_DEVELOP, Affinity domain M%d: %d HW threads on %d cores, subCounter, domains[currentDomain + subCounter].numberOfProcessors, domains[currentDomain + subCounter].numberOfCores);
+
+        DEBUG_PRINT(DEBUGLEV_DEVELOP,
+                Affinity domain M%d: %d HW threads on %d cores,
+                subCounter, domains[currentDomain + subCounter].numberOfProcessors,
+                domains[currentDomain + subCounter].numberOfCores);
+
         domains[currentDomain + subCounter].processorList = (int*) malloc(NUMAthreads*sizeof(int));
+
         if (!domains[currentDomain + subCounter].processorList)
         {
             fprintf(stderr,"No more memory for %ld bytes for processor list of affinity domain %s\n",
-                    NUMAthreads*sizeof(int), 
+                    NUMAthreads*sizeof(int),
                     bdata(domains[currentDomain + subCounter].tag));
             return;
         }
@@ -365,7 +380,6 @@ affinity_init()
     affinity_initialized = 1;
 }
 
-
 void
 affinity_finalize()
 {
@@ -402,7 +416,6 @@ affinity_finalize()
     affinity_initialized = 0;
 }
 
-
 int
 affinity_processGetProcessorId()
 {
@@ -419,7 +432,6 @@ affinity_processGetProcessorId()
     return getProcessorID(&cpu_set);
 }
 
-
 int
 affinity_threadGetProcessorId()
 {
@@ -449,7 +461,6 @@ affinity_pinThread(int processorId)
 }
 #endif
 
-
 void
 affinity_pinProcess(int processorId)
 {
@@ -461,7 +472,7 @@ affinity_pinProcess(int processorId)
 }
 
 void
-affinity_pinProcesses(int cpu_count, int* processorIds)
+affinity_pinProcesses(int cpu_count, const int* processorIds)
 {
     int i;
     cpu_set_t cpuset;
@@ -474,7 +485,6 @@ affinity_pinProcesses(int cpu_count, int* processorIds)
     sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
 }
 
-
 const AffinityDomain*
 affinity_getDomain(bstring domain)
 {
diff --git a/src/applications/likwid-agent.lua b/src/applications/likwid-agent.lua
index b557cbc..c162ed7 100644
--- a/src/applications/likwid-agent.lua
+++ b/src/applications/likwid-agent.lua
@@ -6,8 +6,8 @@
  *
  *      Description:  A monitoring daemon for hardware performance counters.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at gmail.com
  *      Project:  likwid
@@ -268,7 +268,6 @@ local function logger(results)
             f:close()
         end
     end
-    
 end
 
 local function check_gmetric()
diff --git a/src/applications/likwid-features.lua b/src/applications/likwid-features.lua
index 787aa22..470df45 100644
--- a/src/applications/likwid-features.lua
+++ b/src/applications/likwid-features.lua
@@ -6,8 +6,8 @@
  *
  *      Description:  A application to retrieve and manipulate CPU features.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   4.0
+ *      Released:  28.04.2015
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at gmail.com
  *      Project:  likwid
diff --git a/src/applications/likwid-genTopoCfg.lua b/src/applications/likwid-genTopoCfg.lua
index 845c359..5f42bf8 100644
--- a/src/applications/likwid-genTopoCfg.lua
+++ b/src/applications/likwid-genTopoCfg.lua
@@ -8,8 +8,8 @@
  *                    that is used by likwid to avoid reading the systems architecture at
  *                    each start.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at gmail.com
  *      Project:  likwid
@@ -116,7 +116,6 @@ for field, value in pairs(cputopo) do
             for k,v in pairs(threadPool_order) do
                 file:write(str..tostring(v).." = "..tostring(tab[v]).."\n")
             end
-            
         end
     elseif (field == "cacheLevels") then
         for id, tab in pairs(cputopo["cacheLevels"]) do
@@ -124,7 +123,6 @@ for field, value in pairs(cputopo) do
             for k,v in pairs(cacheLevels_order) do
                 file:write(str..tostring(v).." = "..tostring(tab[v]).."\n")
             end
-            
         end
     end
 end
@@ -135,8 +133,8 @@ for field, value in pairs(numainfo["nodes"]) do
         if id ~= "processors" and id ~= "distances" then
             file:write("numa_info nodes "..tostring(field).." "..tostring(id).." = "..tostring(tab).."\n")
         elseif id == "processors" then
-            for k,v in pairs(tab) do 
-                str = str..","..tostring(v) 
+            for k,v in pairs(tab) do
+                str = str..","..tostring(v)
                 file:write("numa_info nodes "..tostring(field).." "..tostring(id).." "..tostring(k).." = "..tostring(v).."\n")
             end
         elseif id == "distances" then
diff --git a/src/applications/likwid-memsweeper.lua b/src/applications/likwid-memsweeper.lua
index 999dedd..bf71a91 100644
--- a/src/applications/likwid-memsweeper.lua
+++ b/src/applications/likwid-memsweeper.lua
@@ -6,8 +6,8 @@
  *
  *      Description:  An application to clean up NUMA memory domains.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at gmail.com
  *      Project:  likwid
diff --git a/src/applications/likwid-mpirun.lua b/src/applications/likwid-mpirun.lua
index 7c090d6..de6bad2 100644
--- a/src/applications/likwid-mpirun.lua
+++ b/src/applications/likwid-mpirun.lua
@@ -4,11 +4,11 @@
  *
  *      Filename:  likwid-mpirun.lua
  *
- *      Description: A wrapper script to pin threads spawned by MPI processes and 
+ *      Description: A wrapper script to pin threads spawned by MPI processes and
  *                   measure hardware performance counters
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -83,6 +83,10 @@ local function usage()
     print_stdout("")
     print_stdout("Processes are pinned to physical CPU cores first. For syntax questions see likwid-pin")
     print_stdout("")
+    print_stdout("For CPU selection and which MPI rank measures Uncore counters the system topology")
+    print_stdout("of the current system is used. There is currently no possibility to overcome this")
+    print_stdout("limitation by providing a topology file or similar.")
+    print_stdout("")
     examples()
 end
 
@@ -215,7 +219,7 @@ local function executeOpenMPI(wrapperscript, hostfile, env, nrNodes)
         local input = f:read("*a")
         ver1,ver2,ver3 = input:match("(%d+)%.(%d+)%.(%d+)")
         if ver1 == "1" then
-            if ver2 == "7" then
+            if tonumber(ver2) >= 7 then
                 bindstr = "--bind-to none"
             elseif ver2 == "6" then
                 bindstr = "--bind-to-none"
@@ -511,7 +515,6 @@ function write_hostlist_to_file(hostlist, nperhost)
             table.insert(outlist, item)
         else
             prefixzeros = 0
-            
             host, start, ende,remain = item:match("(%a+)%[(%d+)-(%d+)%]([%w%d%[%]-]*)")
             if host and start and ende then
                 if tonumber(start) ~= 0 then
@@ -548,7 +551,6 @@ end
 
 local function writeHostfileSlurm(hostlist, filename)
     l = {}
-    
     for i, h in pairs(hostlist) do
         table.insert(l, h["hostname"])
     end
@@ -563,7 +565,6 @@ local function executeSlurm(wrapperscript, hostfile, env, nrNodes)
     if wrapperscript.sub(1,1) ~= "/" then
         wrapperscript = os.getenv("PWD").."/"..wrapperscript
     end
-    
     local exec = string.format("srun -N %d --ntasks-per-node=%d --cpu_bind=none %s %s",
                                 nrNodes, ppn, table.concat(mpiopts, ' '), wrapperscript)
     if debug then
@@ -686,7 +687,6 @@ local function getMpiExec(mpitype)
         writeHostfile = writeHostfileSlurm
         getEnvironment = getEnvironmentSlurm
     end
-    
     for i, exec in pairs(testing) do
         f = io.popen(string.format("which %s 2>/dev/null", exec), 'r')
         if f ~= nil then
@@ -811,7 +811,6 @@ local function assignHosts(hosts, np, ppn)
                                             maxslots=host["maxslots"],
                                             interface=host["interface"]})
                         current = ppn
-                        
                     end
                 else
                     print_stderr(string.format("WARN: Oversubscription for host %s.", host["hostname"]))
@@ -995,7 +994,6 @@ local function setPerfStrings(perflist, cpuexprs)
                     table.insert(coreevents, e)
                 end
             end
-            
             local tmpSocketFlags = {}
             for _,e in pairs(socketListFlags) do
                 table.insert(tmpSocketFlags, e)
@@ -1196,7 +1194,6 @@ local function writeWrapperScript(scriptname, execStr, hosts, outputname)
     f:write("else\n")
     f:write("\techo \"Unknown local rank $LOCALRANK\"\n")
     f:write("fi\n")
-    
     f:close()
     os.execute("chmod +x "..scriptname)
 end
@@ -1311,7 +1308,6 @@ local function parseMarkerOutputFile(filename)
     local eventlist = {}
     local counterlist = {}
     local idx = 1
-    
     local results = {}
     local f = io.open(filename, "r")
     if f == nil then
@@ -1595,8 +1591,6 @@ function printMpiOutput(group_list, all_results, regionname)
     end
 end
 
-
-
 function cpuCount()
     cputopo = likwid.getCpuTopology()
     local cpus = cputopo["activeHWThreads"]
@@ -1688,20 +1682,34 @@ for i=1,#arg do
         table.insert(mpiopts, arg[i])
     end
 end
+
 if #executable == 0 then
     print_stderr("ERROR: No executable given on commandline")
     os.exit(1)
-elseif os.execute(string.format("ls %s 1>/dev/null 2>&1", executable[1])) == 0 then
-    print_stderr("ERROR: Cannot find executable given on commandline")
-    os.exit(1)
 else
-    local f = io.popen(string.format("which %s 2>/dev/null", executable[1]))
-    if f ~= nil then
-        executable[1] = f:read("*line")
-        f:close()
+    local do_which = false
+    local found = false
+    if likwid.access(executable[1], "x") == -1 then
+        do_which = true
+    else
+        found = true
     end
-    if debug then
-        print_stdout("DEBUG: Executable given on commandline: "..table.concat(executable, " "))
+    if not found then
+        if do_which then
+            local f = io.popen(string.format("which %s 2>/dev/null", executable[1]))
+            if f ~= nil then
+                executable[1] = f:read("*line")
+                f:close()
+                found = true
+            end
+            if debug then
+                print_stdout("DEBUG: Executable given on commandline: "..table.concat(executable, " "))
+            end
+        end
+    end
+    if not found then
+        print_stderr("ERROR: Cannot find executable given on commandline")
+        os.exit(1)
     end
 end
 if #mpiopts > 0 and debug then
@@ -1854,7 +1862,6 @@ elseif ppn == 0 and np > 0 then
             maxppn = host["slots"]
         end
     end
-    
     if ppn == 0 then
         ppn = 1
     end
diff --git a/src/applications/likwid-perfctr.lua b/src/applications/likwid-perfctr.lua
index c35f0fd..7c19d00 100644
--- a/src/applications/likwid-perfctr.lua
+++ b/src/applications/likwid-perfctr.lua
@@ -7,8 +7,8 @@
  *      Description:  An application to read out performance counter registers
  *                    on x86 processors
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -315,23 +315,6 @@ io.stdout:setvbuf("no")
 cpuinfo = likwid.getCpuInfo()
 cputopo = likwid.getCpuTopology()
 
-if not likwid.msr_available(access_flags) then
-    if access_mode == 1 then
-        print_stderr("MSR device files not available")
-        print_stderr("Please load msr kernel module before retrying")
-        if likwid.access(outfile..".tmp", "e") == 0 then
-            os.remove(outfile..".tmp")
-        end
-        os.exit(1)
-    else
-        print_stderr("MSR device files not readable and writeable")
-        print_stderr("Be sure that you have enough permissions to access the MSR files directly")
-        if likwid.access(outfile..".tmp", "e") == 0 then
-            os.remove(outfile..".tmp")
-        end
-        os.exit(1)
-    end
-end
 
 if num_cpus == 0 and
    not gotC and
@@ -342,7 +325,7 @@ if num_cpus == 0 and
    not print_info then
     print_stderr("Option -c <list> or -C <list> must be given on commandline")
     usage()
-    if likwid.access(outfile..".tmp", "e") == 0 then
+    if outfile and likwid.access(outfile..".tmp", "e") == 0 then
         os.remove(outfile..".tmp")
     end
     os.exit(1)
@@ -354,7 +337,7 @@ elseif num_cpus == 0 and
        not print_group_help and
        not print_info then
     print_stderr("CPUs given on commandline are not valid in current environment, maybe it's limited by a cpuset.")
-    if likwid.access(outfile..".tmp", "e") == 0 then
+    if outfile and likwid.access(outfile..".tmp", "e") == 0 then
         os.remove(outfile..".tmp")
     end
     os.exit(1)
@@ -643,6 +626,7 @@ if set_access_modes then
         os.exit(1)
     end
 end
+
 if likwid.init(num_cpus, cpulist) < 0 then
     likwid.putTopology()
     likwid.putConfiguration()
@@ -693,7 +677,7 @@ if verbose == true then
     print_stdout(string.format("Executing: %s",execString))
 end
 local ldpath = os.getenv("LD_LIBRARY_PATH")
-local libpath = likwid.pinlibpath:match("([/%g]+)/%g+.so")
+local libpath = string.match(likwid.pinlibpath, "([/%a%d]+)/[%a%s%d]*")
 if ldpath == nil then
     likwid.setenv("LD_LIBRARY_PATH", libpath)
 elseif not ldpath:match(libpath) then
@@ -734,7 +718,7 @@ if use_wrapper or use_timeline then
     local nr_events = likwid.getNumberOfEvents(activeGroup)
     local nr_threads = likwid.getNumberOfThreads()
     local firstrun = true
-    
+
     if use_wrapper and #group_ids == 1 then
         duration = 30.E06
     end
@@ -746,10 +730,14 @@ if use_wrapper or use_timeline then
     end
 
     local pid = nil
-    if pin_cpus then
-        pid = likwid.startProgram(execString, #cpulist, cpulist)
+    if execString:len() > 0 then
+        if pin_cpus then
+            pid = likwid.startProgram(execString, #cpulist, cpulist)
+        else
+            pid = likwid.startProgram(execString, 0, cpulist)
+        end
     else
-        pid = likwid.startProgram(execString, 0, cpulist)
+        pid = likwid.getpid()
     end
 
     if not pid then
@@ -758,52 +746,55 @@ if use_wrapper or use_timeline then
         likwid.putNumaInfo()
         likwid.putConfiguration()
         os.exit(1)
-    end
-    start = likwid.startClock()
-    groupTime[activeGroup] = 0
-    while true do
-        if likwid.getSignalState() ~= 0 then
-            likwid.killProgram()
-            break
-        end
-        local remain = likwid.sleep(duration)
-        exitvalue = likwid.checkProgram(pid)
-        if remain > 0 or exitvalue >= 0 then
-            io.stdout:flush()
-            break
-        end
-        if use_timeline == true then
-            stop = likwid.stopClock()
-            likwid.readCounters()
-            
-            local time = likwid.getClock(start, stop)
-            if likwid.getNumberOfMetrics(activeGroup) == 0 then
-                results = likwid.getLastResults()
-            else
-                results = likwid.getLastMetrics()
+    else
+        start = likwid.startClock()
+        groupTime[activeGroup] = 0
+        while true do
+            if likwid.getSignalState() ~= 0 then
+                if execString:len() > 0 then
+                    likwid.killProgram()
+                end
+                break
             end
-            str = tostring(math.tointeger(activeGroup)) .. " "..tostring(#results[activeGroup]).." "..tostring(#cpulist).." "..tostring(time)
-            for i,l1 in pairs(results[activeGroup]) do
-                for j, value in pairs(l1) do
-                    str = str .. " " .. tostring(value)
+            local remain = likwid.sleep(duration)
+            exitvalue = likwid.checkProgram(pid)
+            if remain > 0 or exitvalue >= 0 then
+                io.stdout:flush()
+                if execString:len() > 0 then
+                    break
                 end
             end
-            io.stderr:write(str.."\n")
-            groupTime[activeGroup] = time
-        else
-            likwid.readCounters()
-        end
-        if #group_ids > 1 then
-            likwid.switchGroup(activeGroup + 1)
-            activeGroup = likwid.getIdOfActiveGroup()
-            if groupTime[activeGroup] == nil then
-                groupTime[activeGroup] = 0
+            if use_timeline == true then
+                stop = likwid.stopClock()
+                likwid.readCounters()
+                local time = likwid.getClock(start, stop)
+                if likwid.getNumberOfMetrics(activeGroup) == 0 then
+                    results = likwid.getLastResults()
+                else
+                    results = likwid.getLastMetrics()
+                end
+                str = tostring(math.tointeger(activeGroup)) .. " "..tostring(#results[activeGroup]).." "..tostring(#cpulist).." "..tostring(time)
+                for i,l1 in pairs(results[activeGroup]) do
+                    for j, value in pairs(l1) do
+                        str = str .. " " .. tostring(value)
+                    end
+                end
+                io.stderr:write(str.."\n")
+                groupTime[activeGroup] = time
+            else
+                likwid.readCounters()
+            end
+            if #group_ids > 1 then
+                likwid.switchGroup(activeGroup + 1)
+                activeGroup = likwid.getIdOfActiveGroup()
+                if groupTime[activeGroup] == nil then
+                    groupTime[activeGroup] = 0
+                end
+                nr_events = likwid.getNumberOfEvents(activeGroup)
             end
-            nr_events = likwid.getNumberOfEvents(activeGroup)
         end
-        
+        stop = likwid.stopClock()
     end
-    stop = likwid.stopClock()
 elseif use_stethoscope then
     local ret = likwid.startCounters()
     if ret < 0 then
@@ -839,15 +830,21 @@ end
 
 
 if use_marker == true then
-    results, metrics = likwid.getMarkerResults(markerFile, cpulist)
-    if #results == 0 then
-        print_stderr("No regions could be found in Marker API result file")
-    else
-        for r=1, #results do
-            likwid.printOutput(results[r], metrics[r], cpulist, r, print_stats)
+    if likwid.access(markerFile, "e") >= 0 then
+        results, metrics = likwid.getMarkerResults(markerFile, cpulist)
+        if not results then
+            print_stderr("Failure reading Marker API result file.")
+        elseif #results == 0 then
+            print_stderr("No regions could be found in Marker API result file.")
+        else
+            for r=1, #results do
+                likwid.printOutput(results[r], metrics[r], cpulist, r, print_stats)
+            end
         end
+        os.remove(markerFile)
+    else
+        print_stderr("Marker API result file does not exist. This may happen if the application has not called LIKWID_MARKER_CLOSE.")
     end
-    os.remove(markerFile)
 elseif use_timeline == false then
     results = likwid.getResults()
     metrics = likwid.getMetrics()
@@ -856,24 +853,29 @@ end
 
 if outfile then
     local suffix = ""
-    if string.match(outfile,"%.") then
+    if string.match(outfile,".-[^\\/]-%.?([^%.\\/]*)$") then
         suffix = string.match(outfile, ".-[^\\/]-%.?([^%.\\/]*)$")
     end
     local command = "<INSTALLED_PREFIX>/share/likwid/filter/" .. suffix
     local tmpfile = outfile..".tmp"
     if suffix == "" then
         os.rename(tmpfile, outfile)
-    elseif suffix ~= "txt" and suffix ~= "csv" and likwid.access(command, "x") then
+    elseif suffix ~= "txt" and suffix ~= "csv" and not likwid.access(command, "x") then
         print_stderr("Cannot find filter script, save output in CSV format to file "..outfile)
         os.rename(tmpfile, outfile)
     else
         if suffix ~= "txt" and suffix ~= "csv" then
             command = command .." ".. tmpfile .. " perfctr"
-            local f = assert(io.popen(command))
+            local f = assert(io.popen(command), "r")
             if f ~= nil then
                 local o = f:read("*a")
                 if o:len() > 0 then
                     print_stderr(string.format("Failed to executed filter script %s.",command))
+                else
+                    os.rename(outfile.."."..suffix, outfile)
+                    if not likwid.access(tmpfile, "e") then
+                        os.remove(tmpfile)
+                    end
                 end
             else
                 print_stderr("Failed to call filter script, save output in CSV format to file "..outfile)
diff --git a/src/applications/likwid-perfscope.lua b/src/applications/likwid-perfscope.lua
index faad459..1263e87 100644
--- a/src/applications/likwid-perfscope.lua
+++ b/src/applications/likwid-perfscope.lua
@@ -7,8 +7,8 @@
  *      Description:  An application to use the timeline mode of likwid-perfctr to generate
  *                    realtime plots using feedGnuplot
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -541,7 +541,7 @@ while true do
                 end
             end
         end
-        
+
         group_list[group]["output"]:write(str.."\n")
         group_list[group]["output"]:flush()
         if dump then
diff --git a/src/applications/likwid-pin.lua b/src/applications/likwid-pin.lua
index 93b8ac2..4747746 100644
--- a/src/applications/likwid-pin.lua
+++ b/src/applications/likwid-pin.lua
@@ -6,8 +6,8 @@
  *
  *      Description:  An application to pin a program including threads
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at gmail.com
  *      Project:  likwid
diff --git a/src/applications/likwid-powermeter.lua b/src/applications/likwid-powermeter.lua
index 813fec3..3525b52 100644
--- a/src/applications/likwid-powermeter.lua
+++ b/src/applications/likwid-powermeter.lua
@@ -4,11 +4,11 @@
  *
  *      Filename:  likwid-powermeter.lua
  *
- *      Description:  An application to get information about power 
+ *      Description:  An application to get information about power
  *      consumption on architectures implementing the RAPL interface.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at gmail.com
  *      Project:  likwid
@@ -122,7 +122,6 @@ for opt,arg in likwid.getopt(arg, {"V:", "c:", "h", "i", "M:", "p", "s:", "v", "
             usage()
             os.exit(1)
         end
-        
     elseif opt == "i" or opt == "info" then
         print_info = true
     elseif (opt == "p") then
@@ -236,6 +235,16 @@ if (print_info) then
             print_stdout()
         end
     end
+    if power["minUncoreFreq"] > 0 and power["maxUncoreFreq"] > 0 then
+        print_stdout("Info about Uncore:")
+        print_stdout(string.format("Minimal Uncore frequency: %g MHz", power["minUncoreFreq"]))
+        print_stdout(string.format("Maximal Uncore frequency: %g MHz", power["maxUncoreFreq"]))
+        print_stdout()
+    end
+    if power["perfBias"] then
+        print_stdout(string.format("Performance energy bias: %.0f (0=highest performance, 15 = lowest energy)", power["perfBias"]))
+        print_stdout()
+    end
     print_stdout(likwid.hline)
 end
 
diff --git a/src/applications/likwid-setFrequencies.lua b/src/applications/likwid-setFrequencies.lua
index 1599320..40dee09 100644
--- a/src/applications/likwid-setFrequencies.lua
+++ b/src/applications/likwid-setFrequencies.lua
@@ -6,8 +6,8 @@
  *
  *      Description:  A application to set the CPU frequency of CPU cores and domains.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at gmail.com
  *      Project:  likwid
@@ -52,14 +52,20 @@ function usage()
     print_stdout("-v\t Version information")
     print_stdout("-c dom\t Likwid thread domain which to apply settings (default are all CPUs)")
     print_stdout("\t See likwid-pin -h for details")
-    print_stdout("-g gov\t Set governor (" .. table.concat(getAvailGovs(nil), ", ") .. ") (set to ondemand if omitted)")
-    print_stdout("-f freq\t Set fixed frequency, implicitly sets userspace governor")
+    print_stdout("-g gov\t Set governor (" .. table.concat(likwid.getAvailGovs(nil), ", ") .. ") (set to ondemand if omitted)")
+    print_stdout("-f/--freq freq\t Set current frequency, implicitly sets userspace governor")
+    print_stdout("-x/--min freq\t Set minimal frequency")
+    print_stdout("-y/--max freq\t Set maximal frequency")
     print_stdout("-p\t Print current frequencies")
     print_stdout("-l\t List available frequencies")
     print_stdout("-m\t List available governors")
+    print_stdout("")
+    print_stdout("In order to set the highest frequency, use the governor 'turbo'. This sets the")
+    print_stdout("minimal frequency to the available minimum, the maximal and current frequency")
+    print_stdout("to the turbo related frequency. The governor is set to 'performance'.")
 end
 
-function getCurrentMinFreq(cpuid)
+--[[function getCurrentMinFreq(cpuid)
     local min = 10000000
     if cpuid == nil or cpuid < 0 then
         for cpuid=0,topo["numHWThreads"]-1 do
@@ -129,7 +135,6 @@ function getAvailFreq(cpuid)
     end
     line = fp:read("*l")
     fp:close()
-    
     local tmp = likwid.stringsplit(line:gsub("^%s*(.-)%s*$", "%1"), " ", nil, " ")
     local avail = {}
     local turbo = tonumber(tmp[1])/1E6
@@ -208,11 +213,13 @@ local function testDriver()
         return true
     end
     return false
-end
+end]]
 
 verbosity = 0
 governor = nil
 frequency = nil
+min_freq = nil
+max_freq = nil
 domain = nil
 printCurFreq = false
 printAvailFreq = false
@@ -224,7 +231,7 @@ if #arg == 0 then
 end
 
 
-for opt,arg in likwid.getopt(arg, {"g:", "c:", "f:", "l", "p", "h", "v", "m", "help","version","freq:"}) do
+for opt,arg in likwid.getopt(arg, {"g:", "c:", "f:", "l", "p", "h", "v", "m", "x:", "y:", "help","version","freq:", "min:", "max:"}) do
     if opt == "h" or opt == "help" then
         usage()
         os.exit(0)
@@ -237,6 +244,10 @@ for opt,arg in likwid.getopt(arg, {"g:", "c:", "f:", "l", "p", "h", "v", "m", "h
         governor = arg
     elseif opt == "f" or opt == "freq" then
         frequency = arg
+    elseif opt == "x" or opt == "min" then
+        min_freq = arg
+    elseif opt == "y" or opt == "max" then
+        max_freq = arg
     elseif (opt == "p") then
         printCurFreq = true
     elseif (opt == "l") then
@@ -251,7 +262,7 @@ for opt,arg in likwid.getopt(arg, {"g:", "c:", "f:", "l", "p", "h", "v", "m", "h
         os.exit(1)
     end
 end
-if not testDriver() then
+if likwid.getDriver() ~= "acpi-cpufreq" then
     print_stderr("The system does not use the acpi-cpufreq driver, other drivers are not usable with likwid-setFrequencies.")
     os.exit(1)
 end
@@ -281,33 +292,25 @@ end
 
 
 if printAvailGovs then
-    local govs = getAvailGovs(nil)
+    local govs = likwid.getAvailGovs(0)
     print_stdout("Available governors:")
-    print_stdout(table.concat(govs, ", "))
+    print_stdout(string.format("%s %s", table.concat(govs, " "), "turbo"))
 end
 
 if printAvailFreq then
+    local freqs, turbo = likwid.getAvailFreq(0)
     print_stdout("Available frequencies:")
-    local out = {}
-    local i = 1;
-    local freqs, turbo = getAvailFreq(nil)
-    if turbo ~= "0" then
-        table.insert(out, turbo)
-    end
-    for i=1,#freqs do
-        table.insert(out, freqs[i])
-    end
-
-    print_stdout(table.concat(out, " "))
+    print_stdout(string.format("%s %s", turbo, table.concat(freqs, " ")))
 end
 
 if printCurFreq then
     print_stdout("Current frequencies:")
-    local freqs = {}
-    local govs = {}
-    freqs, govs = getCurFreq()
     for i=1,#cpulist do
-        print_stdout(string.format("CPU %d: governor %12s frequency %5s GHz",cpulist[i],govs[cpulist[i]], freqs[cpulist[i]]))
+        gov = likwid.getGovernor(cpulist[i])
+        freq = tonumber(likwid.getCpuClockCurrent(cpulist[i]))/1E9
+        min = tonumber(likwid.getCpuClockMin(cpulist[i]))/1E9
+        max = tonumber(likwid.getCpuClockMax(cpulist[i]))/1E9
+        print_stdout(string.format("CPU %d: governor %12s min/cur/max %s/%s/%s GHz",cpulist[i], gov, min, freq, max))
     end
 end
 
@@ -315,50 +318,101 @@ if printAvailGovs or printAvailFreq or printCurFreq then
     os.exit(0)
 end
 
-if numthreads > 0 and not (frequency or governor) then
-    print_stderr("You need to set either a frequency or governor for the selected CPUs on commandline")
+if numthreads > 0 and not (frequency or min_freq or max_freq or governor) then
+    print_stderr("ERROR: You need to set either a frequency or governor for the selected CPUs on commandline")
     os.exit(1)
 end
 
-if frequency then
+-- Frequencies arrive from the command line as strings, so compare them
+-- numerically; a plain string comparison would order them lexicographically.
+if min_freq and max_freq then
+    local lo, hi = tonumber(min_freq), tonumber(max_freq)
+    if lo and hi and lo > hi then
+        print_stderr("ERROR: Minimal frequency higher than maximal frequency.")
+        os.exit(1)
+    end
+end
+
+-- Query the available frequencies once, for the first selected CPU.
+local availfreqs, availturbo = likwid.getAvailFreq(cpulist[1])
+if governor == "turbo" then
+    if not min_freq then
+        min_freq = availfreqs[#availfreqs]
+    end
+    if not max_freq or (tonumber(max_freq) or 0) < (tonumber(availturbo) or 0) then
+        max_freq = availturbo
+    end
+    frequency = availturbo
+end
+
+if min_freq then
     for i=1,#cpulist do
-        local freqs, turbo = getAvailFreq(cpulist[i])
         local valid_freq = false
-        for k,v in pairs(freqs) do
-            if (frequency == v) then
+        for k,v in pairs(availfreqs) do
+            if (min_freq == v) then
                 valid_freq = true
                 break
             end
         end
-        if frequency == turbo then
+        if min_freq == availturbo then
             valid_freq = true
         end
         if not valid_freq then
-            print_stderr(string.format("Frequency %s not available for CPU %d! Please select one of\n%s", frequency, cpulist[i], table.concat(freqs, ", ")))
+            print_stderr(string.format("ERROR: Selected min. frequency %s not available for CPU %d! Please select one of\n%s", min_freq, cpulist[i], table.concat(availfreqs, ", ")))
             os.exit(1)
         end
-    
-        local cmd = set_command .. " " .. tostring(cpulist[i]) .. " " .. tostring(tonumber(frequency)*1E6)
-        if governor then
-            cmd = cmd .. " " .. governor
+        local f = likwid.setCpuClockMin(cpulist[i], tonumber(min_freq)*1E6)
+    end
+end
+
+if max_freq then
+    for i=1,#cpulist do
+        local valid_freq = false
+        for k,v in pairs(availfreqs) do
+            if (max_freq == v) then
+                valid_freq = true
+                break
+            end
         end
-        if verbosity == 3 then
-            print_stdout("Execute: ".. cmd)
+        if max_freq == availturbo then
+            valid_freq = true
         end
-        local err = os.execute(cmd)
-        if err == false or err == nil then
-            print_stderr("Failed to set frequency for CPU "..tostring(cpulist[i]))
+        if not valid_freq then
+            print_stderr(string.format("ERROR: Selected max. frequency %s not available for CPU %d! Please select one of\n%s", max_freq, cpulist[i], table.concat(availfreqs, ", ")))
+            os.exit(1)
         end
+        local f = likwid.setCpuClockMax(cpulist[i], tonumber(max_freq)*1E6)
     end
-    if governor then
-        governor = nil
+end
+
+if frequency then
+    for i=1,#cpulist do
+        
+        local valid_freq = false
+        for k,v in pairs(availfreqs) do
+            if (frequency == v) then
+                valid_freq = true
+                break
+            end
+        end
+        if frequency == availturbo then
+            valid_freq = true
+        end
+        if not valid_freq then
+            print_stderr(string.format("ERROR: Selected frequency %s not available for CPU %d! Please select one of\n%s", frequency, cpulist[i], table.concat(availfreqs, ", ")))
+            os.exit(1)
+        end
+        local f = likwid.setCpuClockCurrent(cpulist[i], tonumber(frequency)*1E6)
     end
 end
 
 if governor then
-    local govs = getAvailGovs(nil)
-    local freqs, turbo = getAvailFreq(nil)
-    local cur_freqs, cur_govs = getCurFreq()
+    local govs = likwid.getAvailGovs(cpulist[1])
+    -- Record each selected CPU's current governor at its cpulist index.
+    local cur_govs = {}
+    for i,c in pairs(cpulist) do
+        cur_govs[i] = likwid.getGovernor(c)
+    end
     local valid_gov = false
     for k,v in pairs(govs) do
         if (governor == v) then
@@ -366,31 +420,21 @@ if governor then
             break
         end
     end
-    if governor == "turbo" and turbo ~= "0" then
+    local cur_freqs = {}
+    if governor == "turbo" and availturbo ~= "0" then
         valid_gov = true
+        governor = "performance"
         for i=1,#cpulist do
-            cur_freqs[cpulist[i]] = turbo
+            cur_freqs[cpulist[i]] = availturbo
         end
     end
     if not valid_gov then
-        print_stderr(string.format("Governor %s not available! Please select one of\n%s", governor, table.concat(govs, ", ")))
+        print_stderr(string.format("ERROR: Governor %s not available! Please select one of\n%s", governor, table.concat(govs, ", ")))
         os.exit(1)
     end
     for i=1,#cpulist do
-        if governor ~= cur_govs[cpulist[i]] then
-            local cmd = set_command .. " " .. tostring(cpulist[i]) .. " "
-            if governor == "turbo" then
-                cmd = cmd .. tostring(tonumber(turbo)*1E6)
-            else
-                cmd = cmd .. tostring(tonumber(cur_freqs[cpulist[i]])*1E6) .. " " .. governor
-            end
-            if verbosity == 3 then
-                print_stdout("Execute: ".. cmd)
-            end
-            local err = os.execute(cmd)
-            if err == false or err == nil then
-                print_stderr("Failed to set governor for CPU "..tostring(cpulist[i]))
-            end
+        if governor ~= cur_govs[i] then
+            local f = likwid.setGovernor(cpulist[i], governor)
         end
     end
 end
diff --git a/src/applications/likwid-topology.lua b/src/applications/likwid-topology.lua
index 2aabf0c..bbbbc87 100644
--- a/src/applications/likwid-topology.lua
+++ b/src/applications/likwid-topology.lua
@@ -7,8 +7,8 @@
  *      Description:  A application to determine the thread and cache topology
  *                    on x86 processors.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at gmail.com
  *      Project:  likwid
@@ -160,10 +160,10 @@ end
 table.insert(output_csv, likwid.hline)
 
 table.insert(output_csv, "STRUCT,Sockets,"..tostring(cputopo["numSockets"]))
-for socket=0,cputopo["numSockets"]-1 do
+for socket=0, #cputopo["topologyTree"] do
     csv_str = string.format("Socket %d:\t\t( ",cputopo["topologyTree"][socket]["ID"])
-    for core=0,cputopo["numCoresPerSocket"]-1 do
-        for thread=0, cputopo["numThreadsPerCore"]-1 do
+    for core = 0, #cputopo["topologyTree"][socket]["Childs"] do
+        for thread = 0, #cputopo["topologyTree"][socket]["Childs"][core]["Childs"] do
             csv_str = csv_str ..tostring(cputopo["topologyTree"][socket]["Childs"][core]["Childs"][thread]).. ","
         end
     end
@@ -188,7 +188,7 @@ for level=1,cputopo["numCacheLevels"] do
         else
             table.insert(output_csv, string.format("Size:\t\t\t%.0f MB",cputopo["cacheLevels"][level]["size"]/1048576))
         end
-        
+
         if (print_caches) then
             if (cputopo["cacheLevels"][level]["type"] == "DATACACHE") then
                 table.insert(output_csv, "Type:\t\t\tData cache")
@@ -199,7 +199,7 @@ for level=1,cputopo["numCacheLevels"] do
             table.insert(output_csv, string.format("Associativity:\t\t%d",cputopo["cacheLevels"][level]["associativity"]))
             table.insert(output_csv, string.format("Number of sets:\t\t%d",cputopo["cacheLevels"][level]["sets"]))
             table.insert(output_csv, string.format("Cache line size:\t%d",cputopo["cacheLevels"][level]["lineSize"]))
-            
+
             if (cputopo["cacheLevels"][level]["inclusive"] == 0) then
                 table.insert(output_csv, "Cache type:\t\tNon Inclusive")
             else
@@ -209,9 +209,9 @@ for level=1,cputopo["numCacheLevels"] do
         end
         local threads = cputopo["cacheLevels"][level]["threads"]
         str = "Cache groups:\t\t( "
-        for socket=0,cputopo["numSockets"]-1 do
-            for core=0,cputopo["numCoresPerSocket"]-1 do
-                for cpu=0,cputopo["numThreadsPerCore"]-1 do
+        for socket=0, #cputopo["topologyTree"] do
+            for core = 0, #cputopo["topologyTree"][socket]["Childs"] do
+                for cpu = 0, #cputopo["topologyTree"][socket]["Childs"][core]["Childs"] do
                     if (threads ~= 0) then
                         str = str .. cputopo["topologyTree"][socket]["Childs"][core]["Childs"][cpu] .. " "
                         threads = threads - 1
@@ -301,12 +301,12 @@ if print_graphical and not print_csv then
     print_stdout(likwid.sline)
     print_stdout("Graphical Topology")
     print_stdout(likwid.sline)
-    for socket=0,cputopo["numSockets"]-1 do
+    for socket=0, #cputopo["topologyTree"] do
         print_stdout(string.format("Socket %d:",cputopo["topologyTree"][socket]["ID"]))
         container = {}
-        for core=0,cputopo["numCoresPerSocket"]-1 do
+        for core = 0, #cputopo["topologyTree"][socket]["Childs"] do
             local tmpString = ""
-            for thread=0,cputopo["numThreadsPerCore"]-1 do
+            for thread = 0, #cputopo["topologyTree"][socket]["Childs"][core]["Childs"] do
                 if thread == 0 then
                     tmpString = tmpString .. tostring(cputopo["topologyTree"][socket]["Childs"][core]["Childs"][thread])
                 else
@@ -315,7 +315,7 @@ if print_graphical and not print_csv then
             end
             likwid.addSimpleAsciiBox(container, 1, core+1, tmpString)
         end
-        
+
         local columnCursor = 1
         local lineCursor = 2
         for cache=1,cputopo["numCacheLevels"] do
@@ -358,27 +358,32 @@ end
 
 if outfile then
     local suffix = ""
-    if string.match(outfile, "%.") then
+    if string.match(outfile, ".-[^\\/]-%.?([^%.\\/]*)$") then
         suffix = string.match(outfile, ".-[^\\/]-%.?([^%.\\/]*)$")
     end
     local command = "<INSTALLED_PREFIX>/share/likwid/filter/" .. suffix
     local tmpfile = outfile..".tmp"
     if suffix == "" then
         os.rename(tmpfile, outfile)
-    elseif suffix ~= "txt" and suffix ~= "csv" and likwid.access(command,"x") then
-        stdout_print("Cannot find filter script, save output in CSV format to file "..outfile)
+    elseif suffix ~= "txt" and suffix ~= "csv" and not likwid.access(command,"x") then
+        print_stderr("Cannot find filter script, save output in CSV format to file "..outfile)
         os.rename(tmpfile, outfile)
     else
         if suffix ~= "txt" and suffix ~= "csv" then
             command = command .." ".. tmpfile .. " topology"
-            local f = assert(io.popen(command))
+            local f = assert(io.popen(command), "r")
             if f ~= nil then
                 local o = f:read("*a")
                 if o:len() > 0 then
-                    stdout_print(string.format("Failed to executed filter script %s.",command))
+                    print_stderr(string.format("Failed to executed filter script %s.",command))
+                else
+                    os.rename(outfile.."."..suffix, outfile)
+                    if not likwid.access(tmpfile, "e") then
+                        os.remove(tmpfile)
+                    end
                 end
             else
-                stdout_print("Failed to call filter script, save output in CSV format to file "..outfile)
+                print_stderr("Failed to call filter script, save output in CSV format to file "..outfile)
                 os.rename(tmpfile, outfile)
                 os.remove(tmpfile)
             end
diff --git a/src/applications/likwid.lua b/src/applications/likwid.lua
index b184b10..bf9b424 100644
--- a/src/applications/likwid.lua
+++ b/src/applications/likwid.lua
@@ -5,8 +5,8 @@
  *
  *      Description:  Lua LIKWID interface library
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at gmail.com
  *      Project:  likwid
@@ -33,6 +33,8 @@ package.cpath = '<INSTALLED_LIBPREFIX>/?.so;' .. package.cpath
 require("liblikwid")
 require("math")
 
+-- Fallback when math.tointeger is missing: floors whatever tonumber accepts, nil otherwise.
+if not math.tointeger then math.tointeger = function(x) local n = tonumber(x); return n and math.floor(n) end end
 likwid.groupfolder = "<LIKWIDGROUPPATH>"
 
 likwid.version = <VERSION>
@@ -142,6 +144,21 @@ likwid.markerRegionTime = likwid_markerRegionTime
 likwid.markerRegionCount = likwid_markerRegionCount
 likwid.markerRegionResult = likwid_markerRegionResult
 likwid.markerRegionMetric = likwid_markerRegionMetric
+likwid.getCpuClockCurrent = likwid_getCpuClockCurrent
+likwid.setCpuClockCurrent = likwid_setCpuClockCurrent
+likwid.getCpuClockMin = likwid_getCpuClockMin
+likwid.setCpuClockMin = likwid_setCpuClockMin
+likwid.getCpuClockMax = likwid_getCpuClockMax
+likwid.setCpuClockMax = likwid_setCpuClockMax
+likwid.getGovernor = likwid_getGovernor
+likwid.setGovernor = likwid_setGovernor
+likwid.getDriver = likwid_getDriver
+likwid.getuid = likwid_getuid
+likwid.geteuid = likwid_geteuid
+likwid.setuid = likwid_setuid
+likwid.seteuid = likwid_seteuid
+likwid.setresuid = likwid_setresuid
+likwid.setresuser = likwid_setresuser
 
 likwid.cpuFeatures = { [0]="HW_PREFETCHER", [1]="CL_PREFETCHER", [2]="DCU_PREFETCHER", [3]="IP_PREFETCHER",
                         [4]="FAST_STRINGS", [5]="THERMAL_CONTROL", [6]="PERF_MON", [7]="FERR_MULTIPLEX",
@@ -276,9 +293,12 @@ local function get_spaces(str, min_space, max_space)
     local length = str:len()
     local back = 0
     local front = 0
-    back = math.ceil((max_space-str:len()) /2)
+    if tonumber(str) == nil then
+        back = math.ceil((max_space-str:len()) /2)
+    else
+        back = 0
+    end
     front = max_space - back - str:len()
-
     if (front < back) then
         local tmp = front
         front = back
@@ -361,10 +381,9 @@ local function printtable(tab)
     end
     hline = hline .. "+"
     print(hline)
-    
     str = "| "
     for i=1,nr_columns do
-        front, back = get_spaces(tostring(tab[i][1]), min_lengths[i],max_lengths[i])
+        front, back = get_spaces(tostring(tab[i][1]), min_lengths[i], max_lengths[i])
         str = str .. front.. tostring(tab[i][1]) ..back
         if i<nr_columns then
             str = str .. " | "
@@ -374,7 +393,6 @@ local function printtable(tab)
     end
     print(str)
     print(hline)
-    
     for j=2,nr_lines do
         str = "| "
         for i=1,nr_columns do
@@ -417,7 +435,6 @@ local function printcsv(tab, linelength)
         end
         print(str)
     end
-    
 end
 
 likwid.printcsv = printcsv
@@ -498,8 +515,6 @@ local function new_groupdata(eventString, fix_ctrs)
         if not eventString:match("FIXC2") and fix_ctrs == 3 then
             eventString = eventString..",CPU_CLK_UNHALTED_REF:FIXC2"
         end
-        
-        
     end
     gdata["EventString"] = eventString
     gdata["GroupString"] = eventString
@@ -531,7 +546,6 @@ local function get_groupdata(group)
         if (a == group) then group_exist = 1 end
     end
     if (group_exist == 0) then return new_groupdata(group, cpuinfo["perf_num_fixed_ctr"]) end
-    
     local f = io.open(likwid.groupfolder .. "/" .. cpuinfo["short_name"] .. "/" .. group .. ".txt", "r")
     if f == nil then
         f = io.open(os.getenv("HOME") .. "/.likwid/groups/" .. cpuinfo["short_name"] .."/" .. group .. ".txt", "r")
@@ -556,7 +570,6 @@ local function get_groupdata(group)
     nr_events = 1
     nr_metrics = 1
     for i, line in pairs(stringsplit(t,"\n")) do
-        
         if (parse_eventset or parse_metrics or parse_long) and line:len() == 0 then
             parse_eventset = false
             parse_metrics = false
@@ -566,7 +579,7 @@ local function get_groupdata(group)
         if line:match("^SHORT%a*") ~= nil then
             linelist = stringsplit(line, "%s+", nil, "%s+")
             table.remove(linelist, 1)
-            groupdata["ShortDescription"] = table.concat(linelist, " ")  
+            groupdata["ShortDescription"] = table.concat(linelist, " ")
         end
 
         if line:match("^EVENTSET$") ~= nil then
@@ -595,7 +608,6 @@ local function get_groupdata(group)
             groupdata["Events"][nr_events]["Counter"] = linelist[1]:gsub("^%s*(.-)%s*$", "%1")
             nr_events = nr_events + 1
         end
-        
         if parse_metrics and line:match("^METRICS$") == nil then
             linelist = stringsplit(line:gsub("^%s*(.-)%s*$", "%1"), "%s+", nil, "%s+")
             formula = linelist[#linelist]
@@ -605,23 +617,17 @@ local function get_groupdata(group)
             groupdata["Metrics"][nr_metrics]["formula"] = formula
             nr_metrics = nr_metrics + 1
         end
-        
         if parse_long and line:match("^LONG$") == nil then
             groupdata["LongDescription"] = groupdata["LongDescription"] .. "\n" .. line
         end
     end
     groupdata["LongDescription"] = groupdata["LongDescription"]:sub(2)
     groupdata["EventString"] = groupdata["EventString"]:sub(2)
-    
     return groupdata
-    
 end
 
 likwid.get_groupdata = get_groupdata
 
-
-
-
 local function parse_time(timestr)
     local duration = 0
     local s1,e1 = timestr:find("ms")
@@ -795,7 +801,6 @@ local function printOutput(results, metrics, cpulist, region, stats)
                     table.insert(tmpList, string.format("%e", likwid.markerRegionTime(region, c)))
                 end
             end
-            
             for e, event in pairs(group) do
                 local tmp = tostring(likwid.num2str(event[c]))
                 table.insert(tmpList, tmp)
@@ -979,7 +984,12 @@ likwid.getLastMetrics = getLastMetrics
 
 local function getMarkerResults(filename, cpulist)
     local cpuinfo = likwid.getCpuInfo()
-    likwid.readMarkerFile(filename)
+    local ret = likwid.readMarkerFile(filename)
+    if ret < 0 then
+        return nil, nil
+    elseif ret == 0 then
+        return {}, {}
+    end
     results = {}
     metrics = {}
     for i=1, likwid.markerNumRegions() do
@@ -999,9 +1009,9 @@ local function getMarkerResults(filename, cpulist)
             end
         end
         if likwid.getNumberOfMetrics(groupID) > 0 then
-            for k=1, likwid.getNumberOfMetrics(likwid.markerRegionGroup(i)) do
+            for k=1, likwid.getNumberOfMetrics(groupID) do
                 local metricName = likwid.getNameOfMetric(groupID, k)
-                metrics[i][groupID][k] = {}
+                metrics[i][likwid.markerRegionGroup(i)][k] = {}
                 for j=1, regionThreads do
                     metrics[i][groupID][k][j] = likwid.markerRegionMetric(i,k,j)
                 end
@@ -1080,7 +1090,6 @@ local function printAsciiBox(container)
             end
             innerboxline = innerboxline .. "+ "
         end
-        
         boxlabelline = "| "
         for j=1,numColumns do
             local offset = 0
@@ -1140,4 +1149,36 @@ end
 
 likwid.getMPIrank = getMPIrank
 
+
+-- Returns the frequency strings for `cpu` plus the turbo frequency, which is
+-- the first reported value (the string "0" when nothing could be read).
+local function llikwid_getAvailFreq(cpu)
+    local freqs = {}
+    local freq_str = likwid_getAvailFreq(cpu)
+    if freq_str then
+        for item in freq_str:gmatch("[%d%.]+") do
+            table.insert(freqs, item)
+        end
+    end
+    local turbo = table.remove(freqs, 1) or "0"
+    return freqs, turbo
+end
+
+likwid.getAvailFreq = llikwid_getAvailFreq
+
+-- Splits the governor string reported for `cpu` into a list (empty if none).
+local function llikwid_getAvailGovs(cpu)
+    local names = {}
+    local raw = likwid_getAvailGovs(cpu)
+    if raw then
+        for word in raw:gmatch("%a+") do
+            names[#names + 1] = word
+        end
+    end
+    return names
+end
+
+likwid.getAvailGovs = llikwid_getAvailGovs
+
+
 return likwid
diff --git a/src/bitUtil.c b/src/bitUtil.c
index 6a3ddd5..8c8415a 100644
--- a/src/bitUtil.c
+++ b/src/bitUtil.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Utility routines manipulating bit arrays.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -27,15 +27,14 @@
  *
  * =======================================================================================
  */
+
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 
 #include <types.h>
 #include <bitUtil.h>
 
-/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
-
-
-
 /* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
 uint64_t
 field64(uint64_t value, int start, int length)
@@ -49,17 +48,17 @@ field32(uint32_t value, int start, int length)
     return (value >> start) & (~0U >> (32 - length));
 }
 
-uint32_t 
+uint32_t
 extractBitField(uint32_t inField, uint32_t width, uint32_t offset)
 {
     uint32_t bitMask;
     uint32_t outField;
 
-    if ((offset+width) == 32) 
+    if ((offset+width) == 32)
     {
         bitMask = (0xFFFFFFFF<<offset);
     }
-    else 
+    else
     {
         bitMask = (0xFFFFFFFF<<offset) ^ (0xFFFFFFFF<<(offset+width));
 
@@ -88,4 +87,3 @@ getBitFieldWidth(uint32_t number)
     return fieldWidth+1;  /* bsr returns the position, we want the width */
 }
 
-
diff --git a/src/calculator.c b/src/calculator.c
index b46132e..bb0a314 100644
--- a/src/calculator.c
+++ b/src/calculator.c
@@ -5,9 +5,13 @@
  *
  *      Description:  Infix calculator
  *
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
+ *
  *      Author:   Brandon Mills (bm), mills.brandont at gmail.com
+ *      Project:  likwid
  *
- *      Copyright (C) 2016 Brandon Mills
+ *      Copyright (C) Brandon Mills
  *
  *      Permission is hereby granted, free of charge, to any person obtaining a copy of this
  *      software and associated documentation files (the "Software"), to deal in the
@@ -32,8 +36,8 @@
  *
  *      Some changes done for the integration in LIKWID, see inline comments
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at gmail.com
@@ -56,6 +60,8 @@
  * =======================================================================================
  */
 
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -63,16 +69,22 @@
 #include <getopt.h>
 #include <calculator_stack.h>
 
+/* #####   MACROS  -  LOCAL TO THIS SOURCE FILE   ######################### */
+
 #define bool char
 #define true 1
 #define false 0
-
 #define PI 3.141592653589793
 
 /* Added by Thomas Roehl (Thomas.Roehl at fau.de) to reduce reallocs by allocating a temporary
  * token for parsing as well as for transforming a number to a string.
  */
 #define MAXTOKENLENGTH 512
+#define MAXPRECISION 20 /* upper bound for the number of decimals printed by num2Str() */
+#define DEFAULTPRECISION 5 /* NOTE(review): presumably the initial prefs.precision; its use is not visible in this hunk */
+#define AUTOPRECISION -1 /* prefs.precision value that makes num2Str() strip trailing zeros */
+#define FUNCTIONSEPARATOR "|" /* marker pushed on the value stack below a function's arguments; consumed by doFunc() */
+/* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
 
 typedef enum
 {
@@ -103,24 +115,24 @@ struct Preferences
     {
         bool degrees;
     } mode;
+    int precision;
+    int maxtokenlength;
 } prefs;
 
 typedef enum
 {
     divZero,
     overflow,
-    parenMismatch
+    parenMismatch, /* reported by raise() as "Mismatched parentheses" */
+    inputMissing, /* a function was evaluated without an operand; reported as "Function input missing" */
 } Error;
 
 typedef char* token;
-/* Added by Thomas Roehl (Thomas.Roehl at fau.de) to keep track of the
- * intermediate calculation results to free them in the end
- */
-token* calcTokens = NULL;
-int nrCalcTokens = 0;
 
 typedef double number;
 
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
+
 void raise(Error err)
 {
     char* msg;
@@ -135,11 +147,15 @@ void raise(Error err)
         case parenMismatch:
             msg = "Mismatched parentheses";
             break;
+        case inputMissing:
+            msg = "Function input missing";
+            break;
     }
     printf("\tError: %s\n", msg);
 }
 
-inline unsigned int toDigit(char ch)
+inline unsigned int /* value 0..9 for a digit character '0'..'9' */
+toDigit(char ch) /* ch is not validated; non-digit input yields ch - '0' unchanged */
 {
     return ch - '0';
 }
@@ -147,74 +163,222 @@ inline unsigned int toDigit(char ch)
 number buildNumber(token str)
 {
     number result = 0;
+    /* Disabled manual integer parsing, superseded by the strtod() call below:
+     *     while(*str && *str != '.')
+     *         result = result * 10 + toDigit(*str++);
+     * strtod() converts the whole token; no end pointer or errno check is made. */
     result = strtod(str, NULL);
     return result;
 }
 
 token num2Str(number num)
 {
-    /* Increased precision by Thomas Roehl (Thomas.Roehl at fau.de) as required for LIKWID */
-    token str = (token)malloc((MAXTOKENLENGTH+1)*sizeof(char));
-    snprintf(str, 39, "%.20f", num);
+    /* Format num with at most MAXPRECISION decimals into a malloc'ed string owned
+     * by the caller (NULL if malloc fails); AUTOPRECISION drops trailing zeros. */
+    int len = 0;
+    int precision = MAXPRECISION;
+    if (prefs.precision >= 0 && prefs.precision < precision)
+        precision = prefs.precision;
+    token str = (token)malloc(prefs.maxtokenlength*sizeof(char));
+    if (!str)
+        return NULL;
+    len = snprintf(str, prefs.maxtokenlength-1, "%.*f", precision, num);
+    /* Stop at precision 0: a negative "%.*f" precision selects the default of 6, so
+     * "10" would re-expand to "10.000000" forever. Skip the read if output was truncated. */
+    while (prefs.precision == AUTOPRECISION && precision > 0 && len > 0 && len < prefs.maxtokenlength-1 && str[len-1] == '0')
+        len = snprintf(str, prefs.maxtokenlength-1, "%.*f", --precision, num);
     return str;
 }
 
-
-
-inline number toRadians(number degrees)
+number
+toRadians(number degrees) /* convert an angle from degrees to radians using the local PI macro */
 {
     return degrees * PI / 180.0;
 }
 
-inline number toDegrees(number radians)
+number
+toDegrees(number radians) /* convert an angle from radians to degrees using the local PI macro */
 {
     return radians * 180.0 / PI;
 }
 
-token doFunc(token input, token function)
+token doFunc(Stack *s, token function) /* pop operand(s) from s, apply the named function, push the result string on s */
 {
+    if (stackSize(s) == 0) /* no operand available at all */
+    {
+        raise(inputMissing);
+        return "NaN"; /* NOTE(review): evalStackPush() tests the result with "< 0"; confirm this is detected as an error */
+    }
+    else if (stackSize(s) == 1 && strcmp(stackTop(s), FUNCTIONSEPARATOR) == 0) /* only the argument marker is left: empty argument list */
+    {
+        stackPop(s);
+        raise(inputMissing);
+        return "NaN";
+    }
+    token input = (token)stackPop(s); /* topmost stack entry is the first operand processed */
     number num = buildNumber(input);
     number result = num;
+    number counter = 0; /* operand count, used by avg/mean, median and var */
 
-    if(strcmp(function, "abs") == 0)
+    if(strncmp(function, "abs", 3) == 0) /* function names are matched by prefix only */
         result = fabs(num);
-    else if(strcmp(function, "floor") == 0)
+    else if(strncmp(function, "floor", 5) == 0)
         result = floor(num);
-    else if(strcmp(function, "ceil") == 0)
+    else if(strncmp(function, "ceil", 4) == 0)
         result = ceil(num);
-    else if(strcmp(function, "sin") == 0)
+    else if(strncmp(function, "sin", 3) == 0)
         result = !prefs.mode.degrees ? sin(num) : sin(toRadians(num));
-    else if(strcmp(function, "cos") == 0)
+    else if(strncmp(function, "cos", 3) == 0)
         result = !prefs.mode.degrees ? cos(num) : cos(toRadians(num));
-    else if(strcmp(function, "tan") == 0)
+    else if(strncmp(function, "tan", 3) == 0)
         result = !prefs.mode.degrees ? tan(num) : tan(toRadians(num));
-    else if(strcmp(function, "arcsin") == 0
-         || strcmp(function, "asin") == 0)
+    else if(strncmp(function, "arcsin", 6) == 0
+         || strncmp(function, "asin", 4) == 0)
         result = !prefs.mode.degrees ? asin(num) : toDegrees(asin(num));
-    else if(strcmp(function, "arccos") == 0
-         || strcmp(function, "acos") == 0)
+    else if(strncmp(function, "arccos", 6) == 0
+         || strncmp(function, "acos", 4) == 0)
         result = !prefs.mode.degrees ? acos(num) : toDegrees(acos(num));
-    else if(strcmp(function, "arctan") == 0
-         || strcmp(function, "atan") == 0)
+    else if(strncmp(function, "arctan", 6) == 0
+         || strncmp(function, "atan", 4) == 0)
         result = !prefs.mode.degrees ? atan(num) : toDegrees(atan(num));
-    else if(strcmp(function, "sqrt") == 0)
+    else if(strncmp(function, "sqrt", 4) == 0)
         result = sqrt(num);
-    else if(strcmp(function, "cbrt") == 0)
+    else if(strncmp(function, "cbrt", 4) == 0)
         result = cbrt(num);
-    else if(strcmp(function, "log") == 0)
+    else if(strncmp(function, "log", 3) == 0)
         result = log(num);
-    else if(strcmp(function, "exp") == 0)
+    else if(strncmp(function, "exp", 3) == 0)
         result = exp(num);
-    printf("Free %s\n", function);
-    free(function);
-    return num2Str(result);
+    else if(strncmp(function, "min", 3) == 0) /* multi-argument functions consume operands down to FUNCTIONSEPARATOR */
+    {
+        while (stackSize(s) > 0 && strcmp(stackTop(s), FUNCTIONSEPARATOR) != 0)
+        {
+            input = (token)stackPop(s);
+            num = buildNumber(input);
+            if (num < result)
+                result = num;
+        }
+    }
+    else if(strncmp(function, "max", 3) == 0)
+    {
+        while (stackSize(s) > 0 && strcmp(stackTop(s), FUNCTIONSEPARATOR) != 0)
+        {
+            input = (token)stackPop(s);
+            num = buildNumber(input);
+            if (num > result)
+                result = num;
+        }
+    }
+    else if(strncmp(function, "sum", 3) == 0)
+    {
+        while (stackSize(s) > 0  && strcmp(stackTop(s), FUNCTIONSEPARATOR) != 0)
+        {
+            input = (token)stackPop(s);
+            num = buildNumber(input);
+            result += num;
+        }
+    }
+    else if(strncmp(function, "avg", 3) == 0 ||
+            strncmp(function, "mean", 4) == 0)
+    {
+        // Result already initialized with first number
+        counter = 1;
+        while (stackSize(s) > 0  && strcmp(stackTop(s), FUNCTIONSEPARATOR) != 0)
+        {
+            input = (token)stackPop(s);
+            num = buildNumber(input);
+            result += num;
+            counter++;
+        }
+        result /= counter;
+    }
+    else if(strncmp(function, "median", 6) == 0)
+    {
+        // tmp holds the operands sorted (smallest on top), safe is scratch space for the insertion
+        Stack tmp, safe;
+        // Result already initialized with first number
+        counter = 1;
+        stackInit(&tmp, (stackSize(s) > 0 ? stackSize(s) : 1));
+        stackInit(&safe, (stackSize(s) > 0 ? stackSize(s) : 1));
+        // add first value to the later sorted stack
+        stackPush(&tmp, input);
+        while (stackSize(s) > 0  && strcmp(stackTop(s), FUNCTIONSEPARATOR) != 0)
+        {
+            input = (token)stackPop(s);
+            num = buildNumber(input);
+            // move all sorted values smaller than the new number to the safe stack
+            while (stackSize(&tmp) > 0 && buildNumber(stackTop(&tmp)) < num)
+            {
+                stackPush(&safe, stackPop(&tmp));
+            }
+            // push value on the sorted stack
+            stackPush(&tmp, input);
+            // push all saved numbers back on the sorted stack
+            while (stackSize(&safe) > 0)
+            {
+                stackPush(&tmp, stackPop(&safe));
+            }
+            counter++;
+        }
+        stackFree(&safe);
+        // calculate the median index (1-based rank from the smallest; lower middle for an even count)
+        counter = (number)(((int)counter+1)/2);
+        // pop all numbers until median index
+        while (counter > 1)
+        {
+            stackPop(&tmp);
+            counter--;
+        }
+        result = buildNumber(stackPop(&tmp));
+        // pop the remaining sorted stack
+        while (stackSize(&tmp) > 0)
+        {
+            stackPop(&tmp);
+        }
+        stackFree(&tmp);
+    }
+    else if(strncmp(function, "var", 3) == 0)
+    {
+        Stack tmp;
+        counter = 1;
+        // second stack to store values during calculation of mean
+        stackInit(&tmp, (stackSize(s) > 0 ? stackSize(s) : 1));
+        // push first value to temporary stack
+        stackPush(&tmp, input);
+        number mean = result;
+        while (stackSize(s) > 0  && strcmp(stackTop(s), FUNCTIONSEPARATOR) != 0)
+        {
+            input = (token)stackPop(s);
+            // push value to temporary stack
+            stackPush(&tmp, input);
+            num = buildNumber(input);
+            mean += num;
+            counter++;
+        }
+        // calculate mean
+        mean /= counter;
+        result = 0;
+        // calculate sum of squared differences
+        while (stackSize(&tmp) > 0)
+        {
+            input = (token)stackPop(&tmp);
+            num = buildNumber(input)-mean;
+            result += pow(num,2);
+        }
+        // determine variance (divides by the operand count, not count-1)
+        result /= counter;
+        stackFree(&tmp);
+    }
+    if (stackSize(s) > 0 && strcmp(stackTop(s), FUNCTIONSEPARATOR) == 0) /* guard as in the loops above: the stack may be empty here */
+        stackPop(s);
+    stackPush(s, num2Str(result));
+    return 0; /* success is a null token; the result itself is on the stack */
 }
 
-int doOp(token loperand, token op, token roperand, token *result)
+int doOp(Stack *s, token op)
 {
-    /* Added by Thomas Roehl (Thomas.Roehl at fau.de) to return
-     * errors from calculation like devide-by-zero, ... */
-    int err = 0;
+    token roperand = (token)stackPop(s);
+    token loperand = (token)stackPop(s);
     number lside = buildNumber(loperand);
     number rside = buildNumber(roperand);
     number ret;
@@ -234,9 +398,8 @@ int doOp(token loperand, token op, token roperand, token *result)
             {
                 if(rside == 0)
                 {
-                    /* Changed by Thomas Roehl */
-                    //raise(divZero);
-                    err = -1;
+                    raise(divZero);
+                    return -1;
                 }
                 else
                     ret = lside / rside;
@@ -246,9 +409,8 @@ int doOp(token loperand, token op, token roperand, token *result)
             {
                 if(rside == 0)
                 {
-                    /* Changed by Thomas Roehl */
-                    //raise(divZero);
-                    err = -1;
+                    raise(divZero);
+                    return -1;
                 }
                 else
                 {
@@ -268,11 +430,10 @@ int doOp(token loperand, token op, token roperand, token *result)
             }
             break;
     }
-    *result = num2Str(ret);
-    return err;
+    stackPush(s, num2Str(ret));
+    return 0;
 }
 
-
 Symbol type(char ch)
 {
     Symbol result;
@@ -380,26 +541,35 @@ Symbol type(char ch)
 
 bool isFunction(token tk)
 {
-    return (strcmp(tk, "abs") == 0
-        || strcmp(tk, "floor") == 0
-        || strcmp(tk, "ceil") == 0
-        || strcmp(tk, "sin") == 0
-        || strcmp(tk, "cos") == 0
-        || strcmp(tk, "tan") == 0
-        || strcmp(tk, "arcsin") == 0
-        || strcmp(tk, "arccos") == 0
-        || strcmp(tk, "arctan") == 0
-        || strcmp(tk, "asin") == 0
-        || strcmp(tk, "acos") == 0
-        || strcmp(tk, "atan") == 0
-        || strcmp(tk, "sqrt") == 0
-        || strcmp(tk, "cbrt") == 0
-        || strcmp(tk, "log") == 0
-        || strcmp(tk, "exp") == 0);
+    return (strncmp(tk, "abs", 3) == 0 /* true if tk starts with any known function name (prefix match) */
+        || strncmp(tk, "floor", 5) == 0
+        || strncmp(tk, "ceil", 4) == 0
+        || strncmp(tk, "sin", 3) == 0
+        || strncmp(tk, "cos", 3) == 0
+        || strncmp(tk, "tan", 3) == 0
+        || strncmp(tk, "arcsin", 6) == 0
+        || strncmp(tk, "arccos", 6) == 0
+        || strncmp(tk, "arctan", 6) == 0
+        || strncmp(tk, "asin", 4) == 0
+        || strncmp(tk, "acos", 4) == 0
+        || strncmp(tk, "atan", 4) == 0
+        || strncmp(tk, "sqrt", 4) == 0
+        || strncmp(tk, "cbrt", 4) == 0
+        || strncmp(tk, "log", 3) == 0
+        || strncmp(tk, "min", 3) == 0 /* min .. var: the multi-argument functions evaluated in doFunc() */
+        || strncmp(tk, "max", 3) == 0
+        || strncmp(tk, "sum", 3) == 0
+        || strncmp(tk, "avg", 3) == 0
+        || strncmp(tk, "mean", 4) == 0
+        || strncmp(tk, "median", 6) == 0
+        || strncmp(tk, "var", 3) == 0
+        || strncmp(tk, "exp", 3) == 0);
 }
 
 Symbol tokenType(token tk)
 {
+    if (!tk)
+        return invalid;
     Symbol ret = type(*tk);
     switch(ret)
     {
@@ -417,37 +587,33 @@ Symbol tokenType(token tk)
         case digit:
             ret = value;
             break;
+        default:
+            break;
     }
     return ret;
 }
 
 int tokenize(char *str, char *(**tokensRef))
 {
+    int i = 0;
     char** tokens = NULL;
     char** tmp = NULL;
     char* ptr = str;
     char ch = '\0';
     int numTokens = 0;
-    /* Added by Thomas Roehl (Thomas.Roehl at fau.de) to parse string
-     * in a temporary token to reduce frequent reallocs. newToken
-     * is replaced by tmpToken during parsing. Removed all reallocs
-     * and not required mallocs from the original code.
-     */
-    char* tmpToken = malloc((MAXTOKENLENGTH+1) * sizeof(char));
+    char* tmpToken = malloc((prefs.maxtokenlength+1) * sizeof(char));
     if (!tmpToken)
     {
         fprintf(stderr, "Malloc of temporary buffer failed\n");
         return 0;
     }
-    while(ch = *ptr++)
+    while((ch = *ptr++))
     {
         if(type(ch) == invalid) // Stop tokenizing when we encounter an invalid character
             break;
 
         token newToken = NULL;
-        /* Added by Thomas Roehl (Thomas.Roehl at fau.de)
-         * Prepare temporary token for next parsing step */
-        memset(tmpToken, '\0', MAXTOKENLENGTH+1);
+        tmpToken[0] = '\0';
         switch(type(ch))
         {
             case addop:
@@ -458,7 +624,8 @@ int tokenize(char *str, char *(**tokensRef))
                             || (tokenType(tokens[numTokens-1]) == addop
                                 || tokenType(tokens[numTokens-1]) == multop
                                 || tokenType(tokens[numTokens-1]) == expop
-                                || tokenType(tokens[numTokens-1]) == lparen)))
+                                || tokenType(tokens[numTokens-1]) == lparen
+                                || tokenType(tokens[numTokens-1]) == argsep)))
                     {
                         // Assemble an n-character (plus null-terminator) number token
                         {
@@ -471,36 +638,30 @@ int tokenize(char *str, char *(**tokensRef))
                                 //printf("Decimal\n");
                                 hasDecimal = true;
                                 len++;
-                                //newToken = (char*)malloc((len + 1) * sizeof(char));
                                 tmpToken[0] = '0';
                                 tmpToken[1] = '.';
                             }
                             else // Numbers that do not start with decimal
                             {
-                                //newToken = (char*)malloc((len + 1) * sizeof(char)); // Leave room for '\0'
                                 tmpToken[len-1] = ch;
                             }
 
                             // Assemble rest of number
                             for(; // Don't change len
                                 *ptr // There is a next character and it is not null
-                                && len <= MAXTOKENLENGTH
+                                && len <= prefs.maxtokenlength
                                 && (type(*ptr) == digit // The next character is a digit
                                      || ((type(*ptr) == decimal // Or the next character is a decimal
                                          && hasDecimal == 0)) // But we have not added a decimal
                                      || ((*ptr == 'E' || *ptr == 'e') // Or the next character is an exponent
                                          && hasExponent == false) // But we have not added an exponent yet
-                                     /* Added by Thomas Roehl (Thomas.Roehl at fau.de) to parse scientific notation
-                                      * with signed exponent correctly
-                                      */
-                                     || ((*ptr == '+' || *ptr == '-') && hasExponent == true)); // Exponent with sign
+                                || ((*ptr == '+' || *ptr == '-') && hasExponent == true)); // Exponent with sign
                                 ++len)
                             {
                                 if(type(*ptr) == decimal)
                                     hasDecimal = true;
                                 else if(*ptr == 'E' || *ptr == 'e')
                                     hasExponent = true;
-                                //newToken = (char*)realloc(newToken, (len + 1) * sizeof(char)); // Leave room for '\0'
                                 tmpToken[len] = *ptr++;
                             }
 
@@ -518,7 +679,6 @@ int tokenize(char *str, char *(**tokensRef))
             case argsep:
                 // Assemble a single-character (plus null-terminator) operation token
                 {
-                    //newToken = (char*)malloc(2 * sizeof(char)); // Leave room for '\0'
                     tmpToken[0] = ch;
                     tmpToken[1] = '\0';
                 }
@@ -536,42 +696,30 @@ int tokenize(char *str, char *(**tokensRef))
                         //printf("Decimal\n");
                         hasDecimal = true;
                         len++;
-                        //newToken = (char*)malloc((len + 1) * sizeof(char));
                         tmpToken[0] = '0';
                         tmpToken[1] = '.';
                     }
                     else // Numbers that do not start with decimal
                     {
-                        //newToken = (char*)malloc((len + 1) * sizeof(char)); // Leave room for '\0'
                         tmpToken[len-1] = ch;
                     }
 
                     // Assemble rest of number
-                    /* Added support for signed exponents in scientific notation
-                     * by Thomas Roehl (Thomas.Roehl at fau.de) as required for LIKWID */
                     for(; // Don't change len
                         *ptr // There is a next character and it is not null
-                        && len <= MAXTOKENLENGTH
+                        && len <= prefs.maxtokenlength
                         && (type(*ptr) == digit // The next character is a digit
                              || ((type(*ptr) == decimal // Or the next character is a decimal
-                                 && hasDecimal == false)) // But we have not added a decimal
+                                 && hasDecimal == 0)) // But we have not added a decimal
                              || ((*ptr == 'E' || *ptr == 'e') // Or the next character is an exponent
                                  && hasExponent == false) // But we have not added an exponent yet
-                             /* Added by Thomas Roehl (Thomas.Roehl at fau.de) to parse scientific notation
-                              * with signed exponent correctly
-                              */
                              || ((*ptr == '+' || *ptr == '-') && hasExponent == true)); // Exponent with sign
                         ++len)
                     {
                         if(type(*ptr) == decimal)
-                        {
                             hasDecimal = true;
-                        }
                         else if(*ptr == 'E' || *ptr == 'e')
-                        {
                             hasExponent = true;
-                        }
-                        //newToken = (char*)realloc(newToken, (len + 1) * sizeof(char)); // Leave room for '\0'
                         tmpToken[len] = *ptr++;
                     }
 
@@ -583,34 +731,48 @@ int tokenize(char *str, char *(**tokensRef))
                 // Assemble an n-character (plus null-terminator) text token
                 {
                     int len = 1;
-                    //newToken = (char*)malloc((len + 1) * sizeof(char)); // Leave room for '\0'
                     tmpToken[0] = ch;
-                    for(len = 1; *ptr && type(*ptr) == text && len <= MAXTOKENLENGTH; ++len)
+                    for(len = 1; *ptr && type(*ptr) == text && len <= prefs.maxtokenlength; ++len)
                     {
-                        //newToken = (char*)realloc(newToken, (len + 1) * sizeof(char)); // Leave room for '\0'
                         tmpToken[len] = *ptr++;
                     }
                     tmpToken[len] = '\0';
                 }
                 break;
+            default:
+                break;
         }
         // Add to list of tokens
-        if(tmpToken[0] != '\0')
+        if(tmpToken[0] != '\0' && strlen(tmpToken) > 0)
         {
             numTokens++;
             /*if(tokens == NULL) // First allocation
                 tokens = (char**)malloc(numTokens * sizeof(char*));
             else*/
-            /* Added by Thomas Roehl (Thomas.Roehl at fau.de)
-             * Allocate new output token and copy temporary token
-             */
+            
             newToken = malloc((strlen(tmpToken)+1) * sizeof(char));
+            if (!newToken)
+            {
+                numTokens--;
+                break;
+            }
             strcpy(newToken, tmpToken);
             newToken[strlen(tmpToken)] = '\0';
             tmp = (char**)realloc(tokens, numTokens * sizeof(char*));
             if (tmp == NULL)
             {
+                free(newToken);
+                if (tokens != NULL)
+                {
+                    for(i=0;i<numTokens-1;i++)
+                    {
+                        if (tokens[i] != NULL)
+                            free(tokens[i]);
+                    }
+                    free(tokens);
+                }
                 *tokensRef = NULL;
+                free(newToken);
                 free(tmpToken);
                 return 0;
             }
@@ -620,32 +782,38 @@ int tokenize(char *str, char *(**tokensRef))
         }
     }
     *tokensRef = tokens; // Send back out
-    /* Added by Thomas Roehl (Thomas.Roehl at fau.de) */
     free(tmpToken);
+    tmpToken = NULL;
     return numTokens;
 }
 
 bool leftAssoc(token op)
 {
-    bool ret;
+    bool ret = false; /* answer for token types not listed in the switch */
     switch(tokenType(op))
     {
         case addop:
         case multop:
+        /* additive and multiplicative operators are left-associative */
             ret = true;
             break;
+        case function: /* functions share the expop answer: not left-associative */
         case expop:
             ret = false;
             break;
+        default:
+            break;
     }
     return ret;
 }
 
 int precedence(token op1, token op2)
 {
-    int ret;
+    int ret = 0;
 
-    if(tokenType(op1) == tokenType(op2)) // Equal precedence
+    if (op2 == NULL)
+        ret = 1;
+    else if(tokenType(op1) == tokenType(op2)) // Equal precedence
         ret = 0;
     else if(tokenType(op1) == addop
             && (tokenType(op2) == multop || tokenType(op2) == expop)) // op1 has lower precedence
@@ -659,16 +827,17 @@ int precedence(token op1, token op2)
     else if(tokenType(op1) == expop
             && tokenType(op2) == multop) // op1 has higher precedence
         ret = 1;
-
+    else if (tokenType(op1) == function 
+            && (tokenType(op2) == addop || tokenType(op2) == multop || tokenType(op2) == expop || tokenType(op2) == lparen))
+        ret = 1;
+    else if ((tokenType(op1) == addop || tokenType(op1) == multop || tokenType(op1) == expop)
+            && tokenType(op2) == function)
+        ret = -1;
     return ret;
 }
 
-int evalStackPush(Stack *s, token val)
+void evalStackPush(Stack *s, token val)
 {
-    /* Added by Thomas Roehl (Thomas.Roehl at fau.de) to return
-     * calculation errors. Function now returns an int.
-     */
-    int ret = 0;
     if(prefs.display.postfix)
         printf("\t%s\n", val);
 
@@ -676,11 +845,11 @@ int evalStackPush(Stack *s, token val)
     {
         case function:
             {
-                token operand, res;
-                operand = (token)stackPop(s);
-                res = doFunc(operand, val);
-                //free(operand);
-                stackPush(s, res);
+                //token res;
+                //operand = (token)stackPop(s);
+                if (doFunc(s, val) < 0)
+                    return;
+                //stackPush(s, res);
             }
             break;
         case expop:
@@ -690,20 +859,13 @@ int evalStackPush(Stack *s, token val)
                 if(stackSize(s) >= 2)
                 {
                     // Pop two operands
-                    token l, r, res;
-                    r = (token)stackPop(s);
-                    l = (token)stackPop(s);
 
                     // Evaluate
-                    /* Added return value by Thomas Roehl (Thomas.Roehl at fau.de) */
-                    ret = doOp(l, val, r, &res);
+                    if (doOp(s, val) < 0)
+                        return;
+
                     // Push result
-                    stackPush(s, res);
-                    /* Added by Thomas Roehl (Thomas.Roehl at fau.de)
-                     * Keeping track of the intermediate results
-                     */
-                    calcTokens[nrCalcTokens] = res;
-                    nrCalcTokens++;
+                    //stackPush(s, res);
                 }
                 else
                 {
@@ -716,17 +878,18 @@ int evalStackPush(Stack *s, token val)
                 stackPush(s, val);
             }
             break;
+        default:
+            break;
     }
-    /* Return value by Thomas Roehl (Thomas.Roehl at fau.de) */
-    return ret;
 }
 
-int postfix(token *tokens, int numTokens, Stack *output)
+bool postfix(token *tokens, int numTokens, Stack *output)
 {
-    Stack operators;
+    Stack operators, intermediate;
     int i;
-    int err = 0;
-    stackInit(&operators, 2*numTokens);
+    bool err = false;
+    stackInit(&operators, numTokens);
+    stackInit(&intermediate, numTokens);
     for(i = 0; i < numTokens; i++)
     {
         // From Wikipedia/Shunting-yard_algorithm:
@@ -735,13 +898,23 @@ int postfix(token *tokens, int numTokens, Stack *output)
             case value:
                 {
                     // If the token is a number, then add it to the output queue.
-                    //printf("Adding number to output stack\n");
-                    err = evalStackPush(output, tokens[i]);
+                    //printf("Adding number %s to output stack\n", tokens[i]);
+                    evalStackPush(output, tokens[i]);
                 }
                 break;
             case function:
                 {
+                    while(stackSize(&operators) > 0
+                        && (tokenType(tokens[i]) != lparen)
+                        && ((precedence(tokens[i], (char*)stackTop(&operators)) <= 0)))
+                    {
+                        //printf("Moving operator %s from operator stack to output stack\n", (char*)stackTop(&operators));
+                        evalStackPush(output, stackPop(&operators));
+                        stackPush(&intermediate, stackTop(output));
+                    }
+
                     // If the token is a function token, then push it onto the stack.
+                    //printf("Adding operator %s to operator stack\n", tokens[i]);
                     stackPush(&operators, tokens[i]);
                 }
                 break;
@@ -756,24 +929,12 @@ int postfix(token *tokens, int numTokens, Stack *output)
                      */
                     while(stackSize(&operators) > 0
                         && tokenType((token)stackTop(&operators)) != lparen
-                        && stackSize(&operators) > 1
-                        && err == 0)
+                        && stackSize(&operators) > 1)
                     {
                         //printf("Moving operator from operator stack to output stack\n");
-                        token t = (token)stackPop(&operators);
-                        err = evalStackPush(output, t);
-                        //free(t);
+                        evalStackPush(output, stackPop(&operators));
+                        stackPush(&intermediate, stackTop(output));
                     }
-                    if(stackSize(&operators) > 0
-                        && tokenType((token)stackTop(&operators)) != lparen)
-                    {
-                        err = -1;
-                        /* Changed by Thomas Roehl */
-                        //raise(parenMismatch);
-                    }
-                    //printf("Removing left paren from operator stack\n");
-                    token t = stackPop(&operators); // Discard lparen
-                    //free(t);
                 }
                 break;
             case addop:
@@ -791,15 +952,13 @@ int postfix(token *tokens, int numTokens, Stack *output)
                     while(stackSize(&operators) > 0
                         && (tokenType((char*)stackTop(&operators)) == addop || tokenType((char*)stackTop(&operators)) == multop || tokenType((char*)stackTop(&operators)) == expop)
                         && ((leftAssoc(tokens[i]) && precedence(tokens[i], (char*)stackTop(&operators)) <= 0)
-                            || (!leftAssoc(tokens[i]) && precedence(tokens[i], (char*)stackTop(&operators)) < 0))
-                        && err == 0)
+                            || (!leftAssoc(tokens[i]) && precedence(tokens[i], (char*)stackTop(&operators)) < 0)))
                     {
-                        //printf("Moving operator from operator stack to output stack\n");
-                        token t = (token)stackPop(&operators);
-                        err = evalStackPush(output, t);
-                        //free(t);
+                        //printf("Moving operator %s from operator stack to output stack\n", (char*)stackTop(&operators));
+                        evalStackPush(output, stackPop(&operators));
+                        stackPush(&intermediate, stackTop(output));
                     }
-                    //printf("Adding operator to operator stack\n");
+                    //printf("Adding operator %s to operator stack\n", tokens[i]);
                     stackPush(&operators, tokens[i]);
                 }
                 break;
@@ -807,6 +966,8 @@ int postfix(token *tokens, int numTokens, Stack *output)
                 {
                     // If the token is a left paren, then push it onto the stack
                     //printf("Adding left paren to operator stack\n");
+                    if (tokenType(stackTop(&operators)) == function)
+                        stackPush(output, FUNCTIONSEPARATOR);
                     stackPush(&operators, tokens[i]);
                 }
                 break;
@@ -820,30 +981,31 @@ int postfix(token *tokens, int numTokens, Stack *output)
                      */
                     while(stackSize(&operators) > 0
                         && tokenType((token)stackTop(&operators)) != lparen
-                        && stackSize(&operators) > 1
-                        && err == 0)
+                        && stackSize(&operators) > 1)
                     {
-                        //printf("Moving operator from operator stack to output stack\n");
-                        token t = (token)stackPop(&operators);
-                        err = evalStackPush(output, t);
-                        //free(t);
+                        //printf("Moving operator %s from operator stack to output stack\n", (char*)stackTop(&operators));
+                        evalStackPush(output, stackPop(&operators));
+                        stackPush(&intermediate, stackTop(output));
                     }
                     if(stackSize(&operators) > 0
                         && tokenType((token)stackTop(&operators)) != lparen)
                     {
-                        err = -1;
-                        /* Changed by Thomas Roehl */
-                        //raise(parenMismatch);
+                        err = true;
+                        raise(parenMismatch);
                     }
                     //printf("Removing left paren from operator stack\n");
-                    token t = (token)stackPop(&operators);
-                    //stackPop(&operators); // Discard lparen
-                    //free(t);
+                    stackPop(&operators); // Discard lparen
+                    while (stackSize(&operators) > 0 && tokenType((token)stackTop(&operators)) == function)
+                    {
+                        //printf("Removing function from operator stack to output stack\n");
+                        evalStackPush(output, stackPop(&operators));
+                        stackPush(&intermediate, stackTop(output));
+                    }
                 }
                 break;
+            default:
+                break;
         }
-        if (err)
-            break;
     }
     /*
      * When there are no more tokens to read:
@@ -855,60 +1017,63 @@ int postfix(token *tokens, int numTokens, Stack *output)
     {
         if(tokenType((token)stackTop(&operators)) == lparen)
         {
-            /* Changed by Thomas Roehl */
-            //raise(parenMismatch);
-            err = -1;
+            raise(parenMismatch);
+            err = true;
         }
         //printf("Moving operator from operator stack to output stack\n");
-        token t = (token)stackPop(&operators);
-        err = evalStackPush(output, t);
-        //free(t);
+        evalStackPush(output, stackPop(&operators));
+        stackPush(&intermediate, stackTop(output));
     }
+    // pop result from intermediate stack
+    stackPop(&intermediate);
+    // free remaining intermediate results
+    while (stackSize(&intermediate) > 0)
+    {
+        stackPop(&intermediate);
+    }
+    if (err == true)
+    {
+        while (stackSize(&operators) > 0)
+        {
+            token s = stackPop(&operators);
+            //printf("Freeing %s from operators stack\n", s);
+            free(s);
+        }
+    }
+    stackFree(&intermediate);
     stackFree(&operators);
     return err;
 }
-
-
-
 /* Added by Thomas Roehl (Thomas.Roehl at fau.de) as interface for LIKWID */
-int calculate_infix(char* finfix, double *result)
+int
+calculate_infix(char* finfix, double *result)
 {
     int i;
     int ret = 0;
     *result = 0;
     token* tokens = NULL;
     Stack expr;
-    nrCalcTokens = 0;
+    prefs.maxtokenlength = MAXTOKENLENGTH;
+    prefs.precision = MAXPRECISION;
     int numTokens = tokenize(finfix, &tokens);
-    calcTokens = (token*)malloc(2 * numTokens * sizeof(token));
-    if (calcTokens == NULL)
-    {
-        ret = -1;
-        *result = NAN;
-    }
-    memset(calcTokens, 0, 2 * numTokens * sizeof(token));
-    stackInit(&expr, 2*numTokens);
+    stackInit(&expr, numTokens);
     ret = postfix(tokens, numTokens, &expr);
-    if ((stackSize(&expr) != 1) || (ret < 0))
+    if ((stackSize(&expr) != 1) || (ret == true))
     {
         *result = NAN;
         goto calcerror;
     }
     else
     {
+        for (i=0; i< numTokens; i++)
+        {
+            if (tokens[i] == stackTop(&expr))
+                tokens[i] = NULL;
+        }
         *result = strtod((char*)stackTop(&expr), NULL);
     }
     ret = 0;
 calcerror:
-    for (i=0;i<nrCalcTokens; i++)
-    {
-        if (calcTokens[i] != NULL)
-            free(calcTokens[i]);
-    }
-    if (calcTokens)
-        free(calcTokens);
-    calcTokens = NULL;
-    nrCalcTokens = 0;
     for (i=0;i<numTokens;i++)
     {
         if (tokens[i])
@@ -926,4 +1091,3 @@ calcerror:
     return ret;
 }
 
-
diff --git a/src/calculator_stack.c b/src/calculator_stack.c
index 43cae98..f8f35ca 100644
--- a/src/calculator_stack.c
+++ b/src/calculator_stack.c
@@ -5,12 +5,13 @@
  *
  *      Description:  Stack implementation for infix calculator
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Brandon Mills (bm), mills.brandont at gmail.com
+ *      Project:  likwid
  *
- *      Copyright (C) 2016 Brandon Mills
+ *      Copyright (C) Brandon Mills
  *
  *      Permission is hereby granted, free of charge, to any person obtaining a copy of this
  *      software and associated documentation files (the "Software"), to deal in the
diff --git a/src/configuration.c b/src/configuration.c
index b60dcaf..4fd1977 100644
--- a/src/configuration.c
+++ b/src/configuration.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Configuration file module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -28,6 +28,8 @@
  * =======================================================================================
  */
 
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -37,23 +39,34 @@
 #include <sys/stat.h>
 #include <errno.h>
 
-
-
 #include <configuration.h>
 
+/* #####   EXPORTED VARIABLES   ########################################### */
+
 Configuration config = {NULL,NULL,NULL,NULL,-1,MAX_NUM_THREADS,MAX_NUM_NODES};
 int init_config = 0;
 
+/* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
+
 static int daemonPath_len = 0;
 static int groupPath_len = 0;
 
-static int default_configuration(void)
+/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
+
+static int
+default_configuration(void)
 {
     int ret = 0;
     char filename[1024] = { [0 ... 1023] = '\0' };
     char *fptr = NULL;
     size_t len = 0;
     filename[0] = '\0';
+
+    groupPath_len = strlen(TOSTRING(GROUPPATH))+10;
+    config.groupPath = malloc(groupPath_len+1);
+    ret = snprintf(config.groupPath, groupPath_len, "%s", TOSTRING(GROUPPATH));
+    config.groupPath[ret] = '\0';
+#ifndef LIKWID_USE_PERFEVENT
     if (ACCESSMODE == 0)
     {
         config.daemonMode = ACCESSMODE_DIRECT;
@@ -61,13 +74,7 @@ static int default_configuration(void)
         return 0;
     }
     config.daemonMode = ACCESSMODE_DAEMON;
-    
-    groupPath_len = strlen(TOSTRING(GROUPPATH))+10;
-    config.groupPath = malloc(groupPath_len+1);
-    ret = snprintf(config.groupPath, groupPath_len, "%s", TOSTRING(GROUPPATH));
-    config.groupPath[ret] = '\0';
-    
-    
+
     FILE* fp = popen("which likwid-accessD 2>/dev/null | tr -d '\n'","r");
     if (fp == NULL)
     {
@@ -97,8 +104,9 @@ static int default_configuration(void)
             free(fptr);
         goto use_hardcoded;
     }
-    init_config = 1;
     fclose(fp);
+#endif
+    init_config = 1;
     return 0;
 use_hardcoded:
     ret = sprintf(filename,"%s", TOSTRING(ACCESSDAEMON));
@@ -111,13 +119,19 @@ use_hardcoded:
     }
     else
     {
-        ERROR_PLAIN_PRINT(Unable to get path to access daemon. Maybe your PATH environment variable does not contain the folder where you installed it or the file was moved away / not copied to that location?);
-        exit(EXIT_FAILURE);
+        if (getenv("LIKWID_NO_ACCESS") == NULL)
+        {
+            ERROR_PLAIN_PRINT(Unable to get path to access daemon. Maybe your PATH environment variable does not contain the folder where you installed it or the file was moved away / not copied to that location?);
+            exit(EXIT_FAILURE);
+        }
     }
     return 0;
 }
 
-int init_configuration(void)
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
+
+int
+init_configuration(void)
 {
     int i;
     FILE* fp;
@@ -152,7 +166,7 @@ int init_configuration(void)
     {
         sprintf(filename, "%s",preconfigured);
     }
-    
+
     if ((config.topologyCfgFileName == NULL) && (strlen(filename) == 0))
     {
         if (!access(TOSTRING(TOPOFILE), R_OK))
@@ -259,14 +273,14 @@ int init_configuration(void)
         }
     }
 
-
     init_config = 1;
 
     fclose(fp);
     return 0;
 }
 
-Configuration_t get_configuration(void)
+Configuration_t
+get_configuration(void)
 {
     if (init_config == 1)
     {
@@ -275,7 +289,8 @@ Configuration_t get_configuration(void)
     return NULL;
 }
 
-int destroy_configuration(void)
+int
+destroy_configuration(void)
 {
     if (init_config == 0)
     {
@@ -304,7 +319,8 @@ int destroy_configuration(void)
     return 0;
 }
 
-int config_setGroupPath(char* path)
+int
+config_setGroupPath(const char* path)
 {
     int ret = 0;
     struct stat st;
@@ -337,3 +353,4 @@ int config_setGroupPath(char* path)
     printf("Given path is no directory\n");
     return -ENOTDIR;
 }
+
diff --git a/src/cpuFeatures.c b/src/cpuFeatures.c
index 1c866ff..3348271 100644
--- a/src/cpuFeatures.c
+++ b/src/cpuFeatures.c
@@ -9,8 +9,8 @@
  *                  Allows to turn on and off the Hardware prefetcher
  *                  available.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -159,7 +159,9 @@ cpuFeatures_update(int cpu)
              (cpuid_info.model == BROADWELL_D) ||
              (cpuid_info.model == BROADWELL_E) ||
              (cpuid_info.model == SKYLAKE1) ||
-             (cpuid_info.model == SKYLAKE2))
+             (cpuid_info.model == SKYLAKE2) ||
+             (cpuid_info.model == KABYLAKE1) ||
+             (cpuid_info.model == KABYLAKE2))
     {
         TEST_FLAG_INV(FEAT_TURBO_MODE,38);
     }
@@ -184,6 +186,8 @@ cpuFeatures_update(int cpu)
             (cpuid_info.model == BROADWELL_E) ||
             (cpuid_info.model == SKYLAKE1) ||
             (cpuid_info.model == SKYLAKE2) ||
+            (cpuid_info.model == KABYLAKE1) ||
+            (cpuid_info.model == KABYLAKE2) ||
             (cpuid_info.model == ATOM_SILVERMONT_GOLD))
     {
         ret = HPMread(cpu, MSR_DEV, MSR_PREFETCH_ENABLE, &flags);
@@ -319,6 +323,8 @@ cpuFeatures_enable(int cpu, CpuFeature type, int print)
             (cpuid_info.model == BROADWELL_E) ||
             (cpuid_info.model == SKYLAKE1) ||
             (cpuid_info.model == SKYLAKE2) ||
+            (cpuid_info.model == KABYLAKE1) ||
+            (cpuid_info.model == KABYLAKE2) ||
             (cpuid_info.model == ATOM_SILVERMONT_GOLD))
     {
         reg = MSR_PREFETCH_ENABLE;
@@ -446,6 +452,8 @@ cpuFeatures_disable(int cpu, CpuFeature type, int print)
             (cpuid_info.model == BROADWELL_E) ||
             (cpuid_info.model == SKYLAKE1) ||
             (cpuid_info.model == SKYLAKE2) ||
+            (cpuid_info.model == KABYLAKE1) ||
+            (cpuid_info.model == KABYLAKE2) ||
             (cpuid_info.model == ATOM_SILVERMONT_GOLD))
     {
         reg = MSR_PREFETCH_ENABLE;
diff --git a/src/cpustring.c b/src/cpustring.c
index ed934ac..040ff76 100644
--- a/src/cpustring.c
+++ b/src/cpustring.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Parser for CPU selection strings
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -28,14 +28,18 @@
  * =======================================================================================
  */
 
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <math.h>
 
 #include <likwid.h>
 
+/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
 
-static int cpulist_sort(int* incpus, int* outcpus, int length)
+static int
+cpulist_sort(int* incpus, int* outcpus, int length)
 {
     int insert = 0;
     topology_init();
@@ -45,9 +49,9 @@ static int cpulist_sort(int* incpus, int* outcpus, int length)
         return -1;
     }
     int inner_loop = ceil((double)length/cpuid_topology->numThreadsPerCore);
-    for (int off=0;off < cpuid_topology->numThreadsPerCore;off++)
+    for (int off = 0; off < cpuid_topology->numThreadsPerCore; off++)
     {
-        for (int i=0; i<inner_loop;i++)
+        for (int i = 0; i < inner_loop; i++)
         {
             outcpus[insert] = incpus[(i*cpuid_topology->numThreadsPerCore)+off];
             insert++;
@@ -58,7 +62,8 @@ static int cpulist_sort(int* incpus, int* outcpus, int length)
     return insert;
 }
 
-static int cpulist_concat(int* cpulist, int startidx, int* addlist, int addlength)
+static int
+cpulist_concat(int* cpulist, int startidx, int* addlist, int addlength)
 {
     int count = 0;
     if (addlength <= 0)
@@ -73,7 +78,8 @@ static int cpulist_concat(int* cpulist, int startidx, int* addlist, int addlengt
     return count;
 }
 
-static int cpu_in_domain(int domainidx, int cpu)
+static int
+cpu_in_domain(int domainidx, int cpu)
 {
     affinity_init();
     AffinityDomains_t affinity = get_affinityDomains();
@@ -87,7 +93,8 @@ static int cpu_in_domain(int domainidx, int cpu)
     return 0;
 }
 
-static int cpuexpr_to_list(bstring bcpustr, bstring prefix, int* list, int length)
+static int
+cpuexpr_to_list(bstring bcpustr, bstring prefix, int* list, int length)
 {
     topology_init();
     CpuTopology_t cpuid_topology = get_cpuTopology();
@@ -124,8 +131,10 @@ list_done:
     return insert;
 }
 
-static int cpustr_to_cpulist_scatter(bstring bcpustr, int* cpulist, int length)
+static int
+cpustr_to_cpulist_scatter(bstring bcpustr, int* cpulist, int length)
 {
+    int max_procs = 0;
     topology_init();
     CpuTopology_t cpuid_topology = get_cpuTopology();
     affinity_init();
@@ -143,25 +152,42 @@ static int cpustr_to_cpulist_scatter(bstring bcpustr, int* cpulist, int length)
         }
         for (int i=0; i<affinity->numberOfAffinityDomains; i++)
         {
-            if (bstrchrp(affinity->domains[i].tag, cpustring[0], 0) != BSTR_ERR)
+            if (bstrchrp(affinity->domains[i].tag, cpustring[0], 0) != BSTR_ERR &&
+                affinity->domains[i].numberOfProcessors > 0)
             {
                 suitable[suitidx] = i;
                 suitidx++;
+                if (affinity->domains[i].numberOfProcessors > max_procs)
+                    max_procs = affinity->domains[i].numberOfProcessors;
             }
         }
-        int* sortedList = (int*) malloc(affinity->domains[suitable[0]].numberOfProcessors * sizeof(int));
-        if (!sortedList)
+        int** sLists = (int**) malloc(suitidx * sizeof(int*));
+        if (!sLists)
         {
             free(suitable);
             bcstrfree(cpustring);
             return -ENOMEM;
         }
-        for (int off=0;off<affinity->domains[suitable[0]].numberOfProcessors;off++)
+        for (int i = 0; i< suitidx; i++)
+        {
+            sLists[i] = (int*) malloc(max_procs * sizeof(int));
+            if (!sLists[i])
+            {
+                /* Undo the partial setup: lists 0..i-1 first, then the table itself. */
+                for (int j = 0; j < i; j++)
+                    free(sLists[j]);
+                free(sLists);
+                free(suitable);
+                bcstrfree(cpustring);
+                return -ENOMEM;
+            }
+            cpulist_sort(affinity->domains[suitable[i]].processorList, sLists[i], affinity->domains[suitable[i]].numberOfProcessors);
+        }
+        for (int off=0;off<max_procs;off++)
         {
             for(int i=0;i < suitidx; i++)
             {
-                cpulist_sort(affinity->domains[suitable[i]].processorList, sortedList, affinity->domains[suitable[i]].numberOfProcessors);
-                cpulist[insert] = sortedList[off];
+                cpulist[insert] = sLists[i][off];
                 insert++;
                 if (insert == length)
                     goto scatter_done;
@@ -169,7 +195,11 @@ static int cpustr_to_cpulist_scatter(bstring bcpustr, int* cpulist, int length)
         }
 scatter_done:
         bcstrfree(cpustring);
-        free(sortedList);
+        for (int i = 0; i< suitidx; i++)
+        {
+            free(sLists[i]);
+        }
+        free(sLists);
         free(suitable);
         return insert;
     }
@@ -177,7 +207,8 @@ scatter_done:
     return 0;
 }
 
-static int cpustr_to_cpulist_expression(bstring bcpustr, int* cpulist, int length)
+static int
+cpustr_to_cpulist_expression(bstring bcpustr, int* cpulist, int length)
 {
     topology_init();
     CpuTopology_t cpuid_topology = get_cpuTopology();
@@ -228,11 +259,11 @@ static int cpustr_to_cpulist_expression(bstring bcpustr, int* cpulist, int lengt
     int insert = 0;
     for (int i=0;i<count;i++)
     {
-        for (int j=0;j<chunk && offset+j<affinity->domains[domainidx].numberOfProcessors;j++)
+        for (int j=0; j<chunk && offset+j<affinity->domains[domainidx].numberOfProcessors;j++)
         {
             cpulist[insert] = affinity->domains[domainidx].processorList[offset + j];
             insert++;
-            if (insert == length)
+            if (insert == length || insert == count)
                 goto expression_done;
         }
         offset += stride;
@@ -252,7 +283,8 @@ expression_done:
     return insert;
 }
 
-static int cpustr_to_cpulist_logical(bstring bcpustr, int* cpulist, int length)
+static int
+cpustr_to_cpulist_logical(bstring bcpustr, int* cpulist, int length)
 {
     topology_init();
     CpuTopology_t cpuid_topology = get_cpuTopology();
@@ -264,7 +296,7 @@ static int cpustr_to_cpulist_logical(bstring bcpustr, int* cpulist, int length)
     struct bstrList* strlist;
     if (bstrchrp(bcpustr, 'L', 0) != 0)
     {
-        fprintf(stderr, "Not a valid CPU expression\n");
+        fprintf(stderr, "ERROR: Not a valid CPU expression\n");
         return 0;
     }
 
@@ -288,11 +320,12 @@ static int cpustr_to_cpulist_logical(bstring bcpustr, int* cpulist, int length)
     }
     if (domainidx < 0)
     {
-        fprintf(stderr, "Cannot find domain %s\n", bdata(bdomain));
+        fprintf(stderr, "ERROR: Cannot find domain %s\n", bdata(bdomain));
         bdestroy(bdomain);
         bdestroy(blist);
         return 0;
     }
+
     int *inlist = malloc(affinity->domains[domainidx].numberOfProcessors * sizeof(int));
     if (inlist == NULL)
     {
@@ -300,10 +333,16 @@ static int cpustr_to_cpulist_logical(bstring bcpustr, int* cpulist, int length)
         bdestroy(blist);
         return -ENOMEM;
     }
-    int ret = cpulist_sort(affinity->domains[domainidx].processorList, inlist, affinity->domains[domainidx].numberOfProcessors);
+
+    int ret = cpulist_sort(affinity->domains[domainidx].processorList,
+            inlist, affinity->domains[domainidx].numberOfProcessors);
 
     strlist = bsplit(blist, ',');
     int insert = 0;
+    int insert_offset = 0;
+    int inlist_offset = 0;
+    int inlist_idx = 0;
+    int require = 0;
     for (int i=0; i< strlist->qty; i++)
     {
         if (bstrchrp(strlist->entry[i], '-', 0) != BSTR_ERR)
@@ -312,27 +351,64 @@ static int cpustr_to_cpulist_logical(bstring bcpustr, int* cpulist, int length)
             indexlist = bsplit(strlist->entry[i], '-');
             if (atoi(bdata(indexlist->entry[0])) <= atoi(bdata(indexlist->entry[1])))
             {
-                for (int j=atoi(bdata(indexlist->entry[0])); j<=atoi(bdata(indexlist->entry[1]));j++)
+                require += atoi(bdata(indexlist->entry[1])) - atoi(bdata(indexlist->entry[0])) + 1;
+            }
+            else
+            {
+                require += atoi(bdata(indexlist->entry[0])) - atoi(bdata(indexlist->entry[1])) + 1;
+            }
+        }
+        else
+        {
+            require++;
+        }
+    }
+    if (require > ret && getenv("LIKWID_SILENT") == NULL)
+    {
+        fprintf(stderr,
+                "WARN: Selected affinity domain %s has only %d hardware threads, but selection string evaluates to %d threads.\n",
+                bdata(affinity->domains[domainidx].tag), ret, require);
+        fprintf(stderr, "      This results in multiple threads on the same hardware thread.\n");
+    }
+logical_redo:
+    for (int i=0; i< strlist->qty; i++)
+    {
+        if (bstrchrp(strlist->entry[i], '-', 0) != BSTR_ERR)
+        {
+            struct bstrList* indexlist;
+            indexlist = bsplit(strlist->entry[i], '-');
+            if (atoi(bdata(indexlist->entry[0])) <= atoi(bdata(indexlist->entry[1])))
+            {
+                for (int j=atoi(bdata(indexlist->entry[0])); j<=atoi(bdata(indexlist->entry[1])) && (insert_offset+insert < require);j++)
                 {
-                    cpulist[insert] = inlist[j];
+                    cpulist[insert_offset + insert] = inlist[inlist_idx % ret];
                     insert++;
-                    if (insert == length)
+                    inlist_idx++;
+                    if (insert == ret)
                     {
                         bstrListDestroy(indexlist);
-                        goto logical_done;
+                        if (insert == require)
+                            goto logical_done;
+                        else
+                            goto logical_redo;
                     }
                 }
             }
             else
             {
-                for (int j=atoi(bdata(indexlist->entry[0])); j>=atoi(bdata(indexlist->entry[1]));j--)
+                for (int j=atoi(bdata(indexlist->entry[0]));
+                        j>=atoi(bdata(indexlist->entry[1])) && (insert_offset+insert < require); j--)
                 {
-                    cpulist[insert] = inlist[j];
+                    cpulist[insert_offset + insert] = inlist[inlist_idx % ret];
                     insert++;
-                    if (insert == length)
+                    inlist_idx++;
+                    if (insert == ret)
                     {
                         bstrListDestroy(indexlist);
-                        goto logical_done;
+                        if (insert == require)
+                            goto logical_done;
+                        else
+                            goto logical_redo;
                     }
                 }
             }
@@ -340,11 +416,14 @@ static int cpustr_to_cpulist_logical(bstring bcpustr, int* cpulist, int length)
         }
         else
         {
-            cpulist[insert] = inlist[atoi(bdata(strlist->entry[i])) % ret];
+            cpulist[insert_offset + insert] = inlist[atoi(bdata(strlist->entry[i])) % ret];
             insert++;
-            if (insert == length)
+            if (insert == ret)
             {
-                goto logical_done;
+                if (insert == require)
+                    goto logical_done;
+                else
+                    goto logical_redo;
             }
         }
     }
@@ -353,12 +432,11 @@ logical_done:
     bdestroy(blist);
     bstrListDestroy(strlist);
     free(inlist);
-    return insert;
+    return require;
 }
 
-
-
-static int cpustr_to_cpulist_physical(bstring bcpustr, int* cpulist, int length)
+static int
+cpustr_to_cpulist_physical(bstring bcpustr, int* cpulist, int length)
 {
     topology_init();
     CpuTopology_t cpuid_topology = get_cpuTopology();
@@ -471,7 +549,10 @@ physical_done:
     return insert;
 }
 
-int cpustr_to_cpulist(char* cpustring, int* cpulist, int length)
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
+
+int
+cpustr_to_cpulist(const char* cpustring, int* cpulist, int length)
 {
     int insert = 0;
     int len = 0;
@@ -510,7 +591,9 @@ int cpustr_to_cpulist(char* cpustring, int* cpulist, int length)
         }
         else if (cpuid_topology->activeHWThreads < cpuid_topology->numHWThreads)
         {
-            fprintf(stdout, "INFO: You are running LIKWID in a cpuset with %d CPUs, only logical numbering allowed\n", cpuid_topology->activeHWThreads);
+            fprintf(stdout,
+                    "INFO: You are running LIKWID in a cpuset with %d CPUs, only logical numbering allowed\n",
+                    cpuid_topology->activeHWThreads);
             if (((bstrchrp(strlist->entry[i], 'N', 0) == 0) ||
                 (bstrchrp(strlist->entry[i], 'S', 0) == 0) ||
                 (bstrchrp(strlist->entry[i], 'C', 0) == 0) ||
@@ -558,7 +641,8 @@ int cpustr_to_cpulist(char* cpustring, int* cpulist, int length)
     return insert;
 }
 
-int nodestr_to_nodelist(char* nodestr, int* nodes, int length)
+int
+nodestr_to_nodelist(const char* nodestr, int* nodes, int length)
 {
     int ret = 0;
     bstring prefix = bformat("M");
@@ -569,7 +653,8 @@ int nodestr_to_nodelist(char* nodestr, int* nodes, int length)
     return ret;
 }
 
-int sockstr_to_socklist(char* sockstr, int* sockets, int length)
+int
+sockstr_to_socklist(const char* sockstr, int* sockets, int length)
 {
     int ret = 0;
     bstring prefix = bformat("S");
@@ -579,3 +664,4 @@ int sockstr_to_socklist(char* sockstr, int* sockets, int length)
     bdestroy(prefix);
     return ret;
 }
+
diff --git a/src/frequency.c b/src/frequency.c
new file mode 100644
index 0000000..f53d53c
--- /dev/null
+++ b/src/frequency.c
@@ -0,0 +1,438 @@
+/*
+ * =======================================================================================
+ *
+ *      Filename:  frequency.c
+ *
+ *      Description:  Module implementing an interface for frequency manipulation
+ *
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
+ *
+ *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
+ *                Jan Treibig (jt), jan.treibig at gmail.com
+ *      Project:  likwid
+ *
+ *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *
+ *      This program is free software: you can redistribute it and/or modify it under
+ *      the terms of the GNU General Public License as published by the Free Software
+ *      Foundation, either version 3 of the License, or (at your option) any later
+ *      version.
+ *
+ *      This program is distributed in the hope that it will be useful, but WITHOUT ANY
+ *      WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ *      PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+ *
+ *      You should have received a copy of the GNU General Public License along with
+ *      this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * =======================================================================================
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <math.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+
+#include <bstrlib.h>
+#include <likwid.h>
+#include <error.h>
+
+#include <frequency.h>
+
+char* daemon_path = TOSTRING(INSTALL_PREFIX) "/sbin/likwid-setFreq"; /* helper checked with access() and run via popen() by the freq_setCpuClock*() functions */
+
+/* Unnamed enum type with one global object, freq_driver. NOTE(review): not referenced in the visible part of this file - confirm it is used. */
+enum  {
+    ACPICPUFREQ,
+    INTELPSTATE,
+    PPCCPUFREQ,
+} freq_driver;
+
+
+
+/* Read scaling_cur_freq of cpu_id from sysfs; returns that value (kHz per the kernel ABI) times 1000, 0 on failure. */
+uint64_t freq_getCpuClockCurrent(const int cpu_id )
+{
+    char path[256];
+    char line[256];
+    uint64_t khz = 0x0ULL;
+
+    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq", cpu_id);
+    FILE *f = fopen(path, "r");
+    if (f == NULL)
+    {
+        fprintf(stderr, "Unable to open path %s for reading\n", path);
+        return 0;
+    }
+    /* If nothing can be read, khz keeps its initial 0. */
+    if (fgets(line, sizeof(line), f) != NULL)
+    {
+        khz = strtoull(line, NULL, 10);
+    }
+    fclose(f);
+    return khz * 1000ULL; /* pure integer scaling, no round trip through double */
+}
+
+/*
+ * Set the current clock of cpu_id to freq through the likwid-setFreq helper (daemon_path).
+ * Returns freq on success or when it is already in effect; returns 0 if the cpufreq driver is
+ * unavailable or intel_pstate, or if the helper is not executable, cannot be started or fails.
+ */
+uint64_t freq_setCpuClockCurrent(const int cpu_id, const uint64_t freq)
+{
+    char cmd[256];
+    char* drv = freq_getDriver(cpu_id);
+    /* NOTE(review): freq_getDriver() presumably returns NULL on failure; strcmp(NULL, ...) is undefined. */
+    int unsupported = (drv == NULL || strcmp(drv, "intel_pstate") == 0);
+    if (drv != NULL && unsupported)
+    {
+        fprintf(stderr, "CPUfreq driver intel_pstate not supported\n");
+    }
+    free(drv);
+    if (unsupported)
+        return 0x0ULL;
+    if (freq_getCpuClockCurrent(cpu_id) == freq)
+        return freq;
+
+    if (access(daemon_path, X_OK))
+    {
+        fprintf(stderr, "Daemon %s not executable", daemon_path);
+        return 0;
+    }
+
+    /* Bounded formatting; %llu plus cast matches uint64_t on every ABI. A truncated command is never run. */
+    int len = snprintf(cmd, sizeof(cmd), "%s %d cur %llu", daemon_path, cpu_id, (unsigned long long)freq);
+    FILE *fpipe = (len > 0 && (size_t)len < sizeof(cmd)) ? popen(cmd, "r") : NULL;
+    if (fpipe == NULL)
+    {
+        fprintf(stderr, "Problems setting cpu frequency of CPU %d", cpu_id);
+        return 0;
+    }
+
+    return pclose(fpipe) ? 0 : freq; /* a non-zero status from pclose() counts as failure */
+}
+
+/* Read scaling_max_freq of cpu_id from sysfs; returns that value (kHz per the kernel ABI) times 1000, 0 on failure. */
+uint64_t freq_getCpuClockMax(const int cpu_id )
+{
+    char path[256];
+    char line[256];
+    uint64_t khz = 0x0ULL;
+
+    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_max_freq", cpu_id);
+    FILE *f = fopen(path, "r");
+    if (f == NULL)
+    {
+        fprintf(stderr, "Unable to open path %s for reading\n", path);
+        return 0;
+    }
+    /* If nothing can be read, khz keeps its initial 0. */
+    if (fgets(line, sizeof(line), f) != NULL)
+    {
+        khz = strtoull(line, NULL, 10);
+    }
+    fclose(f);
+    return khz * 1000ULL; /* pure integer scaling, no round trip through double */
+}
+
+/*
+ * Set the maximal clock of cpu_id to freq through the likwid-setFreq helper (daemon_path).
+ * Returns freq on success or when it is already in effect; returns 0 if the cpufreq driver is
+ * unavailable or intel_pstate, or if the helper is not executable, cannot be started or fails.
+ */
+uint64_t freq_setCpuClockMax(const int cpu_id, const uint64_t freq)
+{
+    char cmd[256];
+    char* drv = freq_getDriver(cpu_id);
+    /* NOTE(review): freq_getDriver() presumably returns NULL on failure; strcmp(NULL, ...) is undefined. */
+    int unsupported = (drv == NULL || strcmp(drv, "intel_pstate") == 0);
+    if (drv != NULL && unsupported)
+    {
+        fprintf(stderr, "CPUfreq driver intel_pstate not supported\n");
+    }
+    free(drv);
+    if (unsupported)
+        return 0x0ULL;
+    if (freq_getCpuClockMax(cpu_id) == freq)
+        return freq;
+
+    if (access(daemon_path, X_OK))
+    {
+        fprintf(stderr, "Daemon %s not executable", daemon_path);
+        return 0;
+    }
+
+    /* Bounded formatting; %llu plus cast matches uint64_t on every ABI. A truncated command is never run. */
+    int len = snprintf(cmd, sizeof(cmd), "%s %d max %llu", daemon_path, cpu_id, (unsigned long long)freq);
+    FILE *fpipe = (len > 0 && (size_t)len < sizeof(cmd)) ? popen(cmd, "r") : NULL;
+    if (fpipe == NULL)
+    {
+        fprintf(stderr, "Problems setting cpu frequency of CPU %d", cpu_id);
+        return 0;
+    }
+
+    return pclose(fpipe) ? 0 : freq; /* a non-zero status from pclose() counts as failure */
+}
+
+/* Read scaling_min_freq of cpu_id from sysfs.
+ * Returns the file's value (kHz per the kernel cpufreq ABI) times 1000, or 0 on failure. */
+uint64_t freq_getCpuClockMin(const int cpu_id )
+{
+    char path[256];
+    char line[256];
+    uint64_t khz = 0x0ULL;
+
+    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_min_freq", cpu_id);
+    FILE *f = fopen(path, "r");
+    if (f == NULL)
+    {
+        fprintf(stderr, "Unable to open path %s for reading\n", path);
+        return 0;
+    }
+    /* If nothing can be read, khz keeps its initial 0. */
+    if (fgets(line, sizeof(line), f) != NULL)
+    {
+        khz = strtoull(line, NULL, 10);
+    }
+    fclose(f);
+    return khz * 1000ULL; /* pure integer scaling, no round trip through double */
+}
+
+/* Set the lower frequency limit of cpu_id by running the frequency daemon
+ * with "<cpu_id> min <freq>". Returns freq on success or when the current
+ * limit already equals freq, and 0 on failure (driver unknown or
+ * intel_pstate, daemon not executable, daemon exits non-zero).
+ * NOTE(review): freq is compared against a value in Hz but passed to the daemon unchanged - confirm the unit. */
+uint64_t freq_setCpuClockMin(const int cpu_id, const uint64_t freq)
+{
+    FILE *fpipe = NULL;
+    char cmd[256];
+    int len = 0;
+    char* drv = freq_getDriver(cpu_id);
+
+    /* freq_getDriver() returns NULL when sysfs is unreadable; strcmp() must not see that. */
+    if (drv == NULL) {
+        return 0x0ULL;
+    }
+    if (strcmp(drv, "intel_pstate") == 0) {
+        fprintf(stderr, "CPUfreq driver intel_pstate not supported\n");
+        free(drv);
+        return 0x0ULL;
+    }
+    free(drv);
+    if (freq_getCpuClockMin(cpu_id) == freq) {
+        return freq;
+    }
+    if (access(daemon_path, X_OK)) {
+        fprintf(stderr, "Daemon %s not executable", daemon_path);
+        return 0;
+    }
+    /* Bounded formatting; the cast keeps the conversion specifier valid on every ABI. */
+    len = snprintf(cmd, sizeof(cmd), "%s %d min %llu", daemon_path, cpu_id, (unsigned long long)freq);
+    if (len < 0 || (size_t)len >= sizeof(cmd) || !(fpipe = popen(cmd,"r"))) {
+        fprintf(stderr, "Problems setting cpu frequency of CPU %d", cpu_id);
+        return 0;
+    }
+    /* Any non-zero pclose() result is treated as a failed request. */
+    return (pclose(fpipe) == 0) ? freq : 0;
+}
+
+/* Return the content of scaling_governor for cpu_id as a malloc'ed string
+ * with surrounding whitespace removed; the caller must free() it. Returns
+ * NULL if sysfs cannot be read or memory is exhausted. */
+char * freq_getGovernor(const int cpu_id )
+{
+    FILE *f = NULL;
+    char line[256];
+    char path[256];
+    char* gov = NULL;
+    bstring trimmed = NULL;
+
+    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", cpu_id);
+    f = fopen(path, "r");
+    if (f == NULL) {
+        fprintf(stderr, "Unable to open path %s for reading\n", path);
+        return NULL;
+    }
+    if (fgets(line, sizeof(line), f) != NULL && (trimmed = bfromcstr(line)) != NULL) {
+        btrimws(trimmed);
+        gov = malloc((blength(trimmed)+1) * sizeof(char));
+        if (gov != NULL) {
+            strcpy(gov, bdata(trimmed));
+        }
+        /* Only the malloc'ed copy is returned; release the scratch bstring. */
+        bdestroy(trimmed);
+    }
+    /* Single exit below the read so the stream is closed on every path. */
+    fclose(f);
+    return gov;
+}
+
+/* Run the frequency daemon with "<cpu_id> gov <gov>". Returns 1 on success,
+ * 0 on any failure. NOTE(review): gov reaches a shell via popen() unescaped. */
+int freq_setGovernor(const int cpu_id, const char* gov)
+{
+    FILE *fpipe = NULL;
+    char cmd[256];
+    int len = 0;
+    char* drv = freq_getDriver(cpu_id);
+    /* freq_getDriver() may return NULL; neither strcmp() nor "%s" accept NULL. */
+    if (drv == NULL || gov == NULL) {
+        free(drv);
+        return 0;
+    }
+    if (strcmp(drv, "intel_pstate") == 0) {
+        fprintf(stderr, "CPUfreq driver intel_pstate not supported\n");
+        free(drv);
+        return 0;
+    }
+    free(drv);
+    if (access(daemon_path, X_OK)) {
+        fprintf(stderr, "Daemon %s not executable", daemon_path);
+        return 0;
+    }
+    len = snprintf(cmd, sizeof(cmd), "%s %d gov %s", daemon_path, cpu_id, gov);
+    if (len < 0 || (size_t)len >= sizeof(cmd) || !(fpipe = popen(cmd,"r"))) {
+        fprintf(stderr, "Problems setting cpu frequency of CPU %d", cpu_id);
+        return 0;
+    }
+    return (pclose(fpipe) == 0);
+}
+
+/* Return the entries of scaling_available_frequencies for cpu_id as one
+ * malloc'ed, space separated string (caller frees). Each sysfs value is
+ * scaled by 1E-6 and printed with three decimals; trailing zeros are
+ * stripped from every entry except the first. Returns NULL if sysfs cannot
+ * be read or memory is exhausted. */
+char * freq_getAvailFreq(const int cpu_id )
+{
+    int i, j, k;
+    FILE *f = NULL;
+    char buff[256];
+    char tmp[10];
+    char *eptr = NULL;
+    bstring bbuff = NULL;
+    struct bstrList * freq_list = NULL;
+
+    snprintf(buff, sizeof(buff), "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_available_frequencies", cpu_id);
+    f = fopen(buff, "r");
+    if (f == NULL)
+    {
+        fprintf(stderr, "Unable to open path %s for reading\n", buff);
+        return NULL;
+    }
+    if (fgets(buff, sizeof(buff), f) != NULL && (bbuff = bfromcstr(buff)) != NULL)
+    {
+        btrimws(bbuff);
+        DEBUG_PRINT(DEBUGLEV_DETAIL, Result: %s, bdata(bbuff));
+        freq_list = bsplit(bbuff, ' ');
+    }
+    fclose(f);
+    if (freq_list != NULL && freq_list->qty > 0)
+    {
+        /* tmp yields at most 9 characters per entry, so 10 bytes each suffice. */
+        eptr = malloc(freq_list->qty * 10 * sizeof(char));
+    }
+    if (eptr != NULL)
+    {
+        snprintf(tmp, sizeof(tmp), "%.3f", strtod(bdata(freq_list->entry[0]), NULL) * 1E-6);
+        j = sprintf(eptr, "%s", tmp);
+        for (i=1; i< freq_list->qty; i++)
+        {
+            snprintf(tmp, sizeof(tmp), " %.3f", strtod(bdata(freq_list->entry[i]), NULL) * 1E-6);
+            for (k = (int)strlen(tmp)-1; k > 0 && tmp[k] == '0' && tmp[k-1] != '.'; k--)
+            {
+                tmp[k] = '\0';
+            }
+            j+= sprintf(&(eptr[j]), "%s", tmp);
+        }
+    }
+    /* Both bstrlib calls tolerate NULL, so all paths release the scratch objects. */
+    bstrListDestroy(freq_list);
+    bdestroy(bbuff);
+    return eptr;
+}
+
+/* Return the entries of scaling_available_governors for cpu_id as one
+ * malloc'ed, space separated string (caller frees), or NULL on failure. */
+char * freq_getAvailGovs(const int cpu_id )
+{
+    int i, j, k;
+    FILE *f = NULL;
+    char buff[256];
+    char* eptr = NULL;
+    bstring bbuff = NULL;
+    struct bstrList * gov_list = NULL;
+
+    snprintf(buff, sizeof(buff), "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_available_governors", cpu_id);
+    f = fopen(buff, "r");
+    if (f == NULL)
+    {
+        fprintf(stderr, "Unable to open path %s for reading\n", buff);
+        return NULL;
+    }
+    if (fgets(buff, sizeof(buff), f) != NULL && (bbuff = bfromcstr(buff)) != NULL)
+    {
+        btrimws(bbuff);
+        gov_list = bsplit(bbuff, ' ');
+    }
+    fclose(f);
+    if (gov_list != NULL && gov_list->qty > 0)
+    {
+        /* qty extra bytes: one per separating blank (qty - 1) plus the final NUL. */
+        k = gov_list->qty;
+        for (i=0; i < gov_list->qty; i++)
+        {
+            k += blength(gov_list->entry[i]);
+        }
+        eptr = malloc(k * sizeof(char));
+    }
+    if (eptr != NULL)
+    {
+        j = sprintf(eptr, "%s", bdata(gov_list->entry[0]));
+        for (i=1; i< gov_list->qty; i++)
+        {
+            j += sprintf(&(eptr[j]), " %s", bdata(gov_list->entry[i]));
+        }
+    }
+    bstrListDestroy(gov_list);
+    bdestroy(bbuff);
+    return eptr;
+}
+
+/* Return the content of scaling_driver for cpu_id as a malloc'ed string
+ * without surrounding whitespace (caller frees). Returns NULL if sysfs
+ * cannot be read or memory is exhausted. */
+char * freq_getDriver(const int cpu_id )
+{
+    FILE *f = NULL;
+    char buff[256];
+    char* eptr = NULL;
+    bstring bbuff = NULL;
+
+    snprintf(buff, sizeof(buff), "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", cpu_id);
+    f = fopen(buff, "r");
+    if (f == NULL)
+    {
+        fprintf(stderr, "Unable to open path %s for reading\n", buff);
+        return NULL;
+    }
+    if (fgets(buff, sizeof(buff), f) != NULL && (bbuff = bfromcstr(buff)) != NULL)
+    {
+        btrimws(bbuff);
+        eptr = malloc((blength(bbuff)+1) * sizeof(char));
+        if (eptr != NULL)
+        {
+            strcpy(eptr, bdata(bbuff));
+        }
+        /* Only the malloc'ed copy is returned; release the scratch bstring. */
+        bdestroy(bbuff);
+    }
+    fclose(f);
+    return eptr;
+}
diff --git a/src/hashTable.c b/src/hashTable.c
index d3bc9ab..07f64ec 100644
--- a/src/hashTable.c
+++ b/src/hashTable.c
@@ -6,8 +6,8 @@
  *      Description: Hashtable implementation based on SGLIB.
  *                   Used for Marker API result handling.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -29,6 +29,8 @@
  * =======================================================================================
  */
 
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -41,18 +43,21 @@
 #include <hashTable.h>
 #include <likwid.h>
 
+/* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
+
 typedef struct {
     pthread_t tid;
     uint32_t coreId;
+    uint32_t hashIndex;
     GHashTable* hashTable;
 } ThreadList;
 
-
 static ThreadList* threadList[MAX_NUM_THREADS];
 
-/* ======================================================================== */
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
 
-void hashTable_init()
+void
+hashTable_init()
 {
     for (int i=0; i<MAX_NUM_THREADS; i++)
     {
@@ -60,7 +65,8 @@ void hashTable_init()
     }
 }
 
-void hashTable_initThread(int coreID)
+void
+hashTable_initThread(int coreID)
 {
     ThreadList* resPtr = threadList[coreID];
     /* check if thread was already initialized */
@@ -70,12 +76,14 @@ void hashTable_initThread(int coreID)
         /* initialize structure */
         resPtr->tid =  pthread_self();
         resPtr->coreId  = coreID;
+        resPtr->hashIndex = 0;
         resPtr->hashTable = g_hash_table_new(g_str_hash, g_str_equal);
         threadList[coreID] = resPtr;
     }
 }
 
-int hashTable_get(bstring label, LikwidThreadResults** resEntry)
+int
+hashTable_get(bstring label, LikwidThreadResults** resEntry)
 {
     int coreID = likwid_getProcessorId();
     ThreadList* resPtr = threadList[coreID];
@@ -87,6 +95,7 @@ int hashTable_get(bstring label, LikwidThreadResults** resEntry)
         /* initialize structure */
         resPtr->tid =  pthread_self();
         resPtr->coreId  = coreID;
+        resPtr->hashIndex = 0;
         resPtr->hashTable = g_hash_table_new(g_str_hash, g_str_equal);
         threadList[coreID] = resPtr;
     }
@@ -100,6 +109,7 @@ int hashTable_get(bstring label, LikwidThreadResults** resEntry)
         (*resEntry)->label = bstrcpy (label);
         (*resEntry)->time = 0.0;
         (*resEntry)->count = 0;
+        (*resEntry)->index = resPtr->hashIndex++;
         for (int i=0; i< NUM_PMC; i++)
         {
             (*resEntry)->PMcounters[i] = 0.0;
@@ -115,7 +125,8 @@ int hashTable_get(bstring label, LikwidThreadResults** resEntry)
     return coreID;
 }
 
-void hashTable_finalize(int* numThreads, int* numRegions, LikwidResults** results)
+void
+hashTable_finalize(int* numThreads, int* numRegions, LikwidResults** results)
 {
     int threadId = 0;
     uint32_t numberOfThreads = 0;
@@ -143,7 +154,8 @@ void hashTable_finalize(int* numThreads, int* numRegions, LikwidResults** result
     (*results) = (LikwidResults*) malloc(numberOfRegions * sizeof(LikwidResults));
     if (!(*results))
     {
-        fprintf(stderr, "Failed to allocate %lu bytes for the results\n", numberOfRegions * sizeof(LikwidResults));
+        fprintf(stderr, "Failed to allocate %lu bytes for the results\n",
+                numberOfRegions * sizeof(LikwidResults));
     }
     else
     {
@@ -152,25 +164,29 @@ void hashTable_finalize(int* numThreads, int* numRegions, LikwidResults** result
             (*results)[i].time = (double*) malloc(numberOfThreads * sizeof(double));
             if (!(*results)[i].time)
             {
-                fprintf(stderr, "Failed to allocate %lu bytes for the time storage\n", numberOfThreads * sizeof(double));
+                fprintf(stderr, "Failed to allocate %lu bytes for the time storage\n",
+                        numberOfThreads * sizeof(double));
                 break;
             }
             (*results)[i].count = (uint32_t*) malloc(numberOfThreads * sizeof(uint32_t));
             if (!(*results)[i].count)
             {
-                fprintf(stderr, "Failed to allocate %lu bytes for the count storage\n", numberOfThreads * sizeof(uint32_t));
+                fprintf(stderr, "Failed to allocate %lu bytes for the count storage\n",
+                        numberOfThreads * sizeof(uint32_t));
                 break;
             }
             (*results)[i].cpulist = (int*) malloc(numberOfThreads * sizeof(int));
             if (!(*results)[i].count)
             {
-                fprintf(stderr, "Failed to allocate %lu bytes for the cpulist storage\n", numberOfThreads * sizeof(int));
+                fprintf(stderr, "Failed to allocate %lu bytes for the cpulist storage\n",
+                        numberOfThreads * sizeof(int));
                 break;
             }
             (*results)[i].counters = (double**) malloc(numberOfThreads * sizeof(double*));
             if (!(*results)[i].counters)
             {
-                fprintf(stderr, "Failed to allocate %lu bytes for the counter result storage\n", numberOfThreads * sizeof(double*));
+                fprintf(stderr, "Failed to allocate %lu bytes for the counter result storage\n",
+                        numberOfThreads * sizeof(double*));
                 break;
             }
 
@@ -182,7 +198,8 @@ void hashTable_finalize(int* numThreads, int* numRegions, LikwidResults** result
                 (*results)[i].counters[j] = (double*) malloc(NUM_PMC * sizeof(double));
                 if (!(*results)[i].counters)
                 {
-                    fprintf(stderr, "Failed to allocate %lu bytes for the counter result storage for thread %d\n", NUM_PMC * sizeof(double), j);
+                    fprintf(stderr, "Failed to allocate %lu bytes for the counter result storage for thread %d\n",
+                            NUM_PMC * sizeof(double), j);
                     break;
                 }
                 else
@@ -197,7 +214,6 @@ void hashTable_finalize(int* numThreads, int* numRegions, LikwidResults** result
     }
 
     uint32_t regionIds[numberOfRegions];
-    uint32_t currentRegion = 0;
 
     for (int core=0; core<MAX_NUM_THREADS; core++)
     {
@@ -206,10 +222,8 @@ void hashTable_finalize(int* numThreads, int* numRegions, LikwidResults** result
         if (resPtr != NULL)
         {
             LikwidThreadResults* threadResult  = NULL;
-
             GHashTableIter iter;
             gpointer key, value;
-
             g_hash_table_iter_init (&iter, resPtr->hashTable);
 
             /* iterate over all regions in thread */
@@ -218,15 +232,20 @@ void hashTable_finalize(int* numThreads, int* numRegions, LikwidResults** result
                 threadResult = (LikwidThreadResults*) value;
                 uint32_t* regionId = (uint32_t*) g_hash_table_lookup(regionLookup, key);
 
-                /* is region not yet registered */
+                /* is region not yet registered, this is the case for the first
+                 * processed CPU core that has a hash table.
+                 */
                 if ( regionId == NULL )
                 {
-                    (*results)[currentRegion].tag = bstrcpy (threadResult->label);
-                    (*results)[currentRegion].groupID = threadResult->groupID;
-                    regionIds[currentRegion] = currentRegion;
-                    regionId = regionIds + currentRegion;
-                    g_hash_table_insert(regionLookup, g_strdup(key), (regionIds+currentRegion));
-                    currentRegion++;
+                    (*results)[threadResult->index].tag = bstrcpy (threadResult->label);
+                    (*results)[threadResult->index].groupID = threadResult->groupID;
+                    regionIds[threadResult->index] = threadResult->index;
+                    regionId = regionIds + threadResult->index;
+                    /* The region id is added to a temporary hash table with the
+                     * region label as key to get it back for all following
+                     * threads.
+                     */
+                    g_hash_table_insert(regionLookup, g_strdup(key), (regionIds+threadResult->index));
                 }
 
                 (*results)[*regionId].count[threadId] = threadResult->count;
@@ -237,14 +256,9 @@ void hashTable_finalize(int* numThreads, int* numRegions, LikwidResults** result
                 {
                     (*results)[*regionId].counters[threadId][j] = threadResult->PMcounters[j];
                 }
-                bdestroy(threadResult->label);
-                free(threadResult);
             }
 
             threadId++;
-            g_hash_table_destroy(resPtr->hashTable);
-            free(resPtr);
-            threadList[core] = NULL;
         }
     }
     g_hash_table_destroy(regionLookup);
@@ -253,4 +267,17 @@ void hashTable_finalize(int* numThreads, int* numRegions, LikwidResults** result
     (*numRegions) = numberOfRegions;
 }
 
+/* Destructor (priority 102): frees every per-core ThreadList and destroys
+ * its hash table. Stored keys/values are not released by this function. */
+void __attribute__((destructor (102))) hashTable_finalizeDestruct(void)
+{
+    for (int idx = 0; idx < MAX_NUM_THREADS; idx++) {
+        ThreadList* entry = threadList[idx];
+        if (entry != NULL) {
+            g_hash_table_destroy(entry->hashTable);
+            free(entry);
+            threadList[idx] = NULL;
+        }
+    }
+}
 
diff --git a/src/includes/access.h b/src/includes/access.h
index c7f95d3..39faee5 100644
--- a/src/includes/access.h
+++ b/src/includes/access.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File HPM access Module
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at gmail.com
  *      Project:  likwid
@@ -27,7 +27,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef ACCESS_H
 #define ACCESS_H
 
@@ -40,5 +39,4 @@ int HPMread(int cpu_id, PciDeviceIndex dev, uint32_t reg, uint64_t* data);
 int HPMwrite(int cpu_id, PciDeviceIndex dev, uint32_t reg, uint64_t data);
 int HPMcheck(PciDeviceIndex dev, int cpu_id);
 
-
-#endif
+#endif /* ACCESS_H */
diff --git a/src/includes/access_client.h b/src/includes/access_client.h
index ecfdb31..c601c37 100644
--- a/src/includes/access_client.h
+++ b/src/includes/access_client.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header file for interface to the access daemon for the access module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -27,9 +27,8 @@
  *
  * =======================================================================================
  */
-#ifndef LIKWID_ACCESS_CLIENT_H
-#define LIKWID_ACCESS_CLIENT_H
-
+#ifndef ACCESS_CLIENT_H
+#define ACCESS_CLIENT_H
 
 int access_client_init(int cpu_id);
 int access_client_read(PciDeviceIndex dev, const int cpu_id, uint32_t reg, uint64_t *data);
@@ -37,4 +36,4 @@ int access_client_write(PciDeviceIndex dev, const int cpu_id, uint32_t reg, uint
 void access_client_finalize(int cpu_id);
 int access_client_check(PciDeviceIndex dev, int cpu_id);
 
-#endif
+#endif /* ACCESS_CLIENT_H */
diff --git a/src/includes/access_client_types.h b/src/includes/access_client_types.h
index 1eb16a9..eaee956 100644
--- a/src/includes/access_client_types.h
+++ b/src/includes/access_client_types.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Types file for access_client access module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -27,7 +27,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef ACCESSCLIENT_TYPES_H
 #define ACCESSCLIENT_TYPES_H
 
@@ -48,7 +47,8 @@ typedef enum {
     ERR_OPENFAIL,     /* failure to open msr files */
     ERR_RWFAIL,       /* failure to read/write msr */
     ERR_DAEMONBUSY,   /* daemon already has another client */
-    ERR_NODEV         /* No such device */
+    ERR_NODEV,        /* No such device */
+    ERR_LOCKED        /* Global lock is set */
 } AccessErrorType;
 
 typedef struct {
diff --git a/src/includes/access_x86.h b/src/includes/access_x86.h
index dbfda7f..9240b2f 100644
--- a/src/includes/access_x86.h
+++ b/src/includes/access_x86.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header file for the interface to x86 related functions for the access module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -27,8 +27,8 @@
  *
  * =======================================================================================
  */
-#ifndef LIKWID_ACCESS_X86_H
-#define LIKWID_ACCESS_X86_H
+#ifndef ACCESS_X86_H
+#define ACCESS_X86_H
 
 #include <types.h>
 
@@ -38,5 +38,4 @@ int access_x86_write(PciDeviceIndex dev, const int cpu_id, uint32_t reg, uint64_
 void access_x86_finalize(int cpu_id);
 int access_x86_check(PciDeviceIndex dev, int cpu_id);
 
-
-#endif
+#endif /* ACCESS_X86_H */
diff --git a/src/includes/access_x86_msr.h b/src/includes/access_x86_msr.h
index 87d3500..d12cc21 100644
--- a/src/includes/access_x86_msr.h
+++ b/src/includes/access_x86_msr.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header file for the interface to x86 MSR functions for the access module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -27,8 +27,8 @@
  *
  * =======================================================================================
  */
-#ifndef LIKWID_ACCESS_X86_MSR_H
-#define LIKWID_ACCESS_X86_MSR_H
+#ifndef ACCESS_X86_MSR_H
+#define ACCESS_X86_MSR_H
 
 #include <types.h>
 
@@ -38,4 +38,4 @@ int access_x86_msr_read(const int cpu, uint32_t reg, uint64_t *data);
 int access_x86_msr_write(const int cpu, uint32_t reg, uint64_t data);
 int access_x86_msr_check(PciDeviceIndex dev, int cpu_id);
 
-#endif
+#endif /* ACCESS_X86_MSR_H */
diff --git a/src/includes/access_x86_pci.h b/src/includes/access_x86_pci.h
index 241a1e8..a7f41ff 100644
--- a/src/includes/access_x86_pci.h
+++ b/src/includes/access_x86_pci.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header file for the interface to x86 PCI functions for the access module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -27,8 +27,8 @@
  *
  * =======================================================================================
  */
-#ifndef LIKWID_ACCESS_X86_PCI_H
-#define LIKWID_ACCESS_X86_PCI_H
+#ifndef ACCESS_X86_PCI_H
+#define ACCESS_X86_PCI_H
 
 #include <types.h>
 
@@ -38,4 +38,4 @@ int access_x86_pci_read(PciDeviceIndex dev, const int socket, uint32_t reg, uint
 int access_x86_pci_write(PciDeviceIndex dev, const int socket, uint32_t reg, uint64_t data);
 int access_x86_pci_check(PciDeviceIndex dev, int socket);
 
-#endif
+#endif /* ACCESS_X86_PCI_H */
diff --git a/src/includes/affinity.h b/src/includes/affinity.h
index 3692976..ae31670 100644
--- a/src/includes/affinity.h
+++ b/src/includes/affinity.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File affinity Module
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at gmail.com
@@ -28,7 +28,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef AFFINITY_H
 #define AFFINITY_H
 
@@ -36,15 +35,14 @@
 #include <likwid.h>
 
 int socket_lock[MAX_NUM_NODES];
+int core_lock[MAX_NUM_THREADS];
 int tile_lock[MAX_NUM_THREADS];
 extern AffinityDomains affinityDomains;
 
 extern int affinity_core2node_lookup[MAX_NUM_THREADS];
-
+extern int affinity_thread2core_lookup[MAX_NUM_THREADS];
 extern int affinity_processGetProcessorId();
 extern int affinity_threadGetProcessorId();
 extern const AffinityDomain* affinity_getDomain(bstring domain);
 
-
 #endif /*AFFINITY_H*/
-
diff --git a/src/includes/bitUtil.h b/src/includes/bitUtil.h
index b0a17ab..05690bc 100644
--- a/src/includes/bitUtil.h
+++ b/src/includes/bitUtil.h
@@ -3,11 +3,11 @@
  *
  *      Filename:  bitUtil.h
  *
- *      Description:  Header File bitUtil Module. 
+ *      Description:  Header File bitUtil Module.
  *                    Helper routines for dealing with bit manipulations
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -28,7 +28,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef BITUTIL_H
 #define BITUTIL_H
 
@@ -66,5 +65,4 @@ extern uint32_t getBitFieldWidth(uint32_t number);
 #define bitMask_toString(_string,_mask)  \
     sprintf(_string,"%llX %llX", LLU_CAST (_mask).mask[0], LLU_CAST (_mask).mask[1]);
 
-
 #endif /*BITUTIL_H*/
diff --git a/src/includes/bstrlib.h b/src/includes/bstrlib.h
index a1160b6..02a836e 100644
--- a/src/includes/bstrlib.h
+++ b/src/includes/bstrlib.h
@@ -1,8 +1,8 @@
 /*
  * =======================================================================================
  * This source file is part of the bstring string library.  This code was
- * written by Paul Hsieh in 2002-2008, and is covered by the BSD open source 
- * license and the GPL. Refer to the accompanying documentation for details 
+ * written by Paul Hsieh in 2002-2008, and is covered by the BSD open source
+ * license and the GPL. Refer to the accompanying documentation for details
  * on usage and license.
  */
 /*
@@ -178,9 +178,9 @@ extern int bsreadlnsa (bstring r, struct bStream * s, const_bstring term);
 extern int bsreada (bstring b, struct bStream * s, int n);
 extern int bsunread (struct bStream * s, const_bstring b);
 extern int bspeek (bstring r, const struct bStream * s);
-extern int bssplitscb (struct bStream * s, const_bstring splitStr, 
+extern int bssplitscb (struct bStream * s, const_bstring splitStr,
     int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
-extern int bssplitstrcb (struct bStream * s, const_bstring splitStr, 
+extern int bssplitstrcb (struct bStream * s, const_bstring splitStr,
     int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
 extern int bseof (const struct bStream * s);
 
diff --git a/src/includes/calculator.h b/src/includes/calculator.h
index 819041a..4ac7d93 100644
--- a/src/includes/calculator.h
+++ b/src/includes/calculator.h
@@ -1,37 +1,36 @@
 /*
  * =======================================================================================
  *
- *      Filename:  calculator.c
+ *      Filename:  calculator.h
  *
- *      Description:  Infix calculator
+ *      Description:  Header file for infix calculator
  *
- *      Author:   Brandon Mills (bm), mills.brandont at gmail.com
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
- *      Copyright (C) 2016 Brandon Mills
+ *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
+ *                Thomas Roehl (tr), thomas.roehl at gmail.com
+ *      Project:  likwid
  *
- *      Permission is hereby granted, free of charge, to any person obtaining a copy of this
- *      software and associated documentation files (the "Software"), to deal in the
- *      Softwarewithout restriction, including without limitation the rights to use, copy,
- *      modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
- *      and to permit persons to whom the Software is furnished to do so, subject to the
- *      following conditions:
+ *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
  *
- *      The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+ *      This program is free software: you can redistribute it and/or modify it under
+ *      the terms of the GNU General Public License as published by the Free Software
+ *      Foundation, either version 3 of the License, or (at your option) any later
+ *      version.
  *
- *      THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
- *      INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
- *      PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- *      HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- *      OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- *      SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *      This program is distributed in the hope that it will be useful, but WITHOUT ANY
+ *      WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ *      PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+ *
+ *      You should have received a copy of the GNU General Public License along with
+ *      this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  * =======================================================================================
  */
-
 #ifndef CALCULATOR_H
 #define CALCULATOR_H
 
-
 int calculate_infix(char* finfix, double *result);
 
 #endif
diff --git a/src/includes/calculator_stack.h b/src/includes/calculator_stack.h
index 1c1ab90..fb875f5 100644
--- a/src/includes/calculator_stack.h
+++ b/src/includes/calculator_stack.h
@@ -1,13 +1,17 @@
 /*
  * =======================================================================================
  *
- *      Filename:  calculator.c
+ *      Filename:  calculator_stack.h
  *
- *      Description:  Infix calculator
+ *      Description:  Stack implementation for infix calculator
+ *
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Brandon Mills (bm), mills.brandont at gmail.com
+ *      Project:  likwid
  *
- *      Copyright (C) 2016 Brandon Mills
+ *      Copyright (C) Brandon Mills
  *
  *      Permission is hereby granted, free of charge, to any person obtaining a copy of this
  *      software and associated documentation files (the "Software"), to deal in the
diff --git a/src/includes/configuration.h b/src/includes/configuration.h
index 21b97a5..4cc8932 100644
--- a/src/includes/configuration.h
+++ b/src/includes/configuration.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File of Module configuration.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at gmail.com
  *      Project:  likwid
@@ -27,7 +27,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef CONFIGURATION_H
 #define CONFIGURATION_H
 
@@ -35,12 +34,7 @@
 #include <likwid.h>
 #include <error.h>
 
-
 extern Configuration config;
 extern int init_config;
 
-
-
-
-
 #endif
diff --git a/src/includes/cpuFeatures.h b/src/includes/cpuFeatures.h
index 41c45e4..9c6a97f 100644
--- a/src/includes/cpuFeatures.h
+++ b/src/includes/cpuFeatures.h
@@ -5,13 +5,13 @@
  *
  *      Description:  Header File of Module cpuFeatures.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/cpuFeatures_types.h b/src/includes/cpuFeatures_types.h
index ec5e9c7..a8ccb12 100644
--- a/src/includes/cpuFeatures_types.h
+++ b/src/includes/cpuFeatures_types.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Types file for CpuFeature module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -27,7 +27,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef CPUFEATURES_TYPES_H
 #define CPUFEATURES_TYPES_H
 
@@ -38,25 +37,23 @@ typedef enum {
     IP_PREFETCHER} CpuFeature;
 
 typedef struct {
-	unsigned int fastStrings:1;
-	unsigned int thermalControl:1;
-	unsigned int perfMonitoring:1;
-	unsigned int hardwarePrefetcher:1;
-	unsigned int ferrMultiplex:1;
-	unsigned int branchTraceStorage:1;
-	unsigned int pebs:1;
-	unsigned int speedstep:1;
-	unsigned int monitor:1;
-	unsigned int clPrefetcher:1;
-	unsigned int speedstepLock:1;
-	unsigned int cpuidMaxVal:1;
-	unsigned int xdBit:1;
-	unsigned int dcuPrefetcher:1;
-	unsigned int dynamicAcceleration:1;
-	unsigned int turboMode:1;
-	unsigned int ipPrefetcher:1;
-    } CpuFeatureFlags;
-
-
+    unsigned int fastStrings:1;
+    unsigned int thermalControl:1;
+    unsigned int perfMonitoring:1;
+    unsigned int hardwarePrefetcher:1;
+    unsigned int ferrMultiplex:1;
+    unsigned int branchTraceStorage:1;
+    unsigned int pebs:1;
+    unsigned int speedstep:1;
+    unsigned int monitor:1;
+    unsigned int clPrefetcher:1;
+    unsigned int speedstepLock:1;
+    unsigned int cpuidMaxVal:1;
+    unsigned int xdBit:1;
+    unsigned int dcuPrefetcher:1;
+    unsigned int dynamicAcceleration:1;
+    unsigned int turboMode:1;
+    unsigned int ipPrefetcher:1;
+} CpuFeatureFlags;
 
 #endif /*CPUFEATURES_TYPES_H*/
diff --git a/src/includes/cpuid.h b/src/includes/cpuid.h
index 7cea6d8..9fd0afd 100644
--- a/src/includes/cpuid.h
+++ b/src/includes/cpuid.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Common macro definition for CPUID instruction
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at gmail.com
  *      Project:  likwid
@@ -27,7 +27,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef LIKWID_CPUID_H
 #define LIKWID_CPUID_H
 
@@ -36,7 +35,6 @@
  * File: arch/x86/boot/cpuflags.c
 */
 
-
 #if defined(__i386__) && defined(__PIC__)
 # define EBX_REG "=r"
 #else
diff --git a/src/includes/error.h b/src/includes/error.h
index 696db4d..231b590 100644
--- a/src/includes/error.h
+++ b/src/includes/error.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Central error handling macros
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at gmail.com
@@ -28,18 +28,13 @@
  *
  * =======================================================================================
  */
-
 #ifndef ERROR_H
 #define ERROR_H
 
-
 #include <likwid.h>
 
-
-
 #define str(x) #x
 
-
 #define ERRNO_PRINT fprintf(stderr, "ERROR - [%s:%d] %s\n", __FILE__, __LINE__, strerror(errno))
 
 #define ERROR  \
@@ -49,7 +44,6 @@
 #define ERROR_PLAIN_PRINT(msg) \
    fprintf(stderr,  "ERROR - [%s:%s:%d] " str(msg) "\n", __FILE__, __func__,__LINE__);
 
-
 #define ERROR_PRINT(fmt, ...) \
    fprintf(stderr,  "ERROR - [%s:%s:%d] %s.\n" str(fmt) "\n", __FILE__,  __func__,__LINE__, strerror(errno), __VA_ARGS__);
 
@@ -70,8 +64,6 @@
         exit(EXIT_FAILURE); \
     }
 
-
-
 #define VERBOSEPRINTREG(cpuid,reg,flags,msg) \
     if (perfmon_verbosity >= DEBUGLEV_DETAIL) \
     { \
@@ -79,7 +71,7 @@
                 __func__, __LINE__,  (cpuid), LLU_CAST (reg), LLU_CAST (flags)); \
         fflush(stdout);  \
     }
-    
+
 #define VERBOSEPRINTPCIREG(cpuid,dev,reg,flags,msg) \
     if (perfmon_verbosity >= DEBUGLEV_DETAIL) \
     { \
@@ -101,7 +93,6 @@
         fflush(stdout); \
     }
 
-
 #define CHECK_MSR_WRITE_ERROR(func) CHECK_AND_RETURN_ERROR(func, MSR write operation failed);
 #define CHECK_MSR_READ_ERROR(func) CHECK_AND_RETURN_ERROR(func, MSR read operation failed);
 #define CHECK_PCI_WRITE_ERROR(func) CHECK_AND_RETURN_ERROR(func, PCI write operation failed);
diff --git a/src/includes/frequency.h b/src/includes/frequency.h
new file mode 100644
index 0000000..cdf556b
--- /dev/null
+++ b/src/includes/frequency.h
@@ -0,0 +1,12 @@
+#ifndef FREQUENCY_H
+#define FREQUENCY_H
+
+#define STRINGIFY(x) #x
+#define TOSTRING(x) STRINGIFY(x)
+
+
+extern char* daemon_path;
+
+
+
+#endif /* FREQUENCY_H */
diff --git a/src/includes/hashTable.h b/src/includes/hashTable.h
index 9824e1d..13b5558 100644
--- a/src/includes/hashTable.h
+++ b/src/includes/hashTable.h
@@ -7,8 +7,8 @@
  *                    Wrapper for HashTable data structure holding thread
  *                    specific region information.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -29,7 +29,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef HASHTABLE_H
 #define HASHTABLE_H
 
@@ -41,5 +40,4 @@ void hashTable_initThread(int coreID);
 extern int hashTable_get(bstring regionTag, LikwidThreadResults** result);
 extern void hashTable_finalize(int* numberOfThreads, int* numberOfRegions, LikwidResults** results);
 
-
 #endif /*CPUID_H*/
diff --git a/src/includes/libperfctr_types.h b/src/includes/libperfctr_types.h
index 7cf836e..e5f4edd 100644
--- a/src/includes/libperfctr_types.h
+++ b/src/includes/libperfctr_types.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Types file for libperfctr module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -27,7 +27,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef LIBPERFCTR_H
 #define LIBPERFCTR_H
 
@@ -35,6 +34,7 @@
 
 typedef struct LikwidThreadResults{
     bstring  label;
+    uint32_t index;
     double time;
     TimerData startTime;
     int groupID;
diff --git a/src/includes/likwid.h b/src/includes/likwid.h
index d2ec5e9..b7063a2 100644
--- a/src/includes/likwid.h
+++ b/src/includes/likwid.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File of likwid API
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Authors:  Thomas Roehl (tr), thomas.roehl at googlemail.com
  *
@@ -28,7 +28,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef LIKWID_H
 #define LIKWID_H
 
@@ -107,7 +106,6 @@ extern "C" {
 #endif
 
 
-
 /*
 ################################################################################
 # Marker API related functions
@@ -310,7 +308,7 @@ Set group path in the config struction. The path must be a directory.
 @param [in] path
 @return error code (0 for success, -ENOMEM if reallocation failed, -ENOTDIR if no directoy)
 */
-extern int config_setGroupPath(char* path) __attribute__ ((visibility ("default") ));
+extern int config_setGroupPath(const char* path) __attribute__ ((visibility ("default") ));
 
 /** @}*/
 /*
@@ -504,7 +502,7 @@ Set the memory allocation policy to interleaved for given list of CPUs
 @param [in] processorList List of processors
 @param [in] numberOfProcessors Length of processor list
 */
-extern void numa_setInterleaved(int* processorList, int numberOfProcessors) __attribute__ ((visibility ("default") ));
+extern void numa_setInterleaved(const int* processorList, int numberOfProcessors) __attribute__ ((visibility ("default") ));
 /*! \brief Allocate memory from a specific specific NUMA node
 @param [in,out] ptr Start pointer of memory
 @param [in] size Size for the allocation
@@ -597,7 +595,7 @@ Pin processes to a CPU. Creates a cpuset with the given processor IDs
 @param [in] cpu_count Number of processors in processorIds
 @param [in] processorIds Array of processor IDs
 */
-extern void affinity_pinProcesses(int cpu_count, int* processorIds) __attribute__ ((visibility ("default") ));
+extern void affinity_pinProcesses(int cpu_count, const int* processorIds) __attribute__ ((visibility ("default") ));
 /*! \brief Pin thread to a CPU
 
 Pin thread to a CPU. Duplicate of likwid_pinThread()
@@ -642,7 +640,7 @@ different selection modes: scatter, expression, logical and physical.
 @param [in] length Length of cpulist
 @return error code (>0 on success for the returned list length, -ERRORCODE on failure)
 */
-extern int cpustr_to_cpulist(char* cpustring, int* cpulist, int length)  __attribute__ ((visibility ("default") ));
+extern int cpustr_to_cpulist(const char* cpustring, int* cpulist, int length)  __attribute__ ((visibility ("default") ));
 /*! \brief Read NUMA node selection string and resolve to available NUMA node numbers
 
 Reads the NUMA node selection string and fills the given list with the NUMA node numbers
@@ -652,7 +650,7 @@ defined in the selection string.
 @param [in] length Length of NUMA node list
 @return error code (>0 on success for the returned list length, -ERRORCODE on failure)
 */
-extern int nodestr_to_nodelist(char* nodestr, int* nodes, int length)  __attribute__ ((visibility ("default") ));
+extern int nodestr_to_nodelist(const char* nodestr, int* nodes, int length)  __attribute__ ((visibility ("default") ));
 /*! \brief Read CPU socket selection string and resolve to available CPU socket numbers
 
 Reads the CPU socket selection string and fills the given list with the CPU socket numbers
@@ -662,7 +660,7 @@ defined in the selection string.
 @param [in] length Length of CPU socket list
 @return error code (>0 on success for the returned list length, -ERRORCODE on failure)
 */
-extern int sockstr_to_socklist(char* sockstr, int* sockets, int length)  __attribute__ ((visibility ("default") ));
+extern int sockstr_to_socklist(const char* sockstr, int* sockets, int length)  __attribute__ ((visibility ("default") ));
 
 /** @}*/
 
@@ -700,7 +698,7 @@ The access mode must already be set when calling perfmon_init()
 @param [in] threadsToCpu List of CPUs
 @return error code (0 on success, -ERRORCODE on failure)
 */
-extern int perfmon_init(int nrThreads, int threadsToCpu[]) __attribute__ ((visibility ("default") ));
+extern int perfmon_init(int nrThreads, const int* threadsToCpu) __attribute__ ((visibility ("default") ));
 
 /*! \brief Initialize performance monitoring maps
 
@@ -710,6 +708,14 @@ perfmon_init_maps()
 \sa RegisterMap list, PerfmonEvent list and BoxMap list
 */
 extern void perfmon_init_maps(void) __attribute__ ((visibility ("default") ));
+/*! \brief Check the performance monitoring maps whether counters and events are available
+
+Checks each counter and event in the performance monitoring maps for their availibility on
+the current system. topology_init(), numa_init() and perfmon_init_maps() must be called before calling
+perfmon_check_counter_map().
+\sa RegisterMap list, PerfmonEvent list and BoxMap list
+*/
+extern void perfmon_check_counter_map(int cpu_id) __attribute__ ((visibility ("default") ));
 /*! \brief Add an event string to LIKWID
 
 A event string looks like Eventname:Countername(:Option1:Option2:...),...
@@ -717,7 +723,7 @@ The eventname, countername and options are checked if they are available.
 @param [in] eventCString Event string
 @return Returns the ID of the new eventSet
 */
-extern int perfmon_addEventSet(char* eventCString) __attribute__ ((visibility ("default") ));
+extern int perfmon_addEventSet(const char* eventCString) __attribute__ ((visibility ("default") ));
 /*! \brief Setup all performance monitoring counters of an eventSet
 
 A event string looks like Eventname:Countername(:Option1:Option2:...),...
@@ -967,7 +973,7 @@ extern int perfmon_getThreadsOfRegion(int region) __attribute__ ((visibility ("d
 /*! \brief Get the cpulist of a region
 @param [in] region ID of region
 @param [in] count Length of cpulist array
-@param [in] cpulist cpulist array
+@param [in,out] cpulist cpulist array
 @return Number of threads of region or count, whatever is lower
 */
 extern int perfmon_getCpulistOfRegion(int region, int count, int* cpulist)  __attribute__ ((visibility ("default") ));
@@ -1034,13 +1040,13 @@ extern void timer_init( void ) __attribute__ ((visibility ("default") ));
 @param [in] time Structure holding the cycle count at start and stop
 @return Time in seconds
 */
-extern double timer_print( TimerData* time) __attribute__ ((visibility ("default") ));
+extern double timer_print( const TimerData* time) __attribute__ ((visibility ("default") ));
 /*! \brief Return the measured interval in cycles
 
 @param [in] time Structure holding the cycle count at start and stop
 @return Time in cycles
 */
-extern uint64_t timer_printCycles( TimerData* time) __attribute__ ((visibility ("default") ));
+extern uint64_t timer_printCycles( const TimerData* time) __attribute__ ((visibility ("default") ));
 /*! \brief Reset values in TimerData
 
 @param [in] time Structure holding the cycle count at start and stop
@@ -1174,6 +1180,9 @@ typedef struct {
     int hasRAPL; /*!< \brief RAPL support flag */
     double powerUnit; /*!< \brief Multiplier for power measurements */
     double timeUnit; /*!< \brief Multiplier for time information */
+    double uncoreMinFreq; /*!< \brief Minimal uncore frequency */
+    double uncoreMaxFreq; /*!< \brief Maximal uncore frequency */
+    uint8_t perfBias; /*!< \brief Performance energy bias */
     PowerDomain domains[NUM_POWER_DOMAINS]; /*!< \brief List of power domains */
 } PowerInfo;
 
@@ -1243,7 +1252,7 @@ extern int power_stop(PowerData_t data, int cpuId, PowerType type) __attribute__
 @param [in] data Data structure holding start and stop values for energy measurements
 @return Consumed energy in Joules
 */
-extern double power_printEnergy(PowerData* data) __attribute__ ((visibility ("default") ));
+extern double power_printEnergy(const PowerData* data) __attribute__ ((visibility ("default") ));
 /*! \brief Get energy Unit
 
 @param [in] domain RAPL domain ID
@@ -1337,7 +1346,7 @@ Sweeps (zeros) the memory of all NUMA nodes containing the CPUs in \a processorL
 @param [in] processorList List of CPU IDs
 @param [in] numberOfProcessors Number of CPUs in list
 */
-extern void memsweep_threadGroup(int* processorList, int numberOfProcessors) __attribute__ ((visibility ("default") ));
+extern void memsweep_threadGroup(const int* processorList, int numberOfProcessors) __attribute__ ((visibility ("default") ));
 /** @}*/
 
 /*
@@ -1370,7 +1379,7 @@ typedef enum {
     FEAT_DYN_ACCEL, /*!< \brief Intel Dynamic Acceleration */
     FEAT_TURBO_MODE, /*!< \brief Intel Turbo Mode */
     FEAT_TM2, /*!< \brief Thermal Monitoring 2 */
-    CPUFEATURES_MAX 
+    CPUFEATURES_MAX
 } CpuFeature;
 
 /*! \brief Initialize the internal feature variables for all CPUs
@@ -1419,6 +1428,98 @@ Disable a CPU feature for a specific CPU. Only the state of the prefetchers can
 extern int cpuFeatures_disable(int cpu, CpuFeature type, int print) __attribute__ ((visibility ("default") ));
 /** @}*/
 
+
+/*
+################################################################################
+# CPU frequency related functions
+################################################################################
+*/
+/** \addtogroup CpuFreq Retrieval and manipulation of processor clock frequencies
+ *  @{
+ */
+/*! \brief Get the current clock frequency of a core
+
+Get the current clock frequency of a core
+@param [in] cpu_id CPU ID
+@return Frequency or 0 in case of errors
+*/
+extern uint64_t freq_getCpuClockCurrent(const int cpu_id ) __attribute__ ((visibility ("default") ));
+/*! \brief Set the current clock frequency of a core
+
+Set the current clock frequency of a core
+@param [in] cpu_id CPU ID
+@param [in] freq Frequency in kHz
+@return Frequency or 0 in case of errors
+*/
+extern uint64_t freq_setCpuClockCurrent(const int cpu_id, const uint64_t freq) __attribute__ ((visibility ("default") ));
+/*! \brief Get the maximal clock frequency of a core
+
+Get the maximal clock frequency of a core
+@param [in] cpu_id CPU ID
+@return Frequency or 0 in case of errors
+*/
+extern uint64_t freq_getCpuClockMax(const int cpu_id ) __attribute__ ((visibility ("default") ));
+/*! \brief Set the maximal clock frequency of a core
+
+Set the maximal clock frequency of a core
+@param [in] cpu_id CPU ID
+@param [in] freq Frequency in kHz
+@return Frequency or 0 in case of errors
+*/
+extern uint64_t freq_setCpuClockMax(const int cpu_id, const uint64_t freq) __attribute__ ((visibility ("default") ));
+/*! \brief Get the minimal clock frequency of a core
+
+Get the minimal clock frequency of a core
+@param [in] cpu_id CPU ID
+@return Frequency or 0 in case of errors
+*/
+extern uint64_t freq_getCpuClockMin(const int cpu_id ) __attribute__ ((visibility ("default") ));
+/*! \brief Set the minimal clock frequency of a core
+
+Set the minimal clock frequency of a core
+@param [in] cpu_id CPU ID
+@param [in] freq Frequency in kHz
+@return Frequency or 0 in case of errors
+*/
+extern uint64_t freq_setCpuClockMin(const int cpu_id, const uint64_t freq) __attribute__ ((visibility ("default") ));
+/*! \brief Get the frequency governor of a core
+
+Get the frequency governor of a core. The returned string must be freed by the caller.
+@param [in] cpu_id CPU ID
+@return Governor or NULL in case of errors
+*/
+extern char * freq_getGovernor(const int cpu_id ) __attribute__ ((visibility ("default") ));
+/*! \brief Set the frequency governor of a core
+
+Set the frequency governor of a core.
+@param [in] cpu_id CPU ID
+@param [in] gov Governor
+@return 1 or 0 in case of errors
+*/
+extern int freq_setGovernor(const int cpu_id, const char* gov) __attribute__ ((visibility ("default") ));
+/*! \brief Get the available frequencies of a core
+
+Get the available frequencies of a core. The returned string must be freed by the caller.
+@param [in] cpu_id CPU ID
+@return String with available frequencies or NULL in case of errors
+*/
+extern char * freq_getAvailFreq(const int cpu_id ) __attribute__ ((visibility ("default") ));
+/*! \brief Get the available frequency governors of a core
+
+Get the available frequency governors of a core. The returned string must be freed by the caller.
+@param [in] cpu_id CPU ID
+@return String with available frequency governors or NULL in case of errors
+*/
+extern char * freq_getAvailGovs(const int cpu_id ) __attribute__ ((visibility ("default") ));
+/*! \brief Get the name of the currently active cpufreq driver
+
+Get the name of the currently active cpufreq driver. The returned string must be freed by the caller.
+@param [in] cpu_id CPU ID
+@return String with active cpufreq driver or NULL in case of errors
+*/
+extern char * freq_getDriver(const int cpu_id ) __attribute__ ((visibility ("default") ));
+/** @}*/
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/includes/lock.h b/src/includes/lock.h
index 623e81c..e0104d5 100644
--- a/src/includes/lock.h
+++ b/src/includes/lock.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File Locking primitive Module
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -41,17 +41,16 @@
 #define LOCK_INIT -1
 #define STRINGIFY(x) #x
 #define TOSTRING(x) STRINGIFY(x)
-#define LIKWIDLOCK  /var/run/likwid.lock
 
-static inline int lock_acquire(int* var, int newval)
+static inline int
+lock_acquire(int* var, int newval)
 {
     int oldval = LOCK_INIT;
     return __sync_bool_compare_and_swap (var, oldval, newval);
 }
 
-
-
-static int lock_check(void)
+static int
+lock_check(void)
 {
     struct stat buf;
     int lock_handle = -1;
@@ -60,40 +59,41 @@ static int lock_check(void)
 
     if ((lock_handle = open(filepath, O_RDONLY )) == -1 )
     {
-    if (errno == ENOENT)
-    {
-        /* There is no lock file. Proceed. */
-        result = 1;
-    }
-    else if (errno == EACCES)
-    {
-        /* There is a lock file. We cannot open it. */
-        result = 0;
-    }
-    else 
-    {
-        /* Another error occured. Proceed. */
-        result = 1;
-    }
+        if (errno == ENOENT)
+        {
+            /* There is no lock file. Proceed. */
+            result = 1;
+        }
+        else if (errno == EACCES)
+        {
+            /* There is a lock file. We cannot open it. */
+            result = 0;
+        }
+        else
+        {
+            /* Another error occured. Proceed. */
+            result = 1;
+        }
     }
     else
     {
-    /* There is a lock file and we can open it. Check if we own it. */
-    stat(filepath, &buf);
+        /* There is a lock file and we can open it. Check if we own it. */
+        stat(filepath, &buf);
 
-    if ( buf.st_uid == getuid() )  /* Succeed, we own the lock */
-    {
-        result = 1;
-    }
-    else  /* we are not the owner */
-    {
-        result = 0;
-    }
+        if ( buf.st_uid == getuid() )  /* Succeed, we own the lock */
+        {
+            result = 1;
+        }
+        else  /* we are not the owner */
+        {
+
+            result = 0;
+        }
     }
 
     if (lock_handle)
     {
-    close(lock_handle);
+        close(lock_handle);
     }
 
     return result;
diff --git a/src/includes/memsweep.h b/src/includes/memsweep.h
index dca6862..083ebf2 100644
--- a/src/includes/memsweep.h
+++ b/src/includes/memsweep.h
@@ -6,8 +6,8 @@
  *      Description:  Header File memsweep module for internal use. External functions are
  *                    defined in likwid.h
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -28,7 +28,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef MEMSWEEP_H
 #define MEMSWEEP_H
 
@@ -38,4 +37,3 @@ extern void memsweep_setMemoryFraction(uint64_t fraction);
 extern void memsweep_node(void);
 
 #endif /* MEMSWEEP_H */
-
diff --git a/src/includes/numa.h b/src/includes/numa.h
index 681894c..aaeaab3 100644
--- a/src/includes/numa.h
+++ b/src/includes/numa.h
@@ -6,8 +6,8 @@
  *      Description:  Header File NUMA module for internal use. External functions are
  *                    defined in likwid.h
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -28,7 +28,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef LIKWID_NUMA
 #define LIKWID_NUMA
 
@@ -40,19 +39,12 @@
 #include <numa_hwloc.h>
 #include <numa_proc.h>
 
-
-
-
 extern int str2int(const char* str);
 
 struct numa_functions {
     int (*numa_init) (void);
-    void (*numa_setInterleaved) (int*, int);
+    void (*numa_setInterleaved) (const int*, int);
     void (*numa_membind) (void*, size_t, int);
 };
 
-
-
-
-
 #endif
diff --git a/src/includes/numa_hwloc.h b/src/includes/numa_hwloc.h
index 33af62d..dbf4091 100644
--- a/src/includes/numa_hwloc.h
+++ b/src/includes/numa_hwloc.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File hwloc NUMA backend
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -27,8 +27,6 @@
  *
  * =======================================================================================
  */
-
-
 #ifndef LIKWID_NUMA_HWLOC
 #define LIKWID_NUMA_HWLOC
 
@@ -36,5 +34,4 @@ extern int hwloc_numa_init(void);
 extern void hwloc_numa_membind(void* ptr, size_t size, int domainId);
 extern void hwloc_numa_setInterleaved(int* processorList, int numberOfProcessors);
 
-
 #endif
diff --git a/src/includes/numa_proc.h b/src/includes/numa_proc.h
index 24d39e7..e03ef1e 100644
--- a/src/includes/numa_proc.h
+++ b/src/includes/numa_proc.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File procfs/sysfs NUMA backend
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -27,13 +27,11 @@
  *
  * =======================================================================================
  */
-
 #ifndef LIKWID_NUMA_PROC
 #define LIKWID_NUMA_PROC
 
 extern int proc_numa_init(void);
 extern void proc_numa_membind(void* ptr, size_t size, int domainId);
-extern void proc_numa_setInterleaved(int* processorList, int numberOfProcessors);
-
+extern void proc_numa_setInterleaved(const int* processorList, int numberOfProcessors);
 
 #endif
diff --git a/src/includes/pci_hwloc.h b/src/includes/pci_hwloc.h
index 9533b49..23223c7 100644
--- a/src/includes/pci_hwloc.h
+++ b/src/includes/pci_hwloc.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File hwloc based PCI lookup backend
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -27,11 +27,9 @@
  *
  * =======================================================================================
  */
-
 #ifndef PCI_HWLOC_H
 #define PCI_HWLOC_H
 
 extern int hwloc_pci_init(uint16_t testDevice, char** socket_bus, int* nrSockets);
 
-
 #endif
diff --git a/src/includes/pci_proc.h b/src/includes/pci_proc.h
index 3aa859c..61d733f 100644
--- a/src/includes/pci_proc.h
+++ b/src/includes/pci_proc.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File procfs based PCI lookup backend
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -27,11 +27,9 @@
  *
  * =======================================================================================
  */
-
 #ifndef PCI_PROC_H
 #define PCI_PROC_H
 
 extern int proc_pci_init(uint16_t testDevice, char** socket_bus, int* nrSockets);
 
-
 #endif
diff --git a/src/includes/pci_types.h b/src/includes/pci_types.h
index 651409a..8089f97 100644
--- a/src/includes/pci_types.h
+++ b/src/includes/pci_types.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Types file for pci module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -28,15 +28,11 @@
  *
  * =======================================================================================
  */
-
-
 #ifndef PCI_TYPES_H
 #define PCI_TYPES_H
 
 #include <stdint.h>
 
-
-
 typedef enum {
     NODEVTYPE = 0,
     R3QPI,
@@ -45,6 +41,7 @@ typedef enum {
     HA,
     QPI,
     IRP,
+    EDC,
     MAX_NUM_PCI_TYPES
 } PciDeviceType;
 
@@ -74,6 +71,22 @@ typedef enum {
     PCI_IMC_DEVICE_1_CH_2,
     PCI_IMC_DEVICE_1_CH_3,
     PCI_IRP_DEVICE,
+    PCI_EDC0_UCLK_DEVICE,
+    PCI_EDC0_DCLK_DEVICE,
+    PCI_EDC1_UCLK_DEVICE,
+    PCI_EDC1_DCLK_DEVICE,
+    PCI_EDC2_UCLK_DEVICE,
+    PCI_EDC2_DCLK_DEVICE,
+    PCI_EDC3_UCLK_DEVICE,
+    PCI_EDC3_DCLK_DEVICE,
+    PCI_EDC4_UCLK_DEVICE,
+    PCI_EDC4_DCLK_DEVICE,
+    PCI_EDC5_UCLK_DEVICE,
+    PCI_EDC5_DCLK_DEVICE,
+    PCI_EDC6_UCLK_DEVICE,
+    PCI_EDC6_DCLK_DEVICE,
+    PCI_EDC7_UCLK_DEVICE,
+    PCI_EDC7_DCLK_DEVICE,
     MAX_NUM_PCI_DEVICES
 } PciDeviceIndex;
 
@@ -91,13 +104,14 @@ typedef struct {
     char* desc;
 } PciType;
 
-
 static PciType pci_types[MAX_NUM_PCI_TYPES] = {
     [R3QPI] = {"R3QPI", "R3QPI is the interface between the Intel QPI Link Layer and the Ring."},
     [R2PCIE] = {"R2PCIE", "R2PCIe represents the interface between the Ring and IIO traffic to/from PCIe."},
     [IMC] = {"IMC", "The integrated Memory Controller provides the interface to DRAM and communicates to the rest of the uncore through the Home Agent."},
     [HA] = {"HA", "The HA is responsible for the protocol side of memory interactions."},
     [QPI] = {"QPI", "The Intel QPI Link Layer is responsible for packetizing requests from the caching agent on the way out to the system interface."},
-    [IRP] = {"IRP", "IRP is responsible for maintaining coherency for IIO traffic e.g. crosssocket P2P."}
+    [IRP] = {"IRP", "IRP is responsible for maintaining coherency for IIO traffic e.g. crosssocket P2P."},
+    [EDC] = {"EDC", "The Embedded DRAM controller is used for high bandwidth memory on the Xeon Phi (KNL)."},
 };
+
 #endif /*PCI_TYPES_H*/
diff --git a/src/includes/perfgroup.h b/src/includes/perfgroup.h
index 78c5453..0e332f4 100644
--- a/src/includes/perfgroup.h
+++ b/src/includes/perfgroup.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File of performance group and event set handler
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at gmail.com
  *      Project:  likwid
@@ -27,11 +27,9 @@
  *
  * =======================================================================================
  */
-
 #ifndef PERFGROUP_H
 #define PERFGROUP_H
 
-
  /*! \brief The groupInfo data structure describes a performance group
 
 Groups can be either be read in from file or be a group with custom event set. For
@@ -73,10 +71,10 @@ static char* groupFileSectionNames[5] = {
     "LONG"
 };
 
-extern int get_groups(char* grouppath, char* architecture, char*** groupnames, char*** groupshort, char*** grouplong);
+extern int get_groups(const char* grouppath, const char* architecture, char*** groupnames, char*** groupshort, char*** grouplong);
 extern void return_groups(int groups, char** groupnames, char** groupshort, char** grouplong);
-extern int read_group(char* grouppath, char* architecture, char* groupname, GroupInfo* ginfo);
-extern int custom_group(char* eventStr, GroupInfo* ginfo);
+extern int read_group(const char* grouppath, const char* architecture, const char* groupname, GroupInfo* ginfo);
+extern int custom_group(const char* eventStr, GroupInfo* ginfo);
 extern char* get_eventStr(GroupInfo* ginfo);
 void put_eventStr(char* eventset);
 extern char* get_shortInfo(GroupInfo* ginfo);
@@ -87,8 +85,9 @@ extern void return_group(GroupInfo* ginfo);
 
 extern void init_clist(CounterList* clist);
 extern int add_to_clist(CounterList* clist, char* counter, double result);
+extern int update_clist(CounterList* clist, char* counter, double result);
 extern void destroy_clist(CounterList* clist);
 
 extern int calc_metric(char* formula, CounterList* clist, double *result);
 
-#endif
+#endif /* PERFGROUP_H */
diff --git a/src/includes/perfmon.h b/src/includes/perfmon.h
index 0c39093..17efaf8 100644
--- a/src/includes/perfmon.h
+++ b/src/includes/perfmon.h
@@ -7,8 +7,8 @@
  *                    Configures and reads out performance counters
  *                    on x86 based architectures. Supports multi threading.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -30,11 +30,9 @@
  *
  * =======================================================================================
  */
-
 #ifndef PERFMON_H
 #define PERFMON_H
 
-
 #include <types.h>
 #include <likwid.h>
 
@@ -51,10 +49,8 @@ extern int (*perfmon_readCountersThread) (int thread_id, PerfmonEventSet* eventS
 extern int (*perfmon_finalizeCountersThread) (int thread_id, PerfmonEventSet* eventSet);
 extern int (*initThreadArch) (int cpu_id);
 
-
 /* Internal helpers */
 extern int getCounterTypeOffset(int index);
 extern uint64_t perfmon_getMaxCounterValue(RegisterType type);
 
-
 #endif /*PERFMON_H*/
diff --git a/src/includes/perfmon_atom.h b/src/includes/perfmon_atom.h
index e96c8e0..57a4892 100644
--- a/src/includes/perfmon_atom.h
+++ b/src/includes/perfmon_atom.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header file of perfmon module for Atom
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
diff --git a/src/includes/perfmon_atom_events.txt b/src/includes/perfmon_atom_events.txt
index 26ec5b9..28f07ee 100644
--- a/src/includes/perfmon_atom_events.txt
+++ b/src/includes/perfmon_atom_events.txt
@@ -4,8 +4,8 @@
 #
 #      Description:  Event list for Intel Atom
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -34,10 +34,10 @@ EVENT_CPU_CLK_UNHALTED           0x00   FIXC1
 UMASK_CPU_CLK_UNHALTED_CORE      0x00
 
 EVENT_BACLEARS                                        0xE6 PMC
-UMASK_BACLEARS_ANY                                    0x01 
+UMASK_BACLEARS_ANY                                    0x01
 
 EVENT_BOGUS_BR                                        0xE4 PMC
-UMASK_BOGUS_BR                                        0x01 
+UMASK_BOGUS_BR                                        0x01
 
 EVENT_BR_INST_RETIRED            0xC4   PMC
 UMASK_BR_INST_RETIRED_ANY        0x00
diff --git a/src/includes/perfmon_broadwell.h b/src/includes/perfmon_broadwell.h
index b22ed0c..b913d9c 100644
--- a/src/includes/perfmon_broadwell.h
+++ b/src/includes/perfmon_broadwell.h
@@ -5,13 +5,13 @@
  *
  *      Description:  Header File of perfmon module for Intel Broadwell.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -53,22 +53,42 @@ static int perfmon_numCountersBroadwellEP = NUM_COUNTERS_BROADWELLEP;
 static int perfmon_numCoreCountersBroadwellEP = NUM_COUNTERS_CORE_BROADWELLEP;
 static int perfmon_numArchEventsBroadwellEP = NUM_ARCH_EVENTS_BROADWELLEP;
 
+static int bdw_did_cbox_check = 0;
 int bdw_cbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event);
 int bdwep_cbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event);
 int (*broadwell_cbox_setup)(int, RegisterIndex, PerfmonEvent *);
+int print_ht_warn_once = 1;
+
+int bdw_cbox_nosetup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
+{
+    return 0; /* placeholder with the broadwell_cbox_setup signature: ignores all arguments, touches no register */
+}
 
 int perfmon_init_broadwell(int cpu_id)
 {
+    int ret; /* sum of the return codes of the four probe accesses below */
+    uint64_t data; /* scratch buffer for the probe read-backs */
     lock_acquire((int*) &tile_lock[affinity_thread2tile_lookup[cpu_id]], cpu_id);
     lock_acquire((int*) &socket_lock[affinity_core2node_lookup[cpu_id]], cpu_id);
     CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PEBS_ENABLE, 0x0ULL));
     if ((cpuid_info.model == BROADWELL_E) || (cpuid_info.model == BROADWELL_D))
     {
         broadwell_cbox_setup = bdwep_cbox_setup;
-    }
-    else
-    {
-        broadwell_cbox_setup = bdw_cbox_setup;
+        bdw_did_cbox_check = 1; /* BROADWELL_E / BROADWELL_D: handler chosen without a probe */
+    }
+    else if (cpuid_info.model == BROADWELL &&
+             socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id &&
+             bdw_did_cbox_check == 0)
+    { /* probe once, and only from the CPU recorded in this socket's lock entry */
+        ret = HPMwrite(cpu_id, MSR_DEV, MSR_UNC_CBO_0_PERFEVTSEL0, 0x0ULL);
+        ret += HPMread(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, &data);
+        ret += HPMwrite(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, 0x0ULL);
+        ret += HPMread(cpu_id, MSR_DEV, MSR_UNC_CBO_0_PERFEVTSEL0, &data); /* overwrites the GLOBAL_CTRL value read above */
+        if ((ret == 0) && (data == 0x0ULL)) /* summed return codes are 0 and PERFEVTSEL0 read back as 0 */
+            broadwell_cbox_setup = bdw_cbox_setup;
+        else
+            broadwell_cbox_setup = bdw_cbox_nosetup; /* otherwise install the handler that only returns 0 */
+        bdw_did_cbox_check = 1;
     }
     return 0;
 }
@@ -112,6 +132,19 @@ int bdw_pmc_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
         flags |= ((event->cmask<<8) + event->cfgBits)<<16;
     }
 
+    if (getCounterTypeOffset(index) >= 4)
+    {
+        if (print_ht_warn_once)
+        {
+            fprintf(stderr, "WARNING: PMC4-7 on Intel Broadwell systems requires KERNEL option to work\n");
+            fprintf(stderr, "         properly. The KERNEL option is added automatically for PMC4-7.\n");
+            fprintf(stderr, "         Be aware that the events also count during kernel execution and may\n");
+            fprintf(stderr, "         be higher than expected.\n");
+            print_ht_warn_once = 0;
+        }
+        flags |= (1ULL<<17);
+    }
+
     if (event->numberOfOptions > 0)
     {
         for(j = 0; j < event->numberOfOptions; j++)
@@ -320,7 +353,7 @@ int bdwep_cbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
             }
         }
     }
-
+    
     if (filter_flags0 != 0x0ULL)
     {
         VERBOSEPRINTREG(cpu_id, filter0, filter_flags0, SETUP_CBOX_FILTER0);
@@ -948,27 +981,46 @@ int bdw_qbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event, PciDevi
 }
 
 #define BDW_FREEZE_UNCORE \
-    if (haveLock && eventSet->regTypeMask & ~(0xFULL)) \
+    if (haveLock && MEASURE_UNCORE(eventSet) && (cpuid_info.model == BROADWELL_E || cpuid_info.model == BROADWELL_D)) \
     { \
         VERBOSEPRINTREG(cpu_id, MSR_UNC_V3_U_PMON_GLOBAL_CTL, LLU_CAST (1ULL<<31), FREEZE_UNCORE); \
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_V3_U_PMON_GLOBAL_CTL, (1ULL<<31))); \
-    }
+    } \
+    else if (haveLock && MEASURE_UNCORE(eventSet) && cpuid_info.model == BROADWELL) \
+    { /* BROADWELL: freeze by clearing bit 29, the bit BDW_UNFREEZE_UNCORE sets in the same register */ \
+        uint64_t data = 0x0ULL; \
+        CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, &data)); \
+        if (data & (1ULL<<29)) /* write back only when the bit is currently set */ \
+        { \
+            data &= ~(1ULL<<29); \
+            VERBOSEPRINTREG(cpu_id, MSR_UNCORE_PERF_GLOBAL_CTRL, data, FREEZE_UNCORE); \
+            CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, data)); \
+        } \
+    } \
 
 #define BDW_UNFREEZE_UNCORE \
-    if (haveLock && eventSet->regTypeMask & ~(0xFULL)) \
+    if (haveLock && MEASURE_UNCORE(eventSet) && (cpuid_info.model == BROADWELL_E || cpuid_info.model == BROADWELL_D)) \
     { \
         VERBOSEPRINTREG(cpu_id, MSR_UNC_V3_U_PMON_GLOBAL_CTL, LLU_CAST (1ULL<<29), UNFREEZE_UNCORE); \
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_V3_U_PMON_GLOBAL_CTL, (1ULL<<29))); \
-    }
+    } \
+    else if (haveLock && MEASURE_UNCORE(eventSet) && cpuid_info.model == BROADWELL) \
+    { /* BROADWELL: read-modify-write so the other bits of the register are kept */ \
+        uint64_t data = 0x0ULL; \
+        CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, &data)); \
+        data |= (1ULL<<29);\
+        VERBOSEPRINTREG(cpu_id, MSR_UNCORE_PERF_GLOBAL_CTRL, data, UNFREEZE_UNCORE); \
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, data)); \
+    } \
 
 #define BDW_UNFREEZE_UNCORE_AND_RESET_CTR \
-    if (haveLock && (eventSet->regTypeMask & ~(0xFULL))) \
+    if (haveLock && MEASURE_UNCORE(eventSet)) \
     { \
         for (int i=0;i < eventSet->numberOfEvents;i++) \
         { \
             RegisterIndex index = eventSet->events[i].index; \
-            RegisterType type = counter_map[index].type; \
-            if ((type < UNCORE) || (type == WBOX0FIX)) \
+            RegisterType type = eventSet->events[i].type; \
+            if ((type < UNCORE) || (type == WBOX0FIX) || (type == NOTYPE)) \
             { \
                 continue; \
             } \
@@ -983,8 +1035,7 @@ int bdw_qbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event, PciDevi
                 } \
             } \
         } \
-        VERBOSEPRINTREG(cpu_id, MSR_UNC_V3_U_PMON_GLOBAL_CTL, LLU_CAST (1ULL<<29), UNFREEZE_UNCORE); \
-        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_V3_U_PMON_GLOBAL_CTL, (1ULL<<29))); \
+        BDW_UNFREEZE_UNCORE; \
     }
 
 int perfmon_setupCounterThread_broadwell(
@@ -995,13 +1046,14 @@ int perfmon_setupCounterThread_broadwell(
     uint64_t flags;
     uint64_t fixed_flags = 0x0ULL;
     int cpu_id = groupSet->threads[thread_id].processorId;
+    print_ht_warn_once = 1;
 
     if (socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id)
     {
         haveLock = 1;
     }
     BDW_FREEZE_UNCORE;
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_AND_FIXED)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
@@ -1012,7 +1064,7 @@ int perfmon_setupCounterThread_broadwell(
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -1134,9 +1186,10 @@ int perfmon_setupCounterThread_broadwell(
                 break;
         }
     }
+
     for (int i=UNCORE;i<NUM_UNITS;i++)
     {
-        if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(i))) && box_map[i].ctrlRegister != 0x0)
+        if (haveLock && TESTTYPE(eventSet, i) && box_map[i].ctrlRegister != 0x0)
         {
             VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL, CLEAR_UNCORE_BOX_CTRL);
             HPMwrite(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL);
@@ -1167,7 +1220,7 @@ int perfmon_startCountersThread_broadwell(int thread_id, PerfmonEventSet* eventS
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -1229,7 +1282,7 @@ int perfmon_startCountersThread_broadwell(int thread_id, PerfmonEventSet* eventS
 
     BDW_UNFREEZE_UNCORE_AND_RESET_CTR;
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, LLU_CAST (1ULL<<63)|(1ULL<<62)|flags, CLEAR_PMC_AND_FIXED_OVERFLOW)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, (1ULL<<63)|(1ULL<<62)|flags));
@@ -1280,17 +1333,20 @@ int bdw_uncore_read(int cpu_id, RegisterIndex index, PerfmonEvent *event,
         uint64_t ovf_values = 0x0ULL;
         int global_offset = box_map[type].ovflOffset;
         int test_local = 0;
+        uint32_t global_status_reg = MSR_UNC_V3_U_PMON_GLOBAL_STATUS;
+        if (cpuid_info.model == BROADWELL)
+            global_status_reg = MSR_UNC_PERF_GLOBAL_STATUS;
         if (global_offset != -1)
         {
             CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV,
-                                           MSR_UNC_V3_U_PMON_GLOBAL_STATUS,
+                                           global_status_reg,
                                            &ovf_values));
-            VERBOSEPRINTREG(cpu_id, MSR_UNC_V3_U_PMON_GLOBAL_STATUS, LLU_CAST ovf_values, READ_GLOBAL_OVFL);
+            VERBOSEPRINTREG(cpu_id, global_status_reg, LLU_CAST ovf_values, READ_GLOBAL_OVFL);
             if (ovf_values & (1<<global_offset))
             {
-                VERBOSEPRINTREG(cpu_id, MSR_UNC_V3_U_PMON_GLOBAL_STATUS, LLU_CAST (1<<global_offset), CLEAR_GLOBAL_OVFL);
+                VERBOSEPRINTREG(cpu_id, global_status_reg, LLU_CAST (1<<global_offset), CLEAR_GLOBAL_OVFL);
                 CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV,
-                                                 MSR_UNC_V3_U_PMON_GLOBAL_STATUS,
+                                                 global_status_reg,
                                                  (1<<global_offset)));
                 test_local = 1;
             }
@@ -1334,6 +1390,19 @@ int bdw_uncore_read(int cpu_id, RegisterIndex index, PerfmonEvent *event,
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, (1ULL<<offset))); \
     }
 
+#define BDW_CHECK_LOCAL_OVERFLOW /* uses counter_result, eventSet, i, thread_id and cpu_id from the expansion site */ \
+    if (counter_result < eventSet->events[i].threadCounter[thread_id].counterData) /* new reading is below the stored value */ \
+    { \
+        uint64_t ovf_values = 0x0ULL; \
+        uint64_t offset = getCounterTypeOffset(eventSet->events[i].index); \
+        CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, box_map[eventSet->events[i].type].statusRegister, &ovf_values)); \
+        if (ovf_values & (1ULL<<offset)) /* the status register of the event's box has this counter's bit set */ \
+        { \
+            eventSet->events[i].threadCounter[thread_id].overflows++; \
+            CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, box_map[eventSet->events[i].type].statusRegister, (1ULL<<offset))); /* writes the same bit back; presumably write-1-to-clear, TODO confirm */ \
+        } \
+    }
+
 int perfmon_stopCountersThread_broadwell(int thread_id, PerfmonEventSet* eventSet)
 {
     int haveLock = 0;
@@ -1345,7 +1414,7 @@ int perfmon_stopCountersThread_broadwell(int thread_id, PerfmonEventSet* eventSe
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_AND_FIXED)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
@@ -1357,7 +1426,7 @@ int perfmon_stopCountersThread_broadwell(int thread_id, PerfmonEventSet* eventSe
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -1384,7 +1453,7 @@ int perfmon_stopCountersThread_broadwell(int thread_id, PerfmonEventSet* eventSe
                     break;
 
                 case POWER:
-                    if (haveLock && (eventSet->regTypeMask & REG_TYPE_MASK(POWER)))
+                    if (haveLock)
                     {
                         CHECK_POWER_READ_ERROR(power_read(cpu_id, counter1, (uint32_t*)&counter_result));
                         VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, STOP_POWER)
@@ -1509,7 +1578,7 @@ int perfmon_stopCountersThread_broadwell(int thread_id, PerfmonEventSet* eventSe
                                     counter_result = 0;
                                     break;
                             }
-
+                            
                         }
                         else if ((eventSet->events[i].event.eventId == 0x01) ||
                                  (eventSet->events[i].event.eventId == 0x02))
@@ -1543,7 +1612,7 @@ int perfmon_readCountersThread_broadwell(int thread_id, PerfmonEventSet* eventSe
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, &flags));
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST flags, SAFE_PMC_FLAGS)
@@ -1558,7 +1627,7 @@ int perfmon_readCountersThread_broadwell(int thread_id, PerfmonEventSet* eventSe
         {
             counter_result= 0x0ULL;
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -1586,7 +1655,7 @@ int perfmon_readCountersThread_broadwell(int thread_id, PerfmonEventSet* eventSe
                     break;
 
                 case POWER:
-                    if (haveLock && (eventSet->regTypeMask & REG_TYPE_MASK(POWER)))
+                    if (haveLock)
                     {
                         CHECK_POWER_READ_ERROR(power_read(cpu_id, counter1, (uint32_t*)&counter_result));
                         VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, STOP_POWER)
@@ -1700,7 +1769,7 @@ int perfmon_readCountersThread_broadwell(int thread_id, PerfmonEventSet* eventSe
                                     counter_result = 0;
                                     break;
                             }
-
+                            
                         }
                         else if ((eventSet->events[i].event.eventId == 0x01) ||
                                  (eventSet->events[i].event.eventId == 0x02))
@@ -1718,7 +1787,7 @@ int perfmon_readCountersThread_broadwell(int thread_id, PerfmonEventSet* eventSe
         }
     }
     BDW_UNFREEZE_UNCORE;
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST flags, RESTORE_PMC_FLAGS)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, flags));
@@ -1746,7 +1815,7 @@ int perfmon_finalizeCountersThread_broadwell(int thread_id, PerfmonEventSet* eve
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -1781,18 +1850,24 @@ int perfmon_finalizeCountersThread_broadwell(int thread_id, PerfmonEventSet* eve
             ovf_values_uncore = 0x0ULL;
             VERBOSEPRINTPCIREG(cpu_id, dev, reg, 0x0ULL, CLEAR_CTL);
             CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, reg, 0x0ULL));
+            if (type >= SBOX0 && type <= SBOX3)
+                CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, reg, 0x0ULL));
             VERBOSEPRINTPCIREG(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL, CLEAR_CTR);
             CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL));
+            if (type >= SBOX0 && type <= SBOX3)
+                CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL));
             if (counter_map[index].counterRegister2 != 0x0)
             {
                 VERBOSEPRINTPCIREG(cpu_id, dev, counter_map[index].counterRegister2, 0x0ULL, CLEAR_CTR);
                 CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister2, 0x0ULL));
+                if (type >= SBOX0 && type <= SBOX3)
+                    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister2, 0x0ULL));
             }
         }
         eventSet->events[i].threadCounter[thread_id].init = FALSE;
     }
 
-    if (haveLock && eventSet->regTypeMask & ~(0xFULL))
+    if (haveLock && MEASURE_UNCORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_UNC_V3_U_PMON_GLOBAL_STATUS, LLU_CAST ovf_values_uncore, CLEAR_UNCORE_OVF)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_V3_U_PMON_GLOBAL_STATUS, ovf_values_uncore));
@@ -1800,16 +1875,18 @@ int perfmon_finalizeCountersThread_broadwell(int thread_id, PerfmonEventSet* eve
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_V3_U_PMON_GLOBAL_CTL, 0x0ULL));
         for (int i=UNCORE;i<NUM_UNITS;i++)
         {
-            if ((eventSet->regTypeMask & (REG_TYPE_MASK(i))) && box_map[i].ctrlRegister != 0x0)
+            if (TESTTYPE(eventSet, i) && box_map[i].ctrlRegister != 0x0)
             {
                 VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL, CLEAR_UNCORE_BOX_CTRL);
                 HPMwrite(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL);
-                if (box_map[i].filterRegister1)
+                if (i >= SBOX0 && i <= SBOX3)
+                    HPMwrite(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL);
+                if (box_map[i].filterRegister1 != 0x0)
                 {
                     VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].filterRegister1, 0x0ULL, CLEAR_FILTER);
                     HPMwrite(cpu_id, box_map[i].device, box_map[i].filterRegister1, 0x0ULL);
                 }
-                if (box_map[i].filterRegister2)
+                if (box_map[i].filterRegister2 != 0x0)
                 {
                     VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].filterRegister2, 0x0ULL, CLEAR_FILTER);
                     HPMwrite(cpu_id, box_map[i].device, box_map[i].filterRegister2, 0x0ULL);
@@ -1818,7 +1895,7 @@ int perfmon_finalizeCountersThread_broadwell(int thread_id, PerfmonEventSet* eve
         }
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, LLU_CAST ovf_values_core, CLEAR_GLOBAL_OVF)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, ovf_values_core));
diff --git a/src/includes/perfmon_broadwellEP_counters.h b/src/includes/perfmon_broadwellEP_counters.h
index 6c693e9..eefb990 100644
--- a/src/includes/perfmon_broadwellEP_counters.h
+++ b/src/includes/perfmon_broadwellEP_counters.h
@@ -5,13 +5,13 @@
  *
  *      Description:  Counter Header File of perfmon module for Broadwell EP/EN/EX.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -29,8 +29,8 @@
  */
 
 
-#define NUM_COUNTERS_BROADWELLEP 216
-#define NUM_COUNTERS_CORE_BROADWELLEP 8
+#define NUM_COUNTERS_BROADWELLEP 220
+#define NUM_COUNTERS_CORE_BROADWELLEP 12
 #define NUM_COUNTERS_UNCORE_BROADWELLEP 85
 
 #define BDW_EP_VALID_OPTIONS_FIXED EVENT_OPTION_ANYTHREAD_MASK|EVENT_OPTION_COUNT_KERNEL_MASK
@@ -61,217 +61,222 @@ static RegisterMap broadwellEP_counter_map[NUM_COUNTERS_BROADWELLEP] = {
     {"PMC1", PMC4, PMC, MSR_PERFEVTSEL1, MSR_PMC1, 0, 0, BDW_EP_VALID_OPTIONS_PMC},
     {"PMC2", PMC5, PMC, MSR_PERFEVTSEL2, MSR_PMC2, 0, 0, BDW_EP_VALID_OPTIONS_PMC|EVENT_OPTION_IN_TRANS_ABORT_MASK},
     {"PMC3", PMC6, PMC, MSR_PERFEVTSEL3, MSR_PMC3, 0, 0, BDW_EP_VALID_OPTIONS_PMC},
+    /* Additional PMC counters: four 48-bit wide counters, available if HyperThreading is disabled */
+    {"PMC4", PMC7, PMC, MSR_PERFEVTSEL4, MSR_PMC4, 0, 0, BDW_EP_VALID_OPTIONS_PMC},
+    {"PMC5", PMC8, PMC, MSR_PERFEVTSEL5, MSR_PMC5, 0, 0, BDW_EP_VALID_OPTIONS_PMC},
+    {"PMC6", PMC9, PMC, MSR_PERFEVTSEL6, MSR_PMC6, 0, 0, BDW_EP_VALID_OPTIONS_PMC|EVENT_OPTION_IN_TRANS_ABORT_MASK},
+    {"PMC7", PMC10, PMC, MSR_PERFEVTSEL7, MSR_PMC7, 0, 0, BDW_EP_VALID_OPTIONS_PMC},
     /* Temperature Sensor*/
-    {"TMP0", PMC7, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"TMP0", PMC11, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
     /* RAPL counters */
-    {"PWR0", PMC8, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR1", PMC9, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR2", PMC10, POWER, 0, MSR_PP1_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR3", PMC11, POWER, 0, MSR_DRAM_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
-    {"UBOX0", PMC12, UBOX, MSR_UNC_V3_U_PMON_CTL0, MSR_UNC_V3_U_PMON_CTR0,  0, 0, BDW_EP_VALID_OPTIONS_UBOX},
-    {"UBOX1", PMC13, UBOX, MSR_UNC_V3_U_PMON_CTL1, MSR_UNC_V3_U_PMON_CTR1,  0, 0, BDW_EP_VALID_OPTIONS_UBOX},
-    {"UBOXFIX", PMC14, UBOXFIX, MSR_UNC_V3_U_UCLK_FIXED_CTL, MSR_UNC_V3_U_UCLK_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"CBOX0C0", PMC15, CBOX0, MSR_UNC_V3_C0_PMON_CTL0, MSR_UNC_V3_C0_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX0C1", PMC16, CBOX0, MSR_UNC_V3_C0_PMON_CTL1, MSR_UNC_V3_C0_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX0C2", PMC17, CBOX0, MSR_UNC_V3_C0_PMON_CTL2, MSR_UNC_V3_C0_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX0C3", PMC18, CBOX0, MSR_UNC_V3_C0_PMON_CTL3, MSR_UNC_V3_C0_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX1C0", PMC19, CBOX1, MSR_UNC_V3_C1_PMON_CTL0, MSR_UNC_V3_C1_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX1C1", PMC20, CBOX1, MSR_UNC_V3_C1_PMON_CTL1, MSR_UNC_V3_C1_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX1C2", PMC21, CBOX1, MSR_UNC_V3_C1_PMON_CTL2, MSR_UNC_V3_C1_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX1C3", PMC22, CBOX1, MSR_UNC_V3_C1_PMON_CTL3, MSR_UNC_V3_C1_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX2C0", PMC23, CBOX2, MSR_UNC_V3_C2_PMON_CTL0, MSR_UNC_V3_C2_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX2C1", PMC24, CBOX2, MSR_UNC_V3_C2_PMON_CTL1, MSR_UNC_V3_C2_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX2C2", PMC25, CBOX2, MSR_UNC_V3_C2_PMON_CTL2, MSR_UNC_V3_C2_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX2C3", PMC26, CBOX2, MSR_UNC_V3_C2_PMON_CTL3, MSR_UNC_V3_C2_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX3C0", PMC27, CBOX3, MSR_UNC_V3_C3_PMON_CTL0, MSR_UNC_V3_C3_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX3C1", PMC28, CBOX3, MSR_UNC_V3_C3_PMON_CTL1, MSR_UNC_V3_C3_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX3C2", PMC29, CBOX3, MSR_UNC_V3_C3_PMON_CTL2, MSR_UNC_V3_C3_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX3C3", PMC30, CBOX3, MSR_UNC_V3_C3_PMON_CTL3, MSR_UNC_V3_C3_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX4C0", PMC31, CBOX4, MSR_UNC_V3_C4_PMON_CTL0, MSR_UNC_V3_C4_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX4C1", PMC32, CBOX4, MSR_UNC_V3_C4_PMON_CTL1, MSR_UNC_V3_C4_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX4C2", PMC33, CBOX4, MSR_UNC_V3_C4_PMON_CTL2, MSR_UNC_V3_C4_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX4C3", PMC34, CBOX4, MSR_UNC_V3_C4_PMON_CTL3, MSR_UNC_V3_C4_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX5C0", PMC35, CBOX5, MSR_UNC_V3_C5_PMON_CTL0, MSR_UNC_V3_C5_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX5C1", PMC36, CBOX5, MSR_UNC_V3_C5_PMON_CTL1, MSR_UNC_V3_C5_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX5C2", PMC37, CBOX5, MSR_UNC_V3_C5_PMON_CTL2, MSR_UNC_V3_C5_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX5C3", PMC38, CBOX5, MSR_UNC_V3_C5_PMON_CTL3, MSR_UNC_V3_C5_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX6C0", PMC39, CBOX6, MSR_UNC_V3_C6_PMON_CTL0, MSR_UNC_V3_C6_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX6C1", PMC40, CBOX6, MSR_UNC_V3_C6_PMON_CTL1, MSR_UNC_V3_C6_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX6C2", PMC41, CBOX6, MSR_UNC_V3_C6_PMON_CTL2, MSR_UNC_V3_C6_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX6C3", PMC42, CBOX6, MSR_UNC_V3_C6_PMON_CTL3, MSR_UNC_V3_C6_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX7C0", PMC43, CBOX7, MSR_UNC_V3_C7_PMON_CTL0, MSR_UNC_V3_C7_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX7C1", PMC44, CBOX7, MSR_UNC_V3_C7_PMON_CTL1, MSR_UNC_V3_C7_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX7C2", PMC45, CBOX7, MSR_UNC_V3_C7_PMON_CTL2, MSR_UNC_V3_C7_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX7C3", PMC46, CBOX7, MSR_UNC_V3_C7_PMON_CTL3, MSR_UNC_V3_C7_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX8C0", PMC47, CBOX8, MSR_UNC_V3_C8_PMON_CTL0, MSR_UNC_V3_C8_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX8C1", PMC48, CBOX8, MSR_UNC_V3_C8_PMON_CTL1, MSR_UNC_V3_C8_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX8C2", PMC49, CBOX8, MSR_UNC_V3_C8_PMON_CTL2, MSR_UNC_V3_C8_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX8C3", PMC50, CBOX8, MSR_UNC_V3_C8_PMON_CTL3, MSR_UNC_V3_C8_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX9C0", PMC51, CBOX9, MSR_UNC_V3_C9_PMON_CTL0, MSR_UNC_V3_C9_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX9C1", PMC52, CBOX9, MSR_UNC_V3_C9_PMON_CTL1, MSR_UNC_V3_C9_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX9C2", PMC53, CBOX9, MSR_UNC_V3_C9_PMON_CTL2, MSR_UNC_V3_C9_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX9C3", PMC54, CBOX9, MSR_UNC_V3_C9_PMON_CTL3, MSR_UNC_V3_C9_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX10C0", PMC55, CBOX10, MSR_UNC_V3_C10_PMON_CTL0, MSR_UNC_V3_C10_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX10C1", PMC56, CBOX10, MSR_UNC_V3_C10_PMON_CTL1, MSR_UNC_V3_C10_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX10C2", PMC57, CBOX10, MSR_UNC_V3_C10_PMON_CTL2, MSR_UNC_V3_C10_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX10C3", PMC58, CBOX10, MSR_UNC_V3_C10_PMON_CTL3, MSR_UNC_V3_C10_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX11C0", PMC59, CBOX11, MSR_UNC_V3_C11_PMON_CTL0, MSR_UNC_V3_C11_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX11C1", PMC60, CBOX11, MSR_UNC_V3_C11_PMON_CTL1, MSR_UNC_V3_C11_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX11C2", PMC61, CBOX11, MSR_UNC_V3_C11_PMON_CTL2, MSR_UNC_V3_C11_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX11C3", PMC62, CBOX11, MSR_UNC_V3_C11_PMON_CTL3, MSR_UNC_V3_C11_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX12C0", PMC63, CBOX12, MSR_UNC_V3_C12_PMON_CTL0, MSR_UNC_V3_C12_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX12C1", PMC64, CBOX12, MSR_UNC_V3_C12_PMON_CTL1, MSR_UNC_V3_C12_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX12C2", PMC65, CBOX12, MSR_UNC_V3_C12_PMON_CTL2, MSR_UNC_V3_C12_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX12C3", PMC66, CBOX12, MSR_UNC_V3_C12_PMON_CTL3, MSR_UNC_V3_C12_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX13C0", PMC67, CBOX13, MSR_UNC_V3_C13_PMON_CTL0, MSR_UNC_V3_C13_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX13C1", PMC68, CBOX13, MSR_UNC_V3_C13_PMON_CTL1, MSR_UNC_V3_C13_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX13C2", PMC69, CBOX13, MSR_UNC_V3_C13_PMON_CTL2, MSR_UNC_V3_C13_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX13C3", PMC70, CBOX13, MSR_UNC_V3_C13_PMON_CTL3, MSR_UNC_V3_C13_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX14C0", PMC71, CBOX14, MSR_UNC_V3_C14_PMON_CTL0, MSR_UNC_V3_C14_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX14C1", PMC72, CBOX14, MSR_UNC_V3_C14_PMON_CTL1, MSR_UNC_V3_C14_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX14C2", PMC73, CBOX14, MSR_UNC_V3_C14_PMON_CTL2, MSR_UNC_V3_C14_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX14C3", PMC74, CBOX14, MSR_UNC_V3_C14_PMON_CTL3, MSR_UNC_V3_C14_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX15C0", PMC75, CBOX15, MSR_UNC_V3_C15_PMON_CTL0, MSR_UNC_V3_C15_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX15C1", PMC76, CBOX15, MSR_UNC_V3_C15_PMON_CTL1, MSR_UNC_V3_C15_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX15C2", PMC77, CBOX15, MSR_UNC_V3_C15_PMON_CTL2, MSR_UNC_V3_C15_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX15C3", PMC78, CBOX15, MSR_UNC_V3_C15_PMON_CTL3, MSR_UNC_V3_C15_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX16C0", PMC79, CBOX16, MSR_UNC_V3_C16_PMON_CTL0, MSR_UNC_V3_C16_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX16C1", PMC80, CBOX16, MSR_UNC_V3_C16_PMON_CTL1, MSR_UNC_V3_C16_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX16C2", PMC81, CBOX16, MSR_UNC_V3_C16_PMON_CTL2, MSR_UNC_V3_C16_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX16C3", PMC82, CBOX16, MSR_UNC_V3_C16_PMON_CTL3, MSR_UNC_V3_C16_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX17C0", PMC83, CBOX17, MSR_UNC_V3_C17_PMON_CTL0, MSR_UNC_V3_C17_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX17C1", PMC84, CBOX17, MSR_UNC_V3_C17_PMON_CTL1, MSR_UNC_V3_C17_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX17C2", PMC85, CBOX17, MSR_UNC_V3_C17_PMON_CTL2, MSR_UNC_V3_C17_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX17C3", PMC86, CBOX17, MSR_UNC_V3_C17_PMON_CTL3, MSR_UNC_V3_C17_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX18C0", PMC87, CBOX18, MSR_UNC_V3_C18_PMON_CTL0, MSR_UNC_V3_C18_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX18C1", PMC88, CBOX18, MSR_UNC_V3_C18_PMON_CTL1, MSR_UNC_V3_C18_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX18C2", PMC89, CBOX18, MSR_UNC_V3_C18_PMON_CTL2, MSR_UNC_V3_C18_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX18C3", PMC90, CBOX18, MSR_UNC_V3_C18_PMON_CTL3, MSR_UNC_V3_C18_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX19C0", PMC91, CBOX19, MSR_UNC_V3_C19_PMON_CTL0, MSR_UNC_V3_C19_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX19C1", PMC92, CBOX19, MSR_UNC_V3_C19_PMON_CTL1, MSR_UNC_V3_C19_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX19C2", PMC93, CBOX19, MSR_UNC_V3_C19_PMON_CTL2, MSR_UNC_V3_C19_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX19C3", PMC94, CBOX19, MSR_UNC_V3_C19_PMON_CTL3, MSR_UNC_V3_C19_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX20C0", PMC95, CBOX20, MSR_UNC_V3_C20_PMON_CTL0, MSR_UNC_V3_C20_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX20C1", PMC96, CBOX20, MSR_UNC_V3_C20_PMON_CTL1, MSR_UNC_V3_C20_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX20C2", PMC97, CBOX20, MSR_UNC_V3_C20_PMON_CTL2, MSR_UNC_V3_C20_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX20C3", PMC98, CBOX20, MSR_UNC_V3_C20_PMON_CTL3, MSR_UNC_V3_C20_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX21C0", PMC99, CBOX21, MSR_UNC_V3_C21_PMON_CTL0, MSR_UNC_V3_C21_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX21C1", PMC100, CBOX21, MSR_UNC_V3_C21_PMON_CTL1, MSR_UNC_V3_C21_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX21C2", PMC101, CBOX21, MSR_UNC_V3_C21_PMON_CTL2, MSR_UNC_V3_C21_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX21C3", PMC102, CBOX21, MSR_UNC_V3_C21_PMON_CTL3, MSR_UNC_V3_C21_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX22C0", PMC103, CBOX22, MSR_UNC_V3_C22_PMON_CTL0, MSR_UNC_V3_C22_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX22C1", PMC104, CBOX22, MSR_UNC_V3_C22_PMON_CTL1, MSR_UNC_V3_C22_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX22C2", PMC105, CBOX22, MSR_UNC_V3_C22_PMON_CTL2, MSR_UNC_V3_C22_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX22C3", PMC106, CBOX22, MSR_UNC_V3_C22_PMON_CTL3, MSR_UNC_V3_C22_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX23C0", PMC107, CBOX23, MSR_UNC_V3_C23_PMON_CTL0, MSR_UNC_V3_C23_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX23C1", PMC108, CBOX23, MSR_UNC_V3_C23_PMON_CTL1, MSR_UNC_V3_C23_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX23C2", PMC109, CBOX23, MSR_UNC_V3_C23_PMON_CTL2, MSR_UNC_V3_C23_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"CBOX23C3", PMC110, CBOX23, MSR_UNC_V3_C23_PMON_CTL3, MSR_UNC_V3_C23_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
-    {"WBOX0", PMC111, WBOX, MSR_UNC_V3_PCU_PMON_CTL0, MSR_UNC_V3_PCU_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_WBOX},
-    {"WBOX1", PMC112, WBOX, MSR_UNC_V3_PCU_PMON_CTL1, MSR_UNC_V3_PCU_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_WBOX},
-    {"WBOX2", PMC113, WBOX, MSR_UNC_V3_PCU_PMON_CTL2, MSR_UNC_V3_PCU_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_WBOX},
-    {"WBOX3", PMC114, WBOX, MSR_UNC_V3_PCU_PMON_CTL3, MSR_UNC_V3_PCU_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_WBOX},
-    {"WBOX0FIX", PMC115, WBOX0FIX, 0, MSR_UNC_V3_PCU_CC3_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"WBOX1FIX", PMC116, WBOX0FIX, 0, MSR_UNC_V3_PCU_CC6_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"BBOX0C0", PMC117, BBOX0, PCI_UNC_HA_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_0, BDW_EP_VALID_OPTIONS_BBOX},
-    {"BBOX0C1", PMC118, BBOX0, PCI_UNC_HA_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_0, BDW_EP_VALID_OPTIONS_BBOX},
-    {"BBOX0C2", PMC119, BBOX0, PCI_UNC_HA_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_0, BDW_EP_VALID_OPTIONS_BBOX},
-    {"BBOX0C3", PMC120, BBOX0, PCI_UNC_HA_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_0, BDW_EP_VALID_OPTIONS_BBOX},
-    {"BBOX1C0", PMC121, BBOX1, PCI_UNC_HA_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_1, BDW_EP_VALID_OPTIONS_BBOX},
-    {"BBOX1C1", PMC122, BBOX1, PCI_UNC_HA_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_1, BDW_EP_VALID_OPTIONS_BBOX},
-    {"BBOX1C2", PMC123, BBOX1, PCI_UNC_HA_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_1, BDW_EP_VALID_OPTIONS_BBOX},
-    {"BBOX1C3", PMC124, BBOX1, PCI_UNC_HA_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_1, BDW_EP_VALID_OPTIONS_BBOX},
-    {"MBOX0C0", PMC125, MBOX0, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_0, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX0C1", PMC126, MBOX0, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_0, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX0C2", PMC127, MBOX0, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_0, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX0FIX", PMC128, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_INVERT_MASK},
-    {"MBOX0C3", PMC129, MBOX0, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_0, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX1C0", PMC130, MBOX1, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_1, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX1C1", PMC131, MBOX1, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_1, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX1C2", PMC132, MBOX1, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_1, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX1C3", PMC133, MBOX1, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_1, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX1FIX", PMC134, MBOX1FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_1, EVENT_OPTION_INVERT_MASK},
-    {"MBOX2C0", PMC135, MBOX2, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_2, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX2C1", PMC136, MBOX2, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_2, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX2C2", PMC137, MBOX2, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_2, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX2C3", PMC138, MBOX2, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_2, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX2FIX", PMC139, MBOX2FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_2, EVENT_OPTION_INVERT_MASK},
-    {"MBOX3C0", PMC140, MBOX3, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_3, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX3C1", PMC141, MBOX3, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_3, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX3C2", PMC142, MBOX3, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_3, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX3C3", PMC143, MBOX3, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_3, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX3FIX", PMC144, MBOX3FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_3, EVENT_OPTION_INVERT_MASK},
-    {"MBOX4C0", PMC145, MBOX4, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_0, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX4C1", PMC146, MBOX4, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_0, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX4C2", PMC147, MBOX4, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_0, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX4C3", PMC148, MBOX4, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_0, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX4FIX", PMC149, MBOX4FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_0, EVENT_OPTION_INVERT_MASK},
-    {"MBOX5C0", PMC150, MBOX5, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_1, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX5C1", PMC151, MBOX5, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_1, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX5C2", PMC152, MBOX5, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_1, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX5C3", PMC153, MBOX5, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_1, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX5FIX", PMC154, MBOX5FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_1, EVENT_OPTION_INVERT_MASK},
-    {"MBOX6C0", PMC155, MBOX6, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_2, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX6C1", PMC156, MBOX6, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_2, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX6C2", PMC157, MBOX6, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_2, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX6C3", PMC158, MBOX6, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_2, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX6FIX", PMC159, MBOX6FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_2, EVENT_OPTION_INVERT_MASK},
-    {"MBOX7C0", PMC160, MBOX7, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_3, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX7C1", PMC161, MBOX7, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_3, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX7C2", PMC162, MBOX7, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_3, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX7C3", PMC163, MBOX7, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_3, BDW_EP_VALID_OPTIONS_MBOX},
-    {"MBOX7FIX", PMC164, MBOX7FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_3, EVENT_OPTION_INVERT_MASK},
-    {"IBOX0C0", PMC165, IBOX0, PCI_UNC_IRP0_PMON_CTL_0, PCI_UNC_IRP0_PMON_CTR_0, 0, PCI_IRP_DEVICE, BDW_EP_VALID_OPTIONS_IBOX},
-    {"IBOX0C1", PMC166, IBOX0, PCI_UNC_IRP0_PMON_CTL_1, PCI_UNC_IRP0_PMON_CTR_1, 0, PCI_IRP_DEVICE, BDW_EP_VALID_OPTIONS_IBOX},
-    {"IBOX1C0", PMC167, IBOX1, PCI_UNC_IRP1_PMON_CTL_0, PCI_UNC_IRP1_PMON_CTR_0, 0, PCI_IRP_DEVICE, BDW_EP_VALID_OPTIONS_IBOX},
-    {"IBOX1C1", PMC168, IBOX1, PCI_UNC_IRP1_PMON_CTL_1, PCI_UNC_IRP1_PMON_CTR_1, 0, PCI_IRP_DEVICE, BDW_EP_VALID_OPTIONS_IBOX},
-    {"PBOX0", PMC169, PBOX, PCI_UNC_R2PCIE_PMON_CTL_0, PCI_UNC_R2PCIE_PMON_CTR_0_A, PCI_UNC_R2PCIE_PMON_CTR_0_B, PCI_R2PCIE_DEVICE, BDW_EP_VALID_OPTIONS_PBOX},
-    {"PBOX1", PMC170, PBOX, PCI_UNC_R2PCIE_PMON_CTL_1, PCI_UNC_R2PCIE_PMON_CTR_1_A, PCI_UNC_R2PCIE_PMON_CTR_1_B, PCI_R2PCIE_DEVICE, BDW_EP_VALID_OPTIONS_PBOX},
-    {"PBOX2", PMC171, PBOX, PCI_UNC_R2PCIE_PMON_CTL_2, PCI_UNC_R2PCIE_PMON_CTR_2_A, PCI_UNC_R2PCIE_PMON_CTR_2_B, PCI_R2PCIE_DEVICE, BDW_EP_VALID_OPTIONS_PBOX},
-    {"PBOX3", PMC172, PBOX, PCI_UNC_R2PCIE_PMON_CTL_3, PCI_UNC_R2PCIE_PMON_CTR_3_A, PCI_UNC_R2PCIE_PMON_CTR_3_B, PCI_R2PCIE_DEVICE, BDW_EP_VALID_OPTIONS_PBOX},
-    {"RBOX0C0", PMC173, RBOX0, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_0, BDW_EP_VALID_OPTIONS_RBOX},
-    {"RBOX0C1", PMC174, RBOX0, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_0, BDW_EP_VALID_OPTIONS_RBOX},
-    {"RBOX0C2", PMC175, RBOX0, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_0, BDW_EP_VALID_OPTIONS_RBOX},
-    {"RBOX1C0", PMC176, RBOX1, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_1, BDW_EP_VALID_OPTIONS_RBOX},
-    {"RBOX1C1", PMC177, RBOX1, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_1, BDW_EP_VALID_OPTIONS_RBOX},
-    {"RBOX1C2", PMC178, RBOX1, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_1, BDW_EP_VALID_OPTIONS_RBOX},
-    {"SBOX0C0", PMC179, SBOX0, MSR_UNC_V3_S0_PMON_CTL_0, MSR_UNC_V3_S0_PMON_CTR_0, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
-    {"SBOX0C1", PMC180, SBOX0, MSR_UNC_V3_S0_PMON_CTL_1, MSR_UNC_V3_S0_PMON_CTR_1, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
-    {"SBOX0C2", PMC181, SBOX0, MSR_UNC_V3_S0_PMON_CTL_2, MSR_UNC_V3_S0_PMON_CTR_2, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
-    {"SBOX0C3", PMC182, SBOX0, MSR_UNC_V3_S0_PMON_CTL_3, MSR_UNC_V3_S0_PMON_CTR_3, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
-    {"SBOX1C0", PMC183, SBOX1, MSR_UNC_V3_S1_PMON_CTL_0, MSR_UNC_V3_S1_PMON_CTR_0, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
-    {"SBOX1C1", PMC184, SBOX1, MSR_UNC_V3_S1_PMON_CTL_1, MSR_UNC_V3_S1_PMON_CTR_1, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
-    {"SBOX1C2", PMC185, SBOX1, MSR_UNC_V3_S1_PMON_CTL_2, MSR_UNC_V3_S1_PMON_CTR_2, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
-    {"SBOX1C3", PMC186, SBOX1, MSR_UNC_V3_S1_PMON_CTL_3, MSR_UNC_V3_S1_PMON_CTR_3, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
-    {"SBOX2C0", PMC187, SBOX2, MSR_UNC_V3_S2_PMON_CTL_0, MSR_UNC_V3_S2_PMON_CTR_0, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
-    {"SBOX2C1", PMC188, SBOX2, MSR_UNC_V3_S2_PMON_CTL_1, MSR_UNC_V3_S2_PMON_CTR_1, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
-    {"SBOX2C2", PMC189, SBOX2, MSR_UNC_V3_S2_PMON_CTL_2, MSR_UNC_V3_S2_PMON_CTR_2, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
-    {"SBOX2C3", PMC190, SBOX2, MSR_UNC_V3_S2_PMON_CTL_3, MSR_UNC_V3_S2_PMON_CTR_3, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
-    {"SBOX3C0", PMC191, SBOX3, MSR_UNC_V3_S3_PMON_CTL_0, MSR_UNC_V3_S3_PMON_CTR_0, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
-    {"SBOX3C1", PMC192, SBOX3, MSR_UNC_V3_S3_PMON_CTL_1, MSR_UNC_V3_S3_PMON_CTR_1, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
-    {"SBOX3C2", PMC193, SBOX3, MSR_UNC_V3_S3_PMON_CTL_2, MSR_UNC_V3_S3_PMON_CTR_2, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
-    {"SBOX3C3", PMC194, SBOX3, MSR_UNC_V3_S3_PMON_CTL_3, MSR_UNC_V3_S3_PMON_CTR_3, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
-    {"QBOX0C0", PMC195, QBOX0, PCI_UNC_V3_QPI_PMON_CTL_0, PCI_UNC_V3_QPI_PMON_CTR_0_A, PCI_UNC_V3_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_0, BDW_EP_VALID_OPTIONS_QBOX},
-    {"QBOX0C1", PMC196, QBOX0, PCI_UNC_V3_QPI_PMON_CTL_1, PCI_UNC_V3_QPI_PMON_CTR_1_A, PCI_UNC_V3_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_0, BDW_EP_VALID_OPTIONS_QBOX},
-    {"QBOX0C2", PMC197, QBOX0, PCI_UNC_V3_QPI_PMON_CTL_2, PCI_UNC_V3_QPI_PMON_CTR_2_A, PCI_UNC_V3_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_0, BDW_EP_VALID_OPTIONS_QBOX},
-    {"QBOX0C3", PMC198, QBOX0, PCI_UNC_V3_QPI_PMON_CTL_3, PCI_UNC_V3_QPI_PMON_CTR_3_A, PCI_UNC_V3_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_0, BDW_EP_VALID_OPTIONS_QBOX},
-    {"QBOX1C0", PMC199, QBOX1, PCI_UNC_V3_QPI_PMON_CTL_0, PCI_UNC_V3_QPI_PMON_CTR_0_A, PCI_UNC_V3_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_1, BDW_EP_VALID_OPTIONS_QBOX},
-    {"QBOX1C1", PMC200, QBOX1, PCI_UNC_V3_QPI_PMON_CTL_1, PCI_UNC_V3_QPI_PMON_CTR_1_A, PCI_UNC_V3_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_1, BDW_EP_VALID_OPTIONS_QBOX},
-    {"QBOX1C2", PMC201, QBOX1, PCI_UNC_V3_QPI_PMON_CTL_2, PCI_UNC_V3_QPI_PMON_CTR_2_A, PCI_UNC_V3_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_1, BDW_EP_VALID_OPTIONS_QBOX},
-    {"QBOX1C3", PMC202, QBOX1, PCI_UNC_V3_QPI_PMON_CTL_3, PCI_UNC_V3_QPI_PMON_CTR_3_A, PCI_UNC_V3_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_1, BDW_EP_VALID_OPTIONS_QBOX},
-    {"QBOX2C0", PMC203, QBOX2, PCI_UNC_V3_QPI_PMON_CTL_0, PCI_UNC_V3_QPI_PMON_CTR_0_A, PCI_UNC_V3_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_2, BDW_EP_VALID_OPTIONS_QBOX},
-    {"QBOX2C1", PMC204, QBOX2, PCI_UNC_V3_QPI_PMON_CTL_1, PCI_UNC_V3_QPI_PMON_CTR_1_A, PCI_UNC_V3_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_2, BDW_EP_VALID_OPTIONS_QBOX},
-    {"QBOX2C2", PMC205, QBOX2, PCI_UNC_V3_QPI_PMON_CTL_2, PCI_UNC_V3_QPI_PMON_CTR_2_A, PCI_UNC_V3_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_2, BDW_EP_VALID_OPTIONS_QBOX},
-    {"QBOX2C3", PMC206, QBOX2, PCI_UNC_V3_QPI_PMON_CTL_3, PCI_UNC_V3_QPI_PMON_CTR_3_A, PCI_UNC_V3_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_2, BDW_EP_VALID_OPTIONS_QBOX},
-    {"QBOX0FIX0", PMC207, QBOX0FIX, 0x0, PCI_UNC_V3_QPI_RATE_STATUS, 0x0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
-    {"QBOX0FIX1", PMC208, QBOX0FIX, 0x0, PCI_UNC_V3_QPI_LINK_IDLE, 0x0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
-    {"QBOX0FIX2", PMC209, QBOX0FIX, 0x0, PCI_UNC_V3_QPI_LINK_LLR, 0x0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
-    {"QBOX1FIX0", PMC210, QBOX1FIX, 0x0, PCI_UNC_V3_QPI_RATE_STATUS, 0x0, PCI_QPI_MISC_DEVICE_PORT_1, EVENT_OPTION_NONE_MASK},
-    {"QBOX1FIX1", PMC211, QBOX1FIX, 0x0, PCI_UNC_V3_QPI_LINK_IDLE, 0x0, PCI_QPI_MISC_DEVICE_PORT_1, EVENT_OPTION_NONE_MASK},
-    {"QBOX1FIX2", PMC212, QBOX1FIX, 0x0, PCI_UNC_V3_QPI_LINK_LLR, 0x0, PCI_QPI_MISC_DEVICE_PORT_1, EVENT_OPTION_NONE_MASK},
-    {"QBOX2FIX0", PMC213, QBOX2FIX, 0x0, PCI_UNC_V3_QPI_RATE_STATUS, 0x0, PCI_QPI_MISC_DEVICE_PORT_2, EVENT_OPTION_NONE_MASK},
-    {"QBOX2FIX1", PMC214, QBOX2FIX, 0x0, PCI_UNC_V3_QPI_LINK_IDLE, 0x0, PCI_QPI_MISC_DEVICE_PORT_2, EVENT_OPTION_NONE_MASK},
-    {"QBOX2FIX2", PMC215, QBOX2FIX, 0x0, PCI_UNC_V3_QPI_LINK_LLR, 0x0, PCI_QPI_MISC_DEVICE_PORT_2, EVENT_OPTION_NONE_MASK},
+    {"PWR0", PMC12, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR1", PMC13, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR2", PMC14, POWER, 0, MSR_PP1_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR3", PMC15, POWER, 0, MSR_DRAM_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
+    {"UBOX0", PMC16, UBOX, MSR_UNC_V3_U_PMON_CTL0, MSR_UNC_V3_U_PMON_CTR0,  0, 0, BDW_EP_VALID_OPTIONS_UBOX},
+    {"UBOX1", PMC17, UBOX, MSR_UNC_V3_U_PMON_CTL1, MSR_UNC_V3_U_PMON_CTR1,  0, 0, BDW_EP_VALID_OPTIONS_UBOX},
+    {"UBOXFIX", PMC18, UBOXFIX, MSR_UNC_V3_U_UCLK_FIXED_CTL, MSR_UNC_V3_U_UCLK_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"CBOX0C0", PMC19, CBOX0, MSR_UNC_V3_C0_PMON_CTL0, MSR_UNC_V3_C0_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX0C1", PMC20, CBOX0, MSR_UNC_V3_C0_PMON_CTL1, MSR_UNC_V3_C0_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX0C2", PMC21, CBOX0, MSR_UNC_V3_C0_PMON_CTL2, MSR_UNC_V3_C0_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX0C3", PMC22, CBOX0, MSR_UNC_V3_C0_PMON_CTL3, MSR_UNC_V3_C0_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX1C0", PMC23, CBOX1, MSR_UNC_V3_C1_PMON_CTL0, MSR_UNC_V3_C1_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX1C1", PMC24, CBOX1, MSR_UNC_V3_C1_PMON_CTL1, MSR_UNC_V3_C1_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX1C2", PMC25, CBOX1, MSR_UNC_V3_C1_PMON_CTL2, MSR_UNC_V3_C1_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX1C3", PMC26, CBOX1, MSR_UNC_V3_C1_PMON_CTL3, MSR_UNC_V3_C1_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX2C0", PMC27, CBOX2, MSR_UNC_V3_C2_PMON_CTL0, MSR_UNC_V3_C2_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX2C1", PMC28, CBOX2, MSR_UNC_V3_C2_PMON_CTL1, MSR_UNC_V3_C2_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX2C2", PMC29, CBOX2, MSR_UNC_V3_C2_PMON_CTL2, MSR_UNC_V3_C2_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX2C3", PMC30, CBOX2, MSR_UNC_V3_C2_PMON_CTL3, MSR_UNC_V3_C2_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX3C0", PMC31, CBOX3, MSR_UNC_V3_C3_PMON_CTL0, MSR_UNC_V3_C3_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX3C1", PMC32, CBOX3, MSR_UNC_V3_C3_PMON_CTL1, MSR_UNC_V3_C3_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX3C2", PMC33, CBOX3, MSR_UNC_V3_C3_PMON_CTL2, MSR_UNC_V3_C3_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX3C3", PMC34, CBOX3, MSR_UNC_V3_C3_PMON_CTL3, MSR_UNC_V3_C3_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX4C0", PMC35, CBOX4, MSR_UNC_V3_C4_PMON_CTL0, MSR_UNC_V3_C4_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX4C1", PMC36, CBOX4, MSR_UNC_V3_C4_PMON_CTL1, MSR_UNC_V3_C4_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX4C2", PMC37, CBOX4, MSR_UNC_V3_C4_PMON_CTL2, MSR_UNC_V3_C4_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX4C3", PMC38, CBOX4, MSR_UNC_V3_C4_PMON_CTL3, MSR_UNC_V3_C4_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX5C0", PMC39, CBOX5, MSR_UNC_V3_C5_PMON_CTL0, MSR_UNC_V3_C5_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX5C1", PMC40, CBOX5, MSR_UNC_V3_C5_PMON_CTL1, MSR_UNC_V3_C5_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX5C2", PMC41, CBOX5, MSR_UNC_V3_C5_PMON_CTL2, MSR_UNC_V3_C5_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX5C3", PMC42, CBOX5, MSR_UNC_V3_C5_PMON_CTL3, MSR_UNC_V3_C5_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX6C0", PMC43, CBOX6, MSR_UNC_V3_C6_PMON_CTL0, MSR_UNC_V3_C6_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX6C1", PMC44, CBOX6, MSR_UNC_V3_C6_PMON_CTL1, MSR_UNC_V3_C6_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX6C2", PMC45, CBOX6, MSR_UNC_V3_C6_PMON_CTL2, MSR_UNC_V3_C6_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX6C3", PMC46, CBOX6, MSR_UNC_V3_C6_PMON_CTL3, MSR_UNC_V3_C6_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX7C0", PMC47, CBOX7, MSR_UNC_V3_C7_PMON_CTL0, MSR_UNC_V3_C7_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX7C1", PMC48, CBOX7, MSR_UNC_V3_C7_PMON_CTL1, MSR_UNC_V3_C7_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX7C2", PMC49, CBOX7, MSR_UNC_V3_C7_PMON_CTL2, MSR_UNC_V3_C7_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX7C3", PMC50, CBOX7, MSR_UNC_V3_C7_PMON_CTL3, MSR_UNC_V3_C7_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX8C0", PMC51, CBOX8, MSR_UNC_V3_C8_PMON_CTL0, MSR_UNC_V3_C8_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX8C1", PMC52, CBOX8, MSR_UNC_V3_C8_PMON_CTL1, MSR_UNC_V3_C8_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX8C2", PMC53, CBOX8, MSR_UNC_V3_C8_PMON_CTL2, MSR_UNC_V3_C8_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX8C3", PMC54, CBOX8, MSR_UNC_V3_C8_PMON_CTL3, MSR_UNC_V3_C8_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX9C0", PMC55, CBOX9, MSR_UNC_V3_C9_PMON_CTL0, MSR_UNC_V3_C9_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX9C1", PMC56, CBOX9, MSR_UNC_V3_C9_PMON_CTL1, MSR_UNC_V3_C9_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX9C2", PMC57, CBOX9, MSR_UNC_V3_C9_PMON_CTL2, MSR_UNC_V3_C9_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX9C3", PMC58, CBOX9, MSR_UNC_V3_C9_PMON_CTL3, MSR_UNC_V3_C9_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX10C0", PMC59, CBOX10, MSR_UNC_V3_C10_PMON_CTL0, MSR_UNC_V3_C10_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX10C1", PMC60, CBOX10, MSR_UNC_V3_C10_PMON_CTL1, MSR_UNC_V3_C10_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX10C2", PMC61, CBOX10, MSR_UNC_V3_C10_PMON_CTL2, MSR_UNC_V3_C10_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX10C3", PMC62, CBOX10, MSR_UNC_V3_C10_PMON_CTL3, MSR_UNC_V3_C10_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX11C0", PMC63, CBOX11, MSR_UNC_V3_C11_PMON_CTL0, MSR_UNC_V3_C11_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX11C1", PMC64, CBOX11, MSR_UNC_V3_C11_PMON_CTL1, MSR_UNC_V3_C11_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX11C2", PMC65, CBOX11, MSR_UNC_V3_C11_PMON_CTL2, MSR_UNC_V3_C11_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX11C3", PMC66, CBOX11, MSR_UNC_V3_C11_PMON_CTL3, MSR_UNC_V3_C11_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX12C0", PMC67, CBOX12, MSR_UNC_V3_C12_PMON_CTL0, MSR_UNC_V3_C12_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX12C1", PMC68, CBOX12, MSR_UNC_V3_C12_PMON_CTL1, MSR_UNC_V3_C12_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX12C2", PMC69, CBOX12, MSR_UNC_V3_C12_PMON_CTL2, MSR_UNC_V3_C12_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX12C3", PMC70, CBOX12, MSR_UNC_V3_C12_PMON_CTL3, MSR_UNC_V3_C12_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX13C0", PMC71, CBOX13, MSR_UNC_V3_C13_PMON_CTL0, MSR_UNC_V3_C13_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX13C1", PMC72, CBOX13, MSR_UNC_V3_C13_PMON_CTL1, MSR_UNC_V3_C13_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX13C2", PMC73, CBOX13, MSR_UNC_V3_C13_PMON_CTL2, MSR_UNC_V3_C13_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX13C3", PMC74, CBOX13, MSR_UNC_V3_C13_PMON_CTL3, MSR_UNC_V3_C13_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX14C0", PMC75, CBOX14, MSR_UNC_V3_C14_PMON_CTL0, MSR_UNC_V3_C14_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX14C1", PMC76, CBOX14, MSR_UNC_V3_C14_PMON_CTL1, MSR_UNC_V3_C14_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX14C2", PMC77, CBOX14, MSR_UNC_V3_C14_PMON_CTL2, MSR_UNC_V3_C14_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX14C3", PMC78, CBOX14, MSR_UNC_V3_C14_PMON_CTL3, MSR_UNC_V3_C14_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX15C0", PMC79, CBOX15, MSR_UNC_V3_C15_PMON_CTL0, MSR_UNC_V3_C15_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX15C1", PMC80, CBOX15, MSR_UNC_V3_C15_PMON_CTL1, MSR_UNC_V3_C15_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX15C2", PMC81, CBOX15, MSR_UNC_V3_C15_PMON_CTL2, MSR_UNC_V3_C15_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX15C3", PMC82, CBOX15, MSR_UNC_V3_C15_PMON_CTL3, MSR_UNC_V3_C15_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX16C0", PMC83, CBOX16, MSR_UNC_V3_C16_PMON_CTL0, MSR_UNC_V3_C16_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX16C1", PMC84, CBOX16, MSR_UNC_V3_C16_PMON_CTL1, MSR_UNC_V3_C16_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX16C2", PMC85, CBOX16, MSR_UNC_V3_C16_PMON_CTL2, MSR_UNC_V3_C16_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX16C3", PMC86, CBOX16, MSR_UNC_V3_C16_PMON_CTL3, MSR_UNC_V3_C16_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX17C0", PMC87, CBOX17, MSR_UNC_V3_C17_PMON_CTL0, MSR_UNC_V3_C17_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX17C1", PMC88, CBOX17, MSR_UNC_V3_C17_PMON_CTL1, MSR_UNC_V3_C17_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX17C2", PMC89, CBOX17, MSR_UNC_V3_C17_PMON_CTL2, MSR_UNC_V3_C17_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX17C3", PMC90, CBOX17, MSR_UNC_V3_C17_PMON_CTL3, MSR_UNC_V3_C17_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX18C0", PMC91, CBOX18, MSR_UNC_V3_C18_PMON_CTL0, MSR_UNC_V3_C18_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX18C1", PMC92, CBOX18, MSR_UNC_V3_C18_PMON_CTL1, MSR_UNC_V3_C18_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX18C2", PMC93, CBOX18, MSR_UNC_V3_C18_PMON_CTL2, MSR_UNC_V3_C18_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX18C3", PMC94, CBOX18, MSR_UNC_V3_C18_PMON_CTL3, MSR_UNC_V3_C18_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX19C0", PMC95, CBOX19, MSR_UNC_V3_C19_PMON_CTL0, MSR_UNC_V3_C19_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX19C1", PMC96, CBOX19, MSR_UNC_V3_C19_PMON_CTL1, MSR_UNC_V3_C19_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX19C2", PMC97, CBOX19, MSR_UNC_V3_C19_PMON_CTL2, MSR_UNC_V3_C19_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX19C3", PMC98, CBOX19, MSR_UNC_V3_C19_PMON_CTL3, MSR_UNC_V3_C19_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX20C0", PMC99, CBOX20, MSR_UNC_V3_C20_PMON_CTL0, MSR_UNC_V3_C20_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX20C1", PMC100, CBOX20, MSR_UNC_V3_C20_PMON_CTL1, MSR_UNC_V3_C20_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX20C2", PMC101, CBOX20, MSR_UNC_V3_C20_PMON_CTL2, MSR_UNC_V3_C20_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX20C3", PMC102, CBOX20, MSR_UNC_V3_C20_PMON_CTL3, MSR_UNC_V3_C20_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX21C0", PMC103, CBOX21, MSR_UNC_V3_C21_PMON_CTL0, MSR_UNC_V3_C21_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX21C1", PMC104, CBOX21, MSR_UNC_V3_C21_PMON_CTL1, MSR_UNC_V3_C21_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX21C2", PMC105, CBOX21, MSR_UNC_V3_C21_PMON_CTL2, MSR_UNC_V3_C21_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX21C3", PMC106, CBOX21, MSR_UNC_V3_C21_PMON_CTL3, MSR_UNC_V3_C21_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX22C0", PMC107, CBOX22, MSR_UNC_V3_C22_PMON_CTL0, MSR_UNC_V3_C22_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX22C1", PMC108, CBOX22, MSR_UNC_V3_C22_PMON_CTL1, MSR_UNC_V3_C22_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX22C2", PMC109, CBOX22, MSR_UNC_V3_C22_PMON_CTL2, MSR_UNC_V3_C22_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX22C3", PMC110, CBOX22, MSR_UNC_V3_C22_PMON_CTL3, MSR_UNC_V3_C22_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX23C0", PMC111, CBOX23, MSR_UNC_V3_C23_PMON_CTL0, MSR_UNC_V3_C23_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX23C1", PMC112, CBOX23, MSR_UNC_V3_C23_PMON_CTL1, MSR_UNC_V3_C23_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX23C2", PMC113, CBOX23, MSR_UNC_V3_C23_PMON_CTL2, MSR_UNC_V3_C23_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"CBOX23C3", PMC114, CBOX23, MSR_UNC_V3_C23_PMON_CTL3, MSR_UNC_V3_C23_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_CBOX},
+    {"WBOX0", PMC115, WBOX, MSR_UNC_V3_PCU_PMON_CTL0, MSR_UNC_V3_PCU_PMON_CTR0, 0, 0, BDW_EP_VALID_OPTIONS_WBOX},
+    {"WBOX1", PMC116, WBOX, MSR_UNC_V3_PCU_PMON_CTL1, MSR_UNC_V3_PCU_PMON_CTR1, 0, 0, BDW_EP_VALID_OPTIONS_WBOX},
+    {"WBOX2", PMC117, WBOX, MSR_UNC_V3_PCU_PMON_CTL2, MSR_UNC_V3_PCU_PMON_CTR2, 0, 0, BDW_EP_VALID_OPTIONS_WBOX},
+    {"WBOX3", PMC118, WBOX, MSR_UNC_V3_PCU_PMON_CTL3, MSR_UNC_V3_PCU_PMON_CTR3, 0, 0, BDW_EP_VALID_OPTIONS_WBOX},
+    {"WBOX0FIX", PMC119, WBOX0FIX, 0, MSR_UNC_V3_PCU_CC3_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"WBOX1FIX", PMC120, WBOX0FIX, 0, MSR_UNC_V3_PCU_CC6_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"BBOX0C0", PMC121, BBOX0, PCI_UNC_HA_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_0, BDW_EP_VALID_OPTIONS_BBOX},
+    {"BBOX0C1", PMC122, BBOX0, PCI_UNC_HA_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_0, BDW_EP_VALID_OPTIONS_BBOX},
+    {"BBOX0C2", PMC123, BBOX0, PCI_UNC_HA_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_0, BDW_EP_VALID_OPTIONS_BBOX},
+    {"BBOX0C3", PMC124, BBOX0, PCI_UNC_HA_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_0, BDW_EP_VALID_OPTIONS_BBOX},
+    {"BBOX1C0", PMC125, BBOX1, PCI_UNC_HA_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_1, BDW_EP_VALID_OPTIONS_BBOX},
+    {"BBOX1C1", PMC126, BBOX1, PCI_UNC_HA_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_1, BDW_EP_VALID_OPTIONS_BBOX},
+    {"BBOX1C2", PMC127, BBOX1, PCI_UNC_HA_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_1, BDW_EP_VALID_OPTIONS_BBOX},
+    {"BBOX1C3", PMC128, BBOX1, PCI_UNC_HA_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_1, BDW_EP_VALID_OPTIONS_BBOX},
+    {"MBOX0C0", PMC129, MBOX0, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_0, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX0C1", PMC130, MBOX0, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_0, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX0C2", PMC131, MBOX0, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_0, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX0FIX", PMC132, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_INVERT_MASK},
+    {"MBOX0C3", PMC133, MBOX0, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_0, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX1C0", PMC134, MBOX1, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_1, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX1C1", PMC135, MBOX1, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_1, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX1C2", PMC136, MBOX1, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_1, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX1C3", PMC137, MBOX1, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_1, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX1FIX", PMC138, MBOX1FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_1, EVENT_OPTION_INVERT_MASK},
+    {"MBOX2C0", PMC139, MBOX2, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_2, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX2C1", PMC140, MBOX2, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_2, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX2C2", PMC141, MBOX2, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_2, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX2C3", PMC142, MBOX2, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_2, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX2FIX", PMC143, MBOX2FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_2, EVENT_OPTION_INVERT_MASK},
+    {"MBOX3C0", PMC144, MBOX3, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_3, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX3C1", PMC145, MBOX3, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_3, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX3C2", PMC146, MBOX3, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_3, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX3C3", PMC147, MBOX3, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_3, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX3FIX", PMC148, MBOX3FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_3, EVENT_OPTION_INVERT_MASK},
+    {"MBOX4C0", PMC149, MBOX4, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_0, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX4C1", PMC150, MBOX4, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_0, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX4C2", PMC151, MBOX4, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_0, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX4C3", PMC152, MBOX4, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_0, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX4FIX", PMC153, MBOX4FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_0, EVENT_OPTION_INVERT_MASK},
+    {"MBOX5C0", PMC154, MBOX5, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_1, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX5C1", PMC155, MBOX5, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_1, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX5C2", PMC156, MBOX5, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_1, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX5C3", PMC157, MBOX5, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_1, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX5FIX", PMC158, MBOX5FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_1, EVENT_OPTION_INVERT_MASK},
+    {"MBOX6C0", PMC159, MBOX6, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_2, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX6C1", PMC160, MBOX6, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_2, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX6C2", PMC161, MBOX6, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_2, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX6C3", PMC162, MBOX6, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_2, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX6FIX", PMC163, MBOX6FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_2, EVENT_OPTION_INVERT_MASK},
+    {"MBOX7C0", PMC164, MBOX7, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_3, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX7C1", PMC165, MBOX7, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_3, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX7C2", PMC166, MBOX7, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_3, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX7C3", PMC167, MBOX7, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_3, BDW_EP_VALID_OPTIONS_MBOX},
+    {"MBOX7FIX", PMC168, MBOX7FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_3, EVENT_OPTION_INVERT_MASK},
+    {"IBOX0C0", PMC169, IBOX0, PCI_UNC_IRP0_PMON_CTL_0, PCI_UNC_IRP0_PMON_CTR_0, 0, PCI_IRP_DEVICE, BDW_EP_VALID_OPTIONS_IBOX},
+    {"IBOX0C1", PMC170, IBOX0, PCI_UNC_IRP0_PMON_CTL_1, PCI_UNC_IRP0_PMON_CTR_1, 0, PCI_IRP_DEVICE, BDW_EP_VALID_OPTIONS_IBOX},
+    {"IBOX1C0", PMC171, IBOX1, PCI_UNC_IRP1_PMON_CTL_0, PCI_UNC_IRP1_PMON_CTR_0, 0, PCI_IRP_DEVICE, BDW_EP_VALID_OPTIONS_IBOX},
+    {"IBOX1C1", PMC172, IBOX1, PCI_UNC_IRP1_PMON_CTL_1, PCI_UNC_IRP1_PMON_CTR_1, 0, PCI_IRP_DEVICE, BDW_EP_VALID_OPTIONS_IBOX},
+    {"PBOX0", PMC173, PBOX, PCI_UNC_R2PCIE_PMON_CTL_0, PCI_UNC_R2PCIE_PMON_CTR_0_A, PCI_UNC_R2PCIE_PMON_CTR_0_B, PCI_R2PCIE_DEVICE, BDW_EP_VALID_OPTIONS_PBOX},
+    {"PBOX1", PMC174, PBOX, PCI_UNC_R2PCIE_PMON_CTL_1, PCI_UNC_R2PCIE_PMON_CTR_1_A, PCI_UNC_R2PCIE_PMON_CTR_1_B, PCI_R2PCIE_DEVICE, BDW_EP_VALID_OPTIONS_PBOX},
+    {"PBOX2", PMC175, PBOX, PCI_UNC_R2PCIE_PMON_CTL_2, PCI_UNC_R2PCIE_PMON_CTR_2_A, PCI_UNC_R2PCIE_PMON_CTR_2_B, PCI_R2PCIE_DEVICE, BDW_EP_VALID_OPTIONS_PBOX},
+    {"PBOX3", PMC176, PBOX, PCI_UNC_R2PCIE_PMON_CTL_3, PCI_UNC_R2PCIE_PMON_CTR_3_A, PCI_UNC_R2PCIE_PMON_CTR_3_B, PCI_R2PCIE_DEVICE, BDW_EP_VALID_OPTIONS_PBOX},
+    {"RBOX0C0", PMC177, RBOX0, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_0, BDW_EP_VALID_OPTIONS_RBOX},
+    {"RBOX0C1", PMC178, RBOX0, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_0, BDW_EP_VALID_OPTIONS_RBOX},
+    {"RBOX0C2", PMC179, RBOX0, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_0, BDW_EP_VALID_OPTIONS_RBOX},
+    {"RBOX1C0", PMC180, RBOX1, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_1, BDW_EP_VALID_OPTIONS_RBOX},
+    {"RBOX1C1", PMC181, RBOX1, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_1, BDW_EP_VALID_OPTIONS_RBOX},
+    {"RBOX1C2", PMC182, RBOX1, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_1, BDW_EP_VALID_OPTIONS_RBOX},
+    {"SBOX0C0", PMC183, SBOX0, MSR_UNC_V3_S0_PMON_CTL_0, MSR_UNC_V3_S0_PMON_CTR_0, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
+    {"SBOX0C1", PMC184, SBOX0, MSR_UNC_V3_S0_PMON_CTL_1, MSR_UNC_V3_S0_PMON_CTR_1, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
+    {"SBOX0C2", PMC185, SBOX0, MSR_UNC_V3_S0_PMON_CTL_2, MSR_UNC_V3_S0_PMON_CTR_2, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
+    {"SBOX0C3", PMC186, SBOX0, MSR_UNC_V3_S0_PMON_CTL_3, MSR_UNC_V3_S0_PMON_CTR_3, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
+    {"SBOX1C0", PMC187, SBOX1, MSR_UNC_V3_S1_PMON_CTL_0, MSR_UNC_V3_S1_PMON_CTR_0, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
+    {"SBOX1C1", PMC188, SBOX1, MSR_UNC_V3_S1_PMON_CTL_1, MSR_UNC_V3_S1_PMON_CTR_1, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
+    {"SBOX1C2", PMC189, SBOX1, MSR_UNC_V3_S1_PMON_CTL_2, MSR_UNC_V3_S1_PMON_CTR_2, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
+    {"SBOX1C3", PMC190, SBOX1, MSR_UNC_V3_S1_PMON_CTL_3, MSR_UNC_V3_S1_PMON_CTR_3, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
+    {"SBOX2C0", PMC191, SBOX2, MSR_UNC_V3_S2_PMON_CTL_0, MSR_UNC_V3_S2_PMON_CTR_0, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
+    {"SBOX2C1", PMC192, SBOX2, MSR_UNC_V3_S2_PMON_CTL_1, MSR_UNC_V3_S2_PMON_CTR_1, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
+    {"SBOX2C2", PMC193, SBOX2, MSR_UNC_V3_S2_PMON_CTL_2, MSR_UNC_V3_S2_PMON_CTR_2, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
+    {"SBOX2C3", PMC194, SBOX2, MSR_UNC_V3_S2_PMON_CTL_3, MSR_UNC_V3_S2_PMON_CTR_3, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
+    {"SBOX3C0", PMC195, SBOX3, MSR_UNC_V3_S3_PMON_CTL_0, MSR_UNC_V3_S3_PMON_CTR_0, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
+    {"SBOX3C1", PMC196, SBOX3, MSR_UNC_V3_S3_PMON_CTL_1, MSR_UNC_V3_S3_PMON_CTR_1, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
+    {"SBOX3C2", PMC197, SBOX3, MSR_UNC_V3_S3_PMON_CTL_2, MSR_UNC_V3_S3_PMON_CTR_2, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
+    {"SBOX3C3", PMC198, SBOX3, MSR_UNC_V3_S3_PMON_CTL_3, MSR_UNC_V3_S3_PMON_CTR_3, 0, 0, BDW_EP_VALID_OPTIONS_SBOX},
+    {"QBOX0C0", PMC199, QBOX0, PCI_UNC_V3_QPI_PMON_CTL_0, PCI_UNC_V3_QPI_PMON_CTR_0_A, PCI_UNC_V3_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_0, BDW_EP_VALID_OPTIONS_QBOX},
+    {"QBOX0C1", PMC200, QBOX0, PCI_UNC_V3_QPI_PMON_CTL_1, PCI_UNC_V3_QPI_PMON_CTR_1_A, PCI_UNC_V3_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_0, BDW_EP_VALID_OPTIONS_QBOX},
+    {"QBOX0C2", PMC201, QBOX0, PCI_UNC_V3_QPI_PMON_CTL_2, PCI_UNC_V3_QPI_PMON_CTR_2_A, PCI_UNC_V3_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_0, BDW_EP_VALID_OPTIONS_QBOX},
+    {"QBOX0C3", PMC202, QBOX0, PCI_UNC_V3_QPI_PMON_CTL_3, PCI_UNC_V3_QPI_PMON_CTR_3_A, PCI_UNC_V3_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_0, BDW_EP_VALID_OPTIONS_QBOX},
+    {"QBOX1C0", PMC203, QBOX1, PCI_UNC_V3_QPI_PMON_CTL_0, PCI_UNC_V3_QPI_PMON_CTR_0_A, PCI_UNC_V3_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_1, BDW_EP_VALID_OPTIONS_QBOX},
+    {"QBOX1C1", PMC204, QBOX1, PCI_UNC_V3_QPI_PMON_CTL_1, PCI_UNC_V3_QPI_PMON_CTR_1_A, PCI_UNC_V3_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_1, BDW_EP_VALID_OPTIONS_QBOX},
+    {"QBOX1C2", PMC205, QBOX1, PCI_UNC_V3_QPI_PMON_CTL_2, PCI_UNC_V3_QPI_PMON_CTR_2_A, PCI_UNC_V3_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_1, BDW_EP_VALID_OPTIONS_QBOX},
+    {"QBOX1C3", PMC206, QBOX1, PCI_UNC_V3_QPI_PMON_CTL_3, PCI_UNC_V3_QPI_PMON_CTR_3_A, PCI_UNC_V3_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_1, BDW_EP_VALID_OPTIONS_QBOX},
+    {"QBOX2C0", PMC207, QBOX2, PCI_UNC_V3_QPI_PMON_CTL_0, PCI_UNC_V3_QPI_PMON_CTR_0_A, PCI_UNC_V3_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_2, BDW_EP_VALID_OPTIONS_QBOX},
+    {"QBOX2C1", PMC208, QBOX2, PCI_UNC_V3_QPI_PMON_CTL_1, PCI_UNC_V3_QPI_PMON_CTR_1_A, PCI_UNC_V3_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_2, BDW_EP_VALID_OPTIONS_QBOX},
+    {"QBOX2C2", PMC209, QBOX2, PCI_UNC_V3_QPI_PMON_CTL_2, PCI_UNC_V3_QPI_PMON_CTR_2_A, PCI_UNC_V3_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_2, BDW_EP_VALID_OPTIONS_QBOX},
+    {"QBOX2C3", PMC210, QBOX2, PCI_UNC_V3_QPI_PMON_CTL_3, PCI_UNC_V3_QPI_PMON_CTR_3_A, PCI_UNC_V3_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_2, BDW_EP_VALID_OPTIONS_QBOX},
+    {"QBOX0FIX0", PMC211, QBOX0FIX, 0x0, PCI_UNC_V3_QPI_RATE_STATUS, 0x0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
+    {"QBOX0FIX1", PMC212, QBOX0FIX, 0x0, PCI_UNC_V3_QPI_LINK_IDLE, 0x0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
+    {"QBOX0FIX2", PMC213, QBOX0FIX, 0x0, PCI_UNC_V3_QPI_LINK_LLR, 0x0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
+    {"QBOX1FIX0", PMC214, QBOX1FIX, 0x0, PCI_UNC_V3_QPI_RATE_STATUS, 0x0, PCI_QPI_MISC_DEVICE_PORT_1, EVENT_OPTION_NONE_MASK},
+    {"QBOX1FIX1", PMC215, QBOX1FIX, 0x0, PCI_UNC_V3_QPI_LINK_IDLE, 0x0, PCI_QPI_MISC_DEVICE_PORT_1, EVENT_OPTION_NONE_MASK},
+    {"QBOX1FIX2", PMC216, QBOX1FIX, 0x0, PCI_UNC_V3_QPI_LINK_LLR, 0x0, PCI_QPI_MISC_DEVICE_PORT_1, EVENT_OPTION_NONE_MASK},
+    {"QBOX2FIX0", PMC217, QBOX2FIX, 0x0, PCI_UNC_V3_QPI_RATE_STATUS, 0x0, PCI_QPI_MISC_DEVICE_PORT_2, EVENT_OPTION_NONE_MASK},
+    {"QBOX2FIX1", PMC218, QBOX2FIX, 0x0, PCI_UNC_V3_QPI_LINK_IDLE, 0x0, PCI_QPI_MISC_DEVICE_PORT_2, EVENT_OPTION_NONE_MASK},
+    {"QBOX2FIX2", PMC219, QBOX2FIX, 0x0, PCI_UNC_V3_QPI_LINK_LLR, 0x0, PCI_QPI_MISC_DEVICE_PORT_2, EVENT_OPTION_NONE_MASK},
 };
 
 static BoxMap broadwellEP_box_map[NUM_UNITS] = {
diff --git a/src/includes/perfmon_broadwellEP_events.txt b/src/includes/perfmon_broadwellEP_events.txt
index c03e9ba..9a6221a 100644
--- a/src/includes/perfmon_broadwellEP_events.txt
+++ b/src/includes/perfmon_broadwellEP_events.txt
@@ -4,13 +4,13 @@
 #
 #      Description:  Event list for Intel Broadwell EP/EN/EX.
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
@@ -80,31 +80,35 @@ UMASK_INT_MISC_RAT_STALL_CYCLES     0x08
 DEFAULT_OPTIONS_INT_MISC_RAT_STALL_COUNT EVENT_OPTION_EDGE=1,EVENT_OPTION_THRESHOLD=0x01
 UMASK_INT_MISC_RAT_STALL_COUNT      0x08
 
-EVENT_UOPS_ISSUED                     0x0E  PMC
-UMASK_UOPS_ISSUED_ANY                 0x01
-UMASK_UOPS_ISSUED_FLAGS_MERGE         0x10
-UMASK_UOPS_ISSUED_SLOW_LEA            0x20
-UMASK_UOPS_ISSUED_SINGLE_MUL          0x40
+EVENT_UOPS_ISSUED                0x0E  PMC
+UMASK_UOPS_ISSUED_ANY            0x01
+UMASK_UOPS_ISSUED_FLAGS_MERGE    0x10
+UMASK_UOPS_ISSUED_SLOW_LEA       0x20
+UMASK_UOPS_ISSUED_SINGLE_MUL     0x40
 DEFAULT_OPTIONS_UOPS_ISSUED_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
-UMASK_UOPS_ISSUED_USED_CYCLES         0x01
+UMASK_UOPS_ISSUED_USED_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
-UMASK_UOPS_ISSUED_STALL_CYCLES        0x01
+UMASK_UOPS_ISSUED_STALL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1
-UMASK_UOPS_ISSUED_TOTAL_CYCLES        0x01
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_ANY EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_ANY            0x01
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_FLAGS_MERGE EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_FLAGS_MERGE    0x10
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_SLOW_LEA EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_SLOW_LEA       0x20
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_SINGLE_MUL EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_SINGLE_MUL     0x40
+UMASK_UOPS_ISSUED_TOTAL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_USED_CYCLES    0x01
+UMASK_UOPS_ISSUED_CORE_USED_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_ISSUED_CORE_STALL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_ISSUED_CORE_TOTAL_CYCLES   0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_ISSUED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_ISSUED_CYCLES_GE_6_UOPS_EXEC 0x01
 
 EVENT_ARITH_FPU_DIV_ACTIVE       0x14  PMC
 UMASK_ARITH_FPU_DIV_ACTIVE       0x01
@@ -162,6 +166,8 @@ UMASK_EPT_WALK_CYCLES            0x10
 
 EVENT_L1D                        0x51   PMC
 UMASK_L1D_REPLACEMENT            0x01
+# Undocumented event. In tests it counted as accurately as L2_TRANS_L1D_WB.
+UMASK_L1D_M_EVICT                0x04
 
 EVENT_TX_MEM                                        0x54 PMC
 UMASK_TX_MEM_ABORT_CONFLICT                         0x01
@@ -206,36 +212,101 @@ EVENT_LOCK_CYCLES                             0x63   PMC
 UMASK_LOCK_CYCLES_SPLIT_LOCK_UC_LOCK_DURATION 0x01
 UMASK_LOCK_CYCLES_CACHE_LOCK_DURATION         0x02
 
-EVENT_IDQ                              0x79   PMC
-UMASK_IDQ_EMPTY                        0x02
-UMASK_IDQ_MITE_UOPS                    0x04
-DEFAULT_OPTIONS_IDQ_MITE_CYCLES        EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_MITE_CYCLES                  0x04
-UMASK_IDQ_DSB_UOPS                     0x08
-DEFAULT_OPTIONS_IDQ_DSB_CYCLES         EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_DSB_CYCLES                   0x08
-UMASK_IDQ_MS_DSB_UOPS                  0x10
-DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES      EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_MS_DSB_CYCLES                0x10
-DEFAULT_OPTIONS_IDQ_MS_DSB_OCCUR       EVENT_OPTION_THRESHOLD=0x01,EVENT_OPTION_EDGE=1
-UMASK_IDQ_MS_DSB_OCCUR                 0x10
-UMASK_IDQ_MS_MITE_UOPS                 0x20
-DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES     EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_MS_MITE_CYCLES               0x20
-UMASK_IDQ_MS_UOPS                      0x30
-DEFAULT_OPTIONS_IDQ_MS_CYCLES          EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_MS_CYCLES                    0x30
-DEFAULT_OPTIONS_IDQ_MS_SWITCHES        EVENT_OPTION_THRESHOLD=0x01,EVENT_OPTION_EDGE=1
-UMASK_IDQ_MS_SWITCHES                  0x30
-DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS      0x18
-DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x04
-UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS        0x18
-DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS     0x24
-DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x04
-UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS       0x24
-UMASK_IDQ_MITE_ALL_UOPS       0x3C
+EVENT_IDQ                               0x79   PMC
+UMASK_IDQ_EMPTY                         0x02
+UMASK_IDQ_MITE_UOPS                     0x04
+UMASK_IDQ_DSB_UOPS                      0x08
+UMASK_IDQ_MS_DSB_UOPS                   0x10
+UMASK_IDQ_MS_MITE_UOPS                  0x20
+UMASK_IDQ_MS_UOPS                       0x30
+UMASK_IDQ_ALL_DSB_UOPS                  0x18
+UMASK_IDQ_MITE_ALL_UOPS                 0x24
+UMASK_IDQ_ALL_UOPS                      0x3C
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES         EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES                   0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES_1_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MITE_CYCLES_2_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MITE_CYCLES_3_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MITE_CYCLES_4_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES          EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES                    0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES_1_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_DSB_CYCLES_2_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_DSB_CYCLES_3_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_DSB_CYCLES_4_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES       EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES                 0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES_1_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_DSB_CYCLES_2_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_DSB_CYCLES_3_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_DSB_CYCLES_4_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_OCCUR        EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
+UMASK_IDQ_MS_DSB_OCCUR                  0x10
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES      EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES                0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES_1_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_MITE_CYCLES_2_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_MITE_CYCLES_3_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_MITE_CYCLES_4_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_CYCLES           EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES                     0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES_1_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_CYCLES_2_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_CYCLES_3_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_CYCLES_4_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_SWITCHES         EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
+UMASK_IDQ_MS_SWITCHES                   0x30
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS       0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_DSB_CYCLES_1_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_DSB_CYCLES_2_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_DSB_CYCLES_3_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS      0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_1_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_MITE_CYCLES_2_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_MITE_CYCLES_3_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_ANY_UOPS      0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_1_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_CYCLES_2_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_CYCLES_3_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_CYCLES_4_UOPS        0x3C
+
 
 EVENT_ICACHE                  0x80   PMC
 UMASK_ICACHE_HIT              0x01
@@ -335,29 +406,45 @@ UMASK_CYCLE_ACTIVITY_CYCLES_L1D_MISS    0x08
 EVENT_CYCLE_ACTIVITY_CYCLES             0xA3 PMC
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L2_MISS    EVENT_OPTION_THRESHOLD=0x01
 UMASK_CYCLE_ACTIVITY_CYCLES_L2_MISS     0x01
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L2_PENDING EVENT_OPTION_THRESHOLD=0x01
+UMASK_CYCLE_ACTIVITY_CYCLES_L2_PENDING  0x01
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_MEM_ANY    EVENT_OPTION_THRESHOLD=0x02
 UMASK_CYCLE_ACTIVITY_CYCLES_MEM_ANY     0x02
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_LDM_PENDING EVENT_OPTION_THRESHOLD=0x02
+UMASK_CYCLE_ACTIVITY_CYCLES_LDM_PENDING 0x02
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE EVENT_OPTION_THRESHOLD=0x04
 UMASK_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE  0x04
 
 EVENT_CYCLE_ACTIVITY_STALLS_L1D_MISS    0xA3 PMC2
-DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L1D_MISS    EVENT_OPTION_THRESHOLD=0x0C
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L1D_MISS    EVENT_OPTION_THRESHOLD=0x0C
 UMASK_CYCLE_ACTIVITY_STALLS_L1D_MISS    0x0C
 
 EVENT_CYCLE_ACTIVITY_STALLS             0xA3 PMC
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L2_MISS    EVENT_OPTION_THRESHOLD=0x05
 UMASK_CYCLE_ACTIVITY_STALLS_L2_MISS     0x05
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L2_PENDING EVENT_OPTION_THRESHOLD=0x05
+UMASK_CYCLE_ACTIVITY_STALLS_L2_PENDING  0x05
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_MEM_ANY    EVENT_OPTION_THRESHOLD=0x06
 UMASK_CYCLE_ACTIVITY_STALLS_MEM_ANY     0x06
-DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_TOTAL    EVENT_OPTION_THRESHOLD=0x04
-UMASK_CYCLE_ACTIVITY_STALLS_TOTAL       0x04
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_LDM_PENDING EVENT_OPTION_THRESHOLD=0x06
+UMASK_CYCLE_ACTIVITY_STALLS_LDM_PENDING 0x06
+
 
 EVENT_LSD_UOPS                 0xA8   PMC
 UMASK_LSD_UOPS                 0x01
+DEFAULT_OPTIONS_LSD_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_LSD_CYCLES_1_UOPS         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_LSD_CYCLES_2_UOPS         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_LSD_CYCLES_3_UOPS         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_LSD_CYCLES_4_UOPS         0x01
 DEFAULT_OPTIONS_LSD_CYCLES_ACTIVE EVENT_OPTION_THRESHOLD=0x01
 UMASK_LSD_CYCLES_ACTIVE        0x01
-DEFAULT_OPTIONS_LSD_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x04
-UMASK_LSD_CYCLES_4_UOPS        0x01
+DEFAULT_OPTIONS_LSD_CYCLES_INACTIVE EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
+UMASK_LSD_CYCLES_INACTIVE         0x01
+
 
 EVENT_DSB2MITE_SWITCHES_PENALTY_CYCLES 0xAB PMC
 UMASK_DSB2MITE_SWITCHES_PENALTY_CYCLES 0x02
@@ -387,7 +474,15 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
 UMASK_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC 0x01
 DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC 0x01
-UMASK_UOPS_EXECUTED_CORE                       0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_7_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x7
+UMASK_UOPS_EXECUTED_CYCLES_GE_7_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_8_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x8
+UMASK_UOPS_EXECUTED_CYCLES_GE_8_UOPS_EXEC 0x01
+UMASK_UOPS_EXECUTED_CORE                  0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
 UMASK_UOPS_EXECUTED_CORE_USED_CYCLES           0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
@@ -402,6 +497,14 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC 0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_7_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x7
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_7_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_8_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x8
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_8_UOPS_EXEC 0x02
 
 EVENT_OFFCORE_REQUESTS_BUFFER_SQ_FULL 0xB2 PMC
 UMASK_OFFCORE_REQUESTS_BUFFER_SQ_FULL 0x01
@@ -450,6 +553,23 @@ DEFAULT_OPTIONS_UOPS_RETIRED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_
 UMASK_UOPS_RETIRED_CORE_STALL_CYCLES     0x01
 DEFAULT_OPTIONS_UOPS_RETIRED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_RETIRED_CORE_TOTAL_CYCLES     0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_7_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x7
+UMASK_UOPS_RETIRED_CYCLES_GE_7_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_8_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x8
+UMASK_UOPS_RETIRED_CYCLES_GE_8_UOPS_EXEC 0x01
+
 
 EVENT_MACHINE_CLEARS                    0xC3  PMC
 DEFAULT_OPTIONS_MACHINE_CLEARS_COUNT    EVENT_OPTION_THRESHOLD=0x01,EVENT_OPTION_EDGE=1
diff --git a/src/includes/perfmon_broadwell_counters.h b/src/includes/perfmon_broadwell_counters.h
index 362e9de..1f2ea84 100644
--- a/src/includes/perfmon_broadwell_counters.h
+++ b/src/includes/perfmon_broadwell_counters.h
@@ -5,13 +5,13 @@
  *
  *      Description:  Counter Header File of perfmon module for Broadwell.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -28,8 +28,8 @@
  * =======================================================================================
  */
 
-#define NUM_COUNTERS_BROADWELL 23
-#define NUM_COUNTERS_CORE_BROADWELL 8
+#define NUM_COUNTERS_BROADWELL 27
+#define NUM_COUNTERS_CORE_BROADWELL 12
 #define NUM_COUNTERS_UNCORE_BROADWELL 15
 
 #define BDW_VALID_OPTIONS_FIXED EVENT_OPTION_ANYTHREAD_MASK|EVENT_OPTION_COUNT_KERNEL_MASK
@@ -48,24 +48,29 @@ static RegisterMap broadwell_counter_map[NUM_COUNTERS_BROADWELL] = {
     {"PMC1", PMC4, PMC, MSR_PERFEVTSEL1, MSR_PMC1, 0, 0, BDW_VALID_OPTIONS_PMC},
     {"PMC2", PMC5, PMC, MSR_PERFEVTSEL2, MSR_PMC2, 0, 0, BDW_VALID_OPTIONS_PMC|EVENT_OPTION_IN_TRANS_ABORT_MASK},
     {"PMC3", PMC6, PMC, MSR_PERFEVTSEL3, MSR_PMC3, 0, 0, BDW_VALID_OPTIONS_PMC},
+    /* Additional PMC counters: 4 more counters, each 48 bit wide, if HyperThreading is disabled */
+    {"PMC4", PMC7, PMC, MSR_PERFEVTSEL4, MSR_PMC4, 0, 0, BDW_VALID_OPTIONS_PMC},
+    {"PMC5", PMC8, PMC, MSR_PERFEVTSEL5, MSR_PMC5, 0, 0, BDW_VALID_OPTIONS_PMC},
+    {"PMC6", PMC9, PMC, MSR_PERFEVTSEL6, MSR_PMC6, 0, 0, BDW_VALID_OPTIONS_PMC|EVENT_OPTION_IN_TRANS_ABORT_MASK},
+    {"PMC7", PMC10, PMC, MSR_PERFEVTSEL7, MSR_PMC7, 0, 0, BDW_VALID_OPTIONS_PMC},
     /* Temperature Sensor*/
-    {"TMP0", PMC7, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"TMP0", PMC11, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
     /* RAPL counters */
-    {"PWR0", PMC8, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR1", PMC9, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR2", PMC10, POWER, 0, MSR_PP1_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR3", PMC11, POWER, 0, MSR_DRAM_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
-    {"CBOX0C0", PMC12, CBOX0, MSR_UNC_CBO_0_PERFEVTSEL0, MSR_UNC_CBO_0_CTR0, 0, 0, BDW_VALID_OPTIONS_CBOX},
-    {"CBOX0C1", PMC13, CBOX0, MSR_UNC_CBO_0_PERFEVTSEL1, MSR_UNC_CBO_0_CTR1, 0, 0, BDW_VALID_OPTIONS_CBOX},
-    {"CBOX1C0", PMC14, CBOX1, MSR_UNC_CBO_1_PERFEVTSEL0, MSR_UNC_CBO_1_CTR0, 0, 0, BDW_VALID_OPTIONS_CBOX},
-    {"CBOX1C1", PMC15, CBOX1, MSR_UNC_CBO_1_PERFEVTSEL1, MSR_UNC_CBO_1_CTR1, 0, 0, BDW_VALID_OPTIONS_CBOX},
-    {"CBOX2C0", PMC16, CBOX2, MSR_UNC_CBO_2_PERFEVTSEL0, MSR_UNC_CBO_2_CTR0, 0, 0, BDW_VALID_OPTIONS_CBOX},
-    {"CBOX2C1", PMC17, CBOX2, MSR_UNC_CBO_2_PERFEVTSEL1, MSR_UNC_CBO_2_CTR1, 0, 0, BDW_VALID_OPTIONS_CBOX},
-    {"CBOX3C0", PMC18, CBOX3, MSR_UNC_CBO_3_PERFEVTSEL0, MSR_UNC_CBO_3_CTR0, 0, 0, BDW_VALID_OPTIONS_CBOX},
-    {"CBOX3C1", PMC19, CBOX3, MSR_UNC_CBO_3_PERFEVTSEL1, MSR_UNC_CBO_3_CTR1, 0, 0, BDW_VALID_OPTIONS_CBOX},
-    {"UBOX0", PMC20, UBOX, MSR_UNC_ARB_PERFEVTSEL0, MSR_UNC_ARB_CTR0, 0, 0, BDW_VALID_OPTIONS_UBOX},
-    {"UBOX1", PMC21, UBOX, MSR_UNC_ARB_PERFEVTSEL1, MSR_UNC_ARB_CTR1, 0, 0, BDW_VALID_OPTIONS_UBOX},
-    {"UBOXFIX", PMC22, UBOXFIX, MSR_UNC_PERF_FIXED_CTRL, MSR_UNC_PERF_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR0", PMC12, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR1", PMC13, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR2", PMC14, POWER, 0, MSR_PP1_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR3", PMC15, POWER, 0, MSR_DRAM_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
+    {"CBOX0C0", PMC16, CBOX0, MSR_UNC_CBO_0_PERFEVTSEL0, MSR_UNC_CBO_0_CTR0, 0, 0, BDW_VALID_OPTIONS_CBOX},
+    {"CBOX0C1", PMC17, CBOX0, MSR_UNC_CBO_0_PERFEVTSEL1, MSR_UNC_CBO_0_CTR1, 0, 0, BDW_VALID_OPTIONS_CBOX},
+    {"CBOX1C0", PMC18, CBOX1, MSR_UNC_CBO_1_PERFEVTSEL0, MSR_UNC_CBO_1_CTR0, 0, 0, BDW_VALID_OPTIONS_CBOX},
+    {"CBOX1C1", PMC19, CBOX1, MSR_UNC_CBO_1_PERFEVTSEL1, MSR_UNC_CBO_1_CTR1, 0, 0, BDW_VALID_OPTIONS_CBOX},
+    {"CBOX2C0", PMC20, CBOX2, MSR_UNC_CBO_2_PERFEVTSEL0, MSR_UNC_CBO_2_CTR0, 0, 0, BDW_VALID_OPTIONS_CBOX},
+    {"CBOX2C1", PMC21, CBOX2, MSR_UNC_CBO_2_PERFEVTSEL1, MSR_UNC_CBO_2_CTR1, 0, 0, BDW_VALID_OPTIONS_CBOX},
+    {"CBOX3C0", PMC22, CBOX3, MSR_UNC_CBO_3_PERFEVTSEL0, MSR_UNC_CBO_3_CTR0, 0, 0, BDW_VALID_OPTIONS_CBOX},
+    {"CBOX3C1", PMC23, CBOX3, MSR_UNC_CBO_3_PERFEVTSEL1, MSR_UNC_CBO_3_CTR1, 0, 0, BDW_VALID_OPTIONS_CBOX},
+    {"UBOX0", PMC24, UBOX, MSR_UNC_ARB_PERFEVTSEL0, MSR_UNC_ARB_CTR0, 0, 0, BDW_VALID_OPTIONS_UBOX},
+    {"UBOX1", PMC25, UBOX, MSR_UNC_ARB_PERFEVTSEL1, MSR_UNC_ARB_CTR1, 0, 0, BDW_VALID_OPTIONS_UBOX},
+    {"UBOXFIX", PMC26, UBOXFIX, MSR_UNC_PERF_FIXED_CTRL, MSR_UNC_PERF_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
 };
 
 
diff --git a/src/includes/perfmon_broadwell_events.txt b/src/includes/perfmon_broadwell_events.txt
index 548b355..b9293ef 100644
--- a/src/includes/perfmon_broadwell_events.txt
+++ b/src/includes/perfmon_broadwell_events.txt
@@ -4,13 +4,13 @@
 #
 #      Description:  Event list for Intel Broadwell
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
@@ -80,31 +80,37 @@ UMASK_INT_MISC_RAT_STALL_CYCLES     0x08
 DEFAULT_OPTIONS_INT_MISC_RAT_STALL_COUNT EVENT_OPTION_EDGE=1,EVENT_OPTION_THRESHOLD=0x01
 UMASK_INT_MISC_RAT_STALL_COUNT      0x08
 
-EVENT_UOPS_ISSUED                     0x0E  PMC
-UMASK_UOPS_ISSUED_ANY                 0x01
-UMASK_UOPS_ISSUED_FLAGS_MERGE         0x10
-UMASK_UOPS_ISSUED_SLOW_LEA            0x20
-UMASK_UOPS_ISSUED_SINGLE_MUL          0x40
+EVENT_UOPS_ISSUED                0x0E  PMC
+UMASK_UOPS_ISSUED_ANY            0x01
+UMASK_UOPS_ISSUED_FLAGS_MERGE    0x10
+UMASK_UOPS_ISSUED_SLOW_LEA       0x20
+UMASK_UOPS_ISSUED_SINGLE_MUL     0x40
 DEFAULT_OPTIONS_UOPS_ISSUED_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
-UMASK_UOPS_ISSUED_USED_CYCLES         0x01
+UMASK_UOPS_ISSUED_USED_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
-UMASK_UOPS_ISSUED_STALL_CYCLES        0x01
+UMASK_UOPS_ISSUED_STALL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1
-UMASK_UOPS_ISSUED_TOTAL_CYCLES        0x01
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_ANY EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_ANY            0x01
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_FLAGS_MERGE EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_FLAGS_MERGE    0x10
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_SLOW_LEA EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_SLOW_LEA       0x20
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_SINGLE_MUL EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_SINGLE_MUL     0x40
+UMASK_UOPS_ISSUED_TOTAL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_USED_CYCLES    0x01
+UMASK_UOPS_ISSUED_CORE_USED_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_ISSUED_CORE_STALL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_ISSUED_CORE_TOTAL_CYCLES   0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_ISSUED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_ISSUED_CYCLES_GE_6_UOPS_EXEC 0x01
+
+
 
 EVENT_ARITH_FPU_DIV_ACTIVE       0x14  PMC
 UMASK_ARITH_FPU_DIV_ACTIVE       0x01
@@ -206,36 +212,100 @@ EVENT_LOCK_CYCLES                             0x63   PMC
 UMASK_LOCK_CYCLES_SPLIT_LOCK_UC_LOCK_DURATION 0x01
 UMASK_LOCK_CYCLES_CACHE_LOCK_DURATION         0x02
 
-EVENT_IDQ                              0x79   PMC
-UMASK_IDQ_EMPTY                        0x02
-UMASK_IDQ_MITE_UOPS                    0x04
-DEFAULT_OPTIONS_IDQ_MITE_CYCLES        EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_MITE_CYCLES                  0x04
-UMASK_IDQ_DSB_UOPS                     0x08
-DEFAULT_OPTIONS_IDQ_DSB_CYCLES         EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_DSB_CYCLES                   0x08
-UMASK_IDQ_MS_DSB_UOPS                  0x10
-DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES      EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_MS_DSB_CYCLES                0x10
-DEFAULT_OPTIONS_IDQ_MS_DSB_OCCUR       EVENT_OPTION_THRESHOLD=0x01,EVENT_OPTION_EDGE=1
-UMASK_IDQ_MS_DSB_OCCUR                 0x10
-UMASK_IDQ_MS_MITE_UOPS                 0x20
-DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES     EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_MS_MITE_CYCLES               0x20
-UMASK_IDQ_MS_UOPS                      0x30
-DEFAULT_OPTIONS_IDQ_MS_CYCLES          EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_MS_CYCLES                    0x30
-DEFAULT_OPTIONS_IDQ_MS_SWITCHES        EVENT_OPTION_THRESHOLD=0x01,EVENT_OPTION_EDGE=1
-UMASK_IDQ_MS_SWITCHES                  0x30
-DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS      0x18
-DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x04
-UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS        0x18
-DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS     0x24
-DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x04
-UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS       0x24
-UMASK_IDQ_MITE_ALL_UOPS       0x3C
+EVENT_IDQ                               0x79   PMC
+UMASK_IDQ_EMPTY                         0x02
+UMASK_IDQ_MITE_UOPS                     0x04
+UMASK_IDQ_DSB_UOPS                      0x08
+UMASK_IDQ_MS_DSB_UOPS                   0x10
+UMASK_IDQ_MS_MITE_UOPS                  0x20
+UMASK_IDQ_MS_UOPS                       0x30
+UMASK_IDQ_DSB_ALL_UOPS                  0x18
+UMASK_IDQ_MITE_ALL_UOPS                 0x24
+UMASK_IDQ_ALL_UOPS                      0x3C
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES         EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES                   0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES_1_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MITE_CYCLES_2_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MITE_CYCLES_3_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MITE_CYCLES_4_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES          EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES                    0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES_1_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_DSB_CYCLES_2_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_DSB_CYCLES_3_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_DSB_CYCLES_4_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES       EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES                 0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES_1_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_DSB_CYCLES_2_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_DSB_CYCLES_3_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_DSB_CYCLES_4_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_OCCUR        EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
+UMASK_IDQ_MS_DSB_OCCUR                  0x10
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES      EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES                0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES_1_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_MITE_CYCLES_2_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_MITE_CYCLES_3_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_MITE_CYCLES_4_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_CYCLES           EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES                     0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES_1_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_CYCLES_2_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_CYCLES_3_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_CYCLES_4_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_SWITCHES         EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
+UMASK_IDQ_MS_SWITCHES                   0x30
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS       0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_DSB_CYCLES_1_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_DSB_CYCLES_2_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_DSB_CYCLES_3_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS      0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_1_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_MITE_CYCLES_2_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_MITE_CYCLES_3_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_ANY_UOPS      0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_1_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_CYCLES_2_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_CYCLES_3_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_CYCLES_4_UOPS        0x3C
 
 EVENT_ICACHE                  0x80   PMC
 UMASK_ICACHE_HIT              0x01
@@ -335,29 +405,44 @@ UMASK_CYCLE_ACTIVITY_CYCLES_L1D_MISS    0x08
 EVENT_CYCLE_ACTIVITY_CYCLES             0xA3 PMC
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L2_MISS    EVENT_OPTION_THRESHOLD=0x01
 UMASK_CYCLE_ACTIVITY_CYCLES_L2_MISS     0x01
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L2_PENDING EVENT_OPTION_THRESHOLD=0x01
+UMASK_CYCLE_ACTIVITY_CYCLES_L2_PENDING  0x01
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_MEM_ANY    EVENT_OPTION_THRESHOLD=0x02
 UMASK_CYCLE_ACTIVITY_CYCLES_MEM_ANY     0x02
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_LDM_PENDING EVENT_OPTION_THRESHOLD=0x02
+UMASK_CYCLE_ACTIVITY_CYCLES_LDM_PENDING 0x02
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE EVENT_OPTION_THRESHOLD=0x04
 UMASK_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE  0x04
 
 EVENT_CYCLE_ACTIVITY_STALLS_L1D_MISS    0xA3 PMC2
-DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L1D_MISS    EVENT_OPTION_THRESHOLD=0x0C
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L1D_MISS    EVENT_OPTION_THRESHOLD=0x0C
 UMASK_CYCLE_ACTIVITY_STALLS_L1D_MISS    0x0C
 
 EVENT_CYCLE_ACTIVITY_STALLS             0xA3 PMC
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L2_MISS    EVENT_OPTION_THRESHOLD=0x05
 UMASK_CYCLE_ACTIVITY_STALLS_L2_MISS     0x05
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L2_PENDING EVENT_OPTION_THRESHOLD=0x05
+UMASK_CYCLE_ACTIVITY_STALLS_L2_PENDING  0x05
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_MEM_ANY    EVENT_OPTION_THRESHOLD=0x06
 UMASK_CYCLE_ACTIVITY_STALLS_MEM_ANY     0x06
-DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_TOTAL    EVENT_OPTION_THRESHOLD=0x04
-UMASK_CYCLE_ACTIVITY_STALLS_TOTAL       0x04
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_LDM_PENDING EVENT_OPTION_THRESHOLD=0x06
+UMASK_CYCLE_ACTIVITY_STALLS_LDM_PENDING 0x06
+
 
 EVENT_LSD_UOPS                 0xA8   PMC
 UMASK_LSD_UOPS                 0x01
+DEFAULT_OPTIONS_LSD_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_LSD_CYCLES_1_UOPS         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_LSD_CYCLES_2_UOPS         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_LSD_CYCLES_3_UOPS         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_LSD_CYCLES_4_UOPS         0x01
 DEFAULT_OPTIONS_LSD_CYCLES_ACTIVE EVENT_OPTION_THRESHOLD=0x01
 UMASK_LSD_CYCLES_ACTIVE        0x01
-DEFAULT_OPTIONS_LSD_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x04
-UMASK_LSD_CYCLES_4_UOPS        0x01
+DEFAULT_OPTIONS_LSD_CYCLES_INACTIVE EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
+UMASK_LSD_CYCLES_INACTIVE         0x01
 
 EVENT_DSB2MITE_SWITCHES_PENALTY_CYCLES 0xAB PMC
 UMASK_DSB2MITE_SWITCHES_PENALTY_CYCLES 0x02
@@ -387,7 +472,15 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
 UMASK_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC 0x01
 DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC 0x01
-UMASK_UOPS_EXECUTED_CORE                       0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_7_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x7
+UMASK_UOPS_EXECUTED_CYCLES_GE_7_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_8_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x8
+UMASK_UOPS_EXECUTED_CYCLES_GE_8_UOPS_EXEC 0x01
+UMASK_UOPS_EXECUTED_CORE                  0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
 UMASK_UOPS_EXECUTED_CORE_USED_CYCLES           0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
@@ -402,6 +495,14 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC 0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_7_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x7
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_7_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_8_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x8
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_8_UOPS_EXEC 0x02
 
 EVENT_OFFCORE_REQUESTS_BUFFER_SQ_FULL 0xB2 PMC
 UMASK_OFFCORE_REQUESTS_BUFFER_SQ_FULL 0x01
@@ -450,6 +551,22 @@ DEFAULT_OPTIONS_UOPS_RETIRED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_
 UMASK_UOPS_RETIRED_CORE_STALL_CYCLES     0x01
 DEFAULT_OPTIONS_UOPS_RETIRED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_RETIRED_CORE_TOTAL_CYCLES     0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_7_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x7
+UMASK_UOPS_RETIRED_CYCLES_GE_7_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_8_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x8
+UMASK_UOPS_RETIRED_CYCLES_GE_8_UOPS_EXEC 0x01
 
 EVENT_MACHINE_CLEARS                    0xC3  PMC
 DEFAULT_OPTIONS_MACHINE_CLEARS_COUNT    EVENT_OPTION_THRESHOLD=0x01,EVENT_OPTION_EDGE=1
diff --git a/src/includes/perfmon_broadwelld_counters.h b/src/includes/perfmon_broadwelld_counters.h
index c195ff2..e8deabc 100644
--- a/src/includes/perfmon_broadwelld_counters.h
+++ b/src/includes/perfmon_broadwelld_counters.h
@@ -5,13 +5,13 @@
  *
  *      Description:  Counter Header File of perfmon module for Broadwell D.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -29,8 +29,8 @@
  */
 
 
-#define NUM_COUNTERS_BROADWELLD 141
-#define NUM_COUNTERS_CORE_BROADWELLD 8
+#define NUM_COUNTERS_BROADWELLD 145
+#define NUM_COUNTERS_CORE_BROADWELLD 12
 #define NUM_COUNTERS_UNCORE_BROADWELLD 85
 
 #define BDW_D_VALID_OPTIONS_FIXED EVENT_OPTION_ANYTHREAD_MASK|EVENT_OPTION_COUNT_KERNEL_MASK
@@ -58,142 +58,147 @@ static RegisterMap broadwelld_counter_map[NUM_COUNTERS_BROADWELLD] = {
     {"PMC1", PMC4, PMC, MSR_PERFEVTSEL1, MSR_PMC1, 0, 0, BDW_D_VALID_OPTIONS_PMC},
     {"PMC2", PMC5, PMC, MSR_PERFEVTSEL2, MSR_PMC2, 0, 0, BDW_D_VALID_OPTIONS_PMC|EVENT_OPTION_IN_TRANS_ABORT_MASK},
     {"PMC3", PMC6, PMC, MSR_PERFEVTSEL3, MSR_PMC3, 0, 0, BDW_D_VALID_OPTIONS_PMC},
+    /* Additional PMC counters: 4 more, each 48 bit wide, if HyperThreading is disabled */
+    {"PMC4", PMC7, PMC, MSR_PERFEVTSEL4, MSR_PMC4, 0, 0, BDW_D_VALID_OPTIONS_PMC},
+    {"PMC5", PMC8, PMC, MSR_PERFEVTSEL5, MSR_PMC5, 0, 0, BDW_D_VALID_OPTIONS_PMC},
+    {"PMC6", PMC9, PMC, MSR_PERFEVTSEL6, MSR_PMC6, 0, 0, BDW_D_VALID_OPTIONS_PMC|EVENT_OPTION_IN_TRANS_ABORT_MASK},
+    {"PMC7", PMC10, PMC, MSR_PERFEVTSEL7, MSR_PMC7, 0, 0, BDW_D_VALID_OPTIONS_PMC},
     /* Temperature Sensor*/
-    {"TMP0", PMC7, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"TMP0", PMC11, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
     /* RAPL counters */
-    {"PWR0", PMC8, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR1", PMC9, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR2", PMC10, POWER, 0, MSR_PP1_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR3", PMC11, POWER, 0, MSR_DRAM_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
-    {"UBOX0", PMC12, UBOX, MSR_UNC_V3_U_PMON_CTL0, MSR_UNC_V3_U_PMON_CTR0,  0, 0, BDW_D_VALID_OPTIONS_UBOX},
-    {"UBOX1", PMC13, UBOX, MSR_UNC_V3_U_PMON_CTL1, MSR_UNC_V3_U_PMON_CTR1,  0, 0, BDW_D_VALID_OPTIONS_UBOX},
-    {"UBOXFIX", PMC14, UBOXFIX, MSR_UNC_V3_U_UCLK_FIXED_CTL, MSR_UNC_V3_U_UCLK_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"CBOX0C0", PMC15, CBOX0, MSR_UNC_V3_C0_PMON_CTL0, MSR_UNC_V3_C0_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX0C1", PMC16, CBOX0, MSR_UNC_V3_C0_PMON_CTL1, MSR_UNC_V3_C0_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX0C2", PMC17, CBOX0, MSR_UNC_V3_C0_PMON_CTL2, MSR_UNC_V3_C0_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX0C3", PMC18, CBOX0, MSR_UNC_V3_C0_PMON_CTL3, MSR_UNC_V3_C0_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX1C0", PMC19, CBOX1, MSR_UNC_V3_C1_PMON_CTL0, MSR_UNC_V3_C1_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX1C1", PMC20, CBOX1, MSR_UNC_V3_C1_PMON_CTL1, MSR_UNC_V3_C1_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX1C2", PMC21, CBOX1, MSR_UNC_V3_C1_PMON_CTL2, MSR_UNC_V3_C1_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX1C3", PMC22, CBOX1, MSR_UNC_V3_C1_PMON_CTL3, MSR_UNC_V3_C1_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX2C0", PMC23, CBOX2, MSR_UNC_V3_C2_PMON_CTL0, MSR_UNC_V3_C2_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX2C1", PMC24, CBOX2, MSR_UNC_V3_C2_PMON_CTL1, MSR_UNC_V3_C2_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX2C2", PMC25, CBOX2, MSR_UNC_V3_C2_PMON_CTL2, MSR_UNC_V3_C2_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX2C3", PMC26, CBOX2, MSR_UNC_V3_C2_PMON_CTL3, MSR_UNC_V3_C2_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX3C0", PMC27, CBOX3, MSR_UNC_V3_C3_PMON_CTL0, MSR_UNC_V3_C3_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX3C1", PMC28, CBOX3, MSR_UNC_V3_C3_PMON_CTL1, MSR_UNC_V3_C3_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX3C2", PMC29, CBOX3, MSR_UNC_V3_C3_PMON_CTL2, MSR_UNC_V3_C3_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX3C3", PMC30, CBOX3, MSR_UNC_V3_C3_PMON_CTL3, MSR_UNC_V3_C3_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX4C0", PMC31, CBOX4, MSR_UNC_V3_C4_PMON_CTL0, MSR_UNC_V3_C4_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX4C1", PMC32, CBOX4, MSR_UNC_V3_C4_PMON_CTL1, MSR_UNC_V3_C4_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX4C2", PMC33, CBOX4, MSR_UNC_V3_C4_PMON_CTL2, MSR_UNC_V3_C4_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX4C3", PMC34, CBOX4, MSR_UNC_V3_C4_PMON_CTL3, MSR_UNC_V3_C4_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX5C0", PMC35, CBOX5, MSR_UNC_V3_C5_PMON_CTL0, MSR_UNC_V3_C5_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX5C1", PMC36, CBOX5, MSR_UNC_V3_C5_PMON_CTL1, MSR_UNC_V3_C5_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX5C2", PMC37, CBOX5, MSR_UNC_V3_C5_PMON_CTL2, MSR_UNC_V3_C5_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX5C3", PMC38, CBOX5, MSR_UNC_V3_C5_PMON_CTL3, MSR_UNC_V3_C5_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX6C0", PMC39, CBOX6, MSR_UNC_V3_C6_PMON_CTL0, MSR_UNC_V3_C6_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX6C1", PMC40, CBOX6, MSR_UNC_V3_C6_PMON_CTL1, MSR_UNC_V3_C6_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX6C2", PMC41, CBOX6, MSR_UNC_V3_C6_PMON_CTL2, MSR_UNC_V3_C6_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX6C3", PMC42, CBOX6, MSR_UNC_V3_C6_PMON_CTL3, MSR_UNC_V3_C6_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX7C0", PMC43, CBOX7, MSR_UNC_V3_C7_PMON_CTL0, MSR_UNC_V3_C7_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX7C1", PMC44, CBOX7, MSR_UNC_V3_C7_PMON_CTL1, MSR_UNC_V3_C7_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX7C2", PMC45, CBOX7, MSR_UNC_V3_C7_PMON_CTL2, MSR_UNC_V3_C7_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX7C3", PMC46, CBOX7, MSR_UNC_V3_C7_PMON_CTL3, MSR_UNC_V3_C7_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX8C0", PMC47, CBOX8, MSR_UNC_V3_C8_PMON_CTL0, MSR_UNC_V3_C8_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX8C1", PMC48, CBOX8, MSR_UNC_V3_C8_PMON_CTL1, MSR_UNC_V3_C8_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX8C2", PMC49, CBOX8, MSR_UNC_V3_C8_PMON_CTL2, MSR_UNC_V3_C8_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX8C3", PMC50, CBOX8, MSR_UNC_V3_C8_PMON_CTL3, MSR_UNC_V3_C8_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX9C0", PMC51, CBOX9, MSR_UNC_V3_C9_PMON_CTL0, MSR_UNC_V3_C9_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX9C1", PMC52, CBOX9, MSR_UNC_V3_C9_PMON_CTL1, MSR_UNC_V3_C9_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX9C2", PMC53, CBOX9, MSR_UNC_V3_C9_PMON_CTL2, MSR_UNC_V3_C9_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX9C3", PMC54, CBOX9, MSR_UNC_V3_C9_PMON_CTL3, MSR_UNC_V3_C9_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX10C0", PMC55, CBOX10, MSR_UNC_V3_C10_PMON_CTL0, MSR_UNC_V3_C10_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX10C1", PMC56, CBOX10, MSR_UNC_V3_C10_PMON_CTL1, MSR_UNC_V3_C10_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX10C2", PMC57, CBOX10, MSR_UNC_V3_C10_PMON_CTL2, MSR_UNC_V3_C10_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX10C3", PMC58, CBOX10, MSR_UNC_V3_C10_PMON_CTL3, MSR_UNC_V3_C10_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX11C0", PMC59, CBOX11, MSR_UNC_V3_C11_PMON_CTL0, MSR_UNC_V3_C11_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX11C1", PMC60, CBOX11, MSR_UNC_V3_C11_PMON_CTL1, MSR_UNC_V3_C11_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX11C2", PMC61, CBOX11, MSR_UNC_V3_C11_PMON_CTL2, MSR_UNC_V3_C11_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX11C3", PMC62, CBOX11, MSR_UNC_V3_C11_PMON_CTL3, MSR_UNC_V3_C11_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX12C0", PMC63, CBOX12, MSR_UNC_V3_C12_PMON_CTL0, MSR_UNC_V3_C12_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX12C1", PMC64, CBOX12, MSR_UNC_V3_C12_PMON_CTL1, MSR_UNC_V3_C12_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX12C2", PMC65, CBOX12, MSR_UNC_V3_C12_PMON_CTL2, MSR_UNC_V3_C12_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX12C3", PMC66, CBOX12, MSR_UNC_V3_C12_PMON_CTL3, MSR_UNC_V3_C12_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX13C0", PMC67, CBOX13, MSR_UNC_V3_C13_PMON_CTL0, MSR_UNC_V3_C13_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX13C1", PMC68, CBOX13, MSR_UNC_V3_C13_PMON_CTL1, MSR_UNC_V3_C13_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX13C2", PMC69, CBOX13, MSR_UNC_V3_C13_PMON_CTL2, MSR_UNC_V3_C13_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX13C3", PMC70, CBOX13, MSR_UNC_V3_C13_PMON_CTL3, MSR_UNC_V3_C13_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX14C0", PMC71, CBOX14, MSR_UNC_V3_C14_PMON_CTL0, MSR_UNC_V3_C14_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX14C1", PMC72, CBOX14, MSR_UNC_V3_C14_PMON_CTL1, MSR_UNC_V3_C14_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX14C2", PMC73, CBOX14, MSR_UNC_V3_C14_PMON_CTL2, MSR_UNC_V3_C14_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX14C3", PMC74, CBOX14, MSR_UNC_V3_C14_PMON_CTL3, MSR_UNC_V3_C14_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX15C0", PMC75, CBOX15, MSR_UNC_V3_C15_PMON_CTL0, MSR_UNC_V3_C15_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX15C1", PMC76, CBOX15, MSR_UNC_V3_C15_PMON_CTL1, MSR_UNC_V3_C15_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX15C2", PMC77, CBOX15, MSR_UNC_V3_C15_PMON_CTL2, MSR_UNC_V3_C15_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"CBOX15C3", PMC78, CBOX15, MSR_UNC_V3_C15_PMON_CTL3, MSR_UNC_V3_C15_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
-    {"WBOX0", PMC79, WBOX, MSR_UNC_V3_PCU_PMON_CTL0, MSR_UNC_V3_PCU_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_WBOX},
-    {"WBOX1", PMC80, WBOX, MSR_UNC_V3_PCU_PMON_CTL1, MSR_UNC_V3_PCU_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_WBOX},
-    {"WBOX2", PMC81, WBOX, MSR_UNC_V3_PCU_PMON_CTL2, MSR_UNC_V3_PCU_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_WBOX},
-    {"WBOX3", PMC82, WBOX, MSR_UNC_V3_PCU_PMON_CTL3, MSR_UNC_V3_PCU_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_WBOX},
-    {"WBOX0FIX", PMC83, WBOX0FIX, 0, MSR_UNC_V3_PCU_CC3_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"WBOX1FIX", PMC84, WBOX0FIX, 0, MSR_UNC_V3_PCU_CC6_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"BBOX0C0", PMC85, BBOX0, PCI_UNC_HA_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_0, BDW_D_VALID_OPTIONS_BBOX},
-    {"BBOX0C1", PMC86, BBOX0, PCI_UNC_HA_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_0, BDW_D_VALID_OPTIONS_BBOX},
-    {"BBOX0C2", PMC87, BBOX0, PCI_UNC_HA_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_0, BDW_D_VALID_OPTIONS_BBOX},
-    {"BBOX0C3", PMC88, BBOX0, PCI_UNC_HA_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_0, BDW_D_VALID_OPTIONS_BBOX},
-    {"BBOX1C0", PMC89, BBOX1, PCI_UNC_HA_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_1, BDW_D_VALID_OPTIONS_BBOX},
-    {"BBOX1C1", PMC90, BBOX1, PCI_UNC_HA_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_1, BDW_D_VALID_OPTIONS_BBOX},
-    {"BBOX1C2", PMC91, BBOX1, PCI_UNC_HA_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_1, BDW_D_VALID_OPTIONS_BBOX},
-    {"BBOX1C3", PMC92, BBOX1, PCI_UNC_HA_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_1, BDW_D_VALID_OPTIONS_BBOX},
-    {"MBOX0C0", PMC93, MBOX0, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_0, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX0C1", PMC94, MBOX0, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_0, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX0C2", PMC95, MBOX0, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_0, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX0FIX", PMC96, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_INVERT_MASK},
-    {"MBOX0C3", PMC97, MBOX0, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_0, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX1C0", PMC98, MBOX1, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_1, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX1C1", PMC99, MBOX1, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_1, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX1C2", PMC100, MBOX1, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_1, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX1C3", PMC101, MBOX1, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_1, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX1FIX", PMC102, MBOX1FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_1, EVENT_OPTION_INVERT_MASK},
-    {"MBOX2C0", PMC103, MBOX2, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_2, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX2C1", PMC104, MBOX2, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_2, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX2C2", PMC105, MBOX2, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_2, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX2C3", PMC106, MBOX2, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_2, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX2FIX", PMC107, MBOX2FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_2, EVENT_OPTION_INVERT_MASK},
-    {"MBOX3C0", PMC108, MBOX3, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_3, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX3C1", PMC109, MBOX3, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_3, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX3C2", PMC110, MBOX3, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_3, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX3C3", PMC111, MBOX3, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_3, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX3FIX", PMC112, MBOX3FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_3, EVENT_OPTION_INVERT_MASK},
-    {"MBOX4C0", PMC113, MBOX4, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_0, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX4C1", PMC114, MBOX4, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_0, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX4C2", PMC115, MBOX4, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_0, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX4C3", PMC116, MBOX4, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_0, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX4FIX", PMC117, MBOX4FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_0, EVENT_OPTION_INVERT_MASK},
-    {"MBOX5C0", PMC118, MBOX5, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_1, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX5C1", PMC119, MBOX5, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_1, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX5C2", PMC120, MBOX5, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_1, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX5C3", PMC121, MBOX5, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_1, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX5FIX", PMC122, MBOX5FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_1, EVENT_OPTION_INVERT_MASK},
-    {"MBOX6C0", PMC123, MBOX6, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_2, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX6C1", PMC124, MBOX6, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_2, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX6C2", PMC125, MBOX6, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_2, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX6C3", PMC126, MBOX6, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_2, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX6FIX", PMC127, MBOX6FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_2, EVENT_OPTION_INVERT_MASK},
-    {"MBOX7C0", PMC128, MBOX7, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_3, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX7C1", PMC129, MBOX7, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_3, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX7C2", PMC130, MBOX7, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_3, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX7C3", PMC131, MBOX7, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_3, BDW_D_VALID_OPTIONS_MBOX},
-    {"MBOX7FIX", PMC132, MBOX7FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_3, EVENT_OPTION_INVERT_MASK},
-    {"IBOX0C0", PMC133, IBOX0, PCI_UNC_IRP0_PMON_CTL_0, PCI_UNC_IRP0_PMON_CTR_0, 0, PCI_IRP_DEVICE, BDW_D_VALID_OPTIONS_IBOX},
-    {"IBOX0C1", PMC134, IBOX0, PCI_UNC_IRP0_PMON_CTL_1, PCI_UNC_IRP0_PMON_CTR_1, 0, PCI_IRP_DEVICE, BDW_D_VALID_OPTIONS_IBOX},
-    {"IBOX1C0", PMC135, IBOX1, PCI_UNC_IRP1_PMON_CTL_0, PCI_UNC_IRP1_PMON_CTR_0, 0, PCI_IRP_DEVICE, BDW_D_VALID_OPTIONS_IBOX},
-    {"IBOX1C1", PMC136, IBOX1, PCI_UNC_IRP1_PMON_CTL_1, PCI_UNC_IRP1_PMON_CTR_1, 0, PCI_IRP_DEVICE, BDW_D_VALID_OPTIONS_IBOX},
-    {"PBOX0", PMC137, PBOX, PCI_UNC_R2PCIE_PMON_CTL_0, PCI_UNC_R2PCIE_PMON_CTR_0_A, PCI_UNC_R2PCIE_PMON_CTR_0_B, PCI_R2PCIE_DEVICE, BDW_D_VALID_OPTIONS_PBOX},
-    {"PBOX1", PMC138, PBOX, PCI_UNC_R2PCIE_PMON_CTL_1, PCI_UNC_R2PCIE_PMON_CTR_1_A, PCI_UNC_R2PCIE_PMON_CTR_1_B, PCI_R2PCIE_DEVICE, BDW_D_VALID_OPTIONS_PBOX},
-    {"PBOX2", PMC139, PBOX, PCI_UNC_R2PCIE_PMON_CTL_2, PCI_UNC_R2PCIE_PMON_CTR_2_A, PCI_UNC_R2PCIE_PMON_CTR_2_B, PCI_R2PCIE_DEVICE, BDW_D_VALID_OPTIONS_PBOX},
-    {"PBOX3", PMC140, PBOX, PCI_UNC_R2PCIE_PMON_CTL_3, PCI_UNC_R2PCIE_PMON_CTR_3_A, PCI_UNC_R2PCIE_PMON_CTR_3_B, PCI_R2PCIE_DEVICE, BDW_D_VALID_OPTIONS_PBOX},
+    {"PWR0", PMC12, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR1", PMC13, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR2", PMC14, POWER, 0, MSR_PP1_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR3", PMC15, POWER, 0, MSR_DRAM_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
+    {"UBOX0", PMC16, UBOX, MSR_UNC_V3_U_PMON_CTL0, MSR_UNC_V3_U_PMON_CTR0,  0, 0, BDW_D_VALID_OPTIONS_UBOX},
+    {"UBOX1", PMC17, UBOX, MSR_UNC_V3_U_PMON_CTL1, MSR_UNC_V3_U_PMON_CTR1,  0, 0, BDW_D_VALID_OPTIONS_UBOX},
+    {"UBOXFIX", PMC18, UBOXFIX, MSR_UNC_V3_U_UCLK_FIXED_CTL, MSR_UNC_V3_U_UCLK_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"CBOX0C0", PMC19, CBOX0, MSR_UNC_V3_C0_PMON_CTL0, MSR_UNC_V3_C0_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX0C1", PMC20, CBOX0, MSR_UNC_V3_C0_PMON_CTL1, MSR_UNC_V3_C0_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX0C2", PMC21, CBOX0, MSR_UNC_V3_C0_PMON_CTL2, MSR_UNC_V3_C0_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX0C3", PMC22, CBOX0, MSR_UNC_V3_C0_PMON_CTL3, MSR_UNC_V3_C0_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX1C0", PMC23, CBOX1, MSR_UNC_V3_C1_PMON_CTL0, MSR_UNC_V3_C1_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX1C1", PMC24, CBOX1, MSR_UNC_V3_C1_PMON_CTL1, MSR_UNC_V3_C1_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX1C2", PMC25, CBOX1, MSR_UNC_V3_C1_PMON_CTL2, MSR_UNC_V3_C1_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX1C3", PMC26, CBOX1, MSR_UNC_V3_C1_PMON_CTL3, MSR_UNC_V3_C1_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX2C0", PMC27, CBOX2, MSR_UNC_V3_C2_PMON_CTL0, MSR_UNC_V3_C2_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX2C1", PMC28, CBOX2, MSR_UNC_V3_C2_PMON_CTL1, MSR_UNC_V3_C2_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX2C2", PMC29, CBOX2, MSR_UNC_V3_C2_PMON_CTL2, MSR_UNC_V3_C2_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX2C3", PMC30, CBOX2, MSR_UNC_V3_C2_PMON_CTL3, MSR_UNC_V3_C2_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX3C0", PMC31, CBOX3, MSR_UNC_V3_C3_PMON_CTL0, MSR_UNC_V3_C3_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX3C1", PMC32, CBOX3, MSR_UNC_V3_C3_PMON_CTL1, MSR_UNC_V3_C3_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX3C2", PMC33, CBOX3, MSR_UNC_V3_C3_PMON_CTL2, MSR_UNC_V3_C3_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX3C3", PMC34, CBOX3, MSR_UNC_V3_C3_PMON_CTL3, MSR_UNC_V3_C3_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX4C0", PMC35, CBOX4, MSR_UNC_V3_C4_PMON_CTL0, MSR_UNC_V3_C4_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX4C1", PMC36, CBOX4, MSR_UNC_V3_C4_PMON_CTL1, MSR_UNC_V3_C4_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX4C2", PMC37, CBOX4, MSR_UNC_V3_C4_PMON_CTL2, MSR_UNC_V3_C4_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX4C3", PMC38, CBOX4, MSR_UNC_V3_C4_PMON_CTL3, MSR_UNC_V3_C4_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX5C0", PMC39, CBOX5, MSR_UNC_V3_C5_PMON_CTL0, MSR_UNC_V3_C5_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX5C1", PMC40, CBOX5, MSR_UNC_V3_C5_PMON_CTL1, MSR_UNC_V3_C5_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX5C2", PMC41, CBOX5, MSR_UNC_V3_C5_PMON_CTL2, MSR_UNC_V3_C5_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX5C3", PMC42, CBOX5, MSR_UNC_V3_C5_PMON_CTL3, MSR_UNC_V3_C5_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX6C0", PMC43, CBOX6, MSR_UNC_V3_C6_PMON_CTL0, MSR_UNC_V3_C6_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX6C1", PMC44, CBOX6, MSR_UNC_V3_C6_PMON_CTL1, MSR_UNC_V3_C6_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX6C2", PMC45, CBOX6, MSR_UNC_V3_C6_PMON_CTL2, MSR_UNC_V3_C6_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX6C3", PMC46, CBOX6, MSR_UNC_V3_C6_PMON_CTL3, MSR_UNC_V3_C6_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX7C0", PMC47, CBOX7, MSR_UNC_V3_C7_PMON_CTL0, MSR_UNC_V3_C7_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX7C1", PMC48, CBOX7, MSR_UNC_V3_C7_PMON_CTL1, MSR_UNC_V3_C7_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX7C2", PMC49, CBOX7, MSR_UNC_V3_C7_PMON_CTL2, MSR_UNC_V3_C7_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX7C3", PMC50, CBOX7, MSR_UNC_V3_C7_PMON_CTL3, MSR_UNC_V3_C7_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX8C0", PMC51, CBOX8, MSR_UNC_V3_C8_PMON_CTL0, MSR_UNC_V3_C8_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX8C1", PMC52, CBOX8, MSR_UNC_V3_C8_PMON_CTL1, MSR_UNC_V3_C8_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX8C2", PMC53, CBOX8, MSR_UNC_V3_C8_PMON_CTL2, MSR_UNC_V3_C8_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX8C3", PMC54, CBOX8, MSR_UNC_V3_C8_PMON_CTL3, MSR_UNC_V3_C8_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX9C0", PMC55, CBOX9, MSR_UNC_V3_C9_PMON_CTL0, MSR_UNC_V3_C9_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX9C1", PMC56, CBOX9, MSR_UNC_V3_C9_PMON_CTL1, MSR_UNC_V3_C9_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX9C2", PMC57, CBOX9, MSR_UNC_V3_C9_PMON_CTL2, MSR_UNC_V3_C9_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX9C3", PMC58, CBOX9, MSR_UNC_V3_C9_PMON_CTL3, MSR_UNC_V3_C9_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX10C0", PMC59, CBOX10, MSR_UNC_V3_C10_PMON_CTL0, MSR_UNC_V3_C10_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX10C1", PMC60, CBOX10, MSR_UNC_V3_C10_PMON_CTL1, MSR_UNC_V3_C10_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX10C2", PMC61, CBOX10, MSR_UNC_V3_C10_PMON_CTL2, MSR_UNC_V3_C10_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX10C3", PMC62, CBOX10, MSR_UNC_V3_C10_PMON_CTL3, MSR_UNC_V3_C10_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX11C0", PMC63, CBOX11, MSR_UNC_V3_C11_PMON_CTL0, MSR_UNC_V3_C11_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX11C1", PMC64, CBOX11, MSR_UNC_V3_C11_PMON_CTL1, MSR_UNC_V3_C11_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX11C2", PMC65, CBOX11, MSR_UNC_V3_C11_PMON_CTL2, MSR_UNC_V3_C11_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX11C3", PMC66, CBOX11, MSR_UNC_V3_C11_PMON_CTL3, MSR_UNC_V3_C11_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX12C0", PMC67, CBOX12, MSR_UNC_V3_C12_PMON_CTL0, MSR_UNC_V3_C12_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX12C1", PMC68, CBOX12, MSR_UNC_V3_C12_PMON_CTL1, MSR_UNC_V3_C12_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX12C2", PMC69, CBOX12, MSR_UNC_V3_C12_PMON_CTL2, MSR_UNC_V3_C12_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX12C3", PMC70, CBOX12, MSR_UNC_V3_C12_PMON_CTL3, MSR_UNC_V3_C12_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX13C0", PMC71, CBOX13, MSR_UNC_V3_C13_PMON_CTL0, MSR_UNC_V3_C13_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX13C1", PMC72, CBOX13, MSR_UNC_V3_C13_PMON_CTL1, MSR_UNC_V3_C13_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX13C2", PMC73, CBOX13, MSR_UNC_V3_C13_PMON_CTL2, MSR_UNC_V3_C13_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX13C3", PMC74, CBOX13, MSR_UNC_V3_C13_PMON_CTL3, MSR_UNC_V3_C13_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX14C0", PMC75, CBOX14, MSR_UNC_V3_C14_PMON_CTL0, MSR_UNC_V3_C14_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX14C1", PMC76, CBOX14, MSR_UNC_V3_C14_PMON_CTL1, MSR_UNC_V3_C14_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX14C2", PMC77, CBOX14, MSR_UNC_V3_C14_PMON_CTL2, MSR_UNC_V3_C14_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX14C3", PMC78, CBOX14, MSR_UNC_V3_C14_PMON_CTL3, MSR_UNC_V3_C14_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX15C0", PMC79, CBOX15, MSR_UNC_V3_C15_PMON_CTL0, MSR_UNC_V3_C15_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX15C1", PMC80, CBOX15, MSR_UNC_V3_C15_PMON_CTL1, MSR_UNC_V3_C15_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX15C2", PMC81, CBOX15, MSR_UNC_V3_C15_PMON_CTL2, MSR_UNC_V3_C15_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"CBOX15C3", PMC82, CBOX15, MSR_UNC_V3_C15_PMON_CTL3, MSR_UNC_V3_C15_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_CBOX},
+    {"WBOX0", PMC83, WBOX, MSR_UNC_V3_PCU_PMON_CTL0, MSR_UNC_V3_PCU_PMON_CTR0, 0, 0, BDW_D_VALID_OPTIONS_WBOX},
+    {"WBOX1", PMC84, WBOX, MSR_UNC_V3_PCU_PMON_CTL1, MSR_UNC_V3_PCU_PMON_CTR1, 0, 0, BDW_D_VALID_OPTIONS_WBOX},
+    {"WBOX2", PMC85, WBOX, MSR_UNC_V3_PCU_PMON_CTL2, MSR_UNC_V3_PCU_PMON_CTR2, 0, 0, BDW_D_VALID_OPTIONS_WBOX},
+    {"WBOX3", PMC86, WBOX, MSR_UNC_V3_PCU_PMON_CTL3, MSR_UNC_V3_PCU_PMON_CTR3, 0, 0, BDW_D_VALID_OPTIONS_WBOX},
+    {"WBOX0FIX", PMC87, WBOX0FIX, 0, MSR_UNC_V3_PCU_CC3_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"WBOX1FIX", PMC88, WBOX0FIX, 0, MSR_UNC_V3_PCU_CC6_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"BBOX0C0", PMC89, BBOX0, PCI_UNC_HA_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_0, BDW_D_VALID_OPTIONS_BBOX},
+    {"BBOX0C1", PMC90, BBOX0, PCI_UNC_HA_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_0, BDW_D_VALID_OPTIONS_BBOX},
+    {"BBOX0C2", PMC91, BBOX0, PCI_UNC_HA_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_0, BDW_D_VALID_OPTIONS_BBOX},
+    {"BBOX0C3", PMC92, BBOX0, PCI_UNC_HA_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_0, BDW_D_VALID_OPTIONS_BBOX},
+    {"BBOX1C0", PMC93, BBOX1, PCI_UNC_HA_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_1, BDW_D_VALID_OPTIONS_BBOX},
+    {"BBOX1C1", PMC94, BBOX1, PCI_UNC_HA_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_1, BDW_D_VALID_OPTIONS_BBOX},
+    {"BBOX1C2", PMC95, BBOX1, PCI_UNC_HA_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_1, BDW_D_VALID_OPTIONS_BBOX},
+    {"BBOX1C3", PMC96, BBOX1, PCI_UNC_HA_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_1, BDW_D_VALID_OPTIONS_BBOX},
+    {"MBOX0C0", PMC97, MBOX0, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_0, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX0C1", PMC98, MBOX0, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_0, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX0C2", PMC99, MBOX0, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_0, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX0FIX", PMC100, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_INVERT_MASK},
+    {"MBOX0C3", PMC101, MBOX0, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_0, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX1C0", PMC102, MBOX1, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_1, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX1C1", PMC103, MBOX1, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_1, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX1C2", PMC104, MBOX1, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_1, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX1C3", PMC105, MBOX1, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_1, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX1FIX", PMC106, MBOX1FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_1, EVENT_OPTION_INVERT_MASK},
+    {"MBOX2C0", PMC107, MBOX2, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_2, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX2C1", PMC108, MBOX2, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_2, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX2C2", PMC109, MBOX2, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_2, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX2C3", PMC110, MBOX2, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_2, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX2FIX", PMC111, MBOX2FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_2, EVENT_OPTION_INVERT_MASK},
+    {"MBOX3C0", PMC112, MBOX3, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_3, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX3C1", PMC113, MBOX3, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_3, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX3C2", PMC114, MBOX3, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_3, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX3C3", PMC115, MBOX3, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_3, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX3FIX", PMC116, MBOX3FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_3, EVENT_OPTION_INVERT_MASK},
+    {"MBOX4C0", PMC117, MBOX4, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_0, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX4C1", PMC118, MBOX4, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_0, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX4C2", PMC119, MBOX4, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_0, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX4C3", PMC120, MBOX4, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_0, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX4FIX", PMC121, MBOX4FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_0, EVENT_OPTION_INVERT_MASK},
+    {"MBOX5C0", PMC122, MBOX5, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_1, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX5C1", PMC123, MBOX5, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_1, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX5C2", PMC124, MBOX5, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_1, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX5C3", PMC125, MBOX5, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_1, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX5FIX", PMC126, MBOX5FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_1, EVENT_OPTION_INVERT_MASK},
+    {"MBOX6C0", PMC127, MBOX6, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_2, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX6C1", PMC128, MBOX6, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_2, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX6C2", PMC129, MBOX6, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_2, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX6C3", PMC130, MBOX6, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_2, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX6FIX", PMC131, MBOX6FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_2, EVENT_OPTION_INVERT_MASK},
+    {"MBOX7C0", PMC132, MBOX7, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_3, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX7C1", PMC133, MBOX7, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_3, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX7C2", PMC134, MBOX7, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_3, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX7C3", PMC135, MBOX7, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_3, BDW_D_VALID_OPTIONS_MBOX},
+    {"MBOX7FIX", PMC136, MBOX7FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_3, EVENT_OPTION_INVERT_MASK},
+    {"IBOX0C0", PMC137, IBOX0, PCI_UNC_IRP0_PMON_CTL_0, PCI_UNC_IRP0_PMON_CTR_0, 0, PCI_IRP_DEVICE, BDW_D_VALID_OPTIONS_IBOX},
+    {"IBOX0C1", PMC138, IBOX0, PCI_UNC_IRP0_PMON_CTL_1, PCI_UNC_IRP0_PMON_CTR_1, 0, PCI_IRP_DEVICE, BDW_D_VALID_OPTIONS_IBOX},
+    {"IBOX1C0", PMC139, IBOX1, PCI_UNC_IRP1_PMON_CTL_0, PCI_UNC_IRP1_PMON_CTR_0, 0, PCI_IRP_DEVICE, BDW_D_VALID_OPTIONS_IBOX},
+    {"IBOX1C1", PMC140, IBOX1, PCI_UNC_IRP1_PMON_CTL_1, PCI_UNC_IRP1_PMON_CTR_1, 0, PCI_IRP_DEVICE, BDW_D_VALID_OPTIONS_IBOX},
+    {"PBOX0", PMC141, PBOX, PCI_UNC_R2PCIE_PMON_CTL_0, PCI_UNC_R2PCIE_PMON_CTR_0_A, PCI_UNC_R2PCIE_PMON_CTR_0_B, PCI_R2PCIE_DEVICE, BDW_D_VALID_OPTIONS_PBOX},
+    {"PBOX1", PMC142, PBOX, PCI_UNC_R2PCIE_PMON_CTL_1, PCI_UNC_R2PCIE_PMON_CTR_1_A, PCI_UNC_R2PCIE_PMON_CTR_1_B, PCI_R2PCIE_DEVICE, BDW_D_VALID_OPTIONS_PBOX},
+    {"PBOX2", PMC143, PBOX, PCI_UNC_R2PCIE_PMON_CTL_2, PCI_UNC_R2PCIE_PMON_CTR_2_A, PCI_UNC_R2PCIE_PMON_CTR_2_B, PCI_R2PCIE_DEVICE, BDW_D_VALID_OPTIONS_PBOX},
+    {"PBOX3", PMC144, PBOX, PCI_UNC_R2PCIE_PMON_CTL_3, PCI_UNC_R2PCIE_PMON_CTR_3_A, PCI_UNC_R2PCIE_PMON_CTR_3_B, PCI_R2PCIE_DEVICE, BDW_D_VALID_OPTIONS_PBOX},
 };
 
 static BoxMap broadwelld_box_map[NUM_UNITS] = {
diff --git a/src/includes/perfmon_broadwelld_events.txt b/src/includes/perfmon_broadwelld_events.txt
index e52f292..a74e0d9 100644
--- a/src/includes/perfmon_broadwelld_events.txt
+++ b/src/includes/perfmon_broadwelld_events.txt
@@ -4,13 +4,13 @@
 #
 #      Description:  Event list for Intel Broadwell D
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
@@ -80,32 +80,35 @@ UMASK_INT_MISC_RAT_STALL_CYCLES     0x08
 DEFAULT_OPTIONS_INT_MISC_RAT_STALL_COUNT EVENT_OPTION_EDGE=1,EVENT_OPTION_THRESHOLD=0x01
 UMASK_INT_MISC_RAT_STALL_COUNT      0x08
 
-EVENT_UOPS_ISSUED                     0x0E  PMC
-UMASK_UOPS_ISSUED_ANY                 0x01
-UMASK_UOPS_ISSUED_FLAGS_MERGE         0x10
-UMASK_UOPS_ISSUED_SLOW_LEA            0x20
-UMASK_UOPS_ISSUED_SINGLE_MUL          0x40
+EVENT_UOPS_ISSUED                0x0E  PMC
+UMASK_UOPS_ISSUED_ANY            0x01
+UMASK_UOPS_ISSUED_FLAGS_MERGE    0x10
+UMASK_UOPS_ISSUED_SLOW_LEA       0x20
+UMASK_UOPS_ISSUED_SINGLE_MUL     0x40
 DEFAULT_OPTIONS_UOPS_ISSUED_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
-UMASK_UOPS_ISSUED_USED_CYCLES         0x01
+UMASK_UOPS_ISSUED_USED_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
-UMASK_UOPS_ISSUED_STALL_CYCLES        0x01
+UMASK_UOPS_ISSUED_STALL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1
-UMASK_UOPS_ISSUED_TOTAL_CYCLES        0x01
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_ANY EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_ANY            0x01
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_FLAGS_MERGE EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_FLAGS_MERGE    0x10
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_SLOW_LEA EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_SLOW_LEA       0x20
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_SINGLE_MUL EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_SINGLE_MUL     0x40
+UMASK_UOPS_ISSUED_TOTAL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_USED_CYCLES    0x01
+UMASK_UOPS_ISSUED_CORE_USED_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_ISSUED_CORE_STALL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_ISSUED_CORE_TOTAL_CYCLES   0x01
-
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_ISSUED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_ISSUED_CYCLES_GE_6_UOPS_EXEC 0x01
 EVENT_ARITH_FPU_DIV_ACTIVE       0x14  PMC
 UMASK_ARITH_FPU_DIV_ACTIVE       0x01
 
@@ -162,6 +165,8 @@ UMASK_EPT_WALK_CYCLES            0x10
 
 EVENT_L1D                        0x51   PMC
 UMASK_L1D_REPLACEMENT            0x01
+# Undocumented event. Tested to count as accurately as L2_TRANS_L1D_WB
+UMASK_L1D_M_EVICT                0x04
 
 EVENT_TX_MEM                                        0x54 PMC
 UMASK_TX_MEM_ABORT_CONFLICT                         0x01
@@ -206,36 +211,100 @@ EVENT_LOCK_CYCLES                             0x63   PMC
 UMASK_LOCK_CYCLES_SPLIT_LOCK_UC_LOCK_DURATION 0x01
 UMASK_LOCK_CYCLES_CACHE_LOCK_DURATION         0x02
 
-EVENT_IDQ                              0x79   PMC
-UMASK_IDQ_EMPTY                        0x02
-UMASK_IDQ_MITE_UOPS                    0x04
-DEFAULT_OPTIONS_IDQ_MITE_CYCLES        EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_MITE_CYCLES                  0x04
-UMASK_IDQ_DSB_UOPS                     0x08
-DEFAULT_OPTIONS_IDQ_DSB_CYCLES         EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_DSB_CYCLES                   0x08
-UMASK_IDQ_MS_DSB_UOPS                  0x10
-DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES      EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_MS_DSB_CYCLES                0x10
-DEFAULT_OPTIONS_IDQ_MS_DSB_OCCUR       EVENT_OPTION_THRESHOLD=0x01,EVENT_OPTION_EDGE=1
-UMASK_IDQ_MS_DSB_OCCUR                 0x10
-UMASK_IDQ_MS_MITE_UOPS                 0x20
-DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES     EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_MS_MITE_CYCLES               0x20
-UMASK_IDQ_MS_UOPS                      0x30
-DEFAULT_OPTIONS_IDQ_MS_CYCLES          EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_MS_CYCLES                    0x30
-DEFAULT_OPTIONS_IDQ_MS_SWITCHES        EVENT_OPTION_THRESHOLD=0x01,EVENT_OPTION_EDGE=1
-UMASK_IDQ_MS_SWITCHES                  0x30
-DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS      0x18
-DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x04
-UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS        0x18
-DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x01
-UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS     0x24
-DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x04
-UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS       0x24
-UMASK_IDQ_MITE_ALL_UOPS       0x3C
+EVENT_IDQ                               0x79   PMC
+UMASK_IDQ_EMPTY                         0x02
+UMASK_IDQ_MITE_UOPS                     0x04
+UMASK_IDQ_DSB_UOPS                      0x08
+UMASK_IDQ_MS_DSB_UOPS                   0x10
+UMASK_IDQ_MS_MITE_UOPS                  0x20
+UMASK_IDQ_MS_UOPS                       0x30
+UMASK_IDQ_DSB_UOPS                      0x18
+UMASK_IDQ_MITE_ALL_UOPS                 0x24
+UMASK_IDQ_ALL_UOPS                      0x3C
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES         EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES                   0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES_1_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MITE_CYCLES_2_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MITE_CYCLES_3_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MITE_CYCLES_4_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES          EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES                    0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES_1_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_DSB_CYCLES_2_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_DSB_CYCLES_3_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_DSB_CYCLES_4_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES       EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES                 0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES_1_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_DSB_CYCLES_2_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_DSB_CYCLES_3_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_DSB_CYCLES_4_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_OCCUR        EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
+UMASK_IDQ_MS_DSB_OCCUR                  0x10
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES      EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES                0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES_1_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_MITE_CYCLES_2_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_MITE_CYCLES_3_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_MITE_CYCLES_4_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_CYCLES           EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES                     0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES_1_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_CYCLES_2_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_CYCLES_3_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_CYCLES_4_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_SWITCHES         EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
+UMASK_IDQ_MS_SWITCHES                   0x30
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS       0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_DSB_CYCLES_1_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_DSB_CYCLES_2_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_DSB_CYCLES_3_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS      0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_1_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_MITE_CYCLES_2_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_MITE_CYCLES_3_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_ANY_UOPS      0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_1_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_CYCLES_2_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_CYCLES_3_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_CYCLES_4_UOPS        0x3C
 
 EVENT_ICACHE                  0x80   PMC
 UMASK_ICACHE_HIT              0x01
@@ -335,29 +404,44 @@ UMASK_CYCLE_ACTIVITY_CYCLES_L1D_MISS    0x08
 EVENT_CYCLE_ACTIVITY_CYCLES             0xA3 PMC
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L2_MISS    EVENT_OPTION_THRESHOLD=0x01
 UMASK_CYCLE_ACTIVITY_CYCLES_L2_MISS     0x01
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L2_PENDING EVENT_OPTION_THRESHOLD=0x01
+UMASK_CYCLE_ACTIVITY_CYCLES_L2_PENDING  0x01
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_MEM_ANY    EVENT_OPTION_THRESHOLD=0x02
 UMASK_CYCLE_ACTIVITY_CYCLES_MEM_ANY     0x02
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_LDM_PENDING EVENT_OPTION_THRESHOLD=0x02
+UMASK_CYCLE_ACTIVITY_CYCLES_LDM_PENDING 0x02
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE EVENT_OPTION_THRESHOLD=0x04
 UMASK_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE  0x04
 
 EVENT_CYCLE_ACTIVITY_STALLS_L1D_MISS    0xA3 PMC2
-DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L1D_MISS    EVENT_OPTION_THRESHOLD=0x0C
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L1D_MISS    EVENT_OPTION_THRESHOLD=0x0C
 UMASK_CYCLE_ACTIVITY_STALLS_L1D_MISS    0x0C
 
 EVENT_CYCLE_ACTIVITY_STALLS             0xA3 PMC
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L2_MISS    EVENT_OPTION_THRESHOLD=0x05
 UMASK_CYCLE_ACTIVITY_STALLS_L2_MISS     0x05
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L2_PENDING EVENT_OPTION_THRESHOLD=0x05
+UMASK_CYCLE_ACTIVITY_STALLS_L2_PENDING  0x05
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_MEM_ANY    EVENT_OPTION_THRESHOLD=0x06
 UMASK_CYCLE_ACTIVITY_STALLS_MEM_ANY     0x06
-DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_TOTAL    EVENT_OPTION_THRESHOLD=0x04
-UMASK_CYCLE_ACTIVITY_STALLS_TOTAL       0x04
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_LDM_PENDING EVENT_OPTION_THRESHOLD=0x06
+UMASK_CYCLE_ACTIVITY_STALLS_LDM_PENDING 0x06
+
 
 EVENT_LSD_UOPS                 0xA8   PMC
 UMASK_LSD_UOPS                 0x01
+DEFAULT_OPTIONS_LSD_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_LSD_CYCLES_1_UOPS         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_LSD_CYCLES_2_UOPS         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_LSD_CYCLES_3_UOPS         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_LSD_CYCLES_4_UOPS         0x01
 DEFAULT_OPTIONS_LSD_CYCLES_ACTIVE EVENT_OPTION_THRESHOLD=0x01
 UMASK_LSD_CYCLES_ACTIVE        0x01
-DEFAULT_OPTIONS_LSD_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x04
-UMASK_LSD_CYCLES_4_UOPS        0x01
+DEFAULT_OPTIONS_LSD_CYCLES_INACTIVE EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
+UMASK_LSD_CYCLES_INACTIVE         0x01
 
 EVENT_DSB2MITE_SWITCHES_PENALTY_CYCLES 0xAB PMC
 UMASK_DSB2MITE_SWITCHES_PENALTY_CYCLES 0x02
@@ -387,7 +471,15 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
 UMASK_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC 0x01
 DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC 0x01
-UMASK_UOPS_EXECUTED_CORE                       0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_7_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x7
+UMASK_UOPS_EXECUTED_CYCLES_GE_7_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_8_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x8
+UMASK_UOPS_EXECUTED_CYCLES_GE_8_UOPS_EXEC 0x01
+UMASK_UOPS_EXECUTED_CORE                  0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
 UMASK_UOPS_EXECUTED_CORE_USED_CYCLES           0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
@@ -402,6 +494,14 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC 0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_7_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x7
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_7_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_8_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x8
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_8_UOPS_EXEC 0x02
 
 EVENT_OFFCORE_REQUESTS_BUFFER_SQ_FULL 0xB2 PMC
 UMASK_OFFCORE_REQUESTS_BUFFER_SQ_FULL 0x01
@@ -450,6 +550,22 @@ DEFAULT_OPTIONS_UOPS_RETIRED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_
 UMASK_UOPS_RETIRED_CORE_STALL_CYCLES     0x01
 DEFAULT_OPTIONS_UOPS_RETIRED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_RETIRED_CORE_TOTAL_CYCLES     0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_7_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x7
+UMASK_UOPS_RETIRED_CYCLES_GE_7_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_8_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x8
+UMASK_UOPS_RETIRED_CYCLES_GE_8_UOPS_EXEC 0x01
 
 EVENT_MACHINE_CLEARS                    0xC3  PMC
 DEFAULT_OPTIONS_MACHINE_CLEARS_COUNT    EVENT_OPTION_THRESHOLD=0x01,EVENT_OPTION_EDGE=1
diff --git a/src/includes/perfmon_core2.h b/src/includes/perfmon_core2.h
index ec3f0af..02c55d3 100644
--- a/src/includes/perfmon_core2.h
+++ b/src/includes/perfmon_core2.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Header file of perfmon module for Intel Core 2
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -110,7 +110,7 @@ int perfmon_setupCounterThread_core2( int thread_id, PerfmonEventSet* eventSet)
     uint64_t fixed_flags = 0x0ULL;
     int cpu_id = groupSet->threads[thread_id].processorId;
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
     }
@@ -118,7 +118,7 @@ int perfmon_setupCounterThread_core2( int thread_id, PerfmonEventSet* eventSet)
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -155,7 +155,7 @@ int perfmon_startCountersThread_core2(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -176,7 +176,7 @@ int perfmon_startCountersThread_core2(int thread_id, PerfmonEventSet* eventSet)
         }
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST flags, UNFREEZE_PMC_AND_FIXED)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, flags));
@@ -203,7 +203,7 @@ int perfmon_stopCountersThread_core2(int thread_id, PerfmonEventSet* eventSet)
     int cpu_id = groupSet->threads[thread_id].processorId;
 
     /* stop counters */
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_AND_FIXED);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
@@ -215,7 +215,7 @@ int perfmon_stopCountersThread_core2(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -250,7 +250,7 @@ int perfmon_readCountersThread_core2(int thread_id, PerfmonEventSet* eventSet)
     uint64_t counter_result;
     uint64_t flags;
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, &flags));
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST flags, SAFE_PMC_FLAGS)
@@ -264,7 +264,7 @@ int perfmon_readCountersThread_core2(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -289,7 +289,7 @@ int perfmon_readCountersThread_core2(int thread_id, PerfmonEventSet* eventSet)
         }
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST flags, RESTORE_PMC_FLAGS)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, flags));
@@ -307,7 +307,7 @@ int perfmon_finalizeCountersThread_core2(int thread_id, PerfmonEventSet* eventSe
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -330,9 +330,12 @@ int perfmon_finalizeCountersThread_core2(int thread_id, PerfmonEventSet* eventSe
             CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, counter_map[index].counterRegister, 0x0ULL));
         }
     }
-    VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, LLU_CAST ovf_values_core, CLEAR_GLOBAL_OVF)
-    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, ovf_values_core));
-    VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST 0x0ULL, CLEAR_GLOBAL_CTRL)
-    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
+    if (MEASURE_CORE(eventSet))
+    {
+        VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, LLU_CAST ovf_values_core, CLEAR_GLOBAL_OVF)
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, ovf_values_core));
+        VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST 0x0ULL, CLEAR_GLOBAL_CTRL)
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
+    }
     return 0;
 }
diff --git a/src/includes/perfmon_core2_counters.h b/src/includes/perfmon_core2_counters.h
index e3ae594..ad7e088 100644
--- a/src/includes/perfmon_core2_counters.h
+++ b/src/includes/perfmon_core2_counters.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Counter header file of perfmon module for Intel Core 2
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_core2_events.txt b/src/includes/perfmon_core2_events.txt
index 93ad0b7..767666b 100644
--- a/src/includes/perfmon_core2_events.txt
+++ b/src/includes/perfmon_core2_events.txt
@@ -4,14 +4,14 @@
 #
 #      Description:  Event list for Intel Core 2
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
@@ -603,7 +603,7 @@ DEFAULT_OPTIONS_UOPS_RETIRED_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTIO
 UMASK_UOPS_RETIRED_STALL_CYCLES     0x0F
 DEFAULT_OPTIONS_UOPS_RETIRED_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0x9,EVENT_OPTION_INVERT=1
 UMASK_UOPS_RETIRED_TOTAL_CYCLES     0x0F
-DEFAULT_OPTIONS_UOPS_RETIRED_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1,EVENT_OPTION_EDGE=1
+DEFAULT_OPTIONS_UOPS_RETIRED_STALL_COUNT EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1,EVENT_OPTION_EDGE=1
 UMASK_UOPS_RETIRED_STALL_COUNT     0x0F
 
 EVENT_MACHINE_NUKES              0xC3      PMC
diff --git a/src/includes/perfmon_goldmont.h b/src/includes/perfmon_goldmont.h
index 14270fe..7c3c006 100644
--- a/src/includes/perfmon_goldmont.h
+++ b/src/includes/perfmon_goldmont.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File of perfmon module for Intel Goldmont.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -168,7 +168,7 @@ int perfmon_setupCounterThread_goldmont(
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_AND_FIXED)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
@@ -179,7 +179,7 @@ int perfmon_setupCounterThread_goldmont(
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -231,7 +231,7 @@ int perfmon_startCountersThread_goldmont(int thread_id, PerfmonEventSet* eventSe
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -270,7 +270,7 @@ int perfmon_startCountersThread_goldmont(int thread_id, PerfmonEventSet* eventSe
         }
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, LLU_CAST (1ULL<<63)|(1ULL<<62)|flags, CLEAR_PMC_AND_FIXED_OVERFLOW)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, (1ULL<<63)|(1ULL<<62)|flags));
@@ -318,7 +318,7 @@ int perfmon_stopCountersThread_goldmont(int thread_id, PerfmonEventSet* eventSet
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_AND_FIXED)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
@@ -329,7 +329,7 @@ int perfmon_stopCountersThread_goldmont(int thread_id, PerfmonEventSet* eventSet
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -357,7 +357,7 @@ int perfmon_stopCountersThread_goldmont(int thread_id, PerfmonEventSet* eventSet
                     break;
 
                 case POWER:
-                    if (haveLock && (eventSet->regTypeMask & REG_TYPE_MASK(POWER)))
+                    if (haveLock)
                     {
                         CHECK_POWER_READ_ERROR(power_read(cpu_id, counter1, (uint32_t*)&counter_result));
                         VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, STOP_POWER)
@@ -397,7 +397,7 @@ int perfmon_readCountersThread_goldmont(int thread_id, PerfmonEventSet* eventSet
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, &flags));
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST flags, SAFE_PMC_FLAGS)
@@ -411,7 +411,7 @@ int perfmon_readCountersThread_goldmont(int thread_id, PerfmonEventSet* eventSet
         {
             counter_result= 0x0ULL;
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -438,7 +438,7 @@ int perfmon_readCountersThread_goldmont(int thread_id, PerfmonEventSet* eventSet
                     break;
 
                 case POWER:
-                    if (haveLock && (eventSet->regTypeMask & REG_TYPE_MASK(POWER)))
+                    if (haveLock)
                     {
                         CHECK_POWER_READ_ERROR(power_read(cpu_id, counter1, (uint32_t*)&counter_result));
                         VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, STOP_POWER)
@@ -457,7 +457,7 @@ int perfmon_readCountersThread_goldmont(int thread_id, PerfmonEventSet* eventSet
         }
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST flags, RESTORE_PMC_FLAGS)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, flags));
@@ -472,7 +472,6 @@ int perfmon_finalizeCountersThread_goldmont(int thread_id, PerfmonEventSet* even
     int haveTileLock = 0;
     int clearPBS = 0;
     uint64_t ovf_values_core = (1ULL<<63)|(1ULL<<62);
-    uint64_t ovf_values_UBOXFIX = 0x0ULL;
     int cpu_id = groupSet->threads[thread_id].processorId;
 
     if (socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id)
@@ -486,7 +485,7 @@ int perfmon_finalizeCountersThread_goldmont(int thread_id, PerfmonEventSet* even
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -512,26 +511,12 @@ int perfmon_finalizeCountersThread_goldmont(int thread_id, PerfmonEventSet* even
                 ovf_values_core |= (1ULL<<(index+32));
                 break;
             default:
-                /*if (counter_map[index].type > UBOXFIX)
-                {
-                    if (box_map[counter_map[index].type].ovflOffset >= 0)
-                    {
-                        ovf_values_UBOXFIX |= (1ULL<<box_map[counter_map[index].type].ovflOffset);
-                    }
-                }*/
                 break;
         }
         if ((reg) && (((type == PMC)||(type == FIXED))||((type >= UBOXFIX) && (haveLock))))
         {
-            CHECK_MSR_READ_ERROR(HPMread(cpu_id, dev, reg, &ovf_values_UBOXFIX));
-            VERBOSEPRINTPCIREG(cpu_id, dev, reg, ovf_values_UBOXFIX, SHOW_CTL);
-            ovf_values_UBOXFIX = 0x0ULL;
             VERBOSEPRINTPCIREG(cpu_id, dev, reg, 0x0ULL, CLEAR_CTL);
             CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, reg, 0x0ULL));
-            if ((type >= SBOX0) && (type <= SBOX3))
-            {
-                CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, reg, 0x0ULL));
-            }
             VERBOSEPRINTPCIREG(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL, CLEAR_CTR);
             CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL));
             if (counter_map[index].counterRegister2 != 0x0)
@@ -543,7 +528,7 @@ int perfmon_finalizeCountersThread_goldmont(int thread_id, PerfmonEventSet* even
         eventSet->events[i].threadCounter[thread_id].init = FALSE;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, LLU_CAST ovf_values_core, CLEAR_GLOBAL_OVF)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, ovf_values_core));
diff --git a/src/includes/perfmon_goldmont_counters.h b/src/includes/perfmon_goldmont_counters.h
index f996cbc..d80572f 100644
--- a/src/includes/perfmon_goldmont_counters.h
+++ b/src/includes/perfmon_goldmont_counters.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Counter Header File of perfmon module for Intel Goldmont.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/includes/perfmon_goldmont_events.txt b/src/includes/perfmon_goldmont_events.txt
index 08218c3..f87660d 100644
--- a/src/includes/perfmon_goldmont_events.txt
+++ b/src/includes/perfmon_goldmont_events.txt
@@ -4,8 +4,8 @@
 #
 #      Description:  Event list for Intel Goldmont
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/includes/perfmon_haswell.h b/src/includes/perfmon_haswell.h
index b364155..38aeeda 100644
--- a/src/includes/perfmon_haswell.h
+++ b/src/includes/perfmon_haswell.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Header File of perfmon module for Intel Haswell.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -47,10 +47,16 @@ static int perfmon_numCountersHaswell = NUM_COUNTERS_HASWELL;
 static int perfmon_numCoreCountersHaswell = NUM_COUNTERS_CORE_HASWELL;
 static int perfmon_numArchEventsHaswell = NUM_ARCH_EVENTS_HASWELL;
 
+int has_did_cbox_check = 0;
 int has_cbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event);
 int hasep_cbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event);
 int (*haswell_cbox_setup)(int, RegisterIndex, PerfmonEvent *);
 
+int has_cbox_nosetup(int cpu_id, RegisterIndex index, PerfmonEvent *event) /* no-op variant of the CBOX setup callback; same signature as has_cbox_setup/hasep_cbox_setup */
+{
+    return 0; /* all arguments are unused; perfmon_init_haswell installs this in haswell_cbox_setup when its MSR probe does not succeed */
+}
+
 int perfmon_init_haswell(int cpu_id)
 {
     int ret;
@@ -58,17 +64,25 @@ int perfmon_init_haswell(int cpu_id)
     lock_acquire((int*) &tile_lock[affinity_thread2tile_lookup[cpu_id]], cpu_id);
     lock_acquire((int*) &socket_lock[affinity_core2node_lookup[cpu_id]], cpu_id);
     CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PEBS_ENABLE, 0x0ULL));
-    ret = HPMwrite(cpu_id, MSR_DEV, MSR_UNC_CBO_0_PERFEVTSEL0, 0x0ULL);
-    ret += HPMread(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, &data);
-    ret += HPMwrite(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, 0x0ULL);
-    ret += HPMread(cpu_id, MSR_DEV, MSR_UNC_CBO_0_PERFEVTSEL0, &data);
+    
     if (cpuid_info.model == HASWELL_EP)
     {
         haswell_cbox_setup = hasep_cbox_setup;
-    }
-    else if ((ret == 0) && (data == 0x0ULL))
-    {
-        haswell_cbox_setup = has_cbox_setup;
+        has_did_cbox_check = 1; /* HASWELL_EP takes the EP CBOX setup directly; no probe is performed */
+    }
+    else if ((cpuid_info.model == HASWELL || cpuid_info.model == HASWELL_M1 || cpuid_info.model == HASWELL_M2) &&
+             socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id &&
+             has_did_cbox_check == 0)
+    { /* probe runs once (has_did_cbox_check) and only on the CPU that holds the socket lock */
+        ret = HPMwrite(cpu_id, MSR_DEV, MSR_UNC_CBO_0_PERFEVTSEL0, 0x0ULL);
+        ret += HPMread(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, &data);
+        ret += HPMwrite(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, 0x0ULL);
+        ret += HPMread(cpu_id, MSR_DEV, MSR_UNC_CBO_0_PERFEVTSEL0, &data);
+        if ((ret == 0) && (data == 0x0ULL)) /* every access returned 0 and the event select reads back as zero */
+            haswell_cbox_setup = has_cbox_setup;
+        else /* an access failed or the read-back was non-zero: install the no-op setup */
+            haswell_cbox_setup = has_cbox_nosetup;
+        has_did_cbox_check = 1;
     }
     return 0;
 }
@@ -914,12 +928,12 @@ int hasep_qbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event, PciDe
 }
 
 #define HASEP_FREEZE_UNCORE \
-    if (haveLock && eventSet->regTypeMask & ~(0xFULL) && cpuid_info.model == HASWELL_EP) \
+    if (haveLock && MEASURE_UNCORE(eventSet) && cpuid_info.model == HASWELL_EP) \
     { \
         VERBOSEPRINTREG(cpu_id, MSR_UNC_V3_U_PMON_GLOBAL_CTL, LLU_CAST (1ULL<<31), FREEZE_UNCORE); \
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_V3_U_PMON_GLOBAL_CTL, (1ULL<<31))); \
     } \
-    else if (haveLock && eventSet->regTypeMask & ~(0xFULL)) \
+    else if (haveLock && MEASURE_UNCORE(eventSet) && haswell_cbox_setup == has_cbox_setup) \
     { \
         uint64_t data = 0x0ULL; \
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, &data)); \
@@ -932,12 +946,12 @@ int hasep_qbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event, PciDe
     } \
 
 #define HASEP_UNFREEZE_UNCORE \
-    if (haveLock && eventSet->regTypeMask & ~(0xFULL) && cpuid_info.model == HASWELL_EP) \
+    if (haveLock && MEASURE_UNCORE(eventSet) && cpuid_info.model == HASWELL_EP) \
     { \
         VERBOSEPRINTREG(cpu_id, MSR_UNC_V3_U_PMON_GLOBAL_CTL, LLU_CAST (1ULL<<29), UNFREEZE_UNCORE); \
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_V3_U_PMON_GLOBAL_CTL, (1ULL<<29))); \
     } \
-    else if (haveLock && eventSet->regTypeMask & ~(0xFULL)) \
+    else if (haveLock && MEASURE_UNCORE(eventSet) && haswell_cbox_setup == has_cbox_setup) \
     { \
         uint64_t data = 0x0ULL; \
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, &data)); \
@@ -947,7 +961,7 @@ int hasep_qbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event, PciDe
     }
 
 #define HASEP_UNFREEZE_UNCORE_AND_RESET_CTR \
-    if (haveLock && (eventSet->regTypeMask & ~(0xFULL))) \
+    if (haveLock && MEASURE_UNCORE(eventSet)) \
     { \
         for (int i=0;i < eventSet->numberOfEvents;i++) \
         { \
@@ -981,7 +995,7 @@ int hasep_qbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event, PciDe
     }
 
 #define HASEP_FREEZE_UNCORE_AND_RESET_CTL \
-    if (haveLock && (eventSet->regTypeMask & ~(REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)|REG_TYPE_MASK(THERMAL)|REG_TYPE_MASK(POWER)))) \
+    if (haveLock && MEASURE_UNCORE(eventSet)) \
     { \
         HASEP_FREEZE_UNCORE; \
         for (int i=0;i < eventSet->numberOfEvents;i++) \
@@ -1030,7 +1044,7 @@ int perfmon_setupCounterThread_haswell(
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_AND_FIXED)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
@@ -1042,7 +1056,7 @@ int perfmon_setupCounterThread_haswell(
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -1166,7 +1180,7 @@ int perfmon_setupCounterThread_haswell(
     }
     for (int i=UNCORE;i<NUM_UNITS;i++)
     {
-        if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(i))) && box_map[i].ctrlRegister != 0x0)
+        if (haveLock && TESTTYPE(eventSet, i) && box_map[i].ctrlRegister != 0x0)
         {
             VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL, CLEAR_UNCORE_BOX_CTRL);
             HPMwrite(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL);
@@ -1200,7 +1214,7 @@ int perfmon_startCountersThread_haswell(int thread_id, PerfmonEventSet* eventSet
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -1263,8 +1277,8 @@ int perfmon_startCountersThread_haswell(int thread_id, PerfmonEventSet* eventSet
     }
 
     HASEP_UNFREEZE_UNCORE_AND_RESET_CTR;
-
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, LLU_CAST (1ULL<<63)|(1ULL<<62)|flags, CLEAR_PMC_AND_FIXED_OVERFLOW)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, (1ULL<<63)|(1ULL<<62)|flags));
@@ -1316,17 +1330,20 @@ int has_uncore_read(int cpu_id, RegisterIndex index, PerfmonEvent *event,
         uint64_t ovf_values = 0x0ULL;
         int global_offset = box_map[type].ovflOffset;
         int test_local = 0;
+        uint32_t global_status_reg = MSR_UNC_V3_U_PMON_GLOBAL_STATUS;
+        if (cpuid_info.model == HASWELL)
+            global_status_reg = MSR_UNC_PERF_GLOBAL_STATUS;
         if (global_offset != -1)
         {
             CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV,
-                                           MSR_UNC_V3_U_PMON_GLOBAL_STATUS,
+                                           global_status_reg,
                                            &ovf_values));
-            VERBOSEPRINTREG(cpu_id, MSR_UNC_V3_U_PMON_GLOBAL_STATUS, LLU_CAST ovf_values, READ_GLOBAL_OVFL);
+            VERBOSEPRINTREG(cpu_id, global_status_reg, LLU_CAST ovf_values, READ_GLOBAL_OVFL);
             if (ovf_values & (1<<global_offset))
             {
-                VERBOSEPRINTREG(cpu_id, MSR_UNC_V3_U_PMON_GLOBAL_STATUS, LLU_CAST (1<<global_offset), CLEAR_GLOBAL_OVFL);
+                VERBOSEPRINTREG(cpu_id, global_status_reg, LLU_CAST (1<<global_offset), CLEAR_GLOBAL_OVFL);
                 CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV,
-                                                 MSR_UNC_V3_U_PMON_GLOBAL_STATUS,
+                                                 global_status_reg,
                                                  (1<<global_offset)));
                 test_local = 1;
             }
@@ -1394,7 +1411,7 @@ int perfmon_stopCountersThread_haswell(int thread_id, PerfmonEventSet* eventSet)
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_AND_FIXED)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
@@ -1408,7 +1425,7 @@ int perfmon_stopCountersThread_haswell(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -1437,7 +1454,7 @@ int perfmon_stopCountersThread_haswell(int thread_id, PerfmonEventSet* eventSet)
                     break;
 
                 case POWER:
-                    if (haveLock && (eventSet->regTypeMask & REG_TYPE_MASK(POWER)))
+                    if (haveLock)
                     {
                         CHECK_POWER_READ_ERROR(power_read(cpu_id, counter1, (uint32_t*)&counter_result));
                         VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, STOP_POWER)
@@ -1573,7 +1590,7 @@ int perfmon_stopCountersThread_haswell(int thread_id, PerfmonEventSet* eventSet)
                                 counter_result = 0;
                                 break;
                         }
-
+                        
                     }
                     else if ((eventSet->events[i].event.eventId == 0x01) ||
                              (eventSet->events[i].event.eventId == 0x02))
@@ -1609,7 +1626,7 @@ int perfmon_readCountersThread_haswell(int thread_id, PerfmonEventSet* eventSet)
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, &flags));
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST flags, SAFE_PMC_FLAGS)
@@ -1625,7 +1642,7 @@ int perfmon_readCountersThread_haswell(int thread_id, PerfmonEventSet* eventSet)
         {
             counter_result= 0x0ULL;
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -1654,7 +1671,7 @@ int perfmon_readCountersThread_haswell(int thread_id, PerfmonEventSet* eventSet)
                     break;
 
                 case POWER:
-                    if (haveLock && (eventSet->regTypeMask & REG_TYPE_MASK(POWER)))
+                    if (haveLock)
                     {
                         CHECK_POWER_READ_ERROR(power_read(cpu_id, counter1, (uint32_t*)&counter_result));
                         VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, READ_POWER)
@@ -1789,7 +1806,7 @@ int perfmon_readCountersThread_haswell(int thread_id, PerfmonEventSet* eventSet)
                                 counter_result = 0;
                                 break;
                         }
-
+                        
                     }
                     else if ((eventSet->events[i].event.eventId == 0x01) ||
                              (eventSet->events[i].event.eventId == 0x02))
@@ -1807,7 +1824,7 @@ int perfmon_readCountersThread_haswell(int thread_id, PerfmonEventSet* eventSet)
     }
 
     HASEP_UNFREEZE_UNCORE;
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         // Erratum HSW143
         //VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST flags, RESTORE_PMC_FLAGS_WORKAROUND)
@@ -1839,7 +1856,7 @@ int perfmon_finalizeCountersThread_haswell(int thread_id, PerfmonEventSet* event
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -1885,15 +1902,19 @@ int perfmon_finalizeCountersThread_haswell(int thread_id, PerfmonEventSet* event
             }
             VERBOSEPRINTPCIREG(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL, CLEAR_CTR);
             CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL));
+            if ((type >= SBOX0) && (type <= SBOX3))
+                CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL));
             if (counter_map[index].counterRegister2 != 0x0)
             {
                 VERBOSEPRINTPCIREG(cpu_id, dev, counter_map[index].counterRegister2, 0x0ULL, CLEAR_CTR);
                 CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister2, 0x0ULL));
+                if ((type >= SBOX0) && (type <= SBOX3))
+                    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister2, 0x0ULL));
             }
         }
         eventSet->events[i].threadCounter[thread_id].init = FALSE;
     }
-    if (haveLock && eventSet->regTypeMask & ~(0xFULL))
+    if (haveLock && MEASURE_UNCORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_UNC_V3_U_PMON_GLOBAL_STATUS, LLU_CAST ovf_values_uncore, CLEAR_UNCORE_OVF)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_V3_U_PMON_GLOBAL_STATUS, ovf_values_uncore));
@@ -1901,16 +1922,18 @@ int perfmon_finalizeCountersThread_haswell(int thread_id, PerfmonEventSet* event
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_V3_U_PMON_GLOBAL_CTL, 0x0ULL));
         for (int i=UNCORE;i<NUM_UNITS;i++)
         {
-            if ((eventSet->regTypeMask & (REG_TYPE_MASK(i))) && box_map[i].ctrlRegister != 0x0)
+            if (TESTTYPE(eventSet, i) && box_map[i].ctrlRegister != 0x0)
             {
                 VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL, CLEAR_UNCORE_BOX_CTRL);
                 HPMwrite(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL);
-                if (box_map[i].filterRegister1)
+                if ((i >= SBOX0) && (i <= SBOX3))
+                    HPMwrite(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL);
+                if (box_map[i].filterRegister1 != 0x0)
                 {
                     VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].filterRegister1, 0x0ULL, CLEAR_FILTER);
                     HPMwrite(cpu_id, box_map[i].device, box_map[i].filterRegister1, 0x0ULL);
                 }
-                if (box_map[i].filterRegister2)
+                if (box_map[i].filterRegister2 != 0x0)
                 {
                     VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].filterRegister2, 0x0ULL, CLEAR_FILTER);
                     HPMwrite(cpu_id, box_map[i].device, box_map[i].filterRegister2, 0x0ULL);
@@ -1919,7 +1942,7 @@ int perfmon_finalizeCountersThread_haswell(int thread_id, PerfmonEventSet* event
         }
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, LLU_CAST ovf_values_core, CLEAR_GLOBAL_OVF)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, ovf_values_core));
diff --git a/src/includes/perfmon_haswellEP_counters.h b/src/includes/perfmon_haswellEP_counters.h
index af4d524..67773ef 100644
--- a/src/includes/perfmon_haswellEP_counters.h
+++ b/src/includes/perfmon_haswellEP_counters.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Counter Header File of perfmon module for Intel Haswell EP/EN/EX.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -29,8 +29,8 @@
  * =======================================================================================
  */
 
-#define NUM_COUNTERS_HASWELL_EP 187
-#define NUM_COUNTERS_CORE_HASWELL_EP 8
+#define NUM_COUNTERS_HASWELL_EP 191
+#define NUM_COUNTERS_CORE_HASWELL_EP 12
 #define NUM_COUNTERS_UNCORE_HASWELL_EP 111
 
 #define HAS_EP_VALID_OPTIONS_FIXED EVENT_OPTION_ANYTHREAD_MASK|EVENT_OPTION_COUNT_KERNEL_MASK
@@ -59,188 +59,193 @@ static RegisterMap haswellEP_counter_map[NUM_COUNTERS_HASWELL_EP] = {
     {"PMC1", PMC4, PMC, MSR_PERFEVTSEL1, MSR_PMC1, 0, 0, HAS_EP_VALID_OPTIONS_PMC},
     {"PMC2", PMC5, PMC, MSR_PERFEVTSEL2, MSR_PMC2, 0, 0, HAS_EP_VALID_OPTIONS_PMC|EVENT_OPTION_IN_TRANS_ABORT_MASK},
     {"PMC3", PMC6, PMC, MSR_PERFEVTSEL3, MSR_PMC3, 0, 0, HAS_EP_VALID_OPTIONS_PMC},
+    /* Additional PMC Counters: 4 48bit wide if HyperThreading is disabled*/
+    {"PMC4", PMC7, PMC, MSR_PERFEVTSEL4, MSR_PMC4, 0, 0, HAS_EP_VALID_OPTIONS_PMC},
+    {"PMC5", PMC8, PMC, MSR_PERFEVTSEL5, MSR_PMC5, 0, 0, HAS_EP_VALID_OPTIONS_PMC},
+    {"PMC6", PMC9, PMC, MSR_PERFEVTSEL6, MSR_PMC6, 0, 0, HAS_EP_VALID_OPTIONS_PMC|EVENT_OPTION_IN_TRANS_ABORT_MASK},
+    {"PMC7", PMC10, PMC, MSR_PERFEVTSEL7, MSR_PMC7, 0, 0, HAS_EP_VALID_OPTIONS_PMC},
     /* Temperature Sensor*/
-    {"TMP0", PMC7, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"TMP0", PMC11, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
     /* RAPL counters */
-    {"PWR0", PMC8, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR1", PMC9, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR2", PMC10, POWER, 0, MSR_PP1_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR3", PMC11, POWER, 0, MSR_DRAM_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
-    {"CBOX0C0", PMC12, CBOX0, MSR_UNC_V3_C0_PMON_CTL0, MSR_UNC_V3_C0_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX0C1", PMC13, CBOX0, MSR_UNC_V3_C0_PMON_CTL1, MSR_UNC_V3_C0_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX0C2", PMC14, CBOX0, MSR_UNC_V3_C0_PMON_CTL2, MSR_UNC_V3_C0_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX0C3", PMC15, CBOX0, MSR_UNC_V3_C0_PMON_CTL3, MSR_UNC_V3_C0_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX1C0", PMC16, CBOX1, MSR_UNC_V3_C1_PMON_CTL0, MSR_UNC_V3_C1_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX1C1", PMC17, CBOX1, MSR_UNC_V3_C1_PMON_CTL1, MSR_UNC_V3_C1_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX1C2", PMC18, CBOX1, MSR_UNC_V3_C1_PMON_CTL2, MSR_UNC_V3_C1_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX1C3", PMC19, CBOX1, MSR_UNC_V3_C1_PMON_CTL3, MSR_UNC_V3_C1_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX2C0", PMC20, CBOX2, MSR_UNC_V3_C2_PMON_CTL0, MSR_UNC_V3_C2_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX2C1", PMC21, CBOX2, MSR_UNC_V3_C2_PMON_CTL1, MSR_UNC_V3_C2_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX2C2", PMC22, CBOX2, MSR_UNC_V3_C2_PMON_CTL2, MSR_UNC_V3_C2_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX2C3", PMC23, CBOX2, MSR_UNC_V3_C2_PMON_CTL3, MSR_UNC_V3_C2_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX3C0", PMC24, CBOX3, MSR_UNC_V3_C3_PMON_CTL0, MSR_UNC_V3_C3_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX3C1", PMC25, CBOX3, MSR_UNC_V3_C3_PMON_CTL1, MSR_UNC_V3_C3_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX3C2", PMC26, CBOX3, MSR_UNC_V3_C3_PMON_CTL2, MSR_UNC_V3_C3_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX3C3", PMC27, CBOX3, MSR_UNC_V3_C3_PMON_CTL3, MSR_UNC_V3_C3_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX4C0", PMC28, CBOX4, MSR_UNC_V3_C4_PMON_CTL0, MSR_UNC_V3_C4_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX4C1", PMC29, CBOX4, MSR_UNC_V3_C4_PMON_CTL1, MSR_UNC_V3_C4_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX4C2", PMC30, CBOX4, MSR_UNC_V3_C4_PMON_CTL2, MSR_UNC_V3_C4_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX4C3", PMC31, CBOX4, MSR_UNC_V3_C4_PMON_CTL3, MSR_UNC_V3_C4_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX5C0", PMC32, CBOX5, MSR_UNC_V3_C5_PMON_CTL0, MSR_UNC_V3_C5_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX5C1", PMC33, CBOX5, MSR_UNC_V3_C5_PMON_CTL1, MSR_UNC_V3_C5_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX5C2", PMC34, CBOX5, MSR_UNC_V3_C5_PMON_CTL2, MSR_UNC_V3_C5_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX5C3", PMC35, CBOX5, MSR_UNC_V3_C5_PMON_CTL3, MSR_UNC_V3_C5_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX6C0", PMC36, CBOX6, MSR_UNC_V3_C6_PMON_CTL0, MSR_UNC_V3_C6_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX6C1", PMC37, CBOX6, MSR_UNC_V3_C6_PMON_CTL1, MSR_UNC_V3_C6_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX6C2", PMC38, CBOX6, MSR_UNC_V3_C6_PMON_CTL2, MSR_UNC_V3_C6_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX6C3", PMC39, CBOX6, MSR_UNC_V3_C6_PMON_CTL3, MSR_UNC_V3_C6_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX7C0", PMC40, CBOX7, MSR_UNC_V3_C7_PMON_CTL0, MSR_UNC_V3_C7_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX7C1", PMC41, CBOX7, MSR_UNC_V3_C7_PMON_CTL1, MSR_UNC_V3_C7_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX7C2", PMC42, CBOX7, MSR_UNC_V3_C7_PMON_CTL2, MSR_UNC_V3_C7_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX7C3", PMC43, CBOX7, MSR_UNC_V3_C7_PMON_CTL3, MSR_UNC_V3_C7_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX8C0", PMC44, CBOX8, MSR_UNC_V3_C8_PMON_CTL0, MSR_UNC_V3_C8_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX8C1", PMC45, CBOX8, MSR_UNC_V3_C8_PMON_CTL1, MSR_UNC_V3_C8_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX8C2", PMC46, CBOX8, MSR_UNC_V3_C8_PMON_CTL2, MSR_UNC_V3_C8_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX8C3", PMC47, CBOX8, MSR_UNC_V3_C8_PMON_CTL3, MSR_UNC_V3_C8_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX9C0", PMC48, CBOX9, MSR_UNC_V3_C9_PMON_CTL0, MSR_UNC_V3_C9_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX9C1", PMC49, CBOX9, MSR_UNC_V3_C9_PMON_CTL1, MSR_UNC_V3_C9_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX9C2", PMC50, CBOX9, MSR_UNC_V3_C9_PMON_CTL2, MSR_UNC_V3_C9_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX9C3", PMC51, CBOX9, MSR_UNC_V3_C9_PMON_CTL3, MSR_UNC_V3_C9_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX10C0", PMC52, CBOX10, MSR_UNC_V3_C10_PMON_CTL0, MSR_UNC_V3_C10_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX10C1", PMC53, CBOX10, MSR_UNC_V3_C10_PMON_CTL1, MSR_UNC_V3_C10_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX10C2", PMC54, CBOX10, MSR_UNC_V3_C10_PMON_CTL2, MSR_UNC_V3_C10_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX10C3", PMC55, CBOX10, MSR_UNC_V3_C10_PMON_CTL3, MSR_UNC_V3_C10_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX11C0", PMC56, CBOX11, MSR_UNC_V3_C11_PMON_CTL0, MSR_UNC_V3_C11_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX11C1", PMC57, CBOX11, MSR_UNC_V3_C11_PMON_CTL1, MSR_UNC_V3_C11_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX11C2", PMC58, CBOX11, MSR_UNC_V3_C11_PMON_CTL2, MSR_UNC_V3_C11_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX11C3", PMC59, CBOX11, MSR_UNC_V3_C11_PMON_CTL3, MSR_UNC_V3_C11_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX12C0", PMC60, CBOX12, MSR_UNC_V3_C12_PMON_CTL0, MSR_UNC_V3_C12_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX12C1", PMC61, CBOX12, MSR_UNC_V3_C12_PMON_CTL1, MSR_UNC_V3_C12_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX12C2", PMC62, CBOX12, MSR_UNC_V3_C12_PMON_CTL2, MSR_UNC_V3_C12_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX12C3", PMC63, CBOX12, MSR_UNC_V3_C12_PMON_CTL3, MSR_UNC_V3_C12_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX13C0", PMC64, CBOX13, MSR_UNC_V3_C13_PMON_CTL0, MSR_UNC_V3_C13_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX13C1", PMC65, CBOX13, MSR_UNC_V3_C13_PMON_CTL1, MSR_UNC_V3_C13_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX13C2", PMC66, CBOX13, MSR_UNC_V3_C13_PMON_CTL2, MSR_UNC_V3_C13_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX13C3", PMC67, CBOX13, MSR_UNC_V3_C13_PMON_CTL3, MSR_UNC_V3_C13_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX14C0", PMC68, CBOX14, MSR_UNC_V3_C14_PMON_CTL0, MSR_UNC_V3_C14_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX14C1", PMC69, CBOX14, MSR_UNC_V3_C14_PMON_CTL1, MSR_UNC_V3_C14_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX14C2", PMC70, CBOX14, MSR_UNC_V3_C14_PMON_CTL2, MSR_UNC_V3_C14_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX14C3", PMC71, CBOX14, MSR_UNC_V3_C14_PMON_CTL3, MSR_UNC_V3_C14_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX15C0", PMC72, CBOX15, MSR_UNC_V3_C15_PMON_CTL0, MSR_UNC_V3_C15_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX15C1", PMC73, CBOX15, MSR_UNC_V3_C15_PMON_CTL1, MSR_UNC_V3_C15_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX15C2", PMC74, CBOX15, MSR_UNC_V3_C15_PMON_CTL2, MSR_UNC_V3_C15_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX15C3", PMC75, CBOX15, MSR_UNC_V3_C15_PMON_CTL3, MSR_UNC_V3_C15_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX16C0", PMC76, CBOX16, MSR_UNC_V3_C16_PMON_CTL0, MSR_UNC_V3_C16_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX16C1", PMC77, CBOX16, MSR_UNC_V3_C16_PMON_CTL1, MSR_UNC_V3_C16_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX16C2", PMC78, CBOX16, MSR_UNC_V3_C16_PMON_CTL2, MSR_UNC_V3_C16_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX16C3", PMC79, CBOX16, MSR_UNC_V3_C16_PMON_CTL3, MSR_UNC_V3_C16_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX17C0", PMC80, CBOX17, MSR_UNC_V3_C17_PMON_CTL0, MSR_UNC_V3_C17_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX17C1", PMC81, CBOX17, MSR_UNC_V3_C17_PMON_CTL1, MSR_UNC_V3_C17_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX17C2", PMC82, CBOX17, MSR_UNC_V3_C17_PMON_CTL2, MSR_UNC_V3_C17_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"CBOX17C3", PMC83, CBOX17, MSR_UNC_V3_C17_PMON_CTL3, MSR_UNC_V3_C17_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
-    {"UBOX0", PMC84, UBOX, MSR_UNC_V3_U_PMON_CTL0, MSR_UNC_V3_U_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_UBOX},
-    {"UBOX1", PMC85, UBOX, MSR_UNC_V3_U_PMON_CTL1, MSR_UNC_V3_U_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_UBOX},
-    {"UBOXFIX", PMC86, UBOXFIX, MSR_UNC_V3_U_UCLK_FIXED_CTL, MSR_UNC_V3_U_UCLK_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"SBOX0C0", PMC87, SBOX0, MSR_UNC_V3_S0_PMON_CTL_0, MSR_UNC_V3_S0_PMON_CTR_0, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
-    {"SBOX0C1", PMC88, SBOX0, MSR_UNC_V3_S0_PMON_CTL_1, MSR_UNC_V3_S0_PMON_CTR_1, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
-    {"SBOX0C2", PMC89, SBOX0, MSR_UNC_V3_S0_PMON_CTL_2, MSR_UNC_V3_S0_PMON_CTR_2, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
-    {"SBOX0C3", PMC90, SBOX0, MSR_UNC_V3_S0_PMON_CTL_3, MSR_UNC_V3_S0_PMON_CTR_3, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
-    {"SBOX1C0", PMC91, SBOX1, MSR_UNC_V3_S1_PMON_CTL_0, MSR_UNC_V3_S1_PMON_CTR_0, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
-    {"SBOX1C1", PMC92, SBOX1, MSR_UNC_V3_S1_PMON_CTL_1, MSR_UNC_V3_S1_PMON_CTR_1, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
-    {"SBOX1C2", PMC93, SBOX1, MSR_UNC_V3_S1_PMON_CTL_2, MSR_UNC_V3_S1_PMON_CTR_2, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
-    {"SBOX1C3", PMC94, SBOX1, MSR_UNC_V3_S1_PMON_CTL_3, MSR_UNC_V3_S1_PMON_CTR_3, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
-    {"SBOX2C0", PMC95, SBOX2, MSR_UNC_V3_S2_PMON_CTL_0, MSR_UNC_V3_S2_PMON_CTR_0, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
-    {"SBOX2C1", PMC96, SBOX2, MSR_UNC_V3_S2_PMON_CTL_1, MSR_UNC_V3_S2_PMON_CTR_1, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
-    {"SBOX2C2", PMC97, SBOX2, MSR_UNC_V3_S2_PMON_CTL_2, MSR_UNC_V3_S2_PMON_CTR_2, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
-    {"SBOX2C3", PMC98, SBOX2, MSR_UNC_V3_S2_PMON_CTL_3, MSR_UNC_V3_S2_PMON_CTR_3, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
-    {"SBOX3C0", PMC99, SBOX3, MSR_UNC_V3_S3_PMON_CTL_0, MSR_UNC_V3_S3_PMON_CTR_0, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
-    {"SBOX3C1", PMC100, SBOX3, MSR_UNC_V3_S3_PMON_CTL_1, MSR_UNC_V3_S3_PMON_CTR_1, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
-    {"SBOX3C2", PMC101, SBOX3, MSR_UNC_V3_S3_PMON_CTL_2, MSR_UNC_V3_S3_PMON_CTR_2, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
-    {"SBOX3C3", PMC102, SBOX3, MSR_UNC_V3_S3_PMON_CTL_3, MSR_UNC_V3_S3_PMON_CTR_3, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
-    {"WBOX0", PMC103, WBOX, MSR_UNC_V3_PCU_PMON_CTL0, MSR_UNC_V3_PCU_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_WBOX},
-    {"WBOX1", PMC104, WBOX, MSR_UNC_V3_PCU_PMON_CTL1, MSR_UNC_V3_PCU_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_WBOX},
-    {"WBOX2", PMC105, WBOX, MSR_UNC_V3_PCU_PMON_CTL2, MSR_UNC_V3_PCU_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_WBOX},
-    {"WBOX3", PMC106, WBOX, MSR_UNC_V3_PCU_PMON_CTL3, MSR_UNC_V3_PCU_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_WBOX},
-    {"WBOX0FIX", PMC107, WBOX0FIX, 0, MSR_UNC_V3_PCU_CC6_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"WBOX1FIX", PMC108, WBOX0FIX, 0, MSR_UNC_V3_PCU_CC3_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"WBOX2FIX", PMC109, WBOX0FIX, 0, MSR_UNC_V3_PCU_PC2_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"WBOX3FIX", PMC110, WBOX0FIX, 0, MSR_UNC_V3_PCU_PC3_CTR , 0, 0, EVENT_OPTION_NONE_MASK},
-    {"BBOX0C0", PMC111, BBOX0, PCI_UNC_HA_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_0, HAS_EP_VALID_OPTIONS_BBOX},
-    {"BBOX0C1", PMC112, BBOX0, PCI_UNC_HA_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_0, HAS_EP_VALID_OPTIONS_BBOX},
-    {"BBOX0C2", PMC113, BBOX0, PCI_UNC_HA_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_0, HAS_EP_VALID_OPTIONS_BBOX},
-    {"BBOX0C3", PMC114, BBOX0, PCI_UNC_HA_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_0, HAS_EP_VALID_OPTIONS_BBOX},
-    {"BBOX1C0", PMC115, BBOX1, PCI_UNC_HA_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_1, HAS_EP_VALID_OPTIONS_BBOX},
-    {"BBOX1C1", PMC116, BBOX1, PCI_UNC_HA_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_1, HAS_EP_VALID_OPTIONS_BBOX},
-    {"BBOX1C2", PMC117, BBOX1, PCI_UNC_HA_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_1, HAS_EP_VALID_OPTIONS_BBOX},
-    {"BBOX1C3", PMC118, BBOX1, PCI_UNC_HA_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_1, HAS_EP_VALID_OPTIONS_BBOX},
-    {"MBOX0C0", PMC119, MBOX0, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_0, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX0C1", PMC120, MBOX0, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_0, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX0C2", PMC121, MBOX0, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_0, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX0C3", PMC122, MBOX0, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_0, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX0FIX", PMC123, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
-    {"MBOX1C0", PMC124, MBOX1, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_1, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX1C1", PMC125, MBOX1, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_1, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX1C2", PMC126, MBOX1, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_1, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX1C3", PMC127, MBOX1, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_1, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX0FIX", PMC128, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
-    {"MBOX2C0", PMC129, MBOX2, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_2, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX2C1", PMC130, MBOX2, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_2, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX2C2", PMC131, MBOX2, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_2, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX2C3", PMC132, MBOX2, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_2, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX0FIX", PMC133, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
-    {"MBOX3C0", PMC134, MBOX3, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_3, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX3C1", PMC135, MBOX3, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_3, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX3C2", PMC136, MBOX3, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_3, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX3C3", PMC137, MBOX3, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_3, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX0FIX", PMC138, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
-    {"MBOX4C0", PMC139, MBOX4, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_0, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX4C1", PMC140, MBOX4, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_0, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX4C2", PMC141, MBOX4, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_0, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX4C3", PMC142, MBOX4, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_0, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX0FIX", PMC43, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
-    {"MBOX5C0", PMC144, MBOX5, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_1, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX5C1", PMC145, MBOX5, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_1, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX5C2", PMC146, MBOX5, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_1, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX5C3", PMC147, MBOX5, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_1, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX0FIX", PMC148, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
-    {"MBOX6C0", PMC149, MBOX6, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_2, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX6C1", PMC150, MBOX6, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_2, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX6C2", PMC151, MBOX6, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_2, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX6C3", PMC152, MBOX6, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_2, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX0FIX", PMC153, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
-    {"MBOX7C0", PMC154, MBOX7, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_3, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX7C1", PMC155, MBOX7, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_3, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX7C2", PMC156, MBOX7, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_3, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX7C3", PMC157, MBOX7, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_3, HAS_EP_VALID_OPTIONS_MBOX},
-    {"MBOX0FIX", PMC158, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
-    {"IBOX0C0", PMC159, IBOX0, PCI_UNC_IRP0_PMON_CTL_0, PCI_UNC_IRP0_PMON_CTR_0, 0, PCI_IRP_DEVICE, HAS_EP_VALID_OPTIONS_IBOX},
-    {"IBOX0C1", PMC160, IBOX0, PCI_UNC_IRP0_PMON_CTL_1, PCI_UNC_IRP0_PMON_CTR_1, 0, PCI_IRP_DEVICE, HAS_EP_VALID_OPTIONS_IBOX},
-    {"IBOX1C0", PMC161, IBOX1, PCI_UNC_IRP1_PMON_CTL_0, PCI_UNC_IRP1_PMON_CTR_0, 0, PCI_IRP_DEVICE, HAS_EP_VALID_OPTIONS_IBOX},
-    {"IBOX1C1", PMC162, IBOX1, PCI_UNC_IRP1_PMON_CTL_1, PCI_UNC_IRP1_PMON_CTR_1, 0, PCI_IRP_DEVICE, HAS_EP_VALID_OPTIONS_IBOX},
-    {"PBOX0", PMC163, PBOX, PCI_UNC_R2PCIE_PMON_CTL_0, PCI_UNC_R2PCIE_PMON_CTR_0_A, PCI_UNC_R2PCIE_PMON_CTR_0_B, PCI_R2PCIE_DEVICE, HAS_EP_VALID_OPTIONS_PBOX},
-    {"PBOX1", PMC164, PBOX, PCI_UNC_R2PCIE_PMON_CTL_1, PCI_UNC_R2PCIE_PMON_CTR_1_A, PCI_UNC_R2PCIE_PMON_CTR_1_B, PCI_R2PCIE_DEVICE, HAS_EP_VALID_OPTIONS_PBOX},
-    {"PBOX2", PMC165, PBOX, PCI_UNC_R2PCIE_PMON_CTL_2, PCI_UNC_R2PCIE_PMON_CTR_2_A, PCI_UNC_R2PCIE_PMON_CTR_2_B, PCI_R2PCIE_DEVICE, HAS_EP_VALID_OPTIONS_PBOX},
-    {"PBOX3", PMC166, PBOX, PCI_UNC_R2PCIE_PMON_CTL_3, PCI_UNC_R2PCIE_PMON_CTR_3_A, PCI_UNC_R2PCIE_PMON_CTR_3_B, PCI_R2PCIE_DEVICE, HAS_EP_VALID_OPTIONS_PBOX},
-    {"RBOX0C0", PMC167, RBOX0, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_0, HAS_EP_VALID_OPTIONS_RBOX},
-    {"RBOX0C1", PMC168, RBOX0, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_0, HAS_EP_VALID_OPTIONS_RBOX},
-    {"RBOX0C2", PMC169, RBOX0, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_0, HAS_EP_VALID_OPTIONS_RBOX},
-    {"RBOX1C0", PMC170, RBOX1, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_1, HAS_EP_VALID_OPTIONS_RBOX},
-    {"RBOX1C1", PMC171, RBOX1, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_1, HAS_EP_VALID_OPTIONS_RBOX},
-    {"RBOX1C2", PMC172, RBOX1, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_1, HAS_EP_VALID_OPTIONS_RBOX},
-    {"QBOX0C0", PMC173, QBOX0, PCI_UNC_V3_QPI_PMON_CTL_0, PCI_UNC_V3_QPI_PMON_CTR_0_A, PCI_UNC_V3_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_0, HAS_EP_VALID_OPTIONS_QBOX},
-    {"QBOX0C1", PMC174, QBOX0, PCI_UNC_V3_QPI_PMON_CTL_1, PCI_UNC_V3_QPI_PMON_CTR_1_A, PCI_UNC_V3_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_0, HAS_EP_VALID_OPTIONS_QBOX},
-    {"QBOX0C2", PMC175, QBOX0, PCI_UNC_V3_QPI_PMON_CTL_2, PCI_UNC_V3_QPI_PMON_CTR_2_A, PCI_UNC_V3_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_0, HAS_EP_VALID_OPTIONS_QBOX},
-    {"QBOX0C3", PMC176, QBOX0, PCI_UNC_V3_QPI_PMON_CTL_3, PCI_UNC_V3_QPI_PMON_CTR_3_A, PCI_UNC_V3_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_0, HAS_EP_VALID_OPTIONS_QBOX},
-    {"QBOX1C0", PMC177, QBOX1, PCI_UNC_V3_QPI_PMON_CTL_0, PCI_UNC_V3_QPI_PMON_CTR_0_A, PCI_UNC_V3_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_1, HAS_EP_VALID_OPTIONS_QBOX},
-    {"QBOX1C1", PMC178, QBOX1, PCI_UNC_V3_QPI_PMON_CTL_1, PCI_UNC_V3_QPI_PMON_CTR_1_A, PCI_UNC_V3_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_1, HAS_EP_VALID_OPTIONS_QBOX},
-    {"QBOX1C2", PMC179, QBOX1, PCI_UNC_V3_QPI_PMON_CTL_2, PCI_UNC_V3_QPI_PMON_CTR_2_A, PCI_UNC_V3_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_1, HAS_EP_VALID_OPTIONS_QBOX},
-    {"QBOX1C3", PMC180, QBOX1, PCI_UNC_V3_QPI_PMON_CTL_3, PCI_UNC_V3_QPI_PMON_CTR_3_A, PCI_UNC_V3_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_1, HAS_EP_VALID_OPTIONS_QBOX},
-    {"QBOX0FIX0", PMC181, QBOX0FIX, 0x0, PCI_UNC_V3_QPI_RATE_STATUS, 0x0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
-    {"QBOX0FIX1", PMC182, QBOX0FIX, 0x0, PCI_UNC_V3_QPI_LINK_IDLE, 0x0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
-    {"QBOX0FIX2", PMC183, QBOX0FIX, 0x0, PCI_UNC_V3_QPI_LINK_LLR, 0x0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
-    {"QBOX1FIX0", PMC184, QBOX1FIX, 0x0, PCI_UNC_V3_QPI_RATE_STATUS, 0x0, PCI_QPI_MISC_DEVICE_PORT_1, EVENT_OPTION_NONE_MASK},
-    {"QBOX1FIX1", PMC185, QBOX1FIX, 0x0, PCI_UNC_V3_QPI_LINK_IDLE, 0x0, PCI_QPI_MISC_DEVICE_PORT_1, EVENT_OPTION_NONE_MASK},
-    {"QBOX1FIX2", PMC186, QBOX1FIX, 0x0, PCI_UNC_V3_QPI_LINK_LLR, 0x0, PCI_QPI_MISC_DEVICE_PORT_1, EVENT_OPTION_NONE_MASK},
+    {"PWR0", PMC12, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR1", PMC13, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR2", PMC14, POWER, 0, MSR_PP1_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR3", PMC15, POWER, 0, MSR_DRAM_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
+    {"CBOX0C0", PMC16, CBOX0, MSR_UNC_V3_C0_PMON_CTL0, MSR_UNC_V3_C0_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX0C1", PMC17, CBOX0, MSR_UNC_V3_C0_PMON_CTL1, MSR_UNC_V3_C0_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX0C2", PMC18, CBOX0, MSR_UNC_V3_C0_PMON_CTL2, MSR_UNC_V3_C0_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX0C3", PMC19, CBOX0, MSR_UNC_V3_C0_PMON_CTL3, MSR_UNC_V3_C0_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX1C0", PMC20, CBOX1, MSR_UNC_V3_C1_PMON_CTL0, MSR_UNC_V3_C1_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX1C1", PMC21, CBOX1, MSR_UNC_V3_C1_PMON_CTL1, MSR_UNC_V3_C1_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX1C2", PMC22, CBOX1, MSR_UNC_V3_C1_PMON_CTL2, MSR_UNC_V3_C1_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX1C3", PMC23, CBOX1, MSR_UNC_V3_C1_PMON_CTL3, MSR_UNC_V3_C1_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX2C0", PMC24, CBOX2, MSR_UNC_V3_C2_PMON_CTL0, MSR_UNC_V3_C2_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX2C1", PMC25, CBOX2, MSR_UNC_V3_C2_PMON_CTL1, MSR_UNC_V3_C2_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX2C2", PMC26, CBOX2, MSR_UNC_V3_C2_PMON_CTL2, MSR_UNC_V3_C2_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX2C3", PMC27, CBOX2, MSR_UNC_V3_C2_PMON_CTL3, MSR_UNC_V3_C2_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX3C0", PMC28, CBOX3, MSR_UNC_V3_C3_PMON_CTL0, MSR_UNC_V3_C3_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX3C1", PMC29, CBOX3, MSR_UNC_V3_C3_PMON_CTL1, MSR_UNC_V3_C3_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX3C2", PMC30, CBOX3, MSR_UNC_V3_C3_PMON_CTL2, MSR_UNC_V3_C3_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX3C3", PMC31, CBOX3, MSR_UNC_V3_C3_PMON_CTL3, MSR_UNC_V3_C3_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX4C0", PMC32, CBOX4, MSR_UNC_V3_C4_PMON_CTL0, MSR_UNC_V3_C4_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX4C1", PMC33, CBOX4, MSR_UNC_V3_C4_PMON_CTL1, MSR_UNC_V3_C4_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX4C2", PMC34, CBOX4, MSR_UNC_V3_C4_PMON_CTL2, MSR_UNC_V3_C4_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX4C3", PMC35, CBOX4, MSR_UNC_V3_C4_PMON_CTL3, MSR_UNC_V3_C4_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX5C0", PMC36, CBOX5, MSR_UNC_V3_C5_PMON_CTL0, MSR_UNC_V3_C5_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX5C1", PMC37, CBOX5, MSR_UNC_V3_C5_PMON_CTL1, MSR_UNC_V3_C5_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX5C2", PMC38, CBOX5, MSR_UNC_V3_C5_PMON_CTL2, MSR_UNC_V3_C5_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX5C3", PMC39, CBOX5, MSR_UNC_V3_C5_PMON_CTL3, MSR_UNC_V3_C5_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX6C0", PMC40, CBOX6, MSR_UNC_V3_C6_PMON_CTL0, MSR_UNC_V3_C6_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX6C1", PMC41, CBOX6, MSR_UNC_V3_C6_PMON_CTL1, MSR_UNC_V3_C6_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX6C2", PMC42, CBOX6, MSR_UNC_V3_C6_PMON_CTL2, MSR_UNC_V3_C6_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX6C3", PMC43, CBOX6, MSR_UNC_V3_C6_PMON_CTL3, MSR_UNC_V3_C6_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX7C0", PMC44, CBOX7, MSR_UNC_V3_C7_PMON_CTL0, MSR_UNC_V3_C7_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX7C1", PMC45, CBOX7, MSR_UNC_V3_C7_PMON_CTL1, MSR_UNC_V3_C7_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX7C2", PMC46, CBOX7, MSR_UNC_V3_C7_PMON_CTL2, MSR_UNC_V3_C7_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX7C3", PMC47, CBOX7, MSR_UNC_V3_C7_PMON_CTL3, MSR_UNC_V3_C7_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX8C0", PMC48, CBOX8, MSR_UNC_V3_C8_PMON_CTL0, MSR_UNC_V3_C8_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX8C1", PMC49, CBOX8, MSR_UNC_V3_C8_PMON_CTL1, MSR_UNC_V3_C8_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX8C2", PMC50, CBOX8, MSR_UNC_V3_C8_PMON_CTL2, MSR_UNC_V3_C8_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX8C3", PMC51, CBOX8, MSR_UNC_V3_C8_PMON_CTL3, MSR_UNC_V3_C8_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX9C0", PMC52, CBOX9, MSR_UNC_V3_C9_PMON_CTL0, MSR_UNC_V3_C9_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX9C1", PMC53, CBOX9, MSR_UNC_V3_C9_PMON_CTL1, MSR_UNC_V3_C9_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX9C2", PMC54, CBOX9, MSR_UNC_V3_C9_PMON_CTL2, MSR_UNC_V3_C9_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX9C3", PMC55, CBOX9, MSR_UNC_V3_C9_PMON_CTL3, MSR_UNC_V3_C9_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX10C0", PMC56, CBOX10, MSR_UNC_V3_C10_PMON_CTL0, MSR_UNC_V3_C10_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX10C1", PMC57, CBOX10, MSR_UNC_V3_C10_PMON_CTL1, MSR_UNC_V3_C10_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX10C2", PMC58, CBOX10, MSR_UNC_V3_C10_PMON_CTL2, MSR_UNC_V3_C10_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX10C3", PMC59, CBOX10, MSR_UNC_V3_C10_PMON_CTL3, MSR_UNC_V3_C10_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX11C0", PMC60, CBOX11, MSR_UNC_V3_C11_PMON_CTL0, MSR_UNC_V3_C11_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX11C1", PMC61, CBOX11, MSR_UNC_V3_C11_PMON_CTL1, MSR_UNC_V3_C11_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX11C2", PMC62, CBOX11, MSR_UNC_V3_C11_PMON_CTL2, MSR_UNC_V3_C11_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX11C3", PMC63, CBOX11, MSR_UNC_V3_C11_PMON_CTL3, MSR_UNC_V3_C11_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX12C0", PMC64, CBOX12, MSR_UNC_V3_C12_PMON_CTL0, MSR_UNC_V3_C12_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX12C1", PMC65, CBOX12, MSR_UNC_V3_C12_PMON_CTL1, MSR_UNC_V3_C12_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX12C2", PMC66, CBOX12, MSR_UNC_V3_C12_PMON_CTL2, MSR_UNC_V3_C12_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX12C3", PMC67, CBOX12, MSR_UNC_V3_C12_PMON_CTL3, MSR_UNC_V3_C12_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX13C0", PMC68, CBOX13, MSR_UNC_V3_C13_PMON_CTL0, MSR_UNC_V3_C13_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX13C1", PMC69, CBOX13, MSR_UNC_V3_C13_PMON_CTL1, MSR_UNC_V3_C13_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX13C2", PMC70, CBOX13, MSR_UNC_V3_C13_PMON_CTL2, MSR_UNC_V3_C13_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX13C3", PMC71, CBOX13, MSR_UNC_V3_C13_PMON_CTL3, MSR_UNC_V3_C13_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX14C0", PMC72, CBOX14, MSR_UNC_V3_C14_PMON_CTL0, MSR_UNC_V3_C14_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX14C1", PMC73, CBOX14, MSR_UNC_V3_C14_PMON_CTL1, MSR_UNC_V3_C14_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX14C2", PMC74, CBOX14, MSR_UNC_V3_C14_PMON_CTL2, MSR_UNC_V3_C14_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX14C3", PMC75, CBOX14, MSR_UNC_V3_C14_PMON_CTL3, MSR_UNC_V3_C14_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX15C0", PMC76, CBOX15, MSR_UNC_V3_C15_PMON_CTL0, MSR_UNC_V3_C15_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX15C1", PMC77, CBOX15, MSR_UNC_V3_C15_PMON_CTL1, MSR_UNC_V3_C15_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX15C2", PMC78, CBOX15, MSR_UNC_V3_C15_PMON_CTL2, MSR_UNC_V3_C15_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX15C3", PMC79, CBOX15, MSR_UNC_V3_C15_PMON_CTL3, MSR_UNC_V3_C15_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX16C0", PMC80, CBOX16, MSR_UNC_V3_C16_PMON_CTL0, MSR_UNC_V3_C16_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX16C1", PMC81, CBOX16, MSR_UNC_V3_C16_PMON_CTL1, MSR_UNC_V3_C16_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX16C2", PMC82, CBOX16, MSR_UNC_V3_C16_PMON_CTL2, MSR_UNC_V3_C16_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX16C3", PMC83, CBOX16, MSR_UNC_V3_C16_PMON_CTL3, MSR_UNC_V3_C16_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX17C0", PMC84, CBOX17, MSR_UNC_V3_C17_PMON_CTL0, MSR_UNC_V3_C17_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX17C1", PMC85, CBOX17, MSR_UNC_V3_C17_PMON_CTL1, MSR_UNC_V3_C17_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX17C2", PMC86, CBOX17, MSR_UNC_V3_C17_PMON_CTL2, MSR_UNC_V3_C17_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"CBOX17C3", PMC87, CBOX17, MSR_UNC_V3_C17_PMON_CTL3, MSR_UNC_V3_C17_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_CBOX},
+    {"UBOX0", PMC88, UBOX, MSR_UNC_V3_U_PMON_CTL0, MSR_UNC_V3_U_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_UBOX},
+    {"UBOX1", PMC89, UBOX, MSR_UNC_V3_U_PMON_CTL1, MSR_UNC_V3_U_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_UBOX},
+    {"UBOXFIX", PMC90, UBOXFIX, MSR_UNC_V3_U_UCLK_FIXED_CTL, MSR_UNC_V3_U_UCLK_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"SBOX0C0", PMC91, SBOX0, MSR_UNC_V3_S0_PMON_CTL_0, MSR_UNC_V3_S0_PMON_CTR_0, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
+    {"SBOX0C1", PMC92, SBOX0, MSR_UNC_V3_S0_PMON_CTL_1, MSR_UNC_V3_S0_PMON_CTR_1, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
+    {"SBOX0C2", PMC93, SBOX0, MSR_UNC_V3_S0_PMON_CTL_2, MSR_UNC_V3_S0_PMON_CTR_2, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
+    {"SBOX0C3", PMC94, SBOX0, MSR_UNC_V3_S0_PMON_CTL_3, MSR_UNC_V3_S0_PMON_CTR_3, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
+    {"SBOX1C0", PMC95, SBOX1, MSR_UNC_V3_S1_PMON_CTL_0, MSR_UNC_V3_S1_PMON_CTR_0, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
+    {"SBOX1C1", PMC96, SBOX1, MSR_UNC_V3_S1_PMON_CTL_1, MSR_UNC_V3_S1_PMON_CTR_1, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
+    {"SBOX1C2", PMC97, SBOX1, MSR_UNC_V3_S1_PMON_CTL_2, MSR_UNC_V3_S1_PMON_CTR_2, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
+    {"SBOX1C3", PMC98, SBOX1, MSR_UNC_V3_S1_PMON_CTL_3, MSR_UNC_V3_S1_PMON_CTR_3, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
+    {"SBOX2C0", PMC99, SBOX2, MSR_UNC_V3_S2_PMON_CTL_0, MSR_UNC_V3_S2_PMON_CTR_0, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
+    {"SBOX2C1", PMC100, SBOX2, MSR_UNC_V3_S2_PMON_CTL_1, MSR_UNC_V3_S2_PMON_CTR_1, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
+    {"SBOX2C2", PMC101, SBOX2, MSR_UNC_V3_S2_PMON_CTL_2, MSR_UNC_V3_S2_PMON_CTR_2, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
+    {"SBOX2C3", PMC102, SBOX2, MSR_UNC_V3_S2_PMON_CTL_3, MSR_UNC_V3_S2_PMON_CTR_3, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
+    {"SBOX3C0", PMC103, SBOX3, MSR_UNC_V3_S3_PMON_CTL_0, MSR_UNC_V3_S3_PMON_CTR_0, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
+    {"SBOX3C1", PMC104, SBOX3, MSR_UNC_V3_S3_PMON_CTL_1, MSR_UNC_V3_S3_PMON_CTR_1, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
+    {"SBOX3C2", PMC105, SBOX3, MSR_UNC_V3_S3_PMON_CTL_2, MSR_UNC_V3_S3_PMON_CTR_2, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
+    {"SBOX3C3", PMC106, SBOX3, MSR_UNC_V3_S3_PMON_CTL_3, MSR_UNC_V3_S3_PMON_CTR_3, 0, 0, HAS_EP_VALID_OPTIONS_SBOX},
+    {"WBOX0", PMC107, WBOX, MSR_UNC_V3_PCU_PMON_CTL0, MSR_UNC_V3_PCU_PMON_CTR0, 0, 0, HAS_EP_VALID_OPTIONS_WBOX},
+    {"WBOX1", PMC108, WBOX, MSR_UNC_V3_PCU_PMON_CTL1, MSR_UNC_V3_PCU_PMON_CTR1, 0, 0, HAS_EP_VALID_OPTIONS_WBOX},
+    {"WBOX2", PMC109, WBOX, MSR_UNC_V3_PCU_PMON_CTL2, MSR_UNC_V3_PCU_PMON_CTR2, 0, 0, HAS_EP_VALID_OPTIONS_WBOX},
+    {"WBOX3", PMC110, WBOX, MSR_UNC_V3_PCU_PMON_CTL3, MSR_UNC_V3_PCU_PMON_CTR3, 0, 0, HAS_EP_VALID_OPTIONS_WBOX},
+    {"WBOX0FIX", PMC111, WBOX0FIX, 0, MSR_UNC_V3_PCU_CC6_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"WBOX1FIX", PMC112, WBOX0FIX, 0, MSR_UNC_V3_PCU_CC3_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"WBOX2FIX", PMC113, WBOX0FIX, 0, MSR_UNC_V3_PCU_PC2_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"WBOX3FIX", PMC114, WBOX0FIX, 0, MSR_UNC_V3_PCU_PC3_CTR , 0, 0, EVENT_OPTION_NONE_MASK},
+    {"BBOX0C0", PMC115, BBOX0, PCI_UNC_HA_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_0, HAS_EP_VALID_OPTIONS_BBOX},
+    {"BBOX0C1", PMC116, BBOX0, PCI_UNC_HA_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_0, HAS_EP_VALID_OPTIONS_BBOX},
+    {"BBOX0C2", PMC117, BBOX0, PCI_UNC_HA_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_0, HAS_EP_VALID_OPTIONS_BBOX},
+    {"BBOX0C3", PMC118, BBOX0, PCI_UNC_HA_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_0, HAS_EP_VALID_OPTIONS_BBOX},
+    {"BBOX1C0", PMC119, BBOX1, PCI_UNC_HA_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_1, HAS_EP_VALID_OPTIONS_BBOX},
+    {"BBOX1C1", PMC120, BBOX1, PCI_UNC_HA_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_1, HAS_EP_VALID_OPTIONS_BBOX},
+    {"BBOX1C2", PMC121, BBOX1, PCI_UNC_HA_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_1, HAS_EP_VALID_OPTIONS_BBOX},
+    {"BBOX1C3", PMC122, BBOX1, PCI_UNC_HA_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_1, HAS_EP_VALID_OPTIONS_BBOX},
+    {"MBOX0C0", PMC123, MBOX0, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_0, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX0C1", PMC124, MBOX0, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_0, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX0C2", PMC125, MBOX0, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_0, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX0C3", PMC126, MBOX0, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_0, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX0FIX", PMC127, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
+    {"MBOX1C0", PMC128, MBOX1, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_1, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX1C1", PMC129, MBOX1, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_1, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX1C2", PMC130, MBOX1, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_1, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX1C3", PMC131, MBOX1, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_1, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX0FIX", PMC132, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
+    {"MBOX2C0", PMC133, MBOX2, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_2, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX2C1", PMC134, MBOX2, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_2, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX2C2", PMC135, MBOX2, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_2, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX2C3", PMC136, MBOX2, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_2, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX0FIX", PMC137, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
+    {"MBOX3C0", PMC138, MBOX3, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_3, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX3C1", PMC139, MBOX3, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_3, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX3C2", PMC140, MBOX3, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_3, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX3C3", PMC141, MBOX3, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_3, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX0FIX", PMC142, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
+    {"MBOX4C0", PMC143, MBOX4, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_0, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX4C1", PMC144, MBOX4, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_0, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX4C2", PMC145, MBOX4, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_0, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX4C3", PMC146, MBOX4, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_0, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX0FIX", PMC47, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
+    {"MBOX5C0", PMC148, MBOX5, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_1, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX5C1", PMC149, MBOX5, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_1, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX5C2", PMC150, MBOX5, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_1, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX5C3", PMC151, MBOX5, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_1, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX0FIX", PMC152, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
+    {"MBOX6C0", PMC153, MBOX6, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_2, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX6C1", PMC154, MBOX6, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_2, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX6C2", PMC155, MBOX6, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_2, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX6C3", PMC156, MBOX6, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_2, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX0FIX", PMC157, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
+    {"MBOX7C0", PMC158, MBOX7, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_3, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX7C1", PMC159, MBOX7, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_3, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX7C2", PMC160, MBOX7, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_3, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX7C3", PMC161, MBOX7, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_3, HAS_EP_VALID_OPTIONS_MBOX},
+    {"MBOX0FIX", PMC162, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
+    {"IBOX0C0", PMC163, IBOX0, PCI_UNC_IRP0_PMON_CTL_0, PCI_UNC_IRP0_PMON_CTR_0, 0, PCI_IRP_DEVICE, HAS_EP_VALID_OPTIONS_IBOX},
+    {"IBOX0C1", PMC164, IBOX0, PCI_UNC_IRP0_PMON_CTL_1, PCI_UNC_IRP0_PMON_CTR_1, 0, PCI_IRP_DEVICE, HAS_EP_VALID_OPTIONS_IBOX},
+    {"IBOX1C0", PMC165, IBOX1, PCI_UNC_IRP1_PMON_CTL_0, PCI_UNC_IRP1_PMON_CTR_0, 0, PCI_IRP_DEVICE, HAS_EP_VALID_OPTIONS_IBOX},
+    {"IBOX1C1", PMC166, IBOX1, PCI_UNC_IRP1_PMON_CTL_1, PCI_UNC_IRP1_PMON_CTR_1, 0, PCI_IRP_DEVICE, HAS_EP_VALID_OPTIONS_IBOX},
+    {"PBOX0", PMC167, PBOX, PCI_UNC_R2PCIE_PMON_CTL_0, PCI_UNC_R2PCIE_PMON_CTR_0_A, PCI_UNC_R2PCIE_PMON_CTR_0_B, PCI_R2PCIE_DEVICE, HAS_EP_VALID_OPTIONS_PBOX},
+    {"PBOX1", PMC168, PBOX, PCI_UNC_R2PCIE_PMON_CTL_1, PCI_UNC_R2PCIE_PMON_CTR_1_A, PCI_UNC_R2PCIE_PMON_CTR_1_B, PCI_R2PCIE_DEVICE, HAS_EP_VALID_OPTIONS_PBOX},
+    {"PBOX2", PMC169, PBOX, PCI_UNC_R2PCIE_PMON_CTL_2, PCI_UNC_R2PCIE_PMON_CTR_2_A, PCI_UNC_R2PCIE_PMON_CTR_2_B, PCI_R2PCIE_DEVICE, HAS_EP_VALID_OPTIONS_PBOX},
+    {"PBOX3", PMC170, PBOX, PCI_UNC_R2PCIE_PMON_CTL_3, PCI_UNC_R2PCIE_PMON_CTR_3_A, PCI_UNC_R2PCIE_PMON_CTR_3_B, PCI_R2PCIE_DEVICE, HAS_EP_VALID_OPTIONS_PBOX},
+    {"RBOX0C0", PMC171, RBOX0, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_0, HAS_EP_VALID_OPTIONS_RBOX},
+    {"RBOX0C1", PMC172, RBOX0, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_0, HAS_EP_VALID_OPTIONS_RBOX},
+    {"RBOX0C2", PMC173, RBOX0, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_0, HAS_EP_VALID_OPTIONS_RBOX},
+    {"RBOX1C0", PMC174, RBOX1, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_1, HAS_EP_VALID_OPTIONS_RBOX},
+    {"RBOX1C1", PMC175, RBOX1, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_1, HAS_EP_VALID_OPTIONS_RBOX},
+    {"RBOX1C2", PMC176, RBOX1, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_1, HAS_EP_VALID_OPTIONS_RBOX},
+    {"QBOX0C0", PMC177, QBOX0, PCI_UNC_V3_QPI_PMON_CTL_0, PCI_UNC_V3_QPI_PMON_CTR_0_A, PCI_UNC_V3_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_0, HAS_EP_VALID_OPTIONS_QBOX},
+    {"QBOX0C1", PMC178, QBOX0, PCI_UNC_V3_QPI_PMON_CTL_1, PCI_UNC_V3_QPI_PMON_CTR_1_A, PCI_UNC_V3_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_0, HAS_EP_VALID_OPTIONS_QBOX},
+    {"QBOX0C2", PMC179, QBOX0, PCI_UNC_V3_QPI_PMON_CTL_2, PCI_UNC_V3_QPI_PMON_CTR_2_A, PCI_UNC_V3_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_0, HAS_EP_VALID_OPTIONS_QBOX},
+    {"QBOX0C3", PMC180, QBOX0, PCI_UNC_V3_QPI_PMON_CTL_3, PCI_UNC_V3_QPI_PMON_CTR_3_A, PCI_UNC_V3_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_0, HAS_EP_VALID_OPTIONS_QBOX},
+    {"QBOX1C0", PMC181, QBOX1, PCI_UNC_V3_QPI_PMON_CTL_0, PCI_UNC_V3_QPI_PMON_CTR_0_A, PCI_UNC_V3_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_1, HAS_EP_VALID_OPTIONS_QBOX},
+    {"QBOX1C1", PMC182, QBOX1, PCI_UNC_V3_QPI_PMON_CTL_1, PCI_UNC_V3_QPI_PMON_CTR_1_A, PCI_UNC_V3_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_1, HAS_EP_VALID_OPTIONS_QBOX},
+    {"QBOX1C2", PMC183, QBOX1, PCI_UNC_V3_QPI_PMON_CTL_2, PCI_UNC_V3_QPI_PMON_CTR_2_A, PCI_UNC_V3_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_1, HAS_EP_VALID_OPTIONS_QBOX},
+    {"QBOX1C3", PMC184, QBOX1, PCI_UNC_V3_QPI_PMON_CTL_3, PCI_UNC_V3_QPI_PMON_CTR_3_A, PCI_UNC_V3_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_1, HAS_EP_VALID_OPTIONS_QBOX},
+    {"QBOX0FIX0", PMC185, QBOX0FIX, 0x0, PCI_UNC_V3_QPI_RATE_STATUS, 0x0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
+    {"QBOX0FIX1", PMC186, QBOX0FIX, 0x0, PCI_UNC_V3_QPI_LINK_IDLE, 0x0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
+    {"QBOX0FIX2", PMC187, QBOX0FIX, 0x0, PCI_UNC_V3_QPI_LINK_LLR, 0x0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
+    {"QBOX1FIX0", PMC188, QBOX1FIX, 0x0, PCI_UNC_V3_QPI_RATE_STATUS, 0x0, PCI_QPI_MISC_DEVICE_PORT_1, EVENT_OPTION_NONE_MASK},
+    {"QBOX1FIX1", PMC189, QBOX1FIX, 0x0, PCI_UNC_V3_QPI_LINK_IDLE, 0x0, PCI_QPI_MISC_DEVICE_PORT_1, EVENT_OPTION_NONE_MASK},
+    {"QBOX1FIX2", PMC190, QBOX1FIX, 0x0, PCI_UNC_V3_QPI_LINK_LLR, 0x0, PCI_QPI_MISC_DEVICE_PORT_1, EVENT_OPTION_NONE_MASK},
 };
 
 
diff --git a/src/includes/perfmon_haswellEP_events.txt b/src/includes/perfmon_haswellEP_events.txt
index 5ea1ad1..bfea428 100644
--- a/src/includes/perfmon_haswellEP_events.txt
+++ b/src/includes/perfmon_haswellEP_events.txt
@@ -4,14 +4,14 @@
 #
 #      Description:  Event list for Intel Haswell EP/EN/EX
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
@@ -103,6 +103,18 @@ DEFAULT_OPTIONS_UOPS_ISSUED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_O
 UMASK_UOPS_ISSUED_CORE_STALL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_ISSUED_CORE_TOTAL_CYCLES   0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_ISSUED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_ISSUED_CYCLES_GE_6_UOPS_EXEC 0x01
 
 EVENT_ARITH_DIVIDER_UOPS            0x14 PMC
 UMASK_ARITH_DIVIDER_CYCLES          0x01
@@ -237,33 +249,97 @@ UMASK_LOCK_CYCLES_CACHE_LOCK_COUNT           0x02
 EVENT_IDQ                               0x79   PMC
 UMASK_IDQ_EMPTY                         0x02
 UMASK_IDQ_MITE_UOPS                     0x04
+UMASK_IDQ_DSB_UOPS                      0x08
+UMASK_IDQ_MS_DSB_UOPS                   0x10
+UMASK_IDQ_MS_MITE_UOPS                  0x20
+UMASK_IDQ_MS_UOPS                       0x30
+UMASK_IDQ_ALL_DSB_UOPS                  0x18
+UMASK_IDQ_MITE_ALL_UOPS                 0x24
+UMASK_IDQ_ALL_UOPS                      0x3C
 DEFAULT_OPTIONS_IDQ_MITE_CYCLES         EVENT_OPTION_THRESHOLD=0x1
 UMASK_IDQ_MITE_CYCLES                   0x04
-UMASK_IDQ_DSB_UOPS                      0x08
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES_1_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MITE_CYCLES_2_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MITE_CYCLES_3_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MITE_CYCLES_4_UOPS            0x04
 DEFAULT_OPTIONS_IDQ_DSB_CYCLES          EVENT_OPTION_THRESHOLD=0x1
 UMASK_IDQ_DSB_CYCLES                    0x08
-UMASK_IDQ_MS_DSB_UOPS                   0x10
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES_1_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_DSB_CYCLES_2_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_DSB_CYCLES_3_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_DSB_CYCLES_4_UOPS             0x08
 DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES       EVENT_OPTION_THRESHOLD=0x1
 UMASK_IDQ_MS_DSB_CYCLES                 0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES_1_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_DSB_CYCLES_2_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_DSB_CYCLES_3_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_DSB_CYCLES_4_UOPS          0x10
 DEFAULT_OPTIONS_IDQ_MS_DSB_OCCUR        EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
 UMASK_IDQ_MS_DSB_OCCUR                  0x10
-UMASK_IDQ_MS_MITE_UOPS                  0x20
 DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES      EVENT_OPTION_THRESHOLD=0x1
 UMASK_IDQ_MS_MITE_CYCLES                0x20
-UMASK_IDQ_MS_UOPS                       0x30
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES_1_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_MITE_CYCLES_2_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_MITE_CYCLES_3_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_MITE_CYCLES_4_UOPS         0x20
 DEFAULT_OPTIONS_IDQ_MS_CYCLES           EVENT_OPTION_THRESHOLD=0x1
 UMASK_IDQ_MS_CYCLES                     0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES_1_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_CYCLES_2_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_CYCLES_3_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_CYCLES_4_UOPS              0x30
 DEFAULT_OPTIONS_IDQ_MS_SWITCHES         EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
 UMASK_IDQ_MS_SWITCHES                   0x30
 DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x1
 UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS       0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_DSB_CYCLES_1_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_DSB_CYCLES_2_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_DSB_CYCLES_3_UOPS         0x18
 DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
 UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS         0x18
 DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
 UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS      0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_1_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_MITE_CYCLES_2_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_MITE_CYCLES_3_UOPS        0x24
 DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
 UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS        0x24
-UMASK_IDQ_MITE_ALL_UOPS                 0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_ANY_UOPS      0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_1_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_CYCLES_2_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_CYCLES_3_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_CYCLES_4_UOPS        0x3C
 
 EVENT_ICACHE                    0x80   PMC
 UMASK_ICACHE_HIT                0x01
@@ -371,16 +447,21 @@ UMASK_RESOURCE_STALLS_SB              0x08
 UMASK_RESOURCE_STALLS_ROB             0x10
 
 EVENT_CYCLE_ACTIVITY                 0xA3   PMC
+# Errata HSW62: May be unreliable in SMT mode
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L2_PENDING  EVENT_OPTION_THRESHOLD=0x1
 UMASK_CYCLE_ACTIVITY_CYCLES_L2_PENDING            0x01
-DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_MEM_ANY     EVENT_OPTION_THRESHOLD=0x1
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_MEM_ANY     EVENT_OPTION_THRESHOLD=0x2
 UMASK_CYCLE_ACTIVITY_CYCLES_MEM_ANY               0x02
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_LDM_PENDING EVENT_OPTION_THRESHOLD=0x2
+UMASK_CYCLE_ACTIVITY_CYCLES_LDM_PENDING           0x02
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE  EVENT_OPTION_THRESHOLD=0x4
 UMASK_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE            0x04
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L2_PENDING  EVENT_OPTION_THRESHOLD=0x5
 UMASK_CYCLE_ACTIVITY_STALLS_L2_PENDING            0x05
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_MEM_ANY     EVENT_OPTION_THRESHOLD=0x6
 UMASK_CYCLE_ACTIVITY_STALLS_MEM_ANY               0x06
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_LDM_PENDING EVENT_OPTION_THRESHOLD=0x6
+UMASK_CYCLE_ACTIVITY_STALLS_LDM_PENDING           0x06
 
 EVENT_CYCLE_ACTIVITY_CYCLES                 0xA3   PMC2
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L1D_PENDING EVENT_OPTION_THRESHOLD=0x8
@@ -392,10 +473,18 @@ UMASK_CYCLE_ACTIVITY_STALLS_L1D_PENDING     0x0C
 
 EVENT_LSD_UOPS                  0xA8   PMC
 UMASK_LSD_UOPS                  0x01
+DEFAULT_OPTIONS_LSD_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_LSD_CYCLES_1_UOPS         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_LSD_CYCLES_2_UOPS         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_LSD_CYCLES_3_UOPS         0x01
 DEFAULT_OPTIONS_LSD_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
 UMASK_LSD_CYCLES_4_UOPS         0x01
 DEFAULT_OPTIONS_LSD_CYCLES_ACTIVE EVENT_OPTION_THRESHOLD=0x1
 UMASK_LSD_CYCLES_ACTIVE         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_INACTIVE EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
+UMASK_LSD_CYCLES_INACTIVE         0x01
 
 EVENT_DSB2MITE_SWITCHES                0xAB PMC
 UMASK_DSB2MITE_SWITCHES_COUNT          0x01
@@ -426,6 +515,14 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
 UMASK_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC 0x01
 DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_7_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x7
+UMASK_UOPS_EXECUTED_CYCLES_GE_7_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_8_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x8
+UMASK_UOPS_EXECUTED_CYCLES_GE_8_UOPS_EXEC 0x01
 UMASK_UOPS_EXECUTED_CORE                  0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
 UMASK_UOPS_EXECUTED_CORE_USED_CYCLES           0x02
@@ -441,6 +538,14 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC 0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_7_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x7
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_7_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_8_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x8
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_8_UOPS_EXEC 0x02
 
 EVENT_OFFCORE_REQUESTS_BUFFER_SQ_FULL 0xB2 PMC
 UMASK_OFFCORE_REQUESTS_BUFFER_SQ_FULL 0x01
@@ -497,6 +602,18 @@ DEFAULT_OPTIONS_UOPS_RETIRED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_
 UMASK_UOPS_RETIRED_CORE_STALL_CYCLES     0x01
 DEFAULT_OPTIONS_UOPS_RETIRED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_RETIRED_CORE_TOTAL_CYCLES     0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC 0x01
 
 EVENT_MACHINE_CLEARS                    0xC3  PMC
 UMASK_MACHINE_CLEARS_CYCLES             0x01
diff --git a/src/includes/perfmon_haswell_counters.h b/src/includes/perfmon_haswell_counters.h
index 8685420..3a7b7a6 100644
--- a/src/includes/perfmon_haswell_counters.h
+++ b/src/includes/perfmon_haswell_counters.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Counter Header File of perfmon module for Intel Haswell.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -29,8 +29,8 @@
  * =======================================================================================
  */
 
-#define NUM_COUNTERS_HASWELL 23
-#define NUM_COUNTERS_CORE_HASWELL 8
+#define NUM_COUNTERS_HASWELL 27
+#define NUM_COUNTERS_CORE_HASWELL 12
 #define NUM_COUNTERS_UNCORE_HASWELL 15
 
 #define HAS_VALID_OPTIONS_FIXED EVENT_OPTION_ANYTHREAD_MASK|EVENT_OPTION_COUNT_KERNEL_MASK
@@ -49,24 +49,29 @@ static RegisterMap haswell_counter_map[NUM_COUNTERS_HASWELL] = {
     {"PMC1", PMC4, PMC, MSR_PERFEVTSEL1, MSR_PMC1, 0, 0, HAS_VALID_OPTIONS_PMC},
     {"PMC2", PMC5, PMC, MSR_PERFEVTSEL2, MSR_PMC2, 0, 0, HAS_VALID_OPTIONS_PMC|EVENT_OPTION_IN_TRANS_ABORT_MASK},
     {"PMC3", PMC6, PMC, MSR_PERFEVTSEL3, MSR_PMC3, 0, 0, HAS_VALID_OPTIONS_PMC},
+    /* Additional PMC Counters if HyperThreading is not active: 4 48bit wide */
+    {"PMC4", PMC7, PMC, MSR_PERFEVTSEL4, MSR_PMC4, 0, 0, HAS_VALID_OPTIONS_PMC},
+    {"PMC5", PMC8, PMC, MSR_PERFEVTSEL5, MSR_PMC5, 0, 0, HAS_VALID_OPTIONS_PMC},
+    {"PMC6", PMC9, PMC, MSR_PERFEVTSEL6, MSR_PMC6, 0, 0, HAS_VALID_OPTIONS_PMC|EVENT_OPTION_IN_TRANS_ABORT_MASK},
+    {"PMC7", PMC10, PMC, MSR_PERFEVTSEL7, MSR_PMC7, 0, 0, HAS_VALID_OPTIONS_PMC},
     /* Temperature Sensor*/
-    {"TMP0", PMC7, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"TMP0", PMC11, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
     /* RAPL counters */
-    {"PWR0", PMC8, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR1", PMC9, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR2", PMC10, POWER, 0, MSR_PP1_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR3", PMC11, POWER, 0, MSR_DRAM_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
-    {"CBOX0C0", PMC12, CBOX0, MSR_UNC_CBO_0_PERFEVTSEL0, MSR_UNC_CBO_0_CTR0, 0, 0, HAS_VALID_OPTIONS_CBOX},
-    {"CBOX0C1", PMC13, CBOX0, MSR_UNC_CBO_0_PERFEVTSEL1, MSR_UNC_CBO_0_CTR1, 0, 0, HAS_VALID_OPTIONS_CBOX},
-    {"CBOX1C0", PMC14, CBOX1, MSR_UNC_CBO_1_PERFEVTSEL0, MSR_UNC_CBO_1_CTR0, 0, 0, HAS_VALID_OPTIONS_CBOX},
-    {"CBOX1C1", PMC15, CBOX1, MSR_UNC_CBO_1_PERFEVTSEL1, MSR_UNC_CBO_1_CTR1, 0, 0, HAS_VALID_OPTIONS_CBOX},
-    {"CBOX2C0", PMC16, CBOX2, MSR_UNC_CBO_2_PERFEVTSEL0, MSR_UNC_CBO_2_CTR0, 0, 0, HAS_VALID_OPTIONS_CBOX},
-    {"CBOX2C1", PMC17, CBOX2, MSR_UNC_CBO_2_PERFEVTSEL1, MSR_UNC_CBO_2_CTR1, 0, 0, HAS_VALID_OPTIONS_CBOX},
-    {"CBOX3C0", PMC18, CBOX3, MSR_UNC_CBO_3_PERFEVTSEL0, MSR_UNC_CBO_3_CTR0, 0, 0, HAS_VALID_OPTIONS_CBOX},
-    {"CBOX3C1", PMC19, CBOX3, MSR_UNC_CBO_3_PERFEVTSEL1, MSR_UNC_CBO_3_CTR1, 0, 0, HAS_VALID_OPTIONS_CBOX},
-    {"UBOX0", PMC20, UBOX, MSR_UNC_ARB_PERFEVTSEL0, MSR_UNC_ARB_CTR0, 0, 0, HAS_VALID_OPTIONS_UBOX},
-    {"UBOX1", PMC21, UBOX, MSR_UNC_ARB_PERFEVTSEL1, MSR_UNC_ARB_CTR1, 0, 0, HAS_VALID_OPTIONS_UBOX},
-    {"UBOXFIX", PMC22, UBOXFIX, MSR_UNC_PERF_FIXED_CTRL, MSR_UNC_PERF_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR0", PMC12, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR1", PMC13, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR2", PMC14, POWER, 0, MSR_PP1_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR3", PMC15, POWER, 0, MSR_DRAM_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
+    {"CBOX0C0", PMC16, CBOX0, MSR_UNC_CBO_0_PERFEVTSEL0, MSR_UNC_CBO_0_CTR0, 0, 0, HAS_VALID_OPTIONS_CBOX},
+    {"CBOX0C1", PMC17, CBOX0, MSR_UNC_CBO_0_PERFEVTSEL1, MSR_UNC_CBO_0_CTR1, 0, 0, HAS_VALID_OPTIONS_CBOX},
+    {"CBOX1C0", PMC18, CBOX1, MSR_UNC_CBO_1_PERFEVTSEL0, MSR_UNC_CBO_1_CTR0, 0, 0, HAS_VALID_OPTIONS_CBOX},
+    {"CBOX1C1", PMC19, CBOX1, MSR_UNC_CBO_1_PERFEVTSEL1, MSR_UNC_CBO_1_CTR1, 0, 0, HAS_VALID_OPTIONS_CBOX},
+    {"CBOX2C0", PMC20, CBOX2, MSR_UNC_CBO_2_PERFEVTSEL0, MSR_UNC_CBO_2_CTR0, 0, 0, HAS_VALID_OPTIONS_CBOX},
+    {"CBOX2C1", PMC21, CBOX2, MSR_UNC_CBO_2_PERFEVTSEL1, MSR_UNC_CBO_2_CTR1, 0, 0, HAS_VALID_OPTIONS_CBOX},
+    {"CBOX3C0", PMC22, CBOX3, MSR_UNC_CBO_3_PERFEVTSEL0, MSR_UNC_CBO_3_CTR0, 0, 0, HAS_VALID_OPTIONS_CBOX},
+    {"CBOX3C1", PMC23, CBOX3, MSR_UNC_CBO_3_PERFEVTSEL1, MSR_UNC_CBO_3_CTR1, 0, 0, HAS_VALID_OPTIONS_CBOX},
+    {"UBOX0", PMC24, UBOX, MSR_UNC_ARB_PERFEVTSEL0, MSR_UNC_ARB_CTR0, 0, 0, HAS_VALID_OPTIONS_UBOX},
+    {"UBOX1", PMC25, UBOX, MSR_UNC_ARB_PERFEVTSEL1, MSR_UNC_ARB_CTR1, 0, 0, HAS_VALID_OPTIONS_UBOX},
+    {"UBOXFIX", PMC26, UBOXFIX, MSR_UNC_PERF_FIXED_CTRL, MSR_UNC_PERF_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
 };
 
 
diff --git a/src/includes/perfmon_haswell_events.txt b/src/includes/perfmon_haswell_events.txt
index 8ada1d0..190b7a4 100644
--- a/src/includes/perfmon_haswell_events.txt
+++ b/src/includes/perfmon_haswell_events.txt
@@ -4,14 +4,14 @@
 #
 #      Description:  Event list for Intel Haswell
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
@@ -103,6 +103,18 @@ DEFAULT_OPTIONS_UOPS_ISSUED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_O
 UMASK_UOPS_ISSUED_CORE_STALL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_ISSUED_CORE_TOTAL_CYCLES   0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_ISSUED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_ISSUED_CYCLES_GE_6_UOPS_EXEC 0x01
 
 EVENT_ARITH_DIVIDER_UOPS            0x14 PMC
 UMASK_ARITH_DIVIDER_CYCLES          0x01
@@ -238,34 +250,99 @@ UMASK_LOCK_CYCLES_CACHE_LOCK_COUNT           0x02
 EVENT_IDQ                               0x79   PMC
 UMASK_IDQ_EMPTY                         0x02
 UMASK_IDQ_MITE_UOPS                     0x04
+UMASK_IDQ_DSB_UOPS                      0x08
+UMASK_IDQ_MS_DSB_UOPS                   0x10
+UMASK_IDQ_MS_MITE_UOPS                  0x20
+UMASK_IDQ_MS_UOPS                       0x30
+UMASK_IDQ_ALL_DSB_UOPS                  0x18
+UMASK_IDQ_MITE_ALL_UOPS                 0x24
+UMASK_IDQ_ALL_UOPS                      0x3C
 DEFAULT_OPTIONS_IDQ_MITE_CYCLES         EVENT_OPTION_THRESHOLD=0x1
 UMASK_IDQ_MITE_CYCLES                   0x04
-UMASK_IDQ_DSB_UOPS                      0x08
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES_1_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MITE_CYCLES_2_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MITE_CYCLES_3_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MITE_CYCLES_4_UOPS            0x04
 DEFAULT_OPTIONS_IDQ_DSB_CYCLES          EVENT_OPTION_THRESHOLD=0x1
 UMASK_IDQ_DSB_CYCLES                    0x08
-UMASK_IDQ_MS_DSB_UOPS                   0x10
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES_1_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_DSB_CYCLES_2_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_DSB_CYCLES_3_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_DSB_CYCLES_4_UOPS             0x08
 DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES       EVENT_OPTION_THRESHOLD=0x1
 UMASK_IDQ_MS_DSB_CYCLES                 0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES_1_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_DSB_CYCLES_2_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_DSB_CYCLES_3_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_DSB_CYCLES_4_UOPS          0x10
 DEFAULT_OPTIONS_IDQ_MS_DSB_OCCUR        EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
 UMASK_IDQ_MS_DSB_OCCUR                  0x10
-UMASK_IDQ_MS_MITE_UOPS                  0x20
 DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES      EVENT_OPTION_THRESHOLD=0x1
 UMASK_IDQ_MS_MITE_CYCLES                0x20
-UMASK_IDQ_MS_UOPS                       0x30
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES_1_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_MITE_CYCLES_2_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_MITE_CYCLES_3_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_MITE_CYCLES_4_UOPS         0x20
 DEFAULT_OPTIONS_IDQ_MS_CYCLES           EVENT_OPTION_THRESHOLD=0x1
 UMASK_IDQ_MS_CYCLES                     0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES_1_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_CYCLES_2_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_CYCLES_3_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_CYCLES_4_UOPS              0x30
 DEFAULT_OPTIONS_IDQ_MS_SWITCHES         EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
 UMASK_IDQ_MS_SWITCHES                   0x30
 DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x1
 UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS       0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_DSB_CYCLES_1_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_DSB_CYCLES_2_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_DSB_CYCLES_3_UOPS         0x18
 DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
 UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS         0x18
 DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
 UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS      0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_1_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_MITE_CYCLES_2_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_MITE_CYCLES_3_UOPS        0x24
 DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
 UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS        0x24
-UMASK_IDQ_MITE_ALL_UOPS                 0x3C
-
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_ANY_UOPS      0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_1_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_CYCLES_2_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_CYCLES_3_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_CYCLES_4_UOPS        0x3C
+
+
 EVENT_ICACHE                    0x80   PMC
 UMASK_ICACHE_HIT                0x01
 UMASK_ICACHE_MISSES             0x02
@@ -375,14 +452,18 @@ EVENT_CYCLE_ACTIVITY                 0xA3   PMC
 # Errata HSW62: May be unreliable in SMT mode
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L2_PENDING  EVENT_OPTION_THRESHOLD=0x1
 UMASK_CYCLE_ACTIVITY_CYCLES_L2_PENDING            0x01
-DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_MEM_ANY     EVENT_OPTION_THRESHOLD=0x1
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_MEM_ANY     EVENT_OPTION_THRESHOLD=0x2
 UMASK_CYCLE_ACTIVITY_CYCLES_MEM_ANY               0x02
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_LDM_PENDING EVENT_OPTION_THRESHOLD=0x2
+UMASK_CYCLE_ACTIVITY_CYCLES_LDM_PENDING           0x02
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE  EVENT_OPTION_THRESHOLD=0x4
 UMASK_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE            0x04
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L2_PENDING  EVENT_OPTION_THRESHOLD=0x5
 UMASK_CYCLE_ACTIVITY_STALLS_L2_PENDING            0x05
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_MEM_ANY     EVENT_OPTION_THRESHOLD=0x6
 UMASK_CYCLE_ACTIVITY_STALLS_MEM_ANY               0x06
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_LDM_PENDING EVENT_OPTION_THRESHOLD=0x6
+UMASK_CYCLE_ACTIVITY_STALLS_LDM_PENDING           0x06
 
 EVENT_CYCLE_ACTIVITY_CYCLES                 0xA3   PMC2
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L1D_PENDING EVENT_OPTION_THRESHOLD=0x8
@@ -394,10 +475,18 @@ UMASK_CYCLE_ACTIVITY_STALLS_L1D_PENDING     0x0C
 
 EVENT_LSD_UOPS                  0xA8   PMC
 UMASK_LSD_UOPS                  0x01
+DEFAULT_OPTIONS_LSD_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_LSD_CYCLES_1_UOPS         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_LSD_CYCLES_2_UOPS         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_LSD_CYCLES_3_UOPS         0x01
 DEFAULT_OPTIONS_LSD_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
 UMASK_LSD_CYCLES_4_UOPS         0x01
 DEFAULT_OPTIONS_LSD_CYCLES_ACTIVE EVENT_OPTION_THRESHOLD=0x1
 UMASK_LSD_CYCLES_ACTIVE         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_INACTIVE EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
+UMASK_LSD_CYCLES_INACTIVE         0x01
 
 EVENT_DSB2MITE_SWITCHES                0xAB PMC
 UMASK_DSB2MITE_SWITCHES_COUNT          0x01
@@ -428,6 +517,14 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
 UMASK_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC 0x01
 DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_7_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x7
+UMASK_UOPS_EXECUTED_CYCLES_GE_7_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_8_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x8
+UMASK_UOPS_EXECUTED_CYCLES_GE_8_UOPS_EXEC 0x01
 UMASK_UOPS_EXECUTED_CORE                  0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
 UMASK_UOPS_EXECUTED_CORE_USED_CYCLES           0x02
@@ -443,6 +540,14 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC 0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_7_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x7
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_7_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_8_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x8
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_8_UOPS_EXEC 0x02
 
 EVENT_OFFCORE_REQUESTS_BUFFER_SQ_FULL 0xB2 PMC
 UMASK_OFFCORE_REQUESTS_BUFFER_SQ_FULL 0x01
@@ -499,6 +604,18 @@ DEFAULT_OPTIONS_UOPS_RETIRED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_
 UMASK_UOPS_RETIRED_CORE_STALL_CYCLES     0x01
 DEFAULT_OPTIONS_UOPS_RETIRED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_RETIRED_CORE_TOTAL_CYCLES     0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC 0x01
 
 EVENT_MACHINE_CLEARS                    0xC3  PMC
 UMASK_MACHINE_CLEARS_CYCLES             0x01
diff --git a/src/includes/perfmon_interlagos.h b/src/includes/perfmon_interlagos.h
index 464a1af..e5faf5b 100644
--- a/src/includes/perfmon_interlagos.h
+++ b/src/includes/perfmon_interlagos.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Header file of perfmon module for AMD Interlagos
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -118,7 +118,7 @@ int perfmon_setupCounterThread_interlagos(int thread_id, PerfmonEventSet* eventS
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -157,7 +157,7 @@ int perfmon_startCountersThread_interlagos(int thread_id, PerfmonEventSet* event
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -195,7 +195,7 @@ int perfmon_stopCountersThread_interlagos(int thread_id, PerfmonEventSet* eventS
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -246,7 +246,7 @@ int perfmon_readCountersThread_interlagos(int thread_id, PerfmonEventSet* eventS
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -285,7 +285,7 @@ int perfmon_finalizeCountersThread_interlagos(int thread_id, PerfmonEventSet* ev
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
diff --git a/src/includes/perfmon_interlagos_counters.h b/src/includes/perfmon_interlagos_counters.h
index c3e1702..0f7f23a 100644
--- a/src/includes/perfmon_interlagos_counters.h
+++ b/src/includes/perfmon_interlagos_counters.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Counter Header File of perfmon module for AMD Interlagos
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_interlagos_events.txt b/src/includes/perfmon_interlagos_events.txt
index 0847b2f..6d28687 100644
--- a/src/includes/perfmon_interlagos_events.txt
+++ b/src/includes/perfmon_interlagos_events.txt
@@ -4,14 +4,14 @@
 #
 #      Description:  Event list for AMD Interlagos
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_ivybridge.h b/src/includes/perfmon_ivybridge.h
index e0c9616..75c2b5b 100644
--- a/src/includes/perfmon_ivybridge.h
+++ b/src/includes/perfmon_ivybridge.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Header File of perfmon module for Intel Ivy Bridge.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -46,11 +46,16 @@ static int perfmon_numCountersIvybridge = NUM_COUNTERS_IVYBRIDGE;
 static int perfmon_numCoreCountersIvybridge = NUM_COUNTERS_CORE_IVYBRIDGE;
 static int perfmon_numArchEventsIvybridge = NUM_ARCH_EVENTS_IVYBRIDGE;
 
+int ivb_did_cbox_test = 0;
 int ivb_cbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event);
 int ivbep_cbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event);
-int ivy_cbox_nosetup(int cpu_id, RegisterIndex index, PerfmonEvent *event);
 int (*ivy_cbox_setup)(int, RegisterIndex, PerfmonEvent*);
 
+int ivb_cbox_nosetup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
+{
+    return 0;
+}
+
 int perfmon_init_ivybridge(int cpu_id)
 {
     int ret;
@@ -58,21 +63,24 @@ int perfmon_init_ivybridge(int cpu_id)
     lock_acquire((int*) &socket_lock[affinity_core2node_lookup[cpu_id]], cpu_id);
     lock_acquire((int*) &tile_lock[affinity_thread2tile_lookup[cpu_id]], cpu_id);
     HPMwrite(cpu_id, MSR_DEV, MSR_PEBS_ENABLE, 0x0ULL);
-    ret = HPMwrite(cpu_id, MSR_DEV, MSR_UNC_CBO_0_PERFEVTSEL0, 0x0ULL);
-    ret += HPMread(cpu_id, MSR_DEV, MSR_UNC_PERF_GLOBAL_CTRL, &data);
-    ret += HPMwrite(cpu_id, MSR_DEV, MSR_UNC_PERF_GLOBAL_CTRL, 0x0ULL);
-    ret += HPMread(cpu_id, MSR_DEV, MSR_UNC_CBO_0_PERFEVTSEL0, &data);
     if ((cpuid_info.model == IVYBRIDGE_EP))
     {
         ivy_cbox_setup = ivbep_cbox_setup;
-    }
-    else if ((ret == 0) && (data == 0x0ULL))
-    {
-        ivy_cbox_setup = ivb_cbox_setup;
-    }
-    else
-    {
-        ivy_cbox_setup = ivy_cbox_nosetup;
+        ivb_did_cbox_test = 1;
+    }
+    else if (cpuid_info.model == IVYBRIDGE && 
+             socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id &&
+             ivb_did_cbox_test == 0)
+    {
+        ret = HPMwrite(cpu_id, MSR_DEV, MSR_UNC_CBO_0_PERFEVTSEL0, 0x0ULL);
+        ret += HPMread(cpu_id, MSR_DEV, MSR_UNC_PERF_GLOBAL_CTRL, &data);
+        ret += HPMwrite(cpu_id, MSR_DEV, MSR_UNC_PERF_GLOBAL_CTRL, 0x0ULL);
+        ret += HPMread(cpu_id, MSR_DEV, MSR_UNC_CBO_0_PERFEVTSEL0, &data);
+        if ((ret == 0) && (data == 0x0ULL))
+            ivy_cbox_setup = ivb_cbox_setup;
+        else
+            ivy_cbox_setup = ivb_cbox_nosetup;
+        ivb_did_cbox_test = 1;
     }
     return 0;
 }
@@ -393,12 +401,6 @@ int ivb_sbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event, PciDevi
     return 0;
 }
 
-int ivy_cbox_nosetup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
-{
-    return 0;
-}
-
-
 int ivb_cbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
 {
     uint32_t flags = 0x0UL;
@@ -675,12 +677,24 @@ int ivb_ibox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
 
 int ivb_uncore_freeze(int cpu_id, PerfmonEventSet* eventSet)
 {
-    uint32_t freeze_reg = (cpuid_info.model == IVYBRIDGE_EP ? MSR_UNC_U_PMON_GLOBAL_CTL : MSR_UNC_PERF_GLOBAL_CTRL);
+    uint32_t freeze_reg = 0x0;
+    if (cpuid_info.model == IVYBRIDGE_EP)
+    {
+        freeze_reg = MSR_UNC_U_PMON_GLOBAL_CTL;
+    }
+    else if (cpuid_info.model == IVYBRIDGE && ivy_cbox_setup == ivb_cbox_setup)
+    {
+        freeze_reg = MSR_UNC_PERF_GLOBAL_CTRL;
+    }
+    else
+    {
+        return 0;
+    }
     if (socket_lock[affinity_core2node_lookup[cpu_id]] != cpu_id)
     {
         return 0;
     }
-    if (eventSet->regTypeMask & ~(0xF))
+    if (MEASURE_UNCORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, freeze_reg, LLU_CAST (1ULL<<31), FREEZE_UNCORE);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, freeze_reg, (1ULL<<31)));
@@ -690,13 +704,27 @@ int ivb_uncore_freeze(int cpu_id, PerfmonEventSet* eventSet)
 
 int ivb_uncore_unfreeze(int cpu_id, PerfmonEventSet* eventSet)
 {
-    uint32_t unfreeze_reg = (cpuid_info.model == IVYBRIDGE_EP ? MSR_UNC_U_PMON_GLOBAL_CTL : MSR_UNC_PERF_GLOBAL_CTRL);
-    uint32_t ovf_reg = (cpuid_info.model == IVYBRIDGE_EP ? MSR_UNC_U_PMON_GLOBAL_STATUS : MSR_UNC_PERF_GLOBAL_OVF_CTRL);
+    uint32_t unfreeze_reg = 0x0;
+    uint32_t ovf_reg = 0x0;
+    if (cpuid_info.model == IVYBRIDGE_EP)
+    {
+        unfreeze_reg = MSR_UNC_U_PMON_GLOBAL_CTL;
+        ovf_reg = MSR_UNC_U_PMON_GLOBAL_STATUS;
+    }
+    else if (cpuid_info.model == IVYBRIDGE && ivy_cbox_setup == ivb_cbox_setup)
+    {
+        unfreeze_reg = MSR_UNC_PERF_GLOBAL_CTRL;
+        ovf_reg = MSR_UNC_PERF_GLOBAL_OVF_CTRL;
+    }
+    else
+    {
+        return 0;
+    }
     if (socket_lock[affinity_core2node_lookup[cpu_id]] != cpu_id)
     {
         return 0;
     }
-    if (eventSet->regTypeMask & ~(0xF))
+    if (MEASURE_UNCORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, ovf_reg, LLU_CAST 0x0ULL, CLEAR_UNCORE_OVF)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, ovf_reg, 0x0ULL));
@@ -720,18 +748,20 @@ int perfmon_setupCounterThread_ivybridge(
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_AND_FIXED)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
     }
-
-    ivb_uncore_freeze(cpu_id, eventSet);
+    if (haveLock && MEASURE_UNCORE(eventSet))
+    {
+        ivb_uncore_freeze(cpu_id, eventSet);
+    }
 
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -763,6 +793,7 @@ int perfmon_setupCounterThread_ivybridge(
             case RBOX0:
             case RBOX1:
                 ivb_pci_box_setup(cpu_id, index, event);
+                
                 break;
 
             case BBOX0:
@@ -831,7 +862,7 @@ int perfmon_setupCounterThread_ivybridge(
     }
     for (int i=UNCORE;i<NUM_UNITS;i++)
     {
-        if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(i))) && box_map[i].ctrlRegister != 0x0)
+        if (haveLock && TESTTYPE(eventSet, i) && box_map[i].ctrlRegister != 0x0)
         {
             VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL, CLEAR_UNCORE_BOX_CTRL);
             HPMwrite(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL);
@@ -863,7 +894,7 @@ int perfmon_startCountersThread_ivybridge(int thread_id, PerfmonEventSet* eventS
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -876,23 +907,17 @@ int perfmon_startCountersThread_ivybridge(int thread_id, PerfmonEventSet* eventS
             switch (type)
             {
                 case PMC:
-                    if (eventSet->regTypeMask & REG_TYPE_MASK(PMC))
-                    {
-                        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, counter1, 0x0ULL));
-                        fixed_flags |= (1ULL<<(index-cpuid_info.perf_num_fixed_ctr));  /* enable counter */
-                    }
+                    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, counter1, 0x0ULL));
+                    fixed_flags |= (1ULL<<(index-cpuid_info.perf_num_fixed_ctr));  /* enable counter */
                     break;
 
                 case FIXED:
-                    if (eventSet->regTypeMask & REG_TYPE_MASK(FIXED))
-                    {
-                        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, counter1, 0x0ULL));
-                        fixed_flags |= (1ULL<<(index+32));  /* enable fixed counter */
-                    }
+                    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, counter1, 0x0ULL));
+                    fixed_flags |= (1ULL<<(index+32));  /* enable fixed counter */
                     break;
 
                 case POWER:
-                    if (haveLock && (eventSet->regTypeMask & REG_TYPE_MASK(POWER)))
+                    if (haveLock)
                     {
                         CHECK_POWER_READ_ERROR(power_read(cpu_id, counter1, (uint32_t*)&tmp));
                         VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST field64(tmp, 0, box_map[type].regWidth), START_POWER)
@@ -901,7 +926,7 @@ int perfmon_startCountersThread_ivybridge(int thread_id, PerfmonEventSet* eventS
                     break;
 
                 default:
-                    if (eventSet->regTypeMask & REG_TYPE_MASK(type))
+                    if (type >= UNCORE && haveLock)
                     {
                         if (counter1 != 0x0)
                         {
@@ -916,9 +941,12 @@ int perfmon_startCountersThread_ivybridge(int thread_id, PerfmonEventSet* eventS
         }
     }
 
-    ivb_uncore_unfreeze(cpu_id, eventSet);
+    if (haveLock && MEASURE_UNCORE(eventSet))
+    {
+        ivb_uncore_unfreeze(cpu_id, eventSet);
+    }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST fixed_flags, UNFREEZE_PMC_AND_FIXED)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, fixed_flags));
@@ -1047,19 +1075,23 @@ int perfmon_stopCountersThread_ivybridge(int thread_id, PerfmonEventSet* eventSe
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_AND_FIXED)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
     }
-    ivb_uncore_freeze(cpu_id, eventSet);
+    if (haveLock && MEASURE_UNCORE(eventSet))
+    {
+        ivb_uncore_freeze(cpu_id, eventSet);
+    }
+
 
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -1100,7 +1132,7 @@ int perfmon_stopCountersThread_ivybridge(int thread_id, PerfmonEventSet* eventSe
                     break;
 
                 case POWER:
-                    if (haveLock && (eventSet->regTypeMask & REG_TYPE_MASK(POWER)))
+                    if (haveLock)
                     {
                         CHECK_POWER_READ_ERROR(power_read(cpu_id, counter1, (uint32_t*)&counter_result));
                         VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, STOP_POWER)
@@ -1239,19 +1271,22 @@ int perfmon_readCountersThread_ivybridge(int thread_id, PerfmonEventSet* eventSe
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, &pmc_flags));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
     }
-    ivb_uncore_freeze(cpu_id, eventSet);
+    if (haveLock && MEASURE_UNCORE(eventSet))
+    {
+        ivb_uncore_freeze(cpu_id, eventSet);
+    }
 
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -1292,7 +1327,7 @@ int perfmon_readCountersThread_ivybridge(int thread_id, PerfmonEventSet* eventSe
                     break;
 
                 case POWER:
-                    if (haveLock && (eventSet->regTypeMask & REG_TYPE_MASK(POWER)))
+                    if (haveLock)
                     {
                         CHECK_POWER_READ_ERROR(power_read(cpu_id, counter1, (uint32_t*)&counter_result));
                         VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, STOP_POWER)
@@ -1424,8 +1459,12 @@ int perfmon_readCountersThread_ivybridge(int thread_id, PerfmonEventSet* eventSe
         }
     }
 
-    ivb_uncore_unfreeze(cpu_id, eventSet);
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (haveLock && MEASURE_UNCORE(eventSet))
+    {
+        ivb_uncore_unfreeze(cpu_id, eventSet);
+    }
+
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, pmc_flags));
     }
@@ -1452,7 +1491,7 @@ int perfmon_finalizeCountersThread_ivybridge(int thread_id, PerfmonEventSet* eve
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -1485,33 +1524,23 @@ int perfmon_finalizeCountersThread_ivybridge(int thread_id, PerfmonEventSet* eve
         {
             VERBOSEPRINTPCIREG(cpu_id, dev, reg, 0x0ULL, CLEAR_CTL);
             CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, dev, reg, 0x0ULL));
-            if (type >= SBOX0 && type <= SBOX2)
-            {
-                VERBOSEPRINTPCIREG(cpu_id, dev, reg, 0x0ULL, CLEAR_CTL_TWICE);
-                CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, dev, reg, 0x0ULL));
-            }
+            if (type >= SBOX0 && type <= SBOX3)
+                CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, reg, 0x0ULL));
             VERBOSEPRINTPCIREG(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL, CLEAR_CTR);
             CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL));
-            if (type >= SBOX0 && type <= SBOX2)
-            {
-                VERBOSEPRINTPCIREG(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL, CLEAR_CTR_TWICE);
-                CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL));
-            }
+            if (type >= SBOX0 && type <= SBOX3)
+                CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL));
             if (counter_map[index].counterRegister2 != 0x0)
             {
                 VERBOSEPRINTPCIREG(cpu_id, dev, counter_map[index].counterRegister2, 0x0ULL, CLEAR_CTR);
-                CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister2, 0x0ULL));
-                if (type >= SBOX0 && type <= SBOX2)
-                {
-                    VERBOSEPRINTPCIREG(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL, CLEAR_CTR_TWICE);
-                    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister2, 0x0ULL));
-                }
+                CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister2, 0x0ULL));
+                if (type >= SBOX0 && type <= SBOX3)
+                    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL));
             }
-            
         }
         eventSet->events[i].threadCounter[thread_id].init = FALSE;
     }
-    if (haveLock && eventSet->regTypeMask & ~(0xFULL))
+    if (haveLock && MEASURE_UNCORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_UNC_U_PMON_GLOBAL_STATUS, LLU_CAST 0x0ULL, CLEAR_UNCORE_OVF)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_U_PMON_GLOBAL_STATUS, 0x0ULL));
@@ -1519,18 +1548,18 @@ int perfmon_finalizeCountersThread_ivybridge(int thread_id, PerfmonEventSet* eve
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_U_PMON_GLOBAL_CTL, 0x0ULL));
         for (int i=UNCORE;i<NUM_UNITS;i++)
         {
-            if ((eventSet->regTypeMask & (REG_TYPE_MASK(i))) && box_map[i].ctrlRegister != 0x0)
+            if (TESTTYPE(eventSet, i) && box_map[i].ctrlRegister != 0x0)
             {
                 VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL, CLEAR_UNCORE_BOX_CTRL);
                 HPMwrite(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL);
-                if (i >= SBOX0 && i <= SBOX2)
+                if (i >= SBOX0 && i <= SBOX3)
                     HPMwrite(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL);
-                if (box_map[i].filterRegister1)
+                if (box_map[i].filterRegister1 != 0x0)
                 {
                     VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].filterRegister1, 0x0ULL, CLEAR_FILTER);
                     HPMwrite(cpu_id, box_map[i].device, box_map[i].filterRegister1, 0x0ULL);
                 }
-                if (box_map[i].filterRegister2)
+                if (box_map[i].filterRegister2 != 0x0)
                 {
                     VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].filterRegister2, 0x0ULL, CLEAR_FILTER);
                     HPMwrite(cpu_id, box_map[i].device, box_map[i].filterRegister2, 0x0ULL);
@@ -1539,7 +1568,7 @@ int perfmon_finalizeCountersThread_ivybridge(int thread_id, PerfmonEventSet* eve
         }
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, LLU_CAST ovf_values_core, CLEAR_GLOBAL_OVF)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, ovf_values_core));
diff --git a/src/includes/perfmon_ivybridgeEP_counters.h b/src/includes/perfmon_ivybridgeEP_counters.h
index dc32f9e..404bc36 100644
--- a/src/includes/perfmon_ivybridgeEP_counters.h
+++ b/src/includes/perfmon_ivybridgeEP_counters.h
@@ -5,14 +5,14 @@
  *
  *      Description: Counter header file of perfmon module for Intel Ivy Bridge EP.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -29,9 +29,9 @@
  * =======================================================================================
  */
 
-#define NUM_COUNTERS_CORE_IVYBRIDGEEP 8
+#define NUM_COUNTERS_CORE_IVYBRIDGEEP 12
 #define NUM_COUNTERS_UNCORE_IVYBRIDGEEP 81
-#define NUM_COUNTERS_IVYBRIDGEEP 161
+#define NUM_COUNTERS_IVYBRIDGEEP 165
 
 #define IVBEP_VALID_OPTIONS_PMC EVENT_OPTION_EDGE_MASK|EVENT_OPTION_COUNT_KERNEL_MASK|EVENT_OPTION_INVERT_MASK|\
             EVENT_OPTION_ANYTHREAD_MASK|EVENT_OPTION_THRESHOLD_MASK
@@ -64,171 +64,176 @@ static RegisterMap ivybridgeEP_counter_map[NUM_COUNTERS_IVYBRIDGEEP] = {
     {"PMC1", PMC4, PMC, MSR_PERFEVTSEL1, MSR_PMC1, 0, 0, IVBEP_VALID_OPTIONS_PMC},
     {"PMC2", PMC5, PMC, MSR_PERFEVTSEL2, MSR_PMC2, 0, 0, IVBEP_VALID_OPTIONS_PMC},
     {"PMC3", PMC6, PMC, MSR_PERFEVTSEL3, MSR_PMC3, 0, 0, IVBEP_VALID_OPTIONS_PMC},
+    /* Additional PMC Counters: 4 48bit wide if HyperThreading is disabled*/
+    {"PMC4", PMC7, PMC, MSR_PERFEVTSEL4, MSR_PMC4, 0, 0, IVBEP_VALID_OPTIONS_PMC},
+    {"PMC5", PMC8, PMC, MSR_PERFEVTSEL5, MSR_PMC5, 0, 0, IVBEP_VALID_OPTIONS_PMC},
+    {"PMC6", PMC9, PMC, MSR_PERFEVTSEL6, MSR_PMC6, 0, 0, IVBEP_VALID_OPTIONS_PMC},
+    {"PMC7", PMC10, PMC, MSR_PERFEVTSEL7, MSR_PMC7, 0, 0, IVBEP_VALID_OPTIONS_PMC},
     /* Temperature Sensor*/
-    {"TMP0", PMC7, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"TMP0", PMC11, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
     /* RAPL counters */
-    {"PWR0", PMC8, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR1", PMC9, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR2", PMC10, POWER, 0, MSR_PP1_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR3", PMC11, POWER, 0, MSR_DRAM_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR0", PMC12, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR1", PMC13, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR2", PMC14, POWER, 0, MSR_PP1_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR3", PMC15, POWER, 0, MSR_DRAM_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
     /* CBOX counters, 44bits wide*/
-    {"CBOX0C0", PMC12, CBOX0, MSR_UNC_C0_PMON_CTL0, MSR_UNC_C0_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX0C1", PMC13, CBOX0, MSR_UNC_C0_PMON_CTL1, MSR_UNC_C0_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX0C2", PMC14, CBOX0, MSR_UNC_C0_PMON_CTL2, MSR_UNC_C0_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX0C3", PMC15, CBOX0, MSR_UNC_C0_PMON_CTL3, MSR_UNC_C0_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX1C0", PMC16, CBOX1, MSR_UNC_C1_PMON_CTL0, MSR_UNC_C1_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX1C1", PMC17, CBOX1, MSR_UNC_C1_PMON_CTL1, MSR_UNC_C1_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX1C2", PMC18, CBOX1, MSR_UNC_C1_PMON_CTL2, MSR_UNC_C1_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX1C3", PMC19, CBOX1, MSR_UNC_C1_PMON_CTL3, MSR_UNC_C1_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX2C0", PMC20, CBOX2, MSR_UNC_C2_PMON_CTL0, MSR_UNC_C2_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX2C1", PMC21, CBOX2, MSR_UNC_C2_PMON_CTL1, MSR_UNC_C2_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX2C2", PMC22, CBOX2, MSR_UNC_C2_PMON_CTL2, MSR_UNC_C2_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX2C3", PMC23, CBOX2, MSR_UNC_C2_PMON_CTL3, MSR_UNC_C2_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX3C0", PMC24, CBOX3, MSR_UNC_C3_PMON_CTL0, MSR_UNC_C3_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX3C1", PMC25, CBOX3, MSR_UNC_C3_PMON_CTL1, MSR_UNC_C3_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX3C2", PMC26, CBOX3, MSR_UNC_C3_PMON_CTL2, MSR_UNC_C3_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX3C3", PMC27, CBOX3, MSR_UNC_C3_PMON_CTL3, MSR_UNC_C3_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX4C0", PMC28, CBOX4, MSR_UNC_C4_PMON_CTL0, MSR_UNC_C4_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX4C1", PMC29, CBOX4, MSR_UNC_C4_PMON_CTL1, MSR_UNC_C4_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX4C2", PMC30, CBOX4, MSR_UNC_C4_PMON_CTL2, MSR_UNC_C4_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX4C3", PMC31, CBOX4, MSR_UNC_C4_PMON_CTL3, MSR_UNC_C4_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX5C0", PMC32, CBOX5, MSR_UNC_C5_PMON_CTL0, MSR_UNC_C5_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX5C1", PMC33, CBOX5, MSR_UNC_C5_PMON_CTL1, MSR_UNC_C5_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX5C2", PMC34, CBOX5, MSR_UNC_C5_PMON_CTL2, MSR_UNC_C5_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX5C3", PMC35, CBOX5, MSR_UNC_C5_PMON_CTL3, MSR_UNC_C5_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX6C0", PMC36, CBOX6, MSR_UNC_C6_PMON_CTL0, MSR_UNC_C6_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX6C1", PMC37, CBOX6, MSR_UNC_C6_PMON_CTL1, MSR_UNC_C6_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX6C2", PMC38, CBOX6, MSR_UNC_C6_PMON_CTL2, MSR_UNC_C6_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX6C3", PMC39, CBOX6, MSR_UNC_C6_PMON_CTL3, MSR_UNC_C6_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX7C0", PMC40, CBOX7, MSR_UNC_C7_PMON_CTL0, MSR_UNC_C7_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX7C1", PMC41, CBOX7, MSR_UNC_C7_PMON_CTL1, MSR_UNC_C7_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX7C2", PMC42, CBOX7, MSR_UNC_C7_PMON_CTL2, MSR_UNC_C7_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX7C3", PMC43, CBOX7, MSR_UNC_C7_PMON_CTL3, MSR_UNC_C7_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX8C0", PMC44, CBOX8, MSR_UNC_C8_PMON_CTL0, MSR_UNC_C8_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX8C1", PMC45, CBOX8, MSR_UNC_C8_PMON_CTL1, MSR_UNC_C8_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX8C2", PMC46, CBOX8, MSR_UNC_C8_PMON_CTL2, MSR_UNC_C8_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX8C3", PMC47, CBOX8, MSR_UNC_C8_PMON_CTL3, MSR_UNC_C8_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX9C0", PMC48, CBOX9, MSR_UNC_C9_PMON_CTL0, MSR_UNC_C9_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX9C1", PMC49, CBOX9, MSR_UNC_C9_PMON_CTL1, MSR_UNC_C9_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX9C2", PMC50, CBOX9, MSR_UNC_C9_PMON_CTL2, MSR_UNC_C9_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX9C3", PMC51, CBOX9, MSR_UNC_C9_PMON_CTL3, MSR_UNC_C9_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX10C0", PMC52, CBOX10, MSR_UNC_C10_PMON_CTL0, MSR_UNC_C10_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX10C1", PMC53, CBOX10, MSR_UNC_C10_PMON_CTL1, MSR_UNC_C10_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX10C2", PMC54, CBOX10, MSR_UNC_C10_PMON_CTL2, MSR_UNC_C10_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX10C3", PMC55, CBOX10, MSR_UNC_C10_PMON_CTL3, MSR_UNC_C10_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX11C0", PMC56, CBOX11, MSR_UNC_C11_PMON_CTL0, MSR_UNC_C11_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX11C1", PMC57, CBOX11, MSR_UNC_C11_PMON_CTL1, MSR_UNC_C11_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX11C2", PMC58, CBOX11, MSR_UNC_C11_PMON_CTL2, MSR_UNC_C11_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX11C3", PMC59, CBOX11, MSR_UNC_C11_PMON_CTL3, MSR_UNC_C11_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX12C0", PMC60, CBOX12, MSR_UNC_C12_PMON_CTL0, MSR_UNC_C12_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX12C1", PMC61, CBOX12, MSR_UNC_C12_PMON_CTL1, MSR_UNC_C12_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX12C2", PMC62, CBOX12, MSR_UNC_C12_PMON_CTL2, MSR_UNC_C12_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX12C3", PMC63, CBOX12, MSR_UNC_C12_PMON_CTL3, MSR_UNC_C12_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX13C0", PMC64, CBOX13, MSR_UNC_C13_PMON_CTL0, MSR_UNC_C13_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX13C1", PMC65, CBOX13, MSR_UNC_C13_PMON_CTL1, MSR_UNC_C13_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX13C2", PMC66, CBOX13, MSR_UNC_C13_PMON_CTL2, MSR_UNC_C13_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX13C3", PMC67, CBOX13, MSR_UNC_C13_PMON_CTL3, MSR_UNC_C13_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX14C0", PMC68, CBOX14, MSR_UNC_C14_PMON_CTL0, MSR_UNC_C14_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX14C1", PMC69, CBOX14, MSR_UNC_C14_PMON_CTL1, MSR_UNC_C14_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX14C2", PMC70, CBOX14, MSR_UNC_C14_PMON_CTL2, MSR_UNC_C14_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
-    {"CBOX14C3", PMC71, CBOX14, MSR_UNC_C14_PMON_CTL3, MSR_UNC_C14_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX0C0", PMC16, CBOX0, MSR_UNC_C0_PMON_CTL0, MSR_UNC_C0_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX0C1", PMC17, CBOX0, MSR_UNC_C0_PMON_CTL1, MSR_UNC_C0_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX0C2", PMC18, CBOX0, MSR_UNC_C0_PMON_CTL2, MSR_UNC_C0_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX0C3", PMC19, CBOX0, MSR_UNC_C0_PMON_CTL3, MSR_UNC_C0_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX1C0", PMC20, CBOX1, MSR_UNC_C1_PMON_CTL0, MSR_UNC_C1_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX1C1", PMC21, CBOX1, MSR_UNC_C1_PMON_CTL1, MSR_UNC_C1_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX1C2", PMC22, CBOX1, MSR_UNC_C1_PMON_CTL2, MSR_UNC_C1_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX1C3", PMC23, CBOX1, MSR_UNC_C1_PMON_CTL3, MSR_UNC_C1_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX2C0", PMC24, CBOX2, MSR_UNC_C2_PMON_CTL0, MSR_UNC_C2_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX2C1", PMC25, CBOX2, MSR_UNC_C2_PMON_CTL1, MSR_UNC_C2_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX2C2", PMC26, CBOX2, MSR_UNC_C2_PMON_CTL2, MSR_UNC_C2_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX2C3", PMC27, CBOX2, MSR_UNC_C2_PMON_CTL3, MSR_UNC_C2_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX3C0", PMC28, CBOX3, MSR_UNC_C3_PMON_CTL0, MSR_UNC_C3_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX3C1", PMC29, CBOX3, MSR_UNC_C3_PMON_CTL1, MSR_UNC_C3_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX3C2", PMC30, CBOX3, MSR_UNC_C3_PMON_CTL2, MSR_UNC_C3_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX3C3", PMC31, CBOX3, MSR_UNC_C3_PMON_CTL3, MSR_UNC_C3_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX4C0", PMC32, CBOX4, MSR_UNC_C4_PMON_CTL0, MSR_UNC_C4_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX4C1", PMC33, CBOX4, MSR_UNC_C4_PMON_CTL1, MSR_UNC_C4_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX4C2", PMC34, CBOX4, MSR_UNC_C4_PMON_CTL2, MSR_UNC_C4_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX4C3", PMC35, CBOX4, MSR_UNC_C4_PMON_CTL3, MSR_UNC_C4_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX5C0", PMC36, CBOX5, MSR_UNC_C5_PMON_CTL0, MSR_UNC_C5_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX5C1", PMC37, CBOX5, MSR_UNC_C5_PMON_CTL1, MSR_UNC_C5_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX5C2", PMC38, CBOX5, MSR_UNC_C5_PMON_CTL2, MSR_UNC_C5_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX5C3", PMC39, CBOX5, MSR_UNC_C5_PMON_CTL3, MSR_UNC_C5_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX6C0", PMC40, CBOX6, MSR_UNC_C6_PMON_CTL0, MSR_UNC_C6_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX6C1", PMC41, CBOX6, MSR_UNC_C6_PMON_CTL1, MSR_UNC_C6_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX6C2", PMC42, CBOX6, MSR_UNC_C6_PMON_CTL2, MSR_UNC_C6_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX6C3", PMC43, CBOX6, MSR_UNC_C6_PMON_CTL3, MSR_UNC_C6_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX7C0", PMC44, CBOX7, MSR_UNC_C7_PMON_CTL0, MSR_UNC_C7_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX7C1", PMC45, CBOX7, MSR_UNC_C7_PMON_CTL1, MSR_UNC_C7_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX7C2", PMC46, CBOX7, MSR_UNC_C7_PMON_CTL2, MSR_UNC_C7_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX7C3", PMC47, CBOX7, MSR_UNC_C7_PMON_CTL3, MSR_UNC_C7_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX8C0", PMC48, CBOX8, MSR_UNC_C8_PMON_CTL0, MSR_UNC_C8_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX8C1", PMC49, CBOX8, MSR_UNC_C8_PMON_CTL1, MSR_UNC_C8_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX8C2", PMC50, CBOX8, MSR_UNC_C8_PMON_CTL2, MSR_UNC_C8_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX8C3", PMC51, CBOX8, MSR_UNC_C8_PMON_CTL3, MSR_UNC_C8_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX9C0", PMC52, CBOX9, MSR_UNC_C9_PMON_CTL0, MSR_UNC_C9_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX9C1", PMC53, CBOX9, MSR_UNC_C9_PMON_CTL1, MSR_UNC_C9_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX9C2", PMC54, CBOX9, MSR_UNC_C9_PMON_CTL2, MSR_UNC_C9_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX9C3", PMC55, CBOX9, MSR_UNC_C9_PMON_CTL3, MSR_UNC_C9_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX10C0", PMC56, CBOX10, MSR_UNC_C10_PMON_CTL0, MSR_UNC_C10_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX10C1", PMC57, CBOX10, MSR_UNC_C10_PMON_CTL1, MSR_UNC_C10_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX10C2", PMC58, CBOX10, MSR_UNC_C10_PMON_CTL2, MSR_UNC_C10_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX10C3", PMC59, CBOX10, MSR_UNC_C10_PMON_CTL3, MSR_UNC_C10_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX11C0", PMC60, CBOX11, MSR_UNC_C11_PMON_CTL0, MSR_UNC_C11_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX11C1", PMC61, CBOX11, MSR_UNC_C11_PMON_CTL1, MSR_UNC_C11_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX11C2", PMC62, CBOX11, MSR_UNC_C11_PMON_CTL2, MSR_UNC_C11_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX11C3", PMC63, CBOX11, MSR_UNC_C11_PMON_CTL3, MSR_UNC_C11_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX12C0", PMC64, CBOX12, MSR_UNC_C12_PMON_CTL0, MSR_UNC_C12_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX12C1", PMC65, CBOX12, MSR_UNC_C12_PMON_CTL1, MSR_UNC_C12_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX12C2", PMC66, CBOX12, MSR_UNC_C12_PMON_CTL2, MSR_UNC_C12_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX12C3", PMC67, CBOX12, MSR_UNC_C12_PMON_CTL3, MSR_UNC_C12_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX13C0", PMC68, CBOX13, MSR_UNC_C13_PMON_CTL0, MSR_UNC_C13_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX13C1", PMC69, CBOX13, MSR_UNC_C13_PMON_CTL1, MSR_UNC_C13_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX13C2", PMC70, CBOX13, MSR_UNC_C13_PMON_CTL2, MSR_UNC_C13_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX13C3", PMC71, CBOX13, MSR_UNC_C13_PMON_CTL3, MSR_UNC_C13_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX14C0", PMC72, CBOX14, MSR_UNC_C14_PMON_CTL0, MSR_UNC_C14_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX14C1", PMC73, CBOX14, MSR_UNC_C14_PMON_CTL1, MSR_UNC_C14_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX14C2", PMC74, CBOX14, MSR_UNC_C14_PMON_CTL2, MSR_UNC_C14_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
+    {"CBOX14C3", PMC75, CBOX14, MSR_UNC_C14_PMON_CTL3, MSR_UNC_C14_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_CBOX},
     /* Uncore management Counters: 2 48bit wide counters */
-    {"UBOX0", PMC72, UBOX, MSR_UNC_U_PMON_CTL0, MSR_UNC_U_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_UBOX},
-    {"UBOX1", PMC73, UBOX, MSR_UNC_U_PMON_CTL1, MSR_UNC_U_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_UBOX},
-    {"UBOXFIX", PMC74, UBOXFIX, MSR_UNC_U_UCLK_FIXED_CTL, MSR_UNC_U_UCLK_FIXED_CTR, 0, 0, 0},
+    {"UBOX0", PMC76, UBOX, MSR_UNC_U_PMON_CTL0, MSR_UNC_U_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_UBOX},
+    {"UBOX1", PMC77, UBOX, MSR_UNC_U_PMON_CTL1, MSR_UNC_U_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_UBOX},
+    {"UBOXFIX", PMC78, UBOXFIX, MSR_UNC_U_UCLK_FIXED_CTL, MSR_UNC_U_UCLK_FIXED_CTR, 0, 0, 0},
     /* PCU Counters: 4 48bit wide counters */
-    {"WBOX0", PMC75, WBOX, MSR_UNC_PCU_PMON_CTL0, MSR_UNC_PCU_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_WBOX},
-    {"WBOX1", PMC76, WBOX, MSR_UNC_PCU_PMON_CTL1, MSR_UNC_PCU_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_WBOX},
-    {"WBOX2", PMC77, WBOX, MSR_UNC_PCU_PMON_CTL2, MSR_UNC_PCU_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_WBOX},
-    {"WBOX3", PMC78, WBOX, MSR_UNC_PCU_PMON_CTL3, MSR_UNC_PCU_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_WBOX},
-    {"WBOX0FIX", PMC79, WBOX0FIX, 0, MSR_UNC_PCU_PMON_FIXED_CTR0, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"WBOX1FIX", PMC80, WBOX1FIX, 0, MSR_UNC_PCU_PMON_FIXED_CTR1, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"WBOX0", PMC79, WBOX, MSR_UNC_PCU_PMON_CTL0, MSR_UNC_PCU_PMON_CTR0, 0, 0, IVBEP_VALID_OPTIONS_WBOX},
+    {"WBOX1", PMC80, WBOX, MSR_UNC_PCU_PMON_CTL1, MSR_UNC_PCU_PMON_CTR1, 0, 0, IVBEP_VALID_OPTIONS_WBOX},
+    {"WBOX2", PMC81, WBOX, MSR_UNC_PCU_PMON_CTL2, MSR_UNC_PCU_PMON_CTR2, 0, 0, IVBEP_VALID_OPTIONS_WBOX},
+    {"WBOX3", PMC82, WBOX, MSR_UNC_PCU_PMON_CTL3, MSR_UNC_PCU_PMON_CTR3, 0, 0, IVBEP_VALID_OPTIONS_WBOX},
+    {"WBOX0FIX", PMC83, WBOX0FIX, 0, MSR_UNC_PCU_PMON_FIXED_CTR0, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"WBOX1FIX", PMC84, WBOX1FIX, 0, MSR_UNC_PCU_PMON_FIXED_CTR1, 0, 0, EVENT_OPTION_NONE_MASK},
     /* IMC Counters: 4 48bit wide per memory channel, split in two reads */
-    {"MBOX0C0",PMC81, MBOX0, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_0, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX0C1",PMC82, MBOX0, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_0, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX0C2",PMC83, MBOX0, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_0, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX0C3",PMC84, MBOX0, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_0, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX0FIX",PMC85, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
-    {"MBOX1C0",PMC86, MBOX1, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_1, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX1C1",PMC87, MBOX1, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_1, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX1C2",PMC88, MBOX1, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_1, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX1C3",PMC89, MBOX1, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_1, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX1FIX",PMC90, MBOX1FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_1, EVENT_OPTION_NONE_MASK},
-    {"MBOX2C0",PMC91, MBOX2, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_2, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX2C1",PMC92, MBOX2, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_2, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX2C2",PMC93, MBOX2, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_2, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX2C3",PMC94, MBOX2, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_2, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX2FIX",PMC95, MBOX2FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_2, EVENT_OPTION_NONE_MASK},
-    {"MBOX3C0",PMC96, MBOX3, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_3, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX3C1",PMC97, MBOX3, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_3, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX3C2",PMC98, MBOX3, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_3, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX3C3",PMC99, MBOX3, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_3, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX3FIX",PMC100, MBOX3FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_3, EVENT_OPTION_NONE_MASK},
-    {"MBOX4C0",PMC101, MBOX4, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_0, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX4C1",PMC102, MBOX4, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_0, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX4C2",PMC103, MBOX4, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_0, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX4C3",PMC104, MBOX4, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_0, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX4FIX",PMC105, MBOX4FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_0, EVENT_OPTION_NONE_MASK},
-    {"MBOX5C0",PMC106, MBOX5, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_1, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX5C1",PMC107, MBOX5, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_1, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX5C2",PMC108, MBOX5, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_1, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX5C3",PMC109, MBOX5, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_1, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX5FIX",PMC110, MBOX5FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_1, EVENT_OPTION_NONE_MASK},
-    {"MBOX6C0",PMC111, MBOX6, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_2, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX6C1",PMC112, MBOX6, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_2, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX6C2",PMC113, MBOX6, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_2, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX6C3",PMC114, MBOX6, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_2, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX6FIX",PMC115, MBOX6FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_2, EVENT_OPTION_NONE_MASK},
-    {"MBOX7C0",PMC116, MBOX7, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_3, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX7C1",PMC117, MBOX7, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_3, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX7C2",PMC118, MBOX7, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_3, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX7C3",PMC119, MBOX7, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_3, IVBEP_VALID_OPTIONS_MBOX},
-    {"MBOX7FIX",PMC120, MBOX7FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_3, EVENT_OPTION_NONE_MASK},
+    {"MBOX0C0",PMC85, MBOX0, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_0, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX0C1",PMC86, MBOX0, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_0, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX0C2",PMC87, MBOX0, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_0, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX0C3",PMC88, MBOX0, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_0, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX0FIX",PMC89, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
+    {"MBOX1C0",PMC90, MBOX1, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_1, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX1C1",PMC91, MBOX1, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_1, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX1C2",PMC92, MBOX1, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_1, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX1C3",PMC93, MBOX1, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_1, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX1FIX",PMC94, MBOX1FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_1, EVENT_OPTION_NONE_MASK},
+    {"MBOX2C0",PMC95, MBOX2, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_2, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX2C1",PMC96, MBOX2, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_2, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX2C2",PMC97, MBOX2, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_2, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX2C3",PMC98, MBOX2, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_2, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX2FIX",PMC99, MBOX2FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_2, EVENT_OPTION_NONE_MASK},
+    {"MBOX3C0",PMC100, MBOX3, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_3, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX3C1",PMC101, MBOX3, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_3, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX3C2",PMC102, MBOX3, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_3, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX3C3",PMC103, MBOX3, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_3, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX3FIX",PMC104, MBOX3FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_3, EVENT_OPTION_NONE_MASK},
+    {"MBOX4C0",PMC105, MBOX4, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_0, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX4C1",PMC106, MBOX4, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_0, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX4C2",PMC107, MBOX4, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_0, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX4C3",PMC108, MBOX4, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_0, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX4FIX",PMC109, MBOX4FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_0, EVENT_OPTION_NONE_MASK},
+    {"MBOX5C0",PMC110, MBOX5, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_1, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX5C1",PMC111, MBOX5, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_1, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX5C2",PMC112, MBOX5, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_1, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX5C3",PMC113, MBOX5, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_1, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX5FIX",PMC114, MBOX5FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_1, EVENT_OPTION_NONE_MASK},
+    {"MBOX6C0",PMC115, MBOX6, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_2, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX6C1",PMC116, MBOX6, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_2, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX6C2",PMC117, MBOX6, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_2, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX6C3",PMC118, MBOX6, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_2, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX6FIX",PMC119, MBOX6FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_2, EVENT_OPTION_NONE_MASK},
+    {"MBOX7C0",PMC120, MBOX7, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_1_CH_3, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX7C1",PMC121, MBOX7, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_1_CH_3, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX7C2",PMC122, MBOX7, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_1_CH_3, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX7C3",PMC123, MBOX7, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_1_CH_3, IVBEP_VALID_OPTIONS_MBOX},
+    {"MBOX7FIX",PMC124, MBOX7FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_3, EVENT_OPTION_NONE_MASK},
     /* QPI counters four 48bit wide per port, split in two reads */
-    {"SBOX0C0",PMC121, SBOX0, PCI_UNC_QPI_PMON_CTL_0, PCI_UNC_QPI_PMON_CTR_0_A, PCI_UNC_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_0, IVBEP_VALID_OPTIONS_SBOX},
-    {"SBOX0C1",PMC122, SBOX0, PCI_UNC_QPI_PMON_CTL_1, PCI_UNC_QPI_PMON_CTR_1_A, PCI_UNC_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_0, IVBEP_VALID_OPTIONS_SBOX},
-    {"SBOX0C2",PMC123, SBOX0, PCI_UNC_QPI_PMON_CTL_2, PCI_UNC_QPI_PMON_CTR_2_A, PCI_UNC_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_0, IVBEP_VALID_OPTIONS_SBOX},
-    {"SBOX0C3",PMC124, SBOX0, PCI_UNC_QPI_PMON_CTL_3, PCI_UNC_QPI_PMON_CTR_3_A, PCI_UNC_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_0, IVBEP_VALID_OPTIONS_SBOX},
-    {"SBOX1C0",PMC125, SBOX1, PCI_UNC_QPI_PMON_CTL_0, PCI_UNC_QPI_PMON_CTR_0_A, PCI_UNC_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_1, IVBEP_VALID_OPTIONS_SBOX},
-    {"SBOX1C1",PMC126, SBOX1, PCI_UNC_QPI_PMON_CTL_1, PCI_UNC_QPI_PMON_CTR_1_A, PCI_UNC_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_1, IVBEP_VALID_OPTIONS_SBOX},
-    {"SBOX1C2",PMC127, SBOX1, PCI_UNC_QPI_PMON_CTL_2, PCI_UNC_QPI_PMON_CTR_2_A, PCI_UNC_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_1, IVBEP_VALID_OPTIONS_SBOX},
-    {"SBOX1C3",PMC128, SBOX1, PCI_UNC_QPI_PMON_CTL_3, PCI_UNC_QPI_PMON_CTR_3_A, PCI_UNC_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_1, IVBEP_VALID_OPTIONS_SBOX},
-    {"SBOX2C0",PMC129, SBOX2, PCI_UNC_QPI_PMON_CTL_0, PCI_UNC_QPI_PMON_CTR_0_A, PCI_UNC_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_2, IVBEP_VALID_OPTIONS_SBOX},
-    {"SBOX2C1",PMC130, SBOX2, PCI_UNC_QPI_PMON_CTL_1, PCI_UNC_QPI_PMON_CTR_1_A, PCI_UNC_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_2, IVBEP_VALID_OPTIONS_SBOX},
-    {"SBOX2C2",PMC131, SBOX2, PCI_UNC_QPI_PMON_CTL_2, PCI_UNC_QPI_PMON_CTR_2_A, PCI_UNC_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_2, IVBEP_VALID_OPTIONS_SBOX},
-    {"SBOX2C3",PMC132, SBOX2, PCI_UNC_QPI_PMON_CTL_3, PCI_UNC_QPI_PMON_CTR_3_A, PCI_UNC_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_2, IVBEP_VALID_OPTIONS_SBOX},
-    {"SBOX0FIX",PMC133, SBOX0FIX, 0, PCI_UNC_QPI_RATE_STATUS, 0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
-    {"SBOX1FIX",PMC134, SBOX1FIX, 0, PCI_UNC_QPI_RATE_STATUS, 0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
-    {"SBOX2FIX",PMC135, SBOX2FIX, 0, PCI_UNC_QPI_RATE_STATUS, 0, PCI_QPI_MISC_DEVICE_PORT_2, EVENT_OPTION_NONE_MASK},
+    {"SBOX0C0",PMC125, SBOX0, PCI_UNC_QPI_PMON_CTL_0, PCI_UNC_QPI_PMON_CTR_0_A, PCI_UNC_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_0, IVBEP_VALID_OPTIONS_SBOX},
+    {"SBOX0C1",PMC126, SBOX0, PCI_UNC_QPI_PMON_CTL_1, PCI_UNC_QPI_PMON_CTR_1_A, PCI_UNC_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_0, IVBEP_VALID_OPTIONS_SBOX},
+    {"SBOX0C2",PMC127, SBOX0, PCI_UNC_QPI_PMON_CTL_2, PCI_UNC_QPI_PMON_CTR_2_A, PCI_UNC_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_0, IVBEP_VALID_OPTIONS_SBOX},
+    {"SBOX0C3",PMC128, SBOX0, PCI_UNC_QPI_PMON_CTL_3, PCI_UNC_QPI_PMON_CTR_3_A, PCI_UNC_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_0, IVBEP_VALID_OPTIONS_SBOX},
+    {"SBOX1C0",PMC129, SBOX1, PCI_UNC_QPI_PMON_CTL_0, PCI_UNC_QPI_PMON_CTR_0_A, PCI_UNC_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_1, IVBEP_VALID_OPTIONS_SBOX},
+    {"SBOX1C1",PMC130, SBOX1, PCI_UNC_QPI_PMON_CTL_1, PCI_UNC_QPI_PMON_CTR_1_A, PCI_UNC_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_1, IVBEP_VALID_OPTIONS_SBOX},
+    {"SBOX1C2",PMC131, SBOX1, PCI_UNC_QPI_PMON_CTL_2, PCI_UNC_QPI_PMON_CTR_2_A, PCI_UNC_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_1, IVBEP_VALID_OPTIONS_SBOX},
+    {"SBOX1C3",PMC132, SBOX1, PCI_UNC_QPI_PMON_CTL_3, PCI_UNC_QPI_PMON_CTR_3_A, PCI_UNC_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_1, IVBEP_VALID_OPTIONS_SBOX},
+    {"SBOX2C0",PMC133, SBOX2, PCI_UNC_QPI_PMON_CTL_0, PCI_UNC_QPI_PMON_CTR_0_A, PCI_UNC_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_2, IVBEP_VALID_OPTIONS_SBOX},
+    {"SBOX2C1",PMC134, SBOX2, PCI_UNC_QPI_PMON_CTL_1, PCI_UNC_QPI_PMON_CTR_1_A, PCI_UNC_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_2, IVBEP_VALID_OPTIONS_SBOX},
+    {"SBOX2C2",PMC135, SBOX2, PCI_UNC_QPI_PMON_CTL_2, PCI_UNC_QPI_PMON_CTR_2_A, PCI_UNC_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_2, IVBEP_VALID_OPTIONS_SBOX},
+    {"SBOX2C3",PMC136, SBOX2, PCI_UNC_QPI_PMON_CTL_3, PCI_UNC_QPI_PMON_CTR_3_A, PCI_UNC_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_2, IVBEP_VALID_OPTIONS_SBOX},
+    {"SBOX0FIX",PMC137, SBOX0FIX, 0, PCI_UNC_QPI_RATE_STATUS, 0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
+    {"SBOX1FIX",PMC138, SBOX1FIX, 0, PCI_UNC_QPI_RATE_STATUS, 0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
+    {"SBOX2FIX",PMC139, SBOX2FIX, 0, PCI_UNC_QPI_RATE_STATUS, 0, PCI_QPI_MISC_DEVICE_PORT_2, EVENT_OPTION_NONE_MASK},
     /* HA counters four 48bit wide per counter, split in two reads */
-    {"BBOX0C0", PMC136, BBOX0, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_0, IVBEP_VALID_OPTIONS_BBOX},
-    {"BBOX0C1", PMC137, BBOX0, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_0, IVBEP_VALID_OPTIONS_BBOX},
-    {"BBOX0C2", PMC138, BBOX0, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_0, IVBEP_VALID_OPTIONS_BBOX},
-    {"BBOX0C3", PMC139, BBOX0, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_0, IVBEP_VALID_OPTIONS_BBOX},
-    {"BBOX1C0", PMC140, BBOX1, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_1, IVBEP_VALID_OPTIONS_BBOX},
-    {"BBOX1C1", PMC141, BBOX1, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_1, IVBEP_VALID_OPTIONS_BBOX},
-    {"BBOX1C2", PMC142, BBOX1, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_1, IVBEP_VALID_OPTIONS_BBOX},
-    {"BBOX1C3", PMC143, BBOX1, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_1, IVBEP_VALID_OPTIONS_BBOX},
+    {"BBOX0C0", PMC140, BBOX0, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_0, IVBEP_VALID_OPTIONS_BBOX},
+    {"BBOX0C1", PMC141, BBOX0, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_0, IVBEP_VALID_OPTIONS_BBOX},
+    {"BBOX0C2", PMC142, BBOX0, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_0, IVBEP_VALID_OPTIONS_BBOX},
+    {"BBOX0C3", PMC143, BBOX0, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_0, IVBEP_VALID_OPTIONS_BBOX},
+    {"BBOX1C0", PMC144, BBOX1, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_1, IVBEP_VALID_OPTIONS_BBOX},
+    {"BBOX1C1", PMC145, BBOX1, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_1, IVBEP_VALID_OPTIONS_BBOX},
+    {"BBOX1C2", PMC146, BBOX1, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_1, IVBEP_VALID_OPTIONS_BBOX},
+    {"BBOX1C3", PMC147, BBOX1, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_1, IVBEP_VALID_OPTIONS_BBOX},
     /* R2PCIe counters four 44bit wide per counter, split in two reads */
-    {"PBOX0", PMC144, PBOX, PCI_UNC_R2PCIE_PMON_CTL_0, PCI_UNC_R2PCIE_PMON_CTR_0_A, PCI_UNC_R2PCIE_PMON_CTR_0_B, PCI_R2PCIE_DEVICE, IVBEP_VALID_OPTIONS_PBOX},
-    {"PBOX1", PMC145, PBOX, PCI_UNC_R2PCIE_PMON_CTL_1, PCI_UNC_R2PCIE_PMON_CTR_1_A, PCI_UNC_R2PCIE_PMON_CTR_1_B, PCI_R2PCIE_DEVICE, IVBEP_VALID_OPTIONS_PBOX},
-    {"PBOX2", PMC146, PBOX, PCI_UNC_R2PCIE_PMON_CTL_2, PCI_UNC_R2PCIE_PMON_CTR_2_A, PCI_UNC_R2PCIE_PMON_CTR_2_B, PCI_R2PCIE_DEVICE, IVBEP_VALID_OPTIONS_PBOX},
-    {"PBOX3", PMC147, PBOX, PCI_UNC_R2PCIE_PMON_CTL_3, PCI_UNC_R2PCIE_PMON_CTR_3_A, PCI_UNC_R2PCIE_PMON_CTR_3_B, PCI_R2PCIE_DEVICE, IVBEP_VALID_OPTIONS_PBOX},
+    {"PBOX0", PMC148, PBOX, PCI_UNC_R2PCIE_PMON_CTL_0, PCI_UNC_R2PCIE_PMON_CTR_0_A, PCI_UNC_R2PCIE_PMON_CTR_0_B, PCI_R2PCIE_DEVICE, IVBEP_VALID_OPTIONS_PBOX},
+    {"PBOX1", PMC149, PBOX, PCI_UNC_R2PCIE_PMON_CTL_1, PCI_UNC_R2PCIE_PMON_CTR_1_A, PCI_UNC_R2PCIE_PMON_CTR_1_B, PCI_R2PCIE_DEVICE, IVBEP_VALID_OPTIONS_PBOX},
+    {"PBOX2", PMC150, PBOX, PCI_UNC_R2PCIE_PMON_CTL_2, PCI_UNC_R2PCIE_PMON_CTR_2_A, PCI_UNC_R2PCIE_PMON_CTR_2_B, PCI_R2PCIE_DEVICE, IVBEP_VALID_OPTIONS_PBOX},
+    {"PBOX3", PMC151, PBOX, PCI_UNC_R2PCIE_PMON_CTL_3, PCI_UNC_R2PCIE_PMON_CTR_3_A, PCI_UNC_R2PCIE_PMON_CTR_3_B, PCI_R2PCIE_DEVICE, IVBEP_VALID_OPTIONS_PBOX},
     /* R3QPI counters four 44bit wide per counter, split in two reads */
-    {"RBOX0C0", PMC148, RBOX0, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_0, IVBEP_VALID_OPTIONS_RBOX},
-    {"RBOX0C1", PMC149, RBOX0, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_0, IVBEP_VALID_OPTIONS_RBOX},
-    {"RBOX0C2", PMC150, RBOX0, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_0, IVBEP_VALID_OPTIONS_RBOX},
-    {"RBOX1C0", PMC151, RBOX1, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_1, IVBEP_VALID_OPTIONS_RBOX},
-    {"RBOX1C1", PMC152, RBOX1, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_1, IVBEP_VALID_OPTIONS_RBOX},
-    {"RBOX1C2", PMC153, RBOX1, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_1, IVBEP_VALID_OPTIONS_RBOX},
-    {"RBOX2C0", PMC154, RBOX2, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_2, IVBEP_VALID_OPTIONS_RBOX},
-    {"RBOX2C1", PMC155, RBOX2, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_2, IVBEP_VALID_OPTIONS_RBOX},
-    {"RBOX2C2", PMC156, RBOX2, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_2, IVBEP_VALID_OPTIONS_RBOX},
+    {"RBOX0C0", PMC152, RBOX0, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_0, IVBEP_VALID_OPTIONS_RBOX},
+    {"RBOX0C1", PMC153, RBOX0, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_0, IVBEP_VALID_OPTIONS_RBOX},
+    {"RBOX0C2", PMC154, RBOX0, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_0, IVBEP_VALID_OPTIONS_RBOX},
+    {"RBOX1C0", PMC155, RBOX1, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_1, IVBEP_VALID_OPTIONS_RBOX},
+    {"RBOX1C1", PMC156, RBOX1, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_1, IVBEP_VALID_OPTIONS_RBOX},
+    {"RBOX1C2", PMC157, RBOX1, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_1, IVBEP_VALID_OPTIONS_RBOX},
+    {"RBOX2C0", PMC158, RBOX2, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_2, IVBEP_VALID_OPTIONS_RBOX},
+    {"RBOX2C1", PMC159, RBOX2, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_2, IVBEP_VALID_OPTIONS_RBOX},
+    {"RBOX2C2", PMC160, RBOX2, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_2, IVBEP_VALID_OPTIONS_RBOX},
     /* IRP counters four 44bit wide per counter */
-    {"IBOX0C0", PMC157, IBOX0, PCI_UNC_IRP0_PMON_CTL_0, PCI_UNC_IRP0_PMON_CTR_0, 0, PCI_IRP_DEVICE, IVBEP_VALID_OPTIONS_IBOX},
-    {"IBOX0C1", PMC158, IBOX0, PCI_UNC_IRP0_PMON_CTL_1, PCI_UNC_IRP0_PMON_CTR_1, 0, PCI_IRP_DEVICE, IVBEP_VALID_OPTIONS_IBOX},
-    {"IBOX1C0", PMC159, IBOX1, PCI_UNC_IRP1_PMON_CTL_0, PCI_UNC_IRP1_PMON_CTR_0, 0, PCI_IRP_DEVICE, IVBEP_VALID_OPTIONS_IBOX},
-    {"IBOX1C1", PMC160, IBOX1, PCI_UNC_IRP1_PMON_CTL_1, PCI_UNC_IRP1_PMON_CTR_1, 0, PCI_IRP_DEVICE, IVBEP_VALID_OPTIONS_IBOX},
+    {"IBOX0C0", PMC161, IBOX0, PCI_UNC_IRP0_PMON_CTL_0, PCI_UNC_IRP0_PMON_CTR_0, 0, PCI_IRP_DEVICE, IVBEP_VALID_OPTIONS_IBOX},
+    {"IBOX0C1", PMC162, IBOX0, PCI_UNC_IRP0_PMON_CTL_1, PCI_UNC_IRP0_PMON_CTR_1, 0, PCI_IRP_DEVICE, IVBEP_VALID_OPTIONS_IBOX},
+    {"IBOX1C0", PMC163, IBOX1, PCI_UNC_IRP1_PMON_CTL_0, PCI_UNC_IRP1_PMON_CTR_0, 0, PCI_IRP_DEVICE, IVBEP_VALID_OPTIONS_IBOX},
+    {"IBOX1C1", PMC164, IBOX1, PCI_UNC_IRP1_PMON_CTL_1, PCI_UNC_IRP1_PMON_CTR_1, 0, PCI_IRP_DEVICE, IVBEP_VALID_OPTIONS_IBOX},
 };
 
 static BoxMap ivybridgeEP_box_map[NUM_UNITS] = {
diff --git a/src/includes/perfmon_ivybridgeEP_events.txt b/src/includes/perfmon_ivybridgeEP_events.txt
index fe77350..86f764d 100644
--- a/src/includes/perfmon_ivybridgeEP_events.txt
+++ b/src/includes/perfmon_ivybridgeEP_events.txt
@@ -4,14 +4,14 @@
 #
 #      Description:  Event list for Intel Ivy Bridge EP/EN/EX
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
@@ -29,7 +29,6 @@
 
 EVENT_TEMP_CORE               0x00   TMP0
 UMASK_TEMP_CORE               0x00
-
 EVENT_PWR_PKG_ENERGY          0x00   PWR0
 UMASK_PWR_PKG_ENERGY          0x00
 
@@ -71,31 +70,31 @@ UMASK_INT_MISC_RECOVERY_CYCLES       0x03
 DEFAULT_OPTIONS_INT_MISC_RECOVERY_COUNT EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
 UMASK_INT_MISC_RECOVERY_COUNT        0x03
 
-EVENT_UOPS_ISSUED                     0x0E  PMC
-UMASK_UOPS_ISSUED_ANY                 0x01
-UMASK_UOPS_ISSUED_FLAGS_MERGE         0x10
-UMASK_UOPS_ISSUED_SLOW_LEA            0x20
-UMASK_UOPS_ISSUED_SINGLE_MUL          0x40
+EVENT_UOPS_ISSUED                0x0E  PMC
+UMASK_UOPS_ISSUED_ANY            0x01
+UMASK_UOPS_ISSUED_FLAGS_MERGE    0x10
+UMASK_UOPS_ISSUED_SLOW_LEA       0x20
+UMASK_UOPS_ISSUED_SINGLE_MUL     0x40
 DEFAULT_OPTIONS_UOPS_ISSUED_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
-UMASK_UOPS_ISSUED_USED_CYCLES         0x01
+UMASK_UOPS_ISSUED_USED_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
-UMASK_UOPS_ISSUED_STALL_CYCLES        0x01
+UMASK_UOPS_ISSUED_STALL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1
-UMASK_UOPS_ISSUED_TOTAL_CYCLES        0x01
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_ANY EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_ANY            0x01
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_FLAGS_MERGE EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_FLAGS_MERGE    0x10
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_SLOW_LEA EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_SLOW_LEA       0x20
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_SINGLE_MUL EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_SINGLE_MUL     0x40
+UMASK_UOPS_ISSUED_TOTAL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_USED_CYCLES    0x01
+UMASK_UOPS_ISSUED_CORE_USED_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_ISSUED_CORE_STALL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_ISSUED_CORE_TOTAL_CYCLES   0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC 0x01
 
 EVENT_FP_COMP_OPS_EXE                          0x10   PMC
 UMASK_FP_COMP_OPS_EXE_X87                      0x01
@@ -193,22 +192,100 @@ UMASK_CACHE_LOCK_CYCLES_CACHE_LOCK_DURATION          0x02
 DEFAULT_OPTIONS_CACHE_LOCK_CYCLES_CACHE_LOCK_COUNT EVENT_OPTION_EDGE=1
 UMASK_CACHE_LOCK_CYCLES_CACHE_LOCK_COUNT             0x02
 
-EVENT_IDQ                              0x79   PMC
-UMASK_IDQ_EMPTY                        0x02
-UMASK_IDQ_MITE_UOPS                    0x04
-UMASK_IDQ_DSB_UOPS                     0x08
-UMASK_IDQ_MS_DSB_UOPS                  0x10
-UMASK_IDQ_MS_MITE_UOPS                 0x20
-UMASK_IDQ_MS_UOPS                      0x30
+EVENT_IDQ                               0x79   PMC
+UMASK_IDQ_EMPTY                         0x02
+UMASK_IDQ_MITE_UOPS                     0x04
+UMASK_IDQ_DSB_UOPS                      0x08
+UMASK_IDQ_MS_DSB_UOPS                   0x10
+UMASK_IDQ_MS_MITE_UOPS                  0x20
+UMASK_IDQ_MS_UOPS                       0x30
+UMASK_IDQ_ALL_DSB_UOPS                  0x18
+UMASK_IDQ_MITE_ALL_UOPS                 0x24
+UMASK_IDQ_ALL_UOPS                      0x3C
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES         EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES                   0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES_1_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MITE_CYCLES_2_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MITE_CYCLES_3_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MITE_CYCLES_4_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES          EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES                    0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES_1_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_DSB_CYCLES_2_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_DSB_CYCLES_3_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_DSB_CYCLES_4_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES       EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES                 0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES_1_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_DSB_CYCLES_2_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_DSB_CYCLES_3_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_DSB_CYCLES_4_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_OCCUR        EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
+UMASK_IDQ_MS_DSB_OCCUR                  0x10
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES      EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES                0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES_1_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_MITE_CYCLES_2_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_MITE_CYCLES_3_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_MITE_CYCLES_4_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_CYCLES           EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES                     0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES_1_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_CYCLES_2_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_CYCLES_3_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_CYCLES_4_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_SWITCHES         EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
+UMASK_IDQ_MS_SWITCHES                   0x30
 DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x1
-UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS      0x18
+UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS       0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_DSB_CYCLES_1_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_DSB_CYCLES_2_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_DSB_CYCLES_3_UOPS         0x18
 DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
-UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS        0x18
-DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x1
-UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS     0x24
+UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS      0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_1_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_MITE_CYCLES_2_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_MITE_CYCLES_3_UOPS        0x24
 DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
-UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS       0x24
-UMASK_IDQ_ALL_MITE_ALL_UOPS            0x3C
+UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_ANY_UOPS      0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_1_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_CYCLES_2_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_CYCLES_3_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_CYCLES_4_UOPS        0x3C
 
 EVENT_ICACHE                    0x80   PMC
 UMASK_ICACHE_HITS               0x01
@@ -290,14 +367,24 @@ UMASK_RESOURCE_STALLS_SB              0x08
 UMASK_RESOURCE_STALLS_ROB             0x10
 
 EVENT_CYCLE_ACTIVITY                               0xA3   PMC
-DEFAULT_OPTIONS_CYCLE_ACTIVITY_L2_PENDING          EVENT_OPTION_THRESHOLD=0x01
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L2_PENDING   EVENT_OPTION_THRESHOLD=0x01
 UMASK_CYCLE_ACTIVITY_CYCLES_L2_PENDING             0x01
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_LDM_PENDING  EVENT_OPTION_THRESHOLD=0x02
 UMASK_CYCLE_ACTIVITY_CYCLES_LDM_PENDING            0x02
-DEFAULT_OPTIONS_CYCLE_ACTIVITY_L1D_PENDING         EVENT_OPTION_THRESHOLD=0x08
-UMASK_CYCLE_ACTIVITY_L1D_PENDING                   0x08
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE   EVENT_OPTION_THRESHOLD=0x04
 UMASK_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE             0x04
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L2_PENDING   EVENT_OPTION_THRESHOLD=0x05
+UMASK_CYCLE_ACTIVITY_STALLS_L2_PENDING             0x05
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_LDM_PENDING  EVENT_OPTION_THRESHOLD=0x06
+UMASK_CYCLE_ACTIVITY_STALLS_LDM_PENDING            0x06
+
+EVENT_CYCLE_ACTIVITY_CYCLES_L1D_PENDING                  0xA3   PMC2
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L1D_PENDING        EVENT_OPTION_THRESHOLD=0x08
+UMASK_CYCLE_ACTIVITY_CYCLES_L1D_PENDING                  0x08
+
+EVENT_CYCLE_ACTIVITY_STALLS_L1D_PENDING                  0xA3   PMC2
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L1D_PENDING        EVENT_OPTION_THRESHOLD=0x0C
+UMASK_CYCLE_ACTIVITY_STALLS_L1D_PENDING                  0x0C
 
 EVENT_DSB2MITE_SWITCHES                 0xAB   PMC
 UMASK_DSB2MITE_SWITCHES_COUNT           0x01
@@ -331,6 +418,10 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
 UMASK_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC 0x01
 DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC 0x01
 UMASK_UOPS_EXECUTED_CORE                       0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
 UMASK_UOPS_EXECUTED_CORE_USED_CYCLES           0x02
@@ -346,6 +437,13 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC 0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC 0x02
+
+EVENT_OFFCORE_REQUESTS_BUFFER         0xB2 PMC
+UMASK_OFFCORE_REQUESTS_BUFFER_SQ_FULL 0x01
 
 EVENT_TLB_FLUSH                 0xBD  PMC
 UMASK_TLB_FLUSH_DTLB_THREAD     0x01
@@ -383,6 +481,18 @@ DEFAULT_OPTIONS_UOPS_RETIRED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_
 UMASK_UOPS_RETIRED_CORE_STALL_CYCLES     0x01
 DEFAULT_OPTIONS_UOPS_RETIRED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_RETIRED_CORE_TOTAL_CYCLES     0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC 0x01
 
 EVENT_MACHINE_CLEARS                    0xC3  PMC
 UMASK_MACHINE_CLEARS_CYCLES             0x01
diff --git a/src/includes/perfmon_ivybridge_counters.h b/src/includes/perfmon_ivybridge_counters.h
index d28f86a..dcc7491 100644
--- a/src/includes/perfmon_ivybridge_counters.h
+++ b/src/includes/perfmon_ivybridge_counters.h
@@ -5,14 +5,14 @@
  *
  *      Description: Counter header file of perfmon module for Intel Ivy Bridge.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -29,9 +29,9 @@
  * =======================================================================================
  */
 
-#define NUM_COUNTERS_CORE_IVYBRIDGE 8
+#define NUM_COUNTERS_CORE_IVYBRIDGE 12
 #define NUM_COUNTERS_UNCORE_IVYBRIDGE 12
-#define NUM_COUNTERS_IVYBRIDGE 23
+#define NUM_COUNTERS_IVYBRIDGE 27
 
 
 #define IVB_VALID_OPTIONS_PMC EVENT_OPTION_EDGE_MASK|EVENT_OPTION_COUNT_KERNEL_MASK|EVENT_OPTION_INVERT_MASK|\
@@ -50,24 +50,29 @@ static RegisterMap ivybridge_counter_map[NUM_COUNTERS_IVYBRIDGE] = {
     {"PMC1", PMC4, PMC, MSR_PERFEVTSEL1, MSR_PMC1, 0, 0, IVB_VALID_OPTIONS_PMC},
     {"PMC2", PMC5, PMC, MSR_PERFEVTSEL2, MSR_PMC2, 0, 0, IVB_VALID_OPTIONS_PMC},
     {"PMC3", PMC6, PMC, MSR_PERFEVTSEL3, MSR_PMC3, 0, 0, IVB_VALID_OPTIONS_PMC},
+    /* Additional PMC Counters: 4 48bit wide if HyperThreading is disabled*/
+    {"PMC4", PMC7, PMC, MSR_PERFEVTSEL4, MSR_PMC4, 0, 0, IVB_VALID_OPTIONS_PMC},
+    {"PMC5", PMC8, PMC, MSR_PERFEVTSEL5, MSR_PMC5, 0, 0, IVB_VALID_OPTIONS_PMC},
+    {"PMC6", PMC9, PMC, MSR_PERFEVTSEL6, MSR_PMC6, 0, 0, IVB_VALID_OPTIONS_PMC},
+    {"PMC7", PMC10, PMC, MSR_PERFEVTSEL7, MSR_PMC7, 0, 0, IVB_VALID_OPTIONS_PMC},
     /* Temperature Sensor*/
-    {"TMP0", PMC7, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"TMP0", PMC11, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
     /* RAPL counters */
-    {"PWR0", PMC8, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR1", PMC9, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR2", PMC10, POWER, 0, MSR_PP1_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR3", PMC11, POWER, 0, MSR_DRAM_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"CBOX0C0", PMC12, CBOX0, MSR_UNC_CBO_0_PERFEVTSEL0, MSR_UNC_CBO_0_CTR0, 0, 0, IVB_VALID_OPTIONS_CBOX},
-    {"CBOX0C1", PMC13, CBOX0, MSR_UNC_CBO_0_PERFEVTSEL1, MSR_UNC_CBO_0_CTR1, 0, 0, IVB_VALID_OPTIONS_CBOX},
-    {"CBOX1C0", PMC14, CBOX1, MSR_UNC_CBO_1_PERFEVTSEL0, MSR_UNC_CBO_1_CTR0, 0, 0, IVB_VALID_OPTIONS_CBOX},
-    {"CBOX1C1", PMC15, CBOX1, MSR_UNC_CBO_1_PERFEVTSEL1, MSR_UNC_CBO_1_CTR1, 0, 0, IVB_VALID_OPTIONS_CBOX},
-    {"CBOX2C0", PMC16, CBOX2, MSR_UNC_CBO_2_PERFEVTSEL0, MSR_UNC_CBO_2_CTR0, 0, 0, IVB_VALID_OPTIONS_CBOX},
-    {"CBOX2C1", PMC17, CBOX2, MSR_UNC_CBO_2_PERFEVTSEL1, MSR_UNC_CBO_2_CTR1, 0, 0, IVB_VALID_OPTIONS_CBOX},
-    {"CBOX3C0", PMC18, CBOX3, MSR_UNC_CBO_3_PERFEVTSEL0, MSR_UNC_CBO_3_CTR0, 0, 0, IVB_VALID_OPTIONS_CBOX},
-    {"CBOX3C1", PMC19, CBOX3, MSR_UNC_CBO_3_PERFEVTSEL1, MSR_UNC_CBO_3_CTR1, 0, 0, IVB_VALID_OPTIONS_CBOX},
-    {"UBOX0", PMC20, UBOX, MSR_UNC_ARB_PERFEVTSEL0, MSR_UNC_ARB_CTR0, 0, 0, IVB_VALID_OPTIONS_UBOX},
-    {"UBOX1", PMC21, UBOX, MSR_UNC_ARB_PERFEVTSEL1, MSR_UNC_ARB_CTR1, 0, 0, IVB_VALID_OPTIONS_UBOX},
-    {"UBOXFIX", PMC22, UBOXFIX, MSR_UNC_PERF_FIXED_CTRL, MSR_UNC_PERF_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR0", PMC12, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR1", PMC13, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR2", PMC14, POWER, 0, MSR_PP1_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR3", PMC15, POWER, 0, MSR_DRAM_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"CBOX0C0", PMC16, CBOX0, MSR_UNC_CBO_0_PERFEVTSEL0, MSR_UNC_CBO_0_CTR0, 0, 0, IVB_VALID_OPTIONS_CBOX},
+    {"CBOX0C1", PMC17, CBOX0, MSR_UNC_CBO_0_PERFEVTSEL1, MSR_UNC_CBO_0_CTR1, 0, 0, IVB_VALID_OPTIONS_CBOX},
+    {"CBOX1C0", PMC18, CBOX1, MSR_UNC_CBO_1_PERFEVTSEL0, MSR_UNC_CBO_1_CTR0, 0, 0, IVB_VALID_OPTIONS_CBOX},
+    {"CBOX1C1", PMC19, CBOX1, MSR_UNC_CBO_1_PERFEVTSEL1, MSR_UNC_CBO_1_CTR1, 0, 0, IVB_VALID_OPTIONS_CBOX},
+    {"CBOX2C0", PMC20, CBOX2, MSR_UNC_CBO_2_PERFEVTSEL0, MSR_UNC_CBO_2_CTR0, 0, 0, IVB_VALID_OPTIONS_CBOX},
+    {"CBOX2C1", PMC21, CBOX2, MSR_UNC_CBO_2_PERFEVTSEL1, MSR_UNC_CBO_2_CTR1, 0, 0, IVB_VALID_OPTIONS_CBOX},
+    {"CBOX3C0", PMC22, CBOX3, MSR_UNC_CBO_3_PERFEVTSEL0, MSR_UNC_CBO_3_CTR0, 0, 0, IVB_VALID_OPTIONS_CBOX},
+    {"CBOX3C1", PMC23, CBOX3, MSR_UNC_CBO_3_PERFEVTSEL1, MSR_UNC_CBO_3_CTR1, 0, 0, IVB_VALID_OPTIONS_CBOX},
+    {"UBOX0", PMC24, UBOX, MSR_UNC_ARB_PERFEVTSEL0, MSR_UNC_ARB_CTR0, 0, 0, IVB_VALID_OPTIONS_UBOX},
+    {"UBOX1", PMC25, UBOX, MSR_UNC_ARB_PERFEVTSEL1, MSR_UNC_ARB_CTR1, 0, 0, IVB_VALID_OPTIONS_UBOX},
+    {"UBOXFIX", PMC26, UBOXFIX, MSR_UNC_PERF_FIXED_CTRL, MSR_UNC_PERF_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
 };
 
 
diff --git a/src/includes/perfmon_ivybridge_events.txt b/src/includes/perfmon_ivybridge_events.txt
index 99a5011..d789667 100644
--- a/src/includes/perfmon_ivybridge_events.txt
+++ b/src/includes/perfmon_ivybridge_events.txt
@@ -4,14 +4,14 @@
 #
 #      Description:  Event list for Intel Ivy Bridge
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
@@ -74,31 +74,32 @@ UMASK_INT_MISC_RECOVERY_CYCLES       0x03
 DEFAULT_OPTIONS_INT_MISC_RECOVERY_COUNT EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
 UMASK_INT_MISC_RECOVERY_COUNT        0x03
 
-EVENT_UOPS_ISSUED                     0x0E  PMC
-UMASK_UOPS_ISSUED_ANY                 0x01
-UMASK_UOPS_ISSUED_FLAGS_MERGE         0x10
-UMASK_UOPS_ISSUED_SLOW_LEA            0x20
-UMASK_UOPS_ISSUED_SINGLE_MUL          0x40
+EVENT_UOPS_ISSUED                0x0E  PMC
+UMASK_UOPS_ISSUED_ANY            0x01
+UMASK_UOPS_ISSUED_FLAGS_MERGE    0x10
+UMASK_UOPS_ISSUED_SLOW_LEA       0x20
+UMASK_UOPS_ISSUED_SINGLE_MUL     0x40
 DEFAULT_OPTIONS_UOPS_ISSUED_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
-UMASK_UOPS_ISSUED_USED_CYCLES         0x01
+UMASK_UOPS_ISSUED_USED_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
-UMASK_UOPS_ISSUED_STALL_CYCLES        0x01
+UMASK_UOPS_ISSUED_STALL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1
-UMASK_UOPS_ISSUED_TOTAL_CYCLES        0x01
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_ANY EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_ANY            0x01
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_FLAGS_MERGE EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_FLAGS_MERGE    0x10
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_SLOW_LEA EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_SLOW_LEA       0x20
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_SINGLE_MUL EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_SINGLE_MUL     0x40
+UMASK_UOPS_ISSUED_TOTAL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_USED_CYCLES    0x01
+UMASK_UOPS_ISSUED_CORE_USED_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_ISSUED_CORE_STALL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_ISSUED_CORE_TOTAL_CYCLES   0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC 0x01
+
 
 EVENT_FP_COMP_OPS_EXE                          0x10   PMC
 UMASK_FP_COMP_OPS_EXE_X87                      0x01
@@ -196,22 +197,100 @@ UMASK_CACHE_LOCK_CYCLES_CACHE_LOCK_DURATION          0x02
 DEFAULT_OPTIONS_CACHE_LOCK_CYCLES_CACHE_LOCK_COUNT EVENT_OPTION_EDGE=1
 UMASK_CACHE_LOCK_CYCLES_CACHE_LOCK_COUNT             0x02
 
-EVENT_IDQ                              0x79   PMC
-UMASK_IDQ_EMPTY                        0x02
-UMASK_IDQ_MITE_UOPS                    0x04
-UMASK_IDQ_DSB_UOPS                     0x08
-UMASK_IDQ_MS_DSB_UOPS                  0x10
-UMASK_IDQ_MS_MITE_UOPS                 0x20
-UMASK_IDQ_MS_UOPS                      0x30
+EVENT_IDQ                               0x79   PMC
+UMASK_IDQ_EMPTY                         0x02
+UMASK_IDQ_MITE_UOPS                     0x04
+UMASK_IDQ_DSB_UOPS                      0x08
+UMASK_IDQ_MS_DSB_UOPS                   0x10
+UMASK_IDQ_MS_MITE_UOPS                  0x20
+UMASK_IDQ_MS_UOPS                       0x30
+UMASK_IDQ_DSB_UOPS                      0x18
+UMASK_IDQ_MITE_ALL_UOPS                 0x24
+UMASK_IDQ_ALL_UOPS                      0x3C
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES         EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES                   0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES_1_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MITE_CYCLES_2_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MITE_CYCLES_3_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MITE_CYCLES_4_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES          EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES                    0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES_1_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_DSB_CYCLES_2_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_DSB_CYCLES_3_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_DSB_CYCLES_4_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES       EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES                 0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES_1_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_DSB_CYCLES_2_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_DSB_CYCLES_3_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_DSB_CYCLES_4_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_OCCUR        EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
+UMASK_IDQ_MS_DSB_OCCUR                  0x10
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES      EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES                0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES_1_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_MITE_CYCLES_2_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_MITE_CYCLES_3_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_MITE_CYCLES_4_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_CYCLES           EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES                     0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES_1_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_CYCLES_2_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_CYCLES_3_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_CYCLES_4_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_SWITCHES         EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
+UMASK_IDQ_MS_SWITCHES                   0x30
 DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x1
-UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS      0x18
+UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS       0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_DSB_CYCLES_1_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_DSB_CYCLES_2_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_DSB_CYCLES_3_UOPS         0x18
 DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
-UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS        0x18
-DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x1
-UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS     0x24
+UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS      0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_1_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_MITE_CYCLES_2_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_MITE_CYCLES_3_UOPS        0x24
 DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
-UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS       0x24
-UMASK_IDQ_ALL_MITE_ALL_UOPS            0x3C
+UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_ANY_UOPS      0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_1_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_CYCLES_2_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_CYCLES_3_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_CYCLES_4_UOPS        0x3C
 
 EVENT_ICACHE                    0x80   PMC
 UMASK_ICACHE_HITS               0x01
@@ -293,14 +372,24 @@ UMASK_RESOURCE_STALLS_SB              0x08
 UMASK_RESOURCE_STALLS_ROB             0x10
 
 EVENT_CYCLE_ACTIVITY                               0xA3   PMC
-DEFAULT_OPTIONS_CYCLE_ACTIVITY_L2_PENDING          EVENT_OPTION_THRESHOLD=0x01
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L2_PENDING   EVENT_OPTION_THRESHOLD=0x01
 UMASK_CYCLE_ACTIVITY_CYCLES_L2_PENDING             0x01
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_LDM_PENDING  EVENT_OPTION_THRESHOLD=0x02
 UMASK_CYCLE_ACTIVITY_CYCLES_LDM_PENDING            0x02
-DEFAULT_OPTIONS_CYCLE_ACTIVITY_L1D_PENDING         EVENT_OPTION_THRESHOLD=0x08
-UMASK_CYCLE_ACTIVITY_L1D_PENDING                   0x08
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE   EVENT_OPTION_THRESHOLD=0x04
 UMASK_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE             0x04
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L2_PENDING   EVENT_OPTION_THRESHOLD=0x05
+UMASK_CYCLE_ACTIVITY_STALLS_L2_PENDING             0x05
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_LDM_PENDING  EVENT_OPTION_THRESHOLD=0x06
+UMASK_CYCLE_ACTIVITY_STALLS_LDM_PENDING            0x06
+
+EVENT_CYCLE_ACTIVITY_CYCLES_L1D_PENDING                  0xA3   PMC2
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L1D_PENDING        EVENT_OPTION_THRESHOLD=0x08
+UMASK_CYCLE_ACTIVITY_CYCLES_L1D_PENDING                  0x08
+
+EVENT_CYCLE_ACTIVITY_STALLS_L1D_PENDING                  0xA3   PMC2
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L1D_PENDING        EVENT_OPTION_THRESHOLD=0x0C
+UMASK_CYCLE_ACTIVITY_STALLS_L1D_PENDING                  0x0C
 
 EVENT_DSB2MITE_SWITCHES                 0xAB   PMC
 UMASK_DSB2MITE_SWITCHES_COUNT           0x01
@@ -334,6 +423,10 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
 UMASK_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC 0x01
 DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC 0x01
 UMASK_UOPS_EXECUTED_CORE                       0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
 UMASK_UOPS_EXECUTED_CORE_USED_CYCLES           0x02
@@ -349,6 +442,13 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC 0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC 0x02
+
+EVENT_OFFCORE_REQUESTS_BUFFER         0xB2 PMC
+UMASK_OFFCORE_REQUESTS_BUFFER_SQ_FULL 0x01
 
 EVENT_TLB_FLUSH                 0xBD  PMC
 UMASK_TLB_FLUSH_DTLB_THREAD     0x01
@@ -386,6 +486,18 @@ DEFAULT_OPTIONS_UOPS_RETIRED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_
 UMASK_UOPS_RETIRED_CORE_STALL_CYCLES     0x01
 DEFAULT_OPTIONS_UOPS_RETIRED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_RETIRED_CORE_TOTAL_CYCLES     0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC 0x01
 
 EVENT_MACHINE_CLEARS                    0xC3  PMC
 UMASK_MACHINE_CLEARS_CYCLES             0x01
diff --git a/src/includes/perfmon_k10.h b/src/includes/perfmon_k10.h
index bd4e32a..c2effb3 100644
--- a/src/includes/perfmon_k10.h
+++ b/src/includes/perfmon_k10.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Header file of perfmon module for AMD K10
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -90,7 +90,7 @@ int perfmon_setupCounterThread_k10(int thread_id, PerfmonEventSet* eventSet)
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -115,7 +115,7 @@ int perfmon_startCountersThread_k10(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -147,7 +147,7 @@ int perfmon_stopCountersThread_k10(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -182,7 +182,7 @@ int perfmon_readCountersThread_k10(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -209,7 +209,7 @@ int perfmon_finalizeCountersThread_k10(int thread_id, PerfmonEventSet* eventSet)
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
diff --git a/src/includes/perfmon_k10_counters.h b/src/includes/perfmon_k10_counters.h
index b1a794f..71278af 100644
--- a/src/includes/perfmon_k10_counters.h
+++ b/src/includes/perfmon_k10_counters.h
@@ -5,14 +5,14 @@
  *
  *      Description:  AMD K10 performance counter definition. Also used for AMD K8.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_k10_events.txt b/src/includes/perfmon_k10_events.txt
index ab56f1d..c66931b 100644
--- a/src/includes/perfmon_k10_events.txt
+++ b/src/includes/perfmon_k10_events.txt
@@ -4,14 +4,14 @@
 #
 #      Description:  Event list for AMD K10
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_k8.h b/src/includes/perfmon_k8.h
index dd55c32..4c5f8dc 100644
--- a/src/includes/perfmon_k8.h
+++ b/src/includes/perfmon_k8.h
@@ -6,14 +6,14 @@
  *      Description:  Header File of perfmon module for AMD K8 support.
  *                    The setup routines and registers are similar to AMD K10
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_k8_events.txt b/src/includes/perfmon_k8_events.txt
index d71316a..a431b72 100644
--- a/src/includes/perfmon_k8_events.txt
+++ b/src/includes/perfmon_k8_events.txt
@@ -4,14 +4,14 @@
 #
 #      Description:  Event list for AMD K8
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_kabini.h b/src/includes/perfmon_kabini.h
index dec1436..9441b54 100644
--- a/src/includes/perfmon_kabini.h
+++ b/src/includes/perfmon_kabini.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Header file of perfmon module for AMD Family 16
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -158,7 +158,7 @@ int perfmon_setupCounterThread_kabini(int thread_id, PerfmonEventSet* eventSet)
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -205,7 +205,7 @@ int perfmon_startCountersThread_kabini(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -250,7 +250,7 @@ int perfmon_stopCountersThread_kabini(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -299,7 +299,7 @@ int perfmon_readCountersThread_kabini(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -342,7 +342,7 @@ int perfmon_finalizeCountersThread_kabini(int thread_id, PerfmonEventSet* eventS
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
diff --git a/src/includes/perfmon_kabini_counters.h b/src/includes/perfmon_kabini_counters.h
index dd15fbb..1171ac9 100644
--- a/src/includes/perfmon_kabini_counters.h
+++ b/src/includes/perfmon_kabini_counters.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Counter Header File of perfmon module for AMD Family 16
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_kabini_events.txt b/src/includes/perfmon_kabini_events.txt
index bfa15f2..3fc1a2e 100644
--- a/src/includes/perfmon_kabini_events.txt
+++ b/src/includes/perfmon_kabini_events.txt
@@ -4,13 +4,13 @@
 #
 #      Description:  Event list for AMD Kabini
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   saravanan.ekanathan at amd.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_knl.h b/src/includes/perfmon_knl.h
new file mode 100644
index 0000000..0010b08
--- /dev/null
+++ b/src/includes/perfmon_knl.h
@@ -0,0 +1,1299 @@
+/*
+ * =======================================================================================
+ *
+ *      Filename:  perfmon_knl.h
+ *
+ *      Description:  Header file of perfmon module for Intel Xeon Phi (Knights Landing)
+ *
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
+ *
+ *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
+ *      Project:  likwid
+ *
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ *
+ *      This program is free software: you can redistribute it and/or modify it under
+ *      the terms of the GNU General Public License as published by the Free Software
+ *      Foundation, either version 3 of the License, or (at your option) any later
+ *      version.
+ *
+ *      This program is distributed in the hope that it will be useful, but WITHOUT ANY
+ *      WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ *      PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+ *
+ *      You should have received a copy of the GNU General Public License along with
+ *      this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * =======================================================================================
+ */
+
+#include <perfmon_knl_events.h>
+#include <perfmon_knl_counters.h>
+
+static int perfmon_numCountersKNL = NUM_COUNTERS_KNL; /* counter count, initialized from NUM_COUNTERS_KNL */
+static int perfmon_numCoreCountersKNL = NUM_COUNTERS_KNL; /* NOTE(review): same constant as perfmon_numCountersKNL -- confirm this is intended for the core counter count */
+static int perfmon_numArchEventsKNL = NUM_ARCH_EVENTS_KNL; /* event count, initialized from NUM_ARCH_EVENTS_KNL */
+
+
+int perfmon_init_knl(int cpu_id) /* Per-CPU init: calls lock_acquire on the socket and tile lock slots of cpu_id, then writes 0 to MSR_PEBS_ENABLE; returns 0 at the end */
+{
+    lock_acquire((int*) &socket_lock[affinity_core2node_lookup[cpu_id]], cpu_id); /* slot selected through the core-to-node lookup table */
+    lock_acquire((int*) &tile_lock[affinity_thread2tile_lookup[cpu_id]], cpu_id); /* slot selected through the thread-to-tile lookup table */
+    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PEBS_ENABLE, 0x0ULL)); /* presumably leaves the function when the write fails; the macro is defined elsewhere -- TODO confirm */
+    return 0;
+}
+
+uint32_t knl_fixed_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
+{
+    /* Builds and returns the control bits of one fixed counter; each index owns a 4-bit field. */
+    const int field_lsb = index * 4;
+    uint32_t ctrl_bits = (1ULL << (field_lsb + 1)); /* bit 1 of the field is always set */
+    int opt = 0;
+
+    /* A non-positive numberOfOptions leaves the default bits untouched. */
+    while (opt < event->numberOfOptions)
+    {
+        if (event->options[opt].type == EVENT_OPTION_ANYTHREAD)
+        {
+            ctrl_bits |= (1ULL << (field_lsb + 2));
+        }
+        else if (event->options[opt].type == EVENT_OPTION_COUNT_KERNEL)
+        {
+            ctrl_bits |= (1ULL << field_lsb);
+        }
+        opt++;
+    }
+    return ctrl_bits;
+}
+
+int knl_pmc_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event) /* Builds the PMC config value (plus an offcore response MSR for event id 0xB7) and writes it; returns 0 at the end */
+{
+    uint64_t flags = 0x0ULL;
+    uint64_t offcore_flags = 0x0ULL;
+
+
+    flags |= (1ULL<<16)|(1ULL<<22); /* bits 16 and 22 are set for every event */
+    flags |= (event->umask<<8) + event->eventId; /* umask is placed from bit 8 upwards, the event id in the low bits */
+    /* For event id 0xB7 the cmask must be written in an extra register */
+    if ((event->cmask != 0x00) && (event->eventId != 0xB7))
+    {
+        flags |= (event->cmask << 24);
+    }
+    /* set custom cfgbits */
+    if ((event->cfgBits != 0x00) && (event->eventId != 0xB7))
+    {
+        flags |= (event->cfgBits << 16);
+    }
+
+    if (event->numberOfOptions > 0)
+    {
+        for(int i=0;i<event->numberOfOptions;i++)
+        {
+            switch(event->options[i].type)
+            {
+                case EVENT_OPTION_EDGE:
+                    flags |= (1ULL<<18);
+                    break;
+                case EVENT_OPTION_ANYTHREAD:
+                    flags |= (1ULL<<21);
+                    break;
+                case EVENT_OPTION_INVERT:
+                    flags |= (1ULL<<23);
+                    break;
+                case EVENT_OPTION_COUNT_KERNEL:
+                    flags |= (1ULL<<17);
+                    break;
+                case EVENT_OPTION_THRESHOLD:
+                    flags |= (event->options[i].value & 0xFFULL)<<24; /* low 8 bits of the value go to bits 24-31 */
+                    break;
+                case EVENT_OPTION_MATCH0: /* only honoured for event id 0xB7 */
+                    if (event->eventId == 0xB7)
+                    {
+                        offcore_flags |= (event->options[i].value & 0xFFFFULL);
+                    }
+                    break;
+                case EVENT_OPTION_MATCH1: /* only honoured for event id 0xB7 */
+                    if (event->eventId == 0xB7)
+                    {
+                        offcore_flags |= (event->options[i].value & 0x3FFFFFFFULL)<<16;
+                    }
+                    break;
+                default:
+                    break;
+            }
+        }
+    }
+
+    // Offcore event with additional configuration register
+    // cfgBits contain offset of "request type" bit
+    // cmask contain offset of "response type" bit
+    if (event->eventId == 0xB7)
+    {
+        uint32_t reg = 0x0;
+        if (event->umask == 0x01)
+        {
+            reg = MSR_OFFCORE_RESP0;
+        }
+        else if (event->umask == 0x02)
+        {
+            reg = MSR_OFFCORE_RESP1;
+        }
+        if (reg) /* any other umask leaves reg at 0, so no offcore register is written */
+        {
+            if ((event->cfgBits != 0xFF) && (event->cmask != 0xFF))
+            {
+                offcore_flags = (1ULL<<event->cfgBits)|(1ULL<<event->cmask); /* plain assignment: replaces any MATCH0/MATCH1 bits collected above */
+            }
+            VERBOSEPRINTREG(cpu_id, reg, LLU_CAST offcore_flags, SETUP_PMC_OFFCORE);
+            CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, reg , offcore_flags));
+        }
+    }
+    if (flags != currentConfig[cpu_id][index]) /* the write is skipped when the remembered config already equals flags */
+    {
+        VERBOSEPRINTREG(cpu_id, counter_map[index].configRegister, LLU_CAST flags, SETUP_PMC)
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, counter_map[index].configRegister, flags));
+        currentConfig[cpu_id][index] = flags;
+    }
+    return 0;
+}
+
+int knl_ubox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
+{
+    /* Builds the UBOX counter config from the event and writes it when it differs from the remembered value; returns 0 at the end. */
+    uint64_t flags = 0x0ULL; /* NOTE(review): unlike the WBOX/CBOX/MBOX routines there is no socket_lock guard here -- confirm this is intended */
+    flags |= (1ULL<<16)|(1ULL<<22); /* bits 16 and 22 are set for every event */
+    flags |= (event->umask<<8) + event->eventId; /* umask is placed from bit 8 upwards, the event id in the low bits */
+    if (event->numberOfOptions > 0)
+    {
+        for(int i=0;i<event->numberOfOptions;i++)
+        {
+            switch(event->options[i].type)
+            {
+                case EVENT_OPTION_EDGE:
+                    flags |= (1ULL<<18);
+                    break;
+                case EVENT_OPTION_ANYTHREAD:
+                    flags |= (1ULL<<21);
+                    break;
+                case EVENT_OPTION_INVERT:
+                    flags |= (1ULL<<23);
+                    break;
+                case EVENT_OPTION_TID:
+                    flags |= (1ULL<<19);
+                    break;
+                default: /* every other option is ignored, matching the other setup routines in this file */
+                    break;
+            }
+        }
+    }
+    if (flags != currentConfig[cpu_id][index]) /* the write is skipped when the remembered config already equals flags */
+    {
+        VERBOSEPRINTREG(cpu_id, counter_map[index].configRegister, LLU_CAST flags, SETUP_UBOX)
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, counter_map[index].configRegister, flags));
+        currentConfig[cpu_id][index] = flags;
+    }
+    return 0;
+}
+
+int knl_wbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event) /* Builds the WBOX counter config from the event and writes it when it changed; returns 0 */
+{
+    int j;
+    uint64_t flags = 0x0ULL;
+
+    if (socket_lock[affinity_core2node_lookup[cpu_id]] != cpu_id) /* only the CPU recorded in socket_lock for this node programs the box */
+    {
+        return 0;
+    }
+
+    flags = (1ULL<<22)|(1ULL<<20); /* bits 20 and 22 are set for every event */
+    flags |= event->eventId; /* no umask is applied in this routine */
+    if (event->numberOfOptions > 0)
+    {
+        for(j = 0; j < event->numberOfOptions; j++)
+        {
+            switch (event->options[j].type)
+            {
+                case EVENT_OPTION_EDGE:
+                    flags |= (1ULL<<18);
+                    break;
+                case EVENT_OPTION_INVERT:
+                    flags |= (1ULL<<23);
+                    break;
+                case EVENT_OPTION_THRESHOLD:
+                    flags |= (event->options[j].value & 0x1FULL) << 24; /* only the low 5 bits of the value are used */
+                    break;
+                case EVENT_OPTION_OCCUPANCY: /* all three occupancy options additionally set bit 7 */
+                    flags |= ((event->options[j].value & 0x3ULL)<<14);
+                    flags |= (1ULL<<7);
+                    break;
+                case EVENT_OPTION_OCCUPANCY_EDGE:
+                    flags |= (1ULL<<31);
+                    flags |= (1ULL<<7);
+                    break;
+                case EVENT_OPTION_OCCUPANCY_INVERT:
+                    flags |= (1ULL<<30);
+                    flags |= (1ULL<<7);
+                    break;
+                default:
+                    break;
+            }
+        }
+    }
+
+    if (flags != currentConfig[cpu_id][index]) /* the write is skipped when the remembered config already equals flags */
+    {
+        VERBOSEPRINTREG(cpu_id, counter_map[index].configRegister, flags, SETUP_WBOX);
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, counter_map[index].configRegister, flags));
+        currentConfig[cpu_id][index] = flags;
+    }
+    return 0;
+}
+
+int knl_cbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event) /* Programs the two filter registers of the counter's box and then the counter config; returns 0 */
+{
+    int j;
+    uint64_t flags = 0x0ULL;
+    uint64_t filter_flags0 = 0x0ULL;
+    uint64_t filter_flags1 = 0x0ULL;
+    uint32_t filter0 = box_map[counter_map[index].type].filterRegister1; /* filter registers are looked up per box type */
+    uint32_t filter1 = box_map[counter_map[index].type].filterRegister2;
+    int set_state_all = 0;
+    int set_opcode_all = 0;
+    int set_match1_all = 1; /* on by default; cleared only by an EVENT_OPTION_MATCH1 option */
+
+    if (socket_lock[affinity_core2node_lookup[cpu_id]] != cpu_id) /* only the CPU recorded in socket_lock for this node programs the box */
+    {
+        return 0;
+    }
+
+    flags = (1ULL<<22)|(1ULL<<20); /* bits 20 and 22 are set for every event */
+    flags |= (event->umask<<8) + event->eventId;
+    if (event->eventId == 0x34)
+    {
+        set_state_all = 1;
+    }
+    if (event->eventId == 0x00 && event->cfgBits == 1)
+    {
+        filter_flags0 |= (1ULL<<12);
+    }
+    if (event->eventId == 0x00 || event->eventId == 0x00) /* NOTE(review): both operands are identical -- one of the event ids is probably a typo; verify against the event list */
+    {
+        set_opcode_all = 1;
+    }
+    if (event->numberOfOptions > 0)
+    {
+        for(j = 0; j < event->numberOfOptions; j++)
+        {
+            switch (event->options[j].type)
+            {
+                case EVENT_OPTION_EDGE:
+                    flags |= (1ULL<<18);
+                    break;
+                case EVENT_OPTION_INVERT:
+                    flags |= (1ULL<<23);
+                    break;
+                case EVENT_OPTION_THRESHOLD:
+                    flags |= (event->options[j].value & 0xFFULL) << 24;
+                    break;
+                case EVENT_OPTION_OPCODE: /* an explicit opcode disables the "all opcodes" default */
+                    filter_flags1 |= (extractBitField(event->options[j].value,20,0) << 9);
+                    set_opcode_all = 0;
+                    break;
+                case EVENT_OPTION_STATE: /* an explicit state disables the "all states" default */
+                    filter_flags0 |= (extractBitField(event->options[j].value,10,0) << 17);
+                    set_state_all = 0;
+                    break;
+                case EVENT_OPTION_TID: /* sets filter bits and additionally bit 19 of the counter config */
+                    filter_flags0 |= (extractBitField(event->options[j].value,9,0));
+                    flags |= (1ULL<<19);
+                    break;
+                case EVENT_OPTION_MATCH0:
+                    filter_flags1 |= (extractBitField(event->options[j].value,3,0) << 29);
+                    break;
+                case EVENT_OPTION_MATCH1:
+                    filter_flags1 |= (extractBitField(event->options[j].value,2,0) << 4);
+                    set_match1_all = 0;
+                    break;
+                case EVENT_OPTION_NID:
+                    filter_flags1 |= extractBitField(event->options[j].value,2,0);
+                    break;
+                default:
+                    break;
+            }
+        }
+    }
+
+    if (filter_flags0 != 0x0ULL) /* both branches write filter0; the else branch writes 0 without the verbose print */
+    {
+        VERBOSEPRINTREG(cpu_id, filter0, filter_flags0, SETUP_CBOX_FILTER0);
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, filter0, filter_flags0));
+    }
+    else
+    {
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, filter0, 0x0ULL));
+    }
+    if (filter_flags1 != 0x0ULL) /* same pattern for filter1 */
+    {
+        VERBOSEPRINTREG(cpu_id, filter1, filter_flags1, SETUP_CBOX_FILTER1);
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, filter1, filter_flags1));
+    }
+    else
+    {
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, filter1, 0x0ULL));
+    }
+
+    if (set_state_all) /* the defaults below read the filter back, OR in extra bits and write it again */
+    {
+        CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, filter0, &filter_flags0));
+        filter_flags0 |= (1ULL<<18)|(1ULL<<19)|(1ULL<<20);
+        VERBOSEPRINTREG(cpu_id, filter0, filter_flags0, SETUP_CBOX_DEF_FILTER_STATE);
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, filter0, filter_flags0));
+    }
+    if (set_match1_all)
+    {
+        CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, filter1, &filter_flags1));
+        filter_flags1 |= (1ULL<<4)|(1ULL<<5);
+        VERBOSEPRINTREG(cpu_id, filter1, filter_flags1, SETUP_CBOX_COUNT_ALL_CACHE_EVENTS);
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, filter1, filter_flags1));
+    }
+    if (set_opcode_all)
+    {
+        CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, filter1, &filter_flags1));
+        filter_flags1 |= (1ULL<<3);
+        VERBOSEPRINTREG(cpu_id, filter1, filter_flags1, SETUP_CBOX_COUNT_ALL_OPCODES);
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, filter1, filter_flags1));
+    }
+    if (flags != currentConfig[cpu_id][index]) /* unlike the filters, the counter config is only written when it changed */
+    {
+        VERBOSEPRINTREG(cpu_id, counter_map[index].configRegister, flags, SETUP_CBOX);
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, counter_map[index].configRegister, flags));
+        currentConfig[cpu_id][index] = flags;
+    }
+    return 0;
+}
+
+int knl_mbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event) /* Builds the counter config for a device-addressed box and writes it when it changed; returns 0, or -ENODEV when HPMcheck fails */
+{
+    int j;
+    uint64_t flags = 0x0ULL;
+    PciDeviceIndex dev = counter_map[index].device; /* the target device comes from the counter map */
+
+    if (socket_lock[affinity_core2node_lookup[cpu_id]] != cpu_id) /* only the CPU recorded in socket_lock for this node programs the box */
+    {
+        return 0;
+    }
+    if (!HPMcheck(dev, cpu_id)) /* presumably tests whether the device is accessible from this CPU -- TODO confirm */
+    {
+        return -ENODEV;
+    }
+
+    flags = (1ULL<<20)|(1ULL<<22); /* bits 20 and 22 are set for every event */
+    flags |= (event->umask<<8) + event->eventId;
+    if (event->numberOfOptions > 0)
+    {
+        for(j = 0; j < event->numberOfOptions; j++)
+        {
+            switch (event->options[j].type)
+            {
+                case EVENT_OPTION_EDGE:
+                    flags |= (1ULL<<18);
+                    break;
+                case EVENT_OPTION_INVERT:
+                    flags |= (1ULL<<23);
+                    break;
+                case EVENT_OPTION_THRESHOLD:
+                    flags |= (event->options[j].value & 0xFFULL) << 24;
+                    break;
+                default:
+                    break;
+            }
+        }
+    }
+    if (flags != currentConfig[cpu_id][index]) /* the write is skipped when the remembered config already equals flags */
+    {
+        VERBOSEPRINTPCIREG(cpu_id, dev, counter_map[index].configRegister, flags, SETUP_BOX);
+        CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].configRegister, flags));
+        currentConfig[cpu_id][index] = flags;
+    }
+    return 0;
+}
+
+
+
+
+int perfmon_setupCountersThread_knl( /* Programs the config registers of all events in eventSet for one thread; returns 0 at the end */
+        int thread_id,
+        PerfmonEventSet* eventSet)
+{
+    int haveLock = 0;
+    uint64_t flags = 0x0ULL; /* reset per event below but never read in this function */
+    uint64_t fixed_flags = 0x0ULL; /* accumulates the bits of all FIXED events; written once after the loop */
+    int cpu_id = groupSet->threads[thread_id].processorId;
+
+    if (socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id) /* this CPU is the one recorded in socket_lock for its node */
+    {
+        haveLock = 1;
+    }
+
+    if (MEASURE_CORE(eventSet)) /* zero the core global and fixed control registers before reprogramming */
+    {
+        VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC);
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
+        VERBOSEPRINTREG(cpu_id, MSR_PERF_FIXED_CTR_CTRL, 0x0ULL, FREEZE_FIXED);
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_FIXED_CTR_CTRL, 0x0ULL));
+    }
+
+    if (haveLock && MEASURE_UNCORE(eventSet)) /* uncore global control: written with 0 first, then with bit 63 set */
+    {
+        VERBOSEPRINTREG(cpu_id, MSR_MIC2_U_GLOBAL_CTRL, 0x0ULL, FREEZE_UNCORE);
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_MIC2_U_GLOBAL_CTRL, 0x0ULL));
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_MIC2_U_GLOBAL_CTRL, (1ULL<<63)));
+    }
+
+    for (int i=0;i < eventSet->numberOfEvents;i++)
+    {
+        RegisterType type = eventSet->events[i].type;
+        if (!TESTTYPE(eventSet, type)) /* skip events whose register type is not flagged in the event set */
+        {
+            continue;
+        }
+        flags = 0x0ULL;
+        RegisterIndex index = eventSet->events[i].index;
+        PerfmonEvent *event = &(eventSet->events[i].event);
+        uint64_t reg = counter_map[index].configRegister;
+        PciDeviceIndex dev = counter_map[index].device;
+        eventSet->events[i].threadCounter[thread_id].init = TRUE; /* marked before the type dispatch, so also for types handled by default */
+        switch (type)
+        {
+            case PMC: /* return values of the knl_*_setup helpers are not inspected here */
+                knl_pmc_setup(cpu_id, index, event);
+                break;
+
+            case FIXED:
+                fixed_flags |= knl_fixed_setup(cpu_id, index, event);
+                break;
+
+            case POWER: /* nothing to configure */
+                break;
+
+            case UBOX:
+                knl_ubox_setup(cpu_id, index, event);
+                break;
+
+            case UBOXFIX:
+                if (haveLock)
+                {
+                    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, reg, (1ULL<<22)|(1ULL<<20)));
+                    VERBOSEPRINTREG(cpu_id, reg, (1ULL<<22)|(1ULL<<20), SETUP_UBOXFIX);
+                }
+                break;
+
+            case CBOX0:
+            case CBOX1:
+            case CBOX2:
+            case CBOX3:
+            case CBOX4:
+            case CBOX5:
+            case CBOX6:
+            case CBOX7:
+            case CBOX8:
+            case CBOX9:
+            case CBOX10:
+            case CBOX11:
+            case CBOX12:
+            case CBOX13:
+            case CBOX14:
+            case CBOX15:
+            case CBOX16:
+            case CBOX17:
+            case CBOX18:
+            case CBOX19:
+            case CBOX20:
+            case CBOX21:
+            case CBOX22:
+            case CBOX23:
+            case CBOX24:
+            case CBOX25:
+            case CBOX26:
+            case CBOX27:
+            case CBOX28:
+            case CBOX29:
+            case CBOX30:
+            case CBOX31:
+            case CBOX32:
+            case CBOX33:
+            case CBOX34:
+            case CBOX35:
+            case CBOX36:
+            case CBOX37:
+                knl_cbox_setup(cpu_id, index, event);
+                break;
+
+            case WBOX:
+                knl_wbox_setup(cpu_id, index, event);
+                break;
+
+            case MBOX0: /* all box types in this group share knl_mbox_setup */
+            case MBOX1:
+            case MBOX2:
+            case MBOX3:
+            case MBOX4:
+            case MBOX5:
+            case MBOX6:
+            case MBOX7:
+            case IBOX0:
+            case EUBOX0:
+            case EUBOX1:
+            case EUBOX2:
+            case EUBOX3:
+            case EUBOX4:
+            case EUBOX5:
+            case EUBOX6:
+            case EUBOX7:
+            case EDBOX0:
+            case EDBOX1:
+            case EDBOX2:
+            case EDBOX3:
+            case EDBOX4:
+            case EDBOX5:
+            case EDBOX6:
+            case EDBOX7:
+            case PBOX:
+                knl_mbox_setup(cpu_id, index, event);
+                break;
+
+            case MBOX0FIX: /* fixed box counters: the config register is written with the constant 1 */
+            case MBOX1FIX:
+            case MBOX2FIX:
+            case MBOX3FIX:
+            case MBOX4FIX:
+            case MBOX5FIX:
+            case MBOX6FIX:
+            case MBOX7FIX:
+            case EUBOX0FIX:
+            case EUBOX1FIX:
+            case EUBOX2FIX:
+            case EUBOX3FIX:
+            case EUBOX4FIX:
+            case EUBOX5FIX:
+            case EUBOX6FIX:
+            case EUBOX7FIX:
+            case EDBOX0FIX:
+            case EDBOX1FIX:
+            case EDBOX2FIX:
+            case EDBOX3FIX:
+            case EDBOX4FIX:
+            case EDBOX5FIX:
+            case EDBOX6FIX:
+            case EDBOX7FIX:
+                if (haveLock)
+                {
+                    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, reg, 0x1ULL)); /* NOTE(review): MSR error/print macros used with a counter_map device, where knl_mbox_setup uses the PCI variants -- confirm */
+                    VERBOSEPRINTREG(cpu_id, reg, 0x1ULL, SETUP_MBOXFIX);
+                }
+                break;
+
+            default:
+                break;
+        }
+    }
+    for (int i=UNCORE;i<NUM_UNITS;i++) /* walk all unit types from UNCORE upwards and zero their box control registers */
+    {
+        if (haveLock && TESTTYPE(eventSet, i) && box_map[i].ctrlRegister != 0x0)
+        {
+            VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL, CLEAR_UNCORE_BOX_CTRL);
+            HPMwrite(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL); /* return value is not checked here */
+        }
+    }
+    if (fixed_flags > 0x0) /* written only when at least one FIXED event contributed bits */
+    {
+        VERBOSEPRINTREG(cpu_id, MSR_PERF_FIXED_CTR_CTRL, LLU_CAST fixed_flags, SETUP_FIXED)
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_FIXED_CTR_CTRL, fixed_flags));
+    }
+    return 0;
+}
+
+#define KNL_FREEZE_UNCORE /* writes bit 63 to the uncore global control MSR; expects cpu_id, haveLock and eventSet in the expanding scope */ \
+    if (haveLock && MEASURE_UNCORE(eventSet)) \
+    { \
+        VERBOSEPRINTREG(cpu_id, MSR_MIC2_U_GLOBAL_CTRL, LLU_CAST (1ULL<<63), FREEZE_UNCORE); /* same register the setup and unfreeze-and-reset paths of this file use */ \
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_MIC2_U_GLOBAL_CTRL, (1ULL<<63))); \
+    }
+
+#define KNL_UNFREEZE_UNCORE /* writes bit 61 to the uncore global control MSR; expects cpu_id, haveLock and eventSet in the expanding scope */ \
+    if (haveLock && MEASURE_UNCORE(eventSet)) \
+    { \
+        VERBOSEPRINTREG(cpu_id, MSR_MIC2_U_GLOBAL_CTRL, LLU_CAST (1ULL<<61), UNFREEZE_UNCORE); /* same register the setup and unfreeze-and-reset paths of this file use */ \
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_MIC2_U_GLOBAL_CTRL, (1ULL<<61))); \
+    }
+
+#define KNL_UNFREEZE_UNCORE_AND_RESET_CTR /* zeroes the counter registers of all uncore events, then writes bit 61 to MSR_MIC2_U_GLOBAL_CTRL; expects cpu_id, haveLock and eventSet in the expanding scope */ \
+    if (haveLock && MEASURE_UNCORE(eventSet)) \
+    { \
+        for (int i=0;i < eventSet->numberOfEvents;i++) \
+        { \
+            RegisterIndex index = eventSet->events[i].index; \
+            RegisterType type = counter_map[index].type; \
+            if (type < UNCORE) /* types below UNCORE are skipped */ \
+            { \
+                continue; \
+            } \
+            PciDeviceIndex dev = counter_map[index].device; \
+            if (HPMcheck(dev, cpu_id) && TESTTYPE(eventSet, type)) \
+            { \
+                VERBOSEPRINTPCIREG(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL, CLEAR_CTR_MANUAL); \
+                CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL)); \
+                if (counter_map[index].counterRegister2 != 0x0) /* a second counter register is cleared only when the map defines one */ \
+                { \
+                    VERBOSEPRINTPCIREG(cpu_id, dev, counter_map[index].counterRegister2, 0x0ULL, CLEAR_CTR_MANUAL); \
+                    CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister2, 0x0ULL)); \
+                } \
+            } \
+        } \
+        VERBOSEPRINTREG(cpu_id, MSR_MIC2_U_GLOBAL_CTRL, LLU_CAST (1ULL<<61), UNFREEZE_UNCORE); \
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_MIC2_U_GLOBAL_CTRL, (1ULL<<61))); \
+    }
+
+
+int perfmon_startCountersThread_knl(int thread_id, PerfmonEventSet* eventSet) /* Zeroes the counters of all initialized events for one thread and enables counting; returns 0 at the end */
+{
+    int haveLock = 0;
+    uint64_t tmp;
+    uint64_t flags = 0x0ULL; /* enable mask for the core counters, written to the global control MSR below */
+    int cpu_id = groupSet->threads[thread_id].processorId;
+
+    if (socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id) /* this CPU is the one recorded in socket_lock for its node */
+    {
+        haveLock = 1;
+    }
+
+    for (int i=0;i < eventSet->numberOfEvents;i++)
+    {
+        if (eventSet->events[i].threadCounter[thread_id].init == TRUE) /* only events marked as initialized for this thread are started */
+        {
+            RegisterType type = eventSet->events[i].type;
+            if (!TESTTYPE(eventSet, type))
+            {
+                continue;
+            }
+            tmp = 0x0ULL;
+            RegisterIndex index = eventSet->events[i].index;
+            PciDeviceIndex dev = counter_map[index].device;
+            uint64_t counter1 = counter_map[index].counterRegister;
+            uint64_t counter2 = counter_map[index].counterRegister2;
+            eventSet->events[i].threadCounter[thread_id].startData = 0;
+            eventSet->events[i].threadCounter[thread_id].counterData = 0;
+            switch (type)
+            {
+                case PMC:
+                    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, counter1, 0x0ULL));
+                    flags |= (1ULL<<(index-cpuid_info.perf_num_fixed_ctr));  /* enable counter; 64-bit shift to match the type of flags */
+                    break;
+
+                case FIXED:
+                    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, counter1, 0x0ULL));
+                    flags |= (1ULL<<(index+32));  /* enable fixed counter */
+                    break;
+
+                case POWER: /* not zeroed: the current reading is stored as the start value */
+                    if(haveLock)
+                    {
+                        CHECK_POWER_READ_ERROR(power_read(cpu_id, counter1, (uint32_t*)&tmp));
+                        eventSet->events[i].threadCounter[thread_id].startData = field64(tmp, 0, box_map[type].regWidth);
+                    }
+                    break;
+                case MBOX0FIX: /* fixed box counters: both counter registers are zeroed by the lock holder */
+                case MBOX1FIX:
+                case MBOX2FIX:
+                case MBOX3FIX:
+                case MBOX4FIX:
+                case MBOX5FIX:
+                case MBOX6FIX:
+                case MBOX7FIX:
+                case EUBOX0FIX:
+                case EUBOX1FIX:
+                case EUBOX2FIX:
+                case EUBOX3FIX:
+                case EUBOX4FIX:
+                case EUBOX5FIX:
+                case EUBOX6FIX:
+                case EUBOX7FIX:
+                case EDBOX0FIX:
+                case EDBOX1FIX:
+                case EDBOX2FIX:
+                case EDBOX3FIX:
+                case EDBOX4FIX:
+                case EDBOX5FIX:
+                case EDBOX6FIX:
+                case EDBOX7FIX:
+                    if (haveLock)
+                    {
+                        CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, dev, counter1, 0x0ULL));
+                        CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, dev, counter2, 0x0ULL));
+                    }
+                    break;
+
+                default: /* the remaining uncore counters are zeroed by KNL_UNFREEZE_UNCORE_AND_RESET_CTR below */
+                    break;
+            }
+            eventSet->events[i].threadCounter[thread_id].counterData = eventSet->events[i].threadCounter[thread_id].startData;
+        }
+    }
+
+    KNL_UNFREEZE_UNCORE_AND_RESET_CTR;
+
+    if (MEASURE_CORE(eventSet)) /* the same mask is written to the overflow control MSR and then to the global control MSR */
+    {
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, flags));
+        VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST flags, UNFREEZE_PMC_OR_FIXED)
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, flags));
+    }
+
+    return 0;
+}
+
+int knl_uncore_read(int cpu_id, RegisterIndex index, PerfmonEvent *event, /* Reads one uncore counter into *cur_result and bumps *overflows when a wrap is confirmed; returns 0 */
+                     uint64_t* cur_result, int* overflows, int flags,
+                     int global_offset, int box_offset)
+{
+    uint64_t result = 0x0ULL;
+    uint64_t tmp = 0x0ULL;
+    RegisterType type = counter_map[index].type;
+    PciDeviceIndex dev = counter_map[index].device;
+    uint64_t counter1 = counter_map[index].counterRegister;
+    uint64_t counter2 = counter_map[index].counterRegister2;
+    if (socket_lock[affinity_core2node_lookup[cpu_id]] != cpu_id) /* only the CPU recorded in socket_lock for this node reads the box */
+    {
+        return 0;
+    }
+
+    CHECK_PCI_READ_ERROR(HPMread(cpu_id, dev, counter1, &result));
+    VERBOSEPRINTPCIREG(cpu_id, dev, counter1, LLU_CAST result, READ_REG_1);
+    if (flags & FREEZE_FLAG_CLEAR_CTR) /* optionally zero the register right after reading it */
+    {
+        VERBOSEPRINTPCIREG(cpu_id, dev, counter1, LLU_CAST 0x0U, CLEAR_PCI_REG_1);
+        CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, dev, counter1, 0x0U));
+    }
+    if (counter2 != 0x0) /* two-register counters: the first register forms the upper 32 bits */
+    {
+        result <<= 32;
+        CHECK_PCI_READ_ERROR(HPMread(cpu_id, dev, counter2, &tmp));
+        VERBOSEPRINTPCIREG(cpu_id, dev, counter2, LLU_CAST tmp, READ_REG_2);
+        result += tmp;
+        if (flags & FREEZE_FLAG_CLEAR_CTR)
+        {
+            VERBOSEPRINTPCIREG(cpu_id, dev, counter2, LLU_CAST 0x0U, CLEAR_PCI_REG_2);
+            CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, dev, counter2, 0x0U));
+        }
+    }
+    result = field64(result, 0, box_map[type].regWidth);
+    if (result < *cur_result) /* a value below the previous reading is treated as a possible wrap */
+    {
+        uint64_t ovf_values = 0x0ULL;
+        int test_local = 0;
+        global_offset = box_map[type].ovflOffset; /* as before, the box_map value is used instead of the caller's argument */
+        if (global_offset != -1)
+        {
+            CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV,
+                                           MSR_MIC2_U_GLOBAL_STATUS,
+                                           &ovf_values)); /* read the same status register whose bit is cleared below */
+            VERBOSEPRINTREG(cpu_id, MSR_MIC2_U_GLOBAL_STATUS, LLU_CAST ovf_values, READ_GLOBAL_OVFL);
+            if (ovf_values & (1ULL<<global_offset))
+            {
+                VERBOSEPRINTREG(cpu_id, MSR_MIC2_U_GLOBAL_STATUS, LLU_CAST (1ULL<<global_offset), CLEAR_GLOBAL_OVFL);
+                CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV,
+                                                 MSR_MIC2_U_GLOBAL_STATUS,
+                                                 (1ULL<<global_offset)));
+                test_local = 1;
+            }
+        }
+        else /* no global overflow bit for this box: go straight to the box-local check */
+        {
+            test_local = 1;
+        }
+
+        if (test_local && box_map[type].statusRegister != 0x0)
+        {
+            ovf_values = 0x0ULL;
+            CHECK_PCI_READ_ERROR(HPMread(cpu_id, dev,
+                                              box_map[type].statusRegister,
+                                              &ovf_values));
+            VERBOSEPRINTPCIREG(cpu_id, dev, box_map[type].statusRegister, LLU_CAST ovf_values, READ_BOX_OVFL);
+            if (ovf_values & (1ULL<<box_offset))
+            {
+                (*overflows)++;
+                VERBOSEPRINTPCIREG(cpu_id, dev, box_map[type].statusRegister, LLU_CAST (1ULL<<box_offset), RESET_BOX_OVFL);
+                CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, dev,
+                                                    box_map[type].statusRegister,
+                                                    (1ULL<<box_offset)));
+            }
+        }
+        else if (test_local && (global_offset != -1) && (ovf_values & (1ULL<<global_offset))) /* the -1 check keeps the shift count valid */
+        {
+            (*overflows)++;
+        }
+    }
+    *cur_result = result;
+    return 0;
+}
+
+#define KNL_CHECK_CORE_OVERFLOW(offset) /* expects counter_result, eventSet, i, thread_id and cpu_id in the expanding scope */ \
+    if (counter_result < eventSet->events[i].threadCounter[thread_id].counterData) /* a value below the previous reading is treated as a possible wrap */ \
+    { \
+        uint64_t ovf_values = 0x0ULL; \
+        CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_STATUS, &ovf_values)); \
+        if (ovf_values & (1ULL<<(offset))) \
+        { \
+            eventSet->events[i].threadCounter[thread_id].overflows++; \
+        } \
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, (1ULL<<(offset)))); /* clear through the overflow control MSR, as the start routine does, instead of writing the status MSR */ \
+    }
+
+int perfmon_stopCountersThread_knl(int thread_id, PerfmonEventSet* eventSet)
+{ /* Takes the last reading of every initialized event of eventSet for thread_id, stores it and clears the event's init flag. */
+    uint64_t counter_result = 0x0ULL;
+    int haveLock = 0; /* set to 1 below when socket_lock names this CPU for its node */
+    int cpu_id = groupSet->threads[thread_id].processorId;
+    /* Returns 0 at the end; the CHECK_* macros are defined elsewhere and may leave earlier on error -- TODO confirm. */
+    if (socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id)
+    {
+        haveLock = 1;
+    }
+    /* Core counters: write 0 to MSR_PERF_GLOBAL_CTRL before reading (logged as FREEZE_PMC_OR_FIXED). */
+    if (MEASURE_CORE(eventSet))
+    {
+        VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_OR_FIXED)
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
+    }
+    /* Macro defined elsewhere; presumably freezes the uncore counters and may use locals such as haveLock/cpu_id -- verify. */
+    KNL_FREEZE_UNCORE;
+    /* Visit each event; only those still marked init == TRUE for this thread are read. */
+    for (int i=0;i < eventSet->numberOfEvents;i++)
+    {
+        if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
+        {
+            RegisterType type = eventSet->events[i].type;
+            if (!TESTTYPE(eventSet, type)) /* skip events whose register type fails the TESTTYPE check for this set */
+            {
+                continue;
+            }
+            counter_result = 0x0ULL; /* reset per event; stays 0 if no branch below performs a read */
+            RegisterIndex index = eventSet->events[i].index;
+            PerfmonEvent *event = &(eventSet->events[i].event);
+            PciDeviceIndex dev = counter_map[index].device;
+            uint64_t counter1 = counter_map[index].counterRegister;
+            uint64_t counter2 = counter_map[index].counterRegister2; /* second register, read only in the *FIX box branch */
+            uint64_t* current = &(eventSet->events[i].threadCounter[thread_id].counterData); /* not referenced by name in the visible code; possibly consumed by a macro -- verify */
+            int* overflows = &(eventSet->events[i].threadCounter[thread_id].overflows);
+            int ovf_offset = box_map[type].ovflOffset;
+            switch (type)
+            {
+                case PMC:
+                    CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, counter1, &counter_result));
+                    KNL_CHECK_CORE_OVERFLOW(index-cpuid_info.perf_num_fixed_ctr); /* argument: index relative to the fixed counters */
+                    VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, READ_PMC);
+                    break;
+                case FIXED:
+                    CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, counter1, &counter_result));
+                    KNL_CHECK_CORE_OVERFLOW(index+32); /* argument: index shifted by 32 for fixed counters */
+                    VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, READ_FIXED);
+                    break;
+                /* Power counters are read only by the CPU that holds the lock. */
+                case POWER:
+                    if(haveLock)
+                    {
+                        CHECK_POWER_READ_ERROR(power_read(cpu_id, counter1, (uint32_t*)&counter_result)); /* fills counter_result through a uint32_t pointer */
+                        if (counter_result < eventSet->events[i].threadCounter[thread_id].counterData) /* new value below the stored one is counted as an overflow */
+                        {
+                            eventSet->events[i].threadCounter[thread_id].overflows++;
+                        }
+                    }
+                    break;
+                /* Thermal reading is not gated by haveLock. */
+                case THERMAL:
+                    CHECK_TEMP_READ_ERROR(thermal_read(cpu_id, (uint32_t*)&counter_result));
+                    break;
+
+                case CBOX0:
+                case CBOX1:
+                case CBOX2:
+                case CBOX3:
+                case CBOX4:
+                case CBOX5:
+                case CBOX6:
+                case CBOX7:
+                case CBOX8:
+                case CBOX9:
+                case CBOX10:
+                case CBOX11:
+                case CBOX12:
+                case CBOX13:
+                case CBOX14:
+                case CBOX15:
+                case CBOX16:
+                case CBOX17:
+                case CBOX18:
+                case CBOX19:
+                case CBOX20:
+                case CBOX21:
+                case CBOX22:
+                case CBOX23:
+                case CBOX24:
+                case CBOX25:
+                case CBOX26:
+                case CBOX27:
+                case CBOX28:
+                case CBOX29:
+                case CBOX30:
+                case CBOX31:
+                case CBOX32:
+                case CBOX33:
+                case CBOX34:
+                case CBOX35:
+                case CBOX36:
+                case CBOX37:
+                case MBOX0:
+                case MBOX1:
+                case MBOX2:
+                case MBOX3:
+                case MBOX4:
+                case MBOX5:
+                case MBOX6:
+                case MBOX7:
+                case IBOX0:
+                case EUBOX0:
+                case EUBOX1:
+                case EUBOX2:
+                case EUBOX3:
+                case EUBOX4:
+                case EUBOX5:
+                case EUBOX6:
+                case EUBOX7:
+                case EDBOX0:
+                case EDBOX1:
+                case EDBOX2:
+                case EDBOX3:
+                case EDBOX4:
+                case EDBOX5:
+                case EDBOX6:
+                case EDBOX7:
+                case WBOX:
+                case UBOX:
+                case PBOX:
+                    knl_uncore_read(cpu_id, index, event, &counter_result, overflows, /* helper defined elsewhere; the stop path passes FREEZE_FLAG_CLEAR_CTR */
+                                    FREEZE_FLAG_CLEAR_CTR, ovf_offset, getCounterTypeOffset(index));
+                    break;
+                /* Fixed box counters: value is assembled from two register reads, lock holder only. */
+                case MBOX0FIX:
+                case MBOX1FIX:
+                case MBOX2FIX:
+                case MBOX3FIX:
+                case MBOX4FIX:
+                case MBOX5FIX:
+                case MBOX6FIX:
+                case MBOX7FIX:
+                case EUBOX0FIX:
+                case EUBOX1FIX:
+                case EUBOX2FIX:
+                case EUBOX3FIX:
+                case EUBOX4FIX:
+                case EUBOX5FIX:
+                case EUBOX6FIX:
+                case EUBOX7FIX:
+                case EDBOX0FIX:
+                case EDBOX1FIX:
+                case EDBOX2FIX:
+                case EDBOX3FIX:
+                case EDBOX4FIX:
+                case EDBOX5FIX:
+                case EDBOX6FIX:
+                case EDBOX7FIX:
+                    if (haveLock)
+                    {
+                        uint64_t tmp = 0x0ULL;
+                        CHECK_MSR_READ_ERROR(HPMread(cpu_id, dev, counter1, &counter_result));
+                        VERBOSEPRINTPCIREG(cpu_id, dev, counter1, LLU_CAST counter_result, READ_FIXED_BOX_1);
+                        CHECK_MSR_READ_ERROR(HPMread(cpu_id, dev, counter2, &tmp));
+                        VERBOSEPRINTPCIREG(cpu_id, dev, counter2, LLU_CAST tmp , READ_FIXED_BOX_1);
+                        counter_result = (counter_result<<32)|tmp; /* counter1 supplies the upper 32 bits, counter2 the lower bits */
+                        counter_result = field64(counter_result, 0, box_map[type].regWidth);
+                        if (counter_result < eventSet->events[i].threadCounter[thread_id].counterData)
+                        {
+                            eventSet->events[i].threadCounter[thread_id].overflows++;
+                        }
+                    }
+                    break;
+                /* Any other register type is left unread (counter_result stays 0). */
+                default:
+                    break;
+            }
+            eventSet->events[i].threadCounter[thread_id].counterData = field64(counter_result, 0, box_map[type].regWidth); /* stored unconditionally, so 0 is stored when nothing was read */
+            eventSet->events[i].threadCounter[thread_id].init = FALSE; /* mark the event as no longer initialized for this thread */
+        }
+    }
+    return 0;
+}
+
+int perfmon_readCountersThread_knl(int thread_id, PerfmonEventSet* eventSet)
+{ /* Reads every initialized event of eventSet for thread_id and stores the value; the init flags are left untouched and the saved core control value is written back at the end. */
+    uint64_t counter_result = 0x0ULL;
+    uint64_t pmc_flags = 0x0ULL; /* MSR_PERF_GLOBAL_CTRL content saved before it is zeroed; written back at the end */
+    int haveLock = 0; /* set to 1 below when socket_lock names this CPU for its node */
+    int cpu_id = groupSet->threads[thread_id].processorId;
+    /* Returns 0 at the end; the CHECK_* macros are defined elsewhere and may leave earlier on error -- TODO confirm. */
+    if (socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id)
+    {
+        haveLock = 1;
+    }
+    /* Core counters: remember the current global control value, then write 0 for the duration of the read. */
+    if (MEASURE_CORE(eventSet))
+    {
+        CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, &pmc_flags));
+        VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_OR_FIXED)
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
+    }
+    KNL_FREEZE_UNCORE; /* macro defined elsewhere; presumably freezes the uncore counters -- verify */
+    /* Visit each event; only those marked init == TRUE for this thread are read. */
+    for (int i=0;i < eventSet->numberOfEvents;i++)
+    {
+        if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
+        {
+            RegisterType type = eventSet->events[i].type;
+            if (!TESTTYPE(eventSet, type)) /* skip events whose register type fails the TESTTYPE check for this set */
+            {
+                continue;
+            }
+            counter_result = 0x0ULL; /* reset per event; stays 0 if no branch below performs a read */
+            RegisterIndex index = eventSet->events[i].index;
+            PerfmonEvent *event = &(eventSet->events[i].event);
+            PciDeviceIndex dev = counter_map[index].device;
+            uint64_t counter1 = counter_map[index].counterRegister;
+            uint64_t* current = &(eventSet->events[i].threadCounter[thread_id].counterData); /* not referenced by name in the visible code; possibly consumed by a macro -- verify */
+            int* overflows = &(eventSet->events[i].threadCounter[thread_id].overflows);
+            int ovf_offset = box_map[type].ovflOffset;
+            switch (type)
+            {
+                case PMC:
+                    CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, counter1, &counter_result));
+                    KNL_CHECK_CORE_OVERFLOW(index-cpuid_info.perf_num_fixed_ctr); /* argument: index relative to the fixed counters */
+                    VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, READ_PMC);
+                    break;
+                case FIXED:
+                    CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, counter1, &counter_result));
+                    KNL_CHECK_CORE_OVERFLOW(index+32); /* argument: index shifted by 32 for fixed counters */
+                    VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, READ_FIXED);
+                    break;
+                /* Power counters are read only by the CPU that holds the lock. */
+                case POWER:
+                    if(haveLock)
+                    {
+                        CHECK_POWER_READ_ERROR(power_read(cpu_id, counter1, (uint32_t*)&counter_result)); /* fills counter_result through a uint32_t pointer */
+                        if (counter_result < eventSet->events[i].threadCounter[thread_id].counterData) /* new value below the stored one is counted as an overflow */
+                        {
+                            eventSet->events[i].threadCounter[thread_id].overflows++;
+                        }
+                    }
+                    break;
+                /* Thermal reading is not gated by haveLock. */
+                case THERMAL:
+                    CHECK_TEMP_READ_ERROR(thermal_read(cpu_id, (uint32_t*)&counter_result));
+                    break;
+
+                case CBOX0:
+                case CBOX1:
+                case CBOX2:
+                case CBOX3:
+                case CBOX4:
+                case CBOX5:
+                case CBOX6:
+                case CBOX7:
+                case CBOX8:
+                case CBOX9:
+                case CBOX10:
+                case CBOX11:
+                case CBOX12:
+                case CBOX13:
+                case CBOX14:
+                case CBOX15:
+                case CBOX16:
+                case CBOX17:
+                case CBOX18:
+                case CBOX19:
+                case CBOX20:
+                case CBOX21:
+                case CBOX22:
+                case CBOX23:
+                case CBOX24:
+                case CBOX25:
+                case CBOX26:
+                case CBOX27:
+                case CBOX28:
+                case CBOX29:
+                case CBOX30:
+                case CBOX31:
+                case CBOX32:
+                case CBOX33:
+                case CBOX34:
+                case CBOX35:
+                case CBOX36:
+                case CBOX37:
+                case MBOX0:
+                case MBOX1:
+                case MBOX2:
+                case MBOX3:
+                case MBOX4:
+                case MBOX5:
+                case MBOX6:
+                case MBOX7:
+                case IBOX0:
+                case EUBOX0:
+                case EUBOX1:
+                case EUBOX2:
+                case EUBOX3:
+                case EUBOX4:
+                case EUBOX5:
+                case EUBOX6:
+                case EUBOX7:
+                case EDBOX0:
+                case EDBOX1:
+                case EDBOX2:
+                case EDBOX3:
+                case EDBOX4:
+                case EDBOX5:
+                case EDBOX6:
+                case EDBOX7:
+                case WBOX:
+                case UBOX:
+                case PBOX:
+                    knl_uncore_read(cpu_id, index, event, &counter_result, overflows, /* helper defined elsewhere; the read path passes FREEZE_FLAG_ONLYFREEZE */
+                                    FREEZE_FLAG_ONLYFREEZE, ovf_offset, getCounterTypeOffset(index));
+                    break;
+                /* Fixed box counters, lock holder only. */
+                case MBOX0FIX:
+                case MBOX1FIX:
+                case MBOX2FIX:
+                case MBOX3FIX:
+                case MBOX4FIX:
+                case MBOX5FIX:
+                case MBOX6FIX:
+                case MBOX7FIX:
+                case EUBOX0FIX:
+                case EUBOX1FIX:
+                case EUBOX2FIX:
+                case EUBOX3FIX:
+                case EUBOX4FIX:
+                case EUBOX5FIX:
+                case EUBOX6FIX:
+                case EUBOX7FIX:
+                case EDBOX0FIX:
+                case EDBOX1FIX:
+                case EDBOX2FIX:
+                case EDBOX3FIX:
+                case EDBOX4FIX:
+                case EDBOX5FIX:
+                case EDBOX6FIX:
+                case EDBOX7FIX:
+                    if (haveLock)
+                    {
+                        CHECK_MSR_READ_ERROR(HPMread(cpu_id, dev, counter1, &counter_result)); /* NOTE(review): only counter1 is read here, while perfmon_stopCountersThread_knl also reads counterRegister2 and combines both as (first<<32)|second -- confirm the single read is intended */
+                        if (counter_result < eventSet->events[i].threadCounter[thread_id].counterData)
+                        {
+                            eventSet->events[i].threadCounter[thread_id].overflows++;
+                        }
+                    }
+                    break;
+                /* Any other register type is left unread (counter_result stays 0). */
+                default:
+                    break;
+            }
+            eventSet->events[i].threadCounter[thread_id].counterData = field64(counter_result, 0, box_map[type].regWidth); /* stored unconditionally, so 0 is stored when nothing was read */
+        }
+    }
+    KNL_UNFREEZE_UNCORE; /* macro defined elsewhere; presumably the counterpart of KNL_FREEZE_UNCORE -- verify */
+    if (MEASURE_CORE(eventSet))
+    {
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, pmc_flags)); /* put back the control value saved at the top */
+    }
+    return 0;
+}
+
+
+int perfmon_finalizeCountersThread_knl(int thread_id, PerfmonEventSet* eventSet)
+{ /* Clears the configuration registers of all events in eventSet for thread_id, marks them uninitialized and resets the global control/overflow registers; returns 0 at the end (CHECK_* macros may leave earlier -- TODO confirm). */
+    int haveLock = 0; /* set to 1 below when socket_lock names this CPU for its node */
+    int haveTileLock = 1; /* fixed to 1: the tile-lock test further down is commented out */
+    int cpu_id = groupSet->threads[thread_id].processorId;
+    uint64_t ovf_values_core = (1ULL<<63)|(1ULL<<62); /* starts with bits 63 and 62 set; per-counter bits are ORed in below and the result is written to MSR_PERF_GLOBAL_OVF_CTRL */
+
+    if (socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id)
+    {
+        haveLock = 1;
+    }
+    /*if (tile_lock[affinity_thread2tile_lookup[cpu_id]] == cpu_id)
+    {
+        haveTileLock = 1;
+    }*/
+    /* Unlike the stop/read routines, this loop does not test the per-thread init flag before handling an event. */
+    for (int i=0;i < eventSet->numberOfEvents;i++)
+    {
+        RegisterType type = eventSet->events[i].type;
+        if (!TESTTYPE(eventSet, type)) /* skip events whose register type fails the TESTTYPE check for this set */
+        {
+            continue;
+        }
+        RegisterIndex index = eventSet->events[i].index;
+        PerfmonEvent *event = &(eventSet->events[i].event);
+        uint64_t reg = counter_map[index].configRegister;
+        PciDeviceIndex dev = counter_map[index].device;
+        switch (type)
+        {
+            case PMC:
+                ovf_values_core |= (1ULL<<(index-cpuid_info.perf_num_fixed_ctr)); /* bit position: index relative to the fixed counters */
+                if ((haveTileLock) && (event->eventId == 0xB7)) /* event 0xB7: umask 0x1 / 0x2 selects which OFFCORE_RESP register is zeroed */
+                {
+                    if (event->umask == 0x1)
+                    {
+                        VERBOSEPRINTREG(cpu_id, MSR_OFFCORE_RESP0, 0x0ULL, CLEAR_OFFCORE_RESP0);
+                        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_OFFCORE_RESP0, 0x0ULL));
+                    }
+                    else if (event->umask == 0x2)
+                    {
+                        VERBOSEPRINTREG(cpu_id, MSR_OFFCORE_RESP1, 0x0ULL, CLEAR_OFFCORE_RESP1);
+                        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_OFFCORE_RESP1, 0x0ULL));
+                    }
+                }
+                break;
+            case FIXED:
+                ovf_values_core |= (1ULL<<(index+32)); /* bit position: index shifted by 32 for fixed counters */
+                break;
+            default:
+                break;
+        }
+        if ((reg) && (((type == PMC)||(type == FIXED))||((type >= UNCORE) && (haveLock)))) /* zero the config register: always for PMC/FIXED, for types >= UNCORE only with the lock; entries with configRegister 0 are skipped */
+        {
+            VERBOSEPRINTPCIREG(cpu_id, dev, reg, 0x0ULL, CLEAR_CTL);
+            CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, reg, 0x0ULL));
+        }
+        eventSet->events[i].threadCounter[thread_id].init = FALSE;
+    }
+    /* Uncore reset (lock holder only): write the status value that was just read back to the same register, then write bit 59 to the global control register. */
+    if (haveLock && MEASURE_UNCORE(eventSet))
+    {
+        uint64_t ovf_values_uncore = 0x0ULL;
+        CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV,MSR_MIC2_U_GLOBAL_STATUS, &ovf_values_uncore));
+        VERBOSEPRINTREG(cpu_id, MSR_MIC2_U_GLOBAL_STATUS, LLU_CAST ovf_values_uncore, CLEAR_UNCORE_OVF)
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_MIC2_U_GLOBAL_STATUS, ovf_values_uncore));
+        VERBOSEPRINTREG(cpu_id, MSR_MIC2_U_GLOBAL_CTRL, LLU_CAST (1ULL<<59), CLEAR_UNCORE_CTRL)
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_MIC2_U_GLOBAL_CTRL, (1ULL<<59)));
+    }
+    /* Core reset: write 0 to the global control register, then write the collected overflow bits to the overflow control register. */
+    if (MEASURE_CORE(eventSet))
+    {
+        VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST 0x0ULL, CLEAR_GLOBAL_CTRL)
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
+        VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, LLU_CAST ovf_values_core, CLEAR_GLOBAL_OVF)
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, ovf_values_core));
+    }
+    return 0;
+}
diff --git a/src/includes/perfmon_knl_counters.h b/src/includes/perfmon_knl_counters.h
new file mode 100644
index 0000000..9dc5b93
--- /dev/null
+++ b/src/includes/perfmon_knl_counters.h
@@ -0,0 +1,475 @@
+/*
+ * =======================================================================================
+ *
+ *      Filename:  perfmon_knl_counters.h
+ *
+ *      Description: Counter header file of perfmon module for Intel Xeon Phi (Knights Landing)
+ *
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
+ *
+ *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
+ *      Project:  likwid
+ *
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ *
+ *      This program is free software: you can redistribute it and/or modify it under
+ *      the terms of the GNU General Public License as published by the Free Software
+ *      Foundation, either version 3 of the License, or (at your option) any later
+ *      version.
+ *
+ *      This program is distributed in the hope that it will be useful, but WITHOUT ANY
+ *      WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ *      PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+ *
+ *      You should have received a copy of the GNU General Public License along with
+ *      this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * =======================================================================================
+ */
+#include <registers.h>
+
+#define NUM_COUNTERS_CORE_KNL 6
+#define NUM_COUNTERS_UNCORE_KNL 290
+#define NUM_COUNTERS_KNL 294 /* declared size of knl_counter_map */
+/* Event-option bitmasks accepted per counter type. Each list is parenthesized so it stays a single operand inside larger expressions such as (x & KNL_VALID_OPTIONS_PMC). */
+#define KNL_VALID_OPTIONS_FIXED (EVENT_OPTION_COUNT_KERNEL_MASK|EVENT_OPTION_ANYTHREAD_MASK)
+#define KNL_VALID_OPTIONS_PMC (EVENT_OPTION_EDGE_MASK|EVENT_OPTION_COUNT_KERNEL_MASK|EVENT_OPTION_INVERT_MASK|EVENT_OPTION_ANYTHREAD_MASK|EVENT_OPTION_THRESHOLD_MASK)
+#define KNL_VALID_OPTIONS_UBOX (EVENT_OPTION_EDGE_MASK|EVENT_OPTION_INVERT_MASK|EVENT_OPTION_THRESHOLD_MASK|EVENT_OPTION_TID_MASK)
+#define KNL_VALID_OPTIONS_CBOX (EVENT_OPTION_EDGE_MASK|EVENT_OPTION_INVERT_MASK|EVENT_OPTION_THRESHOLD_MASK|EVENT_OPTION_TID_MASK)
+#define KNL_VALID_OPTIONS_WBOX (EVENT_OPTION_EDGE_MASK|EVENT_OPTION_INVERT_MASK|EVENT_OPTION_THRESHOLD_MASK|\
+            EVENT_OPTION_OCCUPANCY_MASK|EVENT_OPTION_OCCUPANCY_FILTER_MASK|EVENT_OPTION_OCCUPANCY_EDGE_MASK|\
+            EVENT_OPTION_OCCUPANCY_INVERT_MASK)
+#define KNL_VALID_OPTIONS_MBOX (EVENT_OPTION_EDGE_MASK|EVENT_OPTION_INVERT_MASK|EVENT_OPTION_THRESHOLD_MASK)
+#define KNL_VALID_OPTIONS_EBOX (EVENT_OPTION_EDGE_MASK|EVENT_OPTION_INVERT_MASK|EVENT_OPTION_THRESHOLD_MASK)
+#define KNL_VALID_OPTIONS_PBOX (EVENT_OPTION_EDGE_MASK|EVENT_OPTION_INVERT_MASK|EVENT_OPTION_THRESHOLD_MASK)
+#define KNL_VALID_OPTIONS_IBOX (EVENT_OPTION_EDGE_MASK|EVENT_OPTION_INVERT_MASK|EVENT_OPTION_THRESHOLD_MASK)
+
+static RegisterMap knl_counter_map[NUM_COUNTERS_KNL] = {
+    /* Fixed Counters: instructions retired, cycles unhalted core */
+    {"FIXC0", PMC0, FIXED, MSR_PERF_FIXED_CTR_CTRL, MSR_PERF_FIXED_CTR0, 0, 0, KNL_VALID_OPTIONS_FIXED},
+    {"FIXC1", PMC1, FIXED, MSR_PERF_FIXED_CTR_CTRL, MSR_PERF_FIXED_CTR1, 0, 0, KNL_VALID_OPTIONS_FIXED},
+    {"FIXC2", PMC2, FIXED, MSR_PERF_FIXED_CTR_CTRL, MSR_PERF_FIXED_CTR2, 0, 0, KNL_VALID_OPTIONS_FIXED},
+    /* PMC Counters: 4 48bit wide */
+    {"PMC0", PMC3, PMC, MSR_MIC2_PERFEVTSEL0, MSR_MIC2_PMC0, MSR_OFFCORE_RESP0, 0, KNL_VALID_OPTIONS_PMC},
+    {"PMC1", PMC4, PMC, MSR_MIC2_PERFEVTSEL1, MSR_MIC2_PMC1, MSR_OFFCORE_RESP1, 0, KNL_VALID_OPTIONS_PMC},
+    /* Temperature Sensor*/
+    {"TMP0", PMC5, THERMAL, 0, IA32_THERM_STATUS, 0, 0},
+    /* RAPL counters */
+    {"PWR0", PMC6, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0},
+    {"PWR1", PMC7, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0},
+    {"PWR3", PMC8, POWER, 0, MSR_DRAM_ENERGY_STATUS, 0, 0},
+    {"UBOX0", PMC9, UBOX, MSR_MIC2_U_CTRL0, MSR_MIC2_U_CTR0, 0, 0, KNL_VALID_OPTIONS_UBOX},
+    {"UBOX1", PMC10, UBOX, MSR_MIC2_U_CTRL1, MSR_MIC2_U_CTR1, 0, 0, KNL_VALID_OPTIONS_UBOX},
+    {"UBOXFIX", PMC11, UBOXFIX, MSR_MIC2_U_FIXED_CTRL, MSR_MIC2_U_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"CBOX0C0", PMC12, CBOX0, MSR_MIC2_C0_CTRL0, MSR_MIC2_C0_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX0C1", PMC13, CBOX0, MSR_MIC2_C0_CTRL1, MSR_MIC2_C0_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX0C2", PMC14, CBOX0, MSR_MIC2_C0_CTRL2, MSR_MIC2_C0_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX0C3", PMC15, CBOX0, MSR_MIC2_C0_CTRL3, MSR_MIC2_C0_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX1C0", PMC16, CBOX1, MSR_MIC2_C1_CTRL0, MSR_MIC2_C1_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX1C1", PMC17, CBOX1, MSR_MIC2_C1_CTRL1, MSR_MIC2_C1_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX1C2", PMC18, CBOX1, MSR_MIC2_C1_CTRL2, MSR_MIC2_C1_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX1C3", PMC19, CBOX1, MSR_MIC2_C1_CTRL3, MSR_MIC2_C1_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX2C0", PMC20, CBOX2, MSR_MIC2_C2_CTRL0, MSR_MIC2_C2_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX2C1", PMC21, CBOX2, MSR_MIC2_C2_CTRL1, MSR_MIC2_C2_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX2C2", PMC22, CBOX2, MSR_MIC2_C2_CTRL2, MSR_MIC2_C2_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX2C3", PMC23, CBOX2, MSR_MIC2_C2_CTRL3, MSR_MIC2_C2_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX3C0", PMC24, CBOX3, MSR_MIC2_C3_CTRL0, MSR_MIC2_C3_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX3C1", PMC25, CBOX3, MSR_MIC2_C3_CTRL1, MSR_MIC2_C3_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX3C2", PMC26, CBOX3, MSR_MIC2_C3_CTRL2, MSR_MIC2_C3_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX3C3", PMC27, CBOX3, MSR_MIC2_C3_CTRL3, MSR_MIC2_C3_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX4C0", PMC28, CBOX4, MSR_MIC2_C4_CTRL0, MSR_MIC2_C4_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX4C1", PMC29, CBOX4, MSR_MIC2_C4_CTRL1, MSR_MIC2_C4_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX4C2", PMC30, CBOX4, MSR_MIC2_C4_CTRL2, MSR_MIC2_C4_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX4C3", PMC31, CBOX4, MSR_MIC2_C4_CTRL3, MSR_MIC2_C4_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX5C0", PMC32, CBOX5, MSR_MIC2_C5_CTRL0, MSR_MIC2_C5_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX5C1", PMC33, CBOX5, MSR_MIC2_C5_CTRL1, MSR_MIC2_C5_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX5C2", PMC34, CBOX5, MSR_MIC2_C5_CTRL2, MSR_MIC2_C5_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX5C3", PMC35, CBOX5, MSR_MIC2_C5_CTRL3, MSR_MIC2_C5_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX6C0", PMC36, CBOX6, MSR_MIC2_C6_CTRL0, MSR_MIC2_C6_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX6C1", PMC37, CBOX6, MSR_MIC2_C6_CTRL1, MSR_MIC2_C6_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX6C2", PMC38, CBOX6, MSR_MIC2_C6_CTRL2, MSR_MIC2_C6_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX6C3", PMC39, CBOX6, MSR_MIC2_C6_CTRL3, MSR_MIC2_C6_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX7C0", PMC40, CBOX7, MSR_MIC2_C7_CTRL0, MSR_MIC2_C7_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX7C1", PMC41, CBOX7, MSR_MIC2_C7_CTRL1, MSR_MIC2_C7_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX7C2", PMC42, CBOX7, MSR_MIC2_C7_CTRL2, MSR_MIC2_C7_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX7C3", PMC43, CBOX7, MSR_MIC2_C7_CTRL3, MSR_MIC2_C7_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX8C0", PMC44, CBOX8, MSR_MIC2_C8_CTRL0, MSR_MIC2_C8_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX8C1", PMC45, CBOX8, MSR_MIC2_C8_CTRL1, MSR_MIC2_C8_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX8C2", PMC46, CBOX8, MSR_MIC2_C8_CTRL2, MSR_MIC2_C8_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX8C3", PMC47, CBOX8, MSR_MIC2_C8_CTRL3, MSR_MIC2_C8_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX9C0", PMC48, CBOX9, MSR_MIC2_C9_CTRL0, MSR_MIC2_C9_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX9C1", PMC49, CBOX9, MSR_MIC2_C9_CTRL1, MSR_MIC2_C9_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX9C2", PMC50, CBOX9, MSR_MIC2_C9_CTRL2, MSR_MIC2_C9_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX9C3", PMC51, CBOX9, MSR_MIC2_C9_CTRL3, MSR_MIC2_C9_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX10C0", PMC52, CBOX10, MSR_MIC2_C10_CTRL0, MSR_MIC2_C10_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX10C1", PMC53, CBOX10, MSR_MIC2_C10_CTRL1, MSR_MIC2_C10_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX10C2", PMC54, CBOX10, MSR_MIC2_C10_CTRL2, MSR_MIC2_C10_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX10C3", PMC55, CBOX10, MSR_MIC2_C10_CTRL3, MSR_MIC2_C10_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX11C0", PMC56, CBOX11, MSR_MIC2_C11_CTRL0, MSR_MIC2_C11_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX11C1", PMC57, CBOX11, MSR_MIC2_C11_CTRL1, MSR_MIC2_C11_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX11C2", PMC58, CBOX11, MSR_MIC2_C11_CTRL2, MSR_MIC2_C11_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX11C3", PMC59, CBOX11, MSR_MIC2_C11_CTRL3, MSR_MIC2_C11_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX12C0", PMC60, CBOX12, MSR_MIC2_C12_CTRL0, MSR_MIC2_C12_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX12C1", PMC61, CBOX12, MSR_MIC2_C12_CTRL1, MSR_MIC2_C12_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX12C2", PMC62, CBOX12, MSR_MIC2_C12_CTRL2, MSR_MIC2_C12_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX12C3", PMC63, CBOX12, MSR_MIC2_C12_CTRL3, MSR_MIC2_C12_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX13C0", PMC64, CBOX13, MSR_MIC2_C13_CTRL0, MSR_MIC2_C13_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX13C1", PMC65, CBOX13, MSR_MIC2_C13_CTRL1, MSR_MIC2_C13_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX13C2", PMC66, CBOX13, MSR_MIC2_C13_CTRL2, MSR_MIC2_C13_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX13C3", PMC67, CBOX13, MSR_MIC2_C13_CTRL3, MSR_MIC2_C13_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX14C0", PMC68, CBOX14, MSR_MIC2_C14_CTRL0, MSR_MIC2_C14_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX14C1", PMC69, CBOX14, MSR_MIC2_C14_CTRL1, MSR_MIC2_C14_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX14C2", PMC70, CBOX14, MSR_MIC2_C14_CTRL2, MSR_MIC2_C14_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX14C3", PMC71, CBOX14, MSR_MIC2_C14_CTRL3, MSR_MIC2_C14_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX15C0", PMC72, CBOX15, MSR_MIC2_C15_CTRL0, MSR_MIC2_C15_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX15C1", PMC73, CBOX15, MSR_MIC2_C15_CTRL1, MSR_MIC2_C15_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX15C2", PMC74, CBOX15, MSR_MIC2_C15_CTRL2, MSR_MIC2_C15_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX15C3", PMC75, CBOX15, MSR_MIC2_C15_CTRL3, MSR_MIC2_C15_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX16C0", PMC76, CBOX16, MSR_MIC2_C16_CTRL0, MSR_MIC2_C16_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX16C1", PMC77, CBOX16, MSR_MIC2_C16_CTRL1, MSR_MIC2_C16_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX16C2", PMC78, CBOX16, MSR_MIC2_C16_CTRL2, MSR_MIC2_C16_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX16C3", PMC79, CBOX16, MSR_MIC2_C16_CTRL3, MSR_MIC2_C16_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX17C0", PMC80, CBOX17, MSR_MIC2_C17_CTRL0, MSR_MIC2_C17_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX17C1", PMC81, CBOX17, MSR_MIC2_C17_CTRL1, MSR_MIC2_C17_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX17C2", PMC82, CBOX17, MSR_MIC2_C17_CTRL2, MSR_MIC2_C17_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX17C3", PMC83, CBOX17, MSR_MIC2_C17_CTRL3, MSR_MIC2_C17_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX18C0", PMC84, CBOX18, MSR_MIC2_C18_CTRL0, MSR_MIC2_C18_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX18C1", PMC85, CBOX18, MSR_MIC2_C18_CTRL1, MSR_MIC2_C18_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX18C2", PMC86, CBOX18, MSR_MIC2_C18_CTRL2, MSR_MIC2_C18_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX18C3", PMC87, CBOX18, MSR_MIC2_C18_CTRL3, MSR_MIC2_C18_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX19C0", PMC88, CBOX19, MSR_MIC2_C19_CTRL0, MSR_MIC2_C19_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX19C1", PMC89, CBOX19, MSR_MIC2_C19_CTRL1, MSR_MIC2_C19_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX19C2", PMC90, CBOX19, MSR_MIC2_C19_CTRL2, MSR_MIC2_C19_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX19C3", PMC91, CBOX19, MSR_MIC2_C19_CTRL3, MSR_MIC2_C19_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX20C0", PMC92, CBOX20, MSR_MIC2_C20_CTRL0, MSR_MIC2_C20_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX20C1", PMC93, CBOX20, MSR_MIC2_C20_CTRL1, MSR_MIC2_C20_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX20C2", PMC94, CBOX20, MSR_MIC2_C20_CTRL2, MSR_MIC2_C20_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX20C3", PMC95, CBOX20, MSR_MIC2_C20_CTRL3, MSR_MIC2_C20_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX21C0", PMC96, CBOX21, MSR_MIC2_C21_CTRL0, MSR_MIC2_C21_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX21C1", PMC97, CBOX21, MSR_MIC2_C21_CTRL1, MSR_MIC2_C21_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX21C2", PMC98, CBOX21, MSR_MIC2_C21_CTRL2, MSR_MIC2_C21_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX21C3", PMC99, CBOX21, MSR_MIC2_C21_CTRL3, MSR_MIC2_C21_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX22C0", PMC100, CBOX22, MSR_MIC2_C22_CTRL0, MSR_MIC2_C22_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX22C1", PMC101, CBOX22, MSR_MIC2_C22_CTRL1, MSR_MIC2_C22_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX22C2", PMC102, CBOX22, MSR_MIC2_C22_CTRL2, MSR_MIC2_C22_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX22C3", PMC103, CBOX22, MSR_MIC2_C22_CTRL3, MSR_MIC2_C22_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX23C0", PMC104, CBOX23, MSR_MIC2_C23_CTRL0, MSR_MIC2_C23_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX23C1", PMC105, CBOX23, MSR_MIC2_C23_CTRL1, MSR_MIC2_C23_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX23C2", PMC106, CBOX23, MSR_MIC2_C23_CTRL2, MSR_MIC2_C23_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX23C3", PMC107, CBOX23, MSR_MIC2_C23_CTRL3, MSR_MIC2_C23_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX24C0", PMC108, CBOX24, MSR_MIC2_C24_CTRL0, MSR_MIC2_C24_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX24C1", PMC109, CBOX24, MSR_MIC2_C24_CTRL1, MSR_MIC2_C24_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX24C2", PMC110, CBOX24, MSR_MIC2_C24_CTRL2, MSR_MIC2_C24_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX24C3", PMC111, CBOX24, MSR_MIC2_C24_CTRL3, MSR_MIC2_C24_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX25C0", PMC112, CBOX25, MSR_MIC2_C25_CTRL0, MSR_MIC2_C25_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX25C1", PMC113, CBOX25, MSR_MIC2_C25_CTRL1, MSR_MIC2_C25_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX25C2", PMC114, CBOX25, MSR_MIC2_C25_CTRL2, MSR_MIC2_C25_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX25C3", PMC115, CBOX25, MSR_MIC2_C25_CTRL3, MSR_MIC2_C25_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX26C0", PMC116, CBOX26, MSR_MIC2_C26_CTRL0, MSR_MIC2_C26_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX26C1", PMC117, CBOX26, MSR_MIC2_C26_CTRL1, MSR_MIC2_C26_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX26C2", PMC118, CBOX26, MSR_MIC2_C26_CTRL2, MSR_MIC2_C26_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX26C3", PMC119, CBOX26, MSR_MIC2_C26_CTRL3, MSR_MIC2_C26_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX27C0", PMC120, CBOX27, MSR_MIC2_C27_CTRL0, MSR_MIC2_C27_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX27C1", PMC121, CBOX27, MSR_MIC2_C27_CTRL1, MSR_MIC2_C27_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX27C2", PMC122, CBOX27, MSR_MIC2_C27_CTRL2, MSR_MIC2_C27_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX27C3", PMC123, CBOX27, MSR_MIC2_C27_CTRL3, MSR_MIC2_C27_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX28C0", PMC124, CBOX28, MSR_MIC2_C28_CTRL0, MSR_MIC2_C28_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX28C1", PMC125, CBOX28, MSR_MIC2_C28_CTRL1, MSR_MIC2_C28_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX28C2", PMC126, CBOX28, MSR_MIC2_C28_CTRL2, MSR_MIC2_C28_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX28C3", PMC127, CBOX28, MSR_MIC2_C28_CTRL3, MSR_MIC2_C28_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX29C0", PMC128, CBOX29, MSR_MIC2_C29_CTRL0, MSR_MIC2_C29_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX29C1", PMC129, CBOX29, MSR_MIC2_C29_CTRL1, MSR_MIC2_C29_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX29C2", PMC130, CBOX29, MSR_MIC2_C29_CTRL2, MSR_MIC2_C29_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX29C3", PMC131, CBOX29, MSR_MIC2_C29_CTRL3, MSR_MIC2_C29_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX30C0", PMC132, CBOX30, MSR_MIC2_C30_CTRL0, MSR_MIC2_C30_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX30C1", PMC133, CBOX30, MSR_MIC2_C30_CTRL1, MSR_MIC2_C30_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX30C2", PMC134, CBOX30, MSR_MIC2_C30_CTRL2, MSR_MIC2_C30_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX30C3", PMC135, CBOX30, MSR_MIC2_C30_CTRL3, MSR_MIC2_C30_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX31C0", PMC136, CBOX31, MSR_MIC2_C31_CTRL0, MSR_MIC2_C31_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX31C1", PMC137, CBOX31, MSR_MIC2_C31_CTRL1, MSR_MIC2_C31_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX31C2", PMC138, CBOX31, MSR_MIC2_C31_CTRL2, MSR_MIC2_C31_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX31C3", PMC139, CBOX31, MSR_MIC2_C31_CTRL3, MSR_MIC2_C31_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX32C0", PMC140, CBOX32, MSR_MIC2_C32_CTRL0, MSR_MIC2_C32_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX32C1", PMC141, CBOX32, MSR_MIC2_C32_CTRL1, MSR_MIC2_C32_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX32C2", PMC142, CBOX32, MSR_MIC2_C32_CTRL2, MSR_MIC2_C32_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX32C3", PMC143, CBOX32, MSR_MIC2_C32_CTRL3, MSR_MIC2_C32_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX33C0", PMC144, CBOX33, MSR_MIC2_C33_CTRL0, MSR_MIC2_C33_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX33C1", PMC145, CBOX33, MSR_MIC2_C33_CTRL1, MSR_MIC2_C33_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX33C2", PMC146, CBOX33, MSR_MIC2_C33_CTRL2, MSR_MIC2_C33_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX33C3", PMC147, CBOX33, MSR_MIC2_C33_CTRL3, MSR_MIC2_C33_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX34C0", PMC148, CBOX34, MSR_MIC2_C34_CTRL0, MSR_MIC2_C34_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX34C1", PMC149, CBOX34, MSR_MIC2_C34_CTRL1, MSR_MIC2_C34_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX34C2", PMC150, CBOX34, MSR_MIC2_C34_CTRL2, MSR_MIC2_C34_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX34C3", PMC151, CBOX34, MSR_MIC2_C34_CTRL3, MSR_MIC2_C34_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX35C0", PMC152, CBOX35, MSR_MIC2_C35_CTRL0, MSR_MIC2_C35_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX35C1", PMC153, CBOX35, MSR_MIC2_C35_CTRL1, MSR_MIC2_C35_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX35C2", PMC154, CBOX35, MSR_MIC2_C35_CTRL2, MSR_MIC2_C35_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX35C3", PMC155, CBOX35, MSR_MIC2_C35_CTRL3, MSR_MIC2_C35_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX36C0", PMC156, CBOX36, MSR_MIC2_C36_CTRL0, MSR_MIC2_C36_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX36C1", PMC157, CBOX36, MSR_MIC2_C36_CTRL1, MSR_MIC2_C36_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX36C2", PMC158, CBOX36, MSR_MIC2_C36_CTRL2, MSR_MIC2_C36_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX36C3", PMC159, CBOX36, MSR_MIC2_C36_CTRL3, MSR_MIC2_C36_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX37C0", PMC160, CBOX37, MSR_MIC2_C37_CTRL0, MSR_MIC2_C37_CTR0, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX37C1", PMC161, CBOX37, MSR_MIC2_C37_CTRL1, MSR_MIC2_C37_CTR1, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX37C2", PMC162, CBOX37, MSR_MIC2_C37_CTRL2, MSR_MIC2_C37_CTR2, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"CBOX37C3", PMC163, CBOX37, MSR_MIC2_C37_CTRL3, MSR_MIC2_C37_CTR3, 0, 0, KNL_VALID_OPTIONS_CBOX},
+    {"WBOX0", PMC164, WBOX, MSR_MIC2_PCU_CTRL0, MSR_MIC2_PCU_CTR0, 0, 0, KNL_VALID_OPTIONS_WBOX},
+    {"WBOX1", PMC165, WBOX, MSR_MIC2_PCU_CTRL1, MSR_MIC2_PCU_CTR1, 0, 0, KNL_VALID_OPTIONS_WBOX},
+    {"WBOX2", PMC166, WBOX, MSR_MIC2_PCU_CTRL2, MSR_MIC2_PCU_CTR2, 0, 0, KNL_VALID_OPTIONS_WBOX},
+    {"WBOX3", PMC167, WBOX, MSR_MIC2_PCU_CTRL3, MSR_MIC2_PCU_CTR3, 0, 0, KNL_VALID_OPTIONS_WBOX},
+    {"MBOX0C0", PMC168, MBOX0, PCI_MIC2_MC_D_CTRL0, PCI_MIC2_MC_D_CTR0_A, PCI_MIC2_MC_D_CTR0_B, PCI_IMC_DEVICE_0_CH_0, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX0C1", PMC169, MBOX0, PCI_MIC2_MC_D_CTRL1, PCI_MIC2_MC_D_CTR1_A, PCI_MIC2_MC_D_CTR1_B, PCI_IMC_DEVICE_0_CH_0, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX0C2", PMC170, MBOX0, PCI_MIC2_MC_D_CTRL2, PCI_MIC2_MC_D_CTR2_A, PCI_MIC2_MC_D_CTR2_B, PCI_IMC_DEVICE_0_CH_0, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX0C3", PMC171, MBOX0, PCI_MIC2_MC_D_CTRL3, PCI_MIC2_MC_D_CTR3_A, PCI_MIC2_MC_D_CTR3_B, PCI_IMC_DEVICE_0_CH_0, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX0FIX", PMC172, MBOX0FIX, PCI_MIC2_MC_D_FIXED_CTRL, PCI_MIC2_MC_D_FIXED_CTR_A, PCI_MIC2_MC_D_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
+    {"MBOX1C0", PMC173, MBOX1, PCI_MIC2_MC_D_CTRL0, PCI_MIC2_MC_D_CTR0_A, PCI_MIC2_MC_D_CTR0_B, PCI_IMC_DEVICE_0_CH_1, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX1C1", PMC174, MBOX1, PCI_MIC2_MC_D_CTRL1, PCI_MIC2_MC_D_CTR1_A, PCI_MIC2_MC_D_CTR1_B, PCI_IMC_DEVICE_0_CH_1, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX1C2", PMC175, MBOX1, PCI_MIC2_MC_D_CTRL2, PCI_MIC2_MC_D_CTR2_A, PCI_MIC2_MC_D_CTR2_B, PCI_IMC_DEVICE_0_CH_1, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX1C3", PMC176, MBOX1, PCI_MIC2_MC_D_CTRL3, PCI_MIC2_MC_D_CTR3_A, PCI_MIC2_MC_D_CTR3_B, PCI_IMC_DEVICE_0_CH_1, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX1FIX", PMC177, MBOX1FIX, PCI_MIC2_MC_D_FIXED_CTRL, PCI_MIC2_MC_D_FIXED_CTR_A, PCI_MIC2_MC_D_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_1, EVENT_OPTION_NONE_MASK},
+    {"MBOX2C0", PMC178, MBOX2, PCI_MIC2_MC_D_CTRL0, PCI_MIC2_MC_D_CTR0_A, PCI_MIC2_MC_D_CTR0_B, PCI_IMC_DEVICE_0_CH_2, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX2C1", PMC179, MBOX2, PCI_MIC2_MC_D_CTRL1, PCI_MIC2_MC_D_CTR1_A, PCI_MIC2_MC_D_CTR1_B, PCI_IMC_DEVICE_0_CH_2, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX2C2", PMC180, MBOX2, PCI_MIC2_MC_D_CTRL2, PCI_MIC2_MC_D_CTR2_A, PCI_MIC2_MC_D_CTR2_B, PCI_IMC_DEVICE_0_CH_2, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX2C3", PMC181, MBOX2, PCI_MIC2_MC_D_CTRL3, PCI_MIC2_MC_D_CTR3_A, PCI_MIC2_MC_D_CTR3_B, PCI_IMC_DEVICE_0_CH_2, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX2FIX", PMC182, MBOX2FIX, PCI_MIC2_MC_D_FIXED_CTRL, PCI_MIC2_MC_D_FIXED_CTR_A, PCI_MIC2_MC_D_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_2, EVENT_OPTION_NONE_MASK},
+    {"MBOX3C0", PMC183, MBOX3, PCI_MIC2_MC_U_CTRL0, PCI_MIC2_MC_U_CTR0_A, PCI_MIC2_MC_U_CTR0_B, PCI_IMC_DEVICE_0_CH_3, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX3C1", PMC184, MBOX3, PCI_MIC2_MC_U_CTRL1, PCI_MIC2_MC_U_CTR1_A, PCI_MIC2_MC_U_CTR1_B, PCI_IMC_DEVICE_0_CH_3, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX3C2", PMC185, MBOX3, PCI_MIC2_MC_U_CTRL2, PCI_MIC2_MC_U_CTR2_A, PCI_MIC2_MC_U_CTR2_B, PCI_IMC_DEVICE_0_CH_3, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX3C3", PMC186, MBOX3, PCI_MIC2_MC_U_CTRL3, PCI_MIC2_MC_U_CTR3_A, PCI_MIC2_MC_U_CTR3_B, PCI_IMC_DEVICE_0_CH_3, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX3FIX", PMC187, MBOX3FIX, PCI_MIC2_MC_U_FIXED_CTRL, PCI_MIC2_MC_U_FIXED_CTR_A, PCI_MIC2_MC_U_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_3, EVENT_OPTION_NONE_MASK},
+    {"MBOX4C0", PMC188, MBOX4, PCI_MIC2_MC_D_CTRL0, PCI_MIC2_MC_D_CTR0_A, PCI_MIC2_MC_D_CTR0_B, PCI_IMC_DEVICE_1_CH_0, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX4C1", PMC189, MBOX4, PCI_MIC2_MC_D_CTRL1, PCI_MIC2_MC_D_CTR1_A, PCI_MIC2_MC_D_CTR1_B, PCI_IMC_DEVICE_1_CH_0, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX4C2", PMC190, MBOX4, PCI_MIC2_MC_D_CTRL2, PCI_MIC2_MC_D_CTR2_A, PCI_MIC2_MC_D_CTR2_B, PCI_IMC_DEVICE_1_CH_0, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX4C3", PMC191, MBOX4, PCI_MIC2_MC_D_CTRL3, PCI_MIC2_MC_D_CTR3_A, PCI_MIC2_MC_D_CTR3_B, PCI_IMC_DEVICE_1_CH_0, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX4FIX", PMC192, MBOX4FIX, PCI_MIC2_MC_D_FIXED_CTRL, PCI_MIC2_MC_D_FIXED_CTR_A, PCI_MIC2_MC_D_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_0, EVENT_OPTION_NONE_MASK},
+    {"MBOX5C0", PMC193, MBOX5, PCI_MIC2_MC_D_CTRL0, PCI_MIC2_MC_D_CTR0_A, PCI_MIC2_MC_D_CTR0_B, PCI_IMC_DEVICE_1_CH_1, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX5C1", PMC194, MBOX5, PCI_MIC2_MC_D_CTRL1, PCI_MIC2_MC_D_CTR1_A, PCI_MIC2_MC_D_CTR1_B, PCI_IMC_DEVICE_1_CH_1, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX5C2", PMC195, MBOX5, PCI_MIC2_MC_D_CTRL2, PCI_MIC2_MC_D_CTR2_A, PCI_MIC2_MC_D_CTR2_B, PCI_IMC_DEVICE_1_CH_1, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX5C3", PMC196, MBOX5, PCI_MIC2_MC_D_CTRL3, PCI_MIC2_MC_D_CTR3_A, PCI_MIC2_MC_D_CTR3_B, PCI_IMC_DEVICE_1_CH_1, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX5FIX", PMC197, MBOX5FIX, PCI_MIC2_MC_D_FIXED_CTRL, PCI_MIC2_MC_D_FIXED_CTR_A, PCI_MIC2_MC_D_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_1, EVENT_OPTION_NONE_MASK},
+    {"MBOX6C0", PMC198, MBOX6, PCI_MIC2_MC_D_CTRL0, PCI_MIC2_MC_D_CTR0_A, PCI_MIC2_MC_D_CTR0_B, PCI_IMC_DEVICE_1_CH_2, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX6C1", PMC199, MBOX6, PCI_MIC2_MC_D_CTRL1, PCI_MIC2_MC_D_CTR1_A, PCI_MIC2_MC_D_CTR1_B, PCI_IMC_DEVICE_1_CH_2, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX6C2", PMC200, MBOX6, PCI_MIC2_MC_D_CTRL2, PCI_MIC2_MC_D_CTR2_A, PCI_MIC2_MC_D_CTR2_B, PCI_IMC_DEVICE_1_CH_2, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX6C3", PMC201, MBOX6, PCI_MIC2_MC_D_CTRL3, PCI_MIC2_MC_D_CTR3_A, PCI_MIC2_MC_D_CTR3_B, PCI_IMC_DEVICE_1_CH_2, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX6FIX", PMC202, MBOX6FIX, PCI_MIC2_MC_D_FIXED_CTRL, PCI_MIC2_MC_D_FIXED_CTR_A, PCI_MIC2_MC_D_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_2, EVENT_OPTION_NONE_MASK},
+    {"MBOX7C0", PMC203, MBOX7, PCI_MIC2_MC_U_CTRL0, PCI_MIC2_MC_U_CTR0_A, PCI_MIC2_MC_U_CTR0_B, PCI_IMC_DEVICE_1_CH_3, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX7C1", PMC204, MBOX7, PCI_MIC2_MC_U_CTRL1, PCI_MIC2_MC_U_CTR1_A, PCI_MIC2_MC_U_CTR1_B, PCI_IMC_DEVICE_1_CH_3, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX7C2", PMC205, MBOX7, PCI_MIC2_MC_U_CTRL2, PCI_MIC2_MC_U_CTR2_A, PCI_MIC2_MC_U_CTR2_B, PCI_IMC_DEVICE_1_CH_3, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX7C3", PMC206, MBOX7, PCI_MIC2_MC_U_CTRL3, PCI_MIC2_MC_U_CTR3_A, PCI_MIC2_MC_U_CTR3_B, PCI_IMC_DEVICE_1_CH_3, KNL_VALID_OPTIONS_MBOX},
+    {"MBOX7FIX", PMC207, MBOX7FIX, PCI_MIC2_MC_U_FIXED_CTRL, PCI_MIC2_MC_U_FIXED_CTR_A, PCI_MIC2_MC_U_FIXED_CTR_B, PCI_IMC_DEVICE_1_CH_3, EVENT_OPTION_NONE_MASK},
+    {"EUBOX0C0", PMC208, EUBOX0, PCI_MIC2_EDC_U_CTRL0, PCI_MIC2_EDC_U_CTR0_A, PCI_MIC2_EDC_U_CTR0_B, PCI_EDC0_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX0C1", PMC209, EUBOX0, PCI_MIC2_EDC_U_CTRL1, PCI_MIC2_EDC_U_CTR1_A, PCI_MIC2_EDC_U_CTR1_B, PCI_EDC0_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX0C2", PMC210, EUBOX0, PCI_MIC2_EDC_U_CTRL2, PCI_MIC2_EDC_U_CTR2_A, PCI_MIC2_EDC_U_CTR2_B, PCI_EDC0_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX0C3", PMC211, EUBOX0, PCI_MIC2_EDC_U_CTRL3, PCI_MIC2_EDC_U_CTR3_A, PCI_MIC2_EDC_U_CTR3_B, PCI_EDC0_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX0FIX", PMC212, EUBOX0FIX, PCI_MIC2_EDC_U_FIXED_CTRL, PCI_MIC2_EDC_U_FIXED_CTR_A, PCI_MIC2_EDC_U_FIXED_CTR_B, PCI_EDC0_UCLK_DEVICE, EVENT_OPTION_NONE_MASK},
+    {"EDBOX0C0", PMC213, EDBOX0, PCI_MIC2_EDC_D_CTRL0, PCI_MIC2_EDC_D_CTR0_A, PCI_MIC2_EDC_D_CTR0_B, PCI_EDC0_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX0C1", PMC214, EDBOX0, PCI_MIC2_EDC_D_CTRL1, PCI_MIC2_EDC_D_CTR1_A, PCI_MIC2_EDC_D_CTR1_B, PCI_EDC0_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX0C2", PMC215, EDBOX0, PCI_MIC2_EDC_D_CTRL2, PCI_MIC2_EDC_D_CTR2_A, PCI_MIC2_EDC_D_CTR2_B, PCI_EDC0_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX0C3", PMC216, EDBOX0, PCI_MIC2_EDC_D_CTRL3, PCI_MIC2_EDC_D_CTR3_A, PCI_MIC2_EDC_D_CTR3_B, PCI_EDC0_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX0FIX", PMC217, EDBOX0FIX, PCI_MIC2_EDC_D_FIXED_CTRL, PCI_MIC2_EDC_D_FIXED_CTR_A, PCI_MIC2_EDC_D_FIXED_CTR_B, PCI_EDC0_DCLK_DEVICE, EVENT_OPTION_NONE_MASK},
+    {"EUBOX1C0", PMC218, EUBOX1, PCI_MIC2_EDC_U_CTRL0, PCI_MIC2_EDC_U_CTR0_A, PCI_MIC2_EDC_U_CTR0_B, PCI_EDC1_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX1C1", PMC219, EUBOX1, PCI_MIC2_EDC_U_CTRL1, PCI_MIC2_EDC_U_CTR1_A, PCI_MIC2_EDC_U_CTR1_B, PCI_EDC1_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX1C2", PMC220, EUBOX1, PCI_MIC2_EDC_U_CTRL2, PCI_MIC2_EDC_U_CTR2_A, PCI_MIC2_EDC_U_CTR2_B, PCI_EDC1_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX1C3", PMC221, EUBOX1, PCI_MIC2_EDC_U_CTRL3, PCI_MIC2_EDC_U_CTR3_A, PCI_MIC2_EDC_U_CTR3_B, PCI_EDC1_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX1FIX", PMC222, EUBOX1FIX, PCI_MIC2_EDC_U_FIXED_CTRL, PCI_MIC2_EDC_U_FIXED_CTR_A, PCI_MIC2_EDC_U_FIXED_CTR_B, PCI_EDC1_UCLK_DEVICE, EVENT_OPTION_NONE_MASK},
+    {"EDBOX1C0", PMC223, EDBOX1, PCI_MIC2_EDC_D_CTRL0, PCI_MIC2_EDC_D_CTR0_A, PCI_MIC2_EDC_D_CTR0_B, PCI_EDC1_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX1C1", PMC224, EDBOX1, PCI_MIC2_EDC_D_CTRL1, PCI_MIC2_EDC_D_CTR1_A, PCI_MIC2_EDC_D_CTR1_B, PCI_EDC1_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX1C2", PMC225, EDBOX1, PCI_MIC2_EDC_D_CTRL2, PCI_MIC2_EDC_D_CTR2_A, PCI_MIC2_EDC_D_CTR2_B, PCI_EDC1_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX1C3", PMC226, EDBOX1, PCI_MIC2_EDC_D_CTRL3, PCI_MIC2_EDC_D_CTR3_A, PCI_MIC2_EDC_D_CTR3_B, PCI_EDC1_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX1FIX", PMC227, EDBOX1FIX, PCI_MIC2_EDC_D_FIXED_CTRL, PCI_MIC2_EDC_D_FIXED_CTR_A, PCI_MIC2_EDC_D_FIXED_CTR_B, PCI_EDC1_DCLK_DEVICE, EVENT_OPTION_NONE_MASK},
+    {"EUBOX2C0", PMC228, EUBOX2, PCI_MIC2_EDC_U_CTRL0, PCI_MIC2_EDC_U_CTR0_A, PCI_MIC2_EDC_U_CTR0_B, PCI_EDC2_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX2C1", PMC229, EUBOX2, PCI_MIC2_EDC_U_CTRL1, PCI_MIC2_EDC_U_CTR1_A, PCI_MIC2_EDC_U_CTR1_B, PCI_EDC2_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX2C2", PMC230, EUBOX2, PCI_MIC2_EDC_U_CTRL2, PCI_MIC2_EDC_U_CTR2_A, PCI_MIC2_EDC_U_CTR2_B, PCI_EDC2_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX2C3", PMC231, EUBOX2, PCI_MIC2_EDC_U_CTRL3, PCI_MIC2_EDC_U_CTR3_A, PCI_MIC2_EDC_U_CTR3_B, PCI_EDC2_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX2FIX", PMC232, EUBOX2FIX, PCI_MIC2_EDC_U_FIXED_CTRL, PCI_MIC2_EDC_U_FIXED_CTR_A, PCI_MIC2_EDC_U_FIXED_CTR_B, PCI_EDC2_UCLK_DEVICE, EVENT_OPTION_NONE_MASK},
+    {"EDBOX2C0", PMC233, EDBOX2, PCI_MIC2_EDC_D_CTRL0, PCI_MIC2_EDC_D_CTR0_A, PCI_MIC2_EDC_D_CTR0_B, PCI_EDC2_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX2C1", PMC234, EDBOX2, PCI_MIC2_EDC_D_CTRL1, PCI_MIC2_EDC_D_CTR1_A, PCI_MIC2_EDC_D_CTR1_B, PCI_EDC2_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX2C2", PMC235, EDBOX2, PCI_MIC2_EDC_D_CTRL2, PCI_MIC2_EDC_D_CTR2_A, PCI_MIC2_EDC_D_CTR2_B, PCI_EDC2_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX2C3", PMC236, EDBOX2, PCI_MIC2_EDC_D_CTRL3, PCI_MIC2_EDC_D_CTR3_A, PCI_MIC2_EDC_D_CTR3_B, PCI_EDC2_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX2FIX", PMC237, EDBOX2FIX, PCI_MIC2_EDC_D_FIXED_CTRL, PCI_MIC2_EDC_D_FIXED_CTR_A, PCI_MIC2_EDC_D_FIXED_CTR_B, PCI_EDC2_DCLK_DEVICE, EVENT_OPTION_NONE_MASK},
+    {"EUBOX3C0", PMC238, EUBOX3, PCI_MIC2_EDC_U_CTRL0, PCI_MIC2_EDC_U_CTR0_A, PCI_MIC2_EDC_U_CTR0_B, PCI_EDC3_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX3C1", PMC239, EUBOX3, PCI_MIC2_EDC_U_CTRL1, PCI_MIC2_EDC_U_CTR1_A, PCI_MIC2_EDC_U_CTR1_B, PCI_EDC3_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX3C2", PMC240, EUBOX3, PCI_MIC2_EDC_U_CTRL2, PCI_MIC2_EDC_U_CTR2_A, PCI_MIC2_EDC_U_CTR2_B, PCI_EDC3_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX3C3", PMC241, EUBOX3, PCI_MIC2_EDC_U_CTRL3, PCI_MIC2_EDC_U_CTR3_A, PCI_MIC2_EDC_U_CTR3_B, PCI_EDC3_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX3FIX", PMC242, EUBOX3FIX, PCI_MIC2_EDC_U_FIXED_CTRL, PCI_MIC2_EDC_U_FIXED_CTR_A, PCI_MIC2_EDC_U_FIXED_CTR_B, PCI_EDC3_UCLK_DEVICE, EVENT_OPTION_NONE_MASK},
+    {"EDBOX3C0", PMC243, EDBOX3, PCI_MIC2_EDC_D_CTRL0, PCI_MIC2_EDC_D_CTR0_A, PCI_MIC2_EDC_D_CTR0_B, PCI_EDC3_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX3C1", PMC244, EDBOX3, PCI_MIC2_EDC_D_CTRL1, PCI_MIC2_EDC_D_CTR1_A, PCI_MIC2_EDC_D_CTR1_B, PCI_EDC3_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX3C2", PMC245, EDBOX3, PCI_MIC2_EDC_D_CTRL2, PCI_MIC2_EDC_D_CTR2_A, PCI_MIC2_EDC_D_CTR2_B, PCI_EDC3_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX3C3", PMC246, EDBOX3, PCI_MIC2_EDC_D_CTRL3, PCI_MIC2_EDC_D_CTR3_A, PCI_MIC2_EDC_D_CTR3_B, PCI_EDC3_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX3FIX", PMC247, EDBOX3FIX, PCI_MIC2_EDC_D_FIXED_CTRL, PCI_MIC2_EDC_D_FIXED_CTR_A, PCI_MIC2_EDC_D_FIXED_CTR_B, PCI_EDC3_DCLK_DEVICE, EVENT_OPTION_NONE_MASK},
+    {"EUBOX4C0", PMC248, EUBOX4, PCI_MIC2_EDC_U_CTRL0, PCI_MIC2_EDC_U_CTR0_A, PCI_MIC2_EDC_U_CTR0_B, PCI_EDC4_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX4C1", PMC249, EUBOX4, PCI_MIC2_EDC_U_CTRL1, PCI_MIC2_EDC_U_CTR1_A, PCI_MIC2_EDC_U_CTR1_B, PCI_EDC4_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX4C2", PMC250, EUBOX4, PCI_MIC2_EDC_U_CTRL2, PCI_MIC2_EDC_U_CTR2_A, PCI_MIC2_EDC_U_CTR2_B, PCI_EDC4_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX4C3", PMC251, EUBOX4, PCI_MIC2_EDC_U_CTRL3, PCI_MIC2_EDC_U_CTR3_A, PCI_MIC2_EDC_U_CTR3_B, PCI_EDC4_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX4FIX", PMC252, EUBOX4FIX, PCI_MIC2_EDC_U_FIXED_CTRL, PCI_MIC2_EDC_U_FIXED_CTR_A, PCI_MIC2_EDC_U_FIXED_CTR_B, PCI_EDC4_UCLK_DEVICE, EVENT_OPTION_NONE_MASK},
+    {"EDBOX4C0", PMC253, EDBOX4, PCI_MIC2_EDC_D_CTRL0, PCI_MIC2_EDC_D_CTR0_A, PCI_MIC2_EDC_D_CTR0_B, PCI_EDC4_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX4C1", PMC254, EDBOX4, PCI_MIC2_EDC_D_CTRL1, PCI_MIC2_EDC_D_CTR1_A, PCI_MIC2_EDC_D_CTR1_B, PCI_EDC4_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX4C2", PMC255, EDBOX4, PCI_MIC2_EDC_D_CTRL2, PCI_MIC2_EDC_D_CTR2_A, PCI_MIC2_EDC_D_CTR2_B, PCI_EDC4_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX4C3", PMC256, EDBOX4, PCI_MIC2_EDC_D_CTRL3, PCI_MIC2_EDC_D_CTR3_A, PCI_MIC2_EDC_D_CTR3_B, PCI_EDC4_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX4FIX", PMC257, EDBOX4FIX, PCI_MIC2_EDC_D_FIXED_CTRL, PCI_MIC2_EDC_D_FIXED_CTR_A, PCI_MIC2_EDC_D_FIXED_CTR_B, PCI_EDC4_DCLK_DEVICE, EVENT_OPTION_NONE_MASK},
+    {"EUBOX5C0", PMC258, EUBOX5, PCI_MIC2_EDC_U_CTRL0, PCI_MIC2_EDC_U_CTR0_A, PCI_MIC2_EDC_U_CTR0_B, PCI_EDC5_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX5C1", PMC259, EUBOX5, PCI_MIC2_EDC_U_CTRL1, PCI_MIC2_EDC_U_CTR1_A, PCI_MIC2_EDC_U_CTR1_B, PCI_EDC5_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX5C2", PMC260, EUBOX5, PCI_MIC2_EDC_U_CTRL2, PCI_MIC2_EDC_U_CTR2_A, PCI_MIC2_EDC_U_CTR2_B, PCI_EDC5_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX5C3", PMC261, EUBOX5, PCI_MIC2_EDC_U_CTRL3, PCI_MIC2_EDC_U_CTR3_A, PCI_MIC2_EDC_U_CTR3_B, PCI_EDC5_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX5FIX", PMC262, EUBOX5FIX, PCI_MIC2_EDC_U_FIXED_CTRL, PCI_MIC2_EDC_U_FIXED_CTR_A, PCI_MIC2_EDC_U_FIXED_CTR_B, PCI_EDC5_UCLK_DEVICE, EVENT_OPTION_NONE_MASK},
+    {"EDBOX5C0", PMC263, EDBOX5, PCI_MIC2_EDC_D_CTRL0, PCI_MIC2_EDC_D_CTR0_A, PCI_MIC2_EDC_D_CTR0_B, PCI_EDC5_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX5C1", PMC264, EDBOX5, PCI_MIC2_EDC_D_CTRL1, PCI_MIC2_EDC_D_CTR1_A, PCI_MIC2_EDC_D_CTR1_B, PCI_EDC5_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX5C2", PMC265, EDBOX5, PCI_MIC2_EDC_D_CTRL2, PCI_MIC2_EDC_D_CTR2_A, PCI_MIC2_EDC_D_CTR2_B, PCI_EDC5_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX5C3", PMC266, EDBOX5, PCI_MIC2_EDC_D_CTRL3, PCI_MIC2_EDC_D_CTR3_A, PCI_MIC2_EDC_D_CTR3_B, PCI_EDC5_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX5FIX", PMC267, EDBOX5FIX, PCI_MIC2_EDC_D_FIXED_CTRL, PCI_MIC2_EDC_D_FIXED_CTR_A, PCI_MIC2_EDC_D_FIXED_CTR_B, PCI_EDC5_DCLK_DEVICE, EVENT_OPTION_NONE_MASK},
+    {"EUBOX6C0", PMC268, EUBOX6, PCI_MIC2_EDC_U_CTRL0, PCI_MIC2_EDC_U_CTR0_A, PCI_MIC2_EDC_U_CTR0_B, PCI_EDC6_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX6C1", PMC269, EUBOX6, PCI_MIC2_EDC_U_CTRL1, PCI_MIC2_EDC_U_CTR1_A, PCI_MIC2_EDC_U_CTR1_B, PCI_EDC6_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX6C2", PMC270, EUBOX6, PCI_MIC2_EDC_U_CTRL2, PCI_MIC2_EDC_U_CTR2_A, PCI_MIC2_EDC_U_CTR2_B, PCI_EDC6_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX6C3", PMC271, EUBOX6, PCI_MIC2_EDC_U_CTRL3, PCI_MIC2_EDC_U_CTR3_A, PCI_MIC2_EDC_U_CTR3_B, PCI_EDC6_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX6FIX", PMC272, EUBOX6FIX, PCI_MIC2_EDC_U_FIXED_CTRL, PCI_MIC2_EDC_U_FIXED_CTR_A, PCI_MIC2_EDC_U_FIXED_CTR_B, PCI_EDC6_UCLK_DEVICE, EVENT_OPTION_NONE_MASK},
+    {"EDBOX6C0", PMC273, EDBOX6, PCI_MIC2_EDC_D_CTRL0, PCI_MIC2_EDC_D_CTR0_A, PCI_MIC2_EDC_D_CTR0_B, PCI_EDC6_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX6C1", PMC274, EDBOX6, PCI_MIC2_EDC_D_CTRL1, PCI_MIC2_EDC_D_CTR1_A, PCI_MIC2_EDC_D_CTR1_B, PCI_EDC6_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX6C2", PMC275, EDBOX6, PCI_MIC2_EDC_D_CTRL2, PCI_MIC2_EDC_D_CTR2_A, PCI_MIC2_EDC_D_CTR2_B, PCI_EDC6_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX6C3", PMC276, EDBOX6, PCI_MIC2_EDC_D_CTRL3, PCI_MIC2_EDC_D_CTR3_A, PCI_MIC2_EDC_D_CTR3_B, PCI_EDC6_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX6FIX", PMC277, EDBOX6FIX, PCI_MIC2_EDC_D_FIXED_CTRL, PCI_MIC2_EDC_D_FIXED_CTR_A, PCI_MIC2_EDC_D_FIXED_CTR_B, PCI_EDC6_DCLK_DEVICE, EVENT_OPTION_NONE_MASK},
+    {"EUBOX7C0", PMC278, EUBOX7, PCI_MIC2_EDC_U_CTRL0, PCI_MIC2_EDC_U_CTR0_A, PCI_MIC2_EDC_U_CTR0_B, PCI_EDC7_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX7C1", PMC279, EUBOX7, PCI_MIC2_EDC_U_CTRL1, PCI_MIC2_EDC_U_CTR1_A, PCI_MIC2_EDC_U_CTR1_B, PCI_EDC7_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX7C2", PMC280, EUBOX7, PCI_MIC2_EDC_U_CTRL2, PCI_MIC2_EDC_U_CTR2_A, PCI_MIC2_EDC_U_CTR2_B, PCI_EDC7_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX7C3", PMC281, EUBOX7, PCI_MIC2_EDC_U_CTRL3, PCI_MIC2_EDC_U_CTR3_A, PCI_MIC2_EDC_U_CTR3_B, PCI_EDC7_UCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EUBOX7FIX", PMC282, EUBOX7FIX, PCI_MIC2_EDC_U_FIXED_CTRL, PCI_MIC2_EDC_U_FIXED_CTR_A, PCI_MIC2_EDC_U_FIXED_CTR_B, PCI_EDC7_UCLK_DEVICE, EVENT_OPTION_NONE_MASK},
+    {"EDBOX7C0", PMC283, EDBOX7, PCI_MIC2_EDC_D_CTRL0, PCI_MIC2_EDC_D_CTR0_A, PCI_MIC2_EDC_D_CTR0_B, PCI_EDC7_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX7C1", PMC284, EDBOX7, PCI_MIC2_EDC_D_CTRL1, PCI_MIC2_EDC_D_CTR1_A, PCI_MIC2_EDC_D_CTR1_B, PCI_EDC7_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX7C2", PMC285, EDBOX7, PCI_MIC2_EDC_D_CTRL2, PCI_MIC2_EDC_D_CTR2_A, PCI_MIC2_EDC_D_CTR2_B, PCI_EDC7_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX7C3", PMC286, EDBOX7, PCI_MIC2_EDC_D_CTRL3, PCI_MIC2_EDC_D_CTR3_A, PCI_MIC2_EDC_D_CTR3_B, PCI_EDC7_DCLK_DEVICE, KNL_VALID_OPTIONS_EBOX},
+    {"EDBOX7FIX", PMC287, EDBOX7FIX, PCI_MIC2_EDC_D_FIXED_CTRL, PCI_MIC2_EDC_D_FIXED_CTR_A, PCI_MIC2_EDC_D_FIXED_CTR_B, PCI_EDC7_DCLK_DEVICE, EVENT_OPTION_NONE_MASK},
+    {"PBOX0", PMC288, PBOX, PCI_MIC2_M2PCIE_CTRL0, PCI_MIC2_M2PCIE_CTR0_A, PCI_MIC2_M2PCIE_CTR0_B, PCI_R2PCIE_DEVICE, KNL_VALID_OPTIONS_PBOX},
+    {"PBOX1", PMC289, PBOX, PCI_MIC2_M2PCIE_CTRL1, PCI_MIC2_M2PCIE_CTR1_A, PCI_MIC2_M2PCIE_CTR1_B, PCI_R2PCIE_DEVICE, KNL_VALID_OPTIONS_PBOX},
+    {"PBOX2", PMC290, PBOX, PCI_MIC2_M2PCIE_CTRL2, PCI_MIC2_M2PCIE_CTR2_A, PCI_MIC2_M2PCIE_CTR2_B, PCI_R2PCIE_DEVICE, KNL_VALID_OPTIONS_PBOX},
+    {"PBOX3", PMC291, PBOX, PCI_MIC2_M2PCIE_CTRL3, PCI_MIC2_M2PCIE_CTR3_A, PCI_MIC2_M2PCIE_CTR3_B, PCI_R2PCIE_DEVICE, KNL_VALID_OPTIONS_PBOX},
+    {"IBOX0", PMC292, IBOX0, PCI_MIC2_IRP_CTRL0, PCI_MIC2_IRP_CTR0, 0, PCI_IRP_DEVICE, KNL_VALID_OPTIONS_IBOX},
+    {"IBOX1", PMC293, IBOX0, PCI_MIC2_IRP_CTRL1, PCI_MIC2_IRP_CTR1, 0, PCI_IRP_DEVICE, KNL_VALID_OPTIONS_IBOX},
+};
+
+static BoxMap knl_box_map[NUM_UNITS] = {
+    [PMC] = {MSR_PERF_GLOBAL_CTRL, MSR_PERF_GLOBAL_STATUS, MSR_PERF_GLOBAL_OVF_CTRL, -1, 0, 0, 48},
+    [FIXED] = {MSR_PERF_GLOBAL_CTRL, MSR_PERF_GLOBAL_STATUS, MSR_PERF_GLOBAL_OVF_CTRL, -1, 0, 0, 48},
+    [THERMAL] = {0, 0, 0, 0, 0, MSR_DEV, 8},
+    [POWER] = {0, 0, 0, 0, 0, MSR_DEV, 32},
+    [UBOX] = {MSR_MIC2_U_GLOBAL_CTRL, MSR_MIC2_U_GLOBAL_STATUS, MSR_MIC2_U_GLOBAL_STATUS, 1, 0, 0, 48},
+    [UBOXFIX] = {MSR_MIC2_U_GLOBAL_CTRL, MSR_MIC2_U_GLOBAL_STATUS, MSR_MIC2_U_GLOBAL_STATUS, 0, 0, 0, 48},
+    [CBOX0] = {MSR_MIC2_C0_GLOBAL_CTRL, MSR_MIC2_C0_STATUS, MSR_MIC2_C0_STATUS, 3, 0, 0, 48, MSR_MIC2_C0_FILTER0, MSR_MIC2_C0_FILTER1},
+    [CBOX1] = {MSR_MIC2_C1_GLOBAL_CTRL, MSR_MIC2_C0_STATUS, MSR_MIC2_C0_STATUS, 4, 0, 0, 48, MSR_MIC2_C0_FILTER0, MSR_MIC2_C0_FILTER1},
+    [CBOX2] = {MSR_MIC2_C2_GLOBAL_CTRL, MSR_MIC2_C0_STATUS, MSR_MIC2_C0_STATUS, 5, 0, 0, 48, MSR_MIC2_C0_FILTER0, MSR_MIC2_C0_FILTER1},
+    [CBOX3] = {MSR_MIC2_C3_GLOBAL_CTRL, MSR_MIC2_C0_STATUS, MSR_MIC2_C0_STATUS, 6, 0, 0, 48, MSR_MIC2_C0_FILTER0, MSR_MIC2_C0_FILTER1},
+    [CBOX4] = {MSR_MIC2_C4_GLOBAL_CTRL, MSR_MIC2_C0_STATUS, MSR_MIC2_C0_STATUS, 7, 0, 0, 48, MSR_MIC2_C0_FILTER0, MSR_MIC2_C0_FILTER1},
+    [CBOX5] = {MSR_MIC2_C5_GLOBAL_CTRL, MSR_MIC2_C0_STATUS, MSR_MIC2_C0_STATUS, 8, 0, 0, 48, MSR_MIC2_C0_FILTER0, MSR_MIC2_C0_FILTER1},
+    [CBOX6] = {MSR_MIC2_C6_GLOBAL_CTRL, MSR_MIC2_C0_STATUS, MSR_MIC2_C0_STATUS, 9, 0, 0, 48, MSR_MIC2_C0_FILTER0, MSR_MIC2_C0_FILTER1},
+    [CBOX7] = {MSR_MIC2_C7_GLOBAL_CTRL, MSR_MIC2_C0_STATUS, MSR_MIC2_C0_STATUS, 10, 0, 0, 48, MSR_MIC2_C0_FILTER0, MSR_MIC2_C0_FILTER1},
+    [CBOX8] = {MSR_MIC2_C8_GLOBAL_CTRL, MSR_MIC2_C0_STATUS, MSR_MIC2_C0_STATUS, 11, 0, 0, 48, MSR_MIC2_C0_FILTER0, MSR_MIC2_C0_FILTER1},
+    [CBOX9] = {MSR_MIC2_C9_GLOBAL_CTRL, MSR_MIC2_C0_STATUS, MSR_MIC2_C0_STATUS, 12, 0, 0, 48, MSR_MIC2_C0_FILTER0, MSR_MIC2_C0_FILTER1},
+    [CBOX10] = {MSR_MIC2_C10_GLOBAL_CTRL, MSR_MIC2_C10_STATUS, MSR_MIC2_C10_STATUS, 13, 0, 0, 48, MSR_MIC2_C10_FILTER0, MSR_MIC2_C10_FILTER1},
+    [CBOX11] = {MSR_MIC2_C11_GLOBAL_CTRL, MSR_MIC2_C11_STATUS, MSR_MIC2_C11_STATUS, 14, 0, 0, 48, MSR_MIC2_C11_FILTER0, MSR_MIC2_C11_FILTER1},
+    [CBOX12] = {MSR_MIC2_C12_GLOBAL_CTRL, MSR_MIC2_C12_STATUS, MSR_MIC2_C12_STATUS, 15, 0, 0, 48, MSR_MIC2_C12_FILTER0, MSR_MIC2_C12_FILTER1},
+    [CBOX13] = {MSR_MIC2_C13_GLOBAL_CTRL, MSR_MIC2_C13_STATUS, MSR_MIC2_C13_STATUS, 16, 0, 0, 48, MSR_MIC2_C13_FILTER0, MSR_MIC2_C13_FILTER1},
+    [CBOX14] = {MSR_MIC2_C14_GLOBAL_CTRL, MSR_MIC2_C14_STATUS, MSR_MIC2_C14_STATUS, 17, 0, 0, 48, MSR_MIC2_C14_FILTER0, MSR_MIC2_C14_FILTER1},
+    [CBOX15] = {MSR_MIC2_C15_GLOBAL_CTRL, MSR_MIC2_C15_STATUS, MSR_MIC2_C15_STATUS, 18, 0, 0, 48, MSR_MIC2_C15_FILTER0, MSR_MIC2_C15_FILTER1},
+    [CBOX16] = {MSR_MIC2_C16_GLOBAL_CTRL, MSR_MIC2_C16_STATUS, MSR_MIC2_C16_STATUS, 19, 0, 0, 48, MSR_MIC2_C16_FILTER0, MSR_MIC2_C16_FILTER1},
+    [CBOX17] = {MSR_MIC2_C17_GLOBAL_CTRL, MSR_MIC2_C17_STATUS, MSR_MIC2_C17_STATUS, 20, 0, 0, 48, MSR_MIC2_C17_FILTER0, MSR_MIC2_C17_FILTER1},
+    [CBOX18] = {MSR_MIC2_C18_GLOBAL_CTRL, MSR_MIC2_C18_STATUS, MSR_MIC2_C18_STATUS, 21, 0, 0, 48, MSR_MIC2_C18_FILTER0, MSR_MIC2_C18_FILTER1},
+    [CBOX19] = {MSR_MIC2_C19_GLOBAL_CTRL, MSR_MIC2_C19_STATUS, MSR_MIC2_C19_STATUS, 22, 0, 0, 48, MSR_MIC2_C19_FILTER0, MSR_MIC2_C19_FILTER1},
+    [CBOX20] = {MSR_MIC2_C20_GLOBAL_CTRL, MSR_MIC2_C20_STATUS, MSR_MIC2_C20_STATUS, 23, 0, 0, 48, MSR_MIC2_C20_FILTER0, MSR_MIC2_C20_FILTER1},
+    [CBOX21] = {MSR_MIC2_C21_GLOBAL_CTRL, MSR_MIC2_C21_STATUS, MSR_MIC2_C21_STATUS, 24, 0, 0, 48, MSR_MIC2_C21_FILTER0, MSR_MIC2_C21_FILTER1},
+    [CBOX22] = {MSR_MIC2_C22_GLOBAL_CTRL, MSR_MIC2_C22_STATUS, MSR_MIC2_C22_STATUS, 25, 0, 0, 48, MSR_MIC2_C22_FILTER0, MSR_MIC2_C22_FILTER1},
+    [CBOX23] = {MSR_MIC2_C23_GLOBAL_CTRL, MSR_MIC2_C23_STATUS, MSR_MIC2_C23_STATUS, 26, 0, 0, 48, MSR_MIC2_C23_FILTER0, MSR_MIC2_C23_FILTER1},
+    [CBOX24] = {MSR_MIC2_C24_GLOBAL_CTRL, MSR_MIC2_C24_STATUS, MSR_MIC2_C24_STATUS, 27, 0, 0, 48, MSR_MIC2_C24_FILTER0, MSR_MIC2_C24_FILTER1},
+    [CBOX25] = {MSR_MIC2_C25_GLOBAL_CTRL, MSR_MIC2_C25_STATUS, MSR_MIC2_C25_STATUS, 28, 0, 0, 48, MSR_MIC2_C25_FILTER0, MSR_MIC2_C25_FILTER1},
+    [CBOX26] = {MSR_MIC2_C26_GLOBAL_CTRL, MSR_MIC2_C26_STATUS, MSR_MIC2_C26_STATUS, 29, 0, 0, 48, MSR_MIC2_C26_FILTER0, MSR_MIC2_C26_FILTER1},
+    [CBOX27] = {MSR_MIC2_C27_GLOBAL_CTRL, MSR_MIC2_C27_STATUS, MSR_MIC2_C27_STATUS, 30, 0, 0, 48, MSR_MIC2_C27_FILTER0, MSR_MIC2_C27_FILTER1},
+    [CBOX28] = {MSR_MIC2_C28_GLOBAL_CTRL, MSR_MIC2_C28_STATUS, MSR_MIC2_C28_STATUS, 31, 0, 0, 48, MSR_MIC2_C28_FILTER0, MSR_MIC2_C28_FILTER1},
+    [CBOX29] = {MSR_MIC2_C29_GLOBAL_CTRL, MSR_MIC2_C29_STATUS, MSR_MIC2_C29_STATUS, 32, 0, 0, 48, MSR_MIC2_C29_FILTER0, MSR_MIC2_C29_FILTER1},
+    [CBOX30] = {MSR_MIC2_C30_GLOBAL_CTRL, MSR_MIC2_C30_STATUS, MSR_MIC2_C30_STATUS, 33, 0, 0, 48, MSR_MIC2_C30_FILTER0, MSR_MIC2_C30_FILTER1},
+    [CBOX31] = {MSR_MIC2_C31_GLOBAL_CTRL, MSR_MIC2_C31_STATUS, MSR_MIC2_C31_STATUS, 34, 0, 0, 48, MSR_MIC2_C31_FILTER0, MSR_MIC2_C31_FILTER1},
+    [CBOX32] = {MSR_MIC2_C32_GLOBAL_CTRL, MSR_MIC2_C32_STATUS, MSR_MIC2_C32_STATUS, 35, 0, 0, 48, MSR_MIC2_C32_FILTER0, MSR_MIC2_C32_FILTER1},
+    [CBOX33] = {MSR_MIC2_C33_GLOBAL_CTRL, MSR_MIC2_C33_STATUS, MSR_MIC2_C33_STATUS, 36, 0, 0, 48, MSR_MIC2_C33_FILTER0, MSR_MIC2_C33_FILTER1},
+    [CBOX34] = {MSR_MIC2_C34_GLOBAL_CTRL, MSR_MIC2_C34_STATUS, MSR_MIC2_C34_STATUS, 37, 0, 0, 48, MSR_MIC2_C34_FILTER0, MSR_MIC2_C34_FILTER1},
+    [CBOX35] = {MSR_MIC2_C35_GLOBAL_CTRL, MSR_MIC2_C35_STATUS, MSR_MIC2_C35_STATUS, 38, 0, 0, 48, MSR_MIC2_C35_FILTER0, MSR_MIC2_C35_FILTER1},
+    [CBOX36] = {MSR_MIC2_C36_GLOBAL_CTRL, MSR_MIC2_C36_STATUS, MSR_MIC2_C36_STATUS, 39, 0, 0, 48, MSR_MIC2_C36_FILTER0, MSR_MIC2_C36_FILTER1},
+    [CBOX37] = {MSR_MIC2_C37_GLOBAL_CTRL, MSR_MIC2_C37_STATUS, MSR_MIC2_C37_STATUS, 40, 0, 0, 48, MSR_MIC2_C37_FILTER0, MSR_MIC2_C37_FILTER1},
+    [WBOX] = {MSR_MIC2_PCU_GLOBAL_CTRL, 0, 0, 2, 0, 0, 48},
+    [MBOX0] = {PCI_MIC2_MC_D_BOX_CTRL, PCI_MIC2_MC_D_BOX_STATUS, PCI_MIC2_MC_D_BOX_STATUS, 41, 1, PCI_IMC_DEVICE_0_CH_0, 48},
+    [MBOX1] = {PCI_MIC2_MC_D_BOX_CTRL, PCI_MIC2_MC_D_BOX_STATUS, PCI_MIC2_MC_D_BOX_STATUS, 41, 1, PCI_IMC_DEVICE_0_CH_1, 48},
+    [MBOX2] = {PCI_MIC2_MC_D_BOX_CTRL, PCI_MIC2_MC_D_BOX_STATUS, PCI_MIC2_MC_D_BOX_STATUS, 41, 1, PCI_IMC_DEVICE_0_CH_2, 48},
+    [MBOX3] = {PCI_MIC2_MC_U_BOX_CTRL, PCI_MIC2_MC_U_BOX_STATUS, PCI_MIC2_MC_U_BOX_STATUS, 41, 1, PCI_IMC_DEVICE_0_CH_3, 48},
+    [MBOX4] = {PCI_MIC2_MC_D_BOX_CTRL, PCI_MIC2_MC_D_BOX_STATUS, PCI_MIC2_MC_D_BOX_STATUS, 42, 1, PCI_IMC_DEVICE_1_CH_0, 48},
+    [MBOX5] = {PCI_MIC2_MC_D_BOX_CTRL, PCI_MIC2_MC_D_BOX_STATUS, PCI_MIC2_MC_D_BOX_STATUS, 42, 1, PCI_IMC_DEVICE_1_CH_1, 48},
+    [MBOX6] = {PCI_MIC2_MC_D_BOX_CTRL, PCI_MIC2_MC_D_BOX_STATUS, PCI_MIC2_MC_D_BOX_STATUS, 42, 1, PCI_IMC_DEVICE_1_CH_2, 48},
+    [MBOX7] = {PCI_MIC2_MC_U_BOX_CTRL, PCI_MIC2_MC_U_BOX_STATUS, PCI_MIC2_MC_U_BOX_STATUS, 42, 1, PCI_IMC_DEVICE_1_CH_3, 48},
+    [MBOX0FIX] = {PCI_MIC2_MC_D_BOX_CTRL, PCI_MIC2_MC_D_BOX_STATUS, PCI_MIC2_MC_D_BOX_STATUS, 41, 1, PCI_IMC_DEVICE_0_CH_0, 48},
+    [MBOX1FIX] = {PCI_MIC2_MC_D_BOX_CTRL, PCI_MIC2_MC_D_BOX_STATUS, PCI_MIC2_MC_D_BOX_STATUS, 41, 1, PCI_IMC_DEVICE_0_CH_1, 48},
+    [MBOX2FIX] = {PCI_MIC2_MC_D_BOX_CTRL, PCI_MIC2_MC_D_BOX_STATUS, PCI_MIC2_MC_D_BOX_STATUS, 41, 1, PCI_IMC_DEVICE_0_CH_2, 48},
+    [MBOX3FIX] =  {PCI_MIC2_MC_U_BOX_CTRL, PCI_MIC2_MC_U_BOX_STATUS, PCI_MIC2_MC_U_BOX_STATUS, 41, 1, PCI_IMC_DEVICE_0_CH_3, 48},
+    [MBOX4FIX] = {PCI_MIC2_MC_D_BOX_CTRL, PCI_MIC2_MC_D_BOX_STATUS, PCI_MIC2_MC_D_BOX_STATUS, 41, 1, PCI_IMC_DEVICE_1_CH_0, 48},
+    [MBOX5FIX] = {PCI_MIC2_MC_D_BOX_CTRL, PCI_MIC2_MC_D_BOX_STATUS, PCI_MIC2_MC_D_BOX_STATUS, 41, 1, PCI_IMC_DEVICE_1_CH_1, 48},
+    [MBOX6FIX] = {PCI_MIC2_MC_D_BOX_CTRL, PCI_MIC2_MC_D_BOX_STATUS, PCI_MIC2_MC_D_BOX_STATUS, 41, 1, PCI_IMC_DEVICE_1_CH_2, 48},
+    [MBOX7FIX] = {PCI_MIC2_MC_U_BOX_CTRL, PCI_MIC2_MC_U_BOX_STATUS, PCI_MIC2_MC_U_BOX_STATUS, 42, 1, PCI_IMC_DEVICE_1_CH_3, 48},
+    [EUBOX0] = {PCI_MIC2_EDC_U_BOX_CTRL, PCI_MIC2_EDC_U_BOX_STATUS, PCI_MIC2_EDC_U_BOX_STATUS, 43, 1, PCI_EDC0_UCLK_DEVICE, 48},
+    [EUBOX0FIX] = {PCI_MIC2_EDC_U_BOX_CTRL, PCI_MIC2_EDC_U_BOX_STATUS, PCI_MIC2_EDC_U_BOX_STATUS, 43, 1, PCI_EDC0_UCLK_DEVICE, 48},
+    [EDBOX0] = {PCI_MIC2_EDC_D_BOX_CTRL, PCI_MIC2_EDC_D_BOX_STATUS, PCI_MIC2_EDC_D_BOX_STATUS, 43, 1, PCI_EDC0_DCLK_DEVICE, 48},
+    [EDBOX0FIX] = {PCI_MIC2_EDC_D_BOX_CTRL, PCI_MIC2_EDC_D_BOX_STATUS, PCI_MIC2_EDC_D_BOX_STATUS, 43, 1, PCI_EDC0_DCLK_DEVICE, 48},
+    [EUBOX1] = {PCI_MIC2_EDC_U_BOX_CTRL, PCI_MIC2_EDC_U_BOX_STATUS, PCI_MIC2_EDC_U_BOX_STATUS, 44, 1, PCI_EDC1_UCLK_DEVICE, 48},
+    [EUBOX1FIX] = {PCI_MIC2_EDC_U_BOX_CTRL, PCI_MIC2_EDC_U_BOX_STATUS, PCI_MIC2_EDC_U_BOX_STATUS, 44, 1, PCI_EDC1_UCLK_DEVICE, 48},
+    [EDBOX1] = {PCI_MIC2_EDC_D_BOX_CTRL, PCI_MIC2_EDC_D_BOX_STATUS, PCI_MIC2_EDC_D_BOX_STATUS, 44, 1, PCI_EDC1_DCLK_DEVICE, 48},
+    [EDBOX1FIX] = {PCI_MIC2_EDC_D_BOX_CTRL, PCI_MIC2_EDC_D_BOX_STATUS, PCI_MIC2_EDC_D_BOX_STATUS, 44, 1, PCI_EDC1_DCLK_DEVICE, 48},
+    [EUBOX2] = {PCI_MIC2_EDC_U_BOX_CTRL, PCI_MIC2_EDC_U_BOX_STATUS, PCI_MIC2_EDC_U_BOX_STATUS, 45, 1, PCI_EDC2_UCLK_DEVICE, 48},
+    [EUBOX2FIX] = {PCI_MIC2_EDC_U_BOX_CTRL, PCI_MIC2_EDC_U_BOX_STATUS, PCI_MIC2_EDC_U_BOX_STATUS, 45, 1, PCI_EDC2_UCLK_DEVICE, 48},
+    [EDBOX2] = {PCI_MIC2_EDC_D_BOX_CTRL, PCI_MIC2_EDC_D_BOX_STATUS, PCI_MIC2_EDC_D_BOX_STATUS, 45, 1, PCI_EDC2_DCLK_DEVICE, 48},
+    [EDBOX2FIX] = {PCI_MIC2_EDC_D_BOX_CTRL, PCI_MIC2_EDC_D_BOX_STATUS, PCI_MIC2_EDC_D_BOX_STATUS, 45, 1, PCI_EDC2_DCLK_DEVICE, 48},
+    [EUBOX3] = {PCI_MIC2_EDC_U_BOX_CTRL, PCI_MIC2_EDC_U_BOX_STATUS, PCI_MIC2_EDC_U_BOX_STATUS, 46, 1, PCI_EDC3_UCLK_DEVICE, 48},
+    [EUBOX3FIX] = {PCI_MIC2_EDC_U_BOX_CTRL, PCI_MIC2_EDC_U_BOX_STATUS, PCI_MIC2_EDC_U_BOX_STATUS, 46, 1, PCI_EDC3_UCLK_DEVICE, 48},
+    [EDBOX3] = {PCI_MIC2_EDC_D_BOX_CTRL, PCI_MIC2_EDC_D_BOX_STATUS, PCI_MIC2_EDC_D_BOX_STATUS, 46, 1, PCI_EDC3_DCLK_DEVICE, 48},
+    [EDBOX3FIX] = {PCI_MIC2_EDC_D_BOX_CTRL, PCI_MIC2_EDC_D_BOX_STATUS, PCI_MIC2_EDC_D_BOX_STATUS, 46, 1, PCI_EDC3_DCLK_DEVICE, 48},
+    [EUBOX4] = {PCI_MIC2_EDC_U_BOX_CTRL, PCI_MIC2_EDC_U_BOX_STATUS, PCI_MIC2_EDC_U_BOX_STATUS, 47, 1, PCI_EDC4_UCLK_DEVICE, 48},
+    [EUBOX4FIX] = {PCI_MIC2_EDC_U_BOX_CTRL, PCI_MIC2_EDC_U_BOX_STATUS, PCI_MIC2_EDC_U_BOX_STATUS, 47, 1, PCI_EDC4_UCLK_DEVICE, 48},
+    [EDBOX4] = {PCI_MIC2_EDC_D_BOX_CTRL, PCI_MIC2_EDC_D_BOX_STATUS, PCI_MIC2_EDC_D_BOX_STATUS, 47, 1, PCI_EDC4_DCLK_DEVICE, 48},
+    [EDBOX4FIX] = {PCI_MIC2_EDC_D_BOX_CTRL, PCI_MIC2_EDC_D_BOX_STATUS, PCI_MIC2_EDC_D_BOX_STATUS, 47, 1, PCI_EDC4_DCLK_DEVICE, 48},
+    [EUBOX5] = {PCI_MIC2_EDC_U_BOX_CTRL, PCI_MIC2_EDC_U_BOX_STATUS, PCI_MIC2_EDC_U_BOX_STATUS, 48, 1, PCI_EDC5_UCLK_DEVICE, 48},
+    [EUBOX5FIX] = {PCI_MIC2_EDC_U_BOX_CTRL, PCI_MIC2_EDC_U_BOX_STATUS, PCI_MIC2_EDC_U_BOX_STATUS, 48, 1, PCI_EDC5_UCLK_DEVICE, 48},
+    [EDBOX5] = {PCI_MIC2_EDC_D_BOX_CTRL, PCI_MIC2_EDC_D_BOX_STATUS, PCI_MIC2_EDC_D_BOX_STATUS, 48, 1, PCI_EDC5_DCLK_DEVICE, 48},
+    [EDBOX5FIX] = {PCI_MIC2_EDC_D_BOX_CTRL, PCI_MIC2_EDC_D_BOX_STATUS, PCI_MIC2_EDC_D_BOX_STATUS, 48, 1, PCI_EDC5_DCLK_DEVICE, 48},
+    [EUBOX6] = {PCI_MIC2_EDC_U_BOX_CTRL, PCI_MIC2_EDC_U_BOX_STATUS, PCI_MIC2_EDC_U_BOX_STATUS, 49, 1, PCI_EDC6_UCLK_DEVICE, 48},
+    [EUBOX6FIX] = {PCI_MIC2_EDC_U_BOX_CTRL, PCI_MIC2_EDC_U_BOX_STATUS, PCI_MIC2_EDC_U_BOX_STATUS, 49, 1, PCI_EDC6_UCLK_DEVICE, 48},
+    [EDBOX6] = {PCI_MIC2_EDC_D_BOX_CTRL, PCI_MIC2_EDC_D_BOX_STATUS, PCI_MIC2_EDC_D_BOX_STATUS, 49, 1, PCI_EDC6_DCLK_DEVICE, 48},
+    [EDBOX6FIX] = {PCI_MIC2_EDC_D_BOX_CTRL, PCI_MIC2_EDC_D_BOX_STATUS, PCI_MIC2_EDC_D_BOX_STATUS, 49, 1, PCI_EDC6_DCLK_DEVICE, 48},
+    [EUBOX7] = {PCI_MIC2_EDC_U_BOX_CTRL, PCI_MIC2_EDC_U_BOX_STATUS, PCI_MIC2_EDC_U_BOX_STATUS, 50, 1, PCI_EDC7_UCLK_DEVICE, 48},
+    [EUBOX7FIX] = {PCI_MIC2_EDC_U_BOX_CTRL, PCI_MIC2_EDC_U_BOX_STATUS, PCI_MIC2_EDC_U_BOX_STATUS, 50, 1, PCI_EDC7_UCLK_DEVICE, 48},
+    [EDBOX7] = {PCI_MIC2_EDC_D_BOX_CTRL, PCI_MIC2_EDC_D_BOX_STATUS, PCI_MIC2_EDC_D_BOX_STATUS, 50, 1, PCI_EDC7_DCLK_DEVICE, 48},
+    [EDBOX7FIX] = {PCI_MIC2_EDC_D_BOX_CTRL, PCI_MIC2_EDC_D_BOX_STATUS, PCI_MIC2_EDC_D_BOX_STATUS, 50, 1, PCI_EDC7_DCLK_DEVICE, 48},
+    [PBOX] = {PCI_MIC2_M2PCIE_BOX_CTRL, PCI_MIC2_M2PCIE_BOX_STATUS, PCI_MIC2_M2PCIE_BOX_STATUS, 51, 1, PCI_R2PCIE_DEVICE, 48},
+    [IBOX0] = {PCI_MIC2_IRP_BOX_CTRL, PCI_MIC2_IRP_BOX_STATUS, PCI_MIC2_IRP_BOX_STATUS, 52, 1, PCI_IRP_DEVICE, 48},
+};
+
+static PciDevice knl_pci_devices[MAX_NUM_PCI_DEVICES] = { /* PCI device table for KNL, indexed by device enum through designated initializers; slots not listed here are zero-initialized */
+ [MSR_DEV] = {NODEVTYPE, "", "", ""}, /* placeholder entry: no device type, empty strings; the trailing numeric field is omitted and therefore zero */
+ [PCI_IMC_DEVICE_0_CH_0] = {IMC, "08.2", "PCI_IMC_DEVICE_CH_0", "MBOX0", 0x3cb0}, /* NOTE(review): fields look like {type, device.function path, name, short label, device id} -- confirm against the PciDevice declaration */
+ [PCI_IMC_DEVICE_0_CH_1] = {IMC, "08.3", "PCI_IMC_DEVICE_CH_0", "MBOX1", 0x3cb0}, /* NOTE(review): all four channels of device 0 share the same name string (ending CH_0) -- confirm this is intended */
+ [PCI_IMC_DEVICE_0_CH_2] = {IMC, "08.4", "PCI_IMC_DEVICE_CH_0", "MBOX2", 0x3cb0},
+ [PCI_IMC_DEVICE_0_CH_3] = {IMC, "0a.0", "PCI_IMC_DEVICE_CH_0", "MBOX3", 0x3cb0}, /* unlike channels 0-2 (08.x) this entry lives at 0a.0 */
+ [PCI_IMC_DEVICE_1_CH_0] = {IMC, "09.2", "PCI_IMC_DEVICE_CH_1", "MBOX4", 0x3cb0}, /* second IMC device: same layout as device 0, labels MBOX4..MBOX7 */
+ [PCI_IMC_DEVICE_1_CH_1] = {IMC, "09.3", "PCI_IMC_DEVICE_CH_1", "MBOX5", 0x3cb0},
+ [PCI_IMC_DEVICE_1_CH_2] = {IMC, "09.4", "PCI_IMC_DEVICE_CH_1", "MBOX6", 0x3cb0},
+ [PCI_IMC_DEVICE_1_CH_3] = {IMC, "0b.0", "PCI_IMC_DEVICE_CH_1", "MBOX7", 0x3cb0}, /* unlike channels 0-2 (09.x) this entry lives at 0b.0 */
+ [PCI_R2PCIE_DEVICE] = {R2PCIE, "0c.1", "PCI_R2PCIE_DEVICE", "R2PCIE", 0x0000}, /* from here on every entry carries 0x0000 in the last field, unlike the IMC entries (0x3cb0) */
+ [PCI_IRP_DEVICE] = {IRP, "05.6", "PCI_IRP_DEVICE", "IRP", 0x0000},
+ [PCI_EDC0_UCLK_DEVICE] = {EDC, "0f.0", "PCI_EDC0_UCLK_DEVICE", "EDC", 0x0000}, /* EDC0..EDC7 each get a UCLK entry and a DCLK entry; all sixteen share the short label EDC */
+ [PCI_EDC0_DCLK_DEVICE] = {EDC, "18.2", "PCI_EDC0_DCLK_DEVICE", "EDC", 0x0000},
+ [PCI_EDC1_UCLK_DEVICE] = {EDC, "10.0", "PCI_EDC1_UCLK_DEVICE", "EDC", 0x0000},
+ [PCI_EDC1_DCLK_DEVICE] = {EDC, "19.2", "PCI_EDC1_DCLK_DEVICE", "EDC", 0x0000},
+ [PCI_EDC2_UCLK_DEVICE] = {EDC, "11.0", "PCI_EDC2_UCLK_DEVICE", "EDC", 0x0000},
+ [PCI_EDC2_DCLK_DEVICE] = {EDC, "1a.2", "PCI_EDC2_DCLK_DEVICE", "EDC", 0x0000},
+ [PCI_EDC3_UCLK_DEVICE] = {EDC, "12.0", "PCI_EDC3_UCLK_DEVICE", "EDC", 0x0000},
+ [PCI_EDC3_DCLK_DEVICE] = {EDC, "1b.2", "PCI_EDC3_DCLK_DEVICE", "EDC", 0x0000},
+ [PCI_EDC4_UCLK_DEVICE] = {EDC, "13.0", "PCI_EDC4_UCLK_DEVICE", "EDC", 0x0000},
+ [PCI_EDC4_DCLK_DEVICE] = {EDC, "1c.2", "PCI_EDC4_DCLK_DEVICE", "EDC", 0x0000},
+ [PCI_EDC5_UCLK_DEVICE] = {EDC, "14.0", "PCI_EDC5_UCLK_DEVICE", "EDC", 0x0000},
+ [PCI_EDC5_DCLK_DEVICE] = {EDC, "1d.2", "PCI_EDC5_DCLK_DEVICE", "EDC", 0x0000},
+ [PCI_EDC6_UCLK_DEVICE] = {EDC, "15.0", "PCI_EDC6_UCLK_DEVICE", "EDC", 0x0000},
+ [PCI_EDC6_DCLK_DEVICE] = {EDC, "1e.2", "PCI_EDC6_DCLK_DEVICE", "EDC", 0x0000},
+ [PCI_EDC7_UCLK_DEVICE] = {EDC, "16.0", "PCI_EDC7_UCLK_DEVICE", "EDC", 0x0000},
+ [PCI_EDC7_DCLK_DEVICE] = {EDC, "1f.2", "PCI_EDC7_DCLK_DEVICE", "EDC", 0x0000},
+};
diff --git a/src/includes/perfmon_knl_events.txt b/src/includes/perfmon_knl_events.txt
new file mode 100644
index 0000000..091ed95
--- /dev/null
+++ b/src/includes/perfmon_knl_events.txt
@@ -0,0 +1,1033 @@
+# =======================================================================================
+#
+#      Filename:  perfmon_knl_events.txt
+#
+#      Description:  Event list for Intel Xeon Phi (Knights Landing)
+#
+#      Version:   <VERSION>
+#      Released:  <DATE>
+#
+#      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
+#      Project:  likwid
+#
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+#
+#      This program is free software: you can redistribute it and/or modify it under
+#      the terms of the GNU General Public License as published by the Free Software
+#      Foundation, either version 3 of the License, or (at your option) any later
+#      version.
+#
+#      This program is distributed in the hope that it will be useful, but WITHOUT ANY
+#      WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+#      PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+#
+#      You should have received a copy of the GNU General Public License along with
+#      this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# =======================================================================================
+
+EVENT_TEMP_CORE          0x00   TMP0
+UMASK_TEMP_CORE          0x00
+
+EVENT_PWR_PKG_ENERGY          0x00   PWR0
+UMASK_PWR_PKG_ENERGY          0x00
+
+EVENT_PWR_PP0_ENERGY          0x00   PWR1
+UMASK_PWR_PP0_ENERGY          0x00
+
+EVENT_PWR_DRAM_ENERGY          0x00   PWR3
+UMASK_PWR_DRAM_ENERGY          0x00
+
+EVENT_INSTR_RETIRED              0x00   FIXC0
+UMASK_INSTR_RETIRED_ANY          0x00
+
+EVENT_CPU_CLK_UNHALTED           0x00   FIXC1
+UMASK_CPU_CLK_UNHALTED_CORE      0x00
+
+EVENT_CPU_CLK_UNHALTED           0x00   FIXC2
+UMASK_CPU_CLK_UNHALTED_REF       0x00
+
+EVENT_INST_RETIRED      0xC0 PMC
+UMASK_INST_RETIRED_ANY  0x00
+
+EVENT_UOPS_RETIRED          0xC2 PMC
+UMASK_UOPS_RETIRED_ANY      0x10
+UMASK_UOPS_RETIRED_MS       0x01
+UMASK_UOPS_RETIRED_X87      0x02
+UMASK_UOPS_RETIRED_MUL      0x04
+UMASK_UOPS_RETIRED_DIV      0x08
+DEFAULT_OPTIONS_UOPS_RETIRED_STALLED_CYCLES EVENT_OPTION_INVERT=1|EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_RETIRED_STALLED_CYCLES      0x10
+DEFAULT_OPTIONS_UOPS_RETIRED_STALLS EVENT_OPTION_INVERT=1|EVENT_OPTION_EDGE=1|EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_RETIRED_STALLS   0x10
+UMASK_UOPS_RETIRED_SCALAR_SIMD      0x20
+UMASK_UOPS_RETIRED_PACKED_SIMD      0x40
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC 0x10
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC 0x10
+DEFAULT_OPTIONS_UOPS_RETIRED_MS_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_RETIRED_MS_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_MS_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_RETIRED_MS_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_MUL_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_RETIRED_MUL_CYCLES_GE_1_UOPS_EXEC 0x04
+DEFAULT_OPTIONS_UOPS_RETIRED_MUL_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_RETIRED_MUL_CYCLES_GE_2_UOPS_EXEC 0x04
+DEFAULT_OPTIONS_UOPS_RETIRED_SCALAR_SIMD_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_RETIRED_SCALAR_SIMD_CYCLES_GE_1_UOPS_EXEC 0x20
+DEFAULT_OPTIONS_UOPS_RETIRED_SCALAR_SIMD_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_RETIRED_SCALAR_SIMD_CYCLES_GE_2_UOPS_EXEC 0x20
+DEFAULT_OPTIONS_UOPS_RETIRED_PACKED_SIMD_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_RETIRED_PACKED_SIMD_CYCLES_GE_1_UOPS_EXEC 0x40
+DEFAULT_OPTIONS_UOPS_RETIRED_PACKED_SIMD_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_RETIRED_PACKED_SIMD_CYCLES_GE_2_UOPS_EXEC 0x40
+
+
+
+
+EVENT_MEM_UOPS_RETIRED              0x04 PMC
+UMASK_MEM_UOPS_RETIRED_L1_MISS_LOADS  0x01
+UMASK_MEM_UOPS_RETIRED_L2_HIT_LOADS    0x02
+UMASK_MEM_UOPS_RETIRED_L2_MISS_LOADS   0x04
+UMASK_MEM_UOPS_RETIRED_L2_LOADS_ANY    0x06
+UMASK_MEM_UOPS_RETIRED_DTLB_MISS_LOADS 0x08
+UMASK_MEM_UOPS_RETIRED_UTLB_MISS_LOADS 0x10
+UMASK_MEM_UOPS_RETIRED_HITM         0x20
+UMASK_MEM_UOPS_RETIRED_ALL_LOADS       0x40
+UMASK_MEM_UOPS_RETIRED_ALL_STORES       0x80
+
+EVENT_RECYCLEQ                        0x03  PMC
+UMASK_RECYCLEQ_LD_BLOCK_ST_FORWARD    0x01
+UMASK_RECYCLEQ_LD_BLOCK_STD_NOTREADY  0x02
+UMASK_RECYCLEQ_ST_SPLITS              0x04
+UMASK_RECYCLEQ_LD_SPLITS              0x08
+UMASK_RECYCLEQ_LOCK                   0x10
+UMASK_RECYCLEQ_STA_FULL               0x20
+UMASK_RECYCLEQ_ANY_LD                 0x40
+UMASK_RECYCLEQ_ANY_ST                 0x80
+
+EVENT_BR_INST_RETIRED                0xC4  PMC
+UMASK_BR_INST_RETIRED_ALL_BRANCHES   0x00
+UMASK_BR_INST_RETIRED_JCC            0x7E
+UMASK_BR_INST_RETIRED_TAKEN_JCC      0xFE
+UMASK_BR_INST_RETIRED_NON_RETURN_IND 0xEB
+UMASK_BR_INST_RETIRED_RETURN         0xF7
+UMASK_BR_INST_RETIRED_IND_CALL       0xFB
+UMASK_BR_INST_RETIRED_CALL           0xF9
+UMASK_BR_INST_RETIRED_REL_CALL       0xFD
+UMASK_BR_INST_RETIRED_FAR_BRANCH     0xBF
+
+EVENT_BR_MISP_RETIRED                0xC5  PMC
+UMASK_BR_MISP_RETIRED_ALL_BRANCHES   0x00
+UMASK_BR_MISP_RETIRED_JCC            0x7E
+UMASK_BR_MISP_RETIRED_TAKEN_JCC      0xFE
+UMASK_BR_MISP_RETIRED_NON_RETURN_IND 0xEB
+UMASK_BR_MISP_RETIRED_RETURN         0xF7
+UMASK_BR_MISP_RETIRED_IND_CALL       0xFB
+UMASK_BR_MISP_RETIRED_CALL           0xF9
+UMASK_BR_MISP_RETIRED_REL_CALL       0xFD
+UMASK_BR_MISP_RETIRED_FAR_BRANCH     0xBF
+
+EVENT_NO_ALLOC_CYCLES               0xCA PMC
+UMASK_NO_ALLOC_CYCLES_ROB_FULL      0x01
+DEFAULT_OPTIONS_NO_ALLOC_CYCLES_ROB_FULL_COUNT EVENT_OPTION_EDGE=1
+UMASK_NO_ALLOC_CYCLES_ROB_FULL_COUNT      0x01
+UMASK_NO_ALLOC_CYCLES_MISPREDICTS   0x04
+DEFAULT_OPTIONS_NO_ALLOC_CYCLES_MISPREDICTS_COUNT EVENT_OPTION_EDGE=1
+UMASK_NO_ALLOC_CYCLES_MISPREDICTS_COUNT   0x04
+UMASK_NO_ALLOC_CYCLES_RAT_STALL     0x20
+DEFAULT_OPTIONS_NO_ALLOC_CYCLES_RAT_STALL_COUNT EVENT_OPTION_EDGE=1
+UMASK_NO_ALLOC_CYCLES_RAT_STALL_COUNT     0x20
+UMASK_NO_ALLOC_CYCLES_ALL           0x7F
+DEFAULT_OPTIONS_NO_ALLOC_CYCLES_ALL_COUNT EVENT_OPTION_EDGE=1
+UMASK_NO_ALLOC_CYCLES_ALL_COUNT     0x7F
+UMASK_NO_ALLOC_CYCLES_NOT_DELIVERED 0x90
+DEFAULT_OPTIONS_NO_ALLOC_CYCLES_NOT_DELIVERED_COUNT EVENT_OPTION_EDGE=1
+UMASK_NO_ALLOC_CYCLES_NOT_DELIVERED_COUNT 0x90
+
+EVENT_ICACHE                    0x80   PMC
+UMASK_ICACHE_HITS               0x01
+UMASK_ICACHE_MISSES             0x02
+UMASK_ICACHE_ACCESSES           0x03
+
+EVENT_L2_REQUESTS_REJECT                  0x30 PMC
+UMASK_L2_REQUESTS_REJECT_ALL              0x00
+
+EVENT_LONGEST_LAT_CACHE             0x2E  PMC
+UMASK_LONGEST_LAT_CACHE_MISS        0x41
+UMASK_LONGEST_LAT_CACHE_REFERENCE   0x4F
+
+EVENT_L2_REQUESTS             0x2E  PMC
+UMASK_L2_REQUESTS_MISS        0x41
+UMASK_L2_REQUESTS_REFERENCE   0x4F
+
+EVENT_CPU_CLK_UNHALTED                0x3C PMC
+UMASK_CPU_CLK_UNHALTED_THREAD_P       0x00
+UMASK_CPU_CLK_UNHALTED_REF            0x01
+
+EVENT_BACLEARS                      0xE6  PMC
+UMASK_BACLEARS_ALL                  0x01
+UMASK_BACLEARS_RETURN               0x08
+UMASK_BACLEARS_COND                 0x10
+
+EVENT_PAGE_WALKS                    0x05  PMC
+DEFAULT_OPTIONS_PAGE_WALKS_DTLB_COUNT EVENT_OPTION_EDGE=1|EVENT_OPTION_THRESHOLD=0x1
+UMASK_PAGE_WALKS_DTLB_COUNT         0x01
+UMASK_PAGE_WALKS_DTLB_CYCLES        0x01
+DEFAULT_OPTIONS_PAGE_WALKS_ITLB_COUNT EVENT_OPTION_EDGE=1|EVENT_OPTION_THRESHOLD=0x1
+UMASK_PAGE_WALKS_ITLB_COUNT         0x02
+UMASK_PAGE_WALKS_ITLB_CYCLES        0x02
+DEFAULT_OPTIONS_PAGE_WALKS_COUNT EVENT_OPTION_EDGE=1|EVENT_OPTION_THRESHOLD=0x1
+UMASK_PAGE_WALKS_COUNT              0x03
+UMASK_PAGE_WALKS_CYCLES             0x03
+
+EVENT_MACHINE_CLEARS                   0xC3 PMC
+UMASK_MACHINE_CLEARS_SMC               0x01
+UMASK_MACHINE_CLEARS_MEMORY_ORDERING   0x02
+UMASK_MACHINE_CLEARS_FP_ASSIST         0x04
+UMASK_MACHINE_CLEARS_ALL               0x08
+
+EVENT_OFFCORE_REQUESTS                  0xB0 PMC
+UMASK_OFFCORE_REQUESTS_DEMAND_DATA_RD   0x01
+UMASK_OFFCORE_REQUESTS_DEMAND_RFO       0x02
+UMASK_OFFCORE_REQUESTS_DEMAND_CODE_RD   0x04
+UMASK_OFFCORE_REQUESTS_ALL_DATA_RD      0x08
+
+EVENT_RS_FULL_STALL                 0xCB PMC
+UMASK_RS_FULL_STALL_MEC                 0x01
+DEFAULT_OPTIONS_RS_FULL_STALL_MEC_COUNT EVENT_OPTION_EDGE=1
+UMASK_RS_FULL_STALL_MEC_COUNT           0x01
+UMASK_RS_FULL_STALL_ALL                 0x1F
+DEFAULT_OPTIONS_RS_FULL_STALL_ALL_COUNT EVENT_OPTION_EDGE=1
+UMASK_RS_FULL_STALL_ALL_COUNT           0x1F
+
+EVENT_CYCLES_DIV_BUSY               0xCD PMC
+UMASK_CYCLES_DIV_BUSY               0x01
+DEFAULT_OPTIONS_CYCLES_DIV_BUSY_COUNT EVENT_OPTION_EDGE=1
+UMASK_CYCLES_DIV_BUSY_COUNT         0x01
+
+EVENT_CORE_REJECT_L2Q               0x31 PMC
+UMASK_CORE_REJECT_L2Q_ALL           0x00
+
+EVENT_FETCH_STALL                   0x86 PMC
+UMASK_FETCH_STALL_ICACHE_FILL_PENDING_CYCLES 0x04
+DEFAULT_OPTIONS_FETCH_STALL_ICACHE_FILL_PENDING_COUNT EVENT_OPTION_EDGE=1|EVENT_OPTION_THRESHOLD=0x1
+UMASK_FETCH_STALL_ICACHE_FILL_PENDING_COUNT 0x04
+
+EVENT_MS_DECODED                0xE7 PMC
+UMASK_MS_DECODED_MS_ENTRY       0x01
+
+EVENT_OFFCORE_RESPONSE_0              0xB7 PMC
+OPTIONS_OFFCORE_RESPONSE_0_OPTIONS    EVENT_OPTION_MATCH0_MASK|EVENT_OPTION_MATCH1_MASK
+UMASK_OFFCORE_RESPONSE_0_OPTIONS      0x01 0xFF 0xFF
+UMASK_OFFCORE_RESPONSE_0_DMND_DATA_RD_ANY           0x01 0x00 0x10
+UMASK_OFFCORE_RESPONSE_0_DMND_RFO_ANY               0x01 0x01 0x10
+UMASK_OFFCORE_RESPONSE_0_DMND_CODE_RD_ANY           0x01 0x02 0x10
+UMASK_OFFCORE_RESPONSE_0_WRITEBACK_ANY              0x01 0x03 0x10
+UMASK_OFFCORE_RESPONSE_0_PF_DATA_RD_ANY             0x01 0x04 0x10
+UMASK_OFFCORE_RESPONSE_0_PF_L2_RFO_ANY              0x01 0x05 0x10
+UMASK_OFFCORE_RESPONSE_0_PF_L2_CODE_RD_ANY          0x01 0x06 0x10
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_READ_ANY           0x01 0x07 0x10
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_WRITE_ANY          0x01 0x08 0x10
+UMASK_OFFCORE_RESPONSE_0_UC_CODE_RD_ANY             0x01 0x09 0x10
+UMASK_OFFCORE_RESPONSE_0_BUS_LOCKS_ANY              0x01 0x0A 0x10
+UMASK_OFFCORE_RESPONSE_0_FULL_STRM_STORES_ANY       0x01 0x0B 0x10
+UMASK_OFFCORE_RESPONSE_0_SW_PREFETCH_ANY            0x01 0x0C 0x10
+UMASK_OFFCORE_RESPONSE_0_PF_L1_DATA_RD_ANY          0x01 0x0D 0x10
+UMASK_OFFCORE_RESPONSE_0_PARTIAL_STRM_STORES_ANY    0x01 0x0E 0x10
+UMASK_OFFCORE_RESPONSE_0_ANY_ANY                    0x01 0x0F 0x10
+
+EVENT_OFFCORE_RESPONSE_1              0xB7 PMC
+OPTIONS_OFFCORE_RESPONSE_1_OPTIONS    EVENT_OPTION_MATCH0_MASK|EVENT_OPTION_MATCH1_MASK
+UMASK_OFFCORE_RESPONSE_1_OPTIONS      0x02 0xFF 0xFF
+UMASK_OFFCORE_RESPONSE_1_DMND_DATA_RD_ANY           0x02 0x00 0x10
+UMASK_OFFCORE_RESPONSE_1_DMND_RFO_ANY               0x02 0x01 0x10
+UMASK_OFFCORE_RESPONSE_1_DMND_CODE_RD_ANY           0x02 0x02 0x10
+UMASK_OFFCORE_RESPONSE_1_WRITEBACK_ANY              0x02 0x03 0x10
+UMASK_OFFCORE_RESPONSE_1_PF_DATA_RD_ANY             0x02 0x04 0x10
+UMASK_OFFCORE_RESPONSE_1_PF_L2_RFO_ANY              0x02 0x05 0x10
+UMASK_OFFCORE_RESPONSE_1_PF_L2_CODE_RD_ANY          0x02 0x06 0x10
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_READ_ANY           0x02 0x07 0x10
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_WRITE_ANY          0x02 0x08 0x10
+UMASK_OFFCORE_RESPONSE_1_UC_CODE_RD_ANY             0x02 0x09 0x10
+UMASK_OFFCORE_RESPONSE_1_BUS_LOCKS_ANY              0x02 0x0A 0x10
+UMASK_OFFCORE_RESPONSE_1_FULL_STRM_STORES_ANY       0x02 0x0B 0x10
+UMASK_OFFCORE_RESPONSE_1_SW_PREFETCH_ANY            0x02 0x0C 0x10
+UMASK_OFFCORE_RESPONSE_1_PF_L1_DATA_RD_ANY          0x02 0x0D 0x10
+UMASK_OFFCORE_RESPONSE_1_PARTIAL_STRM_STORES_ANY    0x02 0x0E 0x10
+UMASK_OFFCORE_RESPONSE_1_ANY_ANY                    0x02 0x0F 0x10
+
+EVENT_MCDRAM_CLOCKTICKS   0x00 EDBOX0FIX|EDBOX1FIX|EDBOX2FIX|EDBOX3FIX|EDBOX4FIX|EDBOX5FIX|EDBOX6FIX|EDBOX7FIX
+UMASK_MCDRAM_CLOCKTICKS   0x00
+
+EVENT_EDC_CLOCKTICKS   0x00 EUBOX0FIX|EUBOX1FIX|EUBOX2FIX|EUBOX3FIX|EUBOX4FIX|EUBOX5FIX|EUBOX6FIX|EUBOX7FIX
+UMASK_EDC_CLOCKTICKS   0x00
+
+EVENT_EDC_UCLK          0x00 EUBOX0|EUBOX1|EUBOX2|EUBOX3|EUBOX4|EUBOX5|EUBOX6|EUBOX7
+UMASK_EDC_UCLK          0x00
+
+EVENT_EDC               0x02 EUBOX0|EUBOX1|EUBOX2|EUBOX3|EUBOX4|EUBOX5|EUBOX6|EUBOX7
+UMASK_EDC_HIT_CLEAN     0x01
+UMASK_EDC_HIT_DIRTY     0x02
+UMASK_EDC_HIT_ALL	0x03
+UMASK_EDC_MISS_CLEAN    0x04
+UMASK_EDC_MISS_DIRTY    0x08
+UMASK_EDC_MISS_INVALID  0x10
+UMASK_EDC_MISS_ALL	0x1C
+
+EVENT_EDC_ECLK          0x00 EDBOX0|EDBOX1|EDBOX2|EDBOX3|EDBOX4|EDBOX5|EDBOX6|EDBOX7
+UMASK_EDC_ECLK          0x00
+
+EVENT_EDC_RPQ           0x01 EDBOX0|EDBOX1|EDBOX2|EDBOX3|EDBOX4|EDBOX5|EDBOX6|EDBOX7
+UMASK_EDC_RPQ_INSERTS   0x01
+
+EVENT_EDC_WPQ           0x02 EDBOX0|EDBOX1|EDBOX2|EDBOX3|EDBOX4|EDBOX5|EDBOX6|EDBOX7
+UMASK_EDC_WPQ_INSERTS   0x01
+
+EVENT_DRAM_CLOCKTICKS   0x00 MBOX0FIX|MBOX1FIX|MBOX2FIX|MBOX4FIX|MBOX5FIX|MBOX6FIX
+UMASK_DRAM_CLOCKTICKS   0x00
+
+EVENT_IMC_CLOCKTICKS   0x00 MBOX3FIX|MBOX7FIX
+UMASK_IMC_CLOCKTICKS   0x00
+
+EVENT_MC_UCLK           0x00 MBOX3|MBOX7
+UMASK_MC_UCLK           0x00
+
+EVENT_MC_DCLK           0x00 MBOX0|MBOX1|MBOX2|MBOX4|MBOX5|MBOX6
+UMASK_MC_DCLK           0x00
+
+EVENT_MC_CAS           0x03 MBOX0|MBOX1|MBOX2|MBOX4|MBOX5|MBOX6
+UMASK_MC_CAS_READS     0x01
+UMASK_MC_CAS_WRITES    0x02
+UMASK_MC_CAS_ALL       0x03
+
+EVENT_CBOX_CLOCKTICKS               0x00 CBOX
+UMASK_CBOX_CLOCKTICKS               0x00
+
+EVENT_INGRESS_OCCUPANCY                 0x11 CBOX0C0|CBOX1C0|CBOX2C0|CBOX3C0|CBOX4C0|CBOX5C0|CBOX6C0|CBOX7C0|CBOX8C0|CBOX9C0|CBOX10C0|CBOX11C0|CBOX12C0|CBOX13C0|CBOX14C0|CBOX15C0|CBOX16C0|CBOX17C0|CBOX18C0|CBOX19C0|CBOX20C0|CBOX21C0|CBOX22C0|CBOX23C0|CBOX24C0|CBOX25C0|CBOX26C0|CBOX27C0|CBOX28C0|CBOX29C0|CBOX30C0|CBOX31C0|CBOX32C0|CBOX33C0|CBOX34C0|CBOX35C0|CBOX36C0|CBOX37C0
+UMASK_INGRESS_OCCUPANCY_IRQ             0x01
+UMASK_INGRESS_OCCUPANCY_IRQ_REJ         0x02
+UMASK_INGRESS_OCCUPANCY_IPQ             0x04
+UMASK_INGRESS_OCCUPANCY_PRQ             0x10
+UMASK_INGRESS_OCCUPANCY_PRQ_REJ         0x20
+
+EVENT_INGRESS_INSERTS                   0x13 CBOX
+UMASK_INGRESS_INSERTS_IRQ               0x01
+UMASK_INGRESS_INSERTS_IRQ_REJ           0x02
+UMASK_INGRESS_INSERTS_IPQ               0x04
+UMASK_INGRESS_INSERTS_PRQ               0x10
+UMASK_INGRESS_INSERTS_PRQ_REJ           0x20
+
+EVENT_INGRESS_INT_STARVED               0x14 CBOX
+UMASK_INGRESS_INT_STARVED_IRQ           0x01
+UMASK_INGRESS_INT_STARVED_IPQ           0x04
+UMASK_INGRESS_INT_STARVED_ISMQ          0x08
+UMASK_INGRESS_INT_STARVED_PRQ           0x10
+
+EVENT_INGRESS_IRQ0_REJECT               0x18 CBOX
+UMASK_INGRESS_IRQ0_REJECT_AD_REQ_VN0    0x01
+UMASK_INGRESS_IRQ0_REJECT_BL_NCS_VN0    0x20
+UMASK_INGRESS_IRQ0_REJECT_AK_NON_UPI    0x40
+UMASK_INGRESS_IRQ0_REJECT_IV_NON_UPI    0x80
+
+EVENT_INGRESS_IRQ1_REJECT                    0x19 CBOX
+UMASK_INGRESS_IRQ1_REJECT_ANY_REJECT_IRQ0    0x01
+UMASK_INGRESS_IRQ1_REJECT_SF_VICTIM          0x08
+UMASK_INGRESS_IRQ1_REJECT_SF_WAY             0x20
+UMASK_INGRESS_IRQ1_REJECT_PA_MATCH           0x80
+
+EVENT_INGRESS_PRQ0_REJECT               0x20 CBOX
+UMASK_INGRESS_PRQ0_REJECT_AD_REQ_VN0    0x01
+UMASK_INGRESS_PRQ0_REJECT_AK_NON_UPI    0x40
+UMASK_INGRESS_PRQ0_REJECT_IV_NON_UPI    0x80
+
+EVENT_INGRESS_PRQ1_REJECT                    0x21 CBOX
+UMASK_INGRESS_PRQ1_REJECT_ANY_REJECT_IRQ0    0x01
+UMASK_INGRESS_PRQ1_REJECT_SF_VICTIM          0x08
+UMASK_INGRESS_PRQ1_REJECT_SF_WAY             0x20
+UMASK_INGRESS_PRQ1_REJECT_PA_MATCH           0x80
+
+EVENT_INGRESS_IPQ0_REJECT                   0x22 CBOX
+UMASK_INGRESS_IPQ0_REJECT_AD_REQ_VN0        0x01
+UMASK_INGRESS_IPQ0_REJECT_AD_RSP_VN0        0x02
+UMASK_INGRESS_IPQ0_REJECT_BL_RSP_VN0        0x04
+UMASK_INGRESS_IPQ0_REJECT_BL_WB_VN0         0x08
+UMASK_INGRESS_IPQ0_REJECT_BL_NCB_VN0        0x10
+UMASK_INGRESS_IPQ0_REJECT_BL_NCS_VN0        0x20
+UMASK_INGRESS_IPQ0_REJECT_AK_NON_UPI        0x40
+
+EVENT_INGRESS_IPQ1_REJECT                    0x23 CBOX
+UMASK_INGRESS_IPQ1_REJECT_ANY_REJECT_IPQ0    0x01
+UMASK_INGRESS_IPQ1_REJECT_SF_VICTIM          0x08
+UMASK_INGRESS_IPQ1_REJECT_SF_WAY             0x20
+UMASK_INGRESS_IPQ1_REJECT_ALLOW_SNP          0x40
+UMASK_INGRESS_IPQ1_REJECT_PA_MATCH           0x80
+
+EVENT_INGRESS_ISMQ0_REJECT                   0x24 CBOX
+UMASK_INGRESS_ISMQ0_REJECT_AD_REQ_VN0        0x01
+UMASK_INGRESS_ISMQ0_REJECT_AD_RSP_VN0        0x02
+UMASK_INGRESS_ISMQ0_REJECT_BL_RSP_VN0        0x04
+UMASK_INGRESS_ISMQ0_REJECT_BL_WB_VN0         0x08
+UMASK_INGRESS_ISMQ0_REJECT_BL_NCB_VN0        0x10
+UMASK_INGRESS_ISMQ0_REJECT_BL_NCS_VN0        0x20
+UMASK_INGRESS_ISMQ0_REJECT_AK_NON_UPI        0x40
+UMASK_INGRESS_ISMQ0_REJECT_IV_NON_UPI        0x80
+
+EVENT_INGRESS_REQ_Q0_RETRY                  0x2A CBOX
+UMASK_INGRESS_REQ_Q0_RETRY_AD_REQ_VN0       0x01
+UMASK_INGRESS_REQ_Q0_RETRY_AD_RSP_VN0       0x02
+UMASK_INGRESS_REQ_Q0_RETRY_BL_NCS_VN0       0x20
+UMASK_INGRESS_REQ_Q0_RETRY_AK_NON_UPI       0x40
+UMASK_INGRESS_REQ_Q0_RETRY_IV_NON_UPI       0x80
+
+EVENT_INGRESS_Q1_REJECT                    0x2B CBOX
+UMASK_INGRESS_Q1_REJECT_ANY_REJECT_IPQ0    0x01
+UMASK_INGRESS_Q1_REJECT_SF_VICTIM          0x08
+UMASK_INGRESS_Q1_REJECT_SF_WAY             0x20
+UMASK_INGRESS_Q1_REJECT_ALLOW_SNP          0x40
+UMASK_INGRESS_Q1_REJECT_PA_MATCH           0x80
+
+EVENT_INGRESS_ISMQ0_RETRY                   0x2C CBOX
+UMASK_INGRESS_ISMQ0_RETRY_AD_REQ_VN0        0x01
+UMASK_INGRESS_ISMQ0_RETRY_AD_RSP_VN0        0x02
+UMASK_INGRESS_ISMQ0_RETRY_BL_RSP_VN0        0x04
+UMASK_INGRESS_ISMQ0_RETRY_BL_WB_VN0         0x08
+UMASK_INGRESS_ISMQ0_RETRY_BL_NCB_VN0        0x10
+UMASK_INGRESS_ISMQ0_RETRY_BL_NCS_VN0        0x20
+UMASK_INGRESS_ISMQ0_RETRY_AK_NON_UPI        0x40
+UMASK_INGRESS_ISMQ0_RETRY_IV_NON_UPI        0x80
+
+EVENT_INGRESS_OTHER0_RETRY                  0x2E CBOX
+UMASK_INGRESS_OTHER0_RETRY_AD_REQ_VN0       0x01
+UMASK_INGRESS_OTHER0_RETRY_AD_RSP_VN0       0x02
+UMASK_INGRESS_OTHER0_RETRY_BL_RSP_VN0       0x04
+UMASK_INGRESS_OTHER0_RETRY_BL_WB_VN0        0x08
+UMASK_INGRESS_OTHER0_RETRY_BL_NCB_VN0       0x10
+UMASK_INGRESS_OTHER0_RETRY_BL_NCS_VN0       0x20
+UMASK_INGRESS_OTHER0_RETRY_AK_NON_UPI       0x40
+UMASK_INGRESS_OTHER0_RETRY_IV_NON_UPI       0x80
+
+EVENT_INGRESS_OTHER1_RETRY                    0x2F CBOX
+UMASK_INGRESS_OTHER1_RETRY_ANY_REJECT_IPQ0    0x01
+UMASK_INGRESS_OTHER1_RETRY_SF_VICTIM          0x08
+UMASK_INGRESS_OTHER1_RETRY_SF_WAY             0x20
+UMASK_INGRESS_OTHER1_RETRY_ALLOW_SNP          0x40
+UMASK_INGRESS_OTHER1_RETRY_PA_MATCH           0x80
+
+EVENT_SF_LOOKUP                     0x34 CBOX
+UMASK_SF_LOOKUP_DATA_READ           0x03
+UMASK_SF_LOOKUP_WRITE               0x05
+UMASK_SF_LOOKUP_REMOTE_SNOOP        0x09
+UMASK_SF_LOOKUP_ANY                 0x11
+
+EVENT_COUNTER0_OCCUPANCY                 0x1F CBOX0C0|CBOX1C0|CBOX2C0|CBOX3C0|CBOX4C0|CBOX5C0|CBOX6C0|CBOX7C0|CBOX8C0|CBOX9C0|CBOX10C0|CBOX11C0|CBOX12C0|CBOX13C0|CBOX14C0|CBOX15C0|CBOX16C0|CBOX17C0|CBOX18C0|CBOX19C0|CBOX20C0|CBOX21C0|CBOX22C0|CBOX23C0|CBOX24C0|CBOX25C0|CBOX26C0|CBOX27C0|CBOX28C0|CBOX29C0|CBOX30C0|CBOX31C0|CBOX32C0|CBOX33C0|CBOX34C0|CBOX35C0|CBOX36C0|CBOX37C0
+UMASK_COUNTER0_OCCUPANCY                 0x00
+
+
+EVENT_TOR_INSERTS                   0x35 CBOX
+UMASK_TOR_INSERTS_IRQ               0x31
+UMASK_TOR_INSERTS_IRQ_HIT           0x11
+UMASK_TOR_INSERTS_IRQ_MISS          0x21
+UMASK_TOR_INSERTS_EVICT             0x32
+UMASK_TOR_INSERTS_PRQ               0x34
+UMASK_TOR_INSERTS_PRQ_HIT           0x14
+UMASK_TOR_INSERTS_PRQ_MISS          0x24
+UMASK_TOR_INSERTS_IPQ               0x38
+UMASK_TOR_INSERTS_IPQ_HIT           0x18
+UMASK_TOR_INSERTS_IPQ_MISS          0x28
+UMASK_TOR_INSERTS_HIT               0x1D
+UMASK_TOR_INSERTS_MISS              0x2D
+
+EVENT_TOR_OCCUPANCY                 0x36 CBOX0C0|CBOX1C0|CBOX2C0|CBOX3C0|CBOX4C0|CBOX5C0|CBOX6C0|CBOX7C0|CBOX8C0|CBOX9C0|CBOX10C0|CBOX11C0|CBOX12C0|CBOX13C0|CBOX14C0|CBOX15C0|CBOX16C0|CBOX17C0|CBOX18C0|CBOX19C0|CBOX20C0|CBOX21C0|CBOX22C0|CBOX23C0|CBOX24C0|CBOX25C0|CBOX26C0|CBOX27C0|CBOX28C0|CBOX29C0|CBOX30C0|CBOX31C0|CBOX32C0|CBOX33C0|CBOX34C0|CBOX35C0|CBOX36C0|CBOX37C0
+UMASK_TOR_OCCUPANCY_IRQ               0x31
+UMASK_TOR_OCCUPANCY_IRQ_HIT           0x11
+UMASK_TOR_OCCUPANCY_IRQ_MISS          0x21
+UMASK_TOR_OCCUPANCY_EVICT             0x32
+UMASK_TOR_OCCUPANCY_PRQ               0x34
+UMASK_TOR_OCCUPANCY_PRQ_HIT           0x14
+UMASK_TOR_OCCUPANCY_PRQ_MISS          0x24
+UMASK_TOR_OCCUPANCY_IPQ               0x38
+UMASK_TOR_OCCUPANCY_IPQ_HIT           0x18
+UMASK_TOR_OCCUPANCY_IPQ_MISS          0x28
+UMASK_TOR_OCCUPANCY_HIT               0x1D
+UMASK_TOR_OCCUPANCY_MISS              0x2D
+
+EVENT_MISC                              0x39 CBOX
+UMASK_MISC_RSPI_WAS_FSE                 0x01
+UMASK_MISC_WC_ALIASING                  0x02
+UMASK_MISC_RFO_HIT_S                    0x08
+UMASK_MISC_CV0_PREF_VIC                 0x10
+UMASK_MISC_CV0_PREF_MISS                0x20
+
+EVENT_UCLK                  0xC0 CBOX
+UMASK_UCLK                  0x00
+
+EVENT_AG0_AD_CRD_ACQUIRED       0x80 CBOX
+UMASK_AG0_AD_CRD_ACQUIRED_TGR0  0x01
+UMASK_AG0_AD_CRD_ACQUIRED_TGR1  0x02
+UMASK_AG0_AD_CRD_ACQUIRED_TGR2  0x04
+UMASK_AG0_AD_CRD_ACQUIRED_TGR3  0x08
+UMASK_AG0_AD_CRD_ACQUIRED_TGR4  0x10
+UMASK_AG0_AD_CRD_ACQUIRED_TGR5  0x20
+UMASK_AG0_AD_CRD_ACQUIRED_TGR6  0x40
+UMASK_AG0_AD_CRD_ACQUIRED_TGR7  0x80
+
+EVENT_AG0_AD_CRD_ACQUIRED_EXT                             0x81 CBOX
+UMASK_AG0_AD_CRD_ACQUIRED_EXT_TGR8                        0x01
+UMASK_AG0_AD_CRD_ACQUIRED_EXT_ANY_OF_TGR0_THRU_TGR7       0x02
+UMASK_AG0_AD_CRD_ACQUIRED_EXT_ANY                         0x03
+
+EVENT_AG0_AD_CRD_OCCUPANCY          0x82 CBOX
+UMASK_AG0_AD_CRD_OCCUPANCY_TGR0     0x01
+UMASK_AG0_AD_CRD_OCCUPANCY_TGR1     0x02
+UMASK_AG0_AD_CRD_OCCUPANCY_TGR2     0x04
+UMASK_AG0_AD_CRD_OCCUPANCY_TGR3     0x08
+UMASK_AG0_AD_CRD_OCCUPANCY_TGR4     0x10
+UMASK_AG0_AD_CRD_OCCUPANCY_TGR5     0x20
+UMASK_AG0_AD_CRD_OCCUPANCY_TGR6     0x40
+UMASK_AG0_AD_CRD_OCCUPANCY_TGR7     0x80
+
+EVENT_AG0_AD_CRD_OCCUPANCY_EXT                             0x83 CBOX
+UMASK_AG0_AD_CRD_OCCUPANCY_EXT_TGR8                        0x01
+UMASK_AG0_AD_CRD_OCCUPANCY_EXT_ANY_OF_TGR0_THRU_TGR7       0x02
+UMASK_AG0_AD_CRD_OCCUPANCY_EXT_ANY                         0x03
+
+EVENT_AG1_AD_CRD_ACQUIRED       0x84 CBOX
+UMASK_AG1_AD_CRD_ACQUIRED_TGR0  0x01
+UMASK_AG1_AD_CRD_ACQUIRED_TGR1  0x02
+UMASK_AG1_AD_CRD_ACQUIRED_TGR2  0x04
+UMASK_AG1_AD_CRD_ACQUIRED_TGR3  0x08
+UMASK_AG1_AD_CRD_ACQUIRED_TGR4  0x10
+UMASK_AG1_AD_CRD_ACQUIRED_TGR5  0x20
+UMASK_AG1_AD_CRD_ACQUIRED_TGR6  0x40
+UMASK_AG1_AD_CRD_ACQUIRED_TGR7  0x80
+
+EVENT_AG1_AD_CRD_ACQUIRED_EXT                             0x85 CBOX
+UMASK_AG1_AD_CRD_ACQUIRED_EXT_TGR8                        0x01
+UMASK_AG1_AD_CRD_ACQUIRED_EXT_ANY_OF_TGR0_THRU_TGR7       0x02
+UMASK_AG1_AD_CRD_ACQUIRED_EXT_ANY                         0x03
+
+EVENT_AG1_AD_CRD_OCCUPANCY          0x86 CBOX
+UMASK_AG1_AD_CRD_OCCUPANCY_TGR0     0x01
+UMASK_AG1_AD_CRD_OCCUPANCY_TGR1     0x02
+UMASK_AG1_AD_CRD_OCCUPANCY_TGR2     0x04
+UMASK_AG1_AD_CRD_OCCUPANCY_TGR3     0x08
+UMASK_AG1_AD_CRD_OCCUPANCY_TGR4     0x10
+UMASK_AG1_AD_CRD_OCCUPANCY_TGR5     0x20
+UMASK_AG1_AD_CRD_OCCUPANCY_TGR6     0x40
+UMASK_AG1_AD_CRD_OCCUPANCY_TGR7     0x80
+
+EVENT_AG1_AD_CRD_OCCUPANCY_EXT                             0x87 CBOX
+UMASK_AG1_AD_CRD_OCCUPANCY_EXT_TGR8                        0x01
+UMASK_AG1_AD_CRD_OCCUPANCY_EXT_ANY_OF_TGR0_THRU_TGR7       0x02
+UMASK_AG1_AD_CRD_OCCUPANCY_EXT_ANY                         0x03
+
+EVENT_AG0_BL_CRD_ACQUIRED       0x88 CBOX
+UMASK_AG0_BL_CRD_ACQUIRED_TGR0  0x01
+UMASK_AG0_BL_CRD_ACQUIRED_TGR1  0x02
+UMASK_AG0_BL_CRD_ACQUIRED_TGR2  0x04
+UMASK_AG0_BL_CRD_ACQUIRED_TGR3  0x08
+UMASK_AG0_BL_CRD_ACQUIRED_TGR4  0x10
+UMASK_AG0_BL_CRD_ACQUIRED_TGR5  0x20
+UMASK_AG0_BL_CRD_ACQUIRED_TGR6  0x40
+UMASK_AG0_BL_CRD_ACQUIRED_TGR7  0x80
+
+EVENT_AG0_BL_CRD_ACQUIRED_EXT                             0x89 CBOX
+UMASK_AG0_BL_CRD_ACQUIRED_EXT_TGR8                        0x01
+UMASK_AG0_BL_CRD_ACQUIRED_EXT_ANY_OF_TGR0_THRU_TGR7       0x02
+UMASK_AG0_BL_CRD_ACQUIRED_EXT_ANY                         0x03
+
+EVENT_AG0_BL_CRD_OCCUPANCY          0x8A CBOX
+UMASK_AG0_BL_CRD_OCCUPANCY_TGR0     0x01
+UMASK_AG0_BL_CRD_OCCUPANCY_TGR1     0x02
+UMASK_AG0_BL_CRD_OCCUPANCY_TGR2     0x04
+UMASK_AG0_BL_CRD_OCCUPANCY_TGR3     0x08
+UMASK_AG0_BL_CRD_OCCUPANCY_TGR4     0x10
+UMASK_AG0_BL_CRD_OCCUPANCY_TGR5     0x20
+UMASK_AG0_BL_CRD_OCCUPANCY_TGR6     0x40
+UMASK_AG0_BL_CRD_OCCUPANCY_TGR7     0x80
+
+EVENT_AG0_BL_CRD_OCCUPANCY_EXT                             0x8B CBOX
+UMASK_AG0_BL_CRD_OCCUPANCY_EXT_TGR8                        0x01
+UMASK_AG0_BL_CRD_OCCUPANCY_EXT_ANY_OF_TGR0_THRU_TGR7       0x02
+UMASK_AG0_BL_CRD_OCCUPANCY_EXT_ANY                         0x03
+
+
+EVENT_AG1_BL_CRD_ACQUIRED       0x8C CBOX
+UMASK_AG1_BL_CRD_ACQUIRED_TGR0  0x01
+UMASK_AG1_BL_CRD_ACQUIRED_TGR1  0x02
+UMASK_AG1_BL_CRD_ACQUIRED_TGR2  0x04
+UMASK_AG1_BL_CRD_ACQUIRED_TGR3  0x08
+UMASK_AG1_BL_CRD_ACQUIRED_TGR4  0x10
+UMASK_AG1_BL_CRD_ACQUIRED_TGR5  0x20
+UMASK_AG1_BL_CRD_ACQUIRED_TGR6  0x40
+UMASK_AG1_BL_CRD_ACQUIRED_TGR7  0x80
+
+EVENT_AG1_BL_CRD_ACQUIRED_EXT                             0x8D CBOX
+UMASK_AG1_BL_CRD_ACQUIRED_EXT_TGR8                        0x01
+UMASK_AG1_BL_CRD_ACQUIRED_EXT_ANY_OF_TGR0_THRU_TGR7       0x02
+UMASK_AG1_BL_CRD_ACQUIRED_EXT_ANY                         0x03
+
+EVENT_AG1_BL_CRD_OCCUPANCY          0x8E CBOX
+UMASK_AG1_BL_CRD_OCCUPANCY_TGR0     0x01
+UMASK_AG1_BL_CRD_OCCUPANCY_TGR1     0x02
+UMASK_AG1_BL_CRD_OCCUPANCY_TGR2     0x04
+UMASK_AG1_BL_CRD_OCCUPANCY_TGR3     0x08
+UMASK_AG1_BL_CRD_OCCUPANCY_TGR4     0x10
+UMASK_AG1_BL_CRD_OCCUPANCY_TGR5     0x20
+UMASK_AG1_BL_CRD_OCCUPANCY_TGR6     0x40
+UMASK_AG1_BL_CRD_OCCUPANCY_TGR7     0x80
+
+EVENT_AG1_BL_CRD_OCCUPANCY_EXT                             0x8F CBOX
+UMASK_AG1_BL_CRD_OCCUPANCY_EXT_TGR8                        0x01
+UMASK_AG1_BL_CRD_OCCUPANCY_EXT_ANY_OF_TGR0_THRU_TGR7       0x02
+UMASK_AG1_BL_CRD_OCCUPANCY_EXT_ANY                         0x03
+
+EVENT_AG0_STALL_NO_CRD_EGRESS_HORZ_AD      0xD0 CBOX
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_AD_TGR0 0x01
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_AD_TGR1 0x02
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_AD_TGR2 0x04
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_AD_TGR3 0x08
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_AD_TGR4 0x10
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_AD_TGR5 0x20
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_AD_TGR6 0x40
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_AD_TGR7 0x80
+
+EVENT_AG0_STALL_NO_CRD_EGRESS_HORZ_AD_EXT                             0xD1 CBOX
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_AD_EXT_TGR8                        0x01
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_AD_EXT_ANY_OF_TGR0_THRU_TGR7       0x02
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_AD_EXT_ANY                         0x03
+
+EVENT_AG1_STALL_NO_CRD_EGRESS_HORZ_AD      0xD2 CBOX
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_AD_TGR0 0x01
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_AD_TGR1 0x02
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_AD_TGR2 0x04
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_AD_TGR3 0x08
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_AD_TGR4 0x10
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_AD_TGR5 0x20
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_AD_TGR6 0x40
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_AD_TGR7 0x80
+
+EVENT_AG1_STALL_NO_CRD_EGRESS_HORZ_AD_EXT                             0xD3 CBOX
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_AD_EXT_TGR8                        0x01
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_AD_EXT_ANY_OF_TGR0_THRU_TGR7       0x02
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_AD_EXT_ANY                         0x03
+
+EVENT_AG0_STALL_NO_CRD_EGRESS_HORZ_BL      0xD4 CBOX
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_BL_TGR0 0x01
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_BL_TGR1 0x02
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_BL_TGR2 0x04
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_BL_TGR3 0x08
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_BL_TGR4 0x10
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_BL_TGR5 0x20
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_BL_TGR6 0x40
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_BL_TGR7 0x80
+
+EVENT_AG0_STALL_NO_CRD_EGRESS_HORZ_BL_EXT                             0xD5 CBOX
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_BL_EXT_TGR8                        0x01
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_BL_EXT_ANY_OF_TGR0_THRU_TGR7       0x02
+UMASK_AG0_STALL_NO_CRD_EGRESS_HORZ_BL_EXT_ANY                         0x03
+
+EVENT_AG1_STALL_NO_CRD_EGRESS_HORZ_BL      0xD6 CBOX
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_BL_TGR0 0x01
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_BL_TGR1 0x02
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_BL_TGR2 0x04
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_BL_TGR3 0x08
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_BL_TGR4 0x10
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_BL_TGR5 0x20
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_BL_TGR6 0x40
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_BL_TGR7 0x80
+
+EVENT_AG1_STALL_NO_CRD_EGRESS_HORZ_BL_EXT                             0xD7 CBOX
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_BL_EXT_TGR8                        0x01
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_BL_EXT_ANY_OF_TGR0_THRU_TGR7       0x02
+UMASK_AG1_STALL_NO_CRD_EGRESS_HORZ_BL_EXT_ANY                         0x03
+
+EVENT_EGRESS_VERT_OCCUPANCY                 0x90 CBOX
+UMASK_EGRESS_VERT_OCCUPANCY_AD_AG0          0x01
+UMASK_EGRESS_VERT_OCCUPANCY_AK_AG0          0x02
+UMASK_EGRESS_VERT_OCCUPANCY_BL_AG0          0x04
+UMASK_EGRESS_VERT_OCCUPANCY_IV_AG0          0x08
+UMASK_EGRESS_VERT_OCCUPANCY_AD_AG1          0x10
+UMASK_EGRESS_VERT_OCCUPANCY_AK_AG1          0x20
+UMASK_EGRESS_VERT_OCCUPANCY_BL_AG1          0x40
+UMASK_EGRESS_VERT_OCCUPANCY_AD              0x11
+UMASK_EGRESS_VERT_OCCUPANCY_AK              0x22
+UMASK_EGRESS_VERT_OCCUPANCY_BL              0x44
+
+EVENT_EGRESS_VERT_INSERTS                 0x91 CBOX
+UMASK_EGRESS_VERT_INSERTS_AD_AG0          0x01
+UMASK_EGRESS_VERT_INSERTS_AK_AG0          0x02
+UMASK_EGRESS_VERT_INSERTS_BL_AG0          0x04
+UMASK_EGRESS_VERT_INSERTS_IV_AG0          0x08
+UMASK_EGRESS_VERT_INSERTS_AD_AG1          0x10
+UMASK_EGRESS_VERT_INSERTS_AK_AG1          0x20
+UMASK_EGRESS_VERT_INSERTS_BL_AG1          0x40
+UMASK_EGRESS_VERT_INSERTS_AD              0x11
+UMASK_EGRESS_VERT_INSERTS_AK              0x22
+UMASK_EGRESS_VERT_INSERTS_BL              0x44
+
+EVENT_EGRESS_VERT_CYCLES_FULL                 0x92 CBOX
+UMASK_EGRESS_VERT_CYCLES_FULL_AD_AG0          0x01
+UMASK_EGRESS_VERT_CYCLES_FULL_AK_AG0          0x02
+UMASK_EGRESS_VERT_CYCLES_FULL_BL_AG0          0x04
+UMASK_EGRESS_VERT_CYCLES_FULL_IV_AG0          0x08
+UMASK_EGRESS_VERT_CYCLES_FULL_AD_AG1          0x10
+UMASK_EGRESS_VERT_CYCLES_FULL_AK_AG1          0x20
+UMASK_EGRESS_VERT_CYCLES_FULL_BL_AG1          0x40
+UMASK_EGRESS_VERT_CYCLES_FULL_AD              0x11
+UMASK_EGRESS_VERT_CYCLES_FULL_AK              0x22
+UMASK_EGRESS_VERT_CYCLES_FULL_BL              0x44
+
+EVENT_EGRESS_VERT_CYCLES_NE                 0x93 CBOX
+UMASK_EGRESS_VERT_CYCLES_NE_AD_AG0          0x01
+UMASK_EGRESS_VERT_CYCLES_NE_AK_AG0          0x02
+UMASK_EGRESS_VERT_CYCLES_NE_BL_AG0          0x04
+UMASK_EGRESS_VERT_CYCLES_NE_IV_AG0          0x08
+UMASK_EGRESS_VERT_CYCLES_NE_AD_AG1          0x10
+UMASK_EGRESS_VERT_CYCLES_NE_AK_AG1          0x20
+UMASK_EGRESS_VERT_CYCLES_NE_BL_AG1          0x40
+UMASK_EGRESS_VERT_CYCLES_NE_AD              0x11
+UMASK_EGRESS_VERT_CYCLES_NE_AK              0x22
+UMASK_EGRESS_VERT_CYCLES_NE_BL              0x44
+
+EVENT_EGRESS_VERT_NACK                 0x98 CBOX
+UMASK_EGRESS_VERT_NACK_AD_AG0          0x01
+UMASK_EGRESS_VERT_NACK_AK_AG0          0x02
+UMASK_EGRESS_VERT_NACK_BL_AG0          0x04
+UMASK_EGRESS_VERT_NACK_IV_AG0          0x08
+UMASK_EGRESS_VERT_NACK_AD_AG1          0x10
+UMASK_EGRESS_VERT_NACK_AK_AG1          0x20
+UMASK_EGRESS_VERT_NACK_BL_AG1          0x40
+UMASK_EGRESS_VERT_NACK_AD              0x11
+UMASK_EGRESS_VERT_NACK_AK              0x22
+UMASK_EGRESS_VERT_NACK_BL              0x44
+
+EVENT_EGRESS_VERT_STARVED                 0x9A CBOX
+UMASK_EGRESS_VERT_STARVED_AD_AG0          0x01
+UMASK_EGRESS_VERT_STARVED_AK_AG0          0x02
+UMASK_EGRESS_VERT_STARVED_BL_AG0          0x04
+UMASK_EGRESS_VERT_STARVED_IV_AG0          0x08
+UMASK_EGRESS_VERT_STARVED_AD_AG1          0x10
+UMASK_EGRESS_VERT_STARVED_AK_AG1          0x20
+UMASK_EGRESS_VERT_STARVED_BL_AG1          0x40
+UMASK_EGRESS_VERT_STARVED_AD              0x11
+UMASK_EGRESS_VERT_STARVED_AK              0x22
+UMASK_EGRESS_VERT_STARVED_BL              0x44
+
+EVENT_EGRESS_VERT_ADS_USED                 0x9C CBOX
+UMASK_EGRESS_VERT_ADS_USED_AD_AG0          0x01
+UMASK_EGRESS_VERT_ADS_USED_AK_AG0          0x02
+UMASK_EGRESS_VERT_ADS_USED_BL_AG0          0x04
+UMASK_EGRESS_VERT_ADS_USED_AD_AG1          0x10
+UMASK_EGRESS_VERT_ADS_USED_AK_AG1          0x20
+UMASK_EGRESS_VERT_ADS_USED_BL_AG1          0x40
+UMASK_EGRESS_VERT_ADS_USED_AD              0x11
+UMASK_EGRESS_VERT_ADS_USED_AK              0x22
+UMASK_EGRESS_VERT_ADS_USED_BL              0x44
+
+EVENT_EGRESS_VERT_BYPASS                 0x9E CBOX
+UMASK_EGRESS_VERT_BYPASS_AD_AG0          0x01
+UMASK_EGRESS_VERT_BYPASS_AK_AG0          0x02
+UMASK_EGRESS_VERT_BYPASS_BL_AG0          0x04
+UMASK_EGRESS_VERT_BYPASS_IV_AG0          0x08
+UMASK_EGRESS_VERT_BYPASS_AD_AG1          0x10
+UMASK_EGRESS_VERT_BYPASS_AK_AG1          0x20
+UMASK_EGRESS_VERT_BYPASS_BL_AG1          0x40
+UMASK_EGRESS_VERT_BYPASS_AD              0x11
+UMASK_EGRESS_VERT_BYPASS_AK              0x22
+UMASK_EGRESS_VERT_BYPASS_BL              0x44
+
+EVENT_EGRESS_HORZ_OCCUPANCY            0x94 CBOX
+UMASK_EGRESS_HORZ_OCCUPANCY_AD         0x01
+UMASK_EGRESS_HORZ_OCCUPANCY_AK         0x02
+UMASK_EGRESS_HORZ_OCCUPANCY_BL         0x04
+UMASK_EGRESS_HORZ_OCCUPANCY_IV         0x08
+
+EVENT_EGRESS_HORZ_INSERTS            0x95 CBOX
+UMASK_EGRESS_HORZ_INSERTS_AD         0x01
+UMASK_EGRESS_HORZ_INSERTS_AK         0x02
+UMASK_EGRESS_HORZ_INSERTS_BL         0x04
+UMASK_EGRESS_HORZ_INSERTS_IV         0x08
+
+EVENT_EGRESS_HORZ_CYCLES_FULL            0x96 CBOX
+UMASK_EGRESS_HORZ_CYCLES_FULL_AD         0x01
+UMASK_EGRESS_HORZ_CYCLES_FULL_AK         0x02
+UMASK_EGRESS_HORZ_CYCLES_FULL_BL         0x04
+UMASK_EGRESS_HORZ_CYCLES_FULL_IV         0x08
+
+EVENT_EGRESS_HORZ_CYCLES_NE            0x97 CBOX
+UMASK_EGRESS_HORZ_CYCLES_NE_AD         0x01
+UMASK_EGRESS_HORZ_CYCLES_NE_AK         0x02
+UMASK_EGRESS_HORZ_CYCLES_NE_BL         0x04
+UMASK_EGRESS_HORZ_CYCLES_NE_IV         0x08
+
+EVENT_EGRESS_HORZ_NACK            0x99 CBOX
+UMASK_EGRESS_HORZ_NACK_AD         0x01
+UMASK_EGRESS_HORZ_NACK_AK         0x02
+UMASK_EGRESS_HORZ_NACK_BL         0x04
+UMASK_EGRESS_HORZ_NACK_IV         0x08
+
+EVENT_EGRESS_HORZ_STARVED            0x9B CBOX
+UMASK_EGRESS_HORZ_STARVED_AD         0x01
+UMASK_EGRESS_HORZ_STARVED_AK         0x02
+UMASK_EGRESS_HORZ_STARVED_BL         0x04
+UMASK_EGRESS_HORZ_STARVED_IV         0x08
+
+EVENT_EGRESS_HORZ_ADS_USED            0x9D CBOX
+UMASK_EGRESS_HORZ_ADS_USED_AD         0x01
+UMASK_EGRESS_HORZ_ADS_USED_AK         0x02
+UMASK_EGRESS_HORZ_ADS_USED_BL         0x04
+
+EVENT_EGRESS_HORZ_BYPASS            0x9F CBOX
+UMASK_EGRESS_HORZ_BYPASS_AD         0x01
+UMASK_EGRESS_HORZ_BYPASS_AK         0x02
+UMASK_EGRESS_HORZ_BYPASS_BL         0x04
+UMASK_EGRESS_HORZ_BYPASS_IV         0x08
+
+EVENT_RING_BOUNCES_VERT             0xA0 CBOX
+UMASK_RING_BOUNCES_VERT_AD          0x01
+UMASK_RING_BOUNCES_VERT_AK          0x02
+UMASK_RING_BOUNCES_VERT_BL          0x04
+UMASK_RING_BOUNCES_VERT_IV          0x08
+
+EVENT_RING_BOUNCES_HORZ             0xA1 CBOX
+UMASK_RING_BOUNCES_HORZ_AD          0x01
+UMASK_RING_BOUNCES_HORZ_AK          0x02
+UMASK_RING_BOUNCES_HORZ_BL          0x04
+UMASK_RING_BOUNCES_HORZ_IV          0x08
+
+EVENT_RING_SINK_STARVED_VERT             0xA2 CBOX
+UMASK_RING_SINK_STARVED_VERT_AD          0x01
+UMASK_RING_SINK_STARVED_VERT_AK          0x02
+UMASK_RING_SINK_STARVED_VERT_BL          0x04
+UMASK_RING_SINK_STARVED_VERT_IV          0x08
+
+EVENT_RING_SINK_STARVED_HORZ             0xA3 CBOX
+UMASK_RING_SINK_STARVED_HORZ_AD          0x01
+UMASK_RING_SINK_STARVED_HORZ_AK          0x02
+UMASK_RING_SINK_STARVED_HORZ_BL          0x04
+UMASK_RING_SINK_STARVED_HORZ_IV          0x08
+
+EVENT_RING_SRC_THRTL                0xA4 CBOX
+UMASK_RING_SRC_THRTL                0x00
+
+EVENT_FAST_ASSERTED                 0xA5 CBOX
+UMASK_FAST_ASSERTED_VERT            0x01
+UMASK_FAST_ASSERTED_HORZ            0x02
+
+EVENT_VERT_RING_AD_IN_USE           0xA6 CBOX
+UMASK_VERT_RING_AD_IN_USE_UP_EVEN   0x01
+UMASK_VERT_RING_AD_IN_USE_UP_ODD    0x02
+UMASK_VERT_RING_AD_IN_USE_UP        0x03
+UMASK_VERT_RING_AD_IN_USE_DN_EVEN   0x04
+UMASK_VERT_RING_AD_IN_USE_DN_ODD    0x08
+UMASK_VERT_RING_AD_IN_USE_DN        0x0C
+
+EVENT_HORZ_RING_AD_IN_USE               0xA7 CBOX
+UMASK_HORZ_RING_AD_IN_USE_LEFT_EVEN     0x01
+UMASK_HORZ_RING_AD_IN_USE_LEFT_ODD      0x02
+UMASK_HORZ_RING_AD_IN_USE_LEFT          0x03
+UMASK_HORZ_RING_AD_IN_USE_RIGHT_EVEN    0x04
+UMASK_HORZ_RING_AD_IN_USE_RIGHT_ODD     0x08
+UMASK_HORZ_RING_AD_IN_USE_RIGHT         0x0C
+
+EVENT_VERT_RING_AK_IN_USE           0xA8 CBOX
+UMASK_VERT_RING_AK_IN_USE_UP_EVEN   0x01
+UMASK_VERT_RING_AK_IN_USE_UP_ODD    0x02
+UMASK_VERT_RING_AK_IN_USE_UP        0x03
+UMASK_VERT_RING_AK_IN_USE_DN_EVEN   0x04
+UMASK_VERT_RING_AK_IN_USE_DN_ODD    0x08
+UMASK_VERT_RING_AK_IN_USE_DN        0x0C
+
+EVENT_HORZ_RING_AK_IN_USE               0xA9 CBOX
+UMASK_HORZ_RING_AK_IN_USE_LEFT_EVEN     0x01
+UMASK_HORZ_RING_AK_IN_USE_LEFT_ODD      0x02
+UMASK_HORZ_RING_AK_IN_USE_LEFT          0x03
+UMASK_HORZ_RING_AK_IN_USE_RIGHT_EVEN    0x04
+UMASK_HORZ_RING_AK_IN_USE_RIGHT_ODD     0x08
+UMASK_HORZ_RING_AK_IN_USE_RIGHT         0x0C
+
+EVENT_VERT_RING_BL_IN_USE           0xAA CBOX
+UMASK_VERT_RING_BL_IN_USE_UP_EVEN   0x01
+UMASK_VERT_RING_BL_IN_USE_UP_ODD    0x02
+UMASK_VERT_RING_BL_IN_USE_UP        0x03
+UMASK_VERT_RING_BL_IN_USE_DN_EVEN   0x04
+UMASK_VERT_RING_BL_IN_USE_DN_ODD    0x08
+UMASK_VERT_RING_BL_IN_USE_DN        0x0C
+
+EVENT_HORZ_RING_BL_IN_USE               0xAB CBOX
+UMASK_HORZ_RING_BL_IN_USE_LEFT_EVEN     0x01
+UMASK_HORZ_RING_BL_IN_USE_LEFT_ODD      0x02
+UMASK_HORZ_RING_BL_IN_USE_LEFT          0x03
+UMASK_HORZ_RING_BL_IN_USE_RIGHT_EVEN    0x04
+UMASK_HORZ_RING_BL_IN_USE_RIGHT_ODD     0x08
+UMASK_HORZ_RING_BL_IN_USE_RIGHT         0x0C
+
+EVENT_VERT_RING_IV_IN_USE           0xAC CBOX
+UMASK_VERT_RING_IV_IN_USE_UP_EVEN   0x01
+UMASK_VERT_RING_IV_IN_USE_UP_ODD    0x02
+UMASK_VERT_RING_IV_IN_USE_UP        0x03
+UMASK_VERT_RING_IV_IN_USE_DN_EVEN   0x04
+UMASK_VERT_RING_IV_IN_USE_DN_ODD    0x08
+UMASK_VERT_RING_IV_IN_USE_DN        0x0C
+
+EVENT_HORZ_RING_IV_IN_USE               0xAD CBOX
+UMASK_HORZ_RING_IV_IN_USE_LEFT_EVEN     0x01
+UMASK_HORZ_RING_IV_IN_USE_LEFT_ODD      0x02
+UMASK_HORZ_RING_IV_IN_USE_LEFT          0x03
+UMASK_HORZ_RING_IV_IN_USE_RIGHT_EVEN    0x04
+UMASK_HORZ_RING_IV_IN_USE_RIGHT_ODD     0x08
+UMASK_HORZ_RING_IV_IN_USE_RIGHT         0x0C
+
+EVENT_EGRESS_ORDERING               0xAE CBOX
+UMASK_EGRESS_ORDERING_IV_SNP_GO_UP  0x01
+UMASK_EGRESS_ORDERING_IV_SNP_GO_DN  0x04
+
+EVENT_TG_INGRESS_OCCUPANCY             0xB0 CBOX
+UMASK_TG_INGRESS_OCCUPANCY_AD_BNC      0x01
+UMASK_TG_INGRESS_OCCUPANCY_AK_BNC      0x02
+UMASK_TG_INGRESS_OCCUPANCY_BL_BNC      0x04
+UMASK_TG_INGRESS_OCCUPANCY_IV_BNC      0x08
+UMASK_TG_INGRESS_OCCUPANCY_AD_CRD      0x10
+UMASK_TG_INGRESS_OCCUPANCY_BL_CRD      0x40
+
+EVENT_TG_INGRESS_INSERTS             0xB1 CBOX
+UMASK_TG_INGRESS_INSERTS_AD_BNC      0x01
+UMASK_TG_INGRESS_INSERTS_AK_BNC      0x02
+UMASK_TG_INGRESS_INSERTS_BL_BNC      0x04
+UMASK_TG_INGRESS_INSERTS_IV_BNC      0x08
+UMASK_TG_INGRESS_INSERTS_AD_CRD      0x10
+UMASK_TG_INGRESS_INSERTS_BL_CRD      0x40
+
+EVENT_TG_INGRESS_BYPASS             0xB2 CBOX
+UMASK_TG_INGRESS_BYPASS_AD_BNC      0x01
+UMASK_TG_INGRESS_BYPASS_AK_BNC      0x02
+UMASK_TG_INGRESS_BYPASS_BL_BNC      0x04
+UMASK_TG_INGRESS_BYPASS_IV_BNC      0x08
+UMASK_TG_INGRESS_BYPASS_AD_CRD      0x10
+UMASK_TG_INGRESS_BYPASS_BL_CRD      0x40
+
+EVENT_TG_INGRESS_CRD_STARVED             0xB3 CBOX
+UMASK_TG_INGRESS_CRD_STARVED_AD_BNC      0x01
+UMASK_TG_INGRESS_CRD_STARVED_AK_BNC      0x02
+UMASK_TG_INGRESS_CRD_STARVED_BL_BNC      0x04
+UMASK_TG_INGRESS_CRD_STARVED_IV_BNC      0x08
+UMASK_TG_INGRESS_CRD_STARVED_AD_CRD      0x10
+UMASK_TG_INGRESS_CRD_STARVED_BL_CRD      0x40
+UMASK_TG_INGRESS_CRD_STARVED_IFV         0x80
+
+EVENT_TG_INGRESS_BUSY_STARVED             0xB4 CBOX
+UMASK_TG_INGRESS_BUSY_STARVED_AD_BNC      0x01
+UMASK_TG_INGRESS_BUSY_STARVED_BL_BNC      0x04
+UMASK_TG_INGRESS_BUSY_STARVED_AD_CRD      0x10
+UMASK_TG_INGRESS_BUSY_STARVED_BL_CRD      0x40
+
+EVENT_RXR_CYCLES_NE                 0x10 PBOX
+UMASK_RXR_CYCLES_NE_CBO_IDI         0x01
+UMASK_RXR_CYCLES_NE_CBO_NCB         0x02
+UMASK_RXR_CYCLES_NE_CBO_NCS         0x04
+UMASK_RXR_CYCLES_NE_ALL             0x80
+
+EVENT_TXC_CYCLES_NE         0x23 PBOX0|PBOX1
+UMASK_TXC_CYCLES_NE_AD_0    0x01
+UMASK_TXC_CYCLES_NE_AK_0    0x02
+UMASK_TXC_CYCLES_NE_BL_0    0x04
+UMASK_TXC_CYCLES_NE_AD_1    0x08
+UMASK_TXC_CYCLES_NE_AK_1    0x10
+UMASK_TXC_CYCLES_NE_BL_1    0x20
+
+EVENT_TXC_INSERTS           0x24 PBOX
+UMASK_TXC_INSERTS_AD_0      0x01
+UMASK_TXC_INSERTS_AK_0      0x02
+UMASK_TXC_INSERTS_BL_0      0x04
+UMASK_TXC_INSERTS_AK_CRD_0  0x08
+UMASK_TXC_INSERTS_AD_1      0x10
+UMASK_TXC_INSERTS_AK_1      0x20
+UMASK_TXC_INSERTS_BL_1      0x40
+UMASK_TXC_INSERTS_AK_CRD_1  0x80
+
+EVENT_TXC_CYCLES_FULL         0x25 PBOX
+UMASK_TXC_CYCLES_FULL_AD_0    0x01
+UMASK_TXC_CYCLES_FULL_AK_0    0x02
+UMASK_TXC_CYCLES_FULL_BL_0    0x04
+UMASK_TXC_CYCLES_FULL_AD_1    0x08
+UMASK_TXC_CYCLES_FULL_AK_1    0x10
+UMASK_TXC_CYCLES_FULL_BL_1    0x20
+
+EVENT_IO_CLKS_COUNT_ESEL        0x00 IBOX
+UMASK_IO_CLKS_COUNT_ESEL        0x00
+
+EVENT_BL_INGRESS_ALLOCATIONS_DRS_ESEL        0x01 IBOX
+UMASK_BL_INGRESS_ALLOCATIONS_DRS_ESEL        0x00
+
+EVENT_BL_INGRESS_ALLOCATIONS_NCB_ESEL        0x02 IBOX
+UMASK_BL_INGRESS_ALLOCATIONS_NCB_ESEL        0x00
+
+EVENT_BL_INGRESS_ALLOCATIONS_NCS_ESEL        0x03 IBOX
+UMASK_BL_INGRESS_ALLOCATIONS_NCS_ESEL        0x00
+
+EVENT_BL_INGRESS_FULL_DRS_ESEL          0x04 IBOX
+UMASK_BL_INGRESS_FULL_DRS_ESEL          0x00
+
+EVENT_BL_INGRESS_FULL_NCB_ESEL          0x05 IBOX
+UMASK_BL_INGRESS_FULL_NCB_ESEL          0x00
+
+EVENT_BL_INGRESS_FULL_NCS_ESEL          0x06 IBOX
+UMASK_BL_INGRESS_FULL_NCS_ESEL          0x00
+
+EVENT_BL_INGRESS_OCCUPANCY_DRS_ESEL     0x07 IBOX
+UMASK_BL_INGRESS_OCCUPANCY_DRS_ESEL     0x00
+
+EVENT_BL_INGRESS_OCCUPANCY_NCB_ESEL     0x08 IBOX
+UMASK_BL_INGRESS_OCCUPANCY_NCB_ESEL     0x00
+
+EVENT_BL_INGRESS_OCCUPANCY_NCS_ESEL     0x09 IBOX
+UMASK_BL_INGRESS_OCCUPANCY_NCS_ESEL     0x00
+
+EVENT_AK_INGRESS_ALLOCATIONS_ESEL       0x0A IBOX
+UMASK_AK_INGRESS_ALLOCATIONS_ESEL       0x00
+
+EVENT_OUTBOUND_REQUESTS_REQUEST_Q_OCCUPANCY_ESEL    0x0D IBOX
+UMASK_OUTBOUND_REQUESTS_REQUEST_Q_OCCUPANCY_ESEL    0x00
+
+EVENT_OUTBOUND_REQUESTS_REQUEST_Q_ALLOCATIONS_NCB_ESEL 0x0E IBOX
+UMASK_OUTBOUND_REQUESTS_REQUEST_Q_ALLOCATIONS_NCB_ESEL 0x00
+
+EVENT_OUTBOUND_REQUESTS_REQUEST_Q_ALLOCATIONS_NCS_ESEL  0x0F IBOX
+UMASK_OUTBOUND_REQUESTS_REQUEST_Q_ALLOCATIONS_NCS_ESEL  0x00
+
+EVENT_WRITE_CACHE_TOTAL_ESEL         0x12 IBOX
+UMASK_WRITE_CACHE_TOTAL_ESEL_ANY     0x01
+UMASK_WRITE_CACHE_TOTAL_ESEL_IV_Q    0x02
+
+EVENT_COHERENT_OP_ESEL              0x13 IBOX
+UMASK_COHERENT_OP_ESEL_RDCUR        0x01
+UMASK_COHERENT_OP_ESEL_RFO          0x08
+UMASK_COHERENT_OP_ESEL_I2M          0x10
+UMASK_COHERENT_OP_ESEL_WBMTOI       0x40
+UMASK_COHERENT_OP_ESEL_CLFLUSH      0x80
+
+EVENT_TRANSACTIONS_COUNT_ESEL                   0x16 IBOX
+UMASK_TRANSACTIONS_COUNT_ESEL_READ_FETCH        0x01
+UMASK_TRANSACTIONS_COUNT_ESEL_WRITE_FETCH       0x02
+UMASK_TRANSACTIONS_COUNT_ESEL_WRITE_PREFETCH    0x08
+UMASK_TRANSACTIONS_COUNT_ESEL_ATOMIC            0x10
+UMASK_TRANSACTIONS_COUNT_ESEL_OTHER             0x20
+UMASK_TRANSACTIONS_COUNT_ESEL_SOURCE            0x40
+
+EVENT_SNOOP_RESPONSES_ESEL              0x17 IBOX
+UMASK_SNOOP_RESPONSES_ESEL_IRPMISS      0x01
+UMASK_SNOOP_RESPONSES_ESEL_IRPHITI      0x02
+UMASK_SNOOP_RESPONSES_ESEL_IRPHITES     0x04
+UMASK_SNOOP_RESPONSES_ESEL_IRPHITM      0x08
+UMASK_SNOOP_RESPONSES_ESEL_SNPCODE      0x10
+UMASK_SNOOP_RESPONSES_ESEL_SNPDATA      0x20
+UMASK_SNOOP_RESPONSES_ESEL_SNPINV       0x40
+
+EVENT_STALL_CYCLES_AD_EGRESS_CREDITS_ESEL 0x18 IBOX
+UMASK_STALL_CYCLES_AD_EGRESS_CREDITS_ESEL 0x00
+
+EVENT_STALL_CYCLES_BL_EGRESS_CREDITS_ESEL 0x19 IBOX
+UMASK_STALL_CYCLES_BL_EGRESS_CREDITS_ESEL 0x00
diff --git a/src/includes/perfmon_nehalem.h b/src/includes/perfmon_nehalem.h
index 772f9e4..9c25137 100644
--- a/src/includes/perfmon_nehalem.h
+++ b/src/includes/perfmon_nehalem.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Header File of perfmon module for Intel Nehalem.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -230,12 +230,12 @@ int perfmon_setupCounterThread_nehalem(int thread_id, PerfmonEventSet* eventSet)
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, 0x0ULL));
     }
-    if (haveLock && (eventSet->regTypeMask & ~(0xF)))
+    if (haveLock && MEASURE_UNCORE(eventSet))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, 0x0ULL));
     }
@@ -243,7 +243,7 @@ int perfmon_setupCounterThread_nehalem(int thread_id, PerfmonEventSet* eventSet)
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -303,7 +303,7 @@ int perfmon_startCountersThread_nehalem(int thread_id, PerfmonEventSet* eventSet
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -341,13 +341,13 @@ int perfmon_startCountersThread_nehalem(int thread_id, PerfmonEventSet* eventSet
         }
     }
 
-    if (haveLock && (uflags != 0x0ULL) && (eventSet->regTypeMask & ~(0xF)))
+    if (haveLock && (uflags != 0x0ULL) && MEASURE_UNCORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_UNCORE_PERF_GLOBAL_CTRL, LLU_CAST uflags, UNFREEZE_UNCORE);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, uflags));
     }
 
-    if ((flags != 0x0ULL) && (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED))))
+    if ((flags != 0x0ULL) && (MEASURE_CORE(eventSet)))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST flags, UNFREEZE_PMC_AND_FIXED);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, flags));
@@ -391,13 +391,13 @@ int perfmon_stopCountersThread_nehalem(int thread_id, PerfmonEventSet* eventSet)
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_AND_FIXED);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
     }
 
-    if (haveLock && (eventSet->regTypeMask & ~(0xF)))
+    if (haveLock && MEASURE_UNCORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_UNCORE_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_UNCORE);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, 0x0ULL));
@@ -408,7 +408,7 @@ int perfmon_stopCountersThread_nehalem(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -464,14 +464,14 @@ int perfmon_readCountersThread_nehalem(int thread_id, PerfmonEventSet* eventSet)
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, &pmc_flags));
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_AND_FIXED);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
     }
 
-    if (haveLock && (eventSet->regTypeMask & ~(0xF)))
+    if (haveLock && MEASURE_UNCORE(eventSet))
     {
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, &uncore_flags));
         VERBOSEPRINTREG(cpu_id, MSR_UNCORE_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_UNCORE);
@@ -483,7 +483,7 @@ int perfmon_readCountersThread_nehalem(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -524,12 +524,12 @@ int perfmon_readCountersThread_nehalem(int thread_id, PerfmonEventSet* eventSet)
         }
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, pmc_flags, UNFREEZE_PMC_AND_FIXED);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, pmc_flags));
     }
-    if (haveLock && (eventSet->regTypeMask & ~(0xF)))
+    if (haveLock && MEASURE_UNCORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_UNCORE_PERF_GLOBAL_CTRL, uncore_flags, UNFREEZE_UNCORE);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, uncore_flags));
@@ -556,7 +556,7 @@ int perfmon_finalizeCountersThread_nehalem(int thread_id, PerfmonEventSet* event
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -606,7 +606,7 @@ int perfmon_finalizeCountersThread_nehalem(int thread_id, PerfmonEventSet* event
         eventSet->events[i].threadCounter[thread_id].init = FALSE;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, ovf_values_core, CLEAR_OVF_CTRL);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, ovf_values_core));
@@ -614,7 +614,7 @@ int perfmon_finalizeCountersThread_nehalem(int thread_id, PerfmonEventSet* event
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
     }
 
-    if (haveLock && eventSet->regTypeMask & ~(0xFULL))
+    if (haveLock && MEASURE_UNCORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_UNCORE_PERF_GLOBAL_OVF_CTRL, 0x0ULL, CLEAR_UNCORE_OVF);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_OVF_CTRL, 0x0ULL));
@@ -622,7 +622,7 @@ int perfmon_finalizeCountersThread_nehalem(int thread_id, PerfmonEventSet* event
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, 0x0ULL));
         for (int i=UNCORE;i<NUM_UNITS;i++)
         {
-            if ((eventSet->regTypeMask & (REG_TYPE_MASK(i))) && box_map[i].ctrlRegister != 0x0)
+            if (TESTTYPE(eventSet, i) && box_map[i].ctrlRegister != 0x0)
             {
                 VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL, CLEAR_UNCORE_BOX_CTRL);
                 HPMwrite(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL);
diff --git a/src/includes/perfmon_nehalemEX.h b/src/includes/perfmon_nehalemEX.h
index f50c8ec..7c37fbb 100644
--- a/src/includes/perfmon_nehalemEX.h
+++ b/src/includes/perfmon_nehalemEX.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Header File of perfmon module for Intel Nehalem EX.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -709,7 +709,7 @@ int nex_sbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
 }
 
 #define NEX_FREEZE_UNCORE \
-    if (haveLock && (eventSet->regTypeMask & ~(0xF))) \
+    if (haveLock && MEASURE_UNCORE(eventSet)) \
     { \
         uint64_t tmp = 0x0ULL; \
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_U_PMON_GLOBAL_CTRL, &tmp)); \
@@ -736,18 +736,18 @@ int perfmon_setupCounterThread_nehalemEX(int thread_id, PerfmonEventSet* eventSe
     {
         haveTileLock = 1;
     }
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_AND_FIXED)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
     }
 
-    if (haveLock && (eventSet->regTypeMask & ~(0xFULL)))
+    if (haveLock && MEASURE_UNCORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_U_PMON_GLOBAL_CTRL, 0x0ULL, FREEZE_UNCORE)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_U_PMON_GLOBAL_CTRL, 0x0ULL));
     }
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(MBOX0))))
+    if (haveLock && TESTTYPE(eventSet, MBOX0))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_M0_PMON_TIMESTAMP, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_M0_PMON_DSP, 0x0ULL));
@@ -758,7 +758,7 @@ int perfmon_setupCounterThread_nehalemEX(int thread_id, PerfmonEventSet* eventSe
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_M0_PMON_PLD, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_M0_PMON_ZDP, 0x0ULL));
     }
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(MBOX1))))
+    if (haveLock && TESTTYPE(eventSet, MBOX1))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_M1_PMON_TIMESTAMP, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_M1_PMON_DSP, 0x0ULL));
@@ -769,7 +769,7 @@ int perfmon_setupCounterThread_nehalemEX(int thread_id, PerfmonEventSet* eventSe
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_M1_PMON_PLD, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_M1_PMON_ZDP, 0x0ULL));
     }
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(RBOX0))))
+    if (haveLock && TESTTYPE(eventSet, RBOX0))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_R0_PMON_IPERF0_P0, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_R0_PMON_IPERF0_P1, 0x0ULL));
@@ -784,7 +784,7 @@ int perfmon_setupCounterThread_nehalemEX(int thread_id, PerfmonEventSet* eventSe
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_R0_PMON_QLX_P2, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_R0_PMON_QLX_P3, 0x0ULL));
     }
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(RBOX1))))
+    if (haveLock && TESTTYPE(eventSet, RBOX1))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_R1_PMON_IPERF0_P0, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_R1_PMON_IPERF0_P1, 0x0ULL));
@@ -803,7 +803,7 @@ int perfmon_setupCounterThread_nehalemEX(int thread_id, PerfmonEventSet* eventSe
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -860,17 +860,18 @@ int perfmon_setupCounterThread_nehalemEX(int thread_id, PerfmonEventSet* eventSe
                 break;
 
             case WBOX0FIX:
-                if (haveLock && eventSet->regTypeMask & (REG_TYPE_MASK(WBOX0FIX)))
+                if (haveLock && TESTTYPE(eventSet, WBOX0FIX))
                 {
                     flags = 0x1ULL;
+                    RegisterType newtype = WBOX;
                     CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, reg , flags));
                     VERBOSEPRINTREG(cpu_id, reg, flags, SETUP_WBOXFIX)
-                    eventSet->regTypeMask |= REG_TYPE_MASK(WBOX);
+                    SETTYPE(eventSet, newtype);
                 }
                 break;
 
             case UBOX:
-                if (haveLock && eventSet->regTypeMask & (REG_TYPE_MASK(UBOX)))
+                if (haveLock && TESTTYPE(eventSet, UBOX))
                 {
                     flags |= (1ULL<<22); /* set enable bit */
                     flags |= event->eventId;
@@ -907,7 +908,7 @@ int perfmon_setupCounterThread_nehalemEX(int thread_id, PerfmonEventSet* eventSe
 }
 
 #define NEX_RESET_ALL_UNCORE_COUNTERS \
-    if (haveLock && (eventSet->regTypeMask & ~(0xF))) \
+    if (haveLock && MEASURE_UNCORE(eventSet)) \
     { \
         uint64_t tmp = 0x0ULL; \
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_U_PMON_GLOBAL_CTRL, &tmp)); \
@@ -918,7 +919,7 @@ int perfmon_setupCounterThread_nehalemEX(int thread_id, PerfmonEventSet* eventSe
     }
 
 #define NEX_UNFREEZE_UNCORE \
-    if (haveLock && (eventSet->regTypeMask & ~(0xF))) \
+    if (haveLock && MEASURE_UNCORE(eventSet)) \
     { \
         uint64_t tmp = 0x0ULL; \
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_U_PMON_GLOBAL_CTRL, &tmp)); \
@@ -928,7 +929,7 @@ int perfmon_setupCounterThread_nehalemEX(int thread_id, PerfmonEventSet* eventSe
     }
 
 #define NEX_UNFREEZE_BOX(id, flags) \
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(id)))) \
+    if (haveLock && TESTTYPE(eventSet, id)) \
     { \
         VERBOSEPRINTREG(cpu_id, box_map[id].ctrlRegister, LLU_CAST flags, UNFREEZE_BOX); \
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, box_map[id].ctrlRegister, flags)); \
@@ -954,7 +955,7 @@ int perfmon_startCountersThread_nehalemEX(int thread_id, PerfmonEventSet* eventS
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE) 
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -973,13 +974,13 @@ int perfmon_startCountersThread_nehalemEX(int thread_id, PerfmonEventSet* eventS
                     core_ctrl_flags |= (1ULL<<(index+32));
                     break;
                 case WBOX0FIX:
-                    if (haveLock && (eventSet->regTypeMask & REG_TYPE_MASK(WBOX0FIX)))
+                    if (haveLock && TESTTYPE(eventSet, WBOX0FIX))
                     {
                         uflags[WBOX] |= (1ULL<<31);
                     }
                     break;
                 default:
-                    if (haveLock && (eventSet->regTypeMask & REG_TYPE_MASK(counter_map[index].type)))
+                    if (haveLock && TESTTYPE(eventSet, counter_map[index].type))
                     {
                         uflags[counter_map[index].type] |= (1<<getCounterTypeOffset(index));
                     }
@@ -1002,7 +1003,7 @@ int perfmon_startCountersThread_nehalemEX(int thread_id, PerfmonEventSet* eventS
     NEX_UNFREEZE_UNCORE;
 
     /* Finally enable counters */
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST core_ctrl_flags, GLOBAL_CTRL);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, core_ctrl_flags));
@@ -1046,7 +1047,7 @@ int perfmon_stopCountersThread_nehalemEX(int thread_id, PerfmonEventSet* eventSe
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST 0x0ULL, FREEZE_PMC_AND_FIXED);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
@@ -1058,7 +1059,7 @@ int perfmon_stopCountersThread_nehalemEX(int thread_id, PerfmonEventSet* eventSe
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -1078,7 +1079,7 @@ int perfmon_stopCountersThread_nehalemEX(int thread_id, PerfmonEventSet* eventSe
                     VERBOSEPRINTREG(cpu_id, counter_map[index].counterRegister, LLU_CAST counter_result, READ_FIXED);
                     break;
                 default:
-                    if(haveLock && (eventSet->regTypeMask & REG_TYPE_MASK(counter_map[index].type)))
+                    if (haveLock && TESTTYPE(eventSet, counter_map[index].type))
                     {
                         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, counter_map[index].counterRegister, &counter_result));
                         NEX_CHECK_UNCORE_OVERFLOW(counter_map[index].type, getCounterTypeOffset(index));
@@ -1105,7 +1106,7 @@ int perfmon_readCountersThread_nehalemEX(int thread_id, PerfmonEventSet* eventSe
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, &core_ctrl_flags));
     }
@@ -1116,7 +1117,7 @@ int perfmon_readCountersThread_nehalemEX(int thread_id, PerfmonEventSet* eventSe
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -1136,7 +1137,7 @@ int perfmon_readCountersThread_nehalemEX(int thread_id, PerfmonEventSet* eventSe
                     VERBOSEPRINTREG(cpu_id, counter, LLU_CAST counter_result, READ_FIXED);
                     break;
                 default:
-                    if(haveLock && (eventSet->regTypeMask & REG_TYPE_MASK(counter_map[index].type)))
+                    if (haveLock && TESTTYPE(eventSet, counter_map[index].type))
                     {
                         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, counter, &counter_result));
                         NEX_CHECK_UNCORE_OVERFLOW(counter_map[index].type, getCounterTypeOffset(index));
@@ -1149,7 +1150,7 @@ int perfmon_readCountersThread_nehalemEX(int thread_id, PerfmonEventSet* eventSe
     }
 
     NEX_UNFREEZE_UNCORE;
-    if ((eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED))) && (core_ctrl_flags != 0x0ULL))
+    if ((MEASURE_CORE(eventSet)) && (core_ctrl_flags != 0x0ULL))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, core_ctrl_flags));
     }
@@ -1174,7 +1175,7 @@ int perfmon_finalizeCountersThread_nehalemEX(int thread_id, PerfmonEventSet* eve
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -1265,7 +1266,7 @@ int perfmon_finalizeCountersThread_nehalemEX(int thread_id, PerfmonEventSet* eve
         eventSet->events[i].threadCounter[thread_id].init = FALSE;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, ovf_values_core, CLEAR_OVF_CTRL);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, ovf_values_core));
@@ -1273,7 +1274,7 @@ int perfmon_finalizeCountersThread_nehalemEX(int thread_id, PerfmonEventSet* eve
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
     }
 
-    if (haveLock && (eventSet->regTypeMask & ~(0xFULL)))
+    if (haveLock && MEASURE_UNCORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_U_PMON_GLOBAL_OVF_CTRL, 0x0ULL, CLEAR_UNCORE_OVF);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_U_PMON_GLOBAL_OVF_CTRL, 0x0ULL));
@@ -1281,7 +1282,7 @@ int perfmon_finalizeCountersThread_nehalemEX(int thread_id, PerfmonEventSet* eve
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_U_PMON_GLOBAL_CTRL, 0x0ULL));
         for (int i=UNCORE;i<NUM_UNITS;i++)
         {
-            if ((eventSet->regTypeMask & (REG_TYPE_MASK(i))) && box_map[i].ctrlRegister != 0x0)
+            if (TESTTYPE(eventSet, i) && box_map[i].ctrlRegister != 0x0)
             {
                 VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL, CLEAR_UNCORE_BOX_CTRL);
                 HPMwrite(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL);
diff --git a/src/includes/perfmon_nehalemEX_counters.h b/src/includes/perfmon_nehalemEX_counters.h
index 137c414..c0fefad 100644
--- a/src/includes/perfmon_nehalemEX_counters.h
+++ b/src/includes/perfmon_nehalemEX_counters.h
@@ -5,14 +5,14 @@
  *
  *      Description: Counter Header File of perfmon module for Intel Westmere EX.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_nehalemEX_events.txt b/src/includes/perfmon_nehalemEX_events.txt
index 62a3f02..50446b3 100644
--- a/src/includes/perfmon_nehalemEX_events.txt
+++ b/src/includes/perfmon_nehalemEX_events.txt
@@ -4,14 +4,14 @@
 #
 #      Description:  Event list for Intel Nehalem EX
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_nehalemEX_westmereEX_common.h b/src/includes/perfmon_nehalemEX_westmereEX_common.h
index a2d0ebb..7f8767f 100644
--- a/src/includes/perfmon_nehalemEX_westmereEX_common.h
+++ b/src/includes/perfmon_nehalemEX_westmereEX_common.h
@@ -5,13 +5,13 @@
  *
  *      Description:  Common definitions for Intel Nehalem EX and Westmere EX
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_nehalem_counters.h b/src/includes/perfmon_nehalem_counters.h
index 332b46a..53f20e0 100644
--- a/src/includes/perfmon_nehalem_counters.h
+++ b/src/includes/perfmon_nehalem_counters.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Counter Header File of perfmon module for Intel Nehalem.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_nehalem_events.txt b/src/includes/perfmon_nehalem_events.txt
index 48c9b41..a240ba9 100644
--- a/src/includes/perfmon_nehalem_events.txt
+++ b/src/includes/perfmon_nehalem_events.txt
@@ -4,14 +4,14 @@
 #
 #      Description:  Event list for Intel Nehalem
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_p6_events.txt b/src/includes/perfmon_p6_events.txt
index e8cdda9..74b5fe1 100644
--- a/src/includes/perfmon_p6_events.txt
+++ b/src/includes/perfmon_p6_events.txt
@@ -4,14 +4,14 @@
 #
 #      Description:  Event list for Intel Pentium 3
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_perf.h b/src/includes/perfmon_perf.h
index a21aaad..56ff2c8 100644
--- a/src/includes/perfmon_perf.h
+++ b/src/includes/perfmon_perf.h
@@ -6,14 +6,14 @@
  *      Description: Header file of example perfmon module for software events using
  *                   the perf_event interface
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_perfevent.h b/src/includes/perfmon_perfevent.h
new file mode 100644
index 0000000..868fd25
--- /dev/null
+++ b/src/includes/perfmon_perfevent.h
@@ -0,0 +1,423 @@
+/*
+ * =======================================================================================
+ *
+ *      Filename:  perfmon_perfevent.h
+ *
+ *      Description:  Header File of perfmon module for perf_event kernel interface.
+ *
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
+ *
+ *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
+ *      Project:  likwid
+ *
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ *
+ *      This program is free software: you can redistribute it and/or modify it under
+ *      the terms of the GNU General Public License as published by the Free Software
+ *      Foundation, either version 3 of the License, or (at your option) any later
+ *      version.
+ *
+ *      This program is distributed in the hope that it will be useful, but WITHOUT ANY
+ *      WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ *      PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+ *
+ *      You should have received a copy of the GNU General Public License along with
+ *      this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * =======================================================================================
+ */
+
+#include <error.h>
+#include <affinity.h>
+#include <limits.h>
+#include <topology.h>
+#include <access.h>
+#include <perfmon.h>
+#include <linux/perf_event.h>
+#include <linux/version.h>
+#include <sys/ioctl.h>
+#include <asm/unistd.h>
+#include <string.h>
+
+
+static int* cpu_event_fds[MAX_NUM_THREADS] = { NULL }; /* per-CPU fd table indexed by counter index; allocated in perfmon_init_perfevent() */
+static int paranoid_level = -1; /* value parsed from /proc/sys/kernel/perf_event_paranoid; stays -1 until it was read */
+static int informed_paranoid = 0; /* set to 1 after the paranoid level was read, so check and warning run only once */
+static int running_group = -1; /* first fd opened successfully by the latest perfmon_setupCountersThread_perfevent() call */
+
+static char* translate_types[NUM_UNITS] = { /* sysfs event_source folder per unit type; unit types not listed here stay NULL */
+    [FIXED] = "/sys/bus/event_source/devices/cpu",
+    [PMC] = "/sys/bus/event_source/devices/cpu",
+    [MBOX0] = "/sys/bus/event_source/devices/uncore_imc_0",
+    [MBOX1] = "/sys/bus/event_source/devices/uncore_imc_1",
+    [MBOX2] = "/sys/bus/event_source/devices/uncore_imc_2",
+    [MBOX3] = "/sys/bus/event_source/devices/uncore_imc_3",
+    [CBOX0] = "/sys/bus/event_source/devices/uncore_cbox_0",
+    [CBOX1] = "/sys/bus/event_source/devices/uncore_cbox_1",
+    [CBOX2] = "/sys/bus/event_source/devices/uncore_cbox_2",
+    [CBOX3] = "/sys/bus/event_source/devices/uncore_cbox_3",
+    [CBOX4] = "/sys/bus/event_source/devices/uncore_cbox_4",
+    [CBOX5] = "/sys/bus/event_source/devices/uncore_cbox_5",
+    [CBOX6] = "/sys/bus/event_source/devices/uncore_cbox_6",
+    [CBOX7] = "/sys/bus/event_source/devices/uncore_cbox_7",
+    [CBOX8] = "/sys/bus/event_source/devices/uncore_cbox_8",
+    [CBOX9] = "/sys/bus/event_source/devices/uncore_cbox_9",
+    [CBOX10] = "/sys/bus/event_source/devices/uncore_cbox_10",
+    [CBOX11] = "/sys/bus/event_source/devices/uncore_cbox_11",
+    [CBOX12] = "/sys/bus/event_source/devices/uncore_cbox_12",
+    [CBOX13] = "/sys/bus/event_source/devices/uncore_cbox_13",
+    [CBOX14] = "/sys/bus/event_source/devices/uncore_cbox_14",
+    [CBOX15] = "/sys/bus/event_source/devices/uncore_cbox_15",
+    [CBOX16] = "/sys/bus/event_source/devices/uncore_cbox_16",
+    [CBOX17] = "/sys/bus/event_source/devices/uncore_cbox_17",
+    [CBOX18] = "/sys/bus/event_source/devices/uncore_cbox_18",
+    [CBOX19] = "/sys/bus/event_source/devices/uncore_cbox_19",
+    [CBOX20] = "/sys/bus/event_source/devices/uncore_cbox_20",
+    [CBOX21] = "/sys/bus/event_source/devices/uncore_cbox_21",
+    [CBOX22] = "/sys/bus/event_source/devices/uncore_cbox_22",
+    [CBOX23] = "/sys/bus/event_source/devices/uncore_cbox_23",
+    [BBOX0] = "/sys/bus/event_source/devices/uncore_ha",
+    [WBOX] = "/sys/bus/event_source/devices/uncore_pcu",
+    [SBOX0] = "/sys/bus/event_source/devices/uncore_qpi_0",
+    [SBOX1] = "/sys/bus/event_source/devices/uncore_qpi_1",
+    [PBOX] = "/sys/bus/event_source/devices/uncore_r2pcie",
+    [RBOX0] = "/sys/bus/event_source/devices/uncore_r3qpi_0",
+    [RBOX1] = "/sys/bus/event_source/devices/uncore_r3qpi_1",
+    [UBOX] = "/sys/bus/event_source/devices/uncore_ubox",
+};
+
+
+/* Invoke the perf_event_open system call directly through syscall(). */
+static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
+                            int cpu, int group_fd, unsigned long flags)
+{
+    /* The result is narrowed to int before being widened back to long. */
+    int fd_or_err = syscall(__NR_perf_event_open, hw_event, pid,
+                            cpu, group_fd, flags);
+
+    return fd_or_err;
+}
+
+/* Parse the kernel's perf_event paranoid level on the first call (warning if it
+ * is above 0) and allocate the fd table for cpu_id with all entries set to -1.
+ * Returns 0 on success or -ENOMEM; exits if the paranoid file cannot be opened. */
+int perfmon_init_perfevent(int cpu_id)
+{
+    char buff[100];
+    FILE* fd;
+    if (!informed_paranoid)
+    {
+        fd = fopen("/proc/sys/kernel/perf_event_paranoid", "r");
+        if (fd == NULL)
+        {
+            fprintf(stderr, "ERROR: Linux kernel has no perf_event support\n");
+            fprintf(stderr, "ERROR: Cannot open file /proc/sys/kernel/perf_event_paranoid\n");
+            exit(EXIT_FAILURE); /* fd is NULL here: there is nothing to fclose() */
+        }
+        /* fgets() always NUL-terminates, so atoi() never scans past the data read */
+        if (fgets(buff, sizeof(buff), fd) != NULL)
+        {
+            paranoid_level = atoi(buff);
+        }
+        fclose(fd);
+        if (paranoid_level > 0)
+        {
+            fprintf(stderr, "WARN: Linux kernel configured with paranoid level %d\n", paranoid_level);
+#if defined(__x86_64__) || defined(__i386__)
+            fprintf(stderr, "WARN: Paranoid level 0 is required to measure Uncore counters\n");
+#endif
+        }
+        informed_paranoid = 1;
+    }
+    if (cpu_event_fds[cpu_id] == NULL)
+    {
+        cpu_event_fds[cpu_id] = (int*) malloc(perfmon_numCounters * sizeof(int));
+        if (cpu_event_fds[cpu_id] == NULL)
+        {
+            return -ENOMEM;
+        }
+        memset(cpu_event_fds[cpu_id], -1, perfmon_numCounters * sizeof(int)); /* all-ones bytes read back as -1 per int */
+    }
+    return 0;
+}
+
+/* Prepare attr for a FIXED counter by mapping the event name to a generic perf
+ * hardware event. Returns 0 when the name is known and -1 otherwise. */
+int perf_fixed_setup(struct perf_event_attr *attr, PerfmonEvent *event)
+{
+    int status = -1;
+    attr->type = PERF_TYPE_HARDWARE;
+    attr->disabled = 1;
+    attr->inherit = 1;
+    attr->exclude_kernel = 1;
+    attr->exclude_hv = 1;
+    if (!strcmp(event->name, "INSTR_RETIRED_ANY"))
+    {
+        status = 0;
+        attr->config = PERF_COUNT_HW_INSTRUCTIONS;
+    }
+    else if (!strcmp(event->name, "CPU_CLK_UNHALTED_CORE"))
+    {
+        status = 0;
+        attr->config = PERF_COUNT_HW_CPU_CYCLES;
+    }
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0)
+    else if (!strcmp(event->name, "CPU_CLK_UNHALTED_REF"))
+    {
+        status = 0;
+        attr->config = PERF_COUNT_HW_REF_CPU_CYCLES;
+    }
+#endif
+    return status;
+}
+
+/* Prepare attr as a raw, initially disabled, inherited event for a PMC counter.
+ * The raw config is (umask << 8) + eventId. Kernel-mode counting is excluded
+ * unless the event carries the EVENT_OPTION_COUNT_KERNEL option; every other
+ * option type is ignored.
+ * Always returns 0. */
+int perf_pmc_setup(struct perf_event_attr *attr, PerfmonEvent *event)
+{
+    attr->type = PERF_TYPE_RAW;
+    attr->config = (event->umask<<8) + event->eventId;
+    attr->disabled = 1;
+    attr->inherit = 1;
+    attr->exclude_hv = 1;
+    attr->exclude_kernel = 1;
+
+    /* The loop simply does not run when the event has no options. */
+    for (int opt = 0; opt < event->numberOfOptions; opt++)
+    {
+        if (event->options[opt].type == EVENT_OPTION_COUNT_KERNEL)
+        {
+            /* count kernel mode as well */
+            attr->exclude_kernel = 0;
+        }
+    }
+
+    return 0;
+}
+
+/* Prepare attr for an Uncore unit: the perf type id is read from the "type" file
+ * of the unit's sysfs event_source folder (uncore_arb is tried for UBOX/UBOXFIX
+ * when the regular folder is missing). Returns 0 on success and 1 otherwise. */
+int perf_uncore_setup(struct perf_event_attr *attr, RegisterType type, PerfmonEvent *event)
+{
+    char checkfolder[1024];
+    const char* folder = translate_types[type];
+    size_t len;
+    FILE* fp;
+    if (paranoid_level > 0)
+    {
+        return 1;
+    }
+    attr->type = 0;
+    /* a NULL table entry is handled like a missing folder, never passed to printf */
+    if (folder == NULL || access(folder, F_OK))
+    {
+        folder = "/sys/bus/event_source/devices/uncore_arb";
+        if (((type != UBOX) && (type != UBOXFIX)) || access(folder, F_OK))
+        {
+            return 1;
+        }
+    }
+    snprintf(checkfolder, sizeof(checkfolder), "%s/type", folder);
+    fp = fopen(checkfolder, "r");
+    if (fp == NULL)
+    {
+        return 1;
+    }
+    len = fread(checkfolder, sizeof(char), sizeof(checkfolder) - 1, fp);
+    fclose(fp);
+    if (len == 0)
+    {
+        /* empty type file: do not let atoi() parse the stale path as type 0 */
+        return 1;
+    }
+    checkfolder[len] = '\0';
+    attr->type = atoi(checkfolder);
+    attr->config = (event->umask<<8) + event->eventId;
+    attr->disabled = 1;
+    attr->inherit = 1;
+    attr->exclude_kernel = 1;
+    attr->exclude_hv = 1;
+    return 0;
+}
+
+
+
+
+/* Open one perf event fd on the thread's CPU for every event of the set that has
+ * no fd yet; events opened successfully get their init flag set. Returns 0. */
+int perfmon_setupCountersThread_perfevent(int thread_id, PerfmonEventSet* eventSet)
+{
+    int cpu_id = groupSet->threads[thread_id].processorId;
+    struct perf_event_attr attr;
+    int group_fd = -1;
+    for (int i=0;i < eventSet->numberOfEvents;i++)
+    {
+        RegisterIndex index = eventSet->events[i].index;
+        if (cpu_event_fds[cpu_id][index] != -1)
+        {
+            continue;
+        }
+        RegisterType type = eventSet->events[i].type;
+        PerfmonEvent *event = &(eventSet->events[i].event);
+        int ret = -1; /* unit types without a case below must not open an event */
+        memset(&attr, 0, sizeof(struct perf_event_attr));
+        attr.size = sizeof(struct perf_event_attr);
+        switch (type)
+        {
+            case FIXED:
+                ret = perf_fixed_setup(&attr, event);
+                if (ret < 0)
+                {
+                    continue;
+                }
+                VERBOSEPRINTREG(cpu_id, index, attr.config, SETUP_FIXED);
+                break;
+            case PMC:
+                ret = perf_pmc_setup(&attr, event);
+                VERBOSEPRINTREG(cpu_id, index, attr.config, SETUP_PMC);
+                break;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
+            case MBOX0:
+            case MBOX1:
+            case MBOX2:
+            case MBOX3:
+            case CBOX0:
+            case CBOX1:
+            case CBOX2:
+            case CBOX3:
+            case CBOX4:
+            case CBOX5:
+            case CBOX6:
+            case CBOX7:
+            case UBOX:
+            case SBOX0:
+            case SBOX1:
+            case WBOX:
+            case PBOX:
+            case RBOX0:
+            case RBOX1:
+            case BBOX0:
+                ret = perf_uncore_setup(&attr, type, event);
+                break;
+#endif
+            default:
+                break;
+        }
+        if (ret == 0)
+        {
+            cpu_event_fds[cpu_id][index] = perf_event_open(&attr, 0, cpu_id, -1, 0);
+            if (cpu_event_fds[cpu_id][index] < 0)
+            {
+                fprintf(stderr, "Setup of event %s on CPU %d failed: %s\n", event->name, cpu_id, strerror(errno));
+                fprintf(stderr, "Config of event 0x%llX\n", (unsigned long long)attr.config); /* config is 64 bit wide */
+                fprintf(stderr, "Type of event 0x%X\n", attr.type);
+                continue;
+            }
+            if (group_fd < 0)
+            {
+                group_fd = cpu_event_fds[cpu_id][index];
+                running_group = group_fd;
+            }
+            eventSet->events[i].threadCounter[thread_id].init = TRUE;
+        }
+    }
+    return 0;
+}
+
+/* Reset and enable every initialized event of the set that has an open fd on the thread's CPU. */
+int perfmon_startCountersThread_perfevent(int thread_id, PerfmonEventSet* eventSet)
+{
+    const int cpu_id = groupSet->threads[thread_id].processorId;
+    for (int ev = 0; ev < eventSet->numberOfEvents; ev++)
+    {
+        if (eventSet->events[ev].threadCounter[thread_id].init != TRUE)
+            continue;
+        const int fd = cpu_event_fds[cpu_id][eventSet->events[ev].index];
+        if (fd < 0)
+            continue;
+        VERBOSEPRINTREG(cpu_id, 0x0, 0x0, RESET_COUNTER);
+        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
+        eventSet->events[ev].threadCounter[thread_id].startData = 0x0ULL;
+        VERBOSEPRINTREG(cpu_id, 0x0, 0x0, START_COUNTER);
+        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
+    }
+    return 0;
+}
+
+/* Disable every opened counter of the thread's CPU, store its value as counterData and reset it. */
+int perfmon_stopCountersThread_perfevent(int thread_id, PerfmonEventSet* eventSet)
+{
+    int cpu_id = groupSet->threads[thread_id].processorId;
+    for (int i=0;i < eventSet->numberOfEvents;i++)
+    {
+        if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
+        {
+            RegisterIndex index = eventSet->events[i].index;
+            if (cpu_event_fds[cpu_id][index] < 0)
+                continue;
+            VERBOSEPRINTREG(cpu_id, cpu_event_fds[cpu_id][index], 0x0, FREEZE_COUNTER);
+            ioctl(cpu_event_fds[cpu_id][index], PERF_EVENT_IOC_DISABLE, 0);
+            long long tmp = 0;
+            int ret = read(cpu_event_fds[cpu_id][index], &tmp, sizeof(long long));
+            DEBUG_PRINT(DEBUGLEV_DEVELOP, READ CPU %d COUNTER %d VALUE %llu, cpu_id, index, tmp);
+            if (ret == sizeof(long long))
+            {
+                eventSet->events[i].threadCounter[thread_id].counterData = tmp;
+            }
+            ioctl(cpu_event_fds[cpu_id][index], PERF_EVENT_IOC_RESET, 0);
+            VERBOSEPRINTREG(cpu_id, cpu_event_fds[cpu_id][index], 0x0, RESET_COUNTER);
+        }
+    }
+    return 0; /* the function is declared int, so it must not fall off the end */
+}
+
+/* Briefly disable every opened counter of the thread's CPU, store its value as counterData, re-enable it. */
+int perfmon_readCountersThread_perfevent(int thread_id, PerfmonEventSet* eventSet)
+{
+    int cpu_id = groupSet->threads[thread_id].processorId;
+    for (int i=0;i < eventSet->numberOfEvents;i++)
+    {
+        if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
+        {
+            RegisterIndex index = eventSet->events[i].index;
+            if (cpu_event_fds[cpu_id][index] < 0)
+                continue;
+            VERBOSEPRINTREG(cpu_id, cpu_event_fds[cpu_id][index], 0x0, FREEZE_COUNTER);
+            ioctl(cpu_event_fds[cpu_id][index], PERF_EVENT_IOC_DISABLE, 0);
+            long long tmp = 0;
+            int ret = read(cpu_event_fds[cpu_id][index], &tmp, sizeof(long long));
+            VERBOSEPRINTREG(cpu_id, cpu_event_fds[cpu_id][index], tmp, READ_COUNTER);
+            if (ret == sizeof(long long))
+            {
+                eventSet->events[i].threadCounter[thread_id].counterData = tmp;
+            }
+            VERBOSEPRINTREG(cpu_id, cpu_event_fds[cpu_id][index], 0x0, UNFREEZE_COUNTER);
+            ioctl(cpu_event_fds[cpu_id][index], PERF_EVENT_IOC_ENABLE, 0);
+        }
+    }
+    return 0; /* the function is declared int, so it must not fall off the end */
+}
+
+/* Disable, reset and close every initialized counter of the thread's CPU, then free its fd table. */
+int perfmon_finalizeCountersThread_perfevent(int thread_id, PerfmonEventSet* eventSet)
+{
+    int cpu_id = groupSet->threads[thread_id].processorId;
+    for (int i=0;i < eventSet->numberOfEvents;i++)
+    {
+        if (eventSet->events[i].threadCounter[thread_id].init != TRUE) continue;
+        RegisterIndex index = eventSet->events[i].index;
+        ioctl(cpu_event_fds[cpu_id][index], PERF_EVENT_IOC_DISABLE, 0);
+        ioctl(cpu_event_fds[cpu_id][index], PERF_EVENT_IOC_RESET, 0);
+        eventSet->events[i].threadCounter[thread_id].init = FALSE;
+        close(cpu_event_fds[cpu_id][index]);
+        cpu_event_fds[cpu_id][index] = -1;
+    }
+    free(cpu_event_fds[cpu_id]);
+    cpu_event_fds[cpu_id] = NULL; /* lets perfmon_init_perfevent() allocate a fresh table instead of reusing freed memory */
+    return 0;
+}
diff --git a/src/includes/perfmon_phi.h b/src/includes/perfmon_phi.h
index 9fde8cf..7a005ac 100644
--- a/src/includes/perfmon_phi.h
+++ b/src/includes/perfmon_phi.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Header File of perfmon module for Intel Xeon Phi.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -90,7 +90,7 @@ int perfmon_setupCounterThread_phi(int thread_id, PerfmonEventSet* eventSet)
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -117,7 +117,7 @@ int perfmon_startCountersThread_phi(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE) 
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -148,7 +148,7 @@ int perfmon_stopCountersThread_phi(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -186,7 +186,7 @@ int perfmon_readCountersThread_phi(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -221,7 +221,7 @@ int perfmon_finalizeCountersThread_phi(int thread_id, PerfmonEventSet* eventSet)
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
diff --git a/src/includes/perfmon_phi_counters.h b/src/includes/perfmon_phi_counters.h
index 43523d4..ebed2da 100644
--- a/src/includes/perfmon_phi_counters.h
+++ b/src/includes/perfmon_phi_counters.h
@@ -5,14 +5,14 @@
  *
  *      Description: Counter Header File of perfmon module for Intel Xeon Phi.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_phi_events.txt b/src/includes/perfmon_phi_events.txt
index 4b280c7..d2d9a6d 100644
--- a/src/includes/perfmon_phi_events.txt
+++ b/src/includes/perfmon_phi_events.txt
@@ -4,14 +4,14 @@
 #
 #      Description:  Event list for Intel Xeon Phi
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_pm.h b/src/includes/perfmon_pm.h
index 5992733..d49c4f3 100644
--- a/src/includes/perfmon_pm.h
+++ b/src/includes/perfmon_pm.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Header File of perfmon module Pentium M.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -90,7 +90,7 @@ int perfmon_setupCounterThread_pm(int thread_id, PerfmonEventSet* eventSet)
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -113,7 +113,7 @@ int perfmon_startCountersThread_pm(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -152,7 +152,7 @@ int perfmon_stopCountersThread_pm(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE) 
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -186,7 +186,7 @@ int perfmon_readCountersThread_pm(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE) 
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -214,7 +214,7 @@ int perfmon_finalizeCountersThread_pm(int thread_id, PerfmonEventSet* eventSet)
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
diff --git a/src/includes/perfmon_pm_counters.h b/src/includes/perfmon_pm_counters.h
index 3e2d6ec..9ac4275 100644
--- a/src/includes/perfmon_pm_counters.h
+++ b/src/includes/perfmon_pm_counters.h
@@ -5,14 +5,14 @@
  *
  *      Description: Counter Header File of perfmon module for Intel Pentium M.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_pm_events.txt b/src/includes/perfmon_pm_events.txt
index c4670e0..7c63f11 100644
--- a/src/includes/perfmon_pm_events.txt
+++ b/src/includes/perfmon_pm_events.txt
@@ -4,13 +4,13 @@
 #
 #      Description:  Event list for Intel Pentium M
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_sandybridge.h b/src/includes/perfmon_sandybridge.h
index 74b017e..da8eefb 100644
--- a/src/includes/perfmon_sandybridge.h
+++ b/src/includes/perfmon_sandybridge.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Header File of perfmon module for Intel Sandy Bridge.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -43,11 +43,16 @@ static int perfmon_numCountersSandybridge = NUM_COUNTERS_SANDYBRIDGE;
 static int perfmon_numCoreCountersSandybridge = NUM_COUNTERS_CORE_SANDYBRIDGE;
 static int perfmon_numArchEventsSandybridge = NUM_ARCH_EVENTS_SANDYBRIDGE;
 
+int snb_did_cbox_test = 0;
 int snb_cbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event);
 int snbep_cbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event);
-int sandy_cbox_nosetup(int cpu_id, RegisterIndex index, PerfmonEvent *event);
 int (*sandy_cbox_setup)(int, RegisterIndex, PerfmonEvent*);
 
+int snb_cbox_nosetup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
+{
+    return 0; 
+}
+
 int perfmon_init_sandybridge(int cpu_id)
 {
     int ret;
@@ -55,23 +60,27 @@ int perfmon_init_sandybridge(int cpu_id)
     lock_acquire((int*) &socket_lock[affinity_core2node_lookup[cpu_id]], cpu_id);
     lock_acquire((int*) &tile_lock[affinity_thread2tile_lookup[cpu_id]], cpu_id);
     CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PEBS_ENABLE, 0x0ULL));
-    ret = HPMwrite(cpu_id, MSR_DEV, MSR_UNC_CBO_0_PERFEVTSEL0, 0x0ULL);
-    ret += HPMread(cpu_id, MSR_DEV, MSR_UNC_PERF_GLOBAL_CTRL, &data);
-    ret += HPMwrite(cpu_id, MSR_DEV, MSR_UNC_PERF_GLOBAL_CTRL, 0x0ULL);
-    ret += HPMread(cpu_id, MSR_DEV, MSR_UNC_CBO_0_PERFEVTSEL0, &data);
+    
     if ((cpuid_info.model == SANDYBRIDGE_EP))
     {
         sandy_cbox_setup = snbep_cbox_setup;
-    }
-    else if ((ret == 0) && (data == 0x0ULL))
-    {
-        sandy_cbox_setup = snb_cbox_setup;
-    }
-    else
-    {
-        sandy_cbox_setup = sandy_cbox_nosetup;
-    }
-
+        snb_did_cbox_test = 1;
+    }
+    else if (cpuid_info.model == SANDYBRIDGE &&
+             socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id &&
+             snb_did_cbox_test == 0)
+    {
+        ret = HPMwrite(cpu_id, MSR_DEV, MSR_UNC_CBO_0_PERFEVTSEL0, 0x0ULL);
+        ret += HPMread(cpu_id, MSR_DEV, MSR_UNC_PERF_GLOBAL_CTRL, &data);
+        ret += HPMwrite(cpu_id, MSR_DEV, MSR_UNC_PERF_GLOBAL_CTRL, 0x0ULL);
+        ret += HPMread(cpu_id, MSR_DEV, MSR_UNC_CBO_0_PERFEVTSEL0, &data);
+        if ((ret == 0) && (data == 0x0ULL))
+            sandy_cbox_setup = snb_cbox_setup;
+        else
+            sandy_cbox_setup = snb_cbox_nosetup;
+        snb_did_cbox_test = 1;
+    }
+    
     return 0;
 }
 
@@ -217,11 +226,6 @@ int snb_mbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
 }
 
 
-int sandy_cbox_nosetup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
-{
-    return 0;
-}
-
 uint32_t snb_cbox_filter(PerfmonEvent *event)
 {
     int j;
@@ -726,14 +730,14 @@ int snb_pbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
 // Macros to stop counting and reset control registers
 // FREEZE(_AND_RESET_CTL) uses central box register to freeze (bit 8 + 16) and bit 1 to reset control registers
 #define SNB_FREEZE_AND_RESET_CTL_BOX(id) \
-    if (haveLock && eventSet->regTypeMask & (REG_TYPE_MASK(id))) \
+    if (haveLock && TESTTYPE(eventSet, id)) \
     { \
         VERBOSEPRINTREG(cpu_id, box_map[id].ctrlRegister, 0x10101U, FREEZE_AND_RESET_CTL_BOX_##id) \
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, box_map[id].ctrlRegister, 0x10101ULL)); \
     }
 
 #define SNB_FREEZE_BOX(id) \
-    if (haveLock && eventSet->regTypeMask & (REG_TYPE_MASK(id))) \
+    if (haveLock && TESTTYPE(eventSet, id)) \
     { \
         VERBOSEPRINTREG(cpu_id, box_map[id].ctrlRegister, 0x10100U, FREEZE_AND_RESET_CTL_BOX_##id) \
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, box_map[id].ctrlRegister, 0x10100ULL)); \
@@ -743,7 +747,7 @@ int snb_pbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
 // Checks whether PCI device exists, because this is the first operation we do on the devices
 #define SNB_FREEZE_AND_RESET_CTL_PCI_BOX(id) \
     if (haveLock && \
-        (eventSet->regTypeMask & (REG_TYPE_MASK(id))) && \
+        TESTTYPE(eventSet, id) && \
         (HPMcheck(box_map[id].device, cpu_id) == 0)) \
     { \
         VERBOSEPRINTPCIREG(cpu_id, box_map[id].device, box_map[id].ctrlRegister, 0x10101ULL, FREEZE_AND_RESET_CTL_PCI_BOX_##id); \
@@ -752,7 +756,7 @@ int snb_pbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
 
 #define SNB_FREEZE_PCI_BOX(id) \
     if (haveLock && \
-        (eventSet->regTypeMask & (REG_TYPE_MASK(id))) && \
+        TESTTYPE(eventSet, id) && \
         (HPMcheck(box_map[id].device, cpu_id) == 0)) \
     { \
         VERBOSEPRINTPCIREG(cpu_id, box_map[id].device, box_map[id].ctrlRegister, 0x10100ULL, FREEZE_PCI_BOX_##id) \
@@ -761,7 +765,7 @@ int snb_pbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
 
 // MBOX*FIX have a slightly different scheme, setting the whole register to 0 freeze the counter
 #define SNB_FREEZE_MBOXFIX(number) \
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(MBOX##number##FIX))) && \
+    if (haveLock && TESTTYPE(eventSet, MBOX##number##FIX) && \
                     (HPMcheck(PCI_IMC_DEVICE_0_CH_##number, cpu_id))) \
     { \
         VERBOSEPRINTPCIREG(cpu_id, PCI_IMC_DEVICE_0_CH_##number, PCI_UNC_MC_PMON_FIXED_CTL, 0x0ULL, FREEZE_MBOXFIX##number) \
@@ -784,12 +788,12 @@ int perfmon_setupCounterThread_sandybridge(
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, 0x0ULL));
     }
-    if (cpuid_info.model == SANDYBRIDGE_EP)
+    if (MEASURE_UNCORE(eventSet) && cpuid_info.model == SANDYBRIDGE_EP)
     {
         SNB_FREEZE_BOX(CBOX0);
         SNB_FREEZE_BOX(CBOX1);
@@ -821,7 +825,7 @@ int perfmon_setupCounterThread_sandybridge(
         SNB_FREEZE_PCI_BOX(BBOX0);
         SNB_FREEZE_BOX(WBOX);
     }
-    else
+    else if (MEASURE_UNCORE(eventSet) && cpuid_info.model == SANDYBRIDGE && sandy_cbox_setup == snb_cbox_setup)
     {
         VERBOSEPRINTREG(cpu_id, MSR_UNC_PERF_GLOBAL_CTRL, LLU_CAST (1ULL<<31), FREEZE_UNCORE)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_PERF_GLOBAL_CTRL, (1ULL<<31)));
@@ -831,7 +835,7 @@ int perfmon_setupCounterThread_sandybridge(
     {
         flags = 0x0ULL;
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -887,7 +891,7 @@ int perfmon_setupCounterThread_sandybridge(
             case UBOX:
                 snb_ubox_setup(cpu_id, index, event);
                 break;
-
+                
             case UBOXFIX:
                 if (cpuid_info.model == SANDYBRIDGE_EP)
                 {
@@ -934,15 +938,7 @@ int perfmon_setupCounterThread_sandybridge(
                 break;
         }
     }
-    for (int i=UNCORE;i<NUM_UNITS;i++)
-    {
-        if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(i))) && box_map[i].ctrlRegister != 0x0)
-        {
-            VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL, CLEAR_UNCORE_BOX_CTRL);
-            HPMwrite(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL);
-        }
-    }
-
+    
     if (fixed_flags > 0x0)
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_FIXED_CTR_CTRL, LLU_CAST fixed_flags, SETUP_FIXED)
@@ -955,20 +951,20 @@ int perfmon_setupCounterThread_sandybridge(
 // Macros for MSR HPM counters
 // UNFREEZE(_AND_RESET_CTR) uses the central box registers to unfreeze and reset the counter registers
 #define SNB_UNFREEZE_BOX(id) \
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(id)))) { \
+    if (haveLock && TESTTYPE(eventSet, id)) { \
         VERBOSEPRINTREG(cpu_id, box_map[id].ctrlRegister, LLU_CAST 0x0ULL, UNFREEZE_BOX_##id) \
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, box_map[id].ctrlRegister, 0x0ULL)); \
     }
 
 #define SNB_UNFREEZE_AND_RESET_CTR_BOX(id) \
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(id)))) { \
+    if (haveLock && TESTTYPE(eventSet, id)) { \
         VERBOSEPRINTREG(cpu_id, box_map[id].ctrlRegister, LLU_CAST 0x2ULL, UNFREEZE_BOX_##id) \
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, box_map[id].ctrlRegister, 0x2ULL)); \
     }
 
 // ENABLE(_AND_RESET_CTR) uses the control registers to enable (bit 22) and reset the counter registers (bit 19)
 #define SNB_ENABLE_BOX(id, reg) \
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(id)))) { \
+    if (haveLock && TESTTYPE(eventSet, id)) { \
         uint64_t tmp = 0x0ULL; \
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, reg, &tmp)); \
         tmp |= (1ULL<<22); \
@@ -977,7 +973,7 @@ int perfmon_setupCounterThread_sandybridge(
     }
 
 #define SNB_ENABLE_AND_RESET_CTR_BOX(id) \
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(id)))) { \
+    if (haveLock && TESTTYPE(eventSet, id)) { \
         uint64_t tmp = 0x0ULL; \
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, box_map[id].ctrlRegister, &tmp)); \
         tmp |= (1ULL<<22)|(1ULL<<17); \
@@ -987,15 +983,13 @@ int perfmon_setupCounterThread_sandybridge(
 
 // UNFREEZE(_AND_RESET_CTR)_PCI is similar to MSR UNFREEZE but for PCI devices
 #define SNB_UNFREEZE_PCI_BOX(id) \
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(id))) \
-                && (HPMcheck(box_map[id].device, cpu_id))) \
+    if (haveLock && TESTTYPE(eventSet, id) && (HPMcheck(box_map[id].device, cpu_id))) \
     { \
         VERBOSEPRINTPCIREG(cpu_id, box_map[id].device, box_map[id].ctrlRegister, LLU_CAST 0x0ULL, UNFREEZE_PCI_BOX_##id) \
         CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, box_map[id].device, box_map[id].ctrlRegister, 0x0ULL)); \
     }
 #define SNB_UNFREEZE_AND_RESET_CTR_PCI_BOX(id) \
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(id))) \
-                && (HPMcheck(box_map[id].device, cpu_id))) \
+    if (haveLock && TESTTYPE(eventSet, id) && (HPMcheck(box_map[id].device, cpu_id))) \
     { \
         VERBOSEPRINTPCIREG(cpu_id, box_map[id].device, box_map[id].ctrlRegister, LLU_CAST 0x2ULL, UNFREEZE_AND_RESET_CTR_PCI_BOX_##id) \
         CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, box_map[id].device, box_map[id].ctrlRegister, 0x2ULL)); \
@@ -1003,7 +997,7 @@ int perfmon_setupCounterThread_sandybridge(
 
 // UNFREEZE(_AND_RESET_CTR)_MBOXFIX is kind of ENABLE for PCI but uses bit 19 for reset
 #define SNB_UNFREEZE_AND_RESET_CTR_MBOXFIX(number) \
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(MBOX##number##FIX))) && \
+    if (haveLock && TESTTYPE(eventSet, MBOX##number##FIX) && \
                     (HPMcheck(PCI_IMC_DEVICE_0_CH_##number, cpu_id))) \
     { \
         VERBOSEPRINTPCIREG(cpu_id, PCI_IMC_DEVICE_0_CH_##number, \
@@ -1011,7 +1005,7 @@ int perfmon_setupCounterThread_sandybridge(
         CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, PCI_IMC_DEVICE_0_CH_##number, PCI_UNC_MC_PMON_FIXED_CTL, (1ULL<<22)|(1ULL<<19))); \
     }
 #define SNB_UNFREEZE_MBOXFIX(number) \
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(MBOX##number##FIX))) && \
+    if (haveLock && TESTTYPE(eventSet, MBOX##number##FIX) && \
                     (HPMcheck(PCI_IMC_DEVICE_0_CH_##number, cpu_id))) \
     { \
         VERBOSEPRINTPCIREG(cpu_id, PCI_IMC_DEVICE_0_CH_##number, \
@@ -1036,7 +1030,7 @@ int perfmon_startCountersThread_sandybridge(int thread_id, PerfmonEventSet* even
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -1052,12 +1046,12 @@ int perfmon_startCountersThread_sandybridge(int thread_id, PerfmonEventSet* even
             switch (type)
             {
                 case PMC:
-                    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, counter1, 0x0ULL));
-                    flags |= (1<<(index-cpuid_info.perf_num_fixed_ctr));  /* enable counter */
+                    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, counter1, 0x0ULL));
+                    flags |= (1ULL<<(index-cpuid_info.perf_num_fixed_ctr));  /* enable counter */
                     break;
 
                 case FIXED:
-                    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, counter1, 0x0ULL));
+                    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, counter1, 0x0ULL));
                     flags |= (1ULL<<(index+32));  /* enable fixed counter */
                     break;
 
@@ -1154,13 +1148,14 @@ int perfmon_startCountersThread_sandybridge(int thread_id, PerfmonEventSet* even
         }
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST flags, UNFREEZE_PMC_OR_FIXED)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, 0x0ULL));
+        VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST flags, UNFREEZE_PMC_AND_FIXED)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, flags));
     }
-    if (cpuid_info.model == SANDYBRIDGE_EP)
+    if (MEASURE_UNCORE(eventSet) && cpuid_info.model == SANDYBRIDGE_EP)
     {
         SNB_UNFREEZE_AND_RESET_CTR_BOX(CBOX0);
         SNB_UNFREEZE_AND_RESET_CTR_BOX(CBOX1);
@@ -1186,17 +1181,17 @@ int perfmon_startCountersThread_sandybridge(int thread_id, PerfmonEventSet* even
         SNB_UNFREEZE_AND_RESET_CTR_PCI_BOX(RBOX1);
         SNB_UNFREEZE_AND_RESET_CTR_PCI_BOX(PBOX);
     }
-    else
+    else if (MEASURE_UNCORE(eventSet) && cpuid_info.model == SANDYBRIDGE && sandy_cbox_setup == snb_cbox_setup)
     {
-        VERBOSEPRINTREG(cpu_id, MSR_UNC_U_PMON_GLOBAL_CTL, LLU_CAST (1ULL<<29), UNFREEZE_UNCORE)
-        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_U_PMON_GLOBAL_CTL, (1ULL<<29)));
+        VERBOSEPRINTREG(cpu_id, MSR_UNCORE_PERF_GLOBAL_CTRL, LLU_CAST (1ULL<<29), UNFREEZE_UNCORE)
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, (1ULL<<29)));
     }
     return 0;
 }
 
 // Read MSR counter register
 #define SNB_READ_BOX(id, reg1) \
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(id)))) \
+    if (haveLock && TESTTYPE(eventSet, id)) \
     { \
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, reg1, &counter_result)); \
         VERBOSEPRINTREG(cpu_id, reg1, LLU_CAST counter_result, READ_BOX_##id) \
@@ -1204,7 +1199,7 @@ int perfmon_startCountersThread_sandybridge(int thread_id, PerfmonEventSet* even
 
 // Read PCI counter registers and combine them to a single value
 #define SNB_READ_PCI_BOX(id, dev, reg1, reg2) \
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(id))) && HPMcheck(dev, cpu_id)) \
+    if (haveLock && TESTTYPE(eventSet, id) && HPMcheck(dev, cpu_id)) \
     { \
         uint64_t tmp = 0x0ULL; \
         CHECK_PCI_READ_ERROR(HPMread(cpu_id, dev, reg1, &tmp)); \
@@ -1234,11 +1229,12 @@ int perfmon_stopCountersThread_sandybridge(int thread_id, PerfmonEventSet* event
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
+        VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_AND_FIXED)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
     }
-    if (cpuid_info.model == SANDYBRIDGE_EP)
+    if (MEASURE_UNCORE(eventSet) && cpuid_info.model == SANDYBRIDGE_EP)
     {
         SNB_FREEZE_BOX(CBOX0);
         SNB_FREEZE_BOX(CBOX1);
@@ -1265,10 +1261,10 @@ int perfmon_stopCountersThread_sandybridge(int thread_id, PerfmonEventSet* event
         SNB_FREEZE_PCI_BOX(BBOX0);
         SNB_FREEZE_AND_RESET_CTL_BOX(WBOX);
     }
-    else
+    else if (MEASURE_UNCORE(eventSet) && cpuid_info.model == SANDYBRIDGE && sandy_cbox_setup == snb_cbox_setup)
     {
-        VERBOSEPRINTREG(cpu_id, MSR_UNC_PERF_GLOBAL_CTRL, LLU_CAST (1ULL<<31), FREEZE_UNCORE)
-        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_PERF_GLOBAL_CTRL, (1ULL<<31)));
+        VERBOSEPRINTREG(cpu_id, MSR_UNCORE_PERF_GLOBAL_CTRL, LLU_CAST (1ULL<<31), FREEZE_UNCORE)
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, (1ULL<<31)));
     }
 
     for (int i=0;i < eventSet->numberOfEvents;i++)
@@ -1276,7 +1272,7 @@ int perfmon_stopCountersThread_sandybridge(int thread_id, PerfmonEventSet* event
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -1518,12 +1514,14 @@ int perfmon_readCountersThread_sandybridge(int thread_id, PerfmonEventSet* event
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, &pmc_flags));
+        VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST pmc_flags, SAFE_PMC_FLAGS)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
+        VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, RESET_PMC_FLAGS)
     }
-    if (cpuid_info.model == SANDYBRIDGE_EP)
+    if (MEASURE_UNCORE(eventSet) && cpuid_info.model == SANDYBRIDGE_EP)
     {
         SNB_FREEZE_BOX(CBOX0);
         SNB_FREEZE_BOX(CBOX1);
@@ -1555,10 +1553,10 @@ int perfmon_readCountersThread_sandybridge(int thread_id, PerfmonEventSet* event
         SNB_FREEZE_PCI_BOX(BBOX0);
         SNB_FREEZE_BOX(WBOX);
     }
-    else
+    else if (MEASURE_UNCORE(eventSet) && cpuid_info.model == SANDYBRIDGE && sandy_cbox_setup == snb_cbox_setup)
     {
-        VERBOSEPRINTREG(cpu_id, MSR_UNC_PERF_GLOBAL_CTRL, LLU_CAST (1ULL<<31), FREEZE_UNCORE)
-        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_PERF_GLOBAL_CTRL, (1ULL<<31)));
+        VERBOSEPRINTREG(cpu_id, MSR_UNCORE_PERF_GLOBAL_CTRL, LLU_CAST (1ULL<<31), FREEZE_UNCORE)
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, (1ULL<<31)));
     }
 
     for (int i=0;i < eventSet->numberOfEvents;i++)
@@ -1566,7 +1564,7 @@ int perfmon_readCountersThread_sandybridge(int thread_id, PerfmonEventSet* event
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -1718,7 +1716,7 @@ int perfmon_readCountersThread_sandybridge(int thread_id, PerfmonEventSet* event
 
                 case SBOX0FIX:
                 case SBOX1FIX:
-
+                    
                     HPMread(cpu_id, dev, counter1, &counter_result);
                     if (eventSet->events[i].event.eventId == 0x00)
                     {
@@ -1789,7 +1787,7 @@ int perfmon_readCountersThread_sandybridge(int thread_id, PerfmonEventSet* event
                     field64(counter_result, 0, box_map[type].regWidth);
         }
     }
-    if (cpuid_info.model == SANDYBRIDGE_EP)
+    if (MEASURE_UNCORE(eventSet) && cpuid_info.model == SANDYBRIDGE_EP)
     {
         SNB_UNFREEZE_BOX(CBOX0);
         SNB_UNFREEZE_BOX(CBOX1);
@@ -1821,14 +1819,15 @@ int perfmon_readCountersThread_sandybridge(int thread_id, PerfmonEventSet* event
         SNB_UNFREEZE_PCI_BOX(BBOX0);
         SNB_UNFREEZE_BOX(WBOX);
     }
-    else
+    else if (MEASURE_UNCORE(eventSet) && cpuid_info.model == SANDYBRIDGE && sandy_cbox_setup == snb_cbox_setup)
     {
-        VERBOSEPRINTREG(cpu_id, MSR_UNC_PERF_GLOBAL_CTRL, LLU_CAST (1ULL<<29), UNFREEZE_UNCORE)
-        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_PERF_GLOBAL_CTRL, (1ULL<<29)));
+        VERBOSEPRINTREG(cpu_id, MSR_UNCORE_PERF_GLOBAL_CTRL, LLU_CAST (1ULL<<29), UNFREEZE_UNCORE)
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, (1ULL<<29)));
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
+        VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST pmc_flags, RESTORE_PMC_FLAGS)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, pmc_flags));
     }
 
@@ -1854,7 +1853,7 @@ int perfmon_finalizeCountersThread_sandybridge(int thread_id, PerfmonEventSet* e
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -1887,25 +1886,33 @@ int perfmon_finalizeCountersThread_sandybridge(int thread_id, PerfmonEventSet* e
         {
             VERBOSEPRINTPCIREG(cpu_id, dev, reg, 0x0ULL, CLEAR_CTL);
             CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, reg, 0x0ULL));
+            if (type >= SBOX0 && type <= SBOX3)
+                CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, reg, 0x0ULL));
             VERBOSEPRINTPCIREG(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL, CLEAR_CTR);
             CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL));
+            if (type >= SBOX0 && type <= SBOX3)
+                CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister, 0x0ULL));
             if (counter_map[index].counterRegister2 != 0x0)
             {
                 VERBOSEPRINTPCIREG(cpu_id, dev, counter_map[index].counterRegister2, 0x0ULL, CLEAR_CTR);
                 CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister2, 0x0ULL));
+                if (type >= SBOX0 && type <= SBOX3)
+                    CHECK_PCI_WRITE_ERROR(HPMwrite(cpu_id, dev, counter_map[index].counterRegister2, 0x0ULL));
             }
         }
         eventSet->events[i].threadCounter[thread_id].init = FALSE;
     }
 
-    if (haveLock && eventSet->regTypeMask & ~(0xFULL))
+    if (haveLock && MEASURE_UNCORE(eventSet))
     {
         for (int i=UNCORE;i<NUM_UNITS;i++)
         {
-            if ((eventSet->regTypeMask & (REG_TYPE_MASK(i))) && box_map[i].ctrlRegister != 0x0)
+            if (TESTTYPE(eventSet, i) && box_map[i].ctrlRegister != 0x0)
             {
                 VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL, CLEAR_UNCORE_BOX_CTRL);
                 HPMwrite(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL);
+                if (i >= SBOX0 && i <= SBOX3)
+                    HPMwrite(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL);
                 if (box_map[i].filterRegister1 != 0x0)
                 {
                     VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].filterRegister1, 0x0ULL, CLEAR_FILTER);
@@ -1920,7 +1927,7 @@ int perfmon_finalizeCountersThread_sandybridge(int thread_id, PerfmonEventSet* e
         }
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, LLU_CAST ovf_values_core, CLEAR_GLOBAL_OVF)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, ovf_values_core));
diff --git a/src/includes/perfmon_sandybridgeEP_counters.h b/src/includes/perfmon_sandybridgeEP_counters.h
index 5b634ec..940be74 100644
--- a/src/includes/perfmon_sandybridgeEP_counters.h
+++ b/src/includes/perfmon_sandybridgeEP_counters.h
@@ -5,14 +5,14 @@
  *
  *      Description: Counter header file of perfmon module for Intel Sandy Bridge EP.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -29,9 +29,9 @@
  * =======================================================================================
  */
 
-#define NUM_COUNTERS_SANDYBRIDGEEP 97
+#define NUM_COUNTERS_SANDYBRIDGEEP 101
 #define NUM_COUNTERS_UNCORE_SANDYBRIDGEEP 53
-#define NUM_COUNTERS_CORE_SANDYBRIDGEEP 8
+#define NUM_COUNTERS_CORE_SANDYBRIDGEEP 12
 
 #define SNBEP_VALID_OPTIONS_FIXED EVENT_OPTION_COUNT_KERNEL_MASK|EVENT_OPTION_ANYTHREAD_MASK
 #define SNBEP_VALID_OPTIONS_PMC EVENT_OPTION_EDGE_MASK|EVENT_OPTION_COUNT_KERNEL_MASK| \
@@ -61,103 +61,108 @@ static RegisterMap sandybridgeEP_counter_map[NUM_COUNTERS_SANDYBRIDGEEP] = {
     {"PMC1", PMC4, PMC, MSR_PERFEVTSEL1, MSR_PMC1, 0, 0, SNBEP_VALID_OPTIONS_PMC},
     {"PMC2", PMC5, PMC, MSR_PERFEVTSEL2, MSR_PMC2, 0, 0, SNBEP_VALID_OPTIONS_PMC},
     {"PMC3", PMC6, PMC, MSR_PERFEVTSEL3, MSR_PMC3, 0, 0, SNBEP_VALID_OPTIONS_PMC},
+    /* Additional PMC counters: four more 48-bit wide counters, usable if HyperThreading is disabled */
+    {"PMC4", PMC7, PMC, MSR_PERFEVTSEL4, MSR_PMC4, 0, 0, SNBEP_VALID_OPTIONS_PMC},
+    {"PMC5", PMC8, PMC, MSR_PERFEVTSEL5, MSR_PMC5, 0, 0, SNBEP_VALID_OPTIONS_PMC},
+    {"PMC6", PMC9, PMC, MSR_PERFEVTSEL6, MSR_PMC6, 0, 0, SNBEP_VALID_OPTIONS_PMC},
+    {"PMC7", PMC10, PMC, MSR_PERFEVTSEL7, MSR_PMC7, 0, 0, SNBEP_VALID_OPTIONS_PMC},
     /* Temperature Sensor*/
-    {"TMP0", PMC7, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"TMP0", PMC11, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
     /* RAPL counters */
-    {"PWR0", PMC8, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR1", PMC9, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR2", PMC10, POWER, 0, MSR_PP1_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR3", PMC11, POWER, 0, MSR_DRAM_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR0", PMC12, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR1", PMC13, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR2", PMC14, POWER, 0, MSR_PP1_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR3", PMC15, POWER, 0, MSR_DRAM_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
     /* CBOX counters */
-    {"CBOX0C0", PMC12, CBOX0, MSR_UNC_C0_PMON_CTL0, MSR_UNC_C0_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX0C1", PMC13, CBOX0, MSR_UNC_C0_PMON_CTL1, MSR_UNC_C0_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX0C2", PMC14, CBOX0, MSR_UNC_C0_PMON_CTL2, MSR_UNC_C0_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX0C3", PMC15, CBOX0, MSR_UNC_C0_PMON_CTL3, MSR_UNC_C0_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX1C0", PMC16, CBOX1, MSR_UNC_C1_PMON_CTL0, MSR_UNC_C1_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX1C1", PMC17, CBOX1, MSR_UNC_C1_PMON_CTL1, MSR_UNC_C1_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX1C2", PMC18, CBOX1, MSR_UNC_C1_PMON_CTL2, MSR_UNC_C1_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX1C3", PMC19, CBOX1, MSR_UNC_C1_PMON_CTL3, MSR_UNC_C1_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX2C0", PMC20, CBOX2, MSR_UNC_C2_PMON_CTL0, MSR_UNC_C2_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX2C1", PMC21, CBOX2, MSR_UNC_C2_PMON_CTL1, MSR_UNC_C2_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX2C2", PMC22, CBOX2, MSR_UNC_C2_PMON_CTL2, MSR_UNC_C2_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX2C3", PMC23, CBOX2, MSR_UNC_C2_PMON_CTL3, MSR_UNC_C2_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX3C0", PMC24, CBOX3, MSR_UNC_C3_PMON_CTL0, MSR_UNC_C3_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX3C1", PMC25, CBOX3, MSR_UNC_C3_PMON_CTL1, MSR_UNC_C3_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX3C2", PMC26, CBOX3, MSR_UNC_C3_PMON_CTL2, MSR_UNC_C3_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX3C3", PMC27, CBOX3, MSR_UNC_C3_PMON_CTL3, MSR_UNC_C3_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX4C0", PMC28, CBOX4, MSR_UNC_C4_PMON_CTL0, MSR_UNC_C4_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX4C1", PMC29, CBOX4, MSR_UNC_C4_PMON_CTL1, MSR_UNC_C4_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX4C2", PMC30, CBOX4, MSR_UNC_C4_PMON_CTL2, MSR_UNC_C4_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX4C3", PMC31, CBOX4, MSR_UNC_C4_PMON_CTL3, MSR_UNC_C4_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX5C0", PMC32, CBOX5, MSR_UNC_C5_PMON_CTL0, MSR_UNC_C5_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX5C1", PMC33, CBOX5, MSR_UNC_C5_PMON_CTL1, MSR_UNC_C5_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX5C2", PMC34, CBOX5, MSR_UNC_C5_PMON_CTL2, MSR_UNC_C5_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX5C3", PMC35, CBOX5, MSR_UNC_C5_PMON_CTL3, MSR_UNC_C5_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX6C0", PMC36, CBOX6, MSR_UNC_C6_PMON_CTL0, MSR_UNC_C6_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX6C1", PMC37, CBOX6, MSR_UNC_C6_PMON_CTL1, MSR_UNC_C6_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX6C2", PMC38, CBOX6, MSR_UNC_C6_PMON_CTL2, MSR_UNC_C6_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX6C3", PMC39, CBOX6, MSR_UNC_C6_PMON_CTL3, MSR_UNC_C6_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX7C0", PMC40, CBOX7, MSR_UNC_C7_PMON_CTL0, MSR_UNC_C7_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX7C1", PMC41, CBOX7, MSR_UNC_C7_PMON_CTL1, MSR_UNC_C7_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX7C2", PMC42, CBOX7, MSR_UNC_C7_PMON_CTL2, MSR_UNC_C7_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
-    {"CBOX7C3", PMC43, CBOX7, MSR_UNC_C7_PMON_CTL3, MSR_UNC_C7_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX0C0", PMC16, CBOX0, MSR_UNC_C0_PMON_CTL0, MSR_UNC_C0_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX0C1", PMC17, CBOX0, MSR_UNC_C0_PMON_CTL1, MSR_UNC_C0_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX0C2", PMC18, CBOX0, MSR_UNC_C0_PMON_CTL2, MSR_UNC_C0_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX0C3", PMC19, CBOX0, MSR_UNC_C0_PMON_CTL3, MSR_UNC_C0_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX1C0", PMC20, CBOX1, MSR_UNC_C1_PMON_CTL0, MSR_UNC_C1_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX1C1", PMC21, CBOX1, MSR_UNC_C1_PMON_CTL1, MSR_UNC_C1_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX1C2", PMC22, CBOX1, MSR_UNC_C1_PMON_CTL2, MSR_UNC_C1_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX1C3", PMC23, CBOX1, MSR_UNC_C1_PMON_CTL3, MSR_UNC_C1_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX2C0", PMC24, CBOX2, MSR_UNC_C2_PMON_CTL0, MSR_UNC_C2_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX2C1", PMC25, CBOX2, MSR_UNC_C2_PMON_CTL1, MSR_UNC_C2_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX2C2", PMC26, CBOX2, MSR_UNC_C2_PMON_CTL2, MSR_UNC_C2_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX2C3", PMC27, CBOX2, MSR_UNC_C2_PMON_CTL3, MSR_UNC_C2_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX3C0", PMC28, CBOX3, MSR_UNC_C3_PMON_CTL0, MSR_UNC_C3_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX3C1", PMC29, CBOX3, MSR_UNC_C3_PMON_CTL1, MSR_UNC_C3_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX3C2", PMC30, CBOX3, MSR_UNC_C3_PMON_CTL2, MSR_UNC_C3_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX3C3", PMC31, CBOX3, MSR_UNC_C3_PMON_CTL3, MSR_UNC_C3_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX4C0", PMC32, CBOX4, MSR_UNC_C4_PMON_CTL0, MSR_UNC_C4_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX4C1", PMC33, CBOX4, MSR_UNC_C4_PMON_CTL1, MSR_UNC_C4_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX4C2", PMC34, CBOX4, MSR_UNC_C4_PMON_CTL2, MSR_UNC_C4_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX4C3", PMC35, CBOX4, MSR_UNC_C4_PMON_CTL3, MSR_UNC_C4_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX5C0", PMC36, CBOX5, MSR_UNC_C5_PMON_CTL0, MSR_UNC_C5_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX5C1", PMC37, CBOX5, MSR_UNC_C5_PMON_CTL1, MSR_UNC_C5_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX5C2", PMC38, CBOX5, MSR_UNC_C5_PMON_CTL2, MSR_UNC_C5_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX5C3", PMC39, CBOX5, MSR_UNC_C5_PMON_CTL3, MSR_UNC_C5_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX6C0", PMC40, CBOX6, MSR_UNC_C6_PMON_CTL0, MSR_UNC_C6_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX6C1", PMC41, CBOX6, MSR_UNC_C6_PMON_CTL1, MSR_UNC_C6_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX6C2", PMC42, CBOX6, MSR_UNC_C6_PMON_CTL2, MSR_UNC_C6_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX6C3", PMC43, CBOX6, MSR_UNC_C6_PMON_CTL3, MSR_UNC_C6_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX7C0", PMC44, CBOX7, MSR_UNC_C7_PMON_CTL0, MSR_UNC_C7_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX7C1", PMC45, CBOX7, MSR_UNC_C7_PMON_CTL1, MSR_UNC_C7_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX7C2", PMC46, CBOX7, MSR_UNC_C7_PMON_CTL2, MSR_UNC_C7_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
+    {"CBOX7C3", PMC47, CBOX7, MSR_UNC_C7_PMON_CTL3, MSR_UNC_C7_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_CBOX},
     /* UBOX counters */
-    {"UBOX0", PMC44, UBOX, MSR_UNC_U_PMON_CTL0, MSR_UNC_U_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_UBOX},
-    {"UBOX1", PMC45, UBOX, MSR_UNC_U_PMON_CTL1, MSR_UNC_U_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_UBOX},
-    {"UBOXFIX", PMC46, UBOXFIX, MSR_UNC_U_UCLK_FIXED_CTL, MSR_UNC_U_UCLK_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"WBOX0",PMC47, WBOX, MSR_UNC_PCU_PMON_CTL0, MSR_UNC_PCU_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_WBOX},
-    {"WBOX1",PMC48, WBOX, MSR_UNC_PCU_PMON_CTL1, MSR_UNC_PCU_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_WBOX},
-    {"WBOX2",PMC49, WBOX, MSR_UNC_PCU_PMON_CTL2, MSR_UNC_PCU_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_WBOX},
-    {"WBOX3",PMC50, WBOX, MSR_UNC_PCU_PMON_CTL3, MSR_UNC_PCU_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_WBOX},
-    {"WBOXFIX0", PMC51, WBOX0FIX, 0, MSR_UNC_PCU_PMON_FIXED_CTR0, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"WBOXFIX1", PMC52, WBOX0FIX, 0, MSR_UNC_PCU_PMON_FIXED_CTR1, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"UBOX0", PMC48, UBOX, MSR_UNC_U_PMON_CTL0, MSR_UNC_U_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_UBOX},
+    {"UBOX1", PMC49, UBOX, MSR_UNC_U_PMON_CTL1, MSR_UNC_U_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_UBOX},
+    {"UBOXFIX", PMC50, UBOXFIX, MSR_UNC_U_UCLK_FIXED_CTL, MSR_UNC_U_UCLK_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"WBOX0",PMC51, WBOX, MSR_UNC_PCU_PMON_CTL0, MSR_UNC_PCU_PMON_CTR0, 0, 0, SNBEP_VALID_OPTIONS_WBOX},
+    {"WBOX1",PMC52, WBOX, MSR_UNC_PCU_PMON_CTL1, MSR_UNC_PCU_PMON_CTR1, 0, 0, SNBEP_VALID_OPTIONS_WBOX},
+    {"WBOX2",PMC53, WBOX, MSR_UNC_PCU_PMON_CTL2, MSR_UNC_PCU_PMON_CTR2, 0, 0, SNBEP_VALID_OPTIONS_WBOX},
+    {"WBOX3",PMC54, WBOX, MSR_UNC_PCU_PMON_CTL3, MSR_UNC_PCU_PMON_CTR3, 0, 0, SNBEP_VALID_OPTIONS_WBOX},
+    {"WBOXFIX0", PMC55, WBOX0FIX, 0, MSR_UNC_PCU_PMON_FIXED_CTR0, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"WBOXFIX1", PMC56, WBOX0FIX, 0, MSR_UNC_PCU_PMON_FIXED_CTR1, 0, 0, EVENT_OPTION_NONE_MASK},
     /* IMC Counters: 4 48bit wide per memory channel, split in two reads */
-    {"MBOX0C0",PMC53, MBOX0, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_0, SNBEP_VALID_OPTIONS_MBOX},
-    {"MBOX0C1",PMC54, MBOX0, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_0, SNBEP_VALID_OPTIONS_MBOX},
-    {"MBOX0C2",PMC55, MBOX0, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_0, SNBEP_VALID_OPTIONS_MBOX},
-    {"MBOX0C3",PMC56, MBOX0, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_0, SNBEP_VALID_OPTIONS_MBOX},
-    {"MBOX0FIX", PMC57, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
-    {"MBOX1C0",PMC58, MBOX1, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_1, SNBEP_VALID_OPTIONS_MBOX},
-    {"MBOX1C1",PMC59, MBOX1, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_1, SNBEP_VALID_OPTIONS_MBOX},
-    {"MBOX1C2",PMC60, MBOX1, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_1, SNBEP_VALID_OPTIONS_MBOX},
-    {"MBOX1C3",PMC61, MBOX1, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_1, SNBEP_VALID_OPTIONS_MBOX},
-    {"MBOX1FIX", PMC62, MBOX1FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_1, EVENT_OPTION_NONE_MASK},
-    {"MBOX2C0",PMC63, MBOX2, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_2, SNBEP_VALID_OPTIONS_MBOX},
-    {"MBOX2C1",PMC64, MBOX2, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_2, SNBEP_VALID_OPTIONS_MBOX},
-    {"MBOX2C2",PMC65, MBOX2, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_2, SNBEP_VALID_OPTIONS_MBOX},
-    {"MBOX2C3",PMC66, MBOX2, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_2, SNBEP_VALID_OPTIONS_MBOX},
-    {"MBOX2FIX", PMC67, MBOX2FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_2, EVENT_OPTION_NONE_MASK},
-    {"MBOX3C0",PMC68, MBOX3, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_3, SNBEP_VALID_OPTIONS_MBOX},
-    {"MBOX3C1",PMC69, MBOX3, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_3, SNBEP_VALID_OPTIONS_MBOX},
-    {"MBOX3C2",PMC70, MBOX3, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_3, SNBEP_VALID_OPTIONS_MBOX},
-    {"MBOX3C3",PMC71, MBOX3, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_3, SNBEP_VALID_OPTIONS_MBOX},
-    {"MBOX3FIX", PMC72, MBOX3FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_3, EVENT_OPTION_NONE_MASK},
+    {"MBOX0C0",PMC57, MBOX0, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_0, SNBEP_VALID_OPTIONS_MBOX},
+    {"MBOX0C1",PMC58, MBOX0, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_0, SNBEP_VALID_OPTIONS_MBOX},
+    {"MBOX0C2",PMC59, MBOX0, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_0, SNBEP_VALID_OPTIONS_MBOX},
+    {"MBOX0C3",PMC60, MBOX0, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_0, SNBEP_VALID_OPTIONS_MBOX},
+    {"MBOX0FIX", PMC61, MBOX0FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_0, EVENT_OPTION_NONE_MASK},
+    {"MBOX1C0",PMC62, MBOX1, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_1, SNBEP_VALID_OPTIONS_MBOX},
+    {"MBOX1C1",PMC63, MBOX1, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_1, SNBEP_VALID_OPTIONS_MBOX},
+    {"MBOX1C2",PMC64, MBOX1, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_1, SNBEP_VALID_OPTIONS_MBOX},
+    {"MBOX1C3",PMC65, MBOX1, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_1, SNBEP_VALID_OPTIONS_MBOX},
+    {"MBOX1FIX", PMC66, MBOX1FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_1, EVENT_OPTION_NONE_MASK},
+    {"MBOX2C0",PMC67, MBOX2, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_2, SNBEP_VALID_OPTIONS_MBOX},
+    {"MBOX2C1",PMC68, MBOX2, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_2, SNBEP_VALID_OPTIONS_MBOX},
+    {"MBOX2C2",PMC69, MBOX2, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_2, SNBEP_VALID_OPTIONS_MBOX},
+    {"MBOX2C3",PMC70, MBOX2, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_2, SNBEP_VALID_OPTIONS_MBOX},
+    {"MBOX2FIX", PMC71, MBOX2FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_2, EVENT_OPTION_NONE_MASK},
+    {"MBOX3C0",PMC72, MBOX3, PCI_UNC_MC_PMON_CTL_0, PCI_UNC_MC_PMON_CTR_0_A, PCI_UNC_MC_PMON_CTR_0_B, PCI_IMC_DEVICE_0_CH_3, SNBEP_VALID_OPTIONS_MBOX},
+    {"MBOX3C1",PMC73, MBOX3, PCI_UNC_MC_PMON_CTL_1, PCI_UNC_MC_PMON_CTR_1_A, PCI_UNC_MC_PMON_CTR_1_B, PCI_IMC_DEVICE_0_CH_3, SNBEP_VALID_OPTIONS_MBOX},
+    {"MBOX3C2",PMC74, MBOX3, PCI_UNC_MC_PMON_CTL_2, PCI_UNC_MC_PMON_CTR_2_A, PCI_UNC_MC_PMON_CTR_2_B, PCI_IMC_DEVICE_0_CH_3, SNBEP_VALID_OPTIONS_MBOX},
+    {"MBOX3C3",PMC75, MBOX3, PCI_UNC_MC_PMON_CTL_3, PCI_UNC_MC_PMON_CTR_3_A, PCI_UNC_MC_PMON_CTR_3_B, PCI_IMC_DEVICE_0_CH_3, SNBEP_VALID_OPTIONS_MBOX},
+    {"MBOX3FIX", PMC76, MBOX3FIX, PCI_UNC_MC_PMON_FIXED_CTL, PCI_UNC_MC_PMON_FIXED_CTR_A, PCI_UNC_MC_PMON_FIXED_CTR_B, PCI_IMC_DEVICE_0_CH_3, EVENT_OPTION_NONE_MASK},
     /* QPI counters four 48bit  wide per port, split in two reads */
-    {"SBOX0C0",PMC73, SBOX0, PCI_UNC_QPI_PMON_CTL_0, PCI_UNC_QPI_PMON_CTR_0_A, PCI_UNC_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_0, SNBEP_VALID_OPTIONS_SBOX},
-    {"SBOX0C1",PMC74, SBOX0, PCI_UNC_QPI_PMON_CTL_1, PCI_UNC_QPI_PMON_CTR_1_A, PCI_UNC_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_0, SNBEP_VALID_OPTIONS_SBOX},
-    {"SBOX0C2",PMC75, SBOX0, PCI_UNC_QPI_PMON_CTL_2, PCI_UNC_QPI_PMON_CTR_2_A, PCI_UNC_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_0, SNBEP_VALID_OPTIONS_SBOX},
-    {"SBOX0C3",PMC76, SBOX0, PCI_UNC_QPI_PMON_CTL_3, PCI_UNC_QPI_PMON_CTR_3_A, PCI_UNC_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_0, SNBEP_VALID_OPTIONS_SBOX},
-    {"SBOX0FIX", PMC77, SBOX0FIX, 0, PCI_UNC_QPI_RATE_STATUS, 0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
-    {"SBOX1C0",PMC78, SBOX1, PCI_UNC_QPI_PMON_CTL_0, PCI_UNC_QPI_PMON_CTR_0_A, PCI_UNC_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_1, SNBEP_VALID_OPTIONS_SBOX},
-    {"SBOX1C1",PMC79, SBOX1, PCI_UNC_QPI_PMON_CTL_1, PCI_UNC_QPI_PMON_CTR_1_A, PCI_UNC_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_1, SNBEP_VALID_OPTIONS_SBOX},
-    {"SBOX1C2",PMC80, SBOX1, PCI_UNC_QPI_PMON_CTL_2, PCI_UNC_QPI_PMON_CTR_2_A, PCI_UNC_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_1, SNBEP_VALID_OPTIONS_SBOX},
-    {"SBOX1C3",PMC81, SBOX1, PCI_UNC_QPI_PMON_CTL_3, PCI_UNC_QPI_PMON_CTR_3_A, PCI_UNC_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_1, SNBEP_VALID_OPTIONS_SBOX},
-    {"SBOX1FIX", PMC82, SBOX1FIX, 0, PCI_UNC_QPI_RATE_STATUS, 0, PCI_QPI_MISC_DEVICE_PORT_1, EVENT_OPTION_NONE_MASK},
+    {"SBOX0C0",PMC77, SBOX0, PCI_UNC_QPI_PMON_CTL_0, PCI_UNC_QPI_PMON_CTR_0_A, PCI_UNC_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_0, SNBEP_VALID_OPTIONS_SBOX},
+    {"SBOX0C1",PMC78, SBOX0, PCI_UNC_QPI_PMON_CTL_1, PCI_UNC_QPI_PMON_CTR_1_A, PCI_UNC_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_0, SNBEP_VALID_OPTIONS_SBOX},
+    {"SBOX0C2",PMC79, SBOX0, PCI_UNC_QPI_PMON_CTL_2, PCI_UNC_QPI_PMON_CTR_2_A, PCI_UNC_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_0, SNBEP_VALID_OPTIONS_SBOX},
+    {"SBOX0C3",PMC80, SBOX0, PCI_UNC_QPI_PMON_CTL_3, PCI_UNC_QPI_PMON_CTR_3_A, PCI_UNC_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_0, SNBEP_VALID_OPTIONS_SBOX},
+    {"SBOX0FIX", PMC81, SBOX0FIX, 0, PCI_UNC_QPI_RATE_STATUS, 0, PCI_QPI_MISC_DEVICE_PORT_0, EVENT_OPTION_NONE_MASK},
+    {"SBOX1C0",PMC82, SBOX1, PCI_UNC_QPI_PMON_CTL_0, PCI_UNC_QPI_PMON_CTR_0_A, PCI_UNC_QPI_PMON_CTR_0_B, PCI_QPI_DEVICE_PORT_1, SNBEP_VALID_OPTIONS_SBOX},
+    {"SBOX1C1",PMC83, SBOX1, PCI_UNC_QPI_PMON_CTL_1, PCI_UNC_QPI_PMON_CTR_1_A, PCI_UNC_QPI_PMON_CTR_1_B, PCI_QPI_DEVICE_PORT_1, SNBEP_VALID_OPTIONS_SBOX},
+    {"SBOX1C2",PMC84, SBOX1, PCI_UNC_QPI_PMON_CTL_2, PCI_UNC_QPI_PMON_CTR_2_A, PCI_UNC_QPI_PMON_CTR_2_B, PCI_QPI_DEVICE_PORT_1, SNBEP_VALID_OPTIONS_SBOX},
+    {"SBOX1C3",PMC85, SBOX1, PCI_UNC_QPI_PMON_CTL_3, PCI_UNC_QPI_PMON_CTR_3_A, PCI_UNC_QPI_PMON_CTR_3_B, PCI_QPI_DEVICE_PORT_1, SNBEP_VALID_OPTIONS_SBOX},
+    {"SBOX1FIX", PMC86, SBOX1FIX, 0, PCI_UNC_QPI_RATE_STATUS, 0, PCI_QPI_MISC_DEVICE_PORT_1, EVENT_OPTION_NONE_MASK},
     /* BBOX or better known as Home Agent (HA) */
-    {"BBOX0",PMC83, BBOX0, PCI_UNC_HA_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_0, SNBEP_VALID_OPTIONS_BBOX},
-    {"BBOX1",PMC84, BBOX0, PCI_UNC_HA_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_0, SNBEP_VALID_OPTIONS_BBOX},
-    {"BBOX2",PMC85, BBOX0, PCI_UNC_HA_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_0, SNBEP_VALID_OPTIONS_BBOX},
-    {"BBOX3",PMC86, BBOX0, PCI_UNC_HA_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_0, SNBEP_VALID_OPTIONS_BBOX},
-    {"RBOX0C0", PMC87, RBOX0, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_0, SNBEP_VALID_OPTIONS_RBOX},
-    {"RBOX0C1", PMC88, RBOX0, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_0, SNBEP_VALID_OPTIONS_RBOX},
-    {"RBOX0C2", PMC89, RBOX0, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_0, SNBEP_VALID_OPTIONS_RBOX},
-    {"RBOX1C0", PMC90, RBOX1, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_1, SNBEP_VALID_OPTIONS_RBOX},
-    {"RBOX1C1", PMC91, RBOX1, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_1, SNBEP_VALID_OPTIONS_RBOX},
-    {"RBOX1C2", PMC92, RBOX1, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_1, SNBEP_VALID_OPTIONS_RBOX},
-    {"PBOX0", PMC93, PBOX, PCI_UNC_R2PCIE_PMON_CTL_0, PCI_UNC_R2PCIE_PMON_CTR_0_A, PCI_UNC_R2PCIE_PMON_CTR_0_B, PCI_R2PCIE_DEVICE, SNBEP_VALID_OPTIONS_PBOX},
-    {"PBOX1", PMC94, PBOX, PCI_UNC_R2PCIE_PMON_CTL_1, PCI_UNC_R2PCIE_PMON_CTR_1_A, PCI_UNC_R2PCIE_PMON_CTR_1_B, PCI_R2PCIE_DEVICE, SNBEP_VALID_OPTIONS_PBOX},
-    {"PBOX2", PMC95, PBOX, PCI_UNC_R2PCIE_PMON_CTL_2, PCI_UNC_R2PCIE_PMON_CTR_2_A, PCI_UNC_R2PCIE_PMON_CTR_2_B, PCI_R2PCIE_DEVICE, SNBEP_VALID_OPTIONS_PBOX},
-    {"PBOX3", PMC96, PBOX, PCI_UNC_R2PCIE_PMON_CTL_3, PCI_UNC_R2PCIE_PMON_CTR_3_A, PCI_UNC_R2PCIE_PMON_CTR_3_B, PCI_R2PCIE_DEVICE, SNBEP_VALID_OPTIONS_PBOX},
+    {"BBOX0",PMC87, BBOX0, PCI_UNC_HA_PMON_CTL_0, PCI_UNC_HA_PMON_CTR_0_A, PCI_UNC_HA_PMON_CTR_0_B, PCI_HA_DEVICE_0, SNBEP_VALID_OPTIONS_BBOX},
+    {"BBOX1",PMC88, BBOX0, PCI_UNC_HA_PMON_CTL_1, PCI_UNC_HA_PMON_CTR_1_A, PCI_UNC_HA_PMON_CTR_1_B, PCI_HA_DEVICE_0, SNBEP_VALID_OPTIONS_BBOX},
+    {"BBOX2",PMC89, BBOX0, PCI_UNC_HA_PMON_CTL_2, PCI_UNC_HA_PMON_CTR_2_A, PCI_UNC_HA_PMON_CTR_2_B, PCI_HA_DEVICE_0, SNBEP_VALID_OPTIONS_BBOX},
+    {"BBOX3",PMC90, BBOX0, PCI_UNC_HA_PMON_CTL_3, PCI_UNC_HA_PMON_CTR_3_A, PCI_UNC_HA_PMON_CTR_3_B, PCI_HA_DEVICE_0, SNBEP_VALID_OPTIONS_BBOX},
+    {"RBOX0C0", PMC91, RBOX0, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_0, SNBEP_VALID_OPTIONS_RBOX},
+    {"RBOX0C1", PMC92, RBOX0, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_0, SNBEP_VALID_OPTIONS_RBOX},
+    {"RBOX0C2", PMC93, RBOX0, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_0, SNBEP_VALID_OPTIONS_RBOX},
+    {"RBOX1C0", PMC94, RBOX1, PCI_UNC_R3QPI_PMON_CTL_0, PCI_UNC_R3QPI_PMON_CTR_0_A, PCI_UNC_R3QPI_PMON_CTR_0_B, PCI_R3QPI_DEVICE_LINK_1, SNBEP_VALID_OPTIONS_RBOX},
+    {"RBOX1C1", PMC95, RBOX1, PCI_UNC_R3QPI_PMON_CTL_1, PCI_UNC_R3QPI_PMON_CTR_1_A, PCI_UNC_R3QPI_PMON_CTR_1_B, PCI_R3QPI_DEVICE_LINK_1, SNBEP_VALID_OPTIONS_RBOX},
+    {"RBOX1C2", PMC96, RBOX1, PCI_UNC_R3QPI_PMON_CTL_2, PCI_UNC_R3QPI_PMON_CTR_2_A, PCI_UNC_R3QPI_PMON_CTR_2_B, PCI_R3QPI_DEVICE_LINK_1, SNBEP_VALID_OPTIONS_RBOX},
+    {"PBOX0", PMC97, PBOX, PCI_UNC_R2PCIE_PMON_CTL_0, PCI_UNC_R2PCIE_PMON_CTR_0_A, PCI_UNC_R2PCIE_PMON_CTR_0_B, PCI_R2PCIE_DEVICE, SNBEP_VALID_OPTIONS_PBOX},
+    {"PBOX1", PMC98, PBOX, PCI_UNC_R2PCIE_PMON_CTL_1, PCI_UNC_R2PCIE_PMON_CTR_1_A, PCI_UNC_R2PCIE_PMON_CTR_1_B, PCI_R2PCIE_DEVICE, SNBEP_VALID_OPTIONS_PBOX},
+    {"PBOX2", PMC99, PBOX, PCI_UNC_R2PCIE_PMON_CTL_2, PCI_UNC_R2PCIE_PMON_CTR_2_A, PCI_UNC_R2PCIE_PMON_CTR_2_B, PCI_R2PCIE_DEVICE, SNBEP_VALID_OPTIONS_PBOX},
+    {"PBOX3", PMC100, PBOX, PCI_UNC_R2PCIE_PMON_CTL_3, PCI_UNC_R2PCIE_PMON_CTR_3_A, PCI_UNC_R2PCIE_PMON_CTR_3_B, PCI_R2PCIE_DEVICE, SNBEP_VALID_OPTIONS_PBOX},
 };
 
 static BoxMap sandybridgeEP_box_map[NUM_UNITS] = {
diff --git a/src/includes/perfmon_sandybridgeEP_events.txt b/src/includes/perfmon_sandybridgeEP_events.txt
index 1ccccbb..ebac2fd 100644
--- a/src/includes/perfmon_sandybridgeEP_events.txt
+++ b/src/includes/perfmon_sandybridgeEP_events.txt
@@ -4,14 +4,14 @@
 #
 #      Description:  Event list for Intel SandyBridge EP
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
@@ -80,22 +80,28 @@ UMASK_INT_MISC_RAT_STALL_CYCLES      0x40
 DEFAULT_OPTIONS_INT_MISC_RAT_STALL_COUNT EVENT_OPTION_EDGE=1
 UMASK_INT_MISC_RAT_STALL_COUNT       0x40
 
-EVENT_UOPS_ISSUED                     0x0E  PMC
-UMASK_UOPS_ISSUED_ANY                 0x01
-DEFAULT_OPTIONS_UOPS_ISSUED_ACTIVE_CYCLES EVENT_OPTION_THRESHOLD=0x1
-UMASK_UOPS_ISSUED_ACTIVE_CYCLES       0x01
+EVENT_UOPS_ISSUED                0x0E  PMC
+UMASK_UOPS_ISSUED_ANY            0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_ISSUED_USED_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
-UMASK_UOPS_ISSUED_STALL_CYCLES        0x01
+UMASK_UOPS_ISSUED_STALL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1
-UMASK_UOPS_ISSUED_TOTAL_CYCLES        0x01
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_ANY EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_ANY            0x01
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_ACTIVE_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_ACTIVE_CYCLES  0x01
+UMASK_UOPS_ISSUED_TOTAL_CYCLES   0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_ANYTHREAD=1
+UMASK_UOPS_ISSUED_CORE_USED_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_ISSUED_CORE_STALL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_ISSUED_CORE_TOTAL_CYCLES   0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC 0x01
 
 EVENT_FP_COMP_OPS_EXE                          0x10   PMC
 UMASK_FP_COMP_OPS_EXE_X87                      0x01
@@ -201,13 +207,100 @@ EVENT_CACHE_LOCK_CYCLES                             0x63   PMC
 UMASK_CACHE_LOCK_CYCLES_SPLIT_LOCK_UC_LOCK_DURATION 0x01
 UMASK_CACHE_LOCK_CYCLES_CACHE_LOCK_DURATION         0x02
 
-EVENT_IDQ               0x79   PMC
-UMASK_IDQ_EMPTY         0x02
-UMASK_IDQ_MITE_UOPS     0x04
-UMASK_IDQ_DSB_UOPS      0x08
-UMASK_IDQ_MS_DSB_UOPS   0x10
-UMASK_IDQ_MS_MITE_UOPS  0x20
-UMASK_IDQ_MS_UOPS       0x30
+EVENT_IDQ                               0x79   PMC
+UMASK_IDQ_EMPTY                         0x02
+UMASK_IDQ_MITE_UOPS                     0x04
+UMASK_IDQ_DSB_UOPS                      0x08
+UMASK_IDQ_MS_DSB_UOPS                   0x10
+UMASK_IDQ_MS_MITE_UOPS                  0x20
+UMASK_IDQ_MS_UOPS                       0x30
+UMASK_IDQ_ALL_DSB_UOPS                  0x18
+UMASK_IDQ_MITE_ALL_UOPS                 0x24
+UMASK_IDQ_ALL_UOPS                      0x3C
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES         EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES                   0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES_1_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MITE_CYCLES_2_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MITE_CYCLES_3_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MITE_CYCLES_4_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES          EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES                    0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES_1_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_DSB_CYCLES_2_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_DSB_CYCLES_3_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_DSB_CYCLES_4_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES       EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES                 0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES_1_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_DSB_CYCLES_2_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_DSB_CYCLES_3_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_DSB_CYCLES_4_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_OCCUR        EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
+UMASK_IDQ_MS_DSB_OCCUR                  0x10
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES      EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES                0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES_1_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_MITE_CYCLES_2_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_MITE_CYCLES_3_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_MITE_CYCLES_4_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_CYCLES           EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES                     0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES_1_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_CYCLES_2_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_CYCLES_3_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_CYCLES_4_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_SWITCHES         EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
+UMASK_IDQ_MS_SWITCHES                   0x30
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS       0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_DSB_CYCLES_1_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_DSB_CYCLES_2_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_DSB_CYCLES_3_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS      0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_1_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_MITE_CYCLES_2_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_MITE_CYCLES_3_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_ANY_UOPS      0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_1_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_CYCLES_2_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_CYCLES_3_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_CYCLES_4_UOPS        0x3C
 
 EVENT_ICACHE                    0x80   PMC
 UMASK_ICACHE_HITS               0x01
@@ -255,10 +348,16 @@ UMASK_BR_MISP_EXEC_ALL_BRANCHES                        0xFF
 
 EVENT_IDQ_UOPS_NOT_DELIVERED                    0x9C   PMC
 UMASK_IDQ_UOPS_NOT_DELIVERED_CORE               0x01
-DEFAULT_OPTIONS_IDQ_UOPS_NOT_DELIVERED_CYCLES_0_UOPS EVENT_OPTION_THRESHOLD=0x4
-UMASK_IDQ_UOPS_NOT_DELIVERED_CYCLES_0_UOPS      0x01
-DEFAULT_OPTIONS_IDQ_UOPS_NOT_DELIVERED_CYCLES_LE_1_UOP EVENT_OPTION_THRESHOLD=0x3
-UMASK_IDQ_UOPS_NOT_DELIVERED_CYCLES_LE_1_UOP    0x01
+DEFAULT_OPTIONS_IDQ_UOPS_NOT_DELIVERED_CYCLES_0_UOPS_DELIV_CORE EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_UOPS_NOT_DELIVERED_CYCLES_0_UOPS_DELIV_CORE   0x01
+DEFAULT_OPTIONS_IDQ_UOPS_NOT_DELIVERED_CYCLES_LE_1_UOP_DELIV_CORE EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_UOPS_NOT_DELIVERED_CYCLES_LE_1_UOP_DELIV_CORE 0x01
+DEFAULT_OPTIONS_IDQ_UOPS_NOT_DELIVERED_CYCLES_LE_2_UOP_DELIV_CORE EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_UOPS_NOT_DELIVERED_CYCLES_LE_2_UOP_DELIV_CORE 0x01
+DEFAULT_OPTIONS_IDQ_UOPS_NOT_DELIVERED_CYCLES_LE_3_UOP_DELIV_CORE EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_UOPS_NOT_DELIVERED_CYCLES_LE_3_UOP_DELIV_CORE 0x01
+DEFAULT_OPTIONS_IDQ_UOPS_NOT_DELIVERED_CYCLES_FE_WAS_OK EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
+UMASK_IDQ_UOPS_NOT_DELIVERED_CYCLES_FE_WAS_OK           0x01
 
 EVENT_UOPS_DISPATCHED_PORT                  0xA1   PMC
 UMASK_UOPS_DISPATCHED_PORT_PORT_0           0x01
@@ -280,12 +379,29 @@ EVENT_RESOURCE_STALLS                 0xA2   PMC
 UMASK_RESOURCE_STALLS_ANY             0x01
 UMASK_RESOURCE_STALLS_LB              0x02
 UMASK_RESOURCE_STALLS_RS              0x04
-UMASK_RESOURCE_STALLS_B               0x08
+UMASK_RESOURCE_STALLS_SB              0x08
 UMASK_RESOURCE_STALLS_ROB             0x10
 UMASK_RESOURCE_STALLS_FCSW            0x20
 UMASK_RESOURCE_STALLS_MXCSR           0x40
 UMASK_RESOURCE_STALLS_OTHER           0x80
 
+EVENT_CYCLE_ACTIVITY                               0xA3   PMC
+# Sub-events of CYCLE_ACTIVITY: every entry below sets a threshold equal to its umask value.
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L2_PENDING   EVENT_OPTION_THRESHOLD=0x01
+UMASK_CYCLE_ACTIVITY_CYCLES_L2_PENDING             0x01
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L2_PENDING   EVENT_OPTION_THRESHOLD=0x05
+UMASK_CYCLE_ACTIVITY_STALLS_L2_PENDING             0x05
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_NO_DISPATCH  EVENT_OPTION_THRESHOLD=0x04
+UMASK_CYCLE_ACTIVITY_CYCLES_NO_DISPATCH            0x04
+
+EVENT_CYCLE_ACTIVITY_CYCLES_L1D_PENDING                  0xA3   PMC2
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L1D_PENDING        EVENT_OPTION_THRESHOLD=0x02
+UMASK_CYCLE_ACTIVITY_CYCLES_L1D_PENDING                  0x02
+
+EVENT_CYCLE_ACTIVITY_STALLS_L1D_PENDING                  0xA3   PMC2
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L1D_PENDING        EVENT_OPTION_THRESHOLD=0x06
+UMASK_CYCLE_ACTIVITY_STALLS_L1D_PENDING                  0x06
+
 EVENT_DSB2MITE_SWITCHES                  0xAB   PMC
 UMASK_DSB2MITE_SWITCHES_COUNT            0x01
 UMASK_DSB2MITE_SWITCHES_PENALTY_CYCLES   0x02
@@ -320,6 +436,10 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
 UMASK_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC 0x01
 DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC 0x01
 UMASK_UOPS_EXECUTED_CORE                       0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
 UMASK_UOPS_EXECUTED_CORE_USED_CYCLES           0x02
@@ -335,6 +455,10 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC 0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC 0x02
 
 EVENT_OFFCORE_REQUESTS_BUFFER             0xB2  PMC
 UMASK_OFFCORE_REQUESTS_BUFFER_SQ_FULL     0x01
@@ -376,7 +500,7 @@ DEFAULT_OPTIONS_UOPS_RETIRED_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTIO
 UMASK_UOPS_RETIRED_TOTAL_CYCLES          0x01
 DEFAULT_OPTIONS_UOPS_RETIRED_CORE_ALL EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_RETIRED_CORE_ALL              0x01
-DEFAULT_OPTIONS_UOPS_RETIRED_CORE_RETIRE_SLOTS EVENT_OPTION_ANYTHREAD=1
+DEFAULT_OPTIONS_UOPS_RETIRED_CORE_RETIRE_SLOTS EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_RETIRED_CORE_RETIRE_SLOTS     0x02
 DEFAULT_OPTIONS_UOPS_RETIRED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_RETIRED_CORE_USED_CYCLES      0x01
@@ -384,6 +508,18 @@ DEFAULT_OPTIONS_UOPS_RETIRED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_
 UMASK_UOPS_RETIRED_CORE_STALL_CYCLES     0x01
 DEFAULT_OPTIONS_UOPS_RETIRED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_RETIRED_CORE_TOTAL_CYCLES     0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC 0x01
 
 EVENT_MACHINE_CLEARS                    0xC3  PMC
 UMASK_MACHINE_CLEARS_CYCLES             0x01
@@ -451,6 +587,10 @@ UMASK_MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_HIT          0x02
 UMASK_MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_HITM         0x04
 UMASK_MEM_LOAD_UOPS_LLC_HIT_RETIRED_XSNP_NONE         0x08
 
+EVENT_MEM_LOAD_UOPS_LLC_MISS_RETIRED                   0xD3   PMC
+UMASK_MEM_LOAD_UOPS_LLC_MISS_RETIRED_LOCAL_DRAM        0x01
+UMASK_MEM_LOAD_UOPS_LLC_MISS_RETIRED_REMOTE_DRAM       0x0C
+
 EVENT_MEM_LOAD_UOPS_MISC_RETIRED               0xD4   PMC
 UMASK_MEM_LOAD_UOPS_MISC_RETIRED_LLC_MISS      0x02
 
diff --git a/src/includes/perfmon_sandybridge_counters.h b/src/includes/perfmon_sandybridge_counters.h
index 7dd83d4..e571d7a 100644
--- a/src/includes/perfmon_sandybridge_counters.h
+++ b/src/includes/perfmon_sandybridge_counters.h
@@ -5,14 +5,14 @@
  *
  *      Description: Counter header file of perfmon module for Intel Sandy Bridge.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -30,9 +30,9 @@
  */
 
 
-#define NUM_COUNTERS_CORE_SANDYBRIDGE 8
+#define NUM_COUNTERS_CORE_SANDYBRIDGE 12
 #define NUM_COUNTERS_UNCORE_SANDYBRIDGE 15
-#define NUM_COUNTERS_SANDYBRIDGE 23
+#define NUM_COUNTERS_SANDYBRIDGE 27
 
 #define SNB_VALID_OPTIONS_FIXED EVENT_OPTION_COUNT_KERNEL_MASK|EVENT_OPTION_ANYTHREAD_MASK
 #define SNB_VALID_OPTIONS_PMC EVENT_OPTION_EDGE_MASK|EVENT_OPTION_COUNT_KERNEL_MASK| \
@@ -51,24 +51,29 @@ static RegisterMap sandybridge_counter_map[NUM_COUNTERS_SANDYBRIDGE] = {
     {"PMC1", PMC4, PMC, MSR_PERFEVTSEL1, MSR_PMC1, 0, 0, SNB_VALID_OPTIONS_PMC},
     {"PMC2", PMC5, PMC, MSR_PERFEVTSEL2, MSR_PMC2, 0, 0, SNB_VALID_OPTIONS_PMC},
     {"PMC3", PMC6, PMC, MSR_PERFEVTSEL3, MSR_PMC3, 0, 0, SNB_VALID_OPTIONS_PMC},
+    /* Additional PMC counters: four, each 48 bit wide, if HyperThreading is disabled */
+    {"PMC4", PMC7, PMC, MSR_PERFEVTSEL4, MSR_PMC4, 0, 0, SNB_VALID_OPTIONS_PMC},
+    {"PMC5", PMC8, PMC, MSR_PERFEVTSEL5, MSR_PMC5, 0, 0, SNB_VALID_OPTIONS_PMC},
+    {"PMC6", PMC9, PMC, MSR_PERFEVTSEL6, MSR_PMC6, 0, 0, SNB_VALID_OPTIONS_PMC},
+    {"PMC7", PMC10, PMC, MSR_PERFEVTSEL7, MSR_PMC7, 0, 0, SNB_VALID_OPTIONS_PMC},
     /* Temperature Sensor*/
-    {"TMP0", PMC7, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"TMP0", PMC11, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
     /* RAPL counters */
-    {"PWR0", PMC8, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR1", PMC9, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR2", PMC10, POWER, 0, MSR_PP1_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR3", PMC11, POWER, 0, MSR_DRAM_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"CBOX0C0", PMC12, CBOX0, MSR_UNC_CBO_0_PERFEVTSEL0, MSR_UNC_CBO_0_CTR0, 0, 0, SNB_VALID_OPTIONS_CBOX},
-    {"CBOX0C1", PMC13, CBOX0, MSR_UNC_CBO_0_PERFEVTSEL1, MSR_UNC_CBO_0_CTR1, 0, 0, SNB_VALID_OPTIONS_CBOX},
-    {"CBOX1C0", PMC14, CBOX1, MSR_UNC_CBO_1_PERFEVTSEL0, MSR_UNC_CBO_1_CTR0, 0, 0, SNB_VALID_OPTIONS_CBOX},
-    {"CBOX1C1", PMC15, CBOX1, MSR_UNC_CBO_1_PERFEVTSEL1, MSR_UNC_CBO_1_CTR1, 0, 0, SNB_VALID_OPTIONS_CBOX},
-    {"CBOX2C0", PMC16, CBOX2, MSR_UNC_CBO_2_PERFEVTSEL0, MSR_UNC_CBO_2_CTR0, 0, 0, SNB_VALID_OPTIONS_CBOX},
-    {"CBOX2C1", PMC17, CBOX2, MSR_UNC_CBO_2_PERFEVTSEL1, MSR_UNC_CBO_2_CTR1, 0, 0, SNB_VALID_OPTIONS_CBOX},
-    {"CBOX3C0", PMC18, CBOX3, MSR_UNC_CBO_3_PERFEVTSEL0, MSR_UNC_CBO_3_CTR0, 0, 0, SNB_VALID_OPTIONS_CBOX},
-    {"CBOX3C1", PMC19, CBOX3, MSR_UNC_CBO_3_PERFEVTSEL1, MSR_UNC_CBO_3_CTR1, 0, 0, SNB_VALID_OPTIONS_CBOX},
-    {"UBOX0", PMC20, UBOX, MSR_UNC_ARB_PERFEVTSEL0, MSR_UNC_ARB_CTR0, 0, 0, SNB_VALID_OPTIONS_UBOX},
-    {"UBOX1", PMC21, UBOX, MSR_UNC_ARB_PERFEVTSEL1, MSR_UNC_ARB_CTR1, 0, 0, SNB_VALID_OPTIONS_UBOX},
-    {"UBOXFIX", PMC22, UBOXFIX, MSR_UNC_PERF_FIXED_CTRL, MSR_UNC_PERF_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR0", PMC12, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR1", PMC13, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR2", PMC14, POWER, 0, MSR_PP1_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR3", PMC15, POWER, 0, MSR_DRAM_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"CBOX0C0", PMC16, CBOX0, MSR_UNC_CBO_0_PERFEVTSEL0, MSR_UNC_CBO_0_CTR0, 0, 0, SNB_VALID_OPTIONS_CBOX},
+    {"CBOX0C1", PMC17, CBOX0, MSR_UNC_CBO_0_PERFEVTSEL1, MSR_UNC_CBO_0_CTR1, 0, 0, SNB_VALID_OPTIONS_CBOX},
+    {"CBOX1C0", PMC18, CBOX1, MSR_UNC_CBO_1_PERFEVTSEL0, MSR_UNC_CBO_1_CTR0, 0, 0, SNB_VALID_OPTIONS_CBOX},
+    {"CBOX1C1", PMC19, CBOX1, MSR_UNC_CBO_1_PERFEVTSEL1, MSR_UNC_CBO_1_CTR1, 0, 0, SNB_VALID_OPTIONS_CBOX},
+    {"CBOX2C0", PMC20, CBOX2, MSR_UNC_CBO_2_PERFEVTSEL0, MSR_UNC_CBO_2_CTR0, 0, 0, SNB_VALID_OPTIONS_CBOX},
+    {"CBOX2C1", PMC21, CBOX2, MSR_UNC_CBO_2_PERFEVTSEL1, MSR_UNC_CBO_2_CTR1, 0, 0, SNB_VALID_OPTIONS_CBOX},
+    {"CBOX3C0", PMC22, CBOX3, MSR_UNC_CBO_3_PERFEVTSEL0, MSR_UNC_CBO_3_CTR0, 0, 0, SNB_VALID_OPTIONS_CBOX},
+    {"CBOX3C1", PMC23, CBOX3, MSR_UNC_CBO_3_PERFEVTSEL1, MSR_UNC_CBO_3_CTR1, 0, 0, SNB_VALID_OPTIONS_CBOX},
+    {"UBOX0", PMC24, UBOX, MSR_UNC_ARB_PERFEVTSEL0, MSR_UNC_ARB_CTR0, 0, 0, SNB_VALID_OPTIONS_UBOX},
+    {"UBOX1", PMC25, UBOX, MSR_UNC_ARB_PERFEVTSEL1, MSR_UNC_ARB_CTR1, 0, 0, SNB_VALID_OPTIONS_UBOX},
+    {"UBOXFIX", PMC26, UBOXFIX, MSR_UNC_PERF_FIXED_CTRL, MSR_UNC_PERF_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
 };
 
 static BoxMap sandybridge_box_map[NUM_UNITS] = {
diff --git a/src/includes/perfmon_sandybridge_events.txt b/src/includes/perfmon_sandybridge_events.txt
index 8cb10dd..b1cf8cf 100644
--- a/src/includes/perfmon_sandybridge_events.txt
+++ b/src/includes/perfmon_sandybridge_events.txt
@@ -4,14 +4,14 @@
 #
 #      Description:  Event list for Intel SandyBridge
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
@@ -80,22 +80,28 @@ UMASK_INT_MISC_RAT_STALL_CYCLES      0x40
 DEFAULT_OPTIONS_INT_MISC_RAT_STALL_COUNT EVENT_OPTION_EDGE=1
 UMASK_INT_MISC_RAT_STALL_COUNT       0x40
 
-EVENT_UOPS_ISSUED                     0x0E  PMC
-UMASK_UOPS_ISSUED_ANY                 0x01
-DEFAULT_OPTIONS_UOPS_ISSUED_ACTIVE_CYCLES EVENT_OPTION_THRESHOLD=0x1
-UMASK_UOPS_ISSUED_ACTIVE_CYCLES       0x01
+EVENT_UOPS_ISSUED                0x0E  PMC
+UMASK_UOPS_ISSUED_ANY            0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_ISSUED_USED_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
-UMASK_UOPS_ISSUED_STALL_CYCLES        0x01
+UMASK_UOPS_ISSUED_STALL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1
-UMASK_UOPS_ISSUED_TOTAL_CYCLES        0x01
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_ANY EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_ANY            0x01
-DEFAULT_OPTIONS_UOPS_ISSUED_CORE_ACTIVE_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_ISSUED_CORE_ACTIVE_CYCLES  0x01
+UMASK_UOPS_ISSUED_TOTAL_CYCLES   0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_ANYTHREAD=1
+UMASK_UOPS_ISSUED_CORE_USED_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_ISSUED_CORE_STALL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_ISSUED_CORE_TOTAL_CYCLES   0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC 0x01
 
 EVENT_FP_COMP_OPS_EXE                          0x10   PMC
 UMASK_FP_COMP_OPS_EXE_X87                      0x01
@@ -201,13 +207,100 @@ EVENT_CACHE_LOCK_CYCLES                             0x63   PMC
 UMASK_CACHE_LOCK_CYCLES_SPLIT_LOCK_UC_LOCK_DURATION 0x01
 UMASK_CACHE_LOCK_CYCLES_CACHE_LOCK_DURATION         0x02
 
-EVENT_IDQ               0x79   PMC
-UMASK_IDQ_EMPTY         0x02
-UMASK_IDQ_MITE_UOPS     0x04
-UMASK_IDQ_DSB_UOPS      0x08
-UMASK_IDQ_MS_DSB_UOPS   0x10
-UMASK_IDQ_MS_MITE_UOPS  0x20
-UMASK_IDQ_MS_UOPS       0x30
+EVENT_IDQ                               0x79   PMC
+UMASK_IDQ_EMPTY                         0x02
+UMASK_IDQ_MITE_UOPS                     0x04
+UMASK_IDQ_DSB_UOPS                      0x08
+UMASK_IDQ_MS_DSB_UOPS                   0x10
+UMASK_IDQ_MS_MITE_UOPS                  0x20
+UMASK_IDQ_MS_UOPS                       0x30
+UMASK_IDQ_DSB_ALL_UOPS                  0x18
+UMASK_IDQ_MITE_ALL_UOPS                 0x24
+UMASK_IDQ_ALL_UOPS                      0x3C
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES         EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES                   0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES_1_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MITE_CYCLES_2_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MITE_CYCLES_3_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MITE_CYCLES_4_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES          EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES                    0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES_1_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_DSB_CYCLES_2_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_DSB_CYCLES_3_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_DSB_CYCLES_4_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES       EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES                 0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES_1_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_DSB_CYCLES_2_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_DSB_CYCLES_3_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_DSB_CYCLES_4_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_OCCUR        EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
+UMASK_IDQ_MS_DSB_OCCUR                  0x10
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES      EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES                0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES_1_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_MITE_CYCLES_2_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_MITE_CYCLES_3_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_MITE_CYCLES_4_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_CYCLES           EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES                     0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES_1_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_CYCLES_2_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_CYCLES_3_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_CYCLES_4_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_SWITCHES         EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
+UMASK_IDQ_MS_SWITCHES                   0x30
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS       0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_DSB_CYCLES_1_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_DSB_CYCLES_2_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_DSB_CYCLES_3_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS      0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_1_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_MITE_CYCLES_2_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_MITE_CYCLES_3_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_ANY_UOPS      0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_1_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_CYCLES_2_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_CYCLES_3_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_CYCLES_4_UOPS        0x3C
 
 EVENT_ICACHE                    0x80   PMC
 UMASK_ICACHE_HITS               0x01
@@ -276,12 +369,29 @@ EVENT_RESOURCE_STALLS                 0xA2   PMC
 UMASK_RESOURCE_STALLS_ANY             0x01
 UMASK_RESOURCE_STALLS_LB              0x02
 UMASK_RESOURCE_STALLS_RS              0x04
-UMASK_RESOURCE_STALLS_B               0x08
+UMASK_RESOURCE_STALLS_SB              0x08
 UMASK_RESOURCE_STALLS_ROB             0x10
 UMASK_RESOURCE_STALLS_FCSW            0x20
 UMASK_RESOURCE_STALLS_MXCSR           0x40
 UMASK_RESOURCE_STALLS_OTHER           0x80
 
+EVENT_CYCLE_ACTIVITY                               0xA3   PMC
+# Sub-events of CYCLE_ACTIVITY: every entry below sets a threshold equal to its umask value.
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L2_PENDING   EVENT_OPTION_THRESHOLD=0x01
+UMASK_CYCLE_ACTIVITY_CYCLES_L2_PENDING             0x01
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L2_PENDING   EVENT_OPTION_THRESHOLD=0x05
+UMASK_CYCLE_ACTIVITY_STALLS_L2_PENDING             0x05
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_NO_DISPATCH  EVENT_OPTION_THRESHOLD=0x04
+UMASK_CYCLE_ACTIVITY_CYCLES_NO_DISPATCH            0x04
+
+EVENT_CYCLE_ACTIVITY_CYCLES_L1D_PENDING                  0xA3   PMC2
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L1D_PENDING        EVENT_OPTION_THRESHOLD=0x02
+UMASK_CYCLE_ACTIVITY_CYCLES_L1D_PENDING                  0x02
+
+EVENT_CYCLE_ACTIVITY_STALLS_L1D_PENDING                  0xA3   PMC2
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L1D_PENDING        EVENT_OPTION_THRESHOLD=0x06
+UMASK_CYCLE_ACTIVITY_STALLS_L1D_PENDING                  0x06
+
 EVENT_DSB2MITE_SWITCHES                  0xAB   PMC
 UMASK_DSB2MITE_SWITCHES_COUNT            0x01
 UMASK_DSB2MITE_SWITCHES_PENALTY_CYCLES   0x02
@@ -316,6 +426,10 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
 UMASK_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC 0x01
 DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC 0x01
 UMASK_UOPS_EXECUTED_CORE                       0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
 UMASK_UOPS_EXECUTED_CORE_USED_CYCLES           0x02
@@ -331,6 +445,10 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC 0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC 0x02
 
 EVENT_OFFCORE_REQUESTS_BUFFER             0xB2  PMC
 UMASK_OFFCORE_REQUESTS_BUFFER_SQ_FULL     0x01
@@ -380,6 +498,19 @@ DEFAULT_OPTIONS_UOPS_RETIRED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_
 UMASK_UOPS_RETIRED_CORE_STALL_CYCLES     0x01
 DEFAULT_OPTIONS_UOPS_RETIRED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_RETIRED_CORE_TOTAL_CYCLES     0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC 0x01
+
 
 EVENT_MACHINE_CLEARS                    0xC3  PMC
 UMASK_MACHINE_CLEARS_MEMORY_ORDERING    0x02
diff --git a/src/includes/perfmon_silvermont.h b/src/includes/perfmon_silvermont.h
index 3512b99..d6e2cab 100644
--- a/src/includes/perfmon_silvermont.h
+++ b/src/includes/perfmon_silvermont.h
@@ -5,13 +5,13 @@
  *
  *      Description:  Header file of perfmon module for Intel Atom (Silvermont)
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -173,7 +173,7 @@ int perfmon_setupCountersThread_silvermont(
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_FIXED_CTR_CTRL, 0x0ULL));
@@ -182,7 +182,7 @@ int perfmon_setupCountersThread_silvermont(
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -235,7 +235,7 @@ int perfmon_startCountersThread_silvermont(int thread_id, PerfmonEventSet* event
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -271,7 +271,7 @@ int perfmon_startCountersThread_silvermont(int thread_id, PerfmonEventSet* event
         }
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, flags));
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST flags, UNFREEZE_PMC_OR_FIXED)
@@ -293,7 +293,7 @@ int perfmon_stopCountersThread_silvermont(int thread_id, PerfmonEventSet* eventS
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_OR_FIXED)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
@@ -304,7 +304,7 @@ int perfmon_stopCountersThread_silvermont(int thread_id, PerfmonEventSet* eventS
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE) 
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -377,7 +377,7 @@ int perfmon_readCountersThread_silvermont(int thread_id, PerfmonEventSet* eventS
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, &pmc_flags));
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_OR_FIXED)
@@ -389,7 +389,7 @@ int perfmon_readCountersThread_silvermont(int thread_id, PerfmonEventSet* eventS
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -447,7 +447,7 @@ int perfmon_readCountersThread_silvermont(int thread_id, PerfmonEventSet* eventS
             eventSet->events[i].threadCounter[thread_id].counterData = field64(counter_result, 0, box_map[type].regWidth);
         }
     }
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, pmc_flags));
     }
@@ -474,7 +474,7 @@ int perfmon_finalizeCountersThread_silvermont(int thread_id, PerfmonEventSet* ev
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -520,7 +520,10 @@ int perfmon_finalizeCountersThread_silvermont(int thread_id, PerfmonEventSet* ev
         }
         eventSet->events[i].threadCounter[thread_id].init = FALSE;
     }
-    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
-    CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, ovf_values_core));
+    if (MEASURE_CORE(eventSet))
+    {
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
+        CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, ovf_values_core));
+    }
     return 0;
 }
diff --git a/src/includes/perfmon_silvermont_counters.h b/src/includes/perfmon_silvermont_counters.h
index 6428d7f..c16edfc 100644
--- a/src/includes/perfmon_silvermont_counters.h
+++ b/src/includes/perfmon_silvermont_counters.h
@@ -5,13 +5,13 @@
  *
  *      Description: Counter header file of perfmon module for Intel Atom (Silvermont)
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_silvermont_events.txt b/src/includes/perfmon_silvermont_events.txt
index a8a9bea..1c222d3 100644
--- a/src/includes/perfmon_silvermont_events.txt
+++ b/src/includes/perfmon_silvermont_events.txt
@@ -4,13 +4,13 @@
 #
 #      Description:  Event list for Intel Atom (Silvermont)
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_skylake.h b/src/includes/perfmon_skylake.h
index 28363c5..f01a4a7 100644
--- a/src/includes/perfmon_skylake.h
+++ b/src/includes/perfmon_skylake.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Header File of perfmon module for Intel Skylake.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -246,14 +246,14 @@ int perfmon_setupCounterThread_skylake(
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_AND_FIXED)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, 0xC00000070000000F));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PEBS_ENABLE, 0x0ULL));
     }
-    if (haveLock && eventSet->regTypeMask & ~(0xFULL))
+    if (haveLock && MEASURE_UNCORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_V4_UNC_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_UBOXFIX)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_V4_UNC_PERF_GLOBAL_CTRL, 0x0ULL));
@@ -263,7 +263,7 @@ int perfmon_setupCounterThread_skylake(
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -332,7 +332,7 @@ int perfmon_startCountersThread_skylake(int thread_id, PerfmonEventSet* eventSet
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -394,13 +394,13 @@ int perfmon_startCountersThread_skylake(int thread_id, PerfmonEventSet* eventSet
         }
     }
 
-    if ((haveLock) && (eventSet->regTypeMask & ~(0xFULL)))
+    if ((haveLock) && MEASURE_UNCORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_V4_UNC_PERF_GLOBAL_CTRL, uflags|(1ULL<<29), UNFREEZE_UBOXFIX)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_V4_UNC_PERF_GLOBAL_CTRL, uflags|(1ULL<<29)));
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, LLU_CAST (1ULL<<63)|(1ULL<<62)|flags, CLEAR_PMC_AND_FIXED_OVERFLOW)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, (1ULL<<63)|(1ULL<<62)|flags));
@@ -459,12 +459,12 @@ int perfmon_stopCountersThread_skylake(int thread_id, PerfmonEventSet* eventSet)
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_PMC_AND_FIXED)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
     }
-    if ((haveLock) && (eventSet->regTypeMask & ~(0xFULL)))
+    if ((haveLock) && MEASURE_UNCORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_V4_UNC_PERF_GLOBAL_CTRL, 0x0ULL, FREEZE_UBOXFIX)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_V4_UNC_PERF_GLOBAL_CTRL, 0x0ULL));
@@ -475,7 +475,7 @@ int perfmon_stopCountersThread_skylake(int thread_id, PerfmonEventSet* eventSet)
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -503,7 +503,7 @@ int perfmon_stopCountersThread_skylake(int thread_id, PerfmonEventSet* eventSet)
                     break;
 
                 case POWER:
-                    if (haveLock && (eventSet->regTypeMask & REG_TYPE_MASK(POWER)))
+                    if (haveLock)
                     {
                         CHECK_POWER_READ_ERROR(power_read(cpu_id, counter1, (uint32_t*)&counter_result));
                         VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, STOP_POWER)
@@ -552,8 +552,9 @@ int perfmon_stopCountersThread_skylake(int thread_id, PerfmonEventSet* eventSet)
                     break;
             }
         }
+        eventSet->events[i].threadCounter[thread_id].init = FALSE;
     }
-    if ((haveLock) && (eventSet->regTypeMask & ~(0xFULL)))
+    if ((haveLock) && MEASURE_UNCORE(eventSet))
     {
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_V4_UNC_PERF_GLOBAL_STATUS, &counter_result));
         if (counter_result != 0x0ULL)
@@ -580,7 +581,7 @@ int perfmon_readCountersThread_skylake(int thread_id, PerfmonEventSet* eventSet)
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, &flags));
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST flags, SAFE_PMC_FLAGS)
@@ -588,7 +589,7 @@ int perfmon_readCountersThread_skylake(int thread_id, PerfmonEventSet* eventSet)
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, RESET_PMC_FLAGS)
     }
 
-    if ((haveLock) && (eventSet->regTypeMask & ~(0xFULL)))
+    if ((haveLock) && MEASURE_UNCORE(eventSet))
     {
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_V4_UNC_PERF_GLOBAL_CTRL, &uflags));
         VERBOSEPRINTREG(cpu_id, MSR_V4_UNC_PERF_GLOBAL_CTRL, LLU_CAST uflags, SAFE_UNCORE_FLAGS)
@@ -602,7 +603,7 @@ int perfmon_readCountersThread_skylake(int thread_id, PerfmonEventSet* eventSet)
         {
             counter_result= 0x0ULL;
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -629,7 +630,7 @@ int perfmon_readCountersThread_skylake(int thread_id, PerfmonEventSet* eventSet)
                     break;
 
                 case POWER:
-                    if (haveLock && (eventSet->regTypeMask & REG_TYPE_MASK(POWER)))
+                    if (haveLock)
                     {
                         CHECK_POWER_READ_ERROR(power_read(cpu_id, counter1, (uint32_t*)&counter_result));
                         VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, STOP_POWER)
@@ -680,7 +681,7 @@ int perfmon_readCountersThread_skylake(int thread_id, PerfmonEventSet* eventSet)
             }
         }
     }
-    if ((haveLock) && (eventSet->regTypeMask & ~(0xFULL)))
+    if ((haveLock) && MEASURE_UNCORE(eventSet))
     {
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_V4_UNC_PERF_GLOBAL_STATUS, &counter_result));
         if (counter_result != 0x0ULL)
@@ -691,7 +692,7 @@ int perfmon_readCountersThread_skylake(int thread_id, PerfmonEventSet* eventSet)
         VERBOSEPRINTREG(cpu_id, MSR_V4_UNC_PERF_GLOBAL_CTRL, uflags|(1ULL<<29), RESTORE_UNCORE_FLAGS)
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST flags, RESTORE_PMC_FLAGS)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, flags));
@@ -720,7 +721,7 @@ int perfmon_finalizeCountersThread_skylake(int thread_id, PerfmonEventSet* event
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -769,7 +770,7 @@ int perfmon_finalizeCountersThread_skylake(int thread_id, PerfmonEventSet* event
         }
         eventSet->events[i].threadCounter[thread_id].init = FALSE;
     }
-    if (haveLock && eventSet->regTypeMask & ~(0xFULL))
+    if (haveLock && MEASURE_UNCORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_V4_UNC_PERF_GLOBAL_STATUS, LLU_CAST 0x0ULL, CLEAR_UNCORE_STATUS)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_V4_UNC_PERF_GLOBAL_STATUS, 0x0ULL));
@@ -777,7 +778,7 @@ int perfmon_finalizeCountersThread_skylake(int thread_id, PerfmonEventSet* event
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_V4_UNC_PERF_GLOBAL_CTRL, 0x0ULL));
         for (int i=UNCORE;i<NUM_UNITS;i++)
         {
-            if ((eventSet->regTypeMask & (REG_TYPE_MASK(i))) && box_map[i].ctrlRegister != 0x0)
+            if (TESTTYPE(eventSet, i) && box_map[i].ctrlRegister != 0x0)
             {
                 VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL, CLEAR_UNCORE_BOX_CTRL);
                 HPMwrite(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL);
@@ -795,7 +796,7 @@ int perfmon_finalizeCountersThread_skylake(int thread_id, PerfmonEventSet* event
         }
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, LLU_CAST ovf_values_core, CLEAR_GLOBAL_OVF)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, ovf_values_core));
diff --git a/src/includes/perfmon_skylake_counters.h b/src/includes/perfmon_skylake_counters.h
index 2c2630d..afba36b 100644
--- a/src/includes/perfmon_skylake_counters.h
+++ b/src/includes/perfmon_skylake_counters.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Counter Header File of perfmon module for Intel Skylake.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -29,8 +29,8 @@
  * =======================================================================================
  */
 
-#define NUM_COUNTERS_SKYLAKE 24
-#define NUM_COUNTERS_CORE_SKYLAKE 8
+#define NUM_COUNTERS_SKYLAKE 28
+#define NUM_COUNTERS_CORE_SKYLAKE 12
 #define NUM_COUNTERS_UNCORE_SKYLAKE 24
 
 #define SKL_VALID_OPTIONS_FIXED EVENT_OPTION_ANYTHREAD_MASK|EVENT_OPTION_COUNT_KERNEL_MASK
@@ -49,26 +49,31 @@ static RegisterMap skylake_counter_map[NUM_COUNTERS_SKYLAKE] = {
     {"PMC1", PMC4, PMC, MSR_PERFEVTSEL1, MSR_PMC1, 0, 0, SKL_VALID_OPTIONS_PMC},
     {"PMC2", PMC5, PMC, MSR_PERFEVTSEL2, MSR_PMC2, 0, 0, SKL_VALID_OPTIONS_PMC|EVENT_OPTION_IN_TRANS_ABORT_MASK},
     {"PMC3", PMC6, PMC, MSR_PERFEVTSEL3, MSR_PMC3, 0, 0, SKL_VALID_OPTIONS_PMC},
+    /* Additional PMC counters, usable only if HyperThreading is not active: 4 counters, each 48 bit wide */
+    {"PMC4", PMC7, PMC, MSR_PERFEVTSEL4, MSR_PMC4, 0, 0, SKL_VALID_OPTIONS_PMC},
+    {"PMC5", PMC8, PMC, MSR_PERFEVTSEL5, MSR_PMC5, 0, 0, SKL_VALID_OPTIONS_PMC},
+    {"PMC6", PMC9, PMC, MSR_PERFEVTSEL6, MSR_PMC6, 0, 0, SKL_VALID_OPTIONS_PMC|EVENT_OPTION_IN_TRANS_ABORT_MASK},
+    {"PMC7", PMC10, PMC, MSR_PERFEVTSEL7, MSR_PMC7, 0, 0, SKL_VALID_OPTIONS_PMC},
     /* Temperature Sensor*/
-    {"TMP0", PMC7, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"TMP0", PMC11, THERMAL, 0, IA32_THERM_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
     /* RAPL counters */
-    {"PWR0", PMC8, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR1", PMC9, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR2", PMC10, POWER, 0, MSR_PP1_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR3", PMC11, POWER, 0, MSR_DRAM_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
-    {"PWR4", PMC12, POWER, 0, MSR_PLATFORM_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR0", PMC12, POWER, 0, MSR_PKG_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR1", PMC13, POWER, 0, MSR_PP0_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR2", PMC14, POWER, 0, MSR_PP1_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR3", PMC15, POWER, 0, MSR_DRAM_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
+    {"PWR4", PMC16, POWER, 0, MSR_PLATFORM_ENERGY_STATUS,  0, 0, EVENT_OPTION_NONE_MASK},
     /* Test */
-    {"UBOXFIX", PMC13, UBOXFIX, MSR_UNC_PERF_FIXED_CTRL, MSR_UNC_PERF_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
-    {"UBOX0", PMC14, UBOX, MSR_V4_ARB_PERF_FIXED_CTRL0, MSR_V4_ARB_PERF_FIXED_CTR0, 0, 0, SKL_VALID_OPTIONS_UBOX},
-    {"UBOX1", PMC15, UBOX, MSR_V4_ARB_PERF_FIXED_CTRL1, MSR_V4_ARB_PERF_FIXED_CTR1, 0, 0, SKL_VALID_OPTIONS_UBOX},
-    {"CBOX0C0", PMC16, CBOX0, MSR_V4_C0_PERF_FIXED_CTRL0, MSR_V4_C0_PERF_FIXED_CTR0, 0, 0, SKL_VALID_OPTIONS_CBOX},
-    {"CBOX0C1", PMC17, CBOX0, MSR_V4_C0_PERF_FIXED_CTRL1, MSR_V4_C0_PERF_FIXED_CTR1, 0, 0, SKL_VALID_OPTIONS_CBOX},
-    {"CBOX1C0", PMC18, CBOX1, MSR_V4_C1_PERF_FIXED_CTRL0, MSR_V4_C1_PERF_FIXED_CTR0, 0, 0, SKL_VALID_OPTIONS_CBOX},
-    {"CBOX1C1", PMC19, CBOX1, MSR_V4_C1_PERF_FIXED_CTRL1, MSR_V4_C1_PERF_FIXED_CTR1, 0, 0, SKL_VALID_OPTIONS_CBOX},
-    {"CBOX2C0", PMC20, CBOX2, MSR_V4_C2_PERF_FIXED_CTRL0, MSR_V4_C2_PERF_FIXED_CTR0, 0, 0, SKL_VALID_OPTIONS_CBOX},
-    {"CBOX2C1", PMC21, CBOX2, MSR_V4_C2_PERF_FIXED_CTRL1, MSR_V4_C2_PERF_FIXED_CTR1, 0, 0, SKL_VALID_OPTIONS_CBOX},
-    {"CBOX3C0", PMC22, CBOX3, MSR_V4_C3_PERF_FIXED_CTRL0, MSR_V4_C3_PERF_FIXED_CTR0, 0, 0, SKL_VALID_OPTIONS_CBOX},
-    {"CBOX3C1", PMC23, CBOX3, MSR_V4_C3_PERF_FIXED_CTRL1, MSR_V4_C3_PERF_FIXED_CTR1, 0, 0, SKL_VALID_OPTIONS_CBOX},
+    {"UBOXFIX", PMC17, UBOXFIX, MSR_UNC_PERF_FIXED_CTRL, MSR_UNC_PERF_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
+    {"UBOX0", PMC18, UBOX, MSR_V4_ARB_PERF_FIXED_CTRL0, MSR_V4_ARB_PERF_FIXED_CTR0, 0, 0, SKL_VALID_OPTIONS_UBOX},
+    {"UBOX1", PMC19, UBOX, MSR_V4_ARB_PERF_FIXED_CTRL1, MSR_V4_ARB_PERF_FIXED_CTR1, 0, 0, SKL_VALID_OPTIONS_UBOX},
+    {"CBOX0C0", PMC20, CBOX0, MSR_V4_C0_PERF_FIXED_CTRL0, MSR_V4_C0_PERF_FIXED_CTR0, 0, 0, SKL_VALID_OPTIONS_CBOX},
+    {"CBOX0C1", PMC21, CBOX0, MSR_V4_C0_PERF_FIXED_CTRL1, MSR_V4_C0_PERF_FIXED_CTR1, 0, 0, SKL_VALID_OPTIONS_CBOX},
+    {"CBOX1C0", PMC22, CBOX1, MSR_V4_C1_PERF_FIXED_CTRL0, MSR_V4_C1_PERF_FIXED_CTR0, 0, 0, SKL_VALID_OPTIONS_CBOX},
+    {"CBOX1C1", PMC23, CBOX1, MSR_V4_C1_PERF_FIXED_CTRL1, MSR_V4_C1_PERF_FIXED_CTR1, 0, 0, SKL_VALID_OPTIONS_CBOX},
+    {"CBOX2C0", PMC24, CBOX2, MSR_V4_C2_PERF_FIXED_CTRL0, MSR_V4_C2_PERF_FIXED_CTR0, 0, 0, SKL_VALID_OPTIONS_CBOX},
+    {"CBOX2C1", PMC25, CBOX2, MSR_V4_C2_PERF_FIXED_CTRL1, MSR_V4_C2_PERF_FIXED_CTR1, 0, 0, SKL_VALID_OPTIONS_CBOX},
+    {"CBOX3C0", PMC26, CBOX3, MSR_V4_C3_PERF_FIXED_CTRL0, MSR_V4_C3_PERF_FIXED_CTR0, 0, 0, SKL_VALID_OPTIONS_CBOX},
+    {"CBOX3C1", PMC27, CBOX3, MSR_V4_C3_PERF_FIXED_CTRL1, MSR_V4_C3_PERF_FIXED_CTR1, 0, 0, SKL_VALID_OPTIONS_CBOX},
 };
 
 
diff --git a/src/includes/perfmon_skylake_events.txt b/src/includes/perfmon_skylake_events.txt
index 35aede4..1948bde 100644
--- a/src/includes/perfmon_skylake_events.txt
+++ b/src/includes/perfmon_skylake_events.txt
@@ -4,14 +4,14 @@
 #
 #      Description:  Event list for Intel Skylake
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
@@ -80,34 +80,9 @@ UMASK_BACLEARS_ANY                  0x01
 EVENT_ITLB_FLUSH                    0xAE PMC
 UMASK_ITLB_FLUSH                    0x01
 
-EVENT_LSD_UOPS                      0xA8 PMC
-UMASK_LSD_UOPS                      0x01
-
 EVENT_ILD_STALL_LCP                 0x87 PMC
 UMASK_ILD_STALL_LCP                 0x01
 
-EVENT_IDQ                           0x79 PMC
-UMASK_IDQ_MITE_UOPS                 0x04
-UMASK_IDQ_DSB_UOPS                  0x08
-UMASK_IDQ_MS_MITE_UOPS              0x20
-
-EVENT_IDQ                           0x79 PMC
-DEFAULT_OPTIONS_IDQ_MS_CYCLES       EVENT_OPTION_THRESHOLD=0x1
-UMASK_IDQ_MS_CYCLES                 0x30
-DEFAULT_OPTIONS_IDQ_MITE_CYCLES     EVENT_OPTION_THRESHOLD=0x1
-UMASK_IDQ_MITE_CYCLES               0x04
-DEFAULT_OPTIONS_IDQ_DSB_CYCLES      EVENT_OPTION_THRESHOLD=0x1
-UMASK_IDQ_DSB_CYCLES                0x08
-DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES   EVENT_OPTION_THRESHOLD=0x1
-UMASK_IDQ_MS_DSB_CYCLES             0x10
-DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_4_UOPS   EVENT_OPTION_THRESHOLD=0x4
-UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS     0x18
-DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_ANY_UOPS   EVENT_OPTION_THRESHOLD=0x1
-UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS   0x18
-DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_4_UOPS   EVENT_OPTION_THRESHOLD=0x4
-UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS    0x24
-DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_ANY_UOPS   EVENT_OPTION_THRESHOLD=0x1
-UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS  0x24
 
 EVENT_IDQ_ALL_MITE_CYCLES_ANY_UOPS      0x9C PMC
 UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS_CORE 0x01
@@ -158,6 +133,17 @@ DEFAULT_OPTIONS_UOPS_ISSUED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_O
 UMASK_UOPS_ISSUED_CORE_STALL_CYCLES   0x01
 DEFAULT_OPTIONS_UOPS_ISSUED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_ISSUED_CORE_TOTAL_CYCLES   0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_ISSUED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_ISSUED_CYCLES_GE_5_UOPS_EXEC 0x01
+
 
 
 EVENT_TX_EXEC                           0x5D PMC
@@ -204,25 +190,45 @@ UMASK_HW_INTERRUPTS_RECEIVED            0x01
 EVENT_INST_RETIRED                      0xC0 PMC
 UMASK_INST_RETIRED_ANY                  0x00
 
-EVENT_UOPS_RETIRED                  0xC2  PMC
-UMASK_UOPS_RETIRED_ALL              0x01
+EVENT_UOPS_RETIRED                       0xC2  PMC
+UMASK_UOPS_RETIRED_ALL                   0x01
 DEFAULT_OPTIONS_UOPS_RETIRED_CORE_ALL EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_RETIRED_CORE_ALL         0x01
-UMASK_UOPS_RETIRED_RETIRE_SLOTS     0x02
+UMASK_UOPS_RETIRED_CORE_ALL              0x01
+UMASK_UOPS_RETIRED_RETIRE_SLOTS          0x02
 DEFAULT_OPTIONS__UOPS_RETIRED_CORE_RETIRE_SLOTS EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_RETIRED_CORE_RETIRE_SLOTS     0x02
 DEFAULT_OPTIONS_UOPS_RETIRED_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
-UMASK_UOPS_RETIRED_USED_CYCLES     0x01
+UMASK_UOPS_RETIRED_USED_CYCLES           0x01
 DEFAULT_OPTIONS_UOPS_RETIRED_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
-UMASK_UOPS_RETIRED_STALL_CYCLES     0x01
+UMASK_UOPS_RETIRED_STALL_CYCLES          0x01
 DEFAULT_OPTIONS_UOPS_RETIRED_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1
-UMASK_UOPS_RETIRED_TOTAL_CYCLES     0x01
+UMASK_UOPS_RETIRED_TOTAL_CYCLES          0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CORE_ALL EVENT_OPTION_ANYTHREAD=1
+UMASK_UOPS_RETIRED_CORE_ALL              0x01
+DEFAULT_OPTIONS__UOPS_RETIRED_CORE_RETIRE_SLOTS EVENT_OPTION_ANYTHREAD=1
+UMASK_UOPS_RETIRED_CORE_RETIRE_SLOTS     0x02
 DEFAULT_OPTIONS_UOPS_RETIRED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_ANYTHREAD=1
-UMASK_UOPS_RETIRED_CORE_USED_CYCLES     0x01
+UMASK_UOPS_RETIRED_CORE_USED_CYCLES      0x01
 DEFAULT_OPTIONS_UOPS_RETIRED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_RETIRED_CORE_STALL_CYCLES     0x01
 DEFAULT_OPTIONS_UOPS_RETIRED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_RETIRED_CORE_TOTAL_CYCLES     0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
+UMASK_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
+UMASK_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
+UMASK_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
+UMASK_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_7_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x7
+UMASK_UOPS_RETIRED_CYCLES_GE_7_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_RETIRED_CYCLES_GE_8_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x8
+UMASK_UOPS_RETIRED_CYCLES_GE_8_UOPS_EXEC 0x01
 
 EVENT_BR_INST_RETIRED                   0xC4 PMC
 UMASK_BR_INST_RETIRED_ALL_BRANCHES      0x00
@@ -286,6 +292,7 @@ UMASK_FRONTEND_RETIRED_LATENCY_GE_2_BUBBLES_GE_2 0x01 0x00 0x200206
 UMASK_FRONTEND_RETIRED_LATENCY_GE_4     0x01 0x00 0x400406
 
 EVENT_UOPS_EXECUTED                       0xB1   PMC
+EVENT_UOPS_EXECUTED                       0xB1   PMC
 UMASK_UOPS_EXECUTED_THREAD                0x01
 DEFAULT_OPTIONS_UOPS_EXECUTED_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
 UMASK_UOPS_EXECUTED_USED_CYCLES           0x01
@@ -293,8 +300,6 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTI
 UMASK_UOPS_EXECUTED_STALL_CYCLES          0x01
 DEFAULT_OPTIONS_UOPS_EXECUTED_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1
 UMASK_UOPS_EXECUTED_TOTAL_CYCLES          0x01
-DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_NONE_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
-UMASK_UOPS_EXECUTED_CYCLES_NONE_UOPS_EXEC 0x01
 DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
 UMASK_UOPS_EXECUTED_CYCLES_GE_1_UOPS_EXEC 0x01
 DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
@@ -303,6 +308,14 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x3
 UMASK_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC 0x01
 DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_7_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x7
+UMASK_UOPS_EXECUTED_CYCLES_GE_7_UOPS_EXEC 0x01
+DEFAULT_OPTIONS_UOPS_EXECUTED_CYCLES_GE_8_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x8
+UMASK_UOPS_EXECUTED_CYCLES_GE_8_UOPS_EXEC 0x01
 UMASK_UOPS_EXECUTED_CORE                  0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_USED_CYCLES EVENT_OPTION_THRESHOLD=0x1
 UMASK_UOPS_EXECUTED_CORE_USED_CYCLES           0x02
@@ -310,8 +323,6 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_STALL_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT
 UMASK_UOPS_EXECUTED_CORE_STALL_CYCLES          0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_TOTAL_CYCLES EVENT_OPTION_THRESHOLD=0xA,EVENT_OPTION_INVERT=1
 UMASK_UOPS_EXECUTED_CORE_TOTAL_CYCLES          0x02
-DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_NONE_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
-UMASK_UOPS_EXECUTED_CORE_CYCLES_NONE_UOPS_EXEC 0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_1_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x1
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_1_UOPS_EXEC 0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_2_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x2
@@ -320,6 +331,14 @@ DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC EVENT_OPTION_THRESHOLD=
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC 0x02
 DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x4
 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x5
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x6
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_7_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x7
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_7_UOPS_EXEC 0x02
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_CYCLES_GE_8_UOPS_EXEC EVENT_OPTION_THRESHOLD=0x8
+UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_8_UOPS_EXEC 0x02
 UMASK_UOPS_EXECUTED_X87                 0x10
 
 
@@ -341,60 +360,93 @@ UMASK_UOPS_DISPATCHED_PORT_PORT_5       0x20
 UMASK_UOPS_DISPATCHED_PORT_PORT_6       0x40
 UMASK_UOPS_DISPATCHED_PORT_PORT_7       0x80
 UMASK_UOPS_DISPATCHED_PORT_ARITH_PORTS      0x63
-DEFAULT_OPTIONS_UOPS_EXECUTED_PORT_ARITH_PORTS_CORE    EVENT_OPTION_ANYTHREAD=1
+DEFAULT_OPTIONS_UOPS_DISPATCHED_PORT_ARITH_PORTS_CORE    EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_DISPATCHED_PORT_ARITH_PORTS_CORE 0x63
-DEFAULT_OPTIONS_UOPS_EXECUTED_PORT_DATA_PORTS    EVENT_OPTION_ANYTHREAD=1
+DEFAULT_OPTIONS_UOPS_DISPATCHED_PORT_DATA_PORTS    EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_DISPATCHED_PORT_DATA_PORTS       0x9C
 
 EVENT_CYCLE_ACTIVITY_STALLS_TOTAL       0xA3 PMC
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_TOTAL EVENT_OPTION_THRESHOLD=0x4
 UMASK_CYCLE_ACTIVITY_STALLS_TOTAL       0x04
-DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L1D_MISS EVENT_OPTION_THRESHOLD=0x8
-UMASK_CYCLE_ACTIVITY_CYCLES_L1D_MISS    0x08
-DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L1D_MISS EVENT_OPTION_THRESHOLD=0xC
-UMASK_CYCLE_ACTIVITY_CYCLES_L1D_MISS    0x0C
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE EVENT_OPTION_THRESHOLD=0x4
+UMASK_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE       0x04
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L2_MISS EVENT_OPTION_THRESHOLD=0x1
 UMASK_CYCLE_ACTIVITY_CYCLES_L2_MISS     0x01
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L2_MISS EVENT_OPTION_THRESHOLD=0x5
 UMASK_CYCLE_ACTIVITY_STALLS_L2_MISS     0x05
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L2_PENDING EVENT_OPTION_THRESHOLD=0x1
+UMASK_CYCLE_ACTIVITY_CYCLES_L2_PENDING     0x01
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L2_PENDING EVENT_OPTION_THRESHOLD=0x5
+UMASK_CYCLE_ACTIVITY_STALLS_L2_PENDING     0x05
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L3_MISS EVENT_OPTION_THRESHOLD=0x2
 UMASK_CYCLE_ACTIVITY_CYCLES_L3_MISS     0x02
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L3_MISS EVENT_OPTION_THRESHOLD=0x6
 UMASK_CYCLE_ACTIVITY_STALLS_L3_MISS     0x06
-
-
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L3_PENDING EVENT_OPTION_THRESHOLD=0x2
+UMASK_CYCLE_ACTIVITY_CYCLES_L3_PENDING     0x02
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L3_PENDING EVENT_OPTION_THRESHOLD=0x6
+UMASK_CYCLE_ACTIVITY_STALLS_L3_PENDING     0x06
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_MEM_ANY EVENT_OPTION_THRESHOLD=0x10
 UMASK_CYCLE_ACTIVITY_CYCLES_MEM_ANY     0x10
 DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_MEM_ANY EVENT_OPTION_THRESHOLD=0x14
 UMASK_CYCLE_ACTIVITY_STALLS_MEM_ANY     0x14
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_LDM_PENDING EVENT_OPTION_THRESHOLD=0x10
+UMASK_CYCLE_ACTIVITY_CYCLES_LDM_PENDING     0x10
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_LDM_PENDING EVENT_OPTION_THRESHOLD=0x14
+UMASK_CYCLE_ACTIVITY_STALLS_LDM_PENDING     0x14
 
+EVENT_CYCLE_ACTIVITY_CYCLES_L1D_MISS  0xA3 PMC2
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L1D_MISS EVENT_OPTION_THRESHOLD=0x8
+UMASK_CYCLE_ACTIVITY_CYCLES_L1D_MISS    0x08
+
+EVENT_CYCLE_ACTIVITY_STALLS_L1D_MISS  0xA3 PMC2
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L1D_MISS EVENT_OPTION_THRESHOLD=0xC
+UMASK_CYCLE_ACTIVITY_STALLS_L1D_MISS    0x0C
+
+EVENT_CYCLE_ACTIVITY_CYCLES_L1D_PENDING 0xA3 PMC2
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L1D_PENDING EVENT_OPTION_THRESHOLD=0x8
+UMASK_CYCLE_ACTIVITY_CYCLES_L1D_PENDING    0x08
+
+EVENT_CYCLE_ACTIVITY_STALLS_L1D_PENDING 0xA3 PMC2
+DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L1D_PENDING EVENT_OPTION_THRESHOLD=0xC
+UMASK_CYCLE_ACTIVITY_STALLS_L1D_PENDING    0x0C
 
-EVENT_EPT_WALK_PENDING                  0x4F PMC
+EVENT_EPT_WALK_PENDING  0x4F PMC
 UMASK_EPT_WALK_PENDING                  0x10
 
 EVENT_ITLB_MISSES                       0x85 PMC
 UMASK_ITLB_MISSES_CAUSES_A_WALK         0x01
+
 UMASK_ITLB_MISSES_WALK_PENDING          0x10
 UMASK_ITLB_MISSES_STLB_HIT              0x20
 UMASK_ITLB_MISSES_WALK_COMPLETED        0x0E
+UMASK_ITLB_MISSES_WALK_COMPLETED_4K     0x02
+UMASK_ITLB_MISSES_WALK_COMPLETED_2M_4M  0x04
+UMASK_ITLB_MISSES_WALK_COMPLETED_1G     0x08
 DEFAULT_OPTIONS_ITLB_MISSES_WALK_ACTIVE EVENT_OPTION_THRESHOLD=0x1
 UMASK_ITLB_MISSES_WALK_ACTIVE           0x10
 
-EVENT_DTLB_LOAD_MISSES                  0x08 PMC
-UMASK_DTLB_LOAD_MISSES_CAUSES_A_WALK    0x01
-UMASK_DTLB_LOAD_MISSES_WALK_PENDING     0x10
-UMASK_DTLB_LOAD_MISSES_STLB_HIT         0x20
-UMASK_DTLB_LOAD_MISSES_WALK_COMPLETED   0x0E
+EVENT_DTLB_LOAD_MISSES                      0x08 PMC
+UMASK_DTLB_LOAD_MISSES_CAUSES_A_WALK        0x01
+UMASK_DTLB_LOAD_MISSES_WALK_PENDING         0x10
+UMASK_DTLB_LOAD_MISSES_STLB_HIT             0x20
+UMASK_DTLB_LOAD_MISSES_WALK_COMPLETED       0x0E
+UMASK_DTLB_LOAD_MISSES_WALK_COMPLETED_4K    0x02
+UMASK_DTLB_LOAD_MISSES_WALK_COMPLETED_2M_4M 0x04
+UMASK_DTLB_LOAD_MISSES_WALK_COMPLETED_1G    0x08
 DEFAULT_OPTIONS_DTLB_LOAD_MISSES_WALK_ACTIVE EVENT_OPTION_THRESHOLD=0x1
-UMASK_DTLB_LOAD_MISSES_WALK_ACTIVE      0x10
-
-EVENT_DTLB_STORE_MISSES                 0x49 PMC
-UMASK_DTLB_STORE_MISSES_CAUSES_A_WALK   0x01
-UMASK_DTLB_STORE_MISSES_WALK_PENDING    0x10
-UMASK_DTLB_STORE_MISSES_STLB_HIT        0x20
-UMASK_DTLB_STORE_MISSES_WALK_COMPLETED  0x0E
+UMASK_DTLB_LOAD_MISSES_WALK_ACTIVE          0x10
+
+EVENT_DTLB_STORE_MISSES                      0x49 PMC
+UMASK_DTLB_STORE_MISSES_CAUSES_A_WALK        0x01
+UMASK_DTLB_STORE_MISSES_WALK_PENDING         0x10
+UMASK_DTLB_STORE_MISSES_STLB_HIT             0x20
+UMASK_DTLB_STORE_MISSES_WALK_COMPLETED       0x0E
+UMASK_DTLB_STORE_MISSES_WALK_COMPLETED_4K    0x02
+UMASK_DTLB_STORE_MISSES_WALK_COMPLETED_2M_4M 0x04
+UMASK_DTLB_STORE_MISSES_WALK_COMPLETED_1G    0x08
 DEFAULT_OPTIONS_DTLB_STORE_MISSES_WALK_ACTIVE EVENT_OPTION_THRESHOLD=0x1
-UMASK_DTLB_STORE_MISSES_WALK_ACTIVE     0x10
+UMASK_DTLB_STORE_MISSES_WALK_ACTIVE          0x10
 
 EVENT_TLB_FLUSH                         0xBD PMC
 UMASK_TLB_FLUSH_DTLB_THREAD             0x01
@@ -465,6 +517,10 @@ UMASK_OFFCORE_REQUESTS_OUTSTANDING_CYCLES_WITH_L3_MISS_DEMAND_DATA_RD 0x10
 DEFAULT_OPTIONS_OFFCORE_REQUESTS_OUTSTANDING_L3_MISS_DEMAND_DATA_RD_GE_6 EVENT_OPTION_THRESHOLD=0x6
 UMASK_OFFCORE_REQUESTS_OUTSTANDING_L3_MISS_DEMAND_DATA_RD_GE_6 0x10
 
+EVENT_LOCK_CYCLES_CACHE_LOCK_DURATION   0x63 PMC
+UMASK_LOCK_CYCLES_CACHE_LOCK_DURATION   0x02
+DEFAULT_OPTIONS_LOCK_CYCLES_CACHE_LOCK_COUNT EVENT_OPTION_EDGE=0x1
+UMASK_LOCK_CYCLES_CACHE_LOCK_COUNT      0x02
 
 EVENT_OFFCORE_REQUESTS_BUFFER_SQ_FULL   0xB2 PMC
 UMASK_OFFCORE_REQUESTS_BUFFER_SQ_FULL   0x01
@@ -497,11 +553,132 @@ UMASK_L2_RQSTS_MISS                     0x3F
 UMASK_L2_RQSTS_REFERENCES               0xFF
 
 EVENT_IDQ_MS                            0x79 PMC
+UMASK_IDQ_MITE_UOPS                     0x04
+UMASK_IDQ_DSB_UOPS                      0x08
+UMASK_IDQ_MS_DSB_UOPS                   0x10
+UMASK_IDQ_MS_MITE_UOPS                  0x20
 UMASK_IDQ_MS_UOPS                       0x30
-DEFAULT_OPTIONS_IDQ_MS_SWITCHES         EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=0x1
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES         EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES                   0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MITE_CYCLES_1_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MITE_CYCLES_2_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MITE_CYCLES_3_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MITE_CYCLES_4_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_5_UOPS EVENT_OPTION_THRESHOLD=0x5
+UMASK_IDQ_MITE_CYCLES_5_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_MITE_CYCLES_6_UOPS EVENT_OPTION_THRESHOLD=0x6
+UMASK_IDQ_MITE_CYCLES_6_UOPS            0x04
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES          EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES                    0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_DSB_CYCLES_1_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_DSB_CYCLES_2_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_DSB_CYCLES_3_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_DSB_CYCLES_4_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_5_UOPS EVENT_OPTION_THRESHOLD=0x5
+UMASK_IDQ_DSB_CYCLES_5_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_DSB_CYCLES_6_UOPS EVENT_OPTION_THRESHOLD=0x6
+UMASK_IDQ_DSB_CYCLES_6_UOPS             0x08
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES       EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES                 0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_DSB_CYCLES_1_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_DSB_CYCLES_2_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_DSB_CYCLES_3_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_DSB_CYCLES_4_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_5_UOPS EVENT_OPTION_THRESHOLD=0x5
+UMASK_IDQ_MS_DSB_CYCLES_5_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_CYCLES_6_UOPS EVENT_OPTION_THRESHOLD=0x6
+UMASK_IDQ_MS_DSB_CYCLES_6_UOPS          0x10
+DEFAULT_OPTIONS_IDQ_MS_DSB_OCCUR        EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
+UMASK_IDQ_MS_DSB_OCCUR                  0x10
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES      EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES                0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_MITE_CYCLES_1_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_MITE_CYCLES_2_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_MITE_CYCLES_3_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_MITE_CYCLES_4_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_5_UOPS EVENT_OPTION_THRESHOLD=0x5
+UMASK_IDQ_MS_MITE_CYCLES_5_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_MITE_CYCLES_6_UOPS EVENT_OPTION_THRESHOLD=0x6
+UMASK_IDQ_MS_MITE_CYCLES_6_UOPS         0x20
+DEFAULT_OPTIONS_IDQ_MS_CYCLES           EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES                     0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_MS_CYCLES_1_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_MS_CYCLES_2_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_MS_CYCLES_3_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_MS_CYCLES_4_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_5_UOPS EVENT_OPTION_THRESHOLD=0x5
+UMASK_IDQ_MS_CYCLES_5_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_CYCLES_6_UOPS EVENT_OPTION_THRESHOLD=0x6
+UMASK_IDQ_MS_CYCLES_6_UOPS              0x30
+DEFAULT_OPTIONS_IDQ_MS_SWITCHES         EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_EDGE=1
 UMASK_IDQ_MS_SWITCHES                   0x30
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_ANY_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS       0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_DSB_CYCLES_1_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_DSB_CYCLES_2_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_DSB_CYCLES_3_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_5_UOPS EVENT_OPTION_THRESHOLD=0x5
+UMASK_IDQ_ALL_DSB_CYCLES_5_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_DSB_CYCLES_6_UOPS EVENT_OPTION_THRESHOLD=0x6
+UMASK_IDQ_ALL_DSB_CYCLES_6_UOPS         0x18
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS      0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_MITE_CYCLES_1_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_MITE_CYCLES_2_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_MITE_CYCLES_3_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_5_UOPS EVENT_OPTION_THRESHOLD=0x5
+UMASK_IDQ_ALL_MITE_CYCLES_5_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_MITE_CYCLES_6_UOPS EVENT_OPTION_THRESHOLD=0x6
+UMASK_IDQ_ALL_MITE_CYCLES_6_UOPS        0x24
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_ANY_UOPS  EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_ANY_UOPS      0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_IDQ_ALL_CYCLES_1_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_IDQ_ALL_CYCLES_2_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_IDQ_ALL_CYCLES_3_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_IDQ_ALL_CYCLES_4_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_5_UOPS EVENT_OPTION_THRESHOLD=0x5
+UMASK_IDQ_ALL_CYCLES_5_UOPS        0x3C
+DEFAULT_OPTIONS_IDQ_ALL_CYCLES_6_UOPS EVENT_OPTION_THRESHOLD=0x6
+UMASK_IDQ_ALL_CYCLES_6_UOPS        0x3C
 
 EVENT_L2_LINES                          0xF1 PMC
+UMASK_L2_LINES_IN_I                     0x01
+UMASK_L2_LINES_IN_S                     0x02
+UMASK_L2_LINES_IN_E                     0x04
 UMASK_L2_LINES_IN_ALL                   0x07
 
 EVENT_ARITH_DIVIDER_ACTIVE              0x14 PMC
@@ -509,11 +686,20 @@ UMASK_ARITH_DIVIDER_ACTIVE              0x01
 DEFAULT_OPTIONS_ARITH_DIVIDER_COUNT     EVENT_OPTION_EDGE=0x1
 UMASK_ARITH_DIVIDER_COUNT               0x01
 
-EVENT_LSD_CYCLES                        0xA8 PMC
-DEFAULT_OPTIONS_LSD_CYCLES_ACTIVE       EVENT_OPTION_THRESHOLD=0x1
-UMASK_LSD_CYCLES_ACTIVE                 0x01
-DEFAULT_OPTIONS_LSD_CYCLES_4_UOPS       EVENT_OPTION_THRESHOLD=0x4
-UMASK_LSD_CYCLES_4_UOPS                 0x01
+EVENT_LSD_UOPS                 0xA8   PMC
+UMASK_LSD_UOPS                 0x01
+DEFAULT_OPTIONS_LSD_CYCLES_1_UOPS EVENT_OPTION_THRESHOLD=0x1
+UMASK_LSD_CYCLES_1_UOPS         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_2_UOPS EVENT_OPTION_THRESHOLD=0x2
+UMASK_LSD_CYCLES_2_UOPS         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_3_UOPS EVENT_OPTION_THRESHOLD=0x3
+UMASK_LSD_CYCLES_3_UOPS         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_4_UOPS EVENT_OPTION_THRESHOLD=0x4
+UMASK_LSD_CYCLES_4_UOPS         0x01
+DEFAULT_OPTIONS_LSD_CYCLES_ACTIVE EVENT_OPTION_THRESHOLD=0x01
+UMASK_LSD_CYCLES_ACTIVE        0x01
+DEFAULT_OPTIONS_LSD_CYCLES_INACTIVE EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_INVERT=1
+UMASK_LSD_CYCLES_INACTIVE         0x01
 
 EVENT_OTHER_ASSISTS_ANY                 0xC1 PMC
 UMASK_OTHER_ASSISTS_ANY                 0x3F
diff --git a/src/includes/perfmon_types.h b/src/includes/perfmon_types.h
index e4e427b..8351ce5 100644
--- a/src/includes/perfmon_types.h
+++ b/src/includes/perfmon_types.h
@@ -7,14 +7,14 @@
  *                    Configures and reads out performance counters
  *                    on x86 based architectures. Supports multi threading.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -107,7 +107,7 @@ extern char* eventOptionTypeName[NUM_EVENT_OPTIONS];
         (1ULL<<type))
 
 
-/** @cond */ 
+/** @cond */
 #define EVENT_OPTION_OPCODE_MASK (1ULL<<EVENT_OPTION_OPCODE)
 #define EVENT_OPTION_MATCH0_MASK (1ULL<<EVENT_OPTION_MATCH0)
 #define EVENT_OPTION_MATCH1_MASK (1ULL<<EVENT_OPTION_MATCH1)
@@ -135,7 +135,7 @@ extern char* eventOptionTypeName[NUM_EVENT_OPTIONS];
 
 /*! \brief Structure specifying thread to CPU relation
 
-Threads are always numbered incrementally. This structure is used in order to 
+Threads are always numbered incrementally. This structure is used in order to
 resolve the real HW thread ID.
 \extends PerfmonGroupSet
 */
@@ -214,11 +214,10 @@ typedef struct {
     TimerData             timer; /*!< \brief Time information how long the counters were running */
     double                rdtscTime; /*!< \brief Evaluation of the Time information in seconds */
     double                runTime; /*!< \brief Sum of all time information in seconds that the group was running */
-#ifdef __x86_64
-    __uint128_t           regTypeMask; /*!< \brief Bitmask for easy checks which types are included in the eventSet */
-#else
-    uint64_t              regTypeMask; /*!< \brief Bitmask for easy checks which types are included in the eventSet */
-#endif
+    uint64_t              regTypeMask1; /*!< \brief Bitmask1 for easy checks which types are included in the eventSet */
+    uint64_t              regTypeMask2; /*!< \brief Bitmask2 for easy checks which types are included in the eventSet */
+    uint64_t              regTypeMask3; /*!< \brief Bitmask3 for easy checks which types are included in the eventSet */
+    uint64_t              regTypeMask4; /*!< \brief Bitmask4 for easy checks which types are included in the eventSet */
     GroupState            state; /*!< \brief Current state of the event group (configured, started, none) */
     GroupInfo             group; /*!< \brief Structure holding the performance group information */
 } PerfmonEventSet;
diff --git a/src/includes/perfmon_westmere.h b/src/includes/perfmon_westmere.h
index e6c2390..d9b3782 100644
--- a/src/includes/perfmon_westmere.h
+++ b/src/includes/perfmon_westmere.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Header File of perfmon module for Intel Westmere.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_westmereEX.h b/src/includes/perfmon_westmereEX.h
index bdf577d..20c86db 100644
--- a/src/includes/perfmon_westmereEX.h
+++ b/src/includes/perfmon_westmereEX.h
@@ -5,14 +5,14 @@
  *
  *      Description:  Header File of perfmon module for Intel Westmere EX.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
@@ -733,7 +733,7 @@ int wex_uncore_freeze(int cpu_id, PerfmonEventSet* eventSet, int flags)
     {
         return 0;
     }
-    if (eventSet->regTypeMask & ~(0xF))
+    if (MEASURE_UNCORE(eventSet))
     {
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_U_PMON_GLOBAL_CTRL, &freeze_flags));
         freeze_flags &= ~(1ULL<<28);
@@ -801,7 +801,7 @@ int wex_uncore_unfreeze(int cpu_id, PerfmonEventSet* eventSet, int flags)
             }
         }
     }
-    if (eventSet->regTypeMask & ~(0xF))
+    if (MEASURE_UNCORE(eventSet))
     {
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_U_PMON_GLOBAL_CTRL, &unfreeze_flags));
         unfreeze_flags |= (1ULL<<28);
@@ -812,7 +812,7 @@ int wex_uncore_unfreeze(int cpu_id, PerfmonEventSet* eventSet, int flags)
 }
 
 #define WEX_RESET_OVF_BOX(id) \
-    if (haveLock && eventSet->regTypeMask & (REG_TYPE_MASK(id))) \
+    if (haveLock && TESTTYPE(eventSet, id)) \
     { \
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, box_map[id].ovflRegister, 0xFFFFFFFF)); \
     }
@@ -832,18 +832,18 @@ int perfmon_setupCounterThread_westmereEX(int thread_id, PerfmonEventSet* eventS
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(PMC)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, 0x0ULL));
     }
 
-    if (haveLock && (eventSet->regTypeMask & ~(0xF)))
+    if (haveLock && MEASURE_UNCORE(eventSet))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_U_PMON_GLOBAL_CTRL, 0x0ULL));
     }
 
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(MBOX0))))
+    if (haveLock && TESTTYPE(eventSet, MBOX0))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_M0_PMON_TIMESTAMP, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_M0_PMON_DSP, 0x0ULL));
@@ -854,7 +854,7 @@ int perfmon_setupCounterThread_westmereEX(int thread_id, PerfmonEventSet* eventS
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_M0_PMON_PLD, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_M0_PMON_ZDP, 0x0ULL));
     }
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(MBOX1))))
+    if (haveLock && TESTTYPE(eventSet, MBOX1))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_M1_PMON_TIMESTAMP, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_M1_PMON_DSP, 0x0ULL));
@@ -865,7 +865,7 @@ int perfmon_setupCounterThread_westmereEX(int thread_id, PerfmonEventSet* eventS
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_M1_PMON_PLD, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_M1_PMON_ZDP, 0x0ULL));
     }
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(RBOX0))))
+    if (haveLock && TESTTYPE(eventSet, RBOX0))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_R0_PMON_IPERF0_P0, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_R0_PMON_IPERF0_P1, 0x0ULL));
@@ -880,7 +880,7 @@ int perfmon_setupCounterThread_westmereEX(int thread_id, PerfmonEventSet* eventS
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_R0_PMON_QLX_P2, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_R0_PMON_QLX_P3, 0x0ULL));
     }
-    if (haveLock && (eventSet->regTypeMask & (REG_TYPE_MASK(RBOX1))))
+    if (haveLock && TESTTYPE(eventSet, RBOX1))
     {
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_R1_PMON_IPERF0_P0, 0x0ULL));
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_R1_PMON_IPERF0_P1, 0x0ULL));
@@ -899,7 +899,7 @@ int perfmon_setupCounterThread_westmereEX(int thread_id, PerfmonEventSet* eventS
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -951,12 +951,13 @@ int perfmon_setupCounterThread_westmereEX(int thread_id, PerfmonEventSet* eventS
                 break;
 
             case WBOX0FIX:
-                if (haveLock && eventSet->regTypeMask & (REG_TYPE_MASK(WBOX0FIX)))
+                if (haveLock)
                 {
                     flags = 0x1;
+                    RegisterType newtype = WBOX;
                     CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, reg , flags));
                     VERBOSEPRINTREG(cpu_id, reg, LLU_CAST flags, WBOX0FIX_CTRL);
-                    eventSet->regTypeMask |= REG_TYPE_MASK(WBOX);
+                    SETTYPE(eventSet, newtype);
                 }
                 break;
 
@@ -981,7 +982,7 @@ int perfmon_setupCounterThread_westmereEX(int thread_id, PerfmonEventSet* eventS
         }
     }
 
-    if (haveLock && (eventSet->regTypeMask & ~(0xF)))
+    if (haveLock && MEASURE_UNCORE(eventSet))
     {
         for ( int i=0; i<NUM_UNITS; i++ )
         {
@@ -1043,7 +1044,7 @@ int perfmon_startCountersThread_westmereEX(int thread_id, PerfmonEventSet* event
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE) 
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -1071,7 +1072,7 @@ int perfmon_startCountersThread_westmereEX(int thread_id, PerfmonEventSet* event
     wex_uncore_unfreeze(cpu_id, eventSet, FREEZE_FLAG_CLEAR_CTR);
 
     /* Finally enable counters */
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST core_ctrl_flags, GLOBAL_CTRL);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, core_ctrl_flags));
@@ -1155,7 +1156,7 @@ int perfmon_stopCountersThread_westmereEX(int thread_id, PerfmonEventSet* eventS
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST 0x0ULL, GLOBAL_CTRL);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
@@ -1167,7 +1168,7 @@ int perfmon_stopCountersThread_westmereEX(int thread_id, PerfmonEventSet* eventS
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -1187,7 +1188,7 @@ int perfmon_stopCountersThread_westmereEX(int thread_id, PerfmonEventSet* eventS
                     VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, READ_FIXED);
                     break;
                 default:
-                    if(haveLock && (eventSet->regTypeMask & REG_TYPE_MASK(type)))
+                    if(haveLock && TESTTYPE(eventSet, type))
                     {
                         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, counter1, &counter_result));
                         WEX_CHECK_UNCORE_OVERFLOW(type, index);
@@ -1214,7 +1215,7 @@ int perfmon_readCountersThread_westmereEX(int thread_id, PerfmonEventSet* eventS
         haveLock = 1;
     }
 
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, &core_ctrl_flags));
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST core_ctrl_flags, SAFE_PMC_FLAGS)
@@ -1228,7 +1229,7 @@ int perfmon_readCountersThread_westmereEX(int thread_id, PerfmonEventSet* eventS
         if (eventSet->events[i].threadCounter[thread_id].init == TRUE)
         {
             RegisterType type = eventSet->events[i].type;
-            if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+            if (!TESTTYPE(eventSet, type))
             {
                 continue;
             }
@@ -1261,7 +1262,7 @@ int perfmon_readCountersThread_westmereEX(int thread_id, PerfmonEventSet* eventS
     }
 
     wex_uncore_unfreeze(cpu_id, eventSet, FREEZE_FLAG_ONLYFREEZE);
-    if ((eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED))) && (core_ctrl_flags != 0x0ULL))
+    if ((MEASURE_CORE(eventSet)) && (core_ctrl_flags != 0x0ULL))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, LLU_CAST core_ctrl_flags, RESTORE_PMC_FLAGS)
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, core_ctrl_flags));
@@ -1289,7 +1290,7 @@ int perfmon_finalizeCountersThread_westmereEX(int thread_id, PerfmonEventSet* ev
     for (int i=0;i < eventSet->numberOfEvents;i++)
     {
         RegisterType type = eventSet->events[i].type;
-        if (!(eventSet->regTypeMask & (REG_TYPE_MASK(type))))
+        if (!TESTTYPE(eventSet, type))
         {
             continue;
         }
@@ -1331,14 +1332,14 @@ int perfmon_finalizeCountersThread_westmereEX(int thread_id, PerfmonEventSet* ev
         }
         eventSet->events[i].threadCounter[thread_id].init = FALSE;
     }
-    if (eventSet->regTypeMask & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+    if (MEASURE_CORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_CTRL, 0x0ULL, CLEAR_PMC_AND_FIXED_CTL);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_CTRL, 0x0ULL));
         VERBOSEPRINTREG(cpu_id, MSR_PERF_GLOBAL_OVF_CTRL, ovf_values_core, CLEAR_PMC_AND_FIXED_OVERFLOW);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, ovf_values_core));
     }
-    if (haveLock && (eventSet->regTypeMask & ~(0xF)))
+    if (haveLock && MEASURE_UNCORE(eventSet))
     {
         VERBOSEPRINTREG(cpu_id, MSR_U_PMON_GLOBAL_CTRL, 0x0ULL, CLEAR_UNCORE_CTL);
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_U_PMON_GLOBAL_CTRL, 0x0ULL));
@@ -1346,7 +1347,7 @@ int perfmon_finalizeCountersThread_westmereEX(int thread_id, PerfmonEventSet* ev
         CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_U_PMON_GLOBAL_OVF_CTRL, 0x0ULL));
         for (int i=UNCORE;i<NUM_UNITS;i++)
         {
-            if ((eventSet->regTypeMask & (REG_TYPE_MASK(i))) && box_map[i].ctrlRegister != 0x0)
+            if (TESTTYPE(eventSet, i) && box_map[i].ctrlRegister != 0x0)
             {
                 VERBOSEPRINTPCIREG(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL, CLEAR_UNCORE_BOX_CTRL);
                 HPMwrite(cpu_id, box_map[i].device, box_map[i].ctrlRegister, 0x0ULL);
diff --git a/src/includes/perfmon_westmereEX_counters.h b/src/includes/perfmon_westmereEX_counters.h
index af9ab15..63f77c5 100644
--- a/src/includes/perfmon_westmereEX_counters.h
+++ b/src/includes/perfmon_westmereEX_counters.h
@@ -5,14 +5,14 @@
  *
  *      Description: Counter Header File of perfmon module for Westmere EX.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
  *
- *      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
  *
  *      This program is free software: you can redistribute it and/or modify it under
  *      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_westmereEX_events.txt b/src/includes/perfmon_westmereEX_events.txt
index 4b68ecb..9dedb39 100644
--- a/src/includes/perfmon_westmereEX_events.txt
+++ b/src/includes/perfmon_westmereEX_events.txt
@@ -4,14 +4,14 @@
 #
 #      Description:  Event list for Intel WestmereEX
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
@@ -309,9 +309,15 @@ UMASK_UOPS_EXECUTED_PORT1           0x02
 UMASK_UOPS_EXECUTED_PORT2_CORE      0x04
 UMASK_UOPS_EXECUTED_PORT3_CORE      0x08
 UMASK_UOPS_EXECUTED_PORT4_CORE      0x10
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_ACTIVE_CYCLES_NO_PORT5 EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_EXECUTED_CORE_ACTIVE_CYCLES_NO_PORT5 0x1F
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_STALL_COUNT_NO_PORT5 EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_ANYTHREAD=1,EVENT_OPTION_EDGE=1
+UMASK_UOPS_EXECUTED_CORE_STALL_COUNT_NO_PORT5 0x1F
 UMASK_UOPS_EXECUTED_PORT5           0x20
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_ACTIVE_CYCLES EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_ANYTHREAD=1
 UMASK_UOPS_EXECUTED_CORE_ACTIVE_CYCLES 0x3F
+DEFAULT_OPTIONS_UOPS_EXECUTED_CORE_STALL_COUNT EVENT_OPTION_THRESHOLD=0x1,EVENT_OPTION_ANYTHREAD=1,EVENT_OPTION_EDGE=1
+UMASK_UOPS_EXECUTED_CORE_STALL_COUNT 0x3F
 UMASK_UOPS_EXECUTED_PORT015         0x40
 UMASK_UOPS_EXECUTED_PORT015_STALL_CYCLES   0x40 0xC1  0x01
 UMASK_UOPS_EXECUTED_PORT234         0x80
diff --git a/src/includes/perfmon_westmere_events.txt b/src/includes/perfmon_westmere_events.txt
index add7948..ff18125 100644
--- a/src/includes/perfmon_westmere_events.txt
+++ b/src/includes/perfmon_westmere_events.txt
@@ -4,14 +4,14 @@
 #
 #      Description:  Event list for Intel Westmere
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:   Jan Treibig (jt), jan.treibig at gmail.com
 #                Thomas Roehl (tr), thomas.roehl at googlemail.com
 #      Project:  likwid
 #
-#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 #
 #      This program is free software: you can redistribute it and/or modify it under
 #      the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/power.h b/src/includes/power.h
index abe6fe7..82b26ab 100644
--- a/src/includes/power.h
+++ b/src/includes/power.h
@@ -6,8 +6,8 @@
  *      Description:  Header File Power Module
  *                    Implements Intel RAPL Interface.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -29,7 +29,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef POWER_H
 #define POWER_H
 
@@ -68,7 +67,7 @@ uint32_t info_regs[NUM_POWER_DOMAINS] = {MSR_PKG_POWER_INFO,
 
 
 double
-power_printEnergy(PowerData* data)
+power_printEnergy(const PowerData* data)
 {
     return  (double) ((data->after - data->before) * power_info.domains[data->domain].energyUnit);
 }
diff --git a/src/includes/power_types.h b/src/includes/power_types.h
index 09dff96..2f33329 100644
--- a/src/includes/power_types.h
+++ b/src/includes/power_types.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Types file for power module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -28,15 +28,12 @@
  *
  * =======================================================================================
  */
-
 #ifndef POWER_TYPES_H
 #define POWER_TYPES_H
 
 #include <stdint.h>
 #include <likwid.h>
 
-
 extern uint32_t power_regs[NUM_POWER_DOMAINS];
 
-
 #endif /*POWER_TYPES_H*/
diff --git a/src/includes/registers.h b/src/includes/registers.h
index bc81c79..6448d8e 100644
--- a/src/includes/registers.h
+++ b/src/includes/registers.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Register Defines for the perfmon module
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -28,7 +28,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef REGISTERS_H
 #define REGISTERS_H
 
@@ -48,10 +47,10 @@
 #define MSR_PERFEVTSEL1           0x187
 #define MSR_PERFEVTSEL2           0x188
 #define MSR_PERFEVTSEL3           0x189
-#define MSR_PERFEVTSEL4           0x190
-#define MSR_PERFEVTSEL5           0x191
-#define MSR_PERFEVTSEL6           0x192
-#define MSR_PERFEVTSEL7           0x193
+#define MSR_PERFEVTSEL4           0x18A
+#define MSR_PERFEVTSEL5           0x18B
+#define MSR_PERFEVTSEL6           0x18C
+#define MSR_PERFEVTSEL7           0x18D
 #define MSR_PMC0                  0x0C1
 #define MSR_PMC1                  0x0C2
 #define MSR_PMC2                  0x0C3
@@ -91,8 +90,8 @@
 #define MSR_UNCORE_PMC5                0x3B5
 #define MSR_UNCORE_PMC6                0x3B6
 #define MSR_UNCORE_PMC7                0x3B7
-/* 
- * Perfmon V3 (starting with Haswell, according to 
+/*
+ * Perfmon V3 (starting with Haswell, according to
  * Intel software developers guide also for SandyBridge,
  * IvyBridge not mentioned in this section)
  */
@@ -165,10 +164,585 @@
 #define MSR_MIC_PERF_GLOBAL_OVF_CTRL  0x02E
 #define MSR_MIC_PERF_GLOBAL_CTRL      0x02F
 /* Xeon Phi (Knights Landing)*/
-#define MSR_MIC2_PMC0                 0x4C1
-#define MSR_MIC2_PMC1                 0x4C2
+#define MSR_MIC2_PMC0                 0xC1
+#define MSR_MIC2_PMC1                 0xC2
+#define MSR_MIC2_PERFEVTSEL0          0x186
+#define MSR_MIC2_PERFEVTSEL1          0x187
 #define MSR_MIC2_TURBO_RATIO_LIMIT    0x1AD
-
+#define MSR_MIC2_SPFLT_CONTROL	      0x02C
+#define MSR_MIC2_PERF_GLOBAL_STATUS    0x02D
+#define MSR_MIC2_PERF_GLOBAL_OVF_CTRL  0x02E
+#define MSR_MIC2_PERF_GLOBAL_CTRL      0x02F
+/* Xeon Phi (Knights Landing) UBOX*/
+#define MSR_MIC2_U_GLOBAL_CTRL      0x700
+#define MSR_MIC2_U_GLOBAL_STATUS    0x701
+#define MSR_MIC2_U_CONFIG           0x702
+#define MSR_MIC2_U_FIXED_CTRL	      0x703
+#define MSR_MIC2_U_FIXED_CTR	      0x704
+#define MSR_MIC2_U_CTRL0	      0x705
+#define MSR_MIC2_U_CTRL1	      0x706
+#define MSR_MIC2_U_OVFL	      0x708
+#define MSR_MIC2_U_CTR0	      0x709
+#define MSR_MIC2_U_CTR1	      0x70A
+/* Xeon Phi (Knights Landing) WBOX (PCU registers) */
+#define MSR_MIC2_PCU_GLOBAL_CTRL    0x710
+#define MSR_MIC2_PCU_CTRL0   0x711
+#define MSR_MIC2_PCU_CTRL1   0x712
+#define MSR_MIC2_PCU_CTRL2   0x713
+#define MSR_MIC2_PCU_CTRL3   0x714
+#define MSR_MIC2_PCU_CTR0    0x717
+#define MSR_MIC2_PCU_CTR1    0x718
+#define MSR_MIC2_PCU_CTR2    0x719
+#define MSR_MIC2_PCU_CTR3    0x71A
+/* Xeon Phi (Knights Landing) Cache boxes*/
+#define MSR_MIC2_C0_GLOBAL_CTRL       0xE00
+#define MSR_MIC2_C0_CTRL0             0xE01
+#define MSR_MIC2_C0_CTRL1             0xE02
+#define MSR_MIC2_C0_CTRL2             0xE03
+#define MSR_MIC2_C0_CTRL3             0xE04
+#define MSR_MIC2_C0_CTR0             0xE08
+#define MSR_MIC2_C0_CTR1             0xE09
+#define MSR_MIC2_C0_CTR2             0xE0A
+#define MSR_MIC2_C0_CTR3             0xE0B
+#define MSR_MIC2_C0_FILTER0          0xE05
+#define MSR_MIC2_C0_FILTER1          0xE06
+#define MSR_MIC2_C0_STATUS           0xE07
+#define MSR_MIC2_C1_GLOBAL_CTRL       0xE0C
+#define MSR_MIC2_C1_CTRL0             0xE0D
+#define MSR_MIC2_C1_CTRL1             0xE0E
+#define MSR_MIC2_C1_CTRL2             0xE0F
+#define MSR_MIC2_C1_CTRL3             0xE10
+#define MSR_MIC2_C1_CTR0             0xE14
+#define MSR_MIC2_C1_CTR1             0xE15
+#define MSR_MIC2_C1_CTR2             0xE16
+#define MSR_MIC2_C1_CTR3             0xE17
+#define MSR_MIC2_C1_FILTER0          0xE11
+#define MSR_MIC2_C1_FILTER1          0xE12
+#define MSR_MIC2_C1_STATUS           0xE13
+#define MSR_MIC2_C2_GLOBAL_CTRL       0xE18
+#define MSR_MIC2_C2_CTRL0             0xE19
+#define MSR_MIC2_C2_CTRL1             0xE1A
+#define MSR_MIC2_C2_CTRL2             0xE1B
+#define MSR_MIC2_C2_CTRL3             0xE1C
+#define MSR_MIC2_C2_CTR0             0xE20
+#define MSR_MIC2_C2_CTR1             0xE21
+#define MSR_MIC2_C2_CTR2             0xE22
+#define MSR_MIC2_C2_CTR3             0xE23
+#define MSR_MIC2_C2_FILTER0          0xE1D
+#define MSR_MIC2_C2_FILTER1          0xE1E
+#define MSR_MIC2_C2_STATUS           0xE1F
+#define MSR_MIC2_C3_GLOBAL_CTRL       0xE24
+#define MSR_MIC2_C3_CTRL0             0xE25
+#define MSR_MIC2_C3_CTRL1             0xE26
+#define MSR_MIC2_C3_CTRL2             0xE27
+#define MSR_MIC2_C3_CTRL3             0xE28
+#define MSR_MIC2_C3_CTR0             0xE2C
+#define MSR_MIC2_C3_CTR1             0xE2D
+#define MSR_MIC2_C3_CTR2             0xE2E
+#define MSR_MIC2_C3_CTR3             0xE2F
+#define MSR_MIC2_C3_FILTER0          0xE29
+#define MSR_MIC2_C3_FILTER1          0xE2A
+#define MSR_MIC2_C3_STATUS           0xE2B
+#define MSR_MIC2_C4_GLOBAL_CTRL       0xE30
+#define MSR_MIC2_C4_CTRL0             0xE31
+#define MSR_MIC2_C4_CTRL1             0xE32
+#define MSR_MIC2_C4_CTRL2             0xE33
+#define MSR_MIC2_C4_CTRL3             0xE34
+#define MSR_MIC2_C4_CTR0             0xE38
+#define MSR_MIC2_C4_CTR1             0xE39
+#define MSR_MIC2_C4_CTR2             0xE3A
+#define MSR_MIC2_C4_CTR3             0xE3B
+#define MSR_MIC2_C4_FILTER0          0xE35
+#define MSR_MIC2_C4_FILTER1          0xE36
+#define MSR_MIC2_C4_STATUS           0xE37
+#define MSR_MIC2_C5_GLOBAL_CTRL       0xE3C
+#define MSR_MIC2_C5_CTRL0             0xE3D
+#define MSR_MIC2_C5_CTRL1             0xE3E
+#define MSR_MIC2_C5_CTRL2             0xE3F
+#define MSR_MIC2_C5_CTRL3             0xE40
+#define MSR_MIC2_C5_CTR0             0xE44
+#define MSR_MIC2_C5_CTR1             0xE45
+#define MSR_MIC2_C5_CTR2             0xE46
+#define MSR_MIC2_C5_CTR3             0xE47
+#define MSR_MIC2_C5_FILTER0          0xE41
+#define MSR_MIC2_C5_FILTER1          0xE42
+#define MSR_MIC2_C5_STATUS           0xE43
+#define MSR_MIC2_C6_GLOBAL_CTRL       0xE48
+#define MSR_MIC2_C6_CTRL0             0xE49
+#define MSR_MIC2_C6_CTRL1             0xE4A
+#define MSR_MIC2_C6_CTRL2             0xE4B
+#define MSR_MIC2_C6_CTRL3             0xE4C
+#define MSR_MIC2_C6_CTR0             0xE50
+#define MSR_MIC2_C6_CTR1             0xE51
+#define MSR_MIC2_C6_CTR2             0xE52
+#define MSR_MIC2_C6_CTR3             0xE53
+#define MSR_MIC2_C6_FILTER0          0xE4D
+#define MSR_MIC2_C6_FILTER1          0xE4E
+#define MSR_MIC2_C6_STATUS           0xE4F
+#define MSR_MIC2_C7_GLOBAL_CTRL       0xE54
+#define MSR_MIC2_C7_CTRL0             0xE55
+#define MSR_MIC2_C7_CTRL1             0xE56
+#define MSR_MIC2_C7_CTRL2             0xE57
+#define MSR_MIC2_C7_CTRL3             0xE58
+#define MSR_MIC2_C7_CTR0             0xE5C
+#define MSR_MIC2_C7_CTR1             0xE5D
+#define MSR_MIC2_C7_CTR2             0xE5E
+#define MSR_MIC2_C7_CTR3             0xE5F
+#define MSR_MIC2_C7_FILTER0          0xE59
+#define MSR_MIC2_C7_FILTER1          0xE5A
+#define MSR_MIC2_C7_STATUS           0xE5B
+#define MSR_MIC2_C8_GLOBAL_CTRL       0xE60
+#define MSR_MIC2_C8_CTRL0             0xE61
+#define MSR_MIC2_C8_CTRL1             0xE62
+#define MSR_MIC2_C8_CTRL2             0xE63
+#define MSR_MIC2_C8_CTRL3             0xE64
+#define MSR_MIC2_C8_CTR0             0xE68
+#define MSR_MIC2_C8_CTR1             0xE69
+#define MSR_MIC2_C8_CTR2             0xE6A
+#define MSR_MIC2_C8_CTR3             0xE6B
+#define MSR_MIC2_C8_FILTER0          0xE65
+#define MSR_MIC2_C8_FILTER1          0xE66
+#define MSR_MIC2_C8_STATUS           0xE67
+#define MSR_MIC2_C9_GLOBAL_CTRL       0xE6C
+#define MSR_MIC2_C9_CTRL0             0xE6D
+#define MSR_MIC2_C9_CTRL1             0xE6E
+#define MSR_MIC2_C9_CTRL2             0xE6F
+#define MSR_MIC2_C9_CTRL3             0xE70
+#define MSR_MIC2_C9_CTR0             0xE74
+#define MSR_MIC2_C9_CTR1             0xE75
+#define MSR_MIC2_C9_CTR2             0xE76
+#define MSR_MIC2_C9_CTR3             0xE77
+#define MSR_MIC2_C9_FILTER0          0xE71
+#define MSR_MIC2_C9_FILTER1          0xE72
+#define MSR_MIC2_C9_STATUS           0xE73
+#define MSR_MIC2_C10_GLOBAL_CTRL       0xE78
+#define MSR_MIC2_C10_CTRL0             0xE79
+#define MSR_MIC2_C10_CTRL1             0xE7A
+#define MSR_MIC2_C10_CTRL2             0xE7B
+#define MSR_MIC2_C10_CTRL3             0xE7C
+#define MSR_MIC2_C10_CTR0             0xE80
+#define MSR_MIC2_C10_CTR1             0xE81
+#define MSR_MIC2_C10_CTR2             0xE82
+#define MSR_MIC2_C10_CTR3             0xE83
+#define MSR_MIC2_C10_FILTER0          0xE7D
+#define MSR_MIC2_C10_FILTER1          0xE7E
+#define MSR_MIC2_C10_STATUS           0xE7F
+#define MSR_MIC2_C11_GLOBAL_CTRL       0xE84
+#define MSR_MIC2_C11_CTRL0             0xE85
+#define MSR_MIC2_C11_CTRL1             0xE86
+#define MSR_MIC2_C11_CTRL2             0xE87
+#define MSR_MIC2_C11_CTRL3             0xE88
+#define MSR_MIC2_C11_CTR0             0xE8C
+#define MSR_MIC2_C11_CTR1             0xE8D
+#define MSR_MIC2_C11_CTR2             0xE8E
+#define MSR_MIC2_C11_CTR3             0xE8F
+#define MSR_MIC2_C11_FILTER0          0xE89
+#define MSR_MIC2_C11_FILTER1          0xE8A
+#define MSR_MIC2_C11_STATUS           0xE8B
+#define MSR_MIC2_C12_GLOBAL_CTRL       0xE90
+#define MSR_MIC2_C12_CTRL0             0xE91
+#define MSR_MIC2_C12_CTRL1             0xE92
+#define MSR_MIC2_C12_CTRL2             0xE93
+#define MSR_MIC2_C12_CTRL3             0xE94
+#define MSR_MIC2_C12_CTR0             0xE98
+#define MSR_MIC2_C12_CTR1             0xE99
+#define MSR_MIC2_C12_CTR2             0xE9A
+#define MSR_MIC2_C12_CTR3             0xE9B
+#define MSR_MIC2_C12_FILTER0          0xE95
+#define MSR_MIC2_C12_FILTER1          0xE96
+#define MSR_MIC2_C12_STATUS           0xE97
+#define MSR_MIC2_C13_GLOBAL_CTRL       0xE9C
+#define MSR_MIC2_C13_CTRL0             0xE9D
+#define MSR_MIC2_C13_CTRL1             0xE9E
+#define MSR_MIC2_C13_CTRL2             0xE9F
+#define MSR_MIC2_C13_CTRL3             0xEA0
+#define MSR_MIC2_C13_CTR0             0xEA4
+#define MSR_MIC2_C13_CTR1             0xEA5
+#define MSR_MIC2_C13_CTR2             0xEA6
+#define MSR_MIC2_C13_CTR3             0xEA7
+#define MSR_MIC2_C13_FILTER0          0xEA1
+#define MSR_MIC2_C13_FILTER1          0xEA2
+#define MSR_MIC2_C13_STATUS           0xEA3
+#define MSR_MIC2_C14_GLOBAL_CTRL       0xEA8
+#define MSR_MIC2_C14_CTRL0             0xEA9
+#define MSR_MIC2_C14_CTRL1             0xEAA
+#define MSR_MIC2_C14_CTRL2             0xEAB
+#define MSR_MIC2_C14_CTRL3             0xEAC
+#define MSR_MIC2_C14_CTR0             0xEB0
+#define MSR_MIC2_C14_CTR1             0xEB1
+#define MSR_MIC2_C14_CTR2             0xEB2
+#define MSR_MIC2_C14_CTR3             0xEB3
+#define MSR_MIC2_C14_FILTER0          0xEAD
+#define MSR_MIC2_C14_FILTER1          0xEAE
+#define MSR_MIC2_C14_STATUS           0xEAF
+#define MSR_MIC2_C15_GLOBAL_CTRL       0xEB4
+#define MSR_MIC2_C15_CTRL0             0xEB5
+#define MSR_MIC2_C15_CTRL1             0xEB6
+#define MSR_MIC2_C15_CTRL2             0xEB7
+#define MSR_MIC2_C15_CTRL3             0xEB8
+#define MSR_MIC2_C15_CTR0             0xEBC
+#define MSR_MIC2_C15_CTR1             0xEBD
+#define MSR_MIC2_C15_CTR2             0xEBE
+#define MSR_MIC2_C15_CTR3             0xEBF
+#define MSR_MIC2_C15_FILTER0          0xEB9
+#define MSR_MIC2_C15_FILTER1          0xEBA
+#define MSR_MIC2_C15_STATUS           0xEBB
+#define MSR_MIC2_C16_GLOBAL_CTRL       0xEC0
+#define MSR_MIC2_C16_CTRL0             0xEC1
+#define MSR_MIC2_C16_CTRL1             0xEC2
+#define MSR_MIC2_C16_CTRL2             0xEC3
+#define MSR_MIC2_C16_CTRL3             0xEC4
+#define MSR_MIC2_C16_CTR0             0xEC8
+#define MSR_MIC2_C16_CTR1             0xEC9
+#define MSR_MIC2_C16_CTR2             0xECA
+#define MSR_MIC2_C16_CTR3             0xECB
+#define MSR_MIC2_C16_FILTER0          0xEC5
+#define MSR_MIC2_C16_FILTER1          0xEC6
+#define MSR_MIC2_C16_STATUS           0xEC7
+#define MSR_MIC2_C17_GLOBAL_CTRL       0xECC
+#define MSR_MIC2_C17_CTRL0             0xECD
+#define MSR_MIC2_C17_CTRL1             0xECE
+#define MSR_MIC2_C17_CTRL2             0xECF
+#define MSR_MIC2_C17_CTRL3             0xED0
+#define MSR_MIC2_C17_CTR0             0xED4
+#define MSR_MIC2_C17_CTR1             0xED5
+#define MSR_MIC2_C17_CTR2             0xED6
+#define MSR_MIC2_C17_CTR3             0xED7
+#define MSR_MIC2_C17_FILTER0          0xED1
+#define MSR_MIC2_C17_FILTER1          0xED2
+#define MSR_MIC2_C17_STATUS           0xED3
+#define MSR_MIC2_C18_GLOBAL_CTRL       0xED8 /* box 18: 12 consecutive MSRs 0xED8-0xEE3, same layout as the other Cn boxes */
+#define MSR_MIC2_C18_CTRL0             0xED9
+#define MSR_MIC2_C18_CTRL1             0xEDA
+#define MSR_MIC2_C18_CTRL2             0xEDB
+#define MSR_MIC2_C18_CTRL3             0xEDC
+#define MSR_MIC2_C18_CTR0             0xEE0
+#define MSR_MIC2_C18_CTR1             0xEE1
+#define MSR_MIC2_C18_CTR2             0xEE2
+#define MSR_MIC2_C18_CTR3             0xEE3
+#define MSR_MIC2_C18_FILTER0          0xEDD
+#define MSR_MIC2_C18_FILTER1          0xEDE
+#define MSR_MIC2_C18_STATUS           0xEDF
+#define MSR_MIC2_C19_GLOBAL_CTRL       0xEE4
+#define MSR_MIC2_C19_CTRL0             0xEE5
+#define MSR_MIC2_C19_CTRL1             0xEE6
+#define MSR_MIC2_C19_CTRL2             0xEE7
+#define MSR_MIC2_C19_CTRL3             0xEE8
+#define MSR_MIC2_C19_CTR0             0xEEC
+#define MSR_MIC2_C19_CTR1             0xEED
+#define MSR_MIC2_C19_CTR2             0xEEE
+#define MSR_MIC2_C19_CTR3             0xEEF
+#define MSR_MIC2_C19_FILTER0          0xEE9
+#define MSR_MIC2_C19_FILTER1          0xEEA
+#define MSR_MIC2_C19_STATUS           0xEEB
+#define MSR_MIC2_C20_GLOBAL_CTRL       0xEF0
+#define MSR_MIC2_C20_CTRL0             0xEF1
+#define MSR_MIC2_C20_CTRL1             0xEF2
+#define MSR_MIC2_C20_CTRL2             0xEF3
+#define MSR_MIC2_C20_CTRL3             0xEF4
+#define MSR_MIC2_C20_CTR0             0xEF8
+#define MSR_MIC2_C20_CTR1             0xEF9
+#define MSR_MIC2_C20_CTR2             0xEFA
+#define MSR_MIC2_C20_CTR3             0xEFB
+#define MSR_MIC2_C20_FILTER0          0xEF5
+#define MSR_MIC2_C20_FILTER1          0xEF6
+#define MSR_MIC2_C20_STATUS           0xEF7
+#define MSR_MIC2_C21_GLOBAL_CTRL       0xEFC
+#define MSR_MIC2_C21_CTRL0             0xEFD
+#define MSR_MIC2_C21_CTRL1             0xEFE
+#define MSR_MIC2_C21_CTRL2             0xEFF
+#define MSR_MIC2_C21_CTRL3             0xF00
+#define MSR_MIC2_C21_CTR0             0xF04
+#define MSR_MIC2_C21_CTR1             0xF05
+#define MSR_MIC2_C21_CTR2             0xF06
+#define MSR_MIC2_C21_CTR3             0xF07
+#define MSR_MIC2_C21_FILTER0          0xF01
+#define MSR_MIC2_C21_FILTER1          0xF02
+#define MSR_MIC2_C21_STATUS           0xF03
+#define MSR_MIC2_C22_GLOBAL_CTRL       0xF08
+#define MSR_MIC2_C22_CTRL0             0xF09
+#define MSR_MIC2_C22_CTRL1             0xF0A
+#define MSR_MIC2_C22_CTRL2             0xF0B
+#define MSR_MIC2_C22_CTRL3             0xF0C
+#define MSR_MIC2_C22_CTR0             0xF10
+#define MSR_MIC2_C22_CTR1             0xF11
+#define MSR_MIC2_C22_CTR2             0xF12
+#define MSR_MIC2_C22_CTR3             0xF13
+#define MSR_MIC2_C22_FILTER0          0xF0D
+#define MSR_MIC2_C22_FILTER1          0xF0E
+#define MSR_MIC2_C22_STATUS           0xF0F
+#define MSR_MIC2_C23_GLOBAL_CTRL       0xF14
+#define MSR_MIC2_C23_CTRL0             0xF15
+#define MSR_MIC2_C23_CTRL1             0xF16
+#define MSR_MIC2_C23_CTRL2             0xF17
+#define MSR_MIC2_C23_CTRL3             0xF18
+#define MSR_MIC2_C23_CTR0             0xF1C
+#define MSR_MIC2_C23_CTR1             0xF1D
+#define MSR_MIC2_C23_CTR2             0xF1E
+#define MSR_MIC2_C23_CTR3             0xF1F
+#define MSR_MIC2_C23_FILTER0          0xF19
+#define MSR_MIC2_C23_FILTER1          0xF1A
+#define MSR_MIC2_C23_STATUS           0xF1B
+#define MSR_MIC2_C24_GLOBAL_CTRL       0xF20
+#define MSR_MIC2_C24_CTRL0             0xF21
+#define MSR_MIC2_C24_CTRL1             0xF22
+#define MSR_MIC2_C24_CTRL2             0xF23
+#define MSR_MIC2_C24_CTRL3             0xF24
+#define MSR_MIC2_C24_CTR0             0xF28
+#define MSR_MIC2_C24_CTR1             0xF29
+#define MSR_MIC2_C24_CTR2             0xF2A
+#define MSR_MIC2_C24_CTR3             0xF2B
+#define MSR_MIC2_C24_FILTER0          0xF25
+#define MSR_MIC2_C24_FILTER1          0xF26
+#define MSR_MIC2_C24_STATUS           0xF27
+#define MSR_MIC2_C25_GLOBAL_CTRL       0xF2C
+#define MSR_MIC2_C25_CTRL0             0xF2D
+#define MSR_MIC2_C25_CTRL1             0xF2E
+#define MSR_MIC2_C25_CTRL2             0xF2F
+#define MSR_MIC2_C25_CTRL3             0xF30
+#define MSR_MIC2_C25_CTR0             0xF34
+#define MSR_MIC2_C25_CTR1             0xF35
+#define MSR_MIC2_C25_CTR2             0xF36
+#define MSR_MIC2_C25_CTR3             0xF37
+#define MSR_MIC2_C25_FILTER0          0xF31
+#define MSR_MIC2_C25_FILTER1          0xF32
+#define MSR_MIC2_C25_STATUS           0xF33
+#define MSR_MIC2_C26_GLOBAL_CTRL       0xF38
+#define MSR_MIC2_C26_CTRL0             0xF39
+#define MSR_MIC2_C26_CTRL1             0xF3A
+#define MSR_MIC2_C26_CTRL2             0xF3B
+#define MSR_MIC2_C26_CTRL3             0xF3C
+#define MSR_MIC2_C26_CTR0             0xF40
+#define MSR_MIC2_C26_CTR1             0xF41
+#define MSR_MIC2_C26_CTR2             0xF42
+#define MSR_MIC2_C26_CTR3             0xF43
+#define MSR_MIC2_C26_FILTER0          0xF3D
+#define MSR_MIC2_C26_FILTER1          0xF3E
+#define MSR_MIC2_C26_STATUS           0xF3F
+#define MSR_MIC2_C27_GLOBAL_CTRL       0xF44
+#define MSR_MIC2_C27_CTRL0             0xF45
+#define MSR_MIC2_C27_CTRL1             0xF46
+#define MSR_MIC2_C27_CTRL2             0xF47
+#define MSR_MIC2_C27_CTRL3             0xF48
+#define MSR_MIC2_C27_CTR0             0xF4C
+#define MSR_MIC2_C27_CTR1             0xF4D
+#define MSR_MIC2_C27_CTR2             0xF4E
+#define MSR_MIC2_C27_CTR3             0xF4F
+#define MSR_MIC2_C27_FILTER0          0xF49
+#define MSR_MIC2_C27_FILTER1          0xF4A
+#define MSR_MIC2_C27_STATUS           0xF4B
+#define MSR_MIC2_C28_GLOBAL_CTRL       0xF50
+#define MSR_MIC2_C28_CTRL0             0xF51
+#define MSR_MIC2_C28_CTRL1             0xF52
+#define MSR_MIC2_C28_CTRL2             0xF53
+#define MSR_MIC2_C28_CTRL3             0xF54
+#define MSR_MIC2_C28_CTR0             0xF58
+#define MSR_MIC2_C28_CTR1             0xF59
+#define MSR_MIC2_C28_CTR2             0xF5A
+#define MSR_MIC2_C28_CTR3             0xF5B
+#define MSR_MIC2_C28_FILTER0          0xF55
+#define MSR_MIC2_C28_FILTER1          0xF56
+#define MSR_MIC2_C28_STATUS           0xF57
+#define MSR_MIC2_C29_GLOBAL_CTRL       0xF5C
+#define MSR_MIC2_C29_CTRL0             0xF5D
+#define MSR_MIC2_C29_CTRL1             0xF5E
+#define MSR_MIC2_C29_CTRL2             0xF5F
+#define MSR_MIC2_C29_CTRL3             0xF60
+#define MSR_MIC2_C29_CTR0             0xF64
+#define MSR_MIC2_C29_CTR1             0xF65
+#define MSR_MIC2_C29_CTR2             0xF66
+#define MSR_MIC2_C29_CTR3             0xF67
+#define MSR_MIC2_C29_FILTER0          0xF61
+#define MSR_MIC2_C29_FILTER1          0xF62
+#define MSR_MIC2_C29_STATUS           0xF63
+#define MSR_MIC2_C30_GLOBAL_CTRL       0xF68
+#define MSR_MIC2_C30_CTRL0             0xF69
+#define MSR_MIC2_C30_CTRL1             0xF6A
+#define MSR_MIC2_C30_CTRL2             0xF6B
+#define MSR_MIC2_C30_CTRL3             0xF6C
+#define MSR_MIC2_C30_CTR0             0xF70
+#define MSR_MIC2_C30_CTR1             0xF71
+#define MSR_MIC2_C30_CTR2             0xF72
+#define MSR_MIC2_C30_CTR3             0xF73
+#define MSR_MIC2_C30_FILTER0          0xF6D
+#define MSR_MIC2_C30_FILTER1          0xF6E
+#define MSR_MIC2_C30_STATUS           0xF6F
+#define MSR_MIC2_C31_GLOBAL_CTRL       0xF74
+#define MSR_MIC2_C31_CTRL0             0xF75
+#define MSR_MIC2_C31_CTRL1             0xF76
+#define MSR_MIC2_C31_CTRL2             0xF77
+#define MSR_MIC2_C31_CTRL3             0xF78
+#define MSR_MIC2_C31_CTR0             0xF7C
+#define MSR_MIC2_C31_CTR1             0xF7D
+#define MSR_MIC2_C31_CTR2             0xF7E
+#define MSR_MIC2_C31_CTR3             0xF7F
+#define MSR_MIC2_C31_FILTER0          0xF79
+#define MSR_MIC2_C31_FILTER1          0xF7A
+#define MSR_MIC2_C31_STATUS           0xF7B
+#define MSR_MIC2_C32_GLOBAL_CTRL       0xF80
+#define MSR_MIC2_C32_CTRL0             0xF81
+#define MSR_MIC2_C32_CTRL1             0xF82
+#define MSR_MIC2_C32_CTRL2             0xF83
+#define MSR_MIC2_C32_CTRL3             0xF84
+#define MSR_MIC2_C32_CTR0             0xF88
+#define MSR_MIC2_C32_CTR1             0xF89
+#define MSR_MIC2_C32_CTR2             0xF8A
+#define MSR_MIC2_C32_CTR3             0xF8B
+#define MSR_MIC2_C32_FILTER0          0xF85
+#define MSR_MIC2_C32_FILTER1          0xF86
+#define MSR_MIC2_C32_STATUS           0xF87
+#define MSR_MIC2_C33_GLOBAL_CTRL       0xF8C
+#define MSR_MIC2_C33_CTRL0             0xF8D
+#define MSR_MIC2_C33_CTRL1             0xF8E
+#define MSR_MIC2_C33_CTRL2             0xF8F
+#define MSR_MIC2_C33_CTRL3             0xF90
+#define MSR_MIC2_C33_CTR0             0xF94
+#define MSR_MIC2_C33_CTR1             0xF95
+#define MSR_MIC2_C33_CTR2             0xF96
+#define MSR_MIC2_C33_CTR3             0xF97
+#define MSR_MIC2_C33_FILTER0          0xF91
+#define MSR_MIC2_C33_FILTER1          0xF92
+#define MSR_MIC2_C33_STATUS           0xF93
+#define MSR_MIC2_C34_GLOBAL_CTRL       0xF98
+#define MSR_MIC2_C34_CTRL0             0xF99
+#define MSR_MIC2_C34_CTRL1             0xF9A
+#define MSR_MIC2_C34_CTRL2             0xF9B
+#define MSR_MIC2_C34_CTRL3             0xF9C
+#define MSR_MIC2_C34_CTR0             0xFA0
+#define MSR_MIC2_C34_CTR1             0xFA1
+#define MSR_MIC2_C34_CTR2             0xFA2
+#define MSR_MIC2_C34_CTR3             0xFA3
+#define MSR_MIC2_C34_FILTER0          0xF9D
+#define MSR_MIC2_C34_FILTER1          0xF9E
+#define MSR_MIC2_C34_STATUS           0xF9F
+#define MSR_MIC2_C35_GLOBAL_CTRL       0xFA4
+#define MSR_MIC2_C35_CTRL0             0xFA5
+#define MSR_MIC2_C35_CTRL1             0xFA6
+#define MSR_MIC2_C35_CTRL2             0xFA7
+#define MSR_MIC2_C35_CTRL3             0xFA8
+#define MSR_MIC2_C35_CTR0             0xFAC
+#define MSR_MIC2_C35_CTR1             0xFAD
+#define MSR_MIC2_C35_CTR2             0xFAE
+#define MSR_MIC2_C35_CTR3             0xFAF
+#define MSR_MIC2_C35_FILTER0          0xFA9
+#define MSR_MIC2_C35_FILTER1          0xFAA
+#define MSR_MIC2_C35_STATUS           0xFAB
+#define MSR_MIC2_C36_GLOBAL_CTRL       0xFB0
+#define MSR_MIC2_C36_CTRL0             0xFB1
+#define MSR_MIC2_C36_CTRL1             0xFB2
+#define MSR_MIC2_C36_CTRL2             0xFB3
+#define MSR_MIC2_C36_CTRL3             0xFB4
+#define MSR_MIC2_C36_CTR0             0xFB8
+#define MSR_MIC2_C36_CTR1             0xFB9
+#define MSR_MIC2_C36_CTR2             0xFBA
+#define MSR_MIC2_C36_CTR3             0xFBB
+#define MSR_MIC2_C36_FILTER0          0xFB5
+#define MSR_MIC2_C36_FILTER1          0xFB6
+#define MSR_MIC2_C36_STATUS           0xFB7
+#define MSR_MIC2_C37_GLOBAL_CTRL       0xFBC
+#define MSR_MIC2_C37_CTRL0             0xFBD
+#define MSR_MIC2_C37_CTRL1             0xFBE
+#define MSR_MIC2_C37_CTRL2             0xFBF
+#define MSR_MIC2_C37_CTRL3             0xFC0
+#define MSR_MIC2_C37_CTR0             0xFC4
+#define MSR_MIC2_C37_CTR1             0xFC5
+#define MSR_MIC2_C37_CTR2             0xFC6
+#define MSR_MIC2_C37_CTR3             0xFC7
+#define MSR_MIC2_C37_FILTER0          0xFC1
+#define MSR_MIC2_C37_FILTER1          0xFC2
+#define MSR_MIC2_C37_STATUS           0xFC3
+/* Xeon Phi (Knights Landing) Embedded DRAM controller aka High Bandwidth Memory */
+#define PCI_MIC2_EDC_U_CTR0_A	     0x404
+#define PCI_MIC2_EDC_U_CTR0_B	     0x400
+#define PCI_MIC2_EDC_U_CTR1_A	     0x40C
+#define PCI_MIC2_EDC_U_CTR1_B	     0x408
+#define PCI_MIC2_EDC_U_CTR2_A	     0x414
+#define PCI_MIC2_EDC_U_CTR2_B	     0x410
+#define PCI_MIC2_EDC_U_CTR3_A	     0x41C
+#define PCI_MIC2_EDC_U_CTR3_B	     0x418
+#define PCI_MIC2_EDC_U_CTRL0	     0x420
+#define PCI_MIC2_EDC_U_CTRL1	     0x424
+#define PCI_MIC2_EDC_U_CTRL2	     0x428
+#define PCI_MIC2_EDC_U_CTRL3	     0x42C
+#define PCI_MIC2_EDC_U_BOX_CTRL	     0x430
+#define PCI_MIC2_EDC_U_BOX_STATUS    0x434
+#define PCI_MIC2_EDC_U_FIXED_CTR_A   0x450
+#define PCI_MIC2_EDC_U_FIXED_CTR_B   0x44C
+#define PCI_MIC2_EDC_U_FIXED_CTRL    0x454
+#define PCI_MIC2_EDC_D_CTR0_A	     0xA04
+#define PCI_MIC2_EDC_D_CTR0_B	     0xA00
+#define PCI_MIC2_EDC_D_CTR1_A	     0xA0C
+#define PCI_MIC2_EDC_D_CTR1_B	     0xA08
+#define PCI_MIC2_EDC_D_CTR2_A	     0xA14
+#define PCI_MIC2_EDC_D_CTR2_B	     0xA10
+#define PCI_MIC2_EDC_D_CTR3_A	     0xA1C
+#define PCI_MIC2_EDC_D_CTR3_B	     0xA18
+#define PCI_MIC2_EDC_D_CTRL0	     0xA20
+#define PCI_MIC2_EDC_D_CTRL1	     0xA24
+#define PCI_MIC2_EDC_D_CTRL2	     0xA28
+#define PCI_MIC2_EDC_D_CTRL3	     0xA2C
+#define PCI_MIC2_EDC_D_BOX_CTRL	     0xA30
+#define PCI_MIC2_EDC_D_BOX_STATUS    0xA34
+#define PCI_MIC2_EDC_D_FIXED_CTR_A   0xA40
+#define PCI_MIC2_EDC_D_FIXED_CTR_B   0xA3C
+#define PCI_MIC2_EDC_D_FIXED_CTRL    0xA44
+/* Xeon Phi (Knights Landing) Memory controller*/
+#define PCI_MIC2_MC_U_CTR0_A	     0x404
+#define PCI_MIC2_MC_U_CTR0_B	     0x400
+#define PCI_MIC2_MC_U_CTR1_A	     0x40C
+#define PCI_MIC2_MC_U_CTR1_B	     0x408
+#define PCI_MIC2_MC_U_CTR2_A	     0x414
+#define PCI_MIC2_MC_U_CTR2_B	     0x410
+#define PCI_MIC2_MC_U_CTR3_A	     0x41C
+#define PCI_MIC2_MC_U_CTR3_B	     0x418
+#define PCI_MIC2_MC_U_CTRL0	     0x420
+#define PCI_MIC2_MC_U_CTRL1	     0x424
+#define PCI_MIC2_MC_U_CTRL2	     0x428
+#define PCI_MIC2_MC_U_CTRL3	     0x42C
+#define PCI_MIC2_MC_U_BOX_CTRL	     0x430
+#define PCI_MIC2_MC_U_BOX_STATUS    0x434
+#define PCI_MIC2_MC_U_FIXED_CTR_A   0x450
+#define PCI_MIC2_MC_U_FIXED_CTR_B   0x44C
+#define PCI_MIC2_MC_U_FIXED_CTRL    0x454
+#define PCI_MIC2_MC_D_CTR0_A	     0xB04
+#define PCI_MIC2_MC_D_CTR0_B	     0xB00
+#define PCI_MIC2_MC_D_CTR1_A	     0xB0C
+#define PCI_MIC2_MC_D_CTR1_B	     0xB08
+#define PCI_MIC2_MC_D_CTR2_A	     0xB14
+#define PCI_MIC2_MC_D_CTR2_B	     0xB10
+#define PCI_MIC2_MC_D_CTR3_A	     0xB1C
+#define PCI_MIC2_MC_D_CTR3_B	     0xB18
+#define PCI_MIC2_MC_D_CTRL0	     0xB20
+#define PCI_MIC2_MC_D_CTRL1	     0xB24
+#define PCI_MIC2_MC_D_CTRL2	     0xB28
+#define PCI_MIC2_MC_D_CTRL3	     0xB2C
+#define PCI_MIC2_MC_D_BOX_CTRL	     0xB30
+#define PCI_MIC2_MC_D_BOX_STATUS    0xB34
+#define PCI_MIC2_MC_D_FIXED_CTR_A   0xB40
+#define PCI_MIC2_MC_D_FIXED_CTR_B   0xB3C
+#define PCI_MIC2_MC_D_FIXED_CTRL    0xB44
+/* Xeon Phi (Knights Landing) M2PCIE */
+#define PCI_MIC2_M2PCIE_CTR0_A	    0xA4
+#define PCI_MIC2_M2PCIE_CTR0_B	    0xA0
+#define PCI_MIC2_M2PCIE_CTR1_A	    0xAC
+#define PCI_MIC2_M2PCIE_CTR1_B	    0xA8
+#define PCI_MIC2_M2PCIE_CTR2_A	    0xB4
+#define PCI_MIC2_M2PCIE_CTR2_B	    0xB0
+#define PCI_MIC2_M2PCIE_CTR3_A	    0xBC
+#define PCI_MIC2_M2PCIE_CTR3_B	    0xB8
+#define PCI_MIC2_M2PCIE_CTRL0	    0xD8
+#define PCI_MIC2_M2PCIE_CTRL1	    0xDC
+#define PCI_MIC2_M2PCIE_CTRL2	    0xE0
+#define PCI_MIC2_M2PCIE_CTRL3	    0xE4
+#define PCI_MIC2_M2PCIE_BOX_CTRL    0xF4
+#define PCI_MIC2_M2PCIE_BOX_STATUS  0xF8
+/* Xeon Phi (Knights Landing) IRP */
+#define PCI_MIC2_IRP_CTR0	    0xA0
+#define PCI_MIC2_IRP_CTR1	    0xA8
+#define PCI_MIC2_IRP_CTRL0	    0xD8
+#define PCI_MIC2_IRP_CTRL1	    0xDC
+#define PCI_MIC2_IRP_BOX_CTRL	    0xF0
+#define PCI_MIC2_IRP_BOX_STATUS	    0xF4
 
 /* Core v1/v2 type uncore
  * Naming following Intel Uncore Performance Monitoring Guide
@@ -1328,10 +1902,13 @@
 #define MSR_ANY_CORE_C0                 0x659
 #define MSR_ANY_GFXE_C0                 0x65A
 #define MSR_CORE_GFXE_OVERLAP_C0        0x65B
+#define MSR_UNCORE_FREQ                 0x620
+#define MSR_FSB_FREQ                    0xCD
+#define MSR_ENERGY_PERF_BIAS            0x1B0
+#define MSR_ALT_PEBS                    0x39C
 /*
  * AMD
  */
-
 #define MSR_AMD_PERFEVTSEL0           0xC0010000
 #define MSR_AMD_PERFEVTSEL1           0xC0010001
 #define MSR_AMD_PERFEVTSEL2           0xC0010002
@@ -1396,4 +1973,3 @@
 #define MSR_AMD16_NB_PMC3               0xC0010247
 
 #endif /* REGISTERS_H */
-
diff --git a/src/includes/registers_types.h b/src/includes/registers_types.h
index 0859ff8..f1a5ec2 100644
--- a/src/includes/registers_types.h
+++ b/src/includes/registers_types.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File of registers.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -28,7 +28,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef REGISTERS_TYPES_H
 #define REGISTERS_TYPES_H
 
@@ -72,6 +71,20 @@ typedef enum {
     PMC199, PMC200, PMC201, PMC202, PMC203, PMC204,
     PMC205, PMC206, PMC207, PMC208, PMC209, PMC210,
     PMC211, PMC212, PMC213, PMC214, PMC215, PMC216,
+    PMC217, PMC218, PMC219, PMC220, PMC221, PMC222,
+    PMC223, PMC224, PMC225, PMC226, PMC227, PMC228,
+    PMC229, PMC230, PMC231, PMC232, PMC233, PMC234,
+    PMC235, PMC236, PMC237, PMC238, PMC239, PMC240,
+    PMC241, PMC242, PMC243, PMC244, PMC245, PMC246,
+    PMC247, PMC248, PMC249, PMC250, PMC251, PMC252,
+    PMC253, PMC254, PMC255, PMC256, PMC257, PMC258,
+    PMC259, PMC260, PMC261, PMC262, PMC263, PMC264,
+    PMC265, PMC266, PMC267, PMC268, PMC269, PMC270,
+    PMC271, PMC272, PMC273, PMC274, PMC275, PMC276,
+    PMC277, PMC278, PMC279, PMC280, PMC281, PMC282,
+    PMC283, PMC284, PMC285, PMC286, PMC287, PMC288,
+    PMC289, PMC290, PMC291, PMC292, PMC293, PMC294,
+    PMC295, PMC296, PMC297, PMC298, PMC299, PMC300,
     NUM_PMC
 } RegisterIndex;
 
@@ -97,12 +110,25 @@ typedef enum {
     CBOX15, CBOX16, CBOX17,
     CBOX18, CBOX19, CBOX20,
     CBOX21, CBOX22, CBOX23,
+    CBOX24, CBOX25, CBOX26,
+    CBOX27, CBOX28, CBOX29,
+    CBOX30, CBOX31, CBOX32,
+    CBOX33, CBOX34, CBOX35,
+    CBOX36, CBOX37,
     PBOX,
     UBOX,
     UBOXFIX,
     IBOX0, IBOX1,
     QBOX0, QBOX1, QBOX2,
     QBOX0FIX, QBOX1FIX, QBOX2FIX,
+    EUBOX0, EUBOX0FIX, EUBOX1, EUBOX1FIX,
+    EUBOX2, EUBOX2FIX, EUBOX3, EUBOX3FIX,
+    EUBOX4, EUBOX4FIX, EUBOX5, EUBOX5FIX,
+    EUBOX6, EUBOX6FIX, EUBOX7, EUBOX7FIX,
+    EDBOX0, EDBOX0FIX, EDBOX1, EDBOX1FIX,
+    EDBOX2, EDBOX2FIX, EDBOX3, EDBOX3FIX,
+    EDBOX4, EDBOX4FIX, EDBOX5, EDBOX5FIX,
+    EDBOX6, EDBOX6FIX, EDBOX7, EDBOX7FIX,
     NUM_UNITS, NOTYPE, MAX_UNITS
 } RegisterType;
 
@@ -168,6 +194,20 @@ static char* RegisterTypeNames[MAX_UNITS] = {
     [CBOX21] = "Caching Agent box 21",
     [CBOX22] = "Caching Agent box 22",
     [CBOX23] = "Caching Agent box 23",
+    [CBOX24] = "Caching Agent box 24",
+    [CBOX25] = "Caching Agent box 25",
+    [CBOX26] = "Caching Agent box 26",
+    [CBOX27] = "Caching Agent box 27",
+    [CBOX28] = "Caching Agent box 28",
+    [CBOX29] = "Caching Agent box 29",
+    [CBOX30] = "Caching Agent box 30",
+    [CBOX31] = "Caching Agent box 31",
+    [CBOX32] = "Caching Agent box 32",
+    [CBOX33] = "Caching Agent box 33",
+    [CBOX34] = "Caching Agent box 34",
+    [CBOX35] = "Caching Agent box 35",
+    [CBOX36] = "Caching Agent box 36",
+    [CBOX37] = "Caching Agent box 37",
     [PBOX] = "Physical Layer box",
     [UBOX] = "System Configuration box",
     [UBOXFIX] = "System Configuration box fixed counter",
@@ -177,15 +217,77 @@ static char* RegisterTypeNames[MAX_UNITS] = {
     [QBOX1] = "QPI Link Layer 1",
     [QBOX0FIX] = "QPI Link Layer rate status 0",
     [QBOX1FIX] = "QPI Link Layer rate status 1",
+    [EUBOX0] = "Embedded DRAM controller 0",
+    [EUBOX1] = "Embedded DRAM controller 1",
+    [EUBOX2] = "Embedded DRAM controller 2",
+    [EUBOX3] = "Embedded DRAM controller 3",
+    [EUBOX4] = "Embedded DRAM controller 4",
+    [EUBOX5] = "Embedded DRAM controller 5",
+    [EUBOX6] = "Embedded DRAM controller 6",
+    [EUBOX7] = "Embedded DRAM controller 7",
+    [EUBOX0FIX] = "Embedded DRAM controller 0 fixed counter",
+    [EUBOX1FIX] = "Embedded DRAM controller 1 fixed counter",
+    [EUBOX2FIX] = "Embedded DRAM controller 2 fixed counter",
+    [EUBOX3FIX] = "Embedded DRAM controller 3 fixed counter",
+    [EUBOX4FIX] = "Embedded DRAM controller 4 fixed counter",
+    [EUBOX5FIX] = "Embedded DRAM controller 5 fixed counter",
+    [EUBOX6FIX] = "Embedded DRAM controller 6 fixed counter",
+    [EUBOX7FIX] = "Embedded DRAM controller 7 fixed counter",
+    [EDBOX0] = "Embedded DRAM controller 0",
+    [EDBOX1] = "Embedded DRAM controller 1",
+    [EDBOX2] = "Embedded DRAM controller 2",
+    [EDBOX3] = "Embedded DRAM controller 3",
+    [EDBOX4] = "Embedded DRAM controller 4",
+    [EDBOX5] = "Embedded DRAM controller 5",
+    [EDBOX6] = "Embedded DRAM controller 6",
+    [EDBOX7] = "Embedded DRAM controller 7",
+    [EDBOX0FIX] = "Embedded DRAM controller 0 fixed counter",
+    [EDBOX1FIX] = "Embedded DRAM controller 1 fixed counter",
+    [EDBOX2FIX] = "Embedded DRAM controller 2 fixed counter",
+    [EDBOX3FIX] = "Embedded DRAM controller 3 fixed counter",
+    [EDBOX4FIX] = "Embedded DRAM controller 4 fixed counter",
+    [EDBOX5FIX] = "Embedded DRAM controller 5 fixed counter",
+    [EDBOX6FIX] = "Embedded DRAM controller 6 fixed counter",
+    [EDBOX7FIX] = "Embedded DRAM controller 7 fixed counter",
     [NUM_UNITS] = "Maximally usable register types",
     [NOTYPE] = "No Type, used for skipping unavailable counters"
 };
 
-#ifdef __x86_64
-#define REG_TYPE_MASK(type) (type < NUM_UNITS ? (((__uint128_t)1ULL)<<type) : (((__uint128_t)0ULL)<<64|0ULL))
-#else
-#define REG_TYPE_MASK(type) (type < NUM_UNITS ? (1ULL<<type) : (0x0ULL)
-#endif
+/* Bit of a register type inside the first type mask word (regTypeMask1); types that
+ * do not fit into that word or are not real units yield 0 instead of an out-of-range shift. */
+#define REG_TYPE_MASK(type) \
+        (((type) >= 0 && (type) < NUM_UNITS && (type) < 64) ? (1ULL<<(type)) : 0x0ULL)
+
+/* Non-zero if the bit for type is set in one of the four type mask words of eventset. */
+#define TESTTYPE(eventset, type) \
+        (((type) >= 0 && (type) <= 63 ? (eventset)->regTypeMask1 & (1ULL<<(type)) : \
+        ((type) >= 64 && (type) <= 127 ? (eventset)->regTypeMask2 & (1ULL<<((type)-64)) : \
+        ((type) >= 128 && (type) <= 191 ? (eventset)->regTypeMask3 & (1ULL<<((type)-128)) : \
+        ((type) >= 192 && (type) <= 255 ? (eventset)->regTypeMask4 & (1ULL<<((type)-192)) : 0x0ULL)))))
+
+/* Set the bit for type in eventset; do/while(0) makes the expansion a single statement. */
+#define SETTYPE(eventset, type) \
+        do { \
+            if ((type) >= 0 && (type) <= 63) { \
+                (eventset)->regTypeMask1 |= (1ULL<<(type)); \
+            } else if ((type) >= 64 && (type) <= 127) { \
+                (eventset)->regTypeMask2 |= (1ULL<<((type)-64)); \
+            } else if ((type) >= 128 && (type) <= 191) { \
+                (eventset)->regTypeMask3 |= (1ULL<<((type)-128)); \
+            } else if ((type) >= 192 && (type) <= 255) { \
+                (eventset)->regTypeMask4 |= (1ULL<<((type)-192)); \
+            } else { \
+                ERROR_PRINT(Cannot set out-of-bounds type %d, (type)); \
+            } \
+        } while (0)
+
+/* Non-zero if the PMC or FIXED bit is set in the first type mask word of eventset. */
+#define MEASURE_CORE(eventset) \
+        ((eventset)->regTypeMask1 & (REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)))
+
+/* Non-zero if any type bit other than PMC, FIXED, THERMAL and POWER is set in eventset. */
+#define MEASURE_UNCORE(eventset) \
+        ((eventset)->regTypeMask1 & ~(REG_TYPE_MASK(PMC)|REG_TYPE_MASK(FIXED)|REG_TYPE_MASK(THERMAL)|REG_TYPE_MASK(POWER)) || (eventset)->regTypeMask2 || (eventset)->regTypeMask3 || (eventset)->regTypeMask4)
 
 typedef struct {
     char*               key;
@@ -210,4 +312,4 @@ typedef struct {
     uint32_t  filterRegister2;
 } BoxMap;
 
-#endif
+#endif /* REGISTERS_TYPES_H */
diff --git a/src/includes/textcolor.h b/src/includes/textcolor.h
index bd9b919..03b2c51 100644
--- a/src/includes/textcolor.h
+++ b/src/includes/textcolor.h
@@ -3,12 +3,12 @@
  *
  *      Filename:  textcolor.h
  *
- *      Description:  Header File textcolor Module. 
- *                    Allows toggling of terminal escape sequences for 
+ *      Description:  Header File textcolor Module.
+ *                    Allows toggling of terminal escape sequences for
  *                    colored text.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -29,7 +29,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef TEXTCOLOR_H
 #define TEXTCOLOR_H
 
@@ -53,7 +52,8 @@
 static void color_on(int attr, int fg);
 static void color_reset(void);
 
-static void color_on(int attr, int fg)
+static void
+color_on(int attr, int fg)
 {
     char command[13];
 
@@ -61,7 +61,8 @@ static void color_on(int attr, int fg)
     printf("%s", command);
 }
 
-static void color_reset()
+static void
+color_reset()
 {
     char command[13];
 
diff --git a/src/includes/thermal.h b/src/includes/thermal.h
index 1b5e400..126981d 100644
--- a/src/includes/thermal.h
+++ b/src/includes/thermal.h
@@ -6,8 +6,8 @@
  *      Description:  Header File Thermal Module.
  *                    Implements Intel TM/TM2 Interface.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -37,8 +37,6 @@
 #include <error.h>
 #include <access.h>
 
-
-
 int
 thermal_read(int cpuId, uint32_t *data)
 {
diff --git a/src/includes/thermal_types.h b/src/includes/thermal_types.h
index 0fb0791..12ddbcf 100644
--- a/src/includes/thermal_types.h
+++ b/src/includes/thermal_types.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Types file for thermal module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -27,7 +27,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef THERMAL_TYPES_H
 #define THERMAL_TYPES_H
 
@@ -49,5 +48,4 @@ typedef ThermalInfo* ThermalInfo_t;
 
 extern ThermalInfo thermal_info;
 
-
 #endif /*THERMAL_TYPES_H*/
diff --git a/src/includes/timer.h b/src/includes/timer.h
index 8673630..58c0aa3 100644
--- a/src/includes/timer.h
+++ b/src/includes/timer.h
@@ -10,8 +10,8 @@
  *      with rdtsc of 100 cycles in the worst case. Therefore sensible
  *      measurements should be over 1000 cycles.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -37,10 +37,9 @@
 
 #include <types.h>
 
-
 extern void timer_init( void );
-extern double timer_print( TimerData* );
-extern uint64_t timer_printCycles( TimerData* );
+extern double timer_print( const TimerData* );
+extern uint64_t timer_printCycles( const TimerData* );
 extern uint64_t timer_getCpuClock( void );
 extern uint64_t timer_getCpuClockCurrent( int cpu_id );
 extern uint64_t timer_getCycleClock( void );
@@ -49,8 +48,4 @@ extern uint64_t timer_getBaseline( void );
 extern void timer_start( TimerData* );
 extern void timer_stop ( TimerData* );
 
-
-
-
-
 #endif /* TIMER_H */
diff --git a/src/includes/timer_types.h b/src/includes/timer_types.h
index 5e972cc..0899cec 100644
--- a/src/includes/timer_types.h
+++ b/src/includes/timer_types.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Types file for timer module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -27,7 +27,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef TIMER_TYPES_H
 #define TIMER_TYPES_H
 
@@ -44,5 +43,4 @@ typedef struct {
     TscCounter stop;
 } TimerData;
 
-
 #endif /*TIMER_TYPES_H*/
diff --git a/src/includes/tlb-info.h b/src/includes/tlb-info.h
index ca6d65e..365dbe0 100644
--- a/src/includes/tlb-info.h
+++ b/src/includes/tlb-info.h
@@ -6,8 +6,8 @@
  *      Description:  Header File of topology module that contains the TLB
  *                    describing strings. Not used currently.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -28,8 +28,6 @@
  *
  * =======================================================================================
  */
-
-
 #ifndef TLB_INFO_H
 #define TLB_INFO_H
 
@@ -85,5 +83,6 @@ static char* intel_tlb_info[256] = {
     [240] = "64-Byte prefetching",
     [241] = "128-Byte prefetching",
     [242 ... 255] = NULL
-}; 
-#endif
+};
+
+#endif /* TLB_INFO_H */
diff --git a/src/includes/topology.h b/src/includes/topology.h
index 05ff5ed..57b14d9 100644
--- a/src/includes/topology.h
+++ b/src/includes/topology.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File of topology module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -28,9 +28,8 @@
  *
  * =======================================================================================
  */
-
-#ifndef LIKWID_TOPOLOGY
-#define LIKWID_TOPOLOGY
+#ifndef TOPOLOGY_H
+#define TOPOLOGY_H
 
 #include <stdlib.h>
 #include <stdio.h>
@@ -43,7 +42,6 @@
 #include <types.h>
 #include <tree.h>
 
-
 #define MAX_FEATURE_STRING_LENGTH 512
 #define MAX_MODEL_STRING_LENGTH 512
 
@@ -97,10 +95,12 @@ struct topology_functions {
 #define BROADWELL_D          0x56U
 #define SKYLAKE1             0x4EU
 #define SKYLAKE2             0x5EU
+#define KABYLAKE1            0x8EU
+#define KABYLAKE2            0x9EU
 
 /* Intel MIC */
 #define XEON_PHI           0x01U
-#define XEON_PHI2          0x57U
+#define XEON_PHI_KNL       0x57U
 
 /* AMD K10 */
 #define BARCELONA      0x02U
@@ -129,10 +129,6 @@ struct topology_functions {
 #define  K10_FAMILY       0x10U
 #define  K8_FAMILY        0xFU
 
-
-
-
-
 extern int cpu_count(cpu_set_t* set);
 
 static inline int cpuid_hasFeature(FeatureBit bit)
@@ -140,5 +136,4 @@ static inline int cpuid_hasFeature(FeatureBit bit)
       return (cpuid_info.featureFlags & (1<<bit));
 }
 
-
-#endif
+#endif /* TOPOLOGY_H */
diff --git a/src/includes/topology_cpuid.h b/src/includes/topology_cpuid.h
index 5f5a8bd..24c60c5 100644
--- a/src/includes/topology_cpuid.h
+++ b/src/includes/topology_cpuid.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File of topology backend using cpuid instruction.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -28,9 +28,8 @@
  *
  * =======================================================================================
  */
-
-#ifndef LIKWID_TOPOLOGY_CPUID
-#define LIKWID_TOPOLOGY_CPUID
+#ifndef TOPOLOGY_CPUID_H
+#define TOPOLOGY_CPUID_H
 
 #include <sched.h>
 
@@ -39,5 +38,4 @@ void cpuid_init_cpuFeatures(void);
 void cpuid_init_nodeTopology(cpu_set_t cpuSet);
 void cpuid_init_cacheTopology(void);
 
-
-#endif
+#endif /* TOPOLOGY_CPUID_H */
diff --git a/src/includes/topology_hwloc.h b/src/includes/topology_hwloc.h
index bd990d5..a8a3cdc 100644
--- a/src/includes/topology_hwloc.h
+++ b/src/includes/topology_hwloc.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File of topology backend using the hwloc library
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -27,26 +27,20 @@
  *
  * =======================================================================================
  */
-
-#ifndef LIKWID_TOPOLOGY_HWLOC
-#define LIKWID_TOPOLOGY_HWLOC
-
+#ifndef TOPOLOGY_HWLOC_H
+#define TOPOLOGY_HWLOC_H
 
 #include <hwloc.h>
 #include <sched.h>
 
-
 extern hwloc_topology_t hwloc_topology;
 
 int likwid_hwloc_record_objs_of_type_below_obj(hwloc_topology_t t, hwloc_obj_t obj, hwloc_obj_type_t type, int* index, uint32_t **list);
 
-
-
 void hwloc_init_cpuInfo(cpu_set_t cpuSet);
 void hwloc_init_cpuFeatures(void);
 void hwloc_init_nodeTopology(cpu_set_t cpuSet);
 void hwloc_init_cacheTopology(void);
 void hwloc_close(void);
 
-
-#endif
+#endif /* TOPOLOGY_HWLOC_H */
diff --git a/src/includes/topology_proc.h b/src/includes/topology_proc.h
index 2b0a13b..640cbd3 100644
--- a/src/includes/topology_proc.h
+++ b/src/includes/topology_proc.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Header File of topology backend using procfs/sysfs
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -27,9 +27,8 @@
  *
  * =======================================================================================
  */
-
-#ifndef LIKWID_TOPOLOGY_PROC
-#define LIKWID_TOPOLOGY_PROC
+#ifndef TOPOLOGY_PROC_H
+#define TOPOLOGY_PROC_H
 
 #include <stdlib.h>
 #include <stdio.h>
@@ -47,5 +46,4 @@ void proc_init_cpuFeatures(void);
 void proc_init_nodeTopology(cpu_set_t cpuSet);
 void proc_init_cacheTopology(void);
 
-
-#endif
+#endif /* TOPOLOGY_PROC_H */
diff --git a/src/includes/topology_types.h b/src/includes/topology_types.h
index a9b0d96..af20379 100644
--- a/src/includes/topology_types.h
+++ b/src/includes/topology_types.h
@@ -6,8 +6,8 @@
  *      Description:  Types file for topology module. External definitions are
  *                    in likwid.h
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com,
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -29,7 +29,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef CPUID_TYPES_H
 #define CPUID_TYPES_H
 
@@ -39,7 +38,7 @@
 /*! \brief Enum of possible CPU features
 
 CPUs implement different features that likely improve application performance if
-optimized using the feature. The list contains all features that are currently 
+optimized using the feature. The list contains all features that are currently
 supported by LIKWID. LIKWID does not perform any action based on these features,
 it gathers the data only for output purposes. It is not a complete list.
 \extends CpuInfo
diff --git a/src/includes/tree.h b/src/includes/tree.h
index f4b5529..445bcfb 100644
--- a/src/includes/tree.h
+++ b/src/includes/tree.h
@@ -3,11 +3,11 @@
  *
  *      Filename:  tree.h
  *
- *      Description:  Header File tree Module. 
+ *      Description:  Header File tree Module.
  *                    Implements a simple tree data structure.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -28,7 +28,6 @@
  *
  * =======================================================================================
  */
-
 #ifndef TREE_H
 #define TREE_H
 
diff --git a/src/includes/tree_types.h b/src/includes/tree_types.h
index dac1a4b..fd1d50b 100644
--- a/src/includes/tree_types.h
+++ b/src/includes/tree_types.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Types file for tree module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -27,11 +27,9 @@
  *
  * =======================================================================================
  */
-
 #ifndef TREE_TYPES_H
 #define TREE_TYPES_H
 
-
 /** \addtogroup CPUTopology
 *  @{
 */
diff --git a/src/includes/types.h b/src/includes/types.h
index 45df468..56e77e9 100644
--- a/src/includes/types.h
+++ b/src/includes/types.h
@@ -5,8 +5,8 @@
  *
  *      Description:  Global  Types file
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -28,11 +28,9 @@
  *
  * =======================================================================================
  */
-
 #ifndef TYPES_H
 #define TYPES_H
 
-
 /* #####   HEADER FILE INCLUDES   ######################################### */
 #include <stdint.h>
 #include <bstrlib.h>
@@ -47,7 +45,6 @@
 #include <perfmon_types.h>
 #include <libperfctr_types.h>
 
-
 typedef struct {
     uint64_t mask[2];
 } BitMask;
diff --git a/src/libperfctr.c b/src/libperfctr.c
index bf4ae39..5b63f2a 100644
--- a/src/libperfctr.c
+++ b/src/libperfctr.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Marker API interface of module perfmon
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Authors:  Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -56,6 +56,7 @@
 /* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
 
 int socket_lock[MAX_NUM_NODES];
+int core_lock[MAX_NUM_THREADS];
 static int likwid_init = 0;
 static int numberOfGroups = 0;
 static int* groups;
@@ -68,6 +69,7 @@ static pthread_mutex_t globalLock = PTHREAD_MUTEX_INITIALIZER;
 static int use_locks = 0;
 static pthread_mutex_t threadLocks[MAX_NUM_THREADS] = { [ 0 ... (MAX_NUM_THREADS-1)] = PTHREAD_MUTEX_INITIALIZER};
 
+
 /* #####   MACROS  -  LOCAL TO THIS SOURCE FILE   ######################### */
 
 #define gettid() syscall(SYS_gettid)
@@ -75,7 +77,9 @@ static pthread_mutex_t threadLocks[MAX_NUM_THREADS] = { [ 0 ... (MAX_NUM_THREADS
 /* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
 
 
-static int getProcessorID(cpu_set_t* cpu_set)
+
+static int
+getProcessorID(cpu_set_t* cpu_set)
 {
     int processorId;
 
@@ -89,7 +93,8 @@ static int getProcessorID(cpu_set_t* cpu_set)
     return processorId;
 }
 
-static int getThreadID(int cpu_id)
+static int
+getThreadID(int cpu_id)
 {
     int i;
     for(i=0;i<groupSet->numberOfThreads;i++)
@@ -130,10 +135,12 @@ calculateMarkerResult(RegisterIndex index, uint64_t start, uint64_t stop, int ov
 
 /* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
 
-void likwid_markerInit(void)
+void
+likwid_markerInit(void)
 {
     int i;
     int verbosity;
+    int setinit = 0;
     bstring bThreadStr;
     bstring bEventStr;
     struct bstrList* threadTokens;
@@ -146,9 +153,9 @@ void likwid_markerInit(void)
     int (*ownatoi)(const char*);
     ownatoi = &atoi;
 
-    if ((modeStr != NULL) && (filepath != NULL) && (eventStr != NULL) && (cThreadStr != NULL))
+    if ((modeStr != NULL) && (filepath != NULL) && (eventStr != NULL) && (cThreadStr != NULL) && likwid_init == 0)
     {
-        likwid_init = 1;
+        setinit = 1;
     }
     else if (likwid_init == 0)
     {
@@ -172,9 +179,9 @@ void likwid_markerInit(void)
     hashTable_init();
 
     for(int i=0; i<MAX_NUM_NODES; i++) socket_lock[i] = LOCK_INIT;
-
+#ifndef LIKWID_USE_PERFEVENT
     HPMmode(atoi(modeStr));
-
+#endif
     if (getenv("LIKWID_DEBUG") != NULL)
     {
         perfmon_verbosity = atoi(getenv("LIKWID_DEBUG"));
@@ -190,7 +197,7 @@ void likwid_markerInit(void)
     }
     bdestroy(bThreadStr);
     bstrListDestroy(threadTokens);
-    
+
     if (getenv("LIKWID_PIN") != NULL)
     {
         likwid_pinThread(threads2Cpu[0]);
@@ -213,7 +220,7 @@ void likwid_markerInit(void)
     i = perfmon_init(num_cpus, threads2Cpu);
     if (i<0)
     {
-        fprintf(stderr,"Failed to initialize LIKWID perfmon library.\n");
+        //fprintf(stderr,"Failed to initialize LIKWID perfmon library.\n");
         return;
     }
 
@@ -243,18 +250,26 @@ void likwid_markerInit(void)
             groupSet->groups[groups[0]].state = STATE_START;
         }
     }
-
+    if (setinit)
+    {
+        likwid_init = 1;
+    }
     groupSet->activeGroup = 0;
+#ifdef LIKWID_USE_PERFEVENT
+    perfmon_setupCounters(groupSet->activeGroup);
+    perfmon_startCounters();
+#endif
 }
 
-void likwid_markerThreadInit(void)
+void
+likwid_markerThreadInit(void)
 {
     int myID;
     if ( !likwid_init )
     {
         return;
     }
-    
+
     pthread_mutex_lock(&globalLock);
     myID = registered_cpus++;
     pthread_mutex_unlock(&globalLock);
@@ -272,7 +287,8 @@ void likwid_markerThreadInit(void)
     }
 }
 
-void likwid_markerNextGroup(void)
+void
+likwid_markerNextGroup(void)
 {
     int i;
     int next_group;
@@ -297,7 +313,8 @@ void likwid_markerNextGroup(void)
  * 4 regionID threadID countersvalues(space separated)
  * 5 regionID threadID countersvalues
  */
-void likwid_markerClose(void)
+void
+likwid_markerClose(void)
 {
     FILE *file = NULL;
     LikwidResults* results = NULL;
@@ -320,16 +337,20 @@ void likwid_markerClose(void)
     markerfile = getenv("LIKWID_FILEPATH");
     if (markerfile == NULL)
     {
-        fprintf(stderr, "Is the application executed with LIKWID wrapper? No file path for the Marker API output defined.\n");
+        fprintf(stderr,
+                "Is the application executed with LIKWID wrapper? No file path for the Marker API output defined.\n");
         return;
     }
     file = fopen(markerfile,"w");
 
     if (file != NULL)
     {
-        DEBUG_PRINT(DEBUGLEV_DEVELOP, Creating Marker file %s with %d regions %d groups and %d threads, markerfile, numberOfRegions, numberOfGroups, numberOfThreads);
+        DEBUG_PRINT(DEBUGLEV_DEVELOP,
+                Creating Marker file %s with %d regions %d groups and %d threads,
+                markerfile, numberOfRegions, numberOfGroups, numberOfThreads);
         fprintf(file,"%d %d %d\n",numberOfThreads, numberOfRegions, numberOfGroups);
         DEBUG_PRINT(DEBUGLEV_DEVELOP, %d %d %d, numberOfThreads, numberOfRegions, numberOfGroups);
+
         for (int i=0; i<numberOfRegions; i++)
         {
             fprintf(file,"%d:%s\n",i,bdata(results[i].tag));
@@ -345,7 +366,10 @@ void likwid_markerClose(void)
                 fprintf(file,"%u ",results[i].count[j]);
                 fprintf(file,"%e ",results[i].time[j]);
                 fprintf(file,"%d ",groupSet->groups[results[i].groupID].numberOfEvents);
-                lineidx = sprintf(&(line[0]), "%d %d %d %u %e %d ", i, results[i].groupID,results[i].cpulist[j],results[i].count[j],results[i].time[j],groupSet->groups[results[i].groupID].numberOfEvents);
+                lineidx = sprintf(&(line[0]), "%d %d %d %u %e %d ",
+                        i, results[i].groupID,results[i].cpulist[j],results[i].count[j],
+                        results[i].time[j],groupSet->groups[results[i].groupID].numberOfEvents);
+
                 for (int k=0; k<groupSet->groups[results[i].groupID].numberOfEvents; k++)
                 {
                     fprintf(file,"%e ",results[i].counters[j][k]);
@@ -363,6 +387,20 @@ void likwid_markerClose(void)
         fprintf(stderr, "%s", strerror(errno));
     }
 
+}
+
+void __attribute__((destructor (101))) likwid_markerCloseDestruct(void)
+{
+    LikwidResults* results = NULL;
+    int numberOfThreads = 0;
+    int numberOfRegions = 0;
+    if (!likwid_init)
+        return;
+    hashTable_finalize(&numberOfThreads, &numberOfRegions, &results);
+    if ((numberOfThreads == 0)||(numberOfThreads == 0))
+    {
+        return;
+    }
     for (int i=0;i<numberOfRegions; i++)
     {
         for (int j=0;j<numberOfThreads; j++)
@@ -384,7 +422,8 @@ void likwid_markerClose(void)
     HPMfinalize();
 }
 
-int likwid_markerRegisterRegion(const char* regionTag)
+int
+likwid_markerRegisterRegion(const char* regionTag)
 {
     if ( ! likwid_init )
     {
@@ -398,11 +437,15 @@ int likwid_markerRegisterRegion(const char* regionTag)
     bcatcstr(tag, groupSuffix);
     int cpu_id = hashTable_get(tag, &results);
     bdestroy(tag);
+#ifdef LIKWID_USE_PERFEVENT
+    return HPMaddThread(cpu_id);
+#else
     return 0;
+#endif
 }
 
-
-int likwid_markerStartRegion(const char* regionTag)
+int
+likwid_markerStartRegion(const char* regionTag)
 {
     if ( ! likwid_init )
     {
@@ -419,30 +462,30 @@ int likwid_markerStartRegion(const char* regionTag)
     char groupSuffix[10];
     sprintf(groupSuffix, "-%d", groupSet->activeGroup);
     bcatcstr(tag, groupSuffix);
-    
+
     int cpu_id = hashTable_get(tag, &results);
     int thread_id = getThreadID(cpu_id);
     perfmon_readCountersCpu(cpu_id);
     results->cpuID = cpu_id;
     for(int i=0;i<groupSet->groups[groupSet->activeGroup].numberOfEvents;i++)
     {
-        DEBUG_PRINT(DEBUGLEV_DEVELOP, START [%s] READ EVENT [%d=%d] EVENT %d VALUE %llu , regionTag, thread_id, cpu_id, i,
-                        LLU_CAST groupSet->groups[groupSet->activeGroup].events[i].threadCounter[thread_id].counterData);
+        DEBUG_PRINT(DEBUGLEV_DEVELOP, START [%s] READ EVENT [%d=%d] EVENT %d VALUE %llu,
+                regionTag, thread_id, cpu_id, i,
+                LLU_CAST groupSet->groups[groupSet->activeGroup].events[i].threadCounter[thread_id].counterData);
         //groupSet->groups[groupSet->activeGroup].events[i].threadCounter[thread_id].startData =
         //        groupSet->groups[groupSet->activeGroup].events[i].threadCounter[thread_id].counterData;
-        
+
         results->StartPMcounters[i] = groupSet->groups[groupSet->activeGroup].events[i].threadCounter[thread_id].counterData;
         results->StartOverflows[i] = groupSet->groups[groupSet->activeGroup].events[i].threadCounter[thread_id].overflows;
     }
-    
+
     bdestroy(tag);
     timer_start(&(results->startTime));
     return 0;
 }
 
-
-
-int likwid_markerStopRegion(const char* regionTag)
+int
+likwid_markerStopRegion(const char* regionTag)
 {
     if (! likwid_init)
     {
@@ -468,7 +511,7 @@ int likwid_markerStopRegion(const char* regionTag)
     {
         pthread_mutex_lock(&threadLocks[myCPU]);
     }
-    
+
     cpu_id = hashTable_get(tag, &results);
     thread_id = getThreadID(cpu_id);
     results->groupID = groupSet->activeGroup;
@@ -485,7 +528,7 @@ int likwid_markerStopRegion(const char* regionTag)
                         LLU_CAST groupSet->groups[groupSet->activeGroup].events[i].threadCounter[thread_id].counterData);
         result = calculateMarkerResult(groupSet->groups[groupSet->activeGroup].events[i].index, results->StartPMcounters[i],
                                         groupSet->groups[groupSet->activeGroup].events[i].threadCounter[thread_id].counterData,
-                                        groupSet->groups[groupSet->activeGroup].events[i].threadCounter[thread_id].overflows - 
+                                        groupSet->groups[groupSet->activeGroup].events[i].threadCounter[thread_id].overflows -
                                         results->StartOverflows[i]);
         if (counter_map[groupSet->groups[groupSet->activeGroup].events[i].index].type != THERMAL)
         {
@@ -503,8 +546,13 @@ int likwid_markerStopRegion(const char* regionTag)
     return 0;
 }
 
-
-void likwid_markerGetRegion(const char* regionTag, int* nr_events, double* events, double *time, int *count)
+void
+likwid_markerGetRegion(
+        const char* regionTag,
+        int* nr_events,
+        double* events,
+        double *time,
+        int *count)
 {
     if (! likwid_init)
     {
@@ -537,8 +585,8 @@ void likwid_markerGetRegion(const char* regionTag, int* nr_events, double* event
     return;
 }
 
-
-int  likwid_getProcessorId()
+int
+likwid_getProcessorId()
 {
     int i;
     cpu_set_t  cpu_set;
@@ -556,7 +604,8 @@ int  likwid_getProcessorId()
 }
 
 #ifdef HAS_SCHEDAFFINITY
-int  likwid_pinThread(int processorId)
+int
+likwid_pinThread(int processorId)
 {
     int ret;
     cpu_set_t cpuset;
@@ -577,8 +626,8 @@ int  likwid_pinThread(int processorId)
 }
 #endif
 
-
-int  likwid_pinProcess(int processorId)
+int
+likwid_pinProcess(int processorId)
 {
     int ret;
     cpu_set_t cpuset;
@@ -596,4 +645,3 @@ int  likwid_pinProcess(int processorId)
     return TRUE;
 }
 
-
diff --git a/src/likwid.f90 b/src/likwid.f90
index 0735f13..20ca8b7 100644
--- a/src/likwid.f90
+++ b/src/likwid.f90
@@ -4,14 +4,14 @@
 !
 !     Description: Marker API f90 module
 !
-!      Version:   4.1
-!      Released:  8.8.2016
+!      Version:   <VERSION>
+!      Released:  <DATE>
 !
 !     Authors:  Jan Treibig (jt), jan.treibig at gmail.com,
 !               Thomas Roehl (tr), thomas.roehl at googlemail.com
 !     Project:  likwid
 !
-!      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+!      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
 !
 !      This program is free software: you can redistribute it and/or modify it under
 !      the terms of the GNU General Public License as published by the Free Software
@@ -30,14 +30,14 @@
 !> \defgroup Fortran_Interface Likwid Fortran90 Module
 
 !> \ingroup Fortran_Interface
-!> Likwid Fortran90 Module for embedding the Marker API into Fortran applications
-!> In the basic configuration the module is compiled with the Intel Fortran Compiler
+!! Likwid Fortran90 Module for embedding the Marker API into Fortran applications
+!! In the basic configuration the module is compiled with the Intel Fortran Compiler
 module likwid
 
 interface
 
 !> \ingroup Fortran_Interface
-!> \brief Initialize the Likwid Marker API
+!! \brief Initialize the Likwid Marker API
 !! This routine initializes the Marker API for Fortran. It reads some 
 !! environment commonly set by likwid-perfctr.
 !! \note Must be called once in a serial region.
@@ -45,7 +45,7 @@ interface
   end subroutine likwid_markerInit
 
 !> \ingroup Fortran_Interface
-!> \brief Add current thread to Likwid for Marker API measurements
+!! \brief Add current thread to Likwid for Marker API measurements
 !! This routine adds the current thread to Likwid that it performs measurements
 !! for this thread. If using the daemon access mode, it starts a deamon for the
 !! current thread.
@@ -54,27 +54,27 @@ interface
   end subroutine likwid_markerThreadInit
 
 !> \ingroup Fortran_Interface
-!> \brief Setup performance counters for the next event set
-!> If multiple groups should be measured this function
-!> switches to the next group in a round robin fashion.
-!> Each call reprogramms the performance counters for the current CPU,
-!> \note Do not call it while measuring a code region.
+!! \brief Setup performance counters for the next event set
+!! If multiple groups should be measured this function
+!! switches to the next group in a round robin fashion.
+!! Each call reprograms the performance counters for the current CPU.
+!! \note Do not call it while measuring a code region.
   subroutine likwid_markerNextGroup()
   end subroutine likwid_markerNextGroup
 
 !> \ingroup Fortran_Interface
-!> \brief Close the Likwid Marker API
-!> Close the Likwid Marker API and write measured results to temporary file
-!> for evaluation done by likwid-perfctr
-!> \note Must be called once in a serial region and no further
-!> Likwid calls should be used
+!! \brief Close the Likwid Marker API
+!! Close the Likwid Marker API and write measured results to temporary file
+!! for evaluation done by likwid-perfctr
+!! \note Must be called once in a serial region and no further
+!! Likwid calls should be used
   subroutine likwid_markerClose()
   end subroutine likwid_markerClose
 
 !> \ingroup Fortran_Interface
-!> \brief Register a code region
-!> Initializes the hash table with an empty entry to reduce the overhead
-!> at likwid_markerStartRegion()
+!! \brief Register a code region
+!! Initializes the hash table with an empty entry to reduce the overhead
+!! at likwid_markerStartRegion()
   subroutine likwid_markerRegisterRegion( regionTag )
 !> \param regionTag Name for the code region for later identification
   character(*) :: regionTag
@@ -82,34 +82,34 @@ interface
 
 
 !> \ingroup Fortran_Interface
-!> \brief Start the measurement for a code region
-!> Reads the currently running event set and store the results as start values.
-!> for the measurement group identified by regionTag
+!! \brief Start the measurement for a code region
+!! Reads the currently running event set and store the results as start values.
+!! for the measurement group identified by regionTag
   subroutine likwid_markerStartRegion( regionTag )
 !> \param regionTag Name for the code region for later identification
   character(*) :: regionTag
   end subroutine likwid_markerStartRegion
 
 !> \ingroup Fortran_Interface
-!> \brief Stop the measurement for a code region
-!> Reads the currently running event set and accumulate the difference between
-!> stop and start data in the measurement group identified by regionTag.
+!! \brief Stop the measurement for a code region
+!! Reads the currently running event set and accumulate the difference between
+!! stop and start data in the measurement group identified by regionTag.
   subroutine likwid_markerStopRegion( regionTag )
 !> \param regionTag Name for the code region for later identification
   character(*) :: regionTag
   end subroutine likwid_markerStopRegion
 
 !> \ingroup Fortran_Interface
-!> \brief Get accumulated measurement results for a code region
-!> Get the accumulated data in the measurement group identified by regionTag
-!> for the current thread.
-!> \warning Experimental
+!! \brief Get accumulated measurement results for a code region
+!! Get the accumulated data in the measurement group identified by regionTag
+!! for the current thread.
+!! \warning Experimental
   subroutine likwid_markerGetRegion( regionTag, nr_events, events, time, count )
 !> \param regionTag [in] Name for the code region for later identification
-!> \param nr_events [in,out] Length of the events array
-!> \param events [out] Events array to store intermediate results
-!> \param time [out] Accumulated measurement time
-!> \param count [out] Call count of the region
+!! \param nr_events [in,out] Length of the events array
+!! \param events [out] Events array to store intermediate results
+!! \param time [out] Accumulated measurement time
+!! \param count [out] Call count of the region
   character(*) :: regionTag
   INTEGER :: nr_events
   DOUBLE PRECISION, DIMENSION(*) :: events
diff --git a/src/likwid_f90_interface.c b/src/likwid_f90_interface.c
index 1fe555b..96ec8e9 100644
--- a/src/likwid_f90_interface.c
+++ b/src/likwid_f90_interface.c
@@ -5,8 +5,8 @@
  *
  *      Description: F90 interface for marker API
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Jan Treibig (jt), jan.treibig at gmail.com,
  *               Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -29,32 +29,41 @@
  * =======================================================================================
  */
 
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <string.h>
 
 #include <likwid.h>
 
-void __attribute__ ((visibility ("default") )) likwid_markerinit_(void)
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
+
+void __attribute__ ((visibility ("default") ))
+likwid_markerinit_(void)
 {
     likwid_markerInit();
 }
 
-void __attribute__ ((visibility ("default") )) likwid_markerthreadinit_(void)
+void __attribute__ ((visibility ("default") ))
+likwid_markerthreadinit_(void)
 {
     likwid_markerThreadInit();
 }
 
-void __attribute__ ((visibility ("default") )) likwid_markerclose_(void)
+void __attribute__ ((visibility ("default") ))
+likwid_markerclose_(void)
 {
     likwid_markerClose();
 }
 
-void __attribute__ ((visibility ("default") )) likwid_markernextgroup_(void)
+void __attribute__ ((visibility ("default") ))
+likwid_markernextgroup_(void)
 {
     likwid_markerNextGroup();
 }
 
-void __attribute__ ((visibility ("default") )) likwid_markerregisterregion_(char* regionTag, int len)
+void __attribute__ ((visibility ("default") ))
+likwid_markerregisterregion_(char* regionTag, int len)
 {
     char* tmp = (char*) malloc((len+1) * sizeof(char) );
     strncpy(tmp, regionTag, len * sizeof(char) );
@@ -71,7 +80,8 @@ void __attribute__ ((visibility ("default") )) likwid_markerregisterregion_(char
     free(tmp);
 }
 
-void __attribute__ ((visibility ("default") )) likwid_markerstartregion_(char* regionTag, int len)
+void __attribute__ ((visibility ("default") ))
+likwid_markerstartregion_(char* regionTag, int len)
 {
     char* tmp = (char*) malloc((len+1) * sizeof(char) );
     strncpy(tmp, regionTag, len * sizeof(char) );
@@ -88,7 +98,8 @@ void __attribute__ ((visibility ("default") )) likwid_markerstartregion_(char* r
     free(tmp);
 }
 
-void __attribute__ ((visibility ("default") )) likwid_markerstopregion_(char* regionTag, int len)
+void __attribute__ ((visibility ("default") ))
+likwid_markerstopregion_(char* regionTag, int len)
 {
     char* tmp = (char*) malloc((len+1) * sizeof(char));
     strncpy(tmp, regionTag, len * sizeof(char) );
@@ -105,7 +116,14 @@ void __attribute__ ((visibility ("default") )) likwid_markerstopregion_(char* re
     free(tmp);
 }
 
-void __attribute__ ((visibility ("default") )) likwid_markergetregion_(char* regionTag, int* nr_events, double* events, double *time, int *count, int len)
+void __attribute__ ((visibility ("default") ))
+likwid_markergetregion_(
+        char* regionTag,
+        int* nr_events,
+        double* events,
+        double *time,
+        int *count,
+        int len)
 {
     char* tmp = (char*) malloc((len+1) * sizeof(char));
     strncpy(tmp, regionTag, len * sizeof(char) );
diff --git a/src/luawid.c b/src/luawid.c
index 791c511..c872d72 100644
--- a/src/luawid.c
+++ b/src/luawid.c
@@ -5,8 +5,8 @@
  *
  *      Description:  C part of the Likwid Lua interface
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -28,6 +28,8 @@
  * =======================================================================================
  */
 
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
@@ -37,6 +39,8 @@
 #include <sys/time.h>
 #include <time.h>
 #include <sched.h>
+#include <sys/types.h>
+#include <pwd.h>
 
 #include <lua.h>                               /* Always include this */
 #include <lauxlib.h>                           /* Always include this */
@@ -51,24 +55,30 @@
 #include <textcolor.h>
 #endif
 
-static int topology_isInitialized = 0;
-CpuInfo_t cpuinfo = NULL;
-CpuTopology_t cputopo = NULL;
+/* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
 
+static int topology_isInitialized = 0;
 static int numa_isInitialized = 0;
-NumaTopology_t numainfo = NULL;
 static int affinity_isInitialized = 0;
-AffinityDomains_t affinity = NULL;
 static int perfmon_isInitialized = 0;
 static int timer_isInitialized = 0;
 static int power_isInitialized = 0;
-PowerInfo_t power;
 static int power_hasRAPL = 0;
 static int config_isInitialized = 0;
+
+/* #####   VARIABLES  -  EXPORTED VARIABLES   ############################# */
+
+CpuInfo_t cpuinfo = NULL;
+CpuTopology_t cputopo = NULL;
+NumaTopology_t numainfo = NULL;
+AffinityDomains_t affinity = NULL;
+PowerInfo_t power;
 Configuration_t configfile = NULL;
 
+/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
 
-static int lua_likwid_getConfiguration(lua_State* L)
+static int
+lua_likwid_getConfiguration(lua_State* L)
 {
     int ret = 0;
     if (config_isInitialized == 0)
@@ -135,7 +145,8 @@ static int lua_likwid_getConfiguration(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_putConfiguration(lua_State* L)
+static int
+lua_likwid_putConfiguration(lua_State* L)
 {
     if (config_isInitialized == 1)
     {
@@ -146,7 +157,8 @@ static int lua_likwid_putConfiguration(lua_State* L)
     return 0;
 }
 
-static int lua_likwid_setGroupPath(lua_State* L)
+static int
+lua_likwid_setGroupPath(lua_State* L)
 {
     int ret;
     const char* tmpString;
@@ -168,7 +180,8 @@ static int lua_likwid_setGroupPath(lua_State* L)
     return 0;
 }
 
-static int lua_likwid_setAccessMode(lua_State* L)
+static int
+lua_likwid_setAccessMode(lua_State* L)
 {
     int flag;
     flag = luaL_checknumber(L,1);
@@ -178,7 +191,8 @@ static int lua_likwid_setAccessMode(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_init(lua_State* L)
+static int
+lua_likwid_init(lua_State* L)
 {
     int ret;
     int nrThreads = luaL_checknumber(L,1);
@@ -191,7 +205,11 @@ static int lua_likwid_init(lua_State* L)
     for (ret = 1; ret<=nrThreads; ret++)
     {
         lua_rawgeti(L,-1,ret);
+#if LUA_VERSION_NUM == 501
+        cpus[ret-1] = ((lua_Integer)lua_tointeger(L,-1));
+#else
         cpus[ret-1] = ((lua_Unsigned)lua_tointegerx(L,-1, NULL));
+#endif
         lua_pop(L,1);
     }
     if (topology_isInitialized == 0)
@@ -241,8 +259,8 @@ static int lua_likwid_init(lua_State* L)
     return 1;
 }
 
-
-static int lua_likwid_addEventSet(lua_State* L)
+static int
+lua_likwid_addEventSet(lua_State* L)
 {
     int groupId, n;
     const char* tmpString;
@@ -255,11 +273,15 @@ static int lua_likwid_addEventSet(lua_State* L)
     luaL_argcheck(L, strlen(tmpString) > 0, n, "Event string must be larger than 0");
 
     groupId = perfmon_addEventSet((char*)tmpString);
-    lua_pushinteger(L, groupId+1);
+    if (groupId >= 0)
+        lua_pushinteger(L, groupId+1);
+    else
+        lua_pushinteger(L, groupId);
     return 1;
 }
 
-static int lua_likwid_setupCounters(lua_State* L)
+static int
+lua_likwid_setupCounters(lua_State* L)
 {
     int ret;
     int groupId = lua_tonumber(L,1);
@@ -272,8 +294,8 @@ static int lua_likwid_setupCounters(lua_State* L)
     return 1;
 }
 
-
-static int lua_likwid_startCounters(lua_State* L)
+static int
+lua_likwid_startCounters(lua_State* L)
 {
     int ret;
     if (perfmon_isInitialized == 0)
@@ -285,7 +307,8 @@ static int lua_likwid_startCounters(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_stopCounters(lua_State* L)
+static int
+lua_likwid_stopCounters(lua_State* L)
 {
     int ret;
     if (perfmon_isInitialized == 0)
@@ -297,7 +320,8 @@ static int lua_likwid_stopCounters(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_readCounters(lua_State* L)
+static int
+lua_likwid_readCounters(lua_State* L)
 {
     int ret;
     if (perfmon_isInitialized == 0)
@@ -309,7 +333,8 @@ static int lua_likwid_readCounters(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_switchGroup(lua_State* L)
+static int
+lua_likwid_switchGroup(lua_State* L)
 {
     int ret = -1;
     int newgroup = lua_tonumber(L,1)-1;
@@ -331,7 +356,8 @@ static int lua_likwid_switchGroup(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_finalize(lua_State* L)
+static int
+lua_likwid_finalize(lua_State* L)
 {
     if (perfmon_isInitialized == 1)
     {
@@ -371,7 +397,8 @@ static int lua_likwid_finalize(lua_State* L)
     return 0;
 }
 
-static int lua_likwid_getResult(lua_State* L)
+static int
+lua_likwid_getResult(lua_State* L)
 {
     int groupId, eventId, threadId;
     double result = 0;
@@ -383,7 +410,8 @@ static int lua_likwid_getResult(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getLastResult(lua_State* L)
+static int
+lua_likwid_getLastResult(lua_State* L)
 {
     int groupId, eventId, threadId;
     double result = 0;
@@ -395,7 +423,8 @@ static int lua_likwid_getLastResult(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getMetric(lua_State* L)
+static int
+lua_likwid_getMetric(lua_State* L)
 {
     int groupId, metricId, threadId;
     double result = 0;
@@ -407,7 +436,8 @@ static int lua_likwid_getMetric(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getLastMetric(lua_State* L)
+static int
+lua_likwid_getLastMetric(lua_State* L)
 {
     int groupId, metricId, threadId;
     double result = 0;
@@ -419,7 +449,8 @@ static int lua_likwid_getLastMetric(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getNumberOfGroups(lua_State* L)
+static int
+lua_likwid_getNumberOfGroups(lua_State* L)
 {
     int number;
     if (perfmon_isInitialized == 0)
@@ -431,7 +462,8 @@ static int lua_likwid_getNumberOfGroups(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getIdOfActiveGroup(lua_State* L)
+static int
+lua_likwid_getIdOfActiveGroup(lua_State* L)
 {
     int number;
     if (perfmon_isInitialized == 0)
@@ -443,7 +475,8 @@ static int lua_likwid_getIdOfActiveGroup(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getRuntimeOfGroup(lua_State* L)
+static int
+lua_likwid_getRuntimeOfGroup(lua_State* L)
 {
     double time;
     int groupId;
@@ -457,7 +490,8 @@ static int lua_likwid_getRuntimeOfGroup(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getNumberOfEvents(lua_State* L)
+static int
+lua_likwid_getNumberOfEvents(lua_State* L)
 {
     int number, groupId;
     if (perfmon_isInitialized == 0)
@@ -470,7 +504,8 @@ static int lua_likwid_getNumberOfEvents(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getNumberOfThreads(lua_State* L)
+static int
+lua_likwid_getNumberOfThreads(lua_State* L)
 {
     int number;
     if (perfmon_isInitialized == 0)
@@ -482,7 +517,8 @@ static int lua_likwid_getNumberOfThreads(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getNameOfEvent(lua_State* L)
+static int
+lua_likwid_getNameOfEvent(lua_State* L)
 {
     int eventId, groupId;
     char* tmp;
@@ -497,7 +533,8 @@ static int lua_likwid_getNameOfEvent(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getNameOfCounter(lua_State* L)
+static int
+lua_likwid_getNameOfCounter(lua_State* L)
 {
     int eventId, groupId;
     char* tmp;
@@ -512,7 +549,8 @@ static int lua_likwid_getNameOfCounter(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getNumberOfMetrics(lua_State* L)
+static int
+lua_likwid_getNumberOfMetrics(lua_State* L)
 {
     int number, groupId;
     if (perfmon_isInitialized == 0)
@@ -525,7 +563,8 @@ static int lua_likwid_getNumberOfMetrics(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getNameOfMetric(lua_State* L)
+static int
+lua_likwid_getNameOfMetric(lua_State* L)
 {
     int metricId, groupId;
     char* tmp;
@@ -540,7 +579,8 @@ static int lua_likwid_getNameOfMetric(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getNameOfGroup(lua_State* L)
+static int
+lua_likwid_getNameOfGroup(lua_State* L)
 {
     int groupId;
     char* tmp;
@@ -554,7 +594,8 @@ static int lua_likwid_getNameOfGroup(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getShortInfoOfGroup(lua_State* L)
+static int
+lua_likwid_getShortInfoOfGroup(lua_State* L)
 {
     int groupId;
     char* tmp;
@@ -568,7 +609,8 @@ static int lua_likwid_getShortInfoOfGroup(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getLongInfoOfGroup(lua_State* L)
+static int
+lua_likwid_getLongInfoOfGroup(lua_State* L)
 {
     int groupId;
     char* tmp;
@@ -582,7 +624,8 @@ static int lua_likwid_getLongInfoOfGroup(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getGroups(lua_State* L)
+static int
+lua_likwid_getGroups(lua_State* L)
 {
     int i, ret;
     char** tmp, **infos, **longs;
@@ -616,13 +659,15 @@ static int lua_likwid_getGroups(lua_State* L)
 }
 
 
-static int lua_likwid_printSupportedCPUs(lua_State* L)
+static int
+lua_likwid_printSupportedCPUs(lua_State* L)
 {
     print_supportedCPUs();
     return 0;
 }
 
-static int lua_likwid_getCpuInfo(lua_State* L)
+static int
+lua_likwid_getCpuInfo(lua_State* L)
 {
     if (topology_isInitialized == 0)
     {
@@ -683,7 +728,8 @@ static int lua_likwid_getCpuInfo(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getCpuTopology(lua_State* L)
+static int
+lua_likwid_getCpuTopology(lua_State* L)
 {
     int i;
     TreeNode* socketNode;
@@ -877,7 +923,8 @@ static int lua_likwid_getCpuTopology(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_putTopology(lua_State* L)
+static int
+lua_likwid_putTopology(lua_State* L)
 {
     if (topology_isInitialized == 1)
     {
@@ -890,10 +937,11 @@ static int lua_likwid_putTopology(lua_State* L)
 }
 
 
-static int lua_likwid_getEventsAndCounters(lua_State* L)
+static int
+lua_likwid_getEventsAndCounters(lua_State* L)
 {
     int i;
-    
+
     if (topology_isInitialized == 0)
     {
         topology_init();
@@ -905,11 +953,14 @@ static int lua_likwid_getEventsAndCounters(lua_State* L)
         cpuinfo = get_cpuInfo();
     }
     perfmon_init_maps();
+    perfmon_check_counter_map(0);
     lua_newtable(L);
     lua_pushstring(L,"Counters");
     lua_newtable(L);
     for(i=1;i<=perfmon_numCounters;i++)
     {
+        if (counter_map[i-1].type == NOTYPE)
+            continue;
         bstring optString = bfromcstr("");
         lua_pushinteger(L, (lua_Integer)(i));
         lua_newtable(L);
@@ -926,6 +977,7 @@ static int lua_likwid_getEventsAndCounters(lua_State* L)
                 bdestroy(tmp);
             }
         }
+        bdelete(optString, blength(optString)-1, 1);
         lua_pushstring(L,bdata(optString));
         lua_settable(L,-3);
         lua_pushstring(L,"Type");
@@ -945,6 +997,8 @@ static int lua_likwid_getEventsAndCounters(lua_State* L)
     lua_newtable(L);
     for(i=1;i<=perfmon_numArchEvents;i++)
     {
+        if (strlen(eventHash[i-1].limit) == 0)
+            continue;
         bstring optString = bfromcstr("");
         lua_pushinteger(L, (lua_Integer)(i));
         lua_newtable(L);
@@ -970,16 +1024,19 @@ static int lua_likwid_getEventsAndCounters(lua_State* L)
                 bdestroy(tmp);
             }
         }
+        bdelete(optString, blength(optString)-1, 1);
         lua_pushstring(L,bdata(optString));
         lua_settable(L,-3);
         lua_settable(L,-3);
         bdestroy(optString);
     }
     lua_settable(L,-3);
+    HPMfinalize();
     return 1;
 }
 
-static int lua_likwid_getOnlineDevices(lua_State* L)
+static int
+lua_likwid_getOnlineDevices(lua_State* L)
 {
     int i;
     lua_newtable(L);
@@ -1007,7 +1064,8 @@ static int lua_likwid_getOnlineDevices(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getNumaInfo(lua_State* L)
+static int
+lua_likwid_getNumaInfo(lua_State* L)
 {
     uint32_t i,j;
     if (topology_isInitialized == 0)
@@ -1069,7 +1127,6 @@ static int lua_likwid_getNumaInfo(lua_State* L)
     {
         lua_pushinteger(L, i+1);
         lua_newtable(L);
-        
         lua_pushstring(L,"id");
         lua_pushinteger(L, (lua_Integer)(numainfo->nodes[i].id));
         lua_settable(L,-3);
@@ -1085,7 +1142,6 @@ static int lua_likwid_getNumaInfo(lua_State* L)
         lua_pushstring(L,"numberOfDistances");
         lua_pushinteger(L, (lua_Integer)(numainfo->nodes[i].numberOfDistances));
         lua_settable(L,-3);
-        
         lua_pushstring(L,"processors");
         lua_newtable(L);
         for(j=0;j<numainfo->nodes[i].numberOfProcessors;j++)
@@ -1095,7 +1151,6 @@ static int lua_likwid_getNumaInfo(lua_State* L)
             lua_settable(L,-3);
         }
         lua_settable(L,-3);
-        
         /*lua_pushstring(L,"processorsCompact");
         lua_newtable(L);
         for(j=0;j<numa->nodes[i].numberOfProcessors;j++)
@@ -1105,7 +1160,6 @@ static int lua_likwid_getNumaInfo(lua_State* L)
             lua_settable(L,-3);
         }
         lua_settable(L,-3);*/
-        
         lua_pushstring(L,"distances");
         lua_newtable(L);
         for(j=0;j<numainfo->nodes[i].numberOfDistances;j++)
@@ -1118,14 +1172,14 @@ static int lua_likwid_getNumaInfo(lua_State* L)
             lua_settable(L,-3);
         }
         lua_settable(L,-3);
-        
         lua_settable(L,-3);
     }
     lua_settable(L,-3);
     return 1;
 }
 
-static int lua_likwid_putNumaInfo(lua_State* L)
+static int
+lua_likwid_putNumaInfo(lua_State* L)
 {
     if (numa_isInitialized)
     {
@@ -1136,7 +1190,8 @@ static int lua_likwid_putNumaInfo(lua_State* L)
     return 0;
 }
 
-static int lua_likwid_setMemInterleaved(lua_State* L)
+static int
+lua_likwid_setMemInterleaved(lua_State* L)
 {
     int ret;
     int nrThreads = luaL_checknumber(L,1);
@@ -1149,17 +1204,22 @@ static int lua_likwid_setMemInterleaved(lua_State* L)
     for (ret = 1; ret<=nrThreads; ret++)
     {
         lua_rawgeti(L,-1,ret);
+#if LUA_VERSION_NUM == 501
+        cpus[ret-1] = ((lua_Integer)lua_tointeger(L,-1));
+#else
         cpus[ret-1] = ((lua_Unsigned)lua_tointegerx(L,-1, NULL));
+#endif
         lua_pop(L,1);
     }
     numa_setInterleaved(cpus, nrThreads);
     return 0;
 }
 
-static int lua_likwid_getAffinityInfo(lua_State* L)
+static int
+lua_likwid_getAffinityInfo(lua_State* L)
 {
     int i,j;
-    
+
     if (topology_isInitialized == 0)
     {
         topology_init();
@@ -1255,7 +1315,8 @@ static int lua_likwid_getAffinityInfo(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_cpustr_to_cpulist(lua_State* L)
+static int
+lua_likwid_cpustr_to_cpulist(lua_State* L)
 {
     int ret = 0;
     char* cpustr = (char *)luaL_checkstring(L, 1);
@@ -1283,7 +1344,8 @@ static int lua_likwid_cpustr_to_cpulist(lua_State* L)
     return 2;
 }
 
-static int lua_likwid_nodestr_to_nodelist(lua_State* L)
+static int
+lua_likwid_nodestr_to_nodelist(lua_State* L)
 {
     int ret = 0;
     char* nodestr = (char *)luaL_checkstring(L, 1);
@@ -1311,7 +1373,8 @@ static int lua_likwid_nodestr_to_nodelist(lua_State* L)
     return 2;
 }
 
-static int lua_likwid_sockstr_to_socklist(lua_State* L)
+static int
+lua_likwid_sockstr_to_socklist(lua_State* L)
 {
     int ret = 0;
     char* sockstr = (char *)luaL_checkstring(L, 1);
@@ -1339,7 +1402,8 @@ static int lua_likwid_sockstr_to_socklist(lua_State* L)
     return 2;
 }
 
-static int lua_likwid_putAffinityInfo(lua_State* L)
+static int
+lua_likwid_putAffinityInfo(lua_State* L)
 {
     if (affinity_isInitialized)
     {
@@ -1350,9 +1414,9 @@ static int lua_likwid_putAffinityInfo(lua_State* L)
     return 0;
 }
 
-static int lua_likwid_getPowerInfo(lua_State* L)
+static int
+lua_likwid_getPowerInfo(lua_State* L)
 {
-    
     int i;
     if (topology_isInitialized == 0)
     {
@@ -1379,19 +1443,18 @@ static int lua_likwid_getPowerInfo(lua_State* L)
     {
         affinity = get_affinityDomains();
     }
-
     if (power_isInitialized == 0)
     {
         power_hasRAPL = power_init(0);
-        for(i=0;i<affinity->numberOfAffinityDomains;i++)
+        if (power_hasRAPL > 0)
         {
-            if (bstrchrp(affinity->domains[i].tag, 'S', 0) != BSTR_ERR)
+            for(i=0;i<affinity->numberOfAffinityDomains;i++)
             {
-                HPMaddThread(affinity->domains[i].processorList[0]);
+                if (bstrchrp(affinity->domains[i].tag, 'S', 0) != BSTR_ERR)
+                {
+                    HPMaddThread(affinity->domains[i].processorList[0]);
+                }
             }
-        }
-        if (power_hasRAPL)
-        {
             power_isInitialized = 1;
             power = get_powerInfo();
         }
@@ -1401,7 +1464,6 @@ static int lua_likwid_getPowerInfo(lua_State* L)
         }
     }
 
-
     lua_newtable(L);
     lua_pushstring(L,"hasRAPL");
     lua_pushboolean(L,power_hasRAPL);
@@ -1418,7 +1480,15 @@ static int lua_likwid_getPowerInfo(lua_State* L)
     lua_pushstring(L,"timeUnit");
     lua_pushnumber(L,power->timeUnit);
     lua_settable(L,-3);
-    
+    lua_pushstring(L,"minUncoreFreq");
+    lua_pushnumber(L,power->uncoreMinFreq);
+    lua_settable(L,-3);
+    lua_pushstring(L,"maxUncoreFreq");
+    lua_pushnumber(L,power->uncoreMaxFreq);
+    lua_settable(L,-3);
+    lua_pushstring(L,"perfBias");
+    lua_pushnumber(L,power->perfBias);
+    lua_settable(L,-3);
     lua_pushstring(L,"turbo");
     lua_newtable(L);
     lua_pushstring(L,"numSteps");
@@ -1516,12 +1586,12 @@ static int lua_likwid_getPowerInfo(lua_State* L)
         lua_settable(L,-3);
     }
     lua_settable(L,-3);
-    
 
     return 1;
 }
 
-static int lua_likwid_putPowerInfo(lua_State* L)
+static int
+lua_likwid_putPowerInfo(lua_State* L)
 {
     if (power_isInitialized)
     {
@@ -1532,31 +1602,42 @@ static int lua_likwid_putPowerInfo(lua_State* L)
     return 0;
 }
 
-static int lua_likwid_startPower(lua_State* L)
+static int
+lua_likwid_startPower(lua_State* L)
 {
     PowerData pwrdata;
     int cpuId = lua_tonumber(L,1);
     luaL_argcheck(L, cpuId >= 0, 1, "CPU ID must be greater than 0");
+#if LUA_VERSION_NUM == 501
+    PowerType type = (PowerType) ((lua_Integer)lua_tointeger(L,2));
+#else
     PowerType type = (PowerType) ((lua_Unsigned)lua_tointegerx(L,2, NULL));
+#endif
     luaL_argcheck(L, type >= PKG+1 && type <= DRAM+1, 2, "Type not valid");
     power_start(&pwrdata, cpuId, type-1);
     lua_pushnumber(L,pwrdata.before);
     return 1;
 }
 
-static int lua_likwid_stopPower(lua_State* L)
+static int
+lua_likwid_stopPower(lua_State* L)
 {
     PowerData pwrdata;
     int cpuId = lua_tonumber(L,1);
     luaL_argcheck(L, cpuId >= 0, 1, "CPU ID must be greater than 0");
+#if LUA_VERSION_NUM == 501
+    PowerType type = (PowerType) ((lua_Integer)lua_tointeger(L,2));
+#else
     PowerType type = (PowerType) ((lua_Unsigned)lua_tointegerx(L,2, NULL));
+#endif
     luaL_argcheck(L, type >= PKG+1 && type <= DRAM+1, 2, "Type not valid");
     power_stop(&pwrdata, cpuId, type-1);
     lua_pushnumber(L,pwrdata.after);
     return 1;
 }
 
-static int lua_likwid_printEnergy(lua_State* L)
+static int
+lua_likwid_printEnergy(lua_State* L)
 {
     PowerData pwrdata;
     pwrdata.before = lua_tonumber(L,1);
@@ -1566,7 +1647,8 @@ static int lua_likwid_printEnergy(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_power_limitGet(lua_State* L)
+static int
+lua_likwid_power_limitGet(lua_State* L)
 {
     int err;
     int cpuId = lua_tonumber(L,1);
@@ -1584,7 +1666,8 @@ static int lua_likwid_power_limitGet(lua_State* L)
     return 2;
 }
 
-static int lua_likwid_power_limitSet(lua_State* L)
+static int
+lua_likwid_power_limitSet(lua_State* L)
 {
     int cpuId = lua_tonumber(L,1);
     int domain = lua_tonumber(L,2);
@@ -1595,7 +1678,8 @@ static int lua_likwid_power_limitSet(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_power_limitState(lua_State* L)
+static int
+lua_likwid_power_limitState(lua_State* L)
 {
     int cpuId = lua_tonumber(L,1);
     int domain = lua_tonumber(L,2);
@@ -1603,7 +1687,8 @@ static int lua_likwid_power_limitState(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getCpuClock(lua_State* L)
+static int
+lua_likwid_getCpuClock(lua_State* L)
 {
     if (timer_isInitialized == 0)
     {
@@ -1614,7 +1699,8 @@ static int lua_likwid_getCpuClock(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getCycleClock(lua_State* L)
+static int
+lua_likwid_getCycleClock(lua_State* L)
 {
     if (timer_isInitialized == 0)
     {
@@ -1625,13 +1711,19 @@ static int lua_likwid_getCycleClock(lua_State* L)
     return 1;
 }
 
-static int lua_sleep(lua_State* L)
+static int
+lua_sleep(lua_State* L)
 {
+#if LUA_VERSION_NUM == 501
+    lua_pushnumber(L, timer_sleep(((lua_Integer)lua_tointeger(L,-1))));
+#else
     lua_pushnumber(L, timer_sleep(((lua_Unsigned)lua_tointegerx(L,-1, NULL))));
+#endif
     return 1;
 }
 
-static int lua_likwid_startClock(lua_State* L)
+static int
+lua_likwid_startClock(lua_State* L)
 {
     TimerData timer;
     double value;
@@ -1646,7 +1738,8 @@ static int lua_likwid_startClock(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_stopClock(lua_State* L)
+static int
+lua_likwid_stopClock(lua_State* L)
 {
     TimerData timer;
     double value;
@@ -1661,7 +1754,8 @@ static int lua_likwid_stopClock(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getClockCycles(lua_State* L)
+static int
+lua_likwid_getClockCycles(lua_State* L)
 {
     TimerData timer;
     double start, stop;
@@ -1678,7 +1772,8 @@ static int lua_likwid_getClockCycles(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_getClock(lua_State* L)
+static int
+lua_likwid_getClock(lua_State* L)
 {
     TimerData timer;
     double runtime, start, stop;
@@ -1696,18 +1791,27 @@ static int lua_likwid_getClock(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_initTemp(lua_State* L)
+static int
+lua_likwid_initTemp(lua_State* L)
 {
+#if LUA_VERSION_NUM == 501
+    int cpuid = ((lua_Integer)lua_tointeger(L,-1));
+#else
     int cpuid = ((lua_Unsigned)lua_tointegerx(L,-1, NULL));
+#endif
     thermal_init(cpuid);
     return 0;
 }
 
-static int lua_likwid_readTemp(lua_State* L)
+static int
+lua_likwid_readTemp(lua_State* L)
 {
+#if LUA_VERSION_NUM == 501
+    int cpuid = ((lua_Integer)lua_tointeger(L,-1));
+#else
     int cpuid = ((lua_Unsigned)lua_tointegerx(L,-1, NULL));
+#endif
     uint32_t data;
-    
     if (thermal_read(cpuid, &data)) {
         lua_pushstring(L,"Cannot read thermal data");
         lua_error(L);
@@ -1716,10 +1820,9 @@ static int lua_likwid_readTemp(lua_State* L)
     return 1;
 }
 
-
 static volatile int recv_sigint = 0;
 
-static void signal_catcher(int signo) 
+static void signal_catcher(int signo)
 {
     if (signo == SIGINT)
     {
@@ -1728,37 +1831,46 @@ static void signal_catcher(int signo)
     return;
 }
 
-static int lua_likwid_catch_signal(lua_State* L)
+static int
+lua_likwid_catch_signal(lua_State* L)
 {
     signal(SIGINT,signal_catcher);
     return 0;
 }
 
-static int lua_likwid_return_signal_state(lua_State* L)
+static int
+lua_likwid_return_signal_state(lua_State* L)
 {
     lua_pushnumber(L, recv_sigint);
     return 1;
 }
 
-void parse(char *line, char **argv)
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
+
+void
+parse(char *line, char **argv)
 {
-     while (*line != '\0') {       /* if not the end of line ....... */ 
+     while (*line != '\0') {       /* if not the end of line ....... */
           while (*line == ' ' || *line == '\t' || *line == '\n')
                *line++ = '\0';     /* replace white spaces with 0    */
           *argv++ = line;          /* save the argument position     */
-          while (*line != '\0' && *line != ' ' && 
-                 *line != '\t' && *line != '\n') 
+          while (*line != '\0' && *line != ' ' &&
+                 *line != '\t' && *line != '\n')
                line++;             /* skip the argument until ...    */
      }
      *argv = '\0';                 /* mark the end of argument list  */
 }
 
+/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
 
-static void catch_sigchild(int signo) {
+static void
+catch_sigchild(int signo)
+{
     ;;
 }
 
-static int lua_likwid_startProgram(lua_State* L)
+static int
+lua_likwid_startProgram(lua_State* L)
 {
     pid_t pid, ppid;
     int status;
@@ -1777,7 +1889,11 @@ static int lua_likwid_startProgram(lua_State* L)
         for (status = 1; status<=nrThreads; status++)
         {
             lua_rawgeti(L,-1,status);
+#if LUA_VERSION_NUM == 501
+            cpus[status-1] = ((lua_Integer)lua_tointeger(L,-1));
+#else
             cpus[status-1] = ((lua_Unsigned)lua_tointegerx(L,-1, NULL));
+#endif
             lua_pop(L,1);
         }
     }
@@ -1815,7 +1931,9 @@ static int lua_likwid_startProgram(lua_State* L)
     }
     return 1;
 }
-static int lua_likwid_checkProgram(lua_State* L)
+
+static int
+lua_likwid_checkProgram(lua_State* L)
 {
     int ret = -1;
     if (lua_gettop(L) == 1)
@@ -1834,14 +1952,16 @@ static int lua_likwid_checkProgram(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_killProgram(lua_State* L)
+static int
+lua_likwid_killProgram(lua_State* L)
 {
     pid_t pid = lua_tonumber(L, 1);
     kill(pid, SIGTERM);
     return 0;
 }
 
-static int lua_likwid_waitpid(lua_State* L)
+static int
+lua_likwid_waitpid(lua_State* L)
 {
     int status;
     int ret = -1;
@@ -1856,7 +1976,8 @@ static int lua_likwid_waitpid(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_memSweep(lua_State* L)
+static int
+lua_likwid_memSweep(lua_State* L)
 {
     int i;
     int nrThreads = luaL_checknumber(L,1);
@@ -1869,14 +1990,19 @@ static int lua_likwid_memSweep(lua_State* L)
     for (i = 1; i <= nrThreads; i++)
     {
         lua_rawgeti(L,-1,i);
+#if LUA_VERSION_NUM == 501
+        cpus[i-1] = ((lua_Integer)lua_tointeger(L,-1));
+#else
         cpus[i-1] = ((lua_Unsigned)lua_tointegerx(L,-1, NULL));
+#endif
         lua_pop(L,1);
     }
     memsweep_threadGroup(cpus, nrThreads);
     return 0;
 }
 
-static int lua_likwid_memSweepDomain(lua_State* L)
+static int
+lua_likwid_memSweepDomain(lua_State* L)
 {
     int domain = luaL_checknumber(L,1);
     luaL_argcheck(L, domain >= 0, 1, "Domain ID must be greater or equal 0");
@@ -1884,7 +2010,8 @@ static int lua_likwid_memSweepDomain(lua_State* L)
     return 0;
 }
 
-static int lua_likwid_pinProcess(lua_State* L)
+static int
+lua_likwid_pinProcess(lua_State* L)
 {
     int cpuID = luaL_checknumber(L,-2);
     int silent = luaL_checknumber(L,-1);
@@ -1910,7 +2037,8 @@ static int lua_likwid_pinProcess(lua_State* L)
     return 0;
 }
 
-static int lua_likwid_setenv(lua_State* L)
+static int
+lua_likwid_setenv(lua_State* L)
 {
     const char* element = (const char*)luaL_checkstring(L, -2);
     const char* value = (const char*)luaL_checkstring(L, -1);
@@ -1918,22 +2046,25 @@ static int lua_likwid_setenv(lua_State* L)
     return 0;
 }
 
-static int lua_likwid_getpid(lua_State* L)
+static int
+lua_likwid_getpid(lua_State* L)
 {
     lua_pushinteger(L, (lua_Integer)(getpid()));
     return 1;
 }
 
-static int lua_likwid_setVerbosity(lua_State* L)
+static int
+lua_likwid_setVerbosity(lua_State* L)
 {
     int verbosity = lua_tointeger(L,-1);
-    luaL_argcheck(L, (verbosity >= 0 && verbosity <= DEBUGLEV_DEVELOP), -1, 
+    luaL_argcheck(L, (verbosity >= 0 && verbosity <= DEBUGLEV_DEVELOP), -1,
                 "Verbosity must be between 0 (only errors) and 3 (developer)");
     perfmon_verbosity = verbosity;
     return 0;
 }
 
-static int lua_likwid_access(lua_State* L)
+static int
+lua_likwid_access(lua_State* L)
 {
     int flags = 0;
     const char* file = (const char*)luaL_checkstring(L, 1);
@@ -1966,52 +2097,60 @@ static int lua_likwid_access(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_markerInit(lua_State* L)
+static int
+lua_likwid_markerInit(lua_State* L)
 {
     likwid_markerInit();
     return 0;
 }
 
-static int lua_likwid_markerThreadInit(lua_State* L)
+static int
+lua_likwid_markerThreadInit(lua_State* L)
 {
     likwid_markerThreadInit();
     return 0;
 }
 
-static int lua_likwid_markerClose(lua_State* L)
+static int
+lua_likwid_markerClose(lua_State* L)
 {
     likwid_markerClose();
     return 0;
 }
 
-static int lua_likwid_markerNext(lua_State* L)
+static int
+lua_likwid_markerNext(lua_State* L)
 {
     likwid_markerNextGroup();
     return 0;
 }
 
-static int lua_likwid_registerRegion(lua_State* L)
+static int
+lua_likwid_registerRegion(lua_State* L)
 {
     const char* tag = (const char*)luaL_checkstring(L, -1);
     lua_pushinteger(L, likwid_markerRegisterRegion(tag));
     return 1;
 }
 
-static int lua_likwid_startRegion(lua_State* L)
+static int
+lua_likwid_startRegion(lua_State* L)
 {
     const char* tag = (const char*)luaL_checkstring(L, -1);
     lua_pushinteger(L, likwid_markerStartRegion(tag));
     return 1;
 }
 
-static int lua_likwid_stopRegion(lua_State* L)
+static int
+lua_likwid_stopRegion(lua_State* L)
 {
     const char* tag = (const char*)luaL_checkstring(L, -1);
     lua_pushinteger(L, likwid_markerStopRegion(tag));
     return 1;
 }
 
-static int lua_likwid_getRegion(lua_State* L)
+static int
+lua_likwid_getRegion(lua_State* L)
 {
     int i = 0;
     const char* tag = (const char*)luaL_checkstring(L, -2);
@@ -2019,7 +2158,6 @@ static int lua_likwid_getRegion(lua_State* L)
     double* events = NULL;
     double time = 0.0;
     int count = 0;
-    
     events = (double*) malloc(nr_events * sizeof(double));
     if (events == NULL)
     {
@@ -2031,7 +2169,6 @@ static int lua_likwid_getRegion(lua_State* L)
         events[i] = 0.0;
     }
     likwid_markerGetRegion(tag, &nr_events, events, &time, &count);
-    
     lua_pushinteger(L, nr_events);
     lua_newtable(L);
     for (i=0;i<nr_events;i++)
@@ -2046,20 +2183,23 @@ static int lua_likwid_getRegion(lua_State* L)
     return 4;
 }
 
-static int lua_likwid_cpuFeatures_init(lua_State* L)
+static int
+lua_likwid_cpuFeatures_init(lua_State* L)
 {
     cpuFeatures_init();
     return 0;
 }
 
-static int lua_likwid_cpuFeatures_print(lua_State* L)
+static int
+lua_likwid_cpuFeatures_print(lua_State* L)
 {
     int cpu = lua_tointeger(L,-1);
     cpuFeatures_print(cpu);
     return 0;
 }
 
-static int lua_likwid_cpuFeatures_get(lua_State* L)
+static int
+lua_likwid_cpuFeatures_get(lua_State* L)
 {
     int cpu = lua_tointeger(L,-2);
     CpuFeature feature = lua_tointeger(L,-1);
@@ -2067,10 +2207,15 @@ static int lua_likwid_cpuFeatures_get(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_cpuFeatures_name(lua_State* L)
+static int
+lua_likwid_cpuFeatures_name(lua_State* L)
 {
     char* name = NULL;
+#if LUA_VERSION_NUM == 501
+    CpuFeature feature = ((lua_Integer)lua_tointeger(L,-1));
+#else
     CpuFeature feature = ((lua_Unsigned)lua_tointegerx(L,-1, NULL));
+#endif
     name = cpuFeatures_name(feature);
     if (name != NULL)
     {
@@ -2080,7 +2225,8 @@ static int lua_likwid_cpuFeatures_name(lua_State* L)
     return 0;
 }
 
-static int lua_likwid_cpuFeatures_enable(lua_State* L)
+static int
+lua_likwid_cpuFeatures_enable(lua_State* L)
 {
     int cpu = lua_tointeger(L,-3);
     CpuFeature feature = lua_tointeger(L,-2);
@@ -2089,7 +2235,8 @@ static int lua_likwid_cpuFeatures_enable(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_cpuFeatures_disable(lua_State* L)
+static int
+lua_likwid_cpuFeatures_disable(lua_State* L)
 {
     int cpu = lua_tointeger(L,-3);
     CpuFeature feature = lua_tointeger(L,-2);
@@ -2098,54 +2245,63 @@ static int lua_likwid_cpuFeatures_disable(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_markerFile_read(lua_State* L)
+static int
+lua_likwid_markerFile_read(lua_State* L)
 {
     const char* filename = (const char*)luaL_checkstring(L, -1);
-    perfmon_readMarkerFile(filename);
-    return 0;
+    int ret = perfmon_readMarkerFile(filename);
+    lua_pushinteger(L, ret);
+    return 1;
 }
 
-static int lua_likwid_markerFile_destroy(lua_State* L)
+static int
+lua_likwid_markerFile_destroy(lua_State* L)
 {
     perfmon_destroyMarkerResults();
     return 0;
 }
 
-static int lua_likwid_markerNumRegions(lua_State* L)
+static int
+lua_likwid_markerNumRegions(lua_State* L)
 {
     lua_pushinteger(L, perfmon_getNumberOfRegions());
     return 1;
 }
 
-static int lua_likwid_markerRegionGroup(lua_State* L)
+static int
+lua_likwid_markerRegionGroup(lua_State* L)
 {
     int region = lua_tointeger(L,-1);
     lua_pushinteger(L, perfmon_getGroupOfRegion(region-1)+1);
     return 1;
 }
 
-static int lua_likwid_markerRegionTag(lua_State* L)
+static int
+lua_likwid_markerRegionTag(lua_State* L)
 {
     int region = lua_tointeger(L,-1);
     lua_pushstring(L, perfmon_getTagOfRegion(region-1));
     return 1;
 }
 
-static int lua_likwid_markerRegionEvents(lua_State* L)
+static int
+lua_likwid_markerRegionEvents(lua_State* L)
 {
     int region = lua_tointeger(L,-1);
     lua_pushinteger(L, perfmon_getEventsOfRegion(region-1));
     return 1;
 }
 
-static int lua_likwid_markerRegionThreads(lua_State* L)
+static int
+lua_likwid_markerRegionThreads(lua_State* L)
 {
     int region = lua_tointeger(L,-1);
     lua_pushinteger(L, perfmon_getThreadsOfRegion(region-1));
     return 1;
 }
 
-static int lua_likwid_markerRegionCpulist(lua_State* L)
+static int
+lua_likwid_markerRegionCpulist(lua_State* L)
 {
     int i = 0;
     int region = lua_tointeger(L,-1);
@@ -2186,7 +2342,8 @@ static int lua_likwid_markerRegionCpulist(lua_State* L)
     return 0;
 }
 
-static int lua_likwid_markerRegionTime(lua_State* L)
+static int
+lua_likwid_markerRegionTime(lua_State* L)
 {
     int region = lua_tointeger(L,-2);
     int thread = lua_tointeger(L,-1);
@@ -2194,7 +2351,8 @@ static int lua_likwid_markerRegionTime(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_markerRegionCount(lua_State* L)
+static int
+lua_likwid_markerRegionCount(lua_State* L)
 {
     int region = lua_tointeger(L,-2);
     int thread = lua_tointeger(L,-1);
@@ -2202,7 +2360,8 @@ static int lua_likwid_markerRegionCount(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_markerRegionResult(lua_State* L)
+static int
+lua_likwid_markerRegionResult(lua_State* L)
 {
     int region = lua_tointeger(L,-3);
     int event = lua_tointeger(L,-2);
@@ -2211,7 +2370,8 @@ static int lua_likwid_markerRegionResult(lua_State* L)
     return 1;
 }
 
-static int lua_likwid_markerRegionMetric(lua_State* L)
+static int
+lua_likwid_markerRegionMetric(lua_State* L)
 {
     int region = lua_tointeger(L,-3);
     int metric = lua_tointeger(L,-2);
@@ -2220,7 +2380,227 @@ static int lua_likwid_markerRegionMetric(lua_State* L)
     return 1;
 }
 
-int __attribute__ ((visibility ("default") )) luaopen_liblikwid(lua_State* L){
+static int
+lua_likwid_getCpuClockCurrent(lua_State* L)
+{
+    /* Top of the Lua stack: CPU id. Pushes freq_getCpuClockCurrent() for it. */
+    lua_pushnumber(L, freq_getCpuClockCurrent((int) lua_tointeger(L, -1)));
+    return 1;
+}
+
+static int
+lua_likwid_setCpuClockCurrent(lua_State* L)
+{
+    const unsigned long newFreq = lua_tointeger(L, -1);  /* top: frequency   */
+    const int cpu = lua_tointeger(L, -2);                /* below it: CPU id */
+    lua_pushnumber(L, freq_setCpuClockCurrent(cpu, newFreq));
+    return 1;
+}
+
+static int
+lua_likwid_getCpuClockMin(lua_State* L)
+{
+    /* Top of the Lua stack: CPU id. Pushes freq_getCpuClockMin() for it. */
+    lua_pushnumber(L, freq_getCpuClockMin((int) lua_tointeger(L, -1)));
+    return 1;
+}
+
+static int
+lua_likwid_setCpuClockMin(lua_State* L)
+{
+    const unsigned long newFreq = lua_tointeger(L, -1);  /* top: frequency   */
+    const int cpu = lua_tointeger(L, -2);                /* below it: CPU id */
+    lua_pushnumber(L, freq_setCpuClockMin(cpu, newFreq));
+    return 1;
+}
+
+static int
+lua_likwid_getCpuClockMax(lua_State* L)
+{
+    /* Top of the Lua stack: CPU id. Pushes freq_getCpuClockMax() for it. */
+    lua_pushnumber(L, freq_getCpuClockMax((int) lua_tointeger(L, -1)));
+    return 1;
+}
+
+static int
+lua_likwid_setCpuClockMax(lua_State* L)
+{
+    const unsigned long newFreq = lua_tointeger(L, -1);  /* top: frequency   */
+    const int cpu = lua_tointeger(L, -2);                /* below it: CPU id */
+    lua_pushnumber(L, freq_setCpuClockMax(cpu, newFreq));
+    return 1;
+}
+
+static int
+lua_likwid_getGovernor(lua_State* L)
+{
+    /* Pushes the freq_getGovernor() string for the CPU id on top, or nil. */
+    char *name = freq_getGovernor((int) lua_tointeger(L, -1));
+    if (name == NULL)
+        lua_pushnil(L);
+    else
+        lua_pushstring(L, name);
+    return 1;
+}
+
+static int
+lua_likwid_setGovernor(lua_State* L)
+{
+    const int cpu = lua_tointeger(L, -2);            /* CPU id below the top */
+    const char* name = luaL_checkstring(L, -1);      /* governor name on top */
+    lua_pushnumber(L, freq_setGovernor(cpu, name));
+    return 1;
+}
+
+static int
+lua_likwid_getAvailFreq(lua_State* L)
+{
+    /* Pushes the freq_getAvailFreq() string for the CPU id on top, or nil. */
+    char* list = freq_getAvailFreq((int) lua_tointeger(L, -1));
+    if (list == NULL)
+        lua_pushnil(L);
+    else
+        lua_pushstring(L, list);
+    return 1;
+}
+
+static int
+lua_likwid_getAvailGovs(lua_State* L)
+{
+    /* Pushes the freq_getAvailGovs() string for the CPU id on top, or nil. */
+    char* list = freq_getAvailGovs((int) lua_tointeger(L, -1));
+    if (list == NULL)
+        lua_pushnil(L);
+    else
+        lua_pushstring(L, list);
+    return 1;
+}
+
+static int
+lua_likwid_getDriver(lua_State* L)
+{
+    /* Pushes the freq_getDriver() string for the CPU id on top, or nil. */
+    char* name = freq_getDriver((int) lua_tointeger(L, -1));
+    if (name == NULL)
+        lua_pushnil(L);
+    else
+        lua_pushstring(L, name);
+    return 1;
+}
+
+static int
+lua_likwid_getuid(lua_State* L)
+{
+    /* Real user ID of the process; likwid_geteuid reports the effective one. */
+    lua_pushnumber(L, (int) getuid());
+    return 1;
+}
+
+static int
+lua_likwid_geteuid(lua_State* L)
+{
+    /* Hands the effective user ID of the calling process back to Lua. */
+    lua_pushnumber(L, (int) geteuid());
+    return 1;
+}
+
+static int
+lua_likwid_setuid(lua_State* L)
+{
+    /*
+     * Lua argument 1: numeric user ID handed to setuid().
+     *
+     * Lua result: true when setuid() returned 0, false for any
+     * other return value.
+     */
+    const int uid = (int) lua_tonumber(L, 1);
+    const int ok = (setuid((uid_t) uid) == 0);
+
+    lua_pushboolean(L, ok);
+    return 1;
+}
+
+static int
+lua_likwid_seteuid(lua_State* L)
+{
+    /*
+     * Lua argument 1: numeric user ID handed to seteuid().
+     *
+     * Lua result: true when seteuid() returned 0, false for any
+     * other return value.
+     */
+    const int uid = (int) lua_tonumber(L, 1);
+    const int ok = (seteuid((uid_t) uid) == 0);
+
+    lua_pushboolean(L, ok);
+    return 1;
+}
+
+static int
+lua_likwid_setresuid(lua_State* L)
+{
+    /*
+     * Lua arguments 1..3: numeric real, effective and saved user IDs,
+     * passed to setresuid() in that order.
+     *
+     * Lua result: true when setresuid() returned 0, false for any
+     * other return value.
+     */
+    const int real = (int) lua_tonumber(L, 1);
+    const int effective = (int) lua_tonumber(L, 2);
+    const int saved = (int) lua_tonumber(L, 3);
+    const int rc = setresuid((uid_t) real, (uid_t) effective, (uid_t) saved);
+    lua_pushboolean(L, rc == 0);
+    return 1;
+}
+
+static int
+lua_likwid_setresuser(lua_State* L)
+{
+    /*
+     * Lua arguments 1..3: user names for the real, effective and saved
+     * user ID. Each name is looked up with getpwnam(); the resulting
+     * IDs are handed to setresuid().
+     *
+     * Lua result: false when a name lookup fails or setresuid() returns
+     * non-zero, true otherwise.
+     */
+    const char* names[3];
+    uid_t ids[3];
+    int rc;
+    int i;
+
+    /* All three arguments are type-checked before the first lookup. */
+    for (i = 0; i < 3; i++)
+    {
+        names[i] = luaL_checkstring(L, i + 1);
+    }
+
+    for (i = 0; i < 3; i++)
+    {
+        /* Only the numeric ID is kept from each getpwnam() result. */
+        const struct passwd *entry = getpwnam(names[i]);
+
+        if (entry == NULL)
+        {
+            lua_pushboolean(L, 0);
+            return 1;
+        }
+        ids[i] = entry->pw_uid;
+    }
+
+    rc = setresuid(ids[0], ids[1], ids[2]);
+    lua_pushboolean(L, rc == 0);
+
+    return 1;
+}
+
+
+
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
+
+int __attribute__ ((visibility ("default") ))
+luaopen_liblikwid(lua_State* L){
     // Configuration functions
     lua_register(L, "likwid_getConfiguration", lua_likwid_getConfiguration);
     lua_register(L, "likwid_setGroupPath", lua_likwid_setGroupPath);
@@ -2334,9 +2714,29 @@ int __attribute__ ((visibility ("default") )) luaopen_liblikwid(lua_State* L){
     lua_register(L, "likwid_markerRegionCount", lua_likwid_markerRegionCount);
     lua_register(L, "likwid_markerRegionResult", lua_likwid_markerRegionResult);
     lua_register(L, "likwid_markerRegionMetric", lua_likwid_markerRegionMetric);
+    // CPU frequency functions
+    lua_register(L, "likwid_getCpuClockCurrent", lua_likwid_getCpuClockCurrent);
+    lua_register(L, "likwid_setCpuClockCurrent", lua_likwid_setCpuClockCurrent);
+    lua_register(L, "likwid_getCpuClockMin", lua_likwid_getCpuClockMin);
+    lua_register(L, "likwid_setCpuClockMin", lua_likwid_setCpuClockMin);
+    lua_register(L, "likwid_getCpuClockMax", lua_likwid_getCpuClockMax);
+    lua_register(L, "likwid_setCpuClockMax", lua_likwid_setCpuClockMax);
+    lua_register(L, "likwid_getGovernor", lua_likwid_getGovernor);
+    lua_register(L, "likwid_setGovernor", lua_likwid_setGovernor);
+    lua_register(L, "likwid_getAvailFreq", lua_likwid_getAvailFreq);
+    lua_register(L, "likwid_getAvailGovs", lua_likwid_getAvailGovs);
+    lua_register(L, "likwid_getDriver", lua_likwid_getDriver);
+    // setuid&friends
+    lua_register(L, "likwid_getuid", lua_likwid_getuid);
+    lua_register(L, "likwid_geteuid", lua_likwid_geteuid);
+    lua_register(L, "likwid_setuid", lua_likwid_setuid);
+    lua_register(L, "likwid_seteuid", lua_likwid_seteuid);
+    lua_register(L, "likwid_setresuid", lua_likwid_setresuid);
+    lua_register(L, "likwid_setresuser", lua_likwid_setresuser);
 #ifdef __MIC__
     setuid(0);
     seteuid(0);
 #endif
     return 0;
 }
+
diff --git a/src/memsweep.c b/src/memsweep.c
index f9ea77e..b87e297 100644
--- a/src/memsweep.c
+++ b/src/memsweep.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Implementation of sweeper module.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -29,6 +29,7 @@
  */
 
 /* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
@@ -41,29 +42,24 @@
 #include <numa.h>
 #include <affinity.h>
 
-extern void _loadData(uint32_t size, void* ptr);
-
 /* #####   EXPORTED VARIABLES   ########################################### */
 
+extern void _loadData(uint32_t size, void* ptr);
 
-/* #####   MACROS  -  LOCAL TO THIS SOURCE FILE   ######################### */
-
-
-/* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
+/* #####   LOCAL VARIABLES   ############################################## */
 
 static uint64_t  memoryFraction = 80ULL;
 
-
 /* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
 
-static void* 
+static void*
 allocateOnNode(size_t size, int domainId)
 {
-	char *ptr; 
+    char *ptr;
 
-	ptr = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);  
+    ptr = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
 
-	if (ptr == (char *)-1)
+    if (ptr == (char *)-1)
     {
         ERROR;
     }
@@ -73,7 +69,7 @@ allocateOnNode(size_t size, int domainId)
     return ptr;
 }
 
-static void 
+static void
 initMemory(size_t size, char* ptr, int domainId)
 {
     affinity_pinProcess(numa_info.nodes[domainId].processors[0]);
@@ -100,7 +96,8 @@ findProcessor(uint32_t nodeId, uint32_t coreId)
 }
 
 /* evict all dirty cachelines from last level cache */
-static void cleanupCache(char* ptr)
+static void
+cleanupCache(char* ptr)
 {
 #if defined(__x86_64__) || defined(__i386__)
     uint32_t cachesize = 2 * cpuid_topology.cacheLevels[cpuid_topology.numCacheLevels-1].size;
@@ -112,7 +109,6 @@ static void cleanupCache(char* ptr)
 #endif
 }
 
-
 /* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
 
 void
@@ -121,7 +117,6 @@ memsweep_setMemoryFraction(uint64_t fraction)
     memoryFraction = fraction;
 }
 
-
 void
 memsweep_node(void)
 {
@@ -131,7 +126,6 @@ memsweep_node(void)
     }
 }
 
-
 void
 memsweep_domain(int domainId)
 {
@@ -151,7 +145,7 @@ memsweep_domain(int domainId)
 }
 
 void
-memsweep_threadGroup(int* processorList, int numberOfProcessors)
+memsweep_threadGroup(const int* processorList, int numberOfProcessors)
 {
     for (uint32_t i=0; i<numa_info.numberOfNodes; i++)
     {
@@ -166,6 +160,3 @@ memsweep_threadGroup(int* processorList, int numberOfProcessors)
     }
 }
 
-
-
-
diff --git a/src/numa.c b/src/numa.c
index c882508..11efa63 100644
--- a/src/numa.c
+++ b/src/numa.c
@@ -6,8 +6,8 @@
  *      Description:  Implementation of Linux NUMA interface. Selects between hwloc and
  *                    procfs/sysfs backends.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -29,6 +29,7 @@
  *
  * =======================================================================================
  */
+
 /* #####   HEADER FILE INCLUDES   ######################################### */
 
 #include <stdlib.h>
@@ -62,14 +63,17 @@
 
 
 /* #####   EXPORTED VARIABLES   ########################################### */
+
 NumaTopology numa_info = {0,NULL};
-static int numaInitialized = 0;
 
-/* #####   MACROS  -  LOCAL TO THIS SOURCE FILE   ######################### */
 /* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
-/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
 
-int str2int(const char* str)
+static int numaInitialized = 0;
+
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
+
+int
+str2int(const char* str)
 {
     char* endptr;
     errno = 0;
@@ -92,8 +96,6 @@ int str2int(const char* str)
     return (int) val;
 }
 
-/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
-
 int
 empty_numa_init()
 {
@@ -101,8 +103,8 @@ empty_numa_init()
     return 0;
 }
 
-void 
-empty_numa_setInterleaved(int* processorList, int numberOfProcessors)
+void
+empty_numa_setInterleaved(const int* processorList, int numberOfProcessors)
 {
     printf("MEMPOLICY NOT supported in kernel!\n");
     return;
@@ -115,7 +117,6 @@ empty_numa_membind(void* ptr, size_t size, int domainId)
     return;
 }
 
-
 const struct numa_functions numa_funcs = {
 #ifndef HAS_MEMPOLICY
     .numa_init = empty_numa_init,
@@ -132,8 +133,8 @@ const struct numa_functions numa_funcs = {
 #endif
 };
 
-
-int numa_init(void)
+int
+numa_init(void)
 {
     const struct numa_functions funcs = numa_funcs;
     int ret = 0;
@@ -171,25 +172,29 @@ int numa_init(void)
     return ret;
 }
 
-void numa_setInterleaved(int* processorList, int numberOfProcessors)
+void
+numa_setInterleaved(const int* processorList, int numberOfProcessors)
 {
     const struct numa_functions funcs = numa_funcs;
     return funcs.numa_setInterleaved(processorList, numberOfProcessors);
 }
 
-void numa_membind(void* ptr, size_t size, int domainId)
+void
+numa_membind(void* ptr, size_t size, int domainId)
 {
     const struct numa_functions funcs = numa_funcs;
     return funcs.numa_membind(ptr, size, domainId);
 }
 
 #ifndef HAS_MEMPOLICY
-void numa_finalize(void)
+void
+numa_finalize(void)
 {
     return;
 }
 #else
-void numa_finalize(void)
+void
+numa_finalize(void)
 {
     int i;
     if (!numaInitialized)
@@ -221,7 +226,8 @@ void numa_finalize(void)
     return;
 }
 
-int likwid_getNumberOfNodes()
+int
+likwid_getNumberOfNodes()
 {
     if (numaInitialized)
     {
diff --git a/src/numa_hwloc.c b/src/numa_hwloc.c
index ccd69ad..5a85059 100644
--- a/src/numa_hwloc.c
+++ b/src/numa_hwloc.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Interface to hwloc for NUMA topology
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -28,6 +28,8 @@
  * =======================================================================================
  */
 
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 
@@ -40,21 +42,23 @@
 #include <topology_hwloc.h>
 #endif
 
+/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
 
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
 
-/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
 #ifdef LIKWID_USE_HWLOC
-uint64_t getFreeNodeMem(int nodeId)
+uint64_t
+getFreeNodeMem(int nodeId)
 {
     FILE *fp;
     bstring filename;
     uint64_t free = 0;
     bstring freeString  = bformat("MemFree:");
     int i;
-    
+
     filename = bformat("/sys/devices/system/node/node%d/meminfo", nodeId);
 
-    if (NULL != (fp = fopen (bdata(filename), "r"))) 
+    if (NULL != (fp = fopen (bdata(filename), "r")))
     {
         bstring src = bread ((bNread) fread, fp);
         struct bstrList* tokens = bsplit(src,(char) '\n');
@@ -79,7 +83,7 @@ uint64_t getFreeNodeMem(int nodeId)
     {
         bdestroy(filename);
         filename = bfromcstr("/proc/meminfo");
-        if (NULL != (fp = fopen (bdata(filename), "r"))) 
+        if (NULL != (fp = fopen (bdata(filename), "r")))
         {
             bstring src = bread ((bNread) fread, fp);
             struct bstrList* tokens = bsplit(src,(char) '\n');
@@ -109,10 +113,10 @@ uint64_t getFreeNodeMem(int nodeId)
     bdestroy(freeString);
     bdestroy(filename);
     return free;
-    
 }
 
-uint64_t getTotalNodeMem(int nodeId)
+uint64_t
+getTotalNodeMem(int nodeId)
 {
     int i;
     FILE *fp;
@@ -120,8 +124,9 @@ uint64_t getTotalNodeMem(int nodeId)
     bstring totalString  = bformat("MemTotal:");
     bstring sysfilename = bformat("/sys/devices/system/node/node%d/meminfo", nodeId);
     bstring procfilename = bformat("/proc/meminfo");
+    char *sptr = bdata(procfilename);
 
-    if (NULL != (fp = fopen (bdata(sysfilename), "r"))) 
+    if (NULL != (fp = fopen (bdata(sysfilename), "r")))
     {
         bstring src = bread ((bNread) fread, fp);
         struct bstrList* tokens = bsplit(src,(char) '\n');
@@ -142,9 +147,9 @@ uint64_t getTotalNodeMem(int nodeId)
         bdestroy(src);
         fclose(fp);
     }
-    else if (!access(bdata(procfilename), R_OK))
+    else if (!access(sptr, R_OK))
     {
-        if (NULL != (fp = fopen (bdata(procfilename), "r"))) 
+        if (NULL != (fp = fopen (bdata(procfilename), "r")))
         {
             bstring src = bread ((bNread) fread, fp);
             struct bstrList* tokens = bsplit(src,(char) '\n');
@@ -179,12 +184,13 @@ uint64_t getTotalNodeMem(int nodeId)
     return total;
 }
 
-int likwid_hwloc_findProcessor(int nodeID, int cpuID)
+int
+likwid_hwloc_findProcessor(int nodeID, int cpuID)
 {
     hwloc_obj_t obj;
     int i;
     int pu_count = likwid_hwloc_get_nbobjs_by_type(hwloc_topology, HWLOC_OBJ_PU);
-    
+
     for (i=0; i<pu_count; i++)
     {
         obj = likwid_hwloc_get_obj_by_type(hwloc_topology, HWLOC_OBJ_PU, i);
@@ -204,8 +210,9 @@ int likwid_hwloc_findProcessor(int nodeID, int cpuID)
 
 }
 
-/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
-int hwloc_numa_init(void)
+
+int
+hwloc_numa_init(void)
 {
     int errno;
     uint32_t i;
@@ -237,7 +244,6 @@ int hwloc_numa_init(void)
             fprintf(stderr,"No memory to allocate %ld byte for nodes array\n",sizeof(NumaNode));
             return -1;
         }
-        
         numa_info.nodes[0].id = 0;
         numa_info.nodes[0].numberOfProcessors = 0;
         numa_info.nodes[0].totalMemory = getTotalNodeMem(0);
@@ -245,23 +251,25 @@ int hwloc_numa_init(void)
         numa_info.nodes[0].processors = (uint32_t*) malloc(MAX_NUM_THREADS * sizeof(uint32_t));
         if (!numa_info.nodes[0].processors)
         {
-            fprintf(stderr,"No memory to allocate %ld byte for processors array of NUMA node %d\n",MAX_NUM_THREADS * sizeof(uint32_t),0);
+            fprintf(stderr,"No memory to allocate %ld byte for processors array of NUMA node %d\n",
+                    MAX_NUM_THREADS * sizeof(uint32_t),0);
             return -1;
         }
         numa_info.nodes[0].distances = (uint32_t*) malloc(sizeof(uint32_t));
         if (!numa_info.nodes[0].distances)
         {
-            fprintf(stderr,"No memory to allocate %ld byte for distances array of NUMA node %d\n",sizeof(uint32_t),0);
+            fprintf(stderr,"No memory to allocate %ld byte for distances array of NUMA node %d\n",
+                    sizeof(uint32_t),0);
             return -1;
         }
         numa_info.nodes[0].distances[0] = 10;
         numa_info.nodes[0].numberOfDistances = 1;
         cores_per_socket = cpuid_topology.numHWThreads/cpuid_topology.numSockets;
-        
+
         for (d=0; d<likwid_hwloc_get_nbobjs_by_type(hwloc_topology, hwloc_type); d++)
         {
             obj = likwid_hwloc_get_obj_by_type(hwloc_topology, hwloc_type, d);
-            /* depth is here used as index in the processors array */        
+            /* depth is here used as index in the processors array */
             depth = d * cores_per_socket;
             numa_info.nodes[0].numberOfProcessors += likwid_hwloc_record_objs_of_type_below_obj(
                     likwid_hwloc_topology, obj, HWLOC_OBJ_PU, &depth, &numa_info.nodes[0].processors);
@@ -272,7 +280,8 @@ int hwloc_numa_init(void)
         numa_info.nodes = (NumaNode*) malloc(numa_info.numberOfNodes * sizeof(NumaNode));
         if (!numa_info.nodes)
         {
-            fprintf(stderr,"No memory to allocate %ld byte for nodes array\n",numa_info.numberOfNodes * sizeof(NumaNode));
+            fprintf(stderr,"No memory to allocate %ld byte for nodes array\n",
+                    numa_info.numberOfNodes * sizeof(NumaNode));
             return -1;
         }
         depth = likwid_hwloc_get_type_depth(hwloc_topology, hwloc_type);
@@ -295,23 +304,23 @@ int hwloc_numa_init(void)
             {
                 numa_info.nodes[i].totalMemory = getTotalNodeMem(numa_info.nodes[i].id);
             }
-            
             /* freeMemory not detected by hwloc, do it the native way */
             numa_info.nodes[i].freeMemory = getFreeNodeMem(numa_info.nodes[i].id);
             numa_info.nodes[i].processors = (uint32_t*) malloc(MAX_NUM_THREADS * sizeof(uint32_t));
             if (!numa_info.nodes[i].processors)
             {
-                fprintf(stderr,"No memory to allocate %ld byte for processors array of NUMA node %d\n",MAX_NUM_THREADS * sizeof(uint32_t), i);
+                fprintf(stderr,"No memory to allocate %ld byte for processors array of NUMA node %d\n",
+                        MAX_NUM_THREADS * sizeof(uint32_t), i);
                 return -1;
             }
             d = 0;
             numa_info.nodes[i].numberOfProcessors = likwid_hwloc_record_objs_of_type_below_obj(
                     hwloc_topology, obj, HWLOC_OBJ_PU, &d, &numa_info.nodes[i].processors);
-            
             numa_info.nodes[i].distances = (uint32_t*) malloc(numa_info.numberOfNodes * sizeof(uint32_t));
             if (!numa_info.nodes[i].distances)
             {
-                fprintf(stderr,"No memory to allocate %ld byte for distances array of NUMA node %d\n",numa_info.numberOfNodes*sizeof(uint32_t),i);
+                fprintf(stderr,"No memory to allocate %ld byte for distances array of NUMA node %d\n",
+                        numa_info.numberOfNodes*sizeof(uint32_t),i);
                 return -1;
             }
             if (distances)
@@ -332,7 +341,6 @@ int hwloc_numa_init(void)
             }
 
         }
-    
     }
 
     if (numa_info.nodes[0].numberOfProcessors == 0)
@@ -345,17 +353,15 @@ int hwloc_numa_init(void)
     }
 }
 
-void hwloc_numa_membind(void* ptr, size_t size, int domainId)
+void
+hwloc_numa_membind(void* ptr, size_t size, int domainId)
 {
     int ret = 0;
     hwloc_membind_flags_t flags = HWLOC_MEMBIND_STRICT|HWLOC_MEMBIND_PROCESS;
     hwloc_nodeset_t nodeset = likwid_hwloc_bitmap_alloc();
-    
     likwid_hwloc_bitmap_zero(nodeset);
     likwid_hwloc_bitmap_set(nodeset, domainId);
-    
     ret = likwid_hwloc_set_area_membind_nodeset(hwloc_topology, ptr, size, nodeset, HWLOC_MEMBIND_BIND, flags);
-    
     likwid_hwloc_bitmap_free(nodeset);
 
     if (ret < 0)
@@ -364,17 +370,14 @@ void hwloc_numa_membind(void* ptr, size_t size, int domainId)
     }
 }
 
-
-
-void hwloc_numa_setInterleaved(int* processorList, int numberOfProcessors)
+void
+hwloc_numa_setInterleaved(int* processorList, int numberOfProcessors)
 {
     int i,j;
     int ret = 0;
     likwid_hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
     likwid_hwloc_membind_flags_t flags = HWLOC_MEMBIND_STRICT|HWLOC_MEMBIND_PROCESS;
-    
     likwid_hwloc_bitmap_zero(cpuset);
-    
     for (i=0; i<numa_info.numberOfNodes; i++)
     {
         for (j=0; j<numberOfProcessors; j++)
@@ -385,24 +388,22 @@ void hwloc_numa_setInterleaved(int* processorList, int numberOfProcessors)
             }
         }
     }
-    
-    
     ret = likwid_hwloc_set_membind(hwloc_topology, cpuset, HWLOC_MEMBIND_INTERLEAVE, flags);
-    
     likwid_hwloc_bitmap_free(cpuset);
-    
     if (ret < 0)
     {
         ERROR;
     }
 }
 #else
-int hwloc_numa_init(void)
+int
+hwloc_numa_init(void)
 {
     return 1;
 }
 
-void hwloc_numa_membind(void* ptr, size_t size, int domainId)
+void
+hwloc_numa_membind(void* ptr, size_t size, int domainId)
 {
     return;
 }
diff --git a/src/numa_proc.c b/src/numa_proc.c
index a7623e7..5ed4617 100644
--- a/src/numa_proc.c
+++ b/src/numa_proc.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Get NUMA topology from procfs and sysfs
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -28,7 +28,9 @@
  *
  * =======================================================================================
  */
- 
+
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 
@@ -44,11 +46,6 @@
 #include <numa.h>
 #include <topology.h>
 
-/* #####   EXPORTED VARIABLES   ########################################### */
-
-
-
-
 /* #####   MACROS  -  LOCAL TO THIS SOURCE FILE   ######################### */
 
 #ifdef HAS_MEMPOLICY
@@ -57,8 +54,9 @@
 #define mbind(start, len, nmask, maxnode, flags) syscall(SYS_mbind,(start),len,MPOL_BIND,(nmask),maxnode,flags)
 #endif
 
-/* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
-int
+/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
+
+static int
 proc_findProcessor(uint32_t nodeId, uint32_t coreId)
 {
     int i;
@@ -73,7 +71,6 @@ proc_findProcessor(uint32_t nodeId, uint32_t coreId)
     return 0;
 }
 
-/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
 static int
 setConfiguredNodes(void)
 {
@@ -83,13 +80,13 @@ setConfiguredNodes(void)
 
     dir = opendir("/sys/devices/system/node");
 
-    if (!dir) 
+    if (!dir)
     {
         maxIdConfiguredNode = 0;
     }
     else
     {
-        while ((de = readdir(dir)) != NULL) 
+        while ((de = readdir(dir)) != NULL)
         {
             int nd;
             if (strncmp(de->d_name, "node", 4))
@@ -109,7 +106,6 @@ setConfiguredNodes(void)
     return maxIdConfiguredNode;
 }
 
-
 static void
 nodeMeminfo(int node, uint64_t* totalMemory, uint64_t* freeMemory)
 {
@@ -121,7 +117,7 @@ nodeMeminfo(int node, uint64_t* totalMemory, uint64_t* freeMemory)
 
     filename = bformat("/sys/devices/system/node/node%d/meminfo", node);
 
-    if (NULL != (fp = fopen (bdata(filename), "r"))) 
+    if (NULL != (fp = fopen (bdata(filename), "r")))
     {
         bstring src = bread ((bNread) fread, fp);
         struct bstrList* tokens = bsplit(src,(char) '\n');
@@ -185,11 +181,10 @@ nodeProcessorList(int node, uint32_t** list)
     }
 
     /* the cpumap interface should be always there */
-    filename = bformat("/sys/devices/system/node/node%d/cpumap", node); 
+    filename = bformat("/sys/devices/system/node/node%d/cpumap", node);
 
-    if (NULL != (fp = fopen (bdata(filename), "r"))) 
+    if (NULL != (fp = fopen (bdata(filename), "r")))
     {
-
         src = bread ((bNread) fread, fp);
         tokens = bsplit(src,',');
 
@@ -198,12 +193,12 @@ nodeProcessorList(int node, uint32_t** list)
             val = strtoul((char*) tokens->entry[i]->data, &endptr, 16);
 
             if ((errno != 0 && val == LONG_MAX )
-                    || (errno != 0 && val == 0)) 
+                    || (errno != 0 && val == 0))
             {
                 return -EFAULT;
             }
 
-            if (endptr == (char*) tokens->entry[i]->data) 
+            if (endptr == (char*) tokens->entry[i]->data)
             {
                 ERROR_PLAIN_PRINT(No digits were found);
                 return -EFAULT;
@@ -234,18 +229,16 @@ nodeProcessorList(int node, uint32_t** list)
         bstrListDestroy(tokens);
         bdestroy(src);
         bdestroy(filename);
-        fclose(fp); 
+        fclose(fp);
 
         /* FIXME: CPU list here is not physical cores first but numerical sorted */
-
-
         return count;
     }
 
     /* something went wrong */
     return -1;
 }
- 
+
 static int
 nodeDistanceList(int node, int numberOfNodes, uint32_t** list)
 {
@@ -299,14 +292,15 @@ nodeDistanceList(int node, int numberOfNodes, uint32_t** list)
 
 int proc_numa_init(void)
 {
-    int errno;
+    int err = 0;
     uint32_t i;
+    uint64_t nrCPUs = 0;
 
     if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && errno == ENOSYS)
     {
         numa_info.numberOfNodes = 0;
         numa_info.nodes = NULL;
-        return -1; 
+        return -1;
     }
     /* First determine maximum number of nodes */
     numa_info.numberOfNodes = setConfiguredNodes()+1;
@@ -321,22 +315,30 @@ int proc_numa_init(void)
         numa_info.nodes[i].id = i;
         nodeMeminfo(i, &numa_info.nodes[i].totalMemory, &numa_info.nodes[i].freeMemory);
         numa_info.nodes[i].numberOfProcessors = nodeProcessorList(i,&numa_info.nodes[i].processors);
-        if (numa_info.nodes[i].numberOfProcessors == 0)
+        nrCPUs += numa_info.nodes[i].numberOfProcessors;
+        if (numa_info.nodes[i].numberOfProcessors == 0 && nrCPUs != cpuid_topology.activeHWThreads)
         {
-            return -EFAULT;
+            err = -EFAULT;
+            break;
         }
         numa_info.nodes[i].numberOfDistances = nodeDistanceList(i, numa_info.numberOfNodes, &numa_info.nodes[i].distances);
         if (numa_info.nodes[i].numberOfDistances == 0)
         {
-            return -EFAULT;
+            err = -EFAULT;
+            break;
         }
     }
+    for (; i<numa_info.numberOfNodes; i++)
+    {
+        numa_info.nodes[i].numberOfProcessors = 0;
+        numa_info.nodes[i].numberOfDistances = nodeDistanceList(i, numa_info.numberOfNodes, &numa_info.nodes[i].distances);
+    }
 
-    return 0;
+    return err;
 }
 
-void 
-proc_numa_setInterleaved(int* processorList, int numberOfProcessors)
+void
+proc_numa_setInterleaved(const int* processorList, int numberOfProcessors)
 {
     long i;
     int j;
@@ -381,3 +383,4 @@ proc_numa_membind(void* ptr, size_t size, int domainId)
         ERROR;
     }
 }
+
diff --git a/src/pci_hwloc.c b/src/pci_hwloc.c
index 6177e21..d853e5d 100644
--- a/src/pci_hwloc.c
+++ b/src/pci_hwloc.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Interface to hwloc for PCI device lookup
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -28,6 +28,8 @@
  * =======================================================================================
  */
 
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -41,7 +43,9 @@
 #include <topology_hwloc.h>
 #include <error.h>
 
-int 
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
+
+int
 hwloc_pci_init(uint16_t testDevice, char** socket_bus, int* nrSockets)
 {
     int cntr = 0;
@@ -79,3 +83,4 @@ hwloc_pci_init(uint16_t testDevice, char** socket_bus, int* nrSockets)
 
     return 0;
 }
+
diff --git a/src/pci_proc.c b/src/pci_proc.c
index 854a526..fada99c 100644
--- a/src/pci_proc.c
+++ b/src/pci_proc.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Interface to procfs/sysfs for PCI device lookup
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -28,19 +28,23 @@
  * =======================================================================================
  */
 
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <fcntl.h>
 
-
 #include <types.h>
 #include <bstrlib.h>
 #include <affinity.h>
 #include <topology.h>
 #include <error.h>
 
-int getBusFromSocket(const uint32_t socket)
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
+
+int
+getBusFromSocket(const uint32_t socket)
 {
     int cur_bus = 0;
     uint32_t cur_socket = 0;
@@ -84,7 +88,6 @@ proc_pci_init(uint16_t testDevice, char** socket_bus, int* nrSockets)
     uint16_t testVendor = 0x8086;
     uint32_t sbus, sdevfn, svend, sdev;
     int busID;
-    
 
     if ( (fptr = fopen( "/proc/bus/pci/devices", "r")) == NULL )
     {
@@ -112,14 +115,12 @@ proc_pci_init(uint16_t testDevice, char** socket_bus, int* nrSockets)
         }
     }
     fclose(fptr);
-    
     *nrSockets = cntr;
-    
     if ( cntr == 0 )
     {
         //fprintf(stderr, "Uncore not supported on this system\n");
         return -ENODEV;
     }
-    
     return 0;
 }
+
diff --git a/src/perfgroup.c b/src/perfgroup.c
index aff2e7c..d810825 100644
--- a/src/perfgroup.c
+++ b/src/perfgroup.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Handler for performance groups and event sets
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at gmail.com
@@ -29,6 +29,8 @@
  * =======================================================================================
  */
 
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -44,7 +46,12 @@
 #include <calculator.h>
 #include <likwid.h>
 
-int isdir(char* dirname)
+static int totalgroups = 0;
+
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
+
+int
+isdir(char* dirname)
 {
     struct stat st;
     if (NULL == dirname) {
@@ -56,7 +63,54 @@ int isdir(char* dirname)
     return S_ISDIR(st.st_mode) ? 1 : 0;
 }
 
-int get_groups(char* grouppath, char* architecture, char*** groupnames, char*** groupshort, char*** grouplong)
+void
+return_groups(int groups, char** groupnames, char** groupshort, char** grouplong)
+{
+    int i;
+    int freegroups = (totalgroups < groups ? groups : totalgroups);
+    for (i = 0; i <freegroups; i++)
+    {
+        free(groupnames[i]);
+        groupnames[i] = NULL;
+        if (i < groups)
+        {
+            if (groupshort[i] != NULL)
+            {
+                free(groupshort[i]);
+                groupshort[i] = NULL;
+            }
+            if (grouplong[i] != NULL)
+            {
+                free(grouplong[i]);
+                grouplong[i] = NULL;
+            }
+        }
+    }
+    if (groupnames != NULL)
+    {
+        free(groupnames);
+        groupnames = NULL;
+    }
+    if (groupshort != NULL)
+    {
+        free(groupshort);
+        groupshort = NULL;
+    }
+    if (grouplong != NULL)
+    {
+        free(grouplong);
+        grouplong = NULL;
+    }
+}
+
+
+int
+get_groups(
+        const char* grouppath,
+        const char* architecture,
+        char*** groupnames,
+        char*** groupshort,
+        char*** grouplong)
 {
     int i = 0, j = 0, s = 0;
     int fsize = 0, hsize = 0;
@@ -70,6 +124,7 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
     int search_home = 0;
     bstring SHORT = bformat("SHORT");
     bstring LONG = bformat("LONG");
+    bstring REQUIRE = bformat("REQUIRE_NOHT");
     int read_long = 0;
     if ((grouppath == NULL)||(architecture == NULL)||(groupnames == NULL))
         return -EINVAL;
@@ -78,6 +133,7 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
     {
         bdestroy(SHORT);
         bdestroy(LONG);
+        bdestroy(REQUIRE);
         return -ENOMEM;
     }
     char* homepath = malloc((strlen(getenv("HOME"))+strlen(architecture)+50) * sizeof(char));
@@ -86,6 +142,7 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
         free(fullpath);
         bdestroy(SHORT);
         bdestroy(LONG);
+        bdestroy(REQUIRE);
         return -ENOMEM;
     }
     fsize = sprintf(fullpath, "%s/%s", grouppath, architecture);
@@ -99,6 +156,7 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
             free(homepath);
             bdestroy(SHORT);
             bdestroy(LONG);
+            bdestroy(REQUIRE);
             return -EACCES;
         }
     }
@@ -109,6 +167,7 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
         free(homepath);
         bdestroy(SHORT);
         bdestroy(LONG);
+        bdestroy(REQUIRE);
         return -EACCES;
     }
     i = 0;
@@ -117,7 +176,7 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
     {
         if (strncmp(&(ep->d_name[strlen(ep->d_name)-4]), ".txt", 4) == 0)
         {
-            i++;
+            totalgroups++;
             if (strlen(ep->d_name)-4 > s)
                 s = strlen(ep->d_name)-4;
         }
@@ -138,7 +197,7 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
             {
                 if (strncmp(&(ep->d_name[strlen(ep->d_name)-4]), ".txt", 4) == 0)
                 {
-                    i++;
+                    totalgroups++;
                     if (strlen(ep->d_name)-4 > s)
                         s = strlen(ep->d_name)-4;
                 }
@@ -146,17 +205,18 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
             closedir(dp);
         }
     }
-
-    *groupnames = malloc(i * sizeof(char**));
+    *groupnames = malloc(totalgroups * sizeof(char**));
     if (*groupnames == NULL)
     {
         free(fullpath);
         free(homepath);
         bdestroy(SHORT);
         bdestroy(LONG);
+        bdestroy(REQUIRE);
         return -ENOMEM;
     }
-    *groupshort = malloc(i * sizeof(char**));
+    memset(*groupnames, 0, totalgroups * sizeof(char**));
+    *groupshort = malloc(totalgroups * sizeof(char**));
     if (*groupshort == NULL)
     {
         free(*groupnames);
@@ -165,9 +225,11 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
         free(homepath);
         bdestroy(SHORT);
         bdestroy(LONG);
+        bdestroy(REQUIRE);
         return -ENOMEM;
     }
-    *grouplong = malloc(i * sizeof(char**));
+    memset(*groupshort, 0, totalgroups * sizeof(char**));
+    *grouplong = malloc(totalgroups * sizeof(char**));
     if (*grouplong == NULL)
     {
         free(*groupnames);
@@ -178,15 +240,19 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
         free(homepath);
         bdestroy(SHORT);
         bdestroy(LONG);
+        bdestroy(REQUIRE);
         return -ENOMEM;
     }
-    for (j=0; j < i; j++)
+    memset(*grouplong, 0, totalgroups * sizeof(char**));
+    for (j=0; j < totalgroups; j++)
     {
-        (*grouplong)[i] == NULL;
-        (*groupshort)[i] == NULL;
         (*groupnames)[j] = malloc((s+1) * sizeof(char));
         if ((*groupnames)[j] == NULL)
         {
+            for (s=0; s<j; s++)
+            {
+                free((*groupnames)[s]);
+            }
             free(*groupnames);
             *groupnames = NULL;
             free(*groupshort);
@@ -197,18 +263,20 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
             free(homepath);
             bdestroy(SHORT);
             bdestroy(LONG);
+            bdestroy(REQUIRE);
             return -ENOMEM;
         }
     }
     dp = opendir(fullpath);
     i = 0;
-    
+    int skip_group = 0;
+
     while (ep = readdir(dp))
     {
         if (strncmp(&(ep->d_name[strlen(ep->d_name)-4]), ".txt", 4) == 0)
         {
             read_long = 0;
-            bstring long_info = bfromcstr("");;
+            bstring long_info = bfromcstr("");
             sprintf(&(fullpath[fsize]), "/%s", ep->d_name);
             if (!access(fullpath, R_OK))
             {
@@ -216,7 +284,7 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
                 s = sprintf((*groupnames)[i], "%.*s", (int)(strlen(ep->d_name)-4), ep->d_name);
                 (*groupnames)[i][s] = '\0';
                 fp = fopen(fullpath,"r");
-                
+
                 while (fgets (buf, sizeof(buf), fp)) {
                     bstring bbuf = bfromcstr(buf);
                     btrimws(bbuf);
@@ -262,9 +330,12 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
                             bdestroy(LONG);
                             bdestroy(bbuf);
                             bdestroy(sinfo);
+                            bdestroy(REQUIRE);
                             free(homepath);
                             free(fullpath);
+                            bdestroy(long_info);
                             bstrListDestroy(linelist);
+                            return_groups(i, *groupnames, *groupshort, *grouplong);
                             return -ENOMEM;
                         }
                         s = sprintf((*groupshort)[i], "%s", bdata(sinfo));
@@ -272,6 +343,13 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
                         bstrListDestroy(linelist);
                         bdestroy(sinfo);
                     }
+                    else if (bstrncmp(bbuf, REQUIRE, blength(REQUIRE)) == 0)
+                    {
+                        if (cpuid_topology.numThreadsPerCore > 1)
+                        {
+                            skip_group = 1;
+                        }
+                    }
                     else if (bstrncmp(bbuf, LONG, 4) == 0)
                     {
                         read_long = 1;
@@ -286,6 +364,7 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
                 }
                 if (read_long)
                 {
+
                     (*grouplong)[i] = malloc((blength(long_info) + 1) * sizeof(char) );
                     if ((*grouplong)[i] != NULL)
                     {
@@ -294,18 +373,46 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
                     }
                 }
                 fclose(fp);
-                
+                if (skip_group)
+                {
+                    if ((*grouplong)[i] != NULL)
+                    {
+                        free((*grouplong)[i]);
+                        (*grouplong)[i] = NULL;
+                    }
+                    if ((*groupshort)[i] != NULL)
+                    {
+                        free((*groupshort)[i]);
+                        (*groupshort)[i] = NULL;
+                    }
+                    (*groupnames)[i][0] = '\0';
+                    bdestroy(long_info);
+                    goto skip_cur_def_group;
+                }
                 i++;
             }
             bdestroy(long_info);
         }
+skip_cur_def_group:
+        skip_group = 0;
     }
     closedir(dp);
     if (!search_home)
     {
+        if (i==0)
+            return_groups(totalgroups, *groupnames, *groupshort, *grouplong);
+        /*else if (i < totalgroups)
+        {
+            for (s=i;s<totalgroups;s++)
+            {
+                (*grouplong)[i] = NULL;
+                (*groupshort)[i] = NULL;
+            }
+        }*/
         free(homepath);
         free(fullpath);
         bdestroy(SHORT);
+        bdestroy(REQUIRE);
         bdestroy(LONG);
         return i;
     }
@@ -326,7 +433,6 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
                     (*groupnames)[i][s] = '\0';
                     fp = fopen(homepath,"r");
                     while (fgets (buf, sizeof(buf), fp)) {
-                        
                         bstring bbuf = bfromcstr(buf);
                         btrimws(bbuf);
                         if ((blength(bbuf) == 0) || (buf[0] == '#'))
@@ -363,17 +469,19 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
                                 bconcat(sinfo, tmp);
                                 bdestroy(tmp);
                             }
-
                             (*groupshort)[i] = malloc((blength(sinfo)+1) * sizeof(char));
                             if ((*groupshort)[i] == NULL)
                             {
                                 bdestroy(SHORT);
                                 bdestroy(LONG);
+                                bdestroy(REQUIRE);
                                 bdestroy(bbuf);
                                 bdestroy(sinfo);
                                 free(homepath);
                                 free(fullpath);
                                 bstrListDestroy(linelist);
+                                bdestroy(long_info);
+                                return_groups(i, *groupnames, *groupshort, *grouplong);
                                 return -ENOMEM;
                             }
                             s = sprintf((*groupshort)[i], "%s", bdata(sinfo));
@@ -381,6 +489,13 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
                             bstrListDestroy(linelist);
                             bdestroy(sinfo);
                         }
+                        else if (bstrncmp(bbuf, REQUIRE, blength(REQUIRE)) == 0)
+                        {
+                            if (cpuid_topology.numThreadsPerCore > 1)
+                            {
+                                skip_group = 1;
+                            }
+                        }
                         else if (bstrncmp(bbuf, LONG, 4) == 0)
                         {
                             read_long = 1;
@@ -403,43 +518,53 @@ int get_groups(char* grouppath, char* architecture, char*** groupnames, char***
                         }
                     }
                     fclose(fp);
+                    if (skip_group)
+                    {
+                        if ((*groupshort)[i])
+                        {
+                            free((*groupshort)[i]);
+                            (*groupshort)[i] = NULL;
+                        }
+                        if ((*grouplong)[i])
+                        {
+                            free((*grouplong)[i]);
+                            (*grouplong)[i] = NULL;
+                        }
+                        bdestroy(long_info);
+                        goto skip_cur_home_group;
+                    }
                     i++;
                 }
                 bdestroy(long_info);
             }
+skip_cur_home_group:
+        skip_group = 0;
         }
         closedir(dp);
     }
+    if (i==0)
+        return_groups(totalgroups, *groupnames, *groupshort, *grouplong);
+/*    else if (i < totalgroups)
+    {
+        for (s=i;s<totalgroups;s++)
+        {
+            printf("Setting NULL for group %d\n", s);
+            (*groupnames)[i] = NULL;
+            (*grouplong)[i] = NULL;
+            (*groupshort)[i] = NULL;
+        }
+    }*/
     bdestroy(SHORT);
     bdestroy(LONG);
+    bdestroy(REQUIRE);
     free(fullpath);
     free(homepath);
     return i;
 }
 
-void return_groups(int groups, char** groupnames, char** groupshort, char** grouplong)
-{
-    int i;
-    for (i = 0; i <groups; i++)
-    {
-        if (groupnames[i])
-            free(groupnames[i]);
-        if (groupshort[i])
-            free(groupshort[i]);
-        if (grouplong[i])
-            free(grouplong[i]);
-    }
-    if (groupnames)
-        free(groupnames);
-    if (groupshort)
-        free(groupshort);
-    if (grouplong)
-        free(grouplong);
-}
-
 
 
-int custom_group(char* eventStr, GroupInfo* ginfo)
+int custom_group(const char* eventStr, GroupInfo* ginfo)
 {
     int i, j;
     int err = 0;
@@ -462,7 +587,6 @@ int custom_group(char* eventStr, GroupInfo* ginfo)
     bstring fix1 = bformat("FIXC1");
     bstring fix2 = bformat("FIXC2");
     DEBUG_PRINT(DEBUGLEV_INFO, Creating custom group for event string %s, eventStr);
-    
     ginfo->shortinfo = malloc(7 * sizeof(char));
     if (ginfo->shortinfo == NULL)
     {
@@ -484,7 +608,6 @@ int custom_group(char* eventStr, GroupInfo* ginfo)
         goto cleanup;
     }
     sprintf(ginfo->groupname, "%s", "Custom");
-    
     eventBstr = bfromcstr(eventStr);
     eventList = bsplit(eventBstr, delim);
     ginfo->nevents = eventList->qty;
@@ -610,12 +733,18 @@ cleanup:
     return err;
 }
 
-int read_group(char* grouppath, char* architecture, char* groupname, GroupInfo* ginfo)
+int
+read_group(
+        const char* grouppath,
+        const char* architecture,
+        const char* groupname,
+        GroupInfo* ginfo)
 {
     FILE* fp;
     int i, s, e, err = 0;
     char buf[512];
     GroupFileSections sec = GROUP_NONE;
+    bstring REQUIRE = bformat("REQUIRE_NOHT");
     if ((grouppath == NULL)||(architecture == NULL)||(groupname == NULL)||(ginfo == NULL))
         return -EINVAL;
 
@@ -628,6 +757,7 @@ int read_group(char* grouppath, char* architecture, char* groupname, GroupInfo*
         if (access(bdata(homepath), R_OK))
         {
             ERROR_PRINT(Cannot read group file %s.txt. Searched in %s and %s, groupname, bdata(fullpath), bdata(homepath));
+            bdestroy(REQUIRE);
             bdestroy(fullpath);
             bdestroy(homepath);
             return -EACCES;
@@ -684,6 +814,15 @@ int read_group(char* grouppath, char* architecture, char* groupname, GroupInfo*
             sprintf(ginfo->shortinfo, "%.*s", (int)strlen(&(buf[i]))-1, &(buf[i]));
             continue;
         }
+        else if (strncmp(bdata(REQUIRE), buf, blength(REQUIRE)) == 0)
+        {
+            if (cpuid_topology.numThreadsPerCore > 1)
+            {
+                err = -ENODEV;
+                goto cleanup;
+            }
+            continue;
+        }
         else if (strncmp(groupFileSectionNames[GROUP_EVENTSET], buf, strlen(groupFileSectionNames[GROUP_EVENTSET])) == 0)
         {
             sec = GROUP_EVENTSET;
@@ -781,8 +920,6 @@ int read_group(char* grouppath, char* architecture, char* groupname, GroupInfo*
                 }
             }
             bstrListDestroy(linelist);
-            
-
             linelist = bsplit(bbuf, ' ');
             bdestroy(bbuf);
             for (i=0; i<linelist->qty; i++)
@@ -801,7 +938,6 @@ int read_group(char* grouppath, char* architecture, char* groupname, GroupInfo*
             }
             sprintf(ginfo->counters[ginfo->nevents], "%s", bdata(linelist->entry[0]));
             sprintf(ginfo->events[ginfo->nevents], "%s", bdata(linelist->entry[1]));
-            
             ginfo->nevents++;
             bstrListDestroy(linelist);
             continue;
@@ -915,10 +1051,12 @@ int read_group(char* grouppath, char* architecture, char* groupname, GroupInfo*
     }
     //bstrListDestroy(linelist);
     fclose(fp);
+    bdestroy(REQUIRE);
     bdestroy(homepath);
     bdestroy(fullpath);
     return 0;
 cleanup:
+    bdestroy(REQUIRE);
     bdestroy(homepath);
     bdestroy(fullpath);
     if (ginfo->groupname)
@@ -947,10 +1085,20 @@ cleanup:
                 free(ginfo->metricnames[i]);
         }
     }
+    /*ginfo->shortinfo = NULL;
+    ginfo->nevents = 0;
+    ginfo->events = NULL;
+    ginfo->counters = NULL;
+    ginfo->nmetrics = 0;
+    ginfo->metricformulas = NULL;
+    ginfo->metricnames = NULL;
+    ginfo->longinfo = NULL;
+    ginfo->groupname = NULL;*/
     return err;
 }
 
-int new_group(GroupInfo* ginfo)
+int
+new_group(GroupInfo* ginfo)
 {
     if (!ginfo)
         return -EINVAL;
@@ -966,7 +1114,8 @@ int new_group(GroupInfo* ginfo)
     return 0;
 }
 
-char* get_eventStr(GroupInfo* ginfo)
+char*
+get_eventStr(GroupInfo* ginfo)
 {
     int i;
     char* string;
@@ -994,7 +1143,8 @@ char* get_eventStr(GroupInfo* ginfo)
     return string;
 }
 
-void put_eventStr(char* eventset)
+void
+put_eventStr(char* eventset)
 {
     if (eventset != NULL)
     {
@@ -1003,7 +1153,8 @@ void put_eventStr(char* eventset)
     }
 }
 
-int add_event(GroupInfo* ginfo, char* event, char* counter)
+int
+add_event(GroupInfo* ginfo, char* event, char* counter)
 {
     if ((!ginfo) || (!event) || (!counter))
         return -EINVAL;
@@ -1025,7 +1176,8 @@ int add_event(GroupInfo* ginfo, char* event, char* counter)
     return 0;
 }
 
-int add_metric(GroupInfo* ginfo, char* mname, char* mcalc)
+int
+add_metric(GroupInfo* ginfo, char* mname, char* mcalc)
 {
     if ((!ginfo) || (!mname) || (!mcalc))
         return -EINVAL;
@@ -1048,7 +1200,8 @@ int add_metric(GroupInfo* ginfo, char* mname, char* mcalc)
 }
 
 
-char* get_groupName(GroupInfo* ginfo)
+char*
+get_groupName(GroupInfo* ginfo)
 {
     if ((ginfo != NULL) && (ginfo->groupname != NULL))
     {
@@ -1060,7 +1213,8 @@ char* get_groupName(GroupInfo* ginfo)
     return NULL;
 }
 
-int set_groupName(GroupInfo* ginfo, char* groupName)
+int
+set_groupName(GroupInfo* ginfo, char* groupName)
 {
     if ((ginfo == NULL) || (groupName == NULL))
         return -EINVAL;
@@ -1072,7 +1226,8 @@ int set_groupName(GroupInfo* ginfo, char* groupName)
     return 0;
 }
 
-char* get_shortInfo(GroupInfo* ginfo)
+char*
+get_shortInfo(GroupInfo* ginfo)
 {
     if ((ginfo != NULL) && (ginfo->shortinfo != NULL))
     {
@@ -1084,7 +1239,8 @@ char* get_shortInfo(GroupInfo* ginfo)
     return NULL;
 }
 
-void put_shortInfo(char* sinfo)
+void
+put_shortInfo(char* sinfo)
 {
     if (sinfo != NULL)
     {
@@ -1093,7 +1249,8 @@ void put_shortInfo(char* sinfo)
     }
 }
 
-int set_shortInfo(GroupInfo* ginfo, char* shortInfo)
+int
+set_shortInfo(GroupInfo* ginfo, char* shortInfo)
 {
     if ((ginfo == NULL) || (shortInfo == NULL))
         return -EINVAL;
@@ -1105,7 +1262,8 @@ int set_shortInfo(GroupInfo* ginfo, char* shortInfo)
     return 0;
 }
 
-char* get_longInfo(GroupInfo* ginfo)
+char*
+get_longInfo(GroupInfo* ginfo)
 {
     if ((ginfo != NULL) && (ginfo->longinfo != NULL))
     {
@@ -1117,7 +1275,8 @@ char* get_longInfo(GroupInfo* ginfo)
     return NULL;
 }
 
-void put_longInfo(char* linfo)
+void
+put_longInfo(char* linfo)
 {
     if (linfo != NULL)
     {
@@ -1126,7 +1285,8 @@ void put_longInfo(char* linfo)
     }
 }
 
-int set_longInfo(GroupInfo* ginfo, char* longInfo)
+int
+set_longInfo(GroupInfo* ginfo, char* longInfo)
 {
     if ((ginfo == NULL) || (longInfo == NULL))
         return -EINVAL;
@@ -1138,7 +1298,8 @@ int set_longInfo(GroupInfo* ginfo, char* longInfo)
     return 0;
 }
 
-void return_group(GroupInfo* ginfo)
+void
+return_group(GroupInfo* ginfo)
 {
     int i;
     if (ginfo->groupname)
@@ -1182,14 +1343,16 @@ void return_group(GroupInfo* ginfo)
     ginfo->nmetrics = 0;
 }
 
-void init_clist(CounterList* clist)
+void
+init_clist(CounterList* clist)
 {
     clist->counters = 0;
     clist->cnames = NULL;
     clist->cvalues = NULL;
 }
 
-int add_to_clist(CounterList* clist, char* counter, double result)
+int
+add_to_clist(CounterList* clist, char* counter, double result)
 {
     char** tmpnames;
     double* tmpvalues;
@@ -1218,7 +1381,31 @@ int add_to_clist(CounterList* clist, char* counter, double result)
     return 0;
 }
 
-void destroy_clist(CounterList* clist)
+int
+update_clist(CounterList* clist, char* counter, double result)
+{
+    int i;
+    int found = 0;
+    if ((clist == NULL)||(counter == NULL))
+        return -EINVAL;
+    for (i=0; i< clist->counters; i++)
+    {
+        if (strcmp(clist->cnames[i], counter) == 0)
+        {
+            clist->cvalues[i] = result;
+            found = 1;
+            break;
+        }
+    }
+    if (!found)
+    {
+        return -ENOENT;
+    }
+    return 0;
+}
+
+void
+destroy_clist(CounterList* clist)
 {
     int i;
     if (clist != NULL)
@@ -1232,12 +1419,11 @@ void destroy_clist(CounterList* clist)
     }
 }
 
-
-int calc_metric(char* formula, CounterList* clist, double *result)
+int
+calc_metric(char* formula, CounterList* clist, double *result)
 {
     int i=0;
     *result = 0.0;
-    int fail = 0;
     int maxstrlen = 0, minstrlen = 10000;
 
     if ((formula == NULL) || (clist == NULL))
@@ -1268,18 +1454,9 @@ int calc_metric(char* formula, CounterList* clist, double *result)
         }
         maxstrlen--;
     }
-    bstring test = bfromcstr("aAbBcCdDfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ,_:;!'§$&=?°´`#<>");
-    if (binchr(f, 0, test) != BSTR_ERR)
-    {
-        fprintf(stderr, "Not all counter names in formula can be substituted\n");
-        fprintf(stderr, "%s\n", bdata(f));
-        i = -EINVAL;
-        fail = 1;
-    }
-    bdestroy(test);
     // now we can calculate the formula
-    if (!fail)
-        i = calculate_infix(bdata(f), result);
+    i = calculate_infix(bdata(f), result);
     bdestroy(f);
     return i;
 }
+
diff --git a/src/perfmon.c b/src/perfmon.c
index 09b957b..521b11c 100644
--- a/src/perfmon.c
+++ b/src/perfmon.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Main implementation of the performance monitoring module
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -29,6 +29,8 @@
  * =======================================================================================
  */
 
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
@@ -37,7 +39,6 @@
 #include <unistd.h>
 #include <sys/types.h>
 
-
 #include <types.h>
 #include <likwid.h>
 #include <bitUtil.h>
@@ -48,6 +49,7 @@
 #include <topology.h>
 #include <access.h>
 #include <perfgroup.h>
+#include <cpuid.h>
 
 #include <perfmon_pm.h>
 #include <perfmon_atom.h>
@@ -60,6 +62,7 @@
 #include <perfmon_ivybridge.h>
 #include <perfmon_haswell.h>
 #include <perfmon_phi.h>
+#include <perfmon_knl.h>
 #include <perfmon_k8.h>
 #include <perfmon_k10.h>
 #include <perfmon_interlagos.h>
@@ -69,6 +72,11 @@
 #include <perfmon_broadwell.h>
 #include <perfmon_skylake.h>
 
+#ifdef LIKWID_USE_PERFEVENT
+#include <perfmon_perfevent.h>
+#endif
+
+/* #####   EXPORTED VARIABLES   ########################################### */
 
 PerfmonEvent* eventHash = NULL;
 RegisterMap* counter_map = NULL;
@@ -79,6 +87,7 @@ int perfmon_numCoreCounters = 0;
 int perfmon_numArchEvents = 0;
 int perfmon_initialized = 0;
 int perfmon_verbosity = DEBUGLEV_ONLY_ERROR;
+int maps_checked = 0;
 uint64_t currentConfig[MAX_NUM_THREADS][NUM_PMC] = { 0 };
 
 PerfmonGroupSet* groupSet = NULL;
@@ -92,7 +101,6 @@ int (*perfmon_setupCountersThread) (int thread_id, PerfmonEventSet* eventSet);
 int (*perfmon_finalizeCountersThread) (int thread_id, PerfmonEventSet* eventSet);
 
 int (*initThreadArch) (int cpu_id);
-
 void perfmon_delEventSet(int groupID);
 
 char* eventOptionTypeName[NUM_EVENT_OPTIONS] = {
@@ -122,15 +130,13 @@ char* eventOptionTypeName[NUM_EVENT_OPTIONS] = {
     "IN_TRANSACTION_ABORTED"
 };
 
+/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
+
 static int
-getIndexAndType (bstring reg, RegisterIndex* index, RegisterType* type, int force)
+getIndexAndType (bstring reg, RegisterIndex* index, RegisterType* type)
 {
-    int err = 0;
     int ret = FALSE;
-    uint64_t tmp = 0x0ULL;
-    int (*ownstrcmp)(const char*, const char*);
-    ownstrcmp = &strcmp;
-    int testcpu = groupSet->threads[0].processorId;
+
     for (int i=0; i< perfmon_numCounters; i++)
     {
         if (biseqcstr(reg, counter_map[i].key))
@@ -141,65 +147,92 @@ getIndexAndType (bstring reg, RegisterIndex* index, RegisterType* type, int forc
             break;
         }
     }
-    if (ret == FALSE)
+    return ret;
+}
+
+static RegisterType
+checkAccess(bstring reg, RegisterIndex index, RegisterType oldtype, int force)
+{
+    int err = 0;
+    uint64_t tmp = 0x0ULL;
+    RegisterType type = oldtype;
+    int (*ownstrcmp)(const char*, const char*);
+    ownstrcmp = &strcmp;
+    int testcpu = groupSet->threads[0].processorId;
+    int firstpmcindex = -1;
+
+    for (int i=0; i< perfmon_numCounters; i++)
+    {
+        if (counter_map[i].type == PMC && firstpmcindex < 0)
+        {
+            firstpmcindex = i;
+            break;
+        }
+    }
+
+    if (type == PMC && (index - firstpmcindex) > cpuid_info.perf_num_ctr)
+    {
+        fprintf(stderr,
+                "WARNING: Counter %s is only available with deactivated HyperThreading. Counter results defaults to 0.\n",
+                bdata(reg));
+        return NOTYPE;
+    }
+    if (type == NOTYPE)
     {
-        fprintf(stderr, "ERROR: Counter %s not available\n",bdata(reg));
-        *type = NOTYPE;
-        return FALSE;
+        DEBUG_PRINT(DEBUGLEV_INFO, WARNING: Counter %s not available on the current system. Counter results defaults to 0.,bdata(reg));
+        return NOTYPE;
     }
-    if (ret && (ownstrcmp(bdata(reg), counter_map[*index].key) != 0))
+    if (ownstrcmp(bdata(reg), counter_map[index].key) != 0)
     {
-        *type = NOTYPE;
-        return FALSE;
+        DEBUG_PRINT(DEBUGLEV_INFO, WARNING: Counter %s does not exist ,bdata(reg));
+        return NOTYPE;
     }
-    err = HPMcheck(counter_map[*index].device, 0);
+    err = HPMcheck(counter_map[index].device, 0);
     if (!err)
     {
-        *type = NOTYPE;
-        return FALSE;
+        DEBUG_PRINT(DEBUGLEV_INFO, WARNING: The device for counter %s does not exist ,bdata(reg));
+        return NOTYPE;
     }
-    if ((ret) && (*type != THERMAL) && (*type != POWER) && (*type != WBOX0FIX))
+    if ((type != THERMAL) && (type != POWER) && (type != WBOX0FIX))
     {
         int check_settings = 1;
-        uint32_t reg = counter_map[*index].configRegister;
+        uint32_t reg = counter_map[index].configRegister;
         if (reg == 0x0)
         {
-            reg = counter_map[*index].counterRegister;
+            reg = counter_map[index].counterRegister;
             check_settings = 0;
         }
-        err = HPMread(testcpu, counter_map[*index].device, reg, &tmp);
+        err = HPMread(testcpu, counter_map[index].device, reg, &tmp);
         if (err != 0)
         {
             if (err == -ENODEV)
             {
                 DEBUG_PRINT(DEBUGLEV_DETAIL, Device %s not accessible on this machine,
-                                         pci_devices[box_map[*type].device].name);
+                                         pci_devices[box_map[type].device].name);
             }
             else
             {
                 DEBUG_PRINT(DEBUGLEV_DETAIL, Counter %s not readable on this machine,
-                                             counter_map[*index].key);
+                                             counter_map[index].key);
             }
-            *type = NOTYPE;
-            ret = FALSE;
+            type = NOTYPE;
         }
         else if (tmp == 0x0ULL)
         {
-            err = HPMwrite(testcpu, counter_map[*index].device, reg, 0x0ULL);
+            err = HPMwrite(testcpu, counter_map[index].device, reg, 0x0ULL);
             if (err != 0)
             {
                 if (err == -ENODEV)
                 {
                     DEBUG_PRINT(DEBUGLEV_DETAIL, Device %s not accessible on this machine,
-                                             pci_devices[box_map[*type].device].name);
+                                             pci_devices[box_map[type].device].name);
                 }
                 else
                 {
                     DEBUG_PRINT(DEBUGLEV_DETAIL, Counter %s not writeable on this machine,
-                                             counter_map[*index].key);
+                                             counter_map[index].key);
                 }
-                *type = NOTYPE;
-                ret = FALSE;
+                type = NOTYPE;
             }
             check_settings = 0;
         }
@@ -208,34 +241,32 @@ getIndexAndType (bstring reg, RegisterIndex* index, RegisterType* type, int forc
             if (force == 1)
             {
                 DEBUG_PRINT(DEBUGLEV_DETAIL, Counter %s has bits set (0x%llx) but we are forced to overwrite them,
-                                             counter_map[*index].key, tmp);
-                err = HPMwrite(testcpu, counter_map[*index].device, reg, 0x0ULL);
+                                             counter_map[index].key, LLU_CAST tmp);
+                err = HPMwrite(testcpu, counter_map[index].device, reg, 0x0ULL);
             }
-            else if ((force == 0) && ((*type != FIXED)&&(*type != THERMAL)&&(*type != POWER)&&(*type != WBOX0FIX)))
+            else if ((force == 0) && ((type != FIXED)&&(type != THERMAL)&&(type != POWER)&&(type != WBOX0FIX)))
             {
-                fprintf(stderr, "ERROR: The selected register %s is in use.\n", counter_map[*index].key);
+                fprintf(stderr, "ERROR: The selected register %s is in use.\n", counter_map[index].key);
                 fprintf(stderr, "Please run likwid with force option (-f, --force) to overwrite settings\n");
                 exit(EXIT_SUCCESS);
             }
         }
     }
-    else if ((ret) && ((*type == POWER) || (*type == WBOX0FIX) || (*type == THERMAL)))
+    else if ((type == POWER) || (type == WBOX0FIX) || (type == THERMAL))
     {
-        err = HPMread(testcpu, MSR_DEV, counter_map[*index].counterRegister, &tmp);
+        err = HPMread(testcpu, MSR_DEV, counter_map[index].counterRegister, &tmp);
         if (err != 0)
         {
             DEBUG_PRINT(DEBUGLEV_DETAIL, Counter %s not readable on this machine,
-                                         counter_map[*index].key);
-            *type = NOTYPE;
-            ret = FALSE;
+                                         counter_map[index].key);
+            type = NOTYPE;
         }
     }
     else
     {
-        *type = NOTYPE;
-        ret = FALSE;
+        type = NOTYPE;
     }
-    return ret;
+    return type;
 }
 
 static int
@@ -549,7 +580,6 @@ parseOptions(struct bstrList* tokens, PerfmonEvent* event, RegisterIndex index)
         }
     }
 
-    
     return event->numberOfOptions;
 }
 
@@ -571,7 +601,8 @@ calculateResult(int groupId, int eventId, int threadId)
     }
     else if (counter->overflows > 0)
     {
-        result += (double) ((perfmon_getMaxCounterValue(counter_map[event->index].type) - counter->startData) + counter->counterData);
+        result += (double) ((perfmon_getMaxCounterValue(counter_map[event->index].type) -
+                    counter->startData) + counter->counterData);
         counter->overflows--;
     }
     result += (double) (counter->overflows * perfmon_getMaxCounterValue(counter_map[event->index].type));
@@ -604,16 +635,109 @@ getCounterTypeOffset(int index)
     return off;
 }
 
-void perfmon_setVerbosity(int level)
+void
+perfmon_setVerbosity(int level)
 {
     if ((level >= DEBUGLEV_ONLY_ERROR) && (level <= DEBUGLEV_DEVELOP))
         perfmon_verbosity = level;
 }
 
 void
+perfmon_check_counter_map(int cpu_id)
+{
+    int own_hpm = 0;
+    if (perfmon_numCounters == 0 || perfmon_numArchEvents == 0)
+    {
+        ERROR_PLAIN_PRINT(Counter and event maps not initialized.);
+        return;
+    }
+    if (maps_checked)
+        return;
+#ifndef LIKWID_USE_PERFEVENT
+    if (!HPMinitialized())
+    {
+        HPMinit();
+        if (HPMaddThread(cpu_id) != 0)
+        {
+            ERROR_PLAIN_PRINT(Cannot check counters without access to performance counters)
+            return;
+        }
+        own_hpm = 1;
+    }
+#endif
+    int startpmcindex = -1;
+    for (int i=0;i<perfmon_numCounters;i++)
+    {
+        if (counter_map[i].type == NOTYPE)
+        {
+            continue;
+        }
+        if (counter_map[i].type == PMC && startpmcindex < 0)
+        {
+            startpmcindex = i;
+        }
+        if (counter_map[i].type == PMC && (counter_map[i].index - counter_map[startpmcindex].index) >= cpuid_info.perf_num_ctr)
+        {
+            counter_map[i].type = NOTYPE;
+            counter_map[i].optionMask = 0x0ULL;
+        }
+#ifndef LIKWID_USE_PERFEVENT
+        if (HPMcheck(counter_map[i].device, cpu_id))
+        {
+            uint32_t reg = counter_map[i].configRegister;
+            uint64_t tmp = 0x0ULL;
+            if (reg == 0x0U)
+                reg = counter_map[i].counterRegister;
+            int err = HPMread(cpu_id, counter_map[i].device, reg, &tmp);
+            if (err)
+            {
+                counter_map[i].type = NOTYPE;
+                counter_map[i].optionMask = 0x0ULL;
+            }
+        }
+        else
+        {
+            counter_map[i].type = NOTYPE;
+            counter_map[i].optionMask = 0x0ULL;
+        }
+#endif
+    }
+    if (own_hpm)
+        HPMfinalize();
+    for (int i=0; i<perfmon_numArchEvents; i++)
+    {
+        int found = 0;
+        bstring estr = bfromcstr(eventHash[i].name);
+        if (i > 0 && strlen(eventHash[i-1].limit) != 0 && strcmp(eventHash[i-1].limit, eventHash[i].limit) == 0)
+            continue;
+        for (int j=0;j<perfmon_numCounters; j++)
+        {
+            if (counter_map[j].type == NOTYPE)
+                continue;
+            PerfmonEvent event;
+            bstring cstr = bfromcstr(counter_map[j].key);
+            if (getEvent(estr, cstr, &event))
+            {
+                found = 1;
+                break;
+            }
+            bdestroy(cstr);
+        }
+        bdestroy(estr);
+        if (!found)
+        {
+            eventHash[i].limit = "";
+        }
+    }
+    maps_checked = 1;
+}
+
+void
 perfmon_init_maps(void)
 {
-    box_map = NULL;
+    uint32_t eax, ebx, ecx, edx;
+    if (eventHash != NULL && counter_map != NULL && box_map != NULL && perfmon_numCounters > 0 && perfmon_numArchEvents > 0)
+        return;
     switch ( cpuid_info.family )
     {
         case P6_FAMILY:
@@ -777,6 +901,7 @@ perfmon_init_maps(void)
                     perfmon_numCoreCounters = perfmon_numCoreCountersBroadwell;
                     break;
                 case BROADWELL_D:
+                    pci_devices = broadwelld_pci_devices;
                     box_map = broadwelld_box_map;
                     eventHash = broadwelld_arch_events;
                     counter_map = broadwelld_counter_map;
@@ -785,6 +910,7 @@ perfmon_init_maps(void)
                     perfmon_numCoreCounters = perfmon_numCoreCountersBroadwellD;
                     break;
                 case BROADWELL_E:
+                    pci_devices = broadwellEP_pci_devices;
                     box_map = broadwellEP_box_map;
                     eventHash = broadwellEP_arch_events;
                     counter_map = broadwellEP_counter_map;
@@ -795,6 +921,8 @@ perfmon_init_maps(void)
 
                 case SKYLAKE1:
                 case SKYLAKE2:
+                case KABYLAKE1:
+                case KABYLAKE2:
                     box_map = skylake_box_map;
                     eventHash = skylake_arch_events;
                     counter_map = skylake_counter_map;
@@ -803,6 +931,15 @@ perfmon_init_maps(void)
                     perfmon_numCoreCounters = perfmon_numCoreCountersSkylake;
                     break;
 
+                case XEON_PHI_KNL:
+                    pci_devices = knl_pci_devices;
+                    eventHash = knl_arch_events;
+                    perfmon_numArchEvents = perfmon_numArchEventsKNL;
+                    counter_map = knl_counter_map;
+                    box_map = knl_box_map;
+                    perfmon_numCounters = perfmon_numCountersKNL;
+                    break;
+
                 default:
                     ERROR_PLAIN_PRINT(Unsupported Processor);
                     break;
@@ -871,6 +1008,7 @@ perfmon_init_funcs(int* init_power, int* init_temp)
 {
     int initialize_power = FALSE;
     int initialize_thermal = FALSE;
+#ifndef LIKWID_USE_PERFEVENT
     switch ( cpuid_info.family )
     {
         case P6_FAMILY:
@@ -1034,6 +1172,8 @@ perfmon_init_funcs(int* init_power, int* init_temp)
 
                 case SKYLAKE1:
                 case SKYLAKE2:
+                case KABYLAKE1:
+                case KABYLAKE2:
                     initialize_power = TRUE;
                     initialize_thermal = TRUE;
                     initThreadArch = perfmon_init_skylake;
@@ -1044,6 +1184,17 @@ perfmon_init_funcs(int* init_power, int* init_temp)
                     perfmon_finalizeCountersThread = perfmon_finalizeCountersThread_skylake;
                     break;
 
+                case XEON_PHI_KNL:
+                    initialize_power = TRUE;
+                    initialize_thermal = TRUE;
+                    initThreadArch = perfmon_init_knl;
+                    perfmon_startCountersThread = perfmon_startCountersThread_knl;
+                    perfmon_stopCountersThread = perfmon_stopCountersThread_knl;
+                    perfmon_readCountersThread = perfmon_readCountersThread_knl;
+                    perfmon_setupCountersThread = perfmon_setupCountersThread_knl;
+                    perfmon_finalizeCountersThread = perfmon_finalizeCountersThread_knl;
+                    break;
+
                 default:
                     ERROR_PLAIN_PRINT(Unsupported Processor);
                     break;
@@ -1109,13 +1260,20 @@ perfmon_init_funcs(int* init_power, int* init_temp)
             ERROR_PLAIN_PRINT(Unsupported Processor);
             break;
     }
+#else
+    initThreadArch = perfmon_init_perfevent;
+    perfmon_startCountersThread = perfmon_startCountersThread_perfevent;
+    perfmon_stopCountersThread = perfmon_stopCountersThread_perfevent;
+    perfmon_readCountersThread = perfmon_readCountersThread_perfevent;
+    perfmon_setupCountersThread = perfmon_setupCountersThread_perfevent;
+    perfmon_finalizeCountersThread = perfmon_finalizeCountersThread_perfevent;
+#endif
     *init_power = initialize_power;
     *init_temp = initialize_thermal;
 }
 
-
 int
-perfmon_init(int nrThreads, int threadsToCpu[])
+perfmon_init(int nrThreads, const int* threadsToCpu)
 {
     int i;
     int ret;
@@ -1174,23 +1332,28 @@ perfmon_init(int nrThreads, int threadsToCpu[])
     groupSet->activeGroup = -1;
 
     for(i=0; i<MAX_NUM_NODES; i++) socket_lock[i] = LOCK_INIT;
-    for(i=0; i<MAX_NUM_THREADS; i++) tile_lock[i] = LOCK_INIT;
+    for(i=0; i<MAX_NUM_THREADS; i++)
+    {
+        tile_lock[i] = LOCK_INIT;
+        core_lock[i] = LOCK_INIT;
+    }
 
-    /* Initialize maps pointer to current architecture maps */
-    perfmon_init_maps();
 
     /* Initialize access interface */
+#ifndef LIKWID_USE_PERFEVENT
     ret = HPMinit();
     if (ret)
     {
         ERROR_PLAIN_PRINT(Cannot set access functions);
         free(groupSet->threads);
         free(groupSet);
-        exit(EXIT_FAILURE);
         return ret;
     }
+#endif
     timer_init();
 
+    /* Initialize maps pointer to current architecture maps */
+    perfmon_init_maps();
 
     /* Initialize function pointer to current architecture functions */
     perfmon_init_funcs(&initialize_power, &initialize_thermal);
@@ -1199,18 +1362,28 @@ perfmon_init(int nrThreads, int threadsToCpu[])
     /* If the arch supports it, initialize power and thermal measurements */
     for(i=0;i<nrThreads;i++)
     {
-        if (HPMaddThread(threadsToCpu[i]) != 0)
+#ifndef LIKWID_USE_PERFEVENT
+        ret = HPMaddThread(threadsToCpu[i]);
+        if (ret != 0)
         {
             ERROR_PLAIN_PRINT(Cannot get access to performance counters);
+            free(groupSet->threads);
+            free(groupSet);
+            return ret;
         }
-        groupSet->threads[i].thread_id = i;
-        groupSet->threads[i].processorId = threadsToCpu[i];
 
-        if (HPMcheck(MSR_DEV, threadsToCpu[i]) == 0)
+        ret = HPMcheck(MSR_DEV, threadsToCpu[i]);
+        if (ret != 1)
         {
             fprintf(stderr, "Cannot get access to MSRs. Please check permissions to the MSRs\n");
-            exit(EXIT_FAILURE);
+            free(groupSet->threads);
+            free(groupSet);
+            return -EACCES;
         }
+#endif
+        groupSet->threads[i].thread_id = i;
+        groupSet->threads[i].processorId = threadsToCpu[i];
+
         if (initialize_power == TRUE)
         {
             power_init(threadsToCpu[i]);
@@ -1240,7 +1413,6 @@ perfmon_finalize(void)
     }
     for(group=0;group < groupSet->numberOfActiveGroups; group++)
     {
-        
         for (thread=0;thread< groupSet->numberOfThreads; thread++)
         {
             perfmon_finalizeCountersThread(thread, &(groupSet->groups[group]));
@@ -1271,16 +1443,18 @@ perfmon_finalize(void)
         perfmon_destroyMarkerResults();
     }
     power_finalize();
+#ifndef LIKWID_USE_PERFEVENT
     HPMfinalize();
+#endif
     perfmon_initialized = 0;
     groupSet = NULL;
     return;
 }
 
 int
-perfmon_addEventSet(char* eventCString)
+perfmon_addEventSet(const char* eventCString)
 {
-    int i, j, err;
+    int i, j, err, isPerfGroup = 0;
     bstring eventBString;
     struct bstrList* eventtokens;
     PerfmonEventSet* eventSet;
@@ -1356,11 +1530,22 @@ perfmon_addEventSet(char* eventCString)
         err = read_group(config->groupPath, cpuid_info.short_name,
                          eventCString,
                          &groupSet->groups[groupSet->numberOfActiveGroups].group);
-        if (err)
+        if (err == -EACCES)
+        {
+            ERROR_PRINT(Access to performance group %s not allowed, eventCString);
+            return err;
+        }
+        else if (err == -ENODEV)
+        {
+            ERROR_PRINT(Performance group %s only available with deactivated HyperThreading, eventCString);
+            return err;
+        }
+        else if (err < 0)
         {
             ERROR_PRINT(Cannot read performance group %s, eventCString);
             return err;
         }
+        isPerfGroup = 1;
     }
     else
     {
@@ -1386,13 +1571,17 @@ perfmon_addEventSet(char* eventCString)
     }
     eventSet->numberOfEvents = 0;
 #ifdef __x86_64
-    eventSet->regTypeMask = ((__uint128_t)0x0ULL<<64)|0x0ULL;
+//    eventSet->regTypeMask = ((__uint128_t)0x0ULL<<64)|0x0ULL;
+    eventSet->regTypeMask1 = 0x0ULL;
+    eventSet->regTypeMask2 = 0x0ULL;
+    eventSet->regTypeMask3 = 0x0ULL;
+    eventSet->regTypeMask4 = 0x0ULL;
 #else
     eventSet->regTypeMask = 0x0ULL;
 #endif
 
-
     int forceOverwrite = 0;
+    int valid_events = 0;
     if (getenv("LIKWID_FORCE") != NULL)
     {
         forceOverwrite = atoi(getenv("LIKWID_FORCE"));
@@ -1409,13 +1598,27 @@ perfmon_addEventSet(char* eventCString)
         }
         else
         {
-            if (!getIndexAndType(subtokens->entry[1], &event->index, &event->type, forceOverwrite))
+            if (!getIndexAndType(subtokens->entry[1], &event->index, &event->type))
             {
-                DEBUG_PRINT(DEBUGLEV_INFO, Counter register %s not supported or PCI device not available,
-                            bdata(subtokens->entry[1]));
                 event->type = NOTYPE;
                 goto past_checks;
             }
+#ifndef LIKWID_USE_PERFEVENT
+            event->type = checkAccess(subtokens->entry[1], event->index, event->type, forceOverwrite);
+            if (event->type == NOTYPE)
+            {
+                DEBUG_PRINT(DEBUGLEV_INFO, Cannot access counter register %s, bdata(subtokens->entry[1]));
+                goto past_checks;
+            }
+#else
+            char* path = translate_types[counter_map[event->index].type];
+            struct stat st;
+            if (path == NULL || stat(path, &st) != 0)
+            {
+                DEBUG_PRINT(DEBUGLEV_INFO, Cannot access counter register %s, bdata(subtokens->entry[1]));
+                goto past_checks;
+            }
+#endif
 
             if (!getEvent(subtokens->entry[0], subtokens->entry[1], &event->event))
             {
@@ -1424,7 +1627,7 @@ perfmon_addEventSet(char* eventCString)
                 event->type = NOTYPE;
                 goto past_checks;
             }
-
+#ifndef LIKWID_USE_PERFEVENT
             if (!checkCounter(subtokens->entry[1], event->event.limit))
             {
                 DEBUG_PRINT(DEBUGLEV_INFO, Register %s not allowed for event %s,
@@ -1432,6 +1635,7 @@ perfmon_addEventSet(char* eventCString)
                 event->type = NOTYPE;
                 goto past_checks;
             }
+#endif
             if (parseOptions(subtokens, &event->event, event->index) < 0)
             {
                 DEBUG_PRINT(DEBUGLEV_INFO, Cannot parse options in %s, bdata(eventtokens->entry[i]));
@@ -1439,7 +1643,7 @@ perfmon_addEventSet(char* eventCString)
                 goto past_checks;
             }
 
-            eventSet->regTypeMask |= REG_TYPE_MASK(event->type);
+            SETTYPE(eventSet, event->type);
 past_checks:
             event->threadCounter = (PerfmonCounter*) malloc(
                 groupSet->numberOfThreads * sizeof(PerfmonCounter));
@@ -1459,10 +1663,12 @@ past_checks:
                 event->threadCounter[j].overflows = 0;
                 event->threadCounter[j].init = FALSE;
             }
+
             eventSet->numberOfEvents++;
 
             if (event->type != NOTYPE)
             {
+                valid_events++;
                 DEBUG_PRINT(DEBUGLEV_INFO,
                         Added event %s for counter %s to group %d,
                         event->event.name,
@@ -1473,7 +1679,20 @@ past_checks:
         bstrListDestroy(subtokens);
     }
     bstrListDestroy(eventtokens);
-    if ((eventSet->numberOfEvents > 0) && (eventSet->regTypeMask != 0x0ULL))
+    int fixed_counters = 0;
+    char fix[] = "FIXC";
+    char* ptr;
+    ptr = strstr(eventCString, fix);
+    if (cpuid_info.isIntel && !ptr)
+    {
+        fixed_counters = cpuid_info.perf_num_fixed_ctr;
+    }
+
+    if (((valid_events > fixed_counters) || isPerfGroup) &&
+        ((eventSet->regTypeMask1 != 0x0ULL) ||
+        (eventSet->regTypeMask2 != 0x0ULL) ||
+        (eventSet->regTypeMask3 != 0x0ULL) ||
+        (eventSet->regTypeMask4 != 0x0ULL)))
     {
         eventSet->state = STATE_NONE;
         groupSet->numberOfActiveGroups++;
@@ -1481,7 +1700,10 @@ past_checks:
     }
     else
     {
-        fprintf(stderr,"No event in given event string can be configured\n");
+        fprintf(stderr,"ERROR: No event in given event string can be configured.\n");
+        fprintf(stderr,"       Either the events or counters do not exist for the\n");
+        fprintf(stderr,"       current architecture. If event options are set, they might\n");
+        fprintf(stderr,"       be invalid.\n");
         return -EINVAL;
     }
 }
@@ -1517,6 +1739,11 @@ perfmon_setupCounters(int groupId)
 {
     int i;
     int ret = 0;
+    if (!lock_check())
+    {
+        ERROR_PLAIN_PRINT(Access to performance monitoring registers locked);
+        return -ENOLCK;
+    }
     if (perfmon_initialized != 1)
     {
         ERROR_PLAIN_PRINT(Perfmon module not properly initialized);
@@ -1526,12 +1753,13 @@ perfmon_setupCounters(int groupId)
     {
         return -EINVAL;
     }
+
     if (groupId >= groupSet->numberOfActiveGroups)
     {
         ERROR_PRINT(Group %d does not exist in groupSet, groupId);
         return -ENOENT;
     }
-    
+
     for(i=0;i<groupSet->numberOfThreads;i++)
     {
         ret = __perfmon_setupCountersThread(groupSet->threads[i].thread_id, groupId);
@@ -1553,6 +1781,11 @@ __perfmon_startCounters(int groupId)
     {
         return -EINVAL;
     }
+    if (!lock_check())
+    {
+        ERROR_PLAIN_PRINT(Access to performance monitoring registers locked);
+        return -ENOLCK;
+    }
     for(;i<groupSet->numberOfThreads;i++)
     {
         for (j=0; j<perfmon_getNumberOfEvents(groupId); j++)
@@ -1619,6 +1852,12 @@ __perfmon_stopCounters(int groupId)
     int ret = 0;
     double result = 0.0;
 
+    if (!lock_check())
+    {
+        ERROR_PLAIN_PRINT(Access to performance monitoring registers locked);
+        return -ENOLCK;
+    }
+
     timer_stop(&groupSet->groups[groupId].timer);
 
     for (i = 0; i<groupSet->numberOfThreads; i++)
@@ -1646,7 +1885,8 @@ __perfmon_stopCounters(int groupId)
     return 0;
 }
 
-int perfmon_stopCounters(void)
+int
+perfmon_stopCounters(void)
 {
     if (perfmon_initialized != 1)
     {
@@ -1669,7 +1909,8 @@ int perfmon_stopCounters(void)
     return __perfmon_stopCounters(groupSet->activeGroup);
 }
 
-int perfmon_stopGroupCounters(int groupId)
+int
+perfmon_stopGroupCounters(int groupId)
 {
     if (perfmon_initialized != 1)
     {
@@ -1734,7 +1975,7 @@ __perfmon_readCounters(int groupId, int threadId)
                     result = (double)calculateResult(groupId, j, threadId);
                     groupSet->groups[groupId].events[j].threadCounter[threadId].lastResult = result;
                     groupSet->groups[groupId].events[j].threadCounter[threadId].fullResult += result;
-                    groupSet->groups[groupId].events[j].threadCounter[threadId].startData = 
+                    groupSet->groups[groupId].events[j].threadCounter[threadId].startData =
                         groupSet->groups[groupId].events[j].threadCounter[threadId].counterData;
                     groupSet->groups[groupId].events[j].threadCounter[threadId].overflows = 0;
                 }
@@ -1762,12 +2003,14 @@ __perfmon_readCounters(int groupId, int threadId)
     return 0;
 }
 
-int perfmon_readCounters(void)
+int
+perfmon_readCounters(void)
 {
     return __perfmon_readCounters(-1,-1);
 }
 
-int perfmon_readCountersCpu(int cpu_id)
+int
+perfmon_readCountersCpu(int cpu_id)
 {
     int i;
     int thread_id = -1;
@@ -1793,15 +2036,49 @@ int perfmon_readCountersCpu(int cpu_id)
     return i;
 }
 
-int perfmon_readGroupCounters(int groupId)
+int
+perfmon_readGroupCounters(int groupId)
 {
     return __perfmon_readCounters(groupId, -1);
 }
-int perfmon_readGroupThreadCounters(int groupId, int threadId)
+
+int
+perfmon_readGroupThreadCounters(int groupId, int threadId)
 {
     return __perfmon_readCounters(groupId, threadId);
 }
 
+/*
+ * Decide from the counter name alone whether it is treated as an uncore
+ * counter (return 1) or not (return 0).
+ *
+ * Names containing "FIXC" or "TMP" return 0, as do names that contain "PMC"
+ * without also containing "UPMC". Every other name returns 1. The argument
+ * is passed to strstr() without a NULL check.
+ */
+int
+perfmon_isUncoreCounter(char* counter)
+{
+    /* Name contains "FIXC". */
+    if (strstr(counter, "FIXC") != NULL)
+    {
+        return 0;
+    }
+
+    /* Name contains "TMP". */
+    if (strstr(counter, "TMP") != NULL)
+    {
+        return 0;
+    }
+
+    /* "PMC" also occurs inside "UPMC", which must not be rejected here. */
+    if ((strstr(counter, "PMC") != NULL) && (strstr(counter, "UPMC") == NULL))
+    {
+        return 0;
+    }
+    /* Anything that reaches this point counts as uncore. */
+    return 1;
+}
 
 double
 perfmon_getResult(int groupId, int eventId, int threadId)
@@ -1892,7 +2169,6 @@ perfmon_getMetric(int groupId, int metricId, int threadId)
     int e = 0;
     double result = 0;
     CounterList clist;
-    char* teststr = malloc(1024 * sizeof(char));
     if (unlikely(groupSet == NULL))
     {
         return 0;
@@ -1927,6 +2203,39 @@ perfmon_getMetric(int groupId, int metricId, int threadId)
     }
     add_to_clist(&clist, "time", perfmon_getTimeOfGroup(groupId));
     add_to_clist(&clist, "inverseClock", 1.0/timer_getCycleClock());
+    add_to_clist(&clist, "true", 1);
+    add_to_clist(&clist, "false", 0);
+    int cpu = 0, sock_cpu = 0, err = 0;
+    for (e=0; e<groupSet->numberOfThreads; e++)
+    {
+        if (groupSet->threads[e].thread_id == threadId)
+        {
+            cpu = groupSet->threads[e].processorId;
+        }
+    }
+    sock_cpu = socket_lock[affinity_core2node_lookup[cpu]];
+    if (cpu != sock_cpu)
+    {
+        for (e=0; e<groupSet->numberOfThreads; e++)
+        {
+            if (groupSet->threads[e].processorId == sock_cpu)
+            {
+                sock_cpu = groupSet->threads[e].thread_id;
+            }
+        }
+        for (e=0;e<groupSet->groups[groupId].numberOfEvents;e++)
+        {
+            if (perfmon_isUncoreCounter(groupSet->groups[groupId].group.counters[e]) &&
+                !perfmon_isUncoreCounter(groupSet->groups[groupId].group.metricformulas[metricId]))
+            {
+                err = update_clist(&clist,groupSet->groups[groupId].group.counters[e], perfmon_getResult(groupId, e, sock_cpu));
+                if (err < 0)
+                {
+                    DEBUG_PRINT(DEBUGLEV_DEVELOP, Cannot add socket result of counter %s for thread %d, groupSet->groups[groupId].group.counters[e], threadId);
+                }
+            }
+        }
+    }
     e = calc_metric(groupSet->groups[groupId].group.metricformulas[metricId], &clist, &result);
     if (e < 0)
     {
@@ -1977,6 +2286,39 @@ perfmon_getLastMetric(int groupId, int metricId, int threadId)
     }
     add_to_clist(&clist, "time", perfmon_getLastTimeOfGroup(groupId));
     add_to_clist(&clist, "inverseClock", 1.0/timer_getCycleClock());
+    add_to_clist(&clist, "true", 1);
+    add_to_clist(&clist, "false", 0);
+    int cpu = 0, sock_cpu = 0, err = 0;
+    for (e=0; e<groupSet->numberOfThreads; e++)
+    {
+        if (groupSet->threads[e].thread_id == threadId)
+        {
+            cpu = groupSet->threads[e].processorId;
+        }
+    }
+    sock_cpu = socket_lock[affinity_core2node_lookup[cpu]];
+    if (cpu != sock_cpu)
+    {
+        for (e=0; e<groupSet->numberOfThreads; e++)
+        {
+            if (groupSet->threads[e].processorId == sock_cpu)
+            {
+                sock_cpu = groupSet->threads[e].thread_id;
+            }
+        }
+        for (e=0;e<groupSet->groups[groupId].numberOfEvents;e++)
+        {
+            if (perfmon_isUncoreCounter(groupSet->groups[groupId].group.counters[e]) &&
+                !perfmon_isUncoreCounter(groupSet->groups[groupId].group.metricformulas[metricId]))
+            {
+                err = update_clist(&clist,groupSet->groups[groupId].group.counters[e], perfmon_getLastResult(groupId, e, sock_cpu));
+                if (err < 0)
+                {
+                    DEBUG_PRINT(DEBUGLEV_DEVELOP, Cannot add socket result of counter %s for thread %d, groupSet->groups[groupId].group.counters[e], threadId);
+                }
+            }
+        }
+    }
     e = calc_metric(groupSet->groups[groupId].group.metricformulas[metricId], &clist, &result);
     if (e < 0)
     {
@@ -1987,8 +2329,8 @@ perfmon_getLastMetric(int groupId, int metricId, int threadId)
     return result;
 }
 
-
-int __perfmon_switchActiveGroupThread(int thread_id, int new_group)
+int
+__perfmon_switchActiveGroupThread(int thread_id, int new_group)
 {
     int ret = 0;
     int i = 0;
@@ -2143,7 +2485,8 @@ perfmon_getMaxCounterValue(RegisterType type)
     return tmp;
 }
 
-char* perfmon_getEventName(int groupId, int eventId)
+char*
+perfmon_getEventName(int groupId, int eventId)
 {
     if (unlikely(groupSet == NULL))
     {
@@ -2170,7 +2513,8 @@ char* perfmon_getEventName(int groupId, int eventId)
     return groupSet->groups[groupId].group.events[eventId];
 }
 
-char* perfmon_getCounterName(int groupId, int eventId)
+char*
+perfmon_getCounterName(int groupId, int eventId)
 {
     if (unlikely(groupSet == NULL))
     {
@@ -2197,7 +2541,8 @@ char* perfmon_getCounterName(int groupId, int eventId)
     return groupSet->groups[groupId].group.counters[eventId];
 }
 
-char* perfmon_getMetricName(int groupId, int metricId)
+char*
+perfmon_getMetricName(int groupId, int metricId)
 {
     if (unlikely(groupSet == NULL))
     {
@@ -2223,7 +2568,8 @@ char* perfmon_getMetricName(int groupId, int metricId)
     return groupSet->groups[groupId].group.metricnames[metricId];
 }
 
-char* perfmon_getGroupName(int groupId)
+char*
+perfmon_getGroupName(int groupId)
 {
     if (unlikely(groupSet == NULL))
     {
@@ -2245,7 +2591,8 @@ char* perfmon_getGroupName(int groupId)
     return groupSet->groups[groupId].group.groupname;
 }
 
-char* perfmon_getGroupInfoShort(int groupId)
+char*
+perfmon_getGroupInfoShort(int groupId)
 {
     if (unlikely(groupSet == NULL))
     {
@@ -2267,7 +2614,8 @@ char* perfmon_getGroupInfoShort(int groupId)
     return groupSet->groups[groupId].group.shortinfo;
 }
 
-char* perfmon_getGroupInfoLong(int groupId)
+char*
+perfmon_getGroupInfoLong(int groupId)
 {
     if (unlikely(groupSet == NULL))
     {
@@ -2289,7 +2637,8 @@ char* perfmon_getGroupInfoLong(int groupId)
     return groupSet->groups[groupId].group.longinfo;
 }
 
-int perfmon_getGroups(char*** groups, char*** shortinfos, char*** longinfos)
+int
+perfmon_getGroups(char*** groups, char*** shortinfos, char*** longinfos)
 {
     int ret = 0;
     init_configuration();
@@ -2298,12 +2647,14 @@ int perfmon_getGroups(char*** groups, char*** shortinfos, char*** longinfos)
     return ret;
 }
 
-void perfmon_returnGroups(int nrgroups, char** groups, char** shortinfos, char** longinfos)
+void
+perfmon_returnGroups(int nrgroups, char** groups, char** shortinfos, char** longinfos)
 {
     return_groups(nrgroups, groups, shortinfos, longinfos);
 }
 
-int perfmon_getNumberOfMetrics(int groupId)
+int
+perfmon_getNumberOfMetrics(int groupId)
 {
     if (perfmon_initialized != 1)
     {
@@ -2317,7 +2668,8 @@ int perfmon_getNumberOfMetrics(int groupId)
     return groupSet->groups[groupId].group.nmetrics;
 }
 
-void perfmon_printMarkerResults()
+void
+perfmon_printMarkerResults()
 {
     int i = 0, j = 0, k = 0;
     for (i=0; i<markerRegions; i++)
@@ -2337,7 +2689,8 @@ void perfmon_printMarkerResults()
     }
 }
 
-int perfmon_getNumberOfRegions()
+int
+perfmon_getNumberOfRegions()
 {
     if (perfmon_initialized != 1)
     {
@@ -2351,8 +2704,8 @@ int perfmon_getNumberOfRegions()
     return markerRegions;
 }
 
-
-int perfmon_getGroupOfRegion(int region)
+int
+perfmon_getGroupOfRegion(int region)
 {
     if (perfmon_initialized != 1)
     {
@@ -2370,7 +2723,8 @@ int perfmon_getGroupOfRegion(int region)
     return markerResults[region].groupID;
 }
 
-char* perfmon_getTagOfRegion(int region)
+char*
+perfmon_getTagOfRegion(int region)
 {
     if (perfmon_initialized != 1)
     {
@@ -2388,8 +2742,8 @@ char* perfmon_getTagOfRegion(int region)
     return bdata(markerResults[region].tag);
 }
 
-
-int perfmon_getEventsOfRegion(int region)
+int
+perfmon_getEventsOfRegion(int region)
 {
     if (perfmon_initialized != 1)
     {
@@ -2407,9 +2761,9 @@ int perfmon_getEventsOfRegion(int region)
     return markerResults[region].eventCount;
 }
 
-int perfmon_getMetricsOfRegion(int region)
+int
+perfmon_getMetricsOfRegion(int region)
 {
-    
     if (region < 0 || region >= markerRegions)
     {
         return -EINVAL;
@@ -2421,8 +2775,8 @@ int perfmon_getMetricsOfRegion(int region)
     return perfmon_getNumberOfMetrics(markerResults[region].groupID);
 }
 
-
-int perfmon_getThreadsOfRegion(int region)
+int
+perfmon_getThreadsOfRegion(int region)
 {
     if (perfmon_initialized != 1)
     {
@@ -2440,7 +2794,8 @@ int perfmon_getThreadsOfRegion(int region)
     return markerResults[region].threadCount;
 }
 
-int perfmon_getCpulistOfRegion(int region, int count, int* cpulist)
+int
+perfmon_getCpulistOfRegion(int region, int count, int* cpulist)
 {
     int i;
     if (perfmon_initialized != 1)
@@ -2467,8 +2822,8 @@ int perfmon_getCpulistOfRegion(int region, int count, int* cpulist)
     return MIN(count, markerResults[region].threadCount);
 }
 
-
-double perfmon_getTimeOfRegion(int region, int thread)
+double
+perfmon_getTimeOfRegion(int region, int thread)
 {
     if (perfmon_initialized != 1)
     {
@@ -2490,7 +2845,8 @@ double perfmon_getTimeOfRegion(int region, int thread)
     return markerResults[region].time[thread];
 }
 
-int perfmon_getCountOfRegion(int region, int thread)
+int
+perfmon_getCountOfRegion(int region, int thread)
 {
     if (perfmon_initialized != 1)
     {
@@ -2512,7 +2868,8 @@ int perfmon_getCountOfRegion(int region, int thread)
     return markerResults[region].count[thread];
 }
 
-double perfmon_getResultOfRegionThread(int region, int event, int thread)
+double
+perfmon_getResultOfRegionThread(int region, int event, int thread)
 {
     if (perfmon_initialized != 1)
     {
@@ -2586,6 +2943,39 @@ perfmon_getMetricOfRegionThread(int region, int metricId, int threadId)
     }
     add_to_clist(&clist, "time", perfmon_getTimeOfRegion(region, threadId));
     add_to_clist(&clist, "inverseClock", 1.0/timer_getCycleClock());
+    add_to_clist(&clist, "true", 1);
+    add_to_clist(&clist, "false", 0);
+    int cpu = 0, sock_cpu = 0;
+    for (e=0; e<groupSet->numberOfThreads; e++)
+    {
+        if (groupSet->threads[e].thread_id == threadId)
+        {
+            cpu = groupSet->threads[e].processorId;
+        }
+    }
+    sock_cpu = socket_lock[affinity_core2node_lookup[cpu]];
+    if (cpu != sock_cpu)
+    {
+        for (e=0; e<groupSet->numberOfThreads; e++)
+        {
+            if (groupSet->threads[e].processorId == sock_cpu)
+            {
+                sock_cpu = groupSet->threads[e].thread_id;
+            }
+        }
+        for (e=0;e<markerResults[region].eventCount;e++)
+        {
+            if (perfmon_isUncoreCounter(groupSet->groups[markerResults[region].groupID].group.counters[e]) &&
+                !perfmon_isUncoreCounter(groupSet->groups[markerResults[region].groupID].group.metricformulas[metricId]))
+            {
+                err = update_clist(&clist,groupSet->groups[markerResults[region].groupID].group.counters[e], perfmon_getResultOfRegionThread(region, e, sock_cpu));
+                if (err < 0)
+                {
+                    DEBUG_PRINT(DEBUGLEV_DEVELOP, Cannot add socket result of counter %s for thread %d, groupSet->groups[markerResults[region].groupID].group.counters[e], threadId);
+                }
+            }
+        }
+    }
     err = calc_metric(groupSet->groups[markerResults[region].groupID].group.metricformulas[metricId], &clist, &result);
     if (err < 0)
     {
@@ -2595,15 +2985,18 @@ perfmon_getMetricOfRegionThread(int region, int metricId, int threadId)
     return result;
 }
 
-int perfmon_readMarkerFile(const char* filename)
+int
+perfmon_readMarkerFile(const char* filename)
 {
     FILE* fp = NULL;
     int i = 0;
+    int ret = 0;
     char buf[2048];
     buf[0] = '\0';
     char *ptr = NULL;
+    int nr_regions = 0;
     int cpus = 0, groups = 0, regions = 0;
-    
+
     if (filename == NULL)
     {
         return -EINVAL;
@@ -2618,7 +3011,12 @@ int perfmon_readMarkerFile(const char* filename)
         fprintf(stderr, "Error opening file %s\n", filename);
     }
     ptr = fgets(buf, sizeof(buf), fp);
-    sscanf(buf, "%d %d %d", &cpus, &regions, &groups);
+    ret = sscanf(buf, "%d %d %d", &cpus, &regions, &groups);
+    if (ret != 3)
+    {
+        fprintf(stderr, "Marker file missformatted.\n");
+        return -EINVAL;
+    }
     //markerResults = malloc(regions * sizeof(LikwidResults));
     markerResults = realloc(markerResults, regions * sizeof(LikwidResults));
     if (markerResults == NULL)
@@ -2667,13 +3065,25 @@ int perfmon_readMarkerFile(const char* filename)
     {
         if (strchr(buf,':'))
         {
-            int regionid = 0, groupid = 0;
+            int regionid = 0, groupid = -1;
             char regiontag[100];
+            char* ptr = NULL;
+            char* colonptr = NULL;
             regiontag[0] = '\0';
-            sscanf(buf, "%d:%s-%d", &regionid, regiontag, &groupid);
-            snprintf(regiontag, strlen(buf)-4, "%s", &(buf[2]));
+            ret = sscanf(buf, "%d:%s", &regionid, regiontag);
+
+            ptr = strrchr(regiontag,'-');
+            colonptr = strchr(buf,':');
+            if (ret != 2 || ptr == NULL || colonptr == NULL)
+            {
+                fprintf(stderr, "Line %s not a valid region description\n", buf);
+                continue;
+            }
+            groupid = atoi(ptr+1);
+            snprintf(regiontag, strlen(regiontag)-strlen(ptr)+1, "%s", &(buf[colonptr-buf+1]));
             markerResults[regionid].groupID = groupid;
             markerResults[regionid].tag = bfromcstr(regiontag);
+            nr_regions++;
         }
         else
         {
@@ -2682,7 +3092,12 @@ int perfmon_readMarkerFile(const char* filename)
             double time = 0;
             char remain[1024];
             remain[0] = '\0';
-            sscanf(buf, "%d %d %d %d %lf %d %[^\t\n]", &regionid, &groupid, &cpu, &count, &time, &nevents, remain);
+            ret = sscanf(buf, "%d %d %d %d %lf %d %[^\t\n]", &regionid, &groupid, &cpu, &count, &time, &nevents, remain);
+            if (ret != 7)
+            {
+                fprintf(stderr, "Line %s not a valid region values line\n", buf);
+                continue;
+            }
             if (cpu >= 0)
             {
                 cpuidx = regionCPUs[regionid];
@@ -2710,10 +3125,11 @@ int perfmon_readMarkerFile(const char* filename)
     }
     free(regionCPUs);
     fclose(fp);
-    return 0;
+    return nr_regions;
 }
 
-void perfmon_destroyMarkerResults()
+void
+perfmon_destroyMarkerResults()
 {
     int i = 0, j = 0;
     if (markerResults != NULL)
@@ -2733,3 +3149,4 @@ void perfmon_destroyMarkerResults()
         free(markerResults);
     }
 }
+
diff --git a/src/perfmon_perf.c b/src/perfmon_perf.c
index cfc40ac..87a8c42 100644
--- a/src/perfmon_perf.c
+++ b/src/perfmon_perf.c
@@ -6,8 +6,8 @@
  *      Description:  Example perfmon module for software events through perf_event
  *                    Currently not integrated in perfmon.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
  *      Project:  likwid
@@ -29,6 +29,8 @@
  * =======================================================================================
  */
 
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -42,8 +44,13 @@
 #include <perfmon.h>
 #include <perfmon_perf.h>
 
+/* #####   LOCAL VARIABLES   ############################################## */
+
 static int* cpu_event_fds[MAX_NUM_THREADS] = { NULL };
 
+
+/* #####   EXPORTED VARIABLES   ########################################### */
+
 const uint64_t configList[MAX_SW_EVENTS] = {
     [0x00] = PERF_COUNT_SW_CPU_CLOCK,
     [0x01] = PERF_COUNT_SW_TASK_CLOCK,
@@ -56,9 +63,15 @@ const uint64_t configList[MAX_SW_EVENTS] = {
     [0x08] = PERF_COUNT_SW_EMULATION_FAULTS,
 };
 
+/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
+
 static long
-perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
-                int cpu, int group_fd, unsigned long flags)
+perf_event_open(
+        struct perf_event_attr *hw_event,
+        pid_t pid,
+        int cpu,
+        int group_fd,
+        unsigned long flags)
 {
     int ret;
 
@@ -67,7 +80,10 @@ perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
     return ret;
 }
 
-int init_perf_event(int cpu_id)
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
+
+int
+init_perf_event(int cpu_id)
 {
     if (cpu_event_fds[cpu_id] == NULL)
     {
@@ -81,7 +97,8 @@ int init_perf_event(int cpu_id)
     return 0;
 }
 
-int setup_perf_event(int cpu_id, PerfmonEvent* event)
+int
+setup_perf_event(int cpu_id, PerfmonEvent* event)
 {
     struct perf_event_attr attr;
     if (event == NULL)
@@ -127,7 +144,8 @@ int setup_perf_event(int cpu_id, PerfmonEvent* event)
     return 0;
 }
 
-int read_perf_event(int cpu_id, uint64_t eventID, uint64_t *data)
+int
+read_perf_event(int cpu_id, uint64_t eventID, uint64_t *data)
 {
     int ret = 0;
     long long tmp = 0;
@@ -148,7 +166,8 @@ int read_perf_event(int cpu_id, uint64_t eventID, uint64_t *data)
     return 0;
 }
 
-int stop_perf_event(int cpu_id, uint64_t eventID)
+int
+stop_perf_event(int cpu_id, uint64_t eventID)
 {
     if ((cpu_event_fds[cpu_id] != NULL) && (cpu_event_fds[cpu_id][eventID] != -1))
     {
@@ -161,7 +180,8 @@ int stop_perf_event(int cpu_id, uint64_t eventID)
     return 0;
 }
 
-int stop_all_perf_event(int cpu_id)
+int
+stop_all_perf_event(int cpu_id)
 {
     if (cpu_event_fds[cpu_id] != NULL)
     {
@@ -176,7 +196,8 @@ int stop_all_perf_event(int cpu_id)
     return 0;
 }
 
-int clear_perf_event(int cpu_id, uint64_t eventID)
+int
+clear_perf_event(int cpu_id, uint64_t eventID)
 {
     if ((cpu_event_fds[cpu_id] != NULL) && (cpu_event_fds[cpu_id][eventID] != -1))
     {
@@ -189,7 +210,8 @@ int clear_perf_event(int cpu_id, uint64_t eventID)
     return 0;
 }
 
-int clear_all_perf_event(int cpu_id)
+int
+clear_all_perf_event(int cpu_id)
 {
     if (cpu_event_fds[cpu_id] != NULL)
     {
@@ -204,7 +226,8 @@ int clear_all_perf_event(int cpu_id)
     return 0;
 }
 
-int start_perf_event(int cpu_id, uint64_t eventID)
+int
+start_perf_event(int cpu_id, uint64_t eventID)
 {
     if ((cpu_event_fds[cpu_id] != NULL) && (cpu_event_fds[cpu_id][eventID] != -1))
     {
@@ -217,7 +240,8 @@ int start_perf_event(int cpu_id, uint64_t eventID)
     return 0;
 }
 
-int start_all_perf_event(int cpu_id)
+int
+start_all_perf_event(int cpu_id)
 {
     if (cpu_event_fds[cpu_id] != NULL)
     {
@@ -232,7 +256,8 @@ int start_all_perf_event(int cpu_id)
     return 0;
 }
 
-int close_perf_event(int cpu_id, uint64_t eventID)
+int
+close_perf_event(int cpu_id, uint64_t eventID)
 {
     if ((cpu_event_fds[cpu_id] != NULL) && (cpu_event_fds[cpu_id][eventID] != -1))
     {
@@ -242,7 +267,8 @@ int close_perf_event(int cpu_id, uint64_t eventID)
     return 0;
 }
 
-int finalize_perf_event(int cpu_id)
+int
+finalize_perf_event(int cpu_id)
 {
     if (cpu_event_fds[cpu_id] != NULL)
     {
@@ -255,6 +281,6 @@ int finalize_perf_event(int cpu_id)
         }
         free(cpu_event_fds[cpu_id]);
     }
-    
     return 0;
 }
+
diff --git a/src/power.c b/src/power.c
index e33695c..94c7ea4 100644
--- a/src/power.c
+++ b/src/power.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Module implementing Intel RAPL interface
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Authors:  Jan Treibig (jt), jan.treibig at gmail.com,
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -28,6 +28,9 @@
  *
  * =======================================================================================
  */
+
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <math.h>
@@ -40,9 +43,9 @@
 
 PowerInfo power_info;
 
-/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
-static int power_initialized = 0;
+/* #####   LOCAL VARIABLES   ############################################## */
 
+static int power_initialized = 0;
 
 /* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
 
@@ -67,6 +70,9 @@ power_init(int cpuId)
     power_info.powerUnit = 0;
     power_info.timeUnit = 0;
     power_info.hasRAPL = 0;
+    power_info.uncoreMinFreq = 0;
+    power_info.uncoreMaxFreq = 0;
+    power_info.perfBias = 0;
 
     switch (cpuid_info.model)
     {
@@ -89,6 +95,9 @@ power_init(int cpuId)
         case HASWELL_M2:
         case SKYLAKE1:
         case SKYLAKE2:
+        case KABYLAKE1:
+        case KABYLAKE2:
+        case XEON_PHI_KNL:
             power_info.hasRAPL = 1;
             break;
         case ATOM_SILVERMONT_C:
@@ -107,8 +116,15 @@ power_init(int cpuId)
     if (!HPMinitialized())
     {
         HPMinit();
-        HPMaddThread(cpuId);
+        err = HPMaddThread(cpuId);
+        if (err != 0)
+        {
+            ERROR_PLAIN_PRINT(Cannot get access to RAPL counters)
+            return err;
+        }
     }
+
+
     if ( power_info.hasRAPL )
     {
         busSpeed = 100.0;
@@ -155,9 +171,42 @@ power_init(int cpuId)
                     }
                 }
             }
-            //TODO: Haswell EP and possibly Broadwell EP support multiple turbo 
-            //      registers besides MSR_TURBO_RATIO_LIMIT:
-            //      MSR_TURBO_RATIO_LIMIT1 and MSR_TURBO_RATIO_LIMIT2
+            if (power_info.turbo.numSteps > 8)
+            {
+                err = HPMread(cpuId, MSR_DEV, MSR_TURBO_RATIO_LIMIT1, &flags);
+                if (!err)
+                {
+                    for (int i=8; i < power_info.turbo.numSteps; i++)
+                    {
+                        if (i < 16)
+                        {
+                            power_info.turbo.steps[i] = busSpeed * (double) field64(flags,i*8, 8);
+                        }
+                        else
+                        {
+                            power_info.turbo.steps[i] = power_info.turbo.steps[15];
+                        }
+                    }
+                }
+            }
+            if (power_info.turbo.numSteps > 16)
+            {
+                err = HPMread(cpuId, MSR_DEV, MSR_TURBO_RATIO_LIMIT2, &flags);
+                if (!err)
+                {
+                    for (int i=16; i < power_info.turbo.numSteps; i++)
+                    {
+                        if (i < 24)
+                        {
+                            power_info.turbo.steps[i] = busSpeed * (double) field64(flags,i*8, 8);
+                        }
+                        else
+                        {
+                            power_info.turbo.steps[i] = power_info.turbo.steps[23];
+                        }
+                    }
+                }
+            }
         }
         else
         {
@@ -182,7 +231,6 @@ power_init(int cpuId)
             {
                 energyUnit = 1.0 * (1 << ((flags >> 8) & 0x1F)) / 1000000;
             }
-            
             for (i = 0; i < NUM_POWER_DOMAINS; i++)
             {
                 power_info.domains[i].energyUnit = energyUnit;
@@ -193,10 +241,12 @@ power_init(int cpuId)
                 power_info.domains[i].maxPower = 0.0;
                 power_info.domains[i].maxTimeWindow = 0.0;
             }
-            
             if ((cpuid_info.model == HASWELL_EP) ||
                 (cpuid_info.model == HASWELL_M1) ||
-                (cpuid_info.model == HASWELL_M2))
+                (cpuid_info.model == HASWELL_M2) ||
+                (cpuid_info.model == BROADWELL_D) ||
+                (cpuid_info.model == BROADWELL_E) ||
+                (cpuid_info.model == XEON_PHI_KNL))
             {
                 power_info.domains[DRAM].energyUnit = 15.3E-6;
             }
@@ -279,6 +329,18 @@ power_init(int cpuId)
             fprintf(stderr,"Cannot gather values from MSR_RAPL_POWER_UNIT, deactivating RAPL support\n");
             power_info.hasRAPL =  0;
         }
+
+        err = HPMread(cpuId, MSR_DEV, MSR_UNCORE_FREQ, &flags);
+        if (err == 0)
+        {
+            power_info.uncoreMinFreq = ((double)((flags >> 8) & 0xFFULL)) * busSpeed;
+            power_info.uncoreMaxFreq = ((double)(flags & 0xFF)) * busSpeed;
+        }
+        err = HPMread(cpuId, MSR_DEV, MSR_ENERGY_PERF_BIAS, &flags);
+        if (err == 0)
+        {
+            power_info.perfBias = flags & 0xF;
+        }
         power_initialized = 1;
         return power_info.hasRAPL;
     }
@@ -290,7 +352,8 @@ power_init(int cpuId)
 }
 
 /* All functions below are experimental and probably don't work */
-int power_perfGet(int cpuId, PowerType domain, uint32_t* status)
+int
+power_perfGet(int cpuId, PowerType domain, uint32_t* status)
 {
     int err = 0;
     *status = 0x0U;
@@ -310,7 +373,8 @@ int power_perfGet(int cpuId, PowerType domain, uint32_t* status)
     return 0;
 }
 
-int power_limitSet(int cpuId, PowerType domain, double power, double time, int doClamping)
+int
+power_limitSet(int cpuId, PowerType domain, double power, double time, int doClamping)
 {
     int err = 0;
     if (domain >= NUM_POWER_DOMAINS)
@@ -340,7 +404,8 @@ int power_limitSet(int cpuId, PowerType domain, double power, double time, int d
     return 0;
 }
 
-int power_limitGet(int cpuId, PowerType domain, double* power, double* time)
+int
+power_limitGet(int cpuId, PowerType domain, double* power, double* time)
 {
     int err = 0;
     *power = 0;
@@ -367,7 +432,8 @@ int power_limitGet(int cpuId, PowerType domain, double* power, double* time)
     return 0;
 }
 
-int power_limitState(int cpuId, PowerType domain)
+int
+power_limitState(int cpuId, PowerType domain)
 {
     int err = 0;
     if (domain >= NUM_POWER_DOMAINS)
@@ -392,7 +458,8 @@ int power_limitState(int cpuId, PowerType domain)
     return 0;
 }
 
-int power_limitActivate(int cpuId, PowerType domain)
+int
+power_limitActivate(int cpuId, PowerType domain)
 {
     int err = 0;
     if (domain >= NUM_POWER_DOMAINS)
@@ -420,7 +487,8 @@ int power_limitActivate(int cpuId, PowerType domain)
     return 0;
 }
 
-int power_limitDectivate(int cpuId, PowerType domain)
+int
+power_limitDectivate(int cpuId, PowerType domain)
 {
     int err = 0;
     uint64_t flags = 0x0ULL;
@@ -444,7 +512,8 @@ int power_limitDectivate(int cpuId, PowerType domain)
     return 0;
 }
 
-int power_policySet(int cpuId, PowerType domain, uint32_t priority)
+int
+power_policySet(int cpuId, PowerType domain, uint32_t priority)
 {
     int err = 0;
     if (domain >= NUM_POWER_DOMAINS)
@@ -464,7 +533,8 @@ int power_policySet(int cpuId, PowerType domain, uint32_t priority)
     return 0;
 }
 
-int power_policyGet(int cpuId, PowerType domain, uint32_t* priority)
+int
+power_policyGet(int cpuId, PowerType domain, uint32_t* priority)
 {
     int err = 0;
     *priority = 0x0U;
@@ -484,8 +554,8 @@ int power_policyGet(int cpuId, PowerType domain, uint32_t* priority)
     return 0;
 }
 
-
-void power_finalize(void)
+void
+power_finalize(void)
 {
     if (power_initialized == 0)
     {
@@ -502,10 +572,14 @@ void power_finalize(void)
     power_info.powerUnit = 0;
     power_info.timeUnit = 0;
     power_info.hasRAPL = 0;
+    power_info.uncoreMinFreq = 0;
+    power_info.uncoreMaxFreq = 0;
     memset(power_info.domains, 0, NUM_POWER_DOMAINS*sizeof(PowerDomain));
 }
 
-PowerInfo_t get_powerInfo(void)
+PowerInfo_t
+get_powerInfo(void)
 {
     return &power_info;
 }
+
diff --git a/src/pthread-overload/Makefile b/src/pthread-overload/Makefile
index eaf6adf..613e595 100644
--- a/src/pthread-overload/Makefile
+++ b/src/pthread-overload/Makefile
@@ -4,8 +4,8 @@
 #
 #      Description:  pthread-overload Makefile
 #
-#      Version:   4.1
-#      Released:  8.8.2016
+#      Version:   <VERSION>
+#      Released:  <DATE>
 #
 #      Author:  Jan Treibig (jt), jan.treibig at gmail.com
 #               Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -33,6 +33,7 @@ include  ../../make/include_$(COMPILER).mk
 include  ../../make/config_checks.mk
 include  ../../make/config_defines.mk
 
+Q         ?= @
 
 TARGET   = $(PINLIB)
 
@@ -43,10 +44,10 @@ endif
 DEFINES  += -DMAX_NUM_THREADS=$(MAX_NUM_THREADS) -D_GNU_SOURCE
 INCLUDES += -I../includes
 LIBS     += -ldl
-CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(INCLUDES) 
+CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(INCLUDES)
 
 all: $(TARGET)
 
 $(TARGET): pthread-overload.c
-	$(CC) -Wl,-soname,$(TARGET).$(VERSION).$(RELEASE) $(CFLAGS) $(ANSI_CFLAGS) $(CPPFLAGS) $(INCLUDES) $(SHARED_CFLAGS) $(SHARED_LFLAGS) -o ../../$(TARGET) pthread-overload.c $(LIBS)
+	$(Q)$(CC) -Wl,-soname,$(TARGET).$(VERSION).$(RELEASE) $(CFLAGS) $(ANSI_CFLAGS) $(CPPFLAGS) $(INCLUDES) $(SHARED_CFLAGS) $(SHARED_LFLAGS) -o ../../$(TARGET) pthread-overload.c $(LIBS)
 
diff --git a/src/pthread-overload/pthread-overload.c b/src/pthread-overload/pthread-overload.c
index 05e74fc..b4c2b28 100644
--- a/src/pthread-overload/pthread-overload.c
+++ b/src/pthread-overload/pthread-overload.c
@@ -6,8 +6,8 @@
  *      Description:  Overloaded library for pthread_create call. 
  *                    Implements pinning of threads together with likwid-pin.
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -93,7 +93,6 @@ pthread_create(pthread_t* thread,
     static uint64_t skipMask = 0x0;
     static int ncpus = 0;
 
-
     /* On first entry: Get Evironment Variable and initialize pin_ids */
     if (ncalled == 0)
     {
diff --git a/src/thermal.c b/src/thermal.c
index 8a46e67..c967e09 100644
--- a/src/thermal.c
+++ b/src/thermal.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Module implementing Intel TM/TM2 interface
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -28,6 +28,8 @@
  * =======================================================================================
  */
 
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <math.h>
@@ -40,17 +42,15 @@
 
 ThermalInfo thermal_info;
 
-/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
-
-
-
 /* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
 
-void thermal_init(int cpuId)
+void
+thermal_init(int cpuId)
 {
     uint64_t flags=0ULL;
     HPMinit();
-    HPMaddThread(cpuId);
+    if (HPMaddThread(cpuId) < 0)
+        fprintf(stderr, "Cannot initialize access to registers on CPU %d\n", cpuId);
 
     if ( cpuid_hasFeature(TM2) )
     {
diff --git a/src/timer.c b/src/timer.c
index 706158d..f75c5d8 100644
--- a/src/timer.c
+++ b/src/timer.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Implementation of timer module
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:  Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -27,7 +27,9 @@
  *
  * =======================================================================================
  */
+
 /* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -39,8 +41,8 @@
 #include <likwid.h>
 #include <cpuid.h>
 
-/* #####   EXPORTED VARIABLES   ########################################### */
 /* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
+
 static uint64_t baseline = 0ULL;
 static uint64_t cpuClock = 0ULL;
 static uint64_t cyclesClock = 0ULL;
@@ -50,12 +52,10 @@ static int timer_initialized = 0;
 void (*TSTART)(TscCounter*) = NULL;
 void (*TSTOP)(TscCounter*) = NULL;
 
-/* #####   MACROS  -  LOCAL TO THIS SOURCE FILE   ######################### */
-
-
 /* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
 #if defined(__x86_64)
-static void fRDTSC(TscCounter* cpu_c)
+static void
+fRDTSC(TscCounter* cpu_c)
 {
     __asm__ volatile("xor %%eax,%%eax\n\t"           \
     "cpuid\n\t"           \
@@ -66,7 +66,8 @@ static void fRDTSC(TscCounter* cpu_c)
     : : "%eax","%ebx","%ecx","%edx");
 }
 
-static void fRDTSC_CR(TscCounter* cpu_c)
+static void
+fRDTSC_CR(TscCounter* cpu_c)
 {
     __asm__ volatile(   \
     "rdtsc\n\t"           \
@@ -76,7 +77,8 @@ static void fRDTSC_CR(TscCounter* cpu_c)
     : : "%eax","%ebx","%ecx","%edx");
 }
 #ifndef __MIC__
-static void fRDTSCP(TscCounter* cpu_c)
+static void
+fRDTSCP(TscCounter* cpu_c)
 {
     __asm__ volatile(     \
     "rdtscp\n\t"          \
@@ -90,7 +92,8 @@ static void fRDTSCP(TscCounter* cpu_c)
 #endif
 
 #if defined(__i386__)
-static void fRDTSC(TscCounter* cpu_c)
+static void
+fRDTSC(TscCounter* cpu_c)
 {
     uint64_t tmp;
     __asm__ volatile( \
@@ -105,7 +108,8 @@ static void fRDTSC(TscCounter* cpu_c)
     : : "%eax","%ecx","%edx");
 }
 
-static void fRDTSC_CR(TscCounter* cpu_c)
+static void
+fRDTSC_CR(TscCounter* cpu_c)
 {
     __asm__ volatile(     \
     "rdtsc\n\t"           \
@@ -115,7 +119,8 @@ static void fRDTSC_CR(TscCounter* cpu_c)
     : : "%eax","%edx");
 }
 #ifndef __MIC__
-static void fRDTSCP(TscCounter* cpu_c)
+static void
+fRDTSCP(TscCounter* cpu_c)
 {
     uint64_t tmp;
     __asm__ volatile(     \
@@ -130,7 +135,8 @@ static void fRDTSCP(TscCounter* cpu_c)
 }
 #endif
 #endif
-static void _timer_start( TimerData* time )
+static void
+_timer_start( TimerData* time )
 {
 #if defined(__x86_64) || defined(__i386__)
     if (TSTART)
@@ -149,7 +155,8 @@ static void _timer_start( TimerData* time )
 #endif
 }
 
-static void _timer_stop( TimerData* time )
+static void
+_timer_stop( TimerData* time )
 {
 #if defined(__x86_64) || defined(__i386__)
     if (TSTOP)
@@ -167,7 +174,8 @@ static void _timer_stop( TimerData* time )
 #endif
 }
 
-static uint64_t _timer_printCycles( TimerData* time )
+static uint64_t
+_timer_printCycles( const TimerData* time )
 {
     /* clamp to zero if something goes wrong */
     if (((time->stop.int64-baseline) < time->start.int64) ||
@@ -182,7 +190,8 @@ static uint64_t _timer_printCycles( TimerData* time )
 }
 
 /* Return time duration in seconds */
-static double _timer_print( TimerData* time )
+static double
+_timer_print( const TimerData* time )
 {
     uint64_t cycles;
     /* clamp to zero if something goes wrong */
@@ -265,14 +274,14 @@ getCpuSpeed(void)
 
     cpuClock = (uint64_t)   atoi(buff);
     cpuClock *= 1E6;
+    pclose(fpipe);
 #endif
 }
 
-
-
 /* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
 
-void init_sleep()
+void
+init_sleep()
 {
     int status;
     TimerData timer;
@@ -290,8 +299,8 @@ void init_sleep()
     }
 }
 
-
-void timer_init( void )
+void
+timer_init( void )
 {
     uint32_t eax = 0x0,ebx = 0x0,ecx = 0x0,edx = 0x0;
     if (timer_initialized == 1)
@@ -323,7 +332,8 @@ void timer_init( void )
     timer_initialized = 1;
 }
 
-uint64_t timer_printCycles( TimerData* time )
+uint64_t
+timer_printCycles( const TimerData* time )
 {
     if (timer_initialized != 1)
     {
@@ -334,7 +344,8 @@ uint64_t timer_printCycles( TimerData* time )
 }
 
 /* Return time duration in seconds */
-double timer_print( TimerData* time )
+double
+timer_print( const TimerData* time )
 {
     uint64_t cycles;
     if (timer_initialized != 1)
@@ -345,7 +356,8 @@ double timer_print( TimerData* time )
     return _timer_print(time);
 }
 
-uint64_t timer_getCpuClock( void )
+uint64_t
+timer_getCpuClock( void )
 {
     if (timer_initialized != 1)
     {
@@ -355,7 +367,8 @@ uint64_t timer_getCpuClock( void )
     return cpuClock;
 }
 
-uint64_t timer_getCpuClockCurrent( int cpu_id )
+uint64_t
+timer_getCpuClockCurrent( int cpu_id )
 {
     int err;
     uint64_t clock = 0x0ULL;
@@ -385,7 +398,8 @@ uint64_t timer_getCpuClockCurrent( int cpu_id )
     return clock *1E3;
 }
 
-uint64_t timer_getCycleClock( void )
+uint64_t
+timer_getCycleClock( void )
 {
     if (timer_initialized != 1)
     {
@@ -395,7 +409,8 @@ uint64_t timer_getCycleClock( void )
     return cyclesClock;
 }
 
-uint64_t timer_getBaseline( void )
+uint64_t
+timer_getBaseline( void )
 {
     if (timer_initialized != 1)
     {
@@ -405,7 +420,8 @@ uint64_t timer_getBaseline( void )
     return baseline;
 }
 
-void timer_start( TimerData* time )
+void
+timer_start( TimerData* time )
 {
     if (timer_initialized != 1)
     {
@@ -415,8 +431,8 @@ void timer_start( TimerData* time )
     _timer_start(time);
 }
 
-
-void timer_stop( TimerData* time )
+void
+timer_stop( TimerData* time )
 {
     if (timer_initialized != 1)
     {
@@ -426,9 +442,8 @@ void timer_stop( TimerData* time )
     _timer_stop(time);
 }
 
-
-
-int timer_sleep(unsigned long usec)
+int
+timer_sleep(unsigned long usec)
 {
     int status = -1;
     struct timespec req;
@@ -454,8 +469,8 @@ int timer_sleep(unsigned long usec)
     return status;
 }
 
-
-void timer_finalize(void)
+void
+timer_finalize(void)
 {
     if (timer_initialized != 1)
     {
@@ -469,8 +484,10 @@ void timer_finalize(void)
     timer_initialized = 0;
 }
 
-void timer_reset( TimerData* time )
+void
+timer_reset( TimerData* time )
 {
     time->start.int64 = 0;
     time->stop.int64 = 0;
 }
+
diff --git a/src/topology.c b/src/topology.c
index b4ab30b..ad70ab7 100644
--- a/src/topology.c
+++ b/src/topology.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Interface to the topology backends
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Authors:  Jan Treibig (jt), jan.treibig at gmail.com,
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -28,6 +28,9 @@
  *
  * =======================================================================================
  */
+
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <sys/types.h>
@@ -43,13 +46,9 @@
 //#include <strUtil.h>
 #include <configuration.h>
 
+/* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
 
 static int topology_initialized = 0;
-CpuInfo cpuid_info;
-CpuTopology cpuid_topology;
-
-int affinity_thread2tile_lookup[MAX_NUM_THREADS];
-
 static char* pentium_m_b_str = "Intel Pentium M Banias processor";
 static char* pentium_m_d_str = "Intel Pentium M Dothan processor";
 static char* core_duo_str = "Intel Core Duo processor";
@@ -74,11 +73,12 @@ static char* broadwell_str = "Intel Core Broadwell processor";
 static char* broadwell_d_str = "Intel Xeon D Broadwell processor";
 static char* broadwell_ep_str = "Intel Xeon Broadwell EN/EP/EX processor";
 static char* skylake_str = "Intel Skylake processor";
+static char* kabylake_str = "Intel Kabylake processor";
 static char* nehalem_ex_str = "Intel Nehalem EX processor";
 static char* westmere_ex_str = "Intel Westmere EX processor";
 static char* xeon_mp_string = "Intel Xeon MP processor";
 static char* xeon_phi_string = "Intel Xeon Phi (Knights Corner) Coprocessor";
-static char* xeon_phi2_string = "Intel Xeon Phi (Knights Landing) Coprocessor";
+static char* xeon_phi2_string = "Intel Xeon Phi (Knights Landing) (Co)Processor";
 static char* barcelona_str = "AMD Barcelona processor";
 static char* shanghai_str = "AMD Shanghai processor";
 static char* istanbul_str = "AMD Istanbul processor";
@@ -115,45 +115,25 @@ static char* short_ivybridge_ep = "ivybridgeEP";
 static char* short_sandybridge = "sandybridge";
 static char* short_sandybridge_ep = "sandybridgeEP";
 static char* short_skylake = "skylake";
+static char* short_kabylake = "skylake";
 static char* short_phi = "phi";
-static char* short_phi2 = "phi2";
+static char* short_phi2 = "knl";
 static char* short_k8 = "k8";
 static char* short_k10 = "k10";
 static char* short_k15 = "interlagos";
 static char* short_k16 = "kabini";
 static char* short_unknown = "unknown";
 
+/* #####  EXPORTED VARIABLES  ########################################## */
 
+CpuInfo cpuid_info;
+CpuTopology cpuid_topology;
+int affinity_thread2tile_lookup[MAX_NUM_THREADS];
 
-int cpu_count(cpu_set_t* set)
-{
-    uint32_t i;
-    int s = 0;
-    const __cpu_mask *p = set->__bits;
-    const __cpu_mask *end = &set->__bits[sizeof(cpu_set_t) / sizeof (__cpu_mask)];
-
-    while (p < end)
-    {
-        __cpu_mask l = *p++;
-
-        if (l == 0)
-        {
-            continue;
-        }
-
-        for (i=0; i< (sizeof(__cpu_mask)*8); i++)
-        {
-            if (l&(1UL<<i))
-            {
-                s++;
-            }
-        }
-    }
-
-    return s;
-}
+/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
 
-static void initTopologyFile(FILE* file)
+static void
+initTopologyFile(FILE* file)
 {
     size_t items;
     HWThread* hwThreadPool;
@@ -198,8 +178,8 @@ static void initTopologyFile(FILE* file)
     }
 }
 
-
-static int readTopologyFile(const char* filename)
+static int
+readTopologyFile(const char* filename)
 {
     FILE* fp;
     char structure[256];
@@ -516,7 +496,39 @@ static int readTopologyFile(const char* filename)
     return 0;
 }
 
-int topology_setName(void)
+/* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
+
+int
+cpu_count(cpu_set_t* set)
+{
+    uint32_t i;
+    int s = 0;
+    const __cpu_mask *p = set->__bits;
+    const __cpu_mask *end = &set->__bits[sizeof(cpu_set_t) / sizeof (__cpu_mask)];
+
+    while (p < end)
+    {
+        __cpu_mask l = *p++;
+
+        if (l == 0)
+        {
+            continue;
+        }
+
+        for (i=0; i< (sizeof(__cpu_mask)*8); i++)
+        {
+            if (l&(1UL<<i))
+            {
+                s++;
+            }
+        }
+    }
+    return s;
+}
+
+
+int
+topology_setName(void)
 {
     switch ( cpuid_info.family )
     {
@@ -604,10 +616,12 @@ int topology_setName(void)
                     cpuid_info.short_name = short_broadwell;
                     break;
                 case BROADWELL_D:
+                    cpuid_info.supportUncore = 1;
                     cpuid_info.name = broadwell_d_str;
                     cpuid_info.short_name = short_broadwell_d;
                     break;
                 case BROADWELL_E:
+                    cpuid_info.supportUncore = 1;
                     cpuid_info.name = broadwell_ep_str;
                     cpuid_info.short_name = short_broadwell_ep;
                     break;
@@ -618,7 +632,14 @@ int topology_setName(void)
                     cpuid_info.short_name = short_skylake;
                     break;
 
-                case XEON_PHI2:
+                case KABYLAKE1:
+                case KABYLAKE2:
+                    cpuid_info.name = kabylake_str;
+                    cpuid_info.short_name = short_skylake;
+                    break;
+
+                case XEON_PHI_KNL:
+                    cpuid_info.supportUncore = 1;
                     cpuid_info.name = xeon_phi2_string;
                     cpuid_info.short_name = short_phi2;
                     break;
@@ -783,7 +804,8 @@ int topology_setName(void)
     return EXIT_SUCCESS;
 }
 
-const struct topology_functions topology_funcs = {
+const struct
+topology_functions topology_funcs = {
 #ifndef LIKWID_USE_HWLOC
     .init_cpuInfo = cpuid_init_cpuInfo,
     .init_cpuFeatures = cpuid_init_cpuFeatures,
@@ -837,15 +859,22 @@ void topology_setupTree(void)
         }
 
     }
-    cpuid_topology.numSockets = tree_countChildren(cpuid_topology.topologyTree);
+    i = tree_countChildren(cpuid_topology.topologyTree);
+    if (cpuid_topology.numSockets == 0)
+        cpuid_topology.numSockets = i;
     currentNode = tree_getChildNode(cpuid_topology.topologyTree);
-    cpuid_topology.numCoresPerSocket = tree_countChildren(currentNode);
+    i = tree_countChildren(currentNode);
+    if (cpuid_topology.numCoresPerSocket == 0)
+        cpuid_topology.numCoresPerSocket = i;
     currentNode = tree_getChildNode(currentNode);
-    cpuid_topology.numThreadsPerCore = tree_countChildren(currentNode);
+    i = tree_countChildren(currentNode);
+    if (cpuid_topology.numThreadsPerCore == 0)
+        cpuid_topology.numThreadsPerCore = i;
     return;
 }
 
-int topology_init(void)
+int
+topology_init(void)
 {
     int ret = 0;
     cpu_set_t cpuSet;
@@ -886,8 +915,8 @@ standard_init:
         topology_setName();
         funcs.init_cpuFeatures();
         funcs.init_nodeTopology(cpuSet);
-        topology_setupTree();
         funcs.init_cacheTopology();
+        topology_setupTree();
         sched_setaffinity(0, sizeof(cpu_set_t), &cpuSet);
     }
     else
@@ -917,7 +946,8 @@ standard_init:
 }
 
 
-void topology_finalize(void)
+void
+topology_finalize(void)
 {
     struct topology_functions funcs = topology_funcs;
     if (!topology_initialized)
@@ -978,11 +1008,8 @@ void topology_finalize(void)
     topology_initialized = 0;
 }
 
-
-
-
-
-void print_supportedCPUs (void)
+void
+print_supportedCPUs (void)
 {
     printf("Supported Intel processors:\n");
     printf("\t%s\n",core_2a_str);
@@ -1024,18 +1051,20 @@ void print_supportedCPUs (void)
     printf("\n");
 }
 
-
-
-CpuTopology_t get_cpuTopology(void)
+CpuTopology_t
+get_cpuTopology(void)
 {
     return &cpuid_topology;
 }
 
-CpuInfo_t get_cpuInfo(void)
+CpuInfo_t
+get_cpuInfo(void)
 {
     return &cpuid_info;
 }
-NumaTopology_t get_numaTopology(void)
+
+NumaTopology_t
+get_numaTopology(void)
 {
     return &numa_info;
 }
diff --git a/src/topology_cpuid.c b/src/topology_cpuid.c
index 5fbcea4..1e9a5eb 100644
--- a/src/topology_cpuid.c
+++ b/src/topology_cpuid.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Interface to the cpuid based topology backend
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Authors:  Jan Treibig (jt), jan.treibig at gmail.com,
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -29,31 +29,37 @@
  * =======================================================================================
  */
 
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <sched.h>
 #include <unistd.h>
 
 #include <error.h>
-
 #include <tree.h>
 #include <bitUtil.h>
 #include <tlb-info.h>
 #include <topology.h>
 #include <cpuid.h>
+#include <affinity.h>
 
 /* #####   MACROS  -  LOCAL TO THIS SOURCE FILE   ######################### */
+
 #define MAX_CACHE_LEVELS 4
 
 /* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
-static int largest_function = 0;        
+
+static int largest_function = 0;
 static uint32_t eax, ebx, ecx, edx;
 
 /* Dirty hack to avoid nonull warnings */
 char* (*ownstrcpy)(char *__restrict __dest, const char *__restrict __src);
 
 /* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
-static int intelCpuidFunc_4(CacheLevel** cachePool)
+
+static int
+intelCpuidFunc_4(CacheLevel** cachePool)
 {
     int i;
     int level=0;
@@ -77,7 +83,7 @@ static int intelCpuidFunc_4(CacheLevel** cachePool)
     *cachePool = (CacheLevel*) malloc(maxNumLevels * sizeof(CacheLevel));
     pool = *cachePool;
 
-    for (i=0; i < maxNumLevels; i++) 
+    for (i=0; i < maxNumLevels; i++)
     {
         eax = 0x04;
         ecx = i;
@@ -119,7 +125,7 @@ static int intelCpuidFunc_4(CacheLevel** cachePool)
             }
         }
 
-        /* :WORKAROUND:08/13/2009 08:34:15 AM:jt: For L3 caches the value is sometimes 
+        /* :WORKAROUND:08/13/2009 08:34:15 AM:jt: For L3 caches the value is sometimes
          * too large in here. Ask Intel what is wrong here!
          * Limit threads per Socket then to the maximum possible value.*/
         if(pool[i].threads > (int)
@@ -131,11 +137,11 @@ static int intelCpuidFunc_4(CacheLevel** cachePool)
         }
         pool[i].inclusive = edx&0x2;
     }
-
     return maxNumLevels;
 }
 
-static uint32_t amdGetAssociativity(uint32_t flag)
+static uint32_t
+amdGetAssociativity(uint32_t flag)
 {
     uint32_t asso= 0;
 
@@ -193,13 +199,12 @@ static uint32_t amdGetAssociativity(uint32_t flag)
             break;
     }
     return asso;
-
 }
 
-
 /* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
 
-void cpuid_printTlbTopology()
+void
+cpuid_printTlbTopology()
 {
     int i;
     uint32_t loop = 1;
@@ -208,8 +213,6 @@ void cpuid_printTlbTopology()
     {
         eax = 0x02;
         CPUID(eax, ebx, ecx, edx);
-    
-    
         loop = extractBitField(eax,8,0);
         for(i=1;i<loop;i++)
         {
@@ -280,7 +283,7 @@ void cpuid_printTlbTopology()
         printf("L2ITlb4KAssoc: 0x%x\n",extractBitField(eax,4,12));
         printf("L2ITlb4KAssoc_c: %d\n",amdGetAssociativity(extractBitField(eax,4,12)));
         printf("L2ITlb4KSize: 0x%x\n",extractBitField(eax,12,0));
-    }        
+    }
     return;
 }
 
@@ -294,7 +297,7 @@ cpuid_set_osname(void)
     ownstrcpy = strcpy;
     int i;
 
-    if (NULL != (fp = fopen ("/proc/cpuinfo", "r"))) 
+    if (NULL != (fp = fopen ("/proc/cpuinfo", "r")))
     {
         bstring src = bread ((bNread) fread, fp);
         struct bstrList* tokens = bsplit(src,(char) '\n');
@@ -321,8 +324,8 @@ cpuid_set_osname(void)
     fclose(fp);
 }
 
-
-void cpuid_init_cpuInfo(cpu_set_t cpuSet)
+void
+cpuid_init_cpuInfo(cpu_set_t cpuSet)
 {
     int cpus_in_set = 0;
     cpuid_info.isIntel = 1;
@@ -358,7 +361,8 @@ void cpuid_init_cpuInfo(cpu_set_t cpuSet)
     return;
 }
 
-void cpuid_init_cpuFeatures(void)
+void
+cpuid_init_cpuFeatures(void)
 {
     eax = 0x01;
     CPUID(eax, ebx, ecx, edx);
@@ -490,7 +494,11 @@ void cpuid_init_cpuFeatures(void)
         cpuid_info.featureFlags |= (1<<RDTSCP);
     }
 
-    cpuid_info.perf_version   =  0;
+    cpuid_info.perf_version = 0;
+    cpuid_info.perf_num_ctr = 0;
+    cpuid_info.perf_width_ctr = 0;
+    cpuid_info.perf_num_fixed_ctr = 0;
+    cpuid_info.turbo = 0;
     if( cpuid_info.family == P6_FAMILY && 0x0A <= largest_function)
     {
         eax = 0x0A;
@@ -506,16 +514,13 @@ void cpuid_init_cpuFeatures(void)
         {
             cpuid_info.turbo = 1;
         }
-        else
-        {
-            cpuid_info.turbo = 0;
-        }
     }
 
     return;
 }
 
-void cpuid_init_nodeTopology(cpu_set_t cpuSet)
+void
+cpuid_init_nodeTopology(cpu_set_t cpuSet)
 {
     uint32_t apicId;
     uint32_t bitField;
@@ -529,10 +534,7 @@ void cpuid_init_nodeTopology(cpu_set_t cpuSet)
     int maxNumLogicalProcsPerCore;
     int maxNumCores;
     int width;
-    
     hwThreadPool = (HWThread*) malloc(cpuid_topology.numHWThreads * sizeof(HWThread));
-    
-    
     /* check if 0x0B cpuid leaf is supported */
     if (largest_function >= 0x0B)
     {
@@ -586,6 +588,7 @@ void cpuid_init_nodeTopology(cpu_set_t cpuSet)
                                 currOffset-prevOffset,
                                 prevOffset);
                         hwThreadPool[id].coreId = bitField;
+                        affinity_thread2core_lookup[hwThreadPool[id].apicId] = hwThreadPool[id].coreId;
                         break;
 
                     case 2:  /* Package */
@@ -620,7 +623,6 @@ void cpuid_init_nodeTopology(cpu_set_t cpuSet)
                 ecx = 0;
                 CPUID(eax, ebx, ecx, edx);
                 maxNumCores = extractBitField(eax,6,26)+1;
-
                 maxNumLogicalProcsPerCore = maxNumLogicalProcs/maxNumCores;
 
                 for (uint32_t i=0; i<  cpuid_topology.numHWThreads; i++)
@@ -640,21 +642,22 @@ void cpuid_init_nodeTopology(cpu_set_t cpuSet)
                      * */
                     hwThreadPool[id].threadId =
                         extractBitField(hwThreadPool[id].apicId,
-                                getBitFieldWidth(maxNumLogicalProcsPerCore),0); 
+                                getBitFieldWidth(maxNumLogicalProcsPerCore),0);
 
-                    /* CoreId is extracted from th apicId using the bitWidth 
+                    /* CoreId is extracted from the apicId using the bitWidth
                      * of the number of logical processors as offset and the
                      * bit width of the number of cores as width
                      * */
                     hwThreadPool[id].coreId =
                         extractBitField(hwThreadPool[id].apicId,
                                 getBitFieldWidth(maxNumCores),
-                                getBitFieldWidth(maxNumLogicalProcsPerCore)); 
+                                getBitFieldWidth(maxNumLogicalProcsPerCore));
 
                     hwThreadPool[id].packageId =
                         extractBitField(hwThreadPool[id].apicId,
                                 8-getBitFieldWidth(maxNumLogicalProcs),
                                 getBitFieldWidth(maxNumLogicalProcs));
+
                     DEBUG_PRINT(DEBUGLEV_DEVELOP, I[%d] ID[%d] APIC[%d] T[%d] C[%d] P [%d], i, id,
                                     hwThreadPool[id].apicId, hwThreadPool[id].threadId,
                                     hwThreadPool[id].coreId, hwThreadPool[id].packageId);
@@ -691,16 +694,16 @@ void cpuid_init_nodeTopology(cpu_set_t cpuSet)
                      * */
                     hwThreadPool[id].threadId =
                         extractBitField(hwThreadPool[i].apicId,
-                                getBitFieldWidth(maxNumLogicalProcsPerCore),0); 
+                                getBitFieldWidth(maxNumLogicalProcsPerCore),0);
 
-                    /* CoreId is extracted from th apicId using the bitWidth 
+                    /* CoreId is extracted from the apicId using the bitWidth
                      * of the number of logical processors as offset and the
                      * bit width of the number of cores as width
                      * */
                     hwThreadPool[id].coreId =
                         extractBitField(hwThreadPool[i].apicId,
                                 getBitFieldWidth(maxNumCores),
-                                0); 
+                                0);
 
                     hwThreadPool[id].packageId =
                         extractBitField(hwThreadPool[i].apicId,
@@ -734,7 +737,6 @@ void cpuid_init_nodeTopology(cpu_set_t cpuSet)
                 maxNumLogicalProcs =  extractBitField(ebx,8,16);
                 maxNumCores = extractBitField(ecx,8,0)+1;
 
-
                 for (uint32_t i=0; i<  cpuid_topology.numHWThreads; i++)
                 {
                     int id;
@@ -751,7 +753,7 @@ void cpuid_init_nodeTopology(cpu_set_t cpuSet)
 
                     hwThreadPool[id].coreId =
                         extractBitField(hwThreadPool[i].apicId,
-                                width, 0); 
+                                width, 0);
                     hwThreadPool[id].packageId =
                         extractBitField(hwThreadPool[i].apicId,
                                 (8-width), width);
@@ -764,19 +766,18 @@ void cpuid_init_nodeTopology(cpu_set_t cpuSet)
         }
     }
     cpuid_topology.threadPool = hwThreadPool;
-    
     return;
 }
 
-
-void cpuid_init_cacheTopology(void)
+void
+cpuid_init_cacheTopology(void)
 {
     int maxNumLevels=0;
     int id=0;
     CacheLevel* cachePool = NULL;
     CacheType type = DATACACHE;
 
-    switch ( cpuid_info.family ) 
+    switch ( cpuid_info.family )
     {
         case MIC_FAMILY:
 
@@ -792,7 +793,6 @@ void cpuid_init_cacheTopology(void)
             }
 
             break;
-
         case K8_FAMILY:
             maxNumLevels = 2;
             cachePool = (CacheLevel*) malloc(maxNumLevels * sizeof(CacheLevel));
@@ -816,7 +816,7 @@ void cpuid_init_cacheTopology(void)
             CPUID(eax, ebx, ecx, edx);
             cachePool[1].level = 2;
             cachePool[1].type = UNIFIEDCACHE;
-            cachePool[1].associativity = 
+            cachePool[1].associativity =
                 amdGetAssociativity(extractBitField(ecx,4,12));
             cachePool[1].lineSize = extractBitField(ecx,8,0);
             cachePool[1].size =  extractBitField(ecx,16,16) * 1024;
@@ -829,8 +829,6 @@ void cpuid_init_cacheTopology(void)
             cachePool[1].inclusive = 1;
 
             break;
-
-
         case K10_FAMILY:
             /* FIXME: Adds one level for the instruction cache on Intel
              * This fixes the level for the cores
@@ -894,7 +892,6 @@ void cpuid_init_cacheTopology(void)
             cachePool[2].inclusive = 1;
 
             break;
-
         case K16_FAMILY:
 
         case K15_FAMILY:
@@ -925,15 +922,13 @@ void cpuid_init_cacheTopology(void)
                 id++;
             }
             break;
-
         default:
             ERROR_PLAIN_PRINT(Processor is not supported);
             break;
     }
-    
 
     cpuid_topology.numCacheLevels = maxNumLevels;
     cpuid_topology.cacheLevels = cachePool;
-    
     return;
 }
+
diff --git a/src/topology_hwloc.c b/src/topology_hwloc.c
index 5111faa..13991f1 100644
--- a/src/topology_hwloc.c
+++ b/src/topology_hwloc.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Interface to the hwloc based topology backend
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Authors:  Thomas Roehl (tr), thomas.roehl at googlemail.com
  *
@@ -28,29 +28,34 @@
  *
  * =======================================================================================
  */
- 
+
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <error.h>
 
 #include <topology.h>
+#include <affinity.h>
 #ifdef LIKWID_USE_HWLOC
 #include <hwloc.h>
 #include <topology_hwloc.h>
 #endif
 
-hwloc_topology_t hwloc_topology = NULL;
-
-
-/* #####   MACROS  -  LOCAL TO THIS SOURCE FILE   ######################### */
+/* #####   EXPORTED VARIABLES   ########################################### */
 
-/* #####   VARIABLES  -  LOCAL TO THIS SOURCE FILE   ###################### */
-
-/* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
+hwloc_topology_t hwloc_topology = NULL;
 
 /* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
+
 #ifdef LIKWID_USE_HWLOC
-int likwid_hwloc_record_objs_of_type_below_obj(hwloc_topology_t t, hwloc_obj_t obj, hwloc_obj_type_t type, int* index, uint32_t **list)
+int
+likwid_hwloc_record_objs_of_type_below_obj(
+        hwloc_topology_t t,
+        hwloc_obj_t obj,
+        hwloc_obj_type_t type,
+        int* index,
+        uint32_t **list)
 {
     int i;
     int count = 0;
@@ -73,7 +78,8 @@ int likwid_hwloc_record_objs_of_type_below_obj(hwloc_topology_t t, hwloc_obj_t o
     return count;
 }
 
-void hwloc_init_cpuInfo(cpu_set_t cpuSet)
+void
+hwloc_init_cpuInfo(cpu_set_t cpuSet)
 {
     int i;
     hwloc_obj_t obj;
@@ -120,15 +126,16 @@ void hwloc_init_cpuInfo(cpu_set_t cpuSet)
     return;
 }
 
-void hwloc_init_nodeTopology(cpu_set_t cpuSet)
+void
+hwloc_init_nodeTopology(cpu_set_t cpuSet)
 {
-    HWThread* hwThreadPool;
-    int maxNumLogicalProcs;
-    int maxNumLogicalProcsPerCore;
-    int maxNumCores;
-    int maxNumSockets;
-    int maxNumCoresPerSocket;
-    hwloc_obj_t obj;
+    HWThread* hwThreadPool = NULL;
+    int maxNumLogicalProcs = 0;
+    int maxNumLogicalProcsPerCore = 0;
+    int maxNumCores = 0;
+    int maxNumSockets = 0;
+    int maxNumCoresPerSocket = 0;
+    hwloc_obj_t obj = NULL;
     int poolsize = 0;
     int nr_sockets = 1;
     int id = 0;
@@ -199,50 +206,70 @@ void hwloc_init_nodeTopology(cpu_set_t cpuSet)
             {
                 hwThreadPool[id].coreId = 0;
                 hwThreadPool[id].packageId = 0;
+                affinity_thread2core_lookup[hwThreadPool[id].apicId] = hwThreadPool[id].coreId;
                 continue;
             }
-            hwThreadPool[id].coreId = obj->os_index;
         }
-        else
+        if (skip)
         {
-            hwThreadPool[id].coreId = hwThreadPool[id].apicId % maxNumCoresPerSocket;
+            hwThreadPool[id].coreId = 0;
+            hwThreadPool[id].packageId = 0;
+            continue;
         }
-        if (maxNumSockets > 1)
+        hwThreadPool[id].coreId = obj->logical_index;
+#if defined(__x86_64) || defined(__i386__)
+        if (maxNumLogicalProcsPerCore == 1 && cpuid_info.isIntel == 0)
         {
-            while (obj->type != socket_type) {
-                obj = obj->parent;
-                if (!obj)
+            if (id == 0)
+            {
+                hwThreadPool[id].coreId = hwThreadPool[id].apicId;
+            }
+            else
+            {
+                if (hwThreadPool[id].apicId == hwThreadPool[id-1].apicId + 1 &&
+                    hwThreadPool[id].packageId == hwThreadPool[id-1].packageId)
                 {
-                    skip = 1;
-                    break;
+                    hwThreadPool[id].coreId = hwThreadPool[id].apicId % maxNumCoresPerSocket;
+                }
+                else
+                {
+                    hwThreadPool[id].coreId = hwThreadPool[id].apicId;
                 }
             }
-            if (skip)
+        }
+#endif
+        affinity_thread2core_lookup[hwThreadPool[id].apicId] = hwThreadPool[id].coreId;
+        while (obj->type != socket_type) {
+            obj = obj->parent;
+            if (!obj)
             {
-                hwThreadPool[id].packageId = 0;
-                continue;
+                skip = 1;
+                break;
             }
-            hwThreadPool[id].packageId = obj->os_index;
         }
-        else
+        if (skip)
         {
             hwThreadPool[id].packageId = 0;
+            continue;
         }
+        hwThreadPool[id].packageId = obj->os_index;
         DEBUG_PRINT(DEBUGLEV_DEVELOP, HWLOC Thread Pool PU %d Thread %d Core %d Socket %d inCpuSet %d,
-                            hwThreadPool[i].apicId,
-                            hwThreadPool[i].threadId,
-                            hwThreadPool[i].coreId,
-                            hwThreadPool[i].packageId,
-                            hwThreadPool[i].inCpuSet)
+                            hwThreadPool[id].apicId,
+                            hwThreadPool[id].threadId,
+                            hwThreadPool[id].coreId,
+                            hwThreadPool[id].packageId,
+                            hwThreadPool[id].inCpuSet)
     }
 
     cpuid_topology.threadPool = hwThreadPool;
-
+    cpuid_topology.numThreadsPerCore = maxNumLogicalProcsPerCore;
+    cpuid_topology.numCoresPerSocket = maxNumCoresPerSocket;
+    cpuid_topology.numSockets = maxNumSockets;
     return;
 }
 
-
-void hwloc_init_cacheTopology(void)
+void
+hwloc_init_cacheTopology(void)
 {
     int maxNumLevels=0;
     int id=0;
@@ -263,7 +290,6 @@ void hwloc_init_cacheTopology(void)
     /* Start at the bottom of the tree to get all cache levels in order */
     depth = likwid_hwloc_topology_get_depth(hwloc_topology);
     id = 0;
-    
     for(d=depth-1;d >= 0; d--)
     {
         /* We only need caches, so skip other levels */
@@ -271,6 +297,14 @@ void hwloc_init_cacheTopology(void)
         {
             continue;
         }
+        cachePool[id].level = 0;
+        cachePool[id].type = NOCACHE;
+        cachePool[id].associativity = 0;
+        cachePool[id].lineSize = 0;
+        cachePool[id].size = 0;
+        cachePool[id].sets = 0;
+        cachePool[id].inclusive = 0;
+        cachePool[id].threads = 0;
         /* Get the cache object */
         obj = likwid_hwloc_get_obj_by_depth(hwloc_topology, d, 0);
         /* All caches have this attribute, so safe to access */
@@ -304,7 +338,7 @@ void hwloc_init_cacheTopology(void)
         /* Count all HWThreads below the current cache */
         cachePool[id].threads = likwid_hwloc_record_objs_of_type_below_obj(
                         hwloc_topology, obj, HWLOC_OBJ_PU, NULL, NULL);
-
+#if defined(__x86_64) || defined(__i386__)
         while (!(info = likwid_hwloc_obj_get_info_by_name(obj, "inclusiveness")) && obj->next_cousin)
         {
             obj = obj->next_cousin; // If some PU/core are not bindable because of cgroup, hwloc may not know the inclusiveness of some of their cache.
@@ -313,11 +347,18 @@ void hwloc_init_cacheTopology(void)
         {
             cachePool[id].inclusive = info[0]=='t';
         }
-        else
+        else if (cpuid_info.isIntel)
         {
-            ERROR_PLAIN_PRINT(Processor is not supported);
+            DEBUG_PLAIN_PRINT(DEBUGLEV_ONLY_ERROR, Processor is not supported);
             break;
         }
+#endif
+#if defined(_ARCH_PPC)
+        cachePool[id].inclusive = 0;
+#endif
+        DEBUG_PRINT(DEBUGLEV_DEVELOP, HWLOC Cache Pool ID %d Level %d Size %d,
+                                      id, cachePool[id].level,
+                                      cachePool[id].size);
         id++;
     }
 
@@ -326,17 +367,17 @@ void hwloc_init_cacheTopology(void)
     return;
 }
 
-void hwloc_close(void)
+void
+hwloc_close(void)
 {
     if (hwloc_topology)
     {
         hwloc_topology_destroy(hwloc_topology);
     }
 }
-
 #else
-
-void hwloc_init_cpuInfo(void)
+void
+hwloc_init_cpuInfo(void)
 {
     return;
 }
diff --git a/src/topology_proc.c b/src/topology_proc.c
index 5e09485..530f8e2 100644
--- a/src/topology_proc.c
+++ b/src/topology_proc.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Interface to the procfs/sysfs based topology backend
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Authors:  Jan Treibig (jt), jan.treibig at gmail.com,
  *                Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -29,18 +29,27 @@
  * =======================================================================================
  */
 
+/* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <topology_proc.h>
+#include <affinity.h>
 #include <cpuid.h>
 
-/* #####   MACROS  -  LOCAL TO THIS SOURCE FILE   ######################### */
 /* #####   FUNCTION DEFINITIONS  -  LOCAL TO THIS SOURCE FILE   ########### */
-static int get_cpu_perf_data(void)
+
+static int
+get_cpu_perf_data(void)
 {
     uint32_t eax = 0x0U, ebx = 0x0U, ecx = 0x0U, edx = 0x0U;
     int largest_function = 0;
     eax = 0x00;
     CPUID(eax, ebx, ecx, edx);
     largest_function = eax;
+    cpuid_info.perf_version = 0;
+    cpuid_info.perf_num_ctr = 0;
+    cpuid_info.perf_width_ctr = 0;
+    cpuid_info.perf_num_fixed_ctr = 0;
+    cpuid_info.turbo = 0;
     if (cpuid_info.family == P6_FAMILY && 0x0A <= largest_function)
     {
         eax = 0x0A;
@@ -64,7 +73,8 @@ static int get_cpu_perf_data(void)
     return 0;
 }
 
-int get_listPosition(int ownid, bstring list)
+static int
+get_listPosition(int ownid, bstring list)
 {
     bstring ownStr = bformat("%d",ownid);
     struct bstrList* tokens = bsplit(list,(char) ',');
@@ -80,7 +90,8 @@ int get_listPosition(int ownid, bstring list)
     return -1;
 }
 
-int fillList(int* outList, int outOffset, bstring list)
+static int
+fillList(int* outList, int outOffset, bstring list)
 {
     int current = 0;
     int (*ownatoi)(const char*);
@@ -108,7 +119,6 @@ int fillList(int* outList, int outOffset, bstring list)
                     {
                         outList[outOffset+current] = j;
                     }
-                    
                     current++;
                 }
             }
@@ -119,7 +129,8 @@ int fillList(int* outList, int outOffset, bstring list)
     return current;
 }
 
-static int readCacheInclusiveIntel(int level)
+static int
+readCacheInclusiveIntel(int level)
 {
     uint32_t eax = 0x0U, ebx = 0x0U, ecx = 0x0U, edx = 0x0U;
     eax = 0x04;
@@ -138,7 +149,9 @@ static int readCacheInclusiveAMD(int level)
 }
 
 /* #####   FUNCTION DEFINITIONS  -  EXPORTED FUNCTIONS   ################## */
-void proc_init_cpuInfo(cpu_set_t cpuSet)
+
+void
+proc_init_cpuInfo(cpu_set_t cpuSet)
 {
     int i = 0;
     int HWthreads = 0;
@@ -164,7 +177,7 @@ void proc_init_cpuInfo(cpu_set_t cpuSet)
     cpuid_topology.numHWThreads = 0;
     cpuid_info.osname = malloc(MAX_MODEL_STRING_LENGTH * sizeof(char));
 
-    if (NULL != (fp = fopen ("/proc/cpuinfo", "r"))) 
+    if (NULL != (fp = fopen ("/proc/cpuinfo", "r")))
     {
         bstring src = bread ((bNread) fread, fp);
         struct bstrList* tokens = bsplit(src,(char) '\n');
@@ -221,7 +234,8 @@ void proc_init_cpuInfo(cpu_set_t cpuSet)
     return;
 }
 
-void proc_init_cpuFeatures(void)
+void
+proc_init_cpuFeatures(void)
 {
     int ret;
     FILE* file;
@@ -391,9 +405,8 @@ void proc_init_cpuFeatures(void)
     return;
 }
 
-
-
-void proc_init_nodeTopology(cpu_set_t cpuSet)
+void
+proc_init_nodeTopology(cpu_set_t cpuSet)
 {
     HWThread* hwThreadPool;
     FILE *fp;
@@ -420,6 +433,7 @@ void proc_init_nodeTopology(cpu_set_t cpuSet)
         {
             bstring src = bread ((bNread) fread, fp);
             hwThreadPool[i].coreId = ownatoi(bdata(src));
+            affinity_thread2core_lookup[hwThreadPool[i].apicId] = hwThreadPool[i].coreId;
             fclose(fp);
         }
         bdestroy(file);
@@ -451,7 +465,8 @@ void proc_init_nodeTopology(cpu_set_t cpuSet)
     return;
 }
 
-void proc_init_cacheTopology(void)
+void
+proc_init_cacheTopology(void)
 {
     FILE *fp;
     CacheLevel* cachePool = NULL;
diff --git a/src/tree.c b/src/tree.c
index e60c9f9..836444d 100644
--- a/src/tree.c
+++ b/src/tree.c
@@ -5,8 +5,8 @@
  *
  *      Description:  Module implementing a tree data structure
  *
- *      Version:   4.1
- *      Released:  8.8.2016
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
  *
  *      Author:   Jan Treibig (jt), jan.treibig at gmail.com
  *      Project:  likwid
@@ -27,7 +27,9 @@
  *
  * =======================================================================================
  */
+
 /* #####   HEADER FILE INCLUDES   ######################################### */
+
 #include <stdlib.h>
 #include <stdio.h>
 
@@ -35,6 +37,7 @@
 #include <tree.h>
 
 /* #####   FUNCTION DEFINITIONS  -  INTERNAL FUNCTIONS   ################## */
+
 void _tree_destroy(TreeNode* nodePtr)
 {
     if (nodePtr == NULL)
@@ -100,7 +103,6 @@ tree_print(TreeNode* nodePtr)
   }
 }
 
-
 void
 tree_destroy(TreeNode* nodePtr)
 {
@@ -285,5 +287,3 @@ tree_getNextNode(TreeNode* nodePtr)
     return nodePtr->rlink;
 }
 
-
-
diff --git a/test/Makefile b/test/Makefile
index 1209136..6442444 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -13,50 +13,50 @@ ICPC_AVAILABLE=$(shell /bin/bash -c "which icpc | wc -l")
 TBB_AVAILABLE=$(shell /bin/bash -c "ldconfig -v 2>/dev/null | grep libtbb.so | wc -l")
 
 streamGCC: stream.c
-	gcc -O3 -std=c99 $(LIKWID_INCLUDE) $(LIKWID_DEFINES) -fopenmp  -o $@  stream.c $(LIKWID_LIB) -lm
+	gcc -O3 -std=c99 $(LIKWID_INCLUDE) $(LIKWID_DEFINES) -fopenmp  -o $@  stream.c $(LIKWID_LIB) -lm -llikwid
 
 streamAPIGCC: stream-API.c
-	gcc -O3 -std=c99 $(LIKWID_INCLUDE) -fopenmp -ftree-vectorize -ffast-math -o $@  stream-API.c $(LIKWID_LIB) -lm
+	gcc -O3 -std=c99 $(LIKWID_INCLUDE) -fopenmp -ftree-vectorize -ffast-math -o $@  stream-API.c $(LIKWID_LIB) -lm -llikwid
 
 serial: serial.c
-	gcc -O3 -std=c99 $(LIKWID_INCLUDE) $(LIKWID_DEFINES) -o $@  serial.c $(LIKWID_LIB) -lm
+	gcc -O3 -std=c99 $(LIKWID_INCLUDE) $(LIKWID_DEFINES) -o $@  serial.c $(LIKWID_LIB) -lm -llikwid
 
 test-likwidAPI: test-likwidAPI.c
-	gcc -O3 -std=c99 $(LIKWID_INCLUDE) $(LIKWID_DEFINES) -o $@  test-likwidAPI.c $(LIKWID_LIB) -lm
+	gcc -O3 -std=c99 $(LIKWID_INCLUDE) $(LIKWID_DEFINES) -o $@  test-likwidAPI.c $(LIKWID_LIB) -lm -llikwid
 
 test-msr-access: test-msr-access.c
 	gcc -o $@  test-msr-access.c
 
 streamICC: stream.c
-	if [ $(ICC_AVAILABLE) -ne 0 ]; then icc -O3 -xHost -std=c99 $(LIKWID_INCLUDES) -openmp  -o $@  $(LIKWID_DEFINES) stream.c $(LIKWID_LIB) -lm; fi
+	if [ $(ICC_AVAILABLE) -ne 0 ]; then icc -O3 -xHost -std=c99 $(LIKWID_INCLUDES) -openmp  -o $@  $(LIKWID_DEFINES) stream.c $(LIKWID_LIB) -lm -llikwid; fi
 
 streamGCC_C11: stream.cc
-	@if [ $(GCC_C11_SUPPORT_MAJOR) -eq 4  -a  $(GCC_C11_SUPPORT_MINOR) -gt 8 ]; then g++ -O3 -std=c++11 -pthread -o $@ $(LIKWID_DEFINES) stream.cc $(LIKWID_LIB) -lm; fi
-	@if [ $(GCC_C11_SUPPORT_MAJOR) -gt 4 ]; then g++ -O3 -std=c++11 -pthread -o $@ $(LIKWID_DEFINES) stream.cc $(LIKWID_LIB) -lm; fi
+	@if [ $(GCC_C11_SUPPORT_MAJOR) -eq 4  -a  $(GCC_C11_SUPPORT_MINOR) -gt 8 ]; then g++ -O3 -std=c++11 -pthread -o $@ $(LIKWID_DEFINES) stream.cc $(LIKWID_LIB) -lm -llikwid; fi
+	@if [ $(GCC_C11_SUPPORT_MAJOR) -gt 4 ]; then g++ -O3 -std=c++11 -pthread -o $@ $(LIKWID_DEFINES) stream.cc $(LIKWID_LIB) -lm -llikwid; fi
 
 streamICC_C11: stream.cc
-	@if [ $(ICPC_AVAILABLE) -ne 0 ]; then icpc -restrict -O3 -std=c++11 -pthread -o $@ $(LIKWID_DEFINES) stream.cc $(LIKWID_LIB) -lm; fi
+	@if [ $(ICPC_AVAILABLE) -ne 0 ]; then icpc -restrict -O3 -std=c++11 -pthread -o $@ $(LIKWID_DEFINES) stream.cc $(LIKWID_LIB) -lm -llikwid; fi
 
 testmarker-cnt: testmarker-cnt.c
-	gcc -O3 -std=c99  $(LIKWID_INCLUDES) -fopenmp $(LIKWID_DEFINES) -o $@ testmarker-cnt.c $(LIKWID_LIB) -lm
+	gcc -O3 -std=c99  $(LIKWID_INCLUDES) -fopenmp $(LIKWID_DEFINES) -o $@ testmarker-cnt.c $(LIKWID_LIB) -lm -llikwid
 
 testmarker-omp: testmarker-omp.c
-	gcc -O3 -std=c99  $(LIKWID_INCLUDES) -fopenmp $(LIKWID_DEFINES) -o $@ testmarker-omp.c $(LIKWID_LIB)
+	gcc -O3 -std=c99  $(LIKWID_INCLUDES) -fopenmp $(LIKWID_DEFINES) -o $@ testmarker-omp.c $(LIKWID_LIB) -llikwid
 
 testmarkerF90: chaos.F90
-	ifort $(LIKWID_INCLUDES) $(LIKWID_DEFINES) -O3  -o $@ chaos.F90 $(LIKWID_LIB) -lpthread
+	ifort $(LIKWID_INCLUDES) $(LIKWID_DEFINES) -O3  -o $@ chaos.F90 $(LIKWID_LIB) -lpthread -llikwid
 
 test-mpi: MPI_pin_test.c$
 	mpicc -O2 -fopenmp -D_GNU_SOURCE  -o $@ MPI_pin_test.c
 
 stream_cilk: stream_cilk.c
-	@if [ $(ICC_AVAILABLE) -ne 0 ]; then icc -O3 $(LIKWID_DEFINES) $(LIKWID_INCLUDES) -o $@ stream_cilk.c $(LIKWID_LIB); fi
+	@if [ $(ICC_AVAILABLE) -ne 0 ]; then icc -O3 $(LIKWID_DEFINES) $(LIKWID_INCLUDES) -o $@ stream_cilk.c $(LIKWID_LIB) -llikwid; fi
 
 testTBBGCC:
-	@if [ $(TBB_AVAILABLE) -ne 0 ]; then g++ -O3 $(LIKWID_DEFINES) $(LIKWID_INCLUDES) -o $@ testTBB.cc -ltbb $(LIKWID_LIB); fi
+	@if [ $(TBB_AVAILABLE) -ne 0 ]; then g++ -O3 $(LIKWID_DEFINES) $(LIKWID_INCLUDES) -o $@ testTBB.cc -ltbb $(LIKWID_LIB) -llikwid; fi
 
 testTBBICC:
-	@if [ $(TBB_AVAILABLE) -ne 0 -a $(ICPC_AVAILABLE) -ne 0 ]; then icpc -O3 $(LIKWID_DEFINES) $(LIKWID_INCLUDES) -o $@ testTBB.cc -ltbb $(LIKWID_LIB); else echo "Either TBB or ICPC missing"; fi
+	@if [ $(TBB_AVAILABLE) -ne 0 -a $(ICPC_AVAILABLE) -ne 0 ]; then icpc -O3 $(LIKWID_DEFINES) $(LIKWID_INCLUDES) -o $@ testTBB.cc -ltbb $(LIKWID_LIB) -llikwid; else echo "Either TBB or ICPC missing"; fi
 
 .PHONY: clean streamGCC streamICC streamGCC_C11 streamICC_C11 testmarker-cnt testmarker-omp testmarkerF90 test-mpi stream_cilk serial test-likwidAPI streamAPIGCC test-msr-access testTBBGCC testTBBICC
 
diff --git a/test/accuracy/TESTS/HBM.txt b/test/accuracy/TESTS/HBM.txt
new file mode 100644
index 0000000..71288a4
--- /dev/null
+++ b/test/accuracy/TESTS/HBM.txt
@@ -0,0 +1,58 @@
+REGEX_BENCH MByte\/s:\s+([0-9]+)
+REGEX_PERF \|\s+Memory bandwidth \[MBytes\/s\]\s+\|\s+([0-9\.e\+\-]+)
+
+TEST load
+RUNS 10
+WA_FACTOR 1.0
+VARIANT 218725kB 1000
+VARIANT 426801kB 1000
+VARIANT 634877kB 1000
+VARIANT 842953kB 1000
+
+TEST store
+RUNS 10
+WA_FACTOR 2.0
+VARIANT 218725kB 1000
+VARIANT 426801kB 1000
+VARIANT 634877kB 1000
+VARIANT 842953kB 1000
+
+TEST copy
+RUNS 10
+WA_FACTOR 1.5
+VARIANT 218725kB 1000
+VARIANT 426801kB 1000
+VARIANT 634877kB 1000
+VARIANT 842953kB 1000
+
+TEST stream
+RUNS 10
+WA_FACTOR 1.3333
+VARIANT 218725kB 1000
+VARIANT 426801kB 1000
+VARIANT 634877kB 1000
+VARIANT 842953kB 1000
+
+TEST triad
+RUNS 10
+WA_FACTOR 1.25
+VARIANT 218725kB 1000
+VARIANT 426801kB 1000
+VARIANT 634877kB 1000
+VARIANT 842953kB 1000
+
+TEST daxpy
+RUNS 10
+WA_FACTOR 1.0
+VARIANT 218725kB 1000
+VARIANT 426801kB 1000
+VARIANT 634877kB 1000
+VARIANT 842953kB 1000
+
+TEST ddot
+RUNS 10
+WA_FACTOR 1.0
+VARIANT 218725kB 1000
+VARIANT 426801kB 1000
+VARIANT 634877kB 1000
+VARIANT 842953kB 1000
diff --git a/test/check_group_files.py b/test/check_group_files.py
new file mode 100755
index 0000000..a7ce85a
--- /dev/null
+++ b/test/check_group_files.py
@@ -0,0 +1,319 @@
+#!/usr/bin/env python
+
+# =======================================================================================
+#
+#      Filename:  check_group_files.py
+#
+#      Description:  Basic checks for performance group files
+#
+#      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
+#      Project:  likwid
+#
+#      Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+#
+#      This program is free software: you can redistribute it and/or modify it under
+#      the terms of the GNU General Public License as published by the Free Software
+#      Foundation, either version 3 of the License, or (at your option) any later
+#      version.
+#
+#      This program is distributed in the hope that it will be useful, but WITHOUT ANY
+#      WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+#      PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+#
+#      You should have received a copy of the GNU General Public License along with
+#      this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# =======================================================================================
+
+import sys, os, re, glob, os.path, subprocess
+
+SRCPATH=os.path.dirname(os.path.dirname(os.path.abspath(__file__)))  # repository root: this script is test/check_group_files.py
+GROUPPATH=os.path.join(SRCPATH, "groups")
+EVENTHEADERPATH=os.path.join(SRCPATH, "src/includes")
+PERFCTR="likwid-perfctr"
+
+def get_valid_shorts():
+    l = os.listdir(GROUPPATH)
+    return l
+
+
+def get_local_arch():  # last field of the "CPU short:" line of `likwid-perfctr -i`; None if the call fails or the line is absent
+    p = subprocess.Popen("%s -i" % PERFCTR, stdout=subprocess.PIPE, shell=True, universal_newlines=True)  # text output on Python 2 and 3
+    stdoutdata, stderrdata = p.communicate()
+    if p.returncode == 0:
+        for l in stdoutdata.split("\n"):
+            if l.startswith("CPU short:"):
+                return re.split(r"\s+", l)[-1]
+
+
+def get_all_events_local_arch():  # first field -> last field of each line after "Event tags" in `likwid-perfctr -e`
+    events = {}
+    p = subprocess.Popen("%s -e" % PERFCTR, stdout=subprocess.PIPE, shell=True, universal_newlines=True)
+    stdoutdata, stderrdata = p.communicate()
+    if p.returncode == 0:
+        parse = False
+        # universal_newlines=True yields text on Python 2 and 3, so split("\n") works on both
+        for l in stdoutdata.split("\n"):
+            if l.startswith("Event tags"):
+                parse = True
+                continue
+            if parse:
+                if not l.strip(): continue
+                llist = re.split(r",\s+", l)
+                events[llist[0]] = llist[-1]
+    return events
+
+def get_all_events_given_arch(arch):  # event name -> limit string, parsed from perfmon_<arch>_events.txt; {} if the file is missing
+    events = {}
+    efile = os.path.join(EVENTHEADERPATH, "perfmon_%s_events.txt" % arch)
+    if os.path.exists(efile):
+        with open(efile, "r") as f:  # closed even if read() raises
+            raw = f.read().strip().split("\n")
+        # 'limit' carries the last field of the most recent EVENT_ line over to the UMASK_ lines that follow it
+        limit = None
+        for l in raw:
+            if l.startswith("EVENT_"):
+                limit = re.split(r"\s+", l)[-1]
+            if l.startswith("UMASK_"):
+                e = re.match(r"UMASK_([\w\d_]+)", l)
+                if e:
+                    events[e.group(1)] = limit
+    return events
+
+def get_all_counters_local_arch():  # first field of each line after "Counter tags" in `likwid-perfctr -e`
+    counters = []
+    p = subprocess.Popen("%s -e" % PERFCTR, stdout=subprocess.PIPE, shell=True, universal_newlines=True)
+    stdoutdata, stderrdata = p.communicate()
+    if p.returncode == 0:
+        parse = False
+        # universal_newlines=True yields text on Python 2 and 3, so split("\n") works on both
+        for l in stdoutdata.split("\n"):
+            if l.startswith("Counter tags"):
+                parse = True
+                continue
+            if parse:
+                if not l.strip(): continue
+                llist = re.split(r",\s+", l)
+                counters.append(llist[0])
+    return counters
+
+def get_all_counters_given_arch(arch):  # counter names listed in perfmon_<arch>_counters.h; [] if the file is missing
+    counters = []
+    cfile = os.path.join(EVENTHEADERPATH, "perfmon_%s_counters.h" % arch)
+    if os.path.exists(cfile):
+        with open(cfile, "r") as f:  # closed even if read() raises
+            raw = f.read().strip().split("\n")
+        # scanning is switched on by a "static RegisterMap" line and off again by a line starting with "};"
+        parse = False
+        for l in raw:
+            if l.startswith("static RegisterMap"):
+                parse = True
+                continue
+            if l.startswith("};"):
+                parse = False
+            if parse:
+                c = re.match(r'^\s+{"([\w\d]+)"', l)
+                if c:
+                    counters.append(c.group(1))
+    return counters
+
+def get_all_groupfiles(arch):
+    path=os.path.join(GROUPPATH, arch)
+    if not os.path.exists(path):
+        print("Cannot find group path %s" % path)
+        return []
+    grouplist = glob.glob(path+"/*")
+    return grouplist
+
+def check_short(gfile):  # True when the first line starting with SHORT splits into more than one piece on whitespace
+    if not os.path.exists(gfile):
+        print("Cannot find group file %s" % gfile)
+        return False
+    with open(gfile) as f:
+        inf = f.read().split("\n")
+        for l in inf:
+            if l.startswith("SHORT"):
+                if len(re.split(r"\s+", l)) > 1:
+                    return True
+                else:
+                    return False
+    return False
+
+def check_eventset(gfile, allevents, allcounters):  # validates the EVENTSET section of one group file; prints the reason on failure
+    events = {}
+    noht = False
+    if not os.path.exists(gfile):
+        print("Cannot find group file %s" % gfile)
+        return False
+    with open(gfile) as f:
+        parse = False
+        inf = f.read().split("\n")
+        for l in inf:
+            if l.startswith("REQUIRE_NOHT"):
+                noht = True
+            if l.startswith("EVENTSET"):
+                parse = True
+                continue
+            if l.startswith("METRICS"):
+                parse = False
+                continue
+            if parse:
+                if not l.strip(): continue
+                elist = re.split(r"\s+", l)
+                if len(elist) < 2:
+                    # report a counter without an event instead of raising IndexError
+                    print("Malformed eventset line: %s" % l)
+                    return False
+                # anything after the first ':' in the counter field is dropped; only the register name is checked
+                c = elist[0].split(":")[0]
+                e = elist[1]
+                if c not in events:
+                    events[c] = e
+                else:
+                    print("Counter register used twice: %s and %s" % (e, events[c]))
+                    return False
+    for c in events.keys():
+        if c not in allcounters and not noht:
+            print("Counter register %s does not exist" % c)
+            return False
+        elif c not in allcounters:
+            print("Group requires HyperThreading to be off!")
+            return True
+        if events[c] not in allevents.keys():
+            print("Event %s unknown" % events[c])
+            return False
+    return True
+
+def check_metrics(gfile):  # True when the METRICS section holds at least one entry; prints warnings for suspicious units
+    if not os.path.exists(gfile):
+        print("Cannot find group file %s" % gfile)
+        return False
+    metrics = {}
+    with open(gfile) as f:
+        parse = False
+        inf = f.read().split("\n")
+        for l in inf:
+            if l.startswith("METRICS"):
+                parse = True
+                continue
+            if l.startswith("LONG"):
+                parse = False
+                continue
+            if parse and len(l) > 0:
+                llist = re.split(r"\s+", l)
+                name = " ".join(llist[:-1])  # everything but the last whitespace-separated field
+                if "[G" in l and not re.match(r"1[\.\d]*E[-+][\d]*9", llist[-1]):
+                    print("Wrong unit? %s" % l)
+                if "[M" in l and not "[MHz]" in l and not re.match(r"1[\.\d]*E[-+][\d]*6", llist[-1]):
+                    print("Wrong unit? %s" % l)
+                if "[%]" in l and not ("100*" in l or "*100" in l):
+                    print("Scaling factor missing? %s" % l)
+                metrics[name] = llist[-1]
+    if len(metrics.keys()) > 0:
+        return True
+    return False
+
+def check_long(gfile):
+    # Returns True when at least one non-empty line follows a LONG marker in
+    # the group file, False otherwise (including when the file is missing).
+    if not os.path.exists(gfile):
+        print("Cannot find group file %s" % gfile)
+        return False
+    seen_marker = False
+    with open(gfile) as handle:
+        for line in handle.read().split("\n"):
+            # lines starting with LONG only arm the check; they never count as text
+            if line.startswith("LONG"):
+                seen_marker = True
+                continue
+            # one non-empty description line is all the check asks for
+            if seen_marker and line:
+                return True
+    return False
+
+arch = None  # optional single argument: architecture directory name, 'local', or -h/--help
+if len(sys.argv) == 2:
+    arch = sys.argv[1]
+    if arch == "-h" or arch == "--help":
+        print("Checks performance group files for some basic problems:")
+        print("\t- Is a short description defined?")
+        print("\t- Is an eventset defined?")
+        print("\t- Are all counters in the eventset available?")
+        print("\t- Are all events in the eventset available?")
+        print("\t- Is the scaling factor correct according to the unit in metric name?")
+        print("\t- Is there a long description?")
+        print("")
+        print("If no command line argument is given, all architectures are tested.")
+        print("")
+        print("If the first argument is a valid architecture string, the lists are")
+        print("filled from definitions from source.")
+        print("")
+        print("If first argument is 'local' the current system is checked. This can")
+        print("throw more errors than the other modes as the output of likwid-perfctr -e")
+        print("contains only counters and events that are accessible on the current system.")
+        print("")
+        print("Available architectures:")
+        print(", ".join(get_valid_shorts()))
+        sys.exit(0)
+    if arch != "local" and arch not in get_valid_shorts():
+        print("Given arch not available")
+        sys.exit(1)
+
+alist = []
+# Group directories whose definition files in src/includes are named after a
+# different architecture: directory -> (events file arch, counters file arch).
+# Directories that are not listed use their own name for both files.
+ARCH_FILES = {
+    "broadwellD": ("broadwelld", "broadwelld"),
+    "pentiumm": ("pm", "pm"),
+    "westmere": ("westmere", "nehalem"),
+    "atom": ("atom", "core2"),
+    "k8": ("k8", "k10"),
+}
+
+def load_definitions(a):
+    # Returns (events, counters) for group directory 'a', read from the
+    # perfmon_<arch>_events.txt and perfmon_<arch>_counters.h files of the
+    # source tree, after applying the ARCH_FILES name mapping.
+    ea, ca = ARCH_FILES.get(a, (a, a))
+    return get_all_events_given_arch(ea), get_all_counters_given_arch(ca)
+
+# In 'local' mode the lists come from the installed likwid-perfctr; in every
+# other mode they are loaded per architecture inside the loop below.
+local = (arch == "local")
+if local:
+    events = get_all_events_local_arch()
+    counters = get_all_counters_local_arch()
+    local_arch = get_local_arch()
+    if local_arch is None:
+        # likwid-perfctr failed or printed no "CPU short:" line
+        print("Cannot determine local architecture using %s" % PERFCTR)
+        sys.exit(1)
+    alist = [local_arch]
+elif arch is not None:
+    alist = [arch]
+else:
+    # no argument: check every directory below GROUPPATH
+    alist = get_valid_shorts()
+
+# Run the four checks on every group file of every selected architecture;
+# failures are reported on stdout and do not stop the run.
+for a in alist:
+    print("Checking architecture %s" % a)
+    glist = get_all_groupfiles(a)
+    if not local:
+        # loading here (not only when several architectures are checked)
+        # keeps events/counters defined even for a single group directory
+        events, counters = load_definitions(a)
+    for f in glist:
+        g = os.path.basename(f).split(".")[0]
+        print("Checking group %s" % g)
+        if not check_short(f):
+            print("Short failure in group file %s" % f)
+        if not check_eventset(f, events, counters):
+            print("Eventset failure in group file %s" % f)
+        if not check_metrics(f):
+            print("Metrics failure in group file %s" % f)
+        if not check_long(f):
+            print("Long failure in group file %s" % f)
+    print("")
diff --git a/test/executable_tests/likwid-perfctr.txt b/test/executable_tests/likwid-perfctr.txt
index e3a7fa9..c52de96 100644
--- a/test/executable_tests/likwid-perfctr.txt
+++ b/test/executable_tests/likwid-perfctr.txt
@@ -40,6 +40,6 @@
 -S 1s -C M:scatter -g BRANCH -f | EXIT 0 | GREP Group 1: BRANCH | GREP core 0 | GREP Branch
 -c 0 -g BRANCH -f hostname | EXIT 0 | GREP Group 1: BRANCH | GREP core 0 | GREP Branch
 -C 0 -g BRANCH -f hostname | EXIT 0 | GREP Group 1: BRANCH | GREP core 0 | GREP Branch
--C 0 -g BRANCH -f -m hostname | EXIT 1 | GREP No regions
+-C 0 -g BRANCH -f -m hostname | EXIT 1 | GREP Marker API result file does not exist
 -C 0 -g BRANCH -f -t 200ms hostname | EXIT 0 | GREP CORES: 0
 -C 0 -g BRANCH -f -m ../streamGCC | EXIT 0 | GREP Region triad | GREP Region copy
diff --git a/test/executable_tests/likwid-pin.txt b/test/executable_tests/likwid-pin.txt
index 64d2d96..54b62b4 100644
--- a/test/executable_tests/likwid-pin.txt
+++ b/test/executable_tests/likwid-pin.txt
@@ -12,7 +12,7 @@
 -c S0:0-1 -p | EXIT 0 | GREP 0,1
 -c N:0 at N:1 -p | EXIT 0 | GREP 0,1
 -c N:0 at N:1 at N:2 -p | EXIT 0 | GREP 0,1,2
--c C0:1-0 -p | EXIT 0 | GREP 1,0
+-c C0:1-0 -p | EXIT 0 | GREP 0 | GREP 1
 -c E:N:1 -p | EXIT 0 | GREP 0
 -c E:N:2 -p | EXIT 0 | LISTLEN , 2
 -c E:N:2:1:2 -p | EXIT 0 | LISTLEN , 2
diff --git a/test/executable_tests/tester.sh b/test/executable_tests/tester.sh
index 119613f..ec02bda 100755
--- a/test/executable_tests/tester.sh
+++ b/test/executable_tests/tester.sh
@@ -42,8 +42,8 @@ if [ ! -e ${EXEC}.txt ]; then
     exit 1
 fi
 if [ "${EXEC}" == "likwid-setFrequencies" ]; then
-    FREQ=$(likwid-setFrequencies -l | grep -v frequencies | awk '{print $2}')
-    CURFREQ=$(likwid-setFrequencies -p | head -n2 | tail -n 1 | rev | awk '{print $2}' | rev)
+    FREQ=$(${EXECPATH}/likwid-setFrequencies -l | grep -v frequencies | awk '{print $2}')
+    CURFREQ=$(${EXECPATH}/likwid-setFrequencies -p | head -n2 | tail -n 1 | rev | awk '{print $2}' | rev | awk -F'/' '{print $2}')
 fi
 if [ "${EXEC}" == "likwid-mpirun" ]; then
     if [ -z "$(which mpiexec)" ] && [ -z "$(which mpiexec.hydra)" ] && [ -z "$(which mpirun)" ]; then
@@ -89,7 +89,7 @@ done < ${EXEC}.txt
 
 
 if [ "${EXEC}" == "likwid-setFrequencies" ]; then
-    ${EXEC} -f "${CURFREQ}"
+    ${EXECPATH}/${EXEC} -f "${CURFREQ}"
 fi
 
 rm -f /tmp/topo.txt /tmp/test /tmp/test.txt /tmp/out.txt /tmp/out
diff --git a/test/stream.c b/test/stream.c
index 15f2ca3..ea811a2 100644
--- a/test/stream.c
+++ b/test/stream.c
@@ -1,199 +1,249 @@
-#define _GNU_SOURCE
-#include <stdlib.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/syscall.h>
-#ifdef _OPENMP
-#include <omp.h>
-# endif
-#include <stdint.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sched.h>
-#include <pthread.h>
-
-#define ITER 10
-#define SIZE 40000000
-
-#define gettid() syscall(SYS_gettid)
-#include <likwid.h>
-#define HLINE "-------------------------------------------------------------\n"
-
-#ifndef MIN
-#define MIN(x,y) ((x)<(y)?(x):(y))
-#endif
-
-typedef struct {
-    struct timeval before;
-    struct timeval after;
-} TimeData;
-
-
-void time_start(TimeData* time)
-{
-    gettimeofday(&(time->before),NULL);
-}
-
-
-void time_stop(TimeData* time)
-{
-    gettimeofday(&(time->after),NULL);
-}
-
-double time_print(TimeData* time)
-{
-    long int sec;
-    double timeDuration;
-
-    sec = time->after.tv_sec - time->before.tv_sec;
-    timeDuration = ((double)((sec*1000000)+time->after.tv_usec) - (double) time->before.tv_usec);
-
-    return (timeDuration/1000000);
-}
-
-static int
-getProcessorID(cpu_set_t* cpu_set)
-{
-    int processorId;
-
-    for (processorId=0;processorId<128;processorId++)
-    {
-	if (CPU_ISSET(processorId,cpu_set))
-	{
-	    break;
-	}
-    }
-    return processorId;
-}
-
-int  threadGetProcessorId()
-{
-    cpu_set_t  cpu_set;
-    CPU_ZERO(&cpu_set);
-    sched_getaffinity(gettid(),sizeof(cpu_set_t), &cpu_set);
-
-    return getProcessorID(&cpu_set);
-}
-
-void allocate_vector(double** ptr, uint64_t size)
-{
-    int errorCode;
-
-    errorCode = posix_memalign((void**) ptr, 64, size*sizeof(double));
-
-    if (errorCode)
-    {
-	if (errorCode == EINVAL)
-	{
-	    fprintf(stderr,
-		    "Alignment parameter is not a power of two\n");
-	    exit(EXIT_FAILURE);
-	}
-	if (errorCode == ENOMEM)
-	{
-	    fprintf(stderr,
-		    "Insufficient memory to fulfill the request\n");
-	    exit(EXIT_FAILURE);
-	}
-    }
-}
-
-
-int main(int argn, char** argc)
-{
-    double *a,*b,*c,*d;
-    TimeData timer;
-    double triad_time, copy_time;
-
-    allocate_vector(&a, SIZE);
-    allocate_vector(&b, SIZE);
-    allocate_vector(&c, SIZE);
-    allocate_vector(&d, SIZE);
-
-#ifdef LIKWID_PERFMON
-    printf("Using likwid\n");
-#endif
-
-    LIKWID_MARKER_INIT;
-
-#ifdef _OPENMP
-    printf(HLINE);
-#pragma omp parallel
-    {
-#pragma omp master
-	{
-	    printf ("Number of Threads requested = %i\n",omp_get_num_threads());
-	}
-	printf ("Thread %d running on processor %d ....\n",omp_get_thread_num(),sched_getcpu());
-    }
-#endif
-
-#pragma omp parallel for
-    for (int j=0; j<SIZE; j++) {
-	a[j] = 1.0;
-	b[j] = 2.0;
-	c[j] = 0.0;
-	d[j] = 1.0;
-    }
-
-    time_start(&timer);
-#pragma omp parallel
-    {
-        for (int k=0; k<ITER; k++)
-        {
-            LIKWID_MARKER_START("copy");
-#pragma simd
-#pragma omp for
-            for (int j=0; j<SIZE; j++)
-            {
-            
-                c[j] = a[j];
-            }
-            LIKWID_MARKER_STOP("copy");
-        }
-    }
-    time_stop(&timer);
-    copy_time = time_print(&timer)/(double)ITER;
-
-    time_start(&timer);
-#pragma omp parallel
-    {
-	LIKWID_MARKER_START("triad_total");
-        for (int k=0; k<ITER; k++)
-        {
-
-            LIKWID_MARKER_START("triad");
-#pragma simd
-#pragma omp for
-            for (int j=0; j<SIZE; j++)
-            {
-
-                a[j] = b[j] +  c[j] * d[j];
-            }
-            LIKWID_MARKER_STOP("triad");
-        }
-	LIKWID_MARKER_STOP("triad_total");
-    }
-    time_stop(&timer);
-    triad_time = time_print(&timer)/(double)ITER;
-
-
-    printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
-                        1E-6*(2*SIZE*sizeof(double)),
-                        copy_time,
-                        1E-6*((2*SIZE*sizeof(double))/copy_time));
-    printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n",
-                        1E-6*(4*SIZE*sizeof(double)),
-                        triad_time,
-                        1E-6*((4*SIZE*sizeof(double))/triad_time));
-
-
-    LIKWID_MARKER_CLOSE;
-    free(a);
-    free(b);
-    free(c);
-    free(d);
-    return 0;
-}
-
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#ifdef _OPENMP
+#include <omp.h>
+# endif
+#include <stdint.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sched.h>
+#include <pthread.h>
+
+#define ITER 10
+#define SIZE 40000000
+
+#define gettid() syscall(SYS_gettid)
+#include <likwid.h>
+#define HLINE "-------------------------------------------------------------\n"
+
+#ifndef MIN
+#define MIN(x,y) ((x)<(y)?(x):(y))
+#endif
+
+typedef struct {
+    struct timeval before;
+    struct timeval after;
+} TimeData;
+
+
+void time_start(TimeData* time)
+{
+    gettimeofday(&(time->before),NULL);
+}
+
+
+void time_stop(TimeData* time)
+{
+    gettimeofday(&(time->after),NULL);
+}
+
+double time_print(TimeData* time)
+{
+    long int sec;
+    double timeDuration;
+
+    sec = time->after.tv_sec - time->before.tv_sec;
+    timeDuration = ((double)((sec*1000000)+time->after.tv_usec) - (double) time->before.tv_usec);
+
+    return (timeDuration/1000000);
+}
+
+static int
+getProcessorID(cpu_set_t* cpu_set)
+{
+    int processorId;
+    /* Returns the lowest CPU contained in cpu_set, or CPU_SETSIZE if the set is empty. */
+    for (processorId=0;processorId<CPU_SETSIZE;processorId++)
+    {
+        if (CPU_ISSET(processorId,cpu_set))
+        {
+            break;
+        }
+    }
+    return processorId;
+}
+
+int  threadGetProcessorId()
+{
+    cpu_set_t  cpu_set;
+    CPU_ZERO(&cpu_set);
+    sched_getaffinity(gettid(),sizeof(cpu_set_t), &cpu_set);
+
+    return getProcessorID(&cpu_set);
+}
+
+void allocate_vector(double** ptr, uint64_t size)
+{
+    /* Stores a 64-byte aligned buffer of 'size' doubles in *ptr; exits the program if posix_memalign fails. */
+    int errorCode = posix_memalign((void**) ptr, 64, size*sizeof(double));
+
+    if (errorCode)
+    {
+        if (errorCode == EINVAL)
+        {
+            fprintf(stderr, "Alignment parameter is not a power of two\n");
+        }
+        else if (errorCode == ENOMEM)
+        {
+            fprintf(stderr, "Insufficient memory to fulfill the request\n");
+        }
+        else
+        {
+            fprintf(stderr, "posix_memalign failed with error %d\n", errorCode);
+        }
+        exit(EXIT_FAILURE); /* never return with *ptr unset, whatever the error code */
+    }
+}
+
+
+int main(int argn, char** argc)
+{
+    double *a,*b,*c,*d;
+    double scalar = 3.0;
+    TimeData timer;
+    double triad_time, copy_time, scale_time, add_time;
+
+    allocate_vector(&a, SIZE);
+    allocate_vector(&b, SIZE);
+    allocate_vector(&c, SIZE);
+    allocate_vector(&d, SIZE);
+
+#ifdef LIKWID_PERFMON
+    printf("Using likwid\n");
+#endif
+
+    LIKWID_MARKER_INIT;
+
+#ifdef _OPENMP
+    printf(HLINE);
+#pragma omp parallel
+    {
+#pragma omp master
+    {
+        printf ("Number of Threads requested = %i\n",omp_get_num_threads());
+    }
+    printf ("Thread %d running on processor %d ....\n",omp_get_thread_num(),sched_getcpu());
+    }
+    printf(HLINE);
+#endif
+
+#pragma omp parallel for
+    for (int j=0; j<SIZE; j++) {
+        a[j] = 1.0;
+        b[j] = 2.0;
+        c[j] = 0.0;
+        d[j] = 1.0;
+    }
+
+    time_start(&timer);
+#pragma omp parallel
+    {
+        for (int k=0; k<ITER; k++)
+        {
+            LIKWID_MARKER_START("copy");
+#pragma simd
+#pragma omp for
+            for (int j=0; j<SIZE; j++)
+            {
+                c[j] = a[j];
+            }
+            LIKWID_MARKER_STOP("copy");
+        }
+    }
+    time_stop(&timer);
+    copy_time = time_print(&timer)/(double)ITER;
+
+    time_start(&timer);
+#pragma omp parallel
+    {
+        for (int k=0; k<ITER; k++)
+        {
+
+            LIKWID_MARKER_START("scale");
+#pragma simd
+#pragma omp for
+            for (int j=0; j<SIZE; j++)
+            {
+                b[j] = scalar * c[j];
+            }
+            LIKWID_MARKER_STOP("scale");
+        }
+    }
+    time_stop(&timer);
+    scale_time = time_print(&timer)/(double)ITER;
+
+    time_start(&timer);
+#pragma omp parallel
+    {
+        for (int k=0; k<ITER; k++)
+        {
+
+            LIKWID_MARKER_START("add");
+#pragma simd
+#pragma omp for
+            for (int j=0; j<SIZE; j++)
+            {
+                c[j] = a[j] + b[j];
+            }
+            LIKWID_MARKER_STOP("add");
+        }
+    }
+    time_stop(&timer);
+    add_time = time_print(&timer)/(double)ITER;
+
+    time_start(&timer);
+#pragma omp parallel
+    {
+        LIKWID_MARKER_START("triad_total");
+        for (int k=0; k<ITER; k++)
+        {
+
+            LIKWID_MARKER_START("triad");
+#pragma simd
+#pragma omp for
+            for (int j=0; j<SIZE; j++)
+            {
+
+                a[j] = b[j] +  c[j] * d[j];
+            }
+            LIKWID_MARKER_STOP("triad");
+        }
+        LIKWID_MARKER_STOP("triad_total");
+    }
+    time_stop(&timer);
+    triad_time = time_print(&timer)/(double)ITER;
+
+
+    printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
+                        1E-6*(2*SIZE*sizeof(double)),
+                        copy_time,
+                        1E-6*((2*SIZE*sizeof(double))/copy_time));
+    printf("Processed %.1f Mbyte at scale benchmark in %.4f seconds: %.2f MByte/s %.2f MFLOP/s\n",
+                        1E-6*(2*SIZE*sizeof(double)),
+                        scale_time,
+                        1E-6*((2*SIZE*sizeof(double))/scale_time),
+                        1E-6*(SIZE/scale_time));
+    printf("Processed %.1f Mbyte at add benchmark in %.4f seconds: %.2f MByte/s %.2f MFLOP/s\n",
+                        1E-6*(3*SIZE*sizeof(double)),
+                        add_time,
+                        1E-6*((3*SIZE*sizeof(double))/add_time),
+                        1E-6*(SIZE/add_time));
+    printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s %.2f MFLOP/s\n",
+                        1E-6*(4*SIZE*sizeof(double)),
+                        triad_time,
+                        1E-6*((4*SIZE*sizeof(double))/triad_time),
+                        1E-6*((2*SIZE)/triad_time));
+
+
+    LIKWID_MARKER_CLOSE;
+    free(a);
+    free(b);
+    free(c);
+    free(d);
+    return 0;
+}
+
diff --git a/test/test-likwidAPI.c b/test/test-likwidAPI.c
index 7a2001f..6d723f0 100644
--- a/test/test-likwidAPI.c
+++ b/test/test-likwidAPI.c
@@ -206,7 +206,9 @@ int test_numainit()
 {
     int i = 0;
     topology_init();
+    CpuInfo_t cpuinfo = get_cpuInfo();
     numa_init();
+    int valid = 0, filled_domains = 0;
     NumaTopology_t numainfo = get_numaTopology();
     if (numainfo == NULL)
         goto fail;
@@ -216,17 +218,38 @@ int test_numainit()
         goto fail;
     for (i = 0; i < likwid_getNumberOfNodes(); i++)
     {
+        valid = 1;
         if (numainfo->nodes[i].totalMemory == 0)
-            goto fail;
+        {
+            valid = 0;
+            fprintf(stderr, "WARNING: NUMA domain %d: totalMemory = 0\n", numainfo->nodes[i].id);
+        }
         if (numainfo->nodes[i].freeMemory == 0)
-            goto fail;
+        {
+            valid = 0;
+            fprintf(stderr, "WARNING: NUMA domain %d: freeMemory = 0\n", numainfo->nodes[i].id);
+        }
         if (numainfo->nodes[i].numberOfProcessors == 0)
-            goto fail;
+        {
+            valid = 0;
+            fprintf(stderr, "WARNING: NUMA domain %d: numberOfProcessors = 0\n", numainfo->nodes[i].id);
+        }
         if (numainfo->nodes[i].numberOfDistances == 0)
-            goto fail;
+        {
+            valid = 0;
+            fprintf(stderr, "WARNING: NUMA domain %d: numberOfDistances = 0\n", numainfo->nodes[i].id);
+        }
         if (numainfo->nodes[i].numberOfDistances != likwid_getNumberOfNodes())
+        {
+            valid = 0;
+        }
+        if (valid)
+            filled_domains++;
+        else if (strcmp(cpuinfo->short_name, "knl") != 0)
             goto fail;
     }
+    if (strcmp(cpuinfo->short_name, "knl") != 0 && likwid_getNumberOfNodes() % filled_domains != 0)
+        goto fail;
     numa_finalize();
     topology_finalize();
     return 1;
@@ -239,7 +262,9 @@ fail:
 int test_affinityinit()
 {
     int i = 0;
+    int valid = 0, filled_domains = 0;
     topology_init();
+    CpuInfo_t cpuinfo = get_cpuInfo();
     CpuTopology_t cputopo = get_cpuTopology();
     numa_init();
     affinity_init();
@@ -266,14 +291,34 @@ int test_affinityinit()
         goto fail;
     for (i = 0; i < doms->numberOfAffinityDomains; i++)
     {
+        valid = 1;
         if (doms->domains[i].numberOfProcessors == 0)
-            goto fail;
+        {
+            valid = 0;
+            fprintf(stderr, "WARNING: Affinity domain %d: numberOfProcessors = 0\n", i);
+        }
         if (doms->domains[i].numberOfCores == 0)
-            goto fail;
+        {
+            valid = 0;
+            fprintf(stderr, "WARNING: Affinity domain %d: numberOfCores = 0\n", i);
+        }
         if (doms->domains[i].numberOfProcessors < doms->domains[i].numberOfCores)
-            goto fail;
+        {
+            valid = 0;
+            fprintf(stderr, "WARNING: Affinity domain %d: numberOfProcessors < doms->domains[i].numberOfCores\n", i);
+        }
         if (doms->domains[i].processorList == NULL)
+        {
+            valid = 0;
+            fprintf(stderr, "WARNING: Affinity domain %d: processorList == NULL\n", i);
+        }
+        if (valid)
+            filled_domains++;
+        else if (strcmp(cpuinfo->short_name, "knl") != 0)
+        {
+            fprintf(stderr, "Domain %s failed\n", bdata(doms->domains[i].tag));
             goto fail;
+        }
     }
     affinity_finalize();
     topology_finalize();
@@ -340,20 +385,27 @@ int test_cpustring_expression()
 
 int test_cpustring_scatter()
 {
-    int test[100];
-    int len = 100;
+    CpuTopology_t cputopo = get_cpuTopology();
+    int len = cputopo->numHWThreads;
+    int *test = (int*) malloc(len * sizeof(int));
+    if (!test)
+    {
+        return 0;
+    }
     int ret = cpustr_to_cpulist("S:scatter", test, len);
     if (ret < 0)
     {
         if (verbose) printf("Returned %d\n", ret);
+        free(test);
         return 0;
     }
-    CpuTopology_t cputopo = get_cpuTopology();
     if (ret != cputopo->numHWThreads)
     {
         if (verbose) printf("Returned with %d not enough CPUs (%d)\n", ret, cputopo->numHWThreads);
+        free(test);
         return 0;
     }
+    free(test);
     return 1;
 }
 
@@ -1992,7 +2044,7 @@ int test_timersleep()
     }
     if (timer_print(&timer) > 1.1E6*1E-6)
     {
-        printf("Sleeping too long. timer is %f instead of 1 s\n", 2E6*1E-6, timer_print(&timer));
+        printf("Sleeping too long. timer is %f instead of 1 s\n", timer_print(&timer));
         goto fail;
     }
     timer_finalize();

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/likwid/likwid.git



More information about the Likwid-commit mailing list