[Likwid-commit] [likwid] 04/04: re-add stream test (changed license)
Christoph Martin
chrism at debian.org
Tue Jun 21 10:34:20 UTC 2016
This is an automated email from the git hooks/post-receive script.
chrism pushed a commit to branch upstream
in repository likwid.
commit c568e344449256a2060afcff9419d291b907d3d1
Author: Christoph Martin <martin at uni-mainz.de>
Date: Tue Jun 21 12:29:48 2016 +0200
re-add stream test (changed license)
---
test/stream-API.c | 437 +++++++++++++++++++++++++++++++++++++++++++++++++++++
test/stream.c | 199 ++++++++++++++++++++++++
test/stream.cc | 227 ++++++++++++++++++++++++++++
test/stream_cilk.c | 217 ++++++++++++++++++++++++++
4 files changed, 1080 insertions(+)
diff --git a/test/stream-API.c b/test/stream-API.c
new file mode 100644
index 0000000..0fe292e
--- /dev/null
+++ b/test/stream-API.c
@@ -0,0 +1,437 @@
+/*
+ * =======================================================================================
+ *
+ * Filename: stream-API.c
+ *
+ * Description: Copy of the STREAM benchmark (copy, scale, add and triad kernels) with
+ * hardware performance measurement instrumentation using LIKWID
+ *
+ * Version: <VERSION>
+ * Released: <DATE>
+ *
+ * Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
+ * Project: likwid
+ *
+ * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ *
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation, either version 3 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * =======================================================================================
+ */
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#ifdef _OPENMP
+#include <omp.h>
+# endif
+#include <stdint.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sched.h>
+#include <pthread.h>
+
+#define ITER 100
+#define SIZE 40000000
+#define DATATYPE float
+
+#define gettid() syscall(SYS_gettid)
+#include <likwid.h>
+#define HLINE "-------------------------------------------------------------\n"
+
+#ifndef MIN
+#define MIN(x,y) ((x)<(y)?(x):(y))
+#endif
+
+typedef struct { /* pair of gettimeofday() samples delimiting one timed interval */
+ struct timeval before; /* written by time_start() */
+ struct timeval after; /* written by time_stop() */
+} TimeData;
+
+
+/* Sample the current wall-clock time into time->before. */
+void time_start(TimeData* time) {
+    gettimeofday(&time->before, NULL);
+}
+
+
+/* Sample the current wall-clock time into time->after. */
+void time_stop(TimeData* time) {
+    gettimeofday(&time->after, NULL);
+}
+
+/* Return the interval between time->before and time->after in seconds.
+ * The arithmetic is done in double so that sec*1e6 cannot overflow a 32-bit long. */
+double time_print(TimeData* time)
+{
+    /* usec may be negative when the microsecond field wrapped; the sum is still correct. */
+    double sec  = (double)(time->after.tv_sec - time->before.tv_sec);
+    double usec = (double)(time->after.tv_usec - time->before.tv_usec);
+
+    return (sec * 1000000.0 + usec) / 1000000.0;
+}
+
+/* Return the lowest CPU id contained in cpu_set, or -1 if the set is empty.
+ * Scans the full CPU_SETSIZE range instead of stopping at a hard-coded 128. */
+static int
+getProcessorID(cpu_set_t* cpu_set)
+{
+    int processorId;
+
+    for (processorId = 0; processorId < CPU_SETSIZE; processorId++)
+    {
+        if (CPU_ISSET(processorId, cpu_set))
+            return processorId;
+    }
+    return -1;
+}
+
+/* CPU id of the calling thread taken from its affinity mask via getProcessorID(); -1 if the mask is unreadable. */
+int threadGetProcessorId()
+{
+    cpu_set_t cpu_set;
+    CPU_ZERO(&cpu_set);
+    if (sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpu_set) != 0) return -1;
+    return getProcessorID(&cpu_set);
+}
+
+/* Allocate size elements of DATATYPE on a 64-byte boundary and store the pointer in *ptr.
+ * Every posix_memalign failure is fatal (message on stderr, then exit), so the function only
+ * returns after a successful allocation; previously an unexpected error code fell through. */
+void allocate_vector(DATATYPE** ptr, uint64_t size)
+{
+    int errorCode;
+
+    errorCode = posix_memalign((void**) ptr, 64, size*sizeof(DATATYPE));
+
+    if (errorCode)
+    {
+        if (errorCode == EINVAL)
+            fprintf(stderr,
+                    "Alignment parameter is not a power of two\n");
+        else if (errorCode == ENOMEM)
+            fprintf(stderr,
+                    "Insufficient memory to fulfill the request\n");
+        else
+            fprintf(stderr, "posix_memalign failed with error %d\n", errorCode);
+        exit(EXIT_FAILURE);
+    }
+}
+
+
+/* Usage: <prog> <cpustr> <events>. Runs the copy, scale, stream (add) and triad kernels on the
+ * CPUs in <cpustr> and prints LIKWID perfmon results for <events>; returns 0 on success, 1 on error. */
+int main(int argn, char** argc)
+{
+    int err, i ,j;
+    int numCPUs = 0, gid;
+    DATATYPE *a,*b,*c,*d;
+    TimeData timer;
+    double triad_time, copy_time, scale_time, stream_time;
+    char estr[1024];
+    double result, scalar = 3.0;
+    char* ptr;
+    if (argn != 3)
+    {
+        printf("Usage: %s <cpustr> <events>\n", argc[0]);
+        return 1;
+    }
+    /* Bounded copy: an over-long event string is truncated instead of overflowing estr. */
+    snprintf(estr, sizeof(estr), "%s", argc[2]);
+
+    allocate_vector(&a, SIZE);
+    allocate_vector(&b, SIZE);
+    allocate_vector(&c, SIZE);
+    allocate_vector(&d, SIZE);
+    err = topology_init();
+    if (err < 0)
+    {
+        printf("Failed to initialize LIKWID's topology module\n");
+        return 1;
+    }
+    CpuTopology_t topo = get_cpuTopology();
+    affinity_init();
+    int* cpus = (int*)malloc(topo->numHWThreads * sizeof(int));
+    if (!cpus) return 1;
+    numCPUs = cpustr_to_cpulist(argc[1], cpus, topo->numHWThreads);
+#ifdef _OPENMP
+    omp_set_num_threads(numCPUs); /* omp.h is only included when _OPENMP is defined */
+#endif
+    err = perfmon_init(numCPUs, cpus);
+    if (err < 0)
+    {
+        printf("Failed to initialize LIKWID's performance monitoring module\n");
+        affinity_finalize();
+        topology_finalize();
+        return 1;
+    }
+    gid = perfmon_addEventSet(estr);
+    if (gid < 0)
+    {
+        printf("Failed to add event string %s to LIKWID's performance monitoring module\n", estr);
+        perfmon_finalize();
+        affinity_finalize();
+        topology_finalize();
+        return 1;
+    }
+
+    err = perfmon_setupCounters(gid);
+    if (err < 0)
+    {
+        printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid);
+        perfmon_finalize();
+        affinity_finalize();
+        topology_finalize();
+        return 1;
+    }
+    /* With OpenMP: pin every thread to its entry of the CPU list and report the placement. */
+#ifdef _OPENMP
+    printf(HLINE);
+#pragma omp parallel
+    {
+#pragma omp master
+        {
+            printf ("Number of Threads requested = %i\n",omp_get_num_threads());
+        }
+        likwid_pinThread(cpus[omp_get_thread_num()]);
+        printf ("Thread %d running on processor %d ....\n",omp_get_thread_num(),sched_getcpu());
+    }
+#endif
+    /* Initial values: a=1, b=2, c=0, d=1. */
+#pragma omp parallel for
+    for (int j=0; j<SIZE; j++) {
+        a[j] = 1.0;
+        b[j] = 2.0;
+        c[j] = 0.0;
+        d[j] = 1.0;
+    }
+    /* ---- copy: c = a ---- */
+    err = perfmon_startCounters();
+    if (err < 0)
+    {
+        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
+        perfmon_finalize();
+        topology_finalize();
+        return 1;
+    }
+    time_start(&timer);
+#pragma omp parallel
+    {
+        for (int k=0; k<ITER; k++)
+        {
+            LIKWID_MARKER_START("copy");
+#pragma omp for
+            for (int j=0; j<SIZE; j++)
+            {
+                c[j] = a[j];
+            }
+            LIKWID_MARKER_STOP("copy");
+        }
+    }
+    time_stop(&timer);
+    err = perfmon_stopCounters();
+    copy_time = time_print(&timer)/(double)ITER;
+    if (err < 0)
+    {
+        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
+        perfmon_finalize();
+        topology_finalize();
+        return 1;
+    }
+
+    printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
+            1E-6*(2*SIZE*sizeof(DATATYPE)),
+            copy_time,
+            1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time));
+    /* NOTE(review): perfmon_getResult() receives the CPU id cpus[i]; confirm it does not expect the index i. */
+    ptr = strtok(estr,",");
+    j = 0;
+    while (ptr != NULL)
+    {
+        for (i = 0;i < numCPUs; i++)
+        {
+            result = perfmon_getResult(gid, j, cpus[i]);
+            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
+        }
+        ptr = strtok(NULL,",");
+        j++;
+    }
+    snprintf(estr, sizeof(estr), "%s", argc[2]); /* strtok() cut estr apart; restore it */
+    perfmon_setupCounters(gid);
+    /* ---- scale: b = scalar * c ---- */
+    err = perfmon_startCounters();
+    if (err < 0)
+    {
+        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
+        perfmon_finalize();
+        topology_finalize();
+        return 1;
+    }
+    time_start(&timer);
+#pragma omp parallel
+    {
+        for (int k=0; k<ITER; k++)
+        {
+            LIKWID_MARKER_START("scale");
+#pragma omp for
+            for (int j=0; j<SIZE; j++)
+            {
+                b[j] = scalar*c[j];
+            }
+            LIKWID_MARKER_STOP("scale");
+        }
+    }
+    time_stop(&timer);
+    err = perfmon_stopCounters();
+    scale_time = time_print(&timer)/(double)ITER;
+    if (err < 0)
+    {
+        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
+        perfmon_finalize();
+        topology_finalize();
+        return 1;
+    }
+    /* Report with scale_time (the original printed the copy kernel's time here). */
+    printf("Processed %.1f Mbyte at scale benchmark in %.4f seconds: %.2f MByte/s\n",
+            1E-6*(2*SIZE*sizeof(DATATYPE)),
+            scale_time,
+            1E-6*((2*SIZE*sizeof(DATATYPE))/scale_time));
+
+    ptr = strtok(estr,",");
+    j = 0;
+    while (ptr != NULL)
+    {
+        for (i = 0;i < numCPUs; i++)
+        {
+            result = perfmon_getResult(gid, j, cpus[i]);
+            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
+        }
+        ptr = strtok(NULL,",");
+        j++;
+    }
+    snprintf(estr, sizeof(estr), "%s", argc[2]);
+    perfmon_setupCounters(gid);
+    err = perfmon_startCounters(); /* ---- stream (add): c = a + b ---- */
+    if (err < 0)
+    {
+        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
+        perfmon_finalize();
+        topology_finalize();
+        return 1;
+    }
+    time_start(&timer);
+#pragma omp parallel
+    {
+        for (int k=0; k<ITER; k++)
+        {
+            LIKWID_MARKER_START("stream");
+#pragma omp for
+            for (int j=0; j<SIZE; j++)
+            {
+                c[j] = a[j] + b[j];
+            }
+            LIKWID_MARKER_STOP("stream");
+        }
+    }
+    time_stop(&timer);
+    err = perfmon_stopCounters();
+    stream_time = time_print(&timer)/(double)ITER;
+    if (err < 0)
+    {
+        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
+        perfmon_finalize();
+        topology_finalize();
+        return 1;
+    }
+    /* Report with stream_time. NOTE(review): this kernel touches three vectors; the factor 2 is kept as found. */
+    printf("Processed %.1f Mbyte at stream benchmark in %.4f seconds: %.2f MByte/s\n",
+            1E-6*(2*SIZE*sizeof(DATATYPE)),
+            stream_time,
+            1E-6*((2*SIZE*sizeof(DATATYPE))/stream_time));
+
+    ptr = strtok(estr,",");
+    j = 0;
+    while (ptr != NULL)
+    {
+        for (i = 0;i < numCPUs; i++)
+        {
+            result = perfmon_getResult(gid, j, cpus[i]);
+            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
+        }
+        ptr = strtok(NULL,",");
+        j++;
+    }
+    snprintf(estr, sizeof(estr), "%s", argc[2]);
+    perfmon_setupCounters(gid);
+    err = perfmon_startCounters(); /* ---- triad: a = b + c * scalar ---- */
+    if (err < 0)
+    {
+        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
+        perfmon_finalize();
+        topology_finalize();
+        return 1;
+    }
+    time_start(&timer);
+#pragma omp parallel
+    {
+        for (int k=0; k<ITER; k++)
+        {
+
+            LIKWID_MARKER_START("triad");
+#pragma omp for
+            for (int j=0; j<SIZE; j++)
+            {
+                a[j] = b[j] + c[j] * scalar;
+            }
+            LIKWID_MARKER_STOP("triad");
+        }
+    }
+    time_stop(&timer);
+    err = perfmon_stopCounters();
+    triad_time = time_print(&timer)/(double)ITER;
+    if (err < 0)
+    {
+        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
+        perfmon_finalize();
+        topology_finalize();
+        return 1;
+    }
+
+    /* NOTE(review): this triad reads b and c and writes a (three vectors); the factor 4 is kept as found. */
+
+    printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n",
+            1E-6*(4*SIZE*sizeof(DATATYPE)),
+            triad_time,
+            1E-6*((4*SIZE*sizeof(DATATYPE))/triad_time));
+    ptr = strtok(estr,",");
+    j = 0;
+    while (ptr != NULL)
+    {
+        for (i = 0;i < numCPUs; i++)
+        {
+            result = perfmon_getResult(gid, j, cpus[i]);
+            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
+        }
+        ptr = strtok(NULL,",");
+        j++;
+    }
+    free(a); free(b); free(c); free(d); free(cpus); /* release the vectors and the CPU list */
+    perfmon_finalize();
+    affinity_finalize();
+    topology_finalize();
+    return 0;
+}
+
diff --git a/test/stream.c b/test/stream.c
new file mode 100644
index 0000000..15f2ca3
--- /dev/null
+++ b/test/stream.c
@@ -0,0 +1,199 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#ifdef _OPENMP
+#include <omp.h>
+# endif
+#include <stdint.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sched.h>
+#include <pthread.h>
+
+#define ITER 10
+#define SIZE 40000000
+
+#define gettid() syscall(SYS_gettid)
+#include <likwid.h>
+#define HLINE "-------------------------------------------------------------\n"
+
+#ifndef MIN
+#define MIN(x,y) ((x)<(y)?(x):(y))
+#endif
+
+typedef struct { /* pair of gettimeofday() samples delimiting one timed interval */
+ struct timeval before; /* written by time_start() */
+ struct timeval after; /* written by time_stop() */
+} TimeData;
+
+
+/* Store the current time of day in time->before (start of the interval). */
+void time_start(TimeData* time) {
+    gettimeofday(&time->before, NULL);
+}
+
+
+/* Store the current time of day in time->after (end of the interval). */
+void time_stop(TimeData* time) {
+    gettimeofday(&time->after, NULL);
+}
+
+/* Return the interval between time->before and time->after in seconds.
+ * The arithmetic is done in double so that sec*1e6 cannot overflow a 32-bit long. */
+double time_print(TimeData* time)
+{
+    /* usec may be negative when the microsecond field wrapped; the sum is still correct. */
+    double sec  = (double)(time->after.tv_sec - time->before.tv_sec);
+    double usec = (double)(time->after.tv_usec - time->before.tv_usec);
+
+    return (sec * 1000000.0 + usec) / 1000000.0;
+}
+
+/* Return the lowest CPU id contained in cpu_set, or -1 if the set is empty.
+ * Scans the full CPU_SETSIZE range instead of stopping at a hard-coded 128. */
+static int
+getProcessorID(cpu_set_t* cpu_set)
+{
+    int processorId;
+
+    for (processorId = 0; processorId < CPU_SETSIZE; processorId++)
+    {
+        if (CPU_ISSET(processorId, cpu_set))
+            return processorId;
+    }
+    return -1;
+}
+
+/* CPU id of the calling thread taken from its affinity mask via getProcessorID(); -1 if the mask is unreadable. */
+int threadGetProcessorId()
+{
+    cpu_set_t cpu_set;
+    CPU_ZERO(&cpu_set);
+    if (sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpu_set) != 0) return -1;
+    return getProcessorID(&cpu_set);
+}
+
+/* Allocate size doubles on a 64-byte boundary and store the pointer in *ptr.
+ * Every posix_memalign failure is fatal (message on stderr, then exit), so the function only
+ * returns after a successful allocation; previously an unexpected error code fell through. */
+void allocate_vector(double** ptr, uint64_t size)
+{
+    int errorCode;
+
+    errorCode = posix_memalign((void**) ptr, 64, size*sizeof(double));
+
+    if (errorCode)
+    {
+        if (errorCode == EINVAL)
+            fprintf(stderr,
+                    "Alignment parameter is not a power of two\n");
+        else if (errorCode == ENOMEM)
+            fprintf(stderr,
+                    "Insufficient memory to fulfill the request\n");
+        else
+            fprintf(stderr, "posix_memalign failed with error %d\n", errorCode);
+        exit(EXIT_FAILURE);
+    }
+}
+
+
+int main(int argn, char** argc) /* times ITER copy and triad sweeps inside LIKWID marker regions; the arguments are not read */
+{
+ double *a,*b,*c,*d;
+ TimeData timer;
+ double triad_time, copy_time;
+ /* Allocate the four SIZE-element work vectors. */
+ allocate_vector(&a, SIZE);
+ allocate_vector(&b, SIZE);
+ allocate_vector(&c, SIZE);
+ allocate_vector(&d, SIZE);
+
+#ifdef LIKWID_PERFMON
+ printf("Using likwid\n");
+#endif
+
+ LIKWID_MARKER_INIT;
+ /* With OpenMP: the master thread prints the team size, every thread prints its current CPU. */
+#ifdef _OPENMP
+ printf(HLINE);
+#pragma omp parallel
+ {
+#pragma omp master
+ {
+ printf ("Number of Threads requested = %i\n",omp_get_num_threads());
+ }
+ printf ("Thread %d running on processor %d ....\n",omp_get_thread_num(),sched_getcpu());
+ }
+#endif
+ /* Initial values: a=1, b=2, c=0, d=1. */
+#pragma omp parallel for
+ for (int j=0; j<SIZE; j++) {
+ a[j] = 1.0;
+ b[j] = 2.0;
+ c[j] = 0.0;
+ d[j] = 1.0;
+ }
+ /* Copy kernel c[j] = a[j]: ITER sweeps inside a single parallel region, timed as a whole. */
+ time_start(&timer);
+#pragma omp parallel
+ {
+ for (int k=0; k<ITER; k++)
+ {
+ LIKWID_MARKER_START("copy");
+#pragma simd
+#pragma omp for
+ for (int j=0; j<SIZE; j++)
+ {
+
+ c[j] = a[j];
+ }
+ LIKWID_MARKER_STOP("copy");
+ }
+ }
+ time_stop(&timer);
+ copy_time = time_print(&timer)/(double)ITER; /* seconds per sweep */
+ /* Triad kernel a[j] = b[j] + c[j]*d[j]; the "triad_total" region encloses all ITER sweeps. */
+ time_start(&timer);
+#pragma omp parallel
+ {
+ LIKWID_MARKER_START("triad_total");
+ for (int k=0; k<ITER; k++)
+ {
+
+ LIKWID_MARKER_START("triad");
+#pragma simd
+#pragma omp for
+ for (int j=0; j<SIZE; j++)
+ {
+
+ a[j] = b[j] + c[j] * d[j];
+ }
+ LIKWID_MARKER_STOP("triad");
+ }
+ LIKWID_MARKER_STOP("triad_total");
+ }
+ time_stop(&timer);
+ triad_time = time_print(&timer)/(double)ITER; /* seconds per sweep */
+ /* Reported volume per sweep: 2 vectors for copy, 4 for triad, SIZE doubles each. */
+
+ printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
+ 1E-6*(2*SIZE*sizeof(double)),
+ copy_time,
+ 1E-6*((2*SIZE*sizeof(double))/copy_time));
+ printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n",
+ 1E-6*(4*SIZE*sizeof(double)),
+ triad_time,
+ 1E-6*((4*SIZE*sizeof(double))/triad_time));
+
+ /* Close the marker API and release the vectors. */
+ LIKWID_MARKER_CLOSE;
+ free(a);
+ free(b);
+ free(c);
+ free(d);
+ return 0;
+}
+
diff --git a/test/stream.cc b/test/stream.cc
new file mode 100644
index 0000000..489bb09
--- /dev/null
+++ b/test/stream.cc
@@ -0,0 +1,227 @@
+#include <sys/types.h>
+#include <unistd.h>
+#include <iostream>
+#include <string>
+#include <atomic>
+#include <thread>
+#include <likwid.h>
+#include <sched.h>
+#include <syscall.h>
+#include <sys/time.h>
+
+#define gettid() syscall(SYS_gettid)
+#define ITER 10
+#define SIZE 40000000
+#ifdef __GNUG__
+#define RESTRICT __restrict__
+#else
+#define RESTRICT restrict
+#endif
+using namespace std;
+
+typedef struct { /* pair of gettimeofday() samples delimiting one timed interval */
+ struct timeval before; /* written by time_start() */
+ struct timeval after; /* written by time_stop() */
+} TimeData;
+
+
+/* Take the interval's start sample: current time of day goes into time->before. */
+void time_start(TimeData* time) {
+    gettimeofday(&time->before, NULL);
+}
+
+
+/* Take the interval's end sample: current time of day goes into time->after. */
+void time_stop(TimeData* time) {
+    gettimeofday(&time->after, NULL);
+}
+
+/* Return the interval between time->before and time->after in seconds.
+ * The arithmetic is done in double so that sec*1e6 cannot overflow a 32-bit long. */
+double time_print(TimeData* time)
+{
+    /* usec may be negative when the microsecond field wrapped; the sum is still correct. */
+    double sec  = (double)(time->after.tv_sec - time->before.tv_sec);
+    double usec = (double)(time->after.tv_usec - time->before.tv_usec);
+
+    return (sec * 1000000.0 + usec) / 1000000.0;
+}
+
+/* Return the lowest CPU id contained in cpu_set, or -1 if the set is empty.
+ * Scans the full CPU_SETSIZE range instead of stopping at a hard-coded 128. */
+static int
+getProcessorID(cpu_set_t* cpu_set)
+{
+    int processorId;
+
+    for (processorId = 0; processorId < CPU_SETSIZE; processorId++)
+    {
+        if (CPU_ISSET(processorId, cpu_set))
+            return processorId;
+    }
+    return -1;
+}
+
+/* Report the CPU of the calling thread: the current CPU (sched_getcpu) when the affinity
+ * mask allows several CPUs, otherwise the result of getProcessorID() for the mask.
+ * Returns -1 when the affinity mask cannot be queried. */
+int threadGetProcessorId()
+{
+    cpu_set_t cpu_set;
+    CPU_ZERO(&cpu_set);
+    if (sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpu_set) != 0)
+    {
+        return -1;
+    }
+    if (CPU_COUNT(&cpu_set) > 1)
+        return sched_getcpu();
+    return getProcessorID(&cpu_set);
+}
+
+
+double copy_times[CPU_SETSIZE]; /* per-thread copy phase time, written by calc_thread() at index id */
+double triad_times[CPU_SETSIZE]; /* per-thread triad phase time, written by calc_thread() at index id */
+
+/* Allocate size doubles on a 64-byte boundary and store the pointer in *ptr.
+ * Every posix_memalign failure is fatal (message on stderr, then exit), so the function only
+ * returns after a successful allocation; previously an unexpected error code fell through. */
+void allocate_vector(double** ptr, uint64_t size)
+{
+    int errorCode;
+
+    errorCode = posix_memalign((void**) ptr, 64, size*sizeof(double));
+
+    if (errorCode)
+    {
+        if (errorCode == EINVAL)
+            fprintf(stderr,
+                    "Alignment parameter is not a power of two\n");
+        else if (errorCode == ENOMEM)
+            fprintf(stderr,
+                    "Insufficient memory to fulfill the request\n");
+        else
+            fprintf(stderr, "posix_memalign failed with error %d\n", errorCode);
+        exit(EXIT_FAILURE);
+    }
+}
+
+
+
+/* Worker body: thread `id` of `all` runs ITER copy sweeps (c = a) and ITER triad sweeps
+ * (a = b + c * d) over its contiguous slice and stores the total seconds of each phase in
+ * copy_times[id] / triad_times[id]. The last thread also takes the SIZE % all tail so no
+ * element is skipped when SIZE is not a multiple of the thread count. Always returns 0. */
+int calc_thread(double* RESTRICT a, double* RESTRICT b, double* RESTRICT c, double* RESTRICT d, int id, int all)
+{
+    TimeData timer;
+    int start = id*(SIZE/all);
+    int end = (id == all-1) ? SIZE : start+(SIZE/all);
+
+    LIKWID_MARKER_THREADINIT;
+    printf ("Thread %d running on processor %d ....\n", id, threadGetProcessorId());
+
+    time_start(&timer);
+    for (int k=0; k<ITER; k++)
+    {
+        LIKWID_MARKER_START("copy");
+        #pragma simd
+        for(int i=start;i<end;i++)
+        {
+            c[i] = a[i];
+        }
+        LIKWID_MARKER_STOP("copy");
+    }
+    time_stop(&timer);
+    copy_times[id] = time_print(&timer);
+
+    time_start(&timer);
+    for (int k=0; k<ITER; k++)
+    {
+        LIKWID_MARKER_START("triad");
+        #pragma simd
+        for(int i=start;i<end;i++)
+        {
+            a[i] = b[i] + c[i] * d[i];
+        }
+        LIKWID_MARKER_STOP("triad");
+    }
+    time_stop(&timer);
+    triad_times[id] = time_print(&timer);
+    return 0;
+}
+
+/* Start one calc_thread worker per CPU in the process affinity mask, wait for all of them
+ * and report copy/triad bandwidth based on the slowest worker's time per iteration. */
+int
+main(int argc, char ** argv)
+{
+    cpu_set_t cpuset;
+    CPU_ZERO(&cpuset); /* keeps the mask defined even if sched_getaffinity fails */
+    sched_getaffinity(getpid(),sizeof(cpu_set_t), &cpuset);
+    std::thread t[CPU_SETSIZE];
+    double *a,*b,*c,*d;
+    double copy_time = 0.0, triad_time = 0.0;
+    int num_threads = 0, id = 0;
+
+    for (int i=0;i<CPU_SETSIZE; i++)
+    {
+        if (CPU_ISSET(i, &cpuset))
+        {
+            num_threads++;
+        }
+        copy_times[i] = 0.0;
+        triad_times[i] = 0.0;
+    }
+
+    printf ("Number of Threads requested = %i\n",num_threads);
+    allocate_vector(&a, SIZE);
+    allocate_vector(&b, SIZE);
+    allocate_vector(&c, SIZE);
+    allocate_vector(&d, SIZE);
+    LIKWID_MARKER_INIT;
+
+    #pragma ivdep
+    for (int j=0; j<SIZE; ++j) {
+        a[j] = 1.0;
+        b[j] = 2.0;
+        c[j] = 0.0;
+        d[j] = 1.0;
+    }
+
+    for (int i=0;i<CPU_SETSIZE; i++)
+    {
+        if (CPU_ISSET(i, &cpuset))
+        {
+            t[i] = std::thread( calc_thread, a, b, c, d, id, num_threads);
+            id++;
+            if (id >= num_threads)
+                break;
+        }
+    }
+    id = 0;
+    for (int i=0;i<CPU_SETSIZE; i++)
+    {
+        if (CPU_ISSET(i, &cpuset))
+        {
+            t[i].join();
+            /* Workers run concurrently, so a phase lasts as long as its slowest worker, not the sum. */
+            if (copy_times[id]/(double)ITER > copy_time) copy_time = copy_times[id]/(double)ITER;
+            if (triad_times[id]/(double)ITER > triad_time) triad_time = triad_times[id]/(double)ITER;
+            id++;
+            if (id >= num_threads)
+                break;
+        }
+    }
+
+    printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
+            1E-6*(2*SIZE*sizeof(double)),
+            copy_time,
+            1E-6*((2*SIZE*sizeof(double))/copy_time));
+    printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n",
+            1E-6*(4*SIZE*sizeof(double)),
+            triad_time,
+            1E-6*((4*SIZE*sizeof(double))/triad_time));
+    LIKWID_MARKER_CLOSE;
+    free(a); free(b); free(c); free(d); /* vectors came from posix_memalign */
+    return 0;
+}
diff --git a/test/stream_cilk.c b/test/stream_cilk.c
new file mode 100644
index 0000000..fcbe1a7
--- /dev/null
+++ b/test/stream_cilk.c
@@ -0,0 +1,217 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <stdint.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sched.h>
+
+#include <cilk/cilk.h>
+#include <cilk/cilk_api.h>
+
+
+#define ITER 10
+#define SIZE 40000000
+
+#define gettid() syscall(SYS_gettid)
+#include <likwid.h>
+#define HLINE "-------------------------------------------------------------\n"
+
+#ifndef MIN
+#define MIN(x,y) ((x)<(y)?(x):(y))
+#endif
+
+typedef struct { /* pair of gettimeofday() samples delimiting one timed interval */
+ struct timeval before; /* written by time_start() */
+ struct timeval after; /* written by time_stop() */
+} TimeData;
+
+
+/* Record "now" (gettimeofday) as the beginning of the measured interval. */
+void time_start(TimeData* time) {
+    gettimeofday(&time->before, NULL);
+}
+
+
+/* Record "now" (gettimeofday) as the end of the measured interval. */
+void time_stop(TimeData* time) {
+    gettimeofday(&time->after, NULL);
+}
+
+/* Return the interval between time->before and time->after in seconds.
+ * The arithmetic is done in double so that sec*1e6 cannot overflow a 32-bit long. */
+double time_print(TimeData* time)
+{
+    /* usec may be negative when the microsecond field wrapped; the sum is still correct. */
+    double sec  = (double)(time->after.tv_sec - time->before.tv_sec);
+    double usec = (double)(time->after.tv_usec - time->before.tv_usec);
+
+    return (sec * 1000000.0 + usec) / 1000000.0;
+}
+
+static int nprocessors = 0; /* scan bound for getProcessorID(); main() sets it from sysconf(_SC_NPROCESSORS_CONF) */
+
+/* Walk the CPU ids below nprocessors and return the first one contained in cpu_set.
+ * If none is contained, the index at which the scan stopped is returned
+ * (nprocessors for a positive bound, 0 otherwise). */
+static int
+getProcessorID(cpu_set_t* cpu_set)
+{
+    int cpu = 0;
+
+    while (cpu < nprocessors && !CPU_ISSET(cpu, cpu_set))
+    {
+        cpu++;
+    }
+    return cpu;
+}
+
+
+/* CPU id of the calling thread taken from its affinity mask via getProcessorID(); -1 if the mask is unreadable. */
+int threadProcessorId()
+{
+    cpu_set_t cpu_set;
+    CPU_ZERO(&cpu_set);
+    if (sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpu_set) != 0) return -1;
+    return getProcessorID(&cpu_set);
+}
+
+/* Allocate size doubles on a 64-byte boundary and store the pointer in *ptr.
+ * Every posix_memalign failure is fatal (message on stderr, then exit), so the function only
+ * returns after a successful allocation; previously an unexpected error code fell through. */
+void allocate_vector(double** ptr, uint64_t size)
+{
+    int errorCode;
+
+    errorCode = posix_memalign((void**) ptr, 64, size*sizeof(double));
+
+    if (errorCode)
+    {
+        if (errorCode == EINVAL)
+            fprintf(stderr,
+                    "Alignment parameter is not a power of two\n");
+        else if (errorCode == ENOMEM)
+            fprintf(stderr,
+                    "Insufficient memory to fulfill the request\n");
+        else
+            fprintf(stderr, "posix_memalign failed with error %d\n", errorCode);
+        exit(EXIT_FAILURE);
+    }
+}
+
+
+
+/* Worker count as reported by the Cilk runtime (__cilkrts_get_nworkers). */
+static int get_nworkers() {
+    return __cilkrts_get_nworkers();
+}
+/* Total worker count as reported by the Cilk runtime (__cilkrts_get_total_workers). */
+static int get_totalworkers() {
+    return __cilkrts_get_total_workers();
+}
+
+/* Print worker number, kernel TID and current CPU; the TID is a long (syscall()), hence %ld instead of %lu. */
+static int show_thread()
+{
+    printf("Thread %d TID %ld CPU %d\n", __cilkrts_get_worker_number(), gettid(), sched_getcpu());
+    return 0;
+}
+
+/* Cilk Plus variant of the STREAM copy/triad test: times ITER sweeps of each kernel, prints
+ * the resulting bandwidth and finally lets the workers print their thread/CPU placement. */
+int main(){
+    int i, k;
+    int nworkers, totalworkers;
+    double *a, *b, *c, *d;
+    TimeData timer;
+    double triad_time, copy_time;
+
+    nprocessors = sysconf(_SC_NPROCESSORS_CONF);
+
+    nworkers = cilk_spawn get_nworkers();
+    totalworkers = cilk_spawn get_totalworkers();
+    /* Spawned results may only be read after a sync; nworkers is used as a loop bound below. */
+    cilk_sync;
+
+    LIKWID_MARKER_INIT;
+
+    cilk_spawn allocate_vector(&a, SIZE);
+    cilk_spawn allocate_vector(&b, SIZE);
+    cilk_spawn allocate_vector(&c, SIZE);
+    cilk_spawn allocate_vector(&d, SIZE);
+    cilk_sync;
+
+    for (i=0; i<SIZE; i++) {
+        a[i] = 1.0;
+        b[i] = 2.0;
+        c[i] = 0.0;
+        d[i] = 1.0;
+    }
+
+    /* Copy kernel c = a; the marker start/stop calls are spawned nworkers times around each sweep. */
+    time_start(&timer);
+    for (k=0; k<ITER; k++)
+    {
+        for (i=0;i<nworkers;i++)
+        {
+            cilk_spawn LIKWID_MARKER_START("copy");
+        }
+        cilk_sync;
+        cilk_for(i=0;i<SIZE;i++)
+        {
+            c[i] = a[i];
+        }
+        for (i=0;i<nworkers;i++)
+        {
+            cilk_spawn LIKWID_MARKER_STOP("copy");
+        }
+        cilk_sync;
+    }
+    time_stop(&timer);
+    copy_time = time_print(&timer)/(double)ITER;
+
+    /* Triad kernel a = b + c * d, instrumented the same way. */
+    time_start(&timer);
+    for (k=0; k<ITER; k++)
+    {
+        for (i=0;i<nworkers;i++)
+        {
+            cilk_spawn LIKWID_MARKER_START("triad");
+        }
+        cilk_sync;
+        cilk_for(i=0;i<SIZE;i++)
+        {
+            a[i] = b[i] + c[i] * d[i];
+        }
+        for (i=0;i<nworkers;i++)
+        {
+            cilk_spawn LIKWID_MARKER_STOP("triad");
+        }
+        cilk_sync;
+    }
+    time_stop(&timer);
+    triad_time = time_print(&timer)/(double)ITER;
+
+    printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
+            1E-6*(2*SIZE*sizeof(double)),
+            copy_time,
+            1E-6*((2*SIZE*sizeof(double))/copy_time));
+    printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n",
+            1E-6*(4*SIZE*sizeof(double)),
+            triad_time,
+            1E-6*((4*SIZE*sizeof(double))/triad_time));
+
+    printf("Main PID %d\n",getpid());
+    for (i=0;i<nworkers;i++)
+    {
+        cilk_spawn show_thread();
+    }
+    cilk_sync;
+
+    LIKWID_MARKER_CLOSE;
+    free(a); free(b); free(c); free(d); /* vectors came from posix_memalign */
+    return 0;
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/likwid/likwid.git
More information about the Likwid-commit
mailing list