[Likwid-commit] [likwid] 04/04: re-add stream test (changed license)

Christoph Martin chrism at debian.org
Tue Jun 21 10:34:20 UTC 2016


This is an automated email from the git hooks/post-receive script.

chrism pushed a commit to branch upstream
in repository likwid.

commit c568e344449256a2060afcff9419d291b907d3d1
Author: Christoph Martin <martin at uni-mainz.de>
Date:   Tue Jun 21 12:29:48 2016 +0200

    re-add stream test (changed license)
---
 test/stream-API.c  | 437 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 test/stream.c      | 199 ++++++++++++++++++++++++
 test/stream.cc     | 227 ++++++++++++++++++++++++++++
 test/stream_cilk.c | 217 ++++++++++++++++++++++++++
 4 files changed, 1080 insertions(+)

diff --git a/test/stream-API.c b/test/stream-API.c
new file mode 100644
index 0000000..0fe292e
--- /dev/null
+++ b/test/stream-API.c
@@ -0,0 +1,437 @@
+/*
+ * =======================================================================================
+ *
+ *      Filename:  stream-API.c
+ *
+ *      Description:  Copy of the STREAM benchmark (copy, scale, add and triad) with
+ *                    hardware performance measurement instrumentation using LIKWID
+ *
+ *      Version:   <VERSION>
+ *      Released:  <DATE>
+ *
+ *      Author:   Thomas Roehl (tr), thomas.roehl at googlemail.com
+ *      Project:  likwid
+ *
+ *      Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ *
+ *      This program is free software: you can redistribute it and/or modify it under
+ *      the terms of the GNU General Public License as published by the Free Software
+ *      Foundation, either version 3 of the License, or (at your option) any later
+ *      version.
+ *
+ *      This program is distributed in the hope that it will be useful, but WITHOUT ANY
+ *      WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ *      PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+ *
+ *      You should have received a copy of the GNU General Public License along with
+ *      this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * =======================================================================================
+ */
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#ifdef _OPENMP
+#include <omp.h>
+# endif
+#include <stdint.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sched.h>
+#include <pthread.h>
+
+#define ITER 100
+#define SIZE 40000000
+#define DATATYPE float
+
+#define gettid() syscall(SYS_gettid)
+#include <likwid.h>
+#define HLINE "-------------------------------------------------------------\n"
+
+#ifndef MIN
+#define MIN(x,y) ((x)<(y)?(x):(y))
+#endif
+
+/* Wall-clock interval: start and end samples taken with gettimeofday(). */
+typedef struct {
+    struct timeval before, after;
+} TimeData;
+
+
+void time_start(TimeData* time) {
+    /* Sample the wall clock into the interval's start slot. */
+    gettimeofday(&time->before, NULL);
+}
+
+
+void time_stop(TimeData* time) {
+    /* Sample the wall clock into the interval's end slot. */
+    gettimeofday(&time->after, NULL);
+}
+
+/* Return the elapsed time between time->before and time->after in seconds. */
+double time_print(TimeData* time) {
+    /* The whole-second difference is scaled to microseconds in integer
+     * arithmetic before the conversion to double, as tv_usec already is. */
+    long int wholeSec = time->after.tv_sec - time->before.tv_sec;
+    long int endUsec = (wholeSec * 1000000) + time->after.tv_usec;
+    double elapsedUsec = (double) endUsec - (double) time->before.tv_usec;
+
+    return elapsedUsec / 1000000;
+}
+
+/* Return the lowest CPU index set in cpu_set. All CPU_SETSIZE indices of a
+ * cpu_set_t are examined (not only the first 128); an empty set yields
+ * CPU_SETSIZE, which is never a valid index. */
+static int
+getProcessorID(cpu_set_t* cpu_set)
+{
+    int processorId = 0;
+
+    while (processorId < CPU_SETSIZE && !CPU_ISSET(processorId, cpu_set))
+    {
+        processorId++;
+    }
+    return processorId;
+}
+
+/* Return what getProcessorID() reports for the calling thread's affinity mask. */
+int  threadGetProcessorId() {
+    cpu_set_t affinityMask;
+
+    CPU_ZERO(&affinityMask);
+    sched_getaffinity(gettid(), sizeof(affinityMask), &affinityMask);
+    return getProcessorID(&affinityMask);
+}
+
+/* Allocate size elements of DATATYPE on a 64-byte boundary and store the
+ * address in *ptr. Prints a message to stderr and exits the process when
+ * posix_memalign() returns EINVAL or ENOMEM; any other code is ignored. */
+void allocate_vector(DATATYPE** ptr, uint64_t size)
+{
+    const int errorCode = posix_memalign((void**) ptr, 64, size*sizeof(DATATYPE));
+    switch (errorCode)
+    {
+        case EINVAL:
+            fprintf(stderr,
+                "Alignment parameter is not a power of two\n");
+            exit(EXIT_FAILURE);
+            break;
+        case ENOMEM:
+            fprintf(stderr,
+                "Insufficient memory to fulfill the request\n");
+            exit(EXIT_FAILURE);
+            break;
+        default:
+            break;
+    }
+}
+
+
+/* Run the copy, scale, stream (add) and triad kernels on the CPUs listed in
+ * argc[1] while measuring the event string argc[2] through the LIKWID perfmon
+ * API; print the bandwidth and the counter results after every kernel. */
+int main(int argn, char** argc)
+{
+    int err, i ,j;
+    int numCPUs = 0;
+    int gid;
+    DATATYPE *a,*b,*c,*d;
+    TimeData timer;
+    double triad_time, copy_time, scale_time, stream_time;
+    char estr[1024];
+    double result, scalar = 3.0;
+    char* ptr;
+
+    if (argn != 3)
+    {
+        printf("Usage: %s <cpustr> <events>\n", argc[0]);
+        return 1;
+    }
+    /* Bounded copy: an over-long event string is truncated, never written past estr. */
+    snprintf(estr, sizeof(estr), "%s", argc[2]);
+
+    allocate_vector(&a, SIZE);
+    allocate_vector(&b, SIZE);
+    allocate_vector(&c, SIZE);
+    allocate_vector(&d, SIZE);
+
+    err = topology_init();
+    if (err < 0)
+    {
+        printf("Failed to initialize LIKWID's topology module\n");
+        return 1;
+    }
+    CpuTopology_t topo = get_cpuTopology();
+    affinity_init();
+    int* cpus = (int*)malloc(topo->numHWThreads * sizeof(int));
+    if (!cpus)
+        return 1;
+    numCPUs = cpustr_to_cpulist(argc[1], cpus, topo->numHWThreads);
+    omp_set_num_threads(numCPUs);
+    err = perfmon_init(numCPUs, cpus);
+    if (err < 0)
+    {
+        printf("Failed to initialize LIKWID's performance monitoring module\n");
+        affinity_finalize();
+        topology_finalize();
+        return 1;
+    }
+    gid = perfmon_addEventSet(estr);
+    if (gid < 0)
+    {
+        printf("Failed to add event string %s to LIKWID's performance monitoring module\n", estr);
+        perfmon_finalize();
+        affinity_finalize();
+        topology_finalize();
+        return 1;
+    }
+
+    err = perfmon_setupCounters(gid);
+    if (err < 0)
+    {
+        printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid);
+        perfmon_finalize();
+        affinity_finalize();
+        topology_finalize();
+        return 1;
+    }
+
+#ifdef _OPENMP
+    printf(HLINE);
+#pragma omp parallel
+    {
+#pragma omp master
+    {
+        printf ("Number of Threads requested = %i\n",omp_get_num_threads());
+    }
+    likwid_pinThread(cpus[omp_get_thread_num()]);
+    printf ("Thread %d running on processor %d ....\n",omp_get_thread_num(),sched_getcpu());
+    }
+#endif
+
+#pragma omp parallel for
+    for (int j=0; j<SIZE; j++) {
+        a[j] = 1.0;
+        b[j] = 2.0;
+        c[j] = 0.0;
+        d[j] = 1.0;
+    }
+    /* Copy kernel: c[j] = a[j], repeated ITER times between counter start and stop. */
+    err = perfmon_startCounters();
+    if (err < 0)
+    {
+        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
+        perfmon_finalize();
+        topology_finalize();
+        return 1;
+    }
+    time_start(&timer);
+#pragma omp parallel
+    {
+        for (int k=0; k<ITER; k++)
+        {
+            LIKWID_MARKER_START("copy");
+#pragma omp for
+            for (int j=0; j<SIZE; j++)
+            {
+                c[j] = a[j];
+            }
+            LIKWID_MARKER_STOP("copy");
+        }
+    }
+    time_stop(&timer);
+    err = perfmon_stopCounters();
+    copy_time = time_print(&timer)/(double)ITER;
+    if (err < 0)
+    {
+        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
+        perfmon_finalize();
+        topology_finalize();
+        return 1;
+    }
+
+    printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
+                        1E-6*(2*SIZE*sizeof(DATATYPE)),
+                        copy_time,
+                        1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time));
+    /* NOTE(review): confirm perfmon_getResult() wants a CPU id, not the index i. */
+    ptr = strtok(estr,",");
+    j = 0;
+    while (ptr != NULL)
+    {
+        for (i = 0;i < numCPUs; i++)
+        {
+            result = perfmon_getResult(gid, j, cpus[i]);
+            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
+        }
+        ptr = strtok(NULL,",");
+        j++;
+    }
+    snprintf(estr, sizeof(estr), "%s", argc[2]);   /* strtok() cut estr apart; restore it */
+    perfmon_setupCounters(gid);
+    /* Scale kernel: b[j] = scalar*c[j]. */
+    err = perfmon_startCounters();
+    if (err < 0)
+    {
+        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
+        perfmon_finalize();
+        topology_finalize();
+        return 1;
+    }
+    time_start(&timer);
+#pragma omp parallel
+    {
+        for (int k=0; k<ITER; k++)
+        {
+            LIKWID_MARKER_START("scale");
+#pragma omp for
+            for (int j=0; j<SIZE; j++)
+            {
+                b[j] = scalar*c[j];
+            }
+            LIKWID_MARKER_STOP("scale");
+        }
+    }
+    time_stop(&timer);
+    err = perfmon_stopCounters();
+    scale_time = time_print(&timer)/(double)ITER;
+    if (err < 0)
+    {
+        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
+        perfmon_finalize();
+        topology_finalize();
+        return 1;
+    }
+
+    printf("Processed %.1f Mbyte at scale benchmark in %.4f seconds: %.2f MByte/s\n",
+                        1E-6*(2*SIZE*sizeof(DATATYPE)),
+                        scale_time,
+                        1E-6*((2*SIZE*sizeof(DATATYPE))/scale_time));
+
+    ptr = strtok(estr,",");
+    j = 0;
+    while (ptr != NULL)
+    {
+        for (i = 0;i < numCPUs; i++)
+        {
+            result = perfmon_getResult(gid, j, cpus[i]);
+            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
+        }
+        ptr = strtok(NULL,",");
+        j++;
+    }
+    snprintf(estr, sizeof(estr), "%s", argc[2]);   /* strtok() cut estr apart; restore it */
+    perfmon_setupCounters(gid);
+    err = perfmon_startCounters();   /* stream (add) kernel: c[j] = a[j] + b[j] */
+    if (err < 0)
+    {
+        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
+        perfmon_finalize();
+        topology_finalize();
+        return 1;
+    }
+    time_start(&timer);
+#pragma omp parallel
+    {
+        for (int k=0; k<ITER; k++)
+        {
+            LIKWID_MARKER_START("stream");
+#pragma omp for
+            for (int j=0; j<SIZE; j++)
+            {
+                c[j] = a[j] + b[j];
+            }
+            LIKWID_MARKER_STOP("stream");
+        }
+    }
+    time_stop(&timer);
+    err = perfmon_stopCounters();
+    stream_time = time_print(&timer)/(double)ITER;
+    if (err < 0)
+    {
+        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
+        perfmon_finalize();
+        topology_finalize();
+        return 1;
+    }
+
+    printf("Processed %.1f Mbyte at stream benchmark in %.4f seconds: %.2f MByte/s\n",
+                        1E-6*(2*SIZE*sizeof(DATATYPE)),
+                        stream_time,
+                        1E-6*((2*SIZE*sizeof(DATATYPE))/stream_time));
+
+    ptr = strtok(estr,",");
+    j = 0;
+    while (ptr != NULL)
+    {
+        for (i = 0;i < numCPUs; i++)
+        {
+            result = perfmon_getResult(gid, j, cpus[i]);
+            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
+        }
+        ptr = strtok(NULL,",");
+        j++;
+    }
+    snprintf(estr, sizeof(estr), "%s", argc[2]);   /* strtok() cut estr apart; restore it */
+    perfmon_setupCounters(gid);
+    err = perfmon_startCounters();   /* triad kernel: a[j] = b[j] + c[j]*scalar */
+    if (err < 0)
+    {
+        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
+        perfmon_finalize();
+        topology_finalize();
+        return 1;
+    }
+    time_start(&timer);
+#pragma omp parallel
+    {
+        for (int k=0; k<ITER; k++)
+        {
+            LIKWID_MARKER_START("triad");
+#pragma omp for
+            for (int j=0; j<SIZE; j++)
+            {
+                a[j] = b[j] +  c[j] * scalar;
+            }
+            LIKWID_MARKER_STOP("triad");
+        }
+    }
+    time_stop(&timer);
+    err = perfmon_stopCounters();
+    triad_time = time_print(&timer)/(double)ITER;
+    if (err < 0)
+    {
+        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
+        perfmon_finalize();
+        topology_finalize();
+        return 1;
+    }
+
+    printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n",
+                        1E-6*(4*SIZE*sizeof(DATATYPE)),
+                        triad_time,
+                        1E-6*((4*SIZE*sizeof(DATATYPE))/triad_time));
+    ptr = strtok(estr,",");
+    j = 0;
+    while (ptr != NULL)
+    {
+        for (i = 0;i < numCPUs; i++)
+        {
+            result = perfmon_getResult(gid, j, cpus[i]);
+            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
+        }
+        ptr = strtok(NULL,",");
+        j++;
+    }
+
+    perfmon_finalize();
+    affinity_finalize();
+    topology_finalize();
+    return 0;
+}
+
diff --git a/test/stream.c b/test/stream.c
new file mode 100644
index 0000000..15f2ca3
--- /dev/null
+++ b/test/stream.c
@@ -0,0 +1,199 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#ifdef _OPENMP
+#include <omp.h>
+# endif
+#include <stdint.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sched.h>
+#include <pthread.h>
+
+#define ITER 10
+#define SIZE 40000000
+
+#define gettid() syscall(SYS_gettid)
+#include <likwid.h>
+#define HLINE "-------------------------------------------------------------\n"
+
+#ifndef MIN
+#define MIN(x,y) ((x)<(y)?(x):(y))
+#endif
+
+/* Pair of gettimeofday() samples that bracket one timed region. */
+typedef struct {
+    struct timeval before, after;
+} TimeData;
+
+
+void time_start(TimeData* time) {
+    /* Store the current wall-clock time as the start of the interval. */
+    gettimeofday(&time->before, NULL);
+}
+
+
+void time_stop(TimeData* time) {
+    /* Store the current wall-clock time as the end of the interval. */
+    gettimeofday(&time->after, NULL);
+}
+
+/* Return the length of the interval stored in *time, in seconds. */
+double time_print(TimeData* time) {
+    /* Seconds are converted to microseconds while still integers; only the
+     * final subtraction and the division are carried out in double. */
+    long int wholeSec = time->after.tv_sec - time->before.tv_sec;
+    long int endUsec = (wholeSec * 1000000) + time->after.tv_usec;
+    double elapsedUsec = (double) endUsec - (double) time->before.tv_usec;
+
+    return elapsedUsec / 1000000;
+}
+
+/* Return the lowest CPU index set in cpu_set. All CPU_SETSIZE indices of a
+ * cpu_set_t are examined (not only the first 128); an empty set yields
+ * CPU_SETSIZE, which is never a valid index. */
+static int
+getProcessorID(cpu_set_t* cpu_set)
+{
+    int processorId = 0;
+
+    while (processorId < CPU_SETSIZE && !CPU_ISSET(processorId, cpu_set))
+    {
+        processorId++;
+    }
+    return processorId;
+}
+
+/* Query the calling thread's affinity mask and return getProcessorID() of it. */
+int  threadGetProcessorId() {
+    cpu_set_t affinityMask;
+
+    CPU_ZERO(&affinityMask);
+    sched_getaffinity(gettid(), sizeof(affinityMask), &affinityMask);
+    return getProcessorID(&affinityMask);
+}
+
+/* Allocate size doubles on a 64-byte boundary and store the address in *ptr.
+ * Prints a message to stderr and exits the process when posix_memalign()
+ * returns EINVAL or ENOMEM; any other return code is ignored. */
+void allocate_vector(double** ptr, uint64_t size)
+{
+    const int errorCode = posix_memalign((void**) ptr, 64, size*sizeof(double));
+    switch (errorCode)
+    {
+        case EINVAL:
+            fprintf(stderr,
+                "Alignment parameter is not a power of two\n");
+            exit(EXIT_FAILURE);
+            break;
+        case ENOMEM:
+            fprintf(stderr,
+                "Insufficient memory to fulfill the request\n");
+            exit(EXIT_FAILURE);
+            break;
+        default:
+            break;
+    }
+}
+
+
+int main(int argn, char** argc)
+{
+    double *a,*b,*c,*d;
+    TimeData timer;
+    double triad_time, copy_time;
+    /* Times the copy and triad kernels inside LIKWID marker regions and prints both bandwidths. */
+    allocate_vector(&a, SIZE);
+    allocate_vector(&b, SIZE);
+    allocate_vector(&c, SIZE);
+    allocate_vector(&d, SIZE);
+
+#ifdef LIKWID_PERFMON
+    printf("Using likwid\n");
+#endif
+    /* Marker setup macro from likwid.h; it precedes every parallel region below. */
+    LIKWID_MARKER_INIT;
+
+#ifdef _OPENMP
+    printf(HLINE);
+#pragma omp parallel
+    {
+#pragma omp master
+	{
+	    printf ("Number of Threads requested = %i\n",omp_get_num_threads());
+	}
+	printf ("Thread %d running on processor %d ....\n",omp_get_thread_num(),sched_getcpu());
+    }
+#endif
+
+#pragma omp parallel for
+    for (int j=0; j<SIZE; j++) {
+	a[j] = 1.0;
+	b[j] = 2.0;
+	c[j] = 0.0;
+	d[j] = 1.0;
+    }
+    /* Copy kernel c[j] = a[j]: the ITER repetitions are timed as a whole. */
+    time_start(&timer);
+#pragma omp parallel
+    {
+        for (int k=0; k<ITER; k++)
+        {
+            LIKWID_MARKER_START("copy");
+#pragma simd
+#pragma omp for
+            for (int j=0; j<SIZE; j++)
+            {
+            
+                c[j] = a[j];
+            }
+            LIKWID_MARKER_STOP("copy");
+        }
+    }
+    time_stop(&timer);
+    copy_time = time_print(&timer)/(double)ITER;
+    /* Triad kernel a[j] = b[j] + c[j]*d[j]; "triad_total" spans all repetitions. */
+    time_start(&timer);
+#pragma omp parallel
+    {
+	LIKWID_MARKER_START("triad_total");
+        for (int k=0; k<ITER; k++)
+        {
+
+            LIKWID_MARKER_START("triad");
+#pragma simd
+#pragma omp for
+            for (int j=0; j<SIZE; j++)
+            {
+
+                a[j] = b[j] +  c[j] * d[j];
+            }
+            LIKWID_MARKER_STOP("triad");
+        }
+	LIKWID_MARKER_STOP("triad_total");
+    }
+    time_stop(&timer);
+    triad_time = time_print(&timer)/(double)ITER;
+    /* Both times were divided by ITER above, so the figures are per repetition. */
+
+    printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
+                        1E-6*(2*SIZE*sizeof(double)),
+                        copy_time,
+                        1E-6*((2*SIZE*sizeof(double))/copy_time));
+    printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n",
+                        1E-6*(4*SIZE*sizeof(double)),
+                        triad_time,
+                        1E-6*((4*SIZE*sizeof(double))/triad_time));
+
+
+    LIKWID_MARKER_CLOSE;
+    free(a);
+    free(b);
+    free(c);
+    free(d);
+    return 0;
+}
+
diff --git a/test/stream.cc b/test/stream.cc
new file mode 100644
index 0000000..489bb09
--- /dev/null
+++ b/test/stream.cc
@@ -0,0 +1,227 @@
+#include <sys/types.h>
+#include <unistd.h>
+#include <iostream>
+#include <string>
+#include <atomic>
+#include <thread>
+#include <likwid.h>
+#include <sched.h>
+#include <syscall.h>
+#include <sys/time.h>
+
+#define gettid() syscall(SYS_gettid)
+#define ITER 10
+#define SIZE 40000000
+#ifdef __GNUG__
+#define RESTRICT __restrict__
+#else
+#define RESTRICT restrict
+#endif
+using namespace std;
+
+/* Start/end wall-clock samples of one timed region (see time_start/time_stop). */
+typedef struct {
+    struct timeval before, after;
+} TimeData;
+
+
+void time_start(TimeData* time) {
+    /* Take the start-of-interval wall-clock sample. */
+    gettimeofday(&time->before, NULL);
+}
+
+
+void time_stop(TimeData* time) {
+    /* Take the end-of-interval wall-clock sample. */
+    gettimeofday(&time->after, NULL);
+}
+
+/* Return the time between the two samples stored in *time, in seconds. */
+double time_print(TimeData* time) {
+    /* Integer arithmetic up to the microsecond end stamp; the subtraction of
+     * the start microseconds and the scaling to seconds happen in double. */
+    long int wholeSec = time->after.tv_sec - time->before.tv_sec;
+    long int endUsec = (wholeSec * 1000000) + time->after.tv_usec;
+    double elapsedUsec = (double) endUsec - (double) time->before.tv_usec;
+
+    return elapsedUsec / 1000000;
+}
+
+/* Return the lowest CPU index set in cpu_set. All CPU_SETSIZE indices of a
+ * cpu_set_t are examined (not only the first 128); an empty set yields
+ * CPU_SETSIZE, which is never a valid index. */
+static int
+getProcessorID(cpu_set_t* cpu_set)
+{
+    int processorId = 0;
+
+    while (processorId < CPU_SETSIZE && !CPU_ISSET(processorId, cpu_set))
+    {
+        processorId++;
+    }
+    return processorId;
+}
+
+/* Report the CPU of the calling thread: the CPU it is currently executing on
+ * when its affinity mask allows more than one CPU, otherwise the CPU that
+ * getProcessorID() finds in the mask. */
+int  threadGetProcessorId()
+{
+    cpu_set_t affinityMask;
+
+    CPU_ZERO(&affinityMask);
+    sched_getaffinity(gettid(), sizeof(affinityMask), &affinityMask);
+
+    const bool pinned = CPU_COUNT(&affinityMask) <= 1;
+
+    return pinned ? getProcessorID(&affinityMask)
+                  : sched_getcpu();
+}
+
+
+double copy_times[CPU_SETSIZE];   /* copy time of each worker, written by calc_thread() at index id */
+double triad_times[CPU_SETSIZE];  /* triad time of each worker, written by calc_thread() at index id */
+
+/* Allocate size doubles on a 64-byte boundary and store the address in *ptr.
+ * Prints a message to stderr and exits the process when posix_memalign()
+ * returns EINVAL or ENOMEM; any other return code is ignored. */
+void allocate_vector(double** ptr, uint64_t size)
+{
+    const int errorCode = posix_memalign((void**) ptr, 64, size*sizeof(double));
+    switch (errorCode)
+    {
+        case EINVAL:
+            fprintf(stderr,
+                "Alignment parameter is not a power of two\n");
+            exit(EXIT_FAILURE);
+            break;
+        case ENOMEM:
+            fprintf(stderr,
+                "Insufficient memory to fulfill the request\n");
+            exit(EXIT_FAILURE);
+            break;
+        default:
+            break;
+    }
+}
+
+
+
+/* Worker body: run the copy and triad kernels ITER times each on slice id of
+ * all and store the total time per kernel in copy_times[id]/triad_times[id].
+ * The last slice also takes the SIZE % all leftover elements. Returns 0. */
+int calc_thread(double* RESTRICT a, double* RESTRICT b, double* RESTRICT c, double* RESTRICT d, int id, int all)
+{
+    int i;
+    TimeData timer;
+    const int start = id*(SIZE/all);
+    const int end = (id == all-1) ? SIZE : start+(SIZE/all);
+
+    LIKWID_MARKER_THREADINIT;
+    printf ("Thread %d running on processor %d ....\n", id, threadGetProcessorId());
+
+    time_start(&timer);
+    for (int k=0; k<ITER; k++)
+    {
+        LIKWID_MARKER_START("copy");
+        #pragma simd
+        for(i=start;i<end;i++)
+        {
+            c[i] = a[i];
+        }
+        LIKWID_MARKER_STOP("copy");
+    }
+    time_stop(&timer);
+    copy_times[id] = time_print(&timer);
+
+    time_start(&timer);
+    for (int k=0; k<ITER; k++)
+    {
+        LIKWID_MARKER_START("triad");
+        #pragma simd
+        for(i=start;i<end;i++)
+        {
+            a[i] = b[i] +  c[i] * d[i];
+        }
+        LIKWID_MARKER_STOP("triad");
+    }
+    time_stop(&timer);
+    triad_times[id] = time_print(&timer);
+    return 0;
+}
+
+/* Start one std::thread per CPU in the process affinity mask, let each run
+ * calc_thread() on its slice of the vectors, then report the copy and triad
+ * bandwidth using the time of the slowest thread. */
+int
+main(int argc, char ** argv)
+{
+    cpu_set_t cpuset;
+    CPU_ZERO(&cpuset);
+    sched_getaffinity(getpid(),sizeof(cpu_set_t), &cpuset);
+    std::thread t[CPU_SETSIZE];
+    double *a,*b,*c,*d;
+    double copy_time = 0.0;
+    double triad_time = 0.0;
+    int num_threads = 0;
+    int id = 0;
+
+    /* copy_times/triad_times are file-scope arrays and therefore start zeroed. */
+    num_threads = CPU_COUNT(&cpuset);
+
+    printf ("Number of Threads requested = %i\n",num_threads);
+
+    allocate_vector(&a, SIZE);
+    allocate_vector(&b, SIZE);
+    allocate_vector(&c, SIZE);
+    allocate_vector(&d, SIZE);
+    LIKWID_MARKER_INIT;
+
+    #pragma ivdep
+    for (int j=0; j<SIZE; ++j) {
+        a[j] = 1.0;
+        b[j] = 2.0;
+        c[j] = 0.0;
+        d[j] = 1.0;
+    }
+
+    for (int i=0;i<CPU_SETSIZE; i++)
+    {
+        if (CPU_ISSET(i, &cpuset))
+        {
+            t[i] = std::thread( calc_thread, a, b, c, d, id, num_threads);
+            id++;
+            if (id >= num_threads)
+                break;
+        }
+    }
+    id = 0;
+    for (int i=0;i<CPU_SETSIZE; i++)
+    {
+        if (CPU_ISSET(i, &cpuset))
+        {
+            t[i].join();
+            /* The threads ran concurrently: the slowest one sets the elapsed time. */
+            const double copy_iter = copy_times[id]/(double)ITER;
+            const double triad_iter = triad_times[id]/(double)ITER;
+            if (copy_iter > copy_time) copy_time = copy_iter;
+            if (triad_iter > triad_time) triad_time = triad_iter;
+            id++;
+            if (id >= num_threads)
+                break;
+        }
+    }
+
+    printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
+                        1E-6*(2*SIZE*sizeof(double)),
+                        copy_time,
+                        1E-6*((2*SIZE*sizeof(double))/copy_time));
+    printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n",
+                        1E-6*(4*SIZE*sizeof(double)),
+                        triad_time,
+                        1E-6*((4*SIZE*sizeof(double))/triad_time));
+
+    LIKWID_MARKER_CLOSE;
+
+    return 0;
+}
diff --git a/test/stream_cilk.c b/test/stream_cilk.c
new file mode 100644
index 0000000..fcbe1a7
--- /dev/null
+++ b/test/stream_cilk.c
@@ -0,0 +1,217 @@
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <stdint.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sched.h>
+
+#include <cilk/cilk.h>
+#include <cilk/cilk_api.h>
+
+
+#define ITER 10
+#define SIZE 40000000
+
+#define gettid() syscall(SYS_gettid)
+#include <likwid.h>
+#define HLINE "-------------------------------------------------------------\n"
+
+#ifndef MIN
+#define MIN(x,y) ((x)<(y)?(x):(y))
+#endif
+
+/* Two gettimeofday() samples: the start and the end of a timed region. */
+typedef struct {
+    struct timeval before, after;
+} TimeData;
+
+
+void time_start(TimeData* time) {
+    /* Remember the current wall-clock time as the interval start. */
+    gettimeofday(&time->before, NULL);
+}
+
+
+void time_stop(TimeData* time) {
+    /* Remember the current wall-clock time as the interval end. */
+    gettimeofday(&time->after, NULL);
+}
+
+/* Return the duration of the interval recorded in *time, in seconds. */
+double time_print(TimeData* time) {
+    /* The end stamp is built in integer microseconds first; the start
+     * microseconds are subtracted after both sides became double. */
+    long int wholeSec = time->after.tv_sec - time->before.tv_sec;
+    long int endUsec = (wholeSec * 1000000) + time->after.tv_usec;
+    double elapsedUsec = (double) endUsec - (double) time->before.tv_usec;
+
+    return elapsedUsec / 1000000;
+}
+
+static int nprocessors = 0;   /* scan limit of getProcessorID(); main() sets it from sysconf(_SC_NPROCESSORS_CONF) */
+
+/* Scan CPU indices upward from 0 and return the first one set in cpu_set.
+ * The scan is limited by nprocessors; the index at which it stopped is
+ * returned when nothing matched. */
+static int
+getProcessorID(cpu_set_t* cpu_set)
+{
+    int cpu = 0;
+
+    while (cpu < nprocessors && !CPU_ISSET(cpu, cpu_set))
+    {
+        cpu++;
+    }
+    return cpu;
+}
+
+
+/* Return what getProcessorID() reports for the calling thread's affinity mask. */
+int  threadProcessorId() {
+    cpu_set_t affinityMask;
+
+    CPU_ZERO(&affinityMask);
+    sched_getaffinity(gettid(), sizeof(affinityMask), &affinityMask);
+    return getProcessorID(&affinityMask);
+}
+
+/* Allocate size doubles on a 64-byte boundary and store the address in *ptr.
+ * Prints a message to stderr and exits the process when posix_memalign()
+ * returns EINVAL or ENOMEM; any other return code is ignored. */
+void allocate_vector(double** ptr, uint64_t size)
+{
+    const int errorCode = posix_memalign((void**) ptr, 64, size*sizeof(double));
+    switch (errorCode)
+    {
+        case EINVAL:
+            fprintf(stderr,
+                "Alignment parameter is not a power of two\n");
+            exit(EXIT_FAILURE);
+            break;
+        case ENOMEM:
+            fprintf(stderr,
+                "Insufficient memory to fulfill the request\n");
+            exit(EXIT_FAILURE);
+            break;
+        default:
+            break;
+    }
+}
+
+
+
+/* Forward the value of __cilkrts_get_nworkers() from the Cilk runtime. */
+static int get_nworkers() {
+    return __cilkrts_get_nworkers();
+}
+/* Forward the value of __cilkrts_get_total_workers() from the Cilk runtime. */
+static int get_totalworkers() {
+    return __cilkrts_get_total_workers();
+}
+
+/* Print the calling worker's Cilk worker number, thread id and current CPU.
+ * gettid() expands to syscall(), which returns long, hence %ld. Returns 0. */
+static int show_thread() {
+    printf("Thread %d TID %ld CPU %d\n", __cilkrts_get_worker_number(), gettid(), sched_getcpu());
+    return 0;
+}
+
+/* Time the copy and triad kernels written with cilk_for, bracketing every
+ * repetition with LIKWID marker calls that are spawned nworkers times, then
+ * print both bandwidths and one placement line per spawned show_thread(). */
+int main(){
+    int i, k;
+    int nworkers, totalworkers;
+    double *a, *b, *c, *d;
+    TimeData timer;
+    double triad_time, copy_time;
+
+    nprocessors = sysconf(_SC_NPROCESSORS_CONF);
+    /* Called directly: a cilk_spawn result must not be read before cilk_sync. */
+    nworkers = get_nworkers();
+    totalworkers = get_totalworkers();
+
+    LIKWID_MARKER_INIT;
+
+    cilk_spawn allocate_vector(&a, SIZE);
+    cilk_spawn allocate_vector(&b, SIZE);
+    cilk_spawn allocate_vector(&c, SIZE);
+    cilk_spawn allocate_vector(&d, SIZE);
+    cilk_sync;
+
+    for (i=0; i<SIZE; i++) {
+        a[i] = 1.0;
+        b[i] = 2.0;
+        c[i] = 0.0;
+        d[i] = 1.0;
+    }
+
+    time_start(&timer);
+    for (k=0; k<ITER; k++)
+    {
+        for (i=0;i<nworkers;i++)
+        {
+            cilk_spawn LIKWID_MARKER_START("copy");
+        }
+        cilk_sync;
+        cilk_for(i=0;i<SIZE;i++)
+        {
+            c[i] = a[i];
+        }
+        for (i=0;i<nworkers;i++)
+        {
+            cilk_spawn LIKWID_MARKER_STOP("copy");
+        }
+        cilk_sync;
+    }
+    time_stop(&timer);
+    copy_time = time_print(&timer)/(double)ITER;
+
+    time_start(&timer);
+    for (k=0; k<ITER; k++)
+    {
+        for (i=0;i<nworkers;i++)
+        {
+            cilk_spawn LIKWID_MARKER_START("triad");
+        }
+        cilk_sync;
+        cilk_for(i=0;i<SIZE;i++)
+        {
+            a[i] = b[i] +  c[i] * d[i];
+        }
+        for (i=0;i<nworkers;i++)
+        {
+            cilk_spawn LIKWID_MARKER_STOP("triad");
+        }
+        cilk_sync;
+    }
+    time_stop(&timer);
+    triad_time = time_print(&timer)/(double)ITER;
+
+    printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
+                        1E-6*(2*SIZE*sizeof(double)),
+                        copy_time,
+                        1E-6*((2*SIZE*sizeof(double))/copy_time));
+    printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n",
+                        1E-6*(4*SIZE*sizeof(double)),
+                        triad_time,
+                        1E-6*((4*SIZE*sizeof(double))/triad_time));
+
+    printf("Main PID %d\n",getpid());
+    for (i=0;i<nworkers;i++)
+    {
+        cilk_spawn show_thread();
+    }
+    cilk_sync;
+
+    LIKWID_MARKER_CLOSE;
+    free(a);
+    free(b);
+    free(c);
+    free(d);
+    return 0;
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/likwid/likwid.git



More information about the Likwid-commit mailing list