From 5a17f58adf391c94cace31eb9f0613e1a952f2bd Mon Sep 17 00:00:00 2001
From: kmazouzi
Date: Wed, 23 Mar 2016 00:10:27 +0100
Subject: [PATCH] add jacobi for openacc

---
 openacc/Makefile |   32 ++++++++++++++++
 openacc/jacobi.c |  100 ++++++++++++++++++++++++++++++++++++++++++++++++++
 openacc/timer.h  |   95 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 227 insertions(+)
 create mode 100644 openacc/Makefile
 create mode 100644 openacc/jacobi.c
 create mode 100644 openacc/timer.h

diff --git a/openacc/Makefile b/openacc/Makefile
new file mode 100644
index 0000000..1fdee41
--- /dev/null
+++ b/openacc/Makefile
@@ -0,0 +1,32 @@
+# Build the Jacobi relaxation example: sequential, OpenMP and OpenACC (PGI).
+
+CC=gcc
+
+CFLAGS=-O3 -std=c99 -Wall
+LDFLAG= -lm
+
+# -D_OMP switches on the OpenMP-only sections of jacobi.c.
+OMPFLAG=-fopenmp -D_OMP
+
+PGCC=pgcc
+PGCFLAGS=-acc -ta=nvidia -Minfo
+
+NAME=jacobi
+
+# None of these targets names a file it produces, so mark them all phony.
+.PHONY: default seq omp gpu clean
+
+default: seq omp
+
+seq:
+	$(CC) $(CFLAGS) $(NAME).c -o $(NAME).seq.exe $(LDFLAG)
+
+omp:
+	$(CC) $(CFLAGS) $(OMPFLAG) $(NAME).c -o $(NAME).omp.exe $(LDFLAG)
+
+# jacobi.c calls fmax()/fabs(), so pass the math library here as well.
+gpu:
+	$(PGCC) $(PGCFLAGS) $(NAME).c -o $(NAME).pgi.exe $(LDFLAG)
+
+clean:
+	rm -f *.exe
diff --git a/openacc/jacobi.c b/openacc/jacobi.c
new file mode 100644
index 0000000..5e21f42
--- /dev/null
+++ b/openacc/jacobi.c
@@ -0,0 +1,100 @@
+/*
+ * Jacobi relaxation on an NN x NM grid.
+ *
+ * Every interior point is repeatedly replaced by the average of its four
+ * neighbours until the largest change seen in a sweep is no greater than the
+ * tolerance, or the iteration limit is reached. Compile with
+ * -fopenmp -D_OMP to run the sweeps in parallel.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _OMP
+#include <omp.h>
+#endif
+
+#include <math.h>
+// #include <openacc.h>
+#include "timer.h"
+
+#define NN 4096
+#define NM 4096
+
+/* 128 MiB each with 8-byte doubles, hence file scope instead of the stack. */
+double A[NN][NM];
+double Anew[NN][NM];
+
+/*
+ * Runs the relaxation, printing the residual every tenth iteration and the
+ * total elapsed time in seconds at the end.
+ */
+int main(void)
+{
+    const int n = NN;
+    const int m = NM;
+    const int iter_max = 200;
+
+    const double tol = 1.0e-6;
+    double error = 1.0;
+
+    /* sizeof on the arrays avoids doing the n * m product in int. */
+    memset(A, 0, sizeof A);
+    memset(Anew, 0, sizeof Anew);
+
+    /* Column 0 is set to 1.0 in both grids; everything else starts at 0. */
+    for (int j = 0; j < n; j++)
+    {
+        A[j][0] = 1.0;
+        Anew[j][0] = 1.0;
+    }
+
+    printf("Jacobi relaxation Calculation: %d x %d mesh\n", n, m);
+
+    StartTimer();
+    int iter = 0;
+
+/* Bare scope kept as-is; presumably reserved for an OpenACC data region (confirm). */
+{
+    while ( error > tol && iter < iter_max )
+    {
+        error = 0.0;
+
+        /* Sweep 1: new interior values plus the largest per-point change. */
+#ifdef _OMP
+#pragma omp parallel for reduction (max:error)
+#endif
+        for( int j = 1; j < n-1; j++)
+        {
+            for( int i = 1; i < m-1; i++ )
+            {
+                Anew[j][i] = 0.25 * ( A[j][i+1] + A[j][i-1]
+                                    + A[j-1][i] + A[j+1][i]);
+                error = fmax( error, fabs(Anew[j][i] - A[j][i]));
+            }
+        }
+
+        /* Sweep 2: copy the interior back; rows are independent of each other. */
+#ifdef _OMP
+#pragma omp parallel for
+#endif
+        for( int j = 1; j < n-1; j++)
+        {
+            for( int i = 1; i < m-1; i++ )
+            {
+                A[j][i] = Anew[j][i];
+            }
+        }
+
+        if(iter % 10 == 0) printf("%5d, %0.6f\n", iter, error);
+
+        iter++;
+    }
+}
+    /* GetTimer() returns milliseconds; convert to seconds for the report. */
+    double runtime = GetTimer();
+
+    printf(" total: %f s\n", runtime / 1000);
+
+    return 0;
+}
diff --git a/openacc/timer.h b/openacc/timer.h
new file mode 100644
index 0000000..18738ef
--- /dev/null
+++ b/openacc/timer.h
@@ -0,0 +1,95 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Minimal wall-clock stopwatch: StartTimer() records a start point and
+ * GetTimer() returns the milliseconds elapsed since then.
+ *
+ * Everything is static so the header can be included from more than one
+ * translation unit without duplicate-symbol link errors; each translation
+ * unit therefore gets its own independent stopwatch.
+ */
+
+#ifndef TIMER_H
+#define TIMER_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#else
+#include <sys/time.h>
+
+/*
+ * RR: Apparently if no BSD or mac, this is missing.
+ * Test for the macro itself rather than __USE_BSD, so the fallback never
+ * collides with a timersub that <sys/time.h> already supplied.
+ */
+#ifndef timersub
+# define timersub(a, b, result) \
+        do { \
+                (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
+                (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
+                if ((result)->tv_usec < 0) { \
+                        --(result)->tv_sec; \
+                        (result)->tv_usec += 1000000; \
+                } \
+        } while (0)
+#endif /* timersub */
+
+#endif
+
+#ifdef WIN32
+static double PCFreq = 0.0;
+static __int64 timerStart = 0;
+#else
+static struct timeval timerStart;
+#endif
+
+/* Records the current time as the start point for GetTimer(). */
+static inline void StartTimer(void)
+{
+#ifdef WIN32
+    LARGE_INTEGER li;
+    /* NOTE(review): on failure li is still used below - confirm intended. */
+    if(!QueryPerformanceFrequency(&li))
+        printf("QueryPerformanceFrequency failed!\n");
+
+    PCFreq = (double)li.QuadPart/1000.0;
+
+    QueryPerformanceCounter(&li);
+    timerStart = li.QuadPart;
+#else
+    gettimeofday(&timerStart, NULL);
+#endif
+}
+
+// time elapsed in ms since the last StartTimer() call
+static inline double GetTimer(void)
+{
+#ifdef WIN32
+    LARGE_INTEGER li;
+    QueryPerformanceCounter(&li);
+    return (double)(li.QuadPart-timerStart)/PCFreq;
+#else
+    struct timeval timerStop, timerElapsed;
+    gettimeofday(&timerStop, NULL);
+    timersub(&timerStop, &timerStart, &timerElapsed);
+    return timerElapsed.tv_sec*1000.0+timerElapsed.tv_usec/1000.0;
+#endif
+}
+
+#endif // TIMER_H
-- 
1.7.10.4