Commit 5a17f58adf391c94cace31eb9f0613e1a952f2bd

Authored by kmazouzi
1 parent 37b3fc7f42
Exists in master

add jacobi for openacc

Showing 3 changed files with 185 additions and 0 deletions Side-by-side Diff

openacc/Makefile View file @ 5a17f58
  1 +
  2 +CC=gcc
  3 +
  4 +CFLAGS=-O3 -std=c99 -Wall
  5 +LDFLAG= -lm
  6 +
  7 +OMPFLAG=-fopenmp -D_OMP
  8 +
  9 +PGCC=pgcc
  10 +PGCFLAGS=-acc -ta=nvidia -Minfo
  11 +
  12 +
  13 +NAME=jacobi
  14 +
  15 +default: seq omp
  16 +
  17 +seq:
  18 + $(CC) $(CFLAGS) $(NAME).c -o $(NAME).seq.exe $(LDFLAG)
  19 +
  20 +omp:
  21 + $(CC) $(CFLAGS) $(OMPFLAG) $(NAME).c -o $(NAME).omp.exe $(LDFLAG)
  22 +
  23 +
  24 +gpu:
  25 + $(PGCC) $(PGCFLAGS) $(NAME).c -o $(NAME).pgi.exe
  26 +
  27 +clean:
  28 + rm -f *.exe
openacc/jacobi.c View file @ 5a17f58
  1 +#include <math.h>
  2 +#include <string.h>
  3 +#include <stdlib.h>
  4 +
  5 +#ifdef _OMP
  6 +#include <omp.h>
  7 +#endif
  8 +
  9 +#include <stdio.h>
  10 +// #include <openacc.h>
  11 +#include "timer.h"
  12 +#define NN 4096
  13 +#define NM 4096
  14 +
  15 +double A[NN][NM];
  16 +double Anew[NN][NM];
  17 +
  18 +int main(int argc, char** argv)
  19 +{
  20 + const int n = NN;
  21 + const int m = NM;
  22 + const int iter_max = 200;
  23 +
  24 + const double tol = 1.0e-6;
  25 + double error = 1.0;
  26 +
  27 + memset(A, 0, n * m * sizeof(double));
  28 + memset(Anew, 0, n * m * sizeof(double));
  29 +
  30 + for (int j = 0; j < n; j++)
  31 + {
  32 + A[j][0] = 1.0;
  33 + Anew[j][0] = 1.0;
  34 + }
  35 +
  36 + printf("Jacobi relaxation Calculation: %d x %d mesh\n", n, m);
  37 +
  38 + StartTimer();
  39 + int iter = 0;
  40 +
  41 +{
  42 + while ( error > tol && iter < iter_max )
  43 + {
  44 + error = 0.0;
  45 +
  46 +#ifdef _OMP
  47 +#pragma omp parallel for reduction (max:error)
  48 +#endif
  49 + for( int j = 1; j < n-1; j++)
  50 + {
  51 + for( int i = 1; i < m-1; i++ )
  52 + {
  53 + Anew[j][i] = 0.25 * ( A[j][i+1] + A[j][i-1]
  54 + + A[j-1][i] + A[j+1][i]);
  55 + error = fmax( error, fabs(Anew[j][i] - A[j][i]));
  56 + }
  57 + }
  58 +
  59 + for( int j = 1; j < n-1; j++)
  60 + {
  61 + for( int i = 1; i < m-1; i++ )
  62 + {
  63 + A[j][i] = Anew[j][i];
  64 + }
  65 + }
  66 +
  67 + if(iter % 10 == 0) printf("%5d, %0.6f\n", iter, error);
  68 +
  69 + iter++;
  70 + }
  71 +}
  72 + double runtime = GetTimer();
  73 +
  74 + printf(" total: %f s\n", runtime / 1000);
  75 +}
openacc/timer.h View file @ 5a17f58
  1 +/*
  2 + * Licensed under the Apache License, Version 2.0 (the "License");
  3 + * you may not use this file except in compliance with the License.
  4 + * You may obtain a copy of the License at
  5 + *
  6 + * http://www.apache.org/licenses/LICENSE-2.0
  7 + *
  8 + * Unless required by applicable law or agreed to in writing, software
  9 + * distributed under the License is distributed on an "AS IS" BASIS,
  10 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 + * See the License for the specific language governing permissions and
  12 + * limitations under the License.
  13 + */
  14 +
  15 +#ifndef TIMER_H
  16 +#define TIMER_H
  17 +
  #include <stdio.h>
  #include <stdlib.h>
  19 +
  20 +#ifdef WIN32
  21 +#define WIN32_LEAN_AND_MEAN
  22 +#include <windows.h>
  23 +#else
  24 +#include <sys/time.h>
  25 +
  26 +/* RR: Apparently if no BSD or mac, this is missing */
  27 +#ifndef __USE_BSD
  28 +# define timersub(a, b, result) \
  29 + do { \
  30 + (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
  31 + (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
  32 + if ((result)->tv_usec < 0) { \
  33 + --(result)->tv_sec; \
  34 + (result)->tv_usec += 1000000; \
  35 + } \
  36 + } while (0)
  37 +#endif /* BSD */
  38 +
  39 +
  40 +#endif
  41 +
  42 +
  43 +
  44 +#ifdef WIN32
  45 +double PCFreq = 0.0;
  46 +__int64 timerStart = 0;
  47 +#else
  48 +struct timeval timerStart;
  49 +#endif
  50 +
  51 +void StartTimer()
  52 +{
  53 +#ifdef WIN32
  54 + LARGE_INTEGER li;
  55 + if(!QueryPerformanceFrequency(&li))
  56 + printf("QueryPerformanceFrequency failed!\n");
  57 +
  58 + PCFreq = (double)li.QuadPart/1000.0;
  59 +
  60 + QueryPerformanceCounter(&li);
  61 + timerStart = li.QuadPart;
  62 +#else
  63 + gettimeofday(&timerStart, NULL);
  64 +#endif
  65 +}
  66 +
  67 +// time elapsed in ms
  68 +double GetTimer()
  69 +{
  70 +#ifdef WIN32
  71 + LARGE_INTEGER li;
  72 + QueryPerformanceCounter(&li);
  73 + return (double)(li.QuadPart-timerStart)/PCFreq;
  74 +#else
  75 + struct timeval timerStop, timerElapsed;
  76 + gettimeofday(&timerStop, NULL);
  77 + timersub(&timerStop, &timerStart, &timerElapsed);
  78 + return timerElapsed.tv_sec*1000.0+timerElapsed.tv_usec/1000.0;
  79 +#endif
  80 +}
  81 +
  82 +#endif // TIMER_H