kmazouzi / FormationMeso

Commit a67a1dd2b58c27862714a21269b2f77b69304b74

Authored by kmazouzi 2016-03-23 10:39:44 +0100

Exists in master

rewrite error with fmax

Showing 4 changed files with 108 additions and 128 deletions Inline Diff

lab3/Makefile
lab3/mpi_heat2D.c
lab3/omp_heat2D.c
lab3/ser_heat2D.c

GCC = gcc	1	1	GCC = gcc
CFLAGS = -O3 -fopenmp	2	2	CFLAGS = -O3 -fopenmp
		3	LDFLAG = -lm
OMP_FLAG = -fopenmp	3	4	OMP_FLAG = -fopenmp
RM = rm -rf	4	5	RM = rm -rf
MPI = mpicc	5	6	MPI = mpicc
MPI_FLAG = -O1 -g	6	7	MPI_FLAG = -O1 -g
EXE = omp_heat2D ser_heat2D mpi_heat2D	7	8	EXE = omp_heat2D ser_heat2D mpi_heat2D
	8	9
all : $(EXE)	9	10	all : $(EXE)
	10	11
#.PHONY: all clean purge	11	12	#.PHONY: all clean purge
	12	13
	13	14
ser_heat2D: ser_heat2D.o	14	15	ser_heat2D: ser_heat2D.o
$(GCC) $(CFLAGS) -o $@ $^	15	16	$(GCC) $(CFLAGS) -o $@ $^ $(LDFLAG)
	16	17
omp_heat2D: omp_heat2D.o	17	18	omp_heat2D: omp_heat2D.o
$(GCC) $(CFLAGS) -o $@ $^	18	19	$(GCC) $(CFLAGS) -o $@ $^ $(LDFLAG)
	19	20
mpi_heat2D:	20	21	mpi_heat2D:
$(MPI) $(MPI_FLAG) mpi_heat2D.c -o $@	21	22	$(MPI) $(MPI_FLAG) mpi_heat2D.c -o $@ $(LDFLAG)
	22	23
	23	24
	24	25
%.o :%.c	25	26	%.o :%.c

/****************************************************************************	1	1	/****************************************************************************
* DESCRIPTION:	2	2	* DESCRIPTION:
* Serial HEAT2D Example - C Version	3	3	* MPI HEAT2D Example - C Version
* This example is based on a simplified	4	4	* This example is based on a simplified
* two-dimensional heat equation domain decomposition. The initial	5	5	* two-dimensional heat equation domain decomposition. The initial
* temperature is computed to be high in the middle of the domain and	6	6	* temperature is computed to be high in the middle of the domain and
* zero at the boundaries. The boundaries are held at zero throughout	7	7	* zero at the boundaries. The boundaries are held at zero throughout
* the simulation. During the time-stepping, an array containing two	8	8	* the simulation. During the time-stepping, an array containing two
* domains is used; these domains alternate between old data and new data.	9	9	* domains is used; these domains alternate between old data and new data.
*	10	10	*
* The physical region, and the boundary conditions, are suggested	11	11	* The physical region, and the boundary conditions, are suggested
by this diagram;	12	12	by this diagram;
	13	13
u = 0	14	14	u = 0
+------------------+	15	15	+------------------+
\| \|	16	16	\| \|
u = 100 \| \| u = 100	17	17	u = 100 \| \| u = 100
\| \|	18	18	\| \|
\| \|	19	19	\| \|
\| \|	20	20	\| \|
\| \|	21	21	\| \|
+------------------+	22	22	+------------------+
u = 100	23	23	u = 100
	24	24
Interrior point :	25	25	Interrior point :
u[Central] = (1/4) * ( u[North] + u[South] + u[East] + u[West] )	26	26	u[Central] = (1/4) * ( u[North] + u[South] + u[East] + u[West] )
	27	27
	28	28
PARALLEL MPI VERSION :	29	29	PARALLEL MPI VERSION :
	30	30
+-------------------+	31	31	+-------------------+
\| \| P0 m=(n-2)/P +2	32	32	\| \| P0 m=(n-2)/P +2
+-------------------+	33	33	+-------------------+
\| \| P1	34	34	\| \| P1
+-------------------+	35	35	+-------------------+
n \| \| ..	36	36	n \| \| ..
+-------------------+	37	37	+-------------------+
\| \| Pq	38	38	\| \| Pq
+-------------------+	39	39	+-------------------+
	40	40
<-------- n -------->	41	41	<-------- n -------->
<-------n-2 ------>	42	42	<-------n-2 ------>
	43	43
	44	44
****************************************************************************/	45	45	****************************************************************************/
#include <stdio.h>	46	46	#include <stdio.h>
#include <stdlib.h>	47	47	#include <stdlib.h>
#include <math.h>	48	48	#include <math.h>
#include <mpi.h>	49	49	#include <mpi.h>
#define NN 50	50	50	#define NN 50
#define MM 50	51	51	#define MM 50
	52	52
#define RING 100	53	53	#define RING 100
#define ITER_PRINT 100	54	54	#define ITER_PRINT 100
#define PRINT_DATA 1	55	55	#define PRINT_DATA 1
#define MAX_ITER 1000	56	56	#define _MAX_ITER 1000
#define _EPSILON 0.01	57	57	#define _EPSILON 0.01
	58	58
	59	59
		60
		61
float update(int rank,int size, int nx,int ny, float u, float unew);	60	62	float update(int rank,int size, int nx,int ny, float u, float unew);
void inidat(int rank, int size, int nx, int ny, float u, float unew);	61	63	void inidat(int rank, int size, int nx, int ny, float u, float unew);
void prtdat(int rank, int size, int nx, int ny, float u,const char fnam);	62	64	void prtdat(int rank, int size, int nx, int ny, float u,const char fnam);
	63	65
	64	66
	65	67
	66	68
int main(int argc, char *argv[])	67	69	int main(int argc, char *argv[])
{	68	70	{
int N=NN,M=MM;	69	71	int N=NN,M=MM;
	70
int rank,size;	71
	72
float EPSILON=_EPSILON;	73	72	float EPSILON=_EPSILON;
	74	73	int MAX_ITER= _MAX_ITER;
	75	74
		75	int size,rank;
		76
/* INITIALIZE MPI */	76	77	/* INITIALIZE MPI */
MPI_Init(&argc, &argv);	77	78	MPI_Init(&argc, &argv);
	78	79
/* GET THE PROCESSOR ID AND NUMBER OF PROCESSORS */	79	80	/* GET THE PROCESSOR ID AND NUMBER OF PROCESSORS */
MPI_Comm_rank(MPI_COMM_WORLD, &rank);	80	81	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);	81	82	MPI_Comm_size(MPI_COMM_WORLD, &size);
	82	83
//Only Rank 0 read application parameters	83	84	//Only Rank 0 read application parameters
if(rank==0) {	84	85	if(rank==0) {
	85	86
if(argc !=3)	86	87	if(argc !=4)
{	87	88	{
fprintf(stderr,"usage %s N EPSILON\n ", argv[0]);	88	89	fprintf(stderr,"usage %s N EPSILON MAX_ITER\n ", argv[0]);
fprintf(stderr,"\t\twhere N is GRID size, EPSILON is Tolerance\n");	89	90	fprintf(stderr,"\t\twhere N is GRID size, EPSILON is Tolerance, MAX_ITER is max iteration\n");
fprintf(stderr,"\t\texample N = 100, EPSILON = 0.1\n");	90	91	fprintf(stderr,"\t\texample N = 100, EPSILON = 0.1, MAX_ITER=1000\n");
return -1;	91	92	return -1;
}	92	93	}
	93
N = M = atoi(argv[1]);	94	94	N = M = atoi(argv[1]);
EPSILON = atof(argv[2]);	95	95	EPSILON = atof(argv[2]);
	96	96	MAX_ITER = atoi (argv[3]);
if(N % size!=0)	97	97	if(N % size!=0)
{	98	98	{
fprintf(stderr,"Grid Size MUST be divisible by the number of processors !");	99	99	fprintf(stderr,"Grid Size MUST be divisible by the number of processors !");
return -1;	100	100	return -1;
}	101	101	}
	102	102
}	103	103	}
	104	104
//Wait for rank 0 , all process start here	105	105	//Wait for rank 0 , all process start here
MPI_Barrier(MPI_COMM_WORLD);	106	106	MPI_Barrier(MPI_COMM_WORLD);
	107	107
//Exchange N	108	108	//Exchange N
MPI_Bcast(&N , 1, MPI_FLOAT, 0 , MPI_COMM_WORLD);	109	109	MPI_Bcast(&N , 1, MPI_INT, 0 , MPI_COMM_WORLD);
//Exchange EPSILON	110	110	//Exchange EPSILON
MPI_Bcast(&EPSILON , 1, MPI_FLOAT, 0 , MPI_COMM_WORLD);	111	111	MPI_Bcast(&EPSILON , 1, MPI_FLOAT, 0 , MPI_COMM_WORLD);
		112
		113	//Exchange MAX_ITER
		114	MPI_Bcast(&MAX_ITER , 1, MPI_INT, 0 , MPI_COMM_WORLD);
	112	115
//local size	113	116	//local size
M = (N-2) / size + 2 ;	114	117	M = (N-2) / size + 2 ;
	115	118
float u = (float )malloc(N * M * sizeof(float));	116	119	float u = (float )malloc(N * M * sizeof(float));
float unew = (float )malloc(N * M * sizeof(float));	117	120	float unew = (float )malloc(N * M * sizeof(float));
	118	121
if(u==0 \|\| unew ==0)	119	122	if(u==0 \|\| unew ==0)
{	120	123	{
perror("Can't allocated data\n");	121	124	perror("Can't allocated data\n");
return -1;	122	125	return -1;
}	123	126	}
	124	127
if(rank==0) {	125	128	if(rank==0) {
	126	129
printf ( "\n" );	127	130	printf ( "\n" );
printf ( "HEATED_PLATE\n" );	128	131	printf ( "HEATED_PLATE\n" );
printf ( " Parallel MPI version using %d processors \n",size );	129	132	printf ( " Parallel MPI version using %d processors \n",size );
printf ( " A program to solve for the steady state temperature distribution\n" );	130	133	printf ( " A program to solve for the steady state temperature distribution\n" );
printf ( " over a rectangular plate.\n" );	131	134	printf ( " over a rectangular plate.\n" );
printf ( "\n" );	132	135	printf ( "\n" );
printf ( " Spatial grid of %d by %d points.\n", N, N );	133	136	printf ( " Spatial grid of %d by %d points.\n", N, N );
printf ( " Each processor will use grid of %d by %d points.\n", M, N );	134	137	printf ( " Each processor will use grid of %d by %d points.\n", M, N );
printf ( " The iteration will end until tolerance <= %f\n\n",EPSILON);	135	138	printf ( " The iteration will end until tolerance <= %f\n\n",EPSILON);
	136	139
}	137	140	}
	138	141
/* Initialize grid and create input file	139	142	/* Initialize grid and create input file
* each process initialize its part	140	143	* each process initialize its part
* */	141	144	* */
	142	145
inidat(rank,size,M,N,u,unew);	143	146	inidat(rank,size,M,N,u,unew);
	144	147
prtdat(rank,size,M,N,u, "initial.dat");	145	148	prtdat(rank,size,M,N,u, "initial.dat");
	146	149
	147	150
float diff, globaldiff=1.0;	148	151	float diff, globaldiff=1.0;
int iter=0;	149	152	int iter=0;
	150	153
/*	151	154	/*
* iterate (JACOBI ITERATION) until the new solution unew differs from the old solution u	152	155	* iterate (JACOBI ITERATION) until the new solution unew differs from the old solution u
* by no more than EPSILON.	153	156	* by no more than EPSILON.
**/	154	157	**/
	155	158
while(globaldiff> EPSILON) {	156	159	while(globaldiff> EPSILON && iter < MAX_ITER) {
	157	160
diff= update(rank,size,M,N, u, unew);	158	161	diff= update(rank,size,M,N, u, unew);
	159	162
/**	160	163	/**
* COMPUTE GLOBAL CONVERGENCE	161	164	* COMPUTE GLOBAL CONVERGENCE
* */	162	165	* */
	163	166
MPI_Allreduce(&diff, &globaldiff , 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);	164	167	MPI_Allreduce(&diff, &globaldiff , 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
	165	168
	166	169
if(rank==0)	167	170	if(rank==0)
if(iter%ITER_PRINT==0)	168	171	if(iter%ITER_PRINT==0)
printf("Processor #%d, iteration %d, epsilon = %f\n ", rank,iter,globaldiff);	169	172	printf("Processor #%d, iteration %d, epsilon = %f\n ", rank,iter,globaldiff);
iter++;	170	173	iter++;
}	171	174	}
		175
	172	176
prtdat(rank,size,M,N, u, "final.dat");	173	177	prtdat(rank,size,M,N, u, "final.dat");
free(u);	174	178	free(u);
free(unew);	175	179	free(unew);
MPI_Finalize();	176	180	MPI_Finalize();
}	177	181	}
	178	182
	179	183
	180	184
/****************************************************************************	181	185	/****************************************************************************
* subroutine update	182	186	* subroutine update
****************************************************************************/	183	187	****************************************************************************/
float update(int rank, int size, int nx,int ny, float u, float unew){	184	188	float update(int rank, int size, int nx,int ny, float u, float unew){
int ix, iy;	185	189	int ix, iy;
float diff=0.0;	186	190	float diff=0.0;
MPI_Status status;	187	191	MPI_Status status;
	188	192
	189	193
/*	190	194	/*
* EXCHANGE GHOST CELL	191	195	* EXCHANGE GHOST CELL
*/	192	196	*/
if (rank > 0 && rank< size-1)	193	197	if (rank > 0 && rank< size-1)
{	194	198	{
MPI_Sendrecv(&u[ny*(nx-2)], ny, MPI_FLOAT, rank+1, 0,	195	199	MPI_Sendrecv(&u[ny*(nx-2)], ny, MPI_FLOAT, rank+1, 0,
&u[ny*0], ny, MPI_FLOAT, rank-1, 0, MPI_COMM_WORLD, &status);	196	200	&u[ny*0], ny, MPI_FLOAT, rank-1, 0, MPI_COMM_WORLD, &status);
MPI_Sendrecv(&u[ny*1], ny, MPI_FLOAT, rank-1, 1,	197	201	MPI_Sendrecv(&u[ny*1], ny, MPI_FLOAT, rank-1, 1,
&u[ny*(nx-1)], ny, MPI_FLOAT, rank+1, 1, MPI_COMM_WORLD, &status);	198	202	&u[ny*(nx-1)], ny, MPI_FLOAT, rank+1, 1, MPI_COMM_WORLD, &status);
}	199	203	}
	200	204
else if (rank == 0)	201	205	else if (rank == 0)
MPI_Sendrecv(&u[ny*(nx-2)], ny, MPI_FLOAT, rank+1, 0,	202	206	MPI_Sendrecv(&u[ny*(nx-2)], ny, MPI_FLOAT, rank+1, 0,
&u[ny*(nx-1)], ny, MPI_FLOAT, rank+1, 1, MPI_COMM_WORLD, &status);	203	207	&u[ny*(nx-1)], ny, MPI_FLOAT, rank+1, 1, MPI_COMM_WORLD, &status);
else if (rank == size-1)	204	208	else if (rank == size-1)
MPI_Sendrecv(&u[ny*1], ny, MPI_FLOAT, rank-1, 1,	205	209	MPI_Sendrecv(&u[ny*1], ny, MPI_FLOAT, rank-1, 1,
&u[ny*0], ny, MPI_FLOAT, rank-1, 0, MPI_COMM_WORLD, &status);	206	210	&u[ny*0], ny, MPI_FLOAT, rank-1, 0, MPI_COMM_WORLD, &status);
	207	211
	208	212
	209	213
/**	210	214	/**
* PERFORM LOCAL COMPUTATION	211	215	* PERFORM LOCAL COMPUTATION
* */	212	216	* */
	213	217
for (ix = 1; ix < nx-1; ix++) {	214	218	for (ix = 1; ix < nx-1; ix++) {
for (iy = 1; iy < ny-1; iy++) {	215	219	for (iy = 1; iy < ny-1; iy++) {
unew[ixny+iy] = (u[(ix+1)ny+iy] + u[(ix-1)ny+iy] + u[ixny+iy+1] + u[ix*ny+iy-1] )/4.0	216	220	unew[ixny+iy] = (u[(ix+1)ny+iy] + u[(ix-1)ny+iy] + u[ixny+iy+1] + u[ix*ny+iy-1] )/4.0
;	217	221	;
if (diff < fabs (unew[ixny+iy] - u[ixny+iy] ))	218	222	diff = fmax( diff, fabs(unew[ixny+iy] - u[ixny+iy]));
{	219
diff = fabs ( unew[ixny+iy] - u[ixny+iy] );	220
}	221
}	222	223	}
	223	224
}	224	225	}
	225	226
	226	227
for (ix = 1; ix < nx-1; ix++) {	227	228	for (ix = 1; ix < nx-1; ix++) {
for (iy = 1; iy < ny-1; iy++) {	228	229	for (iy = 1; iy < ny-1; iy++) {
u[ixny+iy] = unew[ixny+iy];	229	230	u[ixny+iy] = unew[ixny+iy];
}	230	231	}
}	231	232	}
	232	233
	233	234
return diff;	234	235	return diff;
}	235	236	}
	236	237
/*****************************************************************************	237	238	/*****************************************************************************
* Initialize Data	238	239	* Initialize Data
*****************************************************************************/	239	240	*****************************************************************************/
	240	241
void inidat(int rank, int size,int nx, int ny, float u, float unew)	241	242	void inidat(int rank, int size,int nx, int ny, float u, float unew)
{	242	243	{
int ix, iy;	243	244	int ix, iy;
	244	245
/*	245	246	/*
*Set boundary data and interrior values	246	247	*Set boundary data and interrior values
* */	247	248	* */
	248	249
	249	250
// interior points	250	251	// interior points
for (ix = 1; ix < nx-1; ix++)	251	252	for (ix = 1; ix < nx-1; ix++)
for (iy = 1; iy < ny-1; iy++) {	252	253	for (iy = 1; iy < ny-1; iy++) {
u[ix*ny+iy]=0.0;	253	254	u[ix*ny+iy]=0.0;
}	254	255	}
	255	256
//boundary left	256	257	//boundary left
for (ix = 0; ix < nx; ix++){	257	258	for (ix = 0; ix < nx; ix++){
u[ix*ny]=100.0;	258	259	u[ix*ny]=100.0;

/****************************************************************************	1	1	/****************************************************************************
* DESCRIPTION:	2	2	* DESCRIPTION:
* Serial HEAT2D Example - C Version	3	3	* OpenMP HEAT2D Example - C Version
* This example is based on a simplified	4	4	* This example is based on a simplified
* two-dimensional heat equation domain decomposition. The initial	5	5	* two-dimensional heat equation domain decomposition. The initial
* temperature is computed to be high in the middle of the domain and	6	6	* temperature is computed to be high in the middle of the domain and
* zero at the boundaries. The boundaries are held at zero throughout	7	7	* zero at the boundaries. The boundaries are held at zero throughout
* the simulation. During the time-stepping, an array containing two	8	8	* the simulation. During the time-stepping, an array containing two
* domains is used; these domains alternate between old data and new data.	9	9	* domains is used; these domains alternate between old data and new data.
*	10	10	*
* The physical region, and the boundary conditions, are suggested	11	11	* The physical region, and the boundary conditions, are suggested
by this diagram;	12	12	by this diagram;
	13	13
u = 0	14	14	u = 0
+------------------+	15	15	+------------------+
\| \|	16	16	\| \|
u = 100 \| \| u = 100	17	17	u = 100 \| \| u = 100
\| \|	18	18	\| \|
+------------------+	19	19	+------------------+
u = 100	20	20	u = 100
	21	21
Interrior point :	22	22	Interrior point :
u[Central] = (1/4) * ( u[North] + u[South] + u[East] + u[West] )	23	23	u[Central] = (1/4) * ( u[North] + u[South] + u[East] + u[West] )
	24	24
****************************************************************************/	25	25	****************************************************************************/
#include <stdio.h>	26	26
		27	#include <stdio.h>
#include <stdlib.h>	27	28	#include <stdlib.h>
#include <math.h>	28	29	#include <math.h>
#include <omp.h>	29
	30	30
#define NN 50	31	31	#define NN 50
#define MM 50	32	32	#define MM 50
	33	33
#define ITER_PRINT 100	34	34	#define ITER_PRINT 100
		35	#define _MAX_ITER 400
#define PRINT_DATA 1	35	36	#define PRINT_DATA 1
		37	#define _EPSILON 0.01
	36	38
#define _EPSILON 0.001	37
	38	39
	39
float update(int nx,int ny, float u, float unew);	40	40	float update(int nx,int ny, float u, float unew);
void inidat(int nx, int ny, float u, float unew);	41	41	void inidat(int nx, int ny, float u, float unew);
void prtdat(int nx, int ny, float u,const char fnam);	42	42	void prtdat(int nx, int ny, float u,const char fnam);
	43	43
	44	44
	45	45
	46	46
int main(int argc, char *argv[])	47	47	int main(int argc, char *argv[])
{	48	48	{
	49
float diff=1.0;	50
float EPSILON=_EPSILON;	51
int N=NN,M=MM;	52	49	int N=NN,M=MM;
		50	float EPSILON=_EPSILON;
		51	int MAX_ITER= _MAX_ITER;
	53	52
if(argc !=3)	54	53	if(argc !=4)
{	55	54	{
fprintf(stderr,"usage %s N EPSILON\n ", argv[0]);	56	55	fprintf(stderr,"usage %s N EPSILON MAX_ITER\n ", argv[0]);
fprintf(stderr,"\t\twhere N is GRID size, EPSILON is Tolerance\n");	57	56	fprintf(stderr,"\t\twhere N is GRID size, EPSILON is Tolerance, MAX_ITER is max iteration\n");
fprintf(stderr,"\t\texample N = 100, EPSILON = 0.1\n");	58	57	fprintf(stderr,"\t\texample N = 100, EPSILON = 0.1, MAX_ITER=1000\n");
return -1;	59	58	return -1;
}	60	59	}
	61	60
N = M = atoi(argv[1]);	62	61	N = M = atoi(argv[1]);
EPSILON = atof(argv[2]);	63	62	EPSILON = atof(argv[2]);
		63	MAX_ITER = atoi (argv[3]);
	64	64
		65	float diff=1.0;
		66
float u = (float )malloc(N * M * sizeof(float));	65	67	float u = (float )malloc(N * M * sizeof(float));
float unew = (float )malloc(N * M * sizeof(float));	66	68	float unew = (float )malloc(N * M * sizeof(float));
	67	69
if(u==0 \|\| unew ==0)	68	70	if(u==0 \|\| unew ==0)
{	69	71	{
perror("Can't allocated data\n");	70	72	perror("Can't allocated data\n");
return -1;	71	73	return -1;
}	72	74	}
	73	75
printf ( "\n" );	74	76	printf ( "\n" );
printf ( "HEATED_PLATE\n" );	75	77	printf ( "HEATED_PLATE\n" );
printf ( " Parallel OpenMP version, using %d Threads\n",omp_get_max_threads() );	76	78	printf ( " OpenMP version\n" );
printf ( " A program to solve for the steady state temperature distribution\n" );	77	79	printf ( " A program to solve for the steady state temperature distribution\n" );
printf ( " over a rectangular plate.\n" );	78	80	printf ( " over a rectangular plate.\n" );
printf ( " Spatial grid of %d by %d points.\n\n", M, N );	79	81	printf ( "\n" );
		82	printf ( " Spatial grid of %d by %d points.\n", M, N );
		83	printf ( " The iteration will end until tolerance <= %f\n\n",EPSILON);
	80	84
	81
/* Initialize grid and create input file */	82	85	/* Initialize grid and create input file */
printf("Initializing grid\n");	83	86	printf("Initializing grid\n");
	84	87
inidat(N, M,u,unew);	85	88	inidat(N, M,u,unew);
	86	89
prtdat(N, M,u, "initial.dat");	87	90	prtdat(N, M,u, "initial.dat");
	88	91
printf("Start computing\n");	89	92	printf("Start computing\n\n");
	90	93
int iter=0;	91	94	int iter=0;
	92	95
/*	93	96	/*
* iterate until the new solution unew differs from the old solution u	94	97	* iterate until the new solution unew differs from the old solution u
* by no more than EPSILON.	95	98	* by no more than EPSILON.
* */	96	99	* */
	97	100
while(diff> EPSILON) {	98	101	while(diff> EPSILON && iter <MAX_ITER) {
	99	102
diff = update(N, M, u, unew);	100	103	diff= update(N, M, u, unew);
	101	104
if(iter%ITER_PRINT==0)	102	105	if(iter%ITER_PRINT==0)
printf("Iteration %d, diff = %f\n ", iter,diff);	103
	104	106
		107	printf("Iteration %d, diff = %e\n ", iter,diff);
		108
iter++;	105	109	iter++;
}	106	110	}
	107	111
		112	if(diff>EPSILON)
		113	printf("*Not converged MAX_ITERATION %d reached\n*", MAX_ITER);
		114
prtdat(N, M, u, "final.dat");	108	115	prtdat(N, M, u, "final.dat");
	109	116
free(u);	110	117	free(u);
free(unew);	111	118	free(unew);
}	112	119	}
	113	120
	114	121
	115	122
/****************************************************************************	116	123	/****************************************************************************
* subroutine update	117	124	* subroutine update
****************************************************************************/	118	125	****************************************************************************/
float update(int nx,int ny, float u, float unew)	119	126	float update(int nx,int ny, float u, float unew)
{	120	127	{
int ix, iy;	121	128	int ix, iy;
float diff=0.0;	122	129	float diff=0.0;
	123	130
#pragma omp parallel for shared(nx,ny,u,unew) private (ix,iy)	124	131	#pragma omp parallel for shared(nx,ny,u,unew) private (ix,iy) reduction(max:diff)
for (ix = 1; ix < nx-1; ix++) {	125	132	for (ix = 1; ix < nx-1; ix++) {
for (iy = 1; iy < ny-1; iy++) {	126	133	for (iy = 1; iy < ny-1; iy++) {
unew[ix*ny+iy] =	127	134	unew[ix*ny+iy] =
(u[(ix+1)ny+iy] + u[(ix-1)ny+iy] +	128	135	(u[(ix+1)ny+iy] + u[(ix-1)ny+iy] +
u[ixny+iy+1] + u[ixny+iy-1] )/4.0;	129	136	u[ixny+iy+1] + u[ixny+iy-1] )*0.25;
	130	137	diff = fmax( diff, fabs(unew[ixny+iy] - u[ixny+iy]));
}	131	138	}
}	132	139	}
		140
	133	141
//compute reduction	134
	135
	136
float mydiff;	137
	138
/**	139
* IMPLEMENT OMP REDUCE MAX	140
*/	141
	142
#pragma omp parallel shared(nx,ny,u,unew, diff) private (ix,iy,mydiff)	143
{	144
mydiff=0.0;	145
#pragma omp for	146
for (ix = 1; ix < nx-1; ix++) {	147
for (iy = 1; iy < ny-1; iy++) {	148
if (mydiff < fabs (unew[ixny+iy] - u[ixny+iy] ))	149
{	150
mydiff = fabs ( unew[ixny+iy] - u[ixny+iy] );	151
}	152
}	153
}	154
	155
	156
#pragma omp critical	157
{	158
if (diff < mydiff )	159
{	160
diff = mydiff;	161
}	162
}	163
	164
/*	165	142	/*
* COPY OLD DATA	166	143	* COPY OLD DATA
*/	167	144	*/
#pragma omp for	168	145	#pragma omp parallel for
for (ix = 1; ix < nx-1; ix++) {	169	146	for (ix = 1; ix < nx-1; ix++) {
for (iy = 1; iy < ny-1; iy++) {	170	147	for (iy = 1; iy < ny-1; iy++) {
u[ixny+iy] = unew[ixny+iy];	171	148	u[ixny+iy] = unew[ixny+iy];
}	172	149	}
}	173	150	}
}	174	151
	175	152
return diff;	176	153	return diff;
}	177	154	}
	178	155
/*****************************************************************************	179	156	/*****************************************************************************
* Initialize Data	180	157	* Initialize Data
*****************************************************************************/	181	158	*****************************************************************************/
	182	159
void inidat(int nx, int ny, float u, float unew)	183	160	void inidat(int nx, int ny, float u, float unew)
{	184	161	{
int ix, iy;	185	162	int ix, iy;
	186	163
/*	187	164	/*
*Set boundary data and interrior values	188	165	*Set boundary data and interrior values
* */	189	166	* */
	190	167
#pragma omp parallel private (ix,iy)	191	168	#pragma omp parallel private (ix,iy)
{	192	169	{
// interior points	193	170	// interior points
#pragma omp for	194	171	#pragma omp for
for (ix = 1; ix < nx-1; ix++)	195	172	for (ix = 1; ix < nx-1; ix++)
for (iy = 1; iy < ny-1; iy++) {	196	173	for (iy = 1; iy < ny-1; iy++) {
u[ix*ny+iy]=5.0;	197	174	u[ix*ny+iy]=0.0;
}	198	175	}
	199	176
//boundary left	200	177	//boundary left
#pragma omp for	201	178	#pragma omp for
for (ix = 1; ix < nx-1; ix++){	202	179	for (ix = 1; ix < nx-1; ix++){
u[ix*ny]=100.0;	203	180	u[ix*ny]=100.0;
	204	181
}	205	182	}
	206	183
//boundary right	207	184	//boundary right
#pragma omp for	208	185	#pragma omp for
for (ix = 1; ix < nx-1; ix++){	209	186	for (ix = 1; ix < nx-1; ix++){
u[ix*ny+ (ny-1)]=100.0;	210	187	u[ix*ny+ (ny-1)]=100.0;
	211	188
}	212	189	}

/****************************************************************************	1	1	/****************************************************************************
* DESCRIPTION:	2	2	* DESCRIPTION:
* Serial HEAT2D Example - C Version	3	3	* Serial HEAT2D Example - C Version
* This example is based on a simplified	4	4	* This example is based on a simplified
* two-dimensional heat equation domain decomposition. The initial	5	5	* two-dimensional heat equation domain decomposition. The initial
* temperature is computed to be high in the middle of the domain and	6	6	* temperature is computed to be high in the middle of the domain and
* zero at the boundaries. The boundaries are held at zero throughout	7	7	* zero at the boundaries. The boundaries are held at zero throughout
* the simulation. During the time-stepping, an array containing two	8	8	* the simulation. During the time-stepping, an array containing two
* domains is used; these domains alternate between old data and new data.	9	9	* domains is used; these domains alternate between old data and new data.
*	10	10	*
* The physical region, and the boundary conditions, are suggested	11	11	* The physical region, and the boundary conditions, are suggested
by this diagram;	12	12	by this diagram;
	13	13
u = 0	14	14	u = 0
+------------------+	15	15	+------------------+
\| \|	16	16	\| \|
u = 100 \| \| u = 100	17	17	u = 100 \| \| u = 100
\| \|	18	18	\| \|
+------------------+	19	19	+------------------+
u = 100	20	20	u = 100
	21	21
Interrior point :	22	22	Interrior point :
u[Central] = (1/4) * ( u[North] + u[South] + u[East] + u[West] )	23	23	u[Central] = (1/4) * ( u[North] + u[South] + u[East] + u[West] )
	24	24
****************************************************************************/	25	25	****************************************************************************/
#include <stdio.h>	26	26	#include <stdio.h>
#include <stdlib.h>	27	27	#include <stdlib.h>
#include <math.h>	28	28	#include <math.h>
	29	29
#define NN 50	30	30	#define NN 50
#define MM 50	31	31	#define MM 50
	32	32
#define ITER_PRINT 100	33	33	#define ITER_PRINT 100
		34	#define _MAX_ITER 400
#define PRINT_DATA 1	34	35	#define PRINT_DATA 1
	35
#define _EPSILON 0.01	36	36	#define _EPSILON 0.01
	37	37
	38	38
float update(int nx,int ny, float u, float unew);	39	39	float update(int nx,int ny, float u, float unew);
void inidat(int nx, int ny, float u, float unew);	40	40	void inidat(int nx, int ny, float u, float unew);
void prtdat(int nx, int ny, float u,const char fnam);	41	41	void prtdat(int nx, int ny, float u,const char fnam);
	42	42
	43	43
	44	44
	45	45
int main(int argc, char *argv[])	46	46	int main(int argc, char *argv[])
{	47	47	{
int N=NN,M=MM;	48	48	int N=NN,M=MM;
	49
float EPSILON=_EPSILON;	50	49	float EPSILON=_EPSILON;
		50	int MAX_ITER= _MAX_ITER;
	51	51
if(argc !=3)	52	52	if(argc !=4)
{	53	53	{
fprintf(stderr,"usage %s N EPSILON\n ", argv[0]);	54	54	fprintf(stderr,"usage %s N EPSILON MAX_ITER\n ", argv[0]);
fprintf(stderr,"\t\twhere N is GRID size, EPSILON is Tolerance\n");	55	55	fprintf(stderr,"\t\twhere N is GRID size, EPSILON is Tolerance, MAX_ITER is max iteration\n");
fprintf(stderr,"\t\texample N = 100, EPSILON = 0.1\n");	56	56	fprintf(stderr,"\t\texample N = 100, EPSILON = 0.1, MAX_ITER=1000\n");
return -1;	57	57	return -1;
}	58	58	}
	59	59
N = M = atoi(argv[1]);	60	60	N = M = atoi(argv[1]);
EPSILON = atof(argv[2]);	61	61	EPSILON = atof(argv[2]);
		62	MAX_ITER = atoi (argv[3]);
	62	63
float diff=1.0;	63	64	float diff=1.0;
	64	65
float u = (float )malloc(N * M * sizeof(float));	65	66	float u = (float )malloc(N * M * sizeof(float));
float unew = (float )malloc(N * M * sizeof(float));	66	67	float unew = (float )malloc(N * M * sizeof(float));
	67	68
if(u==0 \|\| unew ==0)	68	69	if(u==0 \|\| unew ==0)
{	69	70	{
perror("Can't allocated data\n");	70	71	perror("Can't allocated data\n");
return -1;	71	72	return -1;
}	72	73	}
	73	74
printf ( "\n" );	74	75	printf ( "\n" );
printf ( "HEATED_PLATE\n" );	75	76	printf ( "HEATED_PLATE\n" );
printf ( " Serial version\n" );	76	77	printf ( " Serial version\n" );
printf ( " A program to solve for the steady state temperature distribution\n" );	77	78	printf ( " A program to solve for the steady state temperature distribution\n" );
printf ( " over a rectangular plate.\n" );	78	79	printf ( " over a rectangular plate.\n" );
printf ( "\n" );	79	80	printf ( "\n" );
printf ( " Spatial grid of %d by %d points.\n", M, N );	80	81	printf ( " Spatial grid of %d by %d points.\n", M, N );
printf ( " The iteration will end until tolerance <= %f\n\n",EPSILON);	81	82	printf ( " The iteration will end until tolerance <= %f\n\n",EPSILON);
	82	83
/* Initialize grid and create input file */	83	84	/* Initialize grid and create input file */
printf("Initializing grid\n");	84	85	printf("Initializing grid\n");
	85	86
inidat(N, M,u,unew);	86	87	inidat(N, M,u,unew);
	87	88
prtdat(N, M,u, "initial.dat");	88	89	prtdat(N, M,u, "initial.dat");
	89	90
printf("Start computing\n\n");	90	91	printf("Start computing\n\n");
	91	92
int iter=0;	92	93	int iter=0;
	93	94
/*	94	95	/*
* iterate until the new solution unew differs from the old solution u	95	96	* iterate until the new solution unew differs from the old solution u
* by no more than EPSILON.	96	97	* by no more than EPSILON.
* */	97	98	* */
	98
while(diff> EPSILON) {	99
	100	99
diff= update(N, M, u, unew);	101	100	while(diff> EPSILON && iter <MAX_ITER) {
	102	101
		102	diff= update(N, M, u, unew);
		103
if(iter%ITER_PRINT==0)	103	104	if(iter%ITER_PRINT==0)
	104	105
printf("Iteration %d, diff = %e\n ", iter,diff);	105	106	printf("Iteration %d, diff = %e\n ", iter,diff);
	106	107
iter++;	107	108	iter++;
}	108	109	}
	109	110
		111	if(diff>EPSILON)
		112	printf("*Not converged MAX_ITERATION %d reached\n*", MAX_ITER);
		113
prtdat(N, M, u, "final.dat");	110	114	prtdat(N, M, u, "final.dat");
	111	115
free(u);	112	116	free(u);
free(unew);	113	117	free(unew);
}	114	118	}
	115	119
	116	120
	117	121
/****************************************************************************	118	122	/****************************************************************************
* subroutine update	119	123	* subroutine update
****************************************************************************/	120	124	****************************************************************************/
float update(int nx,int ny, float u, float unew)	121	125	float update(int nx,int ny, float u, float unew)
{	122	126	{
int ix, iy;	123	127	int ix, iy;
float diff=0.0;	124	128	float diff=0.0;
	125	129
for (ix = 1; ix < nx-1; ix++) {	126	130	for (ix = 1; ix < nx-1; ix++) {
for (iy = 1; iy < ny-1; iy++) {	127	131	for (iy = 1; iy < ny-1; iy++) {
unew[ixny+iy] = (u[(ix+1)ny+iy] + u[(ix-1)ny+iy] + u[ixny+iy+1] + u[ix*ny+iy-1] )/4.0	128	132
;	129	133	unew[ixny+iy] = (u[(ix+1)ny+iy] + u[(ix-1)ny+iy] + u[ixny+iy+1] + u[ixny+iy-1] )0.25;
if (diff < fabs (unew[ixny+iy] - u[ixny+iy] ))	130	134	diff = fmax( diff, fabs(unew[ixny+iy] - u[ixny+iy]));
{	131
diff = fabs ( unew[ixny+iy] - u[ixny+iy] );	132
}	133
}	134	135	}
	135	136
}	136	137	}
	137	138
/**	138	139	/**
* COPY OLD DATA	139	140	* COPY OLD DATA
*/	140	141	*/
for (ix = 1; ix < nx-1; ix++) {	141	142	for (ix = 1; ix < nx-1; ix++) {
for (iy = 1; iy < ny-1; iy++) {	142	143	for (iy = 1; iy < ny-1; iy++) {
u[ixny+iy] = unew[ixny+iy];	143	144	u[ixny+iy] = unew[ixny+iy];
}	144	145	}
}	145	146	}
	146	147
return diff;	147	148	return diff;
}	148	149	}
	149	150
/*****************************************************************************	150	151	/*****************************************************************************
* Initialize Data	151	152	* Initialize Data
*****************************************************************************/	152	153	*****************************************************************************/
void inidat(int nx, int ny, float u, float unew)	153	154	void inidat(int nx, int ny, float u, float unew)
{	154	155	{
int ix, iy;	155	156	int ix, iy;
	156	157
/*	157	158	/*
*Set boundary data and interrior values	158	159	*Set boundary data and interrior values
* */	159	160	* */
	160	161
	161	162
// interior points	162	163	// interior points
for (ix = 1; ix < nx-1; ix++)	163	164	for (ix = 1; ix < nx-1; ix++)
for (iy = 1; iy < ny-1; iy++) {	164	165	for (iy = 1; iy < ny-1; iy++) {
u[ix*ny+iy]=5.0;	165	166	u[ix*ny+iy]=0.0;
}	166	167	}
	167	168
//boundary left	168	169	//boundary left
for (ix = 1; ix < nx-1; ix++){	169	170	for (ix = 1; ix < nx-1; ix++){
u[ix*ny]=100.0;	170	171	u[ix*ny]=100.0;
	171	172
}	172	173	}
	173	174
//boundary right	174	175	//boundary right
for (ix = 1; ix < nx-1; ix++){	175	176	for (ix = 1; ix < nx-1; ix++){
u[ix*ny+ (ny-1)]=100.0;	176	177	u[ix*ny+ (ny-1)]=100.0;
	177	178
}	178	179	}
	179	180
//boundary down	180	181	//boundary down
for (iy = 0; iy < ny; iy++){	181	182	for (iy = 0; iy < ny; iy++){
u[(nx-1)*(ny)+iy]=100.0;	182	183	u[(nx-1)*(ny)+iy]=100.0;
	183	184
}	184	185	}
	185	186
//boundary top	186	187	//boundary top
for (iy = 0; iy < ny; iy++){	187	188	for (iy = 0; iy < ny; iy++){
u[iy]=100.0;	188	189	u[iy]=100.0;
	189	190
}	190	191	}
	191	192
	192	193
}	193	194	}
	194	195
	195	196
	196	197
/**************************************************************************	197	198	/**************************************************************************