/*
CPP_CONTEST=2017Final
CPP_PROBLEM=E
CPP_LANG=CPP+OPENMP
CPP_PROCESSES_PER_NODE=1
CPP_NUM_NODES=1
*/
/* RECORD
Vitaly Aksenov
Contest in EuroPar 2017
time 617 msec
speed-up 14.75
*/
#include
#include
extern void writem(double *,int,int);
void update(int n,double *vo,double *vd)
{
vd[0] += vo[0] + vo[1];
vd[0] -= (int) vd[0];
for(int i = 1; i < n - 1; i++)
{
vd[i] += vo[i - 1] + vo[i] + vo[i + 1];
vd[i] -= (int) vd[i];
}
vd[n-1] += vo[n-2] + vo[n-1];
vd[n-1] -= (int) vd[n-1];
}
void sec(int n, int m, double *A)
{
/**
The idea behind the solution is pipelining. Suppose,
the first row starts to fall: the second row is updated,
the third row is updated, the fourth row is updated and then we stop for a moment.
Now, the second row could start to fall: the values in it are already calculated and the third
row could be updated without data races
(since the fall of the first row does not touch values in the second and third row anymore).
Thus, fifth and third rows are updated in parallel.
Sixth and fourth rows are updated in parallel.
Seventh and fifth rows are updated in parallel.
The fall of the third row could be now performed: the values in it are already calculated and the
fourth row could be updated without data races. Fourth, Sixth and Eighth are updated in parallel.
Continue similarly.
**/
int* row = new int[n]; // the fall of the i-th row is ready to update the (row[i] + 1)-th row
for (int i = 0; i < n - 1; i++) {
row[i] = i;
}
int l = 0; // the first fall that is still in process
int it = 0;
int r = 1; // the first fall that is not in process
while (l < n - 1) {
if (r < n - 1 && (row[r - 1] == n - 1 || row[r] + 1 < row[r - 1])) {
r++; // the r-th row could start to fall if the previous fall already updated the (r + 2)-th row
}
// Update the next row for each fall in process in parallel
#pragma omp parallel for
for (int i = l; i < r; i++) {
update(m, A + row[i] * m, A + (row[i] + 1) * m);
row[i]++;
}
// Check that the oldest fall is still in process
if (row[l] == n - 1) {
l++;
}
}
}