Displaced additions on a vector of floats. CUDA version

We consider a vector v of floating-point numbers. At each step, the value at every position i (v[i]) is updated by adding to it the entries of v whose distance from position i is a nonzero multiple of d = (int)(fabs(v[i])/100) + 2, i.e. the integer part of |v[i]|/100 plus 2. The multiple can be positive or negative, so positions both before and after i are considered. All positions are updated from the values the vector had at the end of the previous step, and a given number of steps is carried out.


Several problems are solved. For each problem, the function to be parallelized receives:

Input parameters:

-int n: size of vector v

-float *v: vector to be updated

-int s: number of iterations

Files

3

10 1 1 -1000 1000 1

145000 4 2 -994 994 853

140000 3 3 -500 499 757

/*

CPP_CONTEST=2017ClaA

CPP_PROBLEM=C

CPP_LANG=CUDA

CPP_PROCESSES_PER_NODE=1

CPP_NUM_NODES=1

*/

#include <stdlib.h>

#include <stdio.h>

#include <iostream>

#include <iomanip>

#include <math.h>

using namespace std;

// Computes the new value of a single position of the vector.
//   n   : number of elements in vi and vf
//   vi  : values of the previous step (only read)
//   vf  : destination vector; only vf[pos] is written
//   pos : index of the element being updated
// vf[pos] starts as vi[pos] and then receives every vi[k] whose index k is
// pos plus or minus a nonzero multiple of the stride, walking first towards
// the end of the vector and then towards its beginning.
void updateone(int n,float *vi,float *vf,long pos)
{
    float &total = vf[pos];
    total = vi[pos];

    // Stride: integer part of |vi[pos]|/100, plus 2.
    const int stride = (int) (fabs(vi[pos])/100) + 2;

    // Entries located after pos.
    int k = pos + stride;
    while (k < n) {
        total += vi[k];
        k += stride;
    }

    // Entries located before pos.
    k = pos - stride;
    while (k >= 0) {
        total += vi[k];
        k -= stride;
    }
}

// Carries out one complete step: each of the n positions of vf is computed
// from the values stored in vi, visiting the positions in increasing order.
void update(int n,float *vi,float *vf)
{
    int pos = 0;
    while (pos < n) {
        updateone(n, vi, vf, pos); // compute the new value of this position
        ++pos;
    }
}

// Sequential solver: applies s update steps to the n-element vector v and
// leaves the final values in v.
// A scratch buffer of n floats is allocated so that every step reads from one
// buffer and writes into the other; the two roles are exchanged after each step.
void sec(int n,float *v,int s)
{
    float *scratch = new float[n];
    float *current = v;       // values produced by the last completed step
    float *next = scratch;    // receives the values of the step in progress

    int done = 0;
    while (done < s) {
        update(n, current, next); // next now holds the updated vector

        // exchange the roles of the two buffers
        float *recycled = current;
        current = next;
        next = recycled;

        ++done;
    }

    // When the latest values ended up in the scratch buffer they are copied
    // back so that the caller sees the result in v.
    if (current != v) {
        long k = 0;
        while (k < n) {
            v[k] = current[k];
            ++k;
        }
    }

    delete[] scratch;
}

#include <stdlib.h>

#include <stdio.h>

#include <sys/time.h>

#include <signal.h>

#include <unistd.h>

#include <iostream>

#include <iomanip>

#include <math.h>

using namespace std;

// Scheme for the problem of displaced additions in a vector - CUDA version

// First classification contest for the Spanish Parallel Programming Contest 2017, problem C

//fill a vector with pseudo-random values
//  n  : number of elements to fill
//  m  : vector that receives the values
//  lv : lower bound for the generated values
//  uv : upper bound for the generated values
//each element is rand()/RAND_MAX scaled to the width (uv-lv) and shifted by lv;
//the sequence depends on the seed previously given to srand()
void inicializevector(int n,float *m,int lv,int uv)
{
    int filled = 0;
    while (filled < n) {
        // fraction in [0,1] obtained from the next pseudo-random number
        const double fraction = (1.*rand())/RAND_MAX;
        m[filled] = fraction*(uv-lv)+lv;
        ++filled;
    }
}

//writes a sample of the vector to standard output, to be compared with the
//output of the solution provided by the organization
//  n    : number of elements in a
//  a    : vector to print
//  step : only the indices i with i % step == 0 are written, to spare disk
//         space (step must not be 0)
//every value is written in fixed notation with 3 decimals and followed by a
//blank; the line is closed with endl
void escribirresult(int n,float *a,int step)
{
    for (int idx = 0; idx < n; ++idx) {
        if ((idx % step) != 0)
            continue; // not one of the sampled positions

        std::cout << std::setiosflags(std::ios::fixed) << std::setprecision(3);
        std::cout << a[idx] << " ";
    }
    std::cout << std::endl;
}

/*
 * mseconds - returns the wall-clock time reported by gettimeofday(),
 *            expressed in milliseconds elapsed since Jan 1st, 1970.
 */
long long mseconds(){
    struct timeval t;
    gettimeofday(&t, NULL);
    // Widen the seconds to long long before scaling: where tv_sec is a
    // 32-bit type, tv_sec*1000 would overflow before being converted to the
    // return type.
    return static_cast<long long>(t.tv_sec)*1000 + t.tv_usec/1000;
}

//abort when the maximum execution time is reached
//  sig : number of the delivered signal (not used)
//the message is emitted with write() instead of stdio because stdio functions
//are not async-signal-safe and must not be called from a signal handler
static void alarm_handler(int sig) {
    static const char message[] = "Time Limit Exceeded\n";
    (void) sig;
    // sizeof-1 leaves out the terminating '\0'; the result is deliberately
    // ignored because the process is aborted right after
    ssize_t written = write(STDERR_FILENO, message, sizeof(message) - 1);
    (void) written;
    abort();
}

//the external function to be parallelized by the contestants:
//sec(n, v, s) carries out s steps on the n-element vector v and leaves the result in v

extern void sec(int,float *,int);

// Driver of the contest scheme.
// Reads from standard input the number of problems and, for each problem, the
// values n, s, seed, lower bound, upper bound and gap. For each problem it
// generates the vector, calls sec(), and writes the sampled result to standard
// output. The accumulated time of sec() in milliseconds (the first problem is
// not counted) is written to the file "stats".
// A SIGALRM handler aborts the program when the 100-second limit expires.
// Returns 0 on success, 1 when "stats" cannot be created or the input is
// malformed.
int main(int argc,char *argv[]) {
    int num_problems = 0;  //number of problems
    int n;                 //size of the vector
    int s;                 //number of steps
    float *v;              //vector with the data
    int seed;              //seed for random generation
    int gap;               //number of data between each output (only for output)
    int lower_value;       //lower bound for the values in the vector
    int upper_value;       //upper bound for the values in the vector
    long long ti,tf,tt=0;  //initial, final and total times

    FILE *stats_file = fopen("stats", "w");
    if(stats_file == NULL)
    {
        // without this check the final fprintf would dereference a null stream
        perror("stats");
        return 1;
    }

    //to establish a time limit
    struct sigaction sact;
    sigemptyset(&sact.sa_mask);
    sact.sa_flags = 0;
    sact.sa_handler = alarm_handler;
    sigaction(SIGALRM, &sact, NULL);
    alarm(100);  /* time limit */

    // The number of test cases is read
    cin >> num_problems;

    for(int i=0;i<num_problems;i++)
    {
        // size of the vector, number of steps, seed for the random generation,
        // lower and upper bounds for the values, and gap between the values
        // written in the output
        cin >> n >> s >> seed >> lower_value >> upper_value >> gap;

        // A failed read, a negative size (invalid for new[]) or a zero gap
        // (used as a divisor when writing the result) cannot be processed
        if(!cin || n < 0 || gap == 0)
        {
            fprintf(stderr, "Invalid input for problem %d\n", i);
            fclose(stats_file);
            return 1;
        }

        // Space for the data
        v = new float[n];

        srand(seed);
        inicializevector(n,v,lower_value,upper_value);

        ti=mseconds();
        sec(n,v,s);
        tf=mseconds();

        // The time of the first input is not considered
        if(i!=0) tt+=tf-ti;

        // The results of each problem are written,
        // even for the first problem
        escribirresult(n,v,gap);

        delete[] v;
    }

    // %lld is the standard conversion for long long
    fprintf(stats_file, "%lld\n", tt);
    fclose(stats_file);

    return 0;
}