We consider a matrix of natural numbers, A, of size NxN. Each value in the matrix, A[i,j], is replaced by the sum of the values in the same row i (excluding A[i,j] itself) that are multiples or divisors of A[i,j], minus the sum of the values in the same column j (excluding A[i,j] itself) that are multiples or divisors of A[i,j].
Several problems are solved. For each problem, the function to parallelize has:
Input parameters:
-int N: for the size of the matrix
-int *A: matrix NxN of natural numbers
3
3 1 1 5 1
800 4 1 20 72
750 3 2 30 67
/* CPP_CONTEST=2018Qual CPP_PROBLEM=C CPP_LANG=CUDA CPP_PROCESSES_PER_NODE=1 CPP_NUM_NODES=1 */
#include <stdlib.h>
#include <stdio.h>
#include <iostream>
#include <iomanip>
#include <math.h>
//#include <helper_functions.h>
//#include <helper_cuda.h>

using namespace std;

// Copy the first n entries of vo (origin) into vd (destination).
void copy(int n,int *vd,int *vo)
{
  for(int i=0;i<n;i++)
    vd[i]=vo[i];
}

// Tell whether one of the two values is a multiple of the other.
// A zero operand is treated as related to everything (0 is a multiple of any
// integer); handling it up front avoids evaluating a modulo by zero.
static bool related(int x,int y)
{
  if(x==0 || y==0)
    return true;
  return x%y==0 || y%x==0;
}

// Accumulate in b the contributions of the pairs headed by a[row,column].
//   n      : matrix dimension (a and b are n x n, stored by rows)
//   a      : original values (read only here)
//   b      : accumulators, expected to start at zero
//   row    : row of the pivot element
//   column : column of the pivot element
// Only the elements to the right of the pivot (same row) and below it (same
// column) are visited: each related pair updates both of its members at once,
// so pairs with earlier elements were already handled by earlier calls.
void update(int n,int *a,int *b,int row,int column)
{
  const int element=a[row*n+column];
  int *da=&a[row*n+column];
  int *db=&b[row*n+column];

  // Row neighbours add to the result.
  for(int i=1;i<n-column;i++)
  {
    if(related(element,da[i]))
    {
      db[0]+=da[i];
      db[i]+=da[0];
    }
  }

  // Column neighbours subtract from the result.
  for(int i=1;i<n-row;i++)
  {
    if(related(element,da[i*n]))
    {
      db[0]-=da[i*n];
      db[i*n]-=da[0];
    }
  }
}

// Sequential solution: replace every a[i,j] by (sum of related values in its
// row) minus (sum of related values in its column), the element itself
// excluded. All results are computed from the original values and written
// back to a at the end.
//   n : matrix dimension; nothing is done when n<=0. Indices are computed in
//       int, so n*n is expected to fit in an int.
//   a : n x n matrix stored by rows, overwritten with the result.
// Aborts with a message on stderr if the scratch matrix cannot be allocated.
void sec(int n,int *a)
{
  if(n<=0)
    return;

  const size_t total=(size_t)n*(size_t)n;
  int *b=(int *) calloc(total,sizeof(int)); //zero-initialised, stores the updated values
  if(b==NULL)
  {
    fprintf(stderr,"sec: unable to allocate %zu ints\n",total);
    abort();
  }

  for(int i=0;i<n;i++)
  {
    for(int j=0;j<n;j++)
      update(n,a,b,i,j);
  }

  // Qualified call: never considers std::copy brought in by the using-directive.
  ::copy(n*n,a,b);
  free(b);
}
esquema-cuda.cu (non modifiable, used by mooshak for in/out and evaluation, provided for local experimentation)
#include <stdlib.h>
#include <stdio.h>
#include <sys/time.h>
#include <signal.h>
#include <unistd.h>
#include <iostream>
#include <iomanip>
#include <math.h>

using namespace std;

// Scheme for the problem of Multiples and divisors - CUDA version
// Qualification contest for the Spanish Parallel Programming Contest 2018, problem D

// Initialize a vector: fill the n entries of m with values derived from
// rand(), scaled from [0,RAND_MAX] into [lv,uv] and truncated to int.
// The expression is kept exactly as is so that a given seed reproduces the
// same data.
void inicialize(int n,int *m,int lv,int uv)
{
  for(int i=0;i<n;i++)
    m[i]=int(((1.*rand())/RAND_MAX)*(uv-lv)+lv);
}

// Write an n x m matrix stored by rows to standard output, only for debug.
void escribir(int n,int m,int *a)
{
  for(int i=0;i<n;i++)
  {
    for(int j=0;j<m;j++)
      cout << a[i*m+j]<<" ";
    cout <<endl;
  }
  cout <<endl;
}

// Output to be compared with the output of the solution provided by the organization.
// Only the positions that are multiples of step are written, to spare disk space.
// step must be non-zero (main rejects a zero gap before calling this).
void escribirresult(int n,int *a,int step)
{
  for(int i=0;i<n;i++)
  {
    if((i%step)==0)
    {
      cout << a[i]<<endl;
    }
  }
}

/*
c
c     mseconds - returns elapsed milliseconds since Jan 1st, 1970.
c
*/
long long mseconds()
{
  struct timeval t;
  gettimeofday(&t, NULL);
  // Widen before multiplying so the product cannot overflow a narrow time_t.
  return static_cast<long long>(t.tv_sec)*1000 + t.tv_usec/1000;
}

// Abort when the maximum execution time is reached.
// Only async-signal-safe calls (write, abort) are used inside the handler.
static void alarm_handler(int sig)
{
  (void)sig;
  static const char msg[]="Time Limit Exceeded\n";
  ssize_t written=write(STDERR_FILENO,msg,sizeof(msg)-1);
  (void)written; //nothing useful can be done if the write fails
  abort();
}

// The external function to be parallelized by the contestants
extern void sec(int,int *);

// Driver: reads the number of problems and, for each one, its parameters
// (size, seed, lower bound, upper bound, output gap) from standard input,
// generates the matrix, times the call to sec() and writes the sampled
// result to standard output. The accumulated time (first problem excluded)
// is written to the file "stats".
// Returns 0 on success, 1 if "stats" cannot be opened or a problem
// description is missing or invalid.
int main(int argc,char *argv[])
{
  int num_problems=0;   //number of problems (stays 0 if it cannot be read)
  int N;                //for the size of the matrix
  int *A;               //matrix of data
  int seed;             //seed for random generation
  int gap;              //number of data between each output (only for output)
  int lower_value;      //lower bound for the values in the vector
  int upper_value;      //upper bound for the values in the vector
  long long ti,tf,tt=0; //initial, final and total times
  int status=0;         //exit status

  FILE *stats_file = fopen("stats", "w");
  if(stats_file==NULL)
  {
    perror("stats");
    return 1;
  }

  // To establish a time limit
  struct sigaction sact;
  sigemptyset(&sact.sa_mask);
  sact.sa_flags = 0;
  sact.sa_handler = alarm_handler;
  sigaction(SIGALRM, &sact, NULL);
  alarm(100);  /* time limit */

//  MPI_Init(&argc,&argv);
//  MPI_Comm_size(MPI_COMM_WORLD,&np);
//  MPI_Comm_rank(MPI_COMM_WORLD,&nodo);

// The number of test cases is read
// and sent to all the processes
//  if(nodo==0)
//  {
  cin >> num_problems;
//    MPI_Bcast(&cuantos,1,MPI_INT,0,MPI_COMM_WORLD);
//  }
//  else
//  {
//    MPI_Bcast(&cuantos,1,MPI_INT,0,MPI_COMM_WORLD);
//  }

  for(int i=0;i<num_problems;i++)
  {
//    if(nodo==0)
//    {
    // Read, in this order: the size of the matrix, the seed for the random
    // generation of data, the lower and upper bounds for the values, and
    // the gap between values to write in the output.
    // A zero gap is rejected because it would be used as a modulo divisor.
    if(!(cin >> N >> seed >> lower_value >> upper_value >> gap) || gap==0)
    {
      fprintf(stderr, "Invalid or incomplete description for problem %d\n", i);
      status=1;
      break;
    }

    // Space for the data
    A = new int[N*N];

    srand(seed);
    inicialize(N*N,A,lower_value,upper_value);

#ifdef DEBUG
    // While debugging the vector is written
    escribir(N,N,A);
#endif
//    }

//    MPI_Barrier(MPI_COMM_WORLD);
    ti=mseconds();
    sec(N,A);
//    MPI_Barrier(MPI_COMM_WORLD);
    tf=mseconds();

//    if(nodo==0)
//    {
    // The time of the first input is not considered
    if(i!=0)
      tt+=tf-ti;

#ifdef DEBUG
    // While debugging the time of each execution is written
    fprintf(stats_file, "%lld\n", tf-ti);
    escribir(N,N,A);
#endif

    // The results of each problem are written,
    // even for the first problem
    escribirresult(N*N,A,gap);

    delete[] A;
//    }
  }

//  if(nodo==0)
//  {
  fprintf(stats_file, "%lld\n", tt);
  fclose(stats_file);
//  }

//  MPI_Finalize();
  return status;
}