We want to multiply two square matrices of doubles, A and B, each of size NxN, where every row of A ends in a run of zeros and every column of B ends in a run of zeros.
Several problems are solved in one run. For each problem, the function to be parallelized has:
Input parameters:
-int n: the size N of the matrices (number of rows and of columns)
-double *a,*b: the matrices to be multiplied
Input/Output parameters:
-double *c: resulting matrix
3
10 2 1
2250 5 200
2850 7 300
/*
CPP_CONTEST=2017
CPP_PROBLEM=C
CPP_LANG=CUDA
CPP_PROCESSES_PER_NODE=1
CPP_NUM_NODES=1
*/
#include <stdlib.h>
/*
 * For every row of the n x n row-major matrix a, stores in zeros[i] the
 * length of row i once its trailing zeros are stripped, i.e. one past the
 * column of the last non-zero entry (0 when the whole row is zero).
 *
 * n      number of rows and columns of a
 * a      matrix stored as n*n doubles, row after row (only read)
 * zeros  output array with room for n ints
 */
void obtainzerosrows(int n,double *a,int *zeros)
{
    for (int i = 0; i < n; i++) {
        const double *row = a + i * n;
        int j = n - 1;
        /* `&&` instead of the `and` alternative token: the latter needs
           <iso646.h> in C and is not accepted by every host compiler. */
        while (j >= 0 && row[j] == 0.)
            j--;
        zeros[i] = j + 1;
    }
}
/*
 * Transposes the n x n row-major matrix a in place: every element above the
 * main diagonal is exchanged with its mirror image below the diagonal.
 */
void trasponer(int n,double *a)
{
    for (int row = 0; row < n; row++) {
        double *upper = a + row * n;               /* start of row `row` */
        for (int col = row + 1; col < n; col++) {
            double *lower = a + col * n + row;     /* element (col,row) */
            const double saved = upper[col];
            upper[col] = *lower;
            *lower = saved;
        }
    }
}
/*
 * Sequential product c = a * b of two n x n row-major matrices, skipping the
 * trailing zeros of the rows of a and of the columns of b.
 *
 * n  size of the matrices; nothing is done when n <= 0
 * a  left operand (only read)
 * b  right operand; transposed in place while the product is computed and
 *    transposed back before returning, so the caller sees it unchanged
 * c  result, all n*n entries are overwritten
 */
void sec(int n,double *a,double *b,double *c)
{
    if (n <= 0)
        return;

    int *zerosrows = new int[n];
    int *zeroscolumns = new int[n];

    /* With b transposed, column j of the original b is the contiguous row j,
       so the same helper measures the useful length of rows of a and of
       columns of b. */
    trasponer(n, b);
    obtainzerosrows(n, a, zerosrows);
    obtainzerosrows(n, b, zeroscolumns);

    for (int i = 0; i < n; i++) {
        const double *arow = a + i * n;
        for (int j = 0; j < n; j++) {
            const double *bcol = b + j * n;
            /* Beyond the shorter of the two useful lengths at least one
               factor is zero, so the dot product can stop there. */
            const int limit = zerosrows[i] < zeroscolumns[j] ? zerosrows[i]
                                                             : zeroscolumns[j];
            double s = 0.;
            for (int k = 0; k < limit; k++)
                s += arow[k] * bcol[k];
            c[i * n + j] = s;
        }
    }

    /* Undo the in-place transposition: b is an input parameter. */
    trasponer(n, b);

    delete[] zerosrows;
    delete[] zeroscolumns;
}
esquema-cuda.cu (not modifiable; used by Mooshak for input/output and evaluation; provided for local experiments)
#include <stdlib.h>
#include <stdio.h>
#include <sys/time.h>
#include <signal.h>
#include <unistd.h>
//Spanish Parallel Programming Contest 2017. Problem D - Heterosolar
//Multiplication of tailed matrices. CUDA version.
//Schema for input/output, validation and execution time
/*
 * Fills the first t entries of m with values obtained from rand(), each one
 * scaled to the interval [-10, 10].
 */
void generar(double *m, int t) {
    double *out = m;
    for (int left = t; left > 0; left--)
        *out++ = (20. * rand()) / RAND_MAX - 10.;
}
/*
 * For each row of the n x n row-major matrix a, draws a cut column from
 * rand() and overwrites the row with 0. from that column up to its end.
 */
void generatezerosrows(int n,double *a)
{
    for (int row = 0; row < n; row++) {
        double *cells = a + row * n;
        /* cut lies in [0, n]; a cut equal to n leaves the row untouched */
        int col = n*((1.*rand())/RAND_MAX);
        while (col < n)
            cells[col++] = 0.;
    }
}
/*
 * For each column of the n x n row-major matrix a, draws a cut row from
 * rand() and overwrites the column with 0. from that row down to its end.
 */
void generatezeroscolumns(int n,double *a)
{
    for (int col = 0; col < n; col++) {
        /* cut lies in [0, n]; a cut equal to n leaves the column untouched */
        int row = n*((1.*rand())/RAND_MAX);
        for (; row < n; row++)
            a[row * n + col] = 0.;
    }
}
/*
 * Prints the t x t row-major matrix m on standard output: one text line per
 * matrix row, every value with four decimals followed by a space, and an
 * empty line after the last row.
 */
void escribir(double *m, int t) {
    const double *next = m;   /* walks the matrix in storage order */
    for (int row = 0; row < t; row++) {
        for (int col = 0; col < t; col++)
            printf("%.4lf ", *next++);
        printf("\n");
    }
    printf("\n");
}
/*
 * Prints a sample of the array a on standard output: every element whose
 * index is a multiple of salida, one per line, followed by an empty line.
 *
 * a       array of N doubles
 * N       number of elements in a
 * salida  sampling stride; when it is 0 no element is printed
 */
void escribirresult(double *a,int N,int salida)
{
    /* i % 0 is undefined behaviour, so a zero stride selects nothing. */
    if (salida != 0) {
        for (int i = 0; i < N; i++) {
            if ((i % salida) == 0)
                printf("%lf \n", a[i]);
        }
    }
    printf("\n");
}
/*
 * mseconds - returns the time reported by gettimeofday(), expressed in
 * milliseconds elapsed since Jan 1st, 1970.
 */
long long mseconds(){
    struct timeval t;
    gettimeofday(&t, NULL);
    /* Widen before scaling: tv_sec*1000 computed in a 32-bit time_t would
       overflow. */
    return (long long)t.tv_sec * 1000LL + (long long)t.tv_usec / 1000LL;
}
/*
 * SIGALRM handler installed by main(): reports on standard error that the
 * time limit was exceeded and terminates the process with abort().
 */
static void alarm_handler(int sig) {
    /* A signal handler may only call async-signal-safe functions; fprintf()
       is not one of them, write() and abort() are. */
    static const char msg[] = "Time Limit Exceeded\n";
    (void)sig;
    if (write(STDERR_FILENO, msg, sizeof msg - 1) < 0) {
        /* nothing sensible can be done about a failed write at this point */
    }
    abort();
}
extern void sec(int t,double *a,double *b,double *c);
/*
 * Contest driver. Reads from standard input the number of problems and, for
 * each problem, three integers: the matrix size N, the seed for rand() and
 * the output stride. For every problem it generates the two tailed matrices,
 * times sec() on them and prints a sample of the result.
 *
 * The time spent in sec(), in milliseconds and summed over all problems
 * except the first one, is written to the file "stats".
 *
 * Returns 0 on success and 1 when "stats" cannot be written, the input is
 * malformed or memory runs out.
 */
int main(int argc,char *argv[]) {
    /* Largest N whose square still fits in a 32-bit int (N*N is passed to
       functions taking an int count). */
    enum { MAX_ORDER = 46340 };
    int i,N;
    int cuantos,semilla,salida;
    long long ti,tf,tt=0;
    double *a,*b,*c;
    int status = 0;
    struct sigaction sact;
    FILE *stats_file = fopen("stats", "w");

    (void)argc;
    (void)argv;

    if (stats_file == NULL) {
        perror("stats");
        return 1;
    }

    sigemptyset(&sact.sa_mask);
    sact.sa_flags = 0;
    sact.sa_handler = alarm_handler;
    sigaction(SIGALRM, &sact, NULL);
    alarm(40); /* time limit */

    if (scanf("%d",&cuantos) != 1) {
        fprintf(stderr, "Invalid input: missing number of problems\n");
        fclose(stats_file);
        return 1;
    }

    for(i=0;i<cuantos;i++)
    {
        /* Matrices size, seed for random generation, and the stride that
           determines the elements to be written to the output */
        if (scanf("%d",&N) != 1 || scanf("%d",&semilla) != 1
            || scanf("%d",&salida) != 1
            || N < 0 || N > MAX_ORDER || salida == 0) {
            fprintf(stderr, "Invalid input: bad description of problem %d\n", i);
            status = 1;
            break;
        }

        /* Space for the two operands and the result */
        size_t count = (size_t)N * (size_t)N;
        a = (double *) calloc(count, sizeof(double));
        b = (double *) calloc(count, sizeof(double));
        c = (double *) calloc(count, sizeof(double));
        /* calloc may legitimately return NULL for a zero-sized request */
        if (count != 0 && (a == NULL || b == NULL || c == NULL)) {
            fprintf(stderr, "Out of memory for matrices of size %d\n", N);
            free(a);
            free(b);
            free(c);
            status = 1;
            break;
        }

        srand(semilla);
        generar(a,N*N);
        generatezerosrows(N,a);
        generar(b,N*N);
        generatezeroscolumns(N,b);

        ti=mseconds();
        sec(N,a,b,c);
        tf=mseconds();
        /* the first problem is not counted in the reported time */
        if(i!=0) tt+=tf-ti;

        escribirresult(c,N*N,salida);
        free(a);
        free(b);
        free(c);
    }

    /* %lld is the standard conversion for long long */
    fprintf(stats_file, "%lld\n", tt);
    if (fclose(stats_file) != 0) {
        perror("stats");
        status = 1;
    }
    return status;
}