/* MYBENCH.C This program benchmarks a bsp communications library. It measures throughput and latency using different communcation primitives. compile: cc -o putbench mybench.c -O3 -DPUT where cc is the c frontend to compile a bsp program use instead of -DPUT: -DGET to acquire a program which measures 'bsp_get' -DSEND to measure 'bsp_send' author: "Wijnand Suijlen" date: June 29th 2006 */ #include #include #define ITS 400 #define MAXH 256 #define MESG_TYPE double #if PUT #define BSP_COMM(b, s, P, o, local, remote, type, n) \ bsp_put( (s - b + P) % P, local, remote, sizeof(type)*o, sizeof(type)*n) #elif GET #define BSP_COMM(b, s, P, o, local, remote, type, n) \ bsp_get( (b + s) % P, remote, sizeof(type)*o, local, sizeof(type)*n) #elif SEND #define BSP_COMM(b, s, P, o, local, remote, type, n) \ bsp_send( (s - b + P) %P, NULL, local, sizeof(type)*n) #endif double sum(double x) { int i; double array[bsp_nprocs()], result = 0; bsp_push_reg(array, bsp_nprocs() * sizeof(double)); bsp_sync(); for (i = 0; i < bsp_nprocs(); i++) bsp_put(i, &x, array, bsp_pid()*sizeof(double), sizeof(double)); bsp_sync(); for (i = 0; i < bsp_nprocs(); i++) result += array[i]; bsp_pop_reg(array); return result; } void measure_throughput() { const int s = bsp_pid(), P = bsp_nprocs(); MESG_TYPE input_array[MAXH]; MESG_TYPE output_array[MAXH]; int h, i, j, o, b; double time, time0; double timing_size[MAXH+1], timing_number[MAXH+1]; bsp_push_reg(output_array, MAXH*sizeof(MESG_TYPE) ); bsp_sync(); /* fill input array with some sensible values */ for (i = 0; i < MAXH; i++) input_array[i] = (MESG_TYPE) i; /* measure throughput varying message size (using MESG_TYPE) */ for (h = 0 ; h <= MAXH ; h++) { time0 = bsp_time(); for (j = 0; j < ITS; j++) { o = 0; for (b = 0; b < P; b++) { BSP_COMM( b,s,P,o, &input_array[o], output_array , MESG_TYPE,(h / P + (h%P - s + P -1)/P) ); o += h / P + (h%P - s + P -1)/P ; } bsp_sync(); } time = bsp_time() - time0; timing_size[h] = sum(time) / (double) (P * ITS); } /* measure throughput varying message numbers (using bytes) */ for (h = 0 ; h <= MAXH ; h++) { time0 = bsp_time(); for (j = 0; j < ITS; j++) { o = 0; for (b = 0; b < P; b++) { for (i = 0; i < (h / P + (h % P - s + P - 1)/P); i++) BSP_COMM(b, s, P, o+i, &input_array[o + i], output_array,MESG_TYPE, 1); o += h / P + (h%P - s + P -1)/P ; } bsp_sync(); } time = bsp_time() - time0; timing_number[h] = sum(time) / (double) (P * ITS); } if ( s == 0) { printf("# Varying messages size\n"); printf("# Size h-relation\tTime (seconds)\n"); for (h = 0; h <= MAXH; h++) printf("%d\t%e\n", h, timing_size[h]); printf("\n# Varying number of messages\n"); printf("# Size h-relastion\tTime (seconds)\n"); for (h = 0; h <= MAXH; h++) printf("%d\t%e\n", h, timing_number[h]); fflush(stdout); } } void measure_latency() { int i, j; double time0, time, avg, input[bsp_nprocs()], output[bsp_nprocs()]; int s, P; /* who am I ? */ s = bsp_pid(); P = bsp_nprocs(); bsp_sync(); /* measure latency empty synchronisation */ time0 = bsp_time(); for (i = 0; i < ITS; i++) bsp_sync(); time = bsp_time() - time0; avg = sum(time) / ( (double) P * ITS); if (s == 0) printf("# Empty sync takes: (seconds)\n%e\n", avg); /* measure latency full h-relation (using doubles) */ // fill input with some sensible values for (i = 0; i < P; i++) input[i] = (double) i; bsp_push_reg(output, sizeof(double) * P ); bsp_sync(); time0 = bsp_time(); for (i = 0; i < ITS; i++) { for (j = 0; j < P; j++) bsp_put(j, &input[j], output, s*sizeof(double), sizeof(double) ); bsp_sync(); } time = bsp_time() - time0; avg = sum(time) / ( (double) ITS * P); if (s == 0) printf("# Full h-relation sync takes: (seconds)\n%e\n", avg); } void bench() { bsp_begin(bsp_nprocs()); measure_latency(); measure_throughput(); bsp_end(); } int main(int argc, char *argv []) { bsp_init( &bench, argc, argv); printf("Benchmarking on BSP computer having %d processors using %s\n" , bsp_nprocs(), #if PUT "bsp_put()" #elif GET "bsp_get()" #elif SEND "bsp_send()" #endif ); bench(); return 0; }