/* author: Guochun Shi */ #include #include #include #include #include "spu/speinfo.h" #include #include #include #include #include #include extern spe_program_handle_t simple_spu; int need_verify=0; #define SPU_THREADS 8 #define COUNT (1024*1024*4) #ifndef HUGETLBFS float bufa[COUNT] __attribute__ ((aligned (128))); float bufb[COUNT] __attribute__ ((aligned (128))); float bufc[COUNT] __attribute__ ((aligned (128))); float resultc[COUNT] __attribute__ ((aligned (128))); #define alloc_mem() #define dealloc_mem() #else float* bufa, *bufb, *bufc, *resultc; int fd; char* addr; int num_array; #define FILE_NAME "/mnt/huge/hugefile" int alloc_mem(){ char* tmpaddr; fd = open(FILE_NAME, O_CREAT|O_RDWR, 0755); if(fd < 0){ perror("Open failed\n"); exit(1); } num_array = need_verify?4:3; printf("Using hugetlbfs. \n"); addr = mmap(0, COUNT*num_array*sizeof(float), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (addr ==MAP_FAILED){ perror("mmap"); unlink(FILE_NAME); exit(1); } tmpaddr =addr; while ((((unsigned long)tmpaddr) & 0x80) != 0){ tmpaddr++; } bufa = (float*)tmpaddr; bufb = (float*)(tmpaddr + COUNT*sizeof(float)); bufc = (float*)(tmpaddr + 2*COUNT*sizeof(float)); if (need_verify){ resultc = (float*)(tmpaddr + 3*COUNT*sizeof(float)); } return 0; } void dealloc_mem() { munmap(addr, COUNT*num_array*sizeof(float)); close(fd); unlink(FILE_NAME); } #endif float gettime_print(int num) { static struct timeval oldt={0,0}; struct timeval t; double result; if (gettimeofday(&t, NULL) < 0){ fprintf(stderr, "ERROR: gettimeofday() failed\n"); return -1; } if (oldt.tv_sec == 0 && oldt.tv_usec ==0){ result = 0; }else { result = t.tv_sec - oldt.tv_sec + (t.tv_usec-oldt.tv_usec)*0.000001; } oldt = t; if (num !=0){ //printf("#%d time: %f seconds\n",num, result); } return result; } void display_matrix(float*a ) { int i, j; for (i =0 ; i < 4; i++){ for (j=0; j< 4; j++){ printf("\t%f", a[i*4+j]); } printf("\n"); } printf("\n"); } int init_value(float* a, float* b, float* c) { float* fa,*fb, *fc; int i; memset(c, 0, COUNT*sizeof(float)); fa = (float*)a; fb = (float*)b; fc = (float*)c; for (i = 0; i< COUNT;i++){ fa[i] = 1.0*rand()/RAND_MAX; fb[i] = 1.0*rand()/RAND_MAX; } return 0; } void matrix_mult_ppu(complex_t* c, complex_t* a, complex_t* b) { int i, j, k, count; complex_t* fa,*fb, *fc; fa = a; fb = b; fc = c; for(count = 0; count < COUNT/32; count++){ for (i = 0; i< 4; i++){ for (j = 0; j< 4;j++){ fc[i*4+j] = (complex_t){0,0}; for (k =0; k < 4; k++){ fc[i*4+j].real += fa[i*4 +k].real* fb[k*4+j].real - fa[i*4+k].imag*fb[k*4+j].imag; fc[i*4+j].imag += fa[i*4+k].real* fb[k*4+j].imag + fa[i*4+k].imag*fb[k*4+j].real; } } } fa += 16; fb += 16; fc += 16; } } void matrix_compare(float* m1, float* m2) { int errcount =0; int i; float tmpf; for (i = 0; i< COUNT ; i++){ tmpf = m1[i] - m2[i]; if (tmpf < 0 ){ tmpf = - tmpf; } if (tmpf > 1.0e-4){ printf("i=%d, m1=%f, m2=%f\n", i, m1[i], m2[i]); errcount ++; } } if (errcount == 0){ printf("passed\n"); }else{ printf("failed(%d)\n", errcount); exit(1); } return; } int do_work_in_ppu() { matrix_mult_ppu((complex_t*)bufc, (complex_t*)bufa, (complex_t*)bufb); return 0; } void usage(char* prog){ printf("%s: -n -v \n", prog); return; } int g_spe_threads ; int repeat_num=2; int processopt(int argc, char **argv) { char *cvalue = NULL; int n; signed char c; opterr = 0; n = 1;/*default number of spus*/ while ((c = getopt (argc, argv, "n:vr:")) != -1){ switch (c) { case 'n': cvalue = optarg; if (sscanf(cvalue, "%d", &n) ==0 || n < 0){ fprintf(stderr, "Invalid spu number(%d)\n", n); return -1; } break; case 'r': cvalue=optarg; if (sscanf(cvalue, "%d", &repeat_num) ==0 || repeat_num < 0){ fprintf(stderr, "Invalid repeat num(%d)\n", repeat_num); return -1; } break; case 'v': need_verify =1; break; case '?': if (isprint (optopt)) fprintf (stderr, "Unknown option `-%c'.\n", optopt); else fprintf (stderr, "Unknown option character `\\x%x'.\n", optopt); usage(argv[0]); exit(-1); default: usage(argv[0]); exit(-1); } } g_spe_threads = n; printf("Program running on %d SPUs\n", n); return 0; } int main(int argc, char **argv) { speid_t spe_ids[SPU_THREADS]; int i; struct spe_info si[SPU_THREADS] __attribute__ ((aligned (128))); int totalcount = COUNT*sizeof(float); /* number of matrces*/ int num_runs = 0 ; float totaltime = 0; float ave_time = 0; (void)argc; (void)argv; processopt(argc,argv); for(i=0; i