/* author: Guochun Shi */ #include #include #include #include #include "spu/speinfo.h" #include #include #include #include #include #include extern spe_program_handle_t simple_spu; int need_verify=0; #define SPU_THREADS 8 #define COUNT (1024*1024*3) #ifndef HUGETLBFS float bufa[COUNT] __attribute__ ((aligned (128))); float bufb[32] __attribute__ ((aligned (128))); float bufc[COUNT/3] __attribute__ ((aligned (128))); float resultc[COUNT/3] __attribute__ ((aligned (128))); #define alloc_mem() #define dealloc_mem() #else float* bufa, *bufb, *bufc, *resultc; int fd; char* addr; int num_array; #define FILE_NAME "/mnt/huge/hugefile" int alloc_mem(){ char* tmpaddr; fd = open(FILE_NAME, O_CREAT|O_RDWR, 0755); if(fd < 0){ perror("Open failed\n"); exit(1); } num_array = need_verify?4:3; printf("Using hugetlbfs. \n"); addr = mmap(0, COUNT*num_array*sizeof(float), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (addr ==MAP_FAILED){ perror("mmap"); unlink(FILE_NAME); exit(1); } tmpaddr =addr; while ((((unsigned long)tmpaddr) & 0x80) != 0){ tmpaddr++; } bufa = (float*)tmpaddr; bufb = (float*)(tmpaddr + COUNT*sizeof(float)); bufc = (float*)(tmpaddr + 2*COUNT*sizeof(float)); if (need_verify){ resultc = (float*)(tmpaddr + 3*COUNT*sizeof(float)); } return 0; } void dealloc_mem() { munmap(addr, COUNT*num_array*sizeof(float)); close(fd); unlink(FILE_NAME); } #endif float gettime_print(int num) { static struct timeval oldt={0,0}; struct timeval t; double result; if (gettimeofday(&t, NULL) < 0){ fprintf(stderr, "ERROR: gettimeofday() failed\n"); return -1; } if (oldt.tv_sec == 0 && oldt.tv_usec ==0){ result = 0; }else { result = t.tv_sec - oldt.tv_sec + (t.tv_usec-oldt.tv_usec)*0.000001; } oldt = t; if (num !=0){ //printf("#%d time: %f seconds\n",num, result); } return result; } void display_vector(float*a ) { } int init_value(float* a, float* b, float* c) { float* fa,*fb, *fc; int i; memset(c, 0, COUNT/3*sizeof(float)); fa = (float*)a; fb = (float*)b; fc = (float*)c; for (i = 0; i < 3 ; i++){ fb[i] = 1.0*rand()/RAND_MAX; } for (i = 0; i< COUNT;i++){ fa[i] = 1.0*rand()/RAND_MAX; } return 0; } void dot_product_ppu(float* c, float* a, float * b) { int i, j, k, count; float* fa,*fb, *fc; fa = a; fb = b; fc = c; for (i = 0; i< COUNT/3; i++){ float ra, rb, rc; ra = a[i*3] - b[0]; rb = a[i*3 + 1] - b[1]; rc = a[i*3 + 2] - b[2]; c[i] = ra*ra + rb*rb + rc*rc; } return; } void array_compare(float* m1, float* m2) { int errcount =0; int i; float tmp; for (i = 0; i< COUNT/3 ; i++){ tmp = m1[i] - m2[i]; if (tmp < 0){ tmp = -tmp; } if (tmp > 1.0e-4){ printf("i=%d, m1=%f, m2=%f\n", i, m1[i], m2[i]); errcount ++; } } if (errcount == 0){ printf("passed\n"); }else{ printf("failed(%d)\n", errcount); exit(1); } return; } int do_work_in_ppu() { dot_product_ppu(bufc, bufa, bufb); return 0; } void usage(char* prog){ printf("%s: -n -v \n", prog); return; } int g_spe_threads ; int repeat_num=2; int processopt(int argc, char **argv) { char *cvalue = NULL; int n; signed char c; opterr = 0; n = 1;/*default number of spus*/ while ((c = getopt (argc, argv, "n:vr:")) != -1){ switch (c) { case 'n': cvalue = optarg; if (sscanf(cvalue, "%d", &n) ==0 || n < 0){ fprintf(stderr, "Invalid spu number(%d)\n", n); return -1; } break; case 'r': cvalue=optarg; if (sscanf(cvalue, "%d", &repeat_num) ==0 || repeat_num < 0){ fprintf(stderr, "Invalid repeat num(%d)\n", repeat_num); return -1; } break; case 'v': need_verify =1; break; case '?': if (isprint (optopt)) fprintf (stderr, "Unknown option `-%c'.\n", optopt); else fprintf (stderr, "Unknown option character `\\x%x'.\n", optopt); usage(argv[0]); exit(-1); default: usage(argv[0]); exit(-1); } } g_spe_threads = n; printf("Program running on %d SPUs\n", n); return 0; } int main(int argc, char **argv) { speid_t spe_ids[SPU_THREADS]; int i; struct spe_info si[SPU_THREADS] __attribute__ ((aligned (128))); int totalcount = COUNT*sizeof(float); /* number of matrces*/ int num_runs = 0 ; float totaltime = 0; float ave_time = 0; (void)argc; (void)argv; processopt(argc,argv); for(i=0; i