/* author: Guochun Shi */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "speinfo.h" #include "spu/compute_task.h" compute_task_t task __attribute__ ((aligned (128))); control_t control[16] __attribute__ ((aligned (128))); extern spe_program_handle_t spu_main; #define SPU_THREADS 16 #define COUNT (1024*1024*16) int stride = 128; //#define __HUGETLBFS__ #ifndef __HUGETLBFS__ char bufa[COUNT] __attribute__ ((aligned (128))); char bufc[COUNT] __attribute__ ((aligned (128))); int alloc_mem() { return 0; } void dealloc_mem() { return; } #else char* bufa; char* bufc; int fd; #define FILE_NAME "/mnt/huge/hugefile" int alloc_mem(){ char* addr; printf("Using hugetlbfs \n"); fd = open(FILE_NAME, O_CREAT|O_RDWR, 0755); if(fd < 0){ perror("Open failed\n"); exit(1); } addr = mmap(0, COUNT*2, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (addr ==MAP_FAILED){ perror("mmap"); unlink(FILE_NAME); exit(1); } bufa = addr; bufc = addr + COUNT; printf("addr =%p, bufc=%p\n", addr, bufc); return 0; } void dealloc_mem() { munmap(bufa, COUNT*2); close(fd); unlink(FILE_NAME); } #endif float gettime_print(int num) { static struct timeval oldt={0,0}; struct timeval t; double result; if (gettimeofday(&t, NULL) < 0){ fprintf(stderr, "ERROR: gettimeofday() failed\n"); return -1; } if (oldt.tv_sec == 0 && oldt.tv_usec ==0){ result = 0; }else { result = t.tv_sec - oldt.tv_sec + (t.tv_usec-oldt.tv_usec)*0.000001; } oldt = t; if (num !=0){ //printf("#%d time: %f seconds\n",num, result); } return result; } void array_compare(char* m1, char* m2) { int errcount =0; int i; for (i = 0; i< COUNT ; i++){ if (m1[i] != m2[i]){ printf("i=%d, m1=%d, m2=%d\n", i, m1[i], m2[i]); errcount ++; exit(1); } } if (errcount == 0){ printf("passed\n"); }else{ printf("failed(%d)\n", errcount); exit(1); } return; } int init_value(void) { int i; printf("bufa =%p, bufc=%p\n", bufa, bufc); memset(bufa, 0, COUNT*sizeof(char)); memset(bufc, 0, COUNT*sizeof(char)); for (i = 0; i< COUNT;i++){ bufa[i] = rand() % 256; } return 0; } int need_verify = 0; int process_options(int argc, char **argv) { char *cvalue = NULL; signed char c; while ((c = getopt (argc, argv, "s:")) != -1){ switch (c) { case 's': cvalue = optarg; if (sscanf(cvalue, "%d", &stride) == 0 || stride < 0){ fprintf(stderr, "Invalid stride value(%d)(%s)\n", stride, cvalue); _exit(1); } break; default: return -1; } } printf("running with stride = %d\n", stride); return 0; } int main(int argc, char **argv) { int i; int totalcount = COUNT; /* number of matrces*/ float totaltime = 0; float bandwidth ; float ave_time; int n = 8; (void)argc; (void)argv; process_options(argc, argv); ppu_task_spe_num_set(n); ppu_task_debug_level_set(0); if (ppu_task_init(0, NULL, spu_main) < 0){ printf("Init failed\n"); return -1; } alloc_mem(); init_value(); for (i=0;i < n;i++){ control[i].ites = 1; control[i].ticks = 0; } while(stride <= 17*128){ task.common.cmd = SAMPLE_TASK; task.common.size = sizeof(task); task.bufa = (char*)bufa; task.bufc = (char*)bufc; task.count = totalcount; task.control = (char*)&control[0]; task.stride = stride; gettime_print(0); ppu_task_run((task_t*)&task); totaltime = gettime_print(1); int freq = 14318000; int totticks = 0; for ( i=0;i < n; i++){ //printf("%d ticks=%d\n", i, control[i].ticks); totticks += control[i].ticks; } ave_time = ((double)(totticks)) *1.0/freq/n; if (need_verify){ printf("verifying data ..."); array_compare(bufa, bufc); } dealloc_mem(); double latency = 0; //ave_time = totaltime - latency; //printf("average time is =%f\n", ave_time); bandwidth = n*1.0*128*8*2048/ave_time/1000000000; printf("stride=%d, bandwidth is %f GB/s\n",stride/128, bandwidth); stride += 128; } return (0); }