/* author: Guochun Shi */ #include #include #include #include #include "spu/speinfo.h" #include #include #include #include #include #include #include #include #include #include #include #include extern spe_program_handle_t simple_spu; #define SPU_THREADS 16 #define COUNT (1024*1024*16) //#define __HUGETLBFS__ #ifndef __HUGETLBFS__ //char bufa[COUNT] __attribute__ ((aligned (128))); //char bufc[COUNT] __attribute__ ((aligned (128))); char* node0_bufa = NULL; char* node0_bufc = NULL; char* node1_bufa = NULL; char* node1_bufc = NULL; char* node0_bufa_orig = NULL; char* node0_bufc_orig = NULL; char* node1_bufa_orig = NULL; char* node1_bufc_orig = NULL; int alloc_mem() { node0_bufa_orig = numa_alloc_onnode(COUNT/2+128, 0); node0_bufc_orig = numa_alloc_onnode(COUNT/2+128, 0); node1_bufa_orig = numa_alloc_onnode(COUNT/2+128, 1); node1_bufc_orig = numa_alloc_onnode(COUNT/2+128, 1); memset(node0_bufa_orig, 0, COUNT/2+128); memset(node0_bufc_orig, 0, COUNT/2+128); memset(node1_bufa_orig, 0, COUNT/2+128); memset(node1_bufc_orig, 0, COUNT/2+128); node0_bufa = node0_bufa_orig; node0_bufc = node0_bufc_orig; node1_bufa = node1_bufa_orig; node1_bufc = node1_bufc_orig; while( ((unsigned int)node0_bufa &0x80) != 0) node0_bufa ++; while( ((unsigned int)node0_bufc &0x80) != 0) node0_bufc ++; while( ((unsigned int)node1_bufa &0x80) != 0) node1_bufa ++; while( ((unsigned int)node1_bufc &0x80) != 0) node1_bufc ++; return 0; } void dealloc_mem() { numa_free(node0_bufa_orig, COUNT/2+128); numa_free(node0_bufc_orig, COUNT/2+128); numa_free(node1_bufa_orig, COUNT/2+128); numa_free(node1_bufc_orig, COUNT/2+128); return; } #else char* bufa; char* bufc; int fd; #define FILE_NAME "/mnt/huge/hugefile" int alloc_mem(){ char* addr; printf("Using hugetlbfs \n"); fd = open(FILE_NAME, O_CREAT|O_RDWR, 0755); if(fd < 0){ perror("Open failed\n"); exit(1); } addr = mmap(0, COUNT*2, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (addr ==MAP_FAILED){ perror("mmap"); unlink(FILE_NAME); exit(1); } bufa = addr; bufc = addr + COUNT; printf("addr =%p, bufc=%p\n", addr, bufc); return 0; } void dealloc_mem() { munmap(bufa, COUNT*2); close(fd); unlink(FILE_NAME); } #endif float gettime_print(int num) { static struct timeval oldt={0,0}; struct timeval t; double result; if (gettimeofday(&t, NULL) < 0){ fprintf(stderr, "ERROR: gettimeofday() failed\n"); return -1; } if (oldt.tv_sec == 0 && oldt.tv_usec ==0){ result = 0; }else { result = t.tv_sec - oldt.tv_sec + (t.tv_usec-oldt.tv_usec)*0.000001; } oldt = t; if (num !=0){ printf("#%d time: %f seconds\n",num, result); } return result; } void array_compare(char* m1, char* m2) { int errcount =0; int i; for (i = 0; i< COUNT/2 ; i++){ if (m1[i] != m2[i]){ printf("i=%d, m1=%d, m2=%d\n", i, m1[i], m2[i]); errcount ++; exit(1); } } if (errcount == 0){ printf("passed\n"); }else{ printf("failed(%d)\n", errcount); exit(1); } return; } int init_value(void) { int i; memset(node0_bufc, 0, COUNT/2*sizeof(char)); memset(node1_bufc, 0, COUNT/2*sizeof(char)); for (i = 0; i< COUNT/2;i++){ node0_bufa[i] = rand() % 256; node1_bufa[i] = rand() % 256; } return 0; } int spe_get_phys_id(speid_t speid) { char filename[64]; FILE* file; int phys_id; char buf[32]; sprintf(filename, "/spu/spethread-%d-%lu/phys-id", getpid(), (long unsigned int)speid); file = fopen(filename, "r"); if (file == NULL){ fprintf(stderr, "ERROR: open file(%s) failed\n", filename); return -1; } if ( fread(buf,1, sizeof(buf), file) <= 0){ fprintf(stderr, "Reading file (%s) failed\n", filename); fclose(file); return -1; } if ( (sscanf(buf, "%x", &phys_id) <= 0) || (phys_id < 0)){ fprintf(stderr, "Scanning string (%s) failed\n", buf); fclose(file); return -1; } fclose(file); return phys_id; } void usage(char* prog){ printf("%s: -n -v \n", prog); return; } int need_verify = 0; int g_spe_threads ; int repeat_num=2; int processopt(int argc, char **argv) { char *cvalue = NULL; int n; signed char c; opterr = 0; n = 1;/*default number of spus*/ while ((c = getopt (argc, argv, "n:vr:")) != -1){ switch (c) { case 'n': cvalue = optarg; if (sscanf(cvalue, "%d", &n) ==0 || n < 0){ fprintf(stderr, "Invalid spu number(%d)\n", n); return -1; } break; case 'r': cvalue=optarg; if (sscanf(cvalue, "%d", &repeat_num) ==0 || repeat_num < 0){ fprintf(stderr, "Invalid repeat num(%d)\n", repeat_num); return -1; } break; case 'v': need_verify = 1; break; case '?': if (isprint (optopt)) fprintf (stderr, "Unknown option `-%c'.\n", optopt); else fprintf (stderr, "Unknown option character `\\x%x'.\n", optopt); usage(argv[0]); return - 1; default: usage(argv[0]); return -1; } } g_spe_threads = n; printf("Program running on %d SPUs\n", n); return 0; } int main(int argc, char **argv) { speid_t spe_ids[SPU_THREADS]; int i; struct spe_info si[SPU_THREADS] __attribute__ ((aligned (128))); int totalcount = COUNT; /* number of matrces*/ int num_runs = 0 ; float totaltime = 0; float bandwidth ; float ave_time; (void)argc; (void)argv; processopt(argc,argv); if (g_spe_threads != 16){ printf("ERROR: this program only runs with 16 spes\n"); exit(1); } alloc_mem(); for(i=0; i