changeset 7:28650a4df2b9

working version
author Merten Sach <msach@mailbox.tu-berlin.de>
date Mon, 21 Nov 2011 21:39:03 +0100
parents c8995a602b46
children b2a84bc2b274
files src/Application/main.c
diffstat 1 files changed, 87 insertions(+), 61 deletions(-) [+]
line diff
     1.1 --- a/src/Application/main.c	Mon Nov 21 19:16:03 2011 +0100
     1.2 +++ b/src/Application/main.c	Mon Nov 21 21:39:03 2011 +0100
     1.3 @@ -126,6 +126,8 @@
     1.4  size_t chunk_size = 0;
     1.5  
     1.6  int cycles_counter_fd[NUM_CORES];
     1.7 +int cycles_counter_main_fd;
     1.8 +struct perf_event_attr* hw_event;
     1.9  
    1.10  //======================== App Code =========================
    1.11  /*
    1.12 @@ -137,15 +139,16 @@
    1.13     int nread;                                           \
    1.14                                                          \
    1.15     nread = read(cycles_fd,&(cycles),sizeof(cycles));    \
    1.16 -   if(nread<=0){                                         \
    1.17 +   if(nread<0){                                         \
    1.18         perror("Error reading cycles counter");          \
    1.19         cycles = 0;                                      \
    1.20     }                                                    \
    1.21  } while (0) //macro magic for scoping
    1.22  
    1.23 +
    1.24  void work(void* input, VirtProcr* animatingPr)
    1.25   {
    1.26 -   int n,m;
    1.27 +   int i,o;
    1.28     struct input_t* in = (struct input_t*)input;
    1.29     unsigned int totalWorkCycles = 0;
    1.30     unsigned int workspace1;
    1.31 @@ -154,7 +157,7 @@
    1.32     
    1.33     int cpuid = sched_getcpu();
    1.34      
    1.35 -   for(m=0; m<repetitions; m++)
    1.36 +   for(o=0; o<repetitions; o++)
    1.37      {
    1.38         
    1.39        //measure inner workload to determine synchronisation overhead by subtraction
    1.40 @@ -162,7 +165,7 @@
    1.41        saveCyclesAndInstrs(cpuid,startWorkload.cycles);
    1.42  
    1.43        //workload
    1.44 -      for(n=0; n<workload_size; n++)
    1.45 +      for(i=0; i<workload_size; i++)
    1.46         {
    1.47           workspace1 += (workspace1 + 32)/2;
    1.48           workspace2 += (workspace2 + 23.2)/1.4;
    1.49 @@ -174,10 +177,11 @@
    1.50        uint64 numCycles = endWorkload.cycles - startWorkload.cycles;
    1.51        
    1.52        
    1.53 -     if( numCycles < 100000000 ) totalWorkCycles += numCycles;//sanity check
    1.54 +     if( numCycles < 4000000 ) //sanity check (4M is about 200K iters)
    1.55 +         totalWorkCycles += numCycles;
    1.56  
    1.57 +      //mutex access causes switch to different Slave VP
    1.58        VPThread__mutex_lock(privateMutex, animatingPr);
    1.59 -      //lock access to switch to different tast
    1.60        VPThread__mutex_unlock(privateMutex, animatingPr);
    1.61      }
    1.62  
    1.63 @@ -190,7 +194,7 @@
    1.64     
    1.65     //Shutdown worker
    1.66     VPThread__dissipate_thread(animatingPr);
    1.67 -   
     1.68 +     //the printfs below are never reached --> they exist only so gcc keeps the workspace variables
    1.69     printf("%d", workspace1);  //This is to prevent gcc from optimizing out the
    1.70     printf("%f", workspace2);  //two workspace variables
    1.71   }
    1.72 @@ -198,63 +202,21 @@
    1.73  /* this is run after the VMS is set up*/
    1.74  void benchmark(void *in, VirtProcr *animatingPr)
    1.75   {
    1.76 -   int i;
    1.77 +   int i, cpuID;
    1.78     struct input_t input[num_threads];
    1.79     struct barrier_t barr;
    1.80     barrier_init(&barr, num_threads+1, animatingPr);
    1.81     
    1.82 -    //setup performance counters
    1.83 -    struct perf_event_attr* hw_event;
    1.84 -    hw_event = VMS__malloc(sizeof(struct perf_event_attr));
    1.85 -    memset(hw_event,0,sizeof(hw_event));
    1.86 -        hw_event->type = PERF_TYPE_HARDWARE;
    1.87 -        hw_event->size = sizeof(hw_event);
    1.88 -        hw_event->disabled = 0;
    1.89 -        hw_event->freq = 0;
    1.90 -        hw_event->inherit = 1; /* children inherit it   */
    1.91 -        hw_event->pinned = 1; /* must always be on PMU */
    1.92 -        hw_event->exclusive = 0; /* only group on PMU     */
    1.93 -        hw_event->exclude_user = 0; /* don't count user      */
    1.94 -        hw_event->exclude_kernel = 1; /* ditto kernel          */
    1.95 -        hw_event->exclude_hv = 1; /* ditto hypervisor      */
    1.96 -        hw_event->exclude_idle = 1; /* don't count when idle */
    1.97 -        hw_event->mmap = 0; /* include mmap data     */
    1.98 -        hw_event->comm = 0; /* include comm data     */
    1.99 -
   1.100 -
   1.101 -    for( i = 0; i < NUM_CORES; i++ )
   1.102 -    {
   1.103 -        hw_event->config = PERF_COUNT_HW_CPU_CYCLES; //cycles
   1.104 -        cycles_counter_fd[i] = syscall(__NR_perf_event_open, hw_event,
   1.105 -                0,//pid_t pid, 
   1.106 -                i,//int cpu, 
   1.107 -                -1,//int group_fd,
   1.108 -                0//unsigned long flags
   1.109 -        );
   1.110 -        if (cycles_counter_fd[i]<0){
   1.111 -            fprintf(stderr,"On core %d: ",i);
   1.112 -            perror("Failed to open cycles counter");
   1.113 -        }
   1.114 -    }
   1.115 -
   1.116 -   //Count on all CPUs
   1.117 -   int cycles_counter_main_fd = syscall(__NR_perf_event_open, hw_event,
   1.118 -                0,//pid_t pid, 
   1.119 -                -1,//int cpu, 
   1.120 -                -1,//int group_fd,
   1.121 -                0//unsigned long flags
   1.122 -        );
   1.123 -    if (cycles_counter_main_fd<0){
   1.124 -        fprintf(stderr,"On core %d: ",i);
   1.125 -        perror("Failed to open cycles counter");
   1.126 -    } 
   1.127 +   
   1.128     
   1.129     //prepare input
   1.130     for(i=0; i<num_threads; i++)
   1.131      { 
   1.132         input[i].barrier = &barr;
   1.133      }
   1.134 -    
   1.135 +     
   1.136 +   printf("just before first counter read, inside benchmark\n");
   1.137 +  
   1.138     //save cycles before execution of threads to get longest runtime
   1.139     measurement_t startBenchTime, endBenchTime;
   1.140     int nread = read(cycles_counter_main_fd,&(startBenchTime.cycles),
   1.141 @@ -262,7 +224,7 @@
   1.142     if(nread<0){                                         
   1.143         perror("Error reading cycles counter");
   1.144     }
   1.145 -   
   1.146 +   printf("finished first counter read, inside benchmark\n");
   1.147     //create all threads
   1.148     for(i=0; i<num_threads; i++)
   1.149      { VPThread__create_thread((VirtProcrFnPtr)work, (void*)&input[i], animatingPr);}
   1.150 @@ -270,7 +232,7 @@
   1.151     barrier_wait(&barr, animatingPr);
   1.152    
   1.153     
   1.154 -   //longest thread  measurement
   1.155 +   //accumulated cycles of all cores
   1.156     nread = read(cycles_counter_main_fd,&(endBenchTime.cycles),
   1.157                  sizeof(endBenchTime.cycles));
   1.158     if(nread<0){                                         
   1.159 @@ -279,7 +241,7 @@
   1.160  
   1.161     uint64_t overallWorkCycles = 0;
   1.162     for(i=0; i<num_threads; i++){ 
   1.163 -       printf("WorkCycles: %d\n",input[i].totalWorkCycles);
   1.164 +       printf("WorkCycles: %lu\n",input[i].totalWorkCycles);
   1.165         overallWorkCycles += input[i].totalWorkCycles;
   1.166      }
   1.167     
   1.168 @@ -349,9 +311,73 @@
   1.169  			return EXIT_FAILURE;
   1.170         }
   1.171      }//for
   1.172 -        
   1.173 -        //This is the transition to the VMS runtime
   1.174 -        VPThread__create_seed_procr_and_do_work(benchmark, NULL);
   1.175 +   
   1.176 +   //setup performance counters
   1.177 +    hw_event = malloc(sizeof(struct perf_event_attr));
   1.178 +    memset(hw_event,0,sizeof(struct perf_event_attr));
   1.179 +    
   1.180 +    hw_event->type = PERF_TYPE_HARDWARE;
   1.181 +    hw_event->size = sizeof(hw_event);
   1.182 +    hw_event->disabled = 0;
   1.183 +    hw_event->freq = 0;
   1.184 +    hw_event->inherit = 1; /* children inherit it   */
   1.185 +    hw_event->pinned = 1; /* says this virt counter must always be on HW */
    1.186 +    hw_event->exclusive = 0; /* 0 = allow sharing PMU with other groups */
    1.187 +    hw_event->exclude_user = 0; /* 0 = user-mode events ARE counted */
   1.188 +    hw_event->exclude_kernel = 1; /* don't count kernel  */
   1.189 +    hw_event->exclude_hv = 1; /* ditto hypervisor      */
   1.190 +    hw_event->exclude_idle = 1; /* don't count when idle */
   1.191 +    hw_event->mmap = 0; /* include mmap data     */
   1.192 +    hw_event->comm = 0; /* include comm data     */
   1.193 +
   1.194 +    hw_event->config = PERF_COUNT_HW_CPU_CYCLES; //cycles
   1.195 +    
   1.196 +    int cpuID, retries;
   1.197 +
   1.198 +   for( cpuID = 0; cpuID < NUM_CORES; cpuID++ )
   1.199 +    { retries = 0;
   1.200 +      do
   1.201 +       { retries += 1;
   1.202 +         cycles_counter_fd[cpuID] = 
   1.203 +          syscall(__NR_perf_event_open, hw_event,
   1.204 +                  0,//pid_t: 0 is "pid of calling process" 
   1.205 +                  cpuID,//int: cpu, the value returned by "CPUID" instr(?)
   1.206 +                  -1,//int: group_fd, -1 is "leader" or independent
   1.207 +                  0//unsigned long: flags
   1.208 +                 );
   1.209 +       }
   1.210 +      while(cycles_counter_fd[cpuID]<0 && retries < 100);
   1.211 +      if(retries >= 100)
   1.212 +       {
   1.213 +         fprintf(stderr,"On core %d: ",cpuID);
   1.214 +         perror("Failed to open cycles counter");
   1.215 +       }
   1.216 +    }
   1.217 +   printf("counters now set up\n");
   1.218 +
   1.219 +   //Set up counter to accumulate total cycles to process, across all CPUs
   1.220 +
   1.221 +   retries = 0;
   1.222 +   do
   1.223 +    { retries += 1;
   1.224 +      cycles_counter_main_fd = 
   1.225 +       syscall(__NR_perf_event_open, hw_event,
   1.226 +               0,//pid_t: 0 is "pid of calling process" 
   1.227 +               -1,//int: cpu, -1 means accumulate from all cores
   1.228 +               -1,//int: group_fd, -1 is "leader" == independent
   1.229 +               0//unsigned long: flags
   1.230 +              );
   1.231 +    }
   1.232 +   while(cycles_counter_main_fd<0 && retries < 100);
   1.233 +   if(retries >= 100)
   1.234 +    {
   1.235 +      fprintf(stderr,"in main ");
   1.236 +      perror("Failed to open cycles counter");
   1.237 +    }
   1.238 +
   1.239 +
   1.240 +    //This is the transition to the VMS runtime
   1.241 +   VPThread__create_seed_procr_and_do_work(benchmark, NULL);
   1.242  
   1.243     return 0;
   1.244 -}
   1.245 + }