Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
changeset 242:b4f684e98d0b Common_Ancestor
add cache miss counter
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 08 May 2012 18:58:41 +0200 |
| parents | 1cfcf49dc7ab |
| children | 227cd4d33d94 |
| files | Defines/VMS_defs__HW_constants.h HW_Dependent_Primitives/VMS__HW_measurement.c Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h |
| diffstat | 3 files changed, 60 insertions(+), 41 deletions(-) [+] |
line diff
1.1 --- a/Defines/VMS_defs__HW_constants.h Sun Apr 01 13:53:46 2012 -0700 1.2 +++ b/Defines/VMS_defs__HW_constants.h Tue May 08 18:58:41 2012 +0200 1.3 @@ -14,7 +14,7 @@ 1.4 //========================= Hardware related Constants ===================== 1.5 //This value is the number of hardware threads in the shared memory 1.6 // machine 1.7 -#define NUM_CORES 40 1.8 +#define NUM_CORES 4 1.9 1.10 // tradeoff amortizing master fixed overhead vs imbalance potential 1.11 // when work-stealing, can make bigger, at risk of losing cache affinity
2.1 --- a/HW_Dependent_Primitives/VMS__HW_measurement.c Sun Apr 01 13:53:46 2012 -0700 2.2 +++ b/HW_Dependent_Primitives/VMS__HW_measurement.c Tue May 08 18:58:41 2012 +0200 2.3 @@ -12,10 +12,8 @@ 2.4 #ifdef HOLISTIC__TURN_ON_PERF_COUNTERS 2.5 struct perf_event_attr hw_event; 2.6 memset(&hw_event,0,sizeof(hw_event)); 2.7 - hw_event.type = PERF_TYPE_HARDWARE; 2.8 - hw_event.size = sizeof(hw_event); 2.9 + hw_event.size = sizeof(struct perf_event_attr); 2.10 hw_event.disabled = 1; 2.11 - hw_event.freq = 0; 2.12 hw_event.inherit = 1; /* children inherit it */ 2.13 hw_event.pinned = 1; /* must always be on PMU */ 2.14 hw_event.exclusive = 0; /* only group on PMU */ 2.15 @@ -23,13 +21,12 @@ 2.16 hw_event.exclude_kernel = 0; /* ditto kernel */ 2.17 hw_event.exclude_hv = 0; /* ditto hypervisor */ 2.18 hw_event.exclude_idle = 0; /* don't count when idle */ 2.19 - hw_event.mmap = 0; /* include mmap data */ 2.20 - hw_event.comm = 0; /* include comm data */ 2.21 2.22 int coreIdx; 2.23 for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) 2.24 { 2.25 - hw_event.config = 0x0000000000000000; //cycles 2.26 + hw_event.type = PERF_TYPE_HARDWARE; 2.27 + hw_event.config = PERF_COUNT_HW_CPU_CYCLES; //cycles 2.28 _VMSMasterEnv->cycles_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event, 2.29 0,//pid_t pid, 2.30 coreIdx,//int cpu, 2.31 @@ -40,7 +37,8 @@ 2.32 fprintf(stderr,"On core %d: ",coreIdx); 2.33 perror("Failed to open cycles counter"); 2.34 } 2.35 - hw_event.config = 0x0000000000000001; //instrs 2.36 + hw_event.type = PERF_TYPE_HARDWARE; 2.37 + hw_event.config = PERF_COUNT_HW_INSTRUCTIONS; //instrs 2.38 _VMSMasterEnv->instrs_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event, 2.39 0,//pid_t pid, 2.40 coreIdx,//int cpu, 2.41 @@ -51,6 +49,21 @@ 2.42 fprintf(stderr,"On core %d: ",coreIdx); 2.43 perror("Failed to open instrs counter"); 2.44 } 2.45 + hw_event.type = PERF_TYPE_HW_CACHE; 2.46 + hw_event.config = PERF_COUNT_HW_CACHE_L1D << 0 | 2.47 + (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2.48 + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16); //cache misses 2.49 + _VMSMasterEnv->cachem_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event, 2.50 + 0,//pid_t pid, 2.51 + coreIdx,//int cpu, 2.52 + -1,//int group_fd, 2.53 + 0//unsigned long flags 2.54 + ); 2.55 + if (_VMSMasterEnv->cachem_counter_fd[coreIdx]<0){ 2.56 + fprintf(stderr,"On core %d: ",coreIdx); 2.57 + perror("Failed to open cache miss counter"); 2.58 + exit(1); 2.59 + } 2.60 } 2.61 2.62 prctl(PR_TASK_PERF_EVENTS_ENABLE);
3.1 --- a/Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h Sun Apr 01 13:53:46 2012 -0700 3.2 +++ b/Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h Tue May 08 18:58:41 2012 +0200 3.3 @@ -295,7 +295,7 @@ 3.4 #ifdef HOLISTIC__TURN_ON_PERF_COUNTERS 3.5 3.6 #define MEAS__Insert_Counter_Handler \ 3.7 - typedef void (*CounterHandler) (int,int,int,SlaveVP*,uint64,uint64); 3.8 + typedef void (*CounterHandler) (int,int,int,SlaveVP*,uint64,uint64,uint64); 3.9 3.10 enum eventType { 3.11 DebugEvt = 0, 3.12 @@ -313,9 +313,10 @@ 3.13 Timestamp_end 3.14 }; 3.15 3.16 - #define saveCyclesAndInstrs(core,cycles,instrs) do{ \ 3.17 + #define saveCyclesAndInstrs(core,cycles,instrs,cachem) do{ \ 3.18 int cycles_fd = _VMSMasterEnv->cycles_counter_fd[core]; \ 3.19 int instrs_fd = _VMSMasterEnv->instrs_counter_fd[core]; \ 3.20 + int cachem_fd = _VMSMasterEnv->cachem_counter_fd[core]; \ 3.21 int nread; \ 3.22 \ 3.23 nread = read(cycles_fd,&(cycles),sizeof(cycles)); \ 3.24 @@ -329,12 +330,18 @@ 3.25 perror("Error reading cycles counter"); \ 3.26 instrs = 0; \ 3.27 } \ 3.28 + nread = read(cachem_fd,&(cachem),sizeof(cachem)); \ 3.29 + if(nread<0){ \ 3.30 + perror("Error reading last level cache miss counter"); \ 3.31 + cachem = 0; \ 3.32 + } \ 3.33 } while (0) 3.34 3.35 #define MEAS__Insert_Counter_Meas_Fields_into_MasterEnv \ 3.36 int cycles_counter_fd[NUM_CORES]; \ 3.37 int instrs_counter_fd[NUM_CORES]; \ 3.38 - uint64 start_master_lock[NUM_CORES][2]; \ 3.39 + int cachem_counter_fd[NUM_CORES]; \ 3.40 + uint64 start_master_lock[NUM_CORES][3]; \ 3.41 CounterHandler counterHandler; 3.42 3.43 #define HOLISTIC__Setup_Perf_Counters setup_perf_counters(); 3.44 @@ -365,14 +372,15 @@ 3.45 CounterHandler counterHandler = masterEnv->counterHandler; 3.46 3.47 #define HOLISTIC__Record_AppResponderInvocation_start \ 3.48 - uint64 cycles,instrs; \ 3.49 - saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); \ 3.50 + uint64 cycles,instrs,cachem; \ 3.51 + saveCyclesAndInstrs(thisCoresIdx,cycles, instrs,cachem); \ 3.52 if(lastVPBeforeMaster){ \ 3.53 - (*counterHandler)(AppResponderInvocation_start,lastVPBeforeMaster->slaveID,lastVPBeforeMaster->assignCount,lastVPBeforeMaster,cycles,instrs); \ 3.54 + (*counterHandler)(AppResponderInvocation_start,lastVPBeforeMaster->slaveID,lastVPBeforeMaster->assignCount,lastVPBeforeMaster,cycles,instrs,cachem); \ 3.55 lastVPBeforeMaster = NULL; \ 3.56 } else { \ 3.57 _VMSMasterEnv->start_master_lock[thisCoresIdx][0] = cycles; \ 3.58 _VMSMasterEnv->start_master_lock[thisCoresIdx][1] = instrs; \ 3.59 + _VMSMasterEnv->start_master_lock[thisCoresIdx][2] = cachem; \ 3.60 } 3.61 3.62 /* Request Handler may call resume() on the VP, but we want to 3.63 @@ -388,15 +396,15 @@ 3.64 #define HOLISTIC__Record_AppResponder_start \ 3.65 vpid = currSlot->slaveAssignedToSlot->slaveID; \ 3.66 task = currSlot->slaveAssignedToSlot->assignCount; \ 3.67 - uint64 cycles, instrs; \ 3.68 - saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); \ 3.69 - (*counterHandler)(AppResponder_start,vpid,task,currSlot->slaveAssignedToSlot,cycles,instrs); 3.70 + uint64 cycles, instrs, cachem; \ 3.71 + saveCyclesAndInstrs(thisCoresIdx,cycles, instrs,cachem); \ 3.72 + (*counterHandler)(AppResponder_start,vpid,task,currSlot->slaveAssignedToSlot,cycles,instrs,cachem); 3.73 3.74 #define HOLISTIC__Record_AppResponder_end \ 3.75 - uint64 cycles2,instrs2; \ 3.76 - saveCyclesAndInstrs(thisCoresIdx,cycles2, instrs2); \ 3.77 - (*counterHandler)(AppResponder_end,vpid,task,currSlot->slaveAssignedToSlot,cycles2,instrs2); \ 3.78 - (*counterHandler)(Timestamp_end,vpid,task,currSlot->slaveAssignedToSlot,rdtsc(),0); 3.79 + uint64 cycles2,instrs2,cachem2; \ 3.80 + saveCyclesAndInstrs(thisCoresIdx,cycles2, instrs2,cachem2); \ 3.81 + (*counterHandler)(AppResponder_end,vpid,task,currSlot->slaveAssignedToSlot,cycles2,instrs2,cachem2); \ 3.82 + (*counterHandler)(Timestamp_end,vpid,task,currSlot->slaveAssignedToSlot,rdtsc(),0,0); 3.83 3.84 3.85 /* Don't know who to account time to yet - goes to assigned VP 3.86 @@ -407,45 +415,43 @@ 3.87 if(currSlot->slaveAssignedToSlot == NULL){ \ 3.88 empty= TRUE; \ 3.89 } \ 3.90 - uint64 tmp_cycles; \ 3.91 - uint64 tmp_instrs; \ 3.92 - saveCyclesAndInstrs(thisCoresIdx,tmp_cycles,tmp_instrs); \ 3.93 + uint64 tmp_cycles, tmp_instrs, tmp_cachem; \ 3.94 + saveCyclesAndInstrs(thisCoresIdx,tmp_cycles,tmp_instrs,tmp_cachem); \ 3.95 uint64 tsc = rdtsc(); \ 3.96 if(vpid > 0) { \ 3.97 - (*counterHandler)(NextAssigner_start,vpid,task,currSlot->slaveAssignedToSlot,tmp_cycles,tmp_instrs); \ 3.98 + (*counterHandler)(NextAssigner_start,vpid,task,currSlot->slaveAssignedToSlot,tmp_cycles,tmp_instrs,tmp_cachem); \ 3.99 vpid = 0; \ 3.100 task = 0; \ 3.101 } 3.102 3.103 #define HOLISTIC__Record_Assigner_end \ 3.104 - uint64 cycles; \ 3.105 - uint64 instrs; \ 3.106 - saveCyclesAndInstrs(thisCoresIdx,cycles,instrs); \ 3.107 + uint64 cycles,instrs,cachem; \ 3.108 + saveCyclesAndInstrs(thisCoresIdx,cycles,instrs,cachem); \ 3.109 if(empty){ \ 3.110 - (*counterHandler)(AssignerInvocation_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,masterEnv->start_master_lock[thisCoresIdx][0],masterEnv->start_master_lock[thisCoresIdx][1]); \ 3.111 + (*counterHandler)(AssignerInvocation_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,masterEnv->start_master_lock[thisCoresIdx][0],masterEnv->start_master_lock[thisCoresIdx][1],masterEnv->start_master_lock[thisCoresIdx][2]); \ 3.112 } \ 3.113 - (*counterHandler)(Timestamp_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,tsc,0); \ 3.114 - (*counterHandler)(Assigner_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,tmp_cycles,tmp_instrs); \ 3.115 - (*counterHandler)(Assigner_end,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,cycles,instrs); 3.116 + (*counterHandler)(Timestamp_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,tsc,0,0); \ 3.117 + (*counterHandler)(Assigner_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,tmp_cycles,tmp_instrs,tmp_cachem); \ 3.118 + (*counterHandler)(Assigner_end,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,cycles,instrs,tmp_cachem); 3.119 3.120 #define HOLISTIC__Record_Work_start \ 3.121 if(currVP){ \ 3.122 - uint64 cycles,instrs; \ 3.123 - saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); \ 3.124 - (*counterHandler)(Work_start,currVP->slaveID,currVP->assignCount,currVP,cycles,instrs); \ 3.125 + uint64 cycles,instrs,cachem; \ 3.126 + saveCyclesAndInstrs(thisCoresIdx,cycles, instrs,cachem); \ 3.127 + (*counterHandler)(Work_start,currVP->slaveID,currVP->assignCount,currVP,cycles,instrs,cachem); \ 3.128 } 3.129 3.130 #define HOLISTIC__Record_Work_end \ 3.131 if(currVP){ \ 3.132 - uint64 cycles,instrs; \ 3.133 - saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); \ 3.134 - (*counterHandler)(Work_end,currVP->slaveID,currVP->assignCount,currVP,cycles,instrs); \ 3.135 + uint64 cycles,instrs,cachem; \ 3.136 + saveCyclesAndInstrs(thisCoresIdx,cycles, instrs,cachem); \ 3.137 + (*counterHandler)(Work_end,currVP->slaveID,currVP->assignCount,currVP,cycles,instrs,cachem); \ 3.138 } 3.139 3.140 #define HOLISTIC__Record_HwResponderInvocation_start \ 3.141 - uint64 cycles,instrs; \ 3.142 - saveCyclesAndInstrs(animatingSlv->coreAnimatedBy,cycles, instrs); \ 3.143 - (*(_VMSMasterEnv->counterHandler))(HwResponderInvocation_start,animatingSlv->slaveID,animatingSlv->assignCount,animatingSlv,cycles,instrs); 3.144 + uint64 cycles,instrs,cachem; \ 3.145 + saveCyclesAndInstrs(animatingSlv->coreAnimatedBy,cycles, instrs,cachem); \ 3.146 + (*(_VMSMasterEnv->counterHandler))(HwResponderInvocation_start,animatingSlv->slaveID,animatingSlv->assignCount,animatingSlv,cycles,instrs,cachem); 3.147 3.148 3.149 #define getReturnAddressBeforeLibraryCall(vp_ptr, res_ptr) do{ \
