# HG changeset patch # User Nina Engelhardt # Date 1336496321 -7200 # Node ID b4f684e98d0be2d6477262725ac20d56b7a4427a # Parent 1cfcf49dc7ab67b9bb18b52d60c74027cadb6360 add cache miss counter diff -r 1cfcf49dc7ab -r b4f684e98d0b Defines/VMS_defs__HW_constants.h --- a/Defines/VMS_defs__HW_constants.h Sun Apr 01 13:53:46 2012 -0700 +++ b/Defines/VMS_defs__HW_constants.h Tue May 08 18:58:41 2012 +0200 @@ -14,7 +14,7 @@ //========================= Hardware related Constants ===================== //This value is the number of hardware threads in the shared memory // machine -#define NUM_CORES 40 +#define NUM_CORES 4 // tradeoff amortizing master fixed overhead vs imbalance potential // when work-stealing, can make bigger, at risk of losing cache affinity diff -r 1cfcf49dc7ab -r b4f684e98d0b HW_Dependent_Primitives/VMS__HW_measurement.c --- a/HW_Dependent_Primitives/VMS__HW_measurement.c Sun Apr 01 13:53:46 2012 -0700 +++ b/HW_Dependent_Primitives/VMS__HW_measurement.c Tue May 08 18:58:41 2012 +0200 @@ -12,10 +12,8 @@ #ifdef HOLISTIC__TURN_ON_PERF_COUNTERS struct perf_event_attr hw_event; memset(&hw_event,0,sizeof(hw_event)); - hw_event.type = PERF_TYPE_HARDWARE; - hw_event.size = sizeof(hw_event); + hw_event.size = sizeof(struct perf_event_attr); hw_event.disabled = 1; - hw_event.freq = 0; hw_event.inherit = 1; /* children inherit it */ hw_event.pinned = 1; /* must always be on PMU */ hw_event.exclusive = 0; /* only group on PMU */ @@ -23,13 +21,12 @@ hw_event.exclude_kernel = 0; /* ditto kernel */ hw_event.exclude_hv = 0; /* ditto hypervisor */ hw_event.exclude_idle = 0; /* don't count when idle */ - hw_event.mmap = 0; /* include mmap data */ - hw_event.comm = 0; /* include comm data */ int coreIdx; for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) { - hw_event.config = 0x0000000000000000; //cycles + hw_event.type = PERF_TYPE_HARDWARE; + hw_event.config = PERF_COUNT_HW_CPU_CYCLES; //cycles _VMSMasterEnv->cycles_counter_fd[coreIdx] = syscall(__NR_perf_event_open, 
&hw_event, 0,//pid_t pid, coreIdx,//int cpu, @@ -40,7 +37,8 @@ fprintf(stderr,"On core %d: ",coreIdx); perror("Failed to open cycles counter"); } - hw_event.config = 0x0000000000000001; //instrs + hw_event.type = PERF_TYPE_HARDWARE; + hw_event.config = PERF_COUNT_HW_INSTRUCTIONS; //instrs _VMSMasterEnv->instrs_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event, 0,//pid_t pid, coreIdx,//int cpu, @@ -51,6 +49,21 @@ fprintf(stderr,"On core %d: ",coreIdx); perror("Failed to open instrs counter"); } + hw_event.type = PERF_TYPE_HW_CACHE; + hw_event.config = PERF_COUNT_HW_CACHE_L1D << 0 | + (PERF_COUNT_HW_CACHE_OP_READ << 8) | + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16); //cache misses + _VMSMasterEnv->cachem_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event, + 0,//pid_t pid, + coreIdx,//int cpu, + -1,//int group_fd, + 0//unsigned long flags + ); + if (_VMSMasterEnv->cachem_counter_fd[coreIdx]<0){ + fprintf(stderr,"On core %d: ",coreIdx); + perror("Failed to open cache miss counter"); + exit(1); + } } prctl(PR_TASK_PERF_EVENTS_ENABLE); diff -r 1cfcf49dc7ab -r b4f684e98d0b Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h --- a/Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h Sun Apr 01 13:53:46 2012 -0700 +++ b/Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h Tue May 08 18:58:41 2012 +0200 @@ -295,7 +295,7 @@ #ifdef HOLISTIC__TURN_ON_PERF_COUNTERS #define MEAS__Insert_Counter_Handler \ - typedef void (*CounterHandler) (int,int,int,SlaveVP*,uint64,uint64); + typedef void (*CounterHandler) (int,int,int,SlaveVP*,uint64,uint64,uint64); enum eventType { DebugEvt = 0, @@ -313,9 +313,10 @@ Timestamp_end }; - #define saveCyclesAndInstrs(core,cycles,instrs) do{ \ + #define saveCyclesAndInstrs(core,cycles,instrs,cachem) do{ \ int cycles_fd = _VMSMasterEnv->cycles_counter_fd[core]; \ int instrs_fd = _VMSMasterEnv->instrs_counter_fd[core]; \ + int cachem_fd = _VMSMasterEnv->cachem_counter_fd[core]; \ int nread; \ \ nread = 
read(cycles_fd,&(cycles),sizeof(cycles)); \ @@ -329,12 +330,18 @@ perror("Error reading cycles counter"); \ instrs = 0; \ } \ + nread = read(cachem_fd,&(cachem),sizeof(cachem)); \ + if(nread<0){ \ + perror("Error reading last level cache miss counter"); \ + cachem = 0; \ + } \ } while (0) #define MEAS__Insert_Counter_Meas_Fields_into_MasterEnv \ int cycles_counter_fd[NUM_CORES]; \ int instrs_counter_fd[NUM_CORES]; \ - uint64 start_master_lock[NUM_CORES][2]; \ + int cachem_counter_fd[NUM_CORES]; \ + uint64 start_master_lock[NUM_CORES][3]; \ CounterHandler counterHandler; #define HOLISTIC__Setup_Perf_Counters setup_perf_counters(); @@ -365,14 +372,15 @@ CounterHandler counterHandler = masterEnv->counterHandler; #define HOLISTIC__Record_AppResponderInvocation_start \ - uint64 cycles,instrs; \ - saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); \ + uint64 cycles,instrs,cachem; \ + saveCyclesAndInstrs(thisCoresIdx,cycles, instrs,cachem); \ if(lastVPBeforeMaster){ \ - (*counterHandler)(AppResponderInvocation_start,lastVPBeforeMaster->slaveID,lastVPBeforeMaster->assignCount,lastVPBeforeMaster,cycles,instrs); \ + (*counterHandler)(AppResponderInvocation_start,lastVPBeforeMaster->slaveID,lastVPBeforeMaster->assignCount,lastVPBeforeMaster,cycles,instrs,cachem); \ lastVPBeforeMaster = NULL; \ } else { \ _VMSMasterEnv->start_master_lock[thisCoresIdx][0] = cycles; \ _VMSMasterEnv->start_master_lock[thisCoresIdx][1] = instrs; \ + _VMSMasterEnv->start_master_lock[thisCoresIdx][2] = cachem; \ } /* Request Handler may call resume() on the VP, but we want to @@ -388,15 +396,15 @@ #define HOLISTIC__Record_AppResponder_start \ vpid = currSlot->slaveAssignedToSlot->slaveID; \ task = currSlot->slaveAssignedToSlot->assignCount; \ - uint64 cycles, instrs; \ - saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); \ - (*counterHandler)(AppResponder_start,vpid,task,currSlot->slaveAssignedToSlot,cycles,instrs); + uint64 cycles, instrs, cachem; \ + saveCyclesAndInstrs(thisCoresIdx,cycles, 
instrs,cachem); \ + (*counterHandler)(AppResponder_start,vpid,task,currSlot->slaveAssignedToSlot,cycles,instrs,cachem); #define HOLISTIC__Record_AppResponder_end \ - uint64 cycles2,instrs2; \ - saveCyclesAndInstrs(thisCoresIdx,cycles2, instrs2); \ - (*counterHandler)(AppResponder_end,vpid,task,currSlot->slaveAssignedToSlot,cycles2,instrs2); \ - (*counterHandler)(Timestamp_end,vpid,task,currSlot->slaveAssignedToSlot,rdtsc(),0); + uint64 cycles2,instrs2,cachem2; \ + saveCyclesAndInstrs(thisCoresIdx,cycles2, instrs2,cachem2); \ + (*counterHandler)(AppResponder_end,vpid,task,currSlot->slaveAssignedToSlot,cycles2,instrs2,cachem2); \ + (*counterHandler)(Timestamp_end,vpid,task,currSlot->slaveAssignedToSlot,rdtsc(),0,0); /* Don't know who to account time to yet - goes to assigned VP @@ -407,45 +415,43 @@ if(currSlot->slaveAssignedToSlot == NULL){ \ empty= TRUE; \ } \ - uint64 tmp_cycles; \ - uint64 tmp_instrs; \ - saveCyclesAndInstrs(thisCoresIdx,tmp_cycles,tmp_instrs); \ + uint64 tmp_cycles, tmp_instrs, tmp_cachem; \ + saveCyclesAndInstrs(thisCoresIdx,tmp_cycles,tmp_instrs,tmp_cachem); \ uint64 tsc = rdtsc(); \ if(vpid > 0) { \ - (*counterHandler)(NextAssigner_start,vpid,task,currSlot->slaveAssignedToSlot,tmp_cycles,tmp_instrs); \ + (*counterHandler)(NextAssigner_start,vpid,task,currSlot->slaveAssignedToSlot,tmp_cycles,tmp_instrs,tmp_cachem); \ vpid = 0; \ task = 0; \ } #define HOLISTIC__Record_Assigner_end \ - uint64 cycles; \ - uint64 instrs; \ - saveCyclesAndInstrs(thisCoresIdx,cycles,instrs); \ + uint64 cycles,instrs,cachem; \ + saveCyclesAndInstrs(thisCoresIdx,cycles,instrs,cachem); \ if(empty){ \ - (*counterHandler)(AssignerInvocation_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,masterEnv->start_master_lock[thisCoresIdx][0],masterEnv->start_master_lock[thisCoresIdx][1]); \ + 
(*counterHandler)(AssignerInvocation_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,masterEnv->start_master_lock[thisCoresIdx][0],masterEnv->start_master_lock[thisCoresIdx][1],masterEnv->start_master_lock[thisCoresIdx][2]); \ } \ - (*counterHandler)(Timestamp_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,tsc,0); \ - (*counterHandler)(Assigner_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,tmp_cycles,tmp_instrs); \ - (*counterHandler)(Assigner_end,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,cycles,instrs); + (*counterHandler)(Timestamp_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,tsc,0,0); \ + (*counterHandler)(Assigner_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,tmp_cycles,tmp_instrs,tmp_cachem); \ + (*counterHandler)(Assigner_end,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,cycles,instrs,cachem); #define HOLISTIC__Record_Work_start \ if(currVP){ \ - uint64 cycles,instrs; \ - saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); \ - (*counterHandler)(Work_start,currVP->slaveID,currVP->assignCount,currVP,cycles,instrs); \ + uint64 cycles,instrs,cachem; \ + saveCyclesAndInstrs(thisCoresIdx,cycles, instrs,cachem); \ + (*counterHandler)(Work_start,currVP->slaveID,currVP->assignCount,currVP,cycles,instrs,cachem); \ } #define HOLISTIC__Record_Work_end \ if(currVP){ \ - uint64 cycles,instrs; \ - saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); \ - (*counterHandler)(Work_end,currVP->slaveID,currVP->assignCount,currVP,cycles,instrs); \ + uint64 cycles,instrs,cachem; \ + saveCyclesAndInstrs(thisCoresIdx,cycles, instrs,cachem); \ + (*counterHandler)(Work_end,currVP->slaveID,currVP->assignCount,currVP,cycles,instrs,cachem); \ } #define HOLISTIC__Record_HwResponderInvocation_start \ - uint64 cycles,instrs; \ - saveCyclesAndInstrs(animatingSlv->coreAnimatedBy,cycles, instrs); \ - 
(*(_VMSMasterEnv->counterHandler))(HwResponderInvocation_start,animatingSlv->slaveID,animatingSlv->assignCount,animatingSlv,cycles,instrs); + uint64 cycles,instrs,cachem; \ + saveCyclesAndInstrs(animatingSlv->coreAnimatedBy,cycles, instrs,cachem); \ + (*(_VMSMasterEnv->counterHandler))(HwResponderInvocation_start,animatingSlv->slaveID,animatingSlv->assignCount,animatingSlv,cycles,instrs,cachem); #define getReturnAddressBeforeLibraryCall(vp_ptr, res_ptr) do{ \