Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
changeset 238:b95711c6965c Common_Ancestor
counters work now
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Wed, 21 Mar 2012 11:09:11 +0100 |
| parents | ce1f57e10fac |
| children | 7ed97c961901 |
| files | AnimationMaster.c CoreController.c Hardware_Dependent/VMS__HW_measurement.c Hardware_Dependent/VMS__HW_measurement.h Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h VMS__int.c |
| diffstat | 6 files changed, 229 insertions(+), 15 deletions(-) [+] |
line diff
1.1 --- a/AnimationMaster.c Mon Mar 19 10:03:45 2012 -0700 1.2 +++ b/AnimationMaster.c Wed Mar 21 11:09:11 2012 +0100 1.3 @@ -130,7 +130,7 @@ 1.4 RequestHandler requestHandler; 1.5 void *semanticEnv; 1.6 int32 thisCoresIdx; 1.7 - 1.8 + 1.9 //======================== Initializations ======================== 1.10 masterEnv = (MasterEnv*)_VMSMasterEnv; 1.11 1.12 @@ -140,7 +140,8 @@ 1.13 requestHandler = masterEnv->requestHandler; 1.14 slaveAssigner = masterEnv->slaveAssigner; 1.15 semanticEnv = masterEnv->semanticEnv; 1.16 - 1.17 + 1.18 + HOLISTIC__Insert_Master_Global_Vars; 1.19 1.20 //======================== animationMaster ======================== 1.21 while(1){ 1.22 @@ -158,17 +159,20 @@ 1.23 { 1.24 currSlot->workIsDone = FALSE; 1.25 currSlot->needsSlaveAssigned = TRUE; 1.26 - 1.27 + 1.28 + HOLISTIC__Record_AppResponder_start; 1.29 MEAS__startReqHdlr; 1.30 1.31 //process the requests made by the slave (held inside slave struc) 1.32 (*requestHandler)( currSlot->slaveAssignedToSlot, semanticEnv ); 1.33 1.34 + HOLISTIC__Record_AppResponder_end; 1.35 MEAS__endReqHdlr; 1.36 } 1.37 //If slot empty, hand to Assigner to fill with a slave 1.38 if( currSlot->needsSlaveAssigned ) 1.39 { //Call plugin's Assigner to give slot a new slave 1.40 + HOLISTIC__Record_Assigner_start; 1.41 assignedSlaveVP = 1.42 (*slaveAssigner)( semanticEnv, currSlot ); 1.43 1.44 @@ -178,6 +182,8 @@ 1.45 assignedSlaveVP->animSlotAssignedTo = currSlot; 1.46 currSlot->needsSlaveAssigned = FALSE; 1.47 numSlotsFilled += 1; 1.48 + 1.49 + HOLISTIC__Record_Assigner_end; 1.50 } 1.51 } 1.52 }
2.1 --- a/CoreController.c Mon Mar 19 10:03:45 2012 -0700 2.2 +++ b/CoreController.c Wed Mar 21 11:09:11 2012 +0100 2.3 @@ -77,7 +77,7 @@ 2.4 volatile int32 *addrOfMasterLock; //thing pointed to is volatile, not ptr 2.5 SlaveVP *thisCoresMasterVP; 2.6 //Variables used for pthread related things 2.7 - ThdParams *coreCtlrThdParams; 2.8 + ThdParams *thisCoresThdParams; 2.9 cpu_set_t coreMask; //used during pinning pthread to CPU core 2.10 int32 errorCode; 2.11 //Variables used during measurements 2.12 @@ -88,8 +88,8 @@ 2.13 2.14 2.15 //=============== Initializations =================== 2.16 - coreCtlrThdParams = (ThdParams *)paramsIn; 2.17 - thisCoresIdx = coreCtlrThdParams->coreNum; 2.18 + thisCoresThdParams = (ThdParams *)paramsIn; 2.19 + thisCoresIdx = thisCoresThdParams->coreNum; 2.20 2.21 //Assembly that saves addr of label of return instr -- label in assmbly 2.22 recordCoreCtlrReturnLabelAddr((void**)&(_VMSMasterEnv->coreCtlrReturnPt)); 2.23 @@ -105,7 +105,7 @@ 2.24 //Linux requires pinning to be done inside the thread-function 2.25 //Designate a core by a 1 in bit-position corresponding to the core 2.26 CPU_ZERO(&coreMask); //initialize mask bits to zero 2.27 - CPU_SET(coreCtlrThdParams->coreNum,&coreMask); //set bit repr the coreNum 2.28 + CPU_SET(thisCoresThdParams->coreNum,&coreMask); //set bit repr the coreNum 2.29 pthread_t selfThd = pthread_self(); 2.30 errorCode = 2.31 pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask); 2.32 @@ -118,8 +118,10 @@ 2.33 } 2.34 pthread_mutex_unlock( &suspendLock ); 2.35 2.36 + HOLISTIC__CoreCtrl_Setup; 2.37 + 2.38 DEBUG__printf1(TRUE, "started coreCtrlr", thisCoresIdx ); 2.39 - 2.40 + 2.41 //====================== The Core Controller ====================== 2.42 while(1) //An endless loop is just one way of doing the control structure 2.43 { //Assembly code switches the core between animating a VP and 2.44 @@ -141,6 +143,7 @@ 2.45 { numRepetitionsWithNoWork = 0; //reset back2back master count 2.46 currSlotIdx ++; 
2.47 currVP = currSlot->slaveAssignedToSlot; 2.48 + HOLISTIC__Record_last_work; 2.49 } 2.50 else //slot is empty, so switch to master 2.51 { 2.52 @@ -149,6 +152,7 @@ 2.53 currVP = NULL; 2.54 2.55 MEAS__Capture_Pre_Master_Lock_Point; 2.56 + HOLISTIC__Record_AppResponderInvocation_start; 2.57 2.58 int numTriesToGetLock = 0; int gotLock = 0; 2.59 while( currVP == NULL ) //keep going until get master lock 2.60 @@ -189,10 +193,13 @@ 2.61 MEAS__Capture_Post_Master_Lock_Point; 2.62 } 2.63 2.64 + HOLISTIC__Record_Work_start; 2.65 2.66 switchToSlv(currVP); //Slave suspend makes core "return" from this call 2.67 flushRegisters(); //prevent GCC optimization from doing bad things 2.68 2.69 + HOLISTIC__Record_Work_end; 2.70 + 2.71 MEAS__Capture_End_Susp_in_CoreCtlr_ForSys; 2.72 2.73 }//while(1)
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/Hardware_Dependent/VMS__HW_measurement.c Wed Mar 21 11:09:11 2012 +0100 3.3 @@ -0,0 +1,74 @@ 3.4 +#include <unistd.h> 3.5 +#include <fcntl.h> 3.6 +#include <linux/types.h> 3.7 +#include <linux/perf_event.h> 3.8 +#include <errno.h> 3.9 +#include <sys/syscall.h> 3.10 +#include <linux/prctl.h> 3.11 + 3.12 +#include "../VMS.h" /* NOTE(review): memset, fprintf/perror and prctl are used below but string.h, stdio.h and sys/prctl.h are not included directly; presumably they arrive through VMS.h -- confirm */ 3.13 + 3.14 +/* Opens, for every core index below NUM_CORES, one hardware cycles event and one instructions event with the perf_event_open syscall and stores the returned descriptors in _VMSMasterEnv->cycles_counter_fd[] and _VMSMasterEnv->instrs_counter_fd[]. A failed open is reported on stderr and the negative result stays in the array. Ends by calling prctl(PR_TASK_PERF_EVENTS_ENABLE). The body is compiled only when HOLISTIC__TURN_ON_PERF_COUNTERS is defined; otherwise the function does nothing. */ void setup_perf_counters(){ 3.15 +#ifdef HOLISTIC__TURN_ON_PERF_COUNTERS 3.16 + struct perf_event_attr hw_event; 3.17 + memset(&hw_event,0,sizeof(hw_event)); 3.18 + hw_event.type = PERF_TYPE_HARDWARE; 3.19 + hw_event.size = sizeof(hw_event); 3.20 + hw_event.disabled = 1; /* opened disabled; see the prctl call at the end */ 3.21 + hw_event.freq = 0; 3.22 + hw_event.inherit = 1; /* children inherit it */ 3.23 + hw_event.pinned = 1; /* must always be on PMU */ 3.24 + hw_event.exclusive = 0; /* 0: need not be the only group on PMU */ 3.25 + hw_event.exclude_user = 0; /* 0: user-space events are counted */ 3.26 + hw_event.exclude_kernel = 0; /* 0: kernel events are counted */ 3.27 + hw_event.exclude_hv = 0; /* 0: hypervisor events are counted */ 3.28 + hw_event.exclude_idle = 0; /* 0: keeps counting when idle */ 3.29 + hw_event.mmap = 0; /* 0: mmap data not included */ 3.30 + hw_event.comm = 0; /* 0: comm data not included */ 3.31 + 3.32 + int coreIdx; 3.33 + for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ ) 3.34 + { 3.35 + hw_event.config = 0x0000000000000000; //cycles (0 is PERF_COUNT_HW_CPU_CYCLES in linux/perf_event.h) 3.36 + _VMSMasterEnv->cycles_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event, 3.37 + 0,//pid_t pid: 0 selects the calling thread, 3.38 + coreIdx,//int cpu: counted only while running on this core, 3.39 + -1,//int group_fd: -1 means no event group, 3.40 + 0//unsigned long flags 3.41 + ); 3.42 + if (_VMSMasterEnv->cycles_counter_fd[coreIdx]<0){ 3.43 + /* NOTE(review): fprintf runs before perror and may change errno -- confirm the reported error is still the syscall one */ fprintf(stderr,"On core %d: ",coreIdx); 3.44 + perror("Failed to open cycles counter"); 3.45 + } 3.46 + hw_event.config = 0x0000000000000001; //instrs (1 is PERF_COUNT_HW_INSTRUCTIONS in linux/perf_event.h) 3.47 + _VMSMasterEnv->instrs_counter_fd[coreIdx] = syscall(__NR_perf_event_open, &hw_event, 3.48 + 0,//pid_t pid: 0 selects the calling thread, 3.49 + coreIdx,//int cpu: counted only while running on this core, 3.50 + -1,//int group_fd: -1 means no event group, 3.51 + 0//unsigned long flags 3.52 + ); 3.53 + if 
(_VMSMasterEnv->instrs_counter_fd[coreIdx]<0){ 3.54 + fprintf(stderr,"On core %d: ",coreIdx); 3.55 + perror("Failed to open instrs counter"); 3.56 + } 3.57 + } 3.58 + 3.59 + prctl(PR_TASK_PERF_EVENTS_ENABLE); /* switches on the perf events of the calling task; the events above were opened with disabled = 1 */ 3.60 +#endif 3.61 +} 3.62 + 3.63 +/* Returns the processor time-stamp counter as one 64-bit value. cpuid is executed first (the serialize step), then rdtsc; EAX supplies the low 32 bits and EDX the high 32 bits. NOTE(review): the clobber list names 64-bit registers, so this presumably builds for x86-64 only -- confirm. */ __inline__ uint64_t rdtsc(){ 3.64 + uint32_t lo, hi; 3.65 + __asm__ __volatile__ ( // serialize 3.66 + "xorl %%eax,%%eax \n cpuid" 3.67 + ::: "%rax", "%rbx", "%rcx", "%rdx"); 3.68 + __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); 3.69 + /* asm volatile("RDTSC;" 3.70 + "movl %%eax, %0;" 3.71 + "movl %%edx, %1;" 3.72 + : "=m" (lo), "=m" (hi) 3.73 + : 3.74 + : "%eax", "%edx" 3.75 + ); */ 3.76 + return (uint64_t)hi << 32 | lo; 3.77 +} 3.78 \ No newline at end of file
4.1 --- a/Hardware_Dependent/VMS__HW_measurement.h Mon Mar 19 10:03:45 2012 -0700 4.2 +++ b/Hardware_Dependent/VMS__HW_measurement.h Wed Mar 21 11:09:11 2012 +0100 4.3 @@ -58,5 +58,6 @@ 4.4 //#define NUM_TSC_ROUND_TRIPS 10 4.5 4.6 void setup_perf_counters(); 4.7 +uint64_t rdtsc(void); 4.8 #endif /* */ 4.9
5.1 --- a/Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h Mon Mar 19 10:03:45 2012 -0700 5.2 +++ b/Services_Offered_by_VMS/Measurement_and_Stats/MEAS__macros.h Wed Mar 21 11:09:11 2012 +0100 5.3 @@ -313,6 +313,23 @@ 5.4 Timestamp_end 5.5 }; 5.6 5.7 + #define saveCyclesAndInstrs(core,cycles,instrs) do{ \ 5.8 + int cycles_fd = _VMSMasterEnv->cycles_counter_fd[core]; \ 5.9 + int instrs_fd = _VMSMasterEnv->instrs_counter_fd[core]; \ 5.10 + int nread; \ 5.11 + \ 5.12 + nread = read(cycles_fd,&(cycles),sizeof(cycles)); \ 5.13 + if(nread<0){ \ 5.14 + perror("Error reading cycles counter"); \ 5.15 + cycles = 0; \ 5.16 + } \ 5.17 + \ 5.18 + nread = read(instrs_fd,&(instrs),sizeof(instrs)); \ 5.19 + if(nread<0){ \ 5.20 + perror("Error reading cycles counter"); \ 5.21 + instrs = 0; \ 5.22 + } \ 5.23 + } while (0) 5.24 5.25 #define MEAS__Insert_Counter_Meas_Fields_into_MasterEnv \ 5.26 int cycles_counter_fd[NUM_CORES]; \ 5.27 @@ -320,22 +337,130 @@ 5.28 uint64 start_master_lock[NUM_CORES][2]; \ 5.29 CounterHandler counterHandler; 5.30 5.31 - #define HOLISTIC__Setup_Perf_Counters void setup_perf_counters(); 5.32 + #define HOLISTIC__Setup_Perf_Counters setup_perf_counters(); 5.33 5.34 - #define HOLISTIC__Start_Perf_Counters prctl(PR_TASK_PERF_EVENTS_ENABLE); 5.35 + 5.36 + #define HOLISTIC__CoreCtrl_Setup \ 5.37 + CounterHandler counterHandler = _VMSMasterEnv->counterHandler; \ 5.38 + SlaveVP *lastVPBeforeMaster = NULL; \ 5.39 + /*if(thisCoresThdParams->coreNum == 0){ \ 5.40 + uint64 initval = tsc_offset_send(thisCoresThdParams,0); \ 5.41 + while(!coreCtlrThdParams[NUM_CORES - 2]->ret_tsc); \ 5.42 + } \ 5.43 + if(0 < (thisCoresThdParams->coreNum) && (thisCoresThdParams->coreNum) < (NUM_CORES - 1)){ \ 5.44 + ThdParams* sendCoresThdParams = coreCtlrThdParams[thisCoresThdParams->coreNum - 1]; \ 5.45 + int sndctr = tsc_offset_resp(sendCoresThdParams, 0); \ 5.46 + uint64 initval = tsc_offset_send(thisCoresThdParams,0); \ 5.47 + while(!coreCtlrThdParams[NUM_CORES - 
2]->ret_tsc); \ 5.48 + } \ 5.49 + if(thisCoresThdParams->coreNum == (NUM_CORES - 1)){ \ 5.50 + ThdParams* sendCoresThdParams = coreCtlrThdParams[thisCoresThdParams->coreNum - 1]; \ 5.51 + int sndctr = tsc_offset_resp(sendCoresThdParams,0); \ 5.52 + }*/ 5.53 + 5.54 +#define HOLISTIC__Record_last_work lastVPBeforeMaster = currVP; 5.55 + 5.56 + #define HOLISTIC__Insert_Master_Global_Vars \ 5.57 + int vpid,task; \ 5.58 + CounterHandler counterHandler = masterEnv->counterHandler; 5.59 + 5.60 + #define HOLISTIC__Record_AppResponderInvocation_start \ 5.61 + uint64 cycles,instrs; \ 5.62 + saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); \ 5.63 + if(lastVPBeforeMaster){ \ 5.64 + (*counterHandler)(AppResponderInvocation_start,lastVPBeforeMaster->slaveID,lastVPBeforeMaster->assignCount,lastVPBeforeMaster,cycles,instrs); \ 5.65 + lastVPBeforeMaster = NULL; \ 5.66 + } else { \ 5.67 + _VMSMasterEnv->start_master_lock[thisCoresIdx][0] = cycles; \ 5.68 + _VMSMasterEnv->start_master_lock[thisCoresIdx][1] = instrs; \ 5.69 + } 5.70 + 5.71 + /* Request Handler may call resume() on the VP, but we want to 5.72 + * account the whole interval to the same task. Therefore, need 5.73 + * to save task ID at the beginning. 5.74 + * 5.75 + * Using this value as "end of AppResponder Invocation Time" 5.76 + * is possible if there is only one SchedSlot per core - 5.77 + * invoking processor is last to be treated here! If more than 5.78 + * one slot, MasterLoop processing time for all but the last VP 5.79 + * would be erroneously counted as invocation time. 
5.80 + */ 5.81 + #define HOLISTIC__Record_AppResponder_start \ 5.82 + vpid = currSlot->slaveAssignedToSlot->slaveID; \ 5.83 + task = currSlot->slaveAssignedToSlot->assignCount; \ 5.84 + uint64 cycles, instrs; \ 5.85 + saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); \ 5.86 + (*counterHandler)(AppResponder_start,vpid,task,currSlot->slaveAssignedToSlot,cycles,instrs); 5.87 + 5.88 + #define HOLISTIC__Record_AppResponder_end \ 5.89 + uint64 cycles2,instrs2; \ 5.90 + saveCyclesAndInstrs(thisCoresIdx,cycles2, instrs2); \ 5.91 + (*counterHandler)(AppResponder_end,vpid,task,currSlot->slaveAssignedToSlot,cycles2,instrs2); \ 5.92 + (*counterHandler)(Timestamp_end,vpid,task,currSlot->slaveAssignedToSlot,rdtsc(),0); 5.93 + 5.94 + 5.95 + /* Don't know who to account time to yet - goes to assigned VP 5.96 + * after the call. 5.97 + */ 5.98 + #define HOLISTIC__Record_Assigner_start \ 5.99 + int empty = FALSE; \ 5.100 + if(currSlot->slaveAssignedToSlot == NULL){ \ 5.101 + empty= TRUE; \ 5.102 + } \ 5.103 + uint64 tmp_cycles; \ 5.104 + uint64 tmp_instrs; \ 5.105 + saveCyclesAndInstrs(thisCoresIdx,tmp_cycles,tmp_instrs); \ 5.106 + uint64 tsc = rdtsc(); \ 5.107 + if(vpid > 0) { \ 5.108 + (*counterHandler)(NextAssigner_start,vpid,task,currSlot->slaveAssignedToSlot,tmp_cycles,tmp_instrs); \ 5.109 + vpid = 0; \ 5.110 + task = 0; \ 5.111 + } 5.112 + 5.113 + #define HOLISTIC__Record_Assigner_end \ 5.114 + uint64 cycles; \ 5.115 + uint64 instrs; \ 5.116 + saveCyclesAndInstrs(thisCoresIdx,cycles,instrs); \ 5.117 + if(empty){ \ 5.118 + (*counterHandler)(AssignerInvocation_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,masterEnv->start_master_lock[thisCoresIdx][0],masterEnv->start_master_lock[thisCoresIdx][1]); \ 5.119 + } \ 5.120 + (*counterHandler)(Timestamp_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,tsc,0); \ 5.121 + 
(*counterHandler)(Assigner_start,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,tmp_cycles,tmp_instrs); \ 5.122 + (*counterHandler)(Assigner_end,assignedSlaveVP->slaveID,assignedSlaveVP->assignCount,assignedSlaveVP,cycles,instrs); 5.123 + 5.124 + #define HOLISTIC__Record_Work_start \ 5.125 + if(currVP){ \ 5.126 + uint64 cycles,instrs; \ 5.127 + saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); \ 5.128 + (*counterHandler)(Work_start,currVP->slaveID,currVP->assignCount,currVP,cycles,instrs); \ 5.129 + } 5.130 + 5.131 + #define HOLISTIC__Record_Work_end \ 5.132 + if(currVP){ \ 5.133 + uint64 cycles,instrs; \ 5.134 + saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); \ 5.135 + (*counterHandler)(Work_end,currVP->slaveID,currVP->assignCount,currVP,cycles,instrs); \ 5.136 + } 5.137 5.138 #define HOLISTIC__Record_HwResponderInvocation_start \ 5.139 uint64 cycles,instrs; \ 5.140 - saveCyclesAndInstrs(animatingPr->coreAnimatedBy,cycles, instrs); \ 5.141 - (*(_VMSMasterEnv->counterHandler))(HwResponderInvocation_start,animatingPr->procrID,animatingPr->numTimesScheduled,animatingPr,cycles,instrs); 5.142 + saveCyclesAndInstrs(animatingSlv->coreAnimatedBy,cycles, instrs); \ 5.143 + (*(_VMSMasterEnv->counterHandler))(HwResponderInvocation_start,animatingSlv->slaveID,animatingSlv->assignCount,animatingSlv,cycles,instrs); 5.144 + 5.145 5.146 - 5.147 - 5.148 + 5.149 #else 5.150 #define MEAS__Insert_Counter_Handler 5.151 #define MEAS__Insert_Counter_Meas_Fields_into_MasterEnv 5.152 #define HOLISTIC__Setup_Perf_Counters 5.153 - #define HOLISTIC__Start_Perf_Counters 5.154 + #define HOLISTIC__Record_AppResponderInvocation_start 5.155 + #define HOLISTIC__Record_AppResponder_start 5.156 + #define HOLISTIC__Record_AppResponder_end 5.157 + #define HOLISTIC__Record_Assigner_start 5.158 + #define HOLISTIC__Record_Assigner_end 5.159 + #define HOLISTIC__Record_Work_start 5.160 + #define HOLISTIC__Record_Work_end 5.161 + #define HOLISTIC__Record_HwResponderInvocation_start 
5.162 #endif 5.163 5.164 //Experiment in two-step macros -- if doesn't work, insert each separately
6.1 --- a/VMS__int.c Mon Mar 19 10:03:45 2012 -0700 6.2 +++ b/VMS__int.c Wed Mar 21 11:09:11 2012 +0100 6.3 @@ -81,6 +81,7 @@ 6.4 //return ownership of the Slv and anim slot to Master virt pr 6.5 animatingSlv->animSlotAssignedTo->workIsDone = TRUE; 6.6 6.7 + HOLISTIC__Record_HwResponderInvocation_start; 6.8 MEAS__Capture_Pre_Susp_Point; 6.9 switchToCoreCtlr(animatingSlv); 6.10 flushRegisters();
