Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
changeset 211:5b419522dc7f perf_counters
time stamp checks added
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Thu, 08 Mar 2012 19:02:16 +0100 |
| parents | f6d81915512c |
| children | df00af7eb307 |
| files | CoreLoop.c Counters/Counters.c Counters/Counters.h MasterLoop.c VMS.c VMS.h __brch__perf_counters |
| diffstat | 7 files changed, 162 insertions(+), 163 deletions(-) [+] |
line diff
1.1 --- a/CoreLoop.c Fri Feb 10 18:35:00 2012 +0100 1.2 +++ b/CoreLoop.c Thu Mar 08 19:02:16 2012 +0100 1.3 @@ -33,7 +33,7 @@ 1.4 void * 1.5 coreLoop( void *paramsIn ) 1.6 { 1.7 - ThdParams *coreLoopThdParams; 1.8 + ThdParams *thisCoresThdParams; 1.9 int thisCoresIdx; 1.10 VirtProcr *currPr = NULL; 1.11 VMSQueueStruc *readyToAnimateQ; 1.12 @@ -45,8 +45,8 @@ 1.13 //preGateProgress, waitProgress, exitProgress, gateClosed; 1.14 1.15 1.16 - coreLoopThdParams = (ThdParams *)paramsIn; 1.17 - thisCoresIdx = coreLoopThdParams->coreNum; 1.18 + thisCoresThdParams = (ThdParams *)paramsIn; 1.19 + thisCoresIdx = thisCoresThdParams->coreNum; 1.20 1.21 gate.gateClosed = FALSE; 1.22 gate.preGateProgress = 0; 1.23 @@ -69,7 +69,7 @@ 1.24 //Linux requires pinning thd to core inside thread-function 1.25 //Designate a core by a 1 in bit-position corresponding to the core 1.26 CPU_ZERO(&coreMask); 1.27 - CPU_SET(coreLoopThdParams->coreNum,&coreMask); 1.28 + CPU_SET(thisCoresThdParams->coreNum,&coreMask); 1.29 //coreMask = 1L << coreLoopThdParams->coreNum; 1.30 1.31 pthread_t selfThd = pthread_self(); 1.32 @@ -80,6 +80,28 @@ 1.33 #ifdef MEAS__PERF_COUNTERS 1.34 CounterHandler counterHandler = _VMSMasterEnv->counterHandler; 1.35 VirtProcr *lastVPBeforeMaster = NULL; 1.36 + 1.37 + if(thisCoresThdParams->coreNum == 0){ 1.38 + uint64 initval = tsc_offset_send(thisCoresThdParams,0); 1.39 + ThdParams* sendCoresThdParams = coreLoopThdParams[NUM_CORES - 1]; 1.40 + int sndctr = tsc_offset_resp(sendCoresThdParams,0); 1.41 + tsc_offset_send(thisCoresThdParams,initval); 1.42 + } 1.43 + if(0 < (thisCoresThdParams->coreNum) && (thisCoresThdParams->coreNum) < (NUM_CORES - 1)){ 1.44 + ThdParams* sendCoresThdParams = coreLoopThdParams[thisCoresThdParams->coreNum - 1]; 1.45 + int sndctr = tsc_offset_resp(sendCoresThdParams, 0); 1.46 + uint64 initval = tsc_offset_send(thisCoresThdParams,0); 1.47 + tsc_offset_resp(sendCoresThdParams, sndctr); 1.48 + tsc_offset_send(thisCoresThdParams,initval); 1.49 + } 1.50 + if(thisCoresThdParams->coreNum == (NUM_CORES - 1)){ 1.51 + ThdParams* sendCoresThdParams = coreLoopThdParams[thisCoresThdParams->coreNum - 1]; 1.52 + int sndctr = tsc_offset_resp(sendCoresThdParams,0); 1.53 + uint64 initval = tsc_offset_send(thisCoresThdParams,0); 1.54 + tsc_offset_resp(sendCoresThdParams,sndctr); 1.55 + } 1.56 + 1.57 + //printf("Core %d starting!\n",thisCoresThdParams->coreNum); 1.58 #endif 1.59 1.60 //Save the return address in the SwitchVP function 1.61 @@ -133,11 +155,14 @@ 1.62 saveLowTimeStampCountInto( startStamp ); 1.63 #endif 1.64 #ifdef MEAS__PERF_COUNTERS 1.65 + uint64 cycles,instrs; 1.66 + saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); 1.67 if(lastVPBeforeMaster){ 1.68 - uint64 cycles,instrs; 1.69 - saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); 1.70 (*counterHandler)(AppResponderInvocation_start,lastVPBeforeMaster->procrID,lastVPBeforeMaster->numTimesScheduled,lastVPBeforeMaster,cycles,instrs); 1.71 lastVPBeforeMaster = NULL; 1.72 + } else { 1.73 + _VMSMasterEnv->start_master_lock[thisCoresIdx][0] = cycles; 1.74 + _VMSMasterEnv->start_master_lock[thisCoresIdx][1] = instrs; 1.75 } 1.76 #endif 1.77 //=====================================================================
2.1 --- a/Counters/Counters.c Fri Feb 10 18:35:00 2012 +0100 2.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 2.3 @@ -1,56 +0,0 @@ 2.4 -#include "Counters.h" 2.5 - 2.6 - 2.7 - 2.8 -void print_record_human_readable(CounterRecord* record){ 2.9 - printf("=== Loop Stats (VP %d # %d suspended at %p): ===\n",record->vp_id,record->task_position,record->addr_of_libcall_for_req); 2.10 - printf("Constraints check done:\t%llu cycles, %llu instrs [Core %d]\n",record->sc_done_cycles,record->sc_done_instrs,record->req_core); 2.11 - printf("Time in ready queue:\tblocked at %u, unblocked at %u\n",record->blocked_timestamp,record->unblocked_timestamp); 2.12 - printf("Assigner started:\t%llu cycles, %llu instrs [Core %d]\n",record->start_assign_cycles,record->start_assign_instrs,record->assigning_core); 2.13 - printf("Assigner ended: \t%llu cycles, %llu instrs [Core %d]\n",record->end_assign_cycles,record->end_assign_instrs,record->assigning_core); 2.14 - printf("Work+comm started:\t%llu cycles, %llu instrs [Core %d]\n",record->start_work_cycles,record->start_work_instrs,record->work_core); 2.15 - printf("Work+comm ended:\t%llu cycles, %llu instrs [Core %d]\n",record->suspend_cycles,record->suspend_instrs,record->work_core); 2.16 - printf("Status request read:\t%llu cycles, %llu instrs [Core %d]\n",record->req_cycles,record->req_instrs,record->req_core); 2.17 - fflush(stdin); 2.18 -} 2.19 - 2.20 -void print_record_csv(CounterRecord* record) { 2.21 - //Columns are VP,task,ret_addr, (core,cycles,instrs)* for each savepoint, blocked,unblocked 2.22 - printf("%d,%d,%p,",record->vp_id,record->task_position,record->addr_of_libcall_for_req); 2.23 - printf("%d,%llu,%llu,",record->req_core,record->sc_done_cycles,record->sc_done_instrs); 2.24 - printf("%d,%llu,%llu,",record->assigning_core,record->start_assign_cycles,record->start_assign_instrs); 2.25 - printf("%d,%llu,%llu,",record->assigning_core,record->end_assign_cycles,record->end_assign_instrs); 2.26 - printf("%d,%llu,%llu,",record->work_core,record->start_work_cycles,record->start_work_instrs); 2.27 - printf("%d,%llu,%llu,",record->work_core,record->suspend_cycles,record->suspend_instrs); 2.28 - printf("%d,%llu,%llu,",record->req_core,record->req_cycles,record->req_instrs); 2.29 - printf("%u,%u\n",record->blocked_timestamp,record->unblocked_timestamp); 2.30 - fflush(stdin); 2.31 -} 2.32 - 2.33 -void print_record_csv_to_file(CounterRecord* record, FILE* file) { 2.34 - //Columns are VP,task,ret_addr, (core,cycles,instrs)* for each savepoint, blocked,unblocked 2.35 - fprintf(file,"%d,%d,%p,",record->vp_id,record->task_position,record->addr_of_libcall_for_req); 2.36 - fprintf(file,"%d,%llu,%llu,",record->req_core,record->sc_done_cycles,record->sc_done_instrs); 2.37 - fprintf(file,"%d,%llu,%llu,",record->assigning_core,record->start_assign_cycles,record->start_assign_instrs); 2.38 - fprintf(file,"%d,%llu,%llu,",record->assigning_core,record->end_assign_cycles,record->end_assign_instrs); 2.39 - fprintf(file,"%d,%llu,%llu,",record->work_core,record->start_work_cycles,record->start_work_instrs); 2.40 - fprintf(file,"%d,%llu,%llu,",record->work_core,record->suspend_cycles,record->suspend_instrs); 2.41 - fprintf(file,"%d,%llu,%llu,",record->req_core,record->req_cycles,record->req_instrs); 2.42 - fprintf(file,"%d,%llu,%llu,",record->req_core,record->next_task_req_cycles,record->next_task_req_instrs); 2.43 - fprintf(file,"%u,%u\n",record->blocked_timestamp,record->unblocked_timestamp); 2.44 -} 2.45 - 2.46 -void set_dot_file(FILE* file){ 2.47 - dot_file = file; 2.48 -} 2.49 - 2.50 -void print_dot_node_info(void* _record){ 2.51 - CounterRecord* record = (CounterRecord*) _record; 2.52 - fprintf(dot_file,"VP_%d_%d [shape=record,label=\"{ VP %d # %d ",record->vp_id,record->task_position,record->vp_id,record->task_position); 2.53 - fprintf(dot_file,"| { sc_ch | C:%lld I:%lld }",record->sc_done_cycles - record->req_cycles,record->sc_done_instrs - record->req_instrs); 2.54 - fprintf(dot_file,"| { sync | C:%d }", record->task_position ? record->unblocked_timestamp - record->blocked_timestamp : 0); 2.55 - fprintf(dot_file,"| { assign | C:%lld I:%lld }",record->end_assign_cycles - record->start_assign_cycles,record->end_assign_instrs - record->start_assign_instrs); 2.56 - fprintf(dot_file,"| { W + C | C:%lld I:%lld }",record->suspend_cycles - record->start_work_cycles,record->suspend_instrs - record->start_work_instrs); 2.57 - fprintf(dot_file,"| { status | C:%lld I:%lld }",record->next_task_req_cycles - record->suspend_cycles,record->next_task_req_instrs - record->suspend_instrs); 2.58 - fprintf(dot_file,"}\"];\n"); 2.59 -} 2.60 \ No newline at end of file
3.1 --- a/Counters/Counters.h Fri Feb 10 18:35:00 2012 +0100 3.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 3.3 @@ -1,56 +0,0 @@ 3.4 -/* 3.5 - * File: counters.h 3.6 - * Author: engelhardt 3.7 - * 3.8 - * Created on 20. Juli 2011, 13:27 3.9 - */ 3.10 - 3.11 -#include "../VMS_primitive_data_types.h" 3.12 -#include "../../../C_Libraries/DynArray/DynArray.h" 3.13 - 3.14 -#include <stdio.h> 3.15 - 3.16 -#ifndef COUNTERS_H 3.17 -#define COUNTERS_H 3.18 - 3.19 -typedef struct { 3.20 - int work_core; 3.21 - int assigning_core; 3.22 - int req_core; 3.23 - int vp_id; 3.24 - int task_position; 3.25 - uint32 blocked_timestamp; 3.26 - uint32 unblocked_timestamp; 3.27 - uint64 req_cycles; 3.28 - uint64 req_instrs; 3.29 - uint64 sc_done_cycles; 3.30 - uint64 sc_done_instrs; 3.31 -// uint64 enter_readyQ_cycles; 3.32 -// uint64 enter_readyQ_instrs; 3.33 - uint64 start_assign_cycles; 3.34 - uint64 start_assign_instrs; 3.35 - uint64 end_assign_cycles; 3.36 - uint64 end_assign_instrs; 3.37 - uint64 start_work_cycles; 3.38 - uint64 start_work_instrs; 3.39 - uint64 suspend_cycles; 3.40 - uint64 suspend_instrs; 3.41 - uint64 next_task_req_cycles; 3.42 - uint64 next_task_req_instrs; 3.43 - void* addr_of_libcall_for_req; 3.44 -} CounterRecord; 3.45 - 3.46 -FILE* dot_file; 3.47 - 3.48 -void print_record_human_readable(CounterRecord* record); 3.49 - 3.50 -void print_record_csv(CounterRecord* record); 3.51 - 3.52 -void print_record_csv_to_file(CounterRecord* record, FILE* file); 3.53 - 3.54 -void set_dot_file(FILE* file); 3.55 - 3.56 -void print_dot_node_info(void* counterRecord); 3.57 - 3.58 -#endif /* COUNTERS_H */ 3.59 -
4.1 --- a/MasterLoop.c Fri Feb 10 18:35:00 2012 +0100 4.2 +++ b/MasterLoop.c Thu Mar 08 19:02:16 2012 +0100 4.3 @@ -89,6 +89,7 @@ 4.4 4.5 volatileMasterPr = animatingPr; 4.6 masterPr = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp 4.7 + int vpid,task; 4.8 4.9 //First animation of each MasterVP will in turn animate this part 4.10 // of setup code.. (VP creator sets up the stack as if this function 4.11 @@ -152,8 +153,18 @@ 4.12 saveLowTimeStampCountInto( startStamp1 ); 4.13 #endif 4.14 #ifdef MEAS__PERF_COUNTERS 4.15 - int vpid = currSlot->procrAssignedToSlot->procrID; 4.16 - int task = currSlot->procrAssignedToSlot->numTimesScheduled; 4.17 + /* Request Handler may call resume() on the VP, but we want to 4.18 + * account the whole interval to the same task. Therefore, need 4.19 + * to save task ID at the beginning. 4.20 + * 4.21 + * Using this value as "end of AppResponder Invocation Time" 4.22 + * is possible if there is only one SchedSlot per core - 4.23 + * invoking processor is last to be treated here! If more than 4.24 + * one slot, MasterLoop processing time for all but the last VP 4.25 + * would be erroneously counted as invocation time. 4.26 + */ 4.27 + vpid = currSlot->procrAssignedToSlot->procrID; 4.28 + task = currSlot->procrAssignedToSlot->numTimesScheduled; 4.29 uint64 cycles, instrs; 4.30 saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); 4.31 (*counterHandler)(AppResponder_start,vpid,task,currSlot->procrAssignedToSlot,cycles,instrs); 4.32 @@ -169,10 +180,11 @@ 4.33 _VMSMasterEnv->reqHdlrHighTimeHist ); 4.34 #endif 4.35 #ifdef MEAS__PERF_COUNTERS 4.36 - //done with constraints check 4.37 + //use previous task ID here (may or may not be the same as current VP state) 4.38 uint64 cycles2,instrs2; 4.39 saveCyclesAndInstrs(thisCoresIdx,cycles2, instrs2); 4.40 (*counterHandler)(AppResponder_end,vpid,task,currSlot->procrAssignedToSlot,cycles2,instrs2); 4.41 + (*counterHandler)(Timestamp_end,vpid,task,currSlot->procrAssignedToSlot,rdtsc(),0); 4.42 #endif 4.43 //============================================================ 4.44 } 4.45 @@ -180,11 +192,22 @@ 4.46 { //give slot a new virt procr 4.47 #ifdef MEAS__PERF_COUNTERS 4.48 //start assigner 4.49 + /* Don't know who to account time to yet - goes to assigned VP 4.50 + * after the call. 4.51 + */ 4.52 + int empty = FALSE; 4.53 + if(currSlot->procrAssignedToSlot == NULL){ 4.54 + empty= TRUE; 4.55 + } 4.56 uint64 tmp_cycles; 4.57 uint64 tmp_instrs; 4.58 saveCyclesAndInstrs(thisCoresIdx,tmp_cycles,tmp_instrs); 4.59 - //FIXME WTF AM I DOING WHY DOES THIS EVEN WORK 4.60 - //(*counterHandler)(MasterLoop_beforeNextAssign,schedVirtPr,tmp_cycles,tmp_instrs); 4.61 + uint64 tsc = rdtsc(); 4.62 + if(vpid > 0) { 4.63 + (*counterHandler)(NextAssigner_start,vpid,task,currSlot->procrAssignedToSlot,tmp_cycles,tmp_instrs); 4.64 + vpid = 0; 4.65 + task = 0; 4.66 + } 4.67 #endif 4.68 schedVirtPr = 4.69 (*slaveScheduler)( semanticEnv, thisCoresIdx, slotIdx ); 4.70 @@ -201,6 +224,11 @@ 4.71 uint64 cycles; 4.72 uint64 instrs; 4.73 saveCyclesAndInstrs(thisCoresIdx,cycles,instrs); 4.74 + 4.75 + if(empty){ 4.76 + (*counterHandler)(AssignerInvocation_start,schedVirtPr->procrID,schedVirtPr->numTimesScheduled,schedVirtPr,masterEnv->start_master_lock[thisCoresIdx][0],masterEnv->start_master_lock[thisCoresIdx][1]); 4.77 + } 4.78 + (*counterHandler)(Timestamp_start,schedVirtPr->procrID,schedVirtPr->numTimesScheduled,schedVirtPr,tsc,0); 4.79 (*counterHandler)(Assigner_start,schedVirtPr->procrID,schedVirtPr->numTimesScheduled,schedVirtPr,tmp_cycles,tmp_instrs); 4.80 (*counterHandler)(Assigner_end,schedVirtPr->procrID,schedVirtPr->numTimesScheduled,schedVirtPr,cycles,instrs); 4.81 #endif
5.1 --- a/VMS.c Fri Feb 10 18:35:00 2012 +0100 5.2 +++ b/VMS.c Thu Mar 08 19:02:16 2012 +0100 5.3 @@ -193,31 +193,6 @@ 5.4 #endif 5.5 5.6 #ifdef MEAS__PERF_COUNTERS 5.7 -/* 5.8 - _VMSMasterEnv->counter_history = VMS__malloc(10*sizeof(void*)); 5.9 - _VMSMasterEnv->counter_history_array_info = makePrivDynArrayInfoFrom((void***)&(_VMSMasterEnv->counter_history),10); 5.10 -*/ 5.11 - //printf("Creating HW counters..."); 5.12 -/* 5.13 - FILE* output; 5.14 - int n; 5.15 - char filename[255]; 5.16 - for(n=0;n<255;n++) 5.17 - { 5.18 - sprintf(filename, "./counters/Counters.%d.csv",n); 5.19 - output = fopen(filename,"r"); 5.20 - if(output) 5.21 - { 5.22 - fclose(output); 5.23 - }else{ 5.24 - break; 5.25 - } 5.26 - } 5.27 - printf("Saving Counter measurements to File: %s ...\n", filename); 5.28 - output = fopen(filename,"w+"); 5.29 - _VMSMasterEnv->counteroutput = output; 5.30 -*/ 5.31 - 5.32 struct perf_event_attr hw_event; 5.33 memset(&hw_event,0,sizeof(hw_event)); 5.34 hw_event.type = PERF_TYPE_HARDWARE; 5.35 @@ -228,8 +203,8 @@ 5.36 hw_event.pinned = 1; /* must always be on PMU */ 5.37 hw_event.exclusive = 0; /* only group on PMU */ 5.38 hw_event.exclude_user = 0; /* don't count user */ 5.39 - hw_event.exclude_kernel = 1; /* ditto kernel */ 5.40 - hw_event.exclude_hv = 1; /* ditto hypervisor */ 5.41 + hw_event.exclude_kernel = 0; /* ditto kernel */ 5.42 + hw_event.exclude_hv = 0; /* ditto hypervisor */ 5.43 hw_event.exclude_idle = 0; /* don't count when idle */ 5.44 hw_event.mmap = 0; /* include mmap data */ 5.45 hw_event.comm = 0; /* include comm data */ 5.46 @@ -260,10 +235,9 @@ 5.47 perror("Failed to open instrs counter"); 5.48 } 5.49 } 5.50 - prctl(PR_TASK_PERF_EVENTS_ENABLE); 5.51 - uint64 tmpc,tmpi; 5.52 - saveCyclesAndInstrs(0,tmpc,tmpi); 5.53 - printf("Start: cycles = %llu, instrs = %llu\n",tmpc,tmpi); 5.54 + //uint64 tmpc,tmpi; 5.55 + //saveCyclesAndInstrs(0,tmpc,tmpi); 5.56 + //printf("Start: cycles = %llu, instrs = %llu\n",tmpc,tmpi); 5.57 #endif 5.58 5.59 //======================================================================== 5.60 @@ -314,8 +288,10 @@ 5.61 5.62 //Make the threads that animate the core loops 5.63 for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) 5.64 - { coreLoopThdParams[coreIdx] = VMS__malloc( sizeof(ThdParams) ); 5.65 + { coreLoopThdParams[coreIdx] = VMS__malloc( sizeof(ThdParams) + CACHE_LINE ); //make sure there is no false sharing 5.66 coreLoopThdParams[coreIdx]->coreNum = coreIdx; 5.67 + coreLoopThdParams[coreIdx]->sent_ctr = 0; 5.68 + coreLoopThdParams[coreIdx]->ret_tsc = 0; 5.69 5.70 retCode = 5.71 pthread_create( &(coreLoopThdHandles[coreIdx]), 5.72 @@ -324,6 +300,7 @@ 5.73 (void *)(coreLoopThdParams[coreIdx]) ); 5.74 if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(1);} 5.75 } 5.76 + prctl(PR_TASK_PERF_EVENTS_ENABLE); 5.77 } 5.78 5.79 /*Semantic layer calls this when it want the system to start running.. 5.80 @@ -737,8 +714,7 @@ 5.81 #ifdef MEAS__PERF_COUNTERS 5.82 uint64 tmpc,tmpi; 5.83 saveCyclesAndInstrs(0,tmpc,tmpi); 5.84 - printf("End: cycles = %llu, instrs = %llu\n",tmpc,tmpi); 5.85 - prctl(PR_TASK_PERF_EVENTS_DISABLE); 5.86 + //printf("End: cycles = %llu, instrs = %llu\n",tmpc,tmpi); 5.87 /* 5.88 for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ){ 5.89 close(_VMSMasterEnv->cycles_counter_fd[coreIdx]); 5.90 @@ -789,7 +765,7 @@ 5.91 //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile); 5.92 //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHist ); 5.93 5.94 - 5.95 + prctl(PR_TASK_PERF_EVENTS_DISABLE); 5.96 #ifdef MEAS__TIME_PLUGIN 5.97 printHist( _VMSMasterEnv->reqHdlrLowTimeHist ); 5.98 saveHistToFile( _VMSMasterEnv->reqHdlrLowTimeHist ); 5.99 @@ -879,3 +855,74 @@ 5.100 exit(1); 5.101 } 5.102 5.103 + __inline__ uint64_t rdtsc(void){ 5.104 + uint32_t lo, hi; 5.105 + __asm__ __volatile__ ( // serialize 5.106 + "xorl %%eax,%%eax \n cpuid" 5.107 + ::: "%rax", "%rbx", "%rcx", "%rdx"); 5.108 + __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); 5.109 + /* asm volatile("RDTSC;" 5.110 + "movl %%eax, %0;" 5.111 + "movl %%edx, %1;" 5.112 + : "=m" (lo), "=m" (hi) 5.113 + : 5.114 + : "%eax", "%edx" 5.115 + ); */ 5.116 + return (uint64_t)hi << 32 | lo; 5.117 + } 5.118 + 5.119 + uint64 tsc_offset_send(ThdParams* thisCoresThdParams, uint64 initval){ 5.120 + uint64 ret_tsc_curr; //local copy of coreLoopThdParams->ret_tsc 5.121 + uint64 ret_tsc_prev; 5.122 + uint64 local_before; 5.123 + uint64 local_after; 5.124 + 5.125 + ret_tsc_prev = initval; 5.126 + ret_tsc_curr = initval; 5.127 + local_before = rdtsc(); 5.128 + thisCoresThdParams->sent_ctr++; 5.129 + while(ret_tsc_curr == ret_tsc_prev) 5.130 + ret_tsc_curr = thisCoresThdParams->ret_tsc; 5.131 + local_after = rdtsc(); 5.132 + ret_tsc_prev = ret_tsc_curr; 5.133 + 5.134 + int i; 5.135 + for(i=0;i<3;++i){ 5.136 + local_before = rdtsc(); 5.137 + thisCoresThdParams->sent_ctr++; 5.138 + while(ret_tsc_curr == ret_tsc_prev) 5.139 + ret_tsc_curr = thisCoresThdParams->ret_tsc; 5.140 + local_after = rdtsc(); 5.141 + int64 midpoint = local_before + (local_after-local_before)/2; 5.142 + int64 difference; 5.143 + if (midpoint > ret_tsc_curr) 5.144 + difference = midpoint - (int64)ret_tsc_curr; 5.145 + else 5.146 + difference = (int64)ret_tsc_curr - midpoint; 5.147 + //printf("TSC: %llu (Core %d) = %llu (Core %d) // difference=%llu\n",midpoint,thisCoresThdParams->coreNum,ret_tsc_curr,thisCoresThdParams->coreNum + 1,difference); 5.148 + ret_tsc_prev = ret_tsc_curr; 5.149 + } 5.150 + 5.151 + return ret_tsc_curr; 5.152 + } 5.153 + 5.154 + int tsc_offset_resp(ThdParams* sendCoresThdParams, int initialctrval){ 5.155 + 5.156 + int send_ctr_curr = initialctrval; 5.157 + int send_ctr_prev = initialctrval; 5.158 + 5.159 + 5.160 + while(send_ctr_curr == send_ctr_prev) 5.161 + send_ctr_curr = sendCoresThdParams->sent_ctr; 5.162 + sendCoresThdParams->ret_tsc = rdtsc(); 5.163 + send_ctr_prev = send_ctr_curr; 5.164 + 5.165 + int i; 5.166 + for(i=0;i<3;++i){ 5.167 + while(send_ctr_curr == send_ctr_prev) 5.168 + send_ctr_curr = sendCoresThdParams->sent_ctr; 5.169 + sendCoresThdParams->ret_tsc = rdtsc(); 5.170 + send_ctr_prev = send_ctr_curr; 5.171 + } 5.172 + return send_ctr_curr; 5.173 + } 5.174 \ No newline at end of file
6.1 --- a/VMS.h Fri Feb 10 18:35:00 2012 +0100 6.2 +++ b/VMS.h Thu Mar 08 19:02:16 2012 +0100 6.3 @@ -82,7 +82,7 @@ 6.4 //========================= Hardware related Constants ===================== 6.5 //This value is the number of hardware threads in the shared memory 6.6 // machine 6.7 -#define NUM_CORES 4 6.8 +#define NUM_CORES 80 6.9 6.10 // tradeoff amortizing master fixed overhead vs imbalance potential 6.11 // when work-stealing, can make bigger, at risk of losing cache affinity 6.12 @@ -98,7 +98,7 @@ 6.13 // memory for VMS__malloc 6.14 #define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */ 6.15 6.16 -#define CACHE_LINE 64 6.17 +#define CACHE_LINE 256 6.18 #define PAGE_SIZE 4096 6.19 6.20 6.21 @@ -274,6 +274,7 @@ 6.22 #ifdef MEAS__PERF_COUNTERS 6.23 int cycles_counter_fd[NUM_CORES]; 6.24 int instrs_counter_fd[NUM_CORES]; 6.25 + uint64 start_master_lock[NUM_CORES][2]; 6.26 //FILE* counteroutput; 6.27 //CounterRecord** counter_history; 6.28 //PrivDynArrayInfo* counter_history_array_info; 6.29 @@ -309,6 +310,8 @@ 6.30 { 6.31 void *endThdPt; 6.32 unsigned int coreNum; 6.33 + volatile int sent_ctr; 6.34 + volatile uint64 ret_tsc; 6.35 } 6.36 ThdParams; 6.37 6.38 @@ -317,8 +320,9 @@ 6.39 pthread_mutex_t suspendLock; 6.40 pthread_cond_t suspend_cond; 6.41 6.42 - 6.43 - 6.44 + uint64 tsc_offset_send(ThdParams* thisCoresThdParams,uint64 initval); 6.45 + int tsc_offset_resp(ThdParams* sendCoresThdParams,int initctr); 6.46 + 6.47 //===================== Global Vars =================== 6.48 6.49 volatile MasterEnv *_VMSMasterEnv; 6.50 @@ -420,6 +424,8 @@ 6.51 /* clobber */ : "%eax", "%edx" \ 6.52 ); 6.53 6.54 + __inline__ uint64_t rdtsc(void); 6.55 + 6.56 //==================== 6.57 #define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \ 6.58 makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \ 6.59 @@ -449,11 +455,15 @@ 6.60 AppResponderInvocation_start, 6.61 AppResponder_start, 6.62 AppResponder_end, 6.63 + AssignerInvocation_start, 6.64 + NextAssigner_start, 6.65 Assigner_start, 6.66 Assigner_end, 6.67 Work_start, 6.68 Work_end, 6.69 - HwResponderInvocation_start 6.70 + HwResponderInvocation_start, 6.71 + Timestamp_start, 6.72 + Timestamp_end 6.73 }; 6.74 6.75 #define getReturnAddressBeforeLibraryCall(vp_ptr, res_ptr) do{ \
