# HG changeset patch # User Nina Engelhardt # Date 1331229736 -3600 # Node ID 5b419522dc7f3e834fc5ce4ff127904a4100936e # Parent f6d81915512c49ccba8494853003e5fc6bde39a8 time stamp checks added diff -r f6d81915512c -r 5b419522dc7f CoreLoop.c --- a/CoreLoop.c Fri Feb 10 18:35:00 2012 +0100 +++ b/CoreLoop.c Thu Mar 08 19:02:16 2012 +0100 @@ -33,7 +33,7 @@ void * coreLoop( void *paramsIn ) { - ThdParams *coreLoopThdParams; + ThdParams *thisCoresThdParams; int thisCoresIdx; VirtProcr *currPr = NULL; VMSQueueStruc *readyToAnimateQ; @@ -45,8 +45,8 @@ //preGateProgress, waitProgress, exitProgress, gateClosed; - coreLoopThdParams = (ThdParams *)paramsIn; - thisCoresIdx = coreLoopThdParams->coreNum; + thisCoresThdParams = (ThdParams *)paramsIn; + thisCoresIdx = thisCoresThdParams->coreNum; gate.gateClosed = FALSE; gate.preGateProgress = 0; @@ -69,7 +69,7 @@ //Linux requires pinning thd to core inside thread-function //Designate a core by a 1 in bit-position corresponding to the core CPU_ZERO(&coreMask); - CPU_SET(coreLoopThdParams->coreNum,&coreMask); + CPU_SET(thisCoresThdParams->coreNum,&coreMask); //coreMask = 1L << coreLoopThdParams->coreNum; pthread_t selfThd = pthread_self(); @@ -80,6 +80,28 @@ #ifdef MEAS__PERF_COUNTERS CounterHandler counterHandler = _VMSMasterEnv->counterHandler; VirtProcr *lastVPBeforeMaster = NULL; + + if(thisCoresThdParams->coreNum == 0){ + uint64 initval = tsc_offset_send(thisCoresThdParams,0); + ThdParams* sendCoresThdParams = coreLoopThdParams[NUM_CORES - 1]; + int sndctr = tsc_offset_resp(sendCoresThdParams,0); + tsc_offset_send(thisCoresThdParams,initval); + } + if(0 < (thisCoresThdParams->coreNum) && (thisCoresThdParams->coreNum) < (NUM_CORES - 1)){ + ThdParams* sendCoresThdParams = coreLoopThdParams[thisCoresThdParams->coreNum - 1]; + int sndctr = tsc_offset_resp(sendCoresThdParams, 0); + uint64 initval = tsc_offset_send(thisCoresThdParams,0); + tsc_offset_resp(sendCoresThdParams, sndctr); + tsc_offset_send(thisCoresThdParams,initval); + } + if(thisCoresThdParams->coreNum == (NUM_CORES - 1)){ + ThdParams* sendCoresThdParams = coreLoopThdParams[thisCoresThdParams->coreNum - 1]; + int sndctr = tsc_offset_resp(sendCoresThdParams,0); + uint64 initval = tsc_offset_send(thisCoresThdParams,0); + tsc_offset_resp(sendCoresThdParams,sndctr); + } + + //printf("Core %d starting!\n",thisCoresThdParams->coreNum); #endif //Save the return address in the SwitchVP function @@ -133,11 +155,14 @@ saveLowTimeStampCountInto( startStamp ); #endif #ifdef MEAS__PERF_COUNTERS + uint64 cycles,instrs; + saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); if(lastVPBeforeMaster){ - uint64 cycles,instrs; - saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); (*counterHandler)(AppResponderInvocation_start,lastVPBeforeMaster->procrID,lastVPBeforeMaster->numTimesScheduled,lastVPBeforeMaster,cycles,instrs); lastVPBeforeMaster = NULL; + } else { + _VMSMasterEnv->start_master_lock[thisCoresIdx][0] = cycles; + _VMSMasterEnv->start_master_lock[thisCoresIdx][1] = instrs; } #endif //===================================================================== diff -r f6d81915512c -r 5b419522dc7f Counters/Counters.c --- a/Counters/Counters.c Fri Feb 10 18:35:00 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,56 +0,0 @@ -#include "Counters.h" - - - -void print_record_human_readable(CounterRecord* record){ - printf("=== Loop Stats (VP %d # %d suspended at %p): ===\n",record->vp_id,record->task_position,record->addr_of_libcall_for_req); - printf("Constraints check done:\t%llu cycles, %llu instrs [Core %d]\n",record->sc_done_cycles,record->sc_done_instrs,record->req_core); - printf("Time in ready queue:\tblocked at %u, unblocked at %u\n",record->blocked_timestamp,record->unblocked_timestamp); - printf("Assigner started:\t%llu cycles, %llu instrs [Core %d]\n",record->start_assign_cycles,record->start_assign_instrs,record->assigning_core); - printf("Assigner ended: \t%llu cycles, %llu instrs [Core %d]\n",record->end_assign_cycles,record->end_assign_instrs,record->assigning_core); - printf("Work+comm started:\t%llu cycles, %llu instrs [Core %d]\n",record->start_work_cycles,record->start_work_instrs,record->work_core); - printf("Work+comm ended:\t%llu cycles, %llu instrs [Core %d]\n",record->suspend_cycles,record->suspend_instrs,record->work_core); - printf("Status request read:\t%llu cycles, %llu instrs [Core %d]\n",record->req_cycles,record->req_instrs,record->req_core); - fflush(stdin); -} - -void print_record_csv(CounterRecord* record) { - //Columns are VP,task,ret_addr, (core,cycles,instrs)* for each savepoint, blocked,unblocked - printf("%d,%d,%p,",record->vp_id,record->task_position,record->addr_of_libcall_for_req); - printf("%d,%llu,%llu,",record->req_core,record->sc_done_cycles,record->sc_done_instrs); - printf("%d,%llu,%llu,",record->assigning_core,record->start_assign_cycles,record->start_assign_instrs); - printf("%d,%llu,%llu,",record->assigning_core,record->end_assign_cycles,record->end_assign_instrs); - printf("%d,%llu,%llu,",record->work_core,record->start_work_cycles,record->start_work_instrs); - printf("%d,%llu,%llu,",record->work_core,record->suspend_cycles,record->suspend_instrs); - printf("%d,%llu,%llu,",record->req_core,record->req_cycles,record->req_instrs); - printf("%u,%u\n",record->blocked_timestamp,record->unblocked_timestamp); - fflush(stdin); -} - -void print_record_csv_to_file(CounterRecord* record, FILE* file) { - //Columns are VP,task,ret_addr, (core,cycles,instrs)* for each savepoint, blocked,unblocked - fprintf(file,"%d,%d,%p,",record->vp_id,record->task_position,record->addr_of_libcall_for_req); - fprintf(file,"%d,%llu,%llu,",record->req_core,record->sc_done_cycles,record->sc_done_instrs); - fprintf(file,"%d,%llu,%llu,",record->assigning_core,record->start_assign_cycles,record->start_assign_instrs); - fprintf(file,"%d,%llu,%llu,",record->assigning_core,record->end_assign_cycles,record->end_assign_instrs); - fprintf(file,"%d,%llu,%llu,",record->work_core,record->start_work_cycles,record->start_work_instrs); - fprintf(file,"%d,%llu,%llu,",record->work_core,record->suspend_cycles,record->suspend_instrs); - fprintf(file,"%d,%llu,%llu,",record->req_core,record->req_cycles,record->req_instrs); - fprintf(file,"%d,%llu,%llu,",record->req_core,record->next_task_req_cycles,record->next_task_req_instrs); - fprintf(file,"%u,%u\n",record->blocked_timestamp,record->unblocked_timestamp); -} - -void set_dot_file(FILE* file){ - dot_file = file; -} - -void print_dot_node_info(void* _record){ - CounterRecord* record = (CounterRecord*) _record; - fprintf(dot_file,"VP_%d_%d [shape=record,label=\"{ VP %d # %d ",record->vp_id,record->task_position,record->vp_id,record->task_position); - fprintf(dot_file,"| { sc_ch | C:%lld I:%lld }",record->sc_done_cycles - record->req_cycles,record->sc_done_instrs - record->req_instrs); - fprintf(dot_file,"| { sync | C:%d }", record->task_position ? record->unblocked_timestamp - record->blocked_timestamp : 0); - fprintf(dot_file,"| { assign | C:%lld I:%lld }",record->end_assign_cycles - record->start_assign_cycles,record->end_assign_instrs - record->start_assign_instrs); - fprintf(dot_file,"| { W + C | C:%lld I:%lld }",record->suspend_cycles - record->start_work_cycles,record->suspend_instrs - record->start_work_instrs); - fprintf(dot_file,"| { status | C:%lld I:%lld }",record->next_task_req_cycles - record->suspend_cycles,record->next_task_req_instrs - record->suspend_instrs); - fprintf(dot_file,"}\"];\n"); -} \ No newline at end of file diff -r f6d81915512c -r 5b419522dc7f Counters/Counters.h --- a/Counters/Counters.h Fri Feb 10 18:35:00 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,56 +0,0 @@ -/* - * File: counters.h - * Author: engelhardt - * - * Created on 20. Juli 2011, 13:27 - */ - -#include "../VMS_primitive_data_types.h" -#include "../../../C_Libraries/DynArray/DynArray.h" - -#include - -#ifndef COUNTERS_H -#define COUNTERS_H - -typedef struct { - int work_core; - int assigning_core; - int req_core; - int vp_id; - int task_position; - uint32 blocked_timestamp; - uint32 unblocked_timestamp; - uint64 req_cycles; - uint64 req_instrs; - uint64 sc_done_cycles; - uint64 sc_done_instrs; -// uint64 enter_readyQ_cycles; -// uint64 enter_readyQ_instrs; - uint64 start_assign_cycles; - uint64 start_assign_instrs; - uint64 end_assign_cycles; - uint64 end_assign_instrs; - uint64 start_work_cycles; - uint64 start_work_instrs; - uint64 suspend_cycles; - uint64 suspend_instrs; - uint64 next_task_req_cycles; - uint64 next_task_req_instrs; - void* addr_of_libcall_for_req; -} CounterRecord; - -FILE* dot_file; - -void print_record_human_readable(CounterRecord* record); - -void print_record_csv(CounterRecord* record); - -void print_record_csv_to_file(CounterRecord* record, FILE* file); - -void set_dot_file(FILE* file); - -void print_dot_node_info(void* counterRecord); - -#endif /* COUNTERS_H */ - diff -r f6d81915512c -r 5b419522dc7f MasterLoop.c --- a/MasterLoop.c Fri Feb 10 18:35:00 2012 +0100 +++ b/MasterLoop.c Thu Mar 08 19:02:16 2012 +0100 @@ -89,6 +89,7 @@ volatileMasterPr = animatingPr; masterPr = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp + int vpid,task; //First animation of each MasterVP will in turn animate this part // of setup code.. (VP creator sets up the stack as if this function @@ -152,8 +153,18 @@ saveLowTimeStampCountInto( startStamp1 ); #endif #ifdef MEAS__PERF_COUNTERS - int vpid = currSlot->procrAssignedToSlot->procrID; - int task = currSlot->procrAssignedToSlot->numTimesScheduled; + /* Request Handler may call resume() on the VP, but we want to + * account the whole interval to the same task. Therefore, need + * to save task ID at the beginning. + * + * Using this value as "end of AppResponder Invocation Time" + * is possible if there is only one SchedSlot per core - + * invoking processor is last to be treated here! If more than + * one slot, MasterLoop processing time for all but the last VP + * would be erroneously counted as invocation time. + */ + vpid = currSlot->procrAssignedToSlot->procrID; + task = currSlot->procrAssignedToSlot->numTimesScheduled; uint64 cycles, instrs; saveCyclesAndInstrs(thisCoresIdx,cycles, instrs); (*counterHandler)(AppResponder_start,vpid,task,currSlot->procrAssignedToSlot,cycles,instrs); @@ -169,10 +180,11 @@ _VMSMasterEnv->reqHdlrHighTimeHist ); #endif #ifdef MEAS__PERF_COUNTERS - //done with constraints check + //use previous task ID here (may or may not be the same as current VP state) uint64 cycles2,instrs2; saveCyclesAndInstrs(thisCoresIdx,cycles2, instrs2); (*counterHandler)(AppResponder_end,vpid,task,currSlot->procrAssignedToSlot,cycles2,instrs2); + (*counterHandler)(Timestamp_end,vpid,task,currSlot->procrAssignedToSlot,rdtsc(),0); #endif //============================================================ } @@ -180,11 +192,22 @@ { //give slot a new virt procr #ifdef MEAS__PERF_COUNTERS //start assigner + /* Don't know who to account time to yet - goes to assigned VP + * after the call. + */ + int empty = FALSE; + if(currSlot->procrAssignedToSlot == NULL){ + empty= TRUE; + } uint64 tmp_cycles; uint64 tmp_instrs; saveCyclesAndInstrs(thisCoresIdx,tmp_cycles,tmp_instrs); - //FIXME WTF AM I DOING WHY DOES THIS EVEN WORK - //(*counterHandler)(MasterLoop_beforeNextAssign,schedVirtPr,tmp_cycles,tmp_instrs); + uint64 tsc = rdtsc(); + if(vpid > 0) { + (*counterHandler)(NextAssigner_start,vpid,task,currSlot->procrAssignedToSlot,tmp_cycles,tmp_instrs); + vpid = 0; + task = 0; + } #endif schedVirtPr = (*slaveScheduler)( semanticEnv, thisCoresIdx, slotIdx ); @@ -201,6 +224,11 @@ uint64 cycles; uint64 instrs; saveCyclesAndInstrs(thisCoresIdx,cycles,instrs); + + if(empty){ + (*counterHandler)(AssignerInvocation_start,schedVirtPr->procrID,schedVirtPr->numTimesScheduled,schedVirtPr,masterEnv->start_master_lock[thisCoresIdx][0],masterEnv->start_master_lock[thisCoresIdx][1]); + } + (*counterHandler)(Timestamp_start,schedVirtPr->procrID,schedVirtPr->numTimesScheduled,schedVirtPr,tsc,0); (*counterHandler)(Assigner_start,schedVirtPr->procrID,schedVirtPr->numTimesScheduled,schedVirtPr,tmp_cycles,tmp_instrs); (*counterHandler)(Assigner_end,schedVirtPr->procrID,schedVirtPr->numTimesScheduled,schedVirtPr,cycles,instrs); #endif diff -r f6d81915512c -r 5b419522dc7f VMS.c --- a/VMS.c Fri Feb 10 18:35:00 2012 +0100 +++ b/VMS.c Thu Mar 08 19:02:16 2012 +0100 @@ -193,31 +193,6 @@ #endif #ifdef MEAS__PERF_COUNTERS -/* - _VMSMasterEnv->counter_history = VMS__malloc(10*sizeof(void*)); - _VMSMasterEnv->counter_history_array_info = makePrivDynArrayInfoFrom((void***)&(_VMSMasterEnv->counter_history),10); -*/ - //printf("Creating HW counters..."); -/* - FILE* output; - int n; - char filename[255]; - for(n=0;n<255;n++) - { - sprintf(filename, "./counters/Counters.%d.csv",n); - output = fopen(filename,"r"); - if(output) - { - fclose(output); - }else{ - break; - } - } - printf("Saving Counter measurements to File: %s ...\n", filename); - output = fopen(filename,"w+"); - _VMSMasterEnv->counteroutput = output; -*/ - struct perf_event_attr hw_event; memset(&hw_event,0,sizeof(hw_event)); hw_event.type = PERF_TYPE_HARDWARE; @@ -228,8 +203,8 @@ hw_event.pinned = 1; /* must always be on PMU */ hw_event.exclusive = 0; /* only group on PMU */ hw_event.exclude_user = 0; /* don't count user */ - hw_event.exclude_kernel = 1; /* ditto kernel */ - hw_event.exclude_hv = 1; /* ditto hypervisor */ + hw_event.exclude_kernel = 0; /* ditto kernel */ + hw_event.exclude_hv = 0; /* ditto hypervisor */ hw_event.exclude_idle = 0; /* don't count when idle */ hw_event.mmap = 0; /* include mmap data */ hw_event.comm = 0; /* include comm data */ @@ -260,10 +235,9 @@ perror("Failed to open instrs counter"); } } - prctl(PR_TASK_PERF_EVENTS_ENABLE); - uint64 tmpc,tmpi; - saveCyclesAndInstrs(0,tmpc,tmpi); - printf("Start: cycles = %llu, instrs = %llu\n",tmpc,tmpi); + //uint64 tmpc,tmpi; + //saveCyclesAndInstrs(0,tmpc,tmpi); + //printf("Start: cycles = %llu, instrs = %llu\n",tmpc,tmpi); #endif //======================================================================== @@ -314,8 +288,10 @@ //Make the threads that animate the core loops for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ) - { coreLoopThdParams[coreIdx] = VMS__malloc( sizeof(ThdParams) ); + { coreLoopThdParams[coreIdx] = VMS__malloc( sizeof(ThdParams) + CACHE_LINE ); //make sure there is no false sharing coreLoopThdParams[coreIdx]->coreNum = coreIdx; + coreLoopThdParams[coreIdx]->sent_ctr = 0; + coreLoopThdParams[coreIdx]->ret_tsc = 0; retCode = pthread_create( &(coreLoopThdHandles[coreIdx]), @@ -324,6 +300,7 @@ (void *)(coreLoopThdParams[coreIdx]) ); if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(1);} } + prctl(PR_TASK_PERF_EVENTS_ENABLE); } /*Semantic layer calls this when it want the system to start running.. @@ -737,8 +714,7 @@ #ifdef MEAS__PERF_COUNTERS uint64 tmpc,tmpi; saveCyclesAndInstrs(0,tmpc,tmpi); - printf("End: cycles = %llu, instrs = %llu\n",tmpc,tmpi); - prctl(PR_TASK_PERF_EVENTS_DISABLE); + //printf("End: cycles = %llu, instrs = %llu\n",tmpc,tmpi); /* for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ){ close(_VMSMasterEnv->cycles_counter_fd[coreIdx]); @@ -789,7 +765,7 @@ //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile); //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHist ); - + prctl(PR_TASK_PERF_EVENTS_DISABLE); #ifdef MEAS__TIME_PLUGIN printHist( _VMSMasterEnv->reqHdlrLowTimeHist ); saveHistToFile( _VMSMasterEnv->reqHdlrLowTimeHist ); @@ -879,3 +855,74 @@ exit(1); } + __inline__ uint64_t rdtsc(void){ + uint32_t lo, hi; + __asm__ __volatile__ ( // serialize + "xorl %%eax,%%eax \n cpuid" + ::: "%rax", "%rbx", "%rcx", "%rdx"); + __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); + /* asm volatile("RDTSC;" + "movl %%eax, %0;" + "movl %%edx, %1;" + : "=m" (lo), "=m" (hi) + : + : "%eax", "%edx" + ); */ + return (uint64_t)hi << 32 | lo; + } + + uint64 tsc_offset_send(ThdParams* thisCoresThdParams, uint64 initval){ + uint64 ret_tsc_curr; //local copy of coreLoopThdParams->ret_tsc + uint64 ret_tsc_prev; + uint64 local_before; + uint64 local_after; + + ret_tsc_prev = initval; + ret_tsc_curr = initval; + local_before = rdtsc(); + thisCoresThdParams->sent_ctr++; + while(ret_tsc_curr == ret_tsc_prev) + ret_tsc_curr = thisCoresThdParams->ret_tsc; + local_after = rdtsc(); + ret_tsc_prev = ret_tsc_curr; + + int i; + for(i=0;i<3;++i){ + local_before = rdtsc(); + thisCoresThdParams->sent_ctr++; + while(ret_tsc_curr == ret_tsc_prev) + ret_tsc_curr = thisCoresThdParams->ret_tsc; + local_after = rdtsc(); + int64 midpoint = local_before + (local_after-local_before)/2; + int64 difference; + if (midpoint > ret_tsc_curr) + difference = midpoint - (int64)ret_tsc_curr; + else + difference = (int64)ret_tsc_curr - midpoint; + //printf("TSC: %llu (Core %d) = %llu (Core %d) // difference=%llu\n",midpoint,thisCoresThdParams->coreNum,ret_tsc_curr,thisCoresThdParams->coreNum + 1,difference); + ret_tsc_prev = ret_tsc_curr; + } + + return ret_tsc_curr; + } + + int tsc_offset_resp(ThdParams* sendCoresThdParams, int initialctrval){ + + int send_ctr_curr = initialctrval; + int send_ctr_prev = initialctrval; + + + while(send_ctr_curr == send_ctr_prev) + send_ctr_curr = sendCoresThdParams->sent_ctr; + sendCoresThdParams->ret_tsc = rdtsc(); + send_ctr_prev = send_ctr_curr; + + int i; + for(i=0;i<3;++i){ + while(send_ctr_curr == send_ctr_prev) + send_ctr_curr = sendCoresThdParams->sent_ctr; + sendCoresThdParams->ret_tsc = rdtsc(); + send_ctr_prev = send_ctr_curr; + } + return send_ctr_curr; + } \ No newline at end of file diff -r f6d81915512c -r 5b419522dc7f VMS.h --- a/VMS.h Fri Feb 10 18:35:00 2012 +0100 +++ b/VMS.h Thu Mar 08 19:02:16 2012 +0100 @@ -82,7 +82,7 @@ //========================= Hardware related Constants ===================== //This value is the number of hardware threads in the shared memory // machine -#define NUM_CORES 4 +#define NUM_CORES 80 // tradeoff amortizing master fixed overhead vs imbalance potential // when work-stealing, can make bigger, at risk of losing cache affinity @@ -98,7 +98,7 @@ // memory for VMS__malloc #define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */ -#define CACHE_LINE 64 +#define CACHE_LINE 256 #define PAGE_SIZE 4096 @@ -274,6 +274,7 @@ #ifdef MEAS__PERF_COUNTERS int cycles_counter_fd[NUM_CORES]; int instrs_counter_fd[NUM_CORES]; + uint64 start_master_lock[NUM_CORES][2]; //FILE* counteroutput; //CounterRecord** counter_history; //PrivDynArrayInfo* counter_history_array_info; @@ -309,6 +310,8 @@ { void *endThdPt; unsigned int coreNum; + volatile int sent_ctr; + volatile uint64 ret_tsc; } ThdParams; @@ -317,8 +320,9 @@ pthread_mutex_t suspendLock; pthread_cond_t suspend_cond; - - + uint64 tsc_offset_send(ThdParams* thisCoresThdParams,uint64 initval); + int tsc_offset_resp(ThdParams* sendCoresThdParams,int initctr); + //===================== Global Vars =================== volatile MasterEnv *_VMSMasterEnv; @@ -420,6 +424,8 @@ /* clobber */ : "%eax", "%edx" \ ); + __inline__ uint64_t rdtsc(void); + //==================== #define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \ makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \ @@ -449,11 +455,15 @@ AppResponderInvocation_start, AppResponder_start, AppResponder_end, + AssignerInvocation_start, + NextAssigner_start, Assigner_start, Assigner_end, Work_start, Work_end, - HwResponderInvocation_start + HwResponderInvocation_start, + Timestamp_start, + Timestamp_end }; #define getReturnAddressBeforeLibraryCall(vp_ptr, res_ptr) do{ \ diff -r f6d81915512c -r 5b419522dc7f __brch__perf_counters --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/__brch__perf_counters Thu Mar 08 19:02:16 2012 +0100 @@ -0,0 +1,1 @@ +Performance counter support