changeset 211:5b419522dc7f perf_counters

time stamp checks added
author Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date Thu, 08 Mar 2012 19:02:16 +0100
parents f6d81915512c
children df00af7eb307
files CoreLoop.c Counters/Counters.c Counters/Counters.h MasterLoop.c VMS.c VMS.h __brch__perf_counters
diffstat 7 files changed, 162 insertions(+), 163 deletions(-) [+]
line diff
     1.1 --- a/CoreLoop.c	Fri Feb 10 18:35:00 2012 +0100
     1.2 +++ b/CoreLoop.c	Thu Mar 08 19:02:16 2012 +0100
     1.3 @@ -33,7 +33,7 @@
     1.4  void *
     1.5  coreLoop( void *paramsIn )
     1.6   { 
     1.7 -   ThdParams      *coreLoopThdParams;
     1.8 +   ThdParams      *thisCoresThdParams;
     1.9     int             thisCoresIdx;
    1.10     VirtProcr      *currPr = NULL;
    1.11     VMSQueueStruc *readyToAnimateQ;
    1.12 @@ -45,8 +45,8 @@
    1.13     //preGateProgress, waitProgress, exitProgress, gateClosed;
    1.14  
    1.15  
    1.16 -   coreLoopThdParams = (ThdParams *)paramsIn;
    1.17 -   thisCoresIdx = coreLoopThdParams->coreNum;
    1.18 +   thisCoresThdParams = (ThdParams *)paramsIn;
    1.19 +   thisCoresIdx = thisCoresThdParams->coreNum;
    1.20  
    1.21     gate.gateClosed      = FALSE;
    1.22     gate.preGateProgress = 0;
    1.23 @@ -69,7 +69,7 @@
    1.24        //Linux requires pinning thd to core inside thread-function
    1.25        //Designate a core by a 1 in bit-position corresponding to the core
    1.26     CPU_ZERO(&coreMask);
    1.27 -   CPU_SET(coreLoopThdParams->coreNum,&coreMask);
    1.28 +   CPU_SET(thisCoresThdParams->coreNum,&coreMask);
    1.29     //coreMask = 1L << coreLoopThdParams->coreNum;
    1.30     
    1.31     pthread_t selfThd = pthread_self();
    1.32 @@ -80,6 +80,28 @@
    1.33  #ifdef MEAS__PERF_COUNTERS
    1.34     CounterHandler counterHandler = _VMSMasterEnv->counterHandler;
    1.35     VirtProcr      *lastVPBeforeMaster = NULL;
    1.36 +   
    1.37 +   if(thisCoresThdParams->coreNum == 0){
    1.38 +       uint64 initval = tsc_offset_send(thisCoresThdParams,0);
    1.39 +       ThdParams* sendCoresThdParams = coreLoopThdParams[NUM_CORES - 1];
    1.40 +       int sndctr = tsc_offset_resp(sendCoresThdParams,0);
    1.41 +       tsc_offset_send(thisCoresThdParams,initval);
    1.42 +   } 
    1.43 +   if(0 < (thisCoresThdParams->coreNum) && (thisCoresThdParams->coreNum) < (NUM_CORES - 1)){
    1.44 +       ThdParams* sendCoresThdParams = coreLoopThdParams[thisCoresThdParams->coreNum - 1];
    1.45 +       int sndctr = tsc_offset_resp(sendCoresThdParams, 0);
    1.46 +       uint64 initval = tsc_offset_send(thisCoresThdParams,0);
    1.47 +       tsc_offset_resp(sendCoresThdParams, sndctr);
    1.48 +       tsc_offset_send(thisCoresThdParams,initval);
    1.49 +   } 
    1.50 +   if(thisCoresThdParams->coreNum == (NUM_CORES - 1)){
    1.51 +       ThdParams* sendCoresThdParams = coreLoopThdParams[thisCoresThdParams->coreNum - 1];
    1.52 +       int sndctr = tsc_offset_resp(sendCoresThdParams,0);
    1.53 +       uint64 initval = tsc_offset_send(thisCoresThdParams,0);
    1.54 +       tsc_offset_resp(sendCoresThdParams,sndctr);
    1.55 +   }
    1.56 +   
    1.57 +   //printf("Core %d starting!\n",thisCoresThdParams->coreNum);
    1.58  #endif
    1.59     
    1.60     //Save the return address in the SwitchVP function
    1.61 @@ -133,11 +155,14 @@
    1.62        saveLowTimeStampCountInto( startStamp );
    1.63        #endif
    1.64        #ifdef MEAS__PERF_COUNTERS
    1.65 +      uint64 cycles,instrs;
    1.66 +      saveCyclesAndInstrs(thisCoresIdx,cycles, instrs);
    1.67        if(lastVPBeforeMaster){
    1.68 -        uint64 cycles,instrs;
    1.69 -        saveCyclesAndInstrs(thisCoresIdx,cycles, instrs);
    1.70          (*counterHandler)(AppResponderInvocation_start,lastVPBeforeMaster->procrID,lastVPBeforeMaster->numTimesScheduled,lastVPBeforeMaster,cycles,instrs);
    1.71          lastVPBeforeMaster = NULL;
    1.72 +      } else {
    1.73 +          _VMSMasterEnv->start_master_lock[thisCoresIdx][0] = cycles;
    1.74 +          _VMSMasterEnv->start_master_lock[thisCoresIdx][1] = instrs;
    1.75        }
    1.76        #endif
    1.77        //=====================================================================
     2.1 --- a/Counters/Counters.c	Fri Feb 10 18:35:00 2012 +0100
     2.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.3 @@ -1,56 +0,0 @@
     2.4 -#include "Counters.h"
     2.5 -
     2.6 -
     2.7 -
     2.8 -void print_record_human_readable(CounterRecord* record){
     2.9 -    printf("=== Loop Stats (VP %d # %d suspended at %p): ===\n",record->vp_id,record->task_position,record->addr_of_libcall_for_req);
    2.10 -    printf("Constraints check done:\t%llu cycles, %llu instrs [Core %d]\n",record->sc_done_cycles,record->sc_done_instrs,record->req_core);
    2.11 -    printf("Time in ready queue:\tblocked at %u, unblocked at %u\n",record->blocked_timestamp,record->unblocked_timestamp);
    2.12 -    printf("Assigner started:\t%llu cycles, %llu instrs [Core %d]\n",record->start_assign_cycles,record->start_assign_instrs,record->assigning_core);
    2.13 -    printf("Assigner ended:  \t%llu cycles, %llu instrs [Core %d]\n",record->end_assign_cycles,record->end_assign_instrs,record->assigning_core);
    2.14 -    printf("Work+comm started:\t%llu cycles, %llu instrs [Core %d]\n",record->start_work_cycles,record->start_work_instrs,record->work_core);
    2.15 -    printf("Work+comm ended:\t%llu cycles, %llu instrs [Core %d]\n",record->suspend_cycles,record->suspend_instrs,record->work_core);
    2.16 -    printf("Status request read:\t%llu cycles, %llu instrs [Core %d]\n",record->req_cycles,record->req_instrs,record->req_core);  
    2.17 -    fflush(stdin);
    2.18 -}
    2.19 -    
    2.20 -void print_record_csv(CounterRecord* record) {
    2.21 -    //Columns are VP,task,ret_addr, (core,cycles,instrs)* for each savepoint, blocked,unblocked
    2.22 -    printf("%d,%d,%p,",record->vp_id,record->task_position,record->addr_of_libcall_for_req);
    2.23 -    printf("%d,%llu,%llu,",record->req_core,record->sc_done_cycles,record->sc_done_instrs);
    2.24 -    printf("%d,%llu,%llu,",record->assigning_core,record->start_assign_cycles,record->start_assign_instrs);
    2.25 -    printf("%d,%llu,%llu,",record->assigning_core,record->end_assign_cycles,record->end_assign_instrs);
    2.26 -    printf("%d,%llu,%llu,",record->work_core,record->start_work_cycles,record->start_work_instrs);
    2.27 -    printf("%d,%llu,%llu,",record->work_core,record->suspend_cycles,record->suspend_instrs);
    2.28 -    printf("%d,%llu,%llu,",record->req_core,record->req_cycles,record->req_instrs); 
    2.29 -    printf("%u,%u\n",record->blocked_timestamp,record->unblocked_timestamp);
    2.30 -    fflush(stdin);
    2.31 -}
    2.32 -
    2.33 -void print_record_csv_to_file(CounterRecord* record, FILE* file) {
    2.34 -    //Columns are VP,task,ret_addr, (core,cycles,instrs)* for each savepoint, blocked,unblocked
    2.35 -    fprintf(file,"%d,%d,%p,",record->vp_id,record->task_position,record->addr_of_libcall_for_req);
    2.36 -    fprintf(file,"%d,%llu,%llu,",record->req_core,record->sc_done_cycles,record->sc_done_instrs);
    2.37 -    fprintf(file,"%d,%llu,%llu,",record->assigning_core,record->start_assign_cycles,record->start_assign_instrs);
    2.38 -    fprintf(file,"%d,%llu,%llu,",record->assigning_core,record->end_assign_cycles,record->end_assign_instrs);
    2.39 -    fprintf(file,"%d,%llu,%llu,",record->work_core,record->start_work_cycles,record->start_work_instrs);
    2.40 -    fprintf(file,"%d,%llu,%llu,",record->work_core,record->suspend_cycles,record->suspend_instrs);
    2.41 -    fprintf(file,"%d,%llu,%llu,",record->req_core,record->req_cycles,record->req_instrs); 
    2.42 -    fprintf(file,"%d,%llu,%llu,",record->req_core,record->next_task_req_cycles,record->next_task_req_instrs);   
    2.43 -    fprintf(file,"%u,%u\n",record->blocked_timestamp,record->unblocked_timestamp);
    2.44 -}
    2.45 -
    2.46 -void set_dot_file(FILE* file){
    2.47 -    dot_file = file;
    2.48 -}
    2.49 -
    2.50 -void print_dot_node_info(void* _record){
    2.51 -    CounterRecord* record = (CounterRecord*) _record;
    2.52 -    fprintf(dot_file,"VP_%d_%d [shape=record,label=\"{ VP %d # %d ",record->vp_id,record->task_position,record->vp_id,record->task_position);
    2.53 -    fprintf(dot_file,"| { sc_ch | C:%lld I:%lld }",record->sc_done_cycles - record->req_cycles,record->sc_done_instrs - record->req_instrs);
    2.54 -    fprintf(dot_file,"| { sync | C:%d }", record->task_position ? record->unblocked_timestamp - record->blocked_timestamp : 0);
    2.55 -    fprintf(dot_file,"| { assign | C:%lld I:%lld }",record->end_assign_cycles - record->start_assign_cycles,record->end_assign_instrs - record->start_assign_instrs);
    2.56 -    fprintf(dot_file,"| { W + C | C:%lld I:%lld }",record->suspend_cycles - record->start_work_cycles,record->suspend_instrs - record->start_work_instrs);
    2.57 -    fprintf(dot_file,"| { status | C:%lld I:%lld }",record->next_task_req_cycles - record->suspend_cycles,record->next_task_req_instrs - record->suspend_instrs);
    2.58 -    fprintf(dot_file,"}\"];\n");
    2.59 -}
    2.60 \ No newline at end of file
     3.1 --- a/Counters/Counters.h	Fri Feb 10 18:35:00 2012 +0100
     3.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.3 @@ -1,56 +0,0 @@
     3.4 -/* 
     3.5 - * File:   counters.h
     3.6 - * Author: engelhardt
     3.7 - *
     3.8 - * Created on 20. Juli 2011, 13:27
     3.9 - */
    3.10 -
    3.11 -#include "../VMS_primitive_data_types.h"
    3.12 -#include "../../../C_Libraries/DynArray/DynArray.h"
    3.13 -
    3.14 -#include <stdio.h>
    3.15 -
    3.16 -#ifndef COUNTERS_H
    3.17 -#define	COUNTERS_H
    3.18 -
    3.19 -typedef struct {
    3.20 -    int work_core;
    3.21 -    int assigning_core;
    3.22 -    int req_core;
    3.23 -    int vp_id;
    3.24 -    int task_position;
    3.25 -    uint32 blocked_timestamp;
    3.26 -    uint32 unblocked_timestamp;
    3.27 -    uint64 req_cycles;
    3.28 -    uint64 req_instrs;
    3.29 -    uint64 sc_done_cycles;
    3.30 -    uint64 sc_done_instrs;
    3.31 -//    uint64 enter_readyQ_cycles;
    3.32 -//    uint64 enter_readyQ_instrs;
    3.33 -    uint64 start_assign_cycles;
    3.34 -    uint64 start_assign_instrs;
    3.35 -    uint64 end_assign_cycles;
    3.36 -    uint64 end_assign_instrs;
    3.37 -    uint64 start_work_cycles;
    3.38 -    uint64 start_work_instrs;
    3.39 -    uint64 suspend_cycles;
    3.40 -    uint64 suspend_instrs;
    3.41 -    uint64 next_task_req_cycles;
    3.42 -    uint64 next_task_req_instrs;
    3.43 -    void* addr_of_libcall_for_req;
    3.44 -} CounterRecord;
    3.45 -
    3.46 -FILE* dot_file;
    3.47 -
    3.48 -void print_record_human_readable(CounterRecord* record);
    3.49 -
    3.50 -void print_record_csv(CounterRecord* record);
    3.51 -
    3.52 -void print_record_csv_to_file(CounterRecord* record, FILE* file);
    3.53 -
    3.54 -void set_dot_file(FILE* file);
    3.55 -
    3.56 -void print_dot_node_info(void* counterRecord);
    3.57 -
    3.58 -#endif	/* COUNTERS_H */
    3.59 -
     4.1 --- a/MasterLoop.c	Fri Feb 10 18:35:00 2012 +0100
     4.2 +++ b/MasterLoop.c	Thu Mar 08 19:02:16 2012 +0100
     4.3 @@ -89,6 +89,7 @@
     4.4     
     4.5     volatileMasterPr = animatingPr;
     4.6     masterPr         = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp
     4.7 +   int vpid,task;
     4.8  
     4.9        //First animation of each MasterVP will in turn animate this part
    4.10        // of setup code.. (VP creator sets up the stack as if this function
    4.11 @@ -152,8 +153,18 @@
    4.12                 saveLowTimeStampCountInto( startStamp1 );
    4.13                 #endif
    4.14                 #ifdef MEAS__PERF_COUNTERS
    4.15 -               int vpid = currSlot->procrAssignedToSlot->procrID;
    4.16 -               int task = currSlot->procrAssignedToSlot->numTimesScheduled;
    4.17 +               /* Request Handler may call resume() on the VP, but we want to 
    4.18 +                * account the whole interval to the same task. Therefore, need
    4.19 +                * to save task ID at the beginning.
    4.20 +                * 
    4.21 +                * Using this value as "end of AppResponder Invocation Time"
    4.22 +                * is possible if there is only one SchedSlot per core -
    4.23 +                * invoking processor is last to be treated here! If more than
    4.24 +                * one slot, MasterLoop processing time for all but the last VP
    4.25 +                * would be erroneously counted as invocation time.
    4.26 +                */
    4.27 +               vpid = currSlot->procrAssignedToSlot->procrID;
    4.28 +               task = currSlot->procrAssignedToSlot->numTimesScheduled;
    4.29                 uint64 cycles, instrs;
    4.30                 saveCyclesAndInstrs(thisCoresIdx,cycles, instrs);
    4.31                 (*counterHandler)(AppResponder_start,vpid,task,currSlot->procrAssignedToSlot,cycles,instrs);
    4.32 @@ -169,10 +180,11 @@
    4.33                                          _VMSMasterEnv->reqHdlrHighTimeHist );
    4.34                 #endif
    4.35                 #ifdef MEAS__PERF_COUNTERS
    4.36 -               //done with constraints check
    4.37 +               //use previous task ID here (may or may not be the same as current VP state)
    4.38                 uint64 cycles2,instrs2;
    4.39                 saveCyclesAndInstrs(thisCoresIdx,cycles2, instrs2);
    4.40                 (*counterHandler)(AppResponder_end,vpid,task,currSlot->procrAssignedToSlot,cycles2,instrs2);
    4.41 +               (*counterHandler)(Timestamp_end,vpid,task,currSlot->procrAssignedToSlot,rdtsc(),0);
    4.42                 #endif
    4.43                 //============================================================
    4.44         }
    4.45 @@ -180,11 +192,22 @@
    4.46         {    //give slot a new virt procr
    4.47                 #ifdef MEAS__PERF_COUNTERS
    4.48                  //start assigner
    4.49 +               /* Don't know who to account time to yet - goes to assigned VP
    4.50 +                * after the call.
    4.51 +                */
    4.52 +                int empty = FALSE;
    4.53 +                if(currSlot->procrAssignedToSlot == NULL){
    4.54 +                   empty= TRUE;
    4.55 +                }
    4.56                 uint64 tmp_cycles;
    4.57                 uint64 tmp_instrs;
    4.58                 saveCyclesAndInstrs(thisCoresIdx,tmp_cycles,tmp_instrs);
    4.59 -               //FIXME WTF AM I DOING WHY DOES THIS EVEN WORK
    4.60 -               //(*counterHandler)(MasterLoop_beforeNextAssign,schedVirtPr,tmp_cycles,tmp_instrs);
    4.61 +               uint64 tsc = rdtsc();
    4.62 +               if(vpid > 0) {
    4.63 +                   (*counterHandler)(NextAssigner_start,vpid,task,currSlot->procrAssignedToSlot,tmp_cycles,tmp_instrs);
    4.64 +                   vpid = 0;
    4.65 +                   task = 0;
    4.66 +               }
    4.67                 #endif
    4.68           schedVirtPr =
    4.69            (*slaveScheduler)( semanticEnv, thisCoresIdx, slotIdx );
    4.70 @@ -201,6 +224,11 @@
    4.71                 uint64 cycles;
    4.72                 uint64 instrs;
    4.73                 saveCyclesAndInstrs(thisCoresIdx,cycles,instrs);
    4.74 + 
    4.75 +               if(empty){
    4.76 +                   (*counterHandler)(AssignerInvocation_start,schedVirtPr->procrID,schedVirtPr->numTimesScheduled,schedVirtPr,masterEnv->start_master_lock[thisCoresIdx][0],masterEnv->start_master_lock[thisCoresIdx][1]);
    4.77 +               }
    4.78 +               (*counterHandler)(Timestamp_start,schedVirtPr->procrID,schedVirtPr->numTimesScheduled,schedVirtPr,tsc,0);
    4.79                 (*counterHandler)(Assigner_start,schedVirtPr->procrID,schedVirtPr->numTimesScheduled,schedVirtPr,tmp_cycles,tmp_instrs);
    4.80                 (*counterHandler)(Assigner_end,schedVirtPr->procrID,schedVirtPr->numTimesScheduled,schedVirtPr,cycles,instrs);
    4.81                 #endif
     5.1 --- a/VMS.c	Fri Feb 10 18:35:00 2012 +0100
     5.2 +++ b/VMS.c	Thu Mar 08 19:02:16 2012 +0100
     5.3 @@ -193,31 +193,6 @@
     5.4     #endif
     5.5  
     5.6     #ifdef MEAS__PERF_COUNTERS
     5.7 -/*
     5.8 -   _VMSMasterEnv->counter_history = VMS__malloc(10*sizeof(void*));
     5.9 -   _VMSMasterEnv->counter_history_array_info = makePrivDynArrayInfoFrom((void***)&(_VMSMasterEnv->counter_history),10);
    5.10 -*/
    5.11 -   //printf("Creating HW counters...");
    5.12 -/*
    5.13 -   FILE* output;
    5.14 -   int n;
    5.15 -   char filename[255];    
    5.16 -    for(n=0;n<255;n++)
    5.17 -    {
    5.18 -        sprintf(filename, "./counters/Counters.%d.csv",n);
    5.19 -        output = fopen(filename,"r");
    5.20 -        if(output)
    5.21 -        {
    5.22 -            fclose(output);
    5.23 -        }else{
    5.24 -            break;
    5.25 -        }
    5.26 -    }
    5.27 -    printf("Saving Counter measurements to File: %s ...\n", filename);
    5.28 -    output = fopen(filename,"w+");
    5.29 -   _VMSMasterEnv->counteroutput = output;
    5.30 -*/
    5.31 -    
    5.32     struct perf_event_attr hw_event;
    5.33     memset(&hw_event,0,sizeof(hw_event));
    5.34     	hw_event.type = PERF_TYPE_HARDWARE;
    5.35 @@ -228,8 +203,8 @@
    5.36  	hw_event.pinned = 1; /* must always be on PMU */
    5.37  	hw_event.exclusive = 0; /* only group on PMU     */
    5.38  	hw_event.exclude_user = 0; /* don't count user      */
    5.39 -	hw_event.exclude_kernel = 1; /* ditto kernel          */
    5.40 -	hw_event.exclude_hv = 1; /* ditto hypervisor      */
    5.41 +	hw_event.exclude_kernel = 0; /* ditto kernel          */
    5.42 +	hw_event.exclude_hv = 0; /* ditto hypervisor      */
    5.43  	hw_event.exclude_idle = 0; /* don't count when idle */
    5.44  	hw_event.mmap = 0; /* include mmap data     */
    5.45  	hw_event.comm = 0; /* include comm data     */
    5.46 @@ -260,10 +235,9 @@
    5.47              perror("Failed to open instrs counter");
    5.48          }
    5.49     }
    5.50 -   prctl(PR_TASK_PERF_EVENTS_ENABLE);
    5.51 -   uint64 tmpc,tmpi;
    5.52 -   saveCyclesAndInstrs(0,tmpc,tmpi);
    5.53 -   printf("Start: cycles = %llu, instrs = %llu\n",tmpc,tmpi);
    5.54 +   //uint64 tmpc,tmpi;
    5.55 +   //saveCyclesAndInstrs(0,tmpc,tmpi);
    5.56 +   //printf("Start: cycles = %llu, instrs = %llu\n",tmpc,tmpi);
    5.57     #endif
    5.58     
    5.59     //========================================================================
    5.60 @@ -314,8 +288,10 @@
    5.61  
    5.62        //Make the threads that animate the core loops
    5.63     for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )
    5.64 -    { coreLoopThdParams[coreIdx]          = VMS__malloc( sizeof(ThdParams) );
    5.65 +    { coreLoopThdParams[coreIdx]          = VMS__malloc( sizeof(ThdParams) + CACHE_LINE ); //make sure there is no false sharing
    5.66        coreLoopThdParams[coreIdx]->coreNum = coreIdx;
    5.67 +      coreLoopThdParams[coreIdx]->sent_ctr = 0;
    5.68 +      coreLoopThdParams[coreIdx]->ret_tsc = 0;
    5.69  
    5.70        retCode =
    5.71        pthread_create( &(coreLoopThdHandles[coreIdx]),
    5.72 @@ -324,6 +300,7 @@
    5.73                 (void *)(coreLoopThdParams[coreIdx]) );
    5.74        if(retCode){printf("ERROR creating thread: %d\n", retCode); exit(1);}
    5.75      }
    5.76 +      prctl(PR_TASK_PERF_EVENTS_ENABLE);
    5.77   }
    5.78  
    5.79  /*Semantic layer calls this when it want the system to start running..
    5.80 @@ -737,8 +714,7 @@
    5.81  #ifdef MEAS__PERF_COUNTERS 
    5.82     uint64 tmpc,tmpi;
    5.83     saveCyclesAndInstrs(0,tmpc,tmpi);
    5.84 -   printf("End: cycles = %llu, instrs = %llu\n",tmpc,tmpi);
    5.85 -   prctl(PR_TASK_PERF_EVENTS_DISABLE);
    5.86 +   //printf("End: cycles = %llu, instrs = %llu\n",tmpc,tmpi);
    5.87  /*
    5.88     for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ ){
    5.89         close(_VMSMasterEnv->cycles_counter_fd[coreIdx]);
    5.90 @@ -789,7 +765,7 @@
    5.91     //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, (DynArrayFnPtr)&saveHistToFile);
    5.92     //forAllInDynArrayDo( _VMSMasterEnv->measHistsInfo, &freeHist );
    5.93  
    5.94 -
    5.95 +   prctl(PR_TASK_PERF_EVENTS_DISABLE);
    5.96     #ifdef MEAS__TIME_PLUGIN
    5.97     printHist( _VMSMasterEnv->reqHdlrLowTimeHist );
    5.98     saveHistToFile( _VMSMasterEnv->reqHdlrLowTimeHist );
    5.99 @@ -879,3 +855,74 @@
   5.100     exit(1);
   5.101   }
   5.102  
   5.103 + __inline__ uint64_t rdtsc(void){ // Returns the 64-bit value delivered by the x86 RDTSC instruction.
   5.104 +    uint32_t lo, hi; // low and high 32-bit halves, filled in by the RDTSC asm below
   5.105 +    __asm__ __volatile__ (      // serialize: execute CPUID with eax zeroed before reading the counter
   5.106 +    "xorl %%eax,%%eax \n        cpuid"
   5.107 +    ::: "%rax", "%rbx", "%rcx", "%rdx"); // no operands; these four registers are declared clobbered
   5.108 +    __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); // eax is bound to lo, edx to hi
   5.109 +   /* asm volatile("RDTSC;"                   
   5.110 +                 "movl %%eax, %0;"         
   5.111 +                 "movl %%edx, %1;"         
   5.112 +               : "=m" (lo), "=m" (hi)
   5.113 +               :                        
   5.114 +               : "%eax", "%edx"         
   5.115 +                ); */
   5.116 +    return (uint64_t)hi << 32 | lo; // the cast binds first, so this is ((uint64_t)hi << 32) | lo
   5.117 +  }
   5.118 + 
   5.119 + uint64 tsc_offset_send(ThdParams* thisCoresThdParams, uint64 initval){ // Initiator side of a 4-round handshake: bump sent_ctr, spin until ret_tsc changes. Returns the last ret_tsc value read.
   5.120 +     uint64 ret_tsc_curr; //local copy of thisCoresThdParams->ret_tsc
   5.121 +     uint64 ret_tsc_prev; // value of ret_tsc seen in the previous round; a round ends when ret_tsc differs from it
   5.122 +     uint64 local_before; // rdtsc() taken just before sent_ctr is incremented
   5.123 +     uint64 local_after; // rdtsc() taken right after the changed ret_tsc has been read
   5.124 +     // initval is the value ret_tsc is expected to hold before the first reply arrives
   5.125 +     ret_tsc_prev = initval;
   5.126 +     ret_tsc_curr = initval;
   5.127 +     local_before = rdtsc();
   5.128 +     thisCoresThdParams->sent_ctr++;
   5.129 +     while(ret_tsc_curr == ret_tsc_prev) // busy-wait until ret_tsc holds a new value
   5.130 +         ret_tsc_curr = thisCoresThdParams->ret_tsc;
   5.131 +     local_after = rdtsc();
   5.132 +     ret_tsc_prev = ret_tsc_curr;
   5.133 +     // first round above: local_before/local_after are overwritten in the loop before being read
   5.134 +     int i;
   5.135 +     for(i=0;i<3;++i){ // three further rounds of the same exchange
   5.136 +         local_before = rdtsc();
   5.137 +         thisCoresThdParams->sent_ctr++;
   5.138 +         while(ret_tsc_curr == ret_tsc_prev)
   5.139 +             ret_tsc_curr = thisCoresThdParams->ret_tsc;
   5.140 +         local_after = rdtsc();
   5.141 +         int64 midpoint = local_before + (local_after-local_before)/2; // local time halfway through the round trip
   5.142 +         int64 difference; // |midpoint - ret_tsc_curr|; only consumed by the commented-out printf below
   5.143 +         if (midpoint > ret_tsc_curr) // NOTE(review): int64 vs uint64 comparison, performed as unsigned
   5.144 +             difference = midpoint - (int64)ret_tsc_curr;
   5.145 +         else
   5.146 +             difference = (int64)ret_tsc_curr - midpoint;
   5.147 +         //printf("TSC: %llu (Core %d) =  %llu (Core %d) // difference=%llu\n",midpoint,thisCoresThdParams->coreNum,ret_tsc_curr,thisCoresThdParams->coreNum + 1,difference);
   5.148 +         ret_tsc_prev = ret_tsc_curr;
   5.149 +     }
   5.150 +     // callers in the CoreLoop.c hunk feed this result back in as initval of a later call
   5.151 +     return ret_tsc_curr;
   5.152 + }
   5.153 + 
   5.154 + int tsc_offset_resp(ThdParams* sendCoresThdParams, int initialctrval){ // Responder side: 4 times, wait for sent_ctr to change, then store rdtsc() in ret_tsc. Returns the last sent_ctr value read.
   5.155 + // initialctrval is the sent_ctr value assumed to be current before the first request arrives
   5.156 +       int send_ctr_curr = initialctrval;
   5.157 +       int send_ctr_prev = initialctrval;
   5.158 +
   5.159 +       // first round: spin on the other side's counter, then reply with the local time stamp
   5.160 +       while(send_ctr_curr == send_ctr_prev) // busy-wait until sent_ctr holds a new value
   5.161 +           send_ctr_curr = sendCoresThdParams->sent_ctr;
   5.162 +       sendCoresThdParams->ret_tsc = rdtsc(); // the reply is written into the same struct that was polled
   5.163 +       send_ctr_prev = send_ctr_curr;
   5.164 +       // three further rounds of the same exchange
   5.165 +       int i;
   5.166 +       for(i=0;i<3;++i){
   5.167 +           while(send_ctr_curr == send_ctr_prev)
   5.168 +               send_ctr_curr = sendCoresThdParams->sent_ctr;
   5.169 +           sendCoresThdParams->ret_tsc = rdtsc();
   5.170 +           send_ctr_prev = send_ctr_curr;
   5.171 +       }
   5.172 +       return send_ctr_curr; // callers in the CoreLoop.c hunk pass this back as initialctrval of the next call
   5.173 + }
   5.174 \ No newline at end of file
     6.1 --- a/VMS.h	Fri Feb 10 18:35:00 2012 +0100
     6.2 +++ b/VMS.h	Thu Mar 08 19:02:16 2012 +0100
     6.3 @@ -82,7 +82,7 @@
     6.4  //=========================  Hardware related Constants =====================
     6.5     //This value is the number of hardware threads in the shared memory
     6.6     // machine
     6.7 -#define NUM_CORES        4
     6.8 +#define NUM_CORES        80
     6.9  
    6.10     // tradeoff amortizing master fixed overhead vs imbalance potential
    6.11     // when work-stealing, can make bigger, at risk of losing cache affinity
    6.12 @@ -98,7 +98,7 @@
    6.13     // memory for VMS__malloc
    6.14  #define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */
    6.15  
    6.16 -#define CACHE_LINE 64
    6.17 +#define CACHE_LINE 256
    6.18  #define PAGE_SIZE 4096
    6.19  
    6.20  
    6.21 @@ -274,6 +274,7 @@
    6.22     #ifdef MEAS__PERF_COUNTERS
    6.23     int cycles_counter_fd[NUM_CORES];
    6.24     int instrs_counter_fd[NUM_CORES];
    6.25 +   uint64 start_master_lock[NUM_CORES][2];
    6.26     //FILE* counteroutput;
    6.27     //CounterRecord** counter_history;
    6.28     //PrivDynArrayInfo* counter_history_array_info;
    6.29 @@ -309,6 +310,8 @@
    6.30   {
    6.31     void           *endThdPt;
    6.32     unsigned int    coreNum;
    6.33 +   volatile int    sent_ctr;
    6.34 +   volatile uint64 ret_tsc;
    6.35   }
    6.36  ThdParams;
    6.37  
    6.38 @@ -317,8 +320,9 @@
    6.39  pthread_mutex_t suspendLock;
    6.40  pthread_cond_t  suspend_cond;
    6.41  
    6.42 -
    6.43 -
    6.44 + uint64 tsc_offset_send(ThdParams* thisCoresThdParams,uint64 initval);
    6.45 + int tsc_offset_resp(ThdParams* sendCoresThdParams,int initctr);
    6.46 +  
    6.47  //=====================  Global Vars ===================
    6.48  
    6.49  volatile MasterEnv      *_VMSMasterEnv;
    6.50 @@ -420,6 +424,8 @@
    6.51     /* clobber */ : "%eax", "%edx"         \
    6.52                  );
    6.53  
    6.54 +  __inline__ uint64_t rdtsc(void); 
    6.55 +
    6.56  //====================
    6.57  #define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \
    6.58     makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \
    6.59 @@ -449,11 +455,15 @@
    6.60      AppResponderInvocation_start,
    6.61      AppResponder_start,
    6.62      AppResponder_end,
    6.63 +    AssignerInvocation_start,
    6.64 +    NextAssigner_start,
    6.65      Assigner_start,
    6.66      Assigner_end,
    6.67      Work_start,
    6.68      Work_end,
    6.69 -    HwResponderInvocation_start
    6.70 +    HwResponderInvocation_start,
    6.71 +    Timestamp_start,
    6.72 +    Timestamp_end
    6.73  };
    6.74  
    6.75  #define getReturnAddressBeforeLibraryCall(vp_ptr, res_ptr) do{     \
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/__brch__perf_counters	Thu Mar 08 19:02:16 2012 +0100
     7.3 @@ -0,0 +1,1 @@
     7.4 +Performance counter support