changeset 174:c3f458403cd6 false_sharing

Always allocate more than 256 bytes, measurements removed in master loop, aligned slots
author Merten Sach <msach@mailbox.tu-berlin.de>
date Tue, 20 Dec 2011 16:50:21 +0100
parents bfaebdf60df3
children de5e7c522f1f
files MasterLoop.c VMS.c vmalloc.c
diffstat 3 files changed, 28 insertions(+), 17 deletions(-) [+]
line diff
     1.1 --- a/MasterLoop.c	Tue Dec 20 15:39:30 2011 +0100
     1.2 +++ b/MasterLoop.c	Tue Dec 20 16:50:21 2011 +0100
     1.3 @@ -95,7 +95,18 @@
     1.4         uint64 numCycles;
     1.5        //==========================================================
     1.6     
     1.7 -   //masterLoopStartPt:
     1.8 +   masterEnv        = (MasterEnv*)_VMSMasterEnv;
     1.9 +   
    1.10 +   masterVP         = (VirtProcr*)volatilemasterVP;  //just to make sure after jmp
    1.11 +   thisCoresIdx     = masterVP->coreAnimatedBy;
    1.12 +   readyToAnimateQ  = masterEnv->readyToAnimateQs[thisCoresIdx];
    1.13 +   schedSlots       = masterEnv->allSchedSlots[thisCoresIdx];
    1.14 +
    1.15 +   requestHandler   = masterEnv->requestHandler;
    1.16 +   slaveScheduler   = masterEnv->slaveScheduler;
    1.17 +   semanticEnv      = masterEnv->semanticEnv;
    1.18 +   
    1.19 +      //masterLoopStartPt:
    1.20     while(1){    //switch to core_loop and back to here is at end of loop
    1.21         
    1.22         //============================= MEASUREMENT STUFF =======================
    1.23 @@ -107,22 +118,12 @@
    1.24         #endif
    1.25         //=======================================================================
    1.26  
    1.27 -   masterEnv        = (MasterEnv*)_VMSMasterEnv;
    1.28 -   
    1.29 -      //GCC may optimize so doesn't always re-define from frame-storage
    1.30 -   masterVP         = (VirtProcr*)volatilemasterVP;  //just to make sure after jmp
    1.31 -   thisCoresIdx     = masterVP->coreAnimatedBy;
    1.32 -   readyToAnimateQ  = masterEnv->readyToAnimateQs[thisCoresIdx];
    1.33 -   schedSlots       = masterEnv->allSchedSlots[thisCoresIdx];
    1.34 -
    1.35 -   requestHandler   = masterEnv->requestHandler;
    1.36 -   slaveScheduler   = masterEnv->slaveScheduler;
    1.37 -   semanticEnv      = masterEnv->semanticEnv;
    1.38 -
    1.39  
    1.40        //Poll each slot's Done flag
    1.41     numSlotsFilled = 0;
    1.42 +   /*
    1.43           Meas_startMasterLoop
    1.44 +    */
    1.45     for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++)
    1.46      {
    1.47        currSlot = schedSlots[ slotIdx ];
    1.48 @@ -133,9 +134,13 @@
    1.49           currSlot->needsProcrAssigned = TRUE;
    1.50  
    1.51              //process requests from slave to master
    1.52 +         /*
    1.53                 Meas_startReqHdlr
    1.54 +          */
    1.55           (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv );
    1.56 +         /*
    1.57                 Meas_endReqHdlr
    1.58 +          */
    1.59         }
    1.60        if( currSlot->needsProcrAssigned )
    1.61         {    //give slot a new virt procr
    1.62 @@ -152,7 +157,9 @@
    1.63            }
    1.64         }
    1.65      }
    1.66 +        /*
    1.67           Meas_endMasterLoop
    1.68 +         */
    1.69     
    1.70     #ifdef USE_WORK_STEALING
    1.71        //If no slots filled, means no more work, look for work to steal.
     2.1 --- a/VMS.c	Tue Dec 20 15:39:30 2011 +0100
     2.2 +++ b/VMS.c	Tue Dec 20 16:50:21 2011 +0100
     2.3 @@ -187,11 +187,13 @@
     2.4   { SchedSlot  **schedSlots;
     2.5     int i;
     2.6  
     2.7 -   schedSlots  = VMS__malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) );
     2.8 +   //schedSlots  = VMS__malloc( NUM_SCHED_SLOTS * sizeof(SchedSlot *) );
     2.9 +   posix_memalign(&schedSlots, CACHELINE_SIZE, NUM_SCHED_SLOTS * sizeof(SchedSlot *));
    2.10  
    2.11     for( i = 0; i < NUM_SCHED_SLOTS; i++ )
    2.12      {
    2.13 -      schedSlots[i] = VMS__malloc( sizeof(SchedSlot) );
    2.14 +      //schedSlots[i] = VMS__malloc( sizeof(SchedSlot) );
    2.15 +       posix_memalign(&schedSlots[i], CACHELINE_SIZE, sizeof(SchedSlot) );
    2.16  
    2.17           //Set state to mean "handling requests done, slot needs filling"
    2.18        schedSlots[i]->workIsDone         = FALSE;
    2.19 @@ -590,8 +592,8 @@
    2.20        // itself
    2.21        //Note, should not stack-allocate initial data -- no guarantee, in
    2.22        // general that creating processor will outlive ones it creates.
    2.23 -   //VMS__free( animatingPr->startOfStack );
    2.24 -   //VMS__free( animatingPr );
    2.25 +   free( animatingPr->startOfStack );
    2.26 +   free( animatingPr );
    2.27   }
    2.28  
    2.29  
     3.1 --- a/vmalloc.c	Tue Dec 20 15:39:30 2011 +0100
     3.2 +++ b/vmalloc.c	Tue Dec 20 16:50:21 2011 +0100
     3.3 @@ -212,6 +212,8 @@
     3.4     #endif
     3.5     //========================================================================
     3.6     
     3.7 +   sizeRequested += CACHELINE_SIZE; //Allocate more than cacheline_size to avoid false sharing
     3.8 +   
     3.9     MallocArrays* freeLists = _VMSMasterEnv->freeLists;
    3.10     MallocProlog* foundChunk;
    3.11