Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
changeset 218:82f7defac851 Common_Ancestor
Added backoff to core loop, cleaned up core loop code, cleaned comments
| author | Some Random Person <seanhalle@yahoo.com> |
|---|---|
| date | Sat, 10 Mar 2012 21:48:53 -0800 |
| parents | ecbdb74cad97 |
| children | 8059fb8d5465 |
| files | CoreLoop.c Defines/VMS_defs__HW_constants.h |
| diffstat | 2 files changed, 82 insertions(+), 16 deletions(-) [+] |
line diff
1.1 --- a/CoreLoop.c Sat Mar 10 20:38:25 2012 -0800
1.2 +++ b/CoreLoop.c Sat Mar 10 21:48:53 2012 -0800
1.3 @@ -16,6 +16,12 @@
1.4
1.5 //===================== Functions local to this file =======================
1.6 void *terminateCoreController(SlaveVP *currSlv);
1.7 +inline void
1.8 +doBackoff_for_TooLongToGetLock( int32 numTriesToGetLock, uint32 *seed1,
1.9 + uint32 *seed2 );
1.10 +inline void
1.11 +doBackoff_for_TooLongWithNoWork( int32 numRepsWithNoWork, uint32 *seed1,
1.12 + uint32 *seed2 );
1.13 //===========================================================================
1.14
1.15
1.16 @@ -75,7 +81,6 @@
1.17 //Variables used during measurements
1.18 TSCountLowHigh endSusp;
1.19 //Variables used in random-backoff, for master-lock and waiting for work
1.20 - volatile double workspace1,workspace2; //busy-wait fake work
1.21 uint32_t seed1 = rand()%1000; // init random number generator for retries
1.22 uint32_t seed2 = rand()%1000;
1.23 //Variable for work-stealing -- a gate protects a critical section
1.24 @@ -137,6 +142,7 @@
1.25 if( currSlotIdx >= NUM_SCHED_SLOTS ) goto switchToMaster;
1.26 currSlot = schedSlots[ currSlotIdx ];
1.27
1.28 +
1.29 if( ! currSlot->needsSlaveAssigned ) //slot does have slave assigned
1.30 { numRepetitionsWithNoWork = 0; //reset B2B master count
1.31 currSlotIdx ++;
1.32 @@ -150,9 +156,20 @@
1.33
1.34 MEAS__Capture_Pre_Master_Lock_Point;
1.35
1.36 - int tries = 0; int gotLock = 0;
1.37 + int numTriesToGetLock = 0; int gotLock = 0;
1.38 while( currVP == NULL ) //keep going until get master lock
1.39 {
1.40 + //At this point, first thing to do is get lock. But, want to
1.41 + // reduce lock contention from cores with no work, so first
1.42 + // check if this is a core with no work, and busy wait if so.
1.43 + //Then, if it's been way too long without work, yield pthread
1.44 + if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_BACKOFF)
1.45 + doBackoff_for_TooLongWithNoWork( numRepetitionsWithNoWork, &seed1, &seed2 );
1.46 + if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD )
1.47 + { numRepetitionsWithNoWork = 0; pthread_yield(); }
1.48 +
1.49 +
1.50 + //Now, try to get the lock
1.51 gotLock = __sync_bool_compare_and_swap( addrOfMasterLock,
1.52 UNLOCKED, LOCKED );
1.53 if( gotLock )
1.54 @@ -164,16 +181,16 @@
1.55 // done, the masterVP will use assembly to switch the core
1.56 // back to animating this core controller
1.57 currVP = thisCoresMasterVP;
1.58 - if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD )
1.59 - { DEBUG_Print( dbgB2BMaster,"Lots of reps w/o work\n");
1.60 - pthread_yield();
1.61 - }
1.62 numRepetitionsWithNoWork += 1;
1.63 break; //end while -- have a VP to animate now
1.64 }
1.65 + //Get here only when failed to get lock
1.66
1.67 - tries++; //if too many, means too much contention
1.68 - if( tries > MASTERLOCK_RETRIES_BEFORE_YIELD ) { tries = 0; pthread_yield(); }
1.69 + numTriesToGetLock++; //if too many, means too much contention
1.70 + if( numTriesToGetLock > NUM_TRIES_BEFORE_DO_BACKOFF )
1.71 + doBackoff_for_TooLongToGetLock( numTriesToGetLock, &seed1, &seed2 );
1.72 + if( numTriesToGetLock > MASTERLOCK_RETRIES_BEFORE_YIELD )
1.73 + { numTriesToGetLock = 0; pthread_yield(); }
1.74 }
1.75 MEAS__Capture_Post_Master_Lock_Point;
1.76 }
1.77 @@ -198,6 +215,53 @@
1.78 }
1.79
1.80
1.81 +/*Used by the backoff to pick a random amount of busy-wait. Can't use the
1.82 + * system rand because it takes much too long.
1.83 + *Note, are passing pointers to the seeds, which are then modified
1.84 + */
1.85 +inline uint32_t
1.86 +randomNumber(uint32_t* seed1, uint32_t* seed2)
1.87 + {
1.88 + *seed1 = 36969 * (*seed1 & 65535) + (*seed1 >> 16);
1.89 + *seed2 = 18000 * (*seed2 & 65535) + (*seed2 >> 16);
1.90 + return (*seed1 << 16) + *seed2;
1.91 + }
1.92 +
1.93 +/*Busy-wait for a random number of cycles -- chooses number of cycles
1.94 + * differently than for the too-many-tries-to-get-lock backoff
1.95 + */
1.96 +inline void
1.97 +doBackoff_for_TooLongWithNoWork( int32 numRepsWithNoWork, uint32 *seed1,
1.98 + uint32 *seed2 )
1.99 + { int32 i, waitIterations;
1.100 + volatile double fakeWorkVar; //busy-wait fake work
1.101 +
1.102 + waitIterations =
1.103 + randomNumber(seed1, seed2) %
1.104 + (numRepsWithNoWork * numRepsWithNoWork * NUM_CORES);
1.105 + for( i = 0; i < waitIterations; i++ )
1.106 + { fakeWorkVar += (fakeWorkVar + 32.0) / 2.0; //busy-wait
1.107 + }
1.108 + }
1.109 +
1.110 +/*Busy-waits for a random number of cycles -- chooses number of cycles
1.111 + * differently than for the no-work backoff
1.112 + */
1.113 +inline void
1.114 +doBackoff_for_TooLongToGetLock( int32 numTriesToGetLock, uint32 *seed1,
1.115 + uint32 *seed2 )
1.116 + { int32 i, waitIterations;
1.117 + volatile double fakeWorkVar; //busy-wait fake work
1.118 +
1.119 + waitIterations =
1.120 + randomNumber(seed1, seed2) %
1.121 + (numTriesToGetLock * NUM_TRIES_TO_GET_LOCK_BACKOFF_WEIGHT);
1.122 + //addToHist( wait_iterations, coreLoopThdParams->wait_iterations_hist );
1.123 + for( i = 0; i < waitIterations; i++ )
1.124 + { fakeWorkVar += (fakeWorkVar + 32.0) / 2.0; //busy-wait
1.125 + }
1.126 + }
1.127 +
1.128
1.129 #ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE
1.130
2.1 --- a/Defines/VMS_defs__HW_constants.h Sat Mar 10 20:38:25 2012 -0800
2.2 +++ b/Defines/VMS_defs__HW_constants.h Sat Mar 10 21:48:53 2012 -0800
2.3 @@ -20,23 +20,25 @@
2.4 // when work-stealing, can make bigger, at risk of losing cache affinity
2.5 #define NUM_SCHED_SLOTS 3
2.6
2.7 -#define MIN_WORK_UNIT_CYCLES 20000
2.8 -
2.9 -#define NUM_REPS_W_NO_WORK_BEFORE_YIELD 10
2.10 -#define MASTERLOCK_RETRIES_BEFORE_YIELD 100
2.11 -
2.12 + //These are for backoff inside core-loop, which reduces lock contention
2.13 +#define NUM_REPS_W_NO_WORK_BEFORE_YIELD 10
2.14 +#define NUM_REPS_W_NO_WORK_BEFORE_BACKOFF 2
2.15 +#define MASTERLOCK_RETRIES_BEFORE_YIELD 100
2.16 +#define NUM_TRIES_BEFORE_DO_BACKOFF 10
2.17 +#define NUM_TRIES_TO_GET_LOCK_BACKOFF_WEIGHT 100
2.18 +
2.19 // stack size in virtual processors created
2.20 #define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */
2.21
2.22 - // memory for VMS_WL__malloc
2.23 + // memory for VMS_int__malloc
2.24 #define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x40000000 /* 1G */
2.25
2.26 //Frequency of TS counts -- have to do tests to verify
2.27 //NOTE: turn off (in BIOS) TURBO-BOOST and SPEED-STEP else won't be const
2.28 #define TSCOUNT_FREQ 3180000000
2.29
2.30 -#define CACHE_LINE_SZ 256
2.31 -#define PAGE_SIZE 4096
2.32 +#define CACHE_LINE_SZ 256
2.33 +#define PAGE_SIZE 4096
2.34
2.35 //To prevent false-sharing, aligns a variable to a cache-line boundary.
2.36 //No need to use for local vars because those are never shared between cores
