# HG changeset patch # User Sean Halle # Date 1358208637 28800 # Node ID e6a68e7ea63fc1602fca10d6eb79f62d889246e7 # Parent e5bd470b562b4b7df73fb22b1b5e2ebc3d84c92f Removed the extra level of core controller -- now only one anim slot and master called after every work unit diff -r e5bd470b562b -r e6a68e7ea63f AnimationMaster.c --- a/AnimationMaster.c Mon Jan 14 15:31:23 2013 -0800 +++ b/AnimationMaster.c Mon Jan 14 16:10:37 2013 -0800 @@ -22,11 +22,11 @@ inline void PRHandle_CreateTask( PRReqst *req, SlaveVP *slave ); inline void PRHandle_EndTask( PRReqst *req, SlaveVP *slave ); inline void PRHandle_CreateSlave(PRReqst *req, SlaveVP *slave ); -void PRHandle_Dissipate( PRReqst *req, SlaveVP *slave ); +void PRHandle_EndSlave( PRReqst *req, SlaveVP *slave ); //inline void masterFunction_SingleLang( PRLangEnv *protoLangEnv, AnimSlot *slot ); -inline void masterFunction_MultiLang( AnimSlot *slot ); +inline void masterFunction( AnimSlot *slot ); inline PRProcess * pickAProcess( AnimSlot *slot ); inline SlaveVP * assignWork( PRProcess *process, AnimSlot *slot ); @@ -78,461 +78,29 @@ //Have three different modes, and the master behavior is different for // each, so jump to the loop that corresponds to the mode. // - switch(masterEnv->mode) - { -/* - { case SingleLang: - while(1) - { MEAS__Capture_Pre_Master_Point - for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++) - { - currSlot = animSlots[ slotIdx ]; + while(1) + { MEAS__Capture_Pre_Master_Point + for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++) + { + currSlot = animSlots[ slotIdx ]; - masterFunction_StandaloneSlavesOnly( masterEnv, currSlot ); - } - MEAS__Capture_Post_Master_Point; - masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master - flushRegisters(); - } - - case SingleLang: - { PRLangEnv *protoLangEnv = _PRTopEnv->protoLangEnv; - while(1) - { MEAS__Capture_Pre_Master_Point - for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++) - { - currSlot = animSlots[ slotIdx ]; - - masterFunction_SingleLang( protoLangEnv, currSlot ); - } - MEAS__Capture_Post_Master_Point; - masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master - flushRegisters(); - } + masterFunction( currSlot ); } - */ - case MultiLang: - { while(1) - { MEAS__Capture_Pre_Master_Point - for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++) - { - currSlot = animSlots[ slotIdx ]; - - masterFunction_MultiLang( currSlot ); - } - MEAS__Capture_Post_Master_Point; - masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master - flushRegisters(); - } - } + MEAS__Capture_Post_Master_Point; + masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master + flushRegisters(); } } - -//===================== The versions of the Animation Master ================= -// -//============================================================================== - -/* 1) This version is for a single language, that has only slaves, no tasks, - * such as Vthread or SSR. - *This version is for when an application has only a single language, and - * that language exposes slaves explicitly (as opposed to a task based - * language like pure dataflow). - * - * - *It scans the animation slots for just-completed slaves. - * Each completed slave has a request in it. So, the master hands each to - * the plugin's request handler (there is only one plugin, because only one - * lang). - *Each request represents a language construct that has been encountered - * by the application code in the slave. Passing the request to the - * request handler is how that language construct's behavior gets invoked. - * The request handler then performs the actions of the construct's - * behavior. So, the request handler encodes the behavior of the - * language's parallelism constructs, and performs that when the master - * hands it a slave containing a request to perform that construct. - * - *On a shared-memory machine, the behavior of parallelism constructs - * equals control, over order of execution of code. Hence, the behavior - * of the language constructs performed by the request handler is to - * choose the order that slaves get animated, and thereby control the - * order that application code in the slaves executes. - * - *To control order of animation of slaves, the request handler has a - * language environment that holds data structures used to hold slaves - * and choose when they're ready to be animated. - * - *Once a slave is marked as ready to be animated by the request handler, - * it is the second plugin function, the Assigner, which chooses the core - * the slave gets assigned to for animation. Hence, the Assigner doesn't - * perform any of the semantic behavior of language constructs, rather - * it gives the language a chance to improve performance. The performance - * of application code is strongly related to communication between - * cores. On shared-memory machines, communication is caused during - * execution of code, by memory accesses, and how much depends on contents - * of caches connected to the core executing the code. So, the placement - * of slaves determines the communication caused during execution of the - * slave's code. - *The point of the Assigner, then, is to use application information during - * execution of the program, to make choices about slave placement onto - * cores, with the aim to put slaves close to caches containing the data - * used by the slave's code. - * - *========================================================================== - *In summary, the animationMaster scans the slots, finds slaves - * just-finished, which hold requests, pass those to the request handler, - * along with the language environment, and the request handler then manages - * the structures in the language env, which controls the order of - * animation of slaves, and so embodies the behavior of the language - * constructs. - *The animationMaster then rescans the slots, offering each empty one to - * the Assigner, along with the language environment. The Assigner chooses - * among the ready slaves in the language env, finding the one best suited - * to be animated by that slot's associated core. - * - *========================================================================== - *Implementation Details: - * - *There is a separate masterVP for each core, but a single language - * environment shared by all cores. Each core also has its own scheduling - * slots, which are used to communicate slaves between animationMaster and - * coreController. There is only one global variable, _PRTopEnv, which - * holds the language env and other things shared by the different - * masterVPs. The request handler and Assigner are registered with - * the animationMaster by the language's init function, and a pointer to - * each is in the _PRTopEnv. (There are also some pthread related global - * vars, but they're only used during init of PR). - *PR gains control over the cores by essentially "turning off" the OS's - * scheduler, using pthread pin-to-core commands. - * - *The masterVPs are created during init, with this animationMaster as their - * top level function. The masterVPs use the same SlaveVP data structure, - * even though they're not slave VPs. - *A "seed slave" is also created during init -- this is equivalent to the - * "main" function in C, and acts as the entry-point to the PR-language- - * based application. - *The masterVPs share a single system-wide master-lock, so only one - * masterVP may be animated at a time. - *The core controllers access _PRTopEnv to get the masterVP, and when - * they start, the slots are all empty, so they run their associated core's - * masterVP. The first of those to get the master lock sees the seed slave - * in the shared language environment, so when it runs the Assigner, that - * returns the seed slave, which the animationMaster puts into a scheduling - * slot then switches to the core controller. That then switches the core - * over to the seed slave, which then proceeds to execute language - * constructs to create more slaves, and so on. Each of those constructs - * causes the seed slave to suspend, switching over to the core controller, - * which eventually switches to the masterVP, which executes the - * request handler, which uses PR primitives to carry out the creation of - * new slave VPs, which are marked as ready for the Assigner, and so on.. - * - *On animation slots, and system behavior: - * A request may linger in an animation slot for a long time while - * the slaves in the other slots are animated. This only becomes a problem - * when such a request is a choke-point in the constraints, and is needed - * to free work for *other* cores. To reduce this occurrence, the number - * of animation slots should be kept low. In balance, having multiple - * animation slots amortizes the overhead of switching to the masterVP and - * executing the animationMaster code, which drives for more than one. In - * practice, the best balance should be discovered by profiling. - */ -/* -void masterFunction_StandaloneSlavesOnly( AnimSlot *slot ) - { - SlaveVP *slave; - PRReqst *req; - PRLangEnv *langEnv = _PRTopEnv->langEnv; - - - //======================== animationMaster ======================== - - //Check if newly-done slave in slot, which will need request handled - if( slot->workIsDone ) - { slot->workIsDone = FALSE; - slot->needsWorkAssigned = TRUE; - - - HOLISTIC__Record_AppResponder_start; - MEAS__startReqHdlr; - //process the request made by the slave (held inside slave struc) - slave = slot->slaveAssignedToSlot; - req = slave->request; - - //Handle task create and end first -- they're special cases.. - switch( req->reqType ) - { case SlvCreate: PRHandle_CreateSlave( slave ); break; - case SlvDissipate: PRHandle_Dissipate( slave ); break; - case Service: PR_int__handle_PRServiceReq( slave ); break; //resume into PR's own language env - case Hardware: //for future expansion - case IO: //for future expansion - case OSCall: //for future expansion - PR_int__throw_exception("Not implemented"); break; - case Language: //normal lang request - { - (*langEnv->requestHdlr)( req->langReq, slave, langEnv ); - } - } - HOLISTIC__Record_AppResponder_end; - MEAS__endReqHdlr; - } - //If slot empty, hand to Assigner to fill with a slave - if( slot->needsWorkAssigned ) - { //Call plugin's Assigner to give slot a new slave - HOLISTIC__Record_Assigner_start; - - if( langEnv->hasWork ) - { (*langEnv->slaveAssigner)( langEnv, slot ); //calls PR fn that inserts work into slot - goto ReturnAfterAssigningWork; //quit for-loop, cause found work - } - else - goto NoWork; - } - - NoWork: - //No work, if reach here.. - { - #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC - coreNum = slot->coreSlotIsOn; - returnSlv = process->idleSlv[coreNum][slotNum]; - - //things that would normally happen in resume(), but idle VPs - // never go there - returnSlv->numTimesAssignedToASlot++; //gives each idle unit a unique ID - Unit newU; - newU.vp = returnSlv->slaveNum; - newU.task = returnSlv->numTimesAssignedToASlot; - addToListOfArrays(Unit,newU,process->unitList); - - if (returnSlv->numTimesAssignedToASlot > 1) //make a dependency from prev idle unit - { Dependency newD; // to this one - newD.from_vp = returnSlv->slaveNum; - newD.from_task = returnSlv->numTimesAssignedToASlot - 1; - newD.to_vp = returnSlv->slaveNum; - newD.to_task = returnSlv->numTimesAssignedToASlot; - addToListOfArrays(Dependency, newD ,process->ctlDependenciesList); - } - #endif - HOLISTIC__Record_Assigner_end; - return; - } - - ReturnAfterAssigningWork: //All paths goto here.. to provide single point for holistic.. - { - HOLISTIC__Record_Assigner_end; - return; - } - } -*/ - - -/*This is the master when just multi-lang, but not multi-process mode is on. - * This version has to handle both tasks and slaves, and do extra work of - * looking up the language env and handlers to use, for each completed bit of - * work. - *It also has to search through the language envs to find one with work, - * then ask that env's assigner to return a unit of that work. - * - *The language is written to startup in the same way as if it were the only - * language in the app, and it operates in the same way, - * the only difference between single language and multi-lang is here, in the - * master. - *This invisibility to mode is why the language has to use registration calls - * for everything during startup -- those calls do different things depending - * on whether it's single-language or multi-language mode. - * - *In this version of the master, work can either be a task or a resumed slave - *Having two cases makes this logic complex.. can be finishing either, and - * then the next available work may be either.. so really have two distinct - * loops that are inter-twined.. - * - *Some special cases: - * A task-end is a special case for a few reasons (below). - * A task-end can't block a slave (can't cause it to "logically suspend") - * A task available for work can only be assigned to a special slave, which - * has been set aside for doing tasks, one such task-slave is always - * assigned to each slot. So, when a task ends, a new task is assigned to - * that slot's task-slave right away. - * But if no tasks are available, then have to switch over to looking at - * slaves to find one ready to resume, to find work for the slot. - * If a task just suspends, not ends, then its task-slave is no longer - * available to take new tasks, so a new task-slave has to be assigned to - * that slot. Then the slave of the suspended task is turned into a free - * task-slave and request handling is done on it as if it were a slave - * that suspended. - * After request handling, do the same sequence of looking for a task to be - * work, and if none, look for a slave ready to resume, as work for the slot. - * If a slave suspends, handle its request, then look for work.. first for a - * task to assign, and if none, slaves ready to resume. - * Another special case is when task-end is done on a free task-slave.. in - * that case, the slave has no more work and no way to get more.. so place - * it into a recycle queue. - * If no work is found of either type, then do a special thing to prune down - * the extra slaves in the recycle queue, just so don't get too many.. - * - *The multi-lang thing complicates matters.. - * - *For request handling, it means have to first fetch the language environment - * of the language, and then do the request handler pointed to by that - * language env. - *For assigning, things get more complex because of competing goals.. One - * goal is for language specific stuff to be used during assignment, so - * assigner can make higher quality decisions.. but with multiple languages, - * which only get mixed in the application, the assigners can't be written - * with knowledge of each other. So, they can only make localized decisions, - * and so different language's assigners may interfere with each other.. - * - *So, have some possibilities available: - *1) can have a fixed scheduler in the proto-runtime, that all the - * languages give their work to.. (but then lose language-specific info, - * there is a standard PR format for assignment info, and the langauge - * attaches this to the work-unit when it gives it to PR.. also have issue - * with HWSim, which uses a priority Q instead of FIFO, and requests can - * "undo" previous work put in, so request handlers need way to manipulate - * the work-holding Q..) (this might be fudgeable with - * HWSim, if the master did a lang-supplied callback each time it assigns a - * unit to a slot.. then HWSim can keep exactly one unit of work in PR's - * queue at a time.. but this is quite hack-like.. or perhaps HWSim supplies - * a task-end handler that kicks the next unit of work from HWSim internal - * priority queue, over to PR readyQ) - *2) can have each language have its own language env, that holds its own - * work, which is assigned by its own assigner.. then the master searches - * through all the language envs to find one with work and asks it give work.. - * (this has downside of blinding assigners to each other.. but does work - * for HWSim case) - *3) could make PR have a different readyQ for each core, and ask the lang - * to put work to the core it prefers.. but the work may be moved by PR if - * needed, say if one core idles for too long. This is a hybrid approach, - * letting the language decide which core, but PR keeps the work and does it - * FIFO style.. (this might als be fudgeable with HWSim, in similar fashion, - * but it would be complicated by having to track cores separately) - * - *Choosing 2, to keep compatibility with single-lang mode.. it allows the same - * assigner to be used for single-lang as for multi-lang.. the overhead of - * the extra master search for work is part of the price of the flexibility, - * but should be fairly small.. takes the first env that has work available, - * and whatever it returns is assigned to the slot.. - * - *As a hybrid, giving an option for a unified override assigner to be registered - * and used.. This allows something like a static analysis to detect - * which languages are grouped together, and then analyze the pattern of - * construct calls, and generate a custom assigner that uses info from all - * the languages in a unified way.. Don't really expect this to happen, - * but making it possible. - */ -/* -inline -void -masterFunction_SingleLang( PRLangEnv *protoLangEnv, AnimSlot *slot ) - { //Scan the animation slots - SlaveVP *slave; - PRReqst *req; - - //Check if newly-done slave in slot, which will need request handled - if( slot->workIsDone ) - { slot->workIsDone = FALSE; - slot->needsWorkAssigned = TRUE; - - HOLISTIC__Record_AppResponder_start; //TODO: update to check which process for each slot - MEAS__startReqHdlr; - - - //process the request made by the slave (held inside slave struc) - slave = slot->slaveAssignedToSlot; - req = slave->request; - - //If the requesting slave is a slot slave, and request is not - // task-end, then turn it into a free task slave. - if( slave->typeOfVP == SlotTaskSlv && req->reqType != TaskEnd ) - PR_int__replace_with_new_slot_slv( slave ); - - //Handle task create and end first -- they're special cases.. - switch( req->reqType ) - { case TaskEnd: - { //do PR handler, which calls lang's hdlr and does recycle of - // free task slave if needed -- PR handler checks for free task Slv - PRHandle_EndTask_SL( slave ); break; - } - case TaskCreate: - { //Do PR's create-task handler, which calls the lang's hdlr - // PR handler checks for free task Slv - PRHandle_CreateTask_SL( slave ); break; - } - case SlvCreate: PRHandle_CreateSlave_SL( slave ); break; - case SlvDissipate: PRHandle_Dissipate_SL( slave ); break; - case Service: PR_int__handle_PRServiceReq_SL( slave ); break; //resume into PR's own language env - case Hardware: //for future expansion - case IO: //for future expansion - case OSCall: //for future expansion - PR_int__throw_exception("Not implemented", slave, NULL); break; - case Language: //normal lang request - { - (*protoLangEnv->requestHdlr)( req->langReq, slave, (void*)PR_int__give_lang_env(protoLangEnv )); - } - } - - MEAS__endReqHdlr; - HOLISTIC__Record_AppResponder_end; - } //if have request to be handled - - //If slot empty, hand to Assigner to fill with a slave - if( slot->needsWorkAssigned ) - { //Call plugin's Assigner to give slot a new slave - HOLISTIC__Record_Assigner_start; - - if( protoLangEnv->hasWork ) - { (*protoLangEnv->slaveAssigner)( protoLangEnv, slot ); //calls PR fn that inserts work into slot - goto ReturnAfterAssigningWork; //quit for-loop, cause found work - } - else - goto NoWork; - } - - NoWork: - //No work, if reach here.. - { - #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC - coreNum = slot->coreSlotIsOn; - returnSlv = process->idleSlv[coreNum][slotNum]; - - //things that would normally happen in resume(), but idle VPs - // never go there - returnSlv->numTimesAssignedToASlot++; //gives each idle unit a unique ID - Unit newU; - newU.vp = returnSlv->slaveNum; - newU.task = returnSlv->numTimesAssignedToASlot; - addToListOfArrays(Unit,newU,process->unitList); - - if (returnSlv->numTimesAssignedToASlot > 1) //make a dependency from prev idle unit - { Dependency newD; // to this one - newD.from_vp = returnSlv->slaveNum; - newD.from_task = returnSlv->numTimesAssignedToASlot - 1; - newD.to_vp = returnSlv->slaveNum; - newD.to_task = returnSlv->numTimesAssignedToASlot; - addToListOfArrays(Dependency, newD ,process->ctlDependenciesList); - } - #endif - HOLISTIC__Record_Assigner_end; - return; - } - - ReturnAfterAssigningWork: //All paths goto here.. to provide single point for holistic.. - { - HOLISTIC__Record_Assigner_end; - return; - } - } -*/ - inline void -masterFunction_MultiLang( AnimSlot *slot ) +masterFunction( AnimSlot *slot ) { //Scan the animation slots int32 magicNumber; SlaveVP *slave; PRLangEnv *langEnv; PRReqst *req; - RequestHandler requestHandler; PRProcess *process; //Check if newly-done slave in slot, which will need request handled @@ -566,8 +134,8 @@ PRHandle_CreateTask( req, slave ); break; } case SlvCreate: PRHandle_CreateSlave( req, slave ); break; - case SlvDissipate: PRHandle_Dissipate( req, slave ); break; - case Service: PR_int__handle_PRServiceReq( slave ); break; //resume into PR's own language env + case SlvDissipate: PRHandle_EndSlave( req, slave ); break; + case Service: PR_int__handle_PRServiceReq( slave ); break; //resumes into Service lang env case Hardware: //for future expansion case IO: //for future expansion case OSCall: //for future expansion @@ -704,13 +272,13 @@ } #endif HOLISTIC__Record_Assigner_end; - return; + return FALSE; } ReturnAfterAssigningWork: //All paths goto here.. to provide single point for holistic.. { HOLISTIC__Record_Assigner_end; - return; + return TRUE; } } @@ -774,7 +342,7 @@ */ inline void -PRHandle_Dissipate( PRReqst *req, SlaveVP *slave ) +PRHandle_EndSlave( PRReqst *req, SlaveVP *slave ) { PRProcess *process; PRLangEnv *protoLangEnv; diff -r e5bd470b562b -r e6a68e7ea63f CoreController.c --- a/CoreController.c Mon Jan 14 15:31:23 2013 -0800 +++ b/CoreController.c Mon Jan 14 16:10:37 2013 -0800 @@ -71,8 +71,9 @@ { int32 thisCoresIdx; int32 numRepetitionsWithNoWork; + bool32 foundWork; SlaveVP *currVP; - AnimSlot *currSlot, **animSlots; + AnimSlot *animSlot; int32 currSlotIdx; volatile int32 *addrOfMasterLock; //thing pointed to is volatile, not ptr SlaveVP *thisCoresMasterVP; @@ -94,8 +95,7 @@ //Assembly that saves addr of label of return instr -- label in assmbly recordCoreCtlrReturnLabelAddr((void**)&(_PRTopEnv->coreCtlrReturnPt)); - animSlots = _PRTopEnv->allAnimSlots[ thisCoresIdx ]; - currSlotIdx = 0; //start at slot 0, go up until one empty, then do master + animSlot = _PRTopEnv->allAnimSlots[ thisCoresIdx ]; numRepetitionsWithNoWork = 0; addrOfMasterLock = &(_PRTopEnv->masterLock); thisCoresMasterVP = _PRTopEnv->masterVPs[ thisCoresIdx ]; @@ -136,77 +136,67 @@ //Alternatively, the VP suspend primitive could just not bother // returning from switchToSlv, and instead jmp directly to here. - if( currSlotIdx >= NUM_ANIM_SLOTS ) goto SwitchToMaster; - currSlot = animSlots[ currSlotIdx ]; + if(animSlot->slaveAssignedToSlot->typeOfVP == Idle) + { //The Holistic stuff turns on idle slaves.. but can also be in mode + // where have no idle slaves.. so, this IF statement can only be true + // executed when HOLISTIC is turned on.. + numRepetitionsWithNoWork ++; + HOLISTIC__Record_last_work; + } + - if( ! currSlot->needsWorkAssigned ) //slot does have slave assigned - { if(currSlot->slaveAssignedToSlot->typeOfVP == Idle) - { numRepetitionsWithNoWork ++; - } - else - { numRepetitionsWithNoWork = 0; //reset back2back master count + + HOLISTIC__Record_AppResponderInvocation_start; + MEAS__Capture_Pre_Master_Lock_Point; + + int numTriesToGetLock = 0; int gotLock = 0; + while( currVP == NULL ) //keep going until get master lock + { + //At this point, first thing to do is get lock. But, want to + // reduce lock contention from cores with no work, so first + // check if this is a core with no work, and busy wait if so. + //Then, if it's been way too long without work, yield pthread + if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_BACKOFF) + doBackoff_for_TooLongWithNoWork( numRepetitionsWithNoWork, &seed1, &seed2 ); + if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD ) + { numRepetitionsWithNoWork = 0; pthread_yield(); } + + + //Now, try to get the lock + gotLock = __sync_bool_compare_and_swap( addrOfMasterLock, + UNLOCKED, LOCKED ); + if( gotLock ) + { //At this point, have successfully gotten master lock. + //So, break out of get-lock loop. + break; //end while -- have a VP to animate now } - currSlotIdx ++; - currVP = currSlot->slaveAssignedToSlot; - HOLISTIC__Record_last_work; - } - else //slot is empty, so switch to master - { - SwitchToMaster: - currSlotIdx = 0; //doing switch to master, so start over at slot 0 - currVP = NULL; + //Get here only when failed to get lock - MEAS__Capture_Pre_Master_Lock_Point; - HOLISTIC__Record_AppResponderInvocation_start; + numTriesToGetLock++; //if too many, means too much contention + if( numTriesToGetLock > NUM_TRIES_BEFORE_DO_BACKOFF ) + doBackoff_for_TooLongToGetLock( numTriesToGetLock, &seed1, &seed2 ); + if( numTriesToGetLock > MASTERLOCK_RETRIES_BEFORE_YIELD ) + { numTriesToGetLock = 0; pthread_yield(); } + } //while( currVP == NULL ) + MEAS__Capture_Post_Master_Lock_Point; - int numTriesToGetLock = 0; int gotLock = 0; - while( currVP == NULL ) //keep going until get master lock - { - //At this point, first thing to do is get lock. But, want to - // reduce lock contention from cores with no work, so first - // check if this is a core with no work, and busy wait if so. - //Then, if it's been way too long without work, yield pthread - if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_BACKOFF) - doBackoff_for_TooLongWithNoWork( numRepetitionsWithNoWork, &seed1, &seed2 ); - if( numRepetitionsWithNoWork > NUM_REPS_W_NO_WORK_BEFORE_YIELD ) - { numRepetitionsWithNoWork = 0; pthread_yield(); } + //have master lock, perform master function, which manages request + // handling and assigning work to this core's slot + foundWork = + masterFunction( animSlot ); + if( foundWork ) + numRepetitionsWithNoWork = 0; + else + numRepetitionsWithNoWork += 1; - - //Now, try to get the lock - gotLock = __sync_bool_compare_and_swap( addrOfMasterLock, - UNLOCKED, LOCKED ); - if( gotLock ) - { //At this point, have run out of slaves, so tried to get - // the master lock, and have successfully gotten it. - //So, set the currVP to this core's masterVP and break out - // of the get-lock loop. Below, assembly code will switch - // the core over to animating the masterVP. When it's - // done, the masterVP will use assembly to switch the core - // back to animating this core controller - currVP = thisCoresMasterVP; - numRepetitionsWithNoWork += 1; - break; //end while -- have a VP to animate now - } - //Get here only when failed to get lock - - numTriesToGetLock++; //if too many, means too much contention - if( numTriesToGetLock > NUM_TRIES_BEFORE_DO_BACKOFF ) - doBackoff_for_TooLongToGetLock( numTriesToGetLock, &seed1, &seed2 ); - if( numTriesToGetLock > MASTERLOCK_RETRIES_BEFORE_YIELD ) - { numTriesToGetLock = 0; pthread_yield(); } - } //while( currVP == NULL ) - MEAS__Capture_Post_Master_Lock_Point; - } //else - - HOLISTIC__Record_Work_start; + HOLISTIC__Record_Work_start; switchToSlv(currVP); //Slave suspend makes core "return" from this call flushRegisters(); //prevent GCC optimization from doing bad things - HOLISTIC__Record_Work_end; - MEAS__Capture_End_Susp_in_CoreCtlr_ForSys; - + HOLISTIC__Record_Work_end; + }//while(1) }