changeset 29:e4de34fd220b

merged useless branch test_without_inline
author Merten Sach <msach@mailbox.tu-berlin.de>
date Thu, 22 Sep 2011 12:15:04 +0200
parents b549ad140f18 2c146b6b3890
children 531eb1f6d024
files
diffstat 7 files changed, 534 insertions(+), 271 deletions(-) [+]
line diff
     1.1 --- a/DESIGN_NOTES.txt	Thu Nov 11 04:59:48 2010 -0800
     1.2 +++ b/DESIGN_NOTES.txt	Thu Sep 22 12:15:04 2011 +0200
     1.3 @@ -1,212 +1,212 @@
     1.4 -
     1.5 -From e-mail to Albert, on design of app-virt-procr to core-loop animation
     1.6 -switch and back.
     1.7 -
     1.8 -====================
     1.9 -General warnings about this code:
    1.10 -It only compiles in GCC 4.x  (label addr and computed goto)
    1.11 -Has assembly for x86  32bit
    1.12 -
    1.13 -
    1.14 -====================
    1.15 -AVProcr data-struc has: stack-ptr, jump-ptr, data-ptr, slotNum, coreloop-ptr
    1.16 - and semantic-custom-ptr
    1.17 -
    1.18 -The VMS Creator: takes ptr to function and ptr to initial data
    1.19 --- creates a new AVProcr struc
    1.20 --- sets the jmp-ptr field to the ptr-to-function passed in
    1.21 --- sets the data-ptr to ptr to initial data passed in
    1.22 --- if this is for a suspendable virt  processor, then create a stack and set
    1.23 -   the stack-ptr
    1.24 -
    1.25 -VMS__create_procr( AVProcrFnPtr fnPtr, void *initialData )
    1.26 -{
    1.27 -AVProcr   newPr = malloc( sizeof(AVProcr) );
    1.28 -newPr->jmpPtr = fnPtr;
    1.29 -newPr->coreLoopDonePt = &CoreLoopDonePt; //label is in coreLoop
    1.30 -newPr->data = initialData;
    1.31 -newPr->stackPtr = createNewStack();
    1.32 -return newPr;
    1.33 -}
    1.34 -
    1.35 -The semantic layer can then add its own state in the cusom-ptr field
    1.36 -
    1.37 -The Scheduler plug-in:
    1.38 --- Sets slave-ptr in AVProcr, and points the slave to AVProcr
    1.39 --- if non-suspendable, sets the AVProcr's stack-ptr to the slave's stack-ptr
    1.40 -
    1.41 -MasterLoop:
    1.42 --- puts AVProcr structures onto the workQ
    1.43 -
    1.44 -CoreLoop:
    1.45 --- gets stack-ptr out of AVProcr and sets the core's stack-ptr to that
    1.46 --- gets data-ptr out of AVProcr and puts it into reg GCC uses for that param
    1.47 --- puts AVProcr's addr into reg GCC uses for the AVProcr-pointer param
    1.48 --- jumps to the addr in AVProcr's jmp-ptr field
    1.49 -CoreLoop()
    1.50 -{ while( FOREVER )
    1.51 - { nextPr = readQ( workQ );  //workQ is static (global) var declared volatile
    1.52 -   <dataPtr-param-register>       = nextPr->data;
    1.53 -   <AVProcrPtr-param-register> = nextPr;
    1.54 -   <stack-pointer register>          = nextPr->stackPtr;
    1.55 -   jmp nextPr->jmpPtr;
    1.56 -CoreLoopDonePt:   //label's addr put into AVProcr when create new one
    1.57 - }
    1.58 -}
    1.59 -(Note, for suspendable processors coming back from suspension, there is no
    1.60 - need to fill the parameter registers -- they will be discarded)
    1.61 -
    1.62 -Suspend an application-level virtual processor:
    1.63 -VMS__AVPSuspend( AVProcr *pr )
    1.64 -{
    1.65 -pr->jmpPtr = &ResumePt;  //label defined a few lines below
    1.66 -pr->slave->doneFlag = TRUE;
    1.67 -pr->stackPtr = <current SP reg value>;
    1.68 -jmp pr->coreLoopDonePt;
    1.69 -ResumePt: return;
    1.70 -}
    1.71 -
    1.72 -This works because the core loop will have switched back to this stack
    1.73 - before jumping to ResumePt..    also, the core loop never modifies the
    1.74 - stack pointer, it simply switches to whatever stack pointer is in the
    1.75 - next AVProcr it gets off the workQ.
    1.76 -
    1.77 -
    1.78 -
    1.79 -=============================================================================
    1.80 -As it is now, there's only one major unknown about GCC (first thing below
    1.81 -  the line),  and there are a few restrictions, the most intrusive being
    1.82 -  that the functions the application gives to the semantic layer have a
    1.83 -  pre-defined prototype -- return nothing, take a pointer to initial data
    1.84 -  and a pointer to an AVProcr struc, which they're not allowed to modify
    1.85 -  -- only pass it to semantic-lib calls.
    1.86 -
    1.87 -So, here are the assumptions, restrictions, and so forth:
    1.88 -===========================
    1.89 -Major assumption:  that GCC will do the following the same way every time:
    1.90 -  say the application defines a function that fits this typedef:
    1.91 -typedef void (*AVProcrFnPtr)  ( void *, AVProcr * );
    1.92 -
    1.93 -and let's say somewhere in the code they do this:
    1.94 -AVProcrFnPtr   fnPtr = &someFunc;
    1.95 -
    1.96 -then they do this:
    1.97 -(*fnPtr)( dataPtr, animatingVirtProcrPtr );
    1.98 -
    1.99 -Can the registers that GCC uses to pass the two pointers be predicted?
   1.100 - Will they always be the same registers, in every program that has the
   1.101 - same typedef?
   1.102 -If that typedef fixes, guaranteed, the registers (on x86) that GCC will use
   1.103 - to send the two pointers, then the rest of this solution works.
   1.104 -
   1.105 -Change in model: Instead of a virtual processor whose execution trace is
   1.106 - divided into work-units, replacing that with the pattern that a virtual
   1.107 - processor is suspended.  Which means, no more "work unit" data structure
   1.108 - -- instead, it's now an "Application Virtual Processor" structure
   1.109 - -- AVProcr -- which is given directly to the application function!
   1.110 -
   1.111 -   -- You were right, don't need slaves to be virtual processors, only need
   1.112 -      "scheduling buckets" -- just a way to keep track of things..
   1.113 -
   1.114 -Restrictions:
   1.115 --- the  "virtual entities"  created by the semantic layer must be virtual
   1.116 -   processors, created with a function-to-execute and initial data -- the
   1.117 -   function is restricted to return nothing and only take a pointer to the
   1.118 -   initial data plus a pointer to an AVProcr structure, which represents
   1.119 -   "self", the virtual processor created.  (This is the interface I showed
   1.120 -   you for "Hello World" semantic layer).
   1.121 -What this means for synchronous dataflow, is that the nodes in the graph
   1.122 -  are virtual processors that in turn spawn a new virtual processor for
   1.123 -  every "firing" of the node.  This should be fine because the function
   1.124 -  that the node itself is created with is a "canned" function that is part
   1.125 -  of the semantic layer -- the function that is spawned is the user-provided
   1.126 -  function.  The restriction only means that the values from the inputs to
   1.127 -  the node are packaged as the "initial data" given to the spawned virtual
   1.128 -  processor -- so the user-function has to cast a void * to the
   1.129 -  semantic-layer-defined structure by which it gets the inputs to the node.
   1.130 -
   1.131 --- Second restriction is that the semantic layer has to use VMS supplied
   1.132 -   stuff -- for example, the data structure that represents the
   1.133 -   application-level virtual processor is defined in VMS, and the semantic
   1.134 -   layer has to call a VMS function in order to suspend a virtual processor.
   1.135 -
   1.136 --- Third restriction is that the application code never do anything with
   1.137 -   the AVProcr structure except pass it to semantic-layer lib calls.
   1.138 -
   1.139 --- Fourth restriction is that every virtual processor must call a
   1.140 -   "dissipate" function as its last act -- the user-supplied
   1.141 -   virtual-processor function can't just end -- it has to call
   1.142 -   SemLib__dissipate( AVProcr ) before the closing brace.. and after the
   1.143 -   semantic layer is done cleaning up its own data, it has to in turn call
   1.144 -   VMS__disspate( AVProcr ).
   1.145 -
   1.146 --- For performance reasons, I think I want to have two different kinds of
   1.147 -   app-virtual processor -- suspendable ones and non-suspendable -- where
   1.148 -   non-suspendable are not allowed to perform any communication with other
   1.149 -   virtual processors, except at birth and death.  Suspendable ones, of
   1.150 -   course can perform communications, create other processors, and so forth
   1.151 -   -- all of which cause it to suspend.
   1.152 -The performance difference is that I need a separate stack for each
   1.153 -  suspendable, but non-suspendable can re-use a fixed number of stacks
   1.154 -  (one for each slave).
   1.155 -
   1.156 -
   1.157 -==================== May 29
   1.158 -
   1.159 -Qs:
   1.160 ---1 how to safely jump between virt processor's trace and coreloop
   1.161 ---2 how to set up __cdecl style stack + frame for just-born virtual processor
   1.162 ---3 how to switch stack-pointers + frame-pointers
   1.163 -
   1.164 -
   1.165 ---1:
   1.166 -Not sure if GCC's computed goto is safe, because modify the stack pointer
   1.167 -without GCC's knowledge -- although, don't use the stack in the coreloop
   1.168 -segment, so, actually, that should be safe!
   1.169 -
   1.170 -So, GCC has its own special C extensions, one of which gets address of label:
   1.171 -
   1.172 -void *labelAddr;
   1.173 -labelAddr = &&label;
   1.174 -goto *labelAddr;
   1.175 -
   1.176 ---2
   1.177 -In CoreLoop, will check whether VirtProc just born, or was suspended.
   1.178 -If just born, do bit of code that sets up the virtual processor's stack
   1.179 -and frame according to the __cdecl convention for the standard virt proc
   1.180 -fn typedef -- save the pointer to data and pointer to virt proc struc into
   1.181 -correct places in the frame
   1.182 -   __cdecl says, according to:
   1.183 -http://unixwiz.net/techtips/win32-callconv-asm.html
   1.184 -To do this:
   1.185 -push the parameters onto the stack, right most first, working backwards to
   1.186 - the left.
   1.187 -Then perform call instr, which pushes return addr onto stack.
   1.188 -Then callee first pushes the frame pointer, %EBP followed by placing the
   1.189 -then-current value of stack pointer into %EBP
   1.190 -push ebp
   1.191 -mov  ebp, esp    // ebp « esp
   1.192 -
   1.193 -Once %ebp has been changed, it can now refer directly to the function's
   1.194 - arguments as 8(%ebp), 12(%ebp). Note that 0(%ebp) is the old base pointer
   1.195 - and 4(%ebp) is the old instruction pointer.
   1.196 -
   1.197 -Then callee pushes regs it will use then adds to stack pointer the size of
   1.198 - its local vars.
   1.199 -
   1.200 -Stack in callee looks like this:
   1.201 -16(%ebp)	 - third function parameter
   1.202 -12(%ebp)	 - second function parameter
   1.203 -8(%ebp)	 - first function parameter
   1.204 -4(%ebp)	 - old %EIP (the function's "return address")
   1.205 -----------^^ State seen at first instr of callee ^^-----------
   1.206 -0(%ebp)	- old %EBP (previous function's base pointer)
   1.207 --4(%ebp)	 - save of EAX, the only reg used in function
   1.208 --8(%ebp)	 - first local variable
   1.209 --12(%ebp)	 - second local variable
   1.210 --16(%ebp)	 - third local variable
   1.211 -
   1.212 -
   1.213 ---3
   1.214 -It might be just as simple as two mov instrs, one for %ESP, one for %EBP..
   1.215 - the stack and frame pointer regs
   1.216 +
   1.217 
   1.218 +From e-mail to Albert, on design of app-virt-procr to core-loop animation
   1.219 
   1.220 +switch and back.
   1.221 
   1.222 +
   1.223 
   1.224 +====================
   1.225 
   1.226 +General warnings about this code:
   1.227 
   1.228 +It only compiles in GCC 4.x  (label addr and computed goto)
   1.229 
   1.230 +Has assembly for x86  32bit
   1.231 
   1.232 +
   1.233 
   1.234 +
   1.235 
   1.236 +====================
   1.237 
   1.238 +AVProcr data-struc has: stack-ptr, jump-ptr, data-ptr, slotNum, coreloop-ptr
   1.239 
   1.240 + and semantic-custom-ptr
   1.241 
   1.242 +
   1.243 
   1.244 +The VMS Creator: takes ptr to function and ptr to initial data
   1.245 
   1.246 +-- creates a new AVProcr struc
   1.247 
   1.248 +-- sets the jmp-ptr field to the ptr-to-function passed in
   1.249 
   1.250 +-- sets the data-ptr to ptr to initial data passed in
   1.251 
   1.252 +-- if this is for a suspendable virt  processor, then create a stack and set
   1.253 
   1.254 +   the stack-ptr
   1.255 
   1.256 +
   1.257 
   1.258 +VMS__create_procr( AVProcrFnPtr fnPtr, void *initialData )
   1.259 
   1.260 +{
   1.261 
   1.262 +AVProcr   newPr = malloc( sizeof(AVProcr) );
   1.263 
   1.264 +newPr->jmpPtr = fnPtr;
   1.265 
   1.266 +newPr->coreLoopDonePt = &CoreLoopDonePt; //label is in coreLoop
   1.267 
   1.268 +newPr->data = initialData;
   1.269 
   1.270 +newPr->stackPtr = createNewStack();
   1.271 
   1.272 +return newPr;
   1.273 
   1.274 +}
   1.275 
   1.276 +
   1.277 
    1.278 +The semantic layer can then add its own state in the custom-ptr field
   1.279 
   1.280 +
   1.281 
   1.282 +The Scheduler plug-in:
   1.283 
   1.284 +-- Sets slave-ptr in AVProcr, and points the slave to AVProcr
   1.285 
   1.286 +-- if non-suspendable, sets the AVProcr's stack-ptr to the slave's stack-ptr
   1.287 
   1.288 +
   1.289 
   1.290 +MasterLoop:
   1.291 
   1.292 +-- puts AVProcr structures onto the workQ
   1.293 
   1.294 +
   1.295 
   1.296 +CoreLoop:
   1.297 
   1.298 +-- gets stack-ptr out of AVProcr and sets the core's stack-ptr to that
   1.299 
   1.300 +-- gets data-ptr out of AVProcr and puts it into reg GCC uses for that param
   1.301 
   1.302 +-- puts AVProcr's addr into reg GCC uses for the AVProcr-pointer param
   1.303 
   1.304 +-- jumps to the addr in AVProcr's jmp-ptr field
   1.305 
   1.306 +CoreLoop()
   1.307 
   1.308 +{ while( FOREVER )
   1.309 
   1.310 + { nextPr = readQ( workQ );  //workQ is static (global) var declared volatile
   1.311 
   1.312 +   <dataPtr-param-register>       = nextPr->data;
   1.313 
   1.314 +   <AVProcrPtr-param-register> = nextPr;
   1.315 
   1.316 +   <stack-pointer register>          = nextPr->stackPtr;
   1.317 
   1.318 +   jmp nextPr->jmpPtr;
   1.319 
   1.320 +CoreLoopDonePt:   //label's addr put into AVProcr when create new one
   1.321 
   1.322 + }
   1.323 
   1.324 +}
   1.325 
   1.326 +(Note, for suspendable processors coming back from suspension, there is no
   1.327 
   1.328 + need to fill the parameter registers -- they will be discarded)
   1.329 
   1.330 +
   1.331 
   1.332 +Suspend an application-level virtual processor:
   1.333 
   1.334 +VMS__AVPSuspend( AVProcr *pr )
   1.335 
   1.336 +{
   1.337 
   1.338 +pr->jmpPtr = &ResumePt;  //label defined a few lines below
   1.339 
   1.340 +pr->slave->doneFlag = TRUE;
   1.341 
   1.342 +pr->stackPtr = <current SP reg value>;
   1.343 
   1.344 +jmp pr->coreLoopDonePt;
   1.345 
   1.346 +ResumePt: return;
   1.347 
   1.348 +}
   1.349 
   1.350 +
   1.351 
   1.352 +This works because the core loop will have switched back to this stack
   1.353 
   1.354 + before jumping to ResumePt..    also, the core loop never modifies the
   1.355 
   1.356 + stack pointer, it simply switches to whatever stack pointer is in the
   1.357 
   1.358 + next AVProcr it gets off the workQ.
   1.359 
   1.360 +
   1.361 
   1.362 +
   1.363 
   1.364 +
   1.365 
   1.366 +=============================================================================
   1.367 
   1.368 +As it is now, there's only one major unknown about GCC (first thing below
   1.369 
   1.370 +  the line),  and there are a few restrictions, the most intrusive being
   1.371 
   1.372 +  that the functions the application gives to the semantic layer have a
   1.373 
   1.374 +  pre-defined prototype -- return nothing, take a pointer to initial data
   1.375 
   1.376 +  and a pointer to an AVProcr struc, which they're not allowed to modify
   1.377 
   1.378 +  -- only pass it to semantic-lib calls.
   1.379 
   1.380 +
   1.381 
   1.382 +So, here are the assumptions, restrictions, and so forth:
   1.383 
   1.384 +===========================
   1.385 
   1.386 +Major assumption:  that GCC will do the following the same way every time:
   1.387 
   1.388 +  say the application defines a function that fits this typedef:
   1.389 
   1.390 +typedef void (*AVProcrFnPtr)  ( void *, AVProcr * );
   1.391 
   1.392 +
   1.393 
   1.394 +and let's say somewhere in the code they do this:
   1.395 
   1.396 +AVProcrFnPtr   fnPtr = &someFunc;
   1.397 
   1.398 +
   1.399 
   1.400 +then they do this:
   1.401 
   1.402 +(*fnPtr)( dataPtr, animatingVirtProcrPtr );
   1.403 
   1.404 +
   1.405 
   1.406 +Can the registers that GCC uses to pass the two pointers be predicted?
   1.407 
   1.408 + Will they always be the same registers, in every program that has the
   1.409 
   1.410 + same typedef?
   1.411 
   1.412 +If that typedef fixes, guaranteed, the registers (on x86) that GCC will use
   1.413 
   1.414 + to send the two pointers, then the rest of this solution works.
   1.415 
   1.416 +
   1.417 
   1.418 +Change in model: Instead of a virtual processor whose execution trace is
   1.419 
   1.420 + divided into work-units, replacing that with the pattern that a virtual
   1.421 
   1.422 + processor is suspended.  Which means, no more "work unit" data structure
   1.423 
   1.424 + -- instead, it's now an "Application Virtual Processor" structure
   1.425 
   1.426 + -- AVProcr -- which is given directly to the application function!
   1.427 
   1.428 +
   1.429 
   1.430 +   -- You were right, don't need slaves to be virtual processors, only need
   1.431 
   1.432 +      "scheduling buckets" -- just a way to keep track of things..
   1.433 
   1.434 +
   1.435 
   1.436 +Restrictions:
   1.437 
   1.438 +-- the  "virtual entities"  created by the semantic layer must be virtual
   1.439 
   1.440 +   processors, created with a function-to-execute and initial data -- the
   1.441 
   1.442 +   function is restricted to return nothing and only take a pointer to the
   1.443 
   1.444 +   initial data plus a pointer to an AVProcr structure, which represents
   1.445 
   1.446 +   "self", the virtual processor created.  (This is the interface I showed
   1.447 
   1.448 +   you for "Hello World" semantic layer).
   1.449 
   1.450 +What this means for synchronous dataflow, is that the nodes in the graph
   1.451 
   1.452 +  are virtual processors that in turn spawn a new virtual processor for
   1.453 
   1.454 +  every "firing" of the node.  This should be fine because the function
   1.455 
   1.456 +  that the node itself is created with is a "canned" function that is part
   1.457 
   1.458 +  of the semantic layer -- the function that is spawned is the user-provided
   1.459 
   1.460 +  function.  The restriction only means that the values from the inputs to
   1.461 
   1.462 +  the node are packaged as the "initial data" given to the spawned virtual
   1.463 
   1.464 +  processor -- so the user-function has to cast a void * to the
   1.465 
   1.466 +  semantic-layer-defined structure by which it gets the inputs to the node.
   1.467 
   1.468 +
   1.469 
   1.470 +-- Second restriction is that the semantic layer has to use VMS supplied
   1.471 
   1.472 +   stuff -- for example, the data structure that represents the
   1.473 
   1.474 +   application-level virtual processor is defined in VMS, and the semantic
   1.475 
   1.476 +   layer has to call a VMS function in order to suspend a virtual processor.
   1.477 
   1.478 +
   1.479 
   1.480 +-- Third restriction is that the application code never do anything with
   1.481 
   1.482 +   the AVProcr structure except pass it to semantic-layer lib calls.
   1.483 
   1.484 +
   1.485 
   1.486 +-- Fourth restriction is that every virtual processor must call a
   1.487 
   1.488 +   "dissipate" function as its last act -- the user-supplied
   1.489 
   1.490 +   virtual-processor function can't just end -- it has to call
   1.491 
   1.492 +   SemLib__dissipate( AVProcr ) before the closing brace.. and after the
   1.493 
   1.494 +   semantic layer is done cleaning up its own data, it has to in turn call
   1.495 
    1.496 +   VMS__dissipate( AVProcr ).
   1.497 
   1.498 +
   1.499 
   1.500 +-- For performance reasons, I think I want to have two different kinds of
   1.501 
   1.502 +   app-virtual processor -- suspendable ones and non-suspendable -- where
   1.503 
   1.504 +   non-suspendable are not allowed to perform any communication with other
   1.505 
   1.506 +   virtual processors, except at birth and death.  Suspendable ones, of
   1.507 
   1.508 +   course can perform communications, create other processors, and so forth
   1.509 
   1.510 +   -- all of which cause it to suspend.
   1.511 
   1.512 +The performance difference is that I need a separate stack for each
   1.513 
   1.514 +  suspendable, but non-suspendable can re-use a fixed number of stacks
   1.515 
   1.516 +  (one for each slave).
   1.517 
   1.518 +
   1.519 
   1.520 +
   1.521 
   1.522 +==================== May 29
   1.523 
   1.524 +
   1.525 
   1.526 +Qs:
   1.527 
   1.528 +--1 how to safely jump between virt processor's trace and coreloop
   1.529 
   1.530 +--2 how to set up __cdecl style stack + frame for just-born virtual processor
   1.531 
   1.532 +--3 how to switch stack-pointers + frame-pointers
   1.533 
   1.534 +
   1.535 
   1.536 +
   1.537 
   1.538 +--1:
   1.539 
   1.540 +Not sure if GCC's computed goto is safe, because modify the stack pointer
   1.541 
   1.542 +without GCC's knowledge -- although, don't use the stack in the coreloop
   1.543 
   1.544 +segment, so, actually, that should be safe!
   1.545 
   1.546 +
   1.547 
   1.548 +So, GCC has its own special C extensions, one of which gets address of label:
   1.549 
   1.550 +
   1.551 
   1.552 +void *labelAddr;
   1.553 
   1.554 +labelAddr = &&label;
   1.555 
   1.556 +goto *labelAddr;
   1.557 
   1.558 +
   1.559 
   1.560 +--2
   1.561 
   1.562 +In CoreLoop, will check whether VirtProc just born, or was suspended.
   1.563 
   1.564 +If just born, do bit of code that sets up the virtual processor's stack
   1.565 
   1.566 +and frame according to the __cdecl convention for the standard virt proc
   1.567 
   1.568 +fn typedef -- save the pointer to data and pointer to virt proc struc into
   1.569 
   1.570 +correct places in the frame
   1.571 
   1.572 +   __cdecl says, according to:
   1.573 
   1.574 +http://unixwiz.net/techtips/win32-callconv-asm.html
   1.575 
   1.576 +To do this:
   1.577 
   1.578 +push the parameters onto the stack, right most first, working backwards to
   1.579 
   1.580 + the left.
   1.581 
   1.582 +Then perform call instr, which pushes return addr onto stack.
   1.583 
   1.584 +Then callee first pushes the frame pointer, %EBP followed by placing the
   1.585 
   1.586 +then-current value of stack pointer into %EBP
   1.587 
   1.588 +push ebp
   1.589 
   1.590 +mov  ebp, esp    // ebp « esp
   1.591 
   1.592 +
   1.593 
   1.594 +Once %ebp has been changed, it can now refer directly to the function's
   1.595 
   1.596 + arguments as 8(%ebp), 12(%ebp). Note that 0(%ebp) is the old base pointer
   1.597 
   1.598 + and 4(%ebp) is the old instruction pointer.
   1.599 
   1.600 +
   1.601 
   1.602 +Then callee pushes regs it will use then adds to stack pointer the size of
   1.603 
   1.604 + its local vars.
   1.605 
   1.606 +
   1.607 
   1.608 +Stack in callee looks like this:
   1.609 
   1.610 +16(%ebp)	 - third function parameter
   1.611 
   1.612 +12(%ebp)	 - second function parameter
   1.613 
   1.614 +8(%ebp)	 - first function parameter
   1.615 
   1.616 +4(%ebp)	 - old %EIP (the function's "return address")
   1.617 
   1.618 +----------^^ State seen at first instr of callee ^^-----------
   1.619 
   1.620 +0(%ebp)	- old %EBP (previous function's base pointer)
   1.621 
   1.622 +-4(%ebp)	 - save of EAX, the only reg used in function
   1.623 
   1.624 +-8(%ebp)	 - first local variable
   1.625 
   1.626 +-12(%ebp)	 - second local variable
   1.627 
   1.628 +-16(%ebp)	 - third local variable
   1.629 
   1.630 +
   1.631 
   1.632 +
   1.633 
   1.634 +--3
   1.635 
   1.636 +It might be just as simple as two mov instrs, one for %ESP, one for %EBP..
   1.637 
   1.638 + the stack and frame pointer regs
   1.639 
     2.1 --- a/SSR.h	Thu Nov 11 04:59:48 2010 -0800
     2.2 +++ b/SSR.h	Thu Sep 22 12:15:04 2011 +0200
     2.3 @@ -27,6 +27,25 @@
     2.4  /*Semantic layer-specific data sent inside a request from lib called in app
     2.5   * to request handler called in MasterLoop
     2.6   */
     2.7 +
     2.8 +typedef struct
     2.9 + {
    2.10 +   VirtProcr      *VPCurrentlyExecuting;
    2.11 +   PrivQueueStruc *waitingVPQ;
    2.12 + }
    2.13 +SSRTrans;
    2.14 +
    2.15 +/*WARNING: assembly hard-codes position of endInstrAddr as first field
    2.16 + */
    2.17 +typedef struct
    2.18 + {
    2.19 +   void           *endInstrAddr;
    2.20 +   int32           hasBeenStarted;
    2.21 +   int32           hasFinished;
    2.22 +   PrivQueueStruc *waitQ;
    2.23 + }
    2.24 +SSRSingleton;
    2.25 +
    2.26  enum SSRReqType
    2.27   {
    2.28     send_type = 1,
    2.29 @@ -38,7 +57,10 @@
    2.30     transfer_out,
    2.31     malloc_req,
    2.32     free_req,
    2.33 -   singleton,
    2.34 +   singleton_fn_start,
    2.35 +   singleton_fn_end,
    2.36 +   singleton_data_start,
    2.37 +   singleton_data_end,
    2.38     atomic,
    2.39     trans_start,
    2.40     trans_end
    2.41 @@ -60,7 +82,7 @@
    2.42     void              *ptrToFree;
    2.43  
    2.44     int32              singletonID;
    2.45 -   void              *endJumpPt;
    2.46 +   SSRSingleton     **singletonPtrAddr;
    2.47  
    2.48     PtrToAtomicFn      fnToExecInMaster;
    2.49     void              *dataForFn;
    2.50 @@ -72,13 +94,6 @@
    2.51  
    2.52  typedef struct
    2.53   {
    2.54 -   VirtProcr      *VPCurrentlyExecuting;
    2.55 -   PrivQueueStruc *waitingVPQ;
    2.56 - }
    2.57 -SSRTrans;
    2.58 -
    2.59 -typedef struct
    2.60 - {
    2.61     PrivQueueStruc **readyVPQs;
    2.62     HashTable       *commHashTbl;
    2.63     int32            numVirtPr;
    2.64 @@ -86,7 +101,7 @@
    2.65     int32            primitiveStartTime;
    2.66  
    2.67                         //fix limit on num with dynArray
    2.68 -   int32            singletonHasBeenExecutedFlags[NUM_STRUCS_IN_SEM_ENV];
    2.69 +   SSRSingleton     fnSingletons[NUM_STRUCS_IN_SEM_ENV];
    2.70     SSRTrans         transactionStrucs[NUM_STRUCS_IN_SEM_ENV];
    2.71   }
    2.72  SSRSemEnv;
    2.73 @@ -115,10 +130,10 @@
    2.74  int32
    2.75  SSR__giveMinWorkUnitCycles( float32 percentOverhead );
    2.76  
    2.77 -void inline
    2.78 +void
    2.79  SSR__start_primitive();
    2.80  
    2.81 -int32 inline
    2.82 +int32
    2.83  SSR__end_primitive_and_give_cycles();
    2.84  
    2.85  int32
    2.86 @@ -137,11 +152,11 @@
    2.87  
    2.88  //=======================
    2.89  
    2.90 -inline VirtProcr *
    2.91 +  VirtProcr *
    2.92  SSR__create_procr_with( VirtProcrFnPtr fnPtr, void *initData,
    2.93                            VirtProcr *creatingPr );
    2.94  
    2.95 -inline VirtProcr *
    2.96 +  VirtProcr *
    2.97  SSR__create_procr_with_affinity( VirtProcrFnPtr fnPtr,    void *initData,
    2.98                              VirtProcr *creatingPr, int32 coreToScheduleOnto);
    2.99  
   2.100 @@ -187,8 +202,16 @@
   2.101  
   2.102  //======================= Concurrency Stuff ======================
   2.103  void
   2.104 -SSR__start_singleton( int32 singletonID, void *endSingletonLabelAddr,
   2.105 -                      VirtProcr *animPr );
   2.106 +SSR__start_fn_singleton( int32 singletonID, VirtProcr *animPr );
   2.107 +
   2.108 +void
   2.109 +SSR__end_fn_singleton( int32 singletonID, VirtProcr *animPr );
   2.110 +
   2.111 +void
   2.112 +SSR__start_data_singleton( SSRSingleton **singeltonAddr, VirtProcr *animPr );
   2.113 +
   2.114 +void
   2.115 +SSR__end_data_singleton( SSRSingleton **singletonAddr, VirtProcr *animPr );
   2.116  
   2.117  void
   2.118  SSR__animate_short_fn_in_isolation( PtrToAtomicFn ptrToFnToExecInMaster,
   2.119 @@ -208,6 +231,9 @@
   2.120  VirtProcr *
   2.121  SSR__schedule_virt_procr( void *_semEnv, int coreNum );
   2.122  
   2.123 +VirtProcr*
   2.124 +SSR__create_procr_helper( VirtProcrFnPtr fnPtr, void *initData,
   2.125 +                          SSRSemEnv *semEnv,    int32 coreToScheduleOnto );
   2.126  
   2.127  #endif	/* _SSR_H */
   2.128  
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/SSR.s	Thu Sep 22 12:15:04 2011 +0200
     3.3 @@ -0,0 +1,21 @@
     3.4 +
     3.5 +//Assembly code takes the return addr off the stack and saves
     3.6 +// into the singleton.  The first field in the singleton is the
      3.7 +// "endInstrAddr" field, and the return addr is at 0x8(%rbp)
     3.8 +.globl asm_save_ret_to_singleton
     3.9 +asm_save_ret_to_singleton:
    3.10 +    movq 0x8(%rbp),     %rax   #get ret address, ebp is the same as in the calling function
    3.11 +    movq     %rax,     (%rdi) #write ret addr to endInstrAddr field
    3.12 +    ret
    3.13 +
    3.14 +
    3.15 +//Assembly code changes the return addr on the stack to the one
    3.16 +// saved into the singleton by the end-singleton-fn
     3.17 +//The stack's return addr is at 0x8(%rbp)
    3.18 +.globl asm_write_ret_from_singleton
    3.19 +asm_write_ret_from_singleton:
    3.20 +    movq    (%rdi),    %rax  #get endInstrAddr field
    3.21 +    movq      %rax,    0x8(%rbp) #write return addr to the stack of the caller
    3.22 +    ret
    3.23 +
    3.24 +
     4.1 --- a/SSR_PluginFns.c	Thu Nov 11 04:59:48 2010 -0800
     4.2 +++ b/SSR_PluginFns.c	Thu Sep 22 12:15:04 2011 +0200
     4.3 @@ -12,10 +12,10 @@
     4.4  #include "SSR_Request_Handlers.h"
     4.5  
     4.6  //=========================== Local Fn Prototypes ===========================
     4.7 -void inline
     4.8 +void
     4.9  resume_procr( VirtProcr *procr, SSRSemEnv *semEnv );
    4.10  
    4.11 -void inline
    4.12 +void
    4.13  handleSemReq( VMSReqst *req, VirtProcr *requestingPr, SSRSemEnv *semEnv );
    4.14  
    4.15  void
    4.16 @@ -33,6 +33,8 @@
    4.17   * to the slave -- return FALSE to let Master loop know scheduling that
    4.18   * slave failed.
    4.19   */
     4.20 +char __Scheduler[] = "FIFO Scheduler"; //Global variable for name in saved histogram
    4.21 +
    4.22  VirtProcr *
    4.23  SSR__schedule_virt_procr( void *_semEnv, int coreNum )
    4.24   { VirtProcr   *schedPr;
    4.25 @@ -63,7 +65,7 @@
    4.26  SSR__Request_Handler( VirtProcr *requestingPr, void *_semEnv )
    4.27   { SSRSemEnv *semEnv;
    4.28     VMSReqst    *req;
    4.29 - 
    4.30 +   
    4.31     semEnv = (SSRSemEnv *)_semEnv;
    4.32  
    4.33     req    = VMS__take_next_request_out_of( requestingPr );
    4.34 @@ -90,7 +92,7 @@
    4.35   }
    4.36  
    4.37  
    4.38 -void inline
    4.39 +void
    4.40  handleSemReq( VMSReqst *req, VirtProcr *reqPr, SSRSemEnv *semEnv )
    4.41   { SSRSemReq *semReq;
    4.42  
    4.43 @@ -114,7 +116,13 @@
    4.44           break;
    4.45        case free_req:        handleFree(         semReq, reqPr, semEnv);
    4.46           break;
    4.47 -      case singleton:       handleSingleton(    semReq, reqPr, semEnv);
    4.48 +      case singleton_fn_start:  handleStartFnSingleton(semReq, reqPr, semEnv);
    4.49 +         break;
    4.50 +      case singleton_fn_end:    handleEndFnSingleton(  semReq, reqPr, semEnv);
    4.51 +         break;
    4.52 +      case singleton_data_start:handleStartDataSingleton(semReq,reqPr,semEnv);
    4.53 +         break;
    4.54 +      case singleton_data_end:  handleEndDataSingleton(semReq, reqPr, semEnv);
    4.55           break;
    4.56        case atomic:          handleAtomic(       semReq, reqPr, semEnv);
    4.57           break;
    4.58 @@ -147,11 +155,11 @@
    4.59  
    4.60  /*Re-use this in the entry-point fn
    4.61   */
    4.62 -inline VirtProcr *
    4.63 +  VirtProcr *
    4.64  SSR__create_procr_helper( VirtProcrFnPtr fnPtr, void *initData,
    4.65                            SSRSemEnv *semEnv,    int32 coreToScheduleOnto )
    4.66   { VirtProcr    *newPr;
    4.67 -   SSRSemData    semData;
    4.68 +   SSRSemData   *semData;
    4.69  
    4.70        //This is running in master, so use internal version
    4.71     newPr = VMS__create_procr( fnPtr, initData );
    4.72 @@ -205,7 +213,7 @@
    4.73  
    4.74  
    4.75  //=========================== Helper ==============================
    4.76 -void inline
    4.77 +void
    4.78  resume_procr( VirtProcr *procr, SSRSemEnv *semEnv )
    4.79   {
    4.80     writePrivQ( procr, semEnv->readyVPQs[ procr->coreAnimatedBy] );
     5.1 --- a/SSR_Request_Handlers.c	Thu Nov 11 04:59:48 2010 -0800
     5.2 +++ b/SSR_Request_Handlers.c	Thu Sep 22 12:15:04 2011 +0200
     5.3 @@ -15,7 +15,7 @@
     5.4  
     5.5  
     5.6  //=========================== Local Fn Prototypes ===========================
     5.7 -void inline
     5.8 +void
     5.9  resume_procr( VirtProcr *procr, SSRSemEnv *semEnv );
    5.10  
    5.11  
    5.12 @@ -25,7 +25,7 @@
    5.13  
    5.14  /*Only clone the elements of req used in these reqst handlers
    5.15   */
    5.16 -inline SSRSemReq *
    5.17 +  SSRSemReq *
    5.18  cloneReq( SSRSemReq *semReq )
    5.19   { SSRSemReq *clonedReq;
    5.20  
    5.21 @@ -81,7 +81,7 @@
    5.22   * separate processors can send to the same receiver, and hashing on the
    5.23   * receive processor, so they will stack up.
    5.24   */
    5.25 -void inline
    5.26 +void
    5.27  handleSendType( SSRSemReq *semReq, SSRSemEnv *semEnv )
    5.28   { VirtProcr   *sendPr, *receivePr;
    5.29     int          key[] = {0,0,0};
    5.30 @@ -150,7 +150,7 @@
    5.31  /*Looks like can make single handler for both sends..
    5.32   */
    5.33  //TODO: combine both send handlers into single handler
    5.34 -void inline
    5.35 +void
    5.36  handleSendFromTo( SSRSemReq *semReq, SSRSemEnv *semEnv)
    5.37   { VirtProcr   *sendPr, *receivePr;
    5.38     int          key[] = {0,0,0};
    5.39 @@ -229,14 +229,14 @@
    5.40   * If ever add receive_any, looking like this second option easier and even
    5.41   * less costly.
    5.42   */
    5.43 -void inline
    5.44 +void
    5.45  handleReceiveAny( SSRSemReq *semReq, SSRSemEnv *semEnv)
    5.46   {
    5.47   
    5.48   }
    5.49  
    5.50  
    5.51 -void inline
    5.52 +void
    5.53  handleReceiveType( SSRSemReq *semReq, SSRSemEnv *semEnv)
    5.54   { VirtProcr   *sendPr, *receivePr;
    5.55     int          key[] = {0,0,0};
    5.56 @@ -284,7 +284,7 @@
    5.57  
    5.58  /*
    5.59   */
    5.60 -void inline
    5.61 +void
    5.62  handleReceiveFromTo( SSRSemReq *semReq, SSRSemEnv *semEnv)
    5.63   { VirtProcr   *sendPr, *receivePr;
    5.64     int          key[] = {0,0,0};
    5.65 @@ -332,13 +332,13 @@
    5.66  
    5.67  
    5.68  //===============================================
    5.69 -void inline
    5.70 +void
    5.71  handleTransferTo( SSRSemReq *semReq, SSRSemEnv *semEnv)
    5.72   {
    5.73  
    5.74   }
    5.75  
    5.76 -void inline
    5.77 +void
    5.78  handleTransferOut( SSRSemReq *semReq, SSRSemEnv *semEnv)
    5.79   {
    5.80  
    5.81 @@ -347,7 +347,7 @@
    5.82  
    5.83  /*
    5.84   */
    5.85 -void inline
    5.86 +void
    5.87  handleMalloc( SSRSemReq *semReq, VirtProcr *requestingPr, SSRSemEnv *semEnv )
    5.88   { void *ptr;
    5.89  
    5.90 @@ -358,7 +358,7 @@
    5.91  
    5.92  /*
    5.93   */
    5.94 -void inline
    5.95 +void
    5.96  handleFree( SSRSemReq *semReq, VirtProcr *requestingPr, SSRSemEnv *semEnv )
    5.97   {
    5.98     VMS__free( semReq->ptrToFree );
    5.99 @@ -372,22 +372,105 @@
   5.100   * end-label.  Else, sets flag and resumes normally.
   5.101   */
   5.102  void inline
   5.103 -handleSingleton( SSRSemReq *semReq, VirtProcr *requestingPr,
   5.104 -                 SSRSemEnv *semEnv )
   5.105 +handleStartSingleton_helper( SSRSingleton *singleton, VirtProcr *reqstingPr,
   5.106 +                             SSRSemEnv    *semEnv )
   5.107   {
   5.108 -   if( semEnv->singletonHasBeenExecutedFlags[ semReq->singletonID ] )
   5.109 -      requestingPr->nextInstrPt = semReq->endJumpPt;
   5.110 +   if( singleton->hasFinished )
   5.111 +    {    //the code that sets the flag to true first sets the end instr addr
   5.112 +      reqstingPr->dataRetFromReq = singleton->endInstrAddr;
   5.113 +      resume_procr( reqstingPr, semEnv );
   5.114 +      return;
   5.115 +    }
   5.116 +   else if( singleton->hasBeenStarted )
   5.117 +    {    //singleton is in-progress in a diff slave, so wait for it to finish
   5.118 +      writePrivQ(reqstingPr, singleton->waitQ );
   5.119 +      return;
   5.120 +    }
   5.121     else
   5.122 -      semEnv->singletonHasBeenExecutedFlags[ semReq->singletonID ] = TRUE;
   5.123 +    {    //hasn't been started, so this is the first attempt at the singleton
   5.124 +      singleton->hasBeenStarted = TRUE;
   5.125 +      reqstingPr->dataRetFromReq = 0x0;
   5.126 +      resume_procr( reqstingPr, semEnv );
   5.127 +      return;
   5.128 +    }
   5.129 + }
   5.130 +void inline
   5.131 +handleStartFnSingleton( SSRSemReq *semReq, VirtProcr *requestingPr,
   5.132 +                      SSRSemEnv *semEnv )
   5.133 + { SSRSingleton *singleton;
   5.134 +
   5.135 +   singleton = &(semEnv->fnSingletons[ semReq->singletonID ]);
   5.136 +   handleStartSingleton_helper( singleton, requestingPr, semEnv );
   5.137 + }
   5.138 +void inline
   5.139 +handleStartDataSingleton( SSRSemReq *semReq, VirtProcr *requestingPr,
   5.140 +                      SSRSemEnv *semEnv )
   5.141 + { SSRSingleton *singleton;
   5.142 +
   5.143 +   if( *(semReq->singletonPtrAddr) == NULL )
   5.144 +    { singleton                 = VMS__malloc( sizeof(SSRSingleton) );
   5.145 +      singleton->waitQ          = makeVMSPrivQ();
   5.146 +      singleton->endInstrAddr   = 0x0;
   5.147 +      singleton->hasBeenStarted = FALSE;
   5.148 +      singleton->hasFinished    = FALSE;
   5.149 +      *(semReq->singletonPtrAddr)  = singleton;
   5.150 +    }
   5.151 +   else
   5.152 +      singleton = *(semReq->singletonPtrAddr);
   5.153 +   handleStartSingleton_helper( singleton, requestingPr, semEnv );
   5.154 + }
   5.155 +
   5.156 +
   5.157 +void inline
   5.158 +handleEndSingleton_helper( SSRSingleton *singleton, VirtProcr *requestingPr,
   5.159 +                           SSRSemEnv    *semEnv )
   5.160 + { PrivQueueStruc *waitQ;
   5.161 +   int32           numWaiting, i;
   5.162 +   VirtProcr      *resumingPr;
   5.163 +
   5.164 +   if( singleton->hasFinished )
   5.165 +    { //by definition, only one slave should ever be able to run end singleton
   5.166 +      // so if this is true, is an error
   5.167 +      //VMS__throw_exception( "singleton code ran twice", requestingPr, NULL);
   5.168 +    }
   5.169 +
   5.170 +   singleton->hasFinished = TRUE;
   5.171 +   waitQ = singleton->waitQ;
   5.172 +   numWaiting = numInPrivQ( waitQ );
   5.173 +   for( i = 0; i < numWaiting; i++ )
   5.174 +    {    //they will resume inside start singleton, then jmp to end singleton
   5.175 +      resumingPr = readPrivQ( waitQ );
   5.176 +      resumingPr->dataRetFromReq = singleton->endInstrAddr;
   5.177 +      resume_procr( resumingPr, semEnv );
   5.178 +    }
   5.179  
   5.180     resume_procr( requestingPr, semEnv );
   5.181 - }
   5.182 +
   5.183 +}
   5.184 +void inline
   5.185 +handleEndFnSingleton( SSRSemReq *semReq, VirtProcr *requestingPr,
   5.186 +                        SSRSemEnv *semEnv )
   5.187 + {
   5.188 +   SSRSingleton   *singleton;
   5.189 +
   5.190 +   singleton = &(semEnv->fnSingletons[ semReq->singletonID ]);
   5.191 +   handleEndSingleton_helper( singleton, requestingPr, semEnv );
   5.192 +  }
   5.193 +void inline
   5.194 +handleEndDataSingleton( SSRSemReq *semReq, VirtProcr *requestingPr,
   5.195 +                        SSRSemEnv *semEnv )
   5.196 + {
   5.197 +   SSRSingleton   *singleton;
   5.198 +
   5.199 +   singleton = *(semReq->singletonPtrAddr);
   5.200 +   handleEndSingleton_helper( singleton, requestingPr, semEnv );
   5.201 +  }
   5.202  
   5.203  
   5.204  /*This executes the function in the masterVP, take the function
   5.205   * pointer out of the request and call it, then resume the VP.
   5.206   */
   5.207 -void inline
   5.208 +void
   5.209  handleAtomic( SSRSemReq *semReq, VirtProcr *requestingPr, SSRSemEnv *semEnv )
   5.210   {
   5.211     semReq->fnToExecInMaster( semReq->dataForFn );
   5.212 @@ -408,7 +491,7 @@
   5.213   * end-transaction, which will take this VP from the queue and resume it.)
   5.214   *If NULL, then write requesting into the field and resume.
   5.215   */
   5.216 -void inline
   5.217 +void
   5.218  handleTransStart( SSRSemReq *semReq, VirtProcr *requestingPr,
   5.219                    SSRSemEnv *semEnv )
   5.220   { SSRSemData *semData;
   5.221 @@ -459,7 +542,7 @@
   5.222   *If get somethine, set VP_currently_executing to the VP from the queue, then
   5.223   * resume both.
   5.224   */
   5.225 -void inline
   5.226 +void
   5.227  handleTransEnd(SSRSemReq *semReq, VirtProcr *requestingPr, SSRSemEnv *semEnv)
   5.228   { SSRSemData    *semData;
   5.229     VirtProcr     *waitingPr;
     6.1 --- a/SSR_Request_Handlers.h	Thu Nov 11 04:59:48 2010 -0800
     6.2 +++ b/SSR_Request_Handlers.h	Thu Sep 22 12:15:04 2011 +0200
     6.3 @@ -14,34 +14,43 @@
     6.4  /*This header defines everything specific to the SSR semantic plug-in
     6.5   */
     6.6  
     6.7 -void inline
     6.8 +inline void
     6.9  handleSendType( SSRSemReq *semReq, SSRSemEnv *semEnv);
    6.10 -void inline
    6.11 +inline void
    6.12  handleSendFromTo( SSRSemReq *semReq, SSRSemEnv *semEnv);
    6.13 -void inline
    6.14 +inline void
    6.15  handleReceiveAny( SSRSemReq *semReq, SSRSemEnv *semEnv);
    6.16 -void inline
    6.17 +inline void
    6.18  handleReceiveType( SSRSemReq *semReq, SSRSemEnv *semEnv);
    6.19 -void inline
    6.20 +inline void
    6.21  handleReceiveFromTo( SSRSemReq *semReq, SSRSemEnv *semEnv);
    6.22 -void inline
    6.23 +inline void
    6.24  handleTransferTo( SSRSemReq *semReq, SSRSemEnv *semEnv);
    6.25 -void inline
    6.26 +inline void
    6.27  handleTransferOut( SSRSemReq *semReq, SSRSemEnv *semEnv);
    6.28 -void inline
    6.29 +inline void
    6.30  handleMalloc( SSRSemReq *semReq, VirtProcr *requestingPr, SSRSemEnv *semEnv);
    6.31 -void inline
    6.32 +inline void
    6.33  handleFree( SSRSemReq *semReq, VirtProcr *requestingPr, SSRSemEnv *semEnv );
    6.34 -void inline
    6.35 +inline void
    6.36  handleTransEnd(SSRSemReq *semReq, VirtProcr *requestingPr, SSRSemEnv*semEnv);
    6.37 -void inline
    6.38 +inline void
    6.39  handleTransStart( SSRSemReq *semReq, VirtProcr *requestingPr,
    6.40                    SSRSemEnv *semEnv );
    6.41 -void inline
    6.42 +inline void
    6.43  handleAtomic( SSRSemReq *semReq, VirtProcr *requestingPr, SSRSemEnv *semEnv);
    6.44 -void inline
    6.45 -handleSingleton( SSRSemReq *semReq, VirtProcr *requestingPr,
    6.46 -                 SSRSemEnv *semEnv );
    6.47 +inline void
    6.48 +handleStartFnSingleton( SSRSemReq *semReq, VirtProcr *reqstingPr,
    6.49 +                      SSRSemEnv *semEnv );
    6.50 +inline void
    6.51 +handleEndFnSingleton( SSRSemReq *semReq, VirtProcr *requestingPr,
    6.52 +                    SSRSemEnv *semEnv );
    6.53 +inline void
    6.54 +handleStartDataSingleton( SSRSemReq *semReq, VirtProcr *reqstingPr,
    6.55 +                      SSRSemEnv *semEnv );
    6.56 +inline void
    6.57 +handleEndDataSingleton( SSRSemReq *semReq, VirtProcr *requestingPr,
    6.58 +                    SSRSemEnv *semEnv );
    6.59  
    6.60  #endif	/* _SSR_REQ_H */
    6.61  
     7.1 --- a/SSR_lib.c	Thu Nov 11 04:59:48 2010 -0800
     7.2 +++ b/SSR_lib.c	Thu Sep 22 12:15:04 2011 +0200
     7.3 @@ -12,6 +12,7 @@
     7.4  #include "SSR.h"
     7.5  #include "VMS/Queue_impl/PrivateQueue.h"
     7.6  #include "VMS/Hash_impl/PrivateHash.h"
     7.7 +#include "SSR.h"
     7.8  
     7.9  
    7.10  //==========================================================================
    7.11 @@ -118,7 +119,7 @@
    7.12   }
    7.13  
    7.14  
    7.15 -int32 inline
    7.16 +int32
    7.17  SSR__giveMinWorkUnitCycles( float32 percentOverhead )
    7.18   {
    7.19     return MIN_WORK_UNIT_CYCLES;
    7.20 @@ -139,7 +140,7 @@
    7.21  /*For now, use TSC -- later, make these two macros with assembly that first
    7.22   * saves jump point, and second jumps back several times to get reliable time
    7.23   */
    7.24 -void inline
    7.25 +void
    7.26  SSR__start_primitive()
    7.27   { saveLowTimeStampCountInto( ((SSRSemEnv *)(_VMSMasterEnv->semanticEnv))->
    7.28                                primitiveStartTime );
    7.29 @@ -150,7 +151,7 @@
    7.30   * because don't want comm time included in calc-time measurement -- and
    7.31   * also to throw out any "weird" values due to OS interrupt or TSC rollover
    7.32   */
    7.33 -int32 inline
    7.34 +int32
    7.35  SSR__end_primitive_and_give_cycles()
    7.36   { int32 endTime, startTime;
    7.37     //TODO: fix by repeating time-measurement
    7.38 @@ -181,14 +182,17 @@
    7.39     SSR__init_Helper();
    7.40   }
    7.41  
    7.42 +#ifdef SEQUENTIAL
    7.43  void
    7.44  SSR__init_Seq()
    7.45   {
    7.46     VMS__init_Seq();
    7.47 +   flushRegisters();
    7.48        //masterEnv, a global var, now is partially set up by init_VMS
    7.49  
    7.50     SSR__init_Helper();
    7.51   }
    7.52 +#endif
    7.53  
    7.54  void
    7.55  SSR__init_Helper()
    7.56 @@ -213,7 +217,7 @@
    7.57  
    7.58     for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
    7.59      {
    7.60 -      readyVPQs[ coreIdx ] = makePrivQ();
    7.61 +      readyVPQs[ coreIdx ] = makeVMSPrivQ();
    7.62      }
    7.63     
    7.64     semanticEnv->readyVPQs = readyVPQs;
    7.65 @@ -228,8 +232,11 @@
    7.66     //semanticEnv->transactionStrucs = makeDynArrayInfo( );
    7.67     for( i = 0; i < NUM_STRUCS_IN_SEM_ENV; i++ )
    7.68      {
    7.69 -      semanticEnv->singletonHasBeenExecutedFlags[i] = FALSE;
    7.70 -      semanticEnv->transactionStrucs[i].waitingVPQ = makePrivQ();
    7.71 +      semanticEnv->fnSingletons[i].endInstrAddr      = NULL;
    7.72 +      semanticEnv->fnSingletons[i].hasBeenStarted    = FALSE;
    7.73 +      semanticEnv->fnSingletons[i].hasFinished       = FALSE;
    7.74 +      semanticEnv->fnSingletons[i].waitQ             = makeVMSPrivQ();
    7.75 +      semanticEnv->transactionStrucs[i].waitingVPQ   = makeVMSPrivQ();
    7.76      }
    7.77   }
    7.78  
    7.79 @@ -263,7 +270,7 @@
    7.80  
    7.81  /*
    7.82   */
    7.83 -inline VirtProcr *
    7.84 +  VirtProcr *
    7.85  SSR__create_procr_with( VirtProcrFnPtr fnPtr,   void *initData,
    7.86                          VirtProcr *creatingPr )
    7.87   { SSRSemReq reqData;
    7.88 @@ -282,10 +289,10 @@
    7.89     return creatingPr->dataRetFromReq;
    7.90   }
    7.91  
    7.92 -inline VirtProcr *
    7.93 +  VirtProcr *
    7.94  SSR__create_procr_with_affinity( VirtProcrFnPtr fnPtr, void *initData,
    7.95                          VirtProcr *creatingPr,  int32  coreToScheduleOnto )
    7.96 - { SSRSemReq reqData;
    7.97 + { SSRSemReq  reqData;
    7.98  
    7.99        //the semantic request data is on the stack and disappears when this
   7.100        // call returns -- it's guaranteed to remain in the VP's stack for as
   7.101 @@ -297,10 +304,12 @@
   7.102     reqData.sendPr             = creatingPr;
   7.103  
   7.104     VMS__send_create_procr_req( &reqData, creatingPr );
   7.105 +
   7.106 +   return creatingPr->dataRetFromReq;
   7.107   }
   7.108  
   7.109  
   7.110 -inline void
   7.111 +  void
   7.112  SSR__dissipate_procr( VirtProcr *procrToDissipate )
   7.113   {
   7.114     VMS__send_dissipate_req( procrToDissipate );
   7.115 @@ -475,20 +484,111 @@
   7.116  
   7.117  
   7.118  //===========================================================================
   7.119 +//
   7.120 +/*A function singleton is a function whose body executes exactly once, on a
   7.121 + * single core, no matter how many times the function is called and no
   7.122 + * matter how many cores or the timing of cores calling it.
   7.123 + *
   7.124 + *A data singleton is a ticket attached to data.  That ticket can be used
   7.125 + * to get the data through the function exactly once, no matter how many
   7.126 + * times the data is given to the function, and no matter the timing of
   7.127 + * trying to get the data through from different cores.
   7.128 + */
   7.129  
   7.130 -/*Uses ID as index into array of flags.  If flag already set, resumes from
   7.131 - * end-label.  Else, sets flag and resumes normally.
   7.132 +/*asm function declarations*/
   7.133 +void asm_save_ret_to_singleton(SSRSingleton *singletonPtrAddr);
   7.134 +void asm_write_ret_from_singleton(SSRSingleton *singletonPtrAddr);
   7.135 +
   7.136 +/*Fn singleton uses ID as index into array of singleton structs held in the
   7.137 + * semantic environment.
   7.138   */
   7.139  void
   7.140 -SSR__start_singleton( int32 singletonID, void *endSingletonLabelAddr,
   7.141 -                      VirtProcr *animPr )
   7.142 +SSR__start_fn_singleton( int32 singletonID,   VirtProcr *animPr )
   7.143   {
   7.144     SSRSemReq  reqData;
   7.145  
   7.146        //
   7.147 -   reqData.reqType     = singleton;
   7.148 +   reqData.reqType     = singleton_fn_start;
   7.149     reqData.singletonID = singletonID;
   7.150 -   reqData.endJumpPt   = endSingletonLabelAddr;
   7.151 +
   7.152 +   VMS__send_sem_request( &reqData, animPr );
   7.153 +   if( animPr->dataRetFromReq ) //will be 0 or addr of label in end singleton
   7.154 +    {
   7.155 +       SSRSemEnv *semEnv = VMS__give_sem_env_for( animPr );
   7.156 +       asm_write_ret_from_singleton(&(semEnv->fnSingletons[ singletonID]));
   7.157 +    }
   7.158 + }
   7.159 +
   7.160 +/*Data singleton hands addr of loc holding a pointer to a singleton struct.
   7.161 + * The start_data_singleton makes the structure and puts its addr into the
   7.162 + * location.
   7.163 + */
   7.164 +void
   7.165 +SSR__start_data_singleton( SSRSingleton **singletonAddr,  VirtProcr *animPr )
   7.166 + {
   7.167 +   SSRSemReq  reqData;
   7.168 +
   7.169 +   if( *singletonAddr && (*singletonAddr)->hasFinished )
   7.170 +       goto JmpToEndSingleton;
   7.171 +   
   7.172 +   reqData.reqType          = singleton_data_start;
   7.173 +   reqData.singletonPtrAddr = singletonAddr;
   7.174 +
   7.175 +   VMS__send_sem_request( &reqData, animPr );
   7.176 +   if( animPr->dataRetFromReq ) //either 0 or end singleton's return addr
   7.177 +    {    //Assembly code changes the return addr on the stack to the one
   7.178 +         // saved into the singleton by the end-singleton-fn
   7.179 +         //The return addr is at 0x4(%%ebp)
   7.180 +        JmpToEndSingleton:
   7.181 +          asm_write_ret_from_singleton(*singletonAddr);
   7.182 +    }
   7.183 +   //now, simply return
   7.184 +   //will exit either from the start singleton call or the end-singleton call
   7.185 + }
   7.186 +
   7.187 +/*Uses ID as index into array of flags.  If flag already set, resumes from
   7.188 + * end-label.  Else, sets flag and resumes normally.
   7.189 + *
   7.190 + *Note, this call cannot be inlined because the instr addr at the label
   7.191 + * inside is shared by all invocations of a given singleton ID.
   7.192 + */
   7.193 +void
   7.194 +SSR__end_fn_singleton( int32 singletonID, VirtProcr *animPr )
   7.195 + {
   7.196 +   SSRSemReq  reqData;
   7.197 +
   7.198 +      //don't need this addr until after at least one singleton has reached
   7.199 +      // this function
   7.200 +   SSRSemEnv *semEnv = VMS__give_sem_env_for( animPr );
   7.201 +   asm_write_ret_from_singleton(&(semEnv->fnSingletons[ singletonID]));
   7.202 +
   7.203 +   reqData.reqType     = singleton_fn_end;
   7.204 +   reqData.singletonID = singletonID;
   7.205 +
   7.206 +   VMS__send_sem_request( &reqData, animPr );
   7.207 +
   7.208 +EndSingletonInstrAddr:
   7.209 +   return;
   7.210 + }
   7.211 +
   7.212 +void
   7.213 +SSR__end_data_singleton(  SSRSingleton **singletonPtrAddr, VirtProcr *animPr )
   7.214 + {
   7.215 +   SSRSemReq  reqData;
   7.216 +
   7.217 +      //don't need this addr until after singleton struct has reached
   7.218 +      // this function for first time
   7.219 +      //do assembly that saves the return addr of this fn call into the
   7.220 +      // data singleton -- that data-singleton can only be given to exactly
   7.221 +      // one instance in the code of this function.  However, can use this
   7.222 +      // function in different places for different data-singletons.
   7.223 +//   (*(singletonAddr))->endInstrAddr =  &&EndDataSingletonInstrAddr;
   7.224 +
   7.225 +
   7.226 +   asm_save_ret_to_singleton(*singletonPtrAddr);
   7.227 +
   7.228 +   reqData.reqType          = singleton_data_end;
   7.229 +   reqData.singletonPtrAddr = singletonPtrAddr;
   7.230  
   7.231     VMS__send_sem_request( &reqData, animPr );
   7.232   }