changeset 24:a8e41e0bfa61 test_without_inline

fixed: uninitialized variable
author Merten Sach <msach@mailbox.tu-berlin.de>
date Thu, 12 May 2011 14:25:03 +0200
parents 72b8d73c324d
children cd2d0a81e3f7
files DESIGN_NOTES.txt SSR.h SSR_PluginFns.c SSR_Request_Handlers.h SSR_lib.c
diffstat 5 files changed, 237 insertions(+), 231 deletions(-) [+]
line diff
     1.1 --- a/DESIGN_NOTES.txt	Sun Nov 14 11:09:18 2010 -0800
     1.2 +++ b/DESIGN_NOTES.txt	Thu May 12 14:25:03 2011 +0200
     1.3 @@ -1,212 +1,212 @@
     1.4 -
     1.5 -From e-mail to Albert, on design of app-virt-procr to core-loop animation
     1.6 -switch and back.
     1.7 -
     1.8 -====================
     1.9 -General warnings about this code:
    1.10 -It only compiles in GCC 4.x  (label addr and computed goto)
    1.11 -Has assembly for x86  32bit
    1.12 -
    1.13 -
    1.14 -====================
    1.15 -AVProcr data-struc has: stack-ptr, jump-ptr, data-ptr, slotNum, coreloop-ptr
    1.16 - and semantic-custom-ptr
    1.17 -
    1.18 -The VMS Creator: takes ptr to function and ptr to initial data
    1.19 --- creates a new AVProcr struc
    1.20 --- sets the jmp-ptr field to the ptr-to-function passed in
    1.21 --- sets the data-ptr to ptr to initial data passed in
    1.22 --- if this is for a suspendable virt  processor, then create a stack and set
    1.23 -   the stack-ptr
    1.24 -
    1.25 -VMS__create_procr( AVProcrFnPtr fnPtr, void *initialData )
    1.26 -{
    1.27 -AVProcr   newPr = malloc( sizeof(AVProcr) );
    1.28 -newPr->jmpPtr = fnPtr;
    1.29 -newPr->coreLoopDonePt = &CoreLoopDonePt; //label is in coreLoop
    1.30 -newPr->data = initialData;
    1.31 -newPr->stackPtr = createNewStack();
    1.32 -return newPr;
    1.33 -}
    1.34 -
    1.35 -The semantic layer can then add its own state in the custom-ptr field
    1.36 -
    1.37 -The Scheduler plug-in:
    1.38 --- Sets slave-ptr in AVProcr, and points the slave to AVProcr
    1.39 --- if non-suspendable, sets the AVProcr's stack-ptr to the slave's stack-ptr
    1.40 -
    1.41 -MasterLoop:
    1.42 --- puts AVProcr structures onto the workQ
    1.43 -
    1.44 -CoreLoop:
    1.45 --- gets stack-ptr out of AVProcr and sets the core's stack-ptr to that
    1.46 --- gets data-ptr out of AVProcr and puts it into reg GCC uses for that param
    1.47 --- puts AVProcr's addr into reg GCC uses for the AVProcr-pointer param
    1.48 --- jumps to the addr in AVProcr's jmp-ptr field
    1.49 -CoreLoop()
    1.50 -{ while( FOREVER )
    1.51 - { nextPr = readQ( workQ );  //workQ is static (global) var declared volatile
    1.52 -   <dataPtr-param-register>       = nextPr->data;
    1.53 -   <AVProcrPtr-param-register> = nextPr;
    1.54 -   <stack-pointer register>          = nextPr->stackPtr;
    1.55 -   jmp nextPr->jmpPtr;
    1.56 -CoreLoopDonePt:   //label's addr put into AVProcr when create new one
    1.57 - }
    1.58 -}
    1.59 -(Note, for suspendable processors coming back from suspension, there is no
    1.60 - need to fill the parameter registers -- they will be discarded)
    1.61 -
    1.62 -Suspend an application-level virtual processor:
    1.63 -VMS__AVPSuspend( AVProcr *pr )
    1.64 -{
    1.65 -pr->jmpPtr = &ResumePt;  //label defined a few lines below
    1.66 -pr->slave->doneFlag = TRUE;
    1.67 -pr->stackPtr = <current SP reg value>;
    1.68 -jmp pr->coreLoopDonePt;
    1.69 -ResumePt: return;
    1.70 -}
    1.71 -
    1.72 -This works because the core loop will have switched back to this stack
    1.73 - before jumping to ResumePt..    also, the core loop never modifies the
    1.74 - stack pointer, it simply switches to whatever stack pointer is in the
    1.75 - next AVProcr it gets off the workQ.
    1.76 -
    1.77 -
    1.78 -
    1.79 -=============================================================================
    1.80 -As it is now, there's only one major unknown about GCC (first thing below
    1.81 -  the line),  and there are a few restrictions, the most intrusive being
    1.82 -  that the functions the application gives to the semantic layer have a
    1.83 -  pre-defined prototype -- return nothing, take a pointer to initial data
    1.84 -  and a pointer to an AVProcr struc, which they're not allowed to modify
    1.85 -  -- only pass it to semantic-lib calls.
    1.86 -
    1.87 -So, here are the assumptions, restrictions, and so forth:
    1.88 -===========================
    1.89 -Major assumption:  that GCC will do the following the same way every time:
    1.90 -  say the application defines a function that fits this typedef:
    1.91 -typedef void (*AVProcrFnPtr)  ( void *, AVProcr * );
    1.92 -
    1.93 -and let's say somewhere in the code they do this:
    1.94 -AVProcrFnPtr   fnPtr = &someFunc;
    1.95 -
    1.96 -then they do this:
    1.97 -(*fnPtr)( dataPtr, animatingVirtProcrPtr );
    1.98 -
    1.99 -Can the registers that GCC uses to pass the two pointers be predicted?
   1.100 - Will they always be the same registers, in every program that has the
   1.101 - same typedef?
   1.102 -If that typedef fixes, guaranteed, the registers (on x86) that GCC will use
   1.103 - to send the two pointers, then the rest of this solution works.
   1.104 -
   1.105 -Change in model: Instead of a virtual processor whose execution trace is
   1.106 - divided into work-units, replacing that with the pattern that a virtual
   1.107 - processor is suspended.  Which means, no more "work unit" data structure
   1.108 - -- instead, it's now an "Application Virtual Processor" structure
   1.109 - -- AVProcr -- which is given directly to the application function!
   1.110 -
   1.111 -   -- You were right, don't need slaves to be virtual processors, only need
   1.112 -      "scheduling buckets" -- just a way to keep track of things..
   1.113 -
   1.114 -Restrictions:
   1.115 --- the  "virtual entities"  created by the semantic layer must be virtual
   1.116 -   processors, created with a function-to-execute and initial data -- the
   1.117 -   function is restricted to return nothing and only take a pointer to the
   1.118 -   initial data plus a pointer to an AVProcr structure, which represents
   1.119 -   "self", the virtual processor created.  (This is the interface I showed
   1.120 -   you for "Hello World" semantic layer).
   1.121 -What this means for synchronous dataflow, is that the nodes in the graph
   1.122 -  are virtual processors that in turn spawn a new virtual processor for
   1.123 -  every "firing" of the node.  This should be fine because the function
   1.124 -  that the node itself is created with is a "canned" function that is part
   1.125 -  of the semantic layer -- the function that is spawned is the user-provided
   1.126 -  function.  The restriction only means that the values from the inputs to
   1.127 -  the node are packaged as the "initial data" given to the spawned virtual
   1.128 -  processor -- so the user-function has to cast a void * to the
   1.129 -  semantic-layer-defined structure by which it gets the inputs to the node.
   1.130 -
   1.131 --- Second restriction is that the semantic layer has to use VMS supplied
   1.132 -   stuff -- for example, the data structure that represents the
   1.133 -   application-level virtual processor is defined in VMS, and the semantic
   1.134 -   layer has to call a VMS function in order to suspend a virtual processor.
   1.135 -
   1.136 --- Third restriction is that the application code never do anything with
   1.137 -   the AVProcr structure except pass it to semantic-layer lib calls.
   1.138 -
   1.139 --- Fourth restriction is that every virtual processor must call a
   1.140 -   "dissipate" function as its last act -- the user-supplied
   1.141 -   virtual-processor function can't just end -- it has to call
   1.142 -   SemLib__dissipate( AVProcr ) before the closing brace.. and after the
   1.143 -   semantic layer is done cleaning up its own data, it has to in turn call
   1.144 -   VMS__dissipate( AVProcr ).
   1.145 -
   1.146 --- For performance reasons, I think I want to have two different kinds of
   1.147 -   app-virtual processor -- suspendable ones and non-suspendable -- where
   1.148 -   non-suspendable are not allowed to perform any communication with other
   1.149 -   virtual processors, except at birth and death.  Suspendable ones, of
   1.150 -   course can perform communications, create other processors, and so forth
   1.151 -   -- all of which cause it to suspend.
   1.152 -The performance difference is that I need a separate stack for each
   1.153 -  suspendable, but non-suspendable can re-use a fixed number of stacks
   1.154 -  (one for each slave).
   1.155 -
   1.156 -
   1.157 -==================== May 29
   1.158 -
   1.159 -Qs:
   1.160 ---1 how to safely jump between virt processor's trace and coreloop
   1.161 ---2 how to set up __cdecl style stack + frame for just-born virtual processor
   1.162 ---3 how to switch stack-pointers + frame-pointers
   1.163 -
   1.164 -
   1.165 ---1:
   1.166 -Not sure if GCC's computed goto is safe, because we modify the stack pointer
   1.167 -without GCC's knowledge -- although, don't use the stack in the coreloop
   1.168 -segment, so, actually, that should be safe!
   1.169 -
   1.170 -So, GCC has its own special C extensions, one of which gets address of label:
   1.171 -
   1.172 -void *labelAddr;
   1.173 -labelAddr = &&label;
   1.174 -goto *labelAddr;
   1.175 -
   1.176 ---2
   1.177 -In CoreLoop, will check whether VirtProc just born, or was suspended.
   1.178 -If just born, do bit of code that sets up the virtual processor's stack
   1.179 -and frame according to the __cdecl convention for the standard virt proc
   1.180 -fn typedef -- save the pointer to data and pointer to virt proc struc into
   1.181 -correct places in the frame
   1.182 -   __cdecl says, according to:
   1.183 -http://unixwiz.net/techtips/win32-callconv-asm.html
   1.184 -To do this:
   1.185 -push the parameters onto the stack, right most first, working backwards to
   1.186 - the left.
   1.187 -Then perform call instr, which pushes return addr onto stack.
   1.188 -Then callee first pushes the frame pointer, %EBP followed by placing the
   1.189 -then-current value of stack pointer into %EBP
   1.190 -push ebp
   1.191 -mov  ebp, esp    // ebp « esp
   1.192 -
   1.193 -Once %ebp has been changed, it can now refer directly to the function's
   1.194 - arguments as 8(%ebp), 12(%ebp). Note that 0(%ebp) is the old base pointer
   1.195 - and 4(%ebp) is the old instruction pointer.
   1.196 -
   1.197 -Then callee pushes regs it will use then adds to stack pointer the size of
   1.198 - its local vars.
   1.199 -
   1.200 -Stack in callee looks like this:
   1.201 -16(%ebp)	 - third function parameter
   1.202 -12(%ebp)	 - second function parameter
   1.203 -8(%ebp)	 - first function parameter
   1.204 -4(%ebp)	 - old %EIP (the function's "return address")
   1.205 -----------^^ State seen at first instr of callee ^^-----------
   1.206 -0(%ebp)	- old %EBP (previous function's base pointer)
   1.207 --4(%ebp)	 - save of EAX, the only reg used in function
   1.208 --8(%ebp)	 - first local variable
   1.209 --12(%ebp)	 - second local variable
   1.210 --16(%ebp)	 - third local variable
   1.211 -
   1.212 -
   1.213 ---3
   1.214 -It might be just as simple as two mov instrs, one for %ESP, one for %EBP..
   1.215 - the stack and frame pointer regs
   1.216 +
   1.217 
   1.218 +From e-mail to Albert, on design of app-virt-procr to core-loop animation
   1.219 
   1.220 +switch and back.
   1.221 
   1.222 +
   1.223 
   1.224 +====================
   1.225 
   1.226 +General warnings about this code:
   1.227 
   1.228 +It only compiles in GCC 4.x  (label addr and computed goto)
   1.229 
   1.230 +Has assembly for x86  32bit
   1.231 
   1.232 +
   1.233 
   1.234 +
   1.235 
   1.236 +====================
   1.237 
   1.238 +AVProcr data-struc has: stack-ptr, jump-ptr, data-ptr, slotNum, coreloop-ptr
   1.239 
   1.240 + and semantic-custom-ptr
   1.241 
   1.242 +
   1.243 
   1.244 +The VMS Creator: takes ptr to function and ptr to initial data
   1.245 
   1.246 +-- creates a new AVProcr struc
   1.247 
   1.248 +-- sets the jmp-ptr field to the ptr-to-function passed in
   1.249 
   1.250 +-- sets the data-ptr to ptr to initial data passed in
   1.251 
   1.252 +-- if this is for a suspendable virt  processor, then create a stack and set
   1.253 
   1.254 +   the stack-ptr
   1.255 
   1.256 +
   1.257 
   1.258 +VMS__create_procr( AVProcrFnPtr fnPtr, void *initialData )
   1.259 
   1.260 +{
   1.261 
   1.262 +AVProcr   newPr = malloc( sizeof(AVProcr) );
   1.263 
   1.264 +newPr->jmpPtr = fnPtr;
   1.265 
   1.266 +newPr->coreLoopDonePt = &CoreLoopDonePt; //label is in coreLoop
   1.267 
   1.268 +newPr->data = initialData;
   1.269 
   1.270 +newPr->stackPtr = createNewStack();
   1.271 
   1.272 +return newPr;
   1.273 
   1.274 +}
   1.275 
   1.276 +
   1.277 
   1.278 +The semantic layer can then add its own state in the custom-ptr field
   1.279 
   1.280 +
   1.281 
   1.282 +The Scheduler plug-in:
   1.283 
   1.284 +-- Sets slave-ptr in AVProcr, and points the slave to AVProcr
   1.285 
   1.286 +-- if non-suspendable, sets the AVProcr's stack-ptr to the slave's stack-ptr
   1.287 
   1.288 +
   1.289 
   1.290 +MasterLoop:
   1.291 
   1.292 +-- puts AVProcr structures onto the workQ
   1.293 
   1.294 +
   1.295 
   1.296 +CoreLoop:
   1.297 
   1.298 +-- gets stack-ptr out of AVProcr and sets the core's stack-ptr to that
   1.299 
   1.300 +-- gets data-ptr out of AVProcr and puts it into reg GCC uses for that param
   1.301 
   1.302 +-- puts AVProcr's addr into reg GCC uses for the AVProcr-pointer param
   1.303 
   1.304 +-- jumps to the addr in AVProcr's jmp-ptr field
   1.305 
   1.306 +CoreLoop()
   1.307 
   1.308 +{ while( FOREVER )
   1.309 
   1.310 + { nextPr = readQ( workQ );  //workQ is static (global) var declared volatile
   1.311 
   1.312 +   <dataPtr-param-register>       = nextPr->data;
   1.313 
   1.314 +   <AVProcrPtr-param-register> = nextPr;
   1.315 
   1.316 +   <stack-pointer register>          = nextPr->stackPtr;
   1.317 
   1.318 +   jmp nextPr->jmpPtr;
   1.319 
   1.320 +CoreLoopDonePt:   //label's addr put into AVProcr when create new one
   1.321 
   1.322 + }
   1.323 
   1.324 +}
   1.325 
   1.326 +(Note, for suspendable processors coming back from suspension, there is no
   1.327 
   1.328 + need to fill the parameter registers -- they will be discarded)
   1.329 
   1.330 +
   1.331 
   1.332 +Suspend an application-level virtual processor:
   1.333 
   1.334 +VMS__AVPSuspend( AVProcr *pr )
   1.335 
   1.336 +{
   1.337 
   1.338 +pr->jmpPtr = &ResumePt;  //label defined a few lines below
   1.339 
   1.340 +pr->slave->doneFlag = TRUE;
   1.341 
   1.342 +pr->stackPtr = <current SP reg value>;
   1.343 
   1.344 +jmp pr->coreLoopDonePt;
   1.345 
   1.346 +ResumePt: return;
   1.347 
   1.348 +}
   1.349 
   1.350 +
   1.351 
   1.352 +This works because the core loop will have switched back to this stack
   1.353 
   1.354 + before jumping to ResumePt..    also, the core loop never modifies the
   1.355 
   1.356 + stack pointer, it simply switches to whatever stack pointer is in the
   1.357 
   1.358 + next AVProcr it gets off the workQ.
   1.359 
   1.360 +
   1.361 
   1.362 +
   1.363 
   1.364 +
   1.365 
   1.366 +=============================================================================
   1.367 
   1.368 +As it is now, there's only one major unknown about GCC (first thing below
   1.369 
   1.370 +  the line),  and there are a few restrictions, the most intrusive being
   1.371 
   1.372 +  that the functions the application gives to the semantic layer have a
   1.373 
   1.374 +  pre-defined prototype -- return nothing, take a pointer to initial data
   1.375 
   1.376 +  and a pointer to an AVProcr struc, which they're not allowed to modify
   1.377 
   1.378 +  -- only pass it to semantic-lib calls.
   1.379 
   1.380 +
   1.381 
   1.382 +So, here are the assumptions, restrictions, and so forth:
   1.383 
   1.384 +===========================
   1.385 
   1.386 +Major assumption:  that GCC will do the following the same way every time:
   1.387 
   1.388 +  say the application defines a function that fits this typedef:
   1.389 
   1.390 +typedef void (*AVProcrFnPtr)  ( void *, AVProcr * );
   1.391 
   1.392 +
   1.393 
   1.394 +and let's say somewhere in the code they do this:
   1.395 
   1.396 +AVProcrFnPtr   fnPtr = &someFunc;
   1.397 
   1.398 +
   1.399 
   1.400 +then they do this:
   1.401 
   1.402 +(*fnPtr)( dataPtr, animatingVirtProcrPtr );
   1.403 
   1.404 +
   1.405 
   1.406 +Can the registers that GCC uses to pass the two pointers be predicted?
   1.407 
   1.408 + Will they always be the same registers, in every program that has the
   1.409 
   1.410 + same typedef?
   1.411 
   1.412 +If that typedef fixes, guaranteed, the registers (on x86) that GCC will use
   1.413 
   1.414 + to send the two pointers, then the rest of this solution works.
   1.415 
   1.416 +
   1.417 
   1.418 +Change in model: Instead of a virtual processor whose execution trace is
   1.419 
   1.420 + divided into work-units, replacing that with the pattern that a virtual
   1.421 
   1.422 + processor is suspended.  Which means, no more "work unit" data structure
   1.423 
   1.424 + -- instead, it's now an "Application Virtual Processor" structure
   1.425 
   1.426 + -- AVProcr -- which is given directly to the application function!
   1.427 
   1.428 +
   1.429 
   1.430 +   -- You were right, don't need slaves to be virtual processors, only need
   1.431 
   1.432 +      "scheduling buckets" -- just a way to keep track of things..
   1.433 
   1.434 +
   1.435 
   1.436 +Restrictions:
   1.437 
   1.438 +-- the  "virtual entities"  created by the semantic layer must be virtual
   1.439 
   1.440 +   processors, created with a function-to-execute and initial data -- the
   1.441 
   1.442 +   function is restricted to return nothing and only take a pointer to the
   1.443 
   1.444 +   initial data plus a pointer to an AVProcr structure, which represents
   1.445 
   1.446 +   "self", the virtual processor created.  (This is the interface I showed
   1.447 
   1.448 +   you for "Hello World" semantic layer).
   1.449 
   1.450 +What this means for synchronous dataflow, is that the nodes in the graph
   1.451 
   1.452 +  are virtual processors that in turn spawn a new virtual processor for
   1.453 
   1.454 +  every "firing" of the node.  This should be fine because the function
   1.455 
   1.456 +  that the node itself is created with is a "canned" function that is part
   1.457 
   1.458 +  of the semantic layer -- the function that is spawned is the user-provided
   1.459 
   1.460 +  function.  The restriction only means that the values from the inputs to
   1.461 
   1.462 +  the node are packaged as the "initial data" given to the spawned virtual
   1.463 
   1.464 +  processor -- so the user-function has to cast a void * to the
   1.465 
   1.466 +  semantic-layer-defined structure by which it gets the inputs to the node.
   1.467 
   1.468 +
   1.469 
   1.470 +-- Second restriction is that the semantic layer has to use VMS supplied
   1.471 
   1.472 +   stuff -- for example, the data structure that represents the
   1.473 
   1.474 +   application-level virtual processor is defined in VMS, and the semantic
   1.475 
   1.476 +   layer has to call a VMS function in order to suspend a virtual processor.
   1.477 
   1.478 +
   1.479 
   1.480 +-- Third restriction is that the application code never do anything with
   1.481 
   1.482 +   the AVProcr structure except pass it to semantic-layer lib calls.
   1.483 
   1.484 +
   1.485 
   1.486 +-- Fourth restriction is that every virtual processor must call a
   1.487 
   1.488 +   "dissipate" function as its last act -- the user-supplied
   1.489 
   1.490 +   virtual-processor function can't just end -- it has to call
   1.491 
   1.492 +   SemLib__dissipate( AVProcr ) before the closing brace.. and after the
   1.493 
   1.494 +   semantic layer is done cleaning up its own data, it has to in turn call
   1.495 
   1.496 +   VMS__dissipate( AVProcr ).
   1.497 
   1.498 +
   1.499 
   1.500 +-- For performance reasons, I think I want to have two different kinds of
   1.501 
   1.502 +   app-virtual processor -- suspendable ones and non-suspendable -- where
   1.503 
   1.504 +   non-suspendable are not allowed to perform any communication with other
   1.505 
   1.506 +   virtual processors, except at birth and death.  Suspendable ones, of
   1.507 
   1.508 +   course can perform communications, create other processors, and so forth
   1.509 
   1.510 +   -- all of which cause it to suspend.
   1.511 
   1.512 +The performance difference is that I need a separate stack for each
   1.513 
   1.514 +  suspendable, but non-suspendable can re-use a fixed number of stacks
   1.515 
   1.516 +  (one for each slave).
   1.517 
   1.518 +
   1.519 
   1.520 +
   1.521 
   1.522 +==================== May 29
   1.523 
   1.524 +
   1.525 
   1.526 +Qs:
   1.527 
   1.528 +--1 how to safely jump between virt processor's trace and coreloop
   1.529 
   1.530 +--2 how to set up __cdecl style stack + frame for just-born virtual processor
   1.531 
   1.532 +--3 how to switch stack-pointers + frame-pointers
   1.533 
   1.534 +
   1.535 
   1.536 +
   1.537 
   1.538 +--1:
   1.539 
   1.540 +Not sure if GCC's computed goto is safe, because we modify the stack pointer
   1.541 
   1.542 +without GCC's knowledge -- although, don't use the stack in the coreloop
   1.543 
   1.544 +segment, so, actually, that should be safe!
   1.545 
   1.546 +
   1.547 
   1.548 +So, GCC has its own special C extensions, one of which gets address of label:
   1.549 
   1.550 +
   1.551 
   1.552 +void *labelAddr;
   1.553 
   1.554 +labelAddr = &&label;
   1.555 
   1.556 +goto *labelAddr;
   1.557 
   1.558 +
   1.559 
   1.560 +--2
   1.561 
   1.562 +In CoreLoop, will check whether VirtProc just born, or was suspended.
   1.563 
   1.564 +If just born, do bit of code that sets up the virtual processor's stack
   1.565 
   1.566 +and frame according to the __cdecl convention for the standard virt proc
   1.567 
   1.568 +fn typedef -- save the pointer to data and pointer to virt proc struc into
   1.569 
   1.570 +correct places in the frame
   1.571 
   1.572 +   __cdecl says, according to:
   1.573 
   1.574 +http://unixwiz.net/techtips/win32-callconv-asm.html
   1.575 
   1.576 +To do this:
   1.577 
   1.578 +push the parameters onto the stack, right most first, working backwards to
   1.579 
   1.580 + the left.
   1.581 
   1.582 +Then perform call instr, which pushes return addr onto stack.
   1.583 
   1.584 +Then callee first pushes the frame pointer, %EBP followed by placing the
   1.585 
   1.586 +then-current value of stack pointer into %EBP
   1.587 
   1.588 +push ebp
   1.589 
   1.590 +mov  ebp, esp    // ebp « esp
   1.591 
   1.592 +
   1.593 
   1.594 +Once %ebp has been changed, it can now refer directly to the function's
   1.595 
   1.596 + arguments as 8(%ebp), 12(%ebp). Note that 0(%ebp) is the old base pointer
   1.597 
   1.598 + and 4(%ebp) is the old instruction pointer.
   1.599 
   1.600 +
   1.601 
   1.602 +Then callee pushes regs it will use then adds to stack pointer the size of
   1.603 
   1.604 + its local vars.
   1.605 
   1.606 +
   1.607 
   1.608 +Stack in callee looks like this:
   1.609 
   1.610 +16(%ebp)	 - third function parameter
   1.611 
   1.612 +12(%ebp)	 - second function parameter
   1.613 
   1.614 +8(%ebp)	 - first function parameter
   1.615 
   1.616 +4(%ebp)	 - old %EIP (the function's "return address")
   1.617 
   1.618 +----------^^ State seen at first instr of callee ^^-----------
   1.619 
   1.620 +0(%ebp)	- old %EBP (previous function's base pointer)
   1.621 
   1.622 +-4(%ebp)	 - save of EAX, the only reg used in function
   1.623 
   1.624 +-8(%ebp)	 - first local variable
   1.625 
   1.626 +-12(%ebp)	 - second local variable
   1.627 
   1.628 +-16(%ebp)	 - third local variable
   1.629 
   1.630 +
   1.631 
   1.632 +
   1.633 
   1.634 +--3
   1.635 
   1.636 +It might be just as simple as two mov instrs, one for %ESP, one for %EBP..
   1.637 
   1.638 + the stack and frame pointer regs
   1.639 
     2.1 --- a/SSR.h	Sun Nov 14 11:09:18 2010 -0800
     2.2 +++ b/SSR.h	Thu May 12 14:25:03 2011 +0200
     2.3 @@ -35,6 +35,8 @@
     2.4   }
     2.5  SSRTrans;
     2.6  
     2.7 +/*WARNING: assembly hard-codes position of endInstrAddr as first field
     2.8 + */
     2.9  typedef struct
    2.10   {
    2.11     void           *endInstrAddr;
     3.1 --- a/SSR_PluginFns.c	Sun Nov 14 11:09:18 2010 -0800
     3.2 +++ b/SSR_PluginFns.c	Thu May 12 14:25:03 2011 +0200
     3.3 @@ -64,13 +64,6 @@
     3.4   { SSRSemEnv *semEnv;
     3.5     VMSReqst    *req;
     3.6     
     3.7 -   //============================= MEASUREMENT STUFF ========================
     3.8 -   #ifdef MEAS__TIME_PLUGIN
     3.9 -   int32 startStamp, endStamp;
    3.10 -   saveLowTimeStampCountInto( startStamp );
    3.11 -   #endif
    3.12 -   //========================================================================
    3.13 -
    3.14     semEnv = (SSRSemEnv *)_semEnv;
    3.15  
    3.16     req    = VMS__take_next_request_out_of( requestingPr );
    3.17 @@ -94,13 +87,6 @@
    3.18        req = VMS__take_next_request_out_of( requestingPr );
    3.19      } //while( req != NULL )
    3.20  
    3.21 -   //============================= MEASUREMENT STUFF ========================
    3.22 -   #ifdef MEAS__TIME_PLUGIN
    3.23 -   saveLowTimeStampCountInto( endStamp );
    3.24 -   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->pluginLowTimeHist );
    3.25 -   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->pluginHighTimeHist );
    3.26 -   #endif
    3.27 -   //========================================================================
    3.28   }
    3.29  
    3.30  
     4.1 --- a/SSR_Request_Handlers.h	Sun Nov 14 11:09:18 2010 -0800
     4.2 +++ b/SSR_Request_Handlers.h	Thu May 12 14:25:03 2011 +0200
     4.3 @@ -14,41 +14,41 @@
     4.4  /*This header defines everything specific to the SSR semantic plug-in
     4.5   */
     4.6  
     4.7 -void
     4.8 +inline void
     4.9  handleSendType( SSRSemReq *semReq, SSRSemEnv *semEnv);
    4.10 -void
    4.11 +inline void
    4.12  handleSendFromTo( SSRSemReq *semReq, SSRSemEnv *semEnv);
    4.13 -void
    4.14 +inline void
    4.15  handleReceiveAny( SSRSemReq *semReq, SSRSemEnv *semEnv);
    4.16 -void
    4.17 +inline void
    4.18  handleReceiveType( SSRSemReq *semReq, SSRSemEnv *semEnv);
    4.19 -void
    4.20 +inline void
    4.21  handleReceiveFromTo( SSRSemReq *semReq, SSRSemEnv *semEnv);
    4.22 -void
    4.23 +inline void
    4.24  handleTransferTo( SSRSemReq *semReq, SSRSemEnv *semEnv);
    4.25 -void
    4.26 +inline void
    4.27  handleTransferOut( SSRSemReq *semReq, SSRSemEnv *semEnv);
    4.28 -void
    4.29 +inline void
    4.30  handleMalloc( SSRSemReq *semReq, VirtProcr *requestingPr, SSRSemEnv *semEnv);
    4.31 -void
    4.32 +inline void
    4.33  handleFree( SSRSemReq *semReq, VirtProcr *requestingPr, SSRSemEnv *semEnv );
    4.34 -void
    4.35 +inline void
    4.36  handleTransEnd(SSRSemReq *semReq, VirtProcr *requestingPr, SSRSemEnv*semEnv);
    4.37 -void
    4.38 +inline void
    4.39  handleTransStart( SSRSemReq *semReq, VirtProcr *requestingPr,
    4.40                    SSRSemEnv *semEnv );
    4.41 -void
    4.42 +inline void
    4.43  handleAtomic( SSRSemReq *semReq, VirtProcr *requestingPr, SSRSemEnv *semEnv);
    4.44 -void
    4.45 +inline void
    4.46  handleStartFnSingleton( SSRSemReq *semReq, VirtProcr *reqstingPr,
    4.47                        SSRSemEnv *semEnv );
    4.48 -void
    4.49 +inline void
    4.50  handleEndFnSingleton( SSRSemReq *semReq, VirtProcr *requestingPr,
    4.51                      SSRSemEnv *semEnv );
    4.52 -void
    4.53 +inline void
    4.54  handleStartDataSingleton( SSRSemReq *semReq, VirtProcr *reqstingPr,
    4.55                        SSRSemEnv *semEnv );
    4.56 -void
    4.57 +inline void
    4.58  handleEndDataSingleton( SSRSemReq *semReq, VirtProcr *requestingPr,
    4.59                      SSRSemEnv *semEnv );
    4.60  
     5.1 --- a/SSR_lib.c	Sun Nov 14 11:09:18 2010 -0800
     5.2 +++ b/SSR_lib.c	Thu May 12 14:25:03 2011 +0200
     5.3 @@ -228,11 +228,11 @@
     5.4     //semanticEnv->transactionStrucs = makeDynArrayInfo( );
     5.5     for( i = 0; i < NUM_STRUCS_IN_SEM_ENV; i++ )
     5.6      {
     5.7 +      semanticEnv->fnSingletons[i].endInstrAddr      = NULL;
     5.8        semanticEnv->fnSingletons[i].hasBeenStarted    = FALSE;
     5.9        semanticEnv->fnSingletons[i].hasFinished       = FALSE;
    5.10 -      semanticEnv->fnSingletons[i].endInstrAddr      = NULL;
    5.11        semanticEnv->fnSingletons[i].waitQ             = makeVMSPrivQ();
    5.12 -      semanticEnv->transactionStrucs[i].waitingVPQ = makeVMSPrivQ();
    5.13 +      semanticEnv->transactionStrucs[i].waitingVPQ   = makeVMSPrivQ();
    5.14      }
    5.15   }
    5.16  
    5.17 @@ -480,7 +480,7 @@
    5.18  
    5.19  
    5.20  //===========================================================================
    5.21 -
    5.22 +//
    5.23  /*A function singleton is a function whose body executes exactly once, on a
    5.24   * single core, no matter how many times the function is called and no
    5.25   * matter how many cores or the timing of cores calling it.
    5.26 @@ -524,8 +524,9 @@
    5.27   {
    5.28     SSRSemReq  reqData;
    5.29  
    5.30 -      //
    5.31 -   reqData.reqType       = singleton_data_start;
    5.32 +   if( *singletonAddr && (*singletonAddr)->hasFinished ) goto JmpToEndSingleton;
    5.33 +   
    5.34 +   reqData.reqType          = singleton_data_start;
    5.35     reqData.singletonPtrAddr = singletonAddr;
    5.36  
    5.37     VMS__send_sem_request( &reqData, animPr );
    5.38 @@ -533,6 +534,7 @@
    5.39      {    //Assembly code changes the return addr on the stack to the one
    5.40           // saved into the singleton by the end-singleton-fn
    5.41           //The return addr is at 0x4(%%ebp)
    5.42 +      JmpToEndSingleton:
    5.43        asm volatile("movl        %0,      %%eax;   \
    5.44                      movl    (%%eax),     %%ebx;   \
    5.45                      movl    (%%ebx),     %%eax;   \