changeset 55:6dd906e3c9a4

added .hgeol to handle line-ending issues
author Me@portablequad
date Tue, 07 Feb 2012 13:52:44 -0800
parents 53825c49db83
children
files .hgeol DESIGN_NOTES.txt
diffstat 2 files changed, 226 insertions(+), 196 deletions(-) [+]
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/.hgeol	Tue Feb 07 13:52:44 2012 -0800
     1.3 @@ -0,0 +1,14 @@
     1.4 +
     1.5 +[patterns]
     1.6 +**.py = native
     1.7 +**.txt = native
     1.8 +**.c = native
     1.9 +**.h = native
    1.10 +**.cpp = native
    1.11 +**.java = native
    1.12 +**.class = bin
    1.13 +**.jar = bin
    1.14 +**.sh = native
    1.15 +**.pl = native
    1.16 +**.jpg = bin
    1.17 +**.gif = bin
     2.1 --- a/DESIGN_NOTES.txt	Tue Jan 31 18:34:07 2012 +0100
     2.2 +++ b/DESIGN_NOTES.txt	Tue Feb 07 13:52:44 2012 -0800
     2.3 @@ -1,212 +1,212 @@
     2.4 -
     2.5 
     2.6 -From e-mail to Albert, on design of app-virt-procr to core-loop animation
     2.7 
     2.8 -switch and back.
     2.9 
    2.10 -
    2.11 
    2.12 -====================
    2.13 
    2.14 -General warnings about this code:
    2.15 
    2.16 -It only compiles in GCC 4.x  (label addr and computed goto)
    2.17 
    2.18 -Has assembly for x86  32bit
    2.19 
    2.20 -
    2.21 
    2.22 -
    2.23 
    2.24 -====================
    2.25 
    2.26 -AVProcr data-struc has: stack-ptr, jump-ptr, data-ptr, slotNum, coreloop-ptr
    2.27 
    2.28 - and semantic-custom-ptr
    2.29 
    2.30 -
    2.31 
    2.32 -The VMS Creator: takes ptr to function and ptr to initial data
    2.33 
    2.34 --- creates a new AVProcr struc
    2.35 
    2.36 --- sets the jmp-ptr field to the ptr-to-function passed in
    2.37 
    2.38 --- sets the data-ptr to ptr to initial data passed in
    2.39 
    2.40 --- if this is for a suspendable virt  processor, then create a stack and set
    2.41 
    2.42 -   the stack-ptr
    2.43 
    2.44 -
    2.45 
    2.46 -VMS__create_procr( AVProcrFnPtr fnPtr, void *initialData )
    2.47 
    2.48 -{
    2.49 
    2.50 -AVProcr   newPr = malloc( sizeof(AVProcr) );
    2.51 
    2.52 -newPr->jmpPtr = fnPtr;
    2.53 
    2.54 -newPr->coreLoopDonePt = &CoreLoopDonePt; //label is in coreLoop
    2.55 
    2.56 -newPr->data = initialData;
    2.57 
    2.58 -newPr->stackPtr = createNewStack();
    2.59 
    2.60 -return newPr;
    2.61 
    2.62 -}
    2.63 
    2.64 -
    2.65 
    2.66 -The semantic layer can then add its own state in the cusom-ptr field
    2.67 
    2.68 -
    2.69 
    2.70 -The Scheduler plug-in:
    2.71 
    2.72 --- Sets slave-ptr in AVProcr, and points the slave to AVProcr
    2.73 
    2.74 --- if non-suspendable, sets the AVProcr's stack-ptr to the slave's stack-ptr
    2.75 
    2.76 -
    2.77 
    2.78 -MasterLoop:
    2.79 
    2.80 --- puts AVProcr structures onto the workQ
    2.81 
    2.82 -
    2.83 
    2.84 -CoreLoop:
    2.85 
    2.86 --- gets stack-ptr out of AVProcr and sets the core's stack-ptr to that
    2.87 
    2.88 --- gets data-ptr out of AVProcr and puts it into reg GCC uses for that param
    2.89 
    2.90 --- puts AVProcr's addr into reg GCC uses for the AVProcr-pointer param
    2.91 
    2.92 --- jumps to the addr in AVProcr's jmp-ptr field
    2.93 
    2.94 -CoreLoop()
    2.95 
    2.96 -{ while( FOREVER )
    2.97 
    2.98 - { nextPr = readQ( workQ );  //workQ is static (global) var declared volatile
    2.99 
   2.100 -   <dataPtr-param-register>       = nextPr->data;
   2.101 
   2.102 -   <AVProcrPtr-param-register> = nextPr;
   2.103 
   2.104 -   <stack-pointer register>          = nextPr->stackPtr;
   2.105 
   2.106 -   jmp nextPr->jmpPtr;
   2.107 
   2.108 -CoreLoopDonePt:   //label's addr put into AVProcr when create new one
   2.109 
   2.110 - }
   2.111 
   2.112 -}
   2.113 
   2.114 -(Note, for suspendable processors coming back from suspension, there is no
   2.115 
   2.116 - need to fill the parameter registers -- they will be discarded)
   2.117 
   2.118 -
   2.119 
   2.120 -Suspend an application-level virtual processor:
   2.121 
   2.122 -VMS__AVPSuspend( AVProcr *pr )
   2.123 
   2.124 -{
   2.125 
   2.126 -pr->jmpPtr = &ResumePt;  //label defined a few lines below
   2.127 
   2.128 -pr->slave->doneFlag = TRUE;
   2.129 
   2.130 -pr->stackPtr = <current SP reg value>;
   2.131 
   2.132 -jmp pr->coreLoopDonePt;
   2.133 
   2.134 -ResumePt: return;
   2.135 
   2.136 -}
   2.137 
   2.138 -
   2.139 
   2.140 -This works because the core loop will have switched back to this stack
   2.141 
   2.142 - before jumping to ResumePt..    also, the core loop never modifies the
   2.143 
   2.144 - stack pointer, it simply switches to whatever stack pointer is in the
   2.145 
   2.146 - next AVProcr it gets off the workQ.
   2.147 
   2.148 -
   2.149 
   2.150 -
   2.151 
   2.152 -
   2.153 
   2.154 -=============================================================================
   2.155 
   2.156 -As it is now, there's only one major unknown about GCC (first thing below
   2.157 
   2.158 -  the line),  and there are a few restrictions, the most intrusive being
   2.159 
   2.160 -  that the functions the application gives to the semantic layer have a
   2.161 
   2.162 -  pre-defined prototype -- return nothing, take a pointer to initial data
   2.163 
   2.164 -  and a pointer to an AVProcr struc, which they're not allowed to modify
   2.165 
   2.166 -  -- only pass it to semantic-lib calls.
   2.167 
   2.168 -
   2.169 
   2.170 -So, here are the assumptions, restrictions, and so forth:
   2.171 
   2.172 -===========================
   2.173 
   2.174 -Major assumption:  that GCC will do the following the same way every time:
   2.175 
   2.176 -  say the application defines a function that fits this typedef:
   2.177 
   2.178 -typedef void (*AVProcrFnPtr)  ( void *, AVProcr * );
   2.179 
   2.180 -
   2.181 
   2.182 -and let's say somewhere in the code they do this:
   2.183 
   2.184 -AVProcrFnPtr   fnPtr = &someFunc;
   2.185 
   2.186 -
   2.187 
   2.188 -then they do this:
   2.189 
   2.190 -(*fnPtr)( dataPtr, animatingVirtProcrPtr );
   2.191 
   2.192 -
   2.193 
   2.194 -Can the registers that GCC uses to pass the two pointers be predicted?
   2.195 
   2.196 - Will they always be the same registers, in every program that has the
   2.197 
   2.198 - same typedef?
   2.199 
   2.200 -If that typedef fixes, guaranteed, the registers (on x86) that GCC will use
   2.201 
   2.202 - to send the two pointers, then the rest of this solution works.
   2.203 
   2.204 -
   2.205 
   2.206 -Change in model: Instead of a virtual processor whose execution trace is
   2.207 
   2.208 - divided into work-units, replacing that with the pattern that a virtual
   2.209 
   2.210 - processor is suspended.  Which means, no more "work unit" data structure
   2.211 
   2.212 - -- instead, it's now an "Application Virtual Processor" structure
   2.213 
   2.214 - -- AVProcr -- which is given directly to the application function!
   2.215 
   2.216 -
   2.217 
   2.218 -   -- You were right, don't need slaves to be virtual processors, only need
   2.219 
   2.220 -      "scheduling buckets" -- just a way to keep track of things..
   2.221 
   2.222 -
   2.223 
   2.224 -Restrictions:
   2.225 
   2.226 --- the  "virtual entities"  created by the semantic layer must be virtual
   2.227 
   2.228 -   processors, created with a function-to-execute and initial data -- the
   2.229 
   2.230 -   function is restricted to return nothing and only take a pointer to the
   2.231 
   2.232 -   initial data plus a pointer to an AVProcr structure, which represents
   2.233 
   2.234 -   "self", the virtual processor created.  (This is the interface I showed
   2.235 
   2.236 -   you for "Hello World" semantic layer).
   2.237 
   2.238 -What this means for synchronous dataflow, is that the nodes in the graph
   2.239 
   2.240 -  are virtual processors that in turn spawn a new virtual processor for
   2.241 
   2.242 -  every "firing" of the node.  This should be fine because the function
   2.243 
   2.244 -  that the node itself is created with is a "canned" function that is part
   2.245 
   2.246 -  of the semantic layer -- the function that is spawned is the user-provided
   2.247 
   2.248 -  function.  The restriction only means that the values from the inputs to
   2.249 
   2.250 -  the node are packaged as the "initial data" given to the spawned virtual
   2.251 
   2.252 -  processor -- so the user-function has to cast a void * to the
   2.253 
   2.254 -  semantic-layer-defined structure by which it gets the inputs to the node.
   2.255 
   2.256 -
   2.257 
   2.258 --- Second restriction is that the semantic layer has to use VMS supplied
   2.259 
   2.260 -   stuff -- for example, the data structure that represents the
   2.261 
   2.262 -   application-level virtual processor is defined in VMS, and the semantic
   2.263 
   2.264 -   layer has to call a VMS function in order to suspend a virtual processor.
   2.265 
   2.266 -
   2.267 
   2.268 --- Third restriction is that the application code never do anything with
   2.269 
   2.270 -   the AVProcr structure except pass it to semantic-layer lib calls.
   2.271 
   2.272 -
   2.273 
   2.274 --- Fourth restriction is that every virtual processor must call a
   2.275 
   2.276 -   "dissipate" function as its last act -- the user-supplied
   2.277 
   2.278 -   virtual-processor function can't just end -- it has to call
   2.279 
   2.280 -   SemLib__dissipate( AVProcr ) before the closing brace.. and after the
   2.281 
   2.282 -   semantic layer is done cleaning up its own data, it has to in turn call
   2.283 
   2.284 -   VMS__disspate( AVProcr ).
   2.285 
   2.286 -
   2.287 
   2.288 --- For performance reasons, I think I want to have two different kinds of
   2.289 
   2.290 -   app-virtual processor -- suspendable ones and non-suspendable -- where
   2.291 
   2.292 -   non-suspendable are not allowed to perform any communication with other
   2.293 
   2.294 -   virtual processors, except at birth and death.  Suspendable ones, of
   2.295 
   2.296 -   course can perform communications, create other processors, and so forth
   2.297 
   2.298 -   -- all of which cause it to suspend.
   2.299 
   2.300 -The performance difference is that I need a separate stack for each
   2.301 
   2.302 -  suspendable, but non-suspendable can re-use a fixed number of stacks
   2.303 
   2.304 -  (one for each slave).
   2.305 
   2.306 -
   2.307 
   2.308 -
   2.309 
   2.310 -==================== May 29
   2.311 
   2.312 -
   2.313 
   2.314 -Qs:
   2.315 
   2.316 ---1 how to safely jump between virt processor's trace and coreloop
   2.317 
   2.318 ---2 how to set up __cdecl style stack + frame for just-born virtual processor
   2.319 
   2.320 ---3 how to switch stack-pointers + frame-pointers
   2.321 
   2.322 -
   2.323 
   2.324 -
   2.325 
   2.326 ---1:
   2.327 
   2.328 -Not sure if GCC's computed goto is safe, because modify the stack pointer
   2.329 
   2.330 -without GCC's knowledge -- although, don't use the stack in the coreloop
   2.331 
   2.332 -segment, so, actually, that should be safe!
   2.333 
   2.334 -
   2.335 
   2.336 -So, GCC has its own special C extensions, one of which gets address of label:
   2.337 
   2.338 -
   2.339 
   2.340 -void *labelAddr;
   2.341 
   2.342 -labelAddr = &&label;
   2.343 
   2.344 -goto *labelAddr;
   2.345 
   2.346 -
   2.347 
   2.348 ---2
   2.349 
   2.350 -In CoreLoop, will check whether VirtProc just born, or was suspended.
   2.351 
   2.352 -If just born, do bit of code that sets up the virtual processor's stack
   2.353 
   2.354 -and frame according to the __cdecl convention for the standard virt proc
   2.355 
   2.356 -fn typedef -- save the pointer to data and pointer to virt proc struc into
   2.357 
   2.358 -correct places in the frame
   2.359 
   2.360 -   __cdecl says, according to:
   2.361 
   2.362 -http://unixwiz.net/techtips/win32-callconv-asm.html
   2.363 
   2.364 -To do this:
   2.365 
   2.366 -push the parameters onto the stack, right most first, working backwards to
   2.367 
   2.368 - the left.
   2.369 
   2.370 -Then perform call instr, which pushes return addr onto stack.
   2.371 
   2.372 -Then callee first pushes the frame pointer, %EBP followed by placing the
   2.373 
   2.374 -then-current value of stack pointer into %EBP
   2.375 
   2.376 -push ebp
   2.377 
   2.378 -mov  ebp, esp    // ebp « esp
   2.379 
   2.380 -
   2.381 
   2.382 -Once %ebp has been changed, it can now refer directly to the function's
   2.383 
   2.384 - arguments as 8(%ebp), 12(%ebp). Note that 0(%ebp) is the old base pointer
   2.385 
   2.386 - and 4(%ebp) is the old instruction pointer.
   2.387 
   2.388 -
   2.389 
   2.390 -Then callee pushes regs it will use then adds to stack pointer the size of
   2.391 
   2.392 - its local vars.
   2.393 
   2.394 -
   2.395 
   2.396 -Stack in callee looks like this:
   2.397 
   2.398 -16(%ebp)	 - third function parameter
   2.399 
   2.400 -12(%ebp)	 - second function parameter
   2.401 
   2.402 -8(%ebp)	 - first function parameter
   2.403 
   2.404 -4(%ebp)	 - old %EIP (the function's "return address")
   2.405 
   2.406 -----------^^ State seen at first instr of callee ^^-----------
   2.407 
   2.408 -0(%ebp)	- old %EBP (previous function's base pointer)
   2.409 
   2.410 --4(%ebp)	 - save of EAX, the only reg used in function
   2.411 
   2.412 --8(%ebp)	 - first local variable
   2.413 
   2.414 --12(%ebp)	 - second local variable
   2.415 
   2.416 --16(%ebp)	 - third local variable
   2.417 
   2.418 -
   2.419 
   2.420 -
   2.421 
   2.422 ---3
   2.423 
   2.424 -It might be just as simple as two mov instrs, one for %ESP, one for %EBP..
   2.425 
   2.426 - the stack and frame pointer regs
   2.427 
   2.428 +
   2.429 +From e-mail to Albert, on design of app-virt-procr to core-loop animation
   2.430 +switch and back.
   2.431 +
   2.432 +====================
   2.433 +General warnings about this code:
   2.434 +It only compiles in GCC 4.x  (label addr and computed goto)
   2.435 +Has assembly for x86  32bit
   2.436 +
   2.437 +
   2.438 +====================
   2.439 +AVProcr data-struc has: stack-ptr, jump-ptr, data-ptr, slotNum, coreloop-ptr
   2.440 + and semantic-custom-ptr
   2.441 +
   2.442 +The VMS Creator: takes ptr to function and ptr to initial data
   2.443 +-- creates a new AVProcr struc
   2.444 +-- sets the jmp-ptr field to the ptr-to-function passed in
   2.445 +-- sets the data-ptr to ptr to initial data passed in
   2.446 +-- if this is for a suspendable virt  processor, then create a stack and set
   2.447 +   the stack-ptr
   2.448 +
   2.449 +VMS__create_procr( AVProcrFnPtr fnPtr, void *initialData )
   2.450 +{
   2.451 +AVProcr   newPr = malloc( sizeof(AVProcr) );
   2.452 +newPr->jmpPtr = fnPtr;
   2.453 +newPr->coreLoopDonePt = &CoreLoopDonePt; //label is in coreLoop
   2.454 +newPr->data = initialData;
   2.455 +newPr->stackPtr = createNewStack();
   2.456 +return newPr;
   2.457 +}
   2.458 +
   2.459 +The semantic layer can then add its own state in the custom-ptr field
   2.460 +
   2.461 +The Scheduler plug-in:
   2.462 +-- Sets slave-ptr in AVProcr, and points the slave to AVProcr
   2.463 +-- if non-suspendable, sets the AVProcr's stack-ptr to the slave's stack-ptr
   2.464 +
   2.465 +MasterLoop:
   2.466 +-- puts AVProcr structures onto the workQ
   2.467 +
   2.468 +CoreLoop:
   2.469 +-- gets stack-ptr out of AVProcr and sets the core's stack-ptr to that
   2.470 +-- gets data-ptr out of AVProcr and puts it into reg GCC uses for that param
   2.471 +-- puts AVProcr's addr into reg GCC uses for the AVProcr-pointer param
   2.472 +-- jumps to the addr in AVProcr's jmp-ptr field
   2.473 +CoreLoop()
   2.474 +{ while( FOREVER )
   2.475 + { nextPr = readQ( workQ );  //workQ is static (global) var declared volatile
   2.476 +   <dataPtr-param-register>       = nextPr->data;
   2.477 +   <AVProcrPtr-param-register> = nextPr;
   2.478 +   <stack-pointer register>          = nextPr->stackPtr;
   2.479 +   jmp nextPr->jmpPtr;
   2.480 +CoreLoopDonePt:   //label's addr put into AVProcr when create new one
   2.481 + }
   2.482 +}
   2.483 +(Note, for suspendable processors coming back from suspension, there is no
   2.484 + need to fill the parameter registers -- they will be discarded)
   2.485 +
   2.486 +Suspend an application-level virtual processor:
   2.487 +VMS__AVPSuspend( AVProcr *pr )
   2.488 +{
   2.489 +pr->jmpPtr = &ResumePt;  //label defined a few lines below
   2.490 +pr->slave->doneFlag = TRUE;
   2.491 +pr->stackPtr = <current SP reg value>;
   2.492 +jmp pr->coreLoopDonePt;
   2.493 +ResumePt: return;
   2.494 +}
   2.495 +
   2.496 +This works because the core loop will have switched back to this stack
   2.497 + before jumping to ResumePt..    also, the core loop never modifies the
   2.498 + stack pointer, it simply switches to whatever stack pointer is in the
   2.499 + next AVProcr it gets off the workQ.
   2.500 +
   2.501 +
   2.502 +
   2.503 +=============================================================================
   2.504 +As it is now, there's only one major unknown about GCC (first thing below
   2.505 +  the line),  and there are a few restrictions, the most intrusive being
   2.506 +  that the functions the application gives to the semantic layer have a
   2.507 +  pre-defined prototype -- return nothing, take a pointer to initial data
   2.508 +  and a pointer to an AVProcr struc, which they're not allowed to modify
   2.509 +  -- only pass it to semantic-lib calls.
   2.510 +
   2.511 +So, here are the assumptions, restrictions, and so forth:
   2.512 +===========================
   2.513 +Major assumption:  that GCC will do the following the same way every time:
   2.514 +  say the application defines a function that fits this typedef:
   2.515 +typedef void (*AVProcrFnPtr)  ( void *, AVProcr * );
   2.516 +
   2.517 +and let's say somewhere in the code they do this:
   2.518 +AVProcrFnPtr   fnPtr = &someFunc;
   2.519 +
   2.520 +then they do this:
   2.521 +(*fnPtr)( dataPtr, animatingVirtProcrPtr );
   2.522 +
   2.523 +Can the registers that GCC uses to pass the two pointers be predicted?
   2.524 + Will they always be the same registers, in every program that has the
   2.525 + same typedef?
   2.526 +If that typedef fixes, guaranteed, the registers (on x86) that GCC will use
   2.527 + to send the two pointers, then the rest of this solution works.
   2.528 +
   2.529 +Change in model: Instead of a virtual processor whose execution trace is
   2.530 + divided into work-units, replacing that with the pattern that a virtual
   2.531 + processor is suspended.  Which means, no more "work unit" data structure
   2.532 + -- instead, it's now an "Application Virtual Processor" structure
   2.533 + -- AVProcr -- which is given directly to the application function!
   2.534 +
   2.535 +   -- You were right, don't need slaves to be virtual processors, only need
   2.536 +      "scheduling buckets" -- just a way to keep track of things..
   2.537 +
   2.538 +Restrictions:
   2.539 +-- the  "virtual entities"  created by the semantic layer must be virtual
   2.540 +   processors, created with a function-to-execute and initial data -- the
   2.541 +   function is restricted to return nothing and only take a pointer to the
   2.542 +   initial data plus a pointer to an AVProcr structure, which represents
   2.543 +   "self", the virtual processor created.  (This is the interface I showed
   2.544 +   you for "Hello World" semantic layer).
   2.545 +What this means for synchronous dataflow, is that the nodes in the graph
   2.546 +  are virtual processors that in turn spawn a new virtual processor for
   2.547 +  every "firing" of the node.  This should be fine because the function
   2.548 +  that the node itself is created with is a "canned" function that is part
   2.549 +  of the semantic layer -- the function that is spawned is the user-provided
   2.550 +  function.  The restriction only means that the values from the inputs to
   2.551 +  the node are packaged as the "initial data" given to the spawned virtual
   2.552 +  processor -- so the user-function has to cast a void * to the
   2.553 +  semantic-layer-defined structure by which it gets the inputs to the node.
   2.554 +
   2.555 +-- Second restriction is that the semantic layer has to use VMS supplied
   2.556 +   stuff -- for example, the data structure that represents the
   2.557 +   application-level virtual processor is defined in VMS, and the semantic
   2.558 +   layer has to call a VMS function in order to suspend a virtual processor.
   2.559 +
   2.560 +-- Third restriction is that the application code never do anything with
   2.561 +   the AVProcr structure except pass it to semantic-layer lib calls.
   2.562 +
   2.563 +-- Fourth restriction is that every virtual processor must call a
   2.564 +   "dissipate" function as its last act -- the user-supplied
   2.565 +   virtual-processor function can't just end -- it has to call
   2.566 +   SemLib__dissipate( AVProcr ) before the closing brace.. and after the
   2.567 +   semantic layer is done cleaning up its own data, it has to in turn call
   2.568 +   VMS__dissipate( AVProcr ).
   2.569 +
   2.570 +-- For performance reasons, I think I want to have two different kinds of
   2.571 +   app-virtual processor -- suspendable ones and non-suspendable -- where
   2.572 +   non-suspendable are not allowed to perform any communication with other
   2.573 +   virtual processors, except at birth and death.  Suspendable ones, of
   2.574 +   course can perform communications, create other processors, and so forth
   2.575 +   -- all of which cause it to suspend.
   2.576 +The performance difference is that I need a separate stack for each
   2.577 +  suspendable, but non-suspendable can re-use a fixed number of stacks
   2.578 +  (one for each slave).
   2.579 +
   2.580 +
   2.581 +==================== May 29
   2.582 +
   2.583 +Qs:
   2.584 +--1 how to safely jump between virt processor's trace and coreloop
   2.585 +--2 how to set up __cdecl style stack + frame for just-born virtual processor
   2.586 +--3 how to switch stack-pointers + frame-pointers
   2.587 +
   2.588 +
   2.589 +--1:
   2.590 +Not sure if GCC's computed goto is safe, because we modify the stack pointer
   2.591 +without GCC's knowledge -- although, we don't use the stack in the coreloop
   2.592 +segment, so, actually, that should be safe!
   2.593 +
   2.594 +So, GCC has its own special C extensions, one of which gets address of label:
   2.595 +
   2.596 +void *labelAddr;
   2.597 +labelAddr = &&label;
   2.598 +goto *labelAddr;
   2.599 +
   2.600 +--2
   2.601 +In CoreLoop, will check whether VirtProc just born, or was suspended.
   2.602 +If just born, do bit of code that sets up the virtual processor's stack
   2.603 +and frame according to the __cdecl convention for the standard virt proc
   2.604 +fn typedef -- save the pointer to data and pointer to virt proc struc into
   2.605 +correct places in the frame
   2.606 +   __cdecl says, according to:
   2.607 +http://unixwiz.net/techtips/win32-callconv-asm.html
   2.608 +To do this:
   2.609 +push the parameters onto the stack, right most first, working backwards to
   2.610 + the left.
   2.611 +Then perform call instr, which pushes return addr onto stack.
   2.612 +Then callee first pushes the frame pointer, %EBP, followed by placing the
   2.613 +then-current value of stack pointer into %EBP
   2.614 +push ebp
   2.615 +mov  ebp, esp    // ebp « esp
   2.616 +
   2.617 +Once %ebp has been changed, it can now refer directly to the function's
   2.618 + arguments as 8(%ebp), 12(%ebp). Note that 0(%ebp) is the old base pointer
   2.619 + and 4(%ebp) is the old instruction pointer.
   2.620 +
   2.621 +Then callee pushes regs it will use, then subtracts from the stack pointer the
   2.622 + size of its local vars (the x86 stack grows downward).
   2.623 +
   2.624 +Stack in callee looks like this:
   2.625 +16(%ebp)	 - third function parameter
   2.626 +12(%ebp)	 - second function parameter
   2.627 +8(%ebp)	 - first function parameter
   2.628 +4(%ebp)	 - old %EIP (the function's "return address")
   2.629 +----------^^ State seen at first instr of callee ^^-----------
   2.630 +0(%ebp)	- old %EBP (previous function's base pointer)
   2.631 +-4(%ebp)	 - save of EAX, the only reg used in function
   2.632 +-8(%ebp)	 - first local variable
   2.633 +-12(%ebp)	 - second local variable
   2.634 +-16(%ebp)	 - third local variable
   2.635 +
   2.636 +
   2.637 +--3
   2.638 +It might be just as simple as two mov instrs, one for %ESP, one for %EBP..
   2.639 + the stack and frame pointer regs