PR/PR_Projects/PR__ML__MC_shared__Projects/VReo/VReo__Prod_Cons__ML_MC_shared__Proj

changeset 0:605be757e783

Initial add -- freshly debugged project
author Sean Halle <seanhalle@yahoo.com>
date Wed, 12 Jun 2013 15:48:52 -0700
parents
children 94ba280ac9a6
files .hgeol .hgignore .hgsub Design_Notes__Local_vs_Remote Design_Notes__Simplifying_PR Design_Notes__VReo ToDoList.txt VReo__Prod_Cons__MC_shared__Linux/core __brch__ML_dev src/PR_Implementations/PR_defs__turn_on_and_off.h
diffstat 9 files changed, 822 insertions(+), 0 deletions(-) [+]
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/.hgeol	Wed Jun 12 15:48:52 2013 -0700
     1.3 @@ -0,0 +1,14 @@
     1.4 +
     1.5 +[patterns]
     1.6 +**.py = native
     1.7 +**.txt = native
     1.8 +**.c = native
     1.9 +**.h = native
    1.10 +**.cpp = native
    1.11 +**.java = native
    1.12 +**.class = bin
    1.13 +**.jar = bin
    1.14 +**.sh = native
    1.15 +**.pl = native
    1.16 +**.jpg = bin
    1.17 +**.gif = bin
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/.hgignore	Wed Jun 12 15:48:52 2013 -0700
     2.3 @@ -0,0 +1,12 @@
     2.4 +nbproject
     2.5 +Makefile
     2.6 +build
     2.7 +dist
     2.8 +src/Default
     2.9 +src/.settings
    2.10 +src/.cproject
    2.11 +src/.project
    2.12 +.dep.inc
    2.13 +glob:.cproject
    2.14 +glob:.project
    2.15 +glob:Debug
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/.hgsub	Wed Jun 12 15:48:52 2013 -0700
     3.3 @@ -0,0 +1,12 @@
     3.4 +
     3.5 +src/Application = ../../../../Applications/VReo/VReo__Prod_Cons__LangDev
     3.6 +
     3.7 +src/PR_Implementations/VReo_impl = ../../../../PR_Implementations/VReo_impls/VReo__ML_MC_shared_impl
     3.8 +src/PR_Implementations/PR_impl = ../../../../../VMS/VMS_Implementations/VMS_impls/VMS__MC_shared_impl
     3.9 +
    3.10 +src/C_Libraries/DynArray = ../../../../../VMS/C_Libraries/DynArray
    3.11 +src/C_Libraries/Hash_impl = ../../../../../VMS/C_Libraries/Hash_impl
    3.12 +src/C_Libraries/Histogram = ../../../../../VMS/C_Libraries/Histogram
    3.13 +src/C_Libraries/ParamHelper = ../../../../../VMS/C_Libraries/ParamHelper
    3.14 +src/C_Libraries/Queue_impl = ../../../../../VMS/C_Libraries/Queue_impl
    3.15 +src/C_Libraries/ListOfArrays = ../../../../../VMS/C_Libraries/ListOfArrays
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/Design_Notes__Local_vs_Remote	Wed Jun 12 15:48:52 2013 -0700
     4.3 @@ -0,0 +1,95 @@
     4.4 +
     4.5 +
     4.6 +So, want to introduce an optimization that has a purely local mechanism for tasks.  Maybe also for SlaveVPs..  
     4.7 +
     4.8 +For tasks, benefit is reduced overhead, due to no suspend of animating VP, no acquisition of lock, and no inter-core data touched.  
     4.9 +
    4.10 +End_task implementation takes the next slave out of local task queue, then jumps to the function of the task.  (might implement with call/return -- slave calls task's fn-pointer, which returns, then slave picks next, and so on.. no "end_task" call at the end).
    4.11 +
    4.12 +The only inter-core communication triggered by task activity is end-notice, with associated bookkeeping of ancestors and descendants.  Creating tasks and moving tasks between cores is handled entirely inside Master.. it only affects the bookkeeping structure..  that structure has to keep track of where to send notice of a task being created, or a task ending.
    4.13 +
    4.14 +So, end-task can be purely local, and can do a read of whether the given task needs to do remote updates.  If so, then trigger the higher-level inter-core runtime..  The task struct gets marked when it is moved..  if its ancestor gets moved, leave a ghost local, which indicates where the ancestor task was moved to.
    4.15 +
    4.16 +Task creation, then, doesn't suspend, but rather has an execution model (which breaks the tie-point abstraction of "control the relative ordering via creation of tie-points", but that's not a big deal for atomic timelines).  The call is asynchronous -- it creates the data struct, fills it in, queues it, and leaves notice for the inter-core runtime (which will do bookkeeping -- will be a bit tricky getting the flags right, because there is no sync, and task may finish and end before the inter-core ever gets a chance to do the bookkeeping.)
    4.17 +
    4.18 +So, this kind of task inherits no context on the stack, and leaves nothing on the stack after completion, (except, in some cases, the normal return value).  So, Cilk tasks cannot be done this way, but dataflow tasks can.  All data consumed by such a task is passed to it, via pointer, and all results from it are left via side-effect.
    4.19 +
    4.20 +The semantics 
    4.21 +
    4.22 +The important point is that such tasks all appear the same to the application, and all have the same data struct, and it is only inside the Master that distinctions are made, and flags set in the data struct.
    4.23 +
    4.24 +The inter-core runtime:
    4.25 +-] handles moving tasks among locals
    4.26 +-] handles animation-constraint updates that go between locals
    4.27 +-] interacts with a local only via shared flags and queues -- non-blocking and no special atomic instructions
    4.28 +-] Seeing a counter-based mechanism, where local piles up things for inter-core to see, incrementing its counter each time, then inter-core does the same back.  Each side keeps an inside-itself copy of the last counter value it read from the other side.  This way, they can quickly poll and can modify shared state without any atomic instructions, and with purely local-to-core reads and writes.
    4.29 +
    4.30 +The local runtime looks only at the meta-task struct:
    4.31 +-] to see if it has to inform the inter-core of completion
    4.32 +
    4.33 +Handling movement of a task that is the parent of other local tasks -- becomes a remote parent, and if it creates more children remotely, will have children on multiple cores.  The inter-core creates a local "ghost" copy of the task and leaves it local, but marked as a ghost, and containing the number of local children.  Then a local task ending causes update of the local children, and when all local children are done, the local runtime checks the ghost flag, sees it set, and tells the inter-core.  The inter-core, when it runs, will send the message to the remote core the task was moved to.   When the task was first moved there, it was marked as having only one child.  When the ghost completes, the message arrives, and decrements the child count.  In the meantime, the task could have been moved once again, the local marked as a ghost, and the process repeats.
    4.34 + 
    4.35 +Last thing is handling constraints on readiness of tasks..  have the local lang env, which has the hash table or queue, or whatever..  the end-task of local tasks updates the state of that, as do calls by VPs, and maybe even some asynch calls from tasks that just signal progress or something..  that update happens inside the code triggered by end-task call..
    4.36 +
    4.37 +Separately, the inter-core runtime can reach down into the local lang-env and update it when info comes from remote cores -- it owns the core while running, so no interference with the local.  It can do all the management of moving tasks and VPs out of structs and into readyQs..  It can also update meta-task state, such as liveness of remote children..
    4.38 +
    4.39 +local-to-intercore happens when local figures out the state of a ghost has changed..  it leaves notice for the inter-core to see..  not sure exactly where this will come up, because a task moved off-core should have been removed from lang-env, and placed into the remote one..  and it's not clear that an advantage exists for moving ones still constrained.
    4.40 +
    4.41 +Okay, that should work well (watch the local-to-intercore comm part) -- forces all tasks to allocate on the heap, but that's not so bad..  keeps everything local, with clean connection to remote..  works on distributed as well as shared.  Tasks never perform locking nor suspension nor assignment.  And, works perfectly well with conversion to VP upon invocation of a suspend.
    4.42 +
    4.43 +I like it.  Let's do that.
    4.44 +
    4.45 +==============================
    4.46 +As for VPs..  is there a *reason* to do an equivalent local-only VP suspend?   Perhaps limited..  say, if have semantics that are almost a task but need previous stack context or something like that. 
    4.47 +
    4.48 +Hmmm..  would mean that have a local suspend equivalent that changes stacks, but doesn't acquire lock, and doesn't touch any data shared between cores.  So, mainly, save the lock and inter-core data comm costs..  if do the message passing impl that pre-fetches inter-core shared data, then should be quite low overhead, especially on multi-socket..
    4.49 +
    4.50 +This also turns the runtime into distributed-memory ready..  so, implement it on shared memory, but then it's the same code, augmented by actual message passing, for real distributed memory.
    4.51 +
    4.52 +Okay, so what changes for VPs in the local-only runtime?
    4.53 +
    4.54 +Will need structs that track remote locals that need to know of state changes..
    4.55 +
    4.56 +=====================  Global Names  ======================
    4.57 +For pointers, there are really two things -- physical relation, which is distance in an ordered address space, which is *offset* -- and a sub-space, which is the base from which to perform that offset.
    4.58 +
    4.59 +So, for a data-struct on the heap, have the pointer to the start of the struct, which is the location of the sub-space..  and then have offsets from that to particular fields in the struct.
    4.60 +
    4.61 +The complicating factor is that multiple levels of this may exist -- a high level sub-space may contain multiple lower-level subspaces..  so, have the base of the top level, then offset from that to the contained ones, then offsets from there to particular fields.
    4.62 +
    4.63 +If want globally valid names for fields within data structs, then have to change the base when a space is sent to a different memory, but if the entire space is sent, then no changes to the offsets inside are needed.  However, if only a portion of the space is sent, then a compaction takes place during send, which moves sub-spaces or even fields relative to each other..  that causes a recalculation of the offsets.
    4.64 +
    4.65 +Now, if such memory is arranged into separate segments, so that base pointers are in one portion, with offsets in another, and values in a third, then the values can be picked out and sent without modification..  the base pointers will all be re-calculated at the dest.. and the offsets can all be recalculated efficiently, due to their separateness..  now, if special hardware is provided for performing the address calculations, which understands offset structures..  Say, separate register set for address calcs, and memory system that can hand a base plus offset and it does the add in the memory.
    4.66 +
    4.67 +So, no pointer arithmetic, except within the same sub-space, and then can only do arithmetic on the offset.  The hardware checks the sub-space size bounds.  
    4.68 +
    4.69 +This handles secure separation, instead of pages-tables and TLBs in virtual mem.  The hardware ensures that all target addresses are inside sub-space bounds (keeps size next to base addr)..  so there's no way for an application to get at memory that hasn't been allocated to it.
    4.70 +
    4.71 +Pointers, then, can only be created, via malloc, which has hardware support, or copied, or the offset modified by instructions.  The OS manages generating all base-addresses of sub-spaces, along with the sub-space sizes.  Hence, application code can only get at sub-spaces that have been given to the application.  No way to manufacture them.
    4.72 +
    4.73 +Thinking that a base pointer is pre-calculated for each sub-space, and the offset of each sub-space from the base pointer of its enclosing is kept..  so then, in a leaf data struct, what is currently a pointer becomes instead an index within the base pointers, plus an index within the offsets.  That thing will remain the same, even after moving or copying to a different memory, except when incoming from remote memories have conflicting offsets..  although it may burn some memory, due to some indexes not populated (when compact, the index stays the same, so the compaction causes currently valid indexes to become empty).
    4.74 +
    4.75 +Alternatively, could make a pointer be an offset to a sub-space struct, plus an offset within that sub-space.  Keep pointers all together in part of the top-level sub-space that gets sent as a unit, and keep values together in the other part..  the sub-space struct says where the separation is..  that way, when send, know when to start interpreting the bits as pointers, and modifying those according to which sub-space they point to.
    4.76 +
    4.77 +Okay, so, how about this..  keep the sub-space structs, and keep the pointers referencing a sub-space struct..  but make all pointers be referenced to an enclosing sub-space..  that enclosing may be several levels above..  but all pointers are implied to be offsets relative to the base of that enclosing..  so, the intermediate sub-space structs are only accessed when data gets copied or moved to a different memory.
    4.78 +
    4.79 +Malloc gives pointers only within a range that has been obtained from the OS and given to the application..  that range is the enclosing sub-space for all locally malloc'd structs..  then there are things that have come from remote memories..  those arrive in packages, each its own sub-space.  A pointer in one of those sub-spaces can, indeed, point to something in a different sub-space that is in the same memory.  It is the runtime system (proto-runtime ((OS)) ) that processes the incoming pointer-group of an arriving sub-space.  It checks whether any of the remote-pointers in that pointer-group refer to things that are inside other sub-spaces in this memory.  If so, it replaces the remote pointer with a semi-local pointer.
    4.80 +
    4.81 +So, have three classes of pointer: 
    4.82 +-] local == collected into the pointer-group of the enclosing sub-space, implies use of the base-pointer of that enclosing sub-space, and has an offset within that enclosing sub-space.
    4.83 +-] semi-local == target is inside a different enclosing sub-space.  Indicates which enclosing sub-space, plus the offset within it.
    4.84 +-] remote == target is in a sub-space that's in a different memory.  Indicates which memory, which sub-space in it, and the offset.
    4.85 +
    4.86 +Memory management is done by the keeper pattern..  most data has a lifetime linked to the timeline of a VP or task or scope.  That is handled by the compiler.  Have to malloc to a scope, in the app.  The compiler inserts free calls into the timelines.  However, some data lingers with unclear lifetime..  unknown future work might want it..  that data is managed by the keeper..  the application includes releases of scopes, and computation milestones that are sent to the keeper..  those trigger freeing of dependent data.  For example, data kept speculatively, such as in H.264 frames, has maximum span, within the computation, that it can be of value..  the keeper is told the span, and milestones update the passage of work.  The keeper calcs expiration in terms of milestones.
    4.87 +
    4.88 +
    4.89 +A pointer to remote has a bit set that says so..   
    4.90 +
    4.91 +So..  it is the keeper pattern and the scopes that allow the system to know when a remote enclosing sub-space disappears.  That management system ensures that no still-referencable pointers exist to remote enclosing sub-spaces that have disappeared.
    4.92 +
    4.93 +Hmmmm..  what about allocating all enclosing sub-spaces onto the heap?  Then grow the heap from the middle outward, with pointers above and values below.  Then, when a new enclosing subspace arrives, allocate its pointer and value sections separately, and re-calc the offsets in the pointers, relative to that center-point that separates them.  The normal free list mechanism, with coalescing, will gobble up the chunk, or even sub-spaces within it, when they go out of scope or are explicitly freed, or expire.  Then, only have two types of pointer, local and remote.  Also, allows for clean compaction of the heap, and moving of the center point when run out of room.
    4.94 +
    4.95 +
    4.96 +
    4.97 +
    4.98 +
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/Design_Notes__Simplifying_PR	Wed Jun 12 15:48:52 2013 -0700
     5.3 @@ -0,0 +1,15 @@
     5.4 +
     5.5 +
     5.6 +Improving usability of PR..
     5.7 +-] Confusing sequence to create 
     5.8 +-] Confusing having to chase the langData all over
     5.9 +-] in VReo, malloc'd the initData inside the wrapper lib..  so, when dissipate have to free that..  but a pointer to initData isn't kept in the slave struct..  so, have to put the pointer into the langData.. but then, to free it, have to go to the lang data freer..  so, visualizing programmer viewpoint, am sitting at wrapper lib at "create"..  now, where, exactly do I free the params just malloc'd?  First hurdle: where is the pointer to them saved?  Nowhere.. okay, have to save the pointer someplace can get it back.. that's what langData is for..  okay, now, where is langData created?  Well, had to register a creator for it inside startup..  oops! don't have the pointer available there..  wait, what's that for again?  Anyway, looking at sample code, have to call a Fn that creates the lang data inside the new slave..  do that in the create handler..  why doesn't PR do that automatically during create?  Okay, so can't pass it any params, so have to add the pointer to langData when get langData back from create call.  Fine, now it's in there..  so let's go to endVP handler..  hmmm.. says PR automatically frees all the langDatas..  but it'll miss freeing this!  Now what?  Didn't have to pass pointer to the freer to the register langData creator call?  Let's go look at that langData freer.. how do I find it?  Ahh, there it is..  now, let's put the free in here..
    5.10 +
    5.11 +That's a lot of voodoo stuff just have to "know" about PR, in order to just free the stupid params!  Illustrates many points of awkwardness..  the fact that params are not saved..  the fact that langData freer is hard to find in the code (no chain of calls lead to it)..  the register thing in the startup is weird..  the dissipate sequence and create sequence have odd separation of what PR does vs what lang has to do..  would be nice to regularize that all and make flows of associations in mind when creating lang.
    5.12 +
    5.13 +
    5.14 +
    5.15 +
    5.16 +
    5.17 +
    5.18 +
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/Design_Notes__VReo	Wed Jun 12 15:48:52 2013 -0700
     6.3 @@ -0,0 +1,16 @@
     6.4 +
     6.5 +
     6.6 +Reo:
     6.7 +
     6.8 +-] create_circuit -- custom function that builds data structures and connects them -- returns pointer to top-level ReoCircuit struct, which has an array of ports in it.
     6.9 +
    6.10 +-] create_VP -- hand it a top level function, a circuit pointer, and an array of port-idxes..  the top level function knows how many ports it expects, and what order they appear in the array.  So does the VP that creates the array and calls this function.  The top level function has its code and calls get or put on the ports, handing the call the circuit pointer plus whatever index was plucked out of the particular position in the array that was passed in.
    6.11 +
    6.12 +-] put_onto_port 
    6.13 +
    6.14 +-] get from port
    6.15 +
    6.16 +==============
    6.17 +ToDos:
    6.18 +
    6.19 +-] Doer function sets Buffer in port to NULL when it takes from the port.  Does this whether a 
    6.20 \ No newline at end of file
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/ToDoList.txt	Wed Jun 12 15:48:52 2013 -0700
     7.3 @@ -0,0 +1,567 @@
     7.4 +
     7.5 +Aussie jobs, send in apps
     7.6 +
     7.7 +-] Std struct that holds Assigner information used by override assigner.. this way lang can pass info helpful for data locality that assigner uses to pick placement that reduces comm.  All langs provide the same format of information..
     7.8 +
     7.9 +-] improve assigner in MasterFn -- make it find a process that has work -- and make it try different process if chosen one decides not to assign work
    7.10 +
    7.11 +-] in middle of PRServ
    7.12 +-] remove singleton and trans from VSs startup, .c and .h
    7.13 +-] Make sure singleton and trans good in PRServ startup, .c and .h
    7.14 +-] Changing meas macros -- make sure finished name change of 
    7.15 +-] Make_Meas_Hists_for_Laguage -- change to be invoked in lang's startup, and put lang's name on end -- change innards to take slave and magic num and lookup the lang env, then make the hists inside the lang env
    7.16 +-] makeAMeasHist -- change to be a normal Fn, called inside make macro -- takes pointer to a dynArrayInfo that is in langEnv's prolog, and adds a new histogram to it
    7.17 +-] to avoid macro name collision, add "__Langlet" to end of each meas macro name
    7.18 +-] change all Meas_ macros to use the langEnv, where hists are now stored..  thinking make a call or macro for it and use in all Meas_ macros
    7.19 +-] change LANG__dependency.c and .h to protect printing with a lock and opening a constraint file puts run ID, process ID, and lang name into file name
    7.20 +-] make sure PR_MEAS__Counter_Recording.c and .h are cross-languages (protect side-effect for setting output file, use langEnv to store events, and so on)
    7.21 +#-] change VSs and PRServ to use fn names from PR_MEAS__Counter_Recording
    7.22 +#-] remove VSs__Counter_Recording
    7.23 +-] Fix MEAS__macros.h -- puts things into master env.. should be moved to lang env
    7.24 +-] _PRTopEnv->counterHandler = &PR_MEAS__counter_handler; should be put into lang env, or else into PR if there's only one for all langlets
    7.25 +-] Remove all single-lang things, including init code, commented out code, and all fields in TopEnv and Process
    7.26 +-] Remove last vestiges of multiple slots -- elim CoreController, combine with Master into one file, elim extra dimension of slots
    7.27 +-] fix animation master fn -- has slot flags "needsWorkAssigned".. may be able to remove.. streamline logic
    7.28 +-] move things out of PR__SS that belong elsewhere..
    7.29 +-] make a libPR.h file, for use by application code -- all PR things callable from app, and ONLY those go in there..
    7.30 +-] clean up every comment in every file
    7.31 +-] PR__SS cleanup comments
    7.32 +-] AnimationMaster cleanup comments
    7.33 +
    7.34 +-] Thinking add to PR some tracking of work created by a particular language, so can create a "wait until work created by this language has completed" then end the language.. or start the language in mode that it 
    7.35 +
    7.36 +0x7ffff7556b4c seedSlv
    7.37 +0x7ffff7557130 process
    7.38 +0x7ffff754e908 prolog for langDatas collection
    7.39 +0x7ffff754e90c langDatas collection
    7.40 +0x7ffff754e6e8 metaTasks collection
    7.41 +0x7ffff7544a6c PRServ langEnv
    7.42 +0x7ffff7556e10 langEnvs in process1
    7.43 +0x7ffff7544a14 protoLangEnv for PRServ
    7.44 +0x7ffff7544a6c langEnv for PRServ
    7.45 +0x7ffff75449d4 slavesReadyToResumeQ in langEnv of PRServ
    7.46 +
    7.47 +0x7ffff759aed0 slot core 0
    7.48 +0x7ffff759ae50 slot core 1
    7.49 +
    7.50 +0x7ffff759acb0 idle slave core 0
    7.51 +0x7ffff758a6e8 idle slave core 1
    7.52 + idle slave core 2
    7.53 + idle slave core 3
    7.54 +
    7.55 +0x7ffff7592bf0 slot slave core 0
    7.56 +0x7ffff7582628 slot slave core 1
    7.57 + slot slave core 2
    7.58 + slot slave core 3
    7.59 +
    7.60 +0x7ffff6563d04 argPtrHashTbl in VSs
    7.61 +0x7ffff7557130 process 1 -- process slave is in..
    7.62 +0x7ffff7556f10 process->langEnvs
    7.63 +0x7ffff6567f74 VSs protoLangEnv
    7.64 +0x7ffff6567fcc VSs langEnv
    7.65 +
    7.66 +0x7ffff64636fc metaTask that is a repeat
    7.67 +0x7ffff6461074 metaTask -- second one
    7.68 +
    7.69 +----
    7.70 +0x7ffff7544b04 PRServ protoLangEnv (idx 0)
    7.71 +0x7ffff6565ef4 VSs protoLangEnv (idx 1)
    7.72 +
    7.73 +
    7.74 +#-] update end-task in VSs so uses "don't del" flag
    7.75 +-] check PR's end slave and end task to be sure delete AFTER lang's handler, and check dont-del flag
    7.76 +
    7.77 +-] remove last traces of multiple animation slots
    7.78 +-] remove last traces of core controller
    7.79 +
    7.80 +-] Make sure recycled slave coming out is "clean", so don't think it has work or something..
    7.81 +
    7.82 +#-] Make sequential mode work -- core ctlr
    7.83 +
    7.84 +-] Freeing of meta task, lang data, and slave -- some langlets may need meta task or lang data to persist past dissipate or task-end..  There may be complex patterns governing the point at which such data can be freed.  So.. want "malloc_to" such that abort cleanup is simple.. 
    7.85 +-] "malloc-to" implemented by linked list inside malloc prolog, which attaches ownership to something, such as a language instance or process -- deleting a malloc'd chunk equals copying pointers between the ones on either side..  so still fixed time 
    7.86 +-] Control over freeing PR portion of meta-task, lang-data and slave vs langlet portion -- who controls?  They are linked, so they have to be freed together..  VSs wants lang meta task to persist past dissipate? past task end?  Can a slave or task be waiting still after dissipate or task-end? NO -- BUT, then need impl of waiting that ONLY checks parent when that parent is in a list of current waiters..  sucky impl!!  Otherwise, child doesn't know if parent is waiting or not.. so has to check when it ends..  so parent's child-counts must persist past task-end or dissipate.. 
    7.87 +
    7.88 +-] Sequential mode: when multiple processes, have to keep the core controller alive, return from the "wait for process to end" call, then execute the next "wait for process to end call" and return from that..  The issue is about how to interleave the main thread with the work of the various processes..  for sequential, want to do all the work at once, then handle the various "wait for" calls from the main thread separately..  which means need a way to detect, inside the "wait for process to end" or "wait for PR activity to end" whether a different wait has already caused the work to happen..  so..  need a system of flags that the "wait for" check..  but need to keep core ctrl re-startable until get to "PR__shutdown()"..  so do a flag system -- for each "wait for process", check the flag in that  process -- if has already executed, return, else do core ctlr
    7.89 +
    7.90 +-] implement VSs "wait for all activity to end" 
    7.91 +
    7.92 +-] Wiki: PI SS.. WL.. int.. are indications to the langlet developer -- they should NEVER use _int__ version, and should always match to the location in the code.. so startup should only use SS, and request handlers and assigner should only use PI, and wrapper lib should only use WL..  if the appropriate form is not provided, contact OpenSourceResearchInstitute.org developers via this wiki and ask for it..  (in meantime, have to perform careful analysis of locks and so on to be sure what doing is safe..  chances are high that doing something that will break some non-apparent aspect of the system -- OSRI developers of PR are not stupid people -- what looks dumb is usually a case of not enough information about the complexities of the system or the other aspects taken into account in the design..  what looks obviously best for one langlet may be quite bad for a different one that a developer may not have experienced yet..
    7.93 +
    7.94 +-] Wiki: important to prevent name space collisions, prepend lang acronym to front of everything in plugin -- data structs, function names, etc
    7.95 +
    7.96 +#-] Put calling slave in PR's req struct. (Note, even if call from inside a task, the calling slave remains the same, it just changes type) -- think YES, do that, and provide "give requesting slave" that takes the langReq as input.
    7.97 +-] Wiki: sometimes a request is stored inside the lang env, and later processed in response to some other request or state change, aso..  at that point, the requesting slave is needed, but not available as part of the call..  so, it has to be gained from the stored request -- the question is  Decide whether lang needs calling slave from PR req struct.
    7.98 +-] Decide whether want PR to have a prolog to a request struct, or just link via pointers..  
    7.99 +-] if yes, prolog, then make a request struct recycler
   7.100 +-] if yes, prolog, then wrapper lib has to call PR_WL__create_req_of_size( sizeof(LangReq)) -- which gets from the recycler
   7.101 +-] if yes, prolog, then make a PR_PI__clone_req that uses memmove and keep total size inside prolog
   7.102 +
   7.103 +-] test the counting of live slaves and live tasks for shutdown detection
   7.104 +-] test the update of work avail counts when task or slave made ready
   7.105 +-] test freeing of langData and metaTasks when slave dissipated
   7.106 +
   7.107 +-] false sharing in SlaveVP and lang envs and others -- figure out how to pad to cache line size
   7.108 +
   7.109 +-] slice (and duplicate) PR.h -- make separate files -- one for use directly in application, one for use inside wrapper lib, one for use inside lang plugin (PI plus SS), one for internal to PR (only PR sees those functions and structs)
   7.110 +
   7.111 +-] Change ID, so each lang can have its own ID for a slave or task (a langlet defines the meaning of the ID elements)
   7.112 +
   7.113 +-] Wiki: startup sequence: chose to have a default seed langlet -- could have passed a langlet's startup Fn to the create_process command, but that's confusing, because then inside the seed VP's function, then in turn call more langlet start Fns explicitly, and pass the magically-created seed itself into them..  Feels more straight-forward to assume process starts with default langlet, and creates a seed, and that seed then uniformly starts all the langlets used in the process..  making it so that default langlet cannot create any work.. might even remove all commands.. put singleton, trans, and so on into a utility langlet that has to be explicitly started..
   7.114 +-] Wiki: security risk: the assigner names a slot to put work into, or a slot to assign a task to -- that gives plugin control.. so it can make a mistake and say the wrong slot!  Means need an analysis tool that checks a plugin's handling of slot pointers, to be sure it doesn't store slot pointers anywhere, and only passes along what is passed in..
   7.115 +
   7.116 +-] Wiki: chose Productivity over performance..  Change to numReadyWorkUnits in lang env -- mean write lang env every time slave or task becomes ready -- vs read Q in assigner after writing a new value, to check if should clear flag -- less traffic using a flag, which beats the extra computation of checking for empty Q -- however, with counter, can  put work avail inside PR, and langlet never sees!  What's more important, perf or productivity?
   7.117 +-] Wiki: Make sure VSs assigner calls the PR insert-into-slot Fns.. and assigner checks whatever structs hold ready slaves and ready tasks
   7.118 +-] Wiki: make VSs req hdlrs use PR calls to makeReady -- a generic PR call to make slave and task ready -- if PR has no override assigned, then it uses the registered versions, otherwise it puts the slaves and tasks into a common structure used by the override assigner.
   7.119 +-] Wiki: Track work in lang envs by keeping counter of num work units ready
   7.120 +-] Wiki: VSs__start() register lang data creator and meta task creator and assigner and shutdown handler
   7.121 +-] Wiki: make PR__give_lang_data use the langData Creator Fn that's registered into the LangEnv to create the lang data if it's not already there..  gets the langData creator from langEnv retrieved by magic number  passed in, with the calling slave
   7.122 +-] Wiki: make lang data creator use PR_create_lang_data
   7.123 +-] Wiki: Make lang data and meta task be created when accessed -- because slot slave turned into free slave at unpredictable times -- and converted slave doesn't have lang data nor meta task.
   7.124 +-] Wiki: make lang data creator pass lang data freer to PR create lang data fn
   7.125 +-] Wiki: make lang meta task creator pass freer to PR create metatask fn
   7.126 +-] Wiki: Make langData creator insert ptr to free-Fn into langData
   7.127 +-] Wiki: For what's now in compiler flags, will have multiple versions of each dynamic library..  the compiler flags will affect what goes into executable, and will affect which dynamic library the executable connects to.  To handle probes, histograms, and so on, the lang's dynamic lib and PR's dynamic lib are compiled with whatever support is needed for that compiler flag setting.. the only restriction is that what's done in the app-code can't cause new data struct fields to be needed, only compiler flags can cause that..
   7.128 +-] Wiki: Another way to address the compiler flags is to make the data-structs that are compiler-flag controlled be instead heap allocated, and just don't malloc them in place of the macro that inserts the fields..
   7.129 +-] Wiki: Another way to address the compiler flags is to make a standard histogram struct, with variations added, and to indirect them..  for example, use the same hash-approach as lang magic number
   7.130 +-] Wiki: put request handler ptr into request, or register, or keep dispatcher?  Putting pointer into request may be lower performance because of brch predictor for jump-to-reg?  Can't guess.. so ignore perf aspects.. One way is registering the handlers in init code, second way is providing a separate dispatcher function, third way is passing a pointer here.  Are there implications of each?  What's possible downside of passing a pointer here?  Performance is only thing can think of -- good thing is that can provide a PR defined prototype that compiler then checks, so automatically catch when provide pointer to hdlr with wrong signature.  Also, providing pointer here keeps it local -- don't have to check separate portions of code to be sure they're synchronized -- which is a pain with the current dispatch approach.. and would also happen with the register handler approach.. So, then, provide many different ways to send a request to PR -- one for creating task, one for creating Slave, one for ending task, one for ending slave, and one for general language request -- maybe add ones that have special PR interaction in future (such as IO related).  That, actually complicates creating the wrapper lib -- now have different PR calls..  but doesn't seem too bad.. and have different hdlr signatures for each kind anyway..  so, kind of draws attention to fact that create and end are special..  and slave and task are different.. Note, it's not important for the language to be able to change the signature of its handlers, because the dispatch shows they all are the same -- receive language env, plus request struct, plus slave..  It's just, if change mind in future, this is seen in EVERY wrapper lib function, would have to change in all those places..  (Don't care, have automation in code tools for that.. go for it).
   7.131 +XXX-] Wiki: NOTE: recycler for lang data, and meta task both have to handle receiving the proto version, and call PR Fn to turn it into lang-specific version.
   7.132 +-] Wiki: langData and metaTask -- for structure, three choices: prolog, or linked structs, or cast, by making lang put PR-used structs in correct order at start of their own struct def
   7.133 +-] Wiki: Unrealistic to force every langlet's langEnv to begin with the exact same fields as PRLangLangEnv -- too easy to get wrong, too much for lang creator to learn
   7.134 +
   7.135 +
   7.136 +#-] free langEnv at point stop the lang
   7.137 +#-] VSs registers a "make slave ready" and a "make task ready" (makeSlaveReady, makeTaskReady) function, 
   7.138 +#-] Make VSs resume_slave compat w/ assigner checking work avail
   7.139 +#-] Make Master handle update of numReadyWork based on success return from assigner
   7.140 +#-] make processHasWork be managed in MasterFn, when calls assigner, and in make_ready fns in PR
   7.141 +#-] Remove "set_work_in_lang_env" from VSs -- move inside PR, via tracking number of work units ready
   7.142 +#-] fix PR_int__free_lang_data() same way did for meta task
   7.143 +#-] Modify VSs plugin file to eliminate request handler
   7.144 +#-] Make VSs assigner use the new assigner protocol
   7.145 +#-] put create handlers in
   7.146 +#-] Make seed slave be inserted into PRServ env at process creation
   7.147 +#-] Eliminate core controller -- carve current core controller code to instead call the master function directly -- get lock, call master, release lock, switch to slave, loop
   7.148 +#-] Modify VSs wrapper lib functions: pass handler ptr to the PR call that sends request from inside a wrapper lib function..  that pointer is a plugin into PR..
   7.149 +#-] Fix PR "send" calls for create and end, to take handler w/different signature
   7.150 +#-] when shutdown a process, free chained lang envs
   7.151 +#-] when shutdown a process, decrement num processes in top env
   7.152 +#-] PR__create_seed_slave( topFn, initData, process ) -- handle meta-task, setting type to be SeedSlv, and such inside this..
   7.153 +#-] make langlet startup call "PR__create_lang_env_in_slaves_process(size, slave, langMagicNum)" -- get process out of the slave -- return pointer to just past the prolog, which has chaining and ptrs to handlers, and so on -- perhaps add handler ptrs to creation call
   7.154 +#-] Fix creation of meta task and lang data arrays inside slave, when slave created -- malloc the array of pointers, plus extra int for size -- this is PRCollElem pattern
   7.155 +#-] Fix creation of lang env array inside process, when process created -- malloc the array of pointers plus extra int for size -- PRCollElem pattern
   7.156 +#-] Clean up animation master -- a bunch of repetitions of copied code..  move the case statement one to top, then look through the copies for any with alterations -- keep those, and package inside "masterFunction__X version", which is called from a loop inside the case statement.
   7.157 +#-] what's with needsTaskAssigned in slot slaves? -- get rid of it?
   7.158 +#-] Turn protoLangData and protoMetaTask into prologs -- create via PR__create_lang_data_in_slave( size, slave, langMagicNum ) -- adds the lang data to the slave and returns ptr to it -- PR__create_meta_task( size, langMagicNum ).. add any handlers might need to be kept with the task to the creator PR__create_meta_task_for_slave( size, slave, langMagicNum )
   7.159 +#-] retrieve meta task, lang data, and lang env through accessor fn -- that way it doesn't matter how implement them -- can easily change it later, if needed for performance or for flexibility (a lang comes along that needs diff way of doing things)
   7.160 +#-] Turn protoLangEnv into a prolog of langEnv -- 
   7.161 +#-] when create new process, initialize chainedLangEnv to NULL inside each in langEnvs array
   7.162 +#-] define PRLangLangEnv
   7.163 +#-] when convert from slot slave to free task slave, check what should do about num (live slaves + live tasks) inside VSs's task stub, and properly update process's count of liveFreeTaskSlaves -- think just ignore free task slaves -- the end task will still happen -- whether slot slave or free task slave animating task is invisible and irrelevant to lang
   7.164 +
   7.165 +=================================
   7.166 +=== DLL (specialize during init) ===
   7.167 +=
   7.168 +-] separate langlet code into a wrapper lib, as a static lib compiled into executable, then assigner + req handlers + startup and shutdown into two versions (single lang and multi-lang) as two dynamic libs that are the plugin
   7.169 +
   7.170 +-] Turn PR into a DLL, so can run same application binary on Xoanon as on SandyBridge, and it automatically specializes by connecting to whatever DLL (.so) is on the machine..
   7.171 +-] Two kinds of DLL -- a PR DLL, and (two) plugin DLLs (for each langlet)
   7.172 +-] Two DLLs for each langlet -- one for single lang mode, other for multi-lang mode -- because can't use macro magic for accessors Fns to switch behavior btwn single lang and multilang -- Also, multi-lang has to do things like set "has work" and the assigner and even startup/shutdown look different between the two DLLs.. but all versions of plugin code use accessor Fns for any access of langEnv, metaTask, langData or TopEnv
   7.173 +-] Once DLLs, can not change existing parts of TopEnv -- only mod allowed is to add to bottom of it..
   7.174 +
   7.175 +-] Rearrange PR code for DLL
   7.176 +-] Break PR.h into four files -- one for plugin's DLL code to use, second for PR's DLL to use, third for wrapper lib, fourth for application code to use for PRServ services
   7.177 +
   7.178 +-] Make sure the singleton assembly still works with DLL structure
   7.179 +-] Rearrange lang impl code for DLL, 
   7.180 +
   7.181 +
   7.182 +-] Wiki: Dynamic lib allows automatic specialization.  The library is written for the hardware, and the executable calls it.  The dynamic library has minimal impact on execution time.
   7.183 +
   7.184 +=================================
   7.185 +=== Code Structure ===
   7.186 +=
   7.187 +-] Q: Use seed slave in special way when end process?  
   7.188 +-] Q: handle end of seed slave in special way? -- think about ways that seed is special -- it is root slave of process..
   7.189 +-] Q: keep end-detection based on number of live tasks + generic slaves? (don't count free task slaves -- they're tied to a task, and counted by task create and task end)
   7.190 +
   7.191 +-] Q: put ptr to each req handler into request itself?  Does that work with DLL structure? (Yes!)  It means the wrapper lib is inserting addr of a label that is in the DLL interface of plugin's DLL.  This approach eliminates dispatch table -- perf hit? (compiler doesn't know addr inside ptr var.. may be branch pred penalty)?  Cleans up the Master code.. but ruins notion of request handler as a single plugin thing..  makes create and dissipate reasonable structure (have PR stuff both before and after lang-specific handler).. what are implications for DLL -- can DLL call be inserted as a pointer, inside the executable, and still function, when call through the pointer? (A: YES! Just leaves symbol inside executable, which is converted when DynLib connects)
   7.192 +-] A: fn ptrs can be put into requests, and still work w/DLL --  IE, make the fn ptrs be DLL fns..    Can put fn ptrs directly into req struct, and it still can be resolved via DLL..  it just doesn't have as nice a neatly-packaged structure as the dispatch table inside request handler..
   7.193 +
   7.194 +-] split files into groups that fit DLL structure: App calls wrapper libraries, which are provided as static libs.  The wrapper lib stays constant across hardware.  Internally, wrapper lib has calls to both PR DLL and Lang DLL -- the lang DLL's start sends pointers to its functions over to PR's DLL, which then calls-back when handling requests -- lang's DLL holds SS, req handling, and assigner -- PR gets the ptr to req handler passed to it, for create and end, or gets Fn ptr from PRLangEnv prolog, where registering the handler placed it..  (Can use DLL symbols same as normal symbols, they are turned into a physical pointer that can be passed inside a data struct, so PR DLL de-references that pointer, to execute the function that's inside the lang DLL)
   7.195 +
   7.196 +-] For multi-mode, write single lang plugin to be nearly same as multi-mode -- relative to previous version, both have different startup/shutdown but access all PR vars via accessor, even MasterEnv ((TopEnv))
   7.197 +-] A process is created in all cases -- even if single lang -- process struct holds things that used to be in master env -- PR accessor retrieves them, given a slave (which is always available)
   7.198 +-] Single lang startup is passed the seed, which PR creates when creates the process -- for single lang, have a call that starts PR, then a PR call that creates the process and starts the lang together (pass it ptr to lang-start-standalone fn).. or, make seed start lang stand-alone?  If do stand alone vs multi-lang by connecting to different DLLs, how does that change things?
   7.199 +
   7.200 +-] Q: make "PR__start_single_language()" and each lang provides a "lang__start_standalone( ptrToSeedFn, seedData )"..?  standalone is simplified form, easy to understand for newbies.. use the master env in traditional form.. so only change to existing code is the way a language is started up.
   7.201 +-] Q: make "PR__start_single_language_with_tasks()" for languages that include tasks..? only VSs will change, pull task stuff out and shutdown detection out and put it into PR..
   7.202 +
   7.203 +-] Q: Array idx vs magic num -- To speed up retrieving langEnv, langData, and langMetaTask, what about a PR-call made inside seed that states the total number of langlets will be started?  Then, each langlet-start call is given a number, which indexes an array -- instead of the magic number.. the langlet can be reused in several processes, so every call to wrapper lib would have to have that langlet number in it, provided by the application code.. "VSs__some_constr( foo, slave, langNumInThisProcess );" -- thinking wait and see what kind of perf hit the magic number is..  should be fine
   7.204 +
   7.205 +-] Q: Eliminate anim slot and core controller? -- don't need -- makes PR easier to learn -- important to get uptake, viral adoption..  They serve no purpose..  except, when integrate into pipeline, they are equivalent to multiple HW contexts in the pipeline..
   7.206 +
   7.207 +-] Q: Can assigner activity affect constraint state?  In other words, does some lang exist, where the act of assigning a slave to a slot causes update of constraints on readiness of other slaves?  This matters because it means assigner activity modifies constraint state..  could it be a fundamental (invariant) that those constraint updates always get triggered at either the movement into the readyQ or end of the assigned task/slave, or other activity internal to request handlers?  What about for HWSim?  The tasks are in a priorityQ, and can be pulled out (by handlers!) if incoming communications (which take place inside handlers) indicate a wrong ordering.. The assigner only ever pulls the top element of the priority Q..
   7.208 +-] Q: Is there any way that plugins communicate with PR, other than PR calls to plugin handlers and assigner? -- put all such comm into PRLangEnv
   7.209 +-] A: So far, have requests intercepted by Master, handled by PR, then passed along to lang handler, and results passed back to PR, via data inside SlaveVP struct..  also have lang's assigner calling PR assign-to-slot Fns directly..
   7.210 +-] Q: when have multiple processes, each with multiple languages, then will have the same language's DLL used by multiple processes, each with different langEnv contents..  but all DLLs will share same PR DLL's state..  so will the global vars inside the PR DLL be the same seen by all the lang DLLs, and also all the processes correctly getting the langEnv, which is held inside the PR DLL..
   7.211 +
   7.212 +-] Goal: Design in way can silently change to DLL..  means plugins never see master env, instead, access all via call, which is set to a macro for now..
   7.213 +-] Q: For the case when PR is loaded separately from app, as a DLL..  how will startup change?  What will be different inside master and ctlr?
   7.214 +
   7.215 +
   7.216 +-] Wiki: go with process ptr in slave, slaves stuck inside a single process, and process ptr fetches a data struct used to look up the Lang env..  each langlet has its own Lang env..  To look it up, the wrapper lib inserts a langlet ID into the request..  that ID is generated in some good way to make lookup fast..  the langlet ID is used to look up the Lang env inside the data struct retrieved via the process ptr..  the langlet ID is also sent to PR by the langlet's __start() call..  so, the app has to do the langlet__start(), inside the process, after it starts..
   7.217 +-] Wiki: give every slave and every task a name ("taskID"), which is set by a PR call and retrieved by a PR call..  langlets may ask to be provided with this PR defined name, but a langlet cannot define the interpretation -- the meaning of the integers in the name is defined by the application.  Although, a langlet is free to provide Lang sugar for setting and retrieving the fields of the name..
   7.218 +-] Wiki: The langlet start is called inside the seedVP's function, and the seedVP is passed to the langlet start, which then calls a PR__register() function, passing the seedVP and the Lang env it created (the langlet init must put a pointer to the request handler and the assigner inside the Lang env, via PR macros, and the Lang env is a union or something so PR can extract the pointers without knowing the langlet's lang env struct)..  
   7.219 +-] Wiki: For the magic number,  use the jenkins hash to generate the magic number from the langlet name, make the magic number be exactly one 32 bit int, and then just mask off the upper bits and use what's left as index into an array, and use chaining..  so, make it an array of structs, where the struct is the PR Lang env, which holds the pointer to the language-specific Lang env plus the 32 bit magic number, plus pointer for chaining. 
   7.220 +
   7.221 +-] Wiki: wait machinery is not in PR because it adds overhead -- don't want it if langlet doesn't need it..
   7.222 +-] Wiki: In langlet, use PR accessors for everything held inside PR defined structures -- example: in place where need the "slaveAssignedTo", use a PR accessor, and don't put slave into lang meta task
   7.223 +
   7.224 +-] Wiki: DLL structure: App calls static libraries which internally call to PR DLL, and to Lang DLL (just the start and end lang calls) -- the lang DLL's start sends pointers to its functions over to PR's DLL, which then calls-back when handling requests -- and SS plus the plugin will make a DLL -- let PR get the handler from PRLangEnv prolog, where registering the handler placed it..
   7.225 +-] Wiki: using macro-magic inside PR prevents turning PR into a DLL (macro forces static linking).. need the separation so that specialization happens by simply hooking up to whatever DLL is on the machine the binary runs on..
   7.226 +-] Wiki: PR impl plus plugins are made into a DLL, which is separately loaded onto machine -- Perhaps each plugin is a separate DLL loaded independently.. everything part of plugin must be makeable into DLL -- the symbols for ptrs put into requests must be resolvable via DLL mechanism..  lang_start() sets all ptrs, and is invoked via DLL 
   7.227 +
   7.228 +-] Wiki: VSs start wants to put langMetaTask into seed slave..  so do other langlets..  VSs uses for wait().. so either have multiple langMetaTasks, or have langlets move their metaTask data to langData when a task suspends.. do that by registering a task-suspend handler
   7.229 +-] Wiki: can VSs get by with just a langData?  Does it need a langData distinct from metaTask?  Can envision case where two can't be combined, when task suspends?  Only thing unique about meta task is that it exists before assignment to a slave.. would need langlet to put its own marker to check where the data is, or else use PR's "is_suspended_task"..  or else have PR's "give lang meta task" hide the difference.. hmmm.. but can langlet treat a suspended task as a VP? -- if langlet has notions of both, and has constructs for VP, which can be used to suspend a task.. then yes.  Means langlet needs to have its own suspend task handler, which combines or whatever..  but then, back in task code, it wants task-info, has to be way to separate again.. so, if PR keeps both inside lang data, but separate, that works..  then don't need a conversion handler in langlet..
   7.230 +
   7.231 +-] Wiki: PR process has a default lang, PRServ -- its readyQ gets the seed slave.  Other options were process-wide readyQ and assigner, or put the seed slave directly into an animation slot.
   7.232 +
   7.233 +#-] have two modes for PR: choose different version of Master depending upon mode
   7.234 +#-] make seed slave a separate type
   7.235 +#-] Q: PR__create_Process.. how handle numLiveSlaves and shutdown conditions..  seed is oddball..
   7.236 +#-] <Don't see advantage> Q: One thing thinking is to make individual request handlers independently called by PR itself.. put the switch statement inside PR, which then calls the appropriate individual handler..  only, will be less efficient when go to dynamic linked library, and puts extra structure in place that limits plugin flexibility..  although, that may be a good thing from a SW engin point of view..  register each handler, along with  the req type that selects it.. put req type into PRReqst..
   7.237 +
   7.238 +========================
   7.239 +=== Services Related ===
   7.240 +=
   7.241 +
   7.242 +-] PRServ -- trim PRServ files -- remove all VSs stuff..
   7.243 +-] PRServ -- move req handlers for probes and exception to PRServ files
   7.244 +-] PRServ -- move wrapper lib for probes and exception to PRServ files
   7.245 +-] PRServ -- make start() for PRServ
   7.246 +-] PRServ -- define PRServ__resume_slaveVP
   7.247 +
   7.248 +-] No Compiler-switch: look at general way for creating histograms, perf counters and the lang-specific part of UCC gathering.. that works with DLL, without compiler switches!
   7.249 +-] No Compiler-switch: look at how to eliminate compiler switches -- turn into branches everywhere..  perhaps several DLLs, and choose which to connect to, based on what used to be the compiler switch defines..
   7.250 +-] No Compiler-switch: how to eliminate macro-compiler-switch trick for Stats histogram and such -- with PR in its own DLL, langlet can't use macros to put things into top env!  look at VSs_measurement.h.. just need to modify that, placing histograms into langEnv, and making hist printout scan the langEnvs..
   7.251 +
   7.252 +-] single-lang mode PR calls PRServ req handlers directly from Master -- only one process, which is the single lang.. so PR Services have to be handled by master
   7.253 +
   7.254 +-] Wiki: master either puts a task to a slave, or gets a ready slave..  
   7.255 +-] Wiki: Make PR have its own langlet, with its own langEnv.  That way, can do things like the probes as PRLang calls, and also have a natural place for the seedVP
   7.256 +
   7.257 +#-] Give PR its own langlet -- that one gets the seed slave when process created.. and it does probes.. maybe singletons, transactions, atomic?..  it's there in both single-lang and multi-lang modes..  in multi-lang, it gets the seedVP.. in single-lang, seed is saved in the process and lang's single-lang start version takes the seed from the process and puts it into its own readyQ
   7.258 +#-] Process has a default PRServ langlet, which holds own task Q and slave Q -- (seed slave goes into that langEnv)
   7.259 +
   7.260 +
   7.261 +=============
   7.262 +=== VUtil ===
   7.263 +=
   7.264 +-] take singleton and trans out of VSs -- remove from init, and from .h and from .c and from req handler in plugin and from req handlers.c
   7.265 +-] copy startup from VSs into VUtil, take anything else need and get VUtil working as langlet
   7.266 +
   7.267 +-] copy-paste Singleton, transaction, and so on, from inside VSs, over to VUtil, or else PRServ
   7.268 +-] Add malloc with ownership system to VUtil
   7.269 +
   7.270 +=============================
   7.271 +=== Inter-process related ===
   7.272 +=
   7.273 +-] Implement "readyRing" data struct that can "add" "remove" and "next".. where next searches through elements, going in a circle, until finds one whose flag is true, or else gets back to initial element.  each time a process is created, add it, and remove when process shuts down, then do next when advance to next process
   7.274 +-] Use the readyRing to implement the token scheme for deciding which process to ask for work.  For now, just take all work from same process, counting how many cycles via TSC, until reach a quota, then get next process from the ring.  Later can do things like let inter-process communication trigger having work from multiple processes on-going, or other schemes for choosing mix of processes. 
   7.275 +
   7.276 +-] At lang start, register a handler for external communications -- each lang fits external communications in its own execution model..
   7.277 +-] Put port and connection information into PRProcess
   7.278 +-] make the data struct for a process to store connection information and request-handler for incoming comm -- PR call connects out-port of one process  to in-port of another..  it stores connection information in PRProcess, and stores a request handler with each input port..
   7.279 +
   7.280 +-] trigger service -- for IO, and for dissipate, and for other conditions -- register a suspended slave w/PR, or perhaps register a task-creation handler or slave-creation handler.  Upon trigger condition, PR does resume, or does creation
   7.281 +
   7.282 +-] Q: have a process-discovery service, or channels, or other ways for anonymous sends and anonymous receives?  Makes inter-process comm more flexible than just fixed wiring.. this is compatible with and independent from the lang registering a handler for in-coming comm..
   7.283 +-] Q: how connect port-name used in process to the params in a call made by application.. thd-based lang will need some way to connect a thd to a port, or else if thds are spawned, then the spawned must come into existence with some way to communicate with the other threads -- either containing a known name the others ask for, or containing the name of something to talk to.
   7.284 +
   7.285 +-] Q: inside master, each slot scanned may be from a different process than the others.. look at implications for Holistic and measurement macros.. 
   7.286 +
   7.287 +-] Wiki: many ways for comm from outside a process to look..  many ways for the transmogrification into the language's internal model to happen.. 
   7.288 +-] Wiki: could have a slave attached to each in-port, that PR activates when a communication comes in..  
   7.289 +-] Wiki: could simply let any slave execute "wait for input", and have VMS buffer up input, and hand it to slaves as they ask..  buffer-full block gives risk of deadlock.. makes sender remain suspended until a receive takes from buffer..  likewise, buffer-empty blocks a receive..  that works for a persistent processor model..  what about a dataflow model, or task model?  Dataflow should be able to define an element that simply spits out data..  a task model should, what, generate a task to process each input?  
   7.290 +-] Wiki: but going with specifying the request-handler from a langlet started inside the process..  attach such a request handler to each in-port of the process.  That handler manages internal Lang state to decide how the application sees an incoming communication.  PR manages making sure the langlet's start() has completed before invoking the request handler. The plugin decides how an input gets into the program..  so, PR just has something that talks to the plugin..  so, the langlet supplies a request handler that handles the arrival of an input!  The handler does whatever the language decides with that input..  it can put the input into a slave's dataRetFromReq and resume it, or it can generate a new task and queue it up, or it can put that input into the input of a dataflow node, or..  Good, so the handler is told the port-number the input arrived on and which process it was sent to and which it came from..  The handler is invoked by PR, and it does whatever thing it does within the langlet's LangEnv..  maybe don't even need process to and from, just need port and langEnv of receiver..  But, maybe can use process ID as a name, to specify destination when send..  For sending, langlet has a request handler for whatever the construct looks like in the language..  the request handler has a PR call it makes that tells PR about the send..  PR in turn calls the registered request handler for the receiving port and receiving process..  Boom.  So..  after create a process, if want to communicate with it, do a langlet call that adds an input port to it..  that call registers the langlet's request handler with that port number for that process, inside of PR..  Boom.  Done.
   7.291 +
   7.292 +-] Wiki: could also have a process-discovery service, or channels, or other ways for anonymous sends and anonymous receives..  makes inter-process comm more flexible than just fixed wiring.. this is compatible with and independent from the lang registering a handler for in-coming comm..
   7.293 +
   7.294 +-] Wiki: Q: on how divide core among multiple processes?  Greedy only works if they're same program..  but should make it work for multiple different programs.. So, the issues are: choosing from among multiple with work ready, staying low overhead, being compatible with multiple "users" for OS purposes, staying clean and natural to existing structure..
   7.295 +-] Wiki: A: is to do round-robin, with a token (similar to SuperThread) -- one process keeps token for some accumulation, then PR passes it -- if process has no work, the token passes..  that way, the token can be managed by reading TSC or by setting a watch-dog that writes a flag, or by counting units, etc..  leaves room for simple upgrade in future to HW-based division.. and the stuff about which user is hidden  inside the token handling -- can even be interrupts set stuff inside token handling to decide when processes of a given user have used up their quota and change state internal to token handling, so those processes lose token and don't get it back until HW interrupt based stuff says so..
   7.296 +
   7.297 +
   7.298 +===============================================================
   7.299 +=== Single-lang vs Multi-lang  and Multi-lang complications ===
   7.300 +=
   7.301 +
   7.302 +-] Wiki: state how plugin must use different functions to create request of different types: slvCreate, slvDissipate, taskCreate, taskEnd, LangReq, PRServiceReq 
   7.303 +-] Wiki: when design a langlet, there's a combination of process + lang-ID that determines the environment the langlet has available to save things global to the langlet (which are necessarily private to process).. 
   7.304 +-] Wiki: Make sure rules for plugins are captured in wiki -- make a wiki as create DKU plugin, highlighting things like two-level langEnv struct, how to cross-link it, aso..
   7.305 +-] Wiki: make all langlet accesses of fields in slave, in PRMetaTask, in PRLangEnv, go through accessor functions that PR provides..  list the accessor functions, and give sample code for each, with comments in code
   7.306 +
   7.307 +
   7.308 +#-] Lang uses  PR__create_lang_env call, which also registers it..
   7.309 +#-] Make it so slave has the same LangData field indep of multi-lang mode or not..  
   7.310 +#-] change PR__give_lang_data impl betwn single lang and multi-lang..  for multi-lang, store a hash table, hashed by magic number, that retrieves indiv lang's langData..  
   7.311 +#-] make all langlet accesses of fields in slave go through an accessor Fn
   7.312 +
   7.313 +======================================================
   7.314 +=== Process create, Process structure, Process end ===
   7.315 +=
   7.316 +
   7.317 +-] Wiki: PR__malloc and PR__free are used in main, betwn PR_Start and PR_end.  Issue with mix of sys malloc with PR malloc.. if want to return from process, has to be PR__malloc'd, else have to do internal copy! -- also issue with sync btwn ending process and main thread.. need a struct hanging around to carry result back to main thread
   7.318 +-] Wiki: Detect end of process inside handlers, for dissipate and end task.. safe and much cleaner -- once all generic slaves have dissipated, and end-task has happened on all live tasks, then that's it..  no way for process to ever generate more work.. and that case always comes to pass.. most often when seed slave dissipates..
   7.319 +-] Wiki: turns out that trying to have a single-lang mode was too tangled up with process..  
   7.320 +-] Wiki: issue is alloc'd carrier of result left lying around.. how to get rid after this call?  If make this call free, then how get rid when no result is returned and waited for?  The transition from PR internally allocated results to OS allocated results is messy..  going with everything PR alloc'd and explicit copy in main if want to persist
   7.321 +
   7.322 +#-] The steps of shutting down a process -- are already in Master when detect, inside end-task or dissipate, so call "PR_SS__shutdown_process" -- it does: removes from queue of processes, and calls shutdown on each langlet started in it (which frees any langlet-alloc'd data in langEnv & langEnv itself).. then frees the process struct (which frees the proto-langEnv's in array)..  then writes a done flag (in TopEnv?) and signals the condition var that any OS threads might be waiting on (the wait-for-process-to-end call).
   7.323 +#-] make process shutdown cause resume of "PR__wait_for_process_to_end()" call -- requires OS thread syncing
   7.324 +#-] make shutdown of all processes cause resume of "PR__wait_for_all_activity_to_end" call (called inside PR -- "inside" means after PR__start, and before PR__shutdown)
   7.325 +
   7.326 +=================================
   7.327 +=== Lang startup and Shutdown ===
   7.328 +=
   7.329 +-] Q: When lang ends, free that lang's lang-data from inside slaves.. or just leave it there? 
   7.330 +-] Q: If slave dissipates that has lang-data from a lang that has already stopped, where is the "free" Fn for that lang's lang-data?
   7.331 +
   7.332 +-] register langData creator-Fn in PRLangEnv, via a "register" call..
   7.333 +-] VSs__start() must initialize its PRLangEnv with pointers to createSlv, endSlv, createTask, endTask, task assigner, slave assigner, reqHdlr 
   7.334 +
   7.335 +-] Q: have recycler for metaTasks..?
   7.336 +
   7.337 +-] two VSs startup versions..  single lang with tasks and multi-lang (accessor Fns for langEnv, metaTask, and langData.. in both cases, seedVP handed to startup)
   7.338 +-] In lang's startup portion of DLL, do the histogram creation, in a way that all will work right..  without macro magic and without compiler switches..
   7.339 +
   7.340 +
   7.341 +-] Wiki: Made meta-task be collection, just like lang data, because of seed slave..  each lang started in a process has the option of putting a meta-task into the seed slave..   For example, VSs_start is called from inside seed slave.. and wants to put langMetaTask into the seed.  Other langs are also free to do the same, and will want to do the same when they start..  
   7.342 +
   7.343 +#-] Cleanup lang state when process done --  free langEnv contents, provide free fn for lang data inside slaves
   7.344 +
   7.345 +==============
   7.346 +=== Master ===
   7.347 +=
   7.348 +
   7.349 +-] make app call a PR Fn to set which mode -- single lang vs single w/tasks vs multi-lang
   7.350 +
   7.351 +
   7.352 +-] Wiki: Multiple distinct versions of master, which jumped to is set during startup of PR.  Have a single-lang masterEnv struct that is different from multi-lang masterEnv struct
   7.353 +-] Wiki: The Master extracts the processID from the slave, and uses it to look up the data structure, held within the process-struct, that in turn holds all the Lang environs..  then it gets the magic number out of the request, which is in the PRReq, and uses that to get the correct Lang env out of that data struct.. and then it hands the request + slave + Lang env to the request handler that is pointed to by the Lang env..
   7.354 +-] Wiki: The master checks whether a just-suspended slave is executing a task (after checking for task-end), and if so checks if slave is a slot-slave, and  if so converts it to an indep slave and replaces the slot-slave.
   7.355 +
   7.356 +
   7.357 +#-] set which version of master is jumped to during startup of PR.  
   7.358 +#-] verify that lang sets flag each time it makes work ready -- and assigner clears when takes last work out
   7.359 +#-] Make a single-lang masterEnv struct and a separate multi-lang masterEnv struct
   7.360 +#-] masterEnv -- move many of its things to the PRProcess struct
   7.361 +XXX-] masterEnv -- make two versions, to be compatible with both single-lang and multi-lang multi-process (maybe two different, with compiler switch..)
   7.362 +XXX-] fix create masterEnv, by making two versions.. keep old one, make new for new env, and wrap inside compiler conditional.. or _PRMasterEnv plus _PRMultLangMasterEnv
   7.363 +#-] pick version of master based on mode -- each process chooses its mode -- so both standalones happen inside a process.
   7.364 +#-] Turn the Master loop into a function.  Make it so can call that Fn either from current masterVP, or else from inside what's currently core controller..  that way, can test what's faster, with only a simple code change
   7.365 +#-] Make multiple distinct versions of master -- single lang version uses single-lang master env, multi uses multi
   7.366 +#-] Investigate whether simplify if eliminate metaTask->taskType.. instead just use slave type
   7.367 +#-] check flag in PRLangEnv for work avail
   7.368 +
   7.369 +==========================================================================
   7.370 +=== assigner, shutdown detection ===
   7.371 +=
   7.372 +-] register a "resume slave" and a "make task ready" function, and then call PR's version of those -- if PR has no override assigned, then it uses the registered versions, otherwise it puts the slaves and tasks into a common structure used by the override assigner.  Want to add some standard format data structure that holds information used during assignment..  all langs provide the same kind of information..
   7.373 +
   7.374 +
   7.375 +-] Wiki: In order to be compatible with an override assigner, all plugins will have to register a "resume slave" and a "make task ready" function, and then call PR's version of those -- if PR has no override assigned, then it uses the registered versions, otherwise it puts the slaves and tasks into a common structure used by the override assigner.  Want to add some standard format data structure that holds information used during assignment..  all langs provide the same kind of information..
   7.376 +-] Wiki: Questions to consider when designing the assigner for multi-lang..  first, there are multiple Lang envs, each with its own ready structures (Q or priority Q or CAM, etc) and its own assigner..  but if want an override assigner, the slaves and tasks have to be inside PR, not in the langlet langEnvs..  Next, need to consider where assigner lives -- in langlets, or in PR.  Lastly, when make a choice, consider how that will look to the plugin.. what changes in plugin structure, calls made by plugin, aso
   7.377 +-] Wiki: langlet-request-handlers call a PR fn to make things ready..  that way can invisibly add an override assigner that is generated by static tools and knows about all the langlets and manages all the ready tasks and all the ready slaves (and adding this feature in future won't force rewrite of pre-existing plugins)..   can test this by creating such an override by hand..  
   7.378 +-] Wiki: when request handler wants to make a task or slave ready, it makes a PR call to make it ready, and includes a pointer to data for use by the assigner -- the data has a standard format..  If there is no override, then PR puts the task or slave into the language's langEnv, via registered "make slave ready" and "make task ready" fns, else it puts them into the  override assigner's environment.
   7.379 +
   7.380 +-] Wiki: one assigner for both tasks and slaves -- the assigner returns a PRMetaTask, and PR checks which and assigns to slot slave if a task..
   7.381 +
   7.382 +-] Wiki: static tools will know the mix of langlets, and will put an override assigner into the executable, for a specific machine.. such an assigner will only work when there's a BLIS-like distribution system for executables..  otherwise, the assigner has to be independent of the mix of langlets, and be written for the specific hardware.
   7.383 +-] Wiki: any override assigner receives all ready tasks and slaves, and sees all the free slots.. it has some form of UCC and HW model that it uses in predicting effectiveness of various assignment scenarios.. so it builds a tentative model of future events and incorporates that into current assignment decisions..
   7.384 +-] Wiki: override assigner must see all ready tasks and all ready slaves, and know propendents and dependents and constraint groups (ex of a constraint group that isn't propendent nor dependent -- singleton, mutex..) 
   7.385 +
   7.386 +#-] Change VSs assigner so it always returns a PRMetaTask -- with either a generic slave or a lang-specific task attached
   7.387 +#-] use accessor calls for all PR data accesses, to see if can hide difference single-lang vs multi-lang..
   7.388 +#-] finish VSs assigner
   7.389 +#-] Give each langlet its own assigner
   7.390 +
   7.391 +===================================
   7.392 +=== Request Handlers
   7.393 +=
   7.394 +-] verify all VSs req hdlrs are compatible with the new structure..
   7.395 +-] Q:Does lang's req handler need the calling slave out of the req struct?  Should put it into PRReqst?
   7.396 +
   7.397 +-] Wiki:  PRMetaTask is a prolog.  Wrapper lib has to call PR_WL__create_req_of_size( sizeof(LangLangReq)) -- can't stack-allocate
   7.398 +-] Wiki: have both PRReqst and PRLangReqst.. PRReqst covers anything needed by special forms, which require the Master or coreCtlr to do something
   7.399 +
   7.400 +#-] PR call inside wrapper lib to send request -- insert into PRReqst, the lang's req struct and the calling slave, and the magic number 
   7.401 +
   7.402 +============================
   7.403 +=== Meta Task and TaskID ===
   7.404 +=
   7.405 +
   7.406 +-] provide semantic sugar for the fields in a PR ID
   7.407 +-] provide a way to look up any given ID..  a self-expanding hash table..  Q: performance -- option to not do this if not used..  implies doing a hash on the ID for every creation
   7.408 +-] Q: how to wrap ID inside PR Fns..  maybe make process state how many int it wants in the ID, so that all created get array of that size alloc'd -- that's a new class of PR calls, one that defines things for the process as a whole -- so coder knows that there are process things that they set up, then start each langlet, then get to the application code.
   7.409 +-] Q: Allow taskID to be NULL?  Or, fill in with default (like counter)? (for when task created w/o stating its ID)
   7.410 +
   7.411 +-] Wiki: a langlet's task info can exist inside task created by a different lang's wrapper lib call -- for example because want "wait" to work across langs, or because have some interesting construct that needs info available to PR during create -- then will let each lang register its own "create foreign task" handler, which runs when a different lang's wrapper lib was used to create the task..
   7.412 +-] Wiki: metaTask lang-specific portion is retrieved via magic number.  Lang-specific is created when task is created, but not assigned to a slave yet.. 
   7.413 +-] Wiki: the slaveID is an array-of-int ID, same as used for tasks.. 
   7.414 +-] Wiki: use int-array "name" as follows: the oldest (closest to root) is the last int..  the newest (closest to leaf) name is the first int.  Hence, in a given location, can just access whatever's at the start of the int array, for local offsets, without worrying about history..
   7.415 +
   7.416 +#-] A: No, not safe.. Q: safe to make metaTask have only a SINGLE lang-specific task portion?  Only for lang that created the task..  means no task constructs from other langs can operate on this one.. only slave constructs (any task operation suspends task, turning it into a suspended freeTaskSlave)..  so taskWait has to be from same lang as created the task..  so can't create tasks from diff langs, then wait for them all to end via a single statement, for example, would have to wait for ones from each lang separately..
   7.417 +XXX-] Q: examples of multiple languages wanting task-specific data in same task, which won't work well putting it into lang Data
   7.418 +#-] make PR handle taskID, which is in create request:  metaTask->taskID = langReq->taskID; //may be NULL
   7.419 +#-] Q: put taskID into generic slave metaTask?  Then, only have one kind of ID, for both tasks and slaves..  use PR__give_slaveID to get it
   7.420 +#-] add langMetaTask to metaTask -- use PR provided accessor Fn, to allow future change to the only-one-lang rule
   7.421 +#-] at point task is assigned to a slave, inside PR, the slaveAssignedTo in metaTask is set..
   7.422 +
   7.423 +==============================================
   7.424 +=== lang-specific langData and langMetaTask ===
   7.425 +=
   7.426 +-] make PR__give_lang_data have a Fn available to create the lang data if it's not already there..  gets the langData creator from langEnv retrieved by magic number  passed in, with the calling slave
   7.427 +-] PR__give_lang_data for single-lang.. it's a macro "slave->langData"
   7.428 +
   7.429 +
   7.430 +#-] define "PR__give_lang_data" 
   7.431 +#-] write PR__give_lang_data( slave ) 
   7.432 +#-] Put langData creator pointer into Lang Env
   7.433 +
   7.434 +=============================================================
   7.435 +=== Creating and Dissipating and Converting between Types ===
   7.436 +=
   7.437 +-] In VSs, fix comments for create slave, dissipate, create task, and end task handlers -- now have different structure.. they're called from different place, are now special forms
   7.438 +
   7.439 +-] Q: does VSs need to do handling inside free-langData Fn for the taskStub?  Or, is that handled in end-task and dissipate handlers?
   7.440 +
   7.441 +
   7.442 +-] Wiki: add type to PRMetaTask -- generic slave vs atomic task vs suspended task..  use this in Master, to know what comes back from assigner
   7.443 +-] Wiki: PRHandle_EndTask.. PR has its own handler for end-task, which detects shutdown and such, and then in turn calls the language's end-task handler, which is pointed to by the Lang env..
   7.444 +-] Wiki: So a language's task create and end handlers are called directly by PR, and the language's request handler is only called for normal thread constructs. IE, for a task, if it's not a create or end call, it is first converted to suspended slaves by PR, then the normal slave handler for the construct is called.
   7.445 +-] Wiki: a langlet's create and dissipate handlers are only capable of affecting a few things: langMetaTask, langData, and knowledge of creation or dissipate -- all else is inside PR.  So, have lang register handlers for creating langData and free/recycle it, and for notification of create of task and of slave, and for adding langMetaTask and free/recycle it.  PR calls are provided to register each.
   7.446 +-] Wiki: end task is special form (means has a case in animation master's loop), so an end-task hdlr is registered separately, and PRReq has separate type for it, used in master's switch
   7.447 +
   7.448 +#-] Make two different PR calls to create slave -- one creates generic slave, other creates slot slave..  initialize the type inside creator, and make a generic-slave meta task
   7.449 +#-] when convert from slot slave to free task slave, check what should do about num (live slaves + live tasks) inside VSs's task stub, and properly update process's count of liveFreeTaskSlaves 
   7.450 +#-] When recycle a freeTaskSlave, whose task has just ended, free all the langData and meta task structs that won't be re-used, and reset to initial state all the structs that will be reused  
   7.451 +#-] make PR dissipate free or recycle the slave struc and all PR owned data, including metaTask, and call the registered handler for langData and langMetaTask
   7.452 +#-] make PR dissipate do calls to all lang-registered free-langData-fns -- 
   7.453 +X-] add type to PRMetaTask -- generic slave vs atomic task vs suspended task..  
   7.454 +X-] use PRMetaTask type in Master, to know what comes back from assigner
   7.455 +#-] in PRHandle_CreateSlave.. create the special generic-slave-metaTask
   7.456 +X-] in PRHandle_CreateTask.. set the metaTask type to AtomicTask
   7.457 +X-] in Master, where convert slot slave to free task slave, change type in MetaTask to SuspendedTask
   7.458 +#-] in PRHandle_DissipateSlave free or recycle the generic-slave-metaTask, or else the freed-task-slave-metaTask
   7.459 +#-] add task-end handler in PR
   7.460 +#-] Check that Master detects when an atomic task has suspended, and changes slot slave into free task slave, including modifying the metaTask for the new free task slave
   7.461 +#-] PR dissipate handler -- do recycling of langData and langMetaTask
   7.462 +#-] in endTask, if was a free task slave, recycle langData and langMetaTasks before putting into recycleQ
   7.463 +#-] When slot slave converted to a free task slave, insert the process pointer -- slot slaves are not assigned to any process.
   7.464 +#-] For task creation, the PRReqst must hold the top-level-fn of the task, plus the initData -- these are put into the metaTask, and from there the slot-slave is pointed at them.
   7.465 +
   7.466 +==============================================================================
   7.467 +=== Lang Env -- creation, passing around, retrieving, structure, usage ===
   7.468 +=
   7.469 +
   7.470 +-] Wiki: plugin cannot access PRLangEnv.. can only affect via PR's updater Fns.  Plugin has to trigger any PRLangEnv related behavior such as modifying "work avail" flag
   7.471 +-] Wiki: PR, itself, has the lang's langEnv inside a PRProcess, gives it to req handler.. and controls access via the accessor Fn
   7.472 +-] Wiki: state this requirement: Make lang's Lang env be a PRLangEnv, which then points to the language-specific Lang env -- make first elements of lang's langEnv be: X, and make first elements of lang's meta task be: X
   7.473 +-] Wiki: PR__register_langEnv() takes the Lang env and uses the magic number to insert the lang env into the env-struct for the processID that's inside the seedVP passed in.. 
   7.474 +-] Wiki: PRLangEnv points to the langlet's lang env.. PRLangEnv holds everything accessed inside the master, including magic number, ptrs to request hdlr, assigner, and end-task hdlr, and "has work" flag..  
   7.475 +
   7.476 +
   7.477 +#-] switch on a "has work" flag, and put multi-lang switch around updates of that flag
   7.478 +#-] implement PR_SS__create_PRLangEnv( VSs_MAGIC_NUMBER, seedVP );
   7.479 +
   7.480 +==================================
   7.481 +==================================
   7.482 +==================================
   7.483 +
   7.484 +#-] fn that turns slot slave into free task slave and makes new slot slave
   7.485 +#-] remove from VSs req hdlrs the checks for whether need to replace slot slave.
   7.486 +#-] change all accesses to langData to go through PR__give_lang_data call
   7.487 +#-] change all accesses to masterEnv to go through a PR__ call
   7.488 +
   7.489 +#-] make seed slave be created by "create process" call, and make seed start the langlets that will be used inside process..  
   7.490 +
   7.491 +#-] VSs create Thread -- make compile (finish all code, add all fields and structs) 
   7.492 +#-] VSs create-Thread only does bookkeeping & resumes slaves -- the PR portion handles creation of slave then gives it to VSs handler -- VSs hdlr can do langData right then if it wants (but already has "new lang data init" Fn registered with PR) -- 
   7.493 +#-] VSs dissipate     -- make compile (finish all code, add all fields and structs)
   7.494 +#-] VSs end task      -- make compile (finish all code, add all fields and structs)
   7.495 +#-] VSs create task   -- make compile (finish all code, add all fields and structs)
   7.496 +
   7.497 +#-] search for creations, decide if that needs to be special form inside master as well
   7.498 +#-] make creating a slave and creating a task be special forms for multi-lang, as end task and dissipate slave -- special form means a case inside master loop, with PR handlers
   7.499 +#-] confirm fully separated VSs task stub related code into metaTask code in PR vs wait-related code in VSs
   7.500 +#-] find all uses of taskStub in VSs and confirm all wait-related is moved to VSsTaskStub and all other uses moved to PR
   7.501 +
   7.502 +#-] back-link the protoMetaTask into lang meta-task inside PRHandle_CreateTask, after the lang-specific handler returns the lang-meta-task
   7.503 +#-] check if taskStub link is really needed inside VSsLangReq
   7.504 +
   7.505 +#-] In VSs decide whether VSs task stub is inside langData, or inside PRMetaTask..  liking putting it into PRMetaTask, and pass around PRMetaTask everywhere, instead of passing slaves..
   7.506 +#-] task stub is put into PRMetaTask (and first field of EVERY langMetaTask is the magic number, so a construct can tell if the task was created by a foreign lang), so fix all places in VSs where get task stub out of langData -- save langData for only slave-related stuff -- so there's one and only one "task info" which is created when task created -- no other langs can introduce their own info that goes into task..  means langs have to be careful during design to keep anything needed for suspended tasks inside langData NOT inside task-info!  If a construct needs info inside metaTask then it won't work anyway, because that info was never created for a task of a different lang!  
   7.507 +#-] VSs -- use accessor Fn to get langMetaTask from slave
   7.508 +
   7.509 +
   7.510 +#-] PRCreateSlvHandler -- finish it
   7.511 +#-] PRDissipateSlvHandler -- finish it
   7.512 +
   7.513 +#-] in single-lang, lang data is created when slave created, such as for transactions..  so, should PR create all those lang datas when it creates a new slot slave?  Should only need them at point gets turned into a free task slave.. or, better yet, 
   7.514 +#-] Split VSs taskStub, putting part into PR, and leaving rest in VSs
   7.515 +
   7.516 +#-] check if confusion between VSsTaskStub and parentLivenessStruc -- seem have both and are same thing..
   7.517 +#-] Check that VSs task stub has everything it needs, and nothing it doesn't
   7.518 +#-] PRMetaTask is attached to VSsTaskStub.  Go reverse, too, so don't need slave inside VSsTaskStub
   7.519 +#-] Check that PR metaTask has everything it needs, and nothing it doesn't
   7.520 +XXX-] When assign taskStub to langData, write the slave into the taskStub->slaveAssignedTo (use this on task end and dissipate to resume a waiting parent)
   7.521 +#-] lang-specific task info goes into a task created by that lang, as the langMetaTask inside metaTask struct
   7.522 +#-] do task wait in VSs by creating VSs-only structs that get put into langData..  
   7.523 +#-] make PR dissipate and end-task trigger counting live free task slaves and live generic slaves in the process, to determine when to shut down process
   7.524 +#-] make sure this works, in VSs dissipate req hdlr: resume_slaveVP( parentLivenessStruc->slaveAssignedTo, langEnv ); -- if was last child thread, resume the parent
   7.525 +#-] In create and in dissipate, master calls PR create or dissipate handler, which takes the lang's handler out of lang env and calls it
   7.526 +#-] put incr and decr of num live slaves and num live tasks of a process into PR's handler for creator, dissipator, create-task and end-task.. 
   7.527 +#-] Figure out which points detect process shutdown -- dissipate, create, taskend, aso..
   7.528 +#-] look at how to detect shutdown condition for a process..  have live slaves and live tasks counter (taken from VSs).. 
   7.529 +XXX -] Re do the "SERVICE__" and "MODE__" in the turn on and off defs file
   7.530 +XXX -] Change "PROBES__" to "SERVICE__PROBES__TURN_ON_STATS_PROBES"
   7.531 +
   7.532 +#-] in master loop, for multi-lang, cannot assign handlers or functions at the top, before the loop, then have them  always be the same -- rather, have to check which process the slave in the slot came from, and use the handlers and plugin fns for that processID
   7.533 +XXX -] ask Nina if okay to change "DETECT_LOOP_GRAPH" name to "HOLISTIC__TURN_ON_DETECT_CONSTRAINT_GRAPH"
   7.534 +#-] make all versions of plugins access langData via PR__give_lang_data( slave )..  
   7.535 +
   7.536 +#-] fix interference between diff langlets accessing their langData in slave.. langData allows fast access, and data is structured.. but there's interference between langlets, each with its own langData struct in slave..
   7.537 +# -] Design to have a simple case that new user doesn't need to worry about multi-lang nor multi-process stuff..
   7.538 +# -] What about two separate masters?  One for multi-lang, multi-proc case, other for simple case..  chose which to use by compiler switch or by call..
   7.539 +
   7.540 +#-] Decide what a process looks like, and how to create one -- just normal seed slave creation, and return ptr process, which in turn holds pointer to seed..  but creating the process starts the seed slave going, and the seed slave function has calls that start up various languages, then uses calls from those languages to create more slaves and tasks.  The start process call returns a pointer to the process struct, which contains a results field.
   7.541 +#-- --] After figuring out communications, appears need to have a data struct for a process, to store connection information -- PR manages which port of the process connects to which ports of what other processes..  it also stores a request handler with each input port..
   7.542 +
   7.543 +#-] Looks like VSs had to include taskID inside tasks that wanted to use SSR calls, in order to have a way to identify the destination tasks.. Q: is this general, that constructs from some langlets will require other langlets to have done something special, like include an ID.. such that the langlets have to be designed together?  That hurts genericity.. can something be done?  Is this just a one-off thing,  because "name" (the taskID) is so fundamental?
   7.544 +
   7.545 +# -] Choose how to identify the language, inside PR, in order to grab the right Lang env and call the right request handler..
   7.546 +# -- --] One way might be to attach the request handler to the Lang env.. so then just need way to get the right Lang env, given an incoming request..  so, have the slave, which tells the process ID..  but it's the particular library call that determines which request handler to use..  so.. need a way for library call to cause the corresponding request handler..
   7.547 +# -- --] Hmmm, kind of liking the idea of having the seed VP do the calls that start the various langlets..  and make a special slave type, for the seed VP..  and store the process ID inside the seedVP, and all descendants..  means will need to know the process ID inside PR when a task suspends (because PR converts the slot slave into an indep slave)..  so have to keep the processID inside a task too..
   7.548 +
   7.549 +#-] in main, start PR.. 
   7.550 +#-] then, use PR call to create a process, passing it top-level Fn with which to create a seed VP..  
   7.551 +#-] PR automatically generates the seedVP when it creates the process, and inserts the processID of the newly created process into it. 
   7.552 +#-] that seedVP does calls to start langlets, one call for each langlet the process will use..
   7.553 +#-] the langlet start calls have inside their library impl a magic number that identifies the langlet, which is used to register the Lang env of the language
   7.554 +#-] the wrapper libs for that langlet have the same magic number inserted into the requests they generate.  
   7.555 +#-] The seedVP has the processID inside it that was generated by PR
   7.556 +#-] Decide how handle create and dissipate -- many langs want to do own special thing on a created slave..  but only one lang can call create!  So, restrict those special things to ONLY modifying langData -- then register an "createLangData" in the Lang env, and when a construct in that lang asks for the langData, if it's null for that lang, the creator is called and new langData returned
   7.557 +#-] PR has its own 'create' request handler, inside there copy processID from invoking slave to the newly created slave.
   7.558 +#-] When a wrapper lib call is made, the call inserts the magic number 
   7.559 +
   7.560 +XXX-] PR__create_process: Make sure slot slaves have the process-pointer set inside them.
   7.561 +
   7.562 +#-] fix create slave, by moving things out of VSs create and into PR create
   7.563 +#-] Q: May need a whole list of dissipate handlers and create handlers -- one for each langlet!  Should langlet own a slave?  Perhaps add a magic number to the slave, or ptr to dissipate call to indicate which langlet's handler?  What about seed -- only one langlet allowed to handle dissipate of seed? A langlet-independent dissipate for seed, for the process?
   7.564 +
   7.565 +#-] Q: use macro-magic inside PR?  Means can't turn PR into a DLL..  must be statically linked..  Hmmm..  A: use macro-magic until get scheme working where have PR on multiple hardware, and use same binary for all -- DLL specializes binary to the hardware.
   7.566 +#-] Decide what a process looks like, and how to create one -- just normal seed slave creation, and return ptr to slave as the process ID?
   7.567 +#-] Choose how to identify the language, inside PR, in order to grab the right Lang env and call the right request handler..
   7.568 +#-- --] One way might be to attach the request handler to the Lang env.. 
   7.569 +
   7.570 +#-] Note: can still put ptr to handler Fn into request even though executable doesn't know which runtime it will get..  the ptr is to DLL trampoline
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/__brch__ML_dev	Wed Jun 12 15:48:52 2013 -0700
     8.3 @@ -0,0 +1,1 @@
     8.4 +This branch is for developing the multi-lang capability in VMS..  it uses the same project structure as was used to develop VSs with explicit thread creation and the wait construct.
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/src/PR_Implementations/PR_defs__turn_on_and_off.h	Wed Jun 12 15:48:52 2013 -0700
     9.3 @@ -0,0 +1,90 @@
     9.4 +/*
     9.5 + *  Copyright 2009 OpenSourceResearchInstitute.org
     9.6 + *  Licensed under GNU General Public License version 2
     9.7 + *
     9.8 + * Author: seanhalle@yahoo.com
     9.9 + * 
    9.10 + */
    9.11 +
    9.12 +#ifndef _PR_DEFS_TURN_ON_AND_OFF_H
    9.13 +#define _PR_DEFS_TURN_ON_AND_OFF_H
    9.14 +#define _GNU_SOURCE
    9.15 +
    9.16 +
    9.17 +#define MODE__MULTI_LANG
    9.18 +
    9.19 +//======================  Turn Debug things on and off  =====================
    9.20 +/*When DEBUG__TURN_ON_SEQUENTIAL_MODE is defined, PR executes sequentially in
    9.21 + * the main thread.  It still does co-routines and all the mechanisms are the
    9.22 + * same; it just has only a single thread and animates Slvs one at a time.
    9.23 + */
    9.24 +//#define DEBUG__TURN_ON_SEQUENTIAL_MODE
    9.25 +   //In sequential mode, redefine NUM_CORES to 1 so that lang code needs no
    9.26 +   // special #ifdef; only takes effect if NUM_CORES is already defined here
    9.27 +#ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE
    9.28 +   #ifdef NUM_CORES
    9.29 +      #undef  NUM_CORES
    9.30 +      #define NUM_CORES 1
    9.31 +   #endif
    9.32 +#endif
    9.33 +
    9.34 +/*Turns on the debug-print statements -- presumably, when this is not
    9.35 + * defined, the debug-print calls compile away to nothing (TODO confirm)
    9.36 + */
    9.37 +#define DEBUG__TURN_ON_DEBUG_PRINT
    9.38 +
    9.39 +/*These defines turn types of debug messages on and off
    9.40 + */
    9.41 +#define dbgAppFlow   TRUE /* Top level flow of application code -- general*/
    9.42 +#define dbgProbes    FALSE /* for issues inside probes themselves*/
    9.43 +#define dbgMaster    FALSE /* obsolete*/
    9.44 +#define dbgRqstHdlr  TRUE /* in request handler code*/
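    9.45 +#define dbgSS        FALSE /* in request handler code -- NOTE(review): same text as dbgRqstHdlr; confirm scope*/
    9.46 +#define dbgInfra     FALSE /* in request handler code -- NOTE(review): same text as dbgRqstHdlr; confirm scope*/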
    9.47 +
    9.48 +//#define DEBUG__TURN_ON_ERROR_MSGS
    9.49 +
    9.50 +//==================  Turn Probe Things on and off ====================
    9.51 +/*Probes are used in the application as a cheap, convenient, and fast way
    9.52 + * to collect statistics.  Define this to enable them, else the probe
    9.53 + * statements in the application code all turn into empty whitespace
    9.54 + * in the pre-processor
    9.55 + */
    9.56 +//#define PROBES__TURN_ON_STATS_PROBES
    9.57 +
    9.58 +/*When PROBES__TURN_ON_STATS_PROBES is defined, turn on one of these to choose
    9.59 + * what kind of measurement the probes store
    9.60 + */
    9.61 +//#define PROBES__USE_TSC_PROBES
    9.62 +#define PROBES__USE_TIME_OF_DAY_PROBES
    9.63 +//#define PROBES__USE_PERF_CTR_PROBES
    9.64 +
    9.65 +
    9.66 +//==============  Turn Internal Measurement Things on and off ===============
    9.67 +
    9.68 +//#define MEAS__TURN_ON_SUSP_MEAS
    9.69 +//#define MEAS__TURN_ON_MASTER_MEAS
    9.70 +//#define MEAS__TURN_ON_MASTER_LOCK_MEAS
    9.71 +//#define MEAS__TURN_ON_MALLOC_MEAS
    9.72 +//#define MEAS__TURN_ON_PLUGIN_MEAS
    9.73 +//#define MEAS__TURN_ON_SYSTEM_MEAS
    9.74 +   /*turn on/off subtraction of create measurements from plugin meas*/
    9.75 +//#define MEAS__TURN_ON_EXCLUDE_CREATION_TIME 
    9.76 +
    9.77 +
    9.78 +//#define HOLISTIC__TURN_ON_PERF_COUNTERS
    9.79 +//#define HOLISTIC__TURN_ON_OBSERVE_UCC
    9.80 +//#define HOLISTIC__TURN_ON_DETECT_CONSTRAINT_GRAPH
    9.81 +
    9.82 +//===================  Turn on or off system options  =======================
    9.83 +//
    9.84 +/*Defining SYS__TURN_ON_WORK_STEALING causes the core controller behavior
    9.85 + * to change.  When it detects too many back-to-back masters, then it 
    9.86 + * searches the other core controllers, looking for work it can steal from
    9.87 + * them.
    9.88 + */
    9.89 +//#define SYS__TURN_ON_WORK_STEALING
    9.90 +
    9.91 +//===========================================================================
    9.92 +#endif	/* _PR_DEFS_TURN_ON_AND_OFF_H */
    9.93 +