# HG changeset patch # User Me # Date 1274582278 25200 # Node ID a5fe730dfc2ec53fc99b446f7ee59fa54e248cea Initial add -- for sourceforge repositories diff -r 000000000000 -r a5fe730dfc2e CoreLoop.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CoreLoop.c Sat May 22 19:37:58 2010 -0700 @@ -0,0 +1,62 @@ +/* + * Copyright 2010 OpenSourceCodeStewardshipFoundation + * + * Licensed under BSD + */ + + + + + +#include "VMS.h" +#include "Queue_impl/BlockingQueue.h" + +#include +#include + + +/*This is the loop that runs in the PThread pinned to each core + * get work-unit struc from queue, + * call function-ptr, passing it pointer to data + * transfer return value to slave's "requests" pointer + * write the slave's "Done" flag and repeat. + */ +//pthread_create requires ptr to func that takes void * and returns void * +void * coreLoop( void *paramsIn ) + { time_t startTime, endTime, timeToExecute; + WorkUnit *currWorkUnit; + foobar *workFn; + SlaveReqst *requestsFromSlave; + + ThdParams *thdParams; + QueueStruc *workQ; + + // Get the communication queues out of the param passed in + thdParams = (ThdParams *)paramsIn; + + workQ = thdParams -> workQ; + + // Get to work! + while( TRUE ) + { + // get work-unit struc from queue + currWorkUnit = (WorkUnit *) readQ( workQ ); + workFn = currWorkUnit->ptrToWorkFunc; + + time(&startTime); //put time at call into var + + // call function-ptr, passing it pointer to data + requestsFromSlave = + (*workFn)( currWorkUnit->workData ); + + time(&endTime); + timeToExecute = endTime - startTime; + + printf( "timeToComputePiece: %s", ctime(&timeToExecute) ); + + // transfer return value to slave's "requests" pointer + currWorkUnit->slaveAssignedTo->requestsToMaster = requestsFromSlave; + // write the slave's "Done" flag and repeat. 
+ currWorkUnit->slaveAssignedTo->doneFlag = TRUE; + } + } diff -r 000000000000 -r a5fe730dfc2e DESIGN_NOTES.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DESIGN_NOTES.txt Sat May 22 19:37:58 2010 -0700 @@ -0,0 +1,40 @@ + +This project is for a paper -- the goal is to implement and measure the + performance of the Virtual Master-Slave system as an alternative to the + Thread model. + +Going to implement exactly one application -- a do-nothing benchmark kernel + that just measures scheduling overhead. + +Implement VMS this way: + +function to give to PThread that does the loop: + +get from queue, call function-ptr, passing it work-unit ptr (which contains a + pointer to data declared as void * in the application), then write the + "Done" flag and repeat. + + +In an application: + +divide work up into pieces self, just make it a busy-wait loop that writes + data, then reads it back, declared volatile. At end of work, call fn that + makes a continuation and notifies the Master its there, then return. + + +Master: + +A loop that polls each virtual slave exactly once, processing each whose + Done flag is set, it forces work-units to be one-to-one with slaves, so + as soon as done polling slaves, it makes a continuation of itself, puts + half the scheduled slaves in, clears its own Done flag, puts its + continuation in, the other half of the scheduled slaves, then writes its + own Done flag. + +When continuation first runs, checks if own Done flag set -- if not, busy- + waits until set, then proceeds. This ensures it doesn't overlap with + tail-end of previous -- IE, continuation may sneak through queue before + previous done putting second half of scheduled slaves in. This is the only + race condition. 
+ + diff -r 000000000000 -r a5fe730dfc2e MasterLoop.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MasterLoop.c Sat May 22 19:37:58 2010 -0700 @@ -0,0 +1,113 @@ +/* + * Copyright 2010 OpenSourceCodeStewardshipFoundation + * + * Licensed under BSD + */ + + + +#include +#include + +#include "VMS.h" + + + +/*This code is animated by the virtual Master processor. + *Note, it is animated on a different level in virtual processor hierarchy + * than the CoreLoop -- this is the code pointed to in a work-unit that the + * coreLoop jumps to + * + *Polls each virtual slave exactly once, hands any requests made by the slave + * to the "request handler" plug-in function + * + *Any slaves that have no work-unit assigned are given to the "schedule" + * plug-in function, which tries to assign a work-unit to it. + * + *When all slaves that need work-units have been given to the schedule plug-in, + * half of the ones that were successfully scheduled are put into the work + * queue, then a continuation of this function is put in, then the rest of the + * slaves that were successfully scheduled. + * + *The first thing this function does is busy-wait until the previous work-unit + * running this function is done. This ensures it doesn't overlap with + * tail-end of previous -- IE, continuation may sneak through queue before + * previous done putting second half of scheduled slaves in. This is the only + * race condition. 
+ * + */ + +void masterLoop( void *data ) + { bool8 success; + int slaveIdx, numScheduled, numInFirstHalf, schedSlaveIdx; + VMSProcr currSlave, *virtSlaves; + MasterEnv *masterEnv; + SlaveScheduler slaveScheduler; + RequestHandler requestHandler; + + + masterEnv = (MasterEnv *)data; + + requestHandler = masterEnv->requestHandler; + slaveScheduler = masterEnv->slaveScheduler; + virtSlaves = masterEnv->virtSlaves; + + //if another continuation of Master still running, busy-wait + while( masterEnv->stillRunning ) /*busy wait*/ ; + + //this is the only master running now, set flag again + masterEnv->stillRunning = 1; + + //prepare for scheduling + masterEnv->numScheduled = 0; + + //Poll each slave structure's Done flag + for( slaveIdx = 0; slaveIdx < NUM_SLAVES; slaveIdx++) + { + currSlave = virtSlaves[ slaveIdx ]; + + if( currSlave->workIsDone ) + { + currSlave->workIsDone = FALSE; + currSlave->needsWorkAssigned = TRUE; + + //process requests from slave to master + (*requestHandler)( currSlave ); + } + if( currSlave->needsWorkAssigned ) + { //give slave a new work-unit + success = + (*slaveScheduler)( currSlave, masterEnv ); + + if( success ) + { addToVect( currSlave, &(masterEnv->scheduledSlaves), + &(masterEnv->numScheduled) ); + currSlave->needsWorkAssigned = FALSE; + } + } + } + + //put half scheduled slaves in, then continuation, then other half + VMSProcr **scheduledSlaves; + numInFirstHalf = masterEnv->numScheduled / 2; + scheduledSlaves = masterEnv->scheduledSlaves; + for( schedSlaveIdx = 0; schedSlaveIdx < numInFirstHalf; schedSlaveIdx++) + { + writeQ( scheduledSlaves[ schedSlaveIdx ], workQ ); + } + + //enqueue continuation of this loop + // note that After this enqueue, continuation might sneak through + writeQ( masterEnv->masterWorkUnit, workQ ); + for( schedSlaveIdx = numInFirstHalf; + schedSlaveIdx < numScheduled; + schedSlaveIdx++) + { + writeQ( scheduledSlaves[ schedSlaveIdx ]->workUnitToDo, workQ ); + } + + //all done, so okay for continuation to 
proceed + masterEnv->stillRunning = 0; + } + + diff -r 000000000000 -r a5fe730dfc2e VMS.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VMS.c Sat May 22 19:37:58 2010 -0700 @@ -0,0 +1,176 @@ +/* + * Copyright 2010 OpenSourceCodeStewardshipFoundation + * + * Licensed under BSD + */ + +#include +#include +#include + +#include "VMS.h" +#include "Queue_impl/BlockingQueue.h" + + +/*Setup has two phases: + * 1) Semantic layer first calls init_VMS, which creates masterEnv, and puts + * the master work-unit into the work-queue + * 2) Semantic layer then does its own init, which creates the initial + * work-units inside the semantic layer, ready to schedule them when + * asked by the first run of the masterLoop. + * + *This part is bit weird because VMS really wants to be "always there", and + * have applications attach and detach.. for now, this VMS is part of + * the app, so the VMS system starts up as part of running the app. + * + *The semantic layer is fully isolated from the VMS internasl by + * making the semantic layer setup into a state that it's ready with its + * initial work-units, ready to schedule them to slaves when the masterLoop + * asks. Without this pattern, the semantic layer's setup would + * have to modify slaves directly to assign the initial work-units, and put + * them into the workQ itself, breaking the isolation completely. + * + * + *The semantic layer creates the initial work-unit(s), and adds its + * own environment data to masterEnv, and fills in the pointers to + * the requestHandler and slaveScheduler plug-in functions + * + *This allocates VMS data structures, populates the master VMSProc, + * and master environment, and returns the master environment to the semantic + * layer. 
+ */ + //Global vars are all inside VMS.h +MasterEnv * +init_VMS( ) + { + //Make the central work-queue + workQ = makeQ(); + + masterEnv = malloc( sizeof(MasterEnv) ); + + create_master( masterEnv ); + + create_slaves( masterEnv ); + + //When coreLoops start up, the first thing + writeQ( masterEnv->masterWorkUnit, workQ ); + } + + + +/*Fill up the virtual master data structure, which is already alloc'd in the + * masterEnv. + *The virtual Master is the same structure as a virtual slave, but it + * isn't in the array of virtual slaves. + * The reason it's the same structure is so that the coreLoop doesn't + * have to differentiate -- all work units are assigned to a VMSProcr, and + * the core loop treats them all the same way, whether it's the virtual + * master continuation or a slave's work-unit. + *Note: masterLoop is jumped into an back out of, so have to be careful with + * register usage and saving all persistent-across-calls state to masterEnv + */ +void +create_master( MasterEnv *masterEnv ) + { VMSProcr virtMaster; + + virtMaster = &(masterEnv->virtMaster); + virtMaster->workUnitToDo = malloc( sizeof( WorkUnit ) ); + virtMaster->workUnitToDo->workData = masterEnv; + //TODO: figure out call structure: what GCC will do with regs + // will jump to the masterLoop from the coreLoop -- what regs need + // saving, from before jump to after -- and what reg to put masterEnv + // pointer in when jump to masterLoop + virtMaster->workUnitToDo->addrToJumpTo = &masterLoop; + virtMaster->workUnitToDo->slaveAssignedTo = virtMaster; + } + +void +create_slaves( MasterEnv *masterEnv ) + { VMSProcr *virtSlaves; + int i; + + virtSlaves = masterEnv->virtSlaves; //TODO: make sure this is right + for( i = 0; i < NUM_SLAVES; i++ ) + { + //Set state to mean "everything done, schedule work to slave" + virtSlaves[i].workIsDone = FALSE; + virtSlaves[i].needsWorkAssigned = TRUE; + } + } + +/*Semantic layer calls this when it want the system to start running.. 
+ *
+ *This creates the core loops, pins them to physical cores, gives them the
+ * pointer to the workQ, and starts them running.
+ */
+ void
+VMS__start()
+ { int retCode, coreIdx;
+
+//TODO: still just skeleton code -- figure out right way to do this
+
+   //Create the PThread loops that take from work-queue, and start them
+   for( coreIdx=0; coreIdx < NUM_WORKERS; coreIdx++ )
+    {
+      //BUG FIX: VMS.h declares thdParams as an array of ThdParams structs,
+      // not of pointers -- fill the slot in place instead of malloc'ing
+      // into a struct slot
+      thdParams[coreIdx].workQ = workQ;
+      thdParams[coreIdx].id = coreIdx;
+
+      //Now make and start thd.. the coreLoopThds entry
+      // has all the info needed to later stop the thread.
+      retCode =
+         pthread_create( &(coreLoopThds[coreIdx]), thdAttrs, &coreLoop,
+                         (void *)&(thdParams[coreIdx]) );
+      if( retCode != 0 )
+       { //error
+         printf("ERROR creating coreLoop %d, code: %d\n", coreIdx, retCode);
+         exit(-1);
+       }
+
+      pinThdToCore( ); //figure out how to specify this..
+                       // (pthread_setaffinity_np is the likely call -- TODO)
+
+      //NOTE(review): pthread_create starts the thread itself, so the old
+      // startThd() placeholder was removed -- no separate "start" call exists
+    }
+ }
+
+ /*there is a label inside this function -- save the addr of this label in
+ * the callingPr struc, as the pick-up point from which to start the next
+ * work-unit for that procr.  If turns out have to save registers, then
+ * save them in the procr struc too.  Then do assembly jump to the CoreLoop's
+ * "done with work-unit" label.  The procr struc is in the request in the
+ * slave that animated the just-ended work-unit, so all the state is saved
+ * there, and will get passed along, inside the request handler, to the
+ * next work-unit for that procr.
+ */
+void
+VMS__save_ret_and_jump_to_CoreLoop( VMSProcr *callingPr ) //BUG FIX: had
+                                    // implicit-int return and untyped param
+ {
+   //TODO: figure out how to save the addr of a label into a mem loc
+   //NOTE: because resume pt is inside the VMS fn, it's always the same, no
+   // matter what the semantic layer is, no matter what semantic libr called.
+ callingPr->resumePt = &resumeNextWorkUnitPt; + save_processor_state_in( callingPr ); //save x86 regs, if GCC needs it to + coreLoopRetPt = callingPr->coreLoopRetPt; + //TODO: figure out how to do jump correctly -- target addr is constant + asm( jmp coreLoopRetPt ); + +resumeNextWorkUnitPt: + return; + } + + +/*The semantic virt procr is available in the request sent from the slave + * + * The request handler has to add the work-unit created to the semantic + * virtual processor the work-unit is a section of its time-line -- does this when create the + * work-unit -- means the procr data struc is available in the request sent + * from the slave, from which the new work-unit is generated.. + */ +VMS__add_request_to_slave( SlaveReqst req, VMSProcr callingPr ) + { VMSProcr slave; + slave = callingPr->workUnit->currSlave + req->nextRequest = callingPr->workUnit->currSlave->requests = req; + } + + + diff -r 000000000000 -r a5fe730dfc2e VMS.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VMS.h Sat May 22 19:37:58 2010 -0700 @@ -0,0 +1,105 @@ +/* + * Copyright 2009 OpenSourceStewardshipFoundation.org + * Licensed under GNU General Public License version 2 + * + * Author: seanhalle@yahoo.com + * + */ + +#ifndef _VMS_H +#define _VMS_H + + +#include "VMS_primitive_data_types.h" +#include "Queue_impl/BlockingQueue.h" + +//This value is the number of hardware threads in the shared memory +// machine +#define NUM_WORKERS 4 +#define NUM_SLAVES 8 + +#define SUCCESS 0 + +#define thdAttrs NULL + +typedef struct WorkUnit WorkUnit; +typedef struct VMSProcr VMSProcr; +typedef struct SlaveReqst SlaveReqst; + +typedef bool8 (*SlaveScheduler) ( void * ); +typedef void (*RequestHandler) ( SlaveReqst * ); + +typedef struct + { + QueueStruc *workQ; + unsigned int id; + } +ThdParams; + +//This is application-level data of the scheduler that runs in the master +// virtual processor. 
+// This data is at a higher level than the slave data-
+// struc, which is part of the virtualization infrastructure.. this
+// MasterEnv sits on top of that level
typedef struct
 {
 VMSProcr virtSlaves[ NUM_SLAVES ];
 VMSProcr virtMaster;

 SlaveScheduler slaveScheduler;   //plug-in fns supplied by the semantic layer
 RequestHandler requestHandler;

 int stillRunning;                //busy-wait flag used by masterLoop to keep
                                  // two master continuations from overlapping
 WorkUnit *masterWorkUnit;        //the work-unit that re-enters masterLoop

 VMSProcr **scheduledSlaves;      //vector grown by addToVect in masterLoop
 int numScheduled;

 void *OSEventStruc;
 void *semanticEnv;               //opaque to VMS; owned by the semantic layer
 }
MasterEnv;


struct WorkUnit
 {
 VMSProcr *slaveAssignedTo;
 void *addrToJumpTo;              //coreLoop casts this to a fn ptr and calls it
 void *workData;                  //passed to the work fn; void * to the app

 void *pluginSpecific;
 };


struct VMSProcr
 {
 WorkUnit *workUnitToDo;
 SlaveReqst *requestsToMaster;    //head of singly-linked request list
 int workIsDone;                  //set by coreLoop, cleared by masterLoop
 int needsWorkAssigned;           //set by masterLoop when slave is idle
 //NOTE(review): VMS.c also references resumePt and coreLoopRetPt on this
 // struct -- they are not declared here; confirm and add them
 };

struct SlaveReqst
 {
 VMSProcr *slaveFrom;
 int reqType;
 void *reqData;

 SlaveReqst *nextRequest;
 };



void * coreLoop( void *paramsIn ); //standard PThreads fn prototype


//===================== Global Vars ===================
//NOTE(review): these are definitions in a header included by three .c files;
// they link only via the old "common symbol" behavior for tentative
// definitions.  Best practice is extern declarations here plus one
// definition in a .c file -- confirm before changing the build.

pthread_t coreLoopThds[ NUM_WORKERS ]; // std struc, holds thread info
QueueStruc *workQ;
ThdParams thdParams[ NUM_WORKERS ];

MasterEnv *masterEnv;


#endif /* _VMS_H */

diff -r 000000000000 -r a5fe730dfc2e VMS_primitive_data_types.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/VMS_primitive_data_types.h	Sat May 22 19:37:58 2010 -0700
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2009 OpenSourceStewardshipFoundation.org
+ * Licensed under GNU General Public License version 2
+ *
+ * Author: seanhalle@yahoo.com
+ *
+
+ */
+
+/*NOTE(review): guard name starts with underscore + capital, which is
+ * reserved for the implementation -- consider BLIS_PRIMITIVE_DATA_TYPES_H*/
+#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H
+#define _BLIS_PRIMITIVE_DATA_TYPES_H
+
+
+/*For portability, need primitive data types that have a well defined
+ * size, and well-defined layout into bytes
+ *To do this, provide BLIS standard aliases for all primitive data types
+ *These aliases must be used in all BLIS functions instead of the ANSI types
+ *
+ *These definitions will be replaced inside each specialization module
+ * according to the compiler used in that module and the
hardware being + * specialized to. + */ +/* +#define int8 char +#define uint8 char +#define int16 short +#define uint16 unsigned short +#define int32 int +#define uint32 unsigned int +#define int64 long long +#define uint64 unsigned long long +#define float32 float +#define float64 double +*/ +typedef char bool8; +typedef char int8; +typedef char uint8; +typedef short int16; +typedef unsigned short uint16; +typedef int int32; +typedef unsigned int uint32; +typedef long long int64; +typedef unsigned long long uint64; +typedef float float32; +typedef double float64; +//typedef double double float128; +#define float128 double double + +#define TRUE 1 +#define FALSE 0 + +#endif /* _BLIS_PRIMITIVE_DATA_TYPES_H */ +