changeset 30:c8823e0bb2b4

Started adding own version of malloc and free, just in case the library versions are using TLS and causing the issues
author Me
date Mon, 09 Aug 2010 02:24:31 -0700
parents 0e008278fe3c
children e69579a0e797 65e5918731b8
files CoreLoop.c MasterLoop.c VMS.c VMS.h
diffstat 4 files changed, 91 insertions(+), 41 deletions(-) [+]
line diff
     1.1 --- a/CoreLoop.c	Wed Jul 28 13:12:10 2010 -0700
     1.2 +++ b/CoreLoop.c	Mon Aug 09 02:24:31 2010 -0700
     1.3 @@ -16,13 +16,18 @@
     1.4  #include <sched.h>
     1.5  
     1.6  
     1.7 -/*This is the loop that runs in the PThread pinned to each core
     1.8 - * get work-unit struc from queue, 
     1.9 - * call function-ptr, passing it pointer to data
    1.10 - * transfer return value to slave's "requests" pointer
    1.11 - * write the slave's "Done" flag and repeat.
    1.12 +/*This is the loop that runs in the OS Thread pinned to each core
    1.13 + *Get virt procr from queue,
    1.14 + * save state of current animator, then load in state of virt procr, using
    1.15 + * jmp instr to switch the program-counter state -- making the virt procr
    1.16 + * the new animator.
    1.17 + *At some point, the virt procr will suspend itself by saving out its
    1.18 + * animator state (stack ptr, frame ptr, program counter) and switching
    1.19 + * back to the OS Thread's animator state, which means restoring the
    1.20 + * stack and frame and jumping to the core loop start point.
    1.21 + *This cycle then repeats, until a special shutdown virtual processor is
    1.22 + * animated, which jumps to the end point at the bottom of core loop.
    1.23   */
    1.24 -//pthread_create requires ptr to func that takes void * and returns void *
    1.25  void *
    1.26  coreLoop( void *paramsIn )
    1.27   {   
    1.28 @@ -32,7 +37,6 @@
    1.29     unsigned long   coreMask;  //has 1 in bit positions of allowed cores
    1.30     int             errorCode;
    1.31     
    1.32 -      // Get the communication queues out of the param passed in
    1.33     coreLoopThdParams = (ThdParams *)paramsIn;
    1.34  
    1.35        //wait until signalled that setup is complete
    1.36 @@ -44,15 +48,11 @@
    1.37      }
    1.38     pthread_mutex_unlock( &suspendLock );
    1.39  
    1.40 -   printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum );
    1.41 +      //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum );
    1.42  
    1.43        //set thread affinity
    1.44        //Linux requires pinning thd to core inside thread-function
    1.45        //Designate a core by a 1 in bit-position corresponding to the core
    1.46 -//   cpu_set_t cpuMask;
    1.47 -//   CPU_ZERO( &cpuMask );
    1.48 -//   CPU_SET( coreLoopThdParams->coreNum, &cpuMask );
    1.49 -
    1.50     coreMask = 1 << coreLoopThdParams->coreNum;
    1.51  
    1.52     pthread_t selfThd = pthread_self();
    1.53 @@ -73,7 +73,7 @@
    1.54        // after the start point -- with the one exception of _VMSWorkQ
    1.55   
    1.56     
    1.57 -      // Get to work!  --  virt procr jumps back here when done or suspends
    1.58 +      // Get to work!  --  virt procr jumps back here when it suspends
    1.59        //Note, have to restore the frame-pointer before jump to here, to get
    1.60        // this code to work right (workQ and so forth are frame-ptr relative)
    1.61  CoreLoopStartPt:
    1.62 @@ -84,13 +84,10 @@
    1.63     workQ  = _VMSWorkQ;
    1.64     currPr = (VirtProcr *) readVMSQ( workQ );
    1.65  
    1.66 -//   printf("core %d loop procr addr: %d\n", coreLoopThdParams->coreNum, \
    1.67 -//       (int)currPr ); fflush(stdin);
    1.68 -   currPr->coreLoopStartPt = &&CoreLoopStartPt;  //to be sure.(GCC specific)
    1.69 -   
    1.70 +   currPr->coreLoopStartPt = &&CoreLoopStartPt;  //to be sure -- chg for perf
    1.71     currPr->coreAnimatedBy  = coreLoopThdParams->coreNum;
    1.72  
    1.73 -      //switch to virt procr's stack and frame ptr then jump to virt procr
    1.74 +      //switch to virt procr's stack and frame ptr then jump to virt procr fn
    1.75     void *stackPtr, *framePtr, *jmpPt, *coreLoopFramePtrAddr, \
    1.76          *coreLoopStackPtrAddr;
    1.77     
    1.78 @@ -123,15 +120,18 @@
    1.79     /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" \
    1.80                  );
    1.81  
    1.82 -   //========================================================================
    1.83 -
    1.84 -      //jmp to here when want to shut down the VMS system
    1.85 +   //=========== jmp to here when want to shut down the VMS system ==========
    1.86     CoreLoopEndPt:
    1.87 +      //first free shutdown VP that jumped here -- it first restores the
    1.88 +      // coreloop's stack, so addr of currPr in stack frame is still correct
    1.89 +   VMS__handle_dissipate_reqst( currPr );
    1.90     pthread_exit( NULL );
    1.91   }
    1.92  
    1.93  
    1.94  
    1.95 +
    1.96 +//===========================================================================
    1.97  /*This sequential version is exact same as threaded, except doesn't do the
    1.98   * pin-threads part, nor the wait until setup complete part.
    1.99   */
     2.1 --- a/MasterLoop.c	Wed Jul 28 13:12:10 2010 -0700
     2.2 +++ b/MasterLoop.c	Mon Aug 09 02:24:31 2010 -0700
     2.3 @@ -194,7 +194,7 @@
     2.4                   movl %4, %%esp;      \
     2.5                   movl %5, %%ebp;      \
     2.6                   movl $0x0, (%%ebx);  \
     2.7 -                 jmp  %%eax "         \
     2.8 +                 jmp  %%eax;"         \
     2.9     /* outputs */ : "=g" (stackPtrAddr), "=g" (framePtrAddr),                \
    2.10                     "=g"(stillRunningAddr)                                   \
    2.11     /* inputs  */ : "g" (jmpPt), "g"(coreLoopStackPtr), "g"(coreLoopFramePtr)\
     3.1 --- a/VMS.c	Wed Jul 28 13:12:10 2010 -0700
     3.2 +++ b/VMS.c	Mon Aug 09 02:24:31 2010 -0700
     3.3 @@ -100,8 +100,49 @@
     3.4     writeVMSQ( masterEnv->masterVirtPr, workQ );
     3.5  
     3.6     numProcrsCreated = 1;  //global counter for debugging
     3.7 +
     3.8 +   //==================== malloc substitute ========================
     3.9 +   //
    3.10 +   //Testing whether malloc is using thread-local storage and therefore
    3.11 +   // causing unreliable behavior.
    3.12 +   //Just allocate a massive chunk of memory and roll own malloc/free and
    3.13 +   // make app use VMS__malloc_to, which will suspend and perform malloc
    3.14 +   // in the master, taking from this massive chunk.
    3.15 +
    3.16 +//   initFreeList();
    3.17   }
    3.18  
    3.19 +/*
    3.20 +void
    3.21 +initMasterMalloc()
    3.22 + {
    3.23 +   _VMSMasterEnv->mallocChunk = malloc( MASSIVE_MALLOC_SIZE );
    3.24 +
    3.25 +      //The free-list element is the first several locations of an
    3.26 +      // allocated chunk -- the address given to the application is pre-
    3.27 +      // pended with both the ownership structure and the free-list struc.
    3.28 +      //So, write the values of these into the first locations of
    3.29 +      // mallocChunk -- which marks it as free & puts in its size.
    3.30 +   listElem = (FreeListElem *)_VMSMasterEnv->mallocChunk;
    3.31 +   listElem->size = MASSIVE_MALLOC_SIZE - NUM_PREPEND_BYTES
    3.32 +   listElem->next = NULL;
    3.33 + }
    3.34 +
    3.35 +void
    3.36 +dissipateMasterMalloc()
    3.37 + {
    3.38 +      //Just foo code -- to get going -- doing as if free list were link-list
    3.39 +   currElem = _VMSMasterEnv->freeList;
    3.40 +   while( currElem != NULL )
    3.41 +    {
    3.42 +      nextElem = currElem->next;
    3.43 +      masterFree( currElem );
    3.44 +      currElem = nextElem;
    3.45 +    }
    3.46 +   free( _VMSMasterEnv->freeList );
    3.47 + }
    3.48 + */
    3.49 +
    3.50  void
    3.51  create_sched_slots( MasterEnv *masterEnv )
    3.52   { SchedSlot  **schedSlots, **filledSlots;
    3.53 @@ -551,7 +592,7 @@
    3.54   *This function has the sole purpose of setting the stack and framePtr
    3.55   * to the coreLoop's stack and framePtr.. it does that then jumps to the
    3.56   * core loop's shutdown point -- might be able to just call Pthread_exit
    3.57 - * from here, but going back to the pthread's stack and setting everything
    3.58 + * from here, but am going back to the pthread's stack and setting everything
    3.59   * up just as if it never jumped out, before calling pthread_exit.
    3.60   *The end-point of core loop will free the stack and so forth of the
    3.61   * processor that animates this function, (this fn is transfering the
    3.62 @@ -580,7 +621,9 @@
    3.63  
    3.64  
    3.65  
    3.66 -/*This is called has to free anything allocated during VMS_init, and any other alloc'd
    3.67 +/*This is called after the threads have shut down and control has returned
    3.68 + * to the semantic layer, in the entry point function in the main thread.
    3.69 + * It has to free anything allocated during VMS_init, and any other alloc'd
    3.70   * locations that might be left over.
    3.71   */
    3.72  void
     4.1 --- a/VMS.h	Wed Jul 28 13:12:10 2010 -0700
     4.2 +++ b/VMS.h	Mon Aug 09 02:24:31 2010 -0700
     4.3 @@ -22,15 +22,20 @@
     4.4  //#define NUM_SCHED_SLOTS  (2 * NUM_CORES + 1)
     4.5  #define NUM_SCHED_SLOTS  3
     4.6  
     4.7 -   //128K stack.. compromise, want 10K virtPr
     4.8 -#define VIRT_PROCR_STACK_SIZE 0x10000
     4.9 +   // 128K stack (0x20000 bytes)
    4.10 +#define VIRT_PROCR_STACK_SIZE 0x20000
    4.11 +
    4.12 +   //256M of total memory for VMS application to VMS__malloc
    4.13 +#define MASSIVE_MALLOC_SIZE 0x10000000
    4.14 +
    4.15 +#define NUM_PREPEND_BYTES sizeof(FreeListElem) + sizeof(ownerElem);
    4.16  
    4.17  #define SUCCESS 0
    4.18  
    4.19 -#define writeVMSQ     writePThdQ
    4.20 -#define readVMSQ      readPThdQ
    4.21 -#define makeVMSQ      makePThdQ
    4.22 -#define VMSQueueStruc PThdQueueStruc
    4.23 +#define writeVMSQ     writeCASQ
    4.24 +#define readVMSQ      readCASQ
    4.25 +#define makeVMSQ      makeCASQ
    4.26 +#define VMSQueueStruc CASQueueStruc
    4.27  
    4.28  //#define thdAttrs NULL  //For PThreads
    4.29  
    4.30 @@ -104,23 +109,25 @@
    4.31  
    4.32  typedef struct
    4.33   {
    4.34 -   SlaveScheduler   slaveScheduler;
    4.35 -   RequestHandler   requestHandler;
    4.36 +   SlaveScheduler slaveScheduler;
    4.37 +   RequestHandler requestHandler;
    4.38     
    4.39 -   SchedSlot **schedSlots;
    4.40 -   SchedSlot **filledSlots;
    4.41 -   int         numToPrecede;
    4.42 +   SchedSlot    **schedSlots;
    4.43 +   SchedSlot    **filledSlots;
    4.44 +   int            numToPrecede;
    4.45     
    4.46 -   volatile int stillRunning;
    4.47 +   volatile int   stillRunning;
    4.48     
    4.49 -   VirtProcr  *masterVirtPr;
    4.50 +   VirtProcr     *masterVirtPr;
    4.51  
    4.52 -   void       *semanticEnv;
    4.53 -   void       *OSEventStruc;    //for future, when add I/O to BLIS
    4.54 +   void          *semanticEnv;
    4.55 +   void          *OSEventStruc;    //for future, when add I/O to BLIS
    4.56  
    4.57 -   void       *coreLoopEndPt; //addr to jump to to shut down a coreLoop
    4.58 +   void          *coreLoopEndPt; //addr to jump to to shut down a coreLoop
    4.59  
    4.60 -   int         setupComplete;
    4.61 +   int            setupComplete;
    4.62 +
    4.63 +   void          *mallocChunk;
    4.64   }
    4.65  MasterEnv;
    4.66