changeset 132:dbfc8382d546 Inter-Master Requests

distributed memory allocation interface - unfinished
author Merten Sach <msach@mailbox.tu-berlin.de>
date Fri, 16 Sep 2011 14:25:49 +0200
parents 24466227d8bb
children 3a295609f045
files CoreLoop.c MasterLoop.c ProcrContext.h VMS.c VMS.h contextSwitch.s vmalloc.c vmalloc.h
diffstat 8 files changed, 172 insertions(+), 257 deletions(-) [+]
line diff
     1.1 --- a/CoreLoop.c	Wed Sep 07 17:45:05 2011 +0200
     1.2 +++ b/CoreLoop.c	Fri Sep 16 14:25:49 2011 +0200
     1.3 @@ -70,7 +70,6 @@
     1.4        //Designate a core by a 1 in bit-position corresponding to the core
     1.5     CPU_ZERO(&coreMask);
     1.6     CPU_SET(coreLoopThdParams->coreNum,&coreMask);
     1.7 -   //coreMask = 1L << coreLoopThdParams->coreNum;
     1.8  
     1.9     pthread_t selfThd = pthread_self();
    1.10     errorCode =
     2.1 --- a/MasterLoop.c	Wed Sep 07 17:45:05 2011 +0200
     2.2 +++ b/MasterLoop.c	Fri Sep 16 14:25:49 2011 +0200
     2.3 @@ -94,6 +94,7 @@
     2.4     
     2.5     volatileMasterPr = animatingPr;
     2.6     masterPr         = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp
     2.7 +   masterEnv        = (MasterEnv*)_VMSMasterEnv;
     2.8  
     2.9        //First animation of each MasterVP will in turn animate this part
    2.10        // of setup code.. (VP creator sets up the stack as if this function
    2.11 @@ -104,8 +105,7 @@
    2.12  	  // So, just make this an endless loop, and do assembly function at end
    2.13  	  // that saves its own return addr, then jumps to core_loop.
    2.14     while(1)
    2.15 -   {
    2.16 -       
    2.17 +   {       
    2.18     //============================= MEASUREMENT STUFF ========================
    2.19     #ifdef MEAS__TIME_MASTER
    2.20        //Total Master time includes one coreloop time -- just assume the core
    2.21 @@ -115,11 +115,9 @@
    2.22     #endif
    2.23     //========================================================================
    2.24  
    2.25 -   masterEnv        = (MasterEnv*)_VMSMasterEnv;
    2.26 -   
    2.27 -      //GCC may optimize so doesn't always re-define from frame-storage
    2.28 -   masterPr         = (VirtProcr*)volatileMasterPr; //on stack, reload after jmp
    2.29 +   //masterPr is loaded from volatileMasterPr once, before the loop (no reload here)
    2.30     thisCoresIdx     = masterPr->coreAnimatedBy;
    2.31 +   masterEnv->currentMasterProcrID = thisCoresIdx;
    2.32     readyToAnimateQ  = masterEnv->readyToAnimateQs[thisCoresIdx];
    2.33     schedSlots       = masterEnv->allSchedSlots[thisCoresIdx];
    2.34  
    2.35 @@ -174,6 +172,7 @@
    2.36           if( schedVirtPr != NULL )
    2.37            { currSlot->procrAssignedToSlot = schedVirtPr;
    2.38              schedVirtPr->schedSlot        = currSlot;
    2.39 +            schedVirtPr->coreAnimatedBy   = thisCoresIdx;
    2.40              currSlot->needsProcrAssigned  = FALSE;
    2.41              numSlotsFilled               += 1;
    2.42              
     3.1 --- a/ProcrContext.h	Wed Sep 07 17:45:05 2011 +0200
     3.2 +++ b/ProcrContext.h	Fri Sep 16 14:25:49 2011 +0200
     3.3 @@ -5,11 +5,60 @@
     3.4   * Author: seanhalle@yahoo.com
     3.5   * 
     3.6   */
     3.7 -
     3.8  #ifndef _ProcrContext_H
     3.9  #define	_ProcrContext_H
    3.10  #define _GNU_SOURCE
    3.11  
    3.12 +#include "VMS.h"
    3.13 +
    3.14 +typedef struct _SchedSlot SchedSlot;
    3.15 +typedef struct _VirtProcr     VirtProcr;
    3.16 +
    3.17 +/*WARNING: re-arranging this data structure could cause VP switching
    3.18 + *         assembly code to fail -- hard-codes offsets of fields
    3.19 + */
    3.20 +struct _VirtProcr
    3.21 + { int         procrID;  //for debugging -- count up each time create
    3.22 +   int         coreAnimatedBy;
    3.23 +   void       *startOfStack;
    3.24 +   void       *stackPtr;         //offset 0x10 per contextSwitch.s
    3.25 +   void       *framePtr;         //offset 0x18 per contextSwitch.s
    3.26 +   void       *nextInstrPt;      //offset 0x20 per contextSwitch.s
    3.27 +   
    3.28 +   void       *coreLoopStartPt;  //allows proto-runtime to be linked later
    3.29 +   void       *coreLoopFramePtr; //offset 0x30; restore before jmp back to core loop
    3.30 +   void       *coreLoopStackPtr; //offset 0x38; restore before jmp back to core loop
    3.31 +
    3.32 +   void       *initialData;
    3.33 +   
    3.34 +   SchedSlot  *schedSlot;
    3.35 +   VMSReqst   *requests;
    3.36 +
    3.37 +   void       *semanticData;   //this lives here for the life of VP
    3.38 +   void       *dataRetFromReq; //values returned from plugin to VP go here
    3.39 +
    3.40 +      //=========== MEASUREMENT STUFF ==========
    3.41 +   #ifdef MEAS__TIME_STAMP_SUSP
    3.42 +   unsigned int preSuspTSCLow;
    3.43 +   unsigned int postSuspTSCLow;
    3.44 +   #endif
    3.45 +   #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/
    3.46 +   unsigned int startMasterTSCLow;
    3.47 +   unsigned int endMasterTSCLow;
    3.48 +   #endif
    3.49 +      //========================================
    3.50 +   
    3.51 +   float64      createPtInSecs;  //have space but don't use on some configs
    3.52 + };
    3.53 +//VirtProcr
    3.54 +
    3.55 +struct _SchedSlot
    3.56 + {
    3.57 +   int         workIsDone;
    3.58 +   int         needsProcrAssigned;
    3.59 +   VirtProcr  *procrAssignedToSlot;
    3.60 + };
    3.61 +
    3.62  void saveCoreLoopReturnAddr(void **returnAddress);
    3.63  
    3.64  void switchToVP(VirtProcr *nextProcr);
     4.1 --- a/VMS.c	Wed Sep 07 17:45:05 2011 +0200
     4.2 +++ b/VMS.c	Fri Sep 16 14:25:49 2011 +0200
     4.3 @@ -105,7 +105,11 @@
     4.4          //Very first thing put into the master env is the free-list, seeded
     4.5          // with a massive initial chunk of memory.
     4.6          //After this, all other mallocs are VMS__malloc.
     4.7 -   _VMSMasterEnv->freeListHead        = VMS_ext__create_free_list();
     4.8 +   int i;
     4.9 +   for(i=0; i<NUM_CORES; i++)
    4.10 +   {
    4.11 +       _VMSMasterEnv->freeListHead[i]        = VMS_ext__create_free_list();
    4.12 +   }
    4.13  
    4.14  
    4.15     //============================= MEASUREMENT STUFF ========================
     5.1 --- a/VMS.h	Wed Sep 07 17:45:05 2011 +0200
     5.2 +++ b/VMS.h	Fri Sep 16 14:25:49 2011 +0200
     5.3 @@ -5,7 +5,6 @@
     5.4   * Author: seanhalle@yahoo.com
     5.5   * 
     5.6   */
     5.7 -
     5.8  #ifndef _VMS_H
     5.9  #define	_VMS_H
    5.10  #define _GNU_SOURCE
    5.11 @@ -111,9 +110,7 @@
    5.12  //===========================================================================
    5.13  typedef unsigned long long TSCount;
    5.14  
    5.15 -typedef struct _SchedSlot     SchedSlot;
    5.16  typedef struct _VMSReqst      VMSReqst;
    5.17 -typedef struct _VirtProcr     VirtProcr;
    5.18  typedef struct _InterMasterReqst InterMasterReqst;
    5.19  typedef struct _IntervalProbe IntervalProbe;
    5.20  typedef struct _GateStruc     GateStruc;
    5.21 @@ -215,53 +212,6 @@
    5.22  
    5.23  //====================  Core data structures  ===================
    5.24  
    5.25 -struct _SchedSlot
    5.26 - {
    5.27 -   int         workIsDone;
    5.28 -   int         needsProcrAssigned;
    5.29 -   VirtProcr  *procrAssignedToSlot;
    5.30 - };
    5.31 -//SchedSlot
    5.32 -
    5.33 -/*WARNING: re-arranging this data structure could cause VP switching
    5.34 - *         assembly code to fail -- hard-codes offsets of fields
    5.35 - */
    5.36 -struct _VirtProcr
    5.37 - { int         procrID;  //for debugging -- count up each time create
    5.38 -   int         coreAnimatedBy;
    5.39 -   void       *startOfStack;
    5.40 -   void       *stackPtr;
    5.41 -   void       *framePtr;
    5.42 -   void       *nextInstrPt;
    5.43 -   
    5.44 -   void       *coreLoopStartPt;  //allows proto-runtime to be linked later
    5.45 -   void       *coreLoopFramePtr; //restore before jmp back to core loop
    5.46 -   void       *coreLoopStackPtr; //restore before jmp back to core loop
    5.47 -
    5.48 -   void       *initialData;
    5.49 -
    5.50 -   SchedSlot  *schedSlot;
    5.51 -   VMSReqst   *requests;
    5.52 -
    5.53 -   void       *semanticData; //this livesUSE_GNU here for the life of VP
    5.54 -   void       *dataRetFromReq;//values returned from plugin to VP go here
    5.55 -
    5.56 -      //=========== MEASUREMENT STUFF ==========
    5.57 -   #ifdef MEAS__TIME_STAMP_SUSP
    5.58 -   unsigned int preSuspTSCLow;
    5.59 -   unsigned int postSuspTSCLow;
    5.60 -   #endif
    5.61 -   #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/
    5.62 -   unsigned int startMasterTSCLow;USE_GNU
    5.63 -   unsigned int endMasterTSCLow;
    5.64 -   #endif
    5.65 -      //========================================
    5.66 -   
    5.67 -   float64      createPtInSecs;  //have space but don't use on some configs
    5.68 - };
    5.69 -//VirtProcr
    5.70 -
    5.71 -
    5.72  /*Master Env is the only global variable -- has entry points for any other
    5.73   * data needed.  
    5.74   */
    5.75 @@ -276,22 +226,25 @@
    5.76  
    5.77     void            *semanticEnv;
    5.78     void            *OSEventStruc;   //for future, when add I/O to BLIS
    5.79 -   MallocProlog    *freeListHead;
    5.80 -   int32            amtOfOutstandingMem; //total currently allocated
    5.81  
    5.82     void            *coreLoopReturnPt;//addr to jump to to re-enter coreLoop
    5.83  
    5.84     int32            setupComplete;
    5.85     volatile int32   masterLock;
    5.86 +   
    5.87 +   MallocProlog    *freeListHead[NUM_CORES];
    5.88 +   int32            amtOfOutstandingMem; //total currently allocated
    5.89  
    5.90     int32            numMasterInARow[NUM_CORES];//detect back-to-back masterVP
    5.91 -   GateStruc       *workStealingGates[ NUM_CORES ]; //concurrent work-steal
    5.92 +   GateStruc       *workStealingGates[NUM_CORES]; //concurrent work-steal
    5.93     int32            workStealingLock;
    5.94     
    5.95     InterMasterReqst*  interMasterRequestsFor[NUM_CORES];
    5.96     RequestHandler     interPluginReqHdlr;
    5.97     
    5.98     int32              numProcrsCreated; //gives ordering to processor creation
    5.99 +   
   5.100 +   int32              currentMasterProcrID;
   5.101  
   5.102        //=========== MEASUREMENT STUFF =============
   5.103     IntervalProbe    **intervalProbes;
     6.1 --- a/contextSwitch.s	Wed Sep 07 17:45:05 2011 +0200
     6.2 +++ b/contextSwitch.s	Fri Sep 16 14:25:49 2011 +0200
     6.3 @@ -2,7 +2,17 @@
     6.4  
     6.5  
     6.6  .text
     6.7 -
     6.8 +/* VirtProcr  offsets:
     6.9 + * 0x10  stackPtr
    6.10 + * 0x18 framePtr
    6.11 + * 0x20 nextInstrPt
    6.12 + * 0x30 coreLoopFramePtr
    6.13 + * 0x38 coreLoopStackPtr
    6.14 + *
    6.15 + * _VMSMasterEnv  offsets:
    6.16 + * 0x38 coreLoopReturnPt
    6.17 + * 0x44 masterLock
    6.18 + */
    6.19  //Save return label address for the coreLoop to pointer
    6.20  //Arguments: Pointer to variable holding address
    6.21  .globl saveCoreLoopReturnAddr
    6.22 @@ -23,17 +33,6 @@
    6.23  
    6.24  //Switches form CoreLoop to VP ether a normal VP or the Master Loop
    6.25  //switch to virt procr's stack and frame ptr then jump to virt procr fn
    6.26 -/* VirtProcr  offsets:
    6.27 - * 0x10  stackPtr
    6.28 - * 0x18 framePtr
    6.29 - * 0x20 nextInstrPt
    6.30 - * 0x30 coreLoopFramePtr
    6.31 - * 0x38 coreLoopStackPtr
    6.32 - *
    6.33 - * _VMSMasterEnv  offsets:
    6.34 - * 0x48 coreLoopReturnPt
    6.35 - * 0x54 masterLock
    6.36 - */
    6.37  .globl switchToVP
    6.38  switchToVP:
    6.39      #VirtProcr in %rdi
    6.40 @@ -48,17 +47,6 @@
    6.41  
    6.42      
    6.43  //switches to core loop. saves return address
    6.44 -/* VirtProcr  offsets:
    6.45 - * 0x10  stackPtr
    6.46 - * 0x18 framePtr
    6.47 - * 0x20 nextInstrPt
    6.48 - * 0x30 coreLoopFramePtr
    6.49 - * 0x38 coreLoopStackPtr
    6.50 - *
    6.51 - * _VMSMasterEnv  offsets:
    6.52 - * 0x48 coreLoopReturnPt
    6.53 - * 0x54 masterLock
    6.54 - */
    6.55  .globl switchToCoreLoop
    6.56  switchToCoreLoop:
    6.57      #VirtProcr in %rdi
    6.58 @@ -69,7 +57,7 @@
    6.59      movq    0x30(%rdi), %rbp         #restore frame pointer
    6.60      movq    $_VMSMasterEnv, %rcx
    6.61      movq    (%rcx)    , %rcx
    6.62 -    movq    0x48(%rcx), %rax         #get CoreLoopStartPt
    6.63 +    movq    0x38(%rcx), %rax         #get CoreLoopStartPt
    6.64      jmp     *%rax                    #jmp to CoreLoop
    6.65  VPReturn:
    6.66      ret
    6.67 @@ -78,17 +66,6 @@
    6.68  
    6.69  //switches to core loop from master. saves return address
    6.70  //Releases masterLock so the next MasterLoop can be executed
    6.71 -/* VirtProcr  offsets:
    6.72 - * 0x10  stackPtr
    6.73 - * 0x18 framePtr
    6.74 - * 0x20 nextInstrPt
    6.75 - * 0x30 coreLoopFramePtr
    6.76 - * 0x38 coreLoopStackPtr
    6.77 - *
    6.78 - * _VMSMasterEnv  offsets:
    6.79 - * 0x48 coreLoopReturnPt
    6.80 - * 0x54 masterLock
    6.81 - */
    6.82  .globl masterSwitchToCoreLoop
    6.83  masterSwitchToCoreLoop:
    6.84      #VirtProcr in %rdi
    6.85 @@ -99,8 +76,8 @@
    6.86      movq    0x30(%rdi), %rbp         #restore frame pointer
    6.87      movq    $_VMSMasterEnv, %rcx
    6.88      movq    (%rcx)    , %rcx
    6.89 -    movq    0x48(%rcx), %rax         #get CoreLoopStartPt
    6.90 -    movl    $0x0      , 0x54(%rcx)   #release lock
    6.91 +    movq    0x38(%rcx), %rax         #get CoreLoopStartPt
    6.92 +    movl    $0x0      , 0x44(%rcx)   #release lock
    6.93      jmp     *%rax                    #jmp to CoreLoop
    6.94  MasterReturn:
    6.95      ret
    6.96 @@ -112,17 +89,6 @@
    6.97  // and virtPr is in %rdi
    6.98  // and both functions have the same argument.
    6.99  // do not save register of VP because this function will never return
   6.100 -/* VirtProcr  offsets:
   6.101 - * 0x10  stackPtr
   6.102 - * 0x18 framePtr
   6.103 - * 0x20 nextInstrPt
   6.104 - * 0x30 coreLoopFramePtr
   6.105 - * 0x38 coreLoopStackPtr
   6.106 - *
   6.107 - * _VMSMasterEnv  offsets:
   6.108 - * 0x48 coreLoopReturnPt
   6.109 - * 0x58 masterLock
   6.110 - */
   6.111  .globl asmTerminateCoreLoop
   6.112  asmTerminateCoreLoop:
   6.113      #VirtProcr in %rdi
     7.1 --- a/vmalloc.c	Wed Sep 07 17:45:05 2011 +0200
     7.2 +++ b/vmalloc.c	Fri Sep 16 14:25:49 2011 +0200
     7.3 @@ -12,7 +12,7 @@
     7.4  #include <stdlib.h>
     7.5  #include <stdio.h>
     7.6  
     7.7 -#include "VMS.h"
     7.8 +#include "ProcrContext.h"
     7.9  #include "Histogram/Histogram.h"
    7.10  
    7.11  /*Helper function
    7.12 @@ -37,18 +37,37 @@
    7.13     listHead->nextChunkInFreeList  = chunk;
    7.14   }
    7.15  
    7.16 +/*
    7.17 + * Master-side malloc: meant to be called by code that is part of the master loop.
    7.18 + * Reads the core index stored in _VMSMasterEnv->currentMasterProcrID and forwards
    7.19 + * the request to VMS__malloc_on_core, returning whatever that call returns.
    7.20 + */
    7.21 +void *
    7.22 +VMS__malloc( size_t sizeRequested)
    7.23 +{
    7.24 +    return VMS__malloc_on_core(sizeRequested, _VMSMasterEnv->currentMasterProcrID);
    7.25 +}
    7.26  
    7.27 -/*This is sequential code, meant to only be called from the Master, not from
    7.28 - * any slave VPs.
    7.29 +/*
    7.30 + * Plugin-side malloc: forwards to VMS__malloc_on_core with VProcr->coreAnimatedBy
    7.31 + * as the core. Runs on the slave VP's stack, so no switch to the VMS runtime.
    7.32 + */
    7.33 +void *
    7.34 +VMS__malloc_in_lib(size_t sizeRequested, VirtProcr *VProcr)
    7.35 +{
    7.36 +    return VMS__malloc_on_core(sizeRequested, VProcr->coreAnimatedBy);
    7.37 +}
    7.38 +
    7.39 +/*
    7.40   *Search down list, checking size by the nextHigherInMem pointer, to find
    7.41   * first chunk bigger than size needed.
    7.42   *Shave off the extra and make it into a new free-list element, hook it in
    7.43   * then return the address of the found element plus size of prolog.
    7.44 - *
    7.45 - *Will find a
    7.46   */
    7.47 -void *VMS__malloc( size_t sizeRequested )
    7.48 +void *
    7.49 +VMS__malloc_on_core( size_t sizeRequested, int procrID)
    7.50   { MallocProlog *foundElem = NULL, *currElem, *newElem;
    7.51 +   MallocPrologAllocated *returnElem;
    7.52     ssize_t        amountExtra, sizeConsumed,sizeOfFound;
    7.53     uint32        foundElemIsTopOfHeap;
    7.54  
    7.55 @@ -61,7 +80,8 @@
    7.56     
    7.57        //step up the size to be aligned at 16-byte boundary, prob better ways
    7.58     sizeRequested = (sizeRequested + 16) & ~15;
    7.59 -   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
    7.60 +   currElem = (_VMSMasterEnv->freeListHead[procrID]) //list of the requested core
    7.61 +                        ->nextChunkInFreeList;
    7.62  
    7.63     while( currElem != NULL )
    7.64      {    //check if size of currElem is big enough
    7.65 @@ -95,16 +115,18 @@
    7.66      { foundElem->nextChunkInFreeList->prevChunkInFreeList =
    7.67                                                foundElem->prevChunkInFreeList;
    7.68      }
    7.69 -   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
    7.70 +   returnElem = (MallocPrologAllocated*)foundElem;
    7.71 +   returnElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
    7.72 +   returnElem->procrID = procrID;
    7.73     
    7.74        //if enough, turn extra into new elem & insert it
    7.75     if( amountExtra > 64 )
    7.76      {   //make new elem by adding to addr of curr elem then casting
    7.77          sizeConsumed = sizeof(MallocProlog) + sizeRequested; 
    7.78 -        newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
    7.79 -        newElem->nextLowerInMem    = foundElem; //This is evil (but why?) 
    7.80 -        newElem->nextHigherInMem   = foundElem->nextHigherInMem; //This is evil (but why?)
    7.81 -        foundElem->nextHigherInMem = newElem;
    7.82 +        newElem = (MallocProlog *)( (uintptr_t)returnElem + sizeConsumed );
    7.83 +        newElem->nextLowerInMem    = returnElem; //This is evil (but why?) 
    7.84 +        newElem->nextHigherInMem   = returnElem->nextHigherInMem; //This is evil (but why?)
    7.85 +        returnElem->nextHigherInMem = newElem;
    7.86          if( ! foundElemIsTopOfHeap )
    7.87          {  //there is no next higher for top of heap, so can't write to it
    7.88             newElem->nextHigherInMem->nextLowerInMem = newElem;
    7.89 @@ -125,139 +147,46 @@
    7.90     //========================================================================
    7.91  
    7.92        //skip over the prolog by adding its size to the pointer return
    7.93 -   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
    7.94 +   return (void*)((uintptr_t)returnElem + sizeof(MallocProlog));
    7.95   }
    7.96  
    7.97 -/*This is sequential code, meant to only be called from the Master, not from
    7.98 - * any slave VPs.
    7.99 - *Search down list, checking size by the nextHigherInMem pointer, to find
   7.100 - * first chunk bigger than size needed.
   7.101 - *Shave off the extra and make it into a new free-list element, hook it in
   7.102 - * then return the address of the found element plus size of prolog.
   7.103 - *
   7.104 - * The difference to the regular malloc is, that all the allocated chunks are
   7.105 - * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk
   7.106 - * before the aligned chunk.
   7.107 +/*
   7.108 + * This free is called for a master loop. It decides whether the allocation of
   7.109 + * chunk was done on the same core. If it was it calls VMS__free_on_core 
   7.110 + * otherwise it sends a message to the responsible core.
   7.111   */
   7.112 -void *VMS__malloc_aligned( size_t sizeRequested )
   7.113 - { MallocProlog *foundElem = NULL, *currElem, *newElem;
   7.114 -   ssize_t        amountExtra, sizeConsumed,sizeOfFound,prevAmount;
   7.115 -   uint32        foundElemIsTopOfHeap;
   7.116 +void
   7.117 +VMS__free(void *ptrToFree)
   7.118 +{
   7.119 +    MallocPrologAllocated *chunk = (MallocPrologAllocated*)ptrToFree - 1; //step back one prolog
   7.120 +    if(chunk->procrID == _VMSMasterEnv->currentMasterProcrID)
   7.121 +    {
   7.122 +        VMS__free_on_core(ptrToFree, _VMSMasterEnv->currentMasterProcrID);
   7.123 +    }
   7.124 +    else
   7.125 +    {
   7.126 +        //TODO: send request to the core in chunk->procrID; chunk is NOT freed yet
   7.127 +    }
   7.128 +}
   7.129  
   7.130 -   //============================= MEASUREMENT STUFF ========================
   7.131 -   #ifdef MEAS__TIME_MALLOC
   7.132 -   uint32 startStamp, endStamp;
   7.133 -   saveLowTimeStampCountInto( startStamp );
   7.134 -   #endif
   7.135 -   //========================================================================
   7.136 -   
   7.137 -      //step up the size to be multiple of the cache line size
   7.138 -   sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1);
   7.139 -   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
   7.140 -
   7.141 -   while( currElem != NULL )
   7.142 -    {    //check if size of currElem is big enough
   7.143 -      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
   7.144 -      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
   7.145 -      if( amountExtra > 0 )
   7.146 -       {    
   7.147 -         //look if the found element is already aligned
   7.148 -         if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){
   7.149 -             //found it, get out of loop
   7.150 -             foundElem = currElem;
   7.151 -             break;
   7.152 -         }else{
   7.153 -             //find first aligned address and check if it's still big enough
   7.154 -             //check also if the space before the aligned address is big enough
   7.155 -             //for a new element
   7.156 -             void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1)));
   7.157 -             prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem;
   7.158 -             sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog);
   7.159 -             amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog);
   7.160 -             if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){
   7.161 -                 //found suitable element
   7.162 -                 //create new previous element and exit loop
   7.163 -                 MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1;
   7.164 -                 
   7.165 -                 //insert new element into free list
   7.166 -                 if(currElem->nextChunkInFreeList != NULL)
   7.167 -                     currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem;                     
   7.168 -                 newAlignedElem->prevChunkInFreeList = currElem;
   7.169 -                 newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList;
   7.170 -                 currElem->nextChunkInFreeList = newAlignedElem;
   7.171 -                 
   7.172 -                 //set higherInMem and lowerInMem
   7.173 -                 newAlignedElem->nextHigherInMem = currElem->nextHigherInMem;
   7.174 -                 foundElemIsTopOfHeap = currElem->nextHigherInMem ==
   7.175 -                          _VMSMasterEnv->freeListHead->nextHigherInMem;
   7.176 -                 if(!foundElemIsTopOfHeap)
   7.177 -                     currElem->nextHigherInMem->nextLowerInMem = newAlignedElem;
   7.178 -                 currElem->nextHigherInMem = newAlignedElem;
   7.179 -                 newAlignedElem->nextLowerInMem = currElem;
   7.180 -                 
   7.181 -                 //Found new element leaving loop
   7.182 -                 foundElem = newAlignedElem;
   7.183 -                 break;
   7.184 -             }
   7.185 -         }
   7.186 -         
   7.187 -       }
   7.188 -       currElem = currElem->nextChunkInFreeList;
   7.189 +/*
   7.190 + * This free is called for the plugins. It decides whether the allocation of
   7.191 + * chunk was done on the same core. If it was it calls VMS__free_on_core 
   7.192 + * otherwise it sends a message to the responsible core.
   7.193 + */
   7.194 +void
   7.195 +VMS__free_in_lib(void *ptrToFree, VirtProcr *VProc)
   7.196 +{
   7.197 +    MallocPrologAllocated *chunk = (MallocPrologAllocated*)ptrToFree - 1; //step back one prolog
   7.198 +    if(chunk->procrID == VProc->coreAnimatedBy)
   7.199 +    {
   7.200 +        VMS__free_on_core(ptrToFree, VProc->coreAnimatedBy);
   7.201      }
   7.202 -
   7.203 -   if( foundElem == NULL )
   7.204 -    { ERROR("\nmalloc failed\n")
   7.205 -      return (void *)NULL;  //indicates malloc failed
   7.206 +    else
   7.207 +    {
   7.208 +        //Request from other Core
   7.209      }
   7.210 -      //Using a kludge to identify the element that is the top chunk in the
   7.211 -      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
   7.212 -      // save addr of start of heap in head's nextLowerInMem
   7.213 -      //Will handle top of Heap specially
   7.214 -   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
   7.215 -                          _VMSMasterEnv->freeListHead->nextHigherInMem;
   7.216 -
   7.217 -      //before shave off and try to insert new elem, remove found elem
   7.218 -      //note, foundElem will never be the head, so always has valid prevChunk
   7.219 -   foundElem->prevChunkInFreeList->nextChunkInFreeList =
   7.220 -                                              foundElem->nextChunkInFreeList;
   7.221 -   if( foundElem->nextChunkInFreeList != NULL )
   7.222 -    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
   7.223 -                                              foundElem->prevChunkInFreeList;
   7.224 -    }
   7.225 -   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
   7.226 -   
   7.227 -      //if enough, turn extra into new elem & insert it
   7.228 -   if( amountExtra > 64 )
   7.229 -    {    //make new elem by adding to addr of curr elem then casting
   7.230 -      sizeConsumed = sizeof(MallocProlog) + sizeRequested;
   7.231 -      newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
   7.232 -      newElem->nextHigherInMem   = foundElem->nextHigherInMem;
   7.233 -      newElem->nextLowerInMem    = foundElem;
   7.234 -      foundElem->nextHigherInMem = newElem;
   7.235 -      
   7.236 -      if( ! foundElemIsTopOfHeap )
   7.237 -       {    //there is no next higher for top of heap, so can't write to it
   7.238 -         newElem->nextHigherInMem->nextLowerInMem = newElem;
   7.239 -       }
   7.240 -      add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
   7.241 -    }
   7.242 -   else
   7.243 -    {
   7.244 -      sizeConsumed = sizeOfFound;
   7.245 -    }
   7.246 -  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
   7.247 -
   7.248 -   //============================= MEASUREMENT STUFF ========================
   7.249 -   #ifdef MEAS__TIME_MALLOC
   7.250 -   saveLowTimeStampCountInto( endStamp );
   7.251 -   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
   7.252 -   #endif
   7.253 -   //========================================================================
   7.254 -
   7.255 -      //skip over the prolog by adding its size to the pointer return
   7.256 -   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
   7.257 - }
   7.258 -
   7.259 +}
   7.260  
   7.261  /*This is sequential code -- only to be called from the Master
   7.262   * When free, subtract the size of prolog from pointer, then cast it to a
   7.263 @@ -266,7 +195,7 @@
   7.264   * add this one to free-list.
   7.265   */
   7.266  void
   7.267 -VMS__free( void *ptrToFree )
   7.268 +VMS__free_on_core( void *ptrToFree, int procrID)
   7.269   { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem;
   7.270     size_t         sizeOfElem;
   7.271     uint32         lowerExistsAndIsFree, higherExistsAndIsFree;
   7.272 @@ -443,7 +372,7 @@
   7.273  MallocProlog *
   7.274  VMS_ext__create_free_list()
   7.275   { MallocProlog *freeListHead, *firstChunk;
   7.276 -
   7.277 + 
   7.278        //Note, this is running in the main thread -- all increases in malloc
   7.279        // mem and all frees of it must be done in this thread, with the
   7.280        // thread's original stack available
     8.1 --- a/vmalloc.h	Wed Sep 07 17:45:05 2011 +0200
     8.2 +++ b/vmalloc.h	Fri Sep 16 14:25:49 2011 +0200
     8.3 @@ -6,13 +6,13 @@
     8.4   *
     8.5   * Created on November 14, 2009, 9:07 PM
     8.6   */
     8.7 -
     8.8  #ifndef _VMALLOC_H
     8.9  #define	_VMALLOC_H
    8.10  
    8.11  #include <malloc.h>
    8.12  #include <inttypes.h>
    8.13  #include "VMS_primitive_data_types.h"
    8.14 +#include "ProcrContext.h"
    8.15  
    8.16  typedef struct _MallocProlog MallocProlog;
    8.17  
    8.18 @@ -22,25 +22,41 @@
    8.19     MallocProlog *prevChunkInFreeList;
    8.20     MallocProlog *nextHigherInMem;
    8.21     MallocProlog *nextLowerInMem;
    8.22 - };
    8.23 + }; 
    8.24  //MallocProlog
    8.25 + 
    8.26 + typedef struct
    8.27 + {
    8.28 +     uintptr_t procrID;                 //set by VMS__malloc_on_core, checked by VMS__free*
    8.29 +     MallocProlog *prevChunkInFreeList; //NULL indicates elem currently allocated
    8.30 +     MallocProlog *nextHigherInMem;
    8.31 +     MallocProlog *nextLowerInMem;
    8.32 + } MallocPrologAllocated;
    8.33  
    8.34  typedef struct
    8.35   {
    8.36     MallocProlog *firstChunkInFreeList;
    8.37     int32         numInList; //TODO not used
    8.38 - }
    8.39 -FreeListHead;
    8.40 + } FreeListHead;
    8.41  
    8.42  void *
    8.43 -VMS__malloc( size_t sizeRequested );
    8.44 +VMS__malloc_on_core(size_t sizeRequested, int procrID);
    8.45  
    8.46  void *
    8.47 -VMS__malloc_aligned( size_t sizeRequested );
    8.48 +VMS__malloc(size_t sizeRequested);
    8.49 +
    8.50 +void *
    8.51 +VMS__malloc_in_lib(size_t sizeRequested, VirtProcr *VProc);
    8.52  
    8.53  void
    8.54  VMS__free( void *ptrToFree );
    8.55  
    8.56 +void
    8.57 +VMS__free_in_lib(void *ptrToFree, VirtProcr *VProc);
    8.58 +
    8.59 +void
    8.60 +VMS__free_on_core(void *ptrToFree, int procrID);
    8.61 +
    8.62  /*Allocates memory from the external system -- higher overhead
    8.63   */
    8.64  void *