Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
changeset 132:dbfc8382d546 Inter-Master Requests
distributed memory allocation interface - unfinished
| author | Merten Sach <msach@mailbox.tu-berlin.de> |
|---|---|
| date | Fri, 16 Sep 2011 14:25:49 +0200 |
| parents | 24466227d8bb |
| children | 3a295609f045 |
| files | CoreLoop.c MasterLoop.c ProcrContext.h VMS.c VMS.h contextSwitch.s vmalloc.c vmalloc.h |
| diffstat | 8 files changed, 172 insertions(+), 257 deletions(-) [+] |
line diff
1.1 --- a/CoreLoop.c Wed Sep 07 17:45:05 2011 +0200 1.2 +++ b/CoreLoop.c Fri Sep 16 14:25:49 2011 +0200 1.3 @@ -70,7 +70,6 @@ 1.4 //Designate a core by a 1 in bit-position corresponding to the core 1.5 CPU_ZERO(&coreMask); 1.6 CPU_SET(coreLoopThdParams->coreNum,&coreMask); 1.7 - //coreMask = 1L << coreLoopThdParams->coreNum; 1.8 1.9 pthread_t selfThd = pthread_self(); 1.10 errorCode =
2.1 --- a/MasterLoop.c Wed Sep 07 17:45:05 2011 +0200 2.2 +++ b/MasterLoop.c Fri Sep 16 14:25:49 2011 +0200 2.3 @@ -94,6 +94,7 @@ 2.4 2.5 volatileMasterPr = animatingPr; 2.6 masterPr = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp 2.7 + masterEnv = (MasterEnv*)_VMSMasterEnv; 2.8 2.9 //First animation of each MasterVP will in turn animate this part 2.10 // of setup code.. (VP creator sets up the stack as if this function 2.11 @@ -104,8 +105,7 @@ 2.12 // So, just make this an endless loop, and do assembly function at end 2.13 // that saves its own return addr, then jumps to core_loop. 2.14 while(1) 2.15 - { 2.16 - 2.17 + { 2.18 //============================= MEASUREMENT STUFF ======================== 2.19 #ifdef MEAS__TIME_MASTER 2.20 //Total Master time includes one coreloop time -- just assume the core 2.21 @@ -115,11 +115,9 @@ 2.22 #endif 2.23 //======================================================================== 2.24 2.25 - masterEnv = (MasterEnv*)_VMSMasterEnv; 2.26 - 2.27 - //GCC may optimize so doesn't always re-define from frame-storage 2.28 - masterPr = (VirtProcr*)volatileMasterPr; //on stack, reload after jmp 2.29 + //GCC may optimize so doesn't always re-define from frame-storage 2.30 thisCoresIdx = masterPr->coreAnimatedBy; 2.31 + masterEnv->currentMasterProcrID = thisCoresIdx; 2.32 readyToAnimateQ = masterEnv->readyToAnimateQs[thisCoresIdx]; 2.33 schedSlots = masterEnv->allSchedSlots[thisCoresIdx]; 2.34 2.35 @@ -174,6 +172,7 @@ 2.36 if( schedVirtPr != NULL ) 2.37 { currSlot->procrAssignedToSlot = schedVirtPr; 2.38 schedVirtPr->schedSlot = currSlot; 2.39 + schedVirtPr->coreAnimatedBy = thisCoresIdx; 2.40 currSlot->needsProcrAssigned = FALSE; 2.41 numSlotsFilled += 1; 2.42
3.1 --- a/ProcrContext.h Wed Sep 07 17:45:05 2011 +0200 3.2 +++ b/ProcrContext.h Fri Sep 16 14:25:49 2011 +0200 3.3 @@ -5,11 +5,60 @@ 3.4 * Author: seanhalle@yahoo.com 3.5 * 3.6 */ 3.7 - 3.8 #ifndef _ProcrContext_H 3.9 #define _ProcrContext_H 3.10 #define _GNU_SOURCE 3.11 3.12 +#include "VMS.h" 3.13 + 3.14 +typedef struct _SchedSlot SchedSlot; 3.15 +typedef struct _VirtProcr VirtProcr; 3.16 + 3.17 +/*WARNING: re-arranging this data structure could cause VP switching 3.18 + * assembly code to fail -- hard-codes offsets of fields 3.19 + */ 3.20 +struct _VirtProcr 3.21 + { int procrID; //for debugging -- count up each time create 3.22 + int coreAnimatedBy; 3.23 + void *startOfStack; 3.24 + void *stackPtr; 3.25 + void *framePtr; 3.26 + void *nextInstrPt; 3.27 + 3.28 + void *coreLoopStartPt; //allows proto-runtime to be linked later 3.29 + void *coreLoopFramePtr; //restore before jmp back to core loop 3.30 + void *coreLoopStackPtr; //restore before jmp back to core loop 3.31 + 3.32 + void *initialData; 3.33 + 3.34 + SchedSlot *schedSlot; 3.35 + VMSReqst *requests; 3.36 + 3.37 + void *semanticData; 3.38 + void *dataRetFromReq; //values returned from plugin to VP go here 3.39 + 3.40 + //=========== MEASUREMENT STUFF ========== 3.41 + #ifdef MEAS__TIME_STAMP_SUSP 3.42 + unsigned int preSuspTSCLow; 3.43 + unsigned int postSuspTSCLow; 3.44 + #endif 3.45 + #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/ 3.46 + unsigned int startMasterTSCLow;USE_GNU 3.47 + unsigned int endMasterTSCLow; 3.48 + #endif 3.49 + //======================================== 3.50 + 3.51 + float64 createPtInSecs; //have space but don't use on some configs 3.52 + }; 3.53 +//VirtProcr 3.54 + 3.55 +struct _SchedSlot 3.56 + { 3.57 + int workIsDone; 3.58 + int needsProcrAssigned; 3.59 + VirtProcr *procrAssignedToSlot; 3.60 + }; 3.61 + 3.62 void saveCoreLoopReturnAddr(void **returnAddress); 3.63 3.64 void switchToVP(VirtProcr *nextProcr);
4.1 --- a/VMS.c Wed Sep 07 17:45:05 2011 +0200 4.2 +++ b/VMS.c Fri Sep 16 14:25:49 2011 +0200 4.3 @@ -105,7 +105,11 @@ 4.4 //Very first thing put into the master env is the free-list, seeded 4.5 // with a massive initial chunk of memory. 4.6 //After this, all other mallocs are VMS__malloc. 4.7 - _VMSMasterEnv->freeListHead = VMS_ext__create_free_list(); 4.8 + int i; 4.9 + for(i=0; i<NUM_CORES; i++) 4.10 + { 4.11 + _VMSMasterEnv->freeListHead[i] = VMS_ext__create_free_list(); 4.12 + } 4.13 4.14 4.15 //============================= MEASUREMENT STUFF ========================
5.1 --- a/VMS.h Wed Sep 07 17:45:05 2011 +0200 5.2 +++ b/VMS.h Fri Sep 16 14:25:49 2011 +0200 5.3 @@ -5,7 +5,6 @@ 5.4 * Author: seanhalle@yahoo.com 5.5 * 5.6 */ 5.7 - 5.8 #ifndef _VMS_H 5.9 #define _VMS_H 5.10 #define _GNU_SOURCE 5.11 @@ -111,9 +110,7 @@ 5.12 //=========================================================================== 5.13 typedef unsigned long long TSCount; 5.14 5.15 -typedef struct _SchedSlot SchedSlot; 5.16 typedef struct _VMSReqst VMSReqst; 5.17 -typedef struct _VirtProcr VirtProcr; 5.18 typedef struct _InterMasterReqst InterMasterReqst; 5.19 typedef struct _IntervalProbe IntervalProbe; 5.20 typedef struct _GateStruc GateStruc; 5.21 @@ -215,53 +212,6 @@ 5.22 5.23 //==================== Core data structures =================== 5.24 5.25 -struct _SchedSlot 5.26 - { 5.27 - int workIsDone; 5.28 - int needsProcrAssigned; 5.29 - VirtProcr *procrAssignedToSlot; 5.30 - }; 5.31 -//SchedSlot 5.32 - 5.33 -/*WARNING: re-arranging this data structure could cause VP switching 5.34 - * assembly code to fail -- hard-codes offsets of fields 5.35 - */ 5.36 -struct _VirtProcr 5.37 - { int procrID; //for debugging -- count up each time create 5.38 - int coreAnimatedBy; 5.39 - void *startOfStack; 5.40 - void *stackPtr; 5.41 - void *framePtr; 5.42 - void *nextInstrPt; 5.43 - 5.44 - void *coreLoopStartPt; //allows proto-runtime to be linked later 5.45 - void *coreLoopFramePtr; //restore before jmp back to core loop 5.46 - void *coreLoopStackPtr; //restore before jmp back to core loop 5.47 - 5.48 - void *initialData; 5.49 - 5.50 - SchedSlot *schedSlot; 5.51 - VMSReqst *requests; 5.52 - 5.53 - void *semanticData; //this livesUSE_GNU here for the life of VP 5.54 - void *dataRetFromReq;//values returned from plugin to VP go here 5.55 - 5.56 - //=========== MEASUREMENT STUFF ========== 5.57 - #ifdef MEAS__TIME_STAMP_SUSP 5.58 - unsigned int preSuspTSCLow; 5.59 - unsigned int postSuspTSCLow; 5.60 - #endif 5.61 - #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/ 5.62 - unsigned int startMasterTSCLow;USE_GNU 5.63 - unsigned int endMasterTSCLow; 5.64 - #endif 5.65 - //======================================== 5.66 - 5.67 - float64 createPtInSecs; //have space but don't use on some configs 5.68 - }; 5.69 -//VirtProcr 5.70 - 5.71 - 5.72 /*Master Env is the only global variable -- has entry points for any other 5.73 * data needed. 5.74 */ 5.75 @@ -276,22 +226,25 @@ 5.76 5.77 void *semanticEnv; 5.78 void *OSEventStruc; //for future, when add I/O to BLIS 5.79 - MallocProlog *freeListHead; 5.80 - int32 amtOfOutstandingMem; //total currently allocated 5.81 5.82 void *coreLoopReturnPt;//addr to jump to to re-enter coreLoop 5.83 5.84 int32 setupComplete; 5.85 volatile int32 masterLock; 5.86 + 5.87 + MallocProlog *freeListHead[NUM_CORES]; 5.88 + int32 amtOfOutstandingMem; //total currently allocated 5.89 5.90 int32 numMasterInARow[NUM_CORES];//detect back-to-back masterVP 5.91 - GateStruc *workStealingGates[ NUM_CORES ]; //concurrent work-steal 5.92 + GateStruc *workStealingGates[NUM_CORES]; //concurrent work-steal 5.93 int32 workStealingLock; 5.94 5.95 InterMasterReqst* interMasterRequestsFor[NUM_CORES]; 5.96 RequestHandler interPluginReqHdlr; 5.97 5.98 int32 numProcrsCreated; //gives ordering to processor creation 5.99 + 5.100 + int32 currentMasterProcrID; 5.101 5.102 //=========== MEASUREMENT STUFF ============= 5.103 IntervalProbe **intervalProbes;
6.1 --- a/contextSwitch.s Wed Sep 07 17:45:05 2011 +0200 6.2 +++ b/contextSwitch.s Fri Sep 16 14:25:49 2011 +0200 6.3 @@ -2,7 +2,17 @@ 6.4 6.5 6.6 .text 6.7 - 6.8 +/* VirtProcr offsets: 6.9 + * 0x10 stackPtr 6.10 + * 0x18 framePtr 6.11 + * 0x20 nextInstrPt 6.12 + * 0x30 coreLoopFramePtr 6.13 + * 0x38 coreLoopStackPtr 6.14 + * 6.15 + * _VMSMasterEnv offsets: 6.16 + * 0x38 coreLoopReturnPt 6.17 + * 0x44 masterLock 6.18 + */ 6.19 //Save return label address for the coreLoop to pointer 6.20 //Arguments: Pointer to variable holding address 6.21 .globl saveCoreLoopReturnAddr 6.22 @@ -23,17 +33,6 @@ 6.23 6.24 //Switches form CoreLoop to VP ether a normal VP or the Master Loop 6.25 //switch to virt procr's stack and frame ptr then jump to virt procr fn 6.26 -/* VirtProcr offsets: 6.27 - * 0x10 stackPtr 6.28 - * 0x18 framePtr 6.29 - * 0x20 nextInstrPt 6.30 - * 0x30 coreLoopFramePtr 6.31 - * 0x38 coreLoopStackPtr 6.32 - * 6.33 - * _VMSMasterEnv offsets: 6.34 - * 0x48 coreLoopReturnPt 6.35 - * 0x54 masterLock 6.36 - */ 6.37 .globl switchToVP 6.38 switchToVP: 6.39 #VirtProcr in %rdi 6.40 @@ -48,17 +47,6 @@ 6.41 6.42 6.43 //switches to core loop. saves return address 6.44 -/* VirtProcr offsets: 6.45 - * 0x10 stackPtr 6.46 - * 0x18 framePtr 6.47 - * 0x20 nextInstrPt 6.48 - * 0x30 coreLoopFramePtr 6.49 - * 0x38 coreLoopStackPtr 6.50 - * 6.51 - * _VMSMasterEnv offsets: 6.52 - * 0x48 coreLoopReturnPt 6.53 - * 0x54 masterLock 6.54 - */ 6.55 .globl switchToCoreLoop 6.56 switchToCoreLoop: 6.57 #VirtProcr in %rdi 6.58 @@ -69,7 +57,7 @@ 6.59 movq 0x30(%rdi), %rbp #restore frame pointer 6.60 movq $_VMSMasterEnv, %rcx 6.61 movq (%rcx) , %rcx 6.62 - movq 0x48(%rcx), %rax #get CoreLoopStartPt 6.63 + movq 0x38(%rcx), %rax #get CoreLoopStartPt 6.64 jmp *%rax #jmp to CoreLoop 6.65 VPReturn: 6.66 ret 6.67 @@ -78,17 +66,6 @@ 6.68 6.69 //switches to core loop from master. saves return address 6.70 //Releases masterLock so the next MasterLoop can be executed 6.71 -/* VirtProcr offsets: 6.72 - * 0x10 stackPtr 6.73 - * 0x18 framePtr 6.74 - * 0x20 nextInstrPt 6.75 - * 0x30 coreLoopFramePtr 6.76 - * 0x38 coreLoopStackPtr 6.77 - * 6.78 - * _VMSMasterEnv offsets: 6.79 - * 0x48 coreLoopReturnPt 6.80 - * 0x54 masterLock 6.81 - */ 6.82 .globl masterSwitchToCoreLoop 6.83 masterSwitchToCoreLoop: 6.84 #VirtProcr in %rdi 6.85 @@ -99,8 +76,8 @@ 6.86 movq 0x30(%rdi), %rbp #restore frame pointer 6.87 movq $_VMSMasterEnv, %rcx 6.88 movq (%rcx) , %rcx 6.89 - movq 0x48(%rcx), %rax #get CoreLoopStartPt 6.90 - movl $0x0 , 0x54(%rcx) #release lock 6.91 + movq 0x38(%rcx), %rax #get CoreLoopStartPt 6.92 + movl $0x0 , 0x44(%rcx) #release lock 6.93 jmp *%rax #jmp to CoreLoop 6.94 MasterReturn: 6.95 ret 6.96 @@ -112,17 +89,6 @@ 6.97 // and virtPr is in %rdi 6.98 // and both functions have the same argument. 6.99 // do not save register of VP because this function will never return 6.100 -/* VirtProcr offsets: 6.101 - * 0x10 stackPtr 6.102 - * 0x18 framePtr 6.103 - * 0x20 nextInstrPt 6.104 - * 0x30 coreLoopFramePtr 6.105 - * 0x38 coreLoopStackPtr 6.106 - * 6.107 - * _VMSMasterEnv offsets: 6.108 - * 0x48 coreLoopReturnPt 6.109 - * 0x58 masterLock 6.110 - */ 6.111 .globl asmTerminateCoreLoop 6.112 asmTerminateCoreLoop: 6.113 #VirtProcr in %rdi
7.1 --- a/vmalloc.c Wed Sep 07 17:45:05 2011 +0200 7.2 +++ b/vmalloc.c Fri Sep 16 14:25:49 2011 +0200 7.3 @@ -12,7 +12,7 @@ 7.4 #include <stdlib.h> 7.5 #include <stdio.h> 7.6 7.7 -#include "VMS.h" 7.8 +#include "ProcrContext.h" 7.9 #include "Histogram/Histogram.h" 7.10 7.11 /*Helper function 7.12 @@ -37,18 +37,37 @@ 7.13 listHead->nextChunkInFreeList = chunk; 7.14 } 7.15 7.16 +/* 7.17 + * This function is called by code which is part of the master loop. 7.18 + * This reads the animating coreID from the MasterEnv and calls the normal malloc 7.19 + * in VMS__malloc_on_core 7.20 + */ 7.21 +void * 7.22 +VMS__malloc( size_t sizeRequested) 7.23 +{ 7.24 + return VMS__malloc_on_core(sizeRequested, _VMSMasterEnv->currentMasterProcrID); 7.25 +} 7.26 7.27 -/*This is sequential code, meant to only be called from the Master, not from 7.28 - * any slave VPs. 7.29 +/* 7.30 + * This is called by the plugin. This call to VMS_malloc_on_core is run on the 7.31 + * slave VPs stack so there is no switch to the VMS runtime. 7.32 + */ 7.33 +void * 7.34 +VMS__malloc_in_lib(size_t sizeRequested, VirtProcr *VProcr) 7.35 +{ 7.36 + return VMS__malloc_on_core(sizeRequested, VProcr->coreAnimatedBy); 7.37 +} 7.38 + 7.39 +/* 7.40 *Search down list, checking size by the nextHigherInMem pointer, to find 7.41 * first chunk bigger than size needed. 7.42 *Shave off the extra and make it into a new free-list element, hook it in 7.43 * then return the address of the found element plus size of prolog. 7.44 - * 7.45 - *Will find a 7.46 */ 7.47 -void *VMS__malloc( size_t sizeRequested ) 7.48 +void * 7.49 +VMS__malloc_on_core( size_t sizeRequested, int procrID) 7.50 { MallocProlog *foundElem = NULL, *currElem, *newElem; 7.51 + MallocPrologAllocated *returnElem; 7.52 ssize_t amountExtra, sizeConsumed,sizeOfFound; 7.53 uint32 foundElemIsTopOfHeap; 7.54 7.55 @@ -61,7 +80,8 @@ 7.56 7.57 //step up the size to be aligned at 16-byte boundary, prob better ways 7.58 sizeRequested = (sizeRequested + 16) & ~15; 7.59 - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; 7.60 + currElem = (_VMSMasterEnv->freeListHead[_VMSMasterEnv->currentMasterProcrID]) 7.61 + ->nextChunkInFreeList; 7.62 7.63 while( currElem != NULL ) 7.64 { //check if size of currElem is big enough 7.65 @@ -95,16 +115,18 @@ 7.66 { foundElem->nextChunkInFreeList->prevChunkInFreeList = 7.67 foundElem->prevChunkInFreeList; 7.68 } 7.69 - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 7.70 + returnElem = (MallocPrologAllocated*)foundElem; 7.71 + returnElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 7.72 + returnElem->procrID = procrID; 7.73 7.74 //if enough, turn extra into new elem & insert it 7.75 if( amountExtra > 64 ) 7.76 { //make new elem by adding to addr of curr elem then casting 7.77 sizeConsumed = sizeof(MallocProlog) + sizeRequested; 7.78 - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); 7.79 - newElem->nextLowerInMem = foundElem; //This is evil (but why?) 7.80 - newElem->nextHigherInMem = foundElem->nextHigherInMem; //This is evil (but why?) 7.81 - foundElem->nextHigherInMem = newElem; 7.82 + newElem = (MallocProlog *)( (uintptr_t)returnElem + sizeConsumed ); 7.83 + newElem->nextLowerInMem = returnElem; //This is evil (but why?) 7.84 + newElem->nextHigherInMem = returnElem->nextHigherInMem; //This is evil (but why?) 7.85 + returnElem->nextHigherInMem = newElem; 7.86 if( ! foundElemIsTopOfHeap ) 7.87 { //there is no next higher for top of heap, so can't write to it 7.88 newElem->nextHigherInMem->nextLowerInMem = newElem; 7.89 @@ -125,139 +147,46 @@ 7.90 //======================================================================== 7.91 7.92 //skip over the prolog by adding its size to the pointer return 7.93 - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); 7.94 + return (void*)((uintptr_t)returnElem + sizeof(MallocProlog)); 7.95 } 7.96 7.97 -/*This is sequential code, meant to only be called from the Master, not from 7.98 - * any slave VPs. 7.99 - *Search down list, checking size by the nextHigherInMem pointer, to find 7.100 - * first chunk bigger than size needed. 7.101 - *Shave off the extra and make it into a new free-list element, hook it in 7.102 - * then return the address of the found element plus size of prolog. 7.103 - * 7.104 - * The difference to the regular malloc is, that all the allocated chunks are 7.105 - * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk 7.106 - * before the aligned chunk. 7.107 +/* 7.108 + * This free is called for a master loop. It decides whether the allocation of 7.109 + * chunk was done on the same core. If it was it calls VMS__free_on_core 7.110 + * otherwise it sends a message to the responsible core. 7.111 */ 7.112 -void *VMS__malloc_aligned( size_t sizeRequested ) 7.113 - { MallocProlog *foundElem = NULL, *currElem, *newElem; 7.114 - ssize_t amountExtra, sizeConsumed,sizeOfFound,prevAmount; 7.115 - uint32 foundElemIsTopOfHeap; 7.116 +void 7.117 +VMS__free(void *ptrToFree) 7.118 +{ 7.119 + MallocPrologAllocated chunk = (MallocPrologAllocated*)ptrToFree - 1; 7.120 + if(chunk->procrID == _VMSMasterEnv->currentMasterProcrID) 7.121 + { 7.122 + VMS__free_on_core(ptrToFree, _VMSMasterEnv->currentMasterProcrID); 7.123 + } 7.124 + else 7.125 + { 7.126 + //Request from other Core 7.127 + } 7.128 +} 7.129 7.130 - //============================= MEASUREMENT STUFF ======================== 7.131 - #ifdef MEAS__TIME_MALLOC 7.132 - uint32 startStamp, endStamp; 7.133 - saveLowTimeStampCountInto( startStamp ); 7.134 - #endif 7.135 - //======================================================================== 7.136 - 7.137 - //step up the size to be multiple of the cache line size 7.138 - sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1); 7.139 - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList; 7.140 - 7.141 - while( currElem != NULL ) 7.142 - { //check if size of currElem is big enough 7.143 - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem); 7.144 - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog); 7.145 - if( amountExtra > 0 ) 7.146 - { 7.147 - //look if the found element is already aligned 7.148 - if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){ 7.149 - //found it, get out of loop 7.150 - foundElem = currElem; 7.151 - break; 7.152 - }else{ 7.153 - //find first aligned address and check if it's still big enough 7.154 - //check also if the space before the aligned address is big enough 7.155 - //for a new element 7.156 - void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1))); 7.157 - prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem; 7.158 - sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog); 7.159 - amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog); 7.160 - if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){ 7.161 - //found suitable element 7.162 - //create new previous element and exit loop 7.163 - MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1; 7.164 - 7.165 - //insert new element into free list 7.166 - if(currElem->nextChunkInFreeList != NULL) 7.167 - currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem; 7.168 - newAlignedElem->prevChunkInFreeList = currElem; 7.169 - newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList; 7.170 - currElem->nextChunkInFreeList = newAlignedElem; 7.171 - 7.172 - //set higherInMem and lowerInMem 7.173 - newAlignedElem->nextHigherInMem = currElem->nextHigherInMem; 7.174 - foundElemIsTopOfHeap = currElem->nextHigherInMem == 7.175 - _VMSMasterEnv->freeListHead->nextHigherInMem; 7.176 - if(!foundElemIsTopOfHeap) 7.177 - currElem->nextHigherInMem->nextLowerInMem = newAlignedElem; 7.178 - currElem->nextHigherInMem = newAlignedElem; 7.179 - newAlignedElem->nextLowerInMem = currElem; 7.180 - 7.181 - //Found new element leaving loop 7.182 - foundElem = newAlignedElem; 7.183 - break; 7.184 - } 7.185 - } 7.186 - 7.187 - } 7.188 - currElem = currElem->nextChunkInFreeList; 7.189 +/* 7.190 + * This free is called for the plugins. It decides whether the allocation of 7.191 + * chunk was done on the same core. If it was it calls VMS__free_on_core 7.192 + * otherwise it sends a message to the responsible core. 7.193 + */ 7.194 +void 7.195 +VMS__free_in_lib(void *ptrToFree, VirtProcr *VProc) 7.196 +{ 7.197 + MallocPrologAllocated chunk = (MallocPrologAllocated*)ptrToFree - 1; 7.198 + if(chunk->procrID == VProc->coreAnimatedBy) 7.199 + { 7.200 + VMS__free_on_core(ptrToFree, VProc->coreAnimatedBy); 7.201 } 7.202 - 7.203 - if( foundElem == NULL ) 7.204 - { ERROR("\nmalloc failed\n") 7.205 - return (void *)NULL; //indicates malloc failed 7.206 + else 7.207 + { 7.208 + //Request from other Core 7.209 } 7.210 - //Using a kludge to identify the element that is the top chunk in the 7.211 - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and 7.212 - // save addr of start of heap in head's nextLowerInMem 7.213 - //Will handle top of Heap specially 7.214 - foundElemIsTopOfHeap = foundElem->nextHigherInMem == 7.215 - _VMSMasterEnv->freeListHead->nextHigherInMem; 7.216 - 7.217 - //before shave off and try to insert new elem, remove found elem 7.218 - //note, foundElem will never be the head, so always has valid prevChunk 7.219 - foundElem->prevChunkInFreeList->nextChunkInFreeList = 7.220 - foundElem->nextChunkInFreeList; 7.221 - if( foundElem->nextChunkInFreeList != NULL ) 7.222 - { foundElem->nextChunkInFreeList->prevChunkInFreeList = 7.223 - foundElem->prevChunkInFreeList; 7.224 - } 7.225 - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated 7.226 - 7.227 - //if enough, turn extra into new elem & insert it 7.228 - if( amountExtra > 64 ) 7.229 - { //make new elem by adding to addr of curr elem then casting 7.230 - sizeConsumed = sizeof(MallocProlog) + sizeRequested; 7.231 - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed ); 7.232 - newElem->nextHigherInMem = foundElem->nextHigherInMem; 7.233 - newElem->nextLowerInMem = foundElem; 7.234 - foundElem->nextHigherInMem = newElem; 7.235 - 7.236 - if( ! foundElemIsTopOfHeap ) 7.237 - { //there is no next higher for top of heap, so can't write to it 7.238 - newElem->nextHigherInMem->nextLowerInMem = newElem; 7.239 - } 7.240 - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead ); 7.241 - } 7.242 - else 7.243 - { 7.244 - sizeConsumed = sizeOfFound; 7.245 - } 7.246 - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed; 7.247 - 7.248 - //============================= MEASUREMENT STUFF ======================== 7.249 - #ifdef MEAS__TIME_MALLOC 7.250 - saveLowTimeStampCountInto( endStamp ); 7.251 - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist ); 7.252 - #endif 7.253 - //======================================================================== 7.254 - 7.255 - //skip over the prolog by adding its size to the pointer return 7.256 - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog)); 7.257 - } 7.258 - 7.259 +} 7.260 7.261 /*This is sequential code -- only to be called from the Master 7.262 * When free, subtract the size of prolog from pointer, then cast it to a 7.263 @@ -266,7 +195,7 @@ 7.264 * add this one to free-list. 7.265 */ 7.266 void 7.267 -VMS__free( void *ptrToFree ) 7.268 +VMS__free_on_core( void *ptrToFree, int procrID) 7.269 { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem; 7.270 size_t sizeOfElem; 7.271 uint32 lowerExistsAndIsFree, higherExistsAndIsFree; 7.272 @@ -443,7 +372,7 @@ 7.273 MallocProlog * 7.274 VMS_ext__create_free_list() 7.275 { MallocProlog *freeListHead, *firstChunk; 7.276 - 7.277 + 7.278 //Note, this is running in the main thread -- all increases in malloc 7.279 // mem and all frees of it must be done in this thread, with the 7.280 // thread's original stack available
8.1 --- a/vmalloc.h Wed Sep 07 17:45:05 2011 +0200 8.2 +++ b/vmalloc.h Fri Sep 16 14:25:49 2011 +0200 8.3 @@ -6,13 +6,13 @@ 8.4 * 8.5 * Created on November 14, 2009, 9:07 PM 8.6 */ 8.7 - 8.8 #ifndef _VMALLOC_H 8.9 #define _VMALLOC_H 8.10 8.11 #include <malloc.h> 8.12 #include <inttypes.h> 8.13 #include "VMS_primitive_data_types.h" 8.14 +#include "ProcrContext.h" 8.15 8.16 typedef struct _MallocProlog MallocProlog; 8.17 8.18 @@ -22,25 +22,41 @@ 8.19 MallocProlog *prevChunkInFreeList; 8.20 MallocProlog *nextHigherInMem; 8.21 MallocProlog *nextLowerInMem; 8.22 - }; 8.23 + }; 8.24 //MallocProlog 8.25 + 8.26 + typedef struct 8.27 + { 8.28 + uintptr_t procrID; 8.29 + MallocProlog *prevChunkInFreeList; 8.30 + MallocProlog *nextHigherInMem; 8.31 + MallocProlog *nextLowerInMem; 8.32 + } MallocPrologAllocated; 8.33 8.34 typedef struct 8.35 { 8.36 MallocProlog *firstChunkInFreeList; 8.37 int32 numInList; //TODO not used 8.38 - } 8.39 -FreeListHead; 8.40 + } FreeListHead; 8.41 8.42 void * 8.43 -VMS__malloc( size_t sizeRequested ); 8.44 +VMS__malloc_on_core(size_t sizeRequested, int procrID); 8.45 8.46 void * 8.47 -VMS__malloc_aligned( size_t sizeRequested ); 8.48 +VMS__malloc(size_t sizeRequested); 8.49 + 8.50 +void * 8.51 +VMS__malloc_in_lib(size_t sizeRequested, VirtProcr *VProc); 8.52 8.53 void 8.54 VMS__free( void *ptrToFree ); 8.55 8.56 +void 8.57 +VMS__free_in_lib(void *ptrToFree, VirtProcr *VProc); 8.58 + 8.59 +void 8.60 +VMS__free_on_core(void *ptrToFree, int procrID); 8.61 + 8.62 /*Allocates memory from the external system -- higher overhead 8.63 */ 8.64 void *
