Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl
diff vmalloc.c @ 132:dbfc8382d546
distributed memory allocation interface - unfinished
| author | Merten Sach <msach@mailbox.tu-berlin.de> |
|---|---|
| date | Fri, 16 Sep 2011 14:25:49 +0200 |
| parents | 21c95d402fe6 |
| children | a9b72021f053 |
line diff
```
1.1 --- a/vmalloc.c Wed Sep 07 17:45:05 2011 +0200
1.2 +++ b/vmalloc.c Fri Sep 16 14:25:49 2011 +0200
1.3 @@ -12,7 +12,7 @@
1.4 #include <stdlib.h>
1.5 #include <stdio.h>
1.6
1.7 -#include "VMS.h"
1.8 +#include "ProcrContext.h"
1.9 #include "Histogram/Histogram.h"
1.10
1.11 /*Helper function
1.12 @@ -37,18 +37,37 @@
1.13 listHead->nextChunkInFreeList = chunk;
1.14 }
1.15
1.16 +/*
1.17 + * This function is called by code which is part of the master loop.
1.18 + * This reads the animating coreID from the MasterEnv and calls the normal malloc
1.19 + * in VMS__malloc_on_core
1.20 + */
1.21 +void *
1.22 +VMS__malloc( size_t sizeRequested)
1.23 +{
1.24 + return VMS__malloc_on_core(sizeRequested, _VMSMasterEnv->currentMasterProcrID);
1.25 +}
1.26
1.27 -/*This is sequential code, meant to only be called from the Master, not from
1.28 - * any slave VPs.
1.29 +/*
1.30 + * This is called by the plugin. This call to VMS_malloc_on_core is run on the
1.31 + * slave VPs stack so there is no switch to the VMS runtime.
1.32 + */
1.33 +void *
1.34 +VMS__malloc_in_lib(size_t sizeRequested, VirtProcr *VProcr)
1.35 +{
1.36 + return VMS__malloc_on_core(sizeRequested, VProcr->coreAnimatedBy);
1.37 +}
1.38 +
1.39 +/*
1.40 *Search down list, checking size by the nextHigherInMem pointer, to find
1.41 * first chunk bigger than size needed.
1.42 *Shave off the extra and make it into a new free-list element, hook it in
1.43 * then return the address of the found element plus size of prolog.
1.44 - *
1.45 - *Will find a
1.46 */
1.47 -void *VMS__malloc( size_t sizeRequested )
1.48 +void *
1.49 +VMS__malloc_on_core( size_t sizeRequested, int procrID)
1.50 { MallocProlog *foundElem = NULL, *currElem, *newElem;
1.51 + MallocPrologAllocated *returnElem;
1.52 ssize_t amountExtra, sizeConsumed,sizeOfFound;
1.53 uint32 foundElemIsTopOfHeap;
1.54
1.55 @@ -61,7 +80,8 @@
1.56
1.57 //step up the size to be aligned at 16-byte boundary, prob better ways
1.58 sizeRequested = (sizeRequested + 16) & ~15;
1.59 - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
1.60 + currElem = (_VMSMasterEnv->freeListHead[_VMSMasterEnv->currentMasterProcrID])
1.61 + ->nextChunkInFreeList;
1.62
1.63 while( currElem != NULL )
1.64 { //check if size of currElem is big enough
1.65 @@ -95,16 +115,18 @@
1.66 { foundElem->nextChunkInFreeList->prevChunkInFreeList =
1.67 foundElem->prevChunkInFreeList;
1.68 }
1.69 - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
1.70 + returnElem = (MallocPrologAllocated*)foundElem;
1.71 + returnElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
1.72 + returnElem->procrID = procrID;
1.73
1.74 //if enough, turn extra into new elem & insert it
1.75 if( amountExtra > 64 )
1.76 { //make new elem by adding to addr of curr elem then casting
1.77 sizeConsumed = sizeof(MallocProlog) + sizeRequested;
1.78 - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
1.79 - newElem->nextLowerInMem = foundElem; //This is evil (but why?)
1.80 - newElem->nextHigherInMem = foundElem->nextHigherInMem; //This is evil (but why?)
1.81 - foundElem->nextHigherInMem = newElem;
1.82 + newElem = (MallocProlog *)( (uintptr_t)returnElem + sizeConsumed );
1.83 + newElem->nextLowerInMem = returnElem; //This is evil (but why?)
1.84 + newElem->nextHigherInMem = returnElem->nextHigherInMem; //This is evil (but why?)
1.85 + returnElem->nextHigherInMem = newElem;
1.86 if( ! foundElemIsTopOfHeap )
1.87 { //there is no next higher for top of heap, so can't write to it
1.88 newElem->nextHigherInMem->nextLowerInMem = newElem;
1.89 @@ -125,139 +147,46 @@
1.90 //========================================================================
1.91
1.92 //skip over the prolog by adding its size to the pointer return
1.93 - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
1.94 + return (void*)((uintptr_t)returnElem + sizeof(MallocProlog));
1.95 }
1.96
1.97 -/*This is sequential code, meant to only be called from the Master, not from
1.98 - * any slave VPs.
1.99 - *Search down list, checking size by the nextHigherInMem pointer, to find
1.100 - * first chunk bigger than size needed.
1.101 - *Shave off the extra and make it into a new free-list element, hook it in
1.102 - * then return the address of the found element plus size of prolog.
1.103 - *
1.104 - * The difference to the regular malloc is, that all the allocated chunks are
1.105 - * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk
1.106 - * before the aligned chunk.
1.107 +/*
1.108 + * This free is called for a master loop. It decides whether the allocation of
1.109 + * chunk was done on the same core. If it was it calls VMS__free_on_core
1.110 + * otherwise it sends a message to the responsible core.
1.111 */
1.112 -void *VMS__malloc_aligned( size_t sizeRequested )
1.113 - { MallocProlog *foundElem = NULL, *currElem, *newElem;
1.114 - ssize_t amountExtra, sizeConsumed,sizeOfFound,prevAmount;
1.115 - uint32 foundElemIsTopOfHeap;
1.116 +void
1.117 +VMS__free(void *ptrToFree)
1.118 +{
1.119 + MallocPrologAllocated chunk = (MallocPrologAllocated*)ptrToFree - 1;
1.120 + if(chunk->procrID == _VMSMasterEnv->currentMasterProcrID)
1.121 + {
1.122 + VMS__free_on_core(ptrToFree, _VMSMasterEnv->currentMasterProcrID);
1.123 + }
1.124 + else
1.125 + {
1.126 + //Request from other Core
1.127 + }
1.128 +}
1.129
1.130 - //============================= MEASUREMENT STUFF ========================
1.131 - #ifdef MEAS__TIME_MALLOC
1.132 - uint32 startStamp, endStamp;
1.133 - saveLowTimeStampCountInto( startStamp );
1.134 - #endif
1.135 - //========================================================================
1.136 -
1.137 - //step up the size to be multiple of the cache line size
1.138 - sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1);
1.139 - currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
1.140 -
1.141 - while( currElem != NULL )
1.142 - { //check if size of currElem is big enough
1.143 - sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
1.144 - amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
1.145 - if( amountExtra > 0 )
1.146 - {
1.147 - //look if the found element is already aligned
1.148 - if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){
1.149 - //found it, get out of loop
1.150 - foundElem = currElem;
1.151 - break;
1.152 - }else{
1.153 - //find first aligned address and check if it's still big enough
1.154 - //check also if the space before the aligned address is big enough
1.155 - //for a new element
1.156 - void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1)));
1.157 - prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem;
1.158 - sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog);
1.159 - amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog);
1.160 - if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){
1.161 - //found suitable element
1.162 - //create new previous element and exit loop
1.163 - MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1;
1.164 -
1.165 - //insert new element into free list
1.166 - if(currElem->nextChunkInFreeList != NULL)
1.167 - currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem;
1.168 - newAlignedElem->prevChunkInFreeList = currElem;
1.169 - newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList;
1.170 - currElem->nextChunkInFreeList = newAlignedElem;
1.171 -
1.172 - //set higherInMem and lowerInMem
1.173 - newAlignedElem->nextHigherInMem = currElem->nextHigherInMem;
1.174 - foundElemIsTopOfHeap = currElem->nextHigherInMem ==
1.175 - _VMSMasterEnv->freeListHead->nextHigherInMem;
1.176 - if(!foundElemIsTopOfHeap)
1.177 - currElem->nextHigherInMem->nextLowerInMem = newAlignedElem;
1.178 - currElem->nextHigherInMem = newAlignedElem;
1.179 - newAlignedElem->nextLowerInMem = currElem;
1.180 -
1.181 - //Found new element leaving loop
1.182 - foundElem = newAlignedElem;
1.183 - break;
1.184 - }
1.185 - }
1.186 -
1.187 - }
1.188 - currElem = currElem->nextChunkInFreeList;
1.189 +/*
1.190 + * This free is called for the plugins. It decides whether the allocation of
1.191 + * chunk was done on the same core. If it was it calls VMS__free_on_core
1.192 + * otherwise it sends a message to the responsible core.
1.193 + */
1.194 +void
1.195 +VMS__free_in_lib(void *ptrToFree, VirtProcr *VProc)
1.196 +{
1.197 + MallocPrologAllocated chunk = (MallocPrologAllocated*)ptrToFree - 1;
1.198 + if(chunk->procrID == VProc->coreAnimatedBy)
1.199 + {
1.200 + VMS__free_on_core(ptrToFree, VProc->coreAnimatedBy);
1.201 }
1.202 -
1.203 - if( foundElem == NULL )
1.204 - { ERROR("\nmalloc failed\n")
1.205 - return (void *)NULL; //indicates malloc failed
1.206 + else
1.207 + {
1.208 + //Request from other Core
1.209 }
1.210 - //Using a kludge to identify the element that is the top chunk in the
1.211 - // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
1.212 - // save addr of start of heap in head's nextLowerInMem
1.213 - //Will handle top of Heap specially
1.214 - foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
1.215 - _VMSMasterEnv->freeListHead->nextHigherInMem;
1.216 -
1.217 - //before shave off and try to insert new elem, remove found elem
1.218 - //note, foundElem will never be the head, so always has valid prevChunk
1.219 - foundElem->prevChunkInFreeList->nextChunkInFreeList =
1.220 - foundElem->nextChunkInFreeList;
1.221 - if( foundElem->nextChunkInFreeList != NULL )
1.222 - { foundElem->nextChunkInFreeList->prevChunkInFreeList =
1.223 - foundElem->prevChunkInFreeList;
1.224 - }
1.225 - foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
1.226 -
1.227 - //if enough, turn extra into new elem & insert it
1.228 - if( amountExtra > 64 )
1.229 - { //make new elem by adding to addr of curr elem then casting
1.230 - sizeConsumed = sizeof(MallocProlog) + sizeRequested;
1.231 - newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
1.232 - newElem->nextHigherInMem = foundElem->nextHigherInMem;
1.233 - newElem->nextLowerInMem = foundElem;
1.234 - foundElem->nextHigherInMem = newElem;
1.235 -
1.236 - if( ! foundElemIsTopOfHeap )
1.237 - { //there is no next higher for top of heap, so can't write to it
1.238 - newElem->nextHigherInMem->nextLowerInMem = newElem;
1.239 - }
1.240 - add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
1.241 - }
1.242 - else
1.243 - {
1.244 - sizeConsumed = sizeOfFound;
1.245 - }
1.246 - _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
1.247 -
1.248 - //============================= MEASUREMENT STUFF ========================
1.249 - #ifdef MEAS__TIME_MALLOC
1.250 - saveLowTimeStampCountInto( endStamp );
1.251 - addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
1.252 - #endif
1.253 - //========================================================================
1.254 -
1.255 - //skip over the prolog by adding its size to the pointer return
1.256 - return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
1.257 - }
1.258 -
1.259 +}
1.260
1.261 /*This is sequential code -- only to be called from the Master
1.262 * When free, subtract the size of prolog from pointer, then cast it to a
1.263 @@ -266,7 +195,7 @@
1.264 * add this one to free-list.
1.265 */
1.266 void
1.267 -VMS__free( void *ptrToFree )
1.268 +VMS__free_on_core( void *ptrToFree, int procrID)
1.269 { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem;
1.270 size_t sizeOfElem;
1.271 uint32 lowerExistsAndIsFree, higherExistsAndIsFree;
1.272 @@ -443,7 +372,7 @@
1.273 MallocProlog *
1.274 VMS_ext__create_free_list()
1.275 { MallocProlog *freeListHead, *firstChunk;
1.276 -
1.277 +
1.278 //Note, this is running in the main thread -- all increases in malloc
1.279 // mem and all frees of it must be done in this thread, with the
1.280 // thread's original stack available
```
