comparison vmalloc.c @ 117:07e679ee2095

Bit index search with simple chop off
author Merten Sach <msach@mailbox.tu-berlin.de>
date Tue, 30 Aug 2011 17:11:13 +0200
parents 35547e66b971
children e29bb31df078
comparison
equal deleted inserted replaced
16:f60c85476efd 17:411392858f3f
27 */ 27 */
28 inline 28 inline
29 uint32 getContainer(size_t size) 29 uint32 getContainer(size_t size)
30 { 30 {
31 return (log10(size)-LOG128)/LOG54; 31 return (log10(size)-LOG128)/LOG54;
32 }
33
34 /*
35 * This calculates the size of a given container
36 */
37 inline
38 size_t getContainerSize(uint32 idx)
39 {
40 return pow(CHUNK_INCREASE_RATE,idx)*LOWER_BOUND;
41 } 32 }
42 33
43 /* 34 /*
44 * Removes the first chunk of a freeList 35 * Removes the first chunk of a freeList
45 * The chunk is removed but not set as free. There is no check if 36 * The chunk is removed but not set as free. There is no check if
85 76
86 return removedChunk; 77 return removedChunk;
87 } 78 }
88 79
89 inline 80 inline
90 void recycleSmallChunks(MallocProlog** container) 81 size_t getChunkSize(MallocProlog* chunk)
91 { 82 {
92 //TODO recycle small chunks 83 return (uintptr_t)chunk->nextHigherInMem -
84 (uintptr_t)chunk - sizeof(MallocProlog);
93 } 85 }
94 86
95 /* 87 /*
96 * Removes a chunk from a free list. 88 * Removes a chunk from a free list.
97 */ 89 */
102 if(chunk->nextChunkInFreeList) 94 if(chunk->nextChunkInFreeList)
103 chunk->nextChunkInFreeList->prevChunkInFreeList = chunk->prevChunkInFreeList; 95 chunk->nextChunkInFreeList->prevChunkInFreeList = chunk->prevChunkInFreeList;
104 96
105 //The last element in the list points to the container. If the container points 97 //The last element in the list points to the container. If the container points
106 //to NULL the container is empty 98 //to NULL the container is empty
107 if(*((void**)(chunk->prevChunkInFreeList)) == NULL) 99 if(*((void**)(chunk->prevChunkInFreeList)) == NULL && getChunkSize(chunk) >= 160);
108 { 100 {
109 uint32 containerIdx = ((uintptr_t)chunk->prevChunkInFreeList - (uintptr_t)freeLists->bigChunks) >> 6; 101 //Find the approppiate container because we do not know it
102 uint64 containerIdx = ((uintptr_t)chunk->prevChunkInFreeList - (uintptr_t)freeLists->bigChunks) >> 3;
110 if(containerIdx < (uint32)64) 103 if(containerIdx < (uint32)64)
111 freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx); 104 freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx);
112 if(containerIdx < 128 && containerIdx >=64) 105 if(containerIdx < 128 && containerIdx >=64)
113 freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64)); 106 freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64));
107
114 } 108 }
115 } 109 }
116 110
117 /* 111 /*
118 * Merges two chunks. 112 * Merges two chunks.
121 */ 115 */
122 inline 116 inline
123 MallocProlog *mergeChunks(MallocProlog* chunkA, MallocProlog* chunkB) 117 MallocProlog *mergeChunks(MallocProlog* chunkA, MallocProlog* chunkB)
124 { 118 {
125 chunkA->nextHigherInMem = chunkB->nextHigherInMem; 119 chunkA->nextHigherInMem = chunkB->nextHigherInMem;
120 chunkB->nextHigherInMem->nextLowerInMem = chunkA;
126 return chunkA; 121 return chunkA;
127 } 122 }
128 /* 123 /*
129 * Inserts a chunk into a free list. 124 * Inserts a chunk into a free list.
130 */ 125 */
149 newSize - sizeof(MallocProlog)); 144 newSize - sizeof(MallocProlog));
150 145
151 newChunk->nextLowerInMem = chunk; 146 newChunk->nextLowerInMem = chunk;
152 newChunk->nextHigherInMem = chunk->nextHigherInMem; 147 newChunk->nextHigherInMem = chunk->nextHigherInMem;
153 148
149 chunk->nextHigherInMem->nextLowerInMem = newChunk;
154 chunk->nextHigherInMem = newChunk; 150 chunk->nextHigherInMem = newChunk;
155 chunk->nextHigherInMem->nextLowerInMem = newChunk;
156 151
157 return newChunk; 152 return newChunk;
158 } 153 }
159 154
160 inline 155 /*
161 size_t getChunkSize(MallocProlog* chunk) 156 * Search for chunk in the list of big chunks. Split the block if it's too big
162 { 157 */
163 return (uintptr_t)chunk->nextHigherInMem - 158 inline
164 (uintptr_t)chunk - sizeof(MallocProlog); 159 MallocProlog *searchChunk(MallocArrays *freeLists, size_t sizeRequested, uint32 containerIdx)
165 } 160 {
166 161 MallocProlog* foundChunk;
167 162
168 /* 163 uint64 searchVector = freeLists->bigChunksSearchVector[0];
169 * This makes 5 Chunks of the requested size. 164 //set small chunk bits to zero
170 */ 165 searchVector &= MAX_UINT64 << containerIdx;
171 inline 166 containerIdx = __builtin_ffsl(searchVector);
172 void makeChunks(size_t size, MallocProlog **container) 167
173 { 168 if(containerIdx == 0)
174 MallocArrays *freeLists = _VMSMasterEnv->freeLists;
175 size_t addedSize = 5*(size + sizeof(MallocProlog));
176
177 uint32 containerIdx = getContainer(addedSize)+4;
178 while(freeLists->bigChunks[containerIdx] == NULL )
179 containerIdx++;
180
181 MallocProlog *foundChunk = removeChunk(freeLists, containerIdx);
182
183 int i;
184 for(i=0; i<5; i++)
185 { 169 {
186 insertChunk(divideChunk(foundChunk,size), container); 170 searchVector = freeLists->bigChunksSearchVector[1];
171 containerIdx = __builtin_ffsl(searchVector);
172 if(containerIdx == 0)
173 {
174 printf("VMS malloc failed: low memory");
175 exit(1);
176 }
177 containerIdx += 64;
187 } 178 }
188 containerIdx = getContainer(getChunkSize(foundChunk)); 179 containerIdx--;
189 insertChunk(foundChunk, &freeLists->bigChunks[containerIdx]); 180
190 if(containerIdx < 64) 181
191 freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx); 182 foundChunk = removeChunk(freeLists, containerIdx);
192 else 183 size_t chunkSize = getChunkSize(foundChunk);
193 freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64)); 184
194 } 185 //If the new chunk is larger than the requested size: split
186 if(chunkSize > sizeRequested + 2 * sizeof(MallocProlog) + 160)
187 {
188 MallocProlog *newChunk = divideChunk(foundChunk,sizeRequested);
189 containerIdx = getContainer(getChunkSize(foundChunk)) - 1;
190 insertChunk(foundChunk,&freeLists->bigChunks[containerIdx]);
191 if(containerIdx < 64)
192 freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx);
193 else
194 freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64));
195 foundChunk = newChunk;
196 }
197
198 return foundChunk;
199 }
200
195 201
196 /* 202 /*
197 * This is sequential code, meant to only be called from the Master, not from 203 * This is sequential code, meant to only be called from the Master, not from
198 * any slave VPs. 204 * any slave VPs.
199 */ 205 */
205 saveLowTimeStampCountInto( startStamp ); 211 saveLowTimeStampCountInto( startStamp );
206 #endif 212 #endif
207 //======================================================================== 213 //========================================================================
208 214
209 MallocArrays* freeLists = _VMSMasterEnv->freeLists; 215 MallocArrays* freeLists = _VMSMasterEnv->freeLists;
216 MallocProlog* foundChunk;
210 217
211 //Return a small chunk if the requested size is smaller than 128B 218 //Return a small chunk if the requested size is smaller than 128B
212 if(sizeRequested <= LOWER_BOUND) 219 if(sizeRequested <= LOWER_BOUND)
213 { 220 {
214 uint32 freeListIdx = (sizeRequested-1)/32; 221 uint32 freeListIdx = (sizeRequested-1)/SMALL_CHUNK_SIZE;
215 if(freeLists->smallChunkCount[freeListIdx] == 0) 222 if(freeLists->smallChunks[freeListIdx] == NULL)
216 { 223 foundChunk = searchChunk(freeLists, SMALL_CHUNK_SIZE*(freeListIdx+1), 0);
217 makeChunks((freeListIdx+1)*32, &freeLists->smallChunks[freeListIdx]); 224 else
218 freeLists->smallChunkCount[freeListIdx] += 5; 225 foundChunk = removeSmallChunk(freeLists, freeListIdx);
219 }
220 226
221 freeLists->smallChunkCount[freeListIdx]--; 227 //Mark as allocated
222 return removeSmallChunk(freeLists, freeListIdx) + 1; 228 foundChunk->prevChunkInFreeList = NULL;
229 return foundChunk + 1;
223 } 230 }
224 231
225 //Calculate the expected container. Start one higher to have a Chunk that's 232 //Calculate the expected container. Start one higher to have a Chunk that's
226 //always big enough. 233 //always big enough.
227 uint32 containerIdx = getContainer(sizeRequested) + 1; 234 uint32 containerIdx = getContainer(sizeRequested);
228 235
229 MallocProlog* foundChunk;
230 if(freeLists->bigChunks[containerIdx] == NULL) 236 if(freeLists->bigChunks[containerIdx] == NULL)
231 { 237 foundChunk = searchChunk(freeLists, sizeRequested, containerIdx);
232 uint64 searchVector = freeLists->bigChunksSearchVector[0];
233 //set small chunk bits to zero
234 searchVector &= MAX_UINT64 << containerIdx;
235 containerIdx = __builtin_ffsl(searchVector);
236
237 if(containerIdx == 0)
238 {
239 searchVector = freeLists->bigChunksSearchVector[1];
240 containerIdx = __builtin_ffsl(searchVector);
241 if(containerIdx == 0)
242 {
243 printf("VMS malloc failed: low memory");
244 exit(1);
245 }
246 containerIdx += 64;
247 }
248 containerIdx--;
249
250
251 foundChunk = removeChunk(freeLists, containerIdx);
252 size_t chunkSize = getChunkSize(foundChunk);
253
254 //If the new chunk is larger than the requested size: split
255 if(chunkSize > sizeRequested + 2 * sizeof(MallocProlog) + LOWER_BOUND)
256 {
257 MallocProlog *newChunk = divideChunk(foundChunk,sizeRequested);
258 containerIdx = getContainer(getChunkSize(foundChunk));
259 insertChunk(foundChunk,&freeLists->bigChunks[containerIdx]);
260 if(containerIdx < 64)
261 freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx);
262 else
263 freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64));
264 foundChunk = newChunk;
265 }
266 }
267 else 238 else
268 { 239 foundChunk = removeChunk(freeLists, containerIdx);
269 foundChunk = removeChunk(freeLists, containerIdx);
270 }
271 240
272 //Mark as allocated 241 //Mark as allocated
273 foundChunk->prevChunkInFreeList = NULL; 242 foundChunk->prevChunkInFreeList = NULL;
274 243
275 //============================= MEASUREMENT STUFF ======================== 244 //============================= MEASUREMENT STUFF ========================
288 * any slave VPs. 257 * any slave VPs.
289 */ 258 */
290 void 259 void
291 VMS__free( void *ptrToFree ) 260 VMS__free( void *ptrToFree )
292 { 261 {
293 262
294 //============================= MEASUREMENT STUFF ======================== 263 //============================= MEASUREMENT STUFF ========================
295 #ifdef MEAS__TIME_MALLOC 264 #ifdef MEAS__TIME_MALLOC
296 int32 startStamp, endStamp; 265 int32 startStamp, endStamp;
297 saveLowTimeStampCountInto( startStamp ); 266 saveLowTimeStampCountInto( startStamp );
298 #endif 267 #endif
299 //======================================================================== 268 //========================================================================
300 269
301 MallocArrays* freeLists = _VMSMasterEnv->freeLists; 270 MallocArrays* freeLists = _VMSMasterEnv->freeLists;
302 MallocProlog *chunkToFree = (MallocProlog*)ptrToFree - 1; 271 MallocProlog *chunkToFree = (MallocProlog*)ptrToFree - 1;
303 uint32 containerIdx; 272 uint32 containerIdx;
304
305 size_t chunkSize = getChunkSize(chunkToFree);
306 if(chunkSize <= LOWER_BOUND)
307 {
308 containerIdx = (chunkSize-1)/32;
309 insertChunk(chunkToFree,&freeLists->smallChunks[containerIdx]);
310 freeLists->smallChunkCount[containerIdx]++;
311
312 if(freeLists->smallChunkCount[containerIdx] > 20)
313 recycleSmallChunks(&freeLists->smallChunks[containerIdx]);
314
315 return;
316 }
317 273
318 //Check for free neighbors 274 //Check for free neighbors
319 if(chunkToFree->nextLowerInMem) 275 if(chunkToFree->nextLowerInMem)
320 { 276 {
321 if(chunkToFree->nextLowerInMem->prevChunkInFreeList != NULL) 277 if(chunkToFree->nextLowerInMem->prevChunkInFreeList != NULL)
322 {//Chunk is not allocated 278 {//Chunk is not allocated
323 if(getChunkSize(chunkToFree->nextLowerInMem) > LOWER_BOUND) 279 extractChunk(chunkToFree->nextLowerInMem, freeLists);
324 { 280 chunkToFree = mergeChunks(chunkToFree->nextLowerInMem, chunkToFree);
325 extractChunk(chunkToFree->nextLowerInMem, freeLists);
326 chunkToFree = mergeChunks(chunkToFree->nextLowerInMem, chunkToFree);
327 }
328 } 281 }
329 } 282 }
330 if(chunkToFree->nextHigherInMem) 283 if(chunkToFree->nextHigherInMem)
331 { 284 {
332 if(chunkToFree->nextHigherInMem->prevChunkInFreeList != NULL) 285 if(chunkToFree->nextHigherInMem->prevChunkInFreeList != NULL)
333 {//Chunk is not allocated 286 {//Chunk is not allocated
334 if(getChunkSize(chunkToFree->nextHigherInMem) > LOWER_BOUND) 287 extractChunk(chunkToFree->nextHigherInMem, freeLists);
335 { 288 chunkToFree = mergeChunks(chunkToFree, chunkToFree->nextHigherInMem);
336 extractChunk(chunkToFree->nextHigherInMem, freeLists);
337 chunkToFree = mergeChunks(chunkToFree, chunkToFree->nextHigherInMem);
338 }
339 } 289 }
340 } 290 }
341 291
342 containerIdx = getContainer(getChunkSize(chunkToFree)); 292 size_t chunkSize = getChunkSize(chunkToFree);
343 insertChunk(chunkToFree, &freeLists->bigChunks[containerIdx]); 293 if(chunkSize < 160)
344 if(containerIdx < 64) 294 {
345 freeLists->bigChunksSearchVector[0] |= (uint64)1 << containerIdx; 295 containerIdx = (chunkSize/SMALL_CHUNK_SIZE)-1;
296 if(containerIdx > 3)
297 containerIdx = 3;
298 insertChunk(chunkToFree, &freeLists->smallChunks[containerIdx]);
299 }
346 else 300 else
347 freeLists->bigChunksSearchVector[1] |= (uint64)1 << (containerIdx-64); 301 {
302 containerIdx = getContainer(getChunkSize(chunkToFree)) - 1;
303 insertChunk(chunkToFree, &freeLists->bigChunks[containerIdx]);
304 if(containerIdx < 64)
305 freeLists->bigChunksSearchVector[0] |= (uint64)1 << containerIdx;
306 else
307 freeLists->bigChunksSearchVector[1] |= (uint64)1 << (containerIdx-64);
308 }
348 309
349 //============================= MEASUREMENT STUFF ======================== 310 //============================= MEASUREMENT STUFF ========================
350 #ifdef MEAS__TIME_MALLOC 311 #ifdef MEAS__TIME_MALLOC
351 saveLowTimeStampCountInto( endStamp ); 312 saveLowTimeStampCountInto( endStamp );
352 addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist ); 313 addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist );
353 #endif 314 #endif
354 //======================================================================== 315 //========================================================================
355 316
356 } 317 }
357 318
358 319 /*
359 /*Allocates memory from the external system -- higher overhead 320 * Designed to be called from the main thread outside of VMS, during init
360 *
361 *Because of Linux's malloc throwing bizarre random faults when malloc is
362 * used inside a VMS virtual processor, have to pass this as a request and
363 * have the core loop do it when it gets around to it -- will look for these
364 * chores leftover from the previous animation of masterVP the next time it
365 * goes to animate the masterVP -- so it takes two separate masterVP
366 * animations, separated by work, to complete an external malloc or
367 * external free request.
368 *
369 *Thinking core loop accepts signals -- just looks if signal-location is
370 * empty or not --
371 */
372 void *
373 VMS__malloc_in_ext( size_t sizeRequested )
374 {
375 /*
376 //This is running in the master, so no chance for multiple cores to be
377 // competing for the core's flag.
378 if( *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 )
379 { //something has already signalled to core loop, so save the signal
380 // and look, next time master animated, to see if can send it.
381 //Note, the addr to put a signal is in the coreloop's frame, so just
382 // checks it each time through -- make it volatile to avoid GCC
383 // optimizations -- it's a coreloop local var that only changes
384 // after jumping away. The signal includes the addr to send the
385 //return to -- even if just empty return completion-signal
386 //
387 //save the signal in some queue that the master looks at each time
388 // it starts up -- one loc says if empty for fast common case --
389 //something like that -- want to hide this inside this call -- but
390 // think this has to come as a request -- req handler gives procr
391 // back to master loop, which gives it back to req handler at point
392 // it sees that core loop has sent return signal. Something like
393 // that.
394 saveTheSignal
395
396 }
397 coreSigData->type = malloc;
398 coreSigData->sizeToMalloc = sizeRequested;
399 coreSigData->locToSignalCompletion = &figureOut;
400 _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData;
401 */
402 //just risk system-stack faults until get this figured out
403 return malloc( sizeRequested );
404 }
405
406
407 /*Frees memory that was allocated in the external system -- higher overhead
408 *
409 *As noted in external malloc comment, this is clunky 'cause the free has
410 * to be called in the core loop.
411 */
412 void
413 VMS__free_in_ext( void *ptrToFree )
414 {
415 //just risk system-stack faults until get this figured out
416 free( ptrToFree );
417
418 //TODO: fix this -- so
419 }
420
421
422 /*Designed to be called from the main thread outside of VMS, during init
423 */ 321 */
424 MallocArrays * 322 MallocArrays *
425 VMS_ext__create_free_list() 323 VMS_ext__create_free_list()
426 { 324 {
427 //Initialize containers for small chunks and fill with zeros 325 //Initialize containers for small chunks and fill with zeros
430 328
431 freeLists->smallChunks = 329 freeLists->smallChunks =
432 (MallocProlog**)malloc(SMALL_CHUNK_COUNT*sizeof(MallocProlog*)); 330 (MallocProlog**)malloc(SMALL_CHUNK_COUNT*sizeof(MallocProlog*));
433 memset((void*)freeLists->smallChunks, 331 memset((void*)freeLists->smallChunks,
434 0,SMALL_CHUNK_COUNT*sizeof(MallocProlog*)); 332 0,SMALL_CHUNK_COUNT*sizeof(MallocProlog*));
435 memset((void*)freeLists->smallChunkCount,
436 0,SMALL_CHUNK_COUNT*sizeof(uint32));
437 333
438 //Calculate number of containers for big chunks 334 //Calculate number of containers for big chunks
439 uint32 container = getContainer(MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE)+1; 335 uint32 container = getContainer(MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE)+1;
440 freeLists->bigChunks = (MallocProlog**)malloc(container*sizeof(MallocProlog*)); 336 freeLists->bigChunks = (MallocProlog**)malloc(container*sizeof(MallocProlog*));
441 memset((void*)freeLists->bigChunks,0,container*sizeof(MallocProlog*)); 337 memset((void*)freeLists->bigChunks,0,container*sizeof(MallocProlog*));
442 freeLists->containerCount = container; 338 freeLists->containerCount = container;
443 339
444 //Create first element in lastContainer 340 //Create first element in lastContainer
445 MallocProlog *firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE ); 341 MallocProlog *firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
446 if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);} 342 if( firstChunk == NULL ) {printf("Can't allocate initial memory\n"); exit(1);}
447 freeLists->memSpace = firstChunk; 343 freeLists->memSpace = firstChunk;
448 344
449 firstChunk->nextLowerInMem = NULL; 345 firstChunk->nextLowerInMem = NULL;
450 firstChunk->nextHigherInMem = (MallocProlog*)((uintptr_t)firstChunk + 346 firstChunk->nextHigherInMem = (MallocProlog*)((uintptr_t)firstChunk +
451 MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE - sizeof(MallocProlog*)); 347 MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE - sizeof(MallocProlog*));
452 firstChunk->nextChunkInFreeList = NULL; 348 firstChunk->nextChunkInFreeList = NULL;
453 //previous element in the queue is the container 349 //previous element in the queue is the container
454 firstChunk->prevChunkInFreeList = freeLists->bigChunks[container-1]; 350 firstChunk->prevChunkInFreeList = &freeLists->bigChunks[container-2];
455 351
456 freeLists->bigChunks[container-1] = firstChunk; 352 freeLists->bigChunks[container-2] = firstChunk;
457 //Insert into bit search list 353 //Insert into bit search list
458 if(container <= 64) 354 if(container <= 65)
459 freeLists->bigChunksSearchVector[0] |= ((uint64)1 << (container-1)); 355 freeLists->bigChunksSearchVector[0] |= ((uint64)1 << (container-2));
460 else 356 else
461 freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (container-65)); 357 freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (container-66));
462 358
463 //Create dummy chunk to mark the top of stack this is of course 359 //Create dummy chunk to mark the top of stack this is of course
464 //never freed 360 //never freed
465 MallocProlog *dummyChunk = firstChunk->nextHigherInMem; 361 MallocProlog *dummyChunk = firstChunk->nextHigherInMem;
466 dummyChunk->nextHigherInMem = dummyChunk+1; 362 dummyChunk->nextHigherInMem = dummyChunk+1;