/*
 *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
 *  Licensed under GNU General Public License version 2
 *
 * Author: seanhalle@yahoo.com
 *
 * Created on November 14, 2009, 9:07 PM
 */

#include <malloc.h>
#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#include "VMS.h"
#include "C_Libraries/Histogram/Histogram.h"

#define MAX_UINT64 0xFFFFFFFFFFFFFFFF

//A MallocProlog is a head element if the HigherInMem variable is NULL
//A Chunk is free if the prevChunkInFreeList variable is NULL

/*
 * This calculates the container which fits the given size.
 */
inline
uint32 getContainer(size_t size)
{
    return (log2(size)-LOG128)/LOG54;
}

/*
 * Removes the first chunk of a freeList
 * The chunk is removed but not set as free. There is no check if
 * the free list is empty, so make sure this is not the case.
 */
inline
MallocProlog *removeChunk(MallocArrays* freeLists, uint32 containerIdx)
{
    MallocProlog** container = &freeLists->bigChunks[containerIdx];
    MallocProlog*  removedChunk = *container;
    *container = removedChunk->nextChunkInFreeList;
    
    if(removedChunk->nextChunkInFreeList)
        removedChunk->nextChunkInFreeList->prevChunkInFreeList = 
                (MallocProlog*)container;
    
    if(*container == NULL)
    {
       if(containerIdx < 64)
           freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx); 
       else
           freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64));
    }
    
    return removedChunk;
}

/*
 * Removes the first chunk of a freeList
 * The chunk is removed but not set as free. There is no check if
 * the free list is empty, so make sure this is not the case.
 */
inline
MallocProlog *removeSmallChunk(MallocArrays* freeLists, uint32 containerIdx)
{
    MallocProlog** container = &freeLists->smallChunks[containerIdx];
    MallocProlog*  removedChunk = *container;
    *container = removedChunk->nextChunkInFreeList;
    
    if(removedChunk->nextChunkInFreeList)
        removedChunk->nextChunkInFreeList->prevChunkInFreeList = 
                (MallocProlog*)container;
    
    return removedChunk;
}

inline
size_t getChunkSize(MallocProlog* chunk)
{
    return (uintptr_t)chunk->nextHigherInMem -
            (uintptr_t)chunk - sizeof(MallocProlog);
}

/*
 * Removes a chunk from a free list.
 */
inline
void extractChunk(MallocProlog* chunk, MallocArrays *freeLists)
{
   chunk->prevChunkInFreeList->nextChunkInFreeList = chunk->nextChunkInFreeList;
   if(chunk->nextChunkInFreeList)
       chunk->nextChunkInFreeList->prevChunkInFreeList = chunk->prevChunkInFreeList;
   
   //The last element in the list points to the container. If the container points
   //to NULL the container is empty
   if(*((void**)(chunk->prevChunkInFreeList)) == NULL && getChunkSize(chunk) >= BIG_LOWER_BOUND)
   {
       //Find the approppiate container because we do not know it
       uint64 containerIdx = ((uintptr_t)chunk->prevChunkInFreeList - (uintptr_t)freeLists->bigChunks) >> 3;
       if(containerIdx < (uint32)64)
           freeLists->bigChunksSearchVector[0] &= ~((uint64)1 << containerIdx); 
       if(containerIdx < 128 && containerIdx >=64)
           freeLists->bigChunksSearchVector[1] &= ~((uint64)1 << (containerIdx-64)); 
       
   }
}

/*
 * Merges two chunks.
 * Chunk A has to be before chunk B in memory. Both have to be removed from
 * a free list
 */
inline
MallocProlog *mergeChunks(MallocProlog* chunkA, MallocProlog* chunkB)
{
    chunkA->nextHigherInMem = chunkB->nextHigherInMem;
    chunkB->nextHigherInMem->nextLowerInMem = chunkA;
    return chunkA;
}
/*
 * Inserts a chunk into a free list.
 */
inline
void insertChunk(MallocProlog* chunk, MallocProlog** container)
{
    chunk->nextChunkInFreeList = *container;
    chunk->prevChunkInFreeList = (MallocProlog*)container;
    if(*container)
        (*container)->prevChunkInFreeList = chunk;
    *container = chunk;
}

/*
 * Divides the chunk that a new chunk of newSize is created.
 * There is no size check, so make sure the size value is valid.
 */
inline
MallocProlog *divideChunk(MallocProlog* chunk, size_t newSize)
{
    MallocProlog* newChunk = (MallocProlog*)((uintptr_t)chunk->nextHigherInMem -
            newSize - sizeof(MallocProlog));
    
    newChunk->nextLowerInMem  = chunk;
    newChunk->nextHigherInMem = chunk->nextHigherInMem;
    
    chunk->nextHigherInMem->nextLowerInMem = newChunk;
    chunk->nextHigherInMem = newChunk;
    
    return newChunk;
}

/* 
 * Search for chunk in the list of big chunks. Split the block if it's too big
 */
inline
MallocProlog *searchChunk(MallocArrays *freeLists, size_t sizeRequested, uint32 containerIdx)
{
    MallocProlog* foundChunk;
    
    uint64 searchVector = freeLists->bigChunksSearchVector[0];
    //set small chunk bits to zero
    searchVector &= MAX_UINT64 << containerIdx;
    containerIdx = __builtin_ffsl(searchVector);

    if(containerIdx == 0)
    {
       searchVector = freeLists->bigChunksSearchVector[1];
       containerIdx = __builtin_ffsl(searchVector);
       if(containerIdx == 0)
       {
           printf("VMS malloc failed: low memory");
           exit(1);   
       }
       containerIdx += 64;
    }
    containerIdx--;
    

    foundChunk = removeChunk(freeLists, containerIdx);
    size_t chunkSize     = getChunkSize(foundChunk);

    //If the new chunk is larger than the requested size: split
    if(chunkSize > sizeRequested + 2 * sizeof(MallocProlog) + BIG_LOWER_BOUND)
    {
       MallocProlog *newChunk = divideChunk(foundChunk,sizeRequested);
       containerIdx = getContainer(getChunkSize(foundChunk)) - 1;
       insertChunk(foundChunk,&freeLists->bigChunks[containerIdx]);
       if(containerIdx < 64)
           freeLists->bigChunksSearchVector[0] |= ((uint64)1 << containerIdx);
       else
           freeLists->bigChunksSearchVector[1] |= ((uint64)1 << (containerIdx-64));
       foundChunk = newChunk;
    } 
    
    return foundChunk;
}


/*
 * This is sequential code, meant to only be called from the Master, not from
 * any slave Slvs.
 */
void *VMS_int__malloc( size_t sizeRequested )
 {     
   //============================= MEASUREMENT STUFF ========================
   #ifdef MEAS__TIME_MALLOC
   int32 startStamp, endStamp;
   saveLowTimeStampCountInto( startStamp );
   #endif
   //========================================================================
   
   MallocArrays* freeLists = _VMSMasterEnv->freeLists;
   MallocProlog* foundChunk;
   
   //Return a small chunk if the requested size is smaller than 128B
   if(sizeRequested <= LOWER_BOUND)
   {
       uint32 freeListIdx = (sizeRequested-1)/SMALL_CHUNK_SIZE;
       if(freeLists->smallChunks[freeListIdx] == NULL)
           foundChunk = searchChunk(freeLists, SMALL_CHUNK_SIZE*(freeListIdx+1), 0);
       else
           foundChunk = removeSmallChunk(freeLists, freeListIdx);
       
       //Mark as allocated
       foundChunk->prevChunkInFreeList = NULL;      
       return foundChunk + 1;
   }
   
   //Calculate the expected container. Start one higher to have a Chunk that's
   //always big enough.
   uint32 containerIdx = getContainer(sizeRequested);
   
   if(freeLists->bigChunks[containerIdx] == NULL)
       foundChunk = searchChunk(freeLists, sizeRequested, containerIdx); 
   else
       foundChunk = removeChunk(freeLists, containerIdx); 
   
   //Mark as allocated
   foundChunk->prevChunkInFreeList = NULL;      
   
   //============================= MEASUREMENT STUFF ========================
   #ifdef MEAS__TIME_MALLOC
   saveLowTimeStampCountInto( endStamp );
   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
   #endif
   //========================================================================
   
   //skip over the prolog by adding its size to the pointer return
   return foundChunk + 1;
 }

/*
 * This is sequential code, meant to only be called from the Master, not from
 * any slave Slvs.
 */
void
VMS_int__free( void *ptrToFree )
 {
    
         MEAS__Capture_Pre_Free_Point;
         
   MallocArrays* freeLists = _VMSMasterEnv->freeLists;
   MallocProlog *chunkToFree = (MallocProlog*)ptrToFree - 1;
   uint32 containerIdx;
   
   //Check for free neighbors
   if(chunkToFree->nextLowerInMem)
   {
       if(chunkToFree->nextLowerInMem->prevChunkInFreeList != NULL)
       {//Chunk is not allocated
           extractChunk(chunkToFree->nextLowerInMem, freeLists);
           chunkToFree = mergeChunks(chunkToFree->nextLowerInMem, chunkToFree);
       }
   }
   if(chunkToFree->nextHigherInMem)
   {
       if(chunkToFree->nextHigherInMem->prevChunkInFreeList != NULL)
       {//Chunk is not allocated
           extractChunk(chunkToFree->nextHigherInMem, freeLists);
           chunkToFree = mergeChunks(chunkToFree, chunkToFree->nextHigherInMem);
       }
   }
   
   size_t chunkSize = getChunkSize(chunkToFree);
   if(chunkSize < BIG_LOWER_BOUND)
   {
       containerIdx =  (chunkSize/SMALL_CHUNK_SIZE)-1;
       if(containerIdx > SMALL_CHUNK_COUNT-1)
           containerIdx = SMALL_CHUNK_COUNT-1;
       insertChunk(chunkToFree, &freeLists->smallChunks[containerIdx]);
   }
   else
   {
       containerIdx = getContainer(getChunkSize(chunkToFree)) - 1;
       insertChunk(chunkToFree, &freeLists->bigChunks[containerIdx]);
       if(containerIdx < 64)
           freeLists->bigChunksSearchVector[0] |= (uint64)1 << containerIdx;
       else
           freeLists->bigChunksSearchVector[1] |= (uint64)1 << (containerIdx-64);
   }   
   
         MEAS__Capture_Post_Free_Point;
 }

/*
 * Designed to be called from the main thread outside of VMS, during init
 */
MallocArrays *
VMS_ext__create_free_list()
{     
   //Initialize containers for small chunks and fill with zeros
   _VMSMasterEnv->freeLists = (MallocArrays*)malloc( sizeof(MallocArrays) );
   MallocArrays *freeLists = _VMSMasterEnv->freeLists;
   
   freeLists->smallChunks = 
           (MallocProlog**)malloc(SMALL_CHUNK_COUNT*sizeof(MallocProlog*));
   memset((void*)freeLists->smallChunks,
           0,SMALL_CHUNK_COUNT*sizeof(MallocProlog*));
   
   //Calculate number of containers for big chunks
   uint32 container = getContainer(MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE)+1;
   freeLists->bigChunks = (MallocProlog**)malloc(container*sizeof(MallocProlog*));
   memset((void*)freeLists->bigChunks,0,container*sizeof(MallocProlog*));
   freeLists->containerCount = container;
   
   //Create first element in lastContainer 
   MallocProlog *firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
   if( firstChunk == NULL ) {printf("Can't allocate initial memory\n"); exit(1);}
   freeLists->memSpace = firstChunk;
   
   //Touch memory to avoid page faults
   void *ptr,*endPtr; 
   endPtr = (void*)firstChunk+MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE;
   for(ptr = firstChunk; ptr < endPtr; ptr+=PAGE_SIZE)
   {
       *(char*)ptr = 0;
   }
   
   firstChunk->nextLowerInMem = NULL;
   firstChunk->nextHigherInMem = (MallocProlog*)((uintptr_t)firstChunk +
                        MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE - sizeof(MallocProlog));
   firstChunk->nextChunkInFreeList = NULL;
   //previous element in the queue is the container
   firstChunk->prevChunkInFreeList = &freeLists->bigChunks[container-2];
   
   freeLists->bigChunks[container-2] = firstChunk;
   //Insert into bit search list
   if(container <= 65)
   {
       freeLists->bigChunksSearchVector[0] = ((uint64)1 << (container-2));
       freeLists->bigChunksSearchVector[1] = 0;
   }   
   else
   {
       freeLists->bigChunksSearchVector[0] = 0;
       freeLists->bigChunksSearchVector[1] = ((uint64)1 << (container-66));
   }
   
   //Create dummy chunk to mark the top of stack this is of course
   //never freed
   MallocProlog *dummyChunk = firstChunk->nextHigherInMem;
   dummyChunk->nextHigherInMem = dummyChunk+1;
   dummyChunk->nextLowerInMem  = NULL;
   dummyChunk->nextChunkInFreeList = NULL;
   dummyChunk->prevChunkInFreeList = NULL;
   
   return freeLists;
 }


/*Designed to be called from the main thread outside of VMS, during cleanup
 */
void
VMS_ext__free_free_list( MallocArrays *freeLists )
 {    
   free(freeLists->memSpace);
   free(freeLists->bigChunks);
   free(freeLists->smallChunks);
   
 }

