/*
 *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
 *  Licensed under GNU General Public License version 2
 *
 * Author: seanhalle@yahoo.com
 *
 * Created on November 14, 2009, 9:07 PM
 */

#include <malloc.h>
#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>

#include "VMS.h"
#include "Histogram/Histogram.h"

/*Helper function
 *Insert a newly generated free chunk into the first spot on the free list.
 * The chunk is cast as a MallocProlog, so the various pointers in it are
 * accessed with C's help -- and the size of the prolog is easily added to
 * the pointer when a chunk is returned to the app -- so C handles changes
 * in pointer sizes among machines.
 *
 *The list head is a normal MallocProlog struct -- identified by its
 * prevChunkInFreeList being NULL -- the only one.
 *
 *The end of the list is identified by next chunk being NULL, as usual.
 */
void inline
add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead )
 { 
   chunk->nextChunkInFreeList     = listHead->nextChunkInFreeList;
   if( chunk->nextChunkInFreeList != NULL ) //if not last in free list
      chunk->nextChunkInFreeList->prevChunkInFreeList = chunk;
   chunk->prevChunkInFreeList     = listHead;
   listHead->nextChunkInFreeList  = chunk;
 }


/*This is sequential code, meant to only be called from the Master, not from
 * any slave VPs.
 *Search down list, checking size by the nextHigherInMem pointer, to find
 * first chunk bigger than size needed.
 *Shave off the extra and make it into a new free-list element, hook it in
 * then return the address of the found element plus size of prolog.
 *
 *Will find a
 */
void *VMS__malloc( size_t sizeRequested )
 { MallocProlog *foundElem = NULL, *currElem, *newElem;
   ssize_t        amountExtra, sizeConsumed,sizeOfFound;
   uint32        foundElemIsTopOfHeap;

   //============================= MEASUREMENT STUFF ========================
   #ifdef MEAS__TIME_MALLOC
   int32 startStamp, endStamp;
   saveLowTimeStampCountInto( startStamp );
   #endif
   //========================================================================
   
      //step up the size to be aligned at 16-byte boundary, prob better ways
   sizeRequested = (sizeRequested + 16) & ~15;
   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;

   while( currElem != NULL )
    {    //check if size of currElem is big enough
      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
      if( amountExtra > 0 )
       {    //found it, get out of loop
         foundElem = currElem;
         currElem = NULL;
       }
      else
         currElem = currElem->nextChunkInFreeList;
    }
   
   if( foundElem == NULL )
    { ERROR("\nmalloc failed\n")
      return (void *)NULL;  //indicates malloc failed
    }
      //Using a kludge to identify the element that is the top chunk in the
      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
      // save addr of start of heap in head's nextLowerInMem
      //Will handle top of Heap specially
   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
                          _VMSMasterEnv->freeListHead->nextHigherInMem;
   
      //before shave off and try to insert new elem, remove found elem
      //note, foundElem will never be the head, so always has valid prevChunk
   foundElem->prevChunkInFreeList->nextChunkInFreeList =
                                              foundElem->nextChunkInFreeList;
   if( foundElem->nextChunkInFreeList != NULL )
    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
                                              foundElem->prevChunkInFreeList;
    }
   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
   
      //if enough, turn extra into new elem & insert it
   if( amountExtra > 64 )
    {   //make new elem by adding to addr of curr elem then casting
        sizeConsumed = sizeof(MallocProlog) + sizeRequested; 
        newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
        newElem->nextLowerInMem    = foundElem; //This is evil (but why?) 
        newElem->nextHigherInMem   = foundElem->nextHigherInMem; //This is evil (but why?)
        foundElem->nextHigherInMem = newElem;
        if( ! foundElemIsTopOfHeap )
        {  //there is no next higher for top of heap, so can't write to it
           newElem->nextHigherInMem->nextLowerInMem = newElem;
        }
        add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
    }
   else
    {
      sizeConsumed = sizeOfFound;
    }
  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;

   //============================= MEASUREMENT STUFF ========================
   #ifdef MEAS__TIME_MALLOC
   saveLowTimeStampCountInto( endStamp );
   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
   #endif
   //========================================================================

      //skip over the prolog by adding its size to the pointer return
   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
 }

/*This is sequential code, meant to only be called from the Master, not from
 * any slave VPs.
 *Search down list, checking size by the nextHigherInMem pointer, to find
 * first chunk bigger than size needed.
 *Shave off the extra and make it into a new free-list element, hook it in
 * then return the address of the found element plus size of prolog.
 *
 * The difference to the regular malloc is, that all the allocated chunks are
 * aligned and padded to the size of a CACHE_LINE. Thus creating a new chunk
 * before the aligned chunk.
 */
void *VMS__malloc_aligned( size_t sizeRequested )
 { MallocProlog *foundElem = NULL, *currElem, *newElem;
   ssize_t        amountExtra, sizeConsumed,sizeOfFound,prevAmount;
   uint32        foundElemIsTopOfHeap;

   //============================= MEASUREMENT STUFF ========================
   #ifdef MEAS__TIME_MALLOC
   uint32 startStamp, endStamp;
   saveLowTimeStampCountInto( startStamp );
   #endif
   //========================================================================
   
      //step up the size to be multiple of the cache line size
   sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1);
   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;

   while( currElem != NULL )
    {    //check if size of currElem is big enough
      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
      if( amountExtra > 0 )
       {    
         //look if the found element is already aligned
         if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){
             //found it, get out of loop
             foundElem = currElem;
             break;
         }else{
             //find first aligned address and check if it's still big enough
             //check also if the space before the aligned address is big enough
             //for a new element
             void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1)));
             prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem;
             sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog);
             amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog);
             if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){
                 //found suitable element
                 //create new previous element and exit loop
                 MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1;
                 
                 //insert new element into free list
                 if(currElem->nextChunkInFreeList != NULL)
                     currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem;                     
                 newAlignedElem->prevChunkInFreeList = currElem;
                 newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList;
                 currElem->nextChunkInFreeList = newAlignedElem;
                 
                 //set higherInMem and lowerInMem
                 newAlignedElem->nextHigherInMem = currElem->nextHigherInMem;
                 foundElemIsTopOfHeap = currElem->nextHigherInMem ==
                          _VMSMasterEnv->freeListHead->nextHigherInMem;
                 if(!foundElemIsTopOfHeap)
                     currElem->nextHigherInMem->nextLowerInMem = newAlignedElem;
                 currElem->nextHigherInMem = newAlignedElem;
                 newAlignedElem->nextLowerInMem = currElem;
                 
                 //Found new element leaving loop
                 foundElem = newAlignedElem;
                 break;
             }
         }
         
       }
       currElem = currElem->nextChunkInFreeList;
    }

   if( foundElem == NULL )
    { ERROR("\nmalloc failed\n")
      return (void *)NULL;  //indicates malloc failed
    }
      //Using a kludge to identify the element that is the top chunk in the
      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
      // save addr of start of heap in head's nextLowerInMem
      //Will handle top of Heap specially
   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
                          _VMSMasterEnv->freeListHead->nextHigherInMem;

      //before shave off and try to insert new elem, remove found elem
      //note, foundElem will never be the head, so always has valid prevChunk
   foundElem->prevChunkInFreeList->nextChunkInFreeList =
                                              foundElem->nextChunkInFreeList;
   if( foundElem->nextChunkInFreeList != NULL )
    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
                                              foundElem->prevChunkInFreeList;
    }
   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
   
      //if enough, turn extra into new elem & insert it
   if( amountExtra > 64 )
    {    //make new elem by adding to addr of curr elem then casting
      sizeConsumed = sizeof(MallocProlog) + sizeRequested;
      newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
      newElem->nextHigherInMem   = foundElem->nextHigherInMem;
      newElem->nextLowerInMem    = foundElem;
      foundElem->nextHigherInMem = newElem;
      
      if( ! foundElemIsTopOfHeap )
       {    //there is no next higher for top of heap, so can't write to it
         newElem->nextHigherInMem->nextLowerInMem = newElem;
       }
      add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
    }
   else
    {
      sizeConsumed = sizeOfFound;
    }
  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;

   //============================= MEASUREMENT STUFF ========================
   #ifdef MEAS__TIME_MALLOC
   saveLowTimeStampCountInto( endStamp );
   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
   #endif
   //========================================================================

      //skip over the prolog by adding its size to the pointer return
   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
 }


/*This is sequential code -- only to be called from the Master
 * When free, subtract the size of prolog from pointer, then cast it to a
 * MallocProlog.  Then check the nextLower and nextHigher chunks to see if
 * one or both are also free, and coalesce if so, and if neither free, then
 * add this one to free-list.
 */
void
VMS__free( void *ptrToFree )
 { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem;
   size_t         sizeOfElem;
   uint32         lowerExistsAndIsFree, higherExistsAndIsFree;

   //============================= MEASUREMENT STUFF ========================
   #ifdef MEAS__TIME_MALLOC
   int32 startStamp, endStamp;
   saveLowTimeStampCountInto( startStamp );
   #endif
   //========================================================================

   if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem ||
       ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem )
    {    //outside the range of data owned by VMS's malloc, so do nothing
      return;
    }
      //subtract size of prolog to get pointer to prolog, then cast
   elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog));
   sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree);

   if( elemToFree->prevChunkInFreeList != NULL )
    { printf( "error: freeing same element twice!" ); exit(1);
    }

   _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem;

   nextLowerElem  = elemToFree->nextLowerInMem;
   nextHigherElem = elemToFree->nextHigherInMem;

   if( nextHigherElem == NULL )
      higherExistsAndIsFree = FALSE;
   else //okay exists, now check if in the free-list by checking back ptr
      higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL);
    
   if( nextLowerElem == NULL )
      lowerExistsAndIsFree = FALSE;
   else //okay, it exists, now check if it's free
      lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL);
    

      //now, know what exists and what's free
   if( lowerExistsAndIsFree )
    { if( higherExistsAndIsFree )
       {    //both exist and are free, so coalesce all three
            //First, remove higher from free-list
         nextHigherElem->prevChunkInFreeList->nextChunkInFreeList =
                                         nextHigherElem->nextChunkInFreeList;
         if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list?
            nextHigherElem->nextChunkInFreeList->prevChunkInFreeList =
                                         nextHigherElem->prevChunkInFreeList;
            //Now, fix-up sequence-in-mem list -- by side-effect, this also
            // changes size of the lower elem, which is still in free-list
         nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem;
         if( nextHigherElem->nextHigherInMem !=
             _VMSMasterEnv->freeListHead->nextHigherInMem )
            nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem;
            //notice didn't do anything to elemToFree -- it simply is no
            // longer reachable from any of the lists.  Wonder if could be a
            // security leak because left valid addresses in it,
            // but don't care for now.
       }
      else
       {    //lower is the only of the two that exists and is free,
            //In this case, no adjustment to free-list, just change mem-list.
            // By side-effect, changes size of the lower elem
         nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem;
         if( elemToFree->nextHigherInMem !=
             _VMSMasterEnv->freeListHead->nextHigherInMem )
            elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem;
       }
    }
   else
    {    //lower either doesn't exist or isn't free, so check higher
      if( higherExistsAndIsFree )
       {    //higher exists and is the only of the two free
            //First, in free-list, replace higher elem with the one to free
         elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList;
         elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList;
         elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree;
         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
            //Now chg mem-list. By side-effect, changes size of elemToFree
         elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem;
         if( elemToFree->nextHigherInMem !=
             _VMSMasterEnv->freeListHead->nextHigherInMem )
            elemToFree->nextHigherInMem->nextLowerInMem = elemToFree;
       }
      else
       {    //neither lower nor higher is availabe to coalesce so add to list
            // this makes prev chunk ptr non-null, which indicates it's free
         elemToFree->nextChunkInFreeList =
                            _VMSMasterEnv->freeListHead->nextChunkInFreeList;
         _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree;
         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
         elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead;
       }
    }
   //============================= MEASUREMENT STUFF ========================
   #ifdef MEAS__TIME_MALLOC
   saveLowTimeStampCountInto( endStamp );
   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist );
   #endif
   //========================================================================

 }


/*Allocates memory from the external system -- higher overhead
 *
 *Because of Linux's malloc throwing bizarre random faults when malloc is
 * used inside a VMS virtual processor, have to pass this as a request and
 * have the core loop do it when it gets around to it -- will look for these
 * chores leftover from the previous animation of masterVP the next time it
 * goes to animate the masterVP -- so it takes two separate masterVP
 * animations, separated by work, to complete an external malloc or
 * external free request.
 *
 *Thinking core loop accepts signals -- just looks if signal-location is
 * empty or not --
 */
void *
VMS__malloc_in_ext( size_t sizeRequested )
 {
 /*
      //This is running in the master, so no chance for multiple cores to be
      // competing for the core's flag.
   if(  *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 )
    {    //something has already signalled to core loop, so save the signal
         // and look, next time master animated, to see if can send it.
         //Note, the addr to put a signal is in the coreloop's frame, so just
         // checks it each time through -- make it volatile to avoid GCC
         // optimizations -- it's a coreloop local var that only changes
         // after jumping away.  The signal includes the addr to send the
         //return to -- even if just empty return completion-signal
         //
         //save the signal in some queue that the master looks at each time
         // it starts up -- one loc says if empty for fast common case --
         //something like that -- want to hide this inside this call -- but
         // think this has to come as a request -- req handler gives procr
         // back to master loop, which gives it back to req handler at point
         // it sees that core loop has sent return signal.  Something like
         // that.
      saveTheSignal

    }
  coreSigData->type = malloc;
  coreSigData->sizeToMalloc = sizeRequested;
  coreSigData->locToSignalCompletion = &figureOut;
   _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData;
  */
      //just risk system-stack faults until get this figured out
   return malloc( sizeRequested );
 }


/*Frees memory that was allocated in the external system -- higher overhead
 *
 *As noted in external malloc comment, this is clunky 'cause the free has
 * to be called in the core loop.
 */
void
VMS__free_in_ext( void *ptrToFree )
 {
      //just risk system-stack faults until get this figured out
   free( ptrToFree );

      //TODO: fix this -- so 
 }


/*Designed to be called from the main thread outside of VMS, during init
 */
MallocProlog *
VMS_ext__create_free_list()
 { MallocProlog *freeListHead, *firstChunk;

      //Note, this is running in the main thread -- all increases in malloc
      // mem and all frees of it must be done in this thread, with the
      // thread's original stack available
   freeListHead = malloc( sizeof(MallocProlog) );
   firstChunk   = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
   if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);}
   
   //Touch memory to avoid page faults
   void *ptr,*endPtr; 
   endPtr = (void*)firstChunk+MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE;
   for(ptr = firstChunk; ptr < endPtr; ptr+=PAGE_SIZE)
   {
       *(char*)ptr = 0;
   }

   freeListHead->prevChunkInFreeList = NULL;
      //Use this addr to free the heap when cleanup
   freeListHead->nextLowerInMem      = firstChunk;
      //to identify top-of-heap elem, compare this addr to elem's next higher
   freeListHead->nextHigherInMem     = (void*)( (uintptr_t)firstChunk +
                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
   freeListHead->nextChunkInFreeList = firstChunk;

   firstChunk->nextChunkInFreeList   = NULL;
   firstChunk->prevChunkInFreeList   = freeListHead;
      //next Higher has to be set to top of chunk, so can calc size in malloc
   firstChunk->nextHigherInMem       = (void*)( (uintptr_t)firstChunk +
                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
   firstChunk->nextLowerInMem        = NULL; //identifies as bott of heap
   
   _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet

   return freeListHead;
 }


/*Designed to be called from the main thread outside of VMS, during cleanup
 */
void
VMS_ext__free_free_list( MallocProlog *freeListHead )
 {    
      //stashed a ptr to the one and only bug chunk malloc'd from OS in the
      // free list head's next lower in mem pointer
   free( freeListHead->nextLowerInMem );

   //don't free the head -- it'll be in an array eventually -- free whole
   // array when all the free lists linked from it have already been freed
 }

