Mercurial > cgi-bin > hgwebdir.cgi > VMS > VMS_Implementations > VMS_impls > VMS__MC_shared_impl

changeset 178:c1784868dcea
testing hgeol -- see if it fixes line-ending issues -- commit line endings
author: Me@portablequad
date: Wed, 04 Jan 2012 16:10:11 -0800
parents: ad8213a8e916
children: 0cadabf64cfa
files: .hgeol CoreLoop.c MasterLoop.c ProcrContext.h VMS.h VMS_primitive_data_types.h probes.h vmalloc.c vmalloc.h vutilities.c vutilities.h
diffstat: 11 files changed, 2060 insertions(+), 2048 deletions(-) [+]
[-]

.hgeol 12

CoreLoop.c 430

MasterLoop.c 746

ProcrContext.h 66

VMS.h 1158

VMS_primitive_data_types.h 106

probes.h 390

vmalloc.c 990

vmalloc.h 120

vutilities.c 50

vutilities.h 40
.hgeol 12
CoreLoop.c 430
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/.hgeol	Wed Jan 04 16:10:11 2012 -0800
     1.3 @@ -0,0 +1,12 @@
     1.4 +
     1.5 +[patterns]
     1.6 +**.py = native
     1.7 +**.txt = native
     1.8 +**.c = native
     1.9 +**.h = native
    1.10 +**.cpp = native
    1.11 +**.java = native
    1.12 +**.sh = native
    1.13 +**.pl = native
    1.14 +**.jpg = bin
    1.15 +**.gif = bin

     2.1 --- a/CoreLoop.c	Thu Oct 06 16:24:17 2011 +0200
     2.2 +++ b/CoreLoop.c	Wed Jan 04 16:10:11 2012 -0800
     2.3 @@ -1,215 +1,215 @@
     2.4 -/*
     2.5 - * Copyright 2010  OpenSourceStewardshipFoundation
     2.6 - *
     2.7 - * Licensed under BSD
     2.8 - */
     2.9 -
    2.10 -
    2.11 -#include "VMS.h"
    2.12 -#include "Queue_impl/BlockingQueue.h"
    2.13 -#include "ProcrContext.h"
    2.14 -
    2.15 -#include <stdlib.h>
    2.16 -#include <stdio.h>
    2.17 -#include <time.h>
    2.18 -
    2.19 -#include <pthread.h>
    2.20 -#include <sched.h>
    2.21 -
    2.22 -void *terminateCoreLoop(VirtProcr *currPr);
    2.23 -
    2.24 -/*This is the loop that runs in the OS Thread pinned to each core
    2.25 - *Get virt procr from queue,
    2.26 - * save state of current animator, then load in state of virt procr, using
    2.27 - * jmp instr to switch the program-counter state -- making the virt procr
    2.28 - * the new animator.
    2.29 - *At some point, the virt procr will suspend itself by saving out its
    2.30 - * animator state (stack ptr, frame ptr, program counter) and switching
    2.31 - * back to the OS Thread's animator state, which means restoring the
    2.32 - * stack and frame and jumping to the core loop start point.
    2.33 - *This cycle then repeats, until a special shutdown virtual processor is
    2.34 - * animated, which jumps to the end point at the bottom of core loop.
    2.35 - */
    2.36 -void *
    2.37 -coreLoop( void *paramsIn )
    2.38 - { 
    2.39 -   ThdParams      *coreLoopThdParams;
    2.40 -   int             thisCoresIdx;
    2.41 -   VirtProcr      *currPr;
    2.42 -   VMSQueueStruc *readyToAnimateQ;
    2.43 -   cpu_set_t   coreMask;  //has 1 in bit positions of allowed cores
    2.44 -   int             errorCode;
    2.45 -
    2.46 -      //work-stealing struc on stack to prevent false-sharing in cache-line
    2.47 -   volatile GateStruc gate;
    2.48 -   //preGateProgress, waitProgress, exitProgress, gateClosed;
    2.49 -
    2.50 -
    2.51 -   coreLoopThdParams = (ThdParams *)paramsIn;
    2.52 -   thisCoresIdx = coreLoopThdParams->coreNum;
    2.53 -
    2.54 -   gate.gateClosed      = FALSE;
    2.55 -   gate.preGateProgress = 0;
    2.56 -   gate.waitProgress    = 0;
    2.57 -   gate.exitProgress    = 0;
    2.58 -   _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate;//race @startup
    2.59 -
    2.60 -      //wait until signalled that setup is complete
    2.61 -   pthread_mutex_lock(   &suspendLock );
    2.62 -   while( !(_VMSMasterEnv->setupComplete) )
    2.63 -    {
    2.64 -      pthread_cond_wait( &suspend_cond,
    2.65 -                         &suspendLock );
    2.66 -    }
    2.67 -   pthread_mutex_unlock( &suspendLock );
    2.68 -
    2.69 -      //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum );
    2.70 -
    2.71 -      //set thread affinity
    2.72 -      //Linux requires pinning thd to core inside thread-function
    2.73 -      //Designate a core by a 1 in bit-position corresponding to the core
    2.74 -   CPU_ZERO(&coreMask);
    2.75 -   CPU_SET(coreLoopThdParams->coreNum,&coreMask);
    2.76 -   //coreMask = 1L << coreLoopThdParams->coreNum;
    2.77 -
    2.78 -   pthread_t selfThd = pthread_self();
    2.79 -   errorCode =
    2.80 -   pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask);
    2.81 -   
    2.82 -   if(errorCode){ printf("\nset affinity failure\n"); exit(0); }
    2.83 -
    2.84 -   
    2.85 -   //Save the return address in the SwitchVP function
    2.86 -   saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt));
    2.87 -
    2.88 -   
    2.89 -   while(1){
    2.90 -   
    2.91 -      //Get virtual processor from queue
    2.92 -      //The Q must be a global, static volatile var, so not kept in reg,
    2.93 -      // which forces reloading the pointer after each jmp to this point
    2.94 -   readyToAnimateQ  = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx];
    2.95 -
    2.96 -   #ifdef USE_WORK_STEALING
    2.97 -      //Alg for work-stealing designed to make common case fast.  Comment
    2.98 -      // in stealer code explains.
    2.99 -   gate.preGateProgress++;
   2.100 -   if( gate.gateClosed )
   2.101 -    {    //now, set coreloop's progress, so stealer can see that core loop
   2.102 -         // has made it into the waiting area.
   2.103 -      gate.waitProgress = gate.preGateProgress;
   2.104 -      while( gate.gateClosed ) /*busy wait*/;
   2.105 -    }
   2.106 -
   2.107 -   currPr = (VirtProcr *) readVMSQ( readyToAnimateQ );
   2.108 -
   2.109 -      //Set the coreloop's progress, so stealer can see it has made it out
   2.110 -      // of the protected area
   2.111 -   gate.exitProgress = gate.preGateProgress;
   2.112 -   #else
   2.113 -   currPr = (VirtProcr *) readVMSQ( readyToAnimateQ );
   2.114 -   #endif
   2.115 -
   2.116 -   if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0;
   2.117 -   else
   2.118 -    {
   2.119 -      //============================= MEASUREMENT STUFF =====================
   2.120 -      #ifdef MEAS__TIME_MASTER_LOCK
   2.121 -      int32 startStamp, endStamp;
   2.122 -      saveLowTimeStampCountInto( startStamp );
   2.123 -      #endif
   2.124 -      //=====================================================================
   2.125 -      int tries = 0; int gotLock = 0;
   2.126 -      while( currPr == NULL ) //if queue was empty, enter get masterLock loop
   2.127 -       {    //queue was empty, so get master lock
   2.128 -
   2.129 -         gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock),
   2.130 -                                                          UNLOCKED, LOCKED );
   2.131 -         if( gotLock )
   2.132 -          {    //run own MasterVP -- jmps to coreLoops startPt when done
   2.133 -            currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
   2.134 -            if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 )
   2.135 -             {       DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n");
   2.136 -               pthread_yield();
   2.137 -             }
   2.138 -            _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1;
   2.139 -            break;  //end while -- have a VP to animate now
   2.140 -          }
   2.141 -
   2.142 -         tries++;      //if too many, means master on other core taking too long
   2.143 -         if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); }
   2.144 -       }
   2.145 -      //============================= MEASUREMENT STUFF =====================
   2.146 -      #ifdef MEAS__TIME_MASTER_LOCK
   2.147 -      saveLowTimeStampCountInto( endStamp );
   2.148 -      addIntervalToHist( startStamp, endStamp,
   2.149 -                         _VMSMasterEnv->masterLockLowTimeHist );
   2.150 -      addIntervalToHist( startStamp, endStamp,
   2.151 -                         _VMSMasterEnv->masterLockHighTimeHist );
   2.152 -      #endif
   2.153 -      //=====================================================================
   2.154 -
   2.155 -    }
   2.156 -
   2.157 -   
   2.158 -   switchToVP(currPr); //The VPs return in here
   2.159 -   flushRegisters();
   2.160 -   }//CoreLoop      
   2.161 - }
   2.162 -
   2.163 -
   2.164 -void *
   2.165 -terminateCoreLoop(VirtProcr *currPr){
   2.166 -   //first free shutdown VP that jumped here -- it first restores the
   2.167 -   // coreloop's stack, so addr of currPr in stack frame is still correct
   2.168 -   VMS__dissipate_procr( currPr );
   2.169 -   pthread_exit( NULL );
   2.170 -}
   2.171 -
   2.172 -
   2.173 -
   2.174 -#ifdef SEQUENTIAL
   2.175 -
   2.176 -//===========================================================================
   2.177 -/*This sequential version is exact same as threaded, except doesn't do the
   2.178 - * pin-threads part, nor the wait until setup complete part.
   2.179 - */
   2.180 -void *
   2.181 -coreLoop_Seq( void *paramsIn )
   2.182 - {
   2.183 -   VirtProcr      *currPr;
   2.184 -   VMSQueueStruc *readyToAnimateQ;
   2.185 -   
   2.186 -   ThdParams      *coreLoopThdParams;
   2.187 -   int             thisCoresIdx;
   2.188 -   
   2.189 -   coreLoopThdParams = (ThdParams *)paramsIn;
   2.190 -//   thisCoresIdx = coreLoopThdParams->coreNum;
   2.191 -   thisCoresIdx = 0;
   2.192 -
   2.193 -   //Save the return address in the SwitchVP function
   2.194 -   saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt));
   2.195 -
   2.196 -   
   2.197 -   while(1){
   2.198 -      //Get virtual processor from queue
   2.199 -      //_VMSWorkQ must be a global, static volatile var, so not kept in reg,
   2.200 -      // which forces reloading the pointer after each jmp to this point
   2.201 -   readyToAnimateQ  = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx];
   2.202 -   currPr = (VirtProcr *) readVMSQ( readyToAnimateQ );
   2.203 -   if( currPr == NULL )
   2.204 -    { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 )
   2.205 -       { printf("too many back to back MasterVP\n"); exit(1); }
   2.206 -      _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1;
   2.207 -      
   2.208 -      currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
   2.209 -    }
   2.210 -   else
   2.211 -      _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0;
   2.212 -
   2.213 -
   2.214 -   switchToVP( currPr );
   2.215 -   flushRegisters();
   2.216 -   }
   2.217 - }
   2.218 -#endif
   2.219 +/*
   2.220 + * Copyright 2010  OpenSourceStewardshipFoundation
   2.221 + *
   2.222 + * Licensed under BSD
   2.223 + */
   2.224 +
   2.225 +
   2.226 +#include "VMS.h"
   2.227 +#include "Queue_impl/BlockingQueue.h"
   2.228 +#include "ProcrContext.h"
   2.229 +
   2.230 +#include <stdlib.h>
   2.231 +#include <stdio.h>
   2.232 +#include <time.h>
   2.233 +
   2.234 +#include <pthread.h>
   2.235 +#include <sched.h>
   2.236 +
   2.237 +void *terminateCoreLoop(VirtProcr *currPr);
   2.238 +
   2.239 +/*This is the loop that runs in the OS Thread pinned to each core
   2.240 + *Get virt procr from queue,
   2.241 + * save state of current animator, then load in state of virt procr, using
   2.242 + * jmp instr to switch the program-counter state -- making the virt procr
   2.243 + * the new animator.
   2.244 + *At some point, the virt procr will suspend itself by saving out its
   2.245 + * animator state (stack ptr, frame ptr, program counter) and switching
   2.246 + * back to the OS Thread's animator state, which means restoring the
   2.247 + * stack and frame and jumping to the core loop start point.
   2.248 + *This cycle then repeats, until a special shutdown virtual processor is
   2.249 + * animated, which jumps to the end point at the bottom of core loop.
   2.250 + */
   2.251 +void *
   2.252 +coreLoop( void *paramsIn )
   2.253 + { 
   2.254 +   ThdParams      *coreLoopThdParams;
   2.255 +   int             thisCoresIdx;
   2.256 +   VirtProcr      *currPr;
   2.257 +   VMSQueueStruc *readyToAnimateQ;
   2.258 +   cpu_set_t   coreMask;  //has 1 in bit positions of allowed cores
   2.259 +   int             errorCode;
   2.260 +
   2.261 +      //work-stealing struc on stack to prevent false-sharing in cache-line
   2.262 +   volatile GateStruc gate;
   2.263 +   //preGateProgress, waitProgress, exitProgress, gateClosed;
   2.264 +
   2.265 +
   2.266 +   coreLoopThdParams = (ThdParams *)paramsIn;
   2.267 +   thisCoresIdx = coreLoopThdParams->coreNum;
   2.268 +
   2.269 +   gate.gateClosed      = FALSE;
   2.270 +   gate.preGateProgress = 0;
   2.271 +   gate.waitProgress    = 0;
   2.272 +   gate.exitProgress    = 0;
   2.273 +   _VMSMasterEnv->workStealingGates[ thisCoresIdx ] = (GateStruc*)&gate;//race @startup
   2.274 +
   2.275 +      //wait until signalled that setup is complete
   2.276 +   pthread_mutex_lock(   &suspendLock );
   2.277 +   while( !(_VMSMasterEnv->setupComplete) )
   2.278 +    {
   2.279 +      pthread_cond_wait( &suspend_cond,
   2.280 +                         &suspendLock );
   2.281 +    }
   2.282 +   pthread_mutex_unlock( &suspendLock );
   2.283 +
   2.284 +      //printf( "\nCore unsuspended: %d\n", coreLoopThdParams->coreNum );
   2.285 +
   2.286 +      //set thread affinity
   2.287 +      //Linux requires pinning thd to core inside thread-function
   2.288 +      //Designate a core by a 1 in bit-position corresponding to the core
   2.289 +   CPU_ZERO(&coreMask);
   2.290 +   CPU_SET(coreLoopThdParams->coreNum,&coreMask);
   2.291 +   //coreMask = 1L << coreLoopThdParams->coreNum;
   2.292 +
   2.293 +   pthread_t selfThd = pthread_self();
   2.294 +   errorCode =
   2.295 +   pthread_setaffinity_np( selfThd, sizeof(coreMask), &coreMask);
   2.296 +   
   2.297 +   if(errorCode){ printf("\nset affinity failure\n"); exit(0); }
   2.298 +
   2.299 +   
   2.300 +   //Save the return address in the SwitchVP function
   2.301 +   saveCoreLoopReturnAddr((void**)&(_VMSMasterEnv->coreLoopReturnPt));
   2.302 +
   2.303 +   
   2.304 +   while(1){
   2.305 +   
   2.306 +      //Get virtual processor from queue
   2.307 +      //The Q must be a global, static volatile var, so not kept in reg,
   2.308 +      // which forces reloading the pointer after each jmp to this point
   2.309 +   readyToAnimateQ  = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx];
   2.310 +
   2.311 +   #ifdef USE_WORK_STEALING
   2.312 +      //Alg for work-stealing designed to make common case fast.  Comment
   2.313 +      // in stealer code explains.
   2.314 +   gate.preGateProgress++;
   2.315 +   if( gate.gateClosed )
   2.316 +    {    //now, set coreloop's progress, so stealer can see that core loop
   2.317 +         // has made it into the waiting area.
   2.318 +      gate.waitProgress = gate.preGateProgress;
   2.319 +      while( gate.gateClosed ) /*busy wait*/;
   2.320 +    }
   2.321 +
   2.322 +   currPr = (VirtProcr *) readVMSQ( readyToAnimateQ );
   2.323 +
   2.324 +      //Set the coreloop's progress, so stealer can see it has made it out
   2.325 +      // of the protected area
   2.326 +   gate.exitProgress = gate.preGateProgress;
   2.327 +   #else
   2.328 +   currPr = (VirtProcr *) readVMSQ( readyToAnimateQ );
   2.329 +   #endif
   2.330 +
   2.331 +   if( currPr != NULL ) _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0;
   2.332 +   else
   2.333 +    {
   2.334 +      //============================= MEASUREMENT STUFF =====================
   2.335 +      #ifdef MEAS__TIME_MASTER_LOCK
   2.336 +      int32 startStamp, endStamp;
   2.337 +      saveLowTimeStampCountInto( startStamp );
   2.338 +      #endif
   2.339 +      //=====================================================================
   2.340 +      int tries = 0; int gotLock = 0;
   2.341 +      while( currPr == NULL ) //if queue was empty, enter get masterLock loop
   2.342 +       {    //queue was empty, so get master lock
   2.343 +
   2.344 +         gotLock = __sync_bool_compare_and_swap(&(_VMSMasterEnv->masterLock),
   2.345 +                                                          UNLOCKED, LOCKED );
   2.346 +         if( gotLock )
   2.347 +          {    //run own MasterVP -- jmps to coreLoops startPt when done
   2.348 +            currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
   2.349 +            if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 )
   2.350 +             {       DEBUG( dbgB2BMaster,"Many back to back MasterVPs\n");
   2.351 +               pthread_yield();
   2.352 +             }
   2.353 +            _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1;
   2.354 +            break;  //end while -- have a VP to animate now
   2.355 +          }
   2.356 +
   2.357 +         tries++;      //if too many, means master on other core taking too long
   2.358 +         if( tries > MASTERLOCK_RETRIES ) { tries = 0; pthread_yield(); }
   2.359 +       }
   2.360 +      //============================= MEASUREMENT STUFF =====================
   2.361 +      #ifdef MEAS__TIME_MASTER_LOCK
   2.362 +      saveLowTimeStampCountInto( endStamp );
   2.363 +      addIntervalToHist( startStamp, endStamp,
   2.364 +                         _VMSMasterEnv->masterLockLowTimeHist );
   2.365 +      addIntervalToHist( startStamp, endStamp,
   2.366 +                         _VMSMasterEnv->masterLockHighTimeHist );
   2.367 +      #endif
   2.368 +      //=====================================================================
   2.369 +
   2.370 +    }
   2.371 +
   2.372 +   
   2.373 +   switchToVP(currPr); //The VPs return in here
   2.374 +   flushRegisters();
   2.375 +   }//CoreLoop      
   2.376 + }
   2.377 +
   2.378 +
   2.379 +void *
   2.380 +terminateCoreLoop(VirtProcr *currPr){
   2.381 +   //first free shutdown VP that jumped here -- it first restores the
   2.382 +   // coreloop's stack, so addr of currPr in stack frame is still correct
   2.383 +   VMS__dissipate_procr( currPr );
   2.384 +   pthread_exit( NULL );
   2.385 +}
   2.386 +
   2.387 +
   2.388 +
   2.389 +#ifdef SEQUENTIAL
   2.390 +
   2.391 +//===========================================================================
   2.392 +/*This sequential version is exact same as threaded, except doesn't do the
   2.393 + * pin-threads part, nor the wait until setup complete part.
   2.394 + */
   2.395 +void *
   2.396 +coreLoop_Seq( void *paramsIn )
   2.397 + {
   2.398 +   VirtProcr      *currPr;
   2.399 +   VMSQueueStruc *readyToAnimateQ;
   2.400 +   
   2.401 +   ThdParams      *coreLoopThdParams;
   2.402 +   int             thisCoresIdx;
   2.403 +   
   2.404 +   coreLoopThdParams = (ThdParams *)paramsIn;
   2.405 +//   thisCoresIdx = coreLoopThdParams->coreNum;
   2.406 +   thisCoresIdx = 0;
   2.407 +
   2.408 +   //Save the return address in the SwitchVP function
   2.409 +   saveCoreLoopReturnAddr(&(_VMSMasterEnv->coreLoopReturnPt));
   2.410 +
   2.411 +   
   2.412 +   while(1){
   2.413 +      //Get virtual processor from queue
   2.414 +      //_VMSWorkQ must be a global, static volatile var, so not kept in reg,
   2.415 +      // which forces reloading the pointer after each jmp to this point
   2.416 +   readyToAnimateQ  = _VMSMasterEnv->readyToAnimateQs[thisCoresIdx];
   2.417 +   currPr = (VirtProcr *) readVMSQ( readyToAnimateQ );
   2.418 +   if( currPr == NULL )
   2.419 +    { if( _VMSMasterEnv->numMasterInARow[thisCoresIdx] > 1000 )
   2.420 +       { printf("too many back to back MasterVP\n"); exit(1); }
   2.421 +      _VMSMasterEnv->numMasterInARow[thisCoresIdx] += 1;
   2.422 +      
   2.423 +      currPr = _VMSMasterEnv->masterVPs[thisCoresIdx];
   2.424 +    }
   2.425 +   else
   2.426 +      _VMSMasterEnv->numMasterInARow[thisCoresIdx] = 0;
   2.427 +
   2.428 +
   2.429 +   switchToVP( currPr );
   2.430 +   flushRegisters();
   2.431 +   }
   2.432 + }
   2.433 +#endif

     3.1 --- a/MasterLoop.c	Thu Oct 06 16:24:17 2011 +0200
     3.2 +++ b/MasterLoop.c	Wed Jan 04 16:10:11 2012 -0800
     3.3 @@ -1,373 +1,373 @@
     3.4 -/*
     3.5 - * Copyright 2010  OpenSourceStewardshipFoundation
     3.6 - * 
     3.7 - * Licensed under BSD
     3.8 - */
     3.9 -
    3.10 -
    3.11 -
    3.12 -#include <stdio.h>
    3.13 -#include <stddef.h>
    3.14 -
    3.15 -#include "VMS.h"
    3.16 -#include "ProcrContext.h"
    3.17 -
    3.18 -
    3.19 -//===========================================================================
    3.20 -void inline
    3.21 -stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
    3.22 -               VirtProcr *masterPr );
    3.23 -
    3.24 -//===========================================================================
    3.25 -
    3.26 -
    3.27 -
    3.28 -/*This code is animated by the virtual Master processor.
    3.29 - *
    3.30 - *Polls each sched slot exactly once, hands any requests made by a newly
    3.31 - * done slave to the "request handler" plug-in function
    3.32 - *
    3.33 - *Any slots that need a virt procr assigned are given to the "schedule"
    3.34 - * plug-in function, which tries to assign a virt procr (slave) to it.
    3.35 - *
    3.36 - *When all slots needing a processor have been given to the schedule plug-in,
    3.37 - * a fraction of the procrs successfully scheduled are put into the
    3.38 - * work queue, then a continuation of this function is put in, then the rest
    3.39 - * of the virt procrs that were successfully scheduled.
    3.40 - *
    3.41 - *The first thing the continuation does is busy-wait until the previous
    3.42 - * animation completes.  This is because an (unlikely) continuation may
    3.43 - * sneak through queue before previous continuation is done putting second
    3.44 - * part of scheduled slaves in, which is the only race condition.
    3.45 - *
    3.46 - */
    3.47 -
    3.48 -/*May 29, 2010 -- birth a Master during init so that first core loop to
    3.49 - * start running gets it and does all the stuff for a newly born --
    3.50 - * from then on, will be doing continuation, but do suspension self
    3.51 - * directly at end of master loop
    3.52 - *So VMS__init just births the master virtual processor same way it births
    3.53 - * all the others -- then does any extra setup needed and puts it into the
    3.54 - * work queue.
    3.55 - *However means have to make masterEnv a global static volatile the same way
    3.56 - * did with readyToAnimateQ in core loop.  -- for performance, put the
    3.57 - * jump to the core loop directly in here, and have it directly jump back.
    3.58 - *
    3.59 - *
    3.60 - *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this
    3.61 - * avoids the suspected bug in the system stack that causes bizarre faults
    3.62 - * at random places in the system code.
    3.63 - *
    3.64 - *So, this function is coupled to each of the MasterVPs, -- meaning this
    3.65 - * function can't rely on a particular stack and frame -- each MasterVP that
    3.66 - * animates this function has a different one.
    3.67 - *
    3.68 - *At this point, the masterLoop does not write itself into the queue anymore,
    3.69 - * instead, the coreLoop acquires the masterLock when it has nothing to
    3.70 - * animate, and then animates its own masterLoop.  However, still try to put
    3.71 - * several AppVPs into the queue to amortize the startup cost of switching
    3.72 - * to the MasterVP.  Note, don't have to worry about latency of requests much
    3.73 - * because most requests generate work for same core -- only latency issue
    3.74 - * is case when other cores starved and one core's requests generate work
    3.75 - * for them -- so keep max in queue to 3 or 4..
    3.76 - */
    3.77 -void masterLoop( void *initData, VirtProcr *animatingPr )
    3.78 - { 
    3.79 -   int32           slotIdx, numSlotsFilled;
    3.80 -   VirtProcr      *schedVirtPr;
    3.81 -   SchedSlot      *currSlot, **schedSlots;
    3.82 -   MasterEnv      *masterEnv;
    3.83 -   VMSQueueStruc  *readyToAnimateQ;
    3.84 -   
    3.85 -   SlaveScheduler  slaveScheduler;
    3.86 -   RequestHandler  requestHandler;
    3.87 -   void           *semanticEnv;
    3.88 -
    3.89 -   int32           thisCoresIdx;
    3.90 -   VirtProcr      *masterPr;
    3.91 -   volatile        VirtProcr *volatileMasterPr;
    3.92 -   
    3.93 -   volatileMasterPr = animatingPr;
    3.94 -   masterPr         = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp
    3.95 -
    3.96 -      //First animation of each MasterVP will in turn animate this part
    3.97 -      // of setup code.. (VP creator sets up the stack as if this function
    3.98 -      // was called normally, but actually get here by jmp)
    3.99 -      //So, setup values about stack ptr, jmp pt and all that
   3.100 -   //masterPr->nextInstrPt = &&masterLoopStartPt;
   3.101 -
   3.102 -
   3.103 -      //Note, got rid of writing the stack and frame ptr up here, because
   3.104 -      // only one
   3.105 -      // core can ever animate a given MasterVP, so don't need to communicate
   3.106 -      // new frame and stack ptr to the MasterVP storage before a second
   3.107 -      // version of that MasterVP can get animated on a different core.
   3.108 -      //Also got rid of the busy-wait.
   3.109 -
   3.110 -   
   3.111 -   //masterLoopStartPt:
   3.112 -   while(1){
   3.113 -       
   3.114 -   //============================= MEASUREMENT STUFF ========================
   3.115 -   #ifdef MEAS__TIME_MASTER
   3.116 -      //Total Master time includes one coreloop time -- just assume the core
   3.117 -      // loop time is same for Master as for AppVPs, even though it may be
   3.118 -      // smaller due to higher predictability of the fixed jmp.
   3.119 -   saveLowTimeStampCountInto( masterPr->startMasterTSCLow );
   3.120 -   #endif
   3.121 -   //========================================================================
   3.122 -
   3.123 -   masterEnv        = (MasterEnv*)_VMSMasterEnv;
   3.124 -   
   3.125 -      //GCC may optimize so doesn't always re-define from frame-storage
   3.126 -   masterPr         = (VirtProcr*)volatileMasterPr;  //just to make sure after jmp
   3.127 -   thisCoresIdx     = masterPr->coreAnimatedBy;
   3.128 -   readyToAnimateQ  = masterEnv->readyToAnimateQs[thisCoresIdx];
   3.129 -   schedSlots       = masterEnv->allSchedSlots[thisCoresIdx];
   3.130 -
   3.131 -   requestHandler   = masterEnv->requestHandler;
   3.132 -   slaveScheduler   = masterEnv->slaveScheduler;
   3.133 -   semanticEnv      = masterEnv->semanticEnv;
   3.134 -
   3.135 -
   3.136 -      //Poll each slot's Done flag
   3.137 -   numSlotsFilled = 0;
   3.138 -   for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++)
   3.139 -    {
   3.140 -      currSlot = schedSlots[ slotIdx ];
   3.141 -
   3.142 -      if( currSlot->workIsDone )
   3.143 -       {
   3.144 -         currSlot->workIsDone         = FALSE;
   3.145 -         currSlot->needsProcrAssigned = TRUE;
   3.146 -
   3.147 -            //process requests from slave to master
   3.148 -               //====================== MEASUREMENT STUFF ===================
   3.149 -               #ifdef MEAS__TIME_PLUGIN
   3.150 -               int32 startStamp1, endStamp1;
   3.151 -               saveLowTimeStampCountInto( startStamp1 );
   3.152 -               #endif
   3.153 -               //============================================================
   3.154 -         (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv );
   3.155 -               //====================== MEASUREMENT STUFF ===================
   3.156 -               #ifdef MEAS__TIME_PLUGIN
   3.157 -               saveLowTimeStampCountInto( endStamp1 );
   3.158 -               addIntervalToHist( startStamp1, endStamp1,
   3.159 -                                        _VMSMasterEnv->reqHdlrLowTimeHist );
   3.160 -               addIntervalToHist( startStamp1, endStamp1,
   3.161 -                                        _VMSMasterEnv->reqHdlrHighTimeHist );
   3.162 -               #endif
   3.163 -               //============================================================
   3.164 -       }
   3.165 -      if( currSlot->needsProcrAssigned )
   3.166 -       {    //give slot a new virt procr
   3.167 -         schedVirtPr =
   3.168 -          (*slaveScheduler)( semanticEnv, thisCoresIdx );
   3.169 -         
   3.170 -         if( schedVirtPr != NULL )
   3.171 -          { currSlot->procrAssignedToSlot = schedVirtPr;
   3.172 -            schedVirtPr->schedSlot        = currSlot;
   3.173 -            currSlot->needsProcrAssigned  = FALSE;
   3.174 -            numSlotsFilled               += 1;
   3.175 -            
   3.176 -            writeVMSQ( schedVirtPr, readyToAnimateQ );
   3.177 -          }
   3.178 -       }
   3.179 -    }
   3.180 -
   3.181 -   
   3.182 -   #ifdef USE_WORK_STEALING
   3.183 -      //If no slots filled, means no more work, look for work to steal.
   3.184 -   if( numSlotsFilled == 0 )
   3.185 -    { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr );
   3.186 -    }
   3.187 -   #endif
   3.188 -
   3.189 -   
   3.190 -   #ifdef MEAS__TIME_MASTER
   3.191 -   saveLowTimeStampCountInto( masterPr->endMasterTSCLow );
   3.192 -   #endif
   3.193 -
   3.194 -   masterSwitchToCoreLoop(animatingPr);
   3.195 -   flushRegisters();
   3.196 -   }//MasterLoop
   3.197 -
   3.198 -
   3.199 - }
   3.200 -
   3.201 -
   3.202 -
   3.203 -/*This has a race condition -- the coreloops are accessing their own queues
   3.204 - * at the same time that this work-stealer on a different core is trying to
   3.205 - */
   3.206 -void inline
   3.207 -stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
   3.208 -               VirtProcr *masterPr )
   3.209 - { 
   3.210 -   VirtProcr   *stolenPr;
   3.211 -   int32        coreIdx, i;
   3.212 -   VMSQueueStruc *currQ;
   3.213 -
   3.214 -   stolenPr = NULL;
   3.215 -   coreIdx = masterPr->coreAnimatedBy;
   3.216 -   for( i = 0; i < NUM_CORES -1; i++ )
   3.217 -    {
   3.218 -      if( coreIdx >= NUM_CORES -1 )
   3.219 -       { coreIdx = 0;
   3.220 -       }
   3.221 -      else
   3.222 -       { coreIdx++;
   3.223 -       }
   3.224 -      currQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];
   3.225 -      if( numInVMSQ( currQ ) > 0 )
   3.226 -       { stolenPr = readVMSQ (currQ );
   3.227 -         break;
   3.228 -       }
   3.229 -    }
   3.230 -
   3.231 -   if( stolenPr != NULL )
   3.232 -    { currSlot->procrAssignedToSlot = stolenPr;
   3.233 -      stolenPr->schedSlot           = currSlot;
   3.234 -      currSlot->needsProcrAssigned  = FALSE;
   3.235 -
   3.236 -      writeVMSQ( stolenPr, readyToAnimateQ );
   3.237 -    }
   3.238 - }
   3.239 -
   3.240 -/*This algorithm makes the common case fast.  Make the coreloop passive,
   3.241 - * and show its progress.  Make the stealer control a gate that coreloop
   3.242 - * has to pass.
   3.243 - *To avoid interference, only one stealer at a time.  Use a global
   3.244 - * stealer-lock.
   3.245 - *
   3.246 - *The pattern is based on a gate -- stealer shuts the gate, then monitors
   3.247 - * to be sure any already past make it all the way out, before starting.
   3.248 - *So, have a "progress" measure just before the gate, then have two after it,
   3.249 - * one is in a "waiting room" outside the gate, the other is at the exit.
   3.250 - *Then, the stealer first shuts the gate, then checks the progress measure
   3.251 - * outside it, then looks to see if the progress measure at the exit is the
   3.252 - * same.  If yes, it knows the protected area is empty 'cause no other way
   3.253 - * to get in and the last to get in also exited.
   3.254 - *If the progress measure at the exit is not the same, then the stealer goes
   3.255 - * into a loop checking both the waiting-area and the exit progress-measures
   3.256 - * until one of them shows the same as the measure outside the gate.  Might
   3.257 - * as well re-read the measure outside the gate each go around, just to be
   3.258 - * sure.  It is guaranteed that one of the two will eventually match the one
   3.259 - * outside the gate.
   3.260 - *
   3.261 - *Here's an informal proof of correctness:
   3.262 - *The gate can be closed at any point, and have only four cases:
   3.263 - *  1) coreloop made it past the gate-closing but not yet past the exit
   3.264 - *  2) coreloop made it past the pre-gate progress update but not yet past
   3.265 - *     the gate,
   3.266 - *  3) coreloop is right before the pre-gate update
   3.267 - *  4) coreloop is past the exit and far from the pre-gate update.
   3.268 - *
   3.269 - * Covering the cases in reverse order,
   3.270 - *  4) is not a problem -- stealer will read pre-gate progress, see that it
   3.271 - *     matches exit progress, and the gate is closed, so stealer can proceed.
   3.272 - *  3) stealer will read pre-gate progress just after coreloop updates it..
   3.273 - *     so stealer goes into a loop until the coreloop causes wait-progress
   3.274 - *     to match pre-gate progress, so then stealer can proceed
   3.275 - *  2) same as 3..
   3.276 - *  1) stealer reads pre-gate progress, sees that it's different than exit,
   3.277 - *     so goes into loop until exit matches pre-gate, now it knows coreloop
   3.278 - *     is not in protected and cannot get back in, so can proceed.
   3.279 - *
   3.280 - *Implementation for the stealer:
   3.281 - *
   3.282 - *First, acquire the stealer lock -- only cores with no work to do will
   3.283 - * compete to steal, so not a big performance penalty having only one --
   3.284 - * will rarely have multiple stealers in a system with plenty of work -- and
   3.285 - * in a system with little work, it doesn't matter.
   3.286 - *
   3.287 - *Note, have single-reader, single-writer pattern for all variables used to
   3.288 - * communicate between stealer and victims
   3.289 - *
   3.290 - *So, scan the queues of the core loops, until find non-empty.  Each core
   3.291 - * has its own list that it scans.  The list goes in order from closest to
   3.292 - * furthest core, so it steals first from close cores.  Later can add
   3.293 - * taking info from the app about overlapping footprints, and scan all the
   3.294 - * others then choose work with the most footprint overlap with the contents
   3.295 - * of this core's cache.
   3.296 - *
   3.297 - *Now, have a victim want to take work from.  So, shut the gate in that
   3.298 - * coreloop, by setting the "gate closed" var on its stack to TRUE.
   3.299 - *Then, read the core's pre-gate progress and compare to the core's exit
   3.300 - * progress.
   3.301 - *If same, can proceed to take work from the coreloop's queue.  When done,
   3.302 - * write FALSE to gate closed var.
   3.303 - *If different, then enter a loop that reads the pre-gate progress, then
   3.304 - * compares to exit progress then to wait progress.  When one of two
   3.305 - * matches, proceed.  Take work from the coreloop's queue.  When done,
   3.306 - * write FALSE to the gate closed var.
   3.307 - * 
   3.308 - */
   3.309 -void inline
   3.310 -gateProtected_stealWorkInto( SchedSlot *currSlot,
   3.311 -                             VMSQueueStruc *myReadyToAnimateQ,
   3.312 -                             VirtProcr *masterPr )
   3.313 - {
   3.314 -   VirtProcr     *stolenPr;
   3.315 -   int32          coreIdx, i, haveAVictim, gotLock;
   3.316 -   VMSQueueStruc *victimsQ;
   3.317 -
   3.318 -   volatile GateStruc *vicGate;
   3.319 -   int32               coreMightBeInProtected;
   3.320 -
   3.321 -
   3.322 -
   3.323 -      //see if any other cores have work available to steal
   3.324 -   haveAVictim = FALSE;
   3.325 -   coreIdx = masterPr->coreAnimatedBy;
   3.326 -   for( i = 0; i < NUM_CORES -1; i++ )
   3.327 -    {
   3.328 -      if( coreIdx >= NUM_CORES -1 )
   3.329 -       { coreIdx = 0;
   3.330 -       }
   3.331 -      else
   3.332 -       { coreIdx++;
   3.333 -       }
   3.334 -      victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];
   3.335 -      if( numInVMSQ( victimsQ ) > 0 )
   3.336 -       { haveAVictim = TRUE;
   3.337 -         vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ];
   3.338 -         break;
   3.339 -       }
   3.340 -    }
   3.341 -   if( !haveAVictim ) return;  //no work to steal, exit
   3.342 -
   3.343 -      //have a victim core, now get the stealer-lock
   3.344 -   gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock),
   3.345 -                                                          UNLOCKED, LOCKED );
   3.346 -   if( !gotLock ) return; //go back to core loop, which will re-start master
   3.347 -
   3.348 -
   3.349 -   //====== Start Gate-protection =======
   3.350 -   vicGate->gateClosed = TRUE;
   3.351 -   coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress;
   3.352 -   while( coreMightBeInProtected )
   3.353 -    {    //wait until sure
   3.354 -      if( vicGate->preGateProgress == vicGate->waitProgress )
   3.355 -         coreMightBeInProtected = FALSE;
   3.356 -      if( vicGate->preGateProgress == vicGate->exitProgress )
   3.357 -         coreMightBeInProtected = FALSE;
   3.358 -    }
   3.359 -
   3.360 -   stolenPr = readVMSQ ( victimsQ );
   3.361 -
   3.362 -   vicGate->gateClosed = FALSE;
   3.363 -   //======= End Gate-protection  =======
   3.364 -
   3.365 -
   3.366 -   if( stolenPr != NULL )  //victim could have been in protected and taken
   3.367 -    { currSlot->procrAssignedToSlot = stolenPr;
   3.368 -      stolenPr->schedSlot           = currSlot;
   3.369 -      currSlot->needsProcrAssigned  = FALSE;
   3.370 -
   3.371 -      writeVMSQ( stolenPr, myReadyToAnimateQ );
   3.372 -    }
   3.373 -
   3.374 -      //unlock the work stealing lock
   3.375 -   _VMSMasterEnv->workStealingLock = UNLOCKED;
   3.376 - }
   3.377 +/*
   3.378 + * Copyright 2010  OpenSourceStewardshipFoundation
   3.379 + * 
   3.380 + * Licensed under BSD
   3.381 + */
   3.382 +
   3.383 +
   3.384 +
   3.385 +#include <stdio.h>
   3.386 +#include <stddef.h>
   3.387 +
   3.388 +#include "VMS.h"
   3.389 +#include "ProcrContext.h"
   3.390 +
   3.391 +
   3.392 +//=========== Forward declarations of the work-stealing helpers ============
   3.393 +void inline
   3.394 +stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
   3.395 +               VirtProcr *masterPr );
   3.396 +   //masterLoop calls this, under USE_WORK_STEALING, ahead of its definition
   3.397 +void inline
   3.398 +gateProtected_stealWorkInto( SchedSlot *currSlot,
   3.399 +         VMSQueueStruc *myReadyToAnimateQ, VirtProcr *masterPr );
   3.400 +//===========================================================================
   3.401 +/*This code is animated by the virtual Master processor.
   3.402 + *
   3.403 + *Polls each sched slot exactly once, hands any requests made by a newly
   3.404 + * done slave to the "request handler" plug-in function
   3.405 + *
   3.406 + *Any slots that need a virt procr assigned are given to the "schedule"
   3.407 + * plug-in function, which tries to assign a virt procr (slave) to it.
   3.408 + *
   3.409 + *When all slots needing a processor have been given to the schedule plug-in,
   3.410 + * a fraction of the procrs successfully scheduled are put into the
   3.411 + * work queue, then a continuation of this function is put in, then the rest
   3.412 + * of the virt procrs that were successfully scheduled.
   3.413 + *
   3.414 + *The first thing the continuation does is busy-wait until the previous
   3.415 + * animation completes.  This is because an (unlikely) continuation may
   3.416 + * sneak through queue before previous continuation is done putting second
   3.417 + * part of scheduled slaves in, which is the only race condition.
   3.418 + *
   3.419 + */
   3.420 +
   3.421 +/*May 29, 2010 -- birth a Master during init so that first core loop to
   3.422 + * start running gets it and does all the stuff for a newly born --
   3.423 + * from then on, will be doing continuation, but do suspension self
   3.424 + * directly at end of master loop
   3.425 + *So VMS__init just births the master virtual processor same way it births
   3.426 + * all the others -- then does any extra setup needed and puts it into the
   3.427 + * work queue.
   3.428 + *However means have to make masterEnv a global static volatile the same way
   3.429 + * did with readyToAnimateQ in core loop.  -- for performance, put the
   3.430 + * jump to the core loop directly in here, and have it directly jump back.
   3.431 + *
   3.432 + *
   3.433 + *Aug 18, 2010 -- Going to a separate MasterVP for each core, to see if this
   3.434 + * avoids the suspected bug in the system stack that causes bizarre faults
   3.435 + * at random places in the system code.
   3.436 + *
   3.437 + *So, this function is coupled to each of the MasterVPs, -- meaning this
   3.438 + * function can't rely on a particular stack and frame -- each MasterVP that
   3.439 + * animates this function has a different one.
   3.440 + *
   3.441 + *At this point, the masterLoop does not write itself into the queue anymore,
   3.442 + * instead, the coreLoop acquires the masterLock when it has nothing to
   3.443 + * animate, and then animates its own masterLoop.  However, still try to put
   3.444 + * several AppVPs into the queue to amortize the startup cost of switching
   3.445 + * to the MasterVP.  Note, don't have to worry about latency of requests much
   3.446 + * because most requests generate work for same core -- only latency issue
   3.447 + * is case when other cores starved and one core's requests generate work
   3.448 + * for them -- so keep max in queue to 3 or 4..
   3.449 + */
   3.450 +void masterLoop( void *initData, VirtProcr *animatingPr )
   3.451 + { 
   3.452 +   int32           slotIdx, numSlotsFilled;
   3.453 +   VirtProcr      *schedVirtPr;
   3.454 +   SchedSlot      *currSlot, **schedSlots;
   3.455 +   MasterEnv      *masterEnv;
   3.456 +   VMSQueueStruc  *readyToAnimateQ;
   3.457 +      //plug-in functions and semantic env, re-read from master env each pass
   3.458 +   SlaveScheduler  slaveScheduler;
   3.459 +   RequestHandler  requestHandler;
   3.460 +   void           *semanticEnv;
   3.461 +      //core this master VP is animated by, and the master VP itself
   3.462 +   int32           thisCoresIdx;
   3.463 +   VirtProcr      *masterPr;
   3.464 +   volatile        VirtProcr *volatileMasterPr;
   3.465 +      //animatingPr is the master VP; initData is not used in this function
   3.466 +   volatileMasterPr = animatingPr;
   3.467 +   masterPr         = (VirtProcr*)volatileMasterPr; //used to force re-define after jmp
   3.468 +
   3.469 +      //First animation of each MasterVP will in turn animate this part
   3.470 +      // of setup code.. (VP creator sets up the stack as if this function
   3.471 +      // was called normally, but actually get here by jmp)
   3.472 +      //So, setup values about stack ptr, jmp pt and all that
   3.473 +   //masterPr->nextInstrPt = &&masterLoopStartPt;
   3.474 +
   3.475 +
   3.476 +      //Note, got rid of writing the stack and frame ptr up here, because
   3.477 +      // only one
   3.478 +      // core can ever animate a given MasterVP, so don't need to communicate
   3.479 +      // new frame and stack ptr to the MasterVP storage before a second
   3.480 +      // version of that MasterVP can get animated on a different core.
   3.481 +      //Also got rid of the busy-wait.
   3.482 +
   3.483 +   
   3.484 +   //masterLoopStartPt:
   3.485 +   while(1){
   3.486 +      //one pass: handle requests, refill slots, then switch to the core loop
   3.487 +   //============================= MEASUREMENT STUFF ========================
   3.488 +   #ifdef MEAS__TIME_MASTER
   3.489 +      //Total Master time includes one coreloop time -- just assume the core
   3.490 +      // loop time is same for Master as for AppVPs, even though it may be
   3.491 +      // smaller due to higher predictability of the fixed jmp.
   3.492 +   saveLowTimeStampCountInto( masterPr->startMasterTSCLow );
   3.493 +   #endif
   3.494 +   //========================================================================
   3.495 +
   3.496 +   masterEnv        = (MasterEnv*)_VMSMasterEnv;
   3.497 +   
   3.498 +      //GCC may optimize so doesn't always re-define from frame-storage
   3.499 +   masterPr         = (VirtProcr*)volatileMasterPr;  //just to make sure after jmp
   3.500 +   thisCoresIdx     = masterPr->coreAnimatedBy;
   3.501 +   readyToAnimateQ  = masterEnv->readyToAnimateQs[thisCoresIdx];
   3.502 +   schedSlots       = masterEnv->allSchedSlots[thisCoresIdx];
   3.503 +
   3.504 +   requestHandler   = masterEnv->requestHandler;
   3.505 +   slaveScheduler   = masterEnv->slaveScheduler;
   3.506 +   semanticEnv      = masterEnv->semanticEnv;
   3.507 +
   3.508 +
   3.509 +      //Poll each slot's Done flag
   3.510 +   numSlotsFilled = 0;
   3.511 +   for( slotIdx = 0; slotIdx < NUM_SCHED_SLOTS; slotIdx++)
   3.512 +    {
   3.513 +      currSlot = schedSlots[ slotIdx ];
   3.514 +
   3.515 +      if( currSlot->workIsDone )
   3.516 +       {
   3.517 +         currSlot->workIsDone         = FALSE;
   3.518 +         currSlot->needsProcrAssigned = TRUE;
   3.519 +
   3.520 +            //process requests from slave to master
   3.521 +               //====================== MEASUREMENT STUFF ===================
   3.522 +               #ifdef MEAS__TIME_PLUGIN
   3.523 +               int32 startStamp1, endStamp1;
   3.524 +               saveLowTimeStampCountInto( startStamp1 );
   3.525 +               #endif
   3.526 +               //============================================================
   3.527 +         (*requestHandler)( currSlot->procrAssignedToSlot, semanticEnv );
   3.528 +               //====================== MEASUREMENT STUFF ===================
   3.529 +               #ifdef MEAS__TIME_PLUGIN
   3.530 +               saveLowTimeStampCountInto( endStamp1 );
   3.531 +               addIntervalToHist( startStamp1, endStamp1,
   3.532 +                                        _VMSMasterEnv->reqHdlrLowTimeHist );
   3.533 +               addIntervalToHist( startStamp1, endStamp1,
   3.534 +                                        _VMSMasterEnv->reqHdlrHighTimeHist );
   3.535 +               #endif
   3.536 +               //============================================================
   3.537 +       }
   3.538 +      if( currSlot->needsProcrAssigned )
   3.539 +       {    //give slot a new virt procr
   3.540 +         schedVirtPr =
   3.541 +          (*slaveScheduler)( semanticEnv, thisCoresIdx );
   3.542 +         
   3.543 +         if( schedVirtPr != NULL )
   3.544 +          { currSlot->procrAssignedToSlot = schedVirtPr;
   3.545 +            schedVirtPr->schedSlot        = currSlot;
   3.546 +            currSlot->needsProcrAssigned  = FALSE;
   3.547 +            numSlotsFilled               += 1;
   3.548 +            
   3.549 +            writeVMSQ( schedVirtPr, readyToAnimateQ );
   3.550 +          }
   3.551 +       }
   3.552 +    }
   3.553 +      //NOTE(review): currSlot below is the last slot the loop above polled --
   3.554 +      // confirm that slot is always free when numSlotsFilled == 0
   3.555 +   #ifdef USE_WORK_STEALING
   3.556 +      //If no slots filled, means no more work, look for work to steal.
   3.557 +   if( numSlotsFilled == 0 )
   3.558 +    { gateProtected_stealWorkInto( currSlot, readyToAnimateQ, masterPr );
   3.559 +    }
   3.560 +   #endif
   3.561 +
   3.562 +   
   3.563 +   #ifdef MEAS__TIME_MASTER
   3.564 +   saveLowTimeStampCountInto( masterPr->endMasterTSCLow );
   3.565 +   #endif
   3.566 +      //presumably resumes here when this master VP is next animated -- confirm
   3.567 +   masterSwitchToCoreLoop(animatingPr);
   3.568 +   flushRegisters();
   3.569 +   }//MasterLoop
   3.570 +
   3.571 +
   3.572 + }
   3.573 +
   3.574 +
   3.575 +
   3.576 +/*This has a race condition -- the coreloops are accessing their own queues
   3.577 + * at the same time that this work-stealer on a different core is trying to
   3.578 + * read from them -- use gateProtected_stealWorkInto, below, instead. */
   3.579 +void inline
   3.580 +stealWorkInto( SchedSlot *currSlot, VMSQueueStruc *readyToAnimateQ,
   3.581 +               VirtProcr *masterPr )
   3.582 + {
   3.583 +   VirtProcr     *takenPr = NULL;
   3.584 +   VMSQueueStruc *victimQ;
   3.585 +   int32          victimIdx, numTried;
   3.586 +
   3.587 +      //Un-gated steal: visit each other core's ready queue once, starting
   3.588 +      // with the core after masterPr's own and wrapping around to core 0,
   3.589 +      // and take one VP from the first queue that reports being non-empty
   3.590 +   victimIdx = masterPr->coreAnimatedBy;
   3.591 +   for( numTried = 0; numTried < NUM_CORES -1; numTried++ )
   3.592 +    {
   3.593 +      victimIdx = ( victimIdx >= NUM_CORES -1 ) ? 0 : victimIdx + 1;
   3.594 +      victimQ   = _VMSMasterEnv->readyToAnimateQs[victimIdx];
   3.595 +      if( numInVMSQ( victimQ ) > 0 )
   3.596 +       { takenPr = readVMSQ( victimQ );
   3.597 +         break;
   3.598 +       }
   3.599 +    }
   3.600 +
   3.601 +      //nothing found (or the read came back NULL) -- leave the slot untouched
   3.602 +   if( takenPr == NULL ) return;
   3.603 +
   3.604 +      //bind the stolen VP and the slot to each other, mark the slot filled,
   3.605 +      // then queue the VP on the caller-supplied ready queue
   3.606 +   currSlot->procrAssignedToSlot = takenPr;
   3.607 +   takenPr->schedSlot            = currSlot;
   3.608 +   currSlot->needsProcrAssigned  = FALSE;
   3.609 +
   3.610 +   writeVMSQ( takenPr, readyToAnimateQ );
   3.611 + }
   3.612 +
   3.613 +/*This algorithm makes the common case fast.  Make the coreloop passive,
   3.614 + * and show its progress.  Make the stealer control a gate that coreloop
   3.615 + * has to pass.
   3.616 + *To avoid interference, only one stealer at a time.  Use a global
   3.617 + * stealer-lock.
   3.618 + *
   3.619 + *The pattern is based on a gate -- stealer shuts the gate, then monitors
   3.620 + * to be sure any already past make it all the way out, before starting.
   3.621 + *So, have a "progress" measure just before the gate, then have two after it,
   3.622 + * one is in a "waiting room" outside the gate, the other is at the exit.
   3.623 + *Then, the stealer first shuts the gate, then checks the progress measure
   3.624 + * outside it, then looks to see if the progress measure at the exit is the
   3.625 + * same.  If yes, it knows the protected area is empty 'cause no other way
   3.626 + * to get in and the last to get in also exited.
   3.627 + *If the progress measure at the exit is not the same, then the stealer goes
   3.628 + * into a loop checking both the waiting-area and the exit progress-measures
   3.629 + * until one of them shows the same as the measure outside the gate.  Might
   3.630 + * as well re-read the measure outside the gate each go around, just to be
   3.631 + * sure.  It is guaranteed that one of the two will eventually match the one
   3.632 + * outside the gate.
   3.633 + *
   3.634 + *Here's an informal proof of correctness:
   3.635 + *The gate can be closed at any point, and have only four cases:
   3.636 + *  1) coreloop made it past the gate-closing but not yet past the exit
   3.637 + *  2) coreloop made it past the pre-gate progress update but not yet past
   3.638 + *     the gate,
   3.639 + *  3) coreloop is right before the pre-gate update
   3.640 + *  4) coreloop is past the exit and far from the pre-gate update.
   3.641 + *
   3.642 + * Covering the cases in reverse order,
   3.643 + *  4) is not a problem -- stealer will read pre-gate progress, see that it
   3.644 + *     matches exit progress, and the gate is closed, so stealer can proceed.
   3.645 + *  3) stealer will read pre-gate progress just after coreloop updates it..
   3.646 + *     so stealer goes into a loop until the coreloop causes wait-progress
   3.647 + *     to match pre-gate progress, so then stealer can proceed
   3.648 + *  2) same as 3..
   3.649 + *  1) stealer reads pre-gate progress, sees that it's different than exit,
   3.650 + *     so goes into loop until exit matches pre-gate, now it knows coreloop
   3.651 + *     is not in protected and cannot get back in, so can proceed.
   3.652 + *
   3.653 + *Implementation for the stealer:
   3.654 + *
   3.655 + *First, acquire the stealer lock -- only cores with no work to do will
   3.656 + * compete to steal, so not a big performance penalty having only one --
   3.657 + * will rarely have multiple stealers in a system with plenty of work -- and
   3.658 + * in a system with little work, it doesn't matter.
   3.659 + *
   3.660 + *Note, have single-reader, single-writer pattern for all variables used to
   3.661 + * communicate between stealer and victims
   3.662 + *
   3.663 + *So, scan the queues of the core loops, until find non-empty.  Each core
   3.664 + * has its own list that it scans.  The list goes in order from closest to
   3.665 + * furthest core, so it steals first from close cores.  Later can add
   3.666 + * taking info from the app about overlapping footprints, and scan all the
   3.667 + * others then choose work with the most footprint overlap with the contents
   3.668 + * of this core's cache.
   3.669 + *
   3.670 + *Now, have a victim want to take work from.  So, shut the gate in that
   3.671 + * coreloop, by setting the "gate closed" var on its stack to TRUE.
   3.672 + *Then, read the core's pre-gate progress and compare to the core's exit
   3.673 + * progress.
   3.674 + *If same, can proceed to take work from the coreloop's queue.  When done,
   3.675 + * write FALSE to gate closed var.
   3.676 + *If different, then enter a loop that reads the pre-gate progress, then
   3.677 + * compares to exit progress then to wait progress.  When one of two
   3.678 + * matches, proceed.  Take work from the coreloop's queue.  When done,
   3.679 + * write FALSE to the gate closed var.
   3.680 + * 
   3.681 + */
   3.682 +void inline
   3.683 +gateProtected_stealWorkInto( SchedSlot *currSlot,
   3.684 +                             VMSQueueStruc *myReadyToAnimateQ,
   3.685 +                             VirtProcr *masterPr )
   3.686 + {
   3.687 +   VirtProcr     *stolenPr;
   3.688 +   int32          coreIdx, i, haveAVictim, gotLock;
   3.689 +   VMSQueueStruc *victimsQ;
   3.690 +   volatile GateStruc *vicGate;
   3.691 +   int32               coreMightBeInProtected;
   3.692 +
   3.693 +      //Steals at most one VP from another core's ready queue and, if it gets
   3.694 +      // one, assigns it to currSlot and queues it on myReadyToAnimateQ.
   3.695 +      // masterPr only supplies the index of the stealing core.
   3.696 +      //see if any other cores have work available to steal
   3.697 +   haveAVictim = FALSE;
   3.698 +   coreIdx = masterPr->coreAnimatedBy;
   3.699 +   for( i = 0; i < NUM_CORES -1; i++ )
   3.700 +    {
   3.701 +      if( coreIdx >= NUM_CORES -1 )
   3.702 +       { coreIdx = 0;
   3.703 +       }
   3.704 +      else
   3.705 +       { coreIdx++;
   3.706 +       }
   3.707 +      victimsQ = _VMSMasterEnv->readyToAnimateQs[coreIdx];
   3.708 +      if( numInVMSQ( victimsQ ) > 0 )
   3.709 +       { haveAVictim = TRUE;
   3.710 +         vicGate = _VMSMasterEnv->workStealingGates[ coreIdx ];
   3.711 +         break;
   3.712 +       }
   3.713 +    }
   3.714 +   if( !haveAVictim ) return;  //no work to steal, exit
   3.715 +
   3.716 +      //have a victim core, now get the stealer-lock
   3.717 +   gotLock =__sync_bool_compare_and_swap( &(_VMSMasterEnv->workStealingLock),
   3.718 +                                                          UNLOCKED, LOCKED );
   3.719 +   if( !gotLock ) return; //go back to core loop, which will re-start master
   3.720 +
   3.721 +   //====== Start Gate-protection =======
   3.722 +   vicGate->gateClosed = TRUE;
   3.723 +      //Full fence -- volatile only restrains the compiler; x86 may still do
   3.724 +      // the progress loads before the gate-closing store becomes visible.
   3.725 +      // NOTE(review): victim's core loop needs the matching fence -- confirm
   3.726 +   __sync_synchronize();
   3.727 +   coreMightBeInProtected= vicGate->preGateProgress != vicGate->exitProgress;
   3.728 +   while( coreMightBeInProtected )
   3.729 +    {    //wait until sure
   3.730 +      if( vicGate->preGateProgress == vicGate->waitProgress )
   3.731 +         coreMightBeInProtected = FALSE;
   3.732 +      if( vicGate->preGateProgress == vicGate->exitProgress )
   3.733 +         coreMightBeInProtected = FALSE;
   3.734 +    }
   3.735 +
   3.736 +   stolenPr = readVMSQ ( victimsQ );
   3.737 +   __sync_synchronize();  //finish the queue read before re-opening the gate
   3.738 +   vicGate->gateClosed = FALSE;
   3.739 +   //======= End Gate-protection  =======
   3.740 +   if( stolenPr != NULL )  //victim could have been in protected and taken
   3.741 +    { currSlot->procrAssignedToSlot = stolenPr;
   3.742 +      stolenPr->schedSlot           = currSlot;
   3.743 +      currSlot->needsProcrAssigned  = FALSE;
   3.744 +      writeVMSQ( stolenPr, myReadyToAnimateQ );
   3.745 +    }
   3.746 +      //unlock the work stealing lock, only after everything above is done
   3.747 +   __sync_synchronize();
   3.748 +   _VMSMasterEnv->workStealingLock = UNLOCKED;
   3.749 + }

     4.1 --- a/ProcrContext.h	Thu Oct 06 16:24:17 2011 +0200
     4.2 +++ b/ProcrContext.h	Wed Jan 04 16:10:11 2012 -0800
     4.3 @@ -1,33 +1,33 @@
     4.4 -/*
     4.5 - *  Copyright 2009 OpenSourceStewardshipFoundation.org
     4.6 - *  Licensed under GNU General Public License version 2
     4.7 - *
     4.8 - * Author: seanhalle@yahoo.com
     4.9 - * 
    4.10 - */
    4.11 -
    4.12 -#ifndef _ProcrContext_H
    4.13 -#define	_ProcrContext_H
    4.14 -#define _GNU_SOURCE
    4.15 -
    4.16 -void saveCoreLoopReturnAddr(void **returnAddress);
    4.17 -
    4.18 -void switchToVP(VirtProcr *nextProcr);
    4.19 -
    4.20 -void switchToCoreLoop(VirtProcr *nextProcr);
    4.21 -
    4.22 -void masterSwitchToCoreLoop(VirtProcr *nextProcr);
    4.23 -
    4.24 -void startVirtProcrFn();
    4.25 -
    4.26 -void *asmTerminateCoreLoop(VirtProcr *currPr);
    4.27 -
    4.28 -#define flushRegisters() \
    4.29 -        asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15")
    4.30 -
    4.31 -inline VirtProcr *
    4.32 -create_procr_helper( VirtProcr *newPr,       VirtProcrFnPtr  fnPtr,
    4.33 -                     void      *initialData, void           *stackLocs );
    4.34 -
    4.35 -#endif	/* _ProcrContext_H */
    4.36 -
    4.37 +/*
    4.38 + *  Copyright 2009 OpenSourceStewardshipFoundation.org
    4.39 + *  Licensed under GNU General Public License version 2
    4.40 + *
    4.41 + * Author: seanhalle@yahoo.com
    4.42 + * 
    4.43 + */
    4.44 +
    4.45 +#ifndef _ProcrContext_H
    4.46 +#define	_ProcrContext_H
    4.47 +#define _GNU_SOURCE
    4.48 +   //VirtProcr and VirtProcrFnPtr are not declared here -- include VMS.h first
    4.49 +void saveCoreLoopReturnAddr(void **returnAddress);
    4.50 +
    4.51 +void switchToVP(VirtProcr *nextProcr);
    4.52 +
    4.53 +void switchToCoreLoop(VirtProcr *nextProcr);
    4.54 +   //masterLoop calls this at the end of every pass
    4.55 +void masterSwitchToCoreLoop(VirtProcr *nextProcr);
    4.56 +
    4.57 +void startVirtProcrFn();
    4.58 +
    4.59 +void *asmTerminateCoreLoop(VirtProcr *currPr);
    4.60 +   //empty asm; clobber list stops GCC keeping values in rbx, r12..r15 across it
    4.61 +#define flushRegisters() \
    4.62 +        asm volatile ("":::"%rbx", "%r12", "%r13","%r14","%r15")
    4.63 +
    4.64 +inline VirtProcr *
    4.65 +create_procr_helper( VirtProcr *newPr,       VirtProcrFnPtr  fnPtr,
    4.66 +                     void      *initialData, void           *stackLocs );
    4.67 +
    4.68 +#endif	/* _ProcrContext_H */
    4.69 +

     5.1 --- a/VMS.h	Thu Oct 06 16:24:17 2011 +0200
     5.2 +++ b/VMS.h	Wed Jan 04 16:10:11 2012 -0800
     5.3 @@ -1,579 +1,579 @@
     5.4 -/*
     5.5 - *  Copyright 2009 OpenSourceStewardshipFoundation.org
     5.6 - *  Licensed under GNU General Public License version 2
     5.7 - *
     5.8 - * Author: seanhalle@yahoo.com
     5.9 - * 
    5.10 - */
    5.11 -
    5.12 -#ifndef _VMS_H
    5.13 -#define	_VMS_H
    5.14 -#define _GNU_SOURCE
    5.15 -
    5.16 -#include "VMS_primitive_data_types.h"
    5.17 -#include "Queue_impl/PrivateQueue.h"
    5.18 -#include "Histogram/Histogram.h"
    5.19 -#include "DynArray/DynArray.h"
    5.20 -#include "Hash_impl/PrivateHash.h"
    5.21 -#include "vmalloc.h"
    5.22 -
    5.23 -#include <pthread.h>
    5.24 -#include <sys/time.h>
    5.25 -
    5.26 -
    5.27 -//===============================  Debug  ===================================
    5.28 -//
    5.29 -//When SEQUENTIAL is defined, VMS does sequential exe in the main thread
    5.30 -// It still does co-routines and all the mechanisms are the same, it just
    5.31 -// has only a single thread and animates VPs one at a time
    5.32 -//#define SEQUENTIAL
    5.33 -
    5.34 -//#define USE_WORK_STEALING
    5.35 -
    5.36 -//turns on the probe-instrumentation in the application -- when not
    5.37 -// defined, the calls to the probe functions turn into comments
    5.38 -#define STATS__ENABLE_PROBES
    5.39 -//#define TURN_ON_DEBUG_PROBES
    5.40 -
    5.41 -//These defines turn types of bug messages on and off
    5.42 -// be sure debug messages are un-commented (next block of defines)
    5.43 -#define dbgAppFlow   TRUE /* Top level flow of application code -- general*/
    5.44 -#define dbgProbes    FALSE /* for issues inside probes themselves*/
    5.45 -#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/
    5.46 -#define dbgRqstHdlr  FALSE /* in request handler code*/
    5.47 -
    5.48 -//Comment or un- the substitute half to turn on/off types of debug message
    5.49 -#define DEBUG(  bool, msg)         \
    5.50 -//  if( bool){ printf(msg); fflush(stdin);}
    5.51 -#define DEBUG1( bool, msg, param)  \
    5.52 -//   if(bool){printf(msg, param); fflush(stdin);}
    5.53 -#define DEBUG2( bool, msg, p1, p2) \
    5.54 -//   if(bool) {printf(msg, p1, p2); fflush(stdin);}
    5.55 -
    5.56 -#define ERROR(msg) printf(msg);
    5.57 -#define ERROR1(msg, param) printf(msg, param); 
    5.58 -#define ERROR2(msg, p1, p2) printf(msg, p1, p2);
    5.59 -
    5.60 -//===========================  STATS =======================
    5.61 -
    5.62 -   //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and
    5.63 -   // compiled-in that saves the low part of the time stamp count just before
    5.64 -   // suspending a processor and just after resuming that processorsrc/VPThread_lib/VMS/VMS.h:322: warning: previous declaration of ‘VMS__create_procr’ was here.  It is
    5.65 -   // saved into a field added to VirtProcr.  Have to sanity-check for
    5.66 -   // rollover of low portion into high portion.
    5.67 -//#define MEAS__TIME_STAMP_SUSP
    5.68 -//#define MEAS__TIME_MASTER
    5.69 -#define MEAS__TIME_PLUGIN
    5.70 -#define MEAS__TIME_MALLOC
    5.71 -//#define MEAS__TIME_MASTER_LOCK
    5.72 -#define MEAS__NUM_TIMES_TO_RUN 100000
    5.73 -
    5.74 -   //For code that calculates normalization-offset between TSC counts of
    5.75 -   // different cores.
    5.76 -#define NUM_TSC_ROUND_TRIPS 10
    5.77 -
    5.78 -
    5.79 -//=========================  Hardware related Constants =====================
    5.80 -   //This value is the number of hardware threads in the shared memory
    5.81 -   // machine
    5.82 -//#define NUM_CORES        8
    5.83 -
    5.84 -   // tradeoff amortizing master fixed overhead vs imbalance potential
    5.85 -   // when work-stealing, can make bigger, at risk of losing cache affinity
    5.86 -#define NUM_SCHED_SLOTS  5
    5.87 -
    5.88 -#define MIN_WORK_UNIT_CYCLES 20000
    5.89 -
    5.90 -#define MASTERLOCK_RETRIES 10000
    5.91 -
    5.92 -   // stack size in virtual processors created
    5.93 -#define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */
    5.94 -
    5.95 -   // memory for VMS__malloc
    5.96 -#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */
    5.97 -
    5.98 -#define CACHE_LINE 64
    5.99 -#define PAGE_SIZE 4096
   5.100 -
   5.101 -
   5.102 -//==============================
   5.103 -
   5.104 -#define SUCCESS 0
   5.105 -
   5.106 -#define writeVMSQ     writePrivQ
   5.107 -#define readVMSQ      readPrivQ
   5.108 -#define makeVMSQ      makeVMSPrivQ
   5.109 -#define numInVMSQ     numInPrivQ
   5.110 -#define VMSQueueStruc PrivQueueStruc
   5.111 -
   5.112 -
   5.113 -
   5.114 -//===========================================================================
   5.115 -typedef unsigned long long TSCount;
   5.116 -
   5.117 -typedef struct _SchedSlot     SchedSlot;
   5.118 -typedef struct _VMSReqst      VMSReqst;
   5.119 -typedef struct _VirtProcr     VirtProcr;
   5.120 -typedef struct _IntervalProbe IntervalProbe;
   5.121 -typedef struct _GateStruc     GateStruc;
   5.122 -
   5.123 -
   5.124 -typedef VirtProcr * (*SlaveScheduler)  ( void *, int );   //semEnv, coreIdx
   5.125 -typedef void  (*RequestHandler)  ( VirtProcr *, void * ); //prWReqst, semEnv
   5.126 -typedef void  (*VirtProcrFnPtr)  ( void *, VirtProcr * ); //initData, animPr
   5.127 -typedef void    VirtProcrFn      ( void *, VirtProcr * ); //initData, animPr
   5.128 -typedef void  (*ResumePrFnPtr)   ( VirtProcr *, void * );
   5.129 -
   5.130 -
   5.131 -//============= Requests ===========
   5.132 -//
   5.133 -
   5.134 -enum VMSReqstType   //avoid starting enums at 0, for debug reasons
   5.135 - {
   5.136 -   semantic = 1,
   5.137 -   createReq,
   5.138 -   dissipate,
   5.139 -   VMSSemantic      //goes with VMSSemReqst below
   5.140 - };
   5.141 -
   5.142 -struct _VMSReqst
   5.143 - {
   5.144 -   enum VMSReqstType  reqType;//used for dissipate and in future for IO requests
   5.145 -   void              *semReqData;
   5.146 -
   5.147 -   VMSReqst *nextReqst;
   5.148 - };
   5.149 -//VMSReqst
   5.150 -
   5.151 -enum VMSSemReqstType   //These are equivalent to semantic requests, but for
   5.152 - {                     // VMS's services available directly to app, like OS
   5.153 -   createProbe = 1,    // and probe services -- like a VMS-wide built-in lang
   5.154 -   openFile,
   5.155 -   otherIO
   5.156 - };
   5.157 -
   5.158 -typedef struct
   5.159 - { enum VMSSemReqstType reqType;
   5.160 -   VirtProcr           *requestingPr;
   5.161 -   char                *nameStr;  //for create probe
   5.162 - }
   5.163 - VMSSemReq;
   5.164 -
   5.165 -
   5.166 -//====================  Core data structures  ===================
   5.167 -
   5.168 -struct _SchedSlot
   5.169 - {
   5.170 -   int         workIsDone;
   5.171 -   int         needsProcrAssigned;
   5.172 -   VirtProcr  *procrAssignedToSlot;
   5.173 - };
   5.174 -//SchedSlot
   5.175 -
   5.176 -/*WARNING: re-arranging this data structure could cause VP switching
   5.177 - *         assembly code to fail -- hard-codes offsets of fields
   5.178 - */
   5.179 -struct _VirtProcr
   5.180 - { int         procrID;  //for debugging -- count up each time create
   5.181 -   int         coreAnimatedBy;
   5.182 -   void       *startOfStack;
   5.183 -   void       *stackPtr;
   5.184 -   void       *framePtr;
   5.185 -   void       *nextInstrPt;
   5.186 -   
   5.187 -   void       *coreLoopStartPt;  //allows proto-runtime to be linked later
   5.188 -   void       *coreLoopFramePtr; //restore before jmp back to core loop
   5.189 -   void       *coreLoopStackPtr; //restore before jmp back to core loop
   5.190 -
   5.191 -   void       *initialData;
   5.192 -
   5.193 -   SchedSlot  *schedSlot;
   5.194 -   VMSReqst   *requests;
   5.195 -
   5.196 -   void       *semanticData; //this lives here for the life of VP
   5.197 -   void       *dataRetFromReq;//values returned from plugin to VP go here
   5.198 -
   5.199 -      //=========== MEASUREMENT STUFF ==========
   5.200 -   #ifdef MEAS__TIME_STAMP_SUSP
   5.201 -   unsigned int preSuspTSCLow;
   5.202 -   unsigned int postSuspTSCLow;
   5.203 -   #endif
   5.204 -   #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/
   5.205 -   unsigned int startMasterTSCLow;USE_GNU
   5.206 -   unsigned int endMasterTSCLow;
   5.207 -   #endif
   5.208 -      //========================================
   5.209 -   
   5.210 -   float64      createPtInSecs;  //have space but don't use on some configs
   5.211 - };
   5.212 -//VirtProcr
   5.213 -
   5.214 -
   5.215 -/*WARNING: re-arranging this data structure could cause VP-switching
   5.216 - *         assembly code to fail -- hard-codes offsets of fields
   5.217 - *         (because -O3 messes with things otherwise)
   5.218 - */
   5.219 -typedef struct
   5.220 - {
   5.221 -   SlaveScheduler   slaveScheduler;
   5.222 -   RequestHandler   requestHandler;
   5.223 -   
   5.224 -   SchedSlot     ***allSchedSlots;
   5.225 -   VMSQueueStruc **readyToAnimateQs;
   5.226 -   VirtProcr      **masterVPs;
   5.227 -
   5.228 -   void            *semanticEnv;
   5.229 -   void            *OSEventStruc;   //for future, when add I/O to BLIS
   5.230 -   MallocProlog    *freeListHead;
   5.231 -   int32            amtOfOutstandingMem; //total currently allocated
   5.232 -
   5.233 -   void            *coreLoopReturnPt;//addr to jump to to re-enter coreLoop
   5.234 -
   5.235 -   int32            setupComplete;
   5.236 -   volatile int32   masterLock;
   5.237 -
   5.238 -   int32            numMasterInARow[NUM_CORES];//detect back-to-back masterVP
   5.239 -   GateStruc       *workStealingGates[ NUM_CORES ]; //concurrent work-steal
   5.240 -   int32            workStealingLock;
   5.241 -   
   5.242 -   int32            numProcrsCreated; //gives ordering to processor creation
   5.243 -
   5.244 -      //=========== MEASUREMENT STUFF =============
   5.245 -   IntervalProbe  **intervalProbes;
   5.246 -   PrivDynArrayInfo    *dynIntervalProbesInfo;
   5.247 -   HashTable       *probeNameHashTbl;
   5.248 -   int32            masterCreateProbeID;
   5.249 -   float64          createPtInSecs;
   5.250 -   Histogram      **measHists;
   5.251 -   PrivDynArrayInfo *measHistsInfo;
   5.252 -   #ifdef MEAS__TIME_PLUGIN
   5.253 -   Histogram       *reqHdlrLowTimeHist;
   5.254 -   Histogram       *reqHdlrHighTimeHist;
   5.255 -   #endif
   5.256 -   #ifdef MEAS__TIME_MALLOC
   5.257 -   Histogram       *mallocTimeHist;
   5.258 -   Histogram       *freeTimeHist;
   5.259 -   #endif
   5.260 -   #ifdef MEAS__TIME_MASTER_LOCK
   5.261 -   Histogram       *masterLockLowTimeHist;
   5.262 -   Histogram       *masterLockHighTimeHist;
   5.263 -   #endif
   5.264 - }
   5.265 -MasterEnv;
   5.266 -
   5.267 -//=========================  Extra Stuff Data Strucs  =======================
   5.268 -typedef struct
   5.269 - {
   5.270 -
   5.271 - }
   5.272 -VMSExcp;
   5.273 -
   5.274 -struct _GateStruc
   5.275 - {
   5.276 -   int32 gateClosed;
   5.277 -   int32 preGateProgress;
   5.278 -   int32 waitProgress;
   5.279 -   int32 exitProgress;
   5.280 - };
   5.281 -//GateStruc
   5.282 -
   5.283 -//=======================  OS Thread related  ===============================
   5.284 -
   5.285 -void * coreLoop( void *paramsIn );  //standard PThreads fn prototype
   5.286 -void * coreLoop_Seq( void *paramsIn );  //standard PThreads fn prototype
   5.287 -void masterLoop( void *initData, VirtProcr *masterPr );
   5.288 -
   5.289 -
   5.290 -typedef struct
   5.291 - {
   5.292 -   void           *endThdPt;
   5.293 -   unsigned int    coreNum;
   5.294 - }
   5.295 -ThdParams;
   5.296 -
   5.297 -pthread_t       coreLoopThdHandles[ NUM_CORES ];  //pthread's virt-procr state
   5.298 -ThdParams      *coreLoopThdParams [ NUM_CORES ];
   5.299 -pthread_mutex_t suspendLock;
   5.300 -pthread_cond_t  suspend_cond;
   5.301 -
   5.302 -
   5.303 -
   5.304 -//=====================  Global Vars ===================
   5.305 -
   5.306 -volatile MasterEnv      *_VMSMasterEnv;
   5.307 -
   5.308 -
   5.309 -
   5.310 -
   5.311 -//===========================  Function Prototypes  =========================
   5.312 -
   5.313 -
   5.314 -//========== Setup and shutdown ==========
   5.315 -void
   5.316 -VMS__init();
   5.317 -
   5.318 -void
   5.319 -VMS__init_Seq();
   5.320 -
   5.321 -void
   5.322 -VMS__start_the_work_then_wait_until_done();
   5.323 -
   5.324 -void
   5.325 -VMS__start_the_work_then_wait_until_done_Seq();
   5.326 -
   5.327 -inline VirtProcr *
   5.328 -VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
   5.329 -
   5.330 -void
   5.331 -VMS__dissipate_procr( VirtProcr *procrToDissipate );
   5.332 -
   5.333 -   //Use this to create processor inside entry point & other places outside
   5.334 -   // the VMS system boundary (IE, not run in slave nor Master)
   5.335 -VirtProcr *
   5.336 -VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
   5.337 -
   5.338 -void
   5.339 -VMS_ext__dissipate_procr( VirtProcr *procrToDissipate );
   5.340 -
   5.341 -void
   5.342 -VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData );
   5.343 -
   5.344 -void
   5.345 -VMS__shutdown();
   5.346 -
   5.347 -void
   5.348 -VMS__cleanup_at_end_of_shutdown();
   5.349 -
   5.350 -void *
   5.351 -VMS__give_sem_env_for( VirtProcr *animPr );
   5.352 -
   5.353 -
   5.354 -//==============  Request Related  ===============
   5.355 -
   5.356 -void
   5.357 -VMS__suspend_procr( VirtProcr *callingPr );
   5.358 -
   5.359 -inline void
   5.360 -VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr );
   5.361 -
   5.362 -inline void
   5.363 -VMS__send_sem_request( void *semReqData, VirtProcr *callingPr );
   5.364 -
   5.365 -void
   5.366 -VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr );
   5.367 -
   5.368 -void inline
   5.369 -VMS__send_dissipate_req( VirtProcr *prToDissipate );
   5.370 -
   5.371 -inline void
   5.372 -VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr );
   5.373 -
   5.374 -VMSReqst *
   5.375 -VMS__take_next_request_out_of( VirtProcr *procrWithReq );
   5.376 -
   5.377 -inline void *
   5.378 -VMS__take_sem_reqst_from( VMSReqst *req );
   5.379 -
   5.380 -void inline
   5.381 -VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv,
   5.382 -                       ResumePrFnPtr resumePrFnPtr );
   5.383 -
   5.384 -//======================== STATS ======================
   5.385 -
   5.386 -//===== RDTSC wrapper ===== //Also runs with x86_64 code
   5.387 -
   5.388 -#define saveTimeStampCountInto(low, high) \
   5.389 -   asm volatile("RDTSC;                   \
   5.390 -                 movl %%eax, %0;          \
   5.391 -                 movl %%edx, %1;"         \
   5.392 -   /* outputs */ : "=m" (low), "=m" (high)\
   5.393 -   /* inputs  */ :                        \
   5.394 -   /* clobber */ : "%eax", "%edx"         \
   5.395 -                );
   5.396 -
   5.397 -#define saveLowTimeStampCountInto(low)    \
   5.398 -   asm volatile("RDTSC;                   \
   5.399 -                 movl %%eax, %0;"         \
   5.400 -   /* outputs */ : "=m" (low)             \
   5.401 -   /* inputs  */ :                        \
   5.402 -   /* clobber */ : "%eax", "%edx"         \
   5.403 -                );
   5.404 -
   5.405 -//====================
   5.406 -#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \
   5.407 -   makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \
   5.408 -   _VMSMasterEnv->measHists[idx] =  \
   5.409 -                       makeFixedBinHist( numBins, startVal, binWidth, name );
   5.410 -
   5.411 -
   5.412 -#define MEAS__SUB_CREATE  /*turn on/off subtraction of create from plugin*/
   5.413 -
   5.414 -#ifdef VPTHREAD
   5.415 -
   5.416 -//VPThread
   5.417 -#define createHistIdx      0
   5.418 -#define mutexLockHistIdx   1
   5.419 -#define mutexUnlockHistIdx 2
   5.420 -#define condWaitHistIdx    3
   5.421 -#define condSignalHistIdx  4
   5.422 -
   5.423 -#define MakeTheMeasHists() \
   5.424 -   _VMSMasterEnv->measHistsInfo = \
   5.425 -              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
   5.426 -   makeAMeasHist( createHistIdx,      "create",        250, 0, 100 ) \
   5.427 -   makeAMeasHist( mutexLockHistIdx,   "mutex_lock",    50, 0, 100 ) \
   5.428 -   makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock",  50, 0, 100 ) \
   5.429 -   makeAMeasHist( condWaitHistIdx,    "cond_wait",     50, 0, 100 ) \
   5.430 -   makeAMeasHist( condSignalHistIdx,  "cond_signal",   50, 0, 100 )
   5.431 -
   5.432 -#endif
   5.433 -
   5.434 -
   5.435 -#ifdef VCILK
   5.436 -
   5.437 -//VCilk
   5.438 -#define spawnHistIdx      0
   5.439 -#define syncHistIdx       1
   5.440 -
   5.441 -#define MakeTheMeasHists() \
   5.442 -   _VMSMasterEnv->measHistsInfo = \
   5.443 -              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
   5.444 -    makeAMeasHist( spawnHistIdx,      "Spawn",        50, 0, 200 ) \
   5.445 -    makeAMeasHist( syncHistIdx,       "Sync",         50, 0, 200 )
   5.446 -
   5.447 -
   5.448 -#endif
   5.449 -
   5.450 -#ifdef SSR
   5.451 -
   5.452 -//SSR
   5.453 -#define SendFromToHistIdx      0
   5.454 -#define SendOfTypeHistIdx      1
   5.455 -#define ReceiveFromToHistIdx   2
   5.456 -#define ReceiveOfTypeHistIdx   3
   5.457 -
   5.458 -#define MakeTheMeasHists() \
   5.459 -   _VMSMasterEnv->measHistsInfo = \
   5.460 -              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
   5.461 -    makeAMeasHist( SendFromToHistIdx,   "SendFromTo",    50, 0, 100 ) \
   5.462 -    makeAMeasHist( SendOfTypeHistIdx,   "SendOfType",    50, 0, 100 ) \
   5.463 -    makeAMeasHist( ReceiveFromToHistIdx,"ReceiveFromTo", 50, 0, 100 ) \
   5.464 -    makeAMeasHist( ReceiveOfTypeHistIdx,"ReceiveOfType", 50, 0, 100 )
   5.465 -
   5.466 -#endif
   5.467 -
   5.468 -//===========================================================================
   5.469 -//VPThread
   5.470 -
   5.471 -
   5.472 -#define Meas_startCreate \
   5.473 -    int32 startStamp, endStamp; \
   5.474 -    saveLowTimeStampCountInto( startStamp ); \
   5.475 -
   5.476 -#define Meas_endCreate \
   5.477 -    saveLowTimeStampCountInto( endStamp ); \
   5.478 -    addIntervalToHist( startStamp, endStamp, \
   5.479 -                                 _VMSMasterEnv->measHists[ createHistIdx ] );
   5.480 -
   5.481 -#define Meas_startMutexLock \
   5.482 -    int32 startStamp, endStamp; \
   5.483 -    saveLowTimeStampCountInto( startStamp ); \
   5.484 -
   5.485 -#define Meas_endMutexLock \
   5.486 -    saveLowTimeStampCountInto( endStamp ); \
   5.487 -    addIntervalToHist( startStamp, endStamp, \
   5.488 -                              _VMSMasterEnv->measHists[ mutexLockHistIdx ] );
   5.489 -
   5.490 -#define Meas_startMutexUnlock \
   5.491 -    int32 startStamp, endStamp; \
   5.492 -    saveLowTimeStampCountInto( startStamp ); \
   5.493 -
   5.494 -#define Meas_endMutexUnlock \
   5.495 -    saveLowTimeStampCountInto( endStamp ); \
   5.496 -    addIntervalToHist( startStamp, endStamp, \
   5.497 -                            _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] );
   5.498 -
   5.499 -#define Meas_startCondWait \
   5.500 -    int32 startStamp, endStamp; \
   5.501 -    saveLowTimeStampCountInto( startStamp ); \
   5.502 -
   5.503 -#define Meas_endCondWait \
   5.504 -    saveLowTimeStampCountInto( endStamp ); \
   5.505 -    addIntervalToHist( startStamp, endStamp, \
   5.506 -                               _VMSMasterEnv->measHists[ condWaitHistIdx ] );
   5.507 -
   5.508 -#define Meas_startCondSignal \
   5.509 -    int32 startStamp, endStamp; \
   5.510 -    saveLowTimeStampCountInto( startStamp ); \
   5.511 -
   5.512 -#define Meas_endCondSignal \
   5.513 -    saveLowTimeStampCountInto( endStamp ); \
   5.514 -    addIntervalToHist( startStamp, endStamp, \
   5.515 -                             _VMSMasterEnv->measHists[ condSignalHistIdx ] );
   5.516 -
   5.517 -//===========================================================================
   5.518 -// VCilk
   5.519 -#define Meas_startSpawn \
   5.520 -    int32 startStamp, endStamp; \
   5.521 -    saveLowTimeStampCountInto( startStamp ); \
   5.522 -
   5.523 -#define Meas_endSpawn \
   5.524 -    saveLowTimeStampCountInto( endStamp ); \
   5.525 -    addIntervalToHist( startStamp, endStamp, \
   5.526 -                             _VMSMasterEnv->measHists[ spawnHistIdx ] );
   5.527 -
   5.528 -#define Meas_startSync \
   5.529 -    int32 startStamp, endStamp; \
   5.530 -    saveLowTimeStampCountInto( startStamp ); \
   5.531 -
   5.532 -#define Meas_endSync \
   5.533 -    saveLowTimeStampCountInto( endStamp ); \
   5.534 -    addIntervalToHist( startStamp, endStamp, \
   5.535 -                             _VMSMasterEnv->measHists[ syncHistIdx ] );
   5.536 -
   5.537 -//===========================================================================
   5.538 -// SSR
   5.539 -#define Meas_startSendFromTo \
   5.540 -    int32 startStamp, endStamp; \
   5.541 -    saveLowTimeStampCountInto( startStamp ); \
   5.542 -
   5.543 -#define Meas_endSendFromTo \
   5.544 -    saveLowTimeStampCountInto( endStamp ); \
   5.545 -    addIntervalToHist( startStamp, endStamp, \
   5.546 -                             _VMSMasterEnv->measHists[ SendFromToHistIdx ] );
   5.547 -
   5.548 -#define Meas_startSendOfType \
   5.549 -    int32 startStamp, endStamp; \
   5.550 -    saveLowTimeStampCountInto( startStamp ); \
   5.551 -
   5.552 -#define Meas_endSendOfType \
   5.553 -    saveLowTimeStampCountInto( endStamp ); \
   5.554 -    addIntervalToHist( startStamp, endStamp, \
   5.555 -                             _VMSMasterEnv->measHists[ SendOfTypeHistIdx ] );
   5.556 -
   5.557 -#define Meas_startReceiveFromTo \
   5.558 -    int32 startStamp, endStamp; \
   5.559 -    saveLowTimeStampCountInto( startStamp ); \
   5.560 -
   5.561 -#define Meas_endReceiveFromTo \
   5.562 -    saveLowTimeStampCountInto( endStamp ); \
   5.563 -    addIntervalToHist( startStamp, endStamp, \
   5.564 -                             _VMSMasterEnv->measHists[ ReceiveFromToHistIdx ] );
   5.565 -
   5.566 -#define Meas_startReceiveOfType \
   5.567 -    int32 startStamp, endStamp; \
   5.568 -    saveLowTimeStampCountInto( startStamp ); \
   5.569 -
   5.570 -#define Meas_endReceiveOfType \
   5.571 -    saveLowTimeStampCountInto( endStamp ); \
   5.572 -    addIntervalToHist( startStamp, endStamp, \
   5.573 -                             _VMSMasterEnv->measHists[ReceiveOfTypeHistIdx ] );
   5.574 -
   5.575 -//=====
   5.576 -
   5.577 -#include "ProcrContext.h"
   5.578 -#include "probes.h"
   5.579 -#include "vutilities.h"
   5.580 -
   5.581 -#endif	/* _VMS_H */
   5.582 -
   5.583 +/*
   5.584 + *  Copyright 2009 OpenSourceStewardshipFoundation.org
   5.585 + *  Licensed under GNU General Public License version 2
   5.586 + *
   5.587 + * Author: seanhalle@yahoo.com
   5.588 + * 
   5.589 + */
   5.590 +
   5.591 +#ifndef _VMS_H
   5.592 +#define	_VMS_H
   5.593 +#define _GNU_SOURCE
   5.594 +
   5.595 +#include "VMS_primitive_data_types.h"
   5.596 +#include "Queue_impl/PrivateQueue.h"
   5.597 +#include "Histogram/Histogram.h"
   5.598 +#include "DynArray/DynArray.h"
   5.599 +#include "Hash_impl/PrivateHash.h"
   5.600 +#include "vmalloc.h"
   5.601 +
   5.602 +#include <pthread.h>
   5.603 +#include <sys/time.h>
   5.604 +
   5.605 +
   5.606 +//===============================  Debug  ===================================
   5.607 +//
   5.608 +//When SEQUENTIAL is defined, VMS does sequential exe in the main thread
   5.609 +// It still does co-routines and all the mechanisms are the same, it just
   5.610 +// has only a single thread and animates VPs one at a time
   5.611 +//#define SEQUENTIAL
   5.612 +
   5.613 +//#define USE_WORK_STEALING
   5.614 +
   5.615 +//turns on the probe-instrumentation in the application -- when not
   5.616 +// defined, the calls to the probe functions turn into comments
   5.617 +#define STATS__ENABLE_PROBES
   5.618 +//#define TURN_ON_DEBUG_PROBES
   5.619 +
   5.620 +//These defines turn types of debug messages on and off
   5.621 +// be sure debug messages are un-commented (next block of defines)
   5.622 +#define dbgAppFlow   TRUE /* Top level flow of application code -- general*/
   5.623 +#define dbgProbes    FALSE /* for issues inside probes themselves*/
   5.624 +#define dbgB2BMaster FALSE /* in coreloop, back to back master VPs*/
   5.625 +#define dbgRqstHdlr  FALSE /* in request handler code*/
   5.626 +
   5.627 +//Comment or un-comment the substitute half to turn on/off types of debug message
   5.628 +#define DEBUG(  bool, msg)         \
   5.629 +//  if( bool){ printf(msg); fflush(stdin);}
   5.630 +#define DEBUG1( bool, msg, param)  \
   5.631 +//   if(bool){printf(msg, param); fflush(stdin);}
   5.632 +#define DEBUG2( bool, msg, p1, p2) \
   5.633 +//   if(bool) {printf(msg, p1, p2); fflush(stdin);}
   5.634 +
   5.635 +#define ERROR(msg) printf(msg);
   5.636 +#define ERROR1(msg, param) printf(msg, param); 
   5.637 +#define ERROR2(msg, p1, p2) printf(msg, p1, p2);
   5.638 +
   5.639 +//===========================  STATS =======================
   5.640 +
   5.641 +   //when MEAS__TIME_STAMP_SUSP is defined, causes code to be inserted and
   5.642 +   // compiled-in that saves the low part of the time stamp count just before
   5.643 +   // suspending a processor and just after resuming that processor.  It is
   5.644 +   // saved into a field added to VirtProcr.  Have to sanity-check for
   5.645 +   // rollover of low portion into high portion.
   5.646 +//#define MEAS__TIME_STAMP_SUSP
   5.647 +//#define MEAS__TIME_MASTER
   5.648 +#define MEAS__TIME_PLUGIN
   5.649 +#define MEAS__TIME_MALLOC
   5.650 +//#define MEAS__TIME_MASTER_LOCK
   5.651 +#define MEAS__NUM_TIMES_TO_RUN 100000
   5.652 +
   5.653 +   //For code that calculates normalization-offset between TSC counts of
   5.654 +   // different cores.
   5.655 +#define NUM_TSC_ROUND_TRIPS 10
   5.656 +
   5.657 +
   5.658 +//=========================  Hardware related Constants =====================
   5.659 +   //This value is the number of hardware threads in the shared memory
   5.660 +   // machine
   5.661 +//#define NUM_CORES        8
   5.662 +
   5.663 +   // tradeoff amortizing master fixed overhead vs imbalance potential
   5.664 +   // when work-stealing, can make bigger, at risk of losing cache affinity
   5.665 +#define NUM_SCHED_SLOTS  5
   5.666 +
   5.667 +#define MIN_WORK_UNIT_CYCLES 20000
   5.668 +
   5.669 +#define MASTERLOCK_RETRIES 10000
   5.670 +
   5.671 +   // stack size in virtual processors created
   5.672 +#define VIRT_PROCR_STACK_SIZE 0x8000 /* 32K */
   5.673 +
   5.674 +   // memory for VMS__malloc
   5.675 +#define MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE 0x10000000 /* 256M */
   5.676 +
   5.677 +#define CACHE_LINE 64
   5.678 +#define PAGE_SIZE 4096
   5.679 +
   5.680 +
   5.681 +//==============================
   5.682 +
   5.683 +#define SUCCESS 0
   5.684 +
   5.685 +#define writeVMSQ     writePrivQ
   5.686 +#define readVMSQ      readPrivQ
   5.687 +#define makeVMSQ      makeVMSPrivQ
   5.688 +#define numInVMSQ     numInPrivQ
   5.689 +#define VMSQueueStruc PrivQueueStruc
   5.690 +
   5.691 +
   5.692 +
   5.693 +//===========================================================================
   5.694 +typedef unsigned long long TSCount;
   5.695 +
   5.696 +typedef struct _SchedSlot     SchedSlot;
   5.697 +typedef struct _VMSReqst      VMSReqst;
   5.698 +typedef struct _VirtProcr     VirtProcr;
   5.699 +typedef struct _IntervalProbe IntervalProbe;
   5.700 +typedef struct _GateStruc     GateStruc;
   5.701 +
   5.702 +
   5.703 +typedef VirtProcr * (*SlaveScheduler)  ( void *, int );   //semEnv, coreIdx
   5.704 +typedef void  (*RequestHandler)  ( VirtProcr *, void * ); //prWReqst, semEnv
   5.705 +typedef void  (*VirtProcrFnPtr)  ( void *, VirtProcr * ); //initData, animPr
   5.706 +typedef void    VirtProcrFn      ( void *, VirtProcr * ); //initData, animPr
   5.707 +typedef void  (*ResumePrFnPtr)   ( VirtProcr *, void * );
   5.708 +
   5.709 +
   5.710 +//============= Requests ===========
   5.711 +//
   5.712 +
   5.713 +enum VMSReqstType   //avoid starting enums at 0, for debug reasons
   5.714 + {
   5.715 +   semantic = 1,
   5.716 +   createReq,
   5.717 +   dissipate,
   5.718 +   VMSSemantic      //goes with VMSSemReqst below
   5.719 + };
   5.720 +
   5.721 +struct _VMSReqst
   5.722 + {
   5.723 +   enum VMSReqstType  reqType;//used for dissipate and in future for IO requests
   5.724 +   void              *semReqData;
   5.725 +
   5.726 +   VMSReqst *nextReqst;
   5.727 + };
   5.728 +//VMSReqst
   5.729 +
   5.730 +enum VMSSemReqstType   //These are equivalent to semantic requests, but for
   5.731 + {                     // VMS's services available directly to app, like OS
   5.732 +   createProbe = 1,    // and probe services -- like a VMS-wide built-in lang
   5.733 +   openFile,
   5.734 +   otherIO
   5.735 + };
   5.736 +
   5.737 +typedef struct
   5.738 + { enum VMSSemReqstType reqType;
   5.739 +   VirtProcr           *requestingPr;
   5.740 +   char                *nameStr;  //for create probe
   5.741 + }
   5.742 + VMSSemReq;
   5.743 +
   5.744 +
   5.745 +//====================  Core data structures  ===================
   5.746 +
   5.747 +struct _SchedSlot
   5.748 + {
   5.749 +   int         workIsDone;
   5.750 +   int         needsProcrAssigned;
   5.751 +   VirtProcr  *procrAssignedToSlot;
   5.752 + };
   5.753 +//SchedSlot
   5.754 +
   5.755 +/*WARNING: re-arranging this data structure could cause VP switching
   5.756 + *         assembly code to fail -- hard-codes offsets of fields
   5.757 + */
   5.758 +struct _VirtProcr
   5.759 + { int         procrID;  //for debugging -- count up each time create
   5.760 +   int         coreAnimatedBy;
   5.761 +   void       *startOfStack;
   5.762 +   void       *stackPtr;
   5.763 +   void       *framePtr;
   5.764 +   void       *nextInstrPt;
   5.765 +   
   5.766 +   void       *coreLoopStartPt;  //allows proto-runtime to be linked later
   5.767 +   void       *coreLoopFramePtr; //restore before jmp back to core loop
   5.768 +   void       *coreLoopStackPtr; //restore before jmp back to core loop
   5.769 +
   5.770 +   void       *initialData;
   5.771 +
   5.772 +   SchedSlot  *schedSlot;
   5.773 +   VMSReqst   *requests;
   5.774 +
   5.775 +   void       *semanticData; //this lives here for the life of VP
   5.776 +   void       *dataRetFromReq;//values returned from plugin to VP go here
   5.777 +
   5.778 +      //=========== MEASUREMENT STUFF ==========
   5.779 +   #ifdef MEAS__TIME_STAMP_SUSP
   5.780 +   unsigned int preSuspTSCLow;
   5.781 +   unsigned int postSuspTSCLow;
   5.782 +   #endif
   5.783 +   #ifdef MEAS__TIME_MASTER /* in VirtProcr because multiple masterVPs*/
   5.784 +   unsigned int startMasterTSCLow;USE_GNU
   5.785 +   unsigned int endMasterTSCLow;
   5.786 +   #endif
   5.787 +      //========================================
   5.788 +   
   5.789 +   float64      createPtInSecs;  //have space but don't use on some configs
   5.790 + };
   5.791 +//VirtProcr
   5.792 +
   5.793 +
   5.794 +/*WARNING: re-arranging this data structure could cause VP-switching
   5.795 + *         assembly code to fail -- hard-codes offsets of fields
   5.796 + *         (because -O3 messes with things otherwise)
   5.797 + */
   5.798 +typedef struct
   5.799 + {
   5.800 +   SlaveScheduler   slaveScheduler;
   5.801 +   RequestHandler   requestHandler;
   5.802 +   
   5.803 +   SchedSlot     ***allSchedSlots;
   5.804 +   VMSQueueStruc **readyToAnimateQs;
   5.805 +   VirtProcr      **masterVPs;
   5.806 +
   5.807 +   void            *semanticEnv;
   5.808 +   void            *OSEventStruc;   //for future, when add I/O to BLIS
   5.809 +   MallocProlog    *freeListHead;
   5.810 +   int32            amtOfOutstandingMem; //total currently allocated
   5.811 +
   5.812 +   void            *coreLoopReturnPt;//addr to jump to to re-enter coreLoop
   5.813 +
   5.814 +   int32            setupComplete;
   5.815 +   volatile int32   masterLock;
   5.816 +
   5.817 +   int32            numMasterInARow[NUM_CORES];//detect back-to-back masterVP
   5.818 +   GateStruc       *workStealingGates[ NUM_CORES ]; //concurrent work-steal
   5.819 +   int32            workStealingLock;
   5.820 +   
   5.821 +   int32            numProcrsCreated; //gives ordering to processor creation
   5.822 +
   5.823 +      //=========== MEASUREMENT STUFF =============
   5.824 +   IntervalProbe  **intervalProbes;
   5.825 +   PrivDynArrayInfo    *dynIntervalProbesInfo;
   5.826 +   HashTable       *probeNameHashTbl;
   5.827 +   int32            masterCreateProbeID;
   5.828 +   float64          createPtInSecs;
   5.829 +   Histogram      **measHists;
   5.830 +   PrivDynArrayInfo *measHistsInfo;
   5.831 +   #ifdef MEAS__TIME_PLUGIN
   5.832 +   Histogram       *reqHdlrLowTimeHist;
   5.833 +   Histogram       *reqHdlrHighTimeHist;
   5.834 +   #endif
   5.835 +   #ifdef MEAS__TIME_MALLOC
   5.836 +   Histogram       *mallocTimeHist;
   5.837 +   Histogram       *freeTimeHist;
   5.838 +   #endif
   5.839 +   #ifdef MEAS__TIME_MASTER_LOCK
   5.840 +   Histogram       *masterLockLowTimeHist;
   5.841 +   Histogram       *masterLockHighTimeHist;
   5.842 +   #endif
   5.843 + }
   5.844 +MasterEnv;
   5.845 +
   5.846 +//=========================  Extra Stuff Data Strucs  =======================
   5.847 +typedef struct
   5.848 + {
   5.849 +
   5.850 + }
   5.851 +VMSExcp;
   5.852 +
   5.853 +struct _GateStruc
   5.854 + {
   5.855 +   int32 gateClosed;
   5.856 +   int32 preGateProgress;
   5.857 +   int32 waitProgress;
   5.858 +   int32 exitProgress;
   5.859 + };
   5.860 +//GateStruc
   5.861 +
   5.862 +//=======================  OS Thread related  ===============================
   5.863 +
   5.864 +void * coreLoop( void *paramsIn );  //standard PThreads fn prototype
   5.865 +void * coreLoop_Seq( void *paramsIn );  //standard PThreads fn prototype
   5.866 +void masterLoop( void *initData, VirtProcr *masterPr );
   5.867 +
   5.868 +
   5.869 +typedef struct
   5.870 + {
   5.871 +   void           *endThdPt;
   5.872 +   unsigned int    coreNum;
   5.873 + }
   5.874 +ThdParams;
   5.875 +
   5.876 +pthread_t       coreLoopThdHandles[ NUM_CORES ];  //pthread's virt-procr state
   5.877 +ThdParams      *coreLoopThdParams [ NUM_CORES ];
   5.878 +pthread_mutex_t suspendLock;
   5.879 +pthread_cond_t  suspend_cond;
   5.880 +
   5.881 +
   5.882 +
   5.883 +//=====================  Global Vars ===================
   5.884 +
   5.885 +volatile MasterEnv      *_VMSMasterEnv;
   5.886 +
   5.887 +
   5.888 +
   5.889 +
   5.890 +//===========================  Function Prototypes  =========================
   5.891 +
   5.892 +
   5.893 +//========== Setup and shutdown ==========
   5.894 +void
   5.895 +VMS__init();
   5.896 +
   5.897 +void
   5.898 +VMS__init_Seq();
   5.899 +
   5.900 +void
   5.901 +VMS__start_the_work_then_wait_until_done();
   5.902 +
   5.903 +void
   5.904 +VMS__start_the_work_then_wait_until_done_Seq();
   5.905 +
   5.906 +inline VirtProcr *
   5.907 +VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
   5.908 +
   5.909 +void
   5.910 +VMS__dissipate_procr( VirtProcr *procrToDissipate );
   5.911 +
   5.912 +   //Use this to create processor inside entry point & other places outside
   5.913 +   // the VMS system boundary (IE, not run in slave nor Master)
   5.914 +VirtProcr *
   5.915 +VMS_ext__create_procr( VirtProcrFnPtr fnPtr, void *initialData );
   5.916 +
   5.917 +void
   5.918 +VMS_ext__dissipate_procr( VirtProcr *procrToDissipate );
   5.919 +
   5.920 +void
   5.921 +VMS__throw_exception( char *msgStr, VirtProcr *reqstPr, VMSExcp *excpData );
   5.922 +
   5.923 +void
   5.924 +VMS__shutdown();
   5.925 +
   5.926 +void
   5.927 +VMS__cleanup_at_end_of_shutdown();
   5.928 +
   5.929 +void *
   5.930 +VMS__give_sem_env_for( VirtProcr *animPr );
   5.931 +
   5.932 +
   5.933 +//==============  Request Related  ===============
   5.934 +
   5.935 +void
   5.936 +VMS__suspend_procr( VirtProcr *callingPr );
   5.937 +
   5.938 +inline void
   5.939 +VMS__add_sem_request_in_mallocd_VMSReqst( void *semReqData, VirtProcr *callingPr );
   5.940 +
   5.941 +inline void
   5.942 +VMS__send_sem_request( void *semReqData, VirtProcr *callingPr );
   5.943 +
   5.944 +void
   5.945 +VMS__send_create_procr_req( void *semReqData, VirtProcr *reqstingPr );
   5.946 +
   5.947 +void inline
   5.948 +VMS__send_dissipate_req( VirtProcr *prToDissipate );
   5.949 +
   5.950 +inline void
   5.951 +VMS__send_VMSSem_request( void *semReqData, VirtProcr *callingPr );
   5.952 +
   5.953 +VMSReqst *
   5.954 +VMS__take_next_request_out_of( VirtProcr *procrWithReq );
   5.955 +
   5.956 +inline void *
   5.957 +VMS__take_sem_reqst_from( VMSReqst *req );
   5.958 +
   5.959 +void inline
   5.960 +VMS__handle_VMSSemReq( VMSReqst *req, VirtProcr *requestingPr, void *semEnv,
   5.961 +                       ResumePrFnPtr resumePrFnPtr );
   5.962 +
   5.963 +//======================== STATS ======================
   5.964 +
   5.965 +//===== RDTSC wrapper ===== //Also works when compiled as x86_64 code
   5.966 +
   5.967 +#define saveTimeStampCountInto(low, high) \
   5.968 +   asm volatile("RDTSC;                   \
   5.969 +                 movl %%eax, %0;          \
   5.970 +                 movl %%edx, %1;"         \
   5.971 +   /* outputs */ : "=m" (low), "=m" (high)\
   5.972 +   /* inputs  */ :                        \
   5.973 +   /* clobber */ : "%eax", "%edx"         \
   5.974 +                );
   5.975 +
   5.976 +#define saveLowTimeStampCountInto(low)    \
   5.977 +   asm volatile("RDTSC;                   \
   5.978 +                 movl %%eax, %0;"         \
   5.979 +   /* outputs */ : "=m" (low)             \
   5.980 +   /* inputs  */ :                        \
   5.981 +   /* clobber */ : "%eax", "%edx"         \
   5.982 +                );
   5.983 +
   5.984 +//====================
   5.985 +#define makeAMeasHist( idx, name, numBins, startVal, binWidth ) \
   5.986 +   makeHighestDynArrayIndexBeAtLeast( _VMSMasterEnv->measHistsInfo, idx ); \
   5.987 +   _VMSMasterEnv->measHists[idx] =  \
   5.988 +                       makeFixedBinHist( numBins, startVal, binWidth, name );
   5.989 +
   5.990 +
   5.991 +#define MEAS__SUB_CREATE  /*turn on/off subtraction of create from plugin*/
   5.992 +
   5.993 +#ifdef VPTHREAD
   5.994 +
   5.995 +//VPThread
   5.996 +#define createHistIdx      0
   5.997 +#define mutexLockHistIdx   1
   5.998 +#define mutexUnlockHistIdx 2
   5.999 +#define condWaitHistIdx    3
  5.1000 +#define condSignalHistIdx  4
  5.1001 +
  5.1002 +#define MakeTheMeasHists() \
  5.1003 +   _VMSMasterEnv->measHistsInfo = \
  5.1004 +              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
  5.1005 +   makeAMeasHist( createHistIdx,      "create",        250, 0, 100 ) \
  5.1006 +   makeAMeasHist( mutexLockHistIdx,   "mutex_lock",    50, 0, 100 ) \
  5.1007 +   makeAMeasHist( mutexUnlockHistIdx, "mutex_unlock",  50, 0, 100 ) \
  5.1008 +   makeAMeasHist( condWaitHistIdx,    "cond_wait",     50, 0, 100 ) \
  5.1009 +   makeAMeasHist( condSignalHistIdx,  "cond_signal",   50, 0, 100 )
  5.1010 +
  5.1011 +#endif
  5.1012 +
  5.1013 +
  5.1014 +#ifdef VCILK
  5.1015 +
  5.1016 +//VCilk
  5.1017 +#define spawnHistIdx      0
  5.1018 +#define syncHistIdx       1
  5.1019 +
  5.1020 +#define MakeTheMeasHists() \
  5.1021 +   _VMSMasterEnv->measHistsInfo = \
  5.1022 +              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
  5.1023 +    makeAMeasHist( spawnHistIdx,      "Spawn",        50, 0, 200 ) \
  5.1024 +    makeAMeasHist( syncHistIdx,       "Sync",         50, 0, 200 )
  5.1025 +
  5.1026 +
  5.1027 +#endif
  5.1028 +
  5.1029 +#ifdef SSR
  5.1030 +
  5.1031 +//SSR
  5.1032 +#define SendFromToHistIdx      0
  5.1033 +#define SendOfTypeHistIdx      1
  5.1034 +#define ReceiveFromToHistIdx   2
  5.1035 +#define ReceiveOfTypeHistIdx   3
  5.1036 +
  5.1037 +#define MakeTheMeasHists() \
  5.1038 +   _VMSMasterEnv->measHistsInfo = \
  5.1039 +              makePrivDynArrayOfSize( (void***)&(_VMSMasterEnv->measHists), 200); \
  5.1040 +    makeAMeasHist( SendFromToHistIdx,   "SendFromTo",    50, 0, 100 ) \
  5.1041 +    makeAMeasHist( SendOfTypeHistIdx,   "SendOfType",    50, 0, 100 ) \
  5.1042 +    makeAMeasHist( ReceiveFromToHistIdx,"ReceiveFromTo", 50, 0, 100 ) \
  5.1043 +    makeAMeasHist( ReceiveOfTypeHistIdx,"ReceiveOfType", 50, 0, 100 )
  5.1044 +
  5.1045 +#endif
  5.1046 +
  5.1047 +//===========================================================================
  5.1048 +//VPThread
  5.1049 +
  5.1050 +
  5.1051 +#define Meas_startCreate \
  5.1052 +    int32 startStamp, endStamp; \
  5.1053 +    saveLowTimeStampCountInto( startStamp ); \
  5.1054 +
  5.1055 +#define Meas_endCreate \
  5.1056 +    saveLowTimeStampCountInto( endStamp ); \
  5.1057 +    addIntervalToHist( startStamp, endStamp, \
  5.1058 +                                 _VMSMasterEnv->measHists[ createHistIdx ] );
  5.1059 +
  5.1060 +#define Meas_startMutexLock \
  5.1061 +    int32 startStamp, endStamp; \
  5.1062 +    saveLowTimeStampCountInto( startStamp ); \
  5.1063 +
  5.1064 +#define Meas_endMutexLock \
  5.1065 +    saveLowTimeStampCountInto( endStamp ); \
  5.1066 +    addIntervalToHist( startStamp, endStamp, \
  5.1067 +                              _VMSMasterEnv->measHists[ mutexLockHistIdx ] );
  5.1068 +
  5.1069 +#define Meas_startMutexUnlock \
  5.1070 +    int32 startStamp, endStamp; \
  5.1071 +    saveLowTimeStampCountInto( startStamp ); \
  5.1072 +
  5.1073 +#define Meas_endMutexUnlock \
  5.1074 +    saveLowTimeStampCountInto( endStamp ); \
  5.1075 +    addIntervalToHist( startStamp, endStamp, \
  5.1076 +                            _VMSMasterEnv->measHists[ mutexUnlockHistIdx ] );
  5.1077 +
  5.1078 +#define Meas_startCondWait \
  5.1079 +    int32 startStamp, endStamp; \
  5.1080 +    saveLowTimeStampCountInto( startStamp ); \
  5.1081 +
  5.1082 +#define Meas_endCondWait \
  5.1083 +    saveLowTimeStampCountInto( endStamp ); \
  5.1084 +    addIntervalToHist( startStamp, endStamp, \
  5.1085 +                               _VMSMasterEnv->measHists[ condWaitHistIdx ] );
  5.1086 +
  5.1087 +#define Meas_startCondSignal \
  5.1088 +    int32 startStamp, endStamp; \
  5.1089 +    saveLowTimeStampCountInto( startStamp ); \
  5.1090 +
  5.1091 +#define Meas_endCondSignal \
  5.1092 +    saveLowTimeStampCountInto( endStamp ); \
  5.1093 +    addIntervalToHist( startStamp, endStamp, \
  5.1094 +                             _VMSMasterEnv->measHists[ condSignalHistIdx ] );
  5.1095 +
  5.1096 +//===========================================================================
  5.1097 +// VCilk
  5.1098 +#define Meas_startSpawn \
  5.1099 +    int32 startStamp, endStamp; \
  5.1100 +    saveLowTimeStampCountInto( startStamp ); \
  5.1101 +
  5.1102 +#define Meas_endSpawn \
  5.1103 +    saveLowTimeStampCountInto( endStamp ); \
  5.1104 +    addIntervalToHist( startStamp, endStamp, \
  5.1105 +                             _VMSMasterEnv->measHists[ spawnHistIdx ] );
  5.1106 +
  5.1107 +#define Meas_startSync \
  5.1108 +    int32 startStamp, endStamp; \
  5.1109 +    saveLowTimeStampCountInto( startStamp ); \
  5.1110 +
  5.1111 +#define Meas_endSync \
  5.1112 +    saveLowTimeStampCountInto( endStamp ); \
  5.1113 +    addIntervalToHist( startStamp, endStamp, \
  5.1114 +                             _VMSMasterEnv->measHists[ syncHistIdx ] );
  5.1115 +
  5.1116 +//===========================================================================
  5.1117 +// SSR
  5.1118 +#define Meas_startSendFromTo \
  5.1119 +    int32 startStamp, endStamp; \
  5.1120 +    saveLowTimeStampCountInto( startStamp ); \
  5.1121 +
  5.1122 +#define Meas_endSendFromTo \
  5.1123 +    saveLowTimeStampCountInto( endStamp ); \
  5.1124 +    addIntervalToHist( startStamp, endStamp, \
  5.1125 +                             _VMSMasterEnv->measHists[ SendFromToHistIdx ] );
  5.1126 +
  5.1127 +#define Meas_startSendOfType \
  5.1128 +    int32 startStamp, endStamp; \
  5.1129 +    saveLowTimeStampCountInto( startStamp ); \
  5.1130 +
  5.1131 +#define Meas_endSendOfType \
  5.1132 +    saveLowTimeStampCountInto( endStamp ); \
  5.1133 +    addIntervalToHist( startStamp, endStamp, \
  5.1134 +                             _VMSMasterEnv->measHists[ SendOfTypeHistIdx ] );
  5.1135 +
  5.1136 +#define Meas_startReceiveFromTo \
  5.1137 +    int32 startStamp, endStamp; \
  5.1138 +    saveLowTimeStampCountInto( startStamp ); \
  5.1139 +
  5.1140 +#define Meas_endReceiveFromTo \
  5.1141 +    saveLowTimeStampCountInto( endStamp ); \
  5.1142 +    addIntervalToHist( startStamp, endStamp, \
  5.1143 +                             _VMSMasterEnv->measHists[ ReceiveFromToHistIdx ] );
  5.1144 +
  5.1145 +#define Meas_startReceiveOfType \
  5.1146 +    int32 startStamp, endStamp; \
  5.1147 +    saveLowTimeStampCountInto( startStamp ); \
  5.1148 +
  5.1149 +#define Meas_endReceiveOfType \
  5.1150 +    saveLowTimeStampCountInto( endStamp ); \
  5.1151 +    addIntervalToHist( startStamp, endStamp, \
  5.1152 +                             _VMSMasterEnv->measHists[ReceiveOfTypeHistIdx ] );
  5.1153 +
  5.1154 +//=====
  5.1155 +
  5.1156 +#include "ProcrContext.h"
  5.1157 +#include "probes.h"
  5.1158 +#include "vutilities.h"
  5.1159 +
  5.1160 +#endif	/* _VMS_H */
  5.1161 +

     6.1 --- a/VMS_primitive_data_types.h	Thu Oct 06 16:24:17 2011 +0200
     6.2 +++ b/VMS_primitive_data_types.h	Wed Jan 04 16:10:11 2012 -0800
     6.3 @@ -1,53 +1,53 @@
     6.4 -/*
     6.5 - *  Copyright 2009 OpenSourceStewardshipFoundation.org
     6.6 - *  Licensed under GNU General Public License version 2
     6.7 - *  
     6.8 - * Author: seanhalle@yahoo.com
     6.9 - *  
    6.10 -
    6.11 - */
    6.12 -
    6.13 -#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H
    6.14 -#define	_BLIS_PRIMITIVE_DATA_TYPES_H
    6.15 -
    6.16 -
    6.17 -/*For portability, need primitive data types that have a well defined
    6.18 - * size, and well-defined layout into bytes
    6.19 - *To do this, provide BLIS standard aliases for all primitive data types
    6.20 - *These aliases must be used in all BLIS functions instead of the ANSI types
    6.21 - *
    6.22 - *These definitions will be replaced inside each specialization module
    6.23 - * according to the compiler used in that module and the hardware being
    6.24 - * specialized to.
    6.25 - */
    6.26 -/*
    6.27 -#define    int8  char
    6.28 -#define   uint8  char
    6.29 -#define    int16 short
    6.30 -#define   uint16 unsigned short
    6.31 -#define    int32 int
    6.32 -#define   uint32 unsigned int
    6.33 -#define    int64 long long
    6.34 -#define   uint64 unsigned long long
    6.35 -#define  float32 float
    6.36 -#define  float64 double
    6.37 -*/
    6.38 -typedef char               bool8;
    6.39 -typedef char               int8;
    6.40 -typedef char               uint8;
    6.41 -typedef short              int16;
    6.42 -typedef unsigned short     uint16;
    6.43 -typedef int                int32;
    6.44 -typedef unsigned int       uint32;
    6.45 -typedef long long          int64;
    6.46 -typedef unsigned long long uint64;
    6.47 -typedef float              float32;
    6.48 -typedef double             float64;
    6.49 -//typedef double double      float128;
    6.50 -#define float128 double double
    6.51 -
    6.52 -#define TRUE  1
    6.53 -#define FALSE 0
    6.54 -
    6.55 -#endif	/* _BLIS_PRIMITIVE_DATA_TYPES_H */
    6.56 -
    6.57 +/*
    6.58 + *  Copyright 2009 OpenSourceStewardshipFoundation.org
    6.59 + *  Licensed under GNU General Public License version 2
    6.60 + *  
    6.61 + * Author: seanhalle@yahoo.com
    6.62 + *  
    6.63 +
    6.64 + */
    6.65 +
    6.66 +#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H
    6.67 +#define	_BLIS_PRIMITIVE_DATA_TYPES_H
    6.68 +
    6.69 +
    6.70 +/*For portability, need primitive data types that have a well defined
    6.71 + * size, and well-defined layout into bytes
    6.72 + *To do this, provide BLIS standard aliases for all primitive data types
    6.73 + *These aliases must be used in all BLIS functions instead of the ANSI types
    6.74 + *
    6.75 + *These definitions will be replaced inside each specialization module
    6.76 + * according to the compiler used in that module and the hardware being
    6.77 + * specialized to.
    6.78 + */
    6.79 +/*
    6.80 +#define    int8  char
    6.81 +#define   uint8  char
    6.82 +#define    int16 short
    6.83 +#define   uint16 unsigned short
    6.84 +#define    int32 int
    6.85 +#define   uint32 unsigned int
    6.86 +#define    int64 long long
    6.87 +#define   uint64 unsigned long long
    6.88 +#define  float32 float
    6.89 +#define  float64 double
    6.90 +*/
    6.91 +typedef char               bool8;
    6.92 +typedef char               int8;
    6.93 +typedef char               uint8;
    6.94 +typedef short              int16;
    6.95 +typedef unsigned short     uint16;
    6.96 +typedef int                int32;
    6.97 +typedef unsigned int       uint32;
    6.98 +typedef long long          int64;
    6.99 +typedef unsigned long long uint64;
   6.100 +typedef float              float32;
   6.101 +typedef double             float64;
   6.102 +//typedef double double      float128;
   6.103 +#define float128 double double
   6.104 +
   6.105 +#define TRUE  1
   6.106 +#define FALSE 0
   6.107 +
   6.108 +#endif	/* _BLIS_PRIMITIVE_DATA_TYPES_H */
   6.109 +

     7.1 --- a/probes.h	Thu Oct 06 16:24:17 2011 +0200
     7.2 +++ b/probes.h	Wed Jan 04 16:10:11 2012 -0800
     7.3 @@ -1,195 +1,195 @@
     7.4 -/*
     7.5 - *  Copyright 2009 OpenSourceStewardshipFoundation.org
     7.6 - *  Licensed under GNU General Public License version 2
     7.7 - *
     7.8 - * Author: seanhalle@yahoo.com
     7.9 - * 
    7.10 - */
    7.11 -
    7.12 -#ifndef _PROBES_H
    7.13 -#define	_PROBES_H
    7.14 -#define _GNU_SOURCE
    7.15 -
    7.16 -#include "VMS_primitive_data_types.h"
    7.17 -
    7.18 -#include <sys/time.h>
    7.19 -
    7.20 -
    7.21 -   //when STATS__TURN_ON_PROBES is defined allows using probes to measure
    7.22 -   // time intervals.  The probes are macros that only compile to something
    7.23 -   // when STATS__TURN_ON_PROBES is defined.  The probes are saved in the
    7.24 -   // master env -- but only when this is defined.
    7.25 -   //The TSC probes use RDTSC instr, can be unreliable, Dbl uses gettimeofday
    7.26 -#define STATS__TURN_ON_PROBES
    7.27 -//#define STATS__USE_TSC_PROBES
    7.28 -#define STATS__USE_DBL_PROBES
    7.29 -
    7.30 -//typedef struct _IntervalProbe IntervalProbe; //in VMS.h
    7.31 -
    7.32 -struct _IntervalProbe
    7.33 - {
    7.34 -   char           *nameStr;
    7.35 -   int32           probeID;
    7.36 -
    7.37 -   int32           schedChoiceWasRecorded;
    7.38 -   int32           coreNum;
    7.39 -   int32           procrID;
    7.40 -   float64         procrCreateSecs;
    7.41 -
    7.42 -   #ifdef STATS__USE_TSC_PROBES
    7.43 -   TSCount    startStamp;
    7.44 -   TSCount    endStamp;
    7.45 -   #else
    7.46 -   struct timeval  startStamp;
    7.47 -   struct timeval  endStamp;
    7.48 -   #endif
    7.49 -   float64         startSecs;
    7.50 -   float64         endSecs;
    7.51 -   float64         interval;
    7.52 -   DblHist        *hist;//if NULL, then is single interval probe
    7.53 - };
    7.54 -
    7.55 -
    7.56 -//============================= Statistics ==================================
    7.57 -
    7.58 -   //Frequency of TS counts
    7.59 -   //TODO: change freq for each machine
    7.60 -#define TSCOUNT_FREQ 3180000000
    7.61 -
    7.62 -inline TSCount getTSCount();
    7.63 -
    7.64 -
    7.65 -//======================== Probes =============================
    7.66 -//
    7.67 -// Use macros to allow turning probes off with a #define switch
    7.68 -#ifdef STATS__ENABLE_PROBES
    7.69 -int32
    7.70 -VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr);
    7.71 -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \
    7.72 -        VMS_impl__record_time_point_in_new_probe( nameStr, animPr )
    7.73 -
    7.74 -int32
    7.75 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr );
    7.76 -#define VMS_ext__record_time_point_into_new_probe( nameStr ) \
    7.77 -        VMS_ext_impl__record_time_point_into_new_probe( nameStr )
    7.78 -
    7.79 -
    7.80 -int32
    7.81 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr );
    7.82 -#define VMS__create_single_interval_probe( nameStr, animPr ) \
    7.83 -        VMS_impl__create_single_interval_probe( nameStr, animPr )
    7.84 -
    7.85 -
    7.86 -int32
    7.87 -VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
    7.88 -               float64 binWidth, char    *nameStr, VirtProcr *animPr );
    7.89 -#define VMS__create_histogram_probe(      numBins, startValue,              \
    7.90 -                                          binWidth, nameStr, animPr )       \
    7.91 -        VMS_impl__create_histogram_probe( numBins, startValue,              \
    7.92 -                                          binWidth, nameStr, animPr )
    7.93 -void
    7.94 -VMS_impl__free_probe( IntervalProbe *probe );
    7.95 -#define VMS__free_probe( probe ) \
    7.96 -        VMS_impl__free_probe( probe )
    7.97 -
    7.98 -void
    7.99 -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr );
   7.100 -#define VMS__index_probe_by_its_name( probeID, animPr ) \
   7.101 -        VMS_impl__index_probe_by_its_name( probeID, animPr )
   7.102 -
   7.103 -IntervalProbe *
   7.104 -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr );
   7.105 -#define VMS__get_probe_by_name( probeID, animPr ) \
   7.106 -        VMS_impl__get_probe_by_name( probeName, animPr )
   7.107 -
   7.108 -void
   7.109 -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr );
   7.110 -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \
   7.111 -        VMS_impl__record_sched_choice_into_probe( probeID, animPr )
   7.112 -
   7.113 -void
   7.114 -VMS_impl__record_interval_start_in_probe( int32 probeID );
   7.115 -#define VMS__record_interval_start_in_probe( probeID ) \
   7.116 -        VMS_impl__record_interval_start_in_probe( probeID )
   7.117 -
   7.118 -void
   7.119 -VMS_impl__record_interval_end_in_probe( int32 probeID );
   7.120 -#define VMS__record_interval_end_in_probe( probeID ) \
   7.121 -        VMS_impl__record_interval_end_in_probe( probeID )
   7.122 -
   7.123 -void
   7.124 -VMS_impl__print_stats_of_probe( int32 probeID );
   7.125 -#define VMS__print_stats_of_probe( probeID ) \
   7.126 -        VMS_impl__print_stats_of_probe( probeID )
   7.127 -
   7.128 -void
   7.129 -VMS_impl__print_stats_of_all_probes();
   7.130 -#define VMS__print_stats_of_all_probes() \
   7.131 -        VMS_impl__print_stats_of_all_probes()
   7.132 -
   7.133 -
   7.134 -#else
   7.135 -int32
   7.136 -VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr);
   7.137 -#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \
   7.138 -       0 /* do nothing */
   7.139 -
   7.140 -int32
   7.141 -VMS_ext_impl__record_time_point_into_new_probe( char *nameStr );
   7.142 -#define VMS_ext__record_time_point_into_new_probe( nameStr ) \
   7.143 -       0 /* do nothing */
   7.144 -
   7.145 -
   7.146 -int32
   7.147 -VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr );
   7.148 -#define VMS__create_single_interval_probe( nameStr, animPr ) \
   7.149 -       0 /* do nothing */
   7.150 -
   7.151 -
   7.152 -int32
   7.153 -VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
   7.154 -               float64 binWidth, char    *nameStr, VirtProcr *animPr );
   7.155 -#define VMS__create_histogram_probe(      numBins, startValue,              \
   7.156 -                                          binWidth, nameStr, animPr )       \
   7.157 -       0 /* do nothing */
   7.158 -
   7.159 -void
   7.160 -VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr );
   7.161 -#define VMS__index_probe_by_its_name( probeID, animPr ) \
   7.162 -        /* do nothing */
   7.163 -
   7.164 -IntervalProbe *
   7.165 -VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr );
   7.166 -#define VMS__get_probe_by_name( probeID, animPr ) \
   7.167 -       NULL /* do nothing */
   7.168 -
   7.169 -void
   7.170 -VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr );
   7.171 -#define VMS__record_sched_choice_into_probe( probeID, animPr ) \
   7.172 -        /* do nothing */
   7.173 -
   7.174 -void
   7.175 -VMS_impl__record_interval_start_in_probe( int32 probeID );
   7.176 -#define VMS__record_interval_start_in_probe( probeID ) \
   7.177 -        /* do nothing */
   7.178 -
   7.179 -void
   7.180 -VMS_impl__record_interval_end_in_probe( int32 probeID );
   7.181 -#define VMS__record_interval_end_in_probe( probeID ) \
   7.182 -        /* do nothing */
   7.183 -
   7.184 -inline void doNothing();
   7.185 -void
   7.186 -VMS_impl__print_stats_of_probe( int32 probeID );
   7.187 -#define VMS__print_stats_of_probe( probeID ) \
   7.188 -        doNothing/* do nothing */
   7.189 -
   7.190 -void
   7.191 -VMS_impl__print_stats_of_all_probes();
   7.192 -#define VMS__print_stats_of_all_probes \
   7.193 -        doNothing/* do nothing */
   7.194 -
   7.195 -#endif   /* defined STATS__ENABLE_PROBES */
   7.196 -
   7.197 -#endif	/* _PROBES_H */
   7.198 -
   7.199 +/*
   7.200 + *  Copyright 2009 OpenSourceStewardshipFoundation.org
   7.201 + *  Licensed under GNU General Public License version 2
   7.202 + *
   7.203 + * Author: seanhalle@yahoo.com
   7.204 + * 
   7.205 + */
   7.206 +
   7.207 +#ifndef _PROBES_H
   7.208 +#define	_PROBES_H
   7.209 +#define _GNU_SOURCE
   7.210 +
   7.211 +#include "VMS_primitive_data_types.h"
   7.212 +
   7.213 +#include <sys/time.h>
   7.214 +
   7.215 +
   7.216 +   //When STATS__TURN_ON_PROBES is defined, probes can be used to measure
   7.217 +   // time intervals.  The probes are macros that only compile to something
   7.218 +   // when probes are turned on, and are saved in the master env only then.
   7.219 +   //NOTE(review): the #ifdef further down tests STATS__ENABLE_PROBES, not this name.
   7.220 +   //The TSC probes use the RDTSC instr, which can be unreliable; Dbl probes use gettimeofday
   7.221 +#define STATS__TURN_ON_PROBES
   7.222 +//#define STATS__USE_TSC_PROBES
   7.223 +#define STATS__USE_DBL_PROBES
   7.224 +
   7.225 +//typedef struct _IntervalProbe IntervalProbe; //in VMS.h
   7.226 +
   7.227 +struct _IntervalProbe
   7.228 + {
   7.229 +   char           *nameStr;
   7.230 +   int32           probeID;
   7.231 +
   7.232 +   int32           schedChoiceWasRecorded;
   7.233 +   int32           coreNum;
   7.234 +   int32           procrID;
   7.235 +   float64         procrCreateSecs;
   7.236 +
   7.237 +   #ifdef STATS__USE_TSC_PROBES
   7.238 +   TSCount    startStamp;
   7.239 +   TSCount    endStamp;
   7.240 +   #else
   7.241 +   struct timeval  startStamp;
   7.242 +   struct timeval  endStamp;
   7.243 +   #endif
   7.244 +   float64         startSecs;
   7.245 +   float64         endSecs;
   7.246 +   float64         interval;
   7.247 +   DblHist        *hist;//if NULL, then is single interval probe
   7.248 + };
   7.249 +
   7.250 +
   7.251 +//============================= Statistics ==================================
   7.252 +
   7.253 +   //Frequency of TS counts
   7.254 +   //TODO: change freq for each machine
   7.255 +#define TSCOUNT_FREQ 3180000000
   7.256 +
   7.257 +inline TSCount getTSCount();
   7.258 +
   7.259 +
   7.260 +//======================== Probes =============================
   7.261 +//
   7.262 +// Use macros to allow turning probes off with a #define switch
   7.263 +#ifdef STATS__ENABLE_PROBES
   7.264 +int32
   7.265 +VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr);
   7.266 +#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \
   7.267 +        VMS_impl__record_time_point_in_new_probe( nameStr, animPr )
   7.268 +
   7.269 +int32
   7.270 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr );
   7.271 +#define VMS_ext__record_time_point_into_new_probe( nameStr ) \
   7.272 +        VMS_ext_impl__record_time_point_into_new_probe( nameStr )
   7.273 +
   7.274 +
   7.275 +int32
   7.276 +VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr );
   7.277 +#define VMS__create_single_interval_probe( nameStr, animPr ) \
   7.278 +        VMS_impl__create_single_interval_probe( nameStr, animPr )
   7.279 +
   7.280 +
   7.281 +int32
   7.282 +VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
   7.283 +               float64 binWidth, char    *nameStr, VirtProcr *animPr );
   7.284 +#define VMS__create_histogram_probe(      numBins, startValue,              \
   7.285 +                                          binWidth, nameStr, animPr )       \
   7.286 +        VMS_impl__create_histogram_probe( numBins, startValue,              \
   7.287 +                                          binWidth, nameStr, animPr )
   7.288 +void
   7.289 +VMS_impl__free_probe( IntervalProbe *probe );
   7.290 +#define VMS__free_probe( probe ) \
   7.291 +        VMS_impl__free_probe( probe )
   7.292 +
   7.293 +void
   7.294 +VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr );
   7.295 +#define VMS__index_probe_by_its_name( probeID, animPr ) \
   7.296 +        VMS_impl__index_probe_by_its_name( probeID, animPr )
   7.297 +
   7.298 +IntervalProbe *
   7.299 +VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr );
   7.300 +#define VMS__get_probe_by_name( probeID, animPr ) \
   7.301 +        VMS_impl__get_probe_by_name( probeName, animPr )
   7.302 +
   7.303 +void
   7.304 +VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr );
   7.305 +#define VMS__record_sched_choice_into_probe( probeID, animPr ) \
   7.306 +        VMS_impl__record_sched_choice_into_probe( probeID, animPr )
   7.307 +
   7.308 +void
   7.309 +VMS_impl__record_interval_start_in_probe( int32 probeID );
   7.310 +#define VMS__record_interval_start_in_probe( probeID ) \
   7.311 +        VMS_impl__record_interval_start_in_probe( probeID )
   7.312 +
   7.313 +void
   7.314 +VMS_impl__record_interval_end_in_probe( int32 probeID );
   7.315 +#define VMS__record_interval_end_in_probe( probeID ) \
   7.316 +        VMS_impl__record_interval_end_in_probe( probeID )
   7.317 +
   7.318 +void
   7.319 +VMS_impl__print_stats_of_probe( int32 probeID );
   7.320 +#define VMS__print_stats_of_probe( probeID ) \
   7.321 +        VMS_impl__print_stats_of_probe( probeID )
   7.322 +
   7.323 +void
   7.324 +VMS_impl__print_stats_of_all_probes();
   7.325 +#define VMS__print_stats_of_all_probes() \
   7.326 +        VMS_impl__print_stats_of_all_probes()
   7.327 +
   7.328 +
   7.329 +#else
   7.330 +int32
   7.331 +VMS_impl__record_time_point_into_new_probe( char *nameStr,VirtProcr *animPr);
   7.332 +#define VMS__record_time_point_into_new_probe( nameStr, animPr ) \
   7.333 +       0 /* do nothing */
   7.334 +
   7.335 +int32
   7.336 +VMS_ext_impl__record_time_point_into_new_probe( char *nameStr );
   7.337 +#define VMS_ext__record_time_point_into_new_probe( nameStr ) \
   7.338 +       0 /* do nothing */
   7.339 +
   7.340 +
   7.341 +int32
   7.342 +VMS_impl__create_single_interval_probe( char *nameStr, VirtProcr *animPr );
   7.343 +#define VMS__create_single_interval_probe( nameStr, animPr ) \
   7.344 +       0 /* do nothing */
   7.345 +
   7.346 +
   7.347 +int32
   7.348 +VMS_impl__create_histogram_probe( int32   numBins, float64    startValue,
   7.349 +               float64 binWidth, char    *nameStr, VirtProcr *animPr );
   7.350 +#define VMS__create_histogram_probe(      numBins, startValue,              \
   7.351 +                                          binWidth, nameStr, animPr )       \
   7.352 +       0 /* do nothing */
   7.353 +
   7.354 +void
   7.355 +VMS_impl__index_probe_by_its_name( int32 probeID, VirtProcr *animPr );
   7.356 +#define VMS__index_probe_by_its_name( probeID, animPr ) \
   7.357 +        /* do nothing */
   7.358 +
   7.359 +IntervalProbe *
   7.360 +VMS_impl__get_probe_by_name( char *probeName, VirtProcr *animPr );
   7.361 +#define VMS__get_probe_by_name( probeID, animPr ) \
   7.362 +       NULL /* do nothing */
   7.363 +
   7.364 +void
   7.365 +VMS_impl__record_sched_choice_into_probe( int32 probeID, VirtProcr *animPr );
   7.366 +#define VMS__record_sched_choice_into_probe( probeID, animPr ) \
   7.367 +        /* do nothing */
   7.368 +
   7.369 +void
   7.370 +VMS_impl__record_interval_start_in_probe( int32 probeID );
   7.371 +#define VMS__record_interval_start_in_probe( probeID ) \
   7.372 +        /* do nothing */
   7.373 +
   7.374 +void
   7.375 +VMS_impl__record_interval_end_in_probe( int32 probeID );
   7.376 +#define VMS__record_interval_end_in_probe( probeID ) \
   7.377 +        /* do nothing */
   7.378 +
   7.379 +inline void doNothing();
   7.380 +void
   7.381 +VMS_impl__print_stats_of_probe( int32 probeID );
   7.382 +#define VMS__print_stats_of_probe( probeID ) \
   7.383 +        doNothing/* do nothing */
   7.384 +
   7.385 +void
   7.386 +VMS_impl__print_stats_of_all_probes();
   7.387 +#define VMS__print_stats_of_all_probes \
   7.388 +        doNothing/* do nothing */
   7.389 +
   7.390 +#endif   /* defined STATS__ENABLE_PROBES */
   7.391 +
   7.392 +#endif	/* _PROBES_H */
   7.393 +

     8.1 --- a/vmalloc.c	Thu Oct 06 16:24:17 2011 +0200
     8.2 +++ b/vmalloc.c	Wed Jan 04 16:10:11 2012 -0800
     8.3 @@ -1,495 +1,495 @@
     8.4 -/*
     8.5 - *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
     8.6 - *  Licensed under GNU General Public License version 2
     8.7 - *
     8.8 - * Author: seanhalle@yahoo.com
     8.9 - *
    8.10 - * Created on November 14, 2009, 9:07 PM
    8.11 - */
    8.12 -
    8.13 -#include <malloc.h>
    8.14 -#include <inttypes.h>
    8.15 -#include <stdlib.h>
    8.16 -#include <stdio.h>
    8.17 -
    8.18 -#include "VMS.h"
    8.19 -#include "Histogram/Histogram.h"
    8.20 -
    8.21 -/*Helper function
    8.22 - *Insert a newly generated free chunk into the first spot on the free list.
    8.23 - * The chunk is cast as a MallocProlog, so the various pointers in it are
    8.24 - * accessed with C's help -- and the size of the prolog is easily added to
    8.25 - * the pointer when a chunk is returned to the app -- so C handles changes
    8.26 - * in pointer sizes among machines.
    8.27 - *
    8.28 - *The list head is a normal MallocProlog struct -- identified by its
    8.29 - * prevChunkInFreeList being NULL -- the only one.
    8.30 - *
    8.31 - *The end of the list is identified by next chunk being NULL, as usual.
    8.32 - */
    8.33 -void inline
    8.34 -add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead )
    8.35 - { 
    8.36 -   chunk->nextChunkInFreeList     = listHead->nextChunkInFreeList;
    8.37 -   if( chunk->nextChunkInFreeList != NULL ) //if not last in free list
    8.38 -      chunk->nextChunkInFreeList->prevChunkInFreeList = chunk;
    8.39 -   chunk->prevChunkInFreeList     = listHead;
    8.40 -   listHead->nextChunkInFreeList  = chunk;
    8.41 - }
    8.42 -
    8.43 -
    8.44 -/*This is sequential code, meant to only be called from the Master, not from
    8.45 - * any slave VPs.
    8.46 - *Search down list, checking size by the nextHigherInMem pointer, to find
    8.47 - * first chunk bigger than size needed.
    8.48 - *Shave off the extra and make it into a new free-list element, hook it in
    8.49 - * then return the address of the found element plus size of prolog.
    8.50 - *
    8.51 - *Will find a
    8.52 - */
    8.53 -void *VMS__malloc( size_t sizeRequested )
    8.54 - { MallocProlog *foundElem = NULL, *currElem, *newElem;
    8.55 -   ssize_t        amountExtra, sizeConsumed,sizeOfFound;
    8.56 -   uint32        foundElemIsTopOfHeap;
    8.57 -
    8.58 -   //============================= MEASUREMENT STUFF ========================
    8.59 -   #ifdef MEAS__TIME_MALLOC
    8.60 -   int32 startStamp, endStamp;
    8.61 -   saveLowTimeStampCountInto( startStamp );
    8.62 -   #endif
    8.63 -   //========================================================================
    8.64 -   
    8.65 -      //step up the size to be aligned at 16-byte boundary, prob better ways
    8.66 -   sizeRequested = (sizeRequested + 16) & ~15;
    8.67 -   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
    8.68 -
    8.69 -   while( currElem != NULL )
    8.70 -    {    //check if size of currElem is big enough
    8.71 -      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
    8.72 -      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
    8.73 -      if( amountExtra > 0 )
    8.74 -       {    //found it, get out of loop
    8.75 -         foundElem = currElem;
    8.76 -         currElem = NULL;
    8.77 -       }
    8.78 -      else
    8.79 -         currElem = currElem->nextChunkInFreeList;
    8.80 -    }
    8.81 -   
    8.82 -   if( foundElem == NULL )
    8.83 -    { ERROR("\nmalloc failed\n")
    8.84 -      return (void *)NULL;  //indicates malloc failed
    8.85 -    }
    8.86 -      //Using a kludge to identify the element that is the top chunk in the
    8.87 -      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
    8.88 -      // save addr of start of heap in head's nextLowerInMem
    8.89 -      //Will handle top of Heap specially
    8.90 -   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
    8.91 -                          _VMSMasterEnv->freeListHead->nextHigherInMem;
    8.92 -   
    8.93 -      //before shave off and try to insert new elem, remove found elem
    8.94 -      //note, foundElem will never be the head, so always has valid prevChunk
    8.95 -   foundElem->prevChunkInFreeList->nextChunkInFreeList =
    8.96 -                                              foundElem->nextChunkInFreeList;
    8.97 -   if( foundElem->nextChunkInFreeList != NULL )
    8.98 -    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
    8.99 -                                              foundElem->prevChunkInFreeList;
   8.100 -    }
   8.101 -   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
   8.102 -   
   8.103 -      //if enough, turn extra into new elem & insert it
   8.104 -   if( amountExtra > 64 )
   8.105 -    {   //make new elem by adding to addr of curr elem then casting
   8.106 -        sizeConsumed = sizeof(MallocProlog) + sizeRequested; 
   8.107 -        newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
   8.108 -        newElem->nextLowerInMem    = foundElem; //This is evil (but why?) 
   8.109 -        newElem->nextHigherInMem   = foundElem->nextHigherInMem; //This is evil (but why?)
   8.110 -        foundElem->nextHigherInMem = newElem;
   8.111 -        if( ! foundElemIsTopOfHeap )
   8.112 -        {  //there is no next higher for top of heap, so can't write to it
   8.113 -           newElem->nextHigherInMem->nextLowerInMem = newElem;
   8.114 -        }
   8.115 -        add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
   8.116 -    }
   8.117 -   else
   8.118 -    {
   8.119 -      sizeConsumed = sizeOfFound;
   8.120 -    }
   8.121 -  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
   8.122 -
   8.123 -   //============================= MEASUREMENT STUFF ========================
   8.124 -   #ifdef MEAS__TIME_MALLOC
   8.125 -   saveLowTimeStampCountInto( endStamp );
   8.126 -   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
   8.127 -   #endif
   8.128 -   //========================================================================
   8.129 -
   8.130 -      //skip over the prolog by adding its size to the pointer return
   8.131 -   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
   8.132 - }
   8.133 -
   8.134 -/*This is sequential code, meant to only be called from the Master, not from
   8.135 - * any slave VPs.
   8.136 - *Search down list, checking size by the nextHigherInMem pointer, to find
   8.137 - * first chunk bigger than size needed.
   8.138 - *Shave off the extra and make it into a new free-list element, hook it in
   8.139 - * then return the address of the found element plus size of prolog.
   8.140 - *
   8.141 - * The difference from the regular malloc is that all allocated chunks are
   8.142 - * aligned and padded to the size of a CACHE_LINE, which may leave a separate
   8.143 - * free chunk in front of the aligned chunk.
   8.144 - */
   8.145 -void *VMS__malloc_aligned( size_t sizeRequested )
   8.146 - { MallocProlog *foundElem = NULL, *currElem, *newElem;
   8.147 -   ssize_t        amountExtra, sizeConsumed,sizeOfFound,prevAmount;
   8.148 -   uint32        foundElemIsTopOfHeap;
   8.149 -
   8.150 -   //============================= MEASUREMENT STUFF ========================
   8.151 -   #ifdef MEAS__TIME_MALLOC
   8.152 -   uint32 startStamp, endStamp;
   8.153 -   saveLowTimeStampCountInto( startStamp );
   8.154 -   #endif
   8.155 -   //========================================================================
   8.156 -   
   8.157 -      //step up the size to be multiple of the cache line size
   8.158 -   sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1);
   8.159 -   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
   8.160 -
   8.161 -   while( currElem != NULL )
   8.162 -    {    //check if size of currElem is big enough
   8.163 -      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
   8.164 -      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
   8.165 -      if( amountExtra > 0 )
   8.166 -       {    
   8.167 -         //look if the found element is already aligned
   8.168 -         if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){
   8.169 -             //found it, get out of loop
   8.170 -             foundElem = currElem;
   8.171 -             break;
   8.172 -         }else{
   8.173 -             //find first aligned address and check if it's still big enough
   8.174 -             //check also if the space before the aligned address is big enough
   8.175 -             //for a new element
   8.176 -             void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1)));
   8.177 -             prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem;
   8.178 -             sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog);
   8.179 -             amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog);
   8.180 -             if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){
   8.181 -                 //found suitable element
   8.182 -                 //create new previous element and exit loop
   8.183 -                 MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1;
   8.184 -                 
   8.185 -                 //insert new element into free list
   8.186 -                 if(currElem->nextChunkInFreeList != NULL)
   8.187 -                     currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem;                     
   8.188 -                 newAlignedElem->prevChunkInFreeList = currElem;
   8.189 -                 newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList;
   8.190 -                 currElem->nextChunkInFreeList = newAlignedElem;
   8.191 -                 
   8.192 -                 //set higherInMem and lowerInMem
   8.193 -                 newAlignedElem->nextHigherInMem = currElem->nextHigherInMem;
   8.194 -                 foundElemIsTopOfHeap = currElem->nextHigherInMem ==
   8.195 -                          _VMSMasterEnv->freeListHead->nextHigherInMem;
   8.196 -                 if(!foundElemIsTopOfHeap)
   8.197 -                     currElem->nextHigherInMem->nextLowerInMem = newAlignedElem;
   8.198 -                 currElem->nextHigherInMem = newAlignedElem;
   8.199 -                 newAlignedElem->nextLowerInMem = currElem;
   8.200 -                 
   8.201 -                 //Found new element leaving loop
   8.202 -                 foundElem = newAlignedElem;
   8.203 -                 break;
   8.204 -             }
   8.205 -         }
   8.206 -         
   8.207 -       }
   8.208 -       currElem = currElem->nextChunkInFreeList;
   8.209 -    }
   8.210 -
   8.211 -   if( foundElem == NULL )
   8.212 -    { ERROR("\nmalloc failed\n")
   8.213 -      return (void *)NULL;  //indicates malloc failed
   8.214 -    }
   8.215 -      //Using a kludge to identify the element that is the top chunk in the
   8.216 -      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
   8.217 -      // save addr of start of heap in head's nextLowerInMem
   8.218 -      //Will handle top of Heap specially
   8.219 -   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
   8.220 -                          _VMSMasterEnv->freeListHead->nextHigherInMem;
   8.221 -
   8.222 -      //before shave off and try to insert new elem, remove found elem
   8.223 -      //note, foundElem will never be the head, so always has valid prevChunk
   8.224 -   foundElem->prevChunkInFreeList->nextChunkInFreeList =
   8.225 -                                              foundElem->nextChunkInFreeList;
   8.226 -   if( foundElem->nextChunkInFreeList != NULL )
   8.227 -    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
   8.228 -                                              foundElem->prevChunkInFreeList;
   8.229 -    }
   8.230 -   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
   8.231 -   
   8.232 -      //if enough, turn extra into new elem & insert it
   8.233 -   if( amountExtra > 64 )
   8.234 -    {    //make new elem by adding to addr of curr elem then casting
   8.235 -      sizeConsumed = sizeof(MallocProlog) + sizeRequested;
   8.236 -      newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
   8.237 -      newElem->nextHigherInMem   = foundElem->nextHigherInMem;
   8.238 -      newElem->nextLowerInMem    = foundElem;
   8.239 -      foundElem->nextHigherInMem = newElem;
   8.240 -      
   8.241 -      if( ! foundElemIsTopOfHeap )
   8.242 -       {    //there is no next higher for top of heap, so can't write to it
   8.243 -         newElem->nextHigherInMem->nextLowerInMem = newElem;
   8.244 -       }
   8.245 -      add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
   8.246 -    }
   8.247 -   else
   8.248 -    {
   8.249 -      sizeConsumed = sizeOfFound;
   8.250 -    }
   8.251 -  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
   8.252 -
   8.253 -   //============================= MEASUREMENT STUFF ========================
   8.254 -   #ifdef MEAS__TIME_MALLOC
   8.255 -   saveLowTimeStampCountInto( endStamp );
   8.256 -   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
   8.257 -   #endif
   8.258 -   //========================================================================
   8.259 -
   8.260 -      //skip over the prolog by adding its size to the pointer return
   8.261 -   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
   8.262 - }
   8.263 -
   8.264 -
   8.265 -/*This is sequential code -- only to be called from the Master
   8.266 - * When free, subtract the size of prolog from pointer, then cast it to a
   8.267 - * MallocProlog.  Then check the nextLower and nextHigher chunks to see if
   8.268 - * one or both are also free, and coalesce if so, and if neither free, then
   8.269 - * add this one to free-list.
   8.270 - */
   8.271 -void
   8.272 -VMS__free( void *ptrToFree )
   8.273 - { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem;
   8.274 -   size_t         sizeOfElem;
   8.275 -   uint32         lowerExistsAndIsFree, higherExistsAndIsFree;
   8.276 -
   8.277 -   //============================= MEASUREMENT STUFF ========================
   8.278 -   #ifdef MEAS__TIME_MALLOC
   8.279 -   int32 startStamp, endStamp;
   8.280 -   saveLowTimeStampCountInto( startStamp );
   8.281 -   #endif
   8.282 -   //========================================================================
   8.283 -
   8.284 -   if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem ||
   8.285 -       ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem )
   8.286 -    {    //outside the range of data owned by VMS's malloc, so do nothing
   8.287 -      return;
   8.288 -    }
   8.289 -      //subtract size of prolog to get pointer to prolog, then cast
   8.290 -   elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog));
   8.291 -   sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree);
   8.292 -
   8.293 -   if( elemToFree->prevChunkInFreeList != NULL )
   8.294 -    { printf( "error: freeing same element twice!" ); exit(1);
   8.295 -    }
   8.296 -
   8.297 -   _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem;
   8.298 -
   8.299 -   nextLowerElem  = elemToFree->nextLowerInMem;
   8.300 -   nextHigherElem = elemToFree->nextHigherInMem;
   8.301 -
   8.302 -   if( nextHigherElem == NULL )
   8.303 -      higherExistsAndIsFree = FALSE;
   8.304 -   else //okay exists, now check if in the free-list by checking back ptr
   8.305 -      higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL);
   8.306 -    
   8.307 -   if( nextLowerElem == NULL )
   8.308 -      lowerExistsAndIsFree = FALSE;
   8.309 -   else //okay, it exists, now check if it's free
   8.310 -      lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL);
   8.311 -    
   8.312 -
   8.313 -      //now, know what exists and what's free
   8.314 -   if( lowerExistsAndIsFree )
   8.315 -    { if( higherExistsAndIsFree )
   8.316 -       {    //both exist and are free, so coalesce all three
   8.317 -            //First, remove higher from free-list
   8.318 -         nextHigherElem->prevChunkInFreeList->nextChunkInFreeList =
   8.319 -                                         nextHigherElem->nextChunkInFreeList;
   8.320 -         if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list?
   8.321 -            nextHigherElem->nextChunkInFreeList->prevChunkInFreeList =
   8.322 -                                         nextHigherElem->prevChunkInFreeList;
   8.323 -            //Now, fix-up sequence-in-mem list -- by side-effect, this also
   8.324 -            // changes size of the lower elem, which is still in free-list
   8.325 -         nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem;
   8.326 -         if( nextHigherElem->nextHigherInMem !=
   8.327 -             _VMSMasterEnv->freeListHead->nextHigherInMem )
   8.328 -            nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem;
   8.329 -            //notice didn't do anything to elemToFree -- it simply is no
   8.330 -            // longer reachable from any of the lists.  Wonder if could be a
   8.331 -            // security leak because left valid addresses in it,
   8.332 -            // but don't care for now.
   8.333 -       }
   8.334 -      else
   8.335 -       {    //lower is the only of the two that exists and is free,
   8.336 -            //In this case, no adjustment to free-list, just change mem-list.
   8.337 -            // By side-effect, changes size of the lower elem
   8.338 -         nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem;
   8.339 -         if( elemToFree->nextHigherInMem !=
   8.340 -             _VMSMasterEnv->freeListHead->nextHigherInMem )
   8.341 -            elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem;
   8.342 -       }
   8.343 -    }
   8.344 -   else
   8.345 -    {    //lower either doesn't exist or isn't free, so check higher
   8.346 -      if( higherExistsAndIsFree )
   8.347 -       {    //higher exists and is the only of the two free
   8.348 -            //First, in free-list, replace higher elem with the one to free
   8.349 -         elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList;
   8.350 -         elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList;
   8.351 -         elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree;
   8.352 -         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
   8.353 -            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
   8.354 -            //Now chg mem-list. By side-effect, changes size of elemToFree
   8.355 -         elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem;
   8.356 -         if( elemToFree->nextHigherInMem !=
   8.357 -             _VMSMasterEnv->freeListHead->nextHigherInMem )
   8.358 -            elemToFree->nextHigherInMem->nextLowerInMem = elemToFree;
   8.359 -       }
   8.360 -      else
   8.361 -       {    //neither lower nor higher is available to coalesce so add to list
   8.362 -            // this makes prev chunk ptr non-null, which indicates it's free
   8.363 -         elemToFree->nextChunkInFreeList =
   8.364 -                            _VMSMasterEnv->freeListHead->nextChunkInFreeList;
   8.365 -         _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree;
   8.366 -         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
   8.367 -            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
   8.368 -         elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead;
   8.369 -       }
   8.370 -    }
   8.371 -   //============================= MEASUREMENT STUFF ========================
   8.372 -   #ifdef MEAS__TIME_MALLOC
   8.373 -   saveLowTimeStampCountInto( endStamp );
   8.374 -   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist );
   8.375 -   #endif
   8.376 -   //========================================================================
   8.377 -
   8.378 - }
   8.379 -
   8.380 -
   8.381 -/*Allocates memory from the external system -- higher overhead
   8.382 - *
   8.383 - *Because of Linux's malloc throwing bizarre random faults when malloc is
   8.384 - * used inside a VMS virtual processor, have to pass this as a request and
   8.385 - * have the core loop do it when it gets around to it -- will look for these
   8.386 - * chores leftover from the previous animation of masterVP the next time it
   8.387 - * goes to animate the masterVP -- so it takes two separate masterVP
   8.388 - * animations, separated by work, to complete an external malloc or
   8.389 - * external free request.
   8.390 - *
   8.391 - *Thinking core loop accepts signals -- just looks if signal-location is
   8.392 - * empty or not --
   8.393 - */
   8.394 -void *
   8.395 -VMS__malloc_in_ext( size_t sizeRequested )
   8.396 - {
   8.397 - /*
   8.398 -      //This is running in the master, so no chance for multiple cores to be
   8.399 -      // competing for the core's flag.
   8.400 -   if(  *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 )
   8.401 -    {    //something has already signalled to core loop, so save the signal
   8.402 -         // and look, next time master animated, to see if can send it.
   8.403 -         //Note, the addr to put a signal is in the coreloop's frame, so just
   8.404 -         // checks it each time through -- make it volatile to avoid GCC
   8.405 -         // optimizations -- it's a coreloop local var that only changes
   8.406 -         // after jumping away.  The signal includes the addr to send the
   8.407 -         //return to -- even if just empty return completion-signal
   8.408 -         //
   8.409 -         //save the signal in some queue that the master looks at each time
   8.410 -         // it starts up -- one loc says if empty for fast common case --
   8.411 -         //something like that -- want to hide this inside this call -- but
   8.412 -         // think this has to come as a request -- req handler gives procr
   8.413 -         // back to master loop, which gives it back to req handler at point
   8.414 -         // it sees that core loop has sent return signal.  Something like
   8.415 -         // that.
   8.416 -      saveTheSignal
   8.417 -
   8.418 -    }
   8.419 -  coreSigData->type = malloc;
   8.420 -  coreSigData->sizeToMalloc = sizeRequested;
   8.421 -  coreSigData->locToSignalCompletion = &figureOut;
   8.422 -   _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData;
   8.423 -  */
   8.424 -      //just risk system-stack faults until get this figured out
   8.425 -   return malloc( sizeRequested );
   8.426 - }
   8.427 -
   8.428 -
   8.429 -/*Frees memory that was allocated in the external system -- higher overhead
   8.430 - *
   8.431 - *As noted in external malloc comment, this is clunky 'cause the free has
   8.432 - * to be called in the core loop.
   8.433 - */
   8.434 -void
   8.435 -VMS__free_in_ext( void *ptrToFree )
   8.436 - {
   8.437 -      //just risk system-stack faults until get this figured out
   8.438 -   free( ptrToFree );
   8.439 -
   8.440 -      //TODO: fix this -- so the free is performed in the core loop
   8.441 - }
   8.442 -
   8.443 -
   8.444 -/*Designed to be called from the main thread outside of VMS, during init
   8.445 - */
   8.446 -MallocProlog *
   8.447 -VMS_ext__create_free_list()
   8.448 - { MallocProlog *freeListHead, *firstChunk;
   8.449 -
   8.450 -      //Note, this is running in the main thread -- all increases in malloc
   8.451 -      // mem and all frees of it must be done in this thread, with the
   8.452 -      // thread's original stack available
   8.453 -   freeListHead = malloc( sizeof(MallocProlog) );
   8.454 -   firstChunk   = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
   8.455 -   if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);}
   8.456 -   
   8.457 -   //Touch memory to avoid page faults
   8.458 -   void *ptr,*endPtr; 
   8.459 -   endPtr = (void*)firstChunk+MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE;
   8.460 -   for(ptr = firstChunk; ptr < endPtr; ptr+=PAGE_SIZE)
   8.461 -   {
   8.462 -       *(char*)ptr = 0;
   8.463 -   }
   8.464 -
   8.465 -   freeListHead->prevChunkInFreeList = NULL;
   8.466 -      //Use this addr to free the heap when cleanup
   8.467 -   freeListHead->nextLowerInMem      = firstChunk;
   8.468 -      //to identify top-of-heap elem, compare this addr to elem's next higher
   8.469 -   freeListHead->nextHigherInMem     = (void*)( (uintptr_t)firstChunk +
   8.470 -                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
   8.471 -   freeListHead->nextChunkInFreeList = firstChunk;
   8.472 -
   8.473 -   firstChunk->nextChunkInFreeList   = NULL;
   8.474 -   firstChunk->prevChunkInFreeList   = freeListHead;
   8.475 -      //next Higher has to be set to top of chunk, so can calc size in malloc
   8.476 -   firstChunk->nextHigherInMem       = (void*)( (uintptr_t)firstChunk +
   8.477 -                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
   8.478 -   firstChunk->nextLowerInMem        = NULL; //identifies as bottom of heap
   8.479 -   
   8.480 -   _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet
   8.481 -
   8.482 -   return freeListHead;
   8.483 - }
   8.484 -
   8.485 -
   8.486 -/*Designed to be called from the main thread outside of VMS, during cleanup
   8.487 - */
   8.488 -void
   8.489 -VMS_ext__free_free_list( MallocProlog *freeListHead )
   8.490 - {    
   8.491 -      //stashed a ptr to the one and only big chunk malloc'd from OS in the
   8.492 -      // free list head's next lower in mem pointer
   8.493 -   free( freeListHead->nextLowerInMem );
   8.494 -
   8.495 -   //don't free the head -- it'll be in an array eventually -- free whole
   8.496 -   // array when all the free lists linked from it have already been freed
   8.497 - }
   8.498 -
   8.499 +/*
   8.500 + *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
   8.501 + *  Licensed under GNU General Public License version 2
   8.502 + *
   8.503 + * Author: seanhalle@yahoo.com
   8.504 + *
   8.505 + * Created on November 14, 2009, 9:07 PM
   8.506 + */
   8.507 +
   8.508 +#include <malloc.h>
   8.509 +#include <inttypes.h>
   8.510 +#include <stdlib.h>
   8.511 +#include <stdio.h>
   8.512 +
   8.513 +#include "VMS.h"
   8.514 +#include "Histogram/Histogram.h"
   8.515 +
   8.516 +/*Helper function
   8.517 + *Insert a newly generated free chunk into the first spot on the free list.
   8.518 + * The chunk is cast as a MallocProlog, so the various pointers in it are
   8.519 + * accessed with C's help -- and the size of the prolog is easily added to
   8.520 + * the pointer when a chunk is returned to the app -- so C handles changes
   8.521 + * in pointer sizes among machines.
   8.522 + *
   8.523 + *The list head is a normal MallocProlog struct -- identified by its
   8.524 + * prevChunkInFreeList being NULL -- the only one.
   8.525 + *
   8.526 + *The end of the list is identified by next chunk being NULL, as usual.
   8.527 + */
   8.528 +void inline
   8.529 +add_chunk_to_free_list( MallocProlog *chunk, MallocProlog *listHead )
   8.530 + { 
   8.531 +   chunk->nextChunkInFreeList     = listHead->nextChunkInFreeList;
   8.532 +   if( chunk->nextChunkInFreeList != NULL ) //if not last in free list
   8.533 +      chunk->nextChunkInFreeList->prevChunkInFreeList = chunk;
   8.534 +   chunk->prevChunkInFreeList     = listHead;
   8.535 +   listHead->nextChunkInFreeList  = chunk;
   8.536 + }
   8.537 +
   8.538 +
   8.539 +/*This is sequential code, meant to only be called from the Master, not from
   8.540 + * any slave VPs.
   8.541 + *Search down list, checking size by the nextHigherInMem pointer, to find
   8.542 + * first chunk bigger than size needed.
   8.543 + *Shave off the extra and make it into a new free-list element, hook it in
   8.544 + * then return the address of the found element plus size of prolog.
   8.545 + *
   8.546 + *Returns NULL if no chunk in the free list is big enough.
   8.547 + */
   8.548 +void *VMS__malloc( size_t sizeRequested )
   8.549 + { MallocProlog *foundElem = NULL, *currElem, *newElem;
   8.550 +   ssize_t        amountExtra, sizeConsumed,sizeOfFound;
   8.551 +   uint32        foundElemIsTopOfHeap;
   8.552 +
   8.553 +   //============================= MEASUREMENT STUFF ========================
   8.554 +   #ifdef MEAS__TIME_MALLOC
   8.555 +   int32 startStamp, endStamp;
   8.556 +   saveLowTimeStampCountInto( startStamp );
   8.557 +   #endif
   8.558 +   //========================================================================
   8.559 +   
   8.560 +      //step up the size to be aligned at 16-byte boundary, prob better ways
   8.561 +   sizeRequested = (sizeRequested + 16) & ~15;
   8.562 +   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
   8.563 +
   8.564 +   while( currElem != NULL )
   8.565 +    {    //check if size of currElem is big enough
   8.566 +      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
   8.567 +      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
   8.568 +      if( amountExtra > 0 )
   8.569 +       {    //found it, get out of loop
   8.570 +         foundElem = currElem;
   8.571 +         currElem = NULL;
   8.572 +       }
   8.573 +      else
   8.574 +         currElem = currElem->nextChunkInFreeList;
   8.575 +    }
   8.576 +   
   8.577 +   if( foundElem == NULL )
   8.578 +    { ERROR("\nmalloc failed\n")
   8.579 +      return (void *)NULL;  //indicates malloc failed
   8.580 +    }
   8.581 +      //Using a kludge to identify the element that is the top chunk in the
   8.582 +      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
   8.583 +      // save addr of start of heap in head's nextLowerInMem
   8.584 +      //Will handle top of Heap specially
   8.585 +   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
   8.586 +                          _VMSMasterEnv->freeListHead->nextHigherInMem;
   8.587 +   
   8.588 +      //before shave off and try to insert new elem, remove found elem
   8.589 +      //note, foundElem will never be the head, so always has valid prevChunk
   8.590 +   foundElem->prevChunkInFreeList->nextChunkInFreeList =
   8.591 +                                              foundElem->nextChunkInFreeList;
   8.592 +   if( foundElem->nextChunkInFreeList != NULL )
   8.593 +    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
   8.594 +                                              foundElem->prevChunkInFreeList;
   8.595 +    }
   8.596 +   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
   8.597 +   
   8.598 +      //if enough, turn extra into new elem & insert it
   8.599 +   if( amountExtra > 64 )
   8.600 +    {   //make new elem by adding to addr of curr elem then casting
   8.601 +        sizeConsumed = sizeof(MallocProlog) + sizeRequested; 
   8.602 +        newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
   8.603 +        newElem->nextLowerInMem    = foundElem; //This is evil (but why?) 
   8.604 +        newElem->nextHigherInMem   = foundElem->nextHigherInMem; //This is evil (but why?)
   8.605 +        foundElem->nextHigherInMem = newElem;
   8.606 +        if( ! foundElemIsTopOfHeap )
   8.607 +        {  //there is no next higher for top of heap, so can't write to it
   8.608 +           newElem->nextHigherInMem->nextLowerInMem = newElem;
   8.609 +        }
   8.610 +        add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
   8.611 +    }
   8.612 +   else
   8.613 +    {
   8.614 +      sizeConsumed = sizeOfFound;
   8.615 +    }
   8.616 +  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
   8.617 +
   8.618 +   //============================= MEASUREMENT STUFF ========================
   8.619 +   #ifdef MEAS__TIME_MALLOC
   8.620 +   saveLowTimeStampCountInto( endStamp );
   8.621 +   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
   8.622 +   #endif
   8.623 +   //========================================================================
   8.624 +
   8.625 +      //skip over the prolog by adding its size to the pointer return
   8.626 +   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
   8.627 + }
   8.628 +
   8.629 +/*This is sequential code, meant to only be called from the Master, not from
   8.630 + * any slave VPs.
   8.631 + *Search down list, checking size by the nextHigherInMem pointer, to find
   8.632 + * first chunk bigger than size needed.
   8.633 + *Shave off the extra and make it into a new free-list element, hook it in
   8.634 + * then return the address of the found element plus size of prolog.
   8.635 + *
   8.636 + * The difference from the regular malloc is that all allocated chunks are
   8.637 + * aligned and padded to the size of a CACHE_LINE, which may leave a separate
   8.638 + * free chunk in front of the aligned chunk.
   8.639 + */
   8.640 +void *VMS__malloc_aligned( size_t sizeRequested )
   8.641 + { MallocProlog *foundElem = NULL, *currElem, *newElem;
   8.642 +   ssize_t        amountExtra, sizeConsumed,sizeOfFound,prevAmount;
   8.643 +   uint32        foundElemIsTopOfHeap;
   8.644 +
   8.645 +   //============================= MEASUREMENT STUFF ========================
   8.646 +   #ifdef MEAS__TIME_MALLOC
   8.647 +   uint32 startStamp, endStamp;
   8.648 +   saveLowTimeStampCountInto( startStamp );
   8.649 +   #endif
   8.650 +   //========================================================================
   8.651 +   
   8.652 +      //step up the size to be multiple of the cache line size
   8.653 +   sizeRequested = (sizeRequested + CACHE_LINE) & ~(CACHE_LINE-1);
   8.654 +   currElem = (_VMSMasterEnv->freeListHead)->nextChunkInFreeList;
   8.655 +
   8.656 +   while( currElem != NULL )
   8.657 +    {    //check if size of currElem is big enough
   8.658 +      sizeOfFound=(size_t)((uintptr_t)currElem->nextHigherInMem -(uintptr_t)currElem);
   8.659 +      amountExtra = sizeOfFound - sizeRequested - sizeof(MallocProlog);
   8.660 +      if( amountExtra > 0 )
   8.661 +       {    
   8.662 +         //look if the found element is already aligned
   8.663 +         if((((uintptr_t)currElem+sizeof(MallocProlog)) & (uintptr_t)(CACHE_LINE-1)) == 0){
   8.664 +             //found it, get out of loop
   8.665 +             foundElem = currElem;
   8.666 +             break;
   8.667 +         }else{
   8.668 +             //find first aligned address and check if it's still big enough
   8.669 +             //check also if the space before the aligned address is big enough
   8.670 +             //for a new element
   8.671 +             void *firstAlignedAddr = (void*)(((uintptr_t)currElem + 2*CACHE_LINE) & ~((uintptr_t)(CACHE_LINE-1)));
   8.672 +             prevAmount = (uintptr_t)firstAlignedAddr - (uintptr_t)currElem;
   8.673 +             sizeOfFound=(uintptr_t)currElem->nextHigherInMem -(uintptr_t)firstAlignedAddr + sizeof(MallocProlog);
   8.674 +             amountExtra= sizeOfFound - sizeRequested - sizeof(MallocProlog);
   8.675 +             if(prevAmount > 2*sizeof(MallocProlog) && amountExtra > 0 ){
   8.676 +                 //found suitable element
   8.677 +                 //create new previous element and exit loop
   8.678 +                 MallocProlog *newAlignedElem = (MallocProlog*)firstAlignedAddr - 1;
   8.679 +                 
   8.680 +                 //insert new element into free list
   8.681 +                 if(currElem->nextChunkInFreeList != NULL)
   8.682 +                     currElem->nextChunkInFreeList->prevChunkInFreeList = newAlignedElem;                     
   8.683 +                 newAlignedElem->prevChunkInFreeList = currElem;
   8.684 +                 newAlignedElem->nextChunkInFreeList = currElem->nextChunkInFreeList;
   8.685 +                 currElem->nextChunkInFreeList = newAlignedElem;
   8.686 +                 
   8.687 +                 //set higherInMem and lowerInMem
   8.688 +                 newAlignedElem->nextHigherInMem = currElem->nextHigherInMem;
   8.689 +                 foundElemIsTopOfHeap = currElem->nextHigherInMem ==
   8.690 +                          _VMSMasterEnv->freeListHead->nextHigherInMem;
   8.691 +                 if(!foundElemIsTopOfHeap)
   8.692 +                     currElem->nextHigherInMem->nextLowerInMem = newAlignedElem;
   8.693 +                 currElem->nextHigherInMem = newAlignedElem;
   8.694 +                 newAlignedElem->nextLowerInMem = currElem;
   8.695 +                 
   8.696 +                 //Found new element leaving loop
   8.697 +                 foundElem = newAlignedElem;
   8.698 +                 break;
   8.699 +             }
   8.700 +         }
   8.701 +         
   8.702 +       }
   8.703 +       currElem = currElem->nextChunkInFreeList;
   8.704 +    }
   8.705 +
   8.706 +   if( foundElem == NULL )
   8.707 +    { ERROR("\nmalloc failed\n")
   8.708 +      return (void *)NULL;  //indicates malloc failed
   8.709 +    }
   8.710 +      //Using a kludge to identify the element that is the top chunk in the
   8.711 +      // heap -- saving top-of-heap addr in head's nextHigherInMem -- and
   8.712 +      // save addr of start of heap in head's nextLowerInMem
   8.713 +      //Will handle top of Heap specially
   8.714 +   foundElemIsTopOfHeap = foundElem->nextHigherInMem ==
   8.715 +                          _VMSMasterEnv->freeListHead->nextHigherInMem;
   8.716 +
   8.717 +      //before shave off and try to insert new elem, remove found elem
   8.718 +      //note, foundElem will never be the head, so always has valid prevChunk
   8.719 +   foundElem->prevChunkInFreeList->nextChunkInFreeList =
   8.720 +                                              foundElem->nextChunkInFreeList;
   8.721 +   if( foundElem->nextChunkInFreeList != NULL )
   8.722 +    { foundElem->nextChunkInFreeList->prevChunkInFreeList =
   8.723 +                                              foundElem->prevChunkInFreeList;
   8.724 +    }
   8.725 +   foundElem->prevChunkInFreeList = NULL;//indicates elem currently allocated
   8.726 +   
   8.727 +      //if enough, turn extra into new elem & insert it
   8.728 +   if( amountExtra > 64 )
   8.729 +    {    //make new elem by adding to addr of curr elem then casting
   8.730 +      sizeConsumed = sizeof(MallocProlog) + sizeRequested;
   8.731 +      newElem = (MallocProlog *)( (uintptr_t)foundElem + sizeConsumed );
   8.732 +      newElem->nextHigherInMem   = foundElem->nextHigherInMem;
   8.733 +      newElem->nextLowerInMem    = foundElem;
   8.734 +      foundElem->nextHigherInMem = newElem;
   8.735 +      
   8.736 +      if( ! foundElemIsTopOfHeap )
   8.737 +       {    //there is no next higher for top of heap, so can't write to it
   8.738 +         newElem->nextHigherInMem->nextLowerInMem = newElem;
   8.739 +       }
   8.740 +      add_chunk_to_free_list( newElem, _VMSMasterEnv->freeListHead );
   8.741 +    }
   8.742 +   else
   8.743 +    {
   8.744 +      sizeConsumed = sizeOfFound;
   8.745 +    }
   8.746 +  _VMSMasterEnv->amtOfOutstandingMem += sizeConsumed;
   8.747 +
   8.748 +   //============================= MEASUREMENT STUFF ========================
   8.749 +   #ifdef MEAS__TIME_MALLOC
   8.750 +   saveLowTimeStampCountInto( endStamp );
   8.751 +   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
   8.752 +   #endif
   8.753 +   //========================================================================
   8.754 +
   8.755 +      //skip over the prolog by adding its size to the pointer return
   8.756 +   return (void*)((uintptr_t)foundElem + sizeof(MallocProlog));
   8.757 + }
   8.758 +
   8.759 +
   8.760 +/*Sequential code -- only to be called from the Master.  Steps back from
   8.761 + * ptrToFree by the prolog size to reach the chunk's MallocProlog, then
   8.762 + * coalesces with whichever of the next-lower / next-higher chunks are free;
   8.763 + * if neither is free, pushes the chunk on the front of the free list.
   8.764 + *Pointers outside the heap are ignored; a double free prints and exit(1)s.
   8.765 + */
   8.766 +void
   8.767 +VMS__free( void *ptrToFree )
   8.768 + { MallocProlog *elemToFree, *nextLowerElem, *nextHigherElem;
   8.769 +   size_t         sizeOfElem;
   8.770 +   uint32         lowerExistsAndIsFree, higherExistsAndIsFree;
   8.771 +
   8.772 +   //============================= MEASUREMENT STUFF ========================
   8.773 +   #ifdef MEAS__TIME_MALLOC
   8.774 +   uint32 startStamp, endStamp;
   8.775 +   saveLowTimeStampCountInto( startStamp );
   8.776 +   #endif
   8.777 +   //========================================================================
   8.778 +      //head's nextLowerInMem / nextHigherInMem hold the heap's start / end
   8.779 +   if( ptrToFree < (void*)_VMSMasterEnv->freeListHead->nextLowerInMem ||
   8.780 +       ptrToFree > (void*)_VMSMasterEnv->freeListHead->nextHigherInMem )
   8.781 +    {    //outside the range of data owned by VMS's malloc, so do nothing
   8.782 +      return;
   8.783 +    }
   8.784 +      //subtract size of prolog to get pointer to prolog, then cast
   8.785 +   elemToFree = (MallocProlog *)((uintptr_t)ptrToFree - sizeof(MallocProlog));
   8.786 +   sizeOfElem =(size_t)((uintptr_t)elemToFree->nextHigherInMem-(uintptr_t)elemToFree);
   8.787 +      //an allocated chunk always has a NULL prev-in-free-list pointer
   8.788 +   if( elemToFree->prevChunkInFreeList != NULL )
   8.789 +    { printf( "error: freeing same element twice!" ); exit(1);
   8.790 +    }
   8.791 +
   8.792 +   _VMSMasterEnv->amtOfOutstandingMem -= sizeOfElem;
   8.793 +
   8.794 +   nextLowerElem  = elemToFree->nextLowerInMem;
   8.795 +   nextHigherElem = elemToFree->nextHigherInMem;
   8.796 +   if( nextHigherElem == NULL ||  //top chunk points at heap end: no prolog there
   8.797 +       nextHigherElem == _VMSMasterEnv->freeListHead->nextHigherInMem )
   8.798 +      higherExistsAndIsFree = FALSE;
   8.799 +   else //okay exists, now check if in the free-list by checking back ptr
   8.800 +      higherExistsAndIsFree = (nextHigherElem->prevChunkInFreeList != NULL);
   8.801 +    
   8.802 +   if( nextLowerElem == NULL )
   8.803 +      lowerExistsAndIsFree = FALSE;
   8.804 +   else //okay, it exists, now check if it's free
   8.805 +      lowerExistsAndIsFree = (nextLowerElem->prevChunkInFreeList != NULL);
   8.806 +    
   8.807 +
   8.808 +      //now, know what exists and what's free
   8.809 +   if( lowerExistsAndIsFree )
   8.810 +    { if( higherExistsAndIsFree )
   8.811 +       {    //both exist and are free, so coalesce all three
   8.812 +            //First, remove higher from free-list
   8.813 +         nextHigherElem->prevChunkInFreeList->nextChunkInFreeList =
   8.814 +                                         nextHigherElem->nextChunkInFreeList;
   8.815 +         if( nextHigherElem->nextChunkInFreeList != NULL ) //end-of-list?
   8.816 +            nextHigherElem->nextChunkInFreeList->prevChunkInFreeList =
   8.817 +                                         nextHigherElem->prevChunkInFreeList;
   8.818 +            //Now, fix-up sequence-in-mem list -- by side-effect, this also
   8.819 +            // changes size of the lower elem, which is still in free-list
   8.820 +         nextLowerElem->nextHigherInMem = nextHigherElem->nextHigherInMem;
   8.821 +         if( nextHigherElem->nextHigherInMem !=
   8.822 +             _VMSMasterEnv->freeListHead->nextHigherInMem )
   8.823 +            nextHigherElem->nextHigherInMem->nextLowerInMem = nextLowerElem;
   8.824 +            //notice didn't do anything to elemToFree -- it simply is no
   8.825 +            // longer reachable from any of the lists.  Wonder if could be a
   8.826 +            // security leak because left valid addresses in it,
   8.827 +            // but don't care for now.
   8.828 +       }
   8.829 +      else
   8.830 +       {    //lower is the only of the two that exists and is free,
   8.831 +            //In this case, no adjustment to free-list, just change mem-list.
   8.832 +            // By side-effect, changes size of the lower elem
   8.833 +         nextLowerElem->nextHigherInMem = elemToFree->nextHigherInMem;
   8.834 +         if( elemToFree->nextHigherInMem !=
   8.835 +             _VMSMasterEnv->freeListHead->nextHigherInMem )
   8.836 +            elemToFree->nextHigherInMem->nextLowerInMem = nextLowerElem;
   8.837 +       }
   8.838 +    }
   8.839 +   else
   8.840 +    {    //lower either doesn't exist or isn't free, so check higher
   8.841 +      if( higherExistsAndIsFree )
   8.842 +       {    //higher exists and is the only of the two free
   8.843 +            //First, in free-list, replace higher elem with the one to free
   8.844 +         elemToFree->nextChunkInFreeList=nextHigherElem->nextChunkInFreeList;
   8.845 +         elemToFree->prevChunkInFreeList=nextHigherElem->prevChunkInFreeList;
   8.846 +         elemToFree->prevChunkInFreeList->nextChunkInFreeList = elemToFree;
   8.847 +         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
   8.848 +            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
   8.849 +            //Now chg mem-list. By side-effect, changes size of elemToFree
   8.850 +         elemToFree->nextHigherInMem = nextHigherElem->nextHigherInMem;
   8.851 +         if( elemToFree->nextHigherInMem !=
   8.852 +             _VMSMasterEnv->freeListHead->nextHigherInMem )
   8.853 +            elemToFree->nextHigherInMem->nextLowerInMem = elemToFree;
   8.854 +       }
   8.855 +      else
   8.856 +       {    //neither lower nor higher is available to coalesce so add to list
   8.857 +            // this makes prev chunk ptr non-null, which indicates it's free
   8.858 +         elemToFree->nextChunkInFreeList =
   8.859 +                            _VMSMasterEnv->freeListHead->nextChunkInFreeList;
   8.860 +         _VMSMasterEnv->freeListHead->nextChunkInFreeList = elemToFree;
   8.861 +         if( elemToFree->nextChunkInFreeList != NULL ) // end-of-list?
   8.862 +            elemToFree->nextChunkInFreeList->prevChunkInFreeList =elemToFree;
   8.863 +         elemToFree->prevChunkInFreeList = _VMSMasterEnv->freeListHead;
   8.864 +       }
   8.865 +    }
   8.866 +   //============================= MEASUREMENT STUFF ========================
   8.867 +   #ifdef MEAS__TIME_MALLOC
   8.868 +   saveLowTimeStampCountInto( endStamp );
   8.869 +   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist );
   8.870 +   #endif
   8.871 +   //========================================================================
   8.872 +
   8.873 + }
   8.874 +
   8.875 +
   8.876 +/*Allocates memory from the external system -- higher overhead
   8.877 + *
   8.878 + *Because of Linux's malloc throwing bizarre random faults when malloc is
   8.879 + * used inside a VMS virtual processor, have to pass this as a request and
   8.880 + * have the core loop do it when it gets around to it -- will look for these
   8.881 + * chores leftover from the previous animation of masterVP the next time it
   8.882 + * goes to animate the masterVP -- so it takes two separate masterVP
   8.883 + * animations, separated by work, to complete an external malloc or
   8.884 + * external free request.
   8.885 + *Thinking core loop accepts signals -- just looks if signal-location is
   8.886 + * empty or not.
   8.887 + *None of that exists yet: the body below simply calls malloc directly, and
   8.888 + * the request-based scheme survives only as the disabled sketch inside.*/
   8.889 +void *
   8.890 +VMS__malloc_in_ext( size_t sizeRequested )
   8.891 + {
   8.892 + /*DISABLED SKETCH of the signalling scheme -- commented out, never compiled:
   8.893 +      //This is running in the master, so no chance for multiple cores to be
   8.894 +      // competing for the core's flag.
   8.895 +   if(  *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 )
   8.896 +    {    //something has already signalled to core loop, so save the signal
   8.897 +         // and look, next time master animated, to see if can send it.
   8.898 +         //Note, the addr to put a signal is in the coreloop's frame, so just
   8.899 +         // checks it each time through -- make it volatile to avoid GCC
   8.900 +         // optimizations -- it's a coreloop local var that only changes
   8.901 +         // after jumping away.  The signal includes the addr to send the
   8.902 +         //return to -- even if just empty return completion-signal
   8.903 +         //
   8.904 +         //save the signal in some queue that the master looks at each time
   8.905 +         // it starts up -- one loc says if empty for fast common case --
   8.906 +         //something like that -- want to hide this inside this call -- but
   8.907 +         // think this has to come as a request -- req handler gives procr
   8.908 +         // back to master loop, which gives it back to req handler at point
   8.909 +         // it sees that core loop has sent return signal.  Something like
   8.910 +         // that.
   8.911 +      saveTheSignal
   8.912 +
   8.913 +    }
   8.914 +  coreSigData->type = malloc;
   8.915 +  coreSigData->sizeToMalloc = sizeRequested;
   8.916 +  coreSigData->locToSignalCompletion = &figureOut;
   8.917 +   _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData;
   8.918 +  */
   8.919 +      //just risk system-stack faults until get this figured out
   8.920 +   return malloc( sizeRequested );
   8.921 + }
   8.922 +
   8.923 +
   8.924 +/*Frees memory that was allocated in the external system -- higher overhead
   8.925 + *
   8.926 + *As noted in external malloc comment, this is clunky 'cause the free has
   8.927 + * to be called in the core loop.  For now it just calls free() directly.
   8.928 + */
   8.929 +void
   8.930 +VMS__free_in_ext( void *ptrToFree )
   8.931 + {
   8.932 +      //just risk system-stack faults until get this figured out
   8.933 +   free( ptrToFree );
   8.934 +
   8.935 +      //TODO: fix this -- so the free is done in the core loop, as noted above
   8.936 + }
   8.937 +
   8.938 +
   8.939 +/*Designed to be called from the main thread outside of VMS, during init.
   8.940 + *Gets one big chunk from the OS plus a list head; exits if either fails.*/
   8.941 +MallocProlog *
   8.942 +VMS_ext__create_free_list()
   8.943 + { MallocProlog *freeListHead, *firstChunk;
   8.944 +   char         *ptr, *endPtr;  //byte ptrs: arithmetic on void* is not ISO C
   8.945 +      //Note, this is running in the main thread -- all increases in malloc
   8.946 +      // mem and all frees of it must be done in this thread, with the
   8.947 +      // thread's original stack available
   8.948 +   freeListHead = malloc( sizeof(MallocProlog) );
   8.949 +   firstChunk   = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
   8.950 +   if( freeListHead == NULL || firstChunk == NULL )
   8.951 +    { printf("malloc error\n"); exit(1); }  //either allocation can fail
   8.952 +      //Touch one byte per page to avoid page faults later on
   8.953 +   endPtr = (char*)firstChunk + MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE;
   8.954 +   for( ptr = (char*)firstChunk; ptr < endPtr; ptr += PAGE_SIZE )
   8.955 +    {
   8.956 +      *ptr = 0;
   8.957 +    }
   8.958 +
   8.959 +      //head's in-mem pointers hold the heap bounds, not neighbouring chunks
   8.960 +   freeListHead->prevChunkInFreeList = NULL;
   8.961 +      //Use this addr to free the heap when cleanup
   8.962 +   freeListHead->nextLowerInMem      = firstChunk;
   8.963 +      //to identify top-of-heap elem, compare this addr to elem's next higher
   8.964 +   freeListHead->nextHigherInMem     = (void*)( (uintptr_t)firstChunk +
   8.965 +                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
   8.966 +   freeListHead->nextChunkInFreeList = firstChunk;
   8.967 +
   8.968 +   firstChunk->nextChunkInFreeList   = NULL;
   8.969 +   firstChunk->prevChunkInFreeList   = freeListHead;
   8.970 +      //next Higher has to be set to top of chunk, so can calc size in malloc
   8.971 +   firstChunk->nextHigherInMem       = (void*)( (uintptr_t)firstChunk +
   8.972 +                                         MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE);
   8.973 +   firstChunk->nextLowerInMem        = NULL; //identifies as bott of heap
   8.974 +   
   8.975 +   _VMSMasterEnv->amtOfOutstandingMem = 0; //none allocated yet
   8.976 +
   8.977 +   return freeListHead;
   8.978 + }
   8.979 +
   8.980 +
   8.981 +/*Designed to be called from the main thread outside of VMS, during cleanup.
   8.982 + *Releases the heap chunk recorded in freeListHead; the head itself is kept.*/
   8.983 +void
   8.984 +VMS_ext__free_free_list( MallocProlog *freeListHead )
   8.985 + {    
   8.986 +      //stashed a ptr to the one and only big chunk malloc'd from OS in the
   8.987 +      // free list head's next lower in mem pointer
   8.988 +   free( freeListHead->nextLowerInMem );
   8.989 +
   8.990 +   //don't free the head -- it'll be in an array eventually -- free whole
   8.991 +   // array when all the free lists linked from it have already been freed
   8.992 + }
   8.993 +

     9.1 --- a/vmalloc.h	Thu Oct 06 16:24:17 2011 +0200
     9.2 +++ b/vmalloc.h	Wed Jan 04 16:10:11 2012 -0800
     9.3 @@ -1,61 +1,61 @@
     9.4 -/*
     9.5 - *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
     9.6 - *  Licensed under GNU General Public License version 2
     9.7 - *
     9.8 - * Author: seanhalle@yahoo.com
     9.9 - *
    9.10 - * Created on November 14, 2009, 9:07 PM
    9.11 - */
    9.12 -
    9.13 -#ifndef _VMALLOC_H
    9.14 -#define	_VMALLOC_H
    9.15 -
    9.16 -#include <malloc.h>
    9.17 -#include <inttypes.h>
    9.18 -#include "VMS_primitive_data_types.h"
    9.19 -
    9.20 -typedef struct _MallocProlog MallocProlog;
    9.21 -
    9.22 -struct _MallocProlog
    9.23 - {
    9.24 -   MallocProlog *nextChunkInFreeList;
    9.25 -   MallocProlog *prevChunkInFreeList;
    9.26 -   MallocProlog *nextHigherInMem;
    9.27 -   MallocProlog *nextLowerInMem;
    9.28 - };
    9.29 -//MallocProlog
    9.30 -
    9.31 -typedef struct
    9.32 - {
    9.33 -   MallocProlog *firstChunkInFreeList;
    9.34 -   int32         numInList; //TODO not used
    9.35 - }
    9.36 -FreeListHead;
    9.37 -
    9.38 -void *
    9.39 -VMS__malloc( size_t sizeRequested );
    9.40 -
    9.41 -void *
    9.42 -VMS__malloc_aligned( size_t sizeRequested );
    9.43 -
    9.44 -void
    9.45 -VMS__free( void *ptrToFree );
    9.46 -
    9.47 -/*Allocates memory from the external system -- higher overhead
    9.48 - */
    9.49 -void *
    9.50 -VMS__malloc_in_ext( size_t sizeRequested );
    9.51 -
    9.52 -/*Frees memory that was allocated in the external system -- higher overhead
    9.53 - */
    9.54 -void
    9.55 -VMS__free_in_ext( void *ptrToFree );
    9.56 -
    9.57 -
    9.58 -MallocProlog *
    9.59 -VMS_ext__create_free_list();
    9.60 -
    9.61 -void
    9.62 -VMS_ext__free_free_list( MallocProlog *freeListHead );
    9.63 -
    9.64 +/*
    9.65 + *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
    9.66 + *  Licensed under GNU General Public License version 2
    9.67 + *
    9.68 + * Author: seanhalle@yahoo.com
    9.69 + *
    9.70 + * Created on November 14, 2009, 9:07 PM
    9.71 + */
    9.72 +
    9.73 +#ifndef _VMALLOC_H
    9.74 +#define	_VMALLOC_H
    9.75 +
    9.76 +#include <malloc.h>
    9.77 +#include <inttypes.h>
    9.78 +#include "VMS_primitive_data_types.h"
    9.79 +
    9.80 +typedef struct _MallocProlog MallocProlog;
    9.81 +//Header in front of every heap chunk; vmalloc.c also uses one as list head
    9.82 +struct _MallocProlog
    9.83 + {
    9.84 +   MallocProlog *nextChunkInFreeList; //next free chunk, NULL at end of list
    9.85 +   MallocProlog *prevChunkInFreeList; //prev free chunk, NULL while allocated
    9.86 +   MallocProlog *nextHigherInMem; //chunk above; top chunk holds heap-end addr
    9.87 +   MallocProlog *nextLowerInMem;  //chunk below; NULL for bottom of heap
    9.88 + };
    9.89 +//MallocProlog
    9.90 +
    9.91 +typedef struct  //NOTE(review): vmalloc.c appears to use a MallocProlog as
    9.92 + {              // its list head instead of this type -- confirm still needed
    9.93 +   MallocProlog *firstChunkInFreeList; //presumably first free chunk -- verify
    9.94 +   int32         numInList; //TODO not used
    9.95 + }
    9.96 +FreeListHead;
    9.97 +
    9.98 +void *
    9.99 +VMS__malloc( size_t sizeRequested );
   9.100 +
   9.101 +void *
   9.102 +VMS__malloc_aligned( size_t sizeRequested );
   9.103 +
   9.104 +void
   9.105 +VMS__free( void *ptrToFree );
   9.106 +
   9.107 +/*Allocates memory from the external system -- higher overhead
   9.108 + */
   9.109 +void *
   9.110 +VMS__malloc_in_ext( size_t sizeRequested );
   9.111 +
   9.112 +/*Frees memory that was allocated in the external system -- higher overhead
   9.113 + */
   9.114 +void
   9.115 +VMS__free_in_ext( void *ptrToFree );
   9.116 +
   9.117 +
   9.118 +MallocProlog *
   9.119 +VMS_ext__create_free_list();
   9.120 +
   9.121 +void
   9.122 +VMS_ext__free_free_list( MallocProlog *freeListHead );
   9.123 +
   9.124  #endif
   9.125 \ No newline at end of file

    10.1 --- a/vutilities.c	Thu Oct 06 16:24:17 2011 +0200
    10.2 +++ b/vutilities.c	Wed Jan 04 16:10:11 2012 -0800
    10.3 @@ -1,25 +1,25 @@
    10.4 -/*
    10.5 - *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
    10.6 - *  Licensed under GNU General Public License version 2
    10.7 - *
    10.8 - * Author: seanhalle@yahoo.com
    10.9 - *
   10.10 - * Created on November 14, 2009, 9:07 PM
   10.11 - */
   10.12 -
   10.13 -#include <malloc.h>
   10.14 -#include <stdlib.h>
   10.15 -
   10.16 -#include "VMS.h"
   10.17 -
   10.18 -
   10.19 -inline char *
   10.20 -VMS__strDup( char *str )
   10.21 - { char *retStr;
   10.22 -
   10.23 -   retStr = VMS__malloc( strlen(str) + 1 );
   10.24 -   if( str == NULL ) return str;
   10.25 -   strcpy( retStr, str );
   10.26 -
   10.27 -   return retStr;
   10.28 - }
   10.29 +/*
   10.30 + *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
   10.31 + *  Licensed under GNU General Public License version 2
   10.32 + *
   10.33 + * Author: seanhalle@yahoo.com
   10.34 + *
   10.35 + * Created on November 14, 2009, 9:07 PM
   10.36 + */
   10.37 +
   10.38 +#include <malloc.h>
   10.39 +#include <stdlib.h>
   10.40 +
   10.41 +#include "VMS.h"
   10.42 +
   10.43 +
   10.44 +inline char *
   10.45 +VMS__strDup( char *str )
   10.46 + { char *retStr;
   10.47 +
   10.48 +   retStr = VMS__malloc( strlen(str) + 1 );
   10.49 +   if( str == NULL ) return str;
   10.50 +   strcpy( retStr, str );
   10.51 +
   10.52 +   return retStr;
   10.53 + }

    11.1 --- a/vutilities.h	Thu Oct 06 16:24:17 2011 +0200
    11.2 +++ b/vutilities.h	Wed Jan 04 16:10:11 2012 -0800
    11.3 @@ -1,20 +1,20 @@
    11.4 -/*
    11.5 - *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
    11.6 - *  Licensed under GNU General Public License version 2
    11.7 - *
    11.8 - * Author: seanhalle@yahoo.com
    11.9 - *
   11.10 - * Created on November 14, 2009, 9:07 PM
   11.11 - */
   11.12 -
   11.13 -
   11.14 -#ifndef  _UTILITIES_H
   11.15 -#define	_UTILITIES_H
   11.16 -
   11.17 -#include <string.h>
   11.18 -#include "VMS_primitive_data_types.h"
   11.19 -
   11.20 -inline char *
   11.21 -VMS__strDup( char *str );
   11.22 - 
   11.23 -#endif
   11.24 +/*
   11.25 + *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
   11.26 + *  Licensed under GNU General Public License version 2
   11.27 + *
   11.28 + * Author: seanhalle@yahoo.com
   11.29 + *
   11.30 + * Created on November 14, 2009, 9:07 PM
   11.31 + */
   11.32 +
   11.33 +
   11.34 +#ifndef  _UTILITIES_H
   11.35 +#define	_UTILITIES_H
   11.36 +
   11.37 +#include <string.h>
   11.38 +#include "VMS_primitive_data_types.h"
   11.39 +
   11.40 +inline char *
   11.41 +VMS__strDup( char *str );
   11.42 + 
   11.43 +#endif