VMS/VMS_Implementations/VCilk_impls/VCilk__MC_shared_impl

view VCilk_lib.c @ 8:e649c2387a60

new sequential version
author Merten Sach <msach@mailbox.tu-berlin.de>
date Thu, 02 Jun 2011 13:54:34 +0200
parents 58d0c2b1d6a4
children 5131f941f42c
line source
1 /*
2 * Copyright 2010 OpenSourceCodeStewardshipFoundation
3 *
4 * Licensed under BSD
5 */
7 #include <stdio.h>
8 #include <stdlib.h>
10 #include "VMS/VMS.h"
11 #include "VCilk.h"
12 #include "VMS/Queue_impl/PrivateQueue.h"
13 #include "VMS/Hash_impl/PrivateHash.h"
16 //==========================================================================
//Forward declarations of the initialization routines defined further down.
// VCilk__init_Seq is only defined when SEQUENTIAL is set at compile time.
void VCilk__init();
void VCilk__init_Seq();
void VCilk__init_Helper();
26 //==========================================================================
29 /*TODO: Q: dealing with library f()s and DKU vs WT vs FoR
30 * (still want to do FoR, with time-lines as syntax, could be super cool)
31 * A: thinking pin the coreLoops for all of BLIS -- let Master arbitrate
32 * among library, DKU, WT, FoR -- all the patterns in terms of virtual
33 * processors (or equivalently work-units), so Master picks which virt procr
34 * from which portions of app (DKU, WT, FoR) onto which sched slots
35 *Might even do hierarchy of masters -- group of sched slots for each core
36 * has its own master, that keeps generated work local
37 * single-reader-single-writer sync everywhere -- no atomic primitives (but
38 * memory fences on architectures that need them)
39 * Might have the different schedulers talk to each other, to negotiate
40 * larger-grain sharing of resources, according to predicted critical
41 * path, and expansion of work
42 */
46 //===========================================================================
49 /*These are the library functions *called in the application*
50 *
51 *There's a pattern for the outside sequential code to interact with the
52 * VMS_HW code.
53 *The VMS_HW system is inside a boundary.. every VCilk system is in its
54 * own directory that contains the functions for each of the processor types.
55 * One of the processor types is the "seed" processor that starts the
56 * cascade of creating all the processors that do the work.
57 *So, in the directory is a file called "EntryPoint.c" that contains the
58 * function, named appropriately to the work performed, that the outside
59 * sequential code calls. This function follows a pattern:
60 *1) it calls VCilk__init()
61 *2) it creates the initial data for the seed processor, which is passed
62 * in to the function
63 *3) it creates the seed VCilk processor, with the data to start it with.
64 *4) it calls startVCilkThenWaitUntilWorkDone
65 *5) it gets the returnValue from the transfer struc and returns that
66 * from the function
67 *
68 *For now, a new VCilk system has to be created via VCilk__init every
69 * time an entry point function is called -- later, might add letting the
70 * VCilk system be created once, and let all the entry points just reuse
71 * it -- want to be as simple as possible now, and see by using what makes
72 * sense for later..
73 */
77 //===========================================================================
79 /*This is the "border crossing" function -- the thing that crosses from the
80 * outside world, into the VMS_HW world. It initializes and starts up the
81 * VMS system, then creates one processor from the specified function and
82 * puts it into the readyQ. From that point, that one function is resp.
83 * for creating all the other processors, that then create others, and so
84 * forth.
85 *When all the processors, including the seed, have dissipated, then this
86 * function returns. The results will have been written by side-effect via
87 * pointers read from, or written into initData.
88 *
89 *NOTE: no Threads should exist in the outside program that might touch
90 * any of the data reachable from initData passed in to here
91 */
92 void
93 VCilk__create_seed_procr_and_do_work( VirtProcrFnPtr fnPtr, void *initData )
94 { VCilkSemEnv *semEnv;
95 VirtProcr *seedPr;
97 #ifdef SEQUENTIAL
98 VCilk__init_Seq(); //debug sequential exe
99 #else
100 VCilk__init(); //normal multi-thd
101 #endif
102 semEnv = _VMSMasterEnv->semanticEnv;
104 //VCilk starts with one processor, which is put into initial environ,
105 // and which then calls create() to create more, thereby expanding work
106 seedPr = (VirtProcr*)VCilk__create_procr_helper( fnPtr, initData, NULL, semEnv, -1 );
107 resume_procr( seedPr, semEnv );
109 #ifdef SEQUENTIAL
110 VMS__start_the_work_then_wait_until_done_Seq(); //debug sequential exe
111 #else
112 VMS__start_the_work_then_wait_until_done(); //normal multi-thd
113 #endif
115 VCilk__cleanup_at_end_of_shutdown();
116 }
119 int32 inline
120 VCilk__giveMinWorkUnitCycles( float32 percentOverhead )
121 {
122 return MIN_WORK_UNIT_CYCLES;
123 }
125 int32
126 VCilk__giveIdealNumWorkUnits()
127 {
128 return NUM_SCHED_SLOTS * NUM_CORES;
129 }
131 /*To measure how long a primitive operation takes, when calculating number of
132 * sub-tasks to divide into.
133 * For now, use TSC -- later, make these two macros with assembly that first
134 * saves jump point, and second jumps back several times to get reliable time
135 */
136 void inline
137 VCilk__start_primitive()
138 { //int32 *saveAddr;
139 //saveAddr = &(((VCilkSemEnv *)(_VMSMasterEnv->semanticEnv))->primitiveStartTime);
140 saveLowTimeStampCountInto( (((VCilkSemEnv *)
141 (_VMSMasterEnv->semanticEnv))->primitiveStartTime) );
142 }
144 /*Just quick and dirty for now -- make reliable later
145 * will want this to jump back several times -- to be sure cache is warm
146 * because don't want comm time included in calc-time measurement -- and
147 * also to throw out any "weird" values due to OS interrupt or TSC rollover
148 */
149 int32 inline
150 VCilk__end_primitive_and_give_cycles()
151 { int32 endTime, startTime;
152 //TODO: fix by repeating time-measurement
153 saveLowTimeStampCountInto( endTime );
154 startTime = ( (VCilkSemEnv *)
155 (_VMSMasterEnv->semanticEnv))->primitiveStartTime;
156 return (endTime - startTime);
157 }
159 //===========================================================================
160 //
/*Builds all the data structures of a VCilk system for normal multi-threaded
 * execution -- but does not start it running.
 *
 *Runs, together with its callees, in the main thread outside VMS.
 *
 *VMS itself is initialized first; the VCilk semantic layer is then set up
 * on top of it by VCilk__init_Helper(), leaving everything ready for the
 * seed processor to be created and the work started.
 */
void
VCilk__init()
{
      //after this, the global master env is partially set up
   VMS__init();

      //add VCilk's own environment and plug-ins
   VCilk__init_Helper();
}
#ifdef SEQUENTIAL
/*Sequential (debug) counterpart of VCilk__init(): initializes VMS through
 * VMS__init_Seq(), then sets up the VCilk semantic layer with the same
 * helper the multi-threaded version uses.  Only compiled when SEQUENTIAL
 * is defined.
 */
void
VCilk__init_Seq()
{
      //after this, the global master env is partially set up
   VMS__init_Seq();

      //add VCilk's own environment and plug-ins
   VCilk__init_Helper();
}
#endif
191 /*Runs in main thread before VMS system starts
192 */
193 void
194 VCilk__init_Helper()
195 { VCilkSemEnv *semanticEnv;
196 PrivQueueStruc **readyVPQs;
197 int coreIdx;
199 //Hook up the semantic layer's plug-ins to the Master virt procr
200 _VMSMasterEnv->requestHandler = &VCilk__Request_Handler;
201 _VMSMasterEnv->slaveScheduler = &VCilk__schedule_virt_procr;
203 //create the semantic layer's environment (all its data) and add to
204 // the master environment
205 semanticEnv = VMS__malloc( sizeof( VCilkSemEnv ) );
206 _VMSMasterEnv->semanticEnv = semanticEnv;
208 //create the ready queue, hash tables used for pairing send to receive
209 // and so forth
210 //TODO: add hash tables for pairing sends with receives, and
211 // initialize the data ownership system
212 readyVPQs = VMS__malloc( NUM_CORES * sizeof(PrivQueueStruc *) );
214 for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
215 {
216 readyVPQs[ coreIdx ] = makeVMSPrivQ();
217 }
219 semanticEnv->readyVPQs = readyVPQs;
221 semanticEnv->nextCoreToGetNewPr = 0;
223 //TODO: bug -- turn these arrays into dyn arrays to eliminate limit
224 //semanticEnv->singletonHasBeenExecutedFlags = makeDynArrayInfo( );
225 //semanticEnv->transactionStrucs = makeDynArrayInfo( );
226 //something like: setHighestIdx( dynArrayInfo, NUM_STRUCS_IN_SEM_ENV )
227 int32 i;
228 for( i = 0; i < NUM_STRUCS_IN_SEM_ENV; i++ )
229 {
230 semanticEnv->fnSingletons[i].endInstrAddr = NULL;
231 semanticEnv->fnSingletons[i].hasBeenStarted = FALSE;
232 semanticEnv->fnSingletons[i].hasFinished = FALSE;
233 semanticEnv->fnSingletons[i].waitQ = makeVMSPrivQ();
234 semanticEnv->transactionStrucs[i].waitingVPQ = makeVMSPrivQ();
235 }
237 }
240 /*Runs in main thread, outside VMS
241 *Frees any memory allocated by VCilk__init() then calls VMS's cleanup
242 */
243 void
244 VCilk__cleanup_at_end_of_shutdown()
245 { VCilkSemEnv *semanticEnv;
247 semanticEnv = _VMSMasterEnv->semanticEnv;
249 /*
250 int32 coreIdx;
251 for( coreIdx = 0; coreIdx < NUM_CORES; coreIdx++ )
252 {
253 VMS__free( semanticEnv->readyVPQs[coreIdx]->startOfData );
254 VMS__free( semanticEnv->readyVPQs[coreIdx] );
255 }
256 VMS__free( semanticEnv->readyVPQs );
258 VMS__free( _VMSMasterEnv->semanticEnv );
259 */
260 VMS__cleanup_at_end_of_shutdown();
261 }
264 //===========================================================================
267 /*Spawn involves allocating mem as well as creating processor which itself
268 * allocates, so has to be done inside master
269 */
270 void inline
271 VCilk__spawn( int32 coreToSpawnOnto, VirtProcrFnPtr fnPtr,
272 void *initData, VirtProcr *requestingPr )
273 { VCilkSemReq reqData;
275 //the semantic request data is on the stack and disappears when this
276 // call returns -- it's guaranteed to remain in the VP's stack for as
277 // long as the VP is suspended.
278 reqData.reqType = 0; //know it's type because in a VMS create req
279 reqData.coreToSpawnOnto = coreToSpawnOnto;
280 reqData.fnPtr = fnPtr;
281 reqData.initData = initData;
282 reqData.requestingPr = requestingPr;
284 VMS__send_create_procr_req( &reqData, requestingPr );
285 }
288 int32
289 VCilk__give_number_of_cores_to_spawn_onto()
290 {
291 return NUM_CORES;
292 }
296 /*This runs inside slave VP, so can't do any freeing -- have to do in plugin
297 */
298 void inline
299 VCilk__dissipate_procr( VirtProcr *procrToDissipate )
300 {
302 VMS__send_dissipate_req( procrToDissipate );
303 }
305 //===========================================================================
307 void
308 VCilk__sync( VirtProcr *animPr )
309 { VCilkSemReq reqData;
311 reqData.reqType = syncReq;
312 reqData.requestingPr = animPr;
314 VMS__send_sem_request( &reqData, animPr );
315 }
319 void *
320 VCilk__malloc( int32 sizeToMalloc, VirtProcr *animPr )
321 { VCilkSemReq reqData;
323 reqData.reqType = mallocReq;
324 reqData.requestingPr = animPr;
325 reqData.sizeToMalloc = sizeToMalloc;
327 VMS__send_sem_request( &reqData, animPr );
329 return animPr->dataRetFromReq;
330 }
333 /*Sends request to Master, which does the work of freeing
334 */
335 void
336 VCilk__free( void *ptrToFree, VirtProcr *animPr )
337 { VCilkSemReq reqData;
339 reqData.reqType = freeReq;
340 reqData.requestingPr = animPr;
341 reqData.ptrToFree = ptrToFree;
343 VMS__send_sem_request( &reqData, animPr );
344 }
346 //===========================================================================
347 //
348 /*A function singleton is a function whose body executes exactly once, on a
349 * single core, no matter how many times the function is called and no
350 * matter how many cores or the timing of cores calling it.
351 *
352 *A data singleton is a ticket attached to data. That ticket can be used
353 * to get the data through the function exactly once, no matter how many
354 * times the data is given to the function, and no matter the timing of
355 * trying to get the data through from different cores.
356 */
358 /*Fn singleton uses ID as index into array of singleton structs held in the
359 * semantic environment.
360 */
361 void
362 VCilk__start_fn_singleton( int32 singletonID, VirtProcr *animPr )
363 {
364 VCilkSemReq reqData;
366 //
367 reqData.reqType = singleton_fn_start;
368 reqData.singletonID = singletonID;
370 VMS__send_sem_request( &reqData, animPr );
371 if( animPr->dataRetFromReq ) //will be 0 or addr of label in end singleton
372 {
373 asm volatile("movl %0, %%eax; \
374 jmp *%%eax" \
375 /* outputs */ : \
376 /* inputs */ : "g"(animPr->dataRetFromReq) \
377 /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx","%edi","%esi"\
378 );
379 }
380 }
382 /*Data singleton hands addr of loc holding a pointer to a singleton struct.
383 * The start_data_singleton makes the structure and puts its addr into the
384 * location.
385 */
386 void
387 VCilk__start_data_singleton( VCilkSingleton **singletonAddr, VirtProcr *animPr )
388 {
389 VCilkSemReq reqData;
391 if( *singletonAddr && (*singletonAddr)->hasFinished )
392 goto JmpToEndSingleton;
393 //
394 reqData.reqType = singleton_data_start;
395 reqData.singletonPtrAddr = singletonAddr;
397 VMS__send_sem_request( &reqData, animPr );
398 if( animPr->dataRetFromReq ) //either 0 or end singleton's return addr
399 { //Assembly code changes the return addr on the stack to the one
400 // saved into the singleton by the end-singleton-fn
401 //The return addr is at 0x4(%%ebp)
402 JmpToEndSingleton:
403 asm volatile("movl %0, %%eax; \
404 movl (%%eax), %%ebx; \
405 movl (%%ebx), %%eax; \
406 movl %%eax, 0x4(%%ebp);" \
407 /* outputs */ : \
408 /* inputs */ : "m"(singletonAddr) \
409 /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx","%edi","%esi"\
410 );
411 }
412 //now, simply return
413 //will exit either from the start singleton call or the end-singleton call
414 }
416 /*Uses ID as index into array of flags. If flag already set, resumes from
417 * end-label. Else, sets flag and resumes normally.
418 *
419 *Note, this call cannot be inlined because the instr addr at the label
420 * inside is shared by all invocations of a given singleton ID.
421 */
422 void
423 VCilk__end_fn_singleton( int32 singletonID, VirtProcr *animPr )
424 {
425 VCilkSemReq reqData;
427 //don't need this addr until after at least one singleton has reached
428 // this function
429 VCilkSemEnv *semEnv = VMS__give_sem_env_for( animPr );
430 semEnv->fnSingletons[ singletonID].endInstrAddr = &&EndSingletonInstrAddr;
432 reqData.reqType = singleton_fn_end;
433 reqData.singletonID = singletonID;
435 VMS__send_sem_request( &reqData, animPr );
437 EndSingletonInstrAddr:
438 return;
439 }
441 void
442 VCilk__end_data_singleton( VCilkSingleton **singletonPtrAddr, VirtProcr *animPr )
443 {
444 VCilkSemReq reqData;
446 //don't need this addr until after singleton struct has reached
447 // this function for first time
448 //do assembly that saves the return addr of this fn call into the
449 // data singleton -- that data-singleton can only be given to exactly
450 // one instance in the code of this function. However, can use this
451 // function in different places for different data-singletons.
452 // (*(singletonAddr))->endInstrAddr = &&EndDataSingletonInstrAddr;
454 //Assembly code takes the return addr off the stack and saves
455 // into the singleton. The first field in the singleton is the
456 // "endInstrAddr" field, and the return addr is at 0x4(%%ebp)
457 asm volatile("movl 0x4(%%ebp), %%eax; \
458 movl %0, %%ebx; \
459 movl (%%ebx), %%ecx; \
460 movl %%eax, (%%ecx);" \
461 /* outputs */ : \
462 /* inputs */ : "m"(singletonPtrAddr) \
463 /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx","%edi","%esi"\
464 );
466 reqData.reqType = singleton_data_end;
467 reqData.singletonPtrAddr = singletonPtrAddr;
469 VMS__send_sem_request( &reqData, animPr );
470 }
472 /*This executes the function in the masterVP, so it executes in isolation
473 * from any other copies -- only one copy of the function can ever execute
474 * at a time.
475 *
476 *It suspends to the master, and the request handler takes the function
477 * pointer out of the request and calls it, then resumes the VP.
478 *Only very short functions should be called this way -- for longer-running
479 * isolation, use transaction-start and transaction-end, which run the code
480 * between as work-code.
481 */
482 void
483 VCilk__animate_short_fn_in_isolation( PtrToAtomicFn ptrToFnToExecInMaster,
484 void *data, VirtProcr *animPr )
485 {
486 VCilkSemReq reqData;
488 //
489 reqData.reqType = atomic;
490 reqData.fnToExecInMaster = ptrToFnToExecInMaster;
491 reqData.dataForFn = data;
493 VMS__send_sem_request( &reqData, animPr );
494 }
497 /*This suspends to the master.
498 *First, it looks at the VP's data, to see the highest transactionID that VP
499 * already has entered. If the current ID is not larger, it throws an
500 * exception stating a bug in the code. Otherwise it puts the current ID
501 * there, and adds the ID to a linked list of IDs entered -- the list is
502 * used to check that exits are properly ordered.
503 *Next it is uses transactionID as index into an array of transaction
504 * structures.
505 *If the "VP_currently_executing" field is non-null, then put requesting VP
506 * into queue in the struct. (At some point a holder will request
507 * end-transaction, which will take this VP from the queue and resume it.)
508 *If NULL, then write requesting into the field and resume.
509 */
510 void
511 VCilk__start_transaction( int32 transactionID, VirtProcr *animPr )
512 {
513 VCilkSemReq reqData;
515 //
516 reqData.reqType = trans_start;
517 reqData.transID = transactionID;
519 VMS__send_sem_request( &reqData, animPr );
520 }
522 /*This suspends to the master, then uses transactionID as index into an
523 * array of transaction structures.
524 *It looks at VP_currently_executing to be sure it's same as requesting VP.
525 * If different, throws an exception, stating there's a bug in the code.
526 *Next it looks at the queue in the structure.
527 *If it's empty, it sets VP_currently_executing field to NULL and resumes.
528 *If something in, gets it, sets VP_currently_executing to that VP, then
529 * resumes both.
530 */
531 void
532 VCilk__end_transaction( int32 transactionID, VirtProcr *animPr )
533 {
534 VCilkSemReq reqData;
536 //
537 reqData.reqType = trans_end;
538 reqData.transID = transactionID;
540 VMS__send_sem_request( &reqData, animPr );
541 }