# HG changeset patch # User Me # Date 1290238745 -3600 # Node ID 46ceb3dd0f0a167e4fc6fdbfcc2a5042fad511f5 # Parent 133633d1c10f32526f25affc7bac6287652070f9 Nov 20 PLDI final numbers -- debug statements, singletons and #ifdef'd probes diff -r 133633d1c10f -r 46ceb3dd0f0a src/Application/VPThread__Matrix_Mult/Divide_Pr.c --- a/src/Application/VPThread__Matrix_Mult/Divide_Pr.c Tue Nov 16 16:02:51 2010 +0100 +++ b/src/Application/VPThread__Matrix_Mult/Divide_Pr.c Sat Nov 20 08:39:05 2010 +0100 @@ -178,6 +178,7 @@ { //====== Do parallel multiply across cores + DEBUG( dbgAppFlow, "divider: do parallel mult\n") //Calc the ideal size of sub-matrix and slice up the dimensions of // the two matrices. //The ideal size is the one takes the number of cycles to calculate @@ -213,6 +214,7 @@ animatingThd ); //====================================================================== + DEBUG( dbgAppFlow, "divider: made mutexes and conds\n") //get results-comm lock before create results-thd, to ensure it can't // signal that results are available before this thd is waiting on cond VPThread__mutex_lock( globals->results_mutex, animatingThd ); @@ -222,11 +224,13 @@ VPThread__mutex_lock( globals->start_mutex, animatingThd ); + DEBUG( dbgAppFlow, "divider: make result thread\n") VPThread__create_thread( &gatherResults, resultsParams, animatingThd ); //Now wait for results thd to signal that it has vector lock VPThread__cond_wait( globals->start_cond, animatingThd ); VPThread__mutex_unlock( globals->start_mutex, animatingThd );//done w/lock + DEBUG( dbgAppFlow, "divider: make sub-matrices\n") //Make the sub-matrices, and pair them up, and make processor to // calc product of each pair. @@ -266,7 +270,7 @@ float32 idealSizeOfSide, idealSizeOfSide1, idealSizeOfSide2; SlicingStruc *leftRowSlices, *vecSlices, *rightColSlices; SlicingStrucCarrier *slicingStrucCarrier = - VPThread__malloc(sizeof(SlicingStrucCarrier), animPr); + VPThread__malloc(sizeof(SlicingStrucCarrier), animPr); int minWorkUnitCycles, primitiveCycles, idealNumWorkUnits; float64 numPrimitiveOpsInMinWorkUnit; @@ -417,13 +421,13 @@ subMatrixPairParams->rightSubMatrix = rightSubMatrices[ vecIdx * numRightColIdxs + resColIdx ]; - subMatrixPairParams->resultPr = resultPr fix_this; + //subMatrixPairParams->resultPr = resultPr; //put all pairs from the same vector onto same core VPThread__create_thread_with_affinity( &calcSubMatrixProduct, - subMatrixPairParams, - animatingPr, - coreToScheduleOnto ); + subMatrixPairParams, + animatingPr, + coreToScheduleOnto ); } //Trying to distribute the subMatrix-vectors across the cores, so diff -r 133633d1c10f -r 46ceb3dd0f0a src/Application/VPThread__Matrix_Mult/VPThread__Matrix_Mult.h --- a/src/Application/VPThread__Matrix_Mult/VPThread__Matrix_Mult.h Tue Nov 16 16:02:51 2010 +0100 +++ b/src/Application/VPThread__Matrix_Mult/VPThread__Matrix_Mult.h Sat Nov 20 08:39:05 2010 +0100 @@ -47,6 +47,8 @@ int32 origStartRow; int32 origStartCol; int32 alreadyCopied; + VPThdSingleton *copySingleton; + VPThdSingleton *copyTransSingleton; int32 numUsesLeft; //have update via message to avoid multiple writers float32 *array; //2D, but dynamically sized, so use addr arith } diff -r 133633d1c10f -r 46ceb3dd0f0a src/Application/VPThread__Matrix_Mult/subMatrix_Pr.c --- a/src/Application/VPThread__Matrix_Mult/subMatrix_Pr.c Tue Nov 16 16:02:51 2010 +0100 +++ b/src/Application/VPThread__Matrix_Mult/subMatrix_Pr.c Sat Nov 20 08:39:05 2010 +0100 @@ -53,10 +53,12 @@ MatrixMultGlobals *globals =(MatrixMultGlobals *)VPThread__give_globals(); DEBUG1(dbgAppFlow, "start sub-matrix mult: %d\n", animatingPr->procrID) + #ifdef TURN_ON_DEBUG_PROBES int32 subMatrixProbe = VMS__create_single_interval_probe( "subMtx", animatingPr); VMS__record_sched_choice_into_probe( subMatrixProbe, animatingPr ); VMS__record_interval_start_in_probe( subMatrixProbe ); + #endif params = (SMPairParams *)data; resultPr = params->resultPr; @@ -90,7 +92,9 @@ //send result to result processor params->partialResultArray = resArray; + #ifdef TURN_ON_DEBUG_PROBES VMS__record_interval_end_in_probe( subMatrixProbe ); + #endif //Send result to results thread //This pattern works 'cause only get lock when results thd inside wait @@ -100,6 +104,7 @@ VPThread__mutex_unlock( globals->vector_mutex, animatingPr );//release //wait-er -- cond_signal implemented such that wait-er gets lock, no other + DEBUG1(dbgAppFlow, "end sub-matrix mult: %d\n", animatingPr->procrID) VPThread__dissipate_thread( animatingPr ); } @@ -233,8 +238,7 @@ Matrix *origMatrix; float32 *origArray, *subArray; - if( subMatrix->alreadyCopied ) return; - VPThread__start_singleton( copyTransposeSingleton, animPr); + VPThread__start_data_singleton( &(subMatrix->copyTransSingleton), animPr ); origMatrix = subMatrix->origMatrix; origArray = origMatrix->array; @@ -252,8 +256,8 @@ origStartRow, origStartCol, origStride, subArray, origArray ); - VPThread__end_singleton( copyTransposeSingleton, animPr); - subMatrix->alreadyCopied = TRUE; //anywhere after singleton work finished + VPThread__end_data_singleton( &(subMatrix->copyTransSingleton), animPr ); + } @@ -267,11 +271,10 @@ //This lets only a single VP execute the code between start and // end -- using start and end so that work runs outside the master. //If a second VP ever executes the start, it will be returned - // from the end-point. If it executions start after another but before + // from the end-point. If its execution starts after another but before // that other has finished, this one will remain suspended until the // other finishes, then be resumed from the end-point. - if( subMatrix->alreadyCopied ) return; //an optimization -- set below - VPThread__start_singleton( copyMatrixSingleton, animPr ); + VPThread__start_data_singleton( &(subMatrix->copySingleton), animPr ); origMatrix = subMatrix->origMatrix; @@ -299,6 +302,5 @@ } } - subMatrix->alreadyCopied = TRUE; //must be after singleton work finished - VPThread__end_singleton( copyMatrixSingleton, animPr ); + VPThread__end_data_singleton( &(subMatrix->copySingleton), animPr ); } diff -r 133633d1c10f -r 46ceb3dd0f0a src/Application/main.c --- a/src/Application/main.c Tue Nov 16 16:02:51 2010 +0100 +++ b/src/Application/main.c Sat Nov 20 08:39:05 2010 +0100 @@ -9,7 +9,7 @@ #include #include "Matrix_Mult.h" -#include "SSR_Matrix_Mult/SSR_Matrix_Mult.h" +#include "VPThread__Matrix_Mult/VPThread__Matrix_Mult.h" /** *Matrix multiply program written using VMS_HW piggy-back language