Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > Vthread > Vthread__Blocked_Matrix_Mult__Bench
changeset 2:46ceb3dd0f0a
Nov 20 PLDI final numbers -- debug statements, singletons and #ifdef'd probes
| author | Me |
|---|---|
| date | Sat, 20 Nov 2010 08:39:05 +0100 |
| parents | 133633d1c10f |
| children | 4007d97740a5 |
| files | src/Application/VPThread__Matrix_Mult/Divide_Pr.c src/Application/VPThread__Matrix_Mult/VPThread__Matrix_Mult.h src/Application/VPThread__Matrix_Mult/subMatrix_Pr.c src/Application/main.c |
| diffstat | 4 files changed, 23 insertions(+), 15 deletions(-) [+] |
line diff
```
1.1 --- a/src/Application/VPThread__Matrix_Mult/Divide_Pr.c Tue Nov 16 16:02:51 2010 +0100
1.2 +++ b/src/Application/VPThread__Matrix_Mult/Divide_Pr.c Sat Nov 20 08:39:05 2010 +0100
1.3 @@ -178,6 +178,7 @@
1.4 {
1.5 //====== Do parallel multiply across cores
1.6
1.7 + DEBUG( dbgAppFlow, "divider: do parallel mult\n")
1.8 //Calc the ideal size of sub-matrix and slice up the dimensions of
1.9 // the two matrices.
1.10 //The ideal size is the one takes the number of cycles to calculate
1.11 @@ -213,6 +214,7 @@
1.12 animatingThd );
1.13 //======================================================================
1.14
1.15 + DEBUG( dbgAppFlow, "divider: made mutexes and conds\n")
1.16 //get results-comm lock before create results-thd, to ensure it can't
1.17 // signal that results are available before this thd is waiting on cond
1.18 VPThread__mutex_lock( globals->results_mutex, animatingThd );
1.19 @@ -222,11 +224,13 @@
1.20 VPThread__mutex_lock( globals->start_mutex, animatingThd );
1.21
1.22
1.23 + DEBUG( dbgAppFlow, "divider: make result thread\n")
1.24 VPThread__create_thread( &gatherResults, resultsParams, animatingThd );
1.25
1.26 //Now wait for results thd to signal that it has vector lock
1.27 VPThread__cond_wait( globals->start_cond, animatingThd );
1.28 VPThread__mutex_unlock( globals->start_mutex, animatingThd );//done w/lock
1.29 + DEBUG( dbgAppFlow, "divider: make sub-matrices\n")
1.30
1.31 //Make the sub-matrices, and pair them up, and make processor to
1.32 // calc product of each pair.
1.33 @@ -266,7 +270,7 @@
1.34 float32 idealSizeOfSide, idealSizeOfSide1, idealSizeOfSide2;
1.35 SlicingStruc *leftRowSlices, *vecSlices, *rightColSlices;
1.36 SlicingStrucCarrier *slicingStrucCarrier =
1.37 - VPThread__malloc(sizeof(SlicingStrucCarrier), animPr);
1.38 + VPThread__malloc(sizeof(SlicingStrucCarrier), animPr);
1.39
1.40 int minWorkUnitCycles, primitiveCycles, idealNumWorkUnits;
1.41 float64 numPrimitiveOpsInMinWorkUnit;
1.42 @@ -417,13 +421,13 @@
1.43 subMatrixPairParams->rightSubMatrix =
1.44 rightSubMatrices[ vecIdx * numRightColIdxs + resColIdx ];
1.45
1.46 - subMatrixPairParams->resultPr = resultPr fix_this;
1.47 + //subMatrixPairParams->resultPr = resultPr;
1.48
1.49 //put all pairs from the same vector onto same core
1.50 VPThread__create_thread_with_affinity( &calcSubMatrixProduct,
1.51 - subMatrixPairParams,
1.52 - animatingPr,
1.53 - coreToScheduleOnto );
1.54 + subMatrixPairParams,
1.55 + animatingPr,
1.56 + coreToScheduleOnto );
1.57 }
1.58
1.59 //Trying to distribute the subMatrix-vectors across the cores, so
```
```
2.1 --- a/src/Application/VPThread__Matrix_Mult/VPThread__Matrix_Mult.h Tue Nov 16 16:02:51 2010 +0100
2.2 +++ b/src/Application/VPThread__Matrix_Mult/VPThread__Matrix_Mult.h Sat Nov 20 08:39:05 2010 +0100
2.3 @@ -47,6 +47,8 @@
2.4 int32 origStartRow;
2.5 int32 origStartCol;
2.6 int32 alreadyCopied;
2.7 + VPThdSingleton *copySingleton;
2.8 + VPThdSingleton *copyTransSingleton;
2.9 int32 numUsesLeft; //have update via message to avoid multiple writers
2.10 float32 *array; //2D, but dynamically sized, so use addr arith
2.11 }
```
```
3.1 --- a/src/Application/VPThread__Matrix_Mult/subMatrix_Pr.c Tue Nov 16 16:02:51 2010 +0100
3.2 +++ b/src/Application/VPThread__Matrix_Mult/subMatrix_Pr.c Sat Nov 20 08:39:05 2010 +0100
3.3 @@ -53,10 +53,12 @@
3.4 MatrixMultGlobals *globals =(MatrixMultGlobals *)VPThread__give_globals();
3.5
3.6 DEBUG1(dbgAppFlow, "start sub-matrix mult: %d\n", animatingPr->procrID)
3.7 + #ifdef TURN_ON_DEBUG_PROBES
3.8 int32 subMatrixProbe = VMS__create_single_interval_probe( "subMtx",
3.9 animatingPr);
3.10 VMS__record_sched_choice_into_probe( subMatrixProbe, animatingPr );
3.11 VMS__record_interval_start_in_probe( subMatrixProbe );
3.12 + #endif
3.13
3.14 params = (SMPairParams *)data;
3.15 resultPr = params->resultPr;
3.16 @@ -90,7 +92,9 @@
3.17 //send result to result processor
3.18 params->partialResultArray = resArray;
3.19
3.20 + #ifdef TURN_ON_DEBUG_PROBES
3.21 VMS__record_interval_end_in_probe( subMatrixProbe );
3.22 + #endif
3.23
3.24 //Send result to results thread
3.25 //This pattern works 'cause only get lock when results thd inside wait
3.26 @@ -100,6 +104,7 @@
3.27 VPThread__mutex_unlock( globals->vector_mutex, animatingPr );//release
3.28 //wait-er -- cond_signal implemented such that wait-er gets lock, no other
3.29
3.30 + DEBUG1(dbgAppFlow, "end sub-matrix mult: %d\n", animatingPr->procrID)
3.31 VPThread__dissipate_thread( animatingPr );
3.32 }
3.33
3.34 @@ -233,8 +238,7 @@
3.35 Matrix *origMatrix;
3.36 float32 *origArray, *subArray;
3.37
3.38 - if( subMatrix->alreadyCopied ) return;
3.39 - VPThread__start_singleton( copyTransposeSingleton, animPr);
3.40 + VPThread__start_data_singleton( &(subMatrix->copyTransSingleton), animPr );
3.41
3.42 origMatrix = subMatrix->origMatrix;
3.43 origArray = origMatrix->array;
3.44 @@ -252,8 +256,8 @@
3.45 origStartRow, origStartCol, origStride,
3.46 subArray, origArray );
3.47
3.48 - VPThread__end_singleton( copyTransposeSingleton, animPr);
3.49 - subMatrix->alreadyCopied = TRUE; //anywhere after singleton work finished
3.50 + VPThread__end_data_singleton( &(subMatrix->copyTransSingleton), animPr );
3.51 +
3.52 }
3.53
3.54
3.55 @@ -267,11 +271,10 @@
3.56 //This lets only a single VP execute the code between start and
3.57 // end -- using start and end so that work runs outside the master.
3.58 //If a second VP ever executes the start, it will be returned
3.59 - // from the end-point. If it executions start after another but before
3.60 + // from the end-point. If its execution starts after another but before
3.61 // that other has finished, this one will remain suspended until the
3.62 // other finishes, then be resumed from the end-point.
3.63 - if( subMatrix->alreadyCopied ) return; //an optimization -- set below
3.64 - VPThread__start_singleton( copyMatrixSingleton, animPr );
3.65 + VPThread__start_data_singleton( &(subMatrix->copySingleton), animPr );
3.66
3.67
3.68 origMatrix = subMatrix->origMatrix;
3.69 @@ -299,6 +302,5 @@
3.70 }
3.71 }
3.72
3.73 - subMatrix->alreadyCopied = TRUE; //must be after singleton work finished
3.74 - VPThread__end_singleton( copyMatrixSingleton, animPr );
3.75 + VPThread__end_data_singleton( &(subMatrix->copySingleton), animPr );
3.76 }
```
```
4.1 --- a/src/Application/main.c Tue Nov 16 16:02:51 2010 +0100
4.2 +++ b/src/Application/main.c Sat Nov 20 08:39:05 2010 +0100
4.3 @@ -9,7 +9,7 @@
4.4 #include <stdlib.h>
4.5
4.6 #include "Matrix_Mult.h"
4.7 -#include "SSR_Matrix_Mult/SSR_Matrix_Mult.h"
4.8 +#include "VPThread__Matrix_Mult/VPThread__Matrix_Mult.h"
4.9
4.10 /**
4.11 *Matrix multiply program written using VMS_HW piggy-back language
```
