Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > Vthread > Vthread__Blocked_Matrix_Mult__Bench

changeset 2:46ceb3dd0f0a
Nov 20 PLDI final numbers -- debug statements, singletons and #ifdef'd probes
author: Me
date: Sat, 20 Nov 2010 08:39:05 +0100
parents: 133633d1c10f
children: 4007d97740a5
files: src/Application/VPThread__Matrix_Mult/Divide_Pr.c src/Application/VPThread__Matrix_Mult/VPThread__Matrix_Mult.h src/Application/VPThread__Matrix_Mult/subMatrix_Pr.c src/Application/main.c
diffstat: 4 files changed, 23 insertions(+), 15 deletions(-) [+]
[-]

src/Application/VPThread__Matrix_Mult/Divide_Pr.c 14

src/Application/VPThread__Matrix_Mult/VPThread__Matrix_Mult.h 2

src/Application/VPThread__Matrix_Mult/subMatrix_Pr.c 20

src/Application/main.c 2
     1.1 --- a/src/Application/VPThread__Matrix_Mult/Divide_Pr.c	Tue Nov 16 16:02:51 2010 +0100
     1.2 +++ b/src/Application/VPThread__Matrix_Mult/Divide_Pr.c	Sat Nov 20 08:39:05 2010 +0100
     1.3 @@ -178,6 +178,7 @@
     1.4      {
     1.5        //====== Do parallel multiply across cores
     1.6  
     1.7 +            DEBUG( dbgAppFlow, "divider: do parallel mult\n")
     1.8           //Calc the ideal size of sub-matrix and slice up the dimensions of
     1.9           // the two matrices.
    1.10           //The ideal size is the one takes the number of cycles to calculate
    1.11 @@ -213,6 +214,7 @@
    1.12                                                                 animatingThd );
    1.13        //======================================================================
    1.14  
    1.15 +            DEBUG( dbgAppFlow, "divider: made mutexes and conds\n")
    1.16           //get results-comm lock before create results-thd, to ensure it can't
    1.17           // signal that results are available before this thd is waiting on cond
    1.18        VPThread__mutex_lock( globals->results_mutex, animatingThd );
    1.19 @@ -222,11 +224,13 @@
    1.20        VPThread__mutex_lock( globals->start_mutex, animatingThd );
    1.21  
    1.22  
    1.23 +            DEBUG( dbgAppFlow, "divider: make result thread\n")
    1.24        VPThread__create_thread( &gatherResults, resultsParams, animatingThd );
    1.25  
    1.26           //Now wait for results thd to signal that it has vector lock
    1.27        VPThread__cond_wait(  globals->start_cond,  animatingThd );
    1.28        VPThread__mutex_unlock( globals->start_mutex, animatingThd );//done w/lock
    1.29 +            DEBUG( dbgAppFlow, "divider: make sub-matrices\n")
    1.30     
    1.31           //Make the sub-matrices, and pair them up, and make processor to
    1.32           // calc product of each pair.
    1.33 @@ -266,7 +270,7 @@
    1.34     float32 idealSizeOfSide, idealSizeOfSide1, idealSizeOfSide2;
    1.35     SlicingStruc *leftRowSlices, *vecSlices, *rightColSlices;
    1.36     SlicingStrucCarrier *slicingStrucCarrier =
    1.37 -                         VPThread__malloc(sizeof(SlicingStrucCarrier), animPr);
    1.38 +                       VPThread__malloc(sizeof(SlicingStrucCarrier), animPr);
    1.39  
    1.40     int minWorkUnitCycles, primitiveCycles, idealNumWorkUnits;
    1.41     float64 numPrimitiveOpsInMinWorkUnit;
    1.42 @@ -417,13 +421,13 @@
    1.43              subMatrixPairParams->rightSubMatrix =
    1.44                 rightSubMatrices[ vecIdx * numRightColIdxs + resColIdx ];
    1.45  
    1.46 -            subMatrixPairParams->resultPr = resultPr fix_this;
    1.47 +            //subMatrixPairParams->resultPr = resultPr;
    1.48  
    1.49                 //put all pairs from the same vector onto same core
    1.50              VPThread__create_thread_with_affinity( &calcSubMatrixProduct,
    1.51 -                                             subMatrixPairParams,
    1.52 -                                             animatingPr,
    1.53 -                                             coreToScheduleOnto );
    1.54 +                                                   subMatrixPairParams,
    1.55 +                                                   animatingPr,
    1.56 +                                                   coreToScheduleOnto );
    1.57            }
    1.58  
    1.59              //Trying to distribute the subMatrix-vectors across the cores, so

     2.1 --- a/src/Application/VPThread__Matrix_Mult/VPThread__Matrix_Mult.h	Tue Nov 16 16:02:51 2010 +0100
     2.2 +++ b/src/Application/VPThread__Matrix_Mult/VPThread__Matrix_Mult.h	Sat Nov 20 08:39:05 2010 +0100
     2.3 @@ -47,6 +47,8 @@
     2.4     int32    origStartRow;
     2.5     int32    origStartCol;
     2.6     int32    alreadyCopied;
     2.7 +   VPThdSingleton *copySingleton;
     2.8 +   VPThdSingleton *copyTransSingleton;
     2.9     int32    numUsesLeft; //have update via message to avoid multiple writers
    2.10     float32 *array;  //2D, but dynamically sized, so use addr arith
    2.11   }

     3.1 --- a/src/Application/VPThread__Matrix_Mult/subMatrix_Pr.c	Tue Nov 16 16:02:51 2010 +0100
     3.2 +++ b/src/Application/VPThread__Matrix_Mult/subMatrix_Pr.c	Sat Nov 20 08:39:05 2010 +0100
     3.3 @@ -53,10 +53,12 @@
     3.4     MatrixMultGlobals *globals =(MatrixMultGlobals *)VPThread__give_globals();
     3.5  
     3.6           DEBUG1(dbgAppFlow, "start sub-matrix mult: %d\n", animatingPr->procrID)
     3.7 +         #ifdef TURN_ON_DEBUG_PROBES
     3.8           int32 subMatrixProbe = VMS__create_single_interval_probe( "subMtx",
     3.9                                                                  animatingPr);
    3.10           VMS__record_sched_choice_into_probe( subMatrixProbe, animatingPr );
    3.11           VMS__record_interval_start_in_probe( subMatrixProbe );
    3.12 +         #endif
    3.13  
    3.14     params         = (SMPairParams *)data;
    3.15     resultPr       = params->resultPr;
    3.16 @@ -90,7 +92,9 @@
    3.17     //send result to result processor
    3.18     params->partialResultArray = resArray;
    3.19  
    3.20 +         #ifdef TURN_ON_DEBUG_PROBES
    3.21           VMS__record_interval_end_in_probe( subMatrixProbe );
    3.22 +         #endif
    3.23  
    3.24        //Send result to results thread
    3.25        //This pattern works 'cause only get lock when results thd inside wait
    3.26 @@ -100,6 +104,7 @@
    3.27     VPThread__mutex_unlock( globals->vector_mutex, animatingPr );//release
    3.28     //wait-er -- cond_signal implemented such that wait-er gets lock, no other
    3.29  
    3.30 +         DEBUG1(dbgAppFlow, "end sub-matrix mult: %d\n", animatingPr->procrID)
    3.31     VPThread__dissipate_thread( animatingPr );
    3.32   }
    3.33  
    3.34 @@ -233,8 +238,7 @@
    3.35     Matrix *origMatrix;
    3.36     float32 *origArray, *subArray;
    3.37  
    3.38 -   if( subMatrix->alreadyCopied ) return;
    3.39 -   VPThread__start_singleton( copyTransposeSingleton, animPr);
    3.40 +   VPThread__start_data_singleton( &(subMatrix->copyTransSingleton), animPr );
    3.41  
    3.42     origMatrix   = subMatrix->origMatrix;
    3.43     origArray    = origMatrix->array;
    3.44 @@ -252,8 +256,8 @@
    3.45                    origStartRow, origStartCol, origStride,
    3.46                    subArray, origArray );
    3.47  
    3.48 -   VPThread__end_singleton( copyTransposeSingleton, animPr);
    3.49 -   subMatrix->alreadyCopied = TRUE; //anywhere after singleton work finished
    3.50 +   VPThread__end_data_singleton( &(subMatrix->copyTransSingleton), animPr );
    3.51 +
    3.52   }
    3.53  
    3.54  
    3.55 @@ -267,11 +271,10 @@
    3.56        //This lets only a single VP execute the code between start and
    3.57        // end -- using start and end so that work runs outside the master.
    3.58        //If a second VP ever executes the start, it will be returned
    3.59 -      // from the end-point.  If it executions start after another but before
    3.60 +      // from the end-point.  If its execution starts after another but before
    3.61        // that other has finished, this one will remain suspended until the
    3.62        // other finishes, then be resumed from the end-point.
    3.63 -   if( subMatrix->alreadyCopied ) return; //an optimization -- set below
    3.64 -   VPThread__start_singleton( copyMatrixSingleton, animPr );
    3.65 +   VPThread__start_data_singleton( &(subMatrix->copySingleton), animPr );
    3.66  
    3.67  
    3.68     origMatrix    = subMatrix->origMatrix;
    3.69 @@ -299,6 +302,5 @@
    3.70         }
    3.71      }
    3.72  
    3.73 -   subMatrix->alreadyCopied = TRUE; //must be after singleton work finished
    3.74 -   VPThread__end_singleton( copyMatrixSingleton, animPr );
    3.75 +   VPThread__end_data_singleton( &(subMatrix->copySingleton), animPr );
    3.76   }

     4.1 --- a/src/Application/main.c	Tue Nov 16 16:02:51 2010 +0100
     4.2 +++ b/src/Application/main.c	Sat Nov 20 08:39:05 2010 +0100
     4.3 @@ -9,7 +9,7 @@
     4.4  #include <stdlib.h>
     4.5  
     4.6  #include "Matrix_Mult.h"
     4.7 -#include "SSR_Matrix_Mult/SSR_Matrix_Mult.h"
     4.8 +#include "VPThread__Matrix_Mult/VPThread__Matrix_Mult.h"
     4.9  
    4.10  /**
    4.11   *Matrix multiply program written using VMS_HW piggy-back language