changeset 21:cfca88034000 perf_tuning_paper

Performance debug series: original code (with divider bug)
author Nina Engelhardt <nengel@mailbox.tu-berlin.de>
date Tue, 17 Apr 2012 19:18:57 +0200
parents bf6a4474f0e0
children 9b1fea8d8aeb
files SSR_Matrix_Mult/Divide_Pr.c
diffstat 1 files changed, 7 insertions(+), 7 deletions(-) [+]
line diff
     1.1 --- a/SSR_Matrix_Mult/Divide_Pr.c	Tue Apr 17 19:32:08 2012 +0200
     1.2 +++ b/SSR_Matrix_Mult/Divide_Pr.c	Tue Apr 17 19:18:57 2012 +0200
     1.3 @@ -196,7 +196,7 @@
     1.4  
     1.5              DEBUG__printf(dbgAppFlow,"**create result Pr**")
     1.6        resultPr =
     1.7 -         SSR__create_procr_with_affinity( &gatherResults, resultsParams, animPr,0);
     1.8 +         SSR__create_procr_with( &gatherResults, resultsParams, animPr);
     1.9  
    1.10           //Make the sub-matrices, and pair them up, and make processor to
    1.11           // calc product of each pair.
    1.12 @@ -261,7 +261,7 @@
    1.13     idealNumWorkUnits = SSR__giveIdealNumWorkUnits();
    1.14     
    1.15     idealSizeOfSide2 = leftMatrix->numRows / rint(cbrt( idealNumWorkUnits ));
    1.16 -   idealSizeOfSide2 *= 0.4; //finer granularity to help load balance
    1.17 +   idealSizeOfSide2 *= 0.6; //finer granularity to help load balance
    1.18  
    1.19     if( idealSizeOfSide1 > idealSizeOfSide2 )
    1.20        idealSizeOfSide = idealSizeOfSide1;
    1.21 @@ -367,10 +367,10 @@
    1.22  
    1.23     numCores = SSR__give_number_of_cores_to_schedule_onto();
    1.24  
    1.25 -   numToPutOntoEachCore = numRowIdxs*numColIdxs/(numCores-1);
    1.26 +   numToPutOntoEachCore = numRowIdxs*numColIdxs/numCores;
    1.27     leftOverFraction = 0;
    1.28     numVecOnCurrCore = 0;
    1.29 -   coreToAssignOnto = 1;
    1.30 +   coreToAssignOnto = 0;
    1.31  
    1.32     for( resRowIdx = 0; resRowIdx < numRowIdxs; resRowIdx++ )
    1.33      {
    1.34 @@ -402,7 +402,7 @@
    1.35                 // that each core gets the same number of vectors, with a max
    1.36                 // imbalance of 1 vector more on some cores than others
    1.37              numVecOnCurrCore += 1;                 //incr before checking, so
    1.38 -            if( numVecOnCurrCore > numToPutOntoEachCore ) //actual num 1 less
    1.39 +         }   if( numVecOnCurrCore > numToPutOntoEachCore ) //actual num 1 less
    1.40               {
    1.41                    //deal with fractional part, to ensure that imbalance is 1 max
    1.42                    // IE, core with most has only 1 more than core with least
    1.43 @@ -412,9 +412,9 @@
    1.44  
    1.45                    //Move to next core, max core-value to incr to is numCores -1
    1.46                 coreToAssignOnto += 1;
    1.47 -               if( coreToAssignOnto >= numCores ) coreToAssignOnto = 1;
    1.48 +               if( coreToAssignOnto >= numCores ) coreToAssignOnto = 0;
    1.49               } //if
    1.50 -          } //for( vecIdx
    1.51 +           //for( vecIdx
    1.52         } //for( resColIdx
    1.53      } //for( resRowIdx
    1.54