Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > SSR > SSR__Blocked_Matrix_Mult__Bench
changeset 21:cfca88034000 perf_tuning_paper
Performance debug series: original code (with divider bug)
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 17 Apr 2012 19:18:57 +0200 |
| parents | bf6a4474f0e0 |
| children | 9b1fea8d8aeb |
| files | SSR_Matrix_Mult/Divide_Pr.c |
| diffstat | 1 files changed, 7 insertions(+), 7 deletions(-) [+] |
line diff
1.1 --- a/SSR_Matrix_Mult/Divide_Pr.c Tue Apr 17 19:32:08 2012 +0200
1.2 +++ b/SSR_Matrix_Mult/Divide_Pr.c Tue Apr 17 19:18:57 2012 +0200
1.3 @@ -196,7 +196,7 @@
1.4
1.5 DEBUG__printf(dbgAppFlow,"**create result Pr**")
1.6 resultPr =
1.7 - SSR__create_procr_with_affinity( &gatherResults, resultsParams, animPr,0);
1.8 + SSR__create_procr_with( &gatherResults, resultsParams, animPr);
1.9
1.10 //Make the sub-matrices, and pair them up, and make processor to
1.11 // calc product of each pair.
1.12 @@ -261,7 +261,7 @@
1.13 idealNumWorkUnits = SSR__giveIdealNumWorkUnits();
1.14
1.15 idealSizeOfSide2 = leftMatrix->numRows / rint(cbrt( idealNumWorkUnits ));
1.16 - idealSizeOfSide2 *= 0.4; //finer granularity to help load balance
1.17 + idealSizeOfSide2 *= 0.6; //finer granularity to help load balance
1.18
1.19 if( idealSizeOfSide1 > idealSizeOfSide2 )
1.20 idealSizeOfSide = idealSizeOfSide1;
1.21 @@ -367,10 +367,10 @@
1.22
1.23 numCores = SSR__give_number_of_cores_to_schedule_onto();
1.24
1.25 - numToPutOntoEachCore = numRowIdxs*numColIdxs/(numCores-1);
1.26 + numToPutOntoEachCore = numRowIdxs*numColIdxs/numCores;
1.27 leftOverFraction = 0;
1.28 numVecOnCurrCore = 0;
1.29 - coreToAssignOnto = 1;
1.30 + coreToAssignOnto = 0;
1.31
1.32 for( resRowIdx = 0; resRowIdx < numRowIdxs; resRowIdx++ )
1.33 {
1.34 @@ -402,7 +402,7 @@
1.35 // that each core gets the same number of vectors, with a max
1.36 // imbalance of 1 vector more on some cores than others
1.37 numVecOnCurrCore += 1; //incr before checking, so
1.38 - if( numVecOnCurrCore > numToPutOntoEachCore ) //actual num 1 less
1.39 + } if( numVecOnCurrCore > numToPutOntoEachCore ) //actual num 1 less
1.40 {
1.41 //deal with fractional part, to ensure that imbalance is 1 max
1.42 // IE, core with most has only 1 more than core with least
1.43 @@ -412,9 +412,9 @@
1.44
1.45 //Move to next core, max core-value to incr to is numCores -1
1.46 coreToAssignOnto += 1;
1.47 - if( coreToAssignOnto >= numCores ) coreToAssignOnto = 1;
1.48 + if( coreToAssignOnto >= numCores ) coreToAssignOnto = 0;
1.49 } //if
1.50 - } //for( vecIdx
1.51 + //for( vecIdx
1.52 } //for( resColIdx
1.53 } //for( resRowIdx
1.54
