# HG changeset patch # User Sean Halle # Date 1342340859 25200 # Node ID 8972c00c00dd4d2e56473bc19a70bf766e34b139 # Parent 233fe8a5208f0870aa2c8f0376b8cabef40f54f1# Parent c35cb1f48f89b33ca3206eff035b472bbd609bbc Merge with perf_tuning_paper branch, which grabs fixes and best performing version diff -r 233fe8a5208f -r 8972c00c00dd .hgtags --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.hgtags Sun Jul 15 01:27:39 2012 -0700 @@ -0,0 +1,1 @@ +b7d8cf6680a9b615e41df8305cce48195d03c67e portable version diff -r 233fe8a5208f -r 8972c00c00dd SSR_Matrix_Mult/Divide_Pr.c --- a/SSR_Matrix_Mult/Divide_Pr.c Mon Apr 16 18:27:12 2012 +0200 +++ b/SSR_Matrix_Mult/Divide_Pr.c Sun Jul 15 01:27:39 2012 -0700 @@ -261,7 +261,7 @@ idealNumWorkUnits = SSR__giveIdealNumWorkUnits(); idealSizeOfSide2 = leftMatrix->numRows / rint(cbrt( idealNumWorkUnits )); - idealSizeOfSide2 *= 0.4; //finer granularity to help load balance + idealSizeOfSide2 *= 0.5; //finer granularity to help load balance if( idealSizeOfSide1 > idealSizeOfSide2 ) idealSizeOfSide = idealSizeOfSide1; @@ -367,7 +367,7 @@ numCores = SSR__give_number_of_cores_to_schedule_onto(); - numToPutOntoEachCore = numRowIdxs*numColIdxs/(numCores-1); + numToPutOntoEachCore = numRowIdxs*numColIdxs/numCores; leftOverFraction = 0; numVecOnCurrCore = 0; coreToAssignOnto = 1; @@ -412,7 +412,7 @@ //Move to next core, max core-value to incr to is numCores -1 coreToAssignOnto += 1; - if( coreToAssignOnto >= numCores ) coreToAssignOnto = 1; + if( coreToAssignOnto >= numCores ) coreToAssignOnto = 0; } //if } //for( vecIdx } //for( resColIdx diff -r 233fe8a5208f -r 8972c00c00dd __brch__default --- a/__brch__default Mon Apr 16 18:27:12 2012 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -Applications normally have only the default branch -- they shouldn't be affected by any choices in VMS or language.. diff -r 233fe8a5208f -r 8972c00c00dd __brch__perf_tuning_paper --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/__brch__perf_tuning_paper Sun Jul 15 01:27:39 2012 -0700 @@ -0,0 +1,1 @@ +Branch for keeping different performance tuning steps for paper