Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__Hello_World__LangDev
changeset 1:9ad1a6186956
First shot at application-interface
| author | Some Random Person <seanhalle@yahoo.com> |
|---|---|
| date | Wed, 23 May 2012 14:24:18 -0700 |
| parents | 9cf4c84a3091 |
| children | a8a8c4193c9b |
| files | VSs__Hello_World/EntryPoint.c VSs__Hello_World/SeedVP.c VSs__Hello_World/Task.c main.c |
| diffstat | 4 files changed, 50 insertions(+), 624 deletions(-) [+] |
line diff
1.1 --- a/VSs__Hello_World/EntryPoint.c Wed May 23 12:39:19 2012 -0700 1.2 +++ b/VSs__Hello_World/EntryPoint.c Wed May 23 14:24:18 2012 -0700 1.3 @@ -12,7 +12,7 @@ 1.4 1.5 1.6 1.7 -/*Every SSR system has an "entry point" function that creates the first 1.8 +/*This "entry point" function creates the first 1.9 * processor, which starts the chain of creating more processors.. 1.10 * eventually all of the processors will dissipate themselves, and 1.11 * return. 1.12 @@ -26,37 +26,13 @@ 1.13 * and returns the value from the function 1.14 * 1.15 */ 1.16 -Matrix * 1.17 -multiplyTheseMatrices( Matrix *leftMatrix, Matrix *rightMatrix ) 1.18 - { Matrix *resMatrix; 1.19 - DividerParams *dividerParams; 1.20 - int32 numResRows, numResCols; 1.21 - 1.22 - 1.23 - dividerParams = malloc( sizeof( DividerParams ) ); 1.24 - dividerParams->leftMatrix = leftMatrix; 1.25 - dividerParams->rightMatrix = rightMatrix; 1.26 - 1.27 - 1.28 - numResRows = leftMatrix->numRows; 1.29 - numResCols = rightMatrix->numCols; 1.30 - 1.31 - //VMS has its own separate internal malloc, so to get results out, 1.32 - // have to pass in empty array for it to fill up 1.33 - //The alternative is internally telling SSR make external space to use 1.34 - resMatrix = malloc( sizeof(Matrix) ); 1.35 - resMatrix->array = malloc( numResRows * numResCols * sizeof(float32)); 1.36 - resMatrix->numCols = rightMatrix->numCols; 1.37 - resMatrix->numRows = leftMatrix->numRows; 1.38 - 1.39 - 1.40 - dividerParams->resultMatrix = resMatrix; 1.41 - 1.42 - //create divider processor, start doing the work, and wait till done 1.43 +void 1.44 +VSs__Hello_World( ) 1.45 + { 1.46 + //create seed processor, start doing the work, and wait till done 1.47 //This function is the "border crossing" between normal code and SSR 1.48 - SSR__create_seed_procr_and_do_work( &divideWorkIntoSubMatrixPairProcrs, 1.49 - dividerParams ); 1.50 - 1.51 - free( dividerParams ); 1.52 - return resMatrix; 1.53 + VSs__create_seed_procr_and_do_work( &hello_world, 1.54 + 
NULL ); 1.55 + 1.56 + return; 1.57 }
2.1 --- a/VSs__Hello_World/SeedVP.c Wed May 23 12:39:19 2012 -0700 2.2 +++ b/VSs__Hello_World/SeedVP.c Wed May 23 14:24:18 2012 -0700 2.3 @@ -9,586 +9,26 @@ 2.4 2.5 #include <math.h> 2.6 #include <string.h> 2.7 -#include "SSR_Matrix_Mult.h" 2.8 +#include "VSs__Hello_World.h" 2.9 2.10 - //The time to compute this many result values should equal the time to 2.11 - // perform this division on a matrix of size gives that many result calcs 2.12 - //IE, size this so that sequential time to calc equals divide time 2.13 - // find the value by experimenting -- but divide time and calc time scale 2.14 - // same way, so this value might remain the same across hardware 2.15 -#define NUM_CELLS_IN_SEQUENTIAL_CUTOFF 1000 2.16 +void hello_world( void *_params, SlaveVP *animPr ) 2.17 + { int32 i; 2.18 + DEBUG__printf( dbgAppFlow, "start hello_world"); 2.19 2.20 + // create all the task types 2.21 + helloWorldTaskType = VMS_App__malloc( sizeof(VSsTaskType) ); 2.22 + helloWorldTaskType->fn = &hello_world_task; 2.23 + helloWorldTaskType->numArgs = 2; 2.24 + helloWorldTaskType->argTypes = {NULL, IN}; 2.25 + helloWorldTaskType->argSizes = {sizeof(int), 16*16*sizeof(float)}; 2.26 2.27 -//=========================================================================== 2.28 -int inline 2.29 -measureMatrixMultPrimitive( SlaveVP *animPr ); 2.30 - 2.31 -SlicingStrucCarrier * 2.32 -calcIdealSizeAndSliceDimensions( Matrix *leftMatrix, Matrix *rightMatrix, 2.33 - SlaveVP *animPr ); 2.34 - 2.35 -SlicingStruc * 2.36 -sliceUpDimension( float32 idealSizeOfSide, int startVal, int endVal, 2.37 - SlaveVP *animPr ); 2.38 - 2.39 -void 2.40 -freeSlicingStruc( SlicingStruc *slicingStruc, SlaveVP *animPr ); 2.41 - 2.42 -SubMatrix ** 2.43 -createSubMatrices( SlicingStruc *rowSlices, SlicingStruc *colSlices, 2.44 - int32 numUses, Matrix *origMatrix, SlaveVP *animPr ); 2.45 - 2.46 -void 2.47 -freeSubMatrices( SlicingStruc *rowSlices, SlicingStruc *colSlices, 2.48 - SubMatrix **subMatrices, SlaveVP *animPr ); 2.49 - 
2.50 -void 2.51 -pairUpSubMatricesAndMakeProcessors( SubMatrix **leftSubMatrices, 2.52 - SubMatrix **rightSubMatrices, 2.53 - int32 numRowIdxs, int32 numColIdxs, 2.54 - int32 numVecIdxs, 2.55 - SlaveVP *resultPr, 2.56 - SlaveVP *animatingPr ); 2.57 - 2.58 -void 2.59 -makeSubMatricesAndProcrs( Matrix *leftMatrix, Matrix *rightMatrix, 2.60 - SlicingStrucCarrier *slicingStrucCarrier, 2.61 - SlaveVP *resultPr, SlaveVP *animatingPr ); 2.62 - 2.63 - 2.64 - 2.65 -/*Divider creates one processor for every sub-matrix 2.66 - * It hands them: 2.67 - * the name of the result processor that they should send their results to, 2.68 - * the left and right matrices, and the rows and cols they should multiply 2.69 - * It first creates the result processor, then all the sub-matrixPair 2.70 - * processors, 2.71 - * then does a receive of a message from the result processor that gives 2.72 - * the divider ownership of the result matrix. 2.73 - * Finally, the divider returns the result matrix out of the SSR system. 2.74 - * 2.75 - * Divider chooses the size of sub-matrices via an algorithm that tries to 2.76 - * keep the minimum work above a threshold. The threshold is machine- 2.77 - * dependent, so ask SSR for min work-unit time to get a 2.78 - * given overhead 2.79 - * 2.80 - * Divide min work-unit cycles by measured-cycles for one matrix-cell 2.81 - * product -- gives the number of products need to have in min size 2.82 - * matrix. 2.83 - * 2.84 - * So then, take cubed root of this to get the size of a side of min sub- 2.85 - * matrix. That is the size of the ideal square sub-matrix -- so tile 2.86 - * up the two input matrices into ones as close as possible to that size, 2.87 - * and create the pairs of sub-matrices. 2.88 - * 2.89 - *======================== STRATEGIC OVERVIEW ======================= 2.90 - * 2.91 - *This division is a bit tricky, because have to create things in advance 2.92 - * that it's not at first obvious need to be created.. 
2.93 - * 2.94 - *First slice up each dimension -- three of them.. this is because will have 2.95 - * to create the sub-matrix's data-structures before pairing the sub-matrices 2.96 - * with each other -- so, have three dimensions to slice up before can 2.97 - * create the sub-matrix data-strucs -- also, have to be certain that the 2.98 - * cols of the left input have the exact same slicing as the rows of the 2.99 - * left matrix, so just to be sure, do the slicing calc once, then use it 2.100 - * for both. 2.101 - * 2.102 - *So, goes like this: 2.103 - *1) calculate the start & end values of each dimension in each matrix. 2.104 - *2) use those values to create sub-matrix structures 2.105 - *3) combine sub-matrices into pairs, as the tasks to perform. 2.106 - * 2.107 - *Have to calculate separately from creating the sub-matrices because of the 2.108 - * nature of the nesting -- would either end up creating the same sub-matrix 2.109 - * multiple times, or else would have to put in detection of whether had 2.110 - * made a particular one already if tried to combine steps 1 and 2. 2.111 - * 2.112 - *Step 3 has to be separate because of the nesting, as well -- same reason, 2.113 - * would either create same sub-matrix multiple times, or else have to 2.114 - * add detection of whether was already created. 2.115 - * 2.116 - *Another way to look at it: there's one level of loop to divide dimensions, 2.117 - * two levels of nesting to create sub-matrices, and three levels to pair 2.118 - * up the sub-matrices. 
2.119 - */ 2.120 -void divideWorkIntoSubMatrixPairProcrs( void *_dividerParams, 2.121 - SlaveVP *animPr ) 2.122 - { SlaveVP *resultPr; 2.123 - DividerParams *dividerParams; 2.124 - ResultsParams *resultsParams; 2.125 - Matrix *leftMatrix, *rightMatrix, *resultMatrix; 2.126 - void *msg; 2.127 - SlicingStrucCarrier *slicingStrucCarrier; 2.128 - float32 *resultArray; //points to array inside result matrix 2.129 + HelloWorldArgs args; //allocate on stack, VSs copies internally 2.130 2.131 - DEBUG__printf( dbgAppFlow, "start divide") 2.132 - 2.133 - int32 2.134 - divideProbe = VMS_App__create_single_interval_probe( "divideProbe", 2.135 - animPr ); 2.136 - VMS_App__record_sched_choice_into_probe( divideProbe, animPr ); 2.137 - VMS_App__record_interval_start_in_probe( divideProbe ); 2.138 - 2.139 - //=========== Setup -- make local copies of ptd-to-things, malloc, aso 2.140 - int32 numResRows, numResCols, vectLength; 2.141 - 2.142 - dividerParams = (DividerParams *)_dividerParams; 2.143 - 2.144 - leftMatrix = dividerParams->leftMatrix; 2.145 - rightMatrix = dividerParams->rightMatrix; 2.146 - 2.147 - vectLength = leftMatrix->numCols; 2.148 - numResRows = leftMatrix->numRows; 2.149 - numResCols = rightMatrix->numCols; 2.150 - resultArray = dividerParams->resultMatrix->array; 2.151 - 2.152 - //zero the result array 2.153 - memset( resultArray, 0, numResRows * numResCols * sizeof(float32) ); 2.154 - 2.155 - //============== Do either sequential mult or do division ============== 2.156 - 2.157 - //Check if input matrices too small -- if yes, just do sequential 2.158 - //Cutoff is determined by overhead of this divider -- relatively 2.159 - // machine-independent 2.160 - if( (float32)leftMatrix->numRows * (float32)leftMatrix->numCols * 2.161 - (float32)rightMatrix->numCols < NUM_CELLS_IN_SEQUENTIAL_CUTOFF ) 2.162 + for( i = 0; i < 5; i++ ) 2.163 { 2.164 - //====== Do sequential multiply on a single core 2.165 - DEBUG__printf( dbgAppFlow, "doing sequential") 2.166 - 2.167 - 
//transpose the right matrix 2.168 - float32 * 2.169 - transRightArray = SSR__malloc_to( rightMatrix->numRows * 2.170 - rightMatrix->numCols * sizeof(float32), 2.171 - animPr ); 2.172 - 2.173 - //copy values from orig matrix to local 2.174 - copyTranspose( rightMatrix->numRows, rightMatrix->numCols, 2.175 - 0, 0, rightMatrix->numRows, 2.176 - transRightArray, rightMatrix->array ); 2.177 - 2.178 - multiplyMatrixArraysTransposed( vectLength, numResRows, numResCols, 2.179 - leftMatrix->array, transRightArray, 2.180 - resultArray ); 2.181 - } 2.182 - else 2.183 - { 2.184 - //====== Do parallel multiply across cores 2.185 - 2.186 - //Calc the ideal size of sub-matrix and slice up the dimensions of 2.187 - // the two matrices. 2.188 - //The ideal size is the one takes the number of cycles to calculate 2.189 - // such that calc time is equal or greater than min work-unit size 2.190 - slicingStrucCarrier = 2.191 - calcIdealSizeAndSliceDimensions( leftMatrix, rightMatrix, animPr ); 2.192 - 2.193 - //Make the results processor, now that know how many to wait for 2.194 - resultsParams = SSR__malloc_to( sizeof(ResultsParams), animPr ); 2.195 - resultsParams->numSubMatrixPairs = 2.196 - slicingStrucCarrier->leftRowSlices->numVals * 2.197 - slicingStrucCarrier->rightColSlices->numVals * 2.198 - slicingStrucCarrier->vecSlices->numVals; 2.199 - resultsParams->dividerPr = animPr; 2.200 - resultsParams->numCols = rightMatrix->numCols; 2.201 - resultsParams->numRows = leftMatrix->numRows; 2.202 - resultsParams->resultArray = resultArray; 2.203 - 2.204 - DEBUG__printf(dbgAppFlow,"**create result Pr**") 2.205 - resultPr = 2.206 - SSR__create_procr_with( &gatherResults, resultsParams, animPr); 2.207 - 2.208 - //Make the sub-matrices, and pair them up, and make processor to 2.209 - // calc product of each pair. 
2.210 - makeSubMatricesAndProcrs( leftMatrix, rightMatrix, 2.211 - slicingStrucCarrier, 2.212 - resultPr, animPr); 2.213 - 2.214 - //result array is allocated externally, so no message from resultPr 2.215 - // however, do have to wait before printing out stats, so wait 2.216 - // for an empty handshake message 2.217 - msg = SSR__receive_from_to( resultPr, animPr ); 2.218 - } 2.219 - 2.220 - 2.221 - //=============== Work done -- send results back ================= 2.222 - 2.223 - 2.224 - DEBUG__printf( dbgAppFlow, "end divide") 2.225 - 2.226 - VMS_App__record_interval_end_in_probe( divideProbe ); 2.227 - VMS_App__print_stats_of_all_probes(); 2.228 - 2.229 - //nothing left to do so dissipate, SSR will wait to shutdown and hence 2.230 - // make results available to outside until all the processors have 2.231 - // dissipated -- so no need to wait for results processor 2.232 - 2.233 - SSR__dissipate_procr( animPr ); //all procrs dissipate self at end 2.234 - //when all of the processors have dissipated, the "create seed and do 2.235 - // work" call in the entry point function returns 2.236 + args.dummy1 = i; 2.237 + args.dummy2 = VMS_App__malloc() 2.238 + VSs__submit_task( VSsTaskType helloWorldTaskType, &args ); 2.239 + } 2.240 } 2.241 2.242 - 2.243 -SlicingStrucCarrier * 2.244 -calcIdealSizeAndSliceDimensions( Matrix *leftMatrix, Matrix *rightMatrix, 2.245 - SlaveVP *animPr ) 2.246 - { 2.247 - float32 idealSizeOfSide, idealSizeOfSide1, idealSizeOfSide2; 2.248 - SlicingStruc *leftRowSlices, *vecSlices, *rightColSlices; 2.249 - SlicingStrucCarrier *slicingStrucCarrier = 2.250 - SSR__malloc_to(sizeof(SlicingStrucCarrier), animPr); 2.251 - 2.252 - int minWorkUnitCycles, primitiveCycles, idealNumWorkUnits; 2.253 - float64 numPrimitiveOpsInMinWorkUnit; 2.254 - 2.255 - 2.256 - //======= Calc ideal size of min-sized sub-matrix ======== 2.257 - 2.258 - //ask SSR for the number of cycles of the minimum work unit, at given 2.259 - // percent overhead then add a guess at 
overhead from this divider 2.260 - minWorkUnitCycles = SSR__giveMinWorkUnitCycles( .05 ); 2.261 - 2.262 - //ask SSR for number of cycles of the "primitive" op of matrix mult 2.263 - primitiveCycles = measureMatrixMultPrimitive( animPr ); 2.264 - 2.265 - numPrimitiveOpsInMinWorkUnit = 2.266 - (float64)minWorkUnitCycles / (float64)primitiveCycles; 2.267 - 2.268 - //take cubed root -- that's number of these in a "side" of sub-matrix 2.269 - // then multiply by 5 because the primitive is 5x5 2.270 - idealSizeOfSide1 = 5 * cbrt( numPrimitiveOpsInMinWorkUnit ); 2.271 - 2.272 - idealNumWorkUnits = SSR__giveIdealNumWorkUnits(); 2.273 - 2.274 - idealSizeOfSide2 = leftMatrix->numRows / rint(cbrt( idealNumWorkUnits )); 2.275 - idealSizeOfSide2 *= 0.6; //finer granularity to help load balance 2.276 - 2.277 - if( idealSizeOfSide1 > idealSizeOfSide2 ) 2.278 - idealSizeOfSide = idealSizeOfSide1; 2.279 - else 2.280 - idealSizeOfSide = idealSizeOfSide2; 2.281 - 2.282 - //The multiply inner loop blocks the array to fit into L1 cache 2.283 -// if( idealSizeOfSide < ROWS_IN_BLOCK ) idealSizeOfSide = ROWS_IN_BLOCK; 2.284 - 2.285 - //============ Slice up dimensions, now that know target size =========== 2.286 - 2.287 - //Tell the slicer the target size of a side (floating pt), the start 2.288 - // value to start slicing at, and the end value to stop slicing at 2.289 - //It returns an array of start value of each chunk, plus number of them 2.290 - int32 startLeftRow, endLeftRow, startVec,endVec,startRightCol,endRightCol; 2.291 - startLeftRow = 0; 2.292 - endLeftRow = leftMatrix->numRows -1; 2.293 - startVec = 0; 2.294 - endVec = leftMatrix->numCols -1; 2.295 - startRightCol = 0; 2.296 - endRightCol = rightMatrix->numCols -1; 2.297 - 2.298 - leftRowSlices = 2.299 - sliceUpDimension( idealSizeOfSide, startLeftRow, endLeftRow, animPr ); 2.300 - 2.301 - vecSlices = 2.302 - sliceUpDimension( idealSizeOfSide, startVec, endVec, animPr ); 2.303 - 2.304 - rightColSlices = 2.305 - 
sliceUpDimension( idealSizeOfSide, startRightCol, endRightCol,animPr); 2.306 - 2.307 - slicingStrucCarrier->leftRowSlices = leftRowSlices; 2.308 - slicingStrucCarrier->vecSlices = vecSlices; 2.309 - slicingStrucCarrier->rightColSlices = rightColSlices; 2.310 - 2.311 - return slicingStrucCarrier; 2.312 - } 2.313 - 2.314 - 2.315 -void 2.316 -makeSubMatricesAndProcrs( Matrix *leftMatrix, Matrix *rightMatrix, 2.317 - SlicingStrucCarrier *slicingStrucCarrier, 2.318 - SlaveVP *resultPr, SlaveVP *animPr ) 2.319 - { 2.320 - SlicingStruc *leftRowSlices, *vecSlices, *rightColSlices; 2.321 - 2.322 - leftRowSlices = slicingStrucCarrier->leftRowSlices; 2.323 - vecSlices = slicingStrucCarrier->vecSlices; 2.324 - rightColSlices = slicingStrucCarrier->rightColSlices; 2.325 - SSR__free( slicingStrucCarrier, animPr ); 2.326 - 2.327 - //================ Make sub-matrices, given the slicing ================ 2.328 - SubMatrix **leftSubMatrices, **rightSubMatrices; 2.329 - leftSubMatrices = 2.330 - createSubMatrices( leftRowSlices, vecSlices, rightColSlices->numVals, 2.331 - leftMatrix, animPr ); 2.332 - //double_check_that_always_numRows_in_right_same_as_numCols_in_left(); 2.333 - rightSubMatrices = 2.334 - createSubMatrices( vecSlices, rightColSlices, leftRowSlices->numVals, 2.335 - rightMatrix, animPr ); 2.336 - 2.337 - 2.338 - //============== pair the sub-matrices and make processors ============== 2.339 - int32 numRowIdxs, numColIdxs, numVecIdxs; 2.340 - 2.341 - numRowIdxs = leftRowSlices->numVals; 2.342 - numColIdxs = rightColSlices->numVals; 2.343 - numVecIdxs = vecSlices->numVals; 2.344 - 2.345 - 2.346 - freeSlicingStruc( leftRowSlices, animPr ); 2.347 - freeSlicingStruc( vecSlices, animPr ); 2.348 - freeSlicingStruc( rightColSlices, animPr ); 2.349 - 2.350 - pairUpSubMatricesAndMakeProcessors( leftSubMatrices, 2.351 - rightSubMatrices, 2.352 - numRowIdxs, numColIdxs, 2.353 - numVecIdxs, 2.354 - resultPr, 2.355 - animPr ); 2.356 - } 2.357 - 2.358 - 2.359 - 2.360 - 2.361 -void 
2.362 -pairUpSubMatricesAndMakeProcessors( SubMatrix **leftSubMatrices, 2.363 - SubMatrix **rightSubMatrices, 2.364 - int32 numRowIdxs, int32 numColIdxs, 2.365 - int32 numVecIdxs, 2.366 - SlaveVP *resultPr, 2.367 - SlaveVP *animatingPr ) 2.368 - { 2.369 - int32 resRowIdx, resColIdx, vecIdx; 2.370 - int32 numLeftColIdxs, numRightColIdxs; 2.371 - int32 leftRowIdxOffset; 2.372 - SMPairParams *subMatrixPairParams; 2.373 - float32 numToPutOntoEachCore, leftOverFraction, numVecOnCurrCore; 2.374 - int32 numCores, coreToAssignOnto; 2.375 - 2.376 - numLeftColIdxs = numColIdxs; 2.377 - numRightColIdxs = numVecIdxs; 2.378 - 2.379 - numCores = SSR__give_number_of_cores_to_schedule_onto(); 2.380 - 2.381 - numToPutOntoEachCore = numRowIdxs*numColIdxs/numCores; 2.382 - leftOverFraction = 0; 2.383 - numVecOnCurrCore = 0; 2.384 - coreToAssignOnto = 0; 2.385 - 2.386 - for( resRowIdx = 0; resRowIdx < numRowIdxs; resRowIdx++ ) 2.387 - { 2.388 - leftRowIdxOffset = resRowIdx * numLeftColIdxs; 2.389 - 2.390 - for( resColIdx = 0; resColIdx < numColIdxs; resColIdx++ ) 2.391 - { 2.392 - 2.393 - for( vecIdx = 0; vecIdx < numVecIdxs; vecIdx++ ) 2.394 - { 2.395 - //Make the processor for the pair of sub-matrices 2.396 - subMatrixPairParams = SSR__malloc_to( sizeof(SMPairParams), 2.397 - animatingPr); 2.398 - subMatrixPairParams->leftSubMatrix = 2.399 - leftSubMatrices[ leftRowIdxOffset + vecIdx ]; 2.400 - 2.401 - subMatrixPairParams->rightSubMatrix = 2.402 - rightSubMatrices[ vecIdx * numRightColIdxs + resColIdx ]; 2.403 - 2.404 - subMatrixPairParams->resultPr = resultPr; 2.405 - 2.406 - //put all pairs from the same vector onto same core 2.407 - SSR__create_procr_with_affinity( &calcSubMatrixProduct, 2.408 - subMatrixPairParams, 2.409 - animatingPr, 2.410 - coreToAssignOnto ); 2.411 - 2.412 - //Trying to distribute the subMatrix-vectors across the cores, so 2.413 - // that each core gets the same number of vectors, with a max 2.414 - // imbalance of 1 vector more on some cores than others 
2.415 - numVecOnCurrCore += 1; //incr before checking, so 2.416 - if( numVecOnCurrCore > numToPutOntoEachCore ) //actual num 1 less 2.417 - { 2.418 - //deal with fractional part, to ensure that imbalance is 1 max 2.419 - // IE, core with most has only 1 more than core with least 2.420 - leftOverFraction = numToPutOntoEachCore - numVecOnCurrCore; 2.421 - if( leftOverFraction > 1 ) ERROR("division alg messed up\n"); 2.422 - numVecOnCurrCore = leftOverFraction; //accumulates "extra" 2.423 - 2.424 - //Move to next core, max core-value to incr to is numCores -1 2.425 - coreToAssignOnto += 1; 2.426 - if( coreToAssignOnto >= numCores ) coreToAssignOnto = 0; 2.427 - } //if 2.428 - } //for( vecIdx 2.429 - } //for( resColIdx 2.430 - } //for( resRowIdx 2.431 - 2.432 - } 2.433 - 2.434 - 2.435 - 2.436 -/*Walk through the two slice-strucs, making sub-matrix strucs as go 2.437 - */ 2.438 -SubMatrix ** 2.439 -createSubMatrices( SlicingStruc *rowSlices, SlicingStruc *colSlices, 2.440 - int32 numUses, Matrix *origMatrix, SlaveVP *animPr ) 2.441 - { 2.442 - int32 numRowIdxs, numColIdxs, rowIdx, colIdx; 2.443 - int32 startRow, endRow, startCol, endCol; 2.444 - int32 *rowStartVals, *colStartVals; 2.445 - int32 rowOffset; 2.446 - SubMatrix **subMatrices, *newSubMatrix; 2.447 - 2.448 - numRowIdxs = rowSlices->numVals; 2.449 - numColIdxs = colSlices->numVals; 2.450 - 2.451 - rowStartVals = rowSlices->startVals; 2.452 - colStartVals = colSlices->startVals; 2.453 - 2.454 - subMatrices = SSR__malloc_to(numRowIdxs * numColIdxs * sizeof(SubMatrix*), 2.455 - animPr ); 2.456 - 2.457 - for( rowIdx = 0; rowIdx < numRowIdxs; rowIdx++ ) 2.458 - { 2.459 - rowOffset = rowIdx * numColIdxs; 2.460 - 2.461 - startRow = rowStartVals[rowIdx]; 2.462 - endRow = rowStartVals[rowIdx + 1] -1; //"fake" start above last is 2.463 - // at last valid idx + 1 & is 2.464 - // 1 greater than end value 2.465 - for( colIdx = 0; colIdx < numColIdxs; colIdx++ ) 2.466 - { 2.467 - startCol = colStartVals[colIdx]; 2.468 - 
endCol = colStartVals[colIdx + 1] -1; 2.469 - 2.470 - newSubMatrix = SSR__malloc_to( sizeof(SubMatrix), animPr ); 2.471 - newSubMatrix->numRows = endRow - startRow +1; 2.472 - newSubMatrix->numCols = endCol - startCol +1; 2.473 - newSubMatrix->origMatrix = origMatrix; 2.474 - newSubMatrix->origStartRow = startRow; 2.475 - newSubMatrix->origStartCol = startCol; 2.476 - newSubMatrix->copySingleton = NULL; 2.477 - newSubMatrix->numUsesLeft = numUses; //can free after this many 2.478 - //Prevent uninitialized memory 2.479 - newSubMatrix->copySingleton = NULL; 2.480 - newSubMatrix->copyTransSingleton = NULL; 2.481 - 2.482 - subMatrices[ rowOffset + colIdx ] = newSubMatrix; 2.483 - } 2.484 - } 2.485 - return subMatrices; 2.486 - } 2.487 - 2.488 - 2.489 -void 2.490 -freeSubMatrices( SlicingStruc *rowSlices, SlicingStruc *colSlices, 2.491 - SubMatrix **subMatrices, SlaveVP *animPr ) 2.492 - { 2.493 - int32 numRowIdxs, numColIdxs, rowIdx, colIdx, rowOffset; 2.494 - SubMatrix *subMatrix; 2.495 - 2.496 - numRowIdxs = rowSlices->numVals; 2.497 - numColIdxs = colSlices->numVals; 2.498 - 2.499 - for( rowIdx = 0; rowIdx < numRowIdxs; rowIdx++ ) 2.500 - { 2.501 - rowOffset = rowIdx * numColIdxs; 2.502 - for( colIdx = 0; colIdx < numColIdxs; colIdx++ ) 2.503 - { 2.504 - subMatrix = subMatrices[ rowOffset + colIdx ]; 2.505 - if( subMatrix->alreadyCopied ) 2.506 - SSR__free( subMatrix->array, animPr ); 2.507 - SSR__free( subMatrix, animPr ); 2.508 - } 2.509 - } 2.510 - SSR__free( subMatrices, animPr ); 2.511 - } 2.512 - 2.513 - 2.514 - 2.515 -SlicingStruc * 2.516 -sliceUpDimension( float32 idealSizeOfSide, int startVal, int endVal, 2.517 - SlaveVP *animPr ) 2.518 - { float32 residualAcc = 0; 2.519 - int numSlices, i, *startVals, sizeOfSlice, endCondition; 2.520 - SlicingStruc *slicingStruc = SSR__malloc_to(sizeof(SlicingStruc), animPr); 2.521 - 2.522 - //calc size of matrix need to hold start vals -- 2.523 - numSlices = (int32)( (float32)(endVal -startVal +1) / idealSizeOfSide); 
2.524 - 2.525 - startVals = SSR__malloc_to( (numSlices + 1) * sizeof(int32), animPr ); 2.526 - 2.527 - //Calc the upper limit of start value -- when get above this, end loop 2.528 - // by saving highest value of the matrix dimension to access, plus 1 2.529 - // as the start point of the imaginary slice following the last one 2.530 - //Plus 1 because go up to value but not include when process last slice 2.531 - //The stopping condition is half-a-size less than highest value because 2.532 - // don't want any pieces smaller than half the ideal size -- just tack 2.533 - // little ones onto end of last one 2.534 - endCondition = endVal - (int) (idealSizeOfSide/2); //end *value*, not size 2.535 - for( i = 0; startVal <= endVal; i++ ) 2.536 - { 2.537 - startVals[i] = startVal; 2.538 - residualAcc += idealSizeOfSide; 2.539 - sizeOfSlice = (int)residualAcc; 2.540 - residualAcc -= (float32)sizeOfSlice; 2.541 - startVal += sizeOfSlice; //ex @size = 2 get 0, 2, 4, 6, 8.. 2.542 - 2.543 - if( startVal > endCondition ) 2.544 - { startVal = endVal + 1; 2.545 - startVals[ i + 1 ] = startVal; 2.546 - } 2.547 - } 2.548 - 2.549 - slicingStruc->startVals = startVals; 2.550 - slicingStruc->numVals = i; //loop incr'd, so == last valid start idx+1 2.551 - // which means is num sub-matrices in dim 2.552 - // also == idx of the fake start just above 2.553 - return slicingStruc; 2.554 - } 2.555 - 2.556 -void 2.557 -freeSlicingStruc( SlicingStruc *slicingStruc, SlaveVP *animPr ) 2.558 - { 2.559 - SSR__free( slicingStruc->startVals, animPr ); 2.560 - SSR__free( slicingStruc, animPr ); 2.561 - } 2.562 - 2.563 - 2.564 -inline int 2.565 -measureMatrixMultPrimitive( SlaveVP *animPr ) 2.566 - { 2.567 - int r, c, v, numCycles; 2.568 - float32 *res, *left, *right; 2.569 - 2.570 - //setup inputs 2.571 - left = SSR__malloc_to( 5 * 5 * sizeof( float32 ), animPr ); 2.572 - right = SSR__malloc_to( 5 * 5 * sizeof( float32 ), animPr ); 2.573 - res = SSR__malloc_to( 5 * 5 * sizeof( float32 ), animPr ); 
2.574 - 2.575 - for( r = 0; r < 5; r++ ) 2.576 - { 2.577 - for( c = 0; c < 5; c++ ) 2.578 - { 2.579 - left[ r * 5 + c ] = r; 2.580 - right[ r * 5 + c ] = c; 2.581 - } 2.582 - } 2.583 - 2.584 - //do primitive 2.585 - SSR__start_primitive(); //for now, just takes time stamp 2.586 - for( r = 0; r < 5; r++ ) 2.587 - { 2.588 - for( c = 0; c < 5; c++ ) 2.589 - { 2.590 - for( v = 0; v < 5; v++ ) 2.591 - { 2.592 - res[ r * 5 + c ] = left[ r * 5 + v ] * right[ v * 5 + c ]; 2.593 - } 2.594 - } 2.595 - } 2.596 - numCycles = 2.597 - SSR__end_primitive_and_give_cycles(); 2.598 - 2.599 - SSR__free( left, animPr ); 2.600 - SSR__free( right, animPr ); 2.601 - SSR__free( res, animPr ); 2.602 - 2.603 - return numCycles; 2.604 - } 2.605 -
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/VSs__Hello_World/Task.c Wed May 23 14:24:18 2012 -0700 3.3 @@ -0,0 +1,21 @@ 3.4 +/* 3.5 + * Copyright 2009 OpenSourceStewardshipFoundation.org 3.6 + * Licensed under GNU General Public License version 2 3.7 + * 3.8 + * Author: seanhalle@yahoo.com 3.9 + * 3.10 + */ 3.11 + 3.12 + 3.13 +#include <math.h> 3.14 +#include <string.h> 3.15 +#include "VSs__Hello_World.h" 3.16 + 3.17 +void hello_world_task( void *_args, SlaveVP *animPr ) 3.18 + { HelloWorldArgs *args; 3.19 + 3.20 + args = (HelloWorldArgs *)_args; 3.21 + 3.22 + printf("Hello World: %d, %f", args->dummy1, args->dummy2); 3.23 + } 3.24 +
4.1 --- a/main.c Wed May 23 12:39:19 2012 -0700 4.2 +++ b/main.c Wed May 23 14:24:18 2012 -0700 4.3 @@ -1,5 +1,5 @@ 4.4 /* 4.5 - * Copyright Oct 24, 2009 OpenSourceStewardshipFoundation.org 4.6 + * Copyright 20012 OpenSourceResearchInstitute.org 4.7 * Licensed under GNU General Public License version 2 4.8 * 4.9 * author seanhalle@yahoo.com 4.10 @@ -8,28 +8,17 @@ 4.11 #include <malloc.h> 4.12 #include <stdlib.h> 4.13 4.14 -#include "Matrix_Mult.h" 4.15 -#include "SSR_Matrix_Mult/SSR_Matrix_Mult.h" 4.16 +#include "VSs__Hello_World/VSs__Hello_World.h" 4.17 4.18 /** 4.19 * 4.20 */ 4.21 int main( int argc, char **argv ) 4.22 - { Matrix *leftMatrix, *rightMatrix, *resultMatrix; 4.23 - ParamBag *paramBag; 4.24 + { 4.25 4.26 DEBUG__printf2(TRUE, "arguments: %s | %s", argv[0], argv[1] ); 4.27 - 4.28 - paramBag = makeParamBag(); 4.29 - readParamFileIntoBag( argv[1], paramBag ); 4.30 - initialize_Input_Matrices_Via( &leftMatrix, &rightMatrix, paramBag ); 4.31 4.32 - resultMatrix = multiplyTheseMatrices( leftMatrix, rightMatrix ); 4.33 - 4.34 - printf("\nresult matrix: \n"); 4.35 - printMatrix( resultMatrix ); 4.36 - 4.37 - fflush(stdin); 4.38 + VSs__Hello_World( ); 4.39 4.40 exit(0); //cleans up 4.41 }
