# HG changeset patch
# User Some Random Person
# Date 1337801959 25200
# Node ID 9cf4c84a309167f2ebf3e6ab3d3eb67d0c217a44
Initial add of copied code -- nonsense code still

diff -r 000000000000 -r 9cf4c84a3091 .hgeol
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgeol Wed May 23 12:39:19 2012 -0700
@@ -0,0 +1,14 @@
+
+[patterns]
+**.py = native
+**.txt = native
+**.c = native
+**.h = native
+**.cpp = native
+**.java = native
+**.class = bin
+**.jar = bin
+**.sh = native
+**.pl = native
+**.jpg = bin
+**.gif = bin
diff -r 000000000000 -r 9cf4c84a3091 .hgignore
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgignore Wed May 23 12:39:19 2012 -0700
@@ -0,0 +1,12 @@
+nbproject
+Makefile
+build
+dist
+src/Default
+src/.settings
+src/.cproject
+src/.project
+.dep.inc
+glob:.cproject
+glob:.project
+glob:Debug
diff -r 000000000000 -r 9cf4c84a3091 VSs__Hello_World/EntryPoint.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VSs__Hello_World/EntryPoint.c Wed May 23 12:39:19 2012 -0700
@@ -0,0 +1,71 @@
+/*
+ *  Copyright 2009 OpenSourceStewardshipFoundation.org
+ *  Licensed under GNU General Public License version 2
+ *
+ * Author: seanhalle@yahoo.com
+ *
+ */
+
+#include <stdlib.h>
+
+#include "SSR_Matrix_Mult.h"
+
+
+
+/*Every SSR system has an "entry point" function that creates the first
+ * processor, which starts the chain of creating more processors..
+ * eventually all of the processors will dissipate themselves, and
+ * return.
+ *
+ *This entry-point function follows the same pattern as all entry-point
+ * functions do:
+ *1) it creates the params for the seed processor, from the
+ *    parameters passed into the entry-point function
+ *2) it calls SSR__create_seed_procr_and_do_work
+ *3) it gets the return value from the params struc, frees the params struc,
+ *    and returns the value from the function
+ *
+ *The caller owns the returned matrix and its array.  Returns NULL if an
+ * input is NULL or a malloc fails -- no work is started in that case.
+ */
+Matrix *
+multiplyTheseMatrices( Matrix *leftMatrix, Matrix *rightMatrix )
+ { Matrix        *resMatrix;
+   DividerParams *dividerParams;
+   int32          numResRows, numResCols;
+
+   if( leftMatrix == NULL || rightMatrix == NULL ) return NULL;
+
+   numResRows = leftMatrix->numRows;
+   numResCols = rightMatrix->numCols;
+
+      //VMS has its own separate internal malloc, so to get results out,
+      // have to pass in empty array for it to fill up
+      //The alternative is internally telling SSR make external space to use
+   resMatrix = malloc( sizeof(Matrix) );
+   if( resMatrix == NULL ) return NULL;
+
+   resMatrix->array = malloc( (size_t)numResRows * (size_t)numResCols *
+                              sizeof(float32) );
+   dividerParams    = malloc( sizeof(DividerParams) );
+   if( resMatrix->array == NULL || dividerParams == NULL )
+    { free( dividerParams );     //free(NULL) is a no-op
+      free( resMatrix->array );
+      free( resMatrix );
+      return NULL;
+    }
+   resMatrix->numRows = numResRows;
+   resMatrix->numCols = numResCols;
+
+   dividerParams->leftMatrix   = leftMatrix;
+   dividerParams->rightMatrix  = rightMatrix;
+   dividerParams->resultMatrix = resMatrix;
+
+      //create divider processor, start doing the work, and wait till done
+      //This function is the "border crossing" between normal code and SSR
+   SSR__create_seed_procr_and_do_work( &divideWorkIntoSubMatrixPairProcrs,
+                                       dividerParams );
+
+   free( dividerParams );
+   return resMatrix;
+ }
diff -r 000000000000 -r 9cf4c84a3091 VSs__Hello_World/SeedVP.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VSs__Hello_World/SeedVP.c Wed May 23 12:39:19 2012 -0700
@@ -0,0 +1,676 @@
+/*
+ *  Copyright 2009 OpenSourceStewardshipFoundation.org
+ *  Licensed under GNU General Public License version 2
+ *
+ * Author: seanhalle@yahoo.com
+ *
+ */
+
+
+#include <math.h>
+#include <string.h>
+#include "SSR_Matrix_Mult.h"
+
+   //The time to compute this many result values should equal the time to
+   // perform this division on a matrix whose size gives that many result calcs
+   //IE, size this so that sequential time to calc equals divide time
+   // find the value by experimenting -- but divide time and calc time scale
+   // same way, so this value might remain the same across hardware
+#define NUM_CELLS_IN_SEQUENTIAL_CUTOFF 1000
+
+
+//===========================================================================
+static int
+measureMatrixMultPrimitive( SlaveVP *animPr );
+
+SlicingStrucCarrier *
+calcIdealSizeAndSliceDimensions( Matrix *leftMatrix, Matrix *rightMatrix,
+                                 SlaveVP *animPr );
+
+SlicingStruc *
+sliceUpDimension( float32 idealSizeOfSide, int startVal, int endVal,
+                  SlaveVP *animPr );
+
+void
+freeSlicingStruc( SlicingStruc *slicingStruc, SlaveVP *animPr );
+
+SubMatrix **
+createSubMatrices( SlicingStruc *rowSlices, SlicingStruc *colSlices,
+                   int32 numUses, Matrix *origMatrix, SlaveVP *animPr );
+
+void
+freeSubMatrices( SlicingStruc *rowSlices, SlicingStruc *colSlices,
+                 SubMatrix **subMatrices, SlaveVP *animPr );
+
+void
+pairUpSubMatricesAndMakeProcessors( SubMatrix **leftSubMatrices,
+                                    SubMatrix **rightSubMatrices,
+                                    int32 numRowIdxs, int32 numColIdxs,
+                                    int32 numVecIdxs,
+                                    SlaveVP *resultPr,
+                                    SlaveVP *animatingPr );
+
+void
+makeSubMatricesAndProcrs( Matrix *leftMatrix, Matrix *rightMatrix,
+                          SlicingStrucCarrier *slicingStrucCarrier,
+                          SlaveVP *resultPr, SlaveVP *animatingPr );
+
+
+
+/*Divider creates one processor for every sub-matrix
+ * It hands them:
+ *  the name of the result processor that they should send their results to,
+ *  the left and right matrices, and the rows and cols they should multiply
+ * It first creates the result processor, then all the sub-matrixPair
+ *  processors,
+ *  then does a receive of a message from the result processor that gives
+ *  the divider ownership of the result matrix.
+ * Finally, the divider returns the result matrix out of the SSR system.
+ *
+ * Divider chooses the size of sub-matrices via an algorithm that tries to
+ *  keep the minimum work above a threshold.  The threshold is machine-
+ *  dependent, so ask SSR for min work-unit time to get a
+ *  given overhead
+ *
+ * Divide min work-unit cycles by measured-cycles for one matrix-cell
+ *  product -- gives the number of products need to have in min size
+ *  matrix.
+ *
+ * So then, take cubed root of this to get the size of a side of min sub-
+ *  matrix.  That is the size of the ideal square sub-matrix -- so tile
+ *  up the two input matrices into ones as close as possible to that size,
+ *  and create the pairs of sub-matrices.
+ *
+ *========================  STRATEGIC OVERVIEW  =======================
+ *
+ *This division is a bit tricky, because have to create things in advance
+ * that it's not at first obvious need to be created..
+ *
+ *First slice up each dimension -- three of them..  this is because will have
+ * to create the sub-matrix's data-structures before pairing the sub-matrices
+ * with each other -- so, have three dimensions to slice up before can
+ * create the sub-matrix data-strucs -- also, have to be certain that the
+ * cols of the left input have the exact same slicing as the rows of the
+ * right matrix, so just to be sure, do the slicing calc once, then use it
+ * for both.
+ *
+ *So, goes like this:
+ *1) calculate the start & end values of each dimension in each matrix.
+ *2) use those values to create sub-matrix structures
+ *3) combine sub-matrices into pairs, as the tasks to perform.
+ *
+ *Have to calculate separately from creating the sub-matrices because of the
+ * nature of the nesting -- would either end up creating the same sub-matrix
+ * multiple times, or else would have to put in detection of whether had
+ * made a particular one already if tried to combine steps 1 and 2.
+ *
+ *Step 3 has to be separate because of the nesting, as well -- same reason,
+ * would either create same sub-matrix multiple times, or else have to
+ * add detection of whether was already created.
+ *
+ *Another way to look at it: there's one level of loop to divide dimensions,
+ * two levels of nesting to create sub-matrices, and three levels to pair
+ * up the sub-matrices.
+ *
+ *_dividerParams is the DividerParams made by the entry point; animPr is the
+ * processor animating this function, which dissipates itself at the end.
+ */
+void divideWorkIntoSubMatrixPairProcrs( void      *_dividerParams,
+                                        SlaveVP   *animPr )
+ { SlaveVP         *resultPr;
+   DividerParams   *dividerParams;
+   ResultsParams   *resultsParams;
+   Matrix          *leftMatrix, *rightMatrix;
+   SlicingStrucCarrier *slicingStrucCarrier;
+   float32         *resultArray;    //points to array inside result matrix
+
+         DEBUG__printf( dbgAppFlow, "start divide")
+
+   int32
+   divideProbe = VMS_App__create_single_interval_probe( "divideProbe",
+                                                        animPr );
+   VMS_App__record_sched_choice_into_probe( divideProbe, animPr );
+   VMS_App__record_interval_start_in_probe( divideProbe );
+
+   //=========== Setup -- make local copies of ptd-to-things, malloc, aso
+   int32 numResRows, numResCols, vectLength;
+
+   dividerParams   = (DividerParams *)_dividerParams;
+
+   leftMatrix      = dividerParams->leftMatrix;
+   rightMatrix     = dividerParams->rightMatrix;
+
+   vectLength      = leftMatrix->numCols;
+   numResRows      = leftMatrix->numRows;
+   numResCols      = rightMatrix->numCols;
+   resultArray     = dividerParams->resultMatrix->array;
+
+      //zero the result array
+   memset( resultArray, 0,
+           (size_t)numResRows * (size_t)numResCols * sizeof(float32) );
+
+   //==============  Do either sequential mult or do division ==============
+
+      //Check if input matrices too small -- if yes, just do sequential
+      //Cutoff is determined by overhead of this divider -- relatively
+      // machine-independent
+   if( (float32)leftMatrix->numRows * (float32)leftMatrix->numCols *
+       (float32)rightMatrix->numCols  < NUM_CELLS_IN_SEQUENTIAL_CUTOFF )
+    {
+      //====== Do sequential multiply on a single core
+            DEBUG__printf( dbgAppFlow, "doing sequential")
+
+         //transpose the right matrix
+      float32 *
+      transRightArray  = SSR__malloc_to( rightMatrix->numRows *
+                                         rightMatrix->numCols * sizeof(float32),
+                                         animPr );
+
+         //copy values from orig matrix to local
+      copyTranspose( rightMatrix->numRows, rightMatrix->numCols,
+                     0, 0, rightMatrix->numRows,
+                     transRightArray, rightMatrix->array );
+
+      multiplyMatrixArraysTransposed( vectLength, numResRows, numResCols,
+                                      leftMatrix->array, transRightArray,
+                                      resultArray );
+
+         //the transposed copy is only a scratch buffer -- give it back
+      SSR__free( transRightArray, animPr );
+    }
+   else
+    {
+      //====== Do parallel multiply across cores
+
+         //Calc the ideal size of sub-matrix and slice up the dimensions of
+         // the two matrices.
+         //The ideal size is the one takes the number of cycles to calculate
+         // such that calc time is equal or greater than min work-unit size
+      slicingStrucCarrier =
+         calcIdealSizeAndSliceDimensions( leftMatrix, rightMatrix, animPr );
+
+         //Make the results processor, now that know how many to wait for
+      resultsParams = SSR__malloc_to( sizeof(ResultsParams), animPr );
+      resultsParams->numSubMatrixPairs  =
+         slicingStrucCarrier->leftRowSlices->numVals *
+         slicingStrucCarrier->rightColSlices->numVals *
+         slicingStrucCarrier->vecSlices->numVals;
+      resultsParams->dividerPr   = animPr;
+      resultsParams->numCols     = rightMatrix->numCols;
+      resultsParams->numRows     = leftMatrix->numRows;
+      resultsParams->resultArray = resultArray;
+
+            DEBUG__printf(dbgAppFlow,"**create result Pr**")
+      resultPr =
+         SSR__create_procr_with( &gatherResults, resultsParams, animPr);
+
+         //Make the sub-matrices, and pair them up, and make processor to
+         // calc product of each pair.
+      makeSubMatricesAndProcrs( leftMatrix, rightMatrix,
+                                slicingStrucCarrier,
+                                resultPr, animPr);
+
+         //result array is allocated externally, so no message from resultPr
+         // however, do have to wait before printing out stats, so wait
+         // for an empty handshake message -- its content is not used
+      (void)SSR__receive_from_to( resultPr, animPr );
+    }
+
+
+   //===============  Work done -- send results back =================
+
+
+         DEBUG__printf( dbgAppFlow, "end divide")
+
+   VMS_App__record_interval_end_in_probe( divideProbe );
+   VMS_App__print_stats_of_all_probes();
+
+      //nothing left to do so dissipate, SSR will wait to shutdown and hence
+      // make results available to outside until all the processors have
+      // dissipated -- so no need to wait for results processor
+
+   SSR__dissipate_procr( animPr );  //all procrs dissipate self at end
+      //when all of the processors have dissipated, the "create seed and do
+      // work" call in the entry point function returns
+ }
+
+
+/*Picks the target side length of a sub-matrix, then slices the three
+ * dimensions (left rows, shared vector dimension, right cols) to that size.
+ *
+ *The target is the larger of two estimates:
+ *  1) the size whose work covers SSR's minimum work-unit cycles
+ *  2) the size that yields about SSR's ideal number of work-units
+ *
+ *Returns a carrier, allocated with SSR__malloc_to, holding the three
+ * slicings; makeSubMatricesAndProcrs frees the carrier and the slicings.
+ */
+SlicingStrucCarrier *
+calcIdealSizeAndSliceDimensions( Matrix *leftMatrix, Matrix *rightMatrix,
+                                 SlaveVP *animPr )
+ {
+   float32 idealSizeOfSide, idealSizeOfSide1, idealSizeOfSide2;
+   SlicingStruc *leftRowSlices, *vecSlices, *rightColSlices;
+   SlicingStrucCarrier *slicingStrucCarrier =
+                         SSR__malloc_to(sizeof(SlicingStrucCarrier), animPr);
+
+   int     minWorkUnitCycles, primitiveCycles, idealNumWorkUnits;
+   float64 numPrimitiveOpsInMinWorkUnit, numWorkUnitsPerSide;
+
+
+   //======= Calc ideal size of min-sized sub-matrix ========
+
+      //ask SSR for the number of cycles of the minimum work unit, at given
+      // percent overhead then add a guess at overhead from this divider
+   minWorkUnitCycles = SSR__giveMinWorkUnitCycles( .05 );
+
+      //ask SSR for number of cycles of the "primitive" op of matrix mult
+   primitiveCycles = measureMatrixMultPrimitive( animPr );
+   if( primitiveCycles < 1 ) primitiveCycles = 1; //guards the divide below
+
+   numPrimitiveOpsInMinWorkUnit =
+      (float64)minWorkUnitCycles / (float64)primitiveCycles;
+
+      //take cubed root -- that's number of these in a "side" of sub-matrix
+      // then multiply by 5 because the primitive is 5x5
+   idealSizeOfSide1 = 5 * cbrt( numPrimitiveOpsInMinWorkUnit );
+
+   idealNumWorkUnits = SSR__giveIdealNumWorkUnits();
+
+      //rint(cbrt()) is 0 or less when SSR reports no work-units, so clamp
+      // to 1 to keep the divide below finite
+   numWorkUnitsPerSide = rint(cbrt( idealNumWorkUnits ));
+   if( numWorkUnitsPerSide < 1 ) numWorkUnitsPerSide = 1;
+
+   idealSizeOfSide2 = leftMatrix->numRows / numWorkUnitsPerSide;
+   idealSizeOfSide2 *= 0.6; //finer granularity to help load balance
+
+   if( idealSizeOfSide1 > idealSizeOfSide2 )
+      idealSizeOfSide = idealSizeOfSide1;
+   else
+      idealSizeOfSide = idealSizeOfSide2;
+
+      //The multiply inner loop blocks the array to fit into L1 cache
+//   if( idealSizeOfSide < ROWS_IN_BLOCK ) idealSizeOfSide = ROWS_IN_BLOCK;
+
+   //============  Slice up dimensions, now that know target size ===========
+
+      //Tell the slicer the target size of a side (floating pt), the start
+      // value to start slicing at, and the end value to stop slicing at
+      //It returns an array of start value of each chunk, plus number of them
+   int32 startLeftRow, endLeftRow, startVec,endVec,startRightCol,endRightCol;
+   startLeftRow  = 0;
+   endLeftRow    = leftMatrix->numRows -1;
+   startVec      = 0;
+   endVec        = leftMatrix->numCols -1;
+   startRightCol = 0;
+   endRightCol   = rightMatrix->numCols -1;
+
+   leftRowSlices =
+      sliceUpDimension( idealSizeOfSide,  startLeftRow, endLeftRow, animPr );
+
+   vecSlices =
+      sliceUpDimension( idealSizeOfSide,  startVec, endVec, animPr );
+
+   rightColSlices =
+      sliceUpDimension( idealSizeOfSide,  startRightCol, endRightCol,animPr);
+
+   slicingStrucCarrier->leftRowSlices  = leftRowSlices;
+   slicingStrucCarrier->vecSlices      = vecSlices;
+   slicingStrucCarrier->rightColSlices = rightColSlices;
+
+   return slicingStrucCarrier;
+ }
+
+
+/*Builds the table of sub-matrix descriptors for each input, then creates one
+ * processor per (left, right) pair via pairUpSubMatricesAndMakeProcessors.
+ *
+ *Takes ownership of slicingStrucCarrier and its three slicings and frees
+ * them.  Also frees the two pointer tables once every pair has been handed
+ * its SubMatrix pointers; the SubMatrix structs themselves are not freed
+ * here because they have been passed on to the pair processors.
+ */
+void
+makeSubMatricesAndProcrs( Matrix *leftMatrix, Matrix *rightMatrix,
+                          SlicingStrucCarrier *slicingStrucCarrier,
+                          SlaveVP *resultPr, SlaveVP *animPr )
+ {
+   SlicingStruc *leftRowSlices, *vecSlices, *rightColSlices;
+
+   leftRowSlices  = slicingStrucCarrier->leftRowSlices;
+   vecSlices      = slicingStrucCarrier->vecSlices;
+   rightColSlices = slicingStrucCarrier->rightColSlices;
+   SSR__free( slicingStrucCarrier, animPr );
+
+   //================  Make sub-matrices, given the slicing  ================
+   SubMatrix **leftSubMatrices, **rightSubMatrices;
+   leftSubMatrices =
+      createSubMatrices( leftRowSlices, vecSlices, rightColSlices->numVals,
+                         leftMatrix, animPr );
+   //double_check_that_always_numRows_in_right_same_as_numCols_in_left();
+   rightSubMatrices =
+      createSubMatrices( vecSlices, rightColSlices, leftRowSlices->numVals,
+                         rightMatrix, animPr );
+
+
+   //==============  pair the sub-matrices and make processors ==============
+   int32 numRowIdxs, numColIdxs, numVecIdxs;
+
+   numRowIdxs = leftRowSlices->numVals;
+   numColIdxs = rightColSlices->numVals;
+   numVecIdxs = vecSlices->numVals;
+
+
+   freeSlicingStruc( leftRowSlices, animPr );
+   freeSlicingStruc( vecSlices, animPr );
+   freeSlicingStruc( rightColSlices, animPr );
+
+   pairUpSubMatricesAndMakeProcessors( leftSubMatrices,
+                                       rightSubMatrices,
+                                       numRowIdxs, numColIdxs,
+                                       numVecIdxs,
+                                       resultPr,
+                                       animPr );
+
+      //each pair's params now hold the SubMatrix pointers directly, so the
+      // lookup tables are no longer needed
+   SSR__free( leftSubMatrices,  animPr );
+   SSR__free( rightSubMatrices, animPr );
+ }
+
+
+
+
+/*Creates one calcSubMatrixProduct processor for every combination of result
+ * row-slice, result col-slice and vector-slice.
+ *
+ *leftSubMatrices is laid out [row][vec] and rightSubMatrices [vec][col], the
+ * way createSubMatrices fills them, so the row stride of the left table is
+ * numVecIdxs and that of the right table is numColIdxs.
+ */
+void
+pairUpSubMatricesAndMakeProcessors( SubMatrix **leftSubMatrices,
+                                    SubMatrix **rightSubMatrices,
+                                    int32 numRowIdxs, int32 numColIdxs,
+                                    int32 numVecIdxs,
+                                    SlaveVP *resultPr,
+                                    SlaveVP *animatingPr )
+ {
+   int32  resRowIdx, resColIdx, vecIdx;
+   int32  numLeftColIdxs, numRightColIdxs;
+   int32  leftRowIdxOffset;
+   SMPairParams *subMatrixPairParams;
+   float32 numToPutOntoEachCore, leftOverFraction, numVecOnCurrCore;
+   int32 numCores, coreToAssignOnto;
+
+      //strides must match how the tables were built: left has one column
+      // per vector-slice, right has one column per result-col-slice
+   numLeftColIdxs  = numVecIdxs;
+   numRightColIdxs = numColIdxs;
+
+   numCores = SSR__give_number_of_cores_to_schedule_onto();
+   if( numCores < 1 ) numCores = 1;  //avoid divide-by-zero below
+
+      //NOTE(review): integer divide, and the count below advances once per
+      // pair rather than once per vector -- balancing heuristic left as is
+   numToPutOntoEachCore = numRowIdxs*numColIdxs/numCores;
+   leftOverFraction = 0;
+   numVecOnCurrCore = 0;
+   coreToAssignOnto = 0;
+
+   for( resRowIdx = 0; resRowIdx < numRowIdxs; resRowIdx++ )
+    {
+      leftRowIdxOffset = resRowIdx * numLeftColIdxs;
+
+      for( resColIdx = 0; resColIdx < numColIdxs; resColIdx++ )
+       {
+
+         for( vecIdx = 0; vecIdx < numVecIdxs; vecIdx++ )
+          {
+               //Make the processor for the pair of sub-matrices
+            subMatrixPairParams  = SSR__malloc_to( sizeof(SMPairParams),
+                                                   animatingPr);
+            subMatrixPairParams->leftSubMatrix  =
+               leftSubMatrices[ leftRowIdxOffset + vecIdx ];
+
+            subMatrixPairParams->rightSubMatrix =
+               rightSubMatrices[ vecIdx * numRightColIdxs + resColIdx ];
+
+            subMatrixPairParams->resultPr = resultPr;
+
+               //put all pairs from the same vector onto same core
+            SSR__create_procr_with_affinity( &calcSubMatrixProduct,
+                                             subMatrixPairParams,
+                                             animatingPr,
+                                             coreToAssignOnto );
+
+               //Trying to distribute the subMatrix-vectors across the cores, so
+               // that each core gets the same number of vectors, with a max
+               // imbalance of 1 vector more on some cores than others
+            numVecOnCurrCore += 1; //incr before checking, so
+            if( numVecOnCurrCore > numToPutOntoEachCore ) //actual num 1 less
+             {
+                  //deal with fractional part, to ensure that imbalance is 1 max
+                  // IE, core with most has only 1 more than core with least
+               leftOverFraction = numToPutOntoEachCore - numVecOnCurrCore;
+               if( leftOverFraction > 1 ) ERROR("division alg messed up\n");
+               numVecOnCurrCore = leftOverFraction; //accumulates "extra"
+
+                  //Move to next core, max core-value to incr to is numCores -1
+               coreToAssignOnto += 1;
+               if( coreToAssignOnto >= numCores ) coreToAssignOnto = 0;
+             } //if
+          } //for( vecIdx
+       } //for( resColIdx
+    } //for( resRowIdx
+
+ }
+
+
+
+/*Walk through the two slice-strucs, making sub-matrix strucs as go
+ *
+ *Returns a row-major table of row-slices x col-slices SubMatrix pointers.
+ * Each SubMatrix only records its window into origMatrix -- no data is
+ * copied here, so alreadyCopied starts cleared and array starts NULL.
+ */
+SubMatrix **
+createSubMatrices( SlicingStruc *rowSlices, SlicingStruc *colSlices,
+                   int32 numUses, Matrix *origMatrix, SlaveVP *animPr )
+ {
+   int32 numRowIdxs, numColIdxs, rowIdx, colIdx;
+   int32 startRow, endRow, startCol, endCol;
+   int32 *rowStartVals, *colStartVals;
+   int32 rowOffset;
+   SubMatrix **subMatrices, *newSubMatrix;
+
+   numRowIdxs = rowSlices->numVals;
+   numColIdxs = colSlices->numVals;
+
+   rowStartVals = rowSlices->startVals;
+   colStartVals = colSlices->startVals;
+
+   subMatrices = SSR__malloc_to(numRowIdxs * numColIdxs * sizeof(SubMatrix*),
+                                animPr );
+
+   for( rowIdx = 0; rowIdx < numRowIdxs; rowIdx++ )
+    {
+      rowOffset = rowIdx * numColIdxs;
+
+      startRow  = rowStartVals[rowIdx];
+      endRow    = rowStartVals[rowIdx + 1] -1; //"fake" start above last is
+                                               // at last valid idx + 1 & is
+                                               // 1 greater than end value
+      for( colIdx = 0; colIdx < numColIdxs; colIdx++ )
+       {
+         startCol = colStartVals[colIdx];
+         endCol   = colStartVals[colIdx + 1] -1;
+
+         newSubMatrix = SSR__malloc_to( sizeof(SubMatrix), animPr );
+         newSubMatrix->numRows       = endRow - startRow +1;
+         newSubMatrix->numCols       = endCol - startCol +1;
+         newSubMatrix->origMatrix    = origMatrix;
+         newSubMatrix->origStartRow  = startRow;
+         newSubMatrix->origStartCol  = startCol;
+         newSubMatrix->numUsesLeft   = numUses; //can free after this many
+            //Prevent uninitialized memory -- freeSubMatrices reads
+            // alreadyCopied to decide whether array must be freed
+         newSubMatrix->alreadyCopied      = 0;
+         newSubMatrix->array              = NULL;
+         newSubMatrix->copySingleton      = NULL;
+         newSubMatrix->copyTransSingleton = NULL;
+
+         subMatrices[ rowOffset + colIdx ] = newSubMatrix;
+       }
+    }
+   return subMatrices;
+ }
+
+
+/*Frees every SubMatrix in the table, the array of each one that has
+ * alreadyCopied set, and then the table itself.
+ */
+void
+freeSubMatrices( SlicingStruc *rowSlices, SlicingStruc *colSlices,
+                 SubMatrix **subMatrices, SlaveVP *animPr )
+ {
+   int32 numRowIdxs, numColIdxs, rowIdx, colIdx, rowOffset;
+   SubMatrix *subMatrix;
+
+   numRowIdxs = rowSlices->numVals;
+   numColIdxs = colSlices->numVals;
+
+   for( rowIdx = 0; rowIdx < numRowIdxs; rowIdx++ )
+    {
+      rowOffset = rowIdx * numColIdxs;
+      for( colIdx = 0; colIdx < numColIdxs; colIdx++ )
+       {
+         subMatrix = subMatrices[ rowOffset + colIdx ];
+         if( subMatrix->alreadyCopied )
+            SSR__free( subMatrix->array, animPr );
+         SSR__free( subMatrix, animPr );
+       }
+    }
+   SSR__free( subMatrices, animPr );
+ }
+
+
+
+/*Cuts the inclusive index range startVal..endVal into consecutive slices of
+ * about idealSizeOfSide each.  The fractional part of the ideal size is
+ * carried from slice to slice, so sizes alternate between floor and ceiling.
+ *
+ *Returns a SlicingStruc whose startVals holds numVals real start indexes
+ * followed by one "fake" start equal to endVal + 1.
+ */
+SlicingStruc *
+sliceUpDimension( float32 idealSizeOfSide, int startVal, int endVal,
+                  SlaveVP *animPr )
+ { float32 residualAcc = 0;
+   int     dimLength, i, *startVals, sizeOfSlice, endCondition;
+   SlicingStruc *slicingStruc = SSR__malloc_to(sizeof(SlicingStruc), animPr);
+
+   dimLength = endVal - startVal + 1;
+   if( dimLength < 0 ) dimLength = 0;
+
+      //Clamp the target into 1..dimLength.  Below 1 a slice could come out
+      // empty; above dimLength the answer is one slice anyway, and clamping
+      // keeps the float-to-int conversions below in range.  The negated
+      // test also catches NaN
+   if( !(idealSizeOfSide >= 1) ) idealSizeOfSide = 1;
+   if( idealSizeOfSide > dimLength  &&  dimLength > 0 )
+      idealSizeOfSide = dimLength;
+
+      //Every slice is at least 1 wide, so there are at most dimLength of
+      // them, plus 1 slot for the fake start.  Sizing from dimLength /
+      // idealSizeOfSide ran short whenever the ragged tail got its own slice
+   startVals = SSR__malloc_to( ((size_t)dimLength + 1) * sizeof(int32),
+                               animPr );
+   startVals[0] = startVal;   //stays the fake start if the range is empty
+
+      //Calc the upper limit of start value -- when get above this, end loop
+      // by saving highest value of the matrix dimension to access, plus 1
+      // as the start point of the imaginary slice following the last one
+      //Plus 1 because go up to value but not include when process last slice
+      //The stopping condition is half-a-size less than highest value because
+      // don't want any pieces smaller than half the ideal size -- just tack
+      // little ones onto end of last one
+   endCondition = endVal - (int) (idealSizeOfSide/2); //end *value*, not size
+   for( i = 0; startVal <= endVal; i++ )
+    {
+      startVals[i] = startVal;
+      residualAcc += idealSizeOfSide;
+      sizeOfSlice  = (int)residualAcc;
+      residualAcc -= (float32)sizeOfSlice;
+      startVal    += sizeOfSlice; //ex @size = 2 get 0, 2, 4, 6, 8..
+
+      if( startVal > endCondition )
+       { startVal = endVal + 1;
+         startVals[ i + 1 ] = startVal;
+       }
+    }
+
+   slicingStruc->startVals = startVals;
+   slicingStruc->numVals   = i;  //loop incr'd, so == last valid start idx+1
+                                 // which means is num sub-matrices in dim
+                                 // also == idx of the fake start just above
+   return slicingStruc;
+ }
+
+void
+freeSlicingStruc( SlicingStruc *slicingStruc, SlaveVP *animPr )
+ {
+   SSR__free( slicingStruc->startVals, animPr );
+   SSR__free( slicingStruc, animPr );
+ }
+
+
+/*Times one 5x5 by 5x5 multiply, which is the "primitive" that the
+ * sub-matrix size estimate is scaled from.  Returns whatever
+ * SSR__end_primitive_and_give_cycles reports.
+ */
+static int
+measureMatrixMultPrimitive( SlaveVP *animPr )
+ {
+   int r, c, v, numCycles;
+   float32 *res, *left, *right;
+
+      //setup inputs
+   left  = SSR__malloc_to( 5 * 5 * sizeof( float32 ), animPr );
+   right = SSR__malloc_to( 5 * 5 * sizeof( float32 ), animPr );
+   res   = SSR__malloc_to( 5 * 5 * sizeof( float32 ), animPr );
+
+   for( r = 0; r < 5; r++ )
+    {
+      for( c = 0; c < 5; c++ )
+       {
+         left[  r * 5 + c ] = r;
+         right[ r * 5 + c ] = c;
+         res[   r * 5 + c ] = 0;   //accumulated into below
+       }
+    }
+
+      //do primitive
+   SSR__start_primitive();  //for now, just takes time stamp
+   for( r = 0; r < 5; r++ )
+    {
+      for( c = 0; c < 5; c++ )
+       {
+         for( v = 0; v < 5; v++ )
+          {
+               //accumulate, so each result cell is a full dot product
+            res[ r * 5 + c ] += left[ r * 5 + v ] * right[ v * 5 + c ];
+          }
+       }
+    }
+   numCycles =
+      SSR__end_primitive_and_give_cycles();
+
+   SSR__free( left, animPr );
+   SSR__free( right, animPr );
+   SSR__free( res, animPr );
+
+   return numCycles;
+ }
+
diff -r 000000000000 -r 9cf4c84a3091 VSs__Hello_World/VSs__Hello_World.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VSs__Hello_World/VSs__Hello_World.h Wed May 23 12:39:19 2012 -0700
@@ -0,0 +1,94 @@
+/*
+ *  Copyright Oct 24, 2009 OpenSourceStewardshipFoundation.org
+ *  Licensed under GNU General Public License version 2
+ */
+
+#ifndef _SSR_MATRIX_MULT_H_
+#define _SSR_MATRIX_MULT_H_
+
+#include <stdio.h>   //NOTE(review): header name was lost; stdio assumed
+
+#include "SSR_impl/SSR.h"
+#include "../Matrix_Mult.h"
+
+
+//===============================  Defines  ==============================
+#define ROWS_IN_BLOCK 32
+#define COLS_IN_BLOCK 32
+#define VEC_IN_BLOCK  32
+
+#define copyMatrixSingleton 1
+#define copyTransposeSingleton 2
+
+//==============================  Structures  ==============================
+typedef struct
+ {
+   Matrix *leftMatrix;
+   Matrix *rightMatrix;
+   Matrix *resultMatrix;   //allocated by the entry point, filled by SSR
+ }
+DividerParams;
+
+typedef struct
+ {
+   SlaveVP *dividerPr;
+   int      numRows;
+   int      numCols;
+   int      numSubMatrixPairs; //row-slices * col-slices * vec-slices
+   float32 *resultArray;
+ }
+ResultsParams;
+
+typedef struct
+ { int32    numRows;
+   int32    numCols;
+   Matrix  *origMatrix;
+   int32    origStartRow;
+   int32    origStartCol;
+   int32    alreadyCopied; //when set, freeSubMatrices also frees array
+   int32    numUsesLeft; //have update via message to avoid multiple writers
+   SSRSingleton *copySingleton;
+   SSRSingleton *copyTransSingleton;
+   float32 *array;  //2D, but dynamically sized, so use addr arith
+ }
+SubMatrix;
+
+typedef struct
+ { SlaveVP   *resultPr;
+   SubMatrix *leftSubMatrix;
+   SubMatrix *rightSubMatrix;
+   float32   *partialResultArray;
+ }
+SMPairParams;
+
+typedef struct
+ { int32  numVals;   //number of slices
+   int32 *startVals; //numVals starts, then one fake start = last idx + 1
+ }
+SlicingStruc;
+
+typedef struct
+ {
+   SlicingStruc *leftRowSlices;
+   SlicingStruc *vecSlices;
+   SlicingStruc *rightColSlices;
+ }
+SlicingStrucCarrier;
+
+enum MMMsgType
+ {
+   RESULTS_MSG = 1
+ };
+
+//============================= Processor Functions =========================
+void divideWorkIntoSubMatrixPairProcrs( void *data, SlaveVP *animatingPr );
+void calcSubMatrixProduct(   void *data, SlaveVP *animatingPr );
+void gatherResults(   void *data, SlaveVP *animatingPr );
+
+
+//================================ Entry Point ==============================
+Matrix *
+multiplyTheseMatrices( Matrix *leftMatrix, Matrix *rightMatrix );
+
+
+#endif /*_SSR_MATRIX_MULT_H_*/
diff -r 000000000000 -r 9cf4c84a3091 __brch__default
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/__brch__default Wed May 23 12:39:19 2012 -0700
@@ -0,0 +1,1 @@
+Applications normally have only the default branch -- they shouldn't be affected by any choices in VMS or language..
diff -r 000000000000 -r 9cf4c84a3091 main.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/main.c Wed May 23 12:39:19 2012 -0700
@@ -0,0 +1,45 @@
+/*
+ *  Copyright Oct 24, 2009 OpenSourceStewardshipFoundation.org
+ *  Licensed under GNU General Public License version 2
+ *
+ * author seanhalle@yahoo.com
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "Matrix_Mult.h"
+#include "SSR_Matrix_Mult/SSR_Matrix_Mult.h"
+
+/**
+ *Reads the parameter file named by argv[1], builds the two input matrices
+ * from it, multiplies them and prints the result.
+ */
+int main( int argc, char **argv )
+ { Matrix      *leftMatrix, *rightMatrix, *resultMatrix;
+   ParamBag    *paramBag;
+
+   if( argc < 2 )
+    { fprintf( stderr, "usage: <program> <param-file>\n" );
+      return EXIT_FAILURE;
+    }
+
+         DEBUG__printf2(TRUE, "arguments: %s | %s", argv[0], argv[1] );
+
+   paramBag = makeParamBag();
+   readParamFileIntoBag( argv[1], paramBag );
+   initialize_Input_Matrices_Via( &leftMatrix, &rightMatrix, paramBag );
+
+   resultMatrix = multiplyTheseMatrices( leftMatrix, rightMatrix );
+   if( resultMatrix == NULL )
+    { fprintf( stderr, "matrix multiply failed\n" );
+      return EXIT_FAILURE;
+    }
+
+   printf("\nresult matrix: \n");
+   printMatrix( resultMatrix );
+
+   fflush(stdout);  //fflush on an input stream is undefined behavior
+
+   exit(0); //cleans up
+ }