Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > C > Cilk > Cilk__Matrix_Mult__Bench
changeset 2:ec0629f70ee5
forgot to add some of the files
| author | Me |
|---|---|
| date | Tue, 26 Oct 2010 19:34:03 -0700 |
| parents | dd5387f362f6 |
| children | bf7331ed394e |
| files | src/Application/CILK_Linux__Matrix_Mult src/Application/CILK__Matrix_Mult/Divide_Pr.c src/Application/CILK__Matrix_Mult/Divide_Pr.cilk src/Application/CILK__Matrix_Mult/EntryPoint.c src/Application/CILK__Matrix_Mult/EntryPoint.cilk src/Application/CILK__Matrix_Mult/Result_Pr.c src/Application/CILK__Matrix_Mult/VMS_primitive_data_types.h src/Application/CILK__Matrix_Mult/Vector_Pr.c src/Application/CILK__Matrix_Mult/Vector_Pr.cilk src/Application/CILK__Matrix_Mult/matmul.cilk src/Application/Makefile src/Application/main.c src/Application/main.cilk |
| diffstat | 13 files changed, 339 insertions(+), 545 deletions(-) [+] |
line diff
1.1 Binary file src/Application/CILK_Linux__Matrix_Mult has changed
2.1 --- a/src/Application/CILK__Matrix_Mult/Divide_Pr.c Tue Oct 26 19:32:46 2010 -0700 2.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 2.3 @@ -1,116 +0,0 @@ 2.4 -/* 2.5 - * Copyright 2009 OpenSourceStewardshipFoundation.org 2.6 - * Licensed under GNU General Public License version 2 2.7 - * 2.8 - * Author: seanhalle@yahoo.com 2.9 - * 2.10 - */ 2.11 - 2.12 - 2.13 -#include "VPThread__Matrix_Mult.h" 2.14 - 2.15 -/*Divider creates one processor for every row-col pair. 2.16 - * It hands them: 2.17 - * the name of the result processor that they should send their results to, 2.18 - * the left and right matrices, and the row and col they should multiply 2.19 - * the length of the vector 2.20 - * It first creates the result processor, then all the vector processors, 2.21 - * then does a receive of a message from the result processor that gives 2.22 - * the divider ownership of the result matrix. 2.23 - * Finally, the divider returns the result matrix out of the VPThread system. 2.24 - */ 2.25 -void divideIntoVectors( void *_dividerParams, VirtProcr *animatingThd ) 2.26 - { VirtProcr *resultsThd; 2.27 - DividerParams *dividerParams; 2.28 - ResultsParams *resultsParams; 2.29 - VectorParams *vectParams; 2.30 - Matrix *leftMatrix, *rightMatrix, *resultMatrix; 2.31 - void *msg; 2.32 - MatrixMultGlobals *globals; 2.33 - 2.34 -// printf("start divide\n"); fflush(stdin); 2.35 - 2.36 - dividerParams = (DividerParams *)_dividerParams; 2.37 - 2.38 - leftMatrix = dividerParams->leftMatrix; 2.39 - rightMatrix = dividerParams->rightMatrix; 2.40 - 2.41 - resultsParams = malloc( sizeof(ResultsParams) ); 2.42 - resultsParams->dividerThd = animatingThd; 2.43 - resultsParams->numCols = rightMatrix->numCols; 2.44 - resultsParams->numRows = leftMatrix->numRows; 2.45 - 2.46 - //=========== Set up global vars, including conds and mutexes =========== 2.47 - globals = malloc( sizeof(MatrixMultGlobals) ); 2.48 - VPThread__set_globals_to( globals ); 2.49 - 2.50 - globals->results_mutex = 
VPThread__make_mutex( animatingThd ); 2.51 - globals->results_cond = VPThread__make_cond( globals->results_mutex, 2.52 - animatingThd ); 2.53 - 2.54 - globals->vector_mutex = VPThread__make_mutex( animatingThd ); 2.55 - globals->vector_cond = VPThread__make_cond( globals->vector_mutex, 2.56 - animatingThd ); 2.57 - 2.58 - globals->start_mutex = VPThread__make_mutex( animatingThd ); 2.59 - globals->start_cond = VPThread__make_cond( globals->start_mutex, 2.60 - animatingThd ); 2.61 - //======================================================================== 2.62 - 2.63 - //get results-comm lock before create results-thd, to ensure it can't 2.64 - // signal that results are available before this thd is waiting on cond 2.65 - VPThread__mutex_lock( globals->results_mutex, animatingThd ); 2.66 - 2.67 - //also get the start lock & use to ensure no vector threads send a 2.68 - // signal before the results thread is waiting on vector cond 2.69 - VPThread__mutex_lock( globals->start_mutex, animatingThd ); 2.70 - 2.71 - 2.72 - VPThread__create_thread( &gatherResults, resultsParams, animatingThd ); 2.73 - 2.74 - //Now wait for results thd to signal that it has vector lock 2.75 - VPThread__cond_wait( globals->start_cond, animatingThd ); 2.76 - VPThread__mutex_unlock( globals->start_mutex, animatingThd );//done w/lock 2.77 - 2.78 - 2.79 - //make the vector thds 2.80 - int row, col; 2.81 - for( row = 0; row < leftMatrix->numRows; row++ ) 2.82 - { for( col = 0; col < rightMatrix->numCols; col++ ) 2.83 - { 2.84 - vectParams = malloc( sizeof(VectorParams) ); 2.85 - vectParams->myCol = col; 2.86 - vectParams->myRow = row; 2.87 - vectParams->vectLength = leftMatrix->numCols; 2.88 - vectParams->leftMatrix = leftMatrix; 2.89 - vectParams->rightMatrix = rightMatrix; 2.90 - 2.91 - VPThread__create_thread( &calcVector, vectParams, animatingThd ); 2.92 - } 2.93 - //=================== DEBUG =================== 2.94 - #ifdef PRINT_DEBUG_1 2.95 - printf("created thread: %d, %d\n", row, col); 
2.96 - #endif 2.97 - //============================================== 2.98 - } 2.99 - 2.100 - //Wait for results thread to say results are good 2.101 - VPThread__cond_wait( globals->results_cond, animatingThd ); 2.102 - 2.103 - //The results of the all the work have to be linked-to from the data 2.104 - // struc given to the seed procr -- this divide func is animated by 2.105 - // that seed procr, so have to link results to the _dividerParams. 2.106 - resultMatrix = malloc( sizeof(Matrix) ); 2.107 - resultMatrix->numCols = rightMatrix->numCols; 2.108 - resultMatrix->numRows = leftMatrix->numRows; 2.109 - dividerParams->resultMatrix = resultMatrix; 2.110 - resultMatrix->matrix = globals->results; 2.111 - 2.112 - //done with communication, release lock 2.113 - VPThread__mutex_unlock( globals->results_mutex, animatingThd ); 2.114 - 2.115 - 2.116 - VPThread__dissipate_thread( animatingThd ); //all Thds dissipate when done 2.117 - //when all of the threads have dissipated, the "create seed and do 2.118 - // work" call in the entry point function returns 2.119 - }
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/src/Application/CILK__Matrix_Mult/Divide_Pr.cilk Tue Oct 26 19:34:03 2010 -0700 3.3 @@ -0,0 +1,72 @@ 3.4 +/* 3.5 + * Copyright 2009 OpenSourceStewardshipFoundation.org 3.6 + * Licensed under GNU General Public License version 2 3.7 + * 3.8 + * Author: seanhalle@yahoo.com 3.9 + * 3.10 + */ 3.11 + 3.12 + 3.13 +#include "CILK__Matrix_Mult.h" 3.14 + 3.15 +cilk float32 calcVector( void * ); 3.16 + 3.17 +/*Divider creates one processor for every row-col pair. 3.18 + * It hands them: 3.19 + * the name of the result processor that they should send their results to, 3.20 + * the left and right matrices, and the row and col they should multiply 3.21 + * the length of the vector 3.22 + * It first creates the result processor, then all the vector processors, 3.23 + * then does a receive of a message from the result processor that gives 3.24 + * the divider ownership of the result matrix. 3.25 + * Finally, the divider returns the result matrix out of the VPThread system. 
3.26 + */ 3.27 + 3.28 +cilk void divideIntoVectors( void *_dividerParams ) 3.29 + { 3.30 + DividerParams *dividerParams; 3.31 + VectorParams *vectParams; 3.32 + Matrix *leftMatrix, *rightMatrix, *resultMatrix; 3.33 + int32 numCells, numCols, mrow, mcol; 3.34 + float32 *resultMatrixArray; 3.35 + 3.36 + dividerParams = (DividerParams *)_dividerParams; 3.37 + 3.38 + leftMatrix = dividerParams->leftMatrix; 3.39 + rightMatrix = dividerParams->rightMatrix; 3.40 + 3.41 + 3.42 + numCols = rightMatrix->numCols; 3.43 + 3.44 + numCells = leftMatrix->numRows * rightMatrix->numCols; 3.45 + resultMatrixArray = malloc( numCells * sizeof( float32 ) ); 3.46 + 3.47 + 3.48 + //spawn vector calcs 3.49 + for( mrow = 0; mrow < leftMatrix->numRows; mrow++ ) 3.50 + { for( mcol = 0; mcol < rightMatrix->numCols; mcol++ ) 3.51 + { 3.52 + vectParams = malloc( sizeof(VectorParams) ); 3.53 + vectParams->myCol = mcol; 3.54 + vectParams->myRow = mrow; 3.55 + vectParams->vectLength = leftMatrix->numCols; 3.56 + vectParams->leftMatrix = leftMatrix; 3.57 + vectParams->rightMatrix = rightMatrix; 3.58 + 3.59 + 3.60 + resultMatrixArray[ mrow * numCols + mcol ] = spawn calcVector( vectParams ); 3.61 + } 3.62 + } 3.63 + 3.64 + sync; 3.65 + 3.66 + 3.67 + //The results of the all the work have to be linked-to from the data 3.68 + // struc given to the seed procr -- this divide func is animated by 3.69 + // that seed procr, so have to link results to the _dividerParams. 3.70 + resultMatrix = malloc( sizeof(Matrix) ); 3.71 + resultMatrix->numCols = rightMatrix->numCols; 3.72 + resultMatrix->numRows = leftMatrix->numRows; 3.73 + dividerParams->resultMatrix = resultMatrix; 3.74 + resultMatrix->matrix = resultMatrixArray; 3.75 + }
4.1 --- a/src/Application/CILK__Matrix_Mult/EntryPoint.c Tue Oct 26 19:32:46 2010 -0700 4.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 4.3 @@ -1,48 +0,0 @@ 4.4 -/* 4.5 - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 4.6 - * Licensed under GNU General Public License version 2 4.7 - * 4.8 - * Author: seanhalle@yahoo.com 4.9 - * 4.10 - */ 4.11 - 4.12 -#include <math.h> 4.13 - 4.14 -#include "VPThread__Matrix_Mult.h" 4.15 - 4.16 - 4.17 - 4.18 -/*Every VPThread system has an "entry point" function that creates the first 4.19 - * processor, which starts the chain of creating more processors.. 4.20 - * eventually all of the processors will dissipate themselves, and 4.21 - * return. 4.22 - * 4.23 - *This entry-point function follows the same pattern as all entry-point 4.24 - * functions do: 4.25 - *1) it creates the params for the seed processor, from the 4.26 - * parameters passed into the entry-point function 4.27 - *2) it calls VPThread__create_seed_procr_and_do_work 4.28 - *3) it gets the return value from the params struc, frees the params struc, 4.29 - * and returns the value from the function 4.30 - * 4.31 - */ 4.32 -Matrix * 4.33 -multiplyTheseMatrices( Matrix *leftMatrix, Matrix *rightMatrix ) 4.34 - { Matrix *resMatrix; 4.35 - DividerParams *dividerParams; 4.36 - 4.37 - 4.38 - dividerParams = malloc( sizeof( DividerParams ) ); 4.39 - dividerParams->leftMatrix = leftMatrix; 4.40 - dividerParams->rightMatrix = rightMatrix; 4.41 - 4.42 - 4.43 - //create divider processor, start doing the work, and wait till done 4.44 - //This function is the "border crossing" between normal code and VPThread 4.45 - VPThread__create_seed_procr_and_do_work( &divideIntoVectors, dividerParams ); 4.46 - 4.47 - //get result matrix and return it 4.48 - resMatrix = dividerParams->resultMatrix; 4.49 - free( dividerParams ); 4.50 - return resMatrix; 4.51 - }
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 5.2 +++ b/src/Application/CILK__Matrix_Mult/EntryPoint.cilk Tue Oct 26 19:34:03 2010 -0700 5.3 @@ -0,0 +1,45 @@ 5.4 +/* 5.5 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 5.6 + * Licensed under GNU General Public License version 2 5.7 + * 5.8 + * Author: seanhalle@yahoo.com 5.9 + * 5.10 + */ 5.11 + 5.12 +#include "CILK__Matrix_Mult.h" 5.13 + 5.14 +cilk void divideIntoVectors( void * ); 5.15 + 5.16 +/*Every VPThread system has an "entry point" function that creates the first 5.17 + * processor, which starts the chain of creating more processors.. 5.18 + * eventually all of the processors will dissipate themselves, and 5.19 + * return. 5.20 + * 5.21 + *This entry-point function follows the same pattern as all entry-point 5.22 + * functions do: 5.23 + *1) it creates the params for the seed processor, from the 5.24 + * parameters passed into the entry-point function 5.25 + *2) it calls VPThread__create_seed_procr_and_do_work 5.26 + *3) it gets the return value from the params struc, frees the params struc, 5.27 + * and returns the value from the function 5.28 + * 5.29 + */ 5.30 +cilk 5.31 +Matrix * 5.32 +multiplyTheseMatrices( Matrix *leftMatrix, Matrix *rightMatrix ) 5.33 + { Matrix *resMatrix; 5.34 + DividerParams *dividerParams; 5.35 + 5.36 + 5.37 + dividerParams = malloc( sizeof( DividerParams ) ); 5.38 + dividerParams->leftMatrix = leftMatrix; 5.39 + dividerParams->rightMatrix = rightMatrix; 5.40 + 5.41 + spawn divideIntoVectors( dividerParams ); 5.42 + sync; 5.43 + 5.44 + //get result matrix and return it 5.45 + resMatrix = dividerParams->resultMatrix; 5.46 + free( dividerParams ); 5.47 + return resMatrix; 5.48 + }
6.1 --- a/src/Application/CILK__Matrix_Mult/Result_Pr.c Tue Oct 26 19:32:46 2010 -0700 6.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 6.3 @@ -1,89 +0,0 @@ 6.4 -/* 6.5 - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 6.6 - * Licensed under GNU General Public License version 2 6.7 - * 6.8 - * Author: seanhalle@yahoo.com 6.9 - * 6.10 - */ 6.11 - 6.12 -#include "VPThread__Matrix_Mult.h" 6.13 - 6.14 -/*The Result Processor gets a message from each of the vector processors, 6.15 - * puts the result from the message in its location in the result- 6.16 - * matrix, and increments the count of results. 6.17 - * 6.18 - *After the count reaches the point that all results have been received, it 6.19 - * returns the result matrix and dissipates. 6.20 - */ 6.21 -void gatherResults( void *_params, VirtProcr *animatingPr ) 6.22 - { VirtProcr *dividerPr; 6.23 - ResultsParams *params; 6.24 - int numRows, numCols, numCells, count=0; 6.25 - float32 *resultMatrixArray; 6.26 - void *msg; 6.27 - VectorParams *aResult; 6.28 - MatrixMultGlobals *globals =(MatrixMultGlobals *)VPThread__give_globals(); 6.29 - 6.30 - 6.31 - //get vector-comm lock before loop, so that this thd keeps lock after 6.32 - // one wait until it enters the next wait -- forces see-saw btwn 6.33 - // waiters and signalers -- wait-signal-wait-signal-... 
6.34 - VPThread__mutex_lock( globals->vector_mutex, animatingPr ); 6.35 - 6.36 - //Tell divider that have the vector lock -- so it's sure won't miss any 6.37 - // signals from the vector-threads it's about to create 6.38 - //Don't need a signal variable -- this thd can't be created until 6.39 - // divider thd already has the start lock 6.40 - VPThread__mutex_lock( globals->start_mutex, animatingPr );//finish wait 6.41 - VPThread__cond_signal( globals->start_cond, animatingPr ); 6.42 - VPThread__mutex_unlock( globals->start_mutex, animatingPr );//finish wait 6.43 - 6.44 - //===================== DEBUG ====================== 6.45 - #ifdef PRINT_DEBUG 6.46 - printf("**Result Pr has the lock**\n" ); 6.47 - fflush(stdin); 6.48 - #endif 6.49 - //==================================================== 6.50 - 6.51 - params = (ResultsParams *)_params; 6.52 - dividerPr = params->dividerThd; 6.53 - numCols = params->numCols; 6.54 - numRows = params->numRows; 6.55 - numCells = numRows * numCols; 6.56 - 6.57 - resultMatrixArray = malloc( numCells * sizeof( float32 ) ); 6.58 - 6.59 - 6.60 - while( count < numCells ) 6.61 - { 6.62 - //receive a vector-result from a vector-thread 6.63 - VPThread__cond_wait( globals->vector_cond, animatingPr ); 6.64 - 6.65 - aResult = globals->currVector; 6.66 - *(resultMatrixArray + aResult->myRow * numCols + aResult->myCol) = 6.67 - aResult->result; 6.68 - count++; 6.69 - //===================== DEBUG ====================== 6.70 - #ifdef PRINT_DEBUG_1 6.71 - if( count - count/numRows * numRows == 0 ) 6.72 - { printf("%d vector result: %f\n", count, aResult->result ); 6.73 - fflush(stdin); 6.74 - } 6.75 - #endif 6.76 - //==================================================== 6.77 - 6.78 - } 6.79 - //all comms done, release lock 6.80 - VPThread__mutex_unlock( globals->vector_mutex, animatingPr ); 6.81 - 6.82 - //Send result to divider (seed) thread 6.83 - // note, divider thd had to hold the results-comm lock before creating 6.84 - // this thread, to be 
sure no race 6.85 - VPThread__mutex_lock( globals->results_mutex, animatingPr ); 6.86 - globals->results = resultMatrixArray; 6.87 - VPThread__cond_signal( globals->results_cond, animatingPr ); 6.88 - VPThread__mutex_unlock( globals->results_mutex, animatingPr ); //releases 6.89 - //divider thread from its wait, at point this executes 6.90 - 6.91 - VPThread__dissipate_thread( animatingPr ); //frees any data owned by procr 6.92 - }
/*
 * Copyright 2009 OpenSourceStewardshipFoundation.org
 * Licensed under GNU General Public License version 2
 *
 * Author: seanhalle@yahoo.com
 *
 * File: src/Application/CILK__Matrix_Mult/VMS_primitive_data_types.h
 */

#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H
#define _BLIS_PRIMITIVE_DATA_TYPES_H


/*For portability, need primitive data types that have a well defined
 * size, and well-defined layout into bytes
 *To do this, provide BLIS standard aliases for all primitive data types
 *These aliases must be used in all BLIS functions instead of the ANSI types
 *
 *These definitions will be replaced inside each specialization module
 * according to the compiler used in that module and the hardware being
 * specialized to.
 *
 *The aliases are typedefs rather than #defines so that they obey scope
 * and are visible in a debugger.
 */
typedef char               bool8;

   //plain char may be signed or unsigned depending on the compiler, so the
   // 8-bit integer aliases spell out the signedness they promise
typedef signed char        int8;
typedef unsigned char      uint8;

typedef short              int16;
typedef unsigned short     uint16;
typedef int                int32;
typedef unsigned int       uint32;
typedef long long          int64;
typedef unsigned long long uint64;
typedef float              float32;
typedef double             float64;

   //"double double" is not a C type, so any use of float128 could not
   // compile.  long double is the widest standard floating type, but its
   // size is implementation-defined -- it is NOT guaranteed to be 128 bits.
#define float128 long double

#define TRUE     1
#define FALSE    0

#endif	/* _BLIS_PRIMITIVE_DATA_TYPES_H */
8.1 --- a/src/Application/CILK__Matrix_Mult/Vector_Pr.c Tue Oct 26 19:32:46 2010 -0700 8.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 8.3 @@ -1,59 +0,0 @@ 8.4 -/* 8.5 - * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 8.6 - * Licensed under GNU General Public License version 2 8.7 - * 8.8 - * Author: SeanHalle@yahoo.com 8.9 - * 8.10 - */ 8.11 - 8.12 -#include "VPThread__Matrix_Mult.h" 8.13 - 8.14 -/*A Vector processor is created with an environment that holds two matrices, 8.15 - * the row and col that it owns, and the name of a result gathering 8.16 - * processor. 8.17 - *It calculates its vector product then sends the result to the result 8.18 - * processor, which puts it into the result matrix and returns that matrix 8.19 - * when all is done. 8.20 - */ 8.21 - void 8.22 -calcVector( void *data, VirtProcr *animatingPr ) 8.23 - { 8.24 - VectorParams *params; 8.25 - VirtProcr *resultPr; 8.26 - int myRow, myCol, vectLength, pos; 8.27 - float32 *leftMatrixArray, *rightMatrixArray, result = 0.0; 8.28 - Matrix *leftMatrix, *rightMatrix; 8.29 - MatrixMultGlobals *globals =(MatrixMultGlobals *)VPThread__give_globals(); 8.30 - 8.31 - params = (VectorParams *)data; 8.32 - myCol = params->myCol; 8.33 - myRow = params->myRow; 8.34 - vectLength = params->vectLength; 8.35 - leftMatrix = params->leftMatrix; 8.36 - rightMatrix = params->rightMatrix; 8.37 - leftMatrixArray = leftMatrix->matrix; 8.38 - rightMatrixArray = rightMatrix->matrix; 8.39 - //===================== DEBUG ====================== 8.40 - #ifdef PRINT_DEBUG_1 8.41 - if( myCol == 0 ) 8.42 - printf("start vector: %d, %d\n", myRow, myCol ); fflush(stdin); 8.43 - #endif 8.44 - //==================================================== 8.45 - 8.46 - for( pos = 0; pos < vectLength; pos++ ) 8.47 - { 8.48 - result += *(leftMatrixArray + myRow * vectLength + pos) * 8.49 - *(rightMatrixArray + pos * vectLength + myCol); 8.50 - } 8.51 - params->result = result; 8.52 - 8.53 - //Send result to results thread 8.54 - 
VPThread__mutex_lock( globals->vector_mutex, animatingPr );//only get 8.55 - //the lock when results thd is inside wait. 8.56 - globals->currVector = params; 8.57 - VPThread__cond_signal( globals->vector_cond, animatingPr ); 8.58 - VPThread__mutex_unlock( globals->vector_mutex, animatingPr );//release 8.59 - //wait-er -- cond_signal implemented such that wait-er gets lock, no other 8.60 - 8.61 - VPThread__dissipate_thread( animatingPr ); 8.62 - }
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 9.2 +++ b/src/Application/CILK__Matrix_Mult/Vector_Pr.cilk Tue Oct 26 19:34:03 2010 -0700 9.3 @@ -0,0 +1,48 @@ 9.4 +/* 9.5 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 9.6 + * Licensed under GNU General Public License version 2 9.7 + * 9.8 + * Author: SeanHalle@yahoo.com 9.9 + * 9.10 + */ 9.11 + 9.12 +#include "CILK__Matrix_Mult.h" 9.13 + 9.14 +/*A Vector processor is created with an environment that holds two matrices, 9.15 + * the row and col that it owns, and the name of a result gathering 9.16 + * processor. 9.17 + *It calculates its vector product then sends the result to the result 9.18 + * processor, which puts it into the result matrix and returns that matrix 9.19 + * when all is done. 9.20 + */ 9.21 +cilk 9.22 +float32 9.23 +calcVector( void *data ) 9.24 + { 9.25 + VectorParams *params; 9.26 + int myRow, myCol, vectLength, pos; 9.27 + float32 *leftMatrixArray, *rightMatrixArray, result = 0.0; 9.28 + Matrix *leftMatrix, *rightMatrix; 9.29 + 9.30 + params = (VectorParams *)data; 9.31 + myCol = params->myCol; 9.32 + myRow = params->myRow; 9.33 + vectLength = params->vectLength; 9.34 + leftMatrix = params->leftMatrix; 9.35 + rightMatrix = params->rightMatrix; 9.36 + leftMatrixArray = leftMatrix->matrix; 9.37 + rightMatrixArray = rightMatrix->matrix; 9.38 + //===================== DEBUG ====================== 9.39 + #ifdef PRINT_DEBUG 9.40 + if( myCol == 0 ) 9.41 + printf("start vector: %d, %d\n", myRow, myCol ); fflush(stdin); 9.42 + #endif 9.43 + //==================================================== 9.44 + 9.45 + for( pos = 0; pos < vectLength; pos++ ) 9.46 + { 9.47 + result += leftMatrixArray[ myRow * vectLength + pos ] * 9.48 + rightMatrixArray[ pos * vectLength + myCol]; 9.49 + } 9.50 + return result; 9.51 + }
10.1 --- a/src/Application/CILK__Matrix_Mult/matmul.cilk Tue Oct 26 19:32:46 2010 -0700 10.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 10.3 @@ -1,198 +0,0 @@ 10.4 -/* 10.5 - * Rectangular matrix multiplication. 10.6 - * 10.7 - * See the paper ``Cache-Oblivious Algorithms'', by 10.8 - * Matteo Frigo, Charles E. Leiserson, Harald Prokop, and 10.9 - * Sridhar Ramachandran, FOCS 1999, for an explanation of 10.10 - * why this algorithm is good for caches. 10.11 - * 10.12 - * Author: Matteo Frigo 10.13 - */ 10.14 -static const char *ident __attribute__((__unused__)) 10.15 - = "$HeadURL: https://bradley.csail.mit.edu/svn/repos/cilk/5.4.3/examples/matmul.cilk $ $LastChangedBy: sukhaj $ $Rev: 517 $ $Date: 2003-10-27 10:05:37 -0500 (Mon, 27 Oct 2003) $"; 10.16 - 10.17 -/* 10.18 - * Copyright (c) 2003 Massachusetts Institute of Technology 10.19 - * 10.20 - * This program is free software; you can redistribute it and/or modify 10.21 - * it under the terms of the GNU General Public License as published by 10.22 - * the Free Software Foundation; either version 2 of the License, or 10.23 - * (at your option) any later version. 10.24 - * 10.25 - * This program is distributed in the hope that it will be useful, 10.26 - * but WITHOUT ANY WARRANTY; without even the implied warranty of 10.27 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10.28 - * GNU General Public License for more details. 
10.29 - * 10.30 - * You should have received a copy of the GNU General Public License 10.31 - * along with this program; if not, write to the Free Software 10.32 - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 10.33 - * 10.34 - */ 10.35 - 10.36 -#include <cilk-lib.cilkh> 10.37 -#include <stdio.h> 10.38 -#include <stdlib.h> 10.39 -#include <math.h> 10.40 - 10.41 -#define REAL float 10.42 - 10.43 -extern int Cilk_rand(void); 10.44 - 10.45 -void zero(REAL *A, int n) 10.46 -{ 10.47 - int i, j; 10.48 - 10.49 - for (i = 0; i < n; i++) { 10.50 - for (j = 0; j < n; j++) { 10.51 - A[i * n + j] = 0.0; 10.52 - } 10.53 - } 10.54 -} 10.55 - 10.56 -void init(REAL *A, int n) 10.57 -{ 10.58 - int i, j; 10.59 - 10.60 - for (i = 0; i < n; i++) { 10.61 - for (j = 0; j < n; j++) { 10.62 - A[i * n + j] = (double)Cilk_rand(); 10.63 - } 10.64 - } 10.65 -} 10.66 - 10.67 -double maxerror(REAL *A, REAL *B, int n) 10.68 -{ 10.69 - int i, j; 10.70 - double error = 0.0; 10.71 - 10.72 - for (i = 0; i < n; i++) { 10.73 - for (j = 0; j < n; j++) { 10.74 - double diff = (A[i * n + j] - B[i * n + j]) / A[i * n + j]; 10.75 - if (diff < 0) 10.76 - diff = -diff; 10.77 - if (diff > error) 10.78 - error = diff; 10.79 - } 10.80 - } 10.81 - return error; 10.82 -} 10.83 - 10.84 -void iter_matmul(REAL *A, REAL *B, REAL *C, int n) 10.85 -{ 10.86 - int i, j, k; 10.87 - 10.88 - for (i = 0; i < n; i++) 10.89 - for (k = 0; k < n; k++) { 10.90 - REAL c = 0.0; 10.91 - for (j = 0; j < n; j++) 10.92 - c += A[i * n + j] * B[j * n + k]; 10.93 - C[i * n + k] = c; 10.94 - } 10.95 -} 10.96 - 10.97 -/* 10.98 - * A \in M(m, n) 10.99 - * B \in M(n, p) 10.100 - * C \in M(m, p) 10.101 - */ 10.102 -cilk void rec_matmul(REAL *A, REAL *B, REAL *C, int m, int n, int p, int ld, 10.103 - int add) 10.104 -{ 10.105 - if ((m + n + p) <= 64) { 10.106 - int i, j, k; 10.107 - /* base case */ 10.108 - if (add) { 10.109 - for (i = 0; i < m; i++) 10.110 - for (k = 0; k < p; k++) { 10.111 - REAL c = 0.0; 10.112 - 
for (j = 0; j < n; j++) 10.113 - c += A[i * ld + j] * B[j * ld + k]; 10.114 - C[i * ld + k] += c; 10.115 - } 10.116 - } else { 10.117 - for (i = 0; i < m; i++) 10.118 - for (k = 0; k < p; k++) { 10.119 - REAL c = 0.0; 10.120 - for (j = 0; j < n; j++) 10.121 - c += A[i * ld + j] * B[j * ld + k]; 10.122 - C[i * ld + k] = c; 10.123 - } 10.124 - } 10.125 - } else if (m >= n && n >= p) { 10.126 - int m1 = m >> 1; 10.127 - spawn rec_matmul(A, B, C, m1, n, p, ld, add); 10.128 - spawn rec_matmul(A + m1 * ld, B, C + m1 * ld, m - m1, 10.129 - n, p, ld, add); 10.130 - } else if (n >= m && n >= p) { 10.131 - int n1 = n >> 1; 10.132 - spawn rec_matmul(A, B, C, m, n1, p, ld, add); 10.133 - sync; 10.134 - spawn rec_matmul(A + n1, B + n1 * ld, C, m, n - n1, p, ld, 1); 10.135 - } else { 10.136 - int p1 = p >> 1; 10.137 - spawn rec_matmul(A, B, C, m, n, p1, ld, add); 10.138 - spawn rec_matmul(A, B + p1, C + p1, m, n, p - p1, ld, add); 10.139 - } 10.140 -} 10.141 - 10.142 -cilk int main(int argc, char *argv[]) 10.143 -{ 10.144 - int n; 10.145 - REAL *A, *B, *C1, *C2; 10.146 - double err; 10.147 - Cilk_time tm_begin, tm_elapsed; 10.148 - Cilk_time wk_begin, wk_elapsed; 10.149 - Cilk_time cp_begin, cp_elapsed; 10.150 - 10.151 - if (argc != 2) { 10.152 - fprintf(stderr, "Usage: matmul [<cilk options>] <n>\n"); 10.153 - Cilk_exit(1); 10.154 - } 10.155 - n = atoi(argv[1]); 10.156 - 10.157 - A = malloc(n * n * sizeof(REAL)); 10.158 - B = malloc(n * n * sizeof(REAL)); 10.159 - C1 = malloc(n * n * sizeof(REAL)); 10.160 - C2 = malloc(n * n * sizeof(REAL)); 10.161 - 10.162 - init(A, n); 10.163 - init(B, n); 10.164 - zero(C1, n); 10.165 - zero(C2, n); 10.166 - 10.167 - iter_matmul(A, B, C1, n); 10.168 - 10.169 - /* Timing. "Start" timers */ 10.170 - sync; 10.171 - cp_begin = Cilk_user_critical_path; 10.172 - wk_begin = Cilk_user_work; 10.173 - tm_begin = Cilk_get_wall_time(); 10.174 - 10.175 - spawn rec_matmul(A, B, C2, n, n, n, n, 0); 10.176 - sync; 10.177 - 10.178 - /* Timing. 
"Stop" timers */ 10.179 - tm_elapsed = Cilk_get_wall_time() - tm_begin; 10.180 - wk_elapsed = Cilk_user_work - wk_begin; 10.181 - cp_elapsed = Cilk_user_critical_path - cp_begin; 10.182 - 10.183 - err = maxerror(C1, C2, n); 10.184 - 10.185 - printf("\nCilk Example: matmul\n"); 10.186 - printf(" running on %d processor%s\n\n", 10.187 - Cilk_active_size, Cilk_active_size > 1 ? "s" : ""); 10.188 - printf("Max error = %g\n", err); 10.189 - printf("Options: size = %d\n", n); 10.190 - printf("Running time = %4f s\n", Cilk_wall_time_to_sec(tm_elapsed)); 10.191 - printf("Work = %4f s\n", Cilk_time_to_sec(wk_elapsed)); 10.192 - printf("Critical path = %4f s\n", Cilk_time_to_sec(cp_elapsed)); 10.193 - printf("``MFLOPS'' = %4f\n\n", 10.194 - 2.0 * n * n * n / (1.0e6 * Cilk_wall_time_to_sec(tm_elapsed))); 10.195 - 10.196 - free(C2); 10.197 - free(C1); 10.198 - free(B); 10.199 - free(A); 10.200 - return 0; 10.201 -}
# File: src/Application/Makefile
#
# Builds the Cilk matrix-multiply benchmark:
#   - each .c source is compiled to its own .o with gcc
#   - each .cilk source is compiled to its own .o with cilkc
#   - the objects are linked with cilkc
#   - the executable is copied to the directory the benchmark runs use

CILK_SOURCE = \
  CILK__Matrix_Mult/EntryPoint.cilk \
  CILK__Matrix_Mult/Divide_Pr.cilk \
  CILK__Matrix_Mult/Vector_Pr.cilk \
  main.cilk

C_SOURCE = \
  matrix_mult.c \
  ParamHelper/ParamBag.c \
  ParamHelper/ReadParamsFromFile.c

C_OBJS    = $(C_SOURCE:.c=.o)

CILK_OBJS = $(CILK_SOURCE:.cilk=.o)

# Link the object files, not the sources -- otherwise the compile rules
# below are never used.
OBJECTS   = $(C_OBJS) $(CILK_OBJS)

TARGET      = CILK_Linux__Matrix_Mult
INSTALL_DIR = ~/D/2__INRIA_OMP/1__Development/2__runs_and_data/executables

.PHONY: all clean

# First rule in the file, so a plain "make" builds and installs everything.
all: $(TARGET)
	cp $(TARGET) $(INSTALL_DIR)

# Separate recipe lines (rather than "link; cp") so that a failed link
# stops the build instead of copying a stale executable.
$(TARGET): $(OBJECTS)
	cilkc $(OBJECTS) -o $@

#Make has the built-in variable "$<" which is the first prerequisite
# and "$@" which is the target being built.
#These are static pattern rules: each object depends only on the one source
# with the matching name, so "$<" is that source.  (Writing
# "$(C_OBJS): $(C_SOURCE)" makes every object depend on every source and
# "$<" is then always the first source in the list.)
$(C_OBJS): %.o: %.c
	gcc -c $< -o $@

# .cilk files must go through cilkc; gcc does not understand them.
$(CILK_OBJS): %.o: %.cilk
	cilkc -c $< -o $@

clean:
	rm -f $(OBJECTS) $(TARGET)


#================================================================
#Other stuff tried/played_with/copied
#Example called "specifying alternate directories"
# puts all object files in one directory
#CFLAGS :=
#OBJDIR := .

#$(OBJDIR)/%.o: %.c
#	$(CC) $(CFLAGS) -c $(input) -o $(output)

#Believe that make fills in "inputs".. and because have the
# sub-dir in the target, it puts that sub-dir into "inputs" var
# but apparently because the source is in objects dir, it doesn't
# include the sub-dir in the "output" var
#$(OBJDIR)/CILK_Matrix_Mult: $(OBJDIR)/*.o
#	cilkc $(input) -o $(output)

#%.o: %.cilk


#===============================================================
#May be odd usage.. my first makefile.. idea is to tell make
# that to get a given .o file, to run cilkc w/"-c" option, which
# causes cilkc to generate a ".o" file
#%.o: %.cilk
#	cilkc -c $< -o $@


#================================================================
# playing with below..

#rule for inferring that the .cilk file is the source for .o file
# and how to create the .o from the .cilk
#%.o : %.cilk
#	cilkc -c $(.SOURCE)

#CILK_Linux__Matrix_Mult: main.o CILK__Matrix_Mult/foo.o #ParamHelper/foo2.o

#foo.o: $(SUBDIR_SOURCES)
#	gcc -shared $(inputs) -o $(output)

#%.o: %.cilk
#	cilkc -c $(input) -o $(output)
12.1 --- a/src/Application/main.c Tue Oct 26 19:32:46 2010 -0700 12.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 12.3 @@ -1,35 +0,0 @@ 12.4 -/* 12.5 - * Copyright Oct 24, 2009 OpenSourceCodeStewardshipFoundation.org 12.6 - * Licensed under GNU General Public License version 2 12.7 - * 12.8 - * author seanhalle@yahoo.com 12.9 - */ 12.10 - 12.11 -#include <malloc.h> 12.12 -#include <stdlib.h> 12.13 - 12.14 -#include "Matrix_Mult.h" 12.15 -#include "VPThread__Matrix_Mult/VPThread__Matrix_Mult.h" 12.16 - 12.17 -/** 12.18 - *Matrix multiply program written using VMS_HW piggy-back language 12.19 - * 12.20 - */ 12.21 -int main( int argc, char **argv ) 12.22 - { Matrix *leftMatrix, *rightMatrix, *resultMatrix; 12.23 - ParamBag *paramBag; 12.24 - 12.25 - paramBag = makeParamBag(); 12.26 - readParamFileIntoBag( argv[1], paramBag ); 12.27 - initialize_Input_Matrices_Via( &leftMatrix, &rightMatrix, paramBag ); 12.28 - 12.29 - resultMatrix = multiplyTheseMatrices( leftMatrix, rightMatrix ); 12.30 - 12.31 - printf("\nresult matrix: \n"); 12.32 - 12.33 -// printMatrix( resultMatrix ); 12.34 - 12.35 -// VPThread__print_stats(); 12.36 - 12.37 - exit(0); //cleans up 12.38 - }
13.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 13.2 +++ b/src/Application/main.cilk Tue Oct 26 19:34:03 2010 -0700 13.3 @@ -0,0 +1,41 @@ 13.4 +/* 13.5 + * Copyright Oct 24, 2009 OpenSourceCodeStewardshipFoundation.org 13.6 + * Licensed under GNU General Public License version 2 13.7 + * 13.8 + * author seanhalle@yahoo.com 13.9 + */ 13.10 + 13.11 +#include <malloc.h> 13.12 +#include <stdlib.h> 13.13 + 13.14 +#include "Matrix_Mult.h" 13.15 +#include "CILK__Matrix_Mult/CILK__Matrix_Mult.h" 13.16 + 13.17 +cilk Matrix * 13.18 +multiplyTheseMatrices( Matrix *leftMatrix, Matrix *rightMatrix ); 13.19 + 13.20 +/** 13.21 + *Matrix multiply program written using VMS_HW piggy-back language 13.22 + * 13.23 + */ 13.24 +cilk 13.25 +int main( int argc, char **argv ) 13.26 + { Matrix *leftMatrix, *rightMatrix, *resultMatrix; 13.27 + ParamBag *paramBag; 13.28 + 13.29 + 13.30 + paramBag = makeParamBag(); 13.31 + readParamFileIntoBag( argv[1], paramBag ); 13.32 + initialize_Input_Matrices_Via( &leftMatrix, &rightMatrix, paramBag ); 13.33 + 13.34 + resultMatrix = spawn multiplyTheseMatrices( leftMatrix, rightMatrix ); 13.35 + sync; 13.36 + 13.37 + printf("\nresult matrix: \n"); 13.38 + 13.39 +// printMatrix( resultMatrix ); 13.40 + 13.41 +// VPThread__print_stats(); 13.42 + 13.43 + exit(0); //cleans up 13.44 + }
