Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > C > Cilk > Cilk__Matrix_Mult__Bench
changeset 0:b17ddcc97ffc
Just created -- sample code, not CILK code yet, just skeleton
| author | Me |
|---|---|
| date | Mon, 20 Sep 2010 17:11:58 -0700 |
| parents | |
| children | dd5387f362f6 |
| files | src/Application/CILK__Matrix_Mult/CILK__Matrix_Mult.h src/Application/CILK__Matrix_Mult/Divide_Pr.c src/Application/CILK__Matrix_Mult/EntryPoint.c src/Application/CILK__Matrix_Mult/Result_Pr.c src/Application/CILK__Matrix_Mult/Vector_Pr.c src/Application/CILK__Matrix_Mult/matmul.cilk src/Application/Matrix_Mult.c src/Application/Matrix_Mult.h src/Application/main.c |
| diffstat | 9 files changed, 862 insertions(+), 0 deletions(-) [+] |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/Application/CILK__Matrix_Mult/CILK__Matrix_Mult.h Mon Sep 20 17:11:58 2010 -0700 1.3 @@ -0,0 +1,75 @@ 1.4 +/* 1.5 + * Copyright Oct 24, 2009 OpenSourceCodeStewardshipFoundation.org 1.6 + * Licensed under GNU General Public License version 2 1.7 + */ 1.8 + 1.9 +#ifndef _VPThread__MATRIX_MULT_H_ 1.10 +#define _VPThread__MATRIX_MULT_H_ 1.11 + 1.12 +#include <stdio.h> 1.13 + 1.14 +#include "../../VPThread__lib/VPThread.h" 1.15 +#include "../Matrix_Mult.h" 1.16 + 1.17 +//============================== Structures ============================== 1.18 +typedef struct 1.19 + { 1.20 + Matrix *leftMatrix; 1.21 + Matrix *rightMatrix; 1.22 + Matrix *resultMatrix; 1.23 + } 1.24 +DividerParams; 1.25 + 1.26 +typedef struct 1.27 + { 1.28 + VirtProcr *dividerThd; 1.29 + int numRows; 1.30 + int numCols; 1.31 + } 1.32 +ResultsParams; 1.33 + 1.34 +typedef struct 1.35 + { VirtProcr *resultsThd; 1.36 + int myCol; 1.37 + int myRow; 1.38 + int vectLength; 1.39 + Matrix *leftMatrix; 1.40 + Matrix *rightMatrix; 1.41 + float32 result; 1.42 + } 1.43 +VectorParams; 1.44 + 1.45 +typedef struct 1.46 + { 1.47 + //for communicating vector results to results Thd 1.48 + int32 vector_mutex; 1.49 + int32 vector_cond; 1.50 + VectorParams *currVector; 1.51 + 1.52 + //for communicating results array back to seed (divider) Thd 1.53 + int32 results_mutex; 1.54 + int32 results_cond; 1.55 + float32 *results; 1.56 + 1.57 + //for ensuring results thd has vector lock before making vector thds 1.58 + int32 start_mutex; 1.59 + int32 start_cond; 1.60 + 1.61 + Matrix *rightMatrix; 1.62 + Matrix *resultMatrix; 1.63 + } 1.64 +MatrixMultGlobals; 1.65 + 1.66 + 1.67 +//============================= Processor Functions ========================= 1.68 +void divideIntoVectors( void *data, VirtProcr *animatingPr ); 1.69 +void calcVector( void *data, VirtProcr *animatingPr ); 1.70 +void gatherResults( void *data, VirtProcr *animatingPr ); 1.71 + 1.72 + 1.73 +//================================ Entry Point ============================== 1.74 +Matrix * 1.75 +multiplyTheseMatrices( Matrix *leftMatrix, Matrix *rightMatrix ); 1.76 + 1.77 + 1.78 +#endif /*_VPThread__MATRIX_MULT_H_*/
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/src/Application/CILK__Matrix_Mult/Divide_Pr.c Mon Sep 20 17:11:58 2010 -0700 2.3 @@ -0,0 +1,116 @@ 2.4 +/* 2.5 + * Copyright 2009 OpenSourceStewardshipFoundation.org 2.6 + * Licensed under GNU General Public License version 2 2.7 + * 2.8 + * Author: seanhalle@yahoo.com 2.9 + * 2.10 + */ 2.11 + 2.12 + 2.13 +#include "VPThread__Matrix_Mult.h" 2.14 + 2.15 +/*Divider creates one processor for every row-col pair. 2.16 + * It hands them: 2.17 + * the name of the result processor that they should send their results to, 2.18 + * the left and right matrices, and the row and col they should multiply 2.19 + * the length of the vector 2.20 + * It first creates the result processor, then all the vector processors, 2.21 + * then does a receive of a message from the result processor that gives 2.22 + * the divider ownership of the result matrix. 2.23 + * Finally, the divider returns the result matrix out of the VPThread system. 2.24 + */ 2.25 +void divideIntoVectors( void *_dividerParams, VirtProcr *animatingThd ) 2.26 + { VirtProcr *resultsThd; 2.27 + DividerParams *dividerParams; 2.28 + ResultsParams *resultsParams; 2.29 + VectorParams *vectParams; 2.30 + Matrix *leftMatrix, *rightMatrix, *resultMatrix; 2.31 + void *msg; 2.32 + MatrixMultGlobals *globals; 2.33 + 2.34 +// printf("start divide\n"); fflush(stdin); 2.35 + 2.36 + dividerParams = (DividerParams *)_dividerParams; 2.37 + 2.38 + leftMatrix = dividerParams->leftMatrix; 2.39 + rightMatrix = dividerParams->rightMatrix; 2.40 + 2.41 + resultsParams = malloc( sizeof(ResultsParams) ); 2.42 + resultsParams->dividerThd = animatingThd; 2.43 + resultsParams->numCols = rightMatrix->numCols; 2.44 + resultsParams->numRows = leftMatrix->numRows; 2.45 + 2.46 + //=========== Set up global vars, including conds and mutexes =========== 2.47 + globals = malloc( sizeof(MatrixMultGlobals) ); 2.48 + VPThread__set_globals_to( globals ); 2.49 + 2.50 + globals->results_mutex = VPThread__make_mutex( animatingThd ); 2.51 + globals->results_cond = VPThread__make_cond( globals->results_mutex, 2.52 + animatingThd ); 2.53 + 2.54 + globals->vector_mutex = VPThread__make_mutex( animatingThd ); 2.55 + globals->vector_cond = VPThread__make_cond( globals->vector_mutex, 2.56 + animatingThd ); 2.57 + 2.58 + globals->start_mutex = VPThread__make_mutex( animatingThd ); 2.59 + globals->start_cond = VPThread__make_cond( globals->start_mutex, 2.60 + animatingThd ); 2.61 + //======================================================================== 2.62 + 2.63 + //get results-comm lock before create results-thd, to ensure it can't 2.64 + // signal that results are available before this thd is waiting on cond 2.65 + VPThread__mutex_lock( globals->results_mutex, animatingThd ); 2.66 + 2.67 + //also get the start lock & use to ensure no vector threads send a 2.68 + // signal before the results thread is waiting on vector cond 2.69 + VPThread__mutex_lock( globals->start_mutex, animatingThd ); 2.70 + 2.71 + 2.72 + VPThread__create_thread( &gatherResults, resultsParams, animatingThd ); 2.73 + 2.74 + //Now wait for results thd to signal that it has vector lock 2.75 + VPThread__cond_wait( globals->start_cond, animatingThd ); 2.76 + VPThread__mutex_unlock( globals->start_mutex, animatingThd );//done w/lock 2.77 + 2.78 + 2.79 + //make the vector thds 2.80 + int row, col; 2.81 + for( row = 0; row < leftMatrix->numRows; row++ ) 2.82 + { for( col = 0; col < rightMatrix->numCols; col++ ) 2.83 + { 2.84 + vectParams = malloc( sizeof(VectorParams) ); 2.85 + vectParams->myCol = col; 2.86 + vectParams->myRow = row; 2.87 + vectParams->vectLength = leftMatrix->numCols; 2.88 + vectParams->leftMatrix = leftMatrix; 2.89 + vectParams->rightMatrix = rightMatrix; 2.90 + 2.91 + VPThread__create_thread( &calcVector, vectParams, animatingThd ); 2.92 + } 2.93 + //=================== DEBUG =================== 2.94 + #ifdef PRINT_DEBUG_1 2.95 + printf("created thread: %d, %d\n", row, col); 2.96 + #endif 2.97 + //============================================== 2.98 + } 2.99 + 2.100 + //Wait for results thread to say results are good 2.101 + VPThread__cond_wait( globals->results_cond, animatingThd ); 2.102 + 2.103 + //The results of the all the work have to be linked-to from the data 2.104 + // struc given to the seed procr -- this divide func is animated by 2.105 + // that seed procr, so have to link results to the _dividerParams. 2.106 + resultMatrix = malloc( sizeof(Matrix) ); 2.107 + resultMatrix->numCols = rightMatrix->numCols; 2.108 + resultMatrix->numRows = leftMatrix->numRows; 2.109 + dividerParams->resultMatrix = resultMatrix; 2.110 + resultMatrix->matrix = globals->results; 2.111 + 2.112 + //done with communication, release lock 2.113 + VPThread__mutex_unlock( globals->results_mutex, animatingThd ); 2.114 + 2.115 + 2.116 + VPThread__dissipate_thread( animatingThd ); //all Thds dissipate when done 2.117 + //when all of the threads have dissipated, the "create seed and do 2.118 + // work" call in the entry point function returns 2.119 + }
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/src/Application/CILK__Matrix_Mult/EntryPoint.c Mon Sep 20 17:11:58 2010 -0700 3.3 @@ -0,0 +1,48 @@ 3.4 +/* 3.5 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 3.6 + * Licensed under GNU General Public License version 2 3.7 + * 3.8 + * Author: seanhalle@yahoo.com 3.9 + * 3.10 + */ 3.11 + 3.12 +#include <math.h> 3.13 + 3.14 +#include "VPThread__Matrix_Mult.h" 3.15 + 3.16 + 3.17 + 3.18 +/*Every VPThread system has an "entry point" function that creates the first 3.19 + * processor, which starts the chain of creating more processors.. 3.20 + * eventually all of the processors will dissipate themselves, and 3.21 + * return. 3.22 + * 3.23 + *This entry-point function follows the same pattern as all entry-point 3.24 + * functions do: 3.25 + *1) it creates the params for the seed processor, from the 3.26 + * parameters passed into the entry-point function 3.27 + *2) it calls VPThread__create_seed_procr_and_do_work 3.28 + *3) it gets the return value from the params struc, frees the params struc, 3.29 + * and returns the value from the function 3.30 + * 3.31 + */ 3.32 +Matrix * 3.33 +multiplyTheseMatrices( Matrix *leftMatrix, Matrix *rightMatrix ) 3.34 + { Matrix *resMatrix; 3.35 + DividerParams *dividerParams; 3.36 + 3.37 + 3.38 + dividerParams = malloc( sizeof( DividerParams ) ); 3.39 + dividerParams->leftMatrix = leftMatrix; 3.40 + dividerParams->rightMatrix = rightMatrix; 3.41 + 3.42 + 3.43 + //create divider processor, start doing the work, and wait till done 3.44 + //This function is the "border crossing" between normal code and VPThread 3.45 + VPThread__create_seed_procr_and_do_work( ÷IntoVectors, dividerParams ); 3.46 + 3.47 + //get result matrix and return it 3.48 + resMatrix = dividerParams->resultMatrix; 3.49 + free( dividerParams ); 3.50 + return resMatrix; 3.51 + }
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 4.2 +++ b/src/Application/CILK__Matrix_Mult/Result_Pr.c Mon Sep 20 17:11:58 2010 -0700 4.3 @@ -0,0 +1,89 @@ 4.4 +/* 4.5 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 4.6 + * Licensed under GNU General Public License version 2 4.7 + * 4.8 + * Author: seanhalle@yahoo.com 4.9 + * 4.10 + */ 4.11 + 4.12 +#include "VPThread__Matrix_Mult.h" 4.13 + 4.14 +/*The Result Processor gets a message from each of the vector processors, 4.15 + * puts the result from the message in its location in the result- 4.16 + * matrix, and increments the count of results. 4.17 + * 4.18 + *After the count reaches the point that all results have been received, it 4.19 + * returns the result matrix and dissipates. 4.20 + */ 4.21 +void gatherResults( void *_params, VirtProcr *animatingPr ) 4.22 + { VirtProcr *dividerPr; 4.23 + ResultsParams *params; 4.24 + int numRows, numCols, numCells, count=0; 4.25 + float32 *resultMatrixArray; 4.26 + void *msg; 4.27 + VectorParams *aResult; 4.28 + MatrixMultGlobals *globals =(MatrixMultGlobals *)VPThread__give_globals(); 4.29 + 4.30 + 4.31 + //get vector-comm lock before loop, so that this thd keeps lock after 4.32 + // one wait until it enters the next wait -- forces see-saw btwn 4.33 + // waiters and signalers -- wait-signal-wait-signal-... 4.34 + VPThread__mutex_lock( globals->vector_mutex, animatingPr ); 4.35 + 4.36 + //Tell divider that have the vector lock -- so it's sure won't miss any 4.37 + // signals from the vector-threads it's about to create 4.38 + //Don't need a signal variable -- this thd can't be created until 4.39 + // divider thd already has the start lock 4.40 + VPThread__mutex_lock( globals->start_mutex, animatingPr );//finish wait 4.41 + VPThread__cond_signal( globals->start_cond, animatingPr ); 4.42 + VPThread__mutex_unlock( globals->start_mutex, animatingPr );//finish wait 4.43 + 4.44 + //===================== DEBUG ====================== 4.45 + #ifdef PRINT_DEBUG 4.46 + printf("**Result Pr has the lock**\n" ); 4.47 + fflush(stdin); 4.48 + #endif 4.49 + //==================================================== 4.50 + 4.51 + params = (ResultsParams *)_params; 4.52 + dividerPr = params->dividerThd; 4.53 + numCols = params->numCols; 4.54 + numRows = params->numRows; 4.55 + numCells = numRows * numCols; 4.56 + 4.57 + resultMatrixArray = malloc( numCells * sizeof( float32 ) ); 4.58 + 4.59 + 4.60 + while( count < numCells ) 4.61 + { 4.62 + //receive a vector-result from a vector-thread 4.63 + VPThread__cond_wait( globals->vector_cond, animatingPr ); 4.64 + 4.65 + aResult = globals->currVector; 4.66 + *(resultMatrixArray + aResult->myRow * numCols + aResult->myCol) = 4.67 + aResult->result; 4.68 + count++; 4.69 + //===================== DEBUG ====================== 4.70 + #ifdef PRINT_DEBUG_1 4.71 + if( count - count/numRows * numRows == 0 ) 4.72 + { printf("%d vector result: %f\n", count, aResult->result ); 4.73 + fflush(stdin); 4.74 + } 4.75 + #endif 4.76 + //==================================================== 4.77 + 4.78 + } 4.79 + //all comms done, release lock 4.80 + VPThread__mutex_unlock( globals->vector_mutex, animatingPr ); 4.81 + 4.82 + //Send result to divider (seed) thread 4.83 + // note, divider thd had to hold the results-comm lock before creating 4.84 + // this thread, to be sure no race 4.85 + VPThread__mutex_lock( globals->results_mutex, animatingPr ); 4.86 + globals->results = resultMatrixArray; 4.87 + VPThread__cond_signal( globals->results_cond, animatingPr ); 4.88 + VPThread__mutex_unlock( globals->results_mutex, animatingPr ); //releases 4.89 + //divider thread from its wait, at point this executes 4.90 + 4.91 + VPThread__dissipate_thread( animatingPr ); //frees any data owned by procr 4.92 + }
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 5.2 +++ b/src/Application/CILK__Matrix_Mult/Vector_Pr.c Mon Sep 20 17:11:58 2010 -0700 5.3 @@ -0,0 +1,59 @@ 5.4 +/* 5.5 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 5.6 + * Licensed under GNU General Public License version 2 5.7 + * 5.8 + * Author: SeanHalle@yahoo.com 5.9 + * 5.10 + */ 5.11 + 5.12 +#include "VPThread__Matrix_Mult.h" 5.13 + 5.14 +/*A Vector processor is created with an environment that holds two matrices, 5.15 + * the row and col that it owns, and the name of a result gathering 5.16 + * processor. 5.17 + *It calculates its vector product then sends the result to the result 5.18 + * processor, which puts it into the result matrix and returns that matrix 5.19 + * when all is done. 5.20 + */ 5.21 + void 5.22 +calcVector( void *data, VirtProcr *animatingPr ) 5.23 + { 5.24 + VectorParams *params; 5.25 + VirtProcr *resultPr; 5.26 + int myRow, myCol, vectLength, pos; 5.27 + float32 *leftMatrixArray, *rightMatrixArray, result = 0.0; 5.28 + Matrix *leftMatrix, *rightMatrix; 5.29 + MatrixMultGlobals *globals =(MatrixMultGlobals *)VPThread__give_globals(); 5.30 + 5.31 + params = (VectorParams *)data; 5.32 + myCol = params->myCol; 5.33 + myRow = params->myRow; 5.34 + vectLength = params->vectLength; 5.35 + leftMatrix = params->leftMatrix; 5.36 + rightMatrix = params->rightMatrix; 5.37 + leftMatrixArray = leftMatrix->matrix; 5.38 + rightMatrixArray = rightMatrix->matrix; 5.39 + //===================== DEBUG ====================== 5.40 + #ifdef PRINT_DEBUG_1 5.41 + if( myCol == 0 ) 5.42 + printf("start vector: %d, %d\n", myRow, myCol ); fflush(stdin); 5.43 + #endif 5.44 + //==================================================== 5.45 + 5.46 + for( pos = 0; pos < vectLength; pos++ ) 5.47 + { 5.48 + result += *(leftMatrixArray + myRow * vectLength + pos) * 5.49 + *(rightMatrixArray + pos * vectLength + myCol); 5.50 + } 5.51 + params->result = result; 5.52 + 5.53 + //Send result to results thread 5.54 + VPThread__mutex_lock( globals->vector_mutex, animatingPr );//only get 5.55 + //the lock when results thd is inside wait. 5.56 + globals->currVector = params; 5.57 + VPThread__cond_signal( globals->vector_cond, animatingPr ); 5.58 + VPThread__mutex_unlock( globals->vector_mutex, animatingPr );//release 5.59 + //wait-er -- cond_signal implemented such that wait-er gets lock, no other 5.60 + 5.61 + VPThread__dissipate_thread( animatingPr ); 5.62 + }
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 6.2 +++ b/src/Application/CILK__Matrix_Mult/matmul.cilk Mon Sep 20 17:11:58 2010 -0700 6.3 @@ -0,0 +1,198 @@ 6.4 +/* 6.5 + * Rectangular matrix multiplication. 6.6 + * 6.7 + * See the paper ``Cache-Oblivious Algorithms'', by 6.8 + * Matteo Frigo, Charles E. Leiserson, Harald Prokop, and 6.9 + * Sridhar Ramachandran, FOCS 1999, for an explanation of 6.10 + * why this algorithm is good for caches. 6.11 + * 6.12 + * Author: Matteo Frigo 6.13 + */ 6.14 +static const char *ident __attribute__((__unused__)) 6.15 + = "$HeadURL: https://bradley.csail.mit.edu/svn/repos/cilk/5.4.3/examples/matmul.cilk $ $LastChangedBy: sukhaj $ $Rev: 517 $ $Date: 2003-10-27 10:05:37 -0500 (Mon, 27 Oct 2003) $"; 6.16 + 6.17 +/* 6.18 + * Copyright (c) 2003 Massachusetts Institute of Technology 6.19 + * 6.20 + * This program is free software; you can redistribute it and/or modify 6.21 + * it under the terms of the GNU General Public License as published by 6.22 + * the Free Software Foundation; either version 2 of the License, or 6.23 + * (at your option) any later version. 6.24 + * 6.25 + * This program is distributed in the hope that it will be useful, 6.26 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 6.27 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 6.28 + * GNU General Public License for more details. 6.29 + * 6.30 + * You should have received a copy of the GNU General Public License 6.31 + * along with this program; if not, write to the Free Software 6.32 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 6.33 + * 6.34 + */ 6.35 + 6.36 +#include <cilk-lib.cilkh> 6.37 +#include <stdio.h> 6.38 +#include <stdlib.h> 6.39 +#include <math.h> 6.40 + 6.41 +#define REAL float 6.42 + 6.43 +extern int Cilk_rand(void); 6.44 + 6.45 +void zero(REAL *A, int n) 6.46 +{ 6.47 + int i, j; 6.48 + 6.49 + for (i = 0; i < n; i++) { 6.50 + for (j = 0; j < n; j++) { 6.51 + A[i * n + j] = 0.0; 6.52 + } 6.53 + } 6.54 +} 6.55 + 6.56 +void init(REAL *A, int n) 6.57 +{ 6.58 + int i, j; 6.59 + 6.60 + for (i = 0; i < n; i++) { 6.61 + for (j = 0; j < n; j++) { 6.62 + A[i * n + j] = (double)Cilk_rand(); 6.63 + } 6.64 + } 6.65 +} 6.66 + 6.67 +double maxerror(REAL *A, REAL *B, int n) 6.68 +{ 6.69 + int i, j; 6.70 + double error = 0.0; 6.71 + 6.72 + for (i = 0; i < n; i++) { 6.73 + for (j = 0; j < n; j++) { 6.74 + double diff = (A[i * n + j] - B[i * n + j]) / A[i * n + j]; 6.75 + if (diff < 0) 6.76 + diff = -diff; 6.77 + if (diff > error) 6.78 + error = diff; 6.79 + } 6.80 + } 6.81 + return error; 6.82 +} 6.83 + 6.84 +void iter_matmul(REAL *A, REAL *B, REAL *C, int n) 6.85 +{ 6.86 + int i, j, k; 6.87 + 6.88 + for (i = 0; i < n; i++) 6.89 + for (k = 0; k < n; k++) { 6.90 + REAL c = 0.0; 6.91 + for (j = 0; j < n; j++) 6.92 + c += A[i * n + j] * B[j * n + k]; 6.93 + C[i * n + k] = c; 6.94 + } 6.95 +} 6.96 + 6.97 +/* 6.98 + * A \in M(m, n) 6.99 + * B \in M(n, p) 6.100 + * C \in M(m, p) 6.101 + */ 6.102 +cilk void rec_matmul(REAL *A, REAL *B, REAL *C, int m, int n, int p, int ld, 6.103 + int add) 6.104 +{ 6.105 + if ((m + n + p) <= 64) { 6.106 + int i, j, k; 6.107 + /* base case */ 6.108 + if (add) { 6.109 + for (i = 0; i < m; i++) 6.110 + for (k = 0; k < p; k++) { 6.111 + REAL c = 0.0; 6.112 + for (j = 0; j < n; j++) 6.113 + c += A[i * ld + j] * B[j * ld + k]; 6.114 + C[i * ld + k] += c; 6.115 + } 6.116 + } else { 6.117 + for (i = 0; i < m; i++) 6.118 + for (k = 0; k < p; k++) { 6.119 + REAL c = 0.0; 6.120 + for (j = 0; j < n; j++) 6.121 + c += A[i * ld + j] * B[j * ld + k]; 6.122 + C[i * ld + k] = c; 6.123 + } 6.124 + } 6.125 + } else if (m >= n && n >= p) { 6.126 + int m1 = m >> 1; 6.127 + spawn rec_matmul(A, B, C, m1, n, p, ld, add); 6.128 + spawn rec_matmul(A + m1 * ld, B, C + m1 * ld, m - m1, 6.129 + n, p, ld, add); 6.130 + } else if (n >= m && n >= p) { 6.131 + int n1 = n >> 1; 6.132 + spawn rec_matmul(A, B, C, m, n1, p, ld, add); 6.133 + sync; 6.134 + spawn rec_matmul(A + n1, B + n1 * ld, C, m, n - n1, p, ld, 1); 6.135 + } else { 6.136 + int p1 = p >> 1; 6.137 + spawn rec_matmul(A, B, C, m, n, p1, ld, add); 6.138 + spawn rec_matmul(A, B + p1, C + p1, m, n, p - p1, ld, add); 6.139 + } 6.140 +} 6.141 + 6.142 +cilk int main(int argc, char *argv[]) 6.143 +{ 6.144 + int n; 6.145 + REAL *A, *B, *C1, *C2; 6.146 + double err; 6.147 + Cilk_time tm_begin, tm_elapsed; 6.148 + Cilk_time wk_begin, wk_elapsed; 6.149 + Cilk_time cp_begin, cp_elapsed; 6.150 + 6.151 + if (argc != 2) { 6.152 + fprintf(stderr, "Usage: matmul [<cilk options>] <n>\n"); 6.153 + Cilk_exit(1); 6.154 + } 6.155 + n = atoi(argv[1]); 6.156 + 6.157 + A = malloc(n * n * sizeof(REAL)); 6.158 + B = malloc(n * n * sizeof(REAL)); 6.159 + C1 = malloc(n * n * sizeof(REAL)); 6.160 + C2 = malloc(n * n * sizeof(REAL)); 6.161 + 6.162 + init(A, n); 6.163 + init(B, n); 6.164 + zero(C1, n); 6.165 + zero(C2, n); 6.166 + 6.167 + iter_matmul(A, B, C1, n); 6.168 + 6.169 + /* Timing. "Start" timers */ 6.170 + sync; 6.171 + cp_begin = Cilk_user_critical_path; 6.172 + wk_begin = Cilk_user_work; 6.173 + tm_begin = Cilk_get_wall_time(); 6.174 + 6.175 + spawn rec_matmul(A, B, C2, n, n, n, n, 0); 6.176 + sync; 6.177 + 6.178 + /* Timing. "Stop" timers */ 6.179 + tm_elapsed = Cilk_get_wall_time() - tm_begin; 6.180 + wk_elapsed = Cilk_user_work - wk_begin; 6.181 + cp_elapsed = Cilk_user_critical_path - cp_begin; 6.182 + 6.183 + err = maxerror(C1, C2, n); 6.184 + 6.185 + printf("\nCilk Example: matmul\n"); 6.186 + printf(" running on %d processor%s\n\n", 6.187 + Cilk_active_size, Cilk_active_size > 1 ? "s" : ""); 6.188 + printf("Max error = %g\n", err); 6.189 + printf("Options: size = %d\n", n); 6.190 + printf("Running time = %4f s\n", Cilk_wall_time_to_sec(tm_elapsed)); 6.191 + printf("Work = %4f s\n", Cilk_time_to_sec(wk_elapsed)); 6.192 + printf("Critical path = %4f s\n", Cilk_time_to_sec(cp_elapsed)); 6.193 + printf("``MFLOPS'' = %4f\n\n", 6.194 + 2.0 * n * n * n / (1.0e6 * Cilk_wall_time_to_sec(tm_elapsed))); 6.195 + 6.196 + free(C2); 6.197 + free(C1); 6.198 + free(B); 6.199 + free(A); 6.200 + return 0; 6.201 +}
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/src/Application/Matrix_Mult.c Mon Sep 20 17:11:58 2010 -0700 7.3 @@ -0,0 +1,165 @@ 7.4 +/* 7.5 + * Copyright 2009 OpenSourceCodeStewardshipFoundation.org 7.6 + * Licensed under GNU General Public License version 2 7.7 + * 7.8 + * Author: seanhalle@yahoo.com 7.9 + * 7.10 + * Created on November 15, 2009, 2:35 AM 7.11 + */ 7.12 + 7.13 +#include <malloc.h> 7.14 +#include <stdlib.h> 7.15 + 7.16 +#include "Matrix_Mult.h" 7.17 +#include "ParamHelper/Param.h" 7.18 + 7.19 + 7.20 + 7.21 + void 7.22 +initialize_Input_Matrices_Via( Matrix **leftMatrix, Matrix **rightMatrix, 7.23 + ParamBag *paramBag ) 7.24 + { char *leftMatrixFileName, *rightMatrixFileName; 7.25 + int leftMatrixRows, leftMatrixCols, rightMatrixRows, rightMatrixCols; 7.26 + 7.27 + ParamStruc *param; 7.28 + param = getParamFromBag( "leftMatrixRows", paramBag ); 7.29 + leftMatrixRows = param->intValue; 7.30 + param = getParamFromBag( "leftMatrixCols", paramBag ); 7.31 + leftMatrixCols = param->intValue; 7.32 + *leftMatrix = makeMatrix_WithResMat( leftMatrixRows, leftMatrixCols ); 7.33 + 7.34 + param = getParamFromBag( "leftMatrixFileName", paramBag ); 7.35 + leftMatrixFileName = param->strValue; //no need to copy 7.36 + read_Matrix_From_File( *leftMatrix, leftMatrixFileName ); 7.37 + 7.38 + param = getParamFromBag( "rightMatrixRows", paramBag ); 7.39 + rightMatrixRows = param->intValue; 7.40 + param = getParamFromBag( "rightMatrixCols", paramBag ); 7.41 + rightMatrixCols = param->intValue; 7.42 + *rightMatrix = makeMatrix_WithResMat( rightMatrixRows, rightMatrixCols ); 7.43 + 7.44 + param = getParamFromBag( "rightMatrixFileName", paramBag ); 7.45 + rightMatrixFileName = param->strValue; 7.46 + read_Matrix_From_File( *rightMatrix, rightMatrixFileName ); 7.47 + } 7.48 + 7.49 + 7.50 +void parseLineIntoRow( char *line, float32* row ); 7.51 + 7.52 + 7.53 + void 7.54 +read_Matrix_From_File( Matrix *matrixStruc, char *matrixFileName ) 7.55 + { int row, maxRead, numRows, numCols; 7.56 + float32 *matrixStart; 7.57 + size_t lineSz = 0; 7.58 + FILE *file; 7.59 + char *line = NULL; 7.60 + 7.61 + lineSz = 50000; //max length of line in a matrix data file 7.62 + line = (char *) malloc( lineSz ); 7.63 + if( line == NULL ) printf( "no mem for matrix line" ); 7.64 + 7.65 + numRows = matrixStruc->numRows; 7.66 + numCols = matrixStruc->numCols; 7.67 + matrixStart = matrixStruc->matrix; 7.68 + 7.69 + file = fopen( matrixFileName, "r" ); 7.70 + if( file == NULL ) { printf( "\nCouldn't open file!!\n"); exit(1);} 7.71 + fseek( file, 0, SEEK_SET ); 7.72 + for( row = 0; row < numRows; row++ ) 7.73 + { 7.74 + if( feof( file ) ) printf( "file ran out too soon" ); 7.75 + maxRead = getline( &line, &lineSz, file ); 7.76 + if( maxRead == -1 ) printf( "prob reading mat line"); 7.77 + 7.78 + if( *line == '\n') continue; //blank line 7.79 + if( *line == '/' ) continue; //comment line 7.80 + 7.81 + parseLineIntoRow( line, matrixStart + row * numCols ); 7.82 + } 7.83 + free( line ); 7.84 + } 7.85 + 7.86 +/*This function relies on each line having the proper number of cols. It 7.87 + * doesn't check, nor enforce, so if the file is improperly formatted it 7.88 + * can write over unrelated memory 7.89 + */ 7.90 + void 7.91 +parseLineIntoRow( char *line, float32* row ) 7.92 + { 7.93 + char *valueStr, *searchPos; 7.94 + 7.95 + //read the float values 7.96 + searchPos = valueStr = line; //start 7.97 + 7.98 + for( ; *searchPos != 0; searchPos++) //bit dangerous, should use buff len 7.99 + { 7.100 + if( *searchPos == '\n' ) //last col.. relying on well-formatted file 7.101 + { *searchPos = 0; 7.102 + *row = atof( valueStr ); 7.103 + break; //end FOR loop 7.104 + } 7.105 + if( *searchPos == ',' ) 7.106 + { *searchPos = 0; //mark end of string 7.107 + *row = (float32) atof( valueStr ); 7.108 + row += 1; //address arith 7.109 + //skip any spaces before digits.. use searchPos + 1 to skip the 0 7.110 + for( ; *(searchPos + 1)== ' ' && *(searchPos + 1) !=0; searchPos++); 7.111 + valueStr = searchPos + 1; 7.112 + } 7.113 + } 7.114 + } 7.115 + 7.116 + //========================================================================== 7.117 + 7.118 +/*In the "_Flat" version of constructor, do only malloc of the top data struc 7.119 + * and set values in that top-level. Don't malloc any sub-structures. 7.120 + */ 7.121 + Matrix * 7.122 +makeMatrix_Flat( int32 numRows, int32 numCols ) 7.123 + { Matrix * retMatrix; 7.124 + retMatrix = malloc( sizeof( Matrix ) ); 7.125 + retMatrix->numRows = numRows; 7.126 + retMatrix->numCols = numCols; 7.127 + 7.128 + return retMatrix; 7.129 + } 7.130 + 7.131 + Matrix * 7.132 +makeMatrix_WithResMat( int32 numRows, int32 numCols ) 7.133 + { Matrix * retMatrix; 7.134 + retMatrix = malloc( sizeof( Matrix ) ); 7.135 + retMatrix->numRows = numRows; 7.136 + retMatrix->numCols = numCols; 7.137 + retMatrix->matrix = malloc( numRows * numCols * sizeof(float32) ); 7.138 + 7.139 + return retMatrix; 7.140 + } 7.141 + 7.142 + void 7.143 +freeMatrix_Flat( Matrix * matrix ) 7.144 + { //( matrix ); 7.145 + } 7.146 + void 7.147 +freeMatrix( Matrix * matrix ) 7.148 + { free( matrix->matrix ); 7.149 + free( matrix ); 7.150 + } 7.151 + 7.152 +void 7.153 +printMatrix( Matrix *matrix ) 7.154 + { int r, c, numRows, numCols; 7.155 + float32 *matrixArray; 7.156 + 7.157 + numRows = matrix->numRows; 7.158 + numCols = matrix->numCols; 7.159 + matrixArray = matrix->matrix; 7.160 + 7.161 + for( r = 0; r < numRows; r++ ) 7.162 + { for( c = 0; c < numCols; c++ ) 7.163 + { printf( "%f | ", *(matrixArray + r*numCols + c) ); 7.164 + } 7.165 + printf("\n"); 7.166 + } 7.167 + } 7.168 +
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 8.2 +++ b/src/Application/Matrix_Mult.h Mon Sep 20 17:11:58 2010 -0700 8.3 @@ -0,0 +1,77 @@ 8.4 +/* 8.5 + * Copyright Oct 24, 2009 OpenSourceCodeStewardshipFoundation.org 8.6 + * Licensed under GNU General Public License version 2 8.7 + */ 8.8 + 8.9 +#ifndef MATRIX_MULT_H_ 8.10 +#define MATRIX_MULT_H_ 8.11 + 8.12 +#include <stdio.h> 8.13 +#include <unistd.h> 8.14 +#include <malloc.h> 8.15 + 8.16 +#include "../VPThread__lib/VMS/VMS_primitive_data_types.h" 8.17 +#include "ParamHelper/Param.h" 8.18 + 8.19 +//============================== Structures ============================== 8.20 + 8.21 +typedef 8.22 +struct 8.23 + { int32 numRows; 8.24 + int32 numCols; 8.25 + float32 *matrix; //2D, but dynamically sized, so use addr arith 8.26 + } 8.27 +Matrix; 8.28 + 8.29 +/* This is the "appSpecificPiece" that is carried inside a DKUPiece. 8.30 + * In the DKUPiece data struc it is declared to be of type "void *". This 8.31 + * allows the application to define any data structure it wants and put it 8.32 + * into a DKUPiece. 8.33 + * When the app specific info is used, it is in app code, so it is cast to 8.34 + * the correct type to tell the compiler how to access fields. 8.35 + * This keeps all app-specific things out of the DKU directory, as per the 8.36 + * DKU standard. */ 8.37 +typedef 8.38 +struct 8.39 + { 8.40 + // pointers to shared data.. the result matrix must be created when the 8.41 + // left and right matrices are put into the root ancestor DKUPiece. 8.42 + Matrix * leftMatrix; 8.43 + Matrix * rightMatrix; 8.44 + Matrix * resultMatrix; 8.45 + 8.46 + // define the starting and ending boundaries for this piece of the 8.47 + // result matrix. These are derivable from the left and right 8.48 + // matrices, but included them for readability of code. 8.49 + int prodStartRow, prodEndRow; 8.50 + int prodStartCol, prodEndCol; 8.51 + // Start and end of the portion of the left matrix that contributes to 8.52 + // this piece of the product 8.53 + int leftStartRow, leftEndRow; 8.54 + int leftStartCol, leftEndCol; 8.55 + // Start and end of the portion of the right matrix that contributes to 8.56 + // this piece of the product 8.57 + int rightStartRow, rightEndRow; 8.58 + int rightStartCol, rightEndCol; 8.59 + } 8.60 +MatrixProdPiece; 8.61 + 8.62 +//============================== Functions ================================ 8.63 +void readFile(); 8.64 + 8.65 +Matrix *makeMatrix( int32 numRows, int32 numCols ); 8.66 +Matrix *makeMatrix_Flat( int32 numRows, int32 numCols ); 8.67 +Matrix *makeMatrix_WithResMat( int32 numRows, int32 numCols ); 8.68 +void freeMatrix_Flat( Matrix * matrix ); 8.69 +void freeMatrix( Matrix * matrix ); 8.70 +void printMatrix( Matrix *matrix ); 8.71 + 8.72 +void read_Matrix_From_File( Matrix *matrixStruc, char *matrixFileName ); 8.73 + 8.74 +void 8.75 +initialize_Input_Matrices_Via( Matrix **leftMatrix, Matrix **rightMatrix, 8.76 + ParamBag *paramBag ); 8.77 + 8.78 +//=========================================================================== 8.79 + 8.80 +#endif /*MATRIX_MULT_H_*/
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 9.2 +++ b/src/Application/main.c Mon Sep 20 17:11:58 2010 -0700 9.3 @@ -0,0 +1,35 @@ 9.4 +/* 9.5 + * Copyright Oct 24, 2009 OpenSourceCodeStewardshipFoundation.org 9.6 + * Licensed under GNU General Public License version 2 9.7 + * 9.8 + * author seanhalle@yahoo.com 9.9 + */ 9.10 + 9.11 +#include <malloc.h> 9.12 +#include <stdlib.h> 9.13 + 9.14 +#include "Matrix_Mult.h" 9.15 +#include "VPThread__Matrix_Mult/VPThread__Matrix_Mult.h" 9.16 + 9.17 +/** 9.18 + *Matrix multiply program written using VMS_HW piggy-back language 9.19 + * 9.20 + */ 9.21 +int main( int argc, char **argv ) 9.22 + { Matrix *leftMatrix, *rightMatrix, *resultMatrix; 9.23 + ParamBag *paramBag; 9.24 + 9.25 + paramBag = makeParamBag(); 9.26 + readParamFileIntoBag( argv[1], paramBag ); 9.27 + initialize_Input_Matrices_Via( &leftMatrix, &rightMatrix, paramBag ); 9.28 + 9.29 + resultMatrix = multiplyTheseMatrices( leftMatrix, rightMatrix ); 9.30 + 9.31 + printf("\nresult matrix: \n"); 9.32 + 9.33 +// printMatrix( resultMatrix ); 9.34 + 9.35 +// VPThread__print_stats(); 9.36 + 9.37 + exit(0); //cleans up 9.38 + }
