changeset 0:b17ddcc97ffc

Just created -- sample code, not CILK code yet, just skeleton
author Me
date Mon, 20 Sep 2010 17:11:58 -0700
parents
children dd5387f362f6
files src/Application/CILK__Matrix_Mult/CILK__Matrix_Mult.h src/Application/CILK__Matrix_Mult/Divide_Pr.c src/Application/CILK__Matrix_Mult/EntryPoint.c src/Application/CILK__Matrix_Mult/Result_Pr.c src/Application/CILK__Matrix_Mult/Vector_Pr.c src/Application/CILK__Matrix_Mult/matmul.cilk src/Application/Matrix_Mult.c src/Application/Matrix_Mult.h src/Application/main.c
diffstat 9 files changed, 862 insertions(+), 0 deletions(-) [+]
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/Application/CILK__Matrix_Mult/CILK__Matrix_Mult.h	Mon Sep 20 17:11:58 2010 -0700
     1.3 @@ -0,0 +1,75 @@
     1.4 +/*
     1.5 + *  Copyright Oct 24, 2009 OpenSourceCodeStewardshipFoundation.org
     1.6 + *  Licensed under GNU General Public License version 2
     1.7 + */
     1.8 +
     1.9 +#ifndef _VPThread__MATRIX_MULT_H_
    1.10 +#define _VPThread__MATRIX_MULT_H_
    1.11 +
    1.12 +#include <stdio.h>
    1.13 +
    1.14 +#include "../../VPThread__lib/VPThread.h"
    1.15 +#include "../Matrix_Mult.h"
    1.16 +
    1.17 +//==============================  Structures  ==============================
    1.18 +typedef struct
    1.19 + {
    1.20 +   Matrix *leftMatrix;
    1.21 +   Matrix *rightMatrix;
    1.22 +   Matrix *resultMatrix;
    1.23 + }
    1.24 +DividerParams;
    1.25 +
    1.26 +typedef struct
    1.27 + {
    1.28 +   VirtProcr *dividerThd;
    1.29 +   int numRows;
    1.30 +   int numCols;
    1.31 + }
    1.32 +ResultsParams;
    1.33 +
    1.34 +typedef struct
    1.35 + { VirtProcr *resultsThd;
    1.36 +   int        myCol;
    1.37 +   int        myRow;
    1.38 +   int        vectLength;
    1.39 +   Matrix    *leftMatrix;
    1.40 +   Matrix    *rightMatrix;
    1.41 +   float32    result;
    1.42 + }
    1.43 +VectorParams;
    1.44 +
    1.45 +typedef struct
    1.46 + {
    1.47 +      //for communicating vector results to results Thd
    1.48 +   int32         vector_mutex;
    1.49 +   int32         vector_cond;
    1.50 +   VectorParams *currVector;
    1.51 +
    1.52 +      //for communicating results array back to seed (divider) Thd
    1.53 +   int32         results_mutex;
    1.54 +   int32         results_cond;
    1.55 +   float32      *results;
    1.56 +
    1.57 +      //for ensuring results thd has vector lock before making vector thds
    1.58 +   int32         start_mutex;
    1.59 +   int32         start_cond;
    1.60 +
    1.61 +   Matrix *rightMatrix;
    1.62 +   Matrix *resultMatrix;
    1.63 + }
    1.64 +MatrixMultGlobals;
    1.65 +
    1.66 +
    1.67 +//============================= Processor Functions =========================
    1.68 +void divideIntoVectors( void *data, VirtProcr *animatingPr );
    1.69 +void calcVector(        void *data, VirtProcr *animatingPr );
    1.70 +void gatherResults(     void *data, VirtProcr *animatingPr );
    1.71 +
    1.72 +
    1.73 +//================================ Entry Point ==============================
    1.74 +Matrix *
    1.75 +multiplyTheseMatrices( Matrix *leftMatrix, Matrix *rightMatrix );
    1.76 +
    1.77 +
    1.78 +#endif /*_VPThread__MATRIX_MULT_H_*/
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/src/Application/CILK__Matrix_Mult/Divide_Pr.c	Mon Sep 20 17:11:58 2010 -0700
     2.3 @@ -0,0 +1,116 @@
     2.4 +/*
     2.5 + *  Copyright 2009 OpenSourceStewardshipFoundation.org
     2.6 + *  Licensed under GNU General Public License version 2
     2.7 + *
     2.8 + * Author: seanhalle@yahoo.com
     2.9 + *
    2.10 + */
    2.11 +
    2.12 +
    2.13 +#include "VPThread__Matrix_Mult.h"
    2.14 +
    2.15 +/*Divider creates one processor for every row-col pair.
    2.16 + * It hands them:
    2.17 + *  the name of the result processor that they should send their results to,
    2.18 + *  the left and right matrices, and the row and col they should multiply
    2.19 + *  the length of the vector
    2.20 + * It first creates the result processor, then all the vector processors,
    2.21 + *  then does a receive of a message from the result processor that gives
    2.22 + *  the divider ownership of the result matrix.
    2.23 + * Finally, the divider returns the result matrix out of the VPThread system.
    2.24 + */
    2.25 +void divideIntoVectors( void *_dividerParams, VirtProcr *animatingThd )
    2.26 + { VirtProcr         *resultsThd;
    2.27 +   DividerParams     *dividerParams;
    2.28 +   ResultsParams     *resultsParams;
    2.29 +   VectorParams      *vectParams;
    2.30 +   Matrix            *leftMatrix, *rightMatrix, *resultMatrix;
    2.31 +   void              *msg;
    2.32 +   MatrixMultGlobals *globals;
    2.33 +
    2.34 +//   printf("start divide\n"); fflush(stdin);
    2.35 +   
    2.36 +   dividerParams   = (DividerParams *)_dividerParams;
    2.37 +   
    2.38 +   leftMatrix      = dividerParams->leftMatrix;
    2.39 +   rightMatrix     = dividerParams->rightMatrix;
    2.40 +
    2.41 +   resultsParams             = malloc( sizeof(ResultsParams) );
    2.42 +   resultsParams->dividerThd = animatingThd;
    2.43 +   resultsParams->numCols    = rightMatrix->numCols;
    2.44 +   resultsParams->numRows    = leftMatrix->numRows;
    2.45 +
    2.46 +   //===========  Set up global vars, including conds and mutexes ===========
    2.47 +   globals = malloc( sizeof(MatrixMultGlobals) );
    2.48 +   VPThread__set_globals_to( globals );
    2.49 +
    2.50 +   globals->results_mutex = VPThread__make_mutex( animatingThd );
    2.51 +   globals->results_cond  = VPThread__make_cond( globals->results_mutex,
    2.52 +                                                              animatingThd );
    2.53 +
    2.54 +   globals->vector_mutex = VPThread__make_mutex( animatingThd );
    2.55 +   globals->vector_cond  = VPThread__make_cond( globals->vector_mutex,
    2.56 +                                                              animatingThd );
    2.57 +
    2.58 +   globals->start_mutex = VPThread__make_mutex( animatingThd );
    2.59 +   globals->start_cond  = VPThread__make_cond( globals->start_mutex,
    2.60 +                                                              animatingThd );
    2.61 +   //========================================================================
    2.62 +
    2.63 +      //get results-comm lock before create results-thd, to ensure it can't
    2.64 +      // signal that results are available before this thd is waiting on cond
    2.65 +   VPThread__mutex_lock( globals->results_mutex, animatingThd );
    2.66 +
    2.67 +      //also get the start lock & use to ensure no vector threads send a
    2.68 +      // signal before the results thread is waiting on vector cond
    2.69 +   VPThread__mutex_lock( globals->start_mutex, animatingThd );
    2.70 +
    2.71 +
    2.72 +   VPThread__create_thread( &gatherResults, resultsParams, animatingThd );
    2.73 +
    2.74 +      //Now wait for results thd to signal that it has vector lock
    2.75 +   VPThread__cond_wait(  globals->start_cond,  animatingThd );
    2.76 +   VPThread__mutex_unlock( globals->start_mutex, animatingThd );//done w/lock
    2.77 +
    2.78 +
    2.79 +      //make the vector thds
    2.80 +   int row, col;
    2.81 +   for( row = 0; row < leftMatrix->numRows; row++ )
    2.82 +    { for( col = 0; col < rightMatrix->numCols; col++ )
    2.83 +       {
    2.84 +         vectParams              = malloc( sizeof(VectorParams) );
    2.85 +         vectParams->myCol       = col;
    2.86 +         vectParams->myRow       = row;
    2.87 +         vectParams->vectLength  = leftMatrix->numCols;
    2.88 +         vectParams->leftMatrix  = leftMatrix;
    2.89 +         vectParams->rightMatrix = rightMatrix;
    2.90 +         
    2.91 +         VPThread__create_thread( &calcVector, vectParams, animatingThd );
    2.92 +       }
    2.93 +               //===================  DEBUG  ===================
    2.94 +               #ifdef PRINT_DEBUG_1
    2.95 +               printf("created thread: %d, %d\n", row, col);
    2.96 +               #endif
    2.97 +               //==============================================
    2.98 +    }
    2.99 +
   2.100 +      //Wait for results thread to say results are good
   2.101 +   VPThread__cond_wait(  globals->results_cond,  animatingThd );
   2.102 +
   2.103 +      //The results of the all the work have to be linked-to from the data
   2.104 +      // struc given to the seed procr -- this divide func is animated by
   2.105 +      // that seed procr, so have to link results to the _dividerParams.
   2.106 +   resultMatrix            = malloc( sizeof(Matrix) );
   2.107 +   resultMatrix->numCols   = rightMatrix->numCols;
   2.108 +   resultMatrix->numRows   = leftMatrix->numRows;
   2.109 +   dividerParams->resultMatrix   = resultMatrix;
   2.110 +   resultMatrix->matrix          = globals->results;
   2.111 +
   2.112 +      //done with communication, release lock
   2.113 +   VPThread__mutex_unlock( globals->results_mutex, animatingThd );
   2.114 +
   2.115 +
   2.116 +   VPThread__dissipate_thread( animatingThd ); //all Thds dissipate when done
   2.117 +      //when all of the threads have dissipated, the "create seed and do
   2.118 +      // work" call in the entry point function returns
   2.119 + }
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/src/Application/CILK__Matrix_Mult/EntryPoint.c	Mon Sep 20 17:11:58 2010 -0700
     3.3 @@ -0,0 +1,48 @@
     3.4 +/*
     3.5 + *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
     3.6 + *  Licensed under GNU General Public License version 2
     3.7 + *
     3.8 + * Author: seanhalle@yahoo.com
     3.9 + *
    3.10 + */
    3.11 +
    3.12 +#include <math.h>
    3.13 +
    3.14 +#include "VPThread__Matrix_Mult.h"
    3.15 +
    3.16 +
    3.17 +
    3.18 +/*Every VPThread system has an "entry point" function that creates the first
    3.19 + * processor, which starts the chain of creating more processors..
    3.20 + * eventually all of the processors will dissipate themselves, and
    3.21 + * return.
    3.22 + *
    3.23 + *This entry-point function follows the same pattern as all entry-point
    3.24 + * functions do:
    3.25 + *1) it creates the params for the seed processor, from the
    3.26 + *    parameters passed into the entry-point function
    3.27 + *2) it calls VPThread__create_seed_procr_and_do_work
    3.28 + *3) it gets the return value from the params struc, frees the params struc,
    3.29 + *    and returns the value from the function
    3.30 + *
    3.31 + */
    3.32 +Matrix *
    3.33 +multiplyTheseMatrices( Matrix *leftMatrix, Matrix *rightMatrix )
    3.34 + { Matrix          *resMatrix;
    3.35 +   DividerParams   *dividerParams;
    3.36 +
    3.37 +
    3.38 +   dividerParams              = malloc( sizeof( DividerParams ) );
    3.39 +   dividerParams->leftMatrix  = leftMatrix;
    3.40 +   dividerParams->rightMatrix = rightMatrix;
    3.41 +
    3.42 +
    3.43 +      //create divider processor, start doing the work, and wait till done
    3.44 +      //This function is the "border crossing" between normal code and VPThread
    3.45 +   VPThread__create_seed_procr_and_do_work( &divideIntoVectors, dividerParams );
    3.46 +   
    3.47 +      //get result matrix and return it
    3.48 +   resMatrix = dividerParams->resultMatrix;
    3.49 +   free( dividerParams );
    3.50 +   return resMatrix;
    3.51 + }
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/src/Application/CILK__Matrix_Mult/Result_Pr.c	Mon Sep 20 17:11:58 2010 -0700
     4.3 @@ -0,0 +1,89 @@
     4.4 +/*
     4.5 + *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
     4.6 + *  Licensed under GNU General Public License version 2
     4.7 + *
     4.8 + * Author: seanhalle@yahoo.com
     4.9 + *
    4.10 + */
    4.11 +
    4.12 +#include "VPThread__Matrix_Mult.h"
    4.13 +
    4.14 +/*The Result Processor gets a message from each of the vector processors,
    4.15 + * puts the result from the message in its location in the result-
    4.16 + * matrix, and increments the count of results.
    4.17 + *
    4.18 + *After the count reaches the point that all results have been received, it
    4.19 + * returns the result matrix and dissipates.
    4.20 + */
    4.21 +void gatherResults( void *_params, VirtProcr *animatingPr )
    4.22 + { VirtProcr *dividerPr;
    4.23 +   ResultsParams  *params;
    4.24 +   int             numRows, numCols, numCells, count=0;
    4.25 +   float32        *resultMatrixArray;
    4.26 +   void           *msg;
    4.27 +   VectorParams   *aResult;
    4.28 +   MatrixMultGlobals *globals =(MatrixMultGlobals *)VPThread__give_globals();
    4.29 +
    4.30 +
    4.31 +      //get vector-comm lock before loop, so that this thd keeps lock after
    4.32 +      // one wait until it enters the next wait -- forces see-saw btwn
    4.33 +      // waiters and signalers -- wait-signal-wait-signal-...
    4.34 +   VPThread__mutex_lock( globals->vector_mutex, animatingPr );
    4.35 +
    4.36 +      //Tell divider that have the vector lock -- so it's sure won't miss any
    4.37 +      // signals from the vector-threads it's about to create
    4.38 +      //Don't need a signal variable -- this thd can't be created until
    4.39 +      // divider thd already has the start lock
    4.40 +   VPThread__mutex_lock( globals->start_mutex, animatingPr );//finish wait
    4.41 +   VPThread__cond_signal( globals->start_cond,  animatingPr );
    4.42 +   VPThread__mutex_unlock( globals->start_mutex, animatingPr );//finish wait
    4.43 +
    4.44 +            //=====================  DEBUG  ======================
    4.45 +            #ifdef PRINT_DEBUG
    4.46 +            printf("**Result Pr has the lock**\n" );
    4.47 +            fflush(stdin);
    4.48 +            #endif
    4.49 +            //====================================================
    4.50 +
    4.51 +   params    = (ResultsParams *)_params;
    4.52 +   dividerPr = params->dividerThd;
    4.53 +   numCols   = params->numCols;
    4.54 +   numRows   = params->numRows;
    4.55 +   numCells  = numRows * numCols;
    4.56 +
    4.57 +   resultMatrixArray = malloc( numCells * sizeof( float32 ) );
    4.58 +
    4.59 +
    4.60 +   while( count < numCells )
    4.61 +    {
    4.62 +         //receive a vector-result from a vector-thread
    4.63 +      VPThread__cond_wait(  globals->vector_cond,  animatingPr );
    4.64 +
    4.65 +      aResult = globals->currVector;
    4.66 +      *(resultMatrixArray + aResult->myRow * numCols + aResult->myCol) =
    4.67 +                                                             aResult->result;
    4.68 +      count++;
    4.69 +            //=====================  DEBUG  ======================
    4.70 +            #ifdef PRINT_DEBUG_1
    4.71 +            if( count - count/numRows * numRows == 0  )
    4.72 +             { printf("%d vector result: %f\n", count, aResult->result );
    4.73 +               fflush(stdin);
    4.74 +             }
    4.75 +            #endif
    4.76 +            //====================================================
    4.77 +
    4.78 +    }
    4.79 +      //all comms done, release lock
    4.80 +   VPThread__mutex_unlock( globals->vector_mutex, animatingPr );
    4.81 +   
    4.82 +      //Send result to divider (seed) thread
    4.83 +      // note, divider thd had to hold the results-comm lock before creating
    4.84 +      // this thread, to be sure no race
    4.85 +   VPThread__mutex_lock(   globals->results_mutex, animatingPr );
    4.86 +   globals->results = resultMatrixArray;
    4.87 +   VPThread__cond_signal(  globals->results_cond,  animatingPr );
    4.88 +   VPThread__mutex_unlock( globals->results_mutex, animatingPr ); //releases
    4.89 +   //divider thread from its wait, at point this executes
    4.90 +
    4.91 +   VPThread__dissipate_thread( animatingPr );  //frees any data owned by procr
    4.92 + }
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/src/Application/CILK__Matrix_Mult/Vector_Pr.c	Mon Sep 20 17:11:58 2010 -0700
     5.3 @@ -0,0 +1,59 @@
     5.4 +/* 
     5.5 + *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
     5.6 + *  Licensed under GNU General Public License version 2
     5.7 + *
     5.8 + * Author: SeanHalle@yahoo.com
     5.9 + *
    5.10 + */
    5.11 +
    5.12 +#include "VPThread__Matrix_Mult.h"
    5.13 +
    5.14 +/*A Vector processor is created with an environment that holds two matrices,
    5.15 + * the row and col that it owns, and the name of a result gathering
    5.16 + * processor.
    5.17 + *It calculates its vector product then sends the result to the result
    5.18 + * processor, which puts it into the result matrix and returns that matrix
    5.19 + * when all is done.
    5.20 + */
    5.21 + void
    5.22 +calcVector( void *data, VirtProcr *animatingPr )
    5.23 + { 
    5.24 +   VectorParams   *params;
    5.25 +   VirtProcr      *resultPr;
    5.26 +   int             myRow, myCol, vectLength, pos;
    5.27 +   float32        *leftMatrixArray, *rightMatrixArray, result = 0.0;
    5.28 +   Matrix         *leftMatrix, *rightMatrix;
    5.29 +   MatrixMultGlobals *globals =(MatrixMultGlobals *)VPThread__give_globals();
    5.30 +
    5.31 +   params      = (VectorParams *)data;
    5.32 +   myCol       = params->myCol;
    5.33 +   myRow       = params->myRow;
    5.34 +   vectLength  = params->vectLength;
    5.35 +   leftMatrix  = params->leftMatrix;
    5.36 +   rightMatrix = params->rightMatrix;
    5.37 +   leftMatrixArray  = leftMatrix->matrix;
    5.38 +   rightMatrixArray = rightMatrix->matrix;
    5.39 +         //=====================  DEBUG  ======================
    5.40 +         #ifdef PRINT_DEBUG_1
    5.41 +         if( myCol == 0 )
    5.42 +            printf("start vector: %d, %d\n", myRow, myCol ); fflush(stdin);
    5.43 +         #endif
    5.44 +         //====================================================
    5.45 +
    5.46 +   for( pos = 0; pos < vectLength; pos++ )
    5.47 +    {
    5.48 +      result += *(leftMatrixArray  + myRow * vectLength + pos)  *
    5.49 +                *(rightMatrixArray + pos   * vectLength + myCol);
    5.50 +    }
    5.51 +   params->result = result;
    5.52 +
    5.53 +      //Send result to results thread
    5.54 +   VPThread__mutex_lock(   globals->vector_mutex, animatingPr );//only get
    5.55 +   //the lock when results thd is inside wait.
    5.56 +   globals->currVector = params;
    5.57 +   VPThread__cond_signal(  globals->vector_cond,  animatingPr );
    5.58 +   VPThread__mutex_unlock( globals->vector_mutex, animatingPr );//release
    5.59 +   //wait-er -- cond_signal implemented such that wait-er gets lock, no other
    5.60 +
    5.61 +   VPThread__dissipate_thread( animatingPr );
    5.62 + }
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/src/Application/CILK__Matrix_Mult/matmul.cilk	Mon Sep 20 17:11:58 2010 -0700
     6.3 @@ -0,0 +1,198 @@
     6.4 +/* 
     6.5 + * Rectangular matrix multiplication.
     6.6 + *
     6.7 + * See the paper ``Cache-Oblivious Algorithms'', by
     6.8 + * Matteo Frigo, Charles E. Leiserson, Harald Prokop, and 
     6.9 + * Sridhar Ramachandran, FOCS 1999, for an explanation of
    6.10 + * why this algorithm is good for caches.
    6.11 + *
    6.12 + * Author: Matteo Frigo
    6.13 + */
    6.14 +static const char *ident __attribute__((__unused__))
    6.15 +     = "$HeadURL: https://bradley.csail.mit.edu/svn/repos/cilk/5.4.3/examples/matmul.cilk $ $LastChangedBy: sukhaj $ $Rev: 517 $ $Date: 2003-10-27 10:05:37 -0500 (Mon, 27 Oct 2003) $";
    6.16 +
    6.17 +/*
    6.18 + * Copyright (c) 2003 Massachusetts Institute of Technology
    6.19 + *
    6.20 + * This program is free software; you can redistribute it and/or modify
    6.21 + * it under the terms of the GNU General Public License as published by
    6.22 + * the Free Software Foundation; either version 2 of the License, or
    6.23 + * (at your option) any later version.
    6.24 + *
    6.25 + * This program is distributed in the hope that it will be useful,
    6.26 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
    6.27 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    6.28 + * GNU General Public License for more details.
    6.29 + *
    6.30 + * You should have received a copy of the GNU General Public License
    6.31 + * along with this program; if not, write to the Free Software
    6.32 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    6.33 + *
    6.34 + */
    6.35 +
    6.36 +#include <cilk-lib.cilkh>
    6.37 +#include <stdio.h>
    6.38 +#include <stdlib.h>
    6.39 +#include <math.h>
    6.40 +
    6.41 +#define REAL float
    6.42 +
    6.43 +extern int Cilk_rand(void);
    6.44 +
    6.45 +void zero(REAL *A, int n)
    6.46 +{
    6.47 +     int i, j;
    6.48 +     
    6.49 +     for (i = 0; i < n; i++) {
    6.50 +	  for (j = 0; j < n; j++) {
    6.51 +	       A[i * n + j] = 0.0;
    6.52 +	  }
    6.53 +     }
    6.54 +}
    6.55 +
    6.56 +void init(REAL *A, int n)
    6.57 +{
    6.58 +     int i, j;
    6.59 +     
    6.60 +     for (i = 0; i < n; i++) {
    6.61 +	  for (j = 0; j < n; j++) {
    6.62 +	       A[i * n + j] = (double)Cilk_rand();
    6.63 +	  }
    6.64 +     }
    6.65 +}
    6.66 +
    6.67 +double maxerror(REAL *A, REAL *B, int n)
    6.68 +{
    6.69 +     int i, j;
    6.70 +     double error = 0.0;
    6.71 +     
    6.72 +     for (i = 0; i < n; i++) {
    6.73 +	  for (j = 0; j < n; j++) {
    6.74 +	       double diff = (A[i * n + j] - B[i * n + j]) / A[i * n + j];
    6.75 +	       if (diff < 0)
    6.76 +		    diff = -diff;
    6.77 +	       if (diff > error)
    6.78 +		    error = diff;
    6.79 +	  }
    6.80 +     }
    6.81 +     return error;
    6.82 +}
    6.83 +
    6.84 +void iter_matmul(REAL *A, REAL *B, REAL *C, int n)
    6.85 +{
    6.86 +     int i, j, k;
    6.87 +     
    6.88 +     for (i = 0; i < n; i++)
    6.89 +	  for (k = 0; k < n; k++) {
    6.90 +	       REAL c = 0.0;
    6.91 +	       for (j = 0; j < n; j++)
    6.92 +		    c += A[i * n + j] * B[j * n + k];
    6.93 +	       C[i * n + k] = c;
    6.94 +	  }
    6.95 +}
    6.96 +
    6.97 +/*
    6.98 + * A \in M(m, n)
    6.99 + * B \in M(n, p)
   6.100 + * C \in M(m, p)
   6.101 + */
   6.102 +cilk void rec_matmul(REAL *A, REAL *B, REAL *C, int m, int n, int p, int ld,
   6.103 +		     int add)
   6.104 +{
   6.105 +     if ((m + n + p) <= 64) {
   6.106 +	  int i, j, k;
   6.107 +	  /* base case */
   6.108 +	  if (add) {
   6.109 +	       for (i = 0; i < m; i++)
   6.110 +		    for (k = 0; k < p; k++) {
   6.111 +			 REAL c = 0.0;
   6.112 +			 for (j = 0; j < n; j++)
   6.113 +			      c += A[i * ld + j] * B[j * ld + k];
   6.114 +			 C[i * ld + k] += c;
   6.115 +		    }
   6.116 +	  } else {
   6.117 +	       for (i = 0; i < m; i++)
   6.118 +		    for (k = 0; k < p; k++) {
   6.119 +			 REAL c = 0.0;
   6.120 +			 for (j = 0; j < n; j++)
   6.121 +			      c += A[i * ld + j] * B[j * ld + k];
   6.122 +			 C[i * ld + k] = c;
   6.123 +		    }
   6.124 +	  }
   6.125 +     } else if (m >= n && n >= p) {
   6.126 +	  int m1 = m >> 1;
   6.127 +	  spawn rec_matmul(A, B, C, m1, n, p, ld, add);
   6.128 +	  spawn rec_matmul(A + m1 * ld, B, C + m1 * ld, m - m1,
   6.129 +			   n, p, ld, add);
   6.130 +     } else if (n >= m && n >= p) {
   6.131 +	  int n1 = n >> 1;
   6.132 +	  spawn rec_matmul(A, B, C, m, n1, p, ld, add);
   6.133 +	  sync;
   6.134 +	  spawn rec_matmul(A + n1, B + n1 * ld, C, m, n - n1, p, ld, 1);
   6.135 +     } else {
   6.136 +	  int p1 = p >> 1;
   6.137 +	  spawn rec_matmul(A, B, C, m, n, p1, ld, add);
   6.138 +	  spawn rec_matmul(A, B + p1, C + p1, m, n, p - p1, ld, add);
   6.139 +     }
   6.140 +}
   6.141 +
   6.142 +cilk int main(int argc, char *argv[])
   6.143 +{
   6.144 +     int n;
   6.145 +     REAL *A, *B, *C1, *C2;
   6.146 +     double err;
   6.147 +     Cilk_time tm_begin, tm_elapsed;
   6.148 +     Cilk_time wk_begin, wk_elapsed;
   6.149 +     Cilk_time cp_begin, cp_elapsed;
   6.150 +
   6.151 +     if (argc != 2) {
   6.152 +	  fprintf(stderr, "Usage: matmul [<cilk options>] <n>\n");
   6.153 +	  Cilk_exit(1);
   6.154 +     }
   6.155 +     n = atoi(argv[1]);
   6.156 +
   6.157 +     A = malloc(n * n * sizeof(REAL));
   6.158 +     B = malloc(n * n * sizeof(REAL));
   6.159 +     C1 = malloc(n * n * sizeof(REAL));
   6.160 +     C2 = malloc(n * n * sizeof(REAL));
   6.161 +	  
   6.162 +     init(A, n);
   6.163 +     init(B, n);
   6.164 +     zero(C1, n);
   6.165 +     zero(C2, n);
   6.166 +
   6.167 +     iter_matmul(A, B, C1, n);
   6.168 +
   6.169 +     /* Timing. "Start" timers */
   6.170 +     sync;
   6.171 +     cp_begin = Cilk_user_critical_path;
   6.172 +     wk_begin = Cilk_user_work;
   6.173 +     tm_begin = Cilk_get_wall_time();
   6.174 +
   6.175 +     spawn rec_matmul(A, B, C2, n, n, n, n, 0); 
   6.176 +     sync;
   6.177 +
   6.178 +     /* Timing. "Stop" timers */
   6.179 +     tm_elapsed = Cilk_get_wall_time() - tm_begin;
   6.180 +     wk_elapsed = Cilk_user_work - wk_begin;
   6.181 +     cp_elapsed = Cilk_user_critical_path - cp_begin;
   6.182 +
   6.183 +     err = maxerror(C1, C2, n);
   6.184 +
   6.185 +     printf("\nCilk Example: matmul\n");
   6.186 +     printf("	      running on %d processor%s\n\n",
   6.187 +	    Cilk_active_size, Cilk_active_size > 1 ? "s" : "");
   6.188 +     printf("Max error     = %g\n", err);
   6.189 +     printf("Options: size = %d\n", n);
   6.190 +     printf("Running time  = %4f s\n", Cilk_wall_time_to_sec(tm_elapsed));
   6.191 +     printf("Work          = %4f s\n", Cilk_time_to_sec(wk_elapsed));
   6.192 +     printf("Critical path = %4f s\n", Cilk_time_to_sec(cp_elapsed));
   6.193 +     printf("``MFLOPS''    = %4f\n\n",
   6.194 +	    2.0 * n * n * n / (1.0e6 * Cilk_wall_time_to_sec(tm_elapsed)));
   6.195 +
   6.196 +     free(C2);
   6.197 +     free(C1);
   6.198 +     free(B);
   6.199 +     free(A);
   6.200 +     return 0;
   6.201 +}
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/src/Application/Matrix_Mult.c	Mon Sep 20 17:11:58 2010 -0700
     7.3 @@ -0,0 +1,165 @@
     7.4 +/*
     7.5 + *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
     7.6 + *  Licensed under GNU General Public License version 2
     7.7 + *
     7.8 + * Author: seanhalle@yahoo.com
     7.9 + *
    7.10 + * Created on November 15, 2009, 2:35 AM
    7.11 + */
    7.12 +
    7.13 +#include <malloc.h>
    7.14 +#include <stdlib.h>
    7.15 +
    7.16 +#include "Matrix_Mult.h"
    7.17 +#include "ParamHelper/Param.h"
    7.18 +
    7.19 +
    7.20 + 
    7.21 + void
    7.22 +initialize_Input_Matrices_Via( Matrix  **leftMatrix, Matrix **rightMatrix,
    7.23 +                               ParamBag *paramBag )
    7.24 + { char *leftMatrixFileName, *rightMatrixFileName;
    7.25 +   int   leftMatrixRows, leftMatrixCols, rightMatrixRows, rightMatrixCols;
    7.26 +   
    7.27 +      ParamStruc *param;
    7.28 +      param = getParamFromBag( "leftMatrixRows", paramBag );
    7.29 +   leftMatrixRows = param->intValue;
    7.30 +      param = getParamFromBag( "leftMatrixCols", paramBag );
    7.31 +   leftMatrixCols = param->intValue;
    7.32 +   *leftMatrix = makeMatrix_WithResMat( leftMatrixRows, leftMatrixCols );
    7.33 +   
    7.34 +      param = getParamFromBag( "leftMatrixFileName", paramBag );
    7.35 +   leftMatrixFileName = param->strValue;  //no need to copy
    7.36 +   read_Matrix_From_File( *leftMatrix,  leftMatrixFileName );
    7.37 +   
    7.38 +      param = getParamFromBag( "rightMatrixRows", paramBag );
    7.39 +   rightMatrixRows = param->intValue;
    7.40 +      param = getParamFromBag( "rightMatrixCols", paramBag );
    7.41 +   rightMatrixCols = param->intValue;
    7.42 +   *rightMatrix = makeMatrix_WithResMat( rightMatrixRows, rightMatrixCols );
    7.43 +   
    7.44 +      param = getParamFromBag( "rightMatrixFileName", paramBag );
    7.45 +   rightMatrixFileName = param->strValue;
    7.46 +   read_Matrix_From_File( *rightMatrix, rightMatrixFileName );
    7.47 + }
    7.48 +
    7.49 +
    7.50 +void parseLineIntoRow( char *line, float32* row );
    7.51 +
    7.52 +
    7.53 + void
    7.54 +read_Matrix_From_File( Matrix *matrixStruc, char *matrixFileName )
    7.55 + { int    row, maxRead, numRows, numCols;
    7.56 +   float32 *matrixStart;
    7.57 +   size_t lineSz = 0;
    7.58 +   FILE  *file;
    7.59 +   char  *line = NULL;
    7.60 +   
    7.61 +   lineSz = 50000; //max length of line in a matrix data file
    7.62 +   line = (char *) malloc( lineSz );
    7.63 +   if( line == NULL ) printf( "no mem for matrix line" );
    7.64 +   
    7.65 +   numRows = matrixStruc->numRows;
    7.66 +   numCols = matrixStruc->numCols;
    7.67 +   matrixStart = matrixStruc->matrix;
    7.68 +
    7.69 +   file = fopen( matrixFileName, "r" );
    7.70 +   if( file == NULL ) { printf( "\nCouldn't open file!!\n"); exit(1);}
    7.71 +   fseek( file, 0, SEEK_SET );
    7.72 +   for( row = 0; row < numRows; row++ )
    7.73 +    {
    7.74 +      if( feof( file ) )  printf( "file ran out too soon" );
    7.75 +      maxRead = getline( &line, &lineSz, file );
    7.76 +      if( maxRead == -1 ) printf( "prob reading mat line");
    7.77 +      
    7.78 +      if( *line == '\n') continue; //blank line
    7.79 +      if( *line == '/' ) continue; //comment line
    7.80 +      
    7.81 +      parseLineIntoRow( line, matrixStart + row * numCols );
    7.82 +    }
    7.83 +   free( line );
    7.84 + }
    7.85 +
    7.86 +/*This function relies on each line having the proper number of cols.  It
    7.87 + * doesn't check, nor enforce, so if the file is improperly formatted it
    7.88 + * can write over unrelated memory
    7.89 + */
    7.90 + void
    7.91 +parseLineIntoRow( char *line, float32* row )
    7.92 + {
    7.93 +   char *valueStr, *searchPos;
    7.94 +   
    7.95 +      //read the float values
    7.96 +   searchPos = valueStr = line; //start
    7.97 +   
    7.98 +   for( ; *searchPos != 0; searchPos++)  //bit dangerous, should use buff len
    7.99 +    {
   7.100 +      if( *searchPos == '\n' ) //last col..  relying on well-formatted file
   7.101 +       { *searchPos = 0;
   7.102 +         *row = atof( valueStr );
   7.103 +         break;                                    //end FOR loop
   7.104 +       }
   7.105 +      if( *searchPos == ',' )
   7.106 +       { *searchPos = 0;                           //mark end of string
   7.107 +         *row = (float32) atof( valueStr );
   7.108 +         row += 1;                                 //address arith
   7.109 +            //skip any spaces before digits.. use searchPos + 1 to skip the 0
   7.110 +         for( ; *(searchPos + 1)== ' ' && *(searchPos + 1) !=0; searchPos++);
   7.111 +         valueStr = searchPos + 1;
   7.112 +       }
   7.113 +    }
   7.114 + }
   7.115 +
   7.116 + //==========================================================================
   7.117 +
   7.118 +/*In the "_Flat" version of constructor, do only malloc of the top data struc
   7.119 + * and set values in that top-level.  Don't malloc any sub-structures.
   7.120 + */
   7.121 + Matrix *
   7.122 +makeMatrix_Flat( int32 numRows, int32 numCols )
   7.123 + { Matrix * retMatrix;
   7.124 +   retMatrix = malloc( sizeof( Matrix ) );
   7.125 +   retMatrix->numRows = numRows;
   7.126 +   retMatrix->numCols = numCols;
   7.127 +
   7.128 +   return retMatrix;
   7.129 + }
   7.130 +
   7.131 + Matrix *
   7.132 +makeMatrix_WithResMat( int32 numRows, int32 numCols )
   7.133 + { Matrix * retMatrix;
   7.134 +   retMatrix = malloc( sizeof( Matrix ) );
   7.135 +   retMatrix->numRows = numRows;
   7.136 +   retMatrix->numCols = numCols;
   7.137 +   retMatrix->matrix  = malloc( numRows * numCols * sizeof(float32) );
   7.138 +
   7.139 +   return retMatrix;
   7.140 + }
   7.141 +
   7.142 + void
   7.143 +freeMatrix_Flat( Matrix * matrix )
   7.144 + { //( matrix );
   7.145 + }
   7.146 + void
   7.147 +freeMatrix( Matrix * matrix )
   7.148 + { free( matrix->matrix );
   7.149 +   free( matrix );
   7.150 + }
   7.151 +
   7.152 +void
   7.153 +printMatrix( Matrix *matrix )
   7.154 + { int r, c, numRows, numCols;
   7.155 +   float32 *matrixArray;
   7.156 +
   7.157 +   numRows = matrix->numRows;
   7.158 +   numCols = matrix->numCols;
   7.159 +   matrixArray = matrix->matrix;
   7.160 +
   7.161 +   for( r = 0; r < numRows; r++ )
   7.162 +    { for( c = 0; c < numCols; c++ )
   7.163 +       { printf( "%f | ", *(matrixArray + r*numCols + c) );
   7.164 +       }
   7.165 +      printf("\n");
   7.166 +    }
   7.167 + }
   7.168 +
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/src/Application/Matrix_Mult.h	Mon Sep 20 17:11:58 2010 -0700
     8.3 @@ -0,0 +1,77 @@
     8.4 +/*
     8.5 + *  Copyright Oct 24, 2009 OpenSourceCodeStewardshipFoundation.org
     8.6 + *  Licensed under GNU General Public License version 2
     8.7 + */
     8.8 +
     8.9 +#ifndef MATRIX_MULT_H_
    8.10 +#define MATRIX_MULT_H_
    8.11 +
    8.12 +#include <stdio.h>
    8.13 +#include <unistd.h>
    8.14 +#include <malloc.h>
    8.15 +
    8.16 +#include "../VPThread__lib/VMS/VMS_primitive_data_types.h"
    8.17 +#include "ParamHelper/Param.h"
    8.18 +
    8.19 +//==============================  Structures  ==============================
    8.20 +
    8.21 +typedef
    8.22 +struct
    8.23 + { int32    numRows;
    8.24 +   int32    numCols;
    8.25 +   float32 *matrix;  //2D, but dynamically sized, so use addr arith
    8.26 + }
    8.27 +Matrix;
    8.28 +
    8.29 +/* This is the "appSpecificPiece" that is carried inside a DKUPiece.
    8.30 + *  In the DKUPiece data struc it is declared to be of type "void *".  This
    8.31 + *  allows the application to define any data structure it wants and put it
    8.32 + *  into a DKUPiece.
    8.33 + * When the app specific info is used, it is in app code, so it is cast to
    8.34 + *  the correct type to tell the compiler how to access fields.
    8.35 + * This keeps all app-specific things out of the DKU directory, as per the
    8.36 + *  DKU standard. */
    8.37 +typedef
    8.38 +struct
    8.39 + { 
    8.40 +      // pointers to shared data..  the result matrix must be created when the
    8.41 +      //  left and right matrices are put into the root ancestor DKUPiece.
    8.42 +   Matrix * leftMatrix;
    8.43 +   Matrix * rightMatrix;
    8.44 +   Matrix * resultMatrix;
    8.45 +
    8.46 +      // define the starting and ending boundaries for this piece of the
    8.47 +      //  result matrix.  These are derivable from the left and right
    8.48 +      //  matrices, but included them for readability of code.
    8.49 +   int prodStartRow, prodEndRow;
    8.50 +   int prodStartCol, prodEndCol;
    8.51 +      // Start and end of the portion of the left matrix that contributes to
    8.52 +      //  this piece of the product
    8.53 +   int leftStartRow, leftEndRow;
    8.54 +   int leftStartCol, leftEndCol;
    8.55 +      // Start and end of the portion of the right matrix that contributes to
    8.56 +      //  this piece of the product
    8.57 +   int rightStartRow, rightEndRow;
    8.58 +   int rightStartCol, rightEndCol;
    8.59 + }
    8.60 +MatrixProdPiece;
    8.61 +
    8.62 +//==============================  Functions  ================================
    8.63 +void readFile();
    8.64 +
    8.65 +Matrix *makeMatrix( int32 numRows, int32 numCols );
    8.66 +Matrix *makeMatrix_Flat( int32 numRows, int32 numCols );
    8.67 +Matrix *makeMatrix_WithResMat( int32 numRows, int32 numCols );
    8.68 +void    freeMatrix_Flat( Matrix * matrix );
    8.69 +void    freeMatrix( Matrix * matrix );
    8.70 +void    printMatrix( Matrix *matrix );
    8.71 +
    8.72 +void read_Matrix_From_File( Matrix *matrixStruc, char *matrixFileName );
    8.73 +
    8.74 +void
    8.75 +initialize_Input_Matrices_Via( Matrix  **leftMatrix, Matrix **rightMatrix,
    8.76 +                              ParamBag *paramBag );
    8.77 +
    8.78 +//===========================================================================
    8.79 +
    8.80 +#endif /*MATRIX_MULT_H_*/
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/src/Application/main.c	Mon Sep 20 17:11:58 2010 -0700
     9.3 @@ -0,0 +1,35 @@
     9.4 +/*
     9.5 + *  Copyright Oct 24, 2009 OpenSourceCodeStewardshipFoundation.org
     9.6 + *  Licensed under GNU General Public License version 2
     9.7 + *
     9.8 + * author seanhalle@yahoo.com
     9.9 + */
    9.10 +
    9.11 +#include <malloc.h>
    9.12 +#include <stdlib.h>
    9.13 +
    9.14 +#include "Matrix_Mult.h"
    9.15 +#include "VPThread__Matrix_Mult/VPThread__Matrix_Mult.h"
    9.16 +
    9.17 +/**
    9.18 + *Matrix multiply program written using VMS_HW piggy-back language
    9.19 + * 
    9.20 + */
    9.21 +int main( int argc, char **argv )
    9.22 + { Matrix      *leftMatrix, *rightMatrix, *resultMatrix;
    9.23 +   ParamBag    *paramBag;
    9.24 +   
    9.25 +   paramBag = makeParamBag();
    9.26 +   readParamFileIntoBag( argv[1], paramBag );
    9.27 +   initialize_Input_Matrices_Via( &leftMatrix, &rightMatrix, paramBag );
    9.28 +   
    9.29 +   resultMatrix = multiplyTheseMatrices( leftMatrix, rightMatrix );
    9.30 +
    9.31 +   printf("\nresult matrix: \n");
    9.32 +
    9.33 +//   printMatrix( resultMatrix );
    9.34 +   
    9.35 +//   VPThread__print_stats();
    9.36 +   
    9.37 +   exit(0); //cleans up
    9.38 + }