changeset 2:ec0629f70ee5

forgot to add some of the files
author Me
date Tue, 26 Oct 2010 19:34:03 -0700
parents dd5387f362f6
children bf7331ed394e
files src/Application/CILK_Linux__Matrix_Mult src/Application/CILK__Matrix_Mult/Divide_Pr.c src/Application/CILK__Matrix_Mult/Divide_Pr.cilk src/Application/CILK__Matrix_Mult/EntryPoint.c src/Application/CILK__Matrix_Mult/EntryPoint.cilk src/Application/CILK__Matrix_Mult/Result_Pr.c src/Application/CILK__Matrix_Mult/VMS_primitive_data_types.h src/Application/CILK__Matrix_Mult/Vector_Pr.c src/Application/CILK__Matrix_Mult/Vector_Pr.cilk src/Application/CILK__Matrix_Mult/matmul.cilk src/Application/Makefile src/Application/main.c src/Application/main.cilk
diffstat 13 files changed, 339 insertions(+), 545 deletions(-) [+]
line diff
     1.1 Binary file src/Application/CILK_Linux__Matrix_Mult has changed
     2.1 --- a/src/Application/CILK__Matrix_Mult/Divide_Pr.c	Tue Oct 26 19:32:46 2010 -0700
     2.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.3 @@ -1,116 +0,0 @@
     2.4 -/*
     2.5 - *  Copyright 2009 OpenSourceStewardshipFoundation.org
     2.6 - *  Licensed under GNU General Public License version 2
     2.7 - *
     2.8 - * Author: seanhalle@yahoo.com
     2.9 - *
    2.10 - */
    2.11 -
    2.12 -
    2.13 -#include "VPThread__Matrix_Mult.h"
    2.14 -
    2.15 -/*Divider creates one processor for every row-col pair.
    2.16 - * It hands them:
    2.17 - *  the name of the result processor that they should send their results to,
    2.18 - *  the left and right matrices, and the row and col they should multiply
    2.19 - *  the length of the vector
    2.20 - * It first creates the result processor, then all the vector processors,
    2.21 - *  then does a receive of a message from the result processor that gives
    2.22 - *  the divider ownership of the result matrix.
    2.23 - * Finally, the divider returns the result matrix out of the VPThread system.
    2.24 - */
    2.25 -void divideIntoVectors( void *_dividerParams, VirtProcr *animatingThd )
    2.26 - { VirtProcr         *resultsThd;
    2.27 -   DividerParams     *dividerParams;
    2.28 -   ResultsParams     *resultsParams;
    2.29 -   VectorParams      *vectParams;
    2.30 -   Matrix            *leftMatrix, *rightMatrix, *resultMatrix;
    2.31 -   void              *msg;
    2.32 -   MatrixMultGlobals *globals;
    2.33 -
    2.34 -//   printf("start divide\n"); fflush(stdin);
    2.35 -   
    2.36 -   dividerParams   = (DividerParams *)_dividerParams;
    2.37 -   
    2.38 -   leftMatrix      = dividerParams->leftMatrix;
    2.39 -   rightMatrix     = dividerParams->rightMatrix;
    2.40 -
    2.41 -   resultsParams             = malloc( sizeof(ResultsParams) );
    2.42 -   resultsParams->dividerThd = animatingThd;
    2.43 -   resultsParams->numCols    = rightMatrix->numCols;
    2.44 -   resultsParams->numRows    = leftMatrix->numRows;
    2.45 -
    2.46 -   //===========  Set up global vars, including conds and mutexes ===========
    2.47 -   globals = malloc( sizeof(MatrixMultGlobals) );
    2.48 -   VPThread__set_globals_to( globals );
    2.49 -
    2.50 -   globals->results_mutex = VPThread__make_mutex( animatingThd );
    2.51 -   globals->results_cond  = VPThread__make_cond( globals->results_mutex,
    2.52 -                                                              animatingThd );
    2.53 -
    2.54 -   globals->vector_mutex = VPThread__make_mutex( animatingThd );
    2.55 -   globals->vector_cond  = VPThread__make_cond( globals->vector_mutex,
    2.56 -                                                              animatingThd );
    2.57 -
    2.58 -   globals->start_mutex = VPThread__make_mutex( animatingThd );
    2.59 -   globals->start_cond  = VPThread__make_cond( globals->start_mutex,
    2.60 -                                                              animatingThd );
    2.61 -   //========================================================================
    2.62 -
    2.63 -      //get results-comm lock before create results-thd, to ensure it can't
    2.64 -      // signal that results are available before this thd is waiting on cond
    2.65 -   VPThread__mutex_lock( globals->results_mutex, animatingThd );
    2.66 -
    2.67 -      //also get the start lock & use to ensure no vector threads send a
    2.68 -      // signal before the results thread is waiting on vector cond
    2.69 -   VPThread__mutex_lock( globals->start_mutex, animatingThd );
    2.70 -
    2.71 -
    2.72 -   VPThread__create_thread( &gatherResults, resultsParams, animatingThd );
    2.73 -
    2.74 -      //Now wait for results thd to signal that it has vector lock
    2.75 -   VPThread__cond_wait(  globals->start_cond,  animatingThd );
    2.76 -   VPThread__mutex_unlock( globals->start_mutex, animatingThd );//done w/lock
    2.77 -
    2.78 -
    2.79 -      //make the vector thds
    2.80 -   int row, col;
    2.81 -   for( row = 0; row < leftMatrix->numRows; row++ )
    2.82 -    { for( col = 0; col < rightMatrix->numCols; col++ )
    2.83 -       {
    2.84 -         vectParams              = malloc( sizeof(VectorParams) );
    2.85 -         vectParams->myCol       = col;
    2.86 -         vectParams->myRow       = row;
    2.87 -         vectParams->vectLength  = leftMatrix->numCols;
    2.88 -         vectParams->leftMatrix  = leftMatrix;
    2.89 -         vectParams->rightMatrix = rightMatrix;
    2.90 -         
    2.91 -         VPThread__create_thread( &calcVector, vectParams, animatingThd );
    2.92 -       }
    2.93 -               //===================  DEBUG  ===================
    2.94 -               #ifdef PRINT_DEBUG_1
    2.95 -               printf("created thread: %d, %d\n", row, col);
    2.96 -               #endif
    2.97 -               //==============================================
    2.98 -    }
    2.99 -
   2.100 -      //Wait for results thread to say results are good
   2.101 -   VPThread__cond_wait(  globals->results_cond,  animatingThd );
   2.102 -
   2.103 -      //The results of the all the work have to be linked-to from the data
   2.104 -      // struc given to the seed procr -- this divide func is animated by
   2.105 -      // that seed procr, so have to link results to the _dividerParams.
   2.106 -   resultMatrix            = malloc( sizeof(Matrix) );
   2.107 -   resultMatrix->numCols   = rightMatrix->numCols;
   2.108 -   resultMatrix->numRows   = leftMatrix->numRows;
   2.109 -   dividerParams->resultMatrix   = resultMatrix;
   2.110 -   resultMatrix->matrix          = globals->results;
   2.111 -
   2.112 -      //done with communication, release lock
   2.113 -   VPThread__mutex_unlock( globals->results_mutex, animatingThd );
   2.114 -
   2.115 -
   2.116 -   VPThread__dissipate_thread( animatingThd ); //all Thds dissipate when done
   2.117 -      //when all of the threads have dissipated, the "create seed and do
   2.118 -      // work" call in the entry point function returns
   2.119 - }
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/src/Application/CILK__Matrix_Mult/Divide_Pr.cilk	Tue Oct 26 19:34:03 2010 -0700
     3.3 @@ -0,0 +1,72 @@
     3.4 +/*
     3.5 + *  Copyright 2009 OpenSourceStewardshipFoundation.org
     3.6 + *  Licensed under GNU General Public License version 2
     3.7 + *
     3.8 + * Author: seanhalle@yahoo.com
     3.9 + *
    3.10 + */
    3.11 +
    3.12 +
    3.13 +#include "CILK__Matrix_Mult.h"
    3.14 +
    3.15 +cilk float32 calcVector( void * );
    3.16 +
    3.17 +/*Divider spawns one calcVector task for every row-col pair of the
    3.18 + * result matrix.
    3.19 + * It hands each task a freshly malloc'd VectorParams holding:
    3.20 + *  the left and right matrices, the row and col it should multiply,
    3.21 + *  and the length of the vector (the left matrix's numCols)
    3.22 + * Each spawned call's return value is stored straight into that cell of
    3.23 + *  the result array; the sync then waits for all the spawned calcs.
    3.24 + * Finally, a Matrix struc wrapping the result array is malloc'd and
    3.25 + *  linked into _dividerParams->resultMatrix for the caller to pick up.
    3.26 + */
    3.27 +
    3.28 +cilk void divideIntoVectors( void *_dividerParams )
    3.29 + { 
    3.30 +   DividerParams     *dividerParams;
    3.31 +   VectorParams      *vectParams;
    3.32 +   Matrix            *leftMatrix, *rightMatrix, *resultMatrix;
    3.33 +   int32              numCells, numCols, mrow, mcol;
    3.34 +   float32           *resultMatrixArray;
    3.35 +
    3.36 +   dividerParams   = (DividerParams *)_dividerParams;
    3.37 +   
    3.38 +   leftMatrix      = dividerParams->leftMatrix;
    3.39 +   rightMatrix     = dividerParams->rightMatrix;
    3.40 +
    3.41 +   
    3.42 +   numCols = rightMatrix->numCols;
    3.43 +
    3.44 +   numCells  = leftMatrix->numRows * rightMatrix->numCols;
    3.45 +   resultMatrixArray = malloc( numCells * sizeof( float32 ) );
    3.46 +      //one float32 cell per row-col pair; the malloc is not NULL-checked
    3.47 +
    3.48 +      //spawn vector calcs, one per cell of the result
    3.49 +   for( mrow = 0; mrow < leftMatrix->numRows; mrow++ )
    3.50 +    { for( mcol = 0; mcol < rightMatrix->numCols; mcol++ )
    3.51 +       {
    3.52 +         vectParams              = malloc( sizeof(VectorParams) );
    3.53 +         vectParams->myCol       = mcol;
    3.54 +         vectParams->myRow       = mrow;
    3.55 +         vectParams->vectLength  = leftMatrix->numCols;
    3.56 +         vectParams->leftMatrix  = leftMatrix;
    3.57 +         vectParams->rightMatrix = rightMatrix;
    3.58 +            //NOTE: vectParams is never freed in this function
    3.59 +            //result cells are laid out row-major, numCols cells per row
    3.60 +         resultMatrixArray[ mrow * numCols + mcol ] = spawn calcVector( vectParams );
    3.61 +       }
    3.62 +    }
    3.63 +      //wait for all the spawned calcVector calls before linking the result
    3.64 +   sync;
    3.65 +
    3.66 +   
    3.67 +      //The results of all the work have to be linked-to from the params
    3.68 +      // struc passed in -- nothing is returned from this function, so the
    3.69 +      // result is handed back through _dividerParams->resultMatrix.
    3.70 +   resultMatrix            = malloc( sizeof(Matrix) );
    3.71 +   resultMatrix->numCols   = rightMatrix->numCols;
    3.72 +   resultMatrix->numRows   = leftMatrix->numRows;
    3.73 +   dividerParams->resultMatrix   = resultMatrix;
    3.74 +   resultMatrix->matrix          = resultMatrixArray;
    3.75 + }
     4.1 --- a/src/Application/CILK__Matrix_Mult/EntryPoint.c	Tue Oct 26 19:32:46 2010 -0700
     4.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.3 @@ -1,48 +0,0 @@
     4.4 -/*
     4.5 - *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
     4.6 - *  Licensed under GNU General Public License version 2
     4.7 - *
     4.8 - * Author: seanhalle@yahoo.com
     4.9 - *
    4.10 - */
    4.11 -
    4.12 -#include <math.h>
    4.13 -
    4.14 -#include "VPThread__Matrix_Mult.h"
    4.15 -
    4.16 -
    4.17 -
    4.18 -/*Every VPThread system has an "entry point" function that creates the first
    4.19 - * processor, which starts the chain of creating more processors..
    4.20 - * eventually all of the processors will dissipate themselves, and
    4.21 - * return.
    4.22 - *
    4.23 - *This entry-point function follows the same pattern as all entry-point
    4.24 - * functions do:
    4.25 - *1) it creates the params for the seed processor, from the
    4.26 - *    parameters passed into the entry-point function
    4.27 - *2) it calls VPThread__create_seed_procr_and_do_work
    4.28 - *3) it gets the return value from the params struc, frees the params struc,
    4.29 - *    and returns the value from the function
    4.30 - *
    4.31 - */
    4.32 -Matrix *
    4.33 -multiplyTheseMatrices( Matrix *leftMatrix, Matrix *rightMatrix )
    4.34 - { Matrix          *resMatrix;
    4.35 -   DividerParams   *dividerParams;
    4.36 -
    4.37 -
    4.38 -   dividerParams              = malloc( sizeof( DividerParams ) );
    4.39 -   dividerParams->leftMatrix  = leftMatrix;
    4.40 -   dividerParams->rightMatrix = rightMatrix;
    4.41 -
    4.42 -
    4.43 -      //create divider processor, start doing the work, and wait till done
    4.44 -      //This function is the "border crossing" between normal code and VPThread
    4.45 -   VPThread__create_seed_procr_and_do_work( &divideIntoVectors, dividerParams );
    4.46 -   
    4.47 -      //get result matrix and return it
    4.48 -   resMatrix = dividerParams->resultMatrix;
    4.49 -   free( dividerParams );
    4.50 -   return resMatrix;
    4.51 - }
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/src/Application/CILK__Matrix_Mult/EntryPoint.cilk	Tue Oct 26 19:34:03 2010 -0700
     5.3 @@ -0,0 +1,45 @@
     5.4 +/*
     5.5 + *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
     5.6 + *  Licensed under GNU General Public License version 2
     5.7 + *
     5.8 + * Author: seanhalle@yahoo.com
     5.9 + *
    5.10 + */
    5.11 +
    5.12 +#include "CILK__Matrix_Mult.h"
    5.13 +
    5.14 +cilk void divideIntoVectors( void * );
    5.15 +
    5.16 +/*Entry point for the matrix multiply: wraps the two input matrices in a
    5.17 + * DividerParams struc, spawns divideIntoVectors on it, and syncs until
    5.18 + * that spawn has completed.
    5.19 + *
    5.20 + *The steps are:
    5.21 + *1) it creates the params for the divider, from the
    5.22 + *    parameters passed into the entry-point function
    5.23 + *2) it spawns divideIntoVectors and syncs
    5.24 + *3) it gets the return value from the params struc, frees the params struc,
    5.25 + *    and returns the value from the function
    5.26 + *
    5.27 + *The malloc result is used without a NULL check.
    5.28 + *
    5.29 + */
    5.30 +cilk
    5.31 +Matrix *
    5.32 +multiplyTheseMatrices( Matrix *leftMatrix, Matrix *rightMatrix )
    5.33 + { Matrix          *resMatrix;
    5.34 +   DividerParams   *dividerParams;
    5.35 +
    5.36 +
    5.37 +   dividerParams              = malloc( sizeof( DividerParams ) );
    5.38 +   dividerParams->leftMatrix  = leftMatrix;
    5.39 +   dividerParams->rightMatrix = rightMatrix;
    5.40 +      //divideIntoVectors fills in dividerParams->resultMatrix
    5.41 +   spawn divideIntoVectors( dividerParams );
    5.42 +   sync;
    5.43 +
    5.44 +      //get result matrix and return it
    5.45 +   resMatrix = dividerParams->resultMatrix;
    5.46 +   free( dividerParams );
    5.47 +   return resMatrix;
    5.48 + }
     6.1 --- a/src/Application/CILK__Matrix_Mult/Result_Pr.c	Tue Oct 26 19:32:46 2010 -0700
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,89 +0,0 @@
     6.4 -/*
     6.5 - *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
     6.6 - *  Licensed under GNU General Public License version 2
     6.7 - *
     6.8 - * Author: seanhalle@yahoo.com
     6.9 - *
    6.10 - */
    6.11 -
    6.12 -#include "VPThread__Matrix_Mult.h"
    6.13 -
    6.14 -/*The Result Processor gets a message from each of the vector processors,
    6.15 - * puts the result from the message in its location in the result-
    6.16 - * matrix, and increments the count of results.
    6.17 - *
    6.18 - *After the count reaches the point that all results have been received, it
    6.19 - * returns the result matrix and dissipates.
    6.20 - */
    6.21 -void gatherResults( void *_params, VirtProcr *animatingPr )
    6.22 - { VirtProcr *dividerPr;
    6.23 -   ResultsParams  *params;
    6.24 -   int             numRows, numCols, numCells, count=0;
    6.25 -   float32        *resultMatrixArray;
    6.26 -   void           *msg;
    6.27 -   VectorParams   *aResult;
    6.28 -   MatrixMultGlobals *globals =(MatrixMultGlobals *)VPThread__give_globals();
    6.29 -
    6.30 -
    6.31 -      //get vector-comm lock before loop, so that this thd keeps lock after
    6.32 -      // one wait until it enters the next wait -- forces see-saw btwn
    6.33 -      // waiters and signalers -- wait-signal-wait-signal-...
    6.34 -   VPThread__mutex_lock( globals->vector_mutex, animatingPr );
    6.35 -
    6.36 -      //Tell divider that have the vector lock -- so it's sure won't miss any
    6.37 -      // signals from the vector-threads it's about to create
    6.38 -      //Don't need a signal variable -- this thd can't be created until
    6.39 -      // divider thd already has the start lock
    6.40 -   VPThread__mutex_lock( globals->start_mutex, animatingPr );//finish wait
    6.41 -   VPThread__cond_signal( globals->start_cond,  animatingPr );
    6.42 -   VPThread__mutex_unlock( globals->start_mutex, animatingPr );//finish wait
    6.43 -
    6.44 -            //=====================  DEBUG  ======================
    6.45 -            #ifdef PRINT_DEBUG
    6.46 -            printf("**Result Pr has the lock**\n" );
    6.47 -            fflush(stdin);
    6.48 -            #endif
    6.49 -            //====================================================
    6.50 -
    6.51 -   params    = (ResultsParams *)_params;
    6.52 -   dividerPr = params->dividerThd;
    6.53 -   numCols   = params->numCols;
    6.54 -   numRows   = params->numRows;
    6.55 -   numCells  = numRows * numCols;
    6.56 -
    6.57 -   resultMatrixArray = malloc( numCells * sizeof( float32 ) );
    6.58 -
    6.59 -
    6.60 -   while( count < numCells )
    6.61 -    {
    6.62 -         //receive a vector-result from a vector-thread
    6.63 -      VPThread__cond_wait(  globals->vector_cond,  animatingPr );
    6.64 -
    6.65 -      aResult = globals->currVector;
    6.66 -      *(resultMatrixArray + aResult->myRow * numCols + aResult->myCol) =
    6.67 -                                                             aResult->result;
    6.68 -      count++;
    6.69 -            //=====================  DEBUG  ======================
    6.70 -            #ifdef PRINT_DEBUG_1
    6.71 -            if( count - count/numRows * numRows == 0  )
    6.72 -             { printf("%d vector result: %f\n", count, aResult->result );
    6.73 -               fflush(stdin);
    6.74 -             }
    6.75 -            #endif
    6.76 -            //====================================================
    6.77 -
    6.78 -    }
    6.79 -      //all comms done, release lock
    6.80 -   VPThread__mutex_unlock( globals->vector_mutex, animatingPr );
    6.81 -   
    6.82 -      //Send result to divider (seed) thread
    6.83 -      // note, divider thd had to hold the results-comm lock before creating
    6.84 -      // this thread, to be sure no race
    6.85 -   VPThread__mutex_lock(   globals->results_mutex, animatingPr );
    6.86 -   globals->results = resultMatrixArray;
    6.87 -   VPThread__cond_signal(  globals->results_cond,  animatingPr );
    6.88 -   VPThread__mutex_unlock( globals->results_mutex, animatingPr ); //releases
    6.89 -   //divider thread from its wait, at point this executes
    6.90 -
    6.91 -   VPThread__dissipate_thread( animatingPr );  //frees any data owned by procr
    6.92 - }
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/src/Application/CILK__Matrix_Mult/VMS_primitive_data_types.h	Tue Oct 26 19:34:03 2010 -0700
     7.3 @@ -0,0 +1,53 @@
     7.4 +/*
     7.5 + *  Copyright 2009 OpenSourceStewardshipFoundation.org
     7.6 + *  Licensed under GNU General Public License version 2
     7.7 + *  
     7.8 + * Author: seanhalle@yahoo.com
     7.9 + *  
    7.10 +
    7.11 + */
    7.12 +
    7.13 +#ifndef _BLIS_PRIMITIVE_DATA_TYPES_H
    7.14 +#define	_BLIS_PRIMITIVE_DATA_TYPES_H
    7.15 +
    7.16 +
    7.17 +/*For portability, need primitive data types that have a well defined
    7.18 + * size, and well-defined layout into bytes
    7.19 + *To do this, provide BLIS standard aliases for all primitive data types
    7.20 + *These aliases must be used in all BLIS functions instead of the ANSI types
    7.21 + *
    7.22 + *These definitions will be replaced inside each specialization module
    7.23 + * according to the compiler used in that module and the hardware being
    7.24 + * specialized to.
    7.25 + */
    7.26 +/*
    7.27 +#define    int8  char
    7.28 +#define   uint8  char
    7.29 +#define    int16 short
    7.30 +#define   uint16 unsigned short
    7.31 +#define    int32 int
    7.32 +#define   uint32 unsigned int
    7.33 +#define    int64 long long
    7.34 +#define   uint64 unsigned long long
    7.35 +#define  float32 float
    7.36 +#define  float64 double
    7.37 +*/
    7.38 +typedef char               bool8;
    7.39 +typedef signed char        int8;   //plain char's signedness varies by ABI
    7.40 +typedef unsigned char      uint8;  //must be unsigned to match its name
    7.41 +typedef short              int16;
    7.42 +typedef unsigned short     uint16;
    7.43 +typedef int                int32;
    7.44 +typedef unsigned int       uint32;
    7.45 +typedef long long          int64;
    7.46 +typedef unsigned long long uint64;
    7.47 +typedef float              float32;
    7.48 +typedef double             float64;
    7.49 +//"double double" is not a C type; long double is the widest std float
    7.50 +#define float128 long double  /*NOTE: not 128 bits wide on every target*/
    7.51 +
    7.52 +#define TRUE  1
    7.53 +#define FALSE 0
    7.54 +
    7.55 +#endif	/* _BLIS_PRIMITIVE_DATA_TYPES_H */
    7.56 +
     8.1 --- a/src/Application/CILK__Matrix_Mult/Vector_Pr.c	Tue Oct 26 19:32:46 2010 -0700
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,59 +0,0 @@
     8.4 -/* 
     8.5 - *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
     8.6 - *  Licensed under GNU General Public License version 2
     8.7 - *
     8.8 - * Author: SeanHalle@yahoo.com
     8.9 - *
    8.10 - */
    8.11 -
    8.12 -#include "VPThread__Matrix_Mult.h"
    8.13 -
    8.14 -/*A Vector processor is created with an environment that holds two matrices,
    8.15 - * the row and col that it owns, and the name of a result gathering
    8.16 - * processor.
    8.17 - *It calculates its vector product then sends the result to the result
    8.18 - * processor, which puts it into the result matrix and returns that matrix
    8.19 - * when all is done.
    8.20 - */
    8.21 - void
    8.22 -calcVector( void *data, VirtProcr *animatingPr )
    8.23 - { 
    8.24 -   VectorParams   *params;
    8.25 -   VirtProcr      *resultPr;
    8.26 -   int             myRow, myCol, vectLength, pos;
    8.27 -   float32        *leftMatrixArray, *rightMatrixArray, result = 0.0;
    8.28 -   Matrix         *leftMatrix, *rightMatrix;
    8.29 -   MatrixMultGlobals *globals =(MatrixMultGlobals *)VPThread__give_globals();
    8.30 -
    8.31 -   params      = (VectorParams *)data;
    8.32 -   myCol       = params->myCol;
    8.33 -   myRow       = params->myRow;
    8.34 -   vectLength  = params->vectLength;
    8.35 -   leftMatrix  = params->leftMatrix;
    8.36 -   rightMatrix = params->rightMatrix;
    8.37 -   leftMatrixArray  = leftMatrix->matrix;
    8.38 -   rightMatrixArray = rightMatrix->matrix;
    8.39 -         //=====================  DEBUG  ======================
    8.40 -         #ifdef PRINT_DEBUG_1
    8.41 -         if( myCol == 0 )
    8.42 -            printf("start vector: %d, %d\n", myRow, myCol ); fflush(stdin);
    8.43 -         #endif
    8.44 -         //====================================================
    8.45 -
    8.46 -   for( pos = 0; pos < vectLength; pos++ )
    8.47 -    {
    8.48 -      result += *(leftMatrixArray  + myRow * vectLength + pos)  *
    8.49 -                *(rightMatrixArray + pos   * vectLength + myCol);
    8.50 -    }
    8.51 -   params->result = result;
    8.52 -
    8.53 -      //Send result to results thread
    8.54 -   VPThread__mutex_lock(   globals->vector_mutex, animatingPr );//only get
    8.55 -   //the lock when results thd is inside wait.
    8.56 -   globals->currVector = params;
    8.57 -   VPThread__cond_signal(  globals->vector_cond,  animatingPr );
    8.58 -   VPThread__mutex_unlock( globals->vector_mutex, animatingPr );//release
    8.59 -   //wait-er -- cond_signal implemented such that wait-er gets lock, no other
    8.60 -
    8.61 -   VPThread__dissipate_thread( animatingPr );
    8.62 - }
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/src/Application/CILK__Matrix_Mult/Vector_Pr.cilk	Tue Oct 26 19:34:03 2010 -0700
     9.3 @@ -0,0 +1,48 @@
     9.4 +/* 
     9.5 + *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
     9.6 + *  Licensed under GNU General Public License version 2
     9.7 + *
     9.8 + * Author: SeanHalle@yahoo.com
     9.9 + *
    9.10 + */
    9.11 +
    9.12 +#include "CILK__Matrix_Mult.h"
    9.13 +
    9.14 +/*calcVector is spawned with a VectorParams struc that holds the two
    9.15 + * matrices, the row and col that it owns, and the vector length.
    9.16 + *It calculates the dot product of its row of the left matrix with its
    9.17 + * col of the right matrix, and returns that value to the spawner.
    9.18 + *Both matrices are indexed row-major: the left matrix's row stride is
    9.19 + * vectLength, the right matrix's row stride is its own numCols.
    9.20 + */
    9.21 +cilk
    9.22 +float32
    9.23 +calcVector( void *data )
    9.24 + { 
    9.25 +   VectorParams   *params;
    9.26 +   int             myRow, myCol, vectLength, pos;
    9.27 +   float32        *leftMatrixArray, *rightMatrixArray, result = 0.0;
    9.28 +   Matrix         *leftMatrix, *rightMatrix;
    9.29 +
    9.30 +   params      = (VectorParams *)data;
    9.31 +   myCol       = params->myCol;
    9.32 +   myRow       = params->myRow;
    9.33 +   vectLength  = params->vectLength;
    9.34 +   leftMatrix  = params->leftMatrix;
    9.35 +   rightMatrix = params->rightMatrix;
    9.36 +   leftMatrixArray  = leftMatrix->matrix;
    9.37 +   rightMatrixArray = rightMatrix->matrix;
    9.38 +         //=====================  DEBUG  ======================
    9.39 +         #ifdef PRINT_DEBUG
    9.40 +         if( myCol == 0 )
    9.41 +          { printf("start vector: %d, %d\n", myRow, myCol ); fflush(stdout); }
    9.42 +         #endif
    9.43 +         //====================================================
    9.44 +      //right matrix has vectLength rows of numCols cells, so step by numCols
    9.45 +   for( pos = 0; pos < vectLength; pos++ )
    9.46 +    {
    9.47 +      result += leftMatrixArray[ myRow * vectLength + pos ]  *
    9.48 +                rightMatrixArray[ pos  * rightMatrix->numCols + myCol];
    9.49 +    }
    9.50 +   return result;
    9.51 + }
    10.1 --- a/src/Application/CILK__Matrix_Mult/matmul.cilk	Tue Oct 26 19:32:46 2010 -0700
    10.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.3 @@ -1,198 +0,0 @@
    10.4 -/* 
    10.5 - * Rectangular matrix multiplication.
    10.6 - *
    10.7 - * See the paper ``Cache-Oblivious Algorithms'', by
    10.8 - * Matteo Frigo, Charles E. Leiserson, Harald Prokop, and 
    10.9 - * Sridhar Ramachandran, FOCS 1999, for an explanation of
   10.10 - * why this algorithm is good for caches.
   10.11 - *
   10.12 - * Author: Matteo Frigo
   10.13 - */
   10.14 -static const char *ident __attribute__((__unused__))
   10.15 -     = "$HeadURL: https://bradley.csail.mit.edu/svn/repos/cilk/5.4.3/examples/matmul.cilk $ $LastChangedBy: sukhaj $ $Rev: 517 $ $Date: 2003-10-27 10:05:37 -0500 (Mon, 27 Oct 2003) $";
   10.16 -
   10.17 -/*
   10.18 - * Copyright (c) 2003 Massachusetts Institute of Technology
   10.19 - *
   10.20 - * This program is free software; you can redistribute it and/or modify
   10.21 - * it under the terms of the GNU General Public License as published by
   10.22 - * the Free Software Foundation; either version 2 of the License, or
   10.23 - * (at your option) any later version.
   10.24 - *
   10.25 - * This program is distributed in the hope that it will be useful,
   10.26 - * but WITHOUT ANY WARRANTY; without even the implied warranty of
   10.27 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   10.28 - * GNU General Public License for more details.
   10.29 - *
   10.30 - * You should have received a copy of the GNU General Public License
   10.31 - * along with this program; if not, write to the Free Software
   10.32 - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   10.33 - *
   10.34 - */
   10.35 -
   10.36 -#include <cilk-lib.cilkh>
   10.37 -#include <stdio.h>
   10.38 -#include <stdlib.h>
   10.39 -#include <math.h>
   10.40 -
   10.41 -#define REAL float
   10.42 -
   10.43 -extern int Cilk_rand(void);
   10.44 -
   10.45 -void zero(REAL *A, int n)
   10.46 -{
   10.47 -     int i, j;
   10.48 -     
   10.49 -     for (i = 0; i < n; i++) {
   10.50 -	  for (j = 0; j < n; j++) {
   10.51 -	       A[i * n + j] = 0.0;
   10.52 -	  }
   10.53 -     }
   10.54 -}
   10.55 -
   10.56 -void init(REAL *A, int n)
   10.57 -{
   10.58 -     int i, j;
   10.59 -     
   10.60 -     for (i = 0; i < n; i++) {
   10.61 -	  for (j = 0; j < n; j++) {
   10.62 -	       A[i * n + j] = (double)Cilk_rand();
   10.63 -	  }
   10.64 -     }
   10.65 -}
   10.66 -
   10.67 -double maxerror(REAL *A, REAL *B, int n)
   10.68 -{
   10.69 -     int i, j;
   10.70 -     double error = 0.0;
   10.71 -     
   10.72 -     for (i = 0; i < n; i++) {
   10.73 -	  for (j = 0; j < n; j++) {
   10.74 -	       double diff = (A[i * n + j] - B[i * n + j]) / A[i * n + j];
   10.75 -	       if (diff < 0)
   10.76 -		    diff = -diff;
   10.77 -	       if (diff > error)
   10.78 -		    error = diff;
   10.79 -	  }
   10.80 -     }
   10.81 -     return error;
   10.82 -}
   10.83 -
   10.84 -void iter_matmul(REAL *A, REAL *B, REAL *C, int n)
   10.85 -{
   10.86 -     int i, j, k;
   10.87 -     
   10.88 -     for (i = 0; i < n; i++)
   10.89 -	  for (k = 0; k < n; k++) {
   10.90 -	       REAL c = 0.0;
   10.91 -	       for (j = 0; j < n; j++)
   10.92 -		    c += A[i * n + j] * B[j * n + k];
   10.93 -	       C[i * n + k] = c;
   10.94 -	  }
   10.95 -}
   10.96 -
   10.97 -/*
   10.98 - * A \in M(m, n)
   10.99 - * B \in M(n, p)
  10.100 - * C \in M(m, p)
  10.101 - */
  10.102 -cilk void rec_matmul(REAL *A, REAL *B, REAL *C, int m, int n, int p, int ld,
  10.103 -		     int add)
  10.104 -{
  10.105 -     if ((m + n + p) <= 64) {
  10.106 -	  int i, j, k;
  10.107 -	  /* base case */
  10.108 -	  if (add) {
  10.109 -	       for (i = 0; i < m; i++)
  10.110 -		    for (k = 0; k < p; k++) {
  10.111 -			 REAL c = 0.0;
  10.112 -			 for (j = 0; j < n; j++)
  10.113 -			      c += A[i * ld + j] * B[j * ld + k];
  10.114 -			 C[i * ld + k] += c;
  10.115 -		    }
  10.116 -	  } else {
  10.117 -	       for (i = 0; i < m; i++)
  10.118 -		    for (k = 0; k < p; k++) {
  10.119 -			 REAL c = 0.0;
  10.120 -			 for (j = 0; j < n; j++)
  10.121 -			      c += A[i * ld + j] * B[j * ld + k];
  10.122 -			 C[i * ld + k] = c;
  10.123 -		    }
  10.124 -	  }
  10.125 -     } else if (m >= n && n >= p) {
  10.126 -	  int m1 = m >> 1;
  10.127 -	  spawn rec_matmul(A, B, C, m1, n, p, ld, add);
  10.128 -	  spawn rec_matmul(A + m1 * ld, B, C + m1 * ld, m - m1,
  10.129 -			   n, p, ld, add);
  10.130 -     } else if (n >= m && n >= p) {
  10.131 -	  int n1 = n >> 1;
  10.132 -	  spawn rec_matmul(A, B, C, m, n1, p, ld, add);
  10.133 -	  sync;
  10.134 -	  spawn rec_matmul(A + n1, B + n1 * ld, C, m, n - n1, p, ld, 1);
  10.135 -     } else {
  10.136 -	  int p1 = p >> 1;
  10.137 -	  spawn rec_matmul(A, B, C, m, n, p1, ld, add);
  10.138 -	  spawn rec_matmul(A, B + p1, C + p1, m, n, p - p1, ld, add);
  10.139 -     }
  10.140 -}
  10.141 -
  10.142 -cilk int main(int argc, char *argv[])
  10.143 -{
  10.144 -     int n;
  10.145 -     REAL *A, *B, *C1, *C2;
  10.146 -     double err;
  10.147 -     Cilk_time tm_begin, tm_elapsed;
  10.148 -     Cilk_time wk_begin, wk_elapsed;
  10.149 -     Cilk_time cp_begin, cp_elapsed;
  10.150 -
  10.151 -     if (argc != 2) {
  10.152 -	  fprintf(stderr, "Usage: matmul [<cilk options>] <n>\n");
  10.153 -	  Cilk_exit(1);
  10.154 -     }
  10.155 -     n = atoi(argv[1]);
  10.156 -
  10.157 -     A = malloc(n * n * sizeof(REAL));
  10.158 -     B = malloc(n * n * sizeof(REAL));
  10.159 -     C1 = malloc(n * n * sizeof(REAL));
  10.160 -     C2 = malloc(n * n * sizeof(REAL));
  10.161 -	  
  10.162 -     init(A, n);
  10.163 -     init(B, n);
  10.164 -     zero(C1, n);
  10.165 -     zero(C2, n);
  10.166 -
  10.167 -     iter_matmul(A, B, C1, n);
  10.168 -
  10.169 -     /* Timing. "Start" timers */
  10.170 -     sync;
  10.171 -     cp_begin = Cilk_user_critical_path;
  10.172 -     wk_begin = Cilk_user_work;
  10.173 -     tm_begin = Cilk_get_wall_time();
  10.174 -
  10.175 -     spawn rec_matmul(A, B, C2, n, n, n, n, 0); 
  10.176 -     sync;
  10.177 -
  10.178 -     /* Timing. "Stop" timers */
  10.179 -     tm_elapsed = Cilk_get_wall_time() - tm_begin;
  10.180 -     wk_elapsed = Cilk_user_work - wk_begin;
  10.181 -     cp_elapsed = Cilk_user_critical_path - cp_begin;
  10.182 -
  10.183 -     err = maxerror(C1, C2, n);
  10.184 -
  10.185 -     printf("\nCilk Example: matmul\n");
  10.186 -     printf("	      running on %d processor%s\n\n",
  10.187 -	    Cilk_active_size, Cilk_active_size > 1 ? "s" : "");
  10.188 -     printf("Max error     = %g\n", err);
  10.189 -     printf("Options: size = %d\n", n);
  10.190 -     printf("Running time  = %4f s\n", Cilk_wall_time_to_sec(tm_elapsed));
  10.191 -     printf("Work          = %4f s\n", Cilk_time_to_sec(wk_elapsed));
  10.192 -     printf("Critical path = %4f s\n", Cilk_time_to_sec(cp_elapsed));
  10.193 -     printf("``MFLOPS''    = %4f\n\n",
  10.194 -	    2.0 * n * n * n / (1.0e6 * Cilk_wall_time_to_sec(tm_elapsed)));
  10.195 -
  10.196 -     free(C2);
  10.197 -     free(C1);
  10.198 -     free(B);
  10.199 -     free(A);
  10.200 -     return 0;
  10.201 -}
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/src/Application/Makefile	Tue Oct 26 19:34:03 2010 -0700
    11.3 @@ -0,0 +1,80 @@
    11.4 +
    11.5 +
    11.6 +CILK_SOURCE = \
    11.7 +   CILK__Matrix_Mult/EntryPoint.cilk \
    11.8 +   CILK__Matrix_Mult/Divide_Pr.cilk \
    11.9 +   CILK__Matrix_Mult/Vector_Pr.cilk \
   11.10 +   main.cilk
   11.11 +
   11.12 +C_SOURCE = \
   11.13 +   matrix_mult.c \
   11.14 +   ParamHelper/ParamBag.c\
   11.15 +   ParamHelper/ReadParamsFromFile.c
   11.16 +
   11.17 +C_OBJS = $(C_SOURCE:.c=.o)
   11.18 +
   11.19 +CILK_OBJS = $(CILK_SOURCE:.cilk=.o) 
   11.20 +
   11.21 +OBJECTS = $(C_SOURCE) $(CILK_SOURCE)
   11.22 +
   11.23 +#Make has the automatic variable "$<", which is the first prerequisite
   11.24 +# (here, the source file), and "$@", which is the target being built
   11.25 +$(C_OBJS): $(C_SOURCE)
   11.26 +	gcc -c $< -o $@
   11.27 +
   11.28 +$(CILK_OBJS): $(CILK_SOURCE)
   11.29 +	gcc -c $< -o $@
   11.30 +
   11.31 +all: $(OBJECTS)
   11.32 +	cilkc $(OBJECTS) -o CILK_Linux__Matrix_Mult; \
   11.33 +	cp CILK_Linux__Matrix_Mult ~/D/2__INRIA_OMP/1__Development/2__runs_and_data/executables
   11.34 +
   11.35 +
   11.36 +
   11.37 +#================================================================
   11.38 +#Other stuff tried/played_with/copied
   11.39 +#Example called "specifying alternate directories"
   11.40 +# puts all object files in one directory
   11.41 +#CFLAGS	:= 
   11.42 +#OBJDIR	:= .
   11.43 +
   11.44 +#$(OBJDIR)/%.o: %.c
   11.45 +#	$(CC) $(CFLAGS) -c $(input) -o $(output)
   11.46 +
   11.47 +#I believe that make fills in "inputs"..  and because the sub-dir is
   11.48 +# in the target, it puts that sub-dir into the "inputs" var, but
   11.49 +# apparently, because the source is in the objects dir, it doesn't
   11.50 +# include the sub-dir in the "output" var
   11.51 +#$(OBJDIR)/CILK_Matrix_Mult: $(OBJDIR)/*.o
   11.52 +#    	cilkc $(input) -o $(output)
   11.53 +
   11.54 +#%.o: %.cilk
   11.55 +
   11.56 +
   11.57 +#===============================================================
   11.58 +#May be odd usage.. my first makefile..  The idea is to tell make
   11.59 +# that, to get a given .o file, it should run cilkc with the "-c" option,
   11.60 +# which causes cilkc to generate a ".o" file
   11.61 +#%.o: %.cilk
   11.62 +#	cilkc -c $< -o $@
   11.63 +
   11.64 +
   11.65 +#================================================================
   11.66 +# playing with below..
   11.67 +
   11.68 +#7C9A-RV6P-3XE2-JV99-426K-2K
   11.69 +
   11.70 +#rule for inferring that the .cilk file is the source for .o file
   11.71 +# and how to create the .o from the .cilk
   11.72 +#%.o : %.cilk
   11.73 +#	cilkc -c $(.SOURCE)
   11.74 +
   11.75 +#CILK_Linux__Matrix_Mult: main.o CILK__Matrix_Mult/foo.o #ParamHelper/foo2.o
   11.76 +
   11.77 +#foo.o: $(SUBDIR_SOURCES)
   11.78 +#	gcc -shared $(inputs) -o $(output)
   11.79 +
   11.80 +#%.o: %.cilk
   11.81 +#	cilkc -c $(input) -o $(output) 
   11.82 +
   11.83 +
    12.1 --- a/src/Application/main.c	Tue Oct 26 19:32:46 2010 -0700
    12.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.3 @@ -1,35 +0,0 @@
    12.4 -/*
    12.5 - *  Copyright Oct 24, 2009 OpenSourceCodeStewardshipFoundation.org
    12.6 - *  Licensed under GNU General Public License version 2
    12.7 - *
    12.8 - * author seanhalle@yahoo.com
    12.9 - */
   12.10 -
   12.11 -#include <malloc.h>
   12.12 -#include <stdlib.h>
   12.13 -
   12.14 -#include "Matrix_Mult.h"
   12.15 -#include "VPThread__Matrix_Mult/VPThread__Matrix_Mult.h"
   12.16 -
   12.17 -/**
   12.18 - *Matrix multiply program written using VMS_HW piggy-back language
   12.19 - * 
   12.20 - */
   12.21 -int main( int argc, char **argv )
   12.22 - { Matrix      *leftMatrix, *rightMatrix, *resultMatrix;
   12.23 -   ParamBag    *paramBag;
   12.24 -   
   12.25 -   paramBag = makeParamBag();
   12.26 -   readParamFileIntoBag( argv[1], paramBag );
   12.27 -   initialize_Input_Matrices_Via( &leftMatrix, &rightMatrix, paramBag );
   12.28 -   
   12.29 -   resultMatrix = multiplyTheseMatrices( leftMatrix, rightMatrix );
   12.30 -
   12.31 -   printf("\nresult matrix: \n");
   12.32 -
   12.33 -//   printMatrix( resultMatrix );
   12.34 -   
   12.35 -//   VPThread__print_stats();
   12.36 -   
   12.37 -   exit(0); //cleans up
   12.38 - }
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/src/Application/main.cilk	Tue Oct 26 19:34:03 2010 -0700
    13.3 @@ -0,0 +1,41 @@
    13.4 +/*
    13.5 + *  Copyright Oct 24, 2009 OpenSourceCodeStewardshipFoundation.org
    13.6 + *  Licensed under GNU General Public License version 2
    13.7 + *
    13.8 + * author seanhalle@yahoo.com
    13.9 + */
   13.10 +
   13.11 +#include <malloc.h>
   13.12 +#include <stdlib.h>
   13.13 +
   13.14 +#include "Matrix_Mult.h"
   13.15 +#include "CILK__Matrix_Mult/CILK__Matrix_Mult.h"
   13.16 +
   13.17 +cilk Matrix * 
   13.18 +multiplyTheseMatrices( Matrix *leftMatrix, Matrix *rightMatrix );
   13.19 +
   13.20 +/**
   13.21 + *Matrix multiply program, Cilk version (adapted from the VMS_HW piggy-back language version)
   13.22 + * 
   13.23 + */
   13.24 +cilk
   13.25 +int main( int argc, char **argv )
   13.26 + { Matrix      *leftMatrix, *rightMatrix, *resultMatrix;
   13.27 +   ParamBag    *paramBag;
   13.28 +   
   13.29 +   
   13.30 +   paramBag = makeParamBag();
   13.31 +   readParamFileIntoBag( argv[1], paramBag );
   13.32 +   initialize_Input_Matrices_Via( &leftMatrix, &rightMatrix, paramBag );
   13.33 +
   13.34 +   resultMatrix = spawn multiplyTheseMatrices( leftMatrix, rightMatrix );
   13.35 +   sync;
   13.36 +
   13.37 +   printf("\nresult matrix: \n");
   13.38 +
   13.39 +//   printMatrix( resultMatrix );
   13.40 +   
   13.41 +//   VPThread__print_stats();
   13.42 +   
   13.43 +   exit(0); //cleans up
   13.44 + }