Skip to content
This repository has been archived by the owner on Oct 19, 2020. It is now read-only.

Commit

Permalink
Verified matrix multiply works.
Browse files Browse the repository at this point in the history
  • Loading branch information
Andreas Kloeckner committed Jul 30, 2009
1 parent 0a14dfd commit 3a71fa9
Show file tree
Hide file tree
Showing 12 changed files with 322 additions and 78 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -8,3 +8,4 @@ CMakeFiles/
*.so
Makefile
cmake_install.cmake
*.linkinfo
Binary file modified example/multiply_matrix
Binary file not shown.
50 changes: 36 additions & 14 deletions example/multiply_matrix.cpp
Expand Up @@ -42,32 +42,54 @@ int main(int argc, char **argv)
return 1;
}
typedef float entry_type;
typedef gpu_sparse_pkt_matrix<entry_type> mat_type;
std::auto_ptr<mat_type> mat(
mat_type::read_matrix_market_file(argv[1]));
typedef cpu_sparse_csr_matrix<entry_type> cpu_mat_type;
typedef gpu_sparse_pkt_matrix<entry_type> gpu_mat_type;
std::auto_ptr<cpu_mat_type> cpu_mat(
cpu_mat_type::read_matrix_market_file(argv[1]));

gpu_mat_type gpu_mat(*cpu_mat);

// build host vectors
entry_type *x = new entry_type[mat->column_count()];
entry_type *y = new entry_type[mat->row_count()];
entry_type *x = new entry_type[gpu_mat.column_count()];
entry_type *y1 = new entry_type[gpu_mat.row_count()];
entry_type *y2 = new entry_type[gpu_mat.row_count()];

for (int i = 0; i < mat->column_count(); ++i)
for (int i = 0; i < gpu_mat.column_count(); ++i)
x[i] = drand48();
for (int i = 0; i < mat->row_count(); ++i)
y[i] = 0;
for (int i = 0; i < gpu_mat.row_count(); ++i)
{
y1[i] = 0;
y2[i] = 0;
}

gpu_vector<entry_type> x_gpu(mat->column_count());
gpu_vector<entry_type> y_gpu(mat->row_count());
// do gpu matrix multiply
gpu_vector<entry_type> x_gpu(gpu_mat.column_count());
gpu_vector<entry_type> y_gpu(gpu_mat.row_count());

x_gpu.from_cpu(x);
y_gpu.from_cpu(y);
y_gpu.from_cpu(y2);

(*mat)(y_gpu, x_gpu);
gpu_mat(y_gpu, x_gpu);

y_gpu.to_cpu(y);
y_gpu.to_cpu(y2);
synchronize_gpu();

// compute error
(*cpu_mat)(y1, x);

entry_type error = 0;
entry_type norm = 0;

for (int i = 0; i < gpu_mat.row_count(); ++i)
{
error += (y1[i]-y2[i])*(y1[i]-y2[i]);
norm += x[i]*x[i];
}
std::cerr << error/norm << std::endl;

delete[] x;
delete[] y;
delete[] y1;
delete[] y2;

return 0;
}
52 changes: 42 additions & 10 deletions include/iterative-cuda.hpp
Expand Up @@ -83,33 +83,67 @@ namespace iterative_cuda



template <typename ValueType, typename IndexType>
class gpu_sparse_pkt_matrix_pimpl;


template <typename ValueType, typename IndexType=int>
class gpu_sparse_pkt_matrix;

template <typename ValueType, typename IndexType>
class cpu_sparse_csr_matrix_pimpl;

template <typename ValueType, typename IndexType=int>
class gpu_sparse_pkt_matrix// : noncopyable
class cpu_sparse_csr_matrix
{
public:
typedef IndexType index_type;
typedef ValueType value_type;
typedef gpu_vector<value_type, index_type> vector_type;

private:
std::auto_ptr<
gpu_sparse_pkt_matrix_pimpl<value_type, index_type>
cpu_sparse_csr_matrix_pimpl<value_type, index_type>
> pimpl;

public:
gpu_sparse_pkt_matrix(
cpu_sparse_csr_matrix(
index_type row_count,
index_type column_count,
index_type nonzero_count,
const index_type *csr_row_pointers,
const index_type *csr_column_indices,
const value_type *csr_nonzeros);
~cpu_sparse_csr_matrix();

index_type row_count() const;
index_type column_count() const;

void operator()(value_type *y, value_type const *x) const;
void extract_diagonal(value_type *d) const;

static cpu_sparse_csr_matrix *read_matrix_market_file(const char *fn);

friend class gpu_sparse_pkt_matrix<value_type, index_type>;
};




template <typename ValueType, typename IndexType>
class gpu_sparse_pkt_matrix_pimpl;

template <typename ValueType, typename IndexType>
class gpu_sparse_pkt_matrix// : noncopyable
{
public:
typedef IndexType index_type;
typedef ValueType value_type;
typedef gpu_vector<value_type, index_type> vector_type;

private:
std::auto_ptr<
gpu_sparse_pkt_matrix_pimpl<value_type, index_type>
> pimpl;

public:
gpu_sparse_pkt_matrix(
cpu_sparse_csr_matrix<value_type, index_type> const &csr_mat);
~gpu_sparse_pkt_matrix();

index_type row_count() const;
Expand All @@ -119,8 +153,6 @@ namespace iterative_cuda
void unpermute(vector_type &dest, vector_type const &src) const;

void operator()(vector_type &dest, vector_type const &src) const;

static gpu_sparse_pkt_matrix *read_matrix_market_file(const char *fn);
};


Expand Down
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Expand Up @@ -3,7 +3,7 @@ set(BUILD_SHARED_LIBS ON)
include_directories(${METIS_INCLUDE_DIR})

cuda_add_library(iterativecuda
instantiation.cu spmv/mmio.c
instantiation.cu spmv/mmio.c functions.cu
OPTIONS "-arch=${CUDA_ARCH}"
)

Expand Down
164 changes: 164 additions & 0 deletions src/cpu-sparse-matrix.hpp
@@ -0,0 +1,164 @@
/*
Iterative CUDA is licensed to you under the MIT/X Consortium license:
Copyright (c) 2009 Andreas Kloeckner.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/




#ifndef _AAFADFJ_ITERATIVE_CUDA_CPU_SPARSE_MATRIX_HPP_SEEN
#define _AAFADFJ_ITERATIVE_CUDA_CPU_SPARSE_MATRIX_HPP_SEEN




#include <iterative-cuda.hpp>
#include "sparse_io.h"




namespace iterative_cuda
{
// Private-implementation ("pimpl") holder for cpu_sparse_csr_matrix.
// Keeps the raw CSR storage type from sparse_io.h out of the public header.
template <typename ValueType, typename IndexType>
struct cpu_sparse_csr_matrix_pimpl
{
// CSR arrays live in matrix.Ap (row pointers), matrix.Aj (column
// indices) and matrix.Ax (nonzero values).
csr_matrix<IndexType, ValueType> matrix;
};




// Construct a CPU-side CSR matrix from caller-supplied CSR arrays.
//
// NOTE(review): the const_casts below store the caller's pointers directly,
// and the destructor hands them to delete_csr_matrix() — so this object
// appears to take OWNERSHIP of all three arrays.  Callers must pass
// heap-allocated arrays they relinquish (as read_matrix_market_file() does);
// truly-const or stack-allocated data would be freed incorrectly.
// TODO confirm against delete_csr_matrix()'s deallocation scheme.
template <typename VT, typename IT>
cpu_sparse_csr_matrix<VT, IT>::cpu_sparse_csr_matrix(
index_type row_count,
index_type column_count,
index_type nonzero_count,
const index_type *csr_row_pointers,
const index_type *csr_column_indices,
const value_type *csr_nonzeros)
: pimpl(new cpu_sparse_csr_matrix_pimpl<VT, IT>)
{
pimpl->matrix.num_rows = row_count;
pimpl->matrix.num_cols = column_count;
pimpl->matrix.num_nonzeros = nonzero_count;
// Adopt the arrays without copying; const is cast away so the
// destructor can free them later.
pimpl->matrix.Ap = const_cast<index_type *>(csr_row_pointers);
pimpl->matrix.Aj = const_cast<index_type *>(csr_column_indices);
pimpl->matrix.Ax = const_cast<value_type *>(csr_nonzeros);
}




// Release the CSR storage.  delete_csr_matrix() presumably frees the
// Ap/Aj/Ax arrays adopted by the constructor, so this object should be
// their sole owner — TODO confirm in sparse_io.h.
template <typename VT, typename IT>
cpu_sparse_csr_matrix<VT, IT>::~cpu_sparse_csr_matrix()
{
delete_csr_matrix(pimpl->matrix, HOST_MEMORY);
}




// Number of rows in the matrix.
template <typename VT, typename IT>
IT cpu_sparse_csr_matrix<VT, IT>::row_count() const
{
return pimpl->matrix.num_rows;
}




// Number of columns in the matrix.
template <typename VT, typename IT>
IT cpu_sparse_csr_matrix<VT, IT>::column_count() const
{
return pimpl->matrix.num_cols;
}




// Accumulating sparse matrix-vector product: y <- y + A*x.
// Note that y is read before being written — callers wanting a plain
// product must zero y first (as the multiply_matrix example does).
template <typename VT, typename IT>
void cpu_sparse_csr_matrix<VT, IT>::operator()(
value_type *y, value_type const *x) const
{
  csr_matrix<index_type, value_type> const &m(pimpl->matrix);

  for (index_type row = 0; row < m.num_rows; ++row)
  {
    // Start from the existing entry so the product accumulates.
    value_type acc = y[row];

    // Walk this row's slice of the nonzero arrays.
    for (index_type nz = m.Ap[row]; nz < m.Ap[row+1]; ++nz)
      acc += m.Ax[nz] * x[m.Aj[nz]];

    y[row] = acc;
  }
}




// Copy the main diagonal into d (one entry per row).  Rows that store no
// explicit diagonal nonzero yield 0; if a row stores the diagonal more
// than once, the last occurrence wins.
template <typename VT, typename IT>
void cpu_sparse_csr_matrix<VT, IT>::extract_diagonal(value_type *d) const
{
  csr_matrix<index_type, value_type> const &m(pimpl->matrix);

  for (index_type row = 0; row < m.num_rows; ++row)
  {
    value_type diag = 0;

    // Scan the row for an entry whose column index equals the row index.
    for (index_type nz = m.Ap[row]; nz < m.Ap[row+1]; ++nz)
      if (m.Aj[nz] == row)
        diag = m.Ax[nz];

    d[row] = diag;
  }
}




// Load a Matrix Market file and wrap it in a heap-allocated
// cpu_sparse_csr_matrix.  The matrix adopts the CSR arrays produced by
// read_csr_matrix(); the caller owns (and must delete) the returned object.
template <class ValueType, class IndexType>
cpu_sparse_csr_matrix<ValueType, IndexType> *
cpu_sparse_csr_matrix<ValueType, IndexType>::read_matrix_market_file(
const char *fn)
{
  typedef cpu_sparse_csr_matrix<ValueType, IndexType> matrix_t;

  csr_matrix<IndexType, ValueType> raw =
    read_csr_matrix<IndexType, ValueType>(fn);

  // Hold the result in an auto_ptr until we can hand it back, so an
  // exception between construction and return does not leak it.
  std::auto_ptr<matrix_t> mat(new matrix_t(
        raw.num_rows, raw.num_cols, raw.num_nonzeros,
        raw.Ap, raw.Aj, raw.Ax));

  return mat.release();
}
}




#endif
36 changes: 36 additions & 0 deletions src/functions.cu
@@ -0,0 +1,36 @@
/*
Iterative CUDA is licensed to you under the MIT/X Consortium license:
Copyright (c) 2009 Andreas Kloeckner.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/




#include <iterative-cuda.hpp>




// Block the host until all previously issued GPU work has completed.
// The multiply_matrix example calls this after to_cpu() so the
// device-to-host transfer is finished before results are read.
// NOTE(review): cudaThreadSynchronize() was the current API when this was
// written (2009) but is deprecated in later CUDA releases in favor of
// cudaDeviceSynchronize(); its cudaError_t return value is ignored here.
void iterative_cuda::synchronize_gpu()
{
cudaThreadSynchronize();
}

0 comments on commit 3a71fa9

Please sign in to comment.