Skip to content
This repository has been archived by the owner on Oct 19, 2020. It is now read-only.

Commit

Permalink
Simple matrix multiplication appears to compile.
Browse files Browse the repository at this point in the history
  • Loading branch information
inducer committed Jul 30, 2009
1 parent 6afce29 commit 0a14dfd
Show file tree
Hide file tree
Showing 15 changed files with 1,001 additions and 10 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
Expand Up @@ -13,6 +13,6 @@ find_package(CUDA QUIET REQUIRED)
find_package(METIS REQUIRED)

include_directories("${CMAKE_SOURCE_DIR}/include")
include_directories("${CMAKE_SOURCE_DIR}/src/spmv/kernels")
include_directories("${CMAKE_SOURCE_DIR}/src/spmv")

subdirs(src)
subdirs(src example)
5 changes: 5 additions & 0 deletions example/CMakeLists.txt
@@ -0,0 +1,5 @@
# Example program built from the single source file multiply_matrix.cpp.
add_executable(multiply_matrix multiply_matrix.cpp)

# The example links against the iterativecuda library target.
target_link_libraries(multiply_matrix iterativecuda)

Binary file added example/multiply_matrix
Binary file not shown.
73 changes: 73 additions & 0 deletions example/multiply_matrix.cpp
@@ -0,0 +1,73 @@
/*
Iterative CUDA is licensed to you under the MIT/X Consortium license:
Copyright (c) 2009 Andreas Kloeckner.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the Software), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/




#include <iterative-cuda.hpp>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <vector>




using namespace iterative_cuda;

/*
 * Example driver: loads a sparse matrix from the file named on the command
 * line, multiplies it with a random vector on the GPU and copies the result
 * back to the host.
 *
 * Usage: multiply_matrix matrix.mtx
 *
 * Returns 1 when the argument count is wrong, 0 otherwise.
 */
int main(int argc, char **argv)
{
  if (argc != 2)
  {
    std::cerr << "usage: " << argv[0] << " matrix.mtx" << std::endl;
    return 1;
  }

  typedef float entry_type;
  typedef gpu_sparse_pkt_matrix<entry_type> mat_type;
  typedef std::vector<entry_type> host_vector;

  // read_matrix_market_file() hands back a heap-allocated matrix; auto_ptr
  // takes ownership so it is destroyed on every exit path.
  std::auto_ptr<mat_type> mat(
      mat_type::read_matrix_market_file(argv[1]));

  // Build host vectors. They are owned by std::vector (instead of raw
  // new[]/delete[]) so they are released even if one of the GPU calls
  // below throws.
  host_vector x(mat->column_count());
  host_vector y(mat->row_count(), entry_type(0));

  for (host_vector::size_type i = 0; i < x.size(); ++i)
    x[i] = drand48();

  gpu_vector<entry_type> x_gpu(mat->column_count());
  gpu_vector<entry_type> y_gpu(mat->row_count());

  // &v[0] is undefined for an empty vector, so guard the degenerate
  // zero-row / zero-column case explicitly.
  entry_type *x_host = x.empty() ? 0 : &x[0];
  entry_type *y_host = y.empty() ? 0 : &y[0];

  x_gpu.from_cpu(x_host);
  y_gpu.from_cpu(y_host);

  // Apply the matrix: x_gpu is the source operand, y_gpu the destination.
  (*mat)(y_gpu, x_gpu);

  y_gpu.to_cpu(y_host);
  synchronize_gpu();

  return 0;
}
26 changes: 24 additions & 2 deletions include/iterative-cuda.hpp
Expand Up @@ -40,11 +40,24 @@ SOFTWARE.

namespace iterative_cuda
{
// Mixin base class that makes derived types non-copyable: the copy
// constructor and copy assignment operator are private and only declared
// here, so code outside this class cannot copy an object derived from it.
class noncopyable
{
    // Private by default (class scope): copying is not available to
    // derived classes or to their users.
    noncopyable(const noncopyable &);
    const noncopyable &operator=(const noncopyable &);

  protected:
    // Construction and destruction are reserved for derived classes.
    noncopyable() {}
    ~noncopyable() {}
};




template <typename ValueType, typename IndexType>
class gpu_vector_pimpl;

template <typename ValueType, typename IndexType=int>
class gpu_vector
class gpu_vector// : noncopyable
{
public:
typedef IndexType index_type;
Expand Down Expand Up @@ -77,7 +90,7 @@ namespace iterative_cuda


template <typename ValueType, typename IndexType=int>
class gpu_sparse_pkt_matrix
class gpu_sparse_pkt_matrix// : noncopyable
{
public:
typedef IndexType index_type;
Expand All @@ -93,6 +106,7 @@ namespace iterative_cuda
gpu_sparse_pkt_matrix(
index_type row_count,
index_type column_count,
index_type nonzero_count,
const index_type *csr_row_pointers,
const index_type *csr_column_indices,
const value_type *csr_nonzeros);
Expand All @@ -105,11 +119,18 @@ namespace iterative_cuda
void unpermute(vector_type &dest, vector_type const &src) const;

void operator()(vector_type &dest, vector_type const &src) const;

static gpu_sparse_pkt_matrix *read_matrix_market_file(const char *fn);
};




void synchronize_gpu();




template <typename ValueType, typename IndexType>
class diagonal_preconditioner_pimpl;

Expand Down Expand Up @@ -138,6 +159,7 @@ namespace iterative_cuda




template <typename ValueType, typename IndexType, typename Operator, typename Preconditioner>
void run_cg(
const Operator &a,
Expand Down
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Expand Up @@ -3,7 +3,7 @@ set(BUILD_SHARED_LIBS ON)
include_directories(${METIS_INCLUDE_DIR})

cuda_add_library(iterativecuda
instantiation.cu
instantiation.cu spmv/mmio.c
OPTIONS "-arch=${CUDA_ARCH}"
)

Expand Down
45 changes: 42 additions & 3 deletions src/gpu-sparse-matrix.hpp
Expand Up @@ -33,9 +33,11 @@ SOFTWARE.
#include <iterative-cuda.hpp>
#include <stdint.h>
#include "helpers.hpp"
#include "spmv/partition.h"
#include "spmv/csr_to_pkt.h"
#include "spmv/utils.h"
#include "partition.h"
#include "csr_to_pkt.h"
#include "utils.h"
#include "sparse_io.h"
#include "kernels/spmv_pkt_device.cu.h"



Expand All @@ -60,12 +62,16 @@ namespace iterative_cuda
gpu_sparse_pkt_matrix<VT, IT>::gpu_sparse_pkt_matrix(
index_type row_count,
index_type column_count,
index_type nonzero_count,
const index_type *csr_row_pointers,
const index_type *csr_column_indices,
const value_type *csr_nonzeros)
: pimpl(new gpu_sparse_pkt_matrix_pimpl<VT, IT>)
{
csr_matrix<index_type, value_type> csr_mat;
csr_mat.num_rows = row_count;
csr_mat.num_cols = column_count;
csr_mat.num_nonzeros = nonzero_count;
csr_mat.Ap = const_cast<index_type *>(csr_row_pointers);
csr_mat.Aj = const_cast<index_type *>(csr_column_indices);
csr_mat.Ax = const_cast<value_type *>(csr_nonzeros);
Expand All @@ -77,6 +83,7 @@ namespace iterative_cuda
index_type block_count = ICUDA_DIVIDE_INTO(row_count, rows_per_packet);

std::vector<index_type> partition;
partition.resize(row_count);
partition_csr(csr_mat, block_count, partition, /*Kway*/ true);

pkt_matrix<index_type, value_type> host_matrix =
Expand Down Expand Up @@ -135,6 +142,38 @@ namespace iterative_cuda
gather_device(dest.ptr(), src.ptr(),
pimpl->matrix.permute_new_to_old, row_count());
}





// Applies this matrix to `src` by handing the packed matrix and the raw
// pointers of both vectors to spmv_pkt_device(); `src` is passed as the
// input operand and `dest` as the output operand.
template <typename ValueType, typename IndexType>
void gpu_sparse_pkt_matrix<ValueType, IndexType>::operator()(
    vector_type &dest, vector_type const &src) const
{
  typedef typename vector_type::value_type entry_t;

  entry_t const *input = src.ptr();
  entry_t *output = dest.ptr();

  spmv_pkt_device(pimpl->matrix, input, output);
}




/*
 * Creates a gpu_sparse_pkt_matrix from the file `fn`.
 *
 * The file is loaded into a host-side CSR matrix with read_csr_matrix()
 * (presumably Matrix Market format, going by this function's name --
 * confirm against sparse_io.h). The CSR data is handed to the
 * gpu_sparse_pkt_matrix constructor and then released with
 * delete_csr_matrix(), including when that constructor throws.
 *
 * Returns a heap-allocated matrix; the caller takes ownership and must
 * delete it (e.g. by storing it in a std::auto_ptr).
 */
template <class ValueType, class IndexType>
gpu_sparse_pkt_matrix<ValueType, IndexType> *
gpu_sparse_pkt_matrix<ValueType, IndexType>::read_matrix_market_file(
    const char *fn)
{
  typedef gpu_sparse_pkt_matrix<ValueType, IndexType> mat_tp;

  csr_matrix<IndexType, ValueType> csr_mat =
    read_csr_matrix<IndexType, ValueType>(fn);

  std::auto_ptr<mat_tp> result;
  try
  {
    result.reset(new mat_tp(
          csr_mat.num_rows, csr_mat.num_cols, csr_mat.num_nonzeros,
          csr_mat.Ap, csr_mat.Aj, csr_mat.Ax));
  }
  catch (...)
  {
    // Construction failed: free the host CSR buffers before propagating
    // the exception, otherwise they would be leaked.
    delete_csr_matrix(csr_mat, HOST_MEMORY);
    throw;
  }

  // The host-side CSR copy is released here, once the GPU matrix has been
  // constructed from it.
  delete_csr_matrix(csr_mat, HOST_MEMORY);

  return result.release();
}
}


Expand Down
14 changes: 14 additions & 0 deletions src/gpu-vector.hpp
Expand Up @@ -107,6 +107,20 @@ namespace iterative_cuda
size()*sizeof(value_type),
cudaMemcpyDeviceToHost));
}




template <typename VT, typename IT>
gpu_vector<VT, IT>::value_type *gpu_vector<VT, IT>::ptr()
{ return pimpl->gpu_data; }




template <typename VT, typename IT>
const gpu_vector<VT, IT>::value_type *gpu_vector<VT, IT>::ptr() const
{ return pimpl->gpu_data; }
}


Expand Down
8 changes: 8 additions & 0 deletions src/instantiation.cu
Expand Up @@ -41,3 +41,11 @@ template class gpu_vector<float>;
template class gpu_vector<double>;
template class gpu_sparse_pkt_matrix<float>;
template class gpu_sparse_pkt_matrix<double>;




// Public synchronization entry point of the library: forwards to the CUDA
// runtime call cudaThreadSynchronize().
//
// NOTE(review): the cudaError_t returned by cudaThreadSynchronize() is
// discarded here, so a failure reported at this point goes unnoticed --
// consider routing it through the project's CUDA error-check helper.
// NOTE(review): newer CUDA toolkits document cudaThreadSynchronize() as
// deprecated in favour of cudaDeviceSynchronize() -- confirm against the
// toolkit version this project targets before changing it.
void iterative_cuda::synchronize_gpu()
{
cudaThreadSynchronize();
}

0 comments on commit 0a14dfd

Please sign in to comment.