Simple matrix multiplication appears to compile.

inducer · Jul 30, 2009 · 0a14dfd · 0a14dfd
1 parent 6afce29
commit 0a14dfd
Show file tree

Hide file tree

Showing 15 changed files with 1,001 additions and 10 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -13,6 +13,6 @@ find_package(CUDA QUIET REQUIRED)
 find_package(METIS REQUIRED)
 
 include_directories("${CMAKE_SOURCE_DIR}/include")
-include_directories("${CMAKE_SOURCE_DIR}/src/spmv/kernels")
+include_directories("${CMAKE_SOURCE_DIR}/src/spmv")
 
-subdirs(src)
+subdirs(src example)
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_executable(multiply_matrix
+  multiply_matrix.cpp)
+
+TARGET_LINK_LIBRARIES(multiply_matrix iterativecuda)
+
diff --git a/example/multiply_matrix b/example/multiply_matrix
diff --git a/example/multiply_matrix.cpp b/example/multiply_matrix.cpp
@@ -0,0 +1,73 @@
+/*
+Iterative CUDA is licensed to you under the MIT/X Consortium license:
+
+Copyright (c) 2009 Andreas Kloeckner.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the Software), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+
+
+
+#include <iterative-cuda.hpp>
+#include <iostream>
+#include <cstdlib>
+
+
+
+
+using namespace iterative_cuda;
+
+int main(int argc, char **argv)  // example driver: load a sparse matrix from a file and apply it once to a random vector on the GPU
+{
+  if (argc != 2)  // exactly one command-line argument is expected: the matrix file name
+  {
+    std::cerr << "usage: " << argv[0] << " matrix.mtx" << std::endl;
+    return 1;  // non-zero exit status on bad usage
+  }
+  typedef float entry_type;  // scalar type used for the matrix entries and both vectors
+  typedef gpu_sparse_pkt_matrix<entry_type> mat_type;
+  std::auto_ptr<mat_type> mat(  // takes ownership of the raw pointer returned by the factory
+      mat_type::read_matrix_market_file(argv[1]));
+
+  // build host vectors
+  entry_type *x = new entry_type[mat->column_count()];  // input vector, one entry per matrix column
+  entry_type *y = new entry_type[mat->row_count()];  // output vector, one entry per matrix row
+
+  for (int i = 0; i < mat->column_count(); ++i)
+    x[i] = drand48();  // pseudo-random input; the double result is narrowed to entry_type
+  for (int i = 0; i < mat->row_count(); ++i)
+    y[i] = 0;  // zero the host copy of the output before uploading it
+
+  gpu_vector<entry_type> x_gpu(mat->column_count());  // GPU-side counterparts of x and y
+  gpu_vector<entry_type> y_gpu(mat->row_count());
+
+  x_gpu.from_cpu(x);  // upload host data into the GPU vectors
+  y_gpu.from_cpu(y);
+
+  (*mat)(y_gpu, x_gpu);  // apply the matrix: destination vector first, source vector second
+
+  y_gpu.to_cpu(y);  // copy the result back into the host array
+  synchronize_gpu();  // wait for outstanding GPU work before releasing the host buffers
+
+  delete[] x;  // NOTE(review): y is never printed or checked, and x/y are not released if anything above throws
+  delete[] y;
+
+  return 0;
+}
diff --git a/include/iterative-cuda.hpp b/include/iterative-cuda.hpp
@@ -40,11 +40,24 @@ SOFTWARE.
 
 namespace iterative_cuda
 {
+  class noncopyable  // base-class helper meant to forbid copying of derived types; only referenced in commented-out base lists below
+  {
+   protected:
+      noncopyable() {}  // protected: constructible and destructible only through a derived class
+      ~noncopyable() {}
+   private:
+      noncopyable( const noncopyable& );  // private copy constructor, declared without a body here, so copies are rejected
+      const noncopyable& operator=( const noncopyable& );  // private copy assignment, likewise declared without a body here
+  };
+
+
+
+
   template <typename ValueType, typename IndexType>
   class gpu_vector_pimpl;
 
   template <typename ValueType, typename IndexType=int>
-  class gpu_vector
+  class gpu_vector// : noncopyable
   {
     public:
       typedef IndexType index_type;
@@ -77,7 +90,7 @@ namespace iterative_cuda
 
 
   template <typename ValueType, typename IndexType=int>
-  class gpu_sparse_pkt_matrix
+  class gpu_sparse_pkt_matrix// : noncopyable
   {
     public:
       typedef IndexType index_type;
@@ -93,6 +106,7 @@ namespace iterative_cuda
       gpu_sparse_pkt_matrix(
           index_type row_count,
           index_type column_count,
+          index_type nonzero_count,
           const index_type *csr_row_pointers,
           const index_type *csr_column_indices,
           const value_type *csr_nonzeros);
@@ -105,11 +119,18 @@ namespace iterative_cuda
       void unpermute(vector_type &dest, vector_type const &src) const;
 
       void operator()(vector_type &dest, vector_type const &src) const;
+
+      static gpu_sparse_pkt_matrix *read_matrix_market_file(const char *fn);
   };
 
 
 
 
+  void synchronize_gpu();
+
+
+
+
   template <typename ValueType, typename IndexType>
   class diagonal_preconditioner_pimpl;
 
@@ -138,6 +159,7 @@ namespace iterative_cuda
 
 
 
+
   template <typename ValueType, typename IndexType, typename Operator, typename Preconditioner>
   void run_cg(
       const Operator &a,

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -3,7 +3,7 @@ set(BUILD_SHARED_LIBS ON)
 include_directories(${METIS_INCLUDE_DIR})
 
 cuda_add_library(iterativecuda
-  instantiation.cu
+  instantiation.cu spmv/mmio.c
   OPTIONS "-arch=${CUDA_ARCH}"
   )
 

diff --git a/src/gpu-sparse-matrix.hpp b/src/gpu-sparse-matrix.hpp
@@ -33,9 +33,11 @@ SOFTWARE.
 #include <iterative-cuda.hpp>
 #include <stdint.h>
 #include "helpers.hpp"
-#include "spmv/partition.h"
-#include "spmv/csr_to_pkt.h"
-#include "spmv/utils.h"
+#include "partition.h"
+#include "csr_to_pkt.h"
+#include "utils.h"
+#include "sparse_io.h"
+#include "kernels/spmv_pkt_device.cu.h"
 
 
 
@@ -60,12 +62,16 @@ namespace iterative_cuda
   gpu_sparse_pkt_matrix<VT, IT>::gpu_sparse_pkt_matrix(
       index_type row_count,
       index_type column_count,
+      index_type nonzero_count,
       const index_type *csr_row_pointers,
       const index_type *csr_column_indices,
       const value_type *csr_nonzeros)
   : pimpl(new gpu_sparse_pkt_matrix_pimpl<VT, IT>)
   {
     csr_matrix<index_type, value_type> csr_mat;
+    csr_mat.num_rows = row_count;
+    csr_mat.num_cols = column_count;
+    csr_mat.num_nonzeros = nonzero_count;
     csr_mat.Ap = const_cast<index_type *>(csr_row_pointers);
     csr_mat.Aj = const_cast<index_type *>(csr_column_indices);
     csr_mat.Ax = const_cast<value_type *>(csr_nonzeros);
@@ -77,6 +83,7 @@ namespace iterative_cuda
     index_type block_count = ICUDA_DIVIDE_INTO(row_count, rows_per_packet);
 
     std::vector<index_type> partition;
+    partition.resize(row_count);
     partition_csr(csr_mat, block_count, partition, /*Kway*/ true);
 
     pkt_matrix<index_type, value_type> host_matrix =
@@ -135,6 +142,38 @@ namespace iterative_cuda
     gather_device(dest.ptr(), src.ptr(), 
         pimpl->matrix.permute_new_to_old, row_count());
   }
+
+
+
+
+
+  template <typename VT, typename IT>  // applies this matrix to src and stores the result in dest
+  void gpu_sparse_pkt_matrix<VT, IT>::operator()(
+      vector_type &dest, vector_type const &src) const
+  {
+    spmv_pkt_device(pimpl->matrix, src.ptr(), dest.ptr());  // note the argument order: src pointer is passed before dest pointer
+  }
+
+
+
+
+  template <class ValueType, class IndexType>  // factory: builds a heap-allocated matrix from the file named fn
+  gpu_sparse_pkt_matrix<ValueType, IndexType> *
+  gpu_sparse_pkt_matrix<ValueType, IndexType>::read_matrix_market_file(
+      const char *fn)
+  {
+    csr_matrix<IndexType, ValueType> csr_mat =  // presumably the file contents in host-side CSR form; read_csr_matrix is defined elsewhere
+      read_csr_matrix<IndexType, ValueType>(fn);
+
+    typedef gpu_sparse_pkt_matrix<ValueType, IndexType> mat_tp;
+    std::auto_ptr<mat_tp> result(new mat_tp(  // auto_ptr guards the new object until ownership is handed to the caller
+          csr_mat.num_rows, csr_mat.num_cols, csr_mat.num_nonzeros,
+          csr_mat.Ap, csr_mat.Aj, csr_mat.Ax));
+
+    delete_csr_matrix(csr_mat, HOST_MEMORY);  // NOTE(review): skipped if the constructor above throws, so csr_mat may leak in that case; confirm
+
+    return result.release();  // caller owns the returned pointer and is responsible for deleting it
+  }
 }
 
 

diff --git a/src/gpu-vector.hpp b/src/gpu-vector.hpp
@@ -107,6 +107,20 @@ namespace iterative_cuda
           size()*sizeof(value_type),
           cudaMemcpyDeviceToHost));
   }
+
+
+
+
+  template <typename VT, typename IT>  // mutable accessor for the pointer stored in pimpl->gpu_data
+  typename gpu_vector<VT, IT>::value_type *gpu_vector<VT, IT>::ptr()  // 'typename' is required: value_type is a dependent qualified type
+  { return pimpl->gpu_data; }
+
+
+
+
+  template <typename VT, typename IT>  // read-only accessor for the pointer stored in pimpl->gpu_data
+  const typename gpu_vector<VT, IT>::value_type *gpu_vector<VT, IT>::ptr() const  // 'typename' is required: value_type is a dependent qualified type
+  { return pimpl->gpu_data; }
 }
 
 

diff --git a/src/instantiation.cu b/src/instantiation.cu
@@ -41,3 +41,11 @@ template class gpu_vector<float>;
 template class gpu_vector<double>;
 template class gpu_sparse_pkt_matrix<float>;
 template class gpu_sparse_pkt_matrix<double>;
+
+
+
+
+void iterative_cuda::synchronize_gpu()  // blocks the calling host thread until previously issued GPU work has finished
+{
+  cudaThreadSynchronize();  // NOTE(review): the returned cudaError_t is discarded, so failures surfacing here go unreported
+}