Skip to content
This repository has been archived by the owner on Oct 19, 2020. It is now read-only.

Commit

Permalink
Verified matrix multiply works.
Browse files Browse the repository at this point in the history
  • Loading branch information
Andreas Kloeckner committed Jul 30, 2009
1 parent 0a14dfd commit 3a71fa9
Show file tree
Hide file tree
Showing 12 changed files with 322 additions and 78 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -8,3 +8,4 @@ CMakeFiles/
*.so
Makefile
cmake_install.cmake
*.linkinfo
Binary file modified example/multiply_matrix
Binary file not shown.
50 changes: 36 additions & 14 deletions example/multiply_matrix.cpp
Expand Up @@ -42,32 +42,54 @@ int main(int argc, char **argv)
return 1;
}
typedef float entry_type;
typedef gpu_sparse_pkt_matrix<entry_type> mat_type;
std::auto_ptr<mat_type> mat(
mat_type::read_matrix_market_file(argv[1]));
typedef cpu_sparse_csr_matrix<entry_type> cpu_mat_type;
typedef gpu_sparse_pkt_matrix<entry_type> gpu_mat_type;
std::auto_ptr<cpu_mat_type> cpu_mat(
cpu_mat_type::read_matrix_market_file(argv[1]));

gpu_mat_type gpu_mat(*cpu_mat);

// build host vectors
entry_type *x = new entry_type[mat->column_count()];
entry_type *y = new entry_type[mat->row_count()];
entry_type *x = new entry_type[gpu_mat.column_count()];
entry_type *y1 = new entry_type[gpu_mat.row_count()];
entry_type *y2 = new entry_type[gpu_mat.row_count()];

for (int i = 0; i < mat->column_count(); ++i)
for (int i = 0; i < gpu_mat.column_count(); ++i)
x[i] = drand48();
for (int i = 0; i < mat->row_count(); ++i)
y[i] = 0;
for (int i = 0; i < gpu_mat.row_count(); ++i)
{
y1[i] = 0;
y2[i] = 0;
}

gpu_vector<entry_type> x_gpu(mat->column_count());
gpu_vector<entry_type> y_gpu(mat->row_count());
// do gpu matrix multiply
gpu_vector<entry_type> x_gpu(gpu_mat.column_count());
gpu_vector<entry_type> y_gpu(gpu_mat.row_count());

x_gpu.from_cpu(x);
y_gpu.from_cpu(y);
y_gpu.from_cpu(y2);

(*mat)(y_gpu, x_gpu);
gpu_mat(y_gpu, x_gpu);

y_gpu.to_cpu(y);
y_gpu.to_cpu(y2);
synchronize_gpu();

// compute error
(*cpu_mat)(y1, x);

entry_type error = 0;
entry_type norm = 0;

for (int i = 0; i < gpu_mat.row_count(); ++i)
{
error += (y1[i]-y2[i])*(y1[i]-y2[i]);
norm += x[i]*x[i];
}
std::cerr << error/norm << std::endl;

delete[] x;
delete[] y;
delete[] y1;
delete[] y2;

return 0;
}
52 changes: 42 additions & 10 deletions include/iterative-cuda.hpp
Expand Up @@ -83,33 +83,67 @@ namespace iterative_cuda



template <typename ValueType, typename IndexType>
class gpu_sparse_pkt_matrix_pimpl;


template <typename ValueType, typename IndexType=int>
class gpu_sparse_pkt_matrix;

template <typename ValueType, typename IndexType>
class cpu_sparse_csr_matrix_pimpl;

template <typename ValueType, typename IndexType=int>
class gpu_sparse_pkt_matrix// : noncopyable
class cpu_sparse_csr_matrix
{
public:
typedef IndexType index_type;
typedef ValueType value_type;
typedef gpu_vector<value_type, index_type> vector_type;

private:
std::auto_ptr<
gpu_sparse_pkt_matrix_pimpl<value_type, index_type>
cpu_sparse_csr_matrix_pimpl<value_type, index_type>
> pimpl;

public:
gpu_sparse_pkt_matrix(
cpu_sparse_csr_matrix(
index_type row_count,
index_type column_count,
index_type nonzero_count,
const index_type *csr_row_pointers,
const index_type *csr_column_indices,
const value_type *csr_nonzeros);
~cpu_sparse_csr_matrix();

index_type row_count() const;
index_type column_count() const;

void operator()(value_type *y, value_type const *x) const;
void extract_diagonal(value_type *d) const;

static cpu_sparse_csr_matrix *read_matrix_market_file(const char *fn);

friend class gpu_sparse_pkt_matrix<value_type, index_type>;
};




template <typename ValueType, typename IndexType>
class gpu_sparse_pkt_matrix_pimpl;

template <typename ValueType, typename IndexType>
class gpu_sparse_pkt_matrix// : noncopyable
{
public:
typedef IndexType index_type;
typedef ValueType value_type;
typedef gpu_vector<value_type, index_type> vector_type;

private:
std::auto_ptr<
gpu_sparse_pkt_matrix_pimpl<value_type, index_type>
> pimpl;

public:
gpu_sparse_pkt_matrix(
cpu_sparse_csr_matrix<value_type, index_type> const &csr_mat);
~gpu_sparse_pkt_matrix();

index_type row_count() const;
Expand All @@ -119,8 +153,6 @@ namespace iterative_cuda
void unpermute(vector_type &dest, vector_type const &src) const;

void operator()(vector_type &dest, vector_type const &src) const;

static gpu_sparse_pkt_matrix *read_matrix_market_file(const char *fn);
};


Expand Down
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Expand Up @@ -3,7 +3,7 @@ set(BUILD_SHARED_LIBS ON)
include_directories(${METIS_INCLUDE_DIR})

cuda_add_library(iterativecuda
instantiation.cu spmv/mmio.c
instantiation.cu spmv/mmio.c functions.cu
OPTIONS "-arch=${CUDA_ARCH}"
)

Expand Down
164 changes: 164 additions & 0 deletions src/cpu-sparse-matrix.hpp
@@ -0,0 +1,164 @@
/*
Iterative CUDA is licensed to you under the MIT/X Consortium license:
Copyright (c) 2009 Andreas Kloeckner.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/




#ifndef _AAFADFJ_ITERATIVE_CUDA_CPU_SPARSE_MATRIX_HPP_SEEN
#define _AAFADFJ_ITERATIVE_CUDA_CPU_SPARSE_MATRIX_HPP_SEEN




#include <iterative-cuda.hpp>
#include "sparse_io.h"




namespace iterative_cuda
{
// Private-implementation ("pimpl") holder for cpu_sparse_csr_matrix.
// Keeps the raw CSR storage type from sparse_io.h out of the public header.
template <typename ValueType, typename IndexType>
struct cpu_sparse_csr_matrix_pimpl
{
// CSR arrays live in matrix.Ap (row pointers), matrix.Aj (column
// indices) and matrix.Ax (nonzero values).
csr_matrix<IndexType, ValueType> matrix;
};




// Construct a CPU-side CSR matrix from caller-supplied CSR arrays.
//
// NOTE(review): the const_casts below store the caller's pointers directly,
// and the destructor hands them to delete_csr_matrix() — so this object
// appears to take OWNERSHIP of all three arrays.  Callers must pass
// heap-allocated arrays they relinquish (as read_matrix_market_file() does);
// truly-const or stack-allocated data would be freed incorrectly.
// TODO confirm against delete_csr_matrix()'s deallocation scheme.
template <typename VT, typename IT>
cpu_sparse_csr_matrix<VT, IT>::cpu_sparse_csr_matrix(
index_type row_count,
index_type column_count,
index_type nonzero_count,
const index_type *csr_row_pointers,
const index_type *csr_column_indices,
const value_type *csr_nonzeros)
: pimpl(new cpu_sparse_csr_matrix_pimpl<VT, IT>)
{
pimpl->matrix.num_rows = row_count;
pimpl->matrix.num_cols = column_count;
pimpl->matrix.num_nonzeros = nonzero_count;
// Adopt the arrays without copying; const is cast away so the
// destructor can free them later.
pimpl->matrix.Ap = const_cast<index_type *>(csr_row_pointers);
pimpl->matrix.Aj = const_cast<index_type *>(csr_column_indices);
pimpl->matrix.Ax = const_cast<value_type *>(csr_nonzeros);
}




// Release the CSR storage.  delete_csr_matrix() presumably frees the
// Ap/Aj/Ax arrays adopted by the constructor, so this object should be
// their sole owner — TODO confirm in sparse_io.h.
template <typename VT, typename IT>
cpu_sparse_csr_matrix<VT, IT>::~cpu_sparse_csr_matrix()
{
delete_csr_matrix(pimpl->matrix, HOST_MEMORY);
}




// Number of rows in the matrix.
template <typename VT, typename IT>
IT cpu_sparse_csr_matrix<VT, IT>::row_count() const
{
return pimpl->matrix.num_rows;
}




// Number of columns in the matrix.
template <typename VT, typename IT>
IT cpu_sparse_csr_matrix<VT, IT>::column_count() const
{
return pimpl->matrix.num_cols;
}




// Accumulating sparse matrix-vector product: y <- y + A*x.
// Note that y is read before being written — callers wanting a plain
// product must zero y first (as the multiply_matrix example does).
template <typename VT, typename IT>
void cpu_sparse_csr_matrix<VT, IT>::operator()(
value_type *y, value_type const *x) const
{
  csr_matrix<index_type, value_type> const &m(pimpl->matrix);

  for (index_type row = 0; row < m.num_rows; ++row)
  {
    // Start from the existing entry so the product accumulates.
    value_type acc = y[row];

    // Walk this row's slice of the nonzero arrays.
    for (index_type nz = m.Ap[row]; nz < m.Ap[row+1]; ++nz)
      acc += m.Ax[nz] * x[m.Aj[nz]];

    y[row] = acc;
  }
}




// Copy the main diagonal into d (one entry per row).  Rows that store no
// explicit diagonal nonzero yield 0; if a row stores the diagonal more
// than once, the last occurrence wins.
template <typename VT, typename IT>
void cpu_sparse_csr_matrix<VT, IT>::extract_diagonal(value_type *d) const
{
  csr_matrix<index_type, value_type> const &m(pimpl->matrix);

  for (index_type row = 0; row < m.num_rows; ++row)
  {
    value_type diag = 0;

    // Scan the row for an entry whose column index equals the row index.
    for (index_type nz = m.Ap[row]; nz < m.Ap[row+1]; ++nz)
      if (m.Aj[nz] == row)
        diag = m.Ax[nz];

    d[row] = diag;
  }
}




// Load a Matrix Market file and wrap it in a heap-allocated
// cpu_sparse_csr_matrix.  The matrix adopts the CSR arrays produced by
// read_csr_matrix(); the caller owns (and must delete) the returned object.
template <class ValueType, class IndexType>
cpu_sparse_csr_matrix<ValueType, IndexType> *
cpu_sparse_csr_matrix<ValueType, IndexType>::read_matrix_market_file(
const char *fn)
{
  typedef cpu_sparse_csr_matrix<ValueType, IndexType> matrix_t;

  csr_matrix<IndexType, ValueType> raw =
    read_csr_matrix<IndexType, ValueType>(fn);

  // Hold the result in an auto_ptr until we can hand it back, so an
  // exception between construction and return does not leak it.
  std::auto_ptr<matrix_t> mat(new matrix_t(
        raw.num_rows, raw.num_cols, raw.num_nonzeros,
        raw.Ap, raw.Aj, raw.Ax));

  return mat.release();
}
}




#endif
36 changes: 36 additions & 0 deletions src/functions.cu
@@ -0,0 +1,36 @@
/*
Iterative CUDA is licensed to you under the MIT/X Consortium license:
Copyright (c) 2009 Andreas Kloeckner.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/




#include <iterative-cuda.hpp>




// Block the host until all previously issued GPU work has completed.
// The multiply_matrix example calls this after to_cpu() so the
// device-to-host transfer is finished before results are read.
// NOTE(review): cudaThreadSynchronize() was the current API when this was
// written (2009) but is deprecated in later CUDA releases in favor of
// cudaDeviceSynchronize(); its cudaError_t return value is ignored here.
void iterative_cuda::synchronize_gpu()
{
cudaThreadSynchronize();
}

0 comments on commit 3a71fa9

Please sign in to comment.