Code up diagonal preconditioner.

inducer · Jul 30, 2009 · 9aff29c · 9aff29c
1 parent 0448466
commit 9aff29c
Show file tree

Hide file tree

Showing 10 changed files with 115 additions and 37 deletions.
diff --git a/include/iterative-cuda.hpp b/include/iterative-cuda.hpp
@@ -25,8 +25,8 @@ SOFTWARE.
 
 
 
-#ifndef _AAFADFJ_ITERATIVE_CUDA_HPP_SEEN
-#define _AAFADFJ_ITERATIVE_CUDA_HPP_SEEN
+#ifndef AAFADFJ_ITERATIVE_CUDA_HPP_SEEN
+#define AAFADFJ_ITERATIVE_CUDA_HPP_SEEN
 
 
 
@@ -171,42 +171,43 @@ namespace iterative_cuda
 
 
 
-  template <typename ValueType, typename IndexType>
+  template <typename GpuVector>
   class diagonal_preconditioner_pimpl;
 
 
 
 
   template <class GpuVector>
-  class diagonal_preconditioner
+  class diagonal_preconditioner : private noncopyable  // preconditioner built from a single GPU vector; privately inherits noncopyable
   {
     public:
-      typedef GpuVector gpu_vector;
-      typedef typename gpu_vector::index_type index_type;
-      typedef typename gpu_vector::value_type value_type;
+      typedef GpuVector gpu_vector_type;  // vector type used for the stored vector, the operand and the result
+      typedef typename gpu_vector_type::index_type index_type;
+      typedef typename gpu_vector_type::value_type value_type;
 
     private:
       std::auto_ptr<
-        diagonal_preconditioner_pimpl<value_type, index_type>
-        > m_pimpl;
+        diagonal_preconditioner_pimpl<gpu_vector_type>
+        > pimpl;  // owning pointer to the implementation struct, which is only forward-declared above
 
     public:
-      diagonal_preconditioner(gpu_vector const &vec);
+      // keeps a reference to vec (only its address is stored), so vec must outlive this object
+      diagonal_preconditioner(gpu_vector_type const &vec);
 
-      void operator()(gpu_vector const &op, gpu_vector& result);
+      void operator()(gpu_vector_type &result, gpu_vector_type const &op);  // applies the preconditioner to op, writing into result; NOTE(review): non-const although gpu_cg takes a const Preconditioner* -- confirm
   };
 
 
 
 
 
   template <typename ValueType, typename IndexType, typename Operator, typename Preconditioner>
-  void run_cg(
+  void gpu_cg(  // declaration only; presumably a conjugate-gradient solve on the GPU (definition not shown here) -- confirm
       const Operator &a,
-      const Preconditioner &m_inv,
       gpu_vector<ValueType, IndexType> const &x,
       gpu_vector<ValueType, IndexType> const &b,
-      ValueType tol = 1e-8);
+      ValueType tol=1e-8,
+      const Preconditioner *m_inv=0);  // optional preconditioner, null by default; NOTE(review): Preconditioner cannot be deduced from a default argument, so calls omitting m_inv must spell out the template arguments
 }
 
 

diff --git a/src/bitlog.hpp b/src/bitlog.hpp
@@ -30,8 +30,8 @@ SOFTWARE.
 
 
 
-#ifndef _AFJDFJSDFSD_ITERATIVE_CUDA_HEADER_SEEN_BITLOG_HPP
-#define _AFJDFJSDFSD_ITERATIVE_CUDA_HEADER_SEEN_BITLOG_HPP
+#ifndef AFJDFJSDFSD_ITERATIVE_CUDA_HEADER_SEEN_BITLOG_HPP
+#define AFJDFJSDFSD_ITERATIVE_CUDA_HEADER_SEEN_BITLOG_HPP
 
 
 

diff --git a/src/cpu-sparse-matrix.hpp b/src/cpu-sparse-matrix.hpp
@@ -25,8 +25,8 @@ SOFTWARE.
 
 
 
-#ifndef _AAFADFJ_ITERATIVE_CUDA_CPU_SPARSE_MATRIX_HPP_SEEN
-#define _AAFADFJ_ITERATIVE_CUDA_CPU_SPARSE_MATRIX_HPP_SEEN
+#ifndef AAFADFJ_ITERATIVE_CUDA_CPU_SPARSE_MATRIX_HPP_SEEN
+#define AAFADFJ_ITERATIVE_CUDA_CPU_SPARSE_MATRIX_HPP_SEEN
 
 
 

diff --git a/src/diag-preconditioner.hpp b/src/diag-preconditioner.hpp
@@ -0,0 +1,73 @@
+/*
+Iterative CUDA is licensed to you under the MIT/X Consortium license:
+
+Copyright (c) 2009 Andreas Kloeckner.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the Software), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+
+
+#ifndef AAFADFJ_ITERATIVE_CUDA_DIAG_PRECONDITIONER_HPP_SEEN
+#define AAFADFJ_ITERATIVE_CUDA_DIAG_PRECONDITIONER_HPP_SEEN
+
+
+
+
+#include <iterative-cuda.hpp>
+#include "elementwise.hpp"
+
+
+
+
+namespace iterative_cuda
+{
+  template <typename GpuVector>
+  struct diagonal_preconditioner_pimpl  // implementation state behind diagonal_preconditioner<GpuVector>
+  {
+    GpuVector const *vec;  // non-owning pointer to the caller's vector, set by the constructor; presumably the (inverse) diagonal entries -- confirm
+  };
+
+
+
+
+
+  template <class GpuVector>
+  inline diagonal_preconditioner<GpuVector>::
+  diagonal_preconditioner(gpu_vector_type const &vec)  // allocates the pimpl and records the address of vec (no copy is made)
+    : pimpl(new diagonal_preconditioner_pimpl<GpuVector>)
+  {
+    pimpl->vec = &vec;  // vec must stay alive for as long as operator() may be called
+  }
+
+
+
+
+  template <class GpuVector>
+  inline void diagonal_preconditioner<GpuVector>::operator()(  // applies the preconditioner: result receives product(op, stored vector)
+      gpu_vector_type &result, gpu_vector_type const &op)
+  {
+    product(op, *pimpl->vec, result);  // product() in elementwise.hpp sizes its launch by its first argument (op); NOTE(review): no size check against the stored vector or result
+  }
+}
+
+
+
+
+#endif
diff --git a/src/elementwise.hpp b/src/elementwise.hpp
@@ -25,8 +25,8 @@ SOFTWARE.
 
 
 
-#ifndef _AAFADFJ_ITERATIVE_CUDA_ELEMENTWISE_HPP_SEEN
-#define _AAFADFJ_ITERATIVE_CUDA_ELEMENTWISE_HPP_SEEN
+#ifndef AAFADFJ_ITERATIVE_CUDA_ELEMENTWISE_HPP_SEEN
+#define AAFADFJ_ITERATIVE_CUDA_ELEMENTWISE_HPP_SEEN
 
 
 
@@ -182,12 +182,12 @@ namespace iterative_cuda
   void product(
       gpu_vector<VT, IT> const &x,
       gpu_vector<VT, IT> const &y,
-      gpu_vector<VT, IT> const &z)
+      gpu_vector<VT, IT> &z)
   {
     dim3 grid, block;
     splay(x.size(), grid, block);
     product_kernel<VT><<<grid, block>>>(
-        x.ptr(), y.ptr(), z.ptr());
+        x.ptr(), y.ptr(), z.ptr(), x.size());
   }
 }
 

diff --git a/src/gpu-sparse-matrix.hpp b/src/gpu-sparse-matrix.hpp
@@ -25,8 +25,8 @@ SOFTWARE.
 
 
 
-#ifndef _AAFADFJ_ITERATIVE_CUDA_GPU_SPARSE_MATRIX_HPP_SEEN
-#define _AAFADFJ_ITERATIVE_CUDA_GPU_SPARSE_MATRIX_HPP_SEEN
+#ifndef AAFADFJ_ITERATIVE_CUDA_GPU_SPARSE_MATRIX_HPP_SEEN
+#define AAFADFJ_ITERATIVE_CUDA_GPU_SPARSE_MATRIX_HPP_SEEN
 
 
 
@@ -60,7 +60,7 @@ namespace iterative_cuda
 
 
   template <typename VT, typename IT>
-  gpu_sparse_pkt_matrix<VT, IT>::gpu_sparse_pkt_matrix(
+  inline gpu_sparse_pkt_matrix<VT, IT>::gpu_sparse_pkt_matrix(
       cpu_sparse_csr_matrix<VT, IT> const &csr_mat)
   : pimpl(new gpu_sparse_pkt_matrix_pimpl<VT, IT>)
   {
@@ -106,7 +106,7 @@ namespace iterative_cuda
 
 
   template <typename VT, typename IT>
-  gpu_sparse_pkt_matrix<VT, IT>::~gpu_sparse_pkt_matrix()
+  inline gpu_sparse_pkt_matrix<VT, IT>::~gpu_sparse_pkt_matrix()  // destructor: hands pimpl->matrix to delete_pkt_matrix with DEVICE_MEMORY
   {
     delete_pkt_matrix(pimpl->matrix, DEVICE_MEMORY);
   }
@@ -115,7 +115,7 @@ namespace iterative_cuda
 
 
   template <typename VT, typename IT>
-  IT gpu_sparse_pkt_matrix<VT, IT>::row_count() const
+  inline IT gpu_sparse_pkt_matrix<VT, IT>::row_count() const  // returns the num_rows field of the wrapped matrix
   {
     return pimpl->matrix.num_rows;
   }
@@ -124,7 +124,7 @@ namespace iterative_cuda
 
 
   template <typename VT, typename IT>
-  IT gpu_sparse_pkt_matrix<VT, IT>::column_count() const
+  inline IT gpu_sparse_pkt_matrix<VT, IT>::column_count() const  // returns the num_cols field of the wrapped matrix
   {
     return pimpl->matrix.num_cols;
   }
@@ -133,7 +133,7 @@ namespace iterative_cuda
 
 
   template <typename VT, typename IT>
-  void gpu_sparse_pkt_matrix<VT, IT>::permute(
+  inline void gpu_sparse_pkt_matrix<VT, IT>::permute(
       vector_type &dest,
       vector_type const &src) const
   {
@@ -145,7 +145,7 @@ namespace iterative_cuda
 
 
   template <typename VT, typename IT>
-  void gpu_sparse_pkt_matrix<VT, IT>::unpermute(
+  inline void gpu_sparse_pkt_matrix<VT, IT>::unpermute(
       vector_type &dest,
       vector_type const &src) const
   {
@@ -158,7 +158,7 @@ namespace iterative_cuda
 
 
   template <typename VT, typename IT>
-  void gpu_sparse_pkt_matrix<VT, IT>::operator()(
+  inline void gpu_sparse_pkt_matrix<VT, IT>::operator()(
       vector_type &dest, vector_type const &src) const
   {
     spmv_pkt_device(pimpl->matrix, src.ptr(), dest.ptr());

diff --git a/src/helpers.hpp b/src/helpers.hpp
@@ -25,8 +25,8 @@ SOFTWARE.
 
 
 
-#ifndef _AAFADFJ_ITERATIVE_CUDA_HELPERS_HPP_SEEN
-#define _AAFADFJ_ITERATIVE_CUDA_HELPERS_HPP_SEEN
+#ifndef AAFADFJ_ITERATIVE_CUDA_HELPERS_HPP_SEEN
+#define AAFADFJ_ITERATIVE_CUDA_HELPERS_HPP_SEEN
 
 
 

diff --git a/src/instantiation.cu b/src/instantiation.cu
@@ -29,6 +29,7 @@ SOFTWARE.
 #include "gpu-vector.hpp"
 #include "cpu-sparse-matrix.hpp"
 #include "gpu-sparse-matrix.hpp"
+#include "diag-preconditioner.hpp"
 
 
 
@@ -46,3 +47,6 @@ template class cpu_sparse_csr_matrix<double>;
 
 template class gpu_sparse_pkt_matrix<float>;
 template class gpu_sparse_pkt_matrix<double>;
+
+template class diagonal_preconditioner<gpu_vector<float> >;  // explicit instantiation for gpu_vector<float>
+template class diagonal_preconditioner<gpu_vector<double> >;  // explicit instantiation for gpu_vector<double>
diff --git a/src/mempool.hpp b/src/mempool.hpp
@@ -30,8 +30,8 @@ SOFTWARE.
 
 
 
-#ifndef _AFJDFJSDFSD_ITERATIVE_CUDA_HEADER_SEEN_MEMPOOL_HPP
-#define _AFJDFJSDFSD_ITERATIVE_PYCUDA_HEADER_SEEN_MEMPOOL_HPP
+#ifndef AFJDFJSDFSD_ITERATIVE_CUDA_HEADER_SEEN_MEMPOOL_HPP  // include guard: the macro tested here must be the one defined on the next line
+#define AFJDFJSDFSD_ITERATIVE_CUDA_HEADER_SEEN_MEMPOOL_HPP
 
 
 

diff --git a/src/reduction.hpp b/src/reduction.hpp
@@ -27,8 +27,8 @@ Based on code by Mark Harris at Nvidia.
 
 
 
-#ifndef _AAFADFJ_ITERATIVE_CUDA_REDUCTION_HPP_SEEN
-#define _AAFADFJ_ITERATIVE_CUDA_REDUCTION_HPP_SEEN
+#ifndef AAFADFJ_ITERATIVE_CUDA_REDUCTION_HPP_SEEN
+#define AAFADFJ_ITERATIVE_CUDA_REDUCTION_HPP_SEEN