More build tweakage.

inducer · Jul 29, 2009 · 9dfa212 · 9dfa212
1 parent 1e14451
commit 9dfa212
Show file tree

Hide file tree

Showing 19 changed files with 1,161 additions and 50 deletions.
diff --git a/CMake/FindMETIS.cmake b/CMake/FindMETIS.cmake
@@ -1,10 +1,11 @@
 #
 # Find the METIS includes and libraries
 #
-# ParMETIS is an MPI-based parallel library that implements a variety of algorithms for
-# partitioning unstructured graphs, meshes, and for computing fill-reducing orderings of
+# METIS is a library that implements a variety of algorithms for
+# partitioning unstructured graphs, meshes, and for computing 
+# fill-reducing orderings of
 # sparse matrices. It can be found at:
-# 	http://www-users.cs.umn.edu/~karypis/metis/parmetis/index.html
+# 	http://www-users.cs.umn.edu/~karypis/metis/
 #
 # METIS_INCLUDE_DIR - where to find metis.h
 # METIS_LIBRARIES   - List of fully qualified libraries to link against.
@@ -15,16 +16,18 @@
 FIND_PATH(METIS_INCLUDE_DIR metis.h
   /usr/local/include
   /usr/include
-  /usr/include/parmetis
+  /usr/include/metis
   )
 
 FIND_LIBRARY(METIS_LIBRARY metis
   /usr/local/lib
   /usr/lib
   )
 
-IF(METIS_INCLUDE_DIR)
-  IF(METIS_LIBRARY)
-    SET( METIS_FOUND "YES" )
-  ENDIF(METIS_LIBRARY)
-ENDIF(METIS_INCLUDE_DIR)
+IF(METIS_INCLUDE_DIR AND METIS_LIBRARY)
+  SET( METIS_FOUND "YES" )
+ELSE()
+  IF(METIS_FIND_REQUIRED)
+    MESSAGE(FATAL_ERROR "METIS not found")
+  ENDIF()
+ENDIF()
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -5,15 +5,12 @@ set(CMAKE_MODULE_PATH
   "${CMAKE_SOURCE_DIR}/CMake/cuda" 
   ${CMAKE_MODULE_PATH})
 
-Project(iterative_cuda)
+project(iterative_cuda)
 
 find_package(CUDA QUIET REQUIRED)
 find_package(METIS REQUIRED)
 
-INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/include")
-#FIND_LIBRARY(LAPACKPP_LIBRARY lapackpp PATHS /users/cpfrang/pool/lib)
-#ADD_LIBRARY(MeinQuatsch ${LAPACKPP_LIBRARY})
-#ADD_EXECUTABLE(MeinQuatsch tSpdSolve.cc )
-#TARGET_LINK_LIBRARIES(MeinQuatsch ${LAPACKPP_LIBRARY})
+include_directories("${CMAKE_SOURCE_DIR}/include")
+include_directories("${CMAKE_SOURCE_DIR}/src/spmv/kernels")
 
 subdirs( src)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -1,14 +1,14 @@
 set(BUILD_SHARED_LIBS ON)
-set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE ON)
 
-CUDA_COMPILE(CUDA_FILES host_instantiation.cpp)
-CUDA_COMPILE(CUDA_FILES instantiation.cu)
+include_directories(${METIS_INCLUDE_DIR})
+message("${METIS_INCLUDE_DIR} bitches")
 
-
-ADD_LIBRARY(iterativecuda
-  ${CUDA_FILES}
+cuda_add_library(iterativecuda
+  host_instantiation.cpp 
+  instantiation.cu
   )
 
-SET_TARGET_PROPERTIES(iterativecuda PROPERTIES LINKER_LANGUAGE CXX)
+target_link_libraries(iterativecuda ${METIS_LIBRARY})
+set_target_properties(iterativecuda PROPERTIES LINKER_LANGUAGE CXX)
 
-CUDA_BUILD_CLEAN_TARGET()
+cuda_build_clean_target()
diff --git a/src/gpu-sparse-matrix.hpp b/src/gpu-sparse-matrix.hpp
@@ -25,22 +25,23 @@ SOFTWARE.
 
 
 
-#ifndef _AAFADFJ_ITERATIVE_CUDA_GPU_VECTOR_HPP_SEEN
-#define _AAFADFJ_ITERATIVE_CUDA_GPU_VECTOR_HPP_SEEN
+#ifndef _AAFADFJ_ITERATIVE_CUDA_GPU_SPARSE_MATRIX_HPP_SEEN
+#define _AAFADFJ_ITERATIVE_CUDA_GPU_SPARSE_MATRIX_HPP_SEEN
 
 
 
 #include <iterative-cuda.hpp>
+#include <stdint.h>
 #include "helpers.hpp"
-#include "partition.hpp"
-#include "csr_to_pk.hpp"
+#include "spmv/partition.h"
+#include "spmv/csr_to_pkt.h"
 
 
 
 
 namespace iterative_cuda
 {
-  typedef std::uint32_t packed_index_type;
+  typedef uint32_t packed_index_type;
 
 
 
@@ -61,7 +62,7 @@ namespace iterative_cuda
     IndexType *coo_i;
     IndexType *coo_j;
     ValueType *coo_v;
-  }
+  };
 
 
 
@@ -73,26 +74,27 @@ namespace iterative_cuda
       const index_type *csr_row_pointers,
       const index_type *csr_column_indices,
       const value_type *csr_nonzeros)
-  : pimpl(new gpu_sparse_pkt_matrix_pimpl)
+  : pimpl(new gpu_sparse_pkt_matrix_pimpl<VT, IT>)
   {
     csr_matrix<index_type, value_type> csr_mat;
-    csr_mat.Ap = csr_row_pointers;
-    csr_mat.Aj = csr_column_indices;
-    csr_mat.Ax = csr_nonzeros;
+    csr_mat.Ap = const_cast<index_type *>(csr_row_pointers);
+    csr_mat.Aj = const_cast<index_type *>(csr_column_indices);
+    csr_mat.Ax = const_cast<value_type *>(csr_nonzeros);
 
     index_type rows_per_packet = 
-      (SHARED_MEM_AMOUNT - 100)
+      (SHARED_MEM_BYTES - 100)
       / (2*sizeof(value_type));
 
-    index_type block_count = divide_into(h, rows_per_packet);
+    index_type block_count = ICUDA_DIVIDE_INTO(row_count, rows_per_packet);
 
-    std::vector<IndexType> partition;
+    std::vector<index_type> partition;
     partition_csr(csr_mat, block_count, partition, /*Kway*/ true);
 
     pkt_matrix<index_type, value_type> pkt =
       csr_to_pkt(csr_mat, partition.data());
 
 
+
   }
 }
 

diff --git a/src/gpu-vector.hpp b/src/gpu-vector.hpp
@@ -52,7 +52,7 @@ namespace iterative_cuda
   gpu_vector<VT, IT>::gpu_vector(index_type size)
   : pimpl(new gpu_vector_pimpl<VT, IT>)
   {
-    CUDA_CHK(cudaMalloc, ((void **) &pimpl->gpu_data, size*sizeof(value_type)));
+    ICUDA_CHK(cudaMalloc, ((void **) &pimpl->gpu_data, size*sizeof(value_type)));
     pimpl->size = size;
   }
 
@@ -63,9 +63,9 @@ namespace iterative_cuda
   gpu_vector<VT, IT>::gpu_vector(gpu_vector const &src)
   : pimpl(new gpu_vector_pimpl<VT, IT>)
   {
-    CUDA_CHK(cudaMalloc, ((void **) &pimpl->gpu_data, src.size()*sizeof(value_type)));
+    ICUDA_CHK(cudaMalloc, ((void **) &pimpl->gpu_data, src.size()*sizeof(value_type)));
     pimpl->size = src.size();
-    CUDA_CHK(cudaMemcpy, (pimpl->gpu_data, src.pimpl->gpu_data, 
+    ICUDA_CHK(cudaMemcpy, (pimpl->gpu_data, src.pimpl->gpu_data, 
           src.size()*sizeof(value_type),
           cudaMemcpyDeviceToDevice));
   }
@@ -76,7 +76,7 @@ namespace iterative_cuda
   template <typename VT, typename IT>
   gpu_vector<VT, IT>::~gpu_vector()
   {
-    CUDA_CHK(cudaFree, (pimpl->gpu_data));
+    ICUDA_CHK(cudaFree, (pimpl->gpu_data));
   }
 
 
@@ -92,7 +92,7 @@ namespace iterative_cuda
   template <typename VT, typename IT>
   void gpu_vector<VT, IT>::from_cpu(value_type *cpu)
   {
-    CUDA_CHK(cudaMemcpy, (pimpl->gpu_data, cpu, 
+    ICUDA_CHK(cudaMemcpy, (pimpl->gpu_data, cpu, 
           size()*sizeof(value_type),
           cudaMemcpyHostToDevice));
   }
@@ -103,7 +103,7 @@ namespace iterative_cuda
   template <typename VT, typename IT>
   void gpu_vector<VT, IT>::to_cpu(value_type *cpu)
   {
-    CUDA_CHK(cudaMemcpy, (cpu, pimpl->gpu_data,
+    ICUDA_CHK(cudaMemcpy, (cpu, pimpl->gpu_data,
           size()*sizeof(value_type),
           cudaMemcpyDeviceToHost));
   }

diff --git a/src/helpers.hpp b/src/helpers.hpp
@@ -33,12 +33,21 @@ SOFTWARE.
 
 #include <cstdio>
 #include <cstdlib>
-#include <cudart.h>
+#include <cuda_runtime.h>
 
 
 
 
-#define CUDA_CHK(NAME, ARGS) { \
+#define SHARED_MEM_BYTES 16384
+
+
+
+
+#define ICUDA_DIVIDE_INTO(x,y) ((x + y - 1)/y)
+
+
+
+#define ICUDA_CHK(NAME, ARGS) { \
   cudaError_t cuda_err_code = NAME ARGS; \
   if (cuda_err_code != cudaSuccess) { \
     printf("%s failed with code %d\n", #NAME, cuda_err_code); \

diff --git a/src/host_instantiation.cpp b/src/host_instantiation.cpp
@@ -37,8 +37,7 @@ using namespace iterative_cuda;
 
 
 
-template class gpu_vector<int, float>;
-template class gpu_vector<int, double>;
-template class gpu_sparse_pkt_matrix<int, float>;
-template class gpu_sparse_pkt_matrix<int, double>;
-
+template class gpu_vector<float>;
+template class gpu_vector<double>;
+template class gpu_sparse_pkt_matrix<float>;
+template class gpu_sparse_pkt_matrix<double>;
diff --git a/src/array_utils.h → src/spmv/array_utils.h b/src/array_utils.h → src/spmv/array_utils.h
diff --git a/src/csr_to_pkt.h → src/spmv/csr_to_pkt.h b/src/csr_to_pkt.h → src/spmv/csr_to_pkt.h
diff --git a/src/spmv_common_device.cu.h → src/spmv/kernels/spmv_common_device.cu.h b/src/spmv_common_device.cu.h → src/spmv/kernels/spmv_common_device.cu.h
diff --git a/src/spmv_coo_flat_device.cu.h → src/spmv/kernels/spmv_coo_flat_device.cu.h b/src/spmv_coo_flat_device.cu.h → src/spmv/kernels/spmv_coo_flat_device.cu.h
diff --git a/src/spmv_coo_serial_device.cu.h → src/spmv/kernels/spmv_coo_serial_device.cu.h b/src/spmv_coo_serial_device.cu.h → src/spmv/kernels/spmv_coo_serial_device.cu.h
diff --git a/src/spmv_pkt_device.cu.h → src/spmv/kernels/spmv_pkt_device.cu.h b/src/spmv_pkt_device.cu.h → src/spmv/kernels/spmv_pkt_device.cu.h