Skip to content
This repository has been archived by the owner on Oct 19, 2020. It is now read-only.

Commit

Permalink
More build tweakage.
Browse files Browse the repository at this point in the history
  • Loading branch information
inducer committed Jul 29, 2009
1 parent 1e14451 commit 9dfa212
Show file tree
Hide file tree
Showing 19 changed files with 1,161 additions and 50 deletions.
21 changes: 12 additions & 9 deletions CMake/FindMETIS.cmake
@@ -1,10 +1,11 @@
#
# Find the METIS includes and libraries
#
# ParMETIS is an MPI-based parallel library that implements a variety of algorithms for
# partitioning unstructured graphs, meshes, and for computing fill-reducing orderings of
# METIS is a library that implements a variety of algorithms for
# partitioning unstructured graphs, meshes, and for computing
# fill-reducing orderings of
# sparse matrices. It can be found at:
# http://www-users.cs.umn.edu/~karypis/metis/parmetis/index.html
# http://www-users.cs.umn.edu/~karypis/metis/
#
# METIS_INCLUDE_DIR - where to find metis.h
# METIS_LIBRARIES - List of fully qualified libraries to link against.
Expand All @@ -15,16 +16,18 @@
FIND_PATH(METIS_INCLUDE_DIR metis.h
/usr/local/include
/usr/include
/usr/include/parmetis
/usr/include/metis
)

FIND_LIBRARY(METIS_LIBRARY metis
/usr/local/lib
/usr/lib
)

IF(METIS_INCLUDE_DIR)
IF(METIS_LIBRARY)
SET( METIS_FOUND "YES" )
ENDIF(METIS_LIBRARY)
ENDIF(METIS_INCLUDE_DIR)
# Report METIS as found only when both the header directory and the library
# were located. METIS_LIBRARIES is populated here so that the variable
# advertised in this module's header comment is actually defined for callers.
IF(METIS_INCLUDE_DIR AND METIS_LIBRARY)
  SET( METIS_FOUND "YES" )
  SET( METIS_LIBRARIES ${METIS_LIBRARY} )
ELSE()
  # Abort configuration only if the caller passed REQUIRED to find_package().
  IF(METIS_FIND_REQUIRED)
    MESSAGE(FATAL_ERROR "METIS not found")
  ENDIF()
ENDIF()
9 changes: 3 additions & 6 deletions CMakeLists.txt
Expand Up @@ -5,15 +5,12 @@ set(CMAKE_MODULE_PATH
"${CMAKE_SOURCE_DIR}/CMake/cuda"
${CMAKE_MODULE_PATH})

Project(iterative_cuda)
project(iterative_cuda)

find_package(CUDA QUIET REQUIRED)
find_package(METIS REQUIRED)

INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/include")
#FIND_LIBRARY(LAPACKPP_LIBRARY lapackpp PATHS /users/cpfrang/pool/lib)
#ADD_LIBRARY(MeinQuatsch ${LAPACKPP_LIBRARY})
#ADD_EXECUTABLE(MeinQuatsch tSpdSolve.cc )
#TARGET_LINK_LIBRARIES(MeinQuatsch ${LAPACKPP_LIBRARY})
include_directories("${CMAKE_SOURCE_DIR}/include")
include_directories("${CMAKE_SOURCE_DIR}/src/spmv/kernels")

subdirs( src)
16 changes: 8 additions & 8 deletions src/CMakeLists.txt
@@ -1,14 +1,14 @@
set(BUILD_SHARED_LIBS ON)
set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE ON)

CUDA_COMPILE(CUDA_FILES host_instantiation.cpp)
CUDA_COMPILE(CUDA_FILES instantiation.cu)
include_directories(${METIS_INCLUDE_DIR})
# Log the METIS header location picked up by find_package(METIS).
message(STATUS "METIS include directory: ${METIS_INCLUDE_DIR}")


ADD_LIBRARY(iterativecuda
${CUDA_FILES}
cuda_add_library(iterativecuda
host_instantiation.cpp
instantiation.cu
)

SET_TARGET_PROPERTIES(iterativecuda PROPERTIES LINKER_LANGUAGE CXX)
target_link_libraries(iterativecuda ${METIS_LIBRARY})
set_target_properties(iterativecuda PROPERTIES LINKER_LANGUAGE CXX)

CUDA_BUILD_CLEAN_TARGET()
cuda_build_clean_target()
28 changes: 15 additions & 13 deletions src/gpu-sparse-matrix.hpp
Expand Up @@ -25,22 +25,23 @@ SOFTWARE.



#ifndef _AAFADFJ_ITERATIVE_CUDA_GPU_VECTOR_HPP_SEEN
#define _AAFADFJ_ITERATIVE_CUDA_GPU_VECTOR_HPP_SEEN
#ifndef _AAFADFJ_ITERATIVE_CUDA_GPU_SPARSE_MATRIX_HPP_SEEN
#define _AAFADFJ_ITERATIVE_CUDA_GPU_SPARSE_MATRIX_HPP_SEEN



#include <iterative-cuda.hpp>
#include <stdint.h>
#include "helpers.hpp"
#include "partition.hpp"
#include "csr_to_pk.hpp"
#include "spmv/partition.h"
#include "spmv/csr_to_pkt.h"




namespace iterative_cuda
{
typedef std::uint32_t packed_index_type;
typedef uint32_t packed_index_type;



Expand All @@ -61,7 +62,7 @@ namespace iterative_cuda
IndexType *coo_i;
IndexType *coo_j;
ValueType *coo_v;
}
};



Expand All @@ -73,26 +74,27 @@ namespace iterative_cuda
const index_type *csr_row_pointers,
const index_type *csr_column_indices,
const value_type *csr_nonzeros)
: pimpl(new gpu_sparse_pkt_matrix_pimpl)
: pimpl(new gpu_sparse_pkt_matrix_pimpl<VT, IT>)
{
csr_matrix<index_type, value_type> csr_mat;
csr_mat.Ap = csr_row_pointers;
csr_mat.Aj = csr_column_indices;
csr_mat.Ax = csr_nonzeros;
csr_mat.Ap = const_cast<index_type *>(csr_row_pointers);
csr_mat.Aj = const_cast<index_type *>(csr_column_indices);
csr_mat.Ax = const_cast<value_type *>(csr_nonzeros);

index_type rows_per_packet =
(SHARED_MEM_AMOUNT - 100)
(SHARED_MEM_BYTES - 100)
/ (2*sizeof(value_type));

index_type block_count = divide_into(h, rows_per_packet);
index_type block_count = ICUDA_DIVIDE_INTO(row_count, rows_per_packet);

std::vector<IndexType> partition;
std::vector<index_type> partition;
partition_csr(csr_mat, block_count, partition, /*Kway*/ true);

pkt_matrix<index_type, value_type> pkt =
csr_to_pkt(csr_mat, partition.data());



}
}

Expand Down
12 changes: 6 additions & 6 deletions src/gpu-vector.hpp
Expand Up @@ -52,7 +52,7 @@ namespace iterative_cuda
gpu_vector<VT, IT>::gpu_vector(index_type size)
: pimpl(new gpu_vector_pimpl<VT, IT>)
{
CUDA_CHK(cudaMalloc, ((void **) &pimpl->gpu_data, size*sizeof(value_type)));
ICUDA_CHK(cudaMalloc, ((void **) &pimpl->gpu_data, size*sizeof(value_type)));
pimpl->size = size;
}

Expand All @@ -63,9 +63,9 @@ namespace iterative_cuda
gpu_vector<VT, IT>::gpu_vector(gpu_vector const &src)
: pimpl(new gpu_vector_pimpl<VT, IT>)
{
CUDA_CHK(cudaMalloc, ((void **) &pimpl->gpu_data, src.size()*sizeof(value_type)));
ICUDA_CHK(cudaMalloc, ((void **) &pimpl->gpu_data, src.size()*sizeof(value_type)));
pimpl->size = src.size();
CUDA_CHK(cudaMemcpy, (pimpl->gpu_data, src.pimpl->gpu_data,
ICUDA_CHK(cudaMemcpy, (pimpl->gpu_data, src.pimpl->gpu_data,
src.size()*sizeof(value_type),
cudaMemcpyDeviceToDevice));
}
Expand All @@ -76,7 +76,7 @@ namespace iterative_cuda
template <typename VT, typename IT>
gpu_vector<VT, IT>::~gpu_vector()
{
CUDA_CHK(cudaFree, (pimpl->gpu_data));
ICUDA_CHK(cudaFree, (pimpl->gpu_data));
}


Expand All @@ -92,7 +92,7 @@ namespace iterative_cuda
template <typename VT, typename IT>
void gpu_vector<VT, IT>::from_cpu(value_type *cpu)
{
CUDA_CHK(cudaMemcpy, (pimpl->gpu_data, cpu,
ICUDA_CHK(cudaMemcpy, (pimpl->gpu_data, cpu,
size()*sizeof(value_type),
cudaMemcpyHostToDevice));
}
Expand All @@ -103,7 +103,7 @@ namespace iterative_cuda
template <typename VT, typename IT>
void gpu_vector<VT, IT>::to_cpu(value_type *cpu)
{
CUDA_CHK(cudaMemcpy, (cpu, pimpl->gpu_data,
ICUDA_CHK(cudaMemcpy, (cpu, pimpl->gpu_data,
size()*sizeof(value_type),
cudaMemcpyDeviceToHost));
}
Expand Down
13 changes: 11 additions & 2 deletions src/helpers.hpp
Expand Up @@ -33,12 +33,21 @@ SOFTWARE.

#include <cstdio>
#include <cstdlib>
#include <cudart.h>
#include <cuda_runtime.h>




#define CUDA_CHK(NAME, ARGS) { \
#define SHARED_MEM_BYTES 16384




// Integer ceiling division for positive operands: the number of chunks of
// size y needed to cover x items. Both arguments are parenthesized so that
// compound expressions (e.g. `a + b`) expand with the intended precedence.
// Note that y is evaluated more than once; avoid arguments with side effects.
#define ICUDA_DIVIDE_INTO(x,y) (((x) + (y) - 1)/(y))



#define ICUDA_CHK(NAME, ARGS) { \
cudaError_t cuda_err_code = NAME ARGS; \
if (cuda_err_code != cudaSuccess) { \
printf("%s failed with code %d\n", #NAME, cuda_err_code); \
Expand Down
9 changes: 4 additions & 5 deletions src/host_instantiation.cpp
Expand Up @@ -37,8 +37,7 @@ using namespace iterative_cuda;



template class gpu_vector<int, float>;
template class gpu_vector<int, double>;
template class gpu_sparse_pkt_matrix<int, float>;
template class gpu_sparse_pkt_matrix<int, double>;

template class gpu_vector<float>;
template class gpu_vector<double>;
template class gpu_sparse_pkt_matrix<float>;
template class gpu_sparse_pkt_matrix<double>;
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 comments on commit 9dfa212

Please sign in to comment.