Skip to content
This repository has been archived by the owner on Oct 19, 2020. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
Add memory pools and elementwise functionality.
  • Loading branch information
Andreas Kloeckner committed Jul 30, 2009
1 parent 57eff9f commit 0448466
Show file tree
Hide file tree
Showing 8 changed files with 739 additions and 21 deletions.
5 changes: 5 additions & 0 deletions include/iterative-cuda.hpp
Expand Up @@ -80,6 +80,11 @@ namespace iterative_cuda
value_type *ptr();
const value_type *ptr() const;

void set_to_linear_combination(
value_type a,
gpu_vector const &x,
value_type b,
gpu_vector const &y);
gpu_vector *dot(gpu_vector const &b) const;
};

Expand Down
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Expand Up @@ -3,7 +3,7 @@ set(BUILD_SHARED_LIBS ON)
include_directories(${METIS_INCLUDE_DIR})

cuda_add_library(iterativecuda
instantiation.cu spmv/mmio.c functions.cu
instantiation.cu spmv/mmio.c functions.cu bitlog.cpp
OPTIONS "-arch=${CUDA_ARCH}"
)

Expand Down
54 changes: 54 additions & 0 deletions src/bitlog.cpp
@@ -0,0 +1,54 @@
/*
Iterative CUDA is licensed to you under the MIT/X Consortium license:
Copyright (c) 2009 Andreas Kloeckner.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the Software), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/




#include "bitlog.hpp"




/* from http://graphics.stanford.edu/~seander/bithacks.html */
const char iterative_cuda::log_table_8[] =
{
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
};


80 changes: 80 additions & 0 deletions src/bitlog.hpp
@@ -0,0 +1,80 @@
/*
Iterative CUDA is licensed to you under the MIT/X Consortium license:
Copyright (c) 2009 Andreas Kloeckner.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the Software), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/




// Base-2 logarithm bithack.




#ifndef _AFJDFJSDFSD_ITERATIVE_CUDA_HEADER_SEEN_BITLOG_HPP
#define _AFJDFJSDFSD_ITERATIVE_CUDA_HEADER_SEEN_BITLOG_HPP




#include <climits>
#include <stdint.h>




namespace iterative_cuda
{
extern const char log_table_8[];

inline unsigned bitlog2_16(uint16_t v)
{
if (unsigned long t = v >> 8)
return 8+log_table_8[t];
else
return log_table_8[v];
}

inline unsigned bitlog2_32(uint32_t v)
{
if (uint16_t t = v >> 16)
return 16+bitlog2_16(t);
else
return bitlog2_16(v);
}

inline unsigned bitlog2(unsigned long v)
{
#if (ULONG_MAX != 4294967295)
if (uint32_t t = v >> 32)
return 32+bitlog2_32(t);
else
#endif
return bitlog2_32(v);
}
}





#endif
197 changes: 197 additions & 0 deletions src/elementwise.hpp
@@ -0,0 +1,197 @@
/*
Iterative CUDA is licensed to you under the MIT/X Consortium license:
Copyright (c) 2009 Andreas Kloeckner.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the Software), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/




#ifndef _AAFADFJ_ITERATIVE_CUDA_ELEMENTWISE_HPP_SEEN
#define _AAFADFJ_ITERATIVE_CUDA_ELEMENTWISE_HPP_SEEN




#include "helpers.hpp"




namespace iterative_cuda
{
static bool dev_props_loaded = false;
static cudaDeviceProp dev_props;

static inline void fill_dev_props()
{
if (dev_props_loaded)
return;

int my_dev;
ICUDA_CHK(cudaGetDevice, (&my_dev));

ICUDA_CHK(cudaGetDeviceProperties, (&dev_props, my_dev));
}




inline void splay(unsigned n, dim3 &grid, dim3 &block)
{
fill_dev_props();

unsigned min_threads = dev_props.warpSize;
unsigned max_threads = 128;
unsigned max_blocks = 4 * 8 * dev_props.multiProcessorCount;

unsigned block_count, threads_per_block;
if (n < min_threads)
{
block_count = 1;
threads_per_block = min_threads;
}
else if (n < (max_blocks * min_threads))
{
block_count = (n + min_threads - 1) / min_threads;
threads_per_block = min_threads;
}
else if ( n < (max_blocks * max_threads))
{
block_count = max_blocks;
unsigned grp = (n + min_threads - 1) / min_threads;
threads_per_block = ((grp + max_blocks -1) / max_blocks) * min_threads;
}
else
{
block_count = max_blocks;
threads_per_block = max_threads;
}

grid = block_count;
block = threads_per_block;
}




template <class ValueType>
__global__ void lc2_kernel(
ValueType a, ValueType const *x,
ValueType b, ValueType const *y,
ValueType *z, unsigned n)
{
unsigned tid = threadIdx.x;
unsigned total_threads = gridDim.x*blockDim.x;
unsigned cta_start = blockDim.x*blockIdx.x;
unsigned i;

for (i = cta_start + tid; i < n; i += total_threads)
z[i] = a*x[i] + b*y[i];
}




template <class ValueType>
__global__ void lc2p_kernel(
ValueType const *a_ptr, ValueType const *x,
ValueType const *b_ptr, ValueType const *y,
ValueType *z, unsigned n)
{
unsigned tid = threadIdx.x;
unsigned total_threads = gridDim.x*blockDim.x;
unsigned cta_start = blockDim.x*blockIdx.x;
unsigned i;

ValueType a = *a_ptr;
ValueType b = *b_ptr;

for (i = cta_start + tid; i < n; i += total_threads)
z[i] = a*x[i] + b*y[i];
}




template <class VT, class IT>
void lc2(
VT a, gpu_vector<VT, IT> const &x,
VT b, gpu_vector<VT, IT> const &y,
gpu_vector<VT, IT> &z)
{
dim3 grid, block;
splay(x.size(), grid, block);
lc2_kernel<VT><<<grid, block>>>(
a, x.ptr(), b, y.ptr(), z.ptr(), x.size());
}




template <class VT, class IT>
void lc2(
gpu_vector<VT, IT> const &a, gpu_vector<VT, IT> const &x,
gpu_vector<VT, IT> const &b, gpu_vector<VT, IT> const &y,
gpu_vector<VT, IT> &z)
{
dim3 grid, block;
splay(x.size(), grid, block);
lc2p_kernel<VT><<<grid, block>>>(
a.ptr(), x.ptr(), b.ptr(), y.ptr(), z.ptr(), x.size());
}




template <class ValueType>
__global__ void product_kernel(
ValueType const *x, ValueType const *y, ValueType *z, unsigned n)
{
unsigned tid = threadIdx.x;
unsigned total_threads = gridDim.x*blockDim.x;
unsigned cta_start = blockDim.x*blockIdx.x;
unsigned i;

for (i = cta_start + tid; i < n; i += total_threads)
z[i] = x[i]*y[i];
}




template <class VT, class IT>
void product(
gpu_vector<VT, IT> const &x,
gpu_vector<VT, IT> const &y,
gpu_vector<VT, IT> const &z)
{
dim3 grid, block;
splay(x.size(), grid, block);
product_kernel<VT><<<grid, block>>>(
x.ptr(), y.ptr(), z.ptr());
}
}




#endif
11 changes: 10 additions & 1 deletion src/functions.cu
Expand Up @@ -26,11 +26,20 @@ SOFTWARE.


#include <iterative-cuda.hpp>
#include "helpers.hpp"
#include "gpu-vector.hpp"




static iterative_cuda::cuda_mempool mempool;

iterative_cuda::cuda_mempool &iterative_cuda::get_cuda_mempool()
{
return mempool;
}

void iterative_cuda::synchronize_gpu()
{
cudaThreadSynchronize();
ICUDA_CHK(cudaThreadSynchronize, ());
}

0 comments on commit 0448466

Please sign in to comment.