This repository has been archived by the owner on Oct 19, 2020. It is now read-only.

Commit abc23a5

Fix compile with CUDA 3.0beta.

inducer committed Nov 23, 2009
1 parent 12ecb8c
Showing 2 changed files with 8 additions and 5 deletions.
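Background on the fix, hedged: CUDA 3.0 beta is presumably where the toolkit's headers start declaring a built-in device-side float atomicAdd (it becomes a hardware intrinsic on compute capability 2.0), so the hand-rolled atomicAdd overloads in src/spmv/utils.h collide with the toolkit's declarations and break the build. The commit renames the helpers to myAtomicAdd and updates the three call sites; behavior is unchanged.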
6 changes: 3 additions & 3 deletions src/spmv/kernels/spmv_coo_flat_device.cu.h
@@ -66,7 +66,7 @@ spmv_coo_flat_kernel(const IndexType num_nonzeros,
         else if(carry_idx[warp_lane] != first_idx)
             y[carry_idx[warp_lane]] += carry_val[warp_lane];              // row terminated, does not span boundary
         else
-            atomicAdd(y + carry_idx[warp_lane], carry_val[warp_lane]);    // row terminated, but spans inter-warp boundary
+            myAtomicAdd(y + carry_idx[warp_lane], carry_val[warp_lane]);  // row terminated, but spans inter-warp boundary
     }

         // segmented reduction in shared memory
@@ -84,14 +84,14 @@ spmv_coo_flat_kernel(const IndexType num_nonzeros,
         if(idx[threadIdx.x] != first_idx)
             y[idx[threadIdx.x]] += val[threadIdx.x];              // row terminated, does not span inter-warp boundary
         else
-            atomicAdd(y + idx[threadIdx.x], val[threadIdx.x]);    // row terminated, but spans inter-warp boundary
+            myAtomicAdd(y + idx[threadIdx.x], val[threadIdx.x]);  // row terminated, but spans inter-warp boundary
     }

     }

     // final carry
     if(thread_lane == 31){
-        atomicAdd(y + carry_idx[warp_lane], carry_val[warp_lane]);
+        myAtomicAdd(y + carry_idx[warp_lane], carry_val[warp_lane]);
     }
 }

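Note the pattern at the renamed call sites: a row whose partial sum is completed within a single warp's chunk of nonzeros is written back with a plain +=, while a row that straddles an inter-warp boundary can receive contributions from more than one warp, so (as the comments note) its carry must be folded into y atomically. Those atomic spots are exactly the ones affected by the rename.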
7 changes: 5 additions & 2 deletions src/spmv/utils.h
@@ -1,4 +1,6 @@
 /* Copyright 2008 NVIDIA Corporation. All Rights Reserved */
+#ifndef ITCUDA_SPMV_UTILS
+#define ITCUDA_SPMV_UTILS

 #pragma once

@@ -28,7 +30,7 @@
 // dodgy. It just does a bit comparison rather than a true floating
 // point comparison. Hence 0 != -0, for instance.
 //
-static __inline__ __device__ float atomicAdd(float *addr, float val)
+static __inline__ __device__ float myAtomicAdd(float *addr, float val)
 {
   float old=*addr, assumed;

@@ -44,7 +46,7 @@ static __inline__ __device__ float atomicAdd(float *addr, float val)
 #endif // !defined(CUDA_NO_SM_11_ATOMIC_INTRINSICS)

 #if !defined(CUDA_NO_SM_13_DOUBLE_INTRINSICS)
-static __inline__ __device__ double atomicAdd(double *addr, double val)
+static __inline__ __device__ double myAtomicAdd(double *addr, double val)
 {
   double old=*addr, assumed;

@@ -208,3 +210,4 @@ void gather_device(ValueType * dest, const ValueType * src, const IndexType* map
 // scatter_dev_kernel<IndexType,ValueType><<<grid,block>>>(dest,src,map,N);
 //}

+#endif
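Both hunks above cut off mid-body. A plausible reconstruction of the truncated loops follows, assuming they use the standard atomicCAS spin pattern; this is a sketch, not the file's verbatim contents. The bit-level comparison via __float_as_int is exactly the "dodgy" behavior the comment in utils.h warns about, under which 0.0f and -0.0f compare unequal.

// Sketch of the likely loop bodies (reconstruction, not the commit's
// verbatim code). atomicCAS operates on integer words, so the float is
// reinterpreted bit-for-bit; the exit test therefore compares bit
// patterns, which is why 0 != -0 here, as noted in utils.h. Returns the
// previous value, matching built-in atomicAdd semantics.
static __inline__ __device__ float myAtomicAdd(float *addr, float val)
{
  float old = *addr, assumed;

  do
  {
    assumed = old;
    old = __int_as_float(atomicCAS((int *) addr,
          __float_as_int(assumed),
          __float_as_int(assumed + val)));
  }
  while (__float_as_int(assumed) != __float_as_int(old));

  return old;
}

// The double variant is identical in shape, using 64-bit atomicCAS; it
// sits behind the SM_13 guard because double-precision arithmetic
// requires compute capability 1.3.
static __inline__ __device__ double myAtomicAdd(double *addr, double val)
{
  double old = *addr, assumed;

  do
  {
    assumed = old;
    old = __longlong_as_double(atomicCAS((unsigned long long int *) addr,
          (unsigned long long int) __double_as_longlong(assumed),
          (unsigned long long int) __double_as_longlong(assumed + val)));
  }
  while (__double_as_longlong(assumed) != __double_as_longlong(old));

  return old;
}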
