Skip to content

Commit e8a003d

Browse files
committed
bug fix for wrong usage of checkGPU && port to windows with msvc
1 parent afd74e7 commit e8a003d

11 files changed

23 additions and 12 deletions

lib/THC/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,11 @@ ENDIF()
116116
INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}")
117117
CONFIGURE_FILE(THCGeneral.h.in "${CMAKE_CURRENT_BINARY_DIR}/THCGeneral.h")
118118

119-
SET(CMAKE_C_FLAGS "-std=c99 ${CMAKE_C_FLAGS}")
119+
IF(MSVC)
120+
LIST(APPEND CUDA_NVCC_FLAGS "-Xcompiler /wd4819")
121+
ELSE()
122+
SET(CMAKE_C_FLAGS "-std=c99 ${CMAKE_C_FLAGS}")
123+
ENDIF()
120124

121125
SET(src
122126
THCAllocator.c

lib/THC/THCHalf.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,8 @@
2020

2121
THC_EXTERNC void THCFloat2Half(THCState *state, half *out, float *in, long len);
2222
THC_EXTERNC void THCHalf2Float(THCState *state, float *out, half *in, long len);
23-
THC_EXTERNC half THC_float2half(float a);
24-
THC_EXTERNC float THC_half2float(half a);
23+
THC_API half THC_float2half(float a);
24+
THC_API float THC_half2float(half a);
2525

2626
/* Check for native fp16 support on the current device (CC 5.3+) */
2727
THC_EXTERNC int THC_nativeHalfInstructions(THCState *state);

lib/THC/THCReduceApplyUtils.cu

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
#include <stdlib.h>
55

66
// Maximum size per grid dimension that we assume (compute capability >= 2.0)
7-
#define MAX_GRID_SIZE 65535L
7+
#define MAX_GRID_SIZE 65535LL
88

99
void THCCheckTensorDims(THCState* state, THCudaTensor* tensor, int arg) {
1010
long dims = THCudaTensor_nDimension(state, tensor);
@@ -21,11 +21,11 @@ bool THC_getGridFromTiles(long gridTiles, dim3& grid) {
2121
long gridZ = 1;
2222

2323
if (gridTiles > MAX_GRID_SIZE) {
24-
gridTiles = THCCeilDiv(gridTiles, MAX_GRID_SIZE);
24+
gridTiles = THCCeilDiv(gridTiles, (long) MAX_GRID_SIZE);
2525
gridY = gridTiles > MAX_GRID_SIZE ? MAX_GRID_SIZE : gridTiles;
2626

2727
if (gridTiles > MAX_GRID_SIZE) {
28-
gridTiles = THCCeilDiv(gridTiles, MAX_GRID_SIZE);
28+
gridTiles = THCCeilDiv(gridTiles, (long) MAX_GRID_SIZE);
2929
gridZ = gridTiles > MAX_GRID_SIZE ? MAX_GRID_SIZE : gridTiles;
3030
}
3131
}

lib/THC/THCTensorMath2.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -270,7 +270,7 @@ __host__ void THCudaTensor_varOuterDim(THCState *state, THCudaTensor *tgt, THCud
270270
unsigned ndim = THCudaTensor_nDimension(state, src);
271271
// Treat all outer dimensions (i.e. dim < dimension) as one.
272272
unsigned num_orows = 1;
273-
for (unsigned dim = 0; dim < dimension; dim++) {
273+
for (long dim = 0; dim < dimension; dim++) {
274274
num_orows *= THCudaTensor_size(state, src, dim);
275275
}
276276
unsigned row_size = THCudaTensor_size(state, src, dimension);

lib/THC/THCTensorMathReduce.cuh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -148,7 +148,7 @@ THC_transformReduceOuterDimIndex(THCState *state,
148148
BinaryFunction binary_op) {
149149
unsigned ndim = TensorUtils<TensorTypeK>::getDims(state, src);
150150
unsigned num_orows = 1;
151-
for (unsigned dim = 0; dim < rdim; dim++) {
151+
for (long dim = 0; dim < rdim; dim++) {
152152
num_orows *= TensorUtils<TensorTypeK>::getSize(state, src, dim);
153153
}
154154
unsigned row_size = TensorUtils<TensorTypeK>::getSize(state, src, rdim);

lib/THC/THCTensorMathScan.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ __host__ void THCudaTensor_scanOuterDim(THCState *state, THCudaTensor *tgt, THCu
4747
unsigned ndim = THCudaTensor_nDimension(state, src);
4848
// Treat all outer dimensions (i.e. dim < dimension) as one.
4949
unsigned num_orows = 1;
50-
for (unsigned dim = 0; dim < dimension; dim++) {
50+
for (long dim = 0; dim < dimension; dim++) {
5151
num_orows *= THCudaTensor_size(state, src, dim);
5252
}
5353
unsigned row_size = THCudaTensor_size(state, src, dimension);

lib/THC/THCTensorSort.cu

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,9 @@ unsigned long nextHighestPowerOf2(unsigned long n) {
88
n |= n >> 4;
99
n |= n >> 8;
1010
n |= n >> 16;
11+
#ifndef _MSC_VER
1112
n |= n >> 32;
13+
#endif
1214
n++;
1315

1416
return n;

lib/THC/THCThreadLocal.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
#define THC_THREAD_LOCAL_INC
33

44
#ifdef _WIN32
5+
#include <windows.h>
56
typedef DWORD THCThreadLocal;
67
#else
78
#include <pthread.h>

lib/THC/cmake/select_compute_arch.cmake

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,10 @@ function(CUDA_DETECT_INSTALLED_GPUS OUT_VARIABLE)
6969
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
7070

7171
if(nvcc_res EQUAL 0)
72+
# only keep the last line of nvcc_out
73+
STRING(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}")
74+
STRING(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}")
75+
list(GET nvcc_out -1 nvcc_out)
7276
string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}")
7377
set(CUDA_GPU_DETECT_OUTPUT ${nvcc_out} CACHE INTERNAL "Returned GPU architetures from detect_gpus tool" FORCE)
7478
endif()

lib/THC/generic/THCTensorIndex.cu

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ void THCTensor_(indexCopy_long)(THCState *state, THCTensor *dst, int dim, THLong
1616

1717
void THCTensor_(indexCopy)(THCState *state, THCTensor *dst, int dim, THCudaLongTensor *indices, THCTensor *src)
1818
{
19-
THAssert(THCTensor_(checkGPU)(state, 3, dst, src));
19+
THAssert(THCTensor_(checkGPU)(state, 2, dst, src));
2020
THAssert(THCudaLongTensor_checkGPU(state, 1, indices));
2121

2222
long dims = THCTensor_(nDimension)(state, dst);
@@ -144,7 +144,7 @@ void THCTensor_(indexAdd_long)(THCState *state, THCTensor *dst, int dim, THLongT
144144

145145
void THCTensor_(indexAdd)(THCState *state, THCTensor *dst, int dim, THCudaLongTensor *indices, THCTensor *src)
146146
{
147-
THAssert(THCTensor_(checkGPU)(state, 3, dst, src));
147+
THAssert(THCTensor_(checkGPU)(state, 2, dst, src));
148148
THAssert(THCudaLongTensor_checkGPU(state, 1, indices));
149149

150150
long dims = THCTensor_(nDimension)(state, dst);

0 commit comments

Comments
 (0)