!18896 uniform op gpu bug fix
Merge pull request !18896 from Peilin/uniform-op-bug-fix
This commit is contained in:
commit
7ae1c4a696
|
@@ -80,6 +80,7 @@ bool UniformInt(int seed, int seed2, curandState *globalState, T *input1, size_t
|
|||
bool host_error_res = false;
|
||||
UniformIntKernel<<<GET_BLOCKS(count), GET_THREADS, 0, cuda_stream>>>
|
||||
(RNG_seed, globalState, input1, input_size_1, input2, input_size_2, output, count);
|
||||
cudaDeviceSynchronize();
|
||||
cudaMemcpyFromSymbol(&host_error_res, dev_error_res, sizeof(bool));
|
||||
return host_error_res;
|
||||
}
|
||||
|
|
|
@@ -47,7 +47,7 @@ class RandomOpGpuKernel : public GpuKernel {
|
|||
public:
|
||||
RandomOpGpuKernel()
|
||||
: random_op_type_(RANDOM_OP_INVALID_TYPE),
|
||||
input_size_0_(sizeof(0)),
|
||||
input_size_0_(sizeof(int32_t)),
|
||||
input_size_1_(sizeof(T)),
|
||||
input_size_2_(sizeof(T)),
|
||||
output_size_(sizeof(T)),
|
||||
|
@@ -137,7 +137,7 @@ class RandomOpGpuKernel : public GpuKernel {
|
|||
}
|
||||
auto input_shape_0 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
for (size_t i = 0; i < input_shape_0.size(); i++) {
|
||||
input_size_0_ += input_shape_0[i];
|
||||
input_size_0_ *= input_shape_0[i];
|
||||
}
|
||||
input_size_0_ *= sizeof(int);
|
||||
if (random_op_type_ == RANDOM_OP_UNIFORM_INT) {
|
||||
|
|
Loading…
Reference in New Issue