forked from mindspore-Ecosystem/mindspore
add AbsGrad support for GPU
This commit is contained in:
parent
9c461f5565
commit
40b7708ede
|
@ -87,6 +87,14 @@ struct FloorDivFunc<half, bool> {
|
||||||
__device__ __forceinline__ half operator()(const half &lhs, const half &rhs) { return false; }
|
__device__ __forceinline__ half operator()(const half &lhs, const half &rhs) { return false; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename T, typename S>
|
||||||
|
struct AbsGradFunc {
|
||||||
|
__device__ __forceinline__ S operator()(const T &lhs, const T &rhs) {
|
||||||
|
T zero = 0.0;
|
||||||
|
return lhs < zero ? -rhs : rhs;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct PowerFunc<half, bool> {
|
struct PowerFunc<half, bool> {
|
||||||
|
@ -149,6 +157,9 @@ __global__ void BroadcastKernel(const int l0, const int l1, const int l2, const
|
||||||
case BROADCAST_TYPE_FLOORDIV:
|
case BROADCAST_TYPE_FLOORDIV:
|
||||||
return BroadcastOperator<T, S, FloorDivFunc<T, S>>(l0, l1, l2, l3, r0, r1, r2, r3, d0, d1, d2, d3, input0, input1,
|
return BroadcastOperator<T, S, FloorDivFunc<T, S>>(l0, l1, l2, l3, r0, r1, r2, r3, d0, d1, d2, d3, input0, input1,
|
||||||
output);
|
output);
|
||||||
|
case BROADCAST_TYPE_ABSGRAD:
|
||||||
|
return BroadcastOperator<T, S, AddFunc<T, S>>(l0, l1, l2, l3, r0, r1, r2, r3, d0, d1, d2, d3, input0, input1,
|
||||||
|
output);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -192,6 +203,8 @@ __global__ void NoBroadcastKernel(const int nums, enum BroadcastOpType op, const
|
||||||
return NoBroadcastOperator<T, S, AddFunc<T, S>>(nums, input0, input1, output);
|
return NoBroadcastOperator<T, S, AddFunc<T, S>>(nums, input0, input1, output);
|
||||||
case BROADCAST_TYPE_FLOORDIV:
|
case BROADCAST_TYPE_FLOORDIV:
|
||||||
return NoBroadcastOperator<T, S, FloorDivFunc<T, S>>(nums, input0, input1, output);
|
return NoBroadcastOperator<T, S, FloorDivFunc<T, S>>(nums, input0, input1, output);
|
||||||
|
case BROADCAST_TYPE_ABSGRAD:
|
||||||
|
return NoBroadcastOperator<T, S, FloorDivFunc<T, S>>(nums, input0, input1, output);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,7 @@ enum BroadcastOpType {
|
||||||
BROADCAST_TYPE_SUB = 7,
|
BROADCAST_TYPE_SUB = 7,
|
||||||
BROADCAST_TYPE_ADD = 8,
|
BROADCAST_TYPE_ADD = 8,
|
||||||
BROADCAST_TYPE_FLOORDIV = 9,
|
BROADCAST_TYPE_FLOORDIV = 9,
|
||||||
|
BROADCAST_TYPE_ABSGRAD = 10,
|
||||||
BROADCAST_TYPE_INVALID = 0xffffffff,
|
BROADCAST_TYPE_INVALID = 0xffffffff,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -55,6 +55,10 @@ MS_REG_GPU_KERNEL_TWO(
|
||||||
FloorDiv,
|
FloorDiv,
|
||||||
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
|
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
|
||||||
BroadcastOpGpuKernel, float, float)
|
BroadcastOpGpuKernel, float, float)
|
||||||
|
MS_REG_GPU_KERNEL_TWO(
|
||||||
|
AbsGrad,
|
||||||
|
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
|
||||||
|
BroadcastOpGpuKernel, float, float)
|
||||||
|
|
||||||
// fp16
|
// fp16
|
||||||
MS_REG_GPU_KERNEL_TWO(
|
MS_REG_GPU_KERNEL_TWO(
|
||||||
|
@ -93,6 +97,10 @@ MS_REG_GPU_KERNEL_TWO(
|
||||||
FloorDiv,
|
FloorDiv,
|
||||||
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
|
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
|
||||||
BroadcastOpGpuKernel, half, half)
|
BroadcastOpGpuKernel, half, half)
|
||||||
|
MS_REG_GPU_KERNEL_TWO(
|
||||||
|
AbsGrad,
|
||||||
|
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
|
||||||
|
BroadcastOpGpuKernel, half, half)
|
||||||
|
|
||||||
// int32
|
// int32
|
||||||
MS_REG_GPU_KERNEL_TWO(
|
MS_REG_GPU_KERNEL_TWO(
|
||||||
|
@ -113,5 +121,8 @@ MS_REG_GPU_KERNEL_TWO(
|
||||||
MS_REG_GPU_KERNEL_TWO(
|
MS_REG_GPU_KERNEL_TWO(
|
||||||
FloorDiv, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
|
FloorDiv, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
|
||||||
BroadcastOpGpuKernel, int, int)
|
BroadcastOpGpuKernel, int, int)
|
||||||
|
MS_REG_GPU_KERNEL_TWO(
|
||||||
|
AbsGrad, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
|
||||||
|
BroadcastOpGpuKernel, int, int)
|
||||||
} // namespace kernel
|
} // namespace kernel
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
|
@ -96,10 +96,10 @@ class BroadcastOpGpuKernel : public GpuKernel {
|
||||||
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||||
|
|
||||||
static std::map<std::string, BroadcastOpType> kBroadcastTypeMap = {
|
static std::map<std::string, BroadcastOpType> kBroadcastTypeMap = {
|
||||||
{"Greater", BROADCAST_TYPE_GREATER}, {"Less", BROADCAST_TYPE_LESS}, {"Maximum", BROADCAST_TYPE_MAXIMUM},
|
{"Greater", BROADCAST_TYPE_GREATER}, {"Less", BROADCAST_TYPE_LESS}, {"Maximum", BROADCAST_TYPE_MAXIMUM},
|
||||||
{"Minimum", BROADCAST_TYPE_MINIMUM}, {"Pow", BROADCAST_TYPE_POWER}, {"RealDiv", BROADCAST_TYPE_REALDIV},
|
{"Minimum", BROADCAST_TYPE_MINIMUM}, {"Pow", BROADCAST_TYPE_POWER}, {"RealDiv", BROADCAST_TYPE_REALDIV},
|
||||||
{"Mul", BROADCAST_TYPE_MUL}, {"Sub", BROADCAST_TYPE_SUB}, {"TensorAdd", BROADCAST_TYPE_ADD},
|
{"Mul", BROADCAST_TYPE_MUL}, {"Sub", BROADCAST_TYPE_SUB}, {"TensorAdd", BROADCAST_TYPE_ADD},
|
||||||
{"FloorDiv", BROADCAST_TYPE_FLOORDIV},
|
{"FloorDiv", BROADCAST_TYPE_FLOORDIV}, {"AbsGrad", BROADCAST_TYPE_ABSGRAD},
|
||||||
};
|
};
|
||||||
|
|
||||||
auto iter = kBroadcastTypeMap.find(kernel_name);
|
auto iter = kBroadcastTypeMap.find(kernel_name);
|
||||||
|
|
Loading…
Reference in New Issue