!39282 WeNet: Replace DynamicAtomicCleanAddr with Memset
Merge pull request !39282 from zhengzuohe/memset_master
This commit is contained in:
commit
a5fe1f0b3b
|
@ -38,6 +38,7 @@
|
|||
#include "include/common/utils/utils.h"
|
||||
#include "register/op_tiling.h"
|
||||
#include "nlohmann/json.hpp"
|
||||
#include "runtime/device/memory_manager.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
@ -238,9 +239,14 @@ bool DynamicTbeKernelMod::Launch(const std::vector<AddressPtr> &inputs, const st
|
|||
MS_EXCEPTION_IF_NULL(kernel_mod);
|
||||
device::KernelRuntime::GenLaunchArgs(*kernel_mod, atomic_clean_node.lock(), &kernel_launch_info);
|
||||
auto atomic_inputs = kernel_launch_info.inputs_;
|
||||
std::vector<AddressPtr> atomic_outputs;
|
||||
std::vector<AddressPtr> atomic_workspace;
|
||||
kernel_mod->Launch(atomic_inputs, atomic_workspace, atomic_outputs, stream_ptr);
|
||||
// Temporary scheme to avoid SyncStream error in dynamic-shaped WeNet network
|
||||
for (auto input : atomic_inputs) {
|
||||
auto align_size = device::MemoryManager::GetCommonAlignSize(input->size);
|
||||
auto ret = aclrtMemsetAsync(input->addr, align_size, 0, align_size, stream_ptr);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(EXCEPTION) << "AclrtMemset failed for " << cnode->fullname_with_scope();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -33,7 +33,6 @@ ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *
|
|||
return ACL_ERROR_NONE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @ingroup AscendCL
|
||||
* @brief Asynchronous memory replication between Host and Device
|
||||
|
@ -76,3 +75,27 @@ ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const v
|
|||
// Reports success unconditionally. The requested device id is not inspected
// or stored by this implementation.
ACL_FUNC_VISIBILITY aclError aclrtSetDevice(int32_t deviceId) {
  static_cast<void>(deviceId);  // intentionally unused
  const aclError status = ACL_ERROR_NONE;
  return status;
}
|
||||
|
||||
/**
|
||||
* @ingroup AscendCL
|
||||
* @brief Asynchronously initializes memory, setting its contents to the specified value
|
||||
*
|
||||
* @par Function
|
||||
* The memory to be initialized may reside on the Host or the Device side;
|
||||
* the system determines which one from the address itself
|
||||
*
|
||||
* @param dst [IN] destination address pointer
|
||||
* @param destMax [IN] max length of the destination address memory
|
||||
* @param value [IN] set value
|
||||
* @param count [IN] the number of bytes to set
|
||||
* @param stream [IN] asynchronous task stream
|
||||
*
|
||||
* @retval ACL_SUCCESS The function is successfully executed.
|
||||
* @retval OtherValues Failure
|
||||
*
|
||||
* @see aclrtSynchronizeStream
|
||||
*/
|
||||
ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *dst, size_t destMax, int32_t value, size_t count,
                                              aclrtStream stream) {
  // This implementation writes no memory and uses none of its arguments;
  // it only reports success to the caller.
  static_cast<void>(dst);
  static_cast<void>(destMax);
  static_cast<void>(value);
  static_cast<void>(count);
  static_cast<void>(stream);
  const aclError status = ACL_ERROR_NONE;
  return status;
}
|
Loading…
Reference in New Issue