Compare commits

...

1103 Commits
master ... r1.6

Author SHA1 Message Date
i-robot 017770ad58
!38597 fix error links for r1.6
Merge pull request !38597 from lvmingfu/code_docs_r1.666
2022-07-22 02:18:28 +00:00
lvmingfu 0789e24585 fix error links for r1.6 2022-07-21 17:49:14 +08:00
i-robot d1dc0a3a10
!36964 [Auto parallel] [MoE] Expertize bias in Linear
Merge pull request !36964 from Xiaoda/134-expertize-bias-r1.6
2022-07-04 03:31:02 +00:00
Xiaoda Zhang 3be188f849 expertize bias in Linear 2022-06-30 15:27:29 +08:00
i-robot ce0febe9de
!36456 fix wrong deployment target on mac
Merge pull request !36456 from xulei/fix_deployment_target_r1.6
2022-06-25 06:44:36 +00:00
i-robot cf8f296467
!36552 fix gpu dockerfile as NVIDIA replaced its GPG keys
Merge pull request !36552 from yanghaoran/r1.6
2022-06-25 03:19:53 +00:00
yanghaoran 5aee950463 fix gpu dockerfile as NVIDIA replaced its GPG keys 2022-06-25 10:43:42 +08:00
i-robot 9ff5a05201
!36538 fix 1.6.2 dockerfile as mindinsight is not included for 1.6.2
Merge pull request !36538 from yanghaoran/r1.6
2022-06-24 12:43:07 +00:00
yanghaoran 91458a9251 fix 1.6.2 dockerfile as mindinsight is not included for 1.6.2 2022-06-24 20:31:58 +08:00
xulei 3250939c63 fix wrong deployment target on mac 2022-06-24 13:46:21 +08:00
i-robot 35dffd0be6
!36360 add 1.6.2 dockerfile
Merge pull request !36360 from yanghaoran/r1.6
2022-06-22 12:36:03 +00:00
yanghaoran ff7f2ef350 add 1.6.2 dockerfile 2022-06-22 19:58:55 +08:00
i-robot 6c6c87eaae
!36138 Update release notes for 1.6.2.
Merge pull request !36138 from TronZhang/code_docs_release_note_1.6.2
2022-06-18 07:14:19 +00:00
tronzhang 462acabfc9 update release notes for 1.6.2 2022-06-17 17:29:05 +08:00
i-robot 49a781f252
!35749 version change to 1.6.2
Merge pull request !35749 from TronZhang/version_1.6.2
2022-06-10 09:36:52 +00:00
tronzhang a5298b3b6a version 1.6.2 2022-06-10 17:10:08 +08:00
i-robot 8ce44fbfd2
!35697 [r1.6] fix mac compile with clang++13.1
Merge pull request !35697 from xulei/fix_mac_r1.6
2022-06-10 01:52:12 +00:00
xulei 7ed57e1acb fix mac compile on clang++13 2022-06-09 19:39:03 +08:00
i-robot 67b828174a
!35643 mod_convert_model_r1.6
Merge pull request !35643 from changzherui/mod_convert_model_r1.6
2022-06-09 06:26:31 +00:00
changzherui 35f72a0cd6 modify convert model 2022-06-09 10:28:55 +08:00
i-robot 4f679c0cb7
!35597 modify docs
Merge pull request !35597 from changzherui/code_docs_log3
2022-06-08 07:33:31 +00:00
changzherui cd6ac31ff2 modify docs 2022-06-08 11:35:16 +08:00
i-robot 4723fe375b
!35428 add mindir input r1.6
Merge pull request !35428 from changzherui/add_mindir_input_r1.6
2022-06-08 03:14:58 +00:00
changzherui 8e58862f2d add mindir input for r1.6 2022-06-07 22:21:41 +08:00
i-robot 82c19cbaf9
!33439 reset event after create
Merge pull request !33439 from zhoufeng/reset-event-1.6
2022-06-07 11:31:14 +00:00
i-robot 34761f434f
!35225 fix_independent_stream_error_r1.6
Merge pull request !35225 from yao_yf/fix_independent_stream_error_r1.6
2022-06-01 01:19:25 +00:00
i-robot 4dc930bbd2
!35185 [MD] add OBSMindDataset in r1.6
Merge pull request !35185 from liyong126/add_obs_minddataset_r1.6
2022-05-31 11:55:40 +00:00
yao_yf f9c54b5a81 fix independent stream active 2022-05-31 15:14:17 +08:00
liyong dc36a0617a add obs minddataset 2022-05-30 17:03:28 +08:00
i-robot b440c9400d
!34864 fix SoftmaxDropoutDoMaskV3Fusion and Dropout recompute
Merge pull request !34864 from yuchaojie/r1.6
2022-05-27 08:26:22 +00:00
yuchaojie fd9d175cc0 fix SoftmaxDropoutDoMaskV3Fusion and dropout recompute 2022-05-27 16:11:07 +08:00
i-robot 9b106f651d
!34792 Fix device memory leak in dynamic case.
Merge pull request !34792 from TronZhang/fix_memory
2022-05-25 01:40:12 +00:00
TronZhang c23eec4738 fix allocate workspace memory leak 2022-05-23 15:44:45 +08:00
i-robot 7f88955843
!33926 [MSLITE][Fix] remove gateway model.
Merge pull request !33926 from 赵英灼/r1.6gatemodel
2022-05-06 07:51:04 +00:00
zhaoyingzhuo b1fd978e6a remove gateway model 2022-05-06 11:41:34 +08:00
i-robot 78e17e5464
!33494 modify java jni
Merge pull request !33494 from liyan2022/dev_r1.6
2022-04-25 12:21:09 +00:00
albert-yan df8a023e0b support aar and jar 2022-04-25 12:00:36 +08:00
zhoufeng 890ff60f4d reset event after create
Signed-off-by: zhoufeng <zhoufeng54@huawei.com>
2022-04-24 11:16:14 +08:00
i-robot c8092b7143
!32709 correct the cpu kernel factory error information on branch 1.6
Merge pull request !32709 from 沈竞兴/r1.60408
2022-04-19 01:59:24 +00:00
i-robot 14be329ef8
!32833 Fix protobuf CVE
Merge pull request !32833 from liuyang/ms_16
2022-04-14 09:18:00 +00:00
i-robot e215713d54
!32786 fix Floatmode bug
Merge pull request !32786 from yeyunpeng2020/r1.6
2022-04-12 08:08:41 +00:00
shen_jingxing 9cf98a18ce correct the cpu kernel factory error information on branch 1.6 2022-04-11 20:07:52 +08:00
liuyang_655 f4c478f370 modify CVE 2022-04-11 07:31:12 -04:00
i-robot dda9e07273
!32791 [MS][LITE][parallel predict] fix mem leak
Merge pull request !32791 from yefeng/283-fix_mem_leak
2022-04-11 09:25:45 +00:00
yefeng 89b26288fe fix mem leak 2022-04-11 10:22:50 +08:00
i-robot 9fb5c18c60
!32085 Fix Coredump while FL start outtime
Merge pull request !32085 from zhangzhaoju/r1.6
2022-04-06 10:50:29 +00:00
i-robot 248fbefc62
!32085 Fix Coredump while FL start outtime
Merge pull request !32085 from zhangzhaoju/r1.6
2022-04-06 10:50:26 +00:00
i-robot 00f95e1aab
!32466 Fix Python security check r1.6
Merge pull request !32466 from VectorSL/r1.6
2022-04-06 08:58:57 +00:00
i-robot 98fcd395e3
!32520 fix I50PAP memory leak
Merge pull request !32520 from tan-wei-cheng-3260/r1.6-develop2
2022-04-04 04:09:00 +00:00
twc d98b51cda9 fix I50PAP memory leak 2022-04-04 00:48:59 +08:00
VectorSL 7a689de064 fix python security check r1.6 2022-04-02 14:42:17 +08:00
i-robot 487c9c5e16
!32281 [MS][LITE] revert bn mod
Merge pull request !32281 from zhengjun10/r1.6
2022-04-02 01:31:49 +00:00
i-robot 613939605c
!32365 fix issue I4Z7WC、I502P8、I502TN、I5031D、I503MS、I503SO、I502L2
Merge pull request !32365 from tan-wei-cheng-3260/r1.6-develop3
2022-03-31 14:59:37 +00:00
twc b0b00316d0 fix issue I4Z7WC、I502P8、I502TN、I5031D、I503MS、I503SO、I502L2 2022-03-31 16:32:26 +08:00
yeyunpeng2020 af66a685f3 fix Floatmode bug 2022-03-31 15:29:36 +08:00
i-robot 354b4189e8
!32272 [MS][LITE][DEVELOP]arm support NUMA
Merge pull request !32272 from chenjianping/r1.6_dev
2022-03-31 01:23:00 +00:00
zhengjun10 f2688acd98 Revert "!31803 [MS][LITE][TOD] is_training BN mode"
This reverts commit bdd1583f2d, reversing
changes made to c2ba409b24.
2022-03-30 16:05:30 +08:00
jpc_chenjianping 9305765724 arm support NUMA 2022-03-30 14:37:45 +08:00
i-robot c3a3755f5a
!31949 Fix load tensor into mem twice for ascend kernel by kernel dump r1.6
Merge pull request !31949 from TinaMengtingZhang/kernel_dump1.6
2022-03-30 03:22:33 +00:00
i-robot 3a2d020068
!32181 version() add nativeLib
Merge pull request !32181 from liyan2022/dev_r1.6
2022-03-29 08:24:38 +00:00
albert-yan 7271f44f60 version() add nativeLib 2022-03-29 14:50:31 +08:00
zhangzhaoju 2f9487fef0 Fix DTS2022032212820
Fix coredump when start times out: join the thread instead of detaching it
2022-03-28 15:12:42 +08:00
i-robot 8f0143754f
!32019 [MS][LITE] fix java bug
Merge pull request !32019 from yefeng/cherry-pick-1648287448
2022-03-26 12:51:41 +00:00
i-robot 5fb89b31a1
!31914 replace English link
Merge pull request !31914 from 宦晓玲/code_docs_2325
2022-03-26 10:36:40 +00:00
yefeng d56129bead fixed ee5e6d8 from https://gitee.com/YeFeng_24/mindspore/pulls/32007
fix java mem leak
2022-03-26 09:37:30 +00:00
i-robot 1e0380fba1
!31965 update release note for datasets.py reconstructed
Merge pull request !31965 from guozhijian/update_release_r1.6
2022-03-26 07:48:33 +00:00
jonyguo 8b8e3881b4 update RELEASE.md for md 2022-03-26 11:32:40 +08:00
i-robot 4031095c8a
!31904 improve mac compile for r1.6
Merge pull request !31904 from xulei/improve_mac
2022-03-26 01:09:15 +00:00
TinaMengtingZhang 58fa47d802 fix load and convert tensor twice in ascend kernel by kernel dump 2022-03-25 14:32:15 -04:00
i-robot 3c357e989e
!31862 [MS][LITE][TOD] update LSTMGrad files
Merge pull request !31862 from Nizzan/export_nizzan1
2022-03-25 08:41:40 +00:00
i-robot bdd1583f2d
!31803 [MS][LITE][TOD] is_training BN mode
Merge pull request !31803 from Nizzan/export_nizzan
2022-03-25 08:41:15 +00:00
i-robot c2ba409b24
!31900 [MS][LITE]Fix arm64 linux build
Merge pull request !31900 from gongdaguo1/r116_fix_
2022-03-25 07:33:47 +00:00
zhangyi 20ea40dc04 replace the English link 2022-03-25 15:14:11 +08:00
gongdaguo a41b326544 fix arm64 converter build 2022-03-25 11:59:50 +08:00
xulei 8c26f4cfad improve mac compile 2022-03-25 11:53:29 +08:00
nizzan 8634c03dd4 Merge LSTMGrad latest files 2022-03-24 12:57:43 +02:00
i-robot 18e378ee74
!31786 [MSLITE][CPU][r1.6] fix arm build bug, update some cost value
Merge pull request !31786 from Greatpan/arm32_bug_fix_r1.6
2022-03-24 09:08:32 +00:00
i-robot 4ccce5ee3e
!31793 [MS][LITE][r1.6] fix npu fp16 tensor datatype error
Merge pull request !31793 from XianglongZeng/r1.6
2022-03-24 08:55:32 +00:00
i-robot 6bcd546eb4
!31831 [MSLITE] fix cuda 10.1 compile log error
Merge pull request !31831 from Liu_Xuu/r1.6
2022-03-24 07:16:47 +00:00
Liu_Xuu 918eaf1e39 [MSLITE] fix cuda 10.1 compile error 0323_02 2022-03-24 13:01:33 +08:00
i-robot 8a83b8c9db
!31771 modify native lib for aar
Merge pull request !31771 from liyan2022/dev_r1.6_glog
2022-03-24 01:59:32 +00:00
greatpanc 8b892dbd3b fix arm32 debug version, build failed 2022-03-23 23:43:35 +08:00
nizzan b8aab5cdf5 is_training BN mode 2022-03-23 11:51:05 +02:00
zengxianglong d5ca97f885 fix npu fp16 tensor datatype error 2022-03-23 17:07:08 +08:00
albert-yan 7e6bc3ca6d modify jni for aar 2022-03-23 15:53:11 +08:00
i-robot 9dcc28781d
!31761 [MSLITE] fix compile support for cuda 10.1
Merge pull request !31761 from Liu_Xuu/r1.6
2022-03-23 06:42:50 +00:00
i-robot 0dcb7abdb5
!31682 【r1.6】fix openssl CVE-2022-0778
Merge pull request !31682 from emmmmtang/r1.6
2022-03-23 04:00:23 +00:00
Liu_Xuu 8863d58ee4 [MSLITE] fix compile support for cuda 10.1 0322_04 2022-03-23 11:23:17 +08:00
i-robot 59a1487988
!31709 modify native lib
Merge pull request !31709 from liyan2022/dev_r1.6
2022-03-23 02:06:11 +00:00
i-robot d9f3ee2659
!31705 [MS][LITE][DEVELOP]remove allocate memory limit
Merge pull request !31705 from chenjianping/r1.6_dev
2022-03-23 01:08:41 +00:00
i-robot d04fb76a52
!31653 [MS][LITE][r1.6] fix converter fuzz
Merge pull request !31653 from jianghui58/codex_fuzz_r1.6
2022-03-22 12:31:47 +00:00
jpc_chenjianping 8bb54c3c17 remove memory limit 2022-03-22 20:15:05 +08:00
albert-yan 822e4b00e8 modify native lib load 2022-03-22 18:29:21 +08:00
jianghui58 d8ca7fd2c8 fix converter fuzz r1.6 2022-03-22 16:52:27 +08:00
i-robot 17bd902d2b
!31637 Optimize the processing logic of flclient in r1.6
Merge pull request !31637 from zhoushan33/flclient0321_r1.6
2022-03-22 07:17:28 +00:00
zhoushan 93a3cae305 Optimize the processing logic of flclient in r1.6 2022-03-22 09:53:46 +08:00
emmmmtang 0f6c828d7c fix openssl CVE-2022-0778 2022-03-21 20:38:53 +08:00
i-robot eb8ded4a57
!31623 version1.6.2 and add glog so, fix testcase
Merge pull request !31623 from liyan2022/dev_r1.6_glog
2022-03-21 12:00:06 +00:00
i-robot f1be6fa3c4
!31607 modify format
Merge pull request !31607 from 俞涵/code_docs_2
2022-03-21 10:53:55 +00:00
albert-yan 554a26d453 version 1.6.2 and add glog so 2022-03-21 17:12:09 +08:00
i-robot 1d7ff20e8c
!31519 [MSLITE] bias add int accuracy bug
Merge pull request !31519 from ling/r1.6
2022-03-21 06:20:51 +00:00
i-robot b43e76a7b8
!31513 [MS][LITE][r1.6] code check
Merge pull request !31513 from XianglongZeng/r1.6
2022-03-21 05:27:18 +00:00
huodagu d880338483 modify_urlformat_1.6 2022-03-21 11:08:37 +08:00
i-robot 55957930c5
!31592 code sync and fix bug in fl
Merge pull request !31592 from tan-wei-cheng-3260/r1.6-develop2
2022-03-21 01:12:25 +00:00
twc d74d22d609 code sync and fix bug in fl 2022-03-20 20:04:47 +08:00
i-robot b14ab3095c
!31562 [MS][LITE]parallel predict fix bug for 1.6: without numa
Merge pull request !31562 from yefeng/267-fix_bug-1.6
2022-03-19 08:15:07 +00:00
yefeng bc367d441f fix bug 2022-03-19 13:10:58 +08:00
i-robot 2bfaff6303
!31498 Fix DTS2022031207044: scheduler recover core dump while doing scale out
Merge pull request !31498 from zhangzhaoju/r1.6
2022-03-19 02:17:42 +00:00
i-robot 4b4630dee3
!31542 code sync and fix bug
Merge pull request !31542 from tan-wei-cheng-3260/r1.6-develop2
2022-03-19 01:20:43 +00:00
twc dc4e6d83a3 code sync and fix bug 2022-03-18 21:27:53 +08:00
i-robot ba6e1c7b6b
!31485 [MS][LITE][parallel predict] fix numa for linux arm64
Merge pull request !31485 from yefeng/265-fix_numa_for_linux_arm64-r1.6
2022-03-18 12:46:43 +00:00
zhangzhaoju 3359730028 Fix DTS2022031207044
solve coredump problem while Fl scale out + scheduler recover
2022-03-18 17:32:28 +08:00
i-robot d8a9cb7469
!31493 [MSLite][OnDeviceTraining] Fix random initialization for labels of TOD
Merge pull request !31493 from lz/r1.6
2022-03-18 08:17:12 +00:00
ling 27aa301bd5 [MSLITE] bias add int accuracy bug 2022-03-18 15:03:14 +08:00
zengxianglong f98a67b46a fix fp16 weight conv bug 2022-03-18 14:15:25 +08:00
lz f125375376 Fix Random initialization to labels 2022-03-18 11:01:30 +08:00
i-robot 7ac1684587
!31405 [MS][LITE][STABLE]support glog
Merge pull request !31405 from chenjianping/r1.6_dev4
2022-03-18 02:37:43 +00:00
yefeng 29f855c453 fix numa for linux arm64 2022-03-18 09:24:19 +08:00
i-robot a759dd5705
!31250 Profiling is modified using sample code.
Merge pull request !31250 from liuchuting/code_docs_1
2022-03-18 01:23:51 +00:00
jpc_chenjianping 382b659954 support glog 2022-03-17 17:32:43 +08:00
i-robot e65f1cab6f
!30771 fix warning on 1.6
Merge pull request !30771 from shenwei41/waring_1.6
2022-03-17 07:18:49 +00:00
i-robot f293c2f203
!31367 [lite]transpose opt for server part1
Merge pull request !31367 from 徐安越/r1.6_temp1
2022-03-17 02:45:12 +00:00
i-robot 400745c8ef
!31344 fix dynamic resize bug for matmul fp16
Merge pull request !31344 from yeyunpeng2020/r1.6
2022-03-16 12:35:28 +00:00
xuanyue 688048c085 transpose opt for server 2022-03-16 15:56:29 +08:00
yeyunpeng2020 49336a33a6 fix dynamic resize bug for matmul fp16 2022-03-16 11:10:00 +08:00
i-robot ab34d893ab
!31292 [lite]fix scale-int8 bug
Merge pull request !31292 from 徐安越/r1.6_temp2
2022-03-16 03:06:24 +00:00
i-robot a472eb4296
!31290 fix I4X31J and I4VPZ5 and I4WSW2
Merge pull request !31290 from tan-wei-cheng-3260/r1.6-develop2
2022-03-16 01:27:54 +00:00
i-robot 88db7e958c
!31307 [MS][LITE][parallel predict] Enable dynamic memory allocation
Merge pull request !31307 from yefeng/259-fix_1.6_bug
2022-03-15 12:44:42 +00:00
twc 15b33b1bc3 fix I4X31J and I4VPZ5 and I4WSW2 2022-03-15 18:30:33 +08:00
yefeng f6823d6485 fix bug 2022-03-15 17:19:00 +08:00
i-robot c8b6dc4783
!31301 [MS][LITE][parallel predict] fix java api
Merge pull request !31301 from yefeng/258-fix_java_api_1.6
2022-03-15 09:17:07 +00:00
i-robot 0f7dc69992
!31276 [lite]arithmetic opt
Merge pull request !31276 from 徐安越/r1.6
2022-03-15 09:17:07 +00:00
i-robot 9324b085f2
!31224 solve core dump problem of fl
Merge pull request !31224 from zhangzhaoju/r1.6
2022-03-15 08:48:20 +00:00
i-robot dce4e18df5
!31273 [MSLITE] bug fix for cublas matmul
Merge pull request !31273 from Liu_Xuu/trt_0315_opt
2022-03-15 08:20:36 +00:00
yefeng f7946c1add fix java api 2022-03-15 16:16:53 +08:00
i-robot edfcb0aadf
!31257 [MS][LITE][parallel predict] copy to 1.6
Merge pull request !31257 from yefeng/255-copy_to_1.6_1
2022-03-15 07:49:48 +00:00
Liu_Xuu bb7781037e [MSLITE] bug fix for matmul op 0415_10 2022-03-15 15:24:26 +08:00
twc 1691e84c2c fix bug in PE_encrypt and scale out 2022-03-15 14:57:36 +08:00
twc 8fe5637689 fix bug in newinstance 2022-03-15 14:57:35 +08:00
xuanyue 6da70200f1 fix bug of scale-int8 2022-03-15 14:45:07 +08:00
xuanyue 5e960ed0c6 arithmetic opt 2022-03-15 10:32:15 +08:00
i-robot 6eda3e8918
!31114 FL, fix getModel and startFlJob total response buffer too large
Merge pull request !31114 from 徐永飞/r1.6
2022-03-15 02:20:43 +00:00
i-robot 5e741c05f5
!31114 FL, fix getModel and startFlJob total response buffer too large
Merge pull request !31114 from 徐永飞/r1.6
2022-03-15 02:20:40 +00:00
i-robot 58162434de
!31259 [MSLITE][CPU][r1.6] op Dynamic thread choose Optimization
Merge pull request !31259 from Greatpan/dynamic_thread_r1.6_new
2022-03-15 01:34:43 +00:00
yefeng 6d018a1b79 copy code to 1.6 2022-03-15 09:03:21 +08:00
i-robot 84a9c68b5c
!31235 [lite]unified cutting optimization for gather
Merge pull request !31235 from 徐安越/r1.6_temp2
2022-03-14 12:22:09 +00:00
i-robot 8c3160b4f8
!31231 [lite]optimize bias_add's multi threads
Merge pull request !31231 from 徐安越/r1.6_temp1
2022-03-14 12:21:56 +00:00
greatpanc 1e19c0d290 dynamic thread r1.6 2022-03-14 20:08:20 +08:00
i-robot 4d6eea3cc1
!31219 [MSLITE] add lite cuda opt for matmul and sigmoid
Merge pull request !31219 from Liu_Xuu/trt_0314_opt
2022-03-14 11:38:05 +00:00
i-robot bf908dd093
!31232 add jar package for x86 and aarch64
Merge pull request !31232 from liyan2022/dev_r1.6
2022-03-14 11:28:08 +00:00
Liu_Xuu b49a3c97c9 [MSLITE] add lite cuda opt for matmul and sigmoid 0414_20 2022-03-14 18:10:08 +08:00
albert-yan 31ab597b7f add jar package for x86 and aarch64 2022-03-14 17:13:55 +08:00
xuanyue 3a2bdc0df8 optimize bias_add's multi threads 2022-03-14 16:02:37 +08:00
xuanyue 8a113a1c0a unified cutting optimization for gather 2022-03-14 16:00:22 +08:00
i-robot b1cde285af
!31167 [lite]fix bug for tf
Merge pull request !31167 from 徐安越/r1.6_temp2
2022-03-14 07:21:29 +00:00
zhangzhaoju b3493ef131 Fix coredump problem of fl 2022-03-14 14:20:36 +08:00
i-robot 69cff16818
!30629 [lite]concat opt
Merge pull request !30629 from 徐安越/r1.6_temp1
2022-03-14 06:16:27 +00:00
i-robot f188616162
!31174 [MS][LITE][STABLE]optimize code | sync from master
Merge pull request !31174 from chenjianping/r1.6_dev
2022-03-14 03:28:29 +00:00
i-robot b791d9d91f
!31123 fix size problem in SplitVFission
Merge pull request !31123 from yuchaojie/r1.6_fix
2022-03-14 02:51:24 +00:00
i-robot 478938495b
!31123 fix size problem in SplitVFission
Merge pull request !31123 from yuchaojie/r1.6_fix
2022-03-14 02:51:20 +00:00
xuanyue f748a764b5 concat opt 2022-03-14 09:57:59 +08:00
xuanyue cb5f4f13d5 fix bug for tf 2022-03-14 09:54:46 +08:00
i-robot b48d1cd6a1
!31143 [MS][LITE][r1.6] fix converter fuzz bug
Merge pull request !31143 from jianghui58/codex_fuzz_r1.6
2022-03-11 12:20:21 +00:00
i-robot 96356e8cfa
!30817 fix heterogeneous with dynamic
Merge pull request !30817 from baihuawei/1.6bug
2022-03-11 09:19:40 +00:00
jianghui58 1eb847c531 fix converter fuzz bug 2022-03-11 16:54:09 +08:00
jpc_chenjianping c1ae2ba55f optimize code | sync from master 2022-03-11 15:06:33 +08:00
i-robot 6f37394089
!31140 Fix kernel by kernel issues for branch 1.6
Merge pull request !31140 from parastooashtari/1.6
2022-03-11 06:08:59 +00:00
i-robot 65e1bb17e0
!30598 change lr for server in r1.6
Merge pull request !30598 from zhoushan33/srever0225_r1.6
2022-03-11 06:01:09 +00:00
i-robot 2d24bd8c56
!30598 change lr for server in r1.6
Merge pull request !30598 from zhoushan33/srever0225_r1.6
2022-03-11 06:01:08 +00:00
baihuawei e89c357582 insert cast for tuple getitem and fix heterogeneous with dynamic shape bug 2022-03-11 11:28:01 +08:00
xuyongfei 482c4c024d FL, fix getModel and startFlJob total response buffer too large 2022-03-11 10:53:05 +08:00
i-robot 93886405c5
!30755 [MSLITE] release package for tensorrt
Merge pull request !30755 from Liu_Xuu/r1.6
2022-03-11 01:29:43 +00:00
i-robot 8e6d30ff71
!30755 [MSLITE] release package for tensorrt
Merge pull request !30755 from Liu_Xuu/r1.6
2022-03-11 01:29:39 +00:00
i-robot e79fe8989f
!31128 Split test cases of bfgs method.
Merge pull request !31128 from hezhenhao1/r1.6
2022-03-11 01:09:21 +00:00
Parastoo Ashtari 77f1853a44 fix kernel by kernel and mindRT parameter bug 2022-03-10 17:36:02 -05:00
hezhenhao1 a62b1c11f5 Split test cases of bfgs method in r1.6 branch. 2022-03-10 20:28:56 +08:00
yuchaojie 088b2268f5 fix size problem in SplitVFission 2022-03-10 18:53:36 +08:00
liuchuting f46fab0993 Profiling is modified using sample code. 2022-03-10 15:33:36 +08:00
i-robot bab1aa9034
!30984 fix dockerfile for 1.6.1
Merge pull request !30984 from yanghaoran/r1.6
2022-03-08 08:57:40 +00:00
yanghaoran 0f212e4380 fix dockerfile for 1.6.1 2022-03-08 16:39:54 +08:00
i-robot c9f72f3f57
!30877 update docker script for 1.6.1
Merge pull request !30877 from yanghaoran/r1.6
2022-03-05 10:30:12 +00:00
yanghaoran b1419d7cf1 update docker script for 1.6.1 2022-03-05 16:50:18 +08:00
i-robot 3550ec87aa
!30843 [MS][LITE][r1.6] roll back codes in mindir anf exporter
Merge pull request !30843 from XianglongZeng/r1.6
2022-03-04 15:45:29 +00:00
i-robot 2afbb3075c
!30843 [MS][LITE][r1.6] roll back codes in mindir anf exporter
Merge pull request !30843 from XianglongZeng/r1.6
2022-03-04 15:45:27 +00:00
i-robot 946ac31814
!30849 add transpose_x2 check in MatmulConfusionTranposeFusionPass
Merge pull request !30849 from yuchaojie/r1.6_fix
2022-03-04 15:44:08 +00:00
i-robot 055f0fceff
!30857 To prevent large memory data, sample the memory.
Merge pull request !30857 from liuchuting/mem_161
2022-03-04 15:04:55 +00:00
i-robot 0e891d37c5
!30856 takedown test_bfgs due to probabilistic failures
Merge pull request !30856 from yanghaoran/r1.6
2022-03-04 11:31:05 +00:00
liuchuting 8ff339450e To prevent large memory data, sample the memory. 2022-03-04 18:57:33 +08:00
yanghaoran 9ea7b0c19a takedown test_bfgs due to probabilistic failures 2022-03-04 18:45:01 +08:00
yuchaojie d646cf1ff6 add transpose_x2 check in MatmulConfusionTranposeFusionPass 2022-03-04 17:19:38 +08:00
zengxianglong 6999154a6c Revert "fix mindir attr analyzing error"
This reverts commit be756c8ff8.
2022-03-04 15:28:38 +08:00
i-robot 3c2ecf6a37
!30804 [MSLITE] Fix bug of mindir converter.
Merge pull request !30804 from wangshaocong/bugfix_r1.6
2022-03-03 12:01:33 +00:00
i-robot 772e5ef3b0
!30798 add case check for MatmulConfusionTranposeFusionPass
Merge pull request !30798 from yuchaojie/r1.6_fix
2022-03-03 11:50:37 +00:00
yuchaojie ffb9bbbbd8 add case check for MatmulConfusionTranposeFusionPass 2022-03-03 16:50:43 +08:00
i-robot b631a1065e
!30745 Bugfix in ConvertNonscalarTensorToParameter when value is uninitialized
Merge pull request !30745 from DeshiChen/0228_uninit_value
2022-03-03 08:27:34 +00:00
wangshaocong c4ab795423 [MSLITE] fix bug of mindir converter 2022-03-03 10:10:58 +08:00
shenwei41 1496a575c4 waring_fix_1.6 2022-03-03 09:40:22 +08:00
i-robot f1c1acd681
!30743 Fix an issue of federal differential privacy training.
Merge pull request !30743 from jxlang910/r1.6
2022-03-02 12:25:10 +00:00
i-robot 4fb2ff5563
!30758 remove dump check in exception case
Merge pull request !30758 from yelihua/r16
2022-03-02 12:22:59 +00:00
i-robot 3bdb94c184
!30767 GraphKernel Fix alexnet and broadcast bug in r1.6
Merge pull request !30767 from ZengZitao/alex_bug_fix
2022-03-02 12:20:40 +00:00
i-robot f8673cd4b5
!30607 [MS][LITE][develop] optimize mindspore_core build
Merge pull request !30607 from sunsuodong/optimize_build_1.6
2022-03-02 10:49:46 +00:00
yelihua 1d97eefbb7 enable dump when met exception during train 2022-03-02 18:33:00 +08:00
i-robot 181addec81
!30744 To prevent large memory data, sample the memory.
Merge pull request !30744 from liuchuting/mem_16
2022-03-02 09:22:01 +00:00
jin-xiulang 3fdf3f15b5 Fix an issue of federal differential privacy 2022-03-02 17:19:53 +08:00
zengzitao f7ada42493 fix alex and cpu bug error 2022-03-02 17:19:06 +08:00
sunsuodong 3f84ca4bd3 optimize build 2022-03-02 16:51:09 +08:00
i-robot edd3e0bfea
!30752 [MS][LITE][Develop] fix ci
Merge pull request !30752 from sunsuodong/code_docs_fix_ci
2022-03-02 08:45:57 +00:00
Liu_Xuu df7f756ba3 [MSLITE] release package for tensorrt 2022-03-02 16:26:02 +08:00
sunsuodong be9ea2ed19 fix ci 2022-03-02 16:16:43 +08:00
dayschan 4ebeb23ddd bugfix in ConvertNonscalarTensorToParameter when value is uninitialized. 2022-03-02 15:27:07 +08:00
liuchuting 1df925f2c0 To prevent large memory data, sample the memory. 2022-03-02 10:18:01 +08:00
i-robot ae3b3c404a
!30688 FL, fix scale in/out raise exception
Merge pull request !30688 from 徐永飞/r1.6
2022-03-01 06:20:22 +00:00
xuyongfei 42766bba63 FL, fix scale in/out raise exception 2022-03-01 11:01:20 +08:00
i-robot ea5ccf205e
!30685 [MS][LITE][Develop] Changed the Lite version to 1.6.1
Merge pull request !30685 from sunsuodong/code_docs_version
2022-03-01 02:56:19 +00:00
sunsuodong 124fb4cd48 change lite version 2022-02-28 18:26:21 -08:00
i-robot dad6d9825b
!30656 fix mac compile r1.6
Merge pull request !30656 from xulei/mac_fix
2022-02-28 12:46:31 +00:00
xulei 96fb547cf1 fix mac compile 2022-02-28 19:02:35 +08:00
i-robot 5623e5d938
!30652 Upgrade version to 1.6.1
Merge pull request !30652 from TronZhang/upgrade_version_to_1.6.1
2022-02-28 09:46:31 +00:00
i-robot 89550e6dbc
!30570 rolling back some code of PR29238
Merge pull request !30570 from zhouneng/fix_issue_i4uqoy_r1.6
2022-02-28 09:25:31 +00:00
i-robot ce3148770b
!30640 [Parallel] Fix cast inserted position error
Merge pull request !30640 from huangxinjing/fix_cast_pos
2022-02-28 09:05:01 +00:00
i-robot fd7781ca7c
!30478 FL, opt allreduce
Merge pull request !30478 from 徐永飞/r1.6
2022-02-28 08:49:19 +00:00
i-robot 2854d4d0ff
!30647 [bugfix]Cache prefetch thread exits before finishing compiling graph
Merge pull request !30647 from zyli2020/r1.6
2022-02-28 07:43:02 +00:00
i-robot b625d8a003
!30456 [MS][LITE][MODELPOOL] add model pool access control
Merge pull request !30456 from yefeng/240-model_pool_dor
2022-02-28 07:29:47 +00:00
tronzhang 6eedd1cfd9 upgrade version to 1.6.1 2022-02-28 14:58:57 +08:00
lizhenyu 70853db3da bugfix: prefetch thread exit before finishing compiling graph 2022-02-28 14:25:04 +08:00
i-robot 3537e89009
!30635 [ME][Auto_monad] Fix bug:Remove duplicate loads before Load node grouping.
Merge pull request !30635 from Margaret_wangrui/r1.6_auto_monad_eliminate_2
2022-02-28 06:07:19 +00:00
huangxinjing ba3b1fd480 [Parallel]Fix Pipeline Bug, Fix Cast Inserted Position 2022-02-28 11:56:22 +08:00
Margaret_wangrui ea740fc044 [ME][Auto_monad] Fix bug:Remove duplicate loads before Load node grouping. 2022-02-28 11:34:19 +08:00
i-robot 3ff5358b68
!30625 Fix the bug of duplicate key value of ActorSet
Merge pull request !30625 from caifubi/r1.6-actor-set-key-duplicate
2022-02-27 06:02:44 +00:00
i-robot e6ab9f8045
!30547 [MS][RDR] fix codecheck warnings
Merge pull request !30547 from louie5/r1.6
2022-02-26 11:00:49 +00:00
i-robot 4e085a256b
!30594 [MS][LITE][r1.6] fix mindir attr analyzing error
Merge pull request !30594 from XianglongZeng/r1.6
2022-02-26 09:49:33 +00:00
i-robot ded5024e1e
!30551 Catch abnormals from sto functions in debug
Merge pull request !30551 from maning202007/r1.6
2022-02-26 08:52:16 +00:00
caifubi 18722e3ace fix bug of actor-set key duplicate 2022-02-26 15:39:12 +08:00
xuyongfei 8f1437744d FL, opt allreduce 2022-02-26 15:11:40 +08:00
zengxianglong be756c8ff8 fix mindir attr analyzing error 2022-02-26 11:07:16 +08:00
maning202007 3bfdbae9e1 Catch abnormals from sto functions in debug
Fix type errors

Fix bugs

Fix cycle depth
2022-02-26 11:06:24 +08:00
i-robot b7ca17f8d5
!30603 fix codedex warnings
Merge pull request !30603 from zyli2020/r1.6
2022-02-26 02:31:24 +00:00
yefeng 06b9b2263a parallel predict door 2022-02-26 10:08:52 +08:00
i-robot 2d843657fa
!30595 [MS][LITE]fix control model stuck on Samsung phone
Merge pull request !30595 from mengyuanli/sync_r1.6
2022-02-26 02:00:51 +00:00
i-robot 4e2d42453c
!30583 Fix I4TZAN: Don't throw exception while cell id not in cache.
Merge pull request !30583 from zhangzhaoju/r1.6
2022-02-26 01:50:51 +00:00
zhoushan b40175748f change lr for server in r1.6 2022-02-26 09:44:18 +08:00
i-robot 9e82849df7
!30546 fix codecheck
Merge pull request !30546 from xulei/fix_codecheck_r1.6
2022-02-26 01:37:11 +00:00
i-robot 6c8f580daa
!29242 fix a bug about pynative mode error on Ascend
Merge pull request !29242 from 沈竞兴/codefix_r1.6
2022-02-25 12:32:02 +00:00
i-robot 1fb4246c76
!30563 fix copy bug in mac when run mindir
Merge pull request !30563 from zhangbuxue/fix_copy_bug_in_mac_when_run_mindir_r1.6
2022-02-25 11:34:01 +00:00
i-robot f6b2defcc2
!30586 TaskFailCallback does not support in kernel by kernel
Merge pull request !30586 from liangzelang/r1.6
2022-02-25 11:29:46 +00:00
lizhenyu b64740b9d2 fix codedex warnings 2022-02-25 17:46:22 +08:00
mengyuanli 50a9d6ccc6 fix bug of control flow stuck 2022-02-25 17:02:48 +08:00
i-robot c9ddd3cdd6
!30495 add cast for GetTupleItem output
Merge pull request !30495 from baihuawei/fix_insert_cast
2022-02-25 08:33:17 +00:00
liangzelang 09dbbb54e8 TaskFailCallback does not support in kernel by kernel 2022-02-25 16:22:19 +08:00
i-robot 70dbd3b1c9
!30582 Fix code example of DatasetCache
Merge pull request !30582 from xiaotianci/code_docs_fix_example_r1.6
2022-02-25 08:19:21 +00:00
i-robot 860a6efeeb
!30185 [MSLITE][GPU][r1.6] support arithmetic op 2d with 5d broadcast
Merge pull request !30185 from Greatpan/5dyolov5_support_r1.6
2022-02-25 08:17:54 +00:00
i-robot c06234ef7f
!30548 fix the link bug of depend on depend node in the control flow
Merge pull request !30548 from limingqi107/r1.6
2022-02-25 07:48:32 +00:00
zhangzhaoju c59e31f373 Fix I4TZAN
If a key does not exist in the map (while deep copying a cell), do not throw an exception when popping the key.
2022-02-25 15:38:59 +08:00
Xiao Tianci ffaff708b4 fix code example 2022-02-25 15:29:06 +08:00
i-robot d8feb92a86
!30577 Correct the error information about BNTrainingReduce operator on CPU platform
Merge pull request !30577 from 沈竞兴/whitelist0225
2022-02-25 06:57:48 +00:00
i-robot 4c6e728cbf
!30556 [lite]adjust gather func's in-params' name and synchronize micro
Merge pull request !30556 from 徐安越/r1.6_temp2
2022-02-25 06:27:33 +00:00
i-robot 89a521a856
!30422 [MS][LITE][Develop] fix thread pool when MindRT is off
Merge pull request !30422 from sunsuodong/fix_thread_pool_1.6
2022-02-25 06:25:45 +00:00
i-robot de6c6b6040
!30555 [MS][LITE]Fix arm64 converter
Merge pull request !30555 from gongdaguo1/cherry-pick-1645752484
2022-02-25 06:24:44 +00:00
i-robot 32c81afe3d
!30520 fix pylint warning
Merge pull request !30520 from laiyongqiang/r16_py_warning
2022-02-25 06:23:43 +00:00
i-robot 10c75a1b15
!30380 fix bug of restart for flclient in r1.6
Merge pull request !30380 from zhoushan33/flclient0222_r1.6
2022-02-25 06:19:14 +00:00
i-robot fd42efac7f
!30557 [MS][LITE] converter handle unknown error
Merge pull request !30557 from jianghui58/codex_fuzz_r1.6
2022-02-25 06:16:34 +00:00
shen_jingxing 7f549e2c7f correct the error information 2022-02-25 12:50:07 +08:00
i-robot 29baf6ee50
!30437 [MSLITE] check int8 weight tensor
Merge pull request !30437 from ling/r1.6
2022-02-25 04:42:55 +00:00
i-robot faf7ced730
!30528 【PS】fix process can not exit normally when exception has been thrown
Merge pull request !30528 from zyli2020/r1.6
2022-02-25 03:24:48 +00:00
zhouneng2 584b39c1f0 rolling back some code of PR29238 2022-02-25 11:14:18 +08:00
xuanyue a281631b03 adjust gather func's in-params' name and synchronize micro 2022-02-25 11:10:20 +08:00
baihuawei e5352e8236 add cast for get tuple item output 2022-02-25 10:55:00 +08:00
buxue 76f5ce51a2 fix copy bug in mac when run mindir 2022-02-25 10:35:56 +08:00
greatpanc 035f0384b2 support arithmetic op 2d with 5d broadcast 2022-02-25 10:10:17 +08:00
jianghui58 f79e1a92e3 converter handle unknown error 2022-02-25 10:10:10 +08:00
i-robot 74256534bf
!30452 [MS][LITE][r1.6] fix fuzz bug
Merge pull request !30452 from jianghui58/codex_fuzz_r1.6
2022-02-25 02:01:56 +00:00
i-robot 1b2d7348cf
!30521 modify param check for bit_num
Merge pull request !30521 from liyan2022/dev_r1.6
2022-02-25 01:46:28 +00:00
gongdaguo ae9b93a786 fixed 786cee1 from https://gitee.com/gongdaguo1/mindspore/pulls/30347
test ci
2022-02-25 01:28:17 +00:00
i-robot fc92f582e6
!30525 [MSLITE][CPU][r1.6] AVX512/256/SSE/NEON Advanced packaging, and batchnorm Op Refactoring and optimization
Merge pull request !30525 from Greatpan/avx512_batchnorm_r1.6
2022-02-25 01:27:17 +00:00
i-robot 2c6624b9a3
!30516 Fix Bug of MelGAN's FPS is Smaller than Standard
Merge pull request !30516 from jiaorui/fix-timeout
2022-02-25 01:26:04 +00:00
i-robot c3efd02f88
!30227 [MSLITE][Bug][Func]Fuzz test.
Merge pull request !30227 from wangshaocong/r1.6_codex
2022-02-25 01:20:47 +00:00
i-robot b6559a987c
!30499 remove wondows cpu testcase
Merge pull request !30499 from caifubi/r1.6-pynative-run-in-graph
2022-02-25 01:11:18 +00:00
xulei b9f627bb5b fix codecheck 2022-02-25 09:07:15 +08:00
louie5 7e00d4db46 fix codecheck warnings 2022-02-25 09:03:01 +08:00
limingqi107 50334b27c4 fix the link bug of depend on depend node in the control flow 2022-02-24 21:05:04 +08:00
i-robot 89aeab0402
!30062 [bugfix] fuzz bugfix
Merge pull request !30062 from zhengyuanhua/r1.6
2022-02-24 12:57:59 +00:00
lizhenyu d6ad2a2a39 fix process can not exit normally when exception has been thrown 2022-02-24 20:40:41 +08:00
i-robot 6ce2497135
!30482 fix issue I4TBJO
Merge pull request !30482 from tan-wei-cheng-3260/r1.6-develop
2022-02-24 12:26:44 +00:00
LaiYongqiang e921c1b4e4 fix pylint warning 2022-02-24 20:25:38 +08:00
i-robot 2ab0b6022f
!30504 fix the bug of GPU TopK kernel incorrect index r1.6
Merge pull request !30504 from zong_shuai/topk_r1.6_index_err
2022-02-24 11:30:44 +00:00
i-robot ac811b93f1
!30268 【MS】【LITE】support NUMA
Merge pull request !30268 from chenjianping/r1.6_dev
2022-02-24 09:33:24 +00:00
greatpanc 6cb4e72f1b avx512 batchnorm op 2022-02-24 17:29:57 +08:00
albert-yan 46a44a489f add param check for bit_num 2022-02-24 16:58:43 +08:00
jiaorui 0bab61075b fix timeout 2022-02-24 16:19:45 +08:00
i-robot c40f85e65b
!30461 [MS][LITE][MODELPOOL][1.6] add param numWork
Merge pull request !30461 from yefeng/241-add_param-numWork
2022-02-24 06:42:39 +00:00
wangshaocong f66ca10f6f [MSLITE] FuzzTest. 2022-02-24 14:13:28 +08:00
zong-shuai ee871daca3 debug 2022-02-24 14:11:01 +08:00
twc 54f2befa35 fix issue I4TBJO 2022-02-24 12:26:38 +08:00
i-robot 06967502a9
!30481 add type validation for GroupNorm and optimize the documentation of HookBackward, DistributedGradReducer.
Merge pull request !30481 from wangshuide/wsd_r1.6
2022-02-24 04:02:41 +00:00
chenjianping b7c998b672 support NUMA and dynamic memory 2022-02-24 11:48:37 +08:00
caifubi 8559d8260f delete windows cpu testcase 2022-02-24 11:42:05 +08:00
i-robot 90af71bd39
!30468 fix error link in micro readme for r1.6
Merge pull request !30468 from lvmingfu/r1.6
2022-02-24 02:31:22 +00:00
i-robot 1ee57386ba
!30442 [MS][LITE][1.6]fix fp16 weight model inference error
Merge pull request !30442 from XianglongZeng/r1.6
2022-02-24 02:25:54 +00:00
i-robot 8109c94939
!30341 [GraphKernel]Fix Static Check on r1.6
Merge pull request !30341 from jiaoy1224/r1.6
2022-02-24 02:22:10 +00:00
yefeng c224350da6 runner config add param: workers_num 2022-02-24 09:54:15 +08:00
i-robot cbcbed0973
!30372 alarm cleaning
Merge pull request !30372 from 刘勇琪/r1.6
2022-02-24 01:31:18 +00:00
i-robot c408f0e022
!30372 alarm cleaning
Merge pull request !30372 from 刘勇琪/r1.6
2022-02-24 01:31:16 +00:00
i-robot 7946973dcd
!29806 [MD] fix code check in r1.6
Merge pull request !29806 from liyong126/fix_codex_sync_r1.6
2022-02-24 01:30:38 +00:00
i-robot 45d08f6b75
!30462 clean code
Merge pull request !30462 from jjfeing/r1.6
2022-02-24 01:11:03 +00:00
sunsuodong e820db98ac fix thread pool when mindrt off 2022-02-23 16:56:48 -08:00
i-robot 396d759331
!30458 fix validation of PILCUBIC
Merge pull request !30458 from luoyang/issues_r1.6
2022-02-23 14:42:59 +00:00
i-robot b41dc3550d
!30464 [ME][Auto_monad]The load node in print operator inputs should not be replaced.
Merge pull request !30464 from Margaret_wangrui/r1.6
2022-02-23 11:40:06 +00:00
wangshuide2020 3673d526dc add type validation for GroupNorm and optimize the documentation of HookBackward, DistributedGradReducer. 2022-02-23 19:22:04 +08:00
i-robot 9050dafa1f
!30426 fix sponge numpy accuracy issue
Merge pull request !30426 from 杨林枫/sponge_nl_fix_r1.6
2022-02-23 11:09:23 +00:00
i-robot 95388dbd3d
!30178 [MS][LITE][r1.6] fix npu op bugs
Merge pull request !30178 from XianglongZeng/r1.6_
2022-02-23 10:40:24 +00:00
i-robot 3c23600725
!30441 dont evaluated to specific SymbolicKey if not direct weight parameter
Merge pull request !30441 from xychow/dont-specialze-refembed-if-not-direct-weight
2022-02-23 10:16:53 +00:00
Yang Jiao 457803e987 static check 2022-02-23 18:06:15 +08:00
i-robot c95964f9b7
!30463 Fix example code in docs
Merge pull request !30463 from xiaotianci/code_docs_fix_example_r1.6
2022-02-23 09:46:49 +00:00
i-robot a50c8fe960
!30395 remove annotated code
Merge pull request !30395 from qinzheng/remove_code_r1.6
2022-02-23 09:38:28 +00:00
i-robot 8fae8d30d8
!30302 fix fp16 subgraph partition bug
Merge pull request !30302 from qinzheng/fix_fp16
2022-02-23 09:20:58 +00:00
i-robot d48e7d806f
!30444 degrade INFO level to DEBUG as it's only useful for developer
Merge pull request !30444 from xychow/fix-log-info
2022-02-23 08:55:11 +00:00
lvmingfu 33a90f2f3e fix error links for micro readme in r1.6 2022-02-23 16:43:42 +08:00
Margaret_wangrui d19a631319 [ME][Auto_monad]The load node in print operator inputs should not be replaced. 2022-02-23 16:30:37 +08:00
jjfeing cf74f67ab1 clean code 2022-02-23 16:29:22 +08:00
Xiao Tianci fba57d9713 fix example code 2022-02-23 16:29:10 +08:00
i-robot 6acafffcde
!30449 [MS][LITE] pack weight
Merge pull request !30449 from yefeng/239-copy
2022-02-23 08:22:40 +00:00
YangLuo 27b40bbe6b fix validation of PILCUBIC 2022-02-23 15:34:07 +08:00
i-robot e49b4b5ecb
!30289 remove redundent nullptr-check
Merge pull request !30289 from yuchaojie/r1.6_fix
2022-02-23 07:33:32 +00:00
i-robot b97af2b6eb
!30326 If split indices is larger than gradient's number, throw warnings.
Merge pull request !30326 from linqingke/r1.6
2022-02-23 07:21:00 +00:00
zhousiyi 6e39d08b58 dont evaluated to specific SymbolicKey if not direct weight parameter 2022-02-23 07:15:49 +00:00
i-robot d1e9253e73
!30405 [r1.6] clean code warnings at 0222
Merge pull request !30405 from huanghui/r1.6
2022-02-23 07:09:58 +00:00
jianghui58 9a7fe142b2 fix fuzz bug 2022-02-23 14:54:20 +08:00
liyong b7d859327e fix codecheck 2022-02-23 14:26:08 +08:00
i-robot fb3c55d41b
!30412 fix the coredump of stack actor and code review
Merge pull request !30412 from limingqi107/r1.6
2022-02-23 06:10:53 +00:00
yefeng e897938ae2 pack weight 2022-02-23 12:42:13 +08:00
yanglf1121 27026bcef5 fix sponge numpy 2022-02-23 11:23:19 +08:00
i-robot 8509a855db
!30419 clean code for r1.6
Merge pull request !30419 from huangbingjian/clean_1.6
2022-02-23 03:09:51 +00:00
zhousiyi 6b8c60858d degrade INFO level to DEBUG as it's only useful for developer 2022-02-23 02:59:42 +00:00
ling a289c61539 [MSLITE] check int8 weight tensor 2022-02-23 10:48:31 +08:00
i-robot f7a63ebee0
!30402 Clean code for core directory
Merge pull request !30402 from YuJianfeng/r1.6
2022-02-23 02:45:16 +00:00
zengxianglong c9f4658075 fix fp16 weight model inference error 2022-02-23 10:35:04 +08:00
i-robot 395b1cb3be
!30418 remove the warning suppression of pylint:invalid-unary-operand-type for LGamma.
Merge pull request !30418 from wangshuide/wsd_r1.6
2022-02-23 02:22:13 +00:00
i-robot d4a2865a40
!30300 fix pylint bug
Merge pull request !30300 from cjh9368/pylint_1_6
2022-02-23 02:16:49 +00:00
i-robot 1dece1792e
!30375 Code alarm clearance for parallel modules.
Merge pull request !30375 from liuluobin/warning_clear_r1.6
2022-02-23 01:57:13 +00:00
limingqi107 30f18adeb3 fix the coredump of stack actor and code review 2022-02-23 09:46:54 +08:00
i-robot a90c4648cd
!30384 [MSLITE][CPU][r1.6] AVX512 debug mode compile failed problem bugfix
Merge pull request !30384 from Greatpan/debug_compile_error_r1.6
2022-02-23 01:38:23 +00:00
linqingke f97ffc43de split indices warning if larger than gradient's number. 2022-02-23 09:29:02 +08:00
yuchaojie 379ecb278b remove redundent nullptr-check 2022-02-23 09:28:47 +08:00
i-robot 488dc38df0
!30403 update cpu adafactor code annotation
Merge pull request !30403 from kisnwang/r1.6
2022-02-23 01:25:23 +00:00
i-robot 5456a352da
!30401 fix codecheck
Merge pull request !30401 from zlq2020/r1.6
2022-02-23 00:48:49 +00:00
i-robot 895260ca98
!30394 [ME] Code Check.
Merge pull request !30394 from Margaret_wangrui/r1.6
2022-02-22 16:31:51 +00:00
i-robot d985fde886
!30366 Optimize check args message
Merge pull request !30366 from zhangzhaoju/r1.6
2022-02-22 13:05:44 +00:00
i-robot 6baff7a6e3
!30393 Fix DumpParameters issue when only need dump input data
Merge pull request !30393 from maning202007/r1.6
2022-02-22 12:57:25 +00:00
i-robot e40b562b4f
!30383 profiler fix the code warning
Merge pull request !30383 from zangqx/r1.6_1
2022-02-22 12:50:35 +00:00
wangshuide2020 af9ee9d8ab remove the warning suppression of pylint:invalid-unary-operand-type for LGamma. 2022-02-22 20:43:12 +08:00
huangbingjian f94d37ebde clean code 1.6 2022-02-22 20:37:36 +08:00
yujianfeng b22a9ef0f2 Clean code for core directory 2022-02-22 20:29:48 +08:00
i-robot bfe6bbb609
!30247 Always eval CNode if the funcgraph enter second time with sequence node
Merge pull request !30247 from xychow/always-eval-if-abstract-sequence-with-node
2022-02-22 11:20:18 +00:00
huanghui 8627502da2 clean code warnings at 0222 2022-02-22 17:28:25 +08:00
i-robot 29cc3d1422
!30304 PyNative ms_function run in GRAPH_MODE
Merge pull request !30304 from caifubi/r1.6-pynative-run-in-graph
2022-02-22 09:20:36 +00:00
liuluobin 3ba9fc772d Code alarm clearance for parallel modules. 2022-02-22 16:39:52 +08:00
kswang 2791e0efa6 update cpu adafactor code annotation 2022-02-22 16:34:43 +08:00
qinzheng 50b0cfcc55 remove annotated code 2022-02-22 16:20:43 +08:00
zlq2020 7f5fd63a7c fix codecheck 2022-02-22 16:20:07 +08:00
Margaret_wangrui f25fc7c2cf [ME] Code Check. 2022-02-22 15:58:42 +08:00
i-robot cfdca08b6c
!30370 Fix check result of control testcase
Merge pull request !30370 from chenfei_mindspore/r1.6-develop
2022-02-22 07:38:27 +00:00
maning202007 e2bbd10302 Fix DumpParameters issue when only need dump input data
Fix code warnings2
2022-02-22 15:23:04 +08:00
臧庆香 98de4dd115 fix the code warning 2022-02-22 15:17:59 +08:00
greatpanc f343652550 debug compile error bugfix 2022-02-22 15:12:08 +08:00
i-robot 3e397190f2
!30222 [MS][LITE][MODELPOOL][1.6] rename
Merge pull request !30222 from yefeng/234-rename_for_model_pool
2022-02-22 03:55:02 +00:00
i-robot 21f25caa66
!30263 310 codex and online infer bugfix
Merge pull request !30263 from chenping/r1.6
2022-02-22 03:52:29 +00:00
zhoushan 57c449e9e7 fix bug of restart for flclient 2022-02-22 11:42:13 +08:00
liu-yongqi-63 9003d817ff alarm cleaning 2022-02-22 11:37:32 +08:00
i-robot 40ba4d1473
!29128 clean code warnings
Merge pull request !29128 from jxlang910/r1.6
2022-02-22 03:28:33 +00:00
i-robot 7a6ad092e0
!30360 Fix code warning in r1.6
Merge pull request !30360 from LiangZhibo/warning_r1.6
2022-02-22 02:56:07 +00:00
i-robot 88a219b9d4
!30335 fix code check
Merge pull request !30335 from lianliguang/r1.6
2022-02-22 02:26:58 +00:00
i-robot f5975a2c6a
!30252 [lite]optimize reduce op
Merge pull request !30252 from 徐安越/r1.6_temp2
2022-02-22 02:07:28 +00:00
i-robot a385ce92e2
!30354 [MSLITE][CPU][R1.6] AVX512/256/SSE/NEON Advanced packaging, and Pool Op Refactoring and optimization
Merge pull request !30354 from Greatpan/avx512_pool
2022-02-22 02:04:08 +00:00
caifubi 7a0cd27fc1 PyNative ms_function run in GRAPH_MODE 2022-02-22 09:53:33 +08:00
chenfei 9ff660830f fix assert result 2022-02-22 09:49:10 +08:00
zhangzhaoju 810c67e431 Fix issue I4KVTZ
The description of "outmost network" in error message instead by
method/function name
2022-02-22 09:36:05 +08:00
i-robot 8ff74242d6
!30312 Fix static code check issues for distributed communication
Merge pull request !30312 from chengang/fix_code_check_1_6
2022-02-22 01:24:50 +00:00
zhousiyi 3a33fb1984 Always eval CNode if the funcgraph was evaluated with same AbstractTuple parameter but without sequence_nodes, so the possible TupleGetItem inside that funcgraph may set the used flags in that sequence_nodes 2022-02-22 01:20:03 +00:00
i-robot 31d5a60bc3
!30355 [MS][LITE][Develop] fix vs build
Merge pull request !30355 from sunsuodong/r1.6
2022-02-22 01:11:28 +00:00
jin-xiulang 59da17b5f5 clean code 2022-02-22 09:02:54 +08:00
cristoval 1187548518 fix static code check 2022-02-21 23:04:50 +08:00
i-robot 22d73a209d
!30333 Fix code warnings in debug
Merge pull request !30333 from maning202007/r1.6
2022-02-21 14:18:50 +00:00
l00591931 5e1962685b fix 1.6 code warning 2022-02-21 20:58:12 +08:00
greatpanc d58c5e771c avx512 pool 2022-02-21 20:38:18 +08:00
sunsuodong fcf5c14800 fix vs build 2022-02-21 04:06:09 -08:00
i-robot 899b1f950d
!30336 Fix example docs of minddata
Merge pull request !30336 from xiaotianci/code_docs_fix_example_r1.6
2022-02-21 10:50:39 +00:00
i-robot 7e437e4ea4
!30287 [MSLITE][GPU][r1.6] remove some big model
Merge pull request !30287 from Greatpan/loss_device_r1.6
2022-02-21 10:42:46 +00:00
i-robot 0ddd1a47dd
!30305 [MS][LITE][r1.6] split quant task
Merge pull request !30305 from XianglongZeng/r1.6
2022-02-21 10:15:09 +00:00
lianliguang b62358fde5 fix pclint plus 2022-02-21 17:33:16 +08:00
maning202007 1b8859326a Fix code warnings in debug 2022-02-21 17:30:14 +08:00
i-robot 0a741a0137
!30318 [ME] Remove warning log when Parameter is not set name.
Merge pull request !30318 from Margaret_wangrui/r1.6
2022-02-21 09:18:43 +00:00
i-robot 9f06eb5d30
!30310 Fix static pc lint
Merge pull request !30310 from ZPaC/1.6-relax-ps-acc
2022-02-21 08:51:15 +00:00
qinzheng 42bf6755f5 fix fp16 subgraph bug 2022-02-21 16:35:03 +08:00
Xiao Tianci 58aaa8ba13 fix code docs 2022-02-21 16:19:50 +08:00
zhengyuanhua 5f8e89cd2b [code check]fix fuzz 2022-02-21 15:02:22 +08:00
i-robot 879b78c140
!30204 fix bug of fuzz
Merge pull request !30204 from cjh9368/clean_static_error_1_6
2022-02-21 06:33:31 +00:00
Margaret_wangrui 16dabf18fd [ME] Remove warning log when Parameter is not set name. 2022-02-21 14:26:06 +08:00
ZPaC f979e7c6e2 Fix static pc lint 2022-02-21 11:49:31 +08:00
zengxianglong 5e17247010 split quant task 2022-02-21 11:03:03 +08:00
i-robot ef6fc721c8
!29470 [MS][LITE][develop] code review
Merge pull request !29470 from sunsuodong/code_check_1.6
2022-02-21 02:38:46 +00:00
xuanyue 748035cb41 optimize reduce op 2022-02-21 10:29:52 +08:00
cjh9368 f31821cca8 codecheck fix 2022-02-21 09:55:56 +08:00
chenping 8e20d1e4c3 310 codex and online infer bugfix 2022-02-21 09:47:39 +08:00
greatpan a05b8df932 remove some big model 2022-02-20 20:30:41 +08:00
i-robot b0e0bb6477
!30236 Add testcases about side effect & Execute in vm of pipeline + control + mutil-target
Merge pull request !30236 from chenfei_mindspore/r1.6-develop
2022-02-19 10:42:27 +00:00
i-robot ce4bd51e24
!30270 fix activation grad infer
Merge pull request !30270 from zhaodezan/r1.6
2022-02-19 10:32:22 +00:00
zengxianglong e637adf5e8 fix npu op bugs 2022-02-19 18:24:58 +08:00
i-robot 81e01513ca
!30114 remove mutil_stream for kernel by kernel
Merge pull request !30114 from baihuawei/seq_kernel_by_kernel1.6
2022-02-19 09:01:00 +00:00
i-robot aa344a0470
!30224 Switch actor only outputs partial.
Merge pull request !30224 from gaoyong10/r1.6_2
2022-02-19 08:29:03 +00:00
i-robot ca4b7f610b
!30149 AllReduce input and output size aligned by 512 in ascend device context
Merge pull request !30149 from laiyongqiang/r16_allreduce
2022-02-19 08:19:27 +00:00
zhaodezan 18a69e288b fix activation grad infer 2022-02-19 15:33:24 +08:00
baihuawei 0cf67ca557 remote kernel event for kernel by kernel 2022-02-19 14:34:17 +08:00
chenfei d3dda99b9e add side effect cases 2022-02-19 14:34:13 +08:00
i-robot a22b72e4e1
!30099 Construct cache dir with env rank_id directly
Merge pull request !30099 from laiyongqiang/cache_dir_r16
2022-02-19 05:54:13 +00:00
i-robot 717f4527f7
!30208 [MS][LITE]sync fuzz
Merge pull request !30208 from mengyuanli/sync_r1.6
2022-02-19 02:06:36 +00:00
i-robot 14e1696708
!30048 fix fuzz and clean codex
Merge pull request !30048 from qinzheng/fix_fuzz_1.6
2022-02-18 09:42:32 +00:00
sunsuodong a76bff6146 code review 2022-02-18 01:36:35 -08:00
gaoyong10 4e7ce2d37c Switch actor only outputs partial. 2022-02-18 17:32:22 +08:00
i-robot 2e85f2b0a5
!30142 [MS][LITE][r1.6] codex clean && fuzz bugfix
Merge pull request !30142 from jianghui58/codex_fuzz_r1.6
2022-02-18 08:55:49 +00:00
i-robot 85b6487e02
!30203 Fix input dump error in kernel by kernel mode 1.6
Merge pull request !30203 from TinaMengtingZhang/bugfix_1.6
2022-02-18 08:15:56 +00:00
i-robot 2891daa82d
!30228 [MS][lite][providers]codex
Merge pull request !30228 from KXiong/r1.6
2022-02-18 08:15:31 +00:00
mengyuanli 0f5fa9a521 fix fuzz problem 2022-02-18 15:56:26 +08:00
i-robot a96ec8616f
!30151 [lite]optimize gather op
Merge pull request !30151 from 徐安越/r1.6_temp1
2022-02-18 07:44:40 +00:00
cjh9368 4cc268e22c [MS][LITE] fix bug of codex 2022-02-18 15:36:26 +08:00
i-robot e86adadc9e
!30188 Fix the sample code
Merge pull request !30188 from maning202007/code_docs_r1.6
2022-02-18 06:59:24 +00:00
xiongkun 75e9844729 [MS][LITE][providers]codex 2022-02-18 14:43:13 +08:00
yefeng 99b15366e4 model pool rename 2022-02-18 14:36:13 +08:00
i-robot 25c3d507cd
!30174 Fix the debugger warning on cpu
Merge pull request !30174 from maning202007/r1.6
2022-02-18 06:35:46 +00:00
i-robot d3917a30ed
!30150 [MS][LITE] support full quant model restore to float model
Merge pull request !30150 from zhengjun10/r1.6
2022-02-18 03:13:50 +00:00
i-robot d1af93fd37
!30067 Sync before and after launch communication op when using MemScheduler
Merge pull request !30067 from tanghuikang/swap_sync_comm
2022-02-18 02:29:53 +00:00
i-robot f82cf22c0f
!30152 sync codex from ms-enterprise
Merge pull request !30152 from hangq/r1.6.0
2022-02-18 01:35:12 +00:00
i-robot c01a461f64
!30144 fix issue I4TG2C
Merge pull request !30144 from tan-wei-cheng-3260/r1.6-develop
2022-02-18 01:32:57 +00:00
TinaMengtingZhang 57ff5b2077 fix input parameter dump error 2022-02-17 20:32:22 -05:00
jianghui58 6e3efcbd39 codex clean && fuzz bugfix 2022-02-18 09:13:45 +08:00
twc 2589e13024 fix issue I4TG2C 2022-02-17 22:21:57 +08:00
i-robot adf48e9434
!30131 [MS][LITE][r1.6] sync code clean and fuzz bug fix 2
Merge pull request !30131 from XianglongZeng/r1.6
2022-02-17 14:10:28 +00:00
i-robot 5b97857977
!30092 Fix incorrect dump structure for multigraph in ascend mindRT r1.6
Merge pull request !30092 from parastooashtari/1.6
2022-02-17 13:59:25 +00:00
i-robot d889926e2f
!30186 Fix auto monad control arrow from make tuple to return.
Merge pull request !30186 from gaoyong10/r1.6_2
2022-02-17 12:43:08 +00:00
i-robot 248eee5b1f
!30137 Fix host format bug
Merge pull request !30137 from liangzelang/r1.6
2022-02-17 11:41:47 +00:00
maning202007 87f632f768 Fix the sample code. 2022-02-17 17:44:58 +08:00
gaoyong10 99136e8a61 Fix auto monad control arrow from make tuple to return.
Fix cpu sync data for scalar type.

Link data arrow from device data source actor to exit actor.
2022-02-17 17:22:39 +08:00
maning202007 bada85d5a7 Fix the debugger warning on cpu 2022-02-17 17:06:07 +08:00
qinzheng 58f4b16935 fix fuzz and clean codex 2022-02-17 16:15:27 +08:00
i-robot 0180f27026
!30154 [lite]code clean
Merge pull request !30154 from 徐安越/r1.6_temp2
2022-02-17 08:12:08 +00:00
i-robot 11dcd9af0f
!30147 The format needs to be converted when the format of tensor is 5D
Merge pull request !30147 from caifubi/r1.6-pynative-graph-mix-exec
2022-02-17 07:47:20 +00:00
i-robot 9a94962f7a
!30148 Memory alignment for GPU AllReduce
Merge pull request !30148 from caifubi/r1.6-allreduce-memory-align
2022-02-17 06:33:35 +00:00
liangzelang d30d9ec51c fix host format bug 2022-02-17 11:51:50 +08:00
xuanyue 2a97fb4fe5 code clean 2022-02-17 11:05:01 +08:00
hangangqiang 74a8723695 sync codex from ms-enterprise 2022-02-17 10:54:15 +08:00
xuanyue d5cc6fb5f7 optimize gather op 2022-02-17 10:53:18 +08:00
zhengjun10 b17e23882f support full quant model restore to float model 2022-02-17 10:52:50 +08:00
i-robot fa224b61f1
!30042 [MS][LITE] r1.6 add lite log to flclient and fix issue
Merge pull request !30042 from zhengjun10/fix16
2022-02-17 02:46:24 +00:00
i-robot c1284c8e1c
!30143 [MS][LITE][1.6] train check return value
Merge pull request !30143 from yefeng/232-checkout_code_train_return_value-1.6
2022-02-17 02:05:33 +00:00
i-robot 6b0bff3c61
!30102 [MS][LITE][MODELPOOL][1.6] test
Merge pull request !30102 from yefeng/229-Test_door
2022-02-17 02:04:27 +00:00
LaiYongqiang cdf8a8fcca AllReduce input and output size aligned by 512 in ascend device context 2022-02-17 10:02:50 +08:00
tanghuikang e86a242761 Sync before and after launch communication op when using MemScheduler to
avoid memory conflict
2022-02-17 09:54:09 +08:00
i-robot 5085ef5e2b
!30106 fix bug of codex and fuzz
Merge pull request !30106 from cjh9368/clean_static_error_1_6
2022-02-17 01:16:27 +00:00
i-robot 38cec34b78
!30110 Remove global constant tensor, float_types, _SafeNormalize of mindspore.scipy module in r1.6 branch.
Merge pull request !30110 from hezhenhao1/r1.6
2022-02-17 01:15:14 +00:00
i-robot 91bd4aad24
!30109 relax ps acc
Merge pull request !30109 from ZPaC/1.6-relax-ps-acc
2022-02-16 12:51:08 +00:00
caifubi a16d09f755 Fix AllReduce memory align size 2022-02-16 20:09:45 +08:00
caifubi 4dd49a4b72 Trans format when the tensor is 5D format but the graph input is 4D format 2022-02-16 20:02:35 +08:00
i-robot 9f3e9d0c85
!30101 FL, pclint clean
Merge pull request !30101 from 徐永飞/r1.6
2022-02-16 11:13:40 +00:00
i-robot 861eeeb928
!30097 [MS][LITE][MODELPOOL][1.6] fix model pool memory leak
Merge pull request !30097 from yefeng/228-fix_model_pool_memory_leak
2022-02-16 11:06:12 +00:00
yefeng 67b4fe9cea train code check return value 2022-02-16 19:01:08 +08:00
zengxianglong deb744edde sync code clean and fuzz bug fix 2 2022-02-16 18:35:16 +08:00
i-robot fa3e26ce61
!30104 [MSLITE] Clear residual benchmark threads before running.
Merge pull request !30104 from wangshaocong/r1.6_codex
2022-02-16 10:33:52 +00:00
i-robot cf7a0981f8
!30082 Get device target from cnode in multi-device situation
Merge pull request !30082 from chenfei_mindspore/get-all-nodes-target
2022-02-16 09:20:47 +00:00
yefeng 0694d96fc1 fix model pool compile 2022-02-16 16:58:52 +08:00
zhengjun10 b09af08420 add flclient log and fix issue 2022-02-16 16:11:20 +08:00
i-robot 9ecfccf4f6
!30093 Fix statistic dump in kernel by kernel mode r1.6
Merge pull request !30093 from TinaMengtingZhang/kernel_by_kernel_stat_dump_1.6
2022-02-16 07:07:01 +00:00
hezhenhao1 5b182d6b14 Remove global constant tensor, float_types, _SafeNormalize of mindspore.scipy module. 2022-02-16 14:34:54 +08:00
ZPaC 47e1dc2afa relax ps acc 2022-02-16 14:19:39 +08:00
i-robot 7f331d284d
!30032 [MS][LITE][r1.6] adjust scatter_nd input order and cast mindir float64 input
Merge pull request !30032 from XianglongZeng/r1.6_
2022-02-16 06:18:14 +00:00
cjh9368 39fbf4aa09 [MS][LITE] fix fuzz problem of miss populate 2022-02-16 11:38:53 +08:00
i-robot c8c29da927
!30076 update the documentation of sequence_mask, tensor_dot and dot.
Merge pull request !30076 from wangshuide/wsd_r1.6
2022-02-16 03:36:11 +00:00
wang_shaocong aa3ff1e11f [MSLITE] clear residual benchmark threads. 2022-02-16 11:07:27 +08:00
i-robot 30aa0dc60b
!30096 fix v3plus512_512_op11.onnx precision
Merge pull request !30096 from yeyunpeng2020/r1.6
2022-02-16 03:00:41 +00:00
yefeng a4f796b0c1 fix model pool memory leak 2022-02-16 10:59:35 +08:00
xuyongfei 976eb43e25 FL, pclint clean 2022-02-16 10:47:36 +08:00
LaiYongqiang b11707d9ea construct cache dir with env rank_id directly 2022-02-16 10:17:57 +08:00
yeyunpeng2020 44768aed49 fix v3plus512_512_op11.onnx precision 2022-02-16 09:53:42 +08:00
i-robot 3640558060
!30086 [MSLITE][GPU][r1.6] gpu support 5d op
Merge pull request !30086 from Greatpan/gpu_5d_r1.6
2022-02-16 01:14:40 +00:00
i-robot 470bb27fd9
!30072 Filter out expired weak ptr of sequence_nodes.
Merge pull request !30072 from 张清华/cherry-pick-1644913971
2022-02-16 01:02:24 +00:00
TinaMengtingZhang f4f43bbbd8 dump statistic file in kernel by kernel mode 2022-02-15 18:18:49 -05:00
Parastoo Ashtari 4ebb6109af fix dump structure for multi graph in ascend mindRT 2022-02-15 15:49:25 -05:00
zengxianglong 1f78d1cde9 adjust scatter_nd input order and cast mindir float64 input 2022-02-15 21:59:10 +08:00
i-robot aadf82454f
!29955 optimize the host and device copy in the control flow
Merge pull request !29955 from limingqi107/r1.6
2022-02-15 11:21:58 +00:00
chenfei f7cd6ea923 get target from all nodes 2022-02-15 18:57:41 +08:00
greatpanc 42529bf8ca gpu support 5d op 2022-02-15 18:50:45 +08:00
i-robot a5a98726f3
!30018 Allreduce input&output aligned by 512
Merge pull request !30018 from TuDouNi/r1.6
2022-02-15 09:35:08 +00:00
i-robot 99c87911e3
!30051 thor test modify r1.6
Merge pull request !30051 from melody/r1.6
2022-02-15 09:34:55 +00:00
wangshuide2020 c3df7a6780 update the documentation of sequence_mask, tensor_dot and dot. 2022-02-15 17:11:29 +08:00
limingqi107 fddaa45b15 optimize the host and device copy in the control flow 2022-02-15 17:09:43 +08:00
i-robot 4b1df4d5f5
!30058 FL, opt kernel launch
Merge pull request !30058 from 徐永飞/r1.6
2022-02-15 09:03:04 +00:00
Zhang Qinghua cf800d4b3d Filter out expired weak ptr of sequence_nodes. 2022-02-15 16:50:01 +08:00
i-robot 4192c8de34
!29984 [lite] fix fuzz bug
Merge pull request !29984 from 徐安越/r1.6_temp
2022-02-15 07:48:05 +00:00
i-robot 5d898f5789
!30021 [MS][LITE] model pool fix bug
Merge pull request !30021 from yefeng/227-fix_split_batch_and_getoutput_api-01
2022-02-15 07:31:12 +00:00
i-robot 2b617ce8b2
!30028 Fix tensor with dynamic shape problem
Merge pull request !30028 from hewei/fix_core_1.6
2022-02-15 06:27:16 +00:00
xuyongfei 7e7ba0eb83 FL, opt kernel launch 2022-02-15 13:01:30 +08:00
cmy_melody b0a0fb68a6 test thor r1.6 2022-02-15 11:29:33 +08:00
yefeng 8b949febb8 fix model pool api bug 2022-02-15 10:26:18 +08:00
i-robot 96b030a1c0
!30031 [MSLITE] Reconstruct implementation of topk.
Merge pull request !30031 from wangshaocong/r1.6_topk_int
2022-02-15 02:02:56 +00:00
i-robot a312f2fee6
!30015 [MS][LITE][ToD] Fix dropout bug when saving inference file
Merge pull request !30015 from Nizzan/export1.6
2022-02-15 01:56:13 +00:00
i-robot ab87f04c31
!30030 [bugfix][dataset] When using PIL resize, the result may be unexpected on some machines, affecting accuracy
Merge pull request !30030 from xiefangqi/md_fix_clip8_outofrange_r1.6
2022-02-15 01:52:43 +00:00
He Wei 875648582d Fix tensor with dynamic shape problem
Set data size to zero for tensors with dynamic shape
to prevent unexpected memory allocation for tensor data.
2022-02-15 09:40:03 +08:00
i-robot 0c6afc7eda
!29653 [MS][LITE][r1.6] sync code clean and fuzz bug fix
Merge pull request !29653 from XianglongZeng/r1.6
2022-02-15 01:24:07 +00:00
i-robot f38ce1dd89
!29982 Add MakeValue related functions in MindAPI
Merge pull request !29982 from hewei/fix_r1.6
2022-02-15 01:15:57 +00:00
i-robot cdb8d2f74c
!30007 Pipeline split bug fix of IsParameterGraph
Merge pull request !30007 from chenfei_mindspore/r1.6-develop
2022-02-14 12:34:18 +00:00
wang_shaocong 5859f9ed76 [MSLITE] reconstruct implementation of topk. 2022-02-14 19:50:05 +08:00
xiefangqi 26b72dd054 fix pil cubic resize random access problem 2022-02-14 19:33:20 +08:00
i-robot e9da47f6c2
!30005 Fix value node in load.
Merge pull request !30005 from gaoyong10/r1.6
2022-02-14 10:07:03 +00:00
i-robot 72f70c470f
!29669 Fix the link format in summary_record docstring
Merge pull request !29669 from maning202007/code_docs_fix_link_format_r1.6
2022-02-14 09:51:30 +00:00
i-robot 31fe2b6be5
!29999 fix issue I4TBA2
Merge pull request !29999 from tan-wei-cheng-3260/r1.6-develop
2022-02-14 09:27:59 +00:00
ttudu d2c48175ec Allreduce input and output size aligned by 512 2022-02-14 16:48:04 +08:00
i-robot 9e1a4c1a24
!30002 add new committer for fl code
Merge pull request !30002 from zlq2020/r1.6
2022-02-14 08:31:40 +00:00
nizzan b0b24fb31e Fix dropout bug when saving inference model 2022-02-14 10:26:22 +02:00
i-robot 029d47f5f1
!29938 [MS][LITE] model pool api
Merge pull request !29938 from yefeng/224-fix_model_pool_api
2022-02-14 08:23:12 +00:00
i-robot c54d1dc9e4
!29961 Bug fix, complete features and code refactor for mindRT in branch 1.6
Merge pull request !29961 from parastooashtari/1.6
2022-02-14 07:51:46 +00:00
i-robot 5e380a98ba
!29901 Fix code check problems on r1.6
Merge pull request !29901 from xiaotianci/fix_codex_r1.6
2022-02-14 07:37:08 +00:00
i-robot 4dba66598b
!29988 fix fuzz test
Merge pull request !29988 from zhengyuanhua/r1.6
2022-02-14 07:01:18 +00:00
chenfei d65ff5e1cb fix bug of IsParameterGraph 2022-02-14 14:57:44 +08:00
xuanyue 2ad05bbf0e fix fuzz bug 2022-02-14 14:50:05 +08:00
zengxianglong b54320a5bb adjust scatter_nd input order and cast mindir float64 input 2022-02-14 14:46:42 +08:00
i-robot 3fb6d115c3
!29974 Fix Static Check for Graph Kernel on r1.6
Merge pull request !29974 from jiaoy1224/r1.6
2022-02-14 06:42:15 +00:00
gaoyong10 fff9dc97fb Fix value node in load. 2022-02-14 14:25:32 +08:00
i-robot eed3356213
!29992 Change sequence node insert fail log level.
Merge pull request !29992 from 张清华/cherry-pick-1644806447
2022-02-14 06:15:38 +00:00
i-robot e246b524b1
!29942 fix random op bug
Merge pull request !29942 from caifubi/r1.6-random-op
2022-02-14 05:12:53 +00:00
twc 59f03517b2 fix issue I4TBA2 2022-02-14 12:12:18 +08:00
zlq2020 8f6c712e67 add new committer for fl code file 2022-02-14 11:41:29 +08:00
i-robot 9dbf2e49e9
!29986 add note for GRU on Ascend
Merge pull request !29986 from 吕昱峰(Nate.River)/code_docs_r1.6
2022-02-14 03:35:04 +00:00
Zhang Qinghua bec0f7c683 fixed 07d8b59 from https://gitee.com/zh_qh/mindspore/pulls/29987
Change sequence node insert fail log level.
2022-02-14 02:40:50 +00:00
Yang Jiao 2548566e30 static check 2022-02-14 10:29:09 +08:00
zhengyuanhua 761b3179df fix fuzz test 2022-02-14 10:23:42 +08:00
i-robot ee84da49a8
!29971 fix fl core dump bug r1.6
Merge pull request !29971 from wtcheng/r1.6
2022-02-14 02:21:03 +00:00
lvyufeng 9a36de6853 add note for GRU on Ascend 2022-02-14 10:12:06 +08:00
He Wei 4788e795ae Add MakeValue related functions in MindAPI
1. NewValueNode() by PrimitivePtr;
2. MakeValue() by vector<bool>.
2022-02-14 09:02:43 +08:00
i-robot 9037469fa8
!29972 memory optimization for flclient in r1.6
Merge pull request !29972 from zhoushan33/flclient0212_r1.6
2022-02-13 02:30:31 +00:00
i-robot d6a0a69679
!29959 1.fix issue I4T45E && I4T0GI 2.fix bug in newinstance 3.add client upload loss
Merge pull request !29959 from tan-wei-cheng-3260/r1.6
2022-02-13 02:26:36 +00:00
i-robot 16ada06668
!29969 Fix ptr empty in device address.
Merge pull request !29969 from gaoyong10/r1.6
2022-02-12 11:19:04 +00:00
twc 079e05dce1 1.fix issue I4T45E && I4T0GI
2.fix bug in newinstance
3.add client upload loss
2022-02-12 17:54:16 +08:00
i-robot c1191a2cd1
!29965 add host mem stack
Merge pull request !29965 from liangzelang/r1.6
2022-02-12 09:32:25 +00:00
w00517672 97fed44ecb fix fl core dump bug 2022-02-12 17:25:09 +08:00
zhoushan ce9c24d3c2 memory optimization for flclient in r1.6 2022-02-12 17:18:55 +08:00
i-robot d3febb1190
!29920 [MS][LITE] fix fl client log print
Merge pull request !29920 from zhengjun10/fix16
2022-02-12 09:17:27 +00:00
gaoyong10 06e61034f5 Fix ptr empty in device address. 2022-02-12 16:05:13 +08:00
liangzelang 7ba0e2ed8f add host mem stack
Use correct sync api in Ascend.
2022-02-12 14:29:11 +08:00
i-robot ec75a3212d
!29874 fix matmul infer div zero
Merge pull request !29874 from zhaodezan/r1.6
2022-02-12 05:00:39 +00:00
i-robot d75d880733
!29899 [MS][LITE][r1.6] fix the bug that input tensor data with old size doesn't get freed
Merge pull request !29899 from XianglongZeng/r1.6_
2022-02-12 03:30:15 +00:00
i-robot 9c9e26127d
!29897 [lite]dynamic segmentation for matmul
Merge pull request !29897 from 徐安越/r1.6
2022-02-12 02:44:52 +00:00
Parastoo Ashtari 9a3c870ba5 Bug fix and complete mindRT features. refactor mindRT code. 2022-02-11 14:36:46 -05:00
i-robot f8da97b31f
!29946 Clear device tensor in front value node.
Merge pull request !29946 from gaoyong10/r1.6_2
2022-02-11 12:13:14 +00:00
i-robot c3893f52e3
!29921 FL, opt update model get client info
Merge pull request !29921 from 徐永飞/r1.6
2022-02-11 10:20:20 +00:00
i-robot 585a880344
!29944 support config allreduce group size for batch norm
Merge pull request !29944 from yangzhenzhang/add-group-size-for-parallel-bn
2022-02-11 09:23:09 +00:00
i-robot 92aa673d03
!29932 [MSLITE] Support topk with int input.
Merge pull request !29932 from wangshaocong/r1.6_topk_int
2022-02-11 09:09:08 +00:00
i-robot 434f92f403
!29904 fix some potential risk
Merge pull request !29904 from liubuyu/r1.6
2022-02-11 09:07:23 +00:00
i-robot ec3c4200f4
!29837 [MSLITE] Codex clean.
Merge pull request !29837 from wangshaocong/r1.6_codex
2022-02-11 08:57:06 +00:00
gaoyong10 03e7335fa5 Clear device tensor in front value node. 2022-02-11 16:31:40 +08:00
i-robot b83a48a0b8
!29875 [MSLITE][GPU][r1.6] code style clean
Merge pull request !29875 from Greatpan/r1.6_code_clean
2022-02-11 08:30:06 +00:00
i-robot 5330fec3af
!29927 [MSLITE][CPU][r1.6] fuzz problem, cast_fp32.cc bugfix
Merge pull request !29927 from Greatpan/fuzz_problem_r1.6
2022-02-11 08:27:22 +00:00
i-robot 41aff4fd64
!29913 Fix a logging api bug in MindAPI
Merge pull request !29913 from hewei/fix_r1.6
2022-02-11 07:55:55 +00:00
yefeng 768d0fb41a model pool api 2022-02-11 15:47:46 +08:00
yangzhenzhang bcf45d0ed6 support config group for batchnorm 2022-02-11 15:47:26 +08:00
i-robot d96f78efc0
!29928 support 'STAND_ALONE' ParallelMode for dimension reduce
Merge pull request !29928 from jinjiali-kali/r1.6
2022-02-11 07:41:29 +00:00
caifubi 8d65a09a8a Duplicate primitive id 2022-02-11 15:36:17 +08:00
xuyongfei 69b04e5837 FL, update model get client info 2022-02-11 15:24:58 +08:00
i-robot a0453df907
!29929 [MSLITE][GPU][r1.6] avx512 gate problem
Merge pull request !29929 from Greatpan/r1.6_avx512_gate3
2022-02-11 06:37:36 +00:00
wang_shaocong 15f9aaefdb [MSLITE] Add implementation of topk_int. 2022-02-11 14:20:07 +08:00
greatpanc 2a27763648 avx512 gate 2022-02-11 12:43:20 +08:00
xuanyue 9fd902197b dynamic segmentation for matmul 2022-02-11 12:34:15 +08:00
i-robot 0e7697df0c
!29918 fix memory leak
Merge pull request !29918 from yeyunpeng2020/r1.6_bak
2022-02-11 04:30:16 +00:00
i-robot 090de19c17
!29907 Refine ps mode consistence check
Merge pull request !29907 from zyli2020/r1.6
2022-02-11 03:51:12 +00:00
wang_shaocong cb3e1c8bef [MSLITE] Codex clean. 2022-02-11 11:19:25 +08:00
greatpanc c464cfed29 r1.6 cast fp32 bugfix, add break 2022-02-11 10:57:09 +08:00
i-robot 72ac8e047d
!29914 [MS][LITE] split batch
Merge pull request !29914 from yefeng/223-split_different-1.6
2022-02-11 02:37:44 +00:00
Xiao Tianci 96cec6da4f fix codex on 1.6 2022-02-11 10:31:11 +08:00
yeyunpeng2020 3d75b08d7e fix memory leak 2022-02-11 09:56:22 +08:00
i-robot fcb0f2b890
!29915 add ModelParallelRunner java api
Merge pull request !29915 from yeyunpeng2020/r1.6
2022-02-11 01:54:40 +00:00
yanghaoran 57ada55e6e
!29922 revert 'Pull Request !29817 : [MSLITE][CPU][r1.6] AVX512 gate'
Merge pull request !29922 from yanghaoran/revert-merge-29817-r1.6
2022-02-11 01:50:26 +00:00
yanghaoran 1618a78fe4
Revert 'Pull Request !29817 : [MSLITE][CPU][r1.6] AVX512 gate' 2022-02-11 01:49:10 +00:00
zhengjun10 2aa49034b4 fix fl client log print 2022-02-11 09:45:19 +08:00
i-robot 0823adbc5f
!29817 [MSLITE][CPU][r1.6] AVX512 gate
Merge pull request !29817 from Greatpan/r1.6_avx512_gate
2022-02-11 01:27:59 +00:00
lizhenyu cd12f98c29 refine ps mode consistence check 2022-02-11 09:23:30 +08:00
He Wei c58dde13bf Fix a logging api bug in MindAPI 2022-02-11 08:54:20 +08:00
yeyunpeng2020 2127d67c7e add ModelParallelRunner java api 2022-02-10 23:36:00 +08:00
yefeng 4d6c49dfc4 model pool split batch 2022-02-10 21:46:51 +08:00
i-robot 0e1dcb50db
!29893 [MS][LITE] model pool split batch
Merge pull request !29893 from yefeng/222-split_batch_add_lock
2022-02-10 13:08:27 +00:00
i-robot 50a0f60315
!29869 [MSLITE] fix mem leak and diverse wide deep networks compatibility in tensorrt delegate
Merge pull request !29869 from Liu_Xuu/r16_0210_memleak
2022-02-10 12:52:39 +00:00
greatpanc f724ace438 r1.6 avx512 gate, and some avx512 bugfix 2022-02-10 20:36:53 +08:00
jinjiali 497945fe3c update dim_reduce: support 'STAND_ALONE' ParallelMode 2022-02-10 20:19:28 +08:00
lby cc95ce6dc3 fix some potential risk 2022-02-10 20:14:23 +08:00
zengxianglong ea6368c98c fix the bug that input tensor data with old size doesn't get freed 2022-02-10 19:49:56 +08:00
yefeng ee5e521ca5 model pool 2022-02-10 19:32:48 +08:00
i-robot 9daf5933e1
!29879 [MSLITE][CPU][r1.6] code style clean
Merge pull request !29879 from Greatpan/r1.6_code_clean2
2022-02-10 11:21:29 +00:00
i-robot c7038bbcf2
!29873 [lite]move in matmul, transpose and bias_add's opt
Merge pull request !29873 from 徐安越/r1.6_temp
2022-02-10 11:20:31 +00:00
i-robot 8b8418ae4d
!29880 Don't build value node of Monad type node
Merge pull request !29880 from chenfei_mindspore/r1.6-develop
2022-02-10 11:02:15 +00:00
i-robot 7e21452f49
!29871 [MS][LITE] model pool check nullptr
Merge pull request !29871 from yefeng/221-fix_model_pool_bug-1.6
2022-02-10 09:28:30 +00:00
greatpanc ab05b5291f code clean r1.6 2022-02-10 16:47:50 +08:00
i-robot 5e29413428
!29863 add A15 macro
Merge pull request !29863 from qinzheng/fix_ios_1.6
2022-02-10 08:32:07 +00:00
i-robot b962dc7683
!29843 Use device HBM free memory to calculate MindSpore manage memory size
Merge pull request !29843 from laiyongqiang/mem_check_r16
2022-02-10 08:28:50 +00:00
i-robot 24ecae8519
!29704 Fix codex warning for dump and debugger in 1.6
Merge pull request !29704 from TinaMengtingZhang/codex-1.6
2022-02-10 08:26:33 +00:00
i-robot 0a84e8abba
!29858 Check whether the graph has incorporate call
Merge pull request !29858 from chenfei_mindspore/check-incorporate-call
2022-02-10 08:19:04 +00:00
Liu_Xuu 7619a24967 [MSLITE] fix mem leak and diverse wide deep networks compatibility in tensorrt delegate 0210_04 2022-02-10 16:14:24 +08:00
i-robot 5d2b8cfb10
!29770 Add more required apis in MindAPI
Merge pull request !29770 from hewei/fix_r1.6
2022-02-10 07:55:54 +00:00
i-robot 3117737755
!29665 [MS][LITE]sync bugfix of master
Merge pull request !29665 from mengyuanli/sync_r1.6
2022-02-10 07:28:55 +00:00
chenfei aee9996366 not build value node of monad type node 2022-02-10 15:18:48 +08:00
greatpanc a8ea3055ee r1.6 code style clean, cpu 2022-02-10 15:06:26 +08:00
yefeng 7752f6dcc3 model pool GetOutput 2022-02-10 14:52:38 +08:00
zhaodezan 1abd300719 fix matmul infer div zero 2022-02-10 14:11:59 +08:00
i-robot fe8588f32f
!29866 fix acl env guard
Merge pull request !29866 from zhoufeng/fix-310-envguard-1.6
2022-02-10 06:11:13 +00:00
xuanyue 399276790e move in matmul, transpose and bias_add's opt 2022-02-10 11:52:59 +08:00
He Wei ae0f75ac53 Add more required apis in MindAPI
1. AbstractBase::set_shape();
2. Move shape() from AbstractTensor to AbstractBase;
3. AbstractSequence construct from AbstractBasePtrList;
4. Shape construct from ShapeVector.
2022-02-10 11:21:14 +08:00
i-robot 980f3769c3
!29835 Lite, fix bug: invalid data type cause 0 alloc memory tensor
Merge pull request !29835 from 徐永飞/r1.6
2022-02-10 03:20:19 +00:00
zhoufeng 8da8420e7c fix acl env guard
Signed-off-by: zhoufeng <zhoufeng54@huawei.com>
2022-02-10 10:35:45 +08:00
mengyuanli 5a2df65ca4 fix bug of calling one actor in many places in fp16
code clean

add test case for control flow
2022-02-10 10:25:02 +08:00
i-robot dffd3bf6e9
!29824 [MS][LITE] packed weight for 1.6
Merge pull request !29824 from yefeng/217-packed_weight_1.6
2022-02-10 02:03:53 +00:00
chenfei 1972f09f45 check incorporate call not by analyzer 2022-02-10 09:30:04 +08:00
i-robot 74e0626771
!29821 [lite]add avx512 opt
Merge pull request !29821 from 徐安越/r1.6
2022-02-10 01:24:05 +00:00
i-robot 420af08036
!29827 [MS][LITE] bind core for model pool 1.6
Merge pull request !29827 from yefeng/218-bind_core_for_model_pool_1.6
2022-02-10 01:23:31 +00:00
i-robot e2e3a33c86
!29852 [MS][LITE] model pool support GetOutputs 1.6
Merge pull request !29852 from yefeng/220-model_pool_support_getoutput-1.6
2022-02-10 01:22:01 +00:00
qinzheng 9be7d17fc7 add A15 macro 2022-02-10 09:16:32 +08:00
i-robot da71108d04
!29854 [MSLITE][CPU][r1.6] fuzz test problem bugfix, batchnorm_fp32 op, exp_fp32 op
Merge pull request !29854 from Greatpan/r1.6_fuzz_bugfix
2022-02-10 01:13:12 +00:00
i-robot e31db6670e
!29772 modify tdt push failed error log
Merge pull request !29772 from ms_yan/r1.6_log_modify
2022-02-09 14:14:59 +00:00
greatpanc fc0ce9ea90 fuzz test problem bugfix, batchnorm and exp_fp32 2022-02-09 20:49:52 +08:00
i-robot cb11049d81
!29833 Fix bug of pipeline split visit node
Merge pull request !29833 from chenfei_mindspore/r1.6-develop
2022-02-09 11:48:24 +00:00
yefeng 9000d09096 model pool support getoutput 2022-02-09 18:23:39 +08:00
i-robot 8f4cef43ed
!29816 fix bug of flclient 0209 in r1.6
Merge pull request !29816 from zhoushan33/flclient0209_r1.6
2022-02-09 10:00:26 +00:00
LaiYongqiang 602d764596 use device HBM free memory to calculate MindSpore manage memory size 2022-02-09 17:36:44 +08:00
i-robot b7c8c10bd4
!29776 [MSLITE] add matmul, fully connected optimize for tensorrt
Merge pull request !29776 from Liu_Xuu/r16_0208_fc
2022-02-09 09:33:03 +00:00
i-robot a641dec90a
!29804 [MSLITE] fix ignore return value
Merge pull request !29804 from ling/r1.6
2022-02-09 09:04:25 +00:00
i-robot 37001e1d63
!29811 [MS][LITE] model pool benchmark for 1.6
Merge pull request !29811 from yefeng/216-model_pool_benchamrk_1.6
2022-02-09 08:46:24 +00:00
i-robot 687419f169
!29810 [MS][LITE] model pool split batch for 1.6
Merge pull request !29810 from yefeng/215-model_pool_batch_split_1.6
2022-02-09 08:46:06 +00:00
i-robot e1cc515472
!29792 [MS][LITE]fix linux arm64 run
Merge pull request !29792 from gongdaguo/fix_arm64_linux_run
2022-02-09 08:45:05 +00:00
i-robot 5c54d0ab1b
!29823 GraphKernel Fix coredump for some cpu network
Merge pull request !29823 from ZengZitao/cpu_core_fix
2022-02-09 08:26:52 +00:00
xuyongfei 67471f6754 Lite, fix bug: invalid data type cause 0 alloc memory tensor 2022-02-09 16:01:17 +08:00
i-robot a84ff56bde
!29799 optimize full quant strategy
Merge pull request !29799 from yeyunpeng2020/r1.6
2022-02-09 07:58:43 +00:00
i-robot 6ad0ccb000
!29805 return default value when infer EnvironGet primitive
Merge pull request !29805 from xychow/return-dflt-when-infer-environ-get-1.6
2022-02-09 07:34:27 +00:00
yefeng 69ae07640c model pool bind core 1.6 2022-02-09 15:15:48 +08:00
yefeng 7ac492cf71 weight packed 1.6 2022-02-09 14:57:44 +08:00
i-robot 45efa3a7db
!29794 fix event error
Merge pull request !29794 from TuDouNi/r1.6
2022-02-09 06:46:17 +00:00
xuanyue 99131343a3 add avx512 opt 2022-02-09 14:44:19 +08:00
chenfei cc3d4b558e fix bug of pipeline split 2022-02-09 14:38:30 +08:00
i-robot 8eeb5f53c7
!29762 Lite, codex clean
Merge pull request !29762 from 徐永飞/r1.6
2022-02-09 06:20:13 +00:00
i-robot 562dce4816
!29795 [MSLITE][CPU][r1.6] AVX512/256/SSE/NEON Advanced packaging, and add/arithmetic_self/softmax/sub Op Refactoring and optimization
Merge pull request !29795 from Greatpan/simd_op_r1.6
2022-02-09 06:17:35 +00:00
yeyunpeng2020 442ea81872 optimize full quant strategy 2022-02-09 14:14:29 +08:00
zengzitao 757126b8d6 fix core_dump in cpu 2022-02-09 14:09:43 +08:00
yefeng 8a3670f42f model pool benchmark 1.6 2022-02-09 11:12:00 +08:00
i-robot e31e523712
!29767 fix l2loss op's input shape with 0
Merge pull request !29767 from 范吉斌/l2loss_1.6
2022-02-09 03:10:17 +00:00
yefeng a48d2aa94c model pool batch split for 1.6 2022-02-09 11:00:49 +08:00
i-robot b354d0de5b
!29780 [MS][LITE] model pool for r1.6
Merge pull request !29780 from yefeng/213-model_pool_r1.6
2022-02-09 02:47:37 +00:00
i-robot b6fb9e5dce
!29746 [lite]fix code warning of pclint
Merge pull request !29746 from 徐安越/r1.6
2022-02-09 02:39:58 +00:00
ling e92ef71e52 [MSLITE] fix ignore return value 2022-02-09 10:34:35 +08:00
zhousiyi 9e6e2ca824 return default value when infer EnvironGet primitive 2022-02-09 02:34:11 +00:00
i-robot 9e6db9bea0
!29238 fix stridedslice parallel mask for 1.6
Merge pull request !29238 from fangzehua/fix_stride_mask_1.6
2022-02-09 02:25:36 +00:00
i-robot ebbd3d2bc8
!29782 fix the bug of host device in the control flow
Merge pull request !29782 from limingqi107/r1.6
2022-02-09 02:21:13 +00:00
zhoushan 33f57d91d5 fix bug of flclient 0209 in r1.6 2022-02-09 10:13:09 +08:00
i-robot 289dfe4905
!29090 fix code check
Merge pull request !29090 from lianliguang/r1.6
2022-02-09 02:03:33 +00:00
Liu_Xuu 8b47d8c515 [MSLITE] add matmul, fully connected optimize for tensorrt 0209_01 2022-02-09 10:01:55 +08:00
i-robot 080a775d61
!29736 sync code with master
Merge pull request !29736 from tan-wei-cheng-3260/r1.6
2022-02-09 01:44:16 +00:00
i-robot 56e9a2cc30
!29787 Clear empty stack control arrow in stack actor.
Merge pull request !29787 from gaoyong10/r1.6
2022-02-09 01:37:55 +00:00
i-robot 0342934468
!29760 [Static warning cleanup] signds pclint r1.6
Merge pull request !29760 from emmmmtang/r1.6
2022-02-09 01:21:14 +00:00
greatpanc d3e4ed4dd7 r1.6, add/arithmetic_self/softmax/sub op SIMD implementation 2022-02-09 09:18:07 +08:00
gongdaguo 8caeb5b6d5 fix arm64 linux runtime 2022-02-09 09:13:51 +08:00
i-robot ea971c7911
!29761 [ME] Code check.
Merge pull request !29761 from Margaret_wangrui/r1.6_codecheck
2022-02-09 01:03:51 +00:00
ttudu 23b69672ef fix event error 2022-02-08 21:05:15 +08:00
xuyongfei a6ce4e29e9 Lite, codex clean and fix 0 size tensor caused by invalid data type 2022-02-08 20:34:31 +08:00
i-robot a9c03b7213
!29742 Limit parameter u to not build value node
Merge pull request !29742 from chenfei_mindspore/r1.6-develop
2022-02-08 12:30:26 +00:00
i-robot 7c71833619
!29784 [MSLITE] fix ci run on one device for tensorrt
Merge pull request !29784 from Liu_Xuu/r1.6
2022-02-08 12:21:34 +00:00
limingqi107 a4423eb803 fix the bug of host device in the control flow 2022-02-08 20:14:00 +08:00
gaoyong10 5a76c80707 Clear empty stack control arrow in stack actor. 2022-02-08 19:29:57 +08:00
Liu_Xuu 350d008b71 [MSLITE] fix ci run on one device for tensorrt 1227_03 2022-02-08 18:43:08 +08:00
yefeng 4f839a7521 model pool for r1.6 2022-02-08 18:18:30 +08:00
i-robot 9391dcd1e2
!29758 fix tensor move data type
Merge pull request !29758 from laiyongqiang/tensor_move_16
2022-02-08 09:44:51 +00:00
i-robot 9127c68f81
!29744 Parse kernel level by kernel graph.
Merge pull request !29744 from gaoyong10/r1.6
2022-02-08 09:42:48 +00:00
emmmmtang 76cec97160 signds pclint 2022-02-08 17:35:27 +08:00
i-robot 688d2be232
!29756 Add Error Manager
Merge pull request !29756 from jiaorui/error-manager-1.6
2022-02-08 09:17:12 +00:00
i-robot 6cb73a764e
!29330 Fix API docs on branch 1.6
Merge pull request !29330 from xiaotianci/code_docs_fix_api_r1.6
2022-02-08 09:06:20 +00:00
lianliguang ffe4393e95 fix code check 2022-02-08 16:30:28 +08:00
i-robot 16bb659a6b
!29752 [r1.6] Fix _send IndexError: list index out of range issue
Merge pull request !29752 from xiefangqi/fix_queue_race_issue_r1.6
2022-02-08 08:26:08 +00:00
twc 97b00882a6 1.fix bug in updatemodel with federated learning mode
2.sync code
3.update log level
2022-02-08 16:14:16 +08:00
ms_yan 29120febaf modify push failed log 2022-02-08 15:23:53 +08:00
i-robot 9c8a4279c8
!29749 add GRU support info and fix RNN ut
Merge pull request !29749 from 吕昱峰(Nate.River)/r1.6
2022-02-08 07:23:13 +00:00
i-robot c7e91c33f4
!29143 length of kernel_size and strides should be five
Merge pull request !29143 from liubuyu/r1.6
2022-02-08 07:20:18 +00:00
LaiYongqiang f7976ea54e fix tensor move data type 2022-02-08 15:16:27 +08:00
Margaret_wangrui 2cce85b10c Code check 2022-02-08 14:34:06 +08:00
i-robot 330e9e9978
!29748 [MSLITE][DEVELOP] code review for lite: base op directory, include directory
Merge pull request !29748 from yangruoqi713/r1.6
2022-02-08 06:28:52 +00:00
i-robot 163222ab59
!29066 [MSLITE][DEVELOP] judge tensor type when loading model
Merge pull request !29066 from yangruoqi713/r1.6_fuzz
2022-02-08 06:28:34 +00:00
i-robot ad9fe8b073
!29661 [MSLITE] add data type check for NegGrad
Merge pull request !29661 from ling/r1.6
2022-02-08 06:28:12 +00:00
i-robot 239ff2176a
!29740 Fix codedex warning in r1.6
Merge pull request !29740 from LiangZhibo/warning_r1.6
2022-02-08 06:26:16 +00:00
i-robot 72b11d0037
!29130 Fix TypeIdLabel bug for complex128
Merge pull request !29130 from liangxhao/support_complex128_r1.6
2022-02-08 04:35:30 +00:00
twc 9e5433db11 fix I4RQOC && sync code 2022-02-08 12:16:56 +08:00
twc 5fea05619d fix fl pclint 2022-02-08 12:16:56 +08:00
twc 964e320383 fix ISSUE I4QRZP
(cherry picked from commit 76d9daddf0)
2022-02-08 12:16:56 +08:00
twc 73b29d5bdb fix ISSUE I4QCJM 2022-02-08 12:16:56 +08:00
jiaorui 803210d280 error manager 2022-02-08 11:52:07 +08:00
i-robot 358b7124bc
!29078 Clean code for compiler
Merge pull request !29078 from YuJianfeng/r1.6
2022-02-08 03:31:44 +00:00
xiefangqi 440c1ae709 fix del multiprocessing queue race condition problem 2022-02-08 10:47:56 +08:00
lvyufeng 1c501266a8 add GRU support info and fix RNN ut 2022-02-08 10:25:17 +08:00
yangruoqi713 c655a7bd8b [MSLITE][DEVELOP] code review for lite: base op directory, include directory 2022-02-08 09:56:07 +08:00
i-robot b78efaf644
!29648 [MS][LITE] support rm minddata and fix so rpath
Merge pull request !29648 from zhengjun10/fix16
2022-02-08 01:46:21 +00:00
xuanyue cb607709cb fix code warning of pclint 2022-02-08 09:46:15 +08:00
gaoyong10 911f664b0e Parse kernel level by kernel graph. 2022-02-08 09:44:56 +08:00
i-robot d7dee8aa2b
!29189 Fix code warning in r1.6
Merge pull request !29189 from LiangZhibo/master
2022-02-08 09:32:24 +08:00
lby 9287d9cc32 length of kernel_size and strides should be five 2022-02-08 09:30:24 +08:00
chenfei 20d9777cb3 limit parameter 2022-02-08 09:29:01 +08:00
i-robot b85161cb94
!29731 Fix Gather CPU kernel on r1.6
Merge pull request !29731 from zuochuanyong/r1.6_fix_gather_cpu
2022-02-08 01:00:10 +00:00
TinaMengtingZhang 3d1ace35f4 sync up codex warning fix for dump and debugger to open source branch 2022-02-07 17:07:56 -05:00
i-robot 9095aef205
!29694 r1.6 support control flow.
Merge pull request !29694 from linqingke/control_flow_1.6
2022-02-07 13:07:12 +00:00
i-robot 3e427a2482
!29710 add DynamicInferModel tag for flclient in r1.6
Merge pull request !29710 from zhoushan33/flclient0126_r1.6
2022-02-07 11:59:40 +00:00
i-robot aecc3ffe58
!29708 update dim_reduce: move scale_loss from optimizer to outer, add param filter
Merge pull request !29708 from jinjiali-kali/r1.6
2022-02-07 11:27:22 +00:00
liangzelang 956a0f3460 Enable Ascend kernelbykernel in MindRT.
After ensuring the correct control edge relationship, insert the TensorMove operator.

If the last input of an EnvironGet/EnvironSet CNode is a tuple, split this CNode into multiple CNodes with non-tuple values.

Set specialized flag for cloned funcgraph valuenode.

Fix the problem that some abstracts are not updated, caused by the call_graph_tuple_transform.

Share the sequence nodes between sequence abstracts.

Visit fix of parallel.

Workaround for BiasAddGrad as it doesn't have a backpropagator, so insert StopGradient before this CNode.
2022-02-07 19:03:14 +08:00
zuochuanyong 9854545a33 fix gather cpu kernel 2022-02-07 18:41:24 +08:00
i-robot 37d6c72f98
!29655 [MS][LITE][r1.6] reconstruct npu scale op and insert transpose pass
Merge pull request !29655 from XianglongZeng/r1.6_
2022-02-07 09:21:42 +00:00
i-robot 48b612cf14
!29712 [r1.6] PyNative ms_function compile and run in Graph
Merge pull request !29712 from caifubi/r1.6-ms_function-run-in-graph
2022-02-07 08:26:07 +00:00
yangruoqi713 3930624ef3 [MSLITE][DEVELOP] judge tensor type when loading model, fix bug of gelu fp16 2022-02-07 16:20:01 +08:00
i-robot d13e57f934
!29717 Skip onednn dfx testcase on mac and windows
Merge pull request !29717 from zuochuanyong/r1.6_onednn_dfx_testcase
2022-02-07 07:21:57 +00:00
i-robot d35fc2531c
!29666 solve topk warning problem
Merge pull request !29666 from zong_shuai/topk_r.16_debug
2022-02-07 05:46:50 +00:00
i-robot 3089313a85
!29208 Change log level.
Merge pull request !29208 from liangzelang/cherry-pick-1642421022
2022-02-07 04:07:09 +00:00
i-robot 77dcccd447
!29111 r1.6 Fix some CPU operator whitelists
Merge pull request !29111 from chenweitao_295/r1.6
2022-02-07 03:43:41 +00:00
i-robot 5f3a10fde4
!29684 fix password error bug r1.6
Merge pull request !29684 from wtcheng/r1.6
2022-02-07 03:33:32 +00:00
i-robot 2ff5af7ec1
!29438 [MSLITE] rm tmp wide_deep_worker_ ci
Merge pull request !29438 from Liu_Xuu/r1.6
2022-02-07 03:09:45 +00:00
caifubi a060179429 Pynative ms_function mix execution
1. Add bprop flag to bp-graph in PyNative mode.
2. Run ms_function in actor-DAG.
3. Use aclMemcopyAsync in DtoD copy.
4. Insert event before RunTask.
2022-02-07 10:39:14 +08:00
i-robot 3f9dfd7233
!29086 Expose the Layernorm Interface for r1.5 PanGu
Merge pull request !29086 from huangxinjing/fx_name
2022-02-07 01:34:46 +00:00
i-robot 00615837b7
!29659 Swap temporary when mem on device not enough
Merge pull request !29659 from tanghuikang/swap_temporary_1.6
2022-02-07 01:18:27 +00:00
i-robot f2fa38f921
!29698 Fix bugs in offline debugger r1.6
Merge pull request !29698 from TinaMengtingZhang/bugfix_dbg_1.6
2022-02-04 15:12:18 +00:00
TinaMengtingZhang a295fa7fc0 bugfix: extract task id error in offline dbg
bugfix: duplicate wp hit results in offline dbg
2022-02-03 13:25:11 -05:00
i-robot 135f4f79d0
!29649 fix some ir_fusion bugs
Merge pull request !29649 from yuchaojie/r1.6_fix
2022-01-30 15:13:11 +00:00
i-robot 9c0d043971
!29658 r1.6 sync codex
Merge pull request !29658 from zhaodezan/r1.6
2022-01-30 09:28:07 +00:00
i-robot 3a24cb799e
!29692 Alarm cleaning
Merge pull request !29692 from 刘勇琪/r1.6
2022-01-30 09:22:39 +00:00
w00517672 1067ea1e88 fix password error bug r1.6 2022-01-30 16:48:24 +08:00
i-robot 864a8380fa
!29672 Change aot error code strategy and modify logs
Merge pull request !29672 from jiaoy1224/r1.6
2022-01-30 08:39:50 +00:00
i-robot 3c5ac6bba1
!29685 [MSLITE] Codex clean.
Merge pull request !29685 from wangshaocong/codex_r1.6
2022-01-30 08:22:22 +00:00
liu-yongqi-63 4a0c58fa11 Alarm cleaning 2022-01-30 14:49:42 +08:00
i-robot a3460e9807
!29651 free shared memory when ITERATORS_LIST had been deleted and solve slash and backslash mixed on windows
Merge pull request !29651 from guozhijian/fix_multiprocess_shared_memory_inc_r1.6
2022-01-30 06:32:54 +00:00
i-robot bd13eadd64
!29668 Device-cloud federated SignDS algorithm r1.6
Merge pull request !29668 from emmmmtang/r1.6
2022-01-30 06:26:27 +00:00
i-robot be3f6e437b
!29682 clear warnings of static code check
Merge pull request !29682 from zhangzhaoju/r1.6
2022-01-30 06:21:31 +00:00
i-robot 678c1e3d5d
!29674 fix heterogeneous creating new parameter with mindrt
Merge pull request !29674 from baihuawei/hete1.6xx
2022-01-30 04:30:53 +00:00
jonyguo 1e5666f058 1. delete all the Queue & SharedQueue when the iter has been deleted from ITERATORS_LIST
2. fix: mixed slash and backslash paths on the windows platform
2022-01-30 12:26:20 +08:00
wang_shaocong 7b8c923289 [MSLITE] Codex clean. 2022-01-30 11:24:12 +08:00
i-robot b559404468
!29434 Fix Manager is Null
Merge pull request !29434 from hwjiaorui/fix-manager-null-r1.6
2022-01-30 02:02:33 +00:00
i-robot 75dcefcc00
!29656 GraphKernel add test case for llvm on cpu and fix type error bug in akg
Merge pull request !29656 from ZengZitao/r16_gk_cpu_bugfix
2022-01-30 01:15:17 +00:00
zhangzhaoju d4270606f5 clean static code check warnings 2022-01-30 09:14:29 +08:00
i-robot f4fc890c77
!29675 [MS][LITE]remove wrong model
Merge pull request !29675 from mengyuanli/sync_r1.6_2
2022-01-30 01:04:09 +00:00
i-robot e150f97bef
!29673 fix code warning
Merge pull request !29673 from huanghui/r1.6-fix-code-warnings
2022-01-29 14:38:26 +00:00
Yang Jiao ad8b472f87 change aot func return code 2022-01-29 19:28:14 +08:00
i-robot d078e6df12
!29240 Fix tuple in tuple bug.
Merge pull request !29240 from liangzelang/cherry-pick-1642423643
2022-01-29 11:11:30 +00:00
i-robot 7a2d75b771
!29652 Unify server and scheduler exit function
Merge pull request !29652 from ZPaC/1.6-unify-exit-for-server-sched
2022-01-29 09:33:06 +00:00
mengyuanli 30fccc5e88 remove wrong model 2022-01-29 17:16:52 +08:00
maning202007 4577f09a9c Fix the link format in summary_record docstring 2022-01-29 17:11:23 +08:00
baihuawei dc383dd573 fix heterogeneous kernel by kernel 2022-01-29 17:10:24 +08:00
huanghui 6fa47eba79 fix some code warnings for pclint-plus 2022-01-29 17:06:38 +08:00
i-robot 380fb6982a
!29647 Fix CI probabilistic filter failed in gate building<test_arithmetic_op.py>.
Merge pull request !29647 from hezhenhao1/r1.6
2022-01-29 08:45:38 +00:00
emmmmtang 09c938a584 signds 2022-01-29 16:20:25 +08:00
zong-shuai 32ad2aea13 debug_topk 2022-01-29 15:53:53 +08:00
ling 494719e441 [MSLITE] fix fuzz bug 2022-01-29 15:19:21 +08:00
i-robot c72a1146fb
!29476 [Dataset][multiprocess][bugfix] Fix generatordataset can't exit normally and clear when catch ctrl+c
Merge pull request !29476 from xiefangqi/md_set_sigint_to_ign_in_subprocess_r1.6
2022-01-29 07:18:08 +00:00
zhaodezan 86b9c63a60 r1.6 sync codex 2022-01-29 15:17:56 +08:00
tanghuikang 7091e2da1f Swap temporary when mem on device not enough 2022-01-29 15:09:32 +08:00
zengzitao 4d8282443c fix some r1.6 graphkernel cpu bugs 2022-01-29 15:07:47 +08:00
i-robot cf1c5e4cd5
!29336 fix LSTM description
Merge pull request !29336 from 吕昱峰(Nate.River)/code_docs_r1.6
2022-01-29 06:52:50 +00:00
i-robot 9897d6356b
!29449 fix mistakes of mindapi path
Merge pull request !29449 from zhoufeng/add-mindapi-h-file-to-package-r1.6
2022-01-29 06:41:36 +00:00
fangwenyi b624801136
!29644 Update 1.6 maintenance info
Merge pull request !29644 from fangwenyi/r1.6
2022-01-29 06:36:55 +00:00
fangwenyi fdbd037a4c
update README_CN.md. 2022-01-29 06:33:14 +00:00
fangwenyi 5c30f8f5bb
update README.md. 2022-01-29 06:32:24 +00:00
hezhenhao1 bc0cab0ff8 Fix CI probabilistic filter failed in gate building<test_arithmetic_op.py>. 2022-01-29 14:11:49 +08:00
zengxianglong c564c628b5 reconstruct npu scale op and insert transpose pass 2022-01-29 01:10:37 +08:00
ZPaC 3db62b79ea Unify server and scheduler exit function 2022-01-28 15:41:17 +08:00
jinjiali acfdce0e7e update dim_reduce: add param filter 2022-01-28 11:50:55 +08:00
yuchaojie aefcd54f1c fix some ir_fusion bugs 2022-01-27 17:12:13 +08:00
xiefangqi 32b32a72b5 fix generatordataset can't exit normally when ctrl+c problem 2022-01-27 08:19:14 +08:00
zuochuanyong 89234ba284 onednn dfx testcase skip win and mac 2022-01-26 11:42:19 +08:00
zhoushan 15ec4f5af2 add DynamicInferModel tag for flclient in r1.6 2022-01-26 10:12:00 +08:00
zhengjun10 dbd9f8978d sync master add without minddata so aar 2022-01-26 09:53:35 +08:00
fangzehua a2d4eec5ae fix stridedslice mask in parallel 2022-01-26 09:40:03 +08:00
jinjiali 36a4b7c6a6 update dim_reduce: move scale_loss from optimizer to outer 2022-01-25 15:39:04 +08:00
yanghaoran f6e83e0383
!29489 takedown test_broadcast_grade_cpu
Merge pull request !29489 from yanghaoran/r1.6
2022-01-25 01:52:08 +00:00
yanghaoran 3dc84150a7 takedown test_broadcast_grade_cpu 2022-01-25 09:45:28 +08:00
i-robot 76bca2addc
!29474 1.6 bugfix: Model amp args differ from amp.build_train_network
Merge pull request !29474 from wangnan39/1.6_bugfix_Model_amp_config
2022-01-24 11:16:26 +00:00
hwjiaorui 86dabddfbd fix manager is null 2022-01-24 18:56:32 +08:00
i-robot a900b25fe9
!29452 [MS][LITE] check train data type
Merge pull request !29452 from yefeng/208-check_train_data_type-r1.6
2022-01-24 09:34:12 +00:00
王南 a569ff0783 bugfix: Model amp args differ from amp.build_train_network 2022-01-24 17:15:02 +08:00
i-robot 19e40a1f76
!29447 [MS][LITE]Synchronize bugfix of control flow
Merge pull request !29447 from mengyuanli/sync_r1.6
2022-01-24 08:08:02 +00:00
yefeng b0d867fd7d check train data type 2022-01-24 15:43:33 +08:00
zhoufeng 6c3c811145 fix mistakes of mindapi path
Signed-off-by: zhoufeng <zhoufeng54@huawei.com>
2022-01-24 15:00:13 +08:00
mengyuanli 6d9f02a077 add identity kernel to link actors correctly
fix bug of control flow model

 fix bug of control flow

fix bug of control flow model

fix bug of link info
2022-01-24 14:46:46 +08:00
i-robot 6458920baf
!29432 add mindapi head files to tar package
Merge pull request !29432 from zhoufeng/add-mindapi-h-file-to-package-r1.6
2022-01-24 06:09:28 +00:00
i-robot 64932d08fb
!29089 remove device target info in session basic
Merge pull request !29089 from baihuawei/clear_code_rt1.6
2022-01-24 05:31:30 +00:00
i-robot 000ea714b2
!29335 fix greater op infer bug
Merge pull request !29335 from Simson/push-to-r1.6
2022-01-24 04:42:38 +00:00
Xiao Tianci bcb83a5dd1 fix some API doc errors 2022-01-24 10:13:32 +08:00
zhoufeng 3a15acf29b add mindapi head files to tar package
Signed-off-by: zhoufeng <zhoufeng54@huawei.com>
2022-01-24 10:01:10 +08:00
i-robot d49404b4db
!29332 add more log to help debug random error
Merge pull request !29332 from chengbin/r1.6
2022-01-22 06:57:11 +00:00
fanjibin abed7d1df7 fix cpu l2loss with input shape 0 2022-01-21 01:08:12 +08:00
simson 9c0cd286fe fix greater op infer bug 2022-01-20 15:30:10 +08:00
i-robot 7d4a6f8654
!29323 Modify Error Info For 1.6
Merge pull request !29323 from liuyang/ms_1_6
2022-01-20 06:50:49 +00:00
i-robot 9d6f62e3d5
!29206 fix transpose_fp32 buffer overflow bug
Merge pull request !29206 from qinzheng/fix_fuzz_r1.6
2022-01-20 03:17:43 +00:00
liuyang_655 238ef09f00 Modify error info for 1.6 2022-01-19 22:09:42 -05:00
Liu_Xuu a4a108c5c1 [MSLITE] rm tmp wide_deep_worker_ ci 2022-01-20 09:53:02 +08:00
liuxiao93 3908ce6173 fix a bug about pynative mode error on Ascend 2022-01-19 11:15:56 +08:00
i-robot 9978d6bd29
!29114 1.6 Model support change dataset
Merge pull request !29114 from wangnan39/model_support_change_dataset
2022-01-19 01:50:30 +00:00
i-robot 94af9efe4d
!29036 me write r1.6 0113
Merge pull request !29036 from mindspore_ding/me_whrite_r1.6_0113
2022-01-18 10:56:25 +00:00
i-robot 48ddb59f06
!29245 Fix error format of docstring.
Merge pull request !29245 from zhangyi/code_docs_r1.6
2022-01-18 07:29:05 +00:00
zhangyi 18c5e1fda1 fix error format of docstring. 2022-01-18 14:50:45 +08:00
i-robot a745d62cad
!29202 Fix Core Dump after RuntimeError
Merge pull request !29202 from hwjiaorui/core-dump-1.6
2022-01-18 06:16:48 +00:00
i-robot 88b1a4704d
!29205 fix error msg for zeros and ones & gathernd bug
Merge pull request !29205 from Simson/push-to-r1.6
2022-01-18 06:16:11 +00:00
i-robot 033ae49e14
!29201 Release python GIL when SyncStream
Merge pull request !29201 from caifubi/r1.6-pynative-sync-gil-lock
2022-01-18 06:14:48 +00:00
qinzheng 96c2b6eb38 fix transpose_fp32.cc buffer overflow bug 2022-01-18 11:09:45 +08:00
lvyufeng 79773c575f fix LSTM description 2022-01-18 10:10:13 +08:00
liangzelang 438750ac7f fixed f613a66 from https://gitee.com/liangzelang/mindspore/pulls/29125
fix tuple in tuple bug
2022-01-17 12:47:24 +00:00
ckey_Dou 4bd1b5623a add more log to help debug random error 2022-01-17 20:29:04 +08:00
liangzelang 57b1e889b2 fixed 977edd7 from https://gitee.com/liangzelang/mindspore/pulls/29207
Change log level
2022-01-17 12:03:45 +00:00
i-robot 704e79155f
!29140 [MS][LITE] change nnie_3516_master to nnie_3516_r1.6_stable
Merge pull request !29140 from chenjianping/r1.6_dev
2022-01-17 11:39:34 +00:00
caifubi e3b0c21ac9 release python gil lock when sync stream 2022-01-17 18:42:27 +08:00
hwjiaorui 9978b4862c fix core dump after runtime error 2022-01-17 11:48:23 +08:00
simson 1a3267f496 fix error msg for zeros and ones 2022-01-17 11:15:52 +08:00
i-robot 11f13eb7db
!29146 r1.6 fix minddata doc
Merge pull request !29146 from luoyang/code_docs_r1.6
2022-01-17 03:02:54 +00:00
chenjianping 4094e0e338 change nnie 3516_master to 3516_r1.6_stable 2022-01-17 10:28:32 +08:00
i-robot 0d8dc2fbe3
!29145 Synchronize the inputs abstract sequence node info. before save.
Merge pull request !29145 from 张清华/cherry-pick-1642246958
2022-01-17 01:01:49 +00:00
i-robot f52499c03e
!29072 functional select check input cond
Merge pull request !29072 from wangnan39/1.6_function_select_input_check
2022-01-16 08:32:31 +00:00
luoyang c07ae323b1 fix minddata doc 2022-01-15 23:37:22 -08:00
Zhang Qinghua ab43a7edd4 fixed 4642b96 from https://gitee.com/zh_qh/mindspore/pulls/29129
Synchronize the inputs abstract sequence node info. before save.
2022-01-15 11:42:39 +00:00
i-robot fdc99ac561
!29136 add ascend release pkg
Merge pull request !29136 from zhengyuanhua/r1.6
2022-01-15 08:53:28 +00:00
i-robot 88cb3e5151
!29132 [assistant][ops] Fix InferType bug for IsNan and IsInf
Merge pull request !29132 from 孟权令/Fix_IsInf_IsNan
2022-01-15 08:52:41 +00:00
i-robot d24fa4ab54
!29138 update dockerfile for 1.6.0
Merge pull request !29138 from yanghaoran/r1.6
2022-01-15 08:49:06 +00:00
i-robot a5c16000fd
!29101 optimize err msg about c++ CPU and 1 code docs issue
Merge pull request !29101 from chentangyu/code_err_msg_cty_r1.6_I4QIY2_I4QS98
2022-01-15 08:26:40 +00:00
i-robot 652dfde4a6
!29123 fix Conv2DBackpropEltwiseEltwiseFusion pattern
Merge pull request !29123 from yuchaojie/r1.6_fix
2022-01-15 08:23:32 +00:00
yanghaoran f7f517f9b8 update dockerfile for 1.6.0 2022-01-15 15:49:03 +08:00
zhengyuanhua 4810530133 add ascend release pkg 2022-01-15 15:42:03 +08:00
Sawyer f977e11afd Fix InferType bug for IsNan and IsInf 2022-01-15 14:28:34 +08:00
i-robot 6dc3b4ef97
!29048 r1.6 Int64 to int of type id
Merge pull request !29048 from chenfei_mindspore/r1.6
2022-01-15 04:40:01 +00:00
lianghao23 5ca74b338e Fix TypeIdLabel bug for complex128 2022-01-15 12:27:36 +08:00
jjfeing 5567163cf8
!29126 update release md r1.6
Merge pull request !29126 from jjfeing/updata_release
2022-01-15 03:38:07 +00:00
jjfeing 39d0a9452c update release md 2022-01-15 11:35:08 +08:00
yuchaojie 3ded4b0620 fix Conv2DBackpropEltwiseEltwiseFusion pattern 2022-01-15 10:59:36 +08:00
i-robot 0146d12169
!29102 Ignore Partial(DeadNode) in backend routine, and add more primitives using tuple/list input.
Merge pull request !29102 from 张清华/r1.6
2022-01-14 17:08:46 +00:00
i-robot 23fcf45348
!29069 Upgrade Ascend 20220113
Merge pull request !29069 from TronZhang/upgrade_ascend_20220113_1_6
2022-01-14 12:55:39 +00:00
i-robot 054d9481a7
!29080 fix bug of data-parallel mix-precision in PyNative
Merge pull request !29080 from caifubi/r1.6-pynative-data-parallel
2022-01-14 12:40:29 +00:00
i-robot 6737f3dd40
!29077 Disable internal output when using MemScheduler
Merge pull request !29077 from tanghuikang/sync_host_to_device_1.6
2022-01-14 12:19:33 +00:00
tacyi139 c9021e2e91 optimize err msg about c++ CPU and 1 code docs issue 2022-01-14 19:10:07 +08:00
jinxiaoxian 601fcf40d5 fix: model train exchange dataset 2022-01-14 19:08:23 +08:00
i-robot 3cbbf5779c
!29085 Adapt to the old data format of aicpu
Merge pull request !29085 from zangqx/r1.6
2022-01-14 10:43:35 +00:00
chenweitao_295 97791d7562 r1.6 Fix some CPU operator whitelists 2022-01-14 18:28:06 +08:00
baihuawei 7656fb353f decouple session from device target 2022-01-14 17:40:54 +08:00
i-robot ae2ab557a7
!29024 optimize err msg about c++ GPU
Merge pull request !29024 from chentangyu/code_err_msg_cty_r1.6_I4QJZ9
2022-01-14 09:39:38 +00:00
i-robot 107ecb93f0
!29059 bugfix: set custom node of ms output shape
Merge pull request !29059 from zhengyuanhua/r1.6
2022-01-14 09:19:13 +00:00
Zhang Qinghua e990c1b550 Add more primitives using tuple/list input. 2022-01-14 17:03:38 +08:00
Zhang Qinghua f764f15278 Ignore Partial(DeadNode) in backend routine. 2022-01-14 17:03:08 +08:00
i-robot a77d539f31
!29098 fix doc
Merge pull request !29098 from 杨林枫/code_docs_numpy_1.6
2022-01-14 08:55:51 +00:00
i-robot 10356f7fdc
!29012 [ME] Code static check.
Merge pull request !29012 from Margaret_wangrui/r1.6
2022-01-14 08:53:06 +00:00
yanglf1121 2bc6db13b6 fix doc 2022-01-14 16:48:00 +08:00
dingpeifei 0881a13939 me write 0113 r1.6 2022-01-14 15:46:14 +08:00
caifubi 38d53fd5c4 Fix data-parallel mix-precision bug in PyNative Mode 2022-01-14 15:32:17 +08:00
i-robot 167fbd92fa
!29029 Fault analyzability for third-party call failures
Merge pull request !29029 from zuochuanyong/r1.6_onednn_dfx
2022-01-14 07:30:49 +00:00
huangxinjing 5c0afe3384 Export Layernorm to support r1.5 pangu 2022-01-14 15:10:03 +08:00
i-robot a3381ecad9
!29063 Only Tensor of construct input used to create cell instance raises exception
Merge pull request !29063 from zhangzhaoju/r1.6
2022-01-14 07:01:01 +00:00
臧庆香 2e622c1428 Adapt to the old data format of aicpu 2022-01-14 14:48:41 +08:00
tanghuikang 5955cdf98e Disable internal output when using MemScheduler 2022-01-14 14:44:12 +08:00
i-robot 35b2db5590
!29030 [Bugfix]Tensor host&device address format is different
Merge pull request !29030 from caifubi/r1.6-pynative-input-tensor-format-different
2022-01-14 06:41:36 +00:00
i-robot ff7646453a
!29070 Fix pclint.
Merge pull request !29070 from gaoyong10/r1.6
2022-01-14 06:29:43 +00:00
yujianfeng f04b07dddc Clean code 2022-01-14 14:19:28 +08:00
chenfei cea9e5f341 change int64 of type id to int 2022-01-14 14:14:40 +08:00
i-robot 5741d7d0f3
!28844 GPU codex fix
Merge pull request !28844 from VectorSL/r1.6
2022-01-14 03:49:31 +00:00
王南 911146543e 1.6 select input check 2022-01-14 11:44:26 +08:00
i-robot 41e7ef51ef
!29026 Fix code check issues in MindAPI
Merge pull request !29026 from hewei/fix_r1.6
2022-01-14 03:38:48 +00:00
gaoyong10 05c40f1fa6 Fix pclint. 2022-01-14 11:31:54 +08:00
i-robot 63fd66bd74
!28985 optimize error message
Merge pull request !28985 from chentangyu/code_err_msg_cty_r1.6_I4QJGO
2022-01-14 02:58:59 +00:00
TronZhang 62994837bd Upgrade Ascend 20220113 2022-01-14 10:56:54 +08:00
i-robot c54e7babfb
!29043 [MSLITE] matmul bias tensor data invalid
Merge pull request !29043 from ling/r1.6
2022-01-14 02:35:53 +00:00
i-robot 12eda9f0c3
!29020 Update the api docstring for Summary
Merge pull request !29020 from maning202007/r1.6
2022-01-14 02:29:38 +00:00
i-robot fe5509794c
!28998 Fix warning on 1.6
Merge pull request !28998 from shenwei41/fix_waring_1.6
2022-01-14 02:28:22 +00:00
i-robot 364fb8d81d
!29000 fix random op seed
Merge pull request !29000 from fangzehua/fix_random_1.6
2022-01-14 02:22:02 +00:00
i-robot 1d3315c552
!28852 fix celu in r1.6
Merge pull request !28852 from jiangzhenguang/fix_celu_r1.6
2022-01-14 02:21:29 +00:00
Margaret_wangrui d1dcac7721 [ME] Code static check. 2022-01-14 09:54:56 +08:00
i-robot baf8ebf27b
!29065 takedown sit_rnn_grad_input due to probabilistic failures
Merge pull request !29065 from yanghaoran/r1.6
2022-01-14 01:53:24 +00:00
i-robot c64bd0a797
!29042 Update submodule akg to r1.6
Merge pull request !29042 from anyrenwei/r1.6
2022-01-14 01:43:52 +00:00
i-robot ccc08e2208
!29038 revert rename reshape
Merge pull request !29038 from lingyunli63/r1.6_revert_reshape
2022-01-14 01:37:27 +00:00
yanghaoran 8d51a83ad4 takedown sit_rnn_grad_input due to probabilistic failures 2022-01-14 09:33:54 +08:00
zhangzhaoju 76abfd6fda Only Tensor of construct input used to create cell instance raises exception. 2022-01-14 09:01:15 +08:00
i-robot 024e67605a
!29052 Only mark unused elements of tuple/list, not eliminate them.
Merge pull request !29052 from 张清华/r1.6
2022-01-14 00:48:49 +00:00
zhengyuanhua b9e00bcc5f bugfix: update custom output type 2022-01-13 23:30:58 +08:00
i-robot 613b072798
!29016 Fix pclint.
Merge pull request !29016 from gaoyong10/r1.6
2022-01-13 13:25:13 +00:00
zuochuanyong 94d84d5a8d onednn dfx 2022-01-13 21:24:44 +08:00
i-robot fcf461e33f
!29017 r1.6 fix minddata api doc
Merge pull request !29017 from luoyang/r1.6
2022-01-13 13:23:39 +00:00
Zhang Qinghua 9a95e57d49 Do renormalize after CConv. 2022-01-13 21:22:03 +08:00
Zhang Qinghua 435fda944e Only mark unused elements of tuple/list, not eliminate them. 2022-01-13 21:21:42 +08:00
i-robot b52ae65f7d
!29046 fix doc issues
Merge pull request !29046 from luoyang/code_docs_r1.6
2022-01-13 13:05:18 +00:00
i-robot 550a2fa8c5
!29031 [MS][LITE][ToD] Fix VAE BNGrad segmentation fault bug
Merge pull request !29031 from Nizzan/export1_nizzan
2022-01-13 12:46:01 +00:00
luoyang 911616ce2e fix doc issues 2022-01-13 20:34:42 +08:00
ling a6a19dff1d [MSLITE] matmul bias tensor data invalid 2022-01-13 20:26:42 +08:00
i-robot d2a4b0d2f3
!28885 add copyattr in TransposedUpdateFusion
Merge pull request !28885 from yuchaojie/r1.6_fix
2022-01-13 12:23:47 +00:00
anyrenwei fc27204120 update akg to r1.6 2022-01-13 20:13:39 +08:00
lingyunli63 af64e3705f Revert "rename dynamicreshape to reshape"
This reverts commit 89e8b90a8d.
2022-01-13 20:07:03 +08:00
i-robot 2b902742be
!28997 Fix bugs in edge_costmodel.
Merge pull request !28997 from liuluobin/r1.6_codex
2022-01-13 11:43:08 +00:00
luoyang 070a63a86b fix minddata api doc 2022-01-13 19:34:38 +08:00
i-robot 95c858c218
!28906 revert lock
Merge pull request !28906 from TuDouNi/r1.6
2022-01-13 11:33:52 +00:00
nizzan b9528c1735 Fix VAE BNGrad Seg fault bug 2022-01-13 13:11:44 +02:00
i-robot 723863d3fd
!29014 fix demo print
Merge pull request !29014 from yeyunpeng2020/r1.6
2022-01-13 10:59:30 +00:00
caifubi 4d5ba0edba [Bugfix]use SyncHostToDevice when tensor address format is different from parameter address format 2022-01-13 18:44:38 +08:00
shenwei41 82302ba52b fix warning 2022-01-13 17:31:58 +08:00
i-robot 5ffa9a0e02
!28948 [MS][LITE] fix micro mobinetv2 example and x86 release path
Merge pull request !28948 from zhengjun10/micro16
2022-01-13 09:27:38 +00:00
He Wei 5f71520ba4 Fix code check issues in MindAPI 2022-01-13 17:21:03 +08:00
tacyi139 547943d270 optimize err msg about c++ GPU 2022-01-13 17:17:25 +08:00
maning202007 aeebc26315 Update the api docstring for Summary 2022-01-13 17:04:36 +08:00
i-robot 6c06a59be1
!28973 Fix the order of infer shape and type for xlogy
Merge pull request !28973 from liangxhao/xlogy_r1.6
2022-01-13 08:50:42 +00:00
i-robot c73cac6a72
!28864 enhance dataset step2 for br:r1.6
Merge pull request !28864 from guozhijian/enhance_dataset_r1.6
2022-01-13 08:32:50 +00:00
liuluobin b5f772e3ed Fix bugs in edge_costmodel. 2022-01-13 16:32:39 +08:00
gaoyong10 4a658c0355 Fix pclint. 2022-01-13 16:29:28 +08:00
yeyunpeng2020 bb736231ad fix demo print 2022-01-13 16:18:47 +08:00
i-robot b73f77a4da
!28988 fix lu batched for gpu && cpu backend
Merge pull request !28988 from zhuzhongrui/r1.6
2022-01-13 08:03:46 +00:00
yuchaojie c859eebdab add copyattr in TransposedUpdateFusion 2022-01-13 16:01:42 +08:00
fangzehua 1634ece8f0 fix random 2022-01-13 16:01:02 +08:00
i-robot 97028a63c0
!28928 r1.6 Change EnvironGet's key input type from uint64 to int64
Merge pull request !28928 from chenfei_mindspore/r1.6
2022-01-13 07:53:07 +00:00
i-robot 6e899925c6
!28961 Code check, change log level 1.6
Merge pull request !28961 from huangbingjian/clean_code_1.6
2022-01-13 07:52:18 +00:00
zhengjun10 7b22a9817c fix micro and x86 release package path 2022-01-13 15:00:49 +08:00
i-robot 3cff14ba80
!28900 clean code for 1.6
Merge pull request !28900 from changzherui/clean_code_1.6
2022-01-13 06:28:09 +00:00
i-robot 0724ce8b99
!28937 Log Level r1.6
Merge pull request !28937 from hwjiaorui/log-level-1.6
2022-01-13 06:26:32 +00:00
z00512249 15d35023e5 fix lu batched for gpu && cpu backend 2022-01-13 11:21:49 +08:00
tacyi139 a2ba74715a optimize error message 2022-01-13 10:38:56 +08:00
i-robot 505a7fc459
!28981 takedown failed testcase rnn forward input
Merge pull request !28981 from yanghaoran/r1.6
2022-01-13 02:16:50 +00:00
jonyguo 138dec9a55 enhance datasets.py 2022-01-13 10:04:43 +08:00
i-robot fd6c4322cc
!28958 Code_specification_for_dynamic_shape_operator for r1.6
Merge pull request !28958 from 张毅辉/cherry-pick-1641984593
2022-01-13 02:00:46 +00:00
yanghaoran f22bb5fc70 takedown failed testcase rnn forward input 2022-01-13 09:52:32 +08:00
i-robot 2f337de064
!28938 [MS][LITE][ToD] Fix VAE model bug
Merge pull request !28938 from Nizzan/export_nizzan
2022-01-13 01:42:41 +00:00
i-robot 23dcccb29a
!28956 update dim_reduce: add callback for line search
Merge pull request !28956 from jinjiali-kali/r1.6
2022-01-13 01:35:15 +00:00
i-robot bd74e1510f
!28889 optimizes the error message of BroadcastTo, ResizeBilinear, Conv3D and supports zero dims of input for Squeeze.
Merge pull request !28889 from wangshuide/wsd_r1.6
2022-01-13 01:31:04 +00:00
i-robot 54dfe5a4ae
!28960 fix some potential risk
Merge pull request !28960 from liubuyu/r1.6
2022-01-13 01:22:55 +00:00
i-robot 69b4a6bd01
!28968 [MS][LITE]fix bug of control flow model
Merge pull request !28968 from mengyuanli/sync_r1.6
2022-01-13 01:20:16 +00:00
lianghao23 8c093f462a Fix the order of infer shape and type for xlogy 2022-01-12 23:09:28 +08:00
changzherui f7ad788852 clean code for 1.6 2022-01-12 22:32:38 +08:00
mengyuanli 13385af6e7 move func to control flow scheduler 2022-01-12 21:09:03 +08:00
huangbingjian 823a623405 code check 2022-01-12 19:57:43 +08:00
lby ba0c7f8e2e fix some potential risk 2022-01-12 19:08:58 +08:00
zyhStack e749e646cc fixed 2144cc2 from https://gitee.com/zyhstack/mindspore/pulls/28953
Modify the code specification of dynamic shape operator
2022-01-12 10:49:56 +00:00
i-robot ddcf897386
!28951 Remove modelzoo
Merge pull request !28951 from chenhaozhe/remove-modelzoo-r.16
2022-01-12 09:38:36 +00:00
VectorSL 87c4e20513 fix codex 2022-01-12 17:34:54 +08:00
i-robot 087dde5bfc
!28912 Fix some code review problems on branch 1.6
Merge pull request !28912 from xiaotianci/update_code_review_r1.6
2022-01-12 09:30:48 +00:00
jinjiali 0dc067e16b update dim_reduce: add callback for line search 2022-01-12 17:29:42 +08:00
chenhaozhe 7c44866a9a Remove modelzoo completely, modelzoo has been moved to https://gitee.com/mindspore/models 2022-01-12 17:16:28 +08:00
i-robot 6d0106e5f0
!28933 Fix pclint and codedex and ref weight node.
Merge pull request !28933 from gaoyong10/r1.6
2022-01-12 09:12:16 +00:00
i-robot 2720e0d77f
!28947 optimize code examples about DistributedGradReducer and SyncBatchNorm
Merge pull request !28947 from chentangyu/code_docs_cty_r1.6_I4Q5L9
2022-01-12 09:06:16 +00:00
tacyi139 ab831933a6 optimize code examples about DistributedGradReducer and SyncBatchNorm 2022-01-12 16:53:27 +08:00
i-robot 28d93b8268
!28907 [MSLITE][GPU][r1.6] arithmetic broadcast bugfix, gather op bugfix, mali-g78's opencl refresh policy update
Merge pull request !28907 from Greatpan/r1.6
2022-01-12 08:45:58 +00:00
i-robot 605b15335e
!28878 Fix bug for Scatter_xx ops in r1.6
Merge pull request !28878 from 张毅辉/cherry-pick-1641891038
2022-01-12 08:26:43 +00:00
i-robot 6cea22ca1c
!28910 Clean Code
Merge pull request !28910 from tanghuikang/clean_code
2022-01-12 08:12:57 +00:00
i-robot e2b13c07b7
!28903 Fix Transformer Overflow
Merge pull request !28903 from huangxinjing/fx_transformer_overflow
2022-01-12 08:00:06 +00:00
i-robot a605b2c551
!28917 Fix codedex and pclint warnings
Merge pull request !28917 from zyli2020/r1.6
2022-01-12 07:51:13 +00:00
hwjiaorui 034bde4abd modify log level 2022-01-12 15:23:49 +08:00
i-robot 4720a43e55
!28884 operator code optimization
Merge pull request !28884 from 沈竞兴/codefix_r1.6
2022-01-12 07:19:47 +00:00
i-robot 04a9e3823a
!28925 [r1.6]fix bad hash function
Merge pull request !28925 from huanghui/r1.6-fix-map-overflow
2022-01-12 07:19:25 +00:00
i-robot 5a788fde8e
!28891 Add UInt8Imm to MindAPI
Merge pull request !28891 from hewei/fix_r1.6
2022-01-12 07:15:55 +00:00
i-robot b0cf019553
!28882 fix pclint
Merge pull request !28882 from tan-wei-cheng-3260/r1.6
2022-01-12 07:03:03 +00:00
i-robot d093623169
!28932 Subject to zhouyaqiang's modification
Merge pull request !28932 from chentangyu/code_docs_cty_r1.6_I4PIJK
2022-01-12 06:38:11 +00:00
i-robot d22c320cfa
!28854 [MS][LITE] sync example to new java api
Merge pull request !28854 from zhengjun10/java16
2022-01-12 06:31:00 +00:00
i-robot ab59143fc3
!28915 Fix attr check bug
Merge pull request !28915 from jiaoy1224/r1.6
2022-01-12 06:24:03 +00:00
i-robot ee824b4f60
!28861 Fixes: "[engine_] is null" and ms_memory_recycle failed while compile exception.
Merge pull request !28861 from zhangzhaoju/r1.6
2022-01-12 06:19:27 +00:00
i-robot fe228ce535
!28899 fix b_hh caused error
Merge pull request !28899 from 吕昱峰(Nate.River)/r1.6
2022-01-12 06:19:24 +00:00
i-robot c51b2cb0d6
!28892 [MSLITE] clean fp32 bug
Merge pull request !28892 from ling/r1.6
2022-01-12 06:09:52 +00:00
i-robot 85ce12a18b
!28832 code clean
Merge pull request !28832 from chenweifeng/r1.6
2022-01-12 06:06:39 +00:00
i-robot f794347bdb
!28916 Reject repeat registrations of alive nodes for ps
Merge pull request !28916 from zyli2020/r1.6_code_optimize
2022-01-12 04:42:03 +00:00
i-robot a278540002
!28931 change links for doc of alltoall
Merge pull request !28931 from zhoufeng/code_docs_alltoall_1.6
2022-01-12 03:57:15 +00:00
gaoyong10 5056d5c2d1 Fix pclint and codedex.
Collect weight from entrance actor in subgraph.
2022-01-12 11:39:50 +08:00
tacyi139 f5062a69af Subject to zhouyaqiang's modification 2022-01-12 11:27:24 +08:00
zhoufeng f735eada84 change links for doc of alltoall
Signed-off-by: zhoufeng <zhoufeng54@huawei.com>
2022-01-12 11:25:31 +08:00
i-robot 528470d537
!28863 axis debug
Merge pull request !28863 from zong_shuai/unstack_debug
2022-01-12 03:15:00 +00:00
chenfei 0ddf066fce change key from uint64 to int64 2022-01-12 11:04:25 +08:00
huanghui 9745927e0b fix bad hash function 2022-01-12 10:58:28 +08:00
lizhenyu 5c98edb31f fix codedex warning 2022-01-12 10:38:33 +08:00
lizhenyu dc1cd3cfba ps supports refusing to pull up the same node repeatedly 2022-01-12 10:33:27 +08:00
wangshuide2020 0924c0d2fe optimizes the error message of BroadcastTo, ResizeBilinear, Conv3D and supports zero dims of input for Squeeze. 2022-01-12 10:32:01 +08:00
Yang Jiao 05a61050c3 fix attrs check bug 2022-01-12 10:27:41 +08:00
i-robot 12641803cf
!28897 [assistant][ops]New operator implementation, include ApplyCenteredRMSProp
Merge pull request !28897 from ganqijun/ApplyCenteredRMSPropBugFix
2022-01-12 02:18:23 +00:00
Xiao Tianci dac9d60332 update review problems 2022-01-12 10:09:49 +08:00
i-robot 91006bb506
!28875 fix random cpu ops error
Merge pull request !28875 from fangzehua/fix_random_cpu
2022-01-12 01:59:06 +00:00
greatpanc 77508bdbb0 arithmetic broadcast bugfix, gather op bugfix, mali-g78's opencl refresh policy update 2022-01-12 09:28:58 +08:00
ttudu 168872e2eb revert lock 2022-01-12 09:25:21 +08:00
ling 7486f2a20b [MSLITE] clean fp32 bug 2022-01-12 09:21:20 +08:00
i-robot fc97be97c5
!28880 Clearing code check alarm for parallel
Merge pull request !28880 from liuluobin/r1.6_codex
2022-01-12 01:14:46 +00:00
nizzan 5c68da9b18 Fix converter when adding transpose for get state, for VAE fix 2022-01-11 16:55:13 +02:00
lvyufeng 84ff241236 fix b_hh cased error 2022-01-11 22:18:36 +08:00
i-robot d43834cb2e
!28851 [MS][LITE][develop] code review
Merge pull request !28851 from sunsuodong/code_check_1.6
2022-01-11 12:35:44 +00:00
i-robot 2a1637153e
!28839 [MS][LITE] clean code check error
Merge pull request !28839 from cjh9368/clean_static_error_1_6
2022-01-11 12:25:44 +00:00
i-robot 9b25ae3b8e
!28865 GraphKernel rewrite output pointer is nullptr bug fix in r1.6
Merge pull request !28865 from ZengZitao/r1.6_rewrite_fix
2022-01-11 12:14:10 +00:00
bsx 890fd9b395 [fix][assistant][I48OB7] Modify the logic error in infershape 2022-01-11 20:00:31 +08:00
liuluobin e117294896 Clearing code check alarm for parallel 2022-01-11 19:38:50 +08:00
He Wei 67ccdab735 Add UInt8Imm to MindAPI 2022-01-11 19:36:53 +08:00
i-robot 1cb8896ed8
!28867 [MS][LITE]change output kernel to identity kernel
Merge pull request !28867 from mengyuanli/sync_r1.6
2022-01-11 11:14:48 +00:00
zyhStack 12eec72e4a Fix bug for Scatter_xx ops 2022-01-11 19:05:57 +08:00
liuxiao93 8d99efec04 code optimization 2022-01-11 18:54:00 +08:00
i-robot 0926d1ebec
!28849 Export the bprop mindir when the bprop directory is symbolic link
Merge pull request !28849 from YuJianfeng/r1.6
2022-01-11 10:40:56 +00:00
i-robot 904f8bc65e
!28857 Support non-tensor object return in PyNative custom bprop
Merge pull request !28857 from JoyLvliang/r1.6
2022-01-11 10:36:47 +00:00
i-robot 7ea2376a3f
!28873 remove comma in narrow description
Merge pull request !28873 from 吕昱峰(Nate.River)/code_docs_r1.6
2022-01-11 09:37:13 +00:00
twc f341661384 fix pclint 2022-01-11 17:30:45 +08:00
i-robot 907a31da4b
!28871 Sync 6 issue items from the master branch
Merge pull request !28871 from chentangyu/code_docs_cty_master_I4Q5W5_I4Q7GD_I4Q1GJ_I4Q21D_I4Q1YP_I4Q1P3
2022-01-11 08:31:25 +00:00
lvyufeng d9e9a81cac remove comma in narrow description 2022-01-11 16:10:55 +08:00
fangzehua 5db0bf553b fix random cpu ops 2022-01-11 16:04:36 +08:00
tacyi139 4457e1711e Sync 6 issue items from the master branch 2022-01-11 16:02:24 +08:00
i-robot 6d99de6d5a
!28853 update links r1.6
Merge pull request !28853 from yingchen/code_docs_link1.6
2022-01-11 07:53:06 +00:00
mengyuanli 6d911b7e9c fix bug of stuck
move output kernel to identity kernel
2022-01-11 15:52:00 +08:00
i-robot dde2f348c3
!28824 Disable lazy build when set PYNATIVE_SYNCHRONIZE in context.
Merge pull request !28824 from caifubi/r1.6-pynative-sync
2022-01-11 07:45:46 +00:00
i-robot 0c4be6b011
!28823 The input of RefNode may not necessarily be parameter in PyNative mode
Merge pull request !28823 from caifubi/r1.6-pynative-refnode-bug
2022-01-11 07:45:33 +00:00
i-robot 7a7b95e099
!28827 Fix bug of Tensor __repr__
Merge pull request !28827 from caifubi/r1.6-pynative-tensor-repr
2022-01-11 07:44:31 +00:00
i-robot ce7d97bfa3
!28833 Bugfix for random effect operator
Merge pull request !28833 from caifubi/r1.6-pynative-random-op
2022-01-11 07:41:18 +00:00
zengzitao 86f659f700 fix rewrite output is nullptr bug in r1.6 2022-01-11 15:28:09 +08:00
i-robot 064cdfad50
!28831 Fix Static Check on r1.6
Merge pull request !28831 from jiaoy1224/r1.6
2022-01-11 07:17:12 +00:00
zhangzhaoju 4b00d7400b Add input type check for instance Create in Cell.construct
Adjust the order of resource release sequence

Save executor_info to cache after compile to avoid inconsistency between the
Python cache and the C++ cache

Clean entire_costgraph at ClearResAtExit to avoid coredump.
2022-01-11 15:06:03 +08:00
i-robot b126622ee9
!28816 fix greater op infervalue
Merge pull request !28816 from Simson/push-to-r1.6
2022-01-11 06:57:40 +00:00
7347157+joylvliang@user.noreply.gitee.com 279426a7a3 correct_check_pynative_hook 2022-01-11 14:31:35 +08:00
i-robot ce510b96e5
!28818 fix dynamicrnngrad error for pynative r1.6
Merge pull request !28818 from chujinjin/fix_dynamicrnngrad_error_for_pynative_1.6
2022-01-11 06:30:24 +00:00
yingchen 516193c2ec update links r1.6 2022-01-11 14:29:50 +08:00
i-robot 235b0d16f8
!28841 Fix an issue of federated learning
Merge pull request !28841 from jxlang910/r1.6
2022-01-11 06:21:14 +00:00
jiangzhenguang 6caf38433d fix celu in r1.6 2022-01-11 14:16:41 +08:00
sunsuodong eddea91f3c code review 2022-01-10 22:11:04 -08:00
zhengjun10 940e31860d sync examples to new java api 2022-01-11 12:43:06 +08:00
i-robot c3e3da7bd8
!28820 [MSLITE] fix matmul bug and mem leak in tensorrt delegate
Merge pull request !28820 from Liu_Xuu/r1.6
2022-01-11 04:03:45 +00:00
yujianfeng 7ebe23fecf Export the bprop mindir when the bprop directory is symbolic link 2022-01-11 11:52:32 +08:00
zong-shuai 978d165609 debug 2022-01-11 11:48:28 +08:00
cjh9368 bcddad4344 codecheck fix 2022-01-11 11:33:44 +08:00
wilfChen b24e0a51cb code clean 2022-01-11 10:55:12 +08:00
caifubi 22cf096bca Disable lazy build when set PYNATIVE_SYNCHRONIZE 2022-01-11 10:31:45 +08:00
jin-xiulang 8530e01775 Fix an issue of federated learning 2022-01-11 10:22:00 +08:00
Yang Jiao bb62ae0777 fix static check on master 2022-01-11 10:18:20 +08:00
Liu_Xuu e2f252a833 [MSLITE] fix matmul bug and mem leak in tensorrt delegate 0111_01 2022-01-11 10:06:57 +08:00
chujinjin 8172774f54 fix dynamicrnngrad error for pynative 2022-01-11 09:52:15 +08:00
simson 42c2676c1e fix greater op infervalue 2022-01-11 09:50:11 +08:00
i-robot 5c643a207f
!28800 fix bias correction && windows bug
Merge pull request !28800 from yeyunpeng2020/quant_2
2022-01-11 01:31:54 +00:00
i-robot 7d65ce6b3b
!28642 split h and w dimension for conv2d
Merge pull request !28642 from yangzhenzhang/compute-top-bottom-overlap-for-conv2d
2022-01-11 01:29:25 +00:00
i-robot 3b9026083b
!28786 modify error format of docstring.
Merge pull request !28786 from zhangyi/code_docs_master
2022-01-11 01:27:02 +00:00
caifubi b9a8f32c88 bugfix for _random_effect op 2022-01-11 09:25:01 +08:00
i-robot 839f17ea61
!28771 [GraphKernel] Recompute support whole absorb and fix threshold.
Merge pull request !28771 from TronZhang/recompute_fix_threshold
2022-01-11 01:22:19 +00:00
i-robot a0bb65d705
!28155 rename DynamicReshape to reshape, and support static-shape
Merge pull request !28155 from lingyunli63/rm_dynamic_reshape
2022-01-11 01:20:53 +00:00
i-robot 034ca212ea
!27833 reconstruct dataset.py to diff scenario
Merge pull request !27833 from guozhijian/reconstruct_datasets
2022-01-11 01:18:54 +00:00
i-robot ce5ba0e591
!28760 check generator source code
Merge pull request !28760 from shenwei41/luoyang
2022-01-11 01:13:40 +00:00
i-robot baae22b1b4
!28751 fix the pclint
Merge pull request !28751 from limingqi107/new_actor_runtime
2022-01-11 01:13:11 +00:00
i-robot b08d5ba1ba
!28666 [ME][Auto-Monad] Insert Tensor for some special Load nodes.
Merge pull request !28666 from Margaret_wangrui/auto_monad_load
2022-01-10 16:23:53 +00:00
caifubi 3a181d9266 Execute task before tensor __repr__ 2022-01-10 23:35:04 +08:00
i-robot 417cea03ee
!28762 Fix static check.
Merge pull request !28762 from ZPaC/static
2022-01-10 14:25:15 +00:00
i-robot 608ad70bb7
!28754 [lite]support DumpGraph interface can be called explicitly
Merge pull request !28754 from 徐安越/master_core
2022-01-10 14:16:39 +00:00
i-robot f316b1a88f
!28674 [MS][LITE][CPU] unique bug fix
Merge pull request !28674 from liuzhongkai/code_generate3
2022-01-10 12:54:17 +00:00
yeyunpeng2020 4ed99ae928 fix bias correction 2022-01-10 20:49:11 +08:00
i-robot 55de37fd5b
!28768 fix GetValue type error in ConfusionSoftmaxGradRule when axis is a tuple
Merge pull request !28768 from yuchaojie/ir_fusion3
2022-01-10 12:47:43 +00:00
i-robot b7e514f683
!28748 Fix SoftShrink operator documentation
Merge pull request !28748 from chenweitao_295/SoftShrink_doc_issues
2022-01-10 12:22:24 +00:00
i-robot 4d9437c0aa
!28746 Parallel module code Alarm clearing
Merge pull request !28746 from liuluobin/master
2022-01-10 12:10:59 +00:00
i-robot 4153f35c30
!28790 [MS][LITE][develop] add 1.6 release notes
Merge pull request !28790 from sunsuodong/code_docs_release_notes
2022-01-10 12:10:11 +00:00
i-robot 00980c7a05
!28773 [MSLITE] tmp rm wide_and_deep ci in tensorrt
Merge pull request !28773 from Liu_Xuu/trt_0110_rm
2022-01-10 12:02:03 +00:00
i-robot 4856bda05f
!28732 Update doc for nn prob
Merge pull request !28732 from zichun_ye/doc_fix
2022-01-10 11:46:56 +00:00
i-robot 32de3d5e4e
!28750 Modify Log Level
Merge pull request !28750 from hwjiaorui/log
2022-01-10 11:46:31 +00:00
i-robot 96b89aa1f4
!28735 bugfix: remove convert param device
Merge pull request !28735 from zhengyuanhua/code_review
2022-01-10 11:38:23 +00:00
caifubi e06d9200e1 The input of RefNode may not necessarily be parameter in PyNative mode 2022-01-10 19:35:53 +08:00
jonyguo 4481f28e6d reconstruct datasets.py 2022-01-10 19:34:41 +08:00
sunsuodong 032246837f add 1.6 release notes 2022-01-10 03:33:46 -08:00
i-robot 8849fed917
!28531 neighborexchangev2 send empty depend
Merge pull request !28531 from TuDouNi/neighborexchangev2_fix_bug
2022-01-10 11:00:44 +00:00
zhangyi 3510e7c444 modify error format of docstring. 2022-01-10 18:28:03 +08:00
limingqi107 c3888a1e2f fix the pclint 2022-01-10 18:19:29 +08:00
i-robot b52b3c7a42
!28723 [MSLITE] Codex clean.
Merge pull request !28723 from wangshaocong/codex
2022-01-10 09:52:10 +00:00
TronZhang b05f9af150 fix whole absorb for single output recompute 2022-01-10 17:51:34 +08:00
i-robot 379ae29ec6
!28747 fix lu input args valid check
Merge pull request !28747 from zhuzhongrui/pub_master3
2022-01-10 09:44:38 +00:00
lingyunli63 89e8b90a8d rename dynamicreshape to reshape 2022-01-10 17:37:31 +08:00
i-robot e72b29470c
!28756 GraphKernel Fix KMetaTypeNone bug
Merge pull request !28756 from ZengZitao/typenone_fix
2022-01-10 09:25:48 +00:00
i-robot b73ba4181b
!28647 clean code
Merge pull request !28647 from hwjiaorui/clean-code-master
2022-01-10 09:22:11 +00:00
ZPaC 9dd90de735 Fix static check. 2022-01-10 16:36:28 +08:00
i-robot 42a48bd6fe
!28627 update dim_reduce: add timeout, remove l2_loss, accelerate weight concat, update parameter description
Merge pull request !28627 from jinjiali-kali/dimReduce
2022-01-10 08:31:09 +00:00
Liu_Xuu 0dd0136adf [MSLITE] tmp rm wide_and_deep ci in tensorrt 2022-01-10 16:30:37 +08:00
huangxinjing bc1b48810d Add first commit 2022-01-10 16:30:07 +08:00
i-robot f69699f1ec
!28749 fix summary docs
Merge pull request !28749 from jiangshuqiang/code_docs_wb
2022-01-10 08:25:25 +00:00
i-robot d5502bab19
!28758 fix comments
Merge pull request !28758 from liutongtong9/code_docs_fix_issue
2022-01-10 08:16:32 +00:00
yangzhenzhang e5df74e9e4 compute top bottom overlap for conv2d 2022-01-10 16:15:04 +08:00
i-robot 05cd3ca997
!28671 Move e2e dump to super kernel Ascend MindRT
Merge pull request !28671 from parastooashtari/ascend_mindrt
2022-01-10 08:11:58 +00:00
i-robot 663ced7f6a
!28607 [MSLITE][GPU] Adreno channel unalign concat bugfix
Merge pull request !28607 from Greatpan/concat_bug_fix
2022-01-10 08:09:53 +00:00
Zichun Ye d51483f235 update nn.prob doc
fix typo

update doc
2022-01-10 16:08:56 +08:00
Margaret_wangrui 6dcab5a498 [ME][Auto-Monad] Insert Tensor for the Load whose refkey appears more than once,
or when the load is an input of a call or partial, or when the first input of the load is a call or partial.
2022-01-10 16:05:39 +08:00
i-robot e6c8f2ad5e
!28737 [MSLITE][GPU] mindspore lite frame, gpu subgraph data_type set error bugfix
Merge pull request !28737 from Greatpan/to_format_bugfix
2022-01-10 07:52:22 +00:00
i-robot c483842f9c
!28614 add graph manager for AscendGraphOptimization
Merge pull request !28614 from yuchaojie/ir_fusion
2022-01-10 07:47:17 +00:00
yuchaojie 1b7c4ded0f fix GetValue type error in ConfusionSoftmaxGradRule when axis is a tuple 2022-01-10 15:40:10 +08:00
liutongtong 03b856c952 fix comments 2022-01-10 15:29:33 +08:00
shenwei41 9d4a07dd52 check generator source code 2022-01-10 15:08:28 +08:00
i-robot 889f7cb030
!28684 [lite] change 35xx branch to nnie_3516_master
Merge pull request !28684 from zuochuanyong/change_35xx_branch
2022-01-10 07:04:37 +00:00
i-robot 36873f36ae
!28061 optimize the documentation of chinese API of Add,AddN,Div,Eps, etc.
Merge pull request !28061 from chenweitao_295/ops_amend_other
2022-01-10 06:48:55 +00:00
zhengyuanhua 4109450399 bugfix: remove converter param device 2022-01-10 14:37:39 +08:00
liuluobin 7b597d2361 Clearing codecheck for parallel 2022-01-10 14:37:26 +08:00
z00512249 0b5ae6febf fix lu input args valid check 2022-01-10 14:22:44 +08:00
zengzitao 9072e3e105 fix kmetatypenone bug in ppo 2022-01-10 11:41:06 +08:00
xuanyue 21f1d6a015 support DumpGraph interface can be called explicitly 2022-01-10 11:04:23 +08:00
jiangshuqiang 067d5b7b45 fix summary docs 2022-01-10 10:33:57 +08:00
zuochuanyong f8f522a9b1 change 35xx branch to nnie_3516_master 2022-01-10 10:32:56 +08:00
wang_shaocong 5d9950cdce [MSLITE] Codex clean 2022-01-10 10:23:04 +08:00
hwjiaorui 4851580446 log level 2022-01-10 10:20:32 +08:00
chenweitao_295 815b4669e5 fix 2022-01-10 10:02:24 +08:00
ttudu 1595ea7d91 neighborexchangev2 add send empty depend 2022-01-10 09:14:58 +08:00
greatpanc d2126449b6 mindspore lite hydra subgraph data_type bugfix 2022-01-09 21:34:42 +08:00
hwjiaorui 1f07b9bc6c clean code 2022-01-08 16:46:23 +08:00
yuchaojie 3e81faeb0a add graph manager for AscendGraphOptimization 2022-01-08 14:23:42 +08:00
Parastoo Ashtari 28c88a6b2d move e2e dump to superkernel for mindRT 2022-01-07 11:01:56 -05:00
chenweitao_295 3c9aa15c15 amend ops 2022-01-07 17:42:36 +08:00
lzk 39381a7ac8 unique bug fix 2022-01-06 17:54:39 -08:00
greatpanc 6326b8fa24 concat bugfix 2022-01-06 21:27:46 +08:00
jinjiali 38b2d9426a update dim_reduce: add timeout, remove l2_loss, accelerate weight concat, update parameter description 2022-01-06 10:22:59 +08:00
2382 changed files with 68907 additions and 30464 deletions


@ -24,6 +24,7 @@
"mindspore/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc" "useStlAlgorithm"
"mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.cc" "unknownMacro"
"mindspore/mindspore/ccsrc/runtime/device/ascend/ascend_memory_manager.cc" "nullPointerArithmeticRedundantCheck"
"mindspore/mindspore/ccsrc/pipeline/jit/static_analysis/auto_monad.cc" "containerOutOfBounds"
# MindData
"mindspore/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc" "useStlAlgorithm"


@ -95,6 +95,7 @@ mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_winograd
mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_winograd_fp32.c:DeConvWgMerge
mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/avx/TiledC8MatMulFp32.c:TiledC8MatmulFp32
mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/quant_dtype_cast_fp16.c:Fp16ToInt8_arm64
mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_fp32.c:MatMul4x1Kernel
mindspore/mindspore/ccsrc/backend/session/gpu_session.cc:mindspore::session::gpu::GPUSession::LoadInputData
mindspore/mindspore/ccsrc/debug/dump_proto.cc:mindspore::ProtoExporter::SetNodeOutputType
mindspore/mindspore/ccsrc/debug/dump_proto.cc:mindspore::ProtoExporter::SetValueToProto
@ -164,11 +165,30 @@ mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_4x32_kernel_nhwc_fp32.c:nnacl_gemm_avx512_4x32_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_5x64_kernel_nhwc_fp32.c:nnacl_gemm_avx512_5x64_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_5x32_kernel_nhwc_fp32.c:nnacl_gemm_avx512_5x32_kernel_nhwc_fp32
mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_avx512_fp32.c:nnacl_gemm_avx512_2x64_kernel_nhwc_fp32
mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_avx512_fp32.c:nnacl_gemm_avx512_3x64_kernel_nhwc_fp32
mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_avx512_fp32.c:nnacl_gemm_avx512_4x64_kernel_nhwc_fp32
mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_avx512_fp32.c:nnacl_gemm_avx512_5x64_kernel_nhwc_fp32
mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_avx512_fp32.c:nnacl_gemm_avx512_6x64_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_11x32_kernel_nhwc_fp32.c:nnacl_gemm_avx512_11x32_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_10x16_kernel_nhwc_fp32.c:nnacl_gemm_avx512_10x16_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_7x48_kernel_nhwc_fp32.c:nnacl_gemm_avx512_7x48_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_9x32_kernel_nhwc_fp32.c:nnacl_gemm_avx512_9x32_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_9x16_kernel_nhwc_fp32.c:nnacl_gemm_avx512_9x16_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_5x48_kernel_nhwc_fp32.c:nnacl_gemm_avx512_5x48_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_12x16_kernel_nhwc_fp32.c:nnacl_gemm_avx512_12x16_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_5x16_kernel_nhwc_fp32.c:nnacl_gemm_avx512_5x16_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_6x16_kernel_nhwc_fp32.c:nnacl_gemm_avx512_6x16_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_1x48_kernel_nhwc_fp32.c:nnacl_gemm_avx512_1x48_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_8x48_kernel_nhwc_fp32.c:nnacl_gemm_avx512_8x48_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_4x48_kernel_nhwc_fp32.c:nnacl_gemm_avx512_4x48_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_7x16_kernel_nhwc_fp32.c:nnacl_gemm_avx512_7x16_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_6x48_kernel_nhwc_fp32.c:nnacl_gemm_avx512_6x48_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_3x16_kernel_nhwc_fp32.c:nnacl_gemm_avx512_3x16_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_1x32_kernel_nhwc_fp32.c:nnacl_gemm_avx512_1x32_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_2x16_kernel_nhwc_fp32.c:nnacl_gemm_avx512_2x16_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_11x16_kernel_nhwc_fp32.c:nnacl_gemm_avx512_11x16_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_1x16_kernel_nhwc_fp32.c:nnacl_gemm_avx512_1x16_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_10x32_kernel_nhwc_fp32.c:nnacl_gemm_avx512_10x32_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_4x16_kernel_nhwc_fp32.c:nnacl_gemm_avx512_4x16_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_8x16_kernel_nhwc_fp32.c:nnacl_gemm_avx512_8x16_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_3x48_kernel_nhwc_fp32.c:nnacl_gemm_avx512_3x48_kernel_nhwc_fp32
mindspore/mindspore/lite/experiment/HPC-generator/gemm_avx512/nnacl_gemm_avx512_2x48_kernel_nhwc_fp32.c:nnacl_gemm_avx512_2x48_kernel_nhwc_fp32
mindspore/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32_base.cc:mindspore::kernel::MatmulFp32BaseCPUKernel::Run
mindspore/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_partition.cc:mindspore::parallel::GetWeights
mindspore/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_partition.cc:mindspore::parallel::PartitionNode


@ -19,51 +19,13 @@ if(NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
endif()
if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
# find appropriate macosx SDK and set SDKROOT
if(NOT DEFINED ENV{SDKROOT})
# arm64: macosx11.x
# x86_64: macosx10.x, macosx11.x
if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "arm64")
set(MACOSX_SDK_REGEX "MacOSX11(\\.\\d+)?")
else()
set(MACOSX_SDK_REGEX "MacOSX1[01](\\.\\d+)?")
endif()
exec_program(xcrun ARGS --show-sdk-path OUTPUT_VARIABLE MACOSX_SDK_PATH)
get_filename_component(MACOSX_SDK_PATH ${MACOSX_SDK_PATH} DIRECTORY)
file(GLOB ALL_SDK_NAME RELATIVE ${MACOSX_SDK_PATH} ${MACOSX_SDK_PATH}/*)
# get highest SDK version meets the requirements
execute_process(
COMMAND bash -c "echo '${ALL_SDK_NAME}' | grep -Eo '${MACOSX_SDK_REGEX}' | sort -n | tail -1 | tr -d '\\n'"
OUTPUT_VARIABLE MACOSX_FIND_SDK_NAME
)
if(NOT MACOSX_FIND_SDK_NAME)
message(FATAL_ERROR "can not find appropriate macosx SDK, you may need upgrade xcode")
endif()
set(ENV{SDKROOT} "${MACOSX_SDK_PATH}/${MACOSX_FIND_SDK_NAME}.sdk")
endif()
message("macosx sdkroot: $ENV{SDKROOT}")
# set macosx deployment target based on SDK
if(NOT DEFINED ENV{MACOSX_DEPLOYMENT_TARGET})
execute_process(
COMMAND bash -c "cat $ENV{SDKROOT}/SDKSettings.json | \
grep -Eo 'MACOSX_DEPLOYMENT_TARGET\\\":\\\"\\d{2}\\.\\d+' | cut -d '\"' -f 3 | tr -d '\\n'"
OUTPUT_VARIABLE MACOSX_FIND_SDK_VERSION
)
if(NOT MACOSX_FIND_SDK_VERSION)
message(FATAL_ERROR "can not find MACOSX_DEPLOYMENT_TARGET in SDKROOT, \
please check whether it's a valid SDK path")
endif()
set(CMAKE_OSX_DEPLOYMENT_TARGET ${MACOSX_FIND_SDK_VERSION} CACHE STRING
"minimum macosx deployment target version" FORCE)
if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "arm64")
set(CMAKE_OSX_DEPLOYMENT_TARGET "11.0")
endif()
endif()
message("macosx deployment target version: ${CMAKE_OSX_DEPLOYMENT_TARGET}")
set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Winconsistent-missing-override -Wno-user-defined-warnings \
-Wno-return-std-move -Wno-unused-private-field -Wno-unused-lambda-capture -Wno-sign-compare \
-Wno-overloaded-virtual -Wno-unneeded-internal-declaration -Wno-unused-variable -Wno-pessimizing-move \
-Wno-inconsistent-missing-override -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 13.1)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wno-unused-but-set-variable")
endif()
elseif(ENABLE_SYM_FILE)
set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -g -ggdb -Wl,--allow-shlib-undefined \
-DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")


@ -44,7 +44,7 @@ enrichment of the AI software/hardware application ecosystem.
<img src="https://gitee.com/mindspore/mindspore/raw/master/docs/MindSpore-architecture.png" alt="MindSpore Architecture"/>
For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/programming_guide/en/master/architecture.html).
For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/programming_guide/en/r1.6/architecture.html).
### Automatic Differentiation
@ -240,7 +240,7 @@ please check out [docker](https://gitee.com/mindspore/mindspore/blob/master/scri
## Quickstart
See the [Quick Start](https://www.mindspore.cn/tutorials/en/master/quick_start.html)
See the [Quick Start](https://www.mindspore.cn/tutorials/en/r1.6/quick_start.html)
to implement the image classification.
## Docs
@ -282,6 +282,7 @@ Project stable branches will be in one of the following states:
| **Branch** | **Status** | **Initial Release Date** | **Next Phase** | **EOL Date**|
|------------|--------------|--------------------------|----------------------------------------|-------------|
| **r1.6** | Maintained | 2022-01-29 | Unmaintained <br> 2023-01-29 estimated | |
| **r1.5** | Maintained | 2021-10-15 | Unmaintained <br> 2022-10-15 estimated | |
| **r1.4** | Maintained | 2021-08-15 | Unmaintained <br> 2022-08-15 estimated | |
| **r1.3** | Maintained | 2021-07-15 | Unmaintained <br> 2022-07-15 estimated | |


@ -41,7 +41,7 @@ MindSpore provides a friendly design and efficient execution, aiming to improve the data science
<img src="https://gitee.com/mindspore/mindspore/raw/master/docs/MindSpore-architecture-zh.png" alt="MindSpore Architecture"/>
For more details, please check out our [Architecture Guide](https://www.mindspore.cn/docs/programming_guide/zh-CN/master/architecture.html).
For more details, please check out our [Architecture Guide](https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/architecture.html).
### Automatic Differentiation
@ -236,7 +236,7 @@ MindSpore Docker images are hosted on [Docker Hub](https://hub.docker.com/r/mindspore
## Quickstart
See the [Quick Start](https://www.mindspore.cn/tutorials/zh-CN/master/quick_start.html) to implement image classification.
See the [Quick Start](https://www.mindspore.cn/tutorials/zh-CN/r1.6/quick_start.html) to implement image classification.
## Docs
@ -275,6 +275,7 @@ MindSpore version branches have the following maintenance phases:
| **Branch** | **Status** | **Initial Release Date** | **Next Phase** | **EOL Date**|
|------------|--------------|----------------------|----------------------------------------|------------|
| **r1.6** | Maintained | 2022-01-29 | Unmaintained <br> 2023-01-29 estimated | |
| **r1.5** | Maintained | 2021-10-15 | Unmaintained <br> 2022-10-15 estimated | |
| **r1.4** | Maintained | 2021-08-15 | Unmaintained <br> 2022-08-15 estimated | |
| **r1.3** | Maintained | 2021-07-15 | Unmaintained <br> 2022-07-15 estimated | |


@ -1,3 +1,197 @@
# MindSpore 1.6.2
## MindSpore 1.6.2 Release Notes
### Bug fixes
- Fix the memory leak in the ASR dynamic shape scenario.
- Support training while reading data on ModelArts.
- Adapt Dropout operator recomputation on Ascend.
- Provide a MindIR to ONNX model conversion interface.
### Contributors
Thanks goes to these wonderful people:
Adel, AGroupofProbiotocs, anthonyaje, anzhengqi, askmiao, baihuawei, baiyangfan, bai-yangfan, bingyaweng, BowenK, buxue, caifubi, CaoJian, caojian05, caozhou, Cathy, changzherui, chenbo116, chenfei, chengxianbin, chenhaozhe, chenjianping, chenzomi, chenzupeng, chujinjin, cj, cjh9368, Corleone, damon0626, danish, Danish, davidmc, dayschan, doitH, dong-li001, eric, Eric, fary86, fuzhiye, Gaoxiong, GAO_HYP_XYJ, gengdongjie, Gogery, gongdaguo, gray0v0, gukecai, guoqi, gzhcv, hangq, hanhuifeng2020, Harshvardhan, He, heleiwang, hexia, Hoai, HuangBingjian, huangdongrun, huanghui, huangxinjing, huqi, huzhifeng, hwjiaorui, Islam Amin, Jesse, , Jiabin Liu, jianghui58, jiangzhiwen, Jiaqi, jin-xiulang, jinyaohui, jjfeing, John, Jonathan, jonyguo, JulyAi, jzg, kai00, kingfo, kingxian, kpy, kswang, laiyongqiang, leonwanghui, Li, liangchenghui, liangzelang, lichen_101010, lichenever, lihongkang, lilei, limingqi107, ling, linqingke, Lin Xh, liubuyu, liuwenhao4, liuxiao78, liuxiao93, liuyang_655, liuzhongkai, Lixia, lixian, liyanliu, liyong, lizhenyu, luopengting, luoyang, lvchangquan, lvliang, lz, mahdi, Mahdi, maning202007, Margaret_wangrui, mayang, mengyuanli, Ming_blue, nhussain, ougongchang, panfengfeng, panyifeng, Payne, Peilin, peixu_ren, Pengyongrong, qianlong, qianjiahong, r1chardf1d0, riemann_penn, rmdyh, Sheng, shenwei41, simson, Simson, Su, sunsuodong, tao_yunhao, tinazhang, VectorSL, , Wan, wandongdong, wangdongxu, wangmin, wangnan39@huawei.com, wangyue01, wangzhe, wanyiming, Wei, wenchunjiang, wilfChen, WilliamLian, wsc, wudenggang, wukesong, wuweikang, wuxuejian, Xiao Tianci, Xiaoda, xiefangqi, xinyunfan, xuanyue, xulei2020, Xun, xuyongfei, yanghaitao, yanghaitao1, yanghaoran, YangLuo, yangruoqi713, yankai, yanzhenxiang2020, yao_yf, yepei6, yeyunpeng, Yi, yoni, yoonlee666, yuchaojie, yujianfeng, yuximiao, zengzitao, Zhang, zhanghaibo5@huawei.com, zhanghuiyao, zhanghui_china, zhangxinfeng3, zhangyihui, zhangz0911gm, zhanke, zhanyuan, zhaodezan, zhaojichen, zhaoting, zhaozhenlong, zhengjun10, Zhenglong Li, zhiqwang, zhoufeng, zhousiyi, zhouyaqiang, zhouyifengCode, Zichun, Zirui, Ziyan, zjun, ZPaC, wangfengwfwf, zymaa, gerayking.
Contributions of any kind are welcome!
# MindSpore 1.6.1
## MindSpore 1.6.1 Release Notes
### Bug fixes
- Fix the problem that the accuracy of the transformer network decreases.
- Fix the problem that the accuracy of the warpctc network decreases.
### Contributors
Thanks goes to these wonderful people:
Adel, AGroupofProbiotocs, anthonyaje, anzhengqi, askmiao, baihuawei, baiyangfan, bai-yangfan, bingyaweng, BowenK, buxue, caifubi, CaoJian, caojian05, caozhou, Cathy, changzherui, chenbo116, chenfei, chengxianbin, chenhaozhe, chenjianping, chenzomi, chenzupeng, chujinjin, cj, cjh9368, Corleone, damon0626, danish, Danish, davidmc, dayschan, doitH, dong-li001, eric, Eric, fary86, fuzhiye, Gaoxiong, GAO_HYP_XYJ, gengdongjie, Gogery, gongdaguo, gray0v0, gukecai, guoqi, gzhcv, hangq, hanhuifeng2020, Harshvardhan, He, heleiwang, hexia, Hoai, HuangBingjian, huangdongrun, huanghui, huangxinjing, huqi, huzhifeng, hwjiaorui, Islam Amin, Jesse, , Jiabin Liu, jianghui58, jiangzhiwen, Jiaqi, jin-xiulang, jinyaohui, jjfeing, John, Jonathan, jonyguo, JulyAi, jzg, kai00, kingfo, kingxian, kpy, kswang, laiyongqiang, leonwanghui, Li, liangchenghui, liangzelang, lichen_101010, lichenever, lihongkang, lilei, limingqi107, ling, linqingke, Lin Xh, liubuyu, liuwenhao4, liuxiao78, liuxiao93, liuyang_655, liuzhongkai, Lixia, lixian, liyanliu, liyong, lizhenyu, luopengting, luoyang, lvchangquan, lvliang, lz, mahdi, Mahdi, maning202007, Margaret_wangrui, mayang, mengyuanli, Ming_blue, nhussain, ougongchang, panfengfeng, panyifeng, Payne, Peilin, peixu_ren, Pengyongrong, qianlong, qianjiahong, r1chardf1d0, riemann_penn, rmdyh, Sheng, shenwei41, simson, Simson, Su, sunsuodong, tao_yunhao, tinazhang, VectorSL, , Wan, wandongdong, wangdongxu, wangmin, wangnan39@huawei.com, wangyue01, wangzhe, wanyiming, Wei, wenchunjiang, wilfChen, WilliamLian, wsc, wudenggang, wukesong, wuweikang, wuxuejian, Xiao Tianci, Xiaoda, xiefangqi, xinyunfan, xuanyue, xulei2020, Xun, xuyongfei, yanghaitao, yanghaitao1, yanghaoran, YangLuo, yangruoqi713, yankai, yanzhenxiang2020, yao_yf, yepei6, yeyunpeng, Yi, yoni, yoonlee666, yuchaojie, yujianfeng, yuximiao, zengzitao, Zhang, zhanghaibo5@huawei.com, zhanghuiyao, zhanghui_china, zhangxinfeng3, zhangyihui, zhangz0911gm, zhanke, zhanyuan, zhaodezan, zhaojichen, zhaoting, zhaozhenlong, zhengjun10, Zhenglong Li, zhiqwang, zhoufeng, zhousiyi, zhouyaqiang, zhouyifengCode, Zichun, Zirui, Ziyan, zjun, ZPaC, wangfengwfwf, zymaa, gerayking.
Contributions of any kind are welcome!
# MindSpore 1.6.0
## MindSpore 1.6.0 Release Notes
### Major Features and Improvements
#### OS
- [STABLE] Support macOS with CPU (X86)
- [BETA] Support macOS with CPU (M1)
#### FrontEnd
- [STABLE] Support JIT Fallback feature in Graph mode.
- [STABLE] Support compile cache feature in Graph mode.
- [STABLE] Add new optimizers, including ASGD and Rprop.
- [STABLE] Add new initializers, including Identity, Orthogonal, Dirac, Sparse and VarianceScaling.
- [STABLE] Support resuming training when an exception occurs in the process.
- [STABLE] Change `mindspore.nn.LSTMCell` from single-layer LSTM to single-cell LSTM.
- [BETA] Introduce `mindspore.ops.Custom` to customize your own operators for Ascend (AICore, AICPU), GPU and CPU backends; the custom type can be one of TBE, AKG, pure Python function or prebuilt binary (called an AOT operator). A minimal usage sketch follows below.
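A minimal sketch of the `pyfunc` custom type, assuming `ops.Custom` accepts the `out_shape`/`out_dtype` inference functions and the `func_type` keyword shown here; verify the exact signature against the r1.6 API documentation.

```python
import numpy as np
from mindspore import Tensor, ops

def add_np(a, b):
    # Plain NumPy implementation used as the kernel body.
    return np.add(a, b)

# Assumed minimal usage: output shape and dtype follow the first input.
custom_add = ops.Custom(add_np,
                        out_shape=lambda a, b: a,
                        out_dtype=lambda a, b: a,
                        func_type="pyfunc")

x = Tensor(np.ones([2, 3]).astype(np.float32))
y = Tensor(np.ones([2, 3]).astype(np.float32))
print(custom_add(x, y))
```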
#### PyNative
- [STABLE] Support heterogeneous feature in PyNative mode.
- [STABLE] Optimize memory allocation in PyNative mode.
#### Auto Parallel
- [STABLE] Support configuring the output shard strategy of the MatMul distributed operator.
- [STABLE] Support multi-instances parallel.
- [STABLE] Support activation slice communication and calculation overlap in Transformer.
- [STABLE] Support heterogeneous parallel tensor swap.
- [STABLE] Add implementations of distributed operator of ResizeNearestNeighbor.
- [STABLE] Add a communication operator named NeighborExchangeV2 that supports data exchange between 8 adjacent rank ids.
- [STABLE] Pipeline parallelism supports the GPU platform.
- [STABLE] Add cell-level data parallel interface.
- [STABLE] Support gradient AllReduce fusion according to the amount of data.
- [STABLE] Support a sharding strategy search algorithm called sharding propagation.
#### Executor
- [STABLE] Support multigraph sink and subgraph sink of MindRT.
- [STABLE] Support memory swap to break the device memory size limit on Ascend platform.
- [STABLE] Support dynamic deployment of distributed training cluster(GPU).
- [BETA] Support automatic failover of parameter server.
#### DataSet
- [STABLE] Support overwrite feature in MindRecord.
- [STABLE] Improve logging to be more user-friendly.
- [BETA] Support new feature [Dataset Offload](https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/enable_dataset_offload.html) to speed up data processing by heterogeneous computing.
- [BETA] Support new feature [Dataset Autotune](https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/enable_dataset_autotune.html) to adjust parallelism of dataset pipeline automatically.
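As a rough sketch of the two beta features above, assuming Dataset Autotune is enabled through `mindspore.dataset.config.set_enable_autotune` and that `map` takes an `offload` flag (names should be confirmed against the r1.6 documentation):

```python
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as vision

# Dataset Autotune: let the framework tune pipeline parallelism automatically.
ds.config.set_enable_autotune(True)

dataset = ds.Cifar10Dataset("./cifar-10-batches-bin")
# Dataset Offload: offload eligible map operations to the accelerator.
dataset = dataset.map(operations=[vision.RandomHorizontalFlip()],
                      input_columns="image",
                      offload=True)
```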
#### GraphKernel Fusion
- [STABLE] Support kernel fusion and generation for CPU backend.
#### Federated Learning
- [STABLE] FL-Client framework and model decoupling.
- [BETA] Support Cross-silo federated learning framework.
#### Debug
- [STABLE] Support dump at the cell level (Ascend).
- [STABLE] Support dumping Tensor statistics (Ascend/GPU).
- [STABLE] Support displaying corresponding code lines for fusion nodes.
- [STABLE] Support passing dump flag in Ascend backend in order to dump correct operators after fusion transformation.
### API Change
#### Backwards Incompatible Change
##### Python API
###### `mindspore.dataset.MindDataset` interface changes input parameter dataset_file([!27542](https://gitee.com/mindspore/mindspore/pulls/27542))
`MindDataset` contains the input parameter `dataset_file`, which is in the singular form. It can receive a single file path or a list that stores multiple file paths, so it is preferable for this parameter to be in the plural form. In addition, the input parameters of most dataset APIs, such as `TFRecordDataset`, are already in the plural form (`dataset_files`). To ensure consistency, the input parameter `dataset_file` of MindDataset is changed to the plural form `dataset_files`; the updated version can be seen in the API of [mindspore.dataset.MindDataset](https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/dataset/mindspore.dataset.MindDataset.html#mindspore.dataset.MindDataset).
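For example (file names are hypothetical):

```python
import mindspore.dataset as ds

# Before 1.6: singular parameter name
# dataset = ds.MindDataset(dataset_file=["data_part0.mindrecord", "data_part1.mindrecord"])

# From 1.6: plural form, consistent with other dataset APIs such as TFRecordDataset
dataset = ds.MindDataset(dataset_files=["data_part0.mindrecord", "data_part1.mindrecord"])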
###### Deprecated usage: `import mindspore.dataset.engine.datasets as ds`. Use `import mindspore.dataset as ds` instead as recommended
We have reconstructed `mindspore/dataset/engine/datasets.py` into `datasets.py`, `datasets_version.py`, `datasets_text.py`, `datasets_audio.py`, `datasets_standard_format.py` and `datasets_user_defined.py`. This is more convenient for subsequent maintenance, but the specific dataset loading classes are now scattered across several files, so the corresponding class cannot always be found from a single file name; it is therefore recommended to use the unified usage `import mindspore.dataset as ds`.
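Only the import path changes; a small illustrative example:

```python
# Deprecated:
# import mindspore.dataset.engine.datasets as ds

# Recommended unified entry point:
import mindspore.dataset as ds

dataset = ds.Cifar10Dataset("./cifar-10-batches-bin", num_samples=64)
```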
###### Delete `mindspore.Tensor`'s property `virtual_flag`([!26989](https://gitee.com/mindspore/mindspore/pulls/26989))
###### Delete `mindspore.Parameter`'s property `is_init`([!26989](https://gitee.com/mindspore/mindspore/pulls/26989))
###### Delete `mindspore.nn.ROC`'s interface `roc`([!25713](https://gitee.com/mindspore/mindspore/pulls/25713))
###### The `shard()` interface of primitive is changed from `shard(strategy)` to `shard(in_strategy=None, out_strategy=None)`
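A sketch of the change for a `MatMul` primitive, with illustrative strategy values:

```python
from mindspore import ops

matmul = ops.MatMul()

# Old form: a single positional strategy
# matmul.shard(((2, 1), (1, 4)))

# New form: the input strategy is passed by keyword; an output strategy may also be supplied
matmul.shard(in_strategy=((2, 1), (1, 4)))
```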
###### The `set_auto_parallel_context()` interface of context is changed from `set_auto_parallel_context(parallel_mode=AUTO_PARALLEL, auto_parallel_search_mode="dynamic_programming")` to `set_auto_parallel_context(parallel_mode=AUTO_PARALLEL, search_mode="dynamic_programming")`
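Only the keyword name changes; a minimal sketch:

```python
from mindspore import context
from mindspore.context import ParallelMode

# Old: auto_parallel_search_mode="dynamic_programming"
# New:
context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL,
                                  search_mode="dynamic_programming")
```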
#### Collect Data and Create Landscape
##### Python API
###### `mindspore.train.callback.SummaryCollector` interface's parameter `collect_specified_data` adds a new operation `collect_landscape` ([!26229](https://gitee.com/mindspore/mindspore/pulls/26229))
`collect_landscape` can collect the parameters needed to create the loss landscape. We can see the updated version in the API of [mindspore.train.callback.SummaryCollector](https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.train.html#mindspore.train.callback.SummaryCollector).
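A hedged sketch of enabling landscape collection during training; the keys inside `collect_landscape` below are assumed and should be verified against the r1.6 documentation.

```python
from mindspore.train.callback import SummaryCollector

# Assumed keys inside 'collect_landscape'; check the r1.6 API docs for the exact schema.
landscape_cfg = {
    "landscape_size": 40,                          # resolution of the loss surface grid
    "unit": "epoch",                               # collect by epoch
    "create_landscape": {"train": True, "result": False},
    "num_samples": 512,                            # samples used to compute the loss
    "intervals": [[1, 2, 3, 4, 5]],                # which epochs form one landscape
}

summary_collector = SummaryCollector(
    summary_dir="./summary",
    collect_specified_data={"collect_landscape": landscape_cfg},
)
# Pass it as a callback: Model.train(..., callbacks=[summary_collector])
```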
###### `mindspore.train.callback` add new interface `SummaryLandscape` ([!26229](https://gitee.com/mindspore/mindspore/pulls/26229))
`SummaryLandscape` can help you collect loss landscape information. It can create a landscape in the PCA direction or a random direction by calculating the loss. We can see the updated version in the API of [mindspore.train.callback.SummaryLandscape](https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.train.html#mindspore.train.callback.SummaryLandscape).
### Bug fixes
#### Executor
- Fix process hanging while calling MPI_comm_create in asymmetric pipeline split scenario. ([!28707](https://gitee.com/mindspore/mindspore/pulls/28707))
- Fix the execution error when the weights are shared between graph mode and PyNative mode.([!26635](https://gitee.com/mindspore/mindspore/pulls/26635))
- Fix the probabilistic coredump when freeing memory under PyNative mode. ([!25472](https://gitee.com/mindspore/mindspore/pulls/25472))
#### Dataset
- Fix abnormal memory increase when running a dataset for a long time. ([!26237](https://gitee.com/mindspore/mindspore/pulls/26237))
- Fix saving MindRecord files with Chinese path on Windows. ([!28378](https://gitee.com/mindspore/mindspore/pulls/28378))
## MindSpore Lite
### Major Features and Improvements
#### Converter and runtime
- [STABLE] Add more fusion patterns in the converter tool to improve runtime performance.
- [STABLE] Support taking OpenGL texture as the input and output of inference.
- [STABLE] Refactor the JAVA API.
- [BETA] Support inference on Ascend310.
#### x86 backend optimization
- [STABLE] Optimize kernels for x86 using Advanced Vector Extensions(AVX512).
#### ARM backend optimization
- [STABLE] Support heterogeneous parallel inference, including splitting operators, constructing heterogeneous subgraphs, and heterogeneous parallel scheduling between CPUs and GPUs.
- [STABLE] Add more FP16 operators.
#### Post quantization
- [STABLE] Post quantization supports debugging.
- [STABLE] Full quantization supports choosing non-quantized nodes.
- [STABLE] Mixed bit quantization supports auto-tune.
#### Training on Device
- [STABLE] Support user-defined algorithm models to access the federated learning framework.
### Contributors
Thanks goes to these wonderful people:
AGroupofProbiotocs, anzhengqi, askmiao, baihuawei, baiyangfan, bai-yangfan, bingyaweng, BowenK, buxue, caifubi, CaoJian, caojian05, caozhou, Cathy, changzherui, chenbo116, chenfei, chengxianbin, chenhaozhe, chenjianping, chenzomi, chenzupeng, chujinjin, cj, cjh9368, Corleone, damon0626, danish, Danish, davidmc, dayschan, doitH, dong-li001, fary86, fuzhiye, Gaoxiong, GAO_HYP_XYJ, gengdongjie, Gogery, gongdaguo, gray0v0, gukecai, guoqi, gzhcv, hangq, hanhuifeng2020, Harshvardhan, He, heleiwang, hesham, hexia, Hoai, HuangBingjian, huangdongrun, huanghui, huangxinjing, huqi, huzhifeng, hwjiaorui, Jiabin Liu, jianghui58, Jiaqi, jin-xiulang, jinyaohui, jjfeing, John, jonyguo, JulyAi, jzg, kai00, kingfo, kingxian, kpy, kswang, liuyongqi, laiyongqiang, leonwanghui, liangchenghui, liangzelang, lichen_101010, lichenever, lihongkang, lilei, limingqi107, ling, linqingke, Lin Xh, liubuyu, liuwenhao4, liuxiao78, liuxiao93, liuyang_655, liuzhongkai, Lixia, lixian, liyanliu, liyong, lizhenyu, luopengting, lvchangquan, lvliang, lz, maning202007, Margaret_wangrui, mengyuanli, Ming_blue, ms_yan, ougongchang, panfengfeng, panyifeng, Payne, Peilin, peixu_ren, Pengyongrong, qianlong, qianjiahong, r1chardf1d0, riemann_penn, rmdyh, Sheng, shenwei41, simson, Simson, Su, sunsuodong, tao_yunhao, tinazhang, VectorSL, , Wan, wandongdong, wangdongxu, wangmin, [wangnan39@huawei.com](mailto:wangnan39@huawei.com), wangyue01, wangzhe, wanyiming, Wei, wenchunjiang, wilfChen, WilliamLian, wsc, wudenggang, wukesong, wuweikang, wuxuejian, Xiao Tianci, Xiaoda, xiefangqi, xinyunfan, xuanyue, xuyongfei, yanghaitao, yanghaitao1, yanghaoran, YangLuo, yangruoqi713, yankai, yanzhenxiang2020, yao_yf, yepei6, yeyunpeng, Yi, yoni, yoonlee666, yuchaojie, yujianfeng, yuximiao, zengzitao, Zhang, [zhanghaibo5@huawei.com](mailto:zhanghaibo5@huawei.com), zhanghuiyao, zhanghui_china, zhangxinfeng3, zhangyihui, zhangz0911gm, zhanke, zhanyuan, zhaodezan, zhaojichen, zhaoting, zhaozhenlong, zhengjun10, zhiqwang, zhoufeng, zhousiyi, zhouyaqiang, zhouyifengCode, Zichun, Ziyan, zjun, ZPaC, wangfengwfwf, zymaa, gerayking.
Contributions of any kind are welcome!
# MindSpore 1.5.2
## MindSpore 1.5.2 Release Notes
@ -417,7 +611,7 @@ thor(net, learning_rate, damping, momentum, weight_decay=0.0, loss_scale=1.0, ba
##### Dump Config
Previously, we could only dump tensor data for one or all steps. To make the dump feature easier to use, we changed the dump configuration format and dump structure. View the [New Dump Tutorial](https://www.mindspore.cn/docs/programming_guide/en/master/dump_in_graph_mode.html#dump).
Previously, we could only dump tensor data for one or all steps. To make the dump feature easier to use, we changed the dump configuration format and dump structure. View the [New Dump Tutorial](https://www.mindspore.cn/docs/programming_guide/en/r1.6/dump_in_graph_mode.html#dump).
| 1.2.1 | 1.3.0 |
| ------------------------------------------------------ | ------------------------------------------------------------------------------------------- |
@ -801,7 +995,7 @@ However, currently MindSpore Parser cannot parse numpy.ndarray in JIT-graph. To
###### mindspore.numpy interfaces remove support for keyword arguments `out` and `where`([!12726](https://gitee.com/mindspore/mindspore/pulls/12726))
Previously, we had incomplete support for keyword arguments `out` and `where` in mindspore.numpy interfaces; however, the `out` argument is only functional when the `where` argument is also provided, and `out` cannot be used to pass a reference to numpy functions. Therefore, we have removed these two arguments to avoid any confusion users may have. Their original functionality can be found in [np.where](https://www.mindspore.cn/docs/api/en/master/api_python/numpy/mindspore.numpy.where.html#mindspore.numpy.where)
Previously, we had incomplete support for keyword arguments `out` and `where` in mindspore.numpy interfaces; however, the `out` argument is only functional when the `where` argument is also provided, and `out` cannot be used to pass a reference to numpy functions. Therefore, we have removed these two arguments to avoid any confusion users may have. Their original functionality can be found in [np.where](https://www.mindspore.cn/docs/api/en/r1.6/api_python/numpy/mindspore.numpy.where.html#mindspore.numpy.where)
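The same selection is now written directly with `mindspore.numpy.where` (a minimal sketch):

```python
import mindspore.numpy as mnp
from mindspore import Tensor

cond = Tensor([True, False, True])
x = Tensor([1.0, 2.0, 3.0])
y = Tensor([10.0, 20.0, 30.0])

# Instead of passing out=/where= keyword arguments, select explicitly:
out = mnp.where(cond, x, y)   # -> [1.0, 20.0, 3.0]
```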
@ -1100,7 +1294,7 @@ MSTensor::DestroyTensorPtr(tensor);
###### `nn.MatMul` is now deprecated in favor of `ops.matmul` ([!12817](https://gitee.com/mindspore/mindspore/pulls/12817))
[ops.matmul](https://www.mindspore.cn/docs/api/en/master/api_python/ops/mindspore.ops.matmul.html#mindspore.ops.matmul) follows the API of [numpy.matmul](https://numpy.org/doc/stable/reference/generated/numpy.matmul.html) as closely as possible. As a function interface, [ops.matmul](https://www.mindspore.cn/docs/api/en/master/api_python/ops/mindspore.ops.matmul.html#mindspore.ops.matmul) is applied without instantiation, as opposed to `nn.MatMul`, which should only be used as a class instance.
[ops.matmul](https://www.mindspore.cn/docs/api/en/r1.6/api_python/ops/mindspore.ops.matmul.html#mindspore.ops.matmul) follows the API of [numpy.matmul](https://numpy.org/doc/stable/reference/generated/numpy.matmul.html) as closely as possible. As a function interface, [ops.matmul](https://www.mindspore.cn/docs/api/en/r1.6/api_python/ops/mindspore.ops.matmul.html#mindspore.ops.matmul) is applied without instantiation, as opposed to `nn.MatMul`, which should only be used as a class instance.
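A minimal sketch of the functional form:

```python
import numpy as np
import mindspore as ms
from mindspore import Tensor, ops

x = Tensor(np.ones((2, 3)), ms.float32)
y = Tensor(np.ones((3, 4)), ms.float32)

# Functional interface, applied without instantiation (unlike the deprecated nn.MatMul):
out = ops.matmul(x, y)   # shape (2, 4)
```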

akg (submodule)

@ -1 +1 @@
Subproject commit 288658ca72e8de65b17c3109170d1ef0d511eb28
Subproject commit e53ed0355b0cb1422ee406755196c964ebd4c4ad


@ -75,3 +75,60 @@ if(NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
find_required_package(FLEX)
endif()
endif()
# for macos, find appropriate macosx SDK then set SDKROOT and MACOSX_DEPLOYMENT_TARGET
if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
if(NOT DEFINED ENV{SDKROOT})
# arm64: macosx11.x
# x86_64: macosx10.x, macosx11.x
if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "arm64")
set(MACOSX_SDK_REGEX "MacOSX11(\\.\\d+)?")
else()
set(MACOSX_SDK_REGEX "MacOSX1[01](\\.\\d+)?")
endif()
set(MACOSX_XCODE_SDK_PATH "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs")
set(MACOSX_CLT_SDK_PATH "/Library/Developer/CommandLineTools/SDKs")
set(MACOSX_SDK_SEARCH_PATHS "${MACOSX_XCODE_SDK_PATH}/*" "${MACOSX_CLT_SDK_PATH}/*")
file(GLOB ALL_SDK_NAME ${MACOSX_SDK_SEARCH_PATHS})
# get highest SDK version meets the requirements
execute_process(
COMMAND bash -c "echo '${ALL_SDK_NAME}' | grep -Eo '${MACOSX_SDK_REGEX}' | sort -n | tail -1 | tr -d '\\n'"
OUTPUT_VARIABLE MACOSX_FIND_SDK_NAME
)
if(NOT MACOSX_FIND_SDK_NAME)
message(FATAL_ERROR
"can not find appropriate macosx SDK, find in ${ALL_SDK_NAME}, you may set SDKROOT manually"
)
endif()
if(IS_DIRECTORY "${MACOSX_XCODE_SDK_PATH}/${MACOSX_FIND_SDK_NAME}.sdk")
set(CMAKE_OSX_SYSROOT "${MACOSX_XCODE_SDK_PATH}/${MACOSX_FIND_SDK_NAME}.sdk")
else()
set(CMAKE_OSX_SYSROOT "${MACOSX_CLT_SDK_PATH}/${MACOSX_FIND_SDK_NAME}.sdk")
endif()
set(ENV{SDKROOT} ${CMAKE_OSX_SYSROOT})
endif()
message("macosx sdkroot: $ENV{SDKROOT}")
# set macosx deployment target based on SDK
if(NOT DEFINED ENV{MACOSX_DEPLOYMENT_TARGET})
execute_process(
COMMAND bash -c "cat $ENV{SDKROOT}/SDKSettings.json | \
grep -Eo 'MACOSX_DEPLOYMENT_TARGET\\\":\\\"\\d{2}\\.\\d+' | cut -d '\"' -f 3 | tr -d '\\n'"
OUTPUT_VARIABLE MACOSX_FIND_SDK_VERSION
)
if(NOT MACOSX_FIND_SDK_VERSION)
message(FATAL_ERROR "can not find MACOSX_DEPLOYMENT_TARGET in SDKROOT, \
please check whether it's a valid SDK path")
endif()
if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "arm64")
set(CMAKE_OSX_DEPLOYMENT_TARGET "11.0")
elseif(${MACOSX_FIND_SDK_VERSION} VERSION_LESS "10.15")
set(CMAKE_OSX_DEPLOYMENT_TARGET ${MACOSX_FIND_SDK_VERSION} CACHE STRING
"minimum macosx deployment target version" FORCE)
else()
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.15")
endif()
set(ENV{MACOSX_DEPLOYMENT_TARGET} ${CMAKE_OSX_DEPLOYMENT_TARGET})
endif()
message("macosx deployment target version: $ENV{MACOSX_DEPLOYMENT_TARGET}")
endif()


@ -21,6 +21,7 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR APPLE)
CONFIGURE_COMMAND ./config no-zlib no-shared
PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2021-3711.patch
PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2021-3712.patch
PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2022-0778.patch
)
include_directories(${openssl_INC})
add_library(mindspore::ssl ALIAS openssl::ssl)


@ -44,6 +44,12 @@ else()
set(MD5 "1a6274bc4a65b55a6fa70e264d796490")
endif()
if(BUILD_LITE)
set(PROTOBUF_PATCH_ROOT ${TOP_DIR}/third_party/patch/protobuf)
else()
set(PROTOBUF_PATCH_ROOT ${CMAKE_SOURCE_DIR}/third_party/patch/protobuf)
endif()
mindspore_add_pkg(protobuf
VER 3.13.0
LIBS protobuf
@ -51,7 +57,8 @@ mindspore_add_pkg(protobuf
URL ${REQ_URL}
MD5 ${MD5}
CMAKE_PATH cmake/
CMAKE_OPTION -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_BUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release)
CMAKE_OPTION -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_BUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release
PATCHES ${PROTOBUF_PATCH_ROOT}/CVE-2021-22570.patch)
include_directories(${protobuf_INC})
add_library(mindspore::protobuf ALIAS protobuf::protobuf)


@ -239,6 +239,10 @@ if(PLATFORM_ARM64)
if(NOT TARGET_MIX210)
__install_micro_wrapper()
endif()
if(MSLITE_ENABLE_RUNTIME_GLOG)
install(FILES ${glog_LIBPATH}/libglog.so.0.4.0 DESTINATION ${GLOG_DIR} RENAME libglog.so.0
COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
if(MSLITE_ENABLE_TOOLS)
if(NOT MSLITE_COMPILE_TWICE)
install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR}
@ -265,6 +269,142 @@ if(PLATFORM_ARM64)
install(TARGETS ${BENCHMARK_TRAIN_NAME} RUNTIME DESTINATION ${BENCHMARK_TRAIN_ROOT_DIR} COMPONENT
${RUNTIME_COMPONENT_NAME})
endif()
if(MSLITE_ENABLE_CONVERTER)
install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${CONVERTER_ROOT_DIR}/include
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h"
PATTERN "train*" EXCLUDE PATTERN "delegate.h" EXCLUDE PATTERN "lite_session.h" EXCLUDE)
install(FILES ${API_HEADER} DESTINATION ${CONVERTER_ROOT_DIR}/include/api
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${MINDAPI_BASE_HEADER} DESTINATION ${CONVERTER_ROOT_DIR}/include/core/mindapi/base
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${MINDAPI_IR_HEADER} DESTINATION ${CONVERTER_ROOT_DIR}/include/core/mindapi/ir
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${ABSTRACT_HEADER} DESTINATION ${CONVERTER_ROOT_DIR}/include/core/abstract
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${API_IR_HEADER} DESTINATION ${CONVERTER_ROOT_DIR}/include/core/api/ir
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${BASE_HEADER} DESTINATION ${CONVERTER_ROOT_DIR}/include/core/base
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${IR_DTYPE_HEADER} DESTINATION ${CONVERTER_ROOT_DIR}/include/core/ir/dtype
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${IR_HEADER} DESTINATION ${CONVERTER_ROOT_DIR}/include/core/ir
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(DIRECTORY ${TOP_DIR}/mindspore/core/ops/ DESTINATION ${CONVERTER_ROOT_DIR}/include/core/ops
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
install(FILES ${UTILS_HEADER} DESTINATION ${CONVERTER_ROOT_DIR}/include/core/utils
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(DIRECTORY ${TOP_DIR}/mindspore/lite/build/schema/
DESTINATION ${CONVERTER_ROOT_DIR}/include/schema
COMPONENT ${RUNTIME_COMPONENT_NAME}
FILES_MATCHING PATTERN "*.h" PATTERN "schema_generated.h" EXCLUDE)
install(DIRECTORY ${flatbuffers_INC}/ DESTINATION ${CONVERTER_ROOT_DIR}/include/third_party
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(DIRECTORY ${glog_LIBPATH}/../include/glog/
DESTINATION ${CONVERTER_ROOT_DIR}/include/third_party/glog
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
install(DIRECTORY ${TOP_DIR}/third_party/securec/include/
DESTINATION ${CONVERTER_ROOT_DIR}/include/third_party/securec
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
install(TARGETS converter_lite RUNTIME DESTINATION ${CONVERTER_ROOT_DIR}/converter
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${TOP_DIR}/mindspore/lite/build/tools/converter/registry/libmslite_converter_plugin.so
DESTINATION ${CONVERTER_ROOT_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${glog_LIBPATH}/libglog.so.0.4.0 DESTINATION ${CONVERTER_ROOT_DIR}/lib RENAME libglog.so.0
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${opencv_LIBPATH}/libopencv_core.so.4.5.2
DESTINATION ${CONVERTER_ROOT_DIR}/lib RENAME libopencv_core.so.4.5
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${opencv_LIBPATH}/libopencv_imgcodecs.so.4.5.2
DESTINATION ${CONVERTER_ROOT_DIR}/lib RENAME libopencv_imgcodecs.so.4.5
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${opencv_LIBPATH}/libopencv_imgproc.so.4.5.2
DESTINATION ${CONVERTER_ROOT_DIR}/lib RENAME libopencv_imgproc.so.4.5
COMPONENT ${RUNTIME_COMPONENT_NAME})
if(MSLITE_ENABLE_ACL)
set(LITE_ACL_DIR ${TOP_DIR}/mindspore/lite/build/tools/converter/adapter/acl)
install(FILES ${LITE_ACL_DIR}/mindspore_shared_lib/libmindspore_shared_lib.so
DESTINATION ${CONVERTER_ROOT_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME})
if(MSLITE_ENABLE_RUNTIME_CONVERT)
install(FILES ${LITE_ACL_DIR}/mindspore_shared_lib/libmindspore_shared_lib.so
DESTINATION ${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${glog_LIBPATH}/libglog.so.0.4.0 DESTINATION ${RUNTIME_LIB_DIR} RENAME libglog.so.0
COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
if(MSLITE_MINDDATA_IMPLEMENT STREQUAL "cloud" AND MSLITE_ENABLE_RUNTIME_CONVERT)
file(GLOB DATA_ENGINE_LIB_LIST ${LITE_ACL_DIR}/_c_dataengine/*.so)
file(GLOB DATA_RECORD_LIB_LIST ${LITE_ACL_DIR}/_c_mindrecord/*.so)
install(FILES ${DATA_ENGINE_LIB_LIST}
DESTINATION ${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${DATA_RECORD_LIB_LIST}
DESTINATION ${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${jpeg_turbo_LIBPATH}/libjpeg.so.62.3.0
DESTINATION ${RUNTIME_LIB_DIR} RENAME libjpeg.so.62 COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${jpeg_turbo_LIBPATH}/libturbojpeg.so.0.2.0
DESTINATION ${RUNTIME_LIB_DIR} RENAME libturbojpeg.so.0 COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${tinyxml2_LIBPATH}/libtinyxml2.so.8.0.0
DESTINATION ${RUNTIME_LIB_DIR} RENAME libtinyxml2.so.8 COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${icu4c_LIBPATH}/libicuuc.so.67.1
DESTINATION ${RUNTIME_LIB_DIR} RENAME libicuuc.so.67 COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${icu4c_LIBPATH}/libicudata.so.67.1
DESTINATION ${RUNTIME_LIB_DIR} RENAME libicudata.so.67 COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${icu4c_LIBPATH}/libicui18n.so.67.1
DESTINATION ${RUNTIME_LIB_DIR} RENAME libicui18n.so.67 COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${grpc_LIBPATH}/libmindspore_grpc++.so.1.36.1 DESTINATION ${RUNTIME_LIB_DIR}
RENAME libmindspore_grpc++.so.1 COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${grpc_LIBPATH}/libmindspore_grpc.so.15.0.0 DESTINATION
${RUNTIME_LIB_DIR} RENAME libmindspore_grpc.so.15 COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${grpc_LIBPATH}/libmindspore_gpr.so.15.0.0 DESTINATION
${RUNTIME_LIB_DIR} RENAME libmindspore_gpr.so.15 COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${grpc_LIBPATH}/libmindspore_upb.so.15.0.0 DESTINATION
${RUNTIME_LIB_DIR} RENAME libmindspore_upb.so.15 COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${grpc_LIBPATH}/libmindspore_address_sorting.so.15.0.0 DESTINATION ${RUNTIME_LIB_DIR}
RENAME libmindspore_address_sorting.so.15 COMPONENT ${RUNTIME_COMPONENT_NAME})
## Public header files for minddata
install(
FILES ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/config.h
${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h
${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/execute.h
${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/text.h
${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/transforms.h
${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h
${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/vision_lite.h
${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/vision_ascend.h
DESTINATION ${RUNTIME_INC_DIR}/dataset COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
endif()
if(MSLITE_ENABLE_DPICO_ATC_ADAPTER)
install(FILES ${TOP_DIR}/mindspore/lite/build/tools/converter/adapter/dpico/libdpico_atc_adapter.so
DESTINATION ${CONVERTER_ROOT_DIR}/providers/SD3403 COMPONENT ${RUNTIME_COMPONENT_NAME})
if(MSLITE_ENABLE_TOOLS)
install(TARGETS ${BECHCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR}
COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
endif()
if(MSLITE_ENABLE_RUNTIME_GLOG)
install(DIRECTORY ${glog_LIBPATH}/../include/glog/ DESTINATION ${RUNTIME_INC_DIR}/third_party/glog
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
install(FILES ${glog_LIBPATH}/libglog.so.0.4.0 DESTINATION ${GLOG_DIR} RENAME libglog.so.0
COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
if(MSLITE_ENABLE_RUNTIME_CONVERT)
install(FILES ${TOP_DIR}/mindspore/lite/build/tools/converter/registry/libmslite_converter_plugin.so
DESTINATION ${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${opencv_LIBPATH}/libopencv_core.so.4.5.2
DESTINATION ${RUNTIME_LIB_DIR} RENAME libopencv_core.so.4.5
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${opencv_LIBPATH}/libopencv_imgcodecs.so.4.5.2
DESTINATION ${RUNTIME_LIB_DIR} RENAME libopencv_imgcodecs.so.4.5
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${opencv_LIBPATH}/libopencv_imgproc.so.4.5.2
DESTINATION ${RUNTIME_LIB_DIR} RENAME libopencv_imgproc.so.4.5
COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
endif()
endif()
if(MSLITE_ENABLE_TESTCASES)
install(FILES ${TOP_DIR}/mindspore/lite/build/test/lite-test DESTINATION ${TEST_CASE_DIR}
@ -495,6 +635,10 @@ else()
install(FILES ${TOP_DIR}/mindspore/lite/tools/obfuscator/lib/linux-x64/libmsdeobfuscator-lite.so
DESTINATION ${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
if(MSLITE_ENABLE_RUNTIME_GLOG)
install(FILES ${glog_LIBPATH}/libglog.so.0.4.0 DESTINATION ${GLOG_DIR} RENAME libglog.so.0
COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
if(MSLITE_ENABLE_CONVERTER)
install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${CONVERTER_ROOT_DIR}/include
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h"


@ -141,6 +141,14 @@ install(
COMPONENT mindspore
)
## Public header files for mindapi
install(
DIRECTORY ${CMAKE_SOURCE_DIR}/mindspore/core/mindapi/base
${CMAKE_SOURCE_DIR}/mindspore/core/mindapi/ir
DESTINATION ${INSTALL_BASE_DIR}/include/mindapi
COMPONENT mindspore
)
## Public header files for minddata
install(
FILES ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include/dataset/config.h


@ -114,6 +114,7 @@ function(__find_pkg_then_add_target pkg_name pkg_exe lib_path)
message("_FIND:${${pkg_name}_BASE_DIR}")
if(pkg_exe)
unset(${pkg_exe}_EXE CACHE)
find_program(${pkg_exe}_EXE ${pkg_exe} PATHS ${${pkg_name}_BASE_DIR}/bin NO_DEFAULT_PATH)
if(NOT ${pkg_exe}_EXE)
return()
@ -206,7 +207,6 @@ endfunction()
set(MS_FIND_NO_DEFAULT_PATH NO_CMAKE_PATH NO_CMAKE_ENVIRONMENT_PATH NO_SYSTEM_ENVIRONMENT_PATH
NO_CMAKE_BUILDS_PATH NO_CMAKE_PACKAGE_REGISTRY NO_CMAKE_SYSTEM_PATH
NO_CMAKE_SYSTEM_PACKAGE_REGISTRY)
set(MS_FIND_NO_DEFAULT_PATH ${MS_FIND_NO_DEFAULT_PATH} PARENT_SCOPE)
function(mindspore_add_pkg pkg_name)
set(options)
@ -239,6 +239,9 @@ function(mindspore_add_pkg pkg_name)
"${CMAKE_CXX_COMPILER_VERSION}-${CMAKE_C_COMPILER_VERSION}
${ARGN} - ${${pkg_name}_USE_STATIC_LIBS}- ${${pkg_name}_PATCHES_HASH}
${${pkg_name}_CXXFLAGS}--${${pkg_name}_CFLAGS}--${${pkg_name}_LDFLAGS}")
if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set(${pkg_name}_CONFIG_TXT "${${pkg_name}_CONFIG_TXT}--${CMAKE_OSX_DEPLOYMENT_TARGET}")
endif()
string(REPLACE ";" "-" ${pkg_name}_CONFIG_TXT ${${pkg_name}_CONFIG_TXT})
string(MD5 ${pkg_name}_CONFIG_HASH ${${pkg_name}_CONFIG_TXT})
@ -268,7 +271,7 @@ function(mindspore_add_pkg pkg_name)
return()
endif()
elseif(NOT PKG_HEAD_ONLY)
find_package(${__FIND_PKG_NAME} ${PKG_VER} ${MS_FIND_NO_DEFAULT_PATH})
find_package(${__FIND_PKG_NAME} ${PKG_VER} PATHS ${${pkg_name}_BASE_DIR} ${MS_FIND_NO_DEFAULT_PATH})
if(${__FIND_PKG_NAME}_FOUND)
set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE)
message("Found pkg: ${__FIND_PKG_NAME}")


@ -619,7 +619,7 @@
{"op_name": "Mod", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["int8", ""], ["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""], ["uint8", ""]], [["int32", ""], ["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "mod.so", "compute_cost": 10, "kernel_name": "mod", "partial_flag": true, "reshape_type": "", "dynamic_shape": false, "dynamic_compile_static": false, "need_check_supported": false, "is_dynamic_format": false, "op_pattern": "broadcast"}
{"op_name": "MaxPoolGradGrad", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "kernel_size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_mode", "param_type": "required", "type": "str", "value": "all"}, {"name": "format", "param_type": "optional", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_grad_grad.so", "compute_cost": 10, "kernel_name": "max_pool_grad_grad", "partial_flag": true, "reshape_type": "", "dynamic_shape": false, "dynamic_compile_static": false, "need_check_supported": false, "is_dynamic_format": false, "op_pattern": ""}
{"op_name": "MaxPoolGradGradWithArgmax", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "grad", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "argmax", "need_compile": false, "param_type": "optional", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "kernel_size", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "strides", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "pad_mode", "param_type": "required", "type": "str", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["uint16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["int64", "NC1HWC0"], ["float16", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "max_pool_grad_grad_with_argmax.so", "compute_cost": 10, "kernel_name": "max_pool_grad_grad_with_argmax", "partial_flag": true, "reshape_type": "", "dynamic_shape": false, "dynamic_compile_static": false, "need_check_supported": false, "is_dynamic_format": false, "op_pattern": ""}
{"op_name": "TensorMove", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int32", ""], ["int32", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]], [["int8", ""], ["int8", ""]], [["uint8", ""], ["uint8", ""]], [["bool", ""], ["bool", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "tensor_move.so", "compute_cost": 10, "kernel_name": "tensor_move", "partial_flag": true, "reshape_type": "", "dynamic_shape": false, "dynamic_compile_static": false, "need_check_supported": false, "is_dynamic_format": false, "op_pattern": "formatAgnostic"}
{"op_name": "TensorMove", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", ""], ["bool", ""]], [["int8", ""], ["int8", ""]], [["int16", ""], ["int16", ""]], [["int32", ""], ["int32", ""]], [["int64", ""], ["int64", ""]], [["float16", ""], ["float16", ""]], [["float32", ""], ["float32", ""]], [["float64", ""], ["float64", ""]], [["uint8", ""], ["uint8", ""]], [["uint16", ""], ["uint16", ""]], [["uint32", ""], ["uint32", ""]], [["uint64", ""], ["uint64", ""]], [["complex64", "DefaultFormat"], ["complex64", "DefaultFormat"]], [["complex128", "DefaultFormat"], ["complex128", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "tensor_move.so", "compute_cost": 10, "kernel_name": "tensor_move", "partial_flag": true, "reshape_type": "", "dynamic_shape": false, "dynamic_compile_static": false, "need_check_supported": false, "is_dynamic_format": false, "op_pattern": "formatAgnostic"}
{"op_name": "PopulationCount", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["int16", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["int16", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["uint16", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "population_count.so", "compute_cost": 10, "kernel_name": "population_count", "partial_flag": true, "reshape_type": "", "dynamic_shape": false, "dynamic_compile_static": false, "need_check_supported": false, "is_dynamic_format": false, "op_pattern": ""}
{"op_name": "ParallelConcat", "inputs": [{"index": 0, "name": "values", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "outputs": [{"index": 0, "name": "output_data", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "shape", "param_type": "required", "type": "listInt", "value": "all"}, {"name": "N", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["bool", "NC1HWC0"], ["bool", "NC1HWC0"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int8", "NC1HWC0"], ["int8", "NC1HWC0"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint8", "NC1HWC0"], ["uint8", "NC1HWC0"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int16", "NC1HWC0"], ["int16", "NC1HWC0"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint16", "NC1HWC0"], ["uint16", "NC1HWC0"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "NC1HWC0"], ["int32", "NC1HWC0"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint32", "NC1HWC0"], ["uint32", "NC1HWC0"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["int64", "NC1HWC0"], ["int64", "NC1HWC0"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["uint64", "NC1HWC0"], ["uint64", "NC1HWC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["bool", "NHWC"], ["bool", "NHWC"]], [["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "NHWC"], ["int8", "NHWC"]], [["uint8", "NHWC"], ["uint8", "NHWC"]], [["int16", "NHWC"], ["int16", "NHWC"]], [["uint16", "NHWC"], ["uint16", "NHWC"]], [["int32", "NHWC"], ["int32", "NHWC"]], [["uint32", "NHWC"], ["uint32", "NHWC"]], [["int64", "NHWC"], ["int64", "NHWC"]], [["uint64", "NHWC"], ["uint64", "NHWC"]], [["float16", "NHWC"], ["float16", "NHWC"]], [["float32", "NHWC"], ["float32", "NHWC"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "parallel_concat.so", "compute_cost": 10, "kernel_name": "parallel_concat", "partial_flag": true, "reshape_type": "", "dynamic_shape": false, "dynamic_compile_static": false, "need_check_supported": false, "is_dynamic_format": false, "op_pattern": ""}
{"op_name": "AdamApplyOneAssign", "inputs": [{"index": 0, "name": "input0", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "input1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "input2", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "input3", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "input4", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 5, "name": "mul0_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 6, "name": "mul1_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 7, "name": "mul2_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 8, "name": "mul3_x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 9, "name": "add2_y", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "output0", "need_compile": true, "param_type": "required", "shape": "all"}, {"index": 1, "name": "output1", "need_compile": true, "param_type": "required", "shape": "all"}, {"index": 2, "name": "output2", "need_compile": true, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "adam_apply_one_assign.so", "compute_cost": 10, "kernel_name": "adam_apply_one_assign", "partial_flag": true, "reshape_type": "", "dynamic_shape": false, "dynamic_compile_static": false, "need_check_supported": false, "is_dynamic_format": false, "op_pattern": ""}


@ -23,7 +23,7 @@ mindspore.dataset.CLUEDataset
- **num_shards** (int, 可选) - 指定分布式训练时将数据集进行划分的分片数默认值None。指定此参数后, `num_samples` 表示每个分片的最大样本数。
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号默认值None。只有当指定了 `num_shards` 时才能指定此参数。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读 `单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_ 。默认值None不使用缓存。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读 `单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/cache.html>`_ 。默认值None不使用缓存。
根据给定的 `task` 参数配置,数据集会生成不同的输出列:


@ -22,7 +22,7 @@
- **num_shards** (int, 可选) - 指定分布式训练时将数据集进行划分的分片数默认值None。指定此参数后, `num_samples` 表示每个分片的最大样本数。
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号默认值None。只有当指定了 `num_shards` 时才能指定此参数。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读 `单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_ 。默认值None不使用缓存。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读 `单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/cache.html>`_ 。默认值None不使用缓存。
**异常:**


@ -19,7 +19,7 @@ mindspore.dataset.CelebADataset
- **num_samples** (int, 可选) - 指定从数据集中读取的样本数可以小于数据集总数。默认值None读取全部样本图片。
- **num_shards** (int, 可选) - 指定分布式训练时将数据集进行划分的分片数默认值None。指定此参数后 `num_samples` 表示每个分片的最大样本数。
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号默认值None。只有当指定了 `num_shards` 时才能指定此参数。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_ 。默认值None不使用缓存。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/cache.html>`_ 。默认值None不使用缓存。
**异常:**


@ -18,7 +18,7 @@ mindspore.dataset.Cifar100Dataset
- **sampler** (Sampler, 可选) - 指定从数据集中选取样本的采样器默认值None下表中会展示不同配置的预期行为。
- **num_shards** (int, 可选) - 指定分布式训练时将数据集进行划分的分片数默认值None。指定此参数后 `num_samples` 表示每个分片的最大样本数。
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号默认值None。只有当指定了 `num_shards` 时才能指定此参数。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_ 。默认值None不使用缓存。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/cache.html>`_ 。默认值None不使用缓存。
**异常:**


@ -18,7 +18,7 @@ mindspore.dataset.Cifar10Dataset
- **sampler** (Sampler, 可选) - 指定从数据集中选取样本的采样器默认值None下表中会展示不同配置的预期行为。
- **num_shards** (int, 可选) - 指定分布式训练时将数据集进行划分的分片数默认值None。指定此参数后 `num_samples` 表示每个分片的最大样本数。
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号默认值None。只有当指定了 `num_shards` 时才能指定此参数。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_ 。默认值None不使用缓存。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/cache.html>`_ 。默认值None不使用缓存。
**异常:**


@ -17,7 +17,7 @@
- **sampler** (Sampler, 可选) - 指定从数据集中选取样本的采样器默认值None下表中会展示不同配置的预期行为。
- **num_shards** (int, 可选) - 指定分布式训练时将数据集进行划分的分片数默认值None。指定此参数后 `num_samples` 表示每个分片的最大样本数。
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号默认值None。只有当指定了 `num_shards` 时才能指定此参数。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_ 。默认值None不使用缓存。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/cache.html>`_ 。默认值None不使用缓存。
- **extra_metadata** (bool, 可选) - 用于指定是否额外输出一个数据列用于表示图片元信息。如果为True则将额外输出一个名为 `[_meta-filename, dtype=string]` 的数据列默认值False。
根据不同 `task` 参数设置,生成数据集具有不同的输出列:


@ -5,8 +5,8 @@ mindspore.dataset.DatasetCache
创建数据缓存客户端实例。
关于单节点数据缓存的使用,请参阅 `单节点数据缓存教程 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/enable_cache.html>`_
`单节点数据缓存编程指南 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_
关于单节点数据缓存的使用,请参阅 `单节点数据缓存教程 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/enable_cache.html>`_
`单节点数据缓存编程指南 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/cache.html>`_
**参数:**


@ -19,7 +19,7 @@ mindspore.dataset.ImageFolderDataset
- **decode** (bool, 可选) - 是否对读取的图片进行解码操作默认值False不解码。
- **num_shards** (int, 可选) - 指定分布式训练时将数据集进行划分的分片数默认值None。指定此参数后 `num_samples` 表示每个分片的最大样本数。
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号默认值None。只有当指定了 `num_shards` 时才能指定此参数。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_ 。默认值None不使用缓存。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/cache.html>`_ 。默认值None不使用缓存。
**异常:**


@ -19,7 +19,7 @@
- **decode** (bool, 可选) - 是否对读取的图片进行解码操作默认值False不解码。
- **num_shards** (int, 可选) - 指定分布式训练时将数据集进行划分的分片数默认值None。指定此参数后 `num_samples` 表示每个分片的最大样本数。
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号默认值None。只有当指定了 `num_shards` 时才能指定此参数。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_ 。默认值None不使用缓存。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/cache.html>`_ 。默认值None不使用缓存。
**异常:**


@ -24,7 +24,7 @@
- **padded_sample** (dict, 可选): 指定额外添加到数据集的样本,可用于在分布式训练时补齐分片数据,注意字典的键名需要与 `column_list` 指定的列名相同。默认值None不添加样本。需要与 `num_padded` 参数同时使用。
- **num_padded** (int, 可选) - 指定额外添加的数据集样本的数量。在分布式训练时可用于为数据集补齐样本,使得总样本数量可被 `num_shards` 整除。默认值None不添加样本。需要与 `padded_sample` 参数同时使用。
- **num_samples** (int, 可选) - 指定从数据集中读取的样本数。默认值None读取所有样本。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_ 。默认值None不使用缓存。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/cache.html>`_ 。默认值None不使用缓存。
**异常:**


@ -18,7 +18,7 @@ mindspore.dataset.MnistDataset
- **sampler** (Sampler, 可选) - 指定从数据集中选取样本的采样器默认值None下表中会展示不同配置的预期行为。
- **num_shards** (int, 可选) - 指定分布式训练时将数据集进行划分的分片数默认值None。指定此参数后 `num_samples` 表示每个分片的最大样本数。
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号默认值None。只有当指定了 `num_shards` 时才能指定此参数。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_ 。默认值None不使用缓存。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/cache.html>`_ 。默认值None不使用缓存。
**异常:**


@ -27,7 +27,7 @@ mindspore.dataset.TFRecordDataset
- **num_shards** (int, 可选) - 指定分布式训练时将数据集进行划分的分片数默认值None。指定此参数后, `num_samples` 表示每个分片的最大样本数。
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号默认值None。只有当指定了 `num_shards` 时才能指定此参数。
- **shard_equal_rows** (bool, 可选) - 分布式训练时为所有分片获取等量的数据行数。默认值False。如果 `shard_equal_rows` 为False则可能会使得每个分片的数据条目不相等从而导致分布式训练失败。因此当每个TFRecord文件的数据数量不相等时建议将此参数设置为True。注意只有当指定了 `num_shards` 时才能指定此参数。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读 `单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_ 。默认值None不使用缓存。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读 `单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/cache.html>`_ 。默认值None不使用缓存。
**异常:**


@ -19,7 +19,7 @@
- **num_shards** (int, 可选) - 指定分布式训练时将数据集进行划分的分片数默认值None。指定此参数后, `num_samples` 表示每个分片的最大样本数。
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号默认值None。只有当指定了 `num_shards` 时才能指定此参数。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读 `单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_ 。默认值None不使用缓存。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读 `单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/cache.html>`_ 。默认值None不使用缓存。
**异常:**


@ -22,7 +22,7 @@ mindspore.dataset.VOCDataset
- **sampler** (Sampler, 可选) - 指定从数据集中选取样本的采样器默认值None下表中会展示不同配置的预期行为。
- **num_shards** (int, 可选) - 指定分布式训练时将数据集进行划分的分片数默认值None。指定此参数后 `num_samples` 表示每个分片的最大样本数。
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号默认值None。只有当指定了 `num_shards` 时才能指定此参数。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_ 。默认值None不使用缓存。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读`单节点数据缓存 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/cache.html>`_ 。默认值None不使用缓存。
- **extra_metadata** (bool, 可选) - 用于指定是否额外输出一个数据列用于表示图片元信息。如果为True则将额外输出一个名为 `[_meta-filename, dtype=string]` 的数据列默认值False。
根据给定的`task`配置,生成数据集具有不同的输出列:


@ -3,14 +3,14 @@ mindspore.dataset.WaitedDSCallback
.. py:class:: mindspore.dataset.WaitedDSCallback(step_size=1)
数据集自定义回调类的抽象基类,用于与训练回调类(`mindspore.callback <https://mindspore.cn/docs/api/zh-CN/master/api_python/mindspore.train.html#mindspore.train.callback.Callback>`_)的同步。
数据集自定义回调类的抽象基类,用于与训练回调类 `mindspore.train.callback <https://mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.train.html#mindspore.train.callback.Callback>`_ 的同步。
可用于在每个step或epoch开始前执行自定义的回调方法注意第二个step或epoch开始时才会触发该调用。
例如在自动数据增强中根据上一个epoch的loss值来更新增强算子参数配置。
用户可通过 `train_run_context` 获取模型相关信息。如 `network` 、 `train_network` 、 `epoch_num` 、 `batch_num` 、 `loss_fn` 、 `optimizer` 、 `parallel_mode` 、 `device_number` 、 `list_callback` 、 `cur_epoch_num` 、 `cur_step_num` 、 `dataset_sink_mode` 、 `net_outputs` 等,详见 `mindspore.callback <https://mindspore.cn/docs/api/zh-CN/master/api_python/mindspore.train.html#mindspore.train.callback.Callback>`_
用户可通过 `train_run_context` 获取网络训练相关信息,如 `network` 、 `train_network` 、 `epoch_num` 、 `batch_num` 、 `loss_fn` 、 `optimizer` 、 `parallel_mode` 、 `device_number` 、 `list_callback` 、 `cur_epoch_num` 、 `cur_step_num` 、 `dataset_sink_mode` 、 `net_outputs` 等,详见 `mindspore.train.callback <https://mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.train.html#mindspore.train.callback.Callback>`_
用户可通过 `ds_run_context` 获取数据处理管道相关信息包括 `cur_epoch_num` (当前epoch数)、 `cur_step_num_in_epoch` (当前epoch的step数)、 `cur_step_num` (当前step数)。
用户可通过 `ds_run_context` 获取数据处理管道相关信息包括 `cur_epoch_num` (当前epoch数)、 `cur_step_num_in_epoch` (当前epoch的step数)、 `cur_step_num` (当前step数)。
**参数:**


@ -106,7 +106,7 @@ MindSpore上下文用于配置当前执行环境包括执行模式、执
- **pynative_synchronize** (bool) - 表示是否在PyNative模式下启动设备同步执行。默认值False。设置为False时将在设备上异步执行算子。当算子执行出错时将无法定位特定错误脚本代码的位置。当设置为True时将在设备上同步执行算子。这将降低程序的执行性能。此时当算子执行出错时可以根据错误的调用栈来定位错误脚本代码的位置。
- **mode** (int) - 表示在GRAPH_MODE(0)或PYNATIVE_MODE(1)模式中的运行。默认值GRAPH_MODE(0)。GRAPH_MODE或PYNATIVE_MODE可以通过 `mode` 属性设置两种模式都支持所有后端。默认模式为GRAPH_MODE。
- **enable_graph_kernel** (bool) - 表示是否启用图算融合去优化网络执行性能。默认值False。如果 `enable_graph_kernel` 设置为True则可以启用加速。有关图算融合的详细信息请查看 `使能图算融合 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/enable_graph_kernel_fusion.html>`_
- **enable_graph_kernel** (bool) - 表示是否启用图算融合去优化网络执行性能。默认值False。如果 `enable_graph_kernel` 设置为True则可以启用加速。有关图算融合的详细信息请查看 `使能图算融合 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/enable_graph_kernel_fusion.html>`_
- **graph_kernel_flags** (str) - 图算融合的优化选项当与enable_graph_kernel冲突时它的优先级更高。其仅适用于有经验的用户。例如context.set_context(graph_kernel_flags="--opt_level=2 --dump_as_text")。一些常用选项:
- **opt_level**设置优化级别。默认值2。当opt_level的值大于0时启动图算融合。可选值包括
@ -128,11 +128,11 @@ MindSpore上下文用于配置当前执行环境包括执行模式、执
- RL,GA:当RL和GA优化同时打开时,工具会根据网络模型中的不同算子类型自动选择RL或GA。RL和GA的顺序没有区别(自动选择)。
有关启用算子调优工具设置的更多信息,请查看 `使能算子调优工具 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/enable_auto_tune.html>`_
有关启用算子调优工具设置的更多信息,请查看 `使能算子调优工具 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/enable_auto_tune.html>`_
- **check_bprop** (bool) - 表示是否检查反向传播节点,以确保反向传播节点输出的形状(shape)和数据类型与输入参数相同。默认值False。
- **max_call_depth** (int) - 指定函数调用的最大深度。其值必须为正整数。默认值1000。当嵌套Cell太深或子图数量太多时需要设置 `max_call_depth` 参数。如果 `max_call_depth` 的值比以前的大,则应把系统最大堆栈深度设得更大,否则可能会因为系统堆栈溢出而引发 `core dumped` 异常。
- **enable_sparse** (bool) - 表示是否启用稀疏特征。默认值False。有关稀疏特征和稀疏张量的详细信息请查看 `稀疏张量 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/tensor.html#sparse-tensor>`_
- **enable_sparse** (bool) - 表示是否启用稀疏特征。默认值False。有关稀疏特征和稀疏张量的详细信息请查看 `稀疏张量 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/tensor.html#sparse-tensor>`_
- **grad_for_scalar** (bool) - 表示是否获取标量梯度。默认值False。当 `grad_for_scalar` 设置为True时,可以对函数的标量输入求导。由于后端目前不支持伸缩操作,所以该接口只支持在前端可推演的简单操作。
- **enable_compile_cache** (bool) - 表示是否加载或者保存前端编译的图。当 `enable_compile_cache` 被设置为True时在第一次执行的过程中一个硬件无关的编译缓存会被生成并且导出为一个MINDIR文件。当该网络被再次执行时如果 `enable_compile_cache` 仍然为True并且网络脚本没有被更改那么这个编译缓存会被加载。注意目前只支持有限的Python脚本更改的自动检测这意味着可能有正确性风险。默认值False。这是一个实验原型可能会被更改或者删除。
- **compile_cache_path** (str) - 保存前端图编译缓存的路径。默认值:"."。如果目录不存在,系统会自动创建这个目录。缓存会被保存到如下目录:`compile_cache_path/rank_${rank_id}/``rank_id` 是集群上当前设备的ID。
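As an added illustration (not part of the original diff), a minimal sketch of how a few of the options above are typically passed to `mindspore.context.set_context` in r1.6; the backend and the cache path are placeholders:
>>> # Hedged sketch for MindSpore r1.6; device_target depends on the installed backend build.
>>> from mindspore import context
>>> context.set_context(mode=context.GRAPH_MODE)
>>> context.set_context(enable_graph_kernel=True, max_call_depth=2000)
>>> context.set_context(enable_compile_cache=True, compile_cache_path="./cache")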


@ -8,7 +8,7 @@ mindspore.dataset
大多数数据集可以通过指定参数 `cache` 启用缓存服务,以提升整体数据处理效率。
请注意Windows平台上还不支持缓存服务因此在Windows上加载和处理数据时请勿使用。更多介绍和限制
请参考 `Single-Node Tensor Cache <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/cache.html>`_
请参考 `Single-Node Tensor Cache <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/cache.html>`_
在API示例中常用的模块导入方法如下
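The import snippet referenced by the sentence above is truncated in this diff view; a typical r1.6 import pattern looks like the following (a hedged reconstruction, not the page's own text):
>>> # Commonly used dataset-related imports in MindSpore r1.6 examples (illustrative only).
>>> import mindspore.dataset as ds
>>> import mindspore.dataset.transforms.c_transforms as c_transforms
>>> import mindspore.dataset.vision.c_transforms as c_vision
>>> import mindspore.dataset.text as text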


@ -3,7 +3,7 @@
SummaryCollector可以收集一些常用信息。
它可以帮助收集loss、学习率、计算图等。
SummaryCollector还可以允许通过 `summary算子 <https://www.mindspore.cn/mindinsight/docs/zh-CN/master/summary_record.html#summarysummarycollector>`_ 将数据收集到summary文件中。
SummaryCollector还可以允许通过 `summary算子 <https://www.mindspore.cn/mindinsight/docs/zh-CN/r1.6/summary_record.html#summarysummarycollector>`_ 将数据收集到summary文件中。
.. note::
- 不允许在回调列表中存在多个SummaryCollector实例。
@ -20,7 +20,7 @@
- **collect_metric** (bool) - 表示是否收集训练metrics目前只收集loss。把第一个输出视为loss并且算出其平均数。默认值True。
- **collect_graph** (bool) - 表示是否收集计算图。目前只收集训练计算图。默认值True。
- **collect_train_lineage** (bool) - 表示是否收集训练阶段的lineage数据该字段将显示在MindInsight的 `lineage页面 <https://www.mindspore.cn/mindinsight/docs/zh-CN/master/lineage_and_scalars_comparison.html>`_ 上。默认值True。
- **collect_train_lineage** (bool) - 表示是否收集训练阶段的lineage数据该字段将显示在MindInsight的 `lineage页面 <https://www.mindspore.cn/mindinsight/docs/zh-CN/r1.6/lineage_and_scalars_comparison.html>`_ 上。默认值True。
- **collect_eval_lineage** (bool) - 表示是否收集评估阶段的lineage数据该字段将显示在MindInsight的lineage页面上。默认值True。
- **collect_input_data** (bool) - 表示是否为每次训练收集数据集。目前仅支持图像数据。如果数据集中有多列数据则第一列应为图像数据。默认值True。
- **collect_dataset_graph** (bool) - 表示是否收集训练阶段的数据集图。默认值True。
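For orientation, a minimal sketch of wiring SummaryCollector into training, added here as an illustration; the tiny `nn.Dense` network and the `ds_train` name are placeholders, not from the original page:
>>> from mindspore import Model, nn
>>> from mindspore.train.callback import SummaryCollector
>>> net = nn.Dense(16, 10)                                    # toy network, stands in for a real model
>>> loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
>>> optim = nn.Momentum(net.trainable_params(), 0.1, 0.9)
>>> model = Model(net, loss_fn=loss_fn, optimizer=optim, metrics={'accuracy'})
>>> summary_collector = SummaryCollector(summary_dir='./summary_dir', collect_freq=10)
>>> # model.train(1, ds_train, callbacks=[summary_collector])  # ds_train: your dataset pipeline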


@ -4,7 +4,7 @@
该方法将在一个指定的目录中创建summary文件和lineage文件并将数据写入文件。
它通过执行 `record` 方法将数据写入文件。除了通过 `summary算子 <https://www.mindspore.cn/mindinsight/docs/zh-CN/master/summary_record.html#summarysummarycollector>`_ 记录网络的数据外SummaryRecord还支持通过 `自定义回调函数和自定义训练循环 <https://www.mindspore.cn/mindinsight/docs/zh-CN/master/summary_record.html#callback>`_ 记录数据。
它通过执行 `record` 方法将数据写入文件。除了通过 `summary算子 <https://www.mindspore.cn/mindinsight/docs/zh-CN/r1.6/summary_record.html#summarysummarycollector>`_ 记录网络的数据外SummaryRecord还支持通过 `自定义回调函数和自定义训练循环 <https://www.mindspore.cn/mindinsight/docs/zh-CN/r1.6/summary_record.html#callback>`_ 记录数据。
.. note::
- 确保在最后关闭SummaryRecord否则进程不会退出。请参阅下面的示例部分了解如何用两种方式正确关闭SummaryRecord。
@ -144,7 +144,7 @@
**异常:**
- **TypeError** `step` 不为整型,或 `train_network` 的类型不为`mindspore.nn.Cell <https://www.mindspore.cn/docs/api/zh-CN/master/api_python/nn/mindspore.nn.Cell.html?highlight=MindSpore.nn.cell#mindspore-nn-cell>`_
- **TypeError** `step` 不为整型,或 `train_network` 的类型不为`mindspore.nn.Cell <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/nn/mindspore.nn.Cell.html?highlight=MindSpore.nn.cell#mindspore-nn-cell>`_
**样例:**
@ -157,7 +157,7 @@
.. py:method:: set_mode(mode)
设置模型运行状态。不同的状态会影响记录数据的内容。
设置模型运行阶段。不同的阶段会影响记录数据的内容。
**参数:**


@ -3,7 +3,7 @@ mindspore.build_searched_strategy
.. py:class:: mindspore.build_searched_strategy(strategy_filename)
构建网络中每个参数的策略,用于分布式推理。关于它的使用细节,请参考: `保存和加载模型HyBrid Parallel模式 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/save_load_model_hybrid_parallel.html>`_
构建网络中每个参数的策略,用于分布式推理。关于它的使用细节,请参考: `保存和加载模型HyBrid Parallel模式 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/save_load_model_hybrid_parallel.html>`_
**参数:**


@ -3,7 +3,7 @@ mindspore.load_distributed_checkpoint
.. py:method:: mindspore.load_distributed_checkpoint(network, checkpoint_filenames, predict_strategy=None, train_strategy_filename=None, strict_load=False, dec_key=None, dec_mode='AES-GCM')
给分布式预测加载checkpoint文件到网络用于分布式推理。关于分布式推理的细节请参考 https://www.mindspore.cn/docs/programming_guide/zh-CN/master/distributed_inference.html 。
给分布式预测加载checkpoint文件到网络用于分布式推理。关于分布式推理的细节请参考 https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/distributed_inference.html 。
**参数:**


@ -3,7 +3,7 @@ mindspore.merge_sliced_parameter
.. py:method:: mindspore.merge_sliced_parameter(sliced_parameters, strategy=None)
将参数切片合并为一个完整的参数,用于分布式推理。关于它的细节,请参考:`保存和加载模型HyBrid Parallel模式 <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/save_load_model_hybrid_parallel.html>`_
将参数切片合并为一个完整的参数,用于分布式推理。关于它的细节,请参考:`保存和加载模型HyBrid Parallel模式 <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/save_load_model_hybrid_parallel.html>`_
**参数:**


@ -7,7 +7,7 @@ mindspore.nn.Flatten
**输入:**
- **x** (Tensor) - 要展平的输入Tensor。shape为 :math:`(N, *)`,其中 :math:`*` 表示任意的附加维度。数据类型为 `number <https://www.mindspore.cn/docs/api/zh-CN/master/api_python/mindspore.html#mindspore.dtype>`_
- **x** (Tensor) - 要展平的输入Tensor。shape为 :math:`(N, *)`,其中 :math:`*` 表示任意的附加维度。数据类型为 `number <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_
**输出:**


@ -15,7 +15,7 @@ mindspore.nn.ReLU
**输入:**
- **x** (Tensor) - 用于计算ReLU的任意维度的Tensor。数据类型为 `number <https://www.mindspore.cn/docs/api/zh-CN/master/api_python/mindspore.html#mindspore.dtype>`_
- **x** (Tensor) - 用于计算ReLU的任意维度的Tensor。数据类型为 `number <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_
**输出:**


@ -11,7 +11,7 @@ mindspore.nn.Tril
**输入:**
- **x** (Tensor)输入Tensor。数据类型为`number <https://www.mindspore.cn/docs/api/zh-CN/master/api_python/mindspore.html#mindspore.dtype>`_
- **x** (Tensor)输入Tensor。数据类型为`number <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_
- **k** (int)对角线的索引。默认值0。假设输入的矩阵的维度分别为d1d2则k的范围应在[-min(d1, d2)+1, min(d1, d2)-1],超出该范围时输出值与输入 `x` 一致。
**输出:**


@ -1 +1 @@
优化器和混合精度之间通常没有联系。但是,当使用 `FixedLossScaleManager` 且 `FixedLossScaleManager` 中的 `drop_overflow_update` 设置为False时优化器需要设置'loss_scale'。由于此优化器没有 `loss_scale` 的参数,因此需要通过其他方式处理 `loss_scale` ,如何正确处理 `loss_scale` 详见 `LossScale <https://www.mindspore.cn/docs/programming_guide/zh-CN/master/lossscale.html>`_
优化器和混合精度之间通常没有联系。但是,当使用 `FixedLossScaleManager` 且 `FixedLossScaleManager` 中的 `drop_overflow_update` 设置为False时优化器需要设置'loss_scale'。由于此优化器没有 `loss_scale` 的参数,因此需要通过其他方式处理 `loss_scale` ,如何正确处理 `loss_scale` 详见 `LossScale <https://www.mindspore.cn/docs/programming_guide/zh-CN/r1.6/lossscale.html>`_
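A brief sketch of the pattern described above, where the optimizer itself carries `loss_scale` because `drop_overflow_update` is False (illustrative only; the small `nn.Dense` network is a stand-in for a real model):
>>> from mindspore import Model, nn
>>> from mindspore.train.loss_scale_manager import FixedLossScaleManager
>>> net = nn.Dense(16, 10)                                    # toy network for illustration
>>> loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
>>> loss_scale = 1024.0
>>> manager = FixedLossScaleManager(loss_scale, drop_overflow_update=False)
>>> optim = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9, loss_scale=loss_scale)
>>> model = Model(net, loss_fn=loss_fn, optimizer=optim, loss_scale_manager=manager)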


@ -39,7 +39,7 @@ mindspore.nn.probability.bijector.GumbelCDF
>>>
>>> # 初始化GumbelCDF Bijector,loc设置为1.0,scale设置为2.0。
>>> gumbel_cdf = msb.GumbelCDF(1.0, 2.0)
>>> # 在网络中使用ScalarAffinebijector。
>>> # 在网络中使用GumbelCDF bijector。
>>> x = Tensor([1, 2, 3], dtype=mindspore.float32)
>>> y = Tensor([0.1, 0.2, 0.3], dtype=mindspore.float32)
>>> ans1 = gumbel_cdf.forward(x)


@ -3,7 +3,7 @@ mindspore.nn.probability.bijector.PowerTransform
.. py:class:: mindspore.nn.probability.bijector.PowerTransform(power=0., name='PowerTransform')
乘方Bijector(Power Bijector)。
乘方Bijector(PowerTransform Bijector)。
此Bijector对应的映射函数为
.. math::
@ -11,7 +11,7 @@ mindspore.nn.probability.bijector.PowerTransform
其中幂c >= 0。
Power Bijector将输入从 `[-1/c, inf]` 映射到 `[0, inf]`
PowerTransform Bijector将输入从 `[-1/c, inf]` 映射到 `[0, inf]`
`c=0`此Bijector等于 :class:`mindspore.nn.probability.bijector.Exp` Bijector。


@ -15,7 +15,7 @@ mindspore.nn.probability.distribution.Gumbel
- **loc** (int, float, list, numpy.ndarray, Tensor) - Gumbel分布的位置。
- **scale** (int, float, list, numpy.ndarray, Tensor) - Gumbel分布的尺度。
- **seed** (int) - 采样时使用的种子。如果为None则使用全局种子。默认值None
- **seed** (int) - 采样时使用的种子。如果为None则使用全局种子。默认值0
- **dtype** (mindspore.dtype) - 分布类型。默认值mindspore.float32。
- **name** (str) - 分布的名称。默认值:'Gumbel'。


@ -16,7 +16,7 @@ mindspore.nn.probability.distribution.LogNormal
- **loc** (int, float, list, numpy.ndarray, Tensor) - 基础正态分布的平均值。默认值None。
- **scale** (int, float, list, numpy.ndarray, Tensor) - 基础正态分布的标准差。默认值None。
- **seed** (int) - 采样时使用的种子。如果为None则使用全局种子。默认值None
- **seed** (int) - 采样时使用的种子。如果为None则使用全局种子。默认值0
- **dtype** (mindspore.dtype) - 分布类型。默认值mindspore.float32。
- **name** (str) - 分布的名称。默认值:'LogNormal'。


@ -1,31 +1,32 @@
mindspore.ops.Add
=================
.. py:class:: mindspore.ops.Add(*args, **kwargs)
.. py:class:: mindspore.ops.Add()
两个输入Tensor按元素相加。
输入 `x``y` 遵循隐式类型转换规则,使数据类型保持一致。
输入必须是两个Tensor或一个Tensor和一个Scalar。
当输入是两个Tensor时它们的数据类型不能同时是bool它们的shape可以广播。
当输入是一个Tensor和一个Scalar时Scalar只能是一个常数。
两个输入Tensor逐元素相加。
.. math::
out_{i} = x_{i} + y_{i}
.. note::
- 输入 `x` 和 `y` 遵循 `隐式类型转换规则 <https://www.mindspore.cn/docs/note/zh-CN/r1.6/operator_list_implicit.html>`_ ,使数据类型保持一致。
- 输入必须是两个Tensor或一个Tensor和一个Scalar。
- 当输入是两个Tensor时它们的数据类型不能同时是bool并保证其shape可以广播。
- 当输入是一个Tensor和一个Scalar时Scalar只能是一个常数。
**输入:**
- **x** (Union[Tensor, Number, bool]) - 第一个输入是一个Number、bool值或数据类型为Number或bool的Tensor。
- **y** (Union[Tensor, Number, bool]) - 第二个输入当第一个输入是Tensor时第二个输入应该是一个Number或bool值或数据类型为Number或bool的Tensor。
- **x** (Union[Tensor, number.Number, bool]) - 第一个输入是一个number.Number、bool值或数据类型为 `number <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_`bool_ <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_ 的Tensor。
- **y** (Union[Tensor, number.Number, bool]) - 第二个输入当第一个输入是Tensor时第二个输入应该是一个number.Number或bool值或数据类型为number或bool_的Tensor。当第一个输入是Scalar时第二个输入必须是数据类型为number或bool_的Tensor。
**输出:**
Tensorshape与广播后的shape相同数据类型为两个输入中精度较高的类型。
Tensorshape与输入 `x``y` 广播后的shape相同数据类型为两个输入中精度较高的类型。
**异常:**
- **TypeError** - `x``y` 不是Tensor、Number或bool。
- **TypeError** - `x``y` 不是Tensor、number.Number或bool。
**支持平台:**
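A short usage sketch added for illustration (standard r1.6 import pattern assumed):
>>> # 1 + 4 = 5, 2 + 5 = 7, 3 + 6 = 9
>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor, ops
>>> add = ops.Add()
>>> x = Tensor(np.array([1, 2, 3]), mindspore.float32)
>>> y = Tensor(np.array([4, 5, 6]), mindspore.float32)
>>> print(add(x, y))
[5. 7. 9.]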


@ -1,15 +1,15 @@
mindspore.ops.AddN
===================
.. py:class:: mindspore.ops.AddN(*args, **kwargs)
.. py:class:: mindspore.ops.AddN()
元素将所有输入的Tensor相加。
元素将所有输入的Tensor相加。
所有输入Tensor必须具有相同的shape。
**输入:**
- **x** (Union(tuple[Tensor], list[Tensor])) - 输入tuple或list由多个Tensor组成其数据类型为Number或bool用于相加
- **x** (Union(tuple[Tensor], list[Tensor])) - Tensor组成的tuple或list,类型为 `bool_ <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_ 或 `number <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_ 。
**输出:**
@ -18,6 +18,7 @@ mindspore.ops.AddN
**异常:**
- **TypeError** - `x` 既不是tuple也不是list。
- **ValueError** - `x` 中存在shape不同的Tensor。
**支持平台:**
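An added usage sketch, passing a tuple of same-shape Tensors (illustrative only):
>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor, ops
>>> addn = ops.AddN()
>>> x = Tensor(np.array([1, 2, 3]), mindspore.float32)
>>> y = Tensor(np.array([4, 5, 6]), mindspore.float32)
>>> print(addn((x, y)))
[5. 7. 9.]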


@ -1,28 +1,33 @@
mindspore.ops.Div
=================
.. py:class:: mindspore.ops.Div(*args, **kwargs)
.. py:class:: mindspore.ops.Div()
按元素计算第一输入Tensor除以第二输入Tensor的商。
输入 `x``y` 遵循隐式类型转换规则使数据类型保持一致。输入必须是两个Tensor或一个Tensor和一个Scalar。当输入是两个Tensor时它们的数据类型不能同时是bool它们的shape可以广播。当输入是一个Tensor和一个Scalar时Scalar只能是一个常数。
逐元素计算第一输入Tensor除以第二输入Tensor的商。
.. math::
out_{i} = \frac{x_i}{y_i}
.. note::
- 输入 `x` 和 `y` 遵循 `隐式类型转换规则 <https://www.mindspore.cn/docs/note/zh-CN/r1.6/operator_list_implicit.html>`_ ,使数据类型保持一致。
- 输入必须是两个Tensor或一个Tensor和一个Scalar。
- 当输入是两个Tensor时它们的数据类型不能同时是bool并保证其shape可以广播。
- 当输入是一个Tensor和一个Scalar时Scalar只能是一个常数。
**输入:**
- **x** (Union[Tensor, Number, bool]) - 第一个输入是一个Number、bool值或数据类型为Number或bool的Tensor。
- **y** (Union[Tensor, Number, bool]) - 第二个输入当第一个输入是Tensor时第二个输入应该是一个Number或bool值或数据类型为Number或bool的Tensor。
- **x** (Union[Tensor, number.Number, bool]) - 第一个输入是一个number.Number、bool值或数据类型为 `number <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_`bool_ <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_ 的Tensor。
- **y** (Union[Tensor, number.Number, bool]) - 第二个输入当第一个输入是Tensor时第二个输入应该是一个number.Number或bool值或数据类型为number或bool_的Tensor。当第一个输入是Scalar时第二个输入必须是数据类型为number或bool_的Tensor。
**输出:**
Tensorshape与广播后的shape相同数据类型为两个输入中精度较高的类型。
Tensorshape与输入 `x``y` 广播后的shape相同数据类型为两个输入中精度较高的类型。
**异常:**
- **TypeError** - `x``y` 都不是Tensor。
- **TypeError** - `x``y` 数据类型都是bool_的Tensor。
**支持平台:**
@ -38,8 +43,8 @@ mindspore.ops.Div
>>> print(output)
[-1.3333334 2.5 2. ]
>>> # 用例2两个输入的数据类型和shape不同
>>> x = Tensor(np.array([-4.0, 5.0, 6.0]), mindspore.int32)
>>> y = Tensor(2, mindspore.float32)
>>> x = Tensor(np.array([-4.0, 5.0, 6.0]), mindspore.float32)
>>> y = Tensor(2, mindspore.int32)
>>> output = div(x, y)
>>> print(output)
[-2. 2.5 3.]


@ -1,18 +1,23 @@
mindspore.ops.Eps
=================
.. py:class:: mindspore.ops.Eps(*args, **kwargs)
.. py:class:: mindspore.ops.Eps()
创建一个填充 `x` 数据类型最小值的Tensor
创建一个与输入数据类型和shape都相同的Tensor元素值为对应数据类型能表达的最小值
**输入:**
- **x** (Tensor) - 用于获取其数据类型最小值的Tensor。数据类型必须为float16或float32。shape为 :math:`(N,*)` ,其中 :math:`*` 表示任意的附加维度数。
- **x** (Tensor) - 用于获取其数据类型能表达的最小值的任意维度的Tensor。数据类型必须为float16或float32。
**输出:**
Tensor具有与 `x` 相同的数据类型和shape填充了 `x` 数据类型的最小值。
**异常:**
- **TypeError** - `x` 不是Tensor。
- **TypeError** - `x` 的数据类型不是float16或者float32。
**支持平台:**
``Ascend`` ``GPU`` ``CPU``


@ -1,9 +1,9 @@
mindspore.ops.Erf
=================
.. py:class:: mindspore.ops.Erf(*args, **kwargs)
.. py:class:: mindspore.ops.Erf()
元素计算 `x` 的高斯误差函数。
元素计算 `x` 的高斯误差函数。
.. math::
@ -11,7 +11,7 @@ mindspore.ops.Erf
**输入:**
- **x** (Tensor) - 用于计算高斯误差函数的Tensor。数据类型必须为float16或float32。shape为 :math:`(N,*)` ,其中 :math:`*` 表示任意的附加维度数,其秩应小于8。
- **x** (Tensor) - 高斯误差函数的输入Tensor。数据类型必须为float16或float32。任意维度小于8的Tensor
**输出:**


@ -9,7 +9,7 @@ mindspore.ops.Eye
- **n** (int) - 指定返回Tensor的行数。仅支持常量值。
- **m** (int) - 指定返回Tensor的列数。仅支持常量值。
- **t** (mindspore.dtype) - 指定返回Tensor的数据类型。数据类型必须是`bool_ <https://www.mindspore.cn/docs/api/zh-CN/master/api_python/mindspore.html#mindspore.dtype>`_`number <https://www.mindspore.cn/docs/api/zh-CN/master/api_python/mindspore.html#mindspore.dtype>`_
- **t** (mindspore.dtype) - 指定返回Tensor的数据类型。数据类型必须是`bool_ <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_`number <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_
**输出:**


@ -7,7 +7,7 @@ mindspore.ops.Fill
**输入:**
- **type** (mindspore.dtype) - 指定输出Tensor的数据类型。数据类型只支持`bool_ <https://www.mindspore.cn/docs/api/zh-CN/master/api_python/mindspore.html#mindspore.dtype>`_`number <https://www.mindspore.cn/docs/api/zh-CN/master/api_python/mindspore.html#mindspore.dtype>`_
- **type** (mindspore.dtype) - 指定输出Tensor的数据类型。数据类型只支持`bool_ <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_`number <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_
- **shape** (tuple[int]) - 指定输出Tensor的shape。
- **value** (Union(number.Number, bool)) - 用来填充输出Tensor的值。


@ -3,7 +3,7 @@ mindspore.ops.Gamma
.. py:class:: mindspore.ops.Gamma(seed=0, seed2=0)
根据概率密度函数分布生成随机正浮点数x。
根据概率密度函数分布生成随机正浮点数x。
.. math::
@ -14,6 +14,13 @@ mindspore.ops.Gamma
- **seed** (int) - 算子层的随机种子用于生成随机数。必须是非负的。默认值0。
- **seed2** (int)全局的随机种子和算子层的随机种子共同决定最终生成的随机数。必须是非负的。默认值0。
.. note::
- 随机种子:通过一些复杂的数学算法,可以得到一组有规律的随机数,而随机种子就是这个随机数的初始值。随机种子相同,得到的随机数就不会改变。
- 全局的随机种子和算子层的随机种子都没设置:使用默认值当做随机种子。
- 全局的随机种子设置了,算子层的随机种子未设置:随机生成一个种子和全局的随机种子拼接。
- 全局的随机种子未设置,算子层的随机种子设置了:使用默认的全局的随机种子,和算子层的随机种子拼接。
- 全局的随机种子和算子层的随机种子都设置了:全局的随机种子和算子层的随机种子拼接。
**输入:**
- **shape** (tuple) - 待生成的随机Tensor的shape。只支持常量值。
@ -22,12 +29,13 @@ mindspore.ops.Gamma
**输出:**
Tensor。shape是输入 `shape` 以及alpha、beta广播后的shape。数据类型为float32。
Tensor。shape是输入 `shape` `alpha` `beta` 广播后的shape。数据类型为float32。
**异常:**
- **TypeError** - `seed``seed2` 都不是int。
- **TypeError** - `alpha``beta` 都不是Tensor。
- **TypeError** - `seed``seed2` 的数据类型不是int。
- **TypeError** - `alpha``beta` 不是Tensor。
- **TypeError** - `alpha``beta` 的数据类型不是float32。
- **ValueError** - `shape` 不是常量值。
**支持平台:**
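An added sketch of the call pattern described above; the seed value is arbitrary and only the output shape is checked, since the values themselves are random:
>>> import mindspore
>>> from mindspore import Tensor, ops
>>> gamma = ops.Gamma(seed=3)
>>> alpha = Tensor(1.0, mindspore.float32)
>>> beta = Tensor(1.0, mindspore.float32)
>>> output = gamma((2, 2), alpha, beta)
>>> print(output.shape)
(2, 2)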


@ -1,7 +1,7 @@
mindspore.ops.GeLU
==================
.. py:class:: mindspore.ops.GeLU(*args, **kwargs)
.. py:class:: mindspore.ops.GeLU()
高斯误差线性单元激活函数(Gaussian Error Linear Units activation function)。
@ -11,13 +11,13 @@ mindspore.ops.GeLU
GeLU函数定义如下
.. math::
\text{output} = 0.5 * x * (1 + tanh(x / \sqrt{2})),
GELU(x_i) = x_i*P(X < x_i)
其中 :math:`tanh` 是双曲正切函数
其中 :math:`P` 是标准高斯分布的累积分布函数, :math:`x_i` 是输入的元素
**输入:**
- **x** (Tensor) - 用于计算GeLU函数的Tensor数据类型为float16或float32。
- **x** (Tensor) - 激活函数GeLU的输入数据类型为float16或float32。
**输出:**


@ -21,7 +21,7 @@ mindspore.ops.Greater
**输入:**
- **x** (Union[Tensor, number.Number, bool]) - 第一个输入是一个number.Number、bool值或数据类型为`number <https://www.mindspore.cn/docs/api/zh-CN/master/api_python/mindspore.html#mindspore.dtype>`_`bool_ <https://www.mindspore.cn/docs/api/zh-CN/master/api_python/mindspore.html#mindspore.dtype>`_的Tensor。
- **x** (Union[Tensor, number.Number, bool]) - 第一个输入是一个number.Number、bool值或数据类型为`number <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_`bool_ <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_的Tensor。
- **y** (Union[Tensor, number.Number, bool]) - 第二个输入当第一个输入是Tensor时第二个输入应该是一个number.Number或bool值或数据类型为number或bool_的Tensor。当第一个输入是Scalar时第二个输入必须是数据类型为number或bool_的Tensor。
**输出:**


@ -1,14 +1,14 @@
mindspore.ops.L2Loss
====================
.. py:class:: mindspore.ops.L2Loss(*args, **kwargs)
.. py:class:: mindspore.ops.L2Loss()
计算Tensor的L2范数的一半,不对结果进行开方。
用于计算L2范数不对结果进行开方操作
`input_x` 设为x输出设为loss。
输入设为x输出设为loss。
.. math::
loss = sum(x ** 2) / 2
loss = \frac{\sum x ^ 2}{2}
**输入:**
@ -16,7 +16,7 @@ mindspore.ops.L2Loss
**输出:**
Tensor具有与 `input_x` 相同的数据类型。输出Tensor是loss的值是一个scalar Tensor。
Tensor具有与 `input_x` 相同的数据类型的Scalar Tensor。
**异常:**
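A quick numeric check of the formula above, added for illustration: sum(1 + 4 + 9) / 2 = 7.
>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor, ops
>>> input_x = Tensor(np.array([1.0, 2.0, 3.0]), mindspore.float32)
>>> l2_loss = ops.L2Loss()
>>> print(l2_loss(input_x))
7.0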


@ -1,14 +1,9 @@
mindspore.ops.LessEqual
========================
.. py:class:: mindspore.ops.LessEqual(*args, **kwargs)
.. py:class:: mindspore.ops.LessEqual()
按元素计算 :math:`x <= y` 的bool值。
输入 `x``y` 遵循隐式类型转换规则,使数据类型保持一致。
输入必须是两个Tensor或一个Tensor和一个Scalar。
当输入是两个Tensor时它们的数据类型不能同时是bool它们的shape可以广播。
当输入是一个Tensor和一个Scalar时Scalar只能是一个常数。
逐元素计算 :math:`x <= y` 的bool值。
.. math::
@ -17,10 +12,16 @@ mindspore.ops.LessEqual
& \text{False, if } x_{i}>y_{i}
\end{cases}
.. note::
- 输入 `x` 和 `y` 遵循 `隐式类型转换规则 <https://www.mindspore.cn/docs/note/zh-CN/r1.6/operator_list_implicit.html>`_ ,使数据类型保持一致。
- 输入必须是两个Tensor或一个Tensor和一个Scalar。
- 当输入是两个Tensor时它们的数据类型不能同时是bool并保证其shape可以广播。
- 当输入是一个Tensor和一个Scalar时Scalar只能是一个常数。
**输入:**
- **x** (Union[Tensor, Number, bool]) - 第一个输入是一个Number、bool值或数据类型为Number或bool的Tensor。
- **y** (Union[Tensor, Number, bool]) - 第二个输入当第一个输入是Tensor时第二个输入应该是一个Number或bool值或数据类型为Number或bool的Tensor。
- **x** (Union[Tensor, number.Number, bool]) - 第一个输入是一个number.Number、bool值或数据类型为 `number <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_`bool_ <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_ 的Tensor。
- **y** (Union[Tensor, number.Number, bool]) - 第二个输入当第一个输入是Tensor时第二个输入应该是一个number.Number或bool值或数据类型为number或bool_的Tensor。当第一个输入是Scalar时第二个输入必须是数据类型为number或bool_的Tensor。
**输出:**


@ -1,20 +1,23 @@
mindspore.ops.Log
=================
.. py:class:: mindspore.ops.Log(*args, **kwargs)
.. py:class:: mindspore.ops.Log()
元素返回Tensor的自然对数。
元素返回Tensor的自然对数。
.. math::
y_i = log_e(x_i)
.. warning::
如果算子Log的输入值在(00.01]或[0.951.05]范围内,则输出精度可能会发生变化。
如果算子Log的输入值在(00.01]或[0.951.05]范围内,则输出精度可能会存在误差。
.. note::
Ascend上输入Tensor的维度要小于等于8CPU上输入Tensor的维度要小于8。
**输入:**
- **x** (Tensor) - 输入Tensor。该值必须大于0。shape为 :math:`(N,*)`,其中 :math:`*` 表示任意的附加维度数它的秩应小于8。
- **x** (Tensor) - 任意维度的输入Tensor。该值必须大于0。
**输出:**


@ -28,7 +28,7 @@ mindspore.ops.matmul
**异常:**
- **TypeError** - `transpose_a``transpose_b` 不是bool。
- **ValueError** - 矩阵 `a`维度的列不等于矩阵 `b` 的维度的行。
- **ValueError** - 矩阵 `a` 的列不等于矩阵 `b` 的行。
- **ValueError** - `a``b` 的维度不等于2。
**支持平台:**


@ -1,23 +1,24 @@
mindspore.ops.Mul
=================
.. py:class:: mindspore.ops.Mul(*args, **kwargs)
.. py:class:: mindspore.ops.Mul()
两个Tensor按元素相乘。
输入 `x``y` 遵循隐式类型转换规则,使数据类型保持一致。
输入必须是两个Tensor或一个Tensor和一个Scalar。
当输入是两个Tensor时它们的数据类型不能同时是bool它们的shape可以广播。
当输入是一个Tensor和一个Scalar时Scalar只能是一个常数。
两个Tensor逐元素相乘。
.. math::
out_{i} = x_{i} * y_{i}
.. note::
- 输入 `x` 和 `y` 遵循 `隐式类型转换规则 <https://www.mindspore.cn/docs/note/zh-CN/r1.6/operator_list_implicit.html>`_ ,使数据类型保持一致。
- 输入必须是两个Tensor或一个Tensor和一个Scalar。
- 当输入是两个Tensor时它们的数据类型不能同时是bool并保证其shape可以广播。
- 当输入是一个Tensor和一个Scalar时Scalar只能是一个常数。
**输入:**
- **x** (Union[Tensor, Number, bool]) - 第一个输入是一个Number、bool值或数据类型为Number或bool的Tensor。
- **y** (Union[Tensor, Number, bool]) - 第二个输入当第一个输入是Tensor时第二个输入应该是一个Number或bool值或数据类型为Number或bool的Tensor。
- **x** (Union[Tensor, number.Number, bool]) - 第一个输入是一个number.Number、bool值或数据类型为 `number <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_`bool_ <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_ 的Tensor。
- **y** (Union[Tensor, number.Number, bool]) - 第二个输入当第一个输入是Tensor时第二个输入应该是一个number.Number或bool值或数据类型为number或bool_的Tensor。当第一个输入是Scalar时第二个输入必须是数据类型为number或bool_的Tensor。
**输出:**
@ -25,7 +26,7 @@ mindspore.ops.Mul
**异常:**
- **TypeError** - `x``y` 不是Tensor、Number或bool。
- **TypeError** - `x``y` 不是Tensor、number.Number或bool。
- **ValueError** - `x``y` 的shape不相同。
**支持平台:**
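An added usage sketch (same import pattern as the other illustrations on this page):
>>> # 1*4 = 4, 2*5 = 10, 3*6 = 18
>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor, ops
>>> mul = ops.Mul()
>>> x = Tensor(np.array([1.0, 2.0, 3.0]), mindspore.float32)
>>> y = Tensor(np.array([4.0, 5.0, 6.0]), mindspore.float32)
>>> print(mul(x, y))
[ 4. 10. 18.]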


@ -1,15 +1,13 @@
mindspore.ops.OnesLike
======================
.. py:class:: mindspore.ops.OnesLike(*args, **kwargs)
.. py:class:: mindspore.ops.OnesLike()
创建新Tensor。所有元素的值都为1。
返回填充了Scalar值为1的具有与输入相同shape和数据类型的Tensor。
返回值为1的Tensorshape和数据类型与输入相同。
**输入:**
- **input_x** (Tensor) - 输入Tensor。shape为 :math:`(N,*)` ,其中 :math:`*` 表示任意的附加维度数
- **input_x** (Tensor) - 任意维度的Tensor
**输出:**


@ -1,7 +1,7 @@
mindspore.ops.PReLU
===================
.. py:class:: mindspore.ops.PReLU(*args, **kwargs)
.. py:class:: mindspore.ops.PReLU()
带参数的线性修正单元激活函数(Parametric Rectified Linear Unit activation function)。
@ -14,18 +14,18 @@ mindspore.ops.PReLU
.. note::
Ascend不支持0-D或1-D的x。
Ascend不支持标量和1维向量的输入x。
**输入:**
- **x** (Tensor) - 用于计算激活函数的Tensor。数据类型为float16或float32。shape为 :math:`(N, C, *)` ,其中 :math:`*` 表示任意的附加维度
- **weight** (Tensor) - 权重Tensor。数据类型为float16或float32。只有两种shape是合法的1或 `input_x` 的通道数。通道维度是输入的第二维。当输入为0-D或1-D Tensor时通道数为1。
- **x** (Tensor) - 激活函数的输入Tensor。数据类型为float16或float32。shape为 :math:`(N, C, *)` ,其中 :math:`*` 表示任意的附加维度。
- **weight** (Tensor) - 权重Tensor。数据类型为float16或float32。weight只可以是向量长度与输入x的通道数C相同。在GPU设备上当输入为标量时shape为1。
**输出:**
Tensor数据类型与 `x` 的相同。
有关详细信息,请参考:class:`nn.PReLU`。
有关详细信息,请参考 :class:`mindspore.nn.PReLU`
**异常:**


@ -1,23 +1,24 @@
mindspore.ops.Pow
==================
.. py:class:: mindspore.ops.Pow(*args, **kwargs)
.. py:class:: mindspore.ops.Pow()
计算 `x` 中每个元素的 `y` 的幂次。
输入 `x``y` 遵循隐式类型转换规则,使数据类型保持一致。
输入必须是两个Tensor或一个Tensor和一个Scalar。
当输入是两个Tensor时它们的数据类型不能同时是bool它们的shape可以广播。
当输入是一个Tensor和一个Scalar时Scalar只能是一个常数。
计算 `x` 中每个元素的 `y` 次幂。
.. math::
out_{i} = x_{i} ^{ y_{i}}
.. note::
- 输入 `x` 和 `y` 遵循 `隐式类型转换规则 <https://www.mindspore.cn/docs/note/zh-CN/r1.6/operator_list_implicit.html>`_ ,使数据类型保持一致。
- 输入必须是两个Tensor或一个Tensor和一个Scalar。
- 当输入是两个Tensor时它们的数据类型不能同时是bool并保证其shape可以广播。
- 当输入是一个Tensor和一个Scalar时Scalar只能是一个常数。
**输入:**
- **x** (Union[Tensor, Number, bool]) - 第一个输入是一个Number、bool值或数据类型为Number或bool的Tensor。
- **y** (Union[Tensor, Number, bool]) - 第二个输入当第一个输入是Tensor时第二个输入应该是一个Number或bool值或数据类型为Number或bool的Tensor。
- **x** (Union[Tensor, number.Number, bool]) - 第一个输入是一个number.Number、bool值或数据类型为 `number <https://www.mindspore.cn/docs/api/en/r1.6/api_python/mindspore.html#mindspore.dtype>`_`bool_ <https://www.mindspore.cn/docs/api/en/r1.6/api_python/mindspore.html#mindspore.dtype>`_ 的Tensor。
- **y** (Union[Tensor, number.Number, bool]) - 第二个输入当第一个输入是Tensor时第二个输入应该是一个number.Number或bool值或数据类型为number或bool_的Tensor。当第一个输入是Scalar时第二个输入必须是数据类型为number或bool_的Tensor。
**输出:**
@ -25,8 +26,8 @@ mindspore.ops.Pow
**异常:**
- **TypeError** - `x``y` 不是Tensor、Number或bool。
- **ValueError** - `x``y` 的shape不相同。
- **TypeError** - `x``y` 不是Tensor、number.Number或bool。
- **ValueError** - `x``y` 都为Tensor时它们的shape不相同。
**支持平台:**
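An added Tensor-and-Scalar sketch of the rule above (illustrative only):
>>> # 1**3 = 1, 2**3 = 8, 4**3 = 64
>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor, ops
>>> x = Tensor(np.array([1.0, 2.0, 4.0]), mindspore.float32)
>>> pow_op = ops.Pow()
>>> print(pow_op(x, 3.0))
[ 1.  8. 64.]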


@ -1,7 +1,7 @@
mindspore.ops.ReLUV2
====================
.. py:class:: mindspore.ops.ReLUV2(*args, **kwargs)
.. py:class:: mindspore.ops.ReLUV2()
线性修正单元激活函数(Rectified Linear Unit activation function)。
@ -11,10 +11,6 @@ mindspore.ops.ReLUV2
\text{ReLU}(x) = (x)^+ = \max(0, x)
.. note::
`ReLu` 的区别在于该算子多输出一个mask且算子的kernel与 `ReLu` 的不同。
**输入:**
- **input_x** (Tensor) - 输入Tensor必须是4-D Tensor。
@ -22,7 +18,7 @@ mindspore.ops.ReLUV2
**输出:**
- **output** (Tensor) - 数据类型和shape与 `input_x` 的相同。
- **mask** (Tensor) - 数据类型必须为uint8的Tensor
- **mask** (Tensor) - 保留输出,无实际意义
**异常:**
@ -37,14 +33,9 @@ mindspore.ops.ReLUV2
>>> input_x = Tensor(np.array([[[[1, -2], [-3, 4]], [[-5, 6], [7, -8]]]]), mindspore.float32)
>>> relu_v2 = ops.ReLUV2()
>>> output, mask= relu_v2(input_x)
>>> output, _= relu_v2(input_x)
>>> print(output)
[[[[1. 0.]
[0. 4.]]
[[0. 6.]
[7. 0.]]]]
>>> print(mask)
[[[[[1 0]
[2 0]]
[[2 0]
[1 0]]]]]
[7. 0.]]]]


@ -1,9 +1,9 @@
mindspore.ops.Reshape
======================
.. py:class:: mindspore.ops.Reshape(*args, **kwargs)
.. py:class:: mindspore.ops.Reshape()
基于给定的shape使用相同的值对输入Tensor进行reshape操作
基于给定的shape对输入Tensor进行重新排列
`input_shape` 最多只能有一个-1在这种情况下它可以从剩余的维度和输入的元素个数中推断出来。
@ -18,7 +18,7 @@ mindspore.ops.Reshape
**异常:**
- **ValueError** - 给定的 `input_shape`,如果它有个-1或者除-1若存在之外的元素的乘积小于或等于0或者无法被输入Tensor的shape的乘积除,或者与输入的数组大小不匹配。
- **ValueError** - 给定的 `input_shape`,如果它有个-1或者除-1若存在之外的元素的乘积小于或等于0或者无法被输入Tensor的shape的乘积除,或者与输入的数组大小不匹配。
**支持平台:**
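An added usage sketch; a 2x3 input is rearranged into a 3x2 output (illustrative only):
>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor, ops
>>> input_x = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mindspore.float32)
>>> reshape = ops.Reshape()
>>> output = reshape(input_x, (3, 2))
>>> print(output.shape)
(3, 2)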


@ -1,9 +1,9 @@
mindspore.ops.SeLU
==================
.. py:class:: mindspore.ops.SeLU(*args, **kwargs)
.. py:class:: mindspore.ops.SeLU()
按元素计算输入Tensor的SeLU(scaled exponential Linear Unit)函数。
激活函数SeLU(Scaled exponential Linear Unit)。
该激活函数定义为:
@ -21,7 +21,7 @@ mindspore.ops.SeLU
**输入:**
- **input_x** (Tensor) - shape为 :math:`(N, *)` 的Tensor其中 :math:`*` 表示任意的附加维度数数据类型为float16或float32。
- **input_x** (Tensor) - 任意维度的Tensor数据类型为float16或float32。
**输出:**


@ -1,11 +1,11 @@
mindspore.ops.Sigmoid
=====================
.. py:class:: mindspore.ops.Sigmoid(*args, **kwargs)
.. py:class:: mindspore.ops.Sigmoid()
Sigmoid激活函数。
按元素计算输入的Sigmoid函数。Sigmoid函数定义为
逐元素计算Sigmoid激活函数。Sigmoid函数定义为:
.. math::
@ -15,7 +15,7 @@ mindspore.ops.Sigmoid
**输入:**
- **input_x** (Tensor) - shape为 :math:`(N, *)` 的tensor其中 :math:`*` 表示任意的附加维度数数据类型为float16或float32。
- **input_x** (Tensor) - 任意维度的Tensor数据类型为float16或float32。
**输出:**


@ -1,15 +1,13 @@
mindspore.ops.Size
==================
.. py:class:: mindspore.ops.Size(*args, **kwargs)
.. py:class:: mindspore.ops.Size()
返回Tensor的大小。
返回一个整数Scalar表示输入的元素大小即Tensor中元素的总数。
返回一个Scalar类型为整数表示输入Tensor的大小即Tensor中元素的总数。
**输入:**
- **input_x** (Tensor) - Tensor的shape为 :math:`(x_1, x_2, ..., x_R)` 。数据类型为Number
- **input_x** (Tensor) - 输入参数shape为 :math:`(x_1, x_2, ..., x_R)` 。数据类型为 `number <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_
**输出:**


@ -1,23 +1,24 @@
mindspore.ops.Sub
=================
.. py:class:: mindspore.ops.Sub(*args, **kwargs)
.. py:class:: mindspore.ops.Sub()
按元素用第一个输入Tensor减去第二个输入Tensor。
输入 `x``y` 遵循隐式类型转换规则,使数据类型保持一致。
输入必须是两个Tensor或一个Tensor和一个Scalar。
当输入是两个Tensor时它们的数据类型不能同时是bool它们的shape可以广播。
当输入是一个Tensor和一个Scalar时Scalar只能是一个常数。
逐元素用第一个输入Tensor减去第二个输入Tensor。
.. math::
out_{i} = x_{i} - y_{i}
.. note::
- 输入 `x` 和 `y` 遵循 `隐式类型转换规则 <https://www.mindspore.cn/docs/note/zh-CN/r1.6/operator_list_implicit.html>`_ ,使数据类型保持一致。
- 输入必须是两个Tensor或一个Tensor和一个Scalar。
- 当输入是两个Tensor时它们的数据类型不能同时是bool它们的shape可以广播。
- 当输入是一个Tensor和一个Scalar时Scalar只能是一个常数。
**输入:**
- **x** (Union[Tensor, Number, bool]) - 第一个输入是一个Number、bool值或数据类型为Number或bool的Tensor。
- **y** (Union[Tensor, Number, bool]) - 第二个输入当第一个输入是Tensor时第二个输入应该是一个Number或bool值或数据类型为Number或bool的Tensor。
- **x** (Union[Tensor, number.Number, bool]) - 第一个输入是一个number.Number、bool值或数据类型为 `number <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_`bool_ <https://www.mindspore.cn/docs/api/zh-CN/r1.6/api_python/mindspore.html#mindspore.dtype>`_ 的Tensor。
- **y** (Union[Tensor, number.Number, bool]) - 第二个输入当第一个输入是Tensor时第二个输入应该是一个number.Number或bool值或数据类型为number或bool_的Tensor。当第一个输入是Scalar时第二个输入必须是数据类型为number或bool_的Tensor。
**输出:**
@ -25,7 +26,7 @@ mindspore.ops.Sub
**异常:**
- **TypeError** - `x``y` 不是Tensor、Number或bool。
- **TypeError** - `x``y` 不是Tensor、number.Number或bool。
**支持平台:**
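An added usage sketch (illustrative only):
>>> # 4-1 = 3, 5-2 = 3, 6-3 = 3
>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor, ops
>>> sub = ops.Sub()
>>> x = Tensor(np.array([4, 5, 6]), mindspore.int32)
>>> y = Tensor(np.array([1, 2, 3]), mindspore.int32)
>>> print(sub(x, y))
[3 3 3]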


@ -1,9 +1,9 @@
mindspore.ops.Tile
===================
.. py:class:: mindspore.ops.Tile(*args, **kwargs)
.. py:class:: mindspore.ops.Tile()
按照给定的次数复制Tensor。
按照给定的次数复制输入Tensor。
通过复制 `multiples``input_x` 来创建新的Tensor。输出Tensor的第i维度有 `input_x.shape[i] * multiples[i]` 个元素,并且 `input_x` 的值沿第i维度被复制 `multiples[i]` 次。
@ -13,12 +13,12 @@ mindspore.ops.Tile
**Inputs:**
- **input_x** (Tensor) - A 1-D or higher Tensor. Let the shape of the input Tensor be :math:`(x_1, x_2, ..., x_S)`.
- **multiples** (tuple[int]) - The input tuple, consisting of multiple integers, such as :math:`(y_1, y_2, ..., y_S)`. The length of `multiples` cannot be smaller than the number of dimensions of `input_x`. Only constant values are supported.
- **input_x** (Tensor) - A 1-D or higher Tensor.
- **multiples** (tuple[int]) - The parameter specifying the replication count, a tuple whose elements are integers, such as :math:`(y_1, y_2, ..., y_S)`. The length of `multiples` cannot be smaller than the number of dimensions of `input_x`. Only constant values are supported.
**Outputs:**
A Tensor with the same data type as `input_x`. Suppose the length of `multiples` is `d` and the number of dimensions of `input_x` is `input_x.dim`.
A Tensor with the same data type as `input_x`. Suppose the length of `multiples` is `d`, the number of dimensions of `input_x` is `input_x.dim`, and the shape of `input_x` is :math:`(x_1, x_2, ..., x_S)`.
- If `input_x.dim = d`, the corresponding shape entries are multiplied, and the output shape is :math:`(x_1*y_1, x_2*y_2, ..., x_S*y_S)`.
- If `input_x.dim < d`, the shape of `input_x` is padded with 1s at the front until the lengths match. For example, if the shape of `input_x` becomes :math:`(1, ..., x_1, ..., x_R, x_S)`, the corresponding shape entries are then multiplied, and the output shape is :math:`(1*y_1, ..., x_R*y_R, x_S*y_S)`.
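As a quick worked example (an editorial addition, not part of the diff): with an `input_x` of shape :math:`(2, 3)` and `multiples = (2, 2, 2)`, the input shape is first padded to :math:`(1, 2, 3)`, and the output shape is :math:`(1*2, 2*2, 3*2) = (2, 4, 6)`.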

View File

@ -3,24 +3,31 @@ mindspore.ops.UniformReal
.. py:class:: mindspore.ops.UniformReal(seed=0, seed2=0)
Produces random floating-point numbers uniformly distributed in the range [0, 1).
Produces random floating-point numbers uniformly distributed in the range [0, 1).
**Parameters:**
- **seed** (int) - The operator-level random seed, used to generate random numbers. Must be non-negative. Default: 0.
- **seed2** (int) - The global random seed, which together with the operator-level random seed determines the final random numbers. Must be non-negative. Default: 0.
.. note::
- Random seed: a set of regular random numbers can be obtained through certain complex mathematical algorithms, and the random seed is the initial value of this sequence. With the same random seed, the generated random numbers do not change.
- Neither the global random seed nor the operator-level random seed is set: the default value is used as the random seed.
- The global random seed is set but the operator-level random seed is not: a randomly generated seed is concatenated with the global random seed.
- The global random seed is not set but the operator-level random seed is: the default global random seed is concatenated with the operator-level random seed.
- Both the global and the operator-level random seeds are set: the global random seed is concatenated with the operator-level random seed.
**Inputs:**
- **shape** (tuple) - The shape of the random Tensor to be generated. Only constant values are supported.
- **shape** (tuple) - The shape of the Tensor to be generated. Only constant values are supported.
**Outputs:**
A Tensor. Its shape is the value specified by the input `shape`. The data type is float32.
A Tensor. Its shape is the input `shape`. The data type is float32.
**Raises:**
- **TypeError** - `seed` or `seed2` is not int.
- **TypeError** - `seed` or `seed2` is not an int.
- **TypeError** - `shape` is not a tuple.
- **ValueError** - `shape` is not a constant value.

View File

@ -1,15 +1,13 @@
mindspore.ops.ZerosLike
=======================
.. py:class:: mindspore.ops.ZerosLike(*args, **kwargs)
.. py:class:: mindspore.ops.ZerosLike()
Creates a new Tensor whose elements are all 0.
Returns a Tensor filled with the value 0, with the same shape and data type as the input Tensor.
Returns a Tensor whose elements are 0, with the same shape and data type as the input Tensor.
**Inputs:**
- **input_x** (Tensor) - The input Tensor. The data type is int32, int64, float16, or float32. The shape is :math:`(N,*)`, where :math:`*` means any number of additional dimensions.
- **input_x** (Tensor) - The input Tensor of any dimension. The data type is int32, int64, float16, or float32.
**Outputs:**

@ -1 +1 @@
Subproject commit 2158c0a9b8c53d89c5be65046b41c9ac058b916d
Subproject commit 008fafbb15d6e1126b424dbb8689a4795578b5b6

View File

@ -99,6 +99,16 @@ class MS_API Context {
/// \return Pointer to the custom delegate.
std::shared_ptr<Delegate> GetDelegate() const;
/// \brief Set whether a quantized model runs as a float model in the multi-device scenario.
///
/// \param[in] float_mode: true, run as a float model; false, do not run as a float model.
void SetMultiModalHW(bool float_mode);
/// \brief Get the mode in which the model runs.
///
/// \return Bool value that indicates whether the model runs as a float model.
bool GetMultiModalHW() const;
/// \brief Get a mutable reference of DeviceInfoContext vector in this context. Only MindSpore Lite supports
/// heterogeneous scenarios with multiple members in the vector.
///
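For orientation, a minimal sketch (not part of the diff) of how the two accessors added above might be used; the rest of the Context configuration, such as device info, is omitted and assumed to happen elsewhere.

#include <memory>
#include "include/api/context.h"

// Hypothetical helper: ask a quantized model to run as a float model.
// Only SetMultiModalHW/GetMultiModalHW from the hunk above are assumed.
std::shared_ptr<mindspore::Context> MakeFloatModeContext() {
  auto context = std::make_shared<mindspore::Context>();
  context->SetMultiModalHW(true);   // true: run the quant model as a float model
  if (!context->GetMultiModalHW()) {
    // The flag should read back as true; treat anything else as a setup error.
  }
  return context;
}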

View File

@ -139,7 +139,6 @@ class MS_API Kernel {
/// \param[in] value define the kernel's attribute value.
void SetAttr(const std::string &key, const std::string &value) { attrs_[key] = value; }
protected:
std::string name_;
const mindspore::Context *context_ = nullptr;
std::vector<mindspore::MSTensor> inputs_;

View File

@ -0,0 +1,71 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_INCLUDE_API_MODEL_PARALLEL_RUNNER_H
#define MINDSPORE_INCLUDE_API_MODEL_PARALLEL_RUNNER_H
#include <vector>
#include <memory>
#include <utility>
#include <string>
#include "include/api/status.h"
#include "include/api/context.h"
namespace mindspore {
struct RunnerConfig {
std::shared_ptr<Context> context = nullptr;
int workers_num = 0;
};
class ModelPool;
/// \brief The ModelParallelRunner class is used to define a MindSpore ModelParallelRunner, facilitating Model
/// management.
class MS_API ModelParallelRunner {
public:
ModelParallelRunner() = default;
~ModelParallelRunner() = default;
/// \brief Build a model parallel runner from a model path so that it can run on a device. Only valid for Lite.
///
/// \param[in] model_path Define the model path.
/// \param[in] runner_config Define the config used to store options during model pool init.
///
/// \return Status.
Status Init(const std::string &model_path, const std::shared_ptr<RunnerConfig> &runner_config = nullptr);
/// \brief Obtains all input tensors information of the model.
///
/// \return The vector that includes all input tensors.
std::vector<MSTensor> GetInputs();
/// \brief Obtains all output tensors information of the model.
///
/// \return The vector that includes all output tensors.
std::vector<MSTensor> GetOutputs();
/// \brief Run inference with the ModelParallelRunner.
///
/// \param[in] inputs A vector where model inputs are arranged in sequence.
/// \param[out] outputs A pointer to a vector; the model outputs are filled into the container in sequence.
/// \param[in] before CallBack before predict.
/// \param[in] after CallBack after predict.
///
/// \return Status.
Status Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs,
const MSKernelCallBack &before = nullptr, const MSKernelCallBack &after = nullptr);
private:
std::shared_ptr<ModelPool> model_pool_ = nullptr;
};
} // namespace mindspore
#endif // MINDSPORE_INCLUDE_API_MODEL_PARALLEL_RUNNER_H
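A rough usage sketch of the new parallel runner, based only on the declarations above. The model path, the worker count, and the comparison of Status against kSuccess are assumptions; real code would also fill the input tensors returned by GetInputs().

#include <memory>
#include <string>
#include <vector>
#include "include/api/model_parallel_runner.h"

bool RunParallelOnce(const std::vector<mindspore::MSTensor> &inputs) {
  auto config = std::make_shared<mindspore::RunnerConfig>();
  config->context = std::make_shared<mindspore::Context>();      // device setup omitted
  config->workers_num = 2;                                       // assumed pool size
  mindspore::ModelParallelRunner runner;
  if (runner.Init("model.ms", config) != mindspore::kSuccess) {  // "model.ms" is a placeholder path
    return false;
  }
  std::vector<mindspore::MSTensor> outputs;
  return runner.Predict(inputs, &outputs) == mindspore::kSuccess;
}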

View File

@ -108,7 +108,7 @@ void ParseAttrValue(const std::string &type, const std::string &attr_name, const
MS_EXCEPTION_IF_NULL(node_attr);
MS_EXCEPTION_IF_NULL(value);
if (type == "int") {
auto attr_value = static_cast<int>(GetValue<int64_t>(value));
auto attr_value = value->isa<Int32Imm>() ? GetValue<int>(value) : GetValue<int64_t>(value);
(*node_attr)[attr_name].set_i(attr_value);
} else if (type == "str") {
auto attr_value = GetValue<std::string>(value);
@ -186,6 +186,12 @@ void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
return;
}
std::vector<size_t> input_size_list;
if (!SetIOIputSize(anf_node, input_num, &input_size_list)) {
MS_LOG(ERROR) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] get input size list failed.";
return;
}
for (size_t input_index = 0; input_index < input_num; input_index++) {
::mindspore::Tensor *node_inputs = proto->add_inputs();
MS_EXCEPTION_IF_NULL(node_inputs);
@ -215,6 +221,7 @@ void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
}
node_inputs->set_tensor_type(input_data_type);
node_inputs->set_mem_device("HBM");
node_inputs->set_data_size(input_size_list[input_index]);
}
}
@ -243,8 +250,17 @@ void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
}
TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
int64_t data_size = 1;
if (!GetShapeSize(output_shape, TypeIdToType(output_type), &data_size)) {
MS_LOG(ERROR) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] get output size failed for output "
<< output_index;
return;
}
node_outputs->set_tensor_type(output_data_type);
node_outputs->set_mem_device("HBM");
node_outputs->set_data_size(LongToSize(data_size));
}
}

View File

@ -24,13 +24,18 @@ if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "aarch64" AND EXISTS ${CMAKE_C_COMPILE
${CMAKE_CURRENT_SOURCE_DIR}/aicpu_sharder/aicpu_pulse.cc
${CMAKE_CURRENT_SOURCE_DIR}/aicpu_sharder/aicpu_sharder.cc
${CMAKE_CURRENT_SOURCE_DIR}/random_choice_with_mask_kernels.cc
${CMAKE_CURRENT_SOURCE_DIR}/environ/aicpu_environ_manager.cc
${CMAKE_CURRENT_SOURCE_DIR}/environ/environ_create.cc
${CMAKE_CURRENT_SOURCE_DIR}/environ/environ_set.cc
${CMAKE_CURRENT_SOURCE_DIR}/environ/environ_get.cc
${CMAKE_CURRENT_SOURCE_DIR}/environ/environ_destroy_all.cc
)
add_library(aicpu_kernels SHARED
add_library(mindspore_aicpu_kernels SHARED
${AICPU_SRC}
)
target_compile_options(aicpu_kernels PRIVATE
target_compile_options(mindspore_aicpu_kernels PRIVATE
-march=armv8-a
-O2
-fvisibility-inlines-hidden
@ -39,7 +44,7 @@ if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "aarch64" AND EXISTS ${CMAKE_C_COMPILE
-fno-common
)
target_link_libraries(aicpu_kernels PRIVATE
target_link_libraries(mindspore_aicpu_kernels PRIVATE
-ldl
-shared
PUBLIC
@ -53,8 +58,8 @@ if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "aarch64" AND EXISTS ${CMAKE_C_COMPILE
)
set(INSTALL_LIBRARY_DIR lib)
install(TARGETS aicpu_kernels OPTIONAL
EXPORT aicpu_kernels-targets
install(TARGETS mindspore_aicpu_kernels OPTIONAL
EXPORT mindspore_aicpu_kernels-targets
LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}
)

View File

@ -1,77 +1,95 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OPS_AICPU_COMMON_KERNEL_LOG_H_
#define AICPU_OPS_AICPU_COMMON_KERNEL_LOG_H_
#include <unistd.h>
#include <sys/syscall.h>
#include <iostream>
#include <utility>
#include "common/kernel_errcode.h"
inline int GetTid(void) {
thread_local static int tid = syscall(__NR_gettid);
return tid;
}
static const int LOG_COUNT = 0;
namespace aicpu {
#define AICPU_LOG_DEBUG 0
#define AICPU_LOG_INFO 1
#define AICPU_LOG_WARN 2
#define AICPU_LOG_ERROR 3
#define AICPU_LOG_EVENT 0x10
inline void PrintLog(const int level) { std::cerr << level << std::endl; }
template <typename T, typename... Args>
inline void PrintLog(const int level, T &&head, Args &&... tail) {
std::cerr << std::forward<T>(head) << " ";
PrintLog(level, std::forward<Args>(tail)...);
}
int LogSetLevel(int level);
int LogGetLevel(void);
bool CheckLogLevel(int log_level_check);
#define AICPU_LOGD(fmt, ...) \
AICPU_LOG(AICPU_LOG_DEBUG, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOGI(fmt, ...) \
AICPU_LOG(AICPU_LOG_INFO, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOGW(fmt, ...) \
AICPU_LOG(AICPU_LOG_WARN, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOGE(fmt, ...) \
AICPU_LOG(AICPU_LOG_ERROR, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOGEVENT(fmt, ...) \
AICPU_LOG(AICPU_LOG_EVENT, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOG(level, fmt, ...) \
do { \
if (aicpu::CheckLogLevel(level)) { \
aicpu::PrintLog(level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
} \
} while (LOG_COUNT != 0)
#define AICPU_CHK_STATUS_RET(expr...) \
do { \
const uint32_t status = (expr); \
if (status != AICPU_KERNEL_STATE_SUCCESS) { \
return status; \
} \
} while (0);
} // namespace aicpu
#endif // AICPU_OPS_AICPU_COMMON_KERNEL_LOG_H_
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef AICPU_OPS_AICPU_COMMON_KERNEL_LOG_H_
#define AICPU_OPS_AICPU_COMMON_KERNEL_LOG_H_
#include <unistd.h>
#include <sys/syscall.h>
#include <iostream>
#include <utility>
#include "common/kernel_errcode.h"
inline int GetTid(void) {
thread_local static int tid = syscall(__NR_gettid);
return tid;
}
static const int LOG_COUNT = 0;
namespace aicpu {
#define AICPU_LOG_DEBUG 0
#define AICPU_LOG_INFO 1
#define AICPU_LOG_WARN 2
#define AICPU_LOG_ERROR 3
#define AICPU_LOG_EVENT 0x10
inline void PrintLog(const int level) { std::cerr << level << std::endl; }
template <typename T, typename... Args>
inline void PrintLog(const int level, T &&head, Args &&... tail) {
std::cerr << std::forward<T>(head) << " ";
PrintLog(level, std::forward<Args>(tail)...);
}
int LogSetLevel(int level);
int LogGetLevel(void);
bool CheckLogLevel(int log_level_check);
#define AICPU_LOGD(fmt, ...) \
AICPU_LOG(AICPU_LOG_DEBUG, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOGI(fmt, ...) \
AICPU_LOG(AICPU_LOG_INFO, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOGW(fmt, ...) \
AICPU_LOG(AICPU_LOG_WARN, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOGE(fmt, ...) \
AICPU_LOG(AICPU_LOG_ERROR, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOGEVENT(fmt, ...) \
AICPU_LOG(AICPU_LOG_EVENT, "%s:%s:%d[tid:%lu]:" #fmt, __FUNCTION__, __FILE__, __LINE__, GetTid(), ##__VA_ARGS__);
#define AICPU_LOG(level, fmt, ...) \
do { \
if (aicpu::CheckLogLevel(level)) { \
aicpu::PrintLog(level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
} \
} while (LOG_COUNT != 0)
#define AICPU_CHK_STATUS_RET(expr...) \
do { \
const uint32_t status = (expr); \
if (status != AICPU_KERNEL_STATE_SUCCESS) { \
return status; \
} \
} while (0);
#define AICPU_CHECK_NULLPTR_VOID(value, logText...) \
if (value == nullptr) { \
AICPU_LOGE(logText); \
return; \
}
#define AICPU_CHECK_FALSE(condition, errorCode, logText...) \
if (!(condition)) { \
AICPU_LOGE(logText); \
return errorCode; \
}
#define AICPU_CHECK_NULLPTR(value, errorCode, logText...) \
if (value == nullptr) { \
AICPU_LOGE(logText); \
return errorCode; \
}
} // namespace aicpu
#endif // AICPU_OPS_AICPU_COMMON_KERNEL_LOG_H_
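To make the macro additions concrete, here is a small illustrative routine (not in the diff) combining the new check macros with the log macros; the function and its arguments are invented, and the error codes come from common/kernel_errcode.h as used elsewhere in this diff.

#include "common/kernel_errcode.h"
#include "common/kernel_log.h"

// Invented example: validate pointers with the check macros, then log and return.
uint32_t CopyFirstElement(const int *input, int *output) {
  AICPU_CHECK_NULLPTR(input, AICPU_KERNEL_STATE_PARAM_INVALID, "input is null.")
  AICPU_CHECK_NULLPTR(output, AICPU_KERNEL_STATE_PARAM_INVALID, "output is null.")
  if (*input < 0) {
    AICPU_LOGW("Negative input: %d", *input);
  }
  output[0] = input[0];
  return AICPU_KERNEL_STATE_SUCCESS;
}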

View File

@ -0,0 +1,97 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_AICPU_ENVIRON_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_AICPU_ENVIRON_H_
#include <vector>
#include <string>
#include <memory>
#include <map>
#include "common/kernel_log.h"
namespace aicpu {
constexpr int64_t kObjectTypeTensorType = 17;
constexpr int64_t kObjectTypeEnvType = 26;
// index of input or output
enum Index : size_t {
kIndex0 = 0,
kIndex1,
kIndex2,
kIndex3,
kIndex4,
kIndex5,
kIndex6,
kIndex7,
kIndex8,
kIndex9,
kIndex10,
kIndex11,
kIndex12,
kIndex13,
kIndex14,
kIndex15,
kIndex16,
};
struct EnvironValue {
EnvironValue() : addr_(nullptr), size_(0), value_type_(kObjectTypeTensorType) {}
EnvironValue(void *address_addr, size_t address_size, int32_t value_type)
: addr_(address_addr), size_(address_size), value_type_(value_type) {}
void *addr_;
size_t size_;
int32_t value_type_;
};
using EnvironValuePtr = std::shared_ptr<EnvironValue>;
// Environ is the meaning expression of map.
class Environ {
public:
explicit Environ(int64_t handle) : handle_(handle) {}
virtual ~Environ() = default;
void Set(int64_t key, const EnvironValuePtr &value) { values_[key] = value; }
EnvironValuePtr Get(int64_t key) {
if (values_.count(key) > 0) {
return values_[key];
}
return nullptr;
}
void Clear() {
// Foreach values to free the value addr.
for (auto &value : values_) {
AICPU_CHECK_NULLPTR_VOID(value.second, "value.second is null.");
free(value.second->addr_);
}
values_.clear();
handle_ = 0;
}
private:
// The handle is unique for each env.
int64_t handle_ = 0;
// Store the tensors in map, as <key, tensor>.
std::map<int64_t, EnvironValuePtr> values_;
};
using EnvironPtr = std::shared_ptr<Environ>;
} // namespace aicpu
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_AICPU_ENVIRON_H_
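A small sketch (not from the diff) of how a value might be stored in an Environ. Since Clear() calls free() on every stored address, the buffer is heap-allocated with malloc here; the helper name and the scalar payload are illustrative.

#include <cstdlib>
#include <memory>
#include "environ/aicpu_environ.h"

void StoreScalar(const aicpu::EnvironPtr &env, int64_t key, int64_t scalar) {
  if (env == nullptr) {
    return;
  }
  void *buf = malloc(sizeof(scalar));  // ownership passes to the Environ; Clear() frees it
  if (buf == nullptr) {
    return;
  }
  *static_cast<int64_t *>(buf) = scalar;
  auto value = std::make_shared<aicpu::EnvironValue>(buf, sizeof(scalar), aicpu::kObjectTypeTensorType);
  env->Set(key, value);
}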

View File

@ -0,0 +1,100 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "environ/aicpu_environ_manager.h"
#include <string>
namespace aicpu {
constexpr auto kScalarTensorShapeDim = 1;
constexpr auto kScalarTensorShapeSize = 1;
constexpr auto kEnvValueTypeAttr = "value_type";
int64_t EnvironMgr::Create() {
std::unique_lock<std::mutex> lock(mutex);
if (env_handles_count_ >= INT64_MAX) {
AICPU_LOGE(" The handles number:%d is out of range: ", env_handles_count_);
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
int64_t ret_handle = ++env_handles_count_;
auto env = std::make_shared<Environ>(ret_handle);
AICPU_CHECK_NULLPTR(env, AICPU_KERNEL_STATE_PARAM_INVALID, "env is null.");
envs_[ret_handle] = env;
return ret_handle;
}
EnvironPtr EnvironMgr::Get(int64_t handle) {
std::unique_lock<std::mutex> lock(mutex);
const auto &envIter = envs_.find(handle);
if (envIter != envs_.end()) {
auto &result = envIter->second;
return result;
}
return nullptr;
}
void EnvironMgr::Clear() {
std::unique_lock<std::mutex> lock(mutex);
for (auto &env : envs_) {
AICPU_CHECK_NULLPTR_VOID(env.second, "env is null.")
env.second->Clear();
}
envs_.clear();
}
bool EnvironMgr::IsScalarTensor(const aicpuops::Tensor &tensor) {
aicpuops::TensorShape shape = tensor.tensor_shape();
if (shape.dim_size() == 0) {
AICPU_LOGD("The shape is empty.");
return true;
}
if ((shape.dim_size() == kScalarTensorShapeDim) && (shape.dim(aicpu::kIndex0).size() == kScalarTensorShapeSize)) {
AICPU_LOGD("The tensor is scalar.");
return true;
}
return false;
}
bool EnvironMgr::CheckEnvInput(const aicpuops::NodeDef &node_def) {
::google::protobuf::Map<::std::string, ::aicpuops::AttrValue> nodedef_map = node_def.attrs();
auto value_type_attr = nodedef_map[kEnvValueTypeAttr].i();
if ((value_type_attr != kObjectTypeTensorType) && (value_type_attr != kObjectTypeEnvType)) {
AICPU_LOGE("The value type is not supported: [%d]", value_type_attr);
return false;
}
// Check the input handle.
if (!IsScalarTensor(node_def.inputs(aicpu::kIndex0))) {
AICPU_LOGE("The input handle checks invalid.");
return false;
}
// Check the input key
if (!IsScalarTensor(node_def.inputs(aicpu::kIndex1))) {
AICPU_LOGE("The input key checks invalid.");
return false;
}
// Check the input value
if ((value_type_attr == kObjectTypeEnvType) && (!IsScalarTensor(node_def.inputs(aicpu::kIndex2)))) {
AICPU_LOGE("The input value checks invalid.");
return false;
}
return true;
}
} // namespace aicpu

View File

@ -0,0 +1,69 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_AICPU_ENVIRON_MANAGER_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_AICPU_ENVIRON_MANAGER_H_
#include <utility>
#include <map>
#include <memory>
#include <vector>
#include <mutex>
#include "environ/aicpu_environ.h"
#include "aicpu_sharder/aicpu_sharder.h"
#include "proto/aicpu_tensor.pb.h"
#include "common/distinct_uniform_int_distribution.h"
#include "common/tensor.h"
namespace aicpu {
class EnvironMgr {
public:
static EnvironMgr &GetInstance() noexcept {
static EnvironMgr instance;
return instance;
}
EnvironMgr(const EnvironMgr &) = delete;
EnvironMgr(EnvironMgr &&) = delete;
EnvironMgr &operator=(const EnvironMgr &) = delete;
EnvironMgr &operator=(EnvironMgr &&) = delete;
// Create the env object and return the unique env handle.
int64_t Create();
EnvironPtr Get(int64_t handle);
void Clear();
// Check whether the inputs of EnvironGet kernel or EnvironSet kernel are valid.
bool CheckEnvInput(const aicpuops::NodeDef &node_def);
// Check whether is scalar tensor. Environ handle and env key only support scalar tensor currently.
bool IsScalarTensor(const aicpuops::Tensor &tensor);
private:
EnvironMgr() = default;
~EnvironMgr() = default;
// Store the envs in map, as <handle, env>.
std::map<int64_t, EnvironPtr> envs_;
int64_t env_handles_count_{0};
std::mutex mutex;
};
} // namespace aicpu
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_AICPU_ENVIRON_MANAGER_H_
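And a companion sketch of the manager's lifecycle, again based only on the declarations shown above: create a handle, look the environment up, and clear everything at shutdown.

#include "environ/aicpu_environ_manager.h"

void EnvironLifecycleExample() {
  auto &mgr = aicpu::EnvironMgr::GetInstance();
  int64_t handle = mgr.Create();           // unique handle; an error code is returned on overflow
  aicpu::EnvironPtr env = mgr.Get(handle);
  if (env != nullptr) {
    // env->Set(...) / env->Get(...) as in the Environ sketch above
  }
  mgr.Clear();                             // drops every environment and frees stored value addresses
}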

View File

@ -0,0 +1,46 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "environ/environ_create.h"
#include "environ/aicpu_environ_manager.h"
namespace aicpu {
uint32_t EnvironCreateKernel::DoCompute() {
// Generate an unique handle.
int64_t env_handle = EnvironMgr::GetInstance().Create();
AICPU_LOGD("Create env handle:%d", env_handle);
auto *output_data = reinterpret_cast<int64_t *>(io_addrs_[aicpu::kIndex0]);
output_data[0] = env_handle;
return AICPU_KERNEL_STATE_SUCCESS;
}
uint32_t EnvironCreateKernel::ParseKernelParam() {
AICPU_LOGD("Enter ParseKernelParam.");
if (!EnvironMgr::GetInstance().IsScalarTensor(node_def_.outputs(aicpu::kIndex0))) {
AICPU_LOGE("The output is not scalar tensor.");
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
return AICPU_KERNEL_STATE_SUCCESS;
}
} // namespace aicpu
extern "C" {
__attribute__((visibility("default"))) uint32_t EnvironCreate(void *param) {
aicpu::EnvironCreateKernel environCreateKernel;
return environCreateKernel.Compute(param);
}
}

View File

@ -0,0 +1,33 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_ENVIRON_CREATE_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_ENVIRON_CREATE_H_
#include "common/kernel_base.h"
namespace aicpu {
class EnvironCreateKernel : public KernelBase {
public:
EnvironCreateKernel() : KernelBase("EnvironCreate") {}
~EnvironCreateKernel() = default;
protected:
uint32_t DoCompute() override;
uint32_t ParseKernelParam() override;
};
} // namespace aicpu
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_ENVIRON_CREATE_H_

View File

@ -0,0 +1,42 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "environ/environ_destroy_all.h"
#include "environ/aicpu_environ_manager.h"
namespace aicpu {
uint32_t EnvironDestroyAllKernel::DoCompute() {
AICPU_LOGD("Destroy all env handle");
EnvironMgr::GetInstance().Clear();
return AICPU_KERNEL_STATE_SUCCESS;
}
uint32_t EnvironDestroyAllKernel::ParseKernelParam() {
AICPU_LOGD("Enter ParseKernelParam.");
if (!EnvironMgr::GetInstance().IsScalarTensor(node_def_.outputs(aicpu::kIndex0))) {
AICPU_LOGE("The output is not scalar tensor.");
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
return AICPU_KERNEL_STATE_SUCCESS;
}
} // namespace aicpu
extern "C" {
__attribute__((visibility("default"))) uint32_t EnvironDestroyAll(void *param) {
aicpu::EnvironDestroyAllKernel environDestroyAllKernel;
return environDestroyAllKernel.Compute(param);
}
}

View File

@ -0,0 +1,33 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_ENVIRON_DESTORY_ALL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_ENVIRON_DESTORY_ALL_H_
#include "common/kernel_base.h"
namespace aicpu {
class EnvironDestroyAllKernel : public KernelBase {
public:
EnvironDestroyAllKernel() : KernelBase("EnvironDestroyAll") {}
~EnvironDestroyAllKernel() = default;
protected:
uint32_t DoCompute() override;
uint32_t ParseKernelParam() override;
};
} // namespace aicpu
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_ENVIRON_DESTORY_ALL_H_

View File

@ -0,0 +1,107 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "environ/environ_get.h"
#include <random>
#include <climits>
#include <vector>
#include <algorithm>
#include <string>
#include "mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_ops/aicpu_sharder/aicpu_sharder.h"
#include "mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_ops/common/tensor.h"
#include "environ/aicpu_environ_manager.h"
namespace aicpu {
constexpr auto kEnvValueTypeAttr = "value_type";
uint32_t EnvironGetKernel::DoCompute() {
AICPU_LOGD("Enter DoCompute.");
auto &env_mgr = EnvironMgr::GetInstance();
auto *input_handle_ptr = reinterpret_cast<int64_t *>((io_addrs_[aicpu::kIndex0]));
auto *input_key_ptr = reinterpret_cast<int64_t *>((io_addrs_[aicpu::kIndex1]));
auto *default_value_ptr = reinterpret_cast<void *>((io_addrs_[aicpu::kIndex2]));
auto *output_ptr = reinterpret_cast<void *>((io_addrs_[aicpu::kIndex3]));
// Get handle and key
int64_t handle = input_handle_ptr[0];
int64_t key = input_key_ptr[0];
// Get env and value by handle and key
const auto &env = env_mgr.Get(handle);
AICPU_CHECK_NULLPTR(env, AICPU_KERNEL_STATE_PARAM_INVALID, "Get env [%d] failed", handle)
const auto &env_value = env->Get(key);
AICPU_LOGD("EnvironGetKernel: hindle[%d], key[%d], value[%d]", handle, key, (void *)&env_value);
// Default value
auto *output_value_ptr = default_value_ptr;
auto output_value_size = default_value_size_;
auto output_value_type = attr_value_type_;
if (env_value != nullptr) {
output_value_ptr = env_value->addr_;
output_value_size = env_value->size_;
output_value_type = env_value->value_type_;
} else {
AICPU_LOGE("Get key[%d] value checks failed.", key);
}
if ((output_value_size_ < output_value_size) || (output_value_type != attr_value_type_)) {
AICPU_LOGE("The env value checks invalid, value_size: %d vs %d, value_type:%d vs %d", output_value_size_,
output_value_size, output_value_type, attr_value_type_);
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
auto ret = memcpy_s(output_ptr, output_value_size_, output_value_ptr, output_value_size_);
AICPU_CHECK_FALSE((ret == EOK), AICPU_KERNEL_STATE_PARAM_INVALID,
"Memcpy size[%zu] from env map to output[0] failed.", output_value_size_);
return AICPU_KERNEL_STATE_SUCCESS;
}
uint32_t EnvironGetKernel::ParseKernelParam() {
AICPU_LOGD("Enter ParseKernelParam.");
auto &env_mgr = EnvironMgr::GetInstance();
if (!env_mgr.CheckEnvInput(node_def_)) {
AICPU_LOGE("The input checks invalid. ");
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
// Get value type attr
::google::protobuf::Map<::std::string, ::aicpuops::AttrValue> nodedef_map = node_def_.attrs();
attr_value_type_ = nodedef_map[kEnvValueTypeAttr].i();
// check output value
auto default_value_tensor = node_def_.inputs(aicpu::kIndex2);
auto output_value_ptr_tensor = node_def_.outputs(aicpu::kIndex0);
if ((output_value_ptr_tensor.tensor_shape().dim_size() != default_value_tensor.tensor_shape().dim_size()) ||
(output_value_ptr_tensor.tensor_type() != default_value_tensor.tensor_type())) {
AICPU_LOGE("The env value checks invalid.");
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
// Get value size.
default_value_size_ = default_value_tensor.data_size();
output_value_size_ = output_value_ptr_tensor.data_size();
return AICPU_KERNEL_STATE_SUCCESS;
}
} // namespace aicpu
extern "C" {
__attribute__((visibility("default"))) uint32_t EnvironGet(void *param) {
aicpu::EnvironGetKernel environGetKernel;
return environGetKernel.Compute(param);
}
}

View File

@ -0,0 +1,39 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_ENVIRON_GET_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_ENVIRON_GET_H_
#include <vector>
#include "mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_ops/common/kernel_base.h"
namespace aicpu {
class EnvironGetKernel : public KernelBase {
public:
EnvironGetKernel() : KernelBase("EnvironGet") {}
~EnvironGetKernel() = default;
protected:
uint32_t DoCompute() override;
uint32_t ParseKernelParam() override;
private:
int32_t attr_value_type_{0};
size_t default_value_size_{0};
size_t output_value_size_{0};
};
} // namespace aicpu
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_ENVIRON_GET_H_

View File

@ -0,0 +1,85 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "environ/environ_set.h"
#include <string>
#include <memory>
#include "mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_ops/aicpu_sharder/aicpu_sharder.h"
#include "mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_ops/common/tensor.h"
#include "environ/aicpu_environ_manager.h"
namespace aicpu {
constexpr auto kEnvValueTypeAttr = "value_type";
uint32_t EnvironSetKernel::DoCompute() {
AICPU_LOGD("Enter DoCompute.");
auto &env_mgr = EnvironMgr::GetInstance();
auto *input_handle_ptr = reinterpret_cast<int64_t *>(io_addrs_[aicpu::kIndex0]);
auto *input_key_ptr = reinterpret_cast<int64_t *>(io_addrs_[aicpu::kIndex1]);
auto *input_value_ptr = reinterpret_cast<void *>(io_addrs_[aicpu::kIndex2]);
auto *output_handle_ptr = reinterpret_cast<int64_t *>(io_addrs_[aicpu::kIndex3]);
auto *value_ptr = malloc(value_size_);
AICPU_CHECK_NULLPTR(value_ptr, AICPU_KERNEL_STATE_PARAM_INVALID, "Malloc failed.")
auto ret = memcpy_s(value_ptr, value_size_, input_value_ptr, value_size_);
AICPU_CHECK_FALSE((ret == EOK), AICPU_KERNEL_STATE_PARAM_INVALID, "Memcpy size[%zu] from input[2] to environ failed.",
value_size_);
// Set env member.
const auto &env = env_mgr.Get(input_handle_ptr[0]);
AICPU_CHECK_NULLPTR(env, AICPU_KERNEL_STATE_PARAM_INVALID, "Get handle[%d] failed.", input_handle_ptr[0]);
auto env_value = std::make_shared<EnvironValue>(value_ptr, value_size_, attr_value_type_);
env->Set(input_key_ptr[0], env_value);
AICPU_LOGD("EnvironSetKernel: handle[%d], key[%d], value[%d]", input_handle_ptr[0], input_key_ptr[0],
(void *)&env_value);
// Set output handle
output_handle_ptr[0] = input_handle_ptr[0];
return AICPU_KERNEL_STATE_SUCCESS;
}
uint32_t EnvironSetKernel::ParseKernelParam() {
AICPU_LOGD("Enter ParseKernelParam.");
auto &env_mgr = EnvironMgr::GetInstance();
if (!env_mgr.CheckEnvInput(node_def_)) {
AICPU_LOGE("The input checks invalid. ");
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
if (!env_mgr.IsScalarTensor(node_def_.outputs(aicpu::kIndex0))) {
AICPU_LOGE("The output handle is not equal of input handle.");
return AICPU_KERNEL_STATE_PARAM_INVALID;
}
// Get value type.
::google::protobuf::Map<::std::string, ::aicpuops::AttrValue> nodedef_map = node_def_.attrs();
attr_value_type_ = nodedef_map[kEnvValueTypeAttr].i();
// Get value size.
aicpuops::Tensor value_tensor = node_def_.inputs(aicpu::kIndex2);
value_size_ = value_tensor.data_size();
return AICPU_KERNEL_STATE_SUCCESS;
}
} // namespace aicpu
extern "C" {
__attribute__((visibility("default"))) uint32_t EnvironSet(void *param) {
aicpu::EnvironSetKernel environSetKernel;
return environSetKernel.Compute(param);
}
}

View File

@ -0,0 +1,36 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_ENVIRON_SET_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_ENVIRON_SET_H_
#include "common/kernel_base.h"
namespace aicpu {
class EnvironSetKernel : public KernelBase {
public:
EnvironSetKernel() : KernelBase("EnvironSet") {}
~EnvironSetKernel() = default;
protected:
uint32_t DoCompute() override;
uint32_t ParseKernelParam() override;
private:
int32_t attr_value_type_{0};
size_t value_size_{0};
};
} // namespace aicpu
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_OPS_ENVIRON_ENVIRON_SET_H_

View File

@ -71,11 +71,16 @@ constexpr auto kSearchSorted = "SearchSorted";
constexpr auto kResizeBilinear = "ResizeBilinear";
constexpr auto kResizeBilinearGrad = "ResizeBilinearGrad";
constexpr auto kScatterElements = "ScatterElements";
constexpr auto kEnvironCreate = "EnvironCreate";
constexpr auto kEnvironSet = "EnvironSet";
constexpr auto kEnvironGet = "EnvironGet";
constexpr auto kEnvironDestroyAll = "EnvironDestroyAll";
const std::set<std::string> kCpuKernelOps{kIdentity, kMaskedSelect, kMaskedSelectGrad, kDynamicStitch,
kSearchSorted, kResizeBilinear, kResizeBilinearGrad, kScatterElements};
const std::set<std::string> kCacheKernelOps{kUpdateCache, kCacheSwapTable, kSubAndFilter, kPadAndShift,
kDropout3D, kDropout2D, kNonMaxSuppressionV3};
const std::set<std::string> kCpuKernelBaseOps{kGetNext, kInitData, kRandomChoiceWithMask};
const std::set<std::string> kCpuKernelBaseOps{kGetNext, kInitData, kRandomChoiceWithMask, kEnvironCreate,
kEnvironSet, kEnvironGet, kEnvironDestroyAll};
const std::set<std::string> kDynamicInputOps{
kPrint, kPack, kMeshgrid, kStackInitOpName, kStackDestroyOpName, kStackPushOpName, kStackPopOpName, kDynamicStitch};
struct AicpuParamHead {

View File

@ -22,6 +22,7 @@
#include <fcntl.h>
#include <unistd.h>
#include <chrono>
#include <algorithm>
#include <map>
#include <memory>
@ -44,6 +45,8 @@ constexpr int32_t MAX_ERROR_LEN = 1024;
constexpr int32_t PROCESS_NUM = 16;
constexpr int32_t TIME_OUT = 300;
#define ACQUIRE_LOCK LockMng lock(fd_, __func__, __LINE__)
inline std::string GetErrorInfo() {
char buf[MAX_ERROR_LEN + 1] = {0};
auto ret = strerror_r(errno, buf, MAX_ERROR_LEN);
@ -59,8 +62,9 @@ inline std::string GetErrorInfo() {
}
bool AkgKernelPool::LockMng::TryLock() const {
// Try to lock 100 times. Return errno if lock unsuccessfully
uint32_t trial = 100;
// Try to lock up to `trial` times. Log errno if the lock cannot be acquired.
uint32_t trial = 2000;
const uint32_t sleep_time_us = 5000;
int32_t ret = -1;
while (trial > 0) {
@ -70,14 +74,15 @@ bool AkgKernelPool::LockMng::TryLock() const {
}
trial--;
(void)usleep(5000);
(void)usleep(sleep_time_us);
}
if (ret == -1) {
MS_LOG(ERROR) << "Failed to acquire the lock, error msg:" << GetErrorInfo() << ".";
MS_LOG(ERROR) << "Failed to acquire the lock, error msg:" << GetErrorInfo() << ", left trying times: " << trial;
return false;
}
MS_LOG(INFO) << "AkgKernelBuild successfully acquire lock called at " << calling_position_;
return true;
}
@ -86,6 +91,7 @@ void AkgKernelPool::LockMng::Unlock() const {
if (ret == -1) {
MS_LOG(ERROR) << "Failed to release the lock, error msg:" << GetErrorInfo();
}
MS_LOG(INFO) << "AkgKernelBuild successfully release lock called at " << calling_position_;
}
std::string AkgKernelPool::GetCurrentPath() const {
@ -114,7 +120,7 @@ void *AkgKernelPool::CreateSharedMem(const std::string &path) {
auto mem_size = sizeof(size_t) * kListNum_ * (kMaxKernelNum_ + 1) + 512;
{
LockMng lock(fd_);
ACQUIRE_LOCK;
if (!lock.locked_) {
MS_LOG(ERROR) << "Failed to acquire lock.";
return nullptr;
@ -140,7 +146,7 @@ void *AkgKernelPool::CreateSharedMem(const std::string &path) {
}
}
LockMng lock(fd_);
ACQUIRE_LOCK;
if (!lock.locked_) {
MS_LOG(ERROR) << "Failed to acquire lock.";
return nullptr;
@ -203,7 +209,7 @@ int32_t AkgKernelPool::Init(const std::vector<JsonNodePair> &build_args) {
int32_t AkgKernelPool::Release() const {
{
LockMng lock(fd_);
ACQUIRE_LOCK;
if (!lock.locked_) {
MS_LOG(ERROR) << "Failed to acquire lock.";
return -1;
@ -244,7 +250,7 @@ int32_t AkgKernelPool::Release() const {
}
int32_t AkgKernelPool::AddKernels(const std::vector<JsonNodePair> &build_args) {
LockMng lock(fd_);
ACQUIRE_LOCK;
if (!lock.locked_) {
MS_LOG(ERROR) << "Failed to acquire lock.";
return -1;
@ -293,7 +299,7 @@ int32_t AkgKernelPool::AddKernels(const std::vector<JsonNodePair> &build_args) {
}
int32_t AkgKernelPool::FetchKernels(std::set<size_t> *out) {
LockMng lock(fd_);
ACQUIRE_LOCK;
if (!lock.locked_) {
MS_LOG(ERROR) << "Failed to acquire lock.";
return -1;
@ -301,7 +307,7 @@ int32_t AkgKernelPool::FetchKernels(std::set<size_t> *out) {
std::set<size_t> left_in_todo_list;
// filter out kernels which belong to other processes
// filter out kernels which do not belong to this process
auto FilterBySelfList = [&left_in_todo_list, &out, this](size_t id) {
if (this->self_kernel_ids_.count(id) != 0) {
(void)out->emplace(id);
@ -323,7 +329,7 @@ int32_t AkgKernelPool::FetchKernels(std::set<size_t> *out) {
int32_t AkgKernelPool::UpdateAndWait(const std::set<size_t> &ids) {
if (!ids.empty()) {
LockMng lock(fd_);
ACQUIRE_LOCK;
if (!lock.locked_) {
MS_LOG(ERROR) << "Failed to acquire lock.";
return -1;
@ -355,10 +361,11 @@ int32_t AkgKernelPool::UpdateAndWait(const std::set<size_t> &ids) {
int32_t AkgKernelPool::Wait() const {
// wait until all the kernels which belong to this process finish compiling
uint32_t trials = 1000;
const uint32_t sleep_time_us = 1000000;
while (trials > 0) {
{
LockMng lock(fd_);
ACQUIRE_LOCK;
if (!lock.locked_) {
MS_LOG(ERROR) << "Failed to acquire lock.";
return -1;
@ -372,7 +379,7 @@ int32_t AkgKernelPool::Wait() const {
}
}
(void)usleep(1000000);
(void)usleep(sleep_time_us);
trials--;
}
@ -565,6 +572,7 @@ void AkgKernelBuilder::LoadCache() {
(void)bin_map->Insert(kernel_name, kernel_dir + kernel_json);
}
has_load = true;
(void)closedir(dir);
return;
}

View File

@ -61,8 +61,9 @@ class AkgKernelPool {
public:
class LockMng {
public:
explicit LockMng(int32_t fd) {
explicit LockMng(const int32_t fd, const char *function, const uint32_t line) {
fd_ = fd;
calling_position_ = std::string(function) + ":" + std::to_string(line);
locked_ = TryLock();
}
@ -79,6 +80,7 @@ class AkgKernelPool {
void Unlock() const;
int32_t fd_{-1};
std::string calling_position_;
};
public:
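The .cc diff above pairs this constructor change with an ACQUIRE_LOCK macro so every lock records where it was taken. A minimal illustration of the resulting pattern follows; the member function name is hypothetical, and the RAII release on scope exit reflects the LockMng design rather than code shown in this diff.

#define ACQUIRE_LOCK LockMng lock(fd_, __func__, __LINE__)

int32_t AkgKernelPool::DoLockedWork() {  // hypothetical member function
  ACQUIRE_LOCK;
  if (!lock.locked_) {
    MS_LOG(ERROR) << "Failed to acquire lock.";
    return -1;
  }
  // ... critical section ...
  return 0;  // the file lock is expected to be released when `lock` goes out of scope
}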

View File

@ -90,7 +90,8 @@ std::vector<std::pair<AnfNodePtr, std::pair<size_t, size_t>>> GetInputIndex(cons
accum_idx += LongToInt(dyn_input_sizes[dyn_i]);
if (used_as_idx < accum_idx) {
input_index.push_back(std::make_pair(
anf_node, std::make_pair(dyn_i, IntToSize(used_as_idx - (accum_idx - dyn_input_sizes[dyn_i])))));
anf_node,
std::make_pair(dyn_i, IntToSize(used_as_idx - (accum_idx - LongToInt(dyn_input_sizes[dyn_i]))))));
found = true;
break;
}
@ -335,7 +336,7 @@ bool AkgKernelJsonGenerator::CreateInputDescJson(const AnfNodePtr &anf_node, con
input_list.emplace_back(input_desc_json);
real_input_index++;
}
inputs_json->emplace_back(input_list);
(void)inputs_json->emplace_back(input_list);
}
return true;
}
@ -877,7 +878,7 @@ nlohmann::json AkgKernelJsonGenerator::CreateInputsJson(const std::vector<AnfNod
input_shape.push_back(1);
}
input_desc_json[kJsonKeyShape] = input_shape;
inputs_json.emplace_back(std::vector<nlohmann::json>{input_desc_json});
(void)inputs_json.emplace_back(std::vector<nlohmann::json>{input_desc_json});
}
return inputs_json;
}

View File

@ -73,12 +73,12 @@ bool AkgKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vect
// pack all addresses into a vector.
std::vector<void *> runtime_args;
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtime_args),
[](const AddressPtr &input) -> void * { return input->addr; });
[](const AddressPtr &input) { return input->addr; });
(void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtime_args),
[](const AddressPtr &output) -> void * { return output->addr; });
[](const AddressPtr &output) { return output->addr; });
if (!workspace.empty()) {
(void)std::transform(std::begin(workspace), std::end(workspace), std::back_inserter(runtime_args),
[](const AddressPtr &addr) -> void * { return addr->addr; });
[](const AddressPtr &addr) { return addr->addr; });
}
rtL2Ctrl_t *l2ctrl = nullptr;
@ -111,12 +111,12 @@ std::vector<TaskInfoPtr> AkgKernelMod::GenTask(const std::vector<AddressPtr> &in
// pack all addresses into a vector.
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(input_data_addrs),
[](const AddressPtr &input) -> void * { return input->addr; });
[](const AddressPtr &input) { return input->addr; });
(void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs),
[](const AddressPtr &output) -> void * { return output->addr; });
[](const AddressPtr &output) { return output->addr; });
if (!workspace.empty()) {
(void)std::transform(std::begin(workspace), std::end(workspace), std::back_inserter(workspace_addrs),
[](const AddressPtr &workspace) -> void * { return workspace->addr; });
[](const AddressPtr &workspace) { return workspace->addr; });
}
uint32_t block_dim = DEFAULT_BLOCK_DIM; // default blockdim equal to 1.

View File

@ -33,7 +33,7 @@ namespace kernel {
class AkgParallelLaunch {
public:
using AkgParallelLambda = int (*)(int task_id, int num_task, void *cdata);
static int AkgLaunchFunc(AkgParallelLambda flambda, void *cdata, int num_task) {
static int AkgLaunchFunc(AkgParallelLambda flambda, void *cdata, int) {
auto nthreads = omp_get_max_threads();
#pragma omp parallel num_threads(nthreads)
{ flambda(omp_get_thread_num(), nthreads, cdata); }
@ -41,10 +41,11 @@ class AkgParallelLaunch {
}
};
struct AkgCallBack {
void *parallel_launch_func;
void *(*malloc_func)(size_t);
void (*free_func)(void *);
class AkgCallBack {
public:
void *parallel_launch_func = nullptr;
void *(*malloc_func)(size_t) = nullptr;
void (*free_func)(void *) = nullptr;
AkgCallBack() {
parallel_launch_func = reinterpret_cast<void *>(&AkgParallelLaunch::AkgLaunchFunc);
@ -121,16 +122,16 @@ CpuKernelMod::CpuKernelMod(const KernelPackPtr &kp) {
}
bool CpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
const std::vector<AddressPtr> &outputs, void *) {
if (launch_func_ == nullptr) {
MS_LOG(ERROR) << "GetFunction failed. kernel: " << kernel_name_;
return false;
}
std::vector<void *> runtimeargs;
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtimeargs),
[](const AddressPtr &input) -> void * { return input->addr; });
[](const AddressPtr &input) { return input->addr; });
(void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtimeargs),
[](const AddressPtr &output) -> void * { return output->addr; });
[](const AddressPtr &output) { return output->addr; });
static AkgCallBack akg_callback;
(void)runtimeargs.emplace_back(reinterpret_cast<void *>(&akg_callback));
using AkgCpuKernelFunction = void (*)(void *);

View File

@ -55,7 +55,7 @@ class CpuKernelMod : public KernelMod {
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
const std::vector<AddressPtr> &outputs, void *) override;
static CpuKernelManagerPtr kernelmanager_;

View File

@ -16,6 +16,7 @@
#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h"
#include <algorithm>
#include "nlohmann/json.hpp"
#include "utils/ms_utils.h"
@ -29,6 +30,12 @@ const int MAX_REGISTER_PER_THREAD_BLOCK = 65536;
const int REGISTER_UNIT_IN_WARP = 256;
const int WARP_SIZE = 32;
const int WARP_ALLOC_GRAN = 4;
const int BLOCKIDX_X_INDEX = 0;
const int BLOCKIDX_Y_INDEX = 1;
const int BLOCKIDX_Z_INDEX = 2;
const int THREADIDX_X_INDEX = 3;
const int THREADIDX_Y_INDEX = 4;
const int THREADIDX_Z_INDEX = 5;
GpuKernelManagerPtr GpuKernelMod::kernelmanager_ = std::make_shared<GpuKernelManager>();
GpuKernelManager::GpuKernelManager() {}
@ -120,15 +127,16 @@ bool GpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vect
}
std::vector<void *> runtimeargs;
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtimeargs),
[](const AddressPtr &input) -> void * { return reinterpret_cast<void *>(&(input->addr)); });
[](const AddressPtr &input) { return reinterpret_cast<void *>(&(input->addr)); });
(void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtimeargs),
[](const AddressPtr &output) -> void * { return reinterpret_cast<void *>(&(output->addr)); });
[](const AddressPtr &output) { return reinterpret_cast<void *>(&(output->addr)); });
if (!workspace.empty()) {
(void)std::transform(std::begin(workspace), std::end(workspace), std::back_inserter(runtimeargs),
[](const AddressPtr &addr) -> void * { return reinterpret_cast<void *>(&(addr->addr)); });
[](const AddressPtr &addr) { return reinterpret_cast<void *>(&(addr->addr)); });
}
result = cuLaunchKernel(kernel_addr, thread_info[0], thread_info[1], thread_info[2], thread_info[3], thread_info[4],
thread_info[5], 0, reinterpret_cast<CUstream>(stream_ptr),
result = cuLaunchKernel(kernel_addr, thread_info[BLOCKIDX_X_INDEX], thread_info[BLOCKIDX_Y_INDEX],
thread_info[BLOCKIDX_Z_INDEX], thread_info[THREADIDX_X_INDEX], thread_info[THREADIDX_Y_INDEX],
thread_info[THREADIDX_Z_INDEX], 0, reinterpret_cast<CUstream>(stream_ptr),
reinterpret_cast<void **>(&runtimeargs[0]), 0);
if (result != CUDA_SUCCESS) {
const char *msg = nullptr;
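For readability of the change above, a small hedged helper showing how the six named indices line up with cuLaunchKernel's grid and block dimensions; the element type of thread_info is assumed to be uint32_t, and the index constants are restated locally so the sketch is self-contained.

#include <vector>
#include <cuda.h>

// Mirrors the constants defined in the hunk above (values 0..5).
constexpr int BLOCKIDX_X_INDEX = 0, BLOCKIDX_Y_INDEX = 1, BLOCKIDX_Z_INDEX = 2;
constexpr int THREADIDX_X_INDEX = 3, THREADIDX_Y_INDEX = 4, THREADIDX_Z_INDEX = 5;

CUresult LaunchWithThreadInfo(CUfunction kernel_addr, const std::vector<uint32_t> &thread_info,
                              CUstream stream, void **args) {
  return cuLaunchKernel(kernel_addr,
                        thread_info[BLOCKIDX_X_INDEX],   // gridDimX
                        thread_info[BLOCKIDX_Y_INDEX],   // gridDimY
                        thread_info[BLOCKIDX_Z_INDEX],   // gridDimZ
                        thread_info[THREADIDX_X_INDEX],  // blockDimX
                        thread_info[THREADIDX_Y_INDEX],  // blockDimY
                        thread_info[THREADIDX_Z_INDEX],  // blockDimZ
                        0 /* sharedMemBytes */, stream, args, nullptr /* extra */);
}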

View File

@ -215,8 +215,7 @@ KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &pro
KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor) {
MS_LOG(INFO) << "Insert cache for kernel:" << kernel_name << ", processr:" << processor;
KernelMeta *bin_map = KernelMeta::GetInstance();
std::string kernel_json;
kernel_json = bin_map->kernel_meta_path();
std::string kernel_json = bin_map->kernel_meta_path();
(void)kernel_json.append(kernel_name).append(kJsonSuffix);
KernelPackPtr kernel_pack = std::make_shared<KernelPack>();
if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) {
@ -984,9 +983,9 @@ size_t UnitSizeInBytes(const mindspore::TypeId &t) {
case kNumberTypeFloat64:
bytes = sizeof(int64_t);
break;
case kNumberTypeInt4:
default:
MS_LOG(EXCEPTION) << "Invalid types " << t;
break;
}
return bytes;

View File

@ -459,12 +459,13 @@ bool ArithmeticCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const
const std::vector<AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kOutputsNum, kernel_name_);
if (output_size_ == 0) {
return true;
}
auto *input1 = reinterpret_cast<T *>(inputs[0]->addr);
const auto *input2 = reinterpret_cast<T *>(inputs[1]->addr);
auto *output = reinterpret_cast<T *>(outputs[0]->addr);
if (output_size_ == 0) {
MS_LOG(WARNING) << kernel_name_ << " output shape contain 0, output_shape: " << output_shape_;
return true;
}
if (kernel_name_ == prim::kPrimAssignAdd->name()) {
AssignAdd(input1, input2, output);
} else {

View File

@ -64,10 +64,8 @@ void BroadcastToCPUKernel<T>::CheckArgs() {
if (input_shape_[i] != output_shape_[i + offset] && input_shape_[i] != 1) {
MS_LOG(EXCEPTION)
<< "For '" << kernel_name_ << "', when the " << i
<< "'th dimension of input tensor 'input_x' "
"and target shape 'shape' not equal, the dimension length of input tensor 'input_x' should be "
"1. But got the dimension of input tensor 'input_x': "
<< Vector2Str(input_shape_) << ", and the dimension of target shape 'shape': " << Vector2Str(output_shape_);
<< "'th, the shape of input should be 1 and equal to the shape of output, but got the shape of input: "
<< Vector2Str(input_shape_) << ", and the shape of output: " << Vector2Str(output_shape_);
}
}
}

Some files were not shown because too many files have changed in this diff.