Compare commits

...

397 Commits
master ... r0.7

Author SHA1 Message Date
mindspore-ci-bot 2139c7ddc6 !8172 【轻量级 PR】:update RELEASE.md.
Merge pull request !8172 from shenwei41/N/A
2020-11-05 09:08:01 +08:00
mindspore-ci-bot c4e3fd70ee !8181 fix securec download links due to mistakes made by openeuler community
Merge pull request !8181 from yanghaoran/r0.7
2020-11-04 09:24:01 +08:00
yanghaoran 28372fbb96 update graphengine, fix securec download links 2020-11-03 19:41:48 +08:00
shenwei41 81c2068635 update RELEASE.md. 2020-11-03 17:41:17 +08:00
mindspore-ci-bot 4a8f92e4be !6799 mobilenetv2 update and debug
Merge pull request !6799 from yepei6/r0.7_align
2020-09-25 16:11:50 +08:00
Payne c53cd6bb22 align to docs and r1.0 2020-09-21 14:32:01 +08:00
mindspore-ci-bot 297e65e162 !6579 modify README_CN.md
Merge pull request !6579 from HW_KK/r0.7
2020-09-20 15:38:47 +08:00
wuweikang 1729a27bdb modify README_CN.md 2020-09-19 18:15:03 +08:00
mindspore-ci-bot 38da39b0be !6544 modify Dockerfile for devel
Merge pull request !6544 from HW_KK/r0.7
2020-09-19 14:39:00 +08:00
mindspore-ci-bot 56e22a5c82 !6535 【MD】r0.7 Branch: MD5 value update in the file - icu4c.cmake of branch r0.7
Merge pull request !6535 from magemomou/MD5_r0.7
2020-09-19 12:36:50 +08:00
wuweikang b551623c86 modify Dockerfile for devel 2020-09-19 11:41:26 +08:00
mayang 5529d41769 MD5 value update in the file icu4c.cmake of branch r0.7 2020-09-19 10:43:02 +08:00
mindspore-ci-bot ab997f9e37 !6299 mobilenetv2 debug for load ckpt
Merge pull request !6299 from yepei6/r0.7_mobilenet_debug
2020-09-18 11:28:16 +08:00
mindspore-ci-bot 5027130939 !6360 modify Dockerfile for GPU version
Merge pull request !6360 from HW_KK/r0.7
2020-09-16 17:52:28 +08:00
wuweikang 30450e2177 modify Dockerfile for GPU version 2020-09-16 17:26:59 +08:00
Payne 93c4d2929c load_ckpt debug 2020-09-16 16:16:18 +08:00
mindspore-ci-bot e39775edfd !6218 [MS][LITE]delete thread header for fix 0.7r java api bugs
Merge pull request !6218 from yeyunpeng2020/r0.7
2020-09-15 15:00:14 +08:00
mindspore-ci-bot e0953c47c3 !6213 [MS][LITE]fix the compilation failure
Merge pull request !6213 from gongdaguo/fix_compilation
2020-09-15 14:44:08 +08:00
mindspore-ci-bot 8e59d7449e !6187 make ref node's update edge using same address
Merge pull request !6187 from lianliguang/r0.7
2020-09-15 14:18:12 +08:00
yeyunpeng 41fd02baf3 delete thread header for fix 0.7r java api bugs 2020-09-15 10:50:55 +08:00
gongdaguo 02ba93492d fix the compilation failure and change the model download address
update mindspore/lite/test/models_mindspore.cfg.

update mindspore/lite/test/models_tflite_awaretraining.cfg.
2020-09-15 09:37:28 +08:00
WilliamLian 683e4aba74 make ref edge using same address 2020-09-14 16:28:46 +08:00
mindspore-ci-bot d373efcf99 !6129 mobilenetv2 add incremental_learn func
Merge pull request !6129 from yepei6/r0.7_mobilenetv2_add_incremental_func
2020-09-14 11:28:13 +08:00
Payne 4e2ccefe29 mobilenetv2 add incremental learning func 2020-09-13 22:25:39 +08:00
mindspore-ci-bot 6b4c5227ee !5942 adjust Dockerfile for nvcc support
Merge pull request !5942 from HW_KK/r0.7
2020-09-12 10:45:33 +08:00
mindspore-ci-bot 738ade501d !6014 update info with new platform
Merge pull request !6014 from guozhijian/update_info_with_platform_r0.7
2020-09-11 10:51:29 +08:00
mindspore-ci-bot 527d36597c !6010 fix bug of aicpu device ptr release failed
Merge pull request !6010 from caifubi/r0.7
2020-09-11 09:16:43 +08:00
jonyguo 7132be9ed2 update info with platform 2020-09-10 22:44:18 +08:00
caifubi 1c97d65f40 fix bug of aicpu device ptr release failed 2020-09-10 21:57:54 +08:00
mindspore-ci-bot 0ce4e3418c !5934 update supported platform info
Merge pull request !5934 from guozhijian/update_platform_info
2020-09-09 15:06:07 +08:00
wuweikang b83aa68071 adjust Dockerfile for nvcc support 2020-09-09 14:20:42 +08:00
jonyguo 86f5189323 update supported platform info 2020-09-09 09:58:09 +08:00
mindspore-ci-bot fc4bf19294 !5820 fix ub fusion
Merge pull request !5820 from jjfeing/r0.7_ub_fusion
2020-09-07 22:20:20 +08:00
jjfeing bd1261e5e6 adapt tensorengin modify, fix ub fusion 2020-09-07 20:45:57 +08:00
mindspore-ci-bot 5de9578abb !5794 rewrite fp16 to fp32
Merge pull request !5794 from cjh9368/static_check_r0.7
2020-09-05 13:56:50 +08:00
cjh9368 16f41b1619 rewrite fp16 to fp32 and fp32 to fp16 2020-09-05 10:44:54 +08:00
mindspore-ci-bot 7abe5a231f !5764 fix release note word err
Merge pull request !5764 from guozhijian/fix_release_note_word_err
2020-09-04 16:33:42 +08:00
jonyguo 27807a21ea fix: RELEASE.md spelling word error 2020-09-04 15:18:46 +08:00
mindspore-ci-bot 40bf2493fa !5490 modify the format info of tensorAdd
Merge pull request !5490 from limingqi107/r0.7
2020-09-03 11:07:12 +08:00
mindspore-ci-bot a585177825 !5575 update run for br: r0.7
Merge pull request !5575 from guozhijian/udpate_run_from_c75b100_to_c75b150
2020-09-02 20:54:47 +08:00
wuxuejian 06a6af7628 update aicpu proto and update module: graphengine
Support Dynamic Shape Aicpu Run Package
2020-09-02 19:27:21 +08:00
mindspore-ci-bot 9833a00e93 !5450 Fixbugfix for server shard range computation
Merge pull request !5450 from ZPaC/r0.7-fix-local-shard-error
2020-09-02 09:45:21 +08:00
mindspore-ci-bot 13fcfcdb78 !5592 [MS][Googlenet][README]googlenet readme have somewhere need to correct
Merge pull request !5592 from caojian05/ms_r0.7_googlenet_readme_update
2020-09-01 21:41:02 +08:00
CaoJian 96fad3b0a4 googlenet README update 2020-09-01 21:08:43 +08:00
mindspore-ci-bot a96646fc5a !5569 change readme file in yolov3_darknet53
Merge pull request !5569 from yangyongjie/r0.7
2020-09-01 14:53:16 +08:00
mindspore-ci-bot 26aba3b74c !5499 Support manual convert to quantative network of resnet
Merge pull request !5499 from chenfei_mindspore/r0.7
2020-09-01 14:31:50 +08:00
mindspore-ci-bot 5120720bac !5562 bert scripts enhancement
Merge pull request !5562 from yoonlee666/r0.7
2020-08-31 21:05:49 +08:00
mindspore-ci-bot 4382ce202c !5555 Modify read me of inceptionv3
Merge pull request !5555 from zhouyaqiang0/r0.7
2020-08-31 20:38:27 +08:00
mindspore-ci-bot 29fabd1324 !5561 Fix C++ coding standard problem
Merge pull request !5561 from yeyunpeng2020/r0.7
2020-08-31 17:18:27 +08:00
mindspore-ci-bot a43bd07c39 !5560 clean static check
Merge pull request !5560 from lyvette/r07
2020-08-31 16:51:36 +08:00
mindspore-ci-bot d332e4d2b7 !5581 clean static check
Merge pull request !5581 from cjh9368/static_check_r0.7
2020-08-31 16:51:12 +08:00
mindspore-ci-bot 403cb700b2 !5565 update RELEASE
Merge pull request !5565 from mengchunyang/r0.7
2020-08-31 16:42:53 +08:00
cristoval b2ffdf82c6 bugfix for server shard range computation 2020-08-31 16:40:40 +08:00
mindspore-ci-bot f5cba099d0 !5567 Delete some useless comments
Merge pull request !5567 from zhanyuan/r0.7
2020-08-31 16:36:09 +08:00
mindspore-ci-bot 9d10d934c7 !5557 [MS][LITE][Develop] fix bug of arm cpu int8 op: pooling memory leak
Merge pull request !5557 from yangruoqi713/r0.7
2020-08-31 16:31:11 +08:00
lyvette 62e7746e77 clean
modify log
2020-08-31 16:15:45 +08:00
chenfei 298393b66b add manual quantative network of resnet50 2020-08-31 16:02:33 +08:00
meng_chunyang 26bf044dde update RELEASE 2020-08-31 15:41:26 +08:00
cjh9368 fb973778e5 fix bug 2020-08-31 15:26:34 +08:00
yoonlee666 d472ee3921 bert script enhancement 2020-08-31 15:25:20 +08:00
yangyongjie e0a128142b fix performance of yolov3_darknet53 in readme file. 2020-08-31 15:09:21 +08:00
yeyunpeng 794aeb8e2c fix static check problem 2020-08-31 15:04:24 +08:00
zhanyuan 6051e3cfec Delete some useless comments 2020-08-31 14:58:56 +08:00
mindspore-ci-bot 03ff5f334a !5487 add schema file for BERT and TinyBERT
Merge pull request !5487 from wanghua/r0.7
2020-08-31 11:55:56 +08:00
yangruoqi713 dbe5cd9091 [MS][LITE][Develop] fix bug of arm cpu int8 op: pooling 2020-08-31 11:38:54 +08:00
zhouyaqiang 02fe2f9f1d modify read me of inception v3 2020-08-31 11:18:31 +08:00
mindspore-ci-bot 82dba19adf !5546 update README
Merge pull request !5546 from mengchunyang/r0.7
2020-08-31 11:01:20 +08:00
meng_chunyang e256877be9 update README 2020-08-31 10:09:03 +08:00
mindspore-ci-bot 433eaab225 !5544 remove unused benchmark option
Merge pull request !5544 from hangq/r0.7
2020-08-30 21:16:57 +08:00
hangq 18d3c686dc remove unused benchmark option 2020-08-30 20:42:26 +08:00
mindspore-ci-bot 7ebecd8d09 !5541 update model zoo image_classification readme, delete some error links
Merge pull request !5541 from zhangbiao31/r0.7
2020-08-30 20:36:54 +08:00
mindspore-ci-bot 6b062f2454 !5510 Modify read me of deeplabv3 and inceptionv3
Merge pull request !5510 from zhouyaqiang0/r0.7
2020-08-30 20:18:52 +08:00
mindspore-ci-bot 6dd79e2b78 !5543 warpctc and maskrcnn readme rectification
Merge pull request !5543 from gengdongjie/r0.7
2020-08-30 20:16:40 +08:00
mindspore-ci-bot 9ac3e85045 !5515 vgg16 readme update
Merge pull request !5515 from caojian05/ms_r0.7_vgg_readme_update
2020-08-30 18:56:36 +08:00
gengdongjie f366faa275 warpctc and maskrcnn readme rectification 2020-08-30 16:24:33 +08:00
Jolin Zhang46 4b7c6208a3 update model zoo image_classification readme 2020-08-30 16:16:51 +08:00
mindspore-ci-bot e70d664119 !5493 Modify name of the package generated on x86.
Merge pull request !5493 from wangshaocong/r0.7_lite_pkg
2020-08-30 13:47:03 +08:00
mindspore-ci-bot 60f9577103 !5535 add image_classification in model zoo
Merge pull request !5535 from zhangbiao31/r0.7
2020-08-30 09:53:00 +08:00
mindspore-ci-bot 92a1c55941 !5537 Fix some tiny bugs for converter in windows environment and conv ops
Merge pull request !5537 from liuwenhao/r0.7
2020-08-30 07:19:54 +08:00
liuwenhao4 22e37013b9 Fix some tiny bugs for converter in windows environment and conv ops 2020-08-30 01:17:02 +08:00
mindspore-ci-bot fa97e24938 !5519 [MS][LITE][Develop]fix lite debug compile
Merge pull request !5519 from sunsuodong/fix_debug_compile_0.7
2020-08-30 00:12:15 +08:00
mindspore-ci-bot 7714669469 !5498 modify yolov3-resnet18 README
Merge pull request !5498 from chengxb7532/r0.7
2020-08-29 20:56:39 +08:00
mindspore-ci-bot 3f305b8afa !5511 modify README.md
Merge pull request !5511 from wukesong/wks_read0.7
2020-08-29 20:52:21 +08:00
mindspore-ci-bot 9fcf1e37b6 !5506 Add the REAMD.md in Wide&Deep model
Merge pull request !5506 from huangxinjing/wide-deep-multi-readme
2020-08-29 20:04:56 +08:00
zhangbiao31 7a30cd8784 !4 add image_classification
Merge pull request !4 from wangzhe/r0.7
2020-08-29 19:34:09 +08:00
wangzhe c223bb3949 add image_classification 2020-08-29 19:29:30 +08:00
mindspore-ci-bot 1621e02b68 !5523 update release info for br: r0.7
Merge pull request !5523 from guozhijian/update_release_notes_r0.7
2020-08-29 17:43:37 +08:00
wukesong 2fb88e8c46 modify readme 2020-08-29 17:27:10 +08:00
jonyguo 958ca51437 update release notes for r0.7 2020-08-29 17:22:10 +08:00
zhouyaqiang 61e8f10209 modify readme 2020-08-29 17:19:17 +08:00
sunsuodong 23e68ce790 fix_debug_compile 2020-08-29 17:03:41 +08:00
CaoJian 422a836cde vgg16 readme update 2020-08-29 16:50:45 +08:00
huangxinjing 330f18de5f Add README for multi-table of Wide&Deep 2020-08-29 16:16:16 +08:00
mindspore-ci-bot 0662256f23 !5470 bugfix on argparser for bool
Merge pull request !5470 from gengdongjie/r0.7
2020-08-29 16:10:55 +08:00
mindspore-ci-bot 3a6749ab78 !5464 update resnet50 readme
Merge pull request !5464 from VectorSL/r0.7
2020-08-29 15:54:46 +08:00
wsc b7a22e1398 Modify name of the package generated on x86. 2020-08-29 15:50:49 +08:00
chengxianbin 6c055b96d1 modify yolov3-resnet18 net README.md 2020-08-29 15:34:27 +08:00
mindspore-ci-bot a9943a382c !5466 remove bool parameter parser in wide_and_deep
Merge pull request !5466 from yao_yf/remove_bool_paraser_in_wide_and_deep_r0.7
2020-08-29 14:42:51 +08:00
limingqi107 0775db0940 modify the format info of tensorAdd 2020-08-29 14:41:12 +08:00
wanghua 6674a88de4 add schema file for BERT and TinyBERT 2020-08-29 14:23:36 +08:00
mindspore-ci-bot 13dd07c34e !5459 modify transformer & fasterrcnn fix bool arguement parse bug
Merge pull request !5459 from yuchaojie/r0.7
2020-08-29 11:55:17 +08:00
mindspore-ci-bot 0178ecf785 !5440 lstm readme update
Merge pull request !5440 from caojian05/ms_r0.7_lstm_readme_update
2020-08-29 10:42:00 +08:00
gengdongjie b15a48a53b bugfix on argparser for bool 2020-08-29 10:35:29 +08:00
wilfChen 50324d3dda lstm readme update 2020-08-29 10:19:02 +08:00
yao_yf 0c175b2cc0 no bool parameter parser in wide_and_deep 2020-08-29 09:51:41 +08:00
mindspore-ci-bot 947c396f09 !5392 Modify read me of deeplabv3 and inceptionv3
Merge pull request !5392 from zhouyaqiang0/r0.7
2020-08-29 09:44:22 +08:00
VectorSL 48d669636a update resnet readme 2020-08-29 09:42:13 +08:00
mindspore-ci-bot 7d38a1fb7e !5456 sync lite to r0.7
Merge pull request !5456 from 徐安越/r0.7
2020-08-29 09:12:59 +08:00
mindspore-ci-bot 565b542886 !5437 Modify for resnet readme
Merge pull request !5437 from qujianwei/r0.7
2020-08-29 09:11:23 +08:00
yuchaojie 32787a3453 modify transformer & fasterrcnn fix bool argument parse bug 2020-08-29 09:02:55 +08:00
xuanyue 0ce8708dee sync lite to r0.7 2020-08-29 00:25:16 +08:00
mindspore-ci-bot b5393e6628 !5429 fix yolov3-resnet18&ssd bool argument parse bug and modify README
Merge pull request !5429 from chengxb7532/r0.7
2020-08-28 21:11:15 +08:00
chengxianbin 3da41e1091 modify yolov3-darknet53-quant README and fix bool argument parse bug 2020-08-28 18:20:37 +08:00
qujianwei e0ac982589 modify for resnet readme 2020-08-28 17:28:48 +08:00
mindspore-ci-bot 52ed1ea725 !5404 update submodule akg to r0.7 branch
Merge pull request !5404 from looop5/akg_r0.7
2020-08-28 16:59:59 +08:00
mindspore-ci-bot 2ff8de674c !5421 Fix sparse slicer leak
Merge pull request !5421 from ZPaC/r0.7-fix-sparseslicer-leak
2020-08-28 16:09:32 +08:00
mindspore-ci-bot 13978a2c98 !5422 update release note for br:r0.7
Merge pull request !5422 from guozhijian/update_release_notes_r0.7
2020-08-28 16:02:19 +08:00
jonyguo b0f847072c update release notes for r0.7.0 2020-08-28 15:37:38 +08:00
ZPaC 1ac075919a Fix sparse-slicer leak. 2020-08-28 15:15:04 +08:00
mindspore-ci-bot 6c68e70cfe !5400 yolov3_darknet53 script suit for gpu
Merge pull request !5400 from hanhuifeng/yolov3_gpu_r0.7
2020-08-28 14:52:14 +08:00
mindspore-ci-bot 915d9a0d8f !5409 Fix bugs in c-api: rename, concat, take, sampler, duplicate column & Change docstring of OneHot
Merge pull request !5409 from luoyang/son_r0.7
2020-08-28 14:39:51 +08:00
mindspore-ci-bot 03b9cf3c12 !5380 fix formula grammar error in comment of SmoothL1Loss
Merge pull request !5380 from xiaotianci/fix_formula
2020-08-28 14:39:35 +08:00
mindspore-ci-bot 321e8d3e8f !5410 ReadMe file normalize
Merge pull request !5410 from chenfei_mindspore/r0.7
2020-08-28 14:34:22 +08:00
luoyang 4f98ecb433 Fix bugs in c-api: rename, concat, take, sampler, duplicate column &
Change docstring of OneHot
2020-08-28 14:22:56 +08:00
mindspore-ci-bot d24af4b181 !5396 fix googlenet performance
Merge pull request !5396 from panfengfeng/fix_googlenet_performance
2020-08-28 11:48:05 +08:00
chenfei 1eab0cd71b fix README 2020-08-28 11:26:59 +08:00
zhouyaqiang c317c4643b fix readme of inceptionv3 and deeplabv3 2020-08-28 11:26:09 +08:00
mindspore-ci-bot 0c7c3c3e8d !5393 change enable_data_sink value to true for transformer
Merge pull request !5393 from yuchaojie/r0.7
2020-08-28 11:13:09 +08:00
looop5 b0ce67fdd6 update submodule akg to r0.7 branch 2020-08-28 10:55:30 +08:00
mindspore-ci-bot 23fc178a5a !5363 Modify for resnet readme and fix bool type optional
Merge pull request !5363 from qujianwei/r0.7
2020-08-28 10:32:50 +08:00
hanhuifeng2020 1f0a760cdb yolov3_darknet53 suit for gpu 2020-08-28 10:31:21 +08:00
panfengfeng 3b8562aa3d fix googlenet performance 2020-08-28 09:50:12 +08:00
yuchaojie ed1340f1e5 set enable_data_sink value to true for transformer 2020-08-28 09:44:49 +08:00
mindspore-ci-bot 98528bbc16 !5370 fix load input data error in pynative mode on gpu
Merge pull request !5370 from chujinjin/fix_load_input_data_error_in_pynative
2020-08-28 09:41:30 +08:00
mindspore-ci-bot 86cfa89c97 !5379 CutMixBatch and MixUpBatch fix for 2D one-hot labels for r0.7
Merge pull request !5379 from guozhijian/fix_cutmix_mixup_for_r0.7
2020-08-28 09:23:31 +08:00
mindspore-ci-bot beae07a9d1 !5364 gpu GoogleNet performance optimize
Merge pull request !5364 from VectorSL/slice-0.7
2020-08-28 09:14:06 +08:00
mindspore-ci-bot 4f9c6e77b7 !5273 Delete extra file.
Merge pull request !5273 from ZPaC/r0.7-delete-extra-file
2020-08-28 09:10:33 +08:00
ZPaC ac27f82607 Delete extra file 2020-08-27 22:41:58 +08:00
Xiao Tianci ed53f7f821 fix formula grammar error in comment of SmoothL1Loss 2020-08-27 22:06:54 +08:00
mahdi a5228c75c7 Fixed 2D one-hot label problems in CutMix and MixUp 2020-08-27 22:03:39 +08:00
mindspore-ci-bot 8533744d7c !5342 GPU FusedBatchNormEx add signature
Merge pull request !5342 from VectorSL/r0.7
2020-08-27 21:45:16 +08:00
mindspore-ci-bot f3444977f3 !5339 add type for get obj id & fix resource clear bug
Merge pull request !5339 from Simson/fix-r07
2020-08-27 21:39:14 +08:00
mindspore-ci-bot 581788f040 !5344 Delete __del__func in Profiler.
Merge pull request !5344 from yuximiao/r0.7
2020-08-27 21:11:35 +08:00
mindspore-ci-bot 3d511f80d5 !5357 gpu GoogleNet performance optimize
Merge pull request !5357 from limingqi107/r0.7
2020-08-27 21:05:28 +08:00
mindspore-ci-bot cb88a43a24 !5288 yolov3-resnet18 and ssd net README file normalize
Merge pull request !5288 from chengxb7532/r0.7
2020-08-27 20:55:07 +08:00
chujinjin 8948e55ae5 fix load input data error when input is a tuple 2020-08-27 20:43:34 +08:00
qujianwei 3adc06024c modify for resnet readme and fix bool type option 2020-08-27 20:39:52 +08:00
chengxianbin f3d87dd13e modiy ssd&yolov3-resnet18 net README.md 2020-08-27 20:13:19 +08:00
VectorSL c381430e71 slice support nhwc 2020-08-27 19:37:42 +08:00
mindspore-ci-bot e5780288e9 !5341 transfer tensor to tuple
Merge pull request !5341 from lijiaqi/new
2020-08-27 19:22:50 +08:00
mindspore-ci-bot d670bcb003 !5346 Fix bug of DepthwiseConv2d deleting arg 'group'
Merge pull request !5346 from chenfei_mindspore/r0.7
2020-08-27 19:21:47 +08:00
simson b00cdb2fe6 add type for get obj id & fix resource clear bug 2020-08-27 19:06:49 +08:00
limingqi107 3516447749 gpu GoogleNet performance optimize 2020-08-27 18:40:15 +08:00
李嘉琪 0e9815f63c modify 0.7 2020-08-27 17:54:34 +08:00
mindspore-ci-bot 2f189543b9 !5328 [bug]fix bugs when parameters update r0.7
Merge pull request !5328 from vlne-v1/I1SP3I-return-value-not-the-exact-value-r0.7
2020-08-27 17:38:27 +08:00
mindspore-ci-bot c0a184ae8f !5291 serving RESTful: opt performance, short timeout
Merge pull request !5291 from 徐永飞/r0.7
2020-08-27 17:33:27 +08:00
mindspore-ci-bot ebfca60cdf !5176 fix data dump log
Merge pull request !5176 from jjfeing/r0.7_data_dump
2020-08-27 17:01:48 +08:00
mindspore-ci-bot f01613508f !5257 Fix no attribute'_graph_data' error in graphdata.py
Merge pull request !5257 from heleiwang/r0.7_fix_graphdata
2020-08-27 16:44:21 +08:00
chenfei c545422384 can't delete arg 'group' of DepthwiseConv2d 2020-08-27 16:26:47 +08:00
VectorSL db3f387f14 FusedBatchNormEx add signature 2020-08-27 16:12:22 +08:00
yuximiao 6005091a09 fix __del__ in Profiler 2020-08-27 16:05:59 +08:00
Wei Luning 051b019c96 fix bug in parameter init 2020-08-27 15:50:36 +08:00
mindspore-ci-bot ee6ab2980d !5318 Fix wrong error
Merge pull request !5318 from fanglei/r0.7
2020-08-27 15:00:04 +08:00
mindspore-ci-bot f9609e4ca7 !5252 add tinybert gpu readme
Merge pull request !5252 from hanhuifeng/tinybert_readme_r0.7
2020-08-27 14:43:35 +08:00
mindspore-ci-bot 3fdf9b72df !5306 [MS][LITE][Develop]fix arm32 compile
Merge pull request !5306 from sunsuodong/fix_arm32_0.7
2020-08-27 14:07:46 +08:00
mindspore-ci-bot d142a8d944 !5232 modify readme for deepfm in r0.7
Merge pull request !5232 from yangyongjie/r0.7
2020-08-27 12:59:58 +08:00
xuyongfei 78f88cde1b serving RESTful: opt for performance 2020-08-27 11:54:19 +08:00
sunsuodong 6385eafacf fix_arm32 2020-08-27 11:46:02 +08:00
leilei_snow 08c1d4bf44 fix index error 2020-08-27 03:25:05 +00:00
mindspore-ci-bot 5f3581aa69 !5303 Enlarge the threshold of resnet50 in pynative
Merge pull request !5303 from JoyLvliang/r0.7-enlarge-the-threshold-of-resnet50-in-pynative
2020-08-27 10:44:20 +08:00
mindspore-ci-bot e40fd0b4ed !5242 Fix remove internal output for unique device target
Merge pull request !5242 from YuJianfeng/r0.7
2020-08-27 10:43:33 +08:00
mindspore-ci-bot 620f5856ef !5293 Add test cases for uniform ops on GPU
Merge pull request !5293 from peixu_ren/r0.7temp
2020-08-27 10:42:56 +08:00
mindspore-ci-bot e3b0ae75ae !5286 [MD] minddata gpu add circular_memory to device_queue
Merge pull request !5286 from xiefangqi/xfq_add_circular_gpu_r0.7
2020-08-27 10:12:23 +08:00
mindspore-ci-bot a47ce883e9 !5245 fix assign memory error in pynative mode
Merge pull request !5245 from chujinjin/fix_runopassignoutputmemory_error
2020-08-27 09:48:40 +08:00
mindspore-ci-bot 3d2f761505 !5299 Add dtype check in uniform and normal distribution and fix docs
Merge pull request !5299 from XunDeng/branch_r0.7
2020-08-27 09:20:17 +08:00
mindspore-ci-bot 647053ed4d !5296 Fix some doc errors in pp distributions and bijectors
Merge pull request !5296 from peixu_ren/r0.7
2020-08-27 09:19:48 +08:00
mindspore-ci-bot 1c093f7db7 !5241 code refine for BN docs
Merge pull request !5241 from zyli2020/r0.7
2020-08-27 09:10:01 +08:00
mindspore-ci-bot ddf1b25f9b !5259 Fix some comments of api
Merge pull request !5259 from zhangxinfeng3/r0.7
2020-08-27 09:03:52 +08:00
lvliang 6f84bc57f4 enlarge-the-threshold-of-resnet50-in-pynative 2020-08-27 09:01:06 +08:00
Xun Deng 8ab9903125 add parameter type check in normal and uniform distribution 2020-08-26 17:46:10 -04:00
peixu_ren f8dde0c3c2 Fix some doc errors in pp distributions and bijectors 2020-08-26 15:03:46 -04:00
peixu_ren 85e67f193c Add test cases for uniform ops on GPU 2020-08-26 14:01:30 -04:00
mindspore-ci-bot be2a98e7c8 !5284 Fixes a bug in RandomSharpness cpp op for br:r0.7
Merge pull request !5284 from guozhijian/fix_sharpness_error
2020-08-27 01:46:03 +08:00
mindspore-ci-bot d506630185 !5261 update api for optional - br:r0.7
Merge pull request !5261 from guozhijian/fix_api_comment_0826_r0.7
2020-08-26 22:40:53 +08:00
xiefangqi 6d0ae1794f add circular_memory function to gpu 2020-08-26 21:49:21 +08:00
avakh 6300058cb6 fixing a bug in random sharpness 2020-08-26 21:36:52 +08:00
mindspore-ci-bot 71dd8a4a71 !5233 GPU add log in LoadInputData
Merge pull request !5233 from VectorSL/r0.7
2020-08-26 21:35:17 +08:00
mindspore-ci-bot 57e131a136 !5276 Fix the problem of resource clear v2 in r0.7
Merge pull request !5276 from Simson/fix-r07
2020-08-26 21:34:01 +08:00
mindspore-ci-bot 81d67f6828 !5266 Revert "Avoid copy when create Tensor from numpy array"
Merge pull request !5266 from hewei/r0.7_revert_no_copy_tensor
2020-08-26 21:16:16 +08:00
simson 7c406fb3a0 fix risk of memory leak 2020-08-26 19:55:25 +08:00
mindspore-ci-bot c904bc2f00 !5251 Combine sparse embedding gradient
Merge pull request !5251 from chengang/combine_grad_r0.7
2020-08-26 19:29:10 +08:00
mindspore-ci-bot adeeda2fe1 !5236 transfer tensor to tuple
Merge pull request !5236 from lijiaqi/transfer_tensor_to_tuple
2020-08-26 19:27:27 +08:00
yangyongjie fcc5f77a77 modify readme for deepfm 2020-08-26 19:25:29 +08:00
mindspore-ci-bot bed0f5d3c3 !5249 Put elimminate AllReduce const pass back
Merge pull request !5249 from BowenK/r0.7
2020-08-26 19:22:47 +08:00
He Wei eca64ab8e9 Revert "Avoid copy when create Tensor from numpy array"
This reverts commit a6690168a8.
2020-08-26 19:09:54 +08:00
mindspore-ci-bot 84989b0cbf !5218 SoftDvppDecodeRandomCropResizeJpeg do not support resize image to odd resolution
Merge pull request !5218 from qianlong21st/r0.7_softdvpp_crop
2020-08-26 18:08:27 +08:00
jonyguo f45e2c921f fix: update api comment for optional 2020-08-26 17:55:51 +08:00
zhangxinfeng3 758269d049 modify some comments of api 2020-08-26 17:37:25 +08:00
heleiwang 814fc1d03e fix graphdata error 2020-08-26 17:29:49 +08:00
mindspore-ci-bot b9dff22d6c !5104 fix multiple epoch data issue for gpu profiler
Merge pull request !5104 from 治愈系潇洒哥/r0.7
2020-08-26 17:24:02 +08:00
mindspore-ci-bot 54e615e904 !5207 fix bug for yolov3-resnet18 scripts
Merge pull request !5207 from chengxb7532/r0.7
2020-08-26 17:00:24 +08:00
yujianfeng 499d81e573 Fix remove internal output for unique device target 2020-08-26 16:55:06 +08:00
cristoval 817bfed1ec combine sparse embedding gradient 2020-08-26 16:43:07 +08:00
hanhuifeng2020 a533147f52 add tinybert gpu readme 2020-08-26 16:41:28 +08:00
BowenK fcc0e263f3 Revert "Revert "Eliminate AllReduce when the input is a constant""
This reverts commit 7a7e499475.
2020-08-26 16:38:02 +08:00
mindspore-ci-bot 46de719a12 !5209 Modify the name of parameters in uniform
Merge pull request !5209 from peixu_ren/r0.7temp
2020-08-26 16:29:07 +08:00
chujinjin 7b6dd0b84d fix runopassignoutputmemory error 2020-08-26 16:08:17 +08:00
lizhenyu 8438221259 code refine for BN docs 2020-08-26 16:04:04 +08:00
李嘉琪 2f5a454ef4 transfer_tensor_to_tuple 2020-08-26 15:57:17 +08:00
qianlong 6b6409d910 DecodeAndCropAndResizeJepg do not support odd size 2020-08-26 15:32:18 +08:00
VectorSL ed9c63469b GPU add log in loadinputdata when tensor input != graph input 2020-08-26 15:27:34 +08:00
mindspore-ci-bot 049acf6d58 !5173 GPU fix getinputformat error
Merge pull request !5173 from VectorSL/r0.7
2020-08-26 15:19:31 +08:00
mindspore-ci-bot 55d997ec11 !5052 update readme for docker gpu environment deployment
Merge pull request !5052 from HW_KK/r0.7
2020-08-26 14:45:30 +08:00
mindspore-ci-bot ee7d9bf4ac !5213 Quick fix scalar add flaws in MindSpore
Merge pull request !5213 from peixu_ren/r0.7
2020-08-26 14:10:40 +08:00
mindspore-ci-bot 6b706529c5 !5200 fix SE-Resnet50 infer to use 24 epoch and add SE-Resnet50 readme description
Merge pull request !5200 from qujianwei/r0.7
2020-08-26 12:33:38 +08:00
VectorSL 7884176df7 fix getinputformat error when input is not a realnode 2020-08-26 11:51:09 +08:00
mindspore-ci-bot bedfa8578b !5154 serving RESTful, disable http port reuse, update error msg output to user
Merge pull request !5154 from 徐永飞/r0.7
2020-08-26 11:44:02 +08:00
peixu_ren d522d7ba46 Quick fix scalar add flaws in MindSpore 2020-08-25 23:20:45 -04:00
peixu_ren e701fbfa2e Modify the name of parameters in uniform 2020-08-25 23:00:17 -04:00
mindspore-ci-bot 7090e16df8 !5134 stream parallel support ctrl stream
Merge pull request !5134 from gukecai/stream-r0.7
2020-08-26 10:59:42 +08:00
mindspore-ci-bot 382f9a8ebb !5195 Fix CheckTensor in pynative mode
Merge pull request !5195 from peixu_ren/r0.7
2020-08-26 10:49:48 +08:00
mindspore-ci-bot fd8ad73689 !5194 fix: padded dataset when no div and with repeat op for br:r0.7
Merge pull request !5194 from guozhijian/fix_padded_with_no_div_repeat_r0.7
2020-08-26 10:47:37 +08:00
mindspore-ci-bot 7bdb90a40b !5158 fix softdvpp coredump
Merge pull request !5158 from qianlong21st/r0.7_fix_softdvpp_core
2020-08-26 10:41:39 +08:00
mindspore-ci-bot 185b25c6de !5179 add create_dict_iterator parameter num_epochs api
Merge pull request !5179 from anzhengqi/add-num_epochs-api-r0.7
2020-08-26 10:24:08 +08:00
chengxianbin 115a85114f clear the warning of execute run_distribute_train.sh 2020-08-26 10:19:26 +08:00
mindspore-ci-bot 0bf7f1f39c !5152 change group conv dtype in gpu resnext50
Merge pull request !5152 from zhaoting/r0.7
2020-08-26 10:08:22 +08:00
qujianwei d2588f1935 fix SE-ResNet50 infer to 24-epoch and add ReadMe description 2020-08-26 09:30:31 +08:00
mindspore-ci-bot 6017521219 !5133 modify readme.md for yolov3_darknet53 in r0.7
Merge pull request !5133 from yangyongjie/r0.7
2020-08-26 09:27:25 +08:00
xuyongfei c0389eaea4 serving RESTful, disable http port reuse, update error msg output to user 2020-08-26 09:16:14 +08:00
Xun Deng 6bb61615fd fix checktensor in pynative mode 2020-08-25 21:10:50 -04:00
qianlong bc8aec007f fix softdvpp coredump 2020-08-26 09:07:35 +08:00
jonyguo d262c63214 fix: padded dataset with non div & repeat 2020-08-26 08:58:24 +08:00
mindspore-ci-bot 9e20e17590 !5191 Add note of limitation for prarmeters of uniform
Merge pull request !5191 from peixu_ren/r0.7temp
2020-08-26 08:51:56 +08:00
mindspore-ci-bot 07103b98a9 !5101 Add erf and erfc as generic functions for all the backend
Merge pull request !5101 from peixu_ren/r0.7
2020-08-26 08:36:24 +08:00
peixu_ren 13584ebc22 Add note of limitation for parameters of uniform 2020-08-25 16:01:32 -04:00
peixu_ren 5ce4bcf416 Add erf and erfc as generic functions for all the backend and fix notation in power_transform. 2020-08-25 10:12:29 -04:00
mindspore-ci-bot 92787df680 !5159 add cuda path checker
Merge pull request !5159 from zyli2020/r0.7
2020-08-25 21:46:13 +08:00
mindspore-ci-bot 8afbba7936 !5177 Revert "!5121 Fix the problem of resource clear in r0.7"
Merge pull request !5177 from Simson/fix-r07
2020-08-25 21:43:16 +08:00
anzhengqi 3c4feaa4f4 add num_epochs api 2020-08-25 21:15:35 +08:00
simson 556f79d185 Revert "!5121 Fix the problem of resource clear in r0.7"
This reverts commit a0a6463210, reversing
changes made to 0bcd75bd2b.
2020-08-25 21:03:27 +08:00
mindspore-ci-bot 1a42811748 !5144 Rectification of API comments of r0.7
Merge pull request !5144 from byweng/r0.7
2020-08-25 20:41:22 +08:00
mindspore-ci-bot cee889e426 !5126 Fix problem in RandomPosterize & CutMixBatch
Merge pull request !5126 from luoyang/son_r0.7
2020-08-25 20:33:05 +08:00
jjfeing 3af2059342 fix data dump log info 2020-08-25 20:32:44 +08:00
mindspore-ci-bot a0a6463210 !5121 Fix the problem of resource clear in r0.7
Merge pull request !5121 from Simson/push-to-r07
2020-08-25 18:48:27 +08:00
lizhenyu 1d8e5a27b7 add cuda path checker 2020-08-25 18:39:35 +08:00
mindspore-ci-bot 0bcd75bd2b !5119 Avoid copy when create Tensor from numpy array
Merge pull request !5119 from hewei/r0.7_avoid_copy_tensor_numpy
2020-08-25 17:51:18 +08:00
zhaoting 576c35d408 change group conv dtype in gpu resnext50 2020-08-25 17:38:10 +08:00
mindspore-ci-bot e05a5c8002 !5115 modify bert and tinybert scripts and README
Merge pull request !5115 from wanghua/r0.7
2020-08-25 17:18:07 +08:00
bingyaweng 5b941df087 rectification of API comments 2020-08-25 16:54:14 +08:00
mindspore-ci-bot b77c7d2729 !5099 modify wrong characters in r07
Merge pull request !5099 from caozhou/modify_r07
2020-08-25 16:22:23 +08:00
mindspore-ci-bot ac0b72e67f !5089 Fix MASS and FasterRcnn CI Problem.
Merge pull request !5089 from linqingke/r0.7
2020-08-25 16:14:58 +08:00
gukecai 4ace444f54 parallel ctrl 2020-08-25 16:12:34 +08:00
yangyongjie 32e7eb9148 modify readme for yolov3_darknet53 2020-08-25 16:04:50 +08:00
mindspore-ci-bot c481d45996 !5072 model_zoo README.md format change for googlenet
Merge pull request !5072 from caojian05/ms_r0.7_googlenet_readme_update
2020-08-25 16:00:20 +08:00
wanghua cb893dfca9 modify BERT and TinyBERT README 2020-08-25 15:58:37 +08:00
mindspore-ci-bot 6b55dc802d !5116 Modify readme of deeplabv3 and inceptionv3
Merge pull request !5116 from zhouyaqiang0/r0.7
2020-08-25 15:57:19 +08:00
zhouyaqiang 41de14bacd modify readme of deeplabv3 and inceptionv3 2020-08-25 15:49:36 +08:00
mindspore-ci-bot ee37dc52fa !5054 fix shape and data size
Merge pull request !5054 from hexia/shape_wrong_r0.7
2020-08-25 15:14:27 +08:00
luoyang a75ac9c445 Add type check for RandomPosterize & Add Float tensor support for CutMixBatch 2020-08-25 15:10:24 +08:00
askmiao 1804ea246d fix multiple epoch data issue 2020-08-25 15:08:42 +08:00
mindspore-ci-bot 56350e71f0 !5080 modify sgd and momentum and withgradcell comments
Merge pull request !5080 from lijiaqi/sgd
2020-08-25 14:59:31 +08:00
mindspore-ci-bot 761cd8393e !5107 raise RuntimeError when set different mode after Initializer created
Merge pull request !5107 from yihuaijie/r0.7
2020-08-25 14:51:59 +08:00
simson 90c004078e fix risk of memory leak 2020-08-25 14:48:31 +08:00
mindspore-ci-bot e9f2aae73d !5083 [bugfix]LSTM SyncDeviceToHost failed
Merge pull request !5083 from zyli2020/r0.7
2020-08-25 14:47:44 +08:00
He Wei a6690168a8 Avoid copy when create Tensor from numpy array 2020-08-25 14:38:34 +08:00
hexia 4eb1706307 shape_wrong_r0.7 2020-08-25 10:45:13 +08:00
Yi Huaijie 524cf0ed9a raise RuntimeError when set different mode after Initializer created 2020-08-25 10:14:52 +08:00
mindspore-ci-bot d8d7cebc8a !5090 Update fix custom exp/log ops cast logic to r0.7
Merge pull request !5090 from zichun_ye/r0.7_fix_custom_ops
2020-08-25 09:41:54 +08:00
linqingke 9f5a7939d4 fix fasterrcnn and mass bug. 2020-08-25 09:32:30 +08:00
caozhou 83557cd28d modify wrong characters in r07 2020-08-25 09:10:10 +08:00
Zichun Ye d29bd6862a modify custom_ops to pass pynative mode
update dtype for device

delete used funcs
2020-08-24 11:17:38 -04:00
mindspore-ci-bot 1b71d50953 !5068 resolve fronted layout
Merge pull request !5068 from caozhou/resolve_fronted_display_r07
2020-08-24 21:23:41 +08:00
lizhenyu 68b37ee1b3 [bugfix]LSTM SyncDeviceToHost failed 2020-08-24 21:22:53 +08:00
李嘉琪 0238ba4d14 modify comments7 2020-08-24 20:51:51 +08:00
mindspore-ci-bot 891228bcbe !5062 GPU opt insert transpose pass
Merge pull request !5062 from VectorSL/r0.7
2020-08-24 20:26:09 +08:00
CaoJian 1f9c3bb044 model_zoo README.md format change for googlenet 2020-08-24 19:24:49 +08:00
caozhou 8287445f95 resolve frontend layput 2020-08-24 18:53:47 +08:00
mindspore-ci-bot 10c6fb5612 !5023 Update lastest fix of bernoulii probs to r0.7
Merge pull request !5023 from zichun_ye/r0.7_bernoulli_fix
2020-08-24 17:37:09 +08:00
mindspore-ci-bot fae225460c !5034 Add readme and fix some comments of api
Merge pull request !5034 from zhangxinfeng3/r0.7
2020-08-24 17:36:40 +08:00
VectorSL 54bb6ba58c gpu optimize transpose 2020-08-24 17:34:04 +08:00
wuweikang cec6206bc4 update readme for docker gpu environment deployment 2020-08-24 16:56:27 +08:00
mindspore-ci-bot e69a91b6b5 !5018 maskrcnn adopt smoothl1loss change
Merge pull request !5018 from gengdongjie/master
2020-08-24 15:21:12 +08:00
zhangxinfeng3 2869e5ace4 update some comments of api 2020-08-24 14:32:53 +08:00
mindspore-ci-bot 09cf1c1a54 !5014 add libevent copyright r0.7
Merge pull request !5014 from hexia/from_r0.7
2020-08-24 14:12:20 +08:00
Zichun Ye 04b5b8c737 fix bernoulli prob formula; fix some other minor bugs
update threshold of softplus computation

support fp for bernoulli and geometric distribution
2020-08-23 23:30:32 -04:00
mindspore-ci-bot 8d0b52fb13 !5008 fix bug of EraseAssign
Merge pull request !5008 from wenchunjiang/adapte_to_resnet_second_optimize
2020-08-24 11:11:53 +08:00
hexia 2027f73eaf add libevent copyright 2020-08-24 10:54:20 +08:00
mindspore-ci-bot befc209480 !4805 [MS][LITE]add nnacl readme
Merge pull request !4805 from zhaizhiqiang/master
2020-08-23 18:32:18 +08:00
mindspore-ci-bot c5279ecf35 !5001 [MS][LITE][Develop]fix fp16 kernel register
Merge pull request !5001 from sunsuodong/fixfp16
2020-08-23 17:11:42 +08:00
mindspore-ci-bot d2de60f1d5 !4991 [MS][LITE] arm cpu fp32 op: add common functions and slidewindow for conv depthwise
Merge pull request !4991 from yangruoqi713/conv_dw_lite
2020-08-23 17:08:04 +08:00
mindspore-ci-bot dc13718ce2 !4974 [MS][LITE][Develop]fp16 conv1x1 bug
Merge pull request !4974 from ling/sr
2020-08-23 17:06:41 +08:00
mindspore-ci-bot 7dbe9f7067 !4986 optimize prule
Merge pull request !4986 from fuzhiye/tmp
2020-08-23 17:05:41 +08:00
fuzhiye 2c9daf0f14 optimize prule 2020-08-23 16:57:16 +08:00
sunsuodong bcd97d9751 fix fp16 2020-08-23 16:49:02 +08:00
mindspore-ci-bot 7b8229d644 !4913 transform bn to scale
Merge pull request !4913 from zhengjun10/master
2020-08-23 16:37:07 +08:00
mindspore-ci-bot e3c053c4ff !4961 Optimize the performance of BatchNorm and FusedBatchNorm, add Fp16 kernel
Merge pull request !4961 from sunsuodong/batch_norm_fp16
2020-08-23 16:36:33 +08:00
yangruoqi713 b4551670a9 [MS][LITE] arm cpu fp32 op: conv depthwise 2020-08-23 15:56:50 +08:00
sunsuodong 9734f2a88e batch_norm_fp16 2020-08-23 15:44:53 +08:00
zhengjun10 204ab11572 add bn convert scale pass 2020-08-23 14:48:33 +08:00
mindspore-ci-bot 80d570f003 !4999 change long to int64
Merge pull request !4999 from yeyunpeng2020/primitive
2020-08-23 14:16:51 +08:00
yeyunpeng 6b46acb39e change long to int64 2020-08-23 12:14:37 +08:00
mindspore-ci-bot dde257592b !4870 add UnPack method in ops & remove anf_importer populater
Merge pull request !4870 from hangq/primitive
2020-08-23 10:57:32 +08:00
hangq 28e3508718 add UnPack method in ops & remove anf_importer populater 2020-08-22 22:29:13 +08:00
mindspore-ci-bot 07a75658bf !4995 [MS][LITE][Develop]stack support int32
Merge pull request !4995 from chenjianping/lite_dev2
2020-08-22 18:44:51 +08:00
ling 0fac817a2d [MS][LITE][Develop]Fp16 conv1x1 bug 2020-08-22 18:30:16 +08:00
chenjianping babff262e3 stack support int32 2020-08-22 18:17:56 +08:00
wenchunjiang a221ee176b fix EraseAssign bug 2020-08-22 17:54:00 +08:00
mindspore-ci-bot 6d0bbb36a3 !4908 Modify the '-fvisibility' attribute during building of mindspore-lite.
Merge pull request !4908 from wangshaocong/lite_clean
2020-08-22 17:53:05 +08:00
mindspore-ci-bot 8219df7337 !4975 Fix bug and add tflite models
Merge pull request !4975 from jianghui/master
2020-08-22 17:41:36 +08:00
wsc 758130924f Modify the '-fvisibility' attribute of mindspore-lite project. 2020-08-22 17:34:07 +08:00
mindspore-ci-bot 33c7b49219 !4987 [bugfix]SyncDeviceToHost failed when device address size is zero
Merge pull request !4987 from zyli2020/bug_fix
2020-08-22 17:10:21 +08:00
mindspore-ci-bot 0f362bb158 !4980 [MS][LITE][Develop]benchmark mem check fixed
Merge pull request !4980 from wangchangkai/master
2020-08-22 17:08:35 +08:00
mindspore-ci-bot faa0fed8bb !4989 add the hiai_cpu_face_attr caffe gate
Merge pull request !4989 from zhaodezan/master
2020-08-22 17:07:40 +08:00
mindspore-ci-bot 150b987898 !4989 add the hiai_cpu_face_attr caffe gate
Merge pull request !4989 from zhaodezan/master
2020-08-22 17:07:37 +08:00
mindspore-ci-bot 50877b586d !4971 [MS][LITE][Develop]move nnacl to lite/
Merge pull request !4971 from chenjianping/lite_dev2
2020-08-22 17:02:22 +08:00
mindspore-ci-bot 75fce54208 !4790 Fix a Pynative bug.
Merge pull request !4790 from jxlang910/push-to-opensource
2020-08-22 16:45:00 +08:00
chenjianping d88a98658c move nnacl to lite/ 2020-08-22 16:43:15 +08:00
mindspore-ci-bot add52da73e !4973 Fix errors in exp calculation logics
Merge pull request !4973 from peixu_ren/custom_pp_ops
2020-08-22 16:14:00 +08:00
mindspore-ci-bot d2641bbf79 !4969 delete group parameter from nn.DepthwiseConv2d
Merge pull request !4969 from chenzhongming/lite
2020-08-22 16:13:09 +08:00
zhaodezan 8339d5dae6 add the hiai_cpu_face_attr caffe gate 2020-08-22 04:08:36 -04:00
mindspore-ci-bot 94a109f476 !4898 Fix coredump caused by function call depth too large
Merge pull request !4898 from fary86/fix_call_depth_too_large
2020-08-22 16:08:27 +08:00
mindspore-ci-bot 31a04ea1fe !4977 correct benchmark help info
Merge pull request !4977 from zhaozhenlong/lite/tool/benchmark_modify_help_info
2020-08-22 15:50:30 +08:00
jin-xiulang c246b177a6 Debug for Pynative mode.
Debug test.

pynative debug
2020-08-22 15:47:36 +08:00
kai00 a9771d63e1 benchmark mem check fixed 2020-08-22 15:39:53 +08:00
mindspore-ci-bot aedd6de6d5 !4927 fix bug for identity
Merge pull request !4927 from flywind/pynative_identity
2020-08-22 15:37:21 +08:00
lizhenyu 1becddf3a4 [bugfix]SyncDeviceToHost failed when device address size is zero 2020-08-22 15:37:08 +08:00
jianghui58 1d601b6924 fix bug and add two tflite models
set quant_type default value
2020-08-22 15:12:48 +08:00
fary86 04524b6bd3 Fix coredump caused by function call depth too large 2020-08-22 15:00:04 +08:00
mindspore-ci-bot 7098b5c5d5 !4979 fix the segment when the input data is null
Merge pull request !4979 from zhaodezan/master
2020-08-22 14:59:33 +08:00
mindspore-ci-bot d3733b3b04 !4942 Revert AllReduce const elimination
Merge pull request !4942 from BowenK/master
2020-08-22 14:49:52 +08:00
mindspore-ci-bot 90552c4933 !4861 [MS][LITE][Develop]add conv per channel support for int8
Merge pull request !4861 from lixian/master
2020-08-22 14:49:38 +08:00
mindspore-ci-bot 8e360888d0 !4590 fix gpu matmul fp32 accuracy
Merge pull request !4590 from qujianwei/master
2020-08-22 14:46:45 +08:00
mindspore-ci-bot aefca7b782 !4968 Fix some errors in API about ops and validator of input.
Merge pull request !4968 from liuxiao93/fix-some-bug
2020-08-22 14:44:02 +08:00
mindspore-ci-bot 1556450445 !4972 fix post training quant
Merge pull request !4972 from xutianchun/quant_0822
2020-08-22 14:43:33 +08:00
kpy 4fa89408a1 pynative add identity primitive and add comment for set_grad 2020-08-22 14:36:20 +08:00
zhaozhenlong 488c991eba update benchmark help info
time_profile add fp16 option
2020-08-22 14:32:26 +08:00
mindspore-ci-bot e2203bed01 !3957 Gpu StridedSlice dims exceeds
Merge pull request !3957 from chenweifeng/strided_slice_dims_exceeds
2020-08-22 14:28:39 +08:00
mindspore-ci-bot b52229379d !4931 add script
Merge pull request !4931 from wukesong/add_shell
2020-08-22 14:26:34 +08:00
mindspore-ci-bot d184066b77 !4967 Resnext50 readme normalize
Merge pull request !4967 from zhaoting/readme
2020-08-22 14:25:41 +08:00
mindspore-ci-bot b23b957228 !4964 memory ascend while multi model.eval
Merge pull request !4964 from anzhengqi/I1QWT0-memory-ascend
2020-08-22 14:21:55 +08:00
mindspore-ci-bot 38c366306c !4937 vgg16: modify readme format and replace callback
Merge pull request !4937 from ms_yan/vgg_format
2020-08-22 14:21:07 +08:00
zhaodezan fed8f406ac fix segment when the input data is null 2020-08-22 02:17:08 -04:00
zhaoting a5c16fc4ac resnext50 readme normalize 2020-08-22 11:58:04 +08:00
mindspore-ci-bot 0c60f7e6ac !4965 fix googlenet deepfm
Merge pull request !4965 from panfengfeng/fix_googlenet_deepfm
2020-08-22 11:57:51 +08:00
mindspore-ci-bot 6a5c517d7b !4943 process the bn with phase in prototxt
Merge pull request !4943 from zhaodezan/master
2020-08-22 11:52:03 +08:00
mindspore-ci-bot 387f4d445f !4938 auto umap buffer for opencl and clean code
Merge pull request !4938 from liuchao/lite-master
2020-08-22 11:49:34 +08:00
peixu_ren 03dac9b621 Fix errors in exp calculation logics 2020-08-21 23:45:40 -04:00
mindspore-ci-bot 2b78032605 !4910 Add Tflite Models
Merge pull request !4910 from jianghui/master
2020-08-22 11:37:02 +08:00
mindspore-ci-bot 42a092d687 !4915 [MS][LITE][Develop]mem check fixed
Merge pull request !4915 from wangchangkai/master
2020-08-22 11:33:55 +08:00
mindspore-ci-bot fd9be2ddc2 !4946 Get server rank id in python and fix multi server error.
Merge pull request !4946 from ZPaC/master-get-server-rank-in-python
2020-08-22 11:23:02 +08:00
mindspore-ci-bot 115c0cbf72 !4923 fix dts bug
Merge pull request !4923 from cjh9368/dts-0718
2020-08-22 11:22:14 +08:00
ms_yan 0752c566b1 modify format in vgg16 2020-08-22 11:21:44 +08:00
xutianchun 1ae9f81c82 Fix Post Quant 2020-08-22 11:17:22 +08:00
mindspore-ci-bot fe11760834 !4944 Add tflite and caffe models
Merge pull request !4944 from mengchunyang/master
2020-08-22 11:16:48 +08:00
Corleone 4b60297832 auto unmap buffer for opencl and clean code 2020-08-22 11:11:57 +08:00
chenzomi 8337ae710e delete group parameter from nn.DepthwiseConv2d 2020-08-22 11:11:17 +08:00
zhaizhiqiang 9d98246dae readme for nnacl 2020-08-22 11:08:20 +08:00
jianghui58 defd9a784e add eight tflite models 2020-08-22 11:06:07 +08:00
kai00 15dff1c4a4 check mem fixing 2020-08-22 11:05:36 +08:00
mindspore-ci-bot 026bbc46ff !4941 [MS][LITE][Develop]rename caffeprelu to prelu
Merge pull request !4941 from chenjianping/lite_dev2
2020-08-22 10:57:55 +08:00
mindspore-ci-bot ac81886328 !4916 fix generator_dataset hangs and test_graphdata_distributed.py failing randomly
Merge pull request !4916 from heleiwang/gnn_fix_bug
2020-08-22 10:51:01 +08:00
liuxiao93 cfe8859499 fix some bug in API and validator of input. 2020-08-22 10:49:33 +08:00
panfengfeng 30b69d3488 fix googlenet & deepfm 2020-08-22 10:49:02 +08:00
mindspore-ci-bot 77198f3182 !4914 Modify the problem list
Merge pull request !4914 from shenwei41/sw_master
2020-08-22 10:44:43 +08:00
ZPaC 830172201a Fix multi server precision error. 2020-08-22 10:42:35 +08:00
mindspore-ci-bot b366608a3f !4952 Fix errors in log calculation logics
Merge pull request !4952 from peixu_ren/custom_pp_ops
2020-08-22 10:40:40 +08:00
lixian 1ffb095037 add conv per channel support for int8 kernel 2020-08-22 10:35:58 +08:00
anzhengqi dd942e1807 fix memory ascend while multi model.eval 2020-08-22 10:34:52 +08:00
mindspore-ci-bot 9b503e4f38 !4955 Fixes for Dynamic Augmentation Ops
Merge pull request !4955 from MahdiRahmaniHanzaki/dynamic-ops-fix
2020-08-22 10:27:50 +08:00
mindspore-ci-bot 528fb81093 !4959 delete #include "src/ops/primitive_c.h" at model.h
Merge pull request !4959 from yeyunpeng2020/mindspore
2020-08-22 10:19:35 +08:00
yeyunpeng f4eee11dfa delete #include "src/ops/primitive_c.h" at model.h 2020-08-22 10:04:11 +08:00
mindspore-ci-bot e06dfaa80d !4854 fix bug
Merge pull request !4854 from 徐安越/master
2020-08-22 09:37:57 +08:00
mindspore-ci-bot 9f19076788 !4956 Fix CheckTuple in pynative mode
Merge pull request !4956 from XunDeng/pp_issue_branch
2020-08-22 09:37:47 +08:00
mindspore-ci-bot ab45bec828 !4924 Modify API comments and fix error of st
Merge pull request !4924 from byweng/fix_param_check
2020-08-22 09:37:36 +08:00
peixu_ren 1c8eb9b15d Fix errors in log calculation logics 2020-08-21 21:28:10 -04:00
shenwei41 76518f7f13 Modify the problem list 2020-08-22 09:21:11 +08:00
mindspore-ci-bot 8ee136db18 !4953 C++ API: Minor fixes for dataset parameters
Merge pull request !4953 from cathwong/ckw_c_api_fixes1
2020-08-22 09:14:08 +08:00
meng_chunyang 631aa8cf46 add tflite and caffe models 2020-08-22 09:10:08 +08:00
Xun Deng dc11fa9f53 Fixed CheckTuple issues and error message 2020-08-21 18:28:56 -04:00
Mahdi a5f9b8f92e Added fix for MixUpBatch and CutMixBatch and for RandomAffine
updated c color op descriptions
2020-08-21 17:40:08 -04:00
Cathy Wong 93810a0dc8 C++ API: Minor fixes for dataset parameters 2020-08-21 17:35:39 -04:00
mindspore-ci-bot 04decda0c5 !4951 Fix missing symbol, build failure issue with -B on
Merge pull request !4951 from HarshvardhanGupta/multi_fix
2020-08-22 04:49:43 +08:00
Harshvardhan Gupta ac457f3163 fix build failures and wp tensor search condition 2020-08-21 16:18:51 -04:00
mindspore-ci-bot 9d7250c483 !4776 Introduce 2 extra ctrl flags to DataBuffer in dataset, address remaining cmts to PR4632
Merge pull request !4776 from ZiruiWu/map_callback_follow_up
2020-08-22 03:31:31 +08:00
Zirui Wu 74c1e6da60 introducing pause and quit flags to DataBuffer
fix review cmts

fix ci

fix Ci

fixci

address ci

ci

- add timeout
- add more test cases

fix CI

address review cmts
2020-08-21 14:33:26 -04:00
mindspore-ci-bot 3eef4a4e06 !4948 Fix backend compilation error when -g is off
Merge pull request !4948 from JesseKLee/glog_off
2020-08-21 23:54:31 +08:00
chenjianping 5a83d3a7df rename prelu to leakyrelu,rename caffeprelu to prelu 2020-08-21 22:15:11 +08:00
Jesse Lee ebd4cc5c0a Fix glog off compilation bug 2020-08-21 09:37:22 -04:00
wukesong 9784a58da2 add script shell 2020-08-21 21:13:00 +08:00
zhaodezan ad883e4384 process bn with phase 2020-08-21 09:09:41 -04:00
BowenK 7a7e499475 Revert "Eliminate AllReduce when the input is a constant"
This reverts commit f3a9fbdd78.
2020-08-21 20:17:36 +08:00
xuanyue bbedc02700 fix bug 2020-08-21 19:24:52 +08:00
bingyaweng 3422f60d50 modify comments of API 2020-08-21 17:26:11 +08:00
cjh9368 70001a71ea fix dts bug 2020-08-21 16:47:00 +08:00
wilfChen 837aecf9af gpu stridedslice 2020-08-21 16:21:27 +08:00
heleiwang 4870abc848 1. fix generator_dataset hangs
2. fix test_graphdata_distributed.py failing randomly
2020-08-21 16:03:06 +08:00
qujianwei c21ffc0317 fix gpu matmul fp32 accuracy 2020-08-18 19:56:33 +08:00
1643 changed files with 42748 additions and 20820 deletions

View File

@ -66,10 +66,12 @@ MindSpore offers build options across multiple backends:
| Hardware Platform | Operating System | Status |
| :---------------- | :--------------- | :----- |
| Ascend910 | Ubuntu-x86 | ✔️ |
| | Ubuntu-aarch64 | ✔️ |
| | EulerOS-x86 | ✔️ |
| | EulerOS-aarch64 | ✔️ |
| GPU CUDA 10.1 | Ubuntu-x86 | ✔️ |
| CPU | Ubuntu-x86 | ✔️ |
| | Ubuntu-aarch64 | ✔️ |
| | Windows-x86 | ✔️ |
For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an example:
@ -149,7 +151,23 @@ currently the containerized build options are supported as follows:
sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit nvidia-docker2
sudo systemctl restart docker
```
Then edit the file daemon.json:
```
$ vim /etc/docker/daemon.json
{
"runtimes": {
"nvidia": {
"path": "nvidia-container-runtime",
"runtimeArgs": []
}
}
}
```
Restart docker again:
```
sudo systemctl daemon-reload
sudo systemctl restart docker
```
Then you can pull and run the latest stable image using the below command:
```
docker pull mindspore/mindspore-gpu:0.7.0-beta

View File

@ -1,4 +1,4 @@
![MindSpore标志](docs/MindSpore-logo.png "MindSpore logo")
![MindSpore标志](docs/MindSpore-logo.png "MindSpore logo")
============================================================
[View English](./README.md)
@ -66,10 +66,12 @@ MindSpore提供跨多个后端的构建选项
| 硬件平台 | 操作系统 | 状态 |
| :------------ | :-------------- | :--- |
| Ascend 910 | Ubuntu-x86 | ✔️ |
| | Ubuntu-aarch64 | ✔️ |
| | EulerOS-x86 | ✔️ |
| | EulerOS-aarch64 | ✔️ |
| GPU CUDA 10.1 | Ubuntu-x86 | ✔️ |
| CPU | Ubuntu-x86 | ✔️ |
| | Ubuntu-aarch64 | ✔️ |
| | Windows-x86 | ✔️ |
使用`pip`命令安装,以`CPU`和`Ubuntu-x86`build版本为例
@ -120,10 +122,10 @@ MindSpore的Docker镜像托管在[Docker Hub](https://hub.docker.com/r/mindspore
| 硬件平台 | Docker镜像仓库 | 标签 | 说明 |
| :----- | :------------------------ | :----------------------- | :--------------------------------------- |
| CPU | `mindspore/mindspore-cpu` | `x.y.z` | 已经预安装MindSpore `x.y.z` CPU版本的生产环境。 |
| | | `devel` | 提供开发环境从源头构建MindSpore`CPU`后端。安装详情请参考https://www.mindspore.cn/install。 |
| | | `devel` | 提供开发环境从源头构建MindSpore`CPU`后端。安装详情请参考https://www.mindspore.cn/install 。 |
| | | `runtime` | 提供运行时环境安装MindSpore二进制包`CPU`后端)。 |
| GPU | `mindspore/mindspore-gpu` | `x.y.z` | 已经预安装MindSpore `x.y.z` GPU版本的生产环境。 |
| | | `devel` | 提供开发环境从源头构建MindSpore`GPU CUDA10.1`后端。安装详情请参考https://www.mindspore.cn/install。 |
| | | `devel` | 提供开发环境从源头构建MindSpore`GPU CUDA10.1`后端。安装详情请参考https://www.mindspore.cn/install 。 |
| | | `runtime` | 提供运行时环境安装MindSpore二进制包`GPU CUDA10.1`后端)。 |
| Ascend | <center>&mdash;</center> | <center>&mdash;</center> | 即将推出,敬请期待。 |
@ -148,7 +150,23 @@ MindSpore的Docker镜像托管在[Docker Hub](https://hub.docker.com/r/mindspore
sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit nvidia-docker2
sudo systemctl restart docker
```
编辑文件 daemon.json:
```
$ vim /etc/docker/daemon.json
{
"runtimes": {
"nvidia": {
"path": "nvidia-container-runtime",
"runtimeArgs": []
}
}
}
```
再次重启docker:
```
sudo systemctl daemon-reload
sudo systemctl restart docker
```
使用以下命令获取并运行最新的稳定镜像:
```
docker pull mindspore/mindspore-gpu:0.7.0-beta

View File

@ -1,3 +1,114 @@
# Release 0.7.0-beta
## Major Features and Improvements
### MindSpore Training and Inference Framework
#### Ascend 910
* New models
* TinyBert: a smaller and faster version of BERT using transformer distillation for natural language understanding on GLUE benchmark.
* SE-ResNet50: add Squeeze-and-Excitation blocks(SE-Blocks) to the resnet50 network to improve channel interdependencies for image classification on ImageNet 2012 dataset.
* Inception V3: the third version of Inception convolutional architectures for image classification on ImageNet 2012 dataset.
* Frontend and user interface
* Embedding operator high-level packaging to support segmented by field for Wide&Deep.
* Load multi-node checkpoint into single-process to support host-device hybrid inference.
* Support Concat/Tile/Strideslice distributed operators.
* Support cumulative gradient and batch training split.
* Support variable parameter input for Cell object.
* Parameter mixed calculation optimization for pynative mode.
* Deep Probabilistic Programming
* Support statistical distributions classes used to generate stochastic tensors.
* Support probabilistic inference algorithms.
* Support BNN layers used to construct BNN in Graph mode.
* Support interfaces for the transformation between BNN and DNN in Graph mode.
* Support uncertainty estimation to estimate epistemic uncertainty and aleatoric uncertainty.
* User interfaces change log
* change base class of parameter([!3473](https://gitee.com/mindspore/mindspore/pulls/3473))
* change binary to mindir([!4258](https://gitee.com/mindspore/mindspore/pulls/4258))
* change export from geir to air([!4269](https://gitee.com/mindspore/mindspore/pulls/4269))
* Init parameter data by default([!3967](https://gitee.com/mindspore/mindspore/pulls/3967))
* change IndexedSlices to RowTensor([!4031](https://gitee.com/mindspore/mindspore/pulls/4031))
* Must set or change parallel mode before any Initializer created([!4801](https://gitee.com/mindspore/mindspore/pulls/4801))
* Executor and performance optimization
* MindSpore graph compilation process performance improved by 20%.
* Decoupling C++ and Python modules to achieve separate compilation of core modules.
* Data processing, augmentation, and save format
* Support automatic data augmentation
* Support GNN distributed cache in single node
* Support ConcatDataset using distributed sampler
#### Other Hardware Support
* GPU platform
* New model supported: VGG16, ResNet101, DeepFM.
* Support some distributed operators in ResNet50 and Wide&Deep.
* Support automatic parallel for Wide&Deep.
* Support function funcs[i](*inputs) (such as switch-case).
* Support distributed training with parameter server.
* Support GPU operator profiling.
* Performance optimization of the distributed training with allreduce.
* Performance optimization of the mixed precision training.
* Performance optimization of the pynative mode.
* Performance optimization of the convolution operator, batch normalization operator.
* CPU platform
* Support MobileNetV2 Re-Training: Re-train the network with different class number.
### MindSpore Lite
* Converter
* Support third-party models, including TFLite/Caffe/ONNX.
* Add 93 TFLite op.
* Add 24 Caffe op.
* Add 62 ONNX op.
* Add 11 optimized passes, include fusion/const fold.
* Support aware-training and Post-training quantization.
* CPU
* Add 100+ ops, support fp32, int8/uint8, FP16 ops
* Support fast convolution algorithms: Sliding Window, Img2col + Gemm, Strassen, Winograd
* Support assembly/neon instruction.
* Support CPU fp16 and sdot on ARM v8.2+.
* GPU
* Add 20+ ops for OpenCL.
* Support image2D/buffer format.
* Optimize online initialization time.
* Add optimized convolution1X1/3X3/depthwise/convolution_transposed for OpenCL.
* Tool & example
* Add benchmark and TimeProfile tools.
* Add image classification Android Demo.
## Bugfixes
* Models
* normalize the readme file([!5410](https://gitee.com/mindspore/mindspore/pulls/5410))
* fix a sink_size bug for transformer([!5393](https://gitee.com/mindspore/mindspore/pulls/5393))
* fix bool type optional for resnet50([!5363](https://gitee.com/mindspore/mindspore/pulls/5363))
* Python API
* improve interface '__bool__' for tensor([!4000](https://gitee.com/mindspore/mindspore/pulls/4000))
* fix GPU-ResizeNearestNeighbor([!3760](https://gitee.com/mindspore/mindspore/pulls/3760))
* fix topK multi dimension grad func([!3711](https://gitee.com/mindspore/mindspore/pulls/3711))
* fix scatterop error msg([!3699](https://gitee.com/mindspore/mindspore/pulls/3699))
* fix bug of cast dtype when using mix_presion in pynative mode([!3730](https://gitee.com/mindspore/mindspore/pulls/3730))
* Executor
* fix etsnet train error when UnsegmentSum's first input shape is (1,) ([!4573](https://gitee.com/mindspore/mindspore/pulls/4573))
* fix bug of result error in while control flow because of unsupporting for value reference ([!4103](https://gitee.com/mindspore/mindspore/pulls/4103))
* fix bug of the output tensor does not carry device data type ([!3774](https://gitee.com/mindspore/mindspore/pulls/3774))
* fix bug of avoiding multi attr value are eliminated in pynative mode ([!4225](https://gitee.com/mindspore/mindspore/pulls/4225))
* fix bug of AssignAdd unable to work normally in multi-cases ([!5171](https://gitee.com/mindspore/mindspore/pulls/5171))
* GPU platform
* improve the environment variable checking for nvcc compiler path ([!5140](https://gitee.com/mindspore/mindspore/pulls/5140))
* fix bug of error in cast operator conversion from fp16 to fp32 ([!4147](https://gitee.com/mindspore/mindspore/pulls/4147))
* fix bug of the array out of bound in case of make_tuple operator ([!5219](https://gitee.com/mindspore/mindspore/pulls/5219))
* Data processing and Pro
* fix GeneratorDataset time out([!3624](https://gitee.com/mindspore/mindspore/pulls/3624))
* fix concat operator get_dataset_size error([!4701](https://gitee.com/mindspore/mindspore/pulls/4701))
* fixing python validator for Repeat Op([!4366](https://gitee.com/mindspore/mindspore/pulls/4366))
* Third party
* Sqlite : Update sqlite to 3.32.2 to handle [CVE-2020-11656](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11656), [CVE-2020-13871](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13871), [CVE-2020-11655](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11655), [CVE-2020-9327](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-9327), [CVE-2020-13630](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13630), [CVE-2020-15358](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15358), [CVE-2020-13631](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13631), [CVE-2020-13632](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13632), [CVE-2020-13434](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13434), [CVE-2020-13435](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13435), and [CVE-2020-15358](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15358).
* Libjpeg-turbo : Update libjpeg-turbo to 2.0.4 to handle [CVE-2020-13790](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13790).
## Contributors
Thanks goes to these wonderful people:
Adel, Alexey, andy, andy_wangrui, anthonyaje, anzhengqi, askmiao, avakh, baihuawei, bingyaweng, BowenK, buxue, caifubi, CaoJian, caozhou, Cathy, changzherui, chenfei, chengxianbin, chenhaozhe, chenjianping, chentingting, chenzomi, chenzupeng, chujinjin, cjh9368, Corleone, cristoval, danish, dengyutao, eric, Eric, ervinzhang, etone-chan, fangzehua, fary86, fuzhiye, gengdongjie, genglishuai, Giancarlo, gongdaguo, gukecai, guohongzilong, GuoMengHao, hangq, hanhaocheng, hanhuifeng2020, hanjun996, Harshvardhan, He, heleiwang, hesham, hexia, Hoai, hongxing, huangdongrun, huanghui, huangxinjing, islam_amin, Jesse, jianghui58, jiangzhiwen, jin-xiulang, jinyaohui, jjfeing, John, Jonathan, jonyguo, kai00, kingfo, kpy, kswang, laiyongqiang, leilei_snow, leopz, Li, liangzelang, lianliguang, lichen_101010, lichenever, lihongkang, lilei, limingqi107, ling, lingyunli63, linqingke, lirongzhen1, liubuyu, liuwenhao4, liuxiao78, liuxiao93, liuzhongkai, Lixia, lixian, liyong, lizhenyu, looop5, luoyang, lvchangquan, lvliang, lvwenyuan, lyvette, mahdi, Mahdi, mamba_ni, maning202007, Margaret_wangrui, mayang, meixiaowei, meng_chunyang, ms_yan, nhussain, panbingao, panfengfeng, panyifeng, Payne, Peilin, peixu_ren, pengyongrong, Pengyongrong, qianlong, qujianwei, root, shenwei41, shibeiji, simson, songhonglei413, Su, sunsuodong, suteng, tao_yunhao, TFbunny, tinazhang, tom__chen, tony_liu2, tronzhang, VectorSL, wandongdong, wangdongxu, wanghua, wangmin, wangshaocong, wangzhe, wanyiming, Wei, wenchunjiang, wilfChen, WilliamLian, wsc, wukesong, wuweikang, wuxuejian, wuyongkang, xiefangqi, xuanyue, Xun, xutianchun, xuyongfei, yanghaitao, yangjie159, YangLuo, yangruoqi713, yangyongjie, yangzhenzhang, yankai, yao_yf, yelihua, yeyunpeng, Yi, yoni, yoonlee666, yuchaojie, yujianfeng, yuximiao, zhangxuetong, zhaizhiqiang, Zhang, zhangxinfeng3, zhangxuetong, zhangyihui, zhangz0911gm, zhanke, zhanyuan, zhaodezan, zhaoting, zhaozhenlong, zhengjun10, zhongligeng, zhoufeng, zhousiyi, zhouyaqiang, 
zhouyuanshen, Zichun, Zirui, zjun, zongha, ZPaC, lijiaqi, liangchenghui, wangminggui
Contributions of any kind are welcome!
# Release 0.6.0-beta
## Major Features and Improvements
@ -60,6 +171,9 @@
* Data processing
* Fix bug of RandomColor and RandomSharpness default parameter checking ([!2833](https://gitee.com/mindspore/mindspore/pulls/2833))
* Fix process hung when training and eval ([!3469](https://gitee.com/mindspore/mindspore/pulls/3469))
* Third party
* Sqlite : Update sqlite to 3.32.2 to handle [CVE-2020-11656](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11656), [CVE-2020-13871](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13871), [CVE-2020-11655](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11655), [CVE-2020-9327](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-9327), [CVE-2020-13630](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13630), [CVE-2020-15358](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15358), [CVE-2020-13631](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13631), [CVE-2020-13632](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13632), [CVE-2020-13434](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13434), [CVE-2020-13435](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13435), and [CVE-2020-15358](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15358).
* Libjpeg-turbo : Update libjpeg-turbo to 2.0.4 to handle [CVE-2020-13790](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13790).
## Contributors
Thanks goes to these wonderful people:
@ -133,6 +247,8 @@ Contributions of any kind are welcome!
* Fix bug of Cifar dataset reading([!2096](https://gitee.com/mindspore/mindspore/pulls/2096))
* Fix bug of C++ behavior in RandomCropAndResize([!2026](https://gitee.com/mindspore/mindspore/pulls/2026))
* Fix the bug of mindrecord shuffle([!2420](https://gitee.com/mindspore/mindspore/pulls/2420))
* Third party
* Sqlite : Update sqlite to 3.32.2 to handle [CVE-2020-11656](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11656), [CVE-2020-13871](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13871), [CVE-2020-11655](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11655), [CVE-2020-9327](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-9327), [CVE-2020-13630](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13630), [CVE-2020-15358](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15358), [CVE-2020-13631](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13631), [CVE-2020-13632](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13632), [CVE-2020-13434](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13434), [CVE-2020-13435](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13435).
## Contributors
Thanks goes to these wonderful people:
@ -207,11 +323,11 @@ Contributions of any kind are welcome!
* Executor
* Fix dropouttopK and addn errors in PyNative mode ([!1285](https://gitee.com/mindspore/mindspore/pulls/1285), [!1138](https://gitee.com/mindspore/mindspore/pulls/1138), [!1033](https://gitee.com/mindspore/mindspore/pulls/1033)).
* Fix memory leaks after execution in PyNative mode ([!1201](https://gitee.com/mindspore/mindspore/pulls/1201)).
* Fix HCCL failure in some special scenes ([!1204](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1204), [!1252](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1252)).
* Fix SSD network when Select failed, can't find kernel info([!1449](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1449)).
* Fix Topk operator selection strategy bug between aicore and aicpu([!1367](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1367)).
* Fix input memory size of 'assign' op unequal in control sink mode when assigning a data from one child graph to another child graph([!802](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/802)).
* Fix allreduce ir inconsistency([!989](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/989)).
* Fix HCCL failure in some special scenes ([!1204](https://gitee.com/mindspore/mindspore/pulls/1204), [!1252](https://gitee.com/mindspore/mindspore/pulls/1252)).
* Fix SSD network when Select failed, can't find kernel info([!1449](https://gitee.com/mindspore/mindspore/pulls/1449)).
* Fix Topk operator selection strategy bug between aicore and aicpu([!1367](https://gitee.com/mindspore/mindspore/pulls/1367)).
* Fix input memory size of 'assign' op unequal in control sink mode when assigning a data from one child graph to another child graph([!802](https://gitee.com/mindspore/mindspore/pulls/802)).
* Fix allreduce ir inconsistency([!989](https://gitee.com/mindspore/mindspore/pulls/989)).
* GPU platform
* Fix summary for gradient collection ([!1364](https://gitee.com/mindspore/mindspore/pulls/1364))
* Fix the slice operator ([!1489](https://gitee.com/mindspore/mindspore/pulls/1489))

View File

@ -3693,6 +3693,72 @@ Copyright (c) 1999, Frank Warmerdam
Copyright (c) 1991-1996 Sam Leffler
Copyright (c) 1996 USAF Phillips Laboratory
Software: libevent 2.1.12
Copyright notice:
Copyright (C) 1998 - 2012, Daniel Stenberg, <daniel@haxx.se>, et al.
COPYRIGHT AND PERMISSION NOTICE
Copyright (c) 1996 - 2013, Daniel Stenberg, <daniel@haxx.se>.
Copyright (C) 2012, iSEC Partners.
Copyright (c) 1987, 1993, 1994, 1995
Copyright (c) 1987, 1993, 1994, 1996
Copyright 2002 Niels Provos <provos@citi.umich.edu>
Copyright (c) 2007-2012 Niels Provos and Nick Mathewson
Copyright (c) 2000-2007 Niels Provos <provos@citi.umich.edu>
Copyright (c) 2007-2012 Niels Provos, Nick Mathewson
Copyright (c) 2009-2012 Niels Provos and Nick Mathewson
Copyright (c) 2006-2007 Niels Provos <provos@citi.umich.edu>
Copyright (c) 2008-2012 Niels Provos and Nick Mathewson
Copyright (c) 1991, 1993
Copyright (c) 2009, Michihiro NAKAJIMA
Copyright 2000-2013 Kitware, Inc.
Copyright 2000-2011 Insight Software Consortium
notices of original copyright by their contributors; see each source
Copyright (C) 1996-2018 Free Software Foundation, Inc.
Copyright (c) 2010 Chris Davis, Niels Provos, and Nick Mathewson
Copyright (c) 2010-2012 Niels Provos and Nick Mathewson
Copyright (c) 1996, David Mazieres <dm@uun.org>
Copyright (c) 2008, Damien Miller <djm@openbsd.org>
Copyright (c) 2002-2007 Niels Provos <provos@citi.umich.edu>
Copyright (c) 2002-2006 Niels Provos <provos@citi.umich.edu>
Copyright (c) 2009-2012 Niels Provos, Nick Mathewson
Copyright 2000-2009 Niels Provos <provos@citi.umich.edu>
Copyright 2009-2012 Niels Provos and Nick Mathewson
Copyright 2000-2007 Niels Provos <provos@citi.umich.edu>
Copyright 2007-2012 Niels Provos, Nick Mathewson
Copyright 2003-2009 Niels Provos <provos@citi.umich.edu>
Copyright 2006-2007 Niels Provos
Copyright 2007-2012 Nick Mathewson and Niels Provos
Copyright (c) 2005-2007 Niels Provos <provos@citi.umich.edu>
Copyright (c) 2003-2009 Niels Provos <provos@citi.umich.edu>
Copyright 2007-2012 Niels Provos and Nick Mathewson
Copyright (c) 2007 Sun Microsystems. All rights reserved.
Copyright (c) 2008-2012 Niels Provos, Nick Mathewson
Copyright 2002 Christopher Clark
Copyright 2005-2012 Nick Mathewson
Copyright 2001-2007 Niels Provos <provos@citi.umich.edu>
Copyright (c) 2012 Niels Provos and Nick Mathewson
Copyright (c) 2000 Dug Song <dugsong@monkey.org>
Copyright (c) 1993 The Regents of the University of California.
Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
Copyright (c) 2003 Michael A. Davis <mike@datanerds.net>
Copyright (c) 2007 Sun Microsystems
Copyright (c) 2002 Christopher Clark
Copyright (c) 2006 Maxim Yegorushkin <maxim.yegorushkin@gmail.com>
Copyright (c) 2010 BitTorrent, Inc.
Copyright (c) 2005-2012 Niels Provos and Nick Mathewson
Copyright (c) 1993
Copyright 2003 Michael A. Davis <mike@datanerds.net>
Copyright 2003-2007 Niels Provos <provos@citi.umich.edu>
Copyright 2008-2012 Niels Provos and Nick Mathewson
Copyright (c) 2003-2007 Niels Provos <provos@citi.umich.edu>
Copyright (c) 2013 Niels Provos and Nick Mathewson
Copyright (c) 2009-2012 Nick Mathewson and Niels Provos
Copyright (c) 2007-2013 Niels Provos and Nick Mathewson
Copyright (c) 2012 Ross Lagerwall <rosslagerwall@gmail.com>
tinytest.c -- Copyright 2009-2012 Nick Mathewson
tinytest.h -- Copyright 2009-2012 Nick Mathewson
tinytestmacros.h -- Copyright 2009-2012 Nick Mathewson
Software: opencv 4.2.0
Copyright notice:
Copyright (C) 2016, NVIDIA Corporation, all rights reserved.

2
akg

@ -1 +1 @@
Subproject commit 3bb6264188d0b1d6ff776a35a571bc7190df0800
Subproject commit 76a1ecf9da48fa463e25ad63c26281fb5867874d

View File

@ -16,20 +16,20 @@
@title mindspore_build
SET BASEPATH=%CD%
IF NOT EXIST %BASEPATH%/build (
IF NOT EXIST "%BASEPATH%/build" (
md "build"
)
cd %BASEPATH%/build
set BUILD_PATH=%CD%
IF NOT EXIST %BUILD_PATH%/mindspore (
IF NOT EXIST "%BUILD_PATH%/mindspore" (
md "mindspore"
)
cd %CD%/mindspore
IF "%2%" == "lite" (
IF "%1%" == "lite" (
call :gene_gtest
call :run_cmake
IF errorlevel 1 (
@ -47,14 +47,17 @@ IF "%2%" == "lite" (
)
cd %BUILD_PATH%/mindspore
IF "%1%" == "" (
cmake --build . -- -j6
IF "%2%" == "" (
cmake --build . --target package -- -j6
) ELSE (
cmake --build . -- -j%1%
cmake --build . --target package -- -j%2%
)
IF errorlevel 1 (
echo "build fail."
goto run_fail
) ELSE (
cd %BASEPATH%/output
rd /s /q _CPack_Packages
)
) ELSE (
cmake -DCMAKE_BUILD_TYPE=Release -DENABLE_CPU=ON -DENABLE_MINDDATA=ON -DUSE_GLOG=ON ^
@ -83,7 +86,7 @@ goto run_eof
cd %BUILD_PATH%/mindspore
cmake -DBUILD_DEVICE=on -DBUILD_CONVERTER=on -DPLATFORM_ARM64=off -DSUPPORT_TRAIN=off ^
-DCMAKE_BUILD_TYPE=Release -DSUPPORT_GPU=off -DBUILD_MINDDATA=off -DOFFLINE_COMPILE=off ^
-G "CodeBlocks - MinGW Makefiles" %BASEPATH%/mindspore/lite
-G "CodeBlocks - MinGW Makefiles" "%BASEPATH%/mindspore/lite"
GOTO:EOF
:gene_gtest
@ -94,31 +97,31 @@ GOTO:EOF
GOTO:EOF
:gene_protobuf
SET PROTOC=%BASEPATH%/build/mindspore/_deps/protobuf-src/_build/protoc
SET PROTOC="%BASEPATH%/build/mindspore/_deps/protobuf-src/_build/protoc"
SET PROTO_SRC_DIR=%BASEPATH%/mindspore/lite/tools/converter/parser/caffe
SET PROTO_SRC_DIR="%BASEPATH%/mindspore/lite/tools/converter/parser/caffe"
cd %PROTO_SRC_DIR%
%PROTOC% *.proto --proto_path=%PROTO_SRC_DIR% --cpp_out=%PROTO_SRC_DIR%
SET PROTO_SRC_DIR=%BASEPATH%/mindspore/lite/tools/converter/parser/onnx
SET PROTO_SRC_DIR="%BASEPATH%/mindspore/lite/tools/converter/parser/onnx"
cd %PROTO_SRC_DIR%
%PROTOC% *.proto --proto_path=%PROTO_SRC_DIR% --cpp_out=%PROTO_SRC_DIR%
cd %BUILD_PATH%/mindspore
GOTO:EOF
:gene_flatbuffer
SET FLATC=%BASEPATH%/build/mindspore/_deps/flatbuffers-src/_build/flatc
SET FLATC="%BASEPATH%/build/mindspore/_deps/flatbuffers-src/_build/flatc"
SET FLAT_DIR=%BASEPATH%/mindspore/lite/schema
cd %FLAT_DIR%
IF EXIST inner rd /s /q inner
md inner
%FLATC% -c -b *.fbs
%FLATC% -c -b --reflect-types --gen-mutable --reflect-names --gen-object-api -o %FLAT_DIR%/inner *.fbs
%FLATC% -c -b --reflect-types --gen-mutable --reflect-names --gen-object-api -o "%FLAT_DIR%/inner" *.fbs
SET FLAT_DIR=%BASEPATH%/mindspore/lite/tools/converter/parser/tflite
cd %FLAT_DIR%
%FLATC% -c -b --reflect-types --gen-mutable --reflect-names --gen-object-api -o %FLAT_DIR% *.fbs
%FLATC% -c -b --reflect-types --gen-mutable --reflect-names --gen-object-api -o "%FLAT_DIR%" *.fbs
cd %BUILD_PATH%/mindspore
GOTO:EOF

View File

@ -53,7 +53,7 @@ usage()
echo " -n Compile minddata lite"
echo " -M Enable MPI and NCCL for GPU training, gpu default on"
echo " -V Specify the minimum required cuda version, default CUDA 10.1"
echo " -I Compile lite"
echo " -I Enable compiling mindspore lite for arm64, arm32 or x86_64, default disable mindspore lite compiling"
echo " -K Compile with AKG, default on"
echo " -s Enable serving module, default off"
echo " -w Enable acl module, default off"
@ -393,7 +393,7 @@ build_mindspore()
CMAKE_VERBOSE="--verbose"
fi
cmake --build . --target package ${CMAKE_VERBOSE} -j$THREAD_NUM
echo "success to build mindspore project!"
echo "success building mindspore project!"
}
checkndk() {
@ -618,10 +618,12 @@ build_lite()
if [[ "${COMPILE_RET}" -ne 0 ]]; then
echo "---------------- mindspore lite: build failed ----------------"
exit 1
else
mv ${BASEPATH}/output/tmp/*.tar.gz* ${BASEPATH}/output/
rm -rf ${BASEPATH}/output/tmp/
echo "---------------- mindspore lite: build success ----------------"
exit 0
fi
}

View File

@ -8,7 +8,7 @@ else()
VER 67.1
LIBS ${LIB_ICU_COMMON} ${LIB_ICU_DATA} ${LIB_ICU_I18N}
URL https://github.com/unicode-org/icu/archive/release-67-1.tar.gz
MD5 0c2662a2b0bc80b0eb56495205247c8f
MD5 fd525fb47d8827b0b7da78b51dd2d93f
CONFIGURE_COMMAND ${CMAKE_SOURCE_DIR}/scripts/build_icu4c.sh
)
include_directories(${icu4c_INC})

View File

@ -1,13 +1,18 @@
include(CMakePackageConfigHelpers)
set(LIB_DIR ${MAIN_DIR}/lib)
set(INC_DIR ${MAIN_DIR}/include)
set(TURBO_DIR ${MAIN_DIR}/third_party/libjpeg-turbo)
set(OPENCV_DIR ${MAIN_DIR}/third_party/opencv)
set(PROTOBF_DIR ${MAIN_DIR}/third_party/protobuf)
set(FLATBF_DIR ${MAIN_DIR}/third_party/flatbuffers)
set(LIB_DIR ${MAIN_DIR}-${COMPONENT_NAME}/lib)
set(INC_DIR ${MAIN_DIR}-${COMPONENT_NAME}/include)
set(TURBO_DIR ${MAIN_DIR}-${COMPONENT_NAME}/third_party/libjpeg-turbo)
set(OPENCV_DIR ${MAIN_DIR}-${COMPONENT_NAME}/third_party/opencv)
set(PROTOBF_DIR ${MAIN_DIR}-${COMPONENT_NAME}/third_party/protobuf)
set(FLATBF_DIR ${MAIN_DIR}-${COMPONENT_NAME}/third_party/flatbuffers)
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
set(LIB_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/lib)
set(INC_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/include)
set(TURBO_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/libjpeg-turbo)
set(OPENCV_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/opencv)
set(PROTOBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/protobuf)
set(FLATBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/flatbuffers)
if (BUILD_MINDDATA)
install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
@ -30,25 +35,52 @@ if (BUILD_MINDDATA)
endif ()
if (PLATFORM_ARM64)
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h DESTINATION ${INC_DIR}/ir/dtype COMPONENT ${COMPONENT_NAME})
install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
install(DIRECTORY ${TOP_DIR}/mindspore/lite/schema/ DESTINATION ${INC_DIR}/schema COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "inner" EXCLUDE)
install(FILES ${TOP_DIR}/mindspore/lite/build/src/runtime/kernel/arm/nnacl/liboptimize.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
install(FILES ${TOP_DIR}/mindspore/lite/build/nnacl/liboptimize.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
install(DIRECTORY ${TOP_DIR}/third_party/flatbuffers/include DESTINATION ${FLATBF_DIR} COMPONENT ${COMPONENT_NAME})
elseif (PLATFORM_ARM32)
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h DESTINATION ${INC_DIR}/ir/dtype COMPONENT ${COMPONENT_NAME})
install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
install(DIRECTORY ${TOP_DIR}/mindspore/lite/schema/ DESTINATION ${INC_DIR}/schema COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "inner" EXCLUDE)
install(DIRECTORY ${TOP_DIR}/third_party/flatbuffers/include DESTINATION ${FLATBF_DIR} COMPONENT ${COMPONENT_NAME})
elseif (CMAKE_SYSTEM_NAME MATCHES "Windows")
get_filename_component(CXX_DIR ${CMAKE_CXX_COMPILER} PATH)
file(GLOB LIB_LIST ${CXX_DIR}/libstdc++-6.dll ${CXX_DIR}/libwinpthread-1.dll ${CXX_DIR}/libssp-0.dll ${CXX_DIR}/libgcc_s_seh-1.dll)
install(FILES ${TOP_DIR}/build/mindspore/tools/converter/converter_lite.exe DESTINATION ${TOP_DIR}/build/mindspore/package COMPONENT ${COMPONENT_NAME})
install(FILES ${LIB_LIST} DESTINATION ${TOP_DIR}/build/mindspore/package COMPONENT ${COMPONENT_NAME})
install(FILES ${TOP_DIR}/build/mindspore/tools/converter/libconverter_parser.a DESTINATION ${TOP_DIR}/build/mindspore/package COMPONENT ${PARSER_NAME})
else ()
install(FILES ${TOP_DIR}/third_party/protobuf/build/lib/libprotobuf.so.19.0.0 DESTINATION ${PROTOBF_DIR}/lib RENAME libprotobuf.so.19 COMPONENT ${COMPONENT_NAME})
install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${INC_DIR_RUN_X86} COMPONENT ${RUN_X86_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
install(DIRECTORY ${TOP_DIR}/mindspore/lite/schema/ DESTINATION ${INC_DIR_RUN_X86}/schema COMPONENT ${RUN_X86_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "inner" EXCLUDE)
install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h DESTINATION ${INC_DIR_RUN_X86}/ir/dtype COMPONENT ${RUN_X86_COMPONENT_NAME})
install(DIRECTORY ${TOP_DIR}/third_party/flatbuffers/include DESTINATION ${FLATBF_DIR_RUN_X86} COMPONENT ${RUN_X86_COMPONENT_NAME})
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${LIB_DIR_RUN_X86} COMPONENT ${RUN_X86_COMPONENT_NAME})
install(FILES ${TOP_DIR}/third_party/protobuf/build/lib/libprotobuf.so.19.0.0 DESTINATION ${PROTOBF_DIR}/lib RENAME libprotobuf.so.19 COMPONENT ${COMPONENT_NAME})
endif ()
set(CPACK_GENERATOR TGZ)
if (CMAKE_SYSTEM_NAME MATCHES "Windows")
set(CPACK_GENERATOR ZIP)
else ()
set(CPACK_GENERATOR TGZ)
endif ()
set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
set(CPACK_COMPONENTS_ALL ${COMPONENT_NAME})
if (PLATFORM_ARM64 OR PLATFORM_ARM32)
set(CPACK_COMPONENTS_ALL ${COMPONENT_NAME})
elseif (WIN32)
set(CPACK_COMPONENTS_ALL ${COMPONENT_NAME})
else ()
set(CPACK_COMPONENTS_ALL ${COMPONENT_NAME} ${RUN_X86_COMPONENT_NAME})
endif ()
set(CPACK_PACKAGE_FILE_NAME ${MAIN_DIR})
set(CPACK_PACKAGE_DIRECTORY ${TOP_DIR}/output/tmp)
if (WIN32)
set(CPACK_PACKAGE_DIRECTORY ${TOP_DIR}/output)
else ()
set(CPACK_PACKAGE_DIRECTORY ${TOP_DIR}/output/tmp)
endif()
set(CPACK_PACKAGE_CHECKSUM SHA256)
include(CPack)

View File

@ -6,6 +6,7 @@ MAINTAINER leonwanghui <leon.wanghui@huawei.com>
ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
ENV CMAKE_ROOT_PATH /usr/local/cmake-3.14.1
ENV PATH ${PYTHON_ROOT_PATH}/bin:${CMAKE_ROOT_PATH}/bin:/usr/local/bin:$PATH
ENV LD_LIBRARY_PATH ${PYTHON_ROOT_PATH}/lib
# Install base tools
RUN apt update \
@ -48,7 +49,7 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
&& tar -xvf v3.7.5.tar.gz \
&& cd /tmp/cpython-3.7.5 \
&& mkdir -p ${PYTHON_ROOT_PATH} \
&& ./configure --prefix=${PYTHON_ROOT_PATH} \
&& ./configure --prefix=${PYTHON_ROOT_PATH} --enable-shared \
&& make -j4 \
&& make install -j4 \
&& rm -f /usr/local/bin/python \

View File

@ -1,4 +1,4 @@
FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
MAINTAINER leonwanghui <leon.wanghui@huawei.com>
@ -35,9 +35,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt install -y \
autoconf \
libtool \
automake \
flex \
libnccl2=2.4.8-1+cuda10.1 \
libnccl-dev=2.4.8-1+cuda10.1
flex
# Set bash
RUN echo "dash dash/sh boolean false" | debconf-set-selections

View File

@ -6,6 +6,7 @@ MAINTAINER leonwanghui <leon.wanghui@huawei.com>
ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
ENV CMAKE_ROOT_PATH /usr/local/cmake-3.14.1
ENV PATH ${CMAKE_ROOT_PATH}/bin:/usr/local/bin:$PATH
ENV LD_LIBRARY_PATH ${PYTHON_ROOT_PATH}/lib
# Install base tools
RUN apt update \
@ -51,7 +52,7 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
&& tar -xvf v3.7.5.tar.gz \
&& cd /tmp/cpython-3.7.5 \
&& mkdir -p ${PYTHON_ROOT_PATH} \
&& ./configure --prefix=${PYTHON_ROOT_PATH} \
&& ./configure --prefix=${PYTHON_ROOT_PATH} --enable-shared \
&& make -j4 \
&& make install -j4 \
&& rm -f /usr/local/bin/python \

View File

@ -1,4 +1,4 @@
FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
MAINTAINER leonwanghui <leon.wanghui@huawei.com>
@ -35,9 +35,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt install -y \
autoconf \
libtool \
automake \
flex \
libnccl2=2.4.8-1+cuda10.1 \
libnccl-dev=2.4.8-1+cuda10.1
flex
# Set bash
RUN echo "dash dash/sh boolean false" | debconf-set-selections

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

@ -1 +1 @@
Subproject commit 622af6c1c50034bea5a08bd409c5a410782bfe53
Subproject commit 80f9c96ed3fe0f07bf40a91d1f41373681d0c0dc

View File

@ -24,6 +24,7 @@
#include <memory>
#include <iostream>
#include <chrono>
#include <vector>
#ifndef ENABLE_ACL
#include "mindspore/core/utils/log_adapter.h"
@ -44,6 +45,19 @@ class LogStream {
return *this;
}
template <typename T>
LogStream &operator<<(const std::vector<T> &val) noexcept {
(*sstream_) << "[";
for (size_t i = 0; i < val.size(); i++) {
(*this) << val[i];
if (i + 1 < val.size()) {
(*sstream_) << ", ";
}
}
(*sstream_) << "]";
return *this;
}
LogStream &operator<<(std::ostream &func(std::ostream &os)) noexcept {
(*sstream_) << func;
return *this;

View File

@ -17,8 +17,6 @@ import json
import os
import sys
from te.platform.cce_conf import te_set_version
from te.platform.fusion_manager import op_build_cfg_dis, op_build_cfg_en, set_current_op_name, \
init_op_pattern, set_op_params, set_op_build_type, get_op_pattern, set_current_op_func_name
from te.platform.fusion_util import fusion_op
from common import check_kernel_info, get_args, get_build_in_impl_path, get_ddk_version
@ -27,7 +25,6 @@ build_in_impl_path = get_build_in_impl_path()
# op function list
op_build = "compile"
op_pre_build = "pre_build"
fusion_pattern_start_flag = "fusion_pattern_start"
fusion_pattern_end_flag = "fusion_pattern_end"
@ -83,19 +80,7 @@ def build_op(build_type, json_str):
else:
op_module = __import__("impl."+op_name, globals(), locals(), [op_name], 0)
# get function
if build_type == op_pre_build:
# set op parameter
op_build_cfg_dis()
set_current_op_func_name(op_name)
set_current_op_name(kernel_name)
init_op_pattern()
set_op_params(*outputs_args, *attrs_args, kernel_name=kernel_name)
set_op_build_type('prebuild')
if custom_flag:
py_fn_name = kernel_info['op_info']['name']
else:
py_fn_name = op_name
elif build_type == op_build:
if build_type == op_build:
if custom_flag:
py_fn_name = kernel_info['op_info']['name']
else:
@ -106,13 +91,6 @@ def build_op(build_type, json_str):
if op_func is None:
raise ValueError("Op:{} function {} is not supported by Tbe.".format(op_name, build_type))
# pre build
if build_type == op_pre_build:
op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
# disable only pattern configuration
op_build_cfg_en()
return get_op_pattern()
# call function
if kernel_name[0:19] == "bounding_box_encode":
return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name_val=kernel_name)
@ -120,8 +98,6 @@ def build_op(build_type, json_str):
return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
except Exception as e:
if build_type == op_pre_build:
op_build_cfg_en()
raise RuntimeError(e)
@ -136,14 +112,9 @@ def compile_fusion_op(json_str):
Exception: If specific keyword is not found.
"""
args = json.loads(json_str)
te_set_version(ddk_version)
if 'fusion_op' not in args or not args['fusion_op']:
raise ValueError("Json string Errors, key:fusion_op not found.")
if 'prebuild_ops' not in args or not args['prebuild_ops']:
raise ValueError("Json string Errors, key:prebuild_ops not found.")
pre_build_op_list = args['prebuild_ops']
for op in pre_build_op_list:
build_op(op_pre_build, json.dumps(op))
fusion_op_arg = args['fusion_op']
return fusion_op(json.dumps(fusion_op_arg))
@ -159,8 +130,6 @@ def compile_with_json(json_str):
json_info = json.loads(json_str)
if "fusion_op" in json_info:
ret = compile_fusion_op(json_str)
elif "compile_type" in json_info:
ret = build_op(op_pre_build, json_str)
else:
ret = build_op(op_build, json_str)
return ret

View File

@ -20,6 +20,8 @@
#include <vector>
#include <memory>
#include <algorithm>
#include <map>
#include <climits>
#include "runtime/device/kernel_runtime.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
@ -218,7 +220,7 @@ void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
dim->set_size((::google::protobuf::int64)item);
}
node_inputs->set_tensor_type((mindspore::DataType)input_data_type);
node_inputs->set_tensor_type(input_data_type);
node_inputs->set_mem_device("HBM");
}
}
@ -245,7 +247,7 @@ void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
}
TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
node_outputs->set_tensor_type((mindspore::DataType)output_data_type);
node_outputs->set_tensor_type(output_data_type);
node_outputs->set_mem_device("HBM");
}
}
@ -287,6 +289,109 @@ bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
return true;
}
bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
if (!anf_node->isa<CNode>()) {
return true;
}
if (!AnfAlgo::IsDynamicShape(anf_node)) {
return true;
}
MS_LOG(INFO) << "CreateExtInfo start, " << anf_node->fullname_with_scope();
int32_t unknown_shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
uint64_t ext_info_head_len = kExtInfoHeadSize;
std::string ext_info;
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
// 1.addr:unknown shape type
uint64_t ext_info_len = ext_info.size();
ext_info_len += ext_info_head_len + sizeof(int32_t);
// 2.addr:input ShapeAndType
ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType);
// 3.addr:output ShapeAndType
ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType);
uint64_t ext_info_offset = ext_info.size();
ext_info.resize(ext_info_len, 0);
char *ext_info_buf = ext_info.data();
// deal1: unknown shape type
ExtInfo *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
info->infoType = FWK_ADPT_EXT_SHAPE_TYPE;
info->infoLen = sizeof(int32_t);
ext_info_offset += ext_info_head_len;
int32_t *shape_type = reinterpret_cast<int32_t *>(ext_info_buf + ext_info_offset);
*shape_type = unknown_shape_type;
ext_info_offset += info->infoLen;
// deal2:input ShapeAndType
info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
info->infoType = FWK_ADPT_EXT_INPUT_SHAPE;
info->infoLen = input_num * sizeof(ShapeAndType);
ext_info_offset += ext_info_head_len;
ShapeAndType *inputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
for (size_t input_index = 0; input_index < input_num; input_index++) {
TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
std::vector<size_t> input_shape;
int32_t input_data_type;
if (input_type == kObjectTypeString) {
auto cnode = anf_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
auto input_node = cnode->inputs()[input_index + 1];
auto value_ptr = GetValueNode(input_node);
auto value = GetValue<std::string>(value_ptr);
input_shape.push_back(1);
input_shape.push_back(value.size());
input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
} else {
input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
}
inputs[input_index].type = input_data_type;
size_t input_shape_index = 0;
for (; input_shape_index < input_shape.size(); input_shape_index++) {
inputs[input_index].dims[input_shape_index] = SizeToLong(input_shape[input_shape_index]);
}
if (input_shape.size() < kMaxShapeDims) {
inputs[input_index].dims[input_shape_index] = LLONG_MIN;
}
}
ext_info_offset += info->infoLen;
// deal3:output ShapeAndType
info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
info->infoType = FWK_ADPT_EXT_OUTPUT_SHAPE;
info->infoLen = output_num * sizeof(ShapeAndType);
ext_info_offset += ext_info_head_len;
ShapeAndType *outputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
for (size_t output_index = 0; output_index < output_num; output_index++) {
std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
outputs[output_index].type = output_data_type;
size_t output_shape_index = 0;
for (; output_shape_index < output_shape.size(); output_shape_index++) {
outputs[output_index].dims[output_shape_index] = SizeToLong(output_shape[output_shape_index]);
}
if (output_shape_index < kMaxShapeDims) {
outputs[output_index].dims[output_shape_index] = LLONG_MIN;
}
}
// set ext info
kernel_mod_ptr->SetExtInfo(ext_info);
return true;
}
KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
@ -300,6 +405,11 @@ KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) {
MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!";
}
if (!CreateExtInfo(anf_node, kernel_mod_ptr)) {
MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!";
}
if (!SetIOSize(anf_node, kernel_mod_ptr)) {
MS_LOG(EXCEPTION) << "Set input output size list failed.";
}

View File

@ -43,6 +43,7 @@ AicpuOpKernelMod::~AicpuOpKernelMod() {
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
ext_info_.clear();
}
void AicpuOpKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
@ -54,6 +55,7 @@ const std::vector<size_t> &AicpuOpKernelMod::GetWorkspaceSizeList() const { retu
void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &inputList) { inputList_ = inputList; }
void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) { outputList_ = outputList; }
void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) { (void)node_def_str_.assign(nodeDef); }
void AicpuOpKernelMod::SetExtInfo(const std::string &ext_info) { ext_info_ = ext_info; }
void AicpuOpKernelMod::SetNodeName(const std::string &node_name) { node_name_ = node_name; }
void AicpuOpKernelMod::SetAnfNode(const mindspore::AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
@ -84,16 +86,30 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs
auto node_def_len = node_def_str_.length();
param_len += node_def_len;
param_len += sizeof(uint32_t);
AicpuParamHead aicpu_param_head;
aicpu_param_head.length = param_len;
aicpu_param_head.ioAddrNum = io_addrs_num;
if (ext_info_.empty()) {
MS_LOG(INFO) << "Static Shape Kernel";
aicpu_param_head.extInfoLength = 0;
aicpu_param_head.extInfoAddr = 0;
} else {
MS_LOG(INFO) << "Dynamic Kernel Ext Info size:" << ext_info_.size();
}
// Create taskArgs: AicpuParamHead + ioAddrs + notifyId + customizedAttr
AicpuParamHead paramHead = {static_cast<uint32_t>(param_len), static_cast<uint32_t>(io_addrs_num)};
args_.clear();
(void)args_.append(reinterpret_cast<const char *>(&paramHead), sizeof(AicpuParamHead));
(void)args_.append(reinterpret_cast<const char *>(&aicpu_param_head), sizeof(AicpuParamHead));
// TaskArgs append ioAddrs
if (io_addrs_size != 0) {
(void)args_.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs_size);
}
// size for node_def
args_.append(reinterpret_cast<const char *>(&node_def_len), sizeof(uint32_t));
// When it's aicpu customized ops, taskArgs should append customized attr
if (node_def_len != 0) {
(void)args_.append(reinterpret_cast<const char *>(node_def_str_.data()), node_def_len);
@ -145,8 +161,9 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr>
node_name_ = kTopKV2;
}
AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>(
kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump());
AicpuTaskInfoPtr task_info_ptr =
make_shared<ge::model_runner::AicpuTaskInfo>(kernel_name_, stream_id, node_so_, node_name_, node_def_str_,
ext_info_, input_data_addrs, output_data_addrs, NeedDump());
MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
return {task_info_ptr};

View File

@ -36,6 +36,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
void SetOutputList(const std::vector<int64_t> &outputList);
void SetAnfNode(const AnfNodePtr &anf_node);
void SetNodeDef(const std::string &nodeDef);
void SetExtInfo(const std::string &ext_info);
void SetNodeName(const std::string &node_name);
/**
@ -58,6 +59,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
std::string node_def_str_;
std::string node_name_;
std::string node_so_;
std::string ext_info_;
std::vector<int64_t> inputList_;
std::vector<int64_t> outputList_;
AnfNodePtr anf_node_;

View File

@ -21,7 +21,6 @@
#include <map>
#include <string>
#include "backend/kernel_compiler/kernel.h"
namespace mindspore {
namespace kernel {
constexpr auto kInitDataSetQueue = "InitDataSetQueue";
@ -50,6 +49,36 @@ struct AicpuParamHead {
uint64_t extInfoAddr; // extInfo address
} __attribute__((packed));
const uint32_t kExtInfoHeadSize = 8;
struct ExtInfo {
int32_t infoType; // extend type
uint32_t infoLen; // length for infoMsg
char infoMsg[0]; // extend value
} __attribute__((packed));
// Extent info ShapeAndType
const uint32_t kMaxShapeDims = 8;
struct ShapeAndType {
int32_t type;
int64_t dims[kMaxShapeDims];
} __attribute__((packed));
// Extend Info type for task
enum FWKTaskExtInfoType {
FWK_ADPT_EXT_SHAPE_TYPE = 0,
FWK_ADPT_EXT_INPUT_SHAPE,
FWK_ADPT_EXT_OUTPUT_SHAPE,
FWK_ADPT_EXT_INVALID
};
// for unknown shape op type
enum UnknowShapeOpType {
DEPEND_IN_SHAPE = 1, // op out shape get by input shape
DEPEND_CONST_VALUE = 2, // op out shape get by const op value
DEPEND_SHAPE_RANGE = 3, // op out shape get by range
DEPEND_COMPUTE = 4 // op out shape get by totally computing
};
class AicpuOpUtil {
public:
static int MsTypeToProtoType(TypeId ms_type);

View File

@ -26,7 +26,7 @@ message AttrValue {
repeated int64 i = 3 [ packed = true ]; //"array(int)"
repeated float f = 4 [ packed = true ]; //"array(float)"
repeated bool b = 5 [ packed = true ]; //"array(bool)"
repeated DataType type = 6 [ packed = true ]; //"array(type)"
repeated int32 type = 6 [ packed = true ]; //"array(type)"
repeated TensorShape shape = 7; //"array(shape)"
repeated Tensor tensor = 8; //"array(tensor)"
}

View File

@ -18,9 +18,16 @@ package mindspore;
import "attr.proto";
import "tensor.proto";
message DynamicIdx {
int32 idx = 1;
int32 num = 2;
}
message NodeDef {
string op = 2;
map<string, AttrValue> attrs = 3;
repeated Tensor inputs = 4;
repeated Tensor outputs = 5;
map<string, DynamicIdx> dym_inputs = 6;
map<string, DynamicIdx> dym_outputs = 7;
}

View File

@ -26,9 +26,12 @@ message Tensor {
TensorShape tensor_shape = 1;
// tensor content data type
DataType tensor_type = 2;
int32 tensor_type = 2;
// tensor memory device
// data located memory device , "DDR" "HBM" OR "NONE"
string mem_device = 3;
string name = 4;
uint64 data_ptr = 5;
uint64 data_size = 6;
}

View File

@ -31,5 +31,5 @@ message TensorShape {
bool unknown_rank = 3;
// data format "NHWC" "NCHW" "NC1HWC0" OR "NONE"
string data_format = 4;
int32 data_format = 4;
};

View File

@ -19,17 +19,30 @@ option cc_enable_arenas = true;
package mindspore;
enum DataType {
MS_UNKNOWN = 0;
MS_BOOL = 1;
MS_FLOAT32 = 0;
MS_FLOAT16 = 1;
MS_INT8 = 2;
MS_UINT8 = 3;
MS_INT16 = 4;
MS_UINT16 = 5;
MS_INT32 = 6;
MS_UINT32 = 7;
MS_INT64 = 8;
MS_UINT64 = 9;
MS_FLOAT16 = 10;
MS_FLOAT32 = 11;
MS_FLOAT64 = 12;
MS_INT32 = 3;
MS_UINT8 = 4;
MS_INT16 = 6;
MS_UINT16 = 7;
MS_UINT32 = 8;
MS_INT64 = 9;
MS_UINT64 = 10;
MS_FLOAT64 = 11;
MS_BOOL = 12;
MS_STRING = 13;
MS_DUAL_SUB_INT8 = 14;
MS_DUAL_SUB_UINT8 = 15;
MS_COMPLEX64 = 16;
MS_COMPLEX128 = 17;
MS_QINT8 = 18;
MS_QINT16 = 19;
MS_QINT32 = 20;
MS_QUINT8 = 21;
MS_QUINT16 = 22;
MS_RESOURCE = 23;
MS_STRING_REF = 24;
MS_DUAL = 25;
MS_UNKNOWN = 26;
}

View File

@ -177,7 +177,7 @@ KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &pro
if (processor == kProcessorAiCore || processor == kProcessorAiCpu) {
kernel_json = kCceKernelMeta;
} else {
kernel_json = bin_map->GetKernelMetaPath();
kernel_json = bin_map->kernel_meta_path();
}
(void)kernel_json.append(kernel_name).append(kJsonSuffix);
KernelPackPtr kernel_pack = std::make_shared<KernelPack>();

View File

@ -57,8 +57,8 @@ class KernelMeta {
void RemoveKernelCache();
std::string Search(const std::string &kernel_name) const;
bool Insert(const std::string &kernel_name, const std::string &kernel_json);
std::string GetKernelMetaPath() { return kernel_meta_path_; }
std::string kernel_meta_path() const { return kernel_meta_path_; }
bool initialized() const { return initialized_; }
static KernelMeta *GetInstance() {
static KernelMeta kernel_meta;
return &kernel_meta;

View File

@ -45,6 +45,22 @@ class PServerKernel {
protected:
virtual void ReInit(const std::vector<AddressPtr> &) {}
void SetTotalRowCnt(size_t total_cnt) {
MS_LOG(INFO) << "Total row count of server " << rank_id_ << " is " << total_cnt;
total_row_cnt_ = total_cnt;
}
void CalOffset() {
size_t rem = total_row_cnt_ % pserver_num_;
if (rem == 0) {
row_offset_ = total_row_cnt_ / pserver_num_ * rank_id_;
} else {
row_offset_ = std::round((static_cast<float>(total_row_cnt_)) / pserver_num_) * rank_id_;
}
MS_LOG(INFO) << "Row offset of server " << rank_id_ << " is " << row_offset_;
}
void Shard(std::vector<size_t> *shape, int axis) {
(*shape)[axis] = Util::LocalShard((*shape)[axis], rank_id_, pserver_num_);
}
@ -52,6 +68,9 @@ class PServerKernel {
size_t rank_id_;
size_t pserver_num_;
size_t worker_num_;
size_t total_row_cnt_;
size_t row_offset_;
};
} // namespace ps
} // namespace kernel

View File

@ -31,6 +31,8 @@ void SparseApplyAdamPSKernel::InitKernel(
const std::vector<size_t> &grad_shape = *(shape_vec[9]);
const std::vector<size_t> &indices_shape = *(shape_vec[10]);
SetTotalRowCnt(var_shape[0]);
CalOffset();
Shard(&var_shape, 0);
Shard(&m_shape, 0);
Shard(&v_shape, 0);
@ -69,8 +71,8 @@ void SparseApplyAdamPSKernel::ReInit(const std::shared_ptr<std::vector<std::shar
const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
const std::vector<size_t> &indices_shape = *(shape_vec[0]);
indices_size_ = indices_shape[0];
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float);
workspace_size_list_[1] = indices_size_ * sizeof(int);
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
}
void SparseApplyAdamPSKernel::ReInit(const std::vector<AddressPtr> &inputs) {
@ -83,10 +85,6 @@ void SparseApplyAdamPSKernel::ReInit(const std::vector<AddressPtr> &inputs) {
bool SparseApplyAdamPSKernel::Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) {
ReInit(inputs);
int *indices = reinterpret_cast<int *>(inputs[10]->addr);
for (size_t i = 0; i < inputs[10]->size / sizeof(int); i++) {
indices[i] -= rank_id_ * var_first_dim_size_;
}
return Launch(inputs, workspace, outputs);
}

View File

@ -28,6 +28,8 @@ void SparseApplyFtrlPSKernel::InitKernel(
std::vector<size_t> grad_shape = *(shape_vec[3]);
std::vector<size_t> indices_shape = *(shape_vec[4]);
SetTotalRowCnt(var_shape[0]);
CalOffset();
Shard(&var_shape, 0);
Shard(&accum_shape, 0);
Shard(&linear_shape, 0);
@ -72,24 +74,20 @@ void SparseApplyFtrlPSKernel::ReInit(const std::shared_ptr<std::vector<std::shar
const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
std::vector<size_t> indices_shape = *(shape_vec[0]);
indices_size_ = indices_shape[0];
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float);
workspace_size_list_[1] = indices_size_ * sizeof(int);
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
}
void SparseApplyFtrlPSKernel::ReInit(const std::vector<AddressPtr> &inputs) {
const auto &indices_addr = inputs[4];
indices_size_ = indices_addr->size / sizeof(int);
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float);
workspace_size_list_[1] = indices_size_ * sizeof(int);
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
}
bool SparseApplyFtrlPSKernel::Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) {
ReInit(inputs);
int *indices = reinterpret_cast<int *>(inputs[4]->addr);
for (size_t i = 0; i < inputs[4]->size / sizeof(int); i++) {
indices[i] -= rank_id_ * var_first_dim_size_;
}
return Launch(inputs, workspace, outputs);
}

View File

@ -31,6 +31,8 @@ void SparseApplyLazyAdamPSKernel::InitKernel(
const std::vector<size_t> &grad_shape = *(shape_vec[9]);
const std::vector<size_t> &indices_shape = *(shape_vec[10]);
SetTotalRowCnt(var_shape[0]);
CalOffset();
Shard(&var_shape, 0);
Shard(&m_shape, 0);
Shard(&v_shape, 0);
@ -69,25 +71,21 @@ void SparseApplyLazyAdamPSKernel::ReInit(
const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
const std::vector<size_t> &indices_shape = *(shape_vec[0]);
indices_size_ = indices_shape[0];
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float);
workspace_size_list_[1] = indices_size_ * sizeof(int);
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
}
void SparseApplyLazyAdamPSKernel::ReInit(const std::vector<AddressPtr> &inputs) {
const auto &indices_addr = inputs[10];
indices_size_ = indices_addr->size / sizeof(int);
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float);
workspace_size_list_[1] = indices_size_ * sizeof(int);
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
}
bool SparseApplyLazyAdamPSKernel::Execute(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) {
ReInit(inputs);
int *indices = reinterpret_cast<int *>(inputs[10]->addr);
for (size_t i = 0; i < inputs[10]->size / sizeof(int); i++) {
indices[i] -= rank_id_ * var_first_dim_size_;
}
return Launch(inputs, workspace, outputs);
}

View File

@ -63,19 +63,21 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
if (!CheckParam(kernel_node)) {
return false;
}
axis_ = GetAttr<int>(kernel_node, "axis");
if (axis_ < 0) {
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
axis_ += SizeToInt(input_shape.size());
}
auto origin_data_format = AnfAlgo::GetOriginDataFormat(kernel_node);
auto input_format = AnfAlgo::GetInputFormat(kernel_node, 0);
axis_ = AxisTransform(origin_data_format, input_format, axis_);
input_num_ = SizeToInt(AnfAlgo::GetInputTensorNum(kernel_node));
inputs_host_ = std::make_unique<T *[]>(input_num_);
len_axis_ = std::make_unique<int[]>(input_num_);
for (int i = 0; i < input_num_; i++) {
size_t input_size = 1;
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i);
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i);
for (size_t j = 0; j < input_shape.size(); j++) {
input_size *= input_shape[j];
}
@ -85,7 +87,7 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
workspace_size_list_.push_back(sizeof(T *) * input_num_);
workspace_size_list_.push_back(sizeof(int) * input_num_);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
output_size_ = 1;
for (int i = 0; i < SizeToInt(output_shape.size()); i++) {
output_size_ *= output_shape[i];
@ -98,7 +100,6 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
}
}
output_size_list_.push_back(output_size_ * sizeof(T));
InitSizeLists();
return true;
}

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SLICE_GPU_KERNEL_H
#include <vector>
#include <utility>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh"
@ -27,8 +28,7 @@ namespace kernel {
template <typename T>
class SliceGpuFwdKernel : public GpuKernel {
public:
SliceGpuFwdKernel()
: is_strided_slice_(false), is_null_input_(false), input_size_(0), output_size_(0), workspace_size_(0) {}
SliceGpuFwdKernel() : is_null_input_(false), input_size_(0), output_size_(0), workspace_size_(0) {}
~SliceGpuFwdKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
@ -50,51 +50,31 @@ class SliceGpuFwdKernel : public GpuKernel {
if (!CheckParam(kernel_node)) {
return false;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto data_format = AnfAlgo::GetInputFormat(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
ShapeNdTo4d(input_shape, &input_shape_);
auto strides = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides");
if (strides) {
strides_ = GetAttr<std::vector<int>>(kernel_node, "strides");
for (auto i = strides_.size(); i < 4; i++) {
(void)strides_.insert(strides_.begin(), 1);
}
size_ = GetAttr<std::vector<int>>(kernel_node, "end");
is_strided_slice_ = true;
} else {
size_ = GetAttr<std::vector<int>>(kernel_node, "size");
}
for (auto i = begin_.size(); i < 4; i++) {
(void)begin_.insert(begin_.begin(), 0);
}
for (size_t i = size_.size(); i < 4; i++) {
(void)size_.insert(size_.begin(), 1);
}
for (size_t i = 0; i < begin_.size(); i++) {
if (begin_[i] < 0) {
begin_[i] = begin_[i] + input_shape_[i];
}
}
for (size_t i = 0; i < size_.size(); i++) {
if (size_[i] < 0) {
size_[i] = (size_[i] + input_shape_[i]) > 0 ? (size_[i] + input_shape_[i]) : 0;
}
if (begin_[i] == size_[i] && is_strided_slice_) {
MS_LOG(WARNING) << "Output is null.";
is_null_input_ = true;
}
if (size_[i] == 0 && strides_[i] > 0) {
size_[i] = begin_[i] + 1;
}
}
input_size_ = IntToSize(input_shape_[0] * input_shape_[1] * input_shape_[2] * input_shape_[3]) * sizeof(T);
auto out_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
auto out_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
output_size_ = sizeof(T);
for (size_t x : out_shape) {
output_size_ = output_size_ * x;
}
// transpose begin and size for NHWC data
if (data_format == "NHWC") {
std::swap(begin_[1], begin_[3]);
std::swap(begin_[1], begin_[2]);
std::swap(size_[1], size_[3]);
std::swap(size_[1], size_[2]);
}
InitSizeLists();
return true;
}
@ -126,26 +106,24 @@ class SliceGpuFwdKernel : public GpuKernel {
MS_LOG(ERROR) << "Input dims is " << input_shape.size() << ", scalar is not supported.";
return false;
}
size_ = GetAttr<std::vector<int>>(kernel_node, "size");
begin_ = GetAttr<std::vector<int>>(kernel_node, "begin");
for (size_t i = 0; i < input_shape.size(); i++) {
if ((begin_[i] > 0 && (begin_[i] > SizeToInt(input_shape[i]))) ||
(begin_[i] < 0 && (std::abs(begin_[i]) > SizeToInt(input_shape[i])))) {
MS_LOG(INFO) << "Input out of bounds " << input_shape[i] << " in axis " << i << ".";
begin_[i] = 0;
if (input_shape[i] <= 0 || size_[i] <= 0) {
MS_LOG(WARNING) << "Slice output is null.";
is_null_input_ = true;
}
}
return true;
}
std::vector<int> begin_;
std::vector<int> size_;
std::vector<int> strides_;
std::vector<int> input_shape_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
bool is_strided_slice_;
bool is_null_input_;
size_t input_size_;
size_t output_size_;

View File

@ -59,6 +59,7 @@ class StridedSliceGpuKernel : public GpuKernel {
ParseMasks(kernel_node);
FillOutputDim();
null_output_ = IsNullOutput();
InitSizeLists();
return true;
}
@ -86,14 +87,15 @@ class StridedSliceGpuKernel : public GpuKernel {
for (size_t i = 0; i < MAX_DIMS; i++) {
if (i < begin_.size()) {
begin_[i] =
std::min(begin_[i] < 0 ? SizeToInt(begin_[i] + input_shape_[i]) : begin_[i], SizeToInt(input_shape_[i] - 1));
int dim = SizeToInt(input_shape_[i]);
begin_[i] = std::min(begin_[i] < 0 ? std::max(begin_[i] + dim, 0) : begin_[i], dim - 1);
} else {
begin_.push_back(0);
}
if (i < end_.size()) {
end_[i] = std::max(end_[i] < 0 ? end_[i] + SizeToInt(input_shape_[i]) : end_[i], -1);
int dim = SizeToInt(input_shape_[i]);
end_[i] = std::max(end_[i] < 0 ? end_[i] + dim : std::min(end_[i], dim), -1);
} else {
end_.push_back(i < input_shape_.size() ? input_shape_[i] : 1);
}

View File

@ -87,14 +87,15 @@ class StridedSliceGradGpuKernel : public GpuKernel {
for (size_t i = 0; i < MAX_DIMS; i++) {
if (i < begin_.size()) {
begin_[i] =
std::min(begin_[i] < 0 ? SizeToInt(begin_[i] + input_shape_[i]) : begin_[i], SizeToInt(input_shape_[i] - 1));
int dim = SizeToInt(input_shape_[i]);
begin_[i] = std::min(begin_[i] < 0 ? std::max(begin_[i] + dim, 0) : begin_[i], dim - 1);
} else {
begin_.push_back(0);
}
if (i < end_.size()) {
end_[i] = std::max(end_[i] < 0 ? end_[i] + SizeToInt(input_shape_[i]) : end_[i], -1);
int dim = SizeToInt(input_shape_[i]);
end_[i] = std::max(end_[i] < 0 ? end_[i] + dim : std::min(end_[i], dim), -1);
} else {
end_.push_back(i < input_shape_.size() ? input_shape_[i] : 1);
}

View File

@ -22,6 +22,7 @@
#include <string>
#include <vector>
#include <utility>
#include <map>
#include "backend/kernel_compiler/kernel.h"
#include "backend/kernel_compiler/gpu/kernel_constants.h"
#include "runtime/device/gpu/gpu_device_manager.h"
@ -31,6 +32,19 @@ using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
namespace mindspore {
namespace kernel {
static std::map<int, int> kNCHWToNHWCAxisMap = {
{0, 0},
{1, 3},
{2, 1},
{3, 2},
};
static std::map<int, int> kNHWCToNCHWAxisMap = {
{0, 0},
{1, 2},
{2, 3},
{3, 1},
};
class GpuKernel : public KernelMod {
public:
virtual ~GpuKernel() = default;
@ -74,6 +88,18 @@ class GpuKernel : public KernelMod {
dst->push_back(src.size() == 0 ? 1 : SizeToInt(src[src.size() - 1]));
}
int AxisTransform(const std::string &origin_data_format, const std::string &cal_format, int axis) {
if (((origin_data_format == kOpFormat_DEFAULT) || (origin_data_format == kOpFormat_NCHW)) &&
(cal_format == kOpFormat_NHWC)) {
return kNCHWToNHWCAxisMap[axis];
} else if (((cal_format == kOpFormat_DEFAULT) || (cal_format == kOpFormat_NCHW)) &&
(origin_data_format == kOpFormat_NHWC)) {
return kNHWCToNCHWAxisMap[axis];
} else {
return axis;
}
}
// transpose shape: NCHW To NHWC
void ShapeNCHW2NHWC(std::vector<size_t> *shape) {
std::swap((*shape)[1], (*shape)[3]);

View File

@ -82,7 +82,7 @@ class AddNGpuFwdKernel : public GpuKernel {
MS_LOG(ERROR) << "Output number is " << output_num << ", but cudnnAddTensor needs 1 output.";
return false;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(input_shape);
if (is_null_input_) {
MS_LOG(WARNING) << "AddNGpuFwdKernel input is null";
@ -96,9 +96,16 @@ class AddNGpuFwdKernel : public GpuKernel {
for (size_t i = 0; i < input_shape.size(); i++) {
dimA[i] = SizeToInt(input_shape[i]);
}
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptorEx(input_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
SizeToInt(input_shape.size()), dimA),
"cudnnSetTensorNdDescriptor failed");
auto input_format = AnfAlgo::GetInputFormat(kernel_node, 0);
if (input_format == kOpFormat_NHWC) {
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptorEx(input_descriptor_, CUDNN_TENSOR_NHWC, cudnn_data_type_,
SizeToInt(input_shape.size()), dimA),
"cudnnSetTensorNdDescriptor failed");
} else {
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptorEx(input_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
SizeToInt(input_shape.size()), dimA),
"cudnnSetTensorNdDescriptor failed");
}
InitSizeLists();
return true;
}

View File

@ -56,9 +56,9 @@ class BroadcastOpGpuKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
GetOpType(kernel_node);
auto shape1 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto shape2 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto shape3 = AnfAlgo::GetOutputInferShape(kernel_node, 0);
auto shape1 = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
auto shape2 = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
auto shape3 = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
need_broadcast_ = IsBroadcast(shape1, shape2);
if (need_broadcast_ && shape1.size() > 7) {
MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than 7";

View File

@ -42,7 +42,7 @@ class MatMulGpuKernel : public GpuKernel {
dtype_a_(CUDA_R_32F),
dtype_b_(CUDA_R_32F),
dtype_c_(CUDA_R_32F),
algo_(CUBLAS_GEMM_DEFAULT_TENSOR_OP) {}
algo_(CUBLAS_GEMM_DEFAULT) {}
~MatMulGpuKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
@ -85,6 +85,10 @@ class MatMulGpuKernel : public GpuKernel {
dtype_a_ = GetCudaDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
dtype_b_ = GetCudaDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 1)));
dtype_c_ = GetCudaDataType(TypeIdLabel(AnfAlgo::GetOutputDeviceDataType(kernel_node, 0)));
if (dtype_a_ == CUDA_R_16F && dtype_b_ == CUDA_R_16F && dtype_c_ == CUDA_R_16F) {
MS_LOG(WARNING) << "input and output type is float16, allow to use Tensor Core operations if possible";
algo_ = CUBLAS_GEMM_DEFAULT_TENSOR_OP;
}
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_NULL_INPUT(output_shape);
if (is_null_input_) {

View File

@ -37,7 +37,6 @@ enum FusionType {
COMMREDUCE,
SEGMENT,
OPAQUE,
DYNAMIC,
UNKNOWN_FUSION_TYPE = -1,
};
enum OpPattern {
@ -80,8 +79,8 @@ class KernelPack {
bool LoadKernelMeta(const std::string &json_f, const std::string &processor);
bool ReadFromJsonFile(const std::string &json_f, const std::string &processor);
const std::string Serialize() const;
const FlexArray *const GetJson() const { return json_; }
const FlexArray *const GetKernel() const { return kernel_; }
const FlexArray *GetJson() const { return json_; }
const FlexArray *GetKernel() const { return kernel_; }
~KernelPack() {
if (json_) {
delete[] json_;

View File

@ -19,53 +19,36 @@
#include <map>
#include <string>
#include <memory>
#include <utility>
#include "backend/kernel_compiler/tbe/tbe_kernel_build.h"
#include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h"
#include "backend/kernel_compiler/tbe/tbe_utils.h"
#include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
#include "utils/ms_context.h"
namespace mindspore {
namespace kernel {
using mindspore::kernel::tbe::TbeUtils;
static bool GenPreBuildKernelJson(const std::vector<AnfNodePtr> &compute_nodes,
std::vector<nlohmann::json> *prebuild_op_list) {
MS_EXCEPTION_IF_NULL(prebuild_op_list);
TbeKernelJsonCreator creator(PREBUILD);
for (const auto &anf_node : compute_nodes) {
nlohmann::json prebuild;
if (!creator.GenTbeSingleKernelJson(anf_node, &prebuild)) {
MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
return false;
}
(*prebuild_op_list).push_back(prebuild);
}
return true;
}
std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) {
MS_LOG(INFO) << "kernel fusion build start, scope size:" << fusion_scopes.size();
std::map<int32_t, KernelModPtr> kernel_mod_ret;
auto build_manger = std::make_shared<ParallelBuildManager>();
MS_EXCEPTION_IF_NULL(build_manger);
for (const auto &fusion_scope_iter : fusion_scopes) {
auto scope_id = fusion_scope_iter.scope_id;
string fusion_kernel_name;
nlohmann::json fusion_op;
string fusion_kernel = "te_fusion";
if (!TbeKernelBuild::GenFusionScopeJson(fusion_scope_iter.input_nodes, fusion_scope_iter.compute_nodes, &fusion_op,
&fusion_kernel)) {
&fusion_kernel_name)) {
continue;
}
// gen kernel_name & check cache
std::string json_str = fusion_op.dump();
size_t hash_id = std::hash<std::string>()(json_str);
auto json_name = fusion_kernel.append("_").append(std::to_string(hash_id));
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
auto device_id = context_ptr->device_id();
auto json_name =
fusion_kernel_name.append("_").append(std::to_string(hash_id)).append("_").append(std::to_string(device_id));
fusion_op["fusion_op_name"] = json_name;
// gen json for prebuild
std::vector<nlohmann::json> prebuild_op_list;
if (!GenPreBuildKernelJson(fusion_scope_iter.compute_nodes, &prebuild_op_list)) {
continue;
}
// get io size
std::vector<size_t> input_size_list;
std::vector<size_t> output_size_list;
@ -80,20 +63,20 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
auto kernel_mod =
build_manger->GenKernelMod(json_name, tbe::kProcessorAiCore, input_size_list, output_size_list, kernel_pack);
if (kernel_mod != nullptr) {
kernel_mod_ret[scope_id] = kernel_mod;
kernel_mod_ret[fusion_scope_iter.scope_id] = kernel_mod;
continue;
}
}
// fusion build
nlohmann::json fusion_json;
fusion_json["fusion_op"] = fusion_op;
fusion_json["prebuild_ops"] = prebuild_op_list;
auto task_id = build_manger->StartCompileOp(fusion_json);
TbeUtils::SaveJsonInfo(json_name, fusion_json.dump());
if (task_id < 0) {
MS_EXCEPTION(ArgumentError) << "start compile failed.";
}
build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list, scope_id);
build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list,
fusion_scope_iter.scope_id);
}
int build_failed_num = 0;

View File

@ -16,6 +16,7 @@
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
#include <utility>
#include <vector>
#include <map>
#include "backend/kernel_compiler/kernel.h"
@ -25,11 +26,9 @@ namespace kernel {
* @brief fuse op and return a callable mod
*/
struct FusionScopeInfo {
FusionScopeInfo() {}
FusionScopeInfo(int32_t id, const std::vector<AnfNodePtr> &in, const std::vector<AnfNodePtr> &comp,
const std::vector<AnfNodePtr> &out)
: scope_id(id), input_nodes(in), compute_nodes(comp), output_nodes(out) {}
int32_t scope_id;
FusionScopeInfo(int32_t id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp, std::vector<AnfNodePtr> out)
: scope_id(id), input_nodes(std::move(in)), compute_nodes(std::move(comp)), output_nodes(std::move(out)) {}
int32_t scope_id{};
std::vector<AnfNodePtr> input_nodes;
std::vector<AnfNodePtr> compute_nodes;
std::vector<AnfNodePtr> output_nodes;

View File

@ -40,14 +40,13 @@ class OpLib {
private:
static bool RegOpFromLocalInfo();
static bool DecodeOpInfo(const nlohmann::json &obj, const OpImplyType imply_type, const std::string &impl_path);
static bool DecodeAttr(const nlohmann::json &obj, const OpImplyType imply_type,
const std::shared_ptr<OpInfo> &op_info);
static bool DecodeOpInfo(const nlohmann::json &obj, OpImplyType imply_type, const std::string &impl_path);
static bool DecodeAttr(const nlohmann::json &obj, OpImplyType imply_type, const std::shared_ptr<OpInfo> &op_info);
static bool DecodeDtypeFormat(const nlohmann::json &dtype_format, const std::shared_ptr<OpIOInfo> &op_io,
size_t index);
static void DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
static void DecodeAKGSpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
static bool DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply_type, const OpIOType io_type,
static bool DecodeInputOutput(const nlohmann::json &obj, OpImplyType imply_type, OpIOType io_type,
const std::shared_ptr<OpInfo> &op_info, const nlohmann::json &dtype_format);
static bool GetRefInfo(const std::shared_ptr<OpInfo> &op_info);
static bool CheckRepetition(const std::shared_ptr<OpInfo> &op_info);

View File

@ -173,7 +173,7 @@ void TbeAdapter::NormalizeFuncName(std::string *func_name) {
*func_name = name_tmp;
auto iter = tbe_func_adapter_map.find(*func_name);
if (iter != tbe_func_adapter_map.end()) {
MS_LOG(INFO) << "map actual op from me " << *func_name << " to tbe op" << iter->second;
MS_LOG(INFO) << "Map actual op from me: " << *func_name << " to tbe op: " << iter->second;
*func_name = iter->second;
}
}

View File

@ -27,7 +27,7 @@
// the TBE back-end operator implementation difference
namespace mindspore {
namespace kernel {
enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
enum kCreaterType : int { SINGLE_BUILD = 0, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
namespace tbe {
using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
nlohmann::json *attrs_json);

View File

@ -63,7 +63,7 @@ const std::unordered_map<std::string, size_t> type_nbyte_maps = {
const std::unordered_map<std::string, FusionType> fusion_type_maps = {
{"CONVLUTION", FusionType::CONVLUTION}, {"ELEMWISE", FusionType::ELEMWISE}, {"COMMREDUCE", FusionType::COMMREDUCE},
{"SEGMENT", FusionType::SEGMENT}, {"DYNAMIC", FusionType::DYNAMIC}, {"OPAQUE", FusionType::OPAQUE},
{"SEGMENT", FusionType::SEGMENT}, {"OPAQUE", FusionType::OPAQUE},
};
TypeId DtypeToTypeId(const std::string &dtypes) {

View File

@ -24,6 +24,7 @@
#include "backend/kernel_compiler/tbe/tbe_adapter.h"
#include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
#include "backend/kernel_compiler/tbe/tbe_utils.h"
#include "utils/ms_context.h"
namespace mindspore {
namespace kernel {
@ -71,14 +72,20 @@ constexpr auto kVTypeListListInt = "listListInt";
constexpr auto kJValue = "value";
constexpr auto kJDynIndex = "dyn_index";
constexpr auto kJFuncName = "func_name";
std::string NormalizeFullScopeName(const string &full_scope_name) {
// exp:Default/ReLU-op0 -->Default_ReLU_op0
string normal_ret = full_scope_name;
std::replace(normal_ret.begin(), normal_ret.end(), '/', '_');
std::replace(normal_ret.begin(), normal_ret.end(), '-', '_');
return normal_ret;
}
constexpr auto kJL1AddrOffset = "L1_addr_offset";
constexpr auto kJL1FusionType = "L1_fusion_type";
constexpr auto kJL1WorkspaceSize = "L1_workspace_size";
constexpr auto kJAddrType = "addr_type";
constexpr auto kJSliceOffset = "slice_offset";
constexpr auto kJSplitIndex = "split_index";
constexpr auto kJTotalShape = "total_shape";
constexpr auto kJValidShape = "valid_shape";
constexpr auto kJModuleName = "module_name";
constexpr auto kJPattern = "pattern";
constexpr auto kJPyModulePath = "py_module_path";
constexpr auto kJPreBuildOutsAttrs = "prebuild_outs_attrs";
constexpr auto kJKwdArgs = "kwds_args";
constexpr auto kJListArgs = "list_args";
bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspore::AnfNode> &anf_node,
nlohmann::json *kernel_json) {
@ -117,13 +124,12 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspor
op_info_json[kJAttrs] = attrs_json;
std::string json_str = op_info_json.dump();
size_t hash_id = std::hash<std::string>()(json_str);
json_name_ = op_name + "_" + std::to_string(hash_id);
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
auto device_id = context_ptr->device_id();
json_name_ = op_name + "_" + std::to_string(hash_id) + "_" + std::to_string(device_id);
json_info_ = json_str;
if (creater_type_ == PREBUILD) {
op_info_json[kJKernelName] = NormalizeFullScopeName(anf_node->fullname_with_scope());
} else {
op_info_json[kJKernelName] = json_name_;
}
op_info_json[kJKernelName] = json_name_;
(*kernel_json)[kJOpInfo] = op_info_json;
(*kernel_json)[kJFullName] = anf_node->fullname_with_scope();
if (creater_type_ == SINGLE_BUILD) {
@ -581,25 +587,25 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector<si
bool TbeKernelBuild::GenFusionScopeJson(const std::vector<mindspore::AnfNodePtr> &input_nodes,
const std::vector<mindspore::AnfNodePtr> &compute_nodes,
nlohmann::json *fusion_str, std::string *fusion_kernel) {
MS_EXCEPTION_IF_NULL(fusion_str);
MS_EXCEPTION_IF_NULL(fusion_kernel);
nlohmann::json *fusion_json, std::string *fusion_kernel_name) {
MS_EXCEPTION_IF_NULL(fusion_json);
MS_EXCEPTION_IF_NULL(fusion_kernel_name);
// get input layer info
std::vector<std::vector<mindspore::AnfNodePtr>> input_layers;
std::map<const AnfNodePtr, FusionDataType> spec_data_input;
if (!GetInputLayers(input_nodes, compute_nodes, &input_layers, &spec_data_input)) {
return false;
}
// gen fusion scopre_op jsom
// gen fusion scopre_op json
std::vector<nlohmann::json> compute_list;
(*fusion_kernel) = kFusionKernelNamePrfix;
(*fusion_kernel_name) = kFusionKernelNamePrfix;
// index: fusion build option input record, next one from 0
static size_t index = 0;
auto layer_iter = input_layers.begin();
auto compute_op_iter = compute_nodes.begin();
for (; compute_op_iter != compute_nodes.end(); ++compute_op_iter, ++layer_iter) {
nlohmann::json compute_op_str;
(void)GenFusionComputeJson(*compute_op_iter, &layer_iter, &compute_op_str, fusion_kernel, &index);
(void)GenFusionComputeJson(*compute_op_iter, &layer_iter, &compute_op_str, fusion_kernel_name, &index);
compute_list.push_back(compute_op_str);
}
index = 0;
@ -617,36 +623,122 @@ bool TbeKernelBuild::GenFusionScopeJson(const std::vector<mindspore::AnfNodePtr>
}
index = 0;
data_list.insert(data_list.end(), compute_list.begin(), compute_list.end());
(*fusion_str)[kFusionOpList] = data_list;
(*fusion_json)[kFusionOpList] = data_list;
return true;
}
void TbeKernelBuild::GenPreDescJson(nlohmann::json *output_desc) {
MS_EXCEPTION_IF_NULL(output_desc);
(*output_desc)[kJL1AddrOffset] = 0;
(*output_desc)[kJL1FusionType] = -1;
(*output_desc)[kJL1WorkspaceSize] = -1;
(*output_desc)[kJAddrType] = 0;
}
void TbeKernelBuild::GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str,
                                                std::string *fusion_kernel_name) {
  // Fill the common fields of a fusion compute-op json (type, func_name,
  // module_name, name, pattern, py_module_path) and append the op type to the
  // accumulated fusion kernel name.
  // Fix: cnode is dereferenced below, so null-check it like the sibling
  // GenFusionComputePreBuildJson does.
  MS_EXCEPTION_IF_NULL(cnode);
  MS_EXCEPTION_IF_NULL(compute_op_str);
  MS_EXCEPTION_IF_NULL(fusion_kernel_name);
  // gen others
  auto origin_type = AnfAlgo::GetCNodeName(cnode);
  // replace special op type for buffer fusion op
  auto type = GetRealOpType(origin_type);
  (*compute_op_str)[kJtype] = type;
  tbe::TbeAdapter::NormalizeFuncName(&type);
  (*compute_op_str)[kJFuncName] = type;
  (*compute_op_str)[kJModuleName] = std::string("impl.") + type;
  (*compute_op_str)[kJName] = cnode->fullname_with_scope();
  (*compute_op_str)[kJPattern] = GetNodeFusionType(cnode);
  (*compute_op_str)[kJPyModulePath] = "/usr/local/Ascend/opp/op_impl/build_in/ai_core/tbe";
  (void)(*fusion_kernel_name).append("_");
  (void)(*fusion_kernel_name).append(type);
}
void TbeKernelBuild::GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str) {
  // Build the prebuild_outs_attrs section of a fusion compute-op json:
  // kwds_args is an empty object, list_args carries the node's output descs
  // followed by its valid (non-isRef) attribute values.
  MS_EXCEPTION_IF_NULL(cnode);
  MS_EXCEPTION_IF_NULL(compute_op_str);
  // kwds args
  nlohmann::json json_prebuild_args;
  json_prebuild_args[kJKwdArgs] = nlohmann::json::object();
  // list_args
  nlohmann::json json_list_args;
  // list_args: output args
  auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
  for (size_t i = 0; i < output_size; ++i) {
    nlohmann::json output_desc;
    GenDescJson(cnode, i, i, &output_desc);
    // prebuild expects the dtype key; mirror the generated data_type field.
    output_desc[kJDtype] = output_desc[kJDataType];
    json_list_args.push_back(output_desc);
  }
  // list_args: attr args
  auto op_name = AnfAlgo::GetCNodeName(cnode);
  auto opinfo = OpLib::FindOp(op_name, OpImplyType::kTBE);
  MS_EXCEPTION_IF_NULL(opinfo);
  TbeKernelJsonCreator json_creater(SINGLE_BUILD);
  nlohmann::json json_attr_args;
  if (!json_creater.GenTbeAttrJson(cnode, opinfo, &json_attr_args)) {
    MS_LOG(INFO) << "Fusion warning: get prebuild args of attr failed.";
  }
  for (const auto &attr : json_attr_args) {
    // skip the implicit isRef attribute and any attr marked invalid
    if (attr[kJName] != "isRef" && attr[kJValid] == true) {
      json_list_args.push_back(attr[kJValue]);
    }
  }
  json_prebuild_args[kJListArgs] = json_list_args;
  (*compute_op_str)[kJPreBuildOutsAttrs] = json_prebuild_args;
}
void TbeKernelBuild::GenSuffixDescJson(nlohmann::json *output_desc) {
  // Fill the fixed slice/shape suffix fields that close out an output desc.
  MS_EXCEPTION_IF_NULL(output_desc);
  auto &desc = *output_desc;
  desc[kJSliceOffset] = nlohmann::json::array();
  desc[kJSplitIndex] = 0;
  desc[kJTotalShape] = nlohmann::json::array();
  desc[kJValidShape] = nlohmann::json::array();
}
// anf_node: this node is used to get output desc(type\format\shape ...)
// node_out_idx: node output index
// desc_output_idx: this index use to add json
// nlohmann::json *output_desc: for return
// FusionDataType fusion_data_type: special process json desc output shape [kFusionAddN, kFusionReLUGradV2]
void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
size_t desc_output_idx, nlohmann::json *output_desc, FusionDataType fusion_data_type) {
GenPreDescJson(output_desc);
// data_type
auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx);
(*output_desc)[kJDataType] = tbe::TypeIdToString(type_id);
// name
std::string output_desc_name = anf_node->fullname_with_scope();
if (node_out_idx > 0) {
output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx);
}
(*output_desc)[kJName] = NormalizeFullScopeName(output_desc_name);
auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx);
(*output_desc)[kJDataType] = tbe::TypeIdToString(type_id);
(*output_desc)[kJName] = output_desc_name;
// ori_format
(*output_desc)[kJOriFormat] = kOpFormat_NCHW;
// ori_shape
auto ori_shape = AnfAlgo::GetOutputInferShape(anf_node, node_out_idx);
if (ori_shape.empty()) {
ori_shape.emplace_back(1);
}
(*output_desc)[kJOriShape] = ori_shape;
// !! Note: output_index, only node's output use it
(*output_desc)[kJOutputIndex] = desc_output_idx;
// shape
auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, node_out_idx);
if (shape.empty()) {
shape.emplace_back(1);
}
(*output_desc)[kJShape] = shape;
// !! Note: format: only data node's output use it
auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx);
if (format == kOpFormat_DEFAULT) {
format = ori_shape.size() == 4 ? kOpFormat_NCHW : kOpFormat_ND;
} else if (format == kOpFormat_FRAC_Z) {
format = kOpFormat_FRACTAL_Z;
}
(*output_desc)[kJFormat] = format;
(*output_desc)[kJOriFormat] = kOpFormat_NCHW;
(*output_desc)[kJOutputIndex] = desc_output_idx;
// special node
if (fusion_data_type == kFusionAddN && format == kOpFormat_NC1HWC0) {
std::vector<size_t> spec_shape = {};
spec_shape.emplace_back(shape[0]);
@ -663,12 +755,13 @@ void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_
(*output_desc)[kJShape] = spec_shape;
(*output_desc)[kJDataType] = kVTypeBool;
}
GenSuffixDescJson(output_desc);
}
void TbeKernelBuild::GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
size_t output_index, nlohmann::json *output_desc) {
std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index);
(*output_desc)[kJName] = NormalizeFullScopeName(output_desc_name);
(*output_desc)[kJName] = output_desc_name;
(*output_desc)[kJOutputIndex] = output_index;
std::vector<size_t> shape;
(*output_desc)[kJShape] = shape;
@ -692,6 +785,9 @@ bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name,
return true;
}
// <input_nodes> : contains parameter/data node, input order may not match tbe input order;
// <compute_nodes> : contains cnode, inputs order may not match tbe input order;
// Special process node list: reference tbe_adapter.cc [except: Conv2DBackpropInput]
bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes,
const std::vector<mindspore::AnfNodePtr> &compute_nodes,
std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers,
@ -722,7 +818,7 @@ bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &in
MS_LOG(INFO) << "Fusion info: add compute node's [" << i << "] input: " << input->fullname_with_scope();
layer.emplace_back((*find_iter));
} else {
MS_LOG(INFO) << "Fusion warnig: this input [" << i << "] may be pre compute(" << input->fullname_with_scope()
MS_LOG(INFO) << "Fusion warning: this input [" << i << "] may be pre compute(" << input->fullname_with_scope()
<< ") node's output.";
}
}
@ -750,8 +846,9 @@ bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::Anf
MS_EXCEPTION_IF_NULL(data_str);
MS_EXCEPTION_IF_NULL(index);
std::vector<nlohmann::json> output_desc_list;
// if data_input is null, this is optional input.
if (!data_input) {
MS_LOG(INFO) << "Data input is optional node";
MS_LOG(INFO) << "Fusion info: data input is optional node";
auto name = std::string(kOptional) + std::to_string(*index);
(*data_str)[kJName] = name;
nlohmann::json output_desc;
@ -767,12 +864,16 @@ bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::Anf
auto kernel_idx = AnfAlgo::VisitKernel(data_input, 0);
auto real_node = kernel_idx.first;
size_t real_idx = kernel_idx.second;
MS_LOG(INFO) << "Real name " << real_node->fullname_with_scope() << " index:" << real_idx;
MS_LOG(INFO) << "Fusion info: Real name: " << real_node->fullname_with_scope() << ". index:" << real_idx;
// kJOutputDesc
nlohmann::json output_desc;
GenDescJson(real_node, real_idx, real_idx, &output_desc, fusion_data_type);
output_desc_list.push_back(output_desc);
(*data_str)[kJName] = NormalizeFullScopeName(real_node->fullname_with_scope());
auto full_name = real_node->fullname_with_scope();
if (real_idx > 0) {
full_name = full_name.append("_").append(std::to_string(real_idx));
}
(*data_str)[kJName] = full_name;
}
(*data_str)[kJOutputDesc] = output_desc_list;
(*data_str)[kJtype] = "Data";
@ -808,6 +909,7 @@ bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) {
size_t TbeKernelBuild::GetOptionalInput(const mindspore::CNodePtr &cnode, bool is_dynamic_input) {
MS_EXCEPTION_IF_NULL(cnode);
if (is_dynamic_input) {
// Node can not have optional & dynamic input.
return 0;
}
MS_EXCEPTION_IF_NULL(cnode);
@ -831,22 +933,46 @@ std::string TbeKernelBuild::GetRealOpType(const std::string &origin_type) {
return result;
}
std::string TbeKernelBuild::GetNodeFusionType(const mindspore::CNodePtr &cnode) {
  // Map a node's op type to its TBE fusion pattern name.
  // Returns an empty string (with an INFO log) for unsupported op types.
  MS_EXCEPTION_IF_NULL(cnode);
  auto node_type = AnfAlgo::GetCNodeName(cnode);
  // const: the table is a fixed lookup, built once and never mutated.
  static const std::map<std::string, std::string> fusion_type_map = {{kConv2DOpName, "Convolution"},
                                                                     {kBNTrainingReduceOpName, "bn_reduce"},
                                                                     {kBNTrainingUpdateOpName, "bn_update"},
                                                                     {kReluV2OpName, "ElemWise"},
                                                                     {kTensorAddOpName, "ElemWise"},
                                                                     {kConv2DBackpropInputOpName, "Conv2d_backprop_input"},
                                                                     {kAddNOpName, "ElemWise"},
                                                                     {kReluGradV2OpName, "ElemWise"},
                                                                     {kRealDivOpName, "ElemWise"}};
  auto find = fusion_type_map.find(node_type);
  if (find == fusion_type_map.end()) {
    MS_LOG(INFO) << "Fusion warning: get node fusion type failed, origin node type: " << node_type
                 << " return null string.";
    return "";
  }
  return find->second;
}
bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
std::vector<nlohmann::json> *input_desc_list, size_t *index) {
MS_EXCEPTION_IF_NULL(cnode);
MS_EXCEPTION_IF_NULL(input_desc_list);
std::vector<nlohmann::json> input_desc_list_tmp = {};
// 1. input json
bool is_dynamic_input = IsDynamicInput(cnode);
for (size_t i = 1; i < cnode->inputs().size(); ++i) {
auto input = cnode->input(i);
auto kernel_idx = AnfAlgo::VisitKernel(input, 0);
auto real_node = kernel_idx.first;
size_t real_idx = kernel_idx.second;
MS_LOG(INFO) << "Real name" << real_node->fullname_with_scope() << "index:" << real_idx;
MS_LOG(INFO) << "Fusion info: real name: " << real_node->fullname_with_scope() << ". index:" << real_idx;
nlohmann::json input_desc;
GenDescJson(real_node, real_idx, real_idx, &input_desc);
if (is_dynamic_input) {
// 2. dynamic input json
MS_LOG(INFO) << "Node has dynamic input.";
input_desc[kJDynIndex] = (i - 1);
}
@ -854,7 +980,8 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
}
size_t optional_num = GetOptionalInput(cnode, is_dynamic_input);
if (optional_num > 0) {
MS_LOG(INFO) << "Node has optional input.";
// 3. optional input
MS_LOG(INFO) << "Fusion info: node has optional input.";
for (size_t i = 0; i < optional_num; ++i) {
nlohmann::json optional_input_desc;
optional_input_desc[kJName] = std::string(kOptional) + std::to_string(*index);
@ -872,7 +999,7 @@ std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int> &o
std::vector<size_t> desc_output_index = {};
for (size_t idx = 0; idx < output_used_nums.size(); ++idx) {
auto output_use_num_item = output_used_nums[idx];
MS_LOG(INFO) << "Output used num[" << idx << "] = " << output_use_num_item;
MS_LOG(INFO) << "Fusion info: output used num[" << idx << "] = " << output_use_num_item;
desc_output_index.emplace_back(idx);
if (output_use_num_item > 1) {
desc_output_index.emplace_back(idx);
@ -887,7 +1014,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode
auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) {
auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum);
MS_LOG(INFO) << "This node's output has been reused, node name: " << cnode->fullname_with_scope();
MS_LOG(INFO) << "Fusion info: this node's output has been reused, node name: " << cnode->fullname_with_scope();
if (output_used_nums.size() != output_size) {
MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")"
<< " is not match output used num(" << output_used_nums.size() << ")";
@ -930,20 +1057,14 @@ bool TbeKernelBuild::GenFusionComputeJson(const mindspore::AnfNodePtr &compute_n
// gen output desc
std::vector<nlohmann::json> output_desc_list;
if (!GenFusionComputeOutputJson(cnode, &output_desc_list)) {
MS_LOG(INFO) << "Fusion Error: gen fusion output desc faild, node full name: " << cnode->fullname_with_scope();
MS_LOG(INFO) << "Fusion Error: gen fusion output desc failed, node full name: " << cnode->fullname_with_scope();
return false;
}
(*compute_op_str)[kJOutputDesc] = output_desc_list;
// gen others
auto origin_type = AnfAlgo::GetCNodeName(cnode);
// replace special op type for buffer fusion op
auto type = GetRealOpType(origin_type);
(*compute_op_str)[kJtype] = type;
tbe::TbeAdapter::NormalizeFuncName(&type);
(*compute_op_str)[kJFuncName] = type;
(*compute_op_str)[kJName] = NormalizeFullScopeName(cnode->fullname_with_scope());
(void)(*fusion_kernel_name).append("_");
(void)(*fusion_kernel_name).append(type);
// gen common desc
GenFusionComputeCommonJson(cnode, compute_op_str, fusion_kernel_name);
// gen prebuild args
GenFusionComputePreBuildJson(cnode, compute_op_str);
return true;
}
@ -965,7 +1086,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list,
MS_EXCEPTION_IF_NULL(output_size_list);
input_size_list->clear();
output_size_list->clear();
// cal input size for malloc
for (const auto &op : fusion_op_list) {
if (op[kJtype] == "Data") {
const auto &data_output_desc = op[kJOutputDesc];
@ -975,23 +1096,23 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list,
}
auto ret = GetIOSizeImpl(data_output);
input_size_list->push_back(ret);
MS_LOG(INFO) << "Fusion info: scope input name " << op[kJName] << ", size: " << ret;
MS_LOG(INFO) << "Fusion info: input node name " << op[kJName] << ", size: " << ret;
}
}
}
// cal output size for malloc
for (const auto &output_node : output_nodes) {
auto kernel_idx = AnfAlgo::VisitKernel(output_node, 0);
auto real_node = kernel_idx.first;
size_t real_idx = kernel_idx.second;
auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope());
MS_LOG(INFO) << "Fusion info: real node name: " << normal_name << ", real output index: " << real_idx;
auto full_name = real_node->fullname_with_scope();
MS_LOG(INFO) << "Fusion info: real output node name: " << full_name << ", real output index: " << real_idx;
for (const auto &op : fusion_op_list) {
if (op[kJName] == normal_name) {
if (op[kJName] == full_name) {
auto op_output_desces = op[kJOutputDesc];
if (output_node != real_node) {
// tuple_get item
MS_LOG(INFO) << "Output is a tuple getitem node";
MS_LOG(INFO) << "Fusion info: output is a tuple get_item node";
auto output_desc = op_output_desces[real_idx];
if (output_desc[kJShape].empty()) {
MS_LOG(INFO) << "Fusion error: output_desc's shape is empty. real_index " << real_idx;
@ -1001,6 +1122,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list,
output_size_list->push_back(ret);
MS_LOG(INFO) << "Fusion info: scope output index " << real_idx << ", size: " << ret;
} else {
MS_LOG(INFO) << "Fusion info: output is self.";
for (const auto &output_desc : op_output_desces) {
if (output_desc[kJShape].empty()) {
MS_LOG(INFO) << "Fusion info: output_desc's shape is empty, may be this node output";

View File

@ -41,8 +41,8 @@ class TbeKernelBuild {
std::vector<size_t> *output_size_list);
// Ub Fuison
static bool GenFusionScopeJson(const std::vector<AnfNodePtr> &input_nodes,
const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_str,
std::string *fusion_kernel);
const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_json,
std::string *fusion_kernel_name);
static bool GetIOSize(const nlohmann::json &fusion_op_list, const std::vector<AnfNodePtr> &output_nodes,
std::vector<size_t> *input_size_list, std::vector<size_t> *output_size_list);
@ -61,9 +61,14 @@ class TbeKernelBuild {
static std::vector<size_t> GetDescOutputIndex(const std::vector<int> &output_used_nums);
static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
std::vector<nlohmann::json> *output_desc_list);
static void GenPreDescJson(nlohmann::json *output_desc);
static void GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str,
std::string *fusion_kernel_name);
static void GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str);
static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
size_t desc_output_idx, nlohmann::json *output_desc,
FusionDataType fusion_data_type = kFusionNormal);
static void GenSuffixDescJson(nlohmann::json *output_desc);
static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
size_t output_index, nlohmann::json *output_desc);
static size_t GetIOSizeImpl(const nlohmann::json &desc);
@ -76,6 +81,7 @@ class TbeKernelBuild {
static bool IsDynamicInput(const CNodePtr &cnode);
static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input);
static std::string GetRealOpType(const std::string &origin_type);
static std::string GetNodeFusionType(const CNodePtr &cnode);
};
class TbeKernelJsonCreator {
@ -84,14 +90,14 @@ class TbeKernelJsonCreator {
~TbeKernelJsonCreator() = default;
bool GenTbeSingleKernelJson(const std::shared_ptr<AnfNode> &anf_node, nlohmann::json *kernel_json);
std::string json_name() { return json_name_; }
bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
nlohmann::json *attrs_json);
private:
bool GenTbeInputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
nlohmann::json *inputs_json);
bool GenTbeOutputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
nlohmann::json *outputs_json);
bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
nlohmann::json *attrs_json);
static void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj);
bool GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, bool value,
const std::shared_ptr<OpIOInfo> &input_ptr, const string &op_input_name, size_t input_i,

View File

@ -33,42 +33,6 @@
namespace mindspore {
namespace kernel {
using mindspore::kernel::tbe::TbeUtils;
// Pre-compiles each TBE op in <anf_nodes> in parallel: submits one compile
// task per node to the ParallelBuildManager, then waits until every pre-build
// task has finished and feeds its result back to the manager.
// Returns false if kernel-json generation fails for any node; throws
// ArgumentError if a task cannot be waited on or reports a non-"Success" result.
bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes) {
  auto build_manger = std::make_shared<ParallelBuildManager>();
  MS_EXCEPTION_IF_NULL(build_manger);
  for (const auto &anf_node : anf_nodes) {
    // gen kernel json
    MS_EXCEPTION_IF_NULL(anf_node);
    nlohmann::json kernel_json;
    TbeKernelJsonCreator creator(OP_PRE_COMPILE);
    if (!creator.GenTbeSingleKernelJson(anf_node, &kernel_json)) {
      MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
      return false;
    }
    // mark the json so the compiler service treats it as a pre-build request
    kernel_json["compile_type"] = "pre_build";
    // op build
    auto task_id = build_manger->StartCompileOp(kernel_json);
    build_manger->SavePreTaskInfo(task_id, anf_node);
  }
  // drain all outstanding pre-build tasks; each WaitOne blocks for one result
  while (!build_manger->IsAllPreTaskFinish()) {
    int task_id = -1;
    std::string task_result;
    std::string pre_build_result;
    auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
    if (!ret) {
      MS_EXCEPTION(ArgumentError) << "Pre Build Failed. wait one ret:" << ret << ", task id:" << task_id;
    }
    if (task_result != "Success") {
      MS_EXCEPTION(ArgumentError) << "task pre compile Failed, task id:" << task_id << ", cause:" << task_result;
    }
    build_manger->PreTaskFinishProcess(task_id, pre_build_result);
  }
  return true;
}
bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
auto build_manger = std::make_shared<ParallelBuildManager>();
MS_EXCEPTION_IF_NULL(build_manger);
@ -123,15 +87,8 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
return build_manger->GenSameOpKernelMod();
}
ParallelBuildManager::ParallelBuildManager() {}
ParallelBuildManager::~ParallelBuildManager() { ResetTaskInfo(); }
// Remember which node a pre-build compile task belongs to, keyed by task id,
// so PreTaskFinishProcess can find the node when the task completes.
void ParallelBuildManager::SavePreTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node) {
  MS_LOG(INFO) << "SavePreTaskInfo, task id: " << task_id;
  pre_task_map_[task_id] = anf_node;
}
void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node,
const std::string &json_name, const std::vector<size_t> &input_size_list,
const std::vector<size_t> &output_size_list, int32_t scope_id) {
@ -150,42 +107,11 @@ void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNod
task_map_[task_id] = task_info;
}
// True once every submitted pre-build task has been consumed
// (pre_task_map_ entries are erased in PreTaskFinishProcess).
bool ParallelBuildManager::IsAllPreTaskFinish() const {
  MS_LOG(INFO) << "wait pre build process task_num: " << pre_task_map_.size();
  return pre_task_map_.empty();
}
// True once every submitted build task has been consumed
// (task_map_ entries are erased in TaskFinishProcess).
bool ParallelBuildManager::IsAllTaskFinish() const {
  MS_LOG(INFO) << "wait process task_num: " << task_map_.size();
  return task_map_.empty();
}
void ParallelBuildManager::PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result) {
  // Consume one finished pre-build task: extract the fusion pattern that the
  // compiler emitted between the "fusion_pattern_start"/"fusion_pattern_end"
  // markers in <pre_build_result>, store it on the node's kernel build info,
  // and erase the task from pre_task_map_.
  // Throws ArgumentError if task_id was never registered via SavePreTaskInfo.
  auto task_iter = pre_task_map_.find(task_id);
  if (task_iter == pre_task_map_.end()) {
    MS_EXCEPTION(ArgumentError) << "can't find pre task_id:" << task_id;
  }
  auto node = task_iter->second;
  auto builder =
    std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node));
  const std::string start_flag = "fusion_pattern_start";
  const std::string end_flag = "fusion_pattern_end";
  // Fix: std::string::find returns size_t; storing it in int and comparing to
  // -1 truncates on very long results. Use npos, and require the end marker to
  // come after the whole start marker so the substr length cannot underflow.
  auto start = pre_build_result.find(start_flag);
  auto end = pre_build_result.find(end_flag);
  if (start != std::string::npos && end != std::string::npos && end >= start + start_flag.size()) {
    std::string result = pre_build_result.substr(start + start_flag.size(), end - start - start_flag.size());
    if (result.empty()) {
      // empty pattern: nothing to record, just retire the task
      (void)pre_task_map_.erase(task_iter);
      return;
    }
    transform(result.begin(), result.end(), result.begin(), ::toupper);
    FusionType fusion_type = tbe::GetFusionType(result);
    builder->SetFusionType(fusion_type);
    AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get());
  }
  (void)pre_task_map_.erase(task_iter);
}
std::pair<int32_t, KernelModPtr> ParallelBuildManager::TaskFinishProcess(int32_t task_id, bool set_kernel_mod) {
auto task_iter = task_map_.find(task_id);
if (task_iter == task_map_.end()) {

View File

@ -28,7 +28,6 @@
namespace mindspore {
namespace kernel {
bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes);
bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes);
struct KernelBuildTaskInfo {
@ -42,9 +41,8 @@ struct KernelBuildTaskInfo {
class ParallelBuildManager {
public:
ParallelBuildManager();
ParallelBuildManager() = default;
~ParallelBuildManager();
void SavePreTaskInfo(int32_t task_id, const AnfNodePtr &anf_node);
void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name,
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
int32_t scope_id = 0);
@ -54,10 +52,7 @@ class ParallelBuildManager {
bool SearchInCache(const std::string &json_name, const std::string &processor,
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
AnfNode *node) const;
bool IsAllPreTaskFinish() const;
bool IsAllTaskFinish() const;
void PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result);
std::pair<int32_t, KernelModPtr> TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true);
KernelModPtr GenKernelMod(const string &json_name, const string &processor,
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,

View File

@ -62,6 +62,16 @@ session::KernelWithIndex FindRefOriginNode(const AnfNodePtr &node) {
return kernel_with_index;
}
void AddRefNodePairToKernelGraph(const FuncGraphPtr &func_graph, const CNodePtr &cnode, const size_t output_index,
const size_t input_index) {
// record the ref_pair
auto kernel_graph = func_graph->cast<KernelGraphPtr>();
MS_EXCEPTION_IF_NULL(kernel_graph);
session::AnfWithOutIndex final_pair = std::make_pair(cnode, output_index);
session::KernelWithIndex kernel_with_index = AnfAlgo::VisitKernel(AnfAlgo::GetInputNode(cnode, input_index), 0);
kernel_graph->AddRefCorrespondPairs(final_pair, kernel_with_index);
}
void AddRefPairToKernelGraph(const FuncGraphPtr &func_graph, const CNodePtr &cnode, const AnfNodePtr &get_item,
const AnfNodePtr &final_node, size_t final_index,
const session::KernelWithIndex &origin_pair) {
@ -88,6 +98,7 @@ void AddRefPairToKernelGraph(const FuncGraphPtr &func_graph, const CNodePtr &cno
AnfNodePtr AddAdditionalToRefOutput(const FuncGraphPtr &func_graph, const CNodePtr &cnode, size_t output_index,
size_t input_index, const AnfNodePtr &get_item) {
AnfNodePtr final_node = (get_item == nullptr ? cnode : get_item);
bool need_refresh_ref_addr = false;
size_t final_index = output_index;
AnfNodePtr input_node = AnfAlgo::GetInputNode(cnode, input_index);
session::KernelWithIndex origin_pair;
@ -109,6 +120,7 @@ AnfNodePtr AddAdditionalToRefOutput(const FuncGraphPtr &func_graph, const CNodeP
final_node = NewTransOpNode(func_graph, final_node, kernel_select, false, prim::KPrimTransData->name());
RefreshKernelBuildInfo(cur_format, origin_format, final_node, {}, cur_type);
final_index = 0;
need_refresh_ref_addr = true;
MS_EXCEPTION_IF_NULL(final_node);
MS_LOG(INFO) << "DealRefTransAndCast add trans op, op debug info is " << final_node->DebugString();
}
@ -119,15 +131,19 @@ AnfNodePtr AddAdditionalToRefOutput(const FuncGraphPtr &func_graph, const CNodeP
MS_EXCEPTION_IF_NULL(final_node);
final_node->set_scope(cnode->scope());
final_index = 0;
need_refresh_ref_addr = true;
MS_LOG(INFO) << "DealRefTransAndCast add cast op, op debug info is " << final_node->DebugString();
}
// add ref pair
AddRefPairToKernelGraph(func_graph, cnode, get_item, final_node, final_index, origin_pair);
if (need_refresh_ref_addr) {
AddRefNodePairToKernelGraph(func_graph, cnode, output_index, input_index);
}
// insert depend
if (origin_format != cur_format || origin_type != cur_type) {
std::vector<AnfNodePtr> depend_nodes{NewValueNode(prim::kPrimDepend), cnode, final_node};
final_node = func_graph->NewCNode(depend_nodes);
MS_LOG(INFO) << "DealRefTransAndCast add denpend, op debug info is " << final_node->DebugString();
MS_LOG(INFO) << "DealRefTranshwAndCast add denpend, op debug info is " << final_node->DebugString();
}
return final_node;

View File

@ -58,7 +58,7 @@ const AnfNodePtr RemoveInternalOutput::Process(const FuncGraphPtr &func_graph, c
if (kernel_graph == nullptr) {
return nullptr;
}
if (!kernel_graph->IsInternalOutput(node, 0)) {
if (!kernel_graph->IsUniqueTargetInternalOutput(node, 0)) {
return nullptr;
}
if (!UsedForOutputOnly(func_graph, node)) {

View File

@ -33,7 +33,25 @@ std::vector<int> TransposeAxis(const std::string &src_format, const std::string
} else if ((src_format == kOpFormat_NHWC) && (dst_format == kOpFormat_NCHW)) {
return {0, 3, 1, 2};
} else {
MS_LOG(EXCEPTION) << "Invaild format transform, from " << src_format << " to " << dst_format;
MS_LOG(EXCEPTION) << "Invalid format transform, from " << src_format << " to " << dst_format;
}
}
// Transpose can be replaceed by nop reshape in some situations.
// 1. out_shape [x, 1, 1, y] with transpose perm {0, 2, 3, 1}
// 2. out_shape [x, y, 1, 1] with transpose perm {0, 3, 1, 2}
bool IsFakeTranspose(const std::vector<size_t> &out_shape, const std::vector<int> &transpose_perm) {
if (out_shape.size() != 4) {
MS_LOG(EXCEPTION) << "Invalid data shape, 4-D data was needed, but get " << out_shape.size() << "-D.";
}
std::vector<int> perm1 = {0, 2, 3, 1};
std::vector<int> perm2 = {0, 3, 1, 2};
if (transpose_perm == perm1) {
return (out_shape[1] == 1 && out_shape[2] == 1);
} else if (transpose_perm == perm2) {
return (out_shape[2] == 1 && out_shape[3] == 1);
} else {
return false;
}
}
@ -56,8 +74,16 @@ void SetTransposeOpBuildInfo(const std::string &input_format, const std::string
CNodePtr InsertTransposeOp(const FuncGraphPtr &graph, const AnfNodePtr &node, const AnfNodePtr &used_node,
int used_node_index, const std::vector<int> &transpose_perm) {
MS_EXCEPTION_IF_NULL(graph);
// 1.Create a transpose node.
auto transpose_prim = std::make_shared<Primitive>(prim::kPrimTranspose->name());
// 0.Judge whether it is a fake transpose
auto transed_shape = AnfAlgo::GetInputDeviceShape(used_node, used_node_index);
bool is_fake = IsFakeTranspose(transed_shape, transpose_perm);
// 1.Create a transpose node or a fake transpose node:reshape.
mindspore::PrimitivePtr transpose_prim;
if (is_fake) {
transpose_prim = std::make_shared<Primitive>(prim::kPrimReshape->name());
} else {
transpose_prim = std::make_shared<Primitive>(prim::kPrimTranspose->name());
}
MS_EXCEPTION_IF_NULL(transpose_prim);
// 2.Set the input of transpose.
std::vector<AnfNodePtr> transpose_input = {NewValueNode(transpose_prim), node};
@ -66,7 +92,9 @@ CNodePtr InsertTransposeOp(const FuncGraphPtr &graph, const AnfNodePtr &node, co
auto transpose_type = {AnfAlgo::GetPrevNodeOutputInferDataType(used_node, used_node_index)};
auto transpose_shape = {AnfAlgo::GetPrevNodeOutputInferShape(used_node, used_node_index)};
AnfAlgo::SetOutputInferTypeAndShape(transpose_type, transpose_shape, transpose_op.get());
AnfAlgo::SetNodeAttr(kAttrPerm, MakeValue(transpose_perm), transpose_op);
if (!is_fake) {
AnfAlgo::SetNodeAttr(kAttrPerm, MakeValue(transpose_perm), transpose_op);
}
// 4.Set the input of used_node.
MS_LOG(DEBUG) << "Node: " << node->fullname_with_scope() << ", used node: " << used_node->fullname_with_scope()
<< ", index: " << used_node_index;

View File

@ -49,6 +49,7 @@ const AnfNodePtr ReplaceBNCastFusion::Process(const FuncGraphPtr &graph, const A
auto manager = graph->manager();
MS_EXCEPTION_IF_NULL(manager);
auto outlist = GetRealNodeUsedList(graph, fbn2);
bool changed = false;
for (size_t i = 0; i < outlist->size(); i++) {
auto index_node = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(outlist->at(i).first), 1);
auto value_node = index_node->cast<ValueNodePtr>();
@ -57,14 +58,18 @@ const AnfNodePtr ReplaceBNCastFusion::Process(const FuncGraphPtr &graph, const A
if (item_idx == 0) {
auto cast = GetRealNodeUsedList(graph, outlist->at(i).first);
if (AnfAlgo::GetCNodeName(cast->at(0).first) != "Cast") {
return nullptr;
continue;
}
manager->Replace(utils::cast<CNodePtr>(cast->at(0).first), utils::cast<CNodePtr>(outlist->at(i).first));
outputs_type.push_back(kNumberTypeFloat16);
outputs_shape.push_back(AnfAlgo::GetOutputInferShape(outlist->at(i).first, 0));
AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, outlist->at(i).first.get());
changed = true;
}
}
if (!changed) {
return nullptr;
}
manager->Replace(utils::cast<CNodePtr>(x_after), utils::cast<CNodePtr>(x_before));
outputs_type.clear();
outputs_shape.clear();

View File

@ -211,8 +211,11 @@ bool CommunicationOpFusion::DoFusion(const FuncGraphPtr &func_graph, const Commu
start_index = end_index + 1;
continue;
}
auto kernel_graph = func_graph->cast<KernelGraphPtr>();
auto graph_id = kernel_graph->graph_id();
AnfNodePtr new_communication_op =
CreateFusedCommunicationOp(func_graph, communication_op_info, start_index, end_index);
AnfAlgo::SetGraphId(graph_id, new_communication_op.get());
// replace old communication op with new communication op
for (auto idx = start_index; idx <= end_index; ++idx) {
std::vector<AnfNodePtr> tuple_getitem_input;

View File

@ -425,7 +425,7 @@ std::string AnfRuntimeAlgorithm::GetInputFormat(const AnfNodePtr &node, size_t i
<< node->DebugString() << "]";
}
if (!IsRealKernel(node)) {
GetPrevNodeOutputFormat(node, input_idx);
return GetPrevNodeOutputFormat(node, input_idx);
}
auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
MS_EXCEPTION_IF_NULL(kernel_info);
@ -1197,6 +1197,19 @@ TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputPrecision(const AnfNodePtr &node, s
return GetCNodeOutputPrecision(kernel_with_index.first);
}
bool AnfRuntimeAlgorithm::IsDynamicShape(const AnfNodePtr &node) {
  // A node is dynamic-shape iff it is a CNode that carries the
  // kAttrIsDynamicShape attribute with value true.
  if (!node->isa<CNode>()) {
    return false;
  }
  auto cnode = node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(cnode);
  if (!AnfAlgo::HasNodeAttr(kAttrIsDynamicShape, cnode)) {
    return false;
  }
  // Fix: read the attribute from the same cnode that was checked above,
  // instead of re-resolving it from the raw node.
  return AnfAlgo::GetNodeAttr<bool>(cnode, kAttrIsDynamicShape);
}
bool AnfRuntimeAlgorithm::IsCondControlKernel(const CNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
if (node->inputs().empty()) {

View File

@ -217,6 +217,7 @@ class AnfRuntimeAlgorithm {
static TypeId GetCNodeOutputPrecision(const AnfNodePtr &node);
// get fix output precision from prev node, input_idx is the input index of current node related to prev node.
static TypeId GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx);
static bool IsDynamicShape(const AnfNodePtr &node);
static bool IsCondControlKernel(const CNodePtr &node);
static bool IsIndependentNode(const CNodePtr &node);
};

View File

@ -261,17 +261,16 @@ void AscendControlParser::EraseParameter(NotNull<KernelGraphPtr> root_graph,
}
}
EraseAssign(all_nodes, para_to_written_node, root_graph);
root_graph->set_execution_order(exec_order);
EraseAssign(std::make_shared<ReferenceCounter>(parameter_count), all_nodes, para_to_written_node, root_graph);
}
void AscendControlParser::EraseAssign(const std::set<CNodePtr> &all_nodes,
void AscendControlParser::EraseAssign(std::shared_ptr<ReferenceCounter> parameter_count,
const std::set<CNodePtr> &all_nodes,
const std::map<AnfNodePtr, CNodePtr> &para_to_written_node,
NotNull<KernelGraphPtr> root_graph) {
std::vector<CNodePtr> exec_order = root_graph->execution_order();
ReferenceCounter parameter_count([](int32_t read, int32_t write) -> bool { return write == 1; });
while (parameter_count.HasValidElem()) {
auto [para, read, written] = parameter_count.GetOneValidElem();
while (parameter_count->HasValidElem()) {
auto [para, read, written] = parameter_count->GetOneValidElem();
MS_LOG(INFO) << para->DebugString() << " was read " << read << " times, written " << written << " times.";
auto assign_iter = para_to_written_node.find(para);
if (assign_iter == para_to_written_node.end()) {
@ -280,7 +279,7 @@ void AscendControlParser::EraseAssign(const std::set<CNodePtr> &all_nodes,
auto &assign_node = assign_iter->second;
MS_EXCEPTION_IF_NULL(assign_node);
if (!IsPrimitiveCNode(assign_node, prim::kPrimAssign)) {
parameter_count.EraseElem(para);
parameter_count->EraseElem(para);
continue;
}
MS_LOG(INFO) << "Erase " << assign_node->DebugString(5);
@ -288,10 +287,10 @@ void AscendControlParser::EraseAssign(const std::set<CNodePtr> &all_nodes,
auto source = assign_node->input(kCNodeAssignSource);
MS_EXCEPTION_IF_NULL(source);
auto visit_source = AnfAlgo::VisitKernelWithReturnType(source, 0).first;
parameter_count.AddWriteCount(para, -1);
parameter_count.AddReadCount(para, -1);
parameter_count->AddWriteCount(para, -1);
parameter_count->AddReadCount(para, -1);
if (visit_source->isa<Parameter>()) {
parameter_count.AddReadCount(visit_source, read - 1);
parameter_count->AddReadCount(visit_source, read - 1);
}
for (auto &node : all_nodes) {
for (size_t i = 0; i < node->size(); ++i) {
@ -302,6 +301,7 @@ void AscendControlParser::EraseAssign(const std::set<CNodePtr> &all_nodes,
}
}
}
root_graph->set_execution_order(exec_order);
}
void AscendControlParser::EraseLabel(NotNull<KernelGraphPtr> root_graph) {

View File

@ -22,6 +22,7 @@
#include <tuple>
#include <utility>
#include <functional>
#include <memory>
#include "backend/session/kernel_graph.h"
#include "base/base_ref.h"
#include "utils/contract.h"
@ -44,7 +45,7 @@ class AscendControlParser {
class ReferenceCounter;
static void EraseParameter(NotNull<KernelGraphPtr> root_graph, const std::set<KernelGraphPtr> &graph_list);
static void EraseAssign(const std::set<CNodePtr> &all_nodes,
static void EraseAssign(std::shared_ptr<ReferenceCounter> parameter_count, const std::set<CNodePtr> &all_nodes,
const std::map<AnfNodePtr, CNodePtr> &para_to_written_node,
NotNull<KernelGraphPtr> root_graph);
static void EraseLabel(NotNull<KernelGraphPtr> root_graph);

View File

@ -474,7 +474,6 @@ void AscendSession::InitRuntimeResource() {
}
void AscendSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) const {
device::ascend::KernelPreBuild(kernel_graph.get());
MS_LOG(INFO) << "HardwareOptimize start!";
opt::AscendBackendOptimization(kernel_graph);
opt::AscendGraphKernelCommonProcess(kernel_graph);

View File

@ -65,6 +65,8 @@ void GPUSession::StartKernelRT() const {
void GPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
MS_EXCEPTION_IF_NULL(kernel_graph);
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
auto optimizer = std::make_shared<opt::GraphOptimizer>();
auto pm = std::make_shared<opt::PassManager>();
pm->AddPass(std::make_shared<opt::AdamWeightDecayFusion>());
@ -73,9 +75,11 @@ void GPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
pm->AddPass(std::make_shared<opt::ReplaceBNGradCastFusion>());
pm->AddPass(std::make_shared<opt::ReplaceMomentumCastFusion>());
pm->AddPass(std::make_shared<opt::ReplaceAddNFusion>());
pm->AddPass(std::make_shared<opt::BatchNormReluFusion>());
pm->AddPass(std::make_shared<opt::BatchNormReluGradFusion>());
pm->AddPass(std::make_shared<opt::BatchNormAddReluFusion>());
if (context_ptr->execution_mode() != kPynativeMode) {
pm->AddPass(std::make_shared<opt::BatchNormReluFusion>());
pm->AddPass(std::make_shared<opt::BatchNormReluGradFusion>());
pm->AddPass(std::make_shared<opt::BatchNormAddReluFusion>());
}
optimizer->AddPassManager(pm);
(void)optimizer->Optimize(kernel_graph);
kernel_graph->SetExecOrderByDefault();
@ -129,10 +133,16 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs_const) const {
std::vector<tensor::TensorPtr> inputs(inputs_const);
MS_EXCEPTION_IF_NULL(kernel_graph);
auto input_nodes = kernel_graph->inputs();
std::vector<AnfNodePtr> input_nodes;
for (const auto &input_node : kernel_graph->inputs()) {
auto params = AnfAlgo::GetAllOutput(input_node);
std::copy(params.begin(), params.end(), std::back_inserter(input_nodes));
}
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
if (inputs.size() != input_nodes.size()) {
MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size();
}
for (size_t i = 0; i < inputs.size(); ++i) {
auto tensor = inputs[i];
MS_EXCEPTION_IF_NULL(tensor);

View File

@ -41,13 +41,13 @@ void DataDumpParser::ResetParam() {
bool DataDumpParser::DumpEnabled() const {
auto enable_dump = std::getenv(kEnableDataDump);
if (enable_dump == nullptr) {
MS_LOG(INFO) << "[DataDump] enable dump is null. Please export ENABLE_DATA_DUMP";
MS_LOG(INFO) << "[DataDump] enable dump is null. If you want to dump data, please export ENABLE_DATA_DUMP";
return false;
}
auto enabled = std::atoi(enable_dump);
if (enabled != 1) {
MS_LOG(WARNING) << "[DataDump] Please export ENABLE_DATA_DUMP=1";
MS_LOG(WARNING) << "[DataDump] If you want to dump data, please export ENABLE_DATA_DUMP=1";
return false;
}
@ -62,7 +62,7 @@ bool DataDumpParser::DumpEnabled() const {
std::optional<std::string> DataDumpParser::GetDumpPath() const {
auto dump_path = std::getenv(kDataDumpPath);
if (dump_path == nullptr) {
MS_LOG(ERROR) << "[DataDump] dump path is null. Please export DATA_DUMP_PATH";
MS_LOG(ERROR) << "[DataDump] dump path is null. If you want to dump data, please export DATA_DUMP_PATH";
return {};
}
std::string dump_path_str(dump_path);

View File

@ -73,6 +73,7 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
std::string current_tensor_name;
std::unordered_map<unsigned int, watchpoint_t> watchpoints_to_check_table;
const size_t location = 0;
for (std::size_t i = 0; i < tensor_list.size(); i++) {
current_tensor_name = tensor_list[i]->GetName();
@ -102,7 +103,7 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
// check if the current node tensor name is included the watchpoint
std::string current_node_name = current_tensor_name.substr(0, current_tensor_name.find_first_of(":"));
if ((w_type == true && (current_tensor_name.find(w_name) != string::npos || w_name == "*")) ||
if ((w_type == true && (current_tensor_name.find(w_name) == location || w_name == "*")) ||
(w_type == false && current_node_name == w_name)) {
watchpoints_to_check_table[w_table_item.second.id] = w_table_item.second;
break;

View File

@ -151,35 +151,34 @@ void Debugger::EnableDebugger() {
MS_LOG(WARNING) << "Memory Reuse is disabled. Set environment variable MS_DEBUGGER_PARTIAL_MEM=1 to reduce memory "
"usage for large models.";
}
if (device_target_ == kAscendDevice) {
// set operation overflow info
overflow_bin_path_ = DataDumpParser::GetInstance().GetOpOverflowBinPath(graph_ptr_->graph_id(), device_id_);
// new overflow dump files will have a timestamp greater than last_overflow_bin_
last_overflow_bin_ = 0;
DIR *d;
d = opendir(overflow_bin_path_.c_str());
if (d != nullptr) {
struct dirent *dir;
while ((dir = readdir(d)) != NULL) {
if (dir->d_type == DT_REG) {
std::string file_path = overflow_bin_path_;
file_path.append(dir->d_name);
std::size_t found = file_path.find_last_of(".");
if (found == std::string::npos) {
continue;
}
std::string overflow_time = file_path.substr(found + 1);
if (stod(overflow_time) <= last_overflow_bin_) {
MS_LOG(INFO) << "Old op overflow bin folder" << file_path;
continue;
}
last_overflow_bin_ = stod(overflow_time);
#ifdef ENABLE_D
// set operation overflow info
overflow_bin_path_ = DataDumpParser::GetInstance().GetOpOverflowBinPath(graph_ptr_->graph_id(), device_id_);
// new overflow dump files will have a timestamp greater than last_overflow_bin_
last_overflow_bin_ = 0;
DIR *d;
d = opendir(overflow_bin_path_.c_str());
if (d != nullptr) {
struct dirent *dir;
while ((dir = readdir(d)) != NULL) {
if (dir->d_type == DT_REG) {
std::string file_path = overflow_bin_path_;
file_path.append(dir->d_name);
std::size_t found = file_path.find_last_of(".");
if (found == std::string::npos) {
continue;
}
std::string overflow_time = file_path.substr(found + 1);
if (stod(overflow_time) <= last_overflow_bin_) {
MS_LOG(INFO) << "Old op overflow bin folder" << file_path;
continue;
}
last_overflow_bin_ = stod(overflow_time);
}
MS_LOG(INFO) << "last op overflow bin folder" << last_overflow_bin_;
}
MS_LOG(INFO) << "last op overflow bin folder" << last_overflow_bin_;
}
#endif
// initialize grpc client
if (debugger_enabled_) {
@ -554,8 +553,9 @@ std::list<WatchpointHit> Debugger::CheckWatchpoints() {
std::vector<int> condition;
std::vector<unsigned int> watchpoint_id;
std::vector<std::string> overflow_ops;
#ifdef ENABLE_D
overflow_ops = CheckOpOverflow();
#endif
debug_services_->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, overflow_ops);
std::list<WatchpointHit> hits;
for (unsigned int i = 0; i < name.size(); i++) {

View File

@ -117,7 +117,7 @@ void GrpcClient::Init(const std::string &host, const std::string &port, const bo
int dwcaLen = i2d_X509(sk_X509_value(ca, 0), NULL); // get the length of private key
unsigned char *cabuf = (unsigned char *)malloc(sizeof(unsigned char) * dwcaLen);
i2d_X509(sk_X509_value(ca, 0), &cabuf); // PrivateKey DER code
strcat = std::string(reinterpret_cast<char const *>(cabuf), dwcaLen);
strca = std::string(reinterpret_cast<char const *>(cabuf), dwcaLen);
free(pribuf);
free(certbuf);

View File

@ -81,8 +81,6 @@ void ParallelContext::set_mirror_mean(bool mirror_mean) { mirror_mean_ = mirror_
void ParallelContext::set_full_batch(bool full_batch) { full_batch_ = full_batch; }
void ParallelContext::set_has_initializer(bool has_initializer) { has_initializer_ = has_initializer; }
void ParallelContext::set_cast_before_mirror(bool cast_before_mirror) { cast_before_mirror_ = cast_before_mirror; }
void ParallelContext::set_loss_repeated_mean(bool loss_repeated_mean) { loss_repeated_mean_ = loss_repeated_mean; }

View File

@ -58,9 +58,6 @@ class ParallelContext {
void set_full_batch(bool full_batch);
bool full_batch() const { return full_batch_; }
void set_has_initializer(bool has_initializer);
bool has_initializer() const { return has_initializer_; }
void set_cast_before_mirror(bool cast_before_mirror);
bool cast_before_mirror() const { return cast_before_mirror_; }
@ -115,7 +112,6 @@ class ParallelContext {
static std::shared_ptr<ParallelContext> inst_context_;
bool mirror_mean_;
bool full_batch_;
bool has_initializer_ = false;
bool cast_before_mirror_;
bool loss_repeated_mean_;
int32_t device_num_;

View File

@ -16,6 +16,7 @@
#include "frontend/parallel/ps/optimizer_info.h"
#include <memory>
#include "frontend/parallel/ps/util.h"
namespace mindspore {
namespace parallel {
@ -30,6 +31,8 @@ const std::vector<AddressPtr> &OptimizerInfo::outputs() { return outputs_; }
bool OptimizerInfo::IsSparse() const { return false; }
const size_t OptimizerInfo::indice_size() const { return 0; }
size_t OptimizerInfo::grad_index() { return 0; }
size_t OptimizerInfo::indices_index() { return 0; }
@ -57,7 +60,8 @@ void DenseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
}
}
void DenseOptimInfo::ComputeMean(size_t n) {
void DenseOptimInfo::ComputeMean(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &, size_t n,
size_t server_num, size_t rank_id) {
if (n > 1) {
float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
size_t size = gradient()->size / sizeof(float);
@ -96,15 +100,90 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
for (size_t i = 0; i < indices_index; i++) {
indice_offset += lengths[i];
}
int *incr_indice_data = reinterpret_cast<int *>(values.data() + indice_offset);
size_t incr_indice_size = lengths[indices_index] * sizeof(float);
float *incr_indice_data = values.data() + indice_offset;
size_t incr_indice_size = lengths[indices_index];
size_t incr_indice_data_size = incr_indice_size * sizeof(int);
int *converted_indices = new int[incr_indice_size];
for (size_t i = 0; i < incr_indice_size; i++) {
converted_indices[i] = static_cast<int>(incr_indice_data[i]);
}
auto ret2 = memcpy_s(accum_indices_data + indices_offset_, incr_indice_size, incr_indice_data, incr_indice_size);
auto ret2 =
memcpy_s(accum_indices_data + indices_offset_, incr_indice_data_size, converted_indices, incr_indice_data_size);
if (ret2 != 0) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")";
}
delete[] converted_indices;
indices_offset_ += lengths[indices_index];
indices()->size += incr_indice_size;
indices()->size += incr_indice_data_size;
}
void SparseOptimInfo::ComputeMean(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes,
size_t n, size_t server_num, size_t rank_id) {
size_t indices_size = static_cast<size_t>(indices()->size / sizeof(int));
int segment_size = gradient()->size / indices()->size;
float *new_grad = new float[indices_size * segment_size];
int *new_indices = new int[indices_size];
mindspore::kernel::SparseGradient unique_sparse_grad({new_grad, new_indices, indices_size});
const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
if (shape_vec.size() < 2 || shape_vec[1] == nullptr) {
MS_LOG(EXCEPTION) << "No input shape found";
}
auto input_shapes = shape_vec.size() > 0 ? shape_vec[1] : nullptr;
MS_EXCEPTION_IF_NULL(input_shapes);
if (input_shapes->size() == 0) {
MS_LOG(EXCEPTION) << "Invalid input shapes";
}
int first_dim_size = input_shapes->front();
int outer_dim_size = segment_size;
if (first_dim_size == 0 || outer_dim_size == 0) {
MS_LOG(ERROR) << "Invalid first dim size";
}
float *grad_data = reinterpret_cast<float *>(gradient()->addr);
int *indices_data = reinterpret_cast<int *>(indices()->addr);
size_t original_row_count = input_shapes->front();
if (original_row_count > 0) {
size_t offset = 0;
std::map<int, int> rank_dims = Util::AllRankLocalShard(original_row_count, rank_id, server_num);
for (size_t i = 0; i < rank_id; i++) {
if (rank_dims.count(i) == 0) {
MS_LOG(EXCEPTION) << "No local shard number for rank " << i;
}
offset += rank_dims[i];
}
for (size_t i = 0; i < indices_size; i++) {
indices_data[i] -= offset;
}
}
Util::ReduceSparseGradient(grad_data, indices_data, indices_size, segment_size, first_dim_size, outer_dim_size,
&unique_sparse_grad);
int reduced_grad_size = unique_sparse_grad.indices_size_ * segment_size * sizeof(float);
auto ret = memcpy_s(gradient()->addr, reduced_grad_size, unique_sparse_grad.value_, reduced_grad_size);
if (ret != 0) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
}
int reduced_indice_size = unique_sparse_grad.indices_size_ * sizeof(int);
ret = memcpy_s(indices()->addr, reduced_indice_size, unique_sparse_grad.indices_, reduced_indice_size);
if (ret != 0) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
}
gradient()->size = reduced_grad_size;
indices()->size = reduced_indice_size;
for (size_t i = 0; i < unique_sparse_grad.indices_size_ * segment_size; i++) {
grad_data[i] = grad_data[i] / n;
}
delete[] new_grad;
delete[] new_indices;
}
void SparseOptimInfo::Reset() {
@ -135,6 +214,8 @@ void MomentumOptimInfo::Update(const Values &values, const Lengths &lens) {
}
}
const size_t SparseOptimInfo::indice_size() const { return indices_offset_; }
const AddressPtr &MomentumOptimInfo::gradient() { return inputs_[3]; }
const AddressPtr &MomentumOptimInfo::indices() { return inputs_[3]; }

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_FRONTEND_PARALLEL_PS_OPTIMIZER_INFO_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/kernel.h"
#include "frontend/parallel/ps/common.h"
@ -33,12 +34,14 @@ class OptimizerInfo {
virtual void Update(const Values &values, const Lengths &lengths) {}
virtual void UpdateWeight(const WeightPtr &weight);
virtual void Accumulate(const Values &values, const Lengths &lengths) = 0;
virtual void ComputeMean(size_t n) {}
virtual void ComputeMean(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes, size_t n,
size_t server_num, size_t rank_id) {}
virtual void Reset() {}
void AddWorkspace(const AddressPtr &workspace);
virtual const AddressPtr &gradient() = 0;
virtual const AddressPtr &indices() = 0;
virtual const size_t indice_size() const;
const std::vector<AddressPtr> &inputs();
const std::vector<AddressPtr> &workspaces();
const std::vector<AddressPtr> &outputs();
@ -59,7 +62,8 @@ class DenseOptimInfo : public OptimizerInfo {
~DenseOptimInfo() override = default;
void Accumulate(const Values &values, const Lengths &lens) override;
void ComputeMean(size_t n) override;
void ComputeMean(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes, size_t n,
size_t server_num, size_t rank_id) override;
void Reset() override;
};
@ -69,7 +73,10 @@ class SparseOptimInfo : public OptimizerInfo {
~SparseOptimInfo() override = default;
void Accumulate(const Values &values, const Lengths &lens) override;
void ComputeMean(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes, size_t n,
size_t server_num, size_t rank_id) override;
void Reset() override;
const size_t indice_size() const override;
protected:
size_t grads_offset_{0};

View File

@ -136,15 +136,21 @@ OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
const std::shared_ptr<std::vector<size_t>> &indices_shape = (*inputs_shape)[10];
size_t total_indice_size =
std::accumulate((*indices_shape).begin(), (*indices_shape).end(), sizeof(float), std::multiplies<size_t>());
std::accumulate((*indices_shape).begin(), (*indices_shape).end(), sizeof(int), std::multiplies<size_t>());
AddressPtr indices = std::make_shared<kernel::Address>();
indices->addr = new float[total_indice_size * worker_num];
ret = memcpy_s(indices->addr, lens[7] * sizeof(float), reinterpret_cast<float *>(epsilon->addr) + lens[5] + lens[6],
lens[7] * sizeof(float));
indices->addr = new int[total_indice_size * worker_num];
int *converted_indices = new int[lens[7]];
size_t indices_data_size = lens[7] * sizeof(int);
float *indices_data = reinterpret_cast<float *>(epsilon->addr) + lens[5] + lens[6];
for (int i = 0; i < lens[7]; i++) {
converted_indices[i] = static_cast<int>(indices_data[i]);
}
ret = memcpy_s(indices->addr, indices_data_size, converted_indices, indices_data_size);
if (ret != 0) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
}
indices->size = lens[7] * sizeof(int);
indices->size = indices_data_size;
delete[] converted_indices;
return new SparseAdamOptimInfo(weight_addr, m, v, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon,
grad, indices);
@ -185,13 +191,19 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
size_t total_indice_size =
std::accumulate((*indices_shape).begin(), (*indices_shape).end(), 1, std::multiplies<size_t>());
AddressPtr indices = std::make_shared<kernel::Address>();
indices->addr = new float[total_indice_size * worker_num];
ret = memcpy_s(indices->addr, lens[1] * sizeof(float), reinterpret_cast<float *>(values.data()) + lens[0],
lens[1] * sizeof(float));
indices->addr = new int[total_indice_size * worker_num];
int *converted_indices = new int[lens[1]];
size_t indices_data_size = lens[1] * sizeof(int);
float *indices_data = reinterpret_cast<float *>(values.data()) + lens[0];
for (int i = 0; i < lens[1]; i++) {
converted_indices[i] = static_cast<int>(indices_data[i]);
}
ret = memcpy_s(indices->addr, indices_data_size, converted_indices, indices_data_size);
if (ret != 0) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
}
indices->size = lens[1] * sizeof(int);
indices->size = indices_data_size;
delete[] converted_indices;
return new SparseFtrlOptimInfo(weight_addr, accum, linear, grad, indices);
}

View File

@ -145,6 +145,7 @@ class ParameterServer {
std::unordered_map<Key, std::shared_ptr<PServerKernel>> optimizers_;
std::unordered_map<Key, InputsShapePtr> optim_inputs_shape_;
std::unordered_map<Key, InputsShapePtr> original_optim_inputs_shape_;
std::unordered_map<Key, std::shared_ptr<OptimizerInfo>> optim_infos_;
std::unordered_map<std::string, std::shared_ptr<OptimizerInfoBuilder>> optim_info_builders_;
std::unordered_map<Key, std::string> weight_key_to_optims_;
@ -366,19 +367,24 @@ void ParameterServer<T>::InitWeightKeyToOptims(const Key &key, const int &optim_
template <typename T>
void ParameterServer<T>::InitOptimInputsShape(const Keys &keys, const Values &values, const Lengths &lengths) {
InputsShapePtr inputs_shape = std::make_shared<InputsShape>();
InputsShapePtr original_inputs_shape = std::make_shared<InputsShape>();
int val_idx = 0;
const Key &key = keys[0];
MS_LOG(INFO) << "Initializing optimizer inputs shape for key:" << key;
if (optim_inputs_shape_.count(key) == 0) {
original_optim_inputs_shape_[key] = original_inputs_shape;
optim_inputs_shape_[key] = inputs_shape;
}
for (size_t i = 0; i < keys.size(); i++) {
auto shape = std::make_shared<std::vector<size_t>>();
auto original_shape = std::make_shared<std::vector<size_t>>();
inputs_shape->push_back(shape);
original_inputs_shape->push_back(original_shape);
int len = lengths[i];
for (int j = 0; j < len; j++) {
shape->push_back(values[val_idx++]);
shape->push_back(values[val_idx]);
original_shape->push_back(values[val_idx++]);
}
}
if (weight_key_to_optims_.count(key) > 0) {
@ -505,16 +511,27 @@ void ParameterServer<T>::UpdateWeights() {
MS_EXCEPTION_IF_NULL(optimizer);
std::shared_ptr<OptimizerInfo> optim_info = optim_infos_[key];
if (optim_info == nullptr) {
continue;
}
const std::vector<kernel::AddressPtr> &inputs = optim_info->inputs();
const std::vector<kernel::AddressPtr> &workspaces = optim_info->workspaces();
const std::vector<kernel::AddressPtr> &outputs = optim_info->outputs();
if (optim_info != nullptr) {
const std::vector<kernel::AddressPtr> &inputs = optim_info->inputs();
const std::vector<kernel::AddressPtr> &workspaces = optim_info->workspaces();
const std::vector<kernel::AddressPtr> &outputs = optim_info->outputs();
optim_info->ComputeMean(worker_num_);
optimizer->Execute(inputs, workspaces, outputs);
optim_info->Reset();
std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> shapes =
std::make_shared<std::vector<std::shared_ptr<std::vector<size_t>>>>();
std::shared_ptr<std::vector<size_t>> indices_shape = std::make_shared<std::vector<size_t>>();
indices_shape->emplace_back(optim_info->indice_size());
shapes->push_back(indices_shape);
if (original_optim_inputs_shape_.count(key) != 0) {
for (auto &input_shapes : *(original_optim_inputs_shape_[key])) {
shapes->push_back(input_shapes);
}
}
optimizer->ReInit(shapes);
optim_info->ComputeMean(shapes, worker_num_, pserver_num_, rank_id_);
optimizer->Execute(inputs, workspaces, outputs);
optim_info->Reset();
}
if (!is_embedding_[key]) {
tokens_[key] = worker_num_;
}
@ -527,23 +544,26 @@ template <typename T>
void ParameterServer<T>::AccumGrad(const Keys &keys, const Values &values, const Lengths &lengths) {
std::unique_lock<std::mutex> lock(mutex_);
const Key &key = keys[0];
std::shared_ptr<OptimizerInfo> optim_info = optim_infos_[key];
bool no_sparse_grad = values.size() == 1 && values[0] == -100;
if (!no_sparse_grad) {
std::shared_ptr<OptimizerInfo> optim_info = optim_infos_[key];
// Create or update the optimizer info
if (optim_info == nullptr) {
const std::shared_ptr<OptimizerInfoBuilder> &builder = optim_info_builders_[weight_key_to_optims_[key]];
std::shared_ptr<kernel::ps::PServerKernel> pserver_kernel = optimizers_[key];
if (pserver_kernel == nullptr) {
MS_LOG(EXCEPTION) << "no optimizer found for key " << key << " optim name " << weight_key_to_optims_[key];
// Create or update the optimizer info
if (optim_info == nullptr) {
const std::shared_ptr<OptimizerInfoBuilder> &builder = optim_info_builders_[weight_key_to_optims_[key]];
std::shared_ptr<kernel::ps::PServerKernel> pserver_kernel = optimizers_[key];
if (pserver_kernel == nullptr) {
MS_LOG(EXCEPTION) << "no optimizer found for key " << key << " optim name " << weight_key_to_optims_[key];
}
MS_EXCEPTION_IF_NULL(pserver_kernel);
OptimizerInfo *optim =
builder->Build(pserver_kernel, weights_[key], keys, values, lengths, optim_inputs_shape_[key], worker_num_);
optim_info.reset(optim);
optim_infos_[key] = optim_info;
} else {
optim_info->Update(values, lengths);
optim_info->Accumulate(values, lengths);
}
MS_EXCEPTION_IF_NULL(pserver_kernel);
OptimizerInfo *optim =
builder->Build(pserver_kernel, weights_[key], keys, values, lengths, optim_inputs_shape_[key], worker_num_);
optim_info.reset(optim);
optim_infos_[key] = optim_info;
} else {
optim_info->Update(values, lengths);
optim_info->Accumulate(values, lengths);
}
grads_accum_counter_[key] += 1;
@ -721,6 +741,7 @@ void ParameterServer<T>::Run(const FuncGraphPtr &func_graph) {
return;
}
Init(func_graph);
Util::SetRankId(rank_id_);
thread_->join();
::ps::Finalize(0, true);
}

View File

@ -22,6 +22,8 @@
namespace mindspore {
namespace parallel {
namespace ps {
int Util::rank_id_ = -1;
std::unordered_map<std::string, int> Util::optimizer_to_ids{
{kApplyMomentum, 0},
{kSparseAdam, 1},
@ -132,13 +134,63 @@ std::string Util::optimizer_node_name(int id) {
bool Util::is_optimizer(std::string name) { return optimizer_to_ids.count(name) > 0; }
int Util::LocalShard(int first_dim, int rank_id, int server_num) {
int shard_size = std::round((static_cast<float>(first_dim)) / server_num);
int remain_size = first_dim % server_num;
if (remain_size == 0 || rank_id < server_num - 1) {
return shard_size;
} else {
return first_dim - (shard_size * (server_num - 1));
std::map<int, int> shard_dims = AllRankLocalShard(first_dim, rank_id, server_num);
if (shard_dims.count(rank_id) == 0) {
MS_LOG(EXCEPTION) << "Invalid rank id " << rank_id;
}
return shard_dims[rank_id];
}
std::map<int, int> Util::AllRankLocalShard(int first_dim, int rank_id, int server_num) {
if (rank_id >= server_num) {
MS_LOG(EXCEPTION) << "The rank ID " << rank_id << " should be less than the number of servers " << server_num;
}
std::map<int, int> shard_dims;
for (int i = 0; i < server_num; i++) {
shard_dims[i] = 0;
}
if (server_num != static_cast<int>(shard_dims.size())) {
MS_LOG(EXCEPTION) << "Inconsistent server num " << server_num << " shard dims counter size " << shard_dims.size();
}
int server_index = -1;
for (int i = 0; i < first_dim; i++) {
server_index = (server_index + 1) % server_num;
shard_dims[server_index] = shard_dims[server_index] + 1;
}
if (shard_dims.count(rank_id) == 0) {
MS_LOG(EXCEPTION) << "Invalid rank id " << rank_id << ", total server num " << server_num;
}
return shard_dims;
}
void Util::SetRankId(int rank_id) { rank_id_ = rank_id; }
int Util::GetRankId() { return rank_id_; }
void Util::ReduceSparseGradient(float *gradients, int *indices, const size_t indices_size, size_t segment_size,
const size_t first_dim_size, const size_t outer_dim_size,
mindspore::kernel::SparseGradient *unique_sparse_grad) {
size_t slice_segment_size = indices_size * segment_size;
auto workspace_grad = new float[slice_segment_size];
auto workspace_indices = new int[indices_size];
MS_EXCEPTION_IF_NULL(gradients);
MS_EXCEPTION_IF_NULL(indices);
MS_EXCEPTION_IF_NULL(workspace_grad);
MS_EXCEPTION_IF_NULL(workspace_indices);
mindspore::kernel::SparseGradient workspace_sparse_grad({workspace_grad, workspace_indices, indices_size});
mindspore::kernel::SparseGradient input_sparse_grad({gradients, indices, indices_size});
mindspore::kernel::ReduceSparseGradientParam param;
param.input_grad_ = &input_sparse_grad;
param.workspace_grad_ = &workspace_sparse_grad;
param.output_grad_ = unique_sparse_grad;
param.max_index_ = first_dim_size;
param.value_stride_ = outer_dim_size;
BucketReduceSparseGradient(param);
delete[] workspace_grad;
delete[] workspace_indices;
}
} // namespace ps
} // namespace parallel

View File

@ -21,6 +21,7 @@
#include <string>
#include <unordered_map>
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/common_utils.h"
namespace mindspore {
namespace parallel {
@ -37,11 +38,18 @@ class Util {
static std::string optimizer_node_name(int id);
static bool is_optimizer(std::string name);
static int LocalShard(int first_dim, int rank_id, int server_num);
static std::map<int, int> AllRankLocalShard(int first_dim, int rank_id, int server_num);
static void SetRankId(int rank_id);
static int GetRankId();
static void ReduceSparseGradient(float *gradients, int *indices, const size_t indices_size, size_t segment_size,
const size_t first_dim_size, const size_t outer_dim_size,
mindspore::kernel::SparseGradient *unique_sparse_grad);
private:
static std::unordered_map<std::string, int> optimizer_to_ids;
static std::unordered_map<int, std::string> id_to_optimizers;
static std::unordered_map<int, std::string> id_to_optimizer_nodes;
static int rank_id_;
};
} // namespace ps
} // namespace parallel

View File

@ -95,6 +95,32 @@ void Worker<T>::Run() {
template <typename T>
void Worker<T>::Push(const std::vector<size_t> &keys, std::vector<uintptr_t> addrs, const std::vector<int> &sizes) {
if (keys.size() == 0) {
MS_LOG(EXCEPTION) << "key size should be greater than zero";
}
if (key_to_optimId_.count(keys[0]) == 0) {
MS_LOG(EXCEPTION) << "no optim id found for key" << keys[0];
}
Key key = keys[0];
int optim_id = key_to_optimId_[key];
bool is_sparse = false;
if (optim_id == 1 || optim_id == 2 || optim_id == 3) {
is_sparse = true;
}
int grad_index = -1;
int indice_index = -1;
// Sparse adam gradient
if (optim_id == 1 || optim_id == 2) {
grad_index = 6;
indice_index = 7;
// Sparse ftrl gradient
} else if (optim_id == 3) {
grad_index = 0;
indice_index = 1;
}
size_t total_size = 0;
for (auto size : sizes) {
total_size += size;
@ -109,10 +135,22 @@ void Worker<T>::Push(const std::vector<size_t> &keys, std::vector<uintptr_t> add
}
offset += sizes[i] * sizeof(T);
}
while (!kv_worker_->IsReadyForPush(keys[0])) {
continue;
}
kv_worker_->PushData(::ps::SArray<::ps::Key>(keys), total_buffer, ::ps::SArray<int>(sizes));
if (!is_sparse) {
kv_worker_->PushData(::ps::SArray<::ps::Key>(keys), total_buffer, ::ps::SArray<int>(sizes));
} else {
std::vector<int> &var_shape = key_to_optim_shapes_[key][0];
int first_dim_size = var_shape[0];
int outer_dim_size = 1;
for (size_t i = 1; i < var_shape.size(); ++i) {
outer_dim_size *= var_shape[i];
}
kv_worker_->PushSparseData(::ps::SArray<::ps::Key>(keys), total_buffer, ::ps::SArray<int>(sizes), grad_index,
indice_index, first_dim_size, outer_dim_size);
}
}
template <typename T>

View File

@ -17,14 +17,16 @@
#ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_PS_WORKER_PROXY_H_
#define MINDSPORE_CCSRC_FRONTEND_PARALLEL_PS_WORKER_PROXY_H_
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <algorithm>
#include <utility>
#include <memory>
#include <vector>
#include <unordered_set>
#include "ps/ps.h"
#include "frontend/parallel/ps/util.h"
#include "backend/kernel_compiler/common_utils.h"
namespace mindspore {
namespace parallel {
@ -36,23 +38,26 @@ class WorkerProxy : public ::ps::KVWorker<T> {
using Callback = std::function<void()>;
using SlicedKVs = std::vector<std::pair<bool, ::ps::KVPairs<T>>>;
using Slicer = std::function<void(int ts, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &ranges,
SlicedKVs *sliced)>;
SlicedKVs *sliced, const std::map<int, int> &attrs)>;
using ::ps::SimpleApp::obj_;
explicit WorkerProxy(int app_id, int customer_id, int lookup_customer_id, int general_customer_id)
: Worker(app_id, customer_id) {
server_num_ = ::ps::NumServers();
Util::SetRankId(::ps::MyRank());
using std::placeholders::_1;
using std::placeholders::_2;
using std::placeholders::_3;
using std::placeholders::_4;
using std::placeholders::_5;
lookup_customer_ = std::unique_ptr<::ps::Customer>(
new ::ps::Customer(app_id, lookup_customer_id, std::bind(&WorkerProxy<T>::ProcessLookupResult, this, _1)));
general_customer_ = std::unique_ptr<::ps::Customer>(
new ::ps::Customer(app_id, general_customer_id, std::bind(&WorkerProxy<T>::ProcessResponse, this, _1)));
lookup_slicer_ = std::bind(&WorkerProxy<T>::LookupIdSlicer, this, _1, _2, _3, _4);
broadcast_slicer_ = std::bind(&WorkerProxy<T>::BroadcastSlicer, this, _1, _2, _3, _4);
round_robin_slicer_ = std::bind(&WorkerProxy<T>::RoundRobinSlicer, this, _1, _2, _3, _4);
worker_init_embedding_slicer_ = std::bind(&WorkerProxy<T>::WorkerInitEmbeddingSlicer, this, _1, _2, _3, _4);
lookup_slicer_ = std::bind(&WorkerProxy<T>::LookupIdSlicer, this, _1, _2, _3, _4, _5);
sparse_slicer_ = std::bind(&WorkerProxy<T>::SparseSlicer, this, _1, _2, _3, _4, _5);
broadcast_slicer_ = std::bind(&WorkerProxy<T>::BroadcastSlicer, this, _1, _2, _3, _4, _5);
round_robin_slicer_ = std::bind(&WorkerProxy<T>::RoundRobinSlicer, this, _1, _2, _3, _4, _5);
worker_init_embedding_slicer_ = std::bind(&WorkerProxy<T>::WorkerInitEmbeddingSlicer, this, _1, _2, _3, _4, _5);
}
~WorkerProxy() override = default;
@ -67,6 +72,8 @@ class WorkerProxy : public ::ps::KVWorker<T> {
bool IsReadyForPull(const Key &key);
void PushData(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray<T> &vals, const ::ps::SArray<int> &lens = {},
int cmd = 0, int priority = 0);
void PushSparseData(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray<T> &vals, const ::ps::SArray<int> &lens,
size_t grad_index, size_t indice_index, size_t first_dim_size, size_t outer_dim_size);
void PullData(const ::ps::SArray<::ps::Key> &keys, ::ps::SArray<T> *vals, ::ps::SArray<int> *lens = nullptr,
int cmd = 0, int priority = 0);
void Finalize();
@ -78,27 +85,37 @@ class WorkerProxy : public ::ps::KVWorker<T> {
int AddGeneralRspCB(const ::ps::SArray<::ps::Key> &keys, ::ps::SArray<T> *vals, ::ps::SArray<int> *lens, int cmd,
const Callback &cb);
void LookupIdSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced);
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced, const std::map<int, int> &attrs);
void SparseSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced, const std::map<int, int> &attrs);
void BroadcastSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced);
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced, const std::map<int, int> &attrs);
void RoundRobinSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced);
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced, const std::map<int, int> &attrs);
void WorkerInitEmbeddingSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced);
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced,
const std::map<int, int> &attrs);
void ProcessLookupResult(const ::ps::Message &msg);
void ProcessResponse(const ::ps::Message &msg);
void Send(::ps::Customer *customer, int timestamp, bool push, bool pull, int cmd, const ::ps::KVPairs<T> &kvs,
const Slicer &slicer);
const Slicer &slicer, std::map<int, int> attrs = {});
void AddKeyByHashMod(const ::ps::Key &key);
void PrepareSparseGradient(const size_t begin, const size_t end, const std::unordered_set<int> &distinct_ids,
const std::vector<std::pair<int, T *>> &indice_to_grad, const int *all_indice,
const size_t segment_size, T *gradient, int *indice);
void BuildSparseValue(const ::ps::SArray<int> &lengths, const size_t grad_index, const size_t indice_index,
const T *original_data, const T *grads, int *indices, ::ps::SArray<T> *reduced_data);
int server_num_;
std::unique_ptr<::ps::Customer> lookup_customer_;
std::unique_ptr<::ps::Customer> general_customer_;
std::unordered_map<::ps::Key, std::shared_ptr<std::vector<::ps::Range>>> embedding_table_ranges_;
std::unordered_map<int, std::vector<::ps::KVPairs<T>>> lookup_results_;
std::unordered_map<int, ::ps::KVPairs<T>> gathered_response_;
std::unordered_map<int, std::map<int, ::ps::KVPairs<T>>> gathered_response_;
std::mutex mutex_;
Slicer lookup_slicer_;
Slicer sparse_slicer_;
Slicer broadcast_slicer_;
Slicer round_robin_slicer_;
Slicer worker_init_embedding_slicer_;
@ -220,6 +237,28 @@ void WorkerProxy<T>::PushData(const ::ps::SArray<::ps::Key> &keys, const ::ps::S
general_customer_->WaitRequest(ts);
}
template <typename T>
void WorkerProxy<T>::PushSparseData(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray<T> &vals,
const ::ps::SArray<int> &lens, size_t grad_index, size_t indice_index,
size_t first_dim_size, size_t outer_dim_size) {
int ts = AddGeneralRspCB(keys, nullptr, nullptr, 0, nullptr);
::ps::KVPairs<T> kvs;
kvs.keys = keys;
kvs.vals = vals;
kvs.lens = lens;
int cmd = 0;
if (embedding_table_ranges_.count(keys[0])) {
std::map<int, int> attrs{{0, grad_index}, {1, indice_index}, {2, first_dim_size}, {3, outer_dim_size}};
Send(general_customer_.get(), ts, true, false, cmd, kvs, sparse_slicer_, attrs);
} else {
Send(general_customer_.get(), ts, true, false, cmd, kvs, round_robin_slicer_);
}
if (expected_result_count_[ts] < server_num_) {
general_customer_->AddResponse(ts, server_num_ - expected_result_count_[ts]);
}
general_customer_->WaitRequest(ts);
}
template <typename T>
void WorkerProxy<T>::PullData(const ::ps::SArray<::ps::Key> &keys, ::ps::SArray<T> *vals, ::ps::SArray<int> *lens,
int cmd, int priority) {
@ -298,12 +337,19 @@ int WorkerProxy<T>::AddGeneralRspCB(const ::ps::SArray<::ps::Key> &keys, ::ps::S
int ts = general_customer_->NewRequest(::ps::kServerGroup);
const auto &callback = [this, ts, keys, vals, lens, cb]() mutable {
mutex_.lock();
auto &kvs = gathered_response_[ts];
std::map<int, ::ps::KVPairs<T>> server_kvs = gathered_response_[ts];
mutex_.unlock();
*vals = kvs.vals;
if (lens) {
*lens = kvs.lens;
vals->clear();
for (auto kvs : server_kvs) {
for (auto val : kvs.second.vals) {
vals->push_back(val);
}
if (lens) {
for (auto len : kvs.second.lens) {
lens->push_back(len);
}
}
}
mutex_.lock();
@ -319,7 +365,8 @@ int WorkerProxy<T>::AddGeneralRspCB(const ::ps::SArray<::ps::Key> &keys, ::ps::S
template <typename T>
void WorkerProxy<T>::LookupIdSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced) {
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced,
const std::map<int, int> &attrs) {
int *lookup_ids = send.lens.data();
size_t id_size = send.lens.size();
@ -357,9 +404,193 @@ void WorkerProxy<T>::LookupIdSlicer(int timestamp, const ::ps::KVPairs<T> &send,
}
}
template <typename T>
void WorkerProxy<T>::SparseSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced,
const std::map<int, int> &attrs) {
// Init variables
T *data = send.vals.data();
if (attrs.count(0) == 0 || attrs.count(1) == 0 || attrs.count(2) == 0 || attrs.count(3) == 0) {
MS_LOG(EXCEPTION) << "Invalid attrs keys";
}
auto iter = attrs.find(0);
size_t grad_index = static_cast<size_t>(iter->second);
iter = attrs.find(1);
size_t indice_index = static_cast<size_t>(iter->second);
iter = attrs.find(2);
size_t first_dim_size = static_cast<size_t>(iter->second);
iter = attrs.find(3);
size_t outer_dim_size = static_cast<size_t>(iter->second);
int grad_size = send.lens[grad_index];
int indice_size = send.lens[indice_index];
int segment_size = grad_size / indice_size;
int grad_offset = 0;
int indice_offset = 0;
for (size_t i = 0; i < grad_index; i++) {
grad_offset += send.lens[i];
}
for (size_t j = 0; j < indice_index; j++) {
indice_offset += send.lens[j];
}
T *grad_data = data + grad_offset;
int *indice_data = reinterpret_cast<int *>(data) + indice_offset;
// Build the mappings of indice to gradient
std::vector<std::pair<int, T *>> indice_to_grads;
for (int i = 0; i < indice_size; i++) {
int indice = indice_data[i];
T *grad = grad_data + i * segment_size;
indice_to_grads.push_back(std::make_pair(indice, grad));
}
const Key &key = send.keys[0];
const std::vector<::ps::Range> &ranges = *(embedding_table_ranges_[key]);
sliced->resize(ranges.size());
// Construct reduced sparse data for each server
for (size_t i = 0; i < ranges.size(); i++) {
const ::ps::Range &range = ranges[i];
const auto &begin = range.begin();
const auto &end = range.end();
auto &kvs = sliced->at(i).second;
kvs.keys = send.keys;
kvs.lens = send.lens;
// Prepare the sparse gradient and indice
std::vector<int> indice_ids;
std::unordered_set<int> distinct_ids;
for (int j = 0; j < indice_size; j++) {
size_t indice = static_cast<size_t>(indice_data[j]);
if (indice >= begin && indice <= end) {
indice_ids.push_back(indice);
distinct_ids.insert(indice);
}
}
size_t indices_size = indice_ids.size();
if (indices_size > 0) {
int slice_segment_size = indices_size * segment_size;
T *src_grad_data = new T[slice_segment_size];
int *src_indice_data = new int[indices_size];
PrepareSparseGradient(begin, end, distinct_ids, indice_to_grads, indice_data, segment_size, src_grad_data,
src_indice_data);
// Reduce the sparse gradient and indice
T *new_grad = new T[slice_segment_size];
int *new_indices = new int[indices_size];
mindspore::kernel::SparseGradient unique_sparse_grad({new_grad, new_indices, indices_size});
Util::ReduceSparseGradient(src_grad_data, src_indice_data, indices_size, segment_size, first_dim_size,
outer_dim_size, &unique_sparse_grad);
// Update the length of reduce sparse gradient and indice
::ps::SArray<int> reduced_lens;
reduced_lens.CopyFrom(kvs.lens);
reduced_lens[grad_index] = unique_sparse_grad.indices_size_ * segment_size;
reduced_lens[indice_index] = unique_sparse_grad.indices_size_;
// Build the sparse value to be sent
size_t total_size = 0;
for (auto size : reduced_lens) {
total_size += size;
}
::ps::SArray<T> reduced_data(total_size, 0);
BuildSparseValue(reduced_lens, grad_index, indice_index, data, unique_sparse_grad.value_,
unique_sparse_grad.indices_, &reduced_data);
kvs.lens = reduced_lens;
kvs.vals = reduced_data;
delete[] src_grad_data;
delete[] src_indice_data;
delete[] new_grad;
delete[] new_indices;
}
if (indices_size <= 0) {
::ps::SArray<T> no_keys;
::ps::SArray<T> no_vals;
::ps::SArray<T> no_lens;
no_keys.push_back(key);
no_vals.push_back(-100);
kvs.vals = no_vals;
kvs.lens = no_lens;
}
sliced->at(i).first = true;
expected_result_count_[timestamp] += 1;
}
}
template <typename T>
void WorkerProxy<T>::PrepareSparseGradient(const size_t begin, const size_t end,
const std::unordered_set<int> &distinct_ids,
const std::vector<std::pair<int, T *>> &indice_to_grads,
const int *all_indice, const size_t segment_size, T *gradient,
int *indices) {
int offset = 0;
int index = 0;
size_t segment_data_size = segment_size * sizeof(T);
for (auto &pair : indice_to_grads) {
if (distinct_ids.count(pair.first) == 0) {
continue;
}
indices[index++] = pair.first;
auto ret = memcpy_s(gradient + offset, segment_data_size, pair.second, segment_data_size);
if (ret != 0) {
MS_LOG(ERROR) << "memcpy_s error, errorno(" << ret << ")";
}
offset += segment_size;
}
}
template <typename T>
void WorkerProxy<T>::BuildSparseValue(const ::ps::SArray<int> &lengths, const size_t grad_index,
const size_t indice_index, const T *original_data, const T *grads, int *indices,
::ps::SArray<T> *reduced_data) {
int offset = 0;
for (size_t i = 0; i < lengths.size(); i++) {
if (i != grad_index && i != indice_index) {
int data_size = lengths[i] * sizeof(T);
auto ret = memcpy_s(reduced_data->data() + offset, data_size, original_data + offset, data_size);
if (ret != 0) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
}
}
offset += lengths[i];
}
// Fill the reduced gradient
int grad_offset = 0;
for (size_t i = 0; i < grad_index; i++) {
grad_offset += lengths[i];
}
int data_size = lengths[grad_index] * sizeof(T);
auto ret = memcpy_s(reduced_data->data() + grad_offset, data_size, grads, data_size);
if (ret != 0) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
}
// Fill the reduced indice
int indice_offset = grad_offset + lengths[grad_index];
data_size = lengths[indice_index] * sizeof(T);
T *indice_data = reduced_data->data() + indice_offset;
T *convert = new T[lengths[indice_index]];
for (int i = 0; i < lengths[indice_index]; i++) {
convert[i] = static_cast<T>(indices[i]);
}
ret = memcpy_s(indice_data, data_size, convert, data_size);
if (ret != 0) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
}
delete[] convert;
}
template <typename T>
void WorkerProxy<T>::BroadcastSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced) {
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced,
const std::map<int, int> &attr) {
sliced->resize(server_num_);
for (int i = 0; i < server_num_; i++) {
sliced->at(i).first = true;
@ -370,7 +601,8 @@ void WorkerProxy<T>::BroadcastSlicer(int timestamp, const ::ps::KVPairs<T> &send
template <typename T>
void WorkerProxy<T>::RoundRobinSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced) {
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced,
const std::map<int, int> &attr) {
sliced->resize(server_num_);
auto keys = send.keys;
auto vals = send.vals;
@ -407,7 +639,8 @@ void WorkerProxy<T>::RoundRobinSlicer(int timestamp, const ::ps::KVPairs<T> &sen
template <typename T>
void WorkerProxy<T>::WorkerInitEmbeddingSlicer(int timestamp, const ::ps::KVPairs<T> &send,
const std::vector<::ps::Range> &,
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced) {
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced,
const std::map<int, int> &attrs) {
sliced->resize(server_num_);
auto keys = send.keys;
auto vals = send.vals;
@ -442,7 +675,7 @@ void WorkerProxy<T>::ProcessLookupResult(const ::ps::Message &msg) {
lookup_results_[ts].push_back(kvs);
mutex_.unlock();
}
if (lookup_customer_->NumResponse(ts) == expected_result_count_[ts] - 1) {
if (lookup_customer_->NumResponse(ts) + 1 == server_num_) {
const auto &cb = lookup_callbacks_[ts];
cb();
lookup_callbacks_.erase(ts);
@ -462,15 +695,8 @@ void WorkerProxy<T>::ProcessResponse(const ::ps::Message &msg) {
kvs.lens = msg.data[2];
}
mutex_.lock();
for (auto key : kvs.keys) {
gathered_response_[ts].keys.push_back(key);
}
for (auto val : kvs.vals) {
gathered_response_[ts].vals.push_back(val);
}
for (auto len : kvs.lens) {
gathered_response_[ts].lens.push_back(len);
}
int rsp_server_rank = ::ps::Postoffice::Get()->IDtoRank(msg.meta.sender);
gathered_response_[ts][rsp_server_rank] = kvs;
mutex_.unlock();
if (general_customer_->NumResponse(ts) + 1 == server_num_) {
const auto &cb = general_callbacks_[ts];
@ -482,9 +708,9 @@ void WorkerProxy<T>::ProcessResponse(const ::ps::Message &msg) {
template <typename T>
void WorkerProxy<T>::Send(::ps::Customer *customer, int timestamp, bool push, bool pull, int cmd,
const ::ps::KVPairs<T> &kvs, const Slicer &slicer) {
const ::ps::KVPairs<T> &kvs, const Slicer &slicer, std::map<int, int> attrs) {
SlicedKVs sliced;
slicer(timestamp, kvs, ::ps::Postoffice::Get()->GetServerKeyRanges(), &sliced);
slicer(timestamp, kvs, ::ps::Postoffice::Get()->GetServerKeyRanges(), &sliced, attrs);
for (size_t i = 0; i < sliced.size(); i++) {
const auto &s = sliced[i];

View File

@ -191,8 +191,8 @@ std::shared_ptr<ImageFolderDataset> ImageFolder(const std::string &dataset_dir,
}
// Function to create a ManifestDataset.
std::shared_ptr<ManifestDataset> Manifest(std::string dataset_file, std::string usage,
std::shared_ptr<SamplerObj> sampler,
std::shared_ptr<ManifestDataset> Manifest(const std::string &dataset_file, const std::string &usage,
const std::shared_ptr<SamplerObj> &sampler,
const std::map<std::string, int32_t> &class_indexing, bool decode) {
auto ds = std::make_shared<ManifestDataset>(dataset_file, usage, sampler, class_indexing, decode);
@ -211,14 +211,14 @@ std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::s
// Function to overload "+" operator to concat two datasets
std::shared_ptr<ConcatDataset> operator+(const std::shared_ptr<Dataset> &datasets1,
const std::shared_ptr<Dataset> &datasets2) {
std::shared_ptr<ConcatDataset> ds = std::make_shared<ConcatDataset>(std::vector({datasets1, datasets2}));
std::shared_ptr<ConcatDataset> ds = std::make_shared<ConcatDataset>(std::vector({datasets2, datasets1}));
// Call derived class validation method.
return ds->ValidateParams() ? ds : nullptr;
}
// Function to create a TextFileDataset.
std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &dataset_files, int32_t num_samples,
std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &dataset_files, int64_t num_samples,
ShuffleMode shuffle, int32_t num_shards, int32_t shard_id) {
auto ds = std::make_shared<TextFileDataset>(dataset_files, num_samples, shuffle, num_shards, shard_id);
@ -580,13 +580,6 @@ bool SchemaObj::from_json(nlohmann::json json_obj) {
// OTHER FUNCTIONS
// Helper function to create default RandomSampler.
std::shared_ptr<SamplerObj> CreateDefaultSampler() {
const int32_t num_samples = 0; // 0 means to sample all ids.
bool replacement = false;
return std::make_shared<RandomSamplerObj>(replacement, num_samples);
}
// Helper function to compute a default shuffle size
Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows,
int64_t *shuffle_size) {
@ -682,6 +675,36 @@ bool ValidateDatasetShardParams(const std::string &dataset_name, int32_t num_sha
return true;
}
// Helper function to validate dataset sampler parameter
bool ValidateDatasetSampler(const std::string &dataset_name, const std::shared_ptr<SamplerObj> &sampler) {
if (sampler == nullptr) {
MS_LOG(ERROR) << dataset_name << ": Sampler is not constructed correctly, sampler: nullptr";
return false;
}
return true;
}
// Helper function to validate dataset input/output column parameter
bool ValidateDatasetColumnParam(const std::string &dataset_name, const std::string &column_param,
const std::vector<std::string> &columns) {
if (columns.empty()) {
MS_LOG(ERROR) << dataset_name << ":" << column_param << " should not be empty";
return false;
}
for (uint32_t i = 0; i < columns.size(); ++i) {
if (columns[i].empty()) {
MS_LOG(ERROR) << dataset_name << ":" << column_param << "[" << i << "] should not be empty";
return false;
}
}
std::set<std::string> columns_set(columns.begin(), columns.end());
if (columns_set.size() != columns.size()) {
MS_LOG(ERROR) << dataset_name << ":" << column_param << ": Every column name should not be same with others";
return false;
}
return true;
}
/* ####################################### Derived Dataset classes ################################# */
// DERIVED DATASET CLASSES LEAF-NODE DATASETS
@ -701,6 +724,9 @@ bool CelebADataset::ValidateParams() {
if (!ValidateDatasetDirParam("CelebADataset", dataset_dir_)) {
return false;
}
if (!ValidateDatasetSampler("CelebADataset", sampler_)) {
return false;
}
std::set<std::string> dataset_type_list = {"all", "train", "valid", "test"};
auto iter = dataset_type_list.find(dataset_type_);
if (iter == dataset_type_list.end()) {
@ -715,11 +741,6 @@ std::vector<std::shared_ptr<DatasetOp>> CelebADataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
if (sampler_ == nullptr) {
sampler_ = CreateDefaultSampler();
}
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
RETURN_EMPTY_IF_ERROR(
schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
@ -736,18 +757,15 @@ std::vector<std::shared_ptr<DatasetOp>> CelebADataset::Build() {
Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler)
: dataset_dir_(dataset_dir), sampler_(sampler) {}
bool Cifar10Dataset::ValidateParams() { return ValidateDatasetDirParam("Cifar10Dataset", dataset_dir_); }
bool Cifar10Dataset::ValidateParams() {
return ValidateDatasetDirParam("Cifar10Dataset", dataset_dir_) && ValidateDatasetSampler("Cifar10Dataset", sampler_);
}
// Function to build CifarOp for Cifar10
std::vector<std::shared_ptr<DatasetOp>> Cifar10Dataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
if (sampler_ == nullptr) {
sampler_ = CreateDefaultSampler();
}
// Do internal Schema generation.
auto schema = std::make_unique<DataSchema>();
RETURN_EMPTY_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1)));
@ -765,18 +783,16 @@ std::vector<std::shared_ptr<DatasetOp>> Cifar10Dataset::Build() {
Cifar100Dataset::Cifar100Dataset(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler)
: dataset_dir_(dataset_dir), sampler_(sampler) {}
bool Cifar100Dataset::ValidateParams() { return ValidateDatasetDirParam("Cifar100Dataset", dataset_dir_); }
bool Cifar100Dataset::ValidateParams() {
return ValidateDatasetDirParam("Cifar100Dataset", dataset_dir_) &&
ValidateDatasetSampler("Cifar100Dataset", sampler_);
}
// Function to build CifarOp for Cifar100
std::vector<std::shared_ptr<DatasetOp>> Cifar100Dataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
if (sampler_ == nullptr) {
sampler_ = CreateDefaultSampler();
}
// Do internal Schema generation.
auto schema = std::make_unique<DataSchema>();
RETURN_EMPTY_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1)));
@ -987,6 +1003,9 @@ bool CocoDataset::ValidateParams() {
if (!ValidateDatasetDirParam("CocoDataset", dataset_dir_)) {
return false;
}
if (!ValidateDatasetSampler("CocoDataset", sampler_)) {
return false;
}
Path annotation_file(annotation_file_);
if (!annotation_file.Exists()) {
MS_LOG(ERROR) << "annotation_file is invalid or not exist";
@ -1006,11 +1025,6 @@ std::vector<std::shared_ptr<DatasetOp>> CocoDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
if (sampler_ == nullptr) {
sampler_ = CreateDefaultSampler();
}
CocoOp::TaskType task_type;
if (task_ == "Detection") {
task_type = CocoOp::TaskType::Detection;
@ -1100,6 +1114,12 @@ bool CSVDataset::ValidateParams() {
return false;
}
if (!column_names_.empty()) {
if (!ValidateDatasetColumnParam("CSVDataset", "column_names", column_names_)) {
return false;
}
}
return true;
}
@ -1155,17 +1175,15 @@ ImageFolderDataset::ImageFolderDataset(std::string dataset_dir, bool decode, std
class_indexing_(class_indexing),
exts_(extensions) {}
bool ImageFolderDataset::ValidateParams() { return ValidateDatasetDirParam("ImageFolderDataset", dataset_dir_); }
bool ImageFolderDataset::ValidateParams() {
return ValidateDatasetDirParam("ImageFolderDataset", dataset_dir_) &&
ValidateDatasetSampler("ImageFolderDataset", sampler_);
}
std::vector<std::shared_ptr<DatasetOp>> ImageFolderDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
// If user does not specify Sampler, create a default sampler, i.e., RandomSampler.
if (sampler_ == nullptr) {
sampler_ = CreateDefaultSampler();
}
// Do internal Schema generation.
// This arg is exist in ImageFolderOp, but not externalized (in Python API).
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
@ -1180,7 +1198,8 @@ std::vector<std::shared_ptr<DatasetOp>> ImageFolderDataset::Build() {
return node_ops;
}
ManifestDataset::ManifestDataset(std::string dataset_file, std::string usage, std::shared_ptr<SamplerObj> sampler,
ManifestDataset::ManifestDataset(const std::string &dataset_file, const std::string &usage,
const std::shared_ptr<SamplerObj> &sampler,
const std::map<std::string, int32_t> &class_indexing, bool decode)
: dataset_file_(dataset_file), usage_(usage), decode_(decode), class_index_(class_indexing), sampler_(sampler) {}
@ -1190,6 +1209,9 @@ bool ManifestDataset::ValidateParams() {
MS_LOG(ERROR) << "dataset file: [" << dataset_file_ << "] is invalid or not exist";
return false;
}
if (!ValidateDatasetSampler("ManifestDataset", sampler_)) {
return false;
}
std::vector<std::string> usage_list = {"train", "eval", "inference"};
if (find(usage_list.begin(), usage_list.end(), usage_) == usage_list.end()) {
@ -1204,11 +1226,6 @@ std::vector<std::shared_ptr<DatasetOp>> ManifestDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
if (sampler_ == nullptr) {
sampler_ = CreateDefaultSampler();
}
// Do internal Schema generation.
auto schema = std::make_unique<DataSchema>();
RETURN_EMPTY_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1)));
@ -1228,17 +1245,14 @@ std::vector<std::shared_ptr<DatasetOp>> ManifestDataset::Build() {
MnistDataset::MnistDataset(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler)
: dataset_dir_(dataset_dir), sampler_(sampler) {}
bool MnistDataset::ValidateParams() { return ValidateDatasetDirParam("MnistDataset", dataset_dir_); }
bool MnistDataset::ValidateParams() {
return ValidateDatasetDirParam("MnistDataset", dataset_dir_) && ValidateDatasetSampler("MnistDataset", sampler_);
}
std::vector<std::shared_ptr<DatasetOp>> MnistDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
// If user does not specify Sampler, create a default sampler, i.e., RandomSampler.
if (sampler_ == nullptr) {
sampler_ = CreateDefaultSampler();
}
// Do internal Schema generation.
auto schema = std::make_unique<DataSchema>();
RETURN_EMPTY_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1)));
@ -1257,6 +1271,14 @@ bool RandomDataset::ValidateParams() {
MS_LOG(ERROR) << "RandomDataset: total_rows must be greater than 0, now get " << total_rows_;
return false;
}
if (!ValidateDatasetSampler("RandomDataset", sampler_)) {
return false;
}
if (!columns_list_.empty()) {
if (!ValidateDatasetColumnParam("RandomDataset", "columns_list", columns_list_)) {
return false;
}
}
return true;
}
@ -1279,11 +1301,6 @@ std::vector<std::shared_ptr<DatasetOp>> RandomDataset::Build() {
total_rows_ = schema_obj->get_num_rows();
}
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
if (sampler_ == nullptr) {
sampler_ = CreateDefaultSampler();
}
std::string schema_json_string, schema_file_path;
if (schema_ != nullptr) {
schema_->set_dataset_type("Random");
@ -1331,7 +1348,7 @@ bool TextFileDataset::ValidateParams() {
return false;
}
if (!ValidateDatasetShardParams("TextfileDataset", num_shards_, shard_id_)) {
if (!ValidateDatasetShardParams("TextFileDataset", num_shards_, shard_id_)) {
return false;
}
@ -1392,6 +1409,9 @@ bool VOCDataset::ValidateParams() {
MS_LOG(ERROR) << "Invalid dataset path or no dataset path is specified.";
return false;
}
if (!ValidateDatasetSampler("VOCDataset", sampler_)) {
return false;
}
if (task_ == "Segmentation") {
if (!class_index_.empty()) {
MS_LOG(ERROR) << "class_indexing is invalid in Segmentation task.";
@ -1420,11 +1440,6 @@ std::vector<std::shared_ptr<DatasetOp>> VOCDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
if (sampler_ == nullptr) {
sampler_ = CreateDefaultSampler();
}
auto schema = std::make_unique<DataSchema>();
VOCOp::TaskType task_type_;
@ -1539,6 +1554,10 @@ bool ConcatDataset::ValidateParams() {
MS_LOG(ERROR) << "Concat: concatenated datasets are not specified.";
return false;
}
if (find(datasets_.begin(), datasets_.end(), nullptr) != datasets_.end()) {
MS_LOG(ERROR) << "Concat: concatenated dataset should not be null.";
return false;
}
return true;
}
@ -1586,6 +1605,21 @@ bool MapDataset::ValidateParams() {
MS_LOG(ERROR) << "Map: No operation is specified.";
return false;
}
if (!input_columns_.empty()) {
if (!ValidateDatasetColumnParam("MapDataset", "input_columns", input_columns_)) {
return false;
}
}
if (!output_columns_.empty()) {
if (!ValidateDatasetColumnParam("MapDataset", "output_columns", output_columns_)) {
return false;
}
}
if (!project_columns_.empty()) {
if (!ValidateDatasetColumnParam("MapDataset", "project_columns", project_columns_)) {
return false;
}
}
return true;
}
@ -1615,12 +1649,12 @@ RenameDataset::RenameDataset(const std::vector<std::string> &input_columns,
: input_columns_(input_columns), output_columns_(output_columns) {}
bool RenameDataset::ValidateParams() {
if (input_columns_.empty() || output_columns_.empty()) {
MS_LOG(ERROR) << "input and output columns must be specified";
if (input_columns_.size() != output_columns_.size()) {
MS_LOG(ERROR) << "RenameDataset: input and output columns must be the same size";
return false;
}
if (input_columns_.size() != output_columns_.size()) {
MS_LOG(ERROR) << "input and output columns must be the same size";
if (!ValidateDatasetColumnParam("RenameDataset", "input_columns", input_columns_) ||
!ValidateDatasetColumnParam("RenameDataset", "output_columns", output_columns_)) {
return false;
}
return true;
@ -1713,7 +1747,7 @@ std::vector<std::shared_ptr<DatasetOp>> TakeDataset::Build() {
// Function to validate the parameters for TakeDataset
bool TakeDataset::ValidateParams() {
if (take_count_ < 0 && take_count_ != -1) {
if (take_count_ <= 0 && take_count_ != -1) {
MS_LOG(ERROR) << "Take: take_count should be either -1 or positive integer, take_count: " << take_count_;
return false;
}

View File

@ -45,6 +45,8 @@ PYBIND_REGISTER(ConfigManager, 0, ([](const py::module *m) {
.def("get_op_connector_size", &ConfigManager::op_connector_size)
.def("get_seed", &ConfigManager::seed)
.def("get_monitor_sampling_interval", &ConfigManager::monitor_sampling_interval)
.def("get_callback_timeout", &ConfigManager::callback_timeout)
.def("set_callback_timeout", &ConfigManager::set_callback_timeout)
.def("load", [](ConfigManager &c, std::string s) { THROW_IF_ERROR(c.LoadFile(s)); });
}));

View File

@ -382,7 +382,7 @@ CutMixBatchOperation::CutMixBatchOperation(ImageBatchFormat image_batch_format,
: image_batch_format_(image_batch_format), alpha_(alpha), prob_(prob) {}
bool CutMixBatchOperation::ValidateParams() {
if (alpha_ < 0) {
if (alpha_ <= 0) {
MS_LOG(ERROR) << "CutMixBatch: alpha cannot be negative.";
return false;
}
@ -434,7 +434,7 @@ std::shared_ptr<TensorOp> HwcToChwOperation::Build() { return std::make_shared<H
MixUpBatchOperation::MixUpBatchOperation(float alpha) : alpha_(alpha) {}
bool MixUpBatchOperation::ValidateParams() {
if (alpha_ < 0) {
if (alpha_ <= 0) {
MS_LOG(ERROR) << "MixUpBatch: alpha must be a positive floating value however it is: " << alpha_;
return false;
}

View File

@ -50,7 +50,7 @@ Status CallbackManager::Begin(const CallbackParam &cb_param) {
// return Status::OK() if no begin is needed
RETURN_OK_IF_TRUE(callback_inds.empty());
RETURN_IF_NOT_OK(op_->PauseFromMaster());
RETURN_IF_NOT_OK(op_->WaitForWorkers());
// Now do the actual callback
for (size_t ind : callback_inds) {
@ -69,7 +69,7 @@ Status CallbackManager::EpochBegin(const CallbackParam &cb_param) {
// return Status::OK() if no epoch_begin is needed
RETURN_OK_IF_TRUE(callback_inds.empty());
RETURN_IF_NOT_OK(op_->PauseFromMaster());
RETURN_IF_NOT_OK(op_->WaitForWorkers());
// Now do the actual callback
for (size_t ind : callback_inds) {
@ -89,7 +89,7 @@ Status CallbackManager::StepBegin(const CallbackParam &cb_param) {
// return Status::OK() if no step_begin is needed
RETURN_OK_IF_TRUE(callback_inds.empty());
RETURN_IF_NOT_OK(op_->PauseFromMaster());
RETURN_IF_NOT_OK(op_->WaitForWorkers());
// Now do the actual callback
for (size_t ind : callback_inds) {
@ -108,7 +108,7 @@ Status CallbackManager::End(const CallbackParam &cb_param) {
// return Status::OK() if no end is needed
RETURN_OK_IF_TRUE(callback_inds.empty());
RETURN_IF_NOT_OK(op_->PauseFromMaster());
RETURN_IF_NOT_OK(op_->WaitForWorkers());
// Now do the actual callback
for (size_t ind : callback_inds) {
@ -127,7 +127,7 @@ Status CallbackManager::EpochEnd(const CallbackParam &cb_param) {
// return Status::OK() if no epoch_end is needed
RETURN_OK_IF_TRUE(callback_inds.empty());
RETURN_IF_NOT_OK(op_->PauseFromMaster());
RETURN_IF_NOT_OK(op_->WaitForWorkers());
// Now do the actual callback
for (size_t ind : callback_inds) {
@ -147,7 +147,7 @@ Status CallbackManager::StepEnd(const CallbackParam &cb_param) {
// return Status::OK() if no step_end is needed
RETURN_OK_IF_TRUE(callback_inds.empty());
RETURN_IF_NOT_OK(op_->PauseFromMaster());
RETURN_IF_NOT_OK(op_->WaitForWorkers());
// Now do the actual callback
for (size_t ind : callback_inds) {

View File

@ -32,7 +32,7 @@ class DatasetOp;
/// This class manages all the callbacks that are associated with a single DatasetOp. For now, only MapOp supports this.
class CallbackManager {
public:
/// CallbackManager default constructor. Init needs to be called before using the created instance.
/// \brief CallbackManager default constructor. Init needs to be called before using the created instance.
CallbackManager() : enabled_(false) {}
/// \brief

View File

@ -88,5 +88,8 @@ uint32_t ConfigManager::seed() const { return seed_; }
void ConfigManager::set_seed(uint32_t seed) { seed_ = seed; }
void ConfigManager::set_monitor_sampling_interval(uint32_t interval) { monitor_sampling_interval_ = interval; }
void ConfigManager::set_callback_timeout(uint32_t timeout) { callback_timout_ = timeout; }
} // namespace dataset
} // namespace mindspore

View File

@ -116,9 +116,17 @@ class ConfigManager {
void set_monitor_sampling_interval(uint32_t interval);
// getter function
// @return The iterval of monitor sampling
// @return The interval of monitor sampling
int32_t monitor_sampling_interval() const { return monitor_sampling_interval_; }
// setter function
// @param timeout - The setting to apply to the config
void set_callback_timeout(uint32_t timeout);
// getter function
// @return The timeout DSWaitedCallback would wait for before raising an error
int32_t callback_timeout() const { return callback_timout_; }
private:
int32_t rows_per_buffer_{kCfgRowsPerBuffer};
int32_t num_parallel_workers_{kCfgParallelWorkers};
@ -126,8 +134,9 @@ class ConfigManager {
int32_t op_connector_size_{kCfgOpConnectorSize};
uint32_t seed_{kCfgDefaultSeed};
uint32_t monitor_sampling_interval_{kCfgMonitorSamplingInterval};
uint32_t callback_timout_{kCfgCallbackTimeout};
// Private helper function that taks a nlohmann json format and populates the settings
// Private helper function that takes a nlohmann json format and populates the settings
// @param j - The json nlohmann json info
Status FromJson(const nlohmann::json &j);
};

View File

@ -68,6 +68,7 @@ constexpr uint32_t kCfgWorkerConnectorSize = 16;
constexpr uint32_t kCfgOpConnectorSize = 16;
constexpr uint32_t kCfgDefaultSeed = std::mt19937::default_seed;
constexpr uint32_t kCfgMonitorSamplingInterval = 10;
constexpr uint32_t kCfgCallbackTimeout = 60; // timeout value for callback in seconds
// Invalid OpenCV type should not be from 0 to 7 (opencv4/opencv2/core/hal/interface.h)
constexpr uint8_t kCVInvalidType = 255;

View File

@ -59,7 +59,7 @@ constexpr static uint32_t kDataIsInSharedMemory = 2;
/// \param rc[in] Status object
/// \param reply[in/out] pointer to pre-allocated protobuf object
inline void Status2CacheReply(const Status &rc, CacheReply *reply) {
reply->set_rc(static_cast<google::int32>(rc.get_code()));
reply->set_rc(static_cast<int32_t>(rc.get_code()));
reply->set_msg(rc.ToString());
}

View File

@ -76,7 +76,7 @@ class BaseRequest {
/// \brief Base class of a cache server request
/// \param type Type of the request
explicit BaseRequest(RequestType type) : type_(type) { rq_.set_type(static_cast<google::int32>(type_)); }
explicit BaseRequest(RequestType type) : type_(type) { rq_.set_type(static_cast<int16_t>(type_)); }
virtual ~BaseRequest() = default;
/// \brief A print method for debugging

View File

@ -37,8 +37,10 @@ class DataBuffer {
// Buffer flags
enum BufferFlags : uint32_t {
kDeBFlagNone = 0,
kDeBFlagEOF = 1, // The buffer is an eof end-of-data msg
kDeBFlagEOE = 1u << 1 // The buffer is an eoe end-of-epoch msg
kDeBFlagEOF = 1, // The buffer is an eof end-of-data msg
kDeBFlagEOE = 1u << 1, // The buffer is an eoe end-of-epoch msg
kDeBFlagWait = 1u << 2, // The buffer is an control signal for workers to suspend operations
kDeBFlagQuit = 1u << 3 // The buffer is a control signal for workers to quit
};
// Name: Constructor #1
@ -64,6 +66,10 @@ class DataBuffer {
bool eoe() const { return (static_cast<uint32_t>(buffer_flags_) & static_cast<uint32_t>(kDeBFlagEOE)); }
bool wait() const { return (static_cast<uint32_t>(buffer_flags_) & static_cast<uint32_t>(kDeBFlagWait)); }
bool quit() const { return (static_cast<uint32_t>(buffer_flags_) & static_cast<uint32_t>(kDeBFlagQuit)); }
// Simple getter funcs
int32_t id() const { return buffer_id_; }

View File

@ -363,10 +363,9 @@ class DatasetOp : public std::enable_shared_from_this<DatasetOp> {
/// This function is only intended to be called by CallbackManager within the master thread of ParallelOp
/// The expected behavior is this, when this function is invoked, this function will block until all the workers
/// have finished their remaining work and go to sleep. Since all ParallelOps use a QueueList to sync with master.
/// They would automatically wait on the QueueList when they are done. Hence, for now, a Unpause() function is not
/// needed. Only parallelOp needs to override this function.
/// They would automatically wait on the QueueList when they are done.
/// \return Status
virtual Status PauseFromMaster() { return Status::OK(); }
virtual Status WaitForWorkers() { return Status::OK(); }
protected:
/// \brief Removes a parent operator from this operator

View File

@ -44,9 +44,9 @@ DeviceQueueOp::DeviceQueueOp(std::string channel_name, DeviceType device_type, i
DeviceQueueOp::~DeviceQueueOp() {}
#ifdef ENABLE_GPUQUE
void ReleaseData(void *addr) {
void DeviceQueueOp::ReleaseData(void *addr) {
if (addr != nullptr) {
free(addr);
pool_->Deallocate(addr);
}
}
#endif
@ -87,6 +87,7 @@ Status DeviceQueueOp::operator()() {
#endif
} else if (device_type_ == DeviceType::GPU) {
#ifdef ENABLE_GPUQUE
RETURN_IF_NOT_OK(CircularPool::CreateCircularPool(&pool_));
RETURN_IF_NOT_OK(SendDataToGPU());
#endif
} else if (device_type_ == DeviceType::CPU) {
@ -187,6 +188,7 @@ Status DeviceQueueOp::SendDataToGPU() {
bool is_break_loop = false;
bool is_open = false;
uint32_t handle = INVALID_HANDLE;
auto release_function = std::bind(&DeviceQueueOp::ReleaseData, this, std::placeholders::_1);
std::unique_ptr<DataBuffer> current_buffer;
RETURN_IF_NOT_OK(GetNextInput(&current_buffer));
@ -204,7 +206,7 @@ Status DeviceQueueOp::SendDataToGPU() {
data_size.push_back(static_cast<size_t>(curr_row[i]->SizeInBytes()));
}
if (!is_open) {
handle = GpuBufferMgr::GetInstance().Open(0, channel_name_, data_size, ReleaseData);
handle = GpuBufferMgr::GetInstance().Open(0, channel_name_, data_size, release_function);
if (handle == INVALID_HANDLE) {
return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "open failed");
}
@ -246,7 +248,7 @@ Status DeviceQueueOp::RetryPushGPUData(const std::vector<size_t> &data_size, con
BlockQueueStatus_T ret = GpuBufferMgr::GetInstance().Push(handle, items, WAIT_TIME);
if (ret) {
for (int i = 0; i < items.size(); i++) {
free(items[i].data_ptr_);
ReleaseData(items[i].data_ptr_);
}
if (ret == BlockQueueStatus_T::ERROR_INPUT) {
return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "invalid input Data, please check it.");
@ -267,7 +269,7 @@ Status DeviceQueueOp::RetryPushGPUData(const std::vector<size_t> &data_size, con
Status DeviceQueueOp::MallocForGPUData(std::vector<device::DataItemGpu> *items, const TensorRow &curr_row) {
int i = 0;
for (auto &sub_item : *items) {
sub_item.data_ptr_ = (unsigned char *)malloc(sub_item.data_len_);
RETURN_IF_NOT_OK(pool_->Allocate(sub_item.data_len_, &sub_item.data_ptr_));
if (sub_item.data_ptr_ == nullptr) {
return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "memory malloc failed.");
}

View File

@ -29,6 +29,7 @@
#endif
#ifdef ENABLE_GPUQUE
#include "minddata/dataset/util/circular_pool.h"
#include "runtime/device/gpu/gpu_buffer_mgr.h"
using mindspore::device::BlockQueueStatus_T;
using mindspore::device::GpuBufferMgr;
@ -162,6 +163,9 @@ class DeviceQueueOp : public PipelineOp {
Status SendDataToGPU();
Status RetryPushGPUData(const std::vector<size_t> &data_size, const TensorRow &curr_row, uint32_t handle);
Status MallocForGPUData(std::vector<device::DataItemGpu> *items, const TensorRow &curr_row);
void ReleaseData(void *addr);
std::shared_ptr<MemoryPool> pool_;
#endif
Status SendDataToCPU();

View File

@ -166,7 +166,7 @@ Status MapOp::operator()() {
// init callback
RETURN_IF_NOT_OK(callback_manager_.Init(shared_from_this()));
Status rc = local_queues_.Register(tree_->AllTasks());
RETURN_IF_NOT_OK(master_pause_wp_.Register(tree_->AllTasks()));
RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks()));
if (rc.IsError()) {
TaskManager::FindMe()->Post();
return rc;
@ -205,23 +205,29 @@ Status MapOp::operator()() {
RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buff, 0));
}
// send the eoe buffer to worker
// reset epoch_step when a new epoch is about to start
// check whether this is the end of a real epoch (not all eoe signals end of epoch)
if ((op_current_repeats_ + 1) % op_num_repeats_per_epoch() == 0) {
RETURN_IF_NOT_OK(callback_manager_.EpochEnd(CallbackParam(op_current_epochs_ + 1, ep_step, total_step)));
ep_step = 0;
}
// Propagate the eoe buffer to worker
std::unique_ptr<MapWorkerJob> worker_job = std::make_unique<MapWorkerJob>(std::move(buff));
RETURN_IF_NOT_OK(local_queues_[num_buf++ % num_workers_]->Add(std::move(worker_job)));
UpdateRepeatAndEpochCounter();
RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buff, 0));
}
// the last eoe increments the eoe count by 1, but this shouldn't be reflected on End() callback
// RETURN_IF_NOT_OK(callback_manager_.End(CallbackParam(op_current_epochs_, ep_step, total_step)));
// handle eof logic
// End() is commented out because it might never be called due to the lack of EOF when EpochCtrl is -1
// RETURN_IF_NOT_OK(callback_manager_.End(CallbackParam(op_current_epochs_, ep_step, total_step)));
// Handle eof logic, this code might never be reached if epoch_ctrl = -1.
std::unique_ptr<MapWorkerJob> worker_job = std::make_unique<MapWorkerJob>(std::move(buff));
RETURN_IF_NOT_OK(local_queues_[num_buf++ % num_workers_]->Add(std::move(worker_job)));
// Quit all workers, this code might never be reached if EpochCtrl is -1.
for (int32_t wkr_id = 0; wkr_id < num_workers_; wkr_id++) {
auto quit = std::make_unique<MapWorkerJob>(std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagQuit));
RETURN_IF_NOT_OK(local_queues_[num_buf++ % num_workers_]->Add(std::move(quit)));
}
return Status::OK();
}
@ -242,26 +248,27 @@ Status MapOp::WorkerEntry(int32_t worker_id) {
// Map op does not use child iterator, and it needs to manually handle eoe and eof's itself
// rather than use the base-class defaults.
while (true) {
// handle the pause logic. Pause is triggered when an buffer id of -1 with no special flag and no row is received
if (in_buffer->id() == -1 && in_buffer->buffer_flags() == DataBuffer::kDeBFlagNone && in_buffer->NumRows() == 0) {
// when worker receives the signal from master thread, it increments a atomic int
// the last guy who increments the counter, wakes up master thread
if (++num_workers_paused_ == num_workers_) master_pause_wp_.Set();
// this will block the worker until master thread gives it a new work
// Handle special logic where buffer carries a ctrl flag.
if (in_buffer->buffer_flags() != DataBuffer::kDeBFlagNone) {
if (in_buffer->wait()) {
// When worker receives the signal from master thread, it increments a atomic int
// The last guy who increments the counter, wakes up master thread
if (++num_workers_paused_ == num_workers_) {
wait_for_workers_post_.Set();
}
// This will block the worker until master thread gives it a new work
} else if (in_buffer->eoe()) {
// Calling base class EoeReceived to forward eoe buffer.
RETURN_IF_NOT_OK(EoeReceived(worker_id));
} else if (in_buffer->eof()) {
// Calling base class EofReceived to forward eof buffer.
RETURN_IF_NOT_OK(EofReceived(worker_id));
} else if (in_buffer->quit()) {
break;
}
RETURN_IF_NOT_OK(FetchNextWork(worker_id, &in_buffer, &job_list));
continue;
} else if (in_buffer->eoe()) {
// Calling base class EoeReceived to forward eoe buffer.
RETURN_IF_NOT_OK(EoeReceived(worker_id));
// Fetch next data buffer and map job list
RETURN_IF_NOT_OK(FetchNextWork(worker_id, &in_buffer, &job_list));
continue;
} else if (in_buffer->eof()) {
// Calling base class EofReceived to forward eof buffer.
RETURN_IF_NOT_OK(EofReceived(worker_id));
break;
}
CHECK_FAIL_RETURN_UNEXPECTED(in_buffer->NumRows() * in_buffer->NumCols() != 0, "MapOp got an empty DataBuffer.");
std::unique_ptr<TensorQTable> new_tensor_table(std::make_unique<TensorQTable>());
// Perform the compute function of TensorOp(s) and store the result in new_tensor_table.
@ -299,9 +306,9 @@ Status MapOp::WorkerCompute(DataBuffer *in_buffer, TensorQTable *new_tensor_tabl
// Variable to keep the result after executing the job.
std::vector<TensorRow> result_table;
// Executing the list of jobs
// Executing the list of jobs.
for (size_t i = 0; i < job_list.size(); i++) {
// Execute MapJob.
// Execute MapWorkerJob.
RETURN_IF_NOT_OK(job_list[i]->Run(job_input_table, &result_table));
// Assign the processed data as an input for the next job processing, except for the last TensorOp in the list.
if (i + 1 < job_list.size()) {
@ -311,8 +318,7 @@ Status MapOp::WorkerCompute(DataBuffer *in_buffer, TensorQTable *new_tensor_tabl
// Sanity check a row in result_table
if (!result_table.empty() && out_columns_.size() != result_table[0].size()) {
return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__,
"Result of a tensorOp doesn't match output column names");
RETURN_STATUS_UNEXPECTED("Result of a tensorOp doesn't match output column names");
}
// Merging the data processed by job (result_table) with the data that are not used.
@ -386,7 +392,7 @@ Status MapOp::InitPrivateVariable(std::unordered_map<std::string, int32_t> *col_
// columns from child are correct
RETURN_IF_NOT_OK(this->ValidateInColumns(*col_name_id_map));
// initialize keep_input_columns, true means to keep the column.
// Initialize keep_input_columns, true means to keep the column.
keep_input_columns_.resize(col_name_id_map->size(), true);
for (const auto &col_name : in_columns_) {
int32_t missed = (*col_name_id_map)[col_name];
@ -449,18 +455,18 @@ Status MapOp::Accept(NodePass *p, bool *modified) {
return p->RunOnNode(shared_from_base<MapOp>(), modified);
}
Status MapOp::PauseFromMaster() {
Status MapOp::WaitForWorkers() {
// reset num_paused workers to 0
num_workers_paused_ = 0;
for (int32_t wkr_id = 0; wkr_id < num_workers_; wkr_id++) {
// a special buffer (id=-1, empty, none flag) is used to signal that worker needs to pause.
RETURN_IF_NOT_OK(local_queues_[wkr_id]->Add(
std::make_unique<MapWorkerJob>(std::make_unique<DataBuffer>(-1, DataBuffer::kDeBFlagNone))));
std::make_unique<MapWorkerJob>(std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagWait))));
}
// wait until all workers are done processing their work in local_queue_
RETURN_IF_NOT_OK(master_pause_wp_.Wait());
RETURN_IF_NOT_OK(wait_for_workers_post_.Wait());
// clear the WaitPost for the next Wait()
master_pause_wp_.Clear();
wait_for_workers_post_.Clear();
return Status::OK();
}
} // namespace dataset

View File

@ -228,10 +228,10 @@ class MapOp : public ParallelOp {
// Indices of the columns to process.
std::vector<size_t> to_process_indices_;
// wait post used to perform the pausing logic in MapOp
WaitPost master_pause_wp_;
// Wait post used to perform the pausing logic in MapOp
WaitPost wait_for_workers_post_;
// count number of workers that have signaled master
// Count number of workers that have signaled master
std::atomic_int num_workers_paused_;
// Private function for worker/thread to loop continuously. It comprises the main
@ -272,7 +272,7 @@ class MapOp : public ParallelOp {
// Workers upon receiving the suspension token from master thread, increment an atomic count, the last worker
// who does the increment wakes up the master.
// @return - Status
Status PauseFromMaster() override;
Status WaitForWorkers() override;
};
} // namespace dataset
} // namespace mindspore

View File

@ -75,6 +75,9 @@ Status DistributedSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer
RETURN_STATUS_UNEXPECTED("Distributed Sampler Error");
} else if (cnt_ == samples_per_buffer_ && (non_empty_ || !even_dist_)) {
(*out_buffer) = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE);
if (!samples_per_buffer_) {
non_empty_ = false;
}
} else if (!samples_per_buffer_ && !non_empty_) {
// If the buffer is empty, we add samples with subscript 0 in the current dataset.
// This step is to make up for the solution that the code default buffer is not empty before.

View File

@ -84,32 +84,32 @@ std::shared_ptr<SchemaObj> Schema(const std::string &schema_file = "");
// The type of the image tensor is uint8. The attr tensor is uint32 and one hot type.
/// \param[in] dataset_dir Path to the root directory that contains the dataset.
/// \param[in] dataset_type One of 'all', 'train', 'valid' or 'test'.
/// \param[in] decode Decode the images after reading (default=False).
/// \param[in] extensions List of file extensions to be included in the dataset (default=None).
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler`
/// will be used to randomly iterate the entire dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \param[in] decode Decode the images after reading (default=false).
/// \param[in] extensions Set of file extensions to be included in the dataset (default={}).
/// \return Shared pointer to the current Dataset
std::shared_ptr<CelebADataset> CelebA(const std::string &dataset_dir, const std::string &dataset_type = "all",
const std::shared_ptr<SamplerObj> &sampler = nullptr, bool decode = false,
const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), bool decode = false,
const std::set<std::string> &extensions = {});
/// \brief Function to create a Cifar10 Dataset
/// \notes The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler`
/// will be used to randomly iterate the entire dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \return Shared pointer to the current Dataset
std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir,
const std::shared_ptr<SamplerObj> &sampler = nullptr);
const std::shared_ptr<SamplerObj> &sampler = RandomSampler());
/// \brief Function to create a Cifar100 Dataset
/// \notes The generated dataset has three columns ['image', 'coarse_label', 'fine_label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler`
/// will be used to randomly iterate the entire dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \return Shared pointer to the current Dataset
std::shared_ptr<Cifar100Dataset> Cifar100(const std::string &dataset_dir,
const std::shared_ptr<SamplerObj> &sampler = nullptr);
const std::shared_ptr<SamplerObj> &sampler = RandomSampler());
/// \brief Function to create a CLUEDataset
/// \notes The generated dataset has a variable number of columns depending on the task and usage
@ -146,12 +146,12 @@ std::shared_ptr<CLUEDataset> CLUE(const std::vector<std::string> &dataset_files,
/// \param[in] annotation_file Path to the annotation json
/// \param[in] task Set the task type of reading coco data, now support 'Detection'/'Stuff'/'Panoptic'/'Keypoint'
/// \param[in] decode Decode the images after reading
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler`
/// will be used to randomly iterate the entire dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \return Shared pointer to the current Dataset
std::shared_ptr<CocoDataset> Coco(const std::string &dataset_dir, const std::string &annotation_file,
const std::string &task = "Detection", const bool &decode = false,
const std::shared_ptr<SamplerObj> &sampler = nullptr);
const std::shared_ptr<SamplerObj> &sampler = RandomSampler());
/// \brief Function to create a CSVDataset
/// \notes The generated dataset has a variable number of columns
@ -185,13 +185,13 @@ std::shared_ptr<CSVDataset> CSV(const std::vector<std::string> &dataset_files, c
/// The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] decode A flag to decode in ImageFolder
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
/// A `RandomSampler` will be used to randomly iterate the entire dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \param[in] extensions File extensions to be read
/// \param[in] class_indexing a class name to label map
/// \return Shared pointer to the current ImageFolderDataset
std::shared_ptr<ImageFolderDataset> ImageFolder(const std::string &dataset_dir, bool decode = false,
const std::shared_ptr<SamplerObj> &sampler = nullptr,
const std::shared_ptr<SamplerObj> &sampler = RandomSampler(),
const std::set<std::string> &extensions = {},
const std::map<std::string, int32_t> &class_indexing = {});
@ -199,25 +199,25 @@ std::shared_ptr<ImageFolderDataset> ImageFolder(const std::string &dataset_dir,
/// \notes The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_file The dataset file to be read
/// \param[in] usage Need "train", "eval" or "inference" data (default="train")
/// \param[in] decode Decode the images after reading (default=false).
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \param[in] class_indexing A str-to-int mapping from label name to index (default={}, the folder
/// names will be sorted alphabetically and each class will be given a unique index starting from 0).
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
/// A `RandomSampler` will be used to randomly iterate the entire dataset
/// \param[in] decode Decode the images after reading (default=false).
/// \return Shared pointer to the current ManifestDataset
std::shared_ptr<ManifestDataset> Manifest(std::string dataset_file, std::string usage = "train",
std::shared_ptr<SamplerObj> sampler = nullptr,
std::shared_ptr<ManifestDataset> Manifest(const std::string &dataset_file, const std::string &usage = "train",
const std::shared_ptr<SamplerObj> &sampler = RandomSampler(),
const std::map<std::string, int32_t> &class_indexing = {},
bool decode = false);
/// \brief Function to create a MnistDataset
/// \notes The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
/// A `RandomSampler` will be used to randomly iterate the entire dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \return Shared pointer to the current MnistDataset
std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir,
const std::shared_ptr<SamplerObj> &sampler = nullptr);
const std::shared_ptr<SamplerObj> &sampler = RandomSampler());
/// \brief Function to create a ConcatDataset
/// \notes Reload "+" operator to concat two datasets
@ -230,15 +230,15 @@ std::shared_ptr<ConcatDataset> operator+(const std::shared_ptr<Dataset> &dataset
/// \brief Function to create a RandomDataset
/// \param[in] total_rows Number of rows for the dataset to generate (default=0, number of rows is random)
/// \param[in] schema SchemaObj to set column type, data type and data shape
/// \param[in] columns_list List of columns to be read (default=None, read all columns)
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler`
/// will be used to randomly iterate the entire dataset
/// \param[in] columns_list List of columns to be read (default={}, read all columns)
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \return Shared pointer to the current Dataset
template <typename T = std::shared_ptr<SchemaObj>>
std::shared_ptr<RandomDataset> RandomData(const int32_t &total_rows = 0, T schema = nullptr,
std::vector<std::string> columns_list = {},
std::shared_ptr<SamplerObj> sampler = nullptr) {
auto ds = std::make_shared<RandomDataset>(total_rows, schema, std::move(columns_list), std::move(sampler));
const std::vector<std::string> &columns_list = {},
const std::shared_ptr<SamplerObj> &sampler = RandomSampler()) {
auto ds = std::make_shared<RandomDataset>(total_rows, schema, columns_list, std::move(sampler));
return ds->ValidateParams() ? ds : nullptr;
}
@ -257,7 +257,7 @@ std::shared_ptr<RandomDataset> RandomData(const int32_t &total_rows = 0, T schem
/// \param[in] shard_id The shard ID within num_shards. This argument should be
/// specified only when num_shards is also specified. (Default = 0)
/// \return Shared pointer to the current TextFileDataset
std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &dataset_files, int32_t num_samples = 0,
std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &dataset_files, int64_t num_samples = 0,
ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1,
int32_t shard_id = 0);
@ -271,13 +271,13 @@ std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &datase
/// \param[in] mode Set the data list txt file to be readed
/// \param[in] class_indexing A str-to-int mapping from label name to index
/// \param[in] decode Decode the images after reading
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler`
/// will be used to randomly iterate the entire dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \return Shared pointer to the current Dataset
std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std::string &task = "Segmentation",
const std::string &mode = "train",
const std::map<std::string, int32_t> &class_indexing = {}, bool decode = false,
const std::shared_ptr<SamplerObj> &sampler = nullptr);
const std::shared_ptr<SamplerObj> &sampler = RandomSampler());
/// \brief Function to create a ZipDataset
/// \notes Applies zip to the dataset
@ -302,7 +302,7 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
virtual std::vector<std::shared_ptr<DatasetOp>> Build() = 0;
/// \brief Pure virtual function for derived class to implement parameters validation
/// \return bool True if all the params are valid
/// \return bool true if all the parameters are valid
virtual bool ValidateParams() = 0;
/// \brief Setter function for runtime number of workers
@ -716,7 +716,7 @@ class ImageFolderDataset : public Dataset {
class ManifestDataset : public Dataset {
public:
/// \brief Constructor
ManifestDataset(std::string dataset_file, std::string usage, std::shared_ptr<SamplerObj> sampler,
ManifestDataset(const std::string &dataset_file, const std::string &usage, const std::shared_ptr<SamplerObj> &sampler,
const std::map<std::string, int32_t> &class_indexing, bool decode);
/// \brief Destructor
@ -767,8 +767,8 @@ class RandomDataset : public Dataset {
static constexpr int32_t kMaxDimValue = 32;
/// \brief Constructor
RandomDataset(const int32_t &total_rows, std::shared_ptr<SchemaObj> schema, std::vector<std::string> columns_list,
std::shared_ptr<SamplerObj> sampler)
RandomDataset(const int32_t &total_rows, std::shared_ptr<SchemaObj> schema,
const std::vector<std::string> &columns_list, const std::shared_ptr<SamplerObj> &sampler)
: total_rows_(total_rows),
schema_path_(""),
schema_(std::move(schema)),
@ -776,8 +776,8 @@ class RandomDataset : public Dataset {
sampler_(std::move(sampler)) {}
/// \brief Constructor
RandomDataset(const int32_t &total_rows, std::string schema_path, std::vector<std::string> columns_list,
std::shared_ptr<SamplerObj> sampler)
RandomDataset(const int32_t &total_rows, std::string schema_path, const std::vector<std::string> &columns_list,
const std::shared_ptr<SamplerObj> &sampler)
: total_rows_(total_rows), schema_path_(schema_path), columns_list_(columns_list), sampler_(std::move(sampler)) {}
/// \brief Destructor

View File

@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_API_SAMPLERS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_API_SAMPLERS_H_
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_SAMPLERS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_SAMPLERS_H_
#include <vector>
#include <memory>
@ -70,7 +70,7 @@ std::shared_ptr<PKSamplerObj> PKSampler(int64_t num_val, bool shuffle = false, i
/// Function to create a Random Sampler.
/// \notes Samples the elements randomly.
/// \param[in] replacement - If True, put the sample ID back for the next draw.
/// \param[in] replacement - If true, put the sample ID back for the next draw.
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \return Shared pointer to the current Sampler.
std::shared_ptr<RandomSamplerObj> RandomSampler(bool replacement = false, int64_t num_samples = 0);
@ -94,7 +94,7 @@ std::shared_ptr<SubsetRandomSamplerObj> SubsetRandomSampler(std::vector<int64_t>
/// weights (probabilities).
/// \param[in] weights - A vector sequence of weights, not necessarily summing up to 1.
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \param[in] replacement - If True, put the sample ID back for the next draw.
/// \param[in] replacement - If true, put the sample ID back for the next draw.
/// \return Shared pointer to the current Sampler.
std::shared_ptr<WeightedRandomSamplerObj> WeightedRandomSampler(std::vector<double> weights, int64_t num_samples = 0,
bool replacement = true);
@ -199,4 +199,4 @@ class WeightedRandomSamplerObj : public SamplerObj {
} // namespace api
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_API_SAMPLERS_H_
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_SAMPLERS_H_

View File

@ -50,7 +50,7 @@ void CutMixBatchOp::GetCropBox(int height, int width, float lam, int *x, int *y,
Status CutMixBatchOp::Compute(const TensorRow &input, TensorRow *output) {
if (input.size() < 2) {
RETURN_STATUS_UNEXPECTED("Both images and labels columns are required for this operation");
RETURN_STATUS_UNEXPECTED("Both images and labels columns are required for this operation.");
}
std::vector<std::shared_ptr<Tensor>> images;
@ -59,10 +59,17 @@ Status CutMixBatchOp::Compute(const TensorRow &input, TensorRow *output) {
// Check inputs
if (image_shape.size() != 4 || image_shape[0] != label_shape[0]) {
RETURN_STATUS_UNEXPECTED("You must batch before calling CutMixBatch.");
RETURN_STATUS_UNEXPECTED(
"CutMixBatch: You must make sure images are HWC or CHW and batched before calling CutMixBatch.");
}
if (label_shape.size() != 2) {
RETURN_STATUS_UNEXPECTED("CutMixBatch: Label's must be in one-hot format and in a batch");
if (!input.at(1)->type().IsInt()) {
RETURN_STATUS_UNEXPECTED("CutMixBatch: Wrong labels type. The second column (labels) must only include int types.");
}
if (label_shape.size() != 2 && label_shape.size() != 3) {
RETURN_STATUS_UNEXPECTED(
"CutMixBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC where N is the batch "
"size, L is the number of labels in each row, "
"and C is the number of classes. labels must be in one-hot format and in a batch.");
}
if ((image_shape[1] != 1 && image_shape[1] != 3) && image_batch_format_ == ImageBatchFormat::kNCHW) {
RETURN_STATUS_UNEXPECTED("CutMixBatch: Image doesn't match the given image format.");
@ -84,10 +91,12 @@ Status CutMixBatchOp::Compute(const TensorRow &input, TensorRow *output) {
// Tensor holding the output labels
std::shared_ptr<Tensor> out_labels;
RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape(label_shape), DataType(DataType::DE_FLOAT32), &out_labels));
RETURN_IF_NOT_OK(TypeCast(std::move(input.at(1)), &out_labels, DataType(DataType::DE_FLOAT32)));
int64_t row_labels = label_shape.size() == 3 ? label_shape[1] : 1;
int64_t num_classes = label_shape.size() == 3 ? label_shape[2] : label_shape[1];
// Compute labels and images
for (int i = 0; i < image_shape[0]; i++) {
for (int64_t i = 0; i < image_shape[0]; i++) {
// Calculating lambda
// If x1 is a random variable from Gamma(a1, 1) and x2 is a random variable from Gamma(a2, 1)
// then x = x1 / (x1+x2) is a random variable from Beta(a1, a2)
@ -138,15 +147,29 @@ Status CutMixBatchOp::Compute(const TensorRow &input, TensorRow *output) {
}
// Compute labels
for (int j = 0; j < label_shape[1]; j++) {
uint64_t first_value, second_value;
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, {i, j}));
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, {rand_indx[i] % label_shape[0], j}));
RETURN_IF_NOT_OK(out_labels->SetItemAt({i, j}, label_lam * first_value + (1 - label_lam) * second_value));
for (int64_t j = 0; j < row_labels; j++) {
for (int64_t k = 0; k < num_classes; k++) {
std::vector<int64_t> first_index = label_shape.size() == 3 ? std::vector{i, j, k} : std::vector{i, k};
std::vector<int64_t> second_index =
label_shape.size() == 3 ? std::vector{rand_indx[i], j, k} : std::vector{rand_indx[i], k};
if (input.at(1)->type().IsSignedInt()) {
int64_t first_value, second_value;
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, first_index));
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, second_index));
RETURN_IF_NOT_OK(
out_labels->SetItemAt(first_index, label_lam * first_value + (1 - label_lam) * second_value));
} else {
uint64_t first_value, second_value;
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, first_index));
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, second_index));
RETURN_IF_NOT_OK(
out_labels->SetItemAt(first_index, label_lam * first_value + (1 - label_lam) * second_value));
}
}
}
}
}
std::shared_ptr<Tensor> out_images;
RETURN_IF_NOT_OK(TensorVectorToBatchTensor(images, &out_images));

View File

@ -415,9 +415,7 @@ Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Te
for (int i = 0; i < crop_width; i++) {
for (int j = 0; j < crop_height; j++) {
for (int c = 0; c < number_of_channels; c++) {
uint8_t pixel_value;
RETURN_IF_NOT_OK(sub_mat->GetItemAt(&pixel_value, {j, i, c}));
RETURN_IF_NOT_OK((*input)->SetItemAt({y + j, x + i, c}, pixel_value));
RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {j, i, c}, {y + j, x + i, c}));
}
}
}
@ -432,9 +430,7 @@ Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Te
for (int i = 0; i < crop_width; i++) {
for (int j = 0; j < crop_height; j++) {
for (int c = 0; c < number_of_channels; c++) {
uint8_t pixel_value;
RETURN_IF_NOT_OK(sub_mat->GetItemAt(&pixel_value, {c, j, i}));
RETURN_IF_NOT_OK((*input)->SetItemAt({c, y + j, x + i}, pixel_value));
RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {c, j, i}, {c, y + j, x + i}));
}
}
}
@ -447,9 +443,7 @@ Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Te
}
for (int i = 0; i < crop_width; i++) {
for (int j = 0; j < crop_height; j++) {
uint8_t pixel_value;
RETURN_IF_NOT_OK(sub_mat->GetItemAt(&pixel_value, {j, i}));
RETURN_IF_NOT_OK((*input)->SetItemAt({y + j, x + i}, pixel_value));
RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {j, i}, {y + j, x + i}));
}
}
} else {
@ -458,6 +452,24 @@ Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Te
return Status::OK();
}
/// Copies one element from source_tensor into *dest_tensor.
/// Only uint8 and float32 element types are handled, and both tensors must
/// share the same type; any other combination yields an error status.
Status CopyTensorValue(const std::shared_ptr<Tensor> &source_tensor, std::shared_ptr<Tensor> *dest_tensor,
                       const std::vector<int64_t> &source_indx, const std::vector<int64_t> &dest_indx) {
  // A mismatched element type would make the raw value copy meaningless.
  if (source_tensor->type() != (*dest_tensor)->type()) {
    RETURN_STATUS_UNEXPECTED("CopyTensorValue: source and destination tensor must have the same type.");
  }
  if (source_tensor->type() == DataType::DE_UINT8) {
    uint8_t value;
    RETURN_IF_NOT_OK(source_tensor->GetItemAt(&value, source_indx));
    RETURN_IF_NOT_OK((*dest_tensor)->SetItemAt(dest_indx, value));
    return Status::OK();
  }
  if (source_tensor->type() == DataType::DE_FLOAT32) {
    float value;
    RETURN_IF_NOT_OK(source_tensor->GetItemAt(&value, source_indx));
    RETURN_IF_NOT_OK((*dest_tensor)->SetItemAt(dest_indx, value));
    return Status::OK();
  }
  RETURN_STATUS_UNEXPECTED("CopyTensorValue: Tensor type is not supported. Tensor type must be float32 or uint8.");
}
Status SwapRedAndBlue(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) {
try {
std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));

View File

@ -133,6 +133,17 @@ Status HwcToChw(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output);
Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Tensor> *input, int x, int y, int width,
int height, ImageFormat image_format);
/// \brief Copies a value from a source tensor into a destination tensor
/// \note This is meant for images and therefore only works if tensor is uint8 or float32
/// \param[in] source_tensor The tensor we take the value from
/// \param[in,out] dest_tensor Pointer to the tensor the value is copied into; returned with the value set
/// \param[in] source_indx Index of the value in the source tensor
/// \param[in] dest_indx Index of the value in the destination tensor
/// \return Status ok/error
Status CopyTensorValue(const std::shared_ptr<Tensor> &source_tensor, std::shared_ptr<Tensor> *dest_tensor,
const std::vector<int64_t> &source_indx, const std::vector<int64_t> &dest_indx);
/// \brief Swap the red and blue pixels (RGB <-> BGR)
/// \param input: Tensor of shape <H,W,3> and any OpenCv compatible type, see CVTensor.
/// \param output: Swapped image of same shape and type

View File

@ -38,13 +38,20 @@ Status MixUpBatchOp::Compute(const TensorRow &input, TensorRow *output) {
// Check inputs
if (image_shape.size() != 4 || image_shape[0] != label_shape[0]) {
RETURN_STATUS_UNEXPECTED("You must batch before calling MixUpBatch");
RETURN_STATUS_UNEXPECTED(
"MixUpBatch:You must make sure images are HWC or CHW and batched before calling MixUpBatch.");
}
if (label_shape.size() != 2) {
RETURN_STATUS_UNEXPECTED("MixUpBatch: Label's must be in one-hot format and in a batch");
if (!input.at(1)->type().IsInt()) {
RETURN_STATUS_UNEXPECTED("MixUpBatch: Wrong labels type. The second column (labels) must only include int types.");
}
if (label_shape.size() != 2 && label_shape.size() != 3) {
RETURN_STATUS_UNEXPECTED(
"MixUpBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC where N is the batch "
"size, L is the number of labels in each row, "
"and C is the number of classes. labels must be in one-hot format and in a batch.");
}
if ((image_shape[1] != 1 && image_shape[1] != 3) && (image_shape[3] != 1 && image_shape[3] != 3)) {
RETURN_STATUS_UNEXPECTED("MixUpBatch: Images must be in the shape of HWC or CHW");
RETURN_STATUS_UNEXPECTED("MixUpBatch: Images must be in the shape of HWC or CHW.");
}
// Move images into a vector of CVTensors
@ -65,16 +72,31 @@ Status MixUpBatchOp::Compute(const TensorRow &input, TensorRow *output) {
// Compute labels
std::shared_ptr<Tensor> out_labels;
RETURN_IF_NOT_OK(TypeCast(std::move(input.at(1)), &out_labels, DataType("float32")));
RETURN_IF_NOT_OK(TypeCast(std::move(input.at(1)), &out_labels, DataType(DataType::DE_FLOAT32)));
int64_t row_labels = label_shape.size() == 3 ? label_shape[1] : 1;
int64_t num_classes = label_shape.size() == 3 ? label_shape[2] : label_shape[1];
for (int64_t i = 0; i < label_shape[0]; i++) {
for (int64_t j = 0; j < label_shape[1]; j++) {
uint64_t first_value, second_value;
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, {i, j}));
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, {rand_indx[i], j}));
RETURN_IF_NOT_OK(out_labels->SetItemAt({i, j}, lam * first_value + (1 - lam) * second_value));
for (int64_t j = 0; j < row_labels; j++) {
for (int64_t k = 0; k < num_classes; k++) {
std::vector<int64_t> first_index = label_shape.size() == 3 ? std::vector{i, j, k} : std::vector{i, k};
std::vector<int64_t> second_index =
label_shape.size() == 3 ? std::vector{rand_indx[i], j, k} : std::vector{rand_indx[i], k};
if (input.at(1)->type().IsSignedInt()) {
int64_t first_value, second_value;
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, first_index));
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, second_index));
RETURN_IF_NOT_OK(out_labels->SetItemAt(first_index, lam * first_value + (1 - lam) * second_value));
} else {
uint64_t first_value, second_value;
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, first_index));
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, second_index));
RETURN_IF_NOT_OK(out_labels->SetItemAt(first_index, lam * first_value + (1 - lam) * second_value));
}
}
}
}
// Compute images
for (int64_t i = 0; i < images.size(); i++) {
TensorShape remaining({-1});

View File

@ -40,6 +40,8 @@ Status PosterizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_pt
}
cv::Mat in_image = input_cv->mat();
cv::Mat output_img;
CHECK_FAIL_RETURN_UNEXPECTED(in_image.depth() == CV_8U || in_image.depth() == CV_8S,
"Input image data type can not be float, but got " + input->type().ToString());
cv::LUT(in_image, lut_vector, output_img);
std::shared_ptr<CVTensor> result_tensor;
RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, &result_tensor));

Some files were not shown because too many files have changed in this diff Show More