Compare commits

...

206 Commits
master ... r0.6

Author SHA1 Message Date
mindspore-ci-bot a2edfcb09b !8171 【轻量级 PR】:update RELEASE.md.
Merge pull request !8171 from shenwei41/N/A
2020-11-05 09:07:23 +08:00
mindspore-ci-bot bbff1828ba !8185 fix securec download links due to mistakes made by openeuler community
Merge pull request !8185 from yanghaoran/r0.6
2020-11-04 09:38:50 +08:00
yanghaoran 94c644cb52 update graphengine, fix securec download links 2020-11-03 20:32:39 +08:00
shenwei41 df90cf1538 update RELEASE.md. 2020-11-03 17:39:25 +08:00
mindspore-ci-bot 4ca658319b !6531 【MD】r0.6 Branch: MD5 value update in the file - icu4c.cmake of branch r0.6
Merge pull request !6531 from magemomou/MD5_r0.6
2020-09-19 12:36:10 +08:00
mayang aa89c9f33c MD5 value update in the file icu4c.cmake of branch r0.6 2020-09-19 10:11:36 +08:00
mindspore-ci-bot d1b1a626c2 !5447 Support manual convert to quantative network of resnet
Merge pull request !5447 from chenfei_mindspore/r0.6
2020-08-31 10:50:48 +08:00
chenfei d27f7bf88b add manual quantative network of resnet 2020-08-29 17:02:28 +08:00
mindspore-ci-bot 50d7480a4e !4457 modify yolov3_quant eval script
Merge pull request !4457 from chengxb7532/r0.6
2020-08-15 10:33:40 +08:00
chengxianbin ef9e3a5360 modify yolov3_darknet53 2020-08-14 16:35:38 +08:00
mindspore-ci-bot 04a6612baf !4424 modify quant DenseBnAct API
Merge pull request !4424 from chengxb7532/r0.6
2020-08-14 15:26:04 +08:00
chengxianbin 7bc5b71b44 modify quant DenseBnAct code 2020-08-13 23:22:49 +08:00
mindspore-ci-bot 30452899ec !4351 modify yolov3-darknet quant net codes
Merge pull request !4351 from chengxb7532/r0.6
2020-08-12 21:54:49 +08:00
chengxianbin 59863abcd3 modify yolov3-darknet53 quant code 2020-08-12 18:10:39 +08:00
mindspore-ci-bot a15ae5238d !4304 upload yolov3-darknet quant net codes
Merge pull request !4304 from chengxb7532/r0.6
2020-08-12 14:31:04 +08:00
chengxianbin c80a1da8ac upload yolov3-darknet53 quant code 2020-08-11 22:16:49 +08:00
mindspore-ci-bot 7d483cd09c !4115 runpackage sync C75B050 for mindspore r0.6
Merge pull request !4115 from HW_KK/r0.6
2020-08-08 14:52:46 +08:00
wuweikang 9f3dcd7ab9 runpackage sync C75B050 for r0.6 2020-08-07 22:19:05 +08:00
mindspore-ci-bot 801660ef08 !3912 fix numpyslice bug
Merge pull request !3912 from luoyang/son_r0.6
2020-08-04 19:43:55 +08:00
YangLuo 16f54c900b fix numpyslice bug 2020-08-04 11:43:19 +08:00
mindspore-ci-bot 5465525f09 !3812 upgrade dockerfile version to 0.6.0-beta
Merge pull request !3812 from yanghaoran/r0.6
2020-07-31 18:57:41 +08:00
mindspore-ci-bot d9320b1606 !3805 modify release note for 0.6
Merge pull request !3805 from changzherui/mod_release
2020-07-31 17:14:15 +08:00
yanghaoran 34f2e94bd4 update mindspore version to 0.6.0-beta 2020-07-31 16:09:21 +08:00
changzherui a0e575a17d modify release 2020-07-31 15:41:45 +08:00
lujiale dc4e15d32c update RELEASE.md. 2020-07-31 15:19:05 +08:00
lujiale 917a7e227f update RELEASE.md. 2020-07-31 12:11:31 +08:00
lujiale 1b7daf777a update build.sh. 2020-07-31 12:09:03 +08:00
mindspore-ci-bot 7d6160516f !3761 simplify googlenet
Merge pull request !3761 from panfengfeng/simplify_googlenet
2020-07-31 11:27:50 +08:00
panfengfeng ca881ec03e add maxpool_with_argmax/grad cuda kernel 2020-07-30 23:38:35 +08:00
mindspore-ci-bot 983437feaf !3757 debug mindspore hub
Merge pull request !3757 from chenzhongming/r0.6
2020-07-30 23:22:25 +08:00
chenzomi a059e8910f debug mindspore hub 2020-07-30 22:29:51 +08:00
mindspore-ci-bot 9dc23eeb98 !3602 Delete hard code in pull node
Merge pull request !3602 from ZPaC/r0.6-delete-hard-code-in-pull-node
2020-07-30 22:25:04 +08:00
ZPaC 78e3cb4bc4 Delete hard code in pull kernel. 2020-07-30 20:26:06 +08:00
mindspore-ci-bot 0db3ff5773 !3742 fix GetInputReshapeType reports ERROR
Merge pull request !3742 from liubuyu/r0.6
2020-07-30 20:13:45 +08:00
mindspore-ci-bot c9583ad3a4 !3730 fix bug of cast dtype when using mix_presion in pynative mode
Merge pull request !3730 from jinyaohui/mix_presion
2020-07-30 20:08:18 +08:00
mindspore-ci-bot 294520e1fd !3548 Pass optimzier attributes to push kernel and parameter server.
Merge pull request !3548 from ZPaC/r0.6-pass-attr-to-ps
2020-07-30 19:22:12 +08:00
mindspore-ci-bot 4621565258 !3733 block trans data to change format
Merge pull request !3733 from lvchangquan/r.06
2020-07-30 19:14:10 +08:00
mindspore-ci-bot b3b71e1d3f !3724 modify readme and timemoniter steps
Merge pull request !3724 from wanghua/r0.6
2020-07-30 19:01:31 +08:00
liubuyu 7d5e523743 fix set/get reshape type bug 2020-07-30 18:43:35 +08:00
mindspore-ci-bot 0fb669190a !3703 Enlarge the threshold of resnet50 performance st in pynative
Merge pull request !3703 from JoyLvliang/r0.6
2020-07-30 16:42:53 +08:00
jinyaohui db216a077a fix bug of cast dtype when using mix_presion in pynative mode 2020-07-30 16:42:50 +08:00
lvchangquan f298e55072 block use trans data to change format 2020-07-30 16:36:58 +08:00
mindspore-ci-bot dcd471eb96 !3718 add mindspore hub for download ckpt file
Merge pull request !3718 from chenzhongming/r0.6
2020-07-30 16:25:30 +08:00
wanghua c9a675f4e5 modify readme and timemoniter steps 2020-07-30 15:49:43 +08:00
mindspore-ci-bot fdc183ad36 !3704 [r0.6][bug][auto_mixed_precision]fix amp bug in eval
Merge pull request !3704 from vlne-v1/amp_doc_r0.6
2020-07-30 15:24:01 +08:00
lvliang 937c5b5d8e enlarge the threshold of resnet50 performance in pynative 2020-07-30 15:02:32 +08:00
chenzomi 783b823a25 add mindspore hub for download ckpt file
add mindspore.hub and change model_zoo
2020-07-30 15:02:17 +08:00
mindspore-ci-bot 30ffcd8a1f !3681 modelzoo: support vgg16 in GPU
Merge pull request !3681 from ms_yan/vgg_r0.6
2020-07-30 14:29:25 +08:00
mindspore-ci-bot 9ab94fa076 !3685 add tinybert scripts
Merge pull request !3685 from wanghua/r0.6
2020-07-30 14:24:03 +08:00
mindspore-ci-bot 944929f980 !3682 add googlenet gpu
Merge pull request !3682 from panfengfeng/googlenet-gpu_support
2020-07-30 11:38:05 +08:00
mindspore-ci-bot 09dd4128d5 !3689 fix cpu multi graph mem error
Merge pull request !3689 from kisnwang/r0.6-fix-cpu-multi-graph-memory-error
2020-07-30 10:42:18 +08:00
Wei Luning ca4b2f6c0b fix eval in amp 2020-07-30 10:27:22 +08:00
mindspore-ci-bot 7f3926429b !3628 fix log bug
Merge pull request !3628 from gukecai/log
2020-07-30 09:13:44 +08:00
kswang 7360a2fa07 fix cpu multi graph mem error 2020-07-30 09:13:05 +08:00
mindspore-ci-bot 10f0f0d5a5 !3673 fix serving input numbers
Merge pull request !3673 from hexia/fix_input_check_r0.6
2020-07-30 09:12:13 +08:00
mindspore-ci-bot 6b81f9f7f7 !3683 Modify patches and alerts
Merge pull request !3683 from shenwei41/r0.6
2020-07-30 09:09:24 +08:00
mindspore-ci-bot 5a36b19e80 !3666 Modify the order of init and open of TDT
Merge pull request !3666 from hanjun996/r0.6
2020-07-30 09:07:09 +08:00
mindspore-ci-bot 6944af09ee !3596 fix batchnorm issue under mix precision in pynative mode
Merge pull request !3596 from wangqiuliang/fix-batchnorm-r0.6
2020-07-30 08:57:01 +08:00
ms_yan e497117b74 init add vgg16 gpu version
merge the script

optimize the script

repair problem in vgg16 cifar10 version

optimize the vgg script
2020-07-30 07:41:39 +08:00
mindspore-ci-bot 78375e104a !3680 lowering value checking threshold to fix bug of pass eps
Merge pull request !3680 from wangnan39/lowering_value_checking_threshold_to_support_training_with_very_small_steps
2020-07-29 22:52:04 +08:00
mindspore-ci-bot abd346e84b !3649 modify setup.py version number for r0.6
Merge pull request !3649 from changzherui/mod_ver_num
2020-07-29 21:59:11 +08:00
mindspore-ci-bot 9156775655 !3677 support multy node training in deeplabv3
Merge pull request !3677 from zhouyaqiang0/r0.6
2020-07-29 21:53:56 +08:00
mindspore-ci-bot df7f0c8a7c !3659 modify readme for maskrcnn
Merge pull request !3659 from meixiaowei/r0.6
2020-07-29 21:49:09 +08:00
wanghua 9da1c96c4a add tinybert scripts 2020-07-29 21:21:18 +08:00
panfengfeng 7d5a67e9f0 googlenet-gpu 2020-07-29 21:19:28 +08:00
kingfo fc92598881 fix batchnorm issue in pynative auto mix precision 2020-07-29 21:15:54 +08:00
mindspore-ci-bot e3fe1d76ca !3558 Fix a racing condition in CacheMergeOp when the leaf hits an error and exit too early
Merge pull request !3558 from guozhijian/fix_dataset_none_hung
2020-07-29 21:07:54 +08:00
mindspore-ci-bot b429a8421f !3586 fix python api doc for mindspore .dataset
Merge pull request !3586 from guansongsong/gss/fix_python_api_for_r0.6
2020-07-29 20:50:03 +08:00
mindspore-ci-bot bb4339e3ca !3584 Fix a DatasetCache sharing scenario
Merge pull request !3584 from guansongsong/gss/fix_cache
2020-07-29 20:49:09 +08:00
shenwei41 e49a2f83e7 Modify patches and alerts 2020-07-29 20:47:29 +08:00
mindspore-ci-bot 1ec63700c7 !3632 Fix resource not release bug
Merge pull request !3632 from Kang/r0.6
2020-07-29 20:25:31 +08:00
wangnan39@huawei.com fc5d419422 Lowering value checking threshold to support fix the bug of pass add eps 2020-07-29 20:21:31 +08:00
mindspore-ci-bot d4b5cda934 !3604 Fix minor errors in probabilistic programming
Merge pull request !3604 from peixu_ren/r0.6
2020-07-29 20:00:13 +08:00
ZPaC d6a56cd6fd Pass optimizer attributes to push nodes. 2020-07-29 19:59:42 +08:00
mindspore-ci-bot f04243b1f1 !3663 Fix multi worker
Merge pull request !3663 from ZPaC/r0.6-fix-sgd
2020-07-29 19:53:43 +08:00
mindspore-ci-bot 6b57b4f0e1 !3652 add epoch_num description
Merge pull request !3652 from panfengfeng/add_epoch_num_description
2020-07-29 19:33:36 +08:00
zhouyaqiang b096a6cbe9 support multy node training and remove code 2020-07-29 19:18:08 +08:00
hanjun996 c718774538 modify tdt 2020-07-29 19:01:41 +08:00
mindspore-ci-bot 68128f87a9 !3634 Spilt unsupported transdata
Merge pull request !3634 from lianliguang/r0.6
2020-07-29 18:52:22 +08:00
mindspore-ci-bot 22dbd1a233 !3646 [MD] fix minddataset core dump when file list size ia greater than 1000.
Merge pull request !3646 from liyong126/r0.6_fix_minrecord_bug
2020-07-29 18:29:37 +08:00
hexia 52776820d8 fix_input_check 2020-07-29 18:28:38 +08:00
guansongsong 5b15f40598 Fix a DatasetCache sharing scenario 2020-07-29 17:42:05 +08:00
cristoval bf74164df3 fix sync sgd under multi-worker 2020-07-29 17:39:51 +08:00
meixiaowei e5b9776b86 modify readme 2020-07-29 16:46:52 +08:00
panfengfeng 8803c6258d add epoch_num 2020-07-29 16:06:56 +08:00
changzherui 614841aa39 modify setup version number 2020-07-29 15:22:14 +08:00
wuyongkang 983cb9b23d Fix resource not release bug 2020-07-29 14:52:38 +08:00
guansongsong 68f27eb62b fix python api doc for mindspore.dataset 2020-07-29 14:49:17 +08:00
mindspore-ci-bot 924a34acb8 !3639 fix GeneratorDataset time out
Merge pull request !3639 from yanghaitao/yht_generator_timeout_r0.6
2020-07-29 14:42:38 +08:00
mindspore-ci-bot db01f3eafe !3640 support bprop for const in pynative and develop stridedslice and isinstance
Merge pull request !3640 from zhangbuxue/support_bprop_for_const_in_pynative_and_develop_stridedslice_and_isinstance
2020-07-29 14:32:25 +08:00
liyong 66d8395fea fix coredump when number of file list more than 1000. 2020-07-29 14:30:00 +08:00
mindspore-ci-bot e33b5e435e !3633 fix dataset & train gil lock of gpu process
Merge pull request !3633 from panfengfeng/fix_dataset_train_gil_of_gpu
2020-07-29 14:18:07 +08:00
mindspore-ci-bot 477bf42fe5 !3641 Update submodule akg to r0.6 branch
Merge pull request !3641 from looop5/akg_r0.6
2020-07-29 14:14:56 +08:00
WilliamLian edba641ddb split unsupported transdata 2020-07-29 14:07:19 +08:00
mindspore-ci-bot 338a225410 !3623 [r0.6][bug][auto_mixed_precision]fix amp doc and eval network build
Merge pull request !3623 from vlne-v1/amp_doc_r0.6
2020-07-29 13:02:45 +08:00
looop5 13d8bedbf4 update submodule akg to r0.6 branch 2020-07-29 12:03:29 +08:00
mindspore-ci-bot 9a43468fee !3626 fix: device occupied tdt hung
Merge pull request !3626 from guozhijian/fix_device_occupied_tdt_hung_r0.6
2020-07-29 11:58:22 +08:00
buxue 6beb8071d7 support bprop for const in pynative and develop stridedslice and isinstance. 2020-07-29 11:56:59 +08:00
mindspore-ci-bot cc233f66ab !3629 Fix numpyslice issue
Merge pull request !3629 from xiefangqi/md_fix_numpyslice_r0.6
2020-07-29 11:56:51 +08:00
yanghaitao 248130e5d1 fix generator time out 2020-07-29 11:54:17 +08:00
mindspore-ci-bot 8f6eafdfcd !3589 fix the description of cache
Merge pull request !3589 from guansongsong/fix_cache_core_for_r0.6
2020-07-29 11:39:01 +08:00
xiefangqi 30ed5a25ce fix numpyslice issue to r0.6 2020-07-29 10:39:13 +08:00
panfengfeng 4eea891730 fix dataset train gil of gpu 2020-07-29 10:37:59 +08:00
gukecai fe29a2501f fix log bug 2020-07-29 10:07:56 +08:00
jonyguo 0d375bbaa3 fix: device occupied tdt hung 2020-07-29 09:46:32 +08:00
mindspore-ci-bot 4f1e586ee3 !3579 fix maskrcnn dataset rescale bug
Merge pull request !3579 from meixiaowei/r0.6
2020-07-29 09:37:26 +08:00
Wei Luning dd26d85caf fix doc and eval network build in amp 2020-07-29 09:15:30 +08:00
peixu_ren 49cdeb3f78 Fix minor errors in probabilistic programming 2020-07-28 11:35:10 -04:00
mindspore-ci-bot d9ca3f2e88 !3566 dataset: api format problem in totype, totensor, slice
Merge pull request !3566 from ms_yan/r0.6_api_format
2020-07-28 23:23:27 +08:00
mindspore-ci-bot c5f8b6b0c7 !3599 merge fix sparse doc to r0.6
Merge pull request !3599 from riemann_penn/merge_fix_sparse_doc_to_r0.6
2020-07-28 21:52:22 +08:00
panyifeng 3714a07d71 fix sparse api doc 2020-07-28 21:26:18 +08:00
mindspore-ci-bot 950367c102 !3595 add desc about sink_size
Merge pull request !3595 from jinyaohui/sink_size
2020-07-28 21:23:45 +08:00
jinyaohui 40b859395d add description about sink_size 2020-07-28 20:21:41 +08:00
mindspore-ci-bot d7caa7955b !3582 Fix minddata cache include flatbuffer head problem
Merge pull request !3582 from xiefangqi/r0.6
2020-07-28 18:54:10 +08:00
mindspore-ci-bot 552490326f !3572 [MD] fix save pydoc and log
Merge pull request !3572 from liyong126/r0.6_fix_save_pydoc_log
2020-07-28 18:53:33 +08:00
guansongsong 543b75f366 fix the description of cache 2020-07-28 18:13:51 +08:00
mindspore-ci-bot 3d87436bb0 !3580 fix allreduce fusion case in grad reducer
Merge pull request !3580 from gziyan/fix_allreduce_fusion
2020-07-28 17:18:55 +08:00
ms_yan 47efc83bcd repair api format problem in totype, totensor, slice 2020-07-28 16:36:16 +08:00
xiefangqi 0e4065f0ef fix flatbuffer head to r0.6 2020-07-28 16:21:09 +08:00
Ziyan fdb21ecf74 update 2020-07-28 16:15:16 +08:00
meixiaowei 7df05b1da7 fix rescale dataset bug 2020-07-28 16:13:48 +08:00
mindspore-ci-bot c617a07dff !3533 modify serving readme
Merge pull request !3533 from dinghao/r0.6
2020-07-28 15:51:29 +08:00
liyong f52859a2fc fix save op pydoc and log 2020-07-28 14:34:21 +08:00
mindspore-ci-bot 2a6884d97c !3564 [Auto parallel] Cost model for GPU
Merge pull request !3564 from Xiaoda/15-r0.6-add-new-gpu-costmodel
2020-07-28 11:55:22 +08:00
dinghao b54fc35cde modify serving readme 2020-07-28 10:16:35 +08:00
Xiaoda Zhang ab676ba81a add costmodel for gpu 2020-07-28 10:07:49 +08:00
Jesse Lee f118869869 Fix a merge_op timing hole 2020-07-28 09:37:44 +08:00
mindspore-ci-bot c31c1c808a !3530 Fix a bug for Parameter
Merge pull request !3530 from hewei/fix_parameter_bug_r0.6
2020-07-27 20:24:48 +08:00
mindspore-ci-bot 67600c1d8c !3539 Change at-most collected tensor summary from 50 to 20 when auto-calculated
Merge pull request !3539 from LiHongzhang/f50_t20_r
2020-07-27 19:54:19 +08:00
mindspore-ci-bot 49e8727d37 !3518 fix python import r0.6
Merge pull request !3518 from hexia/fix_python_import_r0.6
2020-07-27 19:36:04 +08:00
mindspore-ci-bot 36c2bbdbcc !3501 fix sparse feature bug for auto parallel
Merge pull request !3501 from lirongzhen1/r0.6
2020-07-27 19:28:22 +08:00
mindspore-ci-bot a536e922c2 !3524 add bert ci script to r0.6 branch
Merge pull request !3524 from yoonlee666/bertci
2020-07-27 17:38:56 +08:00
Li Hongzhang d86668d216 change at-most collected tensor from 50 to 20
When `collect_tensor_freq` is specified as `None`,
the `collect_tensor_freq` would be auto calculated.

The previous behavior is to collect at most 50 steps,
now changing to 20
2020-07-27 17:04:27 +08:00
mindspore-ci-bot bcba696a62 !3482 `max_file_size` includes metadata and drops the last step
Merge pull request !3482 from LiHongzhang/limit_summary_r
2020-07-27 17:02:01 +08:00
He Wei 1f6771256d Fix a bug for Parameter
1. Parameter's init_data() should have no effect if default_input already set;
2. This bug is introduced by 'decouple ParamValue from python';
3. An unit test case is added to ensure the right init_data() behavior.
2020-07-27 15:45:18 +08:00
yoonlee666 1dcf9abf6a add bert ci script 2020-07-27 15:08:03 +08:00
hexia 5fb1280e12 fix python import 2020-07-27 14:27:17 +08:00
mindspore-ci-bot dfab48d532 !3492 Change readme.txt in WarpCTC and checkpoint directory
Merge pull request !3492 from yangyongjie/r0.6
2020-07-27 11:17:45 +08:00
Li Hongzhang 5a517f3a49 max_file_size include metadata length and drop last step 2020-07-27 10:54:37 +08:00
mindspore-ci-bot 62cf01fc7b !3509 Add parameter server mode_zoo case and CI test cases.
Merge pull request !3509 from ZPaC/add-ps-test-cases
2020-07-27 10:43:55 +08:00
ZPaC b109e6f643 Add parameter server model_zoo case and CI test cases. 2020-07-27 09:53:42 +08:00
mindspore-ci-bot fdf198eee9 !3493 Modify comment of register_backward_hook [r0.6]
Merge pull request !3493 from Simson/push-to-r06
2020-07-27 09:30:26 +08:00
mindspore-ci-bot 7f6f140d94 !3498 Fix getting output address of internal output
Merge pull request !3498 from YuJianfeng/r0.6
2020-07-26 18:06:00 +08:00
mindspore-ci-bot ec3e7269ba !3505 merge eager mode enable sparse to r0.6
Merge pull request !3505 from riemann_penn/merge_eager_mode_enable_sparse_to_r0.6
2020-07-25 22:07:51 +08:00
panyifeng 032c5e0fdc eager mode enable sparse 2020-07-25 19:18:20 +08:00
mindspore-ci-bot 9626532e0b !3499 Delete parameter name hard code for embedding-lookup
Merge pull request !3499 from ZPaC/r0.6-delete-param-name-hard-code
2020-07-25 18:24:43 +08:00
mindspore-ci-bot 304ae51a25 !3470 Init CSV column default list when it's empty r0.6
Merge pull request !3470 from jiangzhiwen/fix_column_name_r_0_6
2020-07-25 18:15:18 +08:00
yangyongjie 2241017e3f fix word missing in readme.txt 2020-07-25 18:02:08 +08:00
ZPaC c1b36c3d4f Delete parameter name hard code for embedding table. 2020-07-25 17:26:59 +08:00
lirongzhen1 8af4a16d9d fix sparse feature bug for auto parallel 2020-07-25 17:26:43 +08:00
yujianfeng 67ed5451ad Fix getting output address of internal output 2020-07-25 17:11:37 +08:00
mindspore-ci-bot ac564a9e86 !3466 fix cpu nonop net fp16 error
Merge pull request !3466 from kisnwang/r0.6-fix-cpu-nonop-fp16-error
2020-07-25 16:56:49 +08:00
mindspore-ci-bot 375078cf55 !3471 Fixing Bug with AutoContrast/Equalize supporting uint8 dtype/mnist
Merge pull request !3471 from guozhijian/fix_autocontrast_equalize_support_uint8
2020-07-25 16:48:49 +08:00
simson 63bb52b408 Modify comment of register_backward_hook 2020-07-25 16:43:31 +08:00
mindspore-ci-bot c9f25d0d5c !3477 upload maskrcnn scripts
Merge pull request !3477 from gengdongjie/r0.6
2020-07-25 15:13:54 +08:00
mindspore-ci-bot b0cb13d265 !3463 [MD]Fix Segementation Falut when SentencepieceTokenizer Op before zipOp and ConcatOp
Merge pull request !3463 from xulei/tmp_0.6
2020-07-25 15:10:16 +08:00
mindspore-ci-bot 14ce0afab3 !3478 Add Warpctc GPU network
Merge pull request !3478 from yangyongjie/r0.6
2020-07-25 14:35:32 +08:00
mindspore-ci-bot 26733198e9 !3458 fix getdataset size error r0.6
Merge pull request !3458 from panfengfeng/fix_getdataset_size_error_r0.6
2020-07-25 14:24:09 +08:00
mindspore-ci-bot 73f58dc937 !3480 Graceful shutdown for ps modules
Merge pull request !3480 from chengang/graceful_shutdown_ps_r0.6
2020-07-25 14:22:24 +08:00
cristoval c1332c03e5 support graceful shutdown for ps components 2020-07-25 12:19:45 +08:00
meixiaowei 10c74de9b6 upload maskrcnn scripts 2020-07-25 12:01:45 +08:00
yangyongjie 28b9074e9b add warpctc GPU 2020-07-25 11:53:57 +08:00
mindspore-ci-bot 63442d563f !3402 [AutoParallel]Fix autoparallel gpu bug
Merge pull request !3402 from lichen/fix_autoparallel_gpu_bug
2020-07-25 11:40:26 +08:00
islam_amin b0e83c5a06 Fixing AutoContrast/Equalize Bug 2020-07-25 11:30:34 +08:00
kswang 9f5315fc80 fix cpu nonop net fp16 error 2020-07-25 11:26:39 +08:00
panfengfeng 4e7cb1a7a4 fix get daataset size error 2020-07-25 11:20:24 +08:00
jiangzhiwen d408cdf0e0 init column_default_list_ when it is empty 2020-07-25 11:04:07 +08:00
mindspore-ci-bot c5e6cfebe7 !3436 fix mix precision operator issue
Merge pull request !3436 from wangqiuliang/fix-mix-precision-r0.6
2020-07-25 10:50:42 +08:00
cristoval aac2275d1b support graceful shutdown for ps components 2020-07-25 10:08:27 +08:00
mindspore-ci-bot 70aee2fe7a !3401 cpp client example
Merge pull request !3401 from hexia/cpp_client_example_r0.6
2020-07-25 09:57:16 +08:00
xulei2020 c43bc92d7c add code 2020-07-25 09:39:23 +08:00
kingfo 5916da1763 fix mix precision operator issue 2020-07-25 09:30:29 +08:00
mindspore-ci-bot 50e20e4042 !3443 Restore the code to collect the graph network
Merge pull request !3443 from LiHongzhang/oh_graph_r
2020-07-24 22:05:46 +08:00
Li Hongzhang 2373e94384 restore the ability to collect network graph 2020-07-24 21:01:28 +08:00
mindspore-ci-bot cda920b21b !3432 add single quotes and modify parameters
Merge pull request !3432 from lijiaqi/add_single_quotes_and_others
2020-07-24 20:30:07 +08:00
mindspore-ci-bot af4b4fb36d !3417 fix bug of group lr when save ckpt
Merge pull request !3417 from wangnan39/fix_bug_of_group_lr_when_save_ckpt
2020-07-24 20:17:54 +08:00
mindspore-ci-bot 927a52fdf8 !3388 Transfer tuple getitem's control to new added memcpy_async
Merge pull request !3388 from huanghui/r0.6
2020-07-24 20:14:56 +08:00
mindspore-ci-bot 0f8c4d6794 !3428 modify annotation: wegith_decay modify weight_decay
Merge pull request !3428 from lilei/modify_weight_decay_annotation
2020-07-24 20:06:43 +08:00
李嘉琪 8feb9450f2 add single quotes and modify parameters 2020-07-24 16:41:25 +08:00
lilei f304fe9614 modify weight_decay annotation 2020-07-24 16:10:07 +08:00
mindspore-ci-bot e62137f7c0 !3406 fix optimizer parallel problems
Merge pull request !3406 from gziyan/fix_optimizer_parallel_r0.6
2020-07-24 15:11:19 +08:00
mindspore-ci-bot c005dfd803 !3389 merge sparse hot fix to r0.6
Merge pull request !3389 from riemann_penn/merger_sparse_hot_fix_to_r0.6
2020-07-24 14:43:46 +08:00
mindspore-ci-bot a051d7c5dc !3410 [bug][ci]fix bug when remove the phis
Merge pull request !3410 from vlne-v1/fix_redundant_phi-r0.6
2020-07-24 14:21:57 +08:00
wangnan39@huawei.com 3c93ff3385 fix_bug_of_group_lr_when_save_ckpt 2020-07-24 13:40:14 +08:00
Wei Luning 43d4f80428 fix bug in remove phiphi should replace the inner ones first 2020-07-24 12:39:58 +08:00
Ziyan 9f264b6e55 fix optimizer parallel problems 2020-07-24 11:36:11 +08:00
panyifeng 2cebc62bbf fix sparse related issues 2020-07-24 11:21:29 +08:00
mindspore-ci-bot f9aec99c01 !3379 modify the vgg16/lstm path to offical/{cv/nlp}
Merge pull request !3379 from caojian05/ms_master_dev
2020-07-24 11:08:07 +08:00
CaoJian 80a655099a modify the vgg16/lstm path to offical/{cv/nlp} 2020-07-24 10:19:47 +08:00
hexia f14974392c cpp_client_example_r0.6 2020-07-24 10:04:20 +08:00
huanghui 3901c0414f deal tuple getitem control for new added memcpy 2020-07-24 09:54:23 +08:00
lichenever 12738ceda7 fix auto parallel gpu bug 2020-07-24 09:40:33 +08:00
mindspore-ci-bot fe0348b3d7 !3380 Fix visit depend node
Merge pull request !3380 from lianliguang/r0.6
2020-07-24 09:03:06 +08:00
mindspore-ci-bot 93ce266ae5 !3373 support call super when class define in test_case
Merge pull request !3373 from zhangbuxue/support_call_super_when_class_define_in_test_case_
2020-07-23 21:20:11 +08:00
WilliamLian 35b466f8f7 fix visit depend node 2020-07-23 20:23:51 +08:00
buxue 15487759ff support call super when class define in test_case. 2020-07-23 18:30:00 +08:00
mindspore-ci-bot 251fba00f5 !3363 fix cloner when funcgraph return is null
Merge pull request !3363 from leopz/fix_clone
2020-07-23 17:27:07 +08:00
mindspore-ci-bot 984be47299 !3365 restructure client example
Merge pull request !3365 from hexia/restructure_client_example_r0.6
2020-07-23 16:47:35 +08:00
mindspore-ci-bot 45d8a9eea3 !3354 improve performance of bert by adding order paramters
Merge pull request !3354 from shibeiji/r0.6
2020-07-23 16:19:01 +08:00
mindspore-ci-bot 5cdfbf0e82 !3359 fix cpu nonop net
Merge pull request !3359 from kisnwang/r0.6-cpu-support-nonop-net
2020-07-23 16:01:37 +08:00
hexia 9daa8a890b restructure client example 2020-07-23 14:54:06 +08:00
leopz 61bf0c5d99 fix cloner when funcgraph is null 2020-07-23 12:06:56 +08:00
mindspore-ci-bot 27982ebbe8 !3347 Fix internal multiple outputs check
Merge pull request !3347 from YuJianfeng/r0.6
2020-07-23 11:49:17 +08:00
kswang 926120ef95 cpu support nonop net 2020-07-23 11:48:48 +08:00
shibeiji 1ae2d2d6c8 add order params for bert to improve performance 2020-07-23 11:11:43 +08:00
yujianfeng 16035dc62c Fix internal multiple outputs check 2020-07-23 09:11:15 +08:00
368 changed files with 17385 additions and 2712 deletions

View File

@ -106,6 +106,7 @@ endif() # NOT ENABLE_ACL
if (ENABLE_SERVING)
add_subdirectory(serving)
add_subdirectory(serving/example/cpp_client)
endif()
if (NOT ENABLE_ACL)

View File

@ -75,7 +75,7 @@ For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an ex
1. Download whl from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package.
```
pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.5.0-beta/MindSpore/cpu/ubuntu_x86/mindspore-0.5.0-cp37-cp37m-linux_x86_64.whl
pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.6.0-beta/MindSpore/cpu/ubuntu_x86/mindspore-0.6.0-cp37-cp37m-linux_x86_64.whl
```
2. Run the following command to verify the install.

View File

@ -1,3 +1,76 @@
# Release 0.6.0-beta
## Major Features and Improvements
### Ascend 910 Training and Inference Framework
* New models
* There are official, research and community under modelzoo.
* Official is maintained with the newest APIs by MindSpore team, MaskRCNN is added.
* Research is uploaded by researchers for official review, and APIs may not be updated in time.
* Community reprints the relevant links of partner research results.
* Hub added on the same level as modelzoo, synchronous storage of materials needed for official hub web pages which will be launched soon.
* Support pre-trained models, few lines of code can be used to download and load pre-trained models, supporting inference or transfer learning.
* Frontend and user interface
* Supports user side operator compilation and graph execution error rendering.
* Unified the definition of dynamic learning rate behavior in optimizers.
* Support IndexSlice in sparse expression.
* Support using the parent class's construct method during construct.
* Support asynchronously saving checkpoint files.
* Support implicit type conversion in pynative mode.
* User interfaces change log
* uniform learning rate behavior in optimizers([!2755](https://gitee.com/mindspore/mindspore/pulls/2755))
* rename operator of sparse optimizer([!3217](https://gitee.com/mindspore/mindspore/pulls/3217))
* move profiler module from mindinsight to mindspore([!3075](https://gitee.com/mindspore/mindspore/pulls/3075))
* VOCDataset output change to multi-columns([!3093](https://gitee.com/mindspore/mindspore/pulls/3093))
* GetDatasize feature([!3212](https://gitee.com/mindspore/mindspore/pulls/3212))
* dataset: modify config api([!2936](https://gitee.com/mindspore/mindspore/pulls/2936))
* Executor and performance optimization
* Decouple C++ and python, so make the architecture more extensible.
* Parameter Server for distributed deep learning supported.
* Serving: a flexible service deployment framework for deep learning models.
* Memory reuse is enhanced, and the batch size of Bert large model is increased from 96 to 160 on a single server.
* Data processing, augmentation, and save format
* Support MindRecord save operator after data processing
* Support automatic fusion operator, such as decode/resize/crop
* Support CSV dataset loading
### Other Hardware Support
* GPU platform
* New model supported: ResNext50, WarpCTC and GoogLeNet.
* Support hyperparametric search and data enhanced automl on GPU.
* Support Resnet50 automatic parallel in GPU backend.
## Bugfixes
* Models
* Improved the performance and accuracy on ResNet50([!3456](https://gitee.com/mindspore/mindspore/pulls/3456))
* Fixed the performance test case of bert([!3486](https://gitee.com/mindspore/mindspore/pulls/3486))
* Python API
* Fix assign used in while loop([!2720](https://gitee.com/mindspore/mindspore/pulls/2720))
* Revert optimize the graph output of all nop node.([!2857](https://gitee.com/mindspore/mindspore/pulls/2857))
* Print tensor as numpy.([!2859](https://gitee.com/mindspore/mindspore/pulls/2859))
* Support weight decay for sparse optimizer([!2668](https://gitee.com/mindspore/mindspore/pulls/2668))
* Fix BatchToSpaceND([!2741](https://gitee.com/mindspore/mindspore/pulls/2741))
* Fixing type check mistakes of InplaceAdd and Inplace Sub ops([!2744](https://gitee.com/mindspore/mindspore/pulls/2744]))
* Change order param only equal to group param([!2748](https://gitee.com/mindspore/mindspore/pulls/2748))
* Executor
* The performance of graph with control flow is optimized([!2931](https://gitee.com/mindspore/mindspore/pulls/2931))
* Fix bug of wrong number of tuple layers([!3390](https://gitee.com/mindspore/mindspore/pulls/3390))
* Fix cpu multi graph memory exception([!3631](https://gitee.com/mindspore/mindspore/pulls/3631))
* Enable data sync when calling operator without defining a cell([!3081](https://gitee.com/mindspore/mindspore/pulls/3081))
* Fix ArgMaxWithValue error in pynative mode on GPU([!3082](https://gitee.com/mindspore/mindspore/pulls/3082))
* Fix precision error with fp16 input on pynative mode([!3196](https://gitee.com/mindspore/mindspore/pulls/3196))
* Data processing
* Fix bug of RandomColor and RandomSharpness default parameter checking ([!2833](https://gitee.com/mindspore/mindspore/pulls/2833))
* Fix process hung when training and eval ([!3469](https://gitee.com/mindspore/mindspore/pulls/3469))
* Third party
* Sqlite : Update sqlite to 3.32.2 to handle [CVE-2020-11656](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11656), [CVE-2020-13871](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13871), [CVE-2020-11655](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11655), [CVE-2020-9327](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-9327), [CVE-2020-13630](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13630), [CVE-2020-15358](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15358), [CVE-2020-13631](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13631), [CVE-2020-13632](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13632), [CVE-2020-13434](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13434), and [CVE-2020-13435](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13435).
* Libjpeg-turbo : Update libjpeg-turbo to 2.0.4 to handle [CVE-2020-13790](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13790).
## Contributors
Thanks goes to these wonderful people:
Alexey Shevlyakov, avakh, baihuawei, BowenK, buxue, caifubi, caojian05, Cathy Wong, changzherui, chenfei, chengxianbin, chenhaozhe, chenjianping, chentingting, chenzomi, chujinjin, Danish Farid, dayschan, dengwentao, dinghao, etone-chan, fangzehua, fary86, geekun, Giancarlo Colmenares, gong chen, gukecai, guohongzilong, hangangqiang, heleiwang, hesham, He Wei, hexia, hongxing, huangdongrun, huanghui, islam_amin, Jamie Nisbet, Jesse Lee, jiangjinsheng, jiangzhiwen, jinyaohui, jjfeing, jojobugfree, Jonathan Yan, jonyguo, Junhan Hu, Kang, kingfo, kouzhenzhong, kpy, kswang, laiyongqiang, leopz, liangzelang, lichenever, lihongkang, Li Hongzhang, lilei, limingqi107, lirongzhen1, liubuyu, liuchongming74, liuwenhao4, liuxiao, Lixia Chen, liyanliu, liyong, lizhenyu, lvliang, Mahdi, Margaret_wangrui, meixiaowei, ms_yan, nhussain, ougongchang, panfengfeng, panyifeng, peilinwang, Peilin Wang, pkuliuliu, qianlong, rick_sanchez, shibeiji, Shida He, shijianning, simson, sunsuodong, suteng, Tinazhang, Tron Zhang, unknown, VectorSL, wandongdong, wangcong, wangdongxu, wangdongxu6, wanghua, wangnan39, Wei Luning, wenchunjiang, wenkai, wilfChen, WilliamLian, wukesong, Xian Weizhao, Xiaoda Zhang, xiefangqi, xulei2020, xunxue, xutianchun, Yang, yanghaitao, yanghaitao1, yanghaoran, yangjie, yangjie159, YangLuo, Yanjun Peng, yankai, yanzhenxiang2020, yao_yf, Yi Huaijie, yoonlee666, yuchaojie, yujianfeng, zhangzhongpeng, zhangdengcheng, Zhang Qinghua, zhangyinxia, zhangz0911gm, zhaojichen, zhaoting, zhaozhenlong, zhoufeng, zhouneng, zhousiyi, Zirui Wu, Ziyan, zjun, ZPaC, lihongzhang, wangdongxu
Contributions of any kind are welcome!
# Release 0.5.0-beta
## Major Features and Improvements
@ -62,6 +135,8 @@
* Fix bug of Cifar dataset reading([!2096](https://gitee.com/mindspore/mindspore/pulls/2096))
* Fix bug of C++ behavior in RandomCropAndResize([!2026](https://gitee.com/mindspore/mindspore/pulls/2026))
* Fix the bug of mindrecord shuffle([!2420](https://gitee.com/mindspore/mindspore/pulls/2420))
* Third party
* Sqlite : Update sqlite to 3.32.2 to handle [CVE-2020-11656](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11656), [CVE-2020-13871](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13871), [CVE-2020-11655](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11655), [CVE-2020-9327](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-9327), [CVE-2020-13630](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13630), [CVE-2020-15358](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15358), [CVE-2020-13631](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13631), [CVE-2020-13632](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13632), [CVE-2020-13434](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13434), and [CVE-2020-13435](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13435).
## Contributors
Thanks go to these wonderful people:
@ -134,7 +209,7 @@ Contributions of any kind are welcome!
* Fix sens shape error of TrainOneStepWithLossScaleCell([!1050](https://gitee.com/mindspore/mindspore/pulls/1050))
* Fix BatchNormGrad operator([!1344](https://gitee.com/mindspore/mindspore/pulls/1344))
* Executor
* Fix dropouttopK and addn errors in PyNative mode ([!1285](https://gitee.com/mindspore/mindspore/pulls/1285), [!1138](https://gitee.com/mindspore/mindspore/pulls/1138), [!1033](https://gitee.com/mindspore/mindspore/pulls/1033)).
* Fix dropout, topK and addn errors in PyNative mode ([!1285](https://gitee.com/mindspore/mindspore/pulls/1285), [!1138](https://gitee.com/mindspore/mindspore/pulls/1138), [!1033](https://gitee.com/mindspore/mindspore/pulls/1033)).
* Fix memory leaks after execution in PyNatvie mode ([!1201](https://gitee.com/mindspore/mindspore/pulls/1201)).
* Fix HCCL failure in some special scenes ([!1204](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1204), [!1252](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1252)).
* Fix SSD network when Select failed and kernel info can't be found([!1449](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1449)).

2
akg

@ -1 +1 @@
Subproject commit f60af9df4220bf3db5de2b224418953c0dc1f625
Subproject commit 5c0e3d2ffb6ba7650453c3b11163237a43d206d6

View File

@ -491,9 +491,9 @@ build_predict()
cd "${BASEPATH}/predict/output/"
if [[ "$PREDICT_PLATFORM" == "x86_64" ]]; then
tar -cf MSPredict-0.5.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
tar -cf MSPredict-0.6.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
elif [[ "$PREDICT_PLATFORM" == "arm64" ]]; then
tar -cf MSPredict-0.5.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
tar -cf MSPredict-0.6.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
fi
echo "success to build predict project!"
}

View File

@ -8,7 +8,7 @@ else()
VER 67.1
LIBS ${LIB_ICU_COMMON} ${LIB_ICU_DATA} ${LIB_ICU_I18N}
URL https://github.com/unicode-org/icu/archive/release-67-1.tar.gz
MD5 0c2662a2b0bc80b0eb56495205247c8f
MD5 fd525fb47d8827b0b7da78b51dd2d93f
CONFIGURE_COMMAND ${CMAKE_SOURCE_DIR}/scripts/build_icu4c.sh
)
include_directories(${icu4c_INC})

View File

@ -12,6 +12,7 @@ mindspore_add_pkg(jpeg_turbo
URL https://github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.4.tar.gz
MD5 44c43e4a9fb352f47090804529317c88
CMAKE_OPTION -DCMAKE_BUILD_TYPE=Release -DCMAKE_SKIP_RPATH=TRUE
PATCHES ${CMAKE_SOURCE_DIR}/third_party/patch/jpeg_turbo/jpeg_turbo.patch001
)
include_directories(${jpeg_turbo_INC})
add_library(mindspore::jpeg_turbo ALIAS jpeg_turbo::jpeg)

View File

@ -278,6 +278,13 @@ if (ENABLE_SERVING)
COMPONENT mindspore
)
file(GLOB MS_SERVING_PY_LIST ${CMAKE_SOURCE_DIR}/serving/*.py)
install(
FILES ${MS_SERVING_PY_LIST}
DESTINATION ${INSTALL_PY_DIR}
COMPONENT mindspore
)
install(
TARGETS inference
DESTINATION ${INSTALL_LIB_DIR}

View File

@ -0,0 +1,67 @@
# Dockerfile for the MindSpore 0.6.0-beta CPU release image (Ubuntu 18.04):
# builds Python 3.7.5 from source, points pip at a mirror, then installs the
# published CPU wheel.
FROM ubuntu:18.04
MAINTAINER leonwanghui <leon.wanghui@huawei.com>
# Set env: prefer the locally built Python under /usr/local/bin
ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
ENV PATH /usr/local/bin:$PATH
# Install base tools (editor, networking, VCS and shell utilities)
RUN apt update \
&& DEBIAN_FRONTEND=noninteractive apt install -y \
vim \
wget \
curl \
xz-utils \
net-tools \
openssh-client \
git \
ntpdate \
tzdata \
tcl \
sudo \
bash-completion
# Install compile tools needed to build Python (and native pip packages) from source
RUN DEBIAN_FRONTEND=noninteractive apt install -y \
gcc \
g++ \
zlibc \
make \
libgmp-dev \
patch \
autoconf \
libtool \
automake \
flex
# Set bash: make /bin/sh point to bash instead of dash
RUN echo "dash dash/sh boolean false" | debconf-set-selections
RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash
# Install python (v3.7.5): build from source into PYTHON_ROOT_PATH and expose
# it as the default `python`/`pip`
RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
&& cd /tmp \
&& wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
&& tar -xvf v3.7.5.tar.gz \
&& cd /tmp/cpython-3.7.5 \
&& mkdir -p ${PYTHON_ROOT_PATH} \
&& ./configure --prefix=${PYTHON_ROOT_PATH} \
&& make -j4 \
&& make install -j4 \
&& rm -f /usr/local/bin/python \
&& rm -f /usr/local/bin/pip \
&& ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
&& ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
&& rm -rf /tmp/cpython-3.7.5 \
&& rm -f /tmp/v3.7.5.tar.gz
# Set pip source: use the Aliyun PyPI mirror (plain HTTP, hence trusted-host)
RUN mkdir -pv /root/.pip \
&& echo "[global]" > /root/.pip/pip.conf \
&& echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
&& echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
# Install MindSpore cpu whl package
RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.6.0-beta/MindSpore/cpu/ubuntu_x86/mindspore-0.6.0-cp37-cp37m-linux_x86_64.whl

View File

@ -0,0 +1,83 @@
# Dockerfile for the MindSpore 0.6.0-beta GPU release image
# (CUDA 10.1 / cuDNN 7 on Ubuntu 18.04): builds Python 3.7.5 and OpenMPI 3.1.5
# from source, then installs the published cuda-10.1 wheel.
FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
MAINTAINER leonwanghui <leon.wanghui@huawei.com>
# Set env: prefer the locally built Python and OpenMPI installations
ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
ENV OMPI_ROOT_PATH /usr/local/openmpi-3.1.5
ENV PATH ${OMPI_ROOT_PATH}/bin:/usr/local/bin:$PATH
ENV LD_LIBRARY_PATH ${OMPI_ROOT_PATH}/lib:$LD_LIBRARY_PATH
# Install base tools (editor, networking, VCS and shell utilities)
RUN apt update \
&& DEBIAN_FRONTEND=noninteractive apt install -y \
vim \
wget \
curl \
xz-utils \
net-tools \
openssh-client \
git \
ntpdate \
tzdata \
tcl \
sudo \
bash-completion
# Install compile tools plus NCCL 2.4.8 pinned to the cuda10.1 build
RUN DEBIAN_FRONTEND=noninteractive apt install -y \
gcc \
g++ \
zlibc \
make \
libgmp-dev \
patch \
autoconf \
libtool \
automake \
flex \
libnccl2=2.4.8-1+cuda10.1 \
libnccl-dev=2.4.8-1+cuda10.1
# Set bash: make /bin/sh point to bash instead of dash
RUN echo "dash dash/sh boolean false" | debconf-set-selections
RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash
# Install python (v3.7.5): build from source into PYTHON_ROOT_PATH and expose
# it as the default `python`/`pip`
RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
&& cd /tmp \
&& wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
&& tar -xvf v3.7.5.tar.gz \
&& cd /tmp/cpython-3.7.5 \
&& mkdir -p ${PYTHON_ROOT_PATH} \
&& ./configure --prefix=${PYTHON_ROOT_PATH} \
&& make -j4 \
&& make install -j4 \
&& rm -f /usr/local/bin/python \
&& rm -f /usr/local/bin/pip \
&& ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
&& ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
&& rm -rf /tmp/cpython-3.7.5 \
&& rm -f /tmp/v3.7.5.tar.gz
# Set pip source: use the Aliyun PyPI mirror (plain HTTP, hence trusted-host)
RUN mkdir -pv /root/.pip \
&& echo "[global]" > /root/.pip/pip.conf \
&& echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
&& echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
# Install openmpi (v3.1.5): build from source into OMPI_ROOT_PATH
RUN cd /tmp \
&& wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.5.tar.gz \
&& tar -xvf openmpi-3.1.5.tar.gz \
&& cd /tmp/openmpi-3.1.5 \
&& mkdir -p ${OMPI_ROOT_PATH} \
&& ./configure --prefix=${OMPI_ROOT_PATH} \
&& make -j4 \
&& make install -j4 \
&& rm -rf /tmp/openmpi-3.1.5 \
&& rm -f /tmp/openmpi-3.1.5.tar.gz
# Install MindSpore cuda-10.1 whl package
RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.6.0-beta/MindSpore/gpu/ubuntu_x86/cuda-10.1/mindspore_gpu-0.6.0-cp37-cp37m-linux_x86_64.whl

@ -1 +1 @@
Subproject commit 103f2d1019dc50d781d7a964551d9f1f50b3b009
Subproject commit 885af56694eff438a4ea079c0c34de30993f1473

View File

@ -14,7 +14,10 @@
# ============================================================================
"""builtin_operations"""
import numpy as np
from mindspore.ops import functional as F
from mindspore.ops import composite as C
from mindspore.common.tensor import Tensor
import mindspore.common.dtype as mstype
from mindspore.common.dtype import dtype_to_nptype, get_py_obj_dtype
@ -113,6 +116,7 @@ def bool_or(x, y):
"""Implement `bool_or`."""
return x or y
def vm_compare(*args):
"""Implement `vm_compare` for tensor."""
obj_str = args[-1]
@ -141,10 +145,12 @@ def list_len(x):
"""Implement `list_len`."""
return len(x)
def Depend(value, expr):
"""Implement `Depend`."""
return value
# only used in PyNative mode
def make_ref(key, value, ref):
return value
@ -171,3 +177,16 @@ def tuple_to_array(x):
def stop_gradient(x):
"""Implement `stop_gradient`."""
return x
hyper_map = C.HyperMap()
def mixed_precision_cast(dst_type, x):
"""Implement `mixed_precision_cast`."""
def cast_inner(data):
if isinstance(data, Tensor) and data.dtype in (mstype.float32, mstype.float16):
return F.cast(data, dst_type)
return data
return hyper_map(cast_inner, x)

View File

@ -459,27 +459,27 @@ class Parser:
logger.debug("ops info = %r", ops_info)
return ops_info
def analyze_super(self, father_class_node, subclass_instance):
def analyze_super(self, class_type_node, subclass_instance):
"""Analyze super and return a class instance."""
father_class = None
if father_class_node is None:
father_class = type(subclass_instance)
if isinstance(father_class_node, ast.Name):
father_class_name = getattr(father_class_node, 'id')
father_class = self.global_namespace[father_class_name]
if isinstance(father_class_node, ast.Attribute):
value = getattr(father_class_node, 'value')
attr = getattr(father_class_node, 'attr')
module_name = getattr(value, 'id')
father_class_module = self.global_namespace[module_name]
father_class = getattr(father_class_module, attr)
if father_class is None:
raise ValueError("When call 'super', the father class is None.")
if not isinstance(subclass_instance, father_class):
raise ValueError("When call 'super', the second arg should be an instance of first arg.")
sub_class = type(subclass_instance)
if class_type_node is None:
return super(sub_class, subclass_instance)
if isinstance(class_type_node, ast.Name):
class_name = getattr(class_type_node, 'id')
elif isinstance(class_type_node, ast.Attribute):
class_name = getattr(class_type_node, 'attr')
else:
raise ValueError(f"When call 'super', the first arg should be a class type, "
f"but got {class_type_node.__class__.__name__}.")
target_class_instance = super(father_class, subclass_instance)
return target_class_instance
target_father_class = None
for class_element in sub_class.mro():
if class_element.__name__ == class_name:
target_father_class = class_element
break
if target_father_class is None:
raise ValueError("When call 'super', the second arg should be an instance of first arg.")
return super(target_father_class, subclass_instance)
def get_location(self, node):
"""

View File

@ -132,7 +132,9 @@ def while_cond(x):
@constexpr
def check_type_same(x_type, base_type):
"""Check x_type is same as base_type."""
return mstype.issubclass_(x_type, base_type)
if mstype.issubclass_(x_type, base_type):
return True
raise TypeError(f"The arg 'x' should be a {base_type}, but got {x_type}.")
@constexpr

View File

@ -31,8 +31,9 @@ class PServerKernel {
~PServerKernel() = default;
PServerKernel(const PServerKernel &) = delete;
PServerKernel &operator=(const PServerKernel &) = delete;
virtual void InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) {}
virtual void InitKernel(const CNodePtr &cnode,
const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) {}
virtual void ReInit(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) {}
virtual bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) = 0;

View File

@ -33,8 +33,9 @@ class PullKernel : public CPUKernel {
~PullKernel() override = default;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, const std::vector<AddressPtr> &) {
// If the paramter is embedding table, don't Pull from PServer.
if (param_name_.find("embedding") == std::string::npos && param_name_.find("wide_w") == std::string::npos) {
bool init_in_server = mindspore::parallel::ps::Worker<float>::GetInstance().GetParamInitInServer(param_name_);
// If init_in_server, forward kernel should run in server too.
if (!init_in_server) {
parallel::ps::Worker<T>::GetInstance().Pull(key_, inputs[1]->addr, inputs[1]->size);
}
return true;

View File

@ -43,7 +43,10 @@ class PushKernel : public CPUKernel {
sizes.push_back(SizeToInt(input->size) / sizeof(T));
}
parallel::ps::Worker<T>::GetInstance().Push(keys, addrs, sizes);
memcpy_s(outputs[0]->addr, sizeof(size_t), &key_, sizeof(size_t));
auto ret = memcpy_s(outputs[0]->addr, sizeof(size_t), &key_, sizeof(size_t));
if (ret != EOK) {
MS_LOG(EXCEPTION) << "Lookup id memcpy failed.";
}
return true;
}

View File

@ -23,7 +23,7 @@ namespace mindspore {
namespace kernel {
namespace ps {
void SparseApplyAdamPSKernel::InitKernel(
const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
std::vector<size_t> &var_shape = *(shape_vec[0]);
std::vector<size_t> &m_shape = *(shape_vec[1]);
@ -55,11 +55,9 @@ void SparseApplyAdamPSKernel::InitKernel(
if (grad_shape[0] != indices_size_) {
MS_LOG(ERROR) << "The first dimension of grad shape must be equal to indices";
}
/*
if (AnfAlgo::HasNodeAttr(USE_NESTEROV, kernel_node)) {
use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "use_nesterov");
if (AnfAlgo::HasNodeAttr(USE_NESTEROV, cnode)) {
use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(cnode, "use_nesterov");
}
*/
workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));
workspace_size_list_.emplace_back(indices_size_ * sizeof(int));
workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));

View File

@ -30,7 +30,8 @@ class SparseApplyAdamPSKernel : public SparseApplyAdamCPUKernel, public PServerK
SparseApplyAdamPSKernel(size_t rank_id, size_t pserver_num) : PServerKernel(rank_id, pserver_num) {}
~SparseApplyAdamPSKernel() override = default;
void InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
void InitKernel(const CNodePtr &cnode,
const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
void ReInit(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

View File

@ -20,7 +20,7 @@ namespace mindspore {
namespace kernel {
namespace ps {
void SparseApplyFtrlPSKernel::InitKernel(
const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
std::vector<size_t> var_shape = *(shape_vec[0]);
std::vector<size_t> accum_shape = *(shape_vec[1]);
@ -46,10 +46,22 @@ void SparseApplyFtrlPSKernel::InitKernel(
if (grad_shape[0] != indices_size_) {
MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices";
}
lr_ = 0.01;
l1_ = 1e-8;
l2_ = 1e-8;
lr_power_ = -0.5;
lr_ = AnfAlgo::GetNodeAttr<float>(cnode, "lr");
if (lr_ <= 0) {
MS_LOG(EXCEPTION) << "lr should be a positive scalar";
}
l1_ = AnfAlgo::GetNodeAttr<float>(cnode, "l1");
if (l1_ < 0) {
MS_LOG(EXCEPTION) << "l1 should be a non-negative scalar";
}
l2_ = AnfAlgo::GetNodeAttr<float>(cnode, "l2");
if (l2_ < 0) {
MS_LOG(EXCEPTION) << "l2 should be a non-negative scalar";
}
lr_power_ = AnfAlgo::GetNodeAttr<float>(cnode, "lr_power");
if (lr_power_ > 0) {
MS_LOG(EXCEPTION) << "lr_power should be a non-positive scalar";
}
workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));
workspace_size_list_.emplace_back(indices_size_ * sizeof(int));
workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));

View File

@ -30,7 +30,8 @@ class SparseApplyFtrlPSKernel : public SparseApplyFtrlCPUKernel, public PServerK
SparseApplyFtrlPSKernel(size_t rank_id, size_t pserver_num) : PServerKernel(rank_id, pserver_num) {}
~SparseApplyFtrlPSKernel() override = default;
void InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
void InitKernel(const CNodePtr &cnode,
const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
void ReInit(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,

View File

@ -23,7 +23,7 @@ namespace mindspore {
namespace kernel {
namespace ps {
void SparseApplyLazyAdamPSKernel::InitKernel(
const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
std::vector<size_t> &var_shape = *(shape_vec[0]);
std::vector<size_t> &m_shape = *(shape_vec[1]);
@ -55,11 +55,9 @@ void SparseApplyLazyAdamPSKernel::InitKernel(
if (grad_shape[0] != indices_size_) {
MS_LOG(ERROR) << "The first dimension of grad shape must be equal to indices";
}
/*
if (AnfAlgo::HasNodeAttr(USE_NESTEROV, kernel_node)) {
use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "use_nesterov");
if (AnfAlgo::HasNodeAttr(USE_NESTEROV, cnode)) {
use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(cnode, "use_nesterov");
}
*/
workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));
workspace_size_list_.emplace_back(indices_size_ * sizeof(int));
workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));

View File

@ -30,7 +30,8 @@ class SparseApplyLazyAdamPSKernel : public SparseApplyLazyAdamCPUKernel, public
SparseApplyLazyAdamPSKernel(size_t rank_id, size_t pserver_num) : PServerKernel(rank_id, pserver_num) {}
~SparseApplyLazyAdamPSKernel() override = default;
void InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
void InitKernel(const CNodePtr &cnode,
const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
void ReInit(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &) override;
bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

View File

@ -0,0 +1,226 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include "maxpool_with_argmax_grad_impl.cuh"
#include "runtime/device/gpu/cuda_common.h"
#include "include/cuda_fp16.h"
// Backward kernel for MaxPoolWithArgmax, NCHW layout.
//
// One thread per input element (grid-stride loop over xNCHW elements): each
// thread determines the range of pooling windows that cover its input
// position and sums dy over every window whose recorded argmax — stored as an
// intra-channel flat index h * xWidth + w — equals this element's index.
//
// `x` (the forward input) is part of the kernel contract but is not read;
// only the saved argmax indices are needed to route the gradient.
template <typename T, typename S>
__global__ void MaxPoolWithArgmaxGrad(const T* x, const T* dy, const S* index, const int n, const int c,
                                      const int xHeight, const int xWidth, const int dyHeight, const int dyWidth,
                                      const int windowHeight, const int windowWidth, const int strideHeight,
                                      const int strideWidth, const int padTop, const int padLeft, const int xNCHW,
                                      const int xCHW, const int xHW, const int dyCHW, const int dyHW, T* dx) {
  for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (xNCHW); pos += blockDim.x * gridDim.x) {
    const int posn = pos / xCHW;
    const int posc = pos / xHW % c;
    // Bug fix: the row index must be decomposed with the row stride (xWidth).
    // The previous `pos / xHeight % xHeight` was only correct for square inputs.
    const int posh = pos / xWidth % xHeight;
    const int posw = pos % xWidth;
    const S posIdx = posh * xWidth + posw;
    // First output row/column whose window can still contain (posh, posw).
    int hstart = posh + padTop;
    if (hstart < windowHeight) {
      hstart = 0;
    } else {
      hstart = (hstart - windowHeight) / strideHeight + 1;
    }
    int wstart = posw + padLeft;
    if (wstart < windowWidth) {
      wstart = 0;
    } else {
      wstart = (wstart - windowWidth) / strideWidth + 1;
    }
    // One-past-the-last output row/column whose window contains the element,
    // clamped to the output extent.
    const int hend = min((posh + padTop) / strideHeight + 1, dyHeight);
    const int wend = min((posw + padLeft) / strideWidth + 1, dyWidth);
    const int channelStart = posn * dyCHW + posc * dyHW;
    T dySum = static_cast<T>(0.0);
    for (int hcur = hstart; hcur < hend; ++hcur) {
      for (int wcur = wstart; wcur < wend; ++wcur) {
        const int curIdx = hcur * dyWidth + wcur;
        if (index[channelStart + curIdx] == posIdx) {
          dySum += dy[channelStart + curIdx];
        }
      }
    }
    dx[pos] = dySum;
  }
  return;
}
// Half-precision specialization of the MaxPoolWithArgmax backward kernel.
// Same routing logic as the generic template, but the gradient is accumulated
// in float and converted back to half at the end to reduce rounding error
// from repeated half additions.
template <>
__global__ void MaxPoolWithArgmaxGrad(const half* x, const half* dy, const int* index, const int n, const int c,
                                      const int xHeight, const int xWidth, const int dyHeight, const int dyWidth,
                                      const int windowHeight, const int windowWidth, const int strideHeight,
                                      const int strideWidth, const int padTop, const int padLeft, const int xNCHW,
                                      const int xCHW, const int xHW, const int dyCHW, const int dyHW, half* dx) {
  for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (xNCHW); pos += blockDim.x * gridDim.x) {
    const int posn = pos / xCHW;
    const int posc = pos / xHW % c;
    // Bug fix: decompose the row index with the row stride (xWidth); the
    // previous `pos / xHeight % xHeight` was only correct for square inputs.
    const int posh = pos / xWidth % xHeight;
    const int posw = pos % xWidth;
    const int posIdx = posh * xWidth + posw;
    // First output row/column whose window can still contain (posh, posw).
    int hstart = posh + padTop;
    if (hstart < windowHeight) {
      hstart = 0;
    } else {
      hstart = (hstart - windowHeight) / strideHeight + 1;
    }
    int wstart = posw + padLeft;
    if (wstart < windowWidth) {
      wstart = 0;
    } else {
      wstart = (wstart - windowWidth) / strideWidth + 1;
    }
    // One-past-the-last output row/column whose window contains the element.
    const int hend = min((posh + padTop) / strideHeight + 1, dyHeight);
    const int wend = min((posw + padLeft) / strideWidth + 1, dyWidth);
    const int channelStart = posn * dyCHW + posc * dyHW;
    float dySum = 0.0f;
    for (int hcur = hstart; hcur < hend; ++hcur) {
      for (int wcur = wstart; wcur < wend; ++wcur) {
        const int curIdx = hcur * dyWidth + wcur;
        if (index[channelStart + curIdx] == posIdx) {
          dySum += __half2float(dy[channelStart + curIdx]);
        }
      }
    }
    dx[pos] = __float2half(dySum);
  }
  return;
}
// Host-side launcher for the MaxPoolWithArgmax backward kernel: derives the
// flattened tensor sizes and dispatches one thread per input element on the
// given stream.
template <typename T, typename S>
void CalMaxPoolWithArgmaxGrad(const T* x, const T* dy, const S* index, const int n, const int c, const int xHeight,
                              const int xWidth, const int dyHeight, const int dyWidth, const int windowHeight,
                              const int windowWidth, const int strideHeight, const int strideWidth, const int padTop,
                              const int padLeft, T* dx, cudaStream_t cuda_stream) {
  // Flattened element counts for the input (x/dx) and output-gradient (dy) tensors.
  const int input_hw = xHeight * xWidth;
  const int input_chw = c * input_hw;
  const int input_nchw = n * input_chw;
  const int grad_hw = dyHeight * dyWidth;
  const int grad_chw = c * grad_hw;
  MaxPoolWithArgmaxGrad<<<GET_BLOCKS(input_nchw), GET_THREADS, 0, cuda_stream>>>(
    x, dy, index, n, c, xHeight, xWidth, dyHeight, dyWidth, windowHeight, windowWidth, strideHeight, strideWidth,
    padTop, padLeft, input_nchw, input_chw, input_hw, grad_chw, grad_hw, dx);
}
// Explicit instantiations for the dtype combinations registered with the GPU
// kernel factory: float/int32 and half/int32.
template void CalMaxPoolWithArgmaxGrad<float, int>(const float* x, const float* dy, const int* index, const int n,
                                                   const int c, const int xHeight, const int xWidth,
                                                   const int dyHeight, const int dyWidth, const int windowHeight,
                                                   const int windowWidth, const int strideHeight,
                                                   const int strideWidth, const int padTop, const int padLeft,
                                                   float* dx, cudaStream_t cuda_stream);
template void CalMaxPoolWithArgmaxGrad<half, int>(const half* x, const half* dy, const int* index, const int n,
                                                  const int c, const int xHeight, const int xWidth,
                                                  const int dyHeight, const int dyWidth, const int windowHeight,
                                                  const int windowWidth, const int strideHeight,
                                                  const int strideWidth, const int padTop, const int padLeft,
                                                  half* dx, cudaStream_t cuda_stream);

View File

@ -0,0 +1,25 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_MAXPOOLWITHARGMAX_GRAD_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_MAXPOOLWITHARGMAX_GRAD_H_
// Launches the MaxPoolWithArgmax backward CUDA kernel on `cuda_stream`.
// x/dy/index are device pointers (NCHW); `index` holds the argmax positions
// saved by the forward pass; the gradient w.r.t. the input is written to `dx`.
// T is the data type (float/half), S the index type (int).
template <typename T, typename S>
void CalMaxPoolWithArgmaxGrad(const T* x, const T* dy, const S* index, const int n, const int c, const int xHeight,
const int xWidth, const int dyHeight, const int dyWidth, const int windowHeight,
const int windowWidth, const int strideHeight, const int strideWidth, const int padTop,
const int padLeft, T* dx, cudaStream_t cuda_stream);
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_MAXPOOLWITHARGMAX_GRAD_H_

View File

@ -0,0 +1,149 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include "maxpool_with_argmax_impl.cuh"
#include "runtime/device/gpu/cuda_common.h"
#include "include/cuda_fp16.h"
// Forward kernel for MaxPoolWithArgmax, NCHW layout.
//
// One thread per output element (grid-stride loop over outputNCHW elements):
// each thread scans its pooling window over the input, writes the maximum
// value to `output`, and records the winning position in `index` as an
// intra-channel flat index h * w_in + w (the format the backward kernel
// matches against). Ties keep the first (lowest-index) maximum because the
// comparison is strict `>`.
template <typename T, typename S>
__global__ void MaxPoolWithArgmax(const T* input, const int n, const int c, const int h, const int w,
                                  const int windowHeight, const int windowWidth, const int strideHeight,
                                  const int strideWidth, const int padTop, const int padLeft, const int outputHeight,
                                  const int outputWidth, const int outputNCHW, const int outputCHW,
                                  const int outputHW, T* output, S *index) {
  for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (outputNCHW); pos += blockDim.x * gridDim.x) {
    const int posn = pos / outputCHW;
    const int posc = pos / outputHW % c;
    // Bug fix: the output row index must be decomposed with the row stride
    // (outputWidth). The previous `pos / outputHeight % outputHeight` was
    // only correct for square outputs.
    const int posh = pos / outputWidth % outputHeight;
    const int posw = pos % outputWidth;
    // Window bounds in input coordinates, clamped to the input extent.
    int hstart = posh * strideHeight - padTop;
    int wstart = posw * strideWidth - padLeft;
    const int hend = min(hstart + windowHeight, h);
    const int wend = min(wstart + windowWidth, w);
    hstart = max(hstart, 0);
    wstart = max(wstart, 0);
    const S inputStart = posn * c * h * w + posc * h * w;
    // Seed the running max with the window's top-left element.
    S maxIdx = hstart * w + wstart;
    T maxData = input[inputStart + maxIdx];
    for (int hcur = hstart; hcur < hend; ++hcur) {
      for (int wcur = wstart; wcur < wend; ++wcur) {
        const S inputIdx = hcur * w + wcur;
        const T inputData = input[inputStart + inputIdx];
        if (inputData > maxData) {
          maxIdx = inputIdx;
          maxData = inputData;
        }
      }
    }
    output[pos] = maxData;
    index[pos] = maxIdx;
  }
  return;
}
// Host-side launcher for the MaxPoolWithArgmax forward kernel: derives the
// flattened output sizes and dispatches one thread per output element on the
// given stream.
template <typename T, typename S>
void CalMaxPoolWithArgmax(const T* input, const int n, const int c, const int h, const int w, const int windowHeight,
                          const int windowWidth, const int strideHeight, const int strideWidth, const int padTop,
                          const int padLeft, const int outputHeight, const int outputWidth, T* output, S *index,
                          cudaStream_t cuda_stream) {
  // Flattened element counts of the pooled output tensor.
  const int out_hw = outputHeight * outputWidth;
  const int out_chw = c * out_hw;
  const int out_nchw = n * out_chw;
  MaxPoolWithArgmax<<<GET_BLOCKS(out_nchw), GET_THREADS, 0, cuda_stream>>>(
    input, n, c, h, w, windowHeight, windowWidth, strideHeight, strideWidth, padTop, padLeft, outputHeight,
    outputWidth, out_nchw, out_chw, out_hw, output, index);
}
// Explicit instantiations for the dtype combinations registered with the GPU
// kernel factory: float/int32 and half/int32.
template void CalMaxPoolWithArgmax<float, int>(const float* input, const int n, const int c, const int h, const int w,
                                               const int windowHeight, const int windowWidth, const int strideHeight,
                                               const int strideWidth, const int padTop, const int padLeft,
                                               const int outputHeight, const int outputWidth, float* output,
                                               int* index, cudaStream_t cuda_stream);
template void CalMaxPoolWithArgmax<half, int>(const half* input, const int n, const int c, const int h, const int w,
                                              const int windowHeight, const int windowWidth, const int strideHeight,
                                              const int strideWidth, const int padTop, const int padLeft,
                                              const int outputHeight, const int outputWidth, half* output,
                                              int* index, cudaStream_t cuda_stream);

View File

@ -0,0 +1,25 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_MAXPOOLWITHARGMAX_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_MAXPOOLWITHARGMAX_H_
// Launches the MaxPoolWithArgmax forward CUDA kernel on `cuda_stream`.
// `input` is a device pointer (NCHW, n x c x h x w); the pooled values are
// written to `output` and the winning positions to `index`.
// T is the data type (float/half), S the index type (int).
template <typename T, typename S>
void CalMaxPoolWithArgmax(const T* input, const int n, const int c, const int h, const int w, const int windowHeight,
const int windowWidth, const int strideHeight, const int strideWidth, const int padTop,
const int padLeft, const int outputHeight, const int outputWidth, T* output, S *index,
cudaStream_t cuda_stream);
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_MAXPOOLWITHARGMAX_H_

View File

@ -0,0 +1,30 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/nn/maxpool_with_argmax_gpu_kernel.h"
namespace mindspore {
namespace kernel {
// Register MaxPoolWithArgmax GPU forward kernels with the kernel factory.
// Two dtype variants are provided; the argmax output is always int32.
// float32 input -> float32 values + int32 indices.
MS_REG_GPU_KERNEL_TWO(
MaxPoolWithArgmax,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32),
MaxPoolWithArgmaxGpuFwdKernel, float, int)
// float16 input -> float16 values + int32 indices.
MS_REG_GPU_KERNEL_TWO(
MaxPoolWithArgmax,
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeInt32),
MaxPoolWithArgmaxGpuFwdKernel, half, int)
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,160 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_MAXPOOLWITHARGMAX_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_MAXPOOLWITHARGMAX_GPU_KERNEL_H_
#include <algorithm>
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/maxpool_with_argmax_impl.cuh"
#include "backend/kernel_compiler/gpu/kernel_constants.h"
namespace mindspore {
namespace kernel {
template <typename T, typename S>
class MaxPoolWithArgmaxGpuFwdKernel : public GpuKernel {
public:
MaxPoolWithArgmaxGpuFwdKernel()
: n_(0),
c_(0),
input_height_(0),
input_width_(0),
window_height_(0),
window_width_(0),
pad_height_(0),
pad_width_(0),
pad_top_(0),
pad_left_(0),
stride_height_(0),
stride_width_(0),
output_height_(0),
output_width_(0),
input_size_(0),
output_size_(0) {}
~MaxPoolWithArgmaxGpuFwdKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
T *input_addr = GetDeviceAddress<T>(inputs, 0);
T *output_addr = GetDeviceAddress<T>(outputs, 0);
S *index_addr = GetDeviceAddress<S>(outputs, 1);
CalMaxPoolWithArgmax(input_addr, n_, c_, input_height_, input_width_, window_height_, window_width_, stride_height_,
stride_width_, pad_top_, pad_left_, output_height_, output_width_, output_addr, index_addr,
reinterpret_cast<cudaStream_t>(stream_ptr));
return true;
}
bool Init(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but MaxPoolWithArgmax needs 1 inputs.";
return false;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 2) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but MaxPoolWithArgmax needs 2 output.";
return false;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
input_size_ = sizeof(T);
for (auto x : input_shape) {
input_size_ *= x;
}
output_size_ = sizeof(T);
for (auto x : output_shape) {
output_size_ *= x;
}
n_ = SizeToInt(input_shape[0]);
c_ = SizeToInt(input_shape[1]);
input_height_ = SizeToInt(input_shape[2]);
input_width_ = SizeToInt(input_shape[3]);
output_height_ = SizeToInt(output_shape[2]);
output_width_ = SizeToInt(output_shape[3]);
auto window = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ksize"));
window_height_ = window[1];
window_width_ = window[2];
auto stride = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides"));
stride_height_ = stride[1];
stride_width_ = stride[2];
pad_mode_ = GetValue<std::string>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("padding"));
pad_top_ = 0;
pad_left_ = 0;
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
SetPad();
}
InitSizeLists();
return true;
}
protected:
void InitSizeLists() override {
input_size_list_.push_back(input_size_);
output_size_list_.push_back(output_size_);
output_size_list_.push_back(output_size_ / sizeof(T) * sizeof(S));
}
private:
void SetPad() {
pad_height_ = std::max<int>(
0, (((input_height_ / stride_height_) * stride_height_ == input_height_ ? (input_height_ / stride_height_)
: (input_height_ / stride_height_) + 1) -
1) *
stride_height_ +
window_height_ - input_height_);
pad_width_ = std::max<int>(
0, (((input_width_ / stride_width_) * stride_width_ == input_width_ ? (input_width_ / stride_width_)
: (input_width_ / stride_width_) + 1) -
1) *
stride_width_ +
window_width_ - input_width_);
pad_top_ = pad_height_ / 2;
pad_left_ = pad_width_ / 2;
}
std::string pad_mode_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
int n_;
int c_;
int input_height_;
int input_width_;
int window_height_;
int window_width_;
int pad_height_;
int pad_width_;
int pad_top_;
int pad_left_;
int stride_height_;
int stride_width_;
int output_height_;
int output_width_;
size_t input_size_;
size_t output_size_;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_MAXPOOLWITHARGMAX_GPU_KERNEL_H_

View File

@ -0,0 +1,36 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/nn/maxpool_with_argmax_grad_gpu_kernel.h"
namespace mindspore {
namespace kernel {
// Register the MaxPoolGradWithArgmax GPU kernel for float32: inputs are the
// forward input x, the incoming gradient dy, and the int32 argmax indices
// produced by the forward pass; the single output is dx.
MS_REG_GPU_KERNEL_TWO(MaxPoolGradWithArgmax,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddOutputAttr(kNumberTypeFloat32),
                      MaxPoolWithArgmaxGradGpuKernel, float, int)
// Same registration for float16 (half) values; indices remain int32.
MS_REG_GPU_KERNEL_TWO(MaxPoolGradWithArgmax,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeInt32)
                        .AddOutputAttr(kNumberTypeFloat16),
                      MaxPoolWithArgmaxGradGpuKernel, half, int)
}  // namespace kernel
}  // namespace mindspore

View File

@ -0,0 +1,168 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_MAXPOOLWITHARGMAX_GRAD_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_MAXPOOLWITHARGMAX_GRAD_GPU_KERNEL_H_
#include <algorithm>
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/maxpool_with_argmax_grad_impl.cuh"
#include "backend/kernel_compiler/gpu/kernel_constants.h"
namespace mindspore {
namespace kernel {
template <typename T, typename S>
class MaxPoolWithArgmaxGradGpuKernel : public GpuKernel {
public:
MaxPoolWithArgmaxGradGpuKernel()
: n_(0),
c_(0),
x_height_(0),
x_width_(0),
dy_height_(0),
dy_width_(0),
x_size_(0),
dy_size_(0),
index_size_(0),
dx_size_(0) {}
~MaxPoolWithArgmaxGradGpuKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
T *x_addr = GetDeviceAddress<T>(inputs, 0);
T *dy_addr = GetDeviceAddress<T>(inputs, 1);
S *index_addr = GetDeviceAddress<S>(inputs, 2);
T *dx_addr = GetDeviceAddress<T>(outputs, 0);
CalMaxPoolWithArgmaxGrad(x_addr, dy_addr, index_addr, n_, c_, x_height_, x_width_, dy_height_, dy_width_,
window_height_, window_width_, stride_height_, stride_width_, pad_top_, pad_left_, dx_addr,
reinterpret_cast<cudaStream_t>(stream_ptr));
return true;
}
bool Init(const CNodePtr &kernel_node) {
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but MaxPoolGradWithArgmax needs 3 inputs.";
return false;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but MaxPoolGradWithArgmax needs 1 output.";
return false;
}
auto x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto index_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto dx_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
x_size_ = sizeof(T);
for (auto x : x_shape) {
x_size_ *= x;
}
dy_size_ = sizeof(T);
for (auto x : dy_shape) {
dy_size_ *= x;
}
index_size_ = sizeof(S);
for (auto x : index_shape) {
index_size_ *= x;
}
dx_size_ = sizeof(T);
for (auto x : dx_shape) {
dx_size_ *= x;
}
n_ = SizeToInt(x_shape[0]);
c_ = SizeToInt(x_shape[1]);
x_height_ = SizeToInt(x_shape[2]);
x_width_ = SizeToInt(x_shape[3]);
dy_height_ = SizeToInt(dy_shape[2]);
dy_width_ = SizeToInt(dy_shape[3]);
auto window = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ksize"));
window_height_ = window[1];
window_width_ = window[2];
auto stride = GetValue<std::vector<int>>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides"));
stride_height_ = stride[1];
stride_width_ = stride[2];
pad_mode_ = GetValue<std::string>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("padding"));
pad_top_ = 0;
pad_left_ = 0;
if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
SetPad();
}
InitSizeLists();
return true;
}
protected:
void InitSizeLists() override {
input_size_list_.push_back(x_size_);
input_size_list_.push_back(dy_size_);
input_size_list_.push_back(index_size_);
output_size_list_.push_back(dx_size_);
}
private:
void SetPad() {
pad_height_ = std::max<int>(
0, (((x_height_ / stride_height_) * stride_height_ == x_height_ ? (x_height_ / stride_height_)
: (x_height_ / stride_height_) + 1) -
1) *
stride_height_ +
window_height_ - x_height_);
pad_width_ =
std::max<int>(0, (((x_width_ / stride_width_) * stride_width_ == x_width_ ? (x_width_ / stride_width_)
: (x_width_ / stride_width_) + 1) -
1) *
stride_width_ +
window_width_ - x_width_);
pad_top_ = pad_height_ / 2;
pad_left_ = pad_width_ / 2;
}
std::string pad_mode_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
int n_;
int c_;
int x_height_;
int x_width_;
int dy_height_;
int dy_width_;
int window_height_;
int window_width_;
int pad_height_;
int pad_width_;
int pad_top_;
int pad_left_;
int stride_height_;
int stride_width_;
size_t x_size_;
size_t dy_size_;
size_t index_size_;
size_t dx_size_;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_MAXPOOLWITHARGMAX_GRAD_GPU_KERNEL_H_

View File

@ -20,11 +20,17 @@
#include "utils/utils.h"
#include "backend/kernel_compiler/hccl/hcom_util.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "frontend/parallel/context.h"
namespace mindspore {
namespace kernel {
namespace {
std::string GetKernelFormat(const CNodePtr &kernel_node, size_t index) {
auto parallel_context_instance = parallel::ParallelContext::GetInstance();
MS_EXCEPTION_IF_NULL(parallel_context_instance);
if (parallel_context_instance->enable_parallel_optimizer()) {
return kOpFormat_DEFAULT;
}
const std::set<std::string> kReduceNoSupportedSet = {kOpFormat_FRAC_Z, kOpFormat_FRACTAL_Z_C04, kOpFormat_C1HWNCoC0};
auto op_name = AnfAlgo::GetCNodeName(kernel_node);
auto format = AnfAlgo::GetPrevNodeOutputFormat(kernel_node, index);

View File

@ -65,6 +65,9 @@ size_t KernelBuildInfo::GetInputNum() const { return inputs_format_.size(); }
size_t KernelBuildInfo::GetOutputNum() const { return outputs_format_.size(); }
std::vector<Axis> KernelBuildInfo::GetInputReshapeType(size_t input_index) const {
if (input_reshape_type_.empty()) {
return {};
}
if (input_index >= input_reshape_type_.size()) {
MS_LOG(EXCEPTION) << "The index [" << input_index << "] is exceed the number of input node size "
<< input_reshape_type_.size();
@ -73,6 +76,9 @@ std::vector<Axis> KernelBuildInfo::GetInputReshapeType(size_t input_index) const
}
std::vector<Axis> KernelBuildInfo::GetOutputReshapeType(size_t output_index) const {
if (output_reshape_type_.empty()) {
return {};
}
if (output_index >= output_reshape_type_.size()) {
MS_LOG(EXCEPTION) << "The index [" << output_index << "] is exceed the number of output node size "
<< output_reshape_type_.size();
@ -158,13 +164,13 @@ void KernelBuildInfo::KernelBuildInfoBuilder::SetProcessor(Processor processor)
std::shared_ptr<KernelBuildInfo> KernelBuildInfo::KernelBuildInfoBuilder::Build() { return kernel_build_info_; }
void KernelBuildInfo::KernelBuildInfoBuilder::SetInputReshapeType(
void KernelBuildInfo::KernelBuildInfoBuilder::SetInputsReshapeType(
const std::vector<std::vector<Axis>> &input_reshape_type) {
MS_EXCEPTION_IF_NULL(kernel_build_info_);
kernel_build_info_->input_reshape_type_ = input_reshape_type;
}
void KernelBuildInfo::KernelBuildInfoBuilder::SetOutputReshapeType(
void KernelBuildInfo::KernelBuildInfoBuilder::SetOutputsReshapeType(
const std::vector<std::vector<Axis>> &output_reshape_type) {
MS_EXCEPTION_IF_NULL(kernel_build_info_);
kernel_build_info_->output_reshape_type_ = output_reshape_type;
@ -189,5 +195,37 @@ void KernelBuildInfo::KernelBuildInfoBuilder::SetOutputFormat(const std::string
}
kernel_build_info_->outputs_format_[index] = format;
}
void KernelBuildInfo::KernelBuildInfoBuilder::SetInputReshapeType(const std::vector<Axis> &input_reshape_type,
size_t index) {
if (index >= kernel_build_info_->input_reshape_type_.size()) {
MS_LOG(EXCEPTION) << "index outof range!";
}
std::copy(input_reshape_type.begin(), input_reshape_type.end(),
std::back_inserter(kernel_build_info_->input_reshape_type_[index]));
}
void KernelBuildInfo::KernelBuildInfoBuilder::SetOutputReshapeType(const std::vector<Axis> &output_reshape_type,
size_t index) {
if (index >= kernel_build_info_->output_reshape_type_.size()) {
MS_LOG(EXCEPTION) << "index outof range!";
}
std::copy(output_reshape_type.begin(), output_reshape_type.end(),
std::back_inserter(kernel_build_info_->output_reshape_type_[index]));
}
void KernelBuildInfo::KernelBuildInfoBuilder::SetOutputDeviceType(const TypeId &output_device_type, size_t index) {
if (index >= kernel_build_info_->outputs_device_type_.size()) {
MS_LOG(EXCEPTION) << "index outof range!";
}
kernel_build_info_->outputs_device_type_[index] = output_device_type;
}
void KernelBuildInfo::KernelBuildInfoBuilder::SetInputDeviceType(const TypeId &input_device_type, size_t index) {
if (index >= kernel_build_info_->inputs_device_type_.size()) {
MS_LOG(EXCEPTION) << "index outof range!";
}
kernel_build_info_->inputs_device_type_[index] = input_device_type;
}
} // namespace kernel
} // namespace mindspore

View File

@ -71,6 +71,10 @@ class KernelBuildInfo {
std::vector<TypeId> GetAllOutputDeviceTypes() const;
std::vector<std::vector<Axis>> GetAllOutputReshapeType() const;
std::vector<std::vector<Axis>> GetAllInputReshapeType() const;
OpPattern op_pattern() const { return op_pattern_; }
FusionType fusion_type() const { return fusion_type_; }
@ -108,8 +112,23 @@ class KernelBuildInfo::KernelBuildInfoBuilder {
public:
KernelBuildInfoBuilder() { kernel_build_info_ = std::make_shared<KernelBuildInfo>(); }
explicit KernelBuildInfoBuilder(std::shared_ptr<KernelBuildInfo> kernel_build_info)
: kernel_build_info_(std::move(kernel_build_info)) {}
explicit KernelBuildInfoBuilder(const std::shared_ptr<KernelBuildInfo> &kernel_build_info)
: kernel_build_info_(std::make_shared<KernelBuildInfo>()) {
SetKernelType(kernel_build_info->kernel_type());
SetFusionType(kernel_build_info->fusion_type());
SetProcessor(kernel_build_info->processor());
OpPattern(kernel_build_info->op_pattern());
for (size_t index = 0; index < kernel_build_info->GetInputNum(); ++index) {
kernel_build_info_->inputs_device_type_.emplace_back(kernel_build_info->GetInputDeviceType(index));
kernel_build_info_->inputs_format_.emplace_back(kernel_build_info->GetInputFormat(index));
kernel_build_info_->input_reshape_type_.emplace_back(kernel_build_info->GetInputReshapeType(index));
}
for (size_t index = 0; index < kernel_build_info->GetOutputNum(); ++index) {
kernel_build_info_->outputs_device_type_.emplace_back(kernel_build_info->GetOutputDeviceType(index));
kernel_build_info_->outputs_format_.emplace_back(kernel_build_info->GetOutputFormat(index));
kernel_build_info_->output_reshape_type_.emplace_back(kernel_build_info->GetOutputReshapeType(index));
}
}
~KernelBuildInfoBuilder() = default;
@ -123,9 +142,9 @@ class KernelBuildInfo::KernelBuildInfoBuilder {
void SetOutputsDeviceType(const std::vector<TypeId> &outputs_device_type);
void SetInputReshapeType(const std::vector<std::vector<Axis>> &input_reshape_type);
void SetInputsReshapeType(const std::vector<std::vector<Axis>> &input_reshape_type);
void SetOutputReshapeType(const std::vector<std::vector<Axis>> &output_reshape_type);
void SetOutputsReshapeType(const std::vector<std::vector<Axis>> &output_reshape_type);
void SetFusionType(FusionType fusion_type);
@ -137,6 +156,14 @@ class KernelBuildInfo::KernelBuildInfoBuilder {
void SetOutputFormat(const std::string &format, size_t index);
void SetInputReshapeType(const std::vector<Axis> &input_reshape_type, size_t index);
void SetOutputReshapeType(const std::vector<Axis> &output_reshape_type, size_t index);
void SetInputDeviceType(const TypeId &input_device_type, size_t index);
void SetOutputDeviceType(const TypeId &output_device_type, size_t index);
std::shared_ptr<KernelBuildInfo> Build();
private:

View File

@ -118,7 +118,7 @@ void TbeKernelSelect::GetCommonPatternKernelInfo(const OpInfo &op_info) {
}
builder.SetInputsDeviceType(inputs_device_type);
builder.SetInputsFormat(inputs_format);
builder.SetInputReshapeType(inputs_reshape_type);
builder.SetInputsReshapeType(inputs_reshape_type);
// output
std::vector<std::string> outputs_format;
std::vector<TypeId> outputs_device_type;
@ -129,7 +129,7 @@ void TbeKernelSelect::GetCommonPatternKernelInfo(const OpInfo &op_info) {
}
builder.SetOutputsDeviceType(outputs_device_type);
builder.SetOutputsFormat(outputs_format);
builder.SetOutputReshapeType(outputs_reshape_type);
builder.SetOutputsReshapeType(outputs_reshape_type);
kernel_info_list_->emplace_back(builder.Build());
}
MS_LOG(INFO) << "end.";

View File

@ -47,6 +47,7 @@
#include "backend/optimizer/ascend/ir_fission/transdata_split.h"
#include "backend/optimizer/ascend/ir_fission/topk_split.h"
#include "backend/optimizer/ascend/ir_fusion/momentum_lossscale_fusion.h"
#include "backend/optimizer/ascend/format_type/split_unsupported_transdata.h"
#include "backend/optimizer/ascend/ir_fusion/mul_add_fusion.h"
#include "backend/optimizer/ascend/ir_fusion/mul_addn_fusion.h"
#include "backend/optimizer/ascend/ir_fusion/matmul_biasadd_fusion.h"
@ -228,6 +229,7 @@ void AscendMixPrecision(const std::shared_ptr<session::KernelGraph> &kernel_grap
mixed_precision_pm->AddPass(std::make_shared<MergeCastToOp>());
mixed_precision_pm->AddPass(std::make_shared<LayerNormBetaGammaBackpropFusion>());
mixed_precision_pm->AddPass(std::make_shared<EraseVisitAttr>());
mixed_precision_pm->AddPass(std::make_shared<SplitUnsupportedTransData>());
mixed_precision_pm->AddPass(std::make_shared<ConvertUnSupportNodeToAICPU>());
mixed_precision_pm->AddPass(std::make_shared<RemoveInternalOutputCast>());
optimizer->AddPassManager(mixed_precision_pm);

View File

@ -153,7 +153,7 @@ AnfNodePtr InsertTransOpForMultipleOutput(const FuncGraphPtr &func_graph, const
std::vector<size_t> origin_shape = AnfAlgo::GetOutputInferShape(node, output_idx);
if (kCommonFormatSet.find(output_format) == kCommonFormatSet.end() && origin_shape.size() > 1) {
auto trans_op = AddTransOpNodeToGraph(func_graph, tuple_getitem, kernel_select, 0, false);
if (kernel_graph != nullptr && kernel_graph->IsInternalOutput(node)) {
if (kernel_graph != nullptr && kernel_graph->IsInternalOutput(node, output_idx)) {
kernel_graph->ReplaceInternalOutput(node, trans_op, output_idx, 0);
}
make_tuple_inputs.emplace_back(trans_op);
@ -174,8 +174,8 @@ void RefreshKernelBuildInfo(const std::string &input_format, const std::string &
MS_EXCEPTION_IF_NULL(ori_build_info);
auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(ori_build_info);
builder->SetInputsFormat({input_format});
builder->SetInputReshapeType({reshape_type});
builder->SetOutputReshapeType({reshape_type});
builder->SetInputsReshapeType({reshape_type});
builder->SetOutputsReshapeType({reshape_type});
builder->SetOutputsFormat({output_format});
if (type_id != kTypeUnknown) {
builder->SetOutputsDeviceType({type_id});
@ -265,7 +265,7 @@ AnfNodePtr InsertTransOpForOutput(const FuncGraphPtr &func_graph, const AnfNodeP
// Single output
if (outputs_num == 1 && (!AnfAlgo::IsTupleOutput(node))) {
auto new_node = InsertTransOpForSingleOutput(func_graph, node, kernel_select);
if (kernel_graph != nullptr && kernel_graph->IsInternalOutput(node)) {
if (kernel_graph != nullptr && kernel_graph->IsInternalOutput(node, 0)) {
kernel_graph->ReplaceInternalOutput(node, new_node);
}
return new_node;

View File

@ -40,6 +40,38 @@ bool IsParameterOrValueNode(const AnfNodePtr &node) {
return real_node->isa<ValueNode>();
}
void SetInput(const CNodePtr &control_depend, const int index, const FuncGraphPtr &graph, const CNodePtr &hccl_node,
const std::vector<AnfNodePtr> &memcpy_async_list) {
MS_EXCEPTION_IF_NULL(control_depend);
MS_EXCEPTION_IF_NULL(graph);
MS_EXCEPTION_IF_NULL(hccl_node);
std::vector<AnfNodePtr> make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple)};
make_tuple_inputs.insert(make_tuple_inputs.end(), memcpy_async_list.begin(), memcpy_async_list.end());
make_tuple_inputs.emplace_back(hccl_node);
auto make_tuple = graph->NewCNode(make_tuple_inputs);
MS_EXCEPTION_IF_NULL(make_tuple);
control_depend->set_input(IntToSize(index), make_tuple);
}
void DealControlForGetitem(const CNodePtr &tuple_getitem, const FuncGraphPtr &graph, const CNodePtr &hccl_node,
                           const std::vector<AnfNodePtr> &memcpy_async_list) {
  // When a ControlDepend consumes the hccl node through a TupleGetItem, patch
  // every ControlDepend user of that getitem so it also depends on the
  // inserted memcpy_async nodes.
  MS_EXCEPTION_IF_NULL(tuple_getitem);
  // Consistency with SetInput: graph and hccl_node are dereferenced/forwarded
  // below, so guard them here as well.
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(hccl_node);
  auto manager = graph->manager();
  MS_EXCEPTION_IF_NULL(manager);
  auto &node_users = manager->node_users();
  auto iter = node_users.find(tuple_getitem);
  if (iter == node_users.end()) {
    MS_LOG(EXCEPTION) << "node has no output in manager";
  }
  for (const auto &node_index : iter->second) {
    AnfNodePtr output = node_index.first;
    MS_EXCEPTION_IF_NULL(output);
    if (AnfAlgo::CheckPrimitiveType(output, prim::kPrimControlDepend)) {
      SetInput(output->cast<CNodePtr>(), node_index.second, graph, hccl_node, memcpy_async_list);
    }
  }
}
void TransferControl(const CNodePtr &hccl_node, const std::vector<AnfNodePtr> &memcpy_async_list,
const FuncGraphPtr &graph) {
MS_EXCEPTION_IF_NULL(hccl_node);
@ -53,25 +85,13 @@ void TransferControl(const CNodePtr &hccl_node, const std::vector<AnfNodePtr> &m
}
// find hccl_node's output which is a control depend
for (const auto &node_index : iter->second) {
if (!AnfAlgo::CheckPrimitiveType(node_index.first, prim::kPrimControlDepend)) {
continue;
AnfNodePtr output = node_index.first;
MS_EXCEPTION_IF_NULL(output);
if (AnfAlgo::CheckPrimitiveType(output, prim::kPrimControlDepend)) {
SetInput(output->cast<CNodePtr>(), node_index.second, graph, hccl_node, memcpy_async_list);
} else if (AnfAlgo::CheckPrimitiveType(output, prim::kPrimTupleGetItem)) {
DealControlForGetitem(output->cast<CNodePtr>(), graph, hccl_node, memcpy_async_list);
}
CNodePtr control_depend = node_index.first->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(control_depend);
std::vector<AnfNodePtr> new_inputs;
for (size_t i = 0; i < control_depend->size(); ++i) {
if (i == IntToSize(node_index.second)) {
std::vector<AnfNodePtr> make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple)};
make_tuple_inputs.insert(make_tuple_inputs.end(), memcpy_async_list.begin(), memcpy_async_list.end());
make_tuple_inputs.emplace_back(hccl_node);
auto make_tuple = graph->NewCNode(make_tuple_inputs);
MS_EXCEPTION_IF_NULL(make_tuple);
new_inputs.push_back(make_tuple);
} else {
new_inputs.push_back(control_depend->input(i));
}
}
control_depend->set_inputs(new_inputs);
}
}
} // namespace
@ -148,11 +168,10 @@ const AnfNodePtr InsertMemcpyAsyncForHcclOp::Process(const FuncGraphPtr &func_gr
if (func_graph == nullptr || node == nullptr || !node->isa<CNode>()) {
return nullptr;
}
auto cnode = node->cast<CNodePtr>();
if (!AnfAlgo::IsCommunicationOp(node)) {
return nullptr;
}
InsertMemcpyAsync(func_graph, cnode);
InsertMemcpyAsync(func_graph, node->cast<CNodePtr>());
return nullptr;
}
} // namespace opt

View File

@ -65,7 +65,7 @@ AnfNodePtr InsertCastForMultipleOutput(const FuncGraphPtr &func_graph, const CNo
MS_EXCEPTION_IF_NULL(replace_node);
replace_node->set_scope(cnode->scope());
AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), replace_node);
if (kernel_graph != nullptr && kernel_graph->IsInternalOutput(cnode)) {
if (kernel_graph != nullptr && kernel_graph->IsInternalOutput(cnode, output_idx)) {
kernel_graph->ReplaceInternalOutput(cnode, replace_node, output_idx, 0);
}
} else {
@ -114,7 +114,7 @@ AnfNodePtr InsertCastForOutput(const FuncGraphPtr &func_graph, const CNodePtr &c
MS_EXCEPTION_IF_NULL(replace_node);
replace_node->set_scope(cnode->scope());
AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), replace_node);
if (kernel_graph != nullptr && kernel_graph->IsInternalOutput(cnode)) {
if (kernel_graph != nullptr && kernel_graph->IsInternalOutput(cnode, 0)) {
kernel_graph->ReplaceInternalOutput(cnode, replace_node);
}
}

View File

@ -58,7 +58,7 @@ const AnfNodePtr RemoveInternalOutput::Process(const FuncGraphPtr &func_graph, c
if (kernel_graph == nullptr) {
return nullptr;
}
if (!kernel_graph->IsInternalOutput(node)) {
if (!kernel_graph->IsInternalOutput(node, 0)) {
return nullptr;
}
if (!UsedForOutputOnly(func_graph, node)) {

View File

@ -0,0 +1,65 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/optimizer/ascend/format_type/split_unsupported_transdata.h"
#include <vector>
#include <memory>
#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
namespace opt {
const BaseRef SplitUnsupportedTransData::DefinePattern() const {
  // Match any TransData node with a single wildcard input.
  return VectorRef({prim::KPrimTransData, std::make_shared<Var>()});
}
const AnfNodePtr SplitUnsupportedTransData::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
                                                    const EquivPtr &) const {
  // Only real TransData CNodes are candidates for splitting.
  if (node == nullptr || !node->isa<CNode>() || !AnfAlgo::IsRealKernel(node)) {
    return nullptr;
  }
  auto trans_data = node->cast<CNodePtr>();
  if (AnfAlgo::GetCNodeName(trans_data) != prim::KPrimTransData->name()) {
    return nullptr;
  }
  // A TransData always has exactly one input and one output format.
  auto build_info = AnfAlgo::GetSelectKernelBuildInfo(trans_data);
  MS_EXCEPTION_IF_NULL(build_info);
  if (build_info->GetInputNum() != 1 || build_info->GetOutputNum() != 1) {
    MS_LOG(EXCEPTION) << "Transdata node's kernel info's input and output format size is not 1"
                      << trans_data->DebugString();
  }
  return SplitTransData(func_graph, trans_data);
}
AnfNodePtr SplitUnsupportedTransData::SplitTransData(const FuncGraphPtr &func_graph, const CNodePtr &trans_node) const {
  // If either endpoint of the conversion is not a special hardware format the
  // TransData is supported as-is, so return it unchanged. Otherwise split the
  // unsupported special->special conversion into two TransData nodes routed
  // through the default format:
  //   special -> default (reuses trans_node) -> special (new node).
  MS_EXCEPTION_IF_NULL(func_graph);
  MS_EXCEPTION_IF_NULL(trans_node);
  auto kernel_info = AnfAlgo::GetSelectKernelBuildInfo(trans_node);
  MS_EXCEPTION_IF_NULL(kernel_info);
  if (kHWSpecialFormatSet.find(kernel_info->GetInputFormat(0)) == kHWSpecialFormatSet.end() ||
      kHWSpecialFormatSet.find(kernel_info->GetOutputFormat(0)) == kHWSpecialFormatSet.end()) {
    return trans_node;
  }
  // Fixed typo: was "builder_info_to_special_foramt".
  auto builder_info_to_default = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(kernel_info);
  auto builder_info_to_special_format = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(kernel_info);
  builder_info_to_default->SetOutputsFormat({kOpFormat_DEFAULT});
  builder_info_to_special_format->SetInputsFormat({kOpFormat_DEFAULT});
  std::vector<AnfNodePtr> next_trans_node_inputs = {
    NewValueNode(std::make_shared<Primitive>(prim::KPrimTransData->name())), trans_node};
  auto next_trans_node = func_graph->NewCNode(next_trans_node_inputs);
  MS_EXCEPTION_IF_NULL(next_trans_node);
  next_trans_node->set_abstract(trans_node->abstract());
  AnfAlgo::SetSelectKernelBuildInfo(builder_info_to_default->Build(), trans_node.get());
  AnfAlgo::SetSelectKernelBuildInfo(builder_info_to_special_format->Build(), next_trans_node.get());
  return next_trans_node;
}
} // namespace opt
} // namespace mindspore

View File

@ -0,0 +1,37 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_SPLIT_UNSUPPORTED_TRANSDATA_H
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_SPLIT_UNSUPPORTED_TRANSDATA_H
#include "backend/optimizer/common/optimizer.h"
namespace mindspore {
namespace opt {
// Pass that splits a TransData converting directly between two special
// (hardware) formats into two TransData nodes routed through the default
// format, since the direct special->special conversion is unsupported.
// Note: the include guard previously read "...REMOVE_TRANSDATA_SPILT_H"
// (copy-paste typo); it now matches this file's name.
class SplitUnsupportedTransData : public PatternProcessPass {
 public:
  explicit SplitUnsupportedTransData(bool multigraph = true)
      : PatternProcessPass("split_unsupported_transdata", multigraph) {}
  ~SplitUnsupportedTransData() override = default;
  const BaseRef DefinePattern() const override;
  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;

 private:
  // Builds the second TransData node and rewires kernel build infos.
  AnfNodePtr SplitTransData(const FuncGraphPtr &func_graph, const CNodePtr &trans_node) const;
};
}  // namespace opt
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_SPLIT_UNSUPPORTED_TRANSDATA_H

View File

@ -405,7 +405,7 @@ KernelWithIndex AnfRuntimeAlgorithm::GetPrevNodeOutput(const AnfNodePtr &anf_nod
}
auto node = cnode->input(input_idx + 1);
MS_EXCEPTION_IF_NULL(node);
return VisitKernel(node, 0);
return VisitKernelWithReturnType(node, 0);
}
std::string AnfRuntimeAlgorithm::GetPrevNodeOutputFormat(const AnfNodePtr &anf_node, size_t input_idx) {

View File

@ -94,25 +94,33 @@ bool AscendInferenceSession::CheckModelInputs(uint32_t graph_id, const std::vect
MS_EXCEPTION_IF_NULL(kernel_graph);
auto kernel_graph_inputs = kernel_graph->inputs();
size_t no_weight_input = 0;
vector<ParameterPtr> paras;
// find parameters of graph inputs
for (size_t i = 0; i < kernel_graph_inputs.size(); ++i) {
tensor::TensorPtr tensor = nullptr;
if (!kernel_graph_inputs[i]->isa<Parameter>()) {
MS_LOG(ERROR) << "Kernel graph inputs have anfnode which is not Parameter.";
continue;
}
auto parameter = kernel_graph_inputs[i]->cast<ParameterPtr>();
if (!AnfAlgo::IsParameterWeight(parameter)) {
// compare input number
if (no_weight_input >= inputs.size()) {
MS_LOG(ERROR) << "Input number is inconsistent. The actual input number [" << inputs.size()
<< "] less than that of graph.";
return false;
}
auto input = inputs[no_weight_input++];
if (!CompareInput(input, parameter)) {
MS_LOG(ERROR) << "Please check the input information.";
return false;
}
paras.push_back(parameter);
}
}
// check inputs
for (size_t i = 0; i < paras.size(); ++i) {
// compare input number
if (paras.size() != inputs.size()) {
MS_LOG(ERROR) << "Input number is inconsistent. The actual input number [" << inputs.size()
<< "] but the graph input number is [" << paras.size() << "]";
MS_LOG(ERROR) << "InputsInfo --" << InputsInfo(paras, inputs);
return false;
}
auto input = inputs[no_weight_input++];
if (!CompareInput(input, paras[i])) {
MS_LOG(ERROR) << "Please check the input information.";
MS_LOG(ERROR) << "InputsInfo --" << InputsInfo(paras, inputs);
return false;
}
}
return true;
@ -123,12 +131,6 @@ bool AscendInferenceSession::CompareInput(const tensor::TensorPtr &input, const
MS_EXCEPTION_IF_NULL(parameter);
// compare dims
auto parameter_shape = AnfAlgo::GetOutputDeviceShape(parameter, 0);
if (input->shape().size() != parameter_shape.size()) {
MS_LOG(ERROR) << "Input dim is inconsistent. The actual dim is " << input->shape().size()
<< ", but the parameter dim is " << parameter_shape.size()
<< ". parameter : " << parameter->DebugString();
return false;
}
// compare shape
auto input_shape = input->shape();
@ -153,12 +155,31 @@ bool AscendInferenceSession::CompareInput(const tensor::TensorPtr &input, const
return true;
}
std::string AscendInferenceSession::PrintInputShape(std::vector<size_t> shape) const {
template <typename T>
std::string AscendInferenceSession::PrintInputShape(std::vector<T> shape) const {
  // Render a shape vector as "[ d0 d1 ... ]" (empty shape renders as "[ ]").
  std::string text("[");
  for (size_t i = 0; i < shape.size(); ++i) {
    text.append(" ").append(std::to_string(shape[i]));
  }
  text.append(" ]");
  return text;
}
// Build a one-line human-readable summary of the graph's expected inputs
// versus the tensors actually supplied; used by the MS_LOG(ERROR) paths of
// CheckModelInputs. Each entry reports index, rank ("dims"), shape, and the
// numeric device data-type id.
// NOTE(review): each section's opening "{" is never closed; a " }" is
// appended after every entry instead — confirm this formatting is intended.
std::string AscendInferenceSession::InputsInfo(const std::vector<ParameterPtr> &paras,
                                               const std::vector<tensor::TensorPtr> &inputs) const {
  std::string graph = "graph inputs:{ ";
  for (size_t i = 0; i < paras.size(); ++i) {
    // GetOutputDeviceShape is evaluated twice per parameter (once for the
    // rank, once for the printed shape).
    graph += std::to_string(i) + ": dims " + std::to_string(AnfAlgo::GetOutputDeviceShape(paras[i], 0).size()) +
             ", shape " + PrintInputShape(AnfAlgo::GetOutputDeviceShape(paras[i], 0)) + ", data type " +
             std::to_string(AnfAlgo::GetSelectKernelBuildInfo(paras[i])->GetOutputDeviceType(0)) + " }";
  }
  std::string actual = "actual inputs:{ ";
  for (size_t i = 0; i < inputs.size(); ++i) {
    actual += std::to_string(i) + ": dims " + std::to_string(inputs[i]->shape().size()) + ", shape " +
              PrintInputShape(inputs[i]->shape()) + ", data type " + std::to_string(inputs[i]->data_type()) + " }";
  }
  return graph + " " + actual;
}
} // namespace session
} // namespace mindspore

View File

@ -41,7 +41,9 @@ class AscendInferenceSession : public AscendSession {
GraphId CompileGraph(NotNull<FuncGraphPtr> func_graph) override;
bool CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs) const override;
bool CompareInput(const tensor::TensorPtr &input, const ParameterPtr &parameter) const;
std::string PrintInputShape(std::vector<size_t> shape) const;
template <typename T>
std::string PrintInputShape(std::vector<T> shape) const;
std::string InputsInfo(const std::vector<ParameterPtr> &paras, const std::vector<tensor::TensorPtr> &inputs) const;
};
MS_REG_SESSION(kDavinciInferenceDevice, AscendInferenceSession);
} // namespace session

View File

@ -517,9 +517,7 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::
LoadInputData(kernel_graph, inputs);
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
// Initialize parameter server
if (!ps_init_) {
InitPSParamAndOptim(kernel_graph, inputs);
}
InitPSParamAndOptim(kernel_graph, inputs);
#endif
// convert inputs to model
predictmodel::StepConvertWeight(inputs);

View File

@ -91,10 +91,7 @@ void CPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::Ten
auto &kernel_graph = graphs_[graph_id];
MS_EXCEPTION_IF_NULL(kernel_graph);
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
// Initialize parameter server
if (!ps_init_) {
InitPSParamAndOptim(kernel_graph, inputs);
}
InitPSParamAndOptim(kernel_graph, inputs);
#endif
MS_LOG(INFO) << "Bind input output address";
std::vector<tensor::TensorPtr> need_sync_outputs;

View File

@ -233,9 +233,7 @@ void GPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::Ten
LoadInputData(kernel_graph, inputs);
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
// Initialize parameter server
if (!ps_init_) {
InitPSParamAndOptim(kernel_graph, inputs);
}
InitPSParamAndOptim(kernel_graph, inputs);
#endif
MS_EXCEPTION_IF_NULL(kernel_graph);
// Convert inputs to model
@ -281,7 +279,10 @@ py::tuple GPUSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &graph
RunOpAllocateMemory(input_tensors, kernel_graph.get());
// Execute the computation
LoadInputData(kernel_graph, input_tensors);
Execute(kernel_graph);
{
py::gil_scoped_release gil_release;
Execute(kernel_graph);
}
// Fetch outputs
VectorRef outputs;
UpdateOutputs(kernel_graph, &outputs, input_tensors);

View File

@ -1021,26 +1021,16 @@ AnfNodePtr KernelGraph::GetInternalOutputByFrontNode(const AnfNodePtr &front_nod
return nullptr;
}
bool KernelGraph::IsInternalOutput(const AnfNodePtr &node) const {
if (internal_outputs_to_front_map_.find(node) != internal_outputs_to_front_map_.end()) {
return true;
}
return false;
}
void KernelGraph::AddFinalOutputKernel(const AnfNodePtr &node) {
if (node == nullptr) {
return;
}
(void)final_output_kernels_.insert(node);
}
bool KernelGraph::IsFinalOutputKernel(const AnfNodePtr &node) const {
if (node == nullptr) {
return false;
}
if (final_output_kernels_.find(node) != final_output_kernels_.end()) {
return true;
bool KernelGraph::IsInternalOutput(const AnfNodePtr &node, int output_idx) const {
  // A node is an internal output if it is tracked in
  // internal_outputs_to_front_map_; output_idx == -1 matches any output,
  // otherwise that specific output index must be present in the node's entry.
  auto entry = internal_outputs_to_front_map_.find(node);
  if (entry == internal_outputs_to_front_map_.end()) {
    return false;
  }
  if (output_idx == -1) {
    return true;
  }
  const auto &front_nodes = entry->second;
  return front_nodes.find(output_idx) != front_nodes.end();
}

View File

@ -153,9 +153,7 @@ class KernelGraph : public FuncGraph {
void ReplaceInternalOutput(const AnfNodePtr &node, const AnfNodePtr &new_node, int src_output_idx = -1,
int dst_output_idx = -1);
AnfNodePtr GetInternalOutputByFrontNode(const AnfNodePtr &front_node) const;
bool IsInternalOutput(const AnfNodePtr &node) const;
void AddFinalOutputKernel(const AnfNodePtr &node);
bool IsFinalOutputKernel(const AnfNodePtr &node) const;
bool IsInternalOutput(const AnfNodePtr &node, int output_idx = -1) const;
uint32_t current_epoch() const { return current_epoch_; }
void set_current_epoch(uint32_t epoch) { current_epoch_ = epoch; }
void UpdateChildGraphOrder();
@ -230,7 +228,6 @@ class KernelGraph : public FuncGraph {
bool null_output_;
std::unordered_map<AnfNodePtr, AnfNodePtr> front_to_internal_outputs_map_;
std::unordered_map<AnfNodePtr, std::unordered_map<int, AnfNodePtr>> internal_outputs_to_front_map_;
std::set<AnfNodePtr> final_output_kernels_;
uint32_t current_epoch_;
};
} // namespace session

View File

@ -89,7 +89,7 @@ BaseRef CreateOneTensor(const AnfNodePtr &node, size_t output_index, const Kerne
TypeId type_id = kNumberTypeFloat32;
type_id = AnfAlgo::GetOutputInferDataType(node, output_index);
std::vector<int> temp_shape;
if (graph.IsInternalOutput(node)) {
if (graph.IsInternalOutput(node, output_index)) {
temp_shape.emplace_back(1);
tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(type_id, temp_shape);
tensor->set_device_address(address);
@ -307,18 +307,17 @@ void SessionBasic::InitInternalOutputParameter(const AnfNodePtr &out_node, const
auto real_kernel = AnfAlgo::VisitKernel(ref_node, output_idx);
auto ref_real_node = real_kernel.first;
auto ref_real_node_index = real_kernel.second;
if (ref_real_node->isa<CNode>() && node_graph->IsInternalOutput(ref_real_node) &&
node_graph->IsFinalOutputKernel(ref_real_node)) {
if (ref_real_node->isa<CNode>() && node_graph->IsInternalOutput(ref_real_node, ref_real_node_index)) {
auto kernel_info = ref_real_node->kernel_info();
if (kernel_info == nullptr || !kernel_info->has_build_info()) {
MS_LOG(INFO) << "No kernel info";
return;
}
auto address = AnfAlgo::GetMutableOutputAddr(ref_real_node, ref_real_node_index);
if (address == nullptr) {
if (!opt::IsNopNode(ref_real_node) && !AnfAlgo::OutputAddrExist(ref_real_node, ref_real_node_index)) {
MS_LOG(INFO) << "No kernel address";
return;
}
auto address = AnfAlgo::GetMutableOutputAddr(ref_real_node, ref_real_node_index);
auto format = AnfAlgo::GetOutputFormat(ref_real_node, ref_real_node_index);
auto type = AnfAlgo::GetOutputDeviceDataType(ref_real_node, ref_real_node_index);
auto d_kernel_info = std::make_shared<device::KernelInfo>();
@ -1004,6 +1003,7 @@ CNodePtr SessionBasic::ConstructOutput(const AnfNodePtrList &outputs, const std:
break;
}
}
if (internal_output) {
MS_LOG(INFO) << "Internal output1: " << out->DebugString() << "To " << backend_real_kernel.first->DebugString();
graph->AddInternalOutput(out, backend_real_kernel.first);
@ -1203,11 +1203,9 @@ void SessionBasic::InitPSParamAndOptim(const KernelGraphPtr &kernel_graph,
MS_EXCEPTION_IF_NULL(input_node);
if (input_node->isa<Parameter>() && AnfAlgo::OutputAddrExist(input_node, 0)) {
auto pk_node = input_node->cast<ParameterPtr>();
mindspore::parallel::ps::Worker<float>::GetInstance().InitPSParamAndOptim(
pk_node->fullname_with_scope(), tensor->data_c(), LongToSize(tensor->data().nbytes()));
mindspore::parallel::ps::Worker<float>::GetInstance().InitPSParamAndOptim(pk_node->fullname_with_scope(), tensor);
}
}
ps_init_ = true;
}
#endif
} // namespace session

View File

@ -51,7 +51,7 @@ using OpRunInfoPtr = std::shared_ptr<OpRunInfo>;
class SessionBasic {
public:
SessionBasic() : context_(nullptr), summary_callback_(nullptr), device_id_(0), ps_init_(false) {
SessionBasic() : context_(nullptr), summary_callback_(nullptr), device_id_(0) {
#ifdef ENABLE_DEBUGGER
debugger_ = nullptr;
#endif
@ -152,7 +152,6 @@ class SessionBasic {
CallBackFunc summary_callback_;
static GraphId graph_sum_;
uint32_t device_id_;
bool ps_init_;
#ifdef ENABLE_DEBUGGER
std::shared_ptr<Debugger> debugger_;
#endif

View File

@ -378,10 +378,19 @@ AbstractBasePtr InferImplMakeIndexedSlices(const AnalysisEnginePtr &, const Prim
auto elem = GetValue<int>(e);
return elem;
});
for (auto dense_shape_elem : dense_shape_vec) {
if (dense_shape_elem < 0) {
MS_EXCEPTION(TypeError) << "The element of dense_shape must be positive, but got "
<< dense_shape_value->ToString();
if (dense_shape_vec.size() != values_shp.size()) {
MS_EXCEPTION(TypeError) << "The size of dense_shape must be the same with the dimension of values "
<< values_shp.size() << ", but got " << dense_shape_value->size();
}
for (size_t i = 0; i < dense_shape_vec.size(); i++) {
if (dense_shape_vec[i] < 0) {
MS_EXCEPTION(TypeError) << "The " << i << "th element of dense_shape must be positive, but got "
<< dense_shape_vec[i];
}
// The 0th mode might be less or exceed dense_shape[0] due to duplicated selection
if (i != 0 && dense_shape_vec[i] != values_shp[i]) {
MS_EXCEPTION(TypeError) << "The " << i << "th element of dense_shape must be same with the " << i
<< "th dimension of values " << values_shp[i] << ", but got " << dense_shape_vec[i];
}
}
auto ret = std::make_shared<AbstractIndexedSlices>(values->element()->BuildType(), dense_shape_vec);

View File

@ -34,7 +34,8 @@ namespace parallel {
#define OPERATOR_TO_OPERATOR_CONNECTOR "-"
#define DEFAULT_DEVICE_MEMORY_CAPACITY (1024.0 * 1024.0 * 1024.0 * 16.0)
#define DEFAULT_COST_MODEL_ALPHA 1.0
#define DEFAULT_COST_MODEL_BETA 400.0
#define DEFAULT_COST_MODEL_BETA_ASCEND 400.0 // for 'device_target = Ascend'
#define DEFAULT_COST_MODEL_BETA_GPU 50.0 // for 'device_target = GPU'
#define DEFAULT_COST_MODEL_GAMMA 0.001
#define DEFAULT_COST_MODEL_SIMPLIFY_CALCULATION true
#define DEFAULT_COST_MODEL_COMMUNI_THRESHOLD 2048.0
@ -73,7 +74,7 @@ class CostGraph {
CostGraph() {
dev_memory_ = DEFAULT_DEVICE_MEMORY_CAPACITY;
costmodel_alpha_ = DEFAULT_COST_MODEL_ALPHA;
costmodel_beta_ = DEFAULT_COST_MODEL_BETA;
costmodel_beta_ = DEFAULT_COST_MODEL_BETA_ASCEND;
}
~CostGraph() = default;
void AddOperator(const OperatorInfoPtr &op) { ops_.push_back(op); }

View File

@ -20,6 +20,7 @@
#include "frontend/parallel/allreduce_fusion/allreduce_fusion.h"
#include "frontend/parallel/auto_parallel/graph_costmodel.h"
#include "utils/context/ms_context.h"
namespace mindspore {
namespace parallel {
@ -41,7 +42,7 @@ CostModelContext::CostModelContext() {
void CostModelContext::ResetCostModel() {
device_memory_capacity_ = DEFAULT_DEVICE_MEMORY_CAPACITY;
costmodel_alpha_ = DEFAULT_COST_MODEL_ALPHA;
costmodel_beta_ = DEFAULT_COST_MODEL_BETA;
costmodel_beta_ = DEFAULT_COST_MODEL_BETA_ASCEND;
costmodel_gamma_ = DEFAULT_COST_MODEL_GAMMA;
costmodel_communi_threshold_ = DEFAULT_COST_MODEL_COMMUNI_THRESHOLD;
costmodel_communi_const_ = DEFAULT_COST_MODEL_COMMUNI_CONST;
@ -66,6 +67,12 @@ void CostModelContext::ResetAlgoParameters() {
elementwise_stra_follow_ = DEFAULT_ELEMENTWISE_OP_STRA_FOLLOW;
}
// Adjust cost-model constants for the execution backend: for GPU, beta is
// lowered to DEFAULT_COST_MODEL_BETA_GPU (ResetCostModel() initializes it to
// the Ascend default). Any other device target leaves the value unchanged.
void CostModelContext::set_costmodel_context_for_device(const std::string &device_target) {
  if (device_target == kGPUDevice) {
    costmodel_beta_ = DEFAULT_COST_MODEL_BETA_GPU;
  }
}
void CostModelContext::set_device_memory_capacity(double dm_capacity) { device_memory_capacity_ = dm_capacity; }
void CostModelContext::set_costmodel_alpha(double cm_alpha) { costmodel_alpha_ = cm_alpha; }

View File

@ -35,6 +35,7 @@ class CostModelContext {
static std::shared_ptr<CostModelContext> GetInstance();
void set_costmodel_context_for_device(const std::string &);
// DEVICE_MEMORY_CAPACITY
void set_device_memory_capacity(double);
double device_memory_capacity() const { return device_memory_capacity_; }

View File

@ -57,15 +57,22 @@ constexpr char kMomentum[] = "momentum";
constexpr char kApplyMomentum[] = "ApplyMomentum";
constexpr char kSparseAdam[] = "Adam";
constexpr char kSparseFtrl[] = "Ftrl";
constexpr char kApplyMomentumOp[] = "Momentum";
constexpr char kSparseAdamOp[] = "Adam";
constexpr char kSparseFtrlOp[] = "FTRL";
constexpr int kInitWeightsCmd = 10;
constexpr int kInitWeightToOptimIdCmd = 11;
constexpr int kInitOptimInputsShapeCmd = 12;
constexpr int kInitKeyToPushNodeIdCmd = 13;
constexpr int kInitEmbeddingsCmd = 20;
constexpr int kCheckReadyForPushCmd = 25;
constexpr int kCheckReadyForPullCmd = 26;
constexpr int kEmbeddingLookupCmd = 30;
constexpr int kFinalizeCmd = 40;
constexpr size_t kInvalidKey = UINT64_MAX;
constexpr int kInvalidID = -1;
using Key = ::ps::Key;
using Keys = ::ps::SArray<Key>;

View File

@ -158,16 +158,19 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
}
AddressPtr linear = std::make_shared<kernel::Address>();
linear->addr = new float[weight->size()];
memcpy_s(linear->addr, weight->size() * sizeof(float), 0x00, weight->size() * sizeof(float));
auto ret = memset_s(linear->addr, weight->size() * sizeof(float), 0x00, weight->size() * sizeof(float));
if (ret != 0) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
}
linear->size = weight->size() * sizeof(float);
const std::shared_ptr<std::vector<size_t>> &grad_shape = (*inputs_shape)[3];
size_t total_grad_size = std::accumulate((*grad_shape).begin(), (*grad_shape).end(), 1, std::multiplies<size_t>());
AddressPtr grad = std::make_shared<kernel::Address>();
grad->addr = new float[total_grad_size * worker_num];
auto ret = memcpy_s(grad->addr, lens[0] * sizeof(float), values.data(), lens[0] * sizeof(float));
if (ret != 0) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
auto ret1 = memcpy_s(grad->addr, lens[0] * sizeof(float), values.data(), lens[0] * sizeof(float));
if (ret1 != 0) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret1 << ")";
}
grad->size = lens[0] * sizeof(float);

138
mindspore/ccsrc/frontend/parallel/ps/parameter_server.h Executable file → Normal file
View File

@ -28,6 +28,7 @@
#include <thread>
#include <cmath>
#include <random>
#include <list>
#include "ir/func_graph.h"
#include "backend/session/session_basic.h"
#include "backend/session/anf_runtime_algorithm.h"
@ -70,6 +71,7 @@ class ParameterServer {
handler_(nullptr),
func_graph_(nullptr),
sess_(nullptr),
running_(true),
thread_(nullptr) {}
~ParameterServer() = default;
ParameterServer(const ParameterServer &) = delete;
@ -89,6 +91,8 @@ class ParameterServer {
::ps::KVPairs<T> *res);
void HandleInitInputsShape(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
void HandleInitEmbeddings(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
void HandleCheckReadyForPush(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
void HandleCheckReadyForPull(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
void HandleEmbeddingLookup(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
void HandleFinalize(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res);
@ -96,6 +100,9 @@ class ParameterServer {
typedef void (ServerHandler::*RequestHandler)(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
::ps::KVPairs<T> *res);
std::unordered_map<int, RequestHandler> handlers_;
std::unordered_map<Key, bool> init_weights_;
std::unordered_map<Key, bool> init_weight_to_optim_;
std::unordered_map<Key, bool> init_optim_info_;
};
bool Init(const FuncGraphPtr &func_graph);
@ -106,14 +113,18 @@ class ParameterServer {
void InitGrad(const Key &key, const GradPtr &grad);
void InitEmbeddingTable(const Key &key,
const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes);
void Finalize();
void UpdateWeights();
void AccumGrad(const Keys &key, const Values &values, const Lengths &lengths);
WeightPtr weight(const Key &key);
void DoEmbeddingLookup(Key key, const LookupIds &lookup_ids, ::ps::KVPairs<T> *res);
int SumOfShapes(const std::vector<int> &shapes) const;
bool ReadyForUpdateWeights();
bool ReadyForAccumGrads();
bool ReadyForPush(const Key &key);
bool ReadyForPull(const Key &key);
void ResetGradAccumCount();
std::mutex &mutex();
const CNodePtr GetCNode(const std::string &name) const;
size_t pserver_num_;
size_t worker_num_;
@ -123,20 +134,23 @@ class ParameterServer {
std::unique_ptr<ServerHandler> handler_;
FuncGraphPtr func_graph_;
std::shared_ptr<session::SessionBasic> sess_;
bool running_;
std::unordered_map<Key, std::shared_ptr<PServerKernel>> optimizers_;
std::unordered_map<Key, InputsShapePtr> optim_inputs_shape_;
std::unordered_map<Key, std::shared_ptr<OptimizerInfo>> optim_infos_;
std::unordered_map<std::string, std::shared_ptr<OptimizerInfoBuilder>> optim_info_builders_;
std::unordered_map<Key, std::string> weight_key_to_optims_;
std::unordered_map<Key, std::string> weight_key_to_optim_op_;
std::unordered_map<Key, WeightPtr> weights_;
std::unordered_map<Key, bool> is_embedding_;
std::unordered_map<Key, WeightPtr> grads_;
std::unordered_map<Key, size_t> grads_accum_counter_;
std::unordered_map<Key, std::shared_ptr<PServerKernel>> embedding_lookup_ops_;
std::unordered_map<Key, uint64_t> tokens_;
std::mutex mutex_;
std::condition_variable apply_grads_cv_;
std::condition_variable accum_grads_cv_;
std::unique_ptr<std::thread> thread_;
@ -165,6 +179,8 @@ void ParameterServer<T>::ServerHandler::Init() {
handlers_[kInitWeightToOptimIdCmd] = &ServerHandler::HandleInitWeightToOptimId;
handlers_[kInitOptimInputsShapeCmd] = &ServerHandler::HandleInitInputsShape;
handlers_[kInitEmbeddingsCmd] = &ServerHandler::HandleInitEmbeddings;
handlers_[kCheckReadyForPushCmd] = &ServerHandler::HandleCheckReadyForPush;
handlers_[kCheckReadyForPullCmd] = &ServerHandler::HandleCheckReadyForPull;
handlers_[kEmbeddingLookupCmd] = &ServerHandler::HandleEmbeddingLookup;
handlers_[kFinalizeCmd] = &ServerHandler::HandleFinalize;
}
@ -186,6 +202,7 @@ void ParameterServer<T>::ServerHandler::HandlePullReq(const ::ps::KVMeta &req_me
template <typename T>
void ParameterServer<T>::ServerHandler::HandleInitWeights(const ::ps::KVMeta &req_meta,
const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res) {
std::unique_lock<std::mutex> lock(ps_->mutex());
size_t key_num = req_data.keys.size();
T *data_ptr = req_data.vals.data();
size_t pos = 0;
@ -207,10 +224,16 @@ template <typename T>
void ParameterServer<T>::ServerHandler::HandleInitWeightToOptimId(const ::ps::KVMeta &req_meta,
                                                                  const ::ps::KVPairs<T> &req_data,
                                                                  ::ps::KVPairs<T> *res) {
  // Register, for every key in the request, which optimizer id it maps to.
  // Guarded by the server-wide mutex; init_weight_to_optim_ makes the
  // registration idempotent, so repeated requests (presumably one per
  // worker — confirm against the worker side) register each key only once.
  std::unique_lock<std::mutex> lock(ps_->mutex());
  size_t key_num = req_data.keys.size();
  for (size_t i = 0; i < key_num; i++) {
    Key key = req_data.keys[i];
    T val = req_data.vals[i];
    if (init_weight_to_optim_[key]) {
      continue;  // this key was already registered — skip the duplicate
    } else {
      init_weight_to_optim_[key] = true;
    }
    ps_->InitWeightKeyToOptims(key, val);
  }
}
@ -218,12 +241,21 @@ void ParameterServer<T>::ServerHandler::HandleInitWeightToOptimId(const ::ps::KV
// Handle kInitOptimInputsShapeCmd: forward the optimizer input shapes for the
// first requested key to the server, but only once per key —
// init_optim_info_ dedups repeated requests (presumably one arrives per
// worker; confirm against the worker side).
template <typename T>
void ParameterServer<T>::ServerHandler::HandleInitInputsShape(const ::ps::KVMeta &req_meta,
                                                              const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res) {
  std::unique_lock<std::mutex> lock(ps_->mutex());  // serialize with other handlers
  const Key &key = req_data.keys[0];
  if (init_optim_info_[key]) {
    return;  // shapes for this key were already initialized
  } else {
    init_optim_info_[key] = true;
  }
  ps_->InitOptimInputsShape(req_data.keys, req_data.vals, req_data.lens);
}
template <typename T>
void ParameterServer<T>::ServerHandler::HandleInitEmbeddings(const ::ps::KVMeta &req_meta,
const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res) {
std::unique_lock<std::mutex> lock(ps_->mutex());
const Key &key = req_data.keys[0];
std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> shapes =
std::make_shared<std::vector<std::shared_ptr<std::vector<size_t>>>>();
std::shared_ptr<std::vector<size_t>> input_shape = std::make_shared<std::vector<size_t>>();
@ -233,7 +265,6 @@ void ParameterServer<T>::ServerHandler::HandleInitEmbeddings(const ::ps::KVMeta
shapes->push_back(indices_shape);
shapes->push_back(output_shape);
const Key &key = req_data.keys[0];
const Lengths &lens = req_data.lens;
size_t index = 0;
for (int i = 0; i < lens[0]; i++) {
@ -248,6 +279,26 @@ void ParameterServer<T>::ServerHandler::HandleInitEmbeddings(const ::ps::KVMeta
ps_->InitEmbeddingTable(key, shapes);
}
// Handle kCheckReadyForPushCmd: reply with a single (key, ready) pair, where
// ready reflects ParameterServer::ReadyForPush for the first requested key.
template <typename T>
void ParameterServer<T>::ServerHandler::HandleCheckReadyForPush(const ::ps::KVMeta &req_meta,
                                                                const ::ps::KVPairs<T> &req_data,
                                                                ::ps::KVPairs<T> *res) {
  const Key &key = req_data.keys[0];
  bool ready = ps_->ReadyForPush(key);
  res->keys.push_back(key);
  res->vals.push_back(ready);  // bool stored into the T-valued reply array
}
// Handle kCheckReadyForPullCmd: reply with a single (key, ready) pair, where
// ready reflects ParameterServer::ReadyForPull for the first requested key.
template <typename T>
void ParameterServer<T>::ServerHandler::HandleCheckReadyForPull(const ::ps::KVMeta &req_meta,
                                                                const ::ps::KVPairs<T> &req_data,
                                                                ::ps::KVPairs<T> *res) {
  const Key &key = req_data.keys[0];
  res->keys.push_back(key);
  res->vals.push_back(ps_->ReadyForPull(key));
}
template <typename T>
void ParameterServer<T>::ServerHandler::HandleEmbeddingLookup(const ::ps::KVMeta &req_meta,
const ::ps::KVPairs<T> &req_data, ::ps::KVPairs<T> *res) {
@ -261,7 +312,7 @@ void ParameterServer<T>::ServerHandler::HandleEmbeddingLookup(const ::ps::KVMeta
template <typename T>
void ParameterServer<T>::ServerHandler::HandleFinalize(const ::ps::KVMeta &req_meta, const ::ps::KVPairs<T> &req_data,
::ps::KVPairs<T> *res) {
::ps::Finalize(0, false);
ps_->Finalize();
}
template <typename T>
@ -274,7 +325,6 @@ bool ParameterServer<T>::Init(const FuncGraphPtr &func_graph) {
handler_->Init();
InitOptimInfoBuilders();
ps_->set_request_handle(*handler_);
thread_.reset(new std::thread(&ParameterServer::UpdateWeights, this));
return true;
@ -296,6 +346,7 @@ void ParameterServer<T>::InitWeightKeyToOptims(const Key &key, const int &optim_
return;
}
weight_key_to_optims_[key] = Util::optimizer_name(optim_id);
weight_key_to_optim_op_[key] = Util::optimizer_node_name(optim_id);
}
template <typename T>
@ -318,31 +369,49 @@ void ParameterServer<T>::InitOptimInputsShape(const Keys &keys, const Values &va
}
if (weight_key_to_optims_.count(key) > 0) {
const std::string &optim_name = weight_key_to_optims_[key];
const std::string &optim_op_name = weight_key_to_optim_op_[key];
if (optimizers_.count(key) == 0 && optim_inputs_shape_.count(key) > 0) {
const CNodePtr cnode = GetCNode(optim_op_name);
MS_EXCEPTION_IF_NULL(cnode);
if (optim_name == kSparseAdam) {
std::shared_ptr<PServerKernel> optimizer =
std::make_shared<kernel::ps::SparseApplyLazyAdamPSKernel>(rank_id_, pserver_num_);
optimizer->InitKernel(optim_inputs_shape_[key]);
optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
optimizers_[key] = optimizer;
} else if (optim_name == kApplyMomentum) {
std::shared_ptr<PServerKernel> optimizer =
std::make_shared<kernel::ps::ApplyMomentumPSKernel>(rank_id_, pserver_num_);
optimizer->InitKernel(optim_inputs_shape_[key]);
optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
optimizers_[key] = optimizer;
} else if (optim_name == kSparseFtrl) {
std::shared_ptr<PServerKernel> optimizer =
std::make_shared<kernel::ps::SparseApplyFtrlPSKernel>(rank_id_, pserver_num_);
optimizer->InitKernel(optim_inputs_shape_[key]);
optimizer->InitKernel(cnode, optim_inputs_shape_[key]);
optimizers_[key] = optimizer;
}
}
}
}
// Locate the "Push" cnode whose fully-scoped name contains the given
// optimizer node name (e.g. "Momentum", "Adam", "FTRL"); returns nullptr when
// no such node exists. Iterates by const reference: CNodePtr is a shared_ptr,
// so the original by-value loop paid an atomic refcount bump (plus a string
// copy) on every node.
template <typename T>
const CNodePtr ParameterServer<T>::GetCNode(const std::string &name) const {
  std::list<CNodePtr> cnodes = func_graph_->GetOrderedCnodes();
  for (const CNodePtr &cnode : cnodes) {
    const std::string &fullname = cnode->fullname_with_scope();
    if (fullname.find(name) != std::string::npos && fullname.find("Push") != std::string::npos) {
      return cnode;
    }
  }
  return nullptr;
}
template <typename T>
void ParameterServer<T>::InitWeight(const Key &key, const WeightPtr &weight) {
if (weights_.count(key) == 0) {
MS_LOG(INFO) << "Initializing weight for key " << key;
if ((weights_.count(key) == 0) || (is_embedding_[key] && weights_.count(key) != 0)) {
weights_[key] = weight;
tokens_[key] = 0;
is_embedding_[key] = false;
}
}
@ -357,7 +426,7 @@ void ParameterServer<T>::InitGrad(const Key &key, const GradPtr &grad) {
template <typename T>
void ParameterServer<T>::InitEmbeddingTable(
const Key &key, const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes) {
// Init embedding lookup kernel
MS_LOG(INFO) << "Initializing embedding table for key " << key;
std::shared_ptr<PServerKernel> lookup = std::make_shared<kernel::ps::EmbeddingLookUpPSKernel>(rank_id_, pserver_num_);
lookup->InitKernel(shapes);
embedding_lookup_ops_[key] = lookup;
@ -377,15 +446,26 @@ void ParameterServer<T>::InitEmbeddingTable(
embedding_data[i] = random(engine);
}
weights_[key] = embedding;
tokens_[key] = 0;
is_embedding_[key] = true;
grads_accum_counter_[key] = 0;
}
// Stop the UpdateWeights worker thread. The flag is flipped while holding
// mutex_: UpdateWeights re-checks `running_` as a condition-variable
// predicate under the same mutex, and modifying the predicate without the
// lock can race the wait — the waiter could test `running_` just before it
// is cleared and then sleep through this notify, missing shutdown.
template <typename T>
void ParameterServer<T>::Finalize() {
  {
    std::unique_lock<std::mutex> lock(mutex_);
    running_ = false;
  }
  apply_grads_cv_.notify_one();
}
template <typename T>
void ParameterServer<T>::UpdateWeights() {
while (true) {
std::unique_lock<std::mutex> lock(mutex_);
apply_grads_cv_.wait(lock, [this] { return this->ReadyForUpdateWeights(); });
apply_grads_cv_.wait(lock, [this] { return this->ReadyForUpdateWeights() || !running_; });
if (!running_) {
break;
}
for (auto iter = weights_.begin(); iter != weights_.end(); iter++) {
Key key = iter->first;
@ -408,17 +488,17 @@ void ParameterServer<T>::UpdateWeights() {
optim_info->ComputeMean(worker_num_);
optimizer->Execute(inputs, workspaces, outputs);
optim_info->Reset();
if (!is_embedding_[key]) {
tokens_[key] = worker_num_;
}
}
ResetGradAccumCount();
accum_grads_cv_.notify_all();
}
}
template <typename T>
void ParameterServer<T>::AccumGrad(const Keys &keys, const Values &values, const Lengths &lengths) {
std::unique_lock<std::mutex> lock(mutex_);
accum_grads_cv_.wait(lock, [this] { return this->ReadyForAccumGrads(); });
const Key &key = keys[0];
std::shared_ptr<OptimizerInfo> optim_info = optim_infos_[key];
@ -451,14 +531,13 @@ void ParameterServer<T>::AccumGrad(const Keys &keys, const Values &values, const
template <typename T>
WeightPtr ParameterServer<T>::weight(const Key &key) {
std::unique_lock<std::mutex> lock(mutex_);
if (weights_.count(key) == 0) {
MS_LOG(ERROR) << "Invalid weight key " << key;
return nullptr;
MS_LOG(EXCEPTION) << "Invalid weight key " << key;
}
WeightPtr weight_ptr = weights_[key];
WeightPtr copy_weight_ptr = std::make_shared<::ps::SArray<T>>(weight_ptr->size(), 0);
copy_weight_ptr->CopyFrom(weight_ptr->data(), weight_ptr->size());
tokens_[key] -= 1;
return copy_weight_ptr;
}
@ -529,8 +608,22 @@ inline bool ParameterServer<T>::ReadyForUpdateWeights() {
}
template <typename T>
inline bool ParameterServer<T>::ReadyForAccumGrads() {
return grad_accum_count_ < weights_.size();
inline bool ParameterServer<T>::ReadyForPush(const Key &key) {
std::unique_lock<std::mutex> lock(mutex_);
if (weights_.empty()) {
MS_LOG(EXCEPTION) << "The weights in server is empty. Many reasons could cause this: 1.The Worker didn't send "
"kInitWeightsCmd command. 2.The Server failed to initialize weights.";
}
return grad_accum_count_ < weights_.size() && tokens_[key] <= 0;
}
// A weight may be pulled only while it still has tokens (set to worker_num_
// after each optimizer update and decremented on every pull). Throws on an
// unknown key. Uses find() instead of operator[]: indexing weights_ with an
// unknown key would default-insert a null WeightPtr into the map as a side
// effect of this validity check.
template <typename T>
inline bool ParameterServer<T>::ReadyForPull(const Key &key) {
  std::unique_lock<std::mutex> lock(mutex_);
  auto weight_iter = weights_.find(key);
  if (tokens_.count(key) == 0 || weight_iter == weights_.end() || weight_iter->second == nullptr) {
    MS_LOG(EXCEPTION) << "Invalid weight key " << key;
  }
  return tokens_[key] > 0;
}
template <typename T>
@ -541,6 +634,11 @@ inline void ParameterServer<T>::ResetGradAccumCount() {
}
}
// Expose the server-wide mutex so ServerHandler request handlers (e.g.
// HandleInitWeights, HandleInitInputsShape) can serialize against each other
// and against the weight-update machinery.
template <typename T>
inline std::mutex &ParameterServer<T>::mutex() {
  return mutex_;
}
template <typename T>
void ParameterServer<T>::Run(const FuncGraphPtr &func_graph) {
::ps::Start(0);
@ -550,6 +648,8 @@ void ParameterServer<T>::Run(const FuncGraphPtr &func_graph) {
}
Init(func_graph);
thread_->join();
::ps::Finalize(0, true);
exit(1);
}
} // namespace ps
} // namespace parallel

View File

@ -23,9 +23,8 @@ namespace parallel {
namespace ps {
void Scheduler::Run() {
::ps::Start(0);
while (true) {
sleep(1);
}
::ps::Finalize(0, true);
exit(1);
}
} // namespace ps
} // namespace parallel

View File

@ -33,6 +33,13 @@ std::unordered_map<int, std::string> Util::id_to_optimizers{
{1, kSparseAdam},
{2, kSparseFtrl},
};
// Maps an optimizer id to the name of its graph node ("Momentum"/"Adam"/
// "FTRL"); consumed by Util::optimizer_node_name. Parallels id_to_optimizers,
// which maps the same ids to the optimizer kernel names.
std::unordered_map<int, std::string> Util::id_to_optimizer_nodes{
  {0, kApplyMomentumOp},
  {1, kSparseAdamOp},
  {2, kSparseFtrlOp},
};
bool Util::IsParamServerMode() { return IsRoleOfWorker() || IsRoleOfPServer() || IsRoleOfScheduler(); }
bool Util::IsRoleOfWorker() {
@ -112,6 +119,13 @@ std::string Util::optimizer_name(int id) {
return "";
}
// Map an optimizer id to its graph node name via id_to_optimizer_nodes;
// returns an empty string for unknown ids. A single find() replaces the
// original count()-then-operator[] pattern, which performed two hash lookups.
std::string Util::optimizer_node_name(int id) {
  auto iter = id_to_optimizer_nodes.find(id);
  if (iter != id_to_optimizer_nodes.end()) {
    return iter->second;
  }
  return "";
}
bool Util::is_optimizer(std::string name) { return optimizer_to_ids.count(name) > 0; }
int Util::LocalShard(int first_dim, int rank_id, int server_num) {

View File

@ -34,12 +34,14 @@ class Util {
static void SetInternalEnvVar();
static int optimizer_id(std::string name);
static std::string optimizer_name(int id);
static std::string optimizer_node_name(int id);
static bool is_optimizer(std::string name);
static int LocalShard(int first_dim, int rank_id, int server_num);
private:
static std::unordered_map<std::string, int> optimizer_to_ids;
static std::unordered_map<int, std::string> id_to_optimizers;
static std::unordered_map<int, std::string> id_to_optimizer_nodes;
};
} // namespace ps
} // namespace parallel

View File

@ -24,6 +24,7 @@
#include <map>
#include "ps/ps.h"
#include "utils/log_adapter.h"
#include "ir/tensor.h"
#include "frontend/parallel/ps/util.h"
#include "frontend/parallel/ps/common.h"
#include "frontend/parallel/ps/worker_proxy.h"
@ -43,18 +44,20 @@ class Worker {
void Push(const std::vector<size_t> &keys, std::vector<uintptr_t> addrs, const std::vector<int> &sizes);
void Pull(const size_t key, void *dev_addr, const size_t size);
size_t SetParamKey(const std::string &param_name);
void SetParamInitInServer(const std::string &param_name, bool init_in_server);
bool GetParamInitInServer(const std::string &param_name);
void SetKeyOptimId(size_t key, const std::string &optimizer_name);
void SetOptimInputShapes(size_t key, const std::vector<int> &shape);
void AddEmbeddingTable(const ::ps::Key &key, const size_t &row_count);
void InitPSEmbeddingTable(const std::vector<size_t> &keys, std::vector<size_t> shapes, const std::vector<int> &sizes);
void InitPSParamAndOptim(const std::string &param_name, void *param_data, size_t param_size);
void InitPSParamAndOptim(const std::string &param_name, tensor::TensorPtr tensor);
void DoPSEmbeddingLookup(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray<int> &lookup_ids,
const ::ps::SArray<int> &lens, ::ps::SArray<T> *lookup_result, int cmd);
void Finalize();
private:
Worker() : kv_worker_(nullptr), running_(false), key_cnt_(0) {}
~Worker() { ::ps::Finalize(0, true); }
~Worker() = default;
Worker(const Worker &) = delete;
Worker &operator=(const Worker &) = delete;
@ -73,6 +76,7 @@ class Worker {
std::map<size_t, bool> init_keys_;
std::map<size_t, int> key_to_optimId_;
std::map<size_t, std::vector<std::vector<int>>> key_to_optim_shapes_;
std::map<std::string, bool> param_to_init_in_server_;
};
template <typename T>
@ -81,7 +85,6 @@ void Worker<T>::Run() {
MS_LOG(INFO) << "'Worker is already running.";
return;
}
::ps::Start(0);
if (!::ps::IsWorker()) {
MS_LOG(EXCEPTION) << "The role is not worker.";
@ -99,18 +102,30 @@ void Worker<T>::Push(const std::vector<size_t> &keys, std::vector<uintptr_t> add
::ps::SArray<T> total_buffer(total_size, 0);
size_t offset = 0;
for (size_t i = 0; i < sizes.size(); i++) {
memcpy_s(total_buffer.data() + offset / sizeof(T), sizes[i] * sizeof(T), reinterpret_cast<void *>(addrs[i]),
sizes[i] * sizeof(T));
auto ret = memcpy_s(total_buffer.data() + offset / sizeof(T), sizes[i] * sizeof(T),
reinterpret_cast<void *>(addrs[i]), sizes[i] * sizeof(T));
if (ret != 0) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
}
offset += sizes[i] * sizeof(T);
}
while (!kv_worker_->IsReadyForPush(keys[0])) {
continue;
}
kv_worker_->PushData(::ps::SArray<::ps::Key>(keys), total_buffer, ::ps::SArray<int>(sizes));
}
template <typename T>
void Worker<T>::Pull(const size_t key, void *dev_addr, const size_t size) {
::ps::SArray<T> variables(size / sizeof(T), 0);
while (!kv_worker_->IsReadyForPull(key)) {
continue;
}
kv_worker_->Wait(kv_worker_->ZPull({key}, &variables));
memcpy_s(dev_addr, size, variables.data(), size);
auto ret = memcpy_s(dev_addr, size, variables.data(), size);
if (ret != 0) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
}
}
template <typename T>
@ -121,7 +136,11 @@ void Worker<T>::DoPSEmbeddingLookup(const ::ps::SArray<::ps::Key> &keys, const :
template <typename T>
void Worker<T>::Finalize() {
kv_worker_->Finalize();
if (running_) {
kv_worker_->Finalize();
kv_worker_.reset();
running_ = false;
}
}
template <typename T>
@ -192,6 +211,20 @@ size_t Worker<T>::SetParamKey(const std::string &param_name) {
return key;
}
template <typename T>
void Worker<T>::SetParamInitInServer(const std::string &param_name, bool init_in_server) {
MS_LOG(INFO) << "Set parameter " << param_name << " init_in_server:" << init_in_server;
param_to_init_in_server_[param_name] = init_in_server;
}
template <typename T>
bool Worker<T>::GetParamInitInServer(const std::string &param_name) {
if (param_to_init_in_server_.count(param_name) == 0) {
return false;
}
return param_to_init_in_server_[param_name];
}
template <typename T>
size_t Worker<T>::GetParamKey(const std::string &param_name) {
size_t key = kInvalidKey;
@ -237,17 +270,27 @@ void Worker<T>::InitPSEmbeddingTable(const std::vector<size_t> &keys, std::vecto
template <typename T>
// Initialize parameters and optimizer kernels of Parameter Server.
void Worker<T>::InitPSParamAndOptim(const std::string &param_name, void *param_data, size_t param_size) {
void Worker<T>::InitPSParamAndOptim(const std::string &param_name, tensor::TensorPtr tensor) {
void *param_data = tensor->data_c();
size_t param_size = LongToSize(tensor->data().nbytes());
std::vector<int> param_shape = tensor->shape_c();
size_t param_key = GetParamKey(param_name);
if (param_key == kInvalidKey) {
MS_LOG(INFO) << "Parameter " << param_name << " has no key assigned.";
return;
}
bool init_in_server = false;
std::vector<int> shape_init_in_server = {1};
if (param_shape == shape_init_in_server) {
init_in_server = true;
}
SetParamInitInServer(param_name, init_in_server);
bool init = IsKeyInit(param_key);
if (!init) {
MS_LOG(INFO) << "Init paramter and optimizer in parameter server side for " << param_name;
// No need to push embedding table data to Parameter Server.
if (param_name.find("embedding_table") == std::string::npos && param_name.find("wide_w") == std::string::npos) {
MS_LOG(INFO) << "Init paramter and optimizer in parameter server side for " << param_name
<< ", whether init in server: " << init_in_server;
if (!init_in_server) {
InitPSParamData({param_key}, param_data, param_size);
}
InitPSOptimId(param_key);

View File

@ -56,6 +56,8 @@ class WorkerProxy : public ::ps::KVWorker<T> {
int priority = 0);
int InitEmbeddingTable(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray<T> &vals,
const ::ps::SArray<int> &lens = {}, const Callback &cb = nullptr, int priority = 0);
bool IsReadyForPush(const Key &key);
bool IsReadyForPull(const Key &key);
void PushData(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray<T> &vals, const ::ps::SArray<int> &lens = {},
int cmd = 0, int priority = 0);
void Finalize();
@ -134,6 +136,28 @@ int WorkerProxy<T>::InitEmbeddingTable(const ::ps::SArray<::ps::Key> &keys, cons
return ts;
}
template <typename T>
bool WorkerProxy<T>::IsReadyForPush(const Key &key) {
::ps::SArray<T> result(1, 0);
this->Wait(this->ZPull({key}, &result, nullptr, kCheckReadyForPushCmd));
if (result[0] > 0) {
return true;
} else {
return false;
}
}
template <typename T>
bool WorkerProxy<T>::IsReadyForPull(const Key &key) {
::ps::SArray<T> result(1, 0);
this->Wait(this->ZPull({key}, &result, nullptr, kCheckReadyForPullCmd));
if (result[0] > 0) {
return true;
} else {
return false;
}
}
template <typename T>
void WorkerProxy<T>::PushData(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray<T> &vals,
const ::ps::SArray<int> &lens, int cmd, int priority) {
@ -155,7 +179,7 @@ void WorkerProxy<T>::Finalize() {
kvs.vals.push_back(0.0f);
Send(obj_, ts, true, false, kFinalizeCmd, kvs, broadcast_slicer_);
obj_->WaitRequest(ts);
::ps::Finalize(0, false);
::ps::Finalize(0, true);
}
template <typename T>

View File

@ -47,7 +47,9 @@ include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-rpath,$ORIGIN:$ORIGIN/lib")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=default")
ms_build_flatbuffers("engine/cache/de_tensor.fbs" ${CMAKE_CURRENT_SOURCE_DIR} generated_engine_files ${CMAKE_BINARY_DIR})
include_directories("${CMAKE_BINARY_DIR}/minddata/dataset/engine/cache")
set(MD_FLATBUFFER_OU "${CMAKE_BINARY_DIR}/minddata/dataset/engine/cache")
ms_build_flatbuffers("engine/cache/de_tensor.fbs" ${CMAKE_CURRENT_SOURCE_DIR} generated_engine_files ${MD_FLATBUFFER_OU})
################## Include sub-modules ###############################
add_subdirectory(util)

View File

@ -410,6 +410,7 @@ Status DEPipeline::SaveDataset(const std::vector<std::string> &file_names, const
std::vector<std::string> index_fields;
s = FetchMetaFromTensorRow(column_name_id_map, row, &mr_json, &index_fields);
RETURN_IF_NOT_OK(s);
MS_LOG(DEBUG) << "Schema of saved mindrecord: " << mr_json.dump();
if (mindrecord::SUCCESS !=
mindrecord::ShardHeader::initialize(&mr_header, mr_json, index_fields, blob_fields, mr_schema_id)) {
RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardHeader.");
@ -569,6 +570,7 @@ Status DEPipeline::FetchMetaFromTensorRow(const std::unordered_map<std::string,
if (column_name_id_map.empty()) {
RETURN_STATUS_UNEXPECTED("Error: column not found.");
}
json dataset_schema;
for (auto &col : column_name_id_map) {
auto idx = col.second;
auto column_name = col.first;
@ -580,6 +582,7 @@ Status DEPipeline::FetchMetaFromTensorRow(const std::unordered_map<std::string,
auto shapes = column_shape.AsVector();
std::vector<int> mr_shape(shapes.begin(), shapes.end());
std::string el = column_type.ToString();
dataset_schema[column_name] = el;
if (mindrecord::kTypesMap.find(el) == mindrecord::kTypesMap.end()) {
std::string err_msg("Error: can not support data type: " + el);
RETURN_STATUS_UNEXPECTED(err_msg);
@ -605,6 +608,7 @@ Status DEPipeline::FetchMetaFromTensorRow(const std::unordered_map<std::string,
if (mr_type == "bytes" || !mr_shape.empty()) continue;
index_fields->emplace_back(column_name); // candidate of index fields
}
MS_LOG(DEBUG) << "Schema of dataset: " << dataset_schema.dump();
return Status::OK();
}
Status DEPipeline::BuildMindrecordSamplerChain(const py::handle &handle,

View File

@ -268,6 +268,10 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) {
std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool();
(*ptr)->data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool);
int64_t byte_size = (*ptr)->SizeInBytes();
if (byte_size == 0) {
return Status::OK();
}
RETURN_IF_NOT_OK((*ptr)->AllocateBuffer(byte_size));
unsigned char *data = static_cast<unsigned char *>(arr.request().ptr);

View File

@ -23,9 +23,9 @@
#include <utility>
#include <vector>
#include "./de_tensor_generated.h"
#include "minddata/dataset/engine/data_buffer.h"
#include "minddata/dataset/engine/cache/cache_server.h"
#include "minddata/dataset/engine/cache/de_tensor_generated.h"
#include "minddata/dataset/util/lock.h"
namespace mindspore {

View File

@ -23,8 +23,8 @@
#include <utility>
#include <vector>
#include "./de_tensor_generated.h"
#include "minddata/dataset/core/tensor_row.h"
#include "minddata/dataset/engine/cache/de_tensor_generated.h"
#include "minddata/dataset/util/slice.h"
#include "minddata/dataset/util/wait_post.h"

View File

@ -25,10 +25,10 @@
#include <utility>
#include <vector>
#include "./de_tensor_generated.h"
#include "minddata/dataset/core/global_context.h"
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/engine/cache/cache_request.h"
#include "minddata/dataset/engine/cache/de_tensor_generated.h"
#include "minddata/dataset/util/arena.h"
#include "minddata/dataset/util/btree.h"
#include "minddata/dataset/util/cache_pool.h"
@ -84,6 +84,7 @@ class CacheService : public Service {
public:
using state_type = std::underlying_type<State>::type;
ServiceStat() : min_(0), max_(0), state_(0) {}
~ServiceStat() = default;
CachePool::CacheStat stat_{};
row_id_type min_;
row_id_type max_;

View File

@ -388,6 +388,13 @@ uint32_t DatasetOp::GenerateCRC(const std::shared_ptr<DatasetOp> &op) {
op->tree_->Print(ss, op);
std::string ss_str = ss.str();
// Filter out the Num workers field when generating the check sum
ss_str = std::regex_replace(ss_str, std::regex("Num workers.*\n"), "");
ss_str = std::regex_replace(ss_str, std::regex("\\[workers.*\\]"), "");
// Filter out Number of rows when generating the check sum
ss_str = std::regex_replace(ss_str, std::regex("Number of rows.*\n"), "");
// Filter out the Operator control flags field when generating the check sum
ss_str = std::regex_replace(ss_str, std::regex("Operator control flags.*\n"), "");
@ -400,6 +407,8 @@ uint32_t DatasetOp::GenerateCRC(const std::shared_ptr<DatasetOp> &op) {
ss_str = std::regex_replace(ss_str, std::regex("Cache crc.*\n"), "");
ss_str = std::regex_replace(ss_str, std::regex("Server cache id.*\n"), "");
MS_LOG(DEBUG) << "Printing the tree for generating crc:\n" << ss_str;
uint32_t cache_crc = system::Crc32c::GetMaskCrc32cValue(ss_str.c_str(), ss_str.length());
return cache_crc;
}

View File

@ -212,12 +212,12 @@ Status DeviceQueueOp::SendDataToGPU() {
RETURN_IF_NOT_OK(RetryPushGPUData(data_size, curr_row, handle));
total_batch++;
}
if (!TaskManager::FindMe()->Interrupted())
if (!TaskManager::FindMe()->Interrupted() && !GpuBufferMgr::GetInstance().IsClosed())
RETURN_IF_NOT_OK(GetNextInput(&current_buffer));
else
is_break_loop = true;
}
if (!TaskManager::FindMe()->Interrupted())
if (!TaskManager::FindMe()->Interrupted() && !GpuBufferMgr::GetInstance().IsClosed())
RETURN_IF_NOT_OK(GetNextInput(&current_buffer));
else
is_break_loop = true;

View File

@ -758,6 +758,11 @@ Status CsvOp::ComputeColMap() {
} else {
MS_LOG(WARNING) << "Column name map is already set!";
}
if (column_default_list_.size() < column_name_id_map_.size()) {
for (int32_t i = column_default_list_.size(); i < column_name_id_map_.size(); i++) {
column_default_list_.push_back(std::make_shared<CsvOp::Record<std::string>>(CsvOp::STRING, ""));
}
}
return Status::OK();
}
} // namespace dataset

View File

@ -679,9 +679,10 @@ Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor
}
cv::Mat result;
cv::merge(image_result, result);
result.convertTo(result, input_cv->mat().type());
std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(result);
if (input_cv->Rank() == 2) output_cv->Squeeze();
(*output) = std::static_pointer_cast<Tensor>(output_cv);
(*output)->Reshape(input->shape());
} catch (const cv::Exception &e) {
RETURN_STATUS_UNEXPECTED("Error in auto contrast");
}
@ -781,8 +782,8 @@ Status Equalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *o
cv::Mat result;
cv::merge(image_result, result);
std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(result);
if (input_cv->Rank() == 2) output_cv->Squeeze();
(*output) = std::static_pointer_cast<Tensor>(output_cv);
(*output)->Reshape(input->shape());
} catch (const cv::Exception &e) {
RETURN_STATUS_UNEXPECTED("Error in equalize.");
}

View File

@ -27,17 +27,34 @@ namespace dataset {
SentencePieceTokenizerOp::SentencePieceTokenizerOp(const std::shared_ptr<SentencePieceVocab> vocab,
const SPieceTokenizerLoadType load_type,
const SPieceTokenizerOutType out_type)
: vocab_(vocab), load_type_(load_type), out_type_(out_type) {}
: vocab_(vocab), load_type_(load_type), out_type_(out_type) {
auto status = processor_.LoadFromSerializedProto(vocab_.get()->model_proto());
if (!status.ok()) {
model_status_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "parser vocab model filed.");
} else {
model_status_ = Status::OK();
}
}
SentencePieceTokenizerOp::SentencePieceTokenizerOp(const std::string &model_path, const std::string &model_filename,
const SPieceTokenizerLoadType load_type,
const SPieceTokenizerOutType out_type)
: load_type_(load_type), out_type_(out_type) {
(void)GetModelRealPath(model_path, model_filename);
auto status = processor_.Load(file_path_);
if (!status.ok()) {
model_status_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "load vocab model filed.");
} else {
model_status_ = Status::OK();
}
}
Status SentencePieceTokenizerOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
IO_CHECK(input, output);
if (!model_status_.IsOk()) {
return model_status_;
}
if (input->Rank() != 0 || input->type() != DataType::DE_STRING) {
RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor");
}
@ -45,18 +62,6 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr<Tensor> &input, s
std::string_view sentence_v;
RETURN_IF_NOT_OK(input->GetItemAt(&sentence_v, {}));
std::string sentence{sentence_v};
if (load_type_ == SPieceTokenizerLoadType::kFile) {
auto status = processor_.Load(file_path_);
if (!status.ok()) {
RETURN_STATUS_UNEXPECTED("load sentence piece model failed.");
}
} else {
RETURN_UNEXPECTED_IF_NULL(vocab_);
auto status = processor_.LoadFromSerializedProto(vocab_.get()->model_proto());
if (!status.ok()) {
RETURN_STATUS_UNEXPECTED("sentence piece load model failed.");
}
}
if (out_type_ == SPieceTokenizerOutType::kString) {
std::vector<std::string> pieces;

View File

@ -58,6 +58,7 @@ class SentencePieceTokenizerOp : public TensorOp {
std::string file_path_;
SPieceTokenizerLoadType load_type_;
sentencepiece::SentencePieceProcessor processor_;
Status model_status_;
};
} // namespace dataset
} // namespace mindspore

View File

@ -296,7 +296,13 @@ Status TaskGroup::CreateAsyncTask(const std::string &my_name, const std::functio
return Status::OK();
}
void TaskGroup::interrupt_all() noexcept { intrp_svc_->InterruptAll(); }
void TaskGroup::interrupt_all() noexcept {
// There is a racing condition if we don't stop the interrupt service at this point. New resource
// may come in and not being picked up after we call InterruptAll(). So stop new comers and then
// interrupt any existing resources.
(void)intrp_svc_->ServiceStop();
intrp_svc_->InterruptAll();
}
Status TaskGroup::join_all(Task::WaitFlag wf) {
Status rc;
@ -312,7 +318,6 @@ Status TaskGroup::join_all(Task::WaitFlag wf) {
}
Status TaskGroup::DoServiceStop() {
intrp_svc_->ServiceStop();
interrupt_all();
return (join_all(Task::WaitFlag::kNonBlocking));
}

View File

@ -133,6 +133,7 @@ void BindGlobalParams(py::module *m) {
(*m).attr("MAX_PAGE_SIZE") = kMaxPageSize;
(*m).attr("MIN_SHARD_COUNT") = kMinShardCount;
(*m).attr("MAX_SHARD_COUNT") = kMaxShardCount;
(*m).attr("MAX_FILE_COUNT") = kMaxFileCount;
(*m).attr("MIN_CONSUMER_COUNT") = kMinConsumerCount;
(void)(*m).def("get_max_thread_num", &GetMaxThreadNum);
}

View File

@ -104,7 +104,8 @@ const uint64_t kInt64Len = 8;
const uint64_t kMinFileSize = kInt64Len;
const int kMinShardCount = 1;
const int kMaxShardCount = 1000;
const int kMaxShardCount = 1000; // write
const int kMaxFileCount = 4096; // read
const int kMinConsumerCount = 1;
const int kMaxConsumerCount = 128;

View File

@ -152,7 +152,7 @@ class ShardHeader {
MSRStatus CheckIndexField(const std::string &field, const json &schema);
void ParsePage(const json &page, int shard_index, bool load_dataset);
MSRStatus ParsePage(const json &page, int shard_index, bool load_dataset);
MSRStatus ParseStatistics(const json &statistics);

View File

@ -252,7 +252,7 @@ std::vector<std::tuple<int, int, int, uint64_t>> ShardReader::ReadRowGroupSummar
if (shard_count <= 0) {
return row_group_summary;
}
if (shard_count <= kMaxShardCount) {
if (shard_count <= kMaxFileCount) {
for (int shard_id = 0; shard_id < shard_count; ++shard_id) {
// return -1 when page's size equals to 0.
auto last_page_id = shard_header_->GetLastPageId(shard_id);
@ -1054,7 +1054,7 @@ MSRStatus ShardReader::CreateTasksByRow(const std::vector<std::tuple<int, int, i
}
auto offsets = std::get<1>(ret);
auto local_columns = std::get<2>(ret);
if (shard_count_ <= kMaxShardCount) {
if (shard_count_ <= kMaxFileCount) {
for (int shard_id = 0; shard_id < shard_count_; shard_id++) {
for (uint32_t i = 0; i < offsets[shard_id].size(); i += 1) {
tasks_.InsertTask(TaskType::kCommonTask, offsets[shard_id][i][0], offsets[shard_id][i][1],

View File

@ -83,7 +83,7 @@ MSRStatus ShardWriter::OpenDataFiles(bool append) {
// if not append and mindrecord file exist, return FAILED
fs->open(common::SafeCStr(file), std::ios::in | std::ios::binary);
if (fs->good()) {
MS_LOG(ERROR) << "MindRecord file already existed.";
MS_LOG(ERROR) << "MindRecord file already existed, please delete file: " << common::SafeCStr(file);
fs->close();
return FAILED;
}

View File

@ -55,7 +55,9 @@ MSRStatus ShardHeader::InitializeHeader(const std::vector<json> &headers, bool l
header_size_ = header["header_size"].get<uint64_t>();
page_size_ = header["page_size"].get<uint64_t>();
}
ParsePage(header["page"], shard_index, load_dataset);
if (SUCCESS != ParsePage(header["page"], shard_index, load_dataset)) {
return FAILED;
}
shard_index++;
}
return SUCCESS;
@ -248,11 +250,16 @@ MSRStatus ShardHeader::ParseIndexFields(const json &index_fields) {
return SUCCESS;
}
void ShardHeader::ParsePage(const json &pages, int shard_index, bool load_dataset) {
MSRStatus ShardHeader::ParsePage(const json &pages, int shard_index, bool load_dataset) {
// set shard_index when load_dataset is false
if (pages_.empty() && shard_count_ <= kMaxShardCount) {
if (shard_count_ > kMaxFileCount) {
MS_LOG(ERROR) << "The number of mindrecord files is greater than max value: " << kMaxFileCount;
return FAILED;
}
if (pages_.empty() && shard_count_ <= kMaxFileCount) {
pages_.resize(shard_count_);
}
for (auto &page : pages) {
int page_id = page["page_id"];
int shard_id = page["shard_id"];
@ -275,6 +282,7 @@ void ShardHeader::ParsePage(const json &pages, int shard_index, bool load_datase
pages_[shard_index].push_back(std::move(parsed_page));
}
}
return SUCCESS;
}
MSRStatus ShardHeader::ParseStatistics(const json &statistics) {
@ -715,7 +723,9 @@ MSRStatus ShardHeader::FileToPages(const std::string dump_file_name) {
std::string line;
while (std::getline(page_in_handle, line)) {
ParsePage(json::parse(line), -1, true);
if (SUCCESS != ParsePage(json::parse(line), -1, true)) {
return FAILED;
}
}
page_in_handle.close();

View File

@ -17,6 +17,8 @@
*/
#include "pipeline/jit/parse/parse.h"
#include <utility>
#include <string>
#include <memory>
#include <sstream>
@ -1480,21 +1482,25 @@ AnfNodePtr FindPhis(const std::unordered_map<ParameterPtr, AnfNodePtr> &removabl
void Parser::RemoveUnnecessaryPhis() {
// merge all removable phis to one map;
std::unordered_map<ParameterPtr, AnfNodePtr> removable_phis;
std::vector<ParameterPtr> phis;
for (FunctionBlockPtr &block : func_block_list_) {
MS_EXCEPTION_IF_NULL(block);
removable_phis.insert(block->removable_phis().begin(), block->removable_phis().end());
std::transform(block->removable_phis().begin(), block->removable_phis().end(), std::back_inserter(phis),
[](std::pair<ParameterPtr, AnfNodePtr> pair) { return pair.first; });
}
if (removable_phis.size() == 0) {
return;
}
auto fg_name = func_graph_->ToString();
auto mng = Manage(func_graph_, false);
// replace the nodes
for (auto iter : removable_phis) {
auto new_node = FindPhis(removable_phis, iter.first);
MS_LOG(DEBUG) << "phi " << iter.first->DebugString() << " to " << new_node->DebugString();
mng->Replace(iter.first, new_node);
// remove from inside to outside
for (int idx = SizeToInt(phis.size() - 1); idx >= 0; idx--) {
auto phi = phis[IntToSize(idx)];
auto new_node = FindPhis(removable_phis, phi);
MS_LOG(DEBUG) << "phi " << phi->DebugString() << " to " << new_node->DebugString();
mng->Replace(phi, new_node);
}
// remove the parameter
for (FunctionBlockPtr &block : func_block_list_) {

View File

@ -45,6 +45,7 @@
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
#include "frontend/parallel/ps/common.h"
#include "frontend/parallel/ps/util.h"
#include "frontend/parallel/ps/worker.h"
#endif
#if (ENABLE_GE || ENABLE_D)
@ -261,6 +262,7 @@ void ExecutorPy::DelNetRes(const std::string &id) {
for (auto &item : tmp_info) {
if (item.first.find(id) != string::npos) {
MS_LOG(DEBUG) << "Delete network res:" << item.first;
item.second = nullptr;
(void)info_.erase(item.first);
flag = true;
}
@ -949,7 +951,13 @@ void ClearResAtexit() {
pynative::ClearPyNativeSession();
session::ClearPythonParasMap();
device::KernelRuntimeManager::Instance().ClearRuntimeResource();
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
if (mindspore::parallel::ps::Util::IsParamServerMode()) {
if (parallel::ps::Util::IsRoleOfWorker()) {
parallel::ps::Worker<float>::GetInstance().Finalize();
}
}
#endif
ad::g_k_prims.clear();
abstract::ClearPrimEvaluatorMap();

View File

@ -150,7 +150,8 @@ PrimitiveEvalImplMap &GetPrimitiveToEvalImplMap() {
using mindspore::parse::PyObjectWrapper;
EvalResultPtr StandardPrimEvaluator::EvalPrim(const AnalysisEnginePtr &engine, const AbstractBasePtrList &args) {
if (prim_ != prim::kPrimMakeTuple && prim_ != prim::kPrimSwitch) {
if (prim_ != prim::kPrimMakeTuple && prim_ != prim::kPrimSwitch && prim_ != prim::kPrimEnvSetItem &&
prim_ != prim::kPrimEnvGetItem) {
auto ret_abstract = AbstractEval(args);
if (ret_abstract != nullptr) {
MS_LOG(DEBUG) << "StandardPrimEvaluator eval Undetermined";
@ -386,6 +387,16 @@ py::dict ConvertAbstractToPython(const AbstractBasePtr &abs_base) {
dic["shape"] = arg_tensor->shape()->shape();
dic["dtype"] = arg_tensor->BuildType();
dic["value"] = BuildValue(arg_tensor->BuildValue());
} else if (abs_base->isa<AbstractIndexedSlices>()) {
auto arg = dyn_cast<AbstractIndexedSlices>(abs_base);
dic["shape"] = arg->shape()->shape();
dic["dtype"] = arg->BuildType();
dic["value"] = BuildValue(arg->BuildValue());
} else if (abs_base->isa<AbstractSparseTensor>()) {
auto arg = dyn_cast<AbstractSparseTensor>(abs_base);
dic["shape"] = arg->shape()->shape();
dic["dtype"] = arg->BuildType();
dic["value"] = BuildValue(arg->BuildValue());
} else if (abs_base->isa<AbstractScalar>() || abs_base->isa<AbstractType>() || abs_base->isa<AbstractRefKey>()) {
std::vector<int> shape;
dic["shape"] = shape;

View File

@ -59,7 +59,7 @@ struct OpExecInfo {
using OpExecInfoPtr = std::shared_ptr<OpExecInfo>;
OpExecInfoPtr GenerateOpExecInfo(const py::args &args, py::list *const out_args);
const std::set<std::string> ignore_infer_prim = {"make_ref"};
const std::set<std::string> ignore_infer_prim = {"make_ref", "mixed_precision_cast"};
} // namespace pynative
} // namespace mindspore

View File

@ -57,7 +57,7 @@ using mindspore::tensor::TensorPy;
const char SINGLE_OP_GRAPH[] = "single_op_graph";
// primitive unable to infer value for constant input in PyNative mode
const std::set<std::string> vm_operators = {"make_ref", "HookBackward", "stop_gradient"};
const std::set<std::string> vm_operators = {"make_ref", "HookBackward", "stop_gradient", "mixed_precision_cast"};
namespace mindspore {
namespace pynative {
@ -690,12 +690,15 @@ py::tuple RunOpInner(const OpExecInfoPtr &op_exec_info, const py::args &args) {
return err_ret;
}
auto node = PynativeExecutor::GetInstance()->MakeCNode(op_exec_info, args, result);
if (node != nullptr) {
node->set_abstract(op_exec_info->abstract);
MS_LOG(DEBUG) << "RunOp MakeCnode,new node is: " << node->DebugString();
if (op_exec_info->op_name != prim::kPrimMixedPrecisionCast->name()) {
auto node = PynativeExecutor::GetInstance()->MakeCNode(op_exec_info, args, result);
if (node != nullptr) {
node->set_abstract(op_exec_info->abstract);
MS_LOG(DEBUG) << "RunOp MakeCnode,new node is: " << node->DebugString();
}
MS_LOG(DEBUG) << "RunOp end";
}
MS_LOG(DEBUG) << "RunOp end";
return result;
}
@ -766,6 +769,9 @@ PynativeExecutor::PynativeExecutor() { grad_flag_ = false; }
void PynativeExecutor::NewGraphInner(const py::object &cell, const py::args &args) {
auto cell_id = GetId(cell);
if (cell_graph_map_.count(cell_id) != 0) {
if (cell_resource_map_.find(cell_id) != cell_resource_map_.end()) {
resource_ = cell_resource_map_[cell_id];
}
MS_LOG(DEBUG) << "Newgraph already compiled";
return;
}
@ -774,6 +780,8 @@ void PynativeExecutor::NewGraphInner(const py::object &cell, const py::args &arg
if (top_g_ == nullptr) {
top_g_ = curr_g_ = g;
resource_ = std::make_shared<pipeline::Resource>();
cell_resource_map_[cell_id] = resource_;
df_builder_ = std::make_shared<FuncGraph>();
MS_LOG(DEBUG) << "First new graph" << top_g_.get();
Pushp();
@ -910,8 +918,8 @@ void PynativeExecutor::EndGraphInner(const py::object &cell, const py::object &o
cnode->set_inputs(args);
set_obj_node_map(curr_g_, out_id, cnode);
} else {
MS_LOG(ERROR) << "Graph has no this out: " << out_id;
return;
MS_LOG(DEBUG) << "Set ValueNode as output for graph, out id: " << out_id;
MakeValueNode(out, out_id);
}
}
EndGraphByOutId(out_id, cell, out, args);
@ -1075,6 +1083,7 @@ void PynativeExecutor::Clear(const std::string &flag) {
MS_LOG(INFO) << "Clear res";
(void)graph_map_.erase(flag);
(void)cell_graph_map_.erase(flag);
(void)cell_resource_map_.erase(flag);
Clean();
// Maybe exit in the pynative runing op, so need reset pynative flag.
auto ms_context = MsContext::GetInstance();
@ -1086,6 +1095,7 @@ void PynativeExecutor::Clear(const std::string &flag) {
MS_LOG(INFO) << "Clear";
top_g_ = nullptr;
df_builder_ = nullptr;
curr_g_ = nullptr;
graph_info_map_.clear();
std::stack<FuncGraphPtr>().swap(graph_p_);
@ -1095,7 +1105,6 @@ void PynativeExecutor::Clean() {
MS_LOG(INFO) << "Clean all res";
Clear();
grad_flag_ = false;
df_builder_ = nullptr;
ad::CleanRes();
pipeline::ReclaimOptimizer();
}

View File

@ -115,6 +115,7 @@ class PynativeExecutor : public std::enable_shared_from_this<PynativeExecutor> {
bool grad_flag_;
std::unordered_map<std::string, FuncGraphPtr> graph_map_;
std::unordered_map<std::string, FuncGraphPtr> cell_graph_map_;
std::unordered_map<std::string, ResourcePtr> cell_resource_map_;
std::unordered_map<FuncGraphPtr, GraphInfo> graph_info_map_;
std::stack<FuncGraphPtr> graph_p_;
FuncGraphPtr top_g_;

View File

@ -484,7 +484,8 @@ bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormat(const std::vector<int
std::vector<size_t> device_shape = GetDeviceShape(&host_shape);
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
if (ms_context->execution_mode() == kPynativeMode && type_id_name_map.find(type_id_) != type_id_name_map.end()) {
if (ms_context->execution_mode() != kPynativeMode && ms_context->execution_mode() != kGraphMode &&
type_id_name_map.find(type_id_) != type_id_name_map.end()) {
std::pair<std::string, std::string> type_format = std::make_pair(type_id_name_map.at(type_id_), format_);
if (use_trans_data.find(type_format) != use_trans_data.end()) {
sync_ok = SyncDeviceToHostAndConvertFormatBasedOnTransData(host_shape, device_shape, size, type, host_ptr);

View File

@ -672,10 +672,8 @@ void AscendStreamAssign::InsertEventForIndependentParallel(const NotNull<KernelG
void AscendStreamAssign::GetNeedActiveStreams(const NotNull<KernelGraphPtr> &graph_ptr) {
CNodePtr cur_cnode_ptr = nullptr;
auto cnode_ptr_list = graph_ptr->execution_order();
// 1)first stream 0 should be actived first;
need_first_active_streams_.emplace_back(0);
// 2)stream witch kStreamNeedActivedFirst attr should be actived;
// 1)stream witch kStreamNeedActivedFirst attr should be actived;
for (size_t i = 0; i < cnode_ptr_list.size(); ++i) {
cur_cnode_ptr = cnode_ptr_list[i];
MS_EXCEPTION_IF_NULL(cur_cnode_ptr);
@ -691,19 +689,25 @@ void AscendStreamAssign::GetNeedActiveStreams(const NotNull<KernelGraphPtr> &gra
}
}
// 3)independent stream:if has not been activate, push to need active vector
// 2)independent stream:if has not been activate, push to need active vector
if (!independent_stream_activated_) {
for (auto &item : independent_stream_map_) {
need_first_active_streams_.emplace_back(item.first);
}
}
// 4)hcom stream:if has not been activate, push to need active vector
// 3)hcom stream:if has not been activate, push to need active vector
if (!hcom_stream_activated_) {
for (auto &item : hcom_stream_map_) {
need_first_active_streams_.emplace_back(item.first);
}
}
// 4)first stream 0 should be actived first;
auto it = std::find(need_first_active_streams_.begin(), need_first_active_streams_.end(), 0);
if (it == need_first_active_streams_.end()) {
need_first_active_streams_.emplace_back(0);
}
}
// section8
@ -958,7 +962,7 @@ void AscendStreamAssign::DFS(uint32_t start, std::vector<uint32_t> *group) {
if (!IsVecExist(group)) {
stream_groups_.emplace_back(*group);
} else {
MS_LOG(WARNING) << "DFS should not print this log";
MS_LOG(WARNING) << "DFS find same stream group, Not expected";
}
return;
}

View File

@ -492,6 +492,10 @@ void SetTensorDeviceInfo(const kernel::KernelBuildInfo &selected_kernel_info, co
AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) != kTypeUnknown) {
continue;
}
if (AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) != kTypeUnknown &&
AnfAlgo::OutputAddrExist(real_input_node, 0)) {
continue;
}
if (AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown || is_ref) {
std::vector<std::string> output_format = {selected_kernel_info.GetInputFormat(input_index)};
builder->SetOutputsFormat(output_format);

View File

@ -52,6 +52,11 @@ bool CPUDeviceAddress::SyncDeviceToHost(const std::vector<int> & /*shape*/, size
bool CPUDeviceAddress::SyncHostToDevice(const std::vector<int> & /*shape*/, size_t size, TypeId type,
const void *host_ptr) const {
if (host_ptr == ptr_) {
MS_LOG(DEBUG) << "host_ptr is equal to ptr_, request ignored.";
return true;
}
if (type == kNumberTypeFloat16) {
HalfToFloat(ptr_, host_ptr, size / 2);
} else if (type == kNumberTypeFloat64) {

View File

@ -40,8 +40,7 @@ void CPUKernelRuntime::AssignKernelAddress(session::KernelGraph *kernel_graph) {
AssignValueNodeAddress(kernel_graph);
AssignInputNodeAddress(kernel_graph);
AssignKernelOutputAddress(kernel_graph);
resource_manager_.MemPlan(kernel_graph);
resource_manager_.MemMalloc(kernel_graph);
resource_manager_.AssignMemory(kernel_graph);
}
void CPUKernelRuntime::AssignValueNodeAddress(session::KernelGraph *kernel_graph) {
@ -186,11 +185,15 @@ BaseRef CPUKernelRuntime::CreatTensorForOutput(const session::KernelWithIndex &k
return ret;
}
return CreatTensorForOutput(node, index, bound_addresses, need_sync_outputs);
} else if (input_node->isa<Parameter>() || input_node->isa<ValueNode>()) {
} else if (input_node->isa<Parameter>()) {
auto iter = input_map.find(input_node.get());
if (iter != input_map.end()) {
return iter->second;
}
} else if (input_node->isa<ValueNode>()) {
auto value_node = input_node->cast<ValueNodePtr>();
MS_EXCEPTION_IF_NULL(value_node);
return value_node->value();
}
return BaseRef();
}
@ -220,7 +223,8 @@ void CPUKernelRuntime::BindInputOutput(const session::KernelGraph *kernel_graph,
(void)tensor->data_sync();
}
if (tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32) {
if (tensor->data_type() == address->type_id_ || tensor->data_type() == kNumberTypeFloat32 ||
tensor->data_type() == kNumberTypeInt32) {
address->ptr_ = tensor->data_c();
} else {
std::vector<int> data_shape = tensor->shape();

View File

@ -34,11 +34,13 @@ void CPUResourceManager::MemFree() {
dynamic_mem_.clear();
}
void CPUResourceManager::MemPlan(const session::KernelGraph *graph) {
mem_plan_.MemPlan(graph);
size_t graph_mem_size = mem_plan_.GetGraphMemSize(graph);
void CPUResourceManager::AssignMemory(const session::KernelGraph *graph) {
size_t graph_mem_size = mem_plan_.MemPlan(graph);
if (graph_mem_size > mem_size_) {
MemFree();
if (mem_size_ > 0) {
dynamic_mem_[mem_ptr_] = mem_size_;
mem_size_ = 0;
}
mem_ptr_ = reinterpret_cast<uint8_t *>(malloc(graph_mem_size));
if (mem_ptr_ != nullptr) {
mem_size_ = graph_mem_size;
@ -48,9 +50,6 @@ void CPUResourceManager::MemPlan(const session::KernelGraph *graph) {
dynamic_malloc_ = true;
}
}
}
void CPUResourceManager::MemMalloc(const session::KernelGraph *graph) {
if (dynamic_malloc_) {
return;
}

View File

@ -17,7 +17,7 @@
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_CPU_CPU_RESOURCE_MANAGER_H_
#include <vector>
#include <unordered_map>
#include <map>
#include "backend/session/kernel_graph.h"
#include "backend/session/session_basic.h"
#include "runtime/device/device_address.h"
@ -30,8 +30,7 @@ class CPUResourceManager {
CPUResourceManager() = default;
~CPUResourceManager();
void MemPlan(const session::KernelGraph *graph);
void MemMalloc(const session::KernelGraph *graph);
void AssignMemory(const session::KernelGraph *graph);
void IncreaseAddressRefCount(const session::KernelGraph *graph);
void DecreaseAddressRefCount(const AnfNodePtr &kernel);
void *MemMalloc(size_t mem_size);
@ -46,7 +45,7 @@ class CPUResourceManager {
size_t mem_size_{0};
uint8_t *mem_ptr_{nullptr};
bool dynamic_malloc_{false};
std::unordered_map<void *, size_t> dynamic_mem_;
std::map<void *, size_t> dynamic_mem_;
};
} // namespace cpu
} // namespace device

View File

@ -19,9 +19,9 @@
namespace mindspore {
namespace device {
namespace cpu {
void CPUSimpleMemPlan::MemPlan(const session::KernelGraph *graph) {
size_t CPUSimpleMemPlan::MemPlan(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
size_t total_mem_size = 0;
size_t total_mem_size = 32;
auto kernels = graph->execution_order();
for (const auto &kernel : kernels) {
MS_EXCEPTION_IF_NULL(kernel);
@ -58,15 +58,8 @@ void CPUSimpleMemPlan::MemPlan(const session::KernelGraph *graph) {
}
}
}
graph_mem_size_[graph] = total_mem_size;
}
size_t CPUSimpleMemPlan::GetGraphMemSize(const session::KernelGraph *graph) const {
auto iter = graph_mem_size_.find(graph);
if (iter != graph_mem_size_.end()) {
return iter->second;
}
return 0;
return total_mem_size;
}
void CPUSimpleMemPlan::MemAssign(const session::KernelGraph *graph, uint8_t *base_ptr) {

View File

@ -17,7 +17,6 @@
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_CPU_CPU_SIMPLE_MEM_PLAN_H_
#include <vector>
#include <unordered_map>
#include "backend/session/kernel_graph.h"
#include "runtime/device/device_address.h"
@ -29,12 +28,8 @@ class CPUSimpleMemPlan {
CPUSimpleMemPlan() = default;
~CPUSimpleMemPlan() = default;
void MemPlan(const session::KernelGraph *graph);
size_t MemPlan(const session::KernelGraph *graph);
void MemAssign(const session::KernelGraph *graph, uint8_t *base_ptr);
size_t GetGraphMemSize(const session::KernelGraph *graph) const;
private:
std::unordered_map<const session::KernelGraph *, size_t> graph_mem_size_;
};
} // namespace cpu
} // namespace device

View File

@ -355,7 +355,6 @@ void KernelRuntime::AssignStaticMemoryOutput(session::KernelGraph *graph) {
if (!item_with_index.first->isa<CNode>() || !AnfAlgo::IsRealKernel(item_with_index.first)) {
continue;
}
graph->AddFinalOutputKernel(item_with_index.first);
if (AnfAlgo::IsCommunicationOp(item_with_index.first)) {
AssignCommunicationNodeMem(kStaticMem, item_with_index.first);
} else {

View File

@ -309,12 +309,7 @@ INPUT_MAP(SoftmaxCrossEntropyWithLogits) = {{1, INPUT_DESC(features)}, {2, INPUT
ATTR_MAP(SoftmaxCrossEntropyWithLogits) = EMPTY_ATTR_MAP;
OUTPUT_MAP(SoftmaxCrossEntropyWithLogits) = {{0, OUTPUT_DESC(loss)}, {1, OUTPUT_DESC(backprop)}};
// MeanGrad
INPUT_MAP(MeanGrad) = {{1, INPUT_DESC(x)}};
INPUT_ATTR_MAP(MeanGrad) = {{2, ATTR_DESC(mean_grad_output_shape_value, kOpFormat_NHWC,
AnyTraits<std::vector<int64_t>>(), AnyTraits<int64_t>())}};
ATTR_MAP(MeanGrad) = {{"mode", ATTR_DESC(mode, AnyTraits<int64_t>())}};
// SliceD
INPUT_MAP(SliceD) = {{1, INPUT_DESC(x)}};
INPUT_ATTR_MAP(SliceD) = {{2, ATTR_DESC(offsets, AnyTraits<int>(), AnyTraits<std::vector<int64_t>>())},
{3, ATTR_DESC(size, AnyTraits<int>(), AnyTraits<std::vector<int64_t>>())}};
@ -431,11 +426,6 @@ INPUT_MAP(TopK) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(k)}};
ATTR_MAP(TopK) = {{"sorted", ATTR_DESC(sorted, AnyTraits<bool>())}};
OUTPUT_MAP(TopK) = {{0, OUTPUT_DESC(values)}, {1, OUTPUT_DESC(indices)}};
// Multiply
INPUT_MAP(Multiply) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(y)}};
ATTR_MAP(Multiply) = EMPTY_ATTR_MAP;
OUTPUT_MAP(Multiply) = {{0, OUTPUT_DESC(z)}};
// TileD
INPUT_MAP(TileD) = {{1, INPUT_DESC(x)}};
INPUT_ATTR_MAP(TileD) = {{2, ATTR_DESC(multiples, AnyTraits<int>(), AnyTraits<std::vector<int64_t>>())}};

Some files were not shown because too many files have changed in this diff Show More