forked from mindspore-Ecosystem/mindspore
Compare commits
397 Commits
Author | SHA1 | Date |
---|---|---|
mindspore-ci-bot | 2139c7ddc6 | |
mindspore-ci-bot | c4e3fd70ee | |
yanghaoran | 28372fbb96 | |
shenwei41 | 81c2068635 | |
mindspore-ci-bot | 4a8f92e4be | |
Payne | c53cd6bb22 | |
mindspore-ci-bot | 297e65e162 | |
wuweikang | 1729a27bdb | |
mindspore-ci-bot | 38da39b0be | |
mindspore-ci-bot | 56e22a5c82 | |
wuweikang | b551623c86 | |
mayang | 5529d41769 | |
mindspore-ci-bot | ab997f9e37 | |
mindspore-ci-bot | 5027130939 | |
wuweikang | 30450e2177 | |
Payne | 93c4d2929c | |
mindspore-ci-bot | e39775edfd | |
mindspore-ci-bot | e0953c47c3 | |
mindspore-ci-bot | 8e59d7449e | |
yeyunpeng | 41fd02baf3 | |
gongdaguo | 02ba93492d | |
WilliamLian | 683e4aba74 | |
mindspore-ci-bot | d373efcf99 | |
Payne | 4e2ccefe29 | |
mindspore-ci-bot | 6b4c5227ee | |
mindspore-ci-bot | 738ade501d | |
mindspore-ci-bot | 527d36597c | |
jonyguo | 7132be9ed2 | |
caifubi | 1c97d65f40 | |
mindspore-ci-bot | 0ce4e3418c | |
wuweikang | b83aa68071 | |
jonyguo | 86f5189323 | |
mindspore-ci-bot | fc4bf19294 | |
jjfeing | bd1261e5e6 | |
mindspore-ci-bot | 5de9578abb | |
cjh9368 | 16f41b1619 | |
mindspore-ci-bot | 7abe5a231f | |
jonyguo | 27807a21ea | |
mindspore-ci-bot | 40bf2493fa | |
mindspore-ci-bot | a585177825 | |
wuxuejian | 06a6af7628 | |
mindspore-ci-bot | 9833a00e93 | |
mindspore-ci-bot | 13fcfcdb78 | |
CaoJian | 96fad3b0a4 | |
mindspore-ci-bot | a96646fc5a | |
mindspore-ci-bot | 26aba3b74c | |
mindspore-ci-bot | 5120720bac | |
mindspore-ci-bot | 4382ce202c | |
mindspore-ci-bot | 29fabd1324 | |
mindspore-ci-bot | a43bd07c39 | |
mindspore-ci-bot | d332e4d2b7 | |
mindspore-ci-bot | 403cb700b2 | |
cristoval | b2ffdf82c6 | |
mindspore-ci-bot | f5cba099d0 | |
mindspore-ci-bot | 9d10d934c7 | |
lyvette | 62e7746e77 | |
chenfei | 298393b66b | |
meng_chunyang | 26bf044dde | |
cjh9368 | fb973778e5 | |
yoonlee666 | d472ee3921 | |
yangyongjie | e0a128142b | |
yeyunpeng | 794aeb8e2c | |
zhanyuan | 6051e3cfec | |
mindspore-ci-bot | 03ff5f334a | |
yangruoqi713 | dbe5cd9091 | |
zhouyaqiang | 02fe2f9f1d | |
mindspore-ci-bot | 82dba19adf | |
meng_chunyang | e256877be9 | |
mindspore-ci-bot | 433eaab225 | |
hangq | 18d3c686dc | |
mindspore-ci-bot | 7ebecd8d09 | |
mindspore-ci-bot | 6b062f2454 | |
mindspore-ci-bot | 6dd79e2b78 | |
mindspore-ci-bot | 9ac3e85045 | |
gengdongjie | f366faa275 | |
Jolin Zhang46 | 4b7c6208a3 | |
mindspore-ci-bot | e70d664119 | |
mindspore-ci-bot | 60f9577103 | |
mindspore-ci-bot | 92a1c55941 | |
liuwenhao4 | 22e37013b9 | |
mindspore-ci-bot | fa97e24938 | |
mindspore-ci-bot | 7714669469 | |
mindspore-ci-bot | 3f305b8afa | |
mindspore-ci-bot | 9fcf1e37b6 | |
zhangbiao31 | 7a30cd8784 | |
wangzhe | c223bb3949 | |
mindspore-ci-bot | 1621e02b68 | |
wukesong | 2fb88e8c46 | |
jonyguo | 958ca51437 | |
zhouyaqiang | 61e8f10209 | |
sunsuodong | 23e68ce790 | |
CaoJian | 422a836cde | |
huangxinjing | 330f18de5f | |
mindspore-ci-bot | 0662256f23 | |
mindspore-ci-bot | 3a6749ab78 | |
wsc | b7a22e1398 | |
chengxianbin | 6c055b96d1 | |
mindspore-ci-bot | a9943a382c | |
limingqi107 | 0775db0940 | |
wanghua | 6674a88de4 | |
mindspore-ci-bot | 13dd07c34e | |
mindspore-ci-bot | 0178ecf785 | |
gengdongjie | b15a48a53b | |
wilfChen | 50324d3dda | |
yao_yf | 0c175b2cc0 | |
mindspore-ci-bot | 947c396f09 | |
VectorSL | 48d669636a | |
mindspore-ci-bot | 7d38a1fb7e | |
mindspore-ci-bot | 565b542886 | |
yuchaojie | 32787a3453 | |
xuanyue | 0ce8708dee | |
mindspore-ci-bot | b5393e6628 | |
chengxianbin | 3da41e1091 | |
qujianwei | e0ac982589 | |
mindspore-ci-bot | 52ed1ea725 | |
mindspore-ci-bot | 2ff8de674c | |
mindspore-ci-bot | 13978a2c98 | |
jonyguo | b0f847072c | |
ZPaC | 1ac075919a | |
mindspore-ci-bot | 6c68e70cfe | |
mindspore-ci-bot | 915d9a0d8f | |
mindspore-ci-bot | 03b9cf3c12 | |
mindspore-ci-bot | 321e8d3e8f | |
luoyang | 4f98ecb433 | |
mindspore-ci-bot | d24af4b181 | |
chenfei | 1eab0cd71b | |
zhouyaqiang | c317c4643b | |
mindspore-ci-bot | 0c7c3c3e8d | |
looop5 | b0ce67fdd6 | |
mindspore-ci-bot | 23fc178a5a | |
hanhuifeng2020 | 1f0a760cdb | |
panfengfeng | 3b8562aa3d | |
yuchaojie | ed1340f1e5 | |
mindspore-ci-bot | 98528bbc16 | |
mindspore-ci-bot | 86cfa89c97 | |
mindspore-ci-bot | beae07a9d1 | |
mindspore-ci-bot | 4f9c6e77b7 | |
ZPaC | ac27f82607 | |
Xiao Tianci | ed53f7f821 | |
mahdi | a5228c75c7 | |
mindspore-ci-bot | 8533744d7c | |
mindspore-ci-bot | f3444977f3 | |
mindspore-ci-bot | 581788f040 | |
mindspore-ci-bot | 3d511f80d5 | |
mindspore-ci-bot | cb88a43a24 | |
chujinjin | 8948e55ae5 | |
qujianwei | 3adc06024c | |
chengxianbin | f3d87dd13e | |
VectorSL | c381430e71 | |
mindspore-ci-bot | e5780288e9 | |
mindspore-ci-bot | d670bcb003 | |
simson | b00cdb2fe6 | |
limingqi107 | 3516447749 | |
李嘉琪 | 0e9815f63c | |
mindspore-ci-bot | 2f189543b9 | |
mindspore-ci-bot | c0a184ae8f | |
mindspore-ci-bot | ebfca60cdf | |
mindspore-ci-bot | f01613508f | |
chenfei | c545422384 | |
VectorSL | db3f387f14 | |
yuximiao | 6005091a09 | |
Wei Luning | 051b019c96 | |
mindspore-ci-bot | ee6ab2980d | |
mindspore-ci-bot | f9609e4ca7 | |
mindspore-ci-bot | 3fdf9b72df | |
mindspore-ci-bot | d142a8d944 | |
xuyongfei | 78f88cde1b | |
sunsuodong | 6385eafacf | |
leilei_snow | 08c1d4bf44 | |
mindspore-ci-bot | 5f3581aa69 | |
mindspore-ci-bot | e40fd0b4ed | |
mindspore-ci-bot | 620f5856ef | |
mindspore-ci-bot | e3b0ae75ae | |
mindspore-ci-bot | a47ce883e9 | |
mindspore-ci-bot | 3d2f761505 | |
mindspore-ci-bot | 647053ed4d | |
mindspore-ci-bot | 1c093f7db7 | |
mindspore-ci-bot | ddf1b25f9b | |
lvliang | 6f84bc57f4 | |
Xun Deng | 8ab9903125 | |
peixu_ren | f8dde0c3c2 | |
peixu_ren | 85e67f193c | |
mindspore-ci-bot | be2a98e7c8 | |
mindspore-ci-bot | d506630185 | |
xiefangqi | 6d0ae1794f | |
avakh | 6300058cb6 | |
mindspore-ci-bot | 71dd8a4a71 | |
mindspore-ci-bot | 57e131a136 | |
mindspore-ci-bot | 81d67f6828 | |
simson | 7c406fb3a0 | |
mindspore-ci-bot | c904bc2f00 | |
mindspore-ci-bot | adeeda2fe1 | |
yangyongjie | fcc5f77a77 | |
mindspore-ci-bot | bed0f5d3c3 | |
He Wei | eca64ab8e9 | |
mindspore-ci-bot | 84989b0cbf | |
jonyguo | f45e2c921f | |
zhangxinfeng3 | 758269d049 | |
heleiwang | 814fc1d03e | |
mindspore-ci-bot | b9dff22d6c | |
mindspore-ci-bot | 54e615e904 | |
yujianfeng | 499d81e573 | |
cristoval | 817bfed1ec | |
hanhuifeng2020 | a533147f52 | |
BowenK | fcc0e263f3 | |
mindspore-ci-bot | 46de719a12 | |
chujinjin | 7b6dd0b84d | |
lizhenyu | 8438221259 | |
李嘉琪 | 2f5a454ef4 | |
qianlong | 6b6409d910 | |
VectorSL | ed9c63469b | |
mindspore-ci-bot | 049acf6d58 | |
mindspore-ci-bot | 55d997ec11 | |
mindspore-ci-bot | ee7d9bf4ac | |
mindspore-ci-bot | 6b706529c5 | |
VectorSL | 7884176df7 | |
mindspore-ci-bot | bedfa8578b | |
peixu_ren | d522d7ba46 | |
peixu_ren | e701fbfa2e | |
mindspore-ci-bot | 7090e16df8 | |
mindspore-ci-bot | 382f9a8ebb | |
mindspore-ci-bot | fd8ad73689 | |
mindspore-ci-bot | 7bdb90a40b | |
mindspore-ci-bot | 185b25c6de | |
chengxianbin | 115a85114f | |
mindspore-ci-bot | 0bf7f1f39c | |
qujianwei | d2588f1935 | |
mindspore-ci-bot | 6017521219 | |
xuyongfei | c0389eaea4 | |
Xun Deng | 6bb61615fd | |
qianlong | bc8aec007f | |
jonyguo | d262c63214 | |
mindspore-ci-bot | 9e20e17590 | |
mindspore-ci-bot | 07103b98a9 | |
peixu_ren | 13584ebc22 | |
peixu_ren | 5ce4bcf416 | |
mindspore-ci-bot | 92787df680 | |
mindspore-ci-bot | 8afbba7936 | |
anzhengqi | 3c4feaa4f4 | |
simson | 556f79d185 | |
mindspore-ci-bot | 1a42811748 | |
mindspore-ci-bot | cee889e426 | |
jjfeing | 3af2059342 | |
mindspore-ci-bot | a0a6463210 | |
lizhenyu | 1d8e5a27b7 | |
mindspore-ci-bot | 0bcd75bd2b | |
zhaoting | 576c35d408 | |
mindspore-ci-bot | e05a5c8002 | |
bingyaweng | 5b941df087 | |
mindspore-ci-bot | b77c7d2729 | |
mindspore-ci-bot | ac0b72e67f | |
gukecai | 4ace444f54 | |
yangyongjie | 32e7eb9148 | |
mindspore-ci-bot | c481d45996 | |
wanghua | cb893dfca9 | |
mindspore-ci-bot | 6b55dc802d | |
zhouyaqiang | 41de14bacd | |
mindspore-ci-bot | ee37dc52fa | |
luoyang | a75ac9c445 | |
askmiao | 1804ea246d | |
mindspore-ci-bot | 56350e71f0 | |
mindspore-ci-bot | 761cd8393e | |
simson | 90c004078e | |
mindspore-ci-bot | e9f2aae73d | |
He Wei | a6690168a8 | |
hexia | 4eb1706307 | |
Yi Huaijie | 524cf0ed9a | |
mindspore-ci-bot | d8d7cebc8a | |
linqingke | 9f5a7939d4 | |
caozhou | 83557cd28d | |
Zichun Ye | d29bd6862a | |
mindspore-ci-bot | 1b71d50953 | |
lizhenyu | 68b37ee1b3 | |
李嘉琪 | 0238ba4d14 | |
mindspore-ci-bot | 891228bcbe | |
CaoJian | 1f9c3bb044 | |
caozhou | 8287445f95 | |
mindspore-ci-bot | 10c6fb5612 | |
mindspore-ci-bot | fae225460c | |
VectorSL | 54bb6ba58c | |
wuweikang | cec6206bc4 | |
mindspore-ci-bot | e69a91b6b5 | |
zhangxinfeng3 | 2869e5ace4 | |
mindspore-ci-bot | 09cf1c1a54 | |
Zichun Ye | 04b5b8c737 | |
mindspore-ci-bot | 8d0b52fb13 | |
hexia | 2027f73eaf | |
mindspore-ci-bot | befc209480 | |
mindspore-ci-bot | c5279ecf35 | |
mindspore-ci-bot | d2de60f1d5 | |
mindspore-ci-bot | dc13718ce2 | |
mindspore-ci-bot | 7dbe9f7067 | |
fuzhiye | 2c9daf0f14 | |
sunsuodong | bcd97d9751 | |
mindspore-ci-bot | 7b8229d644 | |
mindspore-ci-bot | e3c053c4ff | |
yangruoqi713 | b4551670a9 | |
sunsuodong | 9734f2a88e | |
zhengjun10 | 204ab11572 | |
mindspore-ci-bot | 80d570f003 | |
yeyunpeng | 6b46acb39e | |
mindspore-ci-bot | dde257592b | |
hangq | 28e3508718 | |
mindspore-ci-bot | 07a75658bf | |
ling | 0fac817a2d | |
chenjianping | babff262e3 | |
wenchunjiang | a221ee176b | |
mindspore-ci-bot | 6d0bbb36a3 | |
mindspore-ci-bot | 8219df7337 | |
wsc | 758130924f | |
mindspore-ci-bot | 33c7b49219 | |
mindspore-ci-bot | 0f362bb158 | |
mindspore-ci-bot | faa0fed8bb | |
mindspore-ci-bot | 150b987898 | |
mindspore-ci-bot | 50877b586d | |
mindspore-ci-bot | 75fce54208 | |
chenjianping | d88a98658c | |
mindspore-ci-bot | add52da73e | |
mindspore-ci-bot | d2641bbf79 | |
zhaodezan | 8339d5dae6 | |
mindspore-ci-bot | 94a109f476 | |
mindspore-ci-bot | 31a04ea1fe | |
jin-xiulang | c246b177a6 | |
kai00 | a9771d63e1 | |
mindspore-ci-bot | aedd6de6d5 | |
lizhenyu | 1becddf3a4 | |
jianghui58 | 1d601b6924 | |
fary86 | 04524b6bd3 | |
mindspore-ci-bot | 7098b5c5d5 | |
mindspore-ci-bot | d3733b3b04 | |
mindspore-ci-bot | 90552c4933 | |
mindspore-ci-bot | 8e360888d0 | |
mindspore-ci-bot | aefca7b782 | |
mindspore-ci-bot | 1556450445 | |
kpy | 4fa89408a1 | |
zhaozhenlong | 488c991eba | |
mindspore-ci-bot | e2203bed01 | |
mindspore-ci-bot | b52229379d | |
mindspore-ci-bot | d184066b77 | |
mindspore-ci-bot | b23b957228 | |
mindspore-ci-bot | 38c366306c | |
zhaodezan | fed8f406ac | |
zhaoting | a5c16fc4ac | |
mindspore-ci-bot | 0c60f7e6ac | |
mindspore-ci-bot | 6a5c517d7b | |
mindspore-ci-bot | 387f4d445f | |
peixu_ren | 03dac9b621 | |
mindspore-ci-bot | 2b78032605 | |
mindspore-ci-bot | 42a092d687 | |
mindspore-ci-bot | fd9be2ddc2 | |
mindspore-ci-bot | 115c0cbf72 | |
ms_yan | 0752c566b1 | |
xutianchun | 1ae9f81c82 | |
mindspore-ci-bot | fe11760834 | |
Corleone | 4b60297832 | |
chenzomi | 8337ae710e | |
zhaizhiqiang | 9d98246dae | |
jianghui58 | defd9a784e | |
kai00 | 15dff1c4a4 | |
mindspore-ci-bot | 026bbc46ff | |
mindspore-ci-bot | ac81886328 | |
liuxiao93 | cfe8859499 | |
panfengfeng | 30b69d3488 | |
mindspore-ci-bot | 77198f3182 | |
ZPaC | 830172201a | |
mindspore-ci-bot | b366608a3f | |
lixian | 1ffb095037 | |
anzhengqi | dd942e1807 | |
mindspore-ci-bot | 9b503e4f38 | |
mindspore-ci-bot | 528fb81093 | |
yeyunpeng | f4eee11dfa | |
mindspore-ci-bot | e06dfaa80d | |
mindspore-ci-bot | 9f19076788 | |
mindspore-ci-bot | ab45bec828 | |
peixu_ren | 1c8eb9b15d | |
shenwei41 | 76518f7f13 | |
mindspore-ci-bot | 8ee136db18 | |
meng_chunyang | 631aa8cf46 | |
Xun Deng | dc11fa9f53 | |
Mahdi | a5f9b8f92e | |
Cathy Wong | 93810a0dc8 | |
mindspore-ci-bot | 04decda0c5 | |
Harshvardhan Gupta | ac457f3163 | |
mindspore-ci-bot | 9d7250c483 | |
Zirui Wu | 74c1e6da60 | |
mindspore-ci-bot | 3eef4a4e06 | |
chenjianping | 5a83d3a7df | |
Jesse Lee | ebd4cc5c0a | |
wukesong | 9784a58da2 | |
zhaodezan | ad883e4384 | |
BowenK | 7a7e499475 | |
xuanyue | bbedc02700 | |
bingyaweng | 3422f60d50 | |
cjh9368 | 70001a71ea | |
wilfChen | 837aecf9af | |
heleiwang | 4870abc848 | |
qujianwei | c21ffc0317 |
20
README.md
20
README.md
|
@ -66,10 +66,12 @@ MindSpore offers build options across multiple backends:
|
|||
| Hardware Platform | Operating System | Status |
|
||||
| :---------------- | :--------------- | :----- |
|
||||
| Ascend910 | Ubuntu-x86 | ✔️ |
|
||||
| | Ubuntu-aarch64 | ✔️ |
|
||||
| | EulerOS-x86 | ✔️ |
|
||||
| | EulerOS-aarch64 | ✔️ |
|
||||
| GPU CUDA 10.1 | Ubuntu-x86 | ✔️ |
|
||||
| CPU | Ubuntu-x86 | ✔️ |
|
||||
| | Ubuntu-aarch64 | ✔️ |
|
||||
| | Windows-x86 | ✔️ |
|
||||
|
||||
For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an example:
|
||||
|
@ -149,7 +151,23 @@ currently the containerized build options are supported as follows:
|
|||
sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit nvidia-docker2
|
||||
sudo systemctl restart docker
|
||||
```
|
||||
|
||||
Then edit the file daemon.json:
|
||||
```
|
||||
$ vim /etc/docker/daemon.json
|
||||
{
|
||||
"runtimes": {
|
||||
"nvidia": {
|
||||
"path": "nvidia-container-runtime",
|
||||
"runtimeArgs": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
Restart docker again:
|
||||
```
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl restart docker
|
||||
```
|
||||
Then you can pull and run the latest stable image using the below command:
|
||||
```
|
||||
docker pull mindspore/mindspore-gpu:0.7.0-beta
|
||||
|
|
26
README_CN.md
26
README_CN.md
|
@ -1,4 +1,4 @@
|
|||
![MindSpore标志](docs/MindSpore-logo.png "MindSpore logo")
|
||||
![MindSpore标志](docs/MindSpore-logo.png "MindSpore logo")
|
||||
============================================================
|
||||
|
||||
[View English](./README.md)
|
||||
|
@ -66,10 +66,12 @@ MindSpore提供跨多个后端的构建选项:
|
|||
| 硬件平台 | 操作系统 | 状态 |
|
||||
| :------------ | :-------------- | :--- |
|
||||
| Ascend 910 | Ubuntu-x86 | ✔️ |
|
||||
| | Ubuntu-aarch64 | ✔️ |
|
||||
| | EulerOS-x86 | ✔️ |
|
||||
| | EulerOS-aarch64 | ✔️ |
|
||||
| GPU CUDA 10.1 | Ubuntu-x86 | ✔️ |
|
||||
| CPU | Ubuntu-x86 | ✔️ |
|
||||
| | Ubuntu-aarch64 | ✔️ |
|
||||
| | Windows-x86 | ✔️ |
|
||||
|
||||
使用`pip`命令安装,以`CPU`和`Ubuntu-x86`build版本为例:
|
||||
|
@ -120,10 +122,10 @@ MindSpore的Docker镜像托管在[Docker Hub](https://hub.docker.com/r/mindspore
|
|||
| 硬件平台 | Docker镜像仓库 | 标签 | 说明 |
|
||||
| :----- | :------------------------ | :----------------------- | :--------------------------------------- |
|
||||
| CPU | `mindspore/mindspore-cpu` | `x.y.z` | 已经预安装MindSpore `x.y.z` CPU版本的生产环境。 |
|
||||
| | | `devel` | 提供开发环境从源头构建MindSpore(`CPU`后端)。安装详情请参考https://www.mindspore.cn/install。 |
|
||||
| | | `devel` | 提供开发环境从源头构建MindSpore(`CPU`后端)。安装详情请参考https://www.mindspore.cn/install 。 |
|
||||
| | | `runtime` | 提供运行时环境安装MindSpore二进制包(`CPU`后端)。 |
|
||||
| GPU | `mindspore/mindspore-gpu` | `x.y.z` | 已经预安装MindSpore `x.y.z` GPU版本的生产环境。 |
|
||||
| | | `devel` | 提供开发环境从源头构建MindSpore(`GPU CUDA10.1`后端)。安装详情请参考https://www.mindspore.cn/install。 |
|
||||
| | | `devel` | 提供开发环境从源头构建MindSpore(`GPU CUDA10.1`后端)。安装详情请参考https://www.mindspore.cn/install 。 |
|
||||
| | | `runtime` | 提供运行时环境安装MindSpore二进制包(`GPU CUDA10.1`后端)。 |
|
||||
| Ascend | <center>—</center> | <center>—</center> | 即将推出,敬请期待。 |
|
||||
|
||||
|
@ -148,7 +150,23 @@ MindSpore的Docker镜像托管在[Docker Hub](https://hub.docker.com/r/mindspore
|
|||
sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit nvidia-docker2
|
||||
sudo systemctl restart docker
|
||||
```
|
||||
|
||||
编辑文件 daemon.json:
|
||||
```
|
||||
$ vim /etc/docker/daemon.json
|
||||
{
|
||||
"runtimes": {
|
||||
"nvidia": {
|
||||
"path": "nvidia-container-runtime",
|
||||
"runtimeArgs": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
再次重启docker:
|
||||
```
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl restart docker
|
||||
```
|
||||
使用以下命令获取并运行最新的稳定镜像:
|
||||
```
|
||||
docker pull mindspore/mindspore-gpu:0.7.0-beta
|
||||
|
|
126
RELEASE.md
126
RELEASE.md
|
@ -1,3 +1,114 @@
|
|||
# Release 0.7.0-beta
|
||||
|
||||
## Major Features and Improvements
|
||||
### MindSpore Training and Inference Framework
|
||||
#### Ascend 910
|
||||
* New models
|
||||
* TinyBert: a smaller and faster version of BERT using transformer distillation for natural language understanding on GLUE benchmark.
|
||||
* SE-ResNet50: add Squeeze-and-Excitation blocks(SE-Blocks) to the resnet50 network to improve channel interdependencies for image classification on ImageNet 2012 dataset.
|
||||
* Inception V3: the third version of Inception convolutional architectures for image classification on ImageNet 2012 dataset.
|
||||
* Frontend and user interface
|
||||
* Embedding operator high-level packaging to support segmented by field for Wide&Deep.
|
||||
* Load multi-node checkpoint into single-process to support host-device hybrid inference.
|
||||
* Support Concat/Tile/Strideslice distributed operators.
|
||||
* Support cumulative gradient and batch training split.
|
||||
* Support variable parameter input for Cell object.
|
||||
* Parameter mixed calculation optimization for pynative mode.
|
||||
* Deep Probabilistic Programming
|
||||
* Support statistical distributions classes used to generate stochastic tensors.
|
||||
* Support probabilistic inference algorithms.
|
||||
* Support BNN layers used to construct BNN in Graph mode.
|
||||
* Support interfaces for the transformation between BNN and DNN in Graph mode.
|
||||
* Support uncertainty estimation to estimate epistemic uncertainty and aleatoric uncertainty.
|
||||
* User interfaces change log
|
||||
* change base class of parameter([!3473](https://gitee.com/mindspore/mindspore/pulls/3473))
|
||||
* change binary to mindir([!4258](https://gitee.com/mindspore/mindspore/pulls/4258))
|
||||
* change export from geir to air([!4269](https://gitee.com/mindspore/mindspore/pulls/4269))
|
||||
* Init parameter data by default([!3967](https://gitee.com/mindspore/mindspore/pulls/3967))
|
||||
* change IndexedSlices to RowTensor([!4031](https://gitee.com/mindspore/mindspore/pulls/4031))
|
||||
* Must set or change parallel mode before any Initializer created([!4801](https://gitee.com/mindspore/mindspore/pulls/4801))
|
||||
* Executor and performance optimization
|
||||
* MindSpore graph compilation process performance improved by 20%.
|
||||
* Decoupling C++ and Python modules to achieve separate compilation of core modules.
|
||||
* Data processing, augmentation, and save format
|
||||
* Support automatic data augmentation
|
||||
* Support GNN distributed cache in single node
|
||||
* Support ConcatDataset using distributed sampler
|
||||
|
||||
#### Other Hardware Support
|
||||
* GPU platform
|
||||
* New model supported: VGG16, ResNet101, DeepFM.
|
||||
* Support some distributed operators in ResNet50 and Wide&Deep.
|
||||
* Support automatic parallel for Wide&Deep.
|
||||
* Support function funcs[i](*inputs) (such as switch-case).
|
||||
* Support distributed training with parameter server.
|
||||
* Support GPU operator profiling.
|
||||
* Performance optimization of the distributed training with allreduce.
|
||||
* Performance optimization of the mixed precision training.
|
||||
* Performance optimization of the pynative mode.
|
||||
* Performance optimization of the convolution operator, batch normalization operator.
|
||||
* CPU platform
|
||||
* Support MobileNetV2 Re-Training: Re-train the network with different class number.
|
||||
|
||||
### MindSpore Lite
|
||||
* Converter
|
||||
* Support third-party models, including TFLite/Caffe/ONNX.
|
||||
* Add 93 TFLite op.
|
||||
* Add 24 Caffe op.
|
||||
* Add 62 ONNX op.
|
||||
* Add 11 optimized passes, include fusion/const fold.
|
||||
* Support aware-training and Post-training quantization.
|
||||
* CPU
|
||||
* Add 100+ ops, support fp32, int8/uint8, FP16 ops
|
||||
* Support fast convolution algorithms: Sliding Window, Im2col + GEMM, Strassen, Winograd
|
||||
* Support assembly/neon instruction.
|
||||
* Support CPU fp16 and sdot on ARM v8.2+.
|
||||
* GPU
|
||||
* Add 20+ ops for OpenCL.
|
||||
* Support image2D/buffer format.
|
||||
* Optimize online initialization time.
|
||||
* Add optimized convolution 1x1/3x3/depthwise/convolution_transposed for OpenCL.
|
||||
* Tool & example
|
||||
* Add benchmark and TimeProfile tools.
|
||||
* Add image classification Android Demo.
|
||||
|
||||
## Bugfixes
|
||||
* Models
|
||||
* normalize the readme file([!5410](https://gitee.com/mindspore/mindspore/pulls/5410))
|
||||
* fix a sink_size bug for transformer([!5393](https://gitee.com/mindspore/mindspore/pulls/5393))
|
||||
* fix bool type optional for resnet50([!5363](https://gitee.com/mindspore/mindspore/pulls/5363))
|
||||
* Python API
|
||||
* improve interface '__bool__' for tensor([!4000](https://gitee.com/mindspore/mindspore/pulls/4000))
|
||||
* fix GPU-ResizeNearestNeighbor([!3760](https://gitee.com/mindspore/mindspore/pulls/3760))
|
||||
* fix topK multi dimension grad func([!3711](https://gitee.com/mindspore/mindspore/pulls/3711))
|
||||
* fix scatterop error msg([!3699](https://gitee.com/mindspore/mindspore/pulls/3699))
|
||||
* fix bug of cast dtype when using mix_presion in pynative mode([!3730](https://gitee.com/mindspore/mindspore/pulls/3730))
|
||||
* Executor
|
||||
* fix etsnet train error when UnsegmentSum's first input shape is (1,) ([!4573](https://gitee.com/mindspore/mindspore/pulls/4573))
|
||||
* fix bug of result error in while control flow caused by unsupported value references ([!4103](https://gitee.com/mindspore/mindspore/pulls/4103))
|
||||
* fix bug of the output tensor does not carry device data type ([!3774](https://gitee.com/mindspore/mindspore/pulls/3774))
|
||||
* fix bug of avoiding multi attr value are eliminated in pynative mode ([!4225](https://gitee.com/mindspore/mindspore/pulls/4225))
|
||||
* fix bug of AssignAdd unable to work normally in multi-cases ([!5171](https://gitee.com/mindspore/mindspore/pulls/5171))
|
||||
* GPU platform
|
||||
* improve the environment variable checking for nvcc compiler path ([!5140](https://gitee.com/mindspore/mindspore/pulls/5140))
|
||||
* fix bug of error in cast operator conversion from fp16 to fp32 ([!4147](https://gitee.com/mindspore/mindspore/pulls/4147))
|
||||
* fix bug of the array out of bound in case of make_tuple operator ([!5219](https://gitee.com/mindspore/mindspore/pulls/5219))
|
||||
* Data processing and Pro
|
||||
* fix GeneratorDataset time out([!3624](https://gitee.com/mindspore/mindspore/pulls/3624))
|
||||
* fix concat operator get_dataset_size error([!4701](https://gitee.com/mindspore/mindspore/pulls/4701))
|
||||
* fixing python validator for Repeat Op([!4366](https://gitee.com/mindspore/mindspore/pulls/4366))
|
||||
* Third party
|
||||
* Sqlite : Update sqlite to 3.32.2 to handle [CVE-2020-11656](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11656), [CVE-2020-13871](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13871), [CVE-2020-11655](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11655), [CVE-2020-9327](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-9327), [CVE-2020-13630](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13630), [CVE-2020-15358](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15358), [CVE-2020-13631](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13631), [CVE-2020-13632](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13632), [CVE-2020-13434](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13434), [CVE-2020-13435](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13435), and [CVE-2020-15358](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11655).
|
||||
* Libjpeg-turbo : Update libjpeg-turbo to 2.0.4 to handle [CVE-2020-13790](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13790).
|
||||
|
||||
## Contributors
|
||||
Thanks goes to these wonderful people:
|
||||
|
||||
Adel, Alexey, andy, andy_wangrui, anthonyaje, anzhengqi, askmiao, avakh, baihuawei, bingyaweng, BowenK, buxue, caifubi, CaoJian, caozhou, Cathy, changzherui, chenfei, chengxianbin, chenhaozhe, chenjianping, chentingting, chenzomi, chenzupeng, chujinjin, cjh9368, Corleone, cristoval, danish, dengyutao, eric, Eric, ervinzhang, etone-chan, fangzehua, fary86, fuzhiye, gengdongjie, genglishuai, Giancarlo, gongdaguo, gukecai, guohongzilong, GuoMengHao, hangq, hanhaocheng, hanhuifeng2020, hanjun996, Harshvardhan, He, heleiwang, hesham, hexia, Hoai, hongxing, huangdongrun, huanghui, huangxinjing, islam_amin, Jesse, jianghui58, jiangzhiwen, jin-xiulang, jinyaohui, jjfeing, John, Jonathan, jonyguo, kai00, kingfo, kpy, kswang, laiyongqiang, leilei_snow, leopz, Li, liangzelang, lianliguang, lichen_101010, lichenever, lihongkang, lilei, limingqi107, ling, lingyunli63, linqingke, lirongzhen1, liubuyu, liuwenhao4, liuxiao78, liuxiao93, liuzhongkai, Lixia, lixian, liyong, lizhenyu, looop5, luoyang, lvchangquan, lvliang, lvwenyuan, lyvette, mahdi, Mahdi, mamba_ni, maning202007, Margaret_wangrui, mayang, meixiaowei, meng_chunyang, ms_yan, nhussain, panbingao, panfengfeng, panyifeng, Payne, Peilin, peixu_ren, pengyongrong, Pengyongrong, qianlong, qujianwei, root, shenwei41, shibeiji, simson, songhonglei413, Su, sunsuodong, suteng, tao_yunhao, TFbunny, tinazhang, tom__chen, tony_liu2, tronzhang, VectorSL, wandongdong, wangdongxu, wanghua, wangmin, wangshaocong, wangzhe, wanyiming, Wei, wenchunjiang, wilfChen, WilliamLian, wsc, wukesong, wuweikang, wuxuejian, wuyongkang, xiefangqi, xuanyue, Xun, xutianchun, xuyongfei, yanghaitao, yangjie159, YangLuo, yangruoqi713, yangyongjie, yangzhenzhang, yankai, yao_yf, yelihua, yeyunpeng, Yi, yoni, yoonlee666, yuchaojie, yujianfeng, yuximiao, zhangxuetong, zhaizhiqiang, Zhang, zhangxinfeng3, zhangxuetong, zhangyihui, zhangz0911gm, zhanke, zhanyuan, zhaodezan, zhaoting, zhaozhenlong, zhengjun10, zhongligeng, zhoufeng, zhousiyi, zhouyaqiang, 
zhouyuanshen, Zichun, Zirui, zjun, zongha, ZPaC, lijiaqi, liangchenghui, wangminggui
|
||||
|
||||
Contributions of any kind are welcome!
|
||||
|
||||
|
||||
# Release 0.6.0-beta
|
||||
|
||||
## Major Features and Improvements
|
||||
|
@ -60,6 +171,9 @@
|
|||
* Data processing
|
||||
* Fix bug of RandomColor and RandomSharpness default parameter checking ([!2833](https://gitee.com/mindspore/mindspore/pulls/2833))
|
||||
* Fix process hung when training and eval ([!3469](https://gitee.com/mindspore/mindspore/pulls/3469))
|
||||
* Third party
|
||||
* Sqlite : Update sqlite to 3.32.2 to handle [CVE-2020-11656](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11656), [CVE-2020-13871](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13871), [CVE-2020-11655](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11655), [CVE-2020-9327](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-9327), [CVE-2020-13630](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13630), [CVE-2020-15358](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15358), [CVE-2020-13631](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13631), [CVE-2020-13632](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13632), [CVE-2020-13434](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13434), [CVE-2020-13435](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13435), and [CVE-2020-15358](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11655).
|
||||
* Libjpeg-turbo : Update libjpeg-turbo to 2.0.4 to handle [CVE-2020-13790](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13790).
|
||||
|
||||
## Contributors
|
||||
Thanks goes to these wonderful people:
|
||||
|
@ -133,6 +247,8 @@ Contributions of any kind are welcome!
|
|||
* Fix bug of Cifar dataset reading([!2096](https://gitee.com/mindspore/mindspore/pulls/2096))
|
||||
* Fix bug of C++ behavior in RandomCropAndResize([!2026](https://gitee.com/mindspore/mindspore/pulls/2026))
|
||||
* Fix the bug of mindrecord shuffle([!2420](https://gitee.com/mindspore/mindspore/pulls/2420))
|
||||
* Third party
|
||||
* Sqlite : Update sqlite to 3.32.2 to handle [CVE-2020-11656](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11656), [CVE-2020-13871](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13871), [CVE-2020-11655](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11655), [CVE-2020-9327](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-9327), [CVE-2020-13630](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13630), [CVE-2020-15358](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15358), [CVE-2020-13631](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13631), [CVE-2020-13632](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13632), [CVE-2020-13434](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13434), [CVE-2020-13435](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13435), and [CVE-2020-15358](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11655).
|
||||
|
||||
## Contributors
|
||||
Thanks goes to these wonderful people:
|
||||
|
@ -207,11 +323,11 @@ Contributions of any kind are welcome!
|
|||
* Executor
|
||||
* Fix dropout,topK and addn errors in PyNative mode ([!1285](https://gitee.com/mindspore/mindspore/pulls/1285), [!1138](https://gitee.com/mindspore/mindspore/pulls/1138), [!1033](https://gitee.com/mindspore/mindspore/pulls/1033)).
|
||||
* Fix memory leaks after execution in PyNative mode ([!1201](https://gitee.com/mindspore/mindspore/pulls/1201)).
|
||||
* Fix HCCL failure in some special scenes ([!1204](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1204), [!1252](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1252)).
|
||||
* Fix SSD network when Select failed, can't find kernel info([!1449](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1449)).
|
||||
* Fix Topk operator selection strategy bug between aicore and aicpu([!1367](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1367)).
|
||||
* Fix input memory size of 'assign' op unequal in control sink mode when assigning a data from one child graph to another child graph([!802](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/802)).
|
||||
* Fix allreduce ir inconsistency([!989](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/989)).
|
||||
* Fix HCCL failure in some special scenes ([!1204](https://gitee.com/mindspore/mindspore/pulls/1204), [!1252](https://gitee.com/mindspore/mindspore/pulls/1252)).
|
||||
* Fix SSD network when Select failed, can't find kernel info([!1449](https://gitee.com/mindspore/mindspore/pulls/1449)).
|
||||
* Fix Topk operator selection strategy bug between aicore and aicpu([!1367](https://gitee.com/mindspore/mindspore/pulls/1367)).
|
||||
* Fix input memory size of 'assign' op unequal in control sink mode when assigning a data from one child graph to another child graph([!802](https://gitee.com/mindspore/mindspore/pulls/802)).
|
||||
* Fix allreduce ir inconsistency([!989](https://gitee.com/mindspore/mindspore/pulls/989)).
|
||||
* GPU platform
|
||||
* Fix summary for gradient collection ([!1364](https://gitee.com/mindspore/mindspore/pulls/1364))
|
||||
* Fix the slice operator ([!1489](https://gitee.com/mindspore/mindspore/pulls/1489))
|
||||
|
|
|
@ -3693,6 +3693,72 @@ Copyright (c) 1999, Frank Warmerdam
|
|||
Copyright (c) 1991-1996 Sam Leffler
|
||||
Copyright (c) 1996 USAF Phillips Laboratory
|
||||
|
||||
Software: libevent 2.1.12
|
||||
Copyright notice:
|
||||
Copyright (C) 1998 - 2012, Daniel Stenberg, <daniel@haxx.se>, et al.
|
||||
COPYRIGHT AND PERMISSION NOTICE
|
||||
Copyright (c) 1996 - 2013, Daniel Stenberg, <daniel@haxx.se>.
|
||||
Copyright (C) 2012, iSEC Partners.
|
||||
Copyright (c) 1987, 1993, 1994, 1995
|
||||
Copyright (c) 1987, 1993, 1994, 1996
|
||||
Copyright 2002 Niels Provos <provos@citi.umich.edu>
|
||||
Copyright (c) 2007-2012 Niels Provos and Nick Mathewson
|
||||
Copyright (c) 2000-2007 Niels Provos <provos@citi.umich.edu>
|
||||
Copyright (c) 2007-2012 Niels Provos, Nick Mathewson
|
||||
Copyright (c) 2009-2012 Niels Provos and Nick Mathewson
|
||||
Copyright (c) 2006-2007 Niels Provos <provos@citi.umich.edu>
|
||||
Copyright (c) 2008-2012 Niels Provos and Nick Mathewson
|
||||
Copyright (c) 1991, 1993
|
||||
Copyright (c) 2009, Michihiro NAKAJIMA
|
||||
Copyright 2000-2013 Kitware, Inc.
|
||||
Copyright 2000-2011 Insight Software Consortium
|
||||
notices of original copyright by their contributors; see each source
|
||||
Copyright (C) 1996-2018 Free Software Foundation, Inc.
|
||||
Copyright (c) 2010 Chris Davis, Niels Provos, and Nick Mathewson
|
||||
Copyright (c) 2010-2012 Niels Provos and Nick Mathewson
|
||||
Copyright (c) 1996, David Mazieres <dm@uun.org>
|
||||
Copyright (c) 2008, Damien Miller <djm@openbsd.org>
|
||||
Copyright (c) 2002-2007 Niels Provos <provos@citi.umich.edu>
|
||||
Copyright (c) 2002-2006 Niels Provos <provos@citi.umich.edu>
|
||||
Copyright (c) 2009-2012 Niels Provos, Nick Mathewson
|
||||
Copyright 2000-2009 Niels Provos <provos@citi.umich.edu>
|
||||
Copyright 2009-2012 Niels Provos and Nick Mathewson
|
||||
Copyright 2000-2007 Niels Provos <provos@citi.umich.edu>
|
||||
Copyright 2007-2012 Niels Provos, Nick Mathewson
|
||||
Copyright 2003-2009 Niels Provos <provos@citi.umich.edu>
|
||||
Copyright 2006-2007 Niels Provos
|
||||
Copyright 2007-2012 Nick Mathewson and Niels Provos
|
||||
Copyright (c) 2005-2007 Niels Provos <provos@citi.umich.edu>
|
||||
Copyright (c) 2003-2009 Niels Provos <provos@citi.umich.edu>
|
||||
Copyright 2007-2012 Niels Provos and Nick Mathewson
|
||||
Copyright (c) 2007 Sun Microsystems. All rights reserved.
|
||||
Copyright (c) 2008-2012 Niels Provos, Nick Mathewson
|
||||
Copyright 2002 Christopher Clark
|
||||
Copyright 2005-2012 Nick Mathewson
|
||||
Copyright 2001-2007 Niels Provos <provos@citi.umich.edu>
|
||||
Copyright (c) 2012 Niels Provos and Nick Mathewson
|
||||
Copyright (c) 2000 Dug Song <dugsong@monkey.org>
|
||||
Copyright (c) 1993 The Regents of the University of California.
|
||||
Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
|
||||
Copyright (c) 2003 Michael A. Davis <mike@datanerds.net>
|
||||
Copyright (c) 2007 Sun Microsystems
|
||||
Copyright (c) 2002 Christopher Clark
|
||||
Copyright (c) 2006 Maxim Yegorushkin <maxim.yegorushkin@gmail.com>
|
||||
Copyright (c) 2010 BitTorrent, Inc.
|
||||
Copyright (c) 2005-2012 Niels Provos and Nick Mathewson
|
||||
Copyright (c) 1993
|
||||
Copyright 2003 Michael A. Davis <mike@datanerds.net>
|
||||
Copyright 2003-2007 Niels Provos <provos@citi.umich.edu>
|
||||
Copyright 2008-2012 Niels Provos and Nick Mathewson
|
||||
Copyright (c) 2003-2007 Niels Provos <provos@citi.umich.edu>
|
||||
Copyright (c) 2013 Niels Provos and Nick Mathewson
|
||||
Copyright (c) 2009-2012 Nick Mathewson and Niels Provos
|
||||
Copyright (c) 2007-2013 Niels Provos and Nick Mathewson
|
||||
Copyright (c) 2012 Ross Lagerwall <rosslagerwall@gmail.com>
|
||||
tinytest.c -- Copyright 2009-2012 Nick Mathewson
|
||||
tinytest.h -- Copyright 2009-2012 Nick Mathewson
|
||||
tinytestmacros.h -- Copyright 2009-2012 Nick Mathewson
|
||||
|
||||
Software: opencv 4.2.0
|
||||
Copyright notice:
|
||||
Copyright (C) 2016, NVIDIA Corporation, all rights reserved.
|
||||
|
|
2
akg
2
akg
|
@ -1 +1 @@
|
|||
Subproject commit 3bb6264188d0b1d6ff776a35a571bc7190df0800
|
||||
Subproject commit 76a1ecf9da48fa463e25ad63c26281fb5867874d
|
29
build.bat
29
build.bat
|
@ -16,20 +16,20 @@
|
|||
@title mindspore_build
|
||||
|
||||
SET BASEPATH=%CD%
|
||||
IF NOT EXIST %BASEPATH%/build (
|
||||
IF NOT EXIST "%BASEPATH%/build" (
|
||||
md "build"
|
||||
)
|
||||
|
||||
cd %BASEPATH%/build
|
||||
set BUILD_PATH=%CD%
|
||||
|
||||
IF NOT EXIST %BUILD_PATH%/mindspore (
|
||||
IF NOT EXIST "%BUILD_PATH%/mindspore" (
|
||||
md "mindspore"
|
||||
)
|
||||
|
||||
cd %CD%/mindspore
|
||||
|
||||
IF "%2%" == "lite" (
|
||||
IF "%1%" == "lite" (
|
||||
call :gene_gtest
|
||||
call :run_cmake
|
||||
IF errorlevel 1 (
|
||||
|
@ -47,14 +47,17 @@ IF "%2%" == "lite" (
|
|||
)
|
||||
|
||||
cd %BUILD_PATH%/mindspore
|
||||
IF "%1%" == "" (
|
||||
cmake --build . -- -j6
|
||||
IF "%2%" == "" (
|
||||
cmake --build . --target package -- -j6
|
||||
) ELSE (
|
||||
cmake --build . -- -j%1%
|
||||
cmake --build . --target package -- -j%2%
|
||||
)
|
||||
IF errorlevel 1 (
|
||||
echo "build fail."
|
||||
goto run_fail
|
||||
) ELSE (
|
||||
cd %BASEPATH%/output
|
||||
rd /s /q _CPack_Packages
|
||||
)
|
||||
) ELSE (
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DENABLE_CPU=ON -DENABLE_MINDDATA=ON -DUSE_GLOG=ON ^
|
||||
|
@ -83,7 +86,7 @@ goto run_eof
|
|||
cd %BUILD_PATH%/mindspore
|
||||
cmake -DBUILD_DEVICE=on -DBUILD_CONVERTER=on -DPLATFORM_ARM64=off -DSUPPORT_TRAIN=off ^
|
||||
-DCMAKE_BUILD_TYPE=Release -DSUPPORT_GPU=off -DBUILD_MINDDATA=off -DOFFLINE_COMPILE=off ^
|
||||
-G "CodeBlocks - MinGW Makefiles" %BASEPATH%/mindspore/lite
|
||||
-G "CodeBlocks - MinGW Makefiles" "%BASEPATH%/mindspore/lite"
|
||||
GOTO:EOF
|
||||
|
||||
:gene_gtest
|
||||
|
@ -94,31 +97,31 @@ GOTO:EOF
|
|||
GOTO:EOF
|
||||
|
||||
:gene_protobuf
|
||||
SET PROTOC=%BASEPATH%/build/mindspore/_deps/protobuf-src/_build/protoc
|
||||
SET PROTOC="%BASEPATH%/build/mindspore/_deps/protobuf-src/_build/protoc"
|
||||
|
||||
SET PROTO_SRC_DIR=%BASEPATH%/mindspore/lite/tools/converter/parser/caffe
|
||||
SET PROTO_SRC_DIR="%BASEPATH%/mindspore/lite/tools/converter/parser/caffe"
|
||||
cd %PROTO_SRC_DIR%
|
||||
%PROTOC% *.proto --proto_path=%PROTO_SRC_DIR% --cpp_out=%PROTO_SRC_DIR%
|
||||
|
||||
SET PROTO_SRC_DIR=%BASEPATH%/mindspore/lite/tools/converter/parser/onnx
|
||||
SET PROTO_SRC_DIR="%BASEPATH%/mindspore/lite/tools/converter/parser/onnx"
|
||||
cd %PROTO_SRC_DIR%
|
||||
%PROTOC% *.proto --proto_path=%PROTO_SRC_DIR% --cpp_out=%PROTO_SRC_DIR%
|
||||
cd %BUILD_PATH%/mindspore
|
||||
GOTO:EOF
|
||||
|
||||
:gene_flatbuffer
|
||||
SET FLATC=%BASEPATH%/build/mindspore/_deps/flatbuffers-src/_build/flatc
|
||||
SET FLATC="%BASEPATH%/build/mindspore/_deps/flatbuffers-src/_build/flatc"
|
||||
SET FLAT_DIR=%BASEPATH%/mindspore/lite/schema
|
||||
cd %FLAT_DIR%
|
||||
IF EXIST inner rd /s /q inner
|
||||
md inner
|
||||
|
||||
%FLATC% -c -b *.fbs
|
||||
%FLATC% -c -b --reflect-types --gen-mutable --reflect-names --gen-object-api -o %FLAT_DIR%/inner *.fbs
|
||||
%FLATC% -c -b --reflect-types --gen-mutable --reflect-names --gen-object-api -o "%FLAT_DIR%/inner" *.fbs
|
||||
|
||||
SET FLAT_DIR=%BASEPATH%/mindspore/lite/tools/converter/parser/tflite
|
||||
cd %FLAT_DIR%
|
||||
%FLATC% -c -b --reflect-types --gen-mutable --reflect-names --gen-object-api -o %FLAT_DIR% *.fbs
|
||||
%FLATC% -c -b --reflect-types --gen-mutable --reflect-names --gen-object-api -o "%FLAT_DIR%" *.fbs
|
||||
cd %BUILD_PATH%/mindspore
|
||||
GOTO:EOF
|
||||
|
||||
|
|
6
build.sh
6
build.sh
|
@ -53,7 +53,7 @@ usage()
|
|||
echo " -n Compile minddata lite"
|
||||
echo " -M Enable MPI and NCCL for GPU training, gpu default on"
|
||||
echo " -V Specify the minimum required cuda version, default CUDA 10.1"
|
||||
echo " -I Compile lite"
|
||||
echo " -I Enable compiling mindspore lite for arm64, arm32 or x86_64, default disable mindspore lite compiling"
|
||||
echo " -K Compile with AKG, default on"
|
||||
echo " -s Enable serving module, default off"
|
||||
echo " -w Enable acl module, default off"
|
||||
|
@ -393,7 +393,7 @@ build_mindspore()
|
|||
CMAKE_VERBOSE="--verbose"
|
||||
fi
|
||||
cmake --build . --target package ${CMAKE_VERBOSE} -j$THREAD_NUM
|
||||
echo "success to build mindspore project!"
|
||||
echo "success building mindspore project!"
|
||||
}
|
||||
|
||||
checkndk() {
|
||||
|
@ -618,10 +618,12 @@ build_lite()
|
|||
|
||||
if [[ "${COMPILE_RET}" -ne 0 ]]; then
|
||||
echo "---------------- mindspore lite: build failed ----------------"
|
||||
exit 1
|
||||
else
|
||||
mv ${BASEPATH}/output/tmp/*.tar.gz* ${BASEPATH}/output/
|
||||
rm -rf ${BASEPATH}/output/tmp/
|
||||
echo "---------------- mindspore lite: build success ----------------"
|
||||
exit 0
|
||||
fi
|
||||
}
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ else()
|
|||
VER 67.1
|
||||
LIBS ${LIB_ICU_COMMON} ${LIB_ICU_DATA} ${LIB_ICU_I18N}
|
||||
URL https://github.com/unicode-org/icu/archive/release-67-1.tar.gz
|
||||
MD5 0c2662a2b0bc80b0eb56495205247c8f
|
||||
MD5 fd525fb47d8827b0b7da78b51dd2d93f
|
||||
CONFIGURE_COMMAND ${CMAKE_SOURCE_DIR}/scripts/build_icu4c.sh
|
||||
)
|
||||
include_directories(${icu4c_INC})
|
||||
|
|
|
@ -1,13 +1,18 @@
|
|||
include(CMakePackageConfigHelpers)
|
||||
|
||||
set(LIB_DIR ${MAIN_DIR}/lib)
|
||||
set(INC_DIR ${MAIN_DIR}/include)
|
||||
set(TURBO_DIR ${MAIN_DIR}/third_party/libjpeg-turbo)
|
||||
set(OPENCV_DIR ${MAIN_DIR}/third_party/opencv)
|
||||
set(PROTOBF_DIR ${MAIN_DIR}/third_party/protobuf)
|
||||
set(FLATBF_DIR ${MAIN_DIR}/third_party/flatbuffers)
|
||||
set(LIB_DIR ${MAIN_DIR}-${COMPONENT_NAME}/lib)
|
||||
set(INC_DIR ${MAIN_DIR}-${COMPONENT_NAME}/include)
|
||||
set(TURBO_DIR ${MAIN_DIR}-${COMPONENT_NAME}/third_party/libjpeg-turbo)
|
||||
set(OPENCV_DIR ${MAIN_DIR}-${COMPONENT_NAME}/third_party/opencv)
|
||||
set(PROTOBF_DIR ${MAIN_DIR}-${COMPONENT_NAME}/third_party/protobuf)
|
||||
set(FLATBF_DIR ${MAIN_DIR}-${COMPONENT_NAME}/third_party/flatbuffers)
|
||||
|
||||
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
|
||||
set(LIB_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/lib)
|
||||
set(INC_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/include)
|
||||
set(TURBO_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/libjpeg-turbo)
|
||||
set(OPENCV_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/opencv)
|
||||
set(PROTOBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/protobuf)
|
||||
set(FLATBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/flatbuffers)
|
||||
if (BUILD_MINDDATA)
|
||||
install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
|
||||
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
|
||||
|
@ -30,25 +35,52 @@ if (BUILD_MINDDATA)
|
|||
endif ()
|
||||
|
||||
if (PLATFORM_ARM64)
|
||||
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
|
||||
install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h DESTINATION ${INC_DIR}/ir/dtype COMPONENT ${COMPONENT_NAME})
|
||||
install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
|
||||
install(DIRECTORY ${TOP_DIR}/mindspore/lite/schema/ DESTINATION ${INC_DIR}/schema COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "inner" EXCLUDE)
|
||||
install(FILES ${TOP_DIR}/mindspore/lite/build/src/runtime/kernel/arm/nnacl/liboptimize.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
|
||||
install(FILES ${TOP_DIR}/mindspore/lite/build/nnacl/liboptimize.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
|
||||
install(DIRECTORY ${TOP_DIR}/third_party/flatbuffers/include DESTINATION ${FLATBF_DIR} COMPONENT ${COMPONENT_NAME})
|
||||
elseif (PLATFORM_ARM32)
|
||||
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
|
||||
install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h DESTINATION ${INC_DIR}/ir/dtype COMPONENT ${COMPONENT_NAME})
|
||||
install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
|
||||
install(DIRECTORY ${TOP_DIR}/mindspore/lite/schema/ DESTINATION ${INC_DIR}/schema COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "inner" EXCLUDE)
|
||||
install(DIRECTORY ${TOP_DIR}/third_party/flatbuffers/include DESTINATION ${FLATBF_DIR} COMPONENT ${COMPONENT_NAME})
|
||||
elseif (CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
get_filename_component(CXX_DIR ${CMAKE_CXX_COMPILER} PATH)
|
||||
file(GLOB LIB_LIST ${CXX_DIR}/libstdc++-6.dll ${CXX_DIR}/libwinpthread-1.dll ${CXX_DIR}/libssp-0.dll ${CXX_DIR}/libgcc_s_seh-1.dll)
|
||||
install(FILES ${TOP_DIR}/build/mindspore/tools/converter/converter_lite.exe DESTINATION ${TOP_DIR}/build/mindspore/package COMPONENT ${COMPONENT_NAME})
|
||||
install(FILES ${LIB_LIST} DESTINATION ${TOP_DIR}/build/mindspore/package COMPONENT ${COMPONENT_NAME})
|
||||
install(FILES ${TOP_DIR}/build/mindspore/tools/converter/libconverter_parser.a DESTINATION ${TOP_DIR}/build/mindspore/package COMPONENT ${PARSER_NAME})
|
||||
else ()
|
||||
install(FILES ${TOP_DIR}/third_party/protobuf/build/lib/libprotobuf.so.19.0.0 DESTINATION ${PROTOBF_DIR}/lib RENAME libprotobuf.so.19 COMPONENT ${COMPONENT_NAME})
|
||||
install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${INC_DIR_RUN_X86} COMPONENT ${RUN_X86_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
|
||||
install(DIRECTORY ${TOP_DIR}/mindspore/lite/schema/ DESTINATION ${INC_DIR_RUN_X86}/schema COMPONENT ${RUN_X86_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "inner" EXCLUDE)
|
||||
install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h DESTINATION ${INC_DIR_RUN_X86}/ir/dtype COMPONENT ${RUN_X86_COMPONENT_NAME})
|
||||
install(DIRECTORY ${TOP_DIR}/third_party/flatbuffers/include DESTINATION ${FLATBF_DIR_RUN_X86} COMPONENT ${RUN_X86_COMPONENT_NAME})
|
||||
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${LIB_DIR_RUN_X86} COMPONENT ${RUN_X86_COMPONENT_NAME})
|
||||
|
||||
install(FILES ${TOP_DIR}/third_party/protobuf/build/lib/libprotobuf.so.19.0.0 DESTINATION ${PROTOBF_DIR}/lib RENAME libprotobuf.so.19 COMPONENT ${COMPONENT_NAME})
|
||||
endif ()
|
||||
|
||||
set(CPACK_GENERATOR TGZ)
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
set(CPACK_GENERATOR ZIP)
|
||||
else ()
|
||||
set(CPACK_GENERATOR TGZ)
|
||||
endif ()
|
||||
set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
|
||||
set(CPACK_COMPONENTS_ALL ${COMPONENT_NAME})
|
||||
if (PLATFORM_ARM64 OR PLATFORM_ARM32)
|
||||
set(CPACK_COMPONENTS_ALL ${COMPONENT_NAME})
|
||||
elseif (WIN32)
|
||||
set(CPACK_COMPONENTS_ALL ${COMPONENT_NAME})
|
||||
else ()
|
||||
set(CPACK_COMPONENTS_ALL ${COMPONENT_NAME} ${RUN_X86_COMPONENT_NAME})
|
||||
endif ()
|
||||
set(CPACK_PACKAGE_FILE_NAME ${MAIN_DIR})
|
||||
set(CPACK_PACKAGE_DIRECTORY ${TOP_DIR}/output/tmp)
|
||||
if (WIN32)
|
||||
set(CPACK_PACKAGE_DIRECTORY ${TOP_DIR}/output)
|
||||
else ()
|
||||
set(CPACK_PACKAGE_DIRECTORY ${TOP_DIR}/output/tmp)
|
||||
endif()
|
||||
set(CPACK_PACKAGE_CHECKSUM SHA256)
|
||||
include(CPack)
|
|
@ -6,6 +6,7 @@ MAINTAINER leonwanghui <leon.wanghui@huawei.com>
|
|||
ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
|
||||
ENV CMAKE_ROOT_PATH /usr/local/cmake-3.14.1
|
||||
ENV PATH ${PYTHON_ROOT_PATH}/bin:${CMAKE_ROOT_PATH}/bin:/usr/local/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH ${PYTHON_ROOT_PATH}/lib
|
||||
|
||||
# Install base tools
|
||||
RUN apt update \
|
||||
|
@ -48,7 +49,7 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
|
|||
&& tar -xvf v3.7.5.tar.gz \
|
||||
&& cd /tmp/cpython-3.7.5 \
|
||||
&& mkdir -p ${PYTHON_ROOT_PATH} \
|
||||
&& ./configure --prefix=${PYTHON_ROOT_PATH} \
|
||||
&& ./configure --prefix=${PYTHON_ROOT_PATH} --enable-shared \
|
||||
&& make -j4 \
|
||||
&& make install -j4 \
|
||||
&& rm -f /usr/local/bin/python \
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
|
||||
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
|
||||
|
||||
MAINTAINER leonwanghui <leon.wanghui@huawei.com>
|
||||
|
||||
|
@ -35,9 +35,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt install -y \
|
|||
autoconf \
|
||||
libtool \
|
||||
automake \
|
||||
flex \
|
||||
libnccl2=2.4.8-1+cuda10.1 \
|
||||
libnccl-dev=2.4.8-1+cuda10.1
|
||||
flex
|
||||
|
||||
# Set bash
|
||||
RUN echo "dash dash/sh boolean false" | debconf-set-selections
|
||||
|
|
|
@ -6,6 +6,7 @@ MAINTAINER leonwanghui <leon.wanghui@huawei.com>
|
|||
ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
|
||||
ENV CMAKE_ROOT_PATH /usr/local/cmake-3.14.1
|
||||
ENV PATH ${CMAKE_ROOT_PATH}/bin:/usr/local/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH ${PYTHON_ROOT_PATH}/lib
|
||||
|
||||
# Install base tools
|
||||
RUN apt update \
|
||||
|
@ -51,7 +52,7 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
|
|||
&& tar -xvf v3.7.5.tar.gz \
|
||||
&& cd /tmp/cpython-3.7.5 \
|
||||
&& mkdir -p ${PYTHON_ROOT_PATH} \
|
||||
&& ./configure --prefix=${PYTHON_ROOT_PATH} \
|
||||
&& ./configure --prefix=${PYTHON_ROOT_PATH} --enable-shared \
|
||||
&& make -j4 \
|
||||
&& make install -j4 \
|
||||
&& rm -f /usr/local/bin/python \
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
|
||||
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
|
||||
|
||||
MAINTAINER leonwanghui <leon.wanghui@huawei.com>
|
||||
|
||||
|
@ -35,9 +35,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt install -y \
|
|||
autoconf \
|
||||
libtool \
|
||||
automake \
|
||||
flex \
|
||||
libnccl2=2.4.8-1+cuda10.1 \
|
||||
libnccl-dev=2.4.8-1+cuda10.1
|
||||
flex
|
||||
|
||||
# Set bash
|
||||
RUN echo "dash dash/sh boolean false" | debconf-set-selections
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 54 KiB |
|
@ -1 +1 @@
|
|||
Subproject commit 622af6c1c50034bea5a08bd409c5a410782bfe53
|
||||
Subproject commit 80f9c96ed3fe0f07bf40a91d1f41373681d0c0dc
|
|
@ -24,6 +24,7 @@
|
|||
#include <memory>
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include <vector>
|
||||
|
||||
#ifndef ENABLE_ACL
|
||||
#include "mindspore/core/utils/log_adapter.h"
|
||||
|
@ -44,6 +45,19 @@ class LogStream {
|
|||
return *this;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
LogStream &operator<<(const std::vector<T> &val) noexcept {
|
||||
(*sstream_) << "[";
|
||||
for (size_t i = 0; i < val.size(); i++) {
|
||||
(*this) << val[i];
|
||||
if (i + 1 < val.size()) {
|
||||
(*sstream_) << ", ";
|
||||
}
|
||||
}
|
||||
(*sstream_) << "]";
|
||||
return *this;
|
||||
}
|
||||
|
||||
LogStream &operator<<(std::ostream &func(std::ostream &os)) noexcept {
|
||||
(*sstream_) << func;
|
||||
return *this;
|
||||
|
|
|
@ -17,8 +17,6 @@ import json
|
|||
import os
|
||||
import sys
|
||||
from te.platform.cce_conf import te_set_version
|
||||
from te.platform.fusion_manager import op_build_cfg_dis, op_build_cfg_en, set_current_op_name, \
|
||||
init_op_pattern, set_op_params, set_op_build_type, get_op_pattern, set_current_op_func_name
|
||||
from te.platform.fusion_util import fusion_op
|
||||
from common import check_kernel_info, get_args, get_build_in_impl_path, get_ddk_version
|
||||
|
||||
|
@ -27,7 +25,6 @@ build_in_impl_path = get_build_in_impl_path()
|
|||
|
||||
# op function list
|
||||
op_build = "compile"
|
||||
op_pre_build = "pre_build"
|
||||
fusion_pattern_start_flag = "fusion_pattern_start"
|
||||
fusion_pattern_end_flag = "fusion_pattern_end"
|
||||
|
||||
|
@ -83,19 +80,7 @@ def build_op(build_type, json_str):
|
|||
else:
|
||||
op_module = __import__("impl."+op_name, globals(), locals(), [op_name], 0)
|
||||
# get function
|
||||
if build_type == op_pre_build:
|
||||
# set op parameter
|
||||
op_build_cfg_dis()
|
||||
set_current_op_func_name(op_name)
|
||||
set_current_op_name(kernel_name)
|
||||
init_op_pattern()
|
||||
set_op_params(*outputs_args, *attrs_args, kernel_name=kernel_name)
|
||||
set_op_build_type('prebuild')
|
||||
if custom_flag:
|
||||
py_fn_name = kernel_info['op_info']['name']
|
||||
else:
|
||||
py_fn_name = op_name
|
||||
elif build_type == op_build:
|
||||
if build_type == op_build:
|
||||
if custom_flag:
|
||||
py_fn_name = kernel_info['op_info']['name']
|
||||
else:
|
||||
|
@ -106,13 +91,6 @@ def build_op(build_type, json_str):
|
|||
if op_func is None:
|
||||
raise ValueError("Op:{} function {} is not supported by Tbe.".format(op_name, build_type))
|
||||
|
||||
# pre build
|
||||
if build_type == op_pre_build:
|
||||
op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
|
||||
# disable only pattern configuration
|
||||
op_build_cfg_en()
|
||||
return get_op_pattern()
|
||||
|
||||
# call function
|
||||
if kernel_name[0:19] == "bounding_box_encode":
|
||||
return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name_val=kernel_name)
|
||||
|
@ -120,8 +98,6 @@ def build_op(build_type, json_str):
|
|||
return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
|
||||
|
||||
except Exception as e:
|
||||
if build_type == op_pre_build:
|
||||
op_build_cfg_en()
|
||||
raise RuntimeError(e)
|
||||
|
||||
|
||||
|
@ -136,14 +112,9 @@ def compile_fusion_op(json_str):
|
|||
Exception: If specific keyword is not found.
|
||||
"""
|
||||
args = json.loads(json_str)
|
||||
te_set_version(ddk_version)
|
||||
if 'fusion_op' not in args or not args['fusion_op']:
|
||||
raise ValueError("Json string Errors, key:fusion_op not found.")
|
||||
if 'prebuild_ops' not in args or not args['prebuild_ops']:
|
||||
raise ValueError("Json string Errors, key:prebuild_ops not found.")
|
||||
|
||||
pre_build_op_list = args['prebuild_ops']
|
||||
for op in pre_build_op_list:
|
||||
build_op(op_pre_build, json.dumps(op))
|
||||
fusion_op_arg = args['fusion_op']
|
||||
return fusion_op(json.dumps(fusion_op_arg))
|
||||
|
||||
|
@ -159,8 +130,6 @@ def compile_with_json(json_str):
|
|||
json_info = json.loads(json_str)
|
||||
if "fusion_op" in json_info:
|
||||
ret = compile_fusion_op(json_str)
|
||||
elif "compile_type" in json_info:
|
||||
ret = build_op(op_pre_build, json_str)
|
||||
else:
|
||||
ret = build_op(op_build, json_str)
|
||||
return ret
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
#include <vector>
|
||||
#include <memory>
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <climits>
|
||||
#include "runtime/device/kernel_runtime.h"
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
|
||||
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
|
||||
|
@ -218,7 +220,7 @@ void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
|
|||
mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
|
||||
dim->set_size((::google::protobuf::int64)item);
|
||||
}
|
||||
node_inputs->set_tensor_type((mindspore::DataType)input_data_type);
|
||||
node_inputs->set_tensor_type(input_data_type);
|
||||
node_inputs->set_mem_device("HBM");
|
||||
}
|
||||
}
|
||||
|
@ -245,7 +247,7 @@ void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
|
|||
}
|
||||
TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
|
||||
int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
|
||||
node_outputs->set_tensor_type((mindspore::DataType)output_data_type);
|
||||
node_outputs->set_tensor_type(output_data_type);
|
||||
node_outputs->set_mem_device("HBM");
|
||||
}
|
||||
}
|
||||
|
@ -287,6 +289,109 @@ bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
|
|||
return true;
|
||||
}
|
||||
|
||||
bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
|
||||
if (!anf_node->isa<CNode>()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!AnfAlgo::IsDynamicShape(anf_node)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "CreateExtInfo start, " << anf_node->fullname_with_scope();
|
||||
|
||||
int32_t unknown_shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
|
||||
uint64_t ext_info_head_len = kExtInfoHeadSize;
|
||||
std::string ext_info;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
|
||||
|
||||
// 1.addr:unknown shape type
|
||||
uint64_t ext_info_len = ext_info.size();
|
||||
ext_info_len += ext_info_head_len + sizeof(int32_t);
|
||||
|
||||
// 2.addr:input ShapeAndType
|
||||
ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType);
|
||||
|
||||
// 3.addr:output ShapeAndType
|
||||
ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType);
|
||||
|
||||
uint64_t ext_info_offset = ext_info.size();
|
||||
ext_info.resize(ext_info_len, 0);
|
||||
char *ext_info_buf = ext_info.data();
|
||||
|
||||
// deal1: unknown shape type
|
||||
ExtInfo *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
|
||||
info->infoType = FWK_ADPT_EXT_SHAPE_TYPE;
|
||||
info->infoLen = sizeof(int32_t);
|
||||
ext_info_offset += ext_info_head_len;
|
||||
int32_t *shape_type = reinterpret_cast<int32_t *>(ext_info_buf + ext_info_offset);
|
||||
*shape_type = unknown_shape_type;
|
||||
ext_info_offset += info->infoLen;
|
||||
|
||||
// deal2:input ShapeAndType
|
||||
info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
|
||||
info->infoType = FWK_ADPT_EXT_INPUT_SHAPE;
|
||||
info->infoLen = input_num * sizeof(ShapeAndType);
|
||||
ext_info_offset += ext_info_head_len;
|
||||
|
||||
ShapeAndType *inputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
|
||||
for (size_t input_index = 0; input_index < input_num; input_index++) {
|
||||
TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
|
||||
std::vector<size_t> input_shape;
|
||||
int32_t input_data_type;
|
||||
if (input_type == kObjectTypeString) {
|
||||
auto cnode = anf_node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto input_node = cnode->inputs()[input_index + 1];
|
||||
auto value_ptr = GetValueNode(input_node);
|
||||
auto value = GetValue<std::string>(value_ptr);
|
||||
input_shape.push_back(1);
|
||||
input_shape.push_back(value.size());
|
||||
input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
|
||||
} else {
|
||||
input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
|
||||
input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
|
||||
}
|
||||
inputs[input_index].type = input_data_type;
|
||||
|
||||
size_t input_shape_index = 0;
|
||||
for (; input_shape_index < input_shape.size(); input_shape_index++) {
|
||||
inputs[input_index].dims[input_shape_index] = SizeToLong(input_shape[input_shape_index]);
|
||||
}
|
||||
if (input_shape.size() < kMaxShapeDims) {
|
||||
inputs[input_index].dims[input_shape_index] = LLONG_MIN;
|
||||
}
|
||||
}
|
||||
ext_info_offset += info->infoLen;
|
||||
|
||||
// deal3:output ShapeAndType
|
||||
info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
|
||||
info->infoType = FWK_ADPT_EXT_OUTPUT_SHAPE;
|
||||
info->infoLen = output_num * sizeof(ShapeAndType);
|
||||
ext_info_offset += ext_info_head_len;
|
||||
|
||||
ShapeAndType *outputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
|
||||
for (size_t output_index = 0; output_index < output_num; output_index++) {
|
||||
std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
|
||||
TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
|
||||
int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
|
||||
outputs[output_index].type = output_data_type;
|
||||
|
||||
size_t output_shape_index = 0;
|
||||
for (; output_shape_index < output_shape.size(); output_shape_index++) {
|
||||
outputs[output_index].dims[output_shape_index] = SizeToLong(output_shape[output_shape_index]);
|
||||
}
|
||||
if (output_shape_index < kMaxShapeDims) {
|
||||
outputs[output_index].dims[output_shape_index] = LLONG_MIN;
|
||||
}
|
||||
}
|
||||
|
||||
// set ext info
|
||||
kernel_mod_ptr->SetExtInfo(ext_info);
|
||||
return true;
|
||||
}
|
||||
|
||||
KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
|
||||
|
@ -300,6 +405,11 @@ KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
|
|||
if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) {
|
||||
MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!";
|
||||
}
|
||||
|
||||
if (!CreateExtInfo(anf_node, kernel_mod_ptr)) {
|
||||
MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!";
|
||||
}
|
||||
|
||||
if (!SetIOSize(anf_node, kernel_mod_ptr)) {
|
||||
MS_LOG(EXCEPTION) << "Set input output size list failed.";
|
||||
}
|
||||
|
|
|
@ -43,6 +43,7 @@ AicpuOpKernelMod::~AicpuOpKernelMod() {
|
|||
input_size_list_.clear();
|
||||
output_size_list_.clear();
|
||||
workspace_size_list_.clear();
|
||||
ext_info_.clear();
|
||||
}
|
||||
|
||||
void AicpuOpKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
|
||||
|
@ -54,6 +55,7 @@ const std::vector<size_t> &AicpuOpKernelMod::GetWorkspaceSizeList() const { retu
|
|||
void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &inputList) { inputList_ = inputList; }
|
||||
void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) { outputList_ = outputList; }
|
||||
void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) { (void)node_def_str_.assign(nodeDef); }
|
||||
void AicpuOpKernelMod::SetExtInfo(const std::string &ext_info) { ext_info_ = ext_info; }
|
||||
void AicpuOpKernelMod::SetNodeName(const std::string &node_name) { node_name_ = node_name; }
|
||||
void AicpuOpKernelMod::SetAnfNode(const mindspore::AnfNodePtr &anf_node) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
|
@ -84,16 +86,30 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs
|
|||
|
||||
auto node_def_len = node_def_str_.length();
|
||||
param_len += node_def_len;
|
||||
param_len += sizeof(uint32_t);
|
||||
|
||||
AicpuParamHead aicpu_param_head;
|
||||
aicpu_param_head.length = param_len;
|
||||
aicpu_param_head.ioAddrNum = io_addrs_num;
|
||||
|
||||
if (ext_info_.empty()) {
|
||||
MS_LOG(INFO) << "Static Shape Kernel";
|
||||
aicpu_param_head.extInfoLength = 0;
|
||||
aicpu_param_head.extInfoAddr = 0;
|
||||
} else {
|
||||
MS_LOG(INFO) << "Dynamic Kernel Ext Info size:" << ext_info_.size();
|
||||
}
|
||||
|
||||
// Create taskArgs: AicpuParamHead + ioAddrs + notifyId + customizedAttr
|
||||
AicpuParamHead paramHead = {static_cast<uint32_t>(param_len), static_cast<uint32_t>(io_addrs_num)};
|
||||
args_.clear();
|
||||
(void)args_.append(reinterpret_cast<const char *>(¶mHead), sizeof(AicpuParamHead));
|
||||
(void)args_.append(reinterpret_cast<const char *>(&aicpu_param_head), sizeof(AicpuParamHead));
|
||||
// TaskArgs append ioAddrs
|
||||
if (io_addrs_size != 0) {
|
||||
(void)args_.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs_size);
|
||||
}
|
||||
|
||||
// size for node_def
|
||||
args_.append(reinterpret_cast<const char *>(&node_def_len), sizeof(uint32_t));
|
||||
|
||||
// When it's aicpu customized ops, taskArgs should append customized attr
|
||||
if (node_def_len != 0) {
|
||||
(void)args_.append(reinterpret_cast<const char *>(node_def_str_.data()), node_def_len);
|
||||
|
@ -145,8 +161,9 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr>
|
|||
node_name_ = kTopKV2;
|
||||
}
|
||||
|
||||
AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>(
|
||||
kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump());
|
||||
AicpuTaskInfoPtr task_info_ptr =
|
||||
make_shared<ge::model_runner::AicpuTaskInfo>(kernel_name_, stream_id, node_so_, node_name_, node_def_str_,
|
||||
ext_info_, input_data_addrs, output_data_addrs, NeedDump());
|
||||
|
||||
MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
|
||||
return {task_info_ptr};
|
||||
|
|
|
@ -36,6 +36,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
|
|||
void SetOutputList(const std::vector<int64_t> &outputList);
|
||||
void SetAnfNode(const AnfNodePtr &anf_node);
|
||||
void SetNodeDef(const std::string &nodeDef);
|
||||
void SetExtInfo(const std::string &ext_info);
|
||||
void SetNodeName(const std::string &node_name);
|
||||
|
||||
/**
|
||||
|
@ -58,6 +59,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
|
|||
std::string node_def_str_;
|
||||
std::string node_name_;
|
||||
std::string node_so_;
|
||||
std::string ext_info_;
|
||||
std::vector<int64_t> inputList_;
|
||||
std::vector<int64_t> outputList_;
|
||||
AnfNodePtr anf_node_;
|
||||
|
|
|
@ -21,7 +21,6 @@
|
|||
#include <map>
|
||||
#include <string>
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
constexpr auto kInitDataSetQueue = "InitDataSetQueue";
|
||||
|
@ -50,6 +49,36 @@ struct AicpuParamHead {
|
|||
uint64_t extInfoAddr; // extInfo address
|
||||
} __attribute__((packed));
|
||||
|
||||
const uint32_t kExtInfoHeadSize = 8;
|
||||
struct ExtInfo {
|
||||
int32_t infoType; // extend type
|
||||
uint32_t infoLen; // length for infoMsg
|
||||
char infoMsg[0]; // extend value
|
||||
} __attribute__((packed));
|
||||
|
||||
// Extent info ShapeAndType
|
||||
const uint32_t kMaxShapeDims = 8;
|
||||
struct ShapeAndType {
|
||||
int32_t type;
|
||||
int64_t dims[kMaxShapeDims];
|
||||
} __attribute__((packed));
|
||||
|
||||
// Extend Info type for task
|
||||
enum FWKTaskExtInfoType {
|
||||
FWK_ADPT_EXT_SHAPE_TYPE = 0,
|
||||
FWK_ADPT_EXT_INPUT_SHAPE,
|
||||
FWK_ADPT_EXT_OUTPUT_SHAPE,
|
||||
FWK_ADPT_EXT_INVALID
|
||||
};
|
||||
|
||||
// for unknown shape op type
|
||||
enum UnknowShapeOpType {
|
||||
DEPEND_IN_SHAPE = 1, // op out shape get by input shape
|
||||
DEPEND_CONST_VALUE = 2, // op out shape get by const op value
|
||||
DEPEND_SHAPE_RANGE = 3, // op out shape get by range
|
||||
DEPEND_COMPUTE = 4 // op out shape get by totally computing
|
||||
};
|
||||
|
||||
class AicpuOpUtil {
|
||||
public:
|
||||
static int MsTypeToProtoType(TypeId ms_type);
|
||||
|
|
|
@ -26,7 +26,7 @@ message AttrValue {
|
|||
repeated int64 i = 3 [ packed = true ]; //"array(int)"
|
||||
repeated float f = 4 [ packed = true ]; //"array(float)"
|
||||
repeated bool b = 5 [ packed = true ]; //"array(bool)"
|
||||
repeated DataType type = 6 [ packed = true ]; //"array(type)"
|
||||
repeated int32 type = 6 [ packed = true ]; //"array(type)"
|
||||
repeated TensorShape shape = 7; //"array(shape)"
|
||||
repeated Tensor tensor = 8; //"array(tensor)"
|
||||
}
|
||||
|
|
|
@ -18,9 +18,16 @@ package mindspore;
|
|||
import "attr.proto";
|
||||
import "tensor.proto";
|
||||
|
||||
message DynamicIdx {
|
||||
int32 idx = 1;
|
||||
int32 num = 2;
|
||||
}
|
||||
|
||||
message NodeDef {
|
||||
string op = 2;
|
||||
map<string, AttrValue> attrs = 3;
|
||||
repeated Tensor inputs = 4;
|
||||
repeated Tensor outputs = 5;
|
||||
map<string, DynamicIdx> dym_inputs = 6;
|
||||
map<string, DynamicIdx> dym_outputs = 7;
|
||||
}
|
||||
|
|
|
@ -26,9 +26,12 @@ message Tensor {
|
|||
TensorShape tensor_shape = 1;
|
||||
|
||||
// tensor content data type
|
||||
DataType tensor_type = 2;
|
||||
int32 tensor_type = 2;
|
||||
|
||||
// tensor memory device
|
||||
// data located memory device , "DDR" "HBM" OR "NONE"
|
||||
string mem_device = 3;
|
||||
string name = 4;
|
||||
uint64 data_ptr = 5;
|
||||
uint64 data_size = 6;
|
||||
}
|
||||
|
|
|
@ -31,5 +31,5 @@ message TensorShape {
|
|||
bool unknown_rank = 3;
|
||||
|
||||
// data format "NHWC" "NCHW" "NC1HWC0" OR "NONE"
|
||||
string data_format = 4;
|
||||
int32 data_format = 4;
|
||||
};
|
||||
|
|
|
@ -19,17 +19,30 @@ option cc_enable_arenas = true;
|
|||
package mindspore;
|
||||
|
||||
enum DataType {
|
||||
MS_UNKNOWN = 0;
|
||||
MS_BOOL = 1;
|
||||
MS_FLOAT32 = 0;
|
||||
MS_FLOAT16 = 1;
|
||||
MS_INT8 = 2;
|
||||
MS_UINT8 = 3;
|
||||
MS_INT16 = 4;
|
||||
MS_UINT16 = 5;
|
||||
MS_INT32 = 6;
|
||||
MS_UINT32 = 7;
|
||||
MS_INT64 = 8;
|
||||
MS_UINT64 = 9;
|
||||
MS_FLOAT16 = 10;
|
||||
MS_FLOAT32 = 11;
|
||||
MS_FLOAT64 = 12;
|
||||
MS_INT32 = 3;
|
||||
MS_UINT8 = 4;
|
||||
MS_INT16 = 6;
|
||||
MS_UINT16 = 7;
|
||||
MS_UINT32 = 8;
|
||||
MS_INT64 = 9;
|
||||
MS_UINT64 = 10;
|
||||
MS_FLOAT64 = 11;
|
||||
MS_BOOL = 12;
|
||||
MS_STRING = 13;
|
||||
MS_DUAL_SUB_INT8 = 14;
|
||||
MS_DUAL_SUB_UINT8 = 15;
|
||||
MS_COMPLEX64 = 16;
|
||||
MS_COMPLEX128 = 17;
|
||||
MS_QINT8 = 18;
|
||||
MS_QINT16 = 19;
|
||||
MS_QINT32 = 20;
|
||||
MS_QUINT8 = 21;
|
||||
MS_QUINT16 = 22;
|
||||
MS_RESOURCE = 23;
|
||||
MS_STRING_REF = 24;
|
||||
MS_DUAL = 25;
|
||||
MS_UNKNOWN = 26;
|
||||
}
|
||||
|
|
|
@ -177,7 +177,7 @@ KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &pro
|
|||
if (processor == kProcessorAiCore || processor == kProcessorAiCpu) {
|
||||
kernel_json = kCceKernelMeta;
|
||||
} else {
|
||||
kernel_json = bin_map->GetKernelMetaPath();
|
||||
kernel_json = bin_map->kernel_meta_path();
|
||||
}
|
||||
(void)kernel_json.append(kernel_name).append(kJsonSuffix);
|
||||
KernelPackPtr kernel_pack = std::make_shared<KernelPack>();
|
||||
|
|
|
@ -57,8 +57,8 @@ class KernelMeta {
|
|||
void RemoveKernelCache();
|
||||
std::string Search(const std::string &kernel_name) const;
|
||||
bool Insert(const std::string &kernel_name, const std::string &kernel_json);
|
||||
std::string GetKernelMetaPath() { return kernel_meta_path_; }
|
||||
|
||||
std::string kernel_meta_path() const { return kernel_meta_path_; }
|
||||
bool initialized() const { return initialized_; }
|
||||
static KernelMeta *GetInstance() {
|
||||
static KernelMeta kernel_meta;
|
||||
return &kernel_meta;
|
||||
|
|
|
@ -45,6 +45,22 @@ class PServerKernel {
|
|||
|
||||
protected:
|
||||
virtual void ReInit(const std::vector<AddressPtr> &) {}
|
||||
|
||||
void SetTotalRowCnt(size_t total_cnt) {
|
||||
MS_LOG(INFO) << "Total row count of server " << rank_id_ << " is " << total_cnt;
|
||||
total_row_cnt_ = total_cnt;
|
||||
}
|
||||
|
||||
void CalOffset() {
|
||||
size_t rem = total_row_cnt_ % pserver_num_;
|
||||
if (rem == 0) {
|
||||
row_offset_ = total_row_cnt_ / pserver_num_ * rank_id_;
|
||||
} else {
|
||||
row_offset_ = std::round((static_cast<float>(total_row_cnt_)) / pserver_num_) * rank_id_;
|
||||
}
|
||||
MS_LOG(INFO) << "Row offset of server " << rank_id_ << " is " << row_offset_;
|
||||
}
|
||||
|
||||
void Shard(std::vector<size_t> *shape, int axis) {
|
||||
(*shape)[axis] = Util::LocalShard((*shape)[axis], rank_id_, pserver_num_);
|
||||
}
|
||||
|
@ -52,6 +68,9 @@ class PServerKernel {
|
|||
size_t rank_id_;
|
||||
size_t pserver_num_;
|
||||
size_t worker_num_;
|
||||
|
||||
size_t total_row_cnt_;
|
||||
size_t row_offset_;
|
||||
};
|
||||
} // namespace ps
|
||||
} // namespace kernel
|
||||
|
|
|
@ -31,6 +31,8 @@ void SparseApplyAdamPSKernel::InitKernel(
|
|||
const std::vector<size_t> &grad_shape = *(shape_vec[9]);
|
||||
const std::vector<size_t> &indices_shape = *(shape_vec[10]);
|
||||
|
||||
SetTotalRowCnt(var_shape[0]);
|
||||
CalOffset();
|
||||
Shard(&var_shape, 0);
|
||||
Shard(&m_shape, 0);
|
||||
Shard(&v_shape, 0);
|
||||
|
@ -69,8 +71,8 @@ void SparseApplyAdamPSKernel::ReInit(const std::shared_ptr<std::vector<std::shar
|
|||
const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
|
||||
const std::vector<size_t> &indices_shape = *(shape_vec[0]);
|
||||
indices_size_ = indices_shape[0];
|
||||
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float);
|
||||
workspace_size_list_[1] = indices_size_ * sizeof(int);
|
||||
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
|
||||
workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
|
||||
}
|
||||
|
||||
void SparseApplyAdamPSKernel::ReInit(const std::vector<AddressPtr> &inputs) {
|
||||
|
@ -83,10 +85,6 @@ void SparseApplyAdamPSKernel::ReInit(const std::vector<AddressPtr> &inputs) {
|
|||
bool SparseApplyAdamPSKernel::Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs) {
|
||||
ReInit(inputs);
|
||||
int *indices = reinterpret_cast<int *>(inputs[10]->addr);
|
||||
for (size_t i = 0; i < inputs[10]->size / sizeof(int); i++) {
|
||||
indices[i] -= rank_id_ * var_first_dim_size_;
|
||||
}
|
||||
return Launch(inputs, workspace, outputs);
|
||||
}
|
||||
|
||||
|
|
|
@ -28,6 +28,8 @@ void SparseApplyFtrlPSKernel::InitKernel(
|
|||
std::vector<size_t> grad_shape = *(shape_vec[3]);
|
||||
std::vector<size_t> indices_shape = *(shape_vec[4]);
|
||||
|
||||
SetTotalRowCnt(var_shape[0]);
|
||||
CalOffset();
|
||||
Shard(&var_shape, 0);
|
||||
Shard(&accum_shape, 0);
|
||||
Shard(&linear_shape, 0);
|
||||
|
@ -72,24 +74,20 @@ void SparseApplyFtrlPSKernel::ReInit(const std::shared_ptr<std::vector<std::shar
|
|||
const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
|
||||
std::vector<size_t> indices_shape = *(shape_vec[0]);
|
||||
indices_size_ = indices_shape[0];
|
||||
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float);
|
||||
workspace_size_list_[1] = indices_size_ * sizeof(int);
|
||||
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
|
||||
workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
|
||||
}
|
||||
|
||||
void SparseApplyFtrlPSKernel::ReInit(const std::vector<AddressPtr> &inputs) {
|
||||
const auto &indices_addr = inputs[4];
|
||||
indices_size_ = indices_addr->size / sizeof(int);
|
||||
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float);
|
||||
workspace_size_list_[1] = indices_size_ * sizeof(int);
|
||||
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
|
||||
workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
|
||||
}
|
||||
|
||||
bool SparseApplyFtrlPSKernel::Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs) {
|
||||
ReInit(inputs);
|
||||
int *indices = reinterpret_cast<int *>(inputs[4]->addr);
|
||||
for (size_t i = 0; i < inputs[4]->size / sizeof(int); i++) {
|
||||
indices[i] -= rank_id_ * var_first_dim_size_;
|
||||
}
|
||||
return Launch(inputs, workspace, outputs);
|
||||
}
|
||||
|
||||
|
|
|
@ -31,6 +31,8 @@ void SparseApplyLazyAdamPSKernel::InitKernel(
|
|||
const std::vector<size_t> &grad_shape = *(shape_vec[9]);
|
||||
const std::vector<size_t> &indices_shape = *(shape_vec[10]);
|
||||
|
||||
SetTotalRowCnt(var_shape[0]);
|
||||
CalOffset();
|
||||
Shard(&var_shape, 0);
|
||||
Shard(&m_shape, 0);
|
||||
Shard(&v_shape, 0);
|
||||
|
@ -69,25 +71,21 @@ void SparseApplyLazyAdamPSKernel::ReInit(
|
|||
const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
|
||||
const std::vector<size_t> &indices_shape = *(shape_vec[0]);
|
||||
indices_size_ = indices_shape[0];
|
||||
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float);
|
||||
workspace_size_list_[1] = indices_size_ * sizeof(int);
|
||||
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
|
||||
workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
|
||||
}
|
||||
|
||||
void SparseApplyLazyAdamPSKernel::ReInit(const std::vector<AddressPtr> &inputs) {
|
||||
const auto &indices_addr = inputs[10];
|
||||
indices_size_ = indices_addr->size / sizeof(int);
|
||||
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float);
|
||||
workspace_size_list_[1] = indices_size_ * sizeof(int);
|
||||
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
|
||||
workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
|
||||
}
|
||||
|
||||
bool SparseApplyLazyAdamPSKernel::Execute(const std::vector<AddressPtr> &inputs,
|
||||
const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs) {
|
||||
ReInit(inputs);
|
||||
int *indices = reinterpret_cast<int *>(inputs[10]->addr);
|
||||
for (size_t i = 0; i < inputs[10]->size / sizeof(int); i++) {
|
||||
indices[i] -= rank_id_ * var_first_dim_size_;
|
||||
}
|
||||
return Launch(inputs, workspace, outputs);
|
||||
}
|
||||
|
||||
|
|
|
@ -63,19 +63,21 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
|
|||
if (!CheckParam(kernel_node)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
axis_ = GetAttr<int>(kernel_node, "axis");
|
||||
if (axis_ < 0) {
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
axis_ += SizeToInt(input_shape.size());
|
||||
}
|
||||
auto origin_data_format = AnfAlgo::GetOriginDataFormat(kernel_node);
|
||||
auto input_format = AnfAlgo::GetInputFormat(kernel_node, 0);
|
||||
axis_ = AxisTransform(origin_data_format, input_format, axis_);
|
||||
|
||||
input_num_ = SizeToInt(AnfAlgo::GetInputTensorNum(kernel_node));
|
||||
inputs_host_ = std::make_unique<T *[]>(input_num_);
|
||||
len_axis_ = std::make_unique<int[]>(input_num_);
|
||||
for (int i = 0; i < input_num_; i++) {
|
||||
size_t input_size = 1;
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i);
|
||||
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i);
|
||||
for (size_t j = 0; j < input_shape.size(); j++) {
|
||||
input_size *= input_shape[j];
|
||||
}
|
||||
|
@ -85,7 +87,7 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
|
|||
workspace_size_list_.push_back(sizeof(T *) * input_num_);
|
||||
workspace_size_list_.push_back(sizeof(int) * input_num_);
|
||||
|
||||
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
|
||||
output_size_ = 1;
|
||||
for (int i = 0; i < SizeToInt(output_shape.size()); i++) {
|
||||
output_size_ *= output_shape[i];
|
||||
|
@ -98,7 +100,6 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
|
|||
}
|
||||
}
|
||||
output_size_list_.push_back(output_size_ * sizeof(T));
|
||||
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SLICE_GPU_KERNEL_H
|
||||
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/gpu/cuda_impl/slice_impl.cuh"
|
||||
|
@ -27,8 +28,7 @@ namespace kernel {
|
|||
template <typename T>
|
||||
class SliceGpuFwdKernel : public GpuKernel {
|
||||
public:
|
||||
SliceGpuFwdKernel()
|
||||
: is_strided_slice_(false), is_null_input_(false), input_size_(0), output_size_(0), workspace_size_(0) {}
|
||||
SliceGpuFwdKernel() : is_null_input_(false), input_size_(0), output_size_(0), workspace_size_(0) {}
|
||||
~SliceGpuFwdKernel() override = default;
|
||||
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
|
||||
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
|
||||
|
@ -50,51 +50,31 @@ class SliceGpuFwdKernel : public GpuKernel {
|
|||
if (!CheckParam(kernel_node)) {
|
||||
return false;
|
||||
}
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
auto data_format = AnfAlgo::GetInputFormat(kernel_node, 0);
|
||||
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
ShapeNdTo4d(input_shape, &input_shape_);
|
||||
auto strides = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("strides");
|
||||
if (strides) {
|
||||
strides_ = GetAttr<std::vector<int>>(kernel_node, "strides");
|
||||
for (auto i = strides_.size(); i < 4; i++) {
|
||||
(void)strides_.insert(strides_.begin(), 1);
|
||||
}
|
||||
size_ = GetAttr<std::vector<int>>(kernel_node, "end");
|
||||
is_strided_slice_ = true;
|
||||
} else {
|
||||
size_ = GetAttr<std::vector<int>>(kernel_node, "size");
|
||||
}
|
||||
|
||||
for (auto i = begin_.size(); i < 4; i++) {
|
||||
(void)begin_.insert(begin_.begin(), 0);
|
||||
}
|
||||
for (size_t i = size_.size(); i < 4; i++) {
|
||||
(void)size_.insert(size_.begin(), 1);
|
||||
}
|
||||
for (size_t i = 0; i < begin_.size(); i++) {
|
||||
if (begin_[i] < 0) {
|
||||
begin_[i] = begin_[i] + input_shape_[i];
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < size_.size(); i++) {
|
||||
if (size_[i] < 0) {
|
||||
size_[i] = (size_[i] + input_shape_[i]) > 0 ? (size_[i] + input_shape_[i]) : 0;
|
||||
}
|
||||
if (begin_[i] == size_[i] && is_strided_slice_) {
|
||||
MS_LOG(WARNING) << "Output is null.";
|
||||
is_null_input_ = true;
|
||||
}
|
||||
if (size_[i] == 0 && strides_[i] > 0) {
|
||||
size_[i] = begin_[i] + 1;
|
||||
}
|
||||
}
|
||||
|
||||
input_size_ = IntToSize(input_shape_[0] * input_shape_[1] * input_shape_[2] * input_shape_[3]) * sizeof(T);
|
||||
auto out_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
auto out_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
|
||||
|
||||
output_size_ = sizeof(T);
|
||||
for (size_t x : out_shape) {
|
||||
output_size_ = output_size_ * x;
|
||||
}
|
||||
|
||||
// transpose begin and size for NHWC data
|
||||
if (data_format == "NHWC") {
|
||||
std::swap(begin_[1], begin_[3]);
|
||||
std::swap(begin_[1], begin_[2]);
|
||||
std::swap(size_[1], size_[3]);
|
||||
std::swap(size_[1], size_[2]);
|
||||
}
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
@ -126,26 +106,24 @@ class SliceGpuFwdKernel : public GpuKernel {
|
|||
MS_LOG(ERROR) << "Input dims is " << input_shape.size() << ", scalar is not supported.";
|
||||
return false;
|
||||
}
|
||||
size_ = GetAttr<std::vector<int>>(kernel_node, "size");
|
||||
begin_ = GetAttr<std::vector<int>>(kernel_node, "begin");
|
||||
for (size_t i = 0; i < input_shape.size(); i++) {
|
||||
if ((begin_[i] > 0 && (begin_[i] > SizeToInt(input_shape[i]))) ||
|
||||
(begin_[i] < 0 && (std::abs(begin_[i]) > SizeToInt(input_shape[i])))) {
|
||||
MS_LOG(INFO) << "Input out of bounds " << input_shape[i] << " in axis " << i << ".";
|
||||
begin_[i] = 0;
|
||||
if (input_shape[i] <= 0 || size_[i] <= 0) {
|
||||
MS_LOG(WARNING) << "Slice output is null.";
|
||||
is_null_input_ = true;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
std::vector<int> begin_;
|
||||
std::vector<int> size_;
|
||||
std::vector<int> strides_;
|
||||
std::vector<int> input_shape_;
|
||||
|
||||
std::vector<size_t> input_size_list_;
|
||||
std::vector<size_t> output_size_list_;
|
||||
std::vector<size_t> workspace_size_list_;
|
||||
|
||||
bool is_strided_slice_;
|
||||
bool is_null_input_;
|
||||
size_t input_size_;
|
||||
size_t output_size_;
|
||||
|
|
|
@ -59,6 +59,7 @@ class StridedSliceGpuKernel : public GpuKernel {
|
|||
ParseMasks(kernel_node);
|
||||
FillOutputDim();
|
||||
null_output_ = IsNullOutput();
|
||||
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
@ -86,14 +87,15 @@ class StridedSliceGpuKernel : public GpuKernel {
|
|||
|
||||
for (size_t i = 0; i < MAX_DIMS; i++) {
|
||||
if (i < begin_.size()) {
|
||||
begin_[i] =
|
||||
std::min(begin_[i] < 0 ? SizeToInt(begin_[i] + input_shape_[i]) : begin_[i], SizeToInt(input_shape_[i] - 1));
|
||||
int dim = SizeToInt(input_shape_[i]);
|
||||
begin_[i] = std::min(begin_[i] < 0 ? std::max(begin_[i] + dim, 0) : begin_[i], dim - 1);
|
||||
} else {
|
||||
begin_.push_back(0);
|
||||
}
|
||||
|
||||
if (i < end_.size()) {
|
||||
end_[i] = std::max(end_[i] < 0 ? end_[i] + SizeToInt(input_shape_[i]) : end_[i], -1);
|
||||
int dim = SizeToInt(input_shape_[i]);
|
||||
end_[i] = std::max(end_[i] < 0 ? end_[i] + dim : std::min(end_[i], dim), -1);
|
||||
} else {
|
||||
end_.push_back(i < input_shape_.size() ? input_shape_[i] : 1);
|
||||
}
|
||||
|
|
|
@ -87,14 +87,15 @@ class StridedSliceGradGpuKernel : public GpuKernel {
|
|||
|
||||
for (size_t i = 0; i < MAX_DIMS; i++) {
|
||||
if (i < begin_.size()) {
|
||||
begin_[i] =
|
||||
std::min(begin_[i] < 0 ? SizeToInt(begin_[i] + input_shape_[i]) : begin_[i], SizeToInt(input_shape_[i] - 1));
|
||||
int dim = SizeToInt(input_shape_[i]);
|
||||
begin_[i] = std::min(begin_[i] < 0 ? std::max(begin_[i] + dim, 0) : begin_[i], dim - 1);
|
||||
} else {
|
||||
begin_.push_back(0);
|
||||
}
|
||||
|
||||
if (i < end_.size()) {
|
||||
end_[i] = std::max(end_[i] < 0 ? end_[i] + SizeToInt(input_shape_[i]) : end_[i], -1);
|
||||
int dim = SizeToInt(input_shape_[i]);
|
||||
end_[i] = std::max(end_[i] < 0 ? end_[i] + dim : std::min(end_[i], dim), -1);
|
||||
} else {
|
||||
end_.push_back(i < input_shape_.size() ? input_shape_[i] : 1);
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <map>
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
#include "backend/kernel_compiler/gpu/kernel_constants.h"
|
||||
#include "runtime/device/gpu/gpu_device_manager.h"
|
||||
|
@ -31,6 +32,19 @@ using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
|
|||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
static std::map<int, int> kNCHWToNHWCAxisMap = {
|
||||
{0, 0},
|
||||
{1, 3},
|
||||
{2, 1},
|
||||
{3, 2},
|
||||
};
|
||||
static std::map<int, int> kNHWCToNCHWAxisMap = {
|
||||
{0, 0},
|
||||
{1, 2},
|
||||
{2, 3},
|
||||
{3, 1},
|
||||
};
|
||||
|
||||
class GpuKernel : public KernelMod {
|
||||
public:
|
||||
virtual ~GpuKernel() = default;
|
||||
|
@ -74,6 +88,18 @@ class GpuKernel : public KernelMod {
|
|||
dst->push_back(src.size() == 0 ? 1 : SizeToInt(src[src.size() - 1]));
|
||||
}
|
||||
|
||||
int AxisTransform(const std::string &origin_data_format, const std::string &cal_format, int axis) {
|
||||
if (((origin_data_format == kOpFormat_DEFAULT) || (origin_data_format == kOpFormat_NCHW)) &&
|
||||
(cal_format == kOpFormat_NHWC)) {
|
||||
return kNCHWToNHWCAxisMap[axis];
|
||||
} else if (((cal_format == kOpFormat_DEFAULT) || (cal_format == kOpFormat_NCHW)) &&
|
||||
(origin_data_format == kOpFormat_NHWC)) {
|
||||
return kNHWCToNCHWAxisMap[axis];
|
||||
} else {
|
||||
return axis;
|
||||
}
|
||||
}
|
||||
|
||||
// transpose shape: NCHW To NHWC
|
||||
void ShapeNCHW2NHWC(std::vector<size_t> *shape) {
|
||||
std::swap((*shape)[1], (*shape)[3]);
|
||||
|
|
|
@ -82,7 +82,7 @@ class AddNGpuFwdKernel : public GpuKernel {
|
|||
MS_LOG(ERROR) << "Output number is " << output_num << ", but cudnnAddTensor needs 1 output.";
|
||||
return false;
|
||||
}
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(input_shape);
|
||||
if (is_null_input_) {
|
||||
MS_LOG(WARNING) << "AddNGpuFwdKernel input is null";
|
||||
|
@ -96,9 +96,16 @@ class AddNGpuFwdKernel : public GpuKernel {
|
|||
for (size_t i = 0; i < input_shape.size(); i++) {
|
||||
dimA[i] = SizeToInt(input_shape[i]);
|
||||
}
|
||||
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptorEx(input_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
|
||||
SizeToInt(input_shape.size()), dimA),
|
||||
"cudnnSetTensorNdDescriptor failed");
|
||||
auto input_format = AnfAlgo::GetInputFormat(kernel_node, 0);
|
||||
if (input_format == kOpFormat_NHWC) {
|
||||
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptorEx(input_descriptor_, CUDNN_TENSOR_NHWC, cudnn_data_type_,
|
||||
SizeToInt(input_shape.size()), dimA),
|
||||
"cudnnSetTensorNdDescriptor failed");
|
||||
} else {
|
||||
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptorEx(input_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
|
||||
SizeToInt(input_shape.size()), dimA),
|
||||
"cudnnSetTensorNdDescriptor failed");
|
||||
}
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -56,9 +56,9 @@ class BroadcastOpGpuKernel : public GpuKernel {
|
|||
}
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
GetOpType(kernel_node);
|
||||
auto shape1 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
auto shape2 = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
|
||||
auto shape3 = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
auto shape1 = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
auto shape2 = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
|
||||
auto shape3 = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
|
||||
need_broadcast_ = IsBroadcast(shape1, shape2);
|
||||
if (need_broadcast_ && shape1.size() > 7) {
|
||||
MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than 7";
|
||||
|
|
|
@ -42,7 +42,7 @@ class MatMulGpuKernel : public GpuKernel {
|
|||
dtype_a_(CUDA_R_32F),
|
||||
dtype_b_(CUDA_R_32F),
|
||||
dtype_c_(CUDA_R_32F),
|
||||
algo_(CUBLAS_GEMM_DEFAULT_TENSOR_OP) {}
|
||||
algo_(CUBLAS_GEMM_DEFAULT) {}
|
||||
~MatMulGpuKernel() = default;
|
||||
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
|
||||
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
|
||||
|
@ -85,6 +85,10 @@ class MatMulGpuKernel : public GpuKernel {
|
|||
dtype_a_ = GetCudaDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
|
||||
dtype_b_ = GetCudaDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 1)));
|
||||
dtype_c_ = GetCudaDataType(TypeIdLabel(AnfAlgo::GetOutputDeviceDataType(kernel_node, 0)));
|
||||
if (dtype_a_ == CUDA_R_16F && dtype_b_ == CUDA_R_16F && dtype_c_ == CUDA_R_16F) {
|
||||
MS_LOG(WARNING) << "input and output type is float16, allow to use Tensor Core operations if possible";
|
||||
algo_ = CUBLAS_GEMM_DEFAULT_TENSOR_OP;
|
||||
}
|
||||
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
is_null_input_ = CHECK_NULL_INPUT(output_shape);
|
||||
if (is_null_input_) {
|
||||
|
|
|
@ -37,7 +37,6 @@ enum FusionType {
|
|||
COMMREDUCE,
|
||||
SEGMENT,
|
||||
OPAQUE,
|
||||
DYNAMIC,
|
||||
UNKNOWN_FUSION_TYPE = -1,
|
||||
};
|
||||
enum OpPattern {
|
||||
|
@ -80,8 +79,8 @@ class KernelPack {
|
|||
bool LoadKernelMeta(const std::string &json_f, const std::string &processor);
|
||||
bool ReadFromJsonFile(const std::string &json_f, const std::string &processor);
|
||||
const std::string Serialize() const;
|
||||
const FlexArray *const GetJson() const { return json_; }
|
||||
const FlexArray *const GetKernel() const { return kernel_; }
|
||||
const FlexArray *GetJson() const { return json_; }
|
||||
const FlexArray *GetKernel() const { return kernel_; }
|
||||
~KernelPack() {
|
||||
if (json_) {
|
||||
delete[] json_;
|
||||
|
|
|
@ -19,53 +19,36 @@
|
|||
#include <map>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include "backend/kernel_compiler/tbe/tbe_kernel_build.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_utils.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
|
||||
#include "utils/ms_context.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
using mindspore::kernel::tbe::TbeUtils;
|
||||
static bool GenPreBuildKernelJson(const std::vector<AnfNodePtr> &compute_nodes,
|
||||
std::vector<nlohmann::json> *prebuild_op_list) {
|
||||
MS_EXCEPTION_IF_NULL(prebuild_op_list);
|
||||
TbeKernelJsonCreator creator(PREBUILD);
|
||||
for (const auto &anf_node : compute_nodes) {
|
||||
nlohmann::json prebuild;
|
||||
if (!creator.GenTbeSingleKernelJson(anf_node, &prebuild)) {
|
||||
MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
|
||||
return false;
|
||||
}
|
||||
(*prebuild_op_list).push_back(prebuild);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) {
|
||||
MS_LOG(INFO) << "kernel fusion build start, scope size:" << fusion_scopes.size();
|
||||
std::map<int32_t, KernelModPtr> kernel_mod_ret;
|
||||
auto build_manger = std::make_shared<ParallelBuildManager>();
|
||||
MS_EXCEPTION_IF_NULL(build_manger);
|
||||
for (const auto &fusion_scope_iter : fusion_scopes) {
|
||||
auto scope_id = fusion_scope_iter.scope_id;
|
||||
string fusion_kernel_name;
|
||||
nlohmann::json fusion_op;
|
||||
string fusion_kernel = "te_fusion";
|
||||
if (!TbeKernelBuild::GenFusionScopeJson(fusion_scope_iter.input_nodes, fusion_scope_iter.compute_nodes, &fusion_op,
|
||||
&fusion_kernel)) {
|
||||
&fusion_kernel_name)) {
|
||||
continue;
|
||||
}
|
||||
// gen kernel_name & check cache
|
||||
std::string json_str = fusion_op.dump();
|
||||
size_t hash_id = std::hash<std::string>()(json_str);
|
||||
auto json_name = fusion_kernel.append("_").append(std::to_string(hash_id));
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
auto device_id = context_ptr->device_id();
|
||||
auto json_name =
|
||||
fusion_kernel_name.append("_").append(std::to_string(hash_id)).append("_").append(std::to_string(device_id));
|
||||
fusion_op["fusion_op_name"] = json_name;
|
||||
// gen json for prebuild
|
||||
std::vector<nlohmann::json> prebuild_op_list;
|
||||
if (!GenPreBuildKernelJson(fusion_scope_iter.compute_nodes, &prebuild_op_list)) {
|
||||
continue;
|
||||
}
|
||||
// get io size
|
||||
std::vector<size_t> input_size_list;
|
||||
std::vector<size_t> output_size_list;
|
||||
|
@ -80,20 +63,20 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
|
|||
auto kernel_mod =
|
||||
build_manger->GenKernelMod(json_name, tbe::kProcessorAiCore, input_size_list, output_size_list, kernel_pack);
|
||||
if (kernel_mod != nullptr) {
|
||||
kernel_mod_ret[scope_id] = kernel_mod;
|
||||
kernel_mod_ret[fusion_scope_iter.scope_id] = kernel_mod;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// fusion build
|
||||
nlohmann::json fusion_json;
|
||||
fusion_json["fusion_op"] = fusion_op;
|
||||
fusion_json["prebuild_ops"] = prebuild_op_list;
|
||||
auto task_id = build_manger->StartCompileOp(fusion_json);
|
||||
TbeUtils::SaveJsonInfo(json_name, fusion_json.dump());
|
||||
if (task_id < 0) {
|
||||
MS_EXCEPTION(ArgumentError) << "start compile failed.";
|
||||
}
|
||||
build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list, scope_id);
|
||||
build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list,
|
||||
fusion_scope_iter.scope_id);
|
||||
}
|
||||
|
||||
int build_failed_num = 0;
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
|
@ -25,11 +26,9 @@ namespace kernel {
|
|||
* @brief fuse op and return a callable mod
|
||||
*/
|
||||
struct FusionScopeInfo {
|
||||
FusionScopeInfo() {}
|
||||
FusionScopeInfo(int32_t id, const std::vector<AnfNodePtr> &in, const std::vector<AnfNodePtr> &comp,
|
||||
const std::vector<AnfNodePtr> &out)
|
||||
: scope_id(id), input_nodes(in), compute_nodes(comp), output_nodes(out) {}
|
||||
int32_t scope_id;
|
||||
FusionScopeInfo(int32_t id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp, std::vector<AnfNodePtr> out)
|
||||
: scope_id(id), input_nodes(std::move(in)), compute_nodes(std::move(comp)), output_nodes(std::move(out)) {}
|
||||
int32_t scope_id{};
|
||||
std::vector<AnfNodePtr> input_nodes;
|
||||
std::vector<AnfNodePtr> compute_nodes;
|
||||
std::vector<AnfNodePtr> output_nodes;
|
||||
|
|
|
@ -40,14 +40,13 @@ class OpLib {
|
|||
|
||||
private:
|
||||
static bool RegOpFromLocalInfo();
|
||||
static bool DecodeOpInfo(const nlohmann::json &obj, const OpImplyType imply_type, const std::string &impl_path);
|
||||
static bool DecodeAttr(const nlohmann::json &obj, const OpImplyType imply_type,
|
||||
const std::shared_ptr<OpInfo> &op_info);
|
||||
static bool DecodeOpInfo(const nlohmann::json &obj, OpImplyType imply_type, const std::string &impl_path);
|
||||
static bool DecodeAttr(const nlohmann::json &obj, OpImplyType imply_type, const std::shared_ptr<OpInfo> &op_info);
|
||||
static bool DecodeDtypeFormat(const nlohmann::json &dtype_format, const std::shared_ptr<OpIOInfo> &op_io,
|
||||
size_t index);
|
||||
static void DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
|
||||
static void DecodeAKGSpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
|
||||
static bool DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply_type, const OpIOType io_type,
|
||||
static bool DecodeInputOutput(const nlohmann::json &obj, OpImplyType imply_type, OpIOType io_type,
|
||||
const std::shared_ptr<OpInfo> &op_info, const nlohmann::json &dtype_format);
|
||||
static bool GetRefInfo(const std::shared_ptr<OpInfo> &op_info);
|
||||
static bool CheckRepetition(const std::shared_ptr<OpInfo> &op_info);
|
||||
|
|
|
@ -173,7 +173,7 @@ void TbeAdapter::NormalizeFuncName(std::string *func_name) {
|
|||
*func_name = name_tmp;
|
||||
auto iter = tbe_func_adapter_map.find(*func_name);
|
||||
if (iter != tbe_func_adapter_map.end()) {
|
||||
MS_LOG(INFO) << "map actual op from me " << *func_name << " to tbe op" << iter->second;
|
||||
MS_LOG(INFO) << "Map actual op from me: " << *func_name << " to tbe op: " << iter->second;
|
||||
*func_name = iter->second;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
// the TBE back-end operator implementation difference
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
|
||||
enum kCreaterType : int { SINGLE_BUILD = 0, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
|
||||
namespace tbe {
|
||||
using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
|
||||
nlohmann::json *attrs_json);
|
||||
|
|
|
@ -63,7 +63,7 @@ const std::unordered_map<std::string, size_t> type_nbyte_maps = {
|
|||
|
||||
const std::unordered_map<std::string, FusionType> fusion_type_maps = {
|
||||
{"CONVLUTION", FusionType::CONVLUTION}, {"ELEMWISE", FusionType::ELEMWISE}, {"COMMREDUCE", FusionType::COMMREDUCE},
|
||||
{"SEGMENT", FusionType::SEGMENT}, {"DYNAMIC", FusionType::DYNAMIC}, {"OPAQUE", FusionType::OPAQUE},
|
||||
{"SEGMENT", FusionType::SEGMENT}, {"OPAQUE", FusionType::OPAQUE},
|
||||
};
|
||||
|
||||
TypeId DtypeToTypeId(const std::string &dtypes) {
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "backend/kernel_compiler/tbe/tbe_adapter.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_utils.h"
|
||||
#include "utils/ms_context.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
@ -71,14 +72,20 @@ constexpr auto kVTypeListListInt = "listListInt";
|
|||
constexpr auto kJValue = "value";
|
||||
constexpr auto kJDynIndex = "dyn_index";
|
||||
constexpr auto kJFuncName = "func_name";
|
||||
|
||||
std::string NormalizeFullScopeName(const string &full_scope_name) {
|
||||
// exp:Default/ReLU-op0 -->Default_ReLU_op0
|
||||
string normal_ret = full_scope_name;
|
||||
std::replace(normal_ret.begin(), normal_ret.end(), '/', '_');
|
||||
std::replace(normal_ret.begin(), normal_ret.end(), '-', '_');
|
||||
return normal_ret;
|
||||
}
|
||||
constexpr auto kJL1AddrOffset = "L1_addr_offset";
|
||||
constexpr auto kJL1FusionType = "L1_fusion_type";
|
||||
constexpr auto kJL1WorkspaceSize = "L1_workspace_size";
|
||||
constexpr auto kJAddrType = "addr_type";
|
||||
constexpr auto kJSliceOffset = "slice_offset";
|
||||
constexpr auto kJSplitIndex = "split_index";
|
||||
constexpr auto kJTotalShape = "total_shape";
|
||||
constexpr auto kJValidShape = "valid_shape";
|
||||
constexpr auto kJModuleName = "module_name";
|
||||
constexpr auto kJPattern = "pattern";
|
||||
constexpr auto kJPyModulePath = "py_module_path";
|
||||
constexpr auto kJPreBuildOutsAttrs = "prebuild_outs_attrs";
|
||||
constexpr auto kJKwdArgs = "kwds_args";
|
||||
constexpr auto kJListArgs = "list_args";
|
||||
|
||||
bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspore::AnfNode> &anf_node,
|
||||
nlohmann::json *kernel_json) {
|
||||
|
@ -117,13 +124,12 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspor
|
|||
op_info_json[kJAttrs] = attrs_json;
|
||||
std::string json_str = op_info_json.dump();
|
||||
size_t hash_id = std::hash<std::string>()(json_str);
|
||||
json_name_ = op_name + "_" + std::to_string(hash_id);
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
auto device_id = context_ptr->device_id();
|
||||
json_name_ = op_name + "_" + std::to_string(hash_id) + "_" + std::to_string(device_id);
|
||||
json_info_ = json_str;
|
||||
if (creater_type_ == PREBUILD) {
|
||||
op_info_json[kJKernelName] = NormalizeFullScopeName(anf_node->fullname_with_scope());
|
||||
} else {
|
||||
op_info_json[kJKernelName] = json_name_;
|
||||
}
|
||||
op_info_json[kJKernelName] = json_name_;
|
||||
(*kernel_json)[kJOpInfo] = op_info_json;
|
||||
(*kernel_json)[kJFullName] = anf_node->fullname_with_scope();
|
||||
if (creater_type_ == SINGLE_BUILD) {
|
||||
|
@ -581,25 +587,25 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector<si
|
|||
|
||||
bool TbeKernelBuild::GenFusionScopeJson(const std::vector<mindspore::AnfNodePtr> &input_nodes,
|
||||
const std::vector<mindspore::AnfNodePtr> &compute_nodes,
|
||||
nlohmann::json *fusion_str, std::string *fusion_kernel) {
|
||||
MS_EXCEPTION_IF_NULL(fusion_str);
|
||||
MS_EXCEPTION_IF_NULL(fusion_kernel);
|
||||
nlohmann::json *fusion_json, std::string *fusion_kernel_name) {
|
||||
MS_EXCEPTION_IF_NULL(fusion_json);
|
||||
MS_EXCEPTION_IF_NULL(fusion_kernel_name);
|
||||
// get input layer info
|
||||
std::vector<std::vector<mindspore::AnfNodePtr>> input_layers;
|
||||
std::map<const AnfNodePtr, FusionDataType> spec_data_input;
|
||||
if (!GetInputLayers(input_nodes, compute_nodes, &input_layers, &spec_data_input)) {
|
||||
return false;
|
||||
}
|
||||
// gen fusion scopre_op jsom
|
||||
// gen fusion scopre_op json
|
||||
std::vector<nlohmann::json> compute_list;
|
||||
(*fusion_kernel) = kFusionKernelNamePrfix;
|
||||
(*fusion_kernel_name) = kFusionKernelNamePrfix;
|
||||
// index: fusion build option input record, next one from 0
|
||||
static size_t index = 0;
|
||||
auto layer_iter = input_layers.begin();
|
||||
auto compute_op_iter = compute_nodes.begin();
|
||||
for (; compute_op_iter != compute_nodes.end(); ++compute_op_iter, ++layer_iter) {
|
||||
nlohmann::json compute_op_str;
|
||||
(void)GenFusionComputeJson(*compute_op_iter, &layer_iter, &compute_op_str, fusion_kernel, &index);
|
||||
(void)GenFusionComputeJson(*compute_op_iter, &layer_iter, &compute_op_str, fusion_kernel_name, &index);
|
||||
compute_list.push_back(compute_op_str);
|
||||
}
|
||||
index = 0;
|
||||
|
@ -617,36 +623,122 @@ bool TbeKernelBuild::GenFusionScopeJson(const std::vector<mindspore::AnfNodePtr>
|
|||
}
|
||||
index = 0;
|
||||
data_list.insert(data_list.end(), compute_list.begin(), compute_list.end());
|
||||
(*fusion_str)[kFusionOpList] = data_list;
|
||||
(*fusion_json)[kFusionOpList] = data_list;
|
||||
return true;
|
||||
}
|
||||
|
||||
void TbeKernelBuild::GenPreDescJson(nlohmann::json *output_desc) {
|
||||
MS_EXCEPTION_IF_NULL(output_desc);
|
||||
(*output_desc)[kJL1AddrOffset] = 0;
|
||||
(*output_desc)[kJL1FusionType] = -1;
|
||||
(*output_desc)[kJL1WorkspaceSize] = -1;
|
||||
(*output_desc)[kJAddrType] = 0;
|
||||
}
|
||||
|
||||
void TbeKernelBuild::GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str,
|
||||
std::string *fusion_kernel_name) {
|
||||
MS_EXCEPTION_IF_NULL(compute_op_str);
|
||||
MS_EXCEPTION_IF_NULL(fusion_kernel_name);
|
||||
// gen others
|
||||
auto origin_type = AnfAlgo::GetCNodeName(cnode);
|
||||
// replace special op type for buffer fusion op
|
||||
auto type = GetRealOpType(origin_type);
|
||||
(*compute_op_str)[kJtype] = type;
|
||||
tbe::TbeAdapter::NormalizeFuncName(&type);
|
||||
(*compute_op_str)[kJFuncName] = type;
|
||||
(*compute_op_str)[kJModuleName] = std::string("impl.") + type;
|
||||
(*compute_op_str)[kJName] = cnode->fullname_with_scope();
|
||||
(*compute_op_str)[kJPattern] = GetNodeFusionType(cnode);
|
||||
(*compute_op_str)[kJPyModulePath] = "/usr/local/Ascend/opp/op_impl/build_in/ai_core/tbe";
|
||||
(void)(*fusion_kernel_name).append("_");
|
||||
(void)(*fusion_kernel_name).append(type);
|
||||
}
|
||||
|
||||
void TbeKernelBuild::GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str) {
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
MS_EXCEPTION_IF_NULL(compute_op_str);
|
||||
// kwds args
|
||||
nlohmann::json json_prebuild_args;
|
||||
json_prebuild_args[kJKwdArgs] = nlohmann::json::object();
|
||||
// list_args
|
||||
nlohmann::json json_list_args;
|
||||
// list_args: output args
|
||||
auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
|
||||
for (size_t i = 0; i < output_size; ++i) {
|
||||
nlohmann::json output_desc;
|
||||
GenDescJson(cnode, i, i, &output_desc);
|
||||
output_desc[kJDtype] = output_desc[kJDataType];
|
||||
json_list_args.push_back(output_desc);
|
||||
}
|
||||
// list_args: attr args
|
||||
auto op_name = AnfAlgo::GetCNodeName(cnode);
|
||||
auto opinfo = OpLib::FindOp(op_name, OpImplyType::kTBE);
|
||||
MS_EXCEPTION_IF_NULL(opinfo);
|
||||
TbeKernelJsonCreator json_creater(SINGLE_BUILD);
|
||||
nlohmann::json json_attr_args;
|
||||
if (!json_creater.GenTbeAttrJson(cnode, opinfo, &json_attr_args)) {
|
||||
MS_LOG(INFO) << "Fusion warning: get prebuild args of attr failed.";
|
||||
}
|
||||
for (const auto &attr : json_attr_args) {
|
||||
// if(attr[kJName] != "isRef" && attr["valid"] == true) {
|
||||
if (attr[kJName] != "isRef" && attr[kJValid] == true) {
|
||||
json_list_args.push_back(attr[kJValue]);
|
||||
}
|
||||
}
|
||||
json_prebuild_args[kJListArgs] = json_list_args;
|
||||
(*compute_op_str)[kJPreBuildOutsAttrs] = json_prebuild_args;
|
||||
}
|
||||
|
||||
void TbeKernelBuild::GenSuffixDescJson(nlohmann::json *output_desc) {
|
||||
MS_EXCEPTION_IF_NULL(output_desc);
|
||||
(*output_desc)[kJSliceOffset] = nlohmann::json::array();
|
||||
(*output_desc)[kJSplitIndex] = 0;
|
||||
(*output_desc)[kJTotalShape] = nlohmann::json::array();
|
||||
(*output_desc)[kJValidShape] = nlohmann::json::array();
|
||||
}
|
||||
|
||||
// anf_node: this node is used to get output desc(type\foramt\shape ...)
|
||||
// node_out_idx: node output index
|
||||
// desc_output_idx: this index use to add json
|
||||
// nlohmann::json *output_desc: for return
|
||||
// FusionDataType fusion_data_type: speceial process json desc output shape [kFusionAddN, kFusionReLUGradV2]
|
||||
void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
|
||||
size_t desc_output_idx, nlohmann::json *output_desc, FusionDataType fusion_data_type) {
|
||||
GenPreDescJson(output_desc);
|
||||
// data_type
|
||||
auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx);
|
||||
(*output_desc)[kJDataType] = tbe::TypeIdToString(type_id);
|
||||
// name
|
||||
std::string output_desc_name = anf_node->fullname_with_scope();
|
||||
if (node_out_idx > 0) {
|
||||
output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx);
|
||||
}
|
||||
(*output_desc)[kJName] = NormalizeFullScopeName(output_desc_name);
|
||||
auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx);
|
||||
(*output_desc)[kJDataType] = tbe::TypeIdToString(type_id);
|
||||
(*output_desc)[kJName] = output_desc_name;
|
||||
// ori_format
|
||||
(*output_desc)[kJOriFormat] = kOpFormat_NCHW;
|
||||
// ori_shape
|
||||
auto ori_shape = AnfAlgo::GetOutputInferShape(anf_node, node_out_idx);
|
||||
if (ori_shape.empty()) {
|
||||
ori_shape.emplace_back(1);
|
||||
}
|
||||
(*output_desc)[kJOriShape] = ori_shape;
|
||||
// !! Note: output_index, only node's output use it
|
||||
(*output_desc)[kJOutputIndex] = desc_output_idx;
|
||||
// shape
|
||||
auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, node_out_idx);
|
||||
if (shape.empty()) {
|
||||
shape.emplace_back(1);
|
||||
}
|
||||
(*output_desc)[kJShape] = shape;
|
||||
// !! Note: format: only data node's output use it
|
||||
auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx);
|
||||
if (format == kOpFormat_DEFAULT) {
|
||||
format = ori_shape.size() == 4 ? kOpFormat_NCHW : kOpFormat_ND;
|
||||
} else if (format == kOpFormat_FRAC_Z) {
|
||||
format = kOpFormat_FRACTAL_Z;
|
||||
}
|
||||
(*output_desc)[kJFormat] = format;
|
||||
(*output_desc)[kJOriFormat] = kOpFormat_NCHW;
|
||||
(*output_desc)[kJOutputIndex] = desc_output_idx;
|
||||
// special node
|
||||
if (fusion_data_type == kFusionAddN && format == kOpFormat_NC1HWC0) {
|
||||
std::vector<size_t> spec_shape = {};
|
||||
spec_shape.emplace_back(shape[0]);
|
||||
|
@ -663,12 +755,13 @@ void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_
|
|||
(*output_desc)[kJShape] = spec_shape;
|
||||
(*output_desc)[kJDataType] = kVTypeBool;
|
||||
}
|
||||
GenSuffixDescJson(output_desc);
|
||||
}
|
||||
|
||||
void TbeKernelBuild::GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
|
||||
size_t output_index, nlohmann::json *output_desc) {
|
||||
std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index);
|
||||
(*output_desc)[kJName] = NormalizeFullScopeName(output_desc_name);
|
||||
(*output_desc)[kJName] = output_desc_name;
|
||||
(*output_desc)[kJOutputIndex] = output_index;
|
||||
std::vector<size_t> shape;
|
||||
(*output_desc)[kJShape] = shape;
|
||||
|
@ -692,6 +785,9 @@ bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name,
|
|||
return true;
|
||||
}
|
||||
|
||||
// <input_nodes> : contains parameter/data node, input order may doesn't match tbe input order;
|
||||
// <compute_nodes> : contains cnode, inputs order may doesn't match tbe input order;
|
||||
// Special process node list: reference tbe_adapter.cc [except: Conv2DBackpropInput]
|
||||
bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes,
|
||||
const std::vector<mindspore::AnfNodePtr> &compute_nodes,
|
||||
std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers,
|
||||
|
@ -722,7 +818,7 @@ bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &in
|
|||
MS_LOG(INFO) << "Fusion info: add compute node's [" << i << "] input: " << input->fullname_with_scope();
|
||||
layer.emplace_back((*find_iter));
|
||||
} else {
|
||||
MS_LOG(INFO) << "Fusion warnig: this input [" << i << "] may be pre compute(" << input->fullname_with_scope()
|
||||
MS_LOG(INFO) << "Fusion warning: this input [" << i << "] may be pre compute(" << input->fullname_with_scope()
|
||||
<< ") node's output.";
|
||||
}
|
||||
}
|
||||
|
@ -750,8 +846,9 @@ bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::Anf
|
|||
MS_EXCEPTION_IF_NULL(data_str);
|
||||
MS_EXCEPTION_IF_NULL(index);
|
||||
std::vector<nlohmann::json> output_desc_list;
|
||||
// if data_input is null, this is optional input.
|
||||
if (!data_input) {
|
||||
MS_LOG(INFO) << "Data input is optional node";
|
||||
MS_LOG(INFO) << "Fusion info: data input is optional node";
|
||||
auto name = std::string(kOptional) + std::to_string(*index);
|
||||
(*data_str)[kJName] = name;
|
||||
nlohmann::json output_desc;
|
||||
|
@ -767,12 +864,16 @@ bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::Anf
|
|||
auto kernel_idx = AnfAlgo::VisitKernel(data_input, 0);
|
||||
auto real_node = kernel_idx.first;
|
||||
size_t real_idx = kernel_idx.second;
|
||||
MS_LOG(INFO) << "Real name " << real_node->fullname_with_scope() << " index:" << real_idx;
|
||||
MS_LOG(INFO) << "Fusion info: Real name: " << real_node->fullname_with_scope() << ". index:" << real_idx;
|
||||
// kJOutputDesc
|
||||
nlohmann::json output_desc;
|
||||
GenDescJson(real_node, real_idx, real_idx, &output_desc, fusion_data_type);
|
||||
output_desc_list.push_back(output_desc);
|
||||
(*data_str)[kJName] = NormalizeFullScopeName(real_node->fullname_with_scope());
|
||||
auto full_name = real_node->fullname_with_scope();
|
||||
if (real_idx > 0) {
|
||||
full_name = full_name.append("_").append(std::to_string(real_idx));
|
||||
}
|
||||
(*data_str)[kJName] = full_name;
|
||||
}
|
||||
(*data_str)[kJOutputDesc] = output_desc_list;
|
||||
(*data_str)[kJtype] = "Data";
|
||||
|
@ -808,6 +909,7 @@ bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) {
|
|||
size_t TbeKernelBuild::GetOptionalInput(const mindspore::CNodePtr &cnode, bool is_dynamic_input) {
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
if (is_dynamic_input) {
|
||||
// Node can not have optional & dynamic input.
|
||||
return 0;
|
||||
}
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
|
@ -831,22 +933,46 @@ std::string TbeKernelBuild::GetRealOpType(const std::string &origin_type) {
|
|||
return result;
|
||||
}
|
||||
|
||||
std::string TbeKernelBuild::GetNodeFusionType(const mindspore::CNodePtr &cnode) {
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto node_type = AnfAlgo::GetCNodeName(cnode);
|
||||
static std::map<std::string, std::string> fusion_type_map = {{kConv2DOpName, "Convolution"},
|
||||
{kBNTrainingReduceOpName, "bn_reduce"},
|
||||
{kBNTrainingUpdateOpName, "bn_update"},
|
||||
{kReluV2OpName, "ElemWise"},
|
||||
{kTensorAddOpName, "ElemWise"},
|
||||
{kConv2DBackpropInputOpName, "Conv2d_backprop_input"},
|
||||
{kAddNOpName, "ElemWise"},
|
||||
{kReluGradV2OpName, "ElemWise"},
|
||||
{kRealDivOpName, "ElemWise"}};
|
||||
auto find = fusion_type_map.find(node_type);
|
||||
if (find == fusion_type_map.end()) {
|
||||
MS_LOG(INFO) << "Fusion warning: get node fusion type failed, origin node type: " << node_type
|
||||
<< " return null string.";
|
||||
return "";
|
||||
} else {
|
||||
return find->second;
|
||||
}
|
||||
}
|
||||
|
||||
bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
|
||||
std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
|
||||
std::vector<nlohmann::json> *input_desc_list, size_t *index) {
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
MS_EXCEPTION_IF_NULL(input_desc_list);
|
||||
std::vector<nlohmann::json> input_desc_list_tmp = {};
|
||||
// 1. input json
|
||||
bool is_dynamic_input = IsDynamicInput(cnode);
|
||||
for (size_t i = 1; i < cnode->inputs().size(); ++i) {
|
||||
auto input = cnode->input(i);
|
||||
auto kernel_idx = AnfAlgo::VisitKernel(input, 0);
|
||||
auto real_node = kernel_idx.first;
|
||||
size_t real_idx = kernel_idx.second;
|
||||
MS_LOG(INFO) << "Real name" << real_node->fullname_with_scope() << "index:" << real_idx;
|
||||
MS_LOG(INFO) << "Fusion info: real name: " << real_node->fullname_with_scope() << ". index:" << real_idx;
|
||||
nlohmann::json input_desc;
|
||||
GenDescJson(real_node, real_idx, real_idx, &input_desc);
|
||||
if (is_dynamic_input) {
|
||||
// 2. dynamic input json
|
||||
MS_LOG(INFO) << "Node has dynamic input.";
|
||||
input_desc[kJDynIndex] = (i - 1);
|
||||
}
|
||||
|
@ -854,7 +980,8 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
|
|||
}
|
||||
size_t optional_num = GetOptionalInput(cnode, is_dynamic_input);
|
||||
if (optional_num > 0) {
|
||||
MS_LOG(INFO) << "Node has optional input.";
|
||||
// 3. optional input
|
||||
MS_LOG(INFO) << "Fusion info: node has optional input.";
|
||||
for (size_t i = 0; i < optional_num; ++i) {
|
||||
nlohmann::json optional_input_desc;
|
||||
optional_input_desc[kJName] = std::string(kOptional) + std::to_string(*index);
|
||||
|
@ -872,7 +999,7 @@ std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int> &o
|
|||
std::vector<size_t> desc_output_index = {};
|
||||
for (size_t idx = 0; idx < output_used_nums.size(); ++idx) {
|
||||
auto output_use_num_item = output_used_nums[idx];
|
||||
MS_LOG(INFO) << "Output used num[" << idx << "] = " << output_use_num_item;
|
||||
MS_LOG(INFO) << "Fusion info: output used num[" << idx << "] = " << output_use_num_item;
|
||||
desc_output_index.emplace_back(idx);
|
||||
if (output_use_num_item > 1) {
|
||||
desc_output_index.emplace_back(idx);
|
||||
|
@ -887,7 +1014,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode
|
|||
auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
|
||||
if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) {
|
||||
auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum);
|
||||
MS_LOG(INFO) << "This node's output has been reused, node name: " << cnode->fullname_with_scope();
|
||||
MS_LOG(INFO) << "Fusion info: this node's output has been reused, node name: " << cnode->fullname_with_scope();
|
||||
if (output_used_nums.size() != output_size) {
|
||||
MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")"
|
||||
<< " is not match output used num(" << output_used_nums.size() << ")";
|
||||
|
@ -930,20 +1057,14 @@ bool TbeKernelBuild::GenFusionComputeJson(const mindspore::AnfNodePtr &compute_n
|
|||
// gen output desc
|
||||
std::vector<nlohmann::json> output_desc_list;
|
||||
if (!GenFusionComputeOutputJson(cnode, &output_desc_list)) {
|
||||
MS_LOG(INFO) << "Fusion Error: gen fusion output desc faild, node full name: " << cnode->fullname_with_scope();
|
||||
MS_LOG(INFO) << "Fusion Error: gen fusion output desc failed, node full name: " << cnode->fullname_with_scope();
|
||||
return false;
|
||||
}
|
||||
(*compute_op_str)[kJOutputDesc] = output_desc_list;
|
||||
// gen others
|
||||
auto origin_type = AnfAlgo::GetCNodeName(cnode);
|
||||
// replace special op type for buffer fusion op
|
||||
auto type = GetRealOpType(origin_type);
|
||||
(*compute_op_str)[kJtype] = type;
|
||||
tbe::TbeAdapter::NormalizeFuncName(&type);
|
||||
(*compute_op_str)[kJFuncName] = type;
|
||||
(*compute_op_str)[kJName] = NormalizeFullScopeName(cnode->fullname_with_scope());
|
||||
(void)(*fusion_kernel_name).append("_");
|
||||
(void)(*fusion_kernel_name).append(type);
|
||||
// gen common desc
|
||||
GenFusionComputeCommonJson(cnode, compute_op_str, fusion_kernel_name);
|
||||
// gen prebuild args
|
||||
GenFusionComputePreBuildJson(cnode, compute_op_str);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -965,7 +1086,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list,
|
|||
MS_EXCEPTION_IF_NULL(output_size_list);
|
||||
input_size_list->clear();
|
||||
output_size_list->clear();
|
||||
|
||||
// cal input size for malloc
|
||||
for (const auto &op : fusion_op_list) {
|
||||
if (op[kJtype] == "Data") {
|
||||
const auto &data_output_desc = op[kJOutputDesc];
|
||||
|
@ -975,23 +1096,23 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list,
|
|||
}
|
||||
auto ret = GetIOSizeImpl(data_output);
|
||||
input_size_list->push_back(ret);
|
||||
MS_LOG(INFO) << "Fusion info: scope input name: " << op[kJName] << ", size: " << ret;
|
||||
MS_LOG(INFO) << "Fusion info: input node name: " << op[kJName] << ", size: " << ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// cal output size for malloc
|
||||
for (const auto &output_node : output_nodes) {
|
||||
auto kernel_idx = AnfAlgo::VisitKernel(output_node, 0);
|
||||
auto real_node = kernel_idx.first;
|
||||
size_t real_idx = kernel_idx.second;
|
||||
auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope());
|
||||
MS_LOG(INFO) << "Fusion info: real node name: " << normal_name << ", real output index: " << real_idx;
|
||||
auto full_name = real_node->fullname_with_scope();
|
||||
MS_LOG(INFO) << "Fusion info: real output node name: " << full_name << ", real output index: " << real_idx;
|
||||
for (const auto &op : fusion_op_list) {
|
||||
if (op[kJName] == normal_name) {
|
||||
if (op[kJName] == full_name) {
|
||||
auto op_output_desces = op[kJOutputDesc];
|
||||
if (output_node != real_node) {
|
||||
// tuple_get item
|
||||
MS_LOG(INFO) << "Output is a tuple getitem node";
|
||||
MS_LOG(INFO) << "Fusion info: output is a tuple get_item node";
|
||||
auto output_desc = op_output_desces[real_idx];
|
||||
if (output_desc[kJShape].empty()) {
|
||||
MS_LOG(INFO) << "Fusion error: output_desc's shape is empty. real_index " << real_idx;
|
||||
|
@ -1001,6 +1122,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list,
|
|||
output_size_list->push_back(ret);
|
||||
MS_LOG(INFO) << "Fusion info: scope output index: " << real_idx << ", size: " << ret;
|
||||
} else {
|
||||
MS_LOG(INFO) << "Fusion info: output is self.";
|
||||
for (const auto &output_desc : op_output_desces) {
|
||||
if (output_desc[kJShape].empty()) {
|
||||
MS_LOG(INFO) << "Fusion info: output_desc's shape is empty, may be this node output";
|
||||
|
|
|
@ -41,8 +41,8 @@ class TbeKernelBuild {
|
|||
std::vector<size_t> *output_size_list);
|
||||
// Ub Fuison
|
||||
static bool GenFusionScopeJson(const std::vector<AnfNodePtr> &input_nodes,
|
||||
const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_str,
|
||||
std::string *fusion_kernel);
|
||||
const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_json,
|
||||
std::string *fusion_kernel_name);
|
||||
static bool GetIOSize(const nlohmann::json &fusion_op_list, const std::vector<AnfNodePtr> &output_nodes,
|
||||
std::vector<size_t> *input_size_list, std::vector<size_t> *output_size_list);
|
||||
|
||||
|
@ -61,9 +61,14 @@ class TbeKernelBuild {
|
|||
static std::vector<size_t> GetDescOutputIndex(const std::vector<int> &output_used_nums);
|
||||
static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
|
||||
std::vector<nlohmann::json> *output_desc_list);
|
||||
static void GenPreDescJson(nlohmann::json *output_desc);
|
||||
static void GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str,
|
||||
std::string *fusion_kernel_name);
|
||||
static void GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str);
|
||||
static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
|
||||
size_t desc_output_idx, nlohmann::json *output_desc,
|
||||
FusionDataType fusion_data_type = kFusionNormal);
|
||||
static void GenSuffixDescJson(nlohmann::json *output_desc);
|
||||
static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
|
||||
size_t output_index, nlohmann::json *output_desc);
|
||||
static size_t GetIOSizeImpl(const nlohmann::json &desc);
|
||||
|
@ -76,6 +81,7 @@ class TbeKernelBuild {
|
|||
static bool IsDynamicInput(const CNodePtr &cnode);
|
||||
static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input);
|
||||
static std::string GetRealOpType(const std::string &origin_type);
|
||||
static std::string GetNodeFusionType(const CNodePtr &cnode);
|
||||
};
|
||||
|
||||
class TbeKernelJsonCreator {
|
||||
|
@ -84,14 +90,14 @@ class TbeKernelJsonCreator {
|
|||
~TbeKernelJsonCreator() = default;
|
||||
bool GenTbeSingleKernelJson(const std::shared_ptr<AnfNode> &anf_node, nlohmann::json *kernel_json);
|
||||
std::string json_name() { return json_name_; }
|
||||
bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
|
||||
nlohmann::json *attrs_json);
|
||||
|
||||
private:
|
||||
bool GenTbeInputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
|
||||
nlohmann::json *inputs_json);
|
||||
bool GenTbeOutputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
|
||||
nlohmann::json *outputs_json);
|
||||
bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
|
||||
nlohmann::json *attrs_json);
|
||||
static void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj);
|
||||
bool GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, bool value,
|
||||
const std::shared_ptr<OpIOInfo> &input_ptr, const string &op_input_name, size_t input_i,
|
||||
|
|
|
@ -33,42 +33,6 @@
|
|||
namespace mindspore {
|
||||
namespace kernel {
|
||||
using mindspore::kernel::tbe::TbeUtils;
|
||||
|
||||
bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes) {
|
||||
auto build_manger = std::make_shared<ParallelBuildManager>();
|
||||
MS_EXCEPTION_IF_NULL(build_manger);
|
||||
for (const auto &anf_node : anf_nodes) {
|
||||
// gen kernel json
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
nlohmann::json kernel_json;
|
||||
TbeKernelJsonCreator creator(OP_PRE_COMPILE);
|
||||
if (!creator.GenTbeSingleKernelJson(anf_node, &kernel_json)) {
|
||||
MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
|
||||
return false;
|
||||
}
|
||||
kernel_json["compile_type"] = "pre_build";
|
||||
// op build
|
||||
auto task_id = build_manger->StartCompileOp(kernel_json);
|
||||
build_manger->SavePreTaskInfo(task_id, anf_node);
|
||||
}
|
||||
while (!build_manger->IsAllPreTaskFinish()) {
|
||||
int task_id = -1;
|
||||
std::string task_result;
|
||||
std::string pre_build_result;
|
||||
auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
|
||||
if (!ret) {
|
||||
MS_EXCEPTION(ArgumentError) << "Pre Build Failed. wait one ret:" << ret << ", task id:" << task_id;
|
||||
}
|
||||
|
||||
if (task_result != "Success") {
|
||||
MS_EXCEPTION(ArgumentError) << "task pre compile Failed, task id:" << task_id << ", cause:" << task_result;
|
||||
}
|
||||
|
||||
build_manger->PreTaskFinishProcess(task_id, pre_build_result);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
|
||||
auto build_manger = std::make_shared<ParallelBuildManager>();
|
||||
MS_EXCEPTION_IF_NULL(build_manger);
|
||||
|
@ -123,15 +87,8 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
|
|||
return build_manger->GenSameOpKernelMod();
|
||||
}
|
||||
|
||||
ParallelBuildManager::ParallelBuildManager() {}
|
||||
|
||||
ParallelBuildManager::~ParallelBuildManager() { ResetTaskInfo(); }
|
||||
|
||||
void ParallelBuildManager::SavePreTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node) {
|
||||
MS_LOG(INFO) << "SavePreTaskInfo, task id: " << task_id;
|
||||
pre_task_map_[task_id] = anf_node;
|
||||
}
|
||||
|
||||
void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node,
|
||||
const std::string &json_name, const std::vector<size_t> &input_size_list,
|
||||
const std::vector<size_t> &output_size_list, int32_t scope_id) {
|
||||
|
@ -150,42 +107,11 @@ void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNod
|
|||
task_map_[task_id] = task_info;
|
||||
}
|
||||
|
||||
bool ParallelBuildManager::IsAllPreTaskFinish() const {
|
||||
MS_LOG(INFO) << "wait pre build process task_num: " << pre_task_map_.size();
|
||||
return pre_task_map_.empty();
|
||||
}
|
||||
|
||||
bool ParallelBuildManager::IsAllTaskFinish() const {
|
||||
MS_LOG(INFO) << "wait process task_num: " << task_map_.size();
|
||||
return task_map_.empty();
|
||||
}
|
||||
|
||||
void ParallelBuildManager::PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result) {
|
||||
auto task_iter = pre_task_map_.find(task_id);
|
||||
if (task_iter == pre_task_map_.end()) {
|
||||
MS_EXCEPTION(ArgumentError) << "can find pre task_id:" << task_id;
|
||||
}
|
||||
auto node = task_iter->second;
|
||||
auto builder =
|
||||
std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node));
|
||||
std::string start_flag = "fusion_pattern_start";
|
||||
std::string end_flag = "fusion_pattern_end";
|
||||
int start = pre_build_result.find(start_flag);
|
||||
int end = pre_build_result.find(end_flag);
|
||||
if (start != -1 && end != -1 && end >= start) {
|
||||
std::string result = pre_build_result.substr(start + start_flag.size(), end - start - start_flag.size());
|
||||
if (result == "") {
|
||||
(void)pre_task_map_.erase(task_iter);
|
||||
return;
|
||||
}
|
||||
transform(result.begin(), result.end(), result.begin(), ::toupper);
|
||||
FusionType fusion_type = tbe::GetFusionType(result);
|
||||
builder->SetFusionType(fusion_type);
|
||||
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get());
|
||||
}
|
||||
(void)pre_task_map_.erase(task_iter);
|
||||
}
|
||||
|
||||
std::pair<int32_t, KernelModPtr> ParallelBuildManager::TaskFinishProcess(int32_t task_id, bool set_kernel_mod) {
|
||||
auto task_iter = task_map_.find(task_id);
|
||||
if (task_iter == task_map_.end()) {
|
||||
|
|
|
@ -28,7 +28,6 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes);
|
||||
bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes);
|
||||
|
||||
struct KernelBuildTaskInfo {
|
||||
|
@ -42,9 +41,8 @@ struct KernelBuildTaskInfo {
|
|||
|
||||
class ParallelBuildManager {
|
||||
public:
|
||||
ParallelBuildManager();
|
||||
ParallelBuildManager() = default;
|
||||
~ParallelBuildManager();
|
||||
void SavePreTaskInfo(int32_t task_id, const AnfNodePtr &anf_node);
|
||||
void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name,
|
||||
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
|
||||
int32_t scope_id = 0);
|
||||
|
@ -54,10 +52,7 @@ class ParallelBuildManager {
|
|||
bool SearchInCache(const std::string &json_name, const std::string &processor,
|
||||
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
|
||||
AnfNode *node) const;
|
||||
|
||||
bool IsAllPreTaskFinish() const;
|
||||
bool IsAllTaskFinish() const;
|
||||
void PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result);
|
||||
std::pair<int32_t, KernelModPtr> TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true);
|
||||
KernelModPtr GenKernelMod(const string &json_name, const string &processor,
|
||||
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
|
||||
|
|
|
@ -62,6 +62,16 @@ session::KernelWithIndex FindRefOriginNode(const AnfNodePtr &node) {
|
|||
return kernel_with_index;
|
||||
}
|
||||
|
||||
void AddRefNodePairToKernelGraph(const FuncGraphPtr &func_graph, const CNodePtr &cnode, const size_t output_index,
|
||||
const size_t input_index) {
|
||||
// record the ref_pair
|
||||
auto kernel_graph = func_graph->cast<KernelGraphPtr>();
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
session::AnfWithOutIndex final_pair = std::make_pair(cnode, output_index);
|
||||
session::KernelWithIndex kernel_with_index = AnfAlgo::VisitKernel(AnfAlgo::GetInputNode(cnode, input_index), 0);
|
||||
kernel_graph->AddRefCorrespondPairs(final_pair, kernel_with_index);
|
||||
}
|
||||
|
||||
void AddRefPairToKernelGraph(const FuncGraphPtr &func_graph, const CNodePtr &cnode, const AnfNodePtr &get_item,
|
||||
const AnfNodePtr &final_node, size_t final_index,
|
||||
const session::KernelWithIndex &origin_pair) {
|
||||
|
@ -88,6 +98,7 @@ void AddRefPairToKernelGraph(const FuncGraphPtr &func_graph, const CNodePtr &cno
|
|||
AnfNodePtr AddAdditionalToRefOutput(const FuncGraphPtr &func_graph, const CNodePtr &cnode, size_t output_index,
|
||||
size_t input_index, const AnfNodePtr &get_item) {
|
||||
AnfNodePtr final_node = (get_item == nullptr ? cnode : get_item);
|
||||
bool need_refresh_ref_addr = false;
|
||||
size_t final_index = output_index;
|
||||
AnfNodePtr input_node = AnfAlgo::GetInputNode(cnode, input_index);
|
||||
session::KernelWithIndex origin_pair;
|
||||
|
@ -109,6 +120,7 @@ AnfNodePtr AddAdditionalToRefOutput(const FuncGraphPtr &func_graph, const CNodeP
|
|||
final_node = NewTransOpNode(func_graph, final_node, kernel_select, false, prim::KPrimTransData->name());
|
||||
RefreshKernelBuildInfo(cur_format, origin_format, final_node, {}, cur_type);
|
||||
final_index = 0;
|
||||
need_refresh_ref_addr = true;
|
||||
MS_EXCEPTION_IF_NULL(final_node);
|
||||
MS_LOG(INFO) << "DealRefTransAndCast add trans op, op debug info is " << final_node->DebugString();
|
||||
}
|
||||
|
@ -119,15 +131,19 @@ AnfNodePtr AddAdditionalToRefOutput(const FuncGraphPtr &func_graph, const CNodeP
|
|||
MS_EXCEPTION_IF_NULL(final_node);
|
||||
final_node->set_scope(cnode->scope());
|
||||
final_index = 0;
|
||||
need_refresh_ref_addr = true;
|
||||
MS_LOG(INFO) << "DealRefTransAndCast add cast op, op debug info is " << final_node->DebugString();
|
||||
}
|
||||
// add ref pair
|
||||
AddRefPairToKernelGraph(func_graph, cnode, get_item, final_node, final_index, origin_pair);
|
||||
if (need_refresh_ref_addr) {
|
||||
AddRefNodePairToKernelGraph(func_graph, cnode, output_index, input_index);
|
||||
}
|
||||
// insert depend
|
||||
if (origin_format != cur_format || origin_type != cur_type) {
|
||||
std::vector<AnfNodePtr> depend_nodes{NewValueNode(prim::kPrimDepend), cnode, final_node};
|
||||
final_node = func_graph->NewCNode(depend_nodes);
|
||||
MS_LOG(INFO) << "DealRefTransAndCast add denpend, op debug info is " << final_node->DebugString();
|
||||
MS_LOG(INFO) << "DealRefTranshwAndCast add denpend, op debug info is " << final_node->DebugString();
|
||||
}
|
||||
|
||||
return final_node;
|
||||
|
|
|
@ -58,7 +58,7 @@ const AnfNodePtr RemoveInternalOutput::Process(const FuncGraphPtr &func_graph, c
|
|||
if (kernel_graph == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
if (!kernel_graph->IsInternalOutput(node, 0)) {
|
||||
if (!kernel_graph->IsUniqueTargetInternalOutput(node, 0)) {
|
||||
return nullptr;
|
||||
}
|
||||
if (!UsedForOutputOnly(func_graph, node)) {
|
||||
|
|
|
@ -33,7 +33,25 @@ std::vector<int> TransposeAxis(const std::string &src_format, const std::string
|
|||
} else if ((src_format == kOpFormat_NHWC) && (dst_format == kOpFormat_NCHW)) {
|
||||
return {0, 3, 1, 2};
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Invaild format transform, from " << src_format << " to " << dst_format;
|
||||
MS_LOG(EXCEPTION) << "Invalid format transform, from " << src_format << " to " << dst_format;
|
||||
}
|
||||
}
|
||||
|
||||
// Transpose can be replaceed by nop reshape in some situations.
|
||||
// 1. out_shape [x, 1, 1, y] with transpose perm {0, 2, 3, 1}
|
||||
// 2. out_shape [x, y, 1, 1] with transpose perm {0, 3, 1, 2}
|
||||
bool IsFakeTranspose(const std::vector<size_t> &out_shape, const std::vector<int> &transpose_perm) {
|
||||
if (out_shape.size() != 4) {
|
||||
MS_LOG(EXCEPTION) << "Invalid data shape, 4-D data was needed, but get " << out_shape.size() << "-D.";
|
||||
}
|
||||
std::vector<int> perm1 = {0, 2, 3, 1};
|
||||
std::vector<int> perm2 = {0, 3, 1, 2};
|
||||
if (transpose_perm == perm1) {
|
||||
return (out_shape[1] == 1 && out_shape[2] == 1);
|
||||
} else if (transpose_perm == perm2) {
|
||||
return (out_shape[2] == 1 && out_shape[3] == 1);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -56,8 +74,16 @@ void SetTransposeOpBuildInfo(const std::string &input_format, const std::string
|
|||
CNodePtr InsertTransposeOp(const FuncGraphPtr &graph, const AnfNodePtr &node, const AnfNodePtr &used_node,
|
||||
int used_node_index, const std::vector<int> &transpose_perm) {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
// 1.Create a transpose node.
|
||||
auto transpose_prim = std::make_shared<Primitive>(prim::kPrimTranspose->name());
|
||||
// 0.Judge whether it is a fake transpose
|
||||
auto transed_shape = AnfAlgo::GetInputDeviceShape(used_node, used_node_index);
|
||||
bool is_fake = IsFakeTranspose(transed_shape, transpose_perm);
|
||||
// 1.Create a transpose node or a fake transpose node:reshape.
|
||||
mindspore::PrimitivePtr transpose_prim;
|
||||
if (is_fake) {
|
||||
transpose_prim = std::make_shared<Primitive>(prim::kPrimReshape->name());
|
||||
} else {
|
||||
transpose_prim = std::make_shared<Primitive>(prim::kPrimTranspose->name());
|
||||
}
|
||||
MS_EXCEPTION_IF_NULL(transpose_prim);
|
||||
// 2.Set the input of transpose.
|
||||
std::vector<AnfNodePtr> transpose_input = {NewValueNode(transpose_prim), node};
|
||||
|
@ -66,7 +92,9 @@ CNodePtr InsertTransposeOp(const FuncGraphPtr &graph, const AnfNodePtr &node, co
|
|||
auto transpose_type = {AnfAlgo::GetPrevNodeOutputInferDataType(used_node, used_node_index)};
|
||||
auto transpose_shape = {AnfAlgo::GetPrevNodeOutputInferShape(used_node, used_node_index)};
|
||||
AnfAlgo::SetOutputInferTypeAndShape(transpose_type, transpose_shape, transpose_op.get());
|
||||
AnfAlgo::SetNodeAttr(kAttrPerm, MakeValue(transpose_perm), transpose_op);
|
||||
if (!is_fake) {
|
||||
AnfAlgo::SetNodeAttr(kAttrPerm, MakeValue(transpose_perm), transpose_op);
|
||||
}
|
||||
// 4.Set the input of used_node.
|
||||
MS_LOG(DEBUG) << "Node: " << node->fullname_with_scope() << ", used node: " << used_node->fullname_with_scope()
|
||||
<< ", index: " << used_node_index;
|
||||
|
|
|
@ -49,6 +49,7 @@ const AnfNodePtr ReplaceBNCastFusion::Process(const FuncGraphPtr &graph, const A
|
|||
auto manager = graph->manager();
|
||||
MS_EXCEPTION_IF_NULL(manager);
|
||||
auto outlist = GetRealNodeUsedList(graph, fbn2);
|
||||
bool changed = false;
|
||||
for (size_t i = 0; i < outlist->size(); i++) {
|
||||
auto index_node = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(outlist->at(i).first), 1);
|
||||
auto value_node = index_node->cast<ValueNodePtr>();
|
||||
|
@ -57,14 +58,18 @@ const AnfNodePtr ReplaceBNCastFusion::Process(const FuncGraphPtr &graph, const A
|
|||
if (item_idx == 0) {
|
||||
auto cast = GetRealNodeUsedList(graph, outlist->at(i).first);
|
||||
if (AnfAlgo::GetCNodeName(cast->at(0).first) != "Cast") {
|
||||
return nullptr;
|
||||
continue;
|
||||
}
|
||||
manager->Replace(utils::cast<CNodePtr>(cast->at(0).first), utils::cast<CNodePtr>(outlist->at(i).first));
|
||||
outputs_type.push_back(kNumberTypeFloat16);
|
||||
outputs_shape.push_back(AnfAlgo::GetOutputInferShape(outlist->at(i).first, 0));
|
||||
AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, outlist->at(i).first.get());
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
if (!changed) {
|
||||
return nullptr;
|
||||
}
|
||||
manager->Replace(utils::cast<CNodePtr>(x_after), utils::cast<CNodePtr>(x_before));
|
||||
outputs_type.clear();
|
||||
outputs_shape.clear();
|
||||
|
|
|
@ -211,8 +211,11 @@ bool CommunicationOpFusion::DoFusion(const FuncGraphPtr &func_graph, const Commu
|
|||
start_index = end_index + 1;
|
||||
continue;
|
||||
}
|
||||
auto kernel_graph = func_graph->cast<KernelGraphPtr>();
|
||||
auto graph_id = kernel_graph->graph_id();
|
||||
AnfNodePtr new_communication_op =
|
||||
CreateFusedCommunicationOp(func_graph, communication_op_info, start_index, end_index);
|
||||
AnfAlgo::SetGraphId(graph_id, new_communication_op.get());
|
||||
// replace old communication op with new communication op
|
||||
for (auto idx = start_index; idx <= end_index; ++idx) {
|
||||
std::vector<AnfNodePtr> tuple_getitem_input;
|
||||
|
|
|
@ -425,7 +425,7 @@ std::string AnfRuntimeAlgorithm::GetInputFormat(const AnfNodePtr &node, size_t i
|
|||
<< node->DebugString() << "]";
|
||||
}
|
||||
if (!IsRealKernel(node)) {
|
||||
GetPrevNodeOutputFormat(node, input_idx);
|
||||
return GetPrevNodeOutputFormat(node, input_idx);
|
||||
}
|
||||
auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
|
||||
MS_EXCEPTION_IF_NULL(kernel_info);
|
||||
|
@ -1197,6 +1197,19 @@ TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputPrecision(const AnfNodePtr &node, s
|
|||
return GetCNodeOutputPrecision(kernel_with_index.first);
|
||||
}
|
||||
|
||||
bool AnfRuntimeAlgorithm::IsDynamicShape(const AnfNodePtr &node) {
|
||||
if (!node->isa<CNode>()) {
|
||||
return false;
|
||||
}
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto has_attr = AnfAlgo::HasNodeAttr(kAttrIsDynamicShape, cnode);
|
||||
if (!has_attr) {
|
||||
return false;
|
||||
}
|
||||
return AnfAlgo::GetNodeAttr<bool>(node, kAttrIsDynamicShape);
|
||||
}
|
||||
|
||||
bool AnfRuntimeAlgorithm::IsCondControlKernel(const CNodePtr &node) {
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (node->inputs().empty()) {
|
||||
|
|
|
@ -217,6 +217,7 @@ class AnfRuntimeAlgorithm {
|
|||
static TypeId GetCNodeOutputPrecision(const AnfNodePtr &node);
|
||||
// get fix output precision from prev node, input_idx is the input index of current node related to prev node.
|
||||
static TypeId GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx);
|
||||
static bool IsDynamicShape(const AnfNodePtr &node);
|
||||
static bool IsCondControlKernel(const CNodePtr &node);
|
||||
static bool IsIndependentNode(const CNodePtr &node);
|
||||
};
|
||||
|
|
|
@ -261,17 +261,16 @@ void AscendControlParser::EraseParameter(NotNull<KernelGraphPtr> root_graph,
|
|||
}
|
||||
}
|
||||
|
||||
EraseAssign(all_nodes, para_to_written_node, root_graph);
|
||||
root_graph->set_execution_order(exec_order);
|
||||
EraseAssign(std::make_shared<ReferenceCounter>(parameter_count), all_nodes, para_to_written_node, root_graph);
|
||||
}
|
||||
|
||||
void AscendControlParser::EraseAssign(const std::set<CNodePtr> &all_nodes,
|
||||
void AscendControlParser::EraseAssign(std::shared_ptr<ReferenceCounter> parameter_count,
|
||||
const std::set<CNodePtr> &all_nodes,
|
||||
const std::map<AnfNodePtr, CNodePtr> ¶_to_written_node,
|
||||
NotNull<KernelGraphPtr> root_graph) {
|
||||
std::vector<CNodePtr> exec_order = root_graph->execution_order();
|
||||
ReferenceCounter parameter_count([](int32_t read, int32_t write) -> bool { return write == 1; });
|
||||
while (parameter_count.HasValidElem()) {
|
||||
auto [para, read, written] = parameter_count.GetOneValidElem();
|
||||
while (parameter_count->HasValidElem()) {
|
||||
auto [para, read, written] = parameter_count->GetOneValidElem();
|
||||
MS_LOG(INFO) << para->DebugString() << " was read " << read << " times, written " << written << " times.";
|
||||
auto assign_iter = para_to_written_node.find(para);
|
||||
if (assign_iter == para_to_written_node.end()) {
|
||||
|
@ -280,7 +279,7 @@ void AscendControlParser::EraseAssign(const std::set<CNodePtr> &all_nodes,
|
|||
auto &assign_node = assign_iter->second;
|
||||
MS_EXCEPTION_IF_NULL(assign_node);
|
||||
if (!IsPrimitiveCNode(assign_node, prim::kPrimAssign)) {
|
||||
parameter_count.EraseElem(para);
|
||||
parameter_count->EraseElem(para);
|
||||
continue;
|
||||
}
|
||||
MS_LOG(INFO) << "Erase " << assign_node->DebugString(5);
|
||||
|
@ -288,10 +287,10 @@ void AscendControlParser::EraseAssign(const std::set<CNodePtr> &all_nodes,
|
|||
auto source = assign_node->input(kCNodeAssignSource);
|
||||
MS_EXCEPTION_IF_NULL(source);
|
||||
auto visit_source = AnfAlgo::VisitKernelWithReturnType(source, 0).first;
|
||||
parameter_count.AddWriteCount(para, -1);
|
||||
parameter_count.AddReadCount(para, -1);
|
||||
parameter_count->AddWriteCount(para, -1);
|
||||
parameter_count->AddReadCount(para, -1);
|
||||
if (visit_source->isa<Parameter>()) {
|
||||
parameter_count.AddReadCount(visit_source, read - 1);
|
||||
parameter_count->AddReadCount(visit_source, read - 1);
|
||||
}
|
||||
for (auto &node : all_nodes) {
|
||||
for (size_t i = 0; i < node->size(); ++i) {
|
||||
|
@ -302,6 +301,7 @@ void AscendControlParser::EraseAssign(const std::set<CNodePtr> &all_nodes,
|
|||
}
|
||||
}
|
||||
}
|
||||
root_graph->set_execution_order(exec_order);
|
||||
}
|
||||
|
||||
void AscendControlParser::EraseLabel(NotNull<KernelGraphPtr> root_graph) {
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include "backend/session/kernel_graph.h"
|
||||
#include "base/base_ref.h"
|
||||
#include "utils/contract.h"
|
||||
|
@ -44,7 +45,7 @@ class AscendControlParser {
|
|||
class ReferenceCounter;
|
||||
|
||||
static void EraseParameter(NotNull<KernelGraphPtr> root_graph, const std::set<KernelGraphPtr> &graph_list);
|
||||
static void EraseAssign(const std::set<CNodePtr> &all_nodes,
|
||||
static void EraseAssign(std::shared_ptr<ReferenceCounter> parameter_count, const std::set<CNodePtr> &all_nodes,
|
||||
const std::map<AnfNodePtr, CNodePtr> ¶_to_written_node,
|
||||
NotNull<KernelGraphPtr> root_graph);
|
||||
static void EraseLabel(NotNull<KernelGraphPtr> root_graph);
|
||||
|
|
|
@ -474,7 +474,6 @@ void AscendSession::InitRuntimeResource() {
|
|||
}
|
||||
|
||||
void AscendSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) const {
|
||||
device::ascend::KernelPreBuild(kernel_graph.get());
|
||||
MS_LOG(INFO) << "HardwareOptimize start!";
|
||||
opt::AscendBackendOptimization(kernel_graph);
|
||||
opt::AscendGraphKernelCommonProcess(kernel_graph);
|
||||
|
|
|
@ -65,6 +65,8 @@ void GPUSession::StartKernelRT() const {
|
|||
|
||||
void GPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
auto optimizer = std::make_shared<opt::GraphOptimizer>();
|
||||
auto pm = std::make_shared<opt::PassManager>();
|
||||
pm->AddPass(std::make_shared<opt::AdamWeightDecayFusion>());
|
||||
|
@ -73,9 +75,11 @@ void GPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
|
|||
pm->AddPass(std::make_shared<opt::ReplaceBNGradCastFusion>());
|
||||
pm->AddPass(std::make_shared<opt::ReplaceMomentumCastFusion>());
|
||||
pm->AddPass(std::make_shared<opt::ReplaceAddNFusion>());
|
||||
pm->AddPass(std::make_shared<opt::BatchNormReluFusion>());
|
||||
pm->AddPass(std::make_shared<opt::BatchNormReluGradFusion>());
|
||||
pm->AddPass(std::make_shared<opt::BatchNormAddReluFusion>());
|
||||
if (context_ptr->execution_mode() != kPynativeMode) {
|
||||
pm->AddPass(std::make_shared<opt::BatchNormReluFusion>());
|
||||
pm->AddPass(std::make_shared<opt::BatchNormReluGradFusion>());
|
||||
pm->AddPass(std::make_shared<opt::BatchNormAddReluFusion>());
|
||||
}
|
||||
optimizer->AddPassManager(pm);
|
||||
(void)optimizer->Optimize(kernel_graph);
|
||||
kernel_graph->SetExecOrderByDefault();
|
||||
|
@ -129,10 +133,16 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
|
|||
const std::vector<tensor::TensorPtr> &inputs_const) const {
|
||||
std::vector<tensor::TensorPtr> inputs(inputs_const);
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
auto input_nodes = kernel_graph->inputs();
|
||||
std::vector<AnfNodePtr> input_nodes;
|
||||
for (const auto &input_node : kernel_graph->inputs()) {
|
||||
auto params = AnfAlgo::GetAllOutput(input_node);
|
||||
std::copy(params.begin(), params.end(), std::back_inserter(input_nodes));
|
||||
}
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
|
||||
if (inputs.size() != input_nodes.size()) {
|
||||
MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size();
|
||||
}
|
||||
for (size_t i = 0; i < inputs.size(); ++i) {
|
||||
auto tensor = inputs[i];
|
||||
MS_EXCEPTION_IF_NULL(tensor);
|
||||
|
|
|
@ -41,13 +41,13 @@ void DataDumpParser::ResetParam() {
|
|||
bool DataDumpParser::DumpEnabled() const {
|
||||
auto enable_dump = std::getenv(kEnableDataDump);
|
||||
if (enable_dump == nullptr) {
|
||||
MS_LOG(INFO) << "[DataDump] enable dump is null. Please export ENABLE_DATA_DUMP";
|
||||
MS_LOG(INFO) << "[DataDump] enable dump is null. If you want to dump data, please export ENABLE_DATA_DUMP";
|
||||
return false;
|
||||
}
|
||||
|
||||
auto enabled = std::atoi(enable_dump);
|
||||
if (enabled != 1) {
|
||||
MS_LOG(WARNING) << "[DataDump] Please export ENABLE_DATA_DUMP=1";
|
||||
MS_LOG(WARNING) << "[DataDump] If you want to dump data, please export ENABLE_DATA_DUMP=1";
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -62,7 +62,7 @@ bool DataDumpParser::DumpEnabled() const {
|
|||
std::optional<std::string> DataDumpParser::GetDumpPath() const {
|
||||
auto dump_path = std::getenv(kDataDumpPath);
|
||||
if (dump_path == nullptr) {
|
||||
MS_LOG(ERROR) << "[DataDump] dump path is null. Please export DATA_DUMP_PATH";
|
||||
MS_LOG(ERROR) << "[DataDump] dump path is null. If you want to dump data, please export DATA_DUMP_PATH";
|
||||
return {};
|
||||
}
|
||||
std::string dump_path_str(dump_path);
|
||||
|
|
|
@ -73,6 +73,7 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
|
|||
|
||||
std::string current_tensor_name;
|
||||
std::unordered_map<unsigned int, watchpoint_t> watchpoints_to_check_table;
|
||||
const size_t location = 0;
|
||||
|
||||
for (std::size_t i = 0; i < tensor_list.size(); i++) {
|
||||
current_tensor_name = tensor_list[i]->GetName();
|
||||
|
@ -102,7 +103,7 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *name, std::vector
|
|||
|
||||
// check if the current node tensor name is included the watchpoint
|
||||
std::string current_node_name = current_tensor_name.substr(0, current_tensor_name.find_first_of(":"));
|
||||
if ((w_type == true && (current_tensor_name.find(w_name) != string::npos || w_name == "*")) ||
|
||||
if ((w_type == true && (current_tensor_name.find(w_name) == location || w_name == "*")) ||
|
||||
(w_type == false && current_node_name == w_name)) {
|
||||
watchpoints_to_check_table[w_table_item.second.id] = w_table_item.second;
|
||||
break;
|
||||
|
|
|
@ -151,35 +151,34 @@ void Debugger::EnableDebugger() {
|
|||
MS_LOG(WARNING) << "Memory Reuse is disabled. Set environment variable MS_DEBUGGER_PARTIAL_MEM=1 to reduce memory "
|
||||
"usage for large models.";
|
||||
}
|
||||
|
||||
if (device_target_ == kAscendDevice) {
|
||||
// set operation overflow info
|
||||
overflow_bin_path_ = DataDumpParser::GetInstance().GetOpOverflowBinPath(graph_ptr_->graph_id(), device_id_);
|
||||
// new overflow dump files will have a timestamp greater than last_overflow_bin_
|
||||
last_overflow_bin_ = 0;
|
||||
DIR *d;
|
||||
d = opendir(overflow_bin_path_.c_str());
|
||||
if (d != nullptr) {
|
||||
struct dirent *dir;
|
||||
while ((dir = readdir(d)) != NULL) {
|
||||
if (dir->d_type == DT_REG) {
|
||||
std::string file_path = overflow_bin_path_;
|
||||
file_path.append(dir->d_name);
|
||||
std::size_t found = file_path.find_last_of(".");
|
||||
if (found == std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
std::string overflow_time = file_path.substr(found + 1);
|
||||
if (stod(overflow_time) <= last_overflow_bin_) {
|
||||
MS_LOG(INFO) << "Old op overflow bin folder" << file_path;
|
||||
continue;
|
||||
}
|
||||
last_overflow_bin_ = stod(overflow_time);
|
||||
#ifdef ENABLE_D
|
||||
// set operation overflow info
|
||||
overflow_bin_path_ = DataDumpParser::GetInstance().GetOpOverflowBinPath(graph_ptr_->graph_id(), device_id_);
|
||||
// new overflow dump files will have a timestamp greater than last_overflow_bin_
|
||||
last_overflow_bin_ = 0;
|
||||
DIR *d;
|
||||
d = opendir(overflow_bin_path_.c_str());
|
||||
if (d != nullptr) {
|
||||
struct dirent *dir;
|
||||
while ((dir = readdir(d)) != NULL) {
|
||||
if (dir->d_type == DT_REG) {
|
||||
std::string file_path = overflow_bin_path_;
|
||||
file_path.append(dir->d_name);
|
||||
std::size_t found = file_path.find_last_of(".");
|
||||
if (found == std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
std::string overflow_time = file_path.substr(found + 1);
|
||||
if (stod(overflow_time) <= last_overflow_bin_) {
|
||||
MS_LOG(INFO) << "Old op overflow bin folder" << file_path;
|
||||
continue;
|
||||
}
|
||||
last_overflow_bin_ = stod(overflow_time);
|
||||
}
|
||||
MS_LOG(INFO) << "last op overflow bin folder" << last_overflow_bin_;
|
||||
}
|
||||
MS_LOG(INFO) << "last op overflow bin folder" << last_overflow_bin_;
|
||||
}
|
||||
#endif
|
||||
|
||||
// initialize grpc client
|
||||
if (debugger_enabled_) {
|
||||
|
@ -554,8 +553,9 @@ std::list<WatchpointHit> Debugger::CheckWatchpoints() {
|
|||
std::vector<int> condition;
|
||||
std::vector<unsigned int> watchpoint_id;
|
||||
std::vector<std::string> overflow_ops;
|
||||
|
||||
#ifdef ENABLE_D
|
||||
overflow_ops = CheckOpOverflow();
|
||||
#endif
|
||||
debug_services_->CheckWatchpoints(&name, &slot, &condition, &watchpoint_id, overflow_ops);
|
||||
std::list<WatchpointHit> hits;
|
||||
for (unsigned int i = 0; i < name.size(); i++) {
|
||||
|
|
|
@ -117,7 +117,7 @@ void GrpcClient::Init(const std::string &host, const std::string &port, const bo
|
|||
int dwcaLen = i2d_X509(sk_X509_value(ca, 0), NULL); // get the length of private key
|
||||
unsigned char *cabuf = (unsigned char *)malloc(sizeof(unsigned char) * dwcaLen);
|
||||
i2d_X509(sk_X509_value(ca, 0), &cabuf); // PrivateKey DER code
|
||||
strcat = std::string(reinterpret_cast<char const *>(cabuf), dwcaLen);
|
||||
strca = std::string(reinterpret_cast<char const *>(cabuf), dwcaLen);
|
||||
|
||||
free(pribuf);
|
||||
free(certbuf);
|
||||
|
|
|
@ -81,8 +81,6 @@ void ParallelContext::set_mirror_mean(bool mirror_mean) { mirror_mean_ = mirror_
|
|||
|
||||
void ParallelContext::set_full_batch(bool full_batch) { full_batch_ = full_batch; }
|
||||
|
||||
void ParallelContext::set_has_initializer(bool has_initializer) { has_initializer_ = has_initializer; }
|
||||
|
||||
void ParallelContext::set_cast_before_mirror(bool cast_before_mirror) { cast_before_mirror_ = cast_before_mirror; }
|
||||
|
||||
void ParallelContext::set_loss_repeated_mean(bool loss_repeated_mean) { loss_repeated_mean_ = loss_repeated_mean; }
|
||||
|
|
|
@ -58,9 +58,6 @@ class ParallelContext {
|
|||
void set_full_batch(bool full_batch);
|
||||
bool full_batch() const { return full_batch_; }
|
||||
|
||||
void set_has_initializer(bool has_initializer);
|
||||
bool has_initializer() const { return has_initializer_; }
|
||||
|
||||
void set_cast_before_mirror(bool cast_before_mirror);
|
||||
bool cast_before_mirror() const { return cast_before_mirror_; }
|
||||
|
||||
|
@ -115,7 +112,6 @@ class ParallelContext {
|
|||
static std::shared_ptr<ParallelContext> inst_context_;
|
||||
bool mirror_mean_;
|
||||
bool full_batch_;
|
||||
bool has_initializer_ = false;
|
||||
bool cast_before_mirror_;
|
||||
bool loss_repeated_mean_;
|
||||
int32_t device_num_;
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "frontend/parallel/ps/optimizer_info.h"
|
||||
#include <memory>
|
||||
#include "frontend/parallel/ps/util.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace parallel {
|
||||
|
@ -30,6 +31,8 @@ const std::vector<AddressPtr> &OptimizerInfo::outputs() { return outputs_; }
|
|||
|
||||
bool OptimizerInfo::IsSparse() const { return false; }
|
||||
|
||||
const size_t OptimizerInfo::indice_size() const { return 0; }
|
||||
|
||||
size_t OptimizerInfo::grad_index() { return 0; }
|
||||
|
||||
size_t OptimizerInfo::indices_index() { return 0; }
|
||||
|
@ -57,7 +60,8 @@ void DenseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
|
|||
}
|
||||
}
|
||||
|
||||
void DenseOptimInfo::ComputeMean(size_t n) {
|
||||
void DenseOptimInfo::ComputeMean(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &, size_t n,
|
||||
size_t server_num, size_t rank_id) {
|
||||
if (n > 1) {
|
||||
float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
|
||||
size_t size = gradient()->size / sizeof(float);
|
||||
|
@ -96,15 +100,90 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
|
|||
for (size_t i = 0; i < indices_index; i++) {
|
||||
indice_offset += lengths[i];
|
||||
}
|
||||
int *incr_indice_data = reinterpret_cast<int *>(values.data() + indice_offset);
|
||||
size_t incr_indice_size = lengths[indices_index] * sizeof(float);
|
||||
float *incr_indice_data = values.data() + indice_offset;
|
||||
size_t incr_indice_size = lengths[indices_index];
|
||||
size_t incr_indice_data_size = incr_indice_size * sizeof(int);
|
||||
int *converted_indices = new int[incr_indice_size];
|
||||
for (size_t i = 0; i < incr_indice_size; i++) {
|
||||
converted_indices[i] = static_cast<int>(incr_indice_data[i]);
|
||||
}
|
||||
|
||||
auto ret2 = memcpy_s(accum_indices_data + indices_offset_, incr_indice_size, incr_indice_data, incr_indice_size);
|
||||
auto ret2 =
|
||||
memcpy_s(accum_indices_data + indices_offset_, incr_indice_data_size, converted_indices, incr_indice_data_size);
|
||||
if (ret2 != 0) {
|
||||
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")";
|
||||
}
|
||||
delete[] converted_indices;
|
||||
indices_offset_ += lengths[indices_index];
|
||||
indices()->size += incr_indice_size;
|
||||
indices()->size += incr_indice_data_size;
|
||||
}
|
||||
|
||||
void SparseOptimInfo::ComputeMean(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes,
|
||||
size_t n, size_t server_num, size_t rank_id) {
|
||||
size_t indices_size = static_cast<size_t>(indices()->size / sizeof(int));
|
||||
int segment_size = gradient()->size / indices()->size;
|
||||
|
||||
float *new_grad = new float[indices_size * segment_size];
|
||||
int *new_indices = new int[indices_size];
|
||||
mindspore::kernel::SparseGradient unique_sparse_grad({new_grad, new_indices, indices_size});
|
||||
|
||||
const std::vector<std::shared_ptr<std::vector<size_t>>> &shape_vec = *shapes;
|
||||
if (shape_vec.size() < 2 || shape_vec[1] == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "No input shape found";
|
||||
}
|
||||
auto input_shapes = shape_vec.size() > 0 ? shape_vec[1] : nullptr;
|
||||
MS_EXCEPTION_IF_NULL(input_shapes);
|
||||
if (input_shapes->size() == 0) {
|
||||
MS_LOG(EXCEPTION) << "Invalid input shapes";
|
||||
}
|
||||
int first_dim_size = input_shapes->front();
|
||||
int outer_dim_size = segment_size;
|
||||
|
||||
if (first_dim_size == 0 || outer_dim_size == 0) {
|
||||
MS_LOG(ERROR) << "Invalid first dim size";
|
||||
}
|
||||
|
||||
float *grad_data = reinterpret_cast<float *>(gradient()->addr);
|
||||
int *indices_data = reinterpret_cast<int *>(indices()->addr);
|
||||
|
||||
size_t original_row_count = input_shapes->front();
|
||||
if (original_row_count > 0) {
|
||||
size_t offset = 0;
|
||||
std::map<int, int> rank_dims = Util::AllRankLocalShard(original_row_count, rank_id, server_num);
|
||||
for (size_t i = 0; i < rank_id; i++) {
|
||||
if (rank_dims.count(i) == 0) {
|
||||
MS_LOG(EXCEPTION) << "No local shard number for rank " << i;
|
||||
}
|
||||
offset += rank_dims[i];
|
||||
}
|
||||
for (size_t i = 0; i < indices_size; i++) {
|
||||
indices_data[i] -= offset;
|
||||
}
|
||||
}
|
||||
|
||||
Util::ReduceSparseGradient(grad_data, indices_data, indices_size, segment_size, first_dim_size, outer_dim_size,
|
||||
&unique_sparse_grad);
|
||||
|
||||
int reduced_grad_size = unique_sparse_grad.indices_size_ * segment_size * sizeof(float);
|
||||
auto ret = memcpy_s(gradient()->addr, reduced_grad_size, unique_sparse_grad.value_, reduced_grad_size);
|
||||
if (ret != 0) {
|
||||
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
|
||||
}
|
||||
int reduced_indice_size = unique_sparse_grad.indices_size_ * sizeof(int);
|
||||
ret = memcpy_s(indices()->addr, reduced_indice_size, unique_sparse_grad.indices_, reduced_indice_size);
|
||||
if (ret != 0) {
|
||||
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
|
||||
}
|
||||
|
||||
gradient()->size = reduced_grad_size;
|
||||
indices()->size = reduced_indice_size;
|
||||
|
||||
for (size_t i = 0; i < unique_sparse_grad.indices_size_ * segment_size; i++) {
|
||||
grad_data[i] = grad_data[i] / n;
|
||||
}
|
||||
|
||||
delete[] new_grad;
|
||||
delete[] new_indices;
|
||||
}
|
||||
|
||||
void SparseOptimInfo::Reset() {
|
||||
|
@ -135,6 +214,8 @@ void MomentumOptimInfo::Update(const Values &values, const Lengths &lens) {
|
|||
}
|
||||
}
|
||||
|
||||
// Current accumulation offset into the indices buffer. NOTE(review): callers
// (e.g. ParameterServer::UpdateWeights) use this value as the first dimension
// of the indices shape, i.e. as an element count — confirm indices_offset_ is
// maintained in elements rather than bytes.
const size_t SparseOptimInfo::indice_size() const { return indices_offset_; }
|
||||
|
||||
// Dense momentum optimizer: the gradient buffer lives at input slot 3.
const AddressPtr &MomentumOptimInfo::gradient() { return inputs_[3]; }
|
||||
|
||||
// Momentum is a dense optimizer with no sparse indices; this returns the same
// slot as gradient() (inputs_[3]) only to satisfy the common OptimizerInfo
// interface. NOTE(review): confirm no caller ever reinterprets this buffer as
// int indices for the dense path.
const AddressPtr &MomentumOptimInfo::indices() { return inputs_[3]; }
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_CCSRC_FRONTEND_PARALLEL_PS_OPTIMIZER_INFO_H_
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
#include "frontend/parallel/ps/common.h"
|
||||
|
||||
|
@ -33,12 +34,14 @@ class OptimizerInfo {
|
|||
virtual void Update(const Values &values, const Lengths &lengths) {}
|
||||
virtual void UpdateWeight(const WeightPtr &weight);
|
||||
virtual void Accumulate(const Values &values, const Lengths &lengths) = 0;
|
||||
virtual void ComputeMean(size_t n) {}
|
||||
virtual void ComputeMean(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes, size_t n,
|
||||
size_t server_num, size_t rank_id) {}
|
||||
virtual void Reset() {}
|
||||
void AddWorkspace(const AddressPtr &workspace);
|
||||
|
||||
virtual const AddressPtr &gradient() = 0;
|
||||
virtual const AddressPtr &indices() = 0;
|
||||
virtual const size_t indice_size() const;
|
||||
const std::vector<AddressPtr> &inputs();
|
||||
const std::vector<AddressPtr> &workspaces();
|
||||
const std::vector<AddressPtr> &outputs();
|
||||
|
@ -59,7 +62,8 @@ class DenseOptimInfo : public OptimizerInfo {
|
|||
~DenseOptimInfo() override = default;
|
||||
|
||||
void Accumulate(const Values &values, const Lengths &lens) override;
|
||||
void ComputeMean(size_t n) override;
|
||||
void ComputeMean(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes, size_t n,
|
||||
size_t server_num, size_t rank_id) override;
|
||||
void Reset() override;
|
||||
};
|
||||
|
||||
|
@ -69,7 +73,10 @@ class SparseOptimInfo : public OptimizerInfo {
|
|||
~SparseOptimInfo() override = default;
|
||||
|
||||
void Accumulate(const Values &values, const Lengths &lens) override;
|
||||
void ComputeMean(const std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> &shapes, size_t n,
|
||||
size_t server_num, size_t rank_id) override;
|
||||
void Reset() override;
|
||||
const size_t indice_size() const override;
|
||||
|
||||
protected:
|
||||
size_t grads_offset_{0};
|
||||
|
|
|
@ -136,15 +136,21 @@ OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
|
|||
|
||||
const std::shared_ptr<std::vector<size_t>> &indices_shape = (*inputs_shape)[10];
|
||||
size_t total_indice_size =
|
||||
std::accumulate((*indices_shape).begin(), (*indices_shape).end(), sizeof(float), std::multiplies<size_t>());
|
||||
std::accumulate((*indices_shape).begin(), (*indices_shape).end(), sizeof(int), std::multiplies<size_t>());
|
||||
AddressPtr indices = std::make_shared<kernel::Address>();
|
||||
indices->addr = new float[total_indice_size * worker_num];
|
||||
ret = memcpy_s(indices->addr, lens[7] * sizeof(float), reinterpret_cast<float *>(epsilon->addr) + lens[5] + lens[6],
|
||||
lens[7] * sizeof(float));
|
||||
indices->addr = new int[total_indice_size * worker_num];
|
||||
int *converted_indices = new int[lens[7]];
|
||||
size_t indices_data_size = lens[7] * sizeof(int);
|
||||
float *indices_data = reinterpret_cast<float *>(epsilon->addr) + lens[5] + lens[6];
|
||||
for (int i = 0; i < lens[7]; i++) {
|
||||
converted_indices[i] = static_cast<int>(indices_data[i]);
|
||||
}
|
||||
ret = memcpy_s(indices->addr, indices_data_size, converted_indices, indices_data_size);
|
||||
if (ret != 0) {
|
||||
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
|
||||
}
|
||||
indices->size = lens[7] * sizeof(int);
|
||||
indices->size = indices_data_size;
|
||||
delete[] converted_indices;
|
||||
|
||||
return new SparseAdamOptimInfo(weight_addr, m, v, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon,
|
||||
grad, indices);
|
||||
|
@ -185,13 +191,19 @@ OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight,
|
|||
size_t total_indice_size =
|
||||
std::accumulate((*indices_shape).begin(), (*indices_shape).end(), 1, std::multiplies<size_t>());
|
||||
AddressPtr indices = std::make_shared<kernel::Address>();
|
||||
indices->addr = new float[total_indice_size * worker_num];
|
||||
ret = memcpy_s(indices->addr, lens[1] * sizeof(float), reinterpret_cast<float *>(values.data()) + lens[0],
|
||||
lens[1] * sizeof(float));
|
||||
indices->addr = new int[total_indice_size * worker_num];
|
||||
int *converted_indices = new int[lens[1]];
|
||||
size_t indices_data_size = lens[1] * sizeof(int);
|
||||
float *indices_data = reinterpret_cast<float *>(values.data()) + lens[0];
|
||||
for (int i = 0; i < lens[1]; i++) {
|
||||
converted_indices[i] = static_cast<int>(indices_data[i]);
|
||||
}
|
||||
ret = memcpy_s(indices->addr, indices_data_size, converted_indices, indices_data_size);
|
||||
if (ret != 0) {
|
||||
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
|
||||
}
|
||||
indices->size = lens[1] * sizeof(int);
|
||||
indices->size = indices_data_size;
|
||||
delete[] converted_indices;
|
||||
|
||||
return new SparseFtrlOptimInfo(weight_addr, accum, linear, grad, indices);
|
||||
}
|
||||
|
|
|
@ -145,6 +145,7 @@ class ParameterServer {
|
|||
|
||||
std::unordered_map<Key, std::shared_ptr<PServerKernel>> optimizers_;
|
||||
std::unordered_map<Key, InputsShapePtr> optim_inputs_shape_;
|
||||
std::unordered_map<Key, InputsShapePtr> original_optim_inputs_shape_;
|
||||
std::unordered_map<Key, std::shared_ptr<OptimizerInfo>> optim_infos_;
|
||||
std::unordered_map<std::string, std::shared_ptr<OptimizerInfoBuilder>> optim_info_builders_;
|
||||
std::unordered_map<Key, std::string> weight_key_to_optims_;
|
||||
|
@ -366,19 +367,24 @@ void ParameterServer<T>::InitWeightKeyToOptims(const Key &key, const int &optim_
|
|||
template <typename T>
|
||||
void ParameterServer<T>::InitOptimInputsShape(const Keys &keys, const Values &values, const Lengths &lengths) {
|
||||
InputsShapePtr inputs_shape = std::make_shared<InputsShape>();
|
||||
InputsShapePtr original_inputs_shape = std::make_shared<InputsShape>();
|
||||
int val_idx = 0;
|
||||
const Key &key = keys[0];
|
||||
MS_LOG(INFO) << "Initializing optimizer inputs shape for key:" << key;
|
||||
if (optim_inputs_shape_.count(key) == 0) {
|
||||
original_optim_inputs_shape_[key] = original_inputs_shape;
|
||||
optim_inputs_shape_[key] = inputs_shape;
|
||||
}
|
||||
for (size_t i = 0; i < keys.size(); i++) {
|
||||
auto shape = std::make_shared<std::vector<size_t>>();
|
||||
auto original_shape = std::make_shared<std::vector<size_t>>();
|
||||
inputs_shape->push_back(shape);
|
||||
original_inputs_shape->push_back(original_shape);
|
||||
|
||||
int len = lengths[i];
|
||||
for (int j = 0; j < len; j++) {
|
||||
shape->push_back(values[val_idx++]);
|
||||
shape->push_back(values[val_idx]);
|
||||
original_shape->push_back(values[val_idx++]);
|
||||
}
|
||||
}
|
||||
if (weight_key_to_optims_.count(key) > 0) {
|
||||
|
@ -505,16 +511,27 @@ void ParameterServer<T>::UpdateWeights() {
|
|||
MS_EXCEPTION_IF_NULL(optimizer);
|
||||
|
||||
std::shared_ptr<OptimizerInfo> optim_info = optim_infos_[key];
|
||||
if (optim_info == nullptr) {
|
||||
continue;
|
||||
}
|
||||
const std::vector<kernel::AddressPtr> &inputs = optim_info->inputs();
|
||||
const std::vector<kernel::AddressPtr> &workspaces = optim_info->workspaces();
|
||||
const std::vector<kernel::AddressPtr> &outputs = optim_info->outputs();
|
||||
if (optim_info != nullptr) {
|
||||
const std::vector<kernel::AddressPtr> &inputs = optim_info->inputs();
|
||||
const std::vector<kernel::AddressPtr> &workspaces = optim_info->workspaces();
|
||||
const std::vector<kernel::AddressPtr> &outputs = optim_info->outputs();
|
||||
|
||||
optim_info->ComputeMean(worker_num_);
|
||||
optimizer->Execute(inputs, workspaces, outputs);
|
||||
optim_info->Reset();
|
||||
std::shared_ptr<std::vector<std::shared_ptr<std::vector<size_t>>>> shapes =
|
||||
std::make_shared<std::vector<std::shared_ptr<std::vector<size_t>>>>();
|
||||
std::shared_ptr<std::vector<size_t>> indices_shape = std::make_shared<std::vector<size_t>>();
|
||||
indices_shape->emplace_back(optim_info->indice_size());
|
||||
shapes->push_back(indices_shape);
|
||||
|
||||
if (original_optim_inputs_shape_.count(key) != 0) {
|
||||
for (auto &input_shapes : *(original_optim_inputs_shape_[key])) {
|
||||
shapes->push_back(input_shapes);
|
||||
}
|
||||
}
|
||||
optimizer->ReInit(shapes);
|
||||
optim_info->ComputeMean(shapes, worker_num_, pserver_num_, rank_id_);
|
||||
optimizer->Execute(inputs, workspaces, outputs);
|
||||
optim_info->Reset();
|
||||
}
|
||||
if (!is_embedding_[key]) {
|
||||
tokens_[key] = worker_num_;
|
||||
}
|
||||
|
@ -527,23 +544,26 @@ template <typename T>
|
|||
void ParameterServer<T>::AccumGrad(const Keys &keys, const Values &values, const Lengths &lengths) {
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
const Key &key = keys[0];
|
||||
std::shared_ptr<OptimizerInfo> optim_info = optim_infos_[key];
|
||||
bool no_sparse_grad = values.size() == 1 && values[0] == -100;
|
||||
if (!no_sparse_grad) {
|
||||
std::shared_ptr<OptimizerInfo> optim_info = optim_infos_[key];
|
||||
|
||||
// Create or update the optimizer info
|
||||
if (optim_info == nullptr) {
|
||||
const std::shared_ptr<OptimizerInfoBuilder> &builder = optim_info_builders_[weight_key_to_optims_[key]];
|
||||
std::shared_ptr<kernel::ps::PServerKernel> pserver_kernel = optimizers_[key];
|
||||
if (pserver_kernel == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "no optimizer found for key " << key << " optim name " << weight_key_to_optims_[key];
|
||||
// Create or update the optimizer info
|
||||
if (optim_info == nullptr) {
|
||||
const std::shared_ptr<OptimizerInfoBuilder> &builder = optim_info_builders_[weight_key_to_optims_[key]];
|
||||
std::shared_ptr<kernel::ps::PServerKernel> pserver_kernel = optimizers_[key];
|
||||
if (pserver_kernel == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "no optimizer found for key " << key << " optim name " << weight_key_to_optims_[key];
|
||||
}
|
||||
MS_EXCEPTION_IF_NULL(pserver_kernel);
|
||||
OptimizerInfo *optim =
|
||||
builder->Build(pserver_kernel, weights_[key], keys, values, lengths, optim_inputs_shape_[key], worker_num_);
|
||||
optim_info.reset(optim);
|
||||
optim_infos_[key] = optim_info;
|
||||
} else {
|
||||
optim_info->Update(values, lengths);
|
||||
optim_info->Accumulate(values, lengths);
|
||||
}
|
||||
MS_EXCEPTION_IF_NULL(pserver_kernel);
|
||||
OptimizerInfo *optim =
|
||||
builder->Build(pserver_kernel, weights_[key], keys, values, lengths, optim_inputs_shape_[key], worker_num_);
|
||||
optim_info.reset(optim);
|
||||
optim_infos_[key] = optim_info;
|
||||
} else {
|
||||
optim_info->Update(values, lengths);
|
||||
optim_info->Accumulate(values, lengths);
|
||||
}
|
||||
|
||||
grads_accum_counter_[key] += 1;
|
||||
|
@ -721,6 +741,7 @@ void ParameterServer<T>::Run(const FuncGraphPtr &func_graph) {
|
|||
return;
|
||||
}
|
||||
Init(func_graph);
|
||||
Util::SetRankId(rank_id_);
|
||||
thread_->join();
|
||||
::ps::Finalize(0, true);
|
||||
}
|
||||
|
|
|
@ -22,6 +22,8 @@
|
|||
namespace mindspore {
|
||||
namespace parallel {
|
||||
namespace ps {
|
||||
int Util::rank_id_ = -1;
|
||||
|
||||
std::unordered_map<std::string, int> Util::optimizer_to_ids{
|
||||
{kApplyMomentum, 0},
|
||||
{kSparseAdam, 1},
|
||||
|
@ -132,13 +134,63 @@ std::string Util::optimizer_node_name(int id) {
|
|||
bool Util::is_optimizer(std::string name) { return optimizer_to_ids.count(name) > 0; }
|
||||
|
||||
int Util::LocalShard(int first_dim, int rank_id, int server_num) {
|
||||
int shard_size = std::round((static_cast<float>(first_dim)) / server_num);
|
||||
int remain_size = first_dim % server_num;
|
||||
if (remain_size == 0 || rank_id < server_num - 1) {
|
||||
return shard_size;
|
||||
} else {
|
||||
return first_dim - (shard_size * (server_num - 1));
|
||||
std::map<int, int> shard_dims = AllRankLocalShard(first_dim, rank_id, server_num);
|
||||
if (shard_dims.count(rank_id) == 0) {
|
||||
MS_LOG(EXCEPTION) << "Invalid rank id " << rank_id;
|
||||
}
|
||||
return shard_dims[rank_id];
|
||||
}
|
||||
|
||||
std::map<int, int> Util::AllRankLocalShard(int first_dim, int rank_id, int server_num) {
|
||||
if (rank_id >= server_num) {
|
||||
MS_LOG(EXCEPTION) << "The rank ID " << rank_id << " should be less than the number of servers " << server_num;
|
||||
}
|
||||
std::map<int, int> shard_dims;
|
||||
for (int i = 0; i < server_num; i++) {
|
||||
shard_dims[i] = 0;
|
||||
}
|
||||
if (server_num != static_cast<int>(shard_dims.size())) {
|
||||
MS_LOG(EXCEPTION) << "Inconsistent server num " << server_num << " shard dims counter size " << shard_dims.size();
|
||||
}
|
||||
int server_index = -1;
|
||||
for (int i = 0; i < first_dim; i++) {
|
||||
server_index = (server_index + 1) % server_num;
|
||||
shard_dims[server_index] = shard_dims[server_index] + 1;
|
||||
}
|
||||
if (shard_dims.count(rank_id) == 0) {
|
||||
MS_LOG(EXCEPTION) << "Invalid rank id " << rank_id << ", total server num " << server_num;
|
||||
}
|
||||
return shard_dims;
|
||||
}
|
||||
|
||||
// Caches this process's parameter-server rank in the static rank_id_ member.
void Util::SetRankId(int rank_id) { rank_id_ = rank_id; }
|
||||
|
||||
// Returns the rank cached by SetRankId, or -1 if it was never set.
int Util::GetRankId() { return rank_id_; }
|
||||
|
||||
void Util::ReduceSparseGradient(float *gradients, int *indices, const size_t indices_size, size_t segment_size,
|
||||
const size_t first_dim_size, const size_t outer_dim_size,
|
||||
mindspore::kernel::SparseGradient *unique_sparse_grad) {
|
||||
size_t slice_segment_size = indices_size * segment_size;
|
||||
auto workspace_grad = new float[slice_segment_size];
|
||||
auto workspace_indices = new int[indices_size];
|
||||
|
||||
MS_EXCEPTION_IF_NULL(gradients);
|
||||
MS_EXCEPTION_IF_NULL(indices);
|
||||
MS_EXCEPTION_IF_NULL(workspace_grad);
|
||||
MS_EXCEPTION_IF_NULL(workspace_indices);
|
||||
|
||||
mindspore::kernel::SparseGradient workspace_sparse_grad({workspace_grad, workspace_indices, indices_size});
|
||||
mindspore::kernel::SparseGradient input_sparse_grad({gradients, indices, indices_size});
|
||||
mindspore::kernel::ReduceSparseGradientParam param;
|
||||
param.input_grad_ = &input_sparse_grad;
|
||||
param.workspace_grad_ = &workspace_sparse_grad;
|
||||
param.output_grad_ = unique_sparse_grad;
|
||||
param.max_index_ = first_dim_size;
|
||||
param.value_stride_ = outer_dim_size;
|
||||
|
||||
BucketReduceSparseGradient(param);
|
||||
delete[] workspace_grad;
|
||||
delete[] workspace_indices;
|
||||
}
|
||||
} // namespace ps
|
||||
} // namespace parallel
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "backend/kernel_compiler/common_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace parallel {
|
||||
|
@ -37,11 +38,18 @@ class Util {
|
|||
static std::string optimizer_node_name(int id);
|
||||
static bool is_optimizer(std::string name);
|
||||
static int LocalShard(int first_dim, int rank_id, int server_num);
|
||||
static std::map<int, int> AllRankLocalShard(int first_dim, int rank_id, int server_num);
|
||||
static void SetRankId(int rank_id);
|
||||
static int GetRankId();
|
||||
static void ReduceSparseGradient(float *gradients, int *indices, const size_t indices_size, size_t segment_size,
|
||||
const size_t first_dim_size, const size_t outer_dim_size,
|
||||
mindspore::kernel::SparseGradient *unique_sparse_grad);
|
||||
|
||||
private:
|
||||
static std::unordered_map<std::string, int> optimizer_to_ids;
|
||||
static std::unordered_map<int, std::string> id_to_optimizers;
|
||||
static std::unordered_map<int, std::string> id_to_optimizer_nodes;
|
||||
static int rank_id_;
|
||||
};
|
||||
} // namespace ps
|
||||
} // namespace parallel
|
||||
|
|
|
@ -95,6 +95,32 @@ void Worker<T>::Run() {
|
|||
|
||||
template <typename T>
|
||||
void Worker<T>::Push(const std::vector<size_t> &keys, std::vector<uintptr_t> addrs, const std::vector<int> &sizes) {
|
||||
if (keys.size() == 0) {
|
||||
MS_LOG(EXCEPTION) << "key size should be greater than zero";
|
||||
}
|
||||
if (key_to_optimId_.count(keys[0]) == 0) {
|
||||
MS_LOG(EXCEPTION) << "no optim id found for key" << keys[0];
|
||||
}
|
||||
Key key = keys[0];
|
||||
int optim_id = key_to_optimId_[key];
|
||||
bool is_sparse = false;
|
||||
if (optim_id == 1 || optim_id == 2 || optim_id == 3) {
|
||||
is_sparse = true;
|
||||
}
|
||||
int grad_index = -1;
|
||||
int indice_index = -1;
|
||||
|
||||
// Sparse adam gradient
|
||||
if (optim_id == 1 || optim_id == 2) {
|
||||
grad_index = 6;
|
||||
indice_index = 7;
|
||||
|
||||
// Sparse ftrl gradient
|
||||
} else if (optim_id == 3) {
|
||||
grad_index = 0;
|
||||
indice_index = 1;
|
||||
}
|
||||
|
||||
size_t total_size = 0;
|
||||
for (auto size : sizes) {
|
||||
total_size += size;
|
||||
|
@ -109,10 +135,22 @@ void Worker<T>::Push(const std::vector<size_t> &keys, std::vector<uintptr_t> add
|
|||
}
|
||||
offset += sizes[i] * sizeof(T);
|
||||
}
|
||||
|
||||
while (!kv_worker_->IsReadyForPush(keys[0])) {
|
||||
continue;
|
||||
}
|
||||
kv_worker_->PushData(::ps::SArray<::ps::Key>(keys), total_buffer, ::ps::SArray<int>(sizes));
|
||||
if (!is_sparse) {
|
||||
kv_worker_->PushData(::ps::SArray<::ps::Key>(keys), total_buffer, ::ps::SArray<int>(sizes));
|
||||
} else {
|
||||
std::vector<int> &var_shape = key_to_optim_shapes_[key][0];
|
||||
int first_dim_size = var_shape[0];
|
||||
int outer_dim_size = 1;
|
||||
for (size_t i = 1; i < var_shape.size(); ++i) {
|
||||
outer_dim_size *= var_shape[i];
|
||||
}
|
||||
kv_worker_->PushSparseData(::ps::SArray<::ps::Key>(keys), total_buffer, ::ps::SArray<int>(sizes), grad_index,
|
||||
indice_index, first_dim_size, outer_dim_size);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
|
|
@ -17,14 +17,16 @@
|
|||
#ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_PS_WORKER_PROXY_H_
|
||||
#define MINDSPORE_CCSRC_FRONTEND_PARALLEL_PS_WORKER_PROXY_H_
|
||||
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <unordered_set>
|
||||
#include "ps/ps.h"
|
||||
#include "frontend/parallel/ps/util.h"
|
||||
#include "backend/kernel_compiler/common_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace parallel {
|
||||
|
@ -36,23 +38,26 @@ class WorkerProxy : public ::ps::KVWorker<T> {
|
|||
using Callback = std::function<void()>;
|
||||
using SlicedKVs = std::vector<std::pair<bool, ::ps::KVPairs<T>>>;
|
||||
using Slicer = std::function<void(int ts, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &ranges,
|
||||
SlicedKVs *sliced)>;
|
||||
SlicedKVs *sliced, const std::map<int, int> &attrs)>;
|
||||
using ::ps::SimpleApp::obj_;
|
||||
explicit WorkerProxy(int app_id, int customer_id, int lookup_customer_id, int general_customer_id)
|
||||
: Worker(app_id, customer_id) {
|
||||
server_num_ = ::ps::NumServers();
|
||||
Util::SetRankId(::ps::MyRank());
|
||||
using std::placeholders::_1;
|
||||
using std::placeholders::_2;
|
||||
using std::placeholders::_3;
|
||||
using std::placeholders::_4;
|
||||
using std::placeholders::_5;
|
||||
lookup_customer_ = std::unique_ptr<::ps::Customer>(
|
||||
new ::ps::Customer(app_id, lookup_customer_id, std::bind(&WorkerProxy<T>::ProcessLookupResult, this, _1)));
|
||||
general_customer_ = std::unique_ptr<::ps::Customer>(
|
||||
new ::ps::Customer(app_id, general_customer_id, std::bind(&WorkerProxy<T>::ProcessResponse, this, _1)));
|
||||
lookup_slicer_ = std::bind(&WorkerProxy<T>::LookupIdSlicer, this, _1, _2, _3, _4);
|
||||
broadcast_slicer_ = std::bind(&WorkerProxy<T>::BroadcastSlicer, this, _1, _2, _3, _4);
|
||||
round_robin_slicer_ = std::bind(&WorkerProxy<T>::RoundRobinSlicer, this, _1, _2, _3, _4);
|
||||
worker_init_embedding_slicer_ = std::bind(&WorkerProxy<T>::WorkerInitEmbeddingSlicer, this, _1, _2, _3, _4);
|
||||
lookup_slicer_ = std::bind(&WorkerProxy<T>::LookupIdSlicer, this, _1, _2, _3, _4, _5);
|
||||
sparse_slicer_ = std::bind(&WorkerProxy<T>::SparseSlicer, this, _1, _2, _3, _4, _5);
|
||||
broadcast_slicer_ = std::bind(&WorkerProxy<T>::BroadcastSlicer, this, _1, _2, _3, _4, _5);
|
||||
round_robin_slicer_ = std::bind(&WorkerProxy<T>::RoundRobinSlicer, this, _1, _2, _3, _4, _5);
|
||||
worker_init_embedding_slicer_ = std::bind(&WorkerProxy<T>::WorkerInitEmbeddingSlicer, this, _1, _2, _3, _4, _5);
|
||||
}
|
||||
~WorkerProxy() override = default;
|
||||
|
||||
|
@ -67,6 +72,8 @@ class WorkerProxy : public ::ps::KVWorker<T> {
|
|||
bool IsReadyForPull(const Key &key);
|
||||
void PushData(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray<T> &vals, const ::ps::SArray<int> &lens = {},
|
||||
int cmd = 0, int priority = 0);
|
||||
void PushSparseData(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray<T> &vals, const ::ps::SArray<int> &lens,
|
||||
size_t grad_index, size_t indice_index, size_t first_dim_size, size_t outer_dim_size);
|
||||
void PullData(const ::ps::SArray<::ps::Key> &keys, ::ps::SArray<T> *vals, ::ps::SArray<int> *lens = nullptr,
|
||||
int cmd = 0, int priority = 0);
|
||||
void Finalize();
|
||||
|
@ -78,27 +85,37 @@ class WorkerProxy : public ::ps::KVWorker<T> {
|
|||
int AddGeneralRspCB(const ::ps::SArray<::ps::Key> &keys, ::ps::SArray<T> *vals, ::ps::SArray<int> *lens, int cmd,
|
||||
const Callback &cb);
|
||||
void LookupIdSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
|
||||
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced);
|
||||
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced, const std::map<int, int> &attrs);
|
||||
void SparseSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
|
||||
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced, const std::map<int, int> &attrs);
|
||||
void BroadcastSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
|
||||
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced);
|
||||
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced, const std::map<int, int> &attrs);
|
||||
void RoundRobinSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
|
||||
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced);
|
||||
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced, const std::map<int, int> &attrs);
|
||||
void WorkerInitEmbeddingSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
|
||||
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced);
|
||||
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced,
|
||||
const std::map<int, int> &attrs);
|
||||
void ProcessLookupResult(const ::ps::Message &msg);
|
||||
void ProcessResponse(const ::ps::Message &msg);
|
||||
void Send(::ps::Customer *customer, int timestamp, bool push, bool pull, int cmd, const ::ps::KVPairs<T> &kvs,
|
||||
const Slicer &slicer);
|
||||
const Slicer &slicer, std::map<int, int> attrs = {});
|
||||
void AddKeyByHashMod(const ::ps::Key &key);
|
||||
|
||||
void PrepareSparseGradient(const size_t begin, const size_t end, const std::unordered_set<int> &distinct_ids,
|
||||
const std::vector<std::pair<int, T *>> &indice_to_grad, const int *all_indice,
|
||||
const size_t segment_size, T *gradient, int *indice);
|
||||
void BuildSparseValue(const ::ps::SArray<int> &lengths, const size_t grad_index, const size_t indice_index,
|
||||
const T *original_data, const T *grads, int *indices, ::ps::SArray<T> *reduced_data);
|
||||
|
||||
int server_num_;
|
||||
std::unique_ptr<::ps::Customer> lookup_customer_;
|
||||
std::unique_ptr<::ps::Customer> general_customer_;
|
||||
std::unordered_map<::ps::Key, std::shared_ptr<std::vector<::ps::Range>>> embedding_table_ranges_;
|
||||
std::unordered_map<int, std::vector<::ps::KVPairs<T>>> lookup_results_;
|
||||
std::unordered_map<int, ::ps::KVPairs<T>> gathered_response_;
|
||||
std::unordered_map<int, std::map<int, ::ps::KVPairs<T>>> gathered_response_;
|
||||
std::mutex mutex_;
|
||||
Slicer lookup_slicer_;
|
||||
Slicer sparse_slicer_;
|
||||
Slicer broadcast_slicer_;
|
||||
Slicer round_robin_slicer_;
|
||||
Slicer worker_init_embedding_slicer_;
|
||||
|
@ -220,6 +237,28 @@ void WorkerProxy<T>::PushData(const ::ps::SArray<::ps::Key> &keys, const ::ps::S
|
|||
general_customer_->WaitRequest(ts);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void WorkerProxy<T>::PushSparseData(const ::ps::SArray<::ps::Key> &keys, const ::ps::SArray<T> &vals,
|
||||
const ::ps::SArray<int> &lens, size_t grad_index, size_t indice_index,
|
||||
size_t first_dim_size, size_t outer_dim_size) {
|
||||
int ts = AddGeneralRspCB(keys, nullptr, nullptr, 0, nullptr);
|
||||
::ps::KVPairs<T> kvs;
|
||||
kvs.keys = keys;
|
||||
kvs.vals = vals;
|
||||
kvs.lens = lens;
|
||||
int cmd = 0;
|
||||
if (embedding_table_ranges_.count(keys[0])) {
|
||||
std::map<int, int> attrs{{0, grad_index}, {1, indice_index}, {2, first_dim_size}, {3, outer_dim_size}};
|
||||
Send(general_customer_.get(), ts, true, false, cmd, kvs, sparse_slicer_, attrs);
|
||||
} else {
|
||||
Send(general_customer_.get(), ts, true, false, cmd, kvs, round_robin_slicer_);
|
||||
}
|
||||
if (expected_result_count_[ts] < server_num_) {
|
||||
general_customer_->AddResponse(ts, server_num_ - expected_result_count_[ts]);
|
||||
}
|
||||
general_customer_->WaitRequest(ts);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void WorkerProxy<T>::PullData(const ::ps::SArray<::ps::Key> &keys, ::ps::SArray<T> *vals, ::ps::SArray<int> *lens,
|
||||
int cmd, int priority) {
|
||||
|
@ -298,12 +337,19 @@ int WorkerProxy<T>::AddGeneralRspCB(const ::ps::SArray<::ps::Key> &keys, ::ps::S
|
|||
int ts = general_customer_->NewRequest(::ps::kServerGroup);
|
||||
const auto &callback = [this, ts, keys, vals, lens, cb]() mutable {
|
||||
mutex_.lock();
|
||||
auto &kvs = gathered_response_[ts];
|
||||
std::map<int, ::ps::KVPairs<T>> server_kvs = gathered_response_[ts];
|
||||
mutex_.unlock();
|
||||
|
||||
*vals = kvs.vals;
|
||||
if (lens) {
|
||||
*lens = kvs.lens;
|
||||
vals->clear();
|
||||
for (auto kvs : server_kvs) {
|
||||
for (auto val : kvs.second.vals) {
|
||||
vals->push_back(val);
|
||||
}
|
||||
if (lens) {
|
||||
for (auto len : kvs.second.lens) {
|
||||
lens->push_back(len);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mutex_.lock();
|
||||
|
@ -319,7 +365,8 @@ int WorkerProxy<T>::AddGeneralRspCB(const ::ps::SArray<::ps::Key> &keys, ::ps::S
|
|||
|
||||
template <typename T>
|
||||
void WorkerProxy<T>::LookupIdSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
|
||||
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced) {
|
||||
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced,
|
||||
const std::map<int, int> &attrs) {
|
||||
int *lookup_ids = send.lens.data();
|
||||
size_t id_size = send.lens.size();
|
||||
|
||||
|
@ -357,9 +404,193 @@ void WorkerProxy<T>::LookupIdSlicer(int timestamp, const ::ps::KVPairs<T> &send,
|
|||
}
|
||||
}
|
||||
|
||||
template <typename T>
// Slices one sparse-gradient push across the per-server embedding-table ranges.
// For each server range, gathers the (index, gradient-row) pairs that fall in
// that range, deduplicates them via Util::ReduceSparseGradient, and emits a
// reduced KVPairs for that server. Servers with no matching indices receive a
// single sentinel value of -100 (matched by ParameterServer::AccumGrad's
// "values.size() == 1 && values[0] == -100" no-sparse-grad check).
// `attrs` keys: 0 = gradient index, 1 = indice index, 2 = first dim size,
// 3 = outer dim size (as packed by PushSparseData).
void WorkerProxy<T>::SparseSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
                                  std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced,
                                  const std::map<int, int> &attrs) {
  // Init variables
  T *data = send.vals.data();

  if (attrs.count(0) == 0 || attrs.count(1) == 0 || attrs.count(2) == 0 || attrs.count(3) == 0) {
    MS_LOG(EXCEPTION) << "Invalid attrs keys";
  }
  auto iter = attrs.find(0);
  size_t grad_index = static_cast<size_t>(iter->second);
  iter = attrs.find(1);
  size_t indice_index = static_cast<size_t>(iter->second);
  iter = attrs.find(2);
  size_t first_dim_size = static_cast<size_t>(iter->second);
  iter = attrs.find(3);
  size_t outer_dim_size = static_cast<size_t>(iter->second);

  // Elements per gradient row; lens[] holds per-field element counts.
  int grad_size = send.lens[grad_index];
  int indice_size = send.lens[indice_index];
  int segment_size = grad_size / indice_size;

  // Byte-free element offsets of the gradient and indice fields inside vals.
  int grad_offset = 0;
  int indice_offset = 0;
  for (size_t i = 0; i < grad_index; i++) {
    grad_offset += send.lens[i];
  }
  for (size_t j = 0; j < indice_index; j++) {
    indice_offset += send.lens[j];
  }

  T *grad_data = data + grad_offset;
  // Indices are stored as ints inside the T-typed value buffer.
  int *indice_data = reinterpret_cast<int *>(data) + indice_offset;

  // Build the mappings of indice to gradient
  std::vector<std::pair<int, T *>> indice_to_grads;
  for (int i = 0; i < indice_size; i++) {
    int indice = indice_data[i];
    T *grad = grad_data + i * segment_size;
    indice_to_grads.push_back(std::make_pair(indice, grad));
  }

  const Key &key = send.keys[0];
  const std::vector<::ps::Range> &ranges = *(embedding_table_ranges_[key]);
  sliced->resize(ranges.size());

  // Construct reduced sparse data for each server
  for (size_t i = 0; i < ranges.size(); i++) {
    const ::ps::Range &range = ranges[i];
    const auto &begin = range.begin();
    const auto &end = range.end();
    auto &kvs = sliced->at(i).second;
    kvs.keys = send.keys;
    kvs.lens = send.lens;

    // Prepare the sparse gradient and indice
    // Collect the indices that fall in this server's range.
    // NOTE(review): `indice <= end` treats ::ps::Range's end as inclusive —
    // confirm against ps-lite's Range semantics.
    std::vector<int> indice_ids;
    std::unordered_set<int> distinct_ids;
    for (int j = 0; j < indice_size; j++) {
      size_t indice = static_cast<size_t>(indice_data[j]);
      if (indice >= begin && indice <= end) {
        indice_ids.push_back(indice);
        distinct_ids.insert(indice);
      }
    }
    size_t indices_size = indice_ids.size();
    if (indices_size > 0) {
      int slice_segment_size = indices_size * segment_size;
      // Scratch buffers: raw slice of this range's rows, then the deduplicated
      // result. All four arrays are freed at the end of this branch.
      T *src_grad_data = new T[slice_segment_size];
      int *src_indice_data = new int[indices_size];
      PrepareSparseGradient(begin, end, distinct_ids, indice_to_grads, indice_data, segment_size, src_grad_data,
                            src_indice_data);

      // Reduce the sparse gradient and indice
      T *new_grad = new T[slice_segment_size];
      int *new_indices = new int[indices_size];
      mindspore::kernel::SparseGradient unique_sparse_grad({new_grad, new_indices, indices_size});
      Util::ReduceSparseGradient(src_grad_data, src_indice_data, indices_size, segment_size, first_dim_size,
                                 outer_dim_size, &unique_sparse_grad);

      // Update the length of reduce sparse gradient and indice
      ::ps::SArray<int> reduced_lens;
      reduced_lens.CopyFrom(kvs.lens);
      reduced_lens[grad_index] = unique_sparse_grad.indices_size_ * segment_size;
      reduced_lens[indice_index] = unique_sparse_grad.indices_size_;

      // Build the sparse value to be sent
      size_t total_size = 0;
      for (auto size : reduced_lens) {
        total_size += size;
      }
      ::ps::SArray<T> reduced_data(total_size, 0);
      // Reassemble the full value buffer: non-sparse fields copied from the
      // original data, gradient/indice fields replaced by the reduced ones.
      BuildSparseValue(reduced_lens, grad_index, indice_index, data, unique_sparse_grad.value_,
                       unique_sparse_grad.indices_, &reduced_data);

      kvs.lens = reduced_lens;
      kvs.vals = reduced_data;

      delete[] src_grad_data;
      delete[] src_indice_data;
      delete[] new_grad;
      delete[] new_indices;
    }

    if (indices_size <= 0) {
      // No rows for this server: send the -100 sentinel with empty lens so the
      // server treats the push as "no sparse grad".
      ::ps::SArray<T> no_keys;
      ::ps::SArray<T> no_vals;
      ::ps::SArray<T> no_lens;
      no_keys.push_back(key);
      no_vals.push_back(-100);
      kvs.vals = no_vals;
      kvs.lens = no_lens;
    }
    sliced->at(i).first = true;
    expected_result_count_[timestamp] += 1;
  }
}
|
||||
|
||||
template <typename T>
|
||||
void WorkerProxy<T>::PrepareSparseGradient(const size_t begin, const size_t end,
|
||||
const std::unordered_set<int> &distinct_ids,
|
||||
const std::vector<std::pair<int, T *>> &indice_to_grads,
|
||||
const int *all_indice, const size_t segment_size, T *gradient,
|
||||
int *indices) {
|
||||
int offset = 0;
|
||||
int index = 0;
|
||||
size_t segment_data_size = segment_size * sizeof(T);
|
||||
for (auto &pair : indice_to_grads) {
|
||||
if (distinct_ids.count(pair.first) == 0) {
|
||||
continue;
|
||||
}
|
||||
indices[index++] = pair.first;
|
||||
auto ret = memcpy_s(gradient + offset, segment_data_size, pair.second, segment_data_size);
|
||||
if (ret != 0) {
|
||||
MS_LOG(ERROR) << "memcpy_s error, errorno(" << ret << ")";
|
||||
}
|
||||
offset += segment_size;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void WorkerProxy<T>::BuildSparseValue(const ::ps::SArray<int> &lengths, const size_t grad_index,
|
||||
const size_t indice_index, const T *original_data, const T *grads, int *indices,
|
||||
::ps::SArray<T> *reduced_data) {
|
||||
int offset = 0;
|
||||
for (size_t i = 0; i < lengths.size(); i++) {
|
||||
if (i != grad_index && i != indice_index) {
|
||||
int data_size = lengths[i] * sizeof(T);
|
||||
auto ret = memcpy_s(reduced_data->data() + offset, data_size, original_data + offset, data_size);
|
||||
if (ret != 0) {
|
||||
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
|
||||
}
|
||||
}
|
||||
offset += lengths[i];
|
||||
}
|
||||
|
||||
// Fill the reduced gradient
|
||||
int grad_offset = 0;
|
||||
for (size_t i = 0; i < grad_index; i++) {
|
||||
grad_offset += lengths[i];
|
||||
}
|
||||
int data_size = lengths[grad_index] * sizeof(T);
|
||||
auto ret = memcpy_s(reduced_data->data() + grad_offset, data_size, grads, data_size);
|
||||
if (ret != 0) {
|
||||
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
|
||||
}
|
||||
|
||||
// Fill the reduced indice
|
||||
int indice_offset = grad_offset + lengths[grad_index];
|
||||
data_size = lengths[indice_index] * sizeof(T);
|
||||
T *indice_data = reduced_data->data() + indice_offset;
|
||||
T *convert = new T[lengths[indice_index]];
|
||||
for (int i = 0; i < lengths[indice_index]; i++) {
|
||||
convert[i] = static_cast<T>(indices[i]);
|
||||
}
|
||||
ret = memcpy_s(indice_data, data_size, convert, data_size);
|
||||
if (ret != 0) {
|
||||
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
|
||||
}
|
||||
delete[] convert;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void WorkerProxy<T>::BroadcastSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
|
||||
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced) {
|
||||
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced,
|
||||
const std::map<int, int> &attr) {
|
||||
sliced->resize(server_num_);
|
||||
for (int i = 0; i < server_num_; i++) {
|
||||
sliced->at(i).first = true;
|
||||
|
@ -370,7 +601,8 @@ void WorkerProxy<T>::BroadcastSlicer(int timestamp, const ::ps::KVPairs<T> &send
|
|||
|
||||
template <typename T>
|
||||
void WorkerProxy<T>::RoundRobinSlicer(int timestamp, const ::ps::KVPairs<T> &send, const std::vector<::ps::Range> &,
|
||||
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced) {
|
||||
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced,
|
||||
const std::map<int, int> &attr) {
|
||||
sliced->resize(server_num_);
|
||||
auto keys = send.keys;
|
||||
auto vals = send.vals;
|
||||
|
@ -407,7 +639,8 @@ void WorkerProxy<T>::RoundRobinSlicer(int timestamp, const ::ps::KVPairs<T> &sen
|
|||
template <typename T>
|
||||
void WorkerProxy<T>::WorkerInitEmbeddingSlicer(int timestamp, const ::ps::KVPairs<T> &send,
|
||||
const std::vector<::ps::Range> &,
|
||||
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced) {
|
||||
std::vector<std::pair<bool, ::ps::KVPairs<T>>> *sliced,
|
||||
const std::map<int, int> &attrs) {
|
||||
sliced->resize(server_num_);
|
||||
auto keys = send.keys;
|
||||
auto vals = send.vals;
|
||||
|
@ -442,7 +675,7 @@ void WorkerProxy<T>::ProcessLookupResult(const ::ps::Message &msg) {
|
|||
lookup_results_[ts].push_back(kvs);
|
||||
mutex_.unlock();
|
||||
}
|
||||
if (lookup_customer_->NumResponse(ts) == expected_result_count_[ts] - 1) {
|
||||
if (lookup_customer_->NumResponse(ts) + 1 == server_num_) {
|
||||
const auto &cb = lookup_callbacks_[ts];
|
||||
cb();
|
||||
lookup_callbacks_.erase(ts);
|
||||
|
@ -462,15 +695,8 @@ void WorkerProxy<T>::ProcessResponse(const ::ps::Message &msg) {
|
|||
kvs.lens = msg.data[2];
|
||||
}
|
||||
mutex_.lock();
|
||||
for (auto key : kvs.keys) {
|
||||
gathered_response_[ts].keys.push_back(key);
|
||||
}
|
||||
for (auto val : kvs.vals) {
|
||||
gathered_response_[ts].vals.push_back(val);
|
||||
}
|
||||
for (auto len : kvs.lens) {
|
||||
gathered_response_[ts].lens.push_back(len);
|
||||
}
|
||||
int rsp_server_rank = ::ps::Postoffice::Get()->IDtoRank(msg.meta.sender);
|
||||
gathered_response_[ts][rsp_server_rank] = kvs;
|
||||
mutex_.unlock();
|
||||
if (general_customer_->NumResponse(ts) + 1 == server_num_) {
|
||||
const auto &cb = general_callbacks_[ts];
|
||||
|
@ -482,9 +708,9 @@ void WorkerProxy<T>::ProcessResponse(const ::ps::Message &msg) {
|
|||
|
||||
template <typename T>
|
||||
void WorkerProxy<T>::Send(::ps::Customer *customer, int timestamp, bool push, bool pull, int cmd,
|
||||
const ::ps::KVPairs<T> &kvs, const Slicer &slicer) {
|
||||
const ::ps::KVPairs<T> &kvs, const Slicer &slicer, std::map<int, int> attrs) {
|
||||
SlicedKVs sliced;
|
||||
slicer(timestamp, kvs, ::ps::Postoffice::Get()->GetServerKeyRanges(), &sliced);
|
||||
slicer(timestamp, kvs, ::ps::Postoffice::Get()->GetServerKeyRanges(), &sliced, attrs);
|
||||
|
||||
for (size_t i = 0; i < sliced.size(); i++) {
|
||||
const auto &s = sliced[i];
|
||||
|
|
|
@ -191,8 +191,8 @@ std::shared_ptr<ImageFolderDataset> ImageFolder(const std::string &dataset_dir,
|
|||
}
|
||||
|
||||
// Function to create a ManifestDataset.
|
||||
std::shared_ptr<ManifestDataset> Manifest(std::string dataset_file, std::string usage,
|
||||
std::shared_ptr<SamplerObj> sampler,
|
||||
std::shared_ptr<ManifestDataset> Manifest(const std::string &dataset_file, const std::string &usage,
|
||||
const std::shared_ptr<SamplerObj> &sampler,
|
||||
const std::map<std::string, int32_t> &class_indexing, bool decode) {
|
||||
auto ds = std::make_shared<ManifestDataset>(dataset_file, usage, sampler, class_indexing, decode);
|
||||
|
||||
|
@ -211,14 +211,14 @@ std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::s
|
|||
// Function to overload "+" operator to concat two datasets
|
||||
std::shared_ptr<ConcatDataset> operator+(const std::shared_ptr<Dataset> &datasets1,
|
||||
const std::shared_ptr<Dataset> &datasets2) {
|
||||
std::shared_ptr<ConcatDataset> ds = std::make_shared<ConcatDataset>(std::vector({datasets1, datasets2}));
|
||||
std::shared_ptr<ConcatDataset> ds = std::make_shared<ConcatDataset>(std::vector({datasets2, datasets1}));
|
||||
|
||||
// Call derived class validation method.
|
||||
return ds->ValidateParams() ? ds : nullptr;
|
||||
}
|
||||
|
||||
// Function to create a TextFileDataset.
|
||||
std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &dataset_files, int32_t num_samples,
|
||||
std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &dataset_files, int64_t num_samples,
|
||||
ShuffleMode shuffle, int32_t num_shards, int32_t shard_id) {
|
||||
auto ds = std::make_shared<TextFileDataset>(dataset_files, num_samples, shuffle, num_shards, shard_id);
|
||||
|
||||
|
@ -580,13 +580,6 @@ bool SchemaObj::from_json(nlohmann::json json_obj) {
|
|||
|
||||
// OTHER FUNCTIONS
|
||||
|
||||
// Helper function to create default RandomSampler.
|
||||
std::shared_ptr<SamplerObj> CreateDefaultSampler() {
|
||||
const int32_t num_samples = 0; // 0 means to sample all ids.
|
||||
bool replacement = false;
|
||||
return std::make_shared<RandomSamplerObj>(replacement, num_samples);
|
||||
}
|
||||
|
||||
// Helper function to compute a default shuffle size
|
||||
Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows,
|
||||
int64_t *shuffle_size) {
|
||||
|
@ -682,6 +675,36 @@ bool ValidateDatasetShardParams(const std::string &dataset_name, int32_t num_sha
|
|||
return true;
|
||||
}
|
||||
|
||||
// Helper function to validate dataset sampler parameter
|
||||
bool ValidateDatasetSampler(const std::string &dataset_name, const std::shared_ptr<SamplerObj> &sampler) {
|
||||
if (sampler == nullptr) {
|
||||
MS_LOG(ERROR) << dataset_name << ": Sampler is not constructed correctly, sampler: nullptr";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Helper function to validate dataset input/output column parameter
|
||||
bool ValidateDatasetColumnParam(const std::string &dataset_name, const std::string &column_param,
|
||||
const std::vector<std::string> &columns) {
|
||||
if (columns.empty()) {
|
||||
MS_LOG(ERROR) << dataset_name << ":" << column_param << " should not be empty";
|
||||
return false;
|
||||
}
|
||||
for (uint32_t i = 0; i < columns.size(); ++i) {
|
||||
if (columns[i].empty()) {
|
||||
MS_LOG(ERROR) << dataset_name << ":" << column_param << "[" << i << "] should not be empty";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
std::set<std::string> columns_set(columns.begin(), columns.end());
|
||||
if (columns_set.size() != columns.size()) {
|
||||
MS_LOG(ERROR) << dataset_name << ":" << column_param << ": Every column name should not be same with others";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* ####################################### Derived Dataset classes ################################# */
|
||||
|
||||
// DERIVED DATASET CLASSES LEAF-NODE DATASETS
|
||||
|
@ -701,6 +724,9 @@ bool CelebADataset::ValidateParams() {
|
|||
if (!ValidateDatasetDirParam("CelebADataset", dataset_dir_)) {
|
||||
return false;
|
||||
}
|
||||
if (!ValidateDatasetSampler("CelebADataset", sampler_)) {
|
||||
return false;
|
||||
}
|
||||
std::set<std::string> dataset_type_list = {"all", "train", "valid", "test"};
|
||||
auto iter = dataset_type_list.find(dataset_type_);
|
||||
if (iter == dataset_type_list.end()) {
|
||||
|
@ -715,11 +741,6 @@ std::vector<std::shared_ptr<DatasetOp>> CelebADataset::Build() {
|
|||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||
|
||||
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
|
||||
if (sampler_ == nullptr) {
|
||||
sampler_ = CreateDefaultSampler();
|
||||
}
|
||||
|
||||
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
|
||||
RETURN_EMPTY_IF_ERROR(
|
||||
schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
|
||||
|
@ -736,18 +757,15 @@ std::vector<std::shared_ptr<DatasetOp>> CelebADataset::Build() {
|
|||
Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler)
|
||||
: dataset_dir_(dataset_dir), sampler_(sampler) {}
|
||||
|
||||
bool Cifar10Dataset::ValidateParams() { return ValidateDatasetDirParam("Cifar10Dataset", dataset_dir_); }
|
||||
bool Cifar10Dataset::ValidateParams() {
|
||||
return ValidateDatasetDirParam("Cifar10Dataset", dataset_dir_) && ValidateDatasetSampler("Cifar10Dataset", sampler_);
|
||||
}
|
||||
|
||||
// Function to build CifarOp for Cifar10
|
||||
std::vector<std::shared_ptr<DatasetOp>> Cifar10Dataset::Build() {
|
||||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||
|
||||
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
|
||||
if (sampler_ == nullptr) {
|
||||
sampler_ = CreateDefaultSampler();
|
||||
}
|
||||
|
||||
// Do internal Schema generation.
|
||||
auto schema = std::make_unique<DataSchema>();
|
||||
RETURN_EMPTY_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1)));
|
||||
|
@ -765,18 +783,16 @@ std::vector<std::shared_ptr<DatasetOp>> Cifar10Dataset::Build() {
|
|||
Cifar100Dataset::Cifar100Dataset(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler)
|
||||
: dataset_dir_(dataset_dir), sampler_(sampler) {}
|
||||
|
||||
bool Cifar100Dataset::ValidateParams() { return ValidateDatasetDirParam("Cifar100Dataset", dataset_dir_); }
|
||||
bool Cifar100Dataset::ValidateParams() {
|
||||
return ValidateDatasetDirParam("Cifar100Dataset", dataset_dir_) &&
|
||||
ValidateDatasetSampler("Cifar100Dataset", sampler_);
|
||||
}
|
||||
|
||||
// Function to build CifarOp for Cifar100
|
||||
std::vector<std::shared_ptr<DatasetOp>> Cifar100Dataset::Build() {
|
||||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||
|
||||
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
|
||||
if (sampler_ == nullptr) {
|
||||
sampler_ = CreateDefaultSampler();
|
||||
}
|
||||
|
||||
// Do internal Schema generation.
|
||||
auto schema = std::make_unique<DataSchema>();
|
||||
RETURN_EMPTY_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1)));
|
||||
|
@ -987,6 +1003,9 @@ bool CocoDataset::ValidateParams() {
|
|||
if (!ValidateDatasetDirParam("CocoDataset", dataset_dir_)) {
|
||||
return false;
|
||||
}
|
||||
if (!ValidateDatasetSampler("CocoDataset", sampler_)) {
|
||||
return false;
|
||||
}
|
||||
Path annotation_file(annotation_file_);
|
||||
if (!annotation_file.Exists()) {
|
||||
MS_LOG(ERROR) << "annotation_file is invalid or not exist";
|
||||
|
@ -1006,11 +1025,6 @@ std::vector<std::shared_ptr<DatasetOp>> CocoDataset::Build() {
|
|||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||
|
||||
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
|
||||
if (sampler_ == nullptr) {
|
||||
sampler_ = CreateDefaultSampler();
|
||||
}
|
||||
|
||||
CocoOp::TaskType task_type;
|
||||
if (task_ == "Detection") {
|
||||
task_type = CocoOp::TaskType::Detection;
|
||||
|
@ -1100,6 +1114,12 @@ bool CSVDataset::ValidateParams() {
|
|||
return false;
|
||||
}
|
||||
|
||||
if (!column_names_.empty()) {
|
||||
if (!ValidateDatasetColumnParam("CSVDataset", "column_names", column_names_)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1155,17 +1175,15 @@ ImageFolderDataset::ImageFolderDataset(std::string dataset_dir, bool decode, std
|
|||
class_indexing_(class_indexing),
|
||||
exts_(extensions) {}
|
||||
|
||||
bool ImageFolderDataset::ValidateParams() { return ValidateDatasetDirParam("ImageFolderDataset", dataset_dir_); }
|
||||
bool ImageFolderDataset::ValidateParams() {
|
||||
return ValidateDatasetDirParam("ImageFolderDataset", dataset_dir_) &&
|
||||
ValidateDatasetSampler("ImageFolderDataset", sampler_);
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<DatasetOp>> ImageFolderDataset::Build() {
|
||||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||
|
||||
// If user does not specify Sampler, create a default sampler, i.e., RandomSampler.
|
||||
if (sampler_ == nullptr) {
|
||||
sampler_ = CreateDefaultSampler();
|
||||
}
|
||||
|
||||
// Do internal Schema generation.
|
||||
// This arg is exist in ImageFolderOp, but not externalized (in Python API).
|
||||
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
|
||||
|
@ -1180,7 +1198,8 @@ std::vector<std::shared_ptr<DatasetOp>> ImageFolderDataset::Build() {
|
|||
return node_ops;
|
||||
}
|
||||
|
||||
ManifestDataset::ManifestDataset(std::string dataset_file, std::string usage, std::shared_ptr<SamplerObj> sampler,
|
||||
ManifestDataset::ManifestDataset(const std::string &dataset_file, const std::string &usage,
|
||||
const std::shared_ptr<SamplerObj> &sampler,
|
||||
const std::map<std::string, int32_t> &class_indexing, bool decode)
|
||||
: dataset_file_(dataset_file), usage_(usage), decode_(decode), class_index_(class_indexing), sampler_(sampler) {}
|
||||
|
||||
|
@ -1190,6 +1209,9 @@ bool ManifestDataset::ValidateParams() {
|
|||
MS_LOG(ERROR) << "dataset file: [" << dataset_file_ << "] is invalid or not exist";
|
||||
return false;
|
||||
}
|
||||
if (!ValidateDatasetSampler("ManifestDataset", sampler_)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<std::string> usage_list = {"train", "eval", "inference"};
|
||||
if (find(usage_list.begin(), usage_list.end(), usage_) == usage_list.end()) {
|
||||
|
@ -1204,11 +1226,6 @@ std::vector<std::shared_ptr<DatasetOp>> ManifestDataset::Build() {
|
|||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||
|
||||
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
|
||||
if (sampler_ == nullptr) {
|
||||
sampler_ = CreateDefaultSampler();
|
||||
}
|
||||
|
||||
// Do internal Schema generation.
|
||||
auto schema = std::make_unique<DataSchema>();
|
||||
RETURN_EMPTY_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1)));
|
||||
|
@ -1228,17 +1245,14 @@ std::vector<std::shared_ptr<DatasetOp>> ManifestDataset::Build() {
|
|||
MnistDataset::MnistDataset(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler)
|
||||
: dataset_dir_(dataset_dir), sampler_(sampler) {}
|
||||
|
||||
bool MnistDataset::ValidateParams() { return ValidateDatasetDirParam("MnistDataset", dataset_dir_); }
|
||||
bool MnistDataset::ValidateParams() {
|
||||
return ValidateDatasetDirParam("MnistDataset", dataset_dir_) && ValidateDatasetSampler("MnistDataset", sampler_);
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<DatasetOp>> MnistDataset::Build() {
|
||||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||
|
||||
// If user does not specify Sampler, create a default sampler, i.e., RandomSampler.
|
||||
if (sampler_ == nullptr) {
|
||||
sampler_ = CreateDefaultSampler();
|
||||
}
|
||||
|
||||
// Do internal Schema generation.
|
||||
auto schema = std::make_unique<DataSchema>();
|
||||
RETURN_EMPTY_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1)));
|
||||
|
@ -1257,6 +1271,14 @@ bool RandomDataset::ValidateParams() {
|
|||
MS_LOG(ERROR) << "RandomDataset: total_rows must be greater than 0, now get " << total_rows_;
|
||||
return false;
|
||||
}
|
||||
if (!ValidateDatasetSampler("RandomDataset", sampler_)) {
|
||||
return false;
|
||||
}
|
||||
if (!columns_list_.empty()) {
|
||||
if (!ValidateDatasetColumnParam("RandomDataset", "columns_list", columns_list_)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1279,11 +1301,6 @@ std::vector<std::shared_ptr<DatasetOp>> RandomDataset::Build() {
|
|||
total_rows_ = schema_obj->get_num_rows();
|
||||
}
|
||||
|
||||
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
|
||||
if (sampler_ == nullptr) {
|
||||
sampler_ = CreateDefaultSampler();
|
||||
}
|
||||
|
||||
std::string schema_json_string, schema_file_path;
|
||||
if (schema_ != nullptr) {
|
||||
schema_->set_dataset_type("Random");
|
||||
|
@ -1331,7 +1348,7 @@ bool TextFileDataset::ValidateParams() {
|
|||
return false;
|
||||
}
|
||||
|
||||
if (!ValidateDatasetShardParams("TextfileDataset", num_shards_, shard_id_)) {
|
||||
if (!ValidateDatasetShardParams("TextFileDataset", num_shards_, shard_id_)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1392,6 +1409,9 @@ bool VOCDataset::ValidateParams() {
|
|||
MS_LOG(ERROR) << "Invalid dataset path or no dataset path is specified.";
|
||||
return false;
|
||||
}
|
||||
if (!ValidateDatasetSampler("VOCDataset", sampler_)) {
|
||||
return false;
|
||||
}
|
||||
if (task_ == "Segmentation") {
|
||||
if (!class_index_.empty()) {
|
||||
MS_LOG(ERROR) << "class_indexing is invalid in Segmentation task.";
|
||||
|
@ -1420,11 +1440,6 @@ std::vector<std::shared_ptr<DatasetOp>> VOCDataset::Build() {
|
|||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||
|
||||
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
|
||||
if (sampler_ == nullptr) {
|
||||
sampler_ = CreateDefaultSampler();
|
||||
}
|
||||
|
||||
auto schema = std::make_unique<DataSchema>();
|
||||
VOCOp::TaskType task_type_;
|
||||
|
||||
|
@ -1539,6 +1554,10 @@ bool ConcatDataset::ValidateParams() {
|
|||
MS_LOG(ERROR) << "Concat: concatenated datasets are not specified.";
|
||||
return false;
|
||||
}
|
||||
if (find(datasets_.begin(), datasets_.end(), nullptr) != datasets_.end()) {
|
||||
MS_LOG(ERROR) << "Concat: concatenated dataset should not be null.";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1586,6 +1605,21 @@ bool MapDataset::ValidateParams() {
|
|||
MS_LOG(ERROR) << "Map: No operation is specified.";
|
||||
return false;
|
||||
}
|
||||
if (!input_columns_.empty()) {
|
||||
if (!ValidateDatasetColumnParam("MapDataset", "input_columns", input_columns_)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (!output_columns_.empty()) {
|
||||
if (!ValidateDatasetColumnParam("MapDataset", "output_columns", output_columns_)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (!project_columns_.empty()) {
|
||||
if (!ValidateDatasetColumnParam("MapDataset", "project_columns", project_columns_)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -1615,12 +1649,12 @@ RenameDataset::RenameDataset(const std::vector<std::string> &input_columns,
|
|||
: input_columns_(input_columns), output_columns_(output_columns) {}
|
||||
|
||||
bool RenameDataset::ValidateParams() {
|
||||
if (input_columns_.empty() || output_columns_.empty()) {
|
||||
MS_LOG(ERROR) << "input and output columns must be specified";
|
||||
if (input_columns_.size() != output_columns_.size()) {
|
||||
MS_LOG(ERROR) << "RenameDataset: input and output columns must be the same size";
|
||||
return false;
|
||||
}
|
||||
if (input_columns_.size() != output_columns_.size()) {
|
||||
MS_LOG(ERROR) << "input and output columns must be the same size";
|
||||
if (!ValidateDatasetColumnParam("RenameDataset", "input_columns", input_columns_) ||
|
||||
!ValidateDatasetColumnParam("RenameDataset", "output_columns", output_columns_)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
@ -1713,7 +1747,7 @@ std::vector<std::shared_ptr<DatasetOp>> TakeDataset::Build() {
|
|||
|
||||
// Function to validate the parameters for TakeDataset
|
||||
bool TakeDataset::ValidateParams() {
|
||||
if (take_count_ < 0 && take_count_ != -1) {
|
||||
if (take_count_ <= 0 && take_count_ != -1) {
|
||||
MS_LOG(ERROR) << "Take: take_count should be either -1 or positive integer, take_count: " << take_count_;
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -45,6 +45,8 @@ PYBIND_REGISTER(ConfigManager, 0, ([](const py::module *m) {
|
|||
.def("get_op_connector_size", &ConfigManager::op_connector_size)
|
||||
.def("get_seed", &ConfigManager::seed)
|
||||
.def("get_monitor_sampling_interval", &ConfigManager::monitor_sampling_interval)
|
||||
.def("get_callback_timeout", &ConfigManager::callback_timeout)
|
||||
.def("set_callback_timeout", &ConfigManager::set_callback_timeout)
|
||||
.def("load", [](ConfigManager &c, std::string s) { THROW_IF_ERROR(c.LoadFile(s)); });
|
||||
}));
|
||||
|
||||
|
|
|
@ -382,7 +382,7 @@ CutMixBatchOperation::CutMixBatchOperation(ImageBatchFormat image_batch_format,
|
|||
: image_batch_format_(image_batch_format), alpha_(alpha), prob_(prob) {}
|
||||
|
||||
bool CutMixBatchOperation::ValidateParams() {
|
||||
if (alpha_ < 0) {
|
||||
if (alpha_ <= 0) {
|
||||
MS_LOG(ERROR) << "CutMixBatch: alpha cannot be negative.";
|
||||
return false;
|
||||
}
|
||||
|
@ -434,7 +434,7 @@ std::shared_ptr<TensorOp> HwcToChwOperation::Build() { return std::make_shared<H
|
|||
MixUpBatchOperation::MixUpBatchOperation(float alpha) : alpha_(alpha) {}
|
||||
|
||||
bool MixUpBatchOperation::ValidateParams() {
|
||||
if (alpha_ < 0) {
|
||||
if (alpha_ <= 0) {
|
||||
MS_LOG(ERROR) << "MixUpBatch: alpha must be a positive floating value however it is: " << alpha_;
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -50,7 +50,7 @@ Status CallbackManager::Begin(const CallbackParam &cb_param) {
|
|||
// return Status::OK() if no begin is needed
|
||||
RETURN_OK_IF_TRUE(callback_inds.empty());
|
||||
|
||||
RETURN_IF_NOT_OK(op_->PauseFromMaster());
|
||||
RETURN_IF_NOT_OK(op_->WaitForWorkers());
|
||||
|
||||
// Now do the actual callback
|
||||
for (size_t ind : callback_inds) {
|
||||
|
@ -69,7 +69,7 @@ Status CallbackManager::EpochBegin(const CallbackParam &cb_param) {
|
|||
// return Status::OK() if no epoch_begin is needed
|
||||
RETURN_OK_IF_TRUE(callback_inds.empty());
|
||||
|
||||
RETURN_IF_NOT_OK(op_->PauseFromMaster());
|
||||
RETURN_IF_NOT_OK(op_->WaitForWorkers());
|
||||
|
||||
// Now do the actual callback
|
||||
for (size_t ind : callback_inds) {
|
||||
|
@ -89,7 +89,7 @@ Status CallbackManager::StepBegin(const CallbackParam &cb_param) {
|
|||
// return Status::OK() if no step_begin is needed
|
||||
RETURN_OK_IF_TRUE(callback_inds.empty());
|
||||
|
||||
RETURN_IF_NOT_OK(op_->PauseFromMaster());
|
||||
RETURN_IF_NOT_OK(op_->WaitForWorkers());
|
||||
|
||||
// Now do the actual callback
|
||||
for (size_t ind : callback_inds) {
|
||||
|
@ -108,7 +108,7 @@ Status CallbackManager::End(const CallbackParam &cb_param) {
|
|||
// return Status::OK() if no end is needed
|
||||
RETURN_OK_IF_TRUE(callback_inds.empty());
|
||||
|
||||
RETURN_IF_NOT_OK(op_->PauseFromMaster());
|
||||
RETURN_IF_NOT_OK(op_->WaitForWorkers());
|
||||
|
||||
// Now do the actual callback
|
||||
for (size_t ind : callback_inds) {
|
||||
|
@ -127,7 +127,7 @@ Status CallbackManager::EpochEnd(const CallbackParam &cb_param) {
|
|||
// return Status::OK() if no epoch_end is needed
|
||||
RETURN_OK_IF_TRUE(callback_inds.empty());
|
||||
|
||||
RETURN_IF_NOT_OK(op_->PauseFromMaster());
|
||||
RETURN_IF_NOT_OK(op_->WaitForWorkers());
|
||||
|
||||
// Now do the actual callback
|
||||
for (size_t ind : callback_inds) {
|
||||
|
@ -147,7 +147,7 @@ Status CallbackManager::StepEnd(const CallbackParam &cb_param) {
|
|||
// return Status::OK() if no step_end is needed
|
||||
RETURN_OK_IF_TRUE(callback_inds.empty());
|
||||
|
||||
RETURN_IF_NOT_OK(op_->PauseFromMaster());
|
||||
RETURN_IF_NOT_OK(op_->WaitForWorkers());
|
||||
|
||||
// Now do the actual callback
|
||||
for (size_t ind : callback_inds) {
|
||||
|
|
|
@ -32,7 +32,7 @@ class DatasetOp;
|
|||
/// This class manages all the callbacks that are associated with a single DatasetOp. For now, only MapOp supports this.
|
||||
class CallbackManager {
|
||||
public:
|
||||
/// CallbackManager default constructor. Init needs to be called before using the created instance.
|
||||
/// \brief CallbackManager default constructor. Init needs to be called before using the created instance.
|
||||
CallbackManager() : enabled_(false) {}
|
||||
|
||||
/// \brief
|
||||
|
|
|
@ -88,5 +88,8 @@ uint32_t ConfigManager::seed() const { return seed_; }
|
|||
void ConfigManager::set_seed(uint32_t seed) { seed_ = seed; }
|
||||
|
||||
void ConfigManager::set_monitor_sampling_interval(uint32_t interval) { monitor_sampling_interval_ = interval; }
|
||||
|
||||
void ConfigManager::set_callback_timeout(uint32_t timeout) { callback_timout_ = timeout; }
|
||||
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -116,9 +116,17 @@ class ConfigManager {
|
|||
void set_monitor_sampling_interval(uint32_t interval);
|
||||
|
||||
// getter function
|
||||
// @return The iterval of monitor sampling
|
||||
// @return The interval of monitor sampling
|
||||
int32_t monitor_sampling_interval() const { return monitor_sampling_interval_; }
|
||||
|
||||
// setter function
|
||||
// @param timeout - The setting to apply to the config
|
||||
void set_callback_timeout(uint32_t timeout);
|
||||
|
||||
// getter function
|
||||
// @return The timeout DSWaitedCallback would wait for before raising an error
|
||||
int32_t callback_timeout() const { return callback_timout_; }
|
||||
|
||||
private:
|
||||
int32_t rows_per_buffer_{kCfgRowsPerBuffer};
|
||||
int32_t num_parallel_workers_{kCfgParallelWorkers};
|
||||
|
@ -126,8 +134,9 @@ class ConfigManager {
|
|||
int32_t op_connector_size_{kCfgOpConnectorSize};
|
||||
uint32_t seed_{kCfgDefaultSeed};
|
||||
uint32_t monitor_sampling_interval_{kCfgMonitorSamplingInterval};
|
||||
uint32_t callback_timout_{kCfgCallbackTimeout};
|
||||
|
||||
// Private helper function that taks a nlohmann json format and populates the settings
|
||||
// Private helper function that takes a nlohmann json format and populates the settings
|
||||
// @param j - The json nlohmann json info
|
||||
Status FromJson(const nlohmann::json &j);
|
||||
};
|
||||
|
|
|
@ -68,6 +68,7 @@ constexpr uint32_t kCfgWorkerConnectorSize = 16;
|
|||
constexpr uint32_t kCfgOpConnectorSize = 16;
|
||||
constexpr uint32_t kCfgDefaultSeed = std::mt19937::default_seed;
|
||||
constexpr uint32_t kCfgMonitorSamplingInterval = 10;
|
||||
constexpr uint32_t kCfgCallbackTimeout = 60; // timeout value for callback in seconds
|
||||
|
||||
// Invalid OpenCV type should not be from 0 to 7 (opencv4/opencv2/core/hal/interface.h)
|
||||
constexpr uint8_t kCVInvalidType = 255;
|
||||
|
|
|
@ -59,7 +59,7 @@ constexpr static uint32_t kDataIsInSharedMemory = 2;
|
|||
/// \param rc[in] Status object
|
||||
/// \param reply[in/out] pointer to pre-allocated protobuf object
|
||||
inline void Status2CacheReply(const Status &rc, CacheReply *reply) {
|
||||
reply->set_rc(static_cast<google::int32>(rc.get_code()));
|
||||
reply->set_rc(static_cast<int32_t>(rc.get_code()));
|
||||
reply->set_msg(rc.ToString());
|
||||
}
|
||||
|
||||
|
|
|
@ -76,7 +76,7 @@ class BaseRequest {
|
|||
|
||||
/// \brief Base class of a cache server request
|
||||
/// \param type Type of the request
|
||||
explicit BaseRequest(RequestType type) : type_(type) { rq_.set_type(static_cast<google::int32>(type_)); }
|
||||
explicit BaseRequest(RequestType type) : type_(type) { rq_.set_type(static_cast<int16_t>(type_)); }
|
||||
virtual ~BaseRequest() = default;
|
||||
|
||||
/// \brief A print method for debugging
|
||||
|
|
|
@ -37,8 +37,10 @@ class DataBuffer {
|
|||
// Buffer flags
|
||||
enum BufferFlags : uint32_t {
|
||||
kDeBFlagNone = 0,
|
||||
kDeBFlagEOF = 1, // The buffer is an eof end-of-data msg
|
||||
kDeBFlagEOE = 1u << 1 // The buffer is an eoe end-of-epoch msg
|
||||
kDeBFlagEOF = 1, // The buffer is an eof end-of-data msg
|
||||
kDeBFlagEOE = 1u << 1, // The buffer is an eoe end-of-epoch msg
|
||||
kDeBFlagWait = 1u << 2, // The buffer is an control signal for workers to suspend operations
|
||||
kDeBFlagQuit = 1u << 3 // The buffer is a control signal for workers to quit
|
||||
};
|
||||
|
||||
// Name: Constructor #1
|
||||
|
@ -64,6 +66,10 @@ class DataBuffer {
|
|||
|
||||
bool eoe() const { return (static_cast<uint32_t>(buffer_flags_) & static_cast<uint32_t>(kDeBFlagEOE)); }
|
||||
|
||||
bool wait() const { return (static_cast<uint32_t>(buffer_flags_) & static_cast<uint32_t>(kDeBFlagWait)); }
|
||||
|
||||
bool quit() const { return (static_cast<uint32_t>(buffer_flags_) & static_cast<uint32_t>(kDeBFlagQuit)); }
|
||||
|
||||
// Simple getter funcs
|
||||
int32_t id() const { return buffer_id_; }
|
||||
|
||||
|
|
|
@ -363,10 +363,9 @@ class DatasetOp : public std::enable_shared_from_this<DatasetOp> {
|
|||
/// This function is only intended to be called by CallbackManager within the master thread of ParallelOp
|
||||
/// The expected behavior is this, when this function is invoked, this function will block until all the workers
|
||||
/// have finished their remaining work and go to sleep. Since all ParallelOps use a QueueList to sync with master.
|
||||
/// They would automatically wait on the QueueList when they are done. Hence, for now, a Unpause() function is not
|
||||
/// needed. Only parallelOp needs to override this function.
|
||||
/// They would automatically wait on the QueueList when they are done.
|
||||
/// \return Status
|
||||
virtual Status PauseFromMaster() { return Status::OK(); }
|
||||
virtual Status WaitForWorkers() { return Status::OK(); }
|
||||
|
||||
protected:
|
||||
/// \brief Removes a parent operator from this operator
|
||||
|
|
|
@ -44,9 +44,9 @@ DeviceQueueOp::DeviceQueueOp(std::string channel_name, DeviceType device_type, i
|
|||
DeviceQueueOp::~DeviceQueueOp() {}
|
||||
|
||||
#ifdef ENABLE_GPUQUE
|
||||
void ReleaseData(void *addr) {
|
||||
void DeviceQueueOp::ReleaseData(void *addr) {
|
||||
if (addr != nullptr) {
|
||||
free(addr);
|
||||
pool_->Deallocate(addr);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -87,6 +87,7 @@ Status DeviceQueueOp::operator()() {
|
|||
#endif
|
||||
} else if (device_type_ == DeviceType::GPU) {
|
||||
#ifdef ENABLE_GPUQUE
|
||||
RETURN_IF_NOT_OK(CircularPool::CreateCircularPool(&pool_));
|
||||
RETURN_IF_NOT_OK(SendDataToGPU());
|
||||
#endif
|
||||
} else if (device_type_ == DeviceType::CPU) {
|
||||
|
@ -187,6 +188,7 @@ Status DeviceQueueOp::SendDataToGPU() {
|
|||
bool is_break_loop = false;
|
||||
bool is_open = false;
|
||||
uint32_t handle = INVALID_HANDLE;
|
||||
auto release_function = std::bind(&DeviceQueueOp::ReleaseData, this, std::placeholders::_1);
|
||||
|
||||
std::unique_ptr<DataBuffer> current_buffer;
|
||||
RETURN_IF_NOT_OK(GetNextInput(¤t_buffer));
|
||||
|
@ -204,7 +206,7 @@ Status DeviceQueueOp::SendDataToGPU() {
|
|||
data_size.push_back(static_cast<size_t>(curr_row[i]->SizeInBytes()));
|
||||
}
|
||||
if (!is_open) {
|
||||
handle = GpuBufferMgr::GetInstance().Open(0, channel_name_, data_size, ReleaseData);
|
||||
handle = GpuBufferMgr::GetInstance().Open(0, channel_name_, data_size, release_function);
|
||||
if (handle == INVALID_HANDLE) {
|
||||
return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "open failed");
|
||||
}
|
||||
|
@ -246,7 +248,7 @@ Status DeviceQueueOp::RetryPushGPUData(const std::vector<size_t> &data_size, con
|
|||
BlockQueueStatus_T ret = GpuBufferMgr::GetInstance().Push(handle, items, WAIT_TIME);
|
||||
if (ret) {
|
||||
for (int i = 0; i < items.size(); i++) {
|
||||
free(items[i].data_ptr_);
|
||||
ReleaseData(items[i].data_ptr_);
|
||||
}
|
||||
if (ret == BlockQueueStatus_T::ERROR_INPUT) {
|
||||
return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "invalid input Data, please check it.");
|
||||
|
@ -267,7 +269,7 @@ Status DeviceQueueOp::RetryPushGPUData(const std::vector<size_t> &data_size, con
|
|||
Status DeviceQueueOp::MallocForGPUData(std::vector<device::DataItemGpu> *items, const TensorRow &curr_row) {
|
||||
int i = 0;
|
||||
for (auto &sub_item : *items) {
|
||||
sub_item.data_ptr_ = (unsigned char *)malloc(sub_item.data_len_);
|
||||
RETURN_IF_NOT_OK(pool_->Allocate(sub_item.data_len_, &sub_item.data_ptr_));
|
||||
if (sub_item.data_ptr_ == nullptr) {
|
||||
return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "memory malloc failed.");
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#endif
|
||||
|
||||
#ifdef ENABLE_GPUQUE
|
||||
#include "minddata/dataset/util/circular_pool.h"
|
||||
#include "runtime/device/gpu/gpu_buffer_mgr.h"
|
||||
using mindspore::device::BlockQueueStatus_T;
|
||||
using mindspore::device::GpuBufferMgr;
|
||||
|
@ -162,6 +163,9 @@ class DeviceQueueOp : public PipelineOp {
|
|||
Status SendDataToGPU();
|
||||
Status RetryPushGPUData(const std::vector<size_t> &data_size, const TensorRow &curr_row, uint32_t handle);
|
||||
Status MallocForGPUData(std::vector<device::DataItemGpu> *items, const TensorRow &curr_row);
|
||||
void ReleaseData(void *addr);
|
||||
|
||||
std::shared_ptr<MemoryPool> pool_;
|
||||
#endif
|
||||
|
||||
Status SendDataToCPU();
|
||||
|
|
|
@ -166,7 +166,7 @@ Status MapOp::operator()() {
|
|||
// init callback
|
||||
RETURN_IF_NOT_OK(callback_manager_.Init(shared_from_this()));
|
||||
Status rc = local_queues_.Register(tree_->AllTasks());
|
||||
RETURN_IF_NOT_OK(master_pause_wp_.Register(tree_->AllTasks()));
|
||||
RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks()));
|
||||
if (rc.IsError()) {
|
||||
TaskManager::FindMe()->Post();
|
||||
return rc;
|
||||
|
@ -205,23 +205,29 @@ Status MapOp::operator()() {
|
|||
RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buff, 0));
|
||||
}
|
||||
|
||||
// send the eoe buffer to worker
|
||||
|
||||
// reset epoch_step when a new epoch is about to start
|
||||
// check whether this is the end of a real epoch (not all eoe signals end of epoch)
|
||||
if ((op_current_repeats_ + 1) % op_num_repeats_per_epoch() == 0) {
|
||||
RETURN_IF_NOT_OK(callback_manager_.EpochEnd(CallbackParam(op_current_epochs_ + 1, ep_step, total_step)));
|
||||
ep_step = 0;
|
||||
}
|
||||
// Propagate the eoe buffer to worker
|
||||
std::unique_ptr<MapWorkerJob> worker_job = std::make_unique<MapWorkerJob>(std::move(buff));
|
||||
RETURN_IF_NOT_OK(local_queues_[num_buf++ % num_workers_]->Add(std::move(worker_job)));
|
||||
UpdateRepeatAndEpochCounter();
|
||||
RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buff, 0));
|
||||
}
|
||||
// the last eoe increments the eoe count by 1, but this shouldn't be reflected on End() callback
|
||||
// RETURN_IF_NOT_OK(callback_manager_.End(CallbackParam(op_current_epochs_, ep_step, total_step)));
|
||||
// handle eof logic
|
||||
// End() is commented out because it might never be called due to the lack of EOF when EpochCtrl is -1
|
||||
// RETURN_IF_NOT_OK(callback_manager_.End(CallbackParam(op_current_epochs_, ep_step, total_step)));
|
||||
// Handle eof logic, this code might never be reached if epoch_ctrl = -1.
|
||||
std::unique_ptr<MapWorkerJob> worker_job = std::make_unique<MapWorkerJob>(std::move(buff));
|
||||
RETURN_IF_NOT_OK(local_queues_[num_buf++ % num_workers_]->Add(std::move(worker_job)));
|
||||
|
||||
// Quit all workers, this code might never be reached if EpochCtrl is -1.
|
||||
for (int32_t wkr_id = 0; wkr_id < num_workers_; wkr_id++) {
|
||||
auto quit = std::make_unique<MapWorkerJob>(std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagQuit));
|
||||
RETURN_IF_NOT_OK(local_queues_[num_buf++ % num_workers_]->Add(std::move(quit)));
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -242,26 +248,27 @@ Status MapOp::WorkerEntry(int32_t worker_id) {
|
|||
// Map op does not use child iterator, and it needs to manually handle eoe and eof's itself
|
||||
// rather than use the base-class defaults.
|
||||
while (true) {
|
||||
// handle the pause logic. Pause is triggered when an buffer id of -1 with no special flag and no row is received
|
||||
if (in_buffer->id() == -1 && in_buffer->buffer_flags() == DataBuffer::kDeBFlagNone && in_buffer->NumRows() == 0) {
|
||||
// when worker receives the signal from master thread, it increments a atomic int
|
||||
// the last guy who increments the counter, wakes up master thread
|
||||
if (++num_workers_paused_ == num_workers_) master_pause_wp_.Set();
|
||||
// this will block the worker until master thread gives it a new work
|
||||
// Handle special logic where buffer carries a ctrl flag.
|
||||
if (in_buffer->buffer_flags() != DataBuffer::kDeBFlagNone) {
|
||||
if (in_buffer->wait()) {
|
||||
// When worker receives the signal from master thread, it increments a atomic int
|
||||
// The last guy who increments the counter, wakes up master thread
|
||||
if (++num_workers_paused_ == num_workers_) {
|
||||
wait_for_workers_post_.Set();
|
||||
}
|
||||
// This will block the worker until master thread gives it a new work
|
||||
} else if (in_buffer->eoe()) {
|
||||
// Calling base class EoeReceived to forward eoe buffer.
|
||||
RETURN_IF_NOT_OK(EoeReceived(worker_id));
|
||||
} else if (in_buffer->eof()) {
|
||||
// Calling base class EofReceived to forward eof buffer.
|
||||
RETURN_IF_NOT_OK(EofReceived(worker_id));
|
||||
} else if (in_buffer->quit()) {
|
||||
break;
|
||||
}
|
||||
RETURN_IF_NOT_OK(FetchNextWork(worker_id, &in_buffer, &job_list));
|
||||
continue;
|
||||
} else if (in_buffer->eoe()) {
|
||||
// Calling base class EoeReceived to forward eoe buffer.
|
||||
RETURN_IF_NOT_OK(EoeReceived(worker_id));
|
||||
// Fetch next data buffer and map job list
|
||||
RETURN_IF_NOT_OK(FetchNextWork(worker_id, &in_buffer, &job_list));
|
||||
continue;
|
||||
} else if (in_buffer->eof()) {
|
||||
// Calling base class EofReceived to forward eof buffer.
|
||||
RETURN_IF_NOT_OK(EofReceived(worker_id));
|
||||
break;
|
||||
}
|
||||
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(in_buffer->NumRows() * in_buffer->NumCols() != 0, "MapOp got an empty DataBuffer.");
|
||||
std::unique_ptr<TensorQTable> new_tensor_table(std::make_unique<TensorQTable>());
|
||||
// Perform the compute function of TensorOp(s) and store the result in new_tensor_table.
|
||||
|
@ -299,9 +306,9 @@ Status MapOp::WorkerCompute(DataBuffer *in_buffer, TensorQTable *new_tensor_tabl
|
|||
|
||||
// Variable to keep the result after executing the job.
|
||||
std::vector<TensorRow> result_table;
|
||||
// Executing the list of jobs
|
||||
// Executing the list of jobs.
|
||||
for (size_t i = 0; i < job_list.size(); i++) {
|
||||
// Execute MapJob.
|
||||
// Execute MapWorkerJob.
|
||||
RETURN_IF_NOT_OK(job_list[i]->Run(job_input_table, &result_table));
|
||||
// Assign the processed data as an input for the next job processing, except for the last TensorOp in the list.
|
||||
if (i + 1 < job_list.size()) {
|
||||
|
@ -311,8 +318,7 @@ Status MapOp::WorkerCompute(DataBuffer *in_buffer, TensorQTable *new_tensor_tabl
|
|||
|
||||
// Sanity check a row in result_table
|
||||
if (!result_table.empty() && out_columns_.size() != result_table[0].size()) {
|
||||
return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__,
|
||||
"Result of a tensorOp doesn't match output column names");
|
||||
RETURN_STATUS_UNEXPECTED("Result of a tensorOp doesn't match output column names");
|
||||
}
|
||||
|
||||
// Merging the data processed by job (result_table) with the data that are not used.
|
||||
|
@ -386,7 +392,7 @@ Status MapOp::InitPrivateVariable(std::unordered_map<std::string, int32_t> *col_
|
|||
// columns from child are correct
|
||||
RETURN_IF_NOT_OK(this->ValidateInColumns(*col_name_id_map));
|
||||
|
||||
// initialize keep_input_columns, true means to keep the column.
|
||||
// Initialize keep_input_columns, true means to keep the column.
|
||||
keep_input_columns_.resize(col_name_id_map->size(), true);
|
||||
for (const auto &col_name : in_columns_) {
|
||||
int32_t missed = (*col_name_id_map)[col_name];
|
||||
|
@ -449,18 +455,18 @@ Status MapOp::Accept(NodePass *p, bool *modified) {
|
|||
return p->RunOnNode(shared_from_base<MapOp>(), modified);
|
||||
}
|
||||
|
||||
Status MapOp::PauseFromMaster() {
|
||||
Status MapOp::WaitForWorkers() {
|
||||
// reset num_paused workers to 0
|
||||
num_workers_paused_ = 0;
|
||||
for (int32_t wkr_id = 0; wkr_id < num_workers_; wkr_id++) {
|
||||
// a special buffer (id=-1, empty, none flag) is used to signal that worker needs to pause.
|
||||
RETURN_IF_NOT_OK(local_queues_[wkr_id]->Add(
|
||||
std::make_unique<MapWorkerJob>(std::make_unique<DataBuffer>(-1, DataBuffer::kDeBFlagNone))));
|
||||
std::make_unique<MapWorkerJob>(std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagWait))));
|
||||
}
|
||||
// wait until all workers are done processing their work in local_queue_
|
||||
RETURN_IF_NOT_OK(master_pause_wp_.Wait());
|
||||
RETURN_IF_NOT_OK(wait_for_workers_post_.Wait());
|
||||
// clear the WaitPost for the next Wait()
|
||||
master_pause_wp_.Clear();
|
||||
wait_for_workers_post_.Clear();
|
||||
return Status::OK();
|
||||
}
|
||||
} // namespace dataset
|
||||
|
|
|
@ -228,10 +228,10 @@ class MapOp : public ParallelOp {
|
|||
// Indices of the columns to process.
|
||||
std::vector<size_t> to_process_indices_;
|
||||
|
||||
// wait post used to perform the pausing logic in MapOp
|
||||
WaitPost master_pause_wp_;
|
||||
// Wait post used to perform the pausing logic in MapOp
|
||||
WaitPost wait_for_workers_post_;
|
||||
|
||||
// count number of workers that have signaled master
|
||||
// Count number of workers that have signaled master
|
||||
std::atomic_int num_workers_paused_;
|
||||
|
||||
// Private function for worker/thread to loop continuously. It comprises the main
|
||||
|
@ -272,7 +272,7 @@ class MapOp : public ParallelOp {
|
|||
// Workers upon receiving the suspension token from master thread, increment an atomic count, the last worker
|
||||
// who does the increment wakes up the master.
|
||||
// @return - Status
|
||||
Status PauseFromMaster() override;
|
||||
Status WaitForWorkers() override;
|
||||
};
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -75,6 +75,9 @@ Status DistributedSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer
|
|||
RETURN_STATUS_UNEXPECTED("Distributed Sampler Error");
|
||||
} else if (cnt_ == samples_per_buffer_ && (non_empty_ || !even_dist_)) {
|
||||
(*out_buffer) = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE);
|
||||
if (!samples_per_buffer_) {
|
||||
non_empty_ = false;
|
||||
}
|
||||
} else if (!samples_per_buffer_ && !non_empty_) {
|
||||
// If the buffer is empty, we add samples with subscript 0 in the current dataset.
|
||||
// This step is to make up for the solution that the code default buffer is not empty before.
|
||||
|
|
|
@ -84,32 +84,32 @@ std::shared_ptr<SchemaObj> Schema(const std::string &schema_file = "");
|
|||
// The type of the image tensor is uint8. The attr tensor is uint32 and one hot type.
|
||||
/// \param[in] dataset_dir Path to the root directory that contains the dataset.
|
||||
/// \param[in] dataset_type One of 'all', 'train', 'valid' or 'test'.
|
||||
/// \param[in] decode Decode the images after reading (default=False).
|
||||
/// \param[in] extensions List of file extensions to be included in the dataset (default=None).
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler`
|
||||
/// will be used to randomly iterate the entire dataset
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
|
||||
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
|
||||
/// \param[in] decode Decode the images after reading (default=false).
|
||||
/// \param[in] extensions Set of file extensions to be included in the dataset (default={}).
|
||||
/// \return Shared pointer to the current Dataset
|
||||
std::shared_ptr<CelebADataset> CelebA(const std::string &dataset_dir, const std::string &dataset_type = "all",
|
||||
const std::shared_ptr<SamplerObj> &sampler = nullptr, bool decode = false,
|
||||
const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), bool decode = false,
|
||||
const std::set<std::string> &extensions = {});
|
||||
|
||||
/// \brief Function to create a Cifar10 Dataset
|
||||
/// \notes The generated dataset has two columns ['image', 'label']
|
||||
/// \param[in] dataset_dir Path to the root directory that contains the dataset
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler`
|
||||
/// will be used to randomly iterate the entire dataset
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
|
||||
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
|
||||
/// \return Shared pointer to the current Dataset
|
||||
std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir,
|
||||
const std::shared_ptr<SamplerObj> &sampler = nullptr);
|
||||
const std::shared_ptr<SamplerObj> &sampler = RandomSampler());
|
||||
|
||||
/// \brief Function to create a Cifar100 Dataset
|
||||
/// \notes The generated dataset has three columns ['image', 'coarse_label', 'fine_label']
|
||||
/// \param[in] dataset_dir Path to the root directory that contains the dataset
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler`
|
||||
/// will be used to randomly iterate the entire dataset
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
|
||||
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
|
||||
/// \return Shared pointer to the current Dataset
|
||||
std::shared_ptr<Cifar100Dataset> Cifar100(const std::string &dataset_dir,
|
||||
const std::shared_ptr<SamplerObj> &sampler = nullptr);
|
||||
const std::shared_ptr<SamplerObj> &sampler = RandomSampler());
|
||||
|
||||
/// \brief Function to create a CLUEDataset
|
||||
/// \notes The generated dataset has a variable number of columns depending on the task and usage
|
||||
|
@ -146,12 +146,12 @@ std::shared_ptr<CLUEDataset> CLUE(const std::vector<std::string> &dataset_files,
|
|||
/// \param[in] annotation_file Path to the annotation json
|
||||
/// \param[in] task Set the task type of reading coco data, now support 'Detection'/'Stuff'/'Panoptic'/'Keypoint'
|
||||
/// \param[in] decode Decode the images after reading
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler`
|
||||
/// will be used to randomly iterate the entire dataset
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
|
||||
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
|
||||
/// \return Shared pointer to the current Dataset
|
||||
std::shared_ptr<CocoDataset> Coco(const std::string &dataset_dir, const std::string &annotation_file,
|
||||
const std::string &task = "Detection", const bool &decode = false,
|
||||
const std::shared_ptr<SamplerObj> &sampler = nullptr);
|
||||
const std::shared_ptr<SamplerObj> &sampler = RandomSampler());
|
||||
|
||||
/// \brief Function to create a CSVDataset
|
||||
/// \notes The generated dataset has a variable number of columns
|
||||
|
@ -185,13 +185,13 @@ std::shared_ptr<CSVDataset> CSV(const std::vector<std::string> &dataset_files, c
|
|||
/// The generated dataset has two columns ['image', 'label']
|
||||
/// \param[in] dataset_dir Path to the root directory that contains the dataset
|
||||
/// \param[in] decode A flag to decode in ImageFolder
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
|
||||
/// A `RandomSampler` will be used to randomly iterate the entire dataset
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
|
||||
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
|
||||
/// \param[in] extensions File extensions to be read
|
||||
/// \param[in] class_indexing a class name to label map
|
||||
/// \return Shared pointer to the current ImageFolderDataset
|
||||
std::shared_ptr<ImageFolderDataset> ImageFolder(const std::string &dataset_dir, bool decode = false,
|
||||
const std::shared_ptr<SamplerObj> &sampler = nullptr,
|
||||
const std::shared_ptr<SamplerObj> &sampler = RandomSampler(),
|
||||
const std::set<std::string> &extensions = {},
|
||||
const std::map<std::string, int32_t> &class_indexing = {});
|
||||
|
||||
|
@ -199,25 +199,25 @@ std::shared_ptr<ImageFolderDataset> ImageFolder(const std::string &dataset_dir,
|
|||
/// \notes The generated dataset has two columns ['image', 'label']
|
||||
/// \param[in] dataset_file The dataset file to be read
|
||||
/// \param[in] usage Need "train", "eval" or "inference" data (default="train")
|
||||
/// \param[in] decode Decode the images after reading (default=false).
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
|
||||
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
|
||||
/// \param[in] class_indexing A str-to-int mapping from label name to index (default={}, the folder
|
||||
/// names will be sorted alphabetically and each class will be given a unique index starting from 0).
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
|
||||
/// A `RandomSampler` will be used to randomly iterate the entire dataset
|
||||
/// \param[in] decode Decode the images after reading (default=false).
|
||||
/// \return Shared pointer to the current ManifestDataset
|
||||
std::shared_ptr<ManifestDataset> Manifest(std::string dataset_file, std::string usage = "train",
|
||||
std::shared_ptr<SamplerObj> sampler = nullptr,
|
||||
std::shared_ptr<ManifestDataset> Manifest(const std::string &dataset_file, const std::string &usage = "train",
|
||||
const std::shared_ptr<SamplerObj> &sampler = RandomSampler(),
|
||||
const std::map<std::string, int32_t> &class_indexing = {},
|
||||
bool decode = false);
|
||||
|
||||
/// \brief Function to create a MnistDataset
|
||||
/// \notes The generated dataset has two columns ['image', 'label']
|
||||
/// \param[in] dataset_dir Path to the root directory that contains the dataset
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
|
||||
/// A `RandomSampler` will be used to randomly iterate the entire dataset
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
|
||||
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
|
||||
/// \return Shared pointer to the current MnistDataset
|
||||
std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir,
|
||||
const std::shared_ptr<SamplerObj> &sampler = nullptr);
|
||||
const std::shared_ptr<SamplerObj> &sampler = RandomSampler());
|
||||
|
||||
/// \brief Function to create a ConcatDataset
|
||||
/// \notes Reload "+" operator to concat two datasets
|
||||
|
@ -230,15 +230,15 @@ std::shared_ptr<ConcatDataset> operator+(const std::shared_ptr<Dataset> &dataset
|
|||
/// \brief Function to create a RandomDataset
|
||||
/// \param[in] total_rows Number of rows for the dataset to generate (default=0, number of rows is random)
|
||||
/// \param[in] schema SchemaObj to set column type, data type and data shape
|
||||
/// \param[in] columns_list List of columns to be read (default=None, read all columns)
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler`
|
||||
/// will be used to randomly iterate the entire dataset
|
||||
/// \param[in] columns_list List of columns to be read (default={}, read all columns)
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
|
||||
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
|
||||
/// \return Shared pointer to the current Dataset
|
||||
template <typename T = std::shared_ptr<SchemaObj>>
|
||||
std::shared_ptr<RandomDataset> RandomData(const int32_t &total_rows = 0, T schema = nullptr,
|
||||
std::vector<std::string> columns_list = {},
|
||||
std::shared_ptr<SamplerObj> sampler = nullptr) {
|
||||
auto ds = std::make_shared<RandomDataset>(total_rows, schema, std::move(columns_list), std::move(sampler));
|
||||
const std::vector<std::string> &columns_list = {},
|
||||
const std::shared_ptr<SamplerObj> &sampler = RandomSampler()) {
|
||||
auto ds = std::make_shared<RandomDataset>(total_rows, schema, columns_list, std::move(sampler));
|
||||
return ds->ValidateParams() ? ds : nullptr;
|
||||
}
|
||||
|
||||
|
@ -257,7 +257,7 @@ std::shared_ptr<RandomDataset> RandomData(const int32_t &total_rows = 0, T schem
|
|||
/// \param[in] shard_id The shard ID within num_shards. This argument should be
|
||||
/// specified only when num_shards is also specified. (Default = 0)
|
||||
/// \return Shared pointer to the current TextFileDataset
|
||||
std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &dataset_files, int32_t num_samples = 0,
|
||||
std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &dataset_files, int64_t num_samples = 0,
|
||||
ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1,
|
||||
int32_t shard_id = 0);
|
||||
|
||||
|
@ -271,13 +271,13 @@ std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &datase
|
|||
/// \param[in] mode Set the data list txt file to be readed
|
||||
/// \param[in] class_indexing A str-to-int mapping from label name to index
|
||||
/// \param[in] decode Decode the images after reading
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler`
|
||||
/// will be used to randomly iterate the entire dataset
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
|
||||
/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
|
||||
/// \return Shared pointer to the current Dataset
|
||||
std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std::string &task = "Segmentation",
|
||||
const std::string &mode = "train",
|
||||
const std::map<std::string, int32_t> &class_indexing = {}, bool decode = false,
|
||||
const std::shared_ptr<SamplerObj> &sampler = nullptr);
|
||||
const std::shared_ptr<SamplerObj> &sampler = RandomSampler());
|
||||
|
||||
/// \brief Function to create a ZipDataset
|
||||
/// \notes Applies zip to the dataset
|
||||
|
@ -302,7 +302,7 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
|
|||
virtual std::vector<std::shared_ptr<DatasetOp>> Build() = 0;
|
||||
|
||||
/// \brief Pure virtual function for derived class to implement parameters validation
|
||||
/// \return bool True if all the params are valid
|
||||
/// \return bool true if all the parameters are valid
|
||||
virtual bool ValidateParams() = 0;
|
||||
|
||||
/// \brief Setter function for runtime number of workers
|
||||
|
@ -716,7 +716,7 @@ class ImageFolderDataset : public Dataset {
|
|||
class ManifestDataset : public Dataset {
|
||||
public:
|
||||
/// \brief Constructor
|
||||
ManifestDataset(std::string dataset_file, std::string usage, std::shared_ptr<SamplerObj> sampler,
|
||||
ManifestDataset(const std::string &dataset_file, const std::string &usage, const std::shared_ptr<SamplerObj> &sampler,
|
||||
const std::map<std::string, int32_t> &class_indexing, bool decode);
|
||||
|
||||
/// \brief Destructor
|
||||
|
@ -767,8 +767,8 @@ class RandomDataset : public Dataset {
|
|||
static constexpr int32_t kMaxDimValue = 32;
|
||||
|
||||
/// \brief Constructor
|
||||
RandomDataset(const int32_t &total_rows, std::shared_ptr<SchemaObj> schema, std::vector<std::string> columns_list,
|
||||
std::shared_ptr<SamplerObj> sampler)
|
||||
RandomDataset(const int32_t &total_rows, std::shared_ptr<SchemaObj> schema,
|
||||
const std::vector<std::string> &columns_list, const std::shared_ptr<SamplerObj> &sampler)
|
||||
: total_rows_(total_rows),
|
||||
schema_path_(""),
|
||||
schema_(std::move(schema)),
|
||||
|
@ -776,8 +776,8 @@ class RandomDataset : public Dataset {
|
|||
sampler_(std::move(sampler)) {}
|
||||
|
||||
/// \brief Constructor
|
||||
RandomDataset(const int32_t &total_rows, std::string schema_path, std::vector<std::string> columns_list,
|
||||
std::shared_ptr<SamplerObj> sampler)
|
||||
RandomDataset(const int32_t &total_rows, std::string schema_path, const std::vector<std::string> &columns_list,
|
||||
const std::shared_ptr<SamplerObj> &sampler)
|
||||
: total_rows_(total_rows), schema_path_(schema_path), columns_list_(columns_list), sampler_(std::move(sampler)) {}
|
||||
|
||||
/// \brief Destructor
|
||||
|
|
|
@ -14,8 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_API_SAMPLERS_H_
|
||||
#define MINDSPORE_CCSRC_MINDDATA_DATASET_API_SAMPLERS_H_
|
||||
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_SAMPLERS_H_
|
||||
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_SAMPLERS_H_
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
@ -70,7 +70,7 @@ std::shared_ptr<PKSamplerObj> PKSampler(int64_t num_val, bool shuffle = false, i
|
|||
|
||||
/// Function to create a Random Sampler.
|
||||
/// \notes Samples the elements randomly.
|
||||
/// \param[in] replacement - If True, put the sample ID back for the next draw.
|
||||
/// \param[in] replacement - If true, put the sample ID back for the next draw.
|
||||
/// \param[in] num_samples - The number of samples to draw (default to all elements).
|
||||
/// \return Shared pointer to the current Sampler.
|
||||
std::shared_ptr<RandomSamplerObj> RandomSampler(bool replacement = false, int64_t num_samples = 0);
|
||||
|
@ -94,7 +94,7 @@ std::shared_ptr<SubsetRandomSamplerObj> SubsetRandomSampler(std::vector<int64_t>
|
|||
/// weights (probabilities).
|
||||
/// \param[in] weights - A vector sequence of weights, not necessarily summing up to 1.
|
||||
/// \param[in] num_samples - The number of samples to draw (default to all elements).
|
||||
/// \param[in] replacement - If True, put the sample ID back for the next draw.
|
||||
/// \param[in] replacement - If true, put the sample ID back for the next draw.
|
||||
/// \return Shared pointer to the current Sampler.
|
||||
std::shared_ptr<WeightedRandomSamplerObj> WeightedRandomSampler(std::vector<double> weights, int64_t num_samples = 0,
|
||||
bool replacement = true);
|
||||
|
@ -199,4 +199,4 @@ class WeightedRandomSamplerObj : public SamplerObj {
|
|||
} // namespace api
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_API_SAMPLERS_H_
|
||||
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_SAMPLERS_H_
|
||||
|
|
|
@ -50,7 +50,7 @@ void CutMixBatchOp::GetCropBox(int height, int width, float lam, int *x, int *y,
|
|||
|
||||
Status CutMixBatchOp::Compute(const TensorRow &input, TensorRow *output) {
|
||||
if (input.size() < 2) {
|
||||
RETURN_STATUS_UNEXPECTED("Both images and labels columns are required for this operation");
|
||||
RETURN_STATUS_UNEXPECTED("Both images and labels columns are required for this operation.");
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<Tensor>> images;
|
||||
|
@ -59,10 +59,17 @@ Status CutMixBatchOp::Compute(const TensorRow &input, TensorRow *output) {
|
|||
|
||||
// Check inputs
|
||||
if (image_shape.size() != 4 || image_shape[0] != label_shape[0]) {
|
||||
RETURN_STATUS_UNEXPECTED("You must batch before calling CutMixBatch.");
|
||||
RETURN_STATUS_UNEXPECTED(
|
||||
"CutMixBatch: You must make sure images are HWC or CHW and batched before calling CutMixBatch.");
|
||||
}
|
||||
if (label_shape.size() != 2) {
|
||||
RETURN_STATUS_UNEXPECTED("CutMixBatch: Label's must be in one-hot format and in a batch");
|
||||
if (!input.at(1)->type().IsInt()) {
|
||||
RETURN_STATUS_UNEXPECTED("CutMixBatch: Wrong labels type. The second column (labels) must only include int types.");
|
||||
}
|
||||
if (label_shape.size() != 2 && label_shape.size() != 3) {
|
||||
RETURN_STATUS_UNEXPECTED(
|
||||
"CutMixBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC where N is the batch "
|
||||
"size, L is the number of labels in each row, "
|
||||
"and C is the number of classes. labels must be in one-hot format and in a batch.");
|
||||
}
|
||||
if ((image_shape[1] != 1 && image_shape[1] != 3) && image_batch_format_ == ImageBatchFormat::kNCHW) {
|
||||
RETURN_STATUS_UNEXPECTED("CutMixBatch: Image doesn't match the given image format.");
|
||||
|
@ -84,10 +91,12 @@ Status CutMixBatchOp::Compute(const TensorRow &input, TensorRow *output) {
|
|||
|
||||
// Tensor holding the output labels
|
||||
std::shared_ptr<Tensor> out_labels;
|
||||
RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape(label_shape), DataType(DataType::DE_FLOAT32), &out_labels));
|
||||
RETURN_IF_NOT_OK(TypeCast(std::move(input.at(1)), &out_labels, DataType(DataType::DE_FLOAT32)));
|
||||
|
||||
int64_t row_labels = label_shape.size() == 3 ? label_shape[1] : 1;
|
||||
int64_t num_classes = label_shape.size() == 3 ? label_shape[2] : label_shape[1];
|
||||
// Compute labels and images
|
||||
for (int i = 0; i < image_shape[0]; i++) {
|
||||
for (int64_t i = 0; i < image_shape[0]; i++) {
|
||||
// Calculating lambda
|
||||
// If x1 is a random variable from Gamma(a1, 1) and x2 is a random variable from Gamma(a2, 1)
|
||||
// then x = x1 / (x1+x2) is a random variable from Beta(a1, a2)
|
||||
|
@ -138,15 +147,29 @@ Status CutMixBatchOp::Compute(const TensorRow &input, TensorRow *output) {
|
|||
}
|
||||
|
||||
// Compute labels
|
||||
for (int j = 0; j < label_shape[1]; j++) {
|
||||
uint64_t first_value, second_value;
|
||||
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, {i, j}));
|
||||
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, {rand_indx[i] % label_shape[0], j}));
|
||||
RETURN_IF_NOT_OK(out_labels->SetItemAt({i, j}, label_lam * first_value + (1 - label_lam) * second_value));
|
||||
|
||||
for (int64_t j = 0; j < row_labels; j++) {
|
||||
for (int64_t k = 0; k < num_classes; k++) {
|
||||
std::vector<int64_t> first_index = label_shape.size() == 3 ? std::vector{i, j, k} : std::vector{i, k};
|
||||
std::vector<int64_t> second_index =
|
||||
label_shape.size() == 3 ? std::vector{rand_indx[i], j, k} : std::vector{rand_indx[i], k};
|
||||
if (input.at(1)->type().IsSignedInt()) {
|
||||
int64_t first_value, second_value;
|
||||
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, first_index));
|
||||
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, second_index));
|
||||
RETURN_IF_NOT_OK(
|
||||
out_labels->SetItemAt(first_index, label_lam * first_value + (1 - label_lam) * second_value));
|
||||
} else {
|
||||
uint64_t first_value, second_value;
|
||||
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, first_index));
|
||||
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, second_index));
|
||||
RETURN_IF_NOT_OK(
|
||||
out_labels->SetItemAt(first_index, label_lam * first_value + (1 - label_lam) * second_value));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<Tensor> out_images;
|
||||
RETURN_IF_NOT_OK(TensorVectorToBatchTensor(images, &out_images));
|
||||
|
||||
|
|
|
@ -415,9 +415,7 @@ Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Te
|
|||
for (int i = 0; i < crop_width; i++) {
|
||||
for (int j = 0; j < crop_height; j++) {
|
||||
for (int c = 0; c < number_of_channels; c++) {
|
||||
uint8_t pixel_value;
|
||||
RETURN_IF_NOT_OK(sub_mat->GetItemAt(&pixel_value, {j, i, c}));
|
||||
RETURN_IF_NOT_OK((*input)->SetItemAt({y + j, x + i, c}, pixel_value));
|
||||
RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {j, i, c}, {y + j, x + i, c}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -432,9 +430,7 @@ Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Te
|
|||
for (int i = 0; i < crop_width; i++) {
|
||||
for (int j = 0; j < crop_height; j++) {
|
||||
for (int c = 0; c < number_of_channels; c++) {
|
||||
uint8_t pixel_value;
|
||||
RETURN_IF_NOT_OK(sub_mat->GetItemAt(&pixel_value, {c, j, i}));
|
||||
RETURN_IF_NOT_OK((*input)->SetItemAt({c, y + j, x + i}, pixel_value));
|
||||
RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {c, j, i}, {c, y + j, x + i}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -447,9 +443,7 @@ Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Te
|
|||
}
|
||||
for (int i = 0; i < crop_width; i++) {
|
||||
for (int j = 0; j < crop_height; j++) {
|
||||
uint8_t pixel_value;
|
||||
RETURN_IF_NOT_OK(sub_mat->GetItemAt(&pixel_value, {j, i}));
|
||||
RETURN_IF_NOT_OK((*input)->SetItemAt({y + j, x + i}, pixel_value));
|
||||
RETURN_IF_NOT_OK(CopyTensorValue(sub_mat, input, {j, i}, {y + j, x + i}));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -458,6 +452,24 @@ Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Te
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status CopyTensorValue(const std::shared_ptr<Tensor> &source_tensor, std::shared_ptr<Tensor> *dest_tensor,
|
||||
const std::vector<int64_t> &source_indx, const std::vector<int64_t> &dest_indx) {
|
||||
if (source_tensor->type() != (*dest_tensor)->type())
|
||||
RETURN_STATUS_UNEXPECTED("CopyTensorValue: source and destination tensor must have the same type.");
|
||||
if (source_tensor->type() == DataType::DE_UINT8) {
|
||||
uint8_t pixel_value;
|
||||
RETURN_IF_NOT_OK(source_tensor->GetItemAt(&pixel_value, source_indx));
|
||||
RETURN_IF_NOT_OK((*dest_tensor)->SetItemAt(dest_indx, pixel_value));
|
||||
} else if (source_tensor->type() == DataType::DE_FLOAT32) {
|
||||
float pixel_value;
|
||||
RETURN_IF_NOT_OK(source_tensor->GetItemAt(&pixel_value, source_indx));
|
||||
RETURN_IF_NOT_OK((*dest_tensor)->SetItemAt(dest_indx, pixel_value));
|
||||
} else {
|
||||
RETURN_STATUS_UNEXPECTED("CopyTensorValue: Tensor type is not supported. Tensor type must be float32 or uint8.");
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status SwapRedAndBlue(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) {
|
||||
try {
|
||||
std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
|
||||
|
|
|
@ -133,6 +133,17 @@ Status HwcToChw(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output);
|
|||
Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Tensor> *input, int x, int y, int width,
|
||||
int height, ImageFormat image_format);
|
||||
|
||||
/// \brief Copies a value from a source tensor into a destination tensor
|
||||
/// \note This is meant for images and therefore only works if tensor is uint8 or float32
|
||||
/// \param[in] source_tensor The tensor we take the value from
|
||||
/// \param[in] dest_tensor The pointer to the tensor we want to copy the value to
|
||||
/// \param[in] source_indx index of the value in the source tensor
|
||||
/// \param[in] dest_indx index of the value in the destination tensor
|
||||
/// \param[out] dest_tensor Copies the value to the given dest_tensor and returns it
|
||||
/// @return Status ok/error
|
||||
Status CopyTensorValue(const std::shared_ptr<Tensor> &source_tensor, std::shared_ptr<Tensor> *dest_tensor,
|
||||
const std::vector<int64_t> &source_indx, const std::vector<int64_t> &dest_indx);
|
||||
|
||||
/// \brief Swap the red and blue pixels (RGB <-> BGR)
|
||||
/// \param input: Tensor of shape <H,W,3> and any OpenCv compatible type, see CVTensor.
|
||||
/// \param output: Swapped image of same shape and type
|
||||
|
|
|
@ -38,13 +38,20 @@ Status MixUpBatchOp::Compute(const TensorRow &input, TensorRow *output) {
|
|||
|
||||
// Check inputs
|
||||
if (image_shape.size() != 4 || image_shape[0] != label_shape[0]) {
|
||||
RETURN_STATUS_UNEXPECTED("You must batch before calling MixUpBatch");
|
||||
RETURN_STATUS_UNEXPECTED(
|
||||
"MixUpBatch:You must make sure images are HWC or CHW and batched before calling MixUpBatch.");
|
||||
}
|
||||
if (label_shape.size() != 2) {
|
||||
RETURN_STATUS_UNEXPECTED("MixUpBatch: Label's must be in one-hot format and in a batch");
|
||||
if (!input.at(1)->type().IsInt()) {
|
||||
RETURN_STATUS_UNEXPECTED("MixUpBatch: Wrong labels type. The second column (labels) must only include int types.");
|
||||
}
|
||||
if (label_shape.size() != 2 && label_shape.size() != 3) {
|
||||
RETURN_STATUS_UNEXPECTED(
|
||||
"MixUpBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC where N is the batch "
|
||||
"size, L is the number of labels in each row, "
|
||||
"and C is the number of classes. labels must be in one-hot format and in a batch.");
|
||||
}
|
||||
if ((image_shape[1] != 1 && image_shape[1] != 3) && (image_shape[3] != 1 && image_shape[3] != 3)) {
|
||||
RETURN_STATUS_UNEXPECTED("MixUpBatch: Images must be in the shape of HWC or CHW");
|
||||
RETURN_STATUS_UNEXPECTED("MixUpBatch: Images must be in the shape of HWC or CHW.");
|
||||
}
|
||||
|
||||
// Move images into a vector of CVTensors
|
||||
|
@ -65,16 +72,31 @@ Status MixUpBatchOp::Compute(const TensorRow &input, TensorRow *output) {
|
|||
|
||||
// Compute labels
|
||||
std::shared_ptr<Tensor> out_labels;
|
||||
RETURN_IF_NOT_OK(TypeCast(std::move(input.at(1)), &out_labels, DataType("float32")));
|
||||
RETURN_IF_NOT_OK(TypeCast(std::move(input.at(1)), &out_labels, DataType(DataType::DE_FLOAT32)));
|
||||
|
||||
int64_t row_labels = label_shape.size() == 3 ? label_shape[1] : 1;
|
||||
int64_t num_classes = label_shape.size() == 3 ? label_shape[2] : label_shape[1];
|
||||
|
||||
for (int64_t i = 0; i < label_shape[0]; i++) {
|
||||
for (int64_t j = 0; j < label_shape[1]; j++) {
|
||||
uint64_t first_value, second_value;
|
||||
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, {i, j}));
|
||||
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, {rand_indx[i], j}));
|
||||
RETURN_IF_NOT_OK(out_labels->SetItemAt({i, j}, lam * first_value + (1 - lam) * second_value));
|
||||
for (int64_t j = 0; j < row_labels; j++) {
|
||||
for (int64_t k = 0; k < num_classes; k++) {
|
||||
std::vector<int64_t> first_index = label_shape.size() == 3 ? std::vector{i, j, k} : std::vector{i, k};
|
||||
std::vector<int64_t> second_index =
|
||||
label_shape.size() == 3 ? std::vector{rand_indx[i], j, k} : std::vector{rand_indx[i], k};
|
||||
if (input.at(1)->type().IsSignedInt()) {
|
||||
int64_t first_value, second_value;
|
||||
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, first_index));
|
||||
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, second_index));
|
||||
RETURN_IF_NOT_OK(out_labels->SetItemAt(first_index, lam * first_value + (1 - lam) * second_value));
|
||||
} else {
|
||||
uint64_t first_value, second_value;
|
||||
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&first_value, first_index));
|
||||
RETURN_IF_NOT_OK(input.at(1)->GetItemAt(&second_value, second_index));
|
||||
RETURN_IF_NOT_OK(out_labels->SetItemAt(first_index, lam * first_value + (1 - lam) * second_value));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute images
|
||||
for (int64_t i = 0; i < images.size(); i++) {
|
||||
TensorShape remaining({-1});
|
||||
|
|
|
@ -40,6 +40,8 @@ Status PosterizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_pt
|
|||
}
|
||||
cv::Mat in_image = input_cv->mat();
|
||||
cv::Mat output_img;
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(in_image.depth() == CV_8U || in_image.depth() == CV_8S,
|
||||
"Input image data type can not be float, but got " + input->type().ToString());
|
||||
cv::LUT(in_image, lut_vector, output_img);
|
||||
std::shared_ptr<CVTensor> result_tensor;
|
||||
RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, &result_tensor));
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue