From 56ce6c36fa85515e4c05c253b617e9920fd09e4b Mon Sep 17 00:00:00 2001 From: jonyguo Date: Wed, 7 Sep 2022 17:12:17 +0800 Subject: [PATCH] reconstruct map parameters --- .../api_python/dataset/map_parameter_cn.png | Bin 0 -> 2099 bytes .../api_python/dataset/map_parameter_en.png | Bin 0 -> 2379 bytes .../dataset/map_parameter_pyfunc_cn.png | Bin 0 -> 2926 bytes .../dataset/map_parameter_pyfunc_en.png | Bin 0 -> 3246 bytes .../dataset/mindspore.dataset.Dataset.e.rst | 25 ++++++++--- .../mindspore/dataset/engine/datasets.py | 40 +++++++++++++----- .../mindspore/dataset/engine/validators.py | 16 ++++++- 7 files changed, 62 insertions(+), 19 deletions(-) create mode 100644 docs/api/api_python/dataset/map_parameter_cn.png create mode 100644 docs/api/api_python/dataset/map_parameter_en.png create mode 100644 docs/api/api_python/dataset/map_parameter_pyfunc_cn.png create mode 100644 docs/api/api_python/dataset/map_parameter_pyfunc_en.png diff --git a/docs/api/api_python/dataset/map_parameter_cn.png b/docs/api/api_python/dataset/map_parameter_cn.png new file mode 100644 index 0000000000000000000000000000000000000000..2112397608b955c7d7e7b04e887225f99765d778 GIT binary patch literal 2099 zcma)8`#Teg8=sWh$t4}3T+S(xI@v+AP$LP|v_d;_YuJ%v!?HC~$E8GNkxR8O(q*~M zZ5wAX%e9E*vc#BWp&1)ngc+y)fbZpb-p~8I&-3}^eV+I8`8@9(sF%B@#!d|Y0HEpV zapF7xpw#z$KCPzmy#qa)eh}(09)7U^fL7}dD0N1cgaH7mlb$DBeBuh`#=x<=It`N+ z(!9Ey3v-M2F2YZ0rbDwtCua2KmCeqP-aH=6oiQU{^Irix8TD>*OSc+)EJmENG*&lF zvb>6`2JwQESW0|Z{y#@1s&VzNBs|_2&t;Qmw%Pwx4Ev=d87LSl${nK9lTANX*t~6s z%5y42g>rGb3;-6a>X!=K(z$WN%fcjR%}u6ltx84t`oY~Q9ljS`z!FDgTZb)CO9g2+ zL5U;xqx7U@OAGz+)|46FG16s5CG+;ckxv(jKw#DNozs?(grAs!3xOk17Z0Ow+Ht|z z37egi5>&6>E=qTOj>SX96_%%Eg~CsF;dF5c+3U}&X__SvvY5wYi|}urHzKf{&q=HC z2jo^mxi3Q+WvF|nx<29>rA~u1g_ZU16h$X(t_J5jS=mpPRLuPJYKerB_8~}NZJ6tK zpP8u+e?zTKH99r#pFKdkgQ*!@Y^?pld@40_00gxWknO_Ld`Gf&Ln5I0-nc|L*!qZ{ zQ>F;1giCfMYZIwr2;VpcxK5&D4ZhLNesN}QiE3lleO}awGipxU2G$NK)Cx7JPYVBS 
z9q;TLlwB5&>i2=U?cr~m_Z%H^P&1C@RAG_Gs9m8sfysoLIxfh^0pkM-rz_i+qv_W(p+I>` zRw`xsf&XvC5MR;>Q$T%kGoqVGke#hVWU9Mt2ATVe&LI`lUc=(oNB1EmflDho2A$30 zZ7Y>K5SR0Z*y^t3fDM&|x1afjZ7658Rm(}~0j32yZ}Jqy3J49Xf<)j~G-#(-J>Zu`O5H2l7 zZ|&jxYOqe~(<|Y_o-0ko7NJ9irEZJ{36klg_4V%}+ZP%JxZNbZX?G>>k>-p`) zm1E{qnr+LLcL$GOa|mTJSJH`MS;k6lJ?90ar((t|{$xpklYx*B#8Hz;*ujvglR%(&Bp9$~H9(sDnC{}-&z=>~SMEZ@Cy$Wf%>k#A3BRM@60)w(wP;kajT zJPDc@ap+PZ6TfvWYyR@?CY?dM@zW9Bz7~DBl(f2U#I4&$GrbcY#I}5fl)9ryICQ+n zKIFxXlx_jfKv!{K91-#Uko*h} z<7oL~%YUqkdwbOP&z{lRp+%s@36J?hgzQ|)%Rlc2G-xuBgb#6}UdWE6I zYa@gaz|na@Hcacu27~3!4SR$wAd0zwaq=Nn*H|}I!y9Va#zw&0I#W(UshZb&!e+ts z%GVS39vMS+!0SHa4E$62ZqnJU1i_3ArHtwxH&8MASIMk@%g44GVTz`1|jKdeis)gtsn|eu>Zfxf%0raxvJnO!e6^_?)w)2@O1M! JK{@Y28^^a4Q7EM3$6n`BY1kp8o$b0K98ui-h@p+7W@s$LP~Yo^j@4nlTCDH2 zxU_6T8*8l{=4WvcRvU@2{Mc;MW`2L&y}o~Z|GM|<_5Aqnd7jtn^E|KT_URyBLxcSW z006+y-_JV)007cA^W)ofH@zNv$Y(R?r}{o5__oqhdm}p_M;zLXnZ%RgOO%qVG?o}PTcM&YP<=lJ6MvSd!DT)q%+ z@KGO7b9fo4Si&=mUr}HR6YW;ZmqtyVzrLv@$IeGG`qmy|KPg*eQ)TXUcf)Q*%=LfJ znrkgQCeBb{FxYS)d_7U%^wg`Xo)VI|e@^Oyde!CbcpS7YWa6lP56b9*762PSAPT1o&lp12$$|SG6zV z@&<3`k@q?>@?hwO9VaSlOpy`uUA~{U4RVyqr&bMaEjTGA>fG{0iNA-A287&G# z<}Ivf%_-k4JO}g8M;Q-(Pm4%hv``KSmzo+-W_8c;3RLdygeCn?5x)1@Lp@rwi&=Q^J#ZA zv>Hy(=0@M+nrDo$)Sl`|*Kkc5))s_dnU+H6y|ahr@87on!1^{Rt9Q0$N;aTcer;Qv ze>H+&)0|7&I_X-dCQAb?vRi8sfkfk}}bbi^1k^)3^fSh+PZq94{KmxK$u zvISp0C;d`nz*RWAH)Ba$jvet+QGRS(mmj%G-;jB)#RRWhQhBL-`saCpdk9*^Atq*sZ^Q;)=IFx9fR#0g=%O^6Qyr7^$hv6;QqYYokmi*X! 
z$dB(l#K9G~>P8xQ*c0!eCSLIh4;rm_>4qs@(4)2V1O|=LO!4_%ELi%iW9VGhQ_RTs zgm?*P+W5kH5#t*?;@BMr@*#Z2@8G{{UmRA4?xjfG3Kv5gG)X=yuZ0>qvxa8lp0KuG z>!yIPb4a*pfY#u}?i<0c6K0}Bo+TLz9R7axbT_%qYP4ceoS2ItWZy#Gn!hGP(RZ#u zc^{?2x7FFyV4~TRsQW;?OW0BfpVp5m#|)bf;94Vh#XPPQRB#Wz8Kg=<$W-0$nN!OKFg z9?_+)Z0Vq2-Zcz+1hgIznKHens6q8yJ6lu7611tTW^1~?Br##XU7%Q$Xh`Q=e0};J z(~d2B9O`+;?!Clq3|duZ1lZ$8e#;p#?8fO$6H9%(skroonJS4{sCGb$ZWcexaf3kg z;3;(%fBtM`viqN?7!HZZRE+%r(`?KKdU|=Cs=HFja(z~#Zmb#_$UaLS%Z{z8s)G58 zPI~_nROj3=lO|nF0Z=ho)=CLrW0AUikwNj>D=QXpvPka{bjBIXi?1rckcx(^4^ar4 z!nqCBWMWMByrUW3?`~!OUcUaHQPG3vLmIu3Yww^$+;|ay#WX@emG4*DvLnr{DH0_% zlrZ&p72m11XMUc@_icQ2U1lp_xEtjj48u#w|lV^L%F18Fllkb>ZU z|DqkZEUBTjyyg9UP*ZEcue*>v0f^F%=1-TW)Y}yM_np&U)3t1N0N$(v&1}Zkmqy)4qa)#RlNq77FmA4rL47 zWJOW=5$(VtBAY=DpymW9HjnUd_f))IrZ+Rmsp2#t3R6W62Q{IVeeQbkIY#B>5cuSkf~n@LBF64^6Nb_1-eUoqd)pHJ6WkW$Y5^1!v%{2Wv(-X; ztEY1Qt5)E5{T4&O+xt6$Y`#7p1fCTT_kmT?2Y#>{9( zoSyI0p3P}#-SgZV!gA8O;e+_`R>+aHdL$cjr)$w=FLvckT;x5EY*Yud{iJIYwt@nc z8wgmH?GZsSsSAjikLL#bJ}uf^4dfjXC%DK4&@KqGAEz!2lr@~3r~);wsCiRz3H8Un z+FfgDPhs-c^#f+;3`wj`o(|!9(XQ2cA?Q+K+)9jDNtyr5df4OdFYC+cnK#sbH~FHW0i}g_iO!V!rm@ z)NAwy&^+Qvha^LOuyJ!+pW@5WQO_fBgXu=7|0&vUBu{E*Ya7z+;z763e11V;qdMl$ wvWqIn_6+?D&IfqEb zxvJAb5SYA3W4wJCv2i?32dJKt@(MT71ql}3gz0335S($?S;{lUWIM{~(HpJu1z<$S zJ~w*f7!C4*-sUZ6vgRz)me2VwtdVy=sfgwqz$2c6SCU$TDf^mzr=-jXTav$1bl6iy z6O=lLEy_f=~s$D7CfJ#{YyS@#ucMbc*hvifh1juDFO8>Ire@?pI=N`D~HY^ zQ8m48$dTsq98NZD>^Cx3vO_~aQMKVBZi+={AMjj1tDDyHw89uOeYetrQH8y;xL;1K zaI8|v6H!Q8&56qEJW~`Qt`LR7w zk(SlL?Yl;C%Jy5KNnJyFeBWDNS*!~tx+5H3J#(6~-m!e)5Al%(+FUG)UJ2}*I{8XTTrnd#wOY1BuK$5c#FYwr4~Y1e3?<_uDX>e63*bR`H&1u+ym>cu6A&up_Rp2Y)`KQ9 z?EqRW4_YX5maF#I)UK2$C{gYw(Pc$>fL@53y*k9NDcL1`J&Sc= z0kh(Ik?g5LJuJ$Fl8r%v5Bm+wg~YHJ_9Ud>^jVE)mj|y4nTPtaFqu$%4jV#*@XQU}K z<7*y0c*u-%{;LZd&e6WNKS z1a&mHRf$w?rU(co{f61R=T(~iu?Z}jSPMl8-uP;8zP4UFedZiK{ZTCzs870a7_ig_nrqRFRvsWPL$}t&}ib&JQqBTIbleZ<%den>?t|i zXqZX#Q*cq@80_;_fJmF)GP$F*K5tM@EivphD$PP1^dYK6veqIyDI{rQP zJv0I#m@DS=Z|KX;pm^j)?}07Bao|rfyQAGb&IQ?_VEjnvuv8>c{%C7MO62f#8ale# 
z5ua-A#A2$_*H_wKZQAD}6*24l8PK8`QB8{rc`2@C_To@n0L*79$f(p$ywTqLk&t@S*Z zhkKWw=gb8mvY=hIA$>_~RHZ%8(YS@mH6M$ym zF$XDUI};uDX5B^IBUoAsmz-YpY8X4SCol)D%3uw{P0Ji4Rsz@SD5+eylfz1PAydpJ zN0b^k<$=7|gJqnRqJAL?T30-B9DiCV zFC7$?{EskfO8@@xKbwDyZ^wTeQf$hnIu87!2;g+vx@PKeiNNDlI^C3iF^6EzK(USD zweKfZ>rcO;1Jx$2}3dPiJirD2prtfV3q>t&w)~yS`sKYu2Byjn~up zw0HZ5F9bTJ>GGZKy3b33miuLC${Co;ynuQBF}*FTG?K0Nu<*yrW!EnxJu+}W`l6Iv zJN!COHh!?Ylm2n*cZRLVt6YcU(?stgkYyQ%--Op$2wX=jq|@D}bACLf^(^t5{*(ay z2;;{ly_on?bxqdLHhQ7u@UB*=&0mdVLQDb)j?@kFNsB3o#hy={o2Wj9%L-Jg_3a4& zG+$P%^ZittWSf9Y$n5y-CH4H>&c1wzaiED>17L#1%SnxGO>)<`^igEGEne%|F>AE| z-v)GJLgn(v7`#&nl&P+8SM&cphppn!`*&)qmV7V%9}0IYahrwCXkF6AK!4Pw&Fe5o zVW0cvM#-1qa2`Xe4{lUWq(^cf4CJvZmuk`y^BSu)X0;|G)0_j*eqBzbStZ|YZkylo zxkUcdwK|xOZ)uJ}d@U_&H_@9v_`rj3l^a+1-}SwMbO$_eEWKlonM_#Q+8zK~B2KV? zg;VU-m}qWq)tvHp7AN695Z){W4WVzmKgV}m0JObbkX!P*=mL>UYcfZ&uEE_`vWYmj zn>Xy6t#meKW1S+D&)UL&?>|{!19c4S`;Z(J1zs|^TXoGUIT%Z3u1*R5{48c_M{;}3 qFywlyBbv*2{a60qry_Ijnn}L6JOnrjkNomL1HeD~948$O{PkZp{|c)B literal 0 HcmV?d00001 diff --git a/docs/api/api_python/dataset/map_parameter_pyfunc_en.png b/docs/api/api_python/dataset/map_parameter_pyfunc_en.png new file mode 100644 index 0000000000000000000000000000000000000000..dcf7595f9d05b1958267562cd280aa63ddd95bd1 GIT binary patch literal 3246 zcmbuCdo&aLAIDv~P;RA^B=saJBF$WyOQq6yTz(X}%%qyoHW4;vC{H4`V#swV*D&`H z8p)+KqcFE&p0w?kncHHG;iup4JkRs{@AsVZJLmJgobUOZ&*!|(=llJ>zu$OAdu#a} z2X;tENXUb)Ty&C>K*E>3l-q5ZnX=k@rgNl(mu!AZvEmToILb3zgw=jD-VCT?p^+^nf#tBjs>ln z@R`X@dG9;V;B|?wRNae)nazx81d`SNHY%LeD|fSec4B;XuD)q*eaBNxaPt3Y`CCV% zC|iwGAkP$NUM&ZF^W=H$nDR73Xx$r+t2pach<8NKDkS!rx^+Q#g@2aQrbFqilBu=s zmpY%0D2|#>9-9oj0X&%%vX<7UoO8sgV*~@HebdF!udPzb?x~Dc+N7PGpoPC^{53S@ z?IqU>8@?7UPegtBGk{Fs>hO*e>ct32hv*`DiwAwqx^-Xc5^Tg7r;U+D4T#rwP1Q7? 
zYUAfuq_B|M%*V_T2@7d2&Fo`{dy9!xrHFfP9g^X*S|PT=bnI;HD1ukV$lKFIHp1={ zC!{PCmpCN$I^$Svyab4A#9~nMDU|t0j~98o*-p@7Jr-7iA7~%J(8I*F{IFtj>UX*y zTyV9_mYzLW6wp)lj8%w-W0hx{ng@&-xCgaZ3$Z1Wt*QhTc*P>2@8f1pU^+x1q~LD+k)x zxsI@9=6+&9{RIH(K}Y4eW_2{h0RWXh>7oh0Q|2gVquUh{W~y2VvA0h~-|UZUZ%y5H zrv!oCekC0qhzYVuadisjq$<89ohA6V`Uf+&FplASHLG}|Q0BYyiI6c^Cf7Pq(C`>D zKn5T^DBT!3Qhfj4oP@DGOkOX#mQ#@flDNHJTUlI`u=YT&d?2h!X@}mzP{?wxq)V{rfjpv z2mn~4lzPo?=KVrSk@>W}zZTVj{}a*EUFbyeE#xIkZ=IBB=qJn>vUgI?4$1Ta4oVrv zq{rAzioOOLXDz~E2z7g3C2YJHaB!5CKQB3%<)obT$t2shAQJt_p(I+d{_|BQM6aB4u*Q_=A`HHB5%YLaY-Ow-5{IyB25UH}lAG z`g95>MCQqdE?uS{1qQ%ssR9H_f^60qVn?=1CLq8}^LzYIG;ud6u=zrRx>&g_{iC6P zb8K?1H9~p7c)Z7q>$#sR*T$c1sL-(kHQFC&T$!k40cmoEu#l8+&R-lyLb#(-E`d zx*)q5*bNua^V=Z$P*4)lEgn6QkIl^twJ*&9*b+Aq=6^dQMG5%WB27W2IrOI^lt|*{ zwgnygl!36^NG;CHci8oxYy;_5&$WY><@Rq;w` zO0W#P6R0S@M&v&BBg2~cl58fVJC}e5le97bw#%|rZ)iVGcJH0c%D3>uUrmw~DepY`lG-GeWIlYcrLq%+KSLzRORqVje1Fy;_rR{?Us3 zR$hG{bWd$a)iOUzPsdz>RgZ5r>#bk6RV0UXAdE$8+Oai+@4-OD_^wF%6&1AC9Hp`L zE$UhNzEpfe1*zUOVCpV~V~K?^$jMH*wt@l8V0S-U@n*Re_vtUDoA!nHb2(BKL+at&!(a$wsMP23)v(&2E`RjVoK^go9CbxOKZV zsK++|m}GM^3O}WPu;aSjVshw86GcqmL{m^3X9BF5jRl_iNuyM#wrUf@hcfsO?PU%$ z_4vWaJywZ`1Vzt2 zm7sZE$F=f=$UA2Yyg;$(t}0c?V4#LjM!zQ#{P^BbO)BjJ<(K~Lg!d58|Rsge^bi)z-p~WCr?!FyesLD8MMNB1siwU z)Nplg4K;cwS%#lsJ>DOwMq?1TdLM&Pnahf)>6v`u5=NdbuGl9i-@V$E3|74S0)&d# z6nDdx*LDSD^Cx5VRh|}T4@t7@!wAsoc*@id)`vh{YQpGxyTopw)#oj^pyVFL}k(;gtJ9k zZxm%GCg*NbZ40VUKnSc0V(h1}YrrNM4gJ+KMuAmszFRPoo?O(JQFL$gK2>5yJzIC; zvQ6*Dsc@nu^H-#0fvLODjGiEZ>T^7fE#p;HSI#Y&PwGrQeVXVX>${&Don?n6=r)b$ zuBy#5Pb>@1%uCKo1zsIyZTtN$$G>If(FY2m=w^UtTO zzp<1aNND%!A&l7cTO)MX2z=7gdEj>$lj30(8}^n-t$u%E*;uDD$lUX2`}9+#`t*pw zmUAC$yWi&&V)9OC81%fCoRVF98rW7v#Pl3I^=As3B2!0m&1~6hX)o#Zon{{f!HTVi z0OHj8qn*Ad3)bKY&8$3C8sq*sk?EK3la|8h(`Yk~VJ!9$c7UL-#g02gd^lDA81@-@ zYkeq5WWI2zIM@eZ*DZ#1Z^5|1+q$0TuIBQ@>ExpQSM-;ZC<<8 zBousi?%G;aW%&~wmUrMX_KIASoM3t=?z{7rnG(C;HZ_wt40UZITOlA zn)a#4<0g7czck1Vu_yaIN06R!8;Javx*Qf4IC!tXO@%mq-K8@KcyL!xZ5P$jdeOSA 
zJm)*QARv(_7{7thfwliwwmaF&Vi%5u3?A$N%rs1Nl_7C3>R(%H?F^$S_Juh0>C<|d z_=p{1b@54lK+178|%Y zWD%K%9z|V5l}#BO(O->Cc_2M$i*rB(o*Asq41zWzjhTsf#6hK%_Kv+RLQS(uEczbB*ETga|Jz%O^&(Nptjc^0j$*QPsYS{+v_2#qO zFcqwD)jF2;xhYbI^c-@bi6e?iJe^j`(`6&n7lDR`_, + `nlp类 `_, + `audio类 `_),请使用如下参数: + + .. image:: map_parameter_cn.png + + - 如果使用的是自定义PyFunc数据增强,请使用如下参数: + + .. image:: map_parameter_pyfunc_cn.png + 参数: - **operations** (Union[list[TensorOperation], list[functions]]) - 一组数据增强操作,支持数据集增强算子或者用户自定义的Python Callable对象。map操作将按顺序将一组数据增强作用在数据集对象上。 - **input_columns** (Union[str, list[str]], 可选) - 第一个数据增强操作的输入数据列。此列表的长度必须与 `operations` 列表中第一个数据增强的预期输入列数相匹配。默认值:None。表示所有数据列都将传递给第一个数据增强操作。 - **output_columns** (Union[str, list[str]], 可选) - 最后一个数据增强操作的输出数据列。如果 `input_columns` 长度不等于 `output_columns` 长度,则必须指定此参数。列表的长度必须必须与最后一个数据增强的输出列数相匹配。默认值:None,输出列将与输入列具有相同的名称。 - **column_order** (Union[str, list[str]], 可选) - 指定传递到下一个数据集操作的数据列的顺序。如果 `input_columns` 长度不等于 `output_columns` 长度,则必须指定此参数。注意:参数的列名不限定在 `input_columns` 和 `output_columns` 中指定的列,也可以是上一个操作输出的未被处理的数据列。默认值:None,按照原输入顺序排列。 - **num_parallel_workers** (int, 可选) - 指定map操作的多进程/多线程并发数,加快处理速度。默认值:None,将使用 `set_num_parallel_workers` 设置的并发数。 - - **python_multiprocessing** (bool, 可选) - 启用Python多进程模式加速map操作。当传入的 `operations` 计算量很大时,开启此选项可能会有较好效果。默认值:False。 - - **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读 `单节点数据缓存 `_ 。默认值:None,不使用缓存。 - - **callbacks** (DSCallback, list[DSCallback], 可选) - 要调用的Dataset回调函数列表。默认值:None。 - - **max_rowsize** (int, 可选) - 指定在多进程之间复制数据时,共享内存分配的最大空间,仅当 `python_multiprocessing` 为True时,该选项有效。默认值:16,单位为MB。 - - **offload** (bool, 可选) - 是否进行异构硬件加速,详情请阅读 `数据准备异构加速 `_ 。默认值:None。 + - **\*\*kwargs** - 其他参数。 + + - python_multiprocessing (bool, 可选) - 启用Python多进程模式加速map操作。当传入的 `operations` 计算量很大时,开启此选项可能会有较好效果。默认值:False。 + - max_rowsize (int, 可选) - 指定在多进程之间复制数据时,共享内存分配的最大空间,仅当 `python_multiprocessing` 为True时,该选项有效。默认值:16,单位为MB。 + - cache (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读 `单节点数据缓存 `_ 。默认值:None,不使用缓存。 + - callbacks 
(DSCallback, list[DSCallback], 可选) - 要调用的Dataset回调函数列表。默认值:None。 + - offload (bool, 可选) - 是否进行异构硬件加速,详情请阅读 `数据准备异构加速 `_ 。默认值:None。 .. note:: - `operations` 参数接收 `TensorOperation` 类型的数据处理操作,以及用户定义的Python函数(PyFuncs)。 diff --git a/mindspore/python/mindspore/dataset/engine/datasets.py b/mindspore/python/mindspore/dataset/engine/datasets.py index 60372faec96..348cc0067ce 100644 --- a/mindspore/python/mindspore/dataset/engine/datasets.py +++ b/mindspore/python/mindspore/dataset/engine/datasets.py @@ -767,8 +767,7 @@ class Dataset: @check_map def map(self, operations, input_columns=None, output_columns=None, column_order=None, - num_parallel_workers=None, python_multiprocessing=False, cache=None, callbacks=None, - max_rowsize=16, offload=None): + num_parallel_workers=None, **kwargs): """ Apply each operation in operations to this dataset. @@ -780,6 +779,18 @@ class Dataset: The columns outputted by the very last operation will be assigned names specified by `output_columns`, and if not specified, the column name of output column is same as that of `input_columns`. + - If you use transformations ( + `vision transform `_, + `nlp transform `_, + `audio transform `_) + provided by mindspore dataset, please use the following parameters: + + .. image:: map_parameter_en.png + + - If you use user-defined transform as PyFunc (Python Func), please use the following parameters: + + .. image:: map_parameter_pyfunc_en.png + Args: operations (Union[list[TensorOperation], list[functions]]): List of operations to be applied on the dataset. Operations are applied in the order they appear in this list. @@ -798,14 +809,21 @@ class Dataset: Caution: the list here is not just the columns specified in parameter input_columns and output_columns. num_parallel_workers (int, optional): Number of threads used to process the dataset in parallel (default=None, the value from the configuration will be used). 
- python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes. This - option could be beneficial if the Python operation is computational heavy (default=False). - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. - (default=None, which means no cache is used). - callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called (Default=None). - max_rowsize (int, optional): Maximum size of row in MB that is used for shared memory allocation to copy - data between processes. This is only used if python_multiprocessing is set to True (Default=16). - offload (bool, optional): Flag to indicate whether offload is used (Default=None). + **kwargs: + + - python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker processes. + This option could be beneficial if the Python operation is computationally heavy (default=False). + + - max_rowsize (int, optional): Maximum size of row in MB that is used for shared memory allocation to + copy data between processes. This is only used if python_multiprocessing is set to True (Default=16). + + - cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. + (default=None, which means no cache is used). + + - callbacks (DSCallback, list[DSCallback], optional): List of Dataset callbacks to be called + (Default=None). + + - offload (bool, optional): Flag to indicate whether offload is used (Default=None). 
def get_map_kwargs_from_dict(param_dict):
    """Extract the optional keyword settings of ``map`` from a kwargs dict.

    Only the five keys listed below are read; any other key present in
    `param_dict` is ignored by this helper.

    Args:
        param_dict (Union[dict, None]): Keyword arguments collected for ``map``.
            ``None`` is handled like an empty dict, so every default is returned.

    Returns:
        tuple, ``(python_multiprocessing, max_rowsize, cache, callbacks, offload)``.
        Keys that are absent fall back to ``False``, ``16``, ``None``, ``None``
        and ``None`` respectively.
    """
    # The original assigned the values only inside `if param_dict is not None:`,
    # so a None argument could not produce the defaults. Normalising None to an
    # empty mapping keeps a single code path and always yields a 5-tuple.
    options = {} if param_dict is None else param_dict

    python_multiprocessing = options.get("python_multiprocessing", False)
    max_rowsize = options.get("max_rowsize", 16)
    cache = options.get("cache", None)
    callbacks = options.get("callbacks", None)
    offload = options.get("offload", None)
    return python_multiprocessing, max_rowsize, cache, callbacks, offload
from types import FunctionType