diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc index aac9166c64..9abef8fa70 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc @@ -35,7 +35,7 @@ tensor::TensorPtr CreateTensor(const AnfNodePtr &node) { // 1 create tensor auto shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0); auto last_dim = shape[shape.size() - 1]; - std::vector indices_shape = {SizeToInt(last_dim)}; + std::vector indices_shape = {SizeToInt(last_dim * 2)}; TensorTypePtr tensor_type = std::make_shared(kFloat16); MS_EXCEPTION_IF_NULL(tensor_type); tensor::DeviceInfo device_info{kOpFormat_DEFAULT, tensor_type}; @@ -50,7 +50,11 @@ tensor::TensorPtr CreateTensor(const AnfNodePtr &node) { for (size_t i = 0; i < last_dim; ++i) { half_data.emplace_back(Eigen::half(static_cast(i))); } - auto elem_num = last_dim * kFloat16Len; + for (size_t i = 0; i < last_dim; ++i) { + auto gap = static_cast(i) - static_cast(Eigen::half(static_cast(i))); + half_data.emplace_back(Eigen::half(static_cast(gap))); + } + auto elem_num = last_dim * kFloat16Len * 2; auto ret_code = memcpy_s(data_ptr, static_cast(indices_tensor->data().nbytes()), half_data.data(), elem_num); if (ret_code != 0) { MS_LOG(ERROR) << "Failed to copy data into Tensor."; @@ -108,6 +112,13 @@ const AnfNodePtr TopKSplit::Process(const FuncGraphPtr &func_graph, const AnfNod MS_LOG(INFO) << "The input k of topk has been converted to attr"; return nullptr; } + auto shape = AnfAlgo::GetPrevNodeOutputInferShape(node, 0); + auto last_dim = shape[shape.size() - 1]; + const size_t kMaxFloat16 = 65500; + if (last_dim > kMaxFloat16) { + MS_LOG(INFO) << "The last dim is more than 65500, switch to aicpu ops."; + return nullptr; + } // Copy a new node to check supported. std::vector new_inputs{NewValueNode(std::make_shared(kTopKOpName))}; new_inputs.insert(new_inputs.end(), cnode->inputs().begin() + 1, cnode->inputs().end()); diff --git a/model_zoo/faster_rcnn/scripts/run_distribute_train.sh b/model_zoo/faster_rcnn/scripts/run_distribute_train.sh index 5c87a25c7b..bc6ebd4a18 100755 --- a/model_zoo/faster_rcnn/scripts/run_distribute_train.sh +++ b/model_zoo/faster_rcnn/scripts/run_distribute_train.sh @@ -59,6 +59,7 @@ do mkdir ./train_parallel$i cp ../*.py ./train_parallel$i cp *.sh ./train_parallel$i + cp -r ../src ./train_parallel$i cd ./train_parallel$i || exit echo "start training for rank $RANK_ID, device $DEVICE_ID" env > env.log diff --git a/model_zoo/faster_rcnn/scripts/run_eval.sh b/model_zoo/faster_rcnn/scripts/run_eval.sh index 0c12d16086..07bdfac1dd 100755 --- a/model_zoo/faster_rcnn/scripts/run_eval.sh +++ b/model_zoo/faster_rcnn/scripts/run_eval.sh @@ -57,6 +57,7 @@ fi mkdir ./eval cp ../*.py ./eval cp *.sh ./eval +cp -r ../src ./eval cd ./eval || exit env > env.log echo "start eval for device $DEVICE_ID" diff --git a/model_zoo/faster_rcnn/scripts/run_standalone_train.sh b/model_zoo/faster_rcnn/scripts/run_standalone_train.sh index fc897e1b23..3197da016e 100755 --- a/model_zoo/faster_rcnn/scripts/run_standalone_train.sh +++ b/model_zoo/faster_rcnn/scripts/run_standalone_train.sh @@ -49,6 +49,7 @@ fi mkdir ./train cp ../*.py ./train cp *.sh ./train +cp -r ../src ./train cd ./train || exit echo "start training for device $DEVICE_ID" env > env.log diff --git a/model_zoo/faster_rcnn/src/config.py b/model_zoo/faster_rcnn/src/config.py index 3767524ffb..78e8f4242e 100644 --- a/model_zoo/faster_rcnn/src/config.py +++ b/model_zoo/faster_rcnn/src/config.py @@ -134,7 +134,7 @@ config = ed({ "keep_checkpoint_max": 10, "save_checkpoint_path": "./checkpoint", - "mindrecord_dir": "../MindRecoid_COCO_TRAIN", + "mindrecord_dir": "../MindRecord_COCO_TRAIN", "coco_root": "./cocodataset/", "train_data_type": "train2017", "val_data_type": "val2017", diff --git a/model_zoo/faster_rcnn/src/dataset.py b/model_zoo/faster_rcnn/src/dataset.py index eaf6de72c3..e384534f77 100644 --- a/model_zoo/faster_rcnn/src/dataset.py +++ b/model_zoo/faster_rcnn/src/dataset.py @@ -24,7 +24,7 @@ import mmcv import mindspore.dataset as de import mindspore.dataset.transforms.vision.c_transforms as C from mindspore.mindrecord import FileWriter -from config import config +from src.config import config def bbox_overlaps(bboxes1, bboxes2, mode='iou'): diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc index 1c0454a56d..4cee3577ed 100644 --- a/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc @@ -90,7 +90,7 @@ TEST_F(TestHWTopKSplit, test_topk_split) { EXPECT_TRUE(value_node->value()->isa()); auto tensor = value_node->value()->cast(); EXPECT_EQ(tensor->shape().size(), 1); - EXPECT_EQ(tensor->shape()[0], 4); + EXPECT_EQ(tensor->shape()[0], 8); } TEST_F(TestHWTopKSplit, test_topk_no_split) {