forked from mindspore-Ecosystem/mindspore
parent
32a38810ed
commit
01cb5b3007
|
@ -207,7 +207,65 @@ def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl):
|
|||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.env_single
|
||||
def test_resnet_thor_imagenet_8p():
|
||||
def test_resnet_thor_imagenet_8p_0():
|
||||
"""
|
||||
Feature: Resnet50 thor network
|
||||
Description: Train and evaluate resnet50 thor network on imagenet dataset
|
||||
Expectation: accuracy > 0.28, time cost < 25.
|
||||
"""
|
||||
context.set_context(enable_graph_kernel=False, enable_sparse=False)
|
||||
context.reset_auto_parallel_context()
|
||||
context.reset_ps_context()
|
||||
|
||||
q = Queue()
|
||||
|
||||
# resnet50_thor
|
||||
device_num = 8
|
||||
epoch_size = 1
|
||||
enable_hccl = True
|
||||
process = []
|
||||
for i in range(device_num):
|
||||
device_id = i
|
||||
process.append(Process(target=train_process_thor,
|
||||
args=(q, device_id, epoch_size, device_num, enable_hccl)))
|
||||
|
||||
cpu_count = os.cpu_count()
|
||||
each_cpu_count = cpu_count // device_num
|
||||
for i in range(device_num):
|
||||
process[i].start()
|
||||
if each_cpu_count > 1:
|
||||
cpu_start = each_cpu_count * i
|
||||
cpu_end = each_cpu_count * (i + 1)
|
||||
process_cpu = [x for x in range(cpu_start, cpu_end)]
|
||||
pid = process[i].pid
|
||||
os.sched_setaffinity(pid, set(process_cpu))
|
||||
|
||||
print("Waiting for all subprocesses done...")
|
||||
|
||||
for i in range(device_num):
|
||||
process[i].join()
|
||||
|
||||
# THOR
|
||||
thor_acc = 0.0
|
||||
thor_cost = 0.0
|
||||
for i in range(device_num):
|
||||
output = q.get()
|
||||
thor_acc += output['acc']
|
||||
thor_cost += output['cost']
|
||||
thor_acc = thor_acc / device_num
|
||||
thor_cost = thor_cost / device_num
|
||||
|
||||
for i in range(0, device_num):
|
||||
os.system("rm -rf " + str(i))
|
||||
print("End training...")
|
||||
assert thor_acc > 0.28
|
||||
|
||||
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.env_single
|
||||
def test_resnet_thor_imagenet_8p_1():
|
||||
"""
|
||||
Feature: Resnet50 thor network
|
||||
Description: Train and evaluate resnet50 thor network on imagenet dataset
|
||||
|
|
Loading…
Reference in New Issue