From 83c55057272fc949246af718375b1dc95be51bd5 Mon Sep 17 00:00:00 2001 From: hanjun996 Date: Wed, 1 Jul 2020 11:44:10 +0800 Subject: [PATCH] clean codedex warning --- model_zoo/wide_and_deep/src/preprocess_data.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/model_zoo/wide_and_deep/src/preprocess_data.py b/model_zoo/wide_and_deep/src/preprocess_data.py index 35d13b841da..75562aa71be 100644 --- a/model_zoo/wide_and_deep/src/preprocess_data.py +++ b/model_zoo/wide_and_deep/src/preprocess_data.py @@ -17,6 +17,8 @@ import os import pickle import collections import argparse +import urllib.request +import tarfile import numpy as np from mindspore.mindrecord import FileWriter @@ -257,10 +259,15 @@ if __name__ == '__main__': download_data_path = data_path + "origin_data/" mkdir_path(download_data_path) - os.system( - "wget -P {} -c https://s3-eu-west-1.amazonaws.com/kaggle-display-advertising-challenge-dataset/dac.tar.gz --no-check-certificate".format( - download_data_path)) - os.system("tar -zxvf {}dac.tar.gz".format(download_data_path)) + url = "https://s3-eu-west-1.amazonaws.com/kaggle-display-advertising-challenge-dataset/dac.tar.gz" + file_name = download_data_path + '/' + url.split('/')[-1] + urllib.request.urlretrieve(url, filename=file_name) + + tar = tarfile.open(file_name) + names = tar.getnames() + for name in names: + tar.extract(name, path=download_data_path) + tar.close() criteo_stats = CriteoStatsDict() data_file_path = data_path + "origin_data/train.txt"