forked from mindspore-Ecosystem/mindspore
!13659 replace tensorflow io API with python standard library API
From: @zhouneng2 Reviewed-by: @liangchenghui,@c_34 Signed-off-by: @liangchenghui
This commit is contained in:
commit
6b9de24797
|
@ -22,6 +22,7 @@ from __future__ import division
|
|||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import zipfile
|
||||
import argparse
|
||||
|
@ -32,7 +33,6 @@ from six.moves import urllib
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
from absl import logging
|
||||
import tensorflow as tf
|
||||
|
||||
ML_1M = "ml-1m"
|
||||
ML_20M = "ml-20m"
|
||||
|
@ -100,9 +100,9 @@ def _download_and_clean(dataset, data_dir):
|
|||
|
||||
expected_files = ["{}.zip".format(dataset), RATINGS_FILE, MOVIES_FILE]
|
||||
|
||||
tf.io.gfile.makedirs(data_subdir)
|
||||
os.makedirs(data_subdir, exist_ok=True)
|
||||
if set(expected_files).intersection(
|
||||
tf.io.gfile.listdir(data_subdir)) == set(expected_files):
|
||||
os.listdir(data_subdir)) == set(expected_files):
|
||||
logging.info("Dataset {} has already been downloaded".format(dataset))
|
||||
return
|
||||
|
||||
|
@ -127,16 +127,16 @@ def _download_and_clean(dataset, data_dir):
|
|||
else:
|
||||
_regularize_20m_dataset(temp_dir)
|
||||
|
||||
for fname in tf.io.gfile.listdir(temp_dir):
|
||||
if not tf.io.gfile.exists(os.path.join(data_subdir, fname)):
|
||||
tf.io.gfile.copy(os.path.join(temp_dir, fname),
|
||||
os.path.join(data_subdir, fname))
|
||||
for fname in os.listdir(temp_dir):
|
||||
if not os.path.exists(os.path.join(data_subdir, fname)):
|
||||
shutil.copy(os.path.join(temp_dir, fname),
|
||||
os.path.join(data_subdir, fname))
|
||||
else:
|
||||
logging.info("Skipping copy of {}, as it already exists in the "
|
||||
"destination folder.".format(fname))
|
||||
|
||||
finally:
|
||||
tf.io.gfile.rmtree(temp_dir)
|
||||
shutil.rmtree(temp_dir)
|
||||
|
||||
|
||||
def _transform_csv(input_path, output_path, names, skip_first, separator=","):
|
||||
|
@ -152,8 +152,8 @@ def _transform_csv(input_path, output_path, names, skip_first, separator=","):
|
|||
if six.PY2:
|
||||
names = [six.ensure_text(n, "utf-8") for n in names]
|
||||
|
||||
with tf.io.gfile.GFile(output_path, "wb") as f_out, \
|
||||
tf.io.gfile.GFile(input_path, "rb") as f_in:
|
||||
with open(output_path, "wb") as f_out, \
|
||||
open(input_path, "rb") as f_in:
|
||||
|
||||
# Write column names to the csv.
|
||||
f_out.write(",".join(names).encode("utf-8"))
|
||||
|
@ -199,7 +199,7 @@ def _regularize_1m_dataset(temp_dir):
|
|||
output_path=os.path.join(temp_dir, MOVIES_FILE),
|
||||
names=MOVIE_COLUMNS, skip_first=False, separator="::")
|
||||
|
||||
tf.io.gfile.rmtree(working_dir)
|
||||
shutil.rmtree(working_dir)
|
||||
|
||||
|
||||
def _regularize_20m_dataset(temp_dir):
|
||||
|
@ -233,7 +233,7 @@ def _regularize_20m_dataset(temp_dir):
|
|||
output_path=os.path.join(temp_dir, MOVIES_FILE),
|
||||
names=MOVIE_COLUMNS, skip_first=True, separator=",")
|
||||
|
||||
tf.io.gfile.rmtree(working_dir)
|
||||
shutil.rmtree(working_dir)
|
||||
|
||||
|
||||
def download(dataset, data_dir):
|
||||
|
@ -244,14 +244,14 @@ def download(dataset, data_dir):
|
|||
|
||||
|
||||
def ratings_csv_to_dataframe(data_dir, dataset):
  """Load the ratings CSV for ``dataset`` under ``data_dir`` into a DataFrame.

  Args:
    data_dir: Root directory containing one subdirectory per dataset.
    dataset: Dataset name (e.g. "ml-1m" or "ml-20m") used as the
      subdirectory holding RATINGS_FILE.

  Returns:
    A pandas.DataFrame parsed from the ratings CSV.
  """
  # Open with an explicit encoding: a bare open() decodes using the locale
  # default, and because the handle handed to read_csv is already text, a
  # read_csv(encoding="utf-8") argument cannot compensate for a wrong decode.
  path = os.path.join(data_dir, dataset, RATINGS_FILE)
  with open(path, encoding="utf-8") as f:
    return pd.read_csv(f)
|
||||
|
||||
|
||||
def csv_to_joint_dataframe(data_dir, dataset):
|
||||
ratings = ratings_csv_to_dataframe(data_dir, dataset)
|
||||
|
||||
with tf.io.gfile.GFile(os.path.join(data_dir, dataset, MOVIES_FILE)) as f:
|
||||
with open(os.path.join(data_dir, dataset, MOVIES_FILE)) as f:
|
||||
movies = pd.read_csv(f, encoding="utf-8")
|
||||
|
||||
df = ratings.merge(movies, on=ITEM_COLUMN)
|
||||
|
|
Loading…
Reference in New Issue