Fix possibly incomplete file writes in textcnn

This commit is contained in:
zhouneng 2021-06-02 13:34:14 +08:00
parent 14cf33a6df
commit 0c58fa9d1c
1 changed file with 14 additions and 14 deletions

View File

@@ -53,12 +53,12 @@ class DataProcessor:
""" collect weight """ """ collect weight """
vocab_size = self.get_dict_len() vocab_size = self.get_dict_len()
embedding_index = {} embedding_index = {}
f = open(glove_path) with open(glove_path) as f:
for line in f: for line in f:
values = line.split() values = line.split()
word = values[0] word = values[0]
vec = np.array(values[1:], dtype='float32') vec = np.array(values[1:], dtype='float32')
embedding_index[word] = vec embedding_index[word] = vec
weight_np = np.zeros((vocab_size, embed_size)).astype(np.float32) weight_np = np.zeros((vocab_size, embed_size)).astype(np.float32)
for word, vec in embedding_index.items(): for word, vec in embedding_index.items():
@@ -129,10 +129,10 @@ class MovieReview(DataProcessor):
self.Pos = [] self.Pos = []
self.Neg = [] self.Neg = []
for filename in self.files: for filename in self.files:
f = codecs.open(filename, 'r') with codecs.open(filename, 'r') as f:
ff = f.read() ff = f.read()
file_object = codecs.open(filename, 'w', 'utf-8') with codecs.open(filename, 'w', 'utf-8') as file_object:
file_object.write(ff) file_object.write(ff)
self.read_data(filename) self.read_data(filename)
self.PosNeg = self.Pos + self.Neg self.PosNeg = self.Pos + self.Neg
self.text2vec(maxlen=maxlen) self.text2vec(maxlen=maxlen)
@@ -394,10 +394,10 @@ class SST2(DataProcessor):
self.maxlen = float("-inf") self.maxlen = float("-inf")
for filename in self.files: for filename in self.files:
if 'train' in filename or 'dev' in filename: if 'train' in filename or 'dev' in filename:
f = codecs.open(filename, 'r') with codecs.open(filename, 'r') as f:
ff = f.read() ff = f.read()
file_object = codecs.open(filename, 'w', 'utf-8') with codecs.open(filename, 'w', 'utf-8') as file_object:
file_object.write(ff) file_object.write(ff)
self.read_data(filename) self.read_data(filename)
self.text2vec(maxlen=maxlen) self.text2vec(maxlen=maxlen)
self.split_dataset(split=split) self.split_dataset(split=split)