update: dialogue-cse

This commit is contained in:
出蛰 2022-11-07 22:20:56 +08:00
parent f1171591bb
commit db830f6fd9
1 changed file with 2 additions and 1 deletion

View File

@@ -104,7 +104,7 @@ class RetrieverEmbed:
for line in codecs.open(file, "r", "utf-8"): for line in codecs.open(file, "r", "utf-8"):
arr = line.strip("\n").split("\t") arr = line.strip("\n").split("\t")
v = np.array([float(_) for _ in arr[2].split(",")], dtype=np.float32) v = np.array([float(_) for _ in arr[2].split(",")], dtype=np.float32)
text_list.append(arr[0]) text_list.append(arr[0].strip())
vec_list.append(v) vec_list.append(v)
o.build_index(text_list, vec_list) o.build_index(text_list, vec_list)
return o return o
@@ -198,6 +198,7 @@ def main():
for line in codecs.open(selection_file, "r", "utf-8"): for line in codecs.open(selection_file, "r", "utf-8"):
arr = line.strip("\n").split("\t") arr = line.strip("\n").split("\t")
qid, q, _, p_ids, n_ids = arr qid, q, _, p_ids, n_ids = arr
q = q.strip()
assert qid not in id2text, "重复qid" assert qid not in id2text, "重复qid"
id2text[qid] = q id2text[qid] = q
text2id[q] = qid text2id[q] = qid