update: dialogue-cse
This commit is contained in:
parent
f1171591bb
commit
db830f6fd9
|
@@ -104,7 +104,7 @@ class RetrieverEmbed:
|
||||||
for line in codecs.open(file, "r", "utf-8"):
|
for line in codecs.open(file, "r", "utf-8"):
|
||||||
arr = line.strip("\n").split("\t")
|
arr = line.strip("\n").split("\t")
|
||||||
v = np.array([float(_) for _ in arr[2].split(",")], dtype=np.float32)
|
v = np.array([float(_) for _ in arr[2].split(",")], dtype=np.float32)
|
||||||
text_list.append(arr[0])
|
text_list.append(arr[0].strip())
|
||||||
vec_list.append(v)
|
vec_list.append(v)
|
||||||
o.build_index(text_list, vec_list)
|
o.build_index(text_list, vec_list)
|
||||||
return o
|
return o
|
||||||
|
@@ -198,6 +198,7 @@ def main():
|
||||||
for line in codecs.open(selection_file, "r", "utf-8"):
|
for line in codecs.open(selection_file, "r", "utf-8"):
|
||||||
arr = line.strip("\n").split("\t")
|
arr = line.strip("\n").split("\t")
|
||||||
qid, q, _, p_ids, n_ids = arr
|
qid, q, _, p_ids, n_ids = arr
|
||||||
|
q = q.strip()
|
||||||
assert qid not in id2text, "重复qid"
|
assert qid not in id2text, "重复qid"
|
||||||
id2text[qid] = q
|
id2text[qid] = q
|
||||||
text2id[q] = qid
|
text2id[q] = qid
|
||||||
|
|
Loading…
Reference in New Issue