rerank is refined
This commit is contained in:
parent
0afcdf9917
commit
166980260e
39
search.py
39
search.py
|
@ -227,7 +227,7 @@ import csv
|
|||
if __name__ == '__main__':
|
||||
|
||||
########### single search
|
||||
query = 'java ide'
|
||||
query = 'java message queue'
|
||||
# items = query.split(' ')
|
||||
# items = [get_syns(term) for term in items]
|
||||
combined_queries = combine_item(query)
|
||||
|
@ -250,7 +250,8 @@ if __name__ == '__main__':
|
|||
final_result = list()
|
||||
for item in sorted(cans.items(),lambda x, y: cmp(x[1], y[1]),reverse = True)[:20]:
|
||||
final_result.append(item[0])
|
||||
print item[0]
|
||||
|
||||
print final_result
|
||||
print_time('>>> sort done')
|
||||
|
||||
|
||||
|
@ -360,16 +361,14 @@ if __name__ == '__main__':
|
|||
# re rank top 10
|
||||
# 把 需要重新排的 和 不需要排的分开 re_rank_prj 和 stay_rank_prj
|
||||
# for 1 to 10 从两个里面选,优先选 重排的,如果有冲突,优先选down的。
|
||||
stay_rank_prj = [(i,final_result[i]) for i in range(0,len(final_result))]
|
||||
re_rank_prj = dict() # 每一个pos 对应着 想要排到这个位置的所有prj,并且按照之前的排序顺序进行拍序
|
||||
stay_rank_prj = {i:(i,final_result[i]) for i in range(0,len(final_result))} #{pos:prj}
|
||||
re_rank_prj = dict() # 每一个pos 对应着 想要排到这个位置的所有prj,并且按照之前的排序顺序进行排序 {pos: [prj] }
|
||||
for rr in rerank:
|
||||
if rr[1] not in re_rank_prj:
|
||||
re_rank_prj[rr[1]] = list()
|
||||
re_rank_prj[rr[1]].append(rr[0])
|
||||
|
||||
index = stay_rank_prj.index(rr[0])
|
||||
if index != -1:
|
||||
del stay_rank_prj[index]
|
||||
del stay_rank_prj[rr[0][0]]# 从原来的位置删除
|
||||
|
||||
# 按照之前的排序顺序进行排序
|
||||
for key,value in re_rank_prj.items():
|
||||
|
@ -377,21 +376,19 @@ if __name__ == '__main__':
|
|||
|
||||
top_10_list = list()
|
||||
#merge re_rank_prj and stay_rank_prj
|
||||
pos_in_stay = 0
|
||||
for i in range(0,10):
|
||||
#先选取要重新排的,再排以前的
|
||||
pos = 0
|
||||
while True:
|
||||
if (len(re_rank_prj) == 0) and (len(stay_rank_prj) == 0):
|
||||
break
|
||||
|
||||
flag = True
|
||||
for j in range(0,i+1):
|
||||
if j in re_rank_prj: # 如果在re_rank_prj中比i小的还有剩余,应该先把之前的搞完吧
|
||||
value = re_rank_prj[j].pop(0)
|
||||
if len(re_rank_prj[j]) == 0:
|
||||
del re_rank_prj[j]
|
||||
flag = False
|
||||
break
|
||||
if flag:
|
||||
value = stay_rank_prj[pos_in_stay]
|
||||
pos_in_stay += 1
|
||||
top_10_list.append(value[1])
|
||||
if pos in re_rank_prj: # 如果在re_rank_prj中比i小的还有剩余,应该先把之前的搞完吧
|
||||
top_10_list.extend(re_rank_prj[pos])
|
||||
del re_rank_prj[pos]
|
||||
if pos in stay_rank_prj:
|
||||
top_10_list.append(stay_rank_prj[pos])
|
||||
del stay_rank_prj[pos]
|
||||
pos += 1
|
||||
print top_10_list
|
||||
# local search readjust
|
||||
|
||||
|
|
Loading…
Reference in New Issue