Merge pull request '将特征提取修改为可以多线程运行的' (#61) from zy into master
This commit is contained in:
commit
67453767a1
|
@ -1,6 +1,6 @@
|
|||
import json
|
||||
import pathlib
|
||||
import queue
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
from collections import Counter
|
||||
from typing import Dict, List, Tuple
|
||||
|
@ -9,6 +9,7 @@ import pandas as pd
|
|||
from sqlalchemy import create_engine
|
||||
|
||||
import GlobalConstants
|
||||
from ConfigOperator import ConfigOperator
|
||||
from FileOperator import FileOperator
|
||||
from models.RepoInfo import RepoInfo
|
||||
from MySQLOperator import MySQLOperator
|
||||
|
@ -518,16 +519,16 @@ def factor_extractor(function_id_1: int, function_id_2: int, mysqlOp: MySQLOpera
|
|||
}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
repoInfos: List[RepoInfo] = FileOperator("factor_repos").load_repos()
|
||||
for repoInfo in repoInfos:
|
||||
def run(q):
|
||||
while not q.empty():
|
||||
repoInfo: RepoInfo = q.get()
|
||||
mysqlOp: MySQLOperator = MySQLOperator(
|
||||
config_path="config.yml", autocommit=True, repoInfo=repoInfo
|
||||
)
|
||||
clone_relations_function = mysqlOp.tablename_dict["clone_relations_function"]
|
||||
sql_all_clones = """
|
||||
select distinct function_id_1,function_id_2 from `{tablename}`
|
||||
""".format(
|
||||
select distinct function_id_1,function_id_2 from `{tablename}`
|
||||
""".format(
|
||||
tablename=clone_relations_function
|
||||
)
|
||||
while True:
|
||||
|
@ -548,35 +549,35 @@ if __name__ == "__main__":
|
|||
repo_id=repoInfo.id, separator=GlobalConstants.SEPARATOR
|
||||
)
|
||||
sql_result = """
|
||||
create table if not exists `{tablename}` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`function_id_1` int(11) NULL,
|
||||
`function_id_2` int(11) NULL,
|
||||
`similarity` int(11) NULL,
|
||||
`degree_diff` int(11) NULL,
|
||||
`is_test` int(11) NULL,
|
||||
`file_distance` int(11) NULL,
|
||||
`method_name_same` int(11) NULL,
|
||||
`history_change_sum` int(11) NULL,
|
||||
`co_change` int(11) NULL,
|
||||
`consistant_change` int(11) NULL,
|
||||
`main_author_same` int(11) NULL,
|
||||
`author_exp_sum` int(11) NULL,
|
||||
PRIMARY KEY (`id`),
|
||||
INDEX(`function_id_1`) USING BTREE,
|
||||
INDEX(`function_id_2`) USING BTREE,
|
||||
INDEX(`similarity`) USING BTREE,
|
||||
INDEX(`degree_diff`) USING BTREE,
|
||||
INDEX(`is_test`) USING BTREE,
|
||||
INDEX(`file_distance`) USING BTREE,
|
||||
INDEX(`method_name_same`) USING BTREE,
|
||||
INDEX(`history_change_sum`) USING BTREE,
|
||||
INDEX(`co_change`) USING BTREE,
|
||||
INDEX(`consistant_change`) USING BTREE,
|
||||
INDEX(`main_author_same`) USING BTREE,
|
||||
INDEX(`author_exp_sum`) USING BTREE
|
||||
)
|
||||
""".format(
|
||||
create table if not exists `{tablename}` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`function_id_1` int(11) NULL,
|
||||
`function_id_2` int(11) NULL,
|
||||
`similarity` int(11) NULL,
|
||||
`degree_diff` int(11) NULL,
|
||||
`is_test` int(11) NULL,
|
||||
`file_distance` int(11) NULL,
|
||||
`method_name_same` int(11) NULL,
|
||||
`history_change_sum` int(11) NULL,
|
||||
`co_change` int(11) NULL,
|
||||
`consistant_change` int(11) NULL,
|
||||
`main_author_same` int(11) NULL,
|
||||
`author_exp_sum` int(11) NULL,
|
||||
PRIMARY KEY (`id`),
|
||||
INDEX(`function_id_1`) USING BTREE,
|
||||
INDEX(`function_id_2`) USING BTREE,
|
||||
INDEX(`similarity`) USING BTREE,
|
||||
INDEX(`degree_diff`) USING BTREE,
|
||||
INDEX(`is_test`) USING BTREE,
|
||||
INDEX(`file_distance`) USING BTREE,
|
||||
INDEX(`method_name_same`) USING BTREE,
|
||||
INDEX(`history_change_sum`) USING BTREE,
|
||||
INDEX(`co_change`) USING BTREE,
|
||||
INDEX(`consistant_change`) USING BTREE,
|
||||
INDEX(`main_author_same`) USING BTREE,
|
||||
INDEX(`author_exp_sum`) USING BTREE
|
||||
)
|
||||
""".format(
|
||||
tablename=factors
|
||||
)
|
||||
mysqlOp.cursor.execute(sql_result)
|
||||
|
@ -597,6 +598,32 @@ if __name__ == "__main__":
|
|||
index=False,
|
||||
if_exists="append",
|
||||
)
|
||||
q.task_done()
|
||||
print(
|
||||
"finish extract factors in repo: {git_url}".format(git_url=repoInfo.git_url)
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: put every repository returned by
    # FileOperator("factor_repos") on a work queue, then start worker
    # threads that each execute run() against that shared queue.
    repoInfos: List[RepoInfo] = FileOperator("factor_repos").load_repos()
    mysqlOp = MySQLOperator(config_path=GlobalConstants.CONFIG_PATH)

    workQueue = queue.Queue()
    for repoInfo in repoInfos:
        # Look up this repository's id and store it on the RepoInfo
        # before handing the object to the workers.
        repoInfo.id = mysqlOp.get_repo_id_by_names(repoInfo=repoInfo)
        workQueue.put(repoInfo)

    # The number of worker threads comes from the "RCD" -> "thread_num"
    # entry of the configuration file.
    rcd_config = ConfigOperator(
        config_path=GlobalConstants.CONFIG_PATH
    ).read_config()["RCD"]
    thread_num = int(rcd_config["thread_num"])

    threads = []
    for _ in range(thread_num):
        worker = threading.Thread(target=run, args=(workQueue,))
        worker.start()
        threads.append(worker)

    # Block until every worker has returned before reporting completion.
    for worker in threads:
        worker.join()

    print("Finish")
|
||||
|
|
Loading…
Reference in New Issue