Merge pull request '将特征提取修改为可以多线程运行的' (#61) from zy into master
This commit is contained in:
commit
67453767a1
|
@ -1,6 +1,6 @@
|
||||||
import json
|
import queue
|
||||||
import pathlib
|
|
||||||
import re
|
import re
|
||||||
|
import threading
|
||||||
import time
|
import time
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
from typing import Dict, List, Tuple
|
from typing import Dict, List, Tuple
|
||||||
|
@ -9,6 +9,7 @@ import pandas as pd
|
||||||
from sqlalchemy import create_engine
|
from sqlalchemy import create_engine
|
||||||
|
|
||||||
import GlobalConstants
|
import GlobalConstants
|
||||||
|
from ConfigOperator import ConfigOperator
|
||||||
from FileOperator import FileOperator
|
from FileOperator import FileOperator
|
||||||
from models.RepoInfo import RepoInfo
|
from models.RepoInfo import RepoInfo
|
||||||
from MySQLOperator import MySQLOperator
|
from MySQLOperator import MySQLOperator
|
||||||
|
@ -518,16 +519,16 @@ def factor_extractor(function_id_1: int, function_id_2: int, mysqlOp: MySQLOpera
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
def run(q):
|
||||||
repoInfos: List[RepoInfo] = FileOperator("factor_repos").load_repos()
|
while not q.empty():
|
||||||
for repoInfo in repoInfos:
|
repoInfo: RepoInfo = q.get()
|
||||||
mysqlOp: MySQLOperator = MySQLOperator(
|
mysqlOp: MySQLOperator = MySQLOperator(
|
||||||
config_path="config.yml", autocommit=True, repoInfo=repoInfo
|
config_path="config.yml", autocommit=True, repoInfo=repoInfo
|
||||||
)
|
)
|
||||||
clone_relations_function = mysqlOp.tablename_dict["clone_relations_function"]
|
clone_relations_function = mysqlOp.tablename_dict["clone_relations_function"]
|
||||||
sql_all_clones = """
|
sql_all_clones = """
|
||||||
select distinct function_id_1,function_id_2 from `{tablename}`
|
select distinct function_id_1,function_id_2 from `{tablename}`
|
||||||
""".format(
|
""".format(
|
||||||
tablename=clone_relations_function
|
tablename=clone_relations_function
|
||||||
)
|
)
|
||||||
while True:
|
while True:
|
||||||
|
@ -548,35 +549,35 @@ if __name__ == "__main__":
|
||||||
repo_id=repoInfo.id, separator=GlobalConstants.SEPARATOR
|
repo_id=repoInfo.id, separator=GlobalConstants.SEPARATOR
|
||||||
)
|
)
|
||||||
sql_result = """
|
sql_result = """
|
||||||
create table if not exists `{tablename}` (
|
create table if not exists `{tablename}` (
|
||||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||||
`function_id_1` int(11) NULL,
|
`function_id_1` int(11) NULL,
|
||||||
`function_id_2` int(11) NULL,
|
`function_id_2` int(11) NULL,
|
||||||
`similarity` int(11) NULL,
|
`similarity` int(11) NULL,
|
||||||
`degree_diff` int(11) NULL,
|
`degree_diff` int(11) NULL,
|
||||||
`is_test` int(11) NULL,
|
`is_test` int(11) NULL,
|
||||||
`file_distance` int(11) NULL,
|
`file_distance` int(11) NULL,
|
||||||
`method_name_same` int(11) NULL,
|
`method_name_same` int(11) NULL,
|
||||||
`history_change_sum` int(11) NULL,
|
`history_change_sum` int(11) NULL,
|
||||||
`co_change` int(11) NULL,
|
`co_change` int(11) NULL,
|
||||||
`consistant_change` int(11) NULL,
|
`consistant_change` int(11) NULL,
|
||||||
`main_author_same` int(11) NULL,
|
`main_author_same` int(11) NULL,
|
||||||
`author_exp_sum` int(11) NULL,
|
`author_exp_sum` int(11) NULL,
|
||||||
PRIMARY KEY (`id`),
|
PRIMARY KEY (`id`),
|
||||||
INDEX(`function_id_1`) USING BTREE,
|
INDEX(`function_id_1`) USING BTREE,
|
||||||
INDEX(`function_id_2`) USING BTREE,
|
INDEX(`function_id_2`) USING BTREE,
|
||||||
INDEX(`similarity`) USING BTREE,
|
INDEX(`similarity`) USING BTREE,
|
||||||
INDEX(`degree_diff`) USING BTREE,
|
INDEX(`degree_diff`) USING BTREE,
|
||||||
INDEX(`is_test`) USING BTREE,
|
INDEX(`is_test`) USING BTREE,
|
||||||
INDEX(`file_distance`) USING BTREE,
|
INDEX(`file_distance`) USING BTREE,
|
||||||
INDEX(`method_name_same`) USING BTREE,
|
INDEX(`method_name_same`) USING BTREE,
|
||||||
INDEX(`history_change_sum`) USING BTREE,
|
INDEX(`history_change_sum`) USING BTREE,
|
||||||
INDEX(`co_change`) USING BTREE,
|
INDEX(`co_change`) USING BTREE,
|
||||||
INDEX(`consistant_change`) USING BTREE,
|
INDEX(`consistant_change`) USING BTREE,
|
||||||
INDEX(`main_author_same`) USING BTREE,
|
INDEX(`main_author_same`) USING BTREE,
|
||||||
INDEX(`author_exp_sum`) USING BTREE
|
INDEX(`author_exp_sum`) USING BTREE
|
||||||
)
|
)
|
||||||
""".format(
|
""".format(
|
||||||
tablename=factors
|
tablename=factors
|
||||||
)
|
)
|
||||||
mysqlOp.cursor.execute(sql_result)
|
mysqlOp.cursor.execute(sql_result)
|
||||||
|
@ -597,6 +598,32 @@ if __name__ == "__main__":
|
||||||
index=False,
|
index=False,
|
||||||
if_exists="append",
|
if_exists="append",
|
||||||
)
|
)
|
||||||
|
q.task_done()
|
||||||
print(
|
print(
|
||||||
"finish extract factors in repo: {git_url}".format(git_url=repoInfo.git_url)
|
"finish extract factors in repo: {git_url}".format(git_url=repoInfo.git_url)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: fill a work queue with repositories, then let a
    # configurable number of threads drain it through `run`.
    repoInfos: List[RepoInfo] = FileOperator("factor_repos").load_repos()
    mysqlOp = MySQLOperator(config_path=GlobalConstants.CONFIG_PATH)

    workQueue = queue.Queue()
    for repoInfo in repoInfos:
        # Look up this repository's id and store it on the object before
        # handing the object to the workers.
        repoInfo.id = mysqlOp.get_repo_id_by_names(repoInfo=repoInfo)
        workQueue.put(repoInfo)

    # The number of worker threads comes from the "RCD" -> "thread_num"
    # entry of the configuration file; it is read once, before any thread
    # is started.
    config = ConfigOperator(config_path=GlobalConstants.CONFIG_PATH).read_config()
    thread_num = int(config["RCD"]["thread_num"])

    workers = []
    for _ in range(thread_num):
        worker = threading.Thread(target=run, args=(workQueue,))
        worker.start()
        workers.append(worker)

    # Block until every worker thread has returned.
    for worker in workers:
        worker.join()

    print("Finish")
|
||||||
|
|
Loading…
Reference in New Issue