Merge pull request '将特征提取修改为可以多线程运行的' (#61) from zy into master

This commit is contained in:
MillerEvan 2022-09-18 00:28:04 +08:00
commit 67453767a1
1 changed file with 63 additions and 36 deletions

View File

@ -1,6 +1,6 @@
import json
import pathlib
import queue
import re
import threading
import time
from collections import Counter
from typing import Dict, List, Tuple
@ -9,6 +9,7 @@ import pandas as pd
from sqlalchemy import create_engine
import GlobalConstants
from ConfigOperator import ConfigOperator
from FileOperator import FileOperator
from models.RepoInfo import RepoInfo
from MySQLOperator import MySQLOperator
@ -518,16 +519,16 @@ def factor_extractor(function_id_1: int, function_id_2: int, mysqlOp: MySQLOpera
}
if __name__ == "__main__":
repoInfos: List[RepoInfo] = FileOperator("factor_repos").load_repos()
for repoInfo in repoInfos:
def run(q):
while not q.empty():
repoInfo: RepoInfo = q.get()
mysqlOp: MySQLOperator = MySQLOperator(
config_path="config.yml", autocommit=True, repoInfo=repoInfo
)
clone_relations_function = mysqlOp.tablename_dict["clone_relations_function"]
sql_all_clones = """
select distinct function_id_1,function_id_2 from `{tablename}`
""".format(
select distinct function_id_1,function_id_2 from `{tablename}`
""".format(
tablename=clone_relations_function
)
while True:
@ -548,35 +549,35 @@ if __name__ == "__main__":
repo_id=repoInfo.id, separator=GlobalConstants.SEPARATOR
)
sql_result = """
create table if not exists `{tablename}` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`function_id_1` int(11) NULL,
`function_id_2` int(11) NULL,
`similarity` int(11) NULL,
`degree_diff` int(11) NULL,
`is_test` int(11) NULL,
`file_distance` int(11) NULL,
`method_name_same` int(11) NULL,
`history_change_sum` int(11) NULL,
`co_change` int(11) NULL,
`consistant_change` int(11) NULL,
`main_author_same` int(11) NULL,
`author_exp_sum` int(11) NULL,
PRIMARY KEY (`id`),
INDEX(`function_id_1`) USING BTREE,
INDEX(`function_id_2`) USING BTREE,
INDEX(`similarity`) USING BTREE,
INDEX(`degree_diff`) USING BTREE,
INDEX(`is_test`) USING BTREE,
INDEX(`file_distance`) USING BTREE,
INDEX(`method_name_same`) USING BTREE,
INDEX(`history_change_sum`) USING BTREE,
INDEX(`co_change`) USING BTREE,
INDEX(`consistant_change`) USING BTREE,
INDEX(`main_author_same`) USING BTREE,
INDEX(`author_exp_sum`) USING BTREE
)
""".format(
create table if not exists `{tablename}` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`function_id_1` int(11) NULL,
`function_id_2` int(11) NULL,
`similarity` int(11) NULL,
`degree_diff` int(11) NULL,
`is_test` int(11) NULL,
`file_distance` int(11) NULL,
`method_name_same` int(11) NULL,
`history_change_sum` int(11) NULL,
`co_change` int(11) NULL,
`consistant_change` int(11) NULL,
`main_author_same` int(11) NULL,
`author_exp_sum` int(11) NULL,
PRIMARY KEY (`id`),
INDEX(`function_id_1`) USING BTREE,
INDEX(`function_id_2`) USING BTREE,
INDEX(`similarity`) USING BTREE,
INDEX(`degree_diff`) USING BTREE,
INDEX(`is_test`) USING BTREE,
INDEX(`file_distance`) USING BTREE,
INDEX(`method_name_same`) USING BTREE,
INDEX(`history_change_sum`) USING BTREE,
INDEX(`co_change`) USING BTREE,
INDEX(`consistant_change`) USING BTREE,
INDEX(`main_author_same`) USING BTREE,
INDEX(`author_exp_sum`) USING BTREE
)
""".format(
tablename=factors
)
mysqlOp.cursor.execute(sql_result)
@ -597,6 +598,32 @@ if __name__ == "__main__":
index=False,
if_exists="append",
)
q.task_done()
print(
"finish extract factors in repo: {git_url}".format(git_url=repoInfo.git_url)
)
if __name__ == "__main__":
    # Script entry point: load the repositories to process, resolve each
    # repository's database id, queue them, and let a configurable number of
    # worker threads drain the queue through `run`.
    repoInfos: List[RepoInfo] = FileOperator("factor_repos").load_repos()
    mysqlOp = MySQLOperator(config_path=GlobalConstants.CONFIG_PATH)

    workQueue = queue.Queue()
    for repoInfo in repoInfos:
        # Look up the id of this repository and store it on the RepoInfo
        # before handing it to the workers.
        repoInfo.id = mysqlOp.get_repo_id_by_names(repoInfo=repoInfo)
        workQueue.put(repoInfo)

    # The worker count comes from the "RCD" -> "thread_num" config entry.
    config = ConfigOperator(config_path=GlobalConstants.CONFIG_PATH).read_config()
    thread_num = int(config["RCD"]["thread_num"])

    # NOTE(review): `run` tests `q.empty()` and then calls the blocking
    # `q.get()`. With more than one worker, another thread can take the last
    # item between those two calls, leaving this worker blocked forever and
    # the `join()` below never returning. Consider `get_nowait()` plus
    # `queue.Empty` handling inside `run` -- TODO confirm and fix there.
    threads = []
    for _ in range(thread_num):
        worker = threading.Thread(target=run, args=(workQueue,))
        worker.start()
        threads.append(worker)

    # Wait for every worker to finish before reporting completion.
    for worker in threads:
        worker.join()
    print("Finish")