# bad_clone_prediction/BlobMethodExtractor.py
#
# 59 lines
# 2.1 KiB
# Python

import os
from models.RepoInfo import RepoInfo
from MySQLOperator import MySQLOperator
def extract_blob_methods(repoInfo: RepoInfo, mysqlOp: MySQLOperator):
    """
    Function: extract methods from blob.java files and store them in the database
    params:
        - repoInfo: the repo that blobs belong to; its `blob_target_path` is the
          directory containing the `code_blocks` listing that is read here
        - mysqlOp: the operator for handling mysql database

    Each line of `code_blocks` is parsed as `filepath,start,end,method_name`.
    If the steps table already marks this step as handled, nothing is done.
    Otherwise the blob-methods table is truncated and refilled (ids are the
    1-based line numbers of the listing), and the step is marked as handled.
    """
    step_name = "blob method extraction"
    steps_tablename = mysqlOp.tablename_dict["steps"]

    # The parameters must be a real one-element tuple: `(step_name)` is only a
    # parenthesised string, which DB-API drivers are not required to accept.
    mysqlOp.cursor.execute(
        "select handled from `{steps_tablename}` where step_name=%s".format(
            steps_tablename=steps_tablename
        ),
        (step_name,),
    )
    # NOTE(review): assumes the cursor yields dict-like rows and that a row
    # for this step already exists in the steps table -- confirm.
    handled = mysqlOp.cursor.fetchone()["handled"]
    if handled:
        return

    blob_method_tablename = mysqlOp.tablename_dict["blob_methods"]
    mysqlOp.truncate_table(tablename=blob_method_tablename)
    blob_sha_id_dict = mysqlOp.get_blob_sha_id_dict()

    # The listing is read in one go, so the file can be closed before the
    # (potentially long) series of inserts starts.
    with open(os.path.join(repoInfo.blob_target_path, "code_blocks"), "r") as f:
        methods = f.read().strip().splitlines()

    # The statement text does not depend on the row, so build it only once.
    insert_sql = "insert into `{blob_method_tablename}` (id, blob_id, start, end, method_name) values (%s, %s, %s, %s, %s)".format(
        blob_method_tablename=blob_method_tablename
    )

    for row_id, method in enumerate(methods, start=1):
        filepath, start, end, method_name = method.split(",")
        # The lookup key is the path relative to the blob directory, cut at
        # its first ".", encoded to bytes.
        # NOTE(review): presumably this is the blob's SHA and the dict is
        # keyed by bytes -- verify against get_blob_sha_id_dict().
        blob_sha = (
            os.path.relpath(filepath, repoInfo.blob_target_path)
            .split(".")[0]
            .encode()
        )
        blob_id = blob_sha_id_dict[blob_sha]
        mysqlOp.cursor.execute(
            insert_sql,
            (row_id, blob_id, int(start), int(end), method_name),
        )
        # Commit in batches of 1000 rows.
        if row_id % 1000 == 0:
            mysqlOp.connection.commit()

    # update steps table
    mysqlOp.cursor.execute(
        "update `{steps_tablename}` set handled=%s where step_name=%s".format(
            steps_tablename=steps_tablename
        ),
        (1, step_name),
    )
    # Final commit covers the remaining (< 1000) inserts and the step update.
    mysqlOp.connection.commit()