59 lines
2.1 KiB
Python
59 lines
2.1 KiB
Python
import os
|
|
|
|
from models.RepoInfo import RepoInfo
|
|
from MySQLOperator import MySQLOperator
|
|
|
|
|
|
def extract_blob_methods(repoInfo: RepoInfo, mysqlOp: MySQLOperator):
    """
    Function: load the methods extracted from blob .java files into the
    blob_methods table.

    The step is skipped when the steps table already flags
    "blob method extraction" as handled. Otherwise the blob_methods table is
    passed to ``mysqlOp.truncate_table`` (presumably emptying it so the import
    restarts from scratch), every record of the ``code_blocks`` file found
    under ``repoInfo.blob_target_path`` is inserted, and the step is finally
    flagged as handled.

    Each line of ``code_blocks`` must have exactly four comma-separated
    fields: ``filepath,start,end,method_name``. The blob sha is the part of
    ``filepath`` (taken relative to ``repoInfo.blob_target_path``) that
    precedes the first ".", encoded to bytes, and is used as the key into
    ``mysqlOp.get_blob_sha_id_dict()``.

    params:
        - repoInfo: the repo that blobs belong to
        - mysqlOp: the operator for handling mysql database

    raises:
        - ValueError: a record does not have exactly four fields, or its
          start/end fields are not integers
        - KeyError: a record refers to a blob sha missing from the sha->id
          mapping
        - TypeError: presumably when the steps table has no row for this
          step (``fetchone()`` yielding None) -- TODO confirm the table is
          always pre-populated
    """

    step_name = "blob method extraction"
    mysqlOp.cursor.execute(
        "select handled from `{steps_tablename}` where step_name=%s".format(
            steps_tablename=mysqlOp.tablename_dict["steps"]
        ),
        # Must be a real 1-tuple: "(step_name)" is just a parenthesised
        # string, which DB-API drivers may reject or iterate char by char.
        (step_name,),
    )
    # Subscripting by column name assumes a dict-style cursor.
    handled = mysqlOp.cursor.fetchone()["handled"]
    if handled:
        return

    mysqlOp.truncate_table(tablename=mysqlOp.tablename_dict["blob_methods"])
    blob_sha_id_dict = mysqlOp.get_blob_sha_id_dict()

    # Read everything up front so the file is closed before the database work.
    code_blocks_path = os.path.join(repoInfo.blob_target_path, "code_blocks")
    with open(code_blocks_path, "r") as f:
        records = f.read().strip().splitlines()

    # The statement text is loop-invariant, so build it once.
    insert_sql = "insert into `{blob_method_tablename}` (id, blob_id, start, end, method_name) values (%s, %s, %s, %s, %s)".format(
        blob_method_tablename=mysqlOp.tablename_dict["blob_methods"]
    )

    # Row ids are the 1-based position of the record within the file.
    for row_id, record in enumerate(records, start=1):
        filepath, start, end, method_name = record.split(",")
        blob_sha = (
            os.path.relpath(filepath, repoInfo.blob_target_path)
            .split(".")[0]
            .encode()
        )
        blob_id = blob_sha_id_dict[blob_sha]
        mysqlOp.cursor.execute(
            insert_sql,
            (row_id, blob_id, int(start), int(end), method_name),
        )
        # Commit in batches of 1000 rows; the remainder is committed below.
        if row_id % 1000 == 0:
            mysqlOp.connection.commit()

    # update steps table
    mysqlOp.cursor.execute(
        "update `{steps_tablename}` set handled=%s where step_name=%s".format(
            steps_tablename=mysqlOp.tablename_dict["steps"]
        ),
        (1, step_name),
    )
    mysqlOp.connection.commit()
|