87 lines
3.4 KiB
Python
87 lines
3.4 KiB
Python
from typing import List
|
|
|
|
from dulwich.objects import Commit
|
|
from dulwich.repo import Repo
|
|
|
|
from ConfigOperator import ConfigOperator
|
|
from GitOperator import GitOperator
|
|
from models.BlobInfo import BlobInfo
|
|
from models.RepoInfo import RepoInfo
|
|
from MySQLOperator import MySQLOperator
|
|
|
|
|
|
def extract_blob_commit_relation(
    repoInfo: RepoInfo,
    mysqlOp: MySQLOperator,
    gitOp: GitOperator,
    configOp: ConfigOperator,
):
    """
    Function: record which blob appears in which commit under which filepath

    The steps table is consulted first; when this step is already flagged as
    handled nothing is done. Otherwise the blob-commit relation table and the
    filepath table are truncated and refilled by walking every commit returned
    by mysqlOp.get_commit_sha_id_dict(). Inserts are committed once per git
    commit, and the step is flagged as handled at the very end.

    params:
        - repoInfo: the RepoInfo object (only bare_repo_path is read here)
        - mysqlOp: the MySQLOperator object (cursor, connection, table names
          and the sha -> id lookup dictionaries)
        - gitOp: the GitOperator object used to list the blobs of a commit
        - configOp: the ConfigOperator object used to keep only filepaths of
          supported languages
    """
    step_name = "blob commit relation extraction"
    steps_tablename = mysqlOp.tablename_dict["steps"]

    mysqlOp.cursor.execute(
        "select handled from `{steps_tablename}` where step_name=%s".format(
            steps_tablename=steps_tablename
        ),
        # Must be a one-element tuple: "(step_name)" without the trailing
        # comma is just the bare string, not a parameter sequence.
        (step_name,),
    )
    # NOTE(review): if the steps table has no row for this step, fetchone()
    # presumably returns None and this subscript raises TypeError -- confirm
    # the steps table is always seeded before this function runs.
    handled = mysqlOp.cursor.fetchone()["handled"]
    if handled:
        # this step has already been handled
        return

    relation_tablename = mysqlOp.tablename_dict["blob_commit_relations"]
    filepath_tablename = mysqlOp.tablename_dict["filepaths"]

    # this step has not been handled: start from empty tables
    mysqlOp.truncate_table(relation_tablename)
    mysqlOp.truncate_table(filepath_tablename)

    blob_sha_id_dict = mysqlOp.get_blob_sha_id_dict()
    commit_sha_id_dict = mysqlOp.get_commit_sha_id_dict()
    filepath_sha_id_dict = mysqlOp.get_filepath_sha_id_dict()

    # The statements are loop-invariant, so build them once.
    insert_filepath_sql = "insert into `{filepath_tablename}` (id, sha, filepath) values (%s, %s, %s)".format(
        filepath_tablename=filepath_tablename
    )
    insert_relation_sql = "insert into `{blob_commit_relation_tablename}` (blob_id, commit_id, filepath_id) values (%s, %s, %s)".format(
        blob_commit_relation_tablename=relation_tablename
    )

    # Running counter for fresh filepath ids. Seeding it once and incrementing
    # yields the same ids as recomputing max(values) + 1 on every miss, without
    # rescanning the whole dictionary for each new filepath.
    next_filepath_id = max(filepath_sha_id_dict.values(), default=0) + 1

    def get_filepath_id(blobInfo: BlobInfo):
        """
        Function: get the filepath_id of the target blob, registering the
        filepath (in the lookup dictionary and in the filepath table) the
        first time its sha is seen
        params:
            - blobInfo: the BlobInfo object
        return:
            - the id associated with blobInfo.filepath_sha
        """
        nonlocal next_filepath_id

        if blobInfo.filepath_sha in filepath_sha_id_dict:
            return filepath_sha_id_dict[blobInfo.filepath_sha]

        filepath_id = next_filepath_id
        next_filepath_id += 1
        filepath_sha_id_dict[blobInfo.filepath_sha] = filepath_id
        # Not committed here: the caller commits once the whole git commit
        # has been processed.
        mysqlOp.cursor.execute(
            insert_filepath_sql,
            (filepath_id, blobInfo.filepath_sha, blobInfo.filepath),
        )
        return filepath_id

    repo = Repo(repoInfo.bare_repo_path)

    for commit_sha, commit_id in commit_sha_id_dict.items():
        commit: Commit = repo.object_store[commit_sha]
        blobInfos: List[BlobInfo] = gitOp.extract_blobs_4_commit(commit=commit)
        for blobInfo in blobInfos:
            if not configOp.is_lang_supported(filepath=blobInfo.filepath):
                continue
            blob_id = blob_sha_id_dict[blobInfo.blob.id]
            filepath_id = get_filepath_id(blobInfo=blobInfo)
            mysqlOp.cursor.execute(
                insert_relation_sql,
                (blob_id, commit_id, filepath_id),
            )
        # one database transaction per git commit
        mysqlOp.connection.commit()

    # update steps table
    mysqlOp.cursor.execute(
        "update `{steps_tablename}` set handled=%s where step_name=%s".format(
            steps_tablename=steps_tablename
        ),
        (1, step_name),
    )
    mysqlOp.connection.commit()
|