Merge pull request 'Tasks completed as of August 21' (#43) from zy into master

MillerEvan 2022-09-02 23:13:41 +08:00
commit f2d9408d74
12 changed files with 556 additions and 42 deletions

.gitignore vendored (1 change)

@@ -144,3 +144,4 @@ config.yml
#repos
repos
delete_repos
test/


@@ -84,12 +84,13 @@ class CloneOperator(object):
if not handled:
p = subprocess.Popen(
"cd {target_folder} && java -jar {NIL_path} -s ./ -mit {mit} -mil {mil} -t {thread_num} -o result.csv".format(
"cd {target_folder} && java -jar {NIL_path} -s ./ -mit {mit} -mil {mil} -t {thread_num} -o result.csv -p {partition_num}".format(
target_folder=target_folder,
NIL_path=NIL_path,
mit=int(nil_config["mit"]),
mil=int(nil_config["mil"]),
thread_num=int(nil_config["thread_num"]),
partition_num=int(nil_config["partition_num"]),
),
shell=True,
stdout=subprocess.PIPE,
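Note: the new -p flag feeds NIL's partition count from the nil section of config.yml. A minimal standalone sketch of the assembled command, with a hypothetical jar path and target folder (the config keys mirror the diff):

import subprocess

nil_config = {"mit": 10, "mil": 6, "thread_num": 8, "partition_num": 4}  # illustrative values
cmd = (
    "cd {target_folder} && java -jar {NIL_path} -s ./ "
    "-mit {mit} -mil {mil} -t {thread_num} -o result.csv -p {partition_num}"
).format(
    target_folder="repos/apache/ant",  # hypothetical checkout folder
    NIL_path="/opt/NIL/NIL.jar",       # hypothetical jar location
    mit=int(nil_config["mit"]),
    mil=int(nil_config["mil"]),
    thread_num=int(nil_config["thread_num"]),
    partition_num=int(nil_config["partition_num"]),
)
p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
out, _ = p.communicate()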


@@ -18,9 +18,9 @@ class FileOperator(object):
with open(self.path, "r") as file:
list = file.read().strip().splitlines()
for line in list:
ownername, reponame, git_url = line.split(" ")
ownername, reponame, git_url, id = line.split(" ")
repoInfo = RepoInfo(
ownername=ownername, reponame=reponame, git_url=git_url
id=id, ownername=ownername, reponame=reponame, git_url=git_url
)
result.append(repoInfo)
return result
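Note: each line of the repos file now ends with a numeric repository id. A minimal sketch of the new four-field line format, assuming single-space separators as in load_repos:

line = "apache ant git@github.com:apache/ant.git 11"  # sample repos-file line
ownername, reponame, git_url, id = line.split(" ")
assert (ownername, reponame, git_url, id) == ("apache", "ant", "git@github.com:apache/ant.git", "11")
# id arrives as a string; MySQL coerces it when the row is inserted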


@@ -1,4 +1,5 @@
import os
import pathlib
import subprocess
from typing import List
@@ -39,6 +40,48 @@ class GitOperator(object):
-
"""
# def find_blobs_in_tree(
# repo: Repo, commit: Commit, tree: Tree, relpath: bytes = b""
# ) -> List[BlobInfo]:
# """
# Function: iteratively find all the blobs for the target commit
# params:
# - commit: the target commit
# - tree: current Tree object in iteration
# - relpath: the relative path before this iteration
# return:
# - a list of BlobInfo objects regarding this commit
# """
# result = []
# for entry in tree.items():
# if (not repo.object_store.contains_loose(entry.sha)) and (
# not repo.object_store.contains_packed(entry.sha)
# ):
# # the object cannot be found in the repo
# return result
# obj = repo.object_store[entry.sha]
# new_relpath = os.path.join(relpath, entry.path)
# if obj.type_name == b"blob":
# result.append(
# BlobInfo(
# repo=repo, commit=commit, filepath=new_relpath, blob=obj
# )
# )
# elif obj.type_name == b"tree":
# new_tree = obj
# result.extend(
# find_blobs_in_tree(
# repo=repo,
# commit=commit,
# tree=new_tree,
# relpath=new_relpath,
# )
# )
# else:
# # there is something wrong with this tree object
# return result
# return result
def find_blobs_in_tree(
repo: Repo, commit: Commit, tree: Tree, relpath: bytes = b""
) -> List[BlobInfo]:
@@ -52,33 +95,19 @@ class GitOperator(object):
- a list of BlobInfo objects regarding this commit
"""
result = []
for entry in tree.items():
for entry in Repo(
self.repoInfo.bare_repo_path
).object_store.iter_tree_contents(commit.tree):
if (not repo.object_store.contains_loose(entry.sha)) and (
not repo.object_store.contains_packed(entry.sha)
):
# the object cannot be found in the repo
return result
continue
obj = repo.object_store[entry.sha]
new_relpath = os.path.join(relpath, entry.path)
if obj.type_name == b"blob":
result.append(
BlobInfo(
repo=repo, commit=commit, filepath=new_relpath, blob=obj
)
)
elif obj.type_name == b"tree":
new_tree = obj
result.extend(
find_blobs_in_tree(
repo=repo,
commit=commit,
tree=new_tree,
relpath=new_relpath,
)
)
else:
# there is something wrong with this tree object
return result
new_relpath = str(pathlib.Path(entry.path.decode())).encode()
result.append(
BlobInfo(repo=repo, commit=commit, filepath=new_relpath, blob=obj)
)
return result
blobInfos = find_blobs_in_tree(
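Note: the rewrite swaps the hand-rolled recursion for dulwich's flat tree iterator. A minimal standalone sketch of iter_tree_contents, assuming a hypothetical bare-repo path; each yielded entry carries path, mode, and sha:

import pathlib
from dulwich.repo import Repo

repo = Repo("repos/apache/ant.git")  # hypothetical bare repository path
commit = repo[repo.head()]
for entry in repo.object_store.iter_tree_contents(commit.tree):
    obj = repo.object_store[entry.sha]
    # normalize separators to the platform convention, as the diff does
    relpath = str(pathlib.Path(entry.path.decode())).encode()
    print(relpath, obj.type_name)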


@@ -278,12 +278,31 @@ def extract_method_function_relation(
if row["filepath_old"] is None:
filepath_id_old = np.nan
else:
filepath_id_old = filepath_id_dict[row["filepath_old"]]
if row["filepath_old"] not in filepath_id_dict:
"""
Some filepaths gotten by dulwich are different with the real filepaths
in the mysql database and the key names in filepath_id_dict. When this bug
happened we set filepath_id_old = None.
"""
filepath_id_old = None
else:
filepath_id_old = filepath_id_dict[row["filepath_old"]]
if row["filepath_new"] is None:
filepath_id_new = np.nan
else:
filepath_id_new = filepath_id_dict[row["filepath_new"]]
if row["filepath_new"] not in filepath_id_dict:
"""
Some filepaths gotten by dulwich are different with the real filepaths
in the mysql database and the key names in filepath_id_dict. When this bug
happened we set filepath_id_old = None.
Example: When deal with the repository git@github.com:apache/iotdb.git, a filepath in
filepath_id_dict is 'iotdb\\metrics\\interface\\src\\main\\java\\org\\apache\\iotdb\\metrics\\DoNothingMetricService.java'
while the filepath obtained by dulwich will ignore "iotdb\\"
"""
filepath_id_new = None
else:
filepath_id_new = filepath_id_dict[row["filepath_new"]]
commit_id_old = row["commit_id_old"]
commit_id_new = row["commit_id_new"]
@@ -438,7 +457,6 @@ def extract_method_function_relation(
),
(method_id_1, method_id_2, change_content),
)
mysqlOp.connection.commit()
for commit_sha, commit_id in commit_sha_id_dict.items():
handle_commit(commit_id=commit_id, commit=repo.object_store[commit_sha])
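Note: a suffix-based fallback could recover ids for the mismatched paths described above; this is a hypothetical workaround sketch, not what the diff does (the diff simply stores None):

import pathlib

def lookup_filepath_id(filepath, filepath_id_dict):
    # exact lookup first, then a unique path-suffix match
    if filepath in filepath_id_dict:
        return filepath_id_dict[filepath]
    parts = pathlib.PureWindowsPath(filepath).parts
    hits = [
        fid for known, fid in filepath_id_dict.items()
        if pathlib.PureWindowsPath(known).parts[-len(parts):] == parts
    ]
    return hits[0] if len(hits) == 1 else None

d = {r"iotdb\metrics\src\Foo.java": 7}
print(lookup_filepath_id(r"metrics\src\Foo.java", d))  # -> 7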


@@ -85,14 +85,29 @@ class MySQLOperator(object):
- repoInfos: a list of RepoInfo objects
"""
for repoInfo in repoInfos:
id = repoInfo.id
ownername = repoInfo.ownername
reponame = repoInfo.reponame
self.cursor.execute(
"insert ignore into repositories (ownername, reponame, handled) values (%s, %s, %s)",
(ownername, reponame, 0),
"insert ignore into repositories (id, ownername, reponame, handled) values (%s, %s, %s, %s)",
(id, ownername, reponame, 0),
)
self.connection.commit()
def update_handled_repository(self, repoInfo: RepoInfo):
"""
Function: insert all the repositories into repositories table
params:
- repoInfos: a list of RepoInfo objects
"""
ownername = repoInfo.ownername
reponame = repoInfo.reponame
self.cursor.execute(
"update repositories set handled = 1 where ownername=%s and reponame=%s",
(ownername, reponame),
)
self.connection.commit()
def init_steps_table(self, repoInfo: RepoInfo):
"""
Function: initialize the handled_repositories table
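Note: together these give an idempotent workflow: insert ignore seeds rows with handled = 0, and update_handled_repository flips the flag once a repo finishes. A hedged usage sketch (the insert method's real name is not visible in this hunk, so it is hypothetical here):

from FileOperator import FileOperator
from MySQLOperator import MySQLOperator

for repoInfo in FileOperator("repos").load_repos():
    mysqlOp = MySQLOperator(config_path="config.yml", repoInfo=repoInfo)
    mysqlOp.insert_repositories(repoInfos=[repoInfo])  # hypothetical name; seeds handled = 0
    # ... clone, detect clones, extract relations ...
    mysqlOp.update_handled_repository(repoInfo=repoInfo)  # sets handled = 1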


@@ -13,7 +13,9 @@ This is a project for finding factors related to bad clones.
- this project uses [MySQL](https://dev.mysql.com/downloads/) 8.0.30
- copy the configuration template and rename it with the command `cp ./config.template.yml ./config.yml`
- fill in each section of the config following the hints in the template
- Java
- To run the clone detector NIL, [JDK](https://www.oracle.com/java/technologies/downloads/) 1.8+ is needed.
## run the project
1. Start collecting data for repositories by running the following commands:
```


@@ -15,6 +15,7 @@ from GitOperator import GitOperator
from MethodFunctionRelationExtractor import extract_method_function_relation
from models.RepoInfo import RepoInfo
from MySQLOperator import MySQLOperator
from RiskEvaluator import evaluate_all_pairs
class RepoExecutorThread(threading.Thread):
@@ -173,5 +174,20 @@ class RepoExecutorThread(threading.Thread):
)
)
# evaluate the risk of the clone pairs
evaluate_all_pairs(repoInfo=repoInfo)
print(
"[Info]: Thread: {thread_name} finish evaluating all clone pairs in the whole repo: {git_url}".format(
thread_name=self.name, git_url=repoInfo.git_url
)
)
# mark the handled repository
mysqlOp.update_handled_repository(repoInfo=repoInfo)
print(
"[Info]: Thread: {thread_name} finish handling the whole repo: {git_url}".format(
thread_name=self.name, git_url=repoInfo.git_url
)
)
self.q.task_done()
print("[Info]: Exist thread: " + self.name)

RiskEvaluator.py (new file, 432 additions)

@@ -0,0 +1,432 @@
import re
from typing import Dict, List, Tuple
import pandas as pd
from sqlalchemy import create_engine
import GlobalConstants
from FileOperator import FileOperator
from models.RepoInfo import RepoInfo
from MySQLOperator import MySQLOperator
class RiskEvaluator(object):
def __init__(
self,
function_id_1: int,
function_id_2: int,
repoInfo: RepoInfo,
):
self.function_id_1 = function_id_1
self.function_id_2 = function_id_2
self.repoInfo = repoInfo
def evaluate(self, mysqlOp: MySQLOperator):
"""
Function: evaluate the risk of a clone pair in five steps:
1. get the number of changes of the clone pair
2. get the number of consistent changes of the clone pair
3. get the latency interval of the consistent changes of the clone class
4. check the commit messages of the consistent changes for bug fixes
5. evaluate the risk
"""
method_function_relations = mysqlOp.tablename_dict["method_function_relations"]
clone_relations_function = mysqlOp.tablename_dict["clone_relations_function"]
commits = mysqlOp.tablename_dict["commits"]
blob_methods = mysqlOp.tablename_dict["blob_methods"]
blob_commit_relations = mysqlOp.tablename_dict["blob_commit_relations"]
commit_relations = mysqlOp.tablename_dict["commit_relations"]
# Fix: only consider the lifetime of the clone pair (i.e. the commits in which the clone pair exists)
relate_commits = []
sql_clone_pairs = """
select commit_id from `{tablename}`
where (function_id_1 = {function_id_1} and function_id_2 = {function_id_2}) or
(function_id_1 = {function_id_2} and function_id_2 = {function_id_1})
""".format(
tablename=clone_relations_function,
function_id_1=self.function_id_1,
function_id_2=self.function_id_2,
)
mysqlOp.cursor.execute(sql_clone_pairs)
clone_pairs = mysqlOp.cursor.fetchall()
for clone_pair in clone_pairs:
relate_commits.append(clone_pair.get("commit_id"))
# get the related commits of the clone pair
candidate_commits = [
commit_id
for commit_id in range(min(relate_commits), max(relate_commits) + 1)
]
start_commits = []
end_commits = []
middle_commits = []
commit_children_dict = {} # find children by parent_id
commit_parents_dict = {} # find parents by child_id
mysqlOp.cursor.execute(
"select id, parent_id from `{commit_relations}`".format(
commit_relations=commit_relations
)
)
commit_relation_results = mysqlOp.cursor.fetchall()
for relation in commit_relation_results:
commit_id = relation["id"]
parent_id = relation["parent_id"]
commit_children_dict.setdefault(parent_id, [])
commit_children_dict[parent_id].append(commit_id)
commit_parents_dict.setdefault(commit_id, [])
commit_parents_dict[commit_id].append(parent_id)
# if the clone pair lives in only one commit, that commit is both the start commit and the end commit
if len(candidate_commits) == 1:
start_commits.append(candidate_commits[0])
end_commits.append(candidate_commits[0])
else:
for commit_id in candidate_commits:
if commit_id not in commit_parents_dict:
parent_ids = []
else:
parent_ids = commit_parents_dict[commit_id]
if commit_id not in commit_children_dict:
son_ids = []
else:
son_ids = commit_children_dict[commit_id]
intersect_parents = list(set(parent_ids) & set(candidate_commits))
intersect_children = list(set(son_ids) & set(candidate_commits))
# if no parent in candidate_commits & at least one child in candidate_commits & there exists clone relationship in this commit -> candidate_start
if (
len(intersect_parents) == 0
and len(intersect_children) > 0
and commit_id in relate_commits
):
start_commits.append(commit_id)
# if at least one parent in candidate_commits & no child in candidate_commits & there exists clone relationship in this commit -> candidate_end
elif (
len(intersect_parents) > 0
and len(intersect_children) == 0
and commit_id in relate_commits
):
end_commits.append(commit_id)
# if no parent in candidate_commits & no child in candidate_commits -> ignore
elif len(intersect_parents) == 0 and len(intersect_children) == 0:
continue
# if at least one parent in candidate_commits & at least one child in candidate_commits -> middle_commit
elif len(intersect_parents) > 0 and len(intersect_children) > 0:
middle_commits.append(commit_id)
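# Example: with candidate_commits = [5, 6, 7], edges 5 -> 6 -> 7, and clone
# relations observed in commits 5 and 7, commit 5 becomes a start commit,
# commit 7 an end commit, and commit 6 a middle commit.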
for start_commit in start_commits:
commit_parents_dict.setdefault(start_commit, [])
for end_commit in end_commits:
commit_children_dict.setdefault(end_commit, [])
# get the CpI
# find related method ids in commits
def find_related_methods(function_id: int) -> List[Tuple[int, int]]:
result = []
sql = """
select bm.id as method_id, bcr.commit_id
from `{blob_methods}` bm, `{blob_commit_relations}` bcr
where bm.blob_id=bcr.blob_id
and bm.function_id=%s
""".format(
blob_methods=blob_methods, blob_commit_relations=blob_commit_relations
)
mysqlOp.cursor.execute(sql, (function_id,))
methods = mysqlOp.cursor.fetchall()
for method in methods:
method_id = method["method_id"]
commit_id = method["commit_id"]
result.append((method_id, commit_id))
return result
candidate_methods_1 = find_related_methods(function_id=self.function_id_1)
candidate_methods_2 = find_related_methods(function_id=self.function_id_2)
def filter_candidate_methods(
candidate_methods: List[Tuple[int, int]], commit_ids: List[int]
) -> Tuple[List[int], Dict[int, List[int]]]:
"""
return:
- method ids
- {
method_id: [commit_ids] # a method can be related to multiple commits
}
"""
method_ids = []
method_commit_dict = {}
for candidate_method in candidate_methods:
if candidate_method[1] in commit_ids:
method_ids.append(candidate_method[0])
method_commit_dict.setdefault(candidate_method[0], [])
method_commit_dict[candidate_method[0]].append(candidate_method[1])
return list(set(method_ids)), method_commit_dict
all_methods_1, method_commit_dict_1 = filter_candidate_methods(
candidate_methods=candidate_methods_1,
commit_ids=list(
set(start_commits) | set(end_commits) | set(middle_commits)
),
)
all_methods_2, method_commit_dict_2 = filter_candidate_methods(
candidate_methods=candidate_methods_2,
commit_ids=list(
set(start_commits) | set(end_commits) | set(middle_commits)
),
)
# Find all changes during clone pair evolution
def get_method_change(
all_methods: List[int],
) -> List[Tuple[int, int, bytes, bytes]]:
"""
result:
[(
method_old,
method_new,
add_change,
delete_change
)]
"""
result_changes = []
all_methods_str = [str(method_id) for method_id in all_methods]
method_ids = "(" + ",".join(all_methods_str) + ")"
sql_change = """
select method_id_1, method_id_2, GROUP_CONCAT(distinct `change`) as `change` from `{tablename}`
where method_id_1 in {method_ids}
and method_id_2 in {method_ids}
and `change` is not null
group by method_id_1, method_id_2
""".format(
tablename=method_function_relations, method_ids=method_ids
)
mysqlOp.cursor.execute(sql_change)
changes = mysqlOp.cursor.fetchall()
def extract_changes(content: bytes) -> Tuple[bytes, bytes]:
"""
return:
- add contents
- delete contents
"""
add_contents = b""
delete_content = b""
lines = content.splitlines()
add_flag = False
for line in lines:
if line == b"ADD:":
add_flag = True
elif line == b"DELETE:":
add_flag = False
else:
if add_flag:
add_contents += b"".join(line.split())
else:
delete_content += b"".join(line.split())
return (add_contents, delete_content)
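# e.g. b"ADD:\n x=1\nDELETE:\n y=2" -> (b"x=1", b"y=2")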
for change in changes:
method_id_1 = change["method_id_1"]
method_id_2 = change["method_id_2"]
change = change["change"]
add_change, delete_change = extract_changes(content=change)
result_changes.append(
(method_id_1, method_id_2, add_change, delete_change)
)
return result_changes
result_changes_1 = get_method_change(all_methods=all_methods_1)
result_changes_2 = get_method_change(all_methods=all_methods_2)
sum_changes = len(result_changes_1) + len(result_changes_2)
# get CCR
# Find consistent_changes in all changes
consistent_change_list1 = []
consistent_change_list2 = []
consistent_changes = 0
for change_1 in result_changes_1:
for change_2 in result_changes_2:
if change_1[2] == change_2[2] and change_1[3] == change_2[3]:
consistent_change_list1.append(change_1)
consistent_change_list2.append(change_2)
consistent_changes = consistent_changes + 2
# get CCL
# Find Latency in different commits
def get_commit_change_by_method_change(
method_old: int, method_new: int, method_commit_dict: dict
) -> List[Tuple[int, int]]:
"""
Function: get the change of commits via the change of methods
return:
- [(
commit_old,
commit_new
)]
"""
result = []
commits_old = method_commit_dict[method_old]
commits_new = method_commit_dict[method_new]
for commit_old in commits_old:
children_old = commit_children_dict[commit_old]
intersect_commits = set(children_old) & set(commits_new)
for commit_id in intersect_commits:
result.append((commit_old, commit_id))
return result
target_commits = []
if consistent_changes == 0:
CCL = 0
else:
CCL = 0
for i in range(len(consistent_change_list1)):
change_1 = consistent_change_list1[i]
change_2 = consistent_change_list2[i]
method_old_1 = change_1[0]
method_new_1 = change_1[1]
method_old_2 = change_2[0]
method_new_2 = change_2[1]
commit_changes_1 = get_commit_change_by_method_change(
method_old=method_old_1,
method_new=method_new_1,
method_commit_dict=method_commit_dict_1,
)
commit_changes_2 = get_commit_change_by_method_change(
method_old=method_old_2,
method_new=method_new_2,
method_commit_dict=method_commit_dict_2,
)
consistent_change_commit_paths = list(
set(commit_changes_1) | set(commit_changes_2)
)
CCL += len(consistent_change_commit_paths)
target_commits.extend(
list(set([path[1] for path in consistent_change_commit_paths]))
)
# get bug_fix_num
if CCL == 0:
bug_fix_num = 0
else:
bug_fix_num = 0
for commit in target_commits:
sql_message = """
select message from `{tablename1}` where id = {id}
""".format(
tablename1=commits, id=commit
)
mysqlOp.cursor.execute(sql_message)
message = mysqlOp.cursor.fetchone()["message"].lower()
if (
re.search(
rb"(close|closes|closed|fix|fixes|fixed|resolve|resolves|resolved)\s+.*?#\d+",
message,
)
is not None
):
bug_fix_num += 1
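# e.g. a lowered message like b"fixes #123: guard against null tree" matches and counts as a bug fix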
def Harmness_Evaluating(CpI: int, CCR: int, CCL: int, bug_fix_num: int) -> int:
"""
Function: evaluate the harmfulness of a clone pair
input:
- CpI: Changes per clone Instance
- CCR: Consistent Change Ratio
- CCL: Consistent Change Latency
- bug_fix_num: the number of bug-fix commits
output:
- risk_level:
- 0 -> the clone is harmless
- 1 -> the clone is low risk
- 2 -> the clone is medium risk
- 3 -> the clone is high risk
if CpI == 0 or (CpI > 0 and CCR == 0):
risk_level = 0
else:
if CCL == 0:
risk_level = 1
else:
if bug_fix_num == 0:
risk_level = 2
else:
risk_level = 3
return risk_level
return Harmness_Evaluating(sum_changes, consistent_changes, CCL, bug_fix_num)
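# Worked example (illustrative numbers): sum_changes = 4, consistent_changes = 2,
# CCL = 1, bug_fix_num = 1 falls through to risk_level = 3 (high risk); with
# consistent_changes = 0 the first branch returns 0 (harmless).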
# Only for test
# repoInfos: List[RepoInfo] = FileOperator("repos").load_repos()
# for repoInfo in repoInfos:
# mysqlOp: MySQLOperator = MySQLOperator(config_path="config.yml", repoInfo=repoInfo)
# clone_pair = RiskEvaluator(
# 10, 9, repoInfo
# )
# print(clone_pair.evaluate(mysqlOp))
def evaluate_all_pairs(repoInfo):
mysqlOp: MySQLOperator = MySQLOperator(config_path="config.yml", repoInfo=repoInfo)
# clone_pair = RiskEvaluator(
# 10, 9, repoInfo
# )
# print(clone_pair.evaluate(mysqlOp))
clone_relations_function = mysqlOp.tablename_dict["clone_relations_function"]
sql_all_clones = """
select function_id_1,function_id_2 from `{tablename}`
""".format(
tablename=clone_relations_function
)
mysqlOp.cursor.execute(sql_all_clones)
all_clone_pairs = mysqlOp.cursor.fetchall()
evaluate_list = []
for clone_pair in all_clone_pairs:
function_id_1 = clone_pair.get("function_id_1")
function_id_2 = clone_pair.get("function_id_2")
clone_pair = RiskEvaluator(function_id_1, function_id_2, repoInfo)
risklevel = clone_pair.evaluate(mysqlOp)
pair = {
"function_id_1": function_id_1,
"function_id_2": function_id_2,
"risk_level": risklevel,
}
evaluate_list.append(pair)
result = pd.DataFrame(evaluate_list)
result_of_evaluator = "{repo_id}{separator}result_of_evaluator".format(
repo_id=repoInfo.id, separator=GlobalConstants.SEPARATOR
)
sql_result = """
create table if not exists `{tablename}` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`function_id_1` int(11) NULL,
`function_id_2` int(11) NULL,
`risk_level` int(11) NULL,
PRIMARY KEY (`id`),
INDEX(`function_id_1`) USING BTREE,
INDEX(`function_id_2`) USING BTREE,
INDEX(`risk_level`) USING BTREE
)
""".format(
tablename=result_of_evaluator
)
mysqlOp.cursor.execute(sql_result)
mysqlOp.truncate_table(tablename=result_of_evaluator)
config = mysqlOp.config["mysql"]
engine = create_engine(
"mysql+pymysql://{username}:{password}@{host}:{port}/{database}".format(
username=config["user"],
password=config["passwd"],
host=config["host"],
port=config["port"],
database=config["database"],
)
)
result.to_sql(
result_of_evaluator,
engine,
index=False,
if_exists="append",
)
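Note: a minimal driver for the new module, mirroring the commented-out test above (the repos file name is the one used elsewhere in the project):

from FileOperator import FileOperator
from RiskEvaluator import evaluate_all_pairs

for repoInfo in FileOperator("repos").load_repos():
    evaluate_all_pairs(repoInfo=repoInfo)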


@@ -1,5 +1,5 @@
apache ant git@github.com:apache/ant.git
apache dubbo git@github.com:apache/dubbo.git
apache kafka git@github.com:apache/kafka.git
apache maven git@github.com:apache/maven.git
apache rocketmq git@github.com:apache/rocketmq.git
apache ant git@github.com:apache/ant.git 11
apache dubbo git@github.com:apache/dubbo.git 12
apache kafka git@github.com:apache/kafka.git 13
apache maven git@github.com:apache/maven.git 14
apache rocketmq git@github.com:apache/rocketmq.git 15


@@ -1,5 +1,5 @@
apache ant git@github.com:apache/ant.git
apache dubbo git@github.com:apache/dubbo.git
apache kafka git@github.com:apache/kafka.git
apache maven git@github.com:apache/maven.git
apache rocketmq git@github.com:apache/rocketmq.git
apache ant git@github.com:apache/ant.git 11
apache dubbo git@github.com:apache/dubbo.git 12
apache kafka git@github.com:apache/kafka.git 13
apache maven git@github.com:apache/maven.git 14
apache rocketmq git@github.com:apache/rocketmq.git 15


@@ -2,7 +2,7 @@ CREATE TABLE IF NOT EXISTS `{tablename}` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`method_id_1` int(11) NULL,
`method_id_2` int(11) NULL,
`change` blob NULL, # set(block1, block2) a set of change, no order, and no whitespace in each block
`change` longblob NULL, # set(block1, block2) a set of change, no order, and no whitespace in each block
PRIMARY KEY (`id`),
INDEX(`method_id_1`) USING BTREE,
INDEX(`method_id_2`) USING BTREE