[Debugify][OriginalDIMode] Update script to handle large JSON reports

This patch updates llvm/utils/llvm-original-di-preservation.py to create more
compact HTML verify-debuginfo-preserve reports by:
- removing duplicated debug info bugs,
- introducing a -compress option to create a highly compressed report.
Additionally, this patch makes the script able to process very large JSON inputs.
This is done by reading and analyzing the JSON report in chunks.

Differential Revision: https://reviews.llvm.org/D115617
This commit is contained in:
Nikola Tesic 2022-09-28 12:34:32 +02:00 committed by Djordje Todorovic
parent b5d28f3ea5
commit 8b38a2c0a5
4 changed files with 242 additions and 97 deletions

View File

@ -0,0 +1,110 @@
<html>
<head>
<style>
table, th, td {
border: 1px solid black;
}
table.center {
margin-left: auto;
margin-right: auto;
}
</style>
</head>
<body>
<table>
<caption><b>Location Bugs found by the Debugify</b></caption>
<tr>
<th>File</th>
<th>LLVM Pass Name</th>
<th>LLVM IR Instruction</th>
<th>Function Name</th>
<th>Basic Block Name</th>
<th>Action</th>
</tr>
</tr>
<tr>
<td>test.ll</td>
<td>no-name</td>
<td>extractvalue</td>
<td>fn</td>
<td>no-name</td>
<td>not-generate</td>
</tr>
<tr>
<td>test.ll</td>
<td>no-name</td>
<td>insertvalue</td>
<td>fn</td>
<td>no-name</td>
<td>not-generate</td>
</tr>
<tr>
</table>
<br>
<table>
<caption><b>Summary of Location Bugs</b></caption>
<tr>
<th>LLVM Pass Name</th>
<th>Number of bugs</th>
</tr>
<tr>
<td>no-name</td>
<td>8</td>
</tr>
<tr>
</table>
<br>
<br>
<table>
<caption><b>SP Bugs found by the Debugify</b></caption>
<tr>
<th>File</th>
<th>LLVM Pass Name</th>
<th>Function Name</th>
<th>Action</th>
</tr>
<tr>
<td colspan='4'> No bugs found </td>
</tr>
</table>
<br>
<table>
<caption><b>Summary of SP Bugs</b></caption>
<tr>
<th>LLVM Pass Name</th>
<th>Number of bugs</th>
</tr>
<tr>
<tr>
<td colspan='2'> No bugs found </td>
</tr>
</table>
<br>
<br>
<table>
<caption><b>Variable Location Bugs found by the Debugify</b></caption>
<tr>
<th>File</th>
<th>LLVM Pass Name</th>
<th>Variable</th>
<th>Function</th>
<th>Action</th>
</tr>
<tr>
<td colspan='4'> No bugs found </td>
</tr>
</table>
<br>
<table>
<caption><b>Summary of Variable Location Bugs</b></caption>
<tr>
<th>LLVM Pass Name</th>
<th>Number of bugs</th>
</tr>
<tr>
<tr>
<td colspan='2'> No bugs found </td>
</tr>
</table>
</body>
</html>

View File

@ -41,14 +41,6 @@
<tr>
<td>test.ll</td>
<td>no-name</td>
<td>extractvalue</td>
<td>fn</td>
<td>no-name</td>
<td>not-generate</td>
</tr>
<tr>
<td>test.ll</td>
<td>no-name</td>
<td>insertvalue</td>
<td>fn1</td>
<td>no-name</td>
@ -57,30 +49,6 @@
<tr>
<td>test.ll</td>
<td>no-name</td>
<td>insertvalue</td>
<td>fn1</td>
<td>no-name</td>
<td>not-generate</td>
</tr>
<tr>
<td>test.ll</td>
<td>no-name</td>
<td>insertvalue</td>
<td>fn</td>
<td>no-name</td>
<td>not-generate</td>
</tr>
<tr>
<td>test.ll</td>
<td>no-name</td>
<td>extractvalue</td>
<td>fn1</td>
<td>no-name</td>
<td>not-generate</td>
</tr>
<tr>
<td>test.ll</td>
<td>no-name</td>
<td>extractvalue</td>
<td>fn1</td>
<td>no-name</td>

View File

@ -6,3 +6,8 @@ RUN: %llvm-original-di-preservation %p/Inputs/corrupted.json %t2.html | FileChec
RUN: diff -w %p/Inputs/expected-skipped.html %t2.html
CORRUPTED: Skipped lines: 3
CORRUPTED: Skipped bugs: 1
RUN: %llvm-original-di-preservation -compress %p/Inputs/sample.json %t3.html | FileCheck %s -check-prefix=COMPRESSED
RUN: diff -w %p/Inputs/expected-compressed.html %t3.html
COMPRESSED-NOT: Skipped lines:

View File

@ -17,17 +17,23 @@ class DILocBug:
self.bb_name = bb_name
self.fn_name = fn_name
self.instr = instr
def __str__(self):
return self.action + self.bb_name + self.fn_name + self.instr
class DISPBug:
    """Record of a single DISPBug: an action and a function name."""

    def __init__(self, action, fn_name):
        self.fn_name = fn_name
        self.action = action

    def __str__(self):
        # The text is the two fields joined back to back, with no
        # separator: action first, then the function name.
        return "".join((self.action, self.fn_name))
class DIVarBug:
    """Record of a single DIVarBug: an action, a name and a function name."""

    def __init__(self, action, name, fn_name):
        self.fn_name = fn_name
        self.name = name
        self.action = action

    def __str__(self):
        # The text is the three fields joined back to back, with no
        # separator: action, then name, then function name.
        return "".join((self.action, self.name, self.fn_name))
# Report the bugs in form of html.
def generate_html_report(di_location_bugs, di_subprogram_bugs, di_var_bugs, \
@ -326,11 +332,12 @@ def generate_html_report(di_location_bugs, di_subprogram_bugs, di_var_bugs, \
print("The " + html_file + " generated.")
# Read the JSON file.
def get_json(file):
# Read the JSON file in chunks.
def get_json_chunk(file,start,size):
json_parsed = None
di_checker_data = []
skipped_lines = 0
line = 0
# The file contains json object per line.
# An example of the line (formatted json):
@ -354,6 +361,11 @@ def get_json(file):
#}
with open(file) as json_objects_file:
for json_object_line in json_objects_file:
line += 1
if line < start:
continue
if line >= start+size:
break
try:
json_object = loads(json_object_line)
except:
@ -361,12 +373,13 @@ def get_json(file):
else:
di_checker_data.append(json_object)
return (di_checker_data, skipped_lines)
return (di_checker_data, skipped_lines, line)
# Parse the program arguments.
def parse_program_args(parser):
    """Register the script's command-line arguments on *parser* and parse them.

    Adds two positional string arguments, ``file_name`` and ``html_file``,
    and the optional ``-compress`` flag, then returns the result of
    ``parser.parse_args()``.
    """
    positional_args = (
        ("file_name", "json file to process"),
        ("html_file", "html file to output data"),
    )
    for arg_name, arg_help in positional_args:
        parser.add_argument(arg_name, type=str, help=arg_help)
    # Boolean switch: False unless -compress is given on the command line.
    parser.add_argument(
        "-compress",
        action="store_true",
        help="create reduced html report",
    )
    parsed = parser.parse_args()
    return parsed
@ -378,8 +391,6 @@ def Main():
print ("error: The output file must be '.html'.")
sys.exit(1)
(debug_info_bugs, skipped_lines) = get_json(opts.file_name)
# Use the defaultdict in order to make multidim dicts.
di_location_bugs = defaultdict(lambda: defaultdict(dict))
di_subprogram_bugs = defaultdict(lambda: defaultdict(dict))
@ -390,7 +401,27 @@ def Main():
di_sp_bugs_summary = OrderedDict()
di_var_bugs_summary = OrderedDict()
# Compress similar bugs.
# DILocBugs with same pass & instruction name.
di_loc_pass_instr_set = set()
# DISPBugs with same pass & function name.
di_sp_pass_fn_set = set()
# DIVarBugs with same pass & variable name.
di_var_pass_var_set = set()
start_line = 0
chunk_size = 1000000
end_line = chunk_size - 1
skipped_lines = 0
skipped_bugs = 0
# Process each chunk of 1 million JSON lines.
while True:
if start_line > end_line:
break
(debug_info_bugs, skipped, end_line) = get_json_chunk(opts.file_name,start_line,chunk_size)
start_line += chunk_size
skipped_lines += skipped
# Map the bugs into the file-pass pairs.
for bugs_per_pass in debug_info_bugs:
try:
@ -405,6 +436,10 @@ def Main():
di_sp_bugs = []
di_var_bugs = []
# Omit duplicated bugs.
di_loc_set = set()
di_sp_set = set()
di_var_set = set()
for bug in bugs:
try:
bugs_metadata = bug["metadata"]
@ -421,7 +456,16 @@ def Main():
except:
skipped_bugs += 1
continue
di_loc_bugs.append(DILocBug(action, bb_name, fn_name, instr))
di_loc_bug = DILocBug(action, bb_name, fn_name, instr)
if not str(di_loc_bug) in di_loc_set:
di_loc_set.add(str(di_loc_bug))
if opts.compress:
pass_instr = bugs_pass + instr
if not pass_instr in di_loc_pass_instr_set:
di_loc_pass_instr_set.add(pass_instr)
di_loc_bugs.append(di_loc_bug)
else:
di_loc_bugs.append(di_loc_bug)
# Fill the summary dict.
if bugs_pass in di_location_bugs_summary:
@ -435,7 +479,16 @@ def Main():
except:
skipped_bugs += 1
continue
di_sp_bugs.append(DISPBug(action, name))
di_sp_bug = DISPBug(action, name)
if not str(di_sp_bug) in di_sp_set:
di_sp_set.add(str(di_sp_bug))
if opts.compress:
pass_fn = bugs_pass + name
if not pass_fn in di_sp_pass_fn_set:
di_sp_pass_fn_set.add(pass_fn)
di_sp_bugs.append(di_sp_bug)
else:
di_sp_bugs.append(di_sp_bug)
# Fill the summary dict.
if bugs_pass in di_sp_bugs_summary:
@ -450,7 +503,16 @@ def Main():
except:
skipped_bugs += 1
continue
di_var_bugs.append(DIVarBug(action, name, fn_name))
di_var_bug = DIVarBug(action, name, fn_name)
if not str(di_var_bug) in di_var_set:
di_var_set.add(str(di_var_bug))
if opts.compress:
pass_var = bugs_pass + name
if not pass_var in di_var_pass_var_set:
di_var_pass_var_set.add(pass_var)
di_var_bugs.append(di_var_bug)
else:
di_var_bugs.append(di_var_bug)
# Fill the summary dict.
if bugs_pass in di_var_bugs_summary: