[lit] Clean up internal diff's encoding handling

As suggested by rnk at D67643#1673043, instead of reading files
multiple times until an appropriate encoding is found, read them once
as binary, and then try to decode what was read.

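For Python >= 3.5, don't fail when attempting to decode the
`diff_bytes` output in order to print it: undecodable bytes are now
escaped via `errors="backslashreplace"` instead of raising
`UnicodeDecodeError`.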

Avoid failures for Python 2.7 used on some Windows bots by
transforming diff output with `lit.util.to_string` before writing it
to stdout.

Finally, add some tests for encoding handling.

Reviewed By: rnk

Differential Revision: https://reviews.llvm.org/D68664

llvm-svn: 375018
This commit is contained in:
Joel E. Denny 2019-10-16 17:21:24 +00:00
parent f89cf21337
commit f095b8c425
7 changed files with 88 additions and 35 deletions

View File

@ -5,6 +5,7 @@ import functools
import io import io
import itertools import itertools
import getopt import getopt
import locale
import os, signal, subprocess, sys import os, signal, subprocess, sys
import re import re
import stat import stat
@ -415,32 +416,21 @@ def executeBuiltinDiff(cmd, cmd_shenv):
return path, sorted(child_trees) return path, sorted(child_trees)
def compareTwoFiles(filepaths): def compareTwoFiles(filepaths):
compare_bytes = False
encoding = None
filelines = [] filelines = []
for file in filepaths: for file in filepaths:
with open(file, 'rb') as file_bin:
filelines.append(file_bin.readlines())
try:
return compareTwoTextFiles(filepaths, filelines,
locale.getpreferredencoding(False))
except UnicodeDecodeError:
try: try:
with open(file, 'r') as f: return compareTwoTextFiles(filepaths, filelines, "utf-8")
filelines.append(f.readlines()) except:
except UnicodeDecodeError: return compareTwoBinaryFiles(filepaths, filelines)
try:
with io.open(file, 'r', encoding="utf-8") as f:
filelines.append(f.readlines())
encoding = "utf-8"
except:
compare_bytes = True
if compare_bytes:
return compareTwoBinaryFiles(filepaths)
else:
return compareTwoTextFiles(filepaths, encoding)
def compareTwoBinaryFiles(filepaths):
filelines = []
for file in filepaths:
with open(file, 'rb') as f:
filelines.append(f.readlines())
def compareTwoBinaryFiles(filepaths, filelines):
exitCode = 0 exitCode = 0
if hasattr(difflib, 'diff_bytes'): if hasattr(difflib, 'diff_bytes'):
# python 3.5 or newer # python 3.5 or newer
@ -448,7 +438,7 @@ def executeBuiltinDiff(cmd, cmd_shenv):
filelines[1], filepaths[0].encode(), filelines[1], filepaths[0].encode(),
filepaths[1].encode(), filepaths[1].encode(),
n = num_context_lines) n = num_context_lines)
diffs = [diff.decode() for diff in diffs] diffs = [diff.decode(errors="backslashreplace") for diff in diffs]
else: else:
# python 2.7 # python 2.7
func = difflib.unified_diff if unified_diff else difflib.context_diff func = difflib.unified_diff if unified_diff else difflib.context_diff
@ -456,19 +446,18 @@ def executeBuiltinDiff(cmd, cmd_shenv):
n = num_context_lines) n = num_context_lines)
for diff in diffs: for diff in diffs:
stdout.write(diff) stdout.write(to_string(diff))
exitCode = 1 exitCode = 1
return exitCode return exitCode
def compareTwoTextFiles(filepaths, encoding): def compareTwoTextFiles(filepaths, filelines_bin, encoding):
filelines = [] filelines = []
for file in filepaths: for lines_bin in filelines_bin:
if encoding is None: lines = []
with open(file, 'r') as f: for line_bin in lines_bin:
filelines.append(f.readlines()) line = line_bin.decode(encoding=encoding)
else: lines.append(line)
with io.open(file, 'r', encoding=encoding) as f: filelines.append(lines)
filelines.append(f.readlines())
exitCode = 0 exitCode = 0
def compose2(f, g): def compose2(f, g):
@ -488,7 +477,7 @@ def executeBuiltinDiff(cmd, cmd_shenv):
func = difflib.unified_diff if unified_diff else difflib.context_diff func = difflib.unified_diff if unified_diff else difflib.context_diff
for diff in func(filelines[0], filelines[1], filepaths[0], filepaths[1], for diff in func(filelines[0], filelines[1], filepaths[0], filepaths[1],
n = num_context_lines): n = num_context_lines):
stdout.write(diff) stdout.write(to_string(diff))
exitCode = 1 exitCode = 1
return exitCode return exitCode

View File

@ -0,0 +1,9 @@
# Check that diff falls back to binary mode if it cannot decode a file.
# RUN: diff -u diff-in.bin diff-in.bin
# RUN: diff -u diff-in.utf16 diff-in.bin && false || true
# RUN: diff -u diff-in.utf8 diff-in.bin && false || true
# RUN: diff -u diff-in.bin diff-in.utf8 && false || true
# Fail so lit will print output.
# RUN: false

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,3 @@
foo
bar
baz

View File

@ -8,7 +8,7 @@
# #
# END. # END.
# CHECK: Failing Tests (30) # CHECK: Failing Tests (31)
# CHECK: Failing Tests (1) # CHECK: Failing Tests (1)
# CHECK: Failing Tests (2) # CHECK: Failing Tests (2)
# CHECK: error: argument --max-failures: requires positive integer, but found '0' # CHECK: error: argument --max-failures: requires positive integer, but found '0'

View File

@ -34,6 +34,58 @@
# CHECK: error: command failed with exit status: 127 # CHECK: error: command failed with exit status: 127
# CHECK: *** # CHECK: ***
# CHECK: FAIL: shtest-shell :: diff-encodings.txt
# CHECK: *** TEST 'shtest-shell :: diff-encodings.txt' FAILED ***
# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.bin"
# CHECK-NOT: error
# CHECK: $ "diff" "-u" "diff-in.utf16" "diff-in.bin"
# CHECK: # command output:
# CHECK-NEXT: ---
# CHECK-NEXT: +++
# CHECK-NEXT: @@
# CHECK-NEXT: {{^ .f.o.o.$}}
# CHECK-NEXT: {{^-.b.a.r.$}}
# CHECK-NEXT: {{^\+.b.a.r..}}
# CHECK-NEXT: {{^ .b.a.z.$}}
# CHECK: error: command failed with exit status: 1
# CHECK: $ "true"
# CHECK: $ "diff" "-u" "diff-in.utf8" "diff-in.bin"
# CHECK: # command output:
# CHECK-NEXT: ---
# CHECK-NEXT: +++
# CHECK-NEXT: @@
# CHECK-NEXT: -foo
# CHECK-NEXT: -bar
# CHECK-NEXT: -baz
# CHECK-NEXT: {{^\+.f.o.o.$}}
# CHECK-NEXT: {{^\+.b.a.r..}}
# CHECK-NEXT: {{^\+.b.a.z.$}}
# CHECK: error: command failed with exit status: 1
# CHECK: $ "true"
# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.utf8"
# CHECK: # command output:
# CHECK-NEXT: ---
# CHECK-NEXT: +++
# CHECK-NEXT: @@
# CHECK-NEXT: {{^\-.f.o.o.$}}
# CHECK-NEXT: {{^\-.b.a.r..}}
# CHECK-NEXT: {{^\-.b.a.z.$}}
# CHECK-NEXT: +foo
# CHECK-NEXT: +bar
# CHECK-NEXT: +baz
# CHECK: error: command failed with exit status: 1
# CHECK: $ "true"
# CHECK: $ "false"
# CHECK: ***
# CHECK: FAIL: shtest-shell :: diff-error-0.txt # CHECK: FAIL: shtest-shell :: diff-error-0.txt
# CHECK: *** TEST 'shtest-shell :: diff-error-0.txt' FAILED *** # CHECK: *** TEST 'shtest-shell :: diff-error-0.txt' FAILED ***
# CHECK: $ "diff" "diff-error-0.txt" "diff-error-0.txt" # CHECK: $ "diff" "diff-error-0.txt" "diff-error-0.txt"
@ -308,4 +360,4 @@
# CHECK: PASS: shtest-shell :: sequencing-0.txt # CHECK: PASS: shtest-shell :: sequencing-0.txt
# CHECK: XFAIL: shtest-shell :: sequencing-1.txt # CHECK: XFAIL: shtest-shell :: sequencing-1.txt
# CHECK: PASS: shtest-shell :: valid-shell.txt # CHECK: PASS: shtest-shell :: valid-shell.txt
# CHECK: Failing Tests (30) # CHECK: Failing Tests (31)