Reland r374389: [lit] Clean up internal diff's encoding handling

To avoid breaking some tests, D66574, D68664, D67643, and D68668
originally landed together.  However, D68664 introduced an issue that
is now addressed by D68839, so all of these are relanding with it.

Differential Revision: https://reviews.llvm.org/D68664

llvm-svn: 374649
This commit is contained in:
Joel E. Denny 2019-10-12 11:57:20 +00:00
parent daf42dc36d
commit e4f11a3192
7 changed files with 86 additions and 33 deletions

View File

@ -1,6 +1,7 @@
import difflib
import functools
import getopt
import locale
import os
import sys
@ -24,37 +25,26 @@ def getDirTree(path, basedir=""):
return path, sorted(child_trees)
def compareTwoFiles(flags, filepaths):
compare_bytes = False
encoding = None
filelines = []
for file in filepaths:
with open(file, 'rb') as file_bin:
filelines.append(file_bin.readlines())
try:
return compareTwoTextFiles(flags, filepaths, filelines,
locale.getpreferredencoding(False))
except UnicodeDecodeError:
try:
with open(file, 'r') as f:
filelines.append(f.readlines())
except UnicodeDecodeError:
try:
with io.open(file, 'r', encoding="utf-8") as f:
filelines.append(f.readlines())
encoding = "utf-8"
except:
compare_bytes = True
if compare_bytes:
return compareTwoBinaryFiles(flags, filepaths)
else:
return compareTwoTextFiles(flags, filepaths, encoding)
def compareTwoBinaryFiles(flags, filepaths):
filelines = []
for file in filepaths:
with open(file, 'rb') as f:
filelines.append(f.readlines())
return compareTwoTextFiles(flags, filepaths, filelines, "utf-8")
except:
return compareTwoBinaryFiles(flags, filepaths, filelines)
def compareTwoBinaryFiles(flags, filepaths, filelines):
exitCode = 0
if hasattr(difflib, 'diff_bytes'):
# python 3.5 or newer
diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0], filelines[1], filepaths[0].encode(), filepaths[1].encode())
diffs = [diff.decode() for diff in diffs]
diffs = [diff.decode(errors="backslashreplace") for diff in diffs]
else:
# python 2.7
if flags.unified_diff:
@ -68,15 +58,14 @@ def compareTwoBinaryFiles(flags, filepaths):
exitCode = 1
return exitCode
def compareTwoTextFiles(flags, filepaths, encoding):
def compareTwoTextFiles(flags, filepaths, filelines_bin, encoding):
filelines = []
for file in filepaths:
if encoding is None:
with open(file, 'r') as f:
filelines.append(f.readlines())
else:
with io.open(file, 'r', encoding=encoding) as f:
filelines.append(f.readlines())
for lines_bin in filelines_bin:
lines = []
for line_bin in lines_bin:
line = line_bin.decode(encoding=encoding)
lines.append(line)
filelines.append(lines)
exitCode = 0
def compose2(f, g):

View File

@ -0,0 +1,9 @@
# Check that diff falls back to binary mode if it cannot decode a file.
# RUN: diff -u diff-in.bin diff-in.bin
# RUN: diff -u diff-in.utf16 diff-in.bin && false || true
# RUN: diff -u diff-in.utf8 diff-in.bin && false || true
# RUN: diff -u diff-in.bin diff-in.utf8 && false || true
# Fail so lit will print output.
# RUN: false

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,3 @@
foo
bar
baz

View File

@ -8,7 +8,7 @@
#
# END.
# CHECK: Failing Tests (27)
# CHECK: Failing Tests (28)
# CHECK: Failing Tests (1)
# CHECK: Failing Tests (2)
# CHECK: error: argument --max-failures: requires positive integer, but found '0'

View File

@ -34,6 +34,58 @@
# CHECK: error: command failed with exit status: 127
# CHECK: ***
# CHECK: FAIL: shtest-shell :: diff-encodings.txt
# CHECK: *** TEST 'shtest-shell :: diff-encodings.txt' FAILED ***
# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.bin"
# CHECK-NOT: error
# CHECK: $ "diff" "-u" "diff-in.utf16" "diff-in.bin"
# CHECK: # command output:
# CHECK-NEXT: ---
# CHECK-NEXT: +++
# CHECK-NEXT: @@
# CHECK-NEXT: {{^ .f.o.o.$}}
# CHECK-NEXT: {{^-.b.a.r.$}}
# CHECK-NEXT: {{^\+.b.a.r..}}
# CHECK-NEXT: {{^ .b.a.z.$}}
# CHECK: error: command failed with exit status: 1
# CHECK: $ "true"
# CHECK: $ "diff" "-u" "diff-in.utf8" "diff-in.bin"
# CHECK: # command output:
# CHECK-NEXT: ---
# CHECK-NEXT: +++
# CHECK-NEXT: @@
# CHECK-NEXT: -foo
# CHECK-NEXT: -bar
# CHECK-NEXT: -baz
# CHECK-NEXT: {{^\+.f.o.o.$}}
# CHECK-NEXT: {{^\+.b.a.r..}}
# CHECK-NEXT: {{^\+.b.a.z.$}}
# CHECK: error: command failed with exit status: 1
# CHECK: $ "true"
# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.utf8"
# CHECK: # command output:
# CHECK-NEXT: ---
# CHECK-NEXT: +++
# CHECK-NEXT: @@
# CHECK-NEXT: {{^\-.f.o.o.$}}
# CHECK-NEXT: {{^\-.b.a.r..}}
# CHECK-NEXT: {{^\-.b.a.z.$}}
# CHECK-NEXT: +foo
# CHECK-NEXT: +bar
# CHECK-NEXT: +baz
# CHECK: error: command failed with exit status: 1
# CHECK: $ "true"
# CHECK: $ "false"
# CHECK: ***
# CHECK: FAIL: shtest-shell :: diff-error-1.txt
# CHECK: *** TEST 'shtest-shell :: diff-error-1.txt' FAILED ***
# CHECK: $ "diff" "-B" "temp1.txt" "temp2.txt"
@ -245,4 +297,4 @@
# CHECK: PASS: shtest-shell :: sequencing-0.txt
# CHECK: XFAIL: shtest-shell :: sequencing-1.txt
# CHECK: PASS: shtest-shell :: valid-shell.txt
# CHECK: Failing Tests (27)
# CHECK: Failing Tests (28)