forked from OSchip/llvm-project
[lit] Clean up internal diff's encoding handling
As suggested by rnk at D67643#1673043, instead of reading files multiple times until an appropriate encoding is found, read them once as binary, and then try to decode what was read.

For Python >= 3.5, don't fail when attempting to decode the `diff_bytes` output in order to print it.

Avoid failures for Python 2.7 used on some Windows bots by transforming diff output with `lit.util.to_string` before writing it to stdout.

Finally, add some tests for encoding handling.

Reviewed By: rnk
Differential Revision: https://reviews.llvm.org/D68664
llvm-svn: 375018
This commit is contained in:
parent
f89cf21337
commit
f095b8c425
|
@ -5,6 +5,7 @@ import functools
|
||||||
import io
|
import io
|
||||||
import itertools
|
import itertools
|
||||||
import getopt
|
import getopt
|
||||||
|
import locale
|
||||||
import os, signal, subprocess, sys
|
import os, signal, subprocess, sys
|
||||||
import re
|
import re
|
||||||
import stat
|
import stat
|
||||||
|
@ -415,32 +416,21 @@ def executeBuiltinDiff(cmd, cmd_shenv):
|
||||||
return path, sorted(child_trees)
|
return path, sorted(child_trees)
|
||||||
|
|
||||||
def compareTwoFiles(filepaths):
|
def compareTwoFiles(filepaths):
|
||||||
compare_bytes = False
|
|
||||||
encoding = None
|
|
||||||
filelines = []
|
filelines = []
|
||||||
for file in filepaths:
|
for file in filepaths:
|
||||||
|
with open(file, 'rb') as file_bin:
|
||||||
|
filelines.append(file_bin.readlines())
|
||||||
|
|
||||||
|
try:
|
||||||
|
return compareTwoTextFiles(filepaths, filelines,
|
||||||
|
locale.getpreferredencoding(False))
|
||||||
|
except UnicodeDecodeError:
|
||||||
try:
|
try:
|
||||||
with open(file, 'r') as f:
|
return compareTwoTextFiles(filepaths, filelines, "utf-8")
|
||||||
filelines.append(f.readlines())
|
except:
|
||||||
except UnicodeDecodeError:
|
return compareTwoBinaryFiles(filepaths, filelines)
|
||||||
try:
|
|
||||||
with io.open(file, 'r', encoding="utf-8") as f:
|
|
||||||
filelines.append(f.readlines())
|
|
||||||
encoding = "utf-8"
|
|
||||||
except:
|
|
||||||
compare_bytes = True
|
|
||||||
|
|
||||||
if compare_bytes:
|
|
||||||
return compareTwoBinaryFiles(filepaths)
|
|
||||||
else:
|
|
||||||
return compareTwoTextFiles(filepaths, encoding)
|
|
||||||
|
|
||||||
def compareTwoBinaryFiles(filepaths):
|
|
||||||
filelines = []
|
|
||||||
for file in filepaths:
|
|
||||||
with open(file, 'rb') as f:
|
|
||||||
filelines.append(f.readlines())
|
|
||||||
|
|
||||||
|
def compareTwoBinaryFiles(filepaths, filelines):
|
||||||
exitCode = 0
|
exitCode = 0
|
||||||
if hasattr(difflib, 'diff_bytes'):
|
if hasattr(difflib, 'diff_bytes'):
|
||||||
# python 3.5 or newer
|
# python 3.5 or newer
|
||||||
|
@ -448,7 +438,7 @@ def executeBuiltinDiff(cmd, cmd_shenv):
|
||||||
filelines[1], filepaths[0].encode(),
|
filelines[1], filepaths[0].encode(),
|
||||||
filepaths[1].encode(),
|
filepaths[1].encode(),
|
||||||
n = num_context_lines)
|
n = num_context_lines)
|
||||||
diffs = [diff.decode() for diff in diffs]
|
diffs = [diff.decode(errors="backslashreplace") for diff in diffs]
|
||||||
else:
|
else:
|
||||||
# python 2.7
|
# python 2.7
|
||||||
func = difflib.unified_diff if unified_diff else difflib.context_diff
|
func = difflib.unified_diff if unified_diff else difflib.context_diff
|
||||||
|
@ -456,19 +446,18 @@ def executeBuiltinDiff(cmd, cmd_shenv):
|
||||||
n = num_context_lines)
|
n = num_context_lines)
|
||||||
|
|
||||||
for diff in diffs:
|
for diff in diffs:
|
||||||
stdout.write(diff)
|
stdout.write(to_string(diff))
|
||||||
exitCode = 1
|
exitCode = 1
|
||||||
return exitCode
|
return exitCode
|
||||||
|
|
||||||
def compareTwoTextFiles(filepaths, encoding):
|
def compareTwoTextFiles(filepaths, filelines_bin, encoding):
|
||||||
filelines = []
|
filelines = []
|
||||||
for file in filepaths:
|
for lines_bin in filelines_bin:
|
||||||
if encoding is None:
|
lines = []
|
||||||
with open(file, 'r') as f:
|
for line_bin in lines_bin:
|
||||||
filelines.append(f.readlines())
|
line = line_bin.decode(encoding=encoding)
|
||||||
else:
|
lines.append(line)
|
||||||
with io.open(file, 'r', encoding=encoding) as f:
|
filelines.append(lines)
|
||||||
filelines.append(f.readlines())
|
|
||||||
|
|
||||||
exitCode = 0
|
exitCode = 0
|
||||||
def compose2(f, g):
|
def compose2(f, g):
|
||||||
|
@ -488,7 +477,7 @@ def executeBuiltinDiff(cmd, cmd_shenv):
|
||||||
func = difflib.unified_diff if unified_diff else difflib.context_diff
|
func = difflib.unified_diff if unified_diff else difflib.context_diff
|
||||||
for diff in func(filelines[0], filelines[1], filepaths[0], filepaths[1],
|
for diff in func(filelines[0], filelines[1], filepaths[0], filepaths[1],
|
||||||
n = num_context_lines):
|
n = num_context_lines):
|
||||||
stdout.write(diff)
|
stdout.write(to_string(diff))
|
||||||
exitCode = 1
|
exitCode = 1
|
||||||
return exitCode
|
return exitCode
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
# Check that diff falls back to binary mode if it cannot decode a file.
|
||||||
|
|
||||||
|
# RUN: diff -u diff-in.bin diff-in.bin
|
||||||
|
# RUN: diff -u diff-in.utf16 diff-in.bin && false || true
|
||||||
|
# RUN: diff -u diff-in.utf8 diff-in.bin && false || true
|
||||||
|
# RUN: diff -u diff-in.bin diff-in.utf8 && false || true
|
||||||
|
|
||||||
|
# Fail so lit will print output.
|
||||||
|
# RUN: false
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,3 @@
|
||||||
|
foo
|
||||||
|
bar
|
||||||
|
baz
|
|
@ -8,7 +8,7 @@
|
||||||
#
|
#
|
||||||
# END.
|
# END.
|
||||||
|
|
||||||
# CHECK: Failing Tests (30)
|
# CHECK: Failing Tests (31)
|
||||||
# CHECK: Failing Tests (1)
|
# CHECK: Failing Tests (1)
|
||||||
# CHECK: Failing Tests (2)
|
# CHECK: Failing Tests (2)
|
||||||
# CHECK: error: argument --max-failures: requires positive integer, but found '0'
|
# CHECK: error: argument --max-failures: requires positive integer, but found '0'
|
||||||
|
|
|
@ -34,6 +34,58 @@
|
||||||
# CHECK: error: command failed with exit status: 127
|
# CHECK: error: command failed with exit status: 127
|
||||||
# CHECK: ***
|
# CHECK: ***
|
||||||
|
|
||||||
|
|
||||||
|
# CHECK: FAIL: shtest-shell :: diff-encodings.txt
|
||||||
|
# CHECK: *** TEST 'shtest-shell :: diff-encodings.txt' FAILED ***
|
||||||
|
|
||||||
|
# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.bin"
|
||||||
|
# CHECK-NOT: error
|
||||||
|
|
||||||
|
# CHECK: $ "diff" "-u" "diff-in.utf16" "diff-in.bin"
|
||||||
|
# CHECK: # command output:
|
||||||
|
# CHECK-NEXT: ---
|
||||||
|
# CHECK-NEXT: +++
|
||||||
|
# CHECK-NEXT: @@
|
||||||
|
# CHECK-NEXT: {{^ .f.o.o.$}}
|
||||||
|
# CHECK-NEXT: {{^-.b.a.r.$}}
|
||||||
|
# CHECK-NEXT: {{^\+.b.a.r..}}
|
||||||
|
# CHECK-NEXT: {{^ .b.a.z.$}}
|
||||||
|
# CHECK: error: command failed with exit status: 1
|
||||||
|
# CHECK: $ "true"
|
||||||
|
|
||||||
|
# CHECK: $ "diff" "-u" "diff-in.utf8" "diff-in.bin"
|
||||||
|
# CHECK: # command output:
|
||||||
|
# CHECK-NEXT: ---
|
||||||
|
# CHECK-NEXT: +++
|
||||||
|
# CHECK-NEXT: @@
|
||||||
|
# CHECK-NEXT: -foo
|
||||||
|
# CHECK-NEXT: -bar
|
||||||
|
# CHECK-NEXT: -baz
|
||||||
|
# CHECK-NEXT: {{^\+.f.o.o.$}}
|
||||||
|
# CHECK-NEXT: {{^\+.b.a.r..}}
|
||||||
|
# CHECK-NEXT: {{^\+.b.a.z.$}}
|
||||||
|
# CHECK: error: command failed with exit status: 1
|
||||||
|
# CHECK: $ "true"
|
||||||
|
|
||||||
|
# CHECK: $ "diff" "-u" "diff-in.bin" "diff-in.utf8"
|
||||||
|
# CHECK: # command output:
|
||||||
|
# CHECK-NEXT: ---
|
||||||
|
# CHECK-NEXT: +++
|
||||||
|
# CHECK-NEXT: @@
|
||||||
|
# CHECK-NEXT: {{^\-.f.o.o.$}}
|
||||||
|
# CHECK-NEXT: {{^\-.b.a.r..}}
|
||||||
|
# CHECK-NEXT: {{^\-.b.a.z.$}}
|
||||||
|
# CHECK-NEXT: +foo
|
||||||
|
# CHECK-NEXT: +bar
|
||||||
|
# CHECK-NEXT: +baz
|
||||||
|
# CHECK: error: command failed with exit status: 1
|
||||||
|
# CHECK: $ "true"
|
||||||
|
|
||||||
|
# CHECK: $ "false"
|
||||||
|
|
||||||
|
# CHECK: ***
|
||||||
|
|
||||||
|
|
||||||
# CHECK: FAIL: shtest-shell :: diff-error-0.txt
|
# CHECK: FAIL: shtest-shell :: diff-error-0.txt
|
||||||
# CHECK: *** TEST 'shtest-shell :: diff-error-0.txt' FAILED ***
|
# CHECK: *** TEST 'shtest-shell :: diff-error-0.txt' FAILED ***
|
||||||
# CHECK: $ "diff" "diff-error-0.txt" "diff-error-0.txt"
|
# CHECK: $ "diff" "diff-error-0.txt" "diff-error-0.txt"
|
||||||
|
@ -308,4 +360,4 @@
|
||||||
# CHECK: PASS: shtest-shell :: sequencing-0.txt
|
# CHECK: PASS: shtest-shell :: sequencing-0.txt
|
||||||
# CHECK: XFAIL: shtest-shell :: sequencing-1.txt
|
# CHECK: XFAIL: shtest-shell :: sequencing-1.txt
|
||||||
# CHECK: PASS: shtest-shell :: valid-shell.txt
|
# CHECK: PASS: shtest-shell :: valid-shell.txt
|
||||||
# CHECK: Failing Tests (30)
|
# CHECK: Failing Tests (31)
|
||||||
|
|
Loading…
Reference in New Issue