[lit] Fix problem in how Python versions open files with different encodings

Summary:
This issue was found when running the clang unit tests on Windows. Python 3.x cannot read some of the files that the tests use with a plain `open()` because of their encoding. Python 2.7+ and Python 3.x both support `io.open`, which allows an encoding to be specified.

This change determines whether the two files being compared should be opened (and then compared) as text or as binary, and whether to use UTF-8 or the default encoding, before proceeding with a line-by-line comparison.

Patch by Stella Stamenova!

Reviewers: zturner, llvm-commits, rnk, MaggieYi

Reviewed By: zturner

Subscribers: asmith, MatzeB, stella.stamenova, delcypher, llvm-commits

Differential Revision: https://reviews.llvm.org/D43165

llvm-svn: 329012
This commit is contained in:
Aaron Smith 2018-04-02 20:57:06 +00:00
parent cbb0450540
commit 321c2487d7
1 changed files with 44 additions and 1 deletions

View File

@ -2,6 +2,7 @@ from __future__ import absolute_import
import difflib
import errno
import functools
import io
import itertools
import getopt
import os, signal, subprocess, sys
@ -386,9 +387,51 @@ def executeBuiltinDiff(cmd, cmd_shenv):
return path, sorted(child_trees)
def compareTwoFiles(filepaths):
    """Compare the files in `filepaths` as text or, failing that, as bytes.

    Each file is probed by reading it as text with the default encoding.
    If that raises UnicodeDecodeError, UTF-8 is tried; if UTF-8 cannot
    decode the file either, the comparison falls back to binary mode.

    Returns the result of compareTwoBinaryFiles(filepaths) or
    compareTwoTextFiles(filepaths, encoding), where `encoding` is
    "utf-8" if any file needed it and None otherwise.
    """
    encoding = None
    for path in filepaths:
        try:
            # Probe only: the chosen helper re-reads the files itself, so
            # the decoded contents are deliberately not kept here.
            with io.open(path, 'r') as f:
                f.read()
        except UnicodeDecodeError:
            try:
                # io.open accepts `encoding` on both Python 2.7 and 3.x,
                # whereas the builtin open() rejects it on 2.7.
                with io.open(path, 'r', encoding="utf-8") as f:
                    f.read()
                encoding = "utf-8"
            except UnicodeDecodeError:
                # Not decodable as text at all; no need to probe further.
                return compareTwoBinaryFiles(filepaths)

    return compareTwoTextFiles(filepaths, encoding)
def compareTwoBinaryFiles(filepaths):
    """Diff the two files in `filepaths` as raw bytes.

    Every diff line is written to `stdout`. Returns 0 when no diff output
    was produced and 1 otherwise.

    NOTE(review): `stdout` and `unified_diff` are not defined in this
    block; presumably they come from the enclosing scope -- confirm.
    """
    filelines = []
    for path in filepaths:
        with open(path, 'rb') as f:
            filelines.append(f.readlines())

    exitCode = 0
    if hasattr(difflib, 'diff_bytes'):
        # python 3.5 or newer
        # NOTE(review): this branch always emits a unified diff and does
        # not consult `unified_diff` like the 2.7 branch -- confirm intent.
        diffs = difflib.diff_bytes(
            difflib.unified_diff,
            filelines[0],
            filelines[1],
            filepaths[0].encode(),
            filepaths[1].encode(),
        )
        # Binary content need not be valid UTF-8, so a strict decode could
        # raise UnicodeDecodeError; escape undecodable bytes instead.
        diffs = [diff.decode(errors="backslashreplace") for diff in diffs]
    else:
        # python 2.7
        func = difflib.unified_diff if unified_diff else difflib.context_diff
        diffs = func(filelines[0], filelines[1], filepaths[0], filepaths[1])

    for diff in diffs:
        stdout.write(diff)
        exitCode = 1
    return exitCode
def compareTwoTextFiles(filepaths, encoding):
filelines = []
for file in filepaths:
with open(file, 'r', encoding=encoding) as f:
filelines.append(f.readlines())
exitCode = 0