From d68bed0fa94e133f79c14248621f8c5544f7bdd9 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Tue, 3 Nov 2020 10:32:38 +0000
Subject: [PATCH] [SCCP] Handle bitcast of vector constants.

Vectors where all elements have the same known constant range are
treated as a single constant range in the lattice. When bitcasting such
vectors, there is a mismatch between the width of the lattice value (a
single, element-wide constant range) and the width of the original
vector operand. Go to overdefined in that case.

Fixes PR47991.
---
 llvm/lib/Transforms/Scalar/SCCP.cpp         | 10 ++++++++++
 llvm/test/Transforms/SCCP/vector-bitcast.ll | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp
index b3732c11babe..24b4da64703f 100644
--- a/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -843,6 +843,16 @@ void SCCPSolver::visitCastInst(CastInst &I) {
   auto &LV = getValueState(&I);
   ConstantRange OpRange = OpSt.getConstantRange();
   Type *DestTy = I.getDestTy();
+  // Vectors where all elements have the same known constant range are treated
+  // as a single constant range in the lattice. When bitcasting such vectors,
+  // there is a mismatch between the width of the lattice value (a single,
+  // element-wide constant range) and the width of the original vector
+  // operand. Go to overdefined in that case.
+  if (I.getOpcode() == Instruction::BitCast &&
+      I.getOperand(0)->getType()->isVectorTy() &&
+      OpRange.getBitWidth() < DL.getTypeSizeInBits(DestTy))
+    return (void)markOverdefined(&I);
+
   ConstantRange Res =
       OpRange.castOp(I.getOpcode(), DL.getTypeSizeInBits(DestTy));
   mergeInValue(LV, &I, ValueLatticeElement::getRange(Res));
diff --git a/llvm/test/Transforms/SCCP/vector-bitcast.ll b/llvm/test/Transforms/SCCP/vector-bitcast.ll
index b032085083c6..3cfa014421e4 100644
--- a/llvm/test/Transforms/SCCP/vector-bitcast.ll
+++ b/llvm/test/Transforms/SCCP/vector-bitcast.ll
@@ -1,11 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -sccp -S < %s | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
 
-; CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64>* %p
 ; rdar://11324230
 
+declare void @use(i1)
+
 define void @foo(<2 x i64>* %p) nounwind {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[WHILE_BODY_I:%.*]]
+; CHECK:       while.body.i:
+; CHECK-NEXT:    [[VWORKEXPONENT_I_033:%.*]] = phi <4 x i32> [ [[SUB_I_I:%.*]], [[WHILE_BODY_I]] ], [ <i32 939524096, i32 939524096, i32 939524096, i32 939524096>, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[SUB_I_I]] = add <4 x i32> [[VWORKEXPONENT_I_033]], <i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608>
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[SUB_I_I]] to <2 x i64>
+; CHECK-NEXT:    store volatile <2 x i64> zeroinitializer, <2 x i64>* [[P:%.*]], align 16
+; CHECK-NEXT:    br label [[WHILE_BODY_I]]
+;
 entry:
   br label %while.body.i
 
@@ -18,3 +30,48 @@ while.body.i:                                     ; preds = %while.body.i, %entr
   br label %while.body.i
 }
 
+%union.V512 = type { <16 x float> }
+
+@i8_mix = dso_local global %union.V512 zeroinitializer
+declare <64 x i8> @llvm.abs.v64i8(<64 x i8>, i1 immarg)
+
+; Test for PR47991.
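+; All 64 elements of the abs result share the same known constant range, so
+; the lattice tracks a single 8-bit range for the whole <64 x i8> vector.
+; The bitcast to the wider i512 must therefore go to overdefined instead of
+; reusing that narrow range, and none of the compares below can be folded.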
+define void @vec_cast_abs() {
+; CHECK-LABEL: @vec_cast_abs(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP1:%.*]] = load <64 x i8>, <64 x i8>* bitcast (%union.V512* @i8_mix to <64 x i8>*), align 64
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <64 x i8> @llvm.abs.v64i8(<64 x i8> [[TMP1]], i1 false)
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <64 x i8> [[TMP2]] to i512
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i512 [[TMP3]], 12
+; CHECK-NEXT:    call void @use(i1 [[CMP_1]])
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp ult i512 [[TMP3]], 500
+; CHECK-NEXT:    call void @use(i1 [[CMP_2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i512 [[TMP3]] to i32
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp eq i32 [[TMP4]], 12
+; CHECK-NEXT:    call void @use(i1 [[CMP_3]])
+; CHECK-NEXT:    [[CMP_4:%.*]] = icmp ult i32 [[TMP4]], 500
+; CHECK-NEXT:    call void @use(i1 [[CMP_4]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp1 = load <64 x i8>, <64 x i8>* bitcast (%union.V512* @i8_mix to <64 x i8>*)
+  %tmp2 = tail call <64 x i8> @llvm.abs.v64i8(<64 x i8> %tmp1, i1 false)
+
+  %tmp3 = bitcast <64 x i8> %tmp2 to i512
+  %cmp.1 = icmp eq i512 %tmp3, 12
+  call void @use(i1 %cmp.1)
+  %cmp.2 = icmp ult i512 %tmp3, 500
+  call void @use(i1 %cmp.2)
+
+  %tmp4 = trunc i512 %tmp3 to i32
+  %cmp.3 = icmp eq i32 %tmp4, 12
+  call void @use(i1 %cmp.3)
+  %cmp.4 = icmp ult i32 %tmp4, 500
+  call void @use(i1 %cmp.4)
+
+  ret void
+}