[WebAssembly] Compute known bits for SIMD bitmask intrinsics

This optimizes out the mask when the result of a bitmask is interpreted as an i8
or i16 value. Resolves PR50507.

Differential Revision: https://reviews.llvm.org/D107103
This commit is contained in:
Thomas Lively 2021-08-02 09:52:34 -07:00
parent bab86463df
commit 417e500668
3 changed files with 145 additions and 0 deletions

View File

@@ -33,6 +33,7 @@
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
@@ -823,6 +824,30 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
}
}
void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
const SelectionDAG &DAG, unsigned Depth) const {
switch (Op.getOpcode()) {
default:
break;
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (IntNo) {
default:
break;
case Intrinsic::wasm_bitmask: {
unsigned BitWidth = Known.getBitWidth();
EVT VT = Op.getOperand(1).getSimpleValueType();
unsigned PossibleBits = VT.getVectorNumElements();
APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
Known.Zero |= ZeroMask;
break;
}
}
}
}
}
//===----------------------------------------------------------------------===//
// WebAssembly Lowering private implementation.
//===----------------------------------------------------------------------===//

View File

@@ -108,6 +108,11 @@ private:
MachineFunction &MF,
unsigned Intrinsic) const override;
// TargetLowering hook: report target-specific known-zero/known-one bits for
// Op. Used here to mark the high bits of SIMD bitmask intrinsic results as
// known zero (see the implementation in WebAssemblyISelLowering.cpp).
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const override;
// TargetLowering hook for lowering outgoing calls.
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl&lt;SDValue&gt; &InVals) const override;
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,

View File

@@ -0,0 +1,115 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+simd128 | FileCheck %s
; Test that masks on the output of bitmask are optimized out.
target triple = "wasm32-unknown-unknown"
; Declarations of the wasm bitmask intrinsics, one per 128-bit lane shape.
; Each returns an i32 with one bit per input lane (16, 8, 4, or 2 bits).
declare i32 @llvm.wasm.bitmask.v16i8(<16 x i8>)
declare i32 @llvm.wasm.bitmask.v8i16(<8 x i16>)
declare i32 @llvm.wasm.bitmask.v4i32(<4 x i32>)
declare i32 @llvm.wasm.bitmask.v2i64(<2 x i64>)
; A 15-bit mask is narrower than the 16 possibly-set result bits of
; i8x16.bitmask, so the i32.and must be kept in the output.
define i32 @bitmask_v16i8_mask(<16 x i8> %x) {
; CHECK-LABEL: bitmask_v16i8_mask:
; CHECK: .functype bitmask_v16i8_mask (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.bitmask
; CHECK-NEXT: i32.const 32767
; CHECK-NEXT: i32.and
; CHECK-NEXT: # fallthrough-return
%m = tail call i32 @llvm.wasm.bitmask.v16i8(<16 x i8> %x)
%v = and i32 %m, 32767 ;; 2^15 - 1
ret i32 %v
}
; The 16-bit mask covers every bit i8x16.bitmask can set, so computeKnownBits
; proves the and is a no-op and it is folded away (no i32.and below).
define i32 @bitmask_v16i8_no_mask(<16 x i8> %x) {
; CHECK-LABEL: bitmask_v16i8_no_mask:
; CHECK: .functype bitmask_v16i8_no_mask (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.bitmask
; CHECK-NEXT: # fallthrough-return
%m = tail call i32 @llvm.wasm.bitmask.v16i8(<16 x i8> %x)
%v = and i32 %m, 65535 ;; 2^16 - 1
ret i32 %v
}
; A 7-bit mask is narrower than the 8 possibly-set result bits of
; i16x8.bitmask, so the i32.and must be kept in the output.
define i32 @bitmask_v8i16_mask(<8 x i16> %x) {
; CHECK-LABEL: bitmask_v8i16_mask:
; CHECK: .functype bitmask_v8i16_mask (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.bitmask
; CHECK-NEXT: i32.const 127
; CHECK-NEXT: i32.and
; CHECK-NEXT: # fallthrough-return
%m = tail call i32 @llvm.wasm.bitmask.v8i16(<8 x i16> %x)
%v = and i32 %m, 127 ;; 2^7 - 1
ret i32 %v
}
; The 8-bit mask covers every bit i16x8.bitmask can set, so the and is
; folded away (no i32.and below).
define i32 @bitmask_v8i16_no_mask(<8 x i16> %x) {
; CHECK-LABEL: bitmask_v8i16_no_mask:
; CHECK: .functype bitmask_v8i16_no_mask (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.bitmask
; CHECK-NEXT: # fallthrough-return
%m = tail call i32 @llvm.wasm.bitmask.v8i16(<8 x i16> %x)
%v = and i32 %m, 255 ;; 2^8 - 1
ret i32 %v
}
; A 3-bit mask is narrower than the 4 possibly-set result bits of
; i32x4.bitmask, so the i32.and must be kept in the output.
define i32 @bitmask_v4i32_mask(<4 x i32> %x) {
; CHECK-LABEL: bitmask_v4i32_mask:
; CHECK: .functype bitmask_v4i32_mask (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.bitmask
; CHECK-NEXT: i32.const 7
; CHECK-NEXT: i32.and
; CHECK-NEXT: # fallthrough-return
%m = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> %x)
%v = and i32 %m, 7 ;; 2^3 - 1
ret i32 %v
}
; The 4-bit mask covers every bit i32x4.bitmask can set, so the and is
; folded away (no i32.and below).
define i32 @bitmask_v4i32_no_mask(<4 x i32> %x) {
; CHECK-LABEL: bitmask_v4i32_no_mask:
; CHECK: .functype bitmask_v4i32_no_mask (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.bitmask
; CHECK-NEXT: # fallthrough-return
%m = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> %x)
%v = and i32 %m, 15 ;; 2^4 - 1
ret i32 %v
}
; A 1-bit mask is narrower than the 2 possibly-set result bits of
; i64x2.bitmask, so the i32.and must be kept in the output.
define i32 @bitmask_v2i64_mask(<2 x i64> %x) {
; CHECK-LABEL: bitmask_v2i64_mask:
; CHECK: .functype bitmask_v2i64_mask (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.bitmask
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: # fallthrough-return
%m = tail call i32 @llvm.wasm.bitmask.v2i64(<2 x i64> %x)
%v = and i32 %m, 1 ;; 2^1 - 1
ret i32 %v
}
; The 2-bit mask covers every bit i64x2.bitmask can set, so the and is
; folded away (no i32.and below).
define i32 @bitmask_v2i64_no_mask(<2 x i64> %x) {
; CHECK-LABEL: bitmask_v2i64_no_mask:
; CHECK: .functype bitmask_v2i64_no_mask (v128) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.bitmask
; CHECK-NEXT: # fallthrough-return
%m = tail call i32 @llvm.wasm.bitmask.v2i64(<2 x i64> %x)
%v = and i32 %m, 3 ;; 2^2 - 1
ret i32 %v
}