From 6645bfa8f501fd7698ce584976bea9c99c49d64d Mon Sep 17 00:00:00 2001 From: Dmitry Vassiliev Date: Tue, 15 Feb 2022 01:23:11 +0300 Subject: [PATCH] [NVPTX] Fix bug with int_nvvm_rotate_b64 when operand immediate Need to subtract from 64, not 32. Reviewed By: tra Differential Revision: https://reviews.llvm.org/D119639 --- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 2 +- llvm/test/CodeGen/NVPTX/rotate_64.ll | 25 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/NVPTX/rotate_64.ll diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index ec069a0a02ae..479b0143ab7c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -2473,7 +2473,7 @@ def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), // SW version of rotate 64 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), - (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, + (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>, Requires<[noHWROT32]>; def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>, diff --git a/llvm/test/CodeGen/NVPTX/rotate_64.ll b/llvm/test/CodeGen/NVPTX/rotate_64.ll new file mode 100644 index 000000000000..1ba0dfa90e02 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/rotate_64.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=nvptx | FileCheck %s + + +declare i64 @llvm.nvvm.rotate.b64(i64, i32) +declare i64 @llvm.nvvm.rotate.right.b64(i64, i32) + +; CHECK: rotate64 +define i64 @rotate64(i64 %a, i32 %b) { +; CHECK: shl.b64 [[LHS:%.*]], [[RD1:%.*]], 3; +; CHECK: shr.b64 [[RHS:%.*]], [[RD1]], 61; +; CHECK: add.u64 [[RD2:%.*]], [[LHS]], [[RHS]]; +; CHECK: ret + %val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 3) + ret i64 %val +} + +; CHECK: rotateright64 +define i64 @rotateright64(i64 %a, i32 %b) { +; CHECK: shl.b64 [[LHS:%.*]], [[RD1:%.*]], 61; 
+; CHECK: shr.b64 [[RHS:%.*]], [[RD1]], 3; +; CHECK: add.u64 [[RD2:%.*]], [[LHS]], [[RHS]]; +; CHECK: ret + %val = tail call i64 @llvm.nvvm.rotate.right.b64(i64 %a, i32 3) + ret i64 %val +}