MIPS: math-emu: RINT.<D|S>: Fix several problems by reimplementation
Reimplement RINT.<D|S> kernel emulation so that all RINT.<D|S> specifications are met. For the sake of simplicity, let's analyze RINT.S only. Prior to this patch, RINT.S emulation was essentially implemented as (in pseudocode) <output> = ieee754sp_flong(ieee754sp_tlong(<input>)), where ieee754sp_tlong() and ieee754sp_flong() are functions providing conversion from single-precision floating point to 64-bit integer, and from 64-bit integer to single-precision floating point, respectively. On the surface, this implementation looks correct, but it actually fails in many cases. The following problems were detected: 1. NaN and infinity cases will not be handled properly. The function ieee754sp_flong() never returns NaN or infinity. 2. For RINT.S, for all inputs larger than LONG_MAX, and smaller than FLT_MAX, the result will be wrong, and the overflow exception will be erroneously set. A similar problem for negative inputs exists as well. 3. For some rounding modes, for some negative inputs close to zero, the return value will be zero, but should be -zero. This is because ieee754sp_flong() never returns -zero. This patch removes the problems above by implementing dedicated functions for RINT.<D|S> emulation. The core of the new functions is an adapted version of the core of the function ieee754sp_tlong(). However, there are many details that are implemented to match the RINT.<D|S> specification. It should be said that the functionality of ieee754sp_tlong() actually closely corresponds to the CVT.L.S instruction, and it is used while emulating CVT.L.S. However, the RINT.S and CVT.L.S instructions differ in many respects. This patch supplies the missing support for RINT.<D|S>. Signed-off-by: Miodrag Dinic <miodrag.dinic@imgtec.com> Signed-off-by: Goran Ferenc <goran.ferenc@imgtec.com> Signed-off-by: Aleksandar Markovic <aleksandar.markovic@imgtec.com> Cc: David S. 
Miller <davem@davemloft.net> Cc: Douglas Leung <douglas.leung@imgtec.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Hans Verkuil <hans.verkuil@cisco.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: Maciej W. Rozycki <macro@imgtec.com> Cc: Masahiro Yamada <yamada.masahiro@socionext.com> Cc: Mauro Carvalho Chehab <mchehab@kernel.org> Cc: Paul Burton <paul.burton@imgtec.com> Cc: Petar Jovanovic <petar.jovanovic@imgtec.com> Cc: Raghu Gandham <raghu.gandham@imgtec.com> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17141/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
This commit is contained in:
parent
1ff8560ac9
commit
3ec404d88c
|
@ -8802,6 +8802,13 @@ F: arch/mips/include/asm/mach-loongson32/
|
|||
F: drivers/*/*loongson1*
|
||||
F: drivers/*/*/*loongson1*
|
||||
|
||||
MIPS RINT INSTRUCTION EMULATION
|
||||
M: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
|
||||
L: linux-mips@linux-mips.org
|
||||
S: Supported
|
||||
F: arch/mips/math-emu/sp_rint.c
|
||||
F: arch/mips/math-emu/dp_rint.c
|
||||
|
||||
MIROSOUND PCM20 FM RADIO RECEIVER DRIVER
|
||||
M: Hans Verkuil <hverkuil@xs4all.nl>
|
||||
L: linux-media@vger.kernel.org
|
||||
|
|
|
@ -4,9 +4,11 @@
|
|||
|
||||
obj-y += cp1emu.o ieee754dp.o ieee754sp.o ieee754.o \
|
||||
dp_div.o dp_mul.o dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o \
|
||||
dp_tint.o dp_fint.o dp_maddf.o dp_2008class.o dp_fmin.o dp_fmax.o \
|
||||
dp_tint.o dp_fint.o dp_rint.o dp_maddf.o dp_2008class.o dp_fmin.o \
|
||||
dp_fmax.o \
|
||||
sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_simple.o \
|
||||
sp_tint.o sp_fint.o sp_maddf.o sp_2008class.o sp_fmin.o sp_fmax.o \
|
||||
sp_tint.o sp_fint.o sp_rint.o sp_maddf.o sp_2008class.o sp_fmin.o \
|
||||
sp_fmax.o \
|
||||
dsemul.o
|
||||
|
||||
lib-y += ieee754d.o \
|
||||
|
|
|
@ -1805,8 +1805,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
|
|||
return SIGILL;
|
||||
|
||||
SPFROMREG(fs, MIPSInst_FS(ir));
|
||||
rv.l = ieee754sp_tlong(fs);
|
||||
rv.s = ieee754sp_flong(rv.l);
|
||||
rv.s = ieee754sp_rint(fs);
|
||||
goto copcsr;
|
||||
}
|
||||
|
||||
|
@ -2134,8 +2133,7 @@ copcsr:
|
|||
return SIGILL;
|
||||
|
||||
DPFROMREG(fs, MIPSInst_FS(ir));
|
||||
rv.l = ieee754dp_tlong(fs);
|
||||
rv.d = ieee754dp_flong(rv.l);
|
||||
rv.d = ieee754dp_rint(fs);
|
||||
goto copcsr;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
/* IEEE754 floating point arithmetic
|
||||
* double precision: common utilities
|
||||
*/
|
||||
/*
|
||||
* MIPS floating point support
|
||||
* Copyright (C) 1994-2000 Algorithmics Ltd.
|
||||
* Copyright (C) 2017 Imagination Technologies, Ltd.
|
||||
* Author: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
|
||||
*
|
||||
* This program is free software; you can distribute it and/or modify it
|
||||
* under the terms of the GNU General Public License (Version 2) as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program.
|
||||
*/
|
||||
|
||||
#include "ieee754dp.h"
|
||||
|
||||
union ieee754dp ieee754dp_rint(union ieee754dp x)
|
||||
{
|
||||
union ieee754dp ret;
|
||||
u64 residue;
|
||||
int sticky;
|
||||
int round;
|
||||
int odd;
|
||||
|
||||
COMPXDP;
|
||||
|
||||
ieee754_clearcx();
|
||||
|
||||
EXPLODEXDP;
|
||||
FLUSHXDP;
|
||||
|
||||
if (xc == IEEE754_CLASS_SNAN)
|
||||
return ieee754dp_nanxcpt(x);
|
||||
|
||||
if ((xc == IEEE754_CLASS_QNAN) ||
|
||||
(xc == IEEE754_CLASS_INF) ||
|
||||
(xc == IEEE754_CLASS_ZERO))
|
||||
return x;
|
||||
|
||||
if (xe >= DP_FBITS)
|
||||
return x;
|
||||
|
||||
if (xe < -1) {
|
||||
residue = xm;
|
||||
round = 0;
|
||||
sticky = residue != 0;
|
||||
xm = 0;
|
||||
} else {
|
||||
residue = xm << (64 - DP_FBITS + xe);
|
||||
round = (residue >> 63) != 0;
|
||||
sticky = (residue << 1) != 0;
|
||||
xm >>= DP_FBITS - xe;
|
||||
}
|
||||
|
||||
odd = (xm & 0x1) != 0x0;
|
||||
|
||||
switch (ieee754_csr.rm) {
|
||||
case FPU_CSR_RN: /* toward nearest */
|
||||
if (round && (sticky || odd))
|
||||
xm++;
|
||||
break;
|
||||
case FPU_CSR_RZ: /* toward zero */
|
||||
break;
|
||||
case FPU_CSR_RU: /* toward +infinity */
|
||||
if ((round || sticky) && !xs)
|
||||
xm++;
|
||||
break;
|
||||
case FPU_CSR_RD: /* toward -infinity */
|
||||
if ((round || sticky) && xs)
|
||||
xm++;
|
||||
break;
|
||||
}
|
||||
|
||||
if (round || sticky)
|
||||
ieee754_setcx(IEEE754_INEXACT);
|
||||
|
||||
ret = ieee754dp_flong(xm);
|
||||
DPSIGN(ret) = xs;
|
||||
|
||||
return ret;
|
||||
}
|
|
@ -67,6 +67,7 @@ union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y);
|
|||
union ieee754sp ieee754sp_fint(int x);
|
||||
union ieee754sp ieee754sp_flong(s64 x);
|
||||
union ieee754sp ieee754sp_fdp(union ieee754dp x);
|
||||
union ieee754sp ieee754sp_rint(union ieee754sp x);
|
||||
|
||||
int ieee754sp_tint(union ieee754sp x);
|
||||
s64 ieee754sp_tlong(union ieee754sp x);
|
||||
|
@ -101,6 +102,7 @@ union ieee754dp ieee754dp_neg(union ieee754dp x);
|
|||
union ieee754dp ieee754dp_fint(int x);
|
||||
union ieee754dp ieee754dp_flong(s64 x);
|
||||
union ieee754dp ieee754dp_fsp(union ieee754sp x);
|
||||
union ieee754dp ieee754dp_rint(union ieee754dp x);
|
||||
|
||||
int ieee754dp_tint(union ieee754dp x);
|
||||
s64 ieee754dp_tlong(union ieee754dp x);
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
/* IEEE754 floating point arithmetic
|
||||
* single precision
|
||||
*/
|
||||
/*
|
||||
* MIPS floating point support
|
||||
* Copyright (C) 1994-2000 Algorithmics Ltd.
|
||||
* Copyright (C) 2017 Imagination Technologies, Ltd.
|
||||
* Author: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
|
||||
*
|
||||
* This program is free software; you can distribute it and/or modify it
|
||||
* under the terms of the GNU General Public License (Version 2) as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program.
|
||||
*/
|
||||
|
||||
#include "ieee754sp.h"
|
||||
|
||||
union ieee754sp ieee754sp_rint(union ieee754sp x)
|
||||
{
|
||||
union ieee754sp ret;
|
||||
u32 residue;
|
||||
int sticky;
|
||||
int round;
|
||||
int odd;
|
||||
|
||||
COMPXDP; /* <-- DP needed for 64-bit mantissa tmp */
|
||||
|
||||
ieee754_clearcx();
|
||||
|
||||
EXPLODEXSP;
|
||||
FLUSHXSP;
|
||||
|
||||
if (xc == IEEE754_CLASS_SNAN)
|
||||
return ieee754sp_nanxcpt(x);
|
||||
|
||||
if ((xc == IEEE754_CLASS_QNAN) ||
|
||||
(xc == IEEE754_CLASS_INF) ||
|
||||
(xc == IEEE754_CLASS_ZERO))
|
||||
return x;
|
||||
|
||||
if (xe >= SP_FBITS)
|
||||
return x;
|
||||
|
||||
if (xe < -1) {
|
||||
residue = xm;
|
||||
round = 0;
|
||||
sticky = residue != 0;
|
||||
xm = 0;
|
||||
} else {
|
||||
residue = xm << (xe + 1);
|
||||
residue <<= 31 - SP_FBITS;
|
||||
round = (residue >> 31) != 0;
|
||||
sticky = (residue << 1) != 0;
|
||||
xm >>= SP_FBITS - xe;
|
||||
}
|
||||
|
||||
odd = (xm & 0x1) != 0x0;
|
||||
|
||||
switch (ieee754_csr.rm) {
|
||||
case FPU_CSR_RN: /* toward nearest */
|
||||
if (round && (sticky || odd))
|
||||
xm++;
|
||||
break;
|
||||
case FPU_CSR_RZ: /* toward zero */
|
||||
break;
|
||||
case FPU_CSR_RU: /* toward +infinity */
|
||||
if ((round || sticky) && !xs)
|
||||
xm++;
|
||||
break;
|
||||
case FPU_CSR_RD: /* toward -infinity */
|
||||
if ((round || sticky) && xs)
|
||||
xm++;
|
||||
break;
|
||||
}
|
||||
|
||||
if (round || sticky)
|
||||
ieee754_setcx(IEEE754_INEXACT);
|
||||
|
||||
ret = ieee754sp_flong(xm);
|
||||
SPSIGN(ret) = xs;
|
||||
|
||||
return ret;
|
||||
}
|
Loading…
Reference in New Issue