siesta/Src/fft1d.F

2598 lines
63 KiB
Fortran

! ---
! Copyright (C) 1996-2016 The SIESTA group
! This file is distributed under the terms of the
! GNU General Public License: see COPYING in the top directory
! or http://www.gnu.org/copyleft/gpl.txt .
! See Docs/Contributors.txt for a list of contributors.
! ---
!*********************************************************************
! MODULE fft1d
! Just a wrapper for routines nfft, gpfa, and setgpfa
!
!*********************************************************************
!
! subroutine nfft( N )
!
! CHANGES N INTO THE NEXT INTEGER ALLOWED BY THE GPFA ROUTINE
! WRITTEN BY J.M.SOLER. MAY/95.
!
! Input and output:
! integer:: N ! Candidate FFT vector size, increased to
! next allowed value, if necessary
!
!*********************************************************************
!
! SUBROUTINE 'GPFA'
! SELF-SORTING IN-PLACE GENERALIZED PRIME FACTOR (COMPLEX) FFT
!
! *** THIS IS THE ALL-FORTRAN VERSION ***
! -------------------------------
!
! CALL GPFA(A,B,TRIGS,INC,JUMP,N,LOT,ISIGN)
!
! A IS FIRST REAL INPUT/OUTPUT VECTOR
! B IS FIRST IMAGINARY INPUT/OUTPUT VECTOR
! TRIGS IS A TABLE OF TWIDDLE FACTORS, PRECALCULATED
! BY CALLING SUBROUTINE 'SETGPFA'
! INC IS THE INCREMENT WITHIN EACH DATA VECTOR
! JUMP IS THE INCREMENT BETWEEN DATA VECTORS
! N IS THE LENGTH OF THE TRANSFORMS:
! -----------------------------------
! N = (2**IP) * (3**IQ) * (5**IR)
! -----------------------------------
! LOT IS THE NUMBER OF TRANSFORMS
! ISIGN = +1 FOR FORWARD TRANSFORM
! = -1 FOR INVERSE TRANSFORM
!
! WRITTEN BY CLIVE TEMPERTON
! RECHERCHE EN PREVISION NUMERIQUE
! ATMOSPHERIC ENVIRONMENT SERVICE, CANADA
!
!----------------------------------------------------------------------
!
! DEFINITION OF TRANSFORM
! -----------------------
!
! X(J) = SUM(K=0,...,N-1)(C(K)*EXP(ISIGN*2*I*J*K*PI/N))
!
!---------------------------------------------------------------------
!
! FOR A MATHEMATICAL DEVELOPMENT OF THE ALGORITHM USED,
! SEE:
!
! C TEMPERTON : "A GENERALIZED PRIME FACTOR FFT ALGORITHM
! FOR ANY N = (2**P)(3**Q)(5**R)",
! SIAM J. SCI. STAT. COMP., MAY 1992.
!
!*********************************************************************
!
! SUBROUTINE 'SETGPFA'
! SETUP ROUTINE FOR SELF-SORTING IN-PLACE
! GENERALIZED PRIME FACTOR (COMPLEX) FFT [GPFA]
!
! CALL SETGPFA(TRIGS,N)
!
! INPUT :
! -----
! N IS THE LENGTH OF THE TRANSFORMS. N MUST BE OF THE FORM:
! -----------------------------------
! N = (2**IP) * (3**IQ) * (5**IR)
! -----------------------------------
!
! OUTPUT:
! ------
! TRIGS IS A TABLE OF TWIDDLE FACTORS,
! OF LENGTH 2*IPQR (REAL) WORDS, WHERE:
! --------------------------------------
! IPQR = (2**IP) + (3**IQ) + (5**IR)
! --------------------------------------
!
! WRITTEN BY CLIVE TEMPERTON 1990
!
!*********************************************************************
MODULE fft1d
! Used module procedures:
USE sys, only: die ! Termination routine
! Used module parameters, variables, and arrays:
USE precision, only: dp ! Double precision real kind
USE precision, only: grid_p ! Real kind used in grid arrays
USE parallel, only: ionode ! Is this the IO processor?
implicit none
! Public module procedures:
PUBLIC::
. nfft, ! Finds the next integer allowed by the FFT routine
. gpfa, ! 1D FFT
. setgpfa ! Initializes gpfa routine
! Public module parameters, variables, and arrays:
! none
PRIVATE ! Nothing is declared public below this point
CONTAINS
!*********************************************************************
SUBROUTINE nfft( N )
C CHANGES N INTO THE NEXT INTEGER ALLOWED BY THE FFT ROUTINE
C WRITTEN BY J.M.SOLER. MAY/95.
integer :: np, nmax, nmin, n, nrem, ip
parameter (NP = 3, NMAX = 1000000)
integer :: IPRIME(NP)
data IPRIME / 2, 3, 5 /
NMIN = N
DO N = NMIN, NMAX
NREM = N
DO IP = 1,NP
10 CONTINUE
IF ( MODULO( NREM, IPRIME(IP) ) .EQ. 0 ) THEN
NREM = NREM / IPRIME(IP)
GOTO 10
ENDIF
ENDDO
IF (NREM .EQ. 1) RETURN
ENDDO
if (Ionode)
$ write(6,*) 'NFFT: NO SUITABLE INTEGER FOUND FOR N =', NMIN
call die("see above")
END SUBROUTINE nfft
!*********************************************************************
SUBROUTINE GPFA(A,B,TRIGS,INC,JUMP,N,LOT,ISIGN)
IMPLICIT NONE
*
REAL(grid_p) :: A(*), B(*)
REAL(dp) TRIGS(*)
INTEGER INC, JUMP, N, LOT, ISIGN
C
INTEGER
. NJ(3), NN, IFAC, LL, KK, IP, IQ, IR, I
*
* DECOMPOSE N INTO FACTORS 2,3,5
* ------------------------------
NN = N
IFAC = 2
*
DO 30 LL = 1 , 3
KK = 0
10 CONTINUE
IF (MOD(NN,IFAC).NE.0) GO TO 20
KK = KK + 1
NN = NN / IFAC
GO TO 10
20 CONTINUE
NJ(LL) = KK
IFAC = IFAC + LL
30 CONTINUE
*
IF (NN.NE.1) THEN
WRITE(6,40) N
40 FORMAT(' *** WARNING!!!',I10,' IS NOT A LEGAL VALUE OF N ***')
RETURN
ENDIF
*
IP = NJ(1)
IQ = NJ(2)
IR = NJ(3)
*
* COMPUTE THE TRANSFORM
* ---------------------
I = 1
IF (IP.GT.0) THEN
CALL GPFA2F(A,B,TRIGS,INC,JUMP,N,IP,LOT,ISIGN)
I = I + 2 * ( 2**IP)
ENDIF
IF (IQ.GT.0) THEN
CALL GPFA3F(A,B,TRIGS(I),INC,JUMP,N,IQ,LOT,ISIGN)
I = I + 2 * (3**IQ)
ENDIF
IF (IR.GT.0) THEN
CALL GPFA5F(A,B,TRIGS(I),INC,JUMP,N,IR,LOT,ISIGN)
ENDIF
*
RETURN
END SUBROUTINE GPFA
!*********************************************************************
* fortran version of *gpfa2* -
* radix-2 section of self-sorting, in-place, generalized pfa
* central radix-2 and radix-8 passes included
* so that transform length can be any power of 2
*
subroutine gpfa2f(a,b,trigs,inc,jump,n,mm,lot,isign)
real(grid_p) :: a(*), b(*)
real(dp) :: trigs(*)
integer inc, jump, n, mm, lot, isign
integer n2, inq, jstepx, ninc, ink, m2, m8,
$ m, mh, nblox, left, istart, nb, nvex,
$ la, mu, ipass, jstep, jstepl, jjj, ja,
$ nu, jb, jc, jd, j, l, kk, k, je, jf,
$ jg, jh, laincl, ll, ji, jj, jk, jl, jm,
$ jn, jo, jp
real(dp) :: s, aja, ajc, t0, t2, ajb, ajd,
$ t1, t3, bja, bjc, u0, u2, bjb,
$ bjd, u1, u3, co1, si1, co2, si2,
$ co3, si3, ss, c1, c2, c3, aje,
$ ajf, ajg, ajh, bje, bjf, bjg,
$ bjh, co4, si4, co5, si5, co6, si6,
$ co7, si7, aji, bjm, ajj, bjj, ajk,
$ ajl, bji, bjk, ajo, bjl, bjo, ajm,
$ ajn, ajp, bjn, bjp
#ifdef OLD_CRAY
integer, parameter :: lvr = CRAY_LVR_PARAMETER
#else
integer, parameter :: lvr = 1024
#endif
*
* ***************************************************************
* * *
* * N.B. LVR = LENGTH OF VECTOR REGISTERS, SET TO 128 FOR C90. *
* * RESET TO 64 FOR OTHER CRAY MACHINES, OR TO ANY LARGE VALUE *
* * (GREATER THAN OR EQUAL TO LOT) FOR A SCALAR COMPUTER. *
* * *
* ***************************************************************
*
n2 = 2**mm
inq = n/n2
jstepx = (n2-n) * inc
ninc = n * inc
ink = inc * inq
*
m2 = 0
m8 = 0
if (mod(mm,2).eq.0) then
m = mm/2
else if (mod(mm,4).eq.1) then
m = (mm-1)/2
m2 = 1
else if (mod(mm,4).eq.3) then
m = (mm-3)/2
m8 = 1
endif
mh = (m+1)/2
*
nblox = 1 + (lot-1)/lvr
left = lot
s = real(isign, grid_p)
istart = 1
*
* loop on blocks of lvr transforms
* --------------------------------
do 500 nb = 1 , nblox
*
if (left.le.lvr) then
nvex = left
else if (left.lt.(2*lvr)) then
nvex = left/2
nvex = nvex + mod(nvex,2)
else
nvex = lvr
endif
left = left - nvex
*
la = 1
*
* loop on type I radix-4 passes
* -----------------------------
mu = mod(inq,4)
if (isign.eq.-1) mu = 4 - mu
ss = 1.0d0
if (mu.eq.3) ss = -1.0d0
*
if (mh.eq.0) go to 200
*
do 160 ipass = 1 , mh
jstep = (n*inc) / (4*la)
jstepl = jstep - ninc
*
* k = 0 loop (no twiddle factors)
* -------------------------------
do 120 jjj = 0 , (n-1)*inc , 4*jstep
ja = istart + jjj
*
* "transverse" loop
* -----------------
do 115 nu = 1 , inq
jb = ja + jstepl
if (jb.lt.istart) jb = jb + ninc
jc = jb + jstepl
if (jc.lt.istart) jc = jc + ninc
jd = jc + jstepl
if (jd.lt.istart) jd = jd + ninc
j = 0
*
* loop across transforms
* ----------------------
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 110 l = 1 , nvex
aja = a(ja+j)
ajc = a(jc+j)
t0 = aja + ajc
t2 = aja - ajc
ajb = a(jb+j)
ajd = a(jd+j)
t1 = ajb + ajd
t3 = ss * ( ajb - ajd )
bja = b(ja+j)
bjc = b(jc+j)
u0 = bja + bjc
u2 = bja - bjc
bjb = b(jb+j)
bjd = b(jd+j)
u1 = bjb + bjd
u3 = ss * ( bjb - bjd )
a(ja+j) = t0 + t1
a(jc+j) = t0 - t1
b(ja+j) = u0 + u1
b(jc+j) = u0 - u1
a(jb+j) = t2 - u3
a(jd+j) = t2 + u3
b(jb+j) = u2 + t3
b(jd+j) = u2 - t3
j = j + jump
110 continue
ja = ja + jstepx
if (ja.lt.istart) ja = ja + ninc
115 continue
120 continue
*
* finished if n2 = 4
* ------------------
if (n2.eq.4) go to 490
kk = 2 * la
*
* loop on nonzero k
* -----------------
do 150 k = ink , jstep-ink , ink
co1 = trigs(kk+1)
si1 = s*trigs(kk+2)
co2 = trigs(2*kk+1)
si2 = s*trigs(2*kk+2)
co3 = trigs(3*kk+1)
si3 = s*trigs(3*kk+2)
*
* loop along transform
* --------------------
do 140 jjj = k , (n-1)*inc , 4*jstep
ja = istart + jjj
*
* "transverse" loop
* -----------------
do 135 nu = 1 , inq
jb = ja + jstepl
if (jb.lt.istart) jb = jb + ninc
jc = jb + jstepl
if (jc.lt.istart) jc = jc + ninc
jd = jc + jstepl
if (jd.lt.istart) jd = jd + ninc
j = 0
*
* loop across transforms
* ----------------------
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 130 l = 1 , nvex
aja = a(ja+j)
ajc = a(jc+j)
t0 = aja + ajc
t2 = aja - ajc
ajb = a(jb+j)
ajd = a(jd+j)
t1 = ajb + ajd
t3 = ss * ( ajb - ajd )
bja = b(ja+j)
bjc = b(jc+j)
u0 = bja + bjc
u2 = bja - bjc
bjb = b(jb+j)
bjd = b(jd+j)
u1 = bjb + bjd
u3 = ss * ( bjb - bjd )
a(ja+j) = t0 + t1
b(ja+j) = u0 + u1
a(jb+j) = co1*(t2-u3) - si1*(u2+t3)
b(jb+j) = si1*(t2-u3) + co1*(u2+t3)
a(jc+j) = co2*(t0-t1) - si2*(u0-u1)
b(jc+j) = si2*(t0-t1) + co2*(u0-u1)
a(jd+j) = co3*(t2+u3) - si3*(u2-t3)
b(jd+j) = si3*(t2+u3) + co3*(u2-t3)
j = j + jump
130 continue
*-----( end of loop across transforms )
ja = ja + jstepx
if (ja.lt.istart) ja = ja + ninc
135 continue
140 continue
*-----( end of loop along transforms )
kk = kk + 2*la
150 continue
*-----( end of loop on nonzero k )
la = 4*la
160 continue
*-----( end of loop on type I radix-4 passes)
*
* central radix-2 pass
* --------------------
200 continue
if (m2.eq.0) go to 300
*
jstep = (n*inc) / (2*la)
jstepl = jstep - ninc
*
* k=0 loop (no twiddle factors)
* -----------------------------
do 220 jjj = 0 , (n-1)*inc , 2*jstep
ja = istart + jjj
*
* "transverse" loop
* -----------------
do 215 nu = 1 , inq
jb = ja + jstepl
if (jb.lt.istart) jb = jb + ninc
j = 0
*
* loop across transforms
* ----------------------
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 210 l = 1 , nvex
aja = a(ja+j)
ajb = a(jb+j)
t0 = aja - ajb
a(ja+j) = aja + ajb
a(jb+j) = t0
bja = b(ja+j)
bjb = b(jb+j)
u0 = bja - bjb
b(ja+j) = bja + bjb
b(jb+j) = u0
j = j + jump
210 continue
*-----(end of loop across transforms)
ja = ja + jstepx
if (ja.lt.istart) ja = ja + ninc
215 continue
220 continue
*
* finished if n2=2
* ----------------
if (n2.eq.2) go to 490
*
kk = 2 * la
*
* loop on nonzero k
* -----------------
do 260 k = ink , jstep - ink , ink
co1 = trigs(kk+1)
si1 = s*trigs(kk+2)
*
* loop along transforms
* ---------------------
do 250 jjj = k , (n-1)*inc , 2*jstep
ja = istart + jjj
*
* "transverse" loop
* -----------------
do 245 nu = 1 , inq
jb = ja + jstepl
if (jb.lt.istart) jb = jb + ninc
j = 0
*
* loop across transforms
* ----------------------
if (kk.eq.n2/2) then
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 230 l = 1 , nvex
aja = a(ja+j)
ajb = a(jb+j)
t0 = ss * ( aja - ajb )
a(ja+j) = aja + ajb
bjb = b(jb+j)
bja = b(ja+j)
a(jb+j) = ss * ( bjb - bja )
b(ja+j) = bja + bjb
b(jb+j) = t0
j = j + jump
230 continue
*
else
*
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 240 l = 1 , nvex
aja = a(ja+j)
ajb = a(jb+j)
t0 = aja - ajb
a(ja+j) = aja + ajb
bja = b(ja+j)
bjb = b(jb+j)
u0 = bja - bjb
b(ja+j) = bja + bjb
a(jb+j) = co1*t0 - si1*u0
b(jb+j) = si1*t0 + co1*u0
j = j + jump
240 continue
*
endif
*
*-----(end of loop across transforms)
ja = ja + jstepx
if (ja.lt.istart) ja = ja + ninc
245 continue
250 continue
*-----(end of loop along transforms)
kk = kk + 2 * la
260 continue
*-----(end of loop on nonzero k)
*-----(end of radix-2 pass)
*
la = 2 * la
go to 400
*
* central radix-8 pass
* --------------------
300 continue
if (m8.eq.0) go to 400
jstep = (n*inc) / (8*la)
jstepl = jstep - ninc
mu = mod(inq,8)
if (isign.eq.-1) mu = 8 - mu
c1 = 1.0d0
if (mu.eq.3.or.mu.eq.7) c1 = -1.0d0
c2 = sqrt(0.5d0)
if (mu.eq.3.or.mu.eq.5) c2 = -c2
c3 = c1 * c2
*
* stage 1
* -------
do 320 k = 0 , jstep - ink , ink
do 315 jjj = k , (n-1)*inc , 8*jstep
ja = istart + jjj
*
* "transverse" loop
* -----------------
do 312 nu = 1 , inq
jb = ja + jstepl
if (jb.lt.istart) jb = jb + ninc
jc = jb + jstepl
if (jc.lt.istart) jc = jc + ninc
jd = jc + jstepl
if (jd.lt.istart) jd = jd + ninc
je = jd + jstepl
if (je.lt.istart) je = je + ninc
jf = je + jstepl
if (jf.lt.istart) jf = jf + ninc
jg = jf + jstepl
if (jg.lt.istart) jg = jg + ninc
jh = jg + jstepl
if (jh.lt.istart) jh = jh + ninc
j = 0
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 310 l = 1 , nvex
aja = a(ja+j)
aje = a(je+j)
t0 = aja - aje
a(ja+j) = aja + aje
ajc = a(jc+j)
ajg = a(jg+j)
t1 = c1 * ( ajc - ajg )
a(je+j) = ajc + ajg
ajb = a(jb+j)
ajf = a(jf+j)
t2 = ajb - ajf
a(jc+j) = ajb + ajf
ajd = a(jd+j)
ajh = a(jh+j)
t3 = ajd - ajh
a(jg+j) = ajd + ajh
a(jb+j) = t0
a(jf+j) = t1
a(jd+j) = c2 * ( t2 - t3 )
a(jh+j) = c3 * ( t2 + t3 )
bja = b(ja+j)
bje = b(je+j)
u0 = bja - bje
b(ja+j) = bja + bje
bjc = b(jc+j)
bjg = b(jg+j)
u1 = c1 * ( bjc - bjg )
b(je+j) = bjc + bjg
bjb = b(jb+j)
bjf = b(jf+j)
u2 = bjb - bjf
b(jc+j) = bjb + bjf
bjd = b(jd+j)
bjh = b(jh+j)
u3 = bjd - bjh
b(jg+j) = bjd + bjh
b(jb+j) = u0
b(jf+j) = u1
b(jd+j) = c2 * ( u2 - u3 )
b(jh+j) = c3 * ( u2 + u3 )
j = j + jump
310 continue
ja = ja + jstepx
if (ja.lt.istart) ja = ja + ninc
312 continue
315 continue
320 continue
*
* stage 2
* -------
*
* k=0 (no twiddle factors)
* ------------------------
do 330 jjj = 0 , (n-1)*inc , 8*jstep
ja = istart + jjj
*
* "transverse" loop
* -----------------
do 328 nu = 1 , inq
jb = ja + jstepl
if (jb.lt.istart) jb = jb + ninc
jc = jb + jstepl
if (jc.lt.istart) jc = jc + ninc
jd = jc + jstepl
if (jd.lt.istart) jd = jd + ninc
je = jd + jstepl
if (je.lt.istart) je = je + ninc
jf = je + jstepl
if (jf.lt.istart) jf = jf + ninc
jg = jf + jstepl
if (jg.lt.istart) jg = jg + ninc
jh = jg + jstepl
if (jh.lt.istart) jh = jh + ninc
j = 0
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 325 l = 1 , nvex
aja = a(ja+j)
aje = a(je+j)
t0 = aja + aje
t2 = aja - aje
ajc = a(jc+j)
ajg = a(jg+j)
t1 = ajc + ajg
t3 = c1 * ( ajc - ajg )
bja = b(ja+j)
bje = b(je+j)
u0 = bja + bje
u2 = bja - bje
bjc = b(jc+j)
bjg = b(jg+j)
u1 = bjc + bjg
u3 = c1 * ( bjc - bjg )
a(ja+j) = t0 + t1
a(je+j) = t0 - t1
b(ja+j) = u0 + u1
b(je+j) = u0 - u1
a(jc+j) = t2 - u3
a(jg+j) = t2 + u3
b(jc+j) = u2 + t3
b(jg+j) = u2 - t3
ajb = a(jb+j)
ajd = a(jd+j)
t0 = ajb + ajd
t2 = ajb - ajd
ajf = a(jf+j)
ajh = a(jh+j)
t1 = ajf - ajh
t3 = ajf + ajh
bjb = b(jb+j)
bjd = b(jd+j)
u0 = bjb + bjd
u2 = bjb - bjd
bjf = b(jf+j)
bjh = b(jh+j)
u1 = bjf - bjh
u3 = bjf + bjh
a(jb+j) = t0 - u3
a(jh+j) = t0 + u3
b(jb+j) = u0 + t3
b(jh+j) = u0 - t3
a(jd+j) = t2 + u1
a(jf+j) = t2 - u1
b(jd+j) = u2 - t1
b(jf+j) = u2 + t1
j = j + jump
325 continue
ja = ja + jstepx
if (ja.lt.istart) ja = ja + ninc
328 continue
330 continue
*
if (n2.eq.8) go to 490
*
* loop on nonzero k
* -----------------
kk = 2 * la
*
do 350 k = ink , jstep - ink , ink
*
co1 = trigs(kk+1)
si1 = s * trigs(kk+2)
co2 = trigs(2*kk+1)
si2 = s * trigs(2*kk+2)
co3 = trigs(3*kk+1)
si3 = s * trigs(3*kk+2)
co4 = trigs(4*kk+1)
si4 = s * trigs(4*kk+2)
co5 = trigs(5*kk+1)
si5 = s * trigs(5*kk+2)
co6 = trigs(6*kk+1)
si6 = s * trigs(6*kk+2)
co7 = trigs(7*kk+1)
si7 = s * trigs(7*kk+2)
*
do 345 jjj = k , (n-1)*inc , 8*jstep
ja = istart + jjj
*
* "transverse" loop
* -----------------
do 342 nu = 1 , inq
jb = ja + jstepl
if (jb.lt.istart) jb = jb + ninc
jc = jb + jstepl
if (jc.lt.istart) jc = jc + ninc
jd = jc + jstepl
if (jd.lt.istart) jd = jd + ninc
je = jd + jstepl
if (je.lt.istart) je = je + ninc
jf = je + jstepl
if (jf.lt.istart) jf = jf + ninc
jg = jf + jstepl
if (jg.lt.istart) jg = jg + ninc
jh = jg + jstepl
if (jh.lt.istart) jh = jh + ninc
j = 0
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 340 l = 1 , nvex
aja = a(ja+j)
aje = a(je+j)
t0 = aja + aje
t2 = aja - aje
ajc = a(jc+j)
ajg = a(jg+j)
t1 = ajc + ajg
t3 = c1 * ( ajc - ajg )
bja = b(ja+j)
bje = b(je+j)
u0 = bja + bje
u2 = bja - bje
bjc = b(jc+j)
bjg = b(jg+j)
u1 = bjc + bjg
u3 = c1 * ( bjc - bjg )
a(ja+j) = t0 + t1
b(ja+j) = u0 + u1
a(je+j) = co4*(t0-t1) - si4*(u0-u1)
b(je+j) = si4*(t0-t1) + co4*(u0-u1)
a(jc+j) = co2*(t2-u3) - si2*(u2+t3)
b(jc+j) = si2*(t2-u3) + co2*(u2+t3)
a(jg+j) = co6*(t2+u3) - si6*(u2-t3)
b(jg+j) = si6*(t2+u3) + co6*(u2-t3)
ajb = a(jb+j)
ajd = a(jd+j)
t0 = ajb + ajd
t2 = ajb - ajd
ajf = a(jf+j)
ajh = a(jh+j)
t1 = ajf - ajh
t3 = ajf + ajh
bjb = b(jb+j)
bjd = b(jd+j)
u0 = bjb + bjd
u2 = bjb - bjd
bjf = b(jf+j)
bjh = b(jh+j)
u1 = bjf - bjh
u3 = bjf + bjh
a(jb+j) = co1*(t0-u3) - si1*(u0+t3)
b(jb+j) = si1*(t0-u3) + co1*(u0+t3)
a(jh+j) = co7*(t0+u3) - si7*(u0-t3)
b(jh+j) = si7*(t0+u3) + co7*(u0-t3)
a(jd+j) = co3*(t2+u1) - si3*(u2-t1)
b(jd+j) = si3*(t2+u1) + co3*(u2-t1)
a(jf+j) = co5*(t2-u1) - si5*(u2+t1)
b(jf+j) = si5*(t2-u1) + co5*(u2+t1)
j = j + jump
340 continue
ja = ja + jstepx
if (ja.lt.istart) ja = ja + ninc
342 continue
345 continue
kk = kk + 2 * la
350 continue
*
la = 8 * la
*
* loop on type II radix-4 passes
* ------------------------------
400 continue
mu = mod(inq,4)
if (isign.eq.-1) mu = 4 - mu
ss = 1.0d0
if (mu.eq.3) ss = -1.0d0
*
do 480 ipass = mh+1 , m
jstep = (n*inc) / (4*la)
jstepl = jstep - ninc
laincl = la * ink - ninc
*
* k=0 loop (no twiddle factors)
* -----------------------------
do 430 ll = 0 , (la-1)*ink , 4*jstep
*
do 420 jjj = ll , (n-1)*inc , 4*la*ink
ja = istart + jjj
*
* "transverse" loop
* -----------------
do 415 nu = 1 , inq
jb = ja + jstepl
if (jb.lt.istart) jb = jb + ninc
jc = jb + jstepl
if (jc.lt.istart) jc = jc + ninc
jd = jc + jstepl
if (jd.lt.istart) jd = jd + ninc
je = ja + laincl
if (je.lt.istart) je = je + ninc
jf = je + jstepl
if (jf.lt.istart) jf = jf + ninc
jg = jf + jstepl
if (jg.lt.istart) jg = jg + ninc
jh = jg + jstepl
if (jh.lt.istart) jh = jh + ninc
ji = je + laincl
if (ji.lt.istart) ji = ji + ninc
jj = ji + jstepl
if (jj.lt.istart) jj = jj + ninc
jk = jj + jstepl
if (jk.lt.istart) jk = jk + ninc
jl = jk + jstepl
if (jl.lt.istart) jl = jl + ninc
jm = ji + laincl
if (jm.lt.istart) jm = jm + ninc
jn = jm + jstepl
if (jn.lt.istart) jn = jn + ninc
jo = jn + jstepl
if (jo.lt.istart) jo = jo + ninc
jp = jo + jstepl
if (jp.lt.istart) jp = jp + ninc
j = 0
*
* loop across transforms
* ----------------------
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 410 l = 1 , nvex
aja = a(ja+j)
ajc = a(jc+j)
t0 = aja + ajc
t2 = aja - ajc
ajb = a(jb+j)
ajd = a(jd+j)
t1 = ajb + ajd
t3 = ss * ( ajb - ajd )
aji = a(ji+j)
ajc = aji
bja = b(ja+j)
bjc = b(jc+j)
u0 = bja + bjc
u2 = bja - bjc
bjb = b(jb+j)
bjd = b(jd+j)
u1 = bjb + bjd
u3 = ss * ( bjb - bjd )
aje = a(je+j)
ajb = aje
a(ja+j) = t0 + t1
a(ji+j) = t0 - t1
b(ja+j) = u0 + u1
bjc = u0 - u1
bjm = b(jm+j)
bjd = bjm
a(je+j) = t2 - u3
ajd = t2 + u3
bjb = u2 + t3
b(jm+j) = u2 - t3
*----------------------
ajg = a(jg+j)
t0 = ajb + ajg
t2 = ajb - ajg
ajf = a(jf+j)
ajh = a(jh+j)
t1 = ajf + ajh
t3 = ss * ( ajf - ajh )
ajj = a(jj+j)
ajg = ajj
bje = b(je+j)
bjg = b(jg+j)
u0 = bje + bjg
u2 = bje - bjg
bjf = b(jf+j)
bjh = b(jh+j)
u1 = bjf + bjh
u3 = ss * ( bjf - bjh )
b(je+j) = bjb
a(jb+j) = t0 + t1
a(jj+j) = t0 - t1
bjj = b(jj+j)
bjg = bjj
b(jb+j) = u0 + u1
b(jj+j) = u0 - u1
a(jf+j) = t2 - u3
ajh = t2 + u3
b(jf+j) = u2 + t3
bjh = u2 - t3
*----------------------
ajk = a(jk+j)
t0 = ajc + ajk
t2 = ajc - ajk
ajl = a(jl+j)
t1 = ajg + ajl
t3 = ss * ( ajg - ajl )
bji = b(ji+j)
bjk = b(jk+j)
u0 = bji + bjk
u2 = bji - bjk
ajo = a(jo+j)
ajl = ajo
bjl = b(jl+j)
u1 = bjg + bjl
u3 = ss * ( bjg - bjl )
b(ji+j) = bjc
a(jc+j) = t0 + t1
a(jk+j) = t0 - t1
bjo = b(jo+j)
bjl = bjo
b(jc+j) = u0 + u1
b(jk+j) = u0 - u1
a(jg+j) = t2 - u3
a(jo+j) = t2 + u3
b(jg+j) = u2 + t3
b(jo+j) = u2 - t3
*----------------------
ajm = a(jm+j)
t0 = ajm + ajl
t2 = ajm - ajl
ajn = a(jn+j)
ajp = a(jp+j)
t1 = ajn + ajp
t3 = ss * ( ajn - ajp )
a(jm+j) = ajd
u0 = bjd + bjl
u2 = bjd - bjl
bjn = b(jn+j)
bjp = b(jp+j)
u1 = bjn + bjp
u3 = ss * ( bjn - bjp )
a(jn+j) = ajh
a(jd+j) = t0 + t1
a(jl+j) = t0 - t1
b(jd+j) = u0 + u1
b(jl+j) = u0 - u1
b(jn+j) = bjh
a(jh+j) = t2 - u3
a(jp+j) = t2 + u3
b(jh+j) = u2 + t3
b(jp+j) = u2 - t3
j = j + jump
410 continue
*-----( end of loop across transforms )
ja = ja + jstepx
if (ja.lt.istart) ja = ja + ninc
415 continue
420 continue
430 continue
*-----( end of double loop for k=0 )
*
* finished if last pass
* ---------------------
if (ipass.eq.m) go to 490
*
kk = 2*la
*
* loop on nonzero k
* -----------------
do 470 k = ink , jstep-ink , ink
co1 = trigs(kk+1)
si1 = s*trigs(kk+2)
co2 = trigs(2*kk+1)
si2 = s*trigs(2*kk+2)
co3 = trigs(3*kk+1)
si3 = s*trigs(3*kk+2)
*
* double loop along first transform in block
* ------------------------------------------
do 460 ll = k , (la-1)*ink , 4*jstep
*
do 450 jjj = ll , (n-1)*inc , 4*la*ink
ja = istart + jjj
*
* "transverse" loop
* -----------------
do 445 nu = 1 , inq
jb = ja + jstepl
if (jb.lt.istart) jb = jb + ninc
jc = jb + jstepl
if (jc.lt.istart) jc = jc + ninc
jd = jc + jstepl
if (jd.lt.istart) jd = jd + ninc
je = ja + laincl
if (je.lt.istart) je = je + ninc
jf = je + jstepl
if (jf.lt.istart) jf = jf + ninc
jg = jf + jstepl
if (jg.lt.istart) jg = jg + ninc
jh = jg + jstepl
if (jh.lt.istart) jh = jh + ninc
ji = je + laincl
if (ji.lt.istart) ji = ji + ninc
jj = ji + jstepl
if (jj.lt.istart) jj = jj + ninc
jk = jj + jstepl
if (jk.lt.istart) jk = jk + ninc
jl = jk + jstepl
if (jl.lt.istart) jl = jl + ninc
jm = ji + laincl
if (jm.lt.istart) jm = jm + ninc
jn = jm + jstepl
if (jn.lt.istart) jn = jn + ninc
jo = jn + jstepl
if (jo.lt.istart) jo = jo + ninc
jp = jo + jstepl
if (jp.lt.istart) jp = jp + ninc
j = 0
*
* loop across transforms
* ----------------------
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 440 l = 1 , nvex
aja = a(ja+j)
ajc = a(jc+j)
t0 = aja + ajc
t2 = aja - ajc
ajb = a(jb+j)
ajd = a(jd+j)
t1 = ajb + ajd
t3 = ss * ( ajb - ajd )
aji = a(ji+j)
ajc = aji
bja = b(ja+j)
bjc = b(jc+j)
u0 = bja + bjc
u2 = bja - bjc
bjb = b(jb+j)
bjd = b(jd+j)
u1 = bjb + bjd
u3 = ss * ( bjb - bjd )
aje = a(je+j)
ajb = aje
a(ja+j) = t0 + t1
b(ja+j) = u0 + u1
a(je+j) = co1*(t2-u3) - si1*(u2+t3)
bjb = si1*(t2-u3) + co1*(u2+t3)
bjm = b(jm+j)
bjd = bjm
a(ji+j) = co2*(t0-t1) - si2*(u0-u1)
bjc = si2*(t0-t1) + co2*(u0-u1)
ajd = co3*(t2+u3) - si3*(u2-t3)
b(jm+j) = si3*(t2+u3) + co3*(u2-t3)
*----------------------------------------
ajg = a(jg+j)
t0 = ajb + ajg
t2 = ajb - ajg
ajf = a(jf+j)
ajh = a(jh+j)
t1 = ajf + ajh
t3 = ss * ( ajf - ajh )
ajj = a(jj+j)
ajg = ajj
bje = b(je+j)
bjg = b(jg+j)
u0 = bje + bjg
u2 = bje - bjg
bjf = b(jf+j)
bjh = b(jh+j)
u1 = bjf + bjh
u3 = ss * ( bjf - bjh )
b(je+j) = bjb
a(jb+j) = t0 + t1
b(jb+j) = u0 + u1
bjj = b(jj+j)
bjg = bjj
a(jf+j) = co1*(t2-u3) - si1*(u2+t3)
b(jf+j) = si1*(t2-u3) + co1*(u2+t3)
a(jj+j) = co2*(t0-t1) - si2*(u0-u1)
b(jj+j) = si2*(t0-t1) + co2*(u0-u1)
ajh = co3*(t2+u3) - si3*(u2-t3)
bjh = si3*(t2+u3) + co3*(u2-t3)
*----------------------------------------
ajk = a(jk+j)
t0 = ajc + ajk
t2 = ajc - ajk
ajl = a(jl+j)
t1 = ajg + ajl
t3 = ss * ( ajg - ajl )
bji = b(ji+j)
bjk = b(jk+j)
u0 = bji + bjk
u2 = bji - bjk
ajo = a(jo+j)
ajl = ajo
bjl = b(jl+j)
u1 = bjg + bjl
u3 = ss * ( bjg - bjl )
b(ji+j) = bjc
a(jc+j) = t0 + t1
b(jc+j) = u0 + u1
bjo = b(jo+j)
bjl = bjo
a(jg+j) = co1*(t2-u3) - si1*(u2+t3)
b(jg+j) = si1*(t2-u3) + co1*(u2+t3)
a(jk+j) = co2*(t0-t1) - si2*(u0-u1)
b(jk+j) = si2*(t0-t1) + co2*(u0-u1)
a(jo+j) = co3*(t2+u3) - si3*(u2-t3)
b(jo+j) = si3*(t2+u3) + co3*(u2-t3)
*----------------------------------------
ajm = a(jm+j)
t0 = ajm + ajl
t2 = ajm - ajl
ajn = a(jn+j)
ajp = a(jp+j)
t1 = ajn + ajp
t3 = ss * ( ajn - ajp )
a(jm+j) = ajd
u0 = bjd + bjl
u2 = bjd - bjl
a(jn+j) = ajh
bjn = b(jn+j)
bjp = b(jp+j)
u1 = bjn + bjp
u3 = ss * ( bjn - bjp )
b(jn+j) = bjh
a(jd+j) = t0 + t1
b(jd+j) = u0 + u1
a(jh+j) = co1*(t2-u3) - si1*(u2+t3)
b(jh+j) = si1*(t2-u3) + co1*(u2+t3)
a(jl+j) = co2*(t0-t1) - si2*(u0-u1)
b(jl+j) = si2*(t0-t1) + co2*(u0-u1)
a(jp+j) = co3*(t2+u3) - si3*(u2-t3)
b(jp+j) = si3*(t2+u3) + co3*(u2-t3)
j = j + jump
440 continue
*-----(end of loop across transforms)
ja = ja + jstepx
if (ja.lt.istart) ja = ja + ninc
445 continue
450 continue
460 continue
*-----( end of double loop for this k )
kk = kk + 2*la
470 continue
*-----( end of loop over values of k )
la = 4*la
480 continue
*-----( end of loop on type II radix-4 passes )
*-----( nvex transforms completed)
490 continue
istart = istart + nvex * jump
500 continue
*-----( end of loop on blocks of transforms )
*
return
end subroutine gpfa2f
!******************************************************************
* fortran version of *gpfa3* -
* radix-3 section of self-sorting, in-place
* generalized PFA
*
*-------------------------------------------------------------------
*
subroutine gpfa3f(a,b,trigs,inc,jump,n,mm,lot,isign)
real(grid_p) :: a(*), b(*)
real(dp) :: trigs(*)
integer inc, jump, n, mm, lot, isign
real(dp), parameter :: sin60 = 0.866025403784437_dp
integer :: je, jf, jg, jh, laincl, ll, ji
integer :: n3, inq, jstepx, ninc, ink, mu, m, mh,
$ nblox, left, istart, nb, nvex, la,
$ ipass, jstep, jstepl, jjj, ja, nu, jb,
$ jc, j, l, kk, k, jd
real(dp) :: s, ajb, ajc, t1, aja, t2, t3, bjb, bjc,
$ u1, bja, u2, u3, co1, si1, co2, si2, ajd,
$ bjd
real(dp) :: c1, aje, ajg, ajf, ajh, bje,
$ bjf, bjg, bjh, aji, bji
integer, parameter :: lvr = 128
*
* ***************************************************************
* * *
* * N.B. LVR = LENGTH OF VECTOR REGISTERS, SET TO 128 FOR C90. *
* * RESET TO 64 FOR OTHER CRAY MACHINES, OR TO ANY LARGE VALUE *
* * (GREATER THAN OR EQUAL TO LOT) FOR A SCALAR COMPUTER. *
* * *
* ***************************************************************
*
n3 = 3**mm
inq = n/n3
jstepx = (n3-n) * inc
ninc = n * inc
ink = inc * inq
mu = mod(inq,3)
if (isign.eq.-1) mu = 3-mu
m = mm
mh = (m+1)/2
s = real(isign, grid_p)
c1 = sin60
if (mu.eq.2) c1 = -c1
*
nblox = 1 + (lot-1)/lvr
left = lot
s = real(isign, grid_p)
istart = 1
*
* loop on blocks of lvr transforms
* --------------------------------
do 500 nb = 1 , nblox
*
if (left.le.lvr) then
nvex = left
else if (left.lt.(2*lvr)) then
nvex = left/2
nvex = nvex + mod(nvex,2)
else
nvex = lvr
endif
left = left - nvex
*
la = 1
*
* loop on type I radix-3 passes
* -----------------------------
do 160 ipass = 1 , mh
jstep = (n*inc) / (3*la)
jstepl = jstep - ninc
*
* k = 0 loop (no twiddle factors)
* -------------------------------
do 120 jjj = 0 , (n-1)*inc , 3*jstep
ja = istart + jjj
*
* "transverse" loop
* -----------------
do 115 nu = 1 , inq
jb = ja + jstepl
if (jb.lt.istart) jb = jb + ninc
jc = jb + jstepl
if (jc.lt.istart) jc = jc + ninc
j = 0
*
* loop across transforms
* ----------------------
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 110 l = 1 , nvex
ajb = a(jb+j)
ajc = a(jc+j)
t1 = ajb + ajc
aja = a(ja+j)
t2 = aja - 0.5_dp * t1
t3 = c1 * ( ajb - ajc )
bjb = b(jb+j)
bjc = b(jc+j)
u1 = bjb + bjc
bja = b(ja+j)
u2 = bja - 0.5_dp * u1
u3 = c1 * ( bjb - bjc )
a(ja+j) = aja + t1
b(ja+j) = bja + u1
a(jb+j) = t2 - u3
b(jb+j) = u2 + t3
a(jc+j) = t2 + u3
b(jc+j) = u2 - t3
j = j + jump
110 continue
ja = ja + jstepx
if (ja.lt.istart) ja = ja + ninc
115 continue
120 continue
*
* finished if n3 = 3
* ------------------
if (n3.eq.3) go to 490
kk = 2 * la
*
* loop on nonzero k
* -----------------
do 150 k = ink , jstep-ink , ink
co1 = trigs(kk+1)
si1 = s*trigs(kk+2)
co2 = trigs(2*kk+1)
si2 = s*trigs(2*kk+2)
*
* loop along transform
* --------------------
do 140 jjj = k , (n-1)*inc , 3*jstep
ja = istart + jjj
*
* "transverse" loop
* -----------------
do 135 nu = 1 , inq
jb = ja + jstepl
if (jb.lt.istart) jb = jb + ninc
jc = jb + jstepl
if (jc.lt.istart) jc = jc + ninc
j = 0
*
* loop across transforms
* ----------------------
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 130 l = 1 , nvex
ajb = a(jb+j)
ajc = a(jc+j)
t1 = ajb + ajc
aja = a(ja+j)
t2 = aja - 0.5_dp * t1
t3 = c1 * ( ajb - ajc )
bjb = b(jb+j)
bjc = b(jc+j)
u1 = bjb + bjc
bja = b(ja+j)
u2 = bja - 0.5_dp * u1
u3 = c1 * ( bjb - bjc )
a(ja+j) = aja + t1
b(ja+j) = bja + u1
a(jb+j) = co1*(t2-u3) - si1*(u2+t3)
b(jb+j) = si1*(t2-u3) + co1*(u2+t3)
a(jc+j) = co2*(t2+u3) - si2*(u2-t3)
b(jc+j) = si2*(t2+u3) + co2*(u2-t3)
j = j + jump
130 continue
*-----( end of loop across transforms )
ja = ja + jstepx
if (ja.lt.istart) ja = ja + ninc
135 continue
140 continue
*-----( end of loop along transforms )
kk = kk + 2*la
150 continue
*-----( end of loop on nonzero k )
la = 3*la
160 continue
*-----( end of loop on type I radix-3 passes)
*
* loop on type II radix-3 passes
* ------------------------------
400 continue
*
do 480 ipass = mh+1 , m
jstep = (n*inc) / (3*la)
jstepl = jstep - ninc
laincl = la*ink - ninc
*
* k=0 loop (no twiddle factors)
* -----------------------------
do 430 ll = 0 , (la-1)*ink , 3*jstep
*
do 420 jjj = ll , (n-1)*inc , 3*la*ink
ja = istart + jjj
*
* "transverse" loop
* -----------------
do 415 nu = 1 , inq
jb = ja + jstepl
if (jb.lt.istart) jb = jb + ninc
jc = jb + jstepl
if (jc.lt.istart) jc = jc + ninc
jd = ja + laincl
if (jd.lt.istart) jd = jd + ninc
je = jd + jstepl
if (je.lt.istart) je = je + ninc
jf = je + jstepl
if (jf.lt.istart) jf = jf + ninc
jg = jd + laincl
if (jg.lt.istart) jg = jg + ninc
jh = jg + jstepl
if (jh.lt.istart) jh = jh + ninc
ji = jh + jstepl
if (ji.lt.istart) ji = ji + ninc
j = 0
*
* loop across transforms
* ----------------------
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 410 l = 1 , nvex
ajb = a(jb+j)
ajc = a(jc+j)
t1 = ajb + ajc
aja = a(ja+j)
t2 = aja - 0.5_dp * t1
t3 = c1 * ( ajb - ajc )
ajd = a(jd+j)
ajb = ajd
bjb = b(jb+j)
bjc = b(jc+j)
u1 = bjb + bjc
bja = b(ja+j)
u2 = bja - 0.5_dp * u1
u3 = c1 * ( bjb - bjc )
bjd = b(jd+j)
bjb = bjd
a(ja+j) = aja + t1
b(ja+j) = bja + u1
a(jd+j) = t2 - u3
b(jd+j) = u2 + t3
ajc = t2 + u3
bjc = u2 - t3
*----------------------
aje = a(je+j)
ajf = a(jf+j)
t1 = aje + ajf
t2 = ajb - 0.5_dp * t1
t3 = c1 * ( aje - ajf )
ajh = a(jh+j)
ajf = ajh
bje = b(je+j)
bjf = b(jf+j)
u1 = bje + bjf
u2 = bjb - 0.5_dp * u1
u3 = c1 * ( bje - bjf )
bjh = b(jh+j)
bjf = bjh
a(jb+j) = ajb + t1
b(jb+j) = bjb + u1
a(je+j) = t2 - u3
b(je+j) = u2 + t3
a(jh+j) = t2 + u3
b(jh+j) = u2 - t3
*----------------------
aji = a(ji+j)
t1 = ajf + aji
ajg = a(jg+j)
t2 = ajg - 0.5_dp * t1
t3 = c1 * ( ajf - aji )
t1 = ajg + t1
a(jg+j) = ajc
bji = b(ji+j)
u1 = bjf + bji
bjg = b(jg+j)
u2 = bjg - 0.5_dp * u1
u3 = c1 * ( bjf - bji )
u1 = bjg + u1
b(jg+j) = bjc
a(jc+j) = t1
b(jc+j) = u1
a(jf+j) = t2 - u3
b(jf+j) = u2 + t3
a(ji+j) = t2 + u3
b(ji+j) = u2 - t3
j = j + jump
410 continue
*-----( end of loop across transforms )
ja = ja + jstepx
if (ja.lt.istart) ja = ja + ninc
415 continue
420 continue
430 continue
*-----( end of double loop for k=0 )
*
* finished if last pass
* ---------------------
if (ipass.eq.m) go to 490
*
kk = 2*la
*
* loop on nonzero k
* -----------------
do 470 k = ink , jstep-ink , ink
co1 = trigs(kk+1)
si1 = s*trigs(kk+2)
co2 = trigs(2*kk+1)
si2 = s*trigs(2*kk+2)
*
* double loop along first transform in block
* ------------------------------------------
do 460 ll = k , (la-1)*ink , 3*jstep
*
do 450 jjj = ll , (n-1)*inc , 3*la*ink
ja = istart + jjj
*
* "transverse" loop
* -----------------
do 445 nu = 1 , inq
jb = ja + jstepl
if (jb.lt.istart) jb = jb + ninc
jc = jb + jstepl
if (jc.lt.istart) jc = jc + ninc
jd = ja + laincl
if (jd.lt.istart) jd = jd + ninc
je = jd + jstepl
if (je.lt.istart) je = je + ninc
jf = je + jstepl
if (jf.lt.istart) jf = jf + ninc
jg = jd + laincl
if (jg.lt.istart) jg = jg + ninc
jh = jg + jstepl
if (jh.lt.istart) jh = jh + ninc
ji = jh + jstepl
if (ji.lt.istart) ji = ji + ninc
j = 0
*
* loop across transforms
* ----------------------
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 440 l = 1 , nvex
ajb = a(jb+j)
ajc = a(jc+j)
t1 = ajb + ajc
aja = a(ja+j)
t2 = aja - 0.5_dp * t1
t3 = c1 * ( ajb - ajc )
ajd = a(jd+j)
ajb = ajd
bjb = b(jb+j)
bjc = b(jc+j)
u1 = bjb + bjc
bja = b(ja+j)
u2 = bja - 0.5_dp * u1
u3 = c1 * ( bjb - bjc )
bjd = b(jd+j)
bjb = bjd
a(ja+j) = aja + t1
b(ja+j) = bja + u1
a(jd+j) = co1*(t2-u3) - si1*(u2+t3)
b(jd+j) = si1*(t2-u3) + co1*(u2+t3)
ajc = co2*(t2+u3) - si2*(u2-t3)
bjc = si2*(t2+u3) + co2*(u2-t3)
*----------------------
aje = a(je+j)
ajf = a(jf+j)
t1 = aje + ajf
t2 = ajb - 0.5_dp * t1
t3 = c1 * ( aje - ajf )
ajh = a(jh+j)
ajf = ajh
bje = b(je+j)
bjf = b(jf+j)
u1 = bje + bjf
u2 = bjb - 0.5_dp * u1
u3 = c1 * ( bje - bjf )
bjh = b(jh+j)
bjf = bjh
a(jb+j) = ajb + t1
b(jb+j) = bjb + u1
a(je+j) = co1*(t2-u3) - si1*(u2+t3)
b(je+j) = si1*(t2-u3) + co1*(u2+t3)
a(jh+j) = co2*(t2+u3) - si2*(u2-t3)
b(jh+j) = si2*(t2+u3) + co2*(u2-t3)
*----------------------
aji = a(ji+j)
t1 = ajf + aji
ajg = a(jg+j)
t2 = ajg - 0.5_dp * t1
t3 = c1 * ( ajf - aji )
t1 = ajg + t1
a(jg+j) = ajc
bji = b(ji+j)
u1 = bjf + bji
bjg = b(jg+j)
u2 = bjg - 0.5_dp * u1
u3 = c1 * ( bjf - bji )
u1 = bjg + u1
b(jg+j) = bjc
a(jc+j) = t1
b(jc+j) = u1
a(jf+j) = co1*(t2-u3) - si1*(u2+t3)
b(jf+j) = si1*(t2-u3) + co1*(u2+t3)
a(ji+j) = co2*(t2+u3) - si2*(u2-t3)
b(ji+j) = si2*(t2+u3) + co2*(u2-t3)
j = j + jump
440 continue
*-----(end of loop across transforms)
ja = ja + jstepx
if (ja.lt.istart) ja = ja + ninc
445 continue
450 continue
460 continue
*-----( end of double loop for this k )
kk = kk + 2*la
470 continue
*-----( end of loop over values of k )
la = 3*la
480 continue
*-----( end of loop on type II radix-3 passes )
*-----( nvex transforms completed)
490 continue
istart = istart + nvex * jump
500 continue
*-----( end of loop on blocks of transforms )
*
return
end subroutine gpfa3f
!*******************************************************************
* fortran version of *gpfa5* -
* radix-5 section of self-sorting, in-place,
* generalized pfa
*
*-------------------------------------------------------------------
*
subroutine gpfa5f(a,b,trigs,inc,jump,n,mm,lot,isign)
use precision, only : dp, grid_p
real(grid_p) :: a(*), b(*)
real(dp) :: trigs(*)
integer inc, jump, n, mm, lot, isign
real(dp), parameter :: sin36 = 0.587785252292473_dp,
$ sin72 = 0.951056516295154_dp,
$ qrt5 = 0.559016994374947_dp
integer, parameter :: lvr = 128
integer :: inq, jstepx, ninc, ink, mu, m, mh,
$ nblox, n5, left, istart, nb, nvex, la,
$ ipass, jstep, jstepl, kk, k, jjj, ja,
$ nu, jb, jc, jd, je, j, laincl, ll, l,
$ jf, jg, jh, ji, jj, jk, jl, jm, jn, jo,
$ jp, jq, jr, js, jt, ju, jv, jw, jx, jy
real(dp) :: s, c1, c2, c3, co1, si1, co2, si2, co3,
$ si3, co4, si4, ajb, aje, t1, ajc, ajd,
$ t2, t3, t4, t5, t6, aja, t7, t8, t9, t10,
$ t11, bjb, bje, u1, bjc, bjd, u2, u3, u4,
$ u5, u6, bja, u7, u8, u9, u10, u11, ajf,
$ ajk, bjf, bjk, ajg, ajj, ajh, aji, ajl,
$ ajq, bjg, bjj, bjh, bji, bjl, bjq, ajo,
$ ajm, ajn, ajr, ajw, bjo, bjm, bjn, bjr,
$ bjw, ajt, ajs, ajx, ajp, ax, bjt, bjs,
$ bjx, bjp, bx, ajv, ajy, aju, bjv, bjy,
$ bju
*
* ***************************************************************
* * *
* * N.B. LVR = LENGTH OF VECTOR REGISTERS, SET TO 128 FOR C90. *
* * RESET TO 64 FOR OTHER CRAY MACHINES, OR TO ANY LARGE VALUE *
* * (GREATER THAN OR EQUAL TO LOT) FOR A SCALAR COMPUTER. *
* * *
* ***************************************************************
*
n5 = 5 ** mm
inq = n / n5
jstepx = (n5-n) * inc
ninc = n * inc
ink = inc * inq
mu = mod(inq,5)
if (isign.eq.-1) mu = 5 - mu
*
m = mm
mh = (m+1)/2
s = real(isign, grid_p)
c1 = qrt5
c2 = sin72
c3 = sin36
if (mu.eq.2.or.mu.eq.3) then
c1 = -c1
c2 = sin36
c3 = sin72
endif
if (mu.eq.3.or.mu.eq.4) c2 = -c2
if (mu.eq.2.or.mu.eq.4) c3 = -c3
*
nblox = 1 + (lot-1)/lvr
left = lot
s = real(isign, grid_p)
istart = 1
*
* loop on blocks of lvr transforms
* --------------------------------
do 500 nb = 1 , nblox
*
if (left.le.lvr) then
nvex = left
else if (left.lt.(2*lvr)) then
nvex = left/2
nvex = nvex + mod(nvex,2)
else
nvex = lvr
endif
left = left - nvex
*
la = 1
*
* loop on type I radix-5 passes
* -----------------------------
do 160 ipass = 1 , mh
jstep = (n*inc) / (5*la)
jstepl = jstep - ninc
kk = 0
*
* loop on k
* ---------
do 150 k = 0 , jstep-ink , ink
*
if (k.gt.0) then
co1 = trigs(kk+1)
si1 = s*trigs(kk+2)
co2 = trigs(2*kk+1)
si2 = s*trigs(2*kk+2)
co3 = trigs(3*kk+1)
si3 = s*trigs(3*kk+2)
co4 = trigs(4*kk+1)
si4 = s*trigs(4*kk+2)
endif
*
* loop along transform
* --------------------
do 140 jjj = k , (n-1)*inc , 5*jstep
ja = istart + jjj
*
* "transverse" loop
* -----------------
do 135 nu = 1 , inq
jb = ja + jstepl
if (jb.lt.istart) jb = jb + ninc
jc = jb + jstepl
if (jc.lt.istart) jc = jc + ninc
jd = jc + jstepl
if (jd.lt.istart) jd = jd + ninc
je = jd + jstepl
if (je.lt.istart) je = je + ninc
j = 0
*
* loop across transforms
* ----------------------
if (k.eq.0) then
*
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 110 l = 1 , nvex
ajb = a(jb+j)
aje = a(je+j)
t1 = ajb + aje
ajc = a(jc+j)
ajd = a(jd+j)
t2 = ajc + ajd
t3 = ajb - aje
t4 = ajc - ajd
t5 = t1 + t2
t6 = c1 * ( t1 - t2 )
aja = a(ja+j)
t7 = aja - 0.25_dp * t5
a(ja+j) = aja + t5
t8 = t7 + t6
t9 = t7 - t6
t10 = c3 * t3 - c2 * t4
t11 = c2 * t3 + c3 * t4
bjb = b(jb+j)
bje = b(je+j)
u1 = bjb + bje
bjc = b(jc+j)
bjd = b(jd+j)
u2 = bjc + bjd
u3 = bjb - bje
u4 = bjc - bjd
u5 = u1 + u2
u6 = c1 * ( u1 - u2 )
bja = b(ja+j)
u7 = bja - 0.25_dp * u5
b(ja+j) = bja + u5
u8 = u7 + u6
u9 = u7 - u6
u10 = c3 * u3 - c2 * u4
u11 = c2 * u3 + c3 * u4
a(jb+j) = t8 - u11
b(jb+j) = u8 + t11
a(je+j) = t8 + u11
b(je+j) = u8 - t11
a(jc+j) = t9 - u10
b(jc+j) = u9 + t10
a(jd+j) = t9 + u10
b(jd+j) = u9 - t10
j = j + jump
110 continue
*
else
*
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 130 l = 1 , nvex
ajb = a(jb+j)
aje = a(je+j)
t1 = ajb + aje
ajc = a(jc+j)
ajd = a(jd+j)
t2 = ajc + ajd
t3 = ajb - aje
t4 = ajc - ajd
t5 = t1 + t2
t6 = c1 * ( t1 - t2 )
aja = a(ja+j)
t7 = aja - 0.25_dp * t5
a(ja+j) = aja + t5
t8 = t7 + t6
t9 = t7 - t6
t10 = c3 * t3 - c2 * t4
t11 = c2 * t3 + c3 * t4
bjb = b(jb+j)
bje = b(je+j)
u1 = bjb + bje
bjc = b(jc+j)
bjd = b(jd+j)
u2 = bjc + bjd
u3 = bjb - bje
u4 = bjc - bjd
u5 = u1 + u2
u6 = c1 * ( u1 - u2 )
bja = b(ja+j)
u7 = bja - 0.25_dp * u5
b(ja+j) = bja + u5
u8 = u7 + u6
u9 = u7 - u6
u10 = c3 * u3 - c2 * u4
u11 = c2 * u3 + c3 * u4
a(jb+j) = co1*(t8-u11) - si1*(u8+t11)
b(jb+j) = si1*(t8-u11) + co1*(u8+t11)
a(je+j) = co4*(t8+u11) - si4*(u8-t11)
b(je+j) = si4*(t8+u11) + co4*(u8-t11)
a(jc+j) = co2*(t9-u10) - si2*(u9+t10)
b(jc+j) = si2*(t9-u10) + co2*(u9+t10)
a(jd+j) = co3*(t9+u10) - si3*(u9-t10)
b(jd+j) = si3*(t9+u10) + co3*(u9-t10)
j = j + jump
130 continue
*
endif
*
*-----( end of loop across transforms )
*
ja = ja + jstepx
if (ja.lt.istart) ja = ja + ninc
135 continue
140 continue
*-----( end of loop along transforms )
kk = kk + 2*la
150 continue
*-----( end of loop on nonzero k )
la = 5*la
160 continue
*-----( end of loop on type I radix-5 passes)
*
if (n.eq.5) go to 490
*
* loop on type II radix-5 passes
* ------------------------------
400 continue
*
do 480 ipass = mh+1 , m
jstep = (n*inc) / (5*la)
jstepl = jstep - ninc
laincl = la * ink - ninc
kk = 0
*
* loop on k
* ---------
do 470 k = 0 , jstep-ink , ink
*
if (k.gt.0) then
co1 = trigs(kk+1)
si1 = s*trigs(kk+2)
co2 = trigs(2*kk+1)
si2 = s*trigs(2*kk+2)
co3 = trigs(3*kk+1)
si3 = s*trigs(3*kk+2)
co4 = trigs(4*kk+1)
si4 = s*trigs(4*kk+2)
endif
*
* double loop along first transform in block
* ------------------------------------------
do 460 ll = k , (la-1)*ink , 5*jstep
*
do 450 jjj = ll , (n-1)*inc , 5*la*ink
ja = istart + jjj
*
* "transverse" loop
* -----------------
do 445 nu = 1 , inq
jb = ja + jstepl
if (jb.lt.istart) jb = jb + ninc
jc = jb + jstepl
if (jc.lt.istart) jc = jc + ninc
jd = jc + jstepl
if (jd.lt.istart) jd = jd + ninc
je = jd + jstepl
if (je.lt.istart) je = je + ninc
jf = ja + laincl
if (jf.lt.istart) jf = jf + ninc
jg = jf + jstepl
if (jg.lt.istart) jg = jg + ninc
jh = jg + jstepl
if (jh.lt.istart) jh = jh + ninc
ji = jh + jstepl
if (ji.lt.istart) ji = ji + ninc
jj = ji + jstepl
if (jj.lt.istart) jj = jj + ninc
jk = jf + laincl
if (jk.lt.istart) jk = jk + ninc
jl = jk + jstepl
if (jl.lt.istart) jl = jl + ninc
jm = jl + jstepl
if (jm.lt.istart) jm = jm + ninc
jn = jm + jstepl
if (jn.lt.istart) jn = jn + ninc
jo = jn + jstepl
if (jo.lt.istart) jo = jo + ninc
jp = jk + laincl
if (jp.lt.istart) jp = jp + ninc
jq = jp + jstepl
if (jq.lt.istart) jq = jq + ninc
jr = jq + jstepl
if (jr.lt.istart) jr = jr + ninc
js = jr + jstepl
if (js.lt.istart) js = js + ninc
jt = js + jstepl
if (jt.lt.istart) jt = jt + ninc
ju = jp + laincl
if (ju.lt.istart) ju = ju + ninc
jv = ju + jstepl
if (jv.lt.istart) jv = jv + ninc
jw = jv + jstepl
if (jw.lt.istart) jw = jw + ninc
jx = jw + jstepl
if (jx.lt.istart) jx = jx + ninc
jy = jx + jstepl
if (jy.lt.istart) jy = jy + ninc
j = 0
*
* loop across transforms
* ----------------------
if (k.eq.0) then
*
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 410 l = 1 , nvex
ajb = a(jb+j)
aje = a(je+j)
t1 = ajb + aje
ajc = a(jc+j)
ajd = a(jd+j)
t2 = ajc + ajd
t3 = ajb - aje
t4 = ajc - ajd
ajf = a(jf+j)
ajb = ajf
t5 = t1 + t2
t6 = c1 * ( t1 - t2 )
aja = a(ja+j)
t7 = aja - 0.25_dp * t5
a(ja+j) = aja + t5
t8 = t7 + t6
t9 = t7 - t6
ajk = a(jk+j)
ajc = ajk
t10 = c3 * t3 - c2 * t4
t11 = c2 * t3 + c3 * t4
bjb = b(jb+j)
bje = b(je+j)
u1 = bjb + bje
bjc = b(jc+j)
bjd = b(jd+j)
u2 = bjc + bjd
u3 = bjb - bje
u4 = bjc - bjd
bjf = b(jf+j)
bjb = bjf
u5 = u1 + u2
u6 = c1 * ( u1 - u2 )
bja = b(ja+j)
u7 = bja - 0.25_dp * u5
b(ja+j) = bja + u5
u8 = u7 + u6
u9 = u7 - u6
bjk = b(jk+j)
bjc = bjk
u10 = c3 * u3 - c2 * u4
u11 = c2 * u3 + c3 * u4
a(jf+j) = t8 - u11
b(jf+j) = u8 + t11
aje = t8 + u11
bje = u8 - t11
a(jk+j) = t9 - u10
b(jk+j) = u9 + t10
ajd = t9 + u10
bjd = u9 - t10
*----------------------
ajg = a(jg+j)
ajj = a(jj+j)
t1 = ajg + ajj
ajh = a(jh+j)
aji = a(ji+j)
t2 = ajh + aji
t3 = ajg - ajj
t4 = ajh - aji
ajl = a(jl+j)
ajh = ajl
t5 = t1 + t2
t6 = c1 * ( t1 - t2 )
t7 = ajb - 0.25_dp * t5
a(jb+j) = ajb + t5
t8 = t7 + t6
t9 = t7 - t6
ajq = a(jq+j)
aji = ajq
t10 = c3 * t3 - c2 * t4
t11 = c2 * t3 + c3 * t4
bjg = b(jg+j)
bjj = b(jj+j)
u1 = bjg + bjj
bjh = b(jh+j)
bji = b(ji+j)
u2 = bjh + bji
u3 = bjg - bjj
u4 = bjh - bji
bjl = b(jl+j)
bjh = bjl
u5 = u1 + u2
u6 = c1 * ( u1 - u2 )
u7 = bjb - 0.25_dp * u5
b(jb+j) = bjb + u5
u8 = u7 + u6
u9 = u7 - u6
bjq = b(jq+j)
bji = bjq
u10 = c3 * u3 - c2 * u4
u11 = c2 * u3 + c3 * u4
a(jg+j) = t8 - u11
b(jg+j) = u8 + t11
ajj = t8 + u11
bjj = u8 - t11
a(jl+j) = t9 - u10
b(jl+j) = u9 + t10
a(jq+j) = t9 + u10
b(jq+j) = u9 - t10
*----------------------
ajo = a(jo+j)
t1 = ajh + ajo
ajm = a(jm+j)
ajn = a(jn+j)
t2 = ajm + ajn
t3 = ajh - ajo
t4 = ajm - ajn
ajr = a(jr+j)
ajn = ajr
t5 = t1 + t2
t6 = c1 * ( t1 - t2 )
t7 = ajc - 0.25_dp * t5
a(jc+j) = ajc + t5
t8 = t7 + t6
t9 = t7 - t6
ajw = a(jw+j)
ajo = ajw
t10 = c3 * t3 - c2 * t4
t11 = c2 * t3 + c3 * t4
bjo = b(jo+j)
u1 = bjh + bjo
bjm = b(jm+j)
bjn = b(jn+j)
u2 = bjm + bjn
u3 = bjh - bjo
u4 = bjm - bjn
bjr = b(jr+j)
bjn = bjr
u5 = u1 + u2
u6 = c1 * ( u1 - u2 )
u7 = bjc - 0.25_dp * u5
b(jc+j) = bjc + u5
u8 = u7 + u6
u9 = u7 - u6
bjw = b(jw+j)
bjo = bjw
u10 = c3 * u3 - c2 * u4
u11 = c2 * u3 + c3 * u4
a(jh+j) = t8 - u11
b(jh+j) = u8 + t11
a(jw+j) = t8 + u11
b(jw+j) = u8 - t11
a(jm+j) = t9 - u10
b(jm+j) = u9 + t10
a(jr+j) = t9 + u10
b(jr+j) = u9 - t10
*----------------------
ajt = a(jt+j)
t1 = aji + ajt
ajs = a(js+j)
t2 = ajn + ajs
t3 = aji - ajt
t4 = ajn - ajs
ajx = a(jx+j)
ajt = ajx
t5 = t1 + t2
t6 = c1 * ( t1 - t2 )
ajp = a(jp+j)
t7 = ajp - 0.25_dp * t5
ax = ajp + t5
t8 = t7 + t6
t9 = t7 - t6
a(jp+j) = ajd
t10 = c3 * t3 - c2 * t4
t11 = c2 * t3 + c3 * t4
a(jd+j) = ax
bjt = b(jt+j)
u1 = bji + bjt
bjs = b(js+j)
u2 = bjn + bjs
u3 = bji - bjt
u4 = bjn - bjs
bjx = b(jx+j)
bjt = bjx
u5 = u1 + u2
u6 = c1 * ( u1 - u2 )
bjp = b(jp+j)
u7 = bjp - 0.25_dp * u5
bx = bjp + u5
u8 = u7 + u6
u9 = u7 - u6
b(jp+j) = bjd
u10 = c3 * u3 - c2 * u4
u11 = c2 * u3 + c3 * u4
b(jd+j) = bx
a(ji+j) = t8 - u11
b(ji+j) = u8 + t11
a(jx+j) = t8 + u11
b(jx+j) = u8 - t11
a(jn+j) = t9 - u10
b(jn+j) = u9 + t10
a(js+j) = t9 + u10
b(js+j) = u9 - t10
*----------------------
ajv = a(jv+j)
ajy = a(jy+j)
t1 = ajv + ajy
t2 = ajo + ajt
t3 = ajv - ajy
t4 = ajo - ajt
a(jv+j) = ajj
t5 = t1 + t2
t6 = c1 * ( t1 - t2 )
aju = a(ju+j)
t7 = aju - 0.25_dp * t5
ax = aju + t5
t8 = t7 + t6
t9 = t7 - t6
a(ju+j) = aje
t10 = c3 * t3 - c2 * t4
t11 = c2 * t3 + c3 * t4
a(je+j) = ax
bjv = b(jv+j)
bjy = b(jy+j)
u1 = bjv + bjy
u2 = bjo + bjt
u3 = bjv - bjy
u4 = bjo - bjt
b(jv+j) = bjj
u5 = u1 + u2
u6 = c1 * ( u1 - u2 )
bju = b(ju+j)
u7 = bju - 0.25_dp * u5
bx = bju + u5
u8 = u7 + u6
u9 = u7 - u6
b(ju+j) = bje
u10 = c3 * u3 - c2 * u4
u11 = c2 * u3 + c3 * u4
b(je+j) = bx
a(jj+j) = t8 - u11
b(jj+j) = u8 + t11
a(jy+j) = t8 + u11
b(jy+j) = u8 - t11
a(jo+j) = t9 - u10
b(jo+j) = u9 + t10
a(jt+j) = t9 + u10
b(jt+j) = u9 - t10
j = j + jump
410 continue
*
else
*
#ifdef OLD_CRAY
cdir$ ivdep, shortloop
#endif
do 440 l = 1 , nvex
ajb = a(jb+j)
aje = a(je+j)
t1 = ajb + aje
ajc = a(jc+j)
ajd = a(jd+j)
t2 = ajc + ajd
t3 = ajb - aje
t4 = ajc - ajd
ajf = a(jf+j)
ajb = ajf
t5 = t1 + t2
t6 = c1 * ( t1 - t2 )
aja = a(ja+j)
t7 = aja - 0.25_dp * t5
a(ja+j) = aja + t5
t8 = t7 + t6
t9 = t7 - t6
ajk = a(jk+j)
ajc = ajk
t10 = c3 * t3 - c2 * t4
t11 = c2 * t3 + c3 * t4
bjb = b(jb+j)
bje = b(je+j)
u1 = bjb + bje
bjc = b(jc+j)
bjd = b(jd+j)
u2 = bjc + bjd
u3 = bjb - bje
u4 = bjc - bjd
bjf = b(jf+j)
bjb = bjf
u5 = u1 + u2
u6 = c1 * ( u1 - u2 )
bja = b(ja+j)
u7 = bja - 0.25_dp * u5
b(ja+j) = bja + u5
u8 = u7 + u6
u9 = u7 - u6
bjk = b(jk+j)
bjc = bjk
u10 = c3 * u3 - c2 * u4
u11 = c2 * u3 + c3 * u4
a(jf+j) = co1*(t8-u11) - si1*(u8+t11)
b(jf+j) = si1*(t8-u11) + co1*(u8+t11)
aje = co4*(t8+u11) - si4*(u8-t11)
bje = si4*(t8+u11) + co4*(u8-t11)
a(jk+j) = co2*(t9-u10) - si2*(u9+t10)
b(jk+j) = si2*(t9-u10) + co2*(u9+t10)
ajd = co3*(t9+u10) - si3*(u9-t10)
bjd = si3*(t9+u10) + co3*(u9-t10)
*----------------------
ajg = a(jg+j)
ajj = a(jj+j)
t1 = ajg + ajj
ajh = a(jh+j)
aji = a(ji+j)
t2 = ajh + aji
t3 = ajg - ajj
t4 = ajh - aji
ajl = a(jl+j)
ajh = ajl
t5 = t1 + t2
t6 = c1 * ( t1 - t2 )
t7 = ajb - 0.25_dp * t5
a(jb+j) = ajb + t5
t8 = t7 + t6
t9 = t7 - t6
ajq = a(jq+j)
aji = ajq
t10 = c3 * t3 - c2 * t4
t11 = c2 * t3 + c3 * t4
bjg = b(jg+j)
bjj = b(jj+j)
u1 = bjg + bjj
bjh = b(jh+j)
bji = b(ji+j)
u2 = bjh + bji
u3 = bjg - bjj
u4 = bjh - bji
bjl = b(jl+j)
bjh = bjl
u5 = u1 + u2
u6 = c1 * ( u1 - u2 )
u7 = bjb - 0.25_dp * u5
b(jb+j) = bjb + u5
u8 = u7 + u6
u9 = u7 - u6
bjq = b(jq+j)
bji = bjq
u10 = c3 * u3 - c2 * u4
u11 = c2 * u3 + c3 * u4
a(jg+j) = co1*(t8-u11) - si1*(u8+t11)
b(jg+j) = si1*(t8-u11) + co1*(u8+t11)
ajj = co4*(t8+u11) - si4*(u8-t11)
bjj = si4*(t8+u11) + co4*(u8-t11)
a(jl+j) = co2*(t9-u10) - si2*(u9+t10)
b(jl+j) = si2*(t9-u10) + co2*(u9+t10)
a(jq+j) = co3*(t9+u10) - si3*(u9-t10)
b(jq+j) = si3*(t9+u10) + co3*(u9-t10)
*----------------------
ajo = a(jo+j)
t1 = ajh + ajo
ajm = a(jm+j)
ajn = a(jn+j)
t2 = ajm + ajn
t3 = ajh - ajo
t4 = ajm - ajn
ajr = a(jr+j)
ajn = ajr
t5 = t1 + t2
t6 = c1 * ( t1 - t2 )
t7 = ajc - 0.25_dp * t5
a(jc+j) = ajc + t5
t8 = t7 + t6
t9 = t7 - t6
ajw = a(jw+j)
ajo = ajw
t10 = c3 * t3 - c2 * t4
t11 = c2 * t3 + c3 * t4
bjo = b(jo+j)
u1 = bjh + bjo
bjm = b(jm+j)
bjn = b(jn+j)
u2 = bjm + bjn
u3 = bjh - bjo
u4 = bjm - bjn
bjr = b(jr+j)
bjn = bjr
u5 = u1 + u2
u6 = c1 * ( u1 - u2 )
u7 = bjc - 0.25_dp * u5
b(jc+j) = bjc + u5
u8 = u7 + u6
u9 = u7 - u6
bjw = b(jw+j)
bjo = bjw
u10 = c3 * u3 - c2 * u4
u11 = c2 * u3 + c3 * u4
a(jh+j) = co1*(t8-u11) - si1*(u8+t11)
b(jh+j) = si1*(t8-u11) + co1*(u8+t11)
a(jw+j) = co4*(t8+u11) - si4*(u8-t11)
b(jw+j) = si4*(t8+u11) + co4*(u8-t11)
a(jm+j) = co2*(t9-u10) - si2*(u9+t10)
b(jm+j) = si2*(t9-u10) + co2*(u9+t10)
a(jr+j) = co3*(t9+u10) - si3*(u9-t10)
b(jr+j) = si3*(t9+u10) + co3*(u9-t10)
*----------------------
ajt = a(jt+j)
t1 = aji + ajt
ajs = a(js+j)
t2 = ajn + ajs
t3 = aji - ajt
t4 = ajn - ajs
ajx = a(jx+j)
ajt = ajx
t5 = t1 + t2
t6 = c1 * ( t1 - t2 )
ajp = a(jp+j)
t7 = ajp - 0.25_dp * t5
ax = ajp + t5
t8 = t7 + t6
t9 = t7 - t6
a(jp+j) = ajd
t10 = c3 * t3 - c2 * t4
t11 = c2 * t3 + c3 * t4
a(jd+j) = ax
bjt = b(jt+j)
u1 = bji + bjt
bjs = b(js+j)
u2 = bjn + bjs
u3 = bji - bjt
u4 = bjn - bjs
bjx = b(jx+j)
bjt = bjx
u5 = u1 + u2
u6 = c1 * ( u1 - u2 )
bjp = b(jp+j)
u7 = bjp - 0.25_dp * u5
bx = bjp + u5
u8 = u7 + u6
u9 = u7 - u6
b(jp+j) = bjd
u10 = c3 * u3 - c2 * u4
u11 = c2 * u3 + c3 * u4
b(jd+j) = bx
a(ji+j) = co1*(t8-u11) - si1*(u8+t11)
b(ji+j) = si1*(t8-u11) + co1*(u8+t11)
a(jx+j) = co4*(t8+u11) - si4*(u8-t11)
b(jx+j) = si4*(t8+u11) + co4*(u8-t11)
a(jn+j) = co2*(t9-u10) - si2*(u9+t10)
b(jn+j) = si2*(t9-u10) + co2*(u9+t10)
a(js+j) = co3*(t9+u10) - si3*(u9-t10)
b(js+j) = si3*(t9+u10) + co3*(u9-t10)
*----------------------
ajv = a(jv+j)
ajy = a(jy+j)
t1 = ajv + ajy
t2 = ajo + ajt
t3 = ajv - ajy
t4 = ajo - ajt
a(jv+j) = ajj
t5 = t1 + t2
t6 = c1 * ( t1 - t2 )
aju = a(ju+j)
t7 = aju - 0.25_dp * t5
ax = aju + t5
t8 = t7 + t6
t9 = t7 - t6
a(ju+j) = aje
t10 = c3 * t3 - c2 * t4
t11 = c2 * t3 + c3 * t4
a(je+j) = ax
bjv = b(jv+j)
bjy = b(jy+j)
u1 = bjv + bjy
u2 = bjo + bjt
u3 = bjv - bjy
u4 = bjo - bjt
b(jv+j) = bjj
u5 = u1 + u2
u6 = c1 * ( u1 - u2 )
bju = b(ju+j)
u7 = bju - 0.25_dp * u5
bx = bju + u5
u8 = u7 + u6
u9 = u7 - u6
b(ju+j) = bje
u10 = c3 * u3 - c2 * u4
u11 = c2 * u3 + c3 * u4
b(je+j) = bx
a(jj+j) = co1*(t8-u11) - si1*(u8+t11)
b(jj+j) = si1*(t8-u11) + co1*(u8+t11)
a(jy+j) = co4*(t8+u11) - si4*(u8-t11)
b(jy+j) = si4*(t8+u11) + co4*(u8-t11)
a(jo+j) = co2*(t9-u10) - si2*(u9+t10)
b(jo+j) = si2*(t9-u10) + co2*(u9+t10)
a(jt+j) = co3*(t9+u10) - si3*(u9-t10)
b(jt+j) = si3*(t9+u10) + co3*(u9-t10)
j = j + jump
440 continue
*
endif
*
*-----(end of loop across transforms)
*
ja = ja + jstepx
if (ja.lt.istart) ja = ja + ninc
445 continue
450 continue
460 continue
*-----( end of double loop for this k )
kk = kk + 2*la
470 continue
*-----( end of loop over values of k )
la = 5*la
480 continue
*-----( end of loop on type II radix-5 passes )
*-----( nvex transforms completed)
490 continue
istart = istart + nvex * jump
500 continue
*-----( end of loop on blocks of transforms )
*
return
end subroutine gpfa5f
!*********************************************************************
SUBROUTINE SETGPFA(TRIGS,maxtrigs,ntrigs,N)
*
use precision, only : dp
INTEGER maxtrigs,NJ(3),ntrigs
REAL(dp) TRIGS(maxtrigs)
integer :: nn, n, ifac, ll, kk, ip, iq, ir,
$ i, ni, irot, kink, k
real(dp) :: angle, twopi, del
*
* DECOMPOSE N INTO FACTORS 2,3,5
* ------------------------------
NN = N
IFAC = 2
*
DO 30 LL = 1 , 3
KK = 0
10 CONTINUE
IF (MOD(NN,IFAC).NE.0) GO TO 20
KK = KK + 1
NN = NN / IFAC
GO TO 10
20 CONTINUE
NJ(LL) = KK
IFAC = IFAC + LL
30 CONTINUE
*
IF (NN.NE.1) THEN
WRITE(6,40) N
40 FORMAT(' *** WARNING!!!',I10,' IS NOT A LEGAL VALUE OF N ***')
RETURN
ENDIF
*
IP = NJ(1)
IQ = NJ(2)
IR = NJ(3)
*
* COMPUTE LIST OF ROTATED TWIDDLE FACTORS
* ---------------------------------------
NJ(1) = 2**IP
NJ(2) = 3**IQ
NJ(3) = 5**IR
*
TWOPI = 4.0_dp * ASIN(1.0_dp)
I = 1
*
DO 60 LL = 1 , 3
NI = NJ(LL)
IF (NI.EQ.1) GO TO 60
*
DEL = TWOPI / real(NI,kind=dp)
IROT = N / NI
KINK = MOD(IROT,NI)
KK = 0
*
DO 50 K = 1 , NI
ANGLE = real(KK,kind=dp) * DEL
if (i.lt.maxtrigs) then
TRIGS(I) = COS(ANGLE)
TRIGS(I+1) = SIN(ANGLE)
endif
I = I + 2
KK = KK + KINK
IF (KK.GT.NI) KK = KK - NI
50 CONTINUE
60 CONTINUE
ntrigs = i
*
RETURN
END SUBROUTINE SETGPFA
END MODULE fft1d