436 lines
15 KiB
Fortran
436 lines
15 KiB
Fortran
!
|
|
! Copyright (C) 1996-2016 The SIESTA group
|
|
! This file is distributed under the terms of the
|
|
! GNU General Public License: see COPYING in the top directory
|
|
! or http://www.gnu.org/copyleft/gpl.txt.
|
|
! See Docs/Contributors.txt for a list of contributors.
|
|
!
|
|
subroutine pdos3k( nuo, no, maxuo, maxnh, &
|
|
nuotot, numh, listhptr, listh, H, S, &
|
|
E1, E2, nhist, sigma, &
|
|
xij, indxuo, nk, kpoint, wk, &
|
|
Haux, Saux, psi, eo, dtot, dpr )
|
|
|
|
! **********************************************************************
|
|
! Find the density of states projected onto the atomic orbitals
|
|
! D_mu(E) = Sum(n,k,nu) C(mu,n,k) C(nu,n,k) S(mu,nu,k) Delta(E-E(n,k))
|
|
! where n run over all the bands between two given energies
|
|
! Written by J. Junquera and E. Artacho. Nov' 99
|
|
! Spin-orbit coupling version by J. Ferrer, October 2007
|
|
! Huge performance increase by N. Papior, 2022
|
|
! **** INPUT *********************************************************
|
|
! INTEGER NUO : Number of atomic orbitals in the unit cell
|
|
! INTEGER NO : Number of atomic orbitals in the supercell
|
|
! INTEGER MAXUO : Maximum number of atomic orbitals in the unit cell
|
|
! INTEGER MAXNH : Maximum number of orbitals interacting
|
|
! with any orbital
|
|
! INTEGER NUOTOT : Total number of orbitals per unit cell
|
|
! INTEGER NUMH(NUO) : Number of nonzero elements of each row
|
|
! of hamiltonian matrix
|
|
! INTEGER LISTHPTR(NUO) : Pointer to each row (-1) of the
|
|
! hamiltonian matrix
|
|
! INTEGER LISTH(MAXNH) : Nonzero hamiltonian-matrix element
|
|
! column indexes for each matrix row
|
|
! REAL*8 H(MAXNH,8) : Hamiltonian in sparse format
|
|
! REAL*8 S(MAXNH) : Overlap in sparse format
|
|
! REAL*8 E1, E2 : Energy range for density-matrix states
|
|
! (to find local density of states)
|
|
! Not used if e1 > e2
|
|
! INTEGER NHIST : Number of the subdivisions of the histogram
|
|
! REAL*8 SIGMA : Width of the gaussian to expand the eigenvectors
|
|
! REAL*8 XIJ(3,MAXNH) : Vectors between orbital centers (sparse)
|
|
! (not used if only gamma point)
|
|
! INTEGER INDXUO(NO) : Index of equivalent orbital in unit cell
|
|
! INTEGER NK : Number of k points
|
|
! REAL*8 KPOINT(3,NK) : k point vectors
|
|
! REAL*8 WK(NK) : Weights for k points
|
|
! **** AUXILIARY *****************************************************
|
|
! complex*16 HAUX(2,NUOTOT,2,NUO) : Auxiliary space for the hamiltonian matrix
|
|
! complex*16 SAUX(2,NUOTOT,2,NUO) : Auxiliary space for the overlap matrix
|
|
! complex*16 PSI(2,NUOTOT,2,NUO) : Auxiliary space for the eigenvectors
|
|
! real*8 eo(nuotot*2) : Auxiliary space for the eigenvalues
|
|
! **** OUTPUT ********************************************************
|
|
! REAL*8 DTOT(4,NHIST) : Total density of states
|
|
! REAL*8 DPR(4,nuotot,NHIST): Projected density of states
|
|
! **********************************************************************
|
|
|
|
use precision
|
|
use parallel, only : Node, Nodes, BlockSize
|
|
use parallelsubs, only : GetNodeOrbs, LocalToGlobalOrb
|
|
use units, only : pi
|
|
use alloc, only : re_alloc, de_alloc
|
|
#ifdef MPI
|
|
use mpi_siesta
|
|
#endif
|
|
use sys, only : die
|
|
use m_diag, only: diag_get_1d_context, diag_descinit
|
|
use m_diag_option, only: Serial
|
|
|
|
implicit none
|
|
|
|
integer :: nuo, no, maxuo, maxnh, nk, nhist, nuotot
|
|
integer :: numh(nuo), listhptr(nuo), listh(maxnh), indxuo(no)
|
|
|
|
real(dp) :: H(maxnh,8), S(maxnh), E1, E2, sigma, &
|
|
xij(3,maxnh), kpoint(3,nk), &
|
|
dtot(4,nhist), dpr(4,nuotot,nhist), wk(nk)
|
|
complex(dp) :: Haux(2,nuotot,2,nuo), Saux(2,nuotot,2,nuo)
|
|
complex(dp) :: psi(2,nuotot,2,nuo)
|
|
real(dp) :: eo(nuotot*2)
|
|
|
|
! Internal variables ---------------------------------------------------
|
|
integer :: ik, ispin, iuo, io, juo, j, jo, ihist, iband, ind, ierror
|
|
integer :: iEmin, iEmax
|
|
integer :: nuo2, nuotot2, BlockSize2
|
|
|
|
real(dp) :: delta, ener, diff, gauss, norm, wksum
|
|
real(dp) :: limit, inv_sigma2
|
|
real(dp) :: D1, D2
|
|
|
|
#ifdef MPI
|
|
! All of our matrices are described in the same manner
|
|
! So we only need one descriptor
|
|
integer :: ctxt, desc(9)
|
|
real(dp), dimension(:,:,:), pointer :: aux_red => null()
|
|
#endif
|
|
|
|
external :: cdiag
|
|
|
|
! START -----------------------------------------------------------------
|
|
|
|
! Initialize some variables
|
|
delta = (e2 - e1)/(nhist-1)
|
|
inv_sigma2 = 1._dp / sigma**2
|
|
! Limit is exp(-20) ~ 2e-9
|
|
limit = sqrt(20._dp) * sigma
|
|
nuo2 = nuo * 2
|
|
nuotot2 = nuotot * 2
|
|
BlockSize2 = BlockSize * 2
|
|
|
|
! Find eigenvalues at every k point
|
|
do ik = 1,nk
|
|
|
|
call setup_k(kpoint(:,ik))
|
|
|
|
! Diagonalize for each k point. Note duplication of problem size
|
|
call cdiag( Haux, Saux, nuotot2, nuo2, nuotot2, &
|
|
eo, psi, nuotot2, 1, ierror, BlockSize2 )
|
|
|
|
if ( ierror > 0 ) then
|
|
call die('Terminating due to failed diagonalisation')
|
|
|
|
elseif ( ierror < 0 ) then
|
|
|
|
! Repeat diagonalisation with increased memory to handle clustering
|
|
call setup_k(kpoint(:,ik))
|
|
|
|
call cdiag( Haux, Saux, nuotot2, nuo2, nuotot2, &
|
|
eo, psi, nuotot2, 1, ierror, BlockSize2 )
|
|
endif
|
|
|
|
! Figure out the minimum and maximum eigenstates that will contribute
|
|
! This ensures we calculate fewer columns of the psi basis
|
|
! Note, eo *MUST* be sorted. This is ensured by lapack/scalapack.
|
|
iEmin = nuotot2
|
|
do jo = 1, nuotot2
|
|
if ( (E1 - limit) < EO(jo) .and. EO(jo) < (E2 + limit) ) then
|
|
iEmin = jo
|
|
exit
|
|
end if
|
|
end do
|
|
|
|
iEmax = 1
|
|
do jo = nuotot2, 1, -1
|
|
if ( (E1 - limit) < EO(jo) .and. EO(jo) < (E2 + limit) ) then
|
|
iEmax = jo
|
|
exit
|
|
end if
|
|
end do
|
|
! Ensure that they are in full sections (makes the below easier)
|
|
iEmin = iEmin - mod(iEmin-1,2)
|
|
iEmax = iEmax + mod(iEmax,2)
|
|
|
|
! correct wrong cases, should probably never be found?
|
|
if ( iEmin > iEmax ) then
|
|
iEmin = 1
|
|
iEmax = 0
|
|
end if
|
|
|
|
! Recalculate again the overlap matrix in k-space
|
|
call setup_Sk(kpoint(:,ik))
|
|
|
|
! Convert iEmin/iEmax to local indices (not factor 2)
|
|
iEmin = (iEmin + 1)/2
|
|
iEmax = iEmax / 2
|
|
! Total number of elements calculated (jo is allowed to be 0)
|
|
jo = (iEmax - iEmin + 1) * 2
|
|
|
|
! Now perform the matrix-multiplications
|
|
! This is: S | psi >
|
|
|
|
if ( Serial ) then
|
|
call zgemm('N','N',nuotot2, jo, nuotot2, cmplx(1._dp, 0._dp, dp), &
|
|
Saux(1,1,1,1),nuotot2, psi(1,1,1,iEmin),nuotot2, cmplx(0._dp, 0._dp, dp), &
|
|
Haux(1,1,1,iEmin), nuotot2)
|
|
end if
|
|
|
|
#ifdef MPI
|
|
|
|
if ( .not. Serial ) then
|
|
! We need to define the contexts and matrix descriptors
|
|
ctxt = diag_get_1d_context()
|
|
call diag_descinit(nuotot2, nuotot2, BlockSize2, desc, ctxt)
|
|
|
|
call pzgemm('N', 'N', nuotot2, jo, nuotot2, cmplx(1._dp, 0._dp, dp), &
|
|
Saux(1,1,1,1), 1, 1, desc, psi(1,1,1,1), 1, iEmin*2-1, desc, &
|
|
cmplx(0._dp, 0._dp, dp), Haux(1,1,1,1), 1, iEmin*2-1, desc)
|
|
end if
|
|
|
|
! reset
|
|
iuo = iEmin
|
|
juo = iEmax
|
|
iEmin = 1
|
|
iEmax = 0
|
|
do jo = 1, nuo
|
|
call LocalToGlobalOrb(jo, Node, Nodes, j)
|
|
if ( iuo <= j ) then
|
|
! lowest point where we have this orbital
|
|
iEmin = jo
|
|
exit
|
|
end if
|
|
end do
|
|
do jo = nuo, 1, -1
|
|
call LocalToGlobalOrb(jo, Node, Nodes, j)
|
|
if ( j <= juo ) then
|
|
iEmax = jo
|
|
exit
|
|
end if
|
|
end do
|
|
|
|
! Now iEmin, iEmax are local indices
|
|
|
|
!!$OMP parallel default(none) shared(Haux,psi,dtot,dpr,iEmin,iEmax,inv_sigma2,D1,D2) &
|
|
!!$OMP& shared(nhist,Node,Nodes,eo,limit,wk,nuotot,nuo,ik,delta,e1) &
|
|
!!$OMP& private(jo,iuo,ihist,ener,iband,diff,gauss,j)
|
|
|
|
! Ensure we multiply with the local nodes complex conjugate
|
|
! This is the final step of < psi | S | psi >
|
|
! but doing it element wise, rather than a dot-product
|
|
! Now we will do some magic.
|
|
! A complex array is equivalent to two reals next to each other.
|
|
! So to utilize daxpy below, we just ensure the following:
|
|
! UU = real(UU)
|
|
! DD = aimag(UU)
|
|
! X = real(UD)
|
|
! Y = aimag(UD)
|
|
! This enables us efficient usage of BLAS while obfuscating things
|
|
! a bit.
|
|
! Additionally consider that we want the correct sign to use daxpy.
|
|
! So basically we need to calculate D21 + conjg(D12) which
|
|
! then has the correct signs.
|
|
|
|
!!$OMP do schedule(static)
|
|
do jo = iEmin, iEmax
|
|
do io = 1, nuotot
|
|
D1 = real(conjg(psi(1,io,1,jo)) * Haux(1,io,1,jo), dp)
|
|
D2 = real(conjg(psi(2,io,1,jo)) * Haux(2,io,1,jo), dp)
|
|
Haux(2,io,1,jo) = conjg(psi(1,io,1,jo)) * Haux(2,io,1,jo) &
|
|
+ psi(2,io,1,jo) * conjg(Haux(1,io,1,jo))
|
|
Haux(1,io,1,jo) = cmplx(D1, D2, dp)
|
|
end do
|
|
do io = 1, nuotot
|
|
D1 = real(conjg(psi(1,io,2,jo)) * Haux(1,io,2,jo), dp)
|
|
D2 = real(conjg(psi(2,io,2,jo)) * Haux(2,io,2,jo), dp)
|
|
Haux(2,io,2,jo) = conjg(psi(1,io,2,jo)) * Haux(2,io,2,jo) &
|
|
+ psi(2,io,2,jo) * conjg(Haux(1,io,2,jo))
|
|
Haux(1,io,2,jo) = cmplx(D1, D2, dp)
|
|
end do
|
|
end do
|
|
!!$OMP end do
|
|
|
|
!!$OMP do schedule(static,16)
|
|
do ihist = 1, nhist
|
|
ener = E1 + (ihist - 1) * delta
|
|
do iband = iEmin, iEmax
|
|
! the energy comes from the global array
|
|
call LocalToGlobalOrb(iband, Node, Nodes, j)
|
|
diff = abs(ener - eo(j*2-1))
|
|
|
|
! TODO, this *could* be merged into a single zgemm call with gauss(2), but...
|
|
! In fact, all of iEmin ... iEmax could be merged to do everything *once*
|
|
! But a bit more complicated.
|
|
|
|
if ( diff < limit ) then
|
|
gauss = exp(-diff**2*inv_sigma2) * wk(ik)
|
|
! See discussion about daxpy + OMP usage in pdosg.F90
|
|
call daxpy(nuotot*4,gauss,Haux(1,1,1,iband),1,dpr(1,1,ihist),1)
|
|
end if
|
|
|
|
diff = abs(ener - eo(j*2))
|
|
if ( diff < limit ) then
|
|
gauss = exp(-diff**2*inv_sigma2) * wk(ik)
|
|
! See discussion about daxpy + OMP usage in pdosg.F90
|
|
call daxpy(nuotot*4,gauss,Haux(1,1,2,iband),1,dpr(1,1,ihist),1)
|
|
end if
|
|
|
|
end do
|
|
end do
|
|
!!$OMP end do nowait
|
|
|
|
!!$OMP end parallel
|
|
|
|
#else
|
|
|
|
!!$OMP parallel default(none) shared(Haux,psi,dtot,dpr,iEmin,iEmax,inv_sigma2,D1,D2) &
|
|
!!$OMP& shared(nhist,Node,Nodes,eo,limit,wk,nuotot,nuo,ik,delta,e1) &
|
|
!!$OMP& private(jo,iuo,ihist,ener,iband,diff,gauss,j)
|
|
|
|
! Ensure we multiply with the local nodes complex conjugate
|
|
! This is the final step of < psi | S | psi >
|
|
! but doing it element wise, rather than a dot-product
|
|
! Now we will do some magic.
|
|
! A complex array is equivalent to two reals next to each other.
|
|
! So to utilize daxpy below, we just ensure the following:
|
|
! UU = real(UU)
|
|
! DD = aimag(UU)
|
|
! X = real(UD)
|
|
! Y = aimag(UD)
|
|
! This enables us efficient usage of BLAS while obfuscating things
|
|
! a bit.
|
|
! Additionally consider that we want the correct sign to use daxpy.
|
|
! So basically we need to calculate D21 + conjg(D12) which
|
|
! then has the correct signs.
|
|
|
|
!!$OMP do schedule(static)
|
|
do jo = iEmin, iEmax
|
|
do io = 1, nuotot
|
|
D1 = real(conjg(psi(1,io,1,jo)) * Haux(1,io,1,jo), dp)
|
|
D2 = real(conjg(psi(2,io,1,jo)) * Haux(2,io,1,jo), dp)
|
|
Haux(2,io,1,jo) = conjg(psi(1,io,1,jo)) * Haux(2,io,1,jo) &
|
|
+ psi(2,io,1,jo) * conjg(Haux(1,io,1,jo))
|
|
Haux(1,io,1,jo) = cmplx(D1, D2, dp)
|
|
end do
|
|
do io = 1, nuotot
|
|
D1 = real(conjg(psi(1,io,2,jo)) * Haux(1,io,2,jo), dp)
|
|
D2 = real(conjg(psi(2,io,2,jo)) * Haux(2,io,2,jo), dp)
|
|
Haux(2,io,2,jo) = conjg(psi(1,io,2,jo)) * Haux(2,io,2,jo) &
|
|
+ psi(2,io,2,jo) * conjg(Haux(1,io,2,jo))
|
|
Haux(1,io,2,jo) = cmplx(D1, D2, dp)
|
|
end do
|
|
end do
|
|
!!$OMP end do
|
|
|
|
!!$OMP do schedule(static,16)
|
|
do ihist = 1, nhist
|
|
ener = E1 + (ihist - 1) * delta
|
|
do iband = iEmin, iEmax
|
|
! the energy comes from the global array
|
|
diff = abs(ener - eo(iband*2-1))
|
|
|
|
if ( diff < limit ) then
|
|
gauss = exp(-diff**2*inv_sigma2) * wk(ik)
|
|
! See discussion about daxpy + OMP usage in pdosg.F90
|
|
call daxpy(nuotot*4,gauss,Haux(1,1,1,iband),1,dpr(1,1,ihist),1)
|
|
end if
|
|
|
|
diff = abs(ener - eo(iband*2))
|
|
if ( diff < limit ) then
|
|
gauss = exp(-diff**2*inv_sigma2) * wk(ik)
|
|
! See discussion about daxpy + OMP usage in pdosg.F90
|
|
call daxpy(nuotot*4,gauss,Haux(1,1,2,iband),1,dpr(1,1,ihist),1)
|
|
end if
|
|
|
|
end do
|
|
end do
|
|
!!$OMP end do nowait
|
|
|
|
!!$OMP end parallel
|
|
|
|
#endif
|
|
|
|
end do ! nk
|
|
|
|
#ifdef MPI
|
|
! Allocate workspace array for global reduction
|
|
call re_alloc( aux_red, 1, 4, 1, nuotot, 1, nhist, &
|
|
name='aux_red', routine='pdos3k')
|
|
|
|
! Global reduction of dpr matrix
|
|
call MPI_Reduce(dpr(1,1,1),aux_red(1,1,1),4*nuotot*nhist, &
|
|
MPI_double_precision,MPI_sum,0,MPI_Comm_World,ierror)
|
|
dpr(:,:,:) = aux_red(:,:,:)
|
|
|
|
call de_alloc(aux_red, name='aux_red', routine='pdos3k')
|
|
|
|
#endif
|
|
|
|
wksum = 0.0d0
|
|
do ik = 1, nk
|
|
wksum = wksum + wk(ik)
|
|
end do
|
|
norm = 1._dp / (sigma * sqrt(pi) * wksum)
|
|
call dscal(4*nuotot*nhist,norm,dpr(1,1,1),1)
|
|
|
|
do ihist = 1, nhist
|
|
dtot(1,ihist) = sum(dpr(1,:,ihist))
|
|
dtot(2,ihist) = sum(dpr(2,:,ihist))
|
|
dtot(3,ihist) = sum(dpr(3,:,ihist))
|
|
dtot(4,ihist) = sum(dpr(4,:,ihist))
|
|
end do
|
|
|
|
contains
|
|
|
|
subroutine setup_k(k)
|
|
real(dp), intent(in) :: k(3)
|
|
real(dp) :: kxij
|
|
complex(dp) :: kphs
|
|
|
|
Saux(:,:,:,:) = cmplx(0.0_dp,0.0_dp,dp)
|
|
Haux(:,:,:,:) = cmplx(0.0_dp,0.0_dp,dp)
|
|
|
|
do iuo = 1,nuo
|
|
do j = 1,numh(iuo)
|
|
ind = listhptr(iuo) + j
|
|
jo = listh(ind)
|
|
juo = indxuo(jo)
|
|
kxij = k(1) * xij(1,ind) + k(2) * xij(2,ind) + k(3) * xij(3,ind)
|
|
kphs = exp(cmplx(0.0_dp, -kxij, dp))
|
|
|
|
Saux(1,juo,1,iuo) = Saux(1,juo,1,iuo) + S(ind) * kphs
|
|
Saux(2,juo,2,iuo) = Saux(2,juo,2,iuo) + S(ind) * kphs
|
|
Haux(1,juo,1,iuo) = Haux(1,juo,1,iuo) + cmplx(H(ind,1), H(ind,5),dp) * kphs
|
|
Haux(2,juo,2,iuo) = Haux(2,juo,2,iuo) + cmplx(H(ind,2), H(ind,6),dp) * kphs
|
|
Haux(1,juo,2,iuo) = Haux(1,juo,2,iuo) + cmplx(H(ind,3), - H(ind,4),dp) * kphs
|
|
Haux(2,juo,1,iuo) = Haux(2,juo,1,iuo) + cmplx(H(ind,7), + H(ind,8),dp) * kphs
|
|
|
|
end do
|
|
end do
|
|
|
|
end subroutine setup_k
|
|
|
|
|
|
subroutine setup_Sk(k)
|
|
real(dp), intent(in) :: k(3)
|
|
real(dp) :: kxij
|
|
complex(dp) :: kphs
|
|
|
|
Saux(:,:,:,:) = cmplx(0.0_dp,0.0_dp,dp)
|
|
|
|
do iuo = 1,nuo
|
|
do j = 1,numh(iuo)
|
|
ind = listhptr(iuo) + j
|
|
jo = listh(ind)
|
|
juo = indxuo(jo)
|
|
kxij = k(1) * xij(1,ind) + k(2) * xij(2,ind) + k(3) * xij(3,ind)
|
|
kphs = exp(cmplx(0.0_dp, -kxij, dp))
|
|
|
|
Saux(1,juo,1,iuo) = Saux(1,juo,1,iuo) + S(ind) * kphs
|
|
Saux(2,juo,2,iuo) = Saux(2,juo,2,iuo) + S(ind) * kphs
|
|
|
|
end do
|
|
end do
|
|
|
|
end subroutine setup_Sk
|
|
|
|
end subroutine pdos3k
|