linalg: add functions needed for MSCG

2018-05-18 15:08:08 -06:00 · 2018-05-18 15:08:08 -06:00 · 858c211fdc
parent 6997aedf30
commit 858c211fdc
13 changed files with 6647 additions and 0 deletions
--- a/lib/linalg/dgelsd.f
+++ b/lib/linalg/dgelsd.f
@ -0,0 +1,629 @@
+*> \brief <b> DGELSD computes the minimum-norm solution to a linear least squares problem for GE matrices</b>
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DGELSD + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelsd.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelsd.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelsd.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
+*                          WORK, LWORK, IWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+*       DOUBLE PRECISION   RCOND
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            IWORK( * )
+*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DGELSD computes the minimum-norm solution to a real linear least
+*> squares problem:
+*>     minimize 2-norm(| b - A*x |)
+*> using the singular value decomposition (SVD) of A. A is an M-by-N
+*> matrix which may be rank-deficient.
+*>
+*> Several right hand side vectors b and solution vectors x can be
+*> handled in a single call; they are stored as the columns of the
+*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+*> matrix X.
+*>
+*> The problem is solved in three steps:
+*> (1) Reduce the coefficient matrix A to bidiagonal form with
+*>     Householder transformations, reducing the original problem
+*>     into a "bidiagonal least squares problem" (BLS)
+*> (2) Solve the BLS using a divide and conquer approach.
+*> (3) Apply back all the Householder transformations to solve
+*>     the original least squares problem.
+*>
+*> The effective rank of A is determined by treating as zero those
+*> singular values which are less than RCOND times the largest singular
+*> value.
+*>
+*> The divide and conquer algorithm makes very mild assumptions about
+*> floating point arithmetic. It will work on machines with a guard
+*> digit in add/subtract, or on those binary machines without guard
+*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*> Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*> without guard digits, but we know of none.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of A. M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of A. N >= 0.
+*> \endverbatim
+*>
+*> \param[in] NRHS
+*> \verbatim
+*>          NRHS is INTEGER
+*>          The number of right hand sides, i.e., the number of columns
+*>          of the matrices B and X. NRHS >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is DOUBLE PRECISION array, dimension (LDA,N)
+*>          On entry, the M-by-N matrix A.
+*>          On exit, A has been destroyed.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A.  LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[in,out] B
+*> \verbatim
+*>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
+*>          On entry, the M-by-NRHS right hand side matrix B.
+*>          On exit, B is overwritten by the N-by-NRHS solution
+*>          matrix X.  If m >= n and RANK = n, the residual
+*>          sum-of-squares for the solution in the i-th column is given
+*>          by the sum of squares of elements n+1:m in that column.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*>          LDB is INTEGER
+*>          The leading dimension of the array B. LDB >= max(1,max(M,N)).
+*> \endverbatim
+*>
+*> \param[out] S
+*> \verbatim
+*>          S is DOUBLE PRECISION array, dimension (min(M,N))
+*>          The singular values of A in decreasing order.
+*>          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
+*> \endverbatim
+*>
+*> \param[in] RCOND
+*> \verbatim
+*>          RCOND is DOUBLE PRECISION
+*>          RCOND is used to determine the effective rank of A.
+*>          Singular values S(i) <= RCOND*S(1) are treated as zero.
+*>          If RCOND < 0, machine precision is used instead.
+*> \endverbatim
+*>
+*> \param[out] RANK
+*> \verbatim
+*>          RANK is INTEGER
+*>          The effective rank of A, i.e., the number of singular values
+*>          which are greater than RCOND*S(1).
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
+*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*> \endverbatim
+*>
+*> \param[in] LWORK
+*> \verbatim
+*>          LWORK is INTEGER
+*>          The dimension of the array WORK. LWORK must be at least 1.
+*>          The exact minimum amount of workspace needed depends on M,
+*>          N and NRHS. As long as LWORK is at least
+*>              12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2,
+*>          if M is greater than or equal to N or
+*>              12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2,
+*>          if M is less than N, the code will execute correctly.
+*>          SMLSIZ is returned by ILAENV and is equal to the maximum
+*>          size of the subproblems at the bottom of the computation
+*>          tree (usually about 25), and
+*>             NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
+*>          For good performance, LWORK should generally be larger.
+*>
+*>          If LWORK = -1, then a workspace query is assumed; the routine
+*>          only calculates the optimal size of the WORK array, returns
+*>          this value as the first entry of the WORK array, and no error
+*>          message related to LWORK is issued by XERBLA.
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*>          IWORK is INTEGER array, dimension (MAX(1,LIWORK))
+*>          LIWORK >= max(1, 3 * MINMN * NLVL + 11 * MINMN),
+*>          where MINMN = MIN( M,N ).
+*>          On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*>          > 0:  the algorithm for computing the SVD failed to converge;
+*>                if INFO = i, i off-diagonal elements of an intermediate
+*>                bidiagonal form did not converge to zero.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date June 2017
+*
+*> \ingroup doubleGEsolve
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*>       California at Berkeley, USA \n
+*>     Osni Marques, LBNL/NERSC, USA \n
+*
+*  =====================================================================
+      SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
+     $                   WORK, LWORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.7.1) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     June 2017
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ,
+     $                   LDWORK, LIWORK, MAXMN, MAXWRK, MINMN, MINWRK,
+     $                   MM, MNTHR, NLVL, NWORK, SMLSIZ, WLALSD
+      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEBRD, DGELQF, DGEQRF, DLABAD, DLACPY, DLALSD,
+     $                   DLASCL, DLASET, DORMBR, DORMLQ, DORMQR, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           ILAENV, DLAMCH, DLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, INT, LOG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MAXMN = MAX( M, N )
+      MNTHR = ILAENV( 6, 'DGELSD', ' ', M, N, NRHS, -1 )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
+         INFO = -7
+      END IF
+*
+      SMLSIZ = ILAENV( 9, 'DGELSD', ' ', 0, 0, 0, 0 )
+*
+*     Compute workspace.
+*     (Note: Comments in the code beginning "Workspace:" describe the
+*     minimal amount of workspace needed at that point in the code,
+*     as well as the preferred amount for good performance.
+*     NB refers to the optimal block size for the immediately
+*     following subroutine, as returned by ILAENV.)
+*
+      MINWRK = 1
+      LIWORK = 1
+      MINMN = MAX( 1, MINMN )
+      NLVL = MAX( INT( LOG( DBLE( MINMN ) / DBLE( SMLSIZ+1 ) ) /
+     $       LOG( TWO ) ) + 1, 0 )
+*
+      IF( INFO.EQ.0 ) THEN
+         MAXWRK = 0
+         LIWORK = 3*MINMN*NLVL + 11*MINMN
+         MM = M
+         IF( M.GE.N .AND. M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns.
+*
+            MM = N
+            MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'DGEQRF', ' ', M, N,
+     $               -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, N+NRHS*
+     $               ILAENV( 1, 'DORMQR', 'LT', M, NRHS, N, -1 ) )
+         END IF
+         IF( M.GE.N ) THEN
+*
+*           Path 1 - overdetermined or exactly determined.
+*
+            MAXWRK = MAX( MAXWRK, 3*N+( MM+N )*
+     $               ILAENV( 1, 'DGEBRD', ' ', MM, N, -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, 3*N+NRHS*
+     $               ILAENV( 1, 'DORMBR', 'QLT', MM, NRHS, N, -1 ) )
+            MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
+     $               ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, N, -1 ) )
+            WLALSD = 9*N+2*N*SMLSIZ+8*N*NLVL+N*NRHS+(SMLSIZ+1)**2
+            MAXWRK = MAX( MAXWRK, 3*N+WLALSD )
+            MINWRK = MAX( 3*N+MM, 3*N+NRHS, 3*N+WLALSD )
+         END IF
+         IF( N.GT.M ) THEN
+            WLALSD = 9*M+2*M*SMLSIZ+8*M*NLVL+M*NRHS+(SMLSIZ+1)**2
+            IF( N.GE.MNTHR ) THEN
+*
+*              Path 2a - underdetermined, with many more columns
+*              than rows.
+*
+               MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+2*M*
+     $                  ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS*
+     $                  ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )*
+     $                  ILAENV( 1, 'DORMBR', 'PLN', M, NRHS, M, -1 ) )
+               IF( NRHS.GT.1 ) THEN
+                  MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS )
+               ELSE
+                  MAXWRK = MAX( MAXWRK, M*M+2*M )
+               END IF
+               MAXWRK = MAX( MAXWRK, M+NRHS*
+     $                  ILAENV( 1, 'DORMLQ', 'LT', N, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+WLALSD )
+!     XXX: Ensure the Path 2a case below is triggered.  The workspace
+!     calculation should use queries for all routines eventually.
+               MAXWRK = MAX( MAXWRK,
+     $              4*M+M*M+MAX( M, 2*M-4, NRHS, N-3*M ) )
+            ELSE
+*
+*              Path 2 - remaining underdetermined cases.
+*
+               MAXWRK = 3*M + ( N+M )*ILAENV( 1, 'DGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MAXWRK = MAX( MAXWRK, 3*M+NRHS*
+     $                  ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, N, -1 ) )
+               MAXWRK = MAX( MAXWRK, 3*M+M*
+     $                  ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, 3*M+WLALSD )
+            END IF
+            MINWRK = MAX( 3*M+NRHS, 3*M+M, 3*M+WLALSD )
+         END IF
+         MINWRK = MIN( MINWRK, MAXWRK )
+         WORK( 1 ) = MAXWRK
+         IWORK( 1 ) = LIWORK
+
+         IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+            INFO = -12
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGELSD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         GO TO 10
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters.
+*
+      EPS = DLAMCH( 'P' )
+      SFMIN = DLAMCH( 'S' )
+      SMLNUM = SFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A if max entry outside range [SMLNUM,BIGNUM].
+*
+      ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM.
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM.
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 )
+         RANK = 0
+         GO TO 10
+      END IF
+*
+*     Scale B if max entry outside range [SMLNUM,BIGNUM].
+*
+      BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM.
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM.
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     If M < N make sure certain entries of B are zero.
+*
+      IF( M.LT.N )
+     $   CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
+*
+*     Overdetermined case.
+*
+      IF( M.GE.N ) THEN
+*
+*        Path 1 - overdetermined or exactly determined.
+*
+         MM = M
+         IF( M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns.
+*
+            MM = N
+            ITAU = 1
+            NWORK = ITAU + N
+*
+*           Compute A=Q*R.
+*           (Workspace: need 2*N, prefer N+N*NB)
+*
+            CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                   LWORK-NWORK+1, INFO )
+*
+*           Multiply B by transpose(Q).
+*           (Workspace: need N+NRHS, prefer N+NRHS*NB)
+*
+            CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
+     $                   LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*           Zero out below R.
+*
+            IF( N.GT.1 ) THEN
+               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
+            END IF
+         END IF
+*
+         IE = 1
+         ITAUQ = IE + N
+         ITAUP = ITAUQ + N
+         NWORK = ITAUP + N
+*
+*        Bidiagonalize R in A.
+*        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
+*
+         CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of R.
+*        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL DLALSD( 'U', SMLSIZ, N, NRHS, S, WORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of R.
+*
+         CALL DORMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
+     $         MAX( M, 2*M-4, NRHS, N-3*M, WLALSD ) ) THEN
+*
+*        Path 2a - underdetermined, with many more columns than rows
+*        and sufficient workspace for an efficient algorithm.
+*
+         LDWORK = M
+         IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
+     $       M*LDA+M+M*NRHS, 4*M+M*LDA+WLALSD ) )LDWORK = LDA
+         ITAU = 1
+         NWORK = M + 1
+*
+*        Compute A=L*Q.
+*        (Workspace: need 2*M, prefer M+M*NB)
+*
+         CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+         IL = NWORK
+*
+*        Copy L to WORK(IL), zeroing out above its diagonal.
+*
+         CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
+         CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
+     $                LDWORK )
+         IE = IL + LDWORK*M
+         ITAUQ = IE + M
+         ITAUP = ITAUQ + M
+         NWORK = ITAUP + M
+*
+*        Bidiagonalize L in WORK(IL).
+*        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
+*
+         CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
+     $                WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of L.
+*        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUQ ), B, LDB, WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL DLALSD( 'U', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of L.
+*
+         CALL DORMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUP ), B, LDB, WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Zero out below first M rows of B.
+*
+         CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
+         NWORK = ITAU + M
+*
+*        Multiply transpose(Q) by B.
+*        (Workspace: need M+NRHS, prefer M+NRHS*NB)
+*
+         CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
+     $                LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      ELSE
+*
+*        Path 2 - remaining underdetermined cases.
+*
+         IE = 1
+         ITAUQ = IE + M
+         ITAUP = ITAUQ + M
+         NWORK = ITAUP + M
+*
+*        Bidiagonalize A.
+*        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*
+         CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors.
+*        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL DLALSD( 'L', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of A.
+*
+         CALL DORMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      END IF
+*
+*     Undo scaling.
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+   10 CONTINUE
+      WORK( 1 ) = MAXWRK
+      IWORK( 1 ) = LIWORK
+      RETURN
+*
+*     End of DGELSD
+*
+      END
--- a/lib/linalg/dgelss.f
+++ b/lib/linalg/dgelss.f
@ -0,0 +1,747 @@
+*> \brief <b> DGELSS solves overdetermined or underdetermined systems for GE matrices</b>
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DGELSS + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelss.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelss.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelss.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
+*                          WORK, LWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+*       DOUBLE PRECISION   RCOND
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DGELSS computes the minimum norm solution to a real linear least
+*> squares problem:
+*>
+*> Minimize 2-norm(| b - A*x |).
+*>
+*> using the singular value decomposition (SVD) of A. A is an M-by-N
+*> matrix which may be rank-deficient.
+*>
+*> Several right hand side vectors b and solution vectors x can be
+*> handled in a single call; they are stored as the columns of the
+*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
+*> X.
+*>
+*> The effective rank of A is determined by treating as zero those
+*> singular values which are less than RCOND times the largest singular
+*> value.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the matrix A. M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the matrix A. N >= 0.
+*> \endverbatim
+*>
+*> \param[in] NRHS
+*> \verbatim
+*>          NRHS is INTEGER
+*>          The number of right hand sides, i.e., the number of columns
+*>          of the matrices B and X. NRHS >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is DOUBLE PRECISION array, dimension (LDA,N)
+*>          On entry, the M-by-N matrix A.
+*>          On exit, the first min(m,n) rows of A are overwritten with
+*>          its right singular vectors, stored rowwise.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A.  LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[in,out] B
+*> \verbatim
+*>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
+*>          On entry, the M-by-NRHS right hand side matrix B.
+*>          On exit, B is overwritten by the N-by-NRHS solution
+*>          matrix X.  If m >= n and RANK = n, the residual
+*>          sum-of-squares for the solution in the i-th column is given
+*>          by the sum of squares of elements n+1:m in that column.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*>          LDB is INTEGER
+*>          The leading dimension of the array B. LDB >= max(1,max(M,N)).
+*> \endverbatim
+*>
+*> \param[out] S
+*> \verbatim
+*>          S is DOUBLE PRECISION array, dimension (min(M,N))
+*>          The singular values of A in decreasing order.
+*>          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
+*> \endverbatim
+*>
+*> \param[in] RCOND
+*> \verbatim
+*>          RCOND is DOUBLE PRECISION
+*>          RCOND is used to determine the effective rank of A.
+*>          Singular values S(i) <= RCOND*S(1) are treated as zero.
+*>          If RCOND < 0, machine precision is used instead.
+*> \endverbatim
+*>
+*> \param[out] RANK
+*> \verbatim
+*>          RANK is INTEGER
+*>          The effective rank of A, i.e., the number of singular values
+*>          which are greater than RCOND*S(1).
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
+*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*> \endverbatim
+*>
+*> \param[in] LWORK
+*> \verbatim
+*>          LWORK is INTEGER
+*>          The dimension of the array WORK. LWORK >= 1, and also:
+*>          LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
+*>          For good performance, LWORK should generally be larger.
+*>
+*>          If LWORK = -1, then a workspace query is assumed; the routine
+*>          only calculates the optimal size of the WORK array, returns
+*>          this value as the first entry of the WORK array, and no error
+*>          message related to LWORK is issued by XERBLA.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*>          > 0:  the algorithm for computing the SVD failed to converge;
+*>                if INFO = i, i off-diagonal elements of an intermediate
+*>                bidiagonal form did not converge to zero.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup doubleGEsolve
+*
+*  =====================================================================
+      SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     December 2016
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            BDSPAC, BL, CHUNK, I, IASCL, IBSCL, IE, IL,
+     $                   ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN,
+     $                   MAXWRK, MINMN, MINWRK, MM, MNTHR
+      INTEGER            LWORK_DGEQRF, LWORK_DORMQR, LWORK_DGEBRD,
+     $                   LWORK_DORMBR, LWORK_DORGBR, LWORK_DORMLQ,
+     $                   LWORK_DGELQF
+      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DBDSQR, DCOPY, DGEBRD, DGELQF, DGEMM, DGEMV,
+     $                   DGEQRF, DLABAD, DLACPY, DLASCL, DLASET, DORGBR,
+     $                   DORMBR, DORMLQ, DORMQR, DRSCL, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           ILAENV, DLAMCH, DLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MAXMN = MAX( M, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
+         INFO = -7
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.)
+*
+      IF( INFO.EQ.0 ) THEN
+         MINWRK = 1
+         MAXWRK = 1
+         IF( MINMN.GT.0 ) THEN
+            MM = M
+            MNTHR = ILAENV( 6, 'DGELSS', ' ', M, N, NRHS, -1 )
+            IF( M.GE.N .AND. M.GE.MNTHR ) THEN
+*
+*              Path 1a - overdetermined, with many more rows than
+*                        columns
+*
+*              Compute space needed for DGEQRF
+               CALL DGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, INFO )
+               LWORK_DGEQRF=DUM(1)
+*              Compute space needed for DORMQR
+               CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, DUM(1), B,
+     $                   LDB, DUM(1), -1, INFO )
+               LWORK_DORMQR=DUM(1)
+               MM = N
+               MAXWRK = MAX( MAXWRK, N + LWORK_DGEQRF )
+               MAXWRK = MAX( MAXWRK, N + LWORK_DORMQR )
+            END IF
+            IF( M.GE.N ) THEN
+*
+*              Path 1 - overdetermined or exactly determined
+*
+*              Compute workspace needed for DBDSQR
+*
+               BDSPAC = MAX( 1, 5*N )
+*              Compute space needed for DGEBRD
+               CALL DGEBRD( MM, N, A, LDA, S, DUM(1), DUM(1),
+     $                      DUM(1), DUM(1), -1, INFO )
+               LWORK_DGEBRD=DUM(1)
+*              Compute space needed for DORMBR
+               CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, DUM(1),
+     $                B, LDB, DUM(1), -1, INFO )
+               LWORK_DORMBR=DUM(1)
+*              Compute space needed for DORGBR
+               CALL DORGBR( 'P', N, N, N, A, LDA, DUM(1),
+     $                   DUM(1), -1, INFO )
+               LWORK_DORGBR=DUM(1)
+*              Compute total workspace needed
+               MAXWRK = MAX( MAXWRK, 3*N + LWORK_DGEBRD )
+               MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORMBR )
+               MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR )
+               MAXWRK = MAX( MAXWRK, BDSPAC )
+               MAXWRK = MAX( MAXWRK, N*NRHS )
+               MINWRK = MAX( 3*N + MM, 3*N + NRHS, BDSPAC )
+               MAXWRK = MAX( MINWRK, MAXWRK )
+            END IF
+            IF( N.GT.M ) THEN
+*
+*              Compute workspace needed for DBDSQR
+*
+               BDSPAC = MAX( 1, 5*M )
+               MINWRK = MAX( 3*M+NRHS, 3*M+N, BDSPAC )
+               IF( N.GE.MNTHR ) THEN
+*
+*                 Path 2a - underdetermined, with many more columns
+*                 than rows
+*
+*                 Compute space needed for DGELQF
+                  CALL DGELQF( M, N, A, LDA, DUM(1), DUM(1),
+     $                -1, INFO )
+                  LWORK_DGELQF=DUM(1)
+*                 Compute space needed for DGEBRD
+                  CALL DGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
+     $                      DUM(1), DUM(1), -1, INFO )
+                  LWORK_DGEBRD=DUM(1)
+*                 Compute space needed for DORMBR
+                  CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA,
+     $                DUM(1), B, LDB, DUM(1), -1, INFO )
+                  LWORK_DORMBR=DUM(1)
+*                 Compute space needed for DORGBR
+                  CALL DORGBR( 'P', M, M, M, A, LDA, DUM(1),
+     $                   DUM(1), -1, INFO )
+                  LWORK_DORGBR=DUM(1)
+*                 Compute space needed for DORMLQ
+                  CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, DUM(1),
+     $                 B, LDB, DUM(1), -1, INFO )
+                  LWORK_DORMLQ=DUM(1)
+*                 Compute total workspace needed
+                  MAXWRK = M + LWORK_DGELQF
+                  MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DGEBRD )
+                  MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DORMBR )
+                  MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DORGBR )
+                  MAXWRK = MAX( MAXWRK, M*M + M + BDSPAC )
+                  IF( NRHS.GT.1 ) THEN
+                     MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS )
+                  ELSE
+                     MAXWRK = MAX( MAXWRK, M*M + 2*M )
+                  END IF
+                  MAXWRK = MAX( MAXWRK, M + LWORK_DORMLQ )
+               ELSE
+*
+*                 Path 2 - underdetermined
+*
+*                 Compute space needed for DGEBRD
+                  CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
+     $                      DUM(1), DUM(1), -1, INFO )
+                  LWORK_DGEBRD=DUM(1)
+*                 Compute space needed for DORMBR
+                  CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, A, LDA,
+     $                DUM(1), B, LDB, DUM(1), -1, INFO )
+                  LWORK_DORMBR=DUM(1)
+*                 Compute space needed for DORGBR
+                  CALL DORGBR( 'P', M, N, M, A, LDA, DUM(1),
+     $                   DUM(1), -1, INFO )
+                  LWORK_DORGBR=DUM(1)
+                  MAXWRK = 3*M + LWORK_DGEBRD
+                  MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORMBR )
+                  MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR )
+                  MAXWRK = MAX( MAXWRK, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, N*NRHS )
+               END IF
+            END IF
+            MAXWRK = MAX( MINWRK, MAXWRK )
+         END IF
+         WORK( 1 ) = MAXWRK
+*
+         IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY )
+     $      INFO = -12
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGELSS', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      EPS = DLAMCH( 'P' )
+      SFMIN = DLAMCH( 'S' )
+      SMLNUM = SFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, MINMN )
+         RANK = 0
+         GO TO 70
+      END IF
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     Overdetermined case
+*
+      IF( M.GE.N ) THEN
+*
+*        Path 1 - overdetermined or exactly determined
+*
+         MM = M
+         IF( M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns
+*
+            MM = N
+            ITAU = 1
+            IWORK = ITAU + N
+*
+*           Compute A=Q*R
+*           (Workspace: need 2*N, prefer N+N*NB)
+*
+            CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                   LWORK-IWORK+1, INFO )
+*
+*           Multiply B by transpose(Q)
+*           (Workspace: need N+NRHS, prefer N+NRHS*NB)
+*
+            CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
+     $                   LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*           Zero out below R
+*
+            IF( N.GT.1 )
+     $         CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
+         END IF
+*
+         IE = 1
+         ITAUQ = IE + N
+         ITAUP = ITAUQ + N
+         IWORK = ITAUP + N
+*
+*        Bidiagonalize R in A
+*        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
+*
+         CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of R
+*        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors of R in A
+*        (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+         CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IWORK = IE + N
+*
+*        Perform bidiagonal QR iteration
+*          multiply B by transpose of left singular vectors
+*          compute right singular vectors in A
+*        (Workspace: need BDSPAC)
+*
+         CALL DBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM,
+     $                1, B, LDB, WORK( IWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 10 I = 1, N
+            IF( S( I ).GT.THR ) THEN
+               CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
+            END IF
+   10    CONTINUE
+*
+*        Multiply B by right singular vectors
+*        (Workspace: need N, prefer N*NRHS)
+*
+         IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
+            CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO,
+     $                  WORK, LDB )
+            CALL DLACPY( 'G', N, NRHS, WORK, LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = LWORK / N
+            DO 20 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL DGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ),
+     $                     LDB, ZERO, WORK, N )
+               CALL DLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB )
+   20       CONTINUE
+         ELSE
+            CALL DGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
+            CALL DCOPY( N, WORK, 1, B, 1 )
+         END IF
+*
+      ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
+     $         MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN
+*
+*        Path 2a - underdetermined, with many more columns than rows
+*        and sufficient workspace for an efficient algorithm
+*
+         LDWORK = M
+         IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
+     $       M*LDA+M+M*NRHS ) )LDWORK = LDA
+         ITAU = 1
+         IWORK = M + 1
+*
+*        Compute A=L*Q
+*        (Workspace: need 2*M, prefer M+M*NB)
+*
+         CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+         IL = IWORK
+*
+*        Copy L to WORK(IL), zeroing out above it
+*
+         CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
+         CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
+     $                LDWORK )
+         IE = IL + LDWORK*M
+         ITAUQ = IE + M
+         ITAUP = ITAUQ + M
+         IWORK = ITAUP + M
+*
+*        Bidiagonalize L in WORK(IL)
+*        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
+*
+         CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
+     $                WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of L
+*        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUQ ), B, LDB, WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors of R in WORK(IL)
+*        (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB)
+*
+         CALL DORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IWORK = IE + M
+*
+*        Perform bidiagonal QR iteration,
+*           computing right singular vectors of L in WORK(IL) and
+*           multiplying B by transpose of left singular vectors
+*        (Workspace: need M*M+M+BDSPAC)
+*
+         CALL DBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ),
+     $                LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 30 I = 1, M
+            IF( S( I ).GT.THR ) THEN
+               CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
+            END IF
+   30    CONTINUE
+         IWORK = IE
+*
+*        Multiply B by right singular vectors of L in WORK(IL)
+*        (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS)
+*
+         IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN
+            CALL DGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK,
+     $                  B, LDB, ZERO, WORK( IWORK ), LDB )
+            CALL DLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = ( LWORK-IWORK+1 ) / M
+            DO 40 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL DGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK,
+     $                     B( 1, I ), LDB, ZERO, WORK( IWORK ), M )
+               CALL DLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ),
+     $                      LDB )
+   40       CONTINUE
+         ELSE
+            CALL DGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ),
+     $                  1, ZERO, WORK( IWORK ), 1 )
+            CALL DCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 )
+         END IF
+*
+*        Zero out below first M rows of B
+*
+         CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
+         IWORK = ITAU + M
+*
+*        Multiply transpose(Q) by B
+*        (Workspace: need M+NRHS, prefer M+NRHS*NB)
+*
+         CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
+     $                LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+      ELSE
+*
+*        Path 2 - remaining underdetermined cases
+*
+         IE = 1
+         ITAUQ = IE + M
+         ITAUP = ITAUQ + M
+         IWORK = ITAUP + M
+*
+*        Bidiagonalize A
+*        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*
+         CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors
+*        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors in A
+*        (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+         CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IWORK = IE + M
+*
+*        Perform bidiagonal QR iteration,
+*           computing right singular vectors of A in A and
+*           multiplying B by transpose of left singular vectors
+*        (Workspace: need BDSPAC)
+*
+         CALL DBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM,
+     $                1, B, LDB, WORK( IWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 50 I = 1, M
+            IF( S( I ).GT.THR ) THEN
+               CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
+            END IF
+   50    CONTINUE
+*
+*        Multiply B by right singular vectors of A
+*        (Workspace: need N, prefer N*NRHS)
+*
+         IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
+            CALL DGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO,
+     $                  WORK, LDB )
+            CALL DLACPY( 'F', N, NRHS, WORK, LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = LWORK / N
+            DO 60 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL DGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ),
+     $                     LDB, ZERO, WORK, N )
+               CALL DLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB )
+   60       CONTINUE
+         ELSE
+            CALL DGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
+            CALL DCOPY( N, WORK, 1, B, 1 )
+         END IF
+      END IF
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+   70 CONTINUE
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of DGELSS
+*
+      END
--- a/lib/linalg/dlals0.f
+++ b/lib/linalg/dlals0.f
@ -0,0 +1,499 @@
+*> \brief \b DLALS0 applies back multiplying factors in solving the least squares problem using divide and conquer SVD approach. Used by sgelsd.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLALS0 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlals0.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlals0.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlals0.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX,
+*                          PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM,
+*                          POLES, DIFL, DIFR, Z, K, C, S, WORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL,
+*      $                   LDGNUM, NL, NR, NRHS, SQRE
+*       DOUBLE PRECISION   C, S
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            GIVCOL( LDGCOL, * ), PERM( * )
+*       DOUBLE PRECISION   B( LDB, * ), BX( LDBX, * ), DIFL( * ),
+*      $                   DIFR( LDGNUM, * ), GIVNUM( LDGNUM, * ),
+*      $                   POLES( LDGNUM, * ), WORK( * ), Z( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLALS0 applies back the multiplying factors of either the left or the
+*> right singular vector matrix of a diagonal matrix appended by a row
+*> to the right hand side matrix B in solving the least squares problem
+*> using the divide-and-conquer SVD approach.
+*>
+*> For the left singular vector matrix, three types of orthogonal
+*> matrices are involved:
+*>
+*> (1L) Givens rotations: the number of such rotations is GIVPTR; the
+*>      pairs of columns/rows they were applied to are stored in GIVCOL;
+*>      and the C- and S-values of these rotations are stored in GIVNUM.
+*>
+*> (2L) Permutation. The (NL+1)-st row of B is to be moved to the first
+*>      row, and for J=2:N, PERM(J)-th row of B is to be moved to the
+*>      J-th row.
+*>
+*> (3L) The left singular vector matrix of the remaining matrix.
+*>
+*> For the right singular vector matrix, four types of orthogonal
+*> matrices are involved:
+*>
+*> (1R) The right singular vector matrix of the remaining matrix.
+*>
+*> (2R) If SQRE = 1, one extra Givens rotation to generate the right
+*>      null space.
+*>
+*> (3R) The inverse transformation of (2L).
+*>
+*> (4R) The inverse transformation of (1L).
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ICOMPQ
+*> \verbatim
+*>          ICOMPQ is INTEGER
+*>         Specifies whether singular vectors are to be computed in
+*>         factored form:
+*>         = 0: Left singular vector matrix.
+*>         = 1: Right singular vector matrix.
+*> \endverbatim
+*>
+*> \param[in] NL
+*> \verbatim
+*>          NL is INTEGER
+*>         The row dimension of the upper block. NL >= 1.
+*> \endverbatim
+*>
+*> \param[in] NR
+*> \verbatim
+*>          NR is INTEGER
+*>         The row dimension of the lower block. NR >= 1.
+*> \endverbatim
+*>
+*> \param[in] SQRE
+*> \verbatim
+*>          SQRE is INTEGER
+*>         = 0: the lower block is an NR-by-NR square matrix.
+*>         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*>
+*>         The bidiagonal matrix has row dimension N = NL + NR + 1,
+*>         and column dimension M = N + SQRE.
+*> \endverbatim
+*>
+*> \param[in] NRHS
+*> \verbatim
+*>          NRHS is INTEGER
+*>         The number of columns of B and BX. NRHS must be at least 1.
+*> \endverbatim
+*>
+*> \param[in,out] B
+*> \verbatim
+*>          B is DOUBLE PRECISION array, dimension ( LDB, NRHS )
+*>         On input, B contains the right hand sides of the least
+*>         squares problem in rows 1 through M. On output, B contains
+*>         the solution X in rows 1 through N.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*>          LDB is INTEGER
+*>         The leading dimension of B. LDB must be at least
+*>         max(1,MAX( M, N ) ).
+*> \endverbatim
+*>
+*> \param[out] BX
+*> \verbatim
+*>          BX is DOUBLE PRECISION array, dimension ( LDBX, NRHS )
+*> \endverbatim
+*>
+*> \param[in] LDBX
+*> \verbatim
+*>          LDBX is INTEGER
+*>         The leading dimension of BX.
+*> \endverbatim
+*>
+*> \param[in] PERM
+*> \verbatim
+*>          PERM is INTEGER array, dimension ( N )
+*>         The permutations (from deflation and sorting) applied
+*>         to the two blocks.
+*> \endverbatim
+*>
+*> \param[in] GIVPTR
+*> \verbatim
+*>          GIVPTR is INTEGER
+*>         The number of Givens rotations which took place in this
+*>         subproblem.
+*> \endverbatim
+*>
+*> \param[in] GIVCOL
+*> \verbatim
+*>          GIVCOL is INTEGER array, dimension ( LDGCOL, 2 )
+*>         Each pair of numbers indicates a pair of rows/columns
+*>         involved in a Givens rotation.
+*> \endverbatim
+*>
+*> \param[in] LDGCOL
+*> \verbatim
+*>          LDGCOL is INTEGER
+*>         The leading dimension of GIVCOL, must be at least N.
+*> \endverbatim
+*>
+*> \param[in] GIVNUM
+*> \verbatim
+*>          GIVNUM is DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+*>         Each number indicates the C or S value used in the
+*>         corresponding Givens rotation.
+*> \endverbatim
+*>
+*> \param[in] LDGNUM
+*> \verbatim
+*>          LDGNUM is INTEGER
+*>         The leading dimension of arrays DIFR, POLES and
+*>         GIVNUM, must be at least K.
+*> \endverbatim
+*>
+*> \param[in] POLES
+*> \verbatim
+*>          POLES is DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+*>         On entry, POLES(1:K, 1) contains the new singular
+*>         values obtained from solving the secular equation, and
+*>         POLES(1:K, 2) is an array containing the poles in the secular
+*>         equation.
+*> \endverbatim
+*>
+*> \param[in] DIFL
+*> \verbatim
+*>          DIFL is DOUBLE PRECISION array, dimension ( K ).
+*>         On entry, DIFL(I) is the distance between I-th updated
+*>         (undeflated) singular value and the I-th (undeflated) old
+*>         singular value.
+*> \endverbatim
+*>
+*> \param[in] DIFR
+*> \verbatim
+*>          DIFR is DOUBLE PRECISION array, dimension ( LDGNUM, 2 ).
+*>         On entry, DIFR(I, 1) contains the distances between I-th
+*>         updated (undeflated) singular value and the I+1-th
+*>         (undeflated) old singular value. And DIFR(I, 2) is the
+*>         normalizing factor for the I-th right singular vector.
+*> \endverbatim
+*>
+*> \param[in] Z
+*> \verbatim
+*>          Z is DOUBLE PRECISION array, dimension ( K )
+*>         Contain the components of the deflation-adjusted updating row
+*>         vector.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*>          K is INTEGER
+*>         Contains the dimension of the non-deflated matrix,
+*>         This is the order of the related secular equation. 1 <= K <=N.
+*> \endverbatim
+*>
+*> \param[in] C
+*> \verbatim
+*>          C is DOUBLE PRECISION
+*>         C contains garbage if SQRE =0 and the C-value of a Givens
+*>         rotation related to the right null space if SQRE = 1.
+*> \endverbatim
+*>
+*> \param[in] S
+*> \verbatim
+*>          S is DOUBLE PRECISION
+*>         S contains garbage if SQRE =0 and the S-value of a Givens
+*>         rotation related to the right null space if SQRE = 1.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension ( K )
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup doubleOTHERcomputational
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*>       California at Berkeley, USA \n
+*>     Osni Marques, LBNL/NERSC, USA \n
+*
+*  =====================================================================
+      SUBROUTINE DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX,
+     $                   PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM,
+     $                   POLES, DIFL, DIFR, Z, K, C, S, WORK, INFO )
+*
+*  -- LAPACK computational routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     December 2016
+*
+*     .. Scalar Arguments ..
+      INTEGER            GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL,
+     $                   LDGNUM, NL, NR, NRHS, SQRE
+      DOUBLE PRECISION   C, S
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), PERM( * )
+      DOUBLE PRECISION   B( LDB, * ), BX( LDBX, * ), DIFL( * ),
+     $                   DIFR( LDGNUM, * ), GIVNUM( LDGNUM, * ),
+     $                   POLES( LDGNUM, * ), WORK( * ), Z( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO, NEGONE
+      PARAMETER          ( ONE = 1.0D0, ZERO = 0.0D0, NEGONE = -1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, M, N, NLP1
+      DOUBLE PRECISION   DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMV, DLACPY, DLASCL, DROT, DSCAL,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMC3, DNRM2
+      EXTERNAL           DLAMC3, DNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      N = NL + NR + 1
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( NL.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.1 ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.N ) THEN
+         INFO = -7
+      ELSE IF( LDBX.LT.N ) THEN
+         INFO = -9
+      ELSE IF( GIVPTR.LT.0 ) THEN
+         INFO = -11
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -13
+      ELSE IF( LDGNUM.LT.N ) THEN
+         INFO = -15
+      ELSE IF( K.LT.1 ) THEN
+         INFO = -20
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLALS0', -INFO )
+         RETURN
+      END IF
+*
+      M = N + SQRE
+      NLP1 = NL + 1
+*
+      IF( ICOMPQ.EQ.0 ) THEN
+*
+*        Apply back orthogonal transformations from the left.
+*
+*        Step (1L): apply back the Givens rotations performed.
+*
+         DO 10 I = 1, GIVPTR
+            CALL DROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB,
+     $                 B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ),
+     $                 GIVNUM( I, 1 ) )
+   10    CONTINUE
+*
+*        Step (2L): permute rows of B.
+*
+         CALL DCOPY( NRHS, B( NLP1, 1 ), LDB, BX( 1, 1 ), LDBX )
+         DO 20 I = 2, N
+            CALL DCOPY( NRHS, B( PERM( I ), 1 ), LDB, BX( I, 1 ), LDBX )
+   20    CONTINUE
+*
+*        Step (3L): apply the inverse of the left singular vector
+*        matrix to BX.
+*
+         IF( K.EQ.1 ) THEN
+            CALL DCOPY( NRHS, BX, LDBX, B, LDB )
+            IF( Z( 1 ).LT.ZERO ) THEN
+               CALL DSCAL( NRHS, NEGONE, B, LDB )
+            END IF
+         ELSE
+            DO 50 J = 1, K
+               DIFLJ = DIFL( J )
+               DJ = POLES( J, 1 )
+               DSIGJ = -POLES( J, 2 )
+               IF( J.LT.K ) THEN
+                  DIFRJ = -DIFR( J, 1 )
+                  DSIGJP = -POLES( J+1, 2 )
+               END IF
+               IF( ( Z( J ).EQ.ZERO ) .OR. ( POLES( J, 2 ).EQ.ZERO ) )
+     $              THEN
+                  WORK( J ) = ZERO
+               ELSE
+                  WORK( J ) = -POLES( J, 2 )*Z( J ) / DIFLJ /
+     $                        ( POLES( J, 2 )+DJ )
+               END IF
+               DO 30 I = 1, J - 1
+                  IF( ( Z( I ).EQ.ZERO ) .OR.
+     $                ( POLES( I, 2 ).EQ.ZERO ) ) THEN
+                     WORK( I ) = ZERO
+                  ELSE
+                     WORK( I ) = POLES( I, 2 )*Z( I ) /
+     $                           ( DLAMC3( POLES( I, 2 ), DSIGJ )-
+     $                           DIFLJ ) / ( POLES( I, 2 )+DJ )
+                  END IF
+   30          CONTINUE
+               DO 40 I = J + 1, K
+                  IF( ( Z( I ).EQ.ZERO ) .OR.
+     $                ( POLES( I, 2 ).EQ.ZERO ) ) THEN
+                     WORK( I ) = ZERO
+                  ELSE
+                     WORK( I ) = POLES( I, 2 )*Z( I ) /
+     $                           ( DLAMC3( POLES( I, 2 ), DSIGJP )+
+     $                           DIFRJ ) / ( POLES( I, 2 )+DJ )
+                  END IF
+   40          CONTINUE
+               WORK( 1 ) = NEGONE
+               TEMP = DNRM2( K, WORK, 1 )
+               CALL DGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO,
+     $                     B( J, 1 ), LDB )
+               CALL DLASCL( 'G', 0, 0, TEMP, ONE, 1, NRHS, B( J, 1 ),
+     $                      LDB, INFO )
+   50       CONTINUE
+         END IF
+*
+*        Move the deflated rows of BX to B also.
+*
+         IF( K.LT.MAX( M, N ) )
+     $      CALL DLACPY( 'A', N-K, NRHS, BX( K+1, 1 ), LDBX,
+     $                   B( K+1, 1 ), LDB )
+      ELSE
+*
+*        Apply back the right orthogonal transformations.
+*
+*        Step (1R): apply back the new right singular vector matrix
+*        to B.
+*
+         IF( K.EQ.1 ) THEN
+            CALL DCOPY( NRHS, B, LDB, BX, LDBX )
+         ELSE
+            DO 80 J = 1, K
+               DSIGJ = POLES( J, 2 )
+               IF( Z( J ).EQ.ZERO ) THEN
+                  WORK( J ) = ZERO
+               ELSE
+                  WORK( J ) = -Z( J ) / DIFL( J ) /
+     $                        ( DSIGJ+POLES( J, 1 ) ) / DIFR( J, 2 )
+               END IF
+               DO 60 I = 1, J - 1
+                  IF( Z( J ).EQ.ZERO ) THEN
+                     WORK( I ) = ZERO
+                  ELSE
+                     WORK( I ) = Z( J ) / ( DLAMC3( DSIGJ, -POLES( I+1,
+     $                           2 ) )-DIFR( I, 1 ) ) /
+     $                           ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 )
+                  END IF
+   60          CONTINUE
+               DO 70 I = J + 1, K
+                  IF( Z( J ).EQ.ZERO ) THEN
+                     WORK( I ) = ZERO
+                  ELSE
+                     WORK( I ) = Z( J ) / ( DLAMC3( DSIGJ, -POLES( I,
+     $                           2 ) )-DIFL( I ) ) /
+     $                           ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 )
+                  END IF
+   70          CONTINUE
+               CALL DGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO,
+     $                     BX( J, 1 ), LDBX )
+   80       CONTINUE
+         END IF
+*
+*        Step (2R): if SQRE = 1, apply back the rotation that is
+*        related to the right null space of the subproblem.
+*
+         IF( SQRE.EQ.1 ) THEN
+            CALL DCOPY( NRHS, B( M, 1 ), LDB, BX( M, 1 ), LDBX )
+            CALL DROT( NRHS, BX( 1, 1 ), LDBX, BX( M, 1 ), LDBX, C, S )
+         END IF
+         IF( K.LT.MAX( M, N ) )
+     $      CALL DLACPY( 'A', N-K, NRHS, B( K+1, 1 ), LDB, BX( K+1, 1 ),
+     $                   LDBX )
+*
+*        Step (3R): permute rows of B.
+*
+         CALL DCOPY( NRHS, BX( 1, 1 ), LDBX, B( NLP1, 1 ), LDB )
+         IF( SQRE.EQ.1 ) THEN
+            CALL DCOPY( NRHS, BX( M, 1 ), LDBX, B( M, 1 ), LDB )
+         END IF
+         DO 90 I = 2, N
+            CALL DCOPY( NRHS, BX( I, 1 ), LDBX, B( PERM( I ), 1 ), LDB )
+   90    CONTINUE
+*
+*        Step (4R): apply back the Givens rotations performed.
+*
+         DO 100 I = GIVPTR, 1, -1
+            CALL DROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB,
+     $                 B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ),
+     $                 -GIVNUM( I, 1 ) )
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DLALS0
+*
+      END
--- a/lib/linalg/dlalsa.f
+++ b/lib/linalg/dlalsa.f
@ -0,0 +1,493 @@
+*> \brief \b DLALSA computes the SVD of the coefficient matrix in compact form. Used by sgelsd.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLALSA + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlalsa.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlalsa.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlalsa.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U,
+*                          LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,
+*                          GIVCOL, LDGCOL, PERM, GIVNUM, C, S, WORK,
+*                          IWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,
+*      $                   SMLSIZ
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
+*      $                   K( * ), PERM( LDGCOL, * )
+*       DOUBLE PRECISION   B( LDB, * ), BX( LDBX, * ), C( * ),
+*      $                   DIFL( LDU, * ), DIFR( LDU, * ),
+*      $                   GIVNUM( LDU, * ), POLES( LDU, * ), S( * ),
+*      $                   U( LDU, * ), VT( LDU, * ), WORK( * ),
+*      $                   Z( LDU, * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLALSA is an itermediate step in solving the least squares problem
+*> by computing the SVD of the coefficient matrix in compact form (The
+*> singular vectors are computed as products of simple orthorgonal
+*> matrices.).
+*>
+*> If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector
+*> matrix of an upper bidiagonal matrix to the right hand side; and if
+*> ICOMPQ = 1, DLALSA applies the right singular vector matrix to the
+*> right hand side. The singular vector matrices were generated in
+*> compact form by DLALSA.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ICOMPQ
+*> \verbatim
+*>          ICOMPQ is INTEGER
+*>         Specifies whether the left or the right singular vector
+*>         matrix is involved.
+*>         = 0: Left singular vector matrix
+*>         = 1: Right singular vector matrix
+*> \endverbatim
+*>
+*> \param[in] SMLSIZ
+*> \verbatim
+*>          SMLSIZ is INTEGER
+*>         The maximum size of the subproblems at the bottom of the
+*>         computation tree.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         The row and column dimensions of the upper bidiagonal matrix.
+*> \endverbatim
+*>
+*> \param[in] NRHS
+*> \verbatim
+*>          NRHS is INTEGER
+*>         The number of columns of B and BX. NRHS must be at least 1.
+*> \endverbatim
+*>
+*> \param[in,out] B
+*> \verbatim
+*>          B is DOUBLE PRECISION array, dimension ( LDB, NRHS )
+*>         On input, B contains the right hand sides of the least
+*>         squares problem in rows 1 through M.
+*>         On output, B contains the solution X in rows 1 through N.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*>          LDB is INTEGER
+*>         The leading dimension of B in the calling subprogram.
+*>         LDB must be at least max(1,MAX( M, N ) ).
+*> \endverbatim
+*>
+*> \param[out] BX
+*> \verbatim
+*>          BX is DOUBLE PRECISION array, dimension ( LDBX, NRHS )
+*>         On exit, the result of applying the left or right singular
+*>         vector matrix to B.
+*> \endverbatim
+*>
+*> \param[in] LDBX
+*> \verbatim
+*>          LDBX is INTEGER
+*>         The leading dimension of BX.
+*> \endverbatim
+*>
+*> \param[in] U
+*> \verbatim
+*>          U is DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ).
+*>         On entry, U contains the left singular vector matrices of all
+*>         subproblems at the bottom level.
+*> \endverbatim
+*>
+*> \param[in] LDU
+*> \verbatim
+*>          LDU is INTEGER, LDU = > N.
+*>         The leading dimension of arrays U, VT, DIFL, DIFR,
+*>         POLES, GIVNUM, and Z.
+*> \endverbatim
+*>
+*> \param[in] VT
+*> \verbatim
+*>          VT is DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ).
+*>         On entry, VT**T contains the right singular vector matrices of
+*>         all subproblems at the bottom level.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*>          K is INTEGER array, dimension ( N ).
+*> \endverbatim
+*>
+*> \param[in] DIFL
+*> \verbatim
+*>          DIFL is DOUBLE PRECISION array, dimension ( LDU, NLVL ).
+*>         where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
+*> \endverbatim
+*>
+*> \param[in] DIFR
+*> \verbatim
+*>          DIFR is DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+*>         On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
+*>         distances between singular values on the I-th level and
+*>         singular values on the (I -1)-th level, and DIFR(*, 2 * I)
+*>         record the normalizing factors of the right singular vectors
+*>         matrices of subproblems on I-th level.
+*> \endverbatim
+*>
+*> \param[in] Z
+*> \verbatim
+*>          Z is DOUBLE PRECISION array, dimension ( LDU, NLVL ).
+*>         On entry, Z(1, I) contains the components of the deflation-
+*>         adjusted updating row vector for subproblems on the I-th
+*>         level.
+*> \endverbatim
+*>
+*> \param[in] POLES
+*> \verbatim
+*>          POLES is DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+*>         On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
+*>         singular values involved in the secular equations on the I-th
+*>         level.
+*> \endverbatim
+*>
+*> \param[in] GIVPTR
+*> \verbatim
+*>          GIVPTR is INTEGER array, dimension ( N ).
+*>         On entry, GIVPTR( I ) records the number of Givens
+*>         rotations performed on the I-th problem on the computation
+*>         tree.
+*> \endverbatim
+*>
+*> \param[in] GIVCOL
+*> \verbatim
+*>          GIVCOL is INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
+*>         On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
+*>         locations of Givens rotations performed on the I-th level on
+*>         the computation tree.
+*> \endverbatim
+*>
+*> \param[in] LDGCOL
+*> \verbatim
+*>          LDGCOL is INTEGER, LDGCOL = > N.
+*>         The leading dimension of arrays GIVCOL and PERM.
+*> \endverbatim
+*>
+*> \param[in] PERM
+*> \verbatim
+*>          PERM is INTEGER array, dimension ( LDGCOL, NLVL ).
+*>         On entry, PERM(*, I) records permutations done on the I-th
+*>         level of the computation tree.
+*> \endverbatim
+*>
+*> \param[in] GIVNUM
+*> \verbatim
+*>          GIVNUM is DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+*>         On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
+*>         values of Givens rotations performed on the I-th level on the
+*>         computation tree.
+*> \endverbatim
+*>
+*> \param[in] C
+*> \verbatim
+*>          C is DOUBLE PRECISION array, dimension ( N ).
+*>         On entry, if the I-th subproblem is not square,
+*>         C( I ) contains the C-value of a Givens rotation related to
+*>         the right null space of the I-th subproblem.
+*> \endverbatim
+*>
+*> \param[in] S
+*> \verbatim
+*>          S is DOUBLE PRECISION array, dimension ( N ).
+*>         On entry, if the I-th subproblem is not square,
+*>         S( I ) contains the S-value of a Givens rotation related to
+*>         the right null space of the I-th subproblem.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension (N)
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*>          IWORK is INTEGER array, dimension (3*N)
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date June 2017
+*
+*> \ingroup doubleOTHERcomputational
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*>       California at Berkeley, USA \n
+*>     Osni Marques, LBNL/NERSC, USA \n
+*
+*  =====================================================================
+      SUBROUTINE DLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U,
+     $                   LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,
+     $                   GIVCOL, LDGCOL, PERM, GIVNUM, C, S, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK computational routine (version 3.7.1) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     June 2017
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,
+     $                   SMLSIZ
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
+     $                   K( * ), PERM( LDGCOL, * )
+      DOUBLE PRECISION   B( LDB, * ), BX( LDBX, * ), C( * ),
+     $                   DIFL( LDU, * ), DIFR( LDU, * ),
+     $                   GIVNUM( LDU, * ), POLES( LDU, * ), S( * ),
+     $                   U( LDU, * ), VT( LDU, * ), WORK( * ),
+     $                   Z( LDU, * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I1, IC, IM1, INODE, J, LF, LL, LVL, LVL2,
+     $                   ND, NDB1, NDIML, NDIMR, NL, NLF, NLP1, NLVL,
+     $                   NR, NRF, NRP1, SQRE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMM, DLALS0, DLASDT, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( SMLSIZ.LT.3 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.SMLSIZ ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.N ) THEN
+         INFO = -6
+      ELSE IF( LDBX.LT.N ) THEN
+         INFO = -8
+      ELSE IF( LDU.LT.N ) THEN
+         INFO = -10
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -19
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLALSA', -INFO )
+         RETURN
+      END IF
+*
+*     Book-keeping and  setting up the computation tree.
+*
+      INODE = 1
+      NDIML = INODE + N
+      NDIMR = NDIML + N
+*
+      CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
+     $             IWORK( NDIMR ), SMLSIZ )
+*
+*     The following code applies back the left singular vector factors.
+*     For applying back the right singular vector factors, go to 50.
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         GO TO 50
+      END IF
+*
+*     The nodes on the bottom level of the tree were solved
+*     by DLASDQ. The corresponding left and right singular vector
+*     matrices are in explicit form. First apply back the left
+*     singular vector matrices.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 10 I = NDB1, ND
+*
+*        IC : center row of each node
+*        NL : number of rows of left  subproblem
+*        NR : number of rows of right subproblem
+*        NLF: starting row of the left   subproblem
+*        NRF: starting row of the right  subproblem
+*
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NR = IWORK( NDIMR+I1 )
+         NLF = IC - NL
+         NRF = IC + 1
+         CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+     $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+         CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+     $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+   10 CONTINUE
+*
+*     Next copy the rows of B that correspond to unchanged rows
+*     in the bidiagonal matrix to BX.
+*
+      DO 20 I = 1, ND
+         IC = IWORK( INODE+I-1 )
+         CALL DCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX )
+   20 CONTINUE
+*
+*     Finally go through the left singular vector matrices of all
+*     the other subproblems bottom-up on the tree.
+*
+      J = 2**NLVL
+      SQRE = 0
+*
+      DO 40 LVL = NLVL, 1, -1
+         LVL2 = 2*LVL - 1
+*
+*        find the first node LF and last node LL on
+*        the current level LVL
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 30 I = LF, LL
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            J = J - 1
+            CALL DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX,
+     $                   B( NLF, 1 ), LDB, PERM( NLF, LVL ),
+     $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
+     $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
+     $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK,
+     $                   INFO )
+   30    CONTINUE
+   40 CONTINUE
+      GO TO 90
+*
+*     ICOMPQ = 1: applying back the right singular vector factors.
+*
+   50 CONTINUE
+*
+*     First now go through the right singular vector matrices of all
+*     the tree nodes top-down.
+*
+      J = 0
+      DO 70 LVL = 1, NLVL
+         LVL2 = 2*LVL - 1
+*
+*        Find the first node LF and last node LL on
+*        the current level LVL.
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 60 I = LL, LF, -1
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            IF( I.EQ.LL ) THEN
+               SQRE = 0
+            ELSE
+               SQRE = 1
+            END IF
+            J = J + 1
+            CALL DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB,
+     $                   BX( NLF, 1 ), LDBX, PERM( NLF, LVL ),
+     $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
+     $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
+     $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK,
+     $                   INFO )
+   60    CONTINUE
+   70 CONTINUE
+*
+*     The nodes on the bottom level of the tree were solved
+*     by DLASDQ. The corresponding right singular vector
+*     matrices are in explicit form. Apply them back.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 80 I = NDB1, ND
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NR = IWORK( NDIMR+I1 )
+         NLP1 = NL + 1
+         IF( I.EQ.ND ) THEN
+            NRP1 = NR
+         ELSE
+            NRP1 = NR + 1
+         END IF
+         NLF = IC - NL
+         NRF = IC + 1
+         CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+     $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+         CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+     $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+   80 CONTINUE
+*
+   90 CONTINUE
+*
+      RETURN
+*
+*     End of DLALSA
+*
+      END
--- a/lib/linalg/dlalsd.f
+++ b/lib/linalg/dlalsd.f
@ -0,0 +1,523 @@
+*> \brief \b DLALSD uses the singular value decomposition of A to solve the least squares problem.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLALSD + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlalsd.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlalsd.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlalsd.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND,
+*                          RANK, WORK, IWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          UPLO
+*       INTEGER            INFO, LDB, N, NRHS, RANK, SMLSIZ
+*       DOUBLE PRECISION   RCOND
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            IWORK( * )
+*       DOUBLE PRECISION   B( LDB, * ), D( * ), E( * ), WORK( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLALSD uses the singular value decomposition of A to solve the least
+*> squares problem of finding X to minimize the Euclidean norm of each
+*> column of A*X-B, where A is N-by-N upper bidiagonal, and X and B
+*> are N-by-NRHS. The solution X overwrites B.
+*>
+*> The singular values of A smaller than RCOND times the largest
+*> singular value are treated as zero in solving the least squares
+*> problem; in this case a minimum norm solution is returned.
+*> The actual singular values are returned in D in ascending order.
+*>
+*> This code makes very mild assumptions about floating point
+*> arithmetic. It will work on machines with a guard digit in
+*> add/subtract, or on those binary machines without guard digits
+*> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
+*> It could conceivably fail on hexadecimal or decimal machines
+*> without guard digits, but we know of none.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] UPLO
+*> \verbatim
+*>          UPLO is CHARACTER*1
+*>         = 'U': D and E define an upper bidiagonal matrix.
+*>         = 'L': D and E define a  lower bidiagonal matrix.
+*> \endverbatim
+*>
+*> \param[in] SMLSIZ
+*> \verbatim
+*>          SMLSIZ is INTEGER
+*>         The maximum size of the subproblems at the bottom of the
+*>         computation tree.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         The dimension of the  bidiagonal matrix.  N >= 0.
+*> \endverbatim
+*>
+*> \param[in] NRHS
+*> \verbatim
+*>          NRHS is INTEGER
+*>         The number of columns of B. NRHS must be at least 1.
+*> \endverbatim
+*>
+*> \param[in,out] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension (N)
+*>         On entry D contains the main diagonal of the bidiagonal
+*>         matrix. On exit, if INFO = 0, D contains its singular values.
+*> \endverbatim
+*>
+*> \param[in,out] E
+*> \verbatim
+*>          E is DOUBLE PRECISION array, dimension (N-1)
+*>         Contains the super-diagonal entries of the bidiagonal matrix.
+*>         On exit, E has been destroyed.
+*> \endverbatim
+*>
+*> \param[in,out] B
+*> \verbatim
+*>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
+*>         On input, B contains the right hand sides of the least
+*>         squares problem. On output, B contains the solution X.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*>          LDB is INTEGER
+*>         The leading dimension of B in the calling subprogram.
+*>         LDB must be at least max(1,N).
+*> \endverbatim
+*>
+*> \param[in] RCOND
+*> \verbatim
+*>          RCOND is DOUBLE PRECISION
+*>         The singular values of A less than or equal to RCOND times
+*>         the largest singular value are treated as zero in solving
+*>         the least squares problem. If RCOND is negative,
+*>         machine precision is used instead.
+*>         For example, if diag(S)*X=B were the least squares problem,
+*>         where diag(S) is a diagonal matrix of singular values, the
+*>         solution would be X(i) = B(i) / S(i) if S(i) is greater than
+*>         RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to
+*>         RCOND*max(S).
+*> \endverbatim
+*>
+*> \param[out] RANK
+*> \verbatim
+*>          RANK is INTEGER
+*>         The number of singular values of A greater than RCOND times
+*>         the largest singular value.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension at least
+*>         (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2),
+*>         where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1).
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*>          IWORK is INTEGER array, dimension at least
+*>         (3*N*NLVL + 11*N)
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>         = 0:  successful exit.
+*>         < 0:  if INFO = -i, the i-th argument had an illegal value.
+*>         > 0:  The algorithm failed to compute a singular value while
+*>               working on the submatrix lying in rows and columns
+*>               INFO/(N+1) through MOD(INFO,N+1).
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup doubleOTHERcomputational
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*>       California at Berkeley, USA \n
+*>     Osni Marques, LBNL/NERSC, USA \n
+*
+*  =====================================================================
+      SUBROUTINE DLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND,
+     $                   RANK, WORK, IWORK, INFO )
+*
+*  -- LAPACK computational routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     December 2016
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS, RANK, SMLSIZ
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   B( LDB, * ), D( * ), E( * ), WORK( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM,
+     $                   GIVPTR, I, ICMPQ1, ICMPQ2, IWK, J, K, NLVL,
+     $                   NM1, NSIZE, NSUB, NWORK, PERM, POLES, S, SIZEI,
+     $                   SMLSZP, SQRE, ST, ST1, U, VT, Z
+      DOUBLE PRECISION   CS, EPS, ORGNRM, R, RCND, SN, TOL
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH, DLANST
+      EXTERNAL           IDAMAX, DLAMCH, DLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMM, DLACPY, DLALSA, DLARTG, DLASCL,
+     $                   DLASDA, DLASDQ, DLASET, DLASRT, DROT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, INT, LOG, SIGN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLALSD', -INFO )
+         RETURN
+      END IF
+*
+      EPS = DLAMCH( 'Epsilon' )
+*
+*     Set up the tolerance.
+*
+      IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN
+         RCND = EPS
+      ELSE
+         RCND = RCOND
+      END IF
+*
+      RANK = 0
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 ) THEN
+         RETURN
+      ELSE IF( N.EQ.1 ) THEN
+         IF( D( 1 ).EQ.ZERO ) THEN
+            CALL DLASET( 'A', 1, NRHS, ZERO, ZERO, B, LDB )
+         ELSE
+            RANK = 1
+            CALL DLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO )
+            D( 1 ) = ABS( D( 1 ) )
+         END IF
+         RETURN
+      END IF
+*
+*     Rotate the matrix if it is lower bidiagonal.
+*
+      IF( UPLO.EQ.'L' ) THEN
+         DO 10 I = 1, N - 1
+            CALL DLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            IF( NRHS.EQ.1 ) THEN
+               CALL DROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN )
+            ELSE
+               WORK( I*2-1 ) = CS
+               WORK( I*2 ) = SN
+            END IF
+   10    CONTINUE
+         IF( NRHS.GT.1 ) THEN
+            DO 30 I = 1, NRHS
+               DO 20 J = 1, N - 1
+                  CS = WORK( J*2-1 )
+                  SN = WORK( J*2 )
+                  CALL DROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN )
+   20          CONTINUE
+   30       CONTINUE
+         END IF
+      END IF
+*
+*     Scale.
+*
+      NM1 = N - 1
+      ORGNRM = DLANST( 'M', N, D, E )
+      IF( ORGNRM.EQ.ZERO ) THEN
+         CALL DLASET( 'A', N, NRHS, ZERO, ZERO, B, LDB )
+         RETURN
+      END IF
+*
+      CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO )
+      CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO )
+*
+*     If N is smaller than the minimum divide size SMLSIZ, then solve
+*     the problem with another solver.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         NWORK = 1 + N*N
+         CALL DLASET( 'A', N, N, ZERO, ONE, WORK, N )
+         CALL DLASDQ( 'U', 0, N, N, 0, NRHS, D, E, WORK, N, WORK, N, B,
+     $                LDB, WORK( NWORK ), INFO )
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         TOL = RCND*ABS( D( IDAMAX( N, D, 1 ) ) )
+         DO 40 I = 1, N
+            IF( D( I ).LE.TOL ) THEN
+               CALL DLASET( 'A', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
+            ELSE
+               CALL DLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ),
+     $                      LDB, INFO )
+               RANK = RANK + 1
+            END IF
+   40    CONTINUE
+         CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO,
+     $               WORK( NWORK ), N )
+         CALL DLACPY( 'A', N, NRHS, WORK( NWORK ), N, B, LDB )
+*
+*        Unscale.
+*
+         CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+         CALL DLASRT( 'D', N, D, INFO )
+         CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO )
+*
+         RETURN
+      END IF
+*
+*     Book-keeping and setting up some constants.
+*
+      NLVL = INT( LOG( DBLE( N ) / DBLE( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1
+*
+      SMLSZP = SMLSIZ + 1
+*
+      U = 1
+      VT = 1 + SMLSIZ*N
+      DIFL = VT + SMLSZP*N
+      DIFR = DIFL + NLVL*N
+      Z = DIFR + NLVL*N*2
+      C = Z + NLVL*N
+      S = C + N
+      POLES = S + N
+      GIVNUM = POLES + 2*NLVL*N
+      BX = GIVNUM + 2*NLVL*N
+      NWORK = BX + N*NRHS
+*
+      SIZEI = 1 + N
+      K = SIZEI + N
+      GIVPTR = K + N
+      PERM = GIVPTR + N
+      GIVCOL = PERM + NLVL*N
+      IWK = GIVCOL + NLVL*N*2
+*
+      ST = 1
+      SQRE = 0
+      ICMPQ1 = 1
+      ICMPQ2 = 0
+      NSUB = 0
+*
+      DO 50 I = 1, N
+         IF( ABS( D( I ) ).LT.EPS ) THEN
+            D( I ) = SIGN( EPS, D( I ) )
+         END IF
+   50 CONTINUE
+*
+      DO 60 I = 1, NM1
+         IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN
+            NSUB = NSUB + 1
+            IWORK( NSUB ) = ST
+*
+*           Subproblem found. First determine its size and then
+*           apply divide and conquer on it.
+*
+            IF( I.LT.NM1 ) THEN
+*
+*              A subproblem with E(I) small for I < NM1.
+*
+               NSIZE = I - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+            ELSE IF( ABS( E( I ) ).GE.EPS ) THEN
+*
+*              A subproblem with E(NM1) not too small but I = NM1.
+*
+               NSIZE = N - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+            ELSE
+*
+*              A subproblem with E(NM1) small. This implies an
+*              1-by-1 subproblem at D(N), which is not solved
+*              explicitly.
+*
+               NSIZE = I - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+               NSUB = NSUB + 1
+               IWORK( NSUB ) = N
+               IWORK( SIZEI+NSUB-1 ) = 1
+               CALL DCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N )
+            END IF
+            ST1 = ST - 1
+            IF( NSIZE.EQ.1 ) THEN
+*
+*              This is a 1-by-1 subproblem and is not solved
+*              explicitly.
+*
+               CALL DCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N )
+            ELSE IF( NSIZE.LE.SMLSIZ ) THEN
+*
+*              This is a small subproblem and is solved by DLASDQ.
+*
+               CALL DLASET( 'A', NSIZE, NSIZE, ZERO, ONE,
+     $                      WORK( VT+ST1 ), N )
+               CALL DLASDQ( 'U', 0, NSIZE, NSIZE, 0, NRHS, D( ST ),
+     $                      E( ST ), WORK( VT+ST1 ), N, WORK( NWORK ),
+     $                      N, B( ST, 1 ), LDB, WORK( NWORK ), INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+               CALL DLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB,
+     $                      WORK( BX+ST1 ), N )
+            ELSE
+*
+*              A large problem. Solve it using divide and conquer.
+*
+               CALL DLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ),
+     $                      E( ST ), WORK( U+ST1 ), N, WORK( VT+ST1 ),
+     $                      IWORK( K+ST1 ), WORK( DIFL+ST1 ),
+     $                      WORK( DIFR+ST1 ), WORK( Z+ST1 ),
+     $                      WORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ),
+     $                      IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ),
+     $                      WORK( GIVNUM+ST1 ), WORK( C+ST1 ),
+     $                      WORK( S+ST1 ), WORK( NWORK ), IWORK( IWK ),
+     $                      INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+               BXST = BX + ST1
+               CALL DLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ),
+     $                      LDB, WORK( BXST ), N, WORK( U+ST1 ), N,
+     $                      WORK( VT+ST1 ), IWORK( K+ST1 ),
+     $                      WORK( DIFL+ST1 ), WORK( DIFR+ST1 ),
+     $                      WORK( Z+ST1 ), WORK( POLES+ST1 ),
+     $                      IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
+     $                      IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ),
+     $                      WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ),
+     $                      IWORK( IWK ), INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+            END IF
+            ST = I + 1
+         END IF
+   60 CONTINUE
+*
+*     Apply the singular values and treat the tiny ones as zero.
+*
+      TOL = RCND*ABS( D( IDAMAX( N, D, 1 ) ) )
+*
+      DO 70 I = 1, N
+*
+*        Some of the elements in D can be negative because 1-by-1
+*        subproblems were not solved explicitly.
+*
+         IF( ABS( D( I ) ).LE.TOL ) THEN
+            CALL DLASET( 'A', 1, NRHS, ZERO, ZERO, WORK( BX+I-1 ), N )
+         ELSE
+            RANK = RANK + 1
+            CALL DLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS,
+     $                   WORK( BX+I-1 ), N, INFO )
+         END IF
+         D( I ) = ABS( D( I ) )
+   70 CONTINUE
+*
+*     Now apply back the right singular vectors.
+*
+      ICMPQ2 = 1
+      DO 80 I = 1, NSUB
+         ST = IWORK( I )
+         ST1 = ST - 1
+         NSIZE = IWORK( SIZEI+I-1 )
+         BXST = BX + ST1
+         IF( NSIZE.EQ.1 ) THEN
+            CALL DCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB )
+         ELSE IF( NSIZE.LE.SMLSIZ ) THEN
+            CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
+     $                  WORK( VT+ST1 ), N, WORK( BXST ), N, ZERO,
+     $                  B( ST, 1 ), LDB )
+         ELSE
+            CALL DLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N,
+     $                   B( ST, 1 ), LDB, WORK( U+ST1 ), N,
+     $                   WORK( VT+ST1 ), IWORK( K+ST1 ),
+     $                   WORK( DIFL+ST1 ), WORK( DIFR+ST1 ),
+     $                   WORK( Z+ST1 ), WORK( POLES+ST1 ),
+     $                   IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
+     $                   IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ),
+     $                   WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ),
+     $                   IWORK( IWK ), INFO )
+            IF( INFO.NE.0 ) THEN
+               RETURN
+            END IF
+         END IF
+   80 CONTINUE
+*
+*     Unscale and sort the singular values.
+*
+      CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+      CALL DLASRT( 'D', N, D, INFO )
+      CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO )
+*
+      RETURN
+*
+*     End of DLALSD
+*
+      END
--- a/lib/linalg/dlasd4.f
+++ b/lib/linalg/dlasd4.f
--- a/lib/linalg/dlasd5.f
+++ b/lib/linalg/dlasd5.f
@ -0,0 +1,231 @@
+*> \brief \b DLASD5 computes the square root of the i-th eigenvalue of a positive symmetric rank-one modification of a 2-by-2 diagonal matrix. Used by sbdsdc.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLASD5 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasd5.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasd5.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasd5.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLASD5( I, D, Z, DELTA, RHO, DSIGMA, WORK )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            I
+*       DOUBLE PRECISION   DSIGMA, RHO
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION   D( 2 ), DELTA( 2 ), WORK( 2 ), Z( 2 )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> This subroutine computes the square root of the I-th eigenvalue
+*> of a positive symmetric rank-one modification of a 2-by-2 diagonal
+*> matrix
+*>
+*>            diag( D ) * diag( D ) +  RHO * Z * transpose(Z) .
+*>
+*> The diagonal entries in the array D are assumed to satisfy
+*>
+*>            0 <= D(i) < D(j)  for  i < j .
+*>
+*> We also assume RHO > 0 and that the Euclidean norm of the vector
+*> Z is one.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] I
+*> \verbatim
+*>          I is INTEGER
+*>         The index of the eigenvalue to be computed.  I = 1 or I = 2.
+*> \endverbatim
+*>
+*> \param[in] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension ( 2 )
+*>         The original eigenvalues.  We assume 0 <= D(1) < D(2).
+*> \endverbatim
+*>
+*> \param[in] Z
+*> \verbatim
+*>          Z is DOUBLE PRECISION array, dimension ( 2 )
+*>         The components of the updating vector.
+*> \endverbatim
+*>
+*> \param[out] DELTA
+*> \verbatim
+*>          DELTA is DOUBLE PRECISION array, dimension ( 2 )
+*>         Contains (D(j) - sigma_I) in its  j-th component.
+*>         The vector DELTA contains the information necessary
+*>         to construct the eigenvectors.
+*> \endverbatim
+*>
+*> \param[in] RHO
+*> \verbatim
+*>          RHO is DOUBLE PRECISION
+*>         The scalar in the symmetric updating formula.
+*> \endverbatim
+*>
+*> \param[out] DSIGMA
+*> \verbatim
+*>          DSIGMA is DOUBLE PRECISION
+*>         The computed sigma_I, the I-th updated eigenvalue.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension ( 2 )
+*>         WORK contains (D(j) + sigma_I) in its  j-th component.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup OTHERauxiliary
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ren-Cang Li, Computer Science Division, University of California
+*>     at Berkeley, USA
+*>
+*  =====================================================================
+      SUBROUTINE DLASD5( I, D, Z, DELTA, RHO, DSIGMA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     December 2016
+*
+*     .. Scalar Arguments ..
+      INTEGER            I
+      DOUBLE PRECISION   DSIGMA, RHO
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( 2 ), DELTA( 2 ), WORK( 2 ), Z( 2 )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO, THREE, FOUR
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
+     $                   THREE = 3.0D+0, FOUR = 4.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   B, C, DEL, DELSQ, TAU, W
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      DEL = D( 2 ) - D( 1 )
+      DELSQ = DEL*( D( 2 )+D( 1 ) )
+      IF( I.EQ.1 ) THEN
+         W = ONE + FOUR*RHO*( Z( 2 )*Z( 2 ) / ( D( 1 )+THREE*D( 2 ) )-
+     $       Z( 1 )*Z( 1 ) / ( THREE*D( 1 )+D( 2 ) ) ) / DEL
+         IF( W.GT.ZERO ) THEN
+            B = DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+            C = RHO*Z( 1 )*Z( 1 )*DELSQ
+*
+*           B > ZERO, always
+*
+*           The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 )
+*
+            TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) )
+*
+*           The following TAU is DSIGMA - D( 1 )
+*
+            TAU = TAU / ( D( 1 )+SQRT( D( 1 )*D( 1 )+TAU ) )
+            DSIGMA = D( 1 ) + TAU
+            DELTA( 1 ) = -TAU
+            DELTA( 2 ) = DEL - TAU
+            WORK( 1 ) = TWO*D( 1 ) + TAU
+            WORK( 2 ) = ( D( 1 )+TAU ) + D( 2 )
+*           DELTA( 1 ) = -Z( 1 ) / TAU
+*           DELTA( 2 ) = Z( 2 ) / ( DEL-TAU )
+         ELSE
+            B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+            C = RHO*Z( 2 )*Z( 2 )*DELSQ
+*
+*           The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 )
+*
+            IF( B.GT.ZERO ) THEN
+               TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) )
+            ELSE
+               TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO
+            END IF
+*
+*           The following TAU is DSIGMA - D( 2 )
+*
+            TAU = TAU / ( D( 2 )+SQRT( ABS( D( 2 )*D( 2 )+TAU ) ) )
+            DSIGMA = D( 2 ) + TAU
+            DELTA( 1 ) = -( DEL+TAU )
+            DELTA( 2 ) = -TAU
+            WORK( 1 ) = D( 1 ) + TAU + D( 2 )
+            WORK( 2 ) = TWO*D( 2 ) + TAU
+*           DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
+*           DELTA( 2 ) = -Z( 2 ) / TAU
+         END IF
+*        TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
+*        DELTA( 1 ) = DELTA( 1 ) / TEMP
+*        DELTA( 2 ) = DELTA( 2 ) / TEMP
+      ELSE
+*
+*        Now I=2
+*
+         B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+         C = RHO*Z( 2 )*Z( 2 )*DELSQ
+*
+*        The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 )
+*
+         IF( B.GT.ZERO ) THEN
+            TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO
+         ELSE
+            TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) )
+         END IF
+*
+*        The following TAU is DSIGMA - D( 2 )
+*
+         TAU = TAU / ( D( 2 )+SQRT( D( 2 )*D( 2 )+TAU ) )
+         DSIGMA = D( 2 ) + TAU
+         DELTA( 1 ) = -( DEL+TAU )
+         DELTA( 2 ) = -TAU
+         WORK( 1 ) = D( 1 ) + TAU + D( 2 )
+         WORK( 2 ) = TWO*D( 2 ) + TAU
+*        DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
+*        DELTA( 2 ) = -Z( 2 ) / TAU
+*        TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
+*        DELTA( 1 ) = DELTA( 1 ) / TEMP
+*        DELTA( 2 ) = DELTA( 2 ) / TEMP
+      END IF
+      RETURN
+*
+*     End of DLASD5
+*
+      END
--- a/lib/linalg/dlasd6.f
+++ b/lib/linalg/dlasd6.f
@ -0,0 +1,443 @@
+*> \brief \b DLASD6 computes the SVD of an updated upper bidiagonal matrix obtained by merging two smaller ones by appending a row. Used by sbdsdc.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLASD6 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasd6.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasd6.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasd6.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLASD6( ICOMPQ, NL, NR, SQRE, D, VF, VL, ALPHA, BETA,
+*                          IDXQ, PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM,
+*                          LDGNUM, POLES, DIFL, DIFR, Z, K, C, S, WORK,
+*                          IWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL,
+*      $                   NR, SQRE
+*       DOUBLE PRECISION   ALPHA, BETA, C, S
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            GIVCOL( LDGCOL, * ), IDXQ( * ), IWORK( * ),
+*      $                   PERM( * )
+*       DOUBLE PRECISION   D( * ), DIFL( * ), DIFR( * ),
+*      $                   GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ),
+*      $                   VF( * ), VL( * ), WORK( * ), Z( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLASD6 computes the SVD of an updated upper bidiagonal matrix B
+*> obtained by merging two smaller ones by appending a row. This
+*> routine is used only for the problem which requires all singular
+*> values and optionally singular vector matrices in factored form.
+*> B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE.
+*> A related subroutine, DLASD1, handles the case in which all singular
+*> values and singular vectors of the bidiagonal matrix are desired.
+*>
+*> DLASD6 computes the SVD as follows:
+*>
+*>               ( D1(in)    0    0       0 )
+*>   B = U(in) * (   Z1**T   a   Z2**T    b ) * VT(in)
+*>               (   0       0   D2(in)   0 )
+*>
+*>     = U(out) * ( D(out) 0) * VT(out)
+*>
+*> where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M
+*> with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
+*> elsewhere; and the entry b is empty if SQRE = 0.
+*>
+*> The singular values of B can be computed using D1, D2, the first
+*> components of all the right singular vectors of the lower block, and
+*> the last components of all the right singular vectors of the upper
+*> block. These components are stored and updated in VF and VL,
+*> respectively, in DLASD6. Hence U and VT are not explicitly
+*> referenced.
+*>
+*> The singular values are stored in D. The algorithm consists of two
+*> stages:
+*>
+*>       The first stage consists of deflating the size of the problem
+*>       when there are multiple singular values or if there is a zero
+*>       in the Z vector. For each such occurrence the dimension of the
+*>       secular equation problem is reduced by one. This stage is
+*>       performed by the routine DLASD7.
+*>
+*>       The second stage consists of calculating the updated
+*>       singular values. This is done by finding the roots of the
+*>       secular equation via the routine DLASD4 (as called by DLASD8).
+*>       This routine also updates VF and VL and computes the distances
+*>       between the updated singular values and the old singular
+*>       values.
+*>
+*> DLASD6 is called from DLASDA.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ICOMPQ
+*> \verbatim
+*>          ICOMPQ is INTEGER
+*>         Specifies whether singular vectors are to be computed in
+*>         factored form:
+*>         = 0: Compute singular values only.
+*>         = 1: Compute singular vectors in factored form as well.
+*> \endverbatim
+*>
+*> \param[in] NL
+*> \verbatim
+*>          NL is INTEGER
+*>         The row dimension of the upper block.  NL >= 1.
+*> \endverbatim
+*>
+*> \param[in] NR
+*> \verbatim
+*>          NR is INTEGER
+*>         The row dimension of the lower block.  NR >= 1.
+*> \endverbatim
+*>
+*> \param[in] SQRE
+*> \verbatim
+*>          SQRE is INTEGER
+*>         = 0: the lower block is an NR-by-NR square matrix.
+*>         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*>
+*>         The bidiagonal matrix has row dimension N = NL + NR + 1,
+*>         and column dimension M = N + SQRE.
+*> \endverbatim
+*>
+*> \param[in,out] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension ( NL+NR+1 ).
+*>         On entry D(1:NL,1:NL) contains the singular values of the
+*>         upper block, and D(NL+2:N) contains the singular values
+*>         of the lower block. On exit D(1:N) contains the singular
+*>         values of the modified matrix.
+*> \endverbatim
+*>
+*> \param[in,out] VF
+*> \verbatim
+*>          VF is DOUBLE PRECISION array, dimension ( M )
+*>         On entry, VF(1:NL+1) contains the first components of all
+*>         right singular vectors of the upper block; and VF(NL+2:M)
+*>         contains the first components of all right singular vectors
+*>         of the lower block. On exit, VF contains the first components
+*>         of all right singular vectors of the bidiagonal matrix.
+*> \endverbatim
+*>
+*> \param[in,out] VL
+*> \verbatim
+*>          VL is DOUBLE PRECISION array, dimension ( M )
+*>         On entry, VL(1:NL+1) contains the  last components of all
+*>         right singular vectors of the upper block; and VL(NL+2:M)
+*>         contains the last components of all right singular vectors of
+*>         the lower block. On exit, VL contains the last components of
+*>         all right singular vectors of the bidiagonal matrix.
+*> \endverbatim
+*>
+*> \param[in,out] ALPHA
+*> \verbatim
+*>          ALPHA is DOUBLE PRECISION
+*>         Contains the diagonal element associated with the added row.
+*> \endverbatim
+*>
+*> \param[in,out] BETA
+*> \verbatim
+*>          BETA is DOUBLE PRECISION
+*>         Contains the off-diagonal element associated with the added
+*>         row.
+*> \endverbatim
+*>
+*> \param[in,out] IDXQ
+*> \verbatim
+*>          IDXQ is INTEGER array, dimension ( N )
+*>         This contains the permutation which will reintegrate the
+*>         subproblem just solved back into sorted order, i.e.
+*>         D( IDXQ( I = 1, N ) ) will be in ascending order.
+*> \endverbatim
+*>
+*> \param[out] PERM
+*> \verbatim
+*>          PERM is INTEGER array, dimension ( N )
+*>         The permutations (from deflation and sorting) to be applied
+*>         to each block. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[out] GIVPTR
+*> \verbatim
+*>          GIVPTR is INTEGER
+*>         The number of Givens rotations which took place in this
+*>         subproblem. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[out] GIVCOL
+*> \verbatim
+*>          GIVCOL is INTEGER array, dimension ( LDGCOL, 2 )
+*>         Each pair of numbers indicates a pair of columns to take place
+*>         in a Givens rotation. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[in] LDGCOL
+*> \verbatim
+*>          LDGCOL is INTEGER
+*>         leading dimension of GIVCOL, must be at least N.
+*> \endverbatim
+*>
+*> \param[out] GIVNUM
+*> \verbatim
+*>          GIVNUM is DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+*>         Each number indicates the C or S value to be used in the
+*>         corresponding Givens rotation. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[in] LDGNUM
+*> \verbatim
+*>          LDGNUM is INTEGER
+*>         The leading dimension of GIVNUM and POLES, must be at least N.
+*> \endverbatim
+*>
+*> \param[out] POLES
+*> \verbatim
+*>          POLES is DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+*>         On exit, POLES(1,*) is an array containing the new singular
+*>         values obtained from solving the secular equation, and
+*>         POLES(2,*) is an array containing the poles in the secular
+*>         equation. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[out] DIFL
+*> \verbatim
+*>          DIFL is DOUBLE PRECISION array, dimension ( N )
+*>         On exit, DIFL(I) is the distance between I-th updated
+*>         (undeflated) singular value and the I-th (undeflated) old
+*>         singular value.
+*> \endverbatim
+*>
+*> \param[out] DIFR
+*> \verbatim
+*>          DIFR is DOUBLE PRECISION array,
+*>                   dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and
+*>                   dimension ( K ) if ICOMPQ = 0.
+*>          On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not
+*>          defined and will not be referenced.
+*>
+*>          If ICOMPQ = 1, DIFR(1:K,2) is an array containing the
+*>          normalizing factors for the right singular vector matrix.
+*>
+*>         See DLASD8 for details on DIFL and DIFR.
+*> \endverbatim
+*>
+*> \param[out] Z
+*> \verbatim
+*>          Z is DOUBLE PRECISION array, dimension ( M )
+*>         The first elements of this array contain the components
+*>         of the deflation-adjusted updating row vector.
+*> \endverbatim
+*>
+*> \param[out] K
+*> \verbatim
+*>          K is INTEGER
+*>         Contains the dimension of the non-deflated matrix,
+*>         This is the order of the related secular equation. 1 <= K <=N.
+*> \endverbatim
+*>
+*> \param[out] C
+*> \verbatim
+*>          C is DOUBLE PRECISION
+*>         C contains garbage if SQRE =0 and the C-value of a Givens
+*>         rotation related to the right null space if SQRE = 1.
+*> \endverbatim
+*>
+*> \param[out] S
+*> \verbatim
+*>          S is DOUBLE PRECISION
+*>         S contains garbage if SQRE =0 and the S-value of a Givens
+*>         rotation related to the right null space if SQRE = 1.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension ( 4 * M )
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*>          IWORK is INTEGER array, dimension ( 3 * N )
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*>          > 0:  if INFO = 1, a singular value did not converge
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date June 2016
+*
+*> \ingroup OTHERauxiliary
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Huan Ren, Computer Science Division, University of
+*>     California at Berkeley, USA
+*>
+*  =====================================================================
+      SUBROUTINE DLASD6( ICOMPQ, NL, NR, SQRE, D, VF, VL, ALPHA, BETA,
+     $                   IDXQ, PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM,
+     $                   LDGNUM, POLES, DIFL, DIFR, Z, K, C, S, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     June 2016
+*
+*     .. Scalar Arguments ..
+      INTEGER            GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL,
+     $                   NR, SQRE
+      DOUBLE PRECISION   ALPHA, BETA, C, S
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), IDXQ( * ), IWORK( * ),
+     $                   PERM( * )
+      DOUBLE PRECISION   D( * ), DIFL( * ), DIFR( * ),
+     $                   GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ),
+     $                   VF( * ), VL( * ), WORK( * ), Z( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IDX, IDXC, IDXP, ISIGMA, IVFW, IVLW, IW, M,
+     $                   N, N1, N2
+      DOUBLE PRECISION   ORGNRM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLAMRG, DLASCL, DLASD7, DLASD8, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      N = NL + NR + 1
+      M = N + SQRE
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( NL.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -14
+      ELSE IF( LDGNUM.LT.N ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASD6', -INFO )
+         RETURN
+      END IF
+*
+*     The following values are for bookkeeping purposes only.  They are
+*     integer pointers which indicate the portion of the workspace
+*     used by a particular array in DLASD7 and DLASD8.
+*
+      ISIGMA = 1
+      IW = ISIGMA + N
+      IVFW = IW + M
+      IVLW = IVFW + M
+*
+      IDX = 1
+      IDXC = IDX + N
+      IDXP = IDXC + N
+*
+*     Scale.
+*
+      ORGNRM = MAX( ABS( ALPHA ), ABS( BETA ) )
+      D( NL+1 ) = ZERO
+      DO 10 I = 1, N
+         IF( ABS( D( I ) ).GT.ORGNRM ) THEN
+            ORGNRM = ABS( D( I ) )
+         END IF
+   10 CONTINUE
+      CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO )
+      ALPHA = ALPHA / ORGNRM
+      BETA = BETA / ORGNRM
+*
+*     Sort and Deflate singular values.
+*
+      CALL DLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, WORK( IW ), VF,
+     $             WORK( IVFW ), VL, WORK( IVLW ), ALPHA, BETA,
+     $             WORK( ISIGMA ), IWORK( IDX ), IWORK( IDXP ), IDXQ,
+     $             PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, C, S,
+     $             INFO )
+*
+*     Solve Secular Equation, compute DIFL, DIFR, and update VF, VL.
+*
+      CALL DLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDGNUM,
+     $             WORK( ISIGMA ), WORK( IW ), INFO )
+*
+*     Report the possible convergence failure.
+*
+      IF( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Save the poles if ICOMPQ = 1.
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         CALL DCOPY( K, D, 1, POLES( 1, 1 ), 1 )
+         CALL DCOPY( K, WORK( ISIGMA ), 1, POLES( 1, 2 ), 1 )
+      END IF
+*
+*     Unscale.
+*
+      CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+*
+*     Prepare the IDXQ sorting permutation.
+*
+      N1 = K
+      N2 = N - K
+      CALL DLAMRG( N1, N2, D, 1, -1, IDXQ )
+*
+      RETURN
+*
+*     End of DLASD6
+*
+      END
--- a/lib/linalg/dlasd7.f
+++ b/lib/linalg/dlasd7.f
@ -0,0 +1,580 @@
+*> \brief \b DLASD7 merges the two sets of singular values together into a single sorted set. Then it tries to deflate the size of the problem. Used by sbdsdc.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLASD7 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasd7.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasd7.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasd7.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, ZW, VF, VFW, VL,
+*                          VLW, ALPHA, BETA, DSIGMA, IDX, IDXP, IDXQ,
+*                          PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM,
+*                          C, S, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL,
+*      $                   NR, SQRE
+*       DOUBLE PRECISION   ALPHA, BETA, C, S
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            GIVCOL( LDGCOL, * ), IDX( * ), IDXP( * ),
+*      $                   IDXQ( * ), PERM( * )
+*       DOUBLE PRECISION   D( * ), DSIGMA( * ), GIVNUM( LDGNUM, * ),
+*      $                   VF( * ), VFW( * ), VL( * ), VLW( * ), Z( * ),
+*      $                   ZW( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLASD7 merges the two sets of singular values together into a single
+*> sorted set. Then it tries to deflate the size of the problem. There
+*> are two ways in which deflation can occur:  when two or more singular
+*> values are close together or if there is a tiny entry in the Z
+*> vector. For each such occurrence the order of the related
+*> secular equation problem is reduced by one.
+*>
+*> DLASD7 is called from DLASD6.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ICOMPQ
+*> \verbatim
+*>          ICOMPQ is INTEGER
+*>          Specifies whether singular vectors are to be computed
+*>          in compact form, as follows:
+*>          = 0: Compute singular values only.
+*>          = 1: Compute singular vectors of upper
+*>               bidiagonal matrix in compact form.
+*> \endverbatim
+*>
+*> \param[in] NL
+*> \verbatim
+*>          NL is INTEGER
+*>         The row dimension of the upper block. NL >= 1.
+*> \endverbatim
+*>
+*> \param[in] NR
+*> \verbatim
+*>          NR is INTEGER
+*>         The row dimension of the lower block. NR >= 1.
+*> \endverbatim
+*>
+*> \param[in] SQRE
+*> \verbatim
+*>          SQRE is INTEGER
+*>         = 0: the lower block is an NR-by-NR square matrix.
+*>         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*>
+*>         The bidiagonal matrix has
+*>         N = NL + NR + 1 rows and
+*>         M = N + SQRE >= N columns.
+*> \endverbatim
+*>
+*> \param[out] K
+*> \verbatim
+*>          K is INTEGER
+*>         Contains the dimension of the non-deflated matrix, this is
+*>         the order of the related secular equation. 1 <= K <=N.
+*> \endverbatim
+*>
+*> \param[in,out] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension ( N )
+*>         On entry D contains the singular values of the two submatrices
+*>         to be combined. On exit D contains the trailing (N-K) updated
+*>         singular values (those which were deflated) sorted into
+*>         increasing order.
+*> \endverbatim
+*>
+*> \param[out] Z
+*> \verbatim
+*>          Z is DOUBLE PRECISION array, dimension ( M )
+*>         On exit Z contains the updating row vector in the secular
+*>         equation.
+*> \endverbatim
+*>
+*> \param[out] ZW
+*> \verbatim
+*>          ZW is DOUBLE PRECISION array, dimension ( M )
+*>         Workspace for Z.
+*> \endverbatim
+*>
+*> \param[in,out] VF
+*> \verbatim
+*>          VF is DOUBLE PRECISION array, dimension ( M )
+*>         On entry, VF(1:NL+1) contains the first components of all
+*>         right singular vectors of the upper block; and VF(NL+2:M)
+*>         contains the first components of all right singular vectors
+*>         of the lower block. On exit, VF contains the first components
+*>         of all right singular vectors of the bidiagonal matrix.
+*> \endverbatim
+*>
+*> \param[out] VFW
+*> \verbatim
+*>          VFW is DOUBLE PRECISION array, dimension ( M )
+*>         Workspace for VF.
+*> \endverbatim
+*>
+*> \param[in,out] VL
+*> \verbatim
+*>          VL is DOUBLE PRECISION array, dimension ( M )
+*>         On entry, VL(1:NL+1) contains the  last components of all
+*>         right singular vectors of the upper block; and VL(NL+2:M)
+*>         contains the last components of all right singular vectors
+*>         of the lower block. On exit, VL contains the last components
+*>         of all right singular vectors of the bidiagonal matrix.
+*> \endverbatim
+*>
+*> \param[out] VLW
+*> \verbatim
+*>          VLW is DOUBLE PRECISION array, dimension ( M )
+*>         Workspace for VL.
+*> \endverbatim
+*>
+*> \param[in] ALPHA
+*> \verbatim
+*>          ALPHA is DOUBLE PRECISION
+*>         Contains the diagonal element associated with the added row.
+*> \endverbatim
+*>
+*> \param[in] BETA
+*> \verbatim
+*>          BETA is DOUBLE PRECISION
+*>         Contains the off-diagonal element associated with the added
+*>         row.
+*> \endverbatim
+*>
+*> \param[out] DSIGMA
+*> \verbatim
+*>          DSIGMA is DOUBLE PRECISION array, dimension ( N )
+*>         Contains a copy of the diagonal elements (K-1 singular values
+*>         and one zero) in the secular equation.
+*> \endverbatim
+*>
+*> \param[out] IDX
+*> \verbatim
+*>          IDX is INTEGER array, dimension ( N )
+*>         This will contain the permutation used to sort the contents of
+*>         D into ascending order.
+*> \endverbatim
+*>
+*> \param[out] IDXP
+*> \verbatim
+*>          IDXP is INTEGER array, dimension ( N )
+*>         This will contain the permutation used to place deflated
+*>         values of D at the end of the array. On output IDXP(2:K)
+*>         points to the nondeflated D-values and IDXP(K+1:N)
+*>         points to the deflated singular values.
+*> \endverbatim
+*>
+*> \param[in] IDXQ
+*> \verbatim
+*>          IDXQ is INTEGER array, dimension ( N )
+*>         This contains the permutation which separately sorts the two
+*>         sub-problems in D into ascending order.  Note that entries in
+*>         the first half of this permutation must first be moved one
+*>         position backward; and entries in the second half
+*>         must first have NL+1 added to their values.
+*> \endverbatim
+*>
+*> \param[out] PERM
+*> \verbatim
+*>          PERM is INTEGER array, dimension ( N )
+*>         The permutations (from deflation and sorting) to be applied
+*>         to each singular block. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[out] GIVPTR
+*> \verbatim
+*>          GIVPTR is INTEGER
+*>         The number of Givens rotations which took place in this
+*>         subproblem. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[out] GIVCOL
+*> \verbatim
+*>          GIVCOL is INTEGER array, dimension ( LDGCOL, 2 )
+*>         Each pair of numbers indicates a pair of columns to take place
+*>         in a Givens rotation. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[in] LDGCOL
+*> \verbatim
+*>          LDGCOL is INTEGER
+*>         The leading dimension of GIVCOL, must be at least N.
+*> \endverbatim
+*>
+*> \param[out] GIVNUM
+*> \verbatim
+*>          GIVNUM is DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+*>         Each number indicates the C or S value to be used in the
+*>         corresponding Givens rotation. Not referenced if ICOMPQ = 0.
+*> \endverbatim
+*>
+*> \param[in] LDGNUM
+*> \verbatim
+*>          LDGNUM is INTEGER
+*>         The leading dimension of GIVNUM, must be at least N.
+*> \endverbatim
+*>
+*> \param[out] C
+*> \verbatim
+*>          C is DOUBLE PRECISION
+*>         C contains garbage if SQRE =0 and the C-value of a Givens
+*>         rotation related to the right null space if SQRE = 1.
+*> \endverbatim
+*>
+*> \param[out] S
+*> \verbatim
+*>          S is DOUBLE PRECISION
+*>         S contains garbage if SQRE =0 and the S-value of a Givens
+*>         rotation related to the right null space if SQRE = 1.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>         = 0:  successful exit.
+*>         < 0:  if INFO = -i, the i-th argument had an illegal value.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup OTHERauxiliary
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Huan Ren, Computer Science Division, University of
+*>     California at Berkeley, USA
+*>
+*  =====================================================================
+      SUBROUTINE DLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, ZW, VF, VFW, VL,
+     $                   VLW, ALPHA, BETA, DSIGMA, IDX, IDXP, IDXQ,
+     $                   PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM,
+     $                   C, S, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     December 2016
+*
+*     .. Scalar Arguments ..
+      INTEGER            GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL,
+     $                   NR, SQRE
+      DOUBLE PRECISION   ALPHA, BETA, C, S
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), IDX( * ), IDXP( * ),
+     $                   IDXQ( * ), PERM( * )
+      DOUBLE PRECISION   D( * ), DSIGMA( * ), GIVNUM( LDGNUM, * ),
+     $                   VF( * ), VFW( * ), VL( * ), VLW( * ), Z( * ),
+     $                   ZW( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO, EIGHT
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
+     $                   EIGHT = 8.0D+0 )
+*     ..
+*     .. Local Scalars ..
+*
+      INTEGER            I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, N,
+     $                   NLP1, NLP2
+      DOUBLE PRECISION   EPS, HLFTOL, TAU, TOL, Z1
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLAMRG, DROT, XERBLA
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH, DLAPY2
+      EXTERNAL           DLAMCH, DLAPY2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      N = NL + NR + 1
+      M = N + SQRE
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( NL.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -22
+      ELSE IF( LDGNUM.LT.N ) THEN
+         INFO = -24
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASD7', -INFO )
+         RETURN
+      END IF
+*
+      NLP1 = NL + 1
+      NLP2 = NL + 2
+      IF( ICOMPQ.EQ.1 ) THEN
+         GIVPTR = 0
+      END IF
+*
+*     Generate the first part of the vector Z and move the singular
+*     values in the first part of D one position backward.
+*
+      Z1 = ALPHA*VL( NLP1 )
+      VL( NLP1 ) = ZERO
+      TAU = VF( NLP1 )
+      DO 10 I = NL, 1, -1
+         Z( I+1 ) = ALPHA*VL( I )
+         VL( I ) = ZERO
+         VF( I+1 ) = VF( I )
+         D( I+1 ) = D( I )
+         IDXQ( I+1 ) = IDXQ( I ) + 1
+   10 CONTINUE
+      VF( 1 ) = TAU
+*
+*     Generate the second part of the vector Z.
+*
+      DO 20 I = NLP2, M
+         Z( I ) = BETA*VF( I )
+         VF( I ) = ZERO
+   20 CONTINUE
+*
+*     Sort the singular values into increasing order
+*
+      DO 30 I = NLP2, N
+         IDXQ( I ) = IDXQ( I ) + NLP1
+   30 CONTINUE
+*
+*     DSIGMA, IDXC, IDXC, and ZW are used as storage space.
+*
+      DO 40 I = 2, N
+         DSIGMA( I ) = D( IDXQ( I ) )
+         ZW( I ) = Z( IDXQ( I ) )
+         VFW( I ) = VF( IDXQ( I ) )
+         VLW( I ) = VL( IDXQ( I ) )
+   40 CONTINUE
+*
+      CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )
+*
+      DO 50 I = 2, N
+         IDXI = 1 + IDX( I )
+         D( I ) = DSIGMA( IDXI )
+         Z( I ) = ZW( IDXI )
+         VF( I ) = VFW( IDXI )
+         VL( I ) = VLW( IDXI )
+   50 CONTINUE
+*
+*     Calculate the allowable deflation tolerence
+*
+      EPS = DLAMCH( 'Epsilon' )
+      TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
+      TOL = EIGHT*EIGHT*EPS*MAX( ABS( D( N ) ), TOL )
+*
+*     There are 2 kinds of deflation -- first a value in the z-vector
+*     is small, second two (or more) singular values are very close
+*     together (their difference is small).
+*
+*     If the value in the z-vector is small, we simply permute the
+*     array so that the corresponding singular value is moved to the
+*     end.
+*
+*     If two values in the D-vector are close, we perform a two-sided
+*     rotation designed to make one of the corresponding z-vector
+*     entries zero, and then permute the array so that the deflated
+*     singular value is moved to the end.
+*
+*     If there are multiple singular values then the problem deflates.
+*     Here the number of equal singular values are found.  As each equal
+*     singular value is found, an elementary reflector is computed to
+*     rotate the corresponding singular subspace so that the
+*     corresponding components of Z are zero in this new basis.
+*
+      K = 1
+      K2 = N + 1
+      DO 60 J = 2, N
+         IF( ABS( Z( J ) ).LE.TOL ) THEN
+*
+*           Deflate due to small z component.
+*
+            K2 = K2 - 1
+            IDXP( K2 ) = J
+            IF( J.EQ.N )
+     $         GO TO 100
+         ELSE
+            JPREV = J
+            GO TO 70
+         END IF
+   60 CONTINUE
+   70 CONTINUE
+      J = JPREV
+   80 CONTINUE
+      J = J + 1
+      IF( J.GT.N )
+     $   GO TO 90
+      IF( ABS( Z( J ) ).LE.TOL ) THEN
+*
+*        Deflate due to small z component.
+*
+         K2 = K2 - 1
+         IDXP( K2 ) = J
+      ELSE
+*
+*        Check if singular values are close enough to allow deflation.
+*
+         IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
+*
+*           Deflation is possible.
+*
+            S = Z( JPREV )
+            C = Z( J )
+*
+*           Find sqrt(a**2+b**2) without overflow or
+*           destructive underflow.
+*
+            TAU = DLAPY2( C, S )
+            Z( J ) = TAU
+            Z( JPREV ) = ZERO
+            C = C / TAU
+            S = -S / TAU
+*
+*           Record the appropriate Givens rotation
+*
+            IF( ICOMPQ.EQ.1 ) THEN
+               GIVPTR = GIVPTR + 1
+               IDXJP = IDXQ( IDX( JPREV )+1 )
+               IDXJ = IDXQ( IDX( J )+1 )
+               IF( IDXJP.LE.NLP1 ) THEN
+                  IDXJP = IDXJP - 1
+               END IF
+               IF( IDXJ.LE.NLP1 ) THEN
+                  IDXJ = IDXJ - 1
+               END IF
+               GIVCOL( GIVPTR, 2 ) = IDXJP
+               GIVCOL( GIVPTR, 1 ) = IDXJ
+               GIVNUM( GIVPTR, 2 ) = C
+               GIVNUM( GIVPTR, 1 ) = S
+            END IF
+            CALL DROT( 1, VF( JPREV ), 1, VF( J ), 1, C, S )
+            CALL DROT( 1, VL( JPREV ), 1, VL( J ), 1, C, S )
+            K2 = K2 - 1
+            IDXP( K2 ) = JPREV
+            JPREV = J
+         ELSE
+            K = K + 1
+            ZW( K ) = Z( JPREV )
+            DSIGMA( K ) = D( JPREV )
+            IDXP( K ) = JPREV
+            JPREV = J
+         END IF
+      END IF
+      GO TO 80
+   90 CONTINUE
+*
+*     Record the last singular value.
+*
+      K = K + 1
+      ZW( K ) = Z( JPREV )
+      DSIGMA( K ) = D( JPREV )
+      IDXP( K ) = JPREV
+*
+  100 CONTINUE
+*
+*     Sort the singular values into DSIGMA. The singular values which
+*     were not deflated go into the first K slots of DSIGMA, except
+*     that DSIGMA(1) is treated separately.
+*
+      DO 110 J = 2, N
+         JP = IDXP( J )
+         DSIGMA( J ) = D( JP )
+         VFW( J ) = VF( JP )
+         VLW( J ) = VL( JP )
+  110 CONTINUE
+      IF( ICOMPQ.EQ.1 ) THEN
+         DO 120 J = 2, N
+            JP = IDXP( J )
+            PERM( J ) = IDXQ( IDX( JP )+1 )
+            IF( PERM( J ).LE.NLP1 ) THEN
+               PERM( J ) = PERM( J ) - 1
+            END IF
+  120    CONTINUE
+      END IF
+*
+*     The deflated singular values go back into the last N - K slots of
+*     D.
+*
+      CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
+*
+*     Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and
+*     VL(M).
+*
+      DSIGMA( 1 ) = ZERO
+      HLFTOL = TOL / TWO
+      IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )
+     $   DSIGMA( 2 ) = HLFTOL
+      IF( M.GT.N ) THEN
+         Z( 1 ) = DLAPY2( Z1, Z( M ) )
+         IF( Z( 1 ).LE.TOL ) THEN
+            C = ONE
+            S = ZERO
+            Z( 1 ) = TOL
+         ELSE
+            C = Z1 / Z( 1 )
+            S = -Z( M ) / Z( 1 )
+         END IF
+         CALL DROT( 1, VF( M ), 1, VF( 1 ), 1, C, S )
+         CALL DROT( 1, VL( M ), 1, VL( 1 ), 1, C, S )
+      ELSE
+         IF( ABS( Z1 ).LE.TOL ) THEN
+            Z( 1 ) = TOL
+         ELSE
+            Z( 1 ) = Z1
+         END IF
+      END IF
+*
+*     Restore Z, VF, and VL.
+*
+      CALL DCOPY( K-1, ZW( 2 ), 1, Z( 2 ), 1 )
+      CALL DCOPY( N-1, VFW( 2 ), 1, VF( 2 ), 1 )
+      CALL DCOPY( N-1, VLW( 2 ), 1, VL( 2 ), 1 )
+*
+      RETURN
+*
+*     End of DLASD7
+*
+      END
--- a/lib/linalg/dlasd8.f
+++ b/lib/linalg/dlasd8.f
@ -0,0 +1,342 @@
+*> \brief \b DLASD8 finds the square roots of the roots of the secular equation, and stores, for each element in D, the distance to its two nearest poles. Used by sbdsdc.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLASD8 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasd8.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasd8.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasd8.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDDIFR,
+*                          DSIGMA, WORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            ICOMPQ, INFO, K, LDDIFR
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION   D( * ), DIFL( * ), DIFR( LDDIFR, * ),
+*      $                   DSIGMA( * ), VF( * ), VL( * ), WORK( * ),
+*      $                   Z( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLASD8 finds the square roots of the roots of the secular equation,
+*> as defined by the values in DSIGMA and Z. It makes the appropriate
+*> calls to DLASD4, and stores, for each  element in D, the distance
+*> to its two nearest poles (elements in DSIGMA). It also updates
+*> the arrays VF and VL, the first and last components of all the
+*> right singular vectors of the original bidiagonal matrix.
+*>
+*> DLASD8 is called from DLASD6.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ICOMPQ
+*> \verbatim
+*>          ICOMPQ is INTEGER
+*>          Specifies whether singular vectors are to be computed in
+*>          factored form in the calling routine:
+*>          = 0: Compute singular values only.
+*>          = 1: Compute singular vectors in factored form as well.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*>          K is INTEGER
+*>          The number of terms in the rational function to be solved
+*>          by DLASD4.  K >= 1.
+*> \endverbatim
+*>
+*> \param[out] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension ( K )
+*>          On output, D contains the updated singular values.
+*> \endverbatim
+*>
+*> \param[in,out] Z
+*> \verbatim
+*>          Z is DOUBLE PRECISION array, dimension ( K )
+*>          On entry, the first K elements of this array contain the
+*>          components of the deflation-adjusted updating row vector.
+*>          On exit, Z is updated.
+*> \endverbatim
+*>
+*> \param[in,out] VF
+*> \verbatim
+*>          VF is DOUBLE PRECISION array, dimension ( K )
+*>          On entry, VF contains  information passed through DBEDE8.
+*>          On exit, VF contains the first K components of the first
+*>          components of all right singular vectors of the bidiagonal
+*>          matrix.
+*> \endverbatim
+*>
+*> \param[in,out] VL
+*> \verbatim
+*>          VL is DOUBLE PRECISION array, dimension ( K )
+*>          On entry, VL contains  information passed through DBEDE8.
+*>          On exit, VL contains the first K components of the last
+*>          components of all right singular vectors of the bidiagonal
+*>          matrix.
+*> \endverbatim
+*>
+*> \param[out] DIFL
+*> \verbatim
+*>          DIFL is DOUBLE PRECISION array, dimension ( K )
+*>          On exit, DIFL(I) = D(I) - DSIGMA(I).
+*> \endverbatim
+*>
+*> \param[out] DIFR
+*> \verbatim
+*>          DIFR is DOUBLE PRECISION array,
+*>                   dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and
+*>                   dimension ( K ) if ICOMPQ = 0.
+*>          On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not
+*>          defined and will not be referenced.
+*>
+*>          If ICOMPQ = 1, DIFR(1:K,2) is an array containing the
+*>          normalizing factors for the right singular vector matrix.
+*> \endverbatim
+*>
+*> \param[in] LDDIFR
+*> \verbatim
+*>          LDDIFR is INTEGER
+*>          The leading dimension of DIFR, must be at least K.
+*> \endverbatim
+*>
+*> \param[in,out] DSIGMA
+*> \verbatim
+*>          DSIGMA is DOUBLE PRECISION array, dimension ( K )
+*>          On entry, the first K elements of this array contain the old
+*>          roots of the deflated updating problem.  These are the poles
+*>          of the secular equation.
+*>          On exit, the elements of DSIGMA may be very slightly altered
+*>          in value.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension (3*K)
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*>          > 0:  if INFO = 1, a singular value did not converge
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date June 2017
+*
+*> \ingroup OTHERauxiliary
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Huan Ren, Computer Science Division, University of
+*>     California at Berkeley, USA
+*>
+*  =====================================================================
+      SUBROUTINE DLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDDIFR,
+     $                   DSIGMA, WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.7.1) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     June 2017
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, K, LDDIFR
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), DIFL( * ), DIFR( LDDIFR, * ),
+     $                   DSIGMA( * ), VF( * ), VL( * ), WORK( * ),
+     $                   Z( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IWK1, IWK2, IWK2I, IWK3, IWK3I, J
+      DOUBLE PRECISION   DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, RHO, TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLASCL, DLASD4, DLASET, XERBLA
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DDOT, DLAMC3, DNRM2
+      EXTERNAL           DDOT, DLAMC3, DNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( K.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( LDDIFR.LT.K ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASD8', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.1 ) THEN
+         D( 1 ) = ABS( Z( 1 ) )
+         DIFL( 1 ) = D( 1 )
+         IF( ICOMPQ.EQ.1 ) THEN
+            DIFL( 2 ) = ONE
+            DIFR( 1, 2 ) = ONE
+         END IF
+         RETURN
+      END IF
+*
+*     Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
+*     be computed with high relative accuracy (barring over/underflow).
+*     This is a problem on machines without a guard digit in
+*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+*     The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
+*     which on any of these machines zeros out the bottommost
+*     bit of DSIGMA(I) if it is 1; this makes the subsequent
+*     subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
+*     occurs. On binary machines with a guard digit (almost all
+*     machines) it does not change DSIGMA(I) at all. On hexadecimal
+*     and decimal machines with a guard digit, it slightly
+*     changes the bottommost bits of DSIGMA(I). It does not account
+*     for hexadecimal or decimal machines without guard digits
+*     (we know of none). We use a subroutine call to compute
+*     2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+*     this code.
+*
+      DO 10 I = 1, K
+         DSIGMA( I ) = DLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I )
+   10 CONTINUE
+*
+*     Book keeping.
+*
+      IWK1 = 1
+      IWK2 = IWK1 + K
+      IWK3 = IWK2 + K
+      IWK2I = IWK2 - 1
+      IWK3I = IWK3 - 1
+*
+*     Normalize Z.
+*
+      RHO = DNRM2( K, Z, 1 )
+      CALL DLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO )
+      RHO = RHO*RHO
+*
+*     Initialize WORK(IWK3).
+*
+      CALL DLASET( 'A', K, 1, ONE, ONE, WORK( IWK3 ), K )
+*
+*     Compute the updated singular values, the arrays DIFL, DIFR,
+*     and the updated Z.
+*
+      DO 40 J = 1, K
+         CALL DLASD4( K, J, DSIGMA, Z, WORK( IWK1 ), RHO, D( J ),
+     $                WORK( IWK2 ), INFO )
+*
+*        If the root finder fails, report the convergence failure.
+*
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         WORK( IWK3I+J ) = WORK( IWK3I+J )*WORK( J )*WORK( IWK2I+J )
+         DIFL( J ) = -WORK( J )
+         DIFR( J, 1 ) = -WORK( J+1 )
+         DO 20 I = 1, J - 1
+            WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )*
+     $                        WORK( IWK2I+I ) / ( DSIGMA( I )-
+     $                        DSIGMA( J ) ) / ( DSIGMA( I )+
+     $                        DSIGMA( J ) )
+   20    CONTINUE
+         DO 30 I = J + 1, K
+            WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )*
+     $                        WORK( IWK2I+I ) / ( DSIGMA( I )-
+     $                        DSIGMA( J ) ) / ( DSIGMA( I )+
+     $                        DSIGMA( J ) )
+   30    CONTINUE
+   40 CONTINUE
+*
+*     Compute updated Z.
+*
+      DO 50 I = 1, K
+         Z( I ) = SIGN( SQRT( ABS( WORK( IWK3I+I ) ) ), Z( I ) )
+   50 CONTINUE
+*
+*     Update VF and VL.
+*
+      DO 80 J = 1, K
+         DIFLJ = DIFL( J )
+         DJ = D( J )
+         DSIGJ = -DSIGMA( J )
+         IF( J.LT.K ) THEN
+            DIFRJ = -DIFR( J, 1 )
+            DSIGJP = -DSIGMA( J+1 )
+         END IF
+         WORK( J ) = -Z( J ) / DIFLJ / ( DSIGMA( J )+DJ )
+         DO 60 I = 1, J - 1
+            WORK( I ) = Z( I ) / ( DLAMC3( DSIGMA( I ), DSIGJ )-DIFLJ )
+     $                   / ( DSIGMA( I )+DJ )
+   60    CONTINUE
+         DO 70 I = J + 1, K
+            WORK( I ) = Z( I ) / ( DLAMC3( DSIGMA( I ), DSIGJP )+DIFRJ )
+     $                   / ( DSIGMA( I )+DJ )
+   70    CONTINUE
+         TEMP = DNRM2( K, WORK, 1 )
+         WORK( IWK2I+J ) = DDOT( K, WORK, 1, VF, 1 ) / TEMP
+         WORK( IWK3I+J ) = DDOT( K, WORK, 1, VL, 1 ) / TEMP
+         IF( ICOMPQ.EQ.1 ) THEN
+            DIFR( J, 2 ) = TEMP
+         END IF
+   80 CONTINUE
+*
+      CALL DCOPY( K, WORK( IWK2 ), 1, VF, 1 )
+      CALL DCOPY( K, WORK( IWK3 ), 1, VL, 1 )
+*
+      RETURN
+*
+*     End of DLASD8
+*
+      END
+
--- a/lib/linalg/dlasda.f
+++ b/lib/linalg/dlasda.f
@ -0,0 +1,514 @@
+*> \brief \b DLASDA computes the singular value decomposition (SVD) of a real upper bidiagonal matrix with diagonal d and off-diagonal e. Used by sbdsdc.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLASDA + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasda.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasda.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasda.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLASDA( ICOMPQ, SMLSIZ, N, SQRE, D, E, U, LDU, VT, K,
+*                          DIFL, DIFR, Z, POLES, GIVPTR, GIVCOL, LDGCOL,
+*                          PERM, GIVNUM, C, S, WORK, IWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            ICOMPQ, INFO, LDGCOL, LDU, N, SMLSIZ, SQRE
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
+*      $                   K( * ), PERM( LDGCOL, * )
+*       DOUBLE PRECISION   C( * ), D( * ), DIFL( LDU, * ), DIFR( LDU, * ),
+*      $                   E( * ), GIVNUM( LDU, * ), POLES( LDU, * ),
+*      $                   S( * ), U( LDU, * ), VT( LDU, * ), WORK( * ),
+*      $                   Z( LDU, * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> Using a divide and conquer approach, DLASDA computes the singular
+*> value decomposition (SVD) of a real upper bidiagonal N-by-M matrix
+*> B with diagonal D and offdiagonal E, where M = N + SQRE. The
+*> algorithm computes the singular values in the SVD B = U * S * VT.
+*> The orthogonal matrices U and VT are optionally computed in
+*> compact form.
+*>
+*> A related subroutine, DLASD0, computes the singular values and
+*> the singular vectors in explicit form.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ICOMPQ
+*> \verbatim
+*>          ICOMPQ is INTEGER
+*>         Specifies whether singular vectors are to be computed
+*>         in compact form, as follows
+*>         = 0: Compute singular values only.
+*>         = 1: Compute singular vectors of upper bidiagonal
+*>              matrix in compact form.
+*> \endverbatim
+*>
+*> \param[in] SMLSIZ
+*> \verbatim
+*>          SMLSIZ is INTEGER
+*>         The maximum size of the subproblems at the bottom of the
+*>         computation tree.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>         The row dimension of the upper bidiagonal matrix. This is
+*>         also the dimension of the main diagonal array D.
+*> \endverbatim
+*>
+*> \param[in] SQRE
+*> \verbatim
+*>          SQRE is INTEGER
+*>         Specifies the column dimension of the bidiagonal matrix.
+*>         = 0: The bidiagonal matrix has column dimension M = N;
+*>         = 1: The bidiagonal matrix has column dimension M = N + 1.
+*> \endverbatim
+*>
+*> \param[in,out] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension ( N )
+*>         On entry D contains the main diagonal of the bidiagonal
+*>         matrix. On exit D, if INFO = 0, contains its singular values.
+*> \endverbatim
+*>
+*> \param[in] E
+*> \verbatim
+*>          E is DOUBLE PRECISION array, dimension ( M-1 )
+*>         Contains the subdiagonal entries of the bidiagonal matrix.
+*>         On exit, E has been destroyed.
+*> \endverbatim
+*>
+*> \param[out] U
+*> \verbatim
+*>          U is DOUBLE PRECISION array,
+*>         dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced
+*>         if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left
+*>         singular vector matrices of all subproblems at the bottom
+*>         level.
+*> \endverbatim
+*>
+*> \param[in] LDU
+*> \verbatim
+*>          LDU is INTEGER, LDU = > N.
+*>         The leading dimension of arrays U, VT, DIFL, DIFR, POLES,
+*>         GIVNUM, and Z.
+*> \endverbatim
+*>
+*> \param[out] VT
+*> \verbatim
+*>          VT is DOUBLE PRECISION array,
+*>         dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced
+*>         if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT**T contains the right
+*>         singular vector matrices of all subproblems at the bottom
+*>         level.
+*> \endverbatim
+*>
+*> \param[out] K
+*> \verbatim
+*>          K is INTEGER array,
+*>         dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0.
+*>         If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th
+*>         secular equation on the computation tree.
+*> \endverbatim
+*>
+*> \param[out] DIFL
+*> \verbatim
+*>          DIFL is DOUBLE PRECISION array, dimension ( LDU, NLVL ),
+*>         where NLVL = floor(log_2 (N/SMLSIZ))).
+*> \endverbatim
+*>
+*> \param[out] DIFR
+*> \verbatim
+*>          DIFR is DOUBLE PRECISION array,
+*>                  dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and
+*>                  dimension ( N ) if ICOMPQ = 0.
+*>         If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1)
+*>         record distances between singular values on the I-th
+*>         level and singular values on the (I -1)-th level, and
+*>         DIFR(1:N, 2 * I ) contains the normalizing factors for
+*>         the right singular vector matrix. See DLASD8 for details.
+*> \endverbatim
+*>
+*> \param[out] Z
+*> \verbatim
+*>          Z is DOUBLE PRECISION array,
+*>                  dimension ( LDU, NLVL ) if ICOMPQ = 1 and
+*>                  dimension ( N ) if ICOMPQ = 0.
+*>         The first K elements of Z(1, I) contain the components of
+*>         the deflation-adjusted updating row vector for subproblems
+*>         on the I-th level.
+*> \endverbatim
+*>
+*> \param[out] POLES
+*> \verbatim
+*>          POLES is DOUBLE PRECISION array,
+*>         dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced
+*>         if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and
+*>         POLES(1, 2*I) contain  the new and old singular values
+*>         involved in the secular equations on the I-th level.
+*> \endverbatim
+*>
+*> \param[out] GIVPTR
+*> \verbatim
+*>          GIVPTR is INTEGER array,
+*>         dimension ( N ) if ICOMPQ = 1, and not referenced if
+*>         ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records
+*>         the number of Givens rotations performed on the I-th
+*>         problem on the computation tree.
+*> \endverbatim
+*>
+*> \param[out] GIVCOL
+*> \verbatim
+*>          GIVCOL is INTEGER array,
+*>         dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not
+*>         referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I,
+*>         GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations
+*>         of Givens rotations performed on the I-th level on the
+*>         computation tree.
+*> \endverbatim
+*>
+*> \param[in] LDGCOL
+*> \verbatim
+*>          LDGCOL is INTEGER, LDGCOL = > N.
+*>         The leading dimension of arrays GIVCOL and PERM.
+*> \endverbatim
+*>
+*> \param[out] PERM
+*> \verbatim
+*>          PERM is INTEGER array,
+*>         dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced
+*>         if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records
+*>         permutations done on the I-th level of the computation tree.
+*> \endverbatim
+*>
+*> \param[out] GIVNUM
+*> \verbatim
+*>          GIVNUM is DOUBLE PRECISION array,
+*>         dimension ( LDU,  2 * NLVL ) if ICOMPQ = 1, and not
+*>         referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I,
+*>         GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S-
+*>         values of Givens rotations performed on the I-th level on
+*>         the computation tree.
+*> \endverbatim
+*>
+*> \param[out] C
+*> \verbatim
+*>          C is DOUBLE PRECISION array,
+*>         dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0.
+*>         If ICOMPQ = 1 and the I-th subproblem is not square, on exit,
+*>         C( I ) contains the C-value of a Givens rotation related to
+*>         the right null space of the I-th subproblem.
+*> \endverbatim
+*>
+*> \param[out] S
+*> \verbatim
+*>          S is DOUBLE PRECISION array, dimension ( N ) if
+*>         ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1
+*>         and the I-th subproblem is not square, on exit, S( I )
+*>         contains the S-value of a Givens rotation related to
+*>         the right null space of the I-th subproblem.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension
+*>         (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)).
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*>          IWORK is INTEGER array, dimension (7*N)
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*>          > 0:  if INFO = 1, a singular value did not converge
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date June 2017
+*
+*> \ingroup OTHERauxiliary
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Huan Ren, Computer Science Division, University of
+*>     California at Berkeley, USA
+*>
+*  =====================================================================
+      SUBROUTINE DLASDA( ICOMPQ, SMLSIZ, N, SQRE, D, E, U, LDU, VT, K,
+     $                   DIFL, DIFR, Z, POLES, GIVPTR, GIVCOL, LDGCOL,
+     $                   PERM, GIVNUM, C, S, WORK, IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.7.1) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     June 2017
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, LDGCOL, LDU, N, SMLSIZ, SQRE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
+     $                   K( * ), PERM( LDGCOL, * )
+      DOUBLE PRECISION   C( * ), D( * ), DIFL( LDU, * ), DIFR( LDU, * ),
+     $                   E( * ), GIVNUM( LDU, * ), POLES( LDU, * ),
+     $                   S( * ), U( LDU, * ), VT( LDU, * ), WORK( * ),
+     $                   Z( LDU, * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I1, IC, IDXQ, IDXQI, IM1, INODE, ITEMP, IWK,
+     $                   J, LF, LL, LVL, LVL2, M, NCC, ND, NDB1, NDIML,
+     $                   NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, NRU,
+     $                   NWORK1, NWORK2, SMLSZP, SQREI, VF, VFI, VL, VLI
+      DOUBLE PRECISION   ALPHA, BETA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLASD6, DLASDQ, DLASDT, DLASET, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( SMLSIZ.LT.3 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      ELSE IF( LDU.LT.( N+SQRE ) ) THEN
+         INFO = -8
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -17
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASDA', -INFO )
+         RETURN
+      END IF
+*
+      M = N + SQRE
+*
+*     If the input matrix is too small, call DLASDQ to find the SVD.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         IF( ICOMPQ.EQ.0 ) THEN
+            CALL DLASDQ( 'U', SQRE, N, 0, 0, 0, D, E, VT, LDU, U, LDU,
+     $                   U, LDU, WORK, INFO )
+         ELSE
+            CALL DLASDQ( 'U', SQRE, N, M, N, 0, D, E, VT, LDU, U, LDU,
+     $                   U, LDU, WORK, INFO )
+         END IF
+         RETURN
+      END IF
+*
+*     Book-keeping and  set up the computation tree.
+*
+      INODE = 1
+      NDIML = INODE + N
+      NDIMR = NDIML + N
+      IDXQ = NDIMR + N
+      IWK = IDXQ + N
+*
+      NCC = 0
+      NRU = 0
+*
+      SMLSZP = SMLSIZ + 1
+      VF = 1
+      VL = VF + M
+      NWORK1 = VL + M
+      NWORK2 = NWORK1 + SMLSZP*SMLSZP
+*
+      CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
+     $             IWORK( NDIMR ), SMLSIZ )
+*
+*     for the nodes on bottom level of the tree, solve
+*     their subproblems by DLASDQ.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 30 I = NDB1, ND
+*
+*        IC : center row of each node
+*        NL : number of rows of left  subproblem
+*        NR : number of rows of right subproblem
+*        NLF: starting row of the left   subproblem
+*        NRF: starting row of the right  subproblem
+*
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NLP1 = NL + 1
+         NR = IWORK( NDIMR+I1 )
+         NLF = IC - NL
+         NRF = IC + 1
+         IDXQI = IDXQ + NLF - 2
+         VFI = VF + NLF - 1
+         VLI = VL + NLF - 1
+         SQREI = 1
+         IF( ICOMPQ.EQ.0 ) THEN
+            CALL DLASET( 'A', NLP1, NLP1, ZERO, ONE, WORK( NWORK1 ),
+     $                   SMLSZP )
+            CALL DLASDQ( 'U', SQREI, NL, NLP1, NRU, NCC, D( NLF ),
+     $                   E( NLF ), WORK( NWORK1 ), SMLSZP,
+     $                   WORK( NWORK2 ), NL, WORK( NWORK2 ), NL,
+     $                   WORK( NWORK2 ), INFO )
+            ITEMP = NWORK1 + NL*SMLSZP
+            CALL DCOPY( NLP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 )
+            CALL DCOPY( NLP1, WORK( ITEMP ), 1, WORK( VLI ), 1 )
+         ELSE
+            CALL DLASET( 'A', NL, NL, ZERO, ONE, U( NLF, 1 ), LDU )
+            CALL DLASET( 'A', NLP1, NLP1, ZERO, ONE, VT( NLF, 1 ), LDU )
+            CALL DLASDQ( 'U', SQREI, NL, NLP1, NL, NCC, D( NLF ),
+     $                   E( NLF ), VT( NLF, 1 ), LDU, U( NLF, 1 ), LDU,
+     $                   U( NLF, 1 ), LDU, WORK( NWORK1 ), INFO )
+            CALL DCOPY( NLP1, VT( NLF, 1 ), 1, WORK( VFI ), 1 )
+            CALL DCOPY( NLP1, VT( NLF, NLP1 ), 1, WORK( VLI ), 1 )
+         END IF
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         DO 10 J = 1, NL
+            IWORK( IDXQI+J ) = J
+   10    CONTINUE
+         IF( ( I.EQ.ND ) .AND. ( SQRE.EQ.0 ) ) THEN
+            SQREI = 0
+         ELSE
+            SQREI = 1
+         END IF
+         IDXQI = IDXQI + NLP1
+         VFI = VFI + NLP1
+         VLI = VLI + NLP1
+         NRP1 = NR + SQREI
+         IF( ICOMPQ.EQ.0 ) THEN
+            CALL DLASET( 'A', NRP1, NRP1, ZERO, ONE, WORK( NWORK1 ),
+     $                   SMLSZP )
+            CALL DLASDQ( 'U', SQREI, NR, NRP1, NRU, NCC, D( NRF ),
+     $                   E( NRF ), WORK( NWORK1 ), SMLSZP,
+     $                   WORK( NWORK2 ), NR, WORK( NWORK2 ), NR,
+     $                   WORK( NWORK2 ), INFO )
+            ITEMP = NWORK1 + ( NRP1-1 )*SMLSZP
+            CALL DCOPY( NRP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 )
+            CALL DCOPY( NRP1, WORK( ITEMP ), 1, WORK( VLI ), 1 )
+         ELSE
+            CALL DLASET( 'A', NR, NR, ZERO, ONE, U( NRF, 1 ), LDU )
+            CALL DLASET( 'A', NRP1, NRP1, ZERO, ONE, VT( NRF, 1 ), LDU )
+            CALL DLASDQ( 'U', SQREI, NR, NRP1, NR, NCC, D( NRF ),
+     $                   E( NRF ), VT( NRF, 1 ), LDU, U( NRF, 1 ), LDU,
+     $                   U( NRF, 1 ), LDU, WORK( NWORK1 ), INFO )
+            CALL DCOPY( NRP1, VT( NRF, 1 ), 1, WORK( VFI ), 1 )
+            CALL DCOPY( NRP1, VT( NRF, NRP1 ), 1, WORK( VLI ), 1 )
+         END IF
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         DO 20 J = 1, NR
+            IWORK( IDXQI+J ) = J
+   20    CONTINUE
+   30 CONTINUE
+*
+*     Now conquer each subproblem bottom-up.
+*
+      J = 2**NLVL
+      DO 50 LVL = NLVL, 1, -1
+         LVL2 = LVL*2 - 1
+*
+*        Find the first node LF and last node LL on
+*        the current level LVL.
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 40 I = LF, LL
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            IF( I.EQ.LL ) THEN
+               SQREI = SQRE
+            ELSE
+               SQREI = 1
+            END IF
+            VFI = VF + NLF - 1
+            VLI = VL + NLF - 1
+            IDXQI = IDXQ + NLF - 1
+            ALPHA = D( IC )
+            BETA = E( IC )
+            IF( ICOMPQ.EQ.0 ) THEN
+               CALL DLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ),
+     $                      WORK( VFI ), WORK( VLI ), ALPHA, BETA,
+     $                      IWORK( IDXQI ), PERM, GIVPTR( 1 ), GIVCOL,
+     $                      LDGCOL, GIVNUM, LDU, POLES, DIFL, DIFR, Z,
+     $                      K( 1 ), C( 1 ), S( 1 ), WORK( NWORK1 ),
+     $                      IWORK( IWK ), INFO )
+            ELSE
+               J = J - 1
+               CALL DLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ),
+     $                      WORK( VFI ), WORK( VLI ), ALPHA, BETA,
+     $                      IWORK( IDXQI ), PERM( NLF, LVL ),
+     $                      GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                      GIVNUM( NLF, LVL2 ), LDU,
+     $                      POLES( NLF, LVL2 ), DIFL( NLF, LVL ),
+     $                      DIFR( NLF, LVL2 ), Z( NLF, LVL ), K( J ),
+     $                      C( J ), S( J ), WORK( NWORK1 ),
+     $                      IWORK( IWK ), INFO )
+            END IF
+            IF( INFO.NE.0 ) THEN
+               RETURN
+            END IF
+   40    CONTINUE
+   50 CONTINUE
+*
+      RETURN
+*
+*     End of DLASDA
+*
+      END
--- a/lib/linalg/dlasdq.f
+++ b/lib/linalg/dlasdq.f
@ -0,0 +1,413 @@
+*> \brief \b DLASDQ computes the SVD of a real bidiagonal matrix with diagonal d and off-diagonal e. Used by sbdsdc.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLASDQ + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasdq.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasdq.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasdq.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLASDQ( UPLO, SQRE, N, NCVT, NRU, NCC, D, E, VT, LDVT,
+*                          U, LDU, C, LDC, WORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          UPLO
+*       INTEGER            INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU, SQRE
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION   C( LDC, * ), D( * ), E( * ), U( LDU, * ),
+*      $                   VT( LDVT, * ), WORK( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLASDQ computes the singular value decomposition (SVD) of a real
+*> (upper or lower) bidiagonal matrix with diagonal D and offdiagonal
+*> E, accumulating the transformations if desired. Letting B denote
+*> the input bidiagonal matrix, the algorithm computes orthogonal
+*> matrices Q and P such that B = Q * S * P**T (P**T denotes the transpose
+*> of P). The singular values S are overwritten on D.
+*>
+*> The input matrix U  is changed to U  * Q  if desired.
+*> The input matrix VT is changed to P**T * VT if desired.
+*> The input matrix C  is changed to Q**T * C  if desired.
+*>
+*> See "Computing  Small Singular Values of Bidiagonal Matrices With
+*> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
+*> LAPACK Working Note #3, for a detailed description of the algorithm.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] UPLO
+*> \verbatim
+*>          UPLO is CHARACTER*1
+*>        On entry, UPLO specifies whether the input bidiagonal matrix
+*>        is upper or lower bidiagonal, and whether it is square are
+*>        not.
+*>           UPLO = 'U' or 'u'   B is upper bidiagonal.
+*>           UPLO = 'L' or 'l'   B is lower bidiagonal.
+*> \endverbatim
+*>
+*> \param[in] SQRE
+*> \verbatim
+*>          SQRE is INTEGER
+*>        = 0: then the input matrix is N-by-N.
+*>        = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and
+*>             (N+1)-by-N if UPLU = 'L'.
+*>
+*>        The bidiagonal matrix has
+*>        N = NL + NR + 1 rows and
+*>        M = N + SQRE >= N columns.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>        On entry, N specifies the number of rows and columns
+*>        in the matrix. N must be at least 0.
+*> \endverbatim
+*>
+*> \param[in] NCVT
+*> \verbatim
+*>          NCVT is INTEGER
+*>        On entry, NCVT specifies the number of columns of
+*>        the matrix VT. NCVT must be at least 0.
+*> \endverbatim
+*>
+*> \param[in] NRU
+*> \verbatim
+*>          NRU is INTEGER
+*>        On entry, NRU specifies the number of rows of
+*>        the matrix U. NRU must be at least 0.
+*> \endverbatim
+*>
+*> \param[in] NCC
+*> \verbatim
+*>          NCC is INTEGER
+*>        On entry, NCC specifies the number of columns of
+*>        the matrix C. NCC must be at least 0.
+*> \endverbatim
+*>
+*> \param[in,out] D
+*> \verbatim
+*>          D is DOUBLE PRECISION array, dimension (N)
+*>        On entry, D contains the diagonal entries of the
+*>        bidiagonal matrix whose SVD is desired. On normal exit,
+*>        D contains the singular values in ascending order.
+*> \endverbatim
+*>
+*> \param[in,out] E
+*> \verbatim
+*>          E is DOUBLE PRECISION array.
+*>        dimension is (N-1) if SQRE = 0 and N if SQRE = 1.
+*>        On entry, the entries of E contain the offdiagonal entries
+*>        of the bidiagonal matrix whose SVD is desired. On normal
+*>        exit, E will contain 0. If the algorithm does not converge,
+*>        D and E will contain the diagonal and superdiagonal entries
+*>        of a bidiagonal matrix orthogonally equivalent to the one
+*>        given as input.
+*> \endverbatim
+*>
+*> \param[in,out] VT
+*> \verbatim
+*>          VT is DOUBLE PRECISION array, dimension (LDVT, NCVT)
+*>        On entry, contains a matrix which on exit has been
+*>        premultiplied by P**T, dimension N-by-NCVT if SQRE = 0
+*>        and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0).
+*> \endverbatim
+*>
+*> \param[in] LDVT
+*> \verbatim
+*>          LDVT is INTEGER
+*>        On entry, LDVT specifies the leading dimension of VT as
+*>        declared in the calling (sub) program. LDVT must be at
+*>        least 1. If NCVT is nonzero LDVT must also be at least N.
+*> \endverbatim
+*>
+*> \param[in,out] U
+*> \verbatim
+*>          U is DOUBLE PRECISION array, dimension (LDU, N)
+*>        On entry, contains a  matrix which on exit has been
+*>        postmultiplied by Q, dimension NRU-by-N if SQRE = 0
+*>        and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0).
+*> \endverbatim
+*>
+*> \param[in] LDU
+*> \verbatim
+*>          LDU is INTEGER
+*>        On entry, LDU  specifies the leading dimension of U as
+*>        declared in the calling (sub) program. LDU must be at
+*>        least max( 1, NRU ) .
+*> \endverbatim
+*>
+*> \param[in,out] C
+*> \verbatim
+*>          C is DOUBLE PRECISION array, dimension (LDC, NCC)
+*>        On entry, contains an N-by-NCC matrix which on exit
+*>        has been premultiplied by Q**T  dimension N-by-NCC if SQRE = 0
+*>        and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0).
+*> \endverbatim
+*>
+*> \param[in] LDC
+*> \verbatim
+*>          LDC is INTEGER
+*>        On entry, LDC  specifies the leading dimension of C as
+*>        declared in the calling (sub) program. LDC must be at
+*>        least 1. If NCC is nonzero, LDC must also be at least N.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension (4*N)
+*>        Workspace. Only referenced if one of NCVT, NRU, or NCC is
+*>        nonzero, and if N is at least 2.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>        On exit, a value of 0 indicates a successful exit.
+*>        If INFO < 0, argument number -INFO is illegal.
+*>        If INFO > 0, the algorithm did not converge, and INFO
+*>        specifies how many superdiagonals did not converge.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date June 2016
+*
+*> \ingroup OTHERauxiliary
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Huan Ren, Computer Science Division, University of
+*>     California at Berkeley, USA
+*>
+*  =====================================================================
+      SUBROUTINE DLASDQ( UPLO, SQRE, N, NCVT, NRU, NCC, D, E, VT, LDVT,
+     $                   U, LDU, C, LDC, WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     June 2016
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU, SQRE
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( LDC, * ), D( * ), E( * ), U( LDU, * ),
+     $                   VT( LDVT, * ), WORK( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ROTATE
+      INTEGER            I, ISUB, IUPLO, J, NP1, SQRE1
+      DOUBLE PRECISION   CS, R, SMIN, SN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DBDSQR, DLARTG, DLASR, DSWAP, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IUPLO = 0
+      IF( LSAME( UPLO, 'U' ) )
+     $   IUPLO = 1
+      IF( LSAME( UPLO, 'L' ) )
+     $   IUPLO = 2
+      IF( IUPLO.EQ.0 ) THEN
+         INFO = -1
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NCVT.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRU.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( NCC.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR.
+     $         ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN
+         INFO = -10
+      ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN
+         INFO = -12
+      ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR.
+     $         ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN
+         INFO = -14
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASDQ', -INFO )
+         RETURN
+      END IF
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     ROTATE is true if any singular vectors desired, false otherwise
+*
+      ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 )
+      NP1 = N + 1
+      SQRE1 = SQRE
+*
+*     If matrix non-square upper bidiagonal, rotate to be lower
+*     bidiagonal.  The rotations are on the right.
+*
+      IF( ( IUPLO.EQ.1 ) .AND. ( SQRE1.EQ.1 ) ) THEN
+         DO 10 I = 1, N - 1
+            CALL DLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            IF( ROTATE ) THEN
+               WORK( I ) = CS
+               WORK( N+I ) = SN
+            END IF
+   10    CONTINUE
+         CALL DLARTG( D( N ), E( N ), CS, SN, R )
+         D( N ) = R
+         E( N ) = ZERO
+         IF( ROTATE ) THEN
+            WORK( N ) = CS
+            WORK( N+N ) = SN
+         END IF
+         IUPLO = 2
+         SQRE1 = 0
+*
+*        Update singular vectors if desired.
+*
+         IF( NCVT.GT.0 )
+     $      CALL DLASR( 'L', 'V', 'F', NP1, NCVT, WORK( 1 ),
+     $                  WORK( NP1 ), VT, LDVT )
+      END IF
+*
+*     If matrix lower bidiagonal, rotate to be upper bidiagonal
+*     by applying Givens rotations on the left.
+*
+      IF( IUPLO.EQ.2 ) THEN
+         DO 20 I = 1, N - 1
+            CALL DLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            IF( ROTATE ) THEN
+               WORK( I ) = CS
+               WORK( N+I ) = SN
+            END IF
+   20    CONTINUE
+*
+*        If matrix (N+1)-by-N lower bidiagonal, one additional
+*        rotation is needed.
+*
+         IF( SQRE1.EQ.1 ) THEN
+            CALL DLARTG( D( N ), E( N ), CS, SN, R )
+            D( N ) = R
+            IF( ROTATE ) THEN
+               WORK( N ) = CS
+               WORK( N+N ) = SN
+            END IF
+         END IF
+*
+*        Update singular vectors if desired.
+*
+         IF( NRU.GT.0 ) THEN
+            IF( SQRE1.EQ.0 ) THEN
+               CALL DLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ),
+     $                     WORK( NP1 ), U, LDU )
+            ELSE
+               CALL DLASR( 'R', 'V', 'F', NRU, NP1, WORK( 1 ),
+     $                     WORK( NP1 ), U, LDU )
+            END IF
+         END IF
+         IF( NCC.GT.0 ) THEN
+            IF( SQRE1.EQ.0 ) THEN
+               CALL DLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ),
+     $                     WORK( NP1 ), C, LDC )
+            ELSE
+               CALL DLASR( 'L', 'V', 'F', NP1, NCC, WORK( 1 ),
+     $                     WORK( NP1 ), C, LDC )
+            END IF
+         END IF
+      END IF
+*
+*     Call DBDSQR to compute the SVD of the reduced real
+*     N-by-N upper bidiagonal matrix.
+*
+      CALL DBDSQR( 'U', N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C,
+     $             LDC, WORK, INFO )
+*
+*     Sort the singular values into ascending order (insertion sort on
+*     singular values, but only one transposition per singular vector)
+*
+      DO 40 I = 1, N
+*
+*        Scan for smallest D(I).
+*
+         ISUB = I
+         SMIN = D( I )
+         DO 30 J = I + 1, N
+            IF( D( J ).LT.SMIN ) THEN
+               ISUB = J
+               SMIN = D( J )
+            END IF
+   30    CONTINUE
+         IF( ISUB.NE.I ) THEN
+*
+*           Swap singular values and vectors.
+*
+            D( ISUB ) = D( I )
+            D( I ) = SMIN
+            IF( NCVT.GT.0 )
+     $         CALL DSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( I, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL DSWAP( NRU, U( 1, ISUB ), 1, U( 1, I ), 1 )
+            IF( NCC.GT.0 )
+     $         CALL DSWAP( NCC, C( ISUB, 1 ), LDC, C( I, 1 ), LDC )
+         END IF
+   40 CONTINUE
+*
+      RETURN
+*
+*     End of DLASDQ
+*
+      END
--- a/lib/linalg/dlasdt.f
+++ b/lib/linalg/dlasdt.f
@ -0,0 +1,172 @@
+*> \brief \b DLASDT creates a tree of subproblems for bidiagonal divide and conquer. Used by sbdsdc.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLASDT + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasdt.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasdt.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasdt.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLASDT( N, LVL, ND, INODE, NDIML, NDIMR, MSUB )
+*
+*       .. Scalar Arguments ..
+*       INTEGER            LVL, MSUB, N, ND
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            INODE( * ), NDIML( * ), NDIMR( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLASDT creates a tree of subproblems for bidiagonal divide and
+*> conquer.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          On entry, the number of diagonal elements of the
+*>          bidiagonal matrix.
+*> \endverbatim
+*>
+*> \param[out] LVL
+*> \verbatim
+*>          LVL is INTEGER
+*>          On exit, the number of levels on the computation tree.
+*> \endverbatim
+*>
+*> \param[out] ND
+*> \verbatim
+*>          ND is INTEGER
+*>          On exit, the number of nodes on the tree.
+*> \endverbatim
+*>
+*> \param[out] INODE
+*> \verbatim
+*>          INODE is INTEGER array, dimension ( N )
+*>          On exit, centers of subproblems.
+*> \endverbatim
+*>
+*> \param[out] NDIML
+*> \verbatim
+*>          NDIML is INTEGER array, dimension ( N )
+*>          On exit, row dimensions of left children.
+*> \endverbatim
+*>
+*> \param[out] NDIMR
+*> \verbatim
+*>          NDIMR is INTEGER array, dimension ( N )
+*>          On exit, row dimensions of right children.
+*> \endverbatim
+*>
+*> \param[in] MSUB
+*> \verbatim
+*>          MSUB is INTEGER
+*>          On entry, the maximum row dimension each subproblem at the
+*>          bottom of the tree can be of.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \date December 2016
+*
+*> \ingroup OTHERauxiliary
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Ming Gu and Huan Ren, Computer Science Division, University of
+*>     California at Berkeley, USA
+*>
+*  =====================================================================
+      SUBROUTINE DLASDT( N, LVL, ND, INODE, NDIML, NDIMR, MSUB )
+*
+*  -- LAPACK auxiliary routine (version 3.7.0) --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*     December 2016
+*
+*     .. Scalar Arguments ..
+      INTEGER            LVL, MSUB, N, ND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            INODE( * ), NDIML( * ), NDIMR( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IL, IR, LLST, MAXN, NCRNT, NLVL
+      DOUBLE PRECISION   TEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, INT, LOG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Find the number of levels on the tree.
+*
+      MAXN = MAX( 1, N )
+      TEMP = LOG( DBLE( MAXN ) / DBLE( MSUB+1 ) ) / LOG( TWO )
+      LVL = INT( TEMP ) + 1
+*
+      I = N / 2
+      INODE( 1 ) = I + 1
+      NDIML( 1 ) = I
+      NDIMR( 1 ) = N - I - 1
+      IL = 0
+      IR = 1
+      LLST = 1
+      DO 20 NLVL = 1, LVL - 1
+*
+*        Constructing the tree at (NLVL+1)-st level. The number of
+*        nodes created on this level is LLST * 2.
+*
+         DO 10 I = 0, LLST - 1
+            IL = IL + 2
+            IR = IR + 2
+            NCRNT = LLST + I
+            NDIML( IL ) = NDIML( NCRNT ) / 2
+            NDIMR( IL ) = NDIML( NCRNT ) - NDIML( IL ) - 1
+            INODE( IL ) = INODE( NCRNT ) - NDIMR( IL ) - 1
+            NDIML( IR ) = NDIMR( NCRNT ) / 2
+            NDIMR( IR ) = NDIMR( NCRNT ) - NDIML( IR ) - 1
+            INODE( IR ) = INODE( NCRNT ) + NDIML( IR ) + 1
+   10    CONTINUE
+         LLST = LLST*2
+   20 CONTINUE
+      ND = LLST*2 - 1
+*
+      RETURN
+*
+*     End of DLASDT
+*
+      END