#include "cppdefs.h"
#ifdef SOLVE3D
 
      subroutine step3d_uv2 (tile)
      implicit none
      integer tile
# include "param.h"
# include "private_scratch.h"
# include "compute_tile_bounds.h"
      call step3d_uv2_tile (istr,iend,jstr,jend, A2d(1,1), A2d(1,2),
     &                                           A2d(1,3), A2d(1,4))
      return
      end
 
      subroutine step3d_uv2_tile (istr,iend,jstr,jend, BC,CF,FC,DC)
!
! Perform timestep for the horizontal momentum equations. Due to the
! implicit treatment of vertical viscosity, it solves the following
! tri-diagonal problem:
!
!          FC(k-1) * F(k-1) + BC(k) * F(k) + FC(k) * F(k+1) = DC(k)
!
! where
!          BC(k) = Hz(k) - FC(k) - FC(k-1)
! and
!          FC(k) = - dt * Ak(k) / Hz(k)
! and
!          F(k) is the unknown field: u,v at time step nnew.
!
!  As long as all diffusivity/viscosity coefficients Ak(k) are
! nonnegative, the tri-diagonal matrics is diagonally dominant,
! so that a simple Gaussian elimination algorithm is stable,
! (e.g., Richtmeyer annd  Morton,  1967).
! The implicit vertical viscosity/diffusion terms are discretized
! using implicit backward time step.
!
      implicit none
      integer istr,iend,jstr,jend, i,j,k, is
# include "param.h"
      real BC(PRIVATE_1D_SCRATCH_ARRAY,0:N),
     &     CF(PRIVATE_1D_SCRATCH_ARRAY,0:N),
     &     FC(PRIVATE_1D_SCRATCH_ARRAY,0:N), cff,
     &     DC(PRIVATE_1D_SCRATCH_ARRAY,0:N)
# include "grid.h"
# include "ocean2d.h"
# include "ocean3d.h"
# include "coupling.h"
# include "forces.h"
# include "mixing.h"
# include "scalars.h"
# include "sources.h"
!
# include "compute_auxiliary_bounds.h"
!
! Time step momentum equation in the XI-direction.
!----- ---- -------- -------- -- --- --------------
! Compute coefficients FC [dt*Akv/Hz] for the implicit
! vertical viscosity term at future time level at horizontal
! U-points and vertical W-points.
!
      do j=jstr,jend
        do k=1,N-1
          do i=istrU,iend
            FC(i,k)=-dt*(Akv(i,j,k)+Akv(i-1,j,k))
     &                 /( z_r(i,j,k+1)+z_r(i-1,j,k+1)
     &                   -z_r(i,j,k  )-z_r(i-1,j,k  ))
          enddo
        enddo
        do i=istrU,iend
          FC(i,0)=0.                       ! Load the right-hand-side
          FC(i,N)=0.                       ! terms into matrix DC;
        enddo                              ! Also compute the matrix
        do k=1,N                           ! of coefficients on the
          do i=istrU,iend                  ! main diagonal BC.
            DC(i,k)=u(i,j,k,nnew)
            BC(i,k)=0.5*(Hz(i,j,k)+Hz(i-1,j,k))-FC(i,k)-FC(i,k-1)
          enddo
        enddo
# ifndef BODYFORCE
        do i=istrU,iend                    ! Apply wind forcing.
          DC(i,N)=DC(i,N)+dt*sustr(i,j)
          DC(i,1)=DC(i,1)-dt*bustr(i,j)
        enddo
# endif
!
! Resolve the tri-diagonal system. Also perform coupling between the
! barotropic and baroclinic modes. For the purpose of the processor
! efficiency these two operations are partially overlapped. So that
! in the third and fourth loop below operations above the separator
! (!>) belong to the tri-diagonal solver, while operations below it
! belong to the coupling.
!
! The coupling procedure implies vertical integration of the newly
! computed XI-component velocity [which ends up being stored in the
! scratch array DC(:,1:N) after the tri-diagonal problem is resolved,
! the fourth loop below] in order to obtain its vertical mean [stored
! as DC(:,0)], and replace it with the more accurate mean obtained
! from the 2D barotropic sub-model ubar=DU_avg1/(D*dn_u), where
! D=CF(:,0) is the total depth of water column.
!
        do i=istrU,iend
          cff=1./BC(i,1)
          CF(i,1)=cff*FC(i,1)
          DC(i,1)=cff*DC(i,1)
        enddo
        do k=2,N-1
          do i=istrU,iend
            cff=1./(BC(i,k)-FC(i,k-1)*CF(i,k-1))
            CF(i,k)=cff*FC(i,k)
            DC(i,k)=cff*(DC(i,k)-FC(i,k-1)*DC(i,k-1))
          enddo
        enddo
        do i=istrU,iend
          DC(i,N)=(DC(i,N)-FC(i,N-1)*DC(i,N-1))
     &           /(BC(i,N)-FC(i,N-1)*CF(i,N-1))
!>
          CF(i,0)=0.5*(Hz(i,j,N)+Hz(i-1,j,N))
          DC(i,0)=CF(i,0)*DC(i,N)
        enddo
        do k=N-1,1,-1
          do i=istrU,iend
            DC(i,k)=DC(i,k)-CF(i,k)*DC(i,k+1)
!>
            cff=0.5*(Hz(i,j,k)+Hz(i-1,j,k))
            CF(i,0)=CF(i,0)+cff
            DC(i,0)=DC(i,0)+cff*DC(i,k)
          enddo
        enddo      !--> discard FC,BC; keep DC,CF(:,0)
 
        do i=istrU,iend
          DC(i,0)=(DC(i,0)*dn_u(i,j)-DU_avg1(i,j))
     &                         /(CF(i,0)*dn_u(i,j))
        enddo
        do k=1,N
          do i=istrU,iend
            u(i,j,k,nnew)=(DC(i,k)-DC(i,0))
# ifdef MASKING
     &                            *umask(i,j)
# endif
          enddo
        enddo        !--> discard DC,CF
!
! Time step momentum equation in the ETA-direction.
!----- ---- -------- -------- -- --- --------------
! (All steps are similar to XI-component above.)
!
        if (j.ge.jstrV) then
          do k=1,N-1
            do i=istr,iend
              FC(i,k)=-dt*(Akv(i,j,k)+Akv(i,j-1,k))
     &                   /( z_r(i,j,k+1)+z_r(i,j-1,k+1)
     &                     -z_r(i,j,k  )-z_r(i,j-1,k  ))
            enddo
          enddo
          do i=istr,iend
            FC(i,0)=0.                     ! Load the right-hand-side
            FC(i,N)=0.                     ! terms into matrix DC;
          enddo                            ! Also compute the matrix
          do k=1,N                         ! of coefficients on the
            do i=istr,iend                 ! main diagonal BC
              DC(i,k)=v(i,j,k,nnew)
              BC(i,k)=0.5*(Hz(i,j,k)+Hz(i,j-1,k))-FC(i,k)-FC(i,k-1)
            enddo
          enddo
# ifndef BODYFORCE
          do i=istr,iend                   ! Apply wind forcing
            DC(i,N)=DC(i,N)+dt*svstr(i,j)
            DC(i,1)=DC(i,1)-dt*bvstr(i,j)
          enddo
# endif
 
          do i=istr,iend                   ! Resolve tri-diagonal
            cff=1./BC(i,1)                 ! system. Also perform
            CF(i,1)=cff*FC(i,1)            ! coupling between the
            DC(i,1)=cff*DC(i,1)            ! barotropic and
          enddo                            ! baroclinic modes.
          do k=2,N-1
            do i=istr,iend
              cff=1./(BC(i,k)-FC(i,k-1)*CF(i,k-1))
              CF(i,k)=cff*FC(i,k)
              DC(i,k)=cff*(DC(i,k)-FC(i,k-1)*DC(i,k-1))
            enddo
          enddo
          do i=istr,iend
            DC(i,N)=(DC(i,N)-FC(i,N-1)*DC(i,N-1))
     &             /(BC(i,N)-FC(i,N-1)*CF(i,N-1))
!>
            CF(i,0)=0.5*(Hz(i,j,N)+Hz(i,j-1,N))
            DC(i,0)=CF(i,0)*DC(i,N)
          enddo
          do k=N-1,1,-1
            do i=istr,iend
              DC(i,k)=DC(i,k)-CF(i,k)*DC(i,k+1)
!>
              cff=0.5*(Hz(i,j,k)+Hz(i,j-1,k))
              CF(i,0)=CF(i,0)+cff
              DC(i,0)=DC(i,0)+cff*DC(i,k)
            enddo
          enddo      !--> discard FC,BC; keep DC,CF(:,0)
 
          do i=istr,iend
            DC(i,0)=(DC(i,0)*dm_v(i,j)-DV_avg1(i,j))
     &                           /(CF(i,0)*dm_v(i,j))
          enddo
          do k=1,N
            do i=istr,iend
              v(i,j,k,nnew)=(DC(i,k)-DC(i,0))
# ifdef MASKING
     &                              *vmask(i,j)
# endif
            enddo
          enddo       !--> discard DC,CF(:,0)
        endif
      enddo      ! <-- j
!
! Set PHYSICAL lateral boundary conditions.
!
      call u3dbc_tile (istr,iend,jstr,jend, DC)
      call v3dbc_tile (istr,iend,jstr,jend, DC)

# ifdef PSOURCE
      do is=1,Nsrc           ! Apply point mass sources associated
        i=Isrc(is)           ! with river runoff simulations.
        j=Jsrc(is)
        if (istrR.le.i .and. i.le.iendR .and.
     &      jstrR.le.j .and. j.le.jendR) then
          if (Dsrc(is).eq.0) then
            do k=1,N
              u(i,j,k,nnew)=2.*Qsrc(is,k)/( dn_u(i,j)*(
     &                       z_w(i-1,j,k)-z_w(i-1,j,k-1)
     &                      +z_w(i  ,j,k)-z_w(i  ,j,k-1)
     &                                                ))
            enddo
          else
            do k=1,N
              v(i,j,k,nnew)=2.*Qsrc(is,k)/( dm_v(i,j)*(
     &                       z_w(i,j-1,k)-z_w(i,j-1,k-1)
     &                      +z_w(i,j  ,k)-z_w(i,j  ,k-1)
     &                                                ))
            enddo
          endif
        endif
      enddo
# endif
!
! Coupling 2D and 3D momentum equations:
!--------- -- --- -- -------- ----------
! Compute inaccurate vertical mean of the three-dimensional
! velocity field, then subtract it and replace it with the vertically
! integrated (barotropic) velocity field computed from the two-
! dimensional submodel. After that compute mass fluxes through grid
! box faces.
!
! Meaning of scratch variables below:
!
! DC(i,k) [where k=1,N]  height of grid box for U- or V-cell.
! DC(i,0) total depth of water column at horizontal U- or V-points.
! CF(i,0) vertically integrated mass flux/error/correction for
!                       the new time step velocity u,v(:,:,:,nnew)
! FC(i,0) vertically integrated mass flux/error/correction for the
!             intermediate time step [n+1/2] mass fluxes Huon,Hvom.
!
! This procedure also replaces instantaneous (with respect to the
! fast time step) vertically integrated (barotropic) velocities with
! their values based on fast-time-averaged mass fluxes. These are to
! be used as initial conditions for the barotropic mode at the new
! time step.
!
! Explanation of horizontal loop indices: in the case of periodic
! boundaries (in either direction) the coupling and computation of
! mass fluxes Huon,Hvom is performed within the internal range of
! indices (excluding ghost zones), after that the ghost points for
! the newly computed arrays are exchanged; in the case of nonperiodic
! boundaries the coupling is done over the extendeed range of indices
! (that is including boundary points).
!
# ifdef EW_PERIODIC
#  define IU_RANGE istr,iend
#  define IV_RANGE istr,iend
# else
#  define IU_RANGE istr,iendR
#  define IV_RANGE istrR,iendR
# endif
 
# ifdef NS_PERIODIC
#  define JU_RANGE jstr,jend
#  define JV_RANGE jstr,jend
# else
#  define JU_RANGE jstrR,jendR
#  define JV_RANGE jstr,jendR
# endif
 
      do j=JU_RANGE                      ! XI-component
        do i=IU_RANGE
          DC(i,0)=0.
          CF(i,0)=0.
          FC(i,0)=0.
        enddo
        do k=1,N,+1
          do i=IU_RANGE
            DC(i,k)=0.5*(Hz(i,j,k)+Hz(i-1,j,k))*dn_u(i,j)
            DC(i,0)=DC(i,0)+DC(i,k)
            CF(i,0)=CF(i,0)+DC(i,k)*u(i,j,k,nnew)
          enddo
        enddo
        do i=IU_RANGE
          DC(i,0)=1./DC(i,0)
          CF(i,0)=DC(i,0)*(CF(i,0)-DU_avg1(i,j))
          ubar(i,j,knew)=DC(i,0)*DU_avg1(i,j)
        enddo
        do k=N,1,-1
          do i=IU_RANGE
            u(i,j,k,nnew)=(u(i,j,k,nnew)-CF(i,0))
# ifdef MASKING
     &                                  *umask(i,j)
# endif
c--# define TR
# ifdef TR
            FC(i,k)=0.5*DC(i,k)*(u(i,j,k,nstp)+u(i,j,k,nnew))
#else
            FC(i,k)=0.656*Huon(i,j,k) + 0.172*DC(i,k)
     &                         *(u(i,j,k,nstp)+u(i,j,k,nnew))
#endif
            FC(i,0)=FC(i,0)+FC(i,k)
          enddo
        enddo
        do i=IU_RANGE
          FC(i,0)=DC(i,0)*(FC(i,0)-DU_avg2(i,j))
        enddo
        do k=1,N,+1
          do i=IU_RANGE
            Huon(i,j,k)=FC(i,k)-DC(i,k)*FC(i,0)
          enddo
        enddo
 
        if (j.ge.jstr) then
          do i=IV_RANGE                   ! ETA-component
            DC(i,0)=0.
            CF(i,0)=0.
            FC(i,0)=0.
          enddo
          do k=1,N,+1
            do i=IV_RANGE
              DC(i,k)=0.5*(Hz(i,j,k)+Hz(i,j-1,k))*dm_v(i,j)
              DC(i,0)=DC(i,0)+DC(i,k)
              CF(i,0)=CF(i,0)+DC(i,k)*v(i,j,k,nnew)
            enddo
          enddo
          do i=IV_RANGE
            DC(i,0)=1./DC(i,0)
            CF(i,0)=DC(i,0)*(CF(i,0)-DV_avg1(i,j))
            vbar(i,j,knew)=DC(i,0)*DV_avg1(i,j)
          enddo
          do k=N,1,-1
            do i=IV_RANGE
              v(i,j,k,nnew)=(v(i,j,k,nnew)-CF(i,0))
# ifdef MASKING
     &                                    *vmask(i,j)
# endif
# ifdef TR
              FC(i,k)=0.5*DC(i,k)*(v(i,j,k,nstp)+v(i,j,k,nnew))
# else
              FC(i,k)=0.656*Hvom(i,j,k) + 0.172*DC(i,k)
     &                            *(v(i,j,k,nstp)+v(i,j,k,nnew))
# endif
              FC(i,0)=FC(i,0)+FC(i,k)
            enddo
          enddo
          do i=IV_RANGE
            FC(i,0)=DC(i,0)*(FC(i,0)-DV_avg2(i,j))
          enddo
          do k=1,N,+1
            do i=IV_RANGE
              Hvom(i,j,k)=FC(i,k)-DC(i,k)*FC(i,0)
            enddo
          enddo
        endif
      enddo
# undef IU_RANGE
# undef JU_RANGE
# undef IV_RANGE
# undef JV_RANGE
!
! Exchange periodic boundaries and computational margins.
!
# if defined EW_PERIODIC || defined NS_PERIODIC || defined MPI
      call exchange_u3d_tile (istr,iend,jstr,jend,
     &                                 u(START_2D_ARRAY,1,nnew))
      call exchange_v3d_tile (istr,iend,jstr,jend,
     &                                 v(START_2D_ARRAY,1,nnew))
 
      call exchange_u3d_tile (istr,iend,jstr,jend, Huon)
      call exchange_v3d_tile (istr,iend,jstr,jend, Hvom)
 
      call exchange_u2d_tile (istr,iend,jstr,jend,
     &                                ubar(START_2D_ARRAY,knew))
      call exchange_v2d_tile (istr,iend,jstr,jend,
     &                                vbar(START_2D_ARRAY,knew))
# endif
      return
      end
#else
      subroutine step3d_uv_empty
      return
      end
#endif /* SOLVE3D */
 
