ecmwf-ifs / loki

Freely programmable source-to-source translation for Fortran

Home Page:https://sites.ecmwf.int/docs/loki/

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

No support for splitting the driver into offloaded and non-offloaded parts.

piotrows opened this issue · comments

It is impossible to compute part of the driver on the device and part on the host, since the acc exit data directive is always placed at the end of the file. Thus, the data is not accessible to potential diagnostics or other operations that by design need to be performed on the host. A possible solution seems to be to introduce a Loki directive that would order AnnotateTransformation to finalise the offloaded program region.

# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

import pytest

from loki import (
    Subroutine, Dimension, fgen, Sourcefile, SubroutineItem
)
from transformations import (
    SCCDevectorTransformation, SCCHoistTransformation,
    SCCDemoteTransformation, SCCAnnotateTransformation,
)
from conftest import available_frontends
#pylint: disable=too-many-lines

@pytest.fixture(scope='module', name='horizontal')
def fixture_horizontal():
    """
    Module-scoped fixture, exposed as ``horizontal``, providing the
    :any:`Dimension` with size ``nlon``, index ``jl`` and the bounds
    pair ``('start', 'end')``.
    """
    spec = {
        'name': 'horizontal',
        'size': 'nlon',
        'index': 'jl',
        'bounds': ('start', 'end'),
    }
    return Dimension(**spec)


@pytest.fixture(scope='module', name='vertical')
def fixture_vertical():
    """
    Module-scoped fixture, exposed as ``vertical``, providing the
    :any:`Dimension` with size ``nz`` and index ``jk``.
    """
    spec = {'name': 'vertical', 'size': 'nz', 'index': 'jk'}
    return Dimension(**spec)


@pytest.fixture(scope='module', name='blocking')
def fixture_blocking():
    """
    Module-scoped fixture, exposed as ``blocking``, providing the
    :any:`Dimension` with size ``nb`` and index ``b``.
    """
    spec = {'name': 'blocking', 'size': 'nb', 'index': 'b'}
    return Dimension(**spec)



@pytest.mark.parametrize('frontend', available_frontends())
def test_scc_hoist_multiple_kernels(frontend, horizontal, vertical, blocking):
    """
    Reproducer: a driver that calls ``compute_column`` inside the block loop
    and a second, "diagnostic" kernel ``compute_error`` after the loop.

    The same SCC pipeline (devector, demote, hoist, OpenACC annotate) is run
    for two scenarios: first with ``compute_column`` as the only target, then
    with both kernels as targets. The transformed driver of the first
    scenario is printed. The test always ends in a failing assertion.
    """

    src_driver = """
  SUBROUTINE column_driver(nlon, nz, q, nb)
    INTEGER, INTENT(IN)   :: nlon, nz, nb  ! Size of the horizontal and vertical
    REAL, INTENT(INOUT)   :: q(nlon,nz,nb)
    REAL :: talt(nlon,nz)
    INTEGER :: b, start, end

    start = 1
    end = nlon
    do b=1, nb
      call compute_column(start, end, nlon, nz, q(:,:,b))
    end do

     call compute_error(start, end, nlon, nz, q(:,:,1))
  END SUBROUTINE column_driver
"""

    src_diagnostic = """
  SUBROUTINE compute_error(start, end, nlon, nz, q)
    INTEGER, INTENT(IN) :: start, end  ! Iteration indices
    INTEGER, INTENT(IN) :: nlon, nz    ! Size of the horizontal and vertical
    REAL, INTENT(INOUT) :: q(nlon,nz)
    INTEGER :: jl, jk
    REAL :: c,d
    d = 0
    DO jk = 1, nz
      c = SUM(q(1:nlon,jk))
      d = d+c
    END DO
  END SUBROUTINE compute_error
"""
    src_column = """
  SUBROUTINE compute_column(start, end, nlon, nz, q)
    INTEGER, INTENT(IN) :: start, end  ! Iteration indices
    INTEGER, INTENT(IN) :: nlon, nz    ! Size of the horizontal and vertical
    REAL, INTENT(INOUT) :: q(nlon,nz)
    REAL :: t(nlon,nz)
    INTEGER :: jl, jk
    REAL :: c

    c = 5.345
    DO jk = 2, nz
      DO jl = start, end
        t(jl, jk) = c * k
        q(jl, jk) = q(jl, jk-1) + t(jl, jk) * c
      END DO
    END DO

    ! The scaling is purposefully upper-cased
    DO JL = START, END
      Q(JL, NZ) = Q(JL, NZ) * C
    END DO
  END SUBROUTINE compute_column
"""
    # Two independent items for the same driver source, one per scenario
    # (the second scenario's item is created first, as before)
    item_both = SubroutineItem(
        name='#column_driver',
        source=Sourcefile.from_source(src_driver, frontend=frontend)
    )
    item_column_only = SubroutineItem(
        name='#column_driver',
        source=Sourcefile.from_source(src_driver, frontend=frontend)
    )

    column_kernel = Subroutine.from_source(src_column, frontend=frontend)
    diagnostic_kernel = Subroutine.from_source(src_diagnostic, frontend=frontend)

    # Second-scenario driver: calls are enriched with both kernels
    driver_both = Subroutine.from_source(src_driver, frontend=frontend)
    for routine in (column_kernel, diagnostic_kernel):
        driver_both.enrich_calls(routine)

    # First-scenario driver: only the column kernel is attached
    driver_column_only = Subroutine.from_source(src_driver, frontend=frontend)
    driver_column_only.enrich_calls(column_kernel)

    # The pipeline is applied strictly in this order
    pipeline = (
        SCCDevectorTransformation(horizontal=horizontal),
        SCCDemoteTransformation(horizontal=horizontal),
        SCCHoistTransformation(horizontal=horizontal, vertical=vertical, block_dim=blocking),
        SCCAnnotateTransformation(
            horizontal=horizontal, vertical=vertical, directive='openacc',
            block_dim=blocking, hoist_column_arrays=True
        ),
    )

    # Scenario 1: the "diagnostic" kernel is not among the targets
    for step in pipeline:
        step.apply(
            driver_column_only, role='driver', targets=['compute_column'],
            item=item_column_only
        )
        step.apply(column_kernel, role='kernel')

    print("*******First scenario: \"diagnostic\" kernel is not processed by Loki")
    print(fgen(driver_column_only))
    print("*******Exit data placed after \"diagnostic\" kernel call, so it can't access data while being executed on host")
    print("*******Second scenario: \"diagnostic\" kernel is processed by Loki, but lacks horizontal loop by design. Transformation unsuccessful")

    # Scenario 2: both kernels are targets; note that the column kernel object
    # already went through the pipeline in scenario 1 and is processed again
    for step in pipeline:
        step.apply(
            driver_both, role='driver', targets=['compute_column','compute_error'],
            item=item_both
        )
        for routine in (column_kernel, diagnostic_kernel):
            step.apply(routine, role='kernel')

    # Unconditional failure - presumably so that the captured output above
    # is reported by pytest
    assert False