File could not be opened for reading with mpi and FileStorage

Question

File could not be opened for reading with mpi and FileStorage

yohad opened this issue 2 years ago · comments

Hello, I have a simulation that I run with `mpiexec -n 16 python py_pde_test.py`.

Every other time I run it, some of the MPI processes fail with the error `File data\storage-1108_191954\ could not be opened for reading`.

The code:

import numpy as np
import numba as nb
import pde
from pde.tools import mpi
from pde.solvers import Controller, ExplicitMPISolver
import time


class ModelPDE(pde.PDEBase):
    """Coupled PDE for three scalar fields ``b``, ``w`` and ``h``.

    With ``I = a * (b + q * f) / (b + q)`` (the infiltration rate) and
    ``L2 = (1 + eta * b) ** 2`` the implemented equations are::

        db/dt = nu * w * L2 * b * (1 - b) - b + delta_b * laplace(b)
        dw/dt = I * h - nu * (1 - rho * b) * w - gamma * b * L2 * w
                + delta_w * laplace(w)
        dh/dt = p - I * h - div(J),   J = -2 * delta_h * h * grad(h + zeta)

    where ``zeta`` is a fixed terrain profile (see :meth:`_terrain`).
    """

    def __init__(
        self,
        bc="auto_periodic_neumann",
        nu=10 / 3,
        eta=3.5,
        rho=0.95,
        gamma=50 / 3,
        delta_b=1 / 30,
        delta_w=10 / 3,
        delta_h=1000 / 3,
        a=33.33,
        q=0.05,
        f=0.1,
        p=0.5,
    ):
        """Store the model coefficients and the boundary conditions.

        Args:
            bc: Boundary conditions handed to every differential operator.
            nu, eta, rho, gamma: Coefficients of the reaction terms.
            delta_b, delta_w, delta_h: Prefactors of the spatial operators
                acting on ``b``, ``w`` and ``h`` respectively.
            a, q, f: Coefficients of the infiltration rate ``I``.
            p: Constant source term of the ``h`` equation.
        """
        # Let the base class set up its own state before adding ours.
        super().__init__()

        self._nu = nu
        self._eta = eta
        self._rho = rho
        self._gamma = gamma
        self._delta_b = delta_b
        self._delta_w = delta_w
        self._delta_h = delta_h
        self._a = a
        self._q = q
        self._f = f
        self._p = p

        self._bc = bc

    @staticmethod
    def _terrain(grid):
        """Return the terrain profile ``zeta`` for a two-dimensional grid.

        The value is the cell *index* along the first axis divided by 32.

        NOTE(review): this uses array indices of the grid it is given, not
        physical coordinates, so the profile depends on the discretization
        and presumably restarts at zero on every sub-grid when the domain is
        decomposed -- confirm whether coordinate-based terrain was intended.
        """
        return np.fromfunction(lambda x, _: x / 32, grid.shape)

    def _make_pde_rhs_numba(self, state):
        """Build the numba-compiled right-hand side for ``state``'s grid.

        Args:
            state: Field collection whose grid defines the operators.

        Returns:
            A function ``pde_rhs(state_data, t)`` returning an array with the
            same shape as ``state_data`` holding the three time derivatives.
        """
        # Copy the coefficients into locals so the compiled closure captures
        # plain values instead of needing access to ``self``.
        a = self._a
        q = self._q
        f = self._f
        eta = self._eta
        nu = self._nu
        rho = self._rho
        gamma = self._gamma
        delta_b = self._delta_b
        delta_w = self._delta_w
        delta_h = self._delta_h
        p = self._p

        laplace = state.grid.make_operator("laplace", bc=self._bc)
        div = state.grid.make_operator("divergence", bc=self._bc)
        grad = state.grid.make_operator("gradient", bc=self._bc)

        zeta = self._terrain(state.grid)

        @nb.jit
        def pde_rhs(state_data, t):
            # Unpack the three fields stored along the first axis.
            b = state_data[0]
            w = state_data[1]
            h = state_data[2]

            rate = np.empty_like(state_data)

            # Shared intermediate quantities.
            infiltration = a * (b + q * f) / (b + q)  # Infiltration Rate
            L2 = np.float_power(1 + eta * b, 2)
            Gb = nu * w * L2
            Gw = gamma * b * L2

            # Time derivatives of b and w.
            rate[0] = Gb * b * (1 - b) - b + delta_b * laplace(b)
            rate[1] = (
                infiltration * h
                - nu * (1 - rho * b) * w
                - Gw * w
                + delta_w * laplace(w)
            )

            # Flux of h driven by the gradient of h plus the terrain.
            J = -2 * delta_h * h * grad(h + zeta)
            rate[2] = p - infiltration * h - div(J)

            return rate

        return pde_rhs

    def evolution_rate(self, state: pde.FieldBase, t: float = 0) -> pde.FieldBase:
        """Evaluate the right-hand side using field operations.

        Args:
            state: Collection holding the fields ``b``, ``w`` and ``h``
                (in that order).
            t: Current time; not used by the equations.

        Returns:
            A field collection with the time derivatives of ``b``, ``w``
            and ``h``.
        """
        b = state[0]
        w = state[1]
        h = state[2]

        zeta = self._terrain(state.grid)

        # Shared intermediate quantities.
        infiltration = self._a * (b + self._q * self._f) / (b + self._q)  # Infiltration Rate
        L2 = np.float_power(1 + self._eta * b, 2)
        Gb = self._nu * w * L2
        Gw = self._gamma * b * L2

        # Time derivatives of b and w.
        b_t = Gb * b * (1 - b) - b + self._delta_b * b.laplace(bc=self._bc)
        w_t = (
            infiltration * h
            - self._nu * (1 - self._rho * b) * w
            - Gw * w
            + self._delta_w * w.laplace(bc=self._bc)
        )

        # Flux of h driven by the gradient of h plus the terrain.
        J = -2 * self._delta_h * h * (h + zeta).gradient(bc=self._bc)
        h_t = self._p - infiltration * h - J.divergence(bc=self._bc)

        return pde.FieldCollection([b_t, w_t, h_t])


def terrain(coords):
    """Return the terrain height for an array of coordinates.

    Args:
        coords: Three-dimensional array; component 0 of the last axis is
            used (presumably the x coordinate of each cell -- confirm
            against the caller).

    Returns:
        Array over the first two axes holding that component divided by 32.
    """
    first_component = coords[:, :, 0]
    return first_component / 32

def main():
    """Run the ``ModelPDE`` simulation with the MPI solver and make a movie.

    A 128 x 128 Cartesian grid on ``[0, 32] x [0, 32]`` (periodic along the
    first axis only) is initialised with ``b = 1``, ``w = 0`` and ``h = 0``.
    The state is integrated over ``t_range=2`` with ``dt=1e-4`` while
    ``storage.tracker(1e-1)`` records it to a time-stamped file below
    ``data``. Afterwards the process for which ``mpi.is_main`` is true
    renders the stored data to a movie below ``results``.
    """
    import os  # function-scope import keeps this block self-contained

    shape = (128, 128)
    grid = pde.CartesianGrid([(0, 32), (0, 32)], shape, periodic=[True, False])
    b = pde.ScalarField(grid, 1)
    w = pde.ScalarField(grid, 0)
    h = pde.ScalarField(grid, 0)
    state = pde.FieldCollection([b, w, h])

    # Periodic along x; fixed value on one y side, zero curvature on the other.
    bc_x = "periodic"
    bc_y = [{"value": 0}, {"curvature": 0}]
    bc = [bc_x, bc_y]

    years = 2

    # NOTE(review): under mpiexec every process evaluates this on its own, so
    # processes starting in different seconds presumably end up with
    # different file names -- confirm, and share one name across processes
    # if that matters.
    timestamp = time.strftime("%m%d_%H%M%S", time.localtime())

    # Join path components instead of embedding "\" in the literal: "\s" and
    # "\m" are invalid escape sequences, and a literal backslash is not a
    # path separator outside Windows.
    backup_name = os.path.join("data", "storage-" + timestamp)
    storage = pde.FileStorage(backup_name, write_mode="append")

    eq = ModelPDE(p=0.4, bc=bc)

    solver = ExplicitMPISolver(eq, backend="numba")
    controller = Controller(solver, t_range=years, tracker=["progress", storage.tracker(1e-1)])
    controller.run(state, dt=1e-4)

    # Only one process should render the movie from the stored data.
    if mpi.is_main:
        video_name = os.path.join("results", f"movie-{timestamp}.mp4")
        pde.movie(storage, filename=video_name, plot_args={"vmin": 0, "vmax": 1})


# Run the simulation only when this file is executed as a script.
if __name__ == "__main__":
    main()

David Zwicker · Answer 1 · Wed Nov 09 2022 17:25:22 GMT+0800 (China Standard Time)

Thank you for the report! I can't quite reproduce the problem, but given that you also don't see the problem all the time, this might not be surprising. In any case, I did not carefully check how the FileStorage interacts with the ExplicitMPISolver, so it is entirely possible that there are race conditions or other IO related problems.

To circumvent this, I just implemented a check that simply does not touch files unless we're on the main process. Could you please check whether the problem still occurs with the latest master branch?

Yotam Ohad · Answer 2 · Wed Nov 09 2022 19:41:32 GMT+0800 (China Standard Time)

This seems to have solved the problem, thanks 😄