Source code for mlmc.sim.synth_simulation

import os
import ruamel.yaml as ruyaml
import numpy as np
from typing import List
import scipy.stats as stats
from mlmc.sim.simulation import Simulation
from mlmc.quantity.quantity_spec import QuantitySpec
from mlmc.level_simulation import LevelSimulation



[docs]
class SynthSimulation(Simulation):
    """
    Artificial (synthetic) simulation used for testing and examples.

    The simulation generates random samples from a specified distribution and
    optionally injects numerical error / NaN failures according to configuration.
    It implements the Simulation interface: provides `level_instance`, `calculate`,
    and `result_format` methods and a simple cost estimator `n_ops_estimate`.
    """

    n_nans = 0
    nan_fraction = 0
    len_results = 0
    result_dict = {}

    # Artificial simulation. Just random parameter + numerical error."""
    def __init__(self, config=None):
        """
        Initialize the synthetic simulation.

        :param config: Dict, optional configuration with keys:
                       - 'distr': a scipy.stats distribution object (default: stats.norm())
                       - 'complexity': exponent used for cost estimate (default: 2)
                       - 'nan_fraction': fraction of samples that should be returned as NaN
                       If config is None, a default normal distribution is used.
        """
        super().__init__()
        if config is None:
            config = dict(distr=stats.norm(), complexity=2)
        self.config = config

        # Static counters / settings used across instances
        SynthSimulation.n_nans = 0
        SynthSimulation.nan_fraction = config.get('nan_fraction', 0.0)
        SynthSimulation.len_results = 0

        # Indicates whether this simulation needs a workspace directory for samples
        self.need_workspace: bool = False


[docs]
    @staticmethod
    def sample_fn(x, h):
        """
        Compute a (noisy) synthetic sample value for given distribution samples.

        :param x: Distribution sample(s) (scalar or array-like).
        :param h: Simulation step (resolution parameter). Typically small positive float.
        :return: Computed sample(s). Introduces small h-dependent perturbation:
                 x + h * sqrt(1e-4 + |x|). This can produce outliers for certain x.
        """
        return x + h * np.sqrt(1e-4 + np.abs(x))



[docs]
    @staticmethod
    def sample_fn_no_error(x, h):
        """
        Compute a synthetic sample without introducing numerical error.

        :param x: Distribution sample(s) (scalar or array-like).
        :param h: Simulation step (ignored for this function).
        :return: The input sample(s) unchanged (identity mapping).
        """
        return x



[docs]
    def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation:
        """
        Create a LevelSimulation configured for a pair of fine/coarse level parameters.

        :param fine_level_params: List-like where the first element is the fine step size.
        :param coarse_level_params: List-like where the first element is the coarse step size.
        :return: LevelSimulation instance initialized with:
                 - config_dict containing 'fine.step', 'coarse.step', 'distr', and 'res_format'
                 - task_size estimated by n_ops_estimate(...)
        """
        config = dict()
        config["fine"] = {}
        config["coarse"] = {}
        config["fine"]["step"] = fine_level_params[0]
        config["coarse"]["step"] = coarse_level_params[0]
        config["distr"] = self.config["distr"]
        config["res_format"] = self.result_format()

        return LevelSimulation(config_dict=config, task_size=self.n_ops_estimate(fine_level_params[0]))



[docs]
    @staticmethod
    def generate_random_samples(distr, seed, size):
        """
        Draw random samples from the provided scipy distribution reproducibly.

        :param distr: scipy.stats distribution object (must support .rvs()).
        :param seed: Integer seed used to construct a RandomState for reproducibility.
        :param size: Number of samples to draw.
        :return: Tuple (fine_samples, coarse_samples). For this synthetic sim both are identical.
                 May return [np.nan] to simulate a failed sample according to nan_fraction.
        """
        SynthSimulation.len_results += 1
        distr.random_state = np.random.RandomState(seed)
        y = distr.rvs(size=size)

        # Inject NaN failures up to configured fraction
        if SynthSimulation.n_nans / (1e-10 + SynthSimulation.len_results) < SynthSimulation.nan_fraction:
            SynthSimulation.n_nans += 1
            y = [np.nan]

        return y, y



[docs]
    @staticmethod
    def calculate(config, seed):
        """
        Calculate fine and coarse samples and convert them to the expected result format.

        :param config: Dictionary containing simulation configuration (must include 'res_format',
                       'fine.step' and 'coarse.step' keys).
        :param seed: Integer RNG seed for reproducibility.
        :return: Tuple (fine_flat, coarse_flat) where both are 1D numpy arrays produced by
                 flattening the per-quantity/time/location arrays constructed below.
        :raises: Exception if any resulting sample contains NaN.
        """
        quantity_format = config["res_format"]

        # Generate base random values for fine and coarse (identical in this toy sim)
        fine_random, coarse_random = SynthSimulation.generate_random_samples(
            config["distr"],
            seed,
            np.prod(quantity_format[0].shape)
        )

        fine_step = config["fine"]["step"]
        coarse_step = config["coarse"]["step"]

        # Compute sample values for fine and coarse levels
        fine_result = SynthSimulation.sample_fn(fine_random, fine_step)

        if coarse_step == 0:
            coarse_result = np.zeros(len(fine_result))  # coarse = zero baseline if step==0
        else:
            coarse_result = SynthSimulation.sample_fn(coarse_random, coarse_step)

        # Fail hard if NaNs are present
        if np.any(np.isnan(fine_result)) or np.any(np.isnan(coarse_result)):
            raise Exception("result is nan")

        # Convert results into list-of-quantities × times × locations arrays and then flatten
        results = []
        for result in [fine_result, coarse_result]:
            quantities = []
            for quantity in quantity_format:
                if coarse_step == 0:
                    # replicate the same result for each location (coarse step 0 special case)
                    locations = np.array([result for _ in range(len(quantity.locations))])
                else:
                    # create simple distinct location-dependent arrays for demonstration
                    locations = np.array([result + i for i in range(len(quantity.locations))])
                # repeat across times
                times = np.array([locations for _ in range(len(quantity.times))])
                quantities.append(times)

            results.append(np.array(quantities))

        return results[0].flatten(), results[1].flatten()



[docs]
    def n_ops_estimate(self, step):
        """
        Estimate number of operations (cost) for a sample at given step size.

        :param step: Level step size (h).
        :return: Estimated operation count (float). Uses configured complexity exponent.
        """
        return (1 / step) ** self.config['complexity'] * np.log(max(1 / step, 2.0))



[docs]
    def result_format(self) -> List[QuantitySpec]:
        """
        Define the synthetic simulation's result format.

        :return: List[QuantitySpec] describing the shape, units, times and locations
                 for each reported quantity. This informs how `calculate` arranges
                 and flattens outputs.
        """
        spec1 = QuantitySpec(name="length", unit="m", shape=(2, 1), times=[1, 2, 3], locations=['10', '20'])
        spec2 = QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3], locations=['30', '40'])
        # Alternative examples with numeric locations (commented out)
        # spec1 = QuantitySpec(name="length", unit="m", shape=(2, 1), times=[1, 2, 3],
        #                      locations=[(1, 2, 3), (4, 5, 6)])
        # spec2 = QuantitySpec(name="width", unit="mm", shape=(2, 1), times=[1, 2, 3],
        #                      locations=[(7, 8, 9), (10, 11, 12)])
        return [spec1, spec2]






[docs]
class SynthSimulationWorkspace(SynthSimulation):
    """
    Synthetic simulation variant that requires a workspace (reads config from YAML).

    This subclass behaves like `SynthSimulation` but:
      - Reads distribution and nan_fraction from a YAML configuration file.
      - Declares `need_workspace = True` so sample files are written to/read from disk.
      - Supplies `common_files` (the YAML) to LevelSimulation so workspaces get that file.
    """

    n_nans = 0
    nan_fraction = 0
    len_results = 0
    result_dict = {}

    CONFIG_FILE = 'synth_sim_config.yaml'

    # Artificial simulation. Just random parameter + numerical error."""
    def __init__(self, config):
        """
        Initialize the workspace-capable synthetic simulation.

        :param config: Dict with at least:
            - "config_yaml": path to YAML configuration file (relative or absolute)
            Optionally may contain 'nan_fraction' as a fallback.
        """
        self.config_yaml = config["config_yaml"]

        # Reset static counters
        SynthSimulationWorkspace.n_nans = 0
        SynthSimulationWorkspace.nan_fraction = config.get('nan_fraction', 0.0)
        SynthSimulationWorkspace.len_results = 0

        # This simulation requires a workspace directory for sample execution
        self.need_workspace: bool = True


[docs]
    @staticmethod
    def sample_fn(x, h):
        """
        Compute a (noisy) synthetic sample value for given distribution samples.

        :param x: Distribution sample(s) (scalar or array-like).
        :param h: Simulation step (resolution parameter).
        :return: Computed sample(s): x + h * sqrt(1e-4 + |x|).
        """
        return x + h * np.sqrt(1e-4 + np.abs(x))



[docs]
    @staticmethod
    def sample_fn_no_error(x, h):
        """
        Identity sampling function (no added numerical error).

        :param x: Distribution sample(s).
        :param h: Simulation step (ignored).
        :return: x (unchanged).
        """
        return x



[docs]
    def level_instance(self, fine_level_params: List[float], coarse_level_params: List[float]) -> LevelSimulation:
        """
        Produce a LevelSimulation configured to use the YAML config as a common file.

        :param fine_level_params: list-like where first element is fine step size.
        :param coarse_level_params: list-like where first element is coarse step size.
        :return: LevelSimulation configured with:
                 - config_dict: containing 'fine.step', 'coarse.step', 'res_format'
                 - common_files: list containing the YAML path (so worker/workspace has it)
                 - task_size: small constant (1/job_weight) to simulate job weighting
                 - need_sample_workspace: True (this class requires workspace)
        """
        config = dict()
        config["fine"] = {}
        config["coarse"] = {}

        config["fine"]["step"] = fine_level_params[0]
        config["coarse"]["step"] = coarse_level_params[0]
        config["res_format"] = self.result_format()

        # Use a fixed job weight to keep task_size small (simulating many small jobs)
        job_weight = 20000

        return LevelSimulation(
            config_dict=config,
            common_files=[self.config_yaml],
            task_size=1.0 / job_weight,
            need_sample_workspace=self.need_workspace
        )



[docs]
    @staticmethod
    def generate_random_samples(distr, seed, size):
        """
        Draw random samples based on YAML-specified distribution names.

        This implementation currently supports only the string "norm" which
        maps to scipy.stats.norm(loc=1, scale=2). A NotImplementedError is raised
        for other distribution identifiers.

        :param distr: Either a string identifier (e.g. "norm") or a scipy distribution.
        :param seed: Integer RNG seed used to create a RandomState for reproducibility.
        :param size: Integer number of samples to draw.
        :return: Tuple (fine_samples, coarse_samples) — identical arrays for this toy sim.
                 May return [np.nan] to simulate a failed sample according to nan_fraction.
        """
        SynthSimulationWorkspace.len_results += 1

        if distr == "norm":
            distr = stats.norm(loc=1, scale=2)
        else:
            raise NotImplementedError("Other distributions are not implemented yet")

        distr.random_state = np.random.RandomState(seed)
        y = distr.rvs(size=size)

        # Inject NaN failure if configured fraction not yet reached
        if SynthSimulationWorkspace.n_nans / (1e-10 + SynthSimulationWorkspace.len_results) < SynthSimulationWorkspace.nan_fraction:
            SynthSimulationWorkspace.n_nans += 1
            y = [np.nan]

        return y, y



[docs]
    @staticmethod
    def calculate(config, seed):
        """
        Calculate fine and coarse samples (using values from the YAML config file).

        Workflow:
          1. Read YAML configuration (via _read_config) to get distribution and nan_fraction.
          2. Generate base random numbers (fine_random, coarse_random).
          3. Compute fine_result and coarse_result via sample functions.
          4. Assemble results into arrays shaped by res_format and flatten them.

        :param config: LevelSimulation.config_dict (must include 'res_format', 'fine.step', 'coarse.step').
        :param seed: Integer RNG seed.
        :return: Tuple (fine_flat, coarse_flat) — 1D numpy arrays produced by flattening quantities × times × locations.
        :raises: Exception if any computed result contains NaN.
        """
        # Load runtime YAML config (distribution name and nan_fraction)
        config_file = SynthSimulationWorkspace._read_config()
        SynthSimulationWorkspace.nan_fraction = config_file["nan_fraction"]

        quantity_format = config["res_format"]

        fine_random, coarse_random = SynthSimulationWorkspace.generate_random_samples(
            config_file["distr"], seed, np.prod(quantity_format[0].shape)
        )

        fine_step = config["fine"]["step"]
        coarse_step = config["coarse"]["step"]

        fine_result = SynthSimulation.sample_fn(fine_random, fine_step)

        if coarse_step == 0:
            coarse_result = np.zeros(len(fine_result))
        else:
            coarse_result = SynthSimulation.sample_fn(coarse_random, coarse_step)

        if np.any(np.isnan(fine_result)) or np.any(np.isnan(coarse_result)):
            raise Exception("result is nan")

        results = []
        for result in [fine_result, coarse_result]:
            quantities = []
            for quantity in quantity_format:
                if coarse_step == 0:
                    locations = np.array([result for _ in range(len(quantity.locations))])
                else:
                    locations = np.array([result + i for i in range(len(quantity.locations))])
                times = np.array([locations for _ in range(len(quantity.times))])
                quantities.append(times)
            results.append(np.array(quantities))

        return results[0].flatten(), results[1].flatten()



[docs]
    def n_ops_estimate(self, step):
        """
        Estimate a synthetic operation count for the workspace-enabled simulation.
        :param step: Level step size.
        :return: Estimated operation cost (float). Uses a fixed exponent of 2 here.
        """
        return (1 / step) ** 2 * np.log(max(1 / step, 2.0))


    @staticmethod
    def _read_config():
        """
        Read the YAML configuration file (CONFIG_FILE) from the current working directory.

        The YAML is parsed using ruamel.yaml and should contain keys expected by this class
        (e.g. "distr" and "nan_fraction").

        :return: Parsed configuration dictionary.
        :raises: IOError / FileNotFoundError if the YAML file is missing.
        """
        with open(os.path.join(os.getcwd(), SynthSimulationWorkspace.CONFIG_FILE)) as file:
            yaml = ruyaml.YAML(typ='rt')
            config = yaml.load(file)

        return config