Skip to content

Validate (evaluation) module

ppsci.validate

Validator

Base class for validators.

Parameters:

Name Type Description Default
dataset Dataset

Dataset for validator.

required
dataloader_cfg Dict[str, Any]

Dataloader config.

required
loss Loss

Loss functor.

required
metric Optional[Dict[str, Metric]]

Named metric functors in dict.

required
name str

Name of validator.

required
Source code in ppsci/validate/base.py
class Validator:
    """Base class for all validators.

    A validator couples a dataset with a loss functor and optional metrics,
    and owns the dataloader built from the given config.

    Args:
        dataset (io.Dataset): Dataset for validator.
        dataloader_cfg (Dict[str, Any]): Dataloader config.
        loss (loss.Loss): Loss functor.
        metric (Optional[Dict[str, metric.Metric]]): Named metric functors in dict.
        name (str): Name of validator.
    """

    def __init__(
        self,
        dataset: io.Dataset,
        dataloader_cfg: Dict[str, Any],
        loss: "loss.Loss",
        metric: Optional[Dict[str, "metric.Metric"]],
        name: str,
    ):
        self.name = name
        self.loss = loss
        self.metric = metric
        # Build the dataloader once and keep a live iterator so callers can
        # fetch batches step-by-step across epochs.
        self.data_loader = data.build_dataloader(dataset, dataloader_cfg)
        self.data_iter = iter(self.data_loader)

    def __str__(self):
        # Summarize the validator's configuration as "ClassName, k = v, ...".
        fields = (
            ("name", self.name),
            ("input_keys", self.input_keys),
            ("output_keys", self.output_keys),
            ("output_expr", self.output_expr),
            ("label_dict", self.label_dict),
        )
        parts = [self.__class__.__name__]
        parts.extend(f"{key} = {value}" for key, value in fields)
        parts.append(f"len(dataloader) = {len(self.data_loader)}")
        parts.append(f"loss = {self.loss}")
        parts.append(f"metric = {list(self.metric.keys())}")
        return ", ".join(parts)

GeometryValidator

Bases: Validator

Validator for geometry.

Parameters:

Name Type Description Default
output_expr Dict[str, Callable]

Function in dict for computing output. e.g. {"u_mul_v": lambda out: out["u"] * out["v"]} means the model output u will be multiplied by model output v and the result will be named "u_mul_v".

required
label_dict Dict[str, Union[float, Callable]]

Function in dict for computing label, which will be a reference value to participate in the loss calculation.

required
geom Geometry

Geometry where data sampled from.

required
dataloader_cfg Dict[str, Any]

Dataloader config.

required
loss Loss

Loss functor.

required
random Literal['pseudo', 'Halton', 'LHS']

Random method for sampling data in geometry. Defaults to "pseudo".

'pseudo'
criteria Optional[Callable]

Criteria for refining specified domain. Defaults to None.

None
evenly bool

Whether to use evenly distribution sampling. Defaults to False.

False
metric Optional[Dict[str, Metric]]

Named metric functors in dict. Defaults to None.

None
with_initial bool

Whether the data contains time t0. Defaults to False.

False
name Optional[str]

Name of validator. Defaults to None.

None

Examples:

>>> import ppsci
>>> rect = ppsci.geometry.Rectangle((0, 0), (1, 1))
>>> geom_validator = ppsci.validate.GeometryValidator(
...     {"u": lambda out: out["u"]},
...     {"u": 0},
...     rect,
...     {
...         "dataset": "IterableNamedArrayDataset",
...         "iters_per_epoch": 1,
...         "total_size": 32,
...         "batch_size": 16,
...     },
...     ppsci.loss.MSELoss("mean"),
... )
Source code in ppsci/validate/geo_validator.py
class GeometryValidator(base.Validator):
    """Validator for geometry.

    Args:
        output_expr (Dict[str, Callable]): Function in dict for computing output.
            e.g. {"u_mul_v": lambda out: out["u"] * out["v"]} means the model output u
            will be multiplied by model output v and the result will be named "u_mul_v".
        label_dict (Dict[str, Union[float, Callable]]): Function in dict for computing
            label, which will be a reference value to participate in the loss calculation.
        geom (geometry.Geometry): Geometry where data sampled from.
        dataloader_cfg (Dict[str, Any]): Dataloader config.
        loss (loss.Loss): Loss functor.
        random (Literal["pseudo", "Halton", "LHS"], optional): Random method for sampling data in
            geometry. Defaults to "pseudo".
        criteria (Optional[Callable]): Criteria for refining specified domain. Defaults to None.
        evenly (bool, optional): Whether to use evenly distribution sampling. Defaults to False.
        metric (Optional[Dict[str, metric.Metric]]): Named metric functors in dict. Defaults to None.
        with_initial (bool, optional): Whether the data contains time t0. Defaults to False.
        name (Optional[str]): Name of validator. Defaults to None.

    Examples:
        >>> import ppsci
        >>> rect = ppsci.geometry.Rectangle((0, 0), (1, 1))
        >>> geom_validator = ppsci.validate.GeometryValidator(
        ...     {"u": lambda out: out["u"]},
        ...     {"u": 0},
        ...     rect,
        ...     {
        ...         "dataset": "IterableNamedArrayDataset",
        ...         "iters_per_epoch": 1,
        ...         "total_size": 32,
        ...         "batch_size": 16,
        ...     },
        ...     ppsci.loss.MSELoss("mean"),
        ... )
    """

    def __init__(
        self,
        output_expr: Dict[str, Callable],
        label_dict: Dict[str, Union[float, Callable]],
        geom: geometry.Geometry,
        dataloader_cfg: Dict[str, Any],
        loss: loss.Loss,
        random: Literal["pseudo", "Halton", "LHS"] = "pseudo",
        criteria: Optional[Callable] = None,
        evenly: bool = False,
        metric: Optional[Dict[str, metric.Metric]] = None,
        with_initial: bool = False,
        name: Optional[str] = None,
    ):
        self.output_expr = output_expr
        self.label_dict = label_dict
        # Input keys come from the geometry's coordinate names (e.g. ("x", "y"),
        # plus "t" for a TimeXGeometry); output keys mirror the labels.
        self.input_keys = geom.dim_keys
        self.output_keys = tuple(label_dict.keys())

        # Total number of validation samples to draw from the geometry.
        nx = dataloader_cfg["total_size"]
        self.num_timestamps = 1
        # TODO(sensen): Simplify code below
        if isinstance(geom, geometry.TimeXGeometry):
            if geom.timedomain.num_timestamps is not None:
                if with_initial:
                    # include t0
                    self.num_timestamps = geom.timedomain.num_timestamps
                    # total_size must split evenly into per-timestamp chunks.
                    # NOTE(review): assert is stripped under `python -O`; an
                    # explicit raise would be more robust — confirm intent.
                    assert (
                        nx % self.num_timestamps == 0
                    ), f"{nx} % {self.num_timestamps} != 0"
                    # nx becomes the number of spatial points per timestamp.
                    nx //= self.num_timestamps
                    # Sample interior points for the (num_timestamps - 1)
                    # non-initial timestamps...
                    input = geom.sample_interior(
                        nx * (geom.timedomain.num_timestamps - 1),
                        random,
                        criteria,
                        evenly,
                    )
                    # ...and nx points at t0, stacked in front so the initial
                    # rows come first in every input array.
                    initial = geom.sample_initial_interior(nx, random, criteria, evenly)
                    input = {
                        key: np.vstack((initial[key], input[key])) for key in input
                    }
                else:
                    # exclude t0: only the (num_timestamps - 1) later timestamps
                    # contribute samples.
                    self.num_timestamps = geom.timedomain.num_timestamps - 1
                    assert (
                        nx % self.num_timestamps == 0
                    ), f"{nx} % {self.num_timestamps} != 0"
                    nx //= self.num_timestamps
                    input = geom.sample_interior(
                        nx * (geom.timedomain.num_timestamps - 1),
                        random,
                        criteria,
                        evenly,
                    )
            else:
                raise NotImplementedError(
                    "TimeXGeometry with random timestamp not implemented yet."
                )
        else:
            # Pure spatial geometry: sample all nx points in one shot.
            input = geom.sample_interior(nx, random, criteria, evenly)

        # Build reference labels, one array per key, matching the sampled
        # input's leading dimension. Three supported label forms:
        label = {}
        for key, value in label_dict.items():
            if isinstance(value, (int, float)):
                # Constant label: broadcast to the shape/dtype of any input array.
                label[key] = np.full_like(next(iter(input.values())), value)
            elif isinstance(value, sympy.Basic):
                # Symbolic label: compile the sympy expression over the
                # geometry's coordinate symbols; "amax" is mapped to a
                # two-argument elementwise maximum for numpy evaluation.
                func = sympy.lambdify(
                    sympy.symbols(geom.dim_keys),
                    value,
                    [{"amax": lambda xy, _: np.maximum(xy[0], xy[1])}, "numpy"],
                )
                # Evaluate only on the coordinate arrays (skip auxiliary keys
                # such as sdf/normal fields that sample_interior may return).
                label[key] = func(
                    **{k: v for k, v in input.items() if k in geom.dim_keys}
                )
            elif callable(value):
                # Callable label: evaluated on the whole input dict; a scalar
                # result is broadcast to a (n, 1) column in the default dtype.
                func = value
                label[key] = func(input)
                if isinstance(label[key], (int, float)):
                    label[key] = np.full(
                        (next(iter(input.values())).shape[0], 1),
                        label[key],
                        paddle.get_default_dtype(),
                    )
            else:
                raise NotImplementedError(f"type of {type(value)} is invalid yet.")

        # Uniform (all-ones) per-sample weights: no reweighting for validation.
        weight = {key: np.ones_like(next(iter(label.values()))) for key in label}

        # Wrap arrays in the configured dataset class and defer dataloader
        # construction to the base class.
        _dataset = getattr(dataset, dataloader_cfg["dataset"])(input, label, weight)
        super().__init__(_dataset, dataloader_cfg, loss, metric, name)

SupervisedValidator

Bases: Validator

Validator for supervised models.

Parameters:

Name Type Description Default
dataloader_cfg Dict[str, Any]

Config of building a dataloader.

required
loss Loss

Loss functor.

required
output_expr Optional[Dict[str, Callable]]

Expressions in dict for computing model outputs. Defaults to None.

None
metric Optional[Dict[str, Metric]]

Named metric functors in dict. Defaults to None.

None
name Optional[str]

Name of validator. Defaults to None.

None

Examples:

>>> import ppsci
>>> valid_dataloader_cfg = {
...     "dataset": {
...         "name": "MatDataset",
...         "file_path": "/path/to/file.mat",
...         "input_keys": ("t_f",),
...         "label_keys": ("eta", "f"),
...     },
...     "batch_size": 32,
...     "sampler": {
...         "name": "BatchSampler",
...         "drop_last": False,
...         "shuffle": False,
...     },
... }
>>> eta_mse_validator = ppsci.validate.SupervisedValidator(
...     valid_dataloader_cfg,
...     ppsci.loss.MSELoss("mean"),
...     {"eta": lambda out: out["eta"]},
...     metric={"MSE": ppsci.metric.MSE()},
...     name="eta_mse",
... )
Source code in ppsci/validate/sup_validator.py
class SupervisedValidator(base.Validator):
    """Validator for supervised models.

    Args:
        dataloader_cfg (Dict[str, Any]): Config of building a dataloader.
        loss (loss.Loss): Loss functor.
        output_expr (Optional[Dict[str, Callable]]): Expressions in dict for computing
            model outputs; when None, each dataset label key is passed through unchanged.
        metric (Optional[Dict[str, metric.Metric]]): Named metric functors in dict. Defaults to None.
        name (Optional[str]): Name of validator. Defaults to None.

    Examples:
        >>> import ppsci
        >>> valid_dataloader_cfg = {
        ...     "dataset": {
        ...         "name": "MatDataset",
        ...         "file_path": "/path/to/file.mat",
        ...         "input_keys": ("t_f",),
        ...         "label_keys": ("eta", "f"),
        ...     },
        ...     "batch_size": 32,
        ...     "sampler": {
        ...         "name": "BatchSampler",
        ...         "drop_last": False,
        ...         "shuffle": False,
        ...     },
        ... }  # doctest: +SKIP
        >>> eta_mse_validator = ppsci.validate.SupervisedValidator(
        ...     valid_dataloader_cfg,
        ...     ppsci.loss.MSELoss("mean"),
        ...     {"eta": lambda out: out["eta"]},
        ...     metric={"MSE": ppsci.metric.MSE()},
        ...     name="eta_mse",
        ... )  # doctest: +SKIP
    """

    def __init__(
        self,
        dataloader_cfg: Dict[str, Any],
        loss: loss.Loss,
        output_expr: Optional[Dict[str, Callable]] = None,
        metric: Optional[Dict[str, metric.Metric]] = None,
        name: Optional[str] = None,
    ):
        self.output_expr = output_expr

        # Instantiate the dataset described by the config first; it supplies
        # the input/label key metadata used below.
        _dataset = dataset.build_dataset(dataloader_cfg["dataset"])

        self.input_keys = _dataset.input_keys
        if output_expr is None:
            # No expressions given: take output keys from the dataset labels
            # and pass each one through unchanged (k=key binds per-iteration).
            self.output_keys = _dataset.label_keys
            self.output_expr = {
                key: lambda out, k=key: out[k] for key in self.output_keys
            }
        else:
            self.output_keys = tuple(output_expr.keys())

        # Delegate dataloader construction to the base class.
        super().__init__(_dataset, dataloader_cfg, loss, metric, name)

    def __str__(self):
        # Summarize the validator's configuration as "ClassName, k = v, ...".
        fields = (
            ("name", self.name),
            ("input_keys", self.input_keys),
            ("output_keys", self.output_keys),
            ("output_expr", self.output_expr),
        )
        parts = [self.__class__.__name__]
        parts.extend(f"{key} = {value}" for key, value in fields)
        parts.append(f"len(dataloader) = {len(self.data_loader)}")
        parts.append(f"loss = {self.loss}")
        parts.append(f"metric = {list(self.metric.keys())}")
        return ", ".join(parts)