Source code for mybigdft.iofiles.logfiles

r"""
The :class:`Logfile` class is the base class you want to be
using all the time. It is mainly meant to manipulate the output of a BigDFT
calculation (written using the YAML format).

However, it might happen that the output actually contains many documents (for
instance, one document per geometry optimization procedure). In such cases,
the initialization of a ``Logfile`` actually gives another type of object,
deriving from the :class:`MultipleLogfile` class. Be careful:
these objects behave as a list of ``Logfile`` instances, not as a ``Logfile``
instance (even though they are initialized *via* the ``Logfile`` class).
To keep the same example as above, the output file of a geometry optimization
calculation can be read via the :meth:`Logfile.from_file`
method of the ``Logfile`` class, returning a
:class:`GeoptLogfile` instance.
"""

from __future__ import print_function

import warnings
from copy import deepcopy

try:
    # The ABCs live in collections.abc since Python 3.3; the aliases in
    # the bare `collections` module were removed in Python 3.10.
    from collections.abc import Sequence, Mapping
except ImportError:  # pragma: no cover
    from collections import Sequence, Mapping  # Python 2 fallback

import yaml

try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:  # pragma: no cover
    from yaml import Loader, Dumper
import numpy as np

from mybigdft.globals import INPUT_PARAMETERS_DEFINITIONS
from .inputparams import InputParams, clean
from .posinp import Posinp


__all__ = ["Logfile", "MultipleLogfile", "GeoptLogfile"]


PATHS = "paths"
DOC = "doc"
LAST_GROUND_STATE_OPTIMIZATION = ["Ground State Optimization", -1]
LAST_SUBSPACE_OPTIMIZATION = LAST_GROUND_STATE_OPTIMIZATION + [
    "Hamiltonian Optimization",
    -1,
    "Subspace Optimization",
]


def _get_value_from_last_optimization(key):
    r"""
    """
    l_1 = deepcopy(LAST_GROUND_STATE_OPTIMIZATION)
    l_1.append(key)
    l_2 = deepcopy(LAST_SUBSPACE_OPTIMIZATION)
    l_2.append(key)
    return [l_1, l_2]


ATTRIBUTES = {
    "n_at": {
        PATHS: [["Atomic System Properties", "Number of atoms"]],
        DOC: "Number of Atoms",
    },
    "boundary_conditions": {
        PATHS: [["Atomic System Properties", "Boundary Conditions"]],
        DOC: "Boundary Conditions",
    },
    "cell": {PATHS: [["Atomic System Properties", "Box Sizes (AU)"]], DOC: "Cell size"},
    "symmetry": {
        PATHS: [["Atomic System Properties", "Space group"]],
        DOC: "Symmetry group",
    },
    "atom_types": {
        PATHS: [["Atomic System Properties", "Types of atoms"]],
        DOC: "List of the atomic types present in the posinp",
    },
    "energy": {
        PATHS: [
            ["Last Iteration", "FKS"],
            ["Last Iteration", "EKS"],
            ["Energy (Hartree)"],
        ],
        DOC: "Energy (Hartree)",
    },
    "astruct": {PATHS: [["Atomic structure"]], DOC: "Atomic structure"},
    "evals": {
        PATHS: [["Complete list of energy eigenvalues"]]
        + _get_value_from_last_optimization("Orbitals"),
        DOC: "Orbital energies and occupations",
    },
    "fermi_level": {
        PATHS: _get_value_from_last_optimization("Fermi Energy"),
        DOC: "Fermi level",
    },
    "magnetization": {
        PATHS: _get_value_from_last_optimization("Total magnetization"),
        DOC: "Total magnetization of the system",
    },
    "kpt_mesh": {PATHS: [["kpt", "ngkpt"]], DOC: "No. of Monkhorst-Pack grid points"},
    "kpts": {PATHS: [["K points"]], DOC: "Grid of k-points"},
    "gnrm_cv": {
        PATHS: [["dft", "gnrm_cv"]],
        DOC: "Convergence criterion on wavefunction residue",
    },
    "forcemax_cv": {
        PATHS: [["geopt", "forcemax"]],
        DOC: "Convergence criterion on forces",
    },
    "forcemax": {
        PATHS: [
            ["Geometry", "FORCES norm(Ha/Bohr)", "maxval"],
            ["Clean forces norm (Ha/Bohr)", "maxval"],
        ],
        DOC: "Maximum value of forces",
    },
    "pressure": {PATHS: [["Pressure", "GPa"]], DOC: "Pressure (GPa)"},
    "dipole": {
        PATHS: [["Electric Dipole Moment (AU)", "P vector"]],
        DOC: "Electric Dipole Moment (AU)",
    },
    "forces": {PATHS: [["Atomic Forces (Ha/Bohr)"]], DOC: "Atomic Forces (Ha/Bohr)"},
    "force_fluct": {
        PATHS: [["Geometry", "FORCES norm(Ha/Bohr)", "fluct"]],
        DOC: "Threshold fluctuation of Forces",
    },
    "support_functions": {
        PATHS: [
            ["Gross support functions moments", "Multipole coefficients", "values"]
        ],
        DOC: "Support functions",
    },
    "electrostatic_multipoles": {
        PATHS: [["Multipole coefficients", "values"]],
        DOC: "Electrostatic multipoles",
    },
    "sdos": {PATHS: [["SDos files"]], DOC: "SDos files"},
    "walltime": {
        PATHS: [["Walltime since initialization"]],
        DOC: "Walltime since initialization",
    },
    "WARNINGS": {PATHS: [["WARNINGS"]], DOC: "Warnings raised during the BigDFT run"},
}


[docs]class Logfile(Mapping): r""" Class allowing to initialize, read, write and interact with an output file of a BigDFT calculation. """ def __init__(self, log=None): r""" Parameters ---------- log : dict Output of the BigDFT code as a yaml dictionary. """ if log is None: log = {} self._log = log self._set_builtin_attributes() self._clean_attributes() # It might happen that a geopt calculation has a step whose log # looks like it is incomplete, but they are actually acceptable. # To that end, we look for a specific warning message in one of # the logs (except the initial one). To make sure the current # log is not the initial one, we look for the "geopt" key, which # is not repreated in the subsequent logs. It's a workaround # which might prove edgy in the future. if "geopt" not in self.log: avoidable_warning = ( "The norm of the residue is too large, need " "to recalculate input wavefunctions" ) warnings = log.get("WARNINGS") acceptable_though_incomplete = ( warnings is not None and avoidable_warning in warnings ) else: acceptable_though_incomplete = False # Check if the logfile is incomplete if ( self.log != {} and not acceptable_though_incomplete and (self.energy is None and self.forces is None and self.walltime is None) ): raise ValueError("The logfile is incomplete!") params = {key: log.get(key) for key in INPUT_PARAMETERS_DEFINITIONS} params = clean(params) self._inputparams = InputParams(params=params) self._posinp = self.inputparams.posinp self._check_warnings() def _set_builtin_attributes(self): r""" Set all the base attributes of a BigDFT Logfile. They are defined by the ATTRIBUTES dictionary, whose keys are the base name of each attribute, the values being the description of the attribute as another dictionary. Once retrieved from the logfile, the attributes are set under their base name preceded by an underscore (e.g., the number of atoms read thanks to the `n_at` key of ATTRIBUTES is finally stored as the attribute `_n_at` of the Logfile instance). 
This extra underscore is meant to prevent the user from updating the value of the attribute. """ for name, description in ATTRIBUTES.items(): # Loop over the various paths (or logfile levels) where the # value might be stored. for path in description[PATHS]: # Loop over the different levels of the logfile to # retrieve the value value = self # Always start from the bare logfile for key in path: try: value = value.get(key) # value can be a dict except AttributeError: try: value = value[key] # value can be a list except TypeError: # This path leads to a dead-end: set a # default value before moving to the next # possible path. value = None continue if value is not None: # A value was found: no need to look for other paths break # Set the value to the underscored attribute setattr(self, "_" + name, value) # Set the attribute as a property setattr( self.__class__, name, property(self._init_getter(name), doc=description.get(DOC, "")), ) def _init_getter(self, name): def getter(self): return getattr(self, "_" + name) return getter def _clean_attributes(self): r""" Clean the value of the built-in attributes. """ if self.boundary_conditions is not None: self._boundary_conditions = self._boundary_conditions.lower() # Make the forces as a numpy array of shape (n_at, 3) if self.forces is not None: new_forces = np.array([]) for force in self.forces: new_forces = np.append(new_forces, list(force.values())[0]) n_at = len(self.forces) new_forces = new_forces.reshape((n_at, 3)) self._forces = new_forces def _check_warnings(self): r""" Warns ----- UserWarning If there are some warnings in the Logfile or if the XC of the pseudo-potentials do not match those of the input parameters. 
""" if self.WARNINGS is not None: for message in self.WARNINGS: if isinstance(message, dict): # It might happen that a ":" symbol is in the # description of a warning, hence it is decoded as a # dictionary; make sure to treat it as a string # instead key, value = list(message.items())[0] message = "{}: {}".format(key, value) elif not isinstance(message, str): # pragma: no cover print("MyBigDFT: weird error message found") message = str(message) warnings.warn(message, UserWarning) self._check_psppar() def _check_psppar(self): r""" Warns ----- UserWarning If the XC of the potential is different from the XC of the input parameters. """ if self.atom_types is not None: for atom_type in self.atom_types: psp = "psppar.{}".format(atom_type) psp_ixc = self[psp]["Pseudopotential XC"] inp_ixc = self["dft"]["ixc"] if psp_ixc != inp_ixc: warnings.warn( "The XC of pseudo potentials ({}) is different from " "the input XC ({}) for the '{}' atoms".format( psp_ixc, inp_ixc, atom_type ), UserWarning, )
[docs] @classmethod def from_file(cls, filename): r""" Initialize the Logfile from a file on disk. Parameters ---------- filename : str Name of the logfile. Returns ------- Logfile or GeoptLogfile or MultipleLogfile Logfile initialized from a file on disk. >>> log = Logfile.from_file("tests/log.yaml") >>> print(log.posinp) 2 angstroem free N 2.97630782434901e-23 6.87220595204354e-23 0.0107161998748779 N -1.10434491945017e-23 -4.87342174483075e-23 1.10427379608154 <BLANKLINE> >>> log.energy -19.884659235401838 """ with open(filename, "r") as stream: return cls.from_stream(stream)
[docs] @classmethod def from_stream(cls, stream): r""" Initialize the Logfile from a stream. Parameters ---------- stream Logfile as a stream. Returns ------- Logfile or GeoptLogfile or MultipleLogfile Logfile initialized from a stream. """ # The logfile might contain multiple documents docs = yaml.load_all(stream, Loader=Loader) logs = [cls(doc) for doc in docs] if len(logs) == 1: # If only one document, return a Logfile instance return logs[0] else: if logs[0].inputparams["geopt"] is not None: # If the logfile corresponds to a geopt calculation, # return a GeoptLogfile instance return GeoptLogfile(logs) else: warnings.warn( "More than one document found in the logfile!", UserWarning ) # In other cases, just return a MultipleLogfile instance return MultipleLogfile(logs)
@property def log(self): r""" Returns ------- Logfile Yaml dictionary of the output of the BigDFT code. """ return self._log
[docs] def __dir__(self): r""" The base attributes are not found when doing `dir()` on a `Logfile` instance, but their counterpart with a preceding underscore is. What is done here is a removal of the underscored names, replaced by the bare names (in order to avoid name repetition). The bare attributes still behave as properties, while their value might be updated via the underscored attribute. """ hidden_attributes = list(ATTRIBUTES.keys()) try: # pragma: no cover base_dir = super(Logfile, self).__dir__() # Python3 except AttributeError: # pragma: no cover base_dir = dir(super(Logfile, self)) # Python2 # Add the missing stuff base_dir += [ "write", "log", "from_file", "from_stream", "posinp", "values", "keys", "get", "items", "inputparams", "_check_psppar", "_check_warnings", "_clean_attributes", "_set_builtin_attributes", "_init_getter", ] base_dir += hidden_attributes # Always remove the underscored attributes (so that there are less # elements in the returned list) for name in hidden_attributes: base_dir.remove("_" + name) return base_dir
def __getitem__(self, key): return self.log[key] def __iter__(self): return iter(self.log) def __len__(self): return len(self.log) def __repr__(self): # pragma: no cover return repr(self.log)
[docs] def write(self, filename): r""" Write the logfile on disk. Parameters ---------- filename : str Name of the logfile. """ with open(filename, "w") as stream: yaml.dump(self.log, stream=stream, Dumper=Dumper)
@property def posinp(self): r""" Returns ------- Posinp Posinp used during the calculation. """ return self._posinp @property def inputparams(self): r""" Returns ------- InputParams Input parameters used during the calculation. """ return self._inputparams
[docs]class MultipleLogfile(Sequence): r""" Class allowing to initialize, read, write and interact with an output file of a BigDFT calculation containing multiple documents. """ def __init__(self, logs): r""" Parameters ---------- logs : list List of the various documents contained in the logfile of a BigDFT calculation. """ self._logs = logs @property def logs(self): r""" Returns ------- list List of the documents read from a single output of a BigDFT calculation. """ return self._logs def __getitem__(self, index): return self.logs[index] def __len__(self): return len(self.logs)
[docs] def write(self, filename): r""" Write the logfile on disk. Parameters ---------- filename : str Name of the logfile. """ logs = [log.log for log in self.logs] with open(filename, "w") as stream: yaml.dump_all(logs, stream=stream, Dumper=Dumper, explicit_start=True)
[docs]class GeoptLogfile(MultipleLogfile): r""" Class allowing to initialize, read, write and interact with an output file of a geometry optimization calculation. """ def __init__(self, logs): r""" Parameters ---------- logs : list List of the various documents contained in the logfile of a geometry optimization calculation. """ super(GeoptLogfile, self).__init__(logs) # Update the input parameters and positions of the documents for log in self.logs[1:]: log._inputparams = self.inputparams log._posinp = Posinp.from_dict(log["Atomic structure"]) self._posinps = [log.posinp for log in self.logs] @property def inputparams(self): r""" Returns ------- InputParams Input parameters used for each step of the geometry optimization procedure. """ return self.logs[0].inputparams @property def posinps(self): r""" Returns ------- list List of the input positions for each step of the geometry optimization procedure. """ return self._posinps