# Source code for mlcalcdriver.base.job

r"""
The :class:`Job` class is the base object defining similar machine learning
predictions for a single or many atomic configurations.
"""

import numpy as np
import torch
from copy import deepcopy
from mlcalcdriver.globals import HA_TO_EV, B_TO_ANG, ANG_TO_B
import warnings


class Job:
    r"""
    This class defines a machine learning prediction. It must
    contain a mlcalcdriver.Posinp instance to define the atomic
    configuration.

    A Job ties together one or many atomic configurations and the
    `Calculator` used to evaluate them. Calling :meth:`run` stores the
    predicted values in the `results` attribute.
    """

    def __init__(self, name="", posinp=None, calculator=None):
        r"""
        Parameters
        ----------
        name : str
            Name of the job. Will be used to name the created files.
        posinp : :class:`Posinp` or list of :class:`Posinp`
            Atomic positions for the job. Many different configurations
            may be predicted at the same time, in that case they should
            be passed in a list.
        calculator : :class:`Calculator`
            `Calculator` instance to use to evaluate
            properties in the run() method.

        Raises
        ------
        TypeError
            If `calculator` is not a `Calculator` instance or if one of
            the positions is not a `Posinp` instance.
        ValueError
            If `posinp` is None.
        """
        self.name = name
        # The calculator must be set before the positions: the posinp
        # setter reads `self.calculator.units` to convert the positions.
        self.calculator = calculator
        self.posinp = posinp
        self.num_struct = len(self.posinp)
        self.results = JobResults(properties=self.calculator.available_properties)

    @property
    def posinp(self):
        r"""
        Returns
        -------
        list of Posinp
            Initial positions of the prediction. A single `Posinp`
            given at creation is wrapped in a one-element list.
        """
        return self._posinp

    @posinp.setter
    def posinp(self, posinp):
        if posinp is None:
            raise ValueError("A Job instance has no initial positions.")
        elif not isinstance(posinp, list):
            posinp = [posinp]
        from mlcalcdriver.base.posinp import Posinp

        # Validate every entry first so that no structure is converted
        # when one of them is invalid.
        for pos in posinp:
            if not isinstance(pos, Posinp):
                raise TypeError(
                    """
                    Atomic positions should be given
                    only in Posinp instances.
                    """
                )
        target_units = self.calculator.units["positions"]
        for pos in posinp:
            if pos.units != target_units:
                pos.convert_units(target_units)
        self._posinp = posinp

    @property
    def name(self):
        r"""
        Returns
        -------
        str
            Base name of the prediction used to set the names
            of files and directories.
        """
        return self._name

    @name.setter
    def name(self, name):
        self._name = str(name)

    @property
    def num_struct(self):
        r"""
        Returns
        -------
        int
            Number of different structures when Job is declared
        """
        return self._num_struct

    @num_struct.setter
    def num_struct(self, num_struct):
        self._num_struct = num_struct

    @property
    def results(self):
        r"""
        Returns
        -------
        JobResults
            The dictionary containing the results of the calculation
        """
        return self._results

    @results.setter
    def results(self, results):
        self._results = results

    @property
    def calculator(self):
        r"""
        Returns
        -------
        Calculator
            The Calculator object to use for the Job
        """
        return self._calculator

    @calculator.setter
    def calculator(self, calculator):
        from mlcalcdriver.calculators.calculator import Calculator

        if isinstance(calculator, Calculator):
            self._calculator = calculator
        else:
            raise TypeError(
                """
                The calculator for the Job must be a class or a
                metaclass derived from mlcalcdriver.calculators.Calculator.
                """
            )

    def run(self, property, batch_size=128, finite_difference=False):
        r"""
        Main method to call to obtain results for a Job

        The predictions are rescaled with `HA_TO_EV` and `ANG_TO_B` when
        the calculator works in hartree and/or atomic position units,
        then stored in `self.results`.

        Parameters
        ----------
        property : str
            Property to calculate. Must be in the
            available_properties of the Calculator except the
            forces which can be derived from an energy
            Calculator.
        batch_size : int
            Size of the mini-batches used in predictions.
            Default is 128.
        finite_difference : bool
            If `True`, `property` is ignored: the energy is predicted
            for the initial and for displaced structures, and both
            "energy" and "forces" (numerical derivative of the energy)
            are stored. Default is `False`.

        Raises
        ------
        NotImplementedError
            If a dipole moment is predicted in a unit other than Debye.
        KeyError
            If the unit handling of a predicted quantity is unknown.
        """
        if finite_difference:
            predictions = self._run_finite_difference(batch_size)
        else:
            predictions = self.calculator.run(
                property=property, posinp=self.posinp, batch_size=batch_size
            )

        for pred in predictions:
            factors = self._unit_factors(pred)
            self.results[pred] = self._scale_prediction(predictions[pred], factors)

    def _run_finite_difference(self, batch_size):
        r"""
        Predicts energies and finite-difference forces for every
        initial structure of the Job.

        Parameters
        ----------
        batch_size : int
            Size of the mini-batches used in predictions.

        Returns
        -------
        dict
            Keys "energy" and "forces", each a list holding one entry
            per initial structure.
        """
        self._create_additional_structures()
        try:
            raw_predictions = self.calculator.run(
                property="energy", posinp=self.posinp, batch_size=batch_size
            )
        finally:
            # Always put the initial structures back, even when the
            # calculator fails, so the Job is not left holding the
            # displaced structures.
            self.posinp = deepcopy(self._init_posinp)

        energies = raw_predictions["energy"]
        predictions = {"energy": [], "forces": []}
        pred_idx = 0
        for struct in self._init_posinp:
            # Each structure contributes its own energy followed by
            # 12 displaced energies per atom (4 displacements x 3 axes).
            n_displaced = 12 * len(struct)
            predictions["energy"].append(energies[pred_idx][0])
            pred_idx += 1
            predictions["forces"].append(
                self._calculate_forces(energies[pred_idx : pred_idx + n_displaced])
            )
            pred_idx += n_displaced
        return predictions

    def _unit_factors(self, pred):
        r"""
        Returns the multiplicative factors to apply to a prediction.

        Parameters
        ----------
        pred : str
            Name of the predicted quantity.

        Returns
        -------
        tuple of float
            Factors to apply one after the other; empty when the
            calculator units need no conversion.

        Raises
        ------
        NotImplementedError
            If a dipole moment is predicted in a unit other than Debye.
        KeyError
            If the unit handling of `pred` is unknown.
        """
        # Future proofing, will probably need some work
        units = self.calculator.units
        if pred in ("energy", "energy_std", "gap"):
            return (HA_TO_EV,) if units["energy"] == "hartree" else ()
        if pred in ("forces", "forces_std", "hessian"):
            factors = []
            if units["energy"] == "hartree":
                factors.append(HA_TO_EV)
            if units["positions"] == "atomic":
                # First derivatives carry one inverse length, the
                # hessian carries two.
                factors.append(ANG_TO_B ** 2 if pred == "hessian" else ANG_TO_B)
            return tuple(factors)
        if pred in ("dipole_moment", "mu"):
            if units["dipole_moment"] != "Debye":
                raise NotImplementedError(
                    "The unit {} of dipole moment is not implemented yet.".format(
                        units["dipole_moment"]
                    )
                )
            return ()
        raise KeyError(
            "The units for this predicted quantity have not been implemented yet."
        )

    @staticmethod
    def _scale_prediction(values, factors):
        r"""
        Multiplies a prediction by each factor in turn.

        Parameters
        ----------
        values : list, tuple or array-like
            Predicted values. Lists and tuples (as produced by the
            finite difference path) are scaled entry by entry, since a
            Python sequence cannot be multiplied by a float.
        factors : iterable of float
            Factors to apply one after the other.

        Returns
        -------
        list or array-like
            The scaled values; `values` itself when `factors` is empty.
        """
        for factor in factors:
            if isinstance(values, (list, tuple)):
                values = [value * factor for value in values]
            else:
                values = values * factor
        return values

    def _create_additional_structures(self, deriv_length=0.015):
        r"""
        Creates the additional structures needed to do a numeric
        derivation of the energy to calculate the forces.

        The initial structures are saved in `self._init_posinp` and
        `self.posinp` is replaced by a list holding, for each initial
        structure, the structure itself followed by copies in which
        one atom is translated along one axis.

        Parameters
        ----------
        deriv_length : float
            Elementary displacement applied to the atoms. Default is
            0.015 (presumably expressed in the position units of the
            structures; to be confirmed against `Posinp.translate_atom`).
        """
        self._init_posinp = deepcopy(self.posinp)
        self._deriv_length = deriv_length
        directions = (
            np.array([1, 0, 0]),
            np.array([0, 1, 0]),
            np.array([0, 0, 1]),
        )
        all_structs = []
        # Displacements of +2h, +h, -h and -2h feed the five-point
        # central-difference stencil used in `_calculate_forces`, which
        # relies on this exact ordering (factor, then axis, then atom).
        for struct in self.posinp:
            all_structs.append(struct)
            for factor in (2, 1, -1, -2):
                for direction in directions:
                    displacement = deriv_length * factor * direction
                    all_structs.extend(
                        struct.translate_atom(atom_idx, displacement)
                        for atom_idx in range(len(struct))
                    )
        self.posinp = all_structs

    def _calculate_forces(self, predictions):
        r"""
        Method to calculate forces from the displaced atomic positions

        Parameters
        ----------
        predictions : numpy array (12*n_at values)
            Energies predicted for the displaced copies of a single
            structure, in the order created by
            `_create_additional_structures`.

        Returns
        -------
        forces : 2D numpy array (size (n_at, 3))
            Forces on each atom of the structure

        Raises
        ------
        ValueError
            If the number of predictions is not a multiple of 12.
        """
        energies = np.asarray(predictions)
        if energies.size % 12 != 0:
            raise ValueError(
                "Expected 12 energies per atom, got {} values.".format(energies.size)
            )
        nat = energies.size // 12
        # Axis 0: displacement (+2h, +h, -h, -2h); axis 1: x, y, z;
        # axis 2: atom index.
        e_plus2, e_plus1, e_minus1, e_minus2 = energies.reshape(4, 3, nat)
        gradient = (-e_plus2 + 8 * (e_plus1 - e_minus1) + e_minus2) / (
            12 * self._deriv_length
        )
        forces = np.zeros((nat, 3))
        # The force is minus the energy gradient.
        forces[:] = -gradient.T
        return forces


class JobResults(dict):
    r"""
    Dictionary containing results from a Job after the run() method
    is completed. A JobResults instance is created for each Job, and
    the results of the latter should be accessed through the former,
    by using the `Job.results` property.

    Predicted values can be accessed as in a standard dictionary


    >>> energy = Job.results["energy"]
    >>> type(energy)
    <class 'list'>

    The returned values will be `None` if the Job was not complete.
    Otherwise, the list contains one value for each structure in the Job.

    The names of the properties are themselves stored in the dictionary,
    under the "properties" key.
    """

    def __init__(self, properties):
        r"""
        Parameters
        ----------
        properties : str or list of str
            Property or properties that are returned by the chosen
            model.

        Raises
        ------
        TypeError
            If `properties` is neither a string nor a list of strings.
        """
        super().__init__()
        self.properties = properties
        # Every property starts as None until a Job fills it in.
        for prop in self.properties:
            self[prop] = None

    @property
    def properties(self):
        r"""
        Returns
        -------
        list of str
            Names of the properties held by this instance.
        """
        return self["properties"]

    @properties.setter
    def properties(self, properties):
        if isinstance(properties, str):
            properties = [properties]
        if not isinstance(properties, list):
            raise TypeError(
                "Properties should be given as a string or a list of strings."
            )
        if not all(isinstance(prop, str) for prop in properties):
            raise TypeError("All properties should be given as a string.")
        self["properties"] = properties