Source code for mlcalcdriver.base.job

r"""
The :class:`Job` class is the base object defining similar machine learning
predictions for a single or many atomic configurations.
"""

import numpy as np
import torch
from copy import deepcopy
from mlcalcdriver.globals import HA_TO_EV, B_TO_ANG, ANG_TO_B
import warnings


class Job:
    r"""
    This class defines a machine learning prediction. It must
    contain a mlcalcdriver.Posinp instance to define the atomic
    configuration.
    """

    def __init__(self, name="", posinp=None, calculator=None):
        r"""
        Parameters
        ----------
        name : str
            Name of the job. Will be used to name the created files.
        posinp : :class:`Posinp` or list of :class:`Posinp`
            Atomic positions for the job. Many different configurations
            may be predicted at the same time, in that case they should
            be passed in a list.
        calculator : :class:`Calculator`
            `Calculator` instance to use to evaluate
            properties in the run() method.

        Raises
        ------
        TypeError
            If `calculator` is not a `Calculator` instance or if one of
            the positions is not a `Posinp` instance.
        ValueError
            If `posinp` is None.
        """
        self.name = name
        # The calculator has to be set before the positions: the posinp
        # setter reads the calculator units to convert the positions.
        self.calculator = calculator
        self.posinp = posinp
        self.num_struct = len(self.posinp)
        self.results = JobResults(properties=self.calculator.available_properties)

    @property
    def posinp(self):
        r"""
        Returns
        -------
        list of Posinp
            Initial positions of the prediction
        """
        return self._posinp

    @posinp.setter
    def posinp(self, posinp):
        if posinp is None:
            raise ValueError("A Job instance has no initial positions.")
        elif not isinstance(posinp, list):
            posinp = [posinp]
        from mlcalcdriver.base.posinp import Posinp

        # Validate every structure before converting any of them, so a
        # bad entry does not leave the others half-converted.
        for pos in posinp:
            if not isinstance(pos, Posinp):
                raise TypeError(
                    "Atomic positions should be given only in Posinp instances."
                )
        target_units = self.calculator.units["positions"]
        for pos in posinp:
            if pos.units != target_units:
                pos.convert_units(target_units)
        self._posinp = posinp

    @property
    def name(self):
        r"""
        Returns
        -------
        str
            Base name of the prediction used to set the names of files
            and directories.
        """
        return self._name

    @name.setter
    def name(self, name):
        self._name = str(name)

    @property
    def num_struct(self):
        r"""
        Returns
        -------
        int
            Number of different structures when Job is declared
        """
        return self._num_struct

    @num_struct.setter
    def num_struct(self, num_struct):
        self._num_struct = num_struct

    @property
    def results(self):
        r"""
        Returns
        -------
        JobResults
            The dictionary containing the results of the calculation
        """
        return self._results

    @results.setter
    def results(self, results):
        self._results = results

    @property
    def calculator(self):
        r"""
        Returns
        -------
        Calculator
            The Calculator object to use for the Job
        """
        return self._calculator

    @calculator.setter
    def calculator(self, calculator):
        from mlcalcdriver.calculators.calculator import Calculator

        if isinstance(calculator, Calculator):
            self._calculator = calculator
        else:
            raise TypeError(
                "The calculator for the Job must be a class or a "
                "metaclass derived from mlcalcdriver.calculators.Calculator."
            )

    def run(self, property, batch_size=128, finite_difference=False):
        r"""
        Main method to call to obtain results for a Job

        The predictions are converted to eV and angstrom based units
        when the calculator works in hartree and/or atomic units, then
        stored in the `results` attribute.

        Parameters
        ----------
        property : str
            Property to calculate. Must be in the available_properties
            of the Calculator except the forces which can be derived
            from an energy Calculator. It is not used when
            `finite_difference` is True: the calculator is then always
            asked for the energy.
        batch_size : int
            Size of the mini-batches used in predictions. Default is 128.
        finite_difference : bool
            If True, the energy is predicted for displaced copies of
            every structure and the forces are obtained by numerical
            derivation of these energies. Default is False.

        Raises
        ------
        KeyError
            If a predicted quantity has no known unit conversion.
        NotImplementedError
            If a dipole moment is predicted in a unit other than Debye.
        """
        if finite_difference:
            predictions = self._run_finite_difference(batch_size)
        else:
            predictions = self.calculator.run(
                property=property, posinp=self.posinp, batch_size=batch_size
            )
        for pred, values in predictions.items():
            # Factors are applied one after the other, in the same
            # order as the unit checks, to keep the rounding unchanged.
            for factor in self._conversion_factors(pred):
                values = self._scale(values, factor)
            self.results.update({pred: values})

    def _run_finite_difference(self, batch_size):
        r"""
        Predicts energies and finite difference forces for every
        initial structure.

        Parameters
        ----------
        batch_size : int
            Size of the mini-batches used in predictions.

        Returns
        -------
        dict
            Dictionary with the keys "energy" and "forces", each one
            holding a list with one entry per initial structure.
        """
        self._create_additional_structures()
        try:
            raw_predictions = self.calculator.run(
                property="energy", posinp=self.posinp, batch_size=batch_size
            )
            energies = raw_predictions["energy"]
            predictions = {"energy": [], "forces": []}
            pred_idx = 0
            # Iterate over the saved initial structures rather than
            # range(num_struct): they are what the displaced structures
            # were generated from.
            for init_struct in self._init_posinp:
                # The first entry is the energy of the undisplaced
                # structure, followed by 12 displaced energies per atom.
                predictions["energy"].append(energies[pred_idx][0])
                pred_idx += 1
                n_displaced = 12 * len(init_struct)
                predictions["forces"].append(
                    self._calculate_forces(energies[pred_idx : pred_idx + n_displaced])
                )
                pred_idx += n_displaced
        finally:
            # Always give the Job its initial structures back, even
            # when the calculator fails on the displaced ones.
            self.posinp = deepcopy(self._init_posinp)
        return predictions

    def _conversion_factors(self, pred):
        r"""
        Returns the factors by which a predicted quantity must be
        multiplied to account for the units of the calculator.

        Parameters
        ----------
        pred : str
            Name of the predicted quantity.

        Returns
        -------
        tuple of float
            Multiplicative factors, empty if no conversion is needed.
        """
        # Future proofing, will probably need some work
        units = self.calculator.units
        if pred in ("energy", "energy_std", "gap"):
            return (HA_TO_EV,) if units["energy"] == "hartree" else ()
        if pred in ("forces", "forces_std", "hessian"):
            factors = []
            if units["energy"] == "hartree":
                factors.append(HA_TO_EV)
            if units["positions"] == "atomic":
                # One inverse length for the forces, two for the hessian.
                factors.append(ANG_TO_B ** 2 if pred == "hessian" else ANG_TO_B)
            return tuple(factors)
        if pred in ("dipole_moment", "mu"):
            if units["dipole_moment"] == "Debye":
                return ()
            raise NotImplementedError(
                "The unit {} of dipole moment is not implemented yet.".format(
                    units["dipole_moment"]
                )
            )
        raise KeyError(
            "The units for this predicted quantity have not been implemented yet."
        )

    @staticmethod
    def _scale(values, factor):
        r"""
        Multiplies predictions by a factor.

        Lists (as built by the finite difference path) are scaled
        element by element since a list cannot be multiplied by a
        float; any other container is multiplied directly.
        """
        if isinstance(values, list):
            return [value * factor for value in values]
        return values * factor

    def _create_additional_structures(self, deriv_length=0.015):
        r"""
        Creates the additional structures needed to do a numeric
        derivation of the energy to calculate the forces.

        For every initial structure, the structure itself is followed
        by copies where each atom is translated by
        `factor * deriv_length` along x, y and z, for the factors
        2, 1, -1 and -2 (in that order). The initial structures are
        saved in `_init_posinp`.

        Parameters
        ----------
        deriv_length : float
            Length of the elementary displacement, expressed in the
            units of the positions. Default is 0.015.
        """
        self._init_posinp = deepcopy(self.posinp)
        self._deriv_length = deriv_length
        directions = (
            np.array([1, 0, 0]),
            np.array([0, 1, 0]),
            np.array([0, 0, 1]),
        )
        all_structs = []
        # Second order forces calculations
        for struct in self.posinp:
            all_structs.append(struct)
            for factor in [2, 1, -1, -2]:
                for dim in directions:
                    all_structs.extend(
                        [
                            struct.translate_atom(atom_idx, deriv_length * factor * dim)
                            for atom_idx in range(len(struct))
                        ]
                    )
        self.posinp = all_structs

    def _calculate_forces(self, predictions):
        r"""
        Method to calculate forces from the displaced atomic positions

        Parameters
        ----------
        predictions : numpy array (12*n_at energies)
            Contains the predictions obtained from the neural network
            for the displaced copies of one structure, in the order
            used by `_create_additional_structures`: 12 consecutive
            blocks of n_at energies, for the displacement factors
            2, 1, -1, -2 and, for each factor, the directions x, y, z.

        Returns
        -------
        forces : 2D numpy array (size (n_at, 3))
            Forces for each structure
        """
        nat = len(predictions) // 12
        forces = np.zeros((nat, 3))
        for i in range(3):
            # Energies for displacements of +2h, +h, -h and -2h along
            # direction i.
            ener1 = predictions[i * nat : (i + 1) * nat]
            ener2 = predictions[(i + 3) * nat : (i + 4) * nat]
            ener3 = predictions[(i + 6) * nat : (i + 7) * nat]
            ener4 = predictions[(i + 9) * nat : (i + 10) * nat]
            # Five-point central difference of the energy; the force is
            # minus the derivative.
            forces[:, i] = -(
                (-ener1 + 8 * (ener2 - ener3) + ener4).reshape(nat)
                / (12 * self._deriv_length)
            )
        return forces
class JobResults(dict):
    r"""
    Dictionary containing results from a Job after the run() method
    is completed. A JobResults instance is created for each Job, and
    the results of the latter should be accessed through the former,
    by using the `Job.results` property.

    Predicted values can be accessed as in a standard dictionary

    >>> energy = job.results["energy"]

    The returned values will be `None` if the Job was not complete.
    Otherwise, they contain one value for each structure in the Job.
    The names of the properties themselves are stored under the
    "properties" key.
    """

    def __init__(self, properties):
        r"""
        Parameters
        ----------
        properties : str or list of str
            Property or properties that are returned by the chosen
            model.

        Raises
        ------
        TypeError
            If `properties` is neither a string nor a list of strings.
        """
        super().__init__()
        self.properties = properties
        # Every property starts as None until a prediction is stored.
        for prop in self.properties:
            self[prop] = None

    @property
    def properties(self):
        r"""
        Returns
        -------
        list of str
            Names of the properties returned by the chosen model.
        """
        return self["properties"]

    @properties.setter
    def properties(self, properties):
        if isinstance(properties, str):
            properties = [properties]
        if not isinstance(properties, list):
            raise TypeError(
                "Properties should be given as a string or a list of strings."
            )
        if not all(isinstance(prop, str) for prop in properties):
            raise TypeError("All properties should be given as a string.")
        self["properties"] = properties