Source code for shakenbreak.analysis

"""
Module containing functions to analyse rattled and bond-distorted defect
structure relaxations
"""

import json
import os
import sys
import warnings
from copy import deepcopy
from typing import Optional, Union

import numpy as np
import pandas as pd
from doped.utils.parsing import get_outcar
from monty.serialization import loadfn
from pymatgen.analysis.local_env import CrystalNN
from pymatgen.analysis.structure_matcher import StructureMatcher
from pymatgen.core.periodic_table import Element
from pymatgen.core.structure import Structure
from pymatgen.io.vasp.outputs import Outcar

from shakenbreak import input, io

crystalNN = CrystalNN(
    distance_cutoffs=None, x_diff_weight=0.0, porous_adjustment=False, search_cutoff=5.0
)


def _warning_on_one_line(
    message,
    category,
    filename,
    lineno,
    file=None,  # pylint: disable=unused-argument  # required for warnings.formatwarning
    line=None,  # pylint: disable=unused-argument
):
    """Format warnings output."""
    return f"{os.path.split(filename)[-1]}:{lineno}: {category.__name__}: {message}\n"


warnings.formatwarning = _warning_on_one_line


def _isipython():
    """Check if code if executed in ipython notebook."""
    # Using stackoverflow.com/questions/15411967/
    # how-can-i-check-if-code-is-executed-in-the-ipython-notebook
    try:
        get_ipython().__class__.__name__
        return True
    except NameError:
        return False  # Probably standard Python interpreter


if _isipython():
    from IPython.display import display


class _HiddenPrints:
    """Block calls to print."""

    # https://stackoverflow.com/questions/8391411/how-to-block-calls-to-print
    def __enter__(self):
        self._original_stdout = sys.stdout
        sys.stdout = open(os.devnull, "w")

    def __exit__(self, exc_type, exc_val, exc_tb):
        sys.stdout.close()
        sys.stdout = self._original_stdout


# Helper functions
def _read_distortion_metadata(output_path: str) -> dict:
    """
    Parse distortion_metadata.json file

    Args:
        output_path (:obj:`str`):
            Path to directory containing `distortion_metadata.json`
            (Default: '.', current directory)

    Returns:
        distortion_metadata (:obj:`dict`):
            Dictionary of metadata as generated by
            shakenbreak.input.apply_shakenbreak.
    """
    try:  # Read distortion parameters from distortion_metadata.json
        with open(f"{output_path}/distortion_metadata.json") as json_file:
            distortion_metadata = json.load(json_file)
    except FileNotFoundError:
        raise FileNotFoundError(
            f"No `distortion_metadata.json` file found in {output_path}."
        )
    return distortion_metadata


def _get_distortion_filename(distortion) -> str:
    """
    Format distortion names for file naming. (e.g. from 0.5 to
    'Bond_Distortion_50.0%')

    Args:
        distortion (float or str):
            Distortion factor (e.g. -0.6, 0.0, +0.6) or string
            (e.g. "Unperturbed" / "Rattled")

    Returns:
        distortion (:obj:`str`):
            distortion label used for file names.
    """
    if isinstance(distortion, (float, int)):
        if distortion != 0:
            distortion_label = f"Bond_Distortion_{round(distortion * 100, 1)+0}%"
            # as percentage with 1 decimal place (e.g. 50.0%)
        else:
            distortion_label = f"Bond_Distortion_{distortion:.1f}%"
    elif isinstance(distortion, str):
        if "_from_" in distortion and (
            "Rattled" not in distortion and "Dimer" not in distortion
        ):
            distortion_label = f"Bond_Distortion_{distortion}"
            # runs from other charge states
        elif (
            "Rattled_from_" in distortion
            or "Dimer_from" in distortion
            or distortion
            in [
                "Unperturbed",
                "Rattled",
                "Dimer",
            ]
        ):
            distortion_label = distortion
        elif (
            distortion == "Unperturbed"
            or distortion == "Rattled"
            or distortion == "Dimer"
        ):
            distortion_label = distortion  # e.g. "Unperturbed"/"Rattled"/"Dimer"
        else:
            try:  # try converting to float, in case user entered '0.5'
                distortion = float(distortion)
                distortion_label = f"Bond_Distortion_{round(distortion * 100, 1)+0}%"
            except Exception:
                distortion_label = "Distortion_not_recognized"
    else:
        distortion_label = "Distortion_not_recognized"
    return distortion_label


def _format_distortion_names(
    distortion_label: str,
) -> str:
    """
    Formats the distortion filename to the names used internally and for
    analysis. (i.e. 'Bond_Distortion_-50.0%' -> -0.5)

    Args:
        distortion_label (:obj:`str`):
            distortion label used for file names.

    Returns:
        distortion (:obj:`float` or :obj:`float`):
            distortion factor (e.g. -0.6, 0.0, +0.6) or string (e.g.
            "Unperturbed"/"Rattled"/"-60.0%_from_+2"/"Rattled_from_-1")
    """
    distortion_label = distortion_label.strip()  # remove any whitespace
    if (
        "Unperturbed" in distortion_label
        or "Rattled" in distortion_label
        or "Dimer" in distortion_label
    ) and "from" not in distortion_label:
        return distortion_label
    elif distortion_label.startswith("Bond_Distortion") and distortion_label.endswith(
        "%"
    ):
        return float(distortion_label.split("Bond_Distortion_")[-1].split("%")[0]) / 100
    # From other charge states
    elif distortion_label.startswith("Bond_Distortion") and (
        "_from_" in distortion_label
    ):
        # distortions from other charge state of the defect
        return distortion_label.split("Bond_Distortion_")[-1]
    elif "Rattled" in distortion_label and "_from_" in distortion_label:
        return distortion_label
    elif "Dimer" in distortion_label and "_from_" in distortion_label:
        return distortion_label
    # detected as High_Energy - normally wouldn't be parsed, but for debugging purposes
    # TODO: remove this
    elif (
        distortion_label.startswith("Bond_Distortion")
        and "High_Energy" in distortion_label
    ):
        return float(distortion_label.split("Bond_Distortion_")[-1].split("%")[0]) / 100
    elif (
        "Dimer" in distortion_label or "Rattled" in distortion_label
    ) and "High_Energy" in distortion_label:
        return distortion_label
    else:
        return "Label_not_recognized"


[docs] def get_gs_distortion(defect_energies_dict: dict) -> tuple: """ Calculate energy difference between `Unperturbed` structure and lowest energy distortion. Returns the energy (in eV) and bond distortion of the ground-state relative to `Unperturbed`. If `Unperturbed` not present, returns (None, ground-state bond distortion). Args: defect_energies_dict (:obj:`dict`): Dictionary matching distortion to final energy, as produced by `get_energies()` or `_sort_data`. Returns: :obj:`tuple`: (Energy difference, ground state bond distortion) """ if not defect_energies_dict["distortions"]: if "Unperturbed" in defect_energies_dict: return 0, "Unperturbed" lowest_E_distortion = min( defect_energies_dict["distortions"].values() ) # lowest energy obtained with bond distortions if "Unperturbed" in defect_energies_dict: if list(defect_energies_dict["distortions"].keys()) == [ "Rattled" ]: # If only Rattled energy_diff = ( defect_energies_dict["distortions"]["Rattled"] - defect_energies_dict["Unperturbed"] ) gs_distortion = "Rattled" if energy_diff < 0 else "Unperturbed" else: energy_diff = lowest_E_distortion - defect_energies_dict["Unperturbed"] if ( lowest_E_distortion < defect_energies_dict["Unperturbed"] ): # if energy lower than Unperturbed gs_distortion = list(defect_energies_dict["distortions"].keys())[ list(defect_energies_dict["distortions"].values()).index( lowest_E_distortion ) ] # bond distortion that led to ground-state else: gs_distortion = "Unperturbed" else: energy_diff = None gs_distortion = list(defect_energies_dict["distortions"].keys())[ list(defect_energies_dict["distortions"].values()).index( lowest_E_distortion ) ] return energy_diff, gs_distortion
def _sort_data( energies_file: str, verbose: bool = True, min_e_diff: float = 0.05 ) -> tuple: """ Organize bond distortion results in a dictionary, calculate energy of ground-state defect structure relative to `Unperturbed` structure (in eV) and its corresponding bond distortion, and return all three as a tuple. If `Unperturbed` not present, returns (defect_energies_dict, None, ground-state distortion). Args: energies_file (:obj:`str`): Path to `yaml` file with bond distortions and final energies (in eV), obtained using the CLI command `snb-parse` or the function `parse_energies()`. verbose (:obj:`bool`): Whether to print information about energy lowering distortions, if found. (Default: True) min_e_diff (:obj: `float`): Minimum energy difference (in eV) between the ground-state defect structure, relative to the `Unperturbed` structure, to consider it as having found a new energy-lowering distortion. Default is 0.05 eV. Returns: defect_energies_dict (:obj:`dict`): Dictionary matching distortion to final energy, as produced by `_organize_data()` energy_diff (:obj:`float`): Energy difference between minimum energy structure and `Unperturbed` (in eV). None if `Unperturbed` not present. gs_distortion (:obj:`float`): Distortion corresponding to the minimum energy structure """ # Parse dictionary from file if os.path.exists(energies_file): defect_energies_dict = loadfn(energies_file) else: warnings.warn(f"Path {energies_file} does not exist") return None, None, None if defect_energies_dict == {"distortions": {}}: # no parsed data warnings.warn(f"No data parsed from {energies_file}, returning None") return None, None, None if ( len(defect_energies_dict["distortions"]) == 0 and "Unperturbed" in defect_energies_dict ): # no parsed distortion results but Unperturbed present warnings.warn(f"No distortion results parsed from {energies_file}") energy_diff, gs_distortion = get_gs_distortion(defect_energies_dict) defect_name = energies_file.split("/")[-1].split(".yaml")[0] if verbose: if energy_diff and float(energy_diff) < -min_e_diff: print( f"{defect_name}: Energy difference between minimum, found with {gs_distortion} bond " f"distortion, and unperturbed: {energy_diff:+.2f} eV." ) elif energy_diff is None: print( f"{defect_name}: Unperturbed energy not found in {energies_file}. Lowest energy " f"structure found with {gs_distortion} bond distortion." ) return defect_energies_dict, energy_diff, gs_distortion # Main analysis functions
[docs] def analyse_defect_site( structure: Structure, name: Optional[str] = None, site_num: Optional[int] = None, vac_site: Optional[list] = None, ) -> tuple: """ Analyse coordination environment and bond distances to nearest neighbours of defect site. Args: structure (:obj:`Structure`): `pymatgen` Structure object to analyse name (:obj:`str`): Defect name for printing. (Default: None) site_num (:obj:`int`): Defect site index in the structure, starting from 1 (VASP rather than python indexing). (Default: None) vac_site (:obj:`list`): For vacancies, the fractional coordinates of the vacant lattice site. (Default: None) Returns: :obj:`tuple`: Tuple of coordination analysis and bond length DataFrames, respectively. """ # get defect site struct = deepcopy(structure) if site_num: isite = site_num - 1 # python/pymatgen indexing (starts counting from zero!) elif vac_site: struct.append( "V", vac_site ) # Have to add a fake element for coordination analysis isite = ( len(struct.sites) - 1 ) # python/pymatgen indexing (starts counting from zero!) else: raise ValueError("Either site_num or vac_site must be specified") if name is not None: input._bold_print(name + " structural analysis ") print("Analysing site", struct[isite].specie, struct[isite].frac_coords) coordination = crystalNN.get_local_order_parameters(struct, isite) if coordination is not None: coord_list = [] for coord, value in coordination.items(): coordination_dict = {"Coordination": coord, "Factor": round(value, 2)} coord_list.append(coordination_dict) print( "Local order parameters (i.e. resemblance to given structural motif, " "via CrystalNN):" ) if _isipython(): display(pd.DataFrame(coord_list)) # display in Jupyter notebook bond_lengths = [ { "Element": i["site"].specie.as_dict()["element"], "Distance (\u212B)": f"{i['site'].distance(struct[isite]):.2f}", } for i in crystalNN.get_nn_info(struct, isite) ] bond_length_df = pd.DataFrame(bond_lengths) print("\nBond-lengths (in \u212B) to nearest neighbours: ") if _isipython(): display(bond_length_df) print() # spacing if coordination is not None: return pd.DataFrame(coord_list), bond_length_df else: return None, bond_length_df
[docs] def analyse_structure( defect_species: str, structure: Structure, output_path: str = ".", ) -> tuple: """ Analyse the local distortion of the input defect structure. Requires access to the `distortion_metadata.json` file generated with ShakeNBreak to read info about defect site. If lacking this, can alternatively use `analyse_defect_site()`. Args: defect_species (:obj:`str`): Defect name including charge (e.g. 'vac_1_Cd_0') structure (:obj:`~pymatgen.core.structure.Structure`): Defect structure to analyse output_path (:obj:`str`): Path to directory containing `distortion_metadata.json` (Default: '.', current directory) Returns: :obj:`tuple`: Tuple of coordination analysis and bond length DataFrames, respectively. """ defect_name_without_charge = defect_species.rsplit("_", 1)[0] if not os.path.exists(f"{output_path}/distortion_metadata.json"): raise FileNotFoundError( f"No `distortion_metadata.json` file found in {output_path}. " f"Use function `analyse_defect_site` instead." ) distortion_metadata = _read_distortion_metadata( output_path=output_path ) # Read site from distortion_metadata.json defect_site = distortion_metadata["defects"][defect_name_without_charge].get( "defect_site_index" ) # VASP indexing (starts counting from 1) if defect_site is None: # for vacancies, get fractional coordinates defect_frac_coords = distortion_metadata["defects"][defect_name_without_charge][ "unique_site" ] return analyse_defect_site( structure, name=defect_species, vac_site=defect_frac_coords ) return analyse_defect_site(structure, name=defect_species, site_num=defect_site)
[docs] def get_structures( defect_species: str, output_path: str = ".", bond_distortions: Optional[list] = None, code: Optional[str] = "vasp", structure_filename: Optional[str] = "CONTCAR", ) -> dict: """ Import all structures found with rattling & bond distortions, and store them in a dictionary matching the bond distortion to the final structure. By default, will read the structures from the distortion subdirectories present in each defect folder. If only certain distortions should be parsed, use the argument `bond_distortions` to specify them. Args: defect_species (:obj:`str`): Defect name including charge (e.g. 'vac_1_Cd_0') output_path (:obj:`str`): Path to top-level directory containing `defect_species` subdirectories. (Default: current directory. bond_distortions (:obj:`list`, optional): List of bond distortions whose structures will be analysed (i.e. [-0.2, 0.0, 0.5]). By default, all distortions present in subdirectories are considered. (Default: None) code (:obj:`str`, optional): Code used for the geometry relaxations. (Default: vasp) structure_filename (:obj:`str`, optional): Name of the file containing the structure. (Default: CONTCAR) Returns: :obj:`dict`: Dictionary of bond distortions and corresponding final structures. """ defect_structures_dict = {} if ( not bond_distortions ): # if the user didn't specify any set of distortions, loop over subdirectories if not os.path.isdir( f"{output_path}/{defect_species}" ): # check if defect folder exists raise FileNotFoundError( f"Path f'{output_path}/{defect_species}' does not exist!" ) distortion_subdirectories = [ i for i in next(os.walk(f"{output_path}/{defect_species}"))[1] if ("Bond_Distortion" in i) or ("Unperturbed" in i) or ("Rattled" in i) or ("Dimer" in i) ] # distortion subdirectories if not distortion_subdirectories: raise FileNotFoundError( f"No distortion subdirectories found in {output_path}/" f"{defect_species}" ) for distortion_subdirectory in distortion_subdirectories: if "High_Energy" not in distortion_subdirectory: distortion = _format_distortion_names( distortion_label=distortion_subdirectory ) # From subdirectory name, get the distortion label used for analysis # e.g. from 'Bond_Distortion_-10.0% -> -0.1 if ( distortion != "Label_not_recognized" ): # If the subdirectory name is recognised try: defect_structures_dict[distortion] = io.parse_structure( code=code, structure_path=f"{output_path}/{defect_species}/{distortion_subdirectory}", structure_filename=structure_filename, ) except Exception: warnings.warn( f"Unable to parse structure at {output_path}/" f"{defect_species}/{structure_filename}" f"/{distortion_subdirectory}/, storing as 'Not converged'" ) defect_structures_dict[distortion] = "Not converged" else: if "Unperturbed" not in bond_distortions: bond_distortions.append( "Unperturbed" ) # always include unperturbed structure for distortion in bond_distortions: if not (isinstance(distortion, str) and "High_Energy" in distortion): distortion_label = _get_distortion_filename(distortion) # get filename if ( distortion_label != "Distortion_not_recognized" ): # If the distortion label is recognised try: defect_structures_dict[distortion] = io.parse_structure( code=code, structure_path=f"{output_path}/{defect_species}/{distortion_label}/", structure_filename=structure_filename, ) except Exception: warnings.warn( f"Unable to parse structure at {output_path}/{defect_species}" f"/{distortion_label}/, storing as 'Not converged'" ) defect_structures_dict[distortion] = "Not converged" return ( defect_structures_dict # now contains the distortions from other charge states )
[docs] def get_energies( defect_species: str, output_path: str = ".", units: str = "eV", verbose: bool = True, ) -> dict: """ Parse final energies for each bond distortion and store them in a dictionary matching the bond distortion to the final energy in eV. Args: defect_species (:obj:`str`): Defect name including charge (e.g. 'vac_1_Cd_0') output_path (:obj:`str`): Path to top-level directory containing `defect_species` subdirectories. (Default: current directory) distortion_increment (:obj:`float`): Bond distortion increment. Recommended values: 0.1-0.3 (Default: 0.1) units (:obj:`str`): Energy units for outputs (either 'eV' or 'meV'). (Default: "eV") verbose (:obj:`bool`): Whether to print information about energy lowering distortions, if found. (Default: True) Returns: :obj:`dict`: Dictionary matching bond distortions to final energies in eV. """ energy_file_path = f"{output_path}/{defect_species}/{defect_species}.yaml" if not os.path.isfile(energy_file_path): raise FileNotFoundError(f"File {energy_file_path} not found!") defect_energies_dict, _e_diff, gs_distortion = _sort_data( energy_file_path, verbose=verbose ) if "Unperturbed" in defect_energies_dict: for distortion, energy in defect_energies_dict["distortions"].items(): defect_energies_dict["distortions"][distortion] = ( energy - defect_energies_dict["Unperturbed"] ) defect_energies_dict["Unperturbed"] = 0.0 else: warnings.warn( "Unperturbed defect energy not found in energies file. Energies will " "be given relative to the lowest energy defect structure found." ) lowest_E_distortion = defect_energies_dict["distortions"][gs_distortion] for distortion, energy in defect_energies_dict["distortions"].items(): defect_energies_dict["distortions"][distortion] = ( energy - lowest_E_distortion ) if units == "meV": defect_energies_dict["distortions"] = { k: v * 1000 for k, v in defect_energies_dict["distortions"].items() } return defect_energies_dict
def _calculate_atomic_disp( struct1: Structure, struct2: Structure, stol: float = 0.5, ltol: float = 0.3, angle_tol: float = 5, ) -> tuple: """ Calculate root mean square displacement and atomic displacements, normalized by the free length per atom ((Vol/Nsites)^(1/3)) between two structures. Args: struct1 (:obj:`Structure`): Structure to compare to struct2. struct2 (:obj:`Structure`): Structure to compare to struct1. stol (:obj:`float`): Site tolerance used for structural comparison (via `pymatgen`'s `StructureMatcher`), as a fraction of the average free length per atom := ( V / Nsites ) ** (1/3). If output contains too many 'NaN' values, this likely needs to be increased. (Default: 0.5) Returns: :obj:`tuple`: Tuple of normalized root mean squared displacements and normalized displacements between the two structures. """ sm = StructureMatcher( ltol=ltol, stol=stol, angle_tol=angle_tol, primitive_cell=False, scale=True ) struct1, struct2 = sm._process_species([struct1, struct2]) struct1, struct2, fu, s1_supercell = sm._preprocess(struct1, struct2) match = sm._match( struct1, struct2, fu, s1_supercell, use_rms=True, break_on_match=False ) return None if match is None else (match[0], match[1])
[docs] def calculate_struct_comparison( defect_structures_dict: dict, metric: str = "max_dist", ref_structure: Union[str, float, Structure] = "Unperturbed", stol: float = 0.5, ltol: float = 0.3, angle_tol: float = 5, min_dist: float = 0.1, verbose: bool = False, ) -> dict: """ Calculate either the summed atomic displacement, with metric = "disp", or the maximum distance between matched atoms, with metric = "max_dist", (default) between each distorted structure in `defect_struct_dict`, and either 'Unperturbed' or a specified structure (`ref_structure`). For metric = "disp", atomic displacements below `min_dist` (in Å, default 0.1 Å) are considered noise and omitted from the sum, to reduce supercell size dependence. Args: defect_structures_dict (:obj:`dict`): Dictionary of bond distortions and corresponding (final) structures (as pymatgen Structure objects). metric (:obj:`str`): Structure comparison metric to use. Either summed atomic displacement normalised to the average free length per atom ('disp') or the maximum distance between matched atoms ('max_dist', default). (Default: "max_dist") ref_structure (:obj:`str` or :obj:`float` or :obj:`Structure`): Structure to use as a reference for comparison (to compute atomic displacements). Either as a key from `defect_structures_dict` (e.g. '-0.4' for "Bond_Distortion_-40.0%") or a pymatgen Structure object (to compare with a specific external structure). (Default: "Unperturbed") stol (:obj:`float`): Site tolerance used for structural comparison (via `pymatgen`'s `StructureMatcher`), as a fraction of the average free length per atom := ( V / Nsites ) ** (1/3). If output contains too many 'NaN' values, this likely needs to be increased. (Default: 0.5) min_dist (:obj:`float`): Minimum atomic displacement threshold to include in atomic displacements sum (in Å, default 0.1 Å). verbose (:obj:`bool`): Whether to print information message about structures being compared. Returns: :obj:`dict`: Dictionary matching bond distortions to structure comparison metric (disp or max_dist). """ # Check reference structure if isinstance(ref_structure, (str, float)): ref_name = ( ref_structure if isinstance(ref_structure, str) else f"{ref_structure:.1%} bond distorted structure" ) try: ref_structure = defect_structures_dict[ref_structure] except KeyError as e: raise KeyError( f"Reference structure key '{ref_structure}' not found in " f"defect_structures_dict." ) from e if ref_structure == "Not converged": raise ValueError( f"Specified reference structure ({ref_name}) is not converged " f"and cannot be used for structural comparison. Check structures" " or specify a different reference structure (ref_structure)." ) elif isinstance(ref_structure, Structure): ref_name = f"specified ref_structure ({ref_structure.composition})" else: raise TypeError( f"ref_structure must be either a key from defect_structures_dict " f"or a pymatgen Structure object. Got {type(ref_structure)} instead." ) if verbose: print(f"Comparing structures to {ref_name}...") disp_dict = {} normalization = (len(ref_structure) / ref_structure.volume) ** (1 / 3) for distortion in list(defect_structures_dict.keys()): if defect_structures_dict[distortion] == "Not converged": disp_dict[distortion] = "Not converged" # Structure not converged else: try: _, norm_dist = _calculate_atomic_disp( struct1=ref_structure, struct2=defect_structures_dict[distortion], stol=stol, ltol=ltol, angle_tol=angle_tol, ) if metric == "disp": disp_dict[distortion] = ( np.sum(norm_dist[norm_dist > min_dist * normalization]) / normalization ) # Only include displacements above min_dist threshold, # and remove normalization elif metric == "max_dist": disp_dict[distortion] = ( max(norm_dist) / normalization ) # Remove normalization else: raise ValueError( f"Invalid metric '{metric}'. Must be one of 'disp' or " f"'max_dist'." ) except TypeError: disp_dict[ distortion ] = None # algorithm couldn't match lattices. Set comparison # metric to None # warnings.warn( # f"pymatgen StructureMatcher could not match lattices between " # f"{ref_name} and {distortion} structures." # ) # Unnecessary warning, occurs often with highly-distorted/disordered structures return disp_dict
[docs] def compare_structures( defect_structures_dict: dict, defect_energies_dict: dict, ref_structure: Union[str, float, Structure] = "Unperturbed", stol: float = 0.5, units: str = "eV", min_dist: float = 0.1, display_df: bool = True, verbose: bool = True, ) -> Union[None, pd.DataFrame]: """ Compare final bond-distorted structures with either 'Unperturbed' or a specified structure (`ref_structure`), and calculate the summed atomic displacement (in Å), and maximum distance between matched atomic sites (in Å). Args: defect_structures_dict (:obj:`dict`): Dictionary mapping bond distortion to (relaxed) structure defect_energies_dict (:obj:`dict`): Dictionary matching distortion to final energy (eV), as produced by `_organize_data()`. ref_structure (:obj:`str` or :obj:`float` or :obj:`Structure`): Structure to use as a reference for comparison (to compute atomic displacements). Either as a key from `defect_structures_dict` (e.g. '-0.4' for 'Bond_Distortion_-40.0%') or a pymatgen Structure object (to compare with a specific external structure). (Default: "Unperturbed") stol (:obj:`float`): Site tolerance used for structural comparison (via `pymatgen`'s `StructureMatcher`), as a fraction of the average free length per atom := ( V / Nsites ) ** (1/3). If structure comparison output contains too many 'NaN' values, this likely needs to be increased. (Default: 0.5) units (:obj:`str`): Energy units label for outputs (either 'eV' or 'meV'). Should be the same as the units in `defect_energies_dict`, as this does not modify the supplied values. (Default: "eV") min_dist (:obj:`float`): Minimum atomic displacement threshold to include in atomic displacements sum (in Å, default 0.1 Å). display_df (:obj:`bool`): Whether to display the structure comparison DataFrame interactively in Jupyter/Ipython (Default: True). verbose (:obj:`bool`): Whether to print information message about structures being compared. Returns: :obj:`pd.DataFrame`: DataFrame containing structural comparison results (summed normalised atomic displacement and maximum distance between matched atomic sites), and relative energies. """ if all( [ structure == "Not converged" for key, structure in defect_structures_dict.items() ] ): warnings.warn( "All structures in defect_structures_dict are not converged. " "Returning None." ) return None df_list = [] disp_dict = calculate_struct_comparison( defect_structures_dict, metric="disp", ref_structure=ref_structure, stol=stol, min_dist=min_dist, verbose=verbose, ) with _HiddenPrints(): # only print "Comparing to..." once max_dist_dict = calculate_struct_comparison( defect_structures_dict, metric="max_dist", ref_structure=ref_structure, stol=stol, verbose=verbose, ) # Check if too many 'NaN' values in disp_dict, if so, try with higher stol number_of_nan = len([value for value in disp_dict.values() if value is None]) if number_of_nan > len(disp_dict.values()) // 3: warnings.warn( f"The specified tolerance {stol} seems to be too tight as" " too many lattices could not be matched. Will retry with" f" larger tolerance ({stol+0.4})." ) max_dist_dict = calculate_struct_comparison( defect_structures_dict, metric="max_dist", ref_structure=ref_structure, stol=stol + 0.4, verbose=verbose, ) disp_dict = calculate_struct_comparison( defect_structures_dict, metric="disp", ref_structure=ref_structure, stol=stol + 0.4, min_dist=min_dist, verbose=verbose, ) for distortion in defect_energies_dict["distortions"]: try: rel_energy = defect_energies_dict["distortions"][distortion] except KeyError: # if relaxation didn't converge for this bond distortion, # store it as NotANumber rel_energy = float("NaN") if distortion in disp_dict: df_list.append( [ distortion, round(disp_dict[distortion], 3) + 0 if isinstance(disp_dict[distortion], float) else None, round(max_dist_dict[distortion], 3) + 0 if isinstance(max_dist_dict[distortion], float) else None, round(rel_energy, 2) + 0, ] ) else: warnings.warn( f"Could not find bond distortion '{distortion}' in defect_structures_dict. " f"Storing as 'Not converged'." ) df_list.append([distortion, None, None, rel_energy]) if "Unperturbed" in defect_energies_dict: if "Unperturbed" in disp_dict: df_list.append( [ "Unperturbed", round(disp_dict["Unperturbed"], 3) + 0 if isinstance(disp_dict["Unperturbed"], float) else None, round(max_dist_dict["Unperturbed"], 3) + 0 if isinstance(max_dist_dict["Unperturbed"], float) else None, round(defect_energies_dict["Unperturbed"], 2) + 0, ] ) else: warnings.warn( "Could not find Unperturbed structure in defect_structures_dict. Storing as 'Not " "converged'." ) df_list.append( [ "Unperturbed", None, None, round(defect_energies_dict["Unperturbed"], 2) + 0, ] ) struct_comparison_df = pd.DataFrame( df_list, columns=[ "Bond Distortion", "\u03A3{Displacements} (\u212B)", # Sigma and Angstrom "Max Distance (\u212B)", # Angstrom f"\u0394 Energy ({units})", # Delta ], ) if _isipython() and display_df: display(struct_comparison_df) return struct_comparison_df
[docs] def get_homoionic_bonds( structure: Structure, elements: list, radius: Optional[float] = 3.3, verbose: bool = True, ) -> dict: """ Returns a list of homoionic bonds for the given element. These bonds are often formed by the defect neighbouts to accomodate charge deficiency. Args: structure (:obj:`~pymatgen.core.structure.Structure`): `pymatgen` Structure object to analyse elements (:obj:`list`): List of element symbols (wihout oxidation state) for which to find the homoionic bonds (e.g. ["Te", "Se"]). radius (:obj:`float`, optional): Distance cutoff to look for homoionic bonds. Defaults to 3.3 A. verbose (:obj:`bool`, optional): Whether or not to print the list of homoionic bonds. Returns: :obj:`dict`: dictionary with homoionic bonds, matching site to the homoionic neighbours and distances (A) (e.g. {'O(1)': {'O(2)': '2.0 A', 'O(3)': '2.0 A'}}) """ if isinstance(elements, str): # For backward compatibility elements = [ elements, ] structure = structure.copy() structure.remove_oxidation_states() for element in elements: if Element(element) not in structure.composition.elements: warnings.warn(f"Your structure does not contain element {element}!") return {} # element = elements[0] # Search for homoionic bonds in the whole structure sites = [] for element in elements: sites.extend( [ (site_index, site) for site_index, site in enumerate(structure) if site.species_string == element ] ) homoionic_bonds = {} for site_index, site in sites: neighbours = structure.get_neighbors(site, r=radius) # Check if any of the neighbours is the specified elements for element in elements: if element in [site.species_string for site in neighbours]: site_neighbours = [ ( neighbour.species_string, neighbour.index, round(neighbour.distance(site), 2), ) for neighbour in neighbours ] if f"{site.species_string}({site_index})" not in [ list(element.keys())[0] for element in homoionic_bonds.values() ]: # avoid duplicates homoionic_neighbours = { f"{neighbour[0]}({neighbour[1]})": f"{neighbour[2]} A" for neighbour in site_neighbours if neighbour[0] == element } if f"{site.species_string}({site_index})" in homoionic_bonds: homoionic_bonds[f"{site.species_string}({site_index})"].update( homoionic_neighbours ) else: homoionic_bonds[ f"{site.species_string}({site_index})" ] = homoionic_neighbours if verbose: print( f"{site.species_string}({site_index}): " f"{homoionic_neighbours}", "\n", ) if not homoionic_bonds and verbose: print(f"No homoionic bonds found with a search radius of {radius} A") return homoionic_bonds
def _site_magnetizations( outcar: Outcar, structure: Structure, threshold: float = 0.1, defect_site: Optional[int] = None, orbital_projections: Optional[bool] = False, ) -> pd.DataFrame: """ Prints sites with magnetization above threshold. Only implemented for VASP calculations. Args: outcar (pymatgen.io.vasp.outputs.Outcar): Outcar object structure (pymatgen.core.structure.Structure): Structure object threshold (float, optional): Magnetization threhold to print site. Defaults to 0.1 e-. defect_site (int, optional): Index of defect site. orbital_projections (bool, optional): Whether to print orbital projections. If not necessary, set to False (faster). (Default: False) Returns: :obj:`pandas.DataFrame`: pandas.Dataframe with sites with magnetization above threshold. """ # Site magnetizations mag = outcar.magnetization significant_magnetizations = {} for index, element in enumerate(mag): # loop over structure sites mag_array = np.array(list(element.values())) total_mag = np.sum(mag_array[np.abs(mag_array) > 0.01]) if np.abs(total_mag) > threshold: significant_magnetizations[ f"{structure[index].species_string}({index})" ] = { "Site": f"{structure[index].species_string}({index})", "Frac coords": [ round(coord, 3) for coord in structure[index].frac_coords ], "Site mag": round(total_mag, 3), } if isinstance(defect_site, int): significant_magnetizations[ f"{structure[index].species_string}({index})" ].update( { "Dist. (\u212B)": round( structure.get_distance(i=defect_site, j=index), 2, ) } ) if orbital_projections: significant_magnetizations[ f"{structure[index].species_string}({index})" ].update( {k: round(v, 3) for k, v in element.items() if k != "tot"} # include site magnetization of each orbital # but dont include total site magnetization again ) df = pd.DataFrame.from_dict(significant_magnetizations, orient="index") return df
[docs] def get_site_magnetizations( defect_species: str, distortions: list, output_path: str = ".", threshold: float = 0.1, defect_site: Optional[int or list] = None, orbital_projections: Optional[bool] = False, verbose: Optional[bool] = True, ) -> Optional[dict]: """ For given distortions, find sites with significant magnetization and return as dictionary. Only implemented for VASP calculations. Args: defect_species (:obj:`str`): Name of defect including charge state (e.g. 'vac_1_Cd_0') distortions (:obj:`list`): List of distortions to analyse (e.g. ['Unperturbed', 0.1, -0.2]) output_path (:obj:`str`): Path to top-level directory containing `defect_species` subdirectories. (Default: current directory) threshold (:obj:`float`, optional): Magnetization threshold to consider site. (Default: 0.1) defect_site (:obj:`int` or :obj:`list`, optional): Site index or fractional coordinates of the defect. If not specified, will try to read from distortion_metadata.json. If this file is not present, will not include the distance between the defect and the sites with significant magnetization in the dataframe. orbital_projections (bool, optional): Whether to print orbital projections. If not necessary, set to False (faster). (Default: False) verbose (bool, optional): Whether to print verbose output. Returns: :obj:`dict`: Dictionary matching distortion to DataFrame containing magnetization info. """ magnetizations = {} if not os.path.exists(f"{output_path}/{defect_species}"): raise FileNotFoundError(f"{output_path}/{defect_species} does not exist!") defect_site_coords = None if isinstance(defect_site, (list, np.ndarray)): defect_site_coords = defect_site elif not defect_site: # look for defect site, in order to include the distance # between sites with significant magnetization and the defect if os.path.exists(f"{output_path}/distortion_metadata.json"): with open(f"{output_path}/distortion_metadata.json", "r") as f: try: defect_species_without_charge = "_".join( defect_species.split("_")[:-1] ) defect_site_coords = json.load(f)["defects"][ defect_species_without_charge ]["unique_site"] except KeyError: warnings.warn( f"Could not find defect {defect_species} in " f"distortion_metadata.json file. Will not include " f"distance between defect and sites with significant " f"magnetization." ) defect_site = None for distortion in distortions: dist_label = _get_distortion_filename(distortion) # get filename # (e.g. Bond_Distortion_50.0%) if dist_label == "Distortion_not_recognized": warnings.warn( f"Problem parsing the distortion label {distortion}. " "The distortions specified in the `distortions` list " "should be the distortion factor (e.g. 0.2) or the " "Unperturbed/Rattled name." ) continue structure = io.read_vasp_structure( f"{output_path}/{defect_species}/{dist_label}/CONTCAR" ) if not isinstance(structure, Structure): warnings.warn( f"Structure for {defect_species} either not converged or not " "found. Skipping magnetisation analysis." ) continue if isinstance(defect_site_coords, (list, np.ndarray)): # for vacancies, append fake atom structure.append( species="V", coords=defect_site_coords, coords_are_cartesian=False ) defect_site = -1 # index of the added fake atom try: outcar = get_outcar(f"{output_path}/{defect_species}/{dist_label}/OUTCAR") except FileNotFoundError: warnings.warn( f"OUTCAR(.gz) file not found in path {output_path}/{defect_species}/{dist_label}. " "Skipping magnetization analysis." ) continue if not outcar.spin: warnings.warn( f"{output_path}/{defect_species}/{dist_label}/OUTCAR is from a non-spin-polarised " f"calculation (ISPIN = 1), so magnetization analysis is not possible. Skipping." ) continue if verbose: print( f"Analysing distortion {distortion}. " f"Total magnetization: {round(outcar.total_mag, 2)}" ) df = _site_magnetizations( outcar=outcar, structure=structure, threshold=threshold, defect_site=defect_site, orbital_projections=orbital_projections, ) if not df.empty: magnetizations[distortion] = df elif verbose: print( f"No significant magnetizations found for distortion: " f"{distortion} \n" ) return magnetizations