"""
Module to parse input and output files for VASP, Quantum Espresso,
FHI-aims, CASTEP and CP2K.
"""
import contextlib
import datetime
import os
import warnings
from ase.io import read
from monty.re import regrep
from monty.serialization import dumpfn, loadfn
from pymatgen.core.structure import Structure
from pymatgen.core.units import Energy
from pymatgen.io.ase import AseAtomsAdaptor
aaa = AseAtomsAdaptor()
[docs]
def parse_energies(
defect: str,
path: str | None = ".",
code: str | None = "vasp",
filename: str | None = "OUTCAR",
verbose: bool = False,
) -> str:
r"""
Parse final energy for all distortions present in the given defect
directory and write them to a ``yaml`` file in the defect directory.
Returns the ``energies_file`` path.
Args:
defect (:obj:`str`):
Name of defect to parse, including charge state. Should match the
name of the defect folder.
path (:obj:`str`):
Path to the top-level directory containing the defect folder.
Defaults to current directory (".").
code (:obj:`str`):
Name of ab-initio code used to run the geometry optimisations, case
insensitive. Options include: "vasp", "cp2k", "espresso", "castep"
and "fhi-aims". Defaults to "vasp".
filename (:obj:`str`):
Filename of the output file, if different from the ShakeNBreak defaults
that are defined in the default input files:
(i.e. vasp: "OUTCAR", cp2k: "relax.out", espresso: "espresso.out",
castep: "\*.castep", fhi-aims: "aims.out")
Defaults to the ``ShakeNBreak`` default filenames.
verbose (:obj:`bool`):
If True, print information about renamed/saved-over files.
Defaults to False.
Returns:
:obj:`str`:
Path to the ``energies_file``.
"""
from shakenbreak.analysis import _format_distortion_names, _sort_data
def _match(filename, grep_string):
"""Helper function to grep for a string in a file."""
try:
return regrep(
filename=filename,
patterns={"match": grep_string},
reverse=True,
terminate_on_match=True,
)["match"]
except Exception:
return None
def sort_energies(defect_energies_dict):
"""Order dict items by key (e.g. from -0.6 to 0 to +0.6)."""
# sort distortions
sorted_energies_dict = {
"distortions": dict(
sorted(
defect_energies_dict["distortions"].items(),
key=lambda k: (0, k[0]) if isinstance(k[0], float) else (1, k[0]),
# to deal with list of both floats and strings
# (https://www.geeksforgeeks.org/sort-mixed-list-in-python/)
)
)
}
if "Unperturbed" in defect_energies_dict:
sorted_energies_dict["Unperturbed"] = defect_energies_dict["Unperturbed"]
if "Dimer" in defect_energies_dict:
sorted_energies_dict["Dimer"] = defect_energies_dict["Dimer"]
return sorted_energies_dict
def save_file(energies, defect, path, verbose=False):
"""Save yaml file with final energies for each distortion."""
# File to write energies to
filename = f"{path}/{defect}/{defect}.yaml"
# Check if previous version of file exists
if os.path.exists(filename):
old_file = loadfn(filename)
if old_file != energies:
current_datetime = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")
if verbose:
print(
f"Moving old {filename} to "
f"{filename.replace('.yaml', '')}_{current_datetime}.yaml "
"to avoid overwriting"
)
os.rename(
filename, f"{filename.replace('.yaml', '')}_{current_datetime}.yaml"
) # Keep copy of old file
dumpfn(energies, filename)
else:
dumpfn(energies, filename)
def parse_vasp_energy(defect_dir, dist, energy, outcar):
"""Parse VASP energy from ``OUTCAR`` file."""
converged = False
if os.path.exists(os.path.join(defect_dir, dist, "OUTCAR")):
outcar = os.path.join(defect_dir, dist, "OUTCAR")
if outcar: # regrep faster than using Outcar/vasprun class
with contextlib.suppress(IndexError):
energy = _match(outcar, r"entropy=.*energy\(sigma->0\)\s+=\s+([\d\-\.]+)")[0][0][
0
] # Energy of first match
converged = _match(outcar, "required accuracy") # check if ionic relaxation converged
if not converged:
converged = _match(outcar, "considering this converged")
return converged, energy, outcar
def parse_espresso_energy(defect_dir, dist, energy, espresso_out):
"""Parse Quantum Espresso energy from ``espresso.out`` file."""
if os.path.join(defect_dir, dist, "espresso.out"): # Default SnB output filename
espresso_out = os.path.join(defect_dir, dist, "espresso.out")
elif os.path.exists(os.path.join(defect_dir, dist, filename)):
espresso_out = os.path.join(defect_dir, dist, filename)
if espresso_out:
energy_in_Ry = _match(espresso_out, r"! total energy\s+=\s+([\d\-\.]+)")
if energy_in_Ry:
# Energy of first match, in Rydberg
energy = float(Energy(float(energy_in_Ry[0][0][0]), "Ry").to("eV"))
return True, energy, espresso_out # no check for ionic convergence in QE
def parse_cp2k_energy(defect_dir, dist, energy, cp2k_out):
"""Parse CP2K energy from relax.out file."""
converged = False
if os.path.join(defect_dir, dist, "relax.out"):
cp2k_out = os.path.join(defect_dir, dist, "relax.out")
elif os.path.exists(os.path.join(defect_dir, dist, filename)):
cp2k_out = os.path.join(defect_dir, dist, filename)
if cp2k_out:
converged = _match(cp2k_out, "GEOMETRY OPTIMIZATION COMPLETED") # check if ionic
# relaxation is converged
energy_in_Ha = _match(cp2k_out, r"Total energy:\s+([\d\-\.]+)") # Energy of first
# match in Hartree
energy = float(Energy(energy_in_Ha[0][0][0], "Ha").to("eV"))
return converged, energy, cp2k_out
def parse_castep_energy(defect_dir, dist, energy, castep_out):
"""Parse CASTEP energy from .castep file."""
converged = False
output_files = [file for file in os.listdir(f"{defect_dir}/{dist}") if ".castep" in file]
if output_files and os.path.exists(f"{defect_dir}/{dist}/{output_files[0]}"):
castep_out = f"{defect_dir}/{dist}/{output_files[0]}"
elif os.path.exists(os.path.join(defect_dir, dist, filename)):
castep_out = os.path.join(defect_dir, dist, filename)
if castep_out:
# check if ionic relaxation is converged
# Convergence string deduced from:
# https://www.tcm.phy.cam.ac.uk/castep/Geom_Opt/node20.html
# and https://gitlab.mpcdf.mpg.de/nomad-lab/parser-castep/-/
# blob/master/test/examples/TiO2-geom.castep
converged = _match(castep_out, "Geometry optimization completed successfully.")
energy = _match(castep_out, r"Final Total Energy\s+([\d\-\.]+)")[0][0][
0
] # Energy of first match in eV
return converged, energy, castep_out
def parse_fhi_aims_energy(defect_dir, dist, energy, aims_out):
"""Parse FHI-aims energy from .out file."""
converged = False
if os.path.join(defect_dir, dist, "aims.out"):
aims_out = os.path.join(defect_dir, dist, "aims.out")
elif os.path.exists(os.path.join(defect_dir, dist, filename)):
aims_out = os.path.join(defect_dir, dist, filename)
if aims_out:
converged = _match(aims_out, "converged.") # check if ionic relaxation is converged
# Convergence string deduced from:
# https://fhi-aims-club.gitlab.io/tutorials/basics-of-running-fhi-aims/3-Periodic-Systems/
# and https://gitlab.com/fhi-aims-club/tutorials/basics-of-running-fhi-aims/-/
# blob/master/Tutorial/3-Periodic-Systems/solutions/Si/PBE_relaxation/aims.out
energy = _match(
aims_out,
r"\| Total energy of the DFT / Hartree-Fock s.c.f. calculation\s+:\s+([\d\-\.]+)",
)
if energy:
energy = energy[0][0][0] # Energy of first match in eV
return converged, energy, aims_out
energies = {"distortions": {}} # maps each distortion to the energy of the optimised structure
if defect == os.path.basename(os.path.normpath(path)) and not [
dir for dir in path if (os.path.isdir(dir) and os.path.basename(os.path.normpath(dir)) == defect)
]: # if ``defect`` is in end of ``path`` and ``path`` doesn't have a subdirectory called ``defect``
# then remove defect from end of path
path = os.path.dirname(path)
defect_dir = f"{path}/{defect}"
if not os.path.isdir(defect_dir):
orig_defect_name = defect
defect = defect.replace("+", "") # try removing '+' from defect name, old format
defect_dir = f"{path}/{defect}" # try removing '+' from defect name, old format
if not os.path.isdir(defect_dir):
warnings.warn(
f"Defect folder '{orig_defect_name}' not found in '{path}'. Please check these folders "
f"and paths."
)
return None
dist_dirs = [
dir
for dir in os.listdir(defect_dir)
if os.path.isdir(os.path.join(defect_dir, dir))
and any(
substring in dir
for substring in [
"Bond_Distortion",
"Rattled",
"Unperturbed",
"Dimer",
] # distortion directories
)
and "High_Energy" not in dir
]
# load previously-parsed energies file if present
energies_file = f"{path}/{defect}/{defect}.yaml"
if os.path.exists(energies_file):
try:
prev_energies_dict, _, _ = _sort_data(energies_file, verbose=False)
except Exception:
prev_energies_dict = {}
else:
prev_energies_dict = {}
# Parse energies and write them to file
for dist in dist_dirs:
outcar = None
energy = None
converged = False
if code.lower() == "vasp":
converged, energy, outcar = parse_vasp_energy(defect_dir, dist, energy, outcar)
elif code.lower() in [
"espresso",
"quantum_espresso",
"quantum-espresso",
"quantumespresso",
]:
converged, energy, outcar = parse_espresso_energy(defect_dir, dist, energy, outcar)
elif code.lower() == "cp2k":
converged, energy, outcar = parse_cp2k_energy(defect_dir, dist, energy, outcar)
elif code.lower() == "castep":
converged, energy, outcar = parse_castep_energy(defect_dir, dist, energy, outcar)
elif code.lower() in ["fhi-aims", "fhi_aims", "fhiaims"]:
converged, energy, outcar = parse_fhi_aims_energy(defect_dir, dist, energy, outcar)
if _format_distortion_names(dist) != "Label_not_recognized":
dist_name = _format_distortion_names(dist)
else:
dist_name = dist
if energy:
if "Unperturbed" in dist:
energies[dist_name] = float(energy)
else:
energies["distortions"][dist_name] = float(energy)
if not converged:
print(f"{dist} for {defect} is not fully relaxed")
elif not outcar:
# check if energy not found, but was previously parsed, then add to dict
if dist_name in prev_energies_dict:
energies[dist_name] = prev_energies_dict[dist_name]
elif "distortions" in prev_energies_dict and dist_name in prev_energies_dict["distortions"]:
energies["distortions"][dist_name] = prev_energies_dict["distortions"][dist_name]
elif f"{dist}_High_Energy" not in dist_dirs: # don't warn if was renamed to High_Energy
warnings.warn(f"No output file in {dist} directory")
if energies["distortions"]:
if "Unperturbed" in energies and all(
value - energies["Unperturbed"] > 0.1 for value in energies["distortions"].values()
):
warnings.warn(
f"All distortions parsed for {defect} are >0.1 eV higher energy than unperturbed, "
f"indicating problems with the relaxations. You should first check if the calculations "
f"finished ok for this defect species and if this defect charge state is reasonable ("
f"often this is the result of an unreasonable charge state). If both checks pass, "
f"you likely need to adjust the `stdev` rattling parameter (can occur for "
f"hard/ionic/magnetic materials); see "
f"https://shakenbreak.readthedocs.io/en/latest/Tips.html#hard-ionic-magnetic-materials\n"
f"This often indicates a complex PES with multiple minima, thus energy-lowering "
f"distortions particularly likely, so important to test with reduced `stdev`!"
)
elif (
"Unperturbed" in energies
and all(value - energies["Unperturbed"] < -0.1 for value in energies["distortions"].values())
and len(energies["distortions"]) > 2
):
warnings.warn(
f"All distortions parsed for {defect} are <-0.1 eV lower energy than unperturbed. If "
f"this happens for multiple defects/charge states, it can indicate that a bulk phase "
f"transformation is occurring within your defect supercell. If so, see "
f"https://shakenbreak.readthedocs.io/en/latest/Tips.html#bulk-phase-transformations for "
f"advice on dealing with this phenomenon."
)
else:
# check if distortion directories were present but were all "*High_Energy*"
try:
high_energy_dist_dirs = [
dir
for dir in os.listdir(defect_dir)
if os.path.isdir(os.path.join(defect_dir, dir))
and any(
substring in dir
for substring in [
"Bond_Distortion",
"Rattled",
"Unperturbed",
"Dimer",
]
)
and "High_Energy" in dir
]
except FileNotFoundError: # no "*High_Energy*" distortion folders
high_energy_dist_dirs = []
if (
high_energy_dist_dirs
): # "*High_Energy*" distortion directories present and no distortion energies parsed
warnings.warn(
f"All distortions for {defect} gave positive energies or forces errors, indicating "
f"problems with these relaxations. You should first check that no user INCAR setting is "
f"causing this issue. If not, you likely need to adjust the `stdev` rattling parameter ("
f"can occur for hard/ionic/magnetic materials); see "
f"https://shakenbreak.readthedocs.io/en/latest/Tips.html#hard-ionic-magnetic-materials."
)
else:
warnings.warn(
f"Energies could not be parsed for defect '{defect}' in '{path}'. If these directories "
f"are correct, check calculations have converged, and that distortion subfolders match "
f"ShakeNBreak naming (e.g. Bond_Distortion_xxx, Rattled, Unperturbed)"
)
# if any entries in prev_energies_dict not in energies_dict, add to energies_dict and
# warn user about output files not being parsed for this entry
if prev_energies_dict:
for key, value in prev_energies_dict.items():
if key not in energies:
energies[key] = value
warnings.warn(
f"No output file in {key} directory, but previous parsed result found in "
f"{energies_file}, using this."
)
elif key == "distortions":
for key2, value2 in prev_energies_dict["distortions"].items():
if key2 not in energies["distortions"]:
energies["distortions"][key2] = value2
warnings.warn(
f"No output file in {key2} directory, but previous parsed result found in "
f"{energies_file}, using this."
)
energies = sort_energies(energies)
if energies and energies != {"distortions": {}}:
save_file(energies, defect, path, verbose=verbose)
return energies_file
# Parsing output structures of different codes
[docs]
def read_vasp_structure(
file_path: str,
) -> Structure | str:
"""
Read VASP structure from ``file_path`` and convert to ``pymatgen`` Structure
object.
Args:
file_path (:obj:`str`):
Path to VASP ``CONTCAR`` file
Returns:
:obj:`Structure`:
``pymatgen`` ``Structure`` object
"""
filename = file_path.replace("\\", "/") # for Windows compatibility
if not os.path.isfile(filename):
# check if there's an equivalent high-energy folder, and if so don't warn
if not os.path.exists(filename.rsplit("/", 1)[0] + "_High_Energy"):
warnings.warn(
f"{filename} file doesn't exist, storing as 'Not converged'. Check path & relaxation."
)
return "Not converged"
try:
return Structure.from_file(filename)
except Exception as exc:
warnings.warn(
f"Problem parsing structure from {filename}; {exc!r}. Storing as "
f"'Not converged'. Check file & relaxation."
)
return "Not converged"
[docs]
def read_structure_w_ase(
filename: str,
) -> Structure | str:
"""
Reads a structure from ``QE``/``FHI-aims``/``CP2K``/``CASTEP`` output file,
using ``ASE``, and return it as a ``pymatgen`` ``Structure``.
Args:
filename (:obj:`str`):
Path to the output file.
Returns:
:obj:`Structure`:
``pymatgen`` ``Structure`` object
"""
if not os.path.exists(filename):
raise FileNotFoundError(f"File {filename} does not exist!")
try:
atoms = read(filename=filename)
structure = Structure.from_ase_atoms(atoms)
structure = structure.get_sorted_structure() # sort sites by EN
except Exception as exc:
warnings.warn(
f"Problem parsing structure from {filename}; {exc!r}. Storing as "
f"'Not converged'. Check file & relaxation."
)
structure = "Not converged"
return structure
[docs]
def parse_structure(
code: str,
structure_path: str,
structure_filename: str,
) -> Structure | str:
"""
Parses the output structure from different codes (VASP, CP2K, Quantum Espresso,
CATSEP, FHI-aims) and converts it to a ``pymatgen`` ``Structure`` object.
Args:
code (:obj:`str`):
Code used for geometry optimizations. Valid code names are:
"vasp", "espresso", "cp2k", "castep" and "fhi-aims" (case insensitive).
structure_path (:obj:`str`):
Path to directory containing the structure file.
structure_filename (:obj:`str`):
Name of the structure file or the output file containing the
optimized structure. If not set, the following values will be used
for each code:
- ``VASP``: "CONTCAR",
- ``cp2k``: "cp2k.restart" (The restart file is used),
- ``Quantum Espresso``: "espresso.out",
- ``CASTEP``: "castep.castep" (castep output file is used)
- ``Fhi-AIMS``: "geometry.in.next_step"
Returns:
:obj:`Structure`:
``pymatgen`` ``Structure`` object
"""
if code.lower() == "vasp":
if not structure_filename:
structure_filename = "CONTCAR"
structure = read_vasp_structure(f"{structure_path}/{structure_filename}")
else:
if code.lower() == "espresso" and not structure_filename:
structure_filename = "espresso.out"
elif code.lower() == "cp2k" and not structure_filename:
structure_filename = "cp2k.restart"
elif code.lower() == "fhi-aims" and not structure_filename:
structure_filename = "geometry.in.next_step"
elif code.lower() == "castep" and not structure_filename:
structure_filename = "castep.castep"
structure = read_structure_w_ase(f"{structure_path}/{structure_filename}")
return structure
# Parse code input files