# Source code for aiida_lsmo.parsers.parser_functions

# -*- coding: utf-8 -*-
"""Functions used for specific parsing of output files."""

import re

from aiida_cp2k.utils.parser import _parse_bands


def parse_cp2k_output_bsse(fstring):
    """Parse CP2K BSSE output into a dictionary (tested with PRINT_LEVEL MEDIUM).

    The output is scanned line by line. After every ``SCF run converged in`` line the next
    ``Dispersion energy:`` (if any) and ``  Total energy:`` values are collected, in order of
    appearance, into ``energy_dispersion_list`` and ``energy_list``. The position in these lists is
    described by ``energy_description_list``.

    :param fstring: content of the CP2K output file as a single string.
    :return: dictionary with ``exceeded_walltime``, ``energy_description_list``, ``energy_list`` and
        ``energy_dispersion_list`` always present, ``nwarnings`` if the corresponding line was found and,
        only when at least five energies were collected, ``energy``, ``energy_units``,
        ``binding_energy_raw``, ``binding_energy_corr``, ``binding_energy_bsse``, ``binding_energy_unit``
        and (if five dispersion energies are available) ``binding_energy_dispersion``.
    """
    result_dict = {
        'exceeded_walltime': False,
        'energy_description_list': [
            'Energy of A with basis set A', 'Energy of B with basis set B', 'Energy of A with basis set of A+B',
            'Energy of B with basis set of A+B', 'Energy of A+B with basis set of A+B'
        ],
        'energy_list': [],
        'energy_dispersion_list': []
    }
    energies = result_dict['energy_list']
    dispersions = result_dict['energy_dispersion_list']

    # Energies are read only between a "SCF run converged in" line and the following "  Total energy:"
    # line, so that the values printed during the SCF iterations are not collected.
    read_energy = False
    for line in fstring.splitlines():
        if 'The number of warnings for this run is' in line:
            result_dict['nwarnings'] = int(line.split()[-1])
        if 'exceeded requested execution time' in line:
            result_dict['exceeded_walltime'] = True
        if 'SCF run converged in' in line:
            read_energy = True
        if read_energy:
            if r'Dispersion energy:' in line:
                dispersions.append(float(line.split()[-1]))
            if r'  Total energy:' in line:
                energies.append(float(line.split()[-1]))
                read_energy = False

    # Incomplete output (e.g. walltime exceeded before the fifth SCF converged): the binding energies
    # can not be computed. Return what was found instead of failing with IndexError, so that the caller
    # can still inspect 'exceeded_walltime' and the presence of 'nwarnings'.
    if len(energies) < 5:
        return result_dict

    # Imported only here, where the conversion factor is actually needed.
    from aiida_lsmo.utils import HARTREE2KJMOL

    result_dict['energy'] = energies[4]
    result_dict['energy_units'] = 'a.u.'
    result_dict['binding_energy_raw'] = (energies[4] - energies[0] - energies[1]) * HARTREE2KJMOL
    result_dict['binding_energy_corr'] = (energies[4] - energies[2] - energies[3]) * HARTREE2KJMOL
    result_dict['binding_energy_bsse'] = result_dict['binding_energy_raw'] - result_dict['binding_energy_corr']
    result_dict['binding_energy_unit'] = 'kJ/mol'
    # The dispersion term is skipped if dispersion energies were not printed for all five calculations.
    if len(dispersions) >= 5:
        result_dict['binding_energy_dispersion'] = (dispersions[4] - dispersions[0] -
                                                    dispersions[1]) * HARTREE2KJMOL

    return result_dict


def parse_cp2k_output_advanced(fstring):  # pylint: disable=too-many-locals, too-many-statements, too-many-branches
    """Parse CP2K output into a dictionary (ADVANCED: more info parsed @ PRINT_LEVEL MEDIUM).

    Tested for CP2K 5.1, 8.1, and calculations: ENERGY, GEO_OPT, CELL_OPT, MD NVT, MD NPT_F.

    The output is scanned once, from top to bottom. Each recognised line either updates ``result_dict``
    directly or updates a set of "current step" variables, which are appended to the lists in
    ``result_dict['motion_step_info']`` whenever a line marking the end of a step is met.

    :param fstring: content of the CP2K output file as a single string.
    :return: dictionary that always contains ``exceeded_walltime`` and ``warnings``. The other keys
        (``cp2k_version``, ``energy``, ``energy_units``, ``nwarnings``, ``kpoint_data``, ``run_type``,
        ``dft_type``, ``init_nel_spin1``, ``init_nel_spin2``, ``natoms``, ``smear_method``,
        ``eigen_spin1_au``, ``eigen_spin2_au``, ``motion_opt_converged``, ``motion_step_info``) are present
        only if the corresponding lines were found. In ``motion_step_info`` a quantity that was not
        available when a step was recorded is stored as ``None``.
    """
    lines = fstring.splitlines()

    result_dict = {'exceeded_walltime': False, 'warnings': []}
    n_spin_channels = None  # 1 or 2 while reading a block of eigenvalues, None otherwise
    energy = None
    bohr2ang = 0.529177208590000
    # Run types (possibly extended with the MD ensemble) for which the step properties are parsed
    motion_run_types = ('ENERGY', 'ENERGY_FORCE', 'GEO_OPT', 'CELL_OPT', 'MD', 'MD-NVT', 'MD-NPT_F')

    for i_line, line in enumerate(lines):
        if line.startswith(' CP2K| version string:'):
            cp2k_version = float(line.split()[5])
            result_dict['cp2k_version'] = cp2k_version
        if line.startswith(' ENERGY| '):
            energy = float(line.split()[8])
            result_dict['energy'] = energy
            result_dict['energy_units'] = 'a.u.'
        if 'The number of warnings for this run is' in line:
            result_dict['nwarnings'] = int(line.split()[-1])
        if 'exceeded requested execution time' in line:
            result_dict['exceeded_walltime'] = True
        if 'KPOINTS| Band Structure Calculation' in line:
            # NOTE: cp2k_version is bound only once the version string line has been read
            kpoints, labels, bands = _parse_bands(lines, i_line, cp2k_version)
            result_dict['kpoint_data'] = {
                'kpoints': kpoints,
                'labels': labels,
                'bands': bands,
                'bands_unit': 'eV',
            }
        if line.startswith(' GLOBAL| Run type'):
            result_dict['run_type'] = line.split()[-1]

        if line.startswith((' MD| Ensemble Type', ' MD_PAR| Ensemble type')):  # CP2K 5.1 and 8.1
            result_dict['run_type'] += '-' + line.split()[-1]  # e.g., 'MD-NVT' or 'MD-NPT_F'

        if line.startswith(' DFT| ') and 'dft_type' not in result_dict:
            result_dict['dft_type'] = line.split()[-1]  # RKS, UKS or ROKS

        # read the number of electrons in the first scf (NOTE: it may change but it is not updated!)
        if re.search('Number of electrons: ', line):
            if 'init_nel_spin1' not in result_dict:
                result_dict['init_nel_spin1'] = int(line.split()[3])
                if result_dict['dft_type'] == 'RKS':
                    # Only the total is printed: split it evenly over the two spin channels
                    result_dict['init_nel_spin1'] //= 2  # // returns an integer
                    result_dict['init_nel_spin2'] = result_dict['init_nel_spin1']
            elif 'init_nel_spin2' not in result_dict:
                result_dict['init_nel_spin2'] = int(line.split()[3])

        if re.search('- Atoms: ', line):
            result_dict['natoms'] = int(line.split()[-1])

        if re.search('Smear method', line):
            result_dict['smear_method'] = line.split()[-1]

        if re.search(r'subspace spin', line):
            # Header of a block of eigenvalues: the values are read from the following lines
            n_spin_channels = int(line.split()[-1])
            result_dict.setdefault(f'eigen_spin{n_spin_channels}_au', [])
            continue

        # Parse warnings
        if re.search(r'Using a non-square number of', line):
            result_dict['warnings'].append('Using a non-square number of MPI ranks')
        if re.search(r'SCF run NOT converged', line):
            warn = 'One or more SCF run did not converge'
            if warn not in result_dict['warnings']:
                result_dict['warnings'].append(warn)
        if re.search(r'Specific L-BFGS convergence criteria', line):
            result_dict['warnings'].append('LBFGS converged with specific criteria')

        # If a tag has been detected, now read the following line knowing what they are
        if n_spin_channels in [1, 2]:
            # Read eigenvalues as 4-columns row, then convert to float
            if re.search(r'-------------', line) or re.search(r'eached convergence', line):
                continue
            if line.split() and len(line.split()) <= 4:
                result_dict[f'eigen_spin{n_spin_channels}_au'] += [float(x) for x in line.split()]
            else:
                # An empty line or a line with more than 4 columns closes the block of eigenvalues
                n_spin_channels = None

        ####################################################################
        #  THIS SECTION PARSES THE PROPERTIES AT GEO_OPT/CELL_OPT/MD STEP  #
        #  NOTE: it may not be robust!                                     #
        ####################################################################
        if result_dict.get('run_type') in motion_run_types:
            # Initialization (done once, on the first line read after the run type is known)
            if 'motion_step_info' not in result_dict:
                result_dict['motion_opt_converged'] = False
                result_dict['motion_step_info'] = {
                    'step': [],  # MOTION step
                    'energy_au': [],  # total energy
                    'dispersion_energy_au': [],  # Dispersion energy (if dispersion correction activated)
                    'pressure_bar': [],  # Total pressure on the cell
                    'cell_vol_angs3': [],  # Cell Volume
                    'cell_a_angs': [],  # Cell dimension A
                    'cell_b_angs': [],  # Cell dimension B
                    'cell_c_angs': [],  # Cell dimension C
                    'cell_alp_deg': [],  # Cell angle Alpha
                    'cell_bet_deg': [],  # Cell angle Beta
                    'cell_gam_deg': [],  # Cell angle Gamma
                    'max_step_au': [],  # Max atomic displacement (in optimization)
                    'rms_step_au': [],  # RMS atomic displacement (in optimization)
                    'max_grad_au': [],  # Max atomic force (in optimization)
                    'rms_grad_au': [],  # RMS atomic force (in optimization)
                    'edens_rspace': [],  # Total charge density on r-space grids (should stay small)
                    'scf_converged': [],  # SCF converged in this motions step (bool)
                }
                step = 0
                energy = None
                dispersion = None  # Needed if no dispersions are included
                pressure = None
                # The cell properties start as None, so that a step recorded before any cell information
                # was read stores None instead of raising UnboundLocalError.
                cell_vol = cell_a = cell_b = cell_c = None
                cell_alp = cell_bet = cell_gam = None
                max_step = None
                rms_step = None
                max_grad = None
                rms_grad = None
                edens_rspace = None
                scf_converged = True

            print_now = False  # set to True by the lines that mark the end of a step
            data = line.split()
            # Parse general info
            if line.startswith(' CELL|'):
                if re.search(r'Volume', line):
                    cell_vol = float(data[3])
                if re.search(r'Vector a', line):
                    cell_a = float(data[9])
                if re.search(r'Vector b', line):
                    cell_b = float(data[9])
                if re.search(r'Vector c', line):
                    cell_c = float(data[9])
                if re.search(r'alpha', line):
                    cell_alp = float(data[5])
                if re.search(r'beta', line):
                    cell_bet = float(data[5])
                if re.search(r'gamma', line):
                    cell_gam = float(data[5])

            if re.search(r'Dispersion energy', line):
                dispersion = float(data[2])
            if re.search('Total charge density on r-space grids:', line):
                # Printed at every outer OT, and needed for understanding if something is going wrong (if !=0)
                edens_rspace = float(line.split()[-1])
            if re.search(r'SCF run NOT converged', line):
                scf_converged = False

            # Parse specific info
            if result_dict['run_type'] in ['ENERGY', 'ENERGY_FORCE']:
                # Single point: record one step only, as soon as the energy is known
                if energy is not None and not result_dict['motion_step_info']['step']:
                    print_now = True
            if result_dict['run_type'] in ['GEO_OPT', 'CELL_OPT']:
                # Note: with CELL_OPT/LBFGS there is no "STEP 0", while there is with CELL_OPT/BFGS
                if re.search(r'Informations at step', line):
                    step = int(data[5])
                if re.search(r'Max. step size             =', line):
                    max_step = float(data[-1])
                if re.search(r'RMS step size              =', line):
                    rms_step = float(data[-1])
                if re.search(r'Max. gradient              =', line):
                    max_grad = float(data[-1])
                if re.search(r'RMS gradient               =', line):
                    rms_grad = float(data[-1])
                if len(data) == 1 and data[0] == '---------------------------------------------------':
                    print_now = True  # 51('-')
                if re.search(r'Reevaluating energy at the minimum', line):  # not clear why it is doing a last one...
                    result_dict['motion_opt_converged'] = True

            if result_dict['run_type'] == 'CELL_OPT':
                if re.search(r'Internal Pressure', line):
                    pressure = float(data[4])
            if result_dict['run_type'] == 'MD-NVT':
                if re.search(r'STEP NUMBER', line) or re.search(r'MD\| Step number', line):
                    step = int(data[-1])
                if re.search(r'INITIAL PRESSURE\[bar\]', line) or re.search(r'MD_INI\| Pressure', line):
                    pressure = float(data[-1])
                    print_now = True
                if re.search(r'PRESSURE \[bar\]', line) or re.search(r'MD\| Pressure', line):
                    pressure = float(data[-2])
                    print_now = True
            if result_dict['run_type'] == 'MD-NPT_F':  # The two matches are tested for CP2K 5.1 and 8.1
                if re.search(r'^ STEP NUMBER', line) or re.search(r'^ MD\| Step number', line):
                    step = int(data[-1])
                if re.search(r'^ INITIAL PRESSURE\[bar\]', line) or re.search(r'^ MD_INI\| Pressure', line):
                    pressure = float(data[-1])
                    print_now = True
                if re.search(r'^ PRESSURE \[bar\]', line) or re.search(r'^ MD\| Pressure', line):
                    pressure = float(data[-2])  # Note: -2 is the instantaneous, -1 the average
                if re.search(r'^ VOLUME\[bohr\^3\]', line) or re.search(r'^ MD\| Cell volume \[bohr\^3\]', line):
                    cell_vol = float(data[-2]) * (bohr2ang**3)
                if re.search(r'^ CELL LNTHS\[bohr\]', line) or re.search(r'^ MD\| Cell lengths \[bohr\]', line):
                    cell_a = float(data[-3]) * bohr2ang
                    cell_b = float(data[-2]) * bohr2ang
                    cell_c = float(data[-1]) * bohr2ang
                if re.search(r'^ CELL ANGLS\[deg\]', line) or re.search(r'^ MD\| Cell angles \[deg\]', line):
                    cell_alp = float(data[-3])
                    cell_bet = float(data[-2])
                    cell_gam = float(data[-1])
                    print_now = True

            # Store the current values of all the step variables (nothing is stored before an energy is known)
            if print_now and energy is not None:
                step_info = result_dict['motion_step_info']
                step_info['step'].append(step)
                step_info['energy_au'].append(energy)
                step_info['dispersion_energy_au'].append(dispersion)
                step_info['pressure_bar'].append(pressure)
                step_info['cell_vol_angs3'].append(cell_vol)
                step_info['cell_a_angs'].append(cell_a)
                step_info['cell_b_angs'].append(cell_b)
                step_info['cell_c_angs'].append(cell_c)
                step_info['cell_alp_deg'].append(cell_alp)
                step_info['cell_bet_deg'].append(cell_bet)
                step_info['cell_gam_deg'].append(cell_gam)
                step_info['max_step_au'].append(max_step)
                step_info['rms_step_au'].append(rms_step)
                step_info['max_grad_au'].append(max_grad)
                step_info['rms_grad_au'].append(rms_grad)
                step_info['edens_rspace'].append(edens_rspace)
                step_info['scf_converged'].append(scf_converged)
                scf_converged = True  # reset the flag for the next step
        ####################################################################
        #  END PARSING GEO_OPT/CELL_OPT/MD STEP                            #
        ####################################################################

    return result_dict