# Source code for geom_scan

################################################################################################################################################
##                                                             Geometry Files Scanner                                                         ##
##                                                                                                                                            ##
##                                        This script contains the scanning functions for ABIN LAUNCHER,                                      ##
##                                consult the documentation at https://chains-ulb.readthedocs.io/ for details                                 ##
################################################################################################################################################

import re
import abin_errors

# Pattern of the atomic coordinates lines (they look like 'Si -0.31438 1.89081 0.00000'): an atomic symbol made of
# 1 to 3 letters, followed by three whitespace-separated numbers. Each number may carry an explicit sign, be an integer
# ('0'), omit the digits on one side of the decimal point ('1.' or '.5') and use scientific notation ('7.4e-01').
# This is based on regular expressions (regex), consult https://docs.python.org/3/library/re.html for details.
# Compiled once at import time so that it is not rebuilt on every call.
_XYZ_COORD_LINE = re.compile(
    r'^\s*(?P<atomSymbol>[a-zA-Z]{1,3})'
    r'(?:\s+[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?){3}\s*$'
)


def xyz_scan(mol_content: list):
    """Scans the content of an XYZ geometry file and extracts the chemical formula and atomic coordinates of the molecule.

    A summary table (atom types and their number of occurrences) is printed on the standard output once the scan
    has succeeded.

    Parameters
    ----------
    mol_content : list
        Content of the XYZ geometry file. Each element of the list is a line of the file.

    Returns
    -------
    file_data : dict
        The extracted information of the file, following the pattern
        { 'chemical_formula' : { }, 'atomic_coordinates' : [ ] }
        where 'chemical_formula' maps each atomic symbol (as written in the file) to its number of occurrences and
        'atomic_coordinates' holds the matching lines, unmodified and in file order.

    Raises
    ------
    AbinError
        If the first line of the file is missing or is not an integer, or if the number of atomic coordinates lines
        does not match the number of atoms mentioned in the first line of the .xyz file.
    """

    # Initialize the file_data dictionary that will be returned by the function
    file_data = {'chemical_formula': {}, 'atomic_coordinates': []}
    chemical_formula = file_data['chemical_formula']
    atomic_coordinates = file_data['atomic_coordinates']

    # Determining the number of atoms (first line of the xyz file).
    # An empty file or a non-integer first line is reported as an AbinError rather than a raw IndexError / ValueError.
    try:
        nb_atoms = int(mol_content[0])
    except (IndexError, ValueError) as error:
        raise abin_errors.AbinError(
            "ERROR: The first line of the .xyz file must contain the number of atoms of the molecule !"
        ) from error

    # Scanning the content of the XYZ file to determine the chemical formula and atomic coordinates of the molecule
    # We only start at the 3rd line ([2:]) because the first two won't contain any coordinates
    for line in mol_content[2:]:

        matching_line = _XYZ_COORD_LINE.match(line)

        # Lines that do not look like atomic coordinates (blank lines, trailing comments, ...) are ignored
        if matching_line is None:
            continue

        # Store the line as is, to be rendered in the input file later on
        atomic_coordinates.append(line)

        # Count the number of occurrences of the atom type
        atom_type = matching_line.group("atomSymbol")
        chemical_formula[atom_type] = chemical_formula.get(atom_type, 0) + 1

    # Check if the number of coordinate lines matches the number of atoms defined in the first line of the .xyz file
    checksum_nlines = len(atomic_coordinates)

    if checksum_nlines != nb_atoms:
        raise abin_errors.AbinError("ERROR: Number of atomic coordinates lines (%s) doesn't match the number of atoms mentioned in the first line of the .xyz file (%s) !" % (checksum_nlines, nb_atoms))

    # Print the summary table of the chemical formula
    print("")
    print(''.center(35, '-'))
    print("{:<16} {:<18}".format('Atom Type', 'Number of atoms'))
    print(''.center(35, '-'))
    for atom, nb_atom in chemical_formula.items():
        print("{:<16} {:<18}".format(atom, nb_atom))
    print(''.center(35, '-'))
    print("{:<16} {:<18}".format('Total', sum(chemical_formula.values())))
    print(''.center(35, '-'))

    print("\n%s lines of atomic coordinates have been registered." % checksum_nlines)

    # Scanning complete, now return file_data
    return file_data