Coverage for cclib/parser/data.py : 81%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2#
3# Copyright (c) 2020, the cclib development team
4#
5# This file is part of cclib (http://cclib.github.io) and is distributed under
6# the terms of the BSD 3-Clause License.
8"""Classes and tools for storing and handling parsed data"""
10import logging
11from collections import namedtuple
13import numpy
15from cclib.method import Electrons
16from cclib.method import orbitals
# Describes one supported attribute: its expected Python type, the key used
# for it in the CJSON/JSON format, and the path to that key.
Attribute = namedtuple('Attribute', ['type', 'json_key', 'attribute_path'])


class ccData:
    """Stores data extracted by cclib parsers

    Description of cclib attributes:
        aonames -- atomic orbital names (list of strings)
        aooverlaps -- atomic orbital overlap matrix (array[2])
        atombasis -- indices of atomic orbitals on each atom (list of lists)
        atomcharges -- atomic partial charges (dict of arrays[1])
        atomcoords -- atom coordinates (array[3], angstroms)
        atommasses -- atom masses (array[1], daltons)
        atomnos -- atomic numbers (array[1])
        atomspins -- atomic spin densities (dict of arrays[1])
        ccenergies -- molecular energies with Coupled-Cluster corrections (array[2], eV)
        charge -- net charge of the system (integer)
        coreelectrons -- number of core electrons in atom pseudopotentials (array[1])
        dispersionenergies -- molecular dispersion energy corrections (array[1], eV)
        enthalpy -- sum of electronic and thermal enthalpies (float, hartree/particle)
        entropy -- entropy (float, hartree/particle)
        etenergies -- energies of electronic transitions (array[1], 1/cm)
        etoscs -- oscillator strengths of electronic transitions (array[1])
        etdips -- electric transition dipoles of electronic transitions (array[2], ebohr)
        etveldips -- velocity-gauge electric transition dipoles of electronic transitions (array[2], ebohr)
        etmagdips -- magnetic transition dipoles of electronic transitions (array[2], ebohr)
        etrotats -- rotatory strengths of electronic transitions (array[1], ??)
        etsecs -- singly-excited configurations for electronic transitions (list of lists)
        etsyms -- symmetries of electronic transitions (list of string)
        freeenergy -- sum of electronic and thermal free energies (float, hartree/particle)
        fonames -- fragment orbital names (list of strings)
        fooverlaps -- fragment orbital overlap matrix (array[2])
        fragnames -- names of fragments (list of strings)
        frags -- indices of atoms in a fragment (list of lists)
        gbasis -- coefficients and exponents of Gaussian basis functions (PyQuante format)
        geotargets -- targets for convergence of geometry optimization (array[1])
        geovalues -- current values for convergence of geometry optimization (array[1])
        grads -- current values of forces (gradients) in geometry optimization (array[3])
        hessian -- elements of the force constant matrix (array[1])
        homos -- molecular orbital indices of HOMO(s) (array[1])
        metadata -- various metadata about the package and computation (dict)
        mocoeffs -- molecular orbital coefficients (list of arrays[2])
        moenergies -- molecular orbital energies (list of arrays[1], eV)
        moments -- molecular multipole moments (list of arrays[], a.u.)
        mosyms -- orbital symmetries (list of lists)
        mpenergies -- molecular electronic energies with Møller-Plesset corrections (array[2], eV)
        mult -- multiplicity of the system (integer)
        natom -- number of atoms (integer)
        nbasis -- number of basis functions (integer)
        nmo -- number of molecular orbitals (integer)
        nocoeffs -- natural orbital coefficients (array[2])
        nooccnos -- natural orbital occupation numbers (array[1])
        nsocoeffs -- natural spin orbital coefficients (list of array[2])
        nsooccnos -- natural spin orbital occupation numbers (list of array[1])
        optdone -- flags whether an optimization has converged (Boolean)
        optstatus -- optimization status for each set of atomic coordinates (array[1])
        polarizabilities -- (dipole) polarizabilities, static or dynamic (list of arrays[2])
        pressure -- pressure used for Thermochemistry (float, atm)
        scancoords -- geometries of each scan step (array[3], angstroms)
        scanenergies -- energies of potential energy surface (list)
        scannames -- names of variables scanned (list of strings)
        scanparm -- values of parameters in potential energy surface (list of lists)
        scfenergies -- molecular electronic energies after SCF (Hartree-Fock, DFT) (array[1], eV)
        scftargets -- targets for convergence of the SCF (array[2])
        scfvalues -- current values for convergence of the SCF (list of arrays[2])
        temperature -- temperature used for Thermochemistry (float, kelvin)
        time -- time in molecular dynamics and other trajectories (array[1], fs)
        transprop -- all absorption and emission spectra (dictionary {name:(etenergies, etoscs)})
            WARNING: this attribute is not standardized and is liable to change in cclib 2.0
        vibanharms -- vibrational anharmonicity constants (array[2], 1/cm)
        vibdisps -- cartesian displacement vectors (array[3], delta angstrom)
        vibfreqs -- vibrational frequencies (array[1], 1/cm)
        vibfconsts -- force constants of vibrations (array[1], mDyne/angstrom)
        vibirs -- IR intensities (array[1], km/mol)
        vibramans -- Raman activities (array[1], A^4/Da)
        vibrmasses -- reduced masses of vibrations (array[1], daltons)
        vibsyms -- symmetries of vibrations (list of strings)
        zpve -- zero-point vibrational energy correction (float, hartree/particle)
    (1) The term 'array' refers to a numpy array
    (2) The number of dimensions of an array is given in square brackets
    (3) Python indexes arrays/lists starting at zero, so if homos==[10], then
            the 11th molecular orbital is the HOMO
    """

    # The expected types for all supported attributes.
    # The json_key is the key name used for attributes in the CJSON/JSON format
    # 'TBD' - To Be Decided are the key names of attributes which haven't been included in the cjson format
    # NOTE(review): the json_key 'electic transition dipoles' (etdips) and the
    # path 'properties:energies' (zpve, the other energies use
    # 'properties:energy') look like typos, but they are runtime strings and
    # are therefore kept unchanged here -- confirm against the writers.
    _attributes = {
        "aonames": Attribute(list, 'names', 'atoms:orbitals'),
        "aooverlaps": Attribute(numpy.ndarray, 'overlaps', 'properties:orbitals'),
        "atombasis": Attribute(list, 'indices', 'atoms:orbitals'),
        "atomcharges": Attribute(dict, 'partial charges', 'properties'),
        "atomcoords": Attribute(numpy.ndarray, 'coords', 'atoms:coords:3d'),
        "atommasses": Attribute(numpy.ndarray, 'mass', 'atoms'),
        "atomnos": Attribute(numpy.ndarray, 'number', 'atoms:elements'),
        "atomspins": Attribute(dict, 'spins', 'atoms'),
        "ccenergies": Attribute(numpy.ndarray, 'coupled cluster', 'properties:energy'),
        "charge": Attribute(int, 'charge', 'properties'),
        "coreelectrons": Attribute(numpy.ndarray, 'core electrons', 'atoms'),
        "dispersionenergies": Attribute(numpy.ndarray, 'dispersion correction', 'properties:energy'),
        "enthalpy": Attribute(float, 'enthalpy', 'properties'),
        "entropy": Attribute(float, 'entropy', 'properties'),
        "etenergies": Attribute(numpy.ndarray, 'electronic transitions', 'transitions'),
        "etoscs": Attribute(numpy.ndarray, 'oscillator strength', 'transitions'),
        "etdips": Attribute(numpy.ndarray, 'electic transition dipoles', 'transitions'),
        "etveldips": Attribute(numpy.ndarray, 'velocity-gauge electric transition dipoles', 'transitions'),
        "etmagdips": Attribute(numpy.ndarray, 'magnetic transition dipoles', 'transitions'),
        "etrotats": Attribute(numpy.ndarray, 'rotatory strength', 'transitions'),
        "etsecs": Attribute(list, 'one excited config', 'transitions'),
        "etsyms": Attribute(list, 'symmetry', 'transitions'),
        "freeenergy": Attribute(float, 'free energy', 'properties:energy'),
        "fonames": Attribute(list, 'orbital names', 'fragments'),
        "fooverlaps": Attribute(numpy.ndarray, 'orbital overlap', 'fragments'),
        "fragnames": Attribute(list, 'fragment names', 'fragments'),
        "frags": Attribute(list, 'atom indices', 'fragments'),
        "gbasis": Attribute(list, 'basis functions', 'atoms:orbitals'),
        "geotargets": Attribute(numpy.ndarray, 'geometric targets', 'optimization'),
        "geovalues": Attribute(numpy.ndarray, 'geometric values', 'optimization'),
        "grads": Attribute(numpy.ndarray, 'TBD', 'N/A'),
        "hessian": Attribute(numpy.ndarray, 'hessian matrix', 'vibrations'),
        "homos": Attribute(numpy.ndarray, 'homos', 'properties:orbitals'),
        "metadata": Attribute(dict, 'TBD', 'N/A'),
        "mocoeffs": Attribute(list, 'coeffs', 'properties:orbitals'),
        "moenergies": Attribute(list, 'energies', 'properties:orbitals'),
        "moments": Attribute(list, 'total dipole moment', 'properties'),
        "mosyms": Attribute(list, 'molecular orbital symmetry', 'properties:orbitals'),
        "mpenergies": Attribute(numpy.ndarray, 'moller plesset', 'properties:energy'),
        "mult": Attribute(int, 'multiplicity', 'properties'),
        "natom": Attribute(int, 'number of atoms', 'properties'),
        "nbasis": Attribute(int, 'basis number', 'properties:orbitals'),
        "nmo": Attribute(int, 'MO number', 'properties:orbitals'),
        "nocoeffs": Attribute(numpy.ndarray, 'TBD', 'N/A'),
        "nooccnos": Attribute(numpy.ndarray, 'TBD', 'N/A'),
        "nsocoeffs": Attribute(list, 'TBD', 'N/A'),
        "nsooccnos": Attribute(list, 'TBD', 'N/A'),
        "optdone": Attribute(list, 'done', 'optimization'),
        "optstatus": Attribute(numpy.ndarray, 'status', 'optimization'),
        "polarizabilities": Attribute(list, 'polarizabilities', 'N/A'),
        "pressure": Attribute(float, 'pressure', 'properties'),
        "scancoords": Attribute(numpy.ndarray, 'step geometry', 'optimization:scan'),
        "scanenergies": Attribute(list, 'PES energies', 'optimization:scan'),
        "scannames": Attribute(list, 'variable names', 'optimization:scan'),
        "scanparm": Attribute(list, 'PES parameter values', 'optimization:scan'),
        "scfenergies": Attribute(numpy.ndarray, 'scf energies', 'optimization:scf'),
        "scftargets": Attribute(numpy.ndarray, 'targets', 'optimization:scf'),
        "scfvalues": Attribute(list, 'values', 'optimization:scf'),
        "temperature": Attribute(float, 'temperature', 'properties'),
        "time": Attribute(numpy.ndarray, 'time', 'N/A'),
        "transprop": Attribute(dict, 'electronic transitions', 'transitions'),
        "vibanharms": Attribute(numpy.ndarray, 'anharmonicity constants', 'vibrations'),
        "vibdisps": Attribute(numpy.ndarray, 'displacement', 'vibrations'),
        "vibfreqs": Attribute(numpy.ndarray, 'frequencies', 'vibrations'),
        "vibfconsts": Attribute(numpy.ndarray, 'force constants', 'vibrations'),
        "vibirs": Attribute(numpy.ndarray, 'IR', 'vibrations:intensities'),
        "vibramans": Attribute(numpy.ndarray, 'raman', 'vibrations:intensities'),
        "vibrmasses": Attribute(numpy.ndarray, 'reduced masses', 'vibrations'),
        "vibsyms": Attribute(list, 'vibration symmetry', 'vibrations'),
        "zpve": Attribute(float, 'zero-point correction', 'properties:energies'),
    }

    # The name of all attributes can be generated from the dictionary above.
    _attrlist = sorted(_attributes)

    # Arrays are double precision by default, but these will be integer arrays.
    _intarrays = ['atomnos', 'coreelectrons', 'homos', 'optstatus']

    # Attributes that should be lists of arrays (double precision).
    _listsofarrays = ['mocoeffs', 'moenergies', 'moments', 'polarizabilities', 'scfvalues']

    # Attributes that should be dictionaries of arrays (double precision).
    _dictsofarrays = ["atomcharges", "atomspins"]

    # Possible statuses for optimization steps.
    # OPT_UNKNOWN is the default and means optimization is in progress.
    # OPT_NEW is set for every new optimization (e.g. PES, IRCs, etc.)
    # OPT_DONE is set for the last step of an optimisation that converged.
    # OPT_UNCONVERGED is set for every unconverged step (e.g. should be mutually exclusive with OPT_DONE)
    # bit value notation allows coding for multiple states: OPT_NEW and OPT_UNCONVERGED or OPT_NEW and OPT_DONE.
    OPT_UNKNOWN = 0b000
    OPT_NEW = 0b001
    OPT_UNCONVERGED = 0b010
    OPT_DONE = 0b100

    def __init__(self, attributes=None):
        """Initialize the ccData object.

        Normally called in the parse() method of a Logfile subclass.

        Inputs:
            attributes - optional dictionary of attributes to load as data;
                         None or an empty dictionary loads nothing
        """

        # None replaces the former mutable `{}` default; both are falsy, so
        # callers see the same behaviour.
        if attributes:
            self.setattributes(attributes)

    def listify(self):
        """Converts all attributes that are arrays or lists/dicts of arrays to lists."""

        for name in self._attrlist:
            if not hasattr(self, name):
                continue
            expected = self._attributes[name].type
            value = getattr(self, name)
            if expected == numpy.ndarray:
                setattr(self, name, value.tolist())
            elif expected == list and name in self._listsofarrays:
                setattr(self, name, [item.tolist() for item in value])
            elif expected == dict and name in self._dictsofarrays:
                setattr(self, name, {key: val.tolist() for key, val in value.items()})

    def arrayify(self):
        """Converts appropriate attributes to arrays or lists/dicts of arrays."""

        for name in self._attrlist:
            if not hasattr(self, name):
                continue
            expected = self._attributes[name].type
            # Double precision unless the attribute is declared as an integer array.
            precision = 'i' if name in self._intarrays else 'd'
            value = getattr(self, name)
            if expected == numpy.ndarray:
                setattr(self, name, numpy.array(value, precision))
            elif expected == list and name in self._listsofarrays:
                setattr(self, name, [numpy.array(item, precision) for item in value])
            elif expected == dict and name in self._dictsofarrays:
                converted = {key: numpy.array(val, precision) for key, val in value.items()}
                setattr(self, name, converted)

    def getattributes(self, tolists=False):
        """Returns a dictionary of existing data attributes.

        Inputs:
            tolists - flag to convert attributes to lists where applicable

        With tolists set, the object is temporarily converted with listify(),
        the values are collected, and arrayify() then restores the arrays on
        the object; the returned dictionary keeps the list versions.
        """

        if tolists:
            self.listify()
        attributes = {
            attr: getattr(self, attr) for attr in self._attrlist if hasattr(self, attr)
        }
        if tolists:
            self.arrayify()
        return attributes

    def setattributes(self, attributes):
        """Sets data attributes given in a dictionary.

        Inputs:
            attributes - dictionary (or dict subclass) of attributes to set
        Outputs:
            invalid - list of attributes names that were not set, which
                      means they are not specified in self._attrlist
        Raises:
            TypeError - if attributes is not a dictionary, or if typecheck()
                        rejects one of the values
        """

        # isinstance rather than an exact type match, so that dict subclasses
        # such as OrderedDict are accepted as well.
        if not isinstance(attributes, dict):
            raise TypeError("attributes must be in a dictionary")

        invalid = []
        for name, value in attributes.items():
            if name in self._attrlist:
                setattr(self, name, value)
            else:
                invalid.append(name)

        self.arrayify()
        self.typecheck()

        return invalid

    def typecheck(self):
        """Check the types of all attributes.

        If an attribute does not match the expected type, then attempt to
        convert; if that fails, only then raise a TypeError.

        The conversion is only a validity probe: its result is discarded and
        the attribute keeps the value it had.
        """

        self.arrayify()
        for attr in self._attrlist:
            if not hasattr(self, attr):
                continue

            expected = self._attributes[attr].type
            val = getattr(self, attr)
            if type(val) == expected:
                continue

            try:
                expected(val)
            except (ValueError, TypeError):
                # Constructors such as int() signal an unusable argument with
                # TypeError rather than ValueError; report both the same way.
                args = (attr, type(val), expected)
                raise TypeError("attribute %s is %s instead of %s and could not be converted" % args)

    def check_values(self, logger=logging):
        """Perform custom checks on the values of attributes.

        Currently logs an error through `logger` when any value in etenergies
        is negative.
        """
        if not hasattr(self, "etenergies"):
            return
        negative_values = [e for e in self.etenergies if e < 0]
        if negative_values:
            msg = ("At least one excitation energy is negative. "
                   "\nNegative values: %s\nFull etenergies: %s"
                   % (negative_values, self.etenergies))
            logger.error(msg)

    def write(self, filename=None, indices=None, *args, **kwargs):
        """Write parsed attributes to a file.

        Possible extensions:
          .cjson or .json - output a chemical JSON file
          .cml - output a chemical markup language (CML) file
          .xyz - output a Cartesian XYZ file of the last coordinates available

        Extra positional and keyword arguments are forwarded to
        cclib.io.ccwrite, and its return value is returned.
        """

        # Imported here rather than at module level, as in the original code.
        from cclib.io import ccwrite
        outputstr = ccwrite(self, outputdest=filename, indices=indices,
                            *args, **kwargs)
        return outputstr

    def writejson(self, filename=None, indices=None):
        """Write parsed attributes to a JSON file."""
        return self.write(filename=filename, indices=indices,
                          outputtype='cjson')

    def writecml(self, filename=None, indices=None):
        """Write parsed attributes to a CML file."""
        return self.write(filename=filename, indices=indices,
                          outputtype='cml')

    def writexyz(self, filename=None, indices=None):
        """Write parsed attributes to an XYZ file."""
        return self.write(filename=filename, indices=indices,
                          outputtype='xyz')

    def _geometries_where(self, matches):
        """Return the atomcoords entries whose optstatus value satisfies `matches`.

        If the object has no optstatus attribute, atomcoords is returned whole.
        """
        if not hasattr(self, 'optstatus'):
            return self.atomcoords
        selected = [index for index, status in enumerate(self.optstatus) if matches(status)]
        return self.atomcoords[selected]

    @property
    def converged_geometries(self):
        """
        Return all converged geometries.

        An array containing only the converged geometries, e.g.:
            - For PES or IRCs, return all geometries for which optstatus matches OPT_DONE
            - The converged geometry for simple optimisations
            - The input geometry for single points
        """
        return self._geometries_where(lambda status: status & self.OPT_DONE > 0)

    @property
    def new_geometries(self):
        """
        Return all starting geometries.

        An array containing only the starting geometries, e.g.:
            - For PES or IRCs, return all geometries for which optstatus matches OPT_NEW
            - The input geometry for simple optimisations or single points
        """
        return self._geometries_where(lambda status: status & self.OPT_NEW > 0)

    @property
    def unknown_geometries(self):
        """
        Return all OPT_UNKNOWN geometries.

        An array containing only the geometries whose optstatus is exactly
        OPT_UNKNOWN (no status bit set); all of atomcoords when no optstatus
        is available.
        """
        return self._geometries_where(lambda status: status == self.OPT_UNKNOWN)

    @property
    def unconverged_geometries(self):
        """
        Return all unconverged geometries.

        An array containing only the geometries whose optstatus has the
        OPT_UNCONVERGED bit set; all of atomcoords when no optstatus is
        available.
        """
        return self._geometries_where(lambda status: status & self.OPT_UNCONVERGED > 0)

    @property
    def nelectrons(self):
        """Return the result of Electrons(self).count()."""
        return Electrons(self).count()

    @property
    def closed_shell(self):
        """Return the result of orbitals.Orbitals(self).closed_shell()."""
        return orbitals.Orbitals(self).closed_shell()
class ccData_optdone_bool(ccData):
    """This is the version of ccData where optdone is a Boolean."""

    # Give this class its own attribute table. Assigning into the inherited
    # dictionary (as was previously done per instance in __init__) would also
    # change the expected optdone type on ccData itself, because the
    # dictionary object is shared through the class.
    _attributes = dict(ccData._attributes)
    _attributes["optdone"] = Attribute(bool, 'done', 'optimization')

    def __init__(self, *args, **kwargs):
        """Initialize the object; arguments are passed on to ccData.__init__."""
        super(ccData_optdone_bool, self).__init__(*args, **kwargs)

    def setattributes(self, *args, **kwargs):
        """Set attributes as in ccData.setattributes, then reduce optdone to a Boolean.

        Returns the list of invalid attribute names reported by the parent
        implementation.
        """
        invalid = super(ccData_optdone_bool, self).setattributes(*args, **kwargs)

        # Reduce optdone to a Boolean, because it will be parsed as a list. If this list has any element,
        # it means that there was an optimized structure and optdone should be True.
        # Skip the reduction when optdone is already a Boolean (e.g. on a
        # second call), since len() of a bool would raise TypeError.
        if hasattr(self, 'optdone') and not isinstance(self.optdone, bool):
            self.optdone = len(self.optdone) > 0

        return invalid