Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2# 

3# Copyright (c) 2020, the cclib development team 

4# 

5# This file is part of cclib (http://cclib.github.io) and is distributed under 

6# the terms of the BSD 3-Clause License. 

7 

8"""Classes and tools for storing and handling parsed data""" 

9 

10import logging 

11from collections import namedtuple 

12 

13import numpy 

14 

15from cclib.method import Electrons 

16from cclib.method import orbitals 

17 

18 

# Describes one supported attribute: its expected Python type, the key used
# for it in the CJSON/JSON format, and the path under which it is stored there.
Attribute = namedtuple('Attribute', ['type', 'json_key', 'attribute_path'])


class ccData:
    """Stores data extracted by cclib parsers

    Description of cclib attributes:
        aonames -- atomic orbital names (list of strings)
        aooverlaps -- atomic orbital overlap matrix (array[2])
        atombasis -- indices of atomic orbitals on each atom (list of lists)
        atomcharges -- atomic partial charges (dict of arrays[1])
        atomcoords -- atom coordinates (array[3], angstroms)
        atommasses -- atom masses (array[1], daltons)
        atomnos -- atomic numbers (array[1])
        atomspins -- atomic spin densities (dict of arrays[1])
        ccenergies -- molecular energies with Coupled-Cluster corrections (array[2], eV)
        charge -- net charge of the system (integer)
        coreelectrons -- number of core electrons in atom pseudopotentials (array[1])
        dispersionenergies -- molecular dispersion energy corrections (array[1], eV)
        enthalpy -- sum of electronic and thermal enthalpies (float, hartree/particle)
        entropy -- entropy (float, hartree/particle)
        etenergies -- energies of electronic transitions (array[1], 1/cm)
        etoscs -- oscillator strengths of electronic transitions (array[1])
        etdips -- electric transition dipoles of electronic transitions (array[2], ebohr)
        etveldips -- velocity-gauge electric transition dipoles of electronic transitions (array[2], ebohr)
        etmagdips -- magnetic transition dipoles of electronic transitions (array[2], ebohr)
        etrotats -- rotatory strengths of electronic transitions (array[1], ??)
        etsecs -- singly-excited configurations for electronic transitions (list of lists)
        etsyms -- symmetries of electronic transitions (list of strings)
        freeenergy -- sum of electronic and thermal free energies (float, hartree/particle)
        fonames -- fragment orbital names (list of strings)
        fooverlaps -- fragment orbital overlap matrix (array[2])
        fragnames -- names of fragments (list of strings)
        frags -- indices of atoms in a fragment (list of lists)
        gbasis -- coefficients and exponents of Gaussian basis functions (PyQuante format)
        geotargets -- targets for convergence of geometry optimization (array[1])
        geovalues -- current values for convergence of geometry optimization (array[1])
        grads -- current values of forces (gradients) in geometry optimization (array[3])
        hessian -- elements of the force constant matrix (array[1])
        homos -- molecular orbital indices of HOMO(s) (array[1])
        metadata -- various metadata about the package and computation (dict)
        mocoeffs -- molecular orbital coefficients (list of arrays[2])
        moenergies -- molecular orbital energies (list of arrays[1], eV)
        moments -- molecular multipole moments (list of arrays[], a.u.)
        mosyms -- orbital symmetries (list of lists)
        mpenergies -- molecular electronic energies with Møller-Plesset corrections (array[2], eV)
        mult -- multiplicity of the system (integer)
        natom -- number of atoms (integer)
        nbasis -- number of basis functions (integer)
        nmo -- number of molecular orbitals (integer)
        nocoeffs -- natural orbital coefficients (array[2])
        nooccnos -- natural orbital occupation numbers (array[1])
        nsocoeffs -- natural spin orbital coefficients (list of array[2])
        nsooccnos -- natural spin orbital occupation numbers (list of array[1])
        optdone -- flags whether an optimization has converged (Boolean)
        optstatus -- optimization status for each set of atomic coordinates (array[1])
        polarizabilities -- (dipole) polarizabilities, static or dynamic (list of arrays[2])
        pressure -- pressure used for Thermochemistry (float, atm)
        scancoords -- geometries of each scan step (array[3], angstroms)
        scanenergies -- energies of potential energy surface (list)
        scannames -- names of variables scanned (list of strings)
        scanparm -- values of parameters in potential energy surface (list of lists)
        scfenergies -- molecular electronic energies after SCF (Hartree-Fock, DFT) (array[1], eV)
        scftargets -- targets for convergence of the SCF (array[2])
        scfvalues -- current values for convergence of the SCF (list of arrays[2])
        temperature -- temperature used for Thermochemistry (float, kelvin)
        time -- time in molecular dynamics and other trajectories (array[1], fs)
        transprop -- all absorption and emission spectra (dictionary {name:(etenergies, etoscs)})
                    WARNING: this attribute is not standardized and is liable to change in cclib 2.0
        vibanharms -- vibrational anharmonicity constants (array[2], 1/cm)
        vibdisps -- cartesian displacement vectors (array[3], delta angstrom)
        vibfreqs -- vibrational frequencies (array[1], 1/cm)
        vibfconsts -- force constants of vibrations (array[1], mDyne/angstrom)
        vibirs -- IR intensities (array[1], km/mol)
        vibramans -- Raman activities (array[1], A^4/Da)
        vibrmasses -- reduced masses of vibrations (array[1], daltons)
        vibsyms -- symmetries of vibrations (list of strings)
        zpve -- zero-point vibrational energy correction (float, hartree/particle)
    (1) The term 'array' refers to a numpy array
    (2) The number of dimensions of an array is given in square brackets
    (3) Python indexes arrays/lists starting at zero, so if homos==[10], then
            the 11th molecular orbital is the HOMO
    """

    # The expected types for all supported attributes.
    # The json_key is the key name used for attributes in the CJSON/JSON format
    # 'TBD' - To Be Decided are the key names of attributes which haven't been included in the cjson format
    # NOTE(review): the json_key 'electic transition dipoles' (etdips) looks like a
    # misspelling, and zpve uses 'properties:energies' where other entries use
    # 'properties:energy'. Both are runtime strings and are kept unchanged here;
    # confirm against the CJSON readers/writers before touching them.
    _attributes = {
        "aonames": Attribute(list, 'names', 'atoms:orbitals'),
        "aooverlaps": Attribute(numpy.ndarray, 'overlaps', 'properties:orbitals'),
        "atombasis": Attribute(list, 'indices', 'atoms:orbitals'),
        "atomcharges": Attribute(dict, 'partial charges', 'properties'),
        "atomcoords": Attribute(numpy.ndarray, 'coords', 'atoms:coords:3d'),
        "atommasses": Attribute(numpy.ndarray, 'mass', 'atoms'),
        "atomnos": Attribute(numpy.ndarray, 'number', 'atoms:elements'),
        "atomspins": Attribute(dict, 'spins', 'atoms'),
        "ccenergies": Attribute(numpy.ndarray, 'coupled cluster', 'properties:energy'),
        "charge": Attribute(int, 'charge', 'properties'),
        "coreelectrons": Attribute(numpy.ndarray, 'core electrons', 'atoms'),
        "dispersionenergies": Attribute(numpy.ndarray, 'dispersion correction', 'properties:energy'),
        "enthalpy": Attribute(float, 'enthalpy', 'properties'),
        "entropy": Attribute(float, 'entropy', 'properties'),
        "etenergies": Attribute(numpy.ndarray, 'electronic transitions', 'transitions'),
        "etoscs": Attribute(numpy.ndarray, 'oscillator strength', 'transitions'),
        "etdips": Attribute(numpy.ndarray, 'electic transition dipoles', 'transitions'),
        "etveldips": Attribute(numpy.ndarray, 'velocity-gauge electric transition dipoles', 'transitions'),
        "etmagdips": Attribute(numpy.ndarray, 'magnetic transition dipoles', 'transitions'),
        "etrotats": Attribute(numpy.ndarray, 'rotatory strength', 'transitions'),
        "etsecs": Attribute(list, 'one excited config', 'transitions'),
        "etsyms": Attribute(list, 'symmetry', 'transitions'),
        "freeenergy": Attribute(float, 'free energy', 'properties:energy'),
        "fonames": Attribute(list, 'orbital names', 'fragments'),
        "fooverlaps": Attribute(numpy.ndarray, 'orbital overlap', 'fragments'),
        "fragnames": Attribute(list, 'fragment names', 'fragments'),
        "frags": Attribute(list, 'atom indices', 'fragments'),
        "gbasis": Attribute(list, 'basis functions', 'atoms:orbitals'),
        "geotargets": Attribute(numpy.ndarray, 'geometric targets', 'optimization'),
        "geovalues": Attribute(numpy.ndarray, 'geometric values', 'optimization'),
        "grads": Attribute(numpy.ndarray, 'TBD', 'N/A'),
        "hessian": Attribute(numpy.ndarray, 'hessian matrix', 'vibrations'),
        "homos": Attribute(numpy.ndarray, 'homos', 'properties:orbitals'),
        "metadata": Attribute(dict, 'TBD', 'N/A'),
        "mocoeffs": Attribute(list, 'coeffs', 'properties:orbitals'),
        "moenergies": Attribute(list, 'energies', 'properties:orbitals'),
        "moments": Attribute(list, 'total dipole moment', 'properties'),
        "mosyms": Attribute(list, 'molecular orbital symmetry', 'properties:orbitals'),
        "mpenergies": Attribute(numpy.ndarray, 'moller plesset', 'properties:energy'),
        "mult": Attribute(int, 'multiplicity', 'properties'),
        "natom": Attribute(int, 'number of atoms', 'properties'),
        "nbasis": Attribute(int, 'basis number', 'properties:orbitals'),
        "nmo": Attribute(int, 'MO number', 'properties:orbitals'),
        "nocoeffs": Attribute(numpy.ndarray, 'TBD', 'N/A'),
        "nooccnos": Attribute(numpy.ndarray, 'TBD', 'N/A'),
        "nsocoeffs": Attribute(list, 'TBD', 'N/A'),
        "nsooccnos": Attribute(list, 'TBD', 'N/A'),
        "optdone": Attribute(list, 'done', 'optimization'),
        "optstatus": Attribute(numpy.ndarray, 'status', 'optimization'),
        "polarizabilities": Attribute(list, 'polarizabilities', 'N/A'),
        "pressure": Attribute(float, 'pressure', 'properties'),
        "scancoords": Attribute(numpy.ndarray, 'step geometry', 'optimization:scan'),
        "scanenergies": Attribute(list, 'PES energies', 'optimization:scan'),
        "scannames": Attribute(list, 'variable names', 'optimization:scan'),
        "scanparm": Attribute(list, 'PES parameter values', 'optimization:scan'),
        "scfenergies": Attribute(numpy.ndarray, 'scf energies', 'optimization:scf'),
        "scftargets": Attribute(numpy.ndarray, 'targets', 'optimization:scf'),
        "scfvalues": Attribute(list, 'values', 'optimization:scf'),
        "temperature": Attribute(float, 'temperature', 'properties'),
        "time": Attribute(numpy.ndarray, 'time', 'N/A'),
        "transprop": Attribute(dict, 'electronic transitions', 'transitions'),
        "vibanharms": Attribute(numpy.ndarray, 'anharmonicity constants', 'vibrations'),
        "vibdisps": Attribute(numpy.ndarray, 'displacement', 'vibrations'),
        "vibfreqs": Attribute(numpy.ndarray, 'frequencies', 'vibrations'),
        "vibfconsts": Attribute(numpy.ndarray, 'force constants', 'vibrations'),
        "vibirs": Attribute(numpy.ndarray, 'IR', 'vibrations:intensities'),
        "vibramans": Attribute(numpy.ndarray, 'raman', 'vibrations:intensities'),
        "vibrmasses": Attribute(numpy.ndarray, 'reduced masses', 'vibrations'),
        "vibsyms": Attribute(list, 'vibration symmetry', 'vibrations'),
        "zpve": Attribute(float, 'zero-point correction', 'properties:energies'),
    }

    # The name of all attributes can be generated from the dictionary above.
    _attrlist = sorted(_attributes.keys())

    # Arrays are double precision by default, but these will be integer arrays.
    _intarrays = ['atomnos', 'coreelectrons', 'homos', 'optstatus']

    # Attributes that should be lists of arrays (double precision).
    _listsofarrays = ['mocoeffs', 'moenergies', 'moments', 'polarizabilities', 'scfvalues']

    # Attributes that should be dictionaries of arrays (double precision).
    _dictsofarrays = ["atomcharges", "atomspins"]

    # Possible statuses for optimization steps.
    # OPT_UNKNOWN is the default and means optimization is in progress.
    # OPT_NEW is set for every new optimization (e.g. PES, IRCs, etc.)
    # OPT_DONE is set for the last step of an optimisation that converged.
    # OPT_UNCONVERGED is set for every unconverged step (i.e. should be mutually exclusive with OPT_DONE)
    # bit value notation allows coding for multiple states: OPT_NEW and OPT_UNCONVERGED or OPT_NEW and OPT_DONE.
    OPT_UNKNOWN = 0b000
    OPT_NEW = 0b001
    OPT_UNCONVERGED = 0b010
    OPT_DONE = 0b100

    def __init__(self, attributes=None):
        """Initialize the cclibData object.

        Normally called in the parse() method of a Logfile subclass.

        Inputs:
            attributes - optional dictionary of attributes to load as data
        """

        # None (rather than a shared mutable {}) is the "nothing to load"
        # default; an empty dictionary is treated the same way.
        if attributes:
            self.setattributes(attributes)

    def listify(self):
        """Converts all attributes that are arrays or lists/dicts of arrays to lists."""

        for name in self._attrlist:
            if not hasattr(self, name):
                continue
            expected = self._attributes[name].type
            value = getattr(self, name)
            if expected == numpy.ndarray:
                setattr(self, name, value.tolist())
            elif expected == list and name in self._listsofarrays:
                setattr(self, name, [item.tolist() for item in value])
            elif expected == dict and name in self._dictsofarrays:
                setattr(self, name, {key: val.tolist() for key, val in value.items()})

    def arrayify(self):
        """Converts appropriate attributes to arrays or lists/dicts of arrays."""

        for name in self._attrlist:
            if not hasattr(self, name):
                continue
            expected = self._attributes[name].type
            value = getattr(self, name)
            # Integer arrays for the names in _intarrays, double precision otherwise.
            precision = 'i' if name in self._intarrays else 'd'
            if expected == numpy.ndarray:
                setattr(self, name, numpy.array(value, precision))
            elif expected == list and name in self._listsofarrays:
                setattr(self, name, [numpy.array(item, precision) for item in value])
            elif expected == dict and name in self._dictsofarrays:
                converted = {key: numpy.array(val, precision) for key, val in value.items()}
                setattr(self, name, converted)

    def getattributes(self, tolists=False):
        """Returns a dictionary of existing data attributes.

        Inputs:
            tolists - flag to convert attributes to lists where applicable
        """

        # With tolists the object itself is temporarily converted to lists,
        # the values are collected, and the object is converted back to arrays.
        if tolists:
            self.listify()
        attributes = {name: getattr(self, name) for name in self._attrlist if hasattr(self, name)}
        if tolists:
            self.arrayify()
        return attributes

    def setattributes(self, attributes):
        """Sets data attributes given in a dictionary.

        Inputs:
            attributes - dictionary of attributes to set
        Outputs:
            invalid - list of attributes names that were not set, which
                      means they are not specified in self._attrlist
        Raises:
            TypeError - if attributes is not a dictionary (subclasses of
                        dict are accepted)
        """

        if not isinstance(attributes, dict):
            raise TypeError("attributes must be in a dictionary")

        # self._attributes has exactly the names in self._attrlist, and a
        # dictionary lookup avoids scanning the list for every key.
        invalid = []
        for name in attributes:
            if name in self._attributes:
                setattr(self, name, attributes[name])
            else:
                invalid.append(name)

        self.arrayify()
        self.typecheck()

        return invalid

    def typecheck(self):
        """Check the types of all attributes.

        If an attribute does not match the expected type, then attempt to
        convert; if that fails, only then raise a TypeError.

        The result of the conversion is discarded (as in the previous
        implementation), so this only verifies that a conversion is possible
        and leaves the stored value untouched.
        """

        self.arrayify()
        for name in self._attrlist:
            if not hasattr(self, name):
                continue

            expected = self._attributes[name].type
            val = getattr(self, name)
            if type(val) is expected:
                continue

            try:
                expected(val)
            except (TypeError, ValueError) as err:
                # Constructors such as int() and float() signal an impossible
                # conversion with either exception; report both the same way.
                args = (name, type(val), expected)
                raise TypeError(
                    "attribute %s is %s instead of %s and could not be converted" % args
                ) from err

    def check_values(self, logger=logging):
        """Perform custom checks on the values of attributes.

        Inputs:
            logger - object whose error() method receives the message when
                     a check fails (the logging module by default)
        """
        if not hasattr(self, "etenergies"):
            return

        negative_values = [e for e in self.etenergies if e < 0]
        if negative_values:
            msg = ("At least one excitation energy is negative. "
                   "\nNegative values: %s\nFull etenergies: %s"
                   % (negative_values, self.etenergies))
            logger.error(msg)

    def write(self, filename=None, indices=None, *args, **kwargs):
        """Write parsed attributes to a file.

        Possible extensions:
          .cjson or .json - output a chemical JSON file
          .cml - output a chemical markup language (CML) file
          .xyz - output a Cartesian XYZ file of the last coordinates available

        Any additional positional and keyword arguments are passed on to
        cclib.io.ccwrite, whose return value is returned.
        """

        # Imported here rather than at module level, as in the original code.
        from cclib.io import ccwrite
        return ccwrite(self, *args, outputdest=filename, indices=indices, **kwargs)

    def writejson(self, filename=None, indices=None):
        """Write parsed attributes to a JSON file."""
        return self.write(filename=filename, indices=indices,
                          outputtype='cjson')

    def writecml(self, filename=None, indices=None):
        """Write parsed attributes to a CML file."""
        return self.write(filename=filename, indices=indices,
                          outputtype='cml')

    def writexyz(self, filename=None, indices=None):
        """Write parsed attributes to an XYZ file."""
        return self.write(filename=filename, indices=indices,
                          outputtype='xyz')

    @property
    def converged_geometries(self):
        """
        Return all converged geometries.

        An array containing only the converged geometries, e.g.:
            - For PES or IRCs, return all geometries for which optstatus matches OPT_DONE
            - The converged geometry for simple optimisations
            - The input geometry for single points

        All of atomcoords is returned when optstatus is not set.
        """
        if not hasattr(self, 'optstatus'):
            return self.atomcoords
        converged_indexes = [
            idx for idx, status in enumerate(self.optstatus) if (status & self.OPT_DONE) > 0
        ]
        return self.atomcoords[converged_indexes]

    @property
    def new_geometries(self):
        """
        Return all starting geometries.

        An array containing only the starting geometries, e.g.:
            - For PES or IRCs, return all geometries for which optstatus matches OPT_NEW
            - The input geometry for simple optimisations or single points

        All of atomcoords is returned when optstatus is not set.
        """
        if not hasattr(self, 'optstatus'):
            return self.atomcoords
        new_indexes = [
            idx for idx, status in enumerate(self.optstatus) if (status & self.OPT_NEW) > 0
        ]
        return self.atomcoords[new_indexes]

    @property
    def unknown_geometries(self):
        """
        Return all OPT_UNKNOWN geometries.

        An array containing only the geometries for which optstatus is
        exactly OPT_UNKNOWN (no status bit set).

        All of atomcoords is returned when optstatus is not set.
        """
        if not hasattr(self, 'optstatus'):
            return self.atomcoords
        unknown_indexes = [
            idx for idx, status in enumerate(self.optstatus) if status == self.OPT_UNKNOWN
        ]
        return self.atomcoords[unknown_indexes]

    @property
    def unconverged_geometries(self):
        """
        Return all unconverged geometries.

        An array containing only the geometries for which optstatus has the
        OPT_UNCONVERGED bit set.

        All of atomcoords is returned when optstatus is not set.
        """
        if not hasattr(self, 'optstatus'):
            return self.atomcoords
        unconverged_indexes = [
            idx for idx, status in enumerate(self.optstatus)
            if (status & self.OPT_UNCONVERGED) > 0
        ]
        return self.atomcoords[unconverged_indexes]

    @property
    def nelectrons(self):
        """Return the result of Electrons(self).count() from cclib.method."""
        return Electrons(self).count()

    @property
    def closed_shell(self):
        """Return the result of orbitals.Orbitals(self).closed_shell() from cclib.method."""
        return orbitals.Orbitals(self).closed_shell()

414 

415 

class ccData_optdone_bool(ccData):
    """This is the version of ccData where optdone is a Boolean."""

    # Give this class its own attribute table in which optdone is a bool.
    # Assigning into the inherited dictionary from __init__ would modify the
    # single dictionary object shared with ccData, changing the expected type
    # of optdone for every ccData instance as well.
    _attributes = dict(ccData._attributes, optdone=Attribute(bool, 'done', 'optimization'))

    def __init__(self, *args, **kwargs):
        """Initialize the object; all arguments are passed on to ccData."""

        super(ccData_optdone_bool, self).__init__(*args, **kwargs)

    def setattributes(self, *args, **kwargs):
        """Sets data attributes and reduces optdone to a Boolean.

        Takes the same arguments as ccData.setattributes.

        Outputs:
            invalid - the list returned by ccData.setattributes
        """
        invalid = super(ccData_optdone_bool, self).setattributes(*args, **kwargs)

        # Reduce optdone to a Boolean, because it will be parsed as a list. If this list has any element,
        # it means that there was an optimized structure and optdone should be True.
        # An optdone that is already a Boolean (for instance from an earlier
        # call of this method) is left as it is, since len() would fail on it.
        if hasattr(self, 'optdone') and not isinstance(self.optdone, bool):
            self.optdone = len(self.optdone) > 0

        return invalid