Coverage for cclib/parser/molcasparser.py : 97%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2#
3# Copyright (c) 2020, the cclib development team
4#
5# This file is part of cclib (http://cclib.github.io) and is distributed under
6# the terms of the BSD 3-Clause License.
8"""Parser for Molcas output files"""
10import re
11import string
13import numpy
15from cclib.parser import logfileparser
16from cclib.parser import utils
19class Molcas(logfileparser.Logfile):
20 """A Molcas log file."""
22 def __init__(self, *args, **kwargs):
24 # Call the __init__ method of the superclass
25 super(Molcas, self).__init__(logname="Molcas", *args, **kwargs)
27 def __str__(self):
28 """Return a string repeesentation of the object."""
29 return "Molcas log file %s" % (self.filename)
31 def __repr__(self):
32 """Return a representation of the object."""
33 return 'Molcas("%s")' % (self.filename)
35 def normalisesym(self, label):
36 """Normalise the symmetries used by Molcas.
38 The labels are standardized except for the first character being lowercase.
39 """
40 return label[0].upper() + label[1:]
42 def after_parsing(self):
43 for element, ncore in self.core_array:
44 self._assign_coreelectrons_to_element(element, ncore)
46 if "package_version" in self.metadata:
47 # Use the short version as the legacy version.
48 self.metadata["legacy_package_version"] = self.metadata["package_version"]
49 # If there is both a tag and the full hash, place the tag
50 # first. Both are chosen to be local, since there isn't a
51 # distinction between development and release builds in their
52 # version cycle.
53 if "tag" in self.metadata and "revision" in self.metadata:
54 self.metadata["package_version"] = "{}+{}.{}".format(
55 self.metadata["package_version"],
56 self.metadata["tag"],
57 self.metadata["revision"]
58 )
59 elif "tag" in self.metadata:
60 self.metadata["package_version"] = "{}+{}".format(
61 self.metadata["package_version"],
62 self.metadata["tag"]
63 )
64 elif "revision" in self.metadata:
65 self.metadata["package_version"] = "{}+{}".format(
66 self.metadata["package_version"],
67 self.metadata["revision"]
68 )
70 def before_parsing(self):
71 # Compile the regex for extracting the element symbol from the
72 # atom label in the "Molecular structure info" block.
73 self.re_atomelement = re.compile(r'([a-zA-Z]+)\d?')
75 # Compile the dashes-and-or-spaces-only regex.
76 self.re_dashes_and_spaces = re.compile(r'^[\s-]+$')
78 # Molcas can do multiple calculations in one job, and each one
79 # starts from the gateway module. Onle parse the first.
80 # TODO: It would be best to parse each calculation as a separate
81 # ccData object and return an iterator - something for 2.x
82 self.gateway_module_count = 0
84 def extract(self, inputfile, line):
85 """Extract information from the file object inputfile."""
87 if "Start Module: gateway" in line:
88 self.gateway_module_count += 1
90 if self.gateway_module_count > 1:
91 return
93 # Extract the version number and optionally the Git tag and hash.
94 if "version" in line:
95 match = re.search(r"\s{2,}version:?\s(\d*\.\d*)", line)
96 if match:
97 self.metadata["package_version"] = match.groups()[0]
98 if "tag" in line:
99 self.metadata["tag"] = line.split()[-1]
100 if "build" in line:
101 match = re.search(r"\*\s*build\s(\S*)\s*\*", line)
102 if match:
103 self.metadata["revision"] = match.groups()[0]
105 ## This section is present when executing &GATEWAY.
106 # ++ Molecular structure info:
107 # -------------------------
109 # ************************************************
110 # **** Cartesian Coordinates / Bohr, Angstrom ****
111 # ************************************************
113 # Center Label x y z x y z
114 # 1 C1 0.526628 -2.582937 0.000000 0.278679 -1.366832 0.000000
115 # 2 C2 2.500165 -0.834760 0.000000 1.323030 -0.441736 0.000000
116 if line[25:63] == 'Cartesian Coordinates / Bohr, Angstrom':
117 if not hasattr(self, 'atomnos'):
118 self.atomnos = []
120 self.skip_lines(inputfile, ['stars', 'blank', 'header'])
122 line = next(inputfile)
124 atomelements = []
125 atomcoords = []
127 while line.strip() not in ('', '--'):
128 sline = line.split()
129 atomelement = sline[1].rstrip(string.digits).title()
130 atomelements.append(atomelement)
131 atomcoords.append(list(map(float, sline[5:])))
132 line = next(inputfile)
134 self.append_attribute('atomcoords', atomcoords)
136 if self.atomnos == []:
137 self.atomnos = [self.table.number[ae.title()] for ae in atomelements]
139 if not hasattr(self, 'natom'):
140 self.set_attribute('natom', len(self.atomnos))
142 ## This section is present when executing &SCF.
143 # ++ Orbital specifications:
144 # -----------------------
146 # Symmetry species 1
148 # Frozen orbitals 0
149 # Occupied orbitals 3
150 # Secondary orbitals 77
151 # Deleted orbitals 0
152 # Total number of orbitals 80
153 # Number of basis functions 80
154 # --
155 if line[:29] == '++ Orbital specifications:':
157 self.skip_lines(inputfile, ['dashes', 'blank'])
158 line = next(inputfile)
160 symmetry_count = 1
161 while not line.startswith('--'):
162 if line.strip().startswith('Symmetry species'):
163 symmetry_count = int(line.split()[-1])
164 if line.strip().startswith('Total number of orbitals'):
165 nmos = line.split()[-symmetry_count:]
166 self.set_attribute('nmo', sum(map(int, nmos)))
167 if line.strip().startswith('Number of basis functions'):
168 nbasis = line.split()[-symmetry_count:]
169 self.set_attribute('nbasis', sum(map(int, nbasis)))
171 line = next(inputfile)
173 if line.strip().startswith(('Molecular charge', 'Total molecular charge')):
174 self.set_attribute('charge', int(float(line.split()[-1])))
176 # ++ Molecular charges:
177 # ------------------
179 # Mulliken charges per centre and basis function type
180 # ---------------------------------------------------
182 # C1
183 # 1s 2.0005
184 # 2s 2.0207
185 # 2px 0.0253
186 # 2pz 0.1147
187 # 2py 1.8198
188 # *s -0.0215
189 # *px 0.0005
190 # *pz 0.0023
191 # *py 0.0368
192 # *d2+ 0.0002
193 # *d1+ 0.0000
194 # *d0 0.0000
195 # *d1- 0.0000
196 # *d2- 0.0000
197 # *f3+ 0.0000
198 # *f2+ 0.0001
199 # *f1+ 0.0000
200 # *f0 0.0001
201 # *f1- 0.0001
202 # *f2- 0.0000
203 # *f3- 0.0003
204 # *g4+ 0.0000
205 # *g3+ 0.0000
206 # *g2+ 0.0000
207 # *g1+ 0.0000
208 # *g0 0.0000
209 # *g1- 0.0000
210 # *g2- 0.0000
211 # *g3- 0.0000
212 # *g4- 0.0000
213 # Total 6.0000
215 # N-E 0.0000
217 # Total electronic charge= 6.000000
219 # Total charge= 0.000000
220 #--
221 if line[:24] == '++ Molecular charges:':
223 atomcharges = []
225 while line[6:29] != 'Total electronic charge':
226 line = next(inputfile)
227 if line[6:9] == 'N-E':
228 atomcharges.extend(map(float, line.split()[1:]))
230 # Molcas only performs Mulliken population analysis.
231 self.set_attribute('atomcharges', {'mulliken': atomcharges})
233 # Ensure the charge printed here is identical to the
234 # charge printed before entering the SCF.
235 self.skip_line(inputfile, 'blank')
236 line = next(inputfile)
237 assert line[6:30] == 'Total charge='
238 if hasattr(self, 'charge'):
239 assert int(float(line.split()[2])) == self.charge
241 # This section is present when executing &SCF
242 # This section parses the total SCF Energy.
243 # *****************************************************************************************************************************
244 # * *
245 # * SCF/KS-DFT Program, Final results *
246 # * *
247 # * *
248 # * *
249 # * Final Results *
250 # * *
251 # *****************************************************************************************************************************
253 # :: Total SCF energy -37.6045426484
254 if line[:22] == ':: Total SCF energy' or line[:25] == ':: Total KS-DFT energy':
255 if not hasattr(self, 'scfenergies'):
256 self.scfenergies = []
257 scfenergy = float(line.split()[-1])
258 self.scfenergies.append(utils.convertor(scfenergy, 'hartree', 'eV'))
260 ## Parsing the scftargets in this section
261 # ++ Optimization specifications:
262 # ----------------------------
264 # SCF Algorithm: Conventional
265 # Minimized density differences are used
266 # Number of density matrices in core 9
267 # Maximum number of NDDO SCF iterations 400
268 # Maximum number of HF SCF iterations 400
269 # Threshold for SCF energy change 0.10E-08
270 # Threshold for density matrix 0.10E-03
271 # Threshold for Fock matrix 0.15E-03
272 # Threshold for linear dependence 0.10E-08
273 # Threshold at which DIIS is turned on 0.15E+00
274 # Threshold at which QNR/C2DIIS is turned on 0.75E-01
275 # Threshold for Norm(delta) (QNR/C2DIIS) 0.20E-04
276 if line[:34] == '++ Optimization specifications:':
277 self.skip_lines(inputfile, ['d', 'b'])
278 line = next(inputfile)
279 if line.strip().startswith('SCF'):
280 scftargets = []
281 self.skip_lines(inputfile,
282 ['Minimized', 'Number', 'Maximum', 'Maximum'])
283 lines = [next(inputfile) for i in range(7)]
284 targets = [
285 'Threshold for SCF energy change',
286 'Threshold for density matrix',
287 'Threshold for Fock matrix',
288 'Threshold for Norm(delta)',
289 ]
290 for y in targets:
291 scftargets.extend([float(x.split()[-1]) for x in lines if y in x])
293 self.append_attribute('scftargets', scftargets)
295 # ++ Convergence information
296 # SCF iterations: Energy and convergence statistics
297 #
298 # Iter Tot. SCF One-electron Two-electron Energy Max Dij or Max Fij DNorm TNorm AccCon Time
299 # Energy Energy Energy Change Delta Norm in Sec.
300 # 1 -36.83817703 -50.43096166 13.59278464 0.00E+00 0.16E+00* 0.27E+01* 0.30E+01 0.33E+02 NoneDa 0.
301 # 2 -36.03405202 -45.74525152 9.71119950 0.80E+00* 0.14E+00* 0.93E-02* 0.26E+01 0.43E+01 Damp 0.
302 # 3 -37.08936118 -48.41536598 11.32600480 -0.11E+01* 0.12E+00* 0.91E-01* 0.97E+00 0.16E+01 Damp 0.
303 # 4 -37.31610460 -50.54103969 13.22493509 -0.23E+00* 0.11E+00* 0.96E-01* 0.72E+00 0.27E+01 Damp 0.
304 # 5 -37.33596239 -49.47021484 12.13425245 -0.20E-01* 0.59E-01* 0.59E-01* 0.37E+00 0.16E+01 Damp 0.
305 # ...
306 # Convergence after 26 Macro Iterations
307 # --
308 if line[46:91] == 'iterations: Energy and convergence statistics':
310 self.skip_line(inputfile, 'blank')
312 while line.split() != ['Energy', 'Energy', 'Energy', 'Change', 'Delta', 'Norm', 'in', 'Sec.']:
313 line = next(inputfile)
315 iteration_regex = (r"^([0-9]+)" # Iter
316 r"( [ \-0-9]*\.[0-9]{6,9})" # Tot. SCF Energy
317 r"( [ \-0-9]*\.[0-9]{6,9})" # One-electron Energy
318 r"( [ \-0-9]*\.[0-9]{6,9})" # Two-electron Energy
319 r"( [ \-0-9]*\.[0-9]{2}E[\-\+][0-9]{2}\*?)" # Energy Change
320 r"( [ \-0-9]*\.[0-9]{2}E[\-\+][0-9]{2}\*?)" # Max Dij or Delta Norm
321 r"( [ \-0-9]*\.[0-9]{2}E[\-\+][0-9]{2}\*?)" # Max Fij
322 r"( [ \-0-9]*\.[0-9]{2}E[\-\+][0-9]{2}\*?)" # DNorm
323 r"( [ \-0-9]*\.[0-9]{2}E[\-\+][0-9]{2}\*?)" # TNorm
324 r"( [ A-Za-z0-9]*)" # AccCon
325 r"( [ \.0-9]*)$") # Time in Sec.
327 scfvalues = []
328 line = next(inputfile)
329 while not line.strip().startswith("Convergence"):
331 match = re.match(iteration_regex, line.strip())
332 if match:
333 groups = match.groups()
334 cols = [g.strip() for g in match.groups()]
335 cols = [c.replace('*', '') for c in cols]
337 energy = float(cols[4])
338 density = float(cols[5])
339 fock = float(cols[6])
340 dnorm = float(cols[7])
341 scfvalues.append([energy, density, fock, dnorm])
343 if line.strip() == "--":
344 self.logger.warning('File terminated before end of last SCF!')
345 break
347 line = next(inputfile)
349 self.append_attribute('scfvalues', scfvalues)
351 # Harmonic frequencies in cm-1
352 #
353 # IR Intensities in km/mol
354 #
355 # 1 2 3 4 5 6
356 #
357 # Frequency: i60.14 i57.39 128.18 210.06 298.24 309.65
358 #
359 # Intensity: 3.177E-03 2.129E-06 4.767E-01 2.056E-01 6.983E-07 1.753E-07
360 # Red. mass: 2.42030 2.34024 2.68044 3.66414 2.61721 3.34904
361 #
362 # C1 x -0.00000 0.00000 0.00000 -0.05921 0.00000 -0.06807
363 # C1 y 0.00001 -0.00001 -0.00001 0.00889 0.00001 -0.02479
364 # C1 z -0.03190 0.04096 -0.03872 0.00001 -0.12398 -0.00002
365 # C2 x -0.00000 0.00001 0.00000 -0.06504 0.00000 -0.03487
366 # C2 y 0.00000 -0.00000 -0.00000 0.01045 0.00001 -0.05659
367 # C2 z -0.03703 -0.03449 -0.07269 0.00000 -0.07416 -0.00001
368 # C3 x -0.00000 0.00001 0.00000 -0.06409 -0.00001 0.05110
369 # C3 y -0.00000 0.00001 0.00000 0.00152 0.00000 -0.03263
370 # C3 z -0.03808 -0.08037 -0.07267 -0.00001 0.07305 0.00000
371 # ...
372 # H20 y 0.00245 -0.00394 0.03215 0.03444 -0.10424 -0.10517
373 # H20 z 0.00002 -0.00001 0.00000 -0.00000 -0.00000 0.00000
374 #
375 #
376 #
377 # ++ Thermochemistry
378 if line[1:29] == 'Harmonic frequencies in cm-1':
380 self.skip_line(inputfile, 'blank')
381 line = next(inputfile)
383 while 'Thermochemistry' not in line:
385 if 'Frequency:' in line:
386 if not hasattr(self, 'vibfreqs'):
387 self.vibfreqs = []
388 vibfreqs = [float(i.replace('i', '-')) for i in line.split()[1:]]
389 self.vibfreqs.extend(vibfreqs)
391 if 'Intensity:' in line:
392 if not hasattr(self, 'vibirs'):
393 self.vibirs = []
394 vibirs = map(float, line.split()[1:])
395 self.vibirs.extend(vibirs)
397 if 'Red.' in line:
398 if not hasattr(self, 'vibrmasses'):
399 self.vibrmasses = []
400 vibrmasses = map(float, line.split()[2:])
401 self.vibrmasses.extend(vibrmasses)
403 self.skip_line(inputfile, 'blank')
404 line = next(inputfile)
405 if not hasattr(self, 'vibdisps'):
406 self.vibdisps = []
407 disps = []
408 for n in range(3*self.natom):
409 numbers = [float(s) for s in line[17:].split()]
410 # The atomindex should start at 0 instead of 1.
411 atomindex = int(re.search(r'\d+$', line.split()[0]).group()) - 1
412 numbermodes = len(numbers)
413 if len(disps) == 0:
414 # Appends empty array of the following
415 # dimensions (numbermodes, natom, 0) to disps.
416 for mode in range(numbermodes):
417 disps.append([[] for x in range(0, self.natom)])
418 for mode in range(numbermodes):
419 disps[mode][atomindex].append(numbers[mode])
420 line = next(inputfile)
421 self.vibdisps.extend(disps)
423 line = next(inputfile)
425 ## Parsing thermochemistry attributes here
426 # ++ Thermochemistry
427 #
428 # *********************
429 # * *
430 # * THERMOCHEMISTRY *
431 # * *
432 # *********************
433 #
434 # Mass-centered Coordinates (Angstrom):
435 # ***********************************************************
436 # ...
437 # *****************************************************
438 # Temperature = 0.00 Kelvin, Pressure = 1.00 atm
439 # -----------------------------------------------------
440 # Molecular Partition Function and Molar Entropy:
441 # q/V (M**-3) S(kcal/mol*K)
442 # Electronic 0.100000D+01 0.000
443 # Translational 0.100000D+01 0.000
444 # Rotational 0.100000D+01 2.981
445 # Vibrational 0.100000D+01 0.000
446 # TOTAL 0.100000D+01 2.981
447 #
448 # Thermal contributions to INTERNAL ENERGY:
449 # Electronic 0.000 kcal/mol 0.000000 au.
450 # Translational 0.000 kcal/mol 0.000000 au.
451 # Rotational 0.000 kcal/mol 0.000000 au.
452 # Vibrational 111.885 kcal/mol 0.178300 au.
453 # TOTAL 111.885 kcal/mol 0.178300 au.
454 #
455 # Thermal contributions to
456 # ENTHALPY 111.885 kcal/mol 0.178300 au.
457 # GIBBS FREE ENERGY 111.885 kcal/mol 0.178300 au.
458 #
459 # Sum of energy and thermal contributions
460 # INTERNAL ENERGY -382.121931 au.
461 # ENTHALPY -382.121931 au.
462 # GIBBS FREE ENERGY -382.121931 au.
463 # -----------------------------------------------------
464 # ...
465 # ENTHALPY -382.102619 au.
466 # GIBBS FREE ENERGY -382.179819 au.
467 # -----------------------------------------------------
468 # --
469 #
470 # ++ Isotopic shifts:
471 if line[4:19] == 'THERMOCHEMISTRY':
473 temperature_values = []
474 pressure_values = []
475 entropy_values = []
476 internal_energy_values = []
477 enthalpy_values = []
478 free_energy_values = []
480 while 'Isotopic' not in line:
482 if line[1:12] == 'Temperature':
483 temperature_values.append(float(line.split()[2]))
484 pressure_values.append(float(line.split()[6]))
486 if line[1:48] == 'Molecular Partition Function and Molar Entropy:':
487 while 'TOTAL' not in line:
488 line = next(inputfile)
489 entropy_values.append(utils.convertor(float(line.split()[2]), 'kcal/mol', 'hartree'))
491 if line[1:40] == 'Sum of energy and thermal contributions':
492 internal_energy_values.append(float(next(inputfile).split()[2]))
493 enthalpy_values.append(float(next(inputfile).split()[1]))
494 free_energy_values.append(float(next(inputfile).split()[3]))
496 line = next(inputfile)
497 # When calculations for more than one temperature value are
498 # performed, the values corresponding to room temperature (298.15 K)
499 # are returned and if no calculations are performed for 298.15 K, then
500 # the values corresponding last temperature value are returned.
501 index = -1
502 if 298.15 in temperature_values:
503 index = temperature_values.index(298.15)
505 self.set_attribute('temperature', temperature_values[index])
506 if len(temperature_values) > 1:
507 self.logger.warning('More than 1 values of temperature found')
509 self.set_attribute('pressure', pressure_values[index])
510 if len(pressure_values) > 1:
511 self.logger.warning('More than 1 values of pressure found')
513 self.set_attribute('entropy', entropy_values[index])
514 if len(entropy_values) > 1:
515 self.logger.warning('More than 1 values of entropy found')
517 self.set_attribute('enthalpy', enthalpy_values[index])
518 if len(enthalpy_values) > 1:
519 self.logger.warning('More than 1 values of enthalpy found')
521 self.set_attribute('freeenergy', free_energy_values[index])
522 if len(free_energy_values) > 1:
523 self.logger.warning('More than 1 values of freeenergy found')
525 ## Parsing Geometrical Optimization attributes in this section.
526 # ++ Slapaf input parameters:
527 # ------------------------
528 #
529 # Max iterations: 2000
530 # Convergence test a la Schlegel.
531 # Convergence criterion on gradient/para.<=: 0.3E-03
532 # Convergence criterion on step/parameter<=: 0.3E-03
533 # Convergence criterion on energy change <=: 0.0E+00
534 # Max change of an internal coordinate: 0.30E+00
535 # ...
536 # ...
537 # **********************************************************************************************************************
538 # * Energy Statistics for Geometry Optimization *
539 # **********************************************************************************************************************
540 # Energy Grad Grad Step Estimated Geom Hessian
541 # Iter Energy Change Norm Max Element Max Element Final Energy Update Update Index
542 # 1 -382.30023222 0.00000000 0.107221 0.039531 nrc047 0.085726 nrc047 -382.30533799 RS-RFO None 0
543 # 2 -382.30702964 -0.00679742 0.043573 0.014908 nrc001 0.068195 nrc001 -382.30871333 RS-RFO BFGS 0
544 # 3 -382.30805348 -0.00102384 0.014883 0.005458 nrc010 -0.020973 nrc001 -382.30822089 RS-RFO BFGS 0
545 # ...
546 # ...
547 # 18 -382.30823419 -0.00000136 0.001032 0.000100 nrc053 0.012319 nrc053 -382.30823452 RS-RFO BFGS 0
548 # 19 -382.30823198 0.00000221 0.001051 -0.000092 nrc054 0.066565 nrc053 -382.30823822 RS-RFO BFGS 0
549 # 20 -382.30820252 0.00002946 0.001132 -0.000167 nrc021 -0.064003 nrc053 -382.30823244 RS-RFO BFGS 0
550 #
551 # +----------------------------------+----------------------------------+
552 # + Cartesian Displacements + Gradient in internals +
553 # + Value Threshold Converged? + Value Threshold Converged? +
554 # +-----+----------------------------------+----------------------------------+
555 # + RMS + 5.7330E-02 1.2000E-03 No + 1.6508E-04 3.0000E-04 Yes +
556 # +-----+----------------------------------+----------------------------------+
557 # + Max + 1.2039E-01 1.8000E-03 No + 1.6711E-04 4.5000E-04 Yes +
558 # +-----+----------------------------------+----------------------------------+
559 if 'Convergence criterion on energy change' in line:
560 self.energy_threshold = float(line.split()[6])
561 # If energy change threshold equals zero,
562 # then energy change is not a criteria for convergence.
563 if self.energy_threshold == 0:
564 self.energy_threshold = numpy.inf
566 if 'Energy Statistics for Geometry Optimization' in line:
567 if not hasattr(self, 'geovalues'):
568 self.geovalues = []
570 self.skip_lines(inputfile, ['stars', 'header'])
571 line = next(inputfile)
572 assert 'Iter Energy Change Norm' in line
573 # A variable keeping track of ongoing iteration.
574 iter_number = len(self.geovalues) + 1
575 # Iterate till blank line.
576 while line.split() != []:
577 for i in range(iter_number):
578 line = next(inputfile)
579 self.geovalues.append([float(line.split()[2])])
580 line = next(inputfile)
581 # Along with energy change, RMS and Max values of change in
582 # Cartesian Diaplacement and Gradients are used as optimization
583 # criteria.
584 self.skip_lines(inputfile, ['border', 'header', 'header', 'border'])
585 line = next(inputfile)
586 assert '+ RMS +' in line
587 line_rms = line.split()
588 line = next(inputfile)
589 line_max = next(inputfile).split()
590 if not hasattr(self, 'geotargets'):
591 # The attribute geotargets is an array consisting of the following
592 # values: [Energy threshold, Max Gradient threshold, RMS Gradient threshold, \
593 # Max Displacements threshold, RMS Displacements threshold].
594 max_gradient_threshold = float(line_max[8])
595 rms_gradient_threshold = float(line_rms[8])
596 max_displacement_threshold = float(line_max[4])
597 rms_displacement_threshold = float(line_rms[4])
598 self.geotargets = [self.energy_threshold, max_gradient_threshold, rms_gradient_threshold, max_displacement_threshold, rms_displacement_threshold]
600 max_gradient_change = float(line_max[7])
601 rms_gradient_change = float(line_rms[7])
602 max_displacement_change = float(line_max[3])
603 rms_displacement_change = float(line_rms[3])
604 self.geovalues[iter_number - 1].extend([max_gradient_change, rms_gradient_change, max_displacement_change, rms_displacement_change])
606 # *********************************************************
607 # * Nuclear coordinates for the next iteration / Angstrom *
608 # *********************************************************
609 # ATOM X Y Z
610 # C1 0.235560 -1.415847 0.012012
611 # C2 1.313797 -0.488199 0.015149
612 # C3 1.087050 0.895510 0.014200
613 # ...
614 # ...
615 # H19 -0.021327 -4.934915 -0.029355
616 # H20 -1.432030 -3.721047 -0.039835
617 #
618 # --
619 if 'Nuclear coordinates for the next iteration / Angstrom' in line:
620 self.skip_lines(inputfile, ['s', 'header'])
621 line = next(inputfile)
623 atomcoords = []
624 while line.split() != []:
625 atomcoords.append([float(c) for c in line.split()[1:]])
626 line = next(inputfile)
628 if len(atomcoords) == self.natom:
629 self.atomcoords.append(atomcoords)
630 else:
631 self.logger.warning(
632 "Parsed coordinates not consistent with previous, skipping. "
633 "This could be due to symmetry being turned on during the job. "
634 "Length was %i, now found %i. New coordinates: %s"
635 % (len(self.atomcoords[-1]), len(atomcoords), str(atomcoords)))
637 # **********************************************************************************************************************
638 # * Energy Statistics for Geometry Optimization *
639 # **********************************************************************************************************************
640 # Energy Grad Grad Step Estimated Geom Hessian
641 # Iter Energy Change Norm Max Element Max Element Final Energy Update Update Index
642 # 1 -382.30023222 0.00000000 0.107221 0.039531 nrc047 0.085726 nrc047 -382.30533799 RS-RFO None 0
643 # ...
644 # ...
645 # 23 -382.30823115 -0.00000089 0.001030 0.000088 nrc053 0.000955 nrc053 -382.30823118 RS-RFO BFGS 0
646 #
647 # +----------------------------------+----------------------------------+
648 # + Cartesian Displacements + Gradient in internals +
649 # + Value Threshold Converged? + Value Threshold Converged? +
650 # +-----+----------------------------------+----------------------------------+
651 # + RMS + 7.2395E-04 1.2000E-03 Yes + 2.7516E-04 3.0000E-04 Yes +
652 # +-----+----------------------------------+----------------------------------+
653 # + Max + 1.6918E-03 1.8000E-03 Yes + 8.7768E-05 4.5000E-04 Yes +
654 # +-----+----------------------------------+----------------------------------+
655 #
656 # Geometry is converged in 23 iterations to a Minimum Structure
657 if 'Geometry is converged' in line:
658 if not hasattr(self, 'optdone'):
659 self.optdone = []
660 self.optdone.append(len(self.atomcoords))
662 # *********************************************************
663 # * Nuclear coordinates of the final structure / Angstrom *
664 # *********************************************************
665 # ATOM X Y Z
666 # C1 0.235547 -1.415838 0.012193
667 # C2 1.313784 -0.488201 0.015297
668 # C3 1.087036 0.895508 0.014333
669 # ...
670 # ...
671 # H19 -0.021315 -4.934913 -0.029666
672 # H20 -1.431994 -3.721026 -0.041078
673 if 'Nuclear coordinates of the final structure / Angstrom' in line:
674 self.skip_lines(inputfile, ['s', 'header'])
675 line = next(inputfile)
677 atomcoords = []
679 while line.split() != []:
680 atomcoords.append([float(c) for c in line.split()[1:]])
681 line = next(inputfile)
683 if len(atomcoords) == self.natom:
684 self.atomcoords.append(atomcoords)
685 else:
686 self.logger.error(
687 'Number of atoms (%d) in parsed atom coordinates '
688 'is smaller than previously (%d), possibly due to '
689 'symmetry. Ignoring these coordinates.'
690 % (len(atomcoords), self.natom))
692 ## Parsing Molecular Gradients attributes in this section.
693 # ()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()
694 #
695 # &ALASKA
696 #
697 # only a single process is used
698 # available to each process: 2.0 GB of memory, 1 thread
699 # ()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()
700 # ...
701 # ...
702 # **************************************************
703 # * *
704 # * Molecular gradients *
705 # * *
706 # **************************************************
707 #
708 # Irreducible representation: a
709 # ---------------------------------------------------------
710 # X Y Z
711 # ---------------------------------------------------------
712 # C1 -0.00009983 -0.00003043 0.00001004
713 # ...
714 # H20 -0.00027629 0.00010546 0.00003317
715 # ---------------------------------------------------
716 # WARNING: "Molecular gradients, after ESPF" is found for ESPF QM/MM calculations
717 if "Molecular gradients " in line:
719 if not hasattr(self, "grads"):
720 self.grads = []
722 self.skip_lines(inputfile, ['stars', 'stars', 'blank', 'header',
723 'dashes', 'header', 'dashes'])
725 grads = []
726 line = next(inputfile)
727 while len(line.split()) == 4:
728 tmpgrads = list(map(float, line.split()[1:]))
729 grads.append(tmpgrads)
730 line = next(inputfile)
732 self.append_attribute('grads', grads)
734 # This code here works, but QM/MM gradients are printed after QM ones.
735 # Maybe another attribute is needed to store them to have both.
736 if "Molecular gradients, after ESPF" in line:
738 self.skip_lines(inputfile, ['stars', 'stars', 'blank', 'header',
739 'dashes', 'header', 'dashes'])
741 grads = []
742 line = next(inputfile)
743 while len(line.split()) == 4:
744 tmpgrads = list(map(float, line.split()[1:]))
745 grads.append(tmpgrads)
746 line = next(inputfile)
748 self.grads[-1] = grads
750 ###
751 # All orbitals with orbital energies smaller than E(LUMO)+0.5 are printed
752 #
753 # ++ Molecular orbitals:
754 # -------------------
755 #
756 # Title: RKS-DFT orbitals
757 #
758 # Molecular orbitals for symmetry species 1: a
759 #
760 # Orbital 1 2 3 4 5 6 7 8 9 10
761 # Energy -10.0179 -10.0179 -10.0075 -10.0075 -10.0066 -10.0066 -10.0056 -10.0055 -9.9919 -9.9919
762 # Occ. No. 2.0000 2.0000 2.0000 2.0000 2.0000 2.0000 2.0000 2.0000 2.0000 2.0000
763 #
764 # 1 C1 1s -0.6990 0.6989 0.0342 0.0346 0.0264 -0.0145 -0.0124 -0.0275 -0.0004 -0.0004
765 # 2 C1 2s -0.0319 0.0317 -0.0034 -0.0033 -0.0078 0.0034 0.0041 0.0073 -0.0002 -0.0002
766 # ...
767 # ...
768 # 58 H18 1s 0.2678
769 # 59 H19 1s -0.2473
770 # 60 H20 1s 0.1835
771 # --
772 if '++ Molecular orbitals:' in line:
774 self.skip_lines(inputfile, ['d', 'b'])
775 line = next(inputfile)
777 # We don't currently support parsing natural orbitals or active space orbitals.
778 if 'Natural orbitals' not in line and "Pseudonatural" not in line:
779 self.skip_line(inputfile, 'b')
781 # Symmetry is not currently supported, so this line can have one form.
782 while 'Molecular orbitals for symmetry species 1: a' not in line.strip():
783 line = next(inputfile)
785 # Symmetry is not currently supported, so this line can have one form.
786 if line.strip() != 'Molecular orbitals for symmetry species 1: a':
787 return
789 line = next(inputfile)
790 moenergies = []
791 homos = 0
792 mocoeffs = []
793 while line[:2] != '--':
794 line = next(inputfile)
795 if line.strip().startswith('Orbital'):
796 orbital_index = line.split()[1:]
797 for i in orbital_index:
798 mocoeffs.append([])
800 if 'Energy' in line:
801 energies = [utils.convertor(float(x), 'hartree', 'eV') for x in line.split()[1:]]
802 moenergies.extend(energies)
804 if 'Occ. No.' in line:
805 for i in line.split()[2:]:
806 if float(i) != 0:
807 homos += 1
809 aonames = []
810 tokens = line.split()
811 if tokens and tokens[0] == '1':
812 while tokens and tokens[0] != '--':
813 aonames.append("{atom}_{orbital}".format(atom=tokens[1], orbital=tokens[2]))
814 info = tokens[3:]
815 j = 0
816 for i in orbital_index:
817 mocoeffs[int(i)-1].append(float(info[j]))
818 j += 1
819 line = next(inputfile)
820 tokens = line.split()
821 self.set_attribute('aonames', aonames)
823 if len(moenergies) != self.nmo:
824 moenergies.extend([numpy.nan for x in range(self.nmo - len(moenergies))])
826 self.append_attribute('moenergies', moenergies)
828 if not hasattr(self, 'homos'):
829 self.homos = []
830 self.homos.extend([homos-1])
832 while len(mocoeffs) < self.nmo:
833 nan_array = [numpy.nan for i in range(self.nbasis)]
834 mocoeffs.append(nan_array)
836 self.append_attribute('mocoeffs', mocoeffs)
838 ## Parsing MP energy from the &MBPT2 module.
839 # Conventional algorithm used...
840 #
841 # SCF energy = -74.9644564043 a.u.
842 # Second-order correlation energy = -0.0364237923 a.u.
843 #
844 # Total energy = -75.0008801966 a.u.
845 # Reference weight ( Cref**2 ) = 0.98652
846 #
847 # :: Total MBPT2 energy -75.0008801966
848 #
849 #
850 # Zeroth-order energy (E0) = -36.8202538520 a.u.
851 #
852 # Shanks-type energy S1(E) = -75.0009150108 a.u.
853 if 'Total MBPT2 energy' in line:
854 mpenergies = []
855 mpenergies.append(utils.convertor(utils.float(line.split()[4]), 'hartree', 'eV'))
856 if not hasattr(self, 'mpenergies'):
857 self.mpenergies = []
858 self.mpenergies.append(mpenergies)
860 # Parsing data ccenergies from &CCSDT module.
861 # --- Start Module: ccsdt at Thu Jul 26 14:03:23 2018 ---
862 #
863 # ()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()
864 #
865 # &CCSDT
866 # ...
867 # ...
868 # 14 -75.01515915 -0.05070274 -0.00000029
869 # 15 -75.01515929 -0.05070289 -0.00000014
870 # 16 -75.01515936 -0.05070296 -0.00000007
871 # Convergence after 17 Iterations
872 #
873 #
874 # Total energy (diff) : -75.01515936 -0.00000007
875 # Correlation energy : -0.0507029554992
876 if 'Start Module: ccsdt' in line:
877 self.skip_lines(inputfile, ['b', '()', 'b'])
878 line = next(inputfile)
879 if '&CCSDT' in line:
880 while not line.strip().startswith('Total energy (diff)'):
881 line = next(inputfile)
883 ccenergies = utils.convertor(utils.float(line.split()[4]), 'hartree', 'eV')
884 if not hasattr(self, 'ccenergies'):
885 self.ccenergies= []
886 self.ccenergies.append(ccenergies)
888 # ++ Primitive basis info:
889 # ---------------------
890 #
891 #
892 # *****************************************************
893 # ******** Primitive Basis Functions (Valence) ********
894 # *****************************************************
895 #
896 #
897 # Basis set:C.AUG-CC-PVQZ.........
898 #
899 # Type
900 # s
901 # No. Exponent Contraction Coefficients
902 # 1 0.339800000D+05 0.000091 -0.000019 0.000000 0.000000 0.000000 0.000000
903 # 2 0.508900000D+04 0.000704 -0.000151 0.000000 0.000000 0.000000 0.000000
904 # ...
905 # ...
906 # 29 0.424000000D+00 0.000000 1.000000
907 #
908 # Number of primitives 93
909 # Number of basis functions 80
910 #
911 # --
912 if line.startswith('++ Primitive basis info:'):
913 self.skip_lines(inputfile, ['d', 'b', 'b', 's', 'header', 's', 'b'])
914 line = next(inputfile)
915 gbasis_array = []
916 while '--' not in line and '****' not in line:
917 if 'Basis set:' in line:
918 basis_element_patterns = re.findall(r'Basis set:([A-Za-z]{1,2})\.', line)
919 assert len(basis_element_patterns) == 1
920 basis_element = basis_element_patterns[0].title()
921 gbasis_array.append((basis_element, []))
923 if 'Type' in line:
924 line = next(inputfile)
925 shell_type = line.split()[0].upper()
927 self.skip_line(inputfile, 'headers')
928 line = next(inputfile)
930 exponents = []
931 coefficients = []
932 func_array = []
933 while line.split():
934 exponents.append(utils.float(line.split()[1]))
935 coefficients.append([utils.float(i) for i in line.split()[2:]])
936 line = next(inputfile)
938 for i in range(len(coefficients[0])):
939 func_tuple = (shell_type, [])
940 for iexp, exp in enumerate(exponents):
941 coeff = coefficients[iexp][i]
942 if coeff != 0:
943 func_tuple[1].append((exp, coeff))
944 gbasis_array[-1][1].append(func_tuple)
946 line = next(inputfile)
948 atomsymbols = [self.table.element[atomno] for atomno in self.atomnos]
949 self.gbasis = [[] for i in range(self.natom)]
950 for element, gbasis in gbasis_array:
951 mask = [element == possible_element for possible_element in atomsymbols]
952 indices = [i for (i, x) in enumerate(mask) if x]
953 for index in indices:
954 self.gbasis[index] = gbasis
956 # ++ Basis set information:
957 # ----------------------
958 # ...
959 # Basis set label: MO.ECP.HAY-WADT.5S6P4D.3S3P2D.14E-LANL2DZ.....
960 #
961 # Electronic valence basis set:
962 # ------------------
963 # Associated Effective Charge 14.000000 au
964 # Associated Actual Charge 42.000000 au
965 # Nuclear Model: Point charge
966 # ...
967 #
968 # Effective Core Potential specification:
969 # =======================================
970 #
971 # Label Cartesian Coordinates / Bohr
972 #
973 # MO 0.0006141610 -0.0006141610 0.0979067106
974 # --
975 if '++ Basis set information:' in line:
976 self.core_array = []
977 basis_element = None
978 ncore = 0
980 while line[:2] != '--':
981 if 'Basis set label' in line:
982 try:
983 basis_element = line.split()[3].split('.')[0]
984 basis_element = basis_element[0] + basis_element[1:].lower()
985 except:
986 self.logger.warning('Basis set label is missing!')
987 basis_element = ''
988 if 'valence basis set:' in line.lower():
989 self.skip_line(inputfile, 'd')
990 line = next(inputfile)
991 if 'Associated Effective Charge' in line:
992 effective_charge = float(line.split()[3])
993 actual_charge = float(next(inputfile).split()[3])
994 element = self.table.element[int(actual_charge)]
995 ncore = int(actual_charge - effective_charge)
996 if basis_element:
997 assert basis_element == element
998 else:
999 basis_element = element
1001 if basis_element and ncore:
1002 self.core_array.append((basis_element, ncore))
1003 basis_element = ''
1004 ncore = 0
1006 line = next(inputfile)