Coverage for cclib/parser/orcaparser.py : 92%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2#
3# Copyright (c) 2020, the cclib development team
4#
5# This file is part of cclib (http://cclib.github.io) and is distributed under
6# the terms of the BSD 3-Clause License.
8"""Parser for ORCA output files"""
11import re
12from itertools import zip_longest
14import numpy
15from packaging.version import parse as parse_version
17from cclib.parser import logfileparser
18from cclib.parser import utils
21class ORCA(logfileparser.Logfile):
22 """An ORCA log file."""
24 def __init__(self, *args, **kwargs):
26 # Call the __init__ method of the superclass
27 super(ORCA, self).__init__(logname="ORCA", *args, **kwargs)
29 def __str__(self):
30 """Return a string representation of the object."""
31 return "ORCA log file %s" % (self.filename)
33 def __repr__(self):
34 """Return a representation of the object."""
35 return 'ORCA("%s")' % (self.filename)
37 def normalisesym(self, label):
38 """ORCA does not require normalizing symmetry labels."""
39 return label
41 def before_parsing(self):
43 # A geometry optimization is started only when
44 # we parse a cycle (so it will be larger than zero().
45 self.gopt_cycle = 0
47 # Keep track of whether this is a relaxed scan calculation
48 self.is_relaxed_scan = False
50 def after_parsing(self):
51 # ORCA doesn't add the dispersion energy to the "Total energy" (which
52 # we parse), only to the "FINAL SINGLE POINT ENERGY" (which we don't
53 # parse).
54 if hasattr(self, "scfenergies") and hasattr(self, "dispersionenergies"):
55 for i, (scfenergy, dispersionenergy) in enumerate(
56 zip_longest(self.scfenergies, self.dispersionenergies)
57 ):
58 # It isn't as problematic if there are more dispersion than
59 # SCF energies, since all dispersion energies can still be
60 # added to the SCF energies, hence the difference in log level.
61 if dispersionenergy is None:
62 self.logger.error(
63 "The number of SCF and dispersion energies are not equal: %d vs. %d, "
64 "can't add dispersion energy to all SCF energies",
65 len(self.scfenergies),
66 len(self.dispersionenergies)
67 )
68 break
69 if scfenergy is None:
70 self.logger.warning(
71 "The number of SCF and dispersion energies are not equal: %d vs. %d, "
72 "can't add dispersion energy to all SCF energies",
73 len(self.scfenergies),
74 len(self.dispersionenergies)
75 )
76 break
77 self.scfenergies[i] += dispersionenergy
79 def extract(self, inputfile, line):
80 """Extract information from the file object inputfile."""
82 # Extract the version number.
83 if "Program Version" == line.strip()[:15]:
84 # Handle development versions.
85 self.metadata["legacy_package_version"] = line.split()[2]
86 self.metadata["package_version"] = self.metadata["legacy_package_version"].replace(".x", "dev")
87 possible_revision_line = next(inputfile)
88 if "SVN: $Rev" in possible_revision_line:
89 self.metadata["package_version"] += "+{}".format(
90 re.search(r"\d+", possible_revision_line).group()
91 )
94 # ================================================================================
95 # WARNINGS
96 # Please study these warnings very carefully!
97 # ================================================================================
98 #
99 # Warning: TCutStore was < 0. Adjusted to Thresh (uncritical)
100 #
101 # WARNING: your system is open-shell and RHF/RKS was chosen
102 # ===> : WILL SWITCH to UHF/UKS
103 #
104 #
105 # INFO : the flag for use of LIBINT has been found!
106 #
107 # ================================================================================
108 if "WARNINGS" == line.strip():
109 self.skip_lines(inputfile, ['text', '=', 'blank'])
110 if 'warnings' not in self.metadata:
111 self.metadata['warnings'] = []
112 if 'info' not in self.metadata:
113 self.metadata['info'] = []
115 line = next(inputfile)
116 while line[0] != '=':
117 if line.lower()[:7] == 'warning':
118 self.metadata['warnings'].append('')
119 while len(line) > 1:
120 self.metadata['warnings'][-1] += line[9:].strip()
121 line = next(inputfile)
122 elif line.lower()[:4] == 'info':
123 self.metadata['info'].append('')
124 while len(line) > 1:
125 self.metadata['info'][-1] += line[9:].strip()
126 line = next(inputfile)
127 line = next(inputfile)
129 # ================================================================================
130 # INPUT FILE
131 # ================================================================================
132 # NAME = input.dat
133 # | 1> %pal nprocs 4 end
134 # | 2> ! B3LYP def2-svp
135 # | 3> ! Grid4
136 # | 4>
137 # | 5> *xyz 0 3
138 # | 6> O 0 0 0
139 # | 7> O 0 0 1.5
140 # | 8> *
141 # | 9>
142 # | 10> ****END OF INPUT****
143 # ================================================================================
144 if "INPUT FILE" == line.strip():
145 self.skip_line(inputfile, '=')
146 self.metadata['input_file_name'] = next(inputfile).split()[-1]
148 # First, collect all the lines...
149 lines = []
150 for line in inputfile:
151 if line[0] != '|':
152 break
153 lines.append(line[6:])
155 self.metadata['input_file_contents'] = ''.join(lines[:-1])
156 lines_iter = iter(lines[:-1])
158 keywords = []
159 coords = []
160 # ...then parse them separately.
161 for line in lines_iter:
162 line = line.strip()
163 if not line:
164 continue
166 # Keywords block
167 if line[0] == '!':
168 keywords += line[1:].split()
170 # Impossible to parse without knowing whether a keyword opens a new block
171 elif line[0] == '%':
172 pass
174 # Geometry block
175 elif line[0] == '*':
176 coord_type, charge, multiplicity = line[1:].split()[:3]
177 self.set_attribute('charge', int(charge))
178 self.set_attribute('multiplicity', int(multiplicity))
179 coord_type = coord_type.lower()
180 self.metadata['coord_type'] = coord_type
181 if coord_type == 'xyz':
182 def splitter(line):
183 atom, x, y, z = line.split()[:4]
184 return [atom, float(x), float(y), float(z)]
185 elif coord_type in ['int', 'internal']:
186 def splitter(line):
187 atom, a1, a2, a3, bond, angle, dihedral = line.split()[:7]
188 return [atom, int(a1), int(a2), int(a3), float(bond), float(angle), float(dihedral)]
189 elif coord_type == 'gzmt':
190 def splitter(line):
191 vals = line.split()[:7]
192 if len(vals) == 7:
193 atom, a1, bond, a2, angle, a3, dihedral = vals
194 return [atom, int(a1), float(bond), int(a2), float(angle), int(a3), float(dihedral)]
195 elif len(vals) == 5:
196 return [vals[0], int(vals[1]), float(vals[2]), int(vals[3]), float(vals[4])]
197 elif len(vals) == 3:
198 return [vals[0], int(vals[1]), float(vals[2])]
199 elif len(vals) == 1:
200 return [vals[0]]
201 self.logger.warning('Incorrect number of atoms in input geometry.')
202 elif 'file' in coord_type:
203 pass
204 else:
205 self.logger.warning('Invalid coordinate type.')
207 if 'file' not in coord_type:
208 for line in lines_iter:
209 if not line:
210 continue
211 if line[0] == '#' or line.strip(' ') == '\n':
212 continue
213 if line.strip()[0] == '*' or line.strip() == "end":
214 break
215 # Strip basis specification that can appear after coordinates
216 line = line.split('newGTO')[0].strip()
217 coords.append(splitter(line))
219 self.metadata['keywords'] = keywords
220 self.metadata['coords'] = coords
222 if line[0:15] == "Number of atoms":
224 natom = int(line.split()[-1])
225 self.set_attribute('natom', natom)
227 if line[1:13] == "Total Charge":
229 charge = int(line.split()[-1])
230 self.set_attribute('charge', charge)
232 line = next(inputfile)
234 mult = int(line.split()[-1])
235 self.set_attribute('mult', mult)
237 # SCF convergence output begins with:
238 #
239 # --------------
240 # SCF ITERATIONS
241 # --------------
242 #
243 # However, there are two common formats which need to be handled, implemented as separate functions.
244 if line.strip() == "SCF ITERATIONS":
246 self.skip_line(inputfile, 'dashes')
248 line = next(inputfile)
249 columns = line.split()
250 # "Starting incremental Fock matrix formation" doesn't
251 # necessarily appear before the extended format.
252 if not columns:
253 self.parse_scf_expanded_format(inputfile, columns)
254 # A header with distinct columns indicates the condensed
255 # format.
256 elif columns[1] == "Energy":
257 self.parse_scf_condensed_format(inputfile, columns)
258 # Assume the extended format.
259 else:
260 self.parse_scf_expanded_format(inputfile, columns)
262 # Information about the final iteration, which also includes the convergence
263 # targets and the convergence values, is printed separately, in a section like this:
264 #
265 # *****************************************************
266 # * SUCCESS *
267 # * SCF CONVERGED AFTER 9 CYCLES *
268 # *****************************************************
269 #
270 # ...
271 #
272 # Total Energy : -382.04963064 Eh -10396.09898 eV
273 #
274 # ...
275 #
276 # ------------------------- ----------------
277 # FINAL SINGLE POINT ENERGY -382.049630637
278 # ------------------------- ----------------
279 #
280 # We cannot use this last message as a stop condition in general, because
281 # often there is vibrational output before it. So we use the 'Total Energy'
282 # line. However, what comes after that is different for single point calculations
283 # and in the inner steps of geometry optimizations.
284 if "SCF CONVERGED AFTER" in line:
286 if not hasattr(self, "scfenergies"):
287 self.scfenergies = []
288 if not hasattr(self, "scfvalues"):
289 self.scfvalues = []
290 if not hasattr(self, "scftargets"):
291 self.scftargets = []
293 while not "Total Energy :" in line:
294 line = next(inputfile)
295 energy = utils.convertor(float(line.split()[3]), "hartree", "eV")
296 self.scfenergies.append(energy)
298 self._append_scfvalues_scftargets(inputfile, line)
300 # Sometimes the SCF does not converge, but does not halt the
301 # the run (like in bug 3184890). In this this case, we should
302 # remain consistent and use the energy from the last reported
303 # SCF cycle. In this case, ORCA print a banner like this:
304 #
305 # *****************************************************
306 # * ERROR *
307 # * SCF NOT CONVERGED AFTER 8 CYCLES *
308 # *****************************************************
309 if "SCF NOT CONVERGED AFTER" in line:
311 if not hasattr(self, "scfenergies"):
312 self.scfenergies = []
313 if not hasattr(self, "scfvalues"):
314 self.scfvalues = []
315 if not hasattr(self, "scftargets"):
316 self.scftargets = []
318 energy = utils.convertor(self.scfvalues[-1][-1][0], "hartree", "eV")
319 self.scfenergies.append(energy)
321 self._append_scfvalues_scftargets(inputfile, line)
323 """
324-------------------------------------------------------------------------------
325 DFT DISPERSION CORRECTION
327 DFTD3 V3.1 Rev 1
328 USING zero damping
329-------------------------------------------------------------------------------
330The omegaB97X-D3 functional is recognized. Fit by Chai et al.
331Active option DFTDOPT ... 3
333molecular C6(AA) [au] = 9563.878941
336 DFT-D V3
337 parameters
338 s6 scaling factor : 1.0000
339 rs6 scaling factor : 1.2810
340 s8 scaling factor : 1.0000
341 rs8 scaling factor : 1.0940
342 Damping factor alpha6 : 14.0000
343 Damping factor alpha8 : 16.0000
344 ad hoc parameters k1-k3 : 16.0000 1.3333 -4.0000
346 Edisp/kcal,au: -10.165629059768 -0.016199959356
347 E6 /kcal : -4.994512983
348 E8 /kcal : -5.171116077
349 % E8 : 50.868628459
351------------------------- ----------------
352Dispersion correction -0.016199959
353------------------------- ----------------
354"""
355 if "DFT DISPERSION CORRECTION" in line:
356 # A bunch of parameters are printed the first time dispersion is called
357 # However, they vary wildly in form and number, making parsing problematic
358 line = next(inputfile)
359 while 'Dispersion correction' not in line:
360 line = next(inputfile)
361 dispersion = utils.convertor(float(line.split()[-1]), "hartree", "eV")
362 self.append_attribute("dispersionenergies", dispersion)
364 # The convergence targets for geometry optimizations are printed at the
365 # beginning of the output, although the order and their description is
366 # different than later on. So, try to standardize the names of the criteria
367 # and save them for later so that we can get the order right.
368 #
369 # *****************************
370 # * Geometry Optimization Run *
371 # *****************************
372 #
373 # Geometry optimization settings:
374 # Update method Update .... BFGS
375 # Choice of coordinates CoordSys .... Redundant Internals
376 # Initial Hessian InHess .... Almoef's Model
377 #
378 # Convergence Tolerances:
379 # Energy Change TolE .... 5.0000e-06 Eh
380 # Max. Gradient TolMAXG .... 3.0000e-04 Eh/bohr
381 # RMS Gradient TolRMSG .... 1.0000e-04 Eh/bohr
382 # Max. Displacement TolMAXD .... 4.0000e-03 bohr
383 # RMS Displacement TolRMSD .... 2.0000e-03 bohr
384 #
385 if line[25:50] == "Geometry Optimization Run":
387 stars = next(inputfile)
388 blank = next(inputfile)
390 line = next(inputfile)
391 while line[0:23] != "Convergence Tolerances:":
392 line = next(inputfile)
394 if hasattr(self, 'geotargets'):
395 self.logger.warning('The geotargets attribute should not exist yet. There is a problem in the parser.')
396 self.geotargets = []
397 self.geotargets_names = []
399 # There should always be five tolerance values printed here.
400 for i in range(5):
401 line = next(inputfile)
402 name = line[:25].strip().lower().replace('.', '').replace('displacement', 'step')
403 target = float(line.split()[-2])
404 self.geotargets_names.append(name)
405 self.geotargets.append(target)
407 # The convergence targets for relaxed surface scan steps are printed at the
408 # beginning of the output, although the order and their description is
409 # different than later on. So, try to standardize the names of the criteria
410 # and save them for later so that we can get the order right.
411 #
412 # *************************************************************
413 # * RELAXED SURFACE SCAN STEP 12 *
414 # * *
415 # * Dihedral ( 11, 10, 3, 4) : 180.00000000 *
416 # *************************************************************
417 #
418 # Geometry optimization settings:
419 # Update method Update .... BFGS
420 # Choice of coordinates CoordSys .... Redundant Internals
421 # Initial Hessian InHess .... Almoef's Model
422 #
423 # Convergence Tolerances:
424 # Energy Change TolE .... 5.0000e-06 Eh
425 # Max. Gradient TolMAXG .... 3.0000e-04 Eh/bohr
426 # RMS Gradient TolRMSG .... 1.0000e-04 Eh/bohr
427 # Max. Displacement TolMAXD .... 4.0000e-03 bohr
428 # RMS Displacement TolRMSD .... 2.0000e-03 bohr
429 if line[25:50] == "RELAXED SURFACE SCAN STEP":
431 self.is_relaxed_scan = True
432 blank = next(inputfile)
433 info = next(inputfile)
434 stars = next(inputfile)
435 blank = next(inputfile)
437 line = next(inputfile)
438 while line[0:23] != "Convergence Tolerances:":
439 line = next(inputfile)
441 self.geotargets = []
442 self.geotargets_names = []
444 # There should always be five tolerance values printed here.
445 for i in range(5):
446 line = next(inputfile)
447 name = line[:25].strip().lower().replace('.', '').replace('displacement', 'step')
448 target = float(line.split()[-2])
449 self.geotargets_names.append(name)
450 self.geotargets.append(target)
453 # Moller-Plesset energies.
454 #
455 # ---------------------------------------
456 # MP2 TOTAL ENERGY: -76.112119693 Eh
457 # ---------------------------------------
458 if 'MP2 TOTAL ENERGY' in line[:16]:
460 if not hasattr(self, 'mpenergies'):
461 self.metadata['methods'].append('MP2')
462 self.mpenergies = []
464 self.mpenergies.append([])
465 mp2energy = utils.float(line.split()[-2])
466 self.mpenergies[-1].append(utils.convertor(mp2energy, 'hartree', 'eV'))
468 # MP2 energy output line is different for MP3, since it uses the MDCI
469 # code, which is also in charge of coupled cluster.
470 #
471 # MP3 calculation:
472 # E(MP2) = -76.112119775 EC(MP2)= -0.128216417
473 # E(MP3) = -76.113783480 EC(MP3)= -0.129880122 E3= -0.001663705
474 #
475 # CCSD calculation:
476 # E(MP2) ... -0.393722942
477 # Initial E(tot) ... -1639.631576169
478 # <T|T> ... 0.087231847
479 # Number of pairs included ... 55
480 # Total number of pairs ... 55
481 if 'E(MP2)' in line:
483 if not hasattr(self, 'mpenergies'):
484 self.mpenergies = []
486 self.mpenergies.append([])
487 mp2energy = utils.float(line.split()[-1])
488 self.mpenergies[-1].append(utils.convertor(mp2energy, 'hartree', 'eV'))
490 line = next(inputfile)
491 if line[:6] == 'E(MP3)':
492 self.metadata['methods'].append('MP3')
493 mp3energy = utils.float(line.split()[2])
494 self.mpenergies[-1].append(utils.convertor(mp3energy, 'hartree', 'eV'))
495 else:
496 assert line[:14] == 'Initial E(tot)'
498 # ----------------------
499 # COUPLED CLUSTER ENERGY
500 # ----------------------
501 #
502 # E(0) ... -1639.237853227
503 # E(CORR) ... -0.360153516
504 # E(TOT) ... -1639.598006742
505 # Singles Norm <S|S>**1/2 ... 0.176406354
506 # T1 diagnostic ... 0.039445660
507 if line[:22] == 'COUPLED CLUSTER ENERGY':
508 self.skip_lines(inputfile, ['d', 'b'])
509 line = next(inputfile)
510 assert line[:4] == 'E(0)'
511 scfenergy = utils.convertor(utils.float(line.split()[-1]), 'hartree', 'eV')
512 line = next(inputfile)
513 assert line[:7] == 'E(CORR)'
514 while 'E(TOT)' not in line:
515 line = next(inputfile)
516 self.append_attribute(
517 'ccenergies',
518 utils.convertor(utils.float(line.split()[-1]), 'hartree', 'eV')
519 )
520 line = next(inputfile)
521 assert line[:23] == 'Singles Norm <S|S>**1/2'
522 line = next(inputfile)
523 self.metadata["t1_diagnostic"] = utils.float(line.split()[-1])
525 # ------------------
526 # CARTESIAN GRADIENT
527 # ------------------
528 #
529 # 1 H : 0.000000004 0.019501450 -0.021537091
530 # 2 O : 0.000000054 -0.042431648 0.042431420
531 # 3 H : 0.000000004 0.021537179 -0.019501388
532 #
533 # ORCA MP2 module has different signal than 'CARTESIAN GRADIENT'.
534 #
535 # The final MP2 gradient
536 # 0: 0.01527469 -0.00292883 0.01125000
537 # 1: 0.00098782 -0.00040549 0.00196825
538 # 2: -0.01626251 0.00333431 -0.01321825
539 if line[:18] == 'CARTESIAN GRADIENT' or line[:22] == 'The final MP2 gradient':
541 grads = []
542 if line[:18] == 'CARTESIAN GRADIENT':
543 self.skip_lines(inputfile, ['dashes', 'blank'])
545 line = next(inputfile).strip()
546 if 'CONSTRAINED CARTESIAN COORDINATES' in line:
547 self.skip_line(
548 inputfile, 'constrained Cartesian coordinate warning'
549 )
550 line = next(inputfile).strip()
552 while line:
553 tokens = line.split()
554 x, y, z = float(tokens[-3]), float(tokens[-2]), float(tokens[-1])
555 grads.append((x, y, z))
556 line = next(inputfile).strip()
558 if not hasattr(self, 'grads'):
559 self.grads = []
560 self.grads.append(grads)
563 # After each geometry optimization step, ORCA prints the current convergence
564 # parameters and the targets (again), so it is a good idea to check that they
565 # have not changed. Note that the order of these criteria here are different
566 # than at the beginning of the output, so make use of the geotargets_names created
567 # before and save the new geovalues in correct order.
568 #
569 # ----------------------|Geometry convergence|---------------------
570 # Item value Tolerance Converged
571 # -----------------------------------------------------------------
572 # Energy change 0.00006021 0.00000500 NO
573 # RMS gradient 0.00031313 0.00010000 NO
574 # RMS step 0.01596159 0.00200000 NO
575 # MAX step 0.04324586 0.00400000 NO
576 # ....................................................
577 # Max(Bonds) 0.0218 Max(Angles) 2.48
578 # Max(Dihed) 0.00 Max(Improp) 0.00
579 # -----------------------------------------------------------------
580 #
581 if line[33:53] == "Geometry convergence":
583 headers = next(inputfile)
584 dashes = next(inputfile)
586 names = []
587 values = []
588 targets = []
589 line = next(inputfile)
590 # Handle both the dots only and dashes only cases
591 while len(list(set(line.strip()))) != 1:
592 name = line[10:28].strip().lower()
593 tokens = line.split()
594 value = float(tokens[2])
595 target = float(tokens[3])
596 names.append(name)
597 values.append(value)
598 targets.append(target)
599 line = next(inputfile)
601 # The energy change is normally not printed in the first iteration, because
602 # there was no previous energy -- in that case assume zero. There are also some
603 # edge cases where the energy change is not printed, for example when internal
604 # angles become improper and internal coordinates are rebuilt as in regression
605 # CuI-MePY2-CH3CN_optxes, and in such cases use NaN.
606 newvalues = []
607 for i, n in enumerate(self.geotargets_names):
608 if (n == "energy change") and (n not in names):
609 if self.is_relaxed_scan:
610 newvalues.append(0.0)
611 else:
612 newvalues.append(numpy.nan)
613 else:
614 newvalues.append(values[names.index(n)])
615 assert targets[names.index(n)] == self.geotargets[i]
617 self.append_attribute("geovalues", newvalues)
619 """ Grab cartesian coordinates
620 ---------------------------------
621 CARTESIAN COORDINATES (ANGSTROEM)
622 ---------------------------------
623 H 0.000000 0.000000 0.000000
624 O 0.000000 0.000000 1.000000
625 H 0.000000 1.000000 1.000000
626 """
627 if line[0:33] == "CARTESIAN COORDINATES (ANGSTROEM)":
628 next(inputfile)
630 atomnos = []
631 atomcoords = []
632 line = next(inputfile)
633 while len(line) > 1:
634 atom, x, y, z = line.split()
635 if atom[-1] != ">":
636 atomnos.append(self.table.number[atom])
637 atomcoords.append([float(x), float(y), float(z)])
638 line = next(inputfile)
640 self.set_attribute('natom', len(atomnos))
641 self.set_attribute('atomnos', atomnos)
642 self.append_attribute("atomcoords", atomcoords)
644 """ Grab atom masses
645 ----------------------------
646 CARTESIAN COORDINATES (A.U.)
647 ----------------------------
648 NO LB ZA FRAG MASS X Y Z
649 0 H 1.0000 0 1.008 0.000000 0.000000 0.000000
650 1 O 8.0000 0 15.999 0.000000 0.000000 1.889726
651 2 H 1.0000 0 1.008 0.000000 1.889726 1.889726
652 """
653 if line[0:28] == "CARTESIAN COORDINATES (A.U.)" and not hasattr(self, 'atommasses'):
654 next(inputfile)
655 next(inputfile)
657 line = next(inputfile)
658 self.atommasses = []
659 while len(line) > 1:
660 if line[:32] == '* core charge reduced due to ECP':
661 break
662 if line.strip() == "> coreless ECP center with (optional) point charge":
663 break
664 no, lb, za, frag, mass, x, y, z = line.split()
665 if lb[-1] != ">":
666 self.atommasses.append(float(mass))
667 line = next(inputfile)
669 if line[21:68] == "FINAL ENERGY EVALUATION AT THE STATIONARY POINT":
670 if not hasattr(self, 'optdone'):
671 self.optdone = []
672 self.optdone.append(len(self.atomcoords))
674 if "The optimization did not converge" in line:
675 if not hasattr(self, 'optdone'):
676 self.optdone = []
678 if line[0:16] == "ORBITAL ENERGIES":
680 self.skip_lines(inputfile, ['d', 'text', 'text'])
682 self.mooccnos = [[]]
683 self.moenergies = [[]]
685 line = next(inputfile)
686 while len(line) > 20: # restricted calcs are terminated by ------
687 info = line.split()
688 mooccno = int(float(info[1]))
689 moenergy = float(info[2])
690 self.mooccnos[0].append(mooccno)
691 self.moenergies[0].append(utils.convertor(moenergy, "hartree", "eV"))
692 line = next(inputfile)
694 line = next(inputfile)
696 # handle beta orbitals for UHF
697 if line[17:35] == "SPIN DOWN ORBITALS":
698 text = next(inputfile)
700 self.mooccnos.append([])
701 self.moenergies.append([])
703 line = next(inputfile)
704 while len(line) > 20: # actually terminated by ------
705 info = line.split()
706 mooccno = int(float(info[1]))
707 moenergy = float(info[2])
708 self.mooccnos[1].append(mooccno)
709 self.moenergies[1].append(utils.convertor(moenergy, "hartree", "eV"))
710 line = next(inputfile)
712 if not hasattr(self, 'homos'):
713 doubly_occupied = self.mooccnos[0].count(2)
714 singly_occupied = self.mooccnos[0].count(1)
715 # Restricted closed-shell.
716 if doubly_occupied > 0 and singly_occupied == 0:
717 self.set_attribute('homos', [doubly_occupied - 1])
718 # Restricted open-shell.
719 elif doubly_occupied > 0 and singly_occupied > 0:
720 self.set_attribute('homos', [doubly_occupied + singly_occupied - 1,
721 doubly_occupied - 1])
722 # Unrestricted.
723 else:
724 assert len(self.moenergies) == 2
725 assert doubly_occupied == 0
726 assert self.mooccnos[1].count(2) == 0
727 nbeta = self.mooccnos[1].count(1)
728 self.set_attribute('homos', [singly_occupied - 1, nbeta - 1])
730 # So nbasis was parsed at first with the first pattern, but it turns out that
731 # semiempirical methods (at least AM1 as reported by Julien Idé) do not use this.
732 # For this reason, also check for the second patterns, and use it as an assert
733 # if nbasis was already parsed. Regression PCB_1_122.out covers this test case.
734 if line[1:32] == "# of contracted basis functions":
735 self.set_attribute('nbasis', int(line.split()[-1]))
736 if line[1:27] == "Basis Dimension Dim":
737 self.set_attribute('nbasis', int(line.split()[-1]))
739 if line[0:14] == "OVERLAP MATRIX":
741 self.skip_line(inputfile, 'dashes')
743 self.aooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d")
744 for i in range(0, self.nbasis, 6):
745 self.updateprogress(inputfile, "Overlap")
747 header = next(inputfile)
748 size = len(header.split())
750 for j in range(self.nbasis):
751 line = next(inputfile)
752 broken = line.split()
753 self.aooverlaps[j, i:i+size] = list(map(float, broken[1:size+1]))
755 # Molecular orbital coefficients are parsed here, but also related things
756 #like atombasis and aonames if possible.
757 #
758 # Normally the output is easy to parse like this:
759 # ------------------
760 # MOLECULAR ORBITALS
761 # ------------------
762 # 0 1 2 3 4 5
763 # -19.28527 -19.26828 -19.26356 -19.25801 -19.25765 -19.21471
764 # 2.00000 2.00000 2.00000 2.00000 2.00000 2.00000
765 # -------- -------- -------- -------- -------- --------
766 # 0C 1s 0.000002 -0.000001 0.000000 0.000000 -0.000000 0.000001
767 # 0C 2s -0.000007 0.000006 -0.000002 -0.000000 0.000001 -0.000003
768 # 0C 3s -0.000086 -0.000061 0.000058 -0.000033 -0.000027 -0.000058
769 # ...
770 #
771 # But when the numbers get big, things get yucky since ORCA does not use
772 # fixed width formatting for the floats, and does not insert extra spaces
773 # when the numbers get wider. So things get stuck together overflowing columns,
774 # like this:
775 # 12C 6s -11.608845-53.775398161.302640-76.633779 29.914985 22.083999
776 #
777 # One assumption that seems to hold is that there are always six significant
778 # digits in the coefficients, so we can try to use that to delineate numbers
779 # when the parsing gets rough. This is what we do below with a regex, and a case
780 # like this is tested in regression ORCA/ORCA4.0/invalid-literal-for-float.out
781 # which was reported in https://github.com/cclib/cclib/issues/629
782 if line[0:18] == "MOLECULAR ORBITALS":
784 self.skip_line(inputfile, 'dashes')
786 aonames = []
787 atombasis = [[] for i in range(self.natom)]
788 mocoeffs = [numpy.zeros((self.nbasis, self.nbasis), "d")]
790 for spin in range(len(self.moenergies)):
792 if spin == 1:
793 self.skip_line(inputfile, 'blank')
794 mocoeffs.append(numpy.zeros((self.nbasis, self.nbasis), "d"))
796 for i in range(0, self.nbasis, 6):
798 self.updateprogress(inputfile, "Coefficients")
800 self.skip_lines(inputfile, ['numbers', 'energies', 'occs'])
801 dashes = next(inputfile)
803 for j in range(self.nbasis):
804 line = next(inputfile)
806 # Only need this in the first iteration.
807 if spin == 0 and i == 0:
808 atomname = line[3:5].split()[0]
809 num = int(line[0:3])
810 orbital = line.split()[1].upper()
812 aonames.append("%s%i_%s" % (atomname, num+1, orbital))
813 atombasis[num].append(j)
815 # This regex will tease out all number with exactly
816 # six digits after the decimal point.
817 coeffs = re.findall(r'-?\d+\.\d{6}', line)
819 # Something is very wrong if this does not hold.
820 assert len(coeffs) <= 6
822 mocoeffs[spin][i:i+len(coeffs), j] = [float(c) for c in coeffs]
824 self.set_attribute('aonames', aonames)
825 self.set_attribute('atombasis', atombasis)
826 self.set_attribute("mocoeffs", mocoeffs)
828 # Basis set information
829 # ORCA prints this out in a somewhat indirect fashion.
830 # Therefore, parsing occurs in several steps:
831 # 1. read which atom belongs to which basis set group
832 if line[0:21] == "BASIS SET INFORMATION":
833 line = next(inputfile)
834 line = next(inputfile)
836 self.tmp_atnames = [] # temporary attribute, needed later
837 while(not line[0:5] == '-----'):
838 if line[0:4] == "Atom":
839 self.tmp_atnames.append(line[8:12].strip())
840 line = next(inputfile)
842 # 2. Read information for the basis set groups
843 if line[0:25] == "BASIS SET IN INPUT FORMAT":
844 line = next(inputfile)
845 line = next(inputfile)
847 # loop over basis set groups
848 gbasis_tmp = {}
849 while(not line[0:5] == '-----'):
850 if line[1:7] == 'NewGTO':
851 bas_atname = line.split()[1]
852 gbasis_tmp[bas_atname] = []
854 line = next(inputfile)
855 # loop over contracted GTOs
856 while(not line[0:6] == ' end;'):
857 words = line.split()
858 ang = words[0]
859 nprim = int(words[1])
861 # loop over primitives
862 coeff = []
863 for iprim in range(nprim):
864 words = next(inputfile).split()
865 coeff.append( (float(words[1]), float(words[2])) )
866 gbasis_tmp[bas_atname].append((ang, coeff))
868 line = next(inputfile)
869 line = next(inputfile)
871 # 3. Assign the basis sets to gbasis
872 self.gbasis = []
873 for bas_atname in self.tmp_atnames:
874 self.gbasis.append(gbasis_tmp[bas_atname])
875 del self.tmp_atnames
877 """
878 --------------------------
879 THERMOCHEMISTRY AT 298.15K
880 --------------------------
882 Temperature ... 298.15 K
883 Pressure ... 1.00 atm
884 Total Mass ... 130.19 AMU
886 Throughout the following assumptions are being made:
887 (1) The electronic state is orbitally nondegenerate
888 ...
890 freq. 45.75 E(vib) ... 0.53
891 freq. 78.40 E(vib) ... 0.49
892 ...
895 ------------
896 INNER ENERGY
897 ------------
899 The inner energy is: U= E(el) + E(ZPE) + E(vib) + E(rot) + E(trans)
900 E(el) - is the total energy from the electronic structure calc
901 ...
903 Summary of contributions to the inner energy U:
904 Electronic energy ... -382.05075804 Eh
905 ...
906 """
907 if line.strip().startswith('THERMOCHEMISTRY AT'):
909 self.skip_lines(inputfile, ['dashes', 'blank'])
910 self.temperature = float(next(inputfile).split()[2])
911 self.pressure = float(next(inputfile).split()[2])
912 total_mass = float(next(inputfile).split()[3])
914 # Vibrations, rotations, and translations
915 line = next(inputfile)
916 while line[:17] != 'Electronic energy':
917 line = next(inputfile)
918 self.electronic_energy = float(line.split()[3])
919 self.zpe = float(next(inputfile).split()[4])
920 thermal_vibrational_correction = float(next(inputfile).split()[4])
921 thermal_rotional_correction = float(next(inputfile).split()[4])
922 thermal_translational_correction = float(next(inputfile).split()[4])
923 self.skip_lines(inputfile, ['dashes'])
924 total_thermal_energy = float(next(inputfile).split()[3])
926 # Enthalpy
927 while line[:17] != 'Total free energy':
928 line = next(inputfile)
929 thermal_enthalpy_correction = float(next(inputfile).split()[4])
930 next(inputfile)
932 # For a single atom, ORCA provides the total free energy or inner energy
933 # which includes a spurious vibrational correction (see #817 for details).
934 if self.natom > 1:
935 self.enthalpy = float(next(inputfile).split()[3])
936 else:
937 self.enthalpy = self.electronic_energy + thermal_translational_correction
939 # Entropy
940 while line[:18] != 'Electronic entropy':
941 line = next(inputfile)
942 electronic_entropy = float(line.split()[3])
943 vibrational_entropy = float(next(inputfile).split()[3])
944 rotational_entropy = float(next(inputfile).split()[3])
945 translational_entropy = float(next(inputfile).split()[3])
946 self.skip_lines(inputfile, ['dashes'])
948 # ORCA prints -inf for single atom entropy.
949 if self.natom > 1:
950 self.entropy = float(next(inputfile).split()[4])
951 else:
952 self.entropy = (electronic_entropy + translational_entropy) / self.temperature
954 while (line[:25] != 'Final Gibbs free enthalpy') and (line[:23] != 'Final Gibbs free energy'):
955 line = next(inputfile)
956 self.skip_lines(inputfile, ['dashes'])
959 # ORCA prints -inf for sinle atom free energy.
960 if self.natom > 1:
961 self.freeenergy = float(line.split()[5])
962 else:
963 self.freeenergy = self.enthalpy - self.temperature * self.entropy
965 # Read TDDFT information
966 if any(x in line for x in ("TD-DFT/TDA EXCITED", "TD-DFT EXCITED")):
967 # Could be singlets or triplets
968 if line.find("SINGLETS") >= 0:
969 sym = "Singlet"
970 elif line.find("TRIPLETS") >= 0:
971 sym = "Triplet"
972 else:
973 sym = "Not specified"
975 etsecs = []
976 etenergies = []
977 etsyms = []
979 lookup = {'a': 0, 'b': 1}
980 line = next(inputfile)
981 while line.find("STATE") < 0:
982 line = next(inputfile)
983 # Contains STATE or is blank
984 while line.find("STATE") >= 0:
985 broken = line.split()
986 etenergies.append(float(broken[-2]))
987 etsyms.append(sym)
988 line = next(inputfile)
989 sec = []
990 # Contains SEC or is blank
991 while line.strip():
992 start = line[0:8].strip()
993 start = (int(start[:-1]), lookup[start[-1]])
994 end = line[10:17].strip()
995 end = (int(end[:-1]), lookup[end[-1]])
996 # Coeffients are not printed for RPA, only
997 # TDA/CIS.
998 contrib = line[35:47].strip()
999 try:
1000 contrib = float(contrib)
1001 except ValueError:
1002 contrib = numpy.nan
1003 sec.append([start, end, contrib])
1004 line = next(inputfile)
1005 etsecs.append(sec)
1006 line = next(inputfile)
1008 self.extend_attribute('etenergies', etenergies)
1009 self.extend_attribute('etsecs', etsecs)
1010 self.extend_attribute('etsyms', etsyms)
1012 # Parse the various absorption spectra for TDDFT and ROCIS.
1013 if 'ABSORPTION SPECTRUM' in line or 'ELECTRIC DIPOLE' in line:
1014 # CASSCF has an anomalous printing of ABSORPTION SPECTRUM.
1015 if line[:-1] == 'ABSORPTION SPECTRUM':
1016 return
1018 line = line.strip()
1020 # Standard header, occasionally changes
1021 header = ['d', 'header', 'header', 'd']
1023 def energy_intensity(line):
1024 """ TDDFT and related methods standard method of output
1025-----------------------------------------------------------------------------
1026 ABSORPTION SPECTRUM VIA TRANSITION ELECTRIC DIPOLE MOMENTS
1027-----------------------------------------------------------------------------
1028State Energy Wavelength fosc T2 TX TY TZ
1029 (cm-1) (nm) (au**2) (au) (au) (au)
1030-----------------------------------------------------------------------------
1031 1 5184116.7 1.9 0.040578220 0.00258 -0.05076 -0.00000 -0.00000
1032"""
1033 try:
1034 state, energy, wavelength, intensity, t2, tx, ty, tz = line.split()
1035 except ValueError as e:
1036 # Must be spin forbidden and thus no intensity
1037 energy = line.split()[1]
1038 intensity = 0
1039 return energy, intensity
1041 # Check for variations
1042 if line == 'COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM' or \
1043 line == 'COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM (origin adjusted)':
1044 def energy_intensity(line):
1045 """ TDDFT with DoQuad == True
1046------------------------------------------------------------------------------------------------------
1047 COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM
1048------------------------------------------------------------------------------------------------------
1049State Energy Wavelength D2 m2 Q2 D2+m2+Q2 D2/TOT m2/TOT Q2/TOT
1050 (cm-1) (nm) (*1e6) (*1e6)
1051------------------------------------------------------------------------------------------------------
1052 1 61784150.6 0.2 0.00000 0.00000 3.23572 0.00000323571519 0.00000 0.00000 1.00000
1053"""
1054 state, energy, wavelength, d2, m2, q2, intensity, d2_contrib, m2_contrib, q2_contrib = line.split()
1055 return energy, intensity
1057 elif line == 'COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM (Origin Independent, Length Representation)':
1058 def energy_intensity(line):
1059 """ TDDFT with doQuad == True (Origin Independent Length Representation)
1060-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1061 COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM (Origin Independent, Length Representation)
1062-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1063State Energy Wavelength D2 m2 Q2 DM DO D2+m2+Q2+DM+DO D2/TOT m2/TOT Q2/TOT DM/TOT DO/TOT
1064 (cm-1) (nm) (*1e6) (*1e6) (*1e6) (*1e6)
1065-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1066 1 61784150.6 0.2 0.00000 0.00000 3.23572 0.00000 0.00000 0.00000323571519 0.00000 0.00000 1.00000 0.00000 0.00000
1067 2 61793079.3 0.2 0.00000 0.00000 2.85949 0.00000 -0.00000 0.00000285948800 0.00000 0.00000 1.00000 0.00000 -0.00000
1068"""
1069 vals = line.split()
1070 if len(vals) < 14:
1071 return vals[1], 0
1072 return vals[1], vals[8]
1074 elif line[:5] == 'X-RAY' and \
1075 (line[6:23] == 'EMISSION SPECTRUM' or line[6:25] == 'ABSORPTION SPECTRUM'):
1076 def energy_intensity(line):
1077 """ X-Ray from XES (emission or absorption, electric or velocity dipole moments)
1078-------------------------------------------------------------------------------------
1079 X-RAY ABSORPTION SPECTRUM VIA TRANSITION ELECTRIC DIPOLE MOMENTS
1080-------------------------------------------------------------------------------------
1081 Transition Energy INT TX TY TZ
1082 (eV) (normalized) (au) (au) (au)
1083-------------------------------------------------------------------------------------
1084 1 90a -> 0a 8748.824 0.000002678629 0.00004 -0.00001 0.00003
1085"""
1086 state, start, arrow, end, energy, intensity, tx, ty, tz = line.split()
1087 return energy, intensity
1089 elif line[:70] == 'COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE X-RAY':
1090 header = ['header', 'd', 'header', 'd', 'header', 'header', 'd']
1091 def energy_intensity(line):
1092 """ XAS with quadrupole (origin adjusted)
1093-------------------------------------------------------------------------------------------------------------------------------
1094 COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE X-RAY ABSORPTION SPECTRUM
1095 (origin adjusted)
1096-------------------------------------------------------------------------------------------------------------------------------
1097 INT (normalized)
1098 ---------------------------------------------------------
1099 Transition Energy D2 M2 Q2 D2+M2+Q2 D2/TOT M2/TOT Q2/TOT
1100 (eV) (*1e6) (*1e6)
1101-------------------------------------------------------------------------------------------------------------------------------
1102 1 90a -> 0a 8748.824 0.000000 0.000292 0.003615 0.000000027512 0.858012 0.010602 0.131386
1103"""
1104 state, start, arrow, end, energy, d2, m2, q2, intensity, d2_contrib, m2_contrib, q2_contrib = line.split()
1105 return energy, intensity
1107 elif line[:55] == 'SPIN ORBIT CORRECTED ABSORPTION SPECTRUM VIA TRANSITION':
1108 def energy_intensity(line):
1109 """ ROCIS dipole approximation with SOC == True (electric or velocity dipole moments)
1110-------------------------------------------------------------------------------
1111SPIN ORBIT CORRECTED ABSORPTION SPECTRUM VIA TRANSITION ELECTRIC DIPOLE MOMENTS
1112-------------------------------------------------------------------------------
1113States Energy Wavelength fosc T2 TX TY TZ
1114 (cm-1) (nm) (au**2) (au) (au) (au)
1115-------------------------------------------------------------------------------
1116 0 1 0.0 0.0 0.000000000 0.00000 0.00000 0.00000 0.00000
1117 0 2 5184116.4 1.9 0.020288451 0.00258 0.05076 0.00003 0.00000
1118"""
1119 state, state2, energy, wavelength, intensity, t2, tx, ty, tz = line.split()
1120 return energy, intensity
1122 elif line[:79] == 'ROCIS COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM' \
1123 or line[:87] == 'SOC CORRECTED COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM':
1124 def energy_intensity(line):
1125 """ ROCIS with DoQuad = True and SOC = True (also does origin adjusted)
1126------------------------------------------------------------------------------------------------------
1127 ROCIS COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM
1128------------------------------------------------------------------------------------------------------
1129States Energy Wavelength D2 m2 Q2 D2+m2+Q2 D2/TOT m2/TOT Q2/TOT
1130 (cm-1) (nm) (*1e6) (*1e6) (*population)
1131------------------------------------------------------------------------------------------------------
1132 0 1 0.0 0.0 0.00000 0.00000 0.00000 0.00000000000000 0.00000 0.00000 0.00000
1133 0 2 669388066.6 0.0 0.00000 0.00000 0.00876 0.00000000437784 0.00000 0.00000 1.00000
1134"""
1135 state, state2, energy, wavelength, d2, m2, q2, intensity, d2_contrib, m2_contrib, q2_contrib = line.split()
1136 return energy, intensity
1138 # Clashes with Orca 2.6 (and presumably before) TDDFT absorption spectrum printing
1139 elif line == 'ABSORPTION SPECTRUM' and \
1140 parse_version(self.metadata['package_version']).release > (2, 6):
1141 def energy_intensity(line):
1142 """ CASSCF absorption spectrum
1143------------------------------------------------------------------------------------------
1144 ABSORPTION SPECTRUM
1145------------------------------------------------------------------------------------------
1146 States Energy Wavelength fosc T2 TX TY TZ
1147 (cm-1) (nm) (D**2) (D) (D) (D)
1148------------------------------------------------------------------------------------------
1149 0( 0)-> 1( 0) 1 83163.2 120.2 0.088250385 2.25340 0.00000 0.00000 1.50113
1150"""
1151 reg = r'(\d+)\( ?(\d+)\)-> ?(\d+)\( ?(\d+)\) (\d+)'+ r'\s+(\d+\.\d+)'*4 + r'\s+(-?\d+\.\d+)'*3
1152 res = re.search(reg, line)
1153 jstate, jblock, istate, iblock, mult, energy, wavelength, intensity, t2, tx, ty, tz = res.groups()
1154 return energy, intensity
1156 name = line
1157 self.skip_lines(inputfile, header)
1159 if not hasattr(self, 'transprop'):
1160 self.transprop = {}
1162 etenergies = []
1163 etoscs = []
1164 line = next(inputfile)
1165 # The sections are occasionally ended with dashed lines
1166 # other times they are blank (other than a new line)
1167 while len(line.strip('-')) > 2:
1168 energy, intensity = energy_intensity(line)
1169 etenergies.append(float(energy))
1170 etoscs.append(float(intensity))
1172 line = next(inputfile)
1174 self.set_attribute('etenergies', etenergies)
1175 self.set_attribute('etoscs', etoscs)
1176 self.transprop[name] = (numpy.asarray(etenergies), numpy.asarray(etoscs))
1178 if line.strip() == "CD SPECTRUM":
1179 # -------------------------------------------------------------------
1180 # CD SPECTRUM
1181 # -------------------------------------------------------------------
1182 # State Energy Wavelength R MX MY MZ
1183 # (cm-1) (nm) (1e40*cgs) (au) (au) (au)
1184 # -------------------------------------------------------------------
1185 # 1 43167.6 231.7 0.00000 0.00000 -0.00000 0.00000
1186 #
1187 etenergies = []
1188 etrotats = []
1189 self.skip_lines(inputfile, ["d", "State Energy Wavelength", "(cm-1) (nm)", "d"])
1190 line = next(inputfile)
1191 while line.strip():
1192 tokens = line.split()
1193 if "spin forbidden" in line:
1194 etrotat, mx, my, mz = 0.0, 0.0, 0.0, 0.0
1195 else:
1196 etrotat, mx, my, mz = [utils.float(t) for t in tokens[3:]]
1197 etenergies.append(utils.float(tokens[1]))
1198 etrotats.append(etrotat)
1199 line = next(inputfile)
1200 self.set_attribute("etrotats", etrotats)
1201 if not hasattr(self, "etenergies"):
1202 self.logger.warning("etenergies not parsed before ECD section, "
1203 "the output file may be malformed")
1204 self.set_attribute("etenergies", etenergies)
1206 if line[:23] == "VIBRATIONAL FREQUENCIES":
1207 self.skip_lines(inputfile, ['d', 'b'])
1209 # Starting with 4.1, a scaling factor for frequencies is printed
1210 if float(self.metadata["package_version"][:3]) > 4.0:
1211 self.skip_lines(inputfile, ['Scaling factor for frequencies', 'b'])
1213 if self.natom > 1:
1214 vibfreqs = numpy.zeros(3 * self.natom)
1215 for i, line in zip(range(3 * self.natom), inputfile):
1216 vibfreqs[i] = float(line.split()[1])
1218 nonzero = numpy.nonzero(vibfreqs)[0]
1219 self.first_mode = nonzero[0]
1220 # Take all modes after first
1221 # Mode between imaginary and real modes could be 0
1222 self.num_modes = 3*self.natom - self.first_mode
1223 if self.num_modes > 3*self.natom - 6:
1224 msg = "Modes corresponding to rotations/translations may be non-zero."
1225 if self.num_modes == 3*self.natom - 5:
1226 msg += '\n You can ignore this if the molecule is linear.'
1227 self.set_attribute('vibfreqs', vibfreqs[self.first_mode:])
1228 else:
1229 # we have a single atom
1230 self.set_attribute('vibfreqs', numpy.array([]))
1232 # NORMAL MODES
1233 # ------------
1234 #
1235 # These modes are the cartesian displacements weighted by the diagonal matrix
1236 # M(i,i)=1/sqrt(m[i]) where m[i] is the mass of the displaced atom
1237 # Thus, these vectors are normalized but *not* orthogonal
1238 #
1239 # 0 1 2 3 4 5
1240 # 0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
1241 # 1 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
1242 # 2 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
1243 # ...
1244 if line[:12] == "NORMAL MODES":
1245 if self.natom > 1:
1246 all_vibdisps = numpy.zeros((3 * self.natom, self.natom, 3), "d")
1248 self.skip_lines(inputfile, ['d', 'b', 'text', 'text', 'text', 'b'])
1250 for mode in range(0, 3 * self.natom, 6):
1251 header = next(inputfile)
1252 for atom in range(self.natom):
1253 all_vibdisps[mode:mode + 6, atom, 0] = next(inputfile).split()[1:]
1254 all_vibdisps[mode:mode + 6, atom, 1] = next(inputfile).split()[1:]
1255 all_vibdisps[mode:mode + 6, atom, 2] = next(inputfile).split()[1:]
1257 self.set_attribute('vibdisps', all_vibdisps[self.first_mode:])
1258 else:
1259 # we have a single atom
1260 self.set_attribute('vibdisps', numpy.array([]))
1262 # -----------
1263 # IR SPECTRUM
1264 # -----------
1265 #
1266 # Mode freq (cm**-1) T**2 TX TY TZ
1267 # -------------------------------------------------------------------
1268 # 6: 2069.36 1.674341 ( -0.000000 0.914970 -0.914970)
1269 # 7: 3978.81 76.856228 ( 0.000000 6.199041 -6.199042)
1270 # 8: 4113.34 61.077784 ( -0.000000 5.526201 5.526200)
1271 if line[:11] == "IR SPECTRUM":
1272 self.skip_lines(inputfile, ['d', 'b', 'header', 'd'])
1274 if self.natom > 1:
1275 all_vibirs = numpy.zeros((3 * self.natom,), "d")
1277 line = next(inputfile)
1278 while len(line) > 2:
1279 num = int(line[0:4])
1280 all_vibirs[num] = float(line.split()[2])
1281 line = next(inputfile)
1283 self.set_attribute('vibirs', all_vibirs[self.first_mode:])
1284 else:
1285 # we have a single atom
1286 self.set_attribute('vibirs', numpy.array([]))
1288 # --------------
1289 # RAMAN SPECTRUM
1290 # --------------
1291 #
1292 # Mode freq (cm**-1) Activity Depolarization
1293 # -------------------------------------------------------------------
1294 # 6: 296.23 5.291229 0.399982
1295 # 7: 356.70 0.000000 0.749764
1296 # 8: 368.27 0.000000 0.202068
1297 if line[:14] == "RAMAN SPECTRUM":
1298 self.skip_lines(inputfile, ['d', 'b', 'header', 'd'])
1300 if self.natom > 1:
1301 all_vibramans = numpy.zeros(3 * self.natom)
1303 line = next(inputfile)
1304 while len(line) > 2:
1305 num = int(line[0:4])
1306 all_vibramans[num] = float(line.split()[2])
1307 line = next(inputfile)
1309 self.set_attribute('vibramans', all_vibramans[self.first_mode:])
1310 else:
1311 # we have a single atom
1312 self.set_attribute('vibramans', numpy.array([]))
1314 # ORCA will print atomic charges along with the spin populations,
1315 # so care must be taken about choosing the proper column.
1316 # Population analyses are performed usually only at the end
1317 # of a geometry optimization or other run, so we want to
1318 # leave just the final atom charges.
1319 # Here is an example for Mulliken charges:
1320 # --------------------------------------------
1321 # MULLIKEN ATOMIC CHARGES AND SPIN POPULATIONS
1322 # --------------------------------------------
1323 # 0 H : 0.126447 0.002622
1324 # 1 C : -0.613018 -0.029484
1325 # 2 H : 0.189146 0.015452
1326 # 3 H : 0.320041 0.037434
1327 # ...
1328 # Sum of atomic charges : -0.0000000
1329 # Sum of atomic spin populations: 1.0000000
1330 if line[:23] == "MULLIKEN ATOMIC CHARGES":
1331 self.parse_charge_section(line, inputfile, 'mulliken')
1332 # Things are the same for Lowdin populations, except that the sums
1333 # are not printed (there is a blank line at the end).
1334 if line[:22] == "LOEWDIN ATOMIC CHARGES":
1335 self.parse_charge_section(line, inputfile, 'lowdin')
1336 #CHELPG Charges
1337 #--------------------------------
1338 # 0 C : 0.363939
1339 # 1 H : 0.025695
1340 # ...
1341 #--------------------------------
1342 #Total charge: -0.000000
1343 #--------------------------------
1344 if line.startswith('CHELPG Charges'):
1345 self.parse_charge_section(line, inputfile, 'chelpg')
1347 # It is not stated explicitely, but the dipole moment components printed by ORCA
1348 # seem to be in atomic units, so they will need to be converted. Also, they
1349 # are most probably calculated with respect to the origin .
1350 #
1351 # -------------
1352 # DIPOLE MOMENT
1353 # -------------
1354 # X Y Z
1355 # Electronic contribution: 0.00000 -0.00000 -0.00000
1356 # Nuclear contribution : 0.00000 0.00000 0.00000
1357 # -----------------------------------------
1358 # Total Dipole Moment : 0.00000 -0.00000 -0.00000
1359 # -----------------------------------------
1360 # Magnitude (a.u.) : 0.00000
1361 # Magnitude (Debye) : 0.00000
1362 #
1363 if line.strip() == "DIPOLE MOMENT":
1365 self.skip_lines(inputfile, ['d', 'XYZ', 'electronic', 'nuclear', 'd'])
1366 total = next(inputfile)
1367 assert "Total Dipole Moment" in total
1369 reference = [0.0, 0.0, 0.0]
1370 dipole = numpy.array([float(d) for d in total.split()[-3:]])
1371 dipole = utils.convertor(dipole, "ebohr", "Debye")
1373 if not hasattr(self, 'moments'):
1374 self.set_attribute('moments', [reference, dipole])
1375 else:
1376 try:
1377 assert numpy.all(self.moments[1] == dipole)
1378 except AssertionError:
1379 self.logger.warning('Overwriting previous multipole moments with new values')
1380 self.set_attribute('moments', [reference, dipole])
1382 if "Molecular Dynamics Iteration" in line:
1383 self.skip_lines(inputfile, ['d', 'ORCA MD', 'd', 'New Coordinates'])
1384 line = next(inputfile)
1385 tokens = line.split()
1386 assert tokens[0] == "time"
1387 time = utils.convertor(float(tokens[2]), "time_au", "fs")
1388 self.append_attribute('time', time)
1390 # Static polarizability.
1391 if line.strip() == "THE POLARIZABILITY TENSOR":
1392 if not hasattr(self, 'polarizabilities'):
1393 self.polarizabilities = []
1394 self.skip_lines(inputfile, ['d', 'b'])
1395 line = next(inputfile)
1396 assert line.strip() == "The raw cartesian tensor (atomic units):"
1397 polarizability = []
1398 for _ in range(3):
1399 line = next(inputfile)
1400 polarizability.append(line.split())
1401 self.polarizabilities.append(numpy.array(polarizability))
1403 if line.strip() == 'ORCA-CASSCF':
1404 # -------------------------------------------------------------------------------
1405 # ORCA-CASSCF
1406 # -------------------------------------------------------------------------------
1407 #
1408 # Symmetry handling UseSym ... ON
1409 # Point group ... C2
1410 # Used point group ... C2
1411 # Number of irreps ... 2
1412 # Irrep A has 10 SALCs (ofs= 0) #(closed)= 0 #(active)= 2
1413 # Irrep B has 10 SALCs (ofs= 10) #(closed)= 0 #(active)= 2
1414 # Symmetries of active orbitals:
1415 # MO = 0 IRREP= 0 (A)
1416 # MO = 1 IRREP= 1 (B)
1417 self.skip_lines(inputfile, ['d', 'b'])
1418 vals = next(inputfile).split()
1419 # Symmetry section is only printed if symmetry is used.
1420 if vals[0] == 'Symmetry':
1421 assert vals[-1] == 'ON'
1422 point_group = next(inputfile).split()[-1]
1423 used_point_group = next(inputfile).split()[-1]
1424 num_irreps = int(next(inputfile).split()[-1])
1425 num_active = 0
1426 # Parse the irreps.
1427 for i, line in zip(range(num_irreps), inputfile):
1428 reg = r'Irrep\s+(\w+) has\s+(\d+) SALCs \(ofs=\s*(\d+)\) #\(closed\)=\s*(\d+) #\(active\)=\s*(\d+)'
1429 groups = re.search(reg, line).groups()
1430 irrep = groups[0]
1431 salcs, ofs, closed, active = map(int, groups[1:])
1432 num_active += active
1433 self.skip_line(inputfile, 'Symmetries')
1434 # Parse the symmetries of the active orbitals.
1435 for i, line in zip(range(num_active), inputfile):
1436 reg = r'(\d+) IRREP= (\d+) \((\w+)\)'
1437 groups = re.search(reg, line).groups()
1438 mo, irrep_idx, irrep = groups
1440 # Skip until the system specific settings.
1441 # This will align the cases of symmetry on and off.
1442 line = next(inputfile)
1443 while line[:25] != 'SYSTEM-SPECIFIC SETTINGS:':
1444 line = next(inputfile)
1446 # SYSTEM-SPECIFIC SETTINGS:
1447 # Number of active electrons ... 4
1448 # Number of active orbitals ... 4
1449 # Total number of electrons ... 4
1450 # Total number of orbitals ... 20
1451 num_el = int(next(inputfile).split()[-1])
1452 num_orbs = int(next(inputfile).split()[-1])
1453 total_el = int(next(inputfile).split()[-1])
1454 total_orbs = int(next(inputfile).split()[-1])
1456 # Determined orbital ranges:
1457 # Internal 0 - -1 ( 0 orbitals)
1458 # Active 0 - 3 ( 4 orbitals)
1459 # External 4 - 19 ( 16 orbitals)
1460 self.skip_lines(inputfile, ['b', 'Determined'])
1461 orbital_ranges = []
1462 # Parse the orbital ranges for: Internal, Active, and External orbitals.
1463 for i in range(3):
1464 vals = next(inputfile).split()
1465 start, end, num = int(vals[1]), int(vals[3]), int(vals[5])
1466 # Change from inclusive to exclusive in order to match python.
1467 end = end + 1
1468 assert end - start == num
1469 orbital_ranges.append((start, end, num))
1471 line = next(inputfile)
1472 while line[:8] != 'CI-STEP:':
1473 line = next(inputfile)
1475 # CI-STEP:
1476 # CI strategy ... General CI
1477 # Number of symmetry/multplity blocks ... 1
1478 # BLOCK 1 WEIGHT= 1.0000
1479 # Multiplicity ... 1
1480 # Irrep ... 0 (A)
1481 # #(Configurations) ... 11
1482 # #(CSFs) ... 12
1483 # #(Roots) ... 1
1484 # ROOT=0 WEIGHT= 1.000000
1485 self.skip_line(inputfile, 'CI strategy')
1486 num_blocks = int(next(inputfile).split()[-1])
1487 for b in range(1, num_blocks + 1):
1488 vals = next(inputfile).split()
1489 block = int(vals[1])
1490 weight = float(vals[3])
1491 assert b == block
1492 mult = int(next(inputfile).split()[-1])
1493 vals = next(inputfile).split()
1494 # The irrep will only be printed if using symmetry.
1495 if vals[0] == 'Irrep':
1496 irrep_idx = int(vals[-2])
1497 irrep = vals[-1].strip('()')
1498 vals = next(inputfile).split()
1499 num_confs = int(vals[-1])
1500 num_csfs = int(next(inputfile).split()[-1])
1501 num_roots = int(next(inputfile).split()[-1])
1502 # Parse the roots.
1503 for r, line in zip(range(num_roots), inputfile):
1504 reg = r'=(\d+) WEIGHT=\s*(\d\.\d+)'
1505 groups = re.search(reg, line).groups()
1506 root = int(groups[0])
1507 weight = float(groups[1])
1508 assert r == root
1510 # Skip additional setup printing and CASSCF iterations.
1511 line = next(inputfile).strip()
1512 while line != 'CASSCF RESULTS':
1513 line = next(inputfile).strip()
1515 # --------------
1516 # CASSCF RESULTS
1517 # --------------
1518 #
1519 # Final CASSCF energy : -14.597120777 Eh -397.2078 eV
1520 self.skip_lines(inputfile, ['d', 'b'])
1521 casscf_energy = float(next(inputfile).split()[4])
1523 # This is only printed for first and last step of geometry optimization.
1524 # ----------------
1525 # ORBITAL ENERGIES
1526 # ----------------
1527 #
1528 # NO OCC E(Eh) E(eV) Irrep
1529 # 0 0.0868 0.257841 7.0162 1-A
1530 self.skip_lines(inputfile, ['b', 'd'])
1531 if next(inputfile).strip() == 'ORBITAL ENERGIES':
1532 self.skip_lines(inputfile, ['d', 'b', 'NO'])
1533 orbitals = []
1534 vals = next(inputfile).split()
1535 while vals:
1536 occ, eh, ev = map(float, vals[1:4])
1537 # The irrep will only be printed if using symmetry.
1538 if len(vals) == 5:
1539 idx, irrep = vals[4].split('-')
1540 orbitals.append((occ, ev, int(idx), irrep))
1541 else:
1542 orbitals.append((occ, ev))
1543 vals = next(inputfile).split()
1544 self.skip_lines(inputfile, ['b', 'd'])
1546 # Orbital Compositions
1547 # ---------------------------------------------
1548 # CAS-SCF STATES FOR BLOCK 1 MULT= 1 IRREP= Ag NROOTS= 2
1549 # ---------------------------------------------
1550 #
1551 # ROOT 0: E= -14.5950507665 Eh
1552 # 0.89724 [ 0]: 2000
1553 for b in range(num_blocks):
1554 # Parse the block data.
1555 reg = r'BLOCK\s+(\d+) MULT=\s*(\d+) (IRREP=\s*\w+ )?(NROOTS=\s*(\d+))?'
1556 groups = re.search(reg, next(inputfile)).groups()
1557 block = int(groups[0])
1558 mult = int(groups[1])
1559 # The irrep will only be printed if using symmetry.
1560 if groups[2] is not None:
1561 irrep = groups[2].split('=')[1].strip()
1562 nroots = int(groups[3].split('=')[1])
1564 self.skip_lines(inputfile, ['d', 'b'])
1566 line = next(inputfile).strip()
1567 while line:
1568 if line[:4] == 'ROOT':
1569 # Parse the root section.
1570 reg = r'(\d+):\s*E=\s*(-?\d+.\d+) Eh(\s+\d+\.\d+ eV)?(\s+\d+\.\d+)?'
1571 groups = re.search(reg, line).groups()
1572 root = int(groups[0])
1573 energy = float(groups[1])
1574 # Excitation energies are only printed for excited state roots.
1575 if groups[2] is not None:
1576 excitation_energy_ev = float(groups[2].split()[0])
1577 excitation_energy_cm = float(groups[3])
1578 else:
1579 # Parse the occupations section.
1580 reg = r'(\d+\.\d+) \[\s*(\d+)\]: (\d+)'
1581 groups = re.search(reg, line).groups()
1582 coeff = float(groups[0])
1583 number = float(groups[1])
1584 occupations = list(map(int, groups[2]))
1586 line = next(inputfile).strip()
1588 # Skip any extended wavefunction printing.
1589 while line != 'DENSITY MATRIX':
1590 line = next(inputfile).strip()
1592 self.skip_lines(inputfile, ['d', 'b'])
1593 # --------------
1594 # DENSITY MATRIX
1595 # --------------
1596 #
1597 # 0 1 2 3
1598 # 0 0.897244 0.000000 0.000000 0.000000
1599 # 1 0.000000 0.533964 0.000000 0.000000
1600 density = numpy.zeros((num_orbs, num_orbs))
1601 for i in range(0, num_orbs, 6):
1602 next(inputfile)
1603 for j, line in zip(range(num_orbs), inputfile):
1604 density[j][i:i + 6] = list(map(float, line.split()[1:]))
1605 self.skip_lines(inputfile, ['Trace', 'b', 'd'])
1607 # This is only printed for open-shells.
1608 # -------------------
1609 # SPIN-DENSITY MATRIX
1610 # -------------------
1611 #
1612 # 0 1 2 3 4 5
1613 # 0 -0.003709 0.001410 0.000074 -0.000564 -0.007978 0.000735
1614 # 1 0.001410 -0.001750 -0.000544 -0.003815 0.008462 -0.004529
1615 if next(inputfile).strip() == 'SPIN-DENSITY MATRIX':
1616 self.skip_lines(inputfile, ['d', 'b'])
1617 spin_density = numpy.zeros((num_orbs, num_orbs))
1618 for i in range(0, num_orbs, 6):
1619 next(inputfile)
1620 for j, line in zip(range(num_orbs), inputfile):
1621 spin_density[j][i:i + 6] = list(map(float, line.split()[1:]))
1622 self.skip_lines(inputfile, ['Trace', 'b', 'd', 'ENERGY'])
1623 self.skip_lines(inputfile, ['d', 'b'])
1625 # -----------------
1626 # ENERGY COMPONENTS
1627 # -----------------
1628 #
1629 # One electron energy : -18.811767801 Eh -511.8942 eV
1630 # Two electron energy : 4.367616074 Eh 118.8489 eV
1631 # Nuclear repuslion energy : 0.000000000 Eh 0.0000 eV
1632 # ----------------
1633 # -14.444151727
1634 #
1635 # Kinetic energy : 14.371970266 Eh 391.0812 eV
1636 # Potential energy : -28.816121993 Eh -784.1265 eV
1637 # Virial ratio : -2.005022378
1638 # ----------------
1639 # -14.444151727
1640 #
1641 # Core energy : -13.604678408 Eh -370.2021 eV
1642 one_el_energy = float(next(inputfile).split()[4])
1643 two_el_energy = float(next(inputfile).split()[4])
1644 nuclear_repulsion_energy = float(next(inputfile).split()[4])
1645 self.skip_line(inputfile, 'dashes')
1646 energy = float(next(inputfile).strip())
1647 self.skip_line(inputfile, 'blank')
1648 kinetic_energy = float(next(inputfile).split()[3])
1649 potential_energy = float(next(inputfile).split()[3])
1650 virial_ratio = float(next(inputfile).split()[3])
1651 self.skip_line(inputfile, 'dashes')
1652 energy = float(next(inputfile).strip())
1653 self.skip_line(inputfile, 'blank')
1654 core_energy = float(next(inputfile).split()[3])
1656 if line[:15] == 'TOTAL RUN TIME:':
1657 self.metadata['success'] = True
1659 def parse_charge_section(self, line, inputfile, chargestype):
1660 """Parse a charge section, modifies class in place
1662 Parameters
1663 ----------
1664 line : str
1665 the line which triggered entry here
1666 inputfile : file
1667 handle to file object
1668 chargestype : str
1669 what type of charge we're dealing with, must be one of
1670 'mulliken', 'lowdin' or 'chelpg'
1671 """
1672 has_spins = 'AND SPIN POPULATIONS' in line
1674 if not hasattr(self, "atomcharges"):
1675 self.atomcharges = {}
1676 if has_spins and not hasattr(self, "atomspins"):
1677 self.atomspins = {}
1679 self.skip_line(inputfile, 'dashes')
1681 # depending on chargestype, decide when to stop parsing lines
1682 # start, stop - indices for slicing lines and grabbing values
1683 if chargestype == 'mulliken':
1684 should_stop = lambda x: x.startswith('Sum of atomic charges')
1685 start, stop = 8, 20
1686 elif chargestype == 'lowdin':
1687 # stops when blank line encountered
1688 should_stop = lambda x: not bool(x.strip())
1689 start, stop = 8, 20
1690 elif chargestype == 'chelpg':
1691 should_stop = lambda x: x.startswith('---')
1692 start, stop = 11, 26
1694 charges = []
1695 if has_spins:
1696 spins = []
1698 line = next(inputfile)
1699 while not should_stop(line):
1700 # Don't add point charges or embedding potentials.
1701 if "Q :" not in line:
1702 charges.append(float(line[start:stop]))
1703 if has_spins:
1704 spins.append(float(line[stop:]))
1705 line = next(inputfile)
1707 self.atomcharges[chargestype] = charges
1708 if has_spins:
1709 self.atomspins[chargestype] = spins
1711 def parse_scf_condensed_format(self, inputfile, line):
1712 """ Parse the SCF convergence information in condensed format """
1714 # This is what it looks like
1715 # ITER Energy Delta-E Max-DP RMS-DP [F,P] Damp
1716 # *** Starting incremental Fock matrix formation ***
1717 # 0 -384.5203638934 0.000000000000 0.03375012 0.00223249 0.1351565 0.7000
1718 # 1 -384.5792776162 -0.058913722842 0.02841696 0.00175952 0.0734529 0.7000
1719 # ***Turning on DIIS***
1720 # 2 -384.6074211837 -0.028143567475 0.04968025 0.00326114 0.0310435 0.0000
1721 # 3 -384.6479682063 -0.040547022616 0.02097477 0.00121132 0.0361982 0.0000
1722 # 4 -384.6571124353 -0.009144228947 0.00576471 0.00035160 0.0061205 0.0000
1723 # 5 -384.6574659959 -0.000353560584 0.00191156 0.00010160 0.0025838 0.0000
1724 # 6 -384.6574990782 -0.000033082375 0.00052492 0.00003800 0.0002061 0.0000
1725 # 7 -384.6575005762 -0.000001497987 0.00020257 0.00001146 0.0001652 0.0000
1726 # 8 -384.6575007321 -0.000000155848 0.00008572 0.00000435 0.0000745 0.0000
1727 # **** Energy Check signals convergence ****
1729 assert line[2] == "Delta-E"
1730 assert line[3] == "Max-DP"
1732 if not hasattr(self, "scfvalues"):
1733 self.scfvalues = []
1735 self.scfvalues.append([])
1737 # Try to keep track of the converger (NR, DIIS, SOSCF, etc.).
1738 diis_active = True
1739 while line:
1741 maxDP = None
1742 if 'Newton-Raphson' in line:
1743 diis_active = False
1744 elif 'SOSCF' in line:
1745 diis_active = False
1746 elif line[0].isdigit():
1747 shim = 0
1748 try:
1749 energy = float(line[1])
1750 deltaE = float(line[2])
1751 maxDP = float(line[3 + int(not diis_active)])
1752 rmsDP = float(line[4 + int(not diis_active)])
1753 except ValueError as e:
1754 # Someone in Orca forgot to properly add spaces in the scf printing
1755 # code looks like:
1756 # %3i %17.10f%12.12f%11.8f %11.8f
1757 if line[1].count('.') == 2:
1758 integer1, decimal1_integer2, decimal2 = line[1].split('.')
1759 decimal1, integer2 = decimal1_integer2[:10], decimal1_integer2[10:]
1760 energy = float(integer1 + '.' + decimal1)
1761 deltaE = float(integer2 + '.' + decimal2)
1762 maxDP = float(line[2 + int(not diis_active)])
1763 rmsDP = float(line[3 + int(not diis_active)])
1764 elif line[1].count('.') == 3:
1765 integer1, decimal1_integer2, decimal2_integer3, decimal3 = line[1].split('.')
1766 decimal1, integer2 = decimal1_integer2[:10], decimal1_integer2[10:]
1767 decimal2, integer3 = decimal2_integer3[:12], decimal2_integer3[12:]
1768 energy = float(integer1 + '.' + decimal1)
1769 deltaE = float(integer2 + '.' + decimal2)
1770 maxDP = float(integer3 + '.' + decimal3)
1771 rmsDP = float(line[2 + int(not diis_active)])
1772 elif line[2].count('.') == 2:
1773 integer1, decimal1_integer2, decimal2 = line[2].split('.')
1774 decimal1, integer2 = decimal1_integer2[:12], decimal1_integer2[12:]
1775 deltaE = float(integer1 + '.' + decimal1)
1776 maxDP = float(integer2 + '.' + decimal2)
1777 rmsDP = float(line[3 + int(not diis_active)])
1778 else:
1779 raise e
1781 self.scfvalues[-1].append([deltaE, maxDP, rmsDP])
1783 try:
1784 line = next(inputfile).split()
1785 except StopIteration:
1786 self.logger.warning('File terminated before end of last SCF! Last Max-DP: {}'.format(maxDP))
1787 break
1789 def parse_scf_expanded_format(self, inputfile, line):
1790 """ Parse SCF convergence when in expanded format. """
1792 # The following is an example of the format
1793 # -----------------------------------------
1794 #
1795 # *** Starting incremental Fock matrix formation ***
1796 #
1797 # ----------------------------
1798 # ! ITERATION 0 !
1799 # ----------------------------
1800 # Total Energy : -377.960836651297 Eh
1801 # Energy Change : -377.960836651297 Eh
1802 # MAX-DP : 0.100175793695
1803 # RMS-DP : 0.004437973661
1804 # Actual Damping : 0.7000
1805 # Actual Level Shift : 0.2500 Eh
1806 # Int. Num. El. : 43.99982197 (UP= 21.99991099 DN= 21.99991099)
1807 # Exchange : -34.27550826
1808 # Correlation : -2.02540957
1809 #
1810 #
1811 # ----------------------------
1812 # ! ITERATION 1 !
1813 # ----------------------------
1814 # Total Energy : -378.118458080109 Eh
1815 # Energy Change : -0.157621428812 Eh
1816 # MAX-DP : 0.053240648588
1817 # RMS-DP : 0.002375092508
1818 # Actual Damping : 0.7000
1819 # Actual Level Shift : 0.2500 Eh
1820 # Int. Num. El. : 43.99994143 (UP= 21.99997071 DN= 21.99997071)
1821 # Exchange : -34.00291075
1822 # Correlation : -2.01607243
1823 #
1824 # ***Turning on DIIS***
1825 #
1826 # ----------------------------
1827 # ! ITERATION 2 !
1828 # ----------------------------
1829 # ....
1830 #
1831 if not hasattr(self, "scfvalues"):
1832 self.scfvalues = []
1834 self.scfvalues.append([])
1836 line = "Foo" # dummy argument to enter loop
1837 while line.find("******") < 0:
1838 try:
1839 line = next(inputfile)
1840 except StopIteration:
1841 self.logger.warning('File terminated before end of last SCF!')
1842 break
1843 info = line.split()
1844 if len(info) > 1 and info[1] == "ITERATION":
1845 dashes = next(inputfile)
1846 energy_line = next(inputfile).split()
1847 energy = float(energy_line[3])
1848 deltaE_line = next(inputfile).split()
1849 deltaE = float(deltaE_line[3])
1850 if energy == deltaE:
1851 deltaE = 0
1852 maxDP_line = next(inputfile).split()
1853 maxDP = float(maxDP_line[2])
1854 rmsDP_line = next(inputfile).split()
1855 rmsDP = float(rmsDP_line[2])
1856 self.scfvalues[-1].append([deltaE, maxDP, rmsDP])
1858 return
1860 # end of parse_scf_expanded_format
1862 def _append_scfvalues_scftargets(self, inputfile, line):
1863 # The SCF convergence targets are always printed after this, but apparently
1864 # not all of them always -- for example the RMS Density is missing for geometry
1865 # optimization steps. So, assume the previous value is still valid if it is
1866 # not found. For additional certainty, assert that the other targets are unchanged.
1867 while not "Last Energy change" in line:
1868 line = next(inputfile)
1869 deltaE_value = float(line.split()[4])
1870 deltaE_target = float(line.split()[7])
1871 line = next(inputfile)
1872 if "Last MAX-Density change" in line:
1873 maxDP_value = float(line.split()[4])
1874 maxDP_target = float(line.split()[7])
1875 line = next(inputfile)
1876 if "Last RMS-Density change" in line:
1877 rmsDP_value = float(line.split()[4])
1878 rmsDP_target = float(line.split()[7])
1879 else:
1880 rmsDP_value = self.scfvalues[-1][-1][2]
1881 rmsDP_target = self.scftargets[-1][2]
1882 assert deltaE_target == self.scftargets[-1][0]
1883 assert maxDP_target == self.scftargets[-1][1]
1884 self.scfvalues[-1].append([deltaE_value, maxDP_value, rmsDP_value])
1885 self.scftargets.append([deltaE_target, maxDP_target, rmsDP_target])