Coverage for cclib/parser/daltonparser.py : 99%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2#
3# Copyright (c) 2020, the cclib development team
4#
5# This file is part of cclib (http://cclib.github.io) and is distributed under
6# the terms of the BSD 3-Clause License.
8"""Parser for DALTON output files"""
11import re
13import numpy
15from cclib.parser import logfileparser
16from cclib.parser import utils
19class DALTON(logfileparser.Logfile):
20 """A DALTON log file."""
22 def __init__(self, *args, **kwargs):
24 # Call the __init__ method of the superclass
25 super(DALTON, self).__init__(logname="DALTON", *args, **kwargs)
27 def __str__(self):
28 """Return a string representation of the object."""
29 return "DALTON log file %s" % (self.filename)
31 def __repr__(self):
32 """Return a representation of the object."""
33 return 'DALTON("%s")' % (self.filename)
35 def normalisesym(self, label):
36 """DALTON does not require normalizing symmetry labels."""
37 return label
39 def before_parsing(self):
41 # Used to decide whether to wipe the atomcoords clean.
42 self.firststdorient = True
44 # Use to track which section/program output we are parsing,
45 # since some programs print out the same headers, which we
46 # would like to use as triggers.
47 self.section = None
49 # If there is no symmetry, assume this.
50 self.symlabels = ['Ag']
52 # Is the basis set from a single library file? This is true
53 # when the first line is BASIS, false for INTGRL/ATOMBASIS.
54 self.basislibrary = True
57 def parse_geometry(self, lines):
58 """Parse DALTON geometry lines into an atomcoords array."""
60 coords = []
61 for lin in lines:
63 # Without symmetry there are simply four columns, and with symmetry
64 # an extra label is printed after the atom type.
65 cols = lin.split()
66 if cols[1][0] == "_":
67 xyz = cols[2:]
68 else:
69 xyz = cols[1:]
71 # The assumption is that DALTON always print in atomic units.
72 xyz = [utils.convertor(float(x), 'bohr', 'Angstrom') for x in xyz]
73 coords.append(xyz)
75 return coords
77 def extract(self, inputfile, line):
78 """Extract information from the file object inputfile."""
80 # Extract the version number and optionally the Git revision
81 # number.
82 #
83 # Example strings that at least the major version is parsed from:
84 #
85 # This is output from DALTON 2013.2
86 # 2013.4
87 # 2014.0
88 # 2014.alpha
89 # 2015.0
90 # 2016.alpha
91 # (Release 1.1, September 2000)
92 # Release 2011 (DEVELOPMENT VERSION)
93 # Release 2011 (Rev. 0, Dec. 2010)
94 # Release 2011 (Rev. 0, Mar. 2011)
95 # (Release 2.0 rev. 0, Mar. 2005)
96 # (Release Dalton2013 patch 0)
97 # release Dalton2017.alpha (2016)
98 # release Dalton2017.alpha (2017)
99 # release Dalton2018.0 (2018)
100 # release Dalton2018.0-rc (2018)
101 # release Dalton2018.alpha (2018)
102 # release Dalton2019.alpha (2018)
103 if line[4:30] == "This is output from DALTON":
104 rs = (r"from DALTON \(?(?:Release|release)?\s?(?:Dalton)?"
105 r"(\d+\.?[\w\d\-]*)"
106 r"(?:[\s,]\(?)?")
107 match = re.search(rs, line)
108 if match:
109 package_version = match.groups()[0]
110 self.metadata["package_version"] = package_version
111 self.metadata["legacy_package_version"] = package_version
112 # Don't add revision information to the main package version for now.
113 if "Last Git revision" in line:
114 revision = line.split()[4]
115 package_version = self.metadata.get("package_version")
116 if package_version:
117 self.metadata["package_version"] = "{}+{}".format(package_version, revision)
119 # Is the basis set from a single library file, or is it
120 # manually specified? See before_parsing().
121 if line[:6] == 'INTGRL'or line[:9] == 'ATOMBASIS':
122 self.basislibrary = False
124 # This section at the start of geometry optimization jobs gives us information
125 # about optimization targets (geotargets) and possibly other things as well.
126 # Notice how the number of criteria required to converge is set to 2 here,
127 # but this parameter can (probably) be tweaked in the input.
128 #
129 # Chosen parameters for *OPTIMI :
130 # -------------------------------
131 #
132 # Default 1st order method will be used: BFGS update.
133 # Optimization will be performed in redundant internal coordinates (by default).
134 # Model Hessian will be used as initial Hessian.
135 # The model Hessian parameters of Roland Lindh will be used.
136 #
137 #
138 # Trust region method will be used to control step (default).
139 #
140 # Convergence threshold for gradient set to : 1.00D-04
141 # Convergence threshold for energy set to : 1.00D-06
142 # Convergence threshold for step set to : 1.00D-04
143 # Number of convergence criteria set to : 2
144 #
145 if line.strip()[:25] == "Convergence threshold for":
147 if not hasattr(self, 'geotargets'):
148 self.geotargets = []
149 self.geotargets_names = []
151 target = utils.float(line.split()[-1])
152 name = line.strip()[25:].split()[0]
154 self.geotargets.append(target)
155 self.geotargets_names.append(name)
157 # This is probably the first place where atomic symmetry labels are printed,
158 # somewhere afer the SYMGRP point group information section. We need to know
159 # which atom is in which symmetry, since this influences how some things are
160 # print later on. We can also get some generic attributes along the way.
161 #
162 # Isotopic Masses
163 # ---------------
164 #
165 # C _1 12.000000
166 # C _2 12.000000
167 # C _1 12.000000
168 # C _2 12.000000
169 # ...
170 #
171 # Note that when there is no symmetry there are only two columns here.
172 #
173 # It is also a good idea to keep in mind that DALTON, with symmetry on, operates
174 # in a specific point group, so symmetry atoms have no internal representation.
175 # Therefore only atoms marked as "_1" or "#1" in other places are actually
176 # represented in the model. The symmetry atoms (higher symmetry indices) are
177 # generated on the fly when writing the output. We will save the symmetry indices
178 # here for later use.
179 #
180 # Additional note: the symmetry labels are printed only for atoms that have
181 # symmetry images... so assume "_1" if a label is missing. For example, there will
182 # be no label for atoms on an axes, such as the oxygen in water in C2v:
183 #
184 # O 15.994915
185 # H _1 1.007825
186 # H _2 1.007825
187 #
188 if line.strip() == "Isotopic Masses":
190 self.skip_lines(inputfile, ['d', 'b'])
192 # Since some symmetry labels may be missing, read in all lines first.
193 lines = []
194 line = next(inputfile)
195 while line.strip():
196 lines.append(line)
197 line = next(inputfile)
199 # Split lines into columsn and dd any missing symmetry labels, if needed.
200 lines = [l.split() for l in lines]
201 if any([len(l) == 3 for l in lines]):
202 for il, l in enumerate(lines):
203 if len(l) == 2:
204 lines[il] = [l[0], "_1", l[1]]
206 atomnos = []
207 symmetry_atoms = []
208 atommasses = []
209 for cols in lines:
210 cols0 = ''.join([i for i in cols[0] if not i.isdigit()]) #remove numbers
211 atomnos.append(self.table.number[cols0])
212 if len(cols) == 3:
213 symmetry_atoms.append(int(cols[1][1]))
214 atommasses.append(float(cols[2]))
215 else:
216 atommasses.append(float(cols[1]))
218 self.set_attribute('atomnos', atomnos)
219 self.set_attribute('atommasses', atommasses)
221 self.set_attribute('natom', len(atomnos))
222 self.set_attribute('natom', len(atommasses))
224 # Save this for later if there were any labels.
225 self.symmetry_atoms = symmetry_atoms or None
227 # This section is close to the beginning of the file, and can be used
228 # to parse natom, nbasis and atomnos. We also construct atombasis here,
229 # although that is symmetry-dependent (see inline comments). Note that
230 # DALTON operates on the idea of atom type, which are not necessarily
231 # unique element-wise.
232 #
233 # Atoms and basis sets
234 # --------------------
235 #
236 # Number of atom types : 6
237 # Total number of atoms: 20
238 #
239 # Basis set used is "STO-3G" from the basis set library.
240 #
241 # label atoms charge prim cont basis
242 # ----------------------------------------------------------------------
243 # C 6 6.0000 15 5 [6s3p|2s1p]
244 # H 4 1.0000 3 1 [3s|1s]
245 # C 2 6.0000 15 5 [6s3p|2s1p]
246 # H 2 1.0000 3 1 [3s|1s]
247 # C 2 6.0000 15 5 [6s3p|2s1p]
248 # H 4 1.0000 3 1 [3s|1s]
249 # ----------------------------------------------------------------------
250 # total: 20 70.0000 180 60
251 # ----------------------------------------------------------------------
252 #
253 # Threshold for neglecting AO integrals: 1.00D-12
254 #
255 if line.strip() == "Atoms and basis sets":
257 self.skip_lines(inputfile, ['d', 'b'])
259 line = next(inputfile)
260 assert "Number of atom types" in line
261 self.ntypes = int(line.split()[-1])
263 line = next(inputfile)
264 assert "Total number of atoms:" in line
265 self.set_attribute("natom", int(line.split()[-1]))
267 # When using the INTGRL keyword and not pulling from the
268 # basis set library, the "Basis set used" line doesn't
269 # appear.
270 if not self.basislibrary:
271 self.skip_line(inputfile, 'b')
272 else:
273 #self.skip_lines(inputfile, ['b', 'basisname', 'b'])
274 line = next(inputfile)
275 line = next(inputfile)
276 self.metadata["basis_set"] = line.split()[4].strip('\"')
277 line = next(inputfile)
279 line = next(inputfile)
280 cols = line.split()
282 # Detecting which columns things are in will be somewhat more robust
283 # to formatting changes in the future.
284 iatoms = cols.index('atoms')
285 icharge = cols.index('charge')
286 icont = cols.index('cont')
288 self.skip_line(inputfile, 'dashes')
290 atomnos = []
291 atombasis = []
292 nbasis = 0
293 for itype in range(self.ntypes):
295 line = next(inputfile)
296 cols = line.split()
298 atoms = int(cols[iatoms])
299 charge = float(cols[icharge])
300 assert int(charge) == charge
301 charge = int(charge)
302 cont = int(cols[icont])
304 for at in range(atoms):
306 atomnos.append(charge)
308 # If symmetry atoms are present, these will have basis functions
309 # printed immediately after the one unique atom, so for all
310 # practical purposes cclib can assume the ordering in atombasis
311 # follows this out-of order scheme to match the output.
312 if self.symmetry_atoms:
314 # So we extend atombasis only for the unique atoms (with a
315 # symmetry index of 1), interleaving the basis functions
316 # for this atoms with basis functions for all symmetry atoms.
317 if self.symmetry_atoms[at] == 1:
318 nsyms = 1
319 while (at + nsyms < self.natom) and self.symmetry_atoms[at + nsyms] == nsyms + 1:
320 nsyms += 1
321 for isym in range(nsyms):
322 istart = nbasis + isym
323 iend = nbasis + cont*nsyms + isym
324 atombasis.append(list(range(istart, iend, nsyms)))
325 nbasis += cont*nsyms
327 else:
328 atombasis.append(list(range(nbasis, nbasis + cont)))
329 nbasis += cont
331 self.set_attribute('atomnos', atomnos)
332 self.set_attribute('atombasis', atombasis)
333 self.set_attribute('nbasis', nbasis)
335 self.skip_line(inputfile, 'dashes')
337 line = next(inputfile)
338 self.set_attribute('natom', int(line.split()[iatoms]))
339 self.set_attribute('nbasis', int(line.split()[icont]))
341 self.skip_line(inputfile, 'dashes')
343 # The Gaussian exponents and contraction coefficients are printed for each primitive
344 # and then the contraction information is printed separately (see below) Both segmented
345 # and general contractions are used, but we can parse them the same way since zeros are
346 # inserted for primitives that are not used. However, no atom index is printed here
347 # so we don't really know when a new atom is started without using information
348 # from other section (we should already have atombasis parsed at this point).
349 #
350 # Orbital exponents and contraction coefficients
351 # ----------------------------------------------
352 #
353 #
354 # C #1 1s 1 71.616837 0.1543 0.0000
355 # seg. cont. 2 13.045096 0.5353 0.0000
356 # 3 3.530512 0.4446 0.0000
357 # 4 2.941249 0.0000 -0.1000
358 # ...
359 #
360 # Here is a corresponding fragment for general contractions:
361 #
362 # C 1s 1 33980.000000 0.0001 -0.0000 0.0000 0.0000 0.0000
363 # 0.0000 0.0000 0.0000 0.0000
364 # gen. cont. 2 5089.000000 0.0007 -0.0002 0.0000 0.0000 0.0000
365 # 0.0000 0.0000 0.0000 0.0000
366 # 3 1157.000000 0.0037 -0.0008 0.0000 0.0000 0.0000
367 # 0.0000 0.0000 0.0000 0.0000
368 # 4 326.600000 0.0154 -0.0033 0.0000 0.0000 0.0000
369 # ...
370 #
371 if line.strip() == "Orbital exponents and contraction coefficients":
373 self.skip_lines(inputfile, ['d', 'b', 'b'])
375 # Here we simply want to save the numbers defining each primitive for later use,
376 # where the first number is the exponent, and the rest are coefficients which
377 # should be zero if the primitive is not used in a contraction. This list is
378 # symmetry agnostic, although primitives/contractions are not generally.
379 self.primitives = []
381 prims = []
382 line = next(inputfile)
383 while line.strip():
385 # Each contraction/section is separated by a blank line, and at the very
386 # end there is an extra blank line.
387 while line.strip():
389 # For generalized contraction it is typical to see the coefficients wrapped
390 # to new lines, so we must collect them until we are sure a primitive starts.
391 if line[:30].strip():
392 if prims:
393 self.primitives.append(prims)
394 prims = []
396 prims += [float(x) for x in line[20:].split()]
398 line = next(inputfile)
400 line = next(inputfile)
402 # At the end we have the final primitive to save.
403 self.primitives.append(prims)
405 # This is the corresponding section to the primitive definitions parsed above, so we
406 # assume those numbers are available in the variable 'primitives'. Here we read in the
407 # indicies of primitives, which we use to construct gbasis.
408 #
409 # Contracted Orbitals
410 # -------------------
411 #
412 # 1 C 1s 1 2 3 4 5 6 7 8 9 10 11 12
413 # 2 C 1s 1 2 3 4 5 6 7 8 9 10 11 12
414 # 3 C 1s 10
415 # 4 C 1s 11
416 # ...
417 #
418 # Here is an fragment with symmetry labels:
419 #
420 # ...
421 # 1 C #1 1s 1 2 3
422 # 2 C #2 1s 7 8 9
423 # 3 C #1 1s 4 5 6
424 # ...
425 #
426 if line.strip() == "Contracted Orbitals":
428 self.skip_lines(inputfile, ['d', 'b'])
430 # This is the reverse of atombasis, so that we can easily map from a basis functions
431 # to the corresponding atom for use in the loop below.
432 basisatoms = [None for i in range(self.nbasis)]
433 for iatom in range(self.natom):
434 for ibasis in self.atombasis[iatom]:
435 basisatoms[ibasis] = iatom
437 # Since contractions are not generally given in order (when there is symmetry),
438 # start with an empty list for gbasis.
439 gbasis = [[] for i in range(self.natom)]
441 # This will hold the number of contractions already printed for each orbital,
442 # counting symmetry orbitals separately.
443 orbitalcount = {}
445 for ibasis in range(self.nbasis):
447 line = next(inputfile)
448 cols = line.split()
450 # The first columns is always the basis function index, which we can assert.
451 assert int(cols[0]) == ibasis + 1
453 # The number of columns is differnet when symmetry is used. If there are further
454 # complications, it may be necessary to use exact slicing, since the formatting
455 # of this section seems to be fixed (although columns can be missing). Notice how
456 # We subtract one from the primitive indices here already to match cclib's
457 # way of counting from zero in atombasis.
458 if '#' in line:
459 sym = cols[2]
460 orbital = cols[3]
461 prims = [int(i) - 1 for i in cols[4:]]
462 else:
463 sym = None
464 orbital = cols[2]
465 prims = [int(i) - 1 for i in cols[3:]]
467 shell = orbital[0]
468 subshell = orbital[1].upper()
470 iatom = basisatoms[ibasis]
472 # We want to count the number of contractiong already parsed for each orbital,
473 # but need to make sure to differentiate between atoms and symmetry atoms.
474 orblabel = str(iatom) + '.' + orbital + (sym or "")
475 orbitalcount[orblabel] = orbitalcount.get(orblabel, 0) + 1
477 # Here construct the actual primitives for gbasis, which should be a list
478 # of 2-tuples containing an exponent an coefficient. Note how we are indexing
479 # self.primitives from zero although the printed numbering starts from one.
480 primitives = []
481 for ip in prims:
482 p = self.primitives[ip]
483 exponent = p[0]
484 coefficient = p[orbitalcount[orblabel]]
485 primitives.append((exponent, coefficient))
487 contraction = (subshell, primitives)
488 if contraction not in gbasis[iatom]:
489 gbasis[iatom].append(contraction)
491 self.skip_line(inputfile, 'blank')
493 self.set_attribute('gbasis', gbasis)
495 # Since DALTON sometimes uses symmetry labels (Ag, Au, etc.) and sometimes
496 # just the symmetry group index, we need to parse and keep a mapping between
497 # these two for later use.
498 #
499 # Symmetry Orbitals
500 # -----------------
501 #
502 # Number of orbitals in each symmetry: 25 5 25 5
503 #
504 #
505 # Symmetry Ag ( 1)
506 #
507 # 1 C 1s 1 + 2
508 # 2 C 1s 3 + 4
509 # ...
510 #
511 if line.strip() == "Symmetry Orbitals":
513 self.skip_lines(inputfile, ['d', 'b'])
515 line = inputfile.next()
516 self.symcounts = [int(c) for c in line.split(':')[1].split()]
518 self.symlabels = []
519 for sc in self.symcounts:
521 self.skip_lines(inputfile, ['b', 'b'])
523 # If the number of orbitals for a symmetry is zero, the printout
524 # is different (see MP2 unittest logfile for an example).
525 line = inputfile.next()
527 if sc == 0:
528 assert "No orbitals in symmetry" in line
529 else:
530 assert line.split()[0] == "Symmetry"
531 self.symlabels.append(line.split()[1])
532 self.skip_line(inputfile, 'blank')
533 for i in range(sc):
534 orbital = inputfile.next()
536 if "Starting in Wave Function Section (SIRIUS)" in line:
537 self.section = "SIRIUS"
539 # Orbital specifications
540 # ======================
541 # Abelian symmetry species All | 1 2 3 4
542 # | Ag Au Bu Bg
543 # --- | --- --- --- ---
544 # Total number of orbitals 60 | 25 5 25 5
545 # Number of basis functions 60 | 25 5 25 5
546 #
547 # ** Automatic occupation of RKS orbitals **
548 #
549 # -- Initial occupation of symmetries is determined from extended Huckel guess.
550 # -- Initial occupation of symmetries is :
551 # @ Occupied SCF orbitals 35 | 15 2 15 3
552 #
553 # Maximum number of Fock iterations 0
554 # Maximum number of DIIS iterations 60
555 # Maximum number of QC-SCF iterations 60
556 # Threshold for SCF convergence 1.00D-05
557 # This is a DFT calculation of type: B3LYP
558 # ...
559 #
560 if "Total number of orbitals" in line:
561 # DALTON 2015 adds a @ in front of number of orbitals
562 chomp = line.split()
563 index = 4
564 if "@" in chomp:
565 index = 5
566 self.set_attribute("nbasis", int(chomp[index]))
567 self.nmo_per_symmetry = list(map(int, chomp[index+2:]))
568 assert self.nbasis == sum(self.nmo_per_symmetry)
569 if "Threshold for SCF convergence" in line:
570 if not hasattr(self, "scftargets"):
571 self.scftargets = []
572 scftarget = utils.float(line.split()[-1])
573 self.scftargets.append([scftarget])
575 # Wave function specification
576 # ============================
577 # @ Wave function type >>> KS-DFT <<<
578 # @ Number of closed shell electrons 70
579 # @ Number of electrons in active shells 0
580 # @ Total charge of the molecule 0
581 #
582 # @ Spin multiplicity and 2 M_S 1 0
583 # @ Total number of symmetries 4 (point group: C2h)
584 # @ Reference state symmetry 1 (irrep name : Ag )
585 #
586 # This is a DFT calculation of type: B3LYP
587 # ...
588 #
589 if line.strip() == "Wave function specification":
590 self.skip_line(inputfile, 'e')
591 line = next(inputfile)
592 # Must be a coupled cluster calculation.
593 if line.strip() == '':
594 self.skip_lines(inputfile, ['b', 'Coupled Cluster', 'b'])
595 else:
596 assert "wave function" in line.lower()
597 line = next(inputfile)
598 assert "Number of closed shell electrons" in line
599 self.paired_electrons = int(line.split()[-1])
600 line = next(inputfile)
601 assert "Number of electrons in active shells" in line
602 self.unpaired_electrons = int(line.split()[-1])
603 line = next(inputfile)
604 assert "Total charge of the molecule" in line
605 self.set_attribute("charge", int(line.split()[-1]))
606 self.skip_line(inputfile, 'b')
607 line = next(inputfile)
608 assert "Spin multiplicity and 2 M_S" in line
609 self.set_attribute("mult", int(line.split()[-2]))
610 # Dalton only has ROHF, no UHF
611 if self.mult != 1:
612 self.metadata["unrestricted"] = True
614 if not hasattr(self, 'homos'):
615 self.set_attribute('homos', [(self.paired_electrons // 2) - 1])
616 if self.unpaired_electrons > 0:
617 self.homos.append(self.homos[0])
618 self.homos[0] += self.unpaired_electrons
620 if "Dispersion Energy Correction" in line:
621 self.skip_lines(inputfile, ["pluses_and_dashes", "b"])
622 line = next(inputfile)
623 dispersion = utils.convertor(float(line.split()[-1]), "hartree", "eV")
624 self.append_attribute("dispersionenergies", dispersion)
626 # *********************************************
627 # ***** DIIS optimization of Hartree-Fock *****
628 # *********************************************
629 #
630 # C1-DIIS algorithm; max error vectors = 8
631 #
632 # Automatic occupation of symmetries with 70 electrons.
633 #
634 # Iter Total energy Error norm Delta(E) SCF occupation
635 # -----------------------------------------------------------------------------
636 # K-S energy, electrons, error : -46.547567739269 69.9999799123 -2.01D-05
637 # @ 1 -381.645762476 4.00D+00 -3.82D+02 15 2 15 3
638 # Virial theorem: -V/T = 2.008993
639 # @ MULPOP C _1 0.15; C _2 0.15; C _1 0.12; C _2 0.12; C _1 0.11; C _2 0.11; H _1 -0.15; H _2 -0.15; H _1 -0.14; H _2 -0.14;
640 # @ C _1 0.23; C _2 0.23; H _1 -0.15; H _2 -0.15; C _1 0.08; C _2 0.08; H _1 -0.12; H _2 -0.12; H _1 -0.13; H _2 -0.13;
641 # -----------------------------------------------------------------------------
642 # K-S energy, electrons, error : -46.647668038900 69.9999810430 -1.90D-05
643 # @ 2 -381.949410128 1.05D+00 -3.04D-01 15 2 15 3
644 # Virial theorem: -V/T = 2.013393
645 # ...
646 #
647 # With and without symmetry, the "Total energy" line is shifted a little.
648 if self.section == "SIRIUS" and "Iter" in line and "Total energy" in line:
650 iteration = 0
651 converged = False
652 values = []
653 if not hasattr(self, "scfvalues"):
654 self.scfvalues = []
656 while not converged:
658 try:
659 line = next(inputfile)
660 except StopIteration:
661 self.logger.warning('File terminated before end of last SCF!')
662 break
664 # each iteration is bracketed by "-------------"
665 if "-------------------" in line:
666 iteration += 1
667 continue
669 # the first hit of @ n where n is the current iteration
670 strcompare = "@{0:>3d}".format(iteration)
671 if strcompare in line:
672 temp = line.split()
673 error_norm = utils.float(temp[3])
674 values.append([error_norm])
676 if line[0] == "@" and "converged in" in line:
677 converged = True
679 # It seems DALTON does change the SCF convergence criteria during a
680 # geometry optimization, but also does not print them. So, assume they
681 # are unchanged and copy the initial values after the first step. However,
682 # it would be good to check up on this - perhaps it is possible to print.
683 self.scfvalues.append(values)
684 if len(self.scfvalues) > 1:
685 self.scftargets.append(self.scftargets[-1])
687 # DALTON organizes the energies by symmetry, so we need to parse first,
688 # and then sort the energies (and labels) before we store them.
689 #
690 # The formatting varies depending on RHF/DFT and/or version. Here is
691 # an example from a DFT job:
692 #
693 # *** SCF orbital energy analysis ***
694 #
695 # Only the five lowest virtual orbital energies printed in each symmetry.
696 #
697 # Number of electrons : 70
698 # Orbital occupations : 15 2 15 3
699 #
700 # Sym Kohn-Sham orbital energies
701 #
702 # 1 Ag -10.01616533 -10.00394288 -10.00288640 -10.00209612 -9.98818062
703 # -0.80583154 -0.71422407 -0.58487249 -0.55551093 -0.50630125
704 # ...
705 #
706 # Here is an example from an RHF job that only has symmetry group indices:
707 #
708 # *** SCF orbital energy analysis ***
709 #
710 # Only the five lowest virtual orbital energies printed in each symmetry.
711 #
712 # Number of electrons : 70
713 # Orbital occupations : 15 2 15 3
714 #
715 # Sym Hartree-Fock orbital energies
716 #
717 # 1 -11.04052518 -11.03158921 -11.02882211 -11.02858563 -11.01747921
718 # -1.09029777 -0.97492511 -0.79988247 -0.76282547 -0.69677619
719 # ...
720 #
721 if self.section == "SIRIUS" and "*** SCF orbital energy analysis ***" in line:
723 # to get ALL orbital energies, the .PRINTLEVELS keyword needs
724 # to be at least 0,10 (up from 0,5). I know, obvious, right?
725 # this, however, will conflict with the scfvalues output that
726 # changes into some weird form of DIIS debug output.
728 mosyms = []
729 moenergies = []
731 self.skip_line(inputfile, 'blank')
732 line = next(inputfile)
734 # There is some extra text between the section header and
735 # the number of electrons for open-shell calculations.
736 while "Number of electrons" not in line:
737 line = next(inputfile)
738 nelectrons = int(line.split()[-1])
740 line = next(inputfile)
741 occupations = [int(o) for o in line.split()[3:]]
742 nsym = len(occupations)
744 self.skip_lines(inputfile, ['b', 'header', 'b'])
746 # now parse nsym symmetries
747 for isym in range(nsym):
749 # For unoccupied symmetries, nothing is printed here.
750 if occupations[isym] == 0:
751 continue
753 # When there are exactly five energies printed (on just one line), it seems
754 # an extra blank line is printed after a block.
755 line = next(inputfile)
756 if not line.strip():
757 line = next(inputfile)
758 cols = line.split()
760 # The first line has the orbital symmetry information, but sometimes
761 # it's the label and sometimes it's the index. There are always five
762 # energies per line, though, so we can deduce if we have the labels or
763 # not just the index. In the latter case, we depend on the labels
764 # being read earlier into the list `symlabels`. Finally, if no symlabels
765 # were read that implies there is only one symmetry, namely Ag.
766 if 'A' in cols[1] or 'B' in cols[1]:
767 sym = self.normalisesym(cols[1])
768 energies = [float(t) for t in cols[2:]]
769 else:
770 if hasattr(self, 'symlabels'):
771 sym = self.normalisesym(self.symlabels[int(cols[0]) - 1])
772 else:
773 assert cols[0] == '1'
774 sym = "Ag"
775 energies = [float(t) for t in cols[1:]]
777 while len(energies) > 0:
778 moenergies.extend(energies)
779 mosyms.extend(len(energies)*[sym])
780 line = next(inputfile)
781 energies = [float(col) for col in line.split()]
783 # now sort the data about energies and symmetries. see the following post for the magic
784 # http://stackoverflow.com/questions/19339/a-transpose-unzip-function-in-python-inverse-of-zip
785 sdata = sorted(zip(moenergies, mosyms), key=lambda x: x[0])
786 moenergies, mosyms = zip(*sdata)
788 self.moenergies = [[]]
789 self.moenergies[0] = [utils.convertor(moenergy, 'hartree', 'eV') for moenergy in moenergies]
790 self.mosyms = [[]]
791 self.mosyms[0] = mosyms
793 if not hasattr(self, "nmo"):
794 self.nmo = self.nbasis
795 if len(self.moenergies[0]) != self.nmo:
796 self.set_attribute('nmo', len(self.moenergies[0]))
798 # .-----------------------------------.
799 # | >>> Final results from SIRIUS <<< |
800 # `-----------------------------------'
801 #
802 #
803 # @ Spin multiplicity: 1
804 # @ Spatial symmetry: 1 ( irrep Ag in C2h )
805 # @ Total charge of molecule: 0
806 #
807 # @ Final DFT energy: -382.050716652387
808 # @ Nuclear repulsion: 445.936979976608
809 # @ Electronic energy: -827.987696628995
810 #
811 # @ Final gradient norm: 0.000003746706
812 # ...
813 #
814 if "Final HF energy" in line and not (hasattr(self, "mpenergies") or hasattr(self, "ccenergies")):
815 self.metadata["methods"].append("HF")
816 if "Final DFT energy" in line:
817 self.metadata["methods"].append("DFT")
818 if "This is a DFT calculation of type" in line:
819 self.metadata["functional"] = line.split()[-1]
821 if "Final DFT energy" in line or "Final HF energy" in line:
822 if not hasattr(self, "scfenergies"):
823 self.scfenergies = []
824 temp = line.split()
825 self.scfenergies.append(utils.convertor(float(temp[-1]), "hartree", "eV"))
827 if "@ = MP2 second order energy" in line:
828 self.metadata["methods"].append("MP2")
829 energ = utils.convertor(float(line.split()[-1]), 'hartree', 'eV')
830 if not hasattr(self, "mpenergies"):
831 self.mpenergies = []
832 self.mpenergies.append([])
833 self.mpenergies[-1].append(energ)
835 if "Starting in Coupled Cluster Section (CC)" in line:
836 self.section = "CC"
838 if self.section == "CC" and "SUMMARY OF COUPLED CLUSTER CALCULATION" in line:
839 ccenergies = []
840 while "END OF COUPLED CLUSTER CALCULATION" not in line:
841 if "Total MP2 energy" in line:
842 # If **WAVE FUNCTIONS\n.MP2 was specified, don't double-add this.
843 if not hasattr(self, "mpenergies") or \
844 not len(self.mpenergies) == len(self.scfenergies):
845 self.append_attribute(
846 "mpenergies",
847 [utils.convertor(float(line.split()[-1]), "hartree", "eV")]
848 )
849 if "Total CCSD energy:" in line:
850 self.metadata["methods"].append("CCSD")
851 ccenergies.append(float(line.split()[-1]))
852 if "Total energy CCSD(T)" in line:
853 self.metadata["methods"].append("CCSD(T)")
854 ccenergies.append(float(line.split()[-1]))
855 line = next(inputfile)
856 if ccenergies:
857 self.append_attribute(
858 "ccenergies", utils.convertor(ccenergies[-1], "hartree", "eV")
859 )
861 if "Tau1 diagnostic" in line:
862 self.metadata["t1_diagnostic"] = float(line.split()[-1])
864 # The molecular geometry requires the use of .RUN PROPERTIES in the input.
865 # Note that the second column is not the nuclear charge, but the atom type
866 # index used internally by DALTON.
867 #
868 # Molecular geometry (au)
869 # -----------------------
870 #
871 # C _1 1.3498778652 2.3494125195 0.0000000000
872 # C _2 -1.3498778652 -2.3494125195 0.0000000000
873 # C _1 2.6543517307 0.0000000000 0.0000000000
874 # ...
875 #
876 if "Molecular geometry (au)" in line:
878 if not hasattr(self, "atomcoords"):
879 self.atomcoords = []
881 if self.firststdorient:
882 self.firststdorient = False
884 self.skip_lines(inputfile, ['d', 'b'])
886 lines = [next(inputfile) for i in range(self.natom)]
887 atomcoords = self.parse_geometry(lines)
888 self.atomcoords.append(atomcoords)
890 if "Optimization Control Center" in line:
891 self.section = "OPT"
892 assert set(next(inputfile).strip()) == set(":")
894 # During geometry optimizations the geometry is printed in the section
895 # that is titles "Optimization Control Center". Note that after an optimizations
896 # finishes, DALTON normally runs another "static property section (ABACUS)",
897 # so the final geometry will be repeated in atomcoords.
898 #
899 # Next geometry (au)
900 # ------------------
901 #
902 # C _1 1.3203201560 2.3174808341 0.0000000000
903 # C _2 -1.3203201560 -2.3174808341 0.0000000000
904 # ...
905 if self.section == "OPT" and line.strip() == "Next geometry (au)":
907 self.skip_lines(inputfile, ['d', 'b'])
909 lines = [next(inputfile) for i in range(self.natom)]
910 coords = self.parse_geometry(lines)
911 self.atomcoords.append(coords)
913 # This section contains data for optdone and geovalues, although we could use
914 # it to double check some atttributes that were parsed before.
915 #
916 # Optimization information
917 # ------------------------
918 #
919 # Iteration number : 4
920 # End of optimization : T
921 # Energy at this geometry is : -379.777956
922 # Energy change from last geom. : -0.000000
923 # Predicted change : -0.000000
924 # Ratio, actual/predicted change : 0.952994
925 # Norm of gradient : 0.000058
926 # Norm of step : 0.000643
927 # Updated trust radius : 0.714097
928 # Total Hessian index : 0
929 #
930 if self.section == "OPT" and line.strip() == "Optimization information":
932 self.skip_lines(inputfile, ['d', 'b'])
934 line = next(inputfile)
935 assert 'Iteration number' in line
936 iteration = int(line.split()[-1])
937 line = next(inputfile)
938 assert 'End of optimization' in line
939 if not hasattr(self, 'optdone'):
940 self.optdone = []
941 self.optdone.append(line.split()[-1] == 'T')
943 # We need a way to map between lines here and the targets stated at the
944 # beginning of the file in 'Chosen parameters for *OPTIMI (see above),
945 # and this dictionary facilitates that. The keys are target names parsed
946 # in that initial section after input processing, and the values are
947 # substrings that should appear in the lines in this section. Make an
948 # exception for the energy at iteration zero where there is no gradient,
949 # and take the total energy for geovalues.
950 targets_labels = {
951 'gradient': 'Norm of gradient',
952 'energy': 'Energy change from last',
953 'step': 'Norm of step',
954 }
955 values = [numpy.nan] * len(self.geotargets)
956 while line.strip():
957 if iteration == 0 and "Energy at this geometry" in line:
958 index = self.geotargets_names.index('energy')
959 values[index] = utils.float(line.split()[-1])
960 for tgt, lbl in targets_labels.items():
961 if lbl in line and tgt in self.geotargets_names:
962 index = self.geotargets_names.index(tgt)
963 values[index] = utils.float(line.split()[-1])
964 line = next(inputfile)
966 # If we're missing something above, throw away the partial geovalues since
967 # we don't want artificial NaNs getting into cclib. Instead, fix the dictionary
968 # to make things work.
969 if not numpy.nan in values:
970 if not hasattr(self, 'geovalues'):
971 self.geovalues = []
972 self.geovalues.append(values)
974 # -------------------------------------------------
975 # extract the center of mass line
976 if "Center-of-mass coordinates (a.u.):" in line:
977 temp = line.split()
978 reference = [utils.convertor(float(temp[i]), "bohr", "Angstrom") for i in [3, 4, 5]]
979 if not hasattr(self, 'moments'):
980 self.moments = [reference]
982 # -------------------------------------------------
983 # Extract the dipole moment
984 if "Dipole moment components" in line:
985 dipole = numpy.zeros(3)
986 line = next(inputfile)
987 line = next(inputfile)
988 line = next(inputfile)
989 if not "zero by symmetry" in line:
990 line = next(inputfile)
992 line = next(inputfile)
993 temp = line.split()
994 for i in range(3):
995 dipole[i] = float(temp[2]) # store the Debye value
996 if hasattr(self, 'moments'):
997 self.moments.append(dipole)
999 ## 'vibfreqs', 'vibirs', and 'vibsyms' appear in ABACUS.
1000 # Vibrational Frequencies and IR Intensities
1001 # ------------------------------------------
1002 #
1003 # mode irrep frequency IR intensity
1004 # ============================================================
1005 # cm-1 hartrees km/mol (D/A)**2/amu
1006 # ------------------------------------------------------------
1007 # 1 A 3546.72 0.016160 0.000 0.0000
1008 # 2 A 3546.67 0.016160 0.024 0.0006
1009 # ...
1010 if "Vibrational Frequencies and IR Intensities" in line:
1012 self.skip_lines(inputfile, ['dashes', 'blank'])
1013 line = next(inputfile)
1014 assert line.strip() == "mode irrep frequency IR intensity"
1015 self.skip_line(inputfile, 'equals')
1016 line = next(inputfile)
1017 assert line.strip() == "cm-1 hartrees km/mol (D/A)**2/amu"
1018 self.skip_line(inputfile, 'dashes')
1019 line = next(inputfile)
1021 # The normal modes are in order of decreasing IR
1022 # frequency, so they can't be added directly to
1023 # attributes; they must be grouped together first, sorted
1024 # in order of increasing frequency, then added to their
1025 # respective attributes.
1027 vibdata = []
1029 while line.strip():
1030 sline = line.split()
1031 vibsym = sline[1]
1032 vibfreq = float(sline[2])
1033 vibir = float(sline[4])
1034 vibdata.append((vibfreq, vibir, vibsym))
1035 line = next(inputfile)
1037 vibdata.sort(key=lambda normalmode: normalmode[0])
1039 self.vibfreqs = [normalmode[0] for normalmode in vibdata]
1040 self.vibirs = [normalmode[1] for normalmode in vibdata]
1041 self.vibsyms = [normalmode[2] for normalmode in vibdata]
1043 # Now extract the normal mode displacements.
1044 self.skip_lines(inputfile, ['b', 'b'])
1045 line = next(inputfile)
1046 assert line.strip() == "Normal Coordinates (bohrs*amu**(1/2)):"
1048 # Normal Coordinates (bohrs*amu**(1/2)):
1049 # --------------------------------------
1050 #
1051 #
1052 # 1 3547 2 3547 3 3474 4 3471 5 3451
1053 # ----------------------------------------------------------------------
1054 #
1055 # C x -0.000319 -0.000314 0.002038 0.000003 -0.001599
1056 # C y -0.000158 -0.000150 -0.001446 0.003719 -0.002576
1057 # C z 0.000000 -0.000000 -0.000000 0.000000 -0.000000
1058 #
1059 # C x 0.000319 -0.000315 -0.002038 0.000003 0.001600
1060 # C y 0.000157 -0.000150 0.001448 0.003717 0.002577
1061 # ...
1062 self.skip_line(inputfile, 'd')
1063 line = next(inputfile)
1065 vibdisps = numpy.empty(shape=(len(self.vibirs), self.natom, 3))
1067 ndisps = 0
1068 while ndisps < len(self.vibirs):
1069 # Skip two blank lines.
1070 line = next(inputfile)
1071 line = next(inputfile)
1072 # Use the header with the normal mode indices and
1073 # frequencies to update where we are.
1074 ndisps_block = (len(line.split()) // 2)
1075 mode_min, mode_max = ndisps, ndisps + ndisps_block
1076 # Skip a line of dashes and a blank line.
1077 line = next(inputfile)
1078 line = next(inputfile)
1079 for w in range(self.natom):
1080 for coord in range(3):
1081 line = next(inputfile)
1082 vibdisps[mode_min:mode_max, w, coord] = [float(i) for i in line.split()[2:]]
1083 # Skip a blank line.
1084 line = next(inputfile)
1085 ndisps += ndisps_block
1087 # The vibrational displacements are in the wrong order;
1088 # reverse them.
1089 self.vibdisps = vibdisps[::-1, :, :]
1091 ## 'vibramans'
1092 # Raman related properties for freq. 0.000000 au = Infinity nm
1093 # ---------------------------------------------------------------
1094 #
1095 # Mode Freq. Alpha**2 Beta(a)**2 Pol.Int. Depol.Int. Dep. Ratio
1096 #
1097 # 1 3546.72 0.379364 16.900089 84.671721 50.700268 0.598786
1098 # 2 3546.67 0.000000 0.000000 0.000000 0.000000 0.599550
1099 if "Raman related properties for freq." in line:
1101 self.skip_lines(inputfile, ['d', 'b'])
1102 line = next(inputfile)
1103 assert line[1:76] == "Mode Freq. Alpha**2 Beta(a)**2 Pol.Int. Depol.Int. Dep. Ratio"
1104 self.skip_line(inputfile, 'b')
1105 line = next(inputfile)
1107 vibramans = []
1109 # The Raman activities appear under the "Pol.Int."
1110 # (polarization intensity) column.
1111 for m in range(len(self.vibfreqs)):
1112 vibramans.append(float(line.split()[4]))
1113 line = next(inputfile)
1115 # All vibrational properties in DALTON appear in reverse
1116 # order.
1117 self.vibramans = vibramans[::-1]
1119 # Static polarizability from **PROPERTIES/.POLARI.
1120 if line.strip() == "Static polarizabilities (au)":
1121 if not hasattr(self, 'polarizabilities'):
1122 self.polarizabilities = []
1123 polarizability = []
1124 self.skip_lines(inputfile, ['d', 'b', 'directions', 'b'])
1125 for _ in range(3):
1126 line = next(inputfile)
1127 # Separate possibly unspaced huge negative polarizability tensor
1128 # element and the left adjacent column from each other.
1129 line = line.replace('-', ' -')
1130 polarizability.append(line.split()[1:])
1131 self.polarizabilities.append(numpy.array(polarizability))
1133 # Static and dynamic polarizability from **PROPERTIES/.ALPHA/*ABALNR.
1134 if "Polarizability tensor for frequency" in line:
1135 if not hasattr(self, 'polarizabilities'):
1136 self.polarizabilities = []
1137 polarizability = []
1138 self.skip_lines(inputfile, ['d', 'directions', 'b'])
1139 for _ in range(3):
1140 line = next(inputfile)
1141 polarizability.append(line.split()[1:])
1142 self.polarizabilities.append(numpy.array(polarizability))
1144 if "Starting in Dynamic Property Section (RESPONS)" in line:
1145 self.section = "RESPONSE"
1147 # Static and dynamic polarizability from **RESPONSE/*LINEAR.
1148 # This section is *very* general and will need to be expanded later.
1149 # For now, only form the matrix from dipole (length gauge) values.
1150 if "@ FREQUENCY INDEPENDENT SECOND ORDER PROPERTIES" in line:
1152 coord_to_idx = {'X': 0, 'Y': 1, 'Z': 2}
1154 self.skip_line(inputfile, 'b')
1155 line = next(inputfile)
1157 polarizability_diplen = numpy.empty(shape=(3, 3)) * numpy.nan
1158 while "Time used in linear response calculation is" not in line:
1159 tokens = line.split()
1160 if line.count("DIPLEN") == 2:
1161 assert len(tokens) == 8
1162 if not hasattr(self, 'polarizabilities'):
1163 self.polarizabilities = []
1164 i, j = coord_to_idx[tokens[2][0]], coord_to_idx[tokens[4][0]]
1165 polarizability_diplen[i, j] = utils.float(tokens[7])
1166 line = next(inputfile)
1168 polarizability_diplen = utils.symmetrize(polarizability_diplen, use_triangle='upper')
1169 if hasattr(self, 'polarizabilities'):
1170 self.polarizabilities.append(polarizability_diplen)
1172 ## Electronic excitations: single residues of the linear response
1173 ## equations.
1174 #
1175 #
1176 # @ Excited state no: 1 in symmetry 3 ( Bu )
1177 # ----------------------------------------------
1178 #
1179 # @ Excitation energy : 0.19609400 au
1180 # @ 5.3359892 eV; 43037.658 cm-1; 514.84472 kJ / mol
1181 #
1182 # @ Total energy : -381.85462 au
1183 #
1184 # @ Operator type: XDIPLEN
1185 # @ Oscillator strength (LENGTH) : 8.93558787E-03 (Transition moment : 0.26144181 )
1186 #
1187 # @ Operator type: YDIPLEN
1188 # @ Oscillator strength (LENGTH) : 0.15204812 (Transition moment : 1.0784599 )
1189 #
1190 # Eigenvector for state no. 1
1191 #
1192 # Response orbital operator symmetry = 3
1193 # (only scaled elements abs greater than 10.00 % of max abs value)
1194 #
1195 # Index(r,s) r s (r s) operator (s r) operator (r s) scaled (s r) scaled
1196 # ---------- ----- ----- -------------- -------------- -------------- --------------
1197 # 308 57(4) 28(2) 0.4829593728 -0.0024872024 0.6830076950 -0.0035174354
1198 # ...
1199 if "Linear Response single residue calculation" in line:
1201 etsyms = []
1202 etenergies = []
1203 etsecs = []
1204 etoscs = dict()
1205 etoscs_keys = set()
1207 symmap = {"T": "Triplet", "F": "Singlet"}
1209 while "End of Dynamic Property Section (RESPONS)" not in line:
1211 line = next(inputfile)
1213 if "Operator symmetry" in line:
1214 do_triplet = line[-2]
1216 # @ Excited state no: 4 in symmetry 2 ( Au )
1217 if line.startswith(" @ Excited state no:"):
1218 tokens = line.split()
1219 excited_state_num_in_sym = int(tokens[4])
1220 sym_num = int(tokens[7])
1221 etosc_key = (sym_num, excited_state_num_in_sym)
1222 etoscs_keys.add(etosc_key)
1223 etsym = tokens[9]
1224 etsyms.append(symmap[do_triplet] + "-" + etsym)
1225 self.skip_lines(inputfile, ["d", "b", "Excitation energy in a.u."])
1226 line = next(inputfile)
1227 etenergies.append(utils.float(line.split()[3]))
1228 self.skip_lines(inputfile, ["b", "@ Total energy", "b"])
1230 if line.startswith("@ Operator type:"):
1231 line = next(inputfile)
1232 assert line.startswith("@ Oscillator strength")
1233 if etosc_key not in etoscs:
1234 etoscs[etosc_key] = 0.0
1235 etoscs[etosc_key] += utils.float(line.split()[5])
1236 self.skip_line(inputfile, "b")
1238 # To understand why the "PBHT MO Overlap Diagnostic" section
1239 # cannot be used, see
1240 # `test/regression.py/testDALTON_DALTON_2013_dvb_td_normalprint_out`.
1241 if "Eigenvector for state no." in line:
1242 assert int(line.split()[4]) == excited_state_num_in_sym
1243 self.skip_lines(inputfile, [
1244 "b",
1245 "Response orbital operator symmetry",
1246 "only scaled elements",
1247 "b",
1248 "Index(r,s)",
1249 "d"
1250 ])
1251 line = next(inputfile)
1252 etsec = []
1253 while line.strip():
1254 tokens = line.split()
1255 startidx = int(tokens[1].split("(")[0]) - 1
1256 endidx = int(tokens[2].split("(")[0]) - 1
1257 # `(r s) scaled`; to handle anything other than
1258 # CIS/TDA properly, the deexcitation coefficient `(s
1259 # r) scaled` should also be considered, but this
1260 # requires a rework of the attribute structure.
1261 contrib = float(tokens[5])
1262 # Since DALTON is restricted open-shell only, there is
1263 # no distinction between alpha and beta spin.
1264 etsec.append([(startidx, 0), (endidx, 0), contrib])
1265 line = next(inputfile)
1266 etsecs.append(etsec)
1268 self.set_attribute("etsyms", etsyms)
1269 self.set_attribute("etenergies", etenergies)
1270 if etsecs:
1271 self.set_attribute("etsecs", etsecs)
1272 if etoscs:
1273 for k in etoscs_keys:
1274 # If the oscillator strength of a transition is known to
1275 # be zero for symmetry reasons, it isn't printed, however
1276 # we need it for consistency; if it wasn't found, add it.
1277 if k not in etoscs:
1278 etoscs[k] = 0.0
1279 # `.keys()` is not strictly necessary, but make it obvious
1280 # that this is being sorted in order of excitation and
1281 # symmetry, not oscillator strength.
1282 self.set_attribute("etoscs", [etoscs[k] for k in sorted(etoscs.keys())])
1284 if line[:37] == ' >>>> Total wall time used in DALTON:':
1285 self.metadata['success'] = True
1287 # TODO:
1288 # aonames
1289 # aooverlaps
1290 # atomcharges
1291 # atomspins
1292 # coreelectrons
1293 # enthalpy
1294 # entropy
1295 # etrotats
1296 # freeenergy
1297 # grads
1298 # hessian
1299 # mocoeffs
1300 # nocoeffs
1301 # nooccnos
1302 # scancoords
1303 # scanenergies
1304 # scannames
1305 # scanparm
1306 # temperature
1307 # vibanharms
1309 # N/A:
1310 # fonames
1311 # fooverlaps
1312 # fragnames
1313 # frags