Coverage for cclib/parser/nwchemparser.py : 97%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2#
3# Copyright (c) 2020, the cclib development team
4#
5# This file is part of cclib (http://cclib.github.io) and is distributed under
6# the terms of the BSD 3-Clause License.
8"""Parser for NWChem output files"""
11import itertools
12import re
14import numpy
16from cclib.parser import logfileparser
17from cclib.parser import utils
20class NWChem(logfileparser.Logfile):
21 """An NWChem log file."""
23 def __init__(self, *args, **kwargs):
25 # Call the __init__ method of the superclass
26 super(NWChem, self).__init__(logname="NWChem", *args, **kwargs)
28 def __str__(self):
29 """Return a string representation of the object."""
30 return "NWChem log file %s" % (self.filename)
32 def __repr__(self):
33 """Return a representation of the object."""
34 return 'NWChem("%s")' % (self.filename)
36 def normalisesym(self, label):
37 """NWChem does not require normalizing symmetry labels."""
38 return label
40 name2element = lambda self, lbl: "".join(itertools.takewhile(str.isalpha, str(lbl)))
42 def extract(self, inputfile, line):
43 """Extract information from the file object inputfile."""
45 # Extract the version number and the version control information, if
46 # it exists.
47 if "nwchem branch" in line:
48 base_package_version = line.split()[3]
49 self.metadata["legacy_package_version"] = base_package_version
50 self.metadata["package_version"] = base_package_version
51 line = next(inputfile)
52 if "nwchem revision" in line:
53 self.metadata["package_version"] = "{}+{}".format(
54 self.metadata["package_version"], line.split()[3].split("-")[-1]
55 )
57 # This is printed in the input module, so should always be the first coordinates,
58 # and contains some basic information we want to parse as well. However, this is not
59 # the only place where the coordinates are printed during geometry optimization,
60 # since the gradients module has a separate coordinate printout, which happens
61 # alongside the coordinate gradients. This geometry printout happens at the
62 # beginning of each optimization step only.
63 if line.strip() == 'Geometry "geometry" -> ""' or line.strip() == 'Geometry "geometry" -> "geometry"':
65 self.skip_lines(inputfile, ['dashes', 'blank', 'units', 'blank', 'header', 'dashes'])
67 if not hasattr(self, 'atomcoords'):
68 self.atomcoords = []
70 line = next(inputfile)
71 coords = []
72 atomnos = []
73 while line.strip():
74 # The column labeled 'tag' is usually empty, but I'm not sure whether it can have spaces,
75 # so for now assume that it can and that there will be seven columns in that case.
76 if len(line.split()) == 6:
77 index, atomname, nuclear, x, y, z = line.split()
78 else:
79 index, atomname, tag, nuclear, x, y, z = line.split()
80 coords.append(list(map(float, [x, y, z])))
81 atomnos.append(int(float(nuclear)))
82 line = next(inputfile)
84 self.atomcoords.append(coords)
86 self.set_attribute('atomnos', atomnos)
88 # If the geometry is printed in XYZ format, it will have the number of atoms.
89 if line[12:31] == "XYZ format geometry":
91 self.skip_line(inputfile, 'dashes')
92 natom = int(next(inputfile).strip())
93 self.set_attribute('natom', natom)
95 if line.strip() == "NWChem Geometry Optimization":
96 self.skip_lines(inputfile, ['d', 'b', 'b', 'b', 'b', 'title', 'b', 'b'])
97 line = next(inputfile)
98 while line.strip():
99 if "maximum gradient threshold" in line:
100 gmax = float(line.split()[-1])
101 if "rms gradient threshold" in line:
102 grms = float(line.split()[-1])
103 if "maximum cartesian step threshold" in line:
104 xmax = float(line.split()[-1])
105 if "rms cartesian step threshold" in line:
106 xrms = float(line.split()[-1])
107 line = next(inputfile)
109 self.set_attribute('geotargets', [gmax, grms, xmax, xrms])
111 # NWChem does not normally print the basis set for each atom, but rather
112 # chooses the concise option of printing Gaussian coefficients for each
113 # atom type/element only once. Therefore, we need to first parse those
114 # coefficients and afterwards build the appropriate gbasis attribute based
115 # on that and atom types/elements already parsed (atomnos). However, if atom
116 # are given different names (number after element, like H1 and H2), then NWChem
117 # generally prints the gaussian parameters for all unique names, like this:
118 #
119 # Basis "ao basis" -> "ao basis" (cartesian)
120 # -----
121 # O (Oxygen)
122 # ----------
123 # Exponent Coefficients
124 # -------------- ---------------------------------------------------------
125 # 1 S 1.30709320E+02 0.154329
126 # 1 S 2.38088610E+01 0.535328
127 # (...)
128 #
129 # H1 (Hydrogen)
130 # -------------
131 # Exponent Coefficients
132 # -------------- ---------------------------------------------------------
133 # 1 S 3.42525091E+00 0.154329
134 # (...)
135 #
136 # H2 (Hydrogen)
137 # -------------
138 # Exponent Coefficients
139 # -------------- ---------------------------------------------------------
140 # 1 S 3.42525091E+00 0.154329
141 # (...)
142 #
143 # This current parsing code below assumes all atoms of the same element
144 # use the same basis set, but that might not be true, and this will probably
145 # need to be considered in the future when such a logfile appears.
146 if line.strip() == """Basis "ao basis" -> "ao basis" (cartesian)""":
147 self.skip_line(inputfile, 'dashes')
148 gbasis_dict = {}
149 line = next(inputfile)
150 while line.strip():
151 atomname = line.split()[0]
152 atomelement = self.name2element(atomname)
153 gbasis_dict[atomelement] = []
154 self.skip_lines(inputfile, ['d', 'labels', 'd'])
155 shells = []
156 line = next(inputfile)
157 while line.strip() and line.split()[0].isdigit():
158 shell = None
159 while line.strip():
160 nshell, type, exp, coeff = line.split()
161 nshell = int(nshell)
162 assert len(shells) == nshell - 1
163 if not shell:
164 shell = (type, [])
165 else:
166 assert shell[0] == type
167 exp = float(exp)
168 coeff = float(coeff)
169 shell[1].append((exp, coeff))
170 line = next(inputfile)
171 shells.append(shell)
172 line = next(inputfile)
173 gbasis_dict[atomelement].extend(shells)
175 gbasis = []
176 for i in range(self.natom):
177 atomtype = self.table.element[self.atomnos[i]]
178 gbasis.append(gbasis_dict[atomtype])
180 self.set_attribute('gbasis', gbasis)
182 # Normally the indexes of AOs assigned to specific atoms are also not printed,
183 # so we need to infer that. We could do that from the previous section,
184 # it might be worthwhile to take numbers from two different places, hence
185 # the code below, which builds atombasis based on the number of functions
186 # listed in this summary of the AO basis. Similar to previous section, here
187 # we assume all atoms of the same element have the same basis sets, but
188 # this will probably need to be revised later.
190 # The section we can glean info about aonmaes looks like:
191 #
192 # Summary of "ao basis" -> "ao basis" (cartesian)
193 # ------------------------------------------------------------------------------
194 # Tag Description Shells Functions and Types
195 # ---------------- ------------------------------ ------ ---------------------
196 # C sto-3g 3 5 2s1p
197 # H sto-3g 1 1 1s
198 #
199 # However, we need to make sure not to match the following entry lines:
200 #
201 # * Summary of "ao basis" -> "" (cartesian)
202 # * Summary of allocated global arrays
203 #
204 # Unfortantely, "ao basis" isn't unique because it can be renamed to anything for
205 # later reference: http://www.nwchem-sw.org/index.php/Basis
206 # It also appears that we have to handle cartesian vs. spherical
208 if line[1:11] == "Summary of":
209 match = re.match(r' Summary of "([^\"]*)" -> "([^\"]*)" \((.+)\)', line)
211 if match and match.group(1) == match.group(2):
213 self.skip_lines(inputfile, ['d', 'title', 'd'])
215 self.shells = {}
216 self.shells["type"] = match.group(3)
218 atombasis_dict = {}
220 line = next(inputfile)
221 while line.strip():
222 atomname, desc, shells, funcs, types = line.split()
223 atomelement = self.name2element(atomname)
224 self.metadata["basis_set"] = desc
226 self.shells[atomname] = types
227 atombasis_dict[atomelement] = int(funcs)
228 line = next(inputfile)
230 last = 0
231 atombasis = []
232 for atom in self.atomnos:
233 atomelement = self.table.element[atom]
234 nfuncs = atombasis_dict[atomelement]
235 atombasis.append(list(range(last, last+nfuncs)))
236 last = atombasis[-1][-1] + 1
238 self.set_attribute('atombasis', atombasis)
240 # This section contains general parameters for Hartree-Fock calculations,
241 # which do not contain the 'General Information' section like most jobs.
242 if line.strip() == "NWChem SCF Module":
243 # If the calculation doesn't have a title specified, there
244 # aren't as many lines to skip here.
245 self.skip_lines(inputfile, ['d', 'b', 'b'])
246 line = next(inputfile)
247 if line.strip():
248 self.skip_lines(inputfile, ['b', 'b', 'b'])
249 line = next(inputfile)
250 while line.strip():
251 if line[2:8] == "charge":
252 charge = int(float(line.split()[-1]))
253 self.set_attribute('charge', charge)
254 if line[2:13] == "open shells":
255 unpaired = int(line.split()[-1])
256 self.set_attribute('mult', 2*unpaired + 1)
257 if line[2:7] == "atoms":
258 natom = int(line.split()[-1])
259 self.set_attribute('natom', natom)
260 if line[2:11] == "functions":
261 nfuncs = int(line.split()[-1])
262 self.set_attribute("nbasis", nfuncs)
263 line = next(inputfile)
265 # This section contains general parameters for DFT calculations, as well as
266 # for the many-electron theory module.
267 if line.strip() == "General Information":
269 if hasattr(self, 'linesearch') and self.linesearch:
270 return
272 while line.strip():
274 if "No. of atoms" in line:
275 self.set_attribute('natom', int(line.split()[-1]))
276 if "Charge" in line:
277 self.set_attribute('charge', int(line.split()[-1]))
278 if "Spin multiplicity" in line:
279 mult = line.split()[-1]
280 if mult == "singlet":
281 mult = 1
282 self.set_attribute('mult', int(mult))
283 if "AO basis - number of function" in line:
284 nfuncs = int(line.split()[-1])
285 self.set_attribute('nbasis', nfuncs)
287 # These will be present only in the DFT module.
288 if "Convergence on energy requested" in line:
289 target_energy = utils.float(line.split()[-1])
290 if "Convergence on density requested" in line:
291 target_density = utils.float(line.split()[-1])
292 if "Convergence on gradient requested" in line:
293 target_gradient = utils.float(line.split()[-1])
295 line = next(inputfile)
297 # Pretty nasty temporary hack to set scftargets only in the SCF module.
298 if "target_energy" in dir() and "target_density" in dir() and "target_gradient" in dir():
299 if not hasattr(self, 'scftargets'):
300 self.scftargets = []
301 self.scftargets.append([target_energy, target_density, target_gradient])
303 #DFT functional information
304 if "XC Information" in line:
305 line = next(inputfile)
306 line = next(inputfile)
307 self.metadata["functional"] = line.split()[0]
309 # If the full overlap matrix is printed, it looks like this:
310 #
311 # global array: Temp Over[1:60,1:60], handle: -996
312 #
313 # 1 2 3 4 5 6
314 # ----------- ----------- ----------- ----------- ----------- -----------
315 # 1 1.00000 0.24836 -0.00000 -0.00000 0.00000 0.00000
316 # 2 0.24836 1.00000 0.00000 -0.00000 0.00000 0.00030
317 # 3 -0.00000 0.00000 1.00000 0.00000 0.00000 -0.00014
318 # ...
319 if "global array: Temp Over[" in line:
321 self.set_attribute('nbasis', int(line.split('[')[1].split(',')[0].split(':')[1]))
322 self.set_attribute('nmo', int(line.split(']')[0].split(',')[1].split(':')[1]))
324 aooverlaps = []
325 while len(aooverlaps) < self.nbasis:
327 self.skip_line(inputfile, 'blank')
329 indices = [int(i) for i in inputfile.next().split()]
330 assert indices[0] == len(aooverlaps) + 1
332 self.skip_line(inputfile, "dashes")
333 data = [inputfile.next().split() for i in range(self.nbasis)]
334 indices = [int(d[0]) for d in data]
335 assert indices == list(range(1, self.nbasis+1))
337 for i in range(1, len(data[0])):
338 vector = [float(d[i]) for d in data]
339 aooverlaps.append(vector)
341 self.set_attribute('aooverlaps', aooverlaps)
343 if line.strip() in ("The SCF is already converged", "The DFT is already converged"):
344 if self.linesearch:
345 return
346 if hasattr(self, 'scftargets'):
347 self.scftargets.append(self.scftargets[-1])
348 if hasattr(self, 'scfvalues'):
349 self.scfvalues.append(self.scfvalues[-1])
351 # The default (only?) SCF algorithm for Hartree-Fock is a preconditioned conjugate
352 # gradient method that apparently "always" converges, so this header should reliably
353 # signal a start of the SCF cycle. The convergence targets are also printed here.
354 if line.strip() == "Quadratically convergent ROHF":
356 if hasattr(self, 'linesearch') and self.linesearch:
357 return
359 while not "Final" in line:
361 # Only the norm of the orbital gradient is used to test convergence.
362 if line[:22] == " Convergence threshold":
363 target = float(line.split()[-1])
364 if not hasattr(self, "scftargets"):
365 self.scftargets = []
366 self.scftargets.append([target])
368 # This is critical for the stop condition of the section,
369 # because the 'Final Fock-matrix accuracy' is along the way.
370 # It would be prudent to find a more robust stop condition.
371 while list(set(line.strip())) != ["-"]:
372 line = next(inputfile)
374 if line.split() == ['iter', 'energy', 'gnorm', 'gmax', 'time']:
375 values = []
376 self.skip_line(inputfile, 'dashes')
377 line = next(inputfile)
378 while line.strip():
379 it, energy, gnorm, gmax, time = line.split()
380 gnorm = utils.float(gnorm)
381 values.append([gnorm])
382 try:
383 line = next(inputfile)
384 # Is this the end of the file for some reason?
385 except StopIteration:
386 self.logger.warning('File terminated before end of last SCF! Last gradient norm: {}'.format(gnorm))
387 break
388 if not hasattr(self, 'scfvalues'):
389 self.scfvalues = []
390 self.scfvalues.append(values)
392 try:
393 line = next(inputfile)
394 except StopIteration:
395 self.logger.warning('File terminated?')
396 break
398 # The SCF for DFT does not use the same algorithm as Hartree-Fock, but always
399 # seems to use the following format to report SCF convergence:
400 # convergence iter energy DeltaE RMS-Dens Diis-err time
401 # ---------------- ----- ----------------- --------- --------- --------- ------
402 # d= 0,ls=0.0,diis 1 -382.2544324446 -8.28D+02 1.42D-02 3.78D-01 23.2
403 # d= 0,ls=0.0,diis 2 -382.3017298534 -4.73D-02 6.99D-03 3.82D-02 39.3
404 # d= 0,ls=0.0,diis 3 -382.2954343173 6.30D-03 4.21D-03 7.95D-02 55.3
405 # ...
406 if line.split() == ['convergence', 'iter', 'energy', 'DeltaE', 'RMS-Dens', 'Diis-err', 'time']:
408 if hasattr(self, 'linesearch') and self.linesearch:
409 return
411 self.skip_line(inputfile, 'dashes')
412 line = next(inputfile)
413 values = []
414 while line.strip():
416 # Sometimes there are things in between iterations with fewer columns,
417 # and we want to skip those lines, most probably. An exception might
418 # unrestricted calcualtions, which show extra RMS density and DIIS
419 # errors, although it is not clear yet whether these are for the
420 # beta orbitals or somethine else. The iterations look like this in that case:
421 # convergence iter energy DeltaE RMS-Dens Diis-err time
422 # ---------------- ----- ----------------- --------- --------- --------- ------
423 # d= 0,ls=0.0,diis 1 -382.0243202601 -8.28D+02 7.77D-03 1.04D-01 30.0
424 # 7.68D-03 1.02D-01
425 # d= 0,ls=0.0,diis 2 -382.0647539758 -4.04D-02 4.64D-03 1.95D-02 59.2
426 # 5.39D-03 2.36D-02
427 # ...
428 if len(line[17:].split()) == 6:
429 iter, energy, deltaE, dens, diis, time = line[17:].split()
430 val_energy = utils.float(deltaE)
431 val_density = utils.float(dens)
432 val_gradient = utils.float(diis)
433 values.append([val_energy, val_density, val_gradient])
435 try:
436 line = next(inputfile)
437 # Is this the end of the file for some reason?
438 except StopIteration:
439 self.logger.warning('File terminated before end of last SCF! Last error: {}'.format(diis))
440 break
442 if not hasattr(self, 'scfvalues'):
443 self.scfvalues = []
445 self.scfvalues.append(values)
447 # These triggers are supposed to catch the current step in a geometry optimization search
448 # and determine whether we are currently in the main (initial) SCF cycle of that step
449 # or in the subsequent line search. The step is printed between dashes like this:
450 #
451 # --------
452 # Step 0
453 # --------
454 #
455 # and the summary lines that describe the main SCF cycle for the frsit step look like this:
456 #
457 #@ Step Energy Delta E Gmax Grms Xrms Xmax Walltime
458 #@ ---- ---------------- -------- -------- -------- -------- -------- --------
459 #@ 0 -379.76896249 0.0D+00 0.04567 0.01110 0.00000 0.00000 4.2
460 # ok ok
461 #
462 # However, for subsequent step the format is a bit different:
463 #
464 # Step Energy Delta E Gmax Grms Xrms Xmax Walltime
465 # ---- ---------------- -------- -------- -------- -------- -------- --------
466 #@ 2 -379.77794602 -7.4D-05 0.00118 0.00023 0.00440 0.01818 14.8
467 # ok
468 #
469 # There is also a summary of the line search (which we don't use now), like this:
470 #
471 # Line search:
472 # step= 1.00 grad=-1.8D-05 hess= 8.9D-06 energy= -379.777955 mode=accept
473 # new step= 1.00 predicted energy= -379.777955
474 #
475 if line[10:14] == "Step":
476 self.geostep = int(line.split()[-1])
477 self.skip_line(inputfile, 'dashes')
478 self.linesearch = False
479 if line[0] == "@" and line.split()[1] == "Step":
480 at_and_dashes = next(inputfile)
481 line = next(inputfile)
482 assert int(line.split()[1]) == self.geostep == 0
483 gmax = float(line.split()[4])
484 grms = float(line.split()[5])
485 xrms = float(line.split()[6])
486 xmax = float(line.split()[7])
487 if not hasattr(self, 'geovalues'):
488 self.geovalues = []
489 self.geovalues.append([gmax, grms, xmax, xrms])
490 self.linesearch = True
491 if line[2:6] == "Step":
492 self.skip_line(inputfile, 'dashes')
493 line = next(inputfile)
494 assert int(line.split()[1]) == self.geostep
495 if self.linesearch:
496 #print(line)
497 return
498 gmax = float(line.split()[4])
499 grms = float(line.split()[5])
500 xrms = float(line.split()[6])
501 xmax = float(line.split()[7])
502 if not hasattr(self, 'geovalues'):
503 self.geovalues = []
504 self.geovalues.append([gmax, grms, xmax, xrms])
505 self.linesearch = True
507 # There is a clear message when the geometry optimization has converged:
508 #
509 # ----------------------
510 # Optimization converged
511 # ----------------------
512 #
513 if line.strip() == "Optimization converged":
514 self.skip_line(inputfile, 'dashes')
515 if not hasattr(self, 'optdone'):
516 self.optdone = []
517 self.optdone.append(len(self.geovalues) - 1)
519 if "Failed to converge" in line and hasattr(self, 'geovalues'):
520 if not hasattr(self, 'optdone'):
521 self.optdone = []
523 # extract the theoretical method
524 if "Total SCF energy" in line:
525 self.metadata["methods"].append("HF")
526 if "Total DFT energy" in line:
527 self.metadata["methods"].append("DFT")
529 # The line containing the final SCF energy seems to be always identifiable like this.
530 if "Total SCF energy" in line or "Total DFT energy" in line:
532 # NWChem often does a line search during geometry optimization steps, reporting
533 # the SCF information but not the coordinates (which are not necessarily 'intermediate'
534 # since the step size can become smaller). We want to skip these SCF cycles,
535 # unless the coordinates can also be extracted (possibly from the gradients?).
536 if hasattr(self, 'linesearch') and self.linesearch:
537 return
539 if not hasattr(self, "scfenergies"):
540 self.scfenergies = []
541 energy = float(line.split()[-1])
542 energy = utils.convertor(energy, "hartree", "eV")
543 self.scfenergies.append(energy)
545 if "Dispersion correction" in line:
546 dispersion = utils.convertor(float(line.split()[-1]), "hartree", "eV")
547 self.append_attribute("dispersionenergies", dispersion)
549 # The final MO orbitals are printed in a simple list, but apparently not for
550 # DFT calcs, and often this list does not contain all MOs, so make sure to
551 # parse them from the MO analysis below if possible. This section will be like this:
552 #
553 # Symmetry analysis of molecular orbitals - final
554 # -----------------------------------------------
555 #
556 # Numbering of irreducible representations:
557 #
558 # 1 ag 2 au 3 bg 4 bu
559 #
560 # Orbital symmetries:
561 #
562 # 1 bu 2 ag 3 bu 4 ag 5 bu
563 # 6 ag 7 bu 8 ag 9 bu 10 ag
564 # ...
565 if line.strip() == "Symmetry analysis of molecular orbitals - final":
567 self.skip_lines(inputfile, ['d', 'b', 'numbering', 'b', 'reps', 'b', 'syms', 'b'])
569 if not hasattr(self, 'mosyms'):
570 self.mosyms = [[None]*self.nbasis]
571 line = next(inputfile)
572 while line.strip():
573 ncols = len(line.split())
574 assert ncols % 2 == 0
575 for i in range(ncols//2):
576 index = int(line.split()[i*2]) - 1
577 sym = line.split()[i*2+1]
578 sym = sym[0].upper() + sym[1:]
579 if self.mosyms[0][index]:
580 if self.mosyms[0][index] != sym:
581 self.logger.warning("Symmetry of MO %i has changed" % (index+1))
582 self.mosyms[0][index] = sym
583 line = next(inputfile)
585 # The same format is used for HF and DFT molecular orbital analysis. We want to parse
586 # the MO energies from this section, although it is printed already before this with
587 # less precision (might be useful to parse that if this is not available). Also, this
588 # section contains coefficients for the leading AO contributions, so it might also
589 # be useful to parse and use those values if the full vectors are not printed.
590 #
591 # The block looks something like this (two separate alpha/beta blocks in the unrestricted case):
592 #
593 # ROHF Final Molecular Orbital Analysis
594 # -------------------------------------
595 #
596 # Vector 1 Occ=2.000000D+00 E=-1.104059D+01 Symmetry=bu
597 # MO Center= 1.4D-17, 0.0D+00, -6.5D-37, r^2= 2.1D+00
598 # Bfn. Coefficient Atom+Function Bfn. Coefficient Atom+Function
599 # ----- ------------ --------------- ----- ------------ ---------------
600 # 1 0.701483 1 C s 6 -0.701483 2 C s
601 #
602 # Vector 2 Occ=2.000000D+00 E=-1.104052D+01 Symmetry=ag
603 # ...
604 # Vector 12 Occ=2.000000D+00 E=-1.020253D+00 Symmetry=bu
605 # MO Center= -1.4D-17, -5.6D-17, 2.9D-34, r^2= 7.9D+00
606 # Bfn. Coefficient Atom+Function Bfn. Coefficient Atom+Function
607 # ----- ------------ --------------- ----- ------------ ---------------
608 # 36 -0.298699 11 C s 41 0.298699 12 C s
609 # 2 0.270804 1 C s 7 -0.270804 2 C s
610 # 48 -0.213655 15 C s 53 0.213655 16 C s
611 # ...
612 #
613 if "Final" in line and "Molecular Orbital Analysis" in line:
615 # Unrestricted jobs have two such blocks, for alpha and beta orbitals, and
616 # we need to keep track of which one we're parsing (always alpha in restricted case).
617 unrestricted = ("Alpha" in line) or ("Beta" in line)
618 alphabeta = int("Beta" in line)
620 self.skip_lines(inputfile, ['dashes', 'blank'])
622 nvectors = []
623 mooccnos = []
624 energies = []
625 symmetries = [None]*self.nbasis
626 line = next(inputfile)
627 while line[:7] == " Vector":
629 # Note: the vector count starts from 1 in NWChem.
630 nvector = int(line[7:12])
631 nvectors.append(nvector)
633 # A nonzero occupancy for SCF jobs means the orbital is occupied.
634 mooccno = int(utils.float(line[18:30]))
635 mooccnos.append(mooccno)
637 # If the printout does not start from the first MO, assume None for all previous orbitals.
638 if len(energies) == 0 and nvector > 1:
639 for i in range(1, nvector):
640 energies.append(None)
642 energy = utils.float(line[34:47])
643 energy = utils.convertor(energy, "hartree", "eV")
644 energies.append(energy)
646 # When symmetry is not used, this part of the line is missing.
647 if line[47:58].strip() == "Symmetry=":
648 sym = line[58:].strip()
649 sym = sym[0].upper() + sym[1:]
650 symmetries[nvector-1] = sym
652 line = next(inputfile)
653 if "MO Center" in line:
654 line = next(inputfile)
655 if "Bfn." in line:
656 line = next(inputfile)
657 if "-----" in line:
658 line = next(inputfile)
659 while line.strip():
660 line = next(inputfile)
661 line = next(inputfile)
663 self.set_attribute('nmo', nvector)
665 if not hasattr(self, 'moenergies') or (len(self.moenergies) > alphabeta):
666 self.moenergies = []
667 self.moenergies.append(energies)
669 if not hasattr(self, 'mosyms') or (len(self.mosyms) > alphabeta):
670 self.mosyms = []
671 self.mosyms.append(symmetries)
673 if not hasattr(self, 'homos') or (len(self.homos) > alphabeta):
674 self.homos = []
675 nvector_index = mooccnos.index(0) - 1
676 if nvector_index > -1:
677 self.homos.append(nvectors[nvector_index] - 1)
678 else:
679 self.homos.append(-1)
680 # If this was a restricted open-shell calculation, append
681 # to HOMOs twice since only one Molecular Orbital Analysis
682 # section is in the output file.
683 if (not unrestricted) and (1 in mooccnos):
684 nvector_index = mooccnos.index(1) - 1
685 if nvector_index > -1:
686 self.homos.append(nvectors[nvector_index] - 1)
687 else:
688 self.homos.append(-1)
690 # This is where the full MO vectors are printed, but a special
691 # directive is needed for it in the `scf` or `dft` block:
692 # print "final vectors" "final vectors analysis"
693 # which gives:
694 #
695 # Final MO vectors
696 # ----------------
697 #
698 #
699 # global array: alpha evecs[1:60,1:60], handle: -995
700 #
701 # 1 2 3 4 5 6
702 # ----------- ----------- ----------- ----------- ----------- -----------
703 # 1 -0.69930 -0.69930 -0.02746 -0.02769 -0.00313 -0.02871
704 # 2 -0.03156 -0.03135 0.00410 0.00406 0.00078 0.00816
705 # 3 0.00002 -0.00003 0.00067 0.00065 -0.00526 -0.00120
706 # ...
707 #
708 if line.strip() == "Final MO vectors":
710 if not hasattr(self, 'mocoeffs'):
711 self.mocoeffs = []
713 self.skip_lines(inputfile, ['d', 'b', 'b'])
715 # The columns are MOs, rows AOs, but that's and educated guess since no
716 # atom information is printed alongside the indices. This next line gives
717 # the dimensions, which we can check. if set before this. Also, this line
718 # specifies whether we are dealing with alpha or beta vectors.
719 array_info = next(inputfile)
720 while ("global array" in array_info):
721 alphabeta = int(line.split()[2] == "beta")
722 size = array_info.split('[')[1].split(']')[0]
723 nbasis = int(size.split(',')[0].split(':')[1])
724 nmo = int(size.split(',')[1].split(':')[1])
725 self.set_attribute('nbasis', nbasis)
726 self.set_attribute('nmo', nmo)
728 self.skip_line(inputfile, 'blank')
729 mocoeffs = []
730 while len(mocoeffs) < self.nmo:
731 nmos = list(map(int, next(inputfile).split()))
732 assert len(mocoeffs) == nmos[0] - 1
733 for n in nmos:
734 mocoeffs.append([])
735 self.skip_line(inputfile, 'dashes')
736 for nb in range(nbasis):
737 line = next(inputfile)
738 index = int(line.split()[0])
739 assert index == nb+1
740 coefficients = list(map(float, line.split()[1:]))
741 assert len(coefficients) == len(nmos)
742 for i, c in enumerate(coefficients):
743 mocoeffs[nmos[i]-1].append(c)
744 self.skip_line(inputfile, 'blank')
745 self.mocoeffs.append(mocoeffs)
747 array_info = next(inputfile)
749 # For Hartree-Fock, the atomic Mulliken charges are typically printed like this:
750 #
751 # Mulliken analysis of the total density
752 # --------------------------------------
753 #
754 # Atom Charge Shell Charges
755 # ----------- ------ -------------------------------------------------------
756 # 1 C 6 6.00 1.99 1.14 2.87
757 # 2 C 6 6.00 1.99 1.14 2.87
758 # ...
759 if line.strip() == "Mulliken analysis of the total density":
761 if not hasattr(self, "atomcharges"):
762 self.atomcharges = {}
764 self.skip_lines(inputfile, ['d', 'b', 'header', 'd'])
766 charges = []
767 line = next(inputfile)
768 while line.strip():
769 index, atomname, nuclear, atom = line.split()[:4]
770 shells = line.split()[4:]
771 charges.append(float(atom)-float(nuclear))
772 line = next(inputfile)
773 self.atomcharges['mulliken'] = charges
775 # Not the the 'overlap population' as printed in the Mulliken population analysis,
776 # is not the same thing as the 'overlap matrix'. In fact, it is the overlap matrix
777 # multiplied elementwise times the density matrix.
778 #
779 # ----------------------------
780 # Mulliken population analysis
781 # ----------------------------
782 #
783 # ----- Total overlap population -----
784 #
785 # 1 2 3 4 5 6 7
786 #
787 # 1 1 C s 2.0694818227 -0.0535883400 -0.0000000000 -0.0000000000 -0.0000000000 -0.0000000000 0.0000039991
788 # 2 1 C s -0.0535883400 0.8281341291 0.0000000000 -0.0000000000 0.0000000000 0.0000039991 -0.0009906747
789 # ...
790 #
791 # DFT does not seem to print the separate listing of Mulliken charges
792 # by default, but they are printed by this modules later on. They are also print
793 # for Hartree-Fock runs, though, so in that case make sure they are consistent.
794 if line.strip() == "Mulliken population analysis":
796 self.skip_lines(inputfile, ['d', 'b', 'total_overlap_population', 'b'])
798 overlaps = []
799 line = next(inputfile)
800 while all([c.isdigit() for c in line.split()]):
802 # There is always a line with the MO indices printed in thie block.
803 indices = [int(i)-1 for i in line.split()]
804 for i in indices:
805 overlaps.append([])
807 # There is usually a blank line after the MO indices, but
808 # there are exceptions, so check if line is blank first.
809 line = next(inputfile)
810 if not line.strip():
811 line = next(inputfile)
813 # Now we can iterate or atomic orbitals.
814 for nao in range(self.nbasis):
815 data = list(map(float, line.split()[4:]))
816 for i, d in enumerate(data):
817 overlaps[indices[i]].append(d)
818 line = next(inputfile)
820 line = next(inputfile)
822 # This header should be printed later, before the charges are print, which of course
823 # are just sums of the overlaps and could be calculated. But we just go ahead and
824 # parse them, make sure they're consistent with previously parsed values and
825 # use these since they are more precise (previous precision could have been just 0.01).
826 while "Total gross population on atoms" not in line:
827 line = next(inputfile)
828 self.skip_line(inputfile, 'blank')
829 charges = []
830 for i in range(self.natom):
831 line = next(inputfile)
832 iatom, element, ncharge, epop = line.split()
833 iatom = int(iatom)
834 ncharge = float(ncharge)
835 epop = float(epop)
836 assert iatom == (i+1)
837 charges.append(epop-ncharge)
839 if not hasattr(self, 'atomcharges'):
840 self.atomcharges = {}
841 if not "mulliken" in self.atomcharges:
842 self.atomcharges['mulliken'] = charges
843 else:
844 assert max(self.atomcharges['mulliken'] - numpy.array(charges)) < 0.01
845 self.atomcharges['mulliken'] = charges
847 # NWChem prints the dipole moment in atomic units first, and we could just fast forward
848 # to the values in Debye, which are also printed. But we can also just convert them
849 # right away and so parse a little bit less. Note how the reference point is print
850 # here within the block nicely, as it is for all moment later.
851 #
852 # -------------
853 # Dipole Moment
854 # -------------
855 #
856 # Center of charge (in au) is the expansion point
857 # X = 0.0000000 Y = 0.0000000 Z = 0.0000000
858 #
859 # Dipole moment 0.0000000000 Debye(s)
860 # DMX 0.0000000000 DMXEFC 0.0000000000
861 # DMY 0.0000000000 DMYEFC 0.0000000000
862 # DMZ -0.0000000000 DMZEFC 0.0000000000
863 #
864 # ...
865 #
866 if line.strip() == "Dipole Moment":
868 self.skip_lines(inputfile, ['d', 'b'])
870 reference_comment = next(inputfile)
871 assert "(in au)" in reference_comment
872 reference = next(inputfile).split()
873 self.reference = [reference[-7], reference[-4], reference[-1]]
874 self.reference = numpy.array([float(x) for x in self.reference])
875 self.reference = utils.convertor(self.reference, 'bohr', 'Angstrom')
877 self.skip_line(inputfile, 'blank')
879 magnitude = next(inputfile)
880 assert magnitude.split()[-1] == "A.U."
882 dipole = []
883 for i in range(3):
884 line = next(inputfile)
885 dipole.append(float(line.split()[1]))
887 dipole = utils.convertor(numpy.array(dipole), "ebohr", "Debye")
889 if not hasattr(self, 'moments'):
890 self.moments = [self.reference, dipole]
891 else:
892 self.moments[1] == dipole
894 # The quadrupole moment is pretty straightforward to parse. There are several
895 # blocks printed, and the first one called 'second moments' contains the raw
896 # moments, and later traceless values are printed. The moments, however, are
897 # not in lexicographical order, so we need to sort them. Also, the first block
898 # is in atomic units, so remember to convert to Buckinghams along the way.
899 #
900 # -----------------
901 # Quadrupole Moment
902 # -----------------
903 #
904 # Center of charge (in au) is the expansion point
905 # X = 0.0000000 Y = 0.0000000 Z = 0.0000000
906 #
907 # < R**2 > = ********** a.u. ( 1 a.u. = 0.280023 10**(-16) cm**2 )
908 # ( also called diamagnetic susceptibility )
909 #
910 # Second moments in atomic units
911 #
912 # Component Electronic+nuclear Point charges Total
913 # --------------------------------------------------------------------------
914 # XX -38.3608511210 0.0000000000 -38.3608511210
915 # YY -39.0055467347 0.0000000000 -39.0055467347
916 # ...
917 #
918 if line.strip() == "Quadrupole Moment":
920 self.skip_lines(inputfile, ['d', 'b'])
922 reference_comment = next(inputfile)
923 assert "(in au)" in reference_comment
924 reference = next(inputfile).split()
925 self.reference = [reference[-7], reference[-4], reference[-1]]
926 self.reference = numpy.array([float(x) for x in self.reference])
927 self.reference = utils.convertor(self.reference, 'bohr', 'Angstrom')
929 self.skip_lines(inputfile, ['b', 'units', 'susc', 'b'])
931 line = next(inputfile)
932 assert line.strip() == "Second moments in atomic units"
934 self.skip_lines(inputfile, ['b', 'header', 'd'])
936 # Parse into a dictionary and then sort by the component key.
937 quadrupole = {}
938 for i in range(6):
939 line = next(inputfile)
940 quadrupole[line.split()[0]] = float(line.split()[-1])
941 lex = sorted(quadrupole.keys())
942 quadrupole = [quadrupole[key] for key in lex]
944 quadrupole = utils.convertor(numpy.array(quadrupole), "ebohr2", "Buckingham")
946 # The checking of potential previous values if a bit more involved here,
947 # because it turns out NWChem has separate keywords for dipole, quadrupole
948 # and octupole output. So, it is perfectly possible to print the quadrupole
949 # and not the dipole... if that is the case set the former to None and
950 # issue a warning. Also, a regression has been added to cover this case.
951 if not hasattr(self, 'moments') or len(self.moments) < 2:
952 self.logger.warning("Found quadrupole moments but no previous dipole")
953 self.moments = [self.reference, None, quadrupole]
954 else:
955 if len(self.moments) == 2:
956 self.moments.append(quadrupole)
957 else:
958 assert self.moments[2] == quadrupole
960 # The octupole moment is analogous to the quadrupole, but there are more components
961 # and the checking of previously parsed dipole and quadrupole moments is more involved,
962 # with a corresponding test also added to regressions.
963 #
964 # ---------------
965 # Octupole Moment
966 # ---------------
967 #
968 # Center of charge (in au) is the expansion point
969 # X = 0.0000000 Y = 0.0000000 Z = 0.0000000
970 #
971 # Third moments in atomic units
972 #
973 # Component Electronic+nuclear Point charges Total
974 # --------------------------------------------------------------------------
975 # XXX -0.0000000000 0.0000000000 -0.0000000000
976 # YYY -0.0000000000 0.0000000000 -0.0000000000
977 # ...
978 #
979 if line.strip() == "Octupole Moment":
981 self.skip_lines(inputfile, ['d', 'b'])
983 reference_comment = next(inputfile)
984 assert "(in au)" in reference_comment
985 reference = next(inputfile).split()
986 self.reference = [reference[-7], reference[-4], reference[-1]]
987 self.reference = numpy.array([float(x) for x in self.reference])
988 self.reference = utils.convertor(self.reference, 'bohr', 'Angstrom')
990 self.skip_line(inputfile, 'blank')
992 line = next(inputfile)
993 assert line.strip() == "Third moments in atomic units"
995 self.skip_lines(inputfile, ['b', 'header', 'd'])
997 octupole = {}
998 for i in range(10):
999 line = next(inputfile)
1000 octupole[line.split()[0]] = float(line.split()[-1])
1001 lex = sorted(octupole.keys())
1002 octupole = [octupole[key] for key in lex]
1004 octupole = utils.convertor(numpy.array(octupole), "ebohr3", "Debye.ang2")
1006 if not hasattr(self, 'moments') or len(self.moments) < 2:
1007 self.logger.warning("Found octupole moments but no previous dipole or quadrupole moments")
1008 self.moments = [self.reference, None, None, octupole]
1009 elif len(self.moments) == 2:
1010 self.logger.warning("Found octupole moments but no previous quadrupole moments")
1011 self.moments.append(None)
1012 self.moments.append(octupole)
1013 else:
1014 if len(self.moments) == 3:
1015 self.moments.append(octupole)
1016 else:
1017 assert self.moments[3] == octupole
1019 if "Total MP2 energy" in line:
1020 self.metadata["methods"].append("MP2")
1021 mpenerg = float(line.split()[-1])
1022 if not hasattr(self, "mpenergies"):
1023 self.mpenergies = []
1024 self.mpenergies.append([])
1025 self.mpenergies[-1].append(utils.convertor(mpenerg, "hartree", "eV"))
1027 if line.strip() == "NWChem Extensible Many-Electron Theory Module":
1028 ccenergies = []
1029 while "Parallel integral file used" not in line:
1030 line = next(inputfile)
1031 if "CCSD total energy / hartree" in line or "total CCSD energy:" in line:
1032 self.metadata["methods"].append("CCSD")
1033 ccenergies.append(float(line.split()[-1]))
1034 if "CCSD(T) total energy / hartree" in line:
1035 self.metadata["methods"].append("CCSD(T)")
1036 ccenergies.append(float(line.split()[-1]))
1037 if ccenergies:
1038 self.append_attribute(
1039 "ccenergies", utils.convertor(ccenergies[-1], "hartree", "eV")
1040 )
1042 # Static and dynamic polarizability.
1043 if "Linear Response polarizability / au" in line:
1044 if not hasattr(self, "polarizabilities"):
1045 self.polarizabilities = []
1046 polarizability = []
1047 line = next(inputfile)
1048 assert line.split()[0] == "Frequency"
1049 line = next(inputfile)
1050 assert line.split()[0] == "Wavelength"
1051 self.skip_lines(inputfile, ['coordinates', 'd'])
1052 for _ in range(3):
1053 line = next(inputfile)
1054 polarizability.append(line.split()[1:])
1055 self.polarizabilities.append(numpy.array(polarizability))
1057 if line[:18] == ' Total times cpu:':
1058 self.metadata['success'] = True
1060 if line.strip() == "NWChem QMD Module":
1061 self.is_BOMD = True
1063 # Born-Oppenheimer molecular dynamics (BOMD): time.
1064 if "QMD Run Information" in line:
1065 self.skip_line(inputfile, 'd')
1066 line = next(inputfile)
1067 assert "Time elapsed (fs)" in line
1068 time = float(line.split()[4])
1069 self.append_attribute('time', time)
1071 # BOMD: geometry coordinates when `print low`.
1072 if line.strip() == "DFT ENERGY GRADIENTS":
1073 if self.is_BOMD:
1074 self.skip_lines(inputfile, ['b', 'atom coordinates gradient', 'xyzxyz'])
1075 line = next(inputfile)
1076 atomcoords_step = []
1077 while line.strip():
1078 tokens = line.split()
1079 assert len(tokens) == 8
1080 atomcoords_step.append([float(c) for c in tokens[2:5]])
1081 line = next(inputfile)
1082 self.atomcoords.append(atomcoords_step)
1084 def before_parsing(self):
1085 """NWChem-specific routines performed before parsing a file.
1086 """
1088 # The only reason we need this identifier is if `print low` is
1089 # set in the input file, which we assume is likely for a BOMD
1090 # trajectory. This will enable parsing coordinates from the
1091 # 'DFT ENERGY GRADIENTS' section.
1092 self.is_BOMD = False
1094 def after_parsing(self):
1095 """NWChem-specific routines for after parsing a file.
1097 Currently, expands self.shells() into self.aonames.
1098 """
1100 # setup a few necessary things, including a regular expression
1101 # for matching the shells
1102 table = utils.PeriodicTable()
1103 elements = [table.element[x] for x in self.atomnos]
1104 pattern = re.compile(r"(\ds)+(\dp)*(\dd)*(\df)*(\dg)*")
1106 labels = {}
1107 labels['s'] = ["%iS"]
1108 labels['p'] = ["%iPX", "%iPY", "%iPZ"]
1109 if self.shells['type'] == 'spherical':
1110 labels['d'] = ['%iD-2', '%iD-1', '%iD0', '%iD1', '%iD2']
1111 labels['f'] = ['%iF-3', '%iF-2', '%iF-1', '%iF0',
1112 '%iF1', '%iF2', '%iF3']
1113 labels['g'] = ['%iG-4', '%iG-3', '%iG-2', '%iG-1', '%iG0',
1114 '%iG1', '%iG2', '%iG3', '%iG4']
1115 elif self.shells['type'] == 'cartesian':
1116 labels['d'] = ['%iDXX', '%iDXY', '%iDXZ',
1117 '%iDYY', '%iDYZ',
1118 '%iDZZ']
1119 labels['f'] = ['%iFXXX', '%iFXXY', '%iFXXZ',
1120 '%iFXYY', '%iFXYZ', '%iFXZZ',
1121 '%iFYYY', '%iFYYZ', '%iFYZZ',
1122 '%iFZZZ']
1123 labels['g'] = ['%iGXXXX', '%iGXXXY', '%iGXXXZ',
1124 '%iGXXYY', '%iGXXYZ', '%iGXXZZ',
1125 '%iGXYYY', '%iGXYYZ', '%iGXYZZ',
1126 '%iGXZZZ', '%iGYYYY', '%iGYYYZ',
1127 '%iGYYZZ', '%iGYZZZ', '%iGZZZZ']
1128 else:
1129 self.logger.warning("Found a non-standard aoname representation type.")
1130 return
1132 # now actually build aonames
1133 # involves expanding 2s1p into appropriate types
1135 self.aonames = []
1136 for i, element in enumerate(elements):
1137 try:
1138 shell_text = self.shells[element]
1139 except KeyError:
1140 del self.aonames
1141 msg = "Cannot determine aonames for at least one atom."
1142 self.logger.warning(msg)
1143 break
1145 prefix = "%s%i_" % (element, i + 1) # (e.g. C1_)
1147 matches = pattern.match(shell_text)
1148 for j, group in enumerate(matches.groups()):
1149 if group is None:
1150 continue
1152 count = int(group[:-1])
1153 label = group[-1]
1155 for k in range(count):
1156 temp = [x % (j + k + 1) for x in labels[label]]
1157 self.aonames.extend([prefix + x for x in temp])
1159 # If we parsed a BOMD trajectory, the first two parsed
1160 # geometries are identical, and all from the second onward are
1161 # in Bohr. Delete the first one and perform the unit
1162 # conversion.
1163 if self.is_BOMD:
1164 self.atomcoords = utils.convertor(numpy.asarray(self.atomcoords)[1:, ...],
1165 'bohr', 'Angstrom')