Coverage for cclib/parser/gamessukparser.py : 99%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2#
3# Copyright (c) 2020, the cclib development team
4#
5# This file is part of cclib (http://cclib.github.io) and is distributed under
6# the terms of the BSD 3-Clause License.
8"""Parser for GAMESS-UK output files"""
11import re
13import numpy
15from cclib.parser import logfileparser
16from cclib.parser import utils
19class GAMESSUK(logfileparser.Logfile):
20 """A GAMESS UK log file"""
21 SCFRMS, SCFMAX, SCFENERGY = list(range(3)) # Used to index self.scftargets[]
23 def __init__(self, *args, **kwargs):
25 # Call the __init__ method of the superclass
26 super(GAMESSUK, self).__init__(logname="GAMESSUK", *args, **kwargs)
28 def __str__(self):
29 """Return a string representation of the object."""
30 return "GAMESS UK log file %s" % (self.filename)
32 def __repr__(self):
33 """Return a representation of the object."""
34 return 'GAMESSUK("%s")' % (self.filename)
36 def normalisesym(self, label):
37 """Use standard symmetry labels instead of GAMESS UK labels."""
38 label = label.replace("''", '"').replace("+", "").replace("-", "")
39 ans = label[0].upper() + label[1:]
40 return ans
42 def before_parsing(self):
44 # used for determining whether to add a second mosyms, etc.
45 self.betamosyms = self.betamoenergies = self.betamocoeffs = False
47 def extract(self, inputfile, line):
48 """Extract information from the file object inputfile."""
50 # Extract the version number and optionally the revision number.
51 if "version" in line:
52 search = re.search(r"\sversion\s*(\d\.\d)", line)
53 if search:
54 package_version = search.groups()[0]
55 self.metadata["package_version"] = package_version
56 self.metadata["legacy_package_version"] = package_version
57 if "Revision" in line:
58 revision = line.split()[1]
59 package_version = self.metadata.get("package_version")
60 if package_version:
61 self.metadata["package_version"] = "{}+{}".format(
62 package_version, revision
63 )
65 if line[1:22] == "total number of atoms":
66 natom = int(line.split()[-1])
67 self.set_attribute('natom', natom)
69 if line[3:44] == "convergence threshold in optimization run":
70 # Assuming that this is only found in the case of OPTXYZ
71 # (i.e. an optimization in Cartesian coordinates)
72 self.geotargets = [float(line.split()[-2])]
74 if line[32:61] == "largest component of gradient":
75 # This is the geotarget in the case of OPTXYZ
76 if not hasattr(self, "geovalues"):
77 self.geovalues = []
78 self.geovalues.append([float(line.split()[4])])
80 if line[37:49] == "convergence?":
81 # Get the geovalues and geotargets for OPTIMIZE
82 if not hasattr(self, "geovalues"):
83 self.geovalues = []
84 self.geotargets = []
85 geotargets = []
86 geovalues = []
87 for i in range(4):
88 temp = line.split()
89 geovalues.append(float(temp[2]))
90 if not self.geotargets:
91 geotargets.append(float(temp[-2]))
92 line = next(inputfile)
93 self.geovalues.append(geovalues)
94 if not self.geotargets:
95 self.geotargets = geotargets
97 # This is the only place coordinates are printed in single point calculations. Note that
98 # in the following fragment, the basis set selection is not always printed:
99 #
100 # ******************
101 # molecular geometry
102 # ******************
103 #
104 # ****************************************
105 # * basis selected is sto sto3g *
106 # ****************************************
107 #
108 # *******************************************************************************
109 # * *
110 # * atom atomic coordinates number of *
111 # * charge x y z shells *
112 # * *
113 # *******************************************************************************
114 # * *
115 # * *
116 # * c 6.0 0.0000000 -2.6361501 0.0000000 2 *
117 # * 1s 2sp *
118 # * *
119 # * *
120 # * c 6.0 0.0000000 2.6361501 0.0000000 2 *
121 # * 1s 2sp *
122 # * *
123 # ...
124 #
125 if line.strip() == "molecular geometry":
127 self.updateprogress(inputfile, "Coordinates")
129 self.skip_lines(inputfile, ['s', 'b', 's'])
130 line = next(inputfile)
131 if "basis selected is" in line:
132 self.skip_lines(inputfile, ['s', 'b', 's', 's'])
134 self.skip_lines(inputfile, ['header1', 'header2', 's', 's'])
136 atomnos = []
137 atomcoords = []
138 line = next(inputfile)
139 while line.strip():
140 line = next(inputfile)
141 if line.strip()[1:10].strip() and list(set(line.strip())) != ['*']:
142 atomcoords.append([utils.convertor(float(x), "bohr", "Angstrom") for x in line.split()[3:6]])
143 atomnos.append(int(round(float(line.split()[2]))))
145 if not hasattr(self, "atomcoords"):
146 self.atomcoords = []
147 self.atomcoords.append(atomcoords)
148 self.set_attribute('atomnos', atomnos)
150 # Each step of a geometry optimization will also print the coordinates:
151 #
152 # search 0
153 # *******************
154 # point 0 nuclear coordinates
155 # *******************
156 #
157 # x y z chg tag
158 # ============================================================
159 # 0.0000000 -2.6361501 0.0000000 6.00 c
160 # 0.0000000 2.6361501 0.0000000 6.00 c
161 # ..
162 #
163 if line[40:59] == "nuclear coordinates":
165 self.updateprogress(inputfile, "Coordinates")
167 # We need not remember the first geometry in geometry optimizations, as this will
168 # be already parsed from the "molecular geometry" section (see above).
169 if not hasattr(self, 'firstnuccoords') or self.firstnuccoords:
170 self.firstnuccoords = False
171 return
173 self.skip_lines(inputfile, ['s', 'b', 'colname', 'e'])
175 atomcoords = []
176 atomnos = []
177 line = next(inputfile)
178 while list(set(line.strip())) != ['=']:
180 cols = line.split()
181 atomcoords.append([utils.convertor(float(x), "bohr", "Angstrom") for x in cols[0:3]])
182 atomnos.append(int(float(cols[3])))
184 line = next(inputfile)
186 if not hasattr(self, "atomcoords"):
187 self.atomcoords = []
188 self.atomcoords.append(atomcoords)
189 self.set_attribute('atomnos', atomnos)
191 # This is printed when a geometry optimization succeeds, after the last gradient of the energy.
192 if line[40:62] == "optimization converged":
193 self.skip_line(inputfile, 's')
194 if not hasattr(self, 'optdone'):
195 self.optdone = []
196 self.optdone.append(len(self.geovalues)-1)
198 # This is apparently printed when a geometry optimization is not converged but the job ends.
199 if "minimisation not converging" in line:
200 self.skip_line(inputfile, 's')
201 self.optdone = []
203 if line[1:32] == "total number of basis functions":
205 nbasis = int(line.split()[-1])
206 self.set_attribute('nbasis', nbasis)
208 while line.find("charge of molecule") < 0:
209 line = next(inputfile)
211 charge = int(line.split()[-1])
212 self.set_attribute('charge', charge)
214 mult = int(next(inputfile).split()[-1])
215 self.set_attribute('mult', mult)
217 alpha = int(next(inputfile).split()[-1])-1
218 beta = int(next(inputfile).split()[-1])-1
219 if self.mult == 1:
220 self.homos = numpy.array([alpha], "i")
221 else:
222 self.homos = numpy.array([alpha, beta], "i")
224 if line[37:69] == "s-matrix over gaussian basis set":
225 self.aooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d")
227 self.skip_lines(inputfile, ['d', 'b'])
229 i = 0
230 while i < self.nbasis:
231 self.updateprogress(inputfile, "Overlap")
233 self.skip_lines(inputfile, ['b', 'b', 'header', 'b', 'b'])
235 for j in range(self.nbasis):
236 temp = list(map(float, next(inputfile).split()[1:]))
237 self.aooverlaps[j, (0+i):(len(temp)+i)] = temp
239 i += len(temp)
241 if line[18:43] == 'EFFECTIVE CORE POTENTIALS':
243 self.skip_line(inputfile, 'stars')
245 self.coreelectrons = numpy.zeros(self.natom, 'i')
246 line = next(inputfile)
247 while line[15:46] != "*"*31:
248 if line.find("for atoms ...") >= 0:
249 atomindex = []
250 line = next(inputfile)
251 while line.find("core charge") < 0:
252 broken = line.split()
253 atomindex.extend([int(x.split("-")[0]) for x in broken])
254 line = next(inputfile)
255 charge = float(line.split()[4])
256 for idx in atomindex:
257 self.coreelectrons[idx-1] = self.atomnos[idx-1] - charge
258 line = next(inputfile)
260 if line[3:27] == "Wavefunction convergence":
261 self.scftarget = float(line.split()[-2])
262 self.scftargets = []
264 if line[11:22] == "normal mode":
265 if not hasattr(self, "vibfreqs"):
266 self.vibfreqs = []
267 self.vibirs = []
269 units = next(inputfile)
270 xyz = next(inputfile)
271 equals = next(inputfile)
272 line = next(inputfile)
273 while line != equals:
274 temp = line.split()
275 self.vibfreqs.append(float(temp[1]))
276 self.vibirs.append(float(temp[-2]))
277 line = next(inputfile)
278 # Use the length of the vibdisps to figure out
279 # how many rotations and translations to remove
280 self.vibfreqs = self.vibfreqs[-len(self.vibdisps):]
281 self.vibirs = self.vibirs[-len(self.vibdisps):]
283 if line[44:73] == "normalised normal coordinates":
285 self.skip_lines(inputfile, ['e', 'b', 'b'])
287 self.vibdisps = []
288 freqnum = next(inputfile)
289 while freqnum.find("=") < 0:
291 self.skip_lines(inputfile, ['b', 'e', 'freqs', 'e', 'b', 'header', 'e'])
293 p = [[] for x in range(9)]
294 for i in range(len(self.atomnos)):
295 brokenx = list(map(float, next(inputfile)[25:].split()))
296 brokeny = list(map(float, next(inputfile)[25:].split()))
297 brokenz = list(map(float, next(inputfile)[25:].split()))
298 for j, x in enumerate(list(zip(brokenx, brokeny, brokenz))):
299 p[j].append(x)
300 self.vibdisps.extend(p)
302 self.skip_lines(inputfile, ['b', 'b'])
304 freqnum = next(inputfile)
306 if line[26:36] == "raman data":
307 self.vibramans = []
309 self.skip_lines(inputfile, ['s', 'b', 'header', 'b'])
311 line = next(inputfile)
312 while line[1] != "*":
313 self.vibramans.append(float(line.split()[3]))
314 self.skip_line(inputfile, 'blank')
315 line = next(inputfile)
316 # Use the length of the vibdisps to figure out
317 # how many rotations and translations to remove
318 self.vibramans = self.vibramans[-len(self.vibdisps):]
320 if line[3:11] == "SCF TYPE":
321 self.scftype = line.split()[-2]
322 assert self.scftype in ['rhf', 'uhf', 'gvb'], "%s not one of 'rhf', 'uhf' or 'gvb'" % self.scftype
324 if line[15:31] == "convergence data":
325 if not hasattr(self, "scfvalues"):
326 self.scfvalues = []
327 self.scftargets.append([self.scftarget]) # Assuming it does not change over time
328 while line[1:10] != "="*9:
329 line = next(inputfile)
330 line = next(inputfile)
331 tester = line.find("tester") # Can be in a different place depending
332 assert tester >= 0
333 while line[1:10] != "="*9: # May be two or three lines (unres)
334 line = next(inputfile)
336 scfvalues = []
337 line = next(inputfile)
338 while line.strip():
339 # e.g. **** recalulation of fock matrix on iteration 4 (examples/chap12/pyridine.out)
340 if line[2:6] != "****":
341 scfvalues.append([float(line[tester-5:tester+6])])
342 try:
343 line = next(inputfile)
344 except StopIteration:
345 self.logger.warning('File terminated before end of last SCF! Last tester: {}'.format(line.split()[5]))
346 break
347 self.scfvalues.append(scfvalues)
349 if line[10:22] == "total energy" and len(line.split()) == 3:
350 if not hasattr(self, "scfenergies"):
351 self.scfenergies = []
352 scfenergy = utils.convertor(float(line.split()[-1]), "hartree", "eV")
353 self.scfenergies.append(scfenergy)
355 # Total energies after Moller-Plesset corrections
356 # Second order correction is always first, so its first occurance
357 # triggers creation of mpenergies (list of lists of energies)
358 # Further corrections are appended as found
359 # Note: GAMESS-UK sometimes prints only the corrections,
360 # so they must be added to the last value of scfenergies
361 if line[10:32] == "mp2 correlation energy" or \
362 line[10:42] == "second order perturbation energy":
363 if not hasattr(self, "mpenergies"):
364 self.mpenergies = []
365 self.mpenergies.append([])
366 self.mp2correction = utils.float(line.split()[-1])
367 self.mp2energy = self.scfenergies[-1] + self.mp2correction
368 self.mpenergies[-1].append(utils.convertor(self.mp2energy, "hartree", "eV"))
369 if line[10:41] == "third order perturbation energy":
370 self.mp3correction = utils.float(line.split()[-1])
371 self.mp3energy = self.mp2energy + self.mp3correction
372 self.mpenergies[-1].append(utils.convertor(self.mp3energy, "hartree", "eV"))
374 if line[40:59] == "molecular basis set":
375 self.gbasis = []
376 line = next(inputfile)
377 while line.find("contraction coefficients") < 0:
378 line = next(inputfile)
379 equals = next(inputfile)
380 blank = next(inputfile)
381 atomname = next(inputfile)
382 basisregexp = re.compile(r"\d*(\D+)") # Get everything after any digits
383 shellcounter = 1
384 while line != equals:
385 gbasis = [] # Stores basis sets on one atom
386 blank = next(inputfile)
387 blank = next(inputfile)
388 line = next(inputfile)
389 shellno = int(line.split()[0])
390 shellgap = shellno - shellcounter
391 shellsize = 0
392 while len(line.split()) != 1 and line != equals:
393 if line.split():
394 shellsize += 1
395 coeff = {}
396 # coefficients and symmetries for a block of rows
397 while line.strip() and line != equals:
398 temp = line.strip().split()
399 # temp[1] may be either like (a) "1s" and "1sp", or (b) "s" and "sp"
400 # See GAMESS-UK 7.0 distribution/examples/chap12/pyridine2_21m10r.out
401 # for an example of the latter
402 sym = basisregexp.match(temp[1]).groups()[0]
403 assert sym in ['s', 'p', 'd', 'f', 'sp'], "'%s' not a recognized symmetry" % sym
404 if sym == "sp":
405 coeff.setdefault("S", []).append((float(temp[3]), float(temp[6])))
406 coeff.setdefault("P", []).append((float(temp[3]), float(temp[10])))
407 else:
408 coeff.setdefault(sym.upper(), []).append((float(temp[3]), float(temp[6])))
409 line = next(inputfile)
410 # either a blank or a continuation of the block
411 if coeff:
412 if sym == "sp":
413 gbasis.append(('S', coeff['S']))
414 gbasis.append(('P', coeff['P']))
415 else:
416 gbasis.append((sym.upper(), coeff[sym.upper()]))
417 if line == equals:
418 continue
419 line = next(inputfile)
420 # either the start of the next block or the start of a new atom or
421 # the end of the basis function section (signified by a line of equals)
422 numtoadd = 1 + (shellgap // shellsize)
423 shellcounter = shellno + shellsize
424 for x in range(numtoadd):
425 self.gbasis.append(gbasis)
427 if line[50:70] == "----- beta set -----":
428 self.betamosyms = True
429 self.betamoenergies = True
430 self.betamocoeffs = True
431 # betamosyms will be turned off in the next
432 # SYMMETRY ASSIGNMENT section
434 if line[31:50] == "SYMMETRY ASSIGNMENT":
435 if not hasattr(self, "mosyms"):
436 self.mosyms = []
438 multiple = {'a': 1, 'b': 1, 'e': 2, 't': 3, 'g': 4, 'h': 5}
440 equals = next(inputfile)
441 line = next(inputfile)
442 while line != equals: # There may be one or two lines of title (compare mg10.out and duhf_1.out)
443 line = next(inputfile)
445 mosyms = []
446 line = next(inputfile)
447 while line != equals:
448 temp = line[25:30].strip()
449 if temp[-1] == '?':
450 # e.g. e? or t? or g? (see example/chap12/na7mg_uhf.out)
451 # for two As, an A and an E, and two Es of the same energy respectively.
452 t = line[91:].strip().split()
453 for i in range(1, len(t), 2):
454 for j in range(multiple[t[i][0]]): # add twice for 'e', etc.
455 mosyms.append(self.normalisesym(t[i]))
456 else:
457 for j in range(multiple[temp[0]]):
458 mosyms.append(self.normalisesym(temp)) # add twice for 'e', etc.
459 line = next(inputfile)
460 assert len(mosyms) == self.nmo, "mosyms: %d but nmo: %d" % (len(mosyms), self.nmo)
461 if self.betamosyms:
462 # Only append if beta (otherwise with IPRINT SCF
463 # it will add mosyms for every step of a geo opt)
464 self.mosyms.append(mosyms)
465 self.betamosyms = False
466 elif self.scftype == 'gvb':
467 # gvb has alpha and beta orbitals but they are identical
468 self.mosysms = [mosyms, mosyms]
469 else:
470 self.mosyms = [mosyms]
472 if line[50:62] == "eigenvectors":
473 # Mocoeffs...can get evalues from here too
474 # (only if using FORMAT HIGH though will they all be present)
475 if not hasattr(self, "mocoeffs"):
476 self.aonames = []
477 aonames = []
478 minus = next(inputfile)
480 mocoeffs = numpy.zeros((self.nmo, self.nbasis), "d")
481 readatombasis = False
482 if not hasattr(self, "atombasis"):
483 self.atombasis = []
484 for i in range(self.natom):
485 self.atombasis.append([])
486 readatombasis = True
488 self.skip_lines(inputfile, ['b', 'b', 'evalues'])
490 p = re.compile(r"\d+\s+(\d+)\s*(\w+) (\w+)")
491 oldatomname = "DUMMY VALUE"
493 mo = 0
494 while mo < self.nmo:
495 self.updateprogress(inputfile, "Coefficients")
497 self.skip_lines(inputfile, ['b', 'b', 'nums', 'b', 'b'])
499 for basis in range(self.nbasis):
500 line = next(inputfile)
501 # Fill atombasis only first time around.
502 if readatombasis:
503 orbno = int(line[1:5])-1
504 atomno = int(line[6:9])-1
505 self.atombasis[atomno].append(orbno)
506 if not self.aonames:
507 pg = p.match(line[:18].strip()).groups()
508 atomname = "%s%s%s" % (pg[1][0].upper(), pg[1][1:], pg[0])
509 if atomname != oldatomname:
510 aonum = 1
511 oldatomname = atomname
512 name = "%s_%d%s" % (atomname, aonum, pg[2].upper())
513 if name in aonames:
514 aonum += 1
515 name = "%s_%d%s" % (atomname, aonum, pg[2].upper())
516 aonames.append(name)
517 temp = list(map(float, line[19:].split()))
518 mocoeffs[mo:(mo+len(temp)), basis] = temp
519 # Fill atombasis only first time around.
520 readatombasis = False
521 if not self.aonames:
522 self.aonames = aonames
524 line = next(inputfile) # blank line
525 while not line.strip():
526 line = next(inputfile)
527 evalues = line
528 if evalues[:17].strip(): # i.e. if these aren't evalues
529 break # Not all the MOs are present
530 mo += len(temp)
531 mocoeffs = mocoeffs[0:(mo+len(temp)), :] # In case some aren't present
532 if self.betamocoeffs:
533 self.mocoeffs.append(mocoeffs)
534 else:
535 self.mocoeffs = [mocoeffs]
537 if line[7:12] == "irrep":
538 ########## eigenvalues ###########
539 # This section appears once at the start of a geo-opt and once at the end
540 # unless IPRINT SCF is used (when it appears at every step in addition)
541 if not hasattr(self, "moenergies"):
542 self.moenergies = []
544 equals = next(inputfile)
545 while equals[1:5] != "====": # May be one or two lines of title (compare duhf_1.out and mg10.out)
546 equals = next(inputfile)
548 moenergies = []
549 line = next(inputfile)
550 if not line.strip(): # May be a blank line here (compare duhf_1.out and mg10.out)
551 line = next(inputfile)
553 while line.strip() and line != equals: # May end with a blank or equals
554 temp = line.strip().split()
555 moenergies.append(utils.convertor(float(temp[2]), "hartree", "eV"))
556 line = next(inputfile)
557 self.nmo = len(moenergies)
558 if self.betamoenergies:
559 self.moenergies.append(moenergies)
560 self.betamoenergies = False
561 elif self.scftype == 'gvb':
562 self.moenergies = [moenergies, moenergies]
563 else:
564 self.moenergies = [moenergies]
566 # The dipole moment is printed by default at the beginning of the wavefunction analysis,
567 # but the value is in atomic units, so we need to convert to Debye. It seems pretty
568 # evident that the reference point is the origin (0,0,0) which is also the center
569 # of mass after reorientation at the beginning of the job, although this is not
570 # stated anywhere (would be good to check).
571 #
572 # *********************
573 # wavefunction analysis
574 # *********************
575 #
576 # commence analysis at 24.61 seconds
577 #
578 # dipole moments
579 #
580 #
581 # nuclear electronic total
582 #
583 # x 0.0000000 0.0000000 0.0000000
584 # y 0.0000000 0.0000000 0.0000000
585 # z 0.0000000 0.0000000 0.0000000
586 #
587 if line.strip() == "dipole moments":
589 # In older version there is only one blank line before the header,
590 # and newer version there are two.
591 self.skip_line(inputfile, 'blank')
592 line = next(inputfile)
593 if not line.strip():
594 line = next(inputfile)
595 self.skip_line(inputfile, 'blank')
597 dipole = []
598 for i in range(3):
599 line = next(inputfile)
600 dipole.append(float(line.split()[-1]))
602 reference = [0.0, 0.0, 0.0]
603 dipole = utils.convertor(numpy.array(dipole), "ebohr", "Debye")
605 if not hasattr(self, 'moments'):
606 self.moments = [reference, dipole]
607 else:
608 assert self.moments[1] == dipole
610 # Net atomic charges are not printed at all, it seems,
611 # but you can get at them from nuclear charges and
612 # electron populations, which are printed like so:
613 #
614 # ---------------------------------------
615 # mulliken and lowdin population analyses
616 # ---------------------------------------
617 #
618 # ----- total gross population in aos ------
619 #
620 # 1 1 c s 1.99066 1.98479
621 # 2 1 c s 1.14685 1.04816
622 # ...
623 #
624 # ----- total gross population on atoms ----
625 #
626 # 1 c 6.0 6.00446 5.99625
627 # 2 c 6.0 6.00446 5.99625
628 # 3 c 6.0 6.07671 6.04399
629 # ...
630 if line[10:49] == "mulliken and lowdin population analyses":
632 if not hasattr(self, "atomcharges"):
633 self.atomcharges = {}
635 while not "total gross population on atoms" in line:
636 line = next(inputfile)
638 self.skip_line(inputfile, 'blank')
640 line = next(inputfile)
641 mulliken, lowdin = [], []
642 while line.strip():
643 nuclear = float(line.split()[2])
644 mulliken.append(nuclear - float(line.split()[3]))
645 lowdin.append(nuclear - float(line.split()[4]))
646 line = next(inputfile)
648 self.atomcharges["mulliken"] = mulliken
649 self.atomcharges["lowdin"] = lowdin
651 # ----- spinfree UHF natural orbital occupations -----
652 #
653 # 2.0000000 2.0000000 2.0000000 2.0000000 2.0000000 2.0000000 2.0000000
654 #
655 # 2.0000000 2.0000000 2.0000000 2.0000000 2.0000000 1.9999997 1.9999997
656 # ...
657 if "natural orbital occupations" in line:
659 occupations = []
661 self.skip_line(inputfile, "blank")
662 line = inputfile.next()
664 while line.strip():
665 occupations += map(float, line.split())
667 self.skip_line(inputfile, "blank")
668 line = inputfile.next()
670 self.set_attribute('nooccnos', occupations)
672 if line[:33] == ' end of G A M E S S program at':
673 self.metadata['success'] = True