Coverage for cclib/parser/gamessparser.py : 96%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2#
3# Copyright (c) 2020, the cclib development team
4#
5# This file is part of cclib (http://cclib.github.io) and is distributed under
6# the terms of the BSD 3-Clause License.
8"""Parser for GAMESS(US) output files"""
11import re
13import numpy
16from cclib.parser import logfileparser
17from cclib.parser import utils
20class GAMESS(logfileparser.Logfile):
21 """A GAMESS/Firefly log file."""
23 # Used to index self.scftargets[].
24 SCFRMS, SCFMAX, SCFENERGY = list(range(3))
26 # Used to extact Dunning basis set names.
27 dunningbas = {'CCD': 'cc-pVDZ', \
28 'CCT': 'cc-pVTZ', \
29 'CCQ': 'cc-pVQZ', \
30 'CC5': 'cc-pV5Z', \
31 'CC6': 'cc-pV6Z', \
32 'ACCD': 'aug-cc-pVDZ', \
33 'ACCT': 'aug-cc-pVTZ', \
34 'ACCQ': 'aug-cc-pVQZ', \
35 'ACC5': 'aug-cc-pV5Z', \
36 'ACC6': 'aug-cc-pV6Z', \
37 'CCDC': 'cc-pCVDZ', \
38 'CCTC': 'cc-pCVTZ', \
39 'CCQC': 'cc-pCVQZ', \
40 'CC5C': 'cc-pCV5Z', \
41 'CC6C': 'cc-pCV6Z', \
42 'ACCDC': 'aug-cc-pCVDZ', \
43 'ACCTC': 'aug-cc-pCVTZ', \
44 'ACCQC': 'aug-cc-pCVQZ', \
45 'ACC5C': 'aug-cc-pCV5Z', \
46 'ACC6C': 'aug-cc-pCV6Z'}
48 def __init__(self, *args, **kwargs):
50 # Call the __init__ method of the superclass
51 super(GAMESS, self).__init__(logname="GAMESS", *args, **kwargs)
53 def __str__(self):
54 """Return a string representation of the object."""
55 return "GAMESS log file %s" % (self.filename)
57 def __repr__(self):
58 """Return a representation of the object."""
59 return 'GAMESS("%s")' % (self.filename)
61 def normalisesym(self, label):
62 """Normalise the symmetries used by GAMESS.
64 To normalise, two rules need to be applied:
65 (1) Occurences of U/G in the 2/3 position of the label
66 must be lower-cased
67 (2) Two single quotation marks must be replaced by a double
68 """
70 if label[1:] == "''":
71 end = '"'
72 else:
73 end = label[1:].replace("U", "u").replace("G", "g")
74 return label[0] + end
76 def before_parsing(self):
78 self.firststdorient = True # Used to decide whether to wipe the atomcoords clean
79 self.cihamtyp = "none" # Type of CI Hamiltonian: saps or dets.
80 self.scftype = "none" # Type of SCF calculation: BLYP, RHF, ROHF, etc.
82 def extract(self, inputfile, line):
83 """Extract information from the file object inputfile."""
85 # Extract the version number. If the calculation is from
86 # Firefly, its version number comes before a line that looks
87 # like the normal GAMESS version number...
88 if "Firefly version" in line:
89 match = re.search(r"Firefly version\s([\d.]*)\D*(\d*)\s*\*", line)
90 if match:
91 base_version, build = match.groups()
92 package_version = "{}+{}".format(base_version, build)
93 self.metadata["package_version"] = package_version
94 self.metadata["legacy_package_version"] = base_version
95 if "GAMESS VERSION =" in line:
96 # ...so avoid overwriting it if Firefly already set this field.
97 if "package_version" not in self.metadata:
98 tokens = line.split()
99 day, month, year = tokens[4:7]
100 possible_release = tokens[-2]
101 # There may not be a (Rn) for the nth release that year, in
102 # which case this index is the same as 7 (the year).
103 if possible_release == year:
104 release = "1"
105 else:
106 # `(R23)` -> 23
107 release = possible_release[2:-1]
108 self.metadata["package_version"] = '{}.r{}'.format(year, release)
109 self.metadata["legacy_package_version"] = "{}R{}".format(year, release)
111 if line[1:12] == "INPUT CARD>":
112 return
114 # extract the methods
115 if line[1:7] == "SCFTYP":
116 method = line.split()[0][7:]
117 if len(self.metadata["methods"]) == 0:
118 self.metadata["methods"].append(method)
120 # extract the basis set name
121 if line[5:11] == "GBASIS":
122 basnm1 = line.split()[0][7:]
123 if basnm1 in self.dunningbas:
124 self.metadata["basis_set"] = self.dunningbas[basnm1]
125 else:
126 if basnm1 == "PM3" or basnm1 == "AM1":
127 self.metadata["methods"].append(basnm1)
128 if basnm1 == "STO" :
129 if line.split()[2] == "2":
130 self.metadata["basis_set"] = "STO-2G"
131 elif line.split()[2] == "3":
132 self.metadata["basis_set"] = "STO-3G"
133 elif line.split()[2] == "4":
134 self.metadata["basis_set"] = "STO-4G"
135 elif line.split()[2] == "5":
136 self.metadata["basis_set"] = "STO-5G"
137 if basnm1 == "N21" :
138 if line.split()[2] == "3" and line.split()[3] == "POLAR=COMMON":
139 self.metadata["basis_set"] = "3-21G*"
140 if line.split()[2] == "3" and line.split()[3] == "POLAR=NONE":
141 self.metadata["basis_set"] = "3-21G"
142 if line.split()[2] == "4" and line.split()[3] == "POLAR=NONE":
143 self.metadata["basis_set"] = "4-21G"
144 if line.split()[2] == "6" and line.split()[3] == "POLAR=NONE":
145 self.metadata["basis_set"] = "6-21G"
146 if basnm1 == "N31" :
147 if line.split()[2] == "6" and (line.split()[3] == "POLAR=POPN31" \
148 or line.split()[3] == "POLAR=POPLE"):
149 self.metadata["basis_set"] = "6-31G*"
150 line = next(inputfile)
151 if line.split()[-1] == "T":
152 self.metadata["basis_set"] = "6-31+G*"
153 line = next(inputfile)
154 if line.split()[1] == "0" and line.split()[3] == "T":
155 self.metadata["basis_set"] = "6-31++G*"
156 if line.split()[1] == "1" and line.split()[3] == "T":
157 self.metadata["basis_set"] = "6-31++G**"
158 else:
159 line = next(inputfile)
160 if line.split()[1] == "1": #NPFUNC = 1
161 self.metadata["basis_set"] = "6-31G**"
162 if line.split()[2] == "6" and line.split()[3] == "POLAR=NONE":
163 self.metadata["basis_set"] = "6-31G"
164 if line.split()[2] == "4" and line.split()[3] == "POLAR=NONE":
165 self.metadata["basis_set"] = "4-31G"
166 if line.split()[2] == "4" and line.split()[3] == "POLAR=POPN31":
167 self.metadata["basis_set"] = "4-31G*"
168 if basnm1 == "N311" :
169 if line.split()[2] == "6" and line.split()[3] == "POLAR=POPN311":
170 self.metadata["basis_set"] = "6-311G*"
171 line = next(inputfile)
172 if line.split()[-1] == "T":
173 self.metadata["basis_set"] = "6-311+G*"
174 line = next(inputfile)
175 if line.split()[1] == "0" and line.split()[3] == "T":
176 self.metadata["basis_set"] = "6-311++G*"
177 if line.split()[1] == "1" and line.split()[3] == "T":
178 self.metadata["basis_set"] = "6-311++G**"
179 else:
180 line = next(inputfile)
181 if line.split()[1] == "1": #NPFUNC = 1
182 self.metadata["basis_set"] = "6-311G**"
183 if line.split()[2] == "6" and line.split()[3] == "POLAR=NONE":
184 self.metadata["basis_set"] = "6-311G"
186 # We are looking for this line:
187 # PARAMETERS CONTROLLING GEOMETRY SEARCH ARE
188 # ...
189 # OPTTOL = 1.000E-04 RMIN = 1.500E-03
190 if line[10:18] == "OPTTOL =":
191 if not hasattr(self, "geotargets"):
192 opttol = float(line.split()[2])
193 self.geotargets = numpy.array([opttol, 3. / opttol], "d")
195 # Has to deal with such lines as:
196 # FINAL R-B3LYP ENERGY IS -382.0507446475 AFTER 10 ITERATIONS
197 # FINAL ENERGY IS -379.7594673378 AFTER 9 ITERATIONS
198 # ...so take the number after the "IS"
199 if line.find("FINAL") == 1:
200 if not hasattr(self, "scfenergies"):
201 self.scfenergies = []
202 temp = line.split()
203 self.scfenergies.append(utils.convertor(float(temp[temp.index("IS") + 1]), "hartree", "eV"))
204 # Empirical dispersion: first is GAMESS-US, second is Firefly
205 if any(
206 line.find(dispersion_trigger) == 1
207 for dispersion_trigger in (
208 "GRIMME'S DISPERSION ENERGY", "Dispersion correction to total energy"
209 )
210 ):
211 dispersion = utils.convertor(float(line.split()[-1]), "hartree", "eV")
212 self.append_attribute("dispersionenergies", dispersion)
214 # For total energies after Moller-Plesset corrections, the output looks something like this:
215 #
216 # RESULTS OF MOLLER-PLESSET 2ND ORDER CORRECTION ARE
217 # E(0)= -285.7568061536
218 # E(1)= 0.0
219 # E(2)= -0.9679419329
220 # E(MP2)= -286.7247480864
221 # where E(MP2) = E(0) + E(2)
222 #
223 # With GAMESS-US 12 Jan 2009 (R3), the preceding text is different:
224 # DIRECT 4-INDEX TRANSFORMATION
225 # SCHWARZ INEQUALITY TEST SKIPPED 0 INTEGRAL BLOCKS
226 # E(SCF)= -76.0088477471
227 # E(2)= -0.1403745370
228 # E(MP2)= -76.1492222841
229 #
230 # With GAMESS-US 20 APR 2017 (R1), the following block may be present:
231 # SCHWARZ INEQUALITY TEST SKIPPED 0 INTEGRAL BLOCKS
232 # ... END OF INTEGRAL TRANSFORMATION ...
234 if line.find("RESULTS OF MOLLER-PLESSET") >= 0 or line[6:37] == "SCHWARZ INEQUALITY TEST SKIPPED":
236 if not hasattr(self, "mpenergies"):
237 self.mpenergies = []
239 line = next(inputfile)
240 # Each iteration has a new print-out
241 if "END OF INTEGRAL TRANSFORMATION" not in line:
242 self.mpenergies.append([])
244 # GAMESS-US presently supports only second order corrections (MP2)
245 # PC GAMESS also has higher levels (3rd and 4th), with different output
246 # Only the highest level MP4 energy is gathered (SDQ or SDTQ)
247 # Loop breaks when substring "DONE WITH MPn ENERGY" is encountered,
248 # where n=2, 3 or 4.
249 while "DONE WITH MP" not in line:
251 if len(line.split()) > 0:
252 # Only up to MP2 correction
253 if line.split()[0] == "E(MP2)=":
254 self.metadata["methods"].append("MP2")
255 mp2energy = float(line.split()[1])
256 self.mpenergies[-1].append(utils.convertor(mp2energy, "hartree", "eV"))
257 # MP2 before higher order calculations
258 if line.split()[0] == "E(MP2)":
259 mp2energy = float(line.split()[2])
260 self.mpenergies[-1].append(utils.convertor(mp2energy, "hartree", "eV"))
261 if line.split()[0] == "E(MP3)":
262 self.metadata["methods"].append("MP3")
263 mp3energy = float(line.split()[2])
264 self.mpenergies[-1].append(utils.convertor(mp3energy, "hartree", "eV"))
265 if line.split()[0] in ["E(MP4-SDQ)", "E(MP4-SDTQ)"]:
266 self.metadata["methods"].append("MP4")
267 mp4energy = float(line.split()[2])
268 self.mpenergies[-1].append(utils.convertor(mp4energy, "hartree", "eV"))
269 line = next(inputfile)
271 # Total energies after Coupled Cluster calculations
272 # Only the highest Coupled Cluster level result is gathered
273 if line[12:23] == "CCD ENERGY:":
274 self.metadata["methods"].append("CCD")
275 if not hasattr(self, "ccenergies"):
276 self.ccenergies = []
277 ccenergy = float(line.split()[2])
278 self.ccenergies.append(utils.convertor(ccenergy, "hartree", "eV"))
279 if line.find("CCSD") >= 0 and line.split()[0:2] == ["CCSD", "ENERGY:"]:
280 self.metadata["methods"].append("CCSD")
281 if not hasattr(self, "ccenergies"):
282 self.ccenergies = []
283 ccenergy = float(line.split()[2])
284 line = next(inputfile)
285 if line[8:23] == "CCSD[T] ENERGY:":
286 self.metadata["methods"].append("CCSD[T]")
287 ccenergy = float(line.split()[2])
288 line = next(inputfile)
289 if line[8:23] == "CCSD(T) ENERGY:":
290 self.metadata["methods"].append("CCSD(T)")
291 ccenergy = float(line.split()[2])
292 self.ccenergies.append(utils.convertor(ccenergy, "hartree", "eV"))
294 if "T1 DIAGNOSTIC" in line:
295 self.metadata["t1_diagnostic"] = float(line.split()[3])
297 # Also collect MP2 energies, which are always calculated before CC
298 if line[8:23] == "MBPT(2) ENERGY:":
299 if not hasattr(self, "mpenergies"):
300 self.mpenergies = []
301 self.mpenergies.append([])
302 mp2energy = float(line.split()[2])
303 self.mpenergies[-1].append(utils.convertor(mp2energy, "hartree", "eV"))
305 # Extract charge and multiplicity
306 if line[1:19] == "CHARGE OF MOLECULE":
308 charge = int(round(float(line.split()[-1])))
309 self.set_attribute('charge', charge)
311 line = next(inputfile)
312 mult = int(line.split()[-1])
313 self.set_attribute('mult', mult)
315 # Electronic transitions (etenergies) for CIS runs and TD-DFT, which
316 # have very similar outputs. The outputs EOM look very differentm, though.
317 #
318 # ---------------------------------------------------------------------
319 # CI-SINGLES EXCITATION ENERGIES
320 # STATE HARTREE EV KCAL/MOL CM-1 NM
321 # ---------------------------------------------------------------------
322 # 1A'' 0.1677341781 4.5643 105.2548 36813.40 271.64
323 # ...
324 if re.match("(CI-SINGLES|TDDFT) EXCITATION ENERGIES", line.strip()):
326 if not hasattr(self, "etenergies"):
327 self.etenergies = []
329 get_etosc = False
330 header = next(inputfile).rstrip()
331 if header.endswith("OSC. STR."):
332 # water_cis_dets.out does not have the oscillator strength
333 # in this table...it is extracted from a different section below
334 get_etosc = True
335 self.etoscs = []
337 self.skip_line(inputfile, 'dashes')
339 line = next(inputfile)
340 broken = line.split()
341 while len(broken) > 0:
343 # Take hartree value with more numbers, and convert.
344 # Note that the values listed after this are also less exact!
345 etenergy = float(broken[1])
346 self.etenergies.append(utils.convertor(etenergy, "hartree", "wavenumber"))
347 if get_etosc:
348 etosc = float(broken[-1])
349 self.etoscs.append(etosc)
350 broken = next(inputfile).split()
352 # Detect the CI hamiltonian type, if applicable.
353 # Should always be detected if CIS is done.
354 if line[8:64] == "RESULTS FROM SPIN-ADAPTED ANTISYMMETRIZED PRODUCT (SAPS)":
355 self.cihamtyp = "saps"
356 if line[8:64] == "RESULTS FROM DETERMINANT BASED ATOMIC ORBITAL CI-SINGLES":
357 self.cihamtyp = "dets"
359 # etsecs (used only for CIS runs for now)
360 if line[1:14] == "EXCITED STATE":
361 if not hasattr(self, 'etsecs'):
362 self.etsecs = []
363 if not hasattr(self, 'etsyms'):
364 self.etsyms = []
365 statenumber = int(line.split()[2])
366 spin = int(float(line.split()[7]))
367 if spin == 0:
368 sym = "Singlet"
369 if spin == 1:
370 sym = "Triplet"
371 sym += '-' + line.split()[-1]
372 self.etsyms.append(sym)
373 # skip 5 lines
374 for i in range(5):
375 line = next(inputfile)
376 line = next(inputfile)
377 CIScontribs = []
378 while line.strip()[0] != "-":
379 MOtype = 0
380 # alpha/beta are specified for hamtyp=dets
381 if self.cihamtyp == "dets":
382 if line.split()[0] == "BETA":
383 MOtype = 1
384 fromMO = int(line.split()[-3])-1
385 toMO = int(line.split()[-2])-1
386 coeff = float(line.split()[-1])
387 # With the SAPS hamiltonian, the coefficients are multiplied
388 # by sqrt(2) so that they normalize to 1.
389 # With DETS, both alpha and beta excitations are printed.
390 # if self.cihamtyp == "saps":
391 # coeff /= numpy.sqrt(2.0)
392 CIScontribs.append([(fromMO, MOtype), (toMO, MOtype), coeff])
393 line = next(inputfile)
394 self.etsecs.append(CIScontribs)
396 # etoscs (used only for CIS runs now)
397 if line[1:50] == "TRANSITION FROM THE GROUND STATE TO EXCITED STATE":
398 if not hasattr(self, "etoscs"):
399 self.etoscs = []
401 # This was the suggested as a fix in issue #61, and it does allow
402 # the parser to finish without crashing. However, it seems that
403 # etoscs is shorter in this case than the other transition attributes,
404 # so that should be somehow corrected and tested for.
405 if "OPTICALLY" in line:
406 pass
407 else:
408 statenumber = int(line.split()[-1])
409 # skip 7 lines
410 for i in range(8):
411 line = next(inputfile)
412 strength = float(line.split()[3])
413 self.etoscs.append(strength)
415 # TD-DFT for GAMESS-US.
416 # The format for excitations has changed a bit between 2007 and 2012.
417 # Original format parser was written for:
418 #
419 # -------------------
420 # TRIPLET EXCITATIONS
421 # -------------------
422 #
423 # STATE # 1 ENERGY = 3.027228 EV
424 # OSCILLATOR STRENGTH = 0.000000
425 # DRF COEF OCC VIR
426 # --- ---- --- ---
427 # 35 -1.105383 35 -> 36
428 # 69 -0.389181 34 -> 37
429 # 103 -0.405078 33 -> 38
430 # 137 0.252485 32 -> 39
431 # 168 -0.158406 28 -> 40
432 #
433 # STATE # 2 ENERGY = 4.227763 EV
434 # ...
435 #
436 # Here is the corresponding 2012 version:
437 #
438 # -------------------
439 # TRIPLET EXCITATIONS
440 # -------------------
441 #
442 # STATE # 1 ENERGY = 3.027297 EV
443 # OSCILLATOR STRENGTH = 0.000000
444 # LAMBDA DIAGNOSTIC = 0.925 (RYDBERG/CHARGE TRANSFER CHARACTER)
445 # SYMMETRY OF STATE = A
446 # EXCITATION DE-EXCITATION
447 # OCC VIR AMPLITUDE AMPLITUDE
448 # I A X(I->A) Y(A->I)
449 # --- --- -------- --------
450 # 35 36 -0.929190 -0.176167
451 # 34 37 -0.279823 -0.109414
452 # ...
453 #
454 # We discern these two by the presence of the arrow in the old version.
455 #
456 # The "LET EXCITATIONS" pattern used below catches both
457 # singlet and triplet excitations output.
458 if line[14:29] == "LET EXCITATIONS":
460 self.etenergies = []
461 self.etoscs = []
462 self.etsecs = []
463 etsyms = []
465 self.skip_lines(inputfile, ['d', 'b'])
467 # Loop while states are still being printed.
468 line = next(inputfile)
469 while line[1:6] == "STATE":
471 self.updateprogress(inputfile, "Excited States")
473 etenergy = utils.convertor(float(line.split()[-2]), "eV", "wavenumber")
474 etoscs = float(next(inputfile).split()[-1])
475 self.etenergies.append(etenergy)
476 self.etoscs.append(etoscs)
478 # Symmetry is not always present, especially in old versions.
479 # Newer versions, on the other hand, can also provide a line
480 # with lambda diagnostic and some extra headers.
481 line = next(inputfile)
482 if "LAMBDA DIAGNOSTIC" in line:
483 line = next(inputfile)
484 if "SYMMETRY" in line:
485 etsyms.append(line.split()[-1])
486 line = next(inputfile)
487 if "EXCITATION" in line and "DE-EXCITATION" in line:
488 line = next(inputfile)
489 if line.count("AMPLITUDE") == 2:
490 line = next(inputfile)
492 self.skip_line(inputfile, 'dashes')
494 CIScontribs = []
495 line = next(inputfile)
496 while line.strip():
497 cols = line.split()
498 if "->" in line:
499 i_occ_vir = [2, 4]
500 i_coeff = 1
502 else:
503 i_occ_vir = [0, 1]
504 i_coeff = 2
505 fromMO, toMO = [int(cols[i]) - 1 for i in i_occ_vir]
506 coeff = float(cols[i_coeff])
507 CIScontribs.append([(fromMO, 0), (toMO, 0), coeff])
508 line = next(inputfile)
509 self.etsecs.append(CIScontribs)
510 line = next(inputfile)
512 # The symmetries are not always present.
513 if etsyms:
514 self.etsyms = etsyms
516 # Maximum and RMS gradients.
517 if "MAXIMUM GRADIENT" in line or "RMS GRADIENT" in line:
519 parts = line.split()
521 # Avoid parsing the following...
523 ## YOU SHOULD RESTART "OPTIMIZE" RUNS WITH THE COORDINATES
524 ## WHOSE ENERGY IS LOWEST. RESTART "SADPOINT" RUNS WITH THE
525 ## COORDINATES WHOSE RMS GRADIENT IS SMALLEST. THESE ARE NOT
526 ## ALWAYS THE LAST POINT COMPUTED!
528 if parts[0] not in ["MAXIMUM", "RMS", "(1)"]:
529 return
531 if not hasattr(self, "geovalues"):
532 self.geovalues = []
534 # Newer versions (around 2006) have both maximum and RMS on one line:
535 # MAXIMUM GRADIENT = 0.0531540 RMS GRADIENT = 0.0189223
536 if len(parts) == 8:
537 maximum = float(parts[3])
538 rms = float(parts[7])
540 # In older versions of GAMESS, this spanned two lines, like this:
541 # MAXIMUM GRADIENT = 0.057578167
542 # RMS GRADIENT = 0.027589766
543 if len(parts) == 4:
544 maximum = float(parts[3])
545 line = next(inputfile)
546 parts = line.split()
547 rms = float(parts[3])
549 # FMO also prints two final one- and two-body gradients (see exam37):
550 # (1) MAXIMUM GRADIENT = 0.0531540 RMS GRADIENT = 0.0189223
551 if len(parts) == 9:
552 maximum = float(parts[4])
553 rms = float(parts[8])
555 self.geovalues.append([maximum, rms])
557 # This is the input orientation, which is the only data available for
558 # SP calcs, but which should be overwritten by the standard orientation
559 # values, which is the only information available for all geoopt cycles.
560 if line[11:50] == "ATOMIC COORDINATES":
562 if not hasattr(self, "atomcoords"):
563 self.atomcoords = []
565 line = next(inputfile)
566 atomcoords = []
567 atomnos = []
568 line = next(inputfile)
569 while line.strip():
570 temp = line.strip().split()
571 atomcoords.append([utils.convertor(float(x), "bohr", "Angstrom") for x in temp[2:5]])
572 atomnos.append(int(round(float(temp[1])))) # Don't use the atom name as this is arbitary
573 line = next(inputfile)
575 self.set_attribute('atomnos', atomnos)
576 self.atomcoords.append(atomcoords)
578 if line[12:40] == "EQUILIBRIUM GEOMETRY LOCATED":
579 # Prevent extraction of the final geometry twice
580 if not hasattr(self, 'optdone'):
581 self.optdone = []
582 self.optdone.append(len(self.geovalues) - 1)
584 # Make sure we always have optdone for geomtry optimization, even if not converged.
585 if "GEOMETRY SEARCH IS NOT CONVERGED" in line:
586 if not hasattr(self, 'optdone'):
587 self.optdone = []
589 # This is the standard orientation, which is the only coordinate
590 # information available for all geometry optimisation cycles.
591 # The input orientation will be overwritten if this is a geometry optimisation
592 # We assume that a previous Input Orientation has been found and
593 # used to extract the atomnos
594 if line[1:29] == "COORDINATES OF ALL ATOMS ARE" and (not hasattr(self, "optdone") or self.optdone == []):
596 self.updateprogress(inputfile, "Coordinates")
598 if self.firststdorient:
599 self.firststdorient = False
600 # Wipes out the single input coordinate at the start of the file
601 self.atomcoords = []
603 self.skip_lines(inputfile, ['line', '-'])
605 atomcoords = []
606 line = next(inputfile)
608 for i in range(self.natom):
609 temp = line.strip().split()
610 atomcoords.append(list(map(float, temp[2:5])))
611 line = next(inputfile)
612 self.atomcoords.append(atomcoords)
614 # Section with SCF information.
615 #
616 # The space at the start of the search string is to differentiate from MCSCF.
617 # Everything before the search string is stored as the type of SCF.
618 # SCF types may include: BLYP, RHF, ROHF, UHF, etc.
619 #
620 # For example, in exam17 the section looks like this (note that this is GVB):
621 # ------------------------
622 # ROHF-GVB SCF CALCULATION
623 # ------------------------
624 # GVB STEP WILL USE 119875 WORDS OF MEMORY.
625 #
626 # MAXIT= 30 NPUNCH= 2 SQCDF TOL=1.0000E-05
627 # NUCLEAR ENERGY= 6.1597411978
628 # EXTRAP=T DAMP=F SHIFT=F RSTRCT=F DIIS=F SOSCF=F
629 #
630 # ITER EX TOTAL ENERGY E CHANGE SQCDF DIIS ERROR
631 # 0 0 -38.298939963 -38.298939963 0.131784454 0.000000000
632 # 1 1 -38.332044339 -0.033104376 0.026019716 0.000000000
633 # ... and will be terminated by a blank line.
634 if line.rstrip()[-16:] == " SCF CALCULATION":
636 # Remember the type of SCF.
637 self.scftype = line.strip()[:-16]
639 self.skip_line(inputfile, 'dashes')
641 while line[:5] != " ITER":
643 self.updateprogress(inputfile, "Attributes")
645 # GVB uses SQCDF for checking convergence (for example in exam17).
646 if "GVB" in self.scftype and "SQCDF TOL=" in line:
647 scftarget = float(line.split("=")[-1])
649 # Normally, however, the density is used as the convergence criterium.
650 # Deal with various versions:
651 # (GAMESS VERSION = 12 DEC 2003)
652 # DENSITY MATRIX CONV= 2.00E-05 DFT GRID SWITCH THRESHOLD= 3.00E-04
653 # (GAMESS VERSION = 22 FEB 2006)
654 # DENSITY MATRIX CONV= 1.00E-05
655 # (PC GAMESS version 6.2, Not DFT?)
656 # DENSITY CONV= 1.00E-05
657 elif "DENSITY CONV" in line or "DENSITY MATRIX CONV" in line:
658 scftarget = float(line.split()[-1])
660 line = next(inputfile)
662 if not hasattr(self, "scftargets"):
663 self.scftargets = []
665 self.scftargets.append([scftarget])
667 if not hasattr(self, "scfvalues"):
668 self.scfvalues = []
670 # Normally the iterations print in 6 columns.
671 # For ROHF, however, it is 5 columns, thus this extra parameter.
672 if "ROHF" in self.scftype:
673 self.scf_valcol = 4
674 else:
675 self.scf_valcol = 5
677 line = next(inputfile)
679 # SCF iterations are terminated by a blank line.
680 # The first four characters usually contains the step number.
681 # However, lines can also contain messages, including:
682 # * * * INITIATING DIIS PROCEDURE * * *
683 # CONVERGED TO SWOFF, SO DFT CALCULATION IS NOW SWITCHED ON
684 # DFT CODE IS SWITCHING BACK TO THE FINER GRID
685 values = []
686 while line.strip():
687 try:
688 temp = int(line[0:4])
689 except ValueError:
690 pass
691 else:
692 # if there were 100 iterations or more, the first part of the line
693 # will look like 10099, 101100, 102101, etc., with no spaces between
694 # the numbers. We can check if this is the case by seeing if the first
695 # number on the line exceeds 10000.
696 split_line = [line[0:4], line[4:7]] + line[7:].split()
697 values.append([float(split_line[self.scf_valcol])])
698 try:
699 line = next(inputfile)
700 except StopIteration:
701 self.logger.warning('File terminated before end of last SCF!')
702 break
703 self.scfvalues.append(values)
706 # Sometimes, only the first SCF cycle has the banner parsed for above,
707 # so we must identify them from the header before the SCF iterations.
708 # The example we have for this is the GeoOpt unittest for Firefly8.
709 if line[1:8] == "ITER EX":
711 # In this case, the convergence targets are not printed, so we assume
712 # they do not change.
713 self.scftargets.append(self.scftargets[-1])
715 values = []
716 line = next(inputfile)
717 while line.strip():
718 try:
719 temp = int(line[0:4])
720 except ValueError:
721 pass
722 else:
723 values.append([float(line.split()[self.scf_valcol])])
724 line = next(inputfile)
725 self.scfvalues.append(values)
727 # Extract normal coordinate analysis, including vibrational frequencies (vibfreq),
728 # IT intensities (vibirs) and displacements (vibdisps).
729 #
730 # This section typically looks like the following in GAMESS-US:
731 #
732 # MODES 1 TO 6 ARE TAKEN AS ROTATIONS AND TRANSLATIONS.
733 #
734 # FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2,
735 # REDUCED MASSES IN AMU.
736 #
737 # 1 2 3 4 5
738 # FREQUENCY: 52.49 41.45 17.61 9.23 10.61
739 # REDUCED MASS: 3.92418 3.77048 5.43419 6.44636 5.50693
740 # IR INTENSITY: 0.00013 0.00001 0.00004 0.00000 0.00003
741 #
742 # ...or in the case of a numerical Hessian job...
743 #
744 # MODES 1 TO 5 ARE TAKEN AS ROTATIONS AND TRANSLATIONS.
745 #
746 # FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2,
747 # REDUCED MASSES IN AMU.
748 #
749 # 1 2 3 4 5
750 # FREQUENCY: 0.05 0.03 0.03 30.89 30.94
751 # REDUCED MASS: 8.50125 8.50137 8.50136 1.06709 1.06709
752 #
753 # ...whereas PC-GAMESS has...
754 #
755 # MODES 1 TO 6 ARE TAKEN AS ROTATIONS AND TRANSLATIONS.
756 #
757 # FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2
758 #
759 # 1 2 3 4 5
760 # FREQUENCY: 5.89 1.46 0.01 0.01 0.01
761 # IR INTENSITY: 0.00000 0.00000 0.00000 0.00000 0.00000
762 #
763 # If Raman is present we have (for PC-GAMESS)...
764 #
765 # MODES 1 TO 6 ARE TAKEN AS ROTATIONS AND TRANSLATIONS.
766 #
767 # FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2
768 # RAMAN ACTIVITIES IN ANGSTROM**4/AMU, DEPOLARIZATIONS ARE DIMENSIONLESS
769 #
770 # 1 2 3 4 5
771 # FREQUENCY: 5.89 1.46 0.04 0.03 0.01
772 # IR INTENSITY: 0.00000 0.00000 0.00000 0.00000 0.00000
773 # RAMAN ACTIVITY: 12.675 1.828 0.000 0.000 0.000
774 # DEPOLARIZATION: 0.750 0.750 0.124 0.009 0.750
775 #
776 # If GAMESS-US or PC-GAMESS has not reached the stationary point we have
777 # and additional warning, repeated twice, like so (see n_water.log for an example):
778 #
779 # *******************************************************
780 # * THIS IS NOT A STATIONARY POINT ON THE MOLECULAR PES *
781 # * THE VIBRATIONAL ANALYSIS IS NOT VALID !!! *
782 # *******************************************************
783 #
784 # There can also be additional warnings about the selection of modes, for example:
785 #
786 # * * * WARNING, MODE 6 HAS BEEN CHOSEN AS A VIBRATION
787 # WHILE MODE12 IS ASSUMED TO BE A TRANSLATION/ROTATION.
788 # PLEASE VERIFY THE PROGRAM'S DECISION MANUALLY!
789 #
790 if "NORMAL COORDINATE ANALYSIS IN THE HARMONIC APPROXIMATION" in line:
792 self.vibfreqs = []
793 self.vibirs = []
794 self.vibdisps = []
796 # Need to get to the modes line, which is often preceeded by
797 # a list of atomic weights and some possible warnings.
798 # Pass the warnings to the logger if they are there.
799 while not "MODES" in line:
800 self.updateprogress(inputfile, "Frequency Information")
802 line = next(inputfile)
804 # Typical Atomic Masses section printed in GAMESS
805 # ATOMIC WEIGHTS (AMU)
806 #
807 # 1 O 15.99491
808 # 2 H 1.00782
809 # 3 H 1.00782
810 if "ATOMIC WEIGHTS" in line:
811 atommasses = []
812 self.skip_line(inputfile,['b'])
813 # There is a blank line after ATOMIC WEIGHTS
814 line = next(inputfile)
815 while line.strip():
816 temp = line.strip().split()
817 atommasses.append(float(temp[2]))
818 line = next(inputfile)
819 self.set_attribute('atommasses', atommasses)
821 if "THIS IS NOT A STATIONARY POINT" in line:
822 msg = "\n This is not a stationary point on the molecular PES"
823 msg += "\n The vibrational analysis is not valid!!!"
824 self.logger.warning(msg)
825 if "* * * WARNING, MODE" in line:
826 line1 = line.strip()
827 line2 = next(inputfile).strip()
828 line3 = next(inputfile).strip()
829 self.logger.warning("\n " + "\n ".join((line1, line2, line3)))
831 # In at least one case (regression zolm_dft3a.log) for older version of GAMESS-US,
832 # the header concerning the range of nodes is formatted wrong and can look like so:
833 # MODES 9 TO14 ARE TAKEN AS ROTATIONS AND TRANSLATIONS.
834 # ... although it's unclear whether this happens for all two-digit values.
835 startrot = int(line.split()[1])
836 if len(line.split()[2]) == 2:
837 endrot = int(line.split()[3])
838 else:
839 endrot = int(line.split()[2][2:])
841 self.skip_line(inputfile, 'blank')
843 # Continue down to the first frequencies
844 line = next(inputfile)
845 # With GAMESS-US 20 APR 2017 (R1), there are 28 blank spaces,
846 # in earlier versions there used to be 26.
847 while not line.strip() or not re.search(' {26,}1', line) is not None:
848 line = next(inputfile)
850 while not "SAYVETZ" in line:
851 self.updateprogress(inputfile, "Frequency Information")
853 # Note: there may be imaginary frequencies like this (which we make negative):
854 # FREQUENCY: 825.18 I 111.53 12.62 10.70 0.89
855 #
856 # A note for debuggers: some of these frequencies will be removed later,
857 # assumed to be translations or rotations (see startrot/endrot above).
858 for col in next(inputfile).split()[1:]:
859 if col == "I":
860 self.vibfreqs[-1] *= -1
861 else:
862 self.vibfreqs.append(float(col))
864 line = next(inputfile)
866 # Skip the symmetry (appears in newer versions), fixes bug #3476063.
867 if line.find("SYMMETRY") >= 0:
868 line = next(inputfile)
870 # Skip the reduced mass (not always present).
871 if line.find("REDUCED") >= 0:
872 if not hasattr(self, "vibrmasses"):
873 self.vibrmasses = []
874 self.vibrmasses.extend(list(map(float, line.strip().split()[2:])))
875 line = next(inputfile)
877 # Not present in numerical Hessian calculations.
878 if line.find("IR INTENSITY") >= 0:
879 irIntensity = map(float, line.strip().split()[2:])
880 self.vibirs.extend([utils.convertor(x, "Debye^2/amu-Angstrom^2", "km/mol") for x in irIntensity])
881 line = next(inputfile)
883 # Read in Raman vibrational intensities if present.
884 if line.find("RAMAN") >= 0:
885 if not hasattr(self, "vibramans"):
886 self.vibramans = []
887 ramanIntensity = line.strip().split()
888 self.vibramans.extend(list(map(float, ramanIntensity[2:])))
889 depolar = next(inputfile)
890 line = next(inputfile)
892 # This line seems always to be blank.
893 assert line.strip() == ''
895 # Extract the Cartesian displacement vectors.
896 p = [[], [], [], [], []]
897 for j in range(self.natom):
898 q = [[], [], [], [], []]
899 for coord in "xyz":
900 line = next(inputfile)[21:]
901 cols = list(map(float, line.split()))
902 for i, val in enumerate(cols):
903 q[i].append(val)
904 for k in range(len(cols)):
905 p[k].append(q[k])
906 self.vibdisps.extend(p[:len(cols)])
908 # Skip the Sayvetz stuff at the end.
909 for j in range(10):
910 line = next(inputfile)
912 self.skip_line(inputfile, 'blank')
913 line = next(inputfile)
915 # Exclude rotations and translations.
916 self.vibfreqs = numpy.array(self.vibfreqs[:startrot-1]+self.vibfreqs[endrot:], "d")
917 self.vibirs = numpy.array(self.vibirs[:startrot-1]+self.vibirs[endrot:], "d")
918 self.vibdisps = numpy.array(self.vibdisps[:startrot-1]+self.vibdisps[endrot:], "d")
919 if hasattr(self, "vibrmasses"):
920 self.vibrmasses = numpy.array(self.vibrmasses[:startrot-1]+self.vibrmasses[endrot:], "d")
921 if hasattr(self, "vibramans"):
922 self.vibramans = numpy.array(self.vibramans[:startrot-1]+self.vibramans[endrot:], "d")
924 if line[5:21] == "ATOMIC BASIS SET":
925 self.gbasis = []
926 line = next(inputfile)
927 while line.find("SHELL") < 0:
928 line = next(inputfile)
930 self.skip_lines(inputfile, ['blank', 'atomname'])
932 # shellcounter stores the shell no of the last shell
933 # in the previous set of primitives
934 shellcounter = 1
935 while line.find("TOTAL NUMBER") < 0:
937 self.skip_line(inputfile, 'blank')
939 line = next(inputfile)
940 shellno = int(line.split()[0])
941 shellgap = shellno - shellcounter
942 gbasis = [] # Stores basis sets on one atom
943 shellsize = 0
944 while len(line.split()) != 1 and line.find("TOTAL NUMBER") < 0:
945 shellsize += 1
946 coeff = {}
947 # coefficients and symmetries for a block of rows
948 while line.strip():
949 temp = line.strip().split()
950 sym = temp[1]
951 assert sym in ['S', 'P', 'D', 'F', 'G', 'L']
952 if sym == "L": # L refers to SP
953 if len(temp) == 6: # GAMESS US
954 coeff.setdefault("S", []).append((float(temp[3]), float(temp[4])))
955 coeff.setdefault("P", []).append((float(temp[3]), float(temp[5])))
956 else: # PC GAMESS
957 assert temp[6][-1] == temp[9][-1] == ')'
958 coeff.setdefault("S", []).append((float(temp[3]), float(temp[6][:-1])))
959 coeff.setdefault("P", []).append((float(temp[3]), float(temp[9][:-1])))
960 else:
961 if len(temp) == 5: # GAMESS US
962 coeff.setdefault(sym, []).append((float(temp[3]), float(temp[4])))
963 else: # PC GAMESS
964 assert temp[6][-1] == ')'
965 coeff.setdefault(sym, []).append((float(temp[3]), float(temp[6][:-1])))
966 line = next(inputfile)
967 # either a blank or a continuation of the block
968 if sym == "L":
969 gbasis.append(('S', coeff['S']))
970 gbasis.append(('P', coeff['P']))
971 else:
972 gbasis.append((sym, coeff[sym]))
973 line = next(inputfile)
974 # either the start of the next block or the start of a new atom or
975 # the end of the basis function section
977 numtoadd = 1 + (shellgap // shellsize)
978 shellcounter = shellno + shellsize
979 for x in range(numtoadd):
980 self.gbasis.append(gbasis)
982 # The eigenvectors, which also include MO energies and symmetries, follow
983 # the *final* report of evalues and the last list of symmetries in the log file:
984 #
985 # ------------
986 # EIGENVECTORS
987 # ------------
988 #
989 # 1 2 3 4 5
990 # -10.0162 -10.0161 -10.0039 -10.0039 -10.0029
991 # BU AG BU AG AG
992 # 1 C 1 S 0.699293 0.699290 -0.027566 0.027799 0.002412
993 # 2 C 1 S 0.031569 0.031361 0.004097 -0.004054 -0.000605
994 # 3 C 1 X 0.000908 0.000632 -0.004163 0.004132 0.000619
995 # 4 C 1 Y -0.000019 0.000033 0.000668 -0.000651 0.005256
996 # 5 C 1 Z 0.000000 0.000000 0.000000 0.000000 0.000000
997 # 6 C 2 S -0.699293 0.699290 0.027566 0.027799 0.002412
998 # 7 C 2 S -0.031569 0.031361 -0.004097 -0.004054 -0.000605
999 # 8 C 2 X 0.000908 -0.000632 -0.004163 -0.004132 -0.000619
1000 # 9 C 2 Y -0.000019 -0.000033 0.000668 0.000651 -0.005256
1001 # 10 C 2 Z 0.000000 0.000000 0.000000 0.000000 0.000000
1002 # 11 C 3 S -0.018967 -0.019439 0.011799 -0.014884 -0.452328
1003 # 12 C 3 S -0.007748 -0.006932 0.000680 -0.000695 -0.024917
1004 # 13 C 3 X 0.002628 0.002997 0.000018 0.000061 -0.003608
1005 # ...
1006 #
1007 # There are blanks lines between each block.
1008 #
1009 # Warning! There are subtle differences between GAMESS-US and PC-GAMES
1010 # in the formatting of the first four columns. In particular, for F orbitals,
1011 # PC GAMESS:
1012 # 19 C 1 YZ 0.000000 0.000000 0.000000 0.000000 0.000000
1013 # 20 C XXX 0.000000 0.000000 0.000000 0.000000 0.002249
1014 # 21 C YYY 0.000000 0.000000 -0.025555 0.000000 0.000000
1015 # 22 C ZZZ 0.000000 0.000000 0.000000 0.002249 0.000000
1016 # 23 C XXY 0.000000 0.000000 0.001343 0.000000 0.000000
1017 # GAMESS US
1018 # 55 C 1 XYZ 0.000000 0.000000 0.000000 0.000000 0.000000
1019 # 56 C 1XXXX -0.000014 -0.000067 0.000000 0.000000 0.000000
1020 #
1021 if line.find("EIGENVECTORS") == 10 or line.find("MOLECULAR ORBITALS") == 10:
1023 # This is the stuff that we can read from these blocks.
1024 self.moenergies = [[]]
1025 self.mosyms = [[]]
1027 if not hasattr(self, "nmo"):
1028 self.nmo = self.nbasis
1030 self.mocoeffs = [numpy.zeros((self.nmo, self.nbasis), "d")]
1032 readatombasis = False
1033 if not hasattr(self, "atombasis"):
1034 self.atombasis = []
1035 self.aonames = []
1036 for i in range(self.natom):
1037 self.atombasis.append([])
1038 self.aonames = []
1039 readatombasis = True
1041 self.skip_line(inputfile, 'dashes')
1043 for base in range(0, self.nmo, 5):
1044 self.updateprogress(inputfile, "Coefficients")
1046 line = next(inputfile)
1048 # This makes sure that this section does not end prematurely,
1049 # which happens in regression 2CO.ccsd.aug-cc-pVDZ.out.
1050 if line.strip() != "":
1051 break
1053 numbers = next(inputfile) # Eigenvector numbers.
1055 # This is for regression CdtetraM1B3LYP.
1056 if "ALPHA SET" in numbers:
1057 blank = next(inputfile)
1058 numbers = next(inputfile)
1060 # If not all coefficients are printed, the logfile will go right to
1061 # the beta section if there is one, so break out in that case.
1062 if "BETA SET" in numbers:
1063 line = numbers
1064 break
1066 # Sometimes there are some blank lines here.
1067 while not line.strip():
1068 line = next(inputfile)
1070 # Geometry optimizations don't have END OF RHF/DFT
1071 # CALCULATION, they head right into the next section.
1072 if "--------" in line:
1073 break
1075 # Eigenvalues for these orbitals (in hartrees).
1076 try:
1077 self.moenergies[0].extend([utils.convertor(float(x), "hartree", "eV") for x in line.split()])
1078 except:
1079 self.logger.warning('MO section found but could not be parsed!')
1080 break
1082 # Orbital symmetries.
1083 line = next(inputfile)
1084 if line.strip():
1085 self.mosyms[0].extend(list(map(self.normalisesym, line.split())))
1087 # Now we have nbasis lines. We will use the same method as in normalise_aonames() before.
1088 p = re.compile(r"(\d+)\s*([A-Z][A-Z]?)\s*(\d+)\s*([A-Z]+)")
1089 oldatom = '0'
1090 i_atom = 0 # counter to keep track of n_atoms > 99
1091 flag_w = True # flag necessary to keep from adding 100's at wrong time
1093 for i in range(self.nbasis):
1094 line = next(inputfile)
1096 # If line is empty, break (ex. for FMO in exam37 which is a regression).
1097 if not line.strip():
1098 break
1100 # Fill atombasis and aonames only first time around
1101 if readatombasis and base == 0:
1103 aonames = []
1104 start = line[:17].strip()
1105 m = p.search(start)
1107 if m:
1108 g = m.groups()
1109 g2 = int(g[2]) # atom index in GAMESS file; changes to 0 after 99
1111 # Check if we have moved to a hundred
1112 # if so, increment the counter and add it to the parsed value
1113 # There will be subsequent 0's as that atoms AO's are parsed
1114 # so wait until the next atom is parsed before resetting flag
1115 if g2 == 0 and flag_w:
1116 i_atom = i_atom + 100
1117 flag_w = False # handle subsequent AO's
1118 if g2 != 0:
1119 flag_w = True # reset flag
1120 g2 = g2 + i_atom
1122 aoname = "%s%i_%s" % (g[1].capitalize(), g2, g[3])
1123 oldatom = str(g2)
1124 atomno = g2-1
1125 orbno = int(g[0])-1
1126 else: # For F orbitals, as shown above
1127 g = [x.strip() for x in line.split()]
1128 aoname = "%s%s_%s" % (g[1].capitalize(), oldatom, g[2])
1129 atomno = int(oldatom)-1
1130 orbno = int(g[0])-1
1132 self.atombasis[atomno].append(orbno)
1133 self.aonames.append(aoname)
1135 coeffs = line[15:] # Strip off the crud at the start.
1136 j = 0
1137 while j*11+4 < len(coeffs):
1138 self.mocoeffs[0][base+j, i] = float(coeffs[j * 11:(j + 1) * 11])
1139 j += 1
1141 # If it's a restricted calc and no more properties, we have:
1142 #
1143 # ...... END OF RHF/DFT CALCULATION ......
1144 #
1145 # If there are more properties (such as the density matrix):
1146 # --------------
1147 #
1148 # If it's an unrestricted calculation, however, we now get the beta orbitals:
1149 #
1150 # ----- BETA SET -----
1151 #
1152 # ------------
1153 # EIGENVECTORS
1154 # ------------
1155 #
1156 # 1 2 3 4 5
1157 # ...
1158 #
1159 if "BETA SET" not in line:
1160 line = next(inputfile)
1161 line = next(inputfile)
1163 # This can come in between the alpha and beta orbitals (see #130).
1164 if line.strip() == "LZ VALUE ANALYSIS FOR THE MOS":
1165 while line.strip():
1166 line = next(inputfile)
1167 line = next(inputfile)
1169 # Covers label with both dashes and stars (like regression CdtetraM1B3LYP).
1170 if "BETA SET" in line:
1171 self.mocoeffs.append(numpy.zeros((self.nmo, self.nbasis), "d"))
1172 self.moenergies.append([])
1173 self.mosyms.append([])
1174 blank = next(inputfile)
1175 line = next(inputfile)
1177 # Sometimes EIGENVECTORS is missing, so rely on dashes to signal it.
1178 if set(line.strip()) == {'-'}:
1179 self.skip_lines(inputfile, ['EIGENVECTORS', 'd', 'b'])
1180 line = next(inputfile)
1182 for base in range(0, self.nmo, 5):
1183 self.updateprogress(inputfile, "Coefficients")
1184 if base != 0:
1185 line = next(inputfile)
1186 line = next(inputfile)
1187 line = next(inputfile)
1188 if "properties" in line.lower():
1189 break
1190 self.moenergies[1].extend([utils.convertor(float(x), "hartree", "eV") for x in line.split()])
1191 line = next(inputfile)
1192 self.mosyms[1].extend(list(map(self.normalisesym, line.split())))
1193 for i in range(self.nbasis):
1194 line = next(inputfile)
1195 temp = line[15:] # Strip off the crud at the start
1196 j = 0
1197 while j * 11 + 4 < len(temp):
1198 self.mocoeffs[1][base+j, i] = float(temp[j * 11:(j + 1) * 11])
1199 j += 1
1200 line = next(inputfile)
1201 self.moenergies = [numpy.array(x, "d") for x in self.moenergies]
1203 # Natural orbital coefficients and occupation numbers, presently supported only
1204 # for CIS calculations. Looks the same as eigenvectors, without symmetry labels.
1205 #
1206 # --------------------
1207 # CIS NATURAL ORBITALS
1208 # --------------------
1209 #
1210 # 1 2 3 4 5
1211 #
1212 # 2.0158 2.0036 2.0000 2.0000 1.0000
1213 #
1214 # 1 O 1 S 0.000000 -0.157316 0.999428 0.164938 0.000000
1215 # 2 O 1 S 0.000000 0.754402 0.004472 -0.581970 0.000000
1216 # ...
1217 #
1218 if line[10:30] == "CIS NATURAL ORBITALS":
1220 self.nocoeffs = numpy.zeros((self.nmo, self.nbasis), "d")
1221 self.nooccnos = []
1223 self.skip_line(inputfile, 'dashes')
1225 for base in range(0, self.nmo, 5):
1227 self.skip_lines(inputfile, ['blank', 'numbers'])
1229 # The eigenvalues that go along with these natural orbitals are
1230 # their occupation numbers. Sometimes there are blank lines before them.
1231 line = next(inputfile)
1232 while not line.strip():
1233 line = next(inputfile)
1234 eigenvalues = map(float, line.split())
1235 self.nooccnos.extend(eigenvalues)
1237 # Orbital symemtry labels are normally here for MO coefficients.
1238 line = next(inputfile)
1240 # Now we have nbasis lines with the coefficients.
1241 for i in range(self.nbasis):
1243 line = next(inputfile)
1244 coeffs = line[15:]
1245 j = 0
1246 while j*11+4 < len(coeffs):
1247 self.nocoeffs[base+j, i] = float(coeffs[j * 11:(j + 1) * 11])
1248 j += 1
1250 # We cannot trust this self.homos until we come to the phrase:
1251 # SYMMETRIES FOR INITAL GUESS ORBITALS FOLLOW
1252 # which either is followed by "ALPHA" or "BOTH" at which point we can say
1253 # for certain that it is an un/restricted calculations.
1254 # Note that MCSCF calcs also print this search string, so make sure
1255 # that self.homos does not exist yet.
1256 if line[1:28] == "NUMBER OF OCCUPIED ORBITALS" and not hasattr(self, 'homos'):
1258 homos = [int(line.split()[-1])-1]
1259 line = next(inputfile)
1260 homos.append(int(line.split()[-1])-1)
1262 self.set_attribute('homos', homos)
1264 if line.find("SYMMETRIES FOR INITIAL GUESS ORBITALS FOLLOW") >= 0:
1265 # Not unrestricted, so lop off the second index.
1266 # In case the search string above was not used (ex. FMO in exam38),
1267 # we can try to use the next line which should also contain the
1268 # number of occupied orbitals.
1269 if line.find("BOTH SET(S)") >= 0:
1270 nextline = next(inputfile)
1271 if "ORBITALS ARE OCCUPIED" in nextline:
1272 homos = int(nextline.split()[0])-1
1273 if hasattr(self, "homos"):
1274 try:
1275 assert self.homos[0] == homos
1276 except AssertionError:
1277 self.logger.warning("Number of occupied orbitals not consistent. This is normal for ECP and FMO jobs.")
1278 else:
1279 self.homos = [homos]
1280 self.homos = numpy.resize(self.homos, [1])
1282 # Set the total number of atoms, only once.
1283 # Normally GAMESS print TOTAL NUMBER OF ATOMS, however in some cases
1284 # this is slightly different (ex. lower case for FMO in exam37).
1285 if not hasattr(self, "natom") and "NUMBER OF ATOMS" in line.upper():
1286 natom = int(line.split()[-1])
1287 self.set_attribute('natom', natom)
1289 # The first is from Julien's Example and the second is from Alexander's
1290 # I think it happens if you use a polar basis function instead of a cartesian one
1291 if line.find("NUMBER OF CARTESIAN GAUSSIAN BASIS") == 1 or line.find("TOTAL NUMBER OF BASIS FUNCTIONS") == 1:
1292 nbasis = int(line.strip().split()[-1])
1293 self.set_attribute('nbasis', nbasis)
1295 elif line.find("TOTAL NUMBER OF CONTAMINANTS DROPPED") >= 0:
1296 nmos_dropped = int(line.split()[-1])
1297 if hasattr(self, "nmo"):
1298 self.set_attribute('nmo', self.nmo - nmos_dropped)
1299 else:
1300 self.set_attribute('nmo', self.nbasis - nmos_dropped)
1302 # Note that this line is present if ISPHER=1, e.g. for C_bigbasis
1303 elif line.find("SPHERICAL HARMONICS KEPT IN THE VARIATION SPACE") >= 0:
1304 nmo = int(line.strip().split()[-1])
1305 self.set_attribute('nmo', nmo)
1307 # Note that this line is not always present, so by default
1308 # NBsUse is set equal to NBasis (see below).
1309 elif line.find("TOTAL NUMBER OF MOS IN VARIATION SPACE") == 1:
1310 nmo = int(line.split()[-1])
1311 self.set_attribute('nmo', nmo)
1313 elif line.find("OVERLAP MATRIX") == 0 or line.find("OVERLAP MATRIX") == 1:
1314 # The first is for PC-GAMESS, the second for GAMESS
1315 # Read 1-electron overlap matrix
1316 if not hasattr(self, "aooverlaps"):
1317 self.aooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d")
1318 else:
1319 self.logger.info("Reading additional aooverlaps...")
1320 base = 0
1321 while base < self.nbasis:
1322 self.updateprogress(inputfile, "Overlap")
1324 self.skip_lines(inputfile, ['b', 'basis_fn_number', 'b'])
1326 for i in range(self.nbasis - base): # Fewer lines each time
1327 line = next(inputfile)
1328 temp = line.split()
1329 for j in range(4, len(temp)):
1330 self.aooverlaps[base+j-4, i+base] = float(temp[j])
1331 self.aooverlaps[i+base, base+j-4] = float(temp[j])
1332 base += 5
1334 # ECP Pseudopotential information
1335 if "ECP POTENTIALS" in line:
1336 if not hasattr(self, "coreelectrons"):
1337 self.coreelectrons = [0]*self.natom
1339 self.skip_lines(inputfile, ['d', 'b'])
1341 header = next(inputfile)
1342 while header.split()[0] == "PARAMETERS":
1343 name = header[17:25]
1344 atomnum = int(header[34:40])
1345 # The pseudopotnetial is given explicitely
1346 if header[40:50] == "WITH ZCORE":
1347 zcore = int(header[50:55])
1348 lmax = int(header[63:67])
1349 self.coreelectrons[atomnum-1] = zcore
1350 # The pseudopotnetial is copied from another atom
1351 if header[40:55] == "ARE THE SAME AS":
1352 atomcopy = int(header[60:])
1353 self.coreelectrons[atomnum-1] = self.coreelectrons[atomcopy-1]
1354 line = next(inputfile)
1355 while line.split() != []:
1356 line = next(inputfile)
1357 header = next(inputfile)
1359 # This was used before refactoring the parser, geotargets was set here after parsing.
1360 #if not hasattr(self, "geotargets"):
1361 # opttol = 1e-4
1362 # self.geotargets = numpy.array([opttol, 3. / opttol], "d")
1363 #if hasattr(self,"geovalues"): self.geovalues = numpy.array(self.geovalues, "d")
1365 # This is quite simple to parse, but some files seem to print certain lines twice,
1366 # repeating the populations without charges, but not in proper order.
1367 # The unrestricted calculations are a bit tricky, since GAMESS-US prints populations
1368 # for both alpha and beta orbitals in the same format and with the same title,
1369 # but it still prints the charges only at the very end.
1370 if "TOTAL MULLIKEN AND LOWDIN ATOMIC POPULATIONS" in line:
1372 if not hasattr(self, "atomcharges"):
1373 self.atomcharges = {}
1375 header = next(inputfile)
1376 line = next(inputfile)
1378 # It seems that when population are printed twice (without charges),
1379 # there is a blank line along the way (after the first header),
1380 # so let's get a flag out of that circumstance.
1381 doubles_printed = line.strip() == ""
1382 if doubles_printed:
1383 title = next(inputfile)
1384 header = next(inputfile)
1385 line = next(inputfile)
1387 # Only go further if the header had five columns, which should
1388 # be the case when both populations and charges are printed.
1389 # This is pertinent for both double printing and unrestricted output.
1390 if not len(header.split()) == 5:
1391 return
1392 mulliken, lowdin = [], []
1393 while line.strip():
1394 if line.strip() and doubles_printed:
1395 line = next(inputfile)
1396 mulliken.append(float(line.split()[3]))
1397 lowdin.append(float(line.split()[5]))
1398 line = next(inputfile)
1399 self.atomcharges["mulliken"] = mulliken
1400 self.atomcharges["lowdin"] = lowdin
1402 # ---------------------
1403 # ELECTROSTATIC MOMENTS
1404 # ---------------------
1405 #
1406 # POINT 1 X Y Z (BOHR) CHARGE
1407 # -0.000000 0.000000 0.000000 -0.00 (A.U.)
1408 # DX DY DZ /D/ (DEBYE)
1409 # 0.000000 -0.000000 0.000000 0.000000
1410 #
1411 if line.strip() == "ELECTROSTATIC MOMENTS":
1413 self.skip_lines(inputfile, ['d', 'b'])
1414 line = next(inputfile)
1416 # The old PC-GAMESS prints memory assignment information here.
1417 if "MEMORY ASSIGNMENT" in line:
1418 memory_assignment = next(inputfile)
1419 line = next(inputfile)
1421 # If something else ever comes up, we should get a signal from this assert.
1422 assert line.split()[0] == "POINT"
1424 # We can get the reference point from here, as well as
1425 # check here that the net charge of the molecule is correct.
1426 coords_and_charge = next(inputfile)
1427 assert coords_and_charge.split()[-1] == '(A.U.)'
1428 reference = numpy.array([float(x) for x in coords_and_charge.split()[:3]])
1429 reference = utils.convertor(reference, 'bohr', 'Angstrom')
1430 charge = int(round(float(coords_and_charge.split()[-2])))
1431 self.set_attribute('charge', charge)
1433 dipoleheader = next(inputfile)
1434 assert dipoleheader.split()[:3] == ['DX', 'DY', 'DZ']
1435 assert dipoleheader.split()[-1] == "(DEBYE)"
1437 dipoleline = next(inputfile)
1438 dipole = [float(d) for d in dipoleline.split()[:3]]
1440 # The dipole is always the first multipole moment to be printed,
1441 # so if it already exists, we will overwrite all moments since we want
1442 # to leave just the last printed value (could change in the future).
1443 if not hasattr(self, 'moments'):
1444 self.moments = [reference, dipole]
1445 else:
1446 try:
1447 assert self.moments[1] == dipole
1448 except AssertionError:
1449 self.logger.warning('Overwriting previous multipole moments with new values')
1450 self.logger.warning('This could be from post-HF properties or geometry optimization')
1451 self.moments = [reference, dipole]
1453 # Static polarizability from a harmonic frequency calculation
1454 # with $CPHF/POLAR=.TRUE.
1455 if line.strip() == 'ALPHA POLARIZABILITY TENSOR (ANGSTROMS**3)':
1456 if not hasattr(self, 'polarizabilities'):
1457 self.polarizabilities = []
1458 polarizability = numpy.zeros(shape=(3, 3))
1459 self.skip_lines(inputfile, ['d', 'b', 'directions'])
1460 for i in range(3):
1461 line = next(inputfile)
1462 polarizability[i, :i+1] = [float(x) for x in line.split()[1:]]
1463 polarizability = utils.symmetrize(polarizability, use_triangle='lower')
1464 # Convert from Angstrom**3 to bohr**3 (a.u.**3).
1465 volume_convert = numpy.vectorize(lambda x: x * utils.convertor(1, 'Angstrom', 'bohr') ** 3)
1466 polarizability = volume_convert(polarizability)
1467 self.polarizabilities.append(polarizability)
1469 # Static and dynamic polarizability from RUNTYP=TDHF.
1470 if line.strip() == 'TIME-DEPENDENT HARTREE-FOCK NLO PROPERTIES':
1471 if not hasattr(self, 'polarizabilities'):
1472 self.polarizabilities = []
1473 polarizability = numpy.empty(shape=(3, 3))
1474 coord_to_idx = {'X': 0, 'Y': 1, 'Z': 2}
1475 self.skip_lines(inputfile, ['d', 'b', 'dots'])
1476 line = next(inputfile)
1477 assert 'ALPHA AT' in line
1478 self.skip_lines(inputfile, ['dots', 'b'])
1479 for a in range(3):
1480 for b in range(3):
1481 line = next(inputfile)
1482 tokens = line.split()
1483 i, j = coord_to_idx[tokens[1][0]], coord_to_idx[tokens[1][1]]
1484 polarizability[i, j] = tokens[3]
1485 self.polarizabilities.append(polarizability)
1487 # Extract thermochemistry
1489 # -------------------------------
1490 # THERMOCHEMISTRY AT T= 298.15 K
1491 # -------------------------------
1493 # USING IDEAL GAS, RIGID ROTOR, HARMONIC NORMAL MODE APPROXIMATIONS.
1494 # P= 1.01325E+05 PASCAL.
1495 # ALL FREQUENCIES ARE SCALED BY 1.00000
1496 # THE MOMENTS OF INERTIA ARE (IN AMU*BOHR**2)
1497 # 1.77267 4.73429 6.50696
1498 # THE ROTATIONAL SYMMETRY NUMBER IS 1.0
1499 # THE ROTATIONAL CONSTANTS ARE (IN GHZ)
1500 # 1017.15677 380.85747 277.10144
1501 # 7 - 9 VIBRATIONAL MODES ARE USED IN THERMOCHEMISTRY.
1502 # THE HARMONIC ZERO POINT ENERGY IS (SCALED BY 1.000)
1503 # 0.020711 HARTREE/MOLECULE 4545.618665 CM**-1/MOLECULE
1504 # 12.996589 KCAL/MOL 54.377728 KJ/MOL
1506 # Q LN Q
1507 # ELEC. 1.00000E+00 0.000000
1508 # TRANS. 3.00431E+06 14.915558
1509 # ROT. 8.36512E+01 4.426656
1510 # VIB. 1.00067E+00 0.000665
1511 # TOT. 2.51481E+08 19.342880
1513 # E H G CV CP S
1514 # KJ/MOL KJ/MOL KJ/MOL J/MOL-K J/MOL-K J/MOL-K
1515 # ELEC. 0.000 0.000 0.000 0.000 0.000 0.000
1516 # TRANS. 3.718 6.197 -36.975 12.472 20.786 144.800
1517 # ROT. 3.718 3.718 -10.973 12.472 12.472 49.277
1518 # VIB. 54.390 54.390 54.376 0.296 0.296 0.046
1519 # TOTAL 61.827 64.306 6.428 25.240 33.554 194.123
1520 # VIB. THERMAL CORRECTION E(T)-E(0) = H(T)-H(0) = 12.071 J/MOL
1522 # E H G CV CP S
1523 # KCAL/MOL KCAL/MOL KCAL/MOL CAL/MOL-K CAL/MOL-K CAL/MOL-K
1524 # ELEC. 0.000 0.000 0.000 0.000 0.000 0.000
1525 # TRANS. 0.889 1.481 -8.837 2.981 4.968 34.608
1526 # ROT. 0.889 0.889 -2.623 2.981 2.981 11.777
1527 # VIB. 12.999 12.999 12.996 0.071 0.071 0.011
1528 # TOTAL 14.777 15.369 1.536 6.032 8.020 46.396
1529 # VIB. THERMAL CORRECTION E(T)-E(0) = H(T)-H(0) = 2.885 CAL/MOL
1531 if "THERMOCHEMISTRY AT T=" in line:
1532 match = re.search(r"THERMOCHEMISTRY AT T=(.*)K", line)
1533 if match:
1534 self.set_attribute('temperature', float(match.group(1)))
1535 if "PASCAL." in line:
1536 match = re.search(r"P=(.*)PASCAL.", line)
1537 if match:
1538 self.set_attribute('pressure', float(match.group(1))/1.01325e5)
1540 if "KCAL/MOL KCAL/MOL KCAL/MOL CAL/MOL-K CAL/MOL-K CAL/MOL-K" in line:
1541 self.skip_lines(inputfile,["ELEC","TRANS","ROT","VIB"])
1542 line = next(inputfile) #TOTAL
1543 thermoValues = line.split()
1545 if hasattr(self, 'scfenergies'):
1546 electronicEnergy = utils.convertor(self.scfenergies[-1],"eV","hartree")
1547 else:
1548 electronicEnergy = 0 # GAMESS prints thermochemistry at the end, so it should have a value for this already
1549 self.set_attribute('enthalpy', electronicEnergy + utils.convertor(float(thermoValues[2]),"kcal/mol","hartree"))
1550 self.set_attribute('freeenergy', electronicEnergy + utils.convertor(float(thermoValues[3]),"kcal/mol","hartree"))
1551 self.set_attribute('entropy', utils.convertor(float(thermoValues[6])/1000.0,"kcal/mol","hartree"))
1554 if line[:30] == ' ddikick.x: exited gracefully.'\
1555 or line[:41] == ' EXECUTION OF FIREFLY TERMINATED NORMALLY'\
1556 or line[:40] == ' EXECUTION OF GAMESS TERMINATED NORMALLY':
1557 self.metadata['success'] = True