Coverage for cclib/parser/molproparser.py : 96%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2#
3# Copyright (c) 2019, the cclib development team
4#
5# This file is part of cclib (http://cclib.github.io) and is distributed under
6# the terms of the BSD 3-Clause License.
8"""Parser for Molpro output files"""
11import itertools
13import numpy
15from cclib.parser import logfileparser
16from cclib.parser import utils
19def create_atomic_orbital_names(orbitals):
20 """Generate all atomic orbital names that could be used by Molpro.
22 The names are returned in a dictionary, organized by subshell (S, P, D and so on).
23 """
25 # We can write out the first two manually, since there are not that many.
26 atomic_orbital_names = {
27 'S': ['s', '1s'],
28 'P': ['x', 'y', 'z', '2px', '2py', '2pz'],
29 }
31 # Although we could write out all names for the other subshells, it is better
32 # to generate them if we need to expand further, since the number of functions quickly
33 # grows and there are both Cartesian and spherical variants to consider.
34 # For D orbitals, the Cartesian functions are xx, yy, zz, xy, xz and yz, and the
35 # spherical ones are called 3d0, 3d1-, 3d1+, 3d2- and 3d2+. For F orbitals, the Cartesians
36 # are xxx, xxy, xxz, xyy, ... and the sphericals are 4f0, 4f1-, 4f+ and so on.
37 for i, orb in enumerate(orbitals):
39 # Cartesian can be generated directly by combinations.
40 cartesian = list(map(''.join, list(itertools.combinations_with_replacement(['x', 'y', 'z'], i+2))))
42 # For spherical functions, we need to construct the names.
43 pre = str(i+3) + orb.lower()
44 spherical = [pre + '0'] + [pre + str(j) + s for j in range(1, i+3) for s in ['-', '+']]
45 atomic_orbital_names[orb] = cartesian + spherical
47 return atomic_orbital_names
50class Molpro(logfileparser.Logfile):
51 """Molpro file parser"""
53 atomic_orbital_names = create_atomic_orbital_names(['D', 'F', 'G'])
55 def __init__(self, *args, **kwargs):
56 # Call the __init__ method of the superclass
57 super(Molpro, self).__init__(logname="Molpro", *args, **kwargs)
59 def __str__(self):
60 """Return a string representation of the object."""
61 return "Molpro log file %s" % (self.filename)
63 def __repr__(self):
64 """Return a representation of the object."""
65 return 'Molpro("%s")' % (self.filename)
67 def normalisesym(self, label):
68 """Normalise the symmetries used by Molpro."""
69 ans = label.replace("`", "'").replace("``", "''")
70 return ans
72 def before_parsing(self):
74 self.electronorbitals = ""
75 self.insidescf = False
77 def after_parsing(self):
79 # If optimization thresholds are default, they are normally not printed and we need
80 # to set them to the default after parsing. Make sure to set them in the same order that
81 # they appear in the in the geometry optimization progress printed in the output,
82 # namely: energy difference, maximum gradient, maximum step.
83 if not hasattr(self, "geotargets"):
84 self.geotargets = []
85 # Default THRENERG (required accuracy of the optimized energy).
86 self.geotargets.append(1E-6)
87 # Default THRGRAD (required accuracy of the optimized gradient).
88 self.geotargets.append(3E-4)
89 # Default THRSTEP (convergence threshold for the geometry optimization step).
90 self.geotargets.append(3E-4)
92 def _parse_orbitals(self, inputfile, line):
93 # From this block aonames, atombasis, moenergies and mocoeffs can be parsed. The data is
94 # flipped compared to most programs (GAMESS, Gaussian), since the MOs are in rows. Also, Molpro
95 # does not cut the table into parts, rather each MO row has as many lines as it takes ro print
96 # all of the MO coefficients. Each row normally has 10 coefficients, although this can be less
97 # for the last row and when symmetry is used (each irrep has its own block).
98 #
99 # ELECTRON ORBITALS
100 # =================
101 #
102 #
103 # Orb Occ Energy Couls-En Coefficients
104 #
105 # 1 1s 1 1s 1 2px 1 2py 1 2pz 2 1s (...)
106 # 3 1s 3 1s 3 2px 3 2py 3 2pz 4 1s (...)
107 # (...)
108 #
109 # 1.1 2 -11.0351 -43.4915 0.701460 0.025696 -0.000365 -0.000006 0.000000 0.006922 (...)
110 # -0.006450 0.004742 -0.001028 -0.002955 0.000000 -0.701460 (...)
111 # (...)
112 #
113 # If an MCSCF calculation was performed, the natural orbitals
114 # (coefficients and occupation numbers) are printed in a
115 # format nearly identical to the ELECTRON ORBITALS section.
116 #
117 # NATURAL ORBITALS (state averaged)
118 # =================================
119 #
120 # Orb Occ Energy Coefficients
121 #
122 # 1 s 1 s 1 s 1 z 1 z 1 xx 1 yy 1 zz 2 s 2 s
123 # 2 s 2 z 2 z 2 xx 2 yy 2 zz 3 s 3 s 3 z 3 y
124 #
125 # 1.1 2.00000 -20.678730 0.000141 -0.000057 0.001631 -0.001377 0.001117 0.000029 0.000293 -0.000852 1.000748 0.001746
126 # -0.002552 -0.002005 0.001658 -0.001266 -0.001274 -0.001001 0.000215 -0.000131 -0.000242 -0.000126
127 #
128 # 2.1 2.00000 -11.322823 1.000682 0.004626 -0.000485 0.006634 -0.002096 -0.003072 -0.003282 -0.001724 -0.000181 0.006734
129 # -0.002398 -0.000527 0.001335 0.000091 0.000058 0.000396 -0.003219 0.000981 0.000250 -0.000191
130 # (...)
132 # The assigment of final cclib attributes is different for
133 # canonical/natural orbitals.
134 self.naturalorbitals = (line[1:17] == "NATURAL ORBITALS")
135 # Make sure we didn't get here by mistake.
136 assert line[1:18] == "ELECTRON ORBITALS" or self.electronorbitals or self.naturalorbitals
138 # For unrestricted calculations, ELECTRON ORBITALS is followed on the same line
139 # by FOR POSITIVE SPIN or FOR NEGATIVE SPIN as appropriate.
140 spin = (line[19:36] == "FOR NEGATIVE SPIN") or (self.electronorbitals[19:36] == "FOR NEGATIVE SPIN")
142 if self.naturalorbitals:
143 self.skip_lines(inputfile, ['equals', 'b', 'headers', 'b'])
144 else:
145 if not self.electronorbitals:
146 self.skip_line(inputfile, 'equals')
147 self.skip_lines(inputfile, ['b', 'b', 'headers', 'b'])
149 aonames = []
150 atombasis = [[] for i in range(self.natom)]
151 moenergies = []
152 # Use for both canonical and natural orbital coefficients.
153 mocoeffs = []
154 occnos = []
155 line = next(inputfile)
157 # Besides a double blank line, stop when the next orbitals are encountered for unrestricted jobs
158 # or if there are stars on the line which always signifies the end of the block.
159 while line.strip() and (not "ORBITALS" in line) and (not set(line.strip()) == {'*'}):
161 # The function names are normally printed just once, but if symmetry is used then each irrep
162 # has its own mocoeff block with a preceding list of names.
163 is_aonames = line[:25].strip() == ""
164 if is_aonames:
166 # We need to save this offset for parsing the coefficients later.
167 offset = len(aonames)
169 aonum = len(aonames)
170 while line.strip():
171 for s in line.split():
172 if s.isdigit():
173 atomno = int(s)
174 atombasis[atomno-1].append(aonum)
175 aonum += 1
176 else:
177 functype = s
178 element = self.table.element[self.atomnos[atomno-1]]
179 aoname = "%s%i_%s" % (element, atomno, functype)
180 aonames.append(aoname)
181 line = next(inputfile)
183 # Now there can be one or two blank lines.
184 while not line.strip():
185 line = next(inputfile)
187 # Newer versions of Molpro (for example, 2012 test files) will print some
188 # more things here, such as HOMO and LUMO, but these have less than 10 columns.
189 if "HOMO" in line or "LUMO" in line:
190 break
192 # End of the NATURAL ORBITALS section.
193 if "Natural orbital dump" in line:
194 break
196 # Now parse the MO coefficients, padding the list with an appropriate amount of zeros.
197 coeffs = [0.0 for i in range(offset)]
198 while line.strip() != "":
199 if line[:31].rstrip():
200 tokens = line.split()
201 moenergy = float(tokens[2])
202 moenergy = utils.convertor(moenergy, "hartree", "eV")
203 moenergies.append(moenergy)
204 if self.naturalorbitals:
205 occno = float(tokens[1])
206 occnos.append(occno)
208 # Coefficients are in 10.6f format and splitting does not work since there are not
209 # always spaces between them. If the numbers are very large, there will be stars.
210 str_coeffs = line[31:]
211 ncoeffs = len(str_coeffs) // 10
212 coeff = []
213 for ic in range(ncoeffs):
214 p = str_coeffs[ic*10:(ic+1)*10]
215 try:
216 c = float(p)
217 except ValueError as detail:
218 self.logger.warn("setting coeff element to zero: %s" % detail)
219 c = 0.0
220 coeff.append(c)
221 coeffs.extend(coeff)
222 line = next(inputfile)
223 mocoeffs.append(coeffs)
225 # The loop should keep going until there is a double blank line, and there is
226 # a single line between each coefficient block.
227 line = next(inputfile)
228 if not line.strip():
229 line = next(inputfile)
231 # If symmetry was used (offset was needed) then we will need to pad all MO vectors
232 # up to nbasis for all irreps before the last one.
233 if offset > 0:
234 for im, m in enumerate(mocoeffs):
235 if len(m) < self.nbasis:
236 mocoeffs[im] = m + [0.0 for i in range(self.nbasis - len(m))]
238 self.set_attribute('atombasis', atombasis)
239 self.set_attribute('aonames', aonames)
241 if self.naturalorbitals:
242 # Consistent with current cclib conventions, keep only the
243 # last possible set of natural orbital coefficients and
244 # occupation numbers.
245 self.nocoeffs = mocoeffs
246 self.nooccnos = occnos
247 else:
248 # Consistent with current cclib conventions, reset moenergies/mocoeffs if they have been
249 # previously parsed, since we want to produce only the final values.
250 if not hasattr(self, "moenergies") or spin == 0:
251 self.mocoeffs = []
252 self.moenergies = []
253 self.moenergies.append(moenergies)
254 self.mocoeffs.append(mocoeffs)
256 # Check if last line begins the next ELECTRON ORBITALS section, because we already used
257 # this line and need to know when this method is called next time.
258 if line[1:18] == "ELECTRON ORBITALS":
259 self.electronorbitals = line
260 else:
261 self.electronorbitals = ""
263 return
265 def extract(self, inputfile, line):
266 """Extract information from the file object inputfile."""
268 # Extract the package version number. The one associated with NAME may
269 # be more specific.
270 if line.strip()[:4] == "NAME":
271 self.metadata["package_version"] = line.split()[-1]
272 # Only take the less-specific version if it doesn't already exist.
273 if "Version" in line:
274 package_version = self.metadata.get("package_version")
275 less_specific_package_version = line.split()[1]
276 if not package_version:
277 self.metadata["package_version"] = less_specific_package_version
278 # ...but use it for the legacy (short) version.
279 self.metadata["legacy_package_version"] = less_specific_package_version
280 if "SHA1" in line:
281 package_version = self.metadata.get("package_version")
282 if package_version:
283 self.metadata["package_version"] = ''.join([
284 package_version, "+", line.split()[-1]
285 ])
287 if line[1:19] == "ATOMIC COORDINATES":
289 if not hasattr(self, "atomcoords"):
290 self.atomcoords = []
292 atomcoords = []
293 atomnos = []
295 self.skip_lines(inputfile, ['line', 'line', 'line'])
297 line = next(inputfile)
298 while line.strip():
299 temp = line.strip().split()
300 atomcoords.append([utils.convertor(float(x), "bohr", "Angstrom") for x in temp[3:6]]) # bohrs to angs
301 atomnos.append(int(round(float(temp[2]))))
302 line = next(inputfile)
304 self.atomcoords.append(atomcoords)
306 self.set_attribute('atomnos', atomnos)
307 self.set_attribute('natom', len(self.atomnos))
309 # Use BASIS DATA to parse input for gbasis, aonames and atombasis. If symmetry is used,
310 # the function number starts from 1 for each irrep (the irrep index comes after the dot).
311 #
312 # BASIS DATA
313 #
314 # Nr Sym Nuc Type Exponents Contraction coefficients
315 #
316 # 1.1 A 1 1s 71.616837 0.154329
317 # 13.045096 0.535328
318 # 3.530512 0.444635
319 # 2.1 A 1 1s 2.941249 -0.099967
320 # 0.683483 0.399513
321 # ...
322 #
323 if line[1:11] == "BASIS DATA":
325 # We can do a sanity check with the header.
326 self.skip_line(inputfile, 'blank')
327 header = next(inputfile)
328 assert header.split() == ["Nr", "Sym", "Nuc", "Type", "Exponents", "Contraction", "coefficients"]
329 self.skip_line(inputfile, 'blank')
331 aonames = []
332 atombasis = [[] for i in range(self.natom)]
333 gbasis = [[] for i in range(self.natom)]
334 while line.strip():
336 # We need to read the line at the start of the loop here, because the last function
337 # will be added when a blank line signalling the end of the block is encountered.
338 line = next(inputfile)
340 # The formatting here can exhibit subtle differences, including the number of spaces
341 # or indentation size. However, we will rely on explicit slices since not all components
342 # are always available. In fact, components not being there has some meaning (see below).
343 line_nr = line[1:6].strip()
344 line_sym = line[7:9].strip()
345 line_nuc = line[11:15].strip()
346 line_type = line[16:22].strip()
347 line_exp = line[25:38].strip()
348 line_coeffs = line[38:].strip()
350 # If a new function type is printed or the BASIS DATA block ends with a blank line,
351 # then add the previous function to gbasis, except for the first function since
352 # there was no preceeding one. When translating the Molpro function name to gbasis,
353 # note that Molpro prints all components, but we want it only once, with the proper
354 # shell type (S,P,D,F,G). Molpro names also differ between Cartesian/spherical representations.
355 if (line_type and aonames) or line.strip() == "":
357 # All the possible AO names are created with the class. The function should always
358 # find a match in that dictionary, so we can check for that here and will need to
359 # update the dict if something unexpected comes up.
360 funcbasis = None
361 for fb, names in self.atomic_orbital_names.items():
362 if functype in names:
363 funcbasis = fb
364 assert funcbasis
366 # There is a separate basis function for each column of contraction coefficients. Since all
367 # atomic orbitals for a subshell will have the same parameters, we can simply check if
368 # the function tuple is already in gbasis[i] before adding it.
369 for i in range(len(coefficients[0])):
371 func = (funcbasis, [])
372 for j in range(len(exponents)):
373 func[1].append((exponents[j], coefficients[j][i]))
374 if func not in gbasis[funcatom-1]:
375 gbasis[funcatom-1].append(func)
377 # If it is a new type, set up the variables for the next shell(s). An exception is symmetry functions,
378 # which we want to copy from the previous function and don't have a new number on the line. For them,
379 # we just want to update the nuclear index.
380 if line_type:
381 if line_nr:
382 exponents = []
383 coefficients = []
384 functype = line_type
385 funcatom = int(line_nuc)
387 # Add any exponents and coefficients to lists.
388 if line_exp and line_coeffs:
389 funcexp = float(line_exp)
390 funccoeffs = [float(s) for s in line_coeffs.split()]
391 exponents.append(funcexp)
392 coefficients.append(funccoeffs)
394 # If the function number is present then add to atombasis and aonames, which is different from
395 # adding to gbasis since it enumerates AOs rather than basis functions. The number counts functions
396 # in each irrep from 1 and we could add up the functions for each irrep to get the global count,
397 # but it is simpler to just see how many aonames we have already parsed. Any symmetry functions
398 # are also printed, but they don't get numbers so they are nor parsed.
399 if line_nr:
400 element = self.table.element[self.atomnos[funcatom-1]]
401 aoname = "%s%i_%s" % (element, funcatom, functype)
402 aonames.append(aoname)
403 funcnr = len(aonames)
404 atombasis[funcatom-1].append(funcnr-1)
406 self.set_attribute('aonames', aonames)
407 self.set_attribute('atombasis', atombasis)
408 self.set_attribute('gbasis', gbasis)
410 if line[1:23] == "NUMBER OF CONTRACTIONS":
411 nbasis = int(line.split()[3])
412 self.set_attribute('nbasis', nbasis)
414 # Basis set name
415 if line[1:8] == "Library":
416 self.metadata["basis_set"] = line.split()[4]
418 # This is used to signalize whether we are inside an SCF calculation.
419 if line[1:8] == "PROGRAM" and line[14:18] == "-SCF":
421 self.insidescf = True
422 self.metadata["methods"].append("HF")
424 # Use this information instead of 'SETTING ...', in case the defaults are standard.
425 # Note that this is sometimes printed in each geometry optimization step.
426 if line[1:20] == "NUMBER OF ELECTRONS":
428 spinup = int(line.split()[3][:-1])
429 spindown = int(line.split()[4][:-1])
430 # Nuclear charges (atomnos) should be parsed by now.
431 nuclear = numpy.sum(self.atomnos)
433 charge = nuclear - spinup - spindown
434 self.set_attribute('charge', charge)
436 mult = spinup - spindown + 1
437 self.set_attribute('mult', mult)
439 # Convergenve thresholds for SCF cycle, should be contained in a line such as:
440 # CONVERGENCE THRESHOLDS: 1.00E-05 (Density) 1.40E-07 (Energy)
441 if self.insidescf and line[1:24] == "CONVERGENCE THRESHOLDS:":
443 if not hasattr(self, "scftargets"):
444 self.scftargets = []
446 scftargets = list(map(float, line.split()[2::2]))
447 self.scftargets.append(scftargets)
448 # Usually two criteria, but save the names this just in case.
449 self.scftargetnames = line.split()[3::2]
451 # Read in the print out of the SCF cycle - for scfvalues. For RHF looks like:
452 # ITERATION DDIFF GRAD ENERGY 2-EL.EN. DIPOLE MOMENTS DIIS
453 # 1 0.000D+00 0.000D+00 -379.71523700 1159.621171 0.000000 0.000000 0.000000 0
454 # 2 0.000D+00 0.898D-02 -379.74469736 1162.389787 0.000000 0.000000 0.000000 1
455 # 3 0.817D-02 0.144D-02 -379.74635529 1162.041033 0.000000 0.000000 0.000000 2
456 # 4 0.213D-02 0.571D-03 -379.74658063 1162.159929 0.000000 0.000000 0.000000 3
457 # 5 0.799D-03 0.166D-03 -379.74660889 1162.144256 0.000000 0.000000 0.000000 4
458 if self.insidescf and line[1:10] == "ITERATION":
460 if not hasattr(self, "scfvalues"):
461 self.scfvalues = []
463 line = next(inputfile)
464 energy = 0.0
465 scfvalues = []
466 while line.strip() != "":
467 chomp = line.split()
468 if chomp[0].isdigit():
470 ddiff = float(chomp[1].replace('D', 'E'))
471 grad = float(chomp[2].replace('D', 'E'))
472 newenergy = float(chomp[3])
473 ediff = newenergy - energy
474 energy = newenergy
476 # The convergence thresholds must have been read above.
477 # Presently, we recognize MAX DENSITY and MAX ENERGY thresholds.
478 numtargets = len(self.scftargetnames)
479 values = [numpy.nan]*numtargets
480 for n, name in zip(list(range(numtargets)), self.scftargetnames):
481 if "ENERGY" in name.upper():
482 values[n] = ediff
483 elif "DENSITY" in name.upper():
484 values[n] = ddiff
485 scfvalues.append(values)
487 try:
488 line = next(inputfile)
489 except StopIteration:
490 self.logger.warning('File terminated before end of last SCF! Last gradient: {}'.format(grad))
491 break
492 self.scfvalues.append(numpy.array(scfvalues))
494 if "dispersion correction" in line \
495 and line.strip() != "dispersion correction activated":
496 dispersion = utils.convertor(float(line.split()[-1]), "hartree", "eV")
497 self.append_attribute("dispersionenergies", dispersion)
499 # SCF result - RHF/UHF and DFT (RKS) energies.
500 if (line[1:5] in ["!RHF", "!UHF", "!RKS"] and line[16:22].lower() == "energy"):
502 if not hasattr(self, "scfenergies"):
503 self.scfenergies = []
504 scfenergy = float(line.split()[4])
505 self.scfenergies.append(utils.convertor(scfenergy, "hartree", "eV"))
507 # We are now done with SCF cycle (after a few lines).
508 self.insidescf = False
510 # MP2 energies.
511 if line[1:5] == "!MP2":
513 self.metadata["methods"].append("MP2")
515 if not hasattr(self, 'mpenergies'):
516 self.mpenergies = []
517 mp2energy = float(line.split()[-1])
518 mp2energy = utils.convertor(mp2energy, "hartree", "eV")
519 self.mpenergies.append([mp2energy])
521 # MP2 energies if MP3 or MP4 is also calculated.
522 if line[1:5] == "MP2:":
524 self.metadata["methods"].append("MP2")
525 if not hasattr(self, 'mpenergies'):
526 self.mpenergies = []
527 mp2energy = float(line.split()[2])
528 mp2energy = utils.convertor(mp2energy, "hartree", "eV")
529 self.mpenergies.append([mp2energy])
531 # MP3 (D) and MP4 (DQ or SDQ) energies.
532 if line[1:8] == "MP3(D):":
534 self.metadata["methods"].append("MP3")
535 mp3energy = float(line.split()[2])
536 mp2energy = utils.convertor(mp3energy, "hartree", "eV")
537 line = next(inputfile)
538 self.mpenergies[-1].append(mp2energy)
539 if line[1:9] == "MP4(DQ):":
540 self.metadata["methods"].append("MP4")
541 mp4energy = float(line.split()[2])
542 line = next(inputfile)
543 if line[1:10] == "MP4(SDQ):":
544 self.metadata["methods"].append("MP4")
545 mp4energy = float(line.split()[2])
546 mp4energy = utils.convertor(mp4energy, "hartree", "eV")
547 self.mpenergies[-1].append(mp4energy)
549 # The CCSD program operates all closed-shel coupled cluster runs.
550 if line[1:15] == "PROGRAM * CCSD":
552 self.metadata["methods"].append("CCSD")
553 if not hasattr(self, "ccenergies"):
554 self.ccenergies = []
555 while line[1:20] != "Program statistics:":
556 if line[71:84] == "T1 diagnostic":
557 self.metadata["t1_diagnostic"] = float(line.split()[-1])
558 # The last energy (most exact) will be read last and thus saved.
559 if line[1:5] == "!CCD" or line[1:6] == "!CCSD" or line[1:9] == "!CCSD(T)":
560 ccenergy = float(line.split()[-1])
561 ccenergy = utils.convertor(ccenergy, "hartree", "eV")
562 line = next(inputfile)
563 self.ccenergies.append(ccenergy)
565 # Read the occupancy (index of HOMO s).
566 # For restricted calculations, there is one line here. For unrestricted, two:
567 # Final alpha occupancy: ...
568 # Final beta occupancy: ...
569 if line[1:17] == "Final occupancy:":
570 self.homos = [int(line.split()[-1])-1]
571 if line[1:23] == "Final alpha occupancy:":
572 self.homos = [int(line.split()[-1])-1]
573 line = next(inputfile)
574 self.homos.append(int(line.split()[-1])-1)
576 # Dipole is always printed on one line after the final RHF energy, and by default
577 # it seems Molpro uses the origin as the reference point.
578 if line.strip()[:13] == "Dipole moment":
580 assert line.split()[2] == "/Debye"
582 reference = [0.0, 0.0, 0.0]
583 dipole = [float(d) for d in line.split()[-3:]]
585 if not hasattr(self, 'moments'):
586 self.moments = [reference, dipole]
587 else:
588 self.moments[1] == dipole
590 # Static dipole polarizability.
591 if line.strip() == "SCF dipole polarizabilities":
592 if not hasattr(self, "polarizabilities"):
593 self.polarizabilities = []
594 polarizability = []
595 self.skip_lines(inputfile, ['b', 'directions'])
596 for _ in range(3):
597 line = next(inputfile)
598 polarizability.append(line.split()[1:])
599 self.polarizabilities.append(numpy.array(polarizability))
601 # Check for ELECTRON ORBITALS (canonical molecular orbitals).
602 if line[1:18] == "ELECTRON ORBITALS" or self.electronorbitals:
603 self._parse_orbitals(inputfile, line)
606 # If the MATROP program was called appropriately,
607 # the atomic obital overlap matrix S is printed.
608 # The matrix is printed straight-out, ten elements in each row, both halves.
609 # Note that is the entire matrix is not printed, then aooverlaps
610 # will not have dimensions nbasis x nbasis.
611 if line[1:9] == "MATRIX S":
613 if not hasattr(self, "aooverlaps"):
614 self.aooverlaps = [[]]
616 self.skip_lines(inputfile, ['b', 'symblocklabel'])
618 line = next(inputfile)
619 while line.strip() != "":
620 elements = [float(s) for s in line.split()]
621 if len(self.aooverlaps[-1]) + len(elements) <= self.nbasis:
622 self.aooverlaps[-1] += elements
623 else:
624 n = len(self.aooverlaps[-1]) + len(elements) - self.nbasis
625 self.aooverlaps[-1] += elements[:-n]
626 self.aooverlaps.append([])
627 self.aooverlaps[-1] += elements[-n:]
628 line = next(inputfile)
630 # Check for MCSCF natural orbitals.
631 if line[1:17] == "NATURAL ORBITALS":
632 self._parse_orbitals(inputfile, line)
634 # Thresholds are printed only if the defaults are changed with GTHRESH.
635 # In that case, we can fill geotargets with non-default values.
636 # The block should look like this as of Molpro 2006.1:
637 # THRESHOLDS:
639 # ZERO = 1.00D-12 ONEINT = 1.00D-12 TWOINT = 1.00D-11 PREFAC = 1.00D-14 LOCALI = 1.00D-09 EORDER = 1.00D-04
640 # ENERGY = 0.00D+00 ETEST = 0.00D+00 EDENS = 0.00D+00 THRDEDEF= 1.00D-06 GRADIENT= 1.00D-02 STEP = 1.00D-03
641 # ORBITAL = 1.00D-05 CIVEC = 1.00D-05 COEFF = 1.00D-04 PRINTCI = 5.00D-02 PUNCHCI = 9.90D+01 OPTGRAD = 3.00D-04
642 # OPTENERG= 1.00D-06 OPTSTEP = 3.00D-04 THRGRAD = 2.00D-04 COMPRESS= 1.00D-11 VARMIN = 1.00D-07 VARMAX = 1.00D-03
643 # THRDOUB = 0.00D+00 THRDIV = 1.00D-05 THRRED = 1.00D-07 THRPSP = 1.00D+00 THRDC = 1.00D-10 THRCS = 1.00D-10
644 # THRNRM = 1.00D-08 THREQ = 0.00D+00 THRDE = 1.00D+00 THRREF = 1.00D-05 SPARFAC = 1.00D+00 THRDLP = 1.00D-07
645 # THRDIA = 1.00D-10 THRDLS = 1.00D-07 THRGPS = 0.00D+00 THRKEX = 0.00D+00 THRDIS = 2.00D-01 THRVAR = 1.00D-10
646 # THRLOC = 1.00D-06 THRGAP = 1.00D-06 THRLOCT = -1.00D+00 THRGAPT = -1.00D+00 THRORB = 1.00D-06 THRMLTP = 0.00D+00
647 # THRCPQCI= 1.00D-10 KEXTA = 0.00D+00 THRCOARS= 0.00D+00 SYMTOL = 1.00D-06 GRADTOL = 1.00D-06 THROVL = 1.00D-08
648 # THRORTH = 1.00D-08 GRID = 1.00D-06 GRIDMAX = 1.00D-03 DTMAX = 0.00D+00
649 if line[1:12] == "THRESHOLDS":
651 self.skip_line(input, 'blank')
653 line = next(inputfile)
654 while line.strip():
656 if "OPTENERG" in line:
657 start = line.find("OPTENERG")
658 optenerg = line[start+10:start+20]
659 if "OPTGRAD" in line:
660 start = line.find("OPTGRAD")
661 optgrad = line[start+10:start+20]
662 if "OPTSTEP" in line:
663 start = line.find("OPTSTEP")
664 optstep = line[start+10:start+20]
665 line = next(inputfile)
667 self.geotargets = [optenerg, optgrad, optstep]
669 # The optimization history is the source for geovlues:
670 #
671 # END OF GEOMETRY OPTIMIZATION. TOTAL CPU: 246.9 SEC
672 #
673 # ITER. ENERGY(OLD) ENERGY(NEW) DE GRADMAX GRADNORM GRADRMS STEPMAX STEPLEN STEPRMS
674 # 1 -382.02936898 -382.04914450 -0.01977552 0.11354875 0.20127947 0.01183997 0.12972761 0.20171740 0.01186573
675 # 2 -382.04914450 -382.05059234 -0.00144784 0.03299860 0.03963339 0.00233138 0.05577169 0.06687650 0.00393391
676 # 3 -382.05059234 -382.05069136 -0.00009902 0.00694359 0.01069889 0.00062935 0.01654549 0.02016307 0.00118606
677 # ...
678 #
679 # The above is an exerpt from Molpro 2006, but it is a little bit different
680 # for Molpro 2012, namely the 'END OF GEOMETRY OPTIMIZATION occurs after the
681 # actual history list. It seems there is a another consistent line before the
682 # history, but this might not be always true -- so this is a potential weak link.
683 if line[1:30] == "END OF GEOMETRY OPTIMIZATION." or line.strip() == "Quadratic Steepest Descent - Minimum Search":
685 # I think this is the trigger for convergence, and it shows up at the top in Molpro 2006.
686 geometry_converged = line[1:30] == "END OF GEOMETRY OPTIMIZATION."
688 self.skip_line(inputfile, 'blank')
690 # Newer version of Molpro (at least for 2012) print and additional column
691 # with the timing information for each step. Otherwise, the history looks the same.
692 headers = next(inputfile).split()
693 if not len(headers) in (10, 11):
694 return
696 # Although criteria can be changed, the printed format should not change.
697 # In case it does, retrieve the columns for each parameter.
698 index_ITER = headers.index('ITER.')
699 index_THRENERG = headers.index('DE')
700 index_THRGRAD = headers.index('GRADMAX')
701 index_THRSTEP = headers.index('STEPMAX')
703 line = next(inputfile)
704 self.geovalues = []
705 while line.strip():
707 line = line.split()
708 istep = int(line[index_ITER])
710 geovalues = []
711 geovalues.append(float(line[index_THRENERG]))
712 geovalues.append(float(line[index_THRGRAD]))
713 geovalues.append(float(line[index_THRSTEP]))
714 self.geovalues.append(geovalues)
715 line = next(inputfile)
716 if line.strip() == "Freezing grid":
717 line = next(inputfile)
719 # The convergence trigger shows up somewhere at the bottom in Molpro 2012,
720 # before the final stars. If convergence is not reached, there is an additional
721 # line that can be checked for. This is a little tricky, though, since it is
722 # not the last line... so bail out of the loop if convergence failure is detected.
723 while "*****" not in line:
724 line = next(inputfile)
725 if line.strip() == "END OF GEOMETRY OPTIMIZATION.":
726 geometry_converged = True
727 if "No convergence" in line:
728 geometry_converged = False
729 break
731 # Finally, deal with optdone, append the last step to it only if we had convergence.
732 if not hasattr(self, 'optdone'):
733 self.optdone = []
734 if geometry_converged:
735 self.optdone.append(istep-1)
737 # This block should look like this:
738 # Normal Modes
739 #
740 # 1 Au 2 Bu 3 Ag 4 Bg 5 Ag
741 # Wavenumbers [cm-1] 151.81 190.88 271.17 299.59 407.86
742 # Intensities [km/mol] 0.33 0.28 0.00 0.00 0.00
743 # Intensities [relative] 0.34 0.28 0.00 0.00 0.00
744 # CX1 0.00000 -0.01009 0.02577 0.00000 0.06008
745 # CY1 0.00000 -0.05723 -0.06696 0.00000 0.06349
746 # CZ1 -0.02021 0.00000 0.00000 0.11848 0.00000
747 # CX2 0.00000 -0.01344 0.05582 0.00000 -0.02513
748 # CY2 0.00000 -0.06288 -0.03618 0.00000 0.00349
749 # CZ2 -0.05565 0.00000 0.00000 0.07815 0.00000
750 # ...
751 # Molpro prints low frequency modes in a subsequent section with the same format,
752 # which also contains zero frequency modes, with the title:
753 # Normal Modes of low/zero frequencies
754 if line[1:13] == "Normal Modes":
756 islow = (line[1:37] == "Normal Modes of low/zero frequencies")
758 self.skip_line(inputfile, 'blank')
760 # Each portion of five modes is followed by a single blank line.
761 # The whole block is followed by an additional blank line.
762 line = next(inputfile)
763 while line.strip():
765 if line[1:25].isspace():
766 if not islow: # vibsyms not printed for low freq modes
767 numbers = list(map(int, line.split()[::2]))
768 vibsyms = line.split()[1::2]
769 else:
770 # give low freq modes an empty str as vibsym
771 # note there could be other possibilities..
772 numbers = list(map(int, line.split()))
773 vibsyms = ['']*len(numbers)
775 if line[1:12] == "Wavenumbers":
776 vibfreqs = list(map(float, line.strip().split()[2:]))
778 if line[1:21] == "Intensities [km/mol]":
779 vibirs = list(map(float, line.strip().split()[2:]))
781 # There should always by 3xnatom displacement rows.
782 if line[1:11].isspace() and line[13:25].strip().isdigit():
784 # There are a maximum of 5 modes per line.
785 nmodes = len(line.split())-1
787 vibdisps = []
788 for i in range(nmodes):
789 vibdisps.append([])
790 for n in range(self.natom):
791 vibdisps[i].append([])
792 for i in range(nmodes):
793 disp = float(line.split()[i+1])
794 vibdisps[i][0].append(disp)
795 for i in range(self.natom*3 - 1):
796 line = next(inputfile)
797 iatom = (i+1)//3
798 for i in range(nmodes):
799 disp = float(line.split()[i+1])
800 vibdisps[i][iatom].append(disp)
802 line = next(inputfile)
803 if not line.strip():
805 if not hasattr(self, "vibfreqs"):
806 self.vibfreqs = []
807 if not hasattr(self, "vibsyms"):
808 self.vibsyms = []
809 if not hasattr(self, "vibirs") and "vibirs" in dir():
810 self.vibirs = []
811 if not hasattr(self, "vibdisps") and "vibdisps" in dir():
812 self.vibdisps = []
814 if not islow:
815 self.vibfreqs.extend(vibfreqs)
816 self.vibsyms.extend(vibsyms)
817 if "vibirs" in dir():
818 self.vibirs.extend(vibirs)
819 if "vibdisps" in dir():
820 self.vibdisps.extend(vibdisps)
821 else:
822 nonzero = [f > 0 for f in vibfreqs]
823 vibfreqs = [f for f in vibfreqs if f > 0]
824 self.vibfreqs = vibfreqs + self.vibfreqs
825 vibsyms = [vibsyms[i] for i in range(len(vibsyms)) if nonzero[i]]
826 self.vibsyms = vibsyms + self.vibsyms
827 if "vibirs" in dir():
828 vibirs = [vibirs[i] for i in range(len(vibirs)) if nonzero[i]]
829 self.vibirs = vibirs + self.vibirs
830 if "vibdisps" in dir():
831 vibdisps = [vibdisps[i] for i in range(len(vibdisps)) if nonzero[i]]
832 self.vibdisps = vibdisps + self.vibdisps
834 line = next(inputfile)
836 if line[1:16] == "Force Constants":
838 hessian = []
839 line = next(inputfile)
840 hess = []
841 tmp = []
843 while line.strip():
844 try:
845 list(map(float, line.strip().split()[2:]))
846 except:
847 line = next(inputfile)
848 line.strip().split()[1:]
849 hess.extend([list(map(float, line.strip().split()[1:]))])
850 line = next(inputfile)
851 lig = 0
853 while (lig == 0) or (len(hess[0]) > 1):
854 tmp.append(hess.pop(0))
855 lig += 1
856 k = 5
858 while len(hess) != 0:
859 tmp[k] += hess.pop(0)
860 k += 1
861 if (len(tmp[k-1]) == lig):
862 break
863 if k >= lig:
864 k = len(tmp[-1])
865 for l in tmp:
866 hessian += l
868 self.set_attribute("hessian", hessian)
870 if line[1:14] == "Atomic Masses" and hasattr(self, "hessian"):
872 line = next(inputfile)
873 self.amass = list(map(float, line.strip().split()[2:]))
875 while line.strip():
876 line = next(inputfile)
877 self.amass += list(map(float, line.strip().split()[2:]))
879 #1PROGRAM * POP (Mulliken population analysis)
880 #
881 #
882 # Density matrix read from record 2100.2 Type=RHF/CHARGE (state 1.1)
883 #
884 # Population analysis by basis function type
885 #
886 # Unique atom s p d f g Total Charge
887 # 2 C 3.11797 2.88497 0.00000 0.00000 0.00000 6.00294 - 0.00294
888 # 3 C 3.14091 2.91892 0.00000 0.00000 0.00000 6.05984 - 0.05984
889 # ...
890 if line.strip() == "1PROGRAM * POP (Mulliken population analysis)":
892 self.skip_lines(inputfile, ['b', 'b', 'density_source', 'b', 'func_type', 'b'])
894 header = next(inputfile)
895 icharge = header.split().index('Charge')
897 charges = []
898 line = next(inputfile)
899 while line.strip():
900 cols = line.split()
901 charges.append(float(cols[icharge]+cols[icharge+1]))
902 line = next(inputfile)
904 if not hasattr(self, "atomcharges"):
905 self.atomcharges = {}
906 self.atomcharges['mulliken'] = charges
908 if 'GRADIENT FOR STATE' in line:
909 for _ in range(3):
910 next(inputfile)
911 grad = []
912 lines_read = 0
913 while lines_read < self.natom:
914 line = next(inputfile)
915 # Because molpro inserts an empty line every 50th atom.
916 if line:
917 grad.append([float(x) for x in line.split()[1:]])
918 lines_read += 1
919 if not hasattr(self, 'grads'):
920 self.grads = []
921 self.grads.append(grad)
923 if line[:25] == ' Variable memory released':
924 self.metadata['success'] = True