Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2# 

3# Copyright (c) 2019, the cclib development team 

4# 

5# This file is part of cclib (http://cclib.github.io) and is distributed under 

6# the terms of the BSD 3-Clause License. 

7 

8"""Parser for Molpro output files""" 

9 

10 

11import itertools 

12 

13import numpy 

14 

15from cclib.parser import logfileparser 

16from cclib.parser import utils 

17 

18 

19def create_atomic_orbital_names(orbitals): 

20 """Generate all atomic orbital names that could be used by Molpro. 

21 

22 The names are returned in a dictionary, organized by subshell (S, P, D and so on). 

23 """ 

24 

25 # We can write out the first two manually, since there are not that many. 

26 atomic_orbital_names = { 

27 'S': ['s', '1s'], 

28 'P': ['x', 'y', 'z', '2px', '2py', '2pz'], 

29 } 

30 

31 # Although we could write out all names for the other subshells, it is better 

32 # to generate them if we need to expand further, since the number of functions quickly 

33 # grows and there are both Cartesian and spherical variants to consider. 

34 # For D orbitals, the Cartesian functions are xx, yy, zz, xy, xz and yz, and the 

35 # spherical ones are called 3d0, 3d1-, 3d1+, 3d2- and 3d2+. For F orbitals, the Cartesians 

36 # are xxx, xxy, xxz, xyy, ... and the sphericals are 4f0, 4f1-, 4f+ and so on. 

37 for i, orb in enumerate(orbitals): 

38 

39 # Cartesian can be generated directly by combinations. 

40 cartesian = list(map(''.join, list(itertools.combinations_with_replacement(['x', 'y', 'z'], i+2)))) 

41 

42 # For spherical functions, we need to construct the names. 

43 pre = str(i+3) + orb.lower() 

44 spherical = [pre + '0'] + [pre + str(j) + s for j in range(1, i+3) for s in ['-', '+']] 

45 atomic_orbital_names[orb] = cartesian + spherical 

46 

47 return atomic_orbital_names 

48 

49 

50class Molpro(logfileparser.Logfile): 

51 """Molpro file parser""" 

52 

53 atomic_orbital_names = create_atomic_orbital_names(['D', 'F', 'G']) 

54 

55 def __init__(self, *args, **kwargs): 

56 # Call the __init__ method of the superclass 

57 super(Molpro, self).__init__(logname="Molpro", *args, **kwargs) 

58 

59 def __str__(self): 

60 """Return a string representation of the object.""" 

61 return "Molpro log file %s" % (self.filename) 

62 

63 def __repr__(self): 

64 """Return a representation of the object.""" 

65 return 'Molpro("%s")' % (self.filename) 

66 

67 def normalisesym(self, label): 

68 """Normalise the symmetries used by Molpro.""" 

69 ans = label.replace("`", "'").replace("``", "''") 

70 return ans 

71 

72 def before_parsing(self): 

73 

74 self.electronorbitals = "" 

75 self.insidescf = False 

76 

77 def after_parsing(self): 

78 

79 # If optimization thresholds are default, they are normally not printed and we need 

80 # to set them to the default after parsing. Make sure to set them in the same order that 

81 # they appear in the in the geometry optimization progress printed in the output, 

82 # namely: energy difference, maximum gradient, maximum step. 

83 if not hasattr(self, "geotargets"): 

84 self.geotargets = [] 

85 # Default THRENERG (required accuracy of the optimized energy). 

86 self.geotargets.append(1E-6) 

87 # Default THRGRAD (required accuracy of the optimized gradient). 

88 self.geotargets.append(3E-4) 

89 # Default THRSTEP (convergence threshold for the geometry optimization step). 

90 self.geotargets.append(3E-4) 

91 

92 def _parse_orbitals(self, inputfile, line): 

93 # From this block aonames, atombasis, moenergies and mocoeffs can be parsed. The data is 

94 # flipped compared to most programs (GAMESS, Gaussian), since the MOs are in rows. Also, Molpro 

95 # does not cut the table into parts, rather each MO row has as many lines as it takes ro print 

96 # all of the MO coefficients. Each row normally has 10 coefficients, although this can be less 

97 # for the last row and when symmetry is used (each irrep has its own block). 

98 # 

99 # ELECTRON ORBITALS 

100 # ================= 

101 # 

102 # 

103 # Orb Occ Energy Couls-En Coefficients 

104 # 

105 # 1 1s 1 1s 1 2px 1 2py 1 2pz 2 1s (...) 

106 # 3 1s 3 1s 3 2px 3 2py 3 2pz 4 1s (...) 

107 # (...) 

108 # 

109 # 1.1 2 -11.0351 -43.4915 0.701460 0.025696 -0.000365 -0.000006 0.000000 0.006922 (...) 

110 # -0.006450 0.004742 -0.001028 -0.002955 0.000000 -0.701460 (...) 

111 # (...) 

112 # 

113 # If an MCSCF calculation was performed, the natural orbitals 

114 # (coefficients and occupation numbers) are printed in a 

115 # format nearly identical to the ELECTRON ORBITALS section. 

116 # 

117 # NATURAL ORBITALS (state averaged) 

118 # ================================= 

119 # 

120 # Orb Occ Energy Coefficients 

121 # 

122 # 1 s 1 s 1 s 1 z 1 z 1 xx 1 yy 1 zz 2 s 2 s 

123 # 2 s 2 z 2 z 2 xx 2 yy 2 zz 3 s 3 s 3 z 3 y 

124 # 

125 # 1.1 2.00000 -20.678730 0.000141 -0.000057 0.001631 -0.001377 0.001117 0.000029 0.000293 -0.000852 1.000748 0.001746 

126 # -0.002552 -0.002005 0.001658 -0.001266 -0.001274 -0.001001 0.000215 -0.000131 -0.000242 -0.000126 

127 # 

128 # 2.1 2.00000 -11.322823 1.000682 0.004626 -0.000485 0.006634 -0.002096 -0.003072 -0.003282 -0.001724 -0.000181 0.006734 

129 # -0.002398 -0.000527 0.001335 0.000091 0.000058 0.000396 -0.003219 0.000981 0.000250 -0.000191 

130 # (...) 

131 

132 # The assigment of final cclib attributes is different for 

133 # canonical/natural orbitals. 

134 self.naturalorbitals = (line[1:17] == "NATURAL ORBITALS") 

135 # Make sure we didn't get here by mistake. 

136 assert line[1:18] == "ELECTRON ORBITALS" or self.electronorbitals or self.naturalorbitals 

137 

138 # For unrestricted calculations, ELECTRON ORBITALS is followed on the same line 

139 # by FOR POSITIVE SPIN or FOR NEGATIVE SPIN as appropriate. 

140 spin = (line[19:36] == "FOR NEGATIVE SPIN") or (self.electronorbitals[19:36] == "FOR NEGATIVE SPIN") 

141 

142 if self.naturalorbitals: 

143 self.skip_lines(inputfile, ['equals', 'b', 'headers', 'b']) 

144 else: 

145 if not self.electronorbitals: 

146 self.skip_line(inputfile, 'equals') 

147 self.skip_lines(inputfile, ['b', 'b', 'headers', 'b']) 

148 

149 aonames = [] 

150 atombasis = [[] for i in range(self.natom)] 

151 moenergies = [] 

152 # Use for both canonical and natural orbital coefficients. 

153 mocoeffs = [] 

154 occnos = [] 

155 line = next(inputfile) 

156 

157 # Besides a double blank line, stop when the next orbitals are encountered for unrestricted jobs 

158 # or if there are stars on the line which always signifies the end of the block. 

159 while line.strip() and (not "ORBITALS" in line) and (not set(line.strip()) == {'*'}): 

160 

161 # The function names are normally printed just once, but if symmetry is used then each irrep 

162 # has its own mocoeff block with a preceding list of names. 

163 is_aonames = line[:25].strip() == "" 

164 if is_aonames: 

165 

166 # We need to save this offset for parsing the coefficients later. 

167 offset = len(aonames) 

168 

169 aonum = len(aonames) 

170 while line.strip(): 

171 for s in line.split(): 

172 if s.isdigit(): 

173 atomno = int(s) 

174 atombasis[atomno-1].append(aonum) 

175 aonum += 1 

176 else: 

177 functype = s 

178 element = self.table.element[self.atomnos[atomno-1]] 

179 aoname = "%s%i_%s" % (element, atomno, functype) 

180 aonames.append(aoname) 

181 line = next(inputfile) 

182 

183 # Now there can be one or two blank lines. 

184 while not line.strip(): 

185 line = next(inputfile) 

186 

187 # Newer versions of Molpro (for example, 2012 test files) will print some 

188 # more things here, such as HOMO and LUMO, but these have less than 10 columns. 

189 if "HOMO" in line or "LUMO" in line: 

190 break 

191 

192 # End of the NATURAL ORBITALS section. 

193 if "Natural orbital dump" in line: 

194 break 

195 

196 # Now parse the MO coefficients, padding the list with an appropriate amount of zeros. 

197 coeffs = [0.0 for i in range(offset)] 

198 while line.strip() != "": 

199 if line[:31].rstrip(): 

200 tokens = line.split() 

201 moenergy = float(tokens[2]) 

202 moenergy = utils.convertor(moenergy, "hartree", "eV") 

203 moenergies.append(moenergy) 

204 if self.naturalorbitals: 

205 occno = float(tokens[1]) 

206 occnos.append(occno) 

207 

208 # Coefficients are in 10.6f format and splitting does not work since there are not 

209 # always spaces between them. If the numbers are very large, there will be stars. 

210 str_coeffs = line[31:] 

211 ncoeffs = len(str_coeffs) // 10 

212 coeff = [] 

213 for ic in range(ncoeffs): 

214 p = str_coeffs[ic*10:(ic+1)*10] 

215 try: 

216 c = float(p) 

217 except ValueError as detail: 

218 self.logger.warn("setting coeff element to zero: %s" % detail) 

219 c = 0.0 

220 coeff.append(c) 

221 coeffs.extend(coeff) 

222 line = next(inputfile) 

223 mocoeffs.append(coeffs) 

224 

225 # The loop should keep going until there is a double blank line, and there is 

226 # a single line between each coefficient block. 

227 line = next(inputfile) 

228 if not line.strip(): 

229 line = next(inputfile) 

230 

231 # If symmetry was used (offset was needed) then we will need to pad all MO vectors 

232 # up to nbasis for all irreps before the last one. 

233 if offset > 0: 

234 for im, m in enumerate(mocoeffs): 

235 if len(m) < self.nbasis: 

236 mocoeffs[im] = m + [0.0 for i in range(self.nbasis - len(m))] 

237 

238 self.set_attribute('atombasis', atombasis) 

239 self.set_attribute('aonames', aonames) 

240 

241 if self.naturalorbitals: 

242 # Consistent with current cclib conventions, keep only the 

243 # last possible set of natural orbital coefficients and 

244 # occupation numbers. 

245 self.nocoeffs = mocoeffs 

246 self.nooccnos = occnos 

247 else: 

248 # Consistent with current cclib conventions, reset moenergies/mocoeffs if they have been 

249 # previously parsed, since we want to produce only the final values. 

250 if not hasattr(self, "moenergies") or spin == 0: 

251 self.mocoeffs = [] 

252 self.moenergies = [] 

253 self.moenergies.append(moenergies) 

254 self.mocoeffs.append(mocoeffs) 

255 

256 # Check if last line begins the next ELECTRON ORBITALS section, because we already used 

257 # this line and need to know when this method is called next time. 

258 if line[1:18] == "ELECTRON ORBITALS": 

259 self.electronorbitals = line 

260 else: 

261 self.electronorbitals = "" 

262 

263 return 

264 

265 def extract(self, inputfile, line): 

266 """Extract information from the file object inputfile.""" 

267 

268 # Extract the package version number. The one associated with NAME may 

269 # be more specific. 

270 if line.strip()[:4] == "NAME": 

271 self.metadata["package_version"] = line.split()[-1] 

272 # Only take the less-specific version if it doesn't already exist. 

273 if "Version" in line: 

274 package_version = self.metadata.get("package_version") 

275 less_specific_package_version = line.split()[1] 

276 if not package_version: 

277 self.metadata["package_version"] = less_specific_package_version 

278 # ...but use it for the legacy (short) version. 

279 self.metadata["legacy_package_version"] = less_specific_package_version 

280 if "SHA1" in line: 

281 package_version = self.metadata.get("package_version") 

282 if package_version: 

283 self.metadata["package_version"] = ''.join([ 

284 package_version, "+", line.split()[-1] 

285 ]) 

286 

287 if line[1:19] == "ATOMIC COORDINATES": 

288 

289 if not hasattr(self, "atomcoords"): 

290 self.atomcoords = [] 

291 

292 atomcoords = [] 

293 atomnos = [] 

294 

295 self.skip_lines(inputfile, ['line', 'line', 'line']) 

296 

297 line = next(inputfile) 

298 while line.strip(): 

299 temp = line.strip().split() 

300 atomcoords.append([utils.convertor(float(x), "bohr", "Angstrom") for x in temp[3:6]]) # bohrs to angs 

301 atomnos.append(int(round(float(temp[2])))) 

302 line = next(inputfile) 

303 

304 self.atomcoords.append(atomcoords) 

305 

306 self.set_attribute('atomnos', atomnos) 

307 self.set_attribute('natom', len(self.atomnos)) 

308 

309 # Use BASIS DATA to parse input for gbasis, aonames and atombasis. If symmetry is used, 

310 # the function number starts from 1 for each irrep (the irrep index comes after the dot). 

311 # 

312 # BASIS DATA 

313 # 

314 # Nr Sym Nuc Type Exponents Contraction coefficients 

315 # 

316 # 1.1 A 1 1s 71.616837 0.154329 

317 # 13.045096 0.535328 

318 # 3.530512 0.444635 

319 # 2.1 A 1 1s 2.941249 -0.099967 

320 # 0.683483 0.399513 

321 # ... 

322 # 

323 if line[1:11] == "BASIS DATA": 

324 

325 # We can do a sanity check with the header. 

326 self.skip_line(inputfile, 'blank') 

327 header = next(inputfile) 

328 assert header.split() == ["Nr", "Sym", "Nuc", "Type", "Exponents", "Contraction", "coefficients"] 

329 self.skip_line(inputfile, 'blank') 

330 

331 aonames = [] 

332 atombasis = [[] for i in range(self.natom)] 

333 gbasis = [[] for i in range(self.natom)] 

334 while line.strip(): 

335 

336 # We need to read the line at the start of the loop here, because the last function 

337 # will be added when a blank line signalling the end of the block is encountered. 

338 line = next(inputfile) 

339 

340 # The formatting here can exhibit subtle differences, including the number of spaces 

341 # or indentation size. However, we will rely on explicit slices since not all components 

342 # are always available. In fact, components not being there has some meaning (see below). 

343 line_nr = line[1:6].strip() 

344 line_sym = line[7:9].strip() 

345 line_nuc = line[11:15].strip() 

346 line_type = line[16:22].strip() 

347 line_exp = line[25:38].strip() 

348 line_coeffs = line[38:].strip() 

349 

350 # If a new function type is printed or the BASIS DATA block ends with a blank line, 

351 # then add the previous function to gbasis, except for the first function since 

352 # there was no preceeding one. When translating the Molpro function name to gbasis, 

353 # note that Molpro prints all components, but we want it only once, with the proper 

354 # shell type (S,P,D,F,G). Molpro names also differ between Cartesian/spherical representations. 

355 if (line_type and aonames) or line.strip() == "": 

356 

357 # All the possible AO names are created with the class. The function should always 

358 # find a match in that dictionary, so we can check for that here and will need to 

359 # update the dict if something unexpected comes up. 

360 funcbasis = None 

361 for fb, names in self.atomic_orbital_names.items(): 

362 if functype in names: 

363 funcbasis = fb 

364 assert funcbasis 

365 

366 # There is a separate basis function for each column of contraction coefficients. Since all 

367 # atomic orbitals for a subshell will have the same parameters, we can simply check if 

368 # the function tuple is already in gbasis[i] before adding it. 

369 for i in range(len(coefficients[0])): 

370 

371 func = (funcbasis, []) 

372 for j in range(len(exponents)): 

373 func[1].append((exponents[j], coefficients[j][i])) 

374 if func not in gbasis[funcatom-1]: 

375 gbasis[funcatom-1].append(func) 

376 

377 # If it is a new type, set up the variables for the next shell(s). An exception is symmetry functions, 

378 # which we want to copy from the previous function and don't have a new number on the line. For them, 

379 # we just want to update the nuclear index. 

380 if line_type: 

381 if line_nr: 

382 exponents = [] 

383 coefficients = [] 

384 functype = line_type 

385 funcatom = int(line_nuc) 

386 

387 # Add any exponents and coefficients to lists. 

388 if line_exp and line_coeffs: 

389 funcexp = float(line_exp) 

390 funccoeffs = [float(s) for s in line_coeffs.split()] 

391 exponents.append(funcexp) 

392 coefficients.append(funccoeffs) 

393 

394 # If the function number is present then add to atombasis and aonames, which is different from 

395 # adding to gbasis since it enumerates AOs rather than basis functions. The number counts functions 

396 # in each irrep from 1 and we could add up the functions for each irrep to get the global count, 

397 # but it is simpler to just see how many aonames we have already parsed. Any symmetry functions 

398 # are also printed, but they don't get numbers so they are nor parsed. 

399 if line_nr: 

400 element = self.table.element[self.atomnos[funcatom-1]] 

401 aoname = "%s%i_%s" % (element, funcatom, functype) 

402 aonames.append(aoname) 

403 funcnr = len(aonames) 

404 atombasis[funcatom-1].append(funcnr-1) 

405 

406 self.set_attribute('aonames', aonames) 

407 self.set_attribute('atombasis', atombasis) 

408 self.set_attribute('gbasis', gbasis) 

409 

410 if line[1:23] == "NUMBER OF CONTRACTIONS": 

411 nbasis = int(line.split()[3]) 

412 self.set_attribute('nbasis', nbasis) 

413 

414 # Basis set name 

415 if line[1:8] == "Library": 

416 self.metadata["basis_set"] = line.split()[4] 

417 

418 # This is used to signalize whether we are inside an SCF calculation. 

419 if line[1:8] == "PROGRAM" and line[14:18] == "-SCF": 

420 

421 self.insidescf = True 

422 self.metadata["methods"].append("HF") 

423 

424 # Use this information instead of 'SETTING ...', in case the defaults are standard. 

425 # Note that this is sometimes printed in each geometry optimization step. 

426 if line[1:20] == "NUMBER OF ELECTRONS": 

427 

428 spinup = int(line.split()[3][:-1]) 

429 spindown = int(line.split()[4][:-1]) 

430 # Nuclear charges (atomnos) should be parsed by now. 

431 nuclear = numpy.sum(self.atomnos) 

432 

433 charge = nuclear - spinup - spindown 

434 self.set_attribute('charge', charge) 

435 

436 mult = spinup - spindown + 1 

437 self.set_attribute('mult', mult) 

438 

439 # Convergenve thresholds for SCF cycle, should be contained in a line such as: 

440 # CONVERGENCE THRESHOLDS: 1.00E-05 (Density) 1.40E-07 (Energy) 

441 if self.insidescf and line[1:24] == "CONVERGENCE THRESHOLDS:": 

442 

443 if not hasattr(self, "scftargets"): 

444 self.scftargets = [] 

445 

446 scftargets = list(map(float, line.split()[2::2])) 

447 self.scftargets.append(scftargets) 

448 # Usually two criteria, but save the names this just in case. 

449 self.scftargetnames = line.split()[3::2] 

450 

451 # Read in the print out of the SCF cycle - for scfvalues. For RHF looks like: 

452 # ITERATION DDIFF GRAD ENERGY 2-EL.EN. DIPOLE MOMENTS DIIS 

453 # 1 0.000D+00 0.000D+00 -379.71523700 1159.621171 0.000000 0.000000 0.000000 0 

454 # 2 0.000D+00 0.898D-02 -379.74469736 1162.389787 0.000000 0.000000 0.000000 1 

455 # 3 0.817D-02 0.144D-02 -379.74635529 1162.041033 0.000000 0.000000 0.000000 2 

456 # 4 0.213D-02 0.571D-03 -379.74658063 1162.159929 0.000000 0.000000 0.000000 3 

457 # 5 0.799D-03 0.166D-03 -379.74660889 1162.144256 0.000000 0.000000 0.000000 4 

458 if self.insidescf and line[1:10] == "ITERATION": 

459 

460 if not hasattr(self, "scfvalues"): 

461 self.scfvalues = [] 

462 

463 line = next(inputfile) 

464 energy = 0.0 

465 scfvalues = [] 

466 while line.strip() != "": 

467 chomp = line.split() 

468 if chomp[0].isdigit(): 

469 

470 ddiff = float(chomp[1].replace('D', 'E')) 

471 grad = float(chomp[2].replace('D', 'E')) 

472 newenergy = float(chomp[3]) 

473 ediff = newenergy - energy 

474 energy = newenergy 

475 

476 # The convergence thresholds must have been read above. 

477 # Presently, we recognize MAX DENSITY and MAX ENERGY thresholds. 

478 numtargets = len(self.scftargetnames) 

479 values = [numpy.nan]*numtargets 

480 for n, name in zip(list(range(numtargets)), self.scftargetnames): 

481 if "ENERGY" in name.upper(): 

482 values[n] = ediff 

483 elif "DENSITY" in name.upper(): 

484 values[n] = ddiff 

485 scfvalues.append(values) 

486 

487 try: 

488 line = next(inputfile) 

489 except StopIteration: 

490 self.logger.warning('File terminated before end of last SCF! Last gradient: {}'.format(grad)) 

491 break 

492 self.scfvalues.append(numpy.array(scfvalues)) 

493 

494 if "dispersion correction" in line \ 

495 and line.strip() != "dispersion correction activated": 

496 dispersion = utils.convertor(float(line.split()[-1]), "hartree", "eV") 

497 self.append_attribute("dispersionenergies", dispersion) 

498 

499 # SCF result - RHF/UHF and DFT (RKS) energies. 

500 if (line[1:5] in ["!RHF", "!UHF", "!RKS"] and line[16:22].lower() == "energy"): 

501 

502 if not hasattr(self, "scfenergies"): 

503 self.scfenergies = [] 

504 scfenergy = float(line.split()[4]) 

505 self.scfenergies.append(utils.convertor(scfenergy, "hartree", "eV")) 

506 

507 # We are now done with SCF cycle (after a few lines). 

508 self.insidescf = False 

509 

510 # MP2 energies. 

511 if line[1:5] == "!MP2": 

512 

513 self.metadata["methods"].append("MP2") 

514 

515 if not hasattr(self, 'mpenergies'): 

516 self.mpenergies = [] 

517 mp2energy = float(line.split()[-1]) 

518 mp2energy = utils.convertor(mp2energy, "hartree", "eV") 

519 self.mpenergies.append([mp2energy]) 

520 

521 # MP2 energies if MP3 or MP4 is also calculated. 

522 if line[1:5] == "MP2:": 

523 

524 self.metadata["methods"].append("MP2") 

525 if not hasattr(self, 'mpenergies'): 

526 self.mpenergies = [] 

527 mp2energy = float(line.split()[2]) 

528 mp2energy = utils.convertor(mp2energy, "hartree", "eV") 

529 self.mpenergies.append([mp2energy]) 

530 

531 # MP3 (D) and MP4 (DQ or SDQ) energies. 

532 if line[1:8] == "MP3(D):": 

533 

534 self.metadata["methods"].append("MP3") 

535 mp3energy = float(line.split()[2]) 

536 mp2energy = utils.convertor(mp3energy, "hartree", "eV") 

537 line = next(inputfile) 

538 self.mpenergies[-1].append(mp2energy) 

539 if line[1:9] == "MP4(DQ):": 

540 self.metadata["methods"].append("MP4") 

541 mp4energy = float(line.split()[2]) 

542 line = next(inputfile) 

543 if line[1:10] == "MP4(SDQ):": 

544 self.metadata["methods"].append("MP4") 

545 mp4energy = float(line.split()[2]) 

546 mp4energy = utils.convertor(mp4energy, "hartree", "eV") 

547 self.mpenergies[-1].append(mp4energy) 

548 

549 # The CCSD program operates all closed-shel coupled cluster runs. 

550 if line[1:15] == "PROGRAM * CCSD": 

551 

552 self.metadata["methods"].append("CCSD") 

553 if not hasattr(self, "ccenergies"): 

554 self.ccenergies = [] 

555 while line[1:20] != "Program statistics:": 

556 if line[71:84] == "T1 diagnostic": 

557 self.metadata["t1_diagnostic"] = float(line.split()[-1]) 

558 # The last energy (most exact) will be read last and thus saved. 

559 if line[1:5] == "!CCD" or line[1:6] == "!CCSD" or line[1:9] == "!CCSD(T)": 

560 ccenergy = float(line.split()[-1]) 

561 ccenergy = utils.convertor(ccenergy, "hartree", "eV") 

562 line = next(inputfile) 

563 self.ccenergies.append(ccenergy) 

564 

565 # Read the occupancy (index of HOMO s). 

566 # For restricted calculations, there is one line here. For unrestricted, two: 

567 # Final alpha occupancy: ... 

568 # Final beta occupancy: ... 

569 if line[1:17] == "Final occupancy:": 

570 self.homos = [int(line.split()[-1])-1] 

571 if line[1:23] == "Final alpha occupancy:": 

572 self.homos = [int(line.split()[-1])-1] 

573 line = next(inputfile) 

574 self.homos.append(int(line.split()[-1])-1) 

575 

576 # Dipole is always printed on one line after the final RHF energy, and by default 

577 # it seems Molpro uses the origin as the reference point. 

578 if line.strip()[:13] == "Dipole moment": 

579 

580 assert line.split()[2] == "/Debye" 

581 

582 reference = [0.0, 0.0, 0.0] 

583 dipole = [float(d) for d in line.split()[-3:]] 

584 

585 if not hasattr(self, 'moments'): 

586 self.moments = [reference, dipole] 

587 else: 

588 self.moments[1] == dipole 

589 

590 # Static dipole polarizability. 

591 if line.strip() == "SCF dipole polarizabilities": 

592 if not hasattr(self, "polarizabilities"): 

593 self.polarizabilities = [] 

594 polarizability = [] 

595 self.skip_lines(inputfile, ['b', 'directions']) 

596 for _ in range(3): 

597 line = next(inputfile) 

598 polarizability.append(line.split()[1:]) 

599 self.polarizabilities.append(numpy.array(polarizability)) 

600 

601 # Check for ELECTRON ORBITALS (canonical molecular orbitals). 

602 if line[1:18] == "ELECTRON ORBITALS" or self.electronorbitals: 

603 self._parse_orbitals(inputfile, line) 

604 

605 

606 # If the MATROP program was called appropriately, 

607 # the atomic obital overlap matrix S is printed. 

608 # The matrix is printed straight-out, ten elements in each row, both halves. 

609 # Note that is the entire matrix is not printed, then aooverlaps 

610 # will not have dimensions nbasis x nbasis. 

611 if line[1:9] == "MATRIX S": 

612 

613 if not hasattr(self, "aooverlaps"): 

614 self.aooverlaps = [[]] 

615 

616 self.skip_lines(inputfile, ['b', 'symblocklabel']) 

617 

618 line = next(inputfile) 

619 while line.strip() != "": 

620 elements = [float(s) for s in line.split()] 

621 if len(self.aooverlaps[-1]) + len(elements) <= self.nbasis: 

622 self.aooverlaps[-1] += elements 

623 else: 

624 n = len(self.aooverlaps[-1]) + len(elements) - self.nbasis 

625 self.aooverlaps[-1] += elements[:-n] 

626 self.aooverlaps.append([]) 

627 self.aooverlaps[-1] += elements[-n:] 

628 line = next(inputfile) 

629 

630 # Check for MCSCF natural orbitals. 

631 if line[1:17] == "NATURAL ORBITALS": 

632 self._parse_orbitals(inputfile, line) 

633 

634 # Thresholds are printed only if the defaults are changed with GTHRESH. 

635 # In that case, we can fill geotargets with non-default values. 

636 # The block should look like this as of Molpro 2006.1: 

637 # THRESHOLDS: 

638 

639 # ZERO = 1.00D-12 ONEINT = 1.00D-12 TWOINT = 1.00D-11 PREFAC = 1.00D-14 LOCALI = 1.00D-09 EORDER = 1.00D-04 

640 # ENERGY = 0.00D+00 ETEST = 0.00D+00 EDENS = 0.00D+00 THRDEDEF= 1.00D-06 GRADIENT= 1.00D-02 STEP = 1.00D-03 

641 # ORBITAL = 1.00D-05 CIVEC = 1.00D-05 COEFF = 1.00D-04 PRINTCI = 5.00D-02 PUNCHCI = 9.90D+01 OPTGRAD = 3.00D-04 

642 # OPTENERG= 1.00D-06 OPTSTEP = 3.00D-04 THRGRAD = 2.00D-04 COMPRESS= 1.00D-11 VARMIN = 1.00D-07 VARMAX = 1.00D-03 

643 # THRDOUB = 0.00D+00 THRDIV = 1.00D-05 THRRED = 1.00D-07 THRPSP = 1.00D+00 THRDC = 1.00D-10 THRCS = 1.00D-10 

644 # THRNRM = 1.00D-08 THREQ = 0.00D+00 THRDE = 1.00D+00 THRREF = 1.00D-05 SPARFAC = 1.00D+00 THRDLP = 1.00D-07 

645 # THRDIA = 1.00D-10 THRDLS = 1.00D-07 THRGPS = 0.00D+00 THRKEX = 0.00D+00 THRDIS = 2.00D-01 THRVAR = 1.00D-10 

646 # THRLOC = 1.00D-06 THRGAP = 1.00D-06 THRLOCT = -1.00D+00 THRGAPT = -1.00D+00 THRORB = 1.00D-06 THRMLTP = 0.00D+00 

647 # THRCPQCI= 1.00D-10 KEXTA = 0.00D+00 THRCOARS= 0.00D+00 SYMTOL = 1.00D-06 GRADTOL = 1.00D-06 THROVL = 1.00D-08 

648 # THRORTH = 1.00D-08 GRID = 1.00D-06 GRIDMAX = 1.00D-03 DTMAX = 0.00D+00 

649 if line[1:12] == "THRESHOLDS": 

650 

651 self.skip_line(input, 'blank') 

652 

653 line = next(inputfile) 

654 while line.strip(): 

655 

656 if "OPTENERG" in line: 

657 start = line.find("OPTENERG") 

658 optenerg = line[start+10:start+20] 

659 if "OPTGRAD" in line: 

660 start = line.find("OPTGRAD") 

661 optgrad = line[start+10:start+20] 

662 if "OPTSTEP" in line: 

663 start = line.find("OPTSTEP") 

664 optstep = line[start+10:start+20] 

665 line = next(inputfile) 

666 

667 self.geotargets = [optenerg, optgrad, optstep] 

668 

669 # The optimization history is the source for geovlues: 

670 # 

671 # END OF GEOMETRY OPTIMIZATION. TOTAL CPU: 246.9 SEC 

672 # 

673 # ITER. ENERGY(OLD) ENERGY(NEW) DE GRADMAX GRADNORM GRADRMS STEPMAX STEPLEN STEPRMS 

674 # 1 -382.02936898 -382.04914450 -0.01977552 0.11354875 0.20127947 0.01183997 0.12972761 0.20171740 0.01186573 

675 # 2 -382.04914450 -382.05059234 -0.00144784 0.03299860 0.03963339 0.00233138 0.05577169 0.06687650 0.00393391 

676 # 3 -382.05059234 -382.05069136 -0.00009902 0.00694359 0.01069889 0.00062935 0.01654549 0.02016307 0.00118606 

677 # ... 

678 # 

679 # The above is an exerpt from Molpro 2006, but it is a little bit different 

680 # for Molpro 2012, namely the 'END OF GEOMETRY OPTIMIZATION occurs after the 

681 # actual history list. It seems there is a another consistent line before the 

682 # history, but this might not be always true -- so this is a potential weak link. 

683 if line[1:30] == "END OF GEOMETRY OPTIMIZATION." or line.strip() == "Quadratic Steepest Descent - Minimum Search": 

684 

685 # I think this is the trigger for convergence, and it shows up at the top in Molpro 2006. 

686 geometry_converged = line[1:30] == "END OF GEOMETRY OPTIMIZATION." 

687 

688 self.skip_line(inputfile, 'blank') 

689 

690 # Newer version of Molpro (at least for 2012) print and additional column 

691 # with the timing information for each step. Otherwise, the history looks the same. 

692 headers = next(inputfile).split() 

693 if not len(headers) in (10, 11): 

694 return 

695 

696 # Although criteria can be changed, the printed format should not change. 

697 # In case it does, retrieve the columns for each parameter. 

698 index_ITER = headers.index('ITER.') 

699 index_THRENERG = headers.index('DE') 

700 index_THRGRAD = headers.index('GRADMAX') 

701 index_THRSTEP = headers.index('STEPMAX') 

702 

703 line = next(inputfile) 

704 self.geovalues = [] 

705 while line.strip(): 

706 

707 line = line.split() 

708 istep = int(line[index_ITER]) 

709 

710 geovalues = [] 

711 geovalues.append(float(line[index_THRENERG])) 

712 geovalues.append(float(line[index_THRGRAD])) 

713 geovalues.append(float(line[index_THRSTEP])) 

714 self.geovalues.append(geovalues) 

715 line = next(inputfile) 

716 if line.strip() == "Freezing grid": 

717 line = next(inputfile) 

718 

719 # The convergence trigger shows up somewhere at the bottom in Molpro 2012, 

720 # before the final stars. If convergence is not reached, there is an additional 

721 # line that can be checked for. This is a little tricky, though, since it is 

722 # not the last line... so bail out of the loop if convergence failure is detected. 

723 while "*****" not in line: 

724 line = next(inputfile) 

725 if line.strip() == "END OF GEOMETRY OPTIMIZATION.": 

726 geometry_converged = True 

727 if "No convergence" in line: 

728 geometry_converged = False 

729 break 

730 

731 # Finally, deal with optdone, append the last step to it only if we had convergence. 

732 if not hasattr(self, 'optdone'): 

733 self.optdone = [] 

734 if geometry_converged: 

735 self.optdone.append(istep-1) 

736 

737 # This block should look like this: 

738 # Normal Modes 

739 # 

740 # 1 Au 2 Bu 3 Ag 4 Bg 5 Ag 

741 # Wavenumbers [cm-1] 151.81 190.88 271.17 299.59 407.86 

742 # Intensities [km/mol] 0.33 0.28 0.00 0.00 0.00 

743 # Intensities [relative] 0.34 0.28 0.00 0.00 0.00 

744 # CX1 0.00000 -0.01009 0.02577 0.00000 0.06008 

745 # CY1 0.00000 -0.05723 -0.06696 0.00000 0.06349 

746 # CZ1 -0.02021 0.00000 0.00000 0.11848 0.00000 

747 # CX2 0.00000 -0.01344 0.05582 0.00000 -0.02513 

748 # CY2 0.00000 -0.06288 -0.03618 0.00000 0.00349 

749 # CZ2 -0.05565 0.00000 0.00000 0.07815 0.00000 

750 # ... 

751 # Molpro prints low frequency modes in a subsequent section with the same format, 

752 # which also contains zero frequency modes, with the title: 

753 # Normal Modes of low/zero frequencies 

754 if line[1:13] == "Normal Modes": 

755 

756 islow = (line[1:37] == "Normal Modes of low/zero frequencies") 

757 

758 self.skip_line(inputfile, 'blank') 

759 

760 # Each portion of five modes is followed by a single blank line. 

761 # The whole block is followed by an additional blank line. 

762 line = next(inputfile) 

763 while line.strip(): 

764 

765 if line[1:25].isspace(): 

766 if not islow: # vibsyms not printed for low freq modes 

767 numbers = list(map(int, line.split()[::2])) 

768 vibsyms = line.split()[1::2] 

769 else: 

770 # give low freq modes an empty str as vibsym 

771 # note there could be other possibilities.. 

772 numbers = list(map(int, line.split())) 

773 vibsyms = ['']*len(numbers) 

774 

775 if line[1:12] == "Wavenumbers": 

776 vibfreqs = list(map(float, line.strip().split()[2:])) 

777 

778 if line[1:21] == "Intensities [km/mol]": 

779 vibirs = list(map(float, line.strip().split()[2:])) 

780 

781 # There should always by 3xnatom displacement rows. 

782 if line[1:11].isspace() and line[13:25].strip().isdigit(): 

783 

784 # There are a maximum of 5 modes per line. 

785 nmodes = len(line.split())-1 

786 

787 vibdisps = [] 

788 for i in range(nmodes): 

789 vibdisps.append([]) 

790 for n in range(self.natom): 

791 vibdisps[i].append([]) 

792 for i in range(nmodes): 

793 disp = float(line.split()[i+1]) 

794 vibdisps[i][0].append(disp) 

795 for i in range(self.natom*3 - 1): 

796 line = next(inputfile) 

797 iatom = (i+1)//3 

798 for i in range(nmodes): 

799 disp = float(line.split()[i+1]) 

800 vibdisps[i][iatom].append(disp) 

801 

802 line = next(inputfile) 

803 if not line.strip(): 

804 

805 if not hasattr(self, "vibfreqs"): 

806 self.vibfreqs = [] 

807 if not hasattr(self, "vibsyms"): 

808 self.vibsyms = [] 

809 if not hasattr(self, "vibirs") and "vibirs" in dir(): 

810 self.vibirs = [] 

811 if not hasattr(self, "vibdisps") and "vibdisps" in dir(): 

812 self.vibdisps = [] 

813 

814 if not islow: 

815 self.vibfreqs.extend(vibfreqs) 

816 self.vibsyms.extend(vibsyms) 

817 if "vibirs" in dir(): 

818 self.vibirs.extend(vibirs) 

819 if "vibdisps" in dir(): 

820 self.vibdisps.extend(vibdisps) 

821 else: 

822 nonzero = [f > 0 for f in vibfreqs] 

823 vibfreqs = [f for f in vibfreqs if f > 0] 

824 self.vibfreqs = vibfreqs + self.vibfreqs 

825 vibsyms = [vibsyms[i] for i in range(len(vibsyms)) if nonzero[i]] 

826 self.vibsyms = vibsyms + self.vibsyms 

827 if "vibirs" in dir(): 

828 vibirs = [vibirs[i] for i in range(len(vibirs)) if nonzero[i]] 

829 self.vibirs = vibirs + self.vibirs 

830 if "vibdisps" in dir(): 

831 vibdisps = [vibdisps[i] for i in range(len(vibdisps)) if nonzero[i]] 

832 self.vibdisps = vibdisps + self.vibdisps 

833 

834 line = next(inputfile) 

835 

836 if line[1:16] == "Force Constants": 

837 

838 hessian = [] 

839 line = next(inputfile) 

840 hess = [] 

841 tmp = [] 

842 

843 while line.strip(): 

844 try: 

845 list(map(float, line.strip().split()[2:])) 

846 except: 

847 line = next(inputfile) 

848 line.strip().split()[1:] 

849 hess.extend([list(map(float, line.strip().split()[1:]))]) 

850 line = next(inputfile) 

851 lig = 0 

852 

853 while (lig == 0) or (len(hess[0]) > 1): 

854 tmp.append(hess.pop(0)) 

855 lig += 1 

856 k = 5 

857 

858 while len(hess) != 0: 

859 tmp[k] += hess.pop(0) 

860 k += 1 

861 if (len(tmp[k-1]) == lig): 

862 break 

863 if k >= lig: 

864 k = len(tmp[-1]) 

865 for l in tmp: 

866 hessian += l 

867 

868 self.set_attribute("hessian", hessian) 

869 

870 if line[1:14] == "Atomic Masses" and hasattr(self, "hessian"): 

871 

872 line = next(inputfile) 

873 self.amass = list(map(float, line.strip().split()[2:])) 

874 

875 while line.strip(): 

876 line = next(inputfile) 

877 self.amass += list(map(float, line.strip().split()[2:])) 

878 

879 #1PROGRAM * POP (Mulliken population analysis) 

880 # 

881 # 

882 # Density matrix read from record 2100.2 Type=RHF/CHARGE (state 1.1) 

883 # 

884 # Population analysis by basis function type 

885 # 

886 # Unique atom s p d f g Total Charge 

887 # 2 C 3.11797 2.88497 0.00000 0.00000 0.00000 6.00294 - 0.00294 

888 # 3 C 3.14091 2.91892 0.00000 0.00000 0.00000 6.05984 - 0.05984 

889 # ... 

890 if line.strip() == "1PROGRAM * POP (Mulliken population analysis)": 

891 

892 self.skip_lines(inputfile, ['b', 'b', 'density_source', 'b', 'func_type', 'b']) 

893 

894 header = next(inputfile) 

895 icharge = header.split().index('Charge') 

896 

897 charges = [] 

898 line = next(inputfile) 

899 while line.strip(): 

900 cols = line.split() 

901 charges.append(float(cols[icharge]+cols[icharge+1])) 

902 line = next(inputfile) 

903 

904 if not hasattr(self, "atomcharges"): 

905 self.atomcharges = {} 

906 self.atomcharges['mulliken'] = charges 

907 

908 if 'GRADIENT FOR STATE' in line: 

909 for _ in range(3): 

910 next(inputfile) 

911 grad = [] 

912 lines_read = 0 

913 while lines_read < self.natom: 

914 line = next(inputfile) 

915 # Because molpro inserts an empty line every 50th atom. 

916 if line: 

917 grad.append([float(x) for x in line.split()[1:]]) 

918 lines_read += 1 

919 if not hasattr(self, 'grads'): 

920 self.grads = [] 

921 self.grads.append(grad) 

922 

923 if line[:25] == ' Variable memory released': 

924 self.metadata['success'] = True