Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2# 

3# Copyright (c) 2020, the cclib development team 

4# 

5# This file is part of cclib (http://cclib.github.io) and is distributed under 

6# the terms of the BSD 3-Clause License. 

7 

8"""Parser for GAMESS(US) output files""" 

9 

10 

11import re 

12 

13import numpy 

14 

15 

16from cclib.parser import logfileparser 

17from cclib.parser import utils 

18 

19 

20class GAMESS(logfileparser.Logfile): 

21 """A GAMESS/Firefly log file.""" 

22 

23 # Used to index self.scftargets[]. 

24 SCFRMS, SCFMAX, SCFENERGY = list(range(3)) 

25 

26 # Used to extact Dunning basis set names. 

27 dunningbas = {'CCD': 'cc-pVDZ', \ 

28 'CCT': 'cc-pVTZ', \ 

29 'CCQ': 'cc-pVQZ', \ 

30 'CC5': 'cc-pV5Z', \ 

31 'CC6': 'cc-pV6Z', \ 

32 'ACCD': 'aug-cc-pVDZ', \ 

33 'ACCT': 'aug-cc-pVTZ', \ 

34 'ACCQ': 'aug-cc-pVQZ', \ 

35 'ACC5': 'aug-cc-pV5Z', \ 

36 'ACC6': 'aug-cc-pV6Z', \ 

37 'CCDC': 'cc-pCVDZ', \ 

38 'CCTC': 'cc-pCVTZ', \ 

39 'CCQC': 'cc-pCVQZ', \ 

40 'CC5C': 'cc-pCV5Z', \ 

41 'CC6C': 'cc-pCV6Z', \ 

42 'ACCDC': 'aug-cc-pCVDZ', \ 

43 'ACCTC': 'aug-cc-pCVTZ', \ 

44 'ACCQC': 'aug-cc-pCVQZ', \ 

45 'ACC5C': 'aug-cc-pCV5Z', \ 

46 'ACC6C': 'aug-cc-pCV6Z'} 

47 

48 def __init__(self, *args, **kwargs): 

49 

50 # Call the __init__ method of the superclass 

51 super(GAMESS, self).__init__(logname="GAMESS", *args, **kwargs) 

52 

53 def __str__(self): 

54 """Return a string representation of the object.""" 

55 return "GAMESS log file %s" % (self.filename) 

56 

57 def __repr__(self): 

58 """Return a representation of the object.""" 

59 return 'GAMESS("%s")' % (self.filename) 

60 

61 def normalisesym(self, label): 

62 """Normalise the symmetries used by GAMESS. 

63 

64 To normalise, two rules need to be applied: 

65 (1) Occurences of U/G in the 2/3 position of the label 

66 must be lower-cased 

67 (2) Two single quotation marks must be replaced by a double 

68 """ 

69 

70 if label[1:] == "''": 

71 end = '"' 

72 else: 

73 end = label[1:].replace("U", "u").replace("G", "g") 

74 return label[0] + end 

75 

76 def before_parsing(self): 

77 

78 self.firststdorient = True # Used to decide whether to wipe the atomcoords clean 

79 self.cihamtyp = "none" # Type of CI Hamiltonian: saps or dets. 

80 self.scftype = "none" # Type of SCF calculation: BLYP, RHF, ROHF, etc. 

81 

82 def extract(self, inputfile, line): 

83 """Extract information from the file object inputfile.""" 

84 

85 # Extract the version number. If the calculation is from 

86 # Firefly, its version number comes before a line that looks 

87 # like the normal GAMESS version number... 

88 if "Firefly version" in line: 

89 match = re.search(r"Firefly version\s([\d.]*)\D*(\d*)\s*\*", line) 

90 if match: 

91 base_version, build = match.groups() 

92 package_version = "{}+{}".format(base_version, build) 

93 self.metadata["package_version"] = package_version 

94 self.metadata["legacy_package_version"] = base_version 

95 if "GAMESS VERSION =" in line: 

96 # ...so avoid overwriting it if Firefly already set this field. 

97 if "package_version" not in self.metadata: 

98 tokens = line.split() 

99 day, month, year = tokens[4:7] 

100 possible_release = tokens[-2] 

101 # There may not be a (Rn) for the nth release that year, in 

102 # which case this index is the same as 7 (the year). 

103 if possible_release == year: 

104 release = "1" 

105 else: 

106 # `(R23)` -> 23 

107 release = possible_release[2:-1] 

108 self.metadata["package_version"] = '{}.r{}'.format(year, release) 

109 self.metadata["legacy_package_version"] = "{}R{}".format(year, release) 

110 

111 if line[1:12] == "INPUT CARD>": 

112 return 

113 

114 # extract the methods 

115 if line[1:7] == "SCFTYP": 

116 method = line.split()[0][7:] 

117 if len(self.metadata["methods"]) == 0: 

118 self.metadata["methods"].append(method) 

119 

120 # extract the basis set name 

121 if line[5:11] == "GBASIS": 

122 basnm1 = line.split()[0][7:] 

123 if basnm1 in self.dunningbas: 

124 self.metadata["basis_set"] = self.dunningbas[basnm1] 

125 else: 

126 if basnm1 == "PM3" or basnm1 == "AM1": 

127 self.metadata["methods"].append(basnm1) 

128 if basnm1 == "STO" : 

129 if line.split()[2] == "2": 

130 self.metadata["basis_set"] = "STO-2G" 

131 elif line.split()[2] == "3": 

132 self.metadata["basis_set"] = "STO-3G" 

133 elif line.split()[2] == "4": 

134 self.metadata["basis_set"] = "STO-4G" 

135 elif line.split()[2] == "5": 

136 self.metadata["basis_set"] = "STO-5G" 

137 if basnm1 == "N21" : 

138 if line.split()[2] == "3" and line.split()[3] == "POLAR=COMMON": 

139 self.metadata["basis_set"] = "3-21G*" 

140 if line.split()[2] == "3" and line.split()[3] == "POLAR=NONE": 

141 self.metadata["basis_set"] = "3-21G" 

142 if line.split()[2] == "4" and line.split()[3] == "POLAR=NONE": 

143 self.metadata["basis_set"] = "4-21G" 

144 if line.split()[2] == "6" and line.split()[3] == "POLAR=NONE": 

145 self.metadata["basis_set"] = "6-21G" 

146 if basnm1 == "N31" : 

147 if line.split()[2] == "6" and (line.split()[3] == "POLAR=POPN31" \ 

148 or line.split()[3] == "POLAR=POPLE"): 

149 self.metadata["basis_set"] = "6-31G*" 

150 line = next(inputfile) 

151 if line.split()[-1] == "T": 

152 self.metadata["basis_set"] = "6-31+G*" 

153 line = next(inputfile) 

154 if line.split()[1] == "0" and line.split()[3] == "T": 

155 self.metadata["basis_set"] = "6-31++G*" 

156 if line.split()[1] == "1" and line.split()[3] == "T": 

157 self.metadata["basis_set"] = "6-31++G**" 

158 else: 

159 line = next(inputfile) 

160 if line.split()[1] == "1": #NPFUNC = 1 

161 self.metadata["basis_set"] = "6-31G**" 

162 if line.split()[2] == "6" and line.split()[3] == "POLAR=NONE": 

163 self.metadata["basis_set"] = "6-31G" 

164 if line.split()[2] == "4" and line.split()[3] == "POLAR=NONE": 

165 self.metadata["basis_set"] = "4-31G" 

166 if line.split()[2] == "4" and line.split()[3] == "POLAR=POPN31": 

167 self.metadata["basis_set"] = "4-31G*" 

168 if basnm1 == "N311" : 

169 if line.split()[2] == "6" and line.split()[3] == "POLAR=POPN311": 

170 self.metadata["basis_set"] = "6-311G*" 

171 line = next(inputfile) 

172 if line.split()[-1] == "T": 

173 self.metadata["basis_set"] = "6-311+G*" 

174 line = next(inputfile) 

175 if line.split()[1] == "0" and line.split()[3] == "T": 

176 self.metadata["basis_set"] = "6-311++G*" 

177 if line.split()[1] == "1" and line.split()[3] == "T": 

178 self.metadata["basis_set"] = "6-311++G**" 

179 else: 

180 line = next(inputfile) 

181 if line.split()[1] == "1": #NPFUNC = 1 

182 self.metadata["basis_set"] = "6-311G**" 

183 if line.split()[2] == "6" and line.split()[3] == "POLAR=NONE": 

184 self.metadata["basis_set"] = "6-311G" 

185 

186 # We are looking for this line: 

187 # PARAMETERS CONTROLLING GEOMETRY SEARCH ARE 

188 # ... 

189 # OPTTOL = 1.000E-04 RMIN = 1.500E-03 

190 if line[10:18] == "OPTTOL =": 

191 if not hasattr(self, "geotargets"): 

192 opttol = float(line.split()[2]) 

193 self.geotargets = numpy.array([opttol, 3. / opttol], "d") 

194 

195 # Has to deal with such lines as: 

196 # FINAL R-B3LYP ENERGY IS -382.0507446475 AFTER 10 ITERATIONS 

197 # FINAL ENERGY IS -379.7594673378 AFTER 9 ITERATIONS 

198 # ...so take the number after the "IS" 

199 if line.find("FINAL") == 1: 

200 if not hasattr(self, "scfenergies"): 

201 self.scfenergies = [] 

202 temp = line.split() 

203 self.scfenergies.append(utils.convertor(float(temp[temp.index("IS") + 1]), "hartree", "eV")) 

204 # Empirical dispersion: first is GAMESS-US, second is Firefly 

205 if any( 

206 line.find(dispersion_trigger) == 1 

207 for dispersion_trigger in ( 

208 "GRIMME'S DISPERSION ENERGY", "Dispersion correction to total energy" 

209 ) 

210 ): 

211 dispersion = utils.convertor(float(line.split()[-1]), "hartree", "eV") 

212 self.append_attribute("dispersionenergies", dispersion) 

213 

214 # For total energies after Moller-Plesset corrections, the output looks something like this: 

215 # 

216 # RESULTS OF MOLLER-PLESSET 2ND ORDER CORRECTION ARE 

217 # E(0)= -285.7568061536 

218 # E(1)= 0.0 

219 # E(2)= -0.9679419329 

220 # E(MP2)= -286.7247480864 

221 # where E(MP2) = E(0) + E(2) 

222 # 

223 # With GAMESS-US 12 Jan 2009 (R3), the preceding text is different: 

224 # DIRECT 4-INDEX TRANSFORMATION 

225 # SCHWARZ INEQUALITY TEST SKIPPED 0 INTEGRAL BLOCKS 

226 # E(SCF)= -76.0088477471 

227 # E(2)= -0.1403745370 

228 # E(MP2)= -76.1492222841 

229 # 

230 # With GAMESS-US 20 APR 2017 (R1), the following block may be present: 

231 # SCHWARZ INEQUALITY TEST SKIPPED 0 INTEGRAL BLOCKS 

232 # ... END OF INTEGRAL TRANSFORMATION ... 

233 

234 if line.find("RESULTS OF MOLLER-PLESSET") >= 0 or line[6:37] == "SCHWARZ INEQUALITY TEST SKIPPED": 

235 

236 if not hasattr(self, "mpenergies"): 

237 self.mpenergies = [] 

238 

239 line = next(inputfile) 

240 # Each iteration has a new print-out 

241 if "END OF INTEGRAL TRANSFORMATION" not in line: 

242 self.mpenergies.append([]) 

243 

244 # GAMESS-US presently supports only second order corrections (MP2) 

245 # PC GAMESS also has higher levels (3rd and 4th), with different output 

246 # Only the highest level MP4 energy is gathered (SDQ or SDTQ) 

247 # Loop breaks when substring "DONE WITH MPn ENERGY" is encountered, 

248 # where n=2, 3 or 4. 

249 while "DONE WITH MP" not in line: 

250 

251 if len(line.split()) > 0: 

252 # Only up to MP2 correction 

253 if line.split()[0] == "E(MP2)=": 

254 self.metadata["methods"].append("MP2") 

255 mp2energy = float(line.split()[1]) 

256 self.mpenergies[-1].append(utils.convertor(mp2energy, "hartree", "eV")) 

257 # MP2 before higher order calculations 

258 if line.split()[0] == "E(MP2)": 

259 mp2energy = float(line.split()[2]) 

260 self.mpenergies[-1].append(utils.convertor(mp2energy, "hartree", "eV")) 

261 if line.split()[0] == "E(MP3)": 

262 self.metadata["methods"].append("MP3") 

263 mp3energy = float(line.split()[2]) 

264 self.mpenergies[-1].append(utils.convertor(mp3energy, "hartree", "eV")) 

265 if line.split()[0] in ["E(MP4-SDQ)", "E(MP4-SDTQ)"]: 

266 self.metadata["methods"].append("MP4") 

267 mp4energy = float(line.split()[2]) 

268 self.mpenergies[-1].append(utils.convertor(mp4energy, "hartree", "eV")) 

269 line = next(inputfile) 

270 

271 # Total energies after Coupled Cluster calculations 

272 # Only the highest Coupled Cluster level result is gathered 

273 if line[12:23] == "CCD ENERGY:": 

274 self.metadata["methods"].append("CCD") 

275 if not hasattr(self, "ccenergies"): 

276 self.ccenergies = [] 

277 ccenergy = float(line.split()[2]) 

278 self.ccenergies.append(utils.convertor(ccenergy, "hartree", "eV")) 

279 if line.find("CCSD") >= 0 and line.split()[0:2] == ["CCSD", "ENERGY:"]: 

280 self.metadata["methods"].append("CCSD") 

281 if not hasattr(self, "ccenergies"): 

282 self.ccenergies = [] 

283 ccenergy = float(line.split()[2]) 

284 line = next(inputfile) 

285 if line[8:23] == "CCSD[T] ENERGY:": 

286 self.metadata["methods"].append("CCSD[T]") 

287 ccenergy = float(line.split()[2]) 

288 line = next(inputfile) 

289 if line[8:23] == "CCSD(T) ENERGY:": 

290 self.metadata["methods"].append("CCSD(T)") 

291 ccenergy = float(line.split()[2]) 

292 self.ccenergies.append(utils.convertor(ccenergy, "hartree", "eV")) 

293 

294 if "T1 DIAGNOSTIC" in line: 

295 self.metadata["t1_diagnostic"] = float(line.split()[3]) 

296 

297 # Also collect MP2 energies, which are always calculated before CC 

298 if line[8:23] == "MBPT(2) ENERGY:": 

299 if not hasattr(self, "mpenergies"): 

300 self.mpenergies = [] 

301 self.mpenergies.append([]) 

302 mp2energy = float(line.split()[2]) 

303 self.mpenergies[-1].append(utils.convertor(mp2energy, "hartree", "eV")) 

304 

305 # Extract charge and multiplicity 

306 if line[1:19] == "CHARGE OF MOLECULE": 

307 

308 charge = int(round(float(line.split()[-1]))) 

309 self.set_attribute('charge', charge) 

310 

311 line = next(inputfile) 

312 mult = int(line.split()[-1]) 

313 self.set_attribute('mult', mult) 

314 

315 # Electronic transitions (etenergies) for CIS runs and TD-DFT, which 

316 # have very similar outputs. The outputs EOM look very differentm, though. 

317 # 

318 # --------------------------------------------------------------------- 

319 # CI-SINGLES EXCITATION ENERGIES 

320 # STATE HARTREE EV KCAL/MOL CM-1 NM 

321 # --------------------------------------------------------------------- 

322 # 1A'' 0.1677341781 4.5643 105.2548 36813.40 271.64 

323 # ... 

324 if re.match("(CI-SINGLES|TDDFT) EXCITATION ENERGIES", line.strip()): 

325 

326 if not hasattr(self, "etenergies"): 

327 self.etenergies = [] 

328 

329 get_etosc = False 

330 header = next(inputfile).rstrip() 

331 if header.endswith("OSC. STR."): 

332 # water_cis_dets.out does not have the oscillator strength 

333 # in this table...it is extracted from a different section below 

334 get_etosc = True 

335 self.etoscs = [] 

336 

337 self.skip_line(inputfile, 'dashes') 

338 

339 line = next(inputfile) 

340 broken = line.split() 

341 while len(broken) > 0: 

342 

343 # Take hartree value with more numbers, and convert. 

344 # Note that the values listed after this are also less exact! 

345 etenergy = float(broken[1]) 

346 self.etenergies.append(utils.convertor(etenergy, "hartree", "wavenumber")) 

347 if get_etosc: 

348 etosc = float(broken[-1]) 

349 self.etoscs.append(etosc) 

350 broken = next(inputfile).split() 

351 

352 # Detect the CI hamiltonian type, if applicable. 

353 # Should always be detected if CIS is done. 

354 if line[8:64] == "RESULTS FROM SPIN-ADAPTED ANTISYMMETRIZED PRODUCT (SAPS)": 

355 self.cihamtyp = "saps" 

356 if line[8:64] == "RESULTS FROM DETERMINANT BASED ATOMIC ORBITAL CI-SINGLES": 

357 self.cihamtyp = "dets" 

358 

359 # etsecs (used only for CIS runs for now) 

360 if line[1:14] == "EXCITED STATE": 

361 if not hasattr(self, 'etsecs'): 

362 self.etsecs = [] 

363 if not hasattr(self, 'etsyms'): 

364 self.etsyms = [] 

365 statenumber = int(line.split()[2]) 

366 spin = int(float(line.split()[7])) 

367 if spin == 0: 

368 sym = "Singlet" 

369 if spin == 1: 

370 sym = "Triplet" 

371 sym += '-' + line.split()[-1] 

372 self.etsyms.append(sym) 

373 # skip 5 lines 

374 for i in range(5): 

375 line = next(inputfile) 

376 line = next(inputfile) 

377 CIScontribs = [] 

378 while line.strip()[0] != "-": 

379 MOtype = 0 

380 # alpha/beta are specified for hamtyp=dets 

381 if self.cihamtyp == "dets": 

382 if line.split()[0] == "BETA": 

383 MOtype = 1 

384 fromMO = int(line.split()[-3])-1 

385 toMO = int(line.split()[-2])-1 

386 coeff = float(line.split()[-1]) 

387 # With the SAPS hamiltonian, the coefficients are multiplied 

388 # by sqrt(2) so that they normalize to 1. 

389 # With DETS, both alpha and beta excitations are printed. 

390 # if self.cihamtyp == "saps": 

391 # coeff /= numpy.sqrt(2.0) 

392 CIScontribs.append([(fromMO, MOtype), (toMO, MOtype), coeff]) 

393 line = next(inputfile) 

394 self.etsecs.append(CIScontribs) 

395 

396 # etoscs (used only for CIS runs now) 

397 if line[1:50] == "TRANSITION FROM THE GROUND STATE TO EXCITED STATE": 

398 if not hasattr(self, "etoscs"): 

399 self.etoscs = [] 

400 

401 # This was the suggested as a fix in issue #61, and it does allow 

402 # the parser to finish without crashing. However, it seems that 

403 # etoscs is shorter in this case than the other transition attributes, 

404 # so that should be somehow corrected and tested for. 

405 if "OPTICALLY" in line: 

406 pass 

407 else: 

408 statenumber = int(line.split()[-1]) 

409 # skip 7 lines 

410 for i in range(8): 

411 line = next(inputfile) 

412 strength = float(line.split()[3]) 

413 self.etoscs.append(strength) 

414 

415 # TD-DFT for GAMESS-US. 

416 # The format for excitations has changed a bit between 2007 and 2012. 

417 # Original format parser was written for: 

418 # 

419 # ------------------- 

420 # TRIPLET EXCITATIONS 

421 # ------------------- 

422 # 

423 # STATE # 1 ENERGY = 3.027228 EV 

424 # OSCILLATOR STRENGTH = 0.000000 

425 # DRF COEF OCC VIR 

426 # --- ---- --- --- 

427 # 35 -1.105383 35 -> 36 

428 # 69 -0.389181 34 -> 37 

429 # 103 -0.405078 33 -> 38 

430 # 137 0.252485 32 -> 39 

431 # 168 -0.158406 28 -> 40 

432 # 

433 # STATE # 2 ENERGY = 4.227763 EV 

434 # ... 

435 # 

436 # Here is the corresponding 2012 version: 

437 # 

438 # ------------------- 

439 # TRIPLET EXCITATIONS 

440 # ------------------- 

441 # 

442 # STATE # 1 ENERGY = 3.027297 EV 

443 # OSCILLATOR STRENGTH = 0.000000 

444 # LAMBDA DIAGNOSTIC = 0.925 (RYDBERG/CHARGE TRANSFER CHARACTER) 

445 # SYMMETRY OF STATE = A 

446 # EXCITATION DE-EXCITATION 

447 # OCC VIR AMPLITUDE AMPLITUDE 

448 # I A X(I->A) Y(A->I) 

449 # --- --- -------- -------- 

450 # 35 36 -0.929190 -0.176167 

451 # 34 37 -0.279823 -0.109414 

452 # ... 

453 # 

454 # We discern these two by the presence of the arrow in the old version. 

455 # 

456 # The "LET EXCITATIONS" pattern used below catches both 

457 # singlet and triplet excitations output. 

458 if line[14:29] == "LET EXCITATIONS": 

459 

460 self.etenergies = [] 

461 self.etoscs = [] 

462 self.etsecs = [] 

463 etsyms = [] 

464 

465 self.skip_lines(inputfile, ['d', 'b']) 

466 

467 # Loop while states are still being printed. 

468 line = next(inputfile) 

469 while line[1:6] == "STATE": 

470 

471 self.updateprogress(inputfile, "Excited States") 

472 

473 etenergy = utils.convertor(float(line.split()[-2]), "eV", "wavenumber") 

474 etoscs = float(next(inputfile).split()[-1]) 

475 self.etenergies.append(etenergy) 

476 self.etoscs.append(etoscs) 

477 

478 # Symmetry is not always present, especially in old versions. 

479 # Newer versions, on the other hand, can also provide a line 

480 # with lambda diagnostic and some extra headers. 

481 line = next(inputfile) 

482 if "LAMBDA DIAGNOSTIC" in line: 

483 line = next(inputfile) 

484 if "SYMMETRY" in line: 

485 etsyms.append(line.split()[-1]) 

486 line = next(inputfile) 

487 if "EXCITATION" in line and "DE-EXCITATION" in line: 

488 line = next(inputfile) 

489 if line.count("AMPLITUDE") == 2: 

490 line = next(inputfile) 

491 

492 self.skip_line(inputfile, 'dashes') 

493 

494 CIScontribs = [] 

495 line = next(inputfile) 

496 while line.strip(): 

497 cols = line.split() 

498 if "->" in line: 

499 i_occ_vir = [2, 4] 

500 i_coeff = 1 

501 

502 else: 

503 i_occ_vir = [0, 1] 

504 i_coeff = 2 

505 fromMO, toMO = [int(cols[i]) - 1 for i in i_occ_vir] 

506 coeff = float(cols[i_coeff]) 

507 CIScontribs.append([(fromMO, 0), (toMO, 0), coeff]) 

508 line = next(inputfile) 

509 self.etsecs.append(CIScontribs) 

510 line = next(inputfile) 

511 

512 # The symmetries are not always present. 

513 if etsyms: 

514 self.etsyms = etsyms 

515 

516 # Maximum and RMS gradients. 

517 if "MAXIMUM GRADIENT" in line or "RMS GRADIENT" in line: 

518 

519 parts = line.split() 

520 

521 # Avoid parsing the following... 

522 

523 ## YOU SHOULD RESTART "OPTIMIZE" RUNS WITH THE COORDINATES 

524 ## WHOSE ENERGY IS LOWEST. RESTART "SADPOINT" RUNS WITH THE 

525 ## COORDINATES WHOSE RMS GRADIENT IS SMALLEST. THESE ARE NOT 

526 ## ALWAYS THE LAST POINT COMPUTED! 

527 

528 if parts[0] not in ["MAXIMUM", "RMS", "(1)"]: 

529 return 

530 

531 if not hasattr(self, "geovalues"): 

532 self.geovalues = [] 

533 

534 # Newer versions (around 2006) have both maximum and RMS on one line: 

535 # MAXIMUM GRADIENT = 0.0531540 RMS GRADIENT = 0.0189223 

536 if len(parts) == 8: 

537 maximum = float(parts[3]) 

538 rms = float(parts[7]) 

539 

540 # In older versions of GAMESS, this spanned two lines, like this: 

541 # MAXIMUM GRADIENT = 0.057578167 

542 # RMS GRADIENT = 0.027589766 

543 if len(parts) == 4: 

544 maximum = float(parts[3]) 

545 line = next(inputfile) 

546 parts = line.split() 

547 rms = float(parts[3]) 

548 

549 # FMO also prints two final one- and two-body gradients (see exam37): 

550 # (1) MAXIMUM GRADIENT = 0.0531540 RMS GRADIENT = 0.0189223 

551 if len(parts) == 9: 

552 maximum = float(parts[4]) 

553 rms = float(parts[8]) 

554 

555 self.geovalues.append([maximum, rms]) 

556 

557 # This is the input orientation, which is the only data available for 

558 # SP calcs, but which should be overwritten by the standard orientation 

559 # values, which is the only information available for all geoopt cycles. 

560 if line[11:50] == "ATOMIC COORDINATES": 

561 

562 if not hasattr(self, "atomcoords"): 

563 self.atomcoords = [] 

564 

565 line = next(inputfile) 

566 atomcoords = [] 

567 atomnos = [] 

568 line = next(inputfile) 

569 while line.strip(): 

570 temp = line.strip().split() 

571 atomcoords.append([utils.convertor(float(x), "bohr", "Angstrom") for x in temp[2:5]]) 

572 atomnos.append(int(round(float(temp[1])))) # Don't use the atom name as this is arbitary 

573 line = next(inputfile) 

574 

575 self.set_attribute('atomnos', atomnos) 

576 self.atomcoords.append(atomcoords) 

577 

578 if line[12:40] == "EQUILIBRIUM GEOMETRY LOCATED": 

579 # Prevent extraction of the final geometry twice 

580 if not hasattr(self, 'optdone'): 

581 self.optdone = [] 

582 self.optdone.append(len(self.geovalues) - 1) 

583 

584 # Make sure we always have optdone for geomtry optimization, even if not converged. 

585 if "GEOMETRY SEARCH IS NOT CONVERGED" in line: 

586 if not hasattr(self, 'optdone'): 

587 self.optdone = [] 

588 

589 # This is the standard orientation, which is the only coordinate 

590 # information available for all geometry optimisation cycles. 

591 # The input orientation will be overwritten if this is a geometry optimisation 

592 # We assume that a previous Input Orientation has been found and 

593 # used to extract the atomnos 

594 if line[1:29] == "COORDINATES OF ALL ATOMS ARE" and (not hasattr(self, "optdone") or self.optdone == []): 

595 

596 self.updateprogress(inputfile, "Coordinates") 

597 

598 if self.firststdorient: 

599 self.firststdorient = False 

600 # Wipes out the single input coordinate at the start of the file 

601 self.atomcoords = [] 

602 

603 self.skip_lines(inputfile, ['line', '-']) 

604 

605 atomcoords = [] 

606 line = next(inputfile) 

607 

608 for i in range(self.natom): 

609 temp = line.strip().split() 

610 atomcoords.append(list(map(float, temp[2:5]))) 

611 line = next(inputfile) 

612 self.atomcoords.append(atomcoords) 

613 

614 # Section with SCF information. 

615 # 

616 # The space at the start of the search string is to differentiate from MCSCF. 

617 # Everything before the search string is stored as the type of SCF. 

618 # SCF types may include: BLYP, RHF, ROHF, UHF, etc. 

619 # 

620 # For example, in exam17 the section looks like this (note that this is GVB): 

621 # ------------------------ 

622 # ROHF-GVB SCF CALCULATION 

623 # ------------------------ 

624 # GVB STEP WILL USE 119875 WORDS OF MEMORY. 

625 # 

626 # MAXIT= 30 NPUNCH= 2 SQCDF TOL=1.0000E-05 

627 # NUCLEAR ENERGY= 6.1597411978 

628 # EXTRAP=T DAMP=F SHIFT=F RSTRCT=F DIIS=F SOSCF=F 

629 # 

630 # ITER EX TOTAL ENERGY E CHANGE SQCDF DIIS ERROR 

631 # 0 0 -38.298939963 -38.298939963 0.131784454 0.000000000 

632 # 1 1 -38.332044339 -0.033104376 0.026019716 0.000000000 

633 # ... and will be terminated by a blank line. 

634 if line.rstrip()[-16:] == " SCF CALCULATION": 

635 

636 # Remember the type of SCF. 

637 self.scftype = line.strip()[:-16] 

638 

639 self.skip_line(inputfile, 'dashes') 

640 

641 while line[:5] != " ITER": 

642 

643 self.updateprogress(inputfile, "Attributes") 

644 

645 # GVB uses SQCDF for checking convergence (for example in exam17). 

646 if "GVB" in self.scftype and "SQCDF TOL=" in line: 

647 scftarget = float(line.split("=")[-1]) 

648 

649 # Normally, however, the density is used as the convergence criterium. 

650 # Deal with various versions: 

651 # (GAMESS VERSION = 12 DEC 2003) 

652 # DENSITY MATRIX CONV= 2.00E-05 DFT GRID SWITCH THRESHOLD= 3.00E-04 

653 # (GAMESS VERSION = 22 FEB 2006) 

654 # DENSITY MATRIX CONV= 1.00E-05 

655 # (PC GAMESS version 6.2, Not DFT?) 

656 # DENSITY CONV= 1.00E-05 

657 elif "DENSITY CONV" in line or "DENSITY MATRIX CONV" in line: 

658 scftarget = float(line.split()[-1]) 

659 

660 line = next(inputfile) 

661 

662 if not hasattr(self, "scftargets"): 

663 self.scftargets = [] 

664 

665 self.scftargets.append([scftarget]) 

666 

667 if not hasattr(self, "scfvalues"): 

668 self.scfvalues = [] 

669 

670 # Normally the iterations print in 6 columns. 

671 # For ROHF, however, it is 5 columns, thus this extra parameter. 

672 if "ROHF" in self.scftype: 

673 self.scf_valcol = 4 

674 else: 

675 self.scf_valcol = 5 

676 

677 line = next(inputfile) 

678 

679 # SCF iterations are terminated by a blank line. 

680 # The first four characters usually contains the step number. 

681 # However, lines can also contain messages, including: 

682 # * * * INITIATING DIIS PROCEDURE * * * 

683 # CONVERGED TO SWOFF, SO DFT CALCULATION IS NOW SWITCHED ON 

684 # DFT CODE IS SWITCHING BACK TO THE FINER GRID 

685 values = [] 

686 while line.strip(): 

687 try: 

688 temp = int(line[0:4]) 

689 except ValueError: 

690 pass 

691 else: 

692 # if there were 100 iterations or more, the first part of the line 

693 # will look like 10099, 101100, 102101, etc., with no spaces between 

694 # the numbers. We can check if this is the case by seeing if the first 

695 # number on the line exceeds 10000. 

696 split_line = [line[0:4], line[4:7]] + line[7:].split() 

697 values.append([float(split_line[self.scf_valcol])]) 

698 try: 

699 line = next(inputfile) 

700 except StopIteration: 

701 self.logger.warning('File terminated before end of last SCF!') 

702 break 

703 self.scfvalues.append(values) 

704 

705 

706 # Sometimes, only the first SCF cycle has the banner parsed for above, 

707 # so we must identify them from the header before the SCF iterations. 

708 # The example we have for this is the GeoOpt unittest for Firefly8. 

709 if line[1:8] == "ITER EX": 

710 

711 # In this case, the convergence targets are not printed, so we assume 

712 # they do not change. 

713 self.scftargets.append(self.scftargets[-1]) 

714 

715 values = [] 

716 line = next(inputfile) 

717 while line.strip(): 

718 try: 

719 temp = int(line[0:4]) 

720 except ValueError: 

721 pass 

722 else: 

723 values.append([float(line.split()[self.scf_valcol])]) 

724 line = next(inputfile) 

725 self.scfvalues.append(values) 

726 

727 # Extract normal coordinate analysis, including vibrational frequencies (vibfreq), 

728 # IT intensities (vibirs) and displacements (vibdisps). 

729 # 

730 # This section typically looks like the following in GAMESS-US: 

731 # 

732 # MODES 1 TO 6 ARE TAKEN AS ROTATIONS AND TRANSLATIONS. 

733 # 

734 # FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2, 

735 # REDUCED MASSES IN AMU. 

736 # 

737 # 1 2 3 4 5 

738 # FREQUENCY: 52.49 41.45 17.61 9.23 10.61 

739 # REDUCED MASS: 3.92418 3.77048 5.43419 6.44636 5.50693 

740 # IR INTENSITY: 0.00013 0.00001 0.00004 0.00000 0.00003 

741 # 

742 # ...or in the case of a numerical Hessian job... 

743 # 

744 # MODES 1 TO 5 ARE TAKEN AS ROTATIONS AND TRANSLATIONS. 

745 # 

746 # FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2, 

747 # REDUCED MASSES IN AMU. 

748 # 

749 # 1 2 3 4 5 

750 # FREQUENCY: 0.05 0.03 0.03 30.89 30.94 

751 # REDUCED MASS: 8.50125 8.50137 8.50136 1.06709 1.06709 

752 # 

753 # ...whereas PC-GAMESS has... 

754 # 

755 # MODES 1 TO 6 ARE TAKEN AS ROTATIONS AND TRANSLATIONS. 

756 # 

757 # FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2 

758 # 

759 # 1 2 3 4 5 

760 # FREQUENCY: 5.89 1.46 0.01 0.01 0.01 

761 # IR INTENSITY: 0.00000 0.00000 0.00000 0.00000 0.00000 

762 # 

763 # If Raman is present we have (for PC-GAMESS)... 

764 # 

765 # MODES 1 TO 6 ARE TAKEN AS ROTATIONS AND TRANSLATIONS. 

766 # 

767 # FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2 

768 # RAMAN ACTIVITIES IN ANGSTROM**4/AMU, DEPOLARIZATIONS ARE DIMENSIONLESS 

769 # 

770 # 1 2 3 4 5 

771 # FREQUENCY: 5.89 1.46 0.04 0.03 0.01 

772 # IR INTENSITY: 0.00000 0.00000 0.00000 0.00000 0.00000 

773 # RAMAN ACTIVITY: 12.675 1.828 0.000 0.000 0.000 

774 # DEPOLARIZATION: 0.750 0.750 0.124 0.009 0.750 

775 # 

776 # If GAMESS-US or PC-GAMESS has not reached the stationary point we have 

777 # and additional warning, repeated twice, like so (see n_water.log for an example): 

778 # 

779 # ******************************************************* 

780 # * THIS IS NOT A STATIONARY POINT ON THE MOLECULAR PES * 

781 # * THE VIBRATIONAL ANALYSIS IS NOT VALID !!! * 

782 # ******************************************************* 

783 # 

784 # There can also be additional warnings about the selection of modes, for example: 

785 # 

786 # * * * WARNING, MODE 6 HAS BEEN CHOSEN AS A VIBRATION 

787 # WHILE MODE12 IS ASSUMED TO BE A TRANSLATION/ROTATION. 

788 # PLEASE VERIFY THE PROGRAM'S DECISION MANUALLY! 

789 # 

790 if "NORMAL COORDINATE ANALYSIS IN THE HARMONIC APPROXIMATION" in line: 

791 

792 self.vibfreqs = [] 

793 self.vibirs = [] 

794 self.vibdisps = [] 

795 

796 # Need to get to the modes line, which is often preceeded by 

797 # a list of atomic weights and some possible warnings. 

798 # Pass the warnings to the logger if they are there. 

799 while not "MODES" in line: 

800 self.updateprogress(inputfile, "Frequency Information") 

801 

802 line = next(inputfile) 

803 

804 # Typical Atomic Masses section printed in GAMESS 

805 # ATOMIC WEIGHTS (AMU) 

806 # 

807 # 1 O 15.99491 

808 # 2 H 1.00782 

809 # 3 H 1.00782 

810 if "ATOMIC WEIGHTS" in line: 

811 atommasses = [] 

812 self.skip_line(inputfile,['b']) 

813 # There is a blank line after ATOMIC WEIGHTS 

814 line = next(inputfile) 

815 while line.strip(): 

816 temp = line.strip().split() 

817 atommasses.append(float(temp[2])) 

818 line = next(inputfile) 

819 self.set_attribute('atommasses', atommasses) 

820 

821 if "THIS IS NOT A STATIONARY POINT" in line: 

822 msg = "\n This is not a stationary point on the molecular PES" 

823 msg += "\n The vibrational analysis is not valid!!!" 

824 self.logger.warning(msg) 

825 if "* * * WARNING, MODE" in line: 

826 line1 = line.strip() 

827 line2 = next(inputfile).strip() 

828 line3 = next(inputfile).strip() 

829 self.logger.warning("\n " + "\n ".join((line1, line2, line3))) 

830 

831 # In at least one case (regression zolm_dft3a.log) for older version of GAMESS-US, 

832 # the header concerning the range of nodes is formatted wrong and can look like so: 

833 # MODES 9 TO14 ARE TAKEN AS ROTATIONS AND TRANSLATIONS. 

834 # ... although it's unclear whether this happens for all two-digit values. 

835 startrot = int(line.split()[1]) 

836 if len(line.split()[2]) == 2: 

837 endrot = int(line.split()[3]) 

838 else: 

839 endrot = int(line.split()[2][2:]) 

840 

841 self.skip_line(inputfile, 'blank') 

842 

843 # Continue down to the first frequencies 

844 line = next(inputfile) 

845 # With GAMESS-US 20 APR 2017 (R1), there are 28 blank spaces, 

846 # in earlier versions there used to be 26. 

847 while not line.strip() or not re.search(' {26,}1', line) is not None: 

848 line = next(inputfile) 

849 

850 while not "SAYVETZ" in line: 

851 self.updateprogress(inputfile, "Frequency Information") 

852 

853 # Note: there may be imaginary frequencies like this (which we make negative): 

854 # FREQUENCY: 825.18 I 111.53 12.62 10.70 0.89 

855 # 

856 # A note for debuggers: some of these frequencies will be removed later, 

857 # assumed to be translations or rotations (see startrot/endrot above). 

858 for col in next(inputfile).split()[1:]: 

859 if col == "I": 

860 self.vibfreqs[-1] *= -1 

861 else: 

862 self.vibfreqs.append(float(col)) 

863 

864 line = next(inputfile) 

865 

866 # Skip the symmetry (appears in newer versions), fixes bug #3476063. 

867 if line.find("SYMMETRY") >= 0: 

868 line = next(inputfile) 

869 

870 # Skip the reduced mass (not always present). 

871 if line.find("REDUCED") >= 0: 

872 if not hasattr(self, "vibrmasses"): 

873 self.vibrmasses = [] 

874 self.vibrmasses.extend(list(map(float, line.strip().split()[2:]))) 

875 line = next(inputfile) 

876 

877 # Not present in numerical Hessian calculations. 

878 if line.find("IR INTENSITY") >= 0: 

879 irIntensity = map(float, line.strip().split()[2:]) 

880 self.vibirs.extend([utils.convertor(x, "Debye^2/amu-Angstrom^2", "km/mol") for x in irIntensity]) 

881 line = next(inputfile) 

882 

883 # Read in Raman vibrational intensities if present. 

884 if line.find("RAMAN") >= 0: 

885 if not hasattr(self, "vibramans"): 

886 self.vibramans = [] 

887 ramanIntensity = line.strip().split() 

888 self.vibramans.extend(list(map(float, ramanIntensity[2:]))) 

889 depolar = next(inputfile) 

890 line = next(inputfile) 

891 

892 # This line seems always to be blank. 

893 assert line.strip() == '' 

894 

895 # Extract the Cartesian displacement vectors. 

896 p = [[], [], [], [], []] 

897 for j in range(self.natom): 

898 q = [[], [], [], [], []] 

899 for coord in "xyz": 

900 line = next(inputfile)[21:] 

901 cols = list(map(float, line.split())) 

902 for i, val in enumerate(cols): 

903 q[i].append(val) 

904 for k in range(len(cols)): 

905 p[k].append(q[k]) 

906 self.vibdisps.extend(p[:len(cols)]) 

907 

908 # Skip the Sayvetz stuff at the end. 

909 for j in range(10): 

910 line = next(inputfile) 

911 

912 self.skip_line(inputfile, 'blank') 

913 line = next(inputfile) 

914 

915 # Exclude rotations and translations. 

916 self.vibfreqs = numpy.array(self.vibfreqs[:startrot-1]+self.vibfreqs[endrot:], "d") 

917 self.vibirs = numpy.array(self.vibirs[:startrot-1]+self.vibirs[endrot:], "d") 

918 self.vibdisps = numpy.array(self.vibdisps[:startrot-1]+self.vibdisps[endrot:], "d") 

919 if hasattr(self, "vibrmasses"): 

920 self.vibrmasses = numpy.array(self.vibrmasses[:startrot-1]+self.vibrmasses[endrot:], "d") 

921 if hasattr(self, "vibramans"): 

922 self.vibramans = numpy.array(self.vibramans[:startrot-1]+self.vibramans[endrot:], "d") 

923 

924 if line[5:21] == "ATOMIC BASIS SET": 

925 self.gbasis = [] 

926 line = next(inputfile) 

927 while line.find("SHELL") < 0: 

928 line = next(inputfile) 

929 

930 self.skip_lines(inputfile, ['blank', 'atomname']) 

931 

932 # shellcounter stores the shell no of the last shell 

933 # in the previous set of primitives 

934 shellcounter = 1 

935 while line.find("TOTAL NUMBER") < 0: 

936 

937 self.skip_line(inputfile, 'blank') 

938 

939 line = next(inputfile) 

940 shellno = int(line.split()[0]) 

941 shellgap = shellno - shellcounter 

942 gbasis = [] # Stores basis sets on one atom 

943 shellsize = 0 

944 while len(line.split()) != 1 and line.find("TOTAL NUMBER") < 0: 

945 shellsize += 1 

946 coeff = {} 

947 # coefficients and symmetries for a block of rows 

948 while line.strip(): 

949 temp = line.strip().split() 

950 sym = temp[1] 

951 assert sym in ['S', 'P', 'D', 'F', 'G', 'L'] 

952 if sym == "L": # L refers to SP 

953 if len(temp) == 6: # GAMESS US 

954 coeff.setdefault("S", []).append((float(temp[3]), float(temp[4]))) 

955 coeff.setdefault("P", []).append((float(temp[3]), float(temp[5]))) 

956 else: # PC GAMESS 

957 assert temp[6][-1] == temp[9][-1] == ')' 

958 coeff.setdefault("S", []).append((float(temp[3]), float(temp[6][:-1]))) 

959 coeff.setdefault("P", []).append((float(temp[3]), float(temp[9][:-1]))) 

960 else: 

961 if len(temp) == 5: # GAMESS US 

962 coeff.setdefault(sym, []).append((float(temp[3]), float(temp[4]))) 

963 else: # PC GAMESS 

964 assert temp[6][-1] == ')' 

965 coeff.setdefault(sym, []).append((float(temp[3]), float(temp[6][:-1]))) 

966 line = next(inputfile) 

967 # either a blank or a continuation of the block 

968 if sym == "L": 

969 gbasis.append(('S', coeff['S'])) 

970 gbasis.append(('P', coeff['P'])) 

971 else: 

972 gbasis.append((sym, coeff[sym])) 

973 line = next(inputfile) 

974 # either the start of the next block or the start of a new atom or 

975 # the end of the basis function section 

976 

977 numtoadd = 1 + (shellgap // shellsize) 

978 shellcounter = shellno + shellsize 

979 for x in range(numtoadd): 

980 self.gbasis.append(gbasis) 

981 

982 # The eigenvectors, which also include MO energies and symmetries, follow 

983 # the *final* report of evalues and the last list of symmetries in the log file: 

984 # 

985 # ------------ 

986 # EIGENVECTORS 

987 # ------------ 

988 # 

989 # 1 2 3 4 5 

990 # -10.0162 -10.0161 -10.0039 -10.0039 -10.0029 

991 # BU AG BU AG AG 

992 # 1 C 1 S 0.699293 0.699290 -0.027566 0.027799 0.002412 

993 # 2 C 1 S 0.031569 0.031361 0.004097 -0.004054 -0.000605 

994 # 3 C 1 X 0.000908 0.000632 -0.004163 0.004132 0.000619 

995 # 4 C 1 Y -0.000019 0.000033 0.000668 -0.000651 0.005256 

996 # 5 C 1 Z 0.000000 0.000000 0.000000 0.000000 0.000000 

997 # 6 C 2 S -0.699293 0.699290 0.027566 0.027799 0.002412 

998 # 7 C 2 S -0.031569 0.031361 -0.004097 -0.004054 -0.000605 

999 # 8 C 2 X 0.000908 -0.000632 -0.004163 -0.004132 -0.000619 

1000 # 9 C 2 Y -0.000019 -0.000033 0.000668 0.000651 -0.005256 

1001 # 10 C 2 Z 0.000000 0.000000 0.000000 0.000000 0.000000 

1002 # 11 C 3 S -0.018967 -0.019439 0.011799 -0.014884 -0.452328 

1003 # 12 C 3 S -0.007748 -0.006932 0.000680 -0.000695 -0.024917 

1004 # 13 C 3 X 0.002628 0.002997 0.000018 0.000061 -0.003608 

1005 # ... 

1006 # 

1007 # There are blanks lines between each block. 

1008 # 

1009 # Warning! There are subtle differences between GAMESS-US and PC-GAMES 

1010 # in the formatting of the first four columns. In particular, for F orbitals, 

1011 # PC GAMESS: 

1012 # 19 C 1 YZ 0.000000 0.000000 0.000000 0.000000 0.000000 

1013 # 20 C XXX 0.000000 0.000000 0.000000 0.000000 0.002249 

1014 # 21 C YYY 0.000000 0.000000 -0.025555 0.000000 0.000000 

1015 # 22 C ZZZ 0.000000 0.000000 0.000000 0.002249 0.000000 

1016 # 23 C XXY 0.000000 0.000000 0.001343 0.000000 0.000000 

1017 # GAMESS US 

1018 # 55 C 1 XYZ 0.000000 0.000000 0.000000 0.000000 0.000000 

1019 # 56 C 1XXXX -0.000014 -0.000067 0.000000 0.000000 0.000000 

1020 # 

1021 if line.find("EIGENVECTORS") == 10 or line.find("MOLECULAR ORBITALS") == 10: 

1022 

1023 # This is the stuff that we can read from these blocks. 

1024 self.moenergies = [[]] 

1025 self.mosyms = [[]] 

1026 

1027 if not hasattr(self, "nmo"): 

1028 self.nmo = self.nbasis 

1029 

1030 self.mocoeffs = [numpy.zeros((self.nmo, self.nbasis), "d")] 

1031 

1032 readatombasis = False 

1033 if not hasattr(self, "atombasis"): 

1034 self.atombasis = [] 

1035 self.aonames = [] 

1036 for i in range(self.natom): 

1037 self.atombasis.append([]) 

1038 self.aonames = [] 

1039 readatombasis = True 

1040 

1041 self.skip_line(inputfile, 'dashes') 

1042 

1043 for base in range(0, self.nmo, 5): 

1044 self.updateprogress(inputfile, "Coefficients") 

1045 

1046 line = next(inputfile) 

1047 

1048 # This makes sure that this section does not end prematurely, 

1049 # which happens in regression 2CO.ccsd.aug-cc-pVDZ.out. 

1050 if line.strip() != "": 

1051 break 

1052 

1053 numbers = next(inputfile) # Eigenvector numbers. 

1054 

1055 # This is for regression CdtetraM1B3LYP. 

1056 if "ALPHA SET" in numbers: 

1057 blank = next(inputfile) 

1058 numbers = next(inputfile) 

1059 

1060 # If not all coefficients are printed, the logfile will go right to 

1061 # the beta section if there is one, so break out in that case. 

1062 if "BETA SET" in numbers: 

1063 line = numbers 

1064 break 

1065 

1066 # Sometimes there are some blank lines here. 

1067 while not line.strip(): 

1068 line = next(inputfile) 

1069 

1070 # Geometry optimizations don't have END OF RHF/DFT 

1071 # CALCULATION, they head right into the next section. 

1072 if "--------" in line: 

1073 break 

1074 

1075 # Eigenvalues for these orbitals (in hartrees). 

1076 try: 

1077 self.moenergies[0].extend([utils.convertor(float(x), "hartree", "eV") for x in line.split()]) 

1078 except: 

1079 self.logger.warning('MO section found but could not be parsed!') 

1080 break 

1081 

1082 # Orbital symmetries. 

1083 line = next(inputfile) 

1084 if line.strip(): 

1085 self.mosyms[0].extend(list(map(self.normalisesym, line.split()))) 

1086 

1087 # Now we have nbasis lines. We will use the same method as in normalise_aonames() before. 

1088 p = re.compile(r"(\d+)\s*([A-Z][A-Z]?)\s*(\d+)\s*([A-Z]+)") 

1089 oldatom = '0' 

1090 i_atom = 0 # counter to keep track of n_atoms > 99 

1091 flag_w = True # flag necessary to keep from adding 100's at wrong time 

1092 

1093 for i in range(self.nbasis): 

1094 line = next(inputfile) 

1095 

1096 # If line is empty, break (ex. for FMO in exam37 which is a regression). 

1097 if not line.strip(): 

1098 break 

1099 

1100 # Fill atombasis and aonames only first time around 

1101 if readatombasis and base == 0: 

1102 

1103 aonames = [] 

1104 start = line[:17].strip() 

1105 m = p.search(start) 

1106 

1107 if m: 

1108 g = m.groups() 

1109 g2 = int(g[2]) # atom index in GAMESS file; changes to 0 after 99 

1110 

1111 # Check if we have moved to a hundred 

1112 # if so, increment the counter and add it to the parsed value 

1113 # There will be subsequent 0's as that atoms AO's are parsed 

1114 # so wait until the next atom is parsed before resetting flag 

1115 if g2 == 0 and flag_w: 

1116 i_atom = i_atom + 100 

1117 flag_w = False # handle subsequent AO's 

1118 if g2 != 0: 

1119 flag_w = True # reset flag 

1120 g2 = g2 + i_atom 

1121 

1122 aoname = "%s%i_%s" % (g[1].capitalize(), g2, g[3]) 

1123 oldatom = str(g2) 

1124 atomno = g2-1 

1125 orbno = int(g[0])-1 

1126 else: # For F orbitals, as shown above 

1127 g = [x.strip() for x in line.split()] 

1128 aoname = "%s%s_%s" % (g[1].capitalize(), oldatom, g[2]) 

1129 atomno = int(oldatom)-1 

1130 orbno = int(g[0])-1 

1131 

1132 self.atombasis[atomno].append(orbno) 

1133 self.aonames.append(aoname) 

1134 

1135 coeffs = line[15:] # Strip off the crud at the start. 

1136 j = 0 

1137 while j*11+4 < len(coeffs): 

1138 self.mocoeffs[0][base+j, i] = float(coeffs[j * 11:(j + 1) * 11]) 

1139 j += 1 

1140 

1141 # If it's a restricted calc and no more properties, we have: 

1142 # 

1143 # ...... END OF RHF/DFT CALCULATION ...... 

1144 # 

1145 # If there are more properties (such as the density matrix): 

1146 # -------------- 

1147 # 

1148 # If it's an unrestricted calculation, however, we now get the beta orbitals: 

1149 # 

1150 # ----- BETA SET ----- 

1151 # 

1152 # ------------ 

1153 # EIGENVECTORS 

1154 # ------------ 

1155 # 

1156 # 1 2 3 4 5 

1157 # ... 

1158 # 

1159 if "BETA SET" not in line: 

1160 line = next(inputfile) 

1161 line = next(inputfile) 

1162 

1163 # This can come in between the alpha and beta orbitals (see #130). 

1164 if line.strip() == "LZ VALUE ANALYSIS FOR THE MOS": 

1165 while line.strip(): 

1166 line = next(inputfile) 

1167 line = next(inputfile) 

1168 

1169 # Covers label with both dashes and stars (like regression CdtetraM1B3LYP). 

1170 if "BETA SET" in line: 

1171 self.mocoeffs.append(numpy.zeros((self.nmo, self.nbasis), "d")) 

1172 self.moenergies.append([]) 

1173 self.mosyms.append([]) 

1174 blank = next(inputfile) 

1175 line = next(inputfile) 

1176 

1177 # Sometimes EIGENVECTORS is missing, so rely on dashes to signal it. 

1178 if set(line.strip()) == {'-'}: 

1179 self.skip_lines(inputfile, ['EIGENVECTORS', 'd', 'b']) 

1180 line = next(inputfile) 

1181 

1182 for base in range(0, self.nmo, 5): 

1183 self.updateprogress(inputfile, "Coefficients") 

1184 if base != 0: 

1185 line = next(inputfile) 

1186 line = next(inputfile) 

1187 line = next(inputfile) 

1188 if "properties" in line.lower(): 

1189 break 

1190 self.moenergies[1].extend([utils.convertor(float(x), "hartree", "eV") for x in line.split()]) 

1191 line = next(inputfile) 

1192 self.mosyms[1].extend(list(map(self.normalisesym, line.split()))) 

1193 for i in range(self.nbasis): 

1194 line = next(inputfile) 

1195 temp = line[15:] # Strip off the crud at the start 

1196 j = 0 

1197 while j * 11 + 4 < len(temp): 

1198 self.mocoeffs[1][base+j, i] = float(temp[j * 11:(j + 1) * 11]) 

1199 j += 1 

1200 line = next(inputfile) 

1201 self.moenergies = [numpy.array(x, "d") for x in self.moenergies] 

1202 

1203 # Natural orbital coefficients and occupation numbers, presently supported only 

1204 # for CIS calculations. Looks the same as eigenvectors, without symmetry labels. 

1205 # 

1206 # -------------------- 

1207 # CIS NATURAL ORBITALS 

1208 # -------------------- 

1209 # 

1210 # 1 2 3 4 5 

1211 # 

1212 # 2.0158 2.0036 2.0000 2.0000 1.0000 

1213 # 

1214 # 1 O 1 S 0.000000 -0.157316 0.999428 0.164938 0.000000 

1215 # 2 O 1 S 0.000000 0.754402 0.004472 -0.581970 0.000000 

1216 # ... 

1217 # 

1218 if line[10:30] == "CIS NATURAL ORBITALS": 

1219 

1220 self.nocoeffs = numpy.zeros((self.nmo, self.nbasis), "d") 

1221 self.nooccnos = [] 

1222 

1223 self.skip_line(inputfile, 'dashes') 

1224 

1225 for base in range(0, self.nmo, 5): 

1226 

1227 self.skip_lines(inputfile, ['blank', 'numbers']) 

1228 

1229 # The eigenvalues that go along with these natural orbitals are 

1230 # their occupation numbers. Sometimes there are blank lines before them. 

1231 line = next(inputfile) 

1232 while not line.strip(): 

1233 line = next(inputfile) 

1234 eigenvalues = map(float, line.split()) 

1235 self.nooccnos.extend(eigenvalues) 

1236 

1237 # Orbital symemtry labels are normally here for MO coefficients. 

1238 line = next(inputfile) 

1239 

1240 # Now we have nbasis lines with the coefficients. 

1241 for i in range(self.nbasis): 

1242 

1243 line = next(inputfile) 

1244 coeffs = line[15:] 

1245 j = 0 

1246 while j*11+4 < len(coeffs): 

1247 self.nocoeffs[base+j, i] = float(coeffs[j * 11:(j + 1) * 11]) 

1248 j += 1 

1249 

1250 # We cannot trust this self.homos until we come to the phrase: 

1251 # SYMMETRIES FOR INITAL GUESS ORBITALS FOLLOW 

1252 # which either is followed by "ALPHA" or "BOTH" at which point we can say 

1253 # for certain that it is an un/restricted calculations. 

1254 # Note that MCSCF calcs also print this search string, so make sure 

1255 # that self.homos does not exist yet. 

1256 if line[1:28] == "NUMBER OF OCCUPIED ORBITALS" and not hasattr(self, 'homos'): 

1257 

1258 homos = [int(line.split()[-1])-1] 

1259 line = next(inputfile) 

1260 homos.append(int(line.split()[-1])-1) 

1261 

1262 self.set_attribute('homos', homos) 

1263 

1264 if line.find("SYMMETRIES FOR INITIAL GUESS ORBITALS FOLLOW") >= 0: 

1265 # Not unrestricted, so lop off the second index. 

1266 # In case the search string above was not used (ex. FMO in exam38), 

1267 # we can try to use the next line which should also contain the 

1268 # number of occupied orbitals. 

1269 if line.find("BOTH SET(S)") >= 0: 

1270 nextline = next(inputfile) 

1271 if "ORBITALS ARE OCCUPIED" in nextline: 

1272 homos = int(nextline.split()[0])-1 

1273 if hasattr(self, "homos"): 

1274 try: 

1275 assert self.homos[0] == homos 

1276 except AssertionError: 

1277 self.logger.warning("Number of occupied orbitals not consistent. This is normal for ECP and FMO jobs.") 

1278 else: 

1279 self.homos = [homos] 

1280 self.homos = numpy.resize(self.homos, [1]) 

1281 

1282 # Set the total number of atoms, only once. 

1283 # Normally GAMESS print TOTAL NUMBER OF ATOMS, however in some cases 

1284 # this is slightly different (ex. lower case for FMO in exam37). 

1285 if not hasattr(self, "natom") and "NUMBER OF ATOMS" in line.upper(): 

1286 natom = int(line.split()[-1]) 

1287 self.set_attribute('natom', natom) 

1288 

1289 # The first is from Julien's Example and the second is from Alexander's 

1290 # I think it happens if you use a polar basis function instead of a cartesian one 

1291 if line.find("NUMBER OF CARTESIAN GAUSSIAN BASIS") == 1 or line.find("TOTAL NUMBER OF BASIS FUNCTIONS") == 1: 

1292 nbasis = int(line.strip().split()[-1]) 

1293 self.set_attribute('nbasis', nbasis) 

1294 

1295 elif line.find("TOTAL NUMBER OF CONTAMINANTS DROPPED") >= 0: 

1296 nmos_dropped = int(line.split()[-1]) 

1297 if hasattr(self, "nmo"): 

1298 self.set_attribute('nmo', self.nmo - nmos_dropped) 

1299 else: 

1300 self.set_attribute('nmo', self.nbasis - nmos_dropped) 

1301 

1302 # Note that this line is present if ISPHER=1, e.g. for C_bigbasis 

1303 elif line.find("SPHERICAL HARMONICS KEPT IN THE VARIATION SPACE") >= 0: 

1304 nmo = int(line.strip().split()[-1]) 

1305 self.set_attribute('nmo', nmo) 

1306 

1307 # Note that this line is not always present, so by default 

1308 # NBsUse is set equal to NBasis (see below). 

1309 elif line.find("TOTAL NUMBER OF MOS IN VARIATION SPACE") == 1: 

1310 nmo = int(line.split()[-1]) 

1311 self.set_attribute('nmo', nmo) 

1312 

1313 elif line.find("OVERLAP MATRIX") == 0 or line.find("OVERLAP MATRIX") == 1: 

1314 # The first is for PC-GAMESS, the second for GAMESS 

1315 # Read 1-electron overlap matrix 

1316 if not hasattr(self, "aooverlaps"): 

1317 self.aooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d") 

1318 else: 

1319 self.logger.info("Reading additional aooverlaps...") 

1320 base = 0 

1321 while base < self.nbasis: 

1322 self.updateprogress(inputfile, "Overlap") 

1323 

1324 self.skip_lines(inputfile, ['b', 'basis_fn_number', 'b']) 

1325 

1326 for i in range(self.nbasis - base): # Fewer lines each time 

1327 line = next(inputfile) 

1328 temp = line.split() 

1329 for j in range(4, len(temp)): 

1330 self.aooverlaps[base+j-4, i+base] = float(temp[j]) 

1331 self.aooverlaps[i+base, base+j-4] = float(temp[j]) 

1332 base += 5 

1333 

1334 # ECP Pseudopotential information 

1335 if "ECP POTENTIALS" in line: 

1336 if not hasattr(self, "coreelectrons"): 

1337 self.coreelectrons = [0]*self.natom 

1338 

1339 self.skip_lines(inputfile, ['d', 'b']) 

1340 

1341 header = next(inputfile) 

1342 while header.split()[0] == "PARAMETERS": 

1343 name = header[17:25] 

1344 atomnum = int(header[34:40]) 

1345 # The pseudopotnetial is given explicitely 

1346 if header[40:50] == "WITH ZCORE": 

1347 zcore = int(header[50:55]) 

1348 lmax = int(header[63:67]) 

1349 self.coreelectrons[atomnum-1] = zcore 

1350 # The pseudopotnetial is copied from another atom 

1351 if header[40:55] == "ARE THE SAME AS": 

1352 atomcopy = int(header[60:]) 

1353 self.coreelectrons[atomnum-1] = self.coreelectrons[atomcopy-1] 

1354 line = next(inputfile) 

1355 while line.split() != []: 

1356 line = next(inputfile) 

1357 header = next(inputfile) 

1358 

1359 # This was used before refactoring the parser, geotargets was set here after parsing. 

1360 #if not hasattr(self, "geotargets"): 

1361 # opttol = 1e-4 

1362 # self.geotargets = numpy.array([opttol, 3. / opttol], "d") 

1363 #if hasattr(self,"geovalues"): self.geovalues = numpy.array(self.geovalues, "d") 

1364 

1365 # This is quite simple to parse, but some files seem to print certain lines twice, 

1366 # repeating the populations without charges, but not in proper order. 

1367 # The unrestricted calculations are a bit tricky, since GAMESS-US prints populations 

1368 # for both alpha and beta orbitals in the same format and with the same title, 

1369 # but it still prints the charges only at the very end. 

1370 if "TOTAL MULLIKEN AND LOWDIN ATOMIC POPULATIONS" in line: 

1371 

1372 if not hasattr(self, "atomcharges"): 

1373 self.atomcharges = {} 

1374 

1375 header = next(inputfile) 

1376 line = next(inputfile) 

1377 

1378 # It seems that when population are printed twice (without charges), 

1379 # there is a blank line along the way (after the first header), 

1380 # so let's get a flag out of that circumstance. 

1381 doubles_printed = line.strip() == "" 

1382 if doubles_printed: 

1383 title = next(inputfile) 

1384 header = next(inputfile) 

1385 line = next(inputfile) 

1386 

1387 # Only go further if the header had five columns, which should 

1388 # be the case when both populations and charges are printed. 

1389 # This is pertinent for both double printing and unrestricted output. 

1390 if not len(header.split()) == 5: 

1391 return 

1392 mulliken, lowdin = [], [] 

1393 while line.strip(): 

1394 if line.strip() and doubles_printed: 

1395 line = next(inputfile) 

1396 mulliken.append(float(line.split()[3])) 

1397 lowdin.append(float(line.split()[5])) 

1398 line = next(inputfile) 

1399 self.atomcharges["mulliken"] = mulliken 

1400 self.atomcharges["lowdin"] = lowdin 

1401 

1402 # --------------------- 

1403 # ELECTROSTATIC MOMENTS 

1404 # --------------------- 

1405 # 

1406 # POINT 1 X Y Z (BOHR) CHARGE 

1407 # -0.000000 0.000000 0.000000 -0.00 (A.U.) 

1408 # DX DY DZ /D/ (DEBYE) 

1409 # 0.000000 -0.000000 0.000000 0.000000 

1410 # 

1411 if line.strip() == "ELECTROSTATIC MOMENTS": 

1412 

1413 self.skip_lines(inputfile, ['d', 'b']) 

1414 line = next(inputfile) 

1415 

1416 # The old PC-GAMESS prints memory assignment information here. 

1417 if "MEMORY ASSIGNMENT" in line: 

1418 memory_assignment = next(inputfile) 

1419 line = next(inputfile) 

1420 

1421 # If something else ever comes up, we should get a signal from this assert. 

1422 assert line.split()[0] == "POINT" 

1423 

1424 # We can get the reference point from here, as well as 

1425 # check here that the net charge of the molecule is correct. 

1426 coords_and_charge = next(inputfile) 

1427 assert coords_and_charge.split()[-1] == '(A.U.)' 

1428 reference = numpy.array([float(x) for x in coords_and_charge.split()[:3]]) 

1429 reference = utils.convertor(reference, 'bohr', 'Angstrom') 

1430 charge = int(round(float(coords_and_charge.split()[-2]))) 

1431 self.set_attribute('charge', charge) 

1432 

1433 dipoleheader = next(inputfile) 

1434 assert dipoleheader.split()[:3] == ['DX', 'DY', 'DZ'] 

1435 assert dipoleheader.split()[-1] == "(DEBYE)" 

1436 

1437 dipoleline = next(inputfile) 

1438 dipole = [float(d) for d in dipoleline.split()[:3]] 

1439 

1440 # The dipole is always the first multipole moment to be printed, 

1441 # so if it already exists, we will overwrite all moments since we want 

1442 # to leave just the last printed value (could change in the future). 

1443 if not hasattr(self, 'moments'): 

1444 self.moments = [reference, dipole] 

1445 else: 

1446 try: 

1447 assert self.moments[1] == dipole 

1448 except AssertionError: 

1449 self.logger.warning('Overwriting previous multipole moments with new values') 

1450 self.logger.warning('This could be from post-HF properties or geometry optimization') 

1451 self.moments = [reference, dipole] 

1452 

1453 # Static polarizability from a harmonic frequency calculation 

1454 # with $CPHF/POLAR=.TRUE. 

1455 if line.strip() == 'ALPHA POLARIZABILITY TENSOR (ANGSTROMS**3)': 

1456 if not hasattr(self, 'polarizabilities'): 

1457 self.polarizabilities = [] 

1458 polarizability = numpy.zeros(shape=(3, 3)) 

1459 self.skip_lines(inputfile, ['d', 'b', 'directions']) 

1460 for i in range(3): 

1461 line = next(inputfile) 

1462 polarizability[i, :i+1] = [float(x) for x in line.split()[1:]] 

1463 polarizability = utils.symmetrize(polarizability, use_triangle='lower') 

1464 # Convert from Angstrom**3 to bohr**3 (a.u.**3). 

1465 volume_convert = numpy.vectorize(lambda x: x * utils.convertor(1, 'Angstrom', 'bohr') ** 3) 

1466 polarizability = volume_convert(polarizability) 

1467 self.polarizabilities.append(polarizability) 

1468 

1469 # Static and dynamic polarizability from RUNTYP=TDHF. 

1470 if line.strip() == 'TIME-DEPENDENT HARTREE-FOCK NLO PROPERTIES': 

1471 if not hasattr(self, 'polarizabilities'): 

1472 self.polarizabilities = [] 

1473 polarizability = numpy.empty(shape=(3, 3)) 

1474 coord_to_idx = {'X': 0, 'Y': 1, 'Z': 2} 

1475 self.skip_lines(inputfile, ['d', 'b', 'dots']) 

1476 line = next(inputfile) 

1477 assert 'ALPHA AT' in line 

1478 self.skip_lines(inputfile, ['dots', 'b']) 

1479 for a in range(3): 

1480 for b in range(3): 

1481 line = next(inputfile) 

1482 tokens = line.split() 

1483 i, j = coord_to_idx[tokens[1][0]], coord_to_idx[tokens[1][1]] 

1484 polarizability[i, j] = tokens[3] 

1485 self.polarizabilities.append(polarizability) 

1486 

1487 # Extract thermochemistry 

1488 

1489 # ------------------------------- 

1490 # THERMOCHEMISTRY AT T= 298.15 K 

1491 # ------------------------------- 

1492 

1493 # USING IDEAL GAS, RIGID ROTOR, HARMONIC NORMAL MODE APPROXIMATIONS. 

1494 # P= 1.01325E+05 PASCAL. 

1495 # ALL FREQUENCIES ARE SCALED BY 1.00000 

1496 # THE MOMENTS OF INERTIA ARE (IN AMU*BOHR**2) 

1497 # 1.77267 4.73429 6.50696 

1498 # THE ROTATIONAL SYMMETRY NUMBER IS 1.0 

1499 # THE ROTATIONAL CONSTANTS ARE (IN GHZ) 

1500 # 1017.15677 380.85747 277.10144 

1501 # 7 - 9 VIBRATIONAL MODES ARE USED IN THERMOCHEMISTRY. 

1502 # THE HARMONIC ZERO POINT ENERGY IS (SCALED BY 1.000) 

1503 # 0.020711 HARTREE/MOLECULE 4545.618665 CM**-1/MOLECULE  

1504 # 12.996589 KCAL/MOL 54.377728 KJ/MOL 

1505 

1506 # Q LN Q 

1507 # ELEC. 1.00000E+00 0.000000 

1508 # TRANS. 3.00431E+06 14.915558 

1509 # ROT. 8.36512E+01 4.426656 

1510 # VIB. 1.00067E+00 0.000665 

1511 # TOT. 2.51481E+08 19.342880 

1512 

1513 # E H G CV CP S 

1514 # KJ/MOL KJ/MOL KJ/MOL J/MOL-K J/MOL-K J/MOL-K 

1515 # ELEC. 0.000 0.000 0.000 0.000 0.000 0.000 

1516 # TRANS. 3.718 6.197 -36.975 12.472 20.786 144.800 

1517 # ROT. 3.718 3.718 -10.973 12.472 12.472 49.277 

1518 # VIB. 54.390 54.390 54.376 0.296 0.296 0.046 

1519 # TOTAL 61.827 64.306 6.428 25.240 33.554 194.123 

1520 # VIB. THERMAL CORRECTION E(T)-E(0) = H(T)-H(0) = 12.071 J/MOL 

1521 

1522 # E H G CV CP S 

1523 # KCAL/MOL KCAL/MOL KCAL/MOL CAL/MOL-K CAL/MOL-K CAL/MOL-K 

1524 # ELEC. 0.000 0.000 0.000 0.000 0.000 0.000 

1525 # TRANS. 0.889 1.481 -8.837 2.981 4.968 34.608 

1526 # ROT. 0.889 0.889 -2.623 2.981 2.981 11.777 

1527 # VIB. 12.999 12.999 12.996 0.071 0.071 0.011 

1528 # TOTAL 14.777 15.369 1.536 6.032 8.020 46.396 

1529 # VIB. THERMAL CORRECTION E(T)-E(0) = H(T)-H(0) = 2.885 CAL/MOL  

1530 

1531 if "THERMOCHEMISTRY AT T=" in line: 

1532 match = re.search(r"THERMOCHEMISTRY AT T=(.*)K", line) 

1533 if match: 

1534 self.set_attribute('temperature', float(match.group(1))) 

1535 if "PASCAL." in line: 

1536 match = re.search(r"P=(.*)PASCAL.", line) 

1537 if match: 

1538 self.set_attribute('pressure', float(match.group(1))/1.01325e5) 

1539 

1540 if "KCAL/MOL KCAL/MOL KCAL/MOL CAL/MOL-K CAL/MOL-K CAL/MOL-K" in line: 

1541 self.skip_lines(inputfile,["ELEC","TRANS","ROT","VIB"]) 

1542 line = next(inputfile) #TOTAL 

1543 thermoValues = line.split() 

1544 

1545 if hasattr(self, 'scfenergies'): 

1546 electronicEnergy = utils.convertor(self.scfenergies[-1],"eV","hartree") 

1547 else: 

1548 electronicEnergy = 0 # GAMESS prints thermochemistry at the end, so it should have a value for this already 

1549 self.set_attribute('enthalpy', electronicEnergy + utils.convertor(float(thermoValues[2]),"kcal/mol","hartree")) 

1550 self.set_attribute('freeenergy', electronicEnergy + utils.convertor(float(thermoValues[3]),"kcal/mol","hartree")) 

1551 self.set_attribute('entropy', utils.convertor(float(thermoValues[6])/1000.0,"kcal/mol","hartree")) 

1552 

1553 

1554 if line[:30] == ' ddikick.x: exited gracefully.'\ 

1555 or line[:41] == ' EXECUTION OF FIREFLY TERMINATED NORMALLY'\ 

1556 or line[:40] == ' EXECUTION OF GAMESS TERMINATED NORMALLY': 

1557 self.metadata['success'] = True