Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2# 

3# Copyright (c) 2020, the cclib development team 

4# 

5# This file is part of cclib (http://cclib.github.io) and is distributed under 

6# the terms of the BSD 3-Clause License. 

7 

8"""Parser for Molcas output files""" 

9 

10import re 

11import string 

12 

13import numpy 

14 

15from cclib.parser import logfileparser 

16from cclib.parser import utils 

17 

18 

19class Molcas(logfileparser.Logfile): 

20 """A Molcas log file.""" 

21 

22 def __init__(self, *args, **kwargs): 

23 

24 # Call the __init__ method of the superclass 

25 super(Molcas, self).__init__(logname="Molcas", *args, **kwargs) 

26 

27 def __str__(self): 

28 """Return a string repeesentation of the object.""" 

29 return "Molcas log file %s" % (self.filename) 

30 

31 def __repr__(self): 

32 """Return a representation of the object.""" 

33 return 'Molcas("%s")' % (self.filename) 

34 

35 def normalisesym(self, label): 

36 """Normalise the symmetries used by Molcas. 

37 

38 The labels are standardized except for the first character being lowercase. 

39 """ 

40 return label[0].upper() + label[1:] 

41 

42 def after_parsing(self): 

43 for element, ncore in self.core_array: 

44 self._assign_coreelectrons_to_element(element, ncore) 

45 

46 if "package_version" in self.metadata: 

47 # Use the short version as the legacy version. 

48 self.metadata["legacy_package_version"] = self.metadata["package_version"] 

49 # If there is both a tag and the full hash, place the tag 

50 # first. Both are chosen to be local, since there isn't a 

51 # distinction between development and release builds in their 

52 # version cycle. 

53 if "tag" in self.metadata and "revision" in self.metadata: 

54 self.metadata["package_version"] = "{}+{}.{}".format( 

55 self.metadata["package_version"], 

56 self.metadata["tag"], 

57 self.metadata["revision"] 

58 ) 

59 elif "tag" in self.metadata: 

60 self.metadata["package_version"] = "{}+{}".format( 

61 self.metadata["package_version"], 

62 self.metadata["tag"] 

63 ) 

64 elif "revision" in self.metadata: 

65 self.metadata["package_version"] = "{}+{}".format( 

66 self.metadata["package_version"], 

67 self.metadata["revision"] 

68 ) 

69 

70 def before_parsing(self): 

71 # Compile the regex for extracting the element symbol from the 

72 # atom label in the "Molecular structure info" block. 

73 self.re_atomelement = re.compile(r'([a-zA-Z]+)\d?') 

74 

75 # Compile the dashes-and-or-spaces-only regex. 

76 self.re_dashes_and_spaces = re.compile(r'^[\s-]+$') 

77 

78 # Molcas can do multiple calculations in one job, and each one 

79 # starts from the gateway module. Onle parse the first. 

80 # TODO: It would be best to parse each calculation as a separate 

81 # ccData object and return an iterator - something for 2.x 

82 self.gateway_module_count = 0 

83 

84 def extract(self, inputfile, line): 

85 """Extract information from the file object inputfile.""" 

86 

87 if "Start Module: gateway" in line: 

88 self.gateway_module_count += 1 

89 

90 if self.gateway_module_count > 1: 

91 return 

92 

93 # Extract the version number and optionally the Git tag and hash. 

94 if "version" in line: 

95 match = re.search(r"\s{2,}version:?\s(\d*\.\d*)", line) 

96 if match: 

97 self.metadata["package_version"] = match.groups()[0] 

98 if "tag" in line: 

99 self.metadata["tag"] = line.split()[-1] 

100 if "build" in line: 

101 match = re.search(r"\*\s*build\s(\S*)\s*\*", line) 

102 if match: 

103 self.metadata["revision"] = match.groups()[0] 

104 

105 ## This section is present when executing &GATEWAY. 

106 # ++ Molecular structure info: 

107 # ------------------------- 

108 

109 # ************************************************ 

110 # **** Cartesian Coordinates / Bohr, Angstrom **** 

111 # ************************************************ 

112 

113 # Center Label x y z x y z 

114 # 1 C1 0.526628 -2.582937 0.000000 0.278679 -1.366832 0.000000 

115 # 2 C2 2.500165 -0.834760 0.000000 1.323030 -0.441736 0.000000 

116 if line[25:63] == 'Cartesian Coordinates / Bohr, Angstrom': 

117 if not hasattr(self, 'atomnos'): 

118 self.atomnos = [] 

119 

120 self.skip_lines(inputfile, ['stars', 'blank', 'header']) 

121 

122 line = next(inputfile) 

123 

124 atomelements = [] 

125 atomcoords = [] 

126 

127 while line.strip() not in ('', '--'): 

128 sline = line.split() 

129 atomelement = sline[1].rstrip(string.digits).title() 

130 atomelements.append(atomelement) 

131 atomcoords.append(list(map(float, sline[5:]))) 

132 line = next(inputfile) 

133 

134 self.append_attribute('atomcoords', atomcoords) 

135 

136 if self.atomnos == []: 

137 self.atomnos = [self.table.number[ae.title()] for ae in atomelements] 

138 

139 if not hasattr(self, 'natom'): 

140 self.set_attribute('natom', len(self.atomnos)) 

141 

142 ## This section is present when executing &SCF. 

143 # ++ Orbital specifications: 

144 # ----------------------- 

145 

146 # Symmetry species 1 

147 

148 # Frozen orbitals 0 

149 # Occupied orbitals 3 

150 # Secondary orbitals 77 

151 # Deleted orbitals 0 

152 # Total number of orbitals 80 

153 # Number of basis functions 80 

154 # -- 

155 if line[:29] == '++ Orbital specifications:': 

156 

157 self.skip_lines(inputfile, ['dashes', 'blank']) 

158 line = next(inputfile) 

159 

160 symmetry_count = 1 

161 while not line.startswith('--'): 

162 if line.strip().startswith('Symmetry species'): 

163 symmetry_count = int(line.split()[-1]) 

164 if line.strip().startswith('Total number of orbitals'): 

165 nmos = line.split()[-symmetry_count:] 

166 self.set_attribute('nmo', sum(map(int, nmos))) 

167 if line.strip().startswith('Number of basis functions'): 

168 nbasis = line.split()[-symmetry_count:] 

169 self.set_attribute('nbasis', sum(map(int, nbasis))) 

170 

171 line = next(inputfile) 

172 

173 if line.strip().startswith(('Molecular charge', 'Total molecular charge')): 

174 self.set_attribute('charge', int(float(line.split()[-1]))) 

175 

176 # ++ Molecular charges: 

177 # ------------------ 

178 

179 # Mulliken charges per centre and basis function type 

180 # --------------------------------------------------- 

181 

182 # C1 

183 # 1s 2.0005 

184 # 2s 2.0207 

185 # 2px 0.0253 

186 # 2pz 0.1147 

187 # 2py 1.8198 

188 # *s -0.0215 

189 # *px 0.0005 

190 # *pz 0.0023 

191 # *py 0.0368 

192 # *d2+ 0.0002 

193 # *d1+ 0.0000 

194 # *d0 0.0000 

195 # *d1- 0.0000 

196 # *d2- 0.0000 

197 # *f3+ 0.0000 

198 # *f2+ 0.0001 

199 # *f1+ 0.0000 

200 # *f0 0.0001 

201 # *f1- 0.0001 

202 # *f2- 0.0000 

203 # *f3- 0.0003 

204 # *g4+ 0.0000 

205 # *g3+ 0.0000 

206 # *g2+ 0.0000 

207 # *g1+ 0.0000 

208 # *g0 0.0000 

209 # *g1- 0.0000 

210 # *g2- 0.0000 

211 # *g3- 0.0000 

212 # *g4- 0.0000 

213 # Total 6.0000 

214 

215 # N-E 0.0000 

216 

217 # Total electronic charge= 6.000000 

218 

219 # Total charge= 0.000000 

220 #-- 

221 if line[:24] == '++ Molecular charges:': 

222 

223 atomcharges = [] 

224 

225 while line[6:29] != 'Total electronic charge': 

226 line = next(inputfile) 

227 if line[6:9] == 'N-E': 

228 atomcharges.extend(map(float, line.split()[1:])) 

229 

230 # Molcas only performs Mulliken population analysis. 

231 self.set_attribute('atomcharges', {'mulliken': atomcharges}) 

232 

233 # Ensure the charge printed here is identical to the 

234 # charge printed before entering the SCF. 

235 self.skip_line(inputfile, 'blank') 

236 line = next(inputfile) 

237 assert line[6:30] == 'Total charge=' 

238 if hasattr(self, 'charge'): 

239 assert int(float(line.split()[2])) == self.charge 

240 

241 # This section is present when executing &SCF 

242 # This section parses the total SCF Energy. 

243 # ***************************************************************************************************************************** 

244 # * * 

245 # * SCF/KS-DFT Program, Final results * 

246 # * * 

247 # * * 

248 # * * 

249 # * Final Results * 

250 # * * 

251 # ***************************************************************************************************************************** 

252 

253 # :: Total SCF energy -37.6045426484 

254 if line[:22] == ':: Total SCF energy' or line[:25] == ':: Total KS-DFT energy': 

255 if not hasattr(self, 'scfenergies'): 

256 self.scfenergies = [] 

257 scfenergy = float(line.split()[-1]) 

258 self.scfenergies.append(utils.convertor(scfenergy, 'hartree', 'eV')) 

259 

260 ## Parsing the scftargets in this section 

261 # ++ Optimization specifications: 

262 # ---------------------------- 

263 

264 # SCF Algorithm: Conventional 

265 # Minimized density differences are used 

266 # Number of density matrices in core 9 

267 # Maximum number of NDDO SCF iterations 400 

268 # Maximum number of HF SCF iterations 400 

269 # Threshold for SCF energy change 0.10E-08 

270 # Threshold for density matrix 0.10E-03 

271 # Threshold for Fock matrix 0.15E-03 

272 # Threshold for linear dependence 0.10E-08 

273 # Threshold at which DIIS is turned on 0.15E+00 

274 # Threshold at which QNR/C2DIIS is turned on 0.75E-01 

275 # Threshold for Norm(delta) (QNR/C2DIIS) 0.20E-04 

276 if line[:34] == '++ Optimization specifications:': 

277 self.skip_lines(inputfile, ['d', 'b']) 

278 line = next(inputfile) 

279 if line.strip().startswith('SCF'): 

280 scftargets = [] 

281 self.skip_lines(inputfile, 

282 ['Minimized', 'Number', 'Maximum', 'Maximum']) 

283 lines = [next(inputfile) for i in range(7)] 

284 targets = [ 

285 'Threshold for SCF energy change', 

286 'Threshold for density matrix', 

287 'Threshold for Fock matrix', 

288 'Threshold for Norm(delta)', 

289 ] 

290 for y in targets: 

291 scftargets.extend([float(x.split()[-1]) for x in lines if y in x]) 

292 

293 self.append_attribute('scftargets', scftargets) 

294 

295 # ++ Convergence information 

296 # SCF iterations: Energy and convergence statistics 

297 # 

298 # Iter Tot. SCF One-electron Two-electron Energy Max Dij or Max Fij DNorm TNorm AccCon Time 

299 # Energy Energy Energy Change Delta Norm in Sec. 

300 # 1 -36.83817703 -50.43096166 13.59278464 0.00E+00 0.16E+00* 0.27E+01* 0.30E+01 0.33E+02 NoneDa 0. 

301 # 2 -36.03405202 -45.74525152 9.71119950 0.80E+00* 0.14E+00* 0.93E-02* 0.26E+01 0.43E+01 Damp 0. 

302 # 3 -37.08936118 -48.41536598 11.32600480 -0.11E+01* 0.12E+00* 0.91E-01* 0.97E+00 0.16E+01 Damp 0. 

303 # 4 -37.31610460 -50.54103969 13.22493509 -0.23E+00* 0.11E+00* 0.96E-01* 0.72E+00 0.27E+01 Damp 0. 

304 # 5 -37.33596239 -49.47021484 12.13425245 -0.20E-01* 0.59E-01* 0.59E-01* 0.37E+00 0.16E+01 Damp 0. 

305 # ... 

306 # Convergence after 26 Macro Iterations 

307 # -- 

308 if line[46:91] == 'iterations: Energy and convergence statistics': 

309 

310 self.skip_line(inputfile, 'blank') 

311 

312 while line.split() != ['Energy', 'Energy', 'Energy', 'Change', 'Delta', 'Norm', 'in', 'Sec.']: 

313 line = next(inputfile) 

314 

315 iteration_regex = (r"^([0-9]+)" # Iter 

316 r"( [ \-0-9]*\.[0-9]{6,9})" # Tot. SCF Energy 

317 r"( [ \-0-9]*\.[0-9]{6,9})" # One-electron Energy 

318 r"( [ \-0-9]*\.[0-9]{6,9})" # Two-electron Energy 

319 r"( [ \-0-9]*\.[0-9]{2}E[\-\+][0-9]{2}\*?)" # Energy Change 

320 r"( [ \-0-9]*\.[0-9]{2}E[\-\+][0-9]{2}\*?)" # Max Dij or Delta Norm 

321 r"( [ \-0-9]*\.[0-9]{2}E[\-\+][0-9]{2}\*?)" # Max Fij 

322 r"( [ \-0-9]*\.[0-9]{2}E[\-\+][0-9]{2}\*?)" # DNorm 

323 r"( [ \-0-9]*\.[0-9]{2}E[\-\+][0-9]{2}\*?)" # TNorm 

324 r"( [ A-Za-z0-9]*)" # AccCon 

325 r"( [ \.0-9]*)$") # Time in Sec. 

326 

327 scfvalues = [] 

328 line = next(inputfile) 

329 while not line.strip().startswith("Convergence"): 

330 

331 match = re.match(iteration_regex, line.strip()) 

332 if match: 

333 groups = match.groups() 

334 cols = [g.strip() for g in match.groups()] 

335 cols = [c.replace('*', '') for c in cols] 

336 

337 energy = float(cols[4]) 

338 density = float(cols[5]) 

339 fock = float(cols[6]) 

340 dnorm = float(cols[7]) 

341 scfvalues.append([energy, density, fock, dnorm]) 

342 

343 if line.strip() == "--": 

344 self.logger.warning('File terminated before end of last SCF!') 

345 break 

346 

347 line = next(inputfile) 

348 

349 self.append_attribute('scfvalues', scfvalues) 

350 

351 # Harmonic frequencies in cm-1 

352 # 

353 # IR Intensities in km/mol 

354 # 

355 # 1 2 3 4 5 6 

356 # 

357 # Frequency: i60.14 i57.39 128.18 210.06 298.24 309.65 

358 # 

359 # Intensity: 3.177E-03 2.129E-06 4.767E-01 2.056E-01 6.983E-07 1.753E-07 

360 # Red. mass: 2.42030 2.34024 2.68044 3.66414 2.61721 3.34904 

361 # 

362 # C1 x -0.00000 0.00000 0.00000 -0.05921 0.00000 -0.06807 

363 # C1 y 0.00001 -0.00001 -0.00001 0.00889 0.00001 -0.02479 

364 # C1 z -0.03190 0.04096 -0.03872 0.00001 -0.12398 -0.00002 

365 # C2 x -0.00000 0.00001 0.00000 -0.06504 0.00000 -0.03487 

366 # C2 y 0.00000 -0.00000 -0.00000 0.01045 0.00001 -0.05659 

367 # C2 z -0.03703 -0.03449 -0.07269 0.00000 -0.07416 -0.00001 

368 # C3 x -0.00000 0.00001 0.00000 -0.06409 -0.00001 0.05110 

369 # C3 y -0.00000 0.00001 0.00000 0.00152 0.00000 -0.03263 

370 # C3 z -0.03808 -0.08037 -0.07267 -0.00001 0.07305 0.00000 

371 # ... 

372 # H20 y 0.00245 -0.00394 0.03215 0.03444 -0.10424 -0.10517 

373 # H20 z 0.00002 -0.00001 0.00000 -0.00000 -0.00000 0.00000 

374 # 

375 # 

376 # 

377 # ++ Thermochemistry 

378 if line[1:29] == 'Harmonic frequencies in cm-1': 

379 

380 self.skip_line(inputfile, 'blank') 

381 line = next(inputfile) 

382 

383 while 'Thermochemistry' not in line: 

384 

385 if 'Frequency:' in line: 

386 if not hasattr(self, 'vibfreqs'): 

387 self.vibfreqs = [] 

388 vibfreqs = [float(i.replace('i', '-')) for i in line.split()[1:]] 

389 self.vibfreqs.extend(vibfreqs) 

390 

391 if 'Intensity:' in line: 

392 if not hasattr(self, 'vibirs'): 

393 self.vibirs = [] 

394 vibirs = map(float, line.split()[1:]) 

395 self.vibirs.extend(vibirs) 

396 

397 if 'Red.' in line: 

398 if not hasattr(self, 'vibrmasses'): 

399 self.vibrmasses = [] 

400 vibrmasses = map(float, line.split()[2:]) 

401 self.vibrmasses.extend(vibrmasses) 

402 

403 self.skip_line(inputfile, 'blank') 

404 line = next(inputfile) 

405 if not hasattr(self, 'vibdisps'): 

406 self.vibdisps = [] 

407 disps = [] 

408 for n in range(3*self.natom): 

409 numbers = [float(s) for s in line[17:].split()] 

410 # The atomindex should start at 0 instead of 1. 

411 atomindex = int(re.search(r'\d+$', line.split()[0]).group()) - 1 

412 numbermodes = len(numbers) 

413 if len(disps) == 0: 

414 # Appends empty array of the following 

415 # dimensions (numbermodes, natom, 0) to disps. 

416 for mode in range(numbermodes): 

417 disps.append([[] for x in range(0, self.natom)]) 

418 for mode in range(numbermodes): 

419 disps[mode][atomindex].append(numbers[mode]) 

420 line = next(inputfile) 

421 self.vibdisps.extend(disps) 

422 

423 line = next(inputfile) 

424 

425 ## Parsing thermochemistry attributes here 

426 # ++ Thermochemistry 

427 # 

428 # ********************* 

429 # * * 

430 # * THERMOCHEMISTRY * 

431 # * * 

432 # ********************* 

433 # 

434 # Mass-centered Coordinates (Angstrom): 

435 # *********************************************************** 

436 # ... 

437 # ***************************************************** 

438 # Temperature = 0.00 Kelvin, Pressure = 1.00 atm 

439 # ----------------------------------------------------- 

440 # Molecular Partition Function and Molar Entropy: 

441 # q/V (M**-3) S(kcal/mol*K) 

442 # Electronic 0.100000D+01 0.000 

443 # Translational 0.100000D+01 0.000 

444 # Rotational 0.100000D+01 2.981 

445 # Vibrational 0.100000D+01 0.000 

446 # TOTAL 0.100000D+01 2.981 

447 # 

448 # Thermal contributions to INTERNAL ENERGY: 

449 # Electronic 0.000 kcal/mol 0.000000 au. 

450 # Translational 0.000 kcal/mol 0.000000 au. 

451 # Rotational 0.000 kcal/mol 0.000000 au. 

452 # Vibrational 111.885 kcal/mol 0.178300 au. 

453 # TOTAL 111.885 kcal/mol 0.178300 au. 

454 # 

455 # Thermal contributions to 

456 # ENTHALPY 111.885 kcal/mol 0.178300 au. 

457 # GIBBS FREE ENERGY 111.885 kcal/mol 0.178300 au. 

458 # 

459 # Sum of energy and thermal contributions 

460 # INTERNAL ENERGY -382.121931 au. 

461 # ENTHALPY -382.121931 au. 

462 # GIBBS FREE ENERGY -382.121931 au. 

463 # ----------------------------------------------------- 

464 # ... 

465 # ENTHALPY -382.102619 au. 

466 # GIBBS FREE ENERGY -382.179819 au. 

467 # ----------------------------------------------------- 

468 # -- 

469 # 

470 # ++ Isotopic shifts: 

471 if line[4:19] == 'THERMOCHEMISTRY': 

472 

473 temperature_values = [] 

474 pressure_values = [] 

475 entropy_values = [] 

476 internal_energy_values = [] 

477 enthalpy_values = [] 

478 free_energy_values = [] 

479 

480 while 'Isotopic' not in line: 

481 

482 if line[1:12] == 'Temperature': 

483 temperature_values.append(float(line.split()[2])) 

484 pressure_values.append(float(line.split()[6])) 

485 

486 if line[1:48] == 'Molecular Partition Function and Molar Entropy:': 

487 while 'TOTAL' not in line: 

488 line = next(inputfile) 

489 entropy_values.append(utils.convertor(float(line.split()[2]), 'kcal/mol', 'hartree')) 

490 

491 if line[1:40] == 'Sum of energy and thermal contributions': 

492 internal_energy_values.append(float(next(inputfile).split()[2])) 

493 enthalpy_values.append(float(next(inputfile).split()[1])) 

494 free_energy_values.append(float(next(inputfile).split()[3])) 

495 

496 line = next(inputfile) 

497 # When calculations for more than one temperature value are 

498 # performed, the values corresponding to room temperature (298.15 K) 

499 # are returned and if no calculations are performed for 298.15 K, then 

500 # the values corresponding last temperature value are returned. 

501 index = -1 

502 if 298.15 in temperature_values: 

503 index = temperature_values.index(298.15) 

504 

505 self.set_attribute('temperature', temperature_values[index]) 

506 if len(temperature_values) > 1: 

507 self.logger.warning('More than 1 values of temperature found') 

508 

509 self.set_attribute('pressure', pressure_values[index]) 

510 if len(pressure_values) > 1: 

511 self.logger.warning('More than 1 values of pressure found') 

512 

513 self.set_attribute('entropy', entropy_values[index]) 

514 if len(entropy_values) > 1: 

515 self.logger.warning('More than 1 values of entropy found') 

516 

517 self.set_attribute('enthalpy', enthalpy_values[index]) 

518 if len(enthalpy_values) > 1: 

519 self.logger.warning('More than 1 values of enthalpy found') 

520 

521 self.set_attribute('freeenergy', free_energy_values[index]) 

522 if len(free_energy_values) > 1: 

523 self.logger.warning('More than 1 values of freeenergy found') 

524 

525 ## Parsing Geometrical Optimization attributes in this section. 

526 # ++ Slapaf input parameters: 

527 # ------------------------ 

528 # 

529 # Max iterations: 2000 

530 # Convergence test a la Schlegel. 

531 # Convergence criterion on gradient/para.<=: 0.3E-03 

532 # Convergence criterion on step/parameter<=: 0.3E-03 

533 # Convergence criterion on energy change <=: 0.0E+00 

534 # Max change of an internal coordinate: 0.30E+00 

535 # ... 

536 # ... 

537 # ********************************************************************************************************************** 

538 # * Energy Statistics for Geometry Optimization * 

539 # ********************************************************************************************************************** 

540 # Energy Grad Grad Step Estimated Geom Hessian 

541 # Iter Energy Change Norm Max Element Max Element Final Energy Update Update Index 

542 # 1 -382.30023222 0.00000000 0.107221 0.039531 nrc047 0.085726 nrc047 -382.30533799 RS-RFO None 0 

543 # 2 -382.30702964 -0.00679742 0.043573 0.014908 nrc001 0.068195 nrc001 -382.30871333 RS-RFO BFGS 0 

544 # 3 -382.30805348 -0.00102384 0.014883 0.005458 nrc010 -0.020973 nrc001 -382.30822089 RS-RFO BFGS 0 

545 # ... 

546 # ... 

547 # 18 -382.30823419 -0.00000136 0.001032 0.000100 nrc053 0.012319 nrc053 -382.30823452 RS-RFO BFGS 0 

548 # 19 -382.30823198 0.00000221 0.001051 -0.000092 nrc054 0.066565 nrc053 -382.30823822 RS-RFO BFGS 0 

549 # 20 -382.30820252 0.00002946 0.001132 -0.000167 nrc021 -0.064003 nrc053 -382.30823244 RS-RFO BFGS 0 

550 # 

551 # +----------------------------------+----------------------------------+ 

552 # + Cartesian Displacements + Gradient in internals + 

553 # + Value Threshold Converged? + Value Threshold Converged? + 

554 # +-----+----------------------------------+----------------------------------+ 

555 # + RMS + 5.7330E-02 1.2000E-03 No + 1.6508E-04 3.0000E-04 Yes + 

556 # +-----+----------------------------------+----------------------------------+ 

557 # + Max + 1.2039E-01 1.8000E-03 No + 1.6711E-04 4.5000E-04 Yes + 

558 # +-----+----------------------------------+----------------------------------+ 

559 if 'Convergence criterion on energy change' in line: 

560 self.energy_threshold = float(line.split()[6]) 

561 # If energy change threshold equals zero, 

562 # then energy change is not a criteria for convergence. 

563 if self.energy_threshold == 0: 

564 self.energy_threshold = numpy.inf 

565 

566 if 'Energy Statistics for Geometry Optimization' in line: 

567 if not hasattr(self, 'geovalues'): 

568 self.geovalues = [] 

569 

570 self.skip_lines(inputfile, ['stars', 'header']) 

571 line = next(inputfile) 

572 assert 'Iter Energy Change Norm' in line 

573 # A variable keeping track of ongoing iteration. 

574 iter_number = len(self.geovalues) + 1 

575 # Iterate till blank line. 

576 while line.split() != []: 

577 for i in range(iter_number): 

578 line = next(inputfile) 

579 self.geovalues.append([float(line.split()[2])]) 

580 line = next(inputfile) 

581 # Along with energy change, RMS and Max values of change in 

582 # Cartesian Diaplacement and Gradients are used as optimization 

583 # criteria. 

584 self.skip_lines(inputfile, ['border', 'header', 'header', 'border']) 

585 line = next(inputfile) 

586 assert '+ RMS +' in line 

587 line_rms = line.split() 

588 line = next(inputfile) 

589 line_max = next(inputfile).split() 

590 if not hasattr(self, 'geotargets'): 

591 # The attribute geotargets is an array consisting of the following 

592 # values: [Energy threshold, Max Gradient threshold, RMS Gradient threshold, \ 

593 # Max Displacements threshold, RMS Displacements threshold]. 

594 max_gradient_threshold = float(line_max[8]) 

595 rms_gradient_threshold = float(line_rms[8]) 

596 max_displacement_threshold = float(line_max[4]) 

597 rms_displacement_threshold = float(line_rms[4]) 

598 self.geotargets = [self.energy_threshold, max_gradient_threshold, rms_gradient_threshold, max_displacement_threshold, rms_displacement_threshold] 

599 

600 max_gradient_change = float(line_max[7]) 

601 rms_gradient_change = float(line_rms[7]) 

602 max_displacement_change = float(line_max[3]) 

603 rms_displacement_change = float(line_rms[3]) 

604 self.geovalues[iter_number - 1].extend([max_gradient_change, rms_gradient_change, max_displacement_change, rms_displacement_change]) 

605 

606 # ********************************************************* 

607 # * Nuclear coordinates for the next iteration / Angstrom * 

608 # ********************************************************* 

609 # ATOM X Y Z 

610 # C1 0.235560 -1.415847 0.012012 

611 # C2 1.313797 -0.488199 0.015149 

612 # C3 1.087050 0.895510 0.014200 

613 # ... 

614 # ... 

615 # H19 -0.021327 -4.934915 -0.029355 

616 # H20 -1.432030 -3.721047 -0.039835 

617 # 

618 # -- 

619 if 'Nuclear coordinates for the next iteration / Angstrom' in line: 

620 self.skip_lines(inputfile, ['s', 'header']) 

621 line = next(inputfile) 

622 

623 atomcoords = [] 

624 while line.split() != []: 

625 atomcoords.append([float(c) for c in line.split()[1:]]) 

626 line = next(inputfile) 

627 

628 if len(atomcoords) == self.natom: 

629 self.atomcoords.append(atomcoords) 

630 else: 

631 self.logger.warning( 

632 "Parsed coordinates not consistent with previous, skipping. " 

633 "This could be due to symmetry being turned on during the job. " 

634 "Length was %i, now found %i. New coordinates: %s" 

635 % (len(self.atomcoords[-1]), len(atomcoords), str(atomcoords))) 

636 

637 # ********************************************************************************************************************** 

638 # * Energy Statistics for Geometry Optimization * 

639 # ********************************************************************************************************************** 

640 # Energy Grad Grad Step Estimated Geom Hessian 

641 # Iter Energy Change Norm Max Element Max Element Final Energy Update Update Index 

642 # 1 -382.30023222 0.00000000 0.107221 0.039531 nrc047 0.085726 nrc047 -382.30533799 RS-RFO None 0 

643 # ... 

644 # ... 

645 # 23 -382.30823115 -0.00000089 0.001030 0.000088 nrc053 0.000955 nrc053 -382.30823118 RS-RFO BFGS 0 

646 # 

647 # +----------------------------------+----------------------------------+ 

648 # + Cartesian Displacements + Gradient in internals + 

649 # + Value Threshold Converged? + Value Threshold Converged? + 

650 # +-----+----------------------------------+----------------------------------+ 

651 # + RMS + 7.2395E-04 1.2000E-03 Yes + 2.7516E-04 3.0000E-04 Yes + 

652 # +-----+----------------------------------+----------------------------------+ 

653 # + Max + 1.6918E-03 1.8000E-03 Yes + 8.7768E-05 4.5000E-04 Yes + 

654 # +-----+----------------------------------+----------------------------------+ 

655 # 

656 # Geometry is converged in 23 iterations to a Minimum Structure 

657 if 'Geometry is converged' in line: 

658 if not hasattr(self, 'optdone'): 

659 self.optdone = [] 

660 self.optdone.append(len(self.atomcoords)) 

661 

662 # ********************************************************* 

663 # * Nuclear coordinates of the final structure / Angstrom * 

664 # ********************************************************* 

665 # ATOM X Y Z 

666 # C1 0.235547 -1.415838 0.012193 

667 # C2 1.313784 -0.488201 0.015297 

668 # C3 1.087036 0.895508 0.014333 

669 # ... 

670 # ... 

671 # H19 -0.021315 -4.934913 -0.029666 

672 # H20 -1.431994 -3.721026 -0.041078 

673 if 'Nuclear coordinates of the final structure / Angstrom' in line: 

674 self.skip_lines(inputfile, ['s', 'header']) 

675 line = next(inputfile) 

676 

677 atomcoords = [] 

678 

679 while line.split() != []: 

680 atomcoords.append([float(c) for c in line.split()[1:]]) 

681 line = next(inputfile) 

682 

683 if len(atomcoords) == self.natom: 

684 self.atomcoords.append(atomcoords) 

685 else: 

686 self.logger.error( 

687 'Number of atoms (%d) in parsed atom coordinates ' 

688 'is smaller than previously (%d), possibly due to ' 

689 'symmetry. Ignoring these coordinates.' 

690 % (len(atomcoords), self.natom)) 

691 

692 ## Parsing Molecular Gradients attributes in this section. 

693 # ()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()() 

694 #  

695 # &ALASKA 

696 #  

697 # only a single process is used 

698 # available to each process: 2.0 GB of memory, 1 thread 

699 # ()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()() 

700 # ... 

701 # ... 

702 # ************************************************** 

703 # * * 

704 # * Molecular gradients * 

705 # * * 

706 # ************************************************** 

707 #  

708 # Irreducible representation: a  

709 # --------------------------------------------------------- 

710 # X Y Z  

711 # --------------------------------------------------------- 

712 # C1 -0.00009983 -0.00003043 0.00001004 

713 # ... 

714 # H20 -0.00027629 0.00010546 0.00003317 

715 # --------------------------------------------------- 

716 # WARNING: "Molecular gradients, after ESPF" is found for ESPF QM/MM calculations 

717 if "Molecular gradients " in line: 

718 

719 if not hasattr(self, "grads"): 

720 self.grads = [] 

721 

722 self.skip_lines(inputfile, ['stars', 'stars', 'blank', 'header', 

723 'dashes', 'header', 'dashes']) 

724 

725 grads = [] 

726 line = next(inputfile) 

727 while len(line.split()) == 4: 

728 tmpgrads = list(map(float, line.split()[1:])) 

729 grads.append(tmpgrads) 

730 line = next(inputfile) 

731 

732 self.append_attribute('grads', grads) 

733 

734 # This code here works, but QM/MM gradients are printed after QM ones. 

735 # Maybe another attribute is needed to store them to have both. 

736 if "Molecular gradients, after ESPF" in line: 

737 

738 self.skip_lines(inputfile, ['stars', 'stars', 'blank', 'header', 

739 'dashes', 'header', 'dashes']) 

740 

741 grads = [] 

742 line = next(inputfile) 

743 while len(line.split()) == 4: 

744 tmpgrads = list(map(float, line.split()[1:])) 

745 grads.append(tmpgrads) 

746 line = next(inputfile) 

747 

748 self.grads[-1] = grads 

749 

750 ### 

751 # All orbitals with orbital energies smaller than E(LUMO)+0.5 are printed 

752 # 

753 # ++ Molecular orbitals: 

754 # ------------------- 

755 # 

756 # Title: RKS-DFT orbitals 

757 # 

758 # Molecular orbitals for symmetry species 1: a 

759 # 

760 # Orbital 1 2 3 4 5 6 7 8 9 10 

761 # Energy -10.0179 -10.0179 -10.0075 -10.0075 -10.0066 -10.0066 -10.0056 -10.0055 -9.9919 -9.9919 

762 # Occ. No. 2.0000 2.0000 2.0000 2.0000 2.0000 2.0000 2.0000 2.0000 2.0000 2.0000 

763 # 

764 # 1 C1 1s -0.6990 0.6989 0.0342 0.0346 0.0264 -0.0145 -0.0124 -0.0275 -0.0004 -0.0004 

765 # 2 C1 2s -0.0319 0.0317 -0.0034 -0.0033 -0.0078 0.0034 0.0041 0.0073 -0.0002 -0.0002 

766 # ... 

767 # ... 

768 # 58 H18 1s 0.2678 

769 # 59 H19 1s -0.2473 

770 # 60 H20 1s 0.1835 

771 # -- 

772 if '++ Molecular orbitals:' in line: 

773 

774 self.skip_lines(inputfile, ['d', 'b']) 

775 line = next(inputfile) 

776 

777 # We don't currently support parsing natural orbitals or active space orbitals. 

778 if 'Natural orbitals' not in line and "Pseudonatural" not in line: 

779 self.skip_line(inputfile, 'b') 

780 

781 # Symmetry is not currently supported, so this line can have one form. 

782 while 'Molecular orbitals for symmetry species 1: a' not in line.strip(): 

783 line = next(inputfile) 

784 

785 # Symmetry is not currently supported, so this line can have one form. 

786 if line.strip() != 'Molecular orbitals for symmetry species 1: a': 

787 return 

788 

789 line = next(inputfile) 

790 moenergies = [] 

791 homos = 0 

792 mocoeffs = [] 

793 while line[:2] != '--': 

794 line = next(inputfile) 

795 if line.strip().startswith('Orbital'): 

796 orbital_index = line.split()[1:] 

797 for i in orbital_index: 

798 mocoeffs.append([]) 

799 

800 if 'Energy' in line: 

801 energies = [utils.convertor(float(x), 'hartree', 'eV') for x in line.split()[1:]] 

802 moenergies.extend(energies) 

803 

804 if 'Occ. No.' in line: 

805 for i in line.split()[2:]: 

806 if float(i) != 0: 

807 homos += 1 

808 

809 aonames = [] 

810 tokens = line.split() 

811 if tokens and tokens[0] == '1': 

812 while tokens and tokens[0] != '--': 

813 aonames.append("{atom}_{orbital}".format(atom=tokens[1], orbital=tokens[2])) 

814 info = tokens[3:] 

815 j = 0 

816 for i in orbital_index: 

817 mocoeffs[int(i)-1].append(float(info[j])) 

818 j += 1 

819 line = next(inputfile) 

820 tokens = line.split() 

821 self.set_attribute('aonames', aonames) 

822 

823 if len(moenergies) != self.nmo: 

824 moenergies.extend([numpy.nan for x in range(self.nmo - len(moenergies))]) 

825 

826 self.append_attribute('moenergies', moenergies) 

827 

828 if not hasattr(self, 'homos'): 

829 self.homos = [] 

830 self.homos.extend([homos-1]) 

831 

832 while len(mocoeffs) < self.nmo: 

833 nan_array = [numpy.nan for i in range(self.nbasis)] 

834 mocoeffs.append(nan_array) 

835 

836 self.append_attribute('mocoeffs', mocoeffs) 

837 

838 ## Parsing MP energy from the &MBPT2 module. 

839 # Conventional algorithm used... 

840 # 

841 # SCF energy = -74.9644564043 a.u. 

842 # Second-order correlation energy = -0.0364237923 a.u. 

843 # 

844 # Total energy = -75.0008801966 a.u. 

845 # Reference weight ( Cref**2 ) = 0.98652 

846 # 

847 # :: Total MBPT2 energy -75.0008801966 

848 # 

849 # 

850 # Zeroth-order energy (E0) = -36.8202538520 a.u. 

851 # 

852 # Shanks-type energy S1(E) = -75.0009150108 a.u. 

853 if 'Total MBPT2 energy' in line: 

854 mpenergies = [] 

855 mpenergies.append(utils.convertor(utils.float(line.split()[4]), 'hartree', 'eV')) 

856 if not hasattr(self, 'mpenergies'): 

857 self.mpenergies = [] 

858 self.mpenergies.append(mpenergies) 

859 

860 # Parsing data ccenergies from &CCSDT module. 

861 # --- Start Module: ccsdt at Thu Jul 26 14:03:23 2018 --- 

862 # 

863 # ()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()() 

864 # 

865 # &CCSDT 

866 # ... 

867 # ... 

868 # 14 -75.01515915 -0.05070274 -0.00000029 

869 # 15 -75.01515929 -0.05070289 -0.00000014 

870 # 16 -75.01515936 -0.05070296 -0.00000007 

871 # Convergence after 17 Iterations 

872 # 

873 # 

874 # Total energy (diff) : -75.01515936 -0.00000007 

875 # Correlation energy : -0.0507029554992 

876 if 'Start Module: ccsdt' in line: 

877 self.skip_lines(inputfile, ['b', '()', 'b']) 

878 line = next(inputfile) 

879 if '&CCSDT' in line: 

880 while not line.strip().startswith('Total energy (diff)'): 

881 line = next(inputfile) 

882 

883 ccenergies = utils.convertor(utils.float(line.split()[4]), 'hartree', 'eV') 

884 if not hasattr(self, 'ccenergies'): 

885 self.ccenergies= [] 

886 self.ccenergies.append(ccenergies) 

887 

888 # ++ Primitive basis info: 

889 # --------------------- 

890 # 

891 # 

892 # ***************************************************** 

893 # ******** Primitive Basis Functions (Valence) ******** 

894 # ***************************************************** 

895 # 

896 # 

897 # Basis set:C.AUG-CC-PVQZ.........  

898 # 

899 # Type  

900 # s 

901 # No. Exponent Contraction Coefficients 

902 # 1 0.339800000D+05 0.000091 -0.000019 0.000000 0.000000 0.000000 0.000000 

903 # 2 0.508900000D+04 0.000704 -0.000151 0.000000 0.000000 0.000000 0.000000 

904 # ... 

905 # ... 

906 # 29 0.424000000D+00 0.000000 1.000000 

907 # 

908 # Number of primitives 93 

909 # Number of basis functions 80 

910 # 

911 # -- 

912 if line.startswith('++ Primitive basis info:'): 

913 self.skip_lines(inputfile, ['d', 'b', 'b', 's', 'header', 's', 'b']) 

914 line = next(inputfile) 

915 gbasis_array = [] 

916 while '--' not in line and '****' not in line: 

917 if 'Basis set:' in line: 

918 basis_element_patterns = re.findall(r'Basis set:([A-Za-z]{1,2})\.', line) 

919 assert len(basis_element_patterns) == 1 

920 basis_element = basis_element_patterns[0].title() 

921 gbasis_array.append((basis_element, [])) 

922 

923 if 'Type' in line: 

924 line = next(inputfile) 

925 shell_type = line.split()[0].upper() 

926 

927 self.skip_line(inputfile, 'headers') 

928 line = next(inputfile) 

929 

930 exponents = [] 

931 coefficients = [] 

932 func_array = [] 

933 while line.split(): 

934 exponents.append(utils.float(line.split()[1])) 

935 coefficients.append([utils.float(i) for i in line.split()[2:]]) 

936 line = next(inputfile) 

937 

938 for i in range(len(coefficients[0])): 

939 func_tuple = (shell_type, []) 

940 for iexp, exp in enumerate(exponents): 

941 coeff = coefficients[iexp][i] 

942 if coeff != 0: 

943 func_tuple[1].append((exp, coeff)) 

944 gbasis_array[-1][1].append(func_tuple) 

945 

946 line = next(inputfile) 

947 

948 atomsymbols = [self.table.element[atomno] for atomno in self.atomnos] 

949 self.gbasis = [[] for i in range(self.natom)] 

950 for element, gbasis in gbasis_array: 

951 mask = [element == possible_element for possible_element in atomsymbols] 

952 indices = [i for (i, x) in enumerate(mask) if x] 

953 for index in indices: 

954 self.gbasis[index] = gbasis 

955 

956 # ++ Basis set information: 

957 # ---------------------- 

958 # ... 

959 # Basis set label: MO.ECP.HAY-WADT.5S6P4D.3S3P2D.14E-LANL2DZ..... 

960 # 

961 # Electronic valence basis set: 

962 # ------------------ 

963 # Associated Effective Charge 14.000000 au 

964 # Associated Actual Charge 42.000000 au 

965 # Nuclear Model: Point charge 

966 # ... 

967 # 

968 # Effective Core Potential specification: 

969 # ======================================= 

970 # 

971 # Label Cartesian Coordinates / Bohr 

972 # 

973 # MO 0.0006141610 -0.0006141610 0.0979067106 

974 # -- 

975 if '++ Basis set information:' in line: 

976 self.core_array = [] 

977 basis_element = None 

978 ncore = 0 

979 

980 while line[:2] != '--': 

981 if 'Basis set label' in line: 

982 try: 

983 basis_element = line.split()[3].split('.')[0] 

984 basis_element = basis_element[0] + basis_element[1:].lower() 

985 except: 

986 self.logger.warning('Basis set label is missing!') 

987 basis_element = '' 

988 if 'valence basis set:' in line.lower(): 

989 self.skip_line(inputfile, 'd') 

990 line = next(inputfile) 

991 if 'Associated Effective Charge' in line: 

992 effective_charge = float(line.split()[3]) 

993 actual_charge = float(next(inputfile).split()[3]) 

994 element = self.table.element[int(actual_charge)] 

995 ncore = int(actual_charge - effective_charge) 

996 if basis_element: 

997 assert basis_element == element 

998 else: 

999 basis_element = element 

1000 

1001 if basis_element and ncore: 

1002 self.core_array.append((basis_element, ncore)) 

1003 basis_element = '' 

1004 ncore = 0 

1005 

1006 line = next(inputfile)