Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2# 

3# Copyright (c) 2020, the cclib development team 

4# 

5# This file is part of cclib (http://cclib.github.io) and is distributed under 

6# the terms of the BSD 3-Clause License. 

7 

8"""Parser for ORCA output files""" 

9 

10 

11import re 

12from itertools import zip_longest 

13 

14import numpy 

15from packaging.version import parse as parse_version 

16 

17from cclib.parser import logfileparser 

18from cclib.parser import utils 

19 

20 

21class ORCA(logfileparser.Logfile): 

22 """An ORCA log file.""" 

23 

24 def __init__(self, *args, **kwargs): 

25 

26 # Call the __init__ method of the superclass 

27 super(ORCA, self).__init__(logname="ORCA", *args, **kwargs) 

28 

29 def __str__(self): 

30 """Return a string representation of the object.""" 

31 return "ORCA log file %s" % (self.filename) 

32 

33 def __repr__(self): 

34 """Return a representation of the object.""" 

35 return 'ORCA("%s")' % (self.filename) 

36 

37 def normalisesym(self, label): 

38 """ORCA does not require normalizing symmetry labels.""" 

39 return label 

40 

41 def before_parsing(self): 

42 

43 # A geometry optimization is started only when 

44 # we parse a cycle (so it will be larger than zero(). 

45 self.gopt_cycle = 0 

46 

47 # Keep track of whether this is a relaxed scan calculation 

48 self.is_relaxed_scan = False 

49 

50 def after_parsing(self): 

51 # ORCA doesn't add the dispersion energy to the "Total energy" (which 

52 # we parse), only to the "FINAL SINGLE POINT ENERGY" (which we don't 

53 # parse). 

54 if hasattr(self, "scfenergies") and hasattr(self, "dispersionenergies"): 

55 for i, (scfenergy, dispersionenergy) in enumerate( 

56 zip_longest(self.scfenergies, self.dispersionenergies) 

57 ): 

58 # It isn't as problematic if there are more dispersion than 

59 # SCF energies, since all dispersion energies can still be 

60 # added to the SCF energies, hence the difference in log level. 

61 if dispersionenergy is None: 

62 self.logger.error( 

63 "The number of SCF and dispersion energies are not equal: %d vs. %d, " 

64 "can't add dispersion energy to all SCF energies", 

65 len(self.scfenergies), 

66 len(self.dispersionenergies) 

67 ) 

68 break 

69 if scfenergy is None: 

70 self.logger.warning( 

71 "The number of SCF and dispersion energies are not equal: %d vs. %d, " 

72 "can't add dispersion energy to all SCF energies", 

73 len(self.scfenergies), 

74 len(self.dispersionenergies) 

75 ) 

76 break 

77 self.scfenergies[i] += dispersionenergy 

78 

79 def extract(self, inputfile, line): 

80 """Extract information from the file object inputfile.""" 

81 

82 # Extract the version number. 

83 if "Program Version" == line.strip()[:15]: 

84 # Handle development versions. 

85 self.metadata["legacy_package_version"] = line.split()[2] 

86 self.metadata["package_version"] = self.metadata["legacy_package_version"].replace(".x", "dev") 

87 possible_revision_line = next(inputfile) 

88 if "SVN: $Rev" in possible_revision_line: 

89 self.metadata["package_version"] += "+{}".format( 

90 re.search(r"\d+", possible_revision_line).group() 

91 ) 

92 

93 

94 # ================================================================================ 

95 # WARNINGS 

96 # Please study these warnings very carefully! 

97 # ================================================================================ 

98 # 

99 # Warning: TCutStore was < 0. Adjusted to Thresh (uncritical) 

100 # 

101 # WARNING: your system is open-shell and RHF/RKS was chosen 

102 # ===> : WILL SWITCH to UHF/UKS 

103 # 

104 # 

105 # INFO : the flag for use of LIBINT has been found! 

106 # 

107 # ================================================================================ 

108 if "WARNINGS" == line.strip(): 

109 self.skip_lines(inputfile, ['text', '=', 'blank']) 

110 if 'warnings' not in self.metadata: 

111 self.metadata['warnings'] = [] 

112 if 'info' not in self.metadata: 

113 self.metadata['info'] = [] 

114 

115 line = next(inputfile) 

116 while line[0] != '=': 

117 if line.lower()[:7] == 'warning': 

118 self.metadata['warnings'].append('') 

119 while len(line) > 1: 

120 self.metadata['warnings'][-1] += line[9:].strip() 

121 line = next(inputfile) 

122 elif line.lower()[:4] == 'info': 

123 self.metadata['info'].append('') 

124 while len(line) > 1: 

125 self.metadata['info'][-1] += line[9:].strip() 

126 line = next(inputfile) 

127 line = next(inputfile) 

128 

129 # ================================================================================ 

130 # INPUT FILE 

131 # ================================================================================ 

132 # NAME = input.dat 

133 # | 1> %pal nprocs 4 end 

134 # | 2> ! B3LYP def2-svp 

135 # | 3> ! Grid4 

136 # | 4> 

137 # | 5> *xyz 0 3 

138 # | 6> O 0 0 0 

139 # | 7> O 0 0 1.5 

140 # | 8> * 

141 # | 9> 

142 # | 10> ****END OF INPUT**** 

143 # ================================================================================ 

144 if "INPUT FILE" == line.strip(): 

145 self.skip_line(inputfile, '=') 

146 self.metadata['input_file_name'] = next(inputfile).split()[-1] 

147 

148 # First, collect all the lines... 

149 lines = [] 

150 for line in inputfile: 

151 if line[0] != '|': 

152 break 

153 lines.append(line[6:]) 

154 

155 self.metadata['input_file_contents'] = ''.join(lines[:-1]) 

156 lines_iter = iter(lines[:-1]) 

157 

158 keywords = [] 

159 coords = [] 

160 # ...then parse them separately. 

161 for line in lines_iter: 

162 line = line.strip() 

163 if not line: 

164 continue 

165 

166 # Keywords block 

167 if line[0] == '!': 

168 keywords += line[1:].split() 

169 

170 # Impossible to parse without knowing whether a keyword opens a new block 

171 elif line[0] == '%': 

172 pass 

173 

174 # Geometry block 

175 elif line[0] == '*': 

176 coord_type, charge, multiplicity = line[1:].split()[:3] 

177 self.set_attribute('charge', int(charge)) 

178 self.set_attribute('multiplicity', int(multiplicity)) 

179 coord_type = coord_type.lower() 

180 self.metadata['coord_type'] = coord_type 

181 if coord_type == 'xyz': 

182 def splitter(line): 

183 atom, x, y, z = line.split()[:4] 

184 return [atom, float(x), float(y), float(z)] 

185 elif coord_type in ['int', 'internal']: 

186 def splitter(line): 

187 atom, a1, a2, a3, bond, angle, dihedral = line.split()[:7] 

188 return [atom, int(a1), int(a2), int(a3), float(bond), float(angle), float(dihedral)] 

189 elif coord_type == 'gzmt': 

190 def splitter(line): 

191 vals = line.split()[:7] 

192 if len(vals) == 7: 

193 atom, a1, bond, a2, angle, a3, dihedral = vals 

194 return [atom, int(a1), float(bond), int(a2), float(angle), int(a3), float(dihedral)] 

195 elif len(vals) == 5: 

196 return [vals[0], int(vals[1]), float(vals[2]), int(vals[3]), float(vals[4])] 

197 elif len(vals) == 3: 

198 return [vals[0], int(vals[1]), float(vals[2])] 

199 elif len(vals) == 1: 

200 return [vals[0]] 

201 self.logger.warning('Incorrect number of atoms in input geometry.') 

202 elif 'file' in coord_type: 

203 pass 

204 else: 

205 self.logger.warning('Invalid coordinate type.') 

206 

207 if 'file' not in coord_type: 

208 for line in lines_iter: 

209 if not line: 

210 continue 

211 if line[0] == '#' or line.strip(' ') == '\n': 

212 continue 

213 if line.strip()[0] == '*' or line.strip() == "end": 

214 break 

215 # Strip basis specification that can appear after coordinates 

216 line = line.split('newGTO')[0].strip() 

217 coords.append(splitter(line)) 

218 

219 self.metadata['keywords'] = keywords 

220 self.metadata['coords'] = coords 

221 

222 if line[0:15] == "Number of atoms": 

223 

224 natom = int(line.split()[-1]) 

225 self.set_attribute('natom', natom) 

226 

227 if line[1:13] == "Total Charge": 

228 

229 charge = int(line.split()[-1]) 

230 self.set_attribute('charge', charge) 

231 

232 line = next(inputfile) 

233 

234 mult = int(line.split()[-1]) 

235 self.set_attribute('mult', mult) 

236 

237 # SCF convergence output begins with: 

238 # 

239 # -------------- 

240 # SCF ITERATIONS 

241 # -------------- 

242 # 

243 # However, there are two common formats which need to be handled, implemented as separate functions. 

244 if line.strip() == "SCF ITERATIONS": 

245 

246 self.skip_line(inputfile, 'dashes') 

247 

248 line = next(inputfile) 

249 columns = line.split() 

250 # "Starting incremental Fock matrix formation" doesn't 

251 # necessarily appear before the extended format. 

252 if not columns: 

253 self.parse_scf_expanded_format(inputfile, columns) 

254 # A header with distinct columns indicates the condensed 

255 # format. 

256 elif columns[1] == "Energy": 

257 self.parse_scf_condensed_format(inputfile, columns) 

258 # Assume the extended format. 

259 else: 

260 self.parse_scf_expanded_format(inputfile, columns) 

261 

262 # Information about the final iteration, which also includes the convergence 

263 # targets and the convergence values, is printed separately, in a section like this: 

264 # 

265 # ***************************************************** 

266 # * SUCCESS * 

267 # * SCF CONVERGED AFTER 9 CYCLES * 

268 # ***************************************************** 

269 # 

270 # ... 

271 # 

272 # Total Energy : -382.04963064 Eh -10396.09898 eV 

273 # 

274 # ... 

275 # 

276 # ------------------------- ---------------- 

277 # FINAL SINGLE POINT ENERGY -382.049630637 

278 # ------------------------- ---------------- 

279 # 

280 # We cannot use this last message as a stop condition in general, because 

281 # often there is vibrational output before it. So we use the 'Total Energy' 

282 # line. However, what comes after that is different for single point calculations 

283 # and in the inner steps of geometry optimizations. 

284 if "SCF CONVERGED AFTER" in line: 

285 

286 if not hasattr(self, "scfenergies"): 

287 self.scfenergies = [] 

288 if not hasattr(self, "scfvalues"): 

289 self.scfvalues = [] 

290 if not hasattr(self, "scftargets"): 

291 self.scftargets = [] 

292 

293 while not "Total Energy :" in line: 

294 line = next(inputfile) 

295 energy = utils.convertor(float(line.split()[3]), "hartree", "eV") 

296 self.scfenergies.append(energy) 

297 

298 self._append_scfvalues_scftargets(inputfile, line) 

299 

300 # Sometimes the SCF does not converge, but does not halt the 

301 # the run (like in bug 3184890). In this this case, we should 

302 # remain consistent and use the energy from the last reported 

303 # SCF cycle. In this case, ORCA print a banner like this: 

304 # 

305 # ***************************************************** 

306 # * ERROR * 

307 # * SCF NOT CONVERGED AFTER 8 CYCLES * 

308 # ***************************************************** 

309 if "SCF NOT CONVERGED AFTER" in line: 

310 

311 if not hasattr(self, "scfenergies"): 

312 self.scfenergies = [] 

313 if not hasattr(self, "scfvalues"): 

314 self.scfvalues = [] 

315 if not hasattr(self, "scftargets"): 

316 self.scftargets = [] 

317 

318 energy = utils.convertor(self.scfvalues[-1][-1][0], "hartree", "eV") 

319 self.scfenergies.append(energy) 

320 

321 self._append_scfvalues_scftargets(inputfile, line) 

322 

323 """ 

324------------------------------------------------------------------------------- 

325 DFT DISPERSION CORRECTION 

326 

327 DFTD3 V3.1 Rev 1 

328 USING zero damping 

329------------------------------------------------------------------------------- 

330The omegaB97X-D3 functional is recognized. Fit by Chai et al. 

331Active option DFTDOPT ... 3 

332 

333molecular C6(AA) [au] = 9563.878941 

334 

335 

336 DFT-D V3 

337 parameters 

338 s6 scaling factor : 1.0000 

339 rs6 scaling factor : 1.2810 

340 s8 scaling factor : 1.0000 

341 rs8 scaling factor : 1.0940 

342 Damping factor alpha6 : 14.0000 

343 Damping factor alpha8 : 16.0000 

344 ad hoc parameters k1-k3 : 16.0000 1.3333 -4.0000 

345 

346 Edisp/kcal,au: -10.165629059768 -0.016199959356 

347 E6 /kcal : -4.994512983 

348 E8 /kcal : -5.171116077 

349 % E8 : 50.868628459 

350 

351------------------------- ---------------- 

352Dispersion correction -0.016199959 

353------------------------- ---------------- 

354""" 

355 if "DFT DISPERSION CORRECTION" in line: 

356 # A bunch of parameters are printed the first time dispersion is called 

357 # However, they vary wildly in form and number, making parsing problematic 

358 line = next(inputfile) 

359 while 'Dispersion correction' not in line: 

360 line = next(inputfile) 

361 dispersion = utils.convertor(float(line.split()[-1]), "hartree", "eV") 

362 self.append_attribute("dispersionenergies", dispersion) 

363 

364 # The convergence targets for geometry optimizations are printed at the 

365 # beginning of the output, although the order and their description is 

366 # different than later on. So, try to standardize the names of the criteria 

367 # and save them for later so that we can get the order right. 

368 # 

369 # ***************************** 

370 # * Geometry Optimization Run * 

371 # ***************************** 

372 # 

373 # Geometry optimization settings: 

374 # Update method Update .... BFGS 

375 # Choice of coordinates CoordSys .... Redundant Internals 

376 # Initial Hessian InHess .... Almoef's Model 

377 # 

378 # Convergence Tolerances: 

379 # Energy Change TolE .... 5.0000e-06 Eh 

380 # Max. Gradient TolMAXG .... 3.0000e-04 Eh/bohr 

381 # RMS Gradient TolRMSG .... 1.0000e-04 Eh/bohr 

382 # Max. Displacement TolMAXD .... 4.0000e-03 bohr 

383 # RMS Displacement TolRMSD .... 2.0000e-03 bohr 

384 # 

385 if line[25:50] == "Geometry Optimization Run": 

386 

387 stars = next(inputfile) 

388 blank = next(inputfile) 

389 

390 line = next(inputfile) 

391 while line[0:23] != "Convergence Tolerances:": 

392 line = next(inputfile) 

393 

394 if hasattr(self, 'geotargets'): 

395 self.logger.warning('The geotargets attribute should not exist yet. There is a problem in the parser.') 

396 self.geotargets = [] 

397 self.geotargets_names = [] 

398 

399 # There should always be five tolerance values printed here. 

400 for i in range(5): 

401 line = next(inputfile) 

402 name = line[:25].strip().lower().replace('.', '').replace('displacement', 'step') 

403 target = float(line.split()[-2]) 

404 self.geotargets_names.append(name) 

405 self.geotargets.append(target) 

406 

407 # The convergence targets for relaxed surface scan steps are printed at the 

408 # beginning of the output, although the order and their description is 

409 # different than later on. So, try to standardize the names of the criteria 

410 # and save them for later so that we can get the order right. 

411 # 

412 # ************************************************************* 

413 # * RELAXED SURFACE SCAN STEP 12 * 

414 # * * 

415 # * Dihedral ( 11, 10, 3, 4) : 180.00000000 * 

416 # ************************************************************* 

417 # 

418 # Geometry optimization settings: 

419 # Update method Update .... BFGS 

420 # Choice of coordinates CoordSys .... Redundant Internals 

421 # Initial Hessian InHess .... Almoef's Model 

422 # 

423 # Convergence Tolerances: 

424 # Energy Change TolE .... 5.0000e-06 Eh 

425 # Max. Gradient TolMAXG .... 3.0000e-04 Eh/bohr 

426 # RMS Gradient TolRMSG .... 1.0000e-04 Eh/bohr 

427 # Max. Displacement TolMAXD .... 4.0000e-03 bohr 

428 # RMS Displacement TolRMSD .... 2.0000e-03 bohr 

429 if line[25:50] == "RELAXED SURFACE SCAN STEP": 

430 

431 self.is_relaxed_scan = True 

432 blank = next(inputfile) 

433 info = next(inputfile) 

434 stars = next(inputfile) 

435 blank = next(inputfile) 

436 

437 line = next(inputfile) 

438 while line[0:23] != "Convergence Tolerances:": 

439 line = next(inputfile) 

440 

441 self.geotargets = [] 

442 self.geotargets_names = [] 

443 

444 # There should always be five tolerance values printed here. 

445 for i in range(5): 

446 line = next(inputfile) 

447 name = line[:25].strip().lower().replace('.', '').replace('displacement', 'step') 

448 target = float(line.split()[-2]) 

449 self.geotargets_names.append(name) 

450 self.geotargets.append(target) 

451 

452 

453 # Moller-Plesset energies. 

454 # 

455 # --------------------------------------- 

456 # MP2 TOTAL ENERGY: -76.112119693 Eh 

457 # --------------------------------------- 

458 if 'MP2 TOTAL ENERGY' in line[:16]: 

459 

460 if not hasattr(self, 'mpenergies'): 

461 self.metadata['methods'].append('MP2') 

462 self.mpenergies = [] 

463 

464 self.mpenergies.append([]) 

465 mp2energy = utils.float(line.split()[-2]) 

466 self.mpenergies[-1].append(utils.convertor(mp2energy, 'hartree', 'eV')) 

467 

468 # MP2 energy output line is different for MP3, since it uses the MDCI 

469 # code, which is also in charge of coupled cluster. 

470 # 

471 # MP3 calculation: 

472 # E(MP2) = -76.112119775 EC(MP2)= -0.128216417 

473 # E(MP3) = -76.113783480 EC(MP3)= -0.129880122 E3= -0.001663705 

474 # 

475 # CCSD calculation: 

476 # E(MP2) ... -0.393722942 

477 # Initial E(tot) ... -1639.631576169 

478 # <T|T> ... 0.087231847 

479 # Number of pairs included ... 55 

480 # Total number of pairs ... 55 

481 if 'E(MP2)' in line: 

482 

483 if not hasattr(self, 'mpenergies'): 

484 self.mpenergies = [] 

485 

486 self.mpenergies.append([]) 

487 mp2energy = utils.float(line.split()[-1]) 

488 self.mpenergies[-1].append(utils.convertor(mp2energy, 'hartree', 'eV')) 

489 

490 line = next(inputfile) 

491 if line[:6] == 'E(MP3)': 

492 self.metadata['methods'].append('MP3') 

493 mp3energy = utils.float(line.split()[2]) 

494 self.mpenergies[-1].append(utils.convertor(mp3energy, 'hartree', 'eV')) 

495 else: 

496 assert line[:14] == 'Initial E(tot)' 

497 

498 # ---------------------- 

499 # COUPLED CLUSTER ENERGY 

500 # ---------------------- 

501 # 

502 # E(0) ... -1639.237853227 

503 # E(CORR) ... -0.360153516 

504 # E(TOT) ... -1639.598006742 

505 # Singles Norm <S|S>**1/2 ... 0.176406354  

506 # T1 diagnostic ... 0.039445660  

507 if line[:22] == 'COUPLED CLUSTER ENERGY': 

508 self.skip_lines(inputfile, ['d', 'b']) 

509 line = next(inputfile) 

510 assert line[:4] == 'E(0)' 

511 scfenergy = utils.convertor(utils.float(line.split()[-1]), 'hartree', 'eV') 

512 line = next(inputfile) 

513 assert line[:7] == 'E(CORR)' 

514 while 'E(TOT)' not in line: 

515 line = next(inputfile) 

516 self.append_attribute( 

517 'ccenergies', 

518 utils.convertor(utils.float(line.split()[-1]), 'hartree', 'eV') 

519 ) 

520 line = next(inputfile) 

521 assert line[:23] == 'Singles Norm <S|S>**1/2' 

522 line = next(inputfile) 

523 self.metadata["t1_diagnostic"] = utils.float(line.split()[-1]) 

524 

525 # ------------------ 

526 # CARTESIAN GRADIENT 

527 # ------------------ 

528 # 

529 # 1 H : 0.000000004 0.019501450 -0.021537091 

530 # 2 O : 0.000000054 -0.042431648 0.042431420 

531 # 3 H : 0.000000004 0.021537179 -0.019501388 

532 # 

533 # ORCA MP2 module has different signal than 'CARTESIAN GRADIENT'. 

534 # 

535 # The final MP2 gradient 

536 # 0: 0.01527469 -0.00292883 0.01125000 

537 # 1: 0.00098782 -0.00040549 0.00196825 

538 # 2: -0.01626251 0.00333431 -0.01321825 

539 if line[:18] == 'CARTESIAN GRADIENT' or line[:22] == 'The final MP2 gradient': 

540 

541 grads = [] 

542 if line[:18] == 'CARTESIAN GRADIENT': 

543 self.skip_lines(inputfile, ['dashes', 'blank']) 

544 

545 line = next(inputfile).strip() 

546 if 'CONSTRAINED CARTESIAN COORDINATES' in line: 

547 self.skip_line( 

548 inputfile, 'constrained Cartesian coordinate warning' 

549 ) 

550 line = next(inputfile).strip() 

551 

552 while line: 

553 tokens = line.split() 

554 x, y, z = float(tokens[-3]), float(tokens[-2]), float(tokens[-1]) 

555 grads.append((x, y, z)) 

556 line = next(inputfile).strip() 

557 

558 if not hasattr(self, 'grads'): 

559 self.grads = [] 

560 self.grads.append(grads) 

561 

562 

563 # After each geometry optimization step, ORCA prints the current convergence 

564 # parameters and the targets (again), so it is a good idea to check that they 

565 # have not changed. Note that the order of these criteria here are different 

566 # than at the beginning of the output, so make use of the geotargets_names created 

567 # before and save the new geovalues in correct order. 

568 # 

569 # ----------------------|Geometry convergence|--------------------- 

570 # Item value Tolerance Converged 

571 # ----------------------------------------------------------------- 

572 # Energy change 0.00006021 0.00000500 NO 

573 # RMS gradient 0.00031313 0.00010000 NO 

574 # RMS step 0.01596159 0.00200000 NO 

575 # MAX step 0.04324586 0.00400000 NO 

576 # .................................................... 

577 # Max(Bonds) 0.0218 Max(Angles) 2.48 

578 # Max(Dihed) 0.00 Max(Improp) 0.00 

579 # ----------------------------------------------------------------- 

580 # 

581 if line[33:53] == "Geometry convergence": 

582 

583 headers = next(inputfile) 

584 dashes = next(inputfile) 

585 

586 names = [] 

587 values = [] 

588 targets = [] 

589 line = next(inputfile) 

590 # Handle both the dots only and dashes only cases 

591 while len(list(set(line.strip()))) != 1: 

592 name = line[10:28].strip().lower() 

593 tokens = line.split() 

594 value = float(tokens[2]) 

595 target = float(tokens[3]) 

596 names.append(name) 

597 values.append(value) 

598 targets.append(target) 

599 line = next(inputfile) 

600 

601 # The energy change is normally not printed in the first iteration, because 

602 # there was no previous energy -- in that case assume zero. There are also some 

603 # edge cases where the energy change is not printed, for example when internal 

604 # angles become improper and internal coordinates are rebuilt as in regression 

605 # CuI-MePY2-CH3CN_optxes, and in such cases use NaN. 

606 newvalues = [] 

607 for i, n in enumerate(self.geotargets_names): 

608 if (n == "energy change") and (n not in names): 

609 if self.is_relaxed_scan: 

610 newvalues.append(0.0) 

611 else: 

612 newvalues.append(numpy.nan) 

613 else: 

614 newvalues.append(values[names.index(n)]) 

615 assert targets[names.index(n)] == self.geotargets[i] 

616 

617 self.append_attribute("geovalues", newvalues) 

618 

619 """ Grab cartesian coordinates 

620 --------------------------------- 

621 CARTESIAN COORDINATES (ANGSTROEM) 

622 --------------------------------- 

623 H 0.000000 0.000000 0.000000 

624 O 0.000000 0.000000 1.000000 

625 H 0.000000 1.000000 1.000000 

626 """ 

627 if line[0:33] == "CARTESIAN COORDINATES (ANGSTROEM)": 

628 next(inputfile) 

629 

630 atomnos = [] 

631 atomcoords = [] 

632 line = next(inputfile) 

633 while len(line) > 1: 

634 atom, x, y, z = line.split() 

635 if atom[-1] != ">": 

636 atomnos.append(self.table.number[atom]) 

637 atomcoords.append([float(x), float(y), float(z)]) 

638 line = next(inputfile) 

639 

640 self.set_attribute('natom', len(atomnos)) 

641 self.set_attribute('atomnos', atomnos) 

642 self.append_attribute("atomcoords", atomcoords) 

643 

644 """ Grab atom masses 

645 ---------------------------- 

646 CARTESIAN COORDINATES (A.U.) 

647 ---------------------------- 

648 NO LB ZA FRAG MASS X Y Z 

649 0 H 1.0000 0 1.008 0.000000 0.000000 0.000000 

650 1 O 8.0000 0 15.999 0.000000 0.000000 1.889726 

651 2 H 1.0000 0 1.008 0.000000 1.889726 1.889726 

652 """ 

653 if line[0:28] == "CARTESIAN COORDINATES (A.U.)" and not hasattr(self, 'atommasses'): 

654 next(inputfile) 

655 next(inputfile) 

656 

657 line = next(inputfile) 

658 self.atommasses = [] 

659 while len(line) > 1: 

660 if line[:32] == '* core charge reduced due to ECP': 

661 break 

662 if line.strip() == "> coreless ECP center with (optional) point charge": 

663 break 

664 no, lb, za, frag, mass, x, y, z = line.split() 

665 if lb[-1] != ">": 

666 self.atommasses.append(float(mass)) 

667 line = next(inputfile) 

668 

669 if line[21:68] == "FINAL ENERGY EVALUATION AT THE STATIONARY POINT": 

670 if not hasattr(self, 'optdone'): 

671 self.optdone = [] 

672 self.optdone.append(len(self.atomcoords)) 

673 

674 if "The optimization did not converge" in line: 

675 if not hasattr(self, 'optdone'): 

676 self.optdone = [] 

677 

678 if line[0:16] == "ORBITAL ENERGIES": 

679 

680 self.skip_lines(inputfile, ['d', 'text', 'text']) 

681 

682 self.mooccnos = [[]] 

683 self.moenergies = [[]] 

684 

685 line = next(inputfile) 

686 while len(line) > 20: # restricted calcs are terminated by ------ 

687 info = line.split() 

688 mooccno = int(float(info[1])) 

689 moenergy = float(info[2]) 

690 self.mooccnos[0].append(mooccno) 

691 self.moenergies[0].append(utils.convertor(moenergy, "hartree", "eV")) 

692 line = next(inputfile) 

693 

694 line = next(inputfile) 

695 

696 # handle beta orbitals for UHF 

697 if line[17:35] == "SPIN DOWN ORBITALS": 

698 text = next(inputfile) 

699 

700 self.mooccnos.append([]) 

701 self.moenergies.append([]) 

702 

703 line = next(inputfile) 

704 while len(line) > 20: # actually terminated by ------ 

705 info = line.split() 

706 mooccno = int(float(info[1])) 

707 moenergy = float(info[2]) 

708 self.mooccnos[1].append(mooccno) 

709 self.moenergies[1].append(utils.convertor(moenergy, "hartree", "eV")) 

710 line = next(inputfile) 

711 

712 if not hasattr(self, 'homos'): 

713 doubly_occupied = self.mooccnos[0].count(2) 

714 singly_occupied = self.mooccnos[0].count(1) 

715 # Restricted closed-shell. 

716 if doubly_occupied > 0 and singly_occupied == 0: 

717 self.set_attribute('homos', [doubly_occupied - 1]) 

718 # Restricted open-shell. 

719 elif doubly_occupied > 0 and singly_occupied > 0: 

720 self.set_attribute('homos', [doubly_occupied + singly_occupied - 1, 

721 doubly_occupied - 1]) 

722 # Unrestricted. 

723 else: 

724 assert len(self.moenergies) == 2 

725 assert doubly_occupied == 0 

726 assert self.mooccnos[1].count(2) == 0 

727 nbeta = self.mooccnos[1].count(1) 

728 self.set_attribute('homos', [singly_occupied - 1, nbeta - 1]) 

729 

730 # So nbasis was parsed at first with the first pattern, but it turns out that 

731 # semiempirical methods (at least AM1 as reported by Julien Idé) do not use this. 

732 # For this reason, also check for the second patterns, and use it as an assert 

733 # if nbasis was already parsed. Regression PCB_1_122.out covers this test case. 

734 if line[1:32] == "# of contracted basis functions": 

735 self.set_attribute('nbasis', int(line.split()[-1])) 

736 if line[1:27] == "Basis Dimension Dim": 

737 self.set_attribute('nbasis', int(line.split()[-1])) 

738 

739 if line[0:14] == "OVERLAP MATRIX": 

740 

741 self.skip_line(inputfile, 'dashes') 

742 

743 self.aooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d") 

744 for i in range(0, self.nbasis, 6): 

745 self.updateprogress(inputfile, "Overlap") 

746 

747 header = next(inputfile) 

748 size = len(header.split()) 

749 

750 for j in range(self.nbasis): 

751 line = next(inputfile) 

752 broken = line.split() 

753 self.aooverlaps[j, i:i+size] = list(map(float, broken[1:size+1])) 

754 

755 # Molecular orbital coefficients are parsed here, but also related things 

756 #like atombasis and aonames if possible. 

757 # 

758 # Normally the output is easy to parse like this: 

759 # ------------------ 

760 # MOLECULAR ORBITALS 

761 # ------------------ 

762 # 0 1 2 3 4 5  

763 # -19.28527 -19.26828 -19.26356 -19.25801 -19.25765 -19.21471 

764 # 2.00000 2.00000 2.00000 2.00000 2.00000 2.00000 

765 # -------- -------- -------- -------- -------- -------- 

766 # 0C 1s 0.000002 -0.000001 0.000000 0.000000 -0.000000 0.000001 

767 # 0C 2s -0.000007 0.000006 -0.000002 -0.000000 0.000001 -0.000003 

768 # 0C 3s -0.000086 -0.000061 0.000058 -0.000033 -0.000027 -0.000058 

769 # ... 

770 # 

771 # But when the numbers get big, things get yucky since ORCA does not use 

772 # fixed width formatting for the floats, and does not insert extra spaces 

773 # when the numbers get wider. So things get stuck together overflowing columns, 

774 # like this: 

775 # 12C 6s -11.608845-53.775398161.302640-76.633779 29.914985 22.083999 

776 # 

777 # One assumption that seems to hold is that there are always six significant 

778 # digits in the coefficients, so we can try to use that to delineate numbers 

779 # when the parsing gets rough. This is what we do below with a regex, and a case 

780 # like this is tested in regression ORCA/ORCA4.0/invalid-literal-for-float.out 

781 # which was reported in https://github.com/cclib/cclib/issues/629 

782 if line[0:18] == "MOLECULAR ORBITALS": 

783 

784 self.skip_line(inputfile, 'dashes') 

785 

786 aonames = [] 

787 atombasis = [[] for i in range(self.natom)] 

788 mocoeffs = [numpy.zeros((self.nbasis, self.nbasis), "d")] 

789 

790 for spin in range(len(self.moenergies)): 

791 

792 if spin == 1: 

793 self.skip_line(inputfile, 'blank') 

794 mocoeffs.append(numpy.zeros((self.nbasis, self.nbasis), "d")) 

795 

796 for i in range(0, self.nbasis, 6): 

797 

798 self.updateprogress(inputfile, "Coefficients") 

799 

800 self.skip_lines(inputfile, ['numbers', 'energies', 'occs']) 

801 dashes = next(inputfile) 

802 

803 for j in range(self.nbasis): 

804 line = next(inputfile) 

805 

806 # Only need this in the first iteration. 

807 if spin == 0 and i == 0: 

808 atomname = line[3:5].split()[0] 

809 num = int(line[0:3]) 

810 orbital = line.split()[1].upper() 

811 

812 aonames.append("%s%i_%s" % (atomname, num+1, orbital)) 

813 atombasis[num].append(j) 

814 

815 # This regex will tease out all number with exactly 

816 # six digits after the decimal point. 

817 coeffs = re.findall(r'-?\d+\.\d{6}', line) 

818 

819 # Something is very wrong if this does not hold. 

820 assert len(coeffs) <= 6 

821 

822 mocoeffs[spin][i:i+len(coeffs), j] = [float(c) for c in coeffs] 

823 

824 self.set_attribute('aonames', aonames) 

825 self.set_attribute('atombasis', atombasis) 

826 self.set_attribute("mocoeffs", mocoeffs) 

827 

828 # Basis set information 

829 # ORCA prints this out in a somewhat indirect fashion. 

830 # Therefore, parsing occurs in several steps: 

831 # 1. read which atom belongs to which basis set group 

832 if line[0:21] == "BASIS SET INFORMATION": 

833 line = next(inputfile) 

834 line = next(inputfile) 

835 

836 self.tmp_atnames = [] # temporary attribute, needed later 

837 while(not line[0:5] == '-----'): 

838 if line[0:4] == "Atom": 

839 self.tmp_atnames.append(line[8:12].strip()) 

840 line = next(inputfile) 

841 

842 # 2. Read information for the basis set groups 

843 if line[0:25] == "BASIS SET IN INPUT FORMAT": 

844 line = next(inputfile) 

845 line = next(inputfile) 

846 

847 # loop over basis set groups 

848 gbasis_tmp = {} 

849 while(not line[0:5] == '-----'): 

850 if line[1:7] == 'NewGTO': 

851 bas_atname = line.split()[1] 

852 gbasis_tmp[bas_atname] = [] 

853 

854 line = next(inputfile) 

855 # loop over contracted GTOs 

856 while(not line[0:6] == ' end;'): 

857 words = line.split() 

858 ang = words[0] 

859 nprim = int(words[1]) 

860 

861 # loop over primitives 

862 coeff = [] 

863 for iprim in range(nprim): 

864 words = next(inputfile).split() 

865 coeff.append( (float(words[1]), float(words[2])) ) 

866 gbasis_tmp[bas_atname].append((ang, coeff)) 

867 

868 line = next(inputfile) 

869 line = next(inputfile) 

870 

871 # 3. Assign the basis sets to gbasis 

872 self.gbasis = [] 

873 for bas_atname in self.tmp_atnames: 

874 self.gbasis.append(gbasis_tmp[bas_atname]) 

875 del self.tmp_atnames 

876 

877 """ 

878 -------------------------- 

879 THERMOCHEMISTRY AT 298.15K 

880 -------------------------- 

881 

882 Temperature ... 298.15 K 

883 Pressure ... 1.00 atm 

884 Total Mass ... 130.19 AMU 

885 

886 Throughout the following assumptions are being made: 

887 (1) The electronic state is orbitally nondegenerate 

888 ... 

889 

890 freq. 45.75 E(vib) ... 0.53  

891 freq. 78.40 E(vib) ... 0.49 

892 ... 

893 

894 

895 ------------ 

896 INNER ENERGY 

897 ------------ 

898 

899 The inner energy is: U= E(el) + E(ZPE) + E(vib) + E(rot) + E(trans) 

900 E(el) - is the total energy from the electronic structure calc 

901 ... 

902 

903 Summary of contributions to the inner energy U: 

904 Electronic energy ... -382.05075804 Eh 

905 ... 

906 """ 

907 if line.strip().startswith('THERMOCHEMISTRY AT'): 

908 

909 self.skip_lines(inputfile, ['dashes', 'blank']) 

910 self.temperature = float(next(inputfile).split()[2]) 

911 self.pressure = float(next(inputfile).split()[2]) 

912 total_mass = float(next(inputfile).split()[3]) 

913 

914 # Vibrations, rotations, and translations 

915 line = next(inputfile) 

916 while line[:17] != 'Electronic energy': 

917 line = next(inputfile) 

918 self.electronic_energy = float(line.split()[3]) 

919 self.zpe = float(next(inputfile).split()[4]) 

920 thermal_vibrational_correction = float(next(inputfile).split()[4]) 

921 thermal_rotional_correction = float(next(inputfile).split()[4]) 

922 thermal_translational_correction = float(next(inputfile).split()[4]) 

923 self.skip_lines(inputfile, ['dashes']) 

924 total_thermal_energy = float(next(inputfile).split()[3]) 

925 

926 # Enthalpy 

927 while line[:17] != 'Total free energy': 

928 line = next(inputfile) 

929 thermal_enthalpy_correction = float(next(inputfile).split()[4]) 

930 next(inputfile) 

931 

932 # For a single atom, ORCA provides the total free energy or inner energy 

933 # which includes a spurious vibrational correction (see #817 for details). 

934 if self.natom > 1: 

935 self.enthalpy = float(next(inputfile).split()[3]) 

936 else: 

937 self.enthalpy = self.electronic_energy + thermal_translational_correction 

938 

939 # Entropy 

940 while line[:18] != 'Electronic entropy': 

941 line = next(inputfile) 

942 electronic_entropy = float(line.split()[3]) 

943 vibrational_entropy = float(next(inputfile).split()[3]) 

944 rotational_entropy = float(next(inputfile).split()[3]) 

945 translational_entropy = float(next(inputfile).split()[3]) 

946 self.skip_lines(inputfile, ['dashes']) 

947 

948 # ORCA prints -inf for single atom entropy. 

949 if self.natom > 1: 

950 self.entropy = float(next(inputfile).split()[4]) 

951 else: 

952 self.entropy = (electronic_entropy + translational_entropy) / self.temperature 

953 

954 while (line[:25] != 'Final Gibbs free enthalpy') and (line[:23] != 'Final Gibbs free energy'): 

955 line = next(inputfile) 

956 self.skip_lines(inputfile, ['dashes']) 

957 

958 

959 # ORCA prints -inf for sinle atom free energy. 

960 if self.natom > 1: 

961 self.freeenergy = float(line.split()[5]) 

962 else: 

963 self.freeenergy = self.enthalpy - self.temperature * self.entropy 

964 

965 # Read TDDFT information 

966 if any(x in line for x in ("TD-DFT/TDA EXCITED", "TD-DFT EXCITED")): 

967 # Could be singlets or triplets 

968 if line.find("SINGLETS") >= 0: 

969 sym = "Singlet" 

970 elif line.find("TRIPLETS") >= 0: 

971 sym = "Triplet" 

972 else: 

973 sym = "Not specified" 

974 

975 etsecs = [] 

976 etenergies = [] 

977 etsyms = [] 

978 

979 lookup = {'a': 0, 'b': 1} 

980 line = next(inputfile) 

981 while line.find("STATE") < 0: 

982 line = next(inputfile) 

983 # Contains STATE or is blank 

984 while line.find("STATE") >= 0: 

985 broken = line.split() 

986 etenergies.append(float(broken[-2])) 

987 etsyms.append(sym) 

988 line = next(inputfile) 

989 sec = [] 

990 # Contains SEC or is blank 

991 while line.strip(): 

992 start = line[0:8].strip() 

993 start = (int(start[:-1]), lookup[start[-1]]) 

994 end = line[10:17].strip() 

995 end = (int(end[:-1]), lookup[end[-1]]) 

996 # Coeffients are not printed for RPA, only 

997 # TDA/CIS. 

998 contrib = line[35:47].strip() 

999 try: 

1000 contrib = float(contrib) 

1001 except ValueError: 

1002 contrib = numpy.nan 

1003 sec.append([start, end, contrib]) 

1004 line = next(inputfile) 

1005 etsecs.append(sec) 

1006 line = next(inputfile) 

1007 

1008 self.extend_attribute('etenergies', etenergies) 

1009 self.extend_attribute('etsecs', etsecs) 

1010 self.extend_attribute('etsyms', etsyms) 

1011 

1012 # Parse the various absorption spectra for TDDFT and ROCIS. 

1013 if 'ABSORPTION SPECTRUM' in line or 'ELECTRIC DIPOLE' in line: 

1014 # CASSCF has an anomalous printing of ABSORPTION SPECTRUM. 

1015 if line[:-1] == 'ABSORPTION SPECTRUM': 

1016 return 

1017 

1018 line = line.strip() 

1019 

1020 # Standard header, occasionally changes 

1021 header = ['d', 'header', 'header', 'd'] 

1022 

1023 def energy_intensity(line): 

1024 """ TDDFT and related methods standard method of output 

1025----------------------------------------------------------------------------- 

1026 ABSORPTION SPECTRUM VIA TRANSITION ELECTRIC DIPOLE MOMENTS 

1027----------------------------------------------------------------------------- 

1028State Energy Wavelength fosc T2 TX TY TZ 

1029 (cm-1) (nm) (au**2) (au) (au) (au) 

1030----------------------------------------------------------------------------- 

1031 1 5184116.7 1.9 0.040578220 0.00258 -0.05076 -0.00000 -0.00000 

1032""" 

1033 try: 

1034 state, energy, wavelength, intensity, t2, tx, ty, tz = line.split() 

1035 except ValueError as e: 

1036 # Must be spin forbidden and thus no intensity 

1037 energy = line.split()[1] 

1038 intensity = 0 

1039 return energy, intensity 

1040 

1041 # Check for variations 

1042 if line == 'COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM' or \ 

1043 line == 'COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM (origin adjusted)': 

1044 def energy_intensity(line): 

1045 """ TDDFT with DoQuad == True 

1046------------------------------------------------------------------------------------------------------ 

1047 COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM 

1048------------------------------------------------------------------------------------------------------ 

1049State Energy Wavelength D2 m2 Q2 D2+m2+Q2 D2/TOT m2/TOT Q2/TOT 

1050 (cm-1) (nm) (*1e6) (*1e6) 

1051------------------------------------------------------------------------------------------------------ 

1052 1 61784150.6 0.2 0.00000 0.00000 3.23572 0.00000323571519 0.00000 0.00000 1.00000 

1053""" 

1054 state, energy, wavelength, d2, m2, q2, intensity, d2_contrib, m2_contrib, q2_contrib = line.split() 

1055 return energy, intensity 

1056 

1057 elif line == 'COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM (Origin Independent, Length Representation)': 

1058 def energy_intensity(line): 

1059 """ TDDFT with doQuad == True (Origin Independent Length Representation) 

1060------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 

1061 COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM (Origin Independent, Length Representation) 

1062------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 

1063State Energy Wavelength D2 m2 Q2 DM DO D2+m2+Q2+DM+DO D2/TOT m2/TOT Q2/TOT DM/TOT DO/TOT 

1064 (cm-1) (nm) (*1e6) (*1e6) (*1e6) (*1e6) 

1065------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 

1066 1 61784150.6 0.2 0.00000 0.00000 3.23572 0.00000 0.00000 0.00000323571519 0.00000 0.00000 1.00000 0.00000 0.00000 

1067 2 61793079.3 0.2 0.00000 0.00000 2.85949 0.00000 -0.00000 0.00000285948800 0.00000 0.00000 1.00000 0.00000 -0.00000 

1068""" 

1069 vals = line.split() 

1070 if len(vals) < 14: 

1071 return vals[1], 0 

1072 return vals[1], vals[8] 

1073 

1074 elif line[:5] == 'X-RAY' and \ 

1075 (line[6:23] == 'EMISSION SPECTRUM' or line[6:25] == 'ABSORPTION SPECTRUM'): 

1076 def energy_intensity(line): 

1077 """ X-Ray from XES (emission or absorption, electric or velocity dipole moments) 

1078------------------------------------------------------------------------------------- 

1079 X-RAY ABSORPTION SPECTRUM VIA TRANSITION ELECTRIC DIPOLE MOMENTS 

1080------------------------------------------------------------------------------------- 

1081 Transition Energy INT TX TY TZ 

1082 (eV) (normalized) (au) (au) (au) 

1083------------------------------------------------------------------------------------- 

1084 1 90a -> 0a 8748.824 0.000002678629 0.00004 -0.00001 0.00003 

1085""" 

1086 state, start, arrow, end, energy, intensity, tx, ty, tz = line.split() 

1087 return energy, intensity 

1088 

1089 elif line[:70] == 'COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE X-RAY': 

1090 header = ['header', 'd', 'header', 'd', 'header', 'header', 'd'] 

1091 def energy_intensity(line): 

1092 """ XAS with quadrupole (origin adjusted) 

1093------------------------------------------------------------------------------------------------------------------------------- 

1094 COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE X-RAY ABSORPTION SPECTRUM 

1095 (origin adjusted) 

1096------------------------------------------------------------------------------------------------------------------------------- 

1097 INT (normalized) 

1098 --------------------------------------------------------- 

1099 Transition Energy D2 M2 Q2 D2+M2+Q2 D2/TOT M2/TOT Q2/TOT 

1100 (eV) (*1e6) (*1e6) 

1101------------------------------------------------------------------------------------------------------------------------------- 

1102 1 90a -> 0a 8748.824 0.000000 0.000292 0.003615 0.000000027512 0.858012 0.010602 0.131386 

1103""" 

1104 state, start, arrow, end, energy, d2, m2, q2, intensity, d2_contrib, m2_contrib, q2_contrib = line.split() 

1105 return energy, intensity 

1106 

1107 elif line[:55] == 'SPIN ORBIT CORRECTED ABSORPTION SPECTRUM VIA TRANSITION': 

1108 def energy_intensity(line): 

1109 """ ROCIS dipole approximation with SOC == True (electric or velocity dipole moments) 

1110------------------------------------------------------------------------------- 

1111SPIN ORBIT CORRECTED ABSORPTION SPECTRUM VIA TRANSITION ELECTRIC DIPOLE MOMENTS 

1112------------------------------------------------------------------------------- 

1113States Energy Wavelength fosc T2 TX TY TZ 

1114 (cm-1) (nm) (au**2) (au) (au) (au) 

1115------------------------------------------------------------------------------- 

1116 0 1 0.0 0.0 0.000000000 0.00000 0.00000 0.00000 0.00000 

1117 0 2 5184116.4 1.9 0.020288451 0.00258 0.05076 0.00003 0.00000 

1118""" 

1119 state, state2, energy, wavelength, intensity, t2, tx, ty, tz = line.split() 

1120 return energy, intensity 

1121 

1122 elif line[:79] == 'ROCIS COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM' \ 

1123 or line[:87] == 'SOC CORRECTED COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM': 

1124 def energy_intensity(line): 

1125 """ ROCIS with DoQuad = True and SOC = True (also does origin adjusted) 

1126------------------------------------------------------------------------------------------------------ 

1127 ROCIS COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM 

1128------------------------------------------------------------------------------------------------------ 

1129States Energy Wavelength D2 m2 Q2 D2+m2+Q2 D2/TOT m2/TOT Q2/TOT 

1130 (cm-1) (nm) (*1e6) (*1e6) (*population) 

1131------------------------------------------------------------------------------------------------------ 

1132 0 1 0.0 0.0 0.00000 0.00000 0.00000 0.00000000000000 0.00000 0.00000 0.00000 

1133 0 2 669388066.6 0.0 0.00000 0.00000 0.00876 0.00000000437784 0.00000 0.00000 1.00000 

1134""" 

1135 state, state2, energy, wavelength, d2, m2, q2, intensity, d2_contrib, m2_contrib, q2_contrib = line.split() 

1136 return energy, intensity 

1137 

1138 # Clashes with Orca 2.6 (and presumably before) TDDFT absorption spectrum printing 

1139 elif line == 'ABSORPTION SPECTRUM' and \ 

1140 parse_version(self.metadata['package_version']).release > (2, 6): 

1141 def energy_intensity(line): 

1142 """ CASSCF absorption spectrum 

1143------------------------------------------------------------------------------------------ 

1144 ABSORPTION SPECTRUM 

1145------------------------------------------------------------------------------------------ 

1146 States Energy Wavelength fosc T2 TX TY TZ 

1147 (cm-1) (nm) (D**2) (D) (D) (D) 

1148------------------------------------------------------------------------------------------ 

1149 0( 0)-> 1( 0) 1 83163.2 120.2 0.088250385 2.25340 0.00000 0.00000 1.50113 

1150""" 

1151 reg = r'(\d+)\( ?(\d+)\)-> ?(\d+)\( ?(\d+)\) (\d+)'+ r'\s+(\d+\.\d+)'*4 + r'\s+(-?\d+\.\d+)'*3 

1152 res = re.search(reg, line) 

1153 jstate, jblock, istate, iblock, mult, energy, wavelength, intensity, t2, tx, ty, tz = res.groups() 

1154 return energy, intensity 

1155 

1156 name = line 

1157 self.skip_lines(inputfile, header) 

1158 

1159 if not hasattr(self, 'transprop'): 

1160 self.transprop = {} 

1161 

1162 etenergies = [] 

1163 etoscs = [] 

1164 line = next(inputfile) 

1165 # The sections are occasionally ended with dashed lines 

1166 # other times they are blank (other than a new line) 

1167 while len(line.strip('-')) > 2: 

1168 energy, intensity = energy_intensity(line) 

1169 etenergies.append(float(energy)) 

1170 etoscs.append(float(intensity)) 

1171 

1172 line = next(inputfile) 

1173 

1174 self.set_attribute('etenergies', etenergies) 

1175 self.set_attribute('etoscs', etoscs) 

1176 self.transprop[name] = (numpy.asarray(etenergies), numpy.asarray(etoscs)) 

1177 

1178 if line.strip() == "CD SPECTRUM": 

1179 # ------------------------------------------------------------------- 

1180 # CD SPECTRUM 

1181 # ------------------------------------------------------------------- 

1182 # State Energy Wavelength R MX MY MZ  

1183 # (cm-1) (nm) (1e40*cgs) (au) (au) (au)  

1184 # ------------------------------------------------------------------- 

1185 # 1 43167.6 231.7 0.00000 0.00000 -0.00000 0.00000 

1186 # 

1187 etenergies = [] 

1188 etrotats = [] 

1189 self.skip_lines(inputfile, ["d", "State Energy Wavelength", "(cm-1) (nm)", "d"]) 

1190 line = next(inputfile) 

1191 while line.strip(): 

1192 tokens = line.split() 

1193 if "spin forbidden" in line: 

1194 etrotat, mx, my, mz = 0.0, 0.0, 0.0, 0.0 

1195 else: 

1196 etrotat, mx, my, mz = [utils.float(t) for t in tokens[3:]] 

1197 etenergies.append(utils.float(tokens[1])) 

1198 etrotats.append(etrotat) 

1199 line = next(inputfile) 

1200 self.set_attribute("etrotats", etrotats) 

1201 if not hasattr(self, "etenergies"): 

1202 self.logger.warning("etenergies not parsed before ECD section, " 

1203 "the output file may be malformed") 

1204 self.set_attribute("etenergies", etenergies) 

1205 

1206 if line[:23] == "VIBRATIONAL FREQUENCIES": 

1207 self.skip_lines(inputfile, ['d', 'b']) 

1208 

1209 # Starting with 4.1, a scaling factor for frequencies is printed 

1210 if float(self.metadata["package_version"][:3]) > 4.0: 

1211 self.skip_lines(inputfile, ['Scaling factor for frequencies', 'b']) 

1212 

1213 if self.natom > 1: 

1214 vibfreqs = numpy.zeros(3 * self.natom) 

1215 for i, line in zip(range(3 * self.natom), inputfile): 

1216 vibfreqs[i] = float(line.split()[1]) 

1217 

1218 nonzero = numpy.nonzero(vibfreqs)[0] 

1219 self.first_mode = nonzero[0] 

1220 # Take all modes after first 

1221 # Mode between imaginary and real modes could be 0 

1222 self.num_modes = 3*self.natom - self.first_mode 

1223 if self.num_modes > 3*self.natom - 6: 

1224 msg = "Modes corresponding to rotations/translations may be non-zero." 

1225 if self.num_modes == 3*self.natom - 5: 

1226 msg += '\n You can ignore this if the molecule is linear.' 

1227 self.set_attribute('vibfreqs', vibfreqs[self.first_mode:]) 

1228 else: 

1229 # we have a single atom 

1230 self.set_attribute('vibfreqs', numpy.array([])) 

1231 

1232 # NORMAL MODES 

1233 # ------------ 

1234 # 

1235 # These modes are the cartesian displacements weighted by the diagonal matrix 

1236 # M(i,i)=1/sqrt(m[i]) where m[i] is the mass of the displaced atom 

1237 # Thus, these vectors are normalized but *not* orthogonal 

1238 # 

1239 # 0 1 2 3 4 5 

1240 # 0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 

1241 # 1 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 

1242 # 2 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 

1243 # ... 

1244 if line[:12] == "NORMAL MODES": 

1245 if self.natom > 1: 

1246 all_vibdisps = numpy.zeros((3 * self.natom, self.natom, 3), "d") 

1247 

1248 self.skip_lines(inputfile, ['d', 'b', 'text', 'text', 'text', 'b']) 

1249 

1250 for mode in range(0, 3 * self.natom, 6): 

1251 header = next(inputfile) 

1252 for atom in range(self.natom): 

1253 all_vibdisps[mode:mode + 6, atom, 0] = next(inputfile).split()[1:] 

1254 all_vibdisps[mode:mode + 6, atom, 1] = next(inputfile).split()[1:] 

1255 all_vibdisps[mode:mode + 6, atom, 2] = next(inputfile).split()[1:] 

1256 

1257 self.set_attribute('vibdisps', all_vibdisps[self.first_mode:]) 

1258 else: 

1259 # we have a single atom 

1260 self.set_attribute('vibdisps', numpy.array([])) 

1261 

1262 # ----------- 

1263 # IR SPECTRUM 

1264 # ----------- 

1265 # 

1266 # Mode freq (cm**-1) T**2 TX TY TZ 

1267 # ------------------------------------------------------------------- 

1268 # 6: 2069.36 1.674341 ( -0.000000 0.914970 -0.914970) 

1269 # 7: 3978.81 76.856228 ( 0.000000 6.199041 -6.199042) 

1270 # 8: 4113.34 61.077784 ( -0.000000 5.526201 5.526200) 

1271 if line[:11] == "IR SPECTRUM": 

1272 self.skip_lines(inputfile, ['d', 'b', 'header', 'd']) 

1273 

1274 if self.natom > 1: 

1275 all_vibirs = numpy.zeros((3 * self.natom,), "d") 

1276 

1277 line = next(inputfile) 

1278 while len(line) > 2: 

1279 num = int(line[0:4]) 

1280 all_vibirs[num] = float(line.split()[2]) 

1281 line = next(inputfile) 

1282 

1283 self.set_attribute('vibirs', all_vibirs[self.first_mode:]) 

1284 else: 

1285 # we have a single atom 

1286 self.set_attribute('vibirs', numpy.array([])) 

1287 

1288 # -------------- 

1289 # RAMAN SPECTRUM 

1290 # -------------- 

1291 # 

1292 # Mode freq (cm**-1) Activity Depolarization 

1293 # ------------------------------------------------------------------- 

1294 # 6: 296.23 5.291229 0.399982 

1295 # 7: 356.70 0.000000 0.749764 

1296 # 8: 368.27 0.000000 0.202068 

1297 if line[:14] == "RAMAN SPECTRUM": 

1298 self.skip_lines(inputfile, ['d', 'b', 'header', 'd']) 

1299 

1300 if self.natom > 1: 

1301 all_vibramans = numpy.zeros(3 * self.natom) 

1302 

1303 line = next(inputfile) 

1304 while len(line) > 2: 

1305 num = int(line[0:4]) 

1306 all_vibramans[num] = float(line.split()[2]) 

1307 line = next(inputfile) 

1308 

1309 self.set_attribute('vibramans', all_vibramans[self.first_mode:]) 

1310 else: 

1311 # we have a single atom 

1312 self.set_attribute('vibramans', numpy.array([])) 

1313 

1314 # ORCA will print atomic charges along with the spin populations, 

1315 # so care must be taken about choosing the proper column. 

1316 # Population analyses are performed usually only at the end 

1317 # of a geometry optimization or other run, so we want to 

1318 # leave just the final atom charges. 

1319 # Here is an example for Mulliken charges: 

1320 # -------------------------------------------- 

1321 # MULLIKEN ATOMIC CHARGES AND SPIN POPULATIONS 

1322 # -------------------------------------------- 

1323 # 0 H : 0.126447 0.002622 

1324 # 1 C : -0.613018 -0.029484 

1325 # 2 H : 0.189146 0.015452 

1326 # 3 H : 0.320041 0.037434 

1327 # ... 

1328 # Sum of atomic charges : -0.0000000 

1329 # Sum of atomic spin populations: 1.0000000 

1330 if line[:23] == "MULLIKEN ATOMIC CHARGES": 

1331 self.parse_charge_section(line, inputfile, 'mulliken') 

1332 # Things are the same for Lowdin populations, except that the sums 

1333 # are not printed (there is a blank line at the end). 

1334 if line[:22] == "LOEWDIN ATOMIC CHARGES": 

1335 self.parse_charge_section(line, inputfile, 'lowdin') 

1336 #CHELPG Charges 

1337 #-------------------------------- 

1338 # 0 C : 0.363939 

1339 # 1 H : 0.025695 

1340 # ... 

1341 #-------------------------------- 

1342 #Total charge: -0.000000 

1343 #-------------------------------- 

1344 if line.startswith('CHELPG Charges'): 

1345 self.parse_charge_section(line, inputfile, 'chelpg') 

1346 

1347 # It is not stated explicitely, but the dipole moment components printed by ORCA 

1348 # seem to be in atomic units, so they will need to be converted. Also, they 

1349 # are most probably calculated with respect to the origin . 

1350 # 

1351 # ------------- 

1352 # DIPOLE MOMENT 

1353 # ------------- 

1354 # X Y Z 

1355 # Electronic contribution: 0.00000 -0.00000 -0.00000 

1356 # Nuclear contribution : 0.00000 0.00000 0.00000 

1357 # ----------------------------------------- 

1358 # Total Dipole Moment : 0.00000 -0.00000 -0.00000 

1359 # ----------------------------------------- 

1360 # Magnitude (a.u.) : 0.00000 

1361 # Magnitude (Debye) : 0.00000 

1362 # 

1363 if line.strip() == "DIPOLE MOMENT": 

1364 

1365 self.skip_lines(inputfile, ['d', 'XYZ', 'electronic', 'nuclear', 'd']) 

1366 total = next(inputfile) 

1367 assert "Total Dipole Moment" in total 

1368 

1369 reference = [0.0, 0.0, 0.0] 

1370 dipole = numpy.array([float(d) for d in total.split()[-3:]]) 

1371 dipole = utils.convertor(dipole, "ebohr", "Debye") 

1372 

1373 if not hasattr(self, 'moments'): 

1374 self.set_attribute('moments', [reference, dipole]) 

1375 else: 

1376 try: 

1377 assert numpy.all(self.moments[1] == dipole) 

1378 except AssertionError: 

1379 self.logger.warning('Overwriting previous multipole moments with new values') 

1380 self.set_attribute('moments', [reference, dipole]) 

1381 

1382 if "Molecular Dynamics Iteration" in line: 

1383 self.skip_lines(inputfile, ['d', 'ORCA MD', 'd', 'New Coordinates']) 

1384 line = next(inputfile) 

1385 tokens = line.split() 

1386 assert tokens[0] == "time" 

1387 time = utils.convertor(float(tokens[2]), "time_au", "fs") 

1388 self.append_attribute('time', time) 

1389 

1390 # Static polarizability. 

1391 if line.strip() == "THE POLARIZABILITY TENSOR": 

1392 if not hasattr(self, 'polarizabilities'): 

1393 self.polarizabilities = [] 

1394 self.skip_lines(inputfile, ['d', 'b']) 

1395 line = next(inputfile) 

1396 assert line.strip() == "The raw cartesian tensor (atomic units):" 

1397 polarizability = [] 

1398 for _ in range(3): 

1399 line = next(inputfile) 

1400 polarizability.append(line.split()) 

1401 self.polarizabilities.append(numpy.array(polarizability)) 

1402 

1403 if line.strip() == 'ORCA-CASSCF': 

1404 # ------------------------------------------------------------------------------- 

1405 # ORCA-CASSCF 

1406 # ------------------------------------------------------------------------------- 

1407 # 

1408 # Symmetry handling UseSym ... ON 

1409 # Point group ... C2 

1410 # Used point group ... C2 

1411 # Number of irreps ... 2 

1412 # Irrep A has 10 SALCs (ofs= 0) #(closed)= 0 #(active)= 2 

1413 # Irrep B has 10 SALCs (ofs= 10) #(closed)= 0 #(active)= 2 

1414 # Symmetries of active orbitals: 

1415 # MO = 0 IRREP= 0 (A) 

1416 # MO = 1 IRREP= 1 (B) 

1417 self.skip_lines(inputfile, ['d', 'b']) 

1418 vals = next(inputfile).split() 

1419 # Symmetry section is only printed if symmetry is used. 

1420 if vals[0] == 'Symmetry': 

1421 assert vals[-1] == 'ON' 

1422 point_group = next(inputfile).split()[-1] 

1423 used_point_group = next(inputfile).split()[-1] 

1424 num_irreps = int(next(inputfile).split()[-1]) 

1425 num_active = 0 

1426 # Parse the irreps. 

1427 for i, line in zip(range(num_irreps), inputfile): 

1428 reg = r'Irrep\s+(\w+) has\s+(\d+) SALCs \(ofs=\s*(\d+)\) #\(closed\)=\s*(\d+) #\(active\)=\s*(\d+)' 

1429 groups = re.search(reg, line).groups() 

1430 irrep = groups[0] 

1431 salcs, ofs, closed, active = map(int, groups[1:]) 

1432 num_active += active 

1433 self.skip_line(inputfile, 'Symmetries') 

1434 # Parse the symmetries of the active orbitals. 

1435 for i, line in zip(range(num_active), inputfile): 

1436 reg = r'(\d+) IRREP= (\d+) \((\w+)\)' 

1437 groups = re.search(reg, line).groups() 

1438 mo, irrep_idx, irrep = groups 

1439 

1440 # Skip until the system specific settings. 

1441 # This will align the cases of symmetry on and off. 

1442 line = next(inputfile) 

1443 while line[:25] != 'SYSTEM-SPECIFIC SETTINGS:': 

1444 line = next(inputfile) 

1445 

1446 # SYSTEM-SPECIFIC SETTINGS: 

1447 # Number of active electrons ... 4 

1448 # Number of active orbitals ... 4 

1449 # Total number of electrons ... 4 

1450 # Total number of orbitals ... 20 

1451 num_el = int(next(inputfile).split()[-1]) 

1452 num_orbs = int(next(inputfile).split()[-1]) 

1453 total_el = int(next(inputfile).split()[-1]) 

1454 total_orbs = int(next(inputfile).split()[-1]) 

1455 

1456 # Determined orbital ranges: 

1457 # Internal 0 - -1 ( 0 orbitals) 

1458 # Active 0 - 3 ( 4 orbitals) 

1459 # External 4 - 19 ( 16 orbitals) 

1460 self.skip_lines(inputfile, ['b', 'Determined']) 

1461 orbital_ranges = [] 

1462 # Parse the orbital ranges for: Internal, Active, and External orbitals. 

1463 for i in range(3): 

1464 vals = next(inputfile).split() 

1465 start, end, num = int(vals[1]), int(vals[3]), int(vals[5]) 

1466 # Change from inclusive to exclusive in order to match python. 

1467 end = end + 1 

1468 assert end - start == num 

1469 orbital_ranges.append((start, end, num)) 

1470 

1471 line = next(inputfile) 

1472 while line[:8] != 'CI-STEP:': 

1473 line = next(inputfile) 

1474 

1475 # CI-STEP: 

1476 # CI strategy ... General CI 

1477 # Number of symmetry/multplity blocks ... 1 

1478 # BLOCK 1 WEIGHT= 1.0000 

1479 # Multiplicity ... 1 

1480 # Irrep ... 0 (A) 

1481 # #(Configurations) ... 11 

1482 # #(CSFs) ... 12 

1483 # #(Roots) ... 1 

1484 # ROOT=0 WEIGHT= 1.000000 

1485 self.skip_line(inputfile, 'CI strategy') 

1486 num_blocks = int(next(inputfile).split()[-1]) 

1487 for b in range(1, num_blocks + 1): 

1488 vals = next(inputfile).split() 

1489 block = int(vals[1]) 

1490 weight = float(vals[3]) 

1491 assert b == block 

1492 mult = int(next(inputfile).split()[-1]) 

1493 vals = next(inputfile).split() 

1494 # The irrep will only be printed if using symmetry. 

1495 if vals[0] == 'Irrep': 

1496 irrep_idx = int(vals[-2]) 

1497 irrep = vals[-1].strip('()') 

1498 vals = next(inputfile).split() 

1499 num_confs = int(vals[-1]) 

1500 num_csfs = int(next(inputfile).split()[-1]) 

1501 num_roots = int(next(inputfile).split()[-1]) 

1502 # Parse the roots. 

1503 for r, line in zip(range(num_roots), inputfile): 

1504 reg = r'=(\d+) WEIGHT=\s*(\d\.\d+)' 

1505 groups = re.search(reg, line).groups() 

1506 root = int(groups[0]) 

1507 weight = float(groups[1]) 

1508 assert r == root 

1509 

1510 # Skip additional setup printing and CASSCF iterations. 

1511 line = next(inputfile).strip() 

1512 while line != 'CASSCF RESULTS': 

1513 line = next(inputfile).strip() 

1514 

1515 # -------------- 

1516 # CASSCF RESULTS 

1517 # -------------- 

1518 # 

1519 # Final CASSCF energy : -14.597120777 Eh -397.2078 eV 

1520 self.skip_lines(inputfile, ['d', 'b']) 

1521 casscf_energy = float(next(inputfile).split()[4]) 

1522 

1523 # This is only printed for first and last step of geometry optimization. 

1524 # ---------------- 

1525 # ORBITAL ENERGIES 

1526 # ---------------- 

1527 # 

1528 # NO OCC E(Eh) E(eV) Irrep 

1529 # 0 0.0868 0.257841 7.0162 1-A 

1530 self.skip_lines(inputfile, ['b', 'd']) 

1531 if next(inputfile).strip() == 'ORBITAL ENERGIES': 

1532 self.skip_lines(inputfile, ['d', 'b', 'NO']) 

1533 orbitals = [] 

1534 vals = next(inputfile).split() 

1535 while vals: 

1536 occ, eh, ev = map(float, vals[1:4]) 

1537 # The irrep will only be printed if using symmetry. 

1538 if len(vals) == 5: 

1539 idx, irrep = vals[4].split('-') 

1540 orbitals.append((occ, ev, int(idx), irrep)) 

1541 else: 

1542 orbitals.append((occ, ev)) 

1543 vals = next(inputfile).split() 

1544 self.skip_lines(inputfile, ['b', 'd']) 

1545 

1546 # Orbital Compositions 

1547 # --------------------------------------------- 

1548 # CAS-SCF STATES FOR BLOCK 1 MULT= 1 IRREP= Ag NROOTS= 2 

1549 # --------------------------------------------- 

1550 # 

1551 # ROOT 0: E= -14.5950507665 Eh 

1552 # 0.89724 [ 0]: 2000 

1553 for b in range(num_blocks): 

1554 # Parse the block data. 

1555 reg = r'BLOCK\s+(\d+) MULT=\s*(\d+) (IRREP=\s*\w+ )?(NROOTS=\s*(\d+))?' 

1556 groups = re.search(reg, next(inputfile)).groups() 

1557 block = int(groups[0]) 

1558 mult = int(groups[1]) 

1559 # The irrep will only be printed if using symmetry. 

1560 if groups[2] is not None: 

1561 irrep = groups[2].split('=')[1].strip() 

1562 nroots = int(groups[3].split('=')[1]) 

1563 

1564 self.skip_lines(inputfile, ['d', 'b']) 

1565 

1566 line = next(inputfile).strip() 

1567 while line: 

1568 if line[:4] == 'ROOT': 

1569 # Parse the root section. 

1570 reg = r'(\d+):\s*E=\s*(-?\d+.\d+) Eh(\s+\d+\.\d+ eV)?(\s+\d+\.\d+)?' 

1571 groups = re.search(reg, line).groups() 

1572 root = int(groups[0]) 

1573 energy = float(groups[1]) 

1574 # Excitation energies are only printed for excited state roots. 

1575 if groups[2] is not None: 

1576 excitation_energy_ev = float(groups[2].split()[0]) 

1577 excitation_energy_cm = float(groups[3]) 

1578 else: 

1579 # Parse the occupations section. 

1580 reg = r'(\d+\.\d+) \[\s*(\d+)\]: (\d+)' 

1581 groups = re.search(reg, line).groups() 

1582 coeff = float(groups[0]) 

1583 number = float(groups[1]) 

1584 occupations = list(map(int, groups[2])) 

1585 

1586 line = next(inputfile).strip() 

1587 

1588 # Skip any extended wavefunction printing. 

1589 while line != 'DENSITY MATRIX': 

1590 line = next(inputfile).strip() 

1591 

1592 self.skip_lines(inputfile, ['d', 'b']) 

1593 # -------------- 

1594 # DENSITY MATRIX 

1595 # -------------- 

1596 # 

1597 # 0 1 2 3 

1598 # 0 0.897244 0.000000 0.000000 0.000000 

1599 # 1 0.000000 0.533964 0.000000 0.000000 

1600 density = numpy.zeros((num_orbs, num_orbs)) 

1601 for i in range(0, num_orbs, 6): 

1602 next(inputfile) 

1603 for j, line in zip(range(num_orbs), inputfile): 

1604 density[j][i:i + 6] = list(map(float, line.split()[1:])) 

1605 self.skip_lines(inputfile, ['Trace', 'b', 'd']) 

1606 

1607 # This is only printed for open-shells. 

1608 # ------------------- 

1609 # SPIN-DENSITY MATRIX 

1610 # ------------------- 

1611 # 

1612 # 0 1 2 3 4 5 

1613 # 0 -0.003709 0.001410 0.000074 -0.000564 -0.007978 0.000735 

1614 # 1 0.001410 -0.001750 -0.000544 -0.003815 0.008462 -0.004529 

1615 if next(inputfile).strip() == 'SPIN-DENSITY MATRIX': 

1616 self.skip_lines(inputfile, ['d', 'b']) 

1617 spin_density = numpy.zeros((num_orbs, num_orbs)) 

1618 for i in range(0, num_orbs, 6): 

1619 next(inputfile) 

1620 for j, line in zip(range(num_orbs), inputfile): 

1621 spin_density[j][i:i + 6] = list(map(float, line.split()[1:])) 

1622 self.skip_lines(inputfile, ['Trace', 'b', 'd', 'ENERGY']) 

1623 self.skip_lines(inputfile, ['d', 'b']) 

1624 

1625 # ----------------- 

1626 # ENERGY COMPONENTS 

1627 # ----------------- 

1628 # 

1629 # One electron energy : -18.811767801 Eh -511.8942 eV 

1630 # Two electron energy : 4.367616074 Eh 118.8489 eV 

1631 # Nuclear repuslion energy : 0.000000000 Eh 0.0000 eV 

1632 # ---------------- 

1633 # -14.444151727 

1634 # 

1635 # Kinetic energy : 14.371970266 Eh 391.0812 eV 

1636 # Potential energy : -28.816121993 Eh -784.1265 eV 

1637 # Virial ratio : -2.005022378 

1638 # ---------------- 

1639 # -14.444151727 

1640 # 

1641 # Core energy : -13.604678408 Eh -370.2021 eV 

1642 one_el_energy = float(next(inputfile).split()[4]) 

1643 two_el_energy = float(next(inputfile).split()[4]) 

1644 nuclear_repulsion_energy = float(next(inputfile).split()[4]) 

1645 self.skip_line(inputfile, 'dashes') 

1646 energy = float(next(inputfile).strip()) 

1647 self.skip_line(inputfile, 'blank') 

1648 kinetic_energy = float(next(inputfile).split()[3]) 

1649 potential_energy = float(next(inputfile).split()[3]) 

1650 virial_ratio = float(next(inputfile).split()[3]) 

1651 self.skip_line(inputfile, 'dashes') 

1652 energy = float(next(inputfile).strip()) 

1653 self.skip_line(inputfile, 'blank') 

1654 core_energy = float(next(inputfile).split()[3]) 

1655 

1656 if line[:15] == 'TOTAL RUN TIME:': 

1657 self.metadata['success'] = True 

1658 

1659 def parse_charge_section(self, line, inputfile, chargestype): 

1660 """Parse a charge section, modifies class in place 

1661 

1662 Parameters 

1663 ---------- 

1664 line : str 

1665 the line which triggered entry here 

1666 inputfile : file 

1667 handle to file object 

1668 chargestype : str 

1669 what type of charge we're dealing with, must be one of 

1670 'mulliken', 'lowdin' or 'chelpg' 

1671 """ 

1672 has_spins = 'AND SPIN POPULATIONS' in line 

1673 

1674 if not hasattr(self, "atomcharges"): 

1675 self.atomcharges = {} 

1676 if has_spins and not hasattr(self, "atomspins"): 

1677 self.atomspins = {} 

1678 

1679 self.skip_line(inputfile, 'dashes') 

1680 

1681 # depending on chargestype, decide when to stop parsing lines 

1682 # start, stop - indices for slicing lines and grabbing values 

1683 if chargestype == 'mulliken': 

1684 should_stop = lambda x: x.startswith('Sum of atomic charges') 

1685 start, stop = 8, 20 

1686 elif chargestype == 'lowdin': 

1687 # stops when blank line encountered 

1688 should_stop = lambda x: not bool(x.strip()) 

1689 start, stop = 8, 20 

1690 elif chargestype == 'chelpg': 

1691 should_stop = lambda x: x.startswith('---') 

1692 start, stop = 11, 26 

1693 

1694 charges = [] 

1695 if has_spins: 

1696 spins = [] 

1697 

1698 line = next(inputfile) 

1699 while not should_stop(line): 

1700 # Don't add point charges or embedding potentials. 

1701 if "Q :" not in line: 

1702 charges.append(float(line[start:stop])) 

1703 if has_spins: 

1704 spins.append(float(line[stop:])) 

1705 line = next(inputfile) 

1706 

1707 self.atomcharges[chargestype] = charges 

1708 if has_spins: 

1709 self.atomspins[chargestype] = spins 

1710 

1711 def parse_scf_condensed_format(self, inputfile, line): 

1712 """ Parse the SCF convergence information in condensed format """ 

1713 

1714 # This is what it looks like 

1715 # ITER Energy Delta-E Max-DP RMS-DP [F,P] Damp 

1716 # *** Starting incremental Fock matrix formation *** 

1717 # 0 -384.5203638934 0.000000000000 0.03375012 0.00223249 0.1351565 0.7000 

1718 # 1 -384.5792776162 -0.058913722842 0.02841696 0.00175952 0.0734529 0.7000 

1719 # ***Turning on DIIS*** 

1720 # 2 -384.6074211837 -0.028143567475 0.04968025 0.00326114 0.0310435 0.0000 

1721 # 3 -384.6479682063 -0.040547022616 0.02097477 0.00121132 0.0361982 0.0000 

1722 # 4 -384.6571124353 -0.009144228947 0.00576471 0.00035160 0.0061205 0.0000 

1723 # 5 -384.6574659959 -0.000353560584 0.00191156 0.00010160 0.0025838 0.0000 

1724 # 6 -384.6574990782 -0.000033082375 0.00052492 0.00003800 0.0002061 0.0000 

1725 # 7 -384.6575005762 -0.000001497987 0.00020257 0.00001146 0.0001652 0.0000 

1726 # 8 -384.6575007321 -0.000000155848 0.00008572 0.00000435 0.0000745 0.0000 

1727 # **** Energy Check signals convergence **** 

1728 

1729 assert line[2] == "Delta-E" 

1730 assert line[3] == "Max-DP" 

1731 

1732 if not hasattr(self, "scfvalues"): 

1733 self.scfvalues = [] 

1734 

1735 self.scfvalues.append([]) 

1736 

1737 # Try to keep track of the converger (NR, DIIS, SOSCF, etc.). 

1738 diis_active = True 

1739 while line: 

1740 

1741 maxDP = None 

1742 if 'Newton-Raphson' in line: 

1743 diis_active = False 

1744 elif 'SOSCF' in line: 

1745 diis_active = False 

1746 elif line[0].isdigit(): 

1747 shim = 0 

1748 try: 

1749 energy = float(line[1]) 

1750 deltaE = float(line[2]) 

1751 maxDP = float(line[3 + int(not diis_active)]) 

1752 rmsDP = float(line[4 + int(not diis_active)]) 

1753 except ValueError as e: 

1754 # Someone in Orca forgot to properly add spaces in the scf printing 

1755 # code looks like: 

1756 # %3i %17.10f%12.12f%11.8f %11.8f 

1757 if line[1].count('.') == 2: 

1758 integer1, decimal1_integer2, decimal2 = line[1].split('.') 

1759 decimal1, integer2 = decimal1_integer2[:10], decimal1_integer2[10:] 

1760 energy = float(integer1 + '.' + decimal1) 

1761 deltaE = float(integer2 + '.' + decimal2) 

1762 maxDP = float(line[2 + int(not diis_active)]) 

1763 rmsDP = float(line[3 + int(not diis_active)]) 

1764 elif line[1].count('.') == 3: 

1765 integer1, decimal1_integer2, decimal2_integer3, decimal3 = line[1].split('.') 

1766 decimal1, integer2 = decimal1_integer2[:10], decimal1_integer2[10:] 

1767 decimal2, integer3 = decimal2_integer3[:12], decimal2_integer3[12:] 

1768 energy = float(integer1 + '.' + decimal1) 

1769 deltaE = float(integer2 + '.' + decimal2) 

1770 maxDP = float(integer3 + '.' + decimal3) 

1771 rmsDP = float(line[2 + int(not diis_active)]) 

1772 elif line[2].count('.') == 2: 

1773 integer1, decimal1_integer2, decimal2 = line[2].split('.') 

1774 decimal1, integer2 = decimal1_integer2[:12], decimal1_integer2[12:] 

1775 deltaE = float(integer1 + '.' + decimal1) 

1776 maxDP = float(integer2 + '.' + decimal2) 

1777 rmsDP = float(line[3 + int(not diis_active)]) 

1778 else: 

1779 raise e 

1780 

1781 self.scfvalues[-1].append([deltaE, maxDP, rmsDP]) 

1782 

1783 try: 

1784 line = next(inputfile).split() 

1785 except StopIteration: 

1786 self.logger.warning('File terminated before end of last SCF! Last Max-DP: {}'.format(maxDP)) 

1787 break 

1788 

1789 def parse_scf_expanded_format(self, inputfile, line): 

1790 """ Parse SCF convergence when in expanded format. """ 

1791 

1792 # The following is an example of the format 

1793 # ----------------------------------------- 

1794 # 

1795 # *** Starting incremental Fock matrix formation *** 

1796 # 

1797 # ---------------------------- 

1798 # ! ITERATION 0 ! 

1799 # ---------------------------- 

1800 # Total Energy : -377.960836651297 Eh 

1801 # Energy Change : -377.960836651297 Eh 

1802 # MAX-DP : 0.100175793695 

1803 # RMS-DP : 0.004437973661 

1804 # Actual Damping : 0.7000 

1805 # Actual Level Shift : 0.2500 Eh 

1806 # Int. Num. El. : 43.99982197 (UP= 21.99991099 DN= 21.99991099) 

1807 # Exchange : -34.27550826 

1808 # Correlation : -2.02540957 

1809 # 

1810 # 

1811 # ---------------------------- 

1812 # ! ITERATION 1 ! 

1813 # ---------------------------- 

1814 # Total Energy : -378.118458080109 Eh 

1815 # Energy Change : -0.157621428812 Eh 

1816 # MAX-DP : 0.053240648588 

1817 # RMS-DP : 0.002375092508 

1818 # Actual Damping : 0.7000 

1819 # Actual Level Shift : 0.2500 Eh 

1820 # Int. Num. El. : 43.99994143 (UP= 21.99997071 DN= 21.99997071) 

1821 # Exchange : -34.00291075 

1822 # Correlation : -2.01607243 

1823 # 

1824 # ***Turning on DIIS*** 

1825 # 

1826 # ---------------------------- 

1827 # ! ITERATION 2 ! 

1828 # ---------------------------- 

1829 # .... 

1830 # 

1831 if not hasattr(self, "scfvalues"): 

1832 self.scfvalues = [] 

1833 

1834 self.scfvalues.append([]) 

1835 

1836 line = "Foo" # dummy argument to enter loop 

1837 while line.find("******") < 0: 

1838 try: 

1839 line = next(inputfile) 

1840 except StopIteration: 

1841 self.logger.warning('File terminated before end of last SCF!') 

1842 break 

1843 info = line.split() 

1844 if len(info) > 1 and info[1] == "ITERATION": 

1845 dashes = next(inputfile) 

1846 energy_line = next(inputfile).split() 

1847 energy = float(energy_line[3]) 

1848 deltaE_line = next(inputfile).split() 

1849 deltaE = float(deltaE_line[3]) 

1850 if energy == deltaE: 

1851 deltaE = 0 

1852 maxDP_line = next(inputfile).split() 

1853 maxDP = float(maxDP_line[2]) 

1854 rmsDP_line = next(inputfile).split() 

1855 rmsDP = float(rmsDP_line[2]) 

1856 self.scfvalues[-1].append([deltaE, maxDP, rmsDP]) 

1857 

1858 return 

1859 

1860 # end of parse_scf_expanded_format 

1861 

1862 def _append_scfvalues_scftargets(self, inputfile, line): 

1863 # The SCF convergence targets are always printed after this, but apparently 

1864 # not all of them always -- for example the RMS Density is missing for geometry 

1865 # optimization steps. So, assume the previous value is still valid if it is 

1866 # not found. For additional certainty, assert that the other targets are unchanged. 

1867 while not "Last Energy change" in line: 

1868 line = next(inputfile) 

1869 deltaE_value = float(line.split()[4]) 

1870 deltaE_target = float(line.split()[7]) 

1871 line = next(inputfile) 

1872 if "Last MAX-Density change" in line: 

1873 maxDP_value = float(line.split()[4]) 

1874 maxDP_target = float(line.split()[7]) 

1875 line = next(inputfile) 

1876 if "Last RMS-Density change" in line: 

1877 rmsDP_value = float(line.split()[4]) 

1878 rmsDP_target = float(line.split()[7]) 

1879 else: 

1880 rmsDP_value = self.scfvalues[-1][-1][2] 

1881 rmsDP_target = self.scftargets[-1][2] 

1882 assert deltaE_target == self.scftargets[-1][0] 

1883 assert maxDP_target == self.scftargets[-1][1] 

1884 self.scfvalues[-1].append([deltaE_value, maxDP_value, rmsDP_value]) 

1885 self.scftargets.append([deltaE_target, maxDP_target, rmsDP_target])