Coverage for cclib/parser/orcaparser.py: 92%

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1# -*- coding: utf-8 -*-

5# This file is part of cclib (http://cclib.github.io) and is distributed under

6# the terms of the BSD 3-Clause License.

8"""Parser for ORCA output files"""

11import re

12from itertools import zip_longest

14import numpy

15from packaging.version import parse as parse_version

17from cclib.parser import logfileparser

18from cclib.parser import utils

21class ORCA(logfileparser.Logfile):

22 """An ORCA log file."""

24 def __init__(self, *args, **kwargs):

26 # Call the __init__ method of the superclass

27 super(ORCA, self).__init__(logname="ORCA", *args, **kwargs)

29 def __str__(self):

30 """Return a string representation of the object."""

31 return "ORCA log file %s" % (self.filename)

33 def __repr__(self):

34 """Return a representation of the object."""

35 return 'ORCA("%s")' % (self.filename)

37 def normalisesym(self, label):

38 """ORCA does not require normalizing symmetry labels."""

39 return label

41 def before_parsing(self):

43 # A geometry optimization is started only when

44 # we parse a cycle (so it will be larger than zero().

45 self.gopt_cycle = 0

47 # Keep track of whether this is a relaxed scan calculation

48 self.is_relaxed_scan = False

50 def after_parsing(self):

51 # ORCA doesn't add the dispersion energy to the "Total energy" (which

52 # we parse), only to the "FINAL SINGLE POINT ENERGY" (which we don't

53 # parse).

54 if hasattr(self, "scfenergies") and hasattr(self, "dispersionenergies"):

55 for i, (scfenergy, dispersionenergy) in enumerate(

56 zip_longest(self.scfenergies, self.dispersionenergies)

57 ):

58 # It isn't as problematic if there are more dispersion than

59 # SCF energies, since all dispersion energies can still be

60 # added to the SCF energies, hence the difference in log level.

61 if dispersionenergy is None:

62 self.logger.error(

63 "The number of SCF and dispersion energies are not equal: %d vs. %d, "

64 "can't add dispersion energy to all SCF energies",

65 len(self.scfenergies),

66 len(self.dispersionenergies)

67 )

68 break

69 if scfenergy is None:

70 self.logger.warning(

71 "The number of SCF and dispersion energies are not equal: %d vs. %d, "

72 "can't add dispersion energy to all SCF energies",

73 len(self.scfenergies),

74 len(self.dispersionenergies)

75 )

76 break

77 self.scfenergies[i] += dispersionenergy

79 def extract(self, inputfile, line):

80 """Extract information from the file object inputfile."""

82 # Extract the version number.

83 if "Program Version" == line.strip()[:15]:

84 # Handle development versions.

85 self.metadata["legacy_package_version"] = line.split()[2]

86 self.metadata["package_version"] = self.metadata["legacy_package_version"].replace(".x", "dev")

87 possible_revision_line = next(inputfile)

88 if "SVN: $Rev" in possible_revision_line:

89 self.metadata["package_version"] += "+{}".format(

90 re.search(r"\d+", possible_revision_line).group()

91 )

94 # ================================================================================

95 # WARNINGS

96 # Please study these warnings very carefully!

97 # ================================================================================

98 #

99 # Warning: TCutStore was < 0. Adjusted to Thresh (uncritical)

100 #

101 # WARNING: your system is open-shell and RHF/RKS was chosen

102 # ===> : WILL SWITCH to UHF/UKS

103 #

104 #

105 # INFO : the flag for use of LIBINT has been found!

106 #

107 # ================================================================================

108 if "WARNINGS" == line.strip():

109 self.skip_lines(inputfile, ['text', '=', 'blank'])

110 if 'warnings' not in self.metadata:

111 self.metadata['warnings'] = []

112 if 'info' not in self.metadata:

113 self.metadata['info'] = []

114

115 line = next(inputfile)

116 while line[0] != '=':

117 if line.lower()[:7] == 'warning':

118 self.metadata['warnings'].append('')

119 while len(line) > 1:

120 self.metadata['warnings'][-1] += line[9:].strip()

121 line = next(inputfile)

122 elif line.lower()[:4] == 'info':

123 self.metadata['info'].append('')

124 while len(line) > 1:

125 self.metadata['info'][-1] += line[9:].strip()

126 line = next(inputfile)

127 line = next(inputfile)

128

129 # ================================================================================

130 # INPUT FILE

131 # ================================================================================

132 # NAME = input.dat

133 # | 1> %pal nprocs 4 end

134 # | 2> ! B3LYP def2-svp

135 # | 3> ! Grid4

136 # | 4>

137 # | 5> *xyz 0 3

138 # | 6> O 0 0 0

139 # | 7> O 0 0 1.5

140 # | 8> *

141 # | 9>

142 # | 10> ****END OF INPUT****

143 # ================================================================================

144 if "INPUT FILE" == line.strip():

145 self.skip_line(inputfile, '=')

146 self.metadata['input_file_name'] = next(inputfile).split()[-1]

147

148 # First, collect all the lines...

149 lines = []

150 for line in inputfile:

151 if line[0] != '|':

152 break

153 lines.append(line[6:])

154

155 self.metadata['input_file_contents'] = ''.join(lines[:-1])

156 lines_iter = iter(lines[:-1])

157

158 keywords = []

159 coords = []

160 # ...then parse them separately.

161 for line in lines_iter:

162 line = line.strip()

163 if not line:

164 continue

165

166 # Keywords block

167 if line[0] == '!':

168 keywords += line[1:].split()

169

170 # Impossible to parse without knowing whether a keyword opens a new block

171 elif line[0] == '%':

172 pass

173

174 # Geometry block

175 elif line[0] == '*':

176 coord_type, charge, multiplicity = line[1:].split()[:3]

177 self.set_attribute('charge', int(charge))

178 self.set_attribute('multiplicity', int(multiplicity))

179 coord_type = coord_type.lower()

180 self.metadata['coord_type'] = coord_type

181 if coord_type == 'xyz':

182 def splitter(line):

183 atom, x, y, z = line.split()[:4]

184 return [atom, float(x), float(y), float(z)]

185 elif coord_type in ['int', 'internal']:

186 def splitter(line):

187 atom, a1, a2, a3, bond, angle, dihedral = line.split()[:7]

188 return [atom, int(a1), int(a2), int(a3), float(bond), float(angle), float(dihedral)]

189 elif coord_type == 'gzmt':

190 def splitter(line):

191 vals = line.split()[:7]

192 if len(vals) == 7:

193 atom, a1, bond, a2, angle, a3, dihedral = vals

194 return [atom, int(a1), float(bond), int(a2), float(angle), int(a3), float(dihedral)]

195 elif len(vals) == 5:

196 return [vals[0], int(vals[1]), float(vals[2]), int(vals[3]), float(vals[4])]

197 elif len(vals) == 3:

198 return [vals[0], int(vals[1]), float(vals[2])]

199 elif len(vals) == 1:

200 return [vals[0]]

201 self.logger.warning('Incorrect number of atoms in input geometry.')

202 elif 'file' in coord_type:

203 pass

204 else:

205 self.logger.warning('Invalid coordinate type.')

206

207 if 'file' not in coord_type:

208 for line in lines_iter:

209 if not line:

210 continue

211 if line[0] == '#' or line.strip(' ') == '\n':

212 continue

213 if line.strip()[0] == '*' or line.strip() == "end":

214 break

215 # Strip basis specification that can appear after coordinates

216 line = line.split('newGTO')[0].strip()

217 coords.append(splitter(line))

218

219 self.metadata['keywords'] = keywords

220 self.metadata['coords'] = coords

221

222 if line[0:15] == "Number of atoms":

223

224 natom = int(line.split()[-1])

225 self.set_attribute('natom', natom)

226

227 if line[1:13] == "Total Charge":

228

229 charge = int(line.split()[-1])

230 self.set_attribute('charge', charge)

231

232 line = next(inputfile)

233

234 mult = int(line.split()[-1])

235 self.set_attribute('mult', mult)

236

237 # SCF convergence output begins with:

238 #

239 # --------------

240 # SCF ITERATIONS

241 # --------------

242 #

243 # However, there are two common formats which need to be handled, implemented as separate functions.

244 if line.strip() == "SCF ITERATIONS":

245

246 self.skip_line(inputfile, 'dashes')

247

248 line = next(inputfile)

249 columns = line.split()

250 # "Starting incremental Fock matrix formation" doesn't

251 # necessarily appear before the extended format.

252 if not columns:

253 self.parse_scf_expanded_format(inputfile, columns)

254 # A header with distinct columns indicates the condensed

255 # format.

256 elif columns[1] == "Energy":

257 self.parse_scf_condensed_format(inputfile, columns)

258 # Assume the extended format.

259 else:

260 self.parse_scf_expanded_format(inputfile, columns)

261

262 # Information about the final iteration, which also includes the convergence

263 # targets and the convergence values, is printed separately, in a section like this:

264 #

265 # *****************************************************

266 # * SUCCESS *

267 # * SCF CONVERGED AFTER 9 CYCLES *

268 # *****************************************************

269 #

270 # ...

271 #

272 # Total Energy : -382.04963064 Eh -10396.09898 eV

273 #

274 # ...

275 #

276 # ------------------------- ----------------

277 # FINAL SINGLE POINT ENERGY -382.049630637

278 # ------------------------- ----------------

279 #

280 # We cannot use this last message as a stop condition in general, because

281 # often there is vibrational output before it. So we use the 'Total Energy'

282 # line. However, what comes after that is different for single point calculations

283 # and in the inner steps of geometry optimizations.

284 if "SCF CONVERGED AFTER" in line:

285

286 if not hasattr(self, "scfenergies"):

287 self.scfenergies = []

288 if not hasattr(self, "scfvalues"):

289 self.scfvalues = []

290 if not hasattr(self, "scftargets"):

291 self.scftargets = []

292

293 while not "Total Energy :" in line:

294 line = next(inputfile)

295 energy = utils.convertor(float(line.split()[3]), "hartree", "eV")

296 self.scfenergies.append(energy)

297

298 self._append_scfvalues_scftargets(inputfile, line)

299

300 # Sometimes the SCF does not converge, but does not halt the

301 # the run (like in bug 3184890). In this this case, we should

302 # remain consistent and use the energy from the last reported

303 # SCF cycle. In this case, ORCA print a banner like this:

304 #

305 # *****************************************************

306 # * ERROR *

307 # * SCF NOT CONVERGED AFTER 8 CYCLES *

308 # *****************************************************

309 if "SCF NOT CONVERGED AFTER" in line:

310

311 if not hasattr(self, "scfenergies"):

312 self.scfenergies = []

313 if not hasattr(self, "scfvalues"):

314 self.scfvalues = []

315 if not hasattr(self, "scftargets"):

316 self.scftargets = []

317

318 energy = utils.convertor(self.scfvalues[-1][-1][0], "hartree", "eV")

319 self.scfenergies.append(energy)

320

321 self._append_scfvalues_scftargets(inputfile, line)

322

323 """

324-------------------------------------------------------------------------------

325 DFT DISPERSION CORRECTION

326

327 DFTD3 V3.1 Rev 1

328 USING zero damping

329-------------------------------------------------------------------------------

330The omegaB97X-D3 functional is recognized. Fit by Chai et al.

331Active option DFTDOPT ... 3

332

333molecular C6(AA) [au] = 9563.878941

334

335

336 DFT-D V3

337 parameters

338 s6 scaling factor : 1.0000

339 rs6 scaling factor : 1.2810

340 s8 scaling factor : 1.0000

341 rs8 scaling factor : 1.0940

342 Damping factor alpha6 : 14.0000

343 Damping factor alpha8 : 16.0000

344 ad hoc parameters k1-k3 : 16.0000 1.3333 -4.0000

345

346 Edisp/kcal,au: -10.165629059768 -0.016199959356

347 E6 /kcal : -4.994512983

348 E8 /kcal : -5.171116077

349 % E8 : 50.868628459

350

351------------------------- ----------------

352Dispersion correction -0.016199959

353------------------------- ----------------

354"""

355 if "DFT DISPERSION CORRECTION" in line:

356 # A bunch of parameters are printed the first time dispersion is called

357 # However, they vary wildly in form and number, making parsing problematic

358 line = next(inputfile)

359 while 'Dispersion correction' not in line:

360 line = next(inputfile)

361 dispersion = utils.convertor(float(line.split()[-1]), "hartree", "eV")

362 self.append_attribute("dispersionenergies", dispersion)

363

364 # The convergence targets for geometry optimizations are printed at the

365 # beginning of the output, although the order and their description is

366 # different than later on. So, try to standardize the names of the criteria

367 # and save them for later so that we can get the order right.

368 #

369 # *****************************

370 # * Geometry Optimization Run *

371 # *****************************

372 #

373 # Geometry optimization settings:

374 # Update method Update .... BFGS

375 # Choice of coordinates CoordSys .... Redundant Internals

376 # Initial Hessian InHess .... Almoef's Model

377 #

378 # Convergence Tolerances:

379 # Energy Change TolE .... 5.0000e-06 Eh

380 # Max. Gradient TolMAXG .... 3.0000e-04 Eh/bohr

381 # RMS Gradient TolRMSG .... 1.0000e-04 Eh/bohr

382 # Max. Displacement TolMAXD .... 4.0000e-03 bohr

383 # RMS Displacement TolRMSD .... 2.0000e-03 bohr

384 #

385 if line[25:50] == "Geometry Optimization Run":

386

387 stars = next(inputfile)

388 blank = next(inputfile)

389

390 line = next(inputfile)

391 while line[0:23] != "Convergence Tolerances:":

392 line = next(inputfile)

393

394 if hasattr(self, 'geotargets'):

395 self.logger.warning('The geotargets attribute should not exist yet. There is a problem in the parser.')

396 self.geotargets = []

397 self.geotargets_names = []

398

399 # There should always be five tolerance values printed here.

400 for i in range(5):

401 line = next(inputfile)

402 name = line[:25].strip().lower().replace('.', '').replace('displacement', 'step')

403 target = float(line.split()[-2])

404 self.geotargets_names.append(name)

405 self.geotargets.append(target)

406

407 # The convergence targets for relaxed surface scan steps are printed at the

408 # beginning of the output, although the order and their description is

409 # different than later on. So, try to standardize the names of the criteria

410 # and save them for later so that we can get the order right.

411 #

412 # *************************************************************

413 # * RELAXED SURFACE SCAN STEP 12 *

414 # * *

415 # * Dihedral ( 11, 10, 3, 4) : 180.00000000 *

416 # *************************************************************

417 #

418 # Geometry optimization settings:

419 # Update method Update .... BFGS

420 # Choice of coordinates CoordSys .... Redundant Internals

421 # Initial Hessian InHess .... Almoef's Model

422 #

423 # Convergence Tolerances:

424 # Energy Change TolE .... 5.0000e-06 Eh

425 # Max. Gradient TolMAXG .... 3.0000e-04 Eh/bohr

426 # RMS Gradient TolRMSG .... 1.0000e-04 Eh/bohr

427 # Max. Displacement TolMAXD .... 4.0000e-03 bohr

428 # RMS Displacement TolRMSD .... 2.0000e-03 bohr

429 if line[25:50] == "RELAXED SURFACE SCAN STEP":

430

431 self.is_relaxed_scan = True

432 blank = next(inputfile)

433 info = next(inputfile)

434 stars = next(inputfile)

435 blank = next(inputfile)

436

437 line = next(inputfile)

438 while line[0:23] != "Convergence Tolerances:":

439 line = next(inputfile)

440

441 self.geotargets = []

442 self.geotargets_names = []

443

444 # There should always be five tolerance values printed here.

445 for i in range(5):

446 line = next(inputfile)

447 name = line[:25].strip().lower().replace('.', '').replace('displacement', 'step')

448 target = float(line.split()[-2])

449 self.geotargets_names.append(name)

450 self.geotargets.append(target)

451

452

453 # Moller-Plesset energies.

454 #

455 # ---------------------------------------

456 # MP2 TOTAL ENERGY: -76.112119693 Eh

457 # ---------------------------------------

458 if 'MP2 TOTAL ENERGY' in line[:16]:

459

460 if not hasattr(self, 'mpenergies'):

461 self.metadata['methods'].append('MP2')

462 self.mpenergies = []

463

464 self.mpenergies.append([])

465 mp2energy = utils.float(line.split()[-2])

466 self.mpenergies[-1].append(utils.convertor(mp2energy, 'hartree', 'eV'))

467

468 # MP2 energy output line is different for MP3, since it uses the MDCI

469 # code, which is also in charge of coupled cluster.

470 #

471 # MP3 calculation:

472 # E(MP2) = -76.112119775 EC(MP2)= -0.128216417

473 # E(MP3) = -76.113783480 EC(MP3)= -0.129880122 E3= -0.001663705

474 #

475 # CCSD calculation:

476 # E(MP2) ... -0.393722942

477 # Initial E(tot) ... -1639.631576169

478 # <T|T> ... 0.087231847

479 # Number of pairs included ... 55

480 # Total number of pairs ... 55

481 if 'E(MP2)' in line:

482

483 if not hasattr(self, 'mpenergies'):

484 self.mpenergies = []

485

486 self.mpenergies.append([])

487 mp2energy = utils.float(line.split()[-1])

488 self.mpenergies[-1].append(utils.convertor(mp2energy, 'hartree', 'eV'))

489

490 line = next(inputfile)

491 if line[:6] == 'E(MP3)':

492 self.metadata['methods'].append('MP3')

493 mp3energy = utils.float(line.split()[2])

494 self.mpenergies[-1].append(utils.convertor(mp3energy, 'hartree', 'eV'))

495 else:

496 assert line[:14] == 'Initial E(tot)'

497

498 # ----------------------

499 # COUPLED CLUSTER ENERGY

500 # ----------------------

501 #

502 # E(0) ... -1639.237853227

503 # E(CORR) ... -0.360153516

504 # E(TOT) ... -1639.598006742

505 # Singles Norm <S|S>**1/2 ... 0.176406354

506 # T1 diagnostic ... 0.039445660

507 if line[:22] == 'COUPLED CLUSTER ENERGY':

508 self.skip_lines(inputfile, ['d', 'b'])

509 line = next(inputfile)

510 assert line[:4] == 'E(0)'

511 scfenergy = utils.convertor(utils.float(line.split()[-1]), 'hartree', 'eV')

512 line = next(inputfile)

513 assert line[:7] == 'E(CORR)'

514 while 'E(TOT)' not in line:

515 line = next(inputfile)

516 self.append_attribute(

517 'ccenergies',

518 utils.convertor(utils.float(line.split()[-1]), 'hartree', 'eV')

519 )

520 line = next(inputfile)

521 assert line[:23] == 'Singles Norm <S|S>**1/2'

522 line = next(inputfile)

523 self.metadata["t1_diagnostic"] = utils.float(line.split()[-1])

524

525 # ------------------

526 # CARTESIAN GRADIENT

527 # ------------------

528 #

529 # 1 H : 0.000000004 0.019501450 -0.021537091

530 # 2 O : 0.000000054 -0.042431648 0.042431420

531 # 3 H : 0.000000004 0.021537179 -0.019501388

532 #

533 # ORCA MP2 module has different signal than 'CARTESIAN GRADIENT'.

534 #

535 # The final MP2 gradient

536 # 0: 0.01527469 -0.00292883 0.01125000

537 # 1: 0.00098782 -0.00040549 0.00196825

538 # 2: -0.01626251 0.00333431 -0.01321825

539 if line[:18] == 'CARTESIAN GRADIENT' or line[:22] == 'The final MP2 gradient':

540

541 grads = []

542 if line[:18] == 'CARTESIAN GRADIENT':

543 self.skip_lines(inputfile, ['dashes', 'blank'])

544

545 line = next(inputfile).strip()

546 if 'CONSTRAINED CARTESIAN COORDINATES' in line:

547 self.skip_line(

548 inputfile, 'constrained Cartesian coordinate warning'

549 )

550 line = next(inputfile).strip()

551

552 while line:

553 tokens = line.split()

554 x, y, z = float(tokens[-3]), float(tokens[-2]), float(tokens[-1])

555 grads.append((x, y, z))

556 line = next(inputfile).strip()

557

558 if not hasattr(self, 'grads'):

559 self.grads = []

560 self.grads.append(grads)

561

562

563 # After each geometry optimization step, ORCA prints the current convergence

564 # parameters and the targets (again), so it is a good idea to check that they

565 # have not changed. Note that the order of these criteria here are different

566 # than at the beginning of the output, so make use of the geotargets_names created

567 # before and save the new geovalues in correct order.

568 #

569 # ----------------------|Geometry convergence|---------------------

570 # Item value Tolerance Converged

571 # -----------------------------------------------------------------

572 # Energy change 0.00006021 0.00000500 NO

573 # RMS gradient 0.00031313 0.00010000 NO

574 # RMS step 0.01596159 0.00200000 NO

575 # MAX step 0.04324586 0.00400000 NO

576 # ....................................................

577 # Max(Bonds) 0.0218 Max(Angles) 2.48

578 # Max(Dihed) 0.00 Max(Improp) 0.00

579 # -----------------------------------------------------------------

580 #

581 if line[33:53] == "Geometry convergence":

582

583 headers = next(inputfile)

584 dashes = next(inputfile)

585

586 names = []

587 values = []

588 targets = []

589 line = next(inputfile)

590 # Handle both the dots only and dashes only cases

591 while len(list(set(line.strip()))) != 1:

592 name = line[10:28].strip().lower()

593 tokens = line.split()

594 value = float(tokens[2])

595 target = float(tokens[3])

596 names.append(name)

597 values.append(value)

598 targets.append(target)

599 line = next(inputfile)

600

601 # The energy change is normally not printed in the first iteration, because

602 # there was no previous energy -- in that case assume zero. There are also some

603 # edge cases where the energy change is not printed, for example when internal

604 # angles become improper and internal coordinates are rebuilt as in regression

605 # CuI-MePY2-CH3CN_optxes, and in such cases use NaN.

606 newvalues = []

607 for i, n in enumerate(self.geotargets_names):

608 if (n == "energy change") and (n not in names):

609 if self.is_relaxed_scan:

610 newvalues.append(0.0)

611 else:

612 newvalues.append(numpy.nan)

613 else:

614 newvalues.append(values[names.index(n)])

615 assert targets[names.index(n)] == self.geotargets[i]

616

617 self.append_attribute("geovalues", newvalues)

618

619 """ Grab cartesian coordinates

620 ---------------------------------

621 CARTESIAN COORDINATES (ANGSTROEM)

622 ---------------------------------

623 H 0.000000 0.000000 0.000000

624 O 0.000000 0.000000 1.000000

625 H 0.000000 1.000000 1.000000

626 """

627 if line[0:33] == "CARTESIAN COORDINATES (ANGSTROEM)":

628 next(inputfile)

629

630 atomnos = []

631 atomcoords = []

632 line = next(inputfile)

633 while len(line) > 1:

634 atom, x, y, z = line.split()

635 if atom[-1] != ">":

636 atomnos.append(self.table.number[atom])

637 atomcoords.append([float(x), float(y), float(z)])

638 line = next(inputfile)

639

640 self.set_attribute('natom', len(atomnos))

641 self.set_attribute('atomnos', atomnos)

642 self.append_attribute("atomcoords", atomcoords)

643

644 """ Grab atom masses

645 ----------------------------

646 CARTESIAN COORDINATES (A.U.)

647 ----------------------------

648 NO LB ZA FRAG MASS X Y Z

649 0 H 1.0000 0 1.008 0.000000 0.000000 0.000000

650 1 O 8.0000 0 15.999 0.000000 0.000000 1.889726

651 2 H 1.0000 0 1.008 0.000000 1.889726 1.889726

652 """

653 if line[0:28] == "CARTESIAN COORDINATES (A.U.)" and not hasattr(self, 'atommasses'):

654 next(inputfile)

655 next(inputfile)

656

657 line = next(inputfile)

658 self.atommasses = []

659 while len(line) > 1:

660 if line[:32] == '* core charge reduced due to ECP':

661 break

662 if line.strip() == "> coreless ECP center with (optional) point charge":

663 break

664 no, lb, za, frag, mass, x, y, z = line.split()

665 if lb[-1] != ">":

666 self.atommasses.append(float(mass))

667 line = next(inputfile)

668

669 if line[21:68] == "FINAL ENERGY EVALUATION AT THE STATIONARY POINT":

670 if not hasattr(self, 'optdone'):

671 self.optdone = []

672 self.optdone.append(len(self.atomcoords))

673

674 if "The optimization did not converge" in line:

675 if not hasattr(self, 'optdone'):

676 self.optdone = []

677

678 if line[0:16] == "ORBITAL ENERGIES":

679

680 self.skip_lines(inputfile, ['d', 'text', 'text'])

681

682 self.mooccnos = [[]]

683 self.moenergies = [[]]

684

685 line = next(inputfile)

686 while len(line) > 20: # restricted calcs are terminated by ------

687 info = line.split()

688 mooccno = int(float(info[1]))

689 moenergy = float(info[2])

690 self.mooccnos[0].append(mooccno)

691 self.moenergies[0].append(utils.convertor(moenergy, "hartree", "eV"))

692 line = next(inputfile)

693

694 line = next(inputfile)

695

696 # handle beta orbitals for UHF

697 if line[17:35] == "SPIN DOWN ORBITALS":

698 text = next(inputfile)

699

700 self.mooccnos.append([])

701 self.moenergies.append([])

702

703 line = next(inputfile)

704 while len(line) > 20: # actually terminated by ------

705 info = line.split()

706 mooccno = int(float(info[1]))

707 moenergy = float(info[2])

708 self.mooccnos[1].append(mooccno)

709 self.moenergies[1].append(utils.convertor(moenergy, "hartree", "eV"))

710 line = next(inputfile)

711

712 if not hasattr(self, 'homos'):

713 doubly_occupied = self.mooccnos[0].count(2)

714 singly_occupied = self.mooccnos[0].count(1)

715 # Restricted closed-shell.

716 if doubly_occupied > 0 and singly_occupied == 0:

717 self.set_attribute('homos', [doubly_occupied - 1])

718 # Restricted open-shell.

719 elif doubly_occupied > 0 and singly_occupied > 0:

720 self.set_attribute('homos', [doubly_occupied + singly_occupied - 1,

721 doubly_occupied - 1])

722 # Unrestricted.

723 else:

724 assert len(self.moenergies) == 2

725 assert doubly_occupied == 0

726 assert self.mooccnos[1].count(2) == 0

727 nbeta = self.mooccnos[1].count(1)

728 self.set_attribute('homos', [singly_occupied - 1, nbeta - 1])

729

730 # So nbasis was parsed at first with the first pattern, but it turns out that

731 # semiempirical methods (at least AM1 as reported by Julien Idé) do not use this.

732 # For this reason, also check for the second patterns, and use it as an assert

733 # if nbasis was already parsed. Regression PCB_1_122.out covers this test case.

734 if line[1:32] == "# of contracted basis functions":

735 self.set_attribute('nbasis', int(line.split()[-1]))

736 if line[1:27] == "Basis Dimension Dim":

737 self.set_attribute('nbasis', int(line.split()[-1]))

738

739 if line[0:14] == "OVERLAP MATRIX":

740

741 self.skip_line(inputfile, 'dashes')

742

743 self.aooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d")

744 for i in range(0, self.nbasis, 6):

745 self.updateprogress(inputfile, "Overlap")

746

747 header = next(inputfile)

748 size = len(header.split())

749

750 for j in range(self.nbasis):

751 line = next(inputfile)

752 broken = line.split()

753 self.aooverlaps[j, i:i+size] = list(map(float, broken[1:size+1]))

754

755 # Molecular orbital coefficients are parsed here, but also related things

756 #like atombasis and aonames if possible.

757 #

758 # Normally the output is easy to parse like this:

759 # ------------------

760 # MOLECULAR ORBITALS

761 # ------------------

762 # 0 1 2 3 4 5

763 # -19.28527 -19.26828 -19.26356 -19.25801 -19.25765 -19.21471

764 # 2.00000 2.00000 2.00000 2.00000 2.00000 2.00000

765 # -------- -------- -------- -------- -------- --------

766 # 0C 1s 0.000002 -0.000001 0.000000 0.000000 -0.000000 0.000001

767 # 0C 2s -0.000007 0.000006 -0.000002 -0.000000 0.000001 -0.000003

768 # 0C 3s -0.000086 -0.000061 0.000058 -0.000033 -0.000027 -0.000058

769 # ...

770 #

771 # But when the numbers get big, things get yucky since ORCA does not use

772 # fixed width formatting for the floats, and does not insert extra spaces

773 # when the numbers get wider. So things get stuck together overflowing columns,

774 # like this:

775 # 12C 6s -11.608845-53.775398161.302640-76.633779 29.914985 22.083999

776 #

777 # One assumption that seems to hold is that there are always six significant

778 # digits in the coefficients, so we can try to use that to delineate numbers

779 # when the parsing gets rough. This is what we do below with a regex, and a case

780 # like this is tested in regression ORCA/ORCA4.0/invalid-literal-for-float.out

781 # which was reported in https://github.com/cclib/cclib/issues/629

782 if line[0:18] == "MOLECULAR ORBITALS":

783

784 self.skip_line(inputfile, 'dashes')

785

786 aonames = []

787 atombasis = [[] for i in range(self.natom)]

788 mocoeffs = [numpy.zeros((self.nbasis, self.nbasis), "d")]

789

790 for spin in range(len(self.moenergies)):

791

792 if spin == 1:

793 self.skip_line(inputfile, 'blank')

794 mocoeffs.append(numpy.zeros((self.nbasis, self.nbasis), "d"))

795

796 for i in range(0, self.nbasis, 6):

797

798 self.updateprogress(inputfile, "Coefficients")

799

800 self.skip_lines(inputfile, ['numbers', 'energies', 'occs'])

801 dashes = next(inputfile)

802

803 for j in range(self.nbasis):

804 line = next(inputfile)

805

806 # Only need this in the first iteration.

807 if spin == 0 and i == 0:

808 atomname = line[3:5].split()[0]

809 num = int(line[0:3])

810 orbital = line.split()[1].upper()

811

812 aonames.append("%s%i_%s" % (atomname, num+1, orbital))

813 atombasis[num].append(j)

814

815 # This regex will tease out all number with exactly

816 # six digits after the decimal point.

817 coeffs = re.findall(r'-?\d+\.\d{6}', line)

818

819 # Something is very wrong if this does not hold.

820 assert len(coeffs) <= 6

821

822 mocoeffs[spin][i:i+len(coeffs), j] = [float(c) for c in coeffs]

823

824 self.set_attribute('aonames', aonames)

825 self.set_attribute('atombasis', atombasis)

826 self.set_attribute("mocoeffs", mocoeffs)

827

828 # Basis set information

829 # ORCA prints this out in a somewhat indirect fashion.

830 # Therefore, parsing occurs in several steps:

831 # 1. read which atom belongs to which basis set group

832 if line[0:21] == "BASIS SET INFORMATION":

833 line = next(inputfile)

834 line = next(inputfile)

835

836 self.tmp_atnames = [] # temporary attribute, needed later

837 while(not line[0:5] == '-----'):

838 if line[0:4] == "Atom":

839 self.tmp_atnames.append(line[8:12].strip())

840 line = next(inputfile)

841

842 # 2. Read information for the basis set groups

843 if line[0:25] == "BASIS SET IN INPUT FORMAT":

844 line = next(inputfile)

845 line = next(inputfile)

846

847 # loop over basis set groups

848 gbasis_tmp = {}

849 while(not line[0:5] == '-----'):

850 if line[1:7] == 'NewGTO':

851 bas_atname = line.split()[1]

852 gbasis_tmp[bas_atname] = []

853

854 line = next(inputfile)

855 # loop over contracted GTOs

856 while(not line[0:6] == ' end;'):

857 words = line.split()

858 ang = words[0]

859 nprim = int(words[1])

860

861 # loop over primitives

862 coeff = []

863 for iprim in range(nprim):

864 words = next(inputfile).split()

865 coeff.append( (float(words[1]), float(words[2])) )

866 gbasis_tmp[bas_atname].append((ang, coeff))

867

868 line = next(inputfile)

869 line = next(inputfile)

870

871 # 3. Assign the basis sets to gbasis

872 self.gbasis = []

873 for bas_atname in self.tmp_atnames:

874 self.gbasis.append(gbasis_tmp[bas_atname])

875 del self.tmp_atnames

876

877 """

878 --------------------------

879 THERMOCHEMISTRY AT 298.15K

880 --------------------------

881

882 Temperature ... 298.15 K

883 Pressure ... 1.00 atm

884 Total Mass ... 130.19 AMU

885

886 Throughout the following assumptions are being made:

887 (1) The electronic state is orbitally nondegenerate

888 ...

889

890 freq. 45.75 E(vib) ... 0.53

891 freq. 78.40 E(vib) ... 0.49

892 ...

893

894

895 ------------

896 INNER ENERGY

897 ------------

898

899 The inner energy is: U= E(el) + E(ZPE) + E(vib) + E(rot) + E(trans)

900 E(el) - is the total energy from the electronic structure calc

901 ...

902

903 Summary of contributions to the inner energy U:

904 Electronic energy ... -382.05075804 Eh

905 ...

906 """

907 if line.strip().startswith('THERMOCHEMISTRY AT'):

908

909 self.skip_lines(inputfile, ['dashes', 'blank'])

910 self.temperature = float(next(inputfile).split()[2])

911 self.pressure = float(next(inputfile).split()[2])

912 total_mass = float(next(inputfile).split()[3])

913

914 # Vibrations, rotations, and translations

915 line = next(inputfile)

916 while line[:17] != 'Electronic energy':

917 line = next(inputfile)

918 self.electronic_energy = float(line.split()[3])

919 self.zpe = float(next(inputfile).split()[4])

920 thermal_vibrational_correction = float(next(inputfile).split()[4])

921 thermal_rotional_correction = float(next(inputfile).split()[4])

922 thermal_translational_correction = float(next(inputfile).split()[4])

923 self.skip_lines(inputfile, ['dashes'])

924 total_thermal_energy = float(next(inputfile).split()[3])

925

926 # Enthalpy

927 while line[:17] != 'Total free energy':

928 line = next(inputfile)

929 thermal_enthalpy_correction = float(next(inputfile).split()[4])

930 next(inputfile)

931

932 # For a single atom, ORCA provides the total free energy or inner energy

933 # which includes a spurious vibrational correction (see #817 for details).

934 if self.natom > 1:

935 self.enthalpy = float(next(inputfile).split()[3])

936 else:

937 self.enthalpy = self.electronic_energy + thermal_translational_correction

938

939 # Entropy

940 while line[:18] != 'Electronic entropy':

941 line = next(inputfile)

942 electronic_entropy = float(line.split()[3])

943 vibrational_entropy = float(next(inputfile).split()[3])

944 rotational_entropy = float(next(inputfile).split()[3])

945 translational_entropy = float(next(inputfile).split()[3])

946 self.skip_lines(inputfile, ['dashes'])

947

948 # ORCA prints -inf for single atom entropy.

949 if self.natom > 1:

950 self.entropy = float(next(inputfile).split()[4])

951 else:

952 self.entropy = (electronic_entropy + translational_entropy) / self.temperature

953

954 while (line[:25] != 'Final Gibbs free enthalpy') and (line[:23] != 'Final Gibbs free energy'):

955 line = next(inputfile)

956 self.skip_lines(inputfile, ['dashes'])

957

958

959 # ORCA prints -inf for sinle atom free energy.

960 if self.natom > 1:

961 self.freeenergy = float(line.split()[5])

962 else:

963 self.freeenergy = self.enthalpy - self.temperature * self.entropy

964

965 # Read TDDFT information

966 if any(x in line for x in ("TD-DFT/TDA EXCITED", "TD-DFT EXCITED")):

967 # Could be singlets or triplets

968 if line.find("SINGLETS") >= 0:

969 sym = "Singlet"

970 elif line.find("TRIPLETS") >= 0:

971 sym = "Triplet"

972 else:

973 sym = "Not specified"

974

975 etsecs = []

976 etenergies = []

977 etsyms = []

978

979 lookup = {'a': 0, 'b': 1}

980 line = next(inputfile)

981 while line.find("STATE") < 0:

982 line = next(inputfile)

983 # Contains STATE or is blank

984 while line.find("STATE") >= 0:

985 broken = line.split()

986 etenergies.append(float(broken[-2]))

987 etsyms.append(sym)

988 line = next(inputfile)

989 sec = []

990 # Contains SEC or is blank

991 while line.strip():

992 start = line[0:8].strip()

993 start = (int(start[:-1]), lookup[start[-1]])

994 end = line[10:17].strip()

995 end = (int(end[:-1]), lookup[end[-1]])

996 # Coeffients are not printed for RPA, only

997 # TDA/CIS.

998 contrib = line[35:47].strip()

999 try:

1000 contrib = float(contrib)

1001 except ValueError:

1002 contrib = numpy.nan

1003 sec.append([start, end, contrib])

1004 line = next(inputfile)

1005 etsecs.append(sec)

1006 line = next(inputfile)

1007

1008 self.extend_attribute('etenergies', etenergies)

1009 self.extend_attribute('etsecs', etsecs)

1010 self.extend_attribute('etsyms', etsyms)

1011

1012 # Parse the various absorption spectra for TDDFT and ROCIS.

1013 if 'ABSORPTION SPECTRUM' in line or 'ELECTRIC DIPOLE' in line:

1014 # CASSCF has an anomalous printing of ABSORPTION SPECTRUM.

1015 if line[:-1] == 'ABSORPTION SPECTRUM':

1016 return

1017

1018 line = line.strip()

1019

1020 # Standard header, occasionally changes

1021 header = ['d', 'header', 'header', 'd']

1022

1023 def energy_intensity(line):

1024 """ TDDFT and related methods standard method of output

1025-----------------------------------------------------------------------------

1026 ABSORPTION SPECTRUM VIA TRANSITION ELECTRIC DIPOLE MOMENTS

1027-----------------------------------------------------------------------------

1028State Energy Wavelength fosc T2 TX TY TZ

1029 (cm-1) (nm) (au**2) (au) (au) (au)

1030-----------------------------------------------------------------------------

1031 1 5184116.7 1.9 0.040578220 0.00258 -0.05076 -0.00000 -0.00000

1032"""

1033 try:

1034 state, energy, wavelength, intensity, t2, tx, ty, tz = line.split()

1035 except ValueError as e:

1036 # Must be spin forbidden and thus no intensity

1037 energy = line.split()[1]

1038 intensity = 0

1039 return energy, intensity

1040

1041 # Check for variations

1042 if line == 'COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM' or \

1043 line == 'COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM (origin adjusted)':

1044 def energy_intensity(line):

1045 """ TDDFT with DoQuad == True

1046------------------------------------------------------------------------------------------------------

1047 COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM

1048------------------------------------------------------------------------------------------------------

1049State Energy Wavelength D2 m2 Q2 D2+m2+Q2 D2/TOT m2/TOT Q2/TOT

1050 (cm-1) (nm) (*1e6) (*1e6)

1051------------------------------------------------------------------------------------------------------

1052 1 61784150.6 0.2 0.00000 0.00000 3.23572 0.00000323571519 0.00000 0.00000 1.00000

1053"""

1054 state, energy, wavelength, d2, m2, q2, intensity, d2_contrib, m2_contrib, q2_contrib = line.split()

1055 return energy, intensity

1056

1057 elif line == 'COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM (Origin Independent, Length Representation)':

1058 def energy_intensity(line):

1059 """ TDDFT with doQuad == True (Origin Independent Length Representation)

1060-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

1061 COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM (Origin Independent, Length Representation)

1062-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

1063State Energy Wavelength D2 m2 Q2 DM DO D2+m2+Q2+DM+DO D2/TOT m2/TOT Q2/TOT DM/TOT DO/TOT

1064 (cm-1) (nm) (*1e6) (*1e6) (*1e6) (*1e6)

1065-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

1066 1 61784150.6 0.2 0.00000 0.00000 3.23572 0.00000 0.00000 0.00000323571519 0.00000 0.00000 1.00000 0.00000 0.00000

1067 2 61793079.3 0.2 0.00000 0.00000 2.85949 0.00000 -0.00000 0.00000285948800 0.00000 0.00000 1.00000 0.00000 -0.00000

1068"""

1069 vals = line.split()

1070 if len(vals) < 14:

1071 return vals[1], 0

1072 return vals[1], vals[8]

1073

1074 elif line[:5] == 'X-RAY' and \

1075 (line[6:23] == 'EMISSION SPECTRUM' or line[6:25] == 'ABSORPTION SPECTRUM'):

1076 def energy_intensity(line):

1077 """ X-Ray from XES (emission or absorption, electric or velocity dipole moments)

1078-------------------------------------------------------------------------------------

1079 X-RAY ABSORPTION SPECTRUM VIA TRANSITION ELECTRIC DIPOLE MOMENTS

1080-------------------------------------------------------------------------------------

1081 Transition Energy INT TX TY TZ

1082 (eV) (normalized) (au) (au) (au)

1083-------------------------------------------------------------------------------------

1084 1 90a -> 0a 8748.824 0.000002678629 0.00004 -0.00001 0.00003

1085"""

1086 state, start, arrow, end, energy, intensity, tx, ty, tz = line.split()

1087 return energy, intensity

1088

1089 elif line[:70] == 'COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE X-RAY':

1090 header = ['header', 'd', 'header', 'd', 'header', 'header', 'd']

1091 def energy_intensity(line):

1092 """ XAS with quadrupole (origin adjusted)

1093-------------------------------------------------------------------------------------------------------------------------------

1094 COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE X-RAY ABSORPTION SPECTRUM

1095 (origin adjusted)

1096-------------------------------------------------------------------------------------------------------------------------------

1097 INT (normalized)

1098 ---------------------------------------------------------

1099 Transition Energy D2 M2 Q2 D2+M2+Q2 D2/TOT M2/TOT Q2/TOT

1100 (eV) (*1e6) (*1e6)

1101-------------------------------------------------------------------------------------------------------------------------------

1102 1 90a -> 0a 8748.824 0.000000 0.000292 0.003615 0.000000027512 0.858012 0.010602 0.131386

1103"""

1104 state, start, arrow, end, energy, d2, m2, q2, intensity, d2_contrib, m2_contrib, q2_contrib = line.split()

1105 return energy, intensity

1106

1107 elif line[:55] == 'SPIN ORBIT CORRECTED ABSORPTION SPECTRUM VIA TRANSITION':

1108 def energy_intensity(line):

1109 """ ROCIS dipole approximation with SOC == True (electric or velocity dipole moments)

1110-------------------------------------------------------------------------------

1111SPIN ORBIT CORRECTED ABSORPTION SPECTRUM VIA TRANSITION ELECTRIC DIPOLE MOMENTS

1112-------------------------------------------------------------------------------

1113States Energy Wavelength fosc T2 TX TY TZ

1114 (cm-1) (nm) (au**2) (au) (au) (au)

1115-------------------------------------------------------------------------------

1116 0 1 0.0 0.0 0.000000000 0.00000 0.00000 0.00000 0.00000

1117 0 2 5184116.4 1.9 0.020288451 0.00258 0.05076 0.00003 0.00000

1118"""

1119 state, state2, energy, wavelength, intensity, t2, tx, ty, tz = line.split()

1120 return energy, intensity

1121

1122 elif line[:79] == 'ROCIS COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM' \

1123 or line[:87] == 'SOC CORRECTED COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM':

1124 def energy_intensity(line):

1125 """ ROCIS with DoQuad = True and SOC = True (also does origin adjusted)

1126------------------------------------------------------------------------------------------------------

1127 ROCIS COMBINED ELECTRIC DIPOLE + MAGNETIC DIPOLE + ELECTRIC QUADRUPOLE SPECTRUM

1128------------------------------------------------------------------------------------------------------

1129States Energy Wavelength D2 m2 Q2 D2+m2+Q2 D2/TOT m2/TOT Q2/TOT

1130 (cm-1) (nm) (*1e6) (*1e6) (*population)

1131------------------------------------------------------------------------------------------------------

1132 0 1 0.0 0.0 0.00000 0.00000 0.00000 0.00000000000000 0.00000 0.00000 0.00000

1133 0 2 669388066.6 0.0 0.00000 0.00000 0.00876 0.00000000437784 0.00000 0.00000 1.00000

1134"""

1135 state, state2, energy, wavelength, d2, m2, q2, intensity, d2_contrib, m2_contrib, q2_contrib = line.split()

1136 return energy, intensity

1137

1138 # Clashes with Orca 2.6 (and presumably before) TDDFT absorption spectrum printing

1139 elif line == 'ABSORPTION SPECTRUM' and \

1140 parse_version(self.metadata['package_version']).release > (2, 6):

1141 def energy_intensity(line):

1142 """ CASSCF absorption spectrum

1143------------------------------------------------------------------------------------------

1144 ABSORPTION SPECTRUM

1145------------------------------------------------------------------------------------------

1146 States Energy Wavelength fosc T2 TX TY TZ

1147 (cm-1) (nm) (D**2) (D) (D) (D)

1148------------------------------------------------------------------------------------------

1149 0( 0)-> 1( 0) 1 83163.2 120.2 0.088250385 2.25340 0.00000 0.00000 1.50113

1150"""

1151 reg = r'(\d+)$ ?(\d+)$-> ?(\d+)$ ?(\d+)$ (\d+)'+ r'\s+(\d+\.\d+)'*4 + r'\s+(-?\d+\.\d+)'*3

1152 res = re.search(reg, line)

1153 jstate, jblock, istate, iblock, mult, energy, wavelength, intensity, t2, tx, ty, tz = res.groups()

1154 return energy, intensity

1155

1156 name = line

1157 self.skip_lines(inputfile, header)

1158

1159 if not hasattr(self, 'transprop'):

1160 self.transprop = {}

1161

1162 etenergies = []

1163 etoscs = []

1164 line = next(inputfile)

1165 # The sections are occasionally ended with dashed lines

1166 # other times they are blank (other than a new line)

1167 while len(line.strip('-')) > 2:

1168 energy, intensity = energy_intensity(line)

1169 etenergies.append(float(energy))

1170 etoscs.append(float(intensity))

1171

1172 line = next(inputfile)

1173

1174 self.set_attribute('etenergies', etenergies)

1175 self.set_attribute('etoscs', etoscs)

1176 self.transprop[name] = (numpy.asarray(etenergies), numpy.asarray(etoscs))

1177

1178 if line.strip() == "CD SPECTRUM":

1179 # -------------------------------------------------------------------

1180 # CD SPECTRUM

1181 # -------------------------------------------------------------------

1182 # State Energy Wavelength R MX MY MZ

1183 # (cm-1) (nm) (1e40*cgs) (au) (au) (au)

1184 # -------------------------------------------------------------------

1185 # 1 43167.6 231.7 0.00000 0.00000 -0.00000 0.00000

1186 #

1187 etenergies = []

1188 etrotats = []

1189 self.skip_lines(inputfile, ["d", "State Energy Wavelength", "(cm-1) (nm)", "d"])

1190 line = next(inputfile)

1191 while line.strip():

1192 tokens = line.split()

1193 if "spin forbidden" in line:

1194 etrotat, mx, my, mz = 0.0, 0.0, 0.0, 0.0

1195 else:

1196 etrotat, mx, my, mz = [utils.float(t) for t in tokens[3:]]

1197 etenergies.append(utils.float(tokens[1]))

1198 etrotats.append(etrotat)

1199 line = next(inputfile)

1200 self.set_attribute("etrotats", etrotats)

1201 if not hasattr(self, "etenergies"):

1202 self.logger.warning("etenergies not parsed before ECD section, "

1203 "the output file may be malformed")

1204 self.set_attribute("etenergies", etenergies)

1205

1206 if line[:23] == "VIBRATIONAL FREQUENCIES":

1207 self.skip_lines(inputfile, ['d', 'b'])

1208

1209 # Starting with 4.1, a scaling factor for frequencies is printed

1210 if float(self.metadata["package_version"][:3]) > 4.0:

1211 self.skip_lines(inputfile, ['Scaling factor for frequencies', 'b'])

1212

1213 if self.natom > 1:

1214 vibfreqs = numpy.zeros(3 * self.natom)

1215 for i, line in zip(range(3 * self.natom), inputfile):

1216 vibfreqs[i] = float(line.split()[1])

1217

1218 nonzero = numpy.nonzero(vibfreqs)[0]

1219 self.first_mode = nonzero[0]

1220 # Take all modes after first

1221 # Mode between imaginary and real modes could be 0

1222 self.num_modes = 3*self.natom - self.first_mode

1223 if self.num_modes > 3*self.natom - 6:

1224 msg = "Modes corresponding to rotations/translations may be non-zero."

1225 if self.num_modes == 3*self.natom - 5:

1226 msg += '\n You can ignore this if the molecule is linear.'

1227 self.set_attribute('vibfreqs', vibfreqs[self.first_mode:])

1228 else:

1229 # we have a single atom

1230 self.set_attribute('vibfreqs', numpy.array([]))

1231

1232 # NORMAL MODES

1233 # ------------

1234 #

1235 # These modes are the cartesian displacements weighted by the diagonal matrix

1236 # M(i,i)=1/sqrt(m[i]) where m[i] is the mass of the displaced atom

1237 # Thus, these vectors are normalized but *not* orthogonal

1238 #

1239 # 0 1 2 3 4 5

1240 # 0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000

1241 # 1 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000

1242 # 2 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000

1243 # ...

1244 if line[:12] == "NORMAL MODES":

1245 if self.natom > 1:

1246 all_vibdisps = numpy.zeros((3 * self.natom, self.natom, 3), "d")

1247

1248 self.skip_lines(inputfile, ['d', 'b', 'text', 'text', 'text', 'b'])

1249

1250 for mode in range(0, 3 * self.natom, 6):

1251 header = next(inputfile)

1252 for atom in range(self.natom):

1253 all_vibdisps[mode:mode + 6, atom, 0] = next(inputfile).split()[1:]

1254 all_vibdisps[mode:mode + 6, atom, 1] = next(inputfile).split()[1:]

1255 all_vibdisps[mode:mode + 6, atom, 2] = next(inputfile).split()[1:]

1256

1257 self.set_attribute('vibdisps', all_vibdisps[self.first_mode:])

1258 else:

1259 # we have a single atom

1260 self.set_attribute('vibdisps', numpy.array([]))

1261

1262 # -----------

1263 # IR SPECTRUM

1264 # -----------

1265 #

1266 # Mode freq (cm**-1) T**2 TX TY TZ

1267 # -------------------------------------------------------------------

1268 # 6: 2069.36 1.674341 ( -0.000000 0.914970 -0.914970)

1269 # 7: 3978.81 76.856228 ( 0.000000 6.199041 -6.199042)

1270 # 8: 4113.34 61.077784 ( -0.000000 5.526201 5.526200)

1271 if line[:11] == "IR SPECTRUM":

1272 self.skip_lines(inputfile, ['d', 'b', 'header', 'd'])

1273

1274 if self.natom > 1:

1275 all_vibirs = numpy.zeros((3 * self.natom,), "d")

1276

1277 line = next(inputfile)

1278 while len(line) > 2:

1279 num = int(line[0:4])

1280 all_vibirs[num] = float(line.split()[2])

1281 line = next(inputfile)

1282

1283 self.set_attribute('vibirs', all_vibirs[self.first_mode:])

1284 else:

1285 # we have a single atom

1286 self.set_attribute('vibirs', numpy.array([]))

1287

1288 # --------------

1289 # RAMAN SPECTRUM

1290 # --------------

1291 #

1292 # Mode freq (cm**-1) Activity Depolarization

1293 # -------------------------------------------------------------------

1294 # 6: 296.23 5.291229 0.399982

1295 # 7: 356.70 0.000000 0.749764

1296 # 8: 368.27 0.000000 0.202068

1297 if line[:14] == "RAMAN SPECTRUM":

1298 self.skip_lines(inputfile, ['d', 'b', 'header', 'd'])

1299

1300 if self.natom > 1:

1301 all_vibramans = numpy.zeros(3 * self.natom)

1302

1303 line = next(inputfile)

1304 while len(line) > 2:

1305 num = int(line[0:4])

1306 all_vibramans[num] = float(line.split()[2])

1307 line = next(inputfile)

1308

1309 self.set_attribute('vibramans', all_vibramans[self.first_mode:])

1310 else:

1311 # we have a single atom

1312 self.set_attribute('vibramans', numpy.array([]))

1313

1314 # ORCA will print atomic charges along with the spin populations,

1315 # so care must be taken about choosing the proper column.

1316 # Population analyses are performed usually only at the end

1317 # of a geometry optimization or other run, so we want to

1318 # leave just the final atom charges.

1319 # Here is an example for Mulliken charges:

1320 # --------------------------------------------

1321 # MULLIKEN ATOMIC CHARGES AND SPIN POPULATIONS

1322 # --------------------------------------------

1323 # 0 H : 0.126447 0.002622

1324 # 1 C : -0.613018 -0.029484

1325 # 2 H : 0.189146 0.015452

1326 # 3 H : 0.320041 0.037434

1327 # ...

1328 # Sum of atomic charges : -0.0000000

1329 # Sum of atomic spin populations: 1.0000000

1330 if line[:23] == "MULLIKEN ATOMIC CHARGES":

1331 self.parse_charge_section(line, inputfile, 'mulliken')

1332 # Things are the same for Lowdin populations, except that the sums

1333 # are not printed (there is a blank line at the end).

1334 if line[:22] == "LOEWDIN ATOMIC CHARGES":

1335 self.parse_charge_section(line, inputfile, 'lowdin')

1336 #CHELPG Charges

1337 #--------------------------------

1338 # 0 C : 0.363939

1339 # 1 H : 0.025695

1340 # ...

1341 #--------------------------------

1342 #Total charge: -0.000000

1343 #--------------------------------

1344 if line.startswith('CHELPG Charges'):

1345 self.parse_charge_section(line, inputfile, 'chelpg')

1346

1347 # It is not stated explicitely, but the dipole moment components printed by ORCA

1348 # seem to be in atomic units, so they will need to be converted. Also, they

1349 # are most probably calculated with respect to the origin .

1350 #

1351 # -------------

1352 # DIPOLE MOMENT

1353 # -------------

1354 # X Y Z

1355 # Electronic contribution: 0.00000 -0.00000 -0.00000

1356 # Nuclear contribution : 0.00000 0.00000 0.00000

1357 # -----------------------------------------

1358 # Total Dipole Moment : 0.00000 -0.00000 -0.00000

1359 # -----------------------------------------

1360 # Magnitude (a.u.) : 0.00000

1361 # Magnitude (Debye) : 0.00000

1362 #

1363 if line.strip() == "DIPOLE MOMENT":

1364

1365 self.skip_lines(inputfile, ['d', 'XYZ', 'electronic', 'nuclear', 'd'])

1366 total = next(inputfile)

1367 assert "Total Dipole Moment" in total

1368

1369 reference = [0.0, 0.0, 0.0]

1370 dipole = numpy.array([float(d) for d in total.split()[-3:]])

1371 dipole = utils.convertor(dipole, "ebohr", "Debye")

1372

1373 if not hasattr(self, 'moments'):

1374 self.set_attribute('moments', [reference, dipole])

1375 else:

1376 try:

1377 assert numpy.all(self.moments[1] == dipole)

1378 except AssertionError:

1379 self.logger.warning('Overwriting previous multipole moments with new values')

1380 self.set_attribute('moments', [reference, dipole])

1381

1382 if "Molecular Dynamics Iteration" in line:

1383 self.skip_lines(inputfile, ['d', 'ORCA MD', 'd', 'New Coordinates'])

1384 line = next(inputfile)

1385 tokens = line.split()

1386 assert tokens[0] == "time"

1387 time = utils.convertor(float(tokens[2]), "time_au", "fs")

1388 self.append_attribute('time', time)

1389

1390 # Static polarizability.

1391 if line.strip() == "THE POLARIZABILITY TENSOR":

1392 if not hasattr(self, 'polarizabilities'):

1393 self.polarizabilities = []

1394 self.skip_lines(inputfile, ['d', 'b'])

1395 line = next(inputfile)

1396 assert line.strip() == "The raw cartesian tensor (atomic units):"

1397 polarizability = []

1398 for _ in range(3):

1399 line = next(inputfile)

1400 polarizability.append(line.split())

1401 self.polarizabilities.append(numpy.array(polarizability))

1402

1403 if line.strip() == 'ORCA-CASSCF':

1404 # -------------------------------------------------------------------------------

1405 # ORCA-CASSCF

1406 # -------------------------------------------------------------------------------

1407 #

1408 # Symmetry handling UseSym ... ON

1409 # Point group ... C2

1410 # Used point group ... C2

1411 # Number of irreps ... 2

1412 # Irrep A has 10 SALCs (ofs= 0) #(closed)= 0 #(active)= 2

1413 # Irrep B has 10 SALCs (ofs= 10) #(closed)= 0 #(active)= 2

1414 # Symmetries of active orbitals:

1415 # MO = 0 IRREP= 0 (A)

1416 # MO = 1 IRREP= 1 (B)

1417 self.skip_lines(inputfile, ['d', 'b'])

1418 vals = next(inputfile).split()

1419 # Symmetry section is only printed if symmetry is used.

1420 if vals[0] == 'Symmetry':

1421 assert vals[-1] == 'ON'

1422 point_group = next(inputfile).split()[-1]

1423 used_point_group = next(inputfile).split()[-1]

1424 num_irreps = int(next(inputfile).split()[-1])

1425 num_active = 0

1426 # Parse the irreps.

1427 for i, line in zip(range(num_irreps), inputfile):

1428 reg = r'Irrep\s+(\w+) has\s+(\d+) SALCs $ofs=\s*(\d+)$ #$closed$=\s*(\d+) #$active$=\s*(\d+)'

1429 groups = re.search(reg, line).groups()

1430 irrep = groups[0]

1431 salcs, ofs, closed, active = map(int, groups[1:])

1432 num_active += active

1433 self.skip_line(inputfile, 'Symmetries')

1434 # Parse the symmetries of the active orbitals.

1435 for i, line in zip(range(num_active), inputfile):

1436 reg = r'(\d+) IRREP= (\d+) $(\w+)$'

1437 groups = re.search(reg, line).groups()

1438 mo, irrep_idx, irrep = groups

1439

1440 # Skip until the system specific settings.

1441 # This will align the cases of symmetry on and off.

1442 line = next(inputfile)

1443 while line[:25] != 'SYSTEM-SPECIFIC SETTINGS:':

1444 line = next(inputfile)

1445

1446 # SYSTEM-SPECIFIC SETTINGS:

1447 # Number of active electrons ... 4

1448 # Number of active orbitals ... 4

1449 # Total number of electrons ... 4

1450 # Total number of orbitals ... 20

1451 num_el = int(next(inputfile).split()[-1])

1452 num_orbs = int(next(inputfile).split()[-1])

1453 total_el = int(next(inputfile).split()[-1])

1454 total_orbs = int(next(inputfile).split()[-1])

1455

1456 # Determined orbital ranges:

1457 # Internal 0 - -1 ( 0 orbitals)

1458 # Active 0 - 3 ( 4 orbitals)

1459 # External 4 - 19 ( 16 orbitals)

1460 self.skip_lines(inputfile, ['b', 'Determined'])

1461 orbital_ranges = []

1462 # Parse the orbital ranges for: Internal, Active, and External orbitals.

1463 for i in range(3):

1464 vals = next(inputfile).split()

1465 start, end, num = int(vals[1]), int(vals[3]), int(vals[5])

1466 # Change from inclusive to exclusive in order to match python.

1467 end = end + 1

1468 assert end - start == num

1469 orbital_ranges.append((start, end, num))

1470

1471 line = next(inputfile)

1472 while line[:8] != 'CI-STEP:':

1473 line = next(inputfile)

1474

1475 # CI-STEP:

1476 # CI strategy ... General CI

1477 # Number of symmetry/multplity blocks ... 1

1478 # BLOCK 1 WEIGHT= 1.0000

1479 # Multiplicity ... 1

1480 # Irrep ... 0 (A)

1481 # #(Configurations) ... 11

1482 # #(CSFs) ... 12

1483 # #(Roots) ... 1

1484 # ROOT=0 WEIGHT= 1.000000

1485 self.skip_line(inputfile, 'CI strategy')

1486 num_blocks = int(next(inputfile).split()[-1])

1487 for b in range(1, num_blocks + 1):

1488 vals = next(inputfile).split()

1489 block = int(vals[1])

1490 weight = float(vals[3])

1491 assert b == block

1492 mult = int(next(inputfile).split()[-1])

1493 vals = next(inputfile).split()

1494 # The irrep will only be printed if using symmetry.

1495 if vals[0] == 'Irrep':

1496 irrep_idx = int(vals[-2])

1497 irrep = vals[-1].strip('()')

1498 vals = next(inputfile).split()

1499 num_confs = int(vals[-1])

1500 num_csfs = int(next(inputfile).split()[-1])

1501 num_roots = int(next(inputfile).split()[-1])

1502 # Parse the roots.

1503 for r, line in zip(range(num_roots), inputfile):

1504 reg = r'=(\d+) WEIGHT=\s*(\d\.\d+)'

1505 groups = re.search(reg, line).groups()

1506 root = int(groups[0])

1507 weight = float(groups[1])

1508 assert r == root

1509

1510 # Skip additional setup printing and CASSCF iterations.

1511 line = next(inputfile).strip()

1512 while line != 'CASSCF RESULTS':

1513 line = next(inputfile).strip()

1514

1515 # --------------

1516 # CASSCF RESULTS

1517 # --------------

1518 #

1519 # Final CASSCF energy : -14.597120777 Eh -397.2078 eV

1520 self.skip_lines(inputfile, ['d', 'b'])

1521 casscf_energy = float(next(inputfile).split()[4])

1522

1523 # This is only printed for first and last step of geometry optimization.

1524 # ----------------

1525 # ORBITAL ENERGIES

1526 # ----------------

1527 #

1528 # NO OCC E(Eh) E(eV) Irrep

1529 # 0 0.0868 0.257841 7.0162 1-A

1530 self.skip_lines(inputfile, ['b', 'd'])

1531 if next(inputfile).strip() == 'ORBITAL ENERGIES':

1532 self.skip_lines(inputfile, ['d', 'b', 'NO'])

1533 orbitals = []

1534 vals = next(inputfile).split()

1535 while vals:

1536 occ, eh, ev = map(float, vals[1:4])

1537 # The irrep will only be printed if using symmetry.

1538 if len(vals) == 5:

1539 idx, irrep = vals[4].split('-')

1540 orbitals.append((occ, ev, int(idx), irrep))

1541 else:

1542 orbitals.append((occ, ev))

1543 vals = next(inputfile).split()

1544 self.skip_lines(inputfile, ['b', 'd'])

1545

1546 # Orbital Compositions

1547 # ---------------------------------------------

1548 # CAS-SCF STATES FOR BLOCK 1 MULT= 1 IRREP= Ag NROOTS= 2

1549 # ---------------------------------------------

1550 #

1551 # ROOT 0: E= -14.5950507665 Eh

1552 # 0.89724 [ 0]: 2000

1553 for b in range(num_blocks):

1554 # Parse the block data.

1555 reg = r'BLOCK\s+(\d+) MULT=\s*(\d+) (IRREP=\s*\w+ )?(NROOTS=\s*(\d+))?'

1556 groups = re.search(reg, next(inputfile)).groups()

1557 block = int(groups[0])

1558 mult = int(groups[1])

1559 # The irrep will only be printed if using symmetry.

1560 if groups[2] is not None:

1561 irrep = groups[2].split('=')[1].strip()

1562 nroots = int(groups[3].split('=')[1])

1563

1564 self.skip_lines(inputfile, ['d', 'b'])

1565

1566 line = next(inputfile).strip()

1567 while line:

1568 if line[:4] == 'ROOT':

1569 # Parse the root section.

1570 reg = r'(\d+):\s*E=\s*(-?\d+.\d+) Eh(\s+\d+\.\d+ eV)?(\s+\d+\.\d+)?'

1571 groups = re.search(reg, line).groups()

1572 root = int(groups[0])

1573 energy = float(groups[1])

1574 # Excitation energies are only printed for excited state roots.

1575 if groups[2] is not None:

1576 excitation_energy_ev = float(groups[2].split()[0])

1577 excitation_energy_cm = float(groups[3])

1578 else:

1579 # Parse the occupations section.

1580 reg = r'(\d+\.\d+) \[\s*(\d+)\]: (\d+)'

1581 groups = re.search(reg, line).groups()

1582 coeff = float(groups[0])

1583 number = float(groups[1])

1584 occupations = list(map(int, groups[2]))

1585

1586 line = next(inputfile).strip()

1587

1588 # Skip any extended wavefunction printing.

1589 while line != 'DENSITY MATRIX':

1590 line = next(inputfile).strip()

1591

1592 self.skip_lines(inputfile, ['d', 'b'])

1593 # --------------

1594 # DENSITY MATRIX

1595 # --------------

1596 #

1597 # 0 1 2 3

1598 # 0 0.897244 0.000000 0.000000 0.000000

1599 # 1 0.000000 0.533964 0.000000 0.000000

1600 density = numpy.zeros((num_orbs, num_orbs))

1601 for i in range(0, num_orbs, 6):

1602 next(inputfile)

1603 for j, line in zip(range(num_orbs), inputfile):

1604 density[j][i:i + 6] = list(map(float, line.split()[1:]))

1605 self.skip_lines(inputfile, ['Trace', 'b', 'd'])

1606

1607 # This is only printed for open-shells.

1608 # -------------------

1609 # SPIN-DENSITY MATRIX

1610 # -------------------

1611 #

1612 # 0 1 2 3 4 5

1613 # 0 -0.003709 0.001410 0.000074 -0.000564 -0.007978 0.000735

1614 # 1 0.001410 -0.001750 -0.000544 -0.003815 0.008462 -0.004529

1615 if next(inputfile).strip() == 'SPIN-DENSITY MATRIX':

1616 self.skip_lines(inputfile, ['d', 'b'])

1617 spin_density = numpy.zeros((num_orbs, num_orbs))

1618 for i in range(0, num_orbs, 6):

1619 next(inputfile)

1620 for j, line in zip(range(num_orbs), inputfile):

1621 spin_density[j][i:i + 6] = list(map(float, line.split()[1:]))

1622 self.skip_lines(inputfile, ['Trace', 'b', 'd', 'ENERGY'])

1623 self.skip_lines(inputfile, ['d', 'b'])

1624

1625 # -----------------

1626 # ENERGY COMPONENTS

1627 # -----------------

1628 #

1629 # One electron energy : -18.811767801 Eh -511.8942 eV

1630 # Two electron energy : 4.367616074 Eh 118.8489 eV

1631 # Nuclear repuslion energy : 0.000000000 Eh 0.0000 eV

1632 # ----------------

1633 # -14.444151727

1634 #

1635 # Kinetic energy : 14.371970266 Eh 391.0812 eV

1636 # Potential energy : -28.816121993 Eh -784.1265 eV

1637 # Virial ratio : -2.005022378

1638 # ----------------

1639 # -14.444151727

1640 #

1641 # Core energy : -13.604678408 Eh -370.2021 eV

1642 one_el_energy = float(next(inputfile).split()[4])

1643 two_el_energy = float(next(inputfile).split()[4])

1644 nuclear_repulsion_energy = float(next(inputfile).split()[4])

1645 self.skip_line(inputfile, 'dashes')

1646 energy = float(next(inputfile).strip())

1647 self.skip_line(inputfile, 'blank')

1648 kinetic_energy = float(next(inputfile).split()[3])

1649 potential_energy = float(next(inputfile).split()[3])

1650 virial_ratio = float(next(inputfile).split()[3])

1651 self.skip_line(inputfile, 'dashes')

1652 energy = float(next(inputfile).strip())

1653 self.skip_line(inputfile, 'blank')

1654 core_energy = float(next(inputfile).split()[3])

1655

1656 if line[:15] == 'TOTAL RUN TIME:':

1657 self.metadata['success'] = True

1658

1659 def parse_charge_section(self, line, inputfile, chargestype):

1660 """Parse a charge section, modifies class in place

1661

1662 Parameters

1663 ----------

1664 line : str

1665 the line which triggered entry here

1666 inputfile : file

1667 handle to file object

1668 chargestype : str

1669 what type of charge we're dealing with, must be one of

1670 'mulliken', 'lowdin' or 'chelpg'

1671 """

1672 has_spins = 'AND SPIN POPULATIONS' in line

1673

1674 if not hasattr(self, "atomcharges"):

1675 self.atomcharges = {}

1676 if has_spins and not hasattr(self, "atomspins"):

1677 self.atomspins = {}

1678

1679 self.skip_line(inputfile, 'dashes')

1680

1681 # depending on chargestype, decide when to stop parsing lines

1682 # start, stop - indices for slicing lines and grabbing values

1683 if chargestype == 'mulliken':

1684 should_stop = lambda x: x.startswith('Sum of atomic charges')

1685 start, stop = 8, 20

1686 elif chargestype == 'lowdin':

1687 # stops when blank line encountered

1688 should_stop = lambda x: not bool(x.strip())

1689 start, stop = 8, 20

1690 elif chargestype == 'chelpg':

1691 should_stop = lambda x: x.startswith('---')

1692 start, stop = 11, 26

1693

1694 charges = []

1695 if has_spins:

1696 spins = []

1697

1698 line = next(inputfile)

1699 while not should_stop(line):

1700 # Don't add point charges or embedding potentials.

1701 if "Q :" not in line:

1702 charges.append(float(line[start:stop]))

1703 if has_spins:

1704 spins.append(float(line[stop:]))

1705 line = next(inputfile)

1706

1707 self.atomcharges[chargestype] = charges

1708 if has_spins:

1709 self.atomspins[chargestype] = spins

1710

1711 def parse_scf_condensed_format(self, inputfile, line):

1712 """ Parse the SCF convergence information in condensed format """

1713

1714 # This is what it looks like

1715 # ITER Energy Delta-E Max-DP RMS-DP [F,P] Damp

1716 # *** Starting incremental Fock matrix formation ***

1717 # 0 -384.5203638934 0.000000000000 0.03375012 0.00223249 0.1351565 0.7000

1718 # 1 -384.5792776162 -0.058913722842 0.02841696 0.00175952 0.0734529 0.7000

1719 # ***Turning on DIIS***

1720 # 2 -384.6074211837 -0.028143567475 0.04968025 0.00326114 0.0310435 0.0000

1721 # 3 -384.6479682063 -0.040547022616 0.02097477 0.00121132 0.0361982 0.0000

1722 # 4 -384.6571124353 -0.009144228947 0.00576471 0.00035160 0.0061205 0.0000

1723 # 5 -384.6574659959 -0.000353560584 0.00191156 0.00010160 0.0025838 0.0000

1724 # 6 -384.6574990782 -0.000033082375 0.00052492 0.00003800 0.0002061 0.0000

1725 # 7 -384.6575005762 -0.000001497987 0.00020257 0.00001146 0.0001652 0.0000

1726 # 8 -384.6575007321 -0.000000155848 0.00008572 0.00000435 0.0000745 0.0000

1727 # **** Energy Check signals convergence ****

1728

1729 assert line[2] == "Delta-E"

1730 assert line[3] == "Max-DP"

1731

1732 if not hasattr(self, "scfvalues"):

1733 self.scfvalues = []

1734

1735 self.scfvalues.append([])

1736

1737 # Try to keep track of the converger (NR, DIIS, SOSCF, etc.).

1738 diis_active = True

1739 while line:

1740

1741 maxDP = None

1742 if 'Newton-Raphson' in line:

1743 diis_active = False

1744 elif 'SOSCF' in line:

1745 diis_active = False

1746 elif line[0].isdigit():

1747 shim = 0

1748 try:

1749 energy = float(line[1])

1750 deltaE = float(line[2])

1751 maxDP = float(line[3 + int(not diis_active)])

1752 rmsDP = float(line[4 + int(not diis_active)])

1753 except ValueError as e:

1754 # Someone in Orca forgot to properly add spaces in the scf printing

1755 # code looks like:

1756 # %3i %17.10f%12.12f%11.8f %11.8f

1757 if line[1].count('.') == 2:

1758 integer1, decimal1_integer2, decimal2 = line[1].split('.')

1759 decimal1, integer2 = decimal1_integer2[:10], decimal1_integer2[10:]

1760 energy = float(integer1 + '.' + decimal1)

1761 deltaE = float(integer2 + '.' + decimal2)

1762 maxDP = float(line[2 + int(not diis_active)])

1763 rmsDP = float(line[3 + int(not diis_active)])

1764 elif line[1].count('.') == 3:

1765 integer1, decimal1_integer2, decimal2_integer3, decimal3 = line[1].split('.')

1766 decimal1, integer2 = decimal1_integer2[:10], decimal1_integer2[10:]

1767 decimal2, integer3 = decimal2_integer3[:12], decimal2_integer3[12:]

1768 energy = float(integer1 + '.' + decimal1)

1769 deltaE = float(integer2 + '.' + decimal2)

1770 maxDP = float(integer3 + '.' + decimal3)

1771 rmsDP = float(line[2 + int(not diis_active)])

1772 elif line[2].count('.') == 2:

1773 integer1, decimal1_integer2, decimal2 = line[2].split('.')

1774 decimal1, integer2 = decimal1_integer2[:12], decimal1_integer2[12:]

1775 deltaE = float(integer1 + '.' + decimal1)

1776 maxDP = float(integer2 + '.' + decimal2)

1777 rmsDP = float(line[3 + int(not diis_active)])

1778 else:

1779 raise e

1780

1781 self.scfvalues[-1].append([deltaE, maxDP, rmsDP])

1782

1783 try:

1784 line = next(inputfile).split()

1785 except StopIteration:

1786 self.logger.warning('File terminated before end of last SCF! Last Max-DP: {}'.format(maxDP))

1787 break

1788

1789 def parse_scf_expanded_format(self, inputfile, line):

1790 """ Parse SCF convergence when in expanded format. """

1791

1792 # The following is an example of the format

1793 # -----------------------------------------

1794 #

1795 # *** Starting incremental Fock matrix formation ***

1796 #

1797 # ----------------------------

1798 # ! ITERATION 0 !

1799 # ----------------------------

1800 # Total Energy : -377.960836651297 Eh

1801 # Energy Change : -377.960836651297 Eh

1802 # MAX-DP : 0.100175793695

1803 # RMS-DP : 0.004437973661

1804 # Actual Damping : 0.7000

1805 # Actual Level Shift : 0.2500 Eh

1806 # Int. Num. El. : 43.99982197 (UP= 21.99991099 DN= 21.99991099)

1807 # Exchange : -34.27550826

1808 # Correlation : -2.02540957

1809 #

1810 #

1811 # ----------------------------

1812 # ! ITERATION 1 !

1813 # ----------------------------

1814 # Total Energy : -378.118458080109 Eh

1815 # Energy Change : -0.157621428812 Eh

1816 # MAX-DP : 0.053240648588

1817 # RMS-DP : 0.002375092508

1818 # Actual Damping : 0.7000

1819 # Actual Level Shift : 0.2500 Eh

1820 # Int. Num. El. : 43.99994143 (UP= 21.99997071 DN= 21.99997071)

1821 # Exchange : -34.00291075

1822 # Correlation : -2.01607243

1823 #

1824 # ***Turning on DIIS***

1825 #

1826 # ----------------------------

1827 # ! ITERATION 2 !

1828 # ----------------------------

1829 # ....

1830 #

1831 if not hasattr(self, "scfvalues"):

1832 self.scfvalues = []

1833

1834 self.scfvalues.append([])

1835

1836 line = "Foo" # dummy argument to enter loop

1837 while line.find("******") < 0:

1838 try:

1839 line = next(inputfile)

1840 except StopIteration:

1841 self.logger.warning('File terminated before end of last SCF!')

1842 break

1843 info = line.split()

1844 if len(info) > 1 and info[1] == "ITERATION":

1845 dashes = next(inputfile)

1846 energy_line = next(inputfile).split()

1847 energy = float(energy_line[3])

1848 deltaE_line = next(inputfile).split()

1849 deltaE = float(deltaE_line[3])

1850 if energy == deltaE:

1851 deltaE = 0

1852 maxDP_line = next(inputfile).split()

1853 maxDP = float(maxDP_line[2])

1854 rmsDP_line = next(inputfile).split()

1855 rmsDP = float(rmsDP_line[2])

1856 self.scfvalues[-1].append([deltaE, maxDP, rmsDP])

1857

1858 return

1859

1860 # end of parse_scf_expanded_format

1861

1862 def _append_scfvalues_scftargets(self, inputfile, line):

1863 # The SCF convergence targets are always printed after this, but apparently

1864 # not all of them always -- for example the RMS Density is missing for geometry

1865 # optimization steps. So, assume the previous value is still valid if it is

1866 # not found. For additional certainty, assert that the other targets are unchanged.

1867 while not "Last Energy change" in line:

1868 line = next(inputfile)

1869 deltaE_value = float(line.split()[4])

1870 deltaE_target = float(line.split()[7])

1871 line = next(inputfile)

1872 if "Last MAX-Density change" in line:

1873 maxDP_value = float(line.split()[4])

1874 maxDP_target = float(line.split()[7])

1875 line = next(inputfile)

1876 if "Last RMS-Density change" in line:

1877 rmsDP_value = float(line.split()[4])

1878 rmsDP_target = float(line.split()[7])

1879 else:

1880 rmsDP_value = self.scfvalues[-1][-1][2]

1881 rmsDP_target = self.scftargets[-1][2]

1882 assert deltaE_target == self.scftargets[-1][0]

1883 assert maxDP_target == self.scftargets[-1][1]

1884 self.scfvalues[-1].append([deltaE_value, maxDP_value, rmsDP_value])

1885 self.scftargets.append([deltaE_target, maxDP_target, rmsDP_target])

Coverage for cclib/parser/orcaparser.py : 92%

915 statements

Coverage for cclib/parser/orcaparser.py : 92%

915 statements 844 run 71 missing 0 excluded

915 statements