Coverage for cclib/parser/qchemparser.py: 98%

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1# -*- coding: utf-8 -*-

5# This file is part of cclib (http://cclib.github.io) and is distributed under

6# the terms of the BSD 3-Clause License.

8"""Parser for Q-Chem output files"""

10import itertools

11import math

12import re

14import numpy

16from cclib.parser import logfileparser

17from cclib.parser import utils

20class QChem(logfileparser.Logfile):

21 """A Q-Chem log file."""

23 def __init__(self, *args, **kwargs):

25 # Call the __init__ method of the superclass

26 super(QChem, self).__init__(logname="QChem", *args, **kwargs)

28 def __str__(self):

29 """Return a string representation of the object."""

30 return "QChem log file %s" % (self.filename)

32 def __repr__(self):

33 """Return a representation of the object."""

34 return 'QChem("%s")' % (self.filename)

36 def normalisesym(self, label):

37 """Q-Chem does not require normalizing symmetry labels."""

38 return label

40 def before_parsing(self):

42 # Keep track of whether or not we're performing an

43 # (un)restricted calculation.

44 self.unrestricted = False

45 self.is_rohf = False

47 # Keep track of whether or not this is a fragment calculation,

48 # so that only the supersystem is parsed.

49 self.is_fragment_section = False

50 # These headers identify when a fragment section is

51 # entered/exited.

52 self.fragment_section_headers = (

53 'Guess MOs from converged MOs on fragments',

54 'CP correction for fragment',

55 )

56 self.supersystem_section_headers = (

57 'Done with SCF on isolated fragments',

58 'Done with counterpoise correction on fragments',

59 )

61 # Compile the dashes-and-or-spaces-only regex.

62 self.re_dashes_and_spaces = re.compile(r'^[\s-]+$')

64 # Compile the regex for extracting the atomic index from an

65 # aoname.

66 self.re_atomindex = re.compile(r'(\d+)_')

68 # QChem changed the number of spaces from version 5.1 to 5.2

69 # D( 35) --> V( 3) amplitude = 0.0644

70 # S( 1) --> V( 1) amplitude = -0.1628 alpha

71 # D(189) --> S( 1) amplitude = -0.0120 beta

72 self.re_tddft = re.compile(r'[SD]$ *(\d+)$ --> [VS]$ *(\d+)$ amplitude = *([^ ]*)( (alpha|beta))?')

74 # A maximum of 6 columns per block when printing matrices. The

75 # Fock matrix is 4.

76 self.ncolsblock = 6

78 # By default, when asked to print orbitals via

79 # `scf_print`/`scf_final_print` and/or `print_orbitals`,

80 # Q-Chem will print all occupieds and the first 5 virtuals.

81 #

82 # When the number is set for `print_orbitals`, that section of

83 # the output will display (NOcc + that many virtual) MOs, but

84 # any other sections present due to

85 # `scf_print`/`scf_final_print` will still only display (NOcc

86 # + 5) MOs. It is the `print_orbitals` section that `aonames`

87 # is parsed from.

88 #

89 # Note that the (AO basis) density matrix is always (NBasis *

90 # NBasis)!

91 self.norbdisp_alpha = self.norbdisp_beta = 5

92 self.norbdisp_alpha_aonames = self.norbdisp_beta_aonames = 5

93 self.norbdisp_set = False

95 self.alpha_mo_coefficient_headers = (

96 'RESTRICTED (RHF) MOLECULAR ORBITAL COEFFICIENTS',

97 'ALPHA MOLECULAR ORBITAL COEFFICIENTS'

98 )

100 self.gradient_headers = (

101 'Full Analytical Gradient',

102 'Gradient of SCF Energy',

103 'Gradient of MP2 Energy',

104 )

105

106 self.hessian_headers = (

107 'Hessian of the SCF Energy',

108 'Final Hessian.',

109 )

110

111 self.wfn_method = [

112 'HF',

113 'MP2', 'RI-MP2', 'LOCAL_MP2', 'MP4',

114 'CCD', 'CCSD', 'CCSD(T)',

115 'QCISD', 'QCISD(T)'

116 ]

117

118 def after_parsing(self):

119

120 # If parsing a fragment job, each of the geometries appended to

121 # `atomcoords` may be of different lengths, which will prevent

122 # conversion from a list to NumPy array.

123 # Take the length of the first geometry as correct, and remove

124 # all others with different lengths.

125 if len(self.atomcoords) > 1:

126 correctlen = len(self.atomcoords[0])

127 self.atomcoords[:] = [coords for coords in self.atomcoords

128 if len(coords) == correctlen]

129 # At the moment, there is no similar correction for other properties!

130

131 # QChem does not print all MO coefficients by default, but rather

132 # up to HOMO+5. So, fill up the missing values with NaNs. If there are

133 # other cases where coefficient are missing, but different ones, this

134 # general afterthought might not be appropriate and the fix will

135 # need to be done while parsing.

136 if hasattr(self, 'mocoeffs'):

137 for im in range(len(self.mocoeffs)):

138 _nmo, _nbasis = self.mocoeffs[im].shape

139 if (_nmo, _nbasis) != (self.nmo, self.nbasis):

140 coeffs = numpy.empty((self.nmo, self.nbasis))

141 coeffs[:] = numpy.nan

142 coeffs[0:_nmo, 0:_nbasis] = self.mocoeffs[im]

143 self.mocoeffs[im] = coeffs

144

145 # When parsing the 'MOLECULAR ORBITAL COEFFICIENTS' block for

146 # `aonames`, Q-Chem doesn't print the principal quantum number

147 # for each shell; this needs to be added.

148 if hasattr(self, 'aonames') and hasattr(self, 'atombasis'):

149 angmom = ('', 'S', 'P', 'D', 'F', 'G', 'H', 'I')

150 for atom in self.atombasis:

151 bfcounts = dict()

152 for bfindex in atom:

153 atomname, bfname = self.aonames[bfindex].split('_')

154 # Keep track of how many times each shell type has

155 # appeared.

156 if bfname in bfcounts:

157 bfcounts[bfname] += 1

158 else:

159 # Make sure the starting number for type of

160 # angular momentum begins at the appropriate

161 # principal quantum number (1S, 2P, 3D, 4F,

162 # ...).

163 bfcounts[bfname] = angmom.index(bfname[0])

164 newbfname = '{}{}'.format(bfcounts[bfname], bfname)

165 self.aonames[bfindex] = '_'.join([atomname, newbfname])

166

167 # Assign the number of core electrons replaced by ECPs.

168 if hasattr(self, 'user_input') and self.user_input.get('rem') is not None:

169 if self.user_input['rem'].get('ecp') is not None:

170 ecp_is_gen = (self.user_input['rem']['ecp'] == 'gen')

171 if ecp_is_gen:

172 assert 'ecp' in self.user_input

173 has_iprint = hasattr(self, 'possible_ecps')

174

175 if not ecp_is_gen and not has_iprint:

176 msg = """ECPs are present, but the number of core \

177electrons isn't printed at all. Rerun with "iprint >= 100" to get \

178coreelectrons."""

179 self.logger.warning(msg)

180 self.incorrect_coreelectrons = True

181 elif ecp_is_gen and not has_iprint:

182 nmissing = sum(ncore == 0

183 for (_, _, ncore) in self.user_input['ecp'])

184 if nmissing > 1:

185 msg = """ECPs are present, but coreelectrons can only \

186be guessed for one element at most. Rerun with "iprint >= 100" to get \

187coreelectrons."""

188 self.logger.warning(msg)

189 self.incorrect_coreelectrons = True

190 elif self.user_input['molecule'].get('charge') is None:

191 msg = """ECPs are present, but the total charge \

192cannot be determined. Rerun without `$molecule read`."""

193 self.logger.warning(msg)

194 self.incorrect_coreelectrons = True

195 else:

196 user_charge = self.user_input['molecule']['charge']

197 # First, assign the entries given

198 # explicitly.

199 for entry in self.user_input['ecp']:

200 element, _, ncore = entry

201 if ncore > 0:

202 self._assign_coreelectrons_to_element(element, ncore)

203 # Because of how the charge is calculated

204 # during extract(), this is the number of

205 # remaining core electrons that need to be

206 # assigned ECP centers. Filter out the

207 # remaining entries, of which there should

208 # only be one.

209 core_sum = self.coreelectrons.sum() if hasattr(self, 'coreelectrons') else 0

210 remainder = self.charge - user_charge - core_sum

211 entries = [entry

212 for entry in self.user_input['ecp']

213 if entry[2] == 0]

214 if len(entries) != 0:

215 assert len(entries) == 1

216 element, _, ncore = entries[0]

217 assert ncore == 0

218 self._assign_coreelectrons_to_element(

219 element, remainder, ncore_is_total_count=True)

220 elif not ecp_is_gen and has_iprint:

221 atomsymbols = [self.table.element[atomno] for atomno in self.atomnos]

222 for i in range(self.natom):

223 if atomsymbols[i] in self.possible_ecps:

224 self.coreelectrons[i] = self.possible_ecps[atomsymbols[i]]

225 else:

226 assert ecp_is_gen and has_iprint

227 for entry in self.user_input['ecp']:

228 element, _, ncore = entry

229 # If ncore is non-zero, then it must be

230 # user-defined, and we take that

231 # value. Otherwise, look it up.

232 if ncore == 0:

233 ncore = self.possible_ecps[element]

234 self._assign_coreelectrons_to_element(element, ncore)

235

236 # Check to see if the charge is consistent with the input

237 # section. It may not be if using an ECP.

238 if hasattr(self, 'user_input'):

239 if self.user_input.get('molecule') is not None:

240 user_charge = self.user_input['molecule'].get('charge')

241 if user_charge is not None:

242 self.set_attribute('charge', user_charge)

243

244 def parse_charge_section(self, inputfile, chargetype):

245 """Parse the population analysis charge block."""

246 self.skip_line(inputfile, 'blank')

247 line = next(inputfile)

248 has_spins = False

249 if 'Spin' in line:

250 if not hasattr(self, 'atomspins'):

251 self.atomspins = dict()

252 has_spins = True

253 spins = []

254 self.skip_line(inputfile, 'dashes')

255 if not hasattr(self, 'atomcharges'):

256 self.atomcharges = dict()

257 charges = []

258 line = next(inputfile)

259

260 while list(set(line.strip())) != ['-']:

261 elements = line.split()

262 charge = utils.float(elements[2])

263 charges.append(charge)

264 if has_spins:

265 spin = utils.float(elements[3])

266 spins.append(spin)

267 line = next(inputfile)

268

269 self.atomcharges[chargetype] = numpy.array(charges)

270 if has_spins:

271 self.atomspins[chargetype] = numpy.array(spins)

272

273 @staticmethod

274 def parse_matrix(inputfile, nrows, ncols, ncolsblock):

275 """Q-Chem prints most matrices in a standard format; parse the matrix

276 into a NumPy array of the appropriate shape.

277 """

278 nparray = numpy.empty(shape=(nrows, ncols))

279 line = next(inputfile)

280 assert len(line.split()) == min(ncolsblock, ncols)

281 colcounter = 0

282 while colcounter < ncols:

283 # If the line is just the column header (indices)...

284 if line[:5].strip() == '':

285 line = next(inputfile)

286 rowcounter = 0

287 while rowcounter < nrows:

288 row = list(map(float, line.split()[1:]))

289 assert len(row) == min(ncolsblock, (ncols - colcounter))

290 nparray[rowcounter][colcounter:colcounter + ncolsblock] = row

291 line = next(inputfile)

292 rowcounter += 1

293 colcounter += ncolsblock

294 return nparray

295

296 def parse_matrix_aonames(self, inputfile, nrows, ncols):

297 """Q-Chem prints most matrices in a standard format; parse the matrix

298 into a preallocated NumPy array of the appropriate shape.

299

300 Rather than have one routine for parsing all general matrices

301 and the 'MOLECULAR ORBITAL COEFFICIENTS' block, use a second

302 which handles `aonames`.

303 """

304 bigmom = ('d', 'f', 'g', 'h')

305 nparray = numpy.empty(shape=(nrows, ncols))

306 line = next(inputfile)

307 assert len(line.split()) == min(self.ncolsblock, ncols)

308 colcounter = 0

309 split_fixed = utils.WidthSplitter((4, 4, 4, 6, 10, 10, 10, 10, 10, 10))

310 while colcounter < ncols:

311 # If the line is just the column header (indices)...

312 if line[:5].strip() == '':

313 line = next(inputfile)

314 # Do nothing for now.

315 if 'eigenvalues' in line:

316 line = next(inputfile)

317 rowcounter = 0

318 while rowcounter < nrows:

319 row = split_fixed.split(line)

320 # Only take the AO names on the first time through.

321 if colcounter == 0:

322 if len(self.aonames) != self.nbasis:

323 # Apply the offset for rows where there is

324 # more than one atom of any element in the

325 # molecule.

326 offset = 1

327 if row[2] != '':

328 name = self.atommap.get(row[1] + str(row[2]))

329 else:

330 name = self.atommap.get(row[1] + '1')

331 # For l > 1, there is a space between l and

332 # m_l when using spherical functions.

333 shell = row[2 + offset]

334 if shell in bigmom:

335 shell = ''.join([shell, row[3 + offset]])

336 aoname = ''.join([name, '_', shell.upper()])

337 self.aonames.append(aoname)

338 row = list(map(float, row[-min(self.ncolsblock, (ncols - colcounter)):]))

339 nparray[rowcounter][colcounter:colcounter + self.ncolsblock] = row

340 line = next(inputfile)

341 rowcounter += 1

342 colcounter += self.ncolsblock

343 return nparray

344

345 def parse_orbital_energies_and_symmetries(self, inputfile):

346 """Parse the 'Orbital Energies (a.u.)' block appearing after SCF converges,

347 which optionally includes MO symmetries. Based upon the

348 Occupied/Virtual labeling, the HOMO is also parsed.

349 """

350 energies = []

351 symbols = []

352

353 line = next(inputfile)

354 # Sometimes Q-Chem gets a little confused...

355 while "MOs" not in line:

356 line = next(inputfile)

357 line = next(inputfile)

358

359 # The end of the block is either a blank line or only dashes.

360 while not self.re_dashes_and_spaces.search(line) \

361 and not 'Warning : Irrep of orbital' in line:

362 if 'Occupied' in line or 'Virtual' in line:

363 # A nice trick to find where the HOMO is.

364 if 'Virtual' in line:

365 homo = len(energies) - 1

366 line = next(inputfile)

367 tokens = line.split()

368 # If the line contains letters, it must be the MO

369 # symmetries. Otherwise, it's the energies.

370 if re.search("[a-zA-Z]", line):

371 symbols.extend(tokens[1::2])

372 else:

373 for e in tokens:

374 try:

375 energy = utils.convertor(utils.float(e), 'hartree', 'eV')

376 except ValueError:

377 energy = numpy.nan

378 energies.append(energy)

379 line = next(inputfile)

380

381 # MO symmetries are either not present or there is one for each MO

382 # (energy).

383 assert len(symbols) in (0, len(energies))

384

385 return energies, symbols, homo

386

387

388 def generate_atom_map(self):

389 """Generate the map to go from Q-Chem atom numbering:

390 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'H1', 'H2', 'H3', 'H4', 'C7', ...

391 to cclib atom numbering:

392 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'H7', 'H8', 'H9', 'H10', 'C11', ...

393 for later use.

394 """

395

396 # Generate the desired order.

397 order_proper = [element + str(num)

398 for element, num in zip(self.atomelements,

399 itertools.count(start=1))]

400 # We need separate counters for each element.

401 element_counters = {element: itertools.count(start=1)

402 for element in set(self.atomelements)}

403 # Generate the Q-Chem printed order.

404 order_qchem = [element + str(next(element_counters[element]))

405 for element in self.atomelements]

406 # Combine the orders into a mapping.

407 atommap = {k: v for k, v, in zip(order_qchem, order_proper)}

408 return atommap

409

410 def generate_formula_histogram(self):

411 """From the atomnos, generate a histogram that represents the

412 molecular formula.

413 """

414

415 histogram = dict()

416 for element in self.atomelements:

417 if element in histogram.keys():

418 histogram[element] += 1

419 else:

420 histogram[element] = 1

421 return histogram

422

423 def extract(self, inputfile, line):

424 """Extract information from the file object inputfile."""

425

426 # Extract the version number and optionally the version

427 # control info.

428 if any(version_trigger in line for version_trigger in ("Q-Chem", "Unrecognized platform", "Version")):

429 # Part 1 matches

430 # - `Q-Chem 4.3.0 for Intel X86 EM64T Linux`

431 # Part 2 matches

432 # - `Unrecognized platform!!! 4.0.0.1`

433 # Part 3 matches

434 # - `Intel X86 EM64T Linux Version 4.1.0.1 `

435 # but not

436 # - `Additional authors for Version 3.1:`

437 # - `Q-Chem, Version 4.1, Q-Chem, Inc., Pittsburgh, PA (2013).`

438 match = re.search(

439 r"Q-Chem\s([\d\.]*)\sfor|"

440 r"Unrecognized platform!!!\s([\d\.]*)\b|"

441 r"Version\s([\d\.]*)\s*$",

442 line

443 )

444 if match:

445 groups = [s for s in match.groups() if s is not None]

446 assert len(groups) == 1

447 package_version = groups[0]

448 self.metadata["package_version"] = package_version

449 self.metadata["legacy_package_version"] = package_version

450 # Avoid "Last SVN revision" entry.

451 if "SVN revision" in line and "Last" not in line:

452 svn_revision = line.split()[3]

453 line = next(inputfile)

454 svn_branch = line.split()[3].replace("/", "_")

455 if "package_version" in self.metadata:

456 self.metadata["package_version"] = "{}dev+{}-{}".format(

457 self.metadata["package_version"], svn_branch, svn_revision

458 )

459

460 # Disable/enable parsing for fragment sections.

461 if any(message in line for message in self.fragment_section_headers):

462 self.is_fragment_section = True

463 if any(message in line for message in self.supersystem_section_headers):

464 self.is_fragment_section = False

465

466 if not self.is_fragment_section:

467

468 # If the input section is repeated back, parse the $rem and

469 # $molecule sections.

470 if line[0:11] == 'User input:':

471 self.user_input = dict()

472 self.skip_line(inputfile, 'd')

473 while list(set(line.strip())) != ['-']:

474

475 if line.strip().lower() == '$rem':

476

477 self.user_input['rem'] = dict()

478

479 while line.strip().lower() != '$end':

480

481 line = next(inputfile).lower()

482 if line.strip() == '$end':

483 break

484 # Apparently calculations can run without

485 # a matching $end...this terminates the

486 # user input section no matter what.

487 if line.strip() == ('-' * 62):

488 break

489

490 tokens = line.split()

491 # Allow blank lines.

492 if len(tokens) == 0:

493 continue

494 # Entries may be separated by an equals

495 # sign, and can have comments, for example:

496 # ecp gen

497 # ecp = gen

498 # ecp gen ! only on first chlorine

499 # ecp = gen only on first chlorine

500 assert len(tokens) >= 2

501 keyword = tokens[0]

502 if tokens[1] == '=':

503 option = tokens[2]

504 else:

505 option = tokens[1]

506 self.user_input['rem'][keyword] = option

507

508 if keyword == 'method':

509 method = option.upper()

510 if method in self.wfn_method:

511 self.metadata["methods"].append(method)

512 else:

513 self.metadata["methods"].append('DFT')

514 self.metadata["functional"] = method

515

516 if keyword == 'exchange':

517 self.metadata["methods"].append('DFT')

518 self.metadata["functional"] = option

519

520 if keyword == 'print_orbitals':

521 # Stay with the default value if a number isn't

522 # specified.

523 if option in ('true', 'false'):

524 continue

525 else:

526 norbdisp_aonames = int(option)

527 self.norbdisp_alpha_aonames = norbdisp_aonames

528 self.norbdisp_beta_aonames = norbdisp_aonames

529 self.norbdisp_set = True

530

531 if line.strip().lower() == '$ecp':

532

533 self.user_input['ecp'] = []

534 line = next(inputfile)

535

536 while line.strip().lower() != '$end':

537

538 while list(set(line.strip())) != ['*']:

539

540 # Parse the element for this ECP

541 # entry. If only the element is on

542 # this line, or the 2nd token is 0, it

543 # applies to all atoms; if it's > 0,

544 # then it indexes (1-based) that

545 # specific atom in the whole molecule.

546 tokens = line.split()

547 assert len(tokens) > 0

548 element = tokens[0][0].upper() + tokens[0][1:].lower()

549 assert element in self.table.element

550 if len(tokens) > 1:

551 assert len(tokens) == 2

552 index = int(tokens[1]) - 1

553 else:

554 index = -1

555 line = next(inputfile)

556

557 # Next comes the ECP definition. If

558 # the line contains only a single

559 # item, it's a built-in ECP, otherwise

560 # it's a full definition.

561 tokens = line.split()

562 if len(tokens) == 1:

563 ncore = 0

564 line = next(inputfile)

565 else:

566 assert len(tokens) == 3

567 ncore = int(tokens[2])

568 # Don't parse the remainder of the

569 # ECP definition.

570 while list(set(line.strip())) != ['*']:

571 line = next(inputfile)

572

573 entry = (element, index, ncore)

574 self.user_input['ecp'].append(entry)

575

576 line = next(inputfile)

577

578 if line.strip().lower() == '$end':

579 break

580

581 if line.strip().lower() == '$molecule':

582

583 self.user_input['molecule'] = dict()

584 line = next(inputfile)

585

586 # Don't read the molecule, only the

587 # supersystem charge and multiplicity.

588 if line.split()[0].lower() == 'read':

589 pass

590 else:

591 charge, mult = [int(x) for x in line.split()]

592 self.user_input['molecule']['charge'] = charge

593 self.user_input['molecule']['mult'] = mult

594

595 line = next(inputfile).lower()

596

597 # Parse the basis set name

598 if 'Requested basis set' in line:

599 self.metadata["basis_set"] = line.split()[-1]

600

601 # Parse the general basis for `gbasis`, in the style used by

602 # Gaussian.

603 if 'Basis set in general basis input format:' in line:

604 self.skip_lines(inputfile, ['d', '$basis'])

605 line = next(inputfile)

606 if not hasattr(self, 'gbasis'):

607 self.gbasis = []

608 # The end of the general basis block.

609 while '$end' not in line:

610 atom = []

611 # 1. Contains element symbol and atomic index of

612 # basis functions; if 0, applies to all atoms of

613 # same element.

614 assert len(line.split()) == 2

615 line = next(inputfile)

616 # The end of each atomic block.

617 while '****' not in line:

618 # 2. Contains the type of basis function {S, SP,

619 # P, D, F, G, H, ...}, the number of primitives,

620 # and the weight of the final contracted function.

621 bfsplitline = line.split()

622 assert len(bfsplitline) == 3

623 bftype = bfsplitline[0]

624 nprim = int(bfsplitline[1])

625 line = next(inputfile)

626 # 3. The primitive basis functions that compose

627 # the contracted basis function; there are `nprim`

628 # of them. The first value is the exponent, and

629 # the second value is the contraction

630 # coefficient. If `bftype == 'SP'`, the primitives

631 # are for both S- and P-type basis functions but

632 # with separate contraction coefficients,

633 # resulting in three columns.

634 if bftype == 'SP':

635 primitives_S = []

636 primitives_P = []

637 else:

638 primitives = []

639 # For each primitive in the contracted basis

640 # function...

641 for iprim in range(nprim):

642 primsplitline = line.split()

643 exponent = float(primsplitline[0])

644 if bftype == 'SP':

645 assert len(primsplitline) == 3

646 coefficient_S = float(primsplitline[1])

647 coefficient_P = float(primsplitline[2])

648 primitives_S.append((exponent, coefficient_S))

649 primitives_P.append((exponent, coefficient_P))

650 else:

651 assert len(primsplitline) == 2

652 coefficient = float(primsplitline[1])

653 primitives.append((exponent, coefficient))

654 line = next(inputfile)

655 if bftype == 'SP':

656 bf_S = ('S', primitives_S)

657 bf_P = ('P', primitives_P)

658 atom.append(bf_S)

659 atom.append(bf_P)

660 else:

661 bf = (bftype, primitives)

662 atom.append(bf)

663 # Move to the next contracted basis function

664 # as long as we don't hit the '****' atom

665 # delimiter.

666 self.gbasis.append(atom)

667 line = next(inputfile)

668

669 if line.strip() == 'The following effective core potentials will be applied':

670

671 # Keep track of all elements that may have an ECP on

672 # them. *Which* centers have an ECP can't be

673 # determined here, so just take the number of valence

674 # electrons, then later later figure out the centers

675 # and do core = Z - valence.

676 self.possible_ecps = dict()

677 # This will fail if an element has more than one kind

678 # of ECP.

679

680 split_fixed = utils.WidthSplitter((4, 13, 20, 2, 14, 14))

681

682 self.skip_lines(inputfile, ['d', 'header', 'header', 'd'])

683 line = next(inputfile)

684 while list(set(line.strip())) != ['-']:

685 tokens = split_fixed.split(line)

686 if tokens[0] != '':

687 element = tokens[0]

688 valence = int(tokens[1])

689 ncore = self.table.number[element] - valence

690 self.possible_ecps[element] = ncore

691 line = next(inputfile)

692

693 if 'TIME STEP #' in line:

694 tokens = line.split()

695 self.append_attribute('time', float(tokens[8]))

696

697 if line.strip() == "Adding empirical dispersion correction":

698 while "energy" not in line:

699 line = next(inputfile)

700 self.append_attribute(

701 "dispersionenergies",

702 utils.convertor(utils.float(line.split()[-2]), "hartree", "eV")

703 )

704

705 # Extract the atomic numbers and coordinates of the atoms.

706 if 'Standard Nuclear Orientation' in line:

707 if "Angstroms" in line:

708 convertor = lambda x: x

709 elif 'Bohr' in line:

710 convertor = lambda x: utils.convertor(x, 'bohr', 'Angstrom')

711 else:

712 raise ValueError("Unknown units in coordinate header: {}".format(line))

713 self.skip_lines(inputfile, ['cols', 'dashes'])

714 atomelements = []

715 atomcoords = []

716 line = next(inputfile)

717 while list(set(line.strip())) != ['-']:

718 entry = line.split()

719 atomelements.append(entry[1])

720 atomcoords.append([convertor(float(value)) for value in entry[2:]])

721 line = next(inputfile)

722

723 self.append_attribute('atomcoords', atomcoords)

724

725 # We calculate and handle atomnos no matter what, since in

726 # the case of fragment calculations the atoms may change,

727 # along with the charge and spin multiplicity.

728 self.atomnos = []

729 self.atomelements = []

730 for atomelement in atomelements:

731 self.atomelements.append(atomelement)

732 if atomelement == 'GH':

733 self.atomnos.append(0)

734 else:

735 self.atomnos.append(self.table.number[atomelement])

736 self.natom = len(self.atomnos)

737 self.atommap = self.generate_atom_map()

738 self.formula_histogram = self.generate_formula_histogram()

739

740 # Number of electrons.

741 # Useful for determining the number of occupied/virtual orbitals.

742 if 'Nuclear Repulsion Energy' in line:

743 line = next(inputfile)

744 nelec_re_string = r'There are(\s+[0-9]+) alpha and(\s+[0-9]+) beta electrons'

745 match = re.findall(nelec_re_string, line.strip())

746 self.set_attribute('nalpha', int(match[0][0].strip()))

747 self.set_attribute('nbeta', int(match[0][1].strip()))

748 self.norbdisp_alpha += self.nalpha

749 self.norbdisp_alpha_aonames += self.nalpha

750 self.norbdisp_beta += self.nbeta

751 self.norbdisp_beta_aonames += self.nbeta

752 # Calculate the spin multiplicity (2S + 1), where S is the

753 # total spin of the system.

754 S = (self.nalpha - self.nbeta) / 2

755 mult = int(2 * S + 1)

756 self.set_attribute('mult', mult)

757 # Calculate the molecular charge as the difference between

758 # the atomic numbers and the number of electrons.

759 if hasattr(self, 'atomnos'):

760 charge = sum(self.atomnos) - (self.nalpha + self.nbeta)

761 self.set_attribute('charge', charge)

762

763 # Number of basis functions.

764 if 'basis functions' in line:

765 if not hasattr(self, 'nbasis'):

766 self.set_attribute('nbasis', int(line.split()[-3]))

767 # In the case that there are fewer basis functions

768 # (and therefore MOs) than default number of MOs

769 # displayed, reset the display values.

770 self.norbdisp_alpha = min(self.norbdisp_alpha, self.nbasis)

771 self.norbdisp_alpha_aonames = min(self.norbdisp_alpha_aonames, self.nbasis)

772 self.norbdisp_beta = min(self.norbdisp_beta, self.nbasis)

773 self.norbdisp_beta_aonames = min(self.norbdisp_beta_aonames, self.nbasis)

774

775 # Check for whether or not we're peforming an

776 # (un)restricted calculation.

777 if 'calculation will be' in line:

778 if ' restricted' in line:

779 self.unrestricted = False

780 if 'unrestricted' in line:

781 self.unrestricted = True

782 if hasattr(self, 'nalpha') and hasattr(self, 'nbeta'):

783 if self.nalpha != self.nbeta:

784 self.unrestricted = True

785 self.is_rohf = True

786

787 # Section with SCF iterations goes like this:

788 #

789 # SCF converges when DIIS error is below 1.0E-05

790 # ---------------------------------------

791 # Cycle Energy DIIS Error

792 # ---------------------------------------

793 # 1 -381.9238072190 1.39E-01

794 # 2 -382.2937212775 3.10E-03

795 # 3 -382.2939780242 3.37E-03

796 # ...

797 #

798 scf_success_messages = (

799 'Convergence criterion met',

800 'corrected energy'

801 )

802 scf_failure_messages = (

803 'SCF failed to converge',

804 'Convergence failure'

805 )

806 if 'SCF converges when ' in line:

807 if not hasattr(self, 'scftargets'):

808 self.scftargets = []

809 target = float(line.split()[-1])

810 self.scftargets.append([target])

811

812 # We should have the header between dashes now,

813 # but sometimes there are lines before the first dashes.

814 while not 'Cycle Energy' in line:

815 line = next(inputfile)

816 self.skip_line(inputfile, 'd')

817

818 values = []

819 iter_counter = 1

820 line = next(inputfile)

821 while not any(message in line for message in scf_success_messages):

822

823 # Some trickery to avoid a lot of printing that can occur

824 # between each SCF iteration.

825 entry = line.split()

826 if len(entry) > 0:

827 if entry[0] == str(iter_counter):

828 # Q-Chem only outputs one error metric.

829 error = float(entry[2])

830 values.append([error])

831 iter_counter += 1

832

833 try:

834 line = next(inputfile)

835 # Is this the end of the file for some reason?

836 except StopIteration:

837 self.logger.warning('File terminated before end of last SCF! Last error: {}'.format(error))

838 break

839

840 # We've converged, but still need the last iteration.

841 if any(message in line for message in scf_success_messages):

842 entry = line.split()

843 error = float(entry[2])

844 values.append([error])

845 iter_counter += 1

846

847 # This is printed in regression QChem4.2/dvb_sp_unconverged.out

848 # so use it to bail out when convergence fails.

849 if any(message in line for message in scf_failure_messages):

850 break

851

852 if not hasattr(self, 'scfvalues'):

853 self.scfvalues = []

854 self.scfvalues.append(numpy.array(values))

855

856 # Molecular orbital coefficients.

857

858 # Try parsing them from this block (which comes from

859 # `scf_final_print = 2``) rather than the combined

860 # aonames/mocoeffs/moenergies block (which comes from

861 # `print_orbitals = true`).

862 if 'Final Alpha MO Coefficients' in line:

863 if not hasattr(self, 'mocoeffs'):

864 self.mocoeffs = []

865 mocoeffs = QChem.parse_matrix(inputfile, self.nbasis, self.norbdisp_alpha, self.ncolsblock)

866 self.mocoeffs.append(mocoeffs.transpose())

867

868 if 'Final Beta MO Coefficients' in line:

869 mocoeffs = QChem.parse_matrix(inputfile, self.nbasis, self.norbdisp_beta, self.ncolsblock)

870 self.mocoeffs.append(mocoeffs.transpose())

871

872 if 'Total energy in the final basis set' in line:

873 if not hasattr(self, 'scfenergies'):

874 self.scfenergies = []

875 scfenergy = float(line.split()[-1])

876 self.scfenergies.append(utils.convertor(scfenergy, 'hartree', 'eV'))

877

878 # Geometry optimization.

879

880 if 'Maximum Tolerance Cnvgd?' in line:

881 line_g = next(inputfile).split()[1:3]

882 line_d = next(inputfile).split()[1:3]

883 line_e = next(inputfile).split()[2:4]

884

885 if not hasattr(self, 'geotargets'):

886 self.geotargets = [line_g[1], line_d[1], utils.float(line_e[1])]

887 if not hasattr(self, 'geovalues'):

888 self.geovalues = []

889 maxg = utils.float(line_g[0])

890 maxd = utils.float(line_d[0])

891 ediff = utils.float(line_e[0])

892 geovalues = [maxg, maxd, ediff]

893 self.geovalues.append(geovalues)

894

895 if '** OPTIMIZATION CONVERGED **' in line:

896 if not hasattr(self, 'optdone'):

897 self.optdone = []

898 self.optdone.append(len(self.atomcoords))

899

900 if '** MAXIMUM OPTIMIZATION CYCLES REACHED **' in line:

901 if not hasattr(self, 'optdone'):

902 self.optdone = []

903

904 # Moller-Plesset corrections.

905

906 # There are multiple modules in Q-Chem for calculating MPn energies:

907 # cdman, ccman, and ccman2, all with different output.

908 #

909 # MP2, RI-MP2, and local MP2 all default to cdman, which has a simple

910 # block of output after the regular SCF iterations.

911 #

912 # MP3 is handled by ccman2.

913 #

914 # MP4 and variants are handled by ccman.

915

916 # This is the MP2/cdman case.

917 if 'MP2 total energy' in line:

918 if not hasattr(self, 'mpenergies'):

919 self.mpenergies = []

920 mp2energy = float(line.split()[4])

921 mp2energy = utils.convertor(mp2energy, 'hartree', 'eV')

922 self.mpenergies.append([mp2energy])

923

924 # This is the MP3/ccman2 case.

925 if line[1:11] == 'MP2 energy' and line[12:19] != 'read as':

926 if not hasattr(self, 'mpenergies'):

927 self.mpenergies = []

928 mpenergies = []

929 mp2energy = float(line.split()[3])

930 mpenergies.append(mp2energy)

931 line = next(inputfile)

932 line = next(inputfile)

933 # Just a safe check.

934 if 'MP3 energy' in line:

935 mp3energy = float(line.split()[3])

936 mpenergies.append(mp3energy)

937 mpenergies = [utils.convertor(mpe, 'hartree', 'eV')

938 for mpe in mpenergies]

939 self.mpenergies.append(mpenergies)

940

941 # This is the MP4/ccman case.

942 if 'EHF' in line:

943 if not hasattr(self, 'mpenergies'):

944 self.mpenergies = []

945 mpenergies = []

946

947 while list(set(line.strip())) != ['-']:

948

949 if 'EMP2' in line:

950 mp2energy = float(line.split()[2])

951 mpenergies.append(mp2energy)

952 if 'EMP3' in line:

953 mp3energy = float(line.split()[2])

954 mpenergies.append(mp3energy)

955 if 'EMP4SDQ' in line:

956 mp4sdqenergy = float(line.split()[2])

957 mpenergies.append(mp4sdqenergy)

958 # This is really MP4SD(T)Q.

959 if 'EMP4 ' in line:

960 mp4sdtqenergy = float(line.split()[2])

961 mpenergies.append(mp4sdtqenergy)

962

963 line = next(inputfile)

964

965 mpenergies = [utils.convertor(mpe, 'hartree', 'eV')

966 for mpe in mpenergies]

967 self.mpenergies.append(mpenergies)

968

969 # Coupled cluster corrections.

970 # Hopefully we only have to deal with ccman2 here.

971

972 if 'CCD total energy' in line:

973 if not hasattr(self, 'ccenergies'):

974 self.ccenergies = []

975 ccdenergy = float(line.split()[-1])

976 ccdenergy = utils.convertor(ccdenergy, 'hartree', 'eV')

977 self.ccenergies.append(ccdenergy)

978 if 'CCSD total energy' in line:

979 has_triples = False

980 if not hasattr(self, 'ccenergies'):

981 self.ccenergies = []

982 ccsdenergy = float(line.split()[-1])

983 # Make sure we aren't actually doing CCSD(T).

984 line = next(inputfile)

985 line = next(inputfile)

986 if 'CCSD(T) total energy' in line:

987 has_triples = True

988 ccsdtenergy = float(line.split()[-1])

989 ccsdtenergy = utils.convertor(ccsdtenergy, 'hartree', 'eV')

990 self.ccenergies.append(ccsdtenergy)

991 if not has_triples:

992 ccsdenergy = utils.convertor(ccsdenergy, 'hartree', 'eV')

993 self.ccenergies.append(ccsdenergy)

994

995 if line[:11] == " CCSD T1^2":

996 t1_squared = float(line.split()[3])

997 t1_norm = math.sqrt(t1_squared)

998 self.metadata["t1_diagnostic"] = t1_norm / math.sqrt(2 * (self.nalpha + self.nbeta))

999

1000 # Electronic transitions. Works for both CIS and TDDFT.

1001 if 'Excitation Energies' in line:

1002

1003 # Restricted:

1004 # ---------------------------------------------------

1005 # TDDFT/TDA Excitation Energies

1006 # ---------------------------------------------------

1007 #

1008 # Excited state 1: excitation energy (eV) = 3.6052

1009 # Total energy for state 1: -382.167872200685

1010 # Multiplicity: Triplet

1011 # Trans. Mom.: 0.0000 X 0.0000 Y 0.0000 Z

1012 # Strength : 0.0000

1013 # D( 33) --> V( 3) amplitude = 0.2618

1014 # D( 34) --> V( 2) amplitude = 0.2125

1015 # D( 35) --> V( 1) amplitude = 0.9266

1016 #

1017 # Unrestricted:

1018 # Excited state 2: excitation energy (eV) = 2.3156

1019 # Total energy for state 2: -381.980177630969

1020 # <S**2> : 0.7674

1021 # Trans. Mom.: -2.7680 X -0.1089 Y 0.0000 Z

1022 # Strength : 0.4353

1023 # S( 1) --> V( 1) amplitude = -0.3105 alpha

1024 # D( 34) --> S( 1) amplitude = 0.9322 beta

1025

1026 self.skip_lines(inputfile, ['dashes', 'blank'])

1027 line = next(inputfile)

1028

1029 etenergies = []

1030 etsyms = []

1031 etoscs = []

1032 etsecs = []

1033 spinmap = {'alpha': 0, 'beta': 1}

1034

1035 while list(set(line.strip())) != ['-']:

1036

1037 # Take the total energy for the state and subtract from the

1038 # ground state energy, rather than just the EE;

1039 # this will be more accurate.

1040 if 'Total energy for state' in line:

1041 energy = utils.convertor(float(line.split()[5]), 'hartree', 'wavenumber')

1042 etenergy = energy - utils.convertor(self.scfenergies[-1], 'eV', 'wavenumber')

1043 etenergies.append(etenergy)

1044 # if 'excitation energy' in line:

1045 # etenergy = utils.convertor(float(line.split()[-1]), 'eV', 'wavenumber')

1046 # etenergies.append(etenergy)

1047 if 'Multiplicity' in line:

1048 etsym = line.split()[1]

1049 etsyms.append(etsym)

1050 if 'Strength' in line:

1051 strength = float(line.split()[-1])

1052 etoscs.append(strength)

1053

1054 # This is the list of transitions.

1055 if 'amplitude' in line:

1056 sec = []

1057 while line.strip() != '':

1058 re_match = self.re_tddft.search(line)

1059 if self.unrestricted:

1060 spin = spinmap[re_match.group(5)]

1061 else:

1062 spin = 0

1063

1064 # There is a subtle difference between TDA and RPA calcs,

1065 # because in the latter case each transition line is

1066 # preceeded by the type of vector: X or Y, name excitation

1067 # or deexcitation (see #154 for details). For deexcitations,

1068 # we will need to reverse the MO indices. Note also that Q-Chem

1069 # starts reindexing virtual orbitals at 1.

1070 if line[5] == '(':

1071 ttype = 'X'

1072 else:

1073 assert line[5] == ":"

1074 ttype = line[4]

1075 startidx = int(re_match.group(1)) - 1

1076 endidx = int(re_match.group(2)) - 1 + self.nalpha

1077 contrib = float(re_match.group(3))

1078

1079 start = (startidx, spin)

1080 end = (endidx, spin)

1081 if ttype == 'X':

1082 sec.append([start, end, contrib])

1083 elif ttype == 'Y':

1084 sec.append([end, start, contrib])

1085 else:

1086 raise ValueError('Unknown transition type: %s' % ttype)

1087 line = next(inputfile)

1088 etsecs.append(sec)

1089

1090 line = next(inputfile)

1091

1092 self.set_attribute('etenergies', etenergies)

1093 self.set_attribute('etsyms', etsyms)

1094 self.set_attribute('etoscs', etoscs)

1095 self.set_attribute('etsecs', etsecs)

1096

1097 # Static and dynamic polarizability from mopropman.

1098 if 'Polarizability (a.u.)' in line:

1099 if not hasattr(self, 'polarizabilities'):

1100 self.polarizabilities = []

1101 while 'Full Tensor' not in line:

1102 line = next(inputfile)

1103 self.skip_line(inputfile, 'blank')

1104 polarizability = [next(inputfile).split() for _ in range(3)]

1105 self.polarizabilities.append(numpy.array(polarizability))

1106

1107 # Static polarizability from finite difference or

1108 # responseman.

1109 if line.strip() in ('Static polarizability tensor [a.u.]',

1110 'Polarizability tensor [a.u.]'):

1111 if not hasattr(self, 'polarizabilities'):

1112 self.polarizabilities = []

1113 polarizability = [next(inputfile).split() for _ in range(3)]

1114 self.polarizabilities.append(numpy.array(polarizability))

1115

1116 # Molecular orbital energies and symmetries.

1117 if line.strip() == 'Orbital Energies (a.u.) and Symmetries':

1118

1119 # --------------------------------------------------------------

1120 # Orbital Energies (a.u.) and Symmetries

1121 # --------------------------------------------------------------

1122 #

1123 # Alpha MOs, Restricted

1124 # -- Occupied --

1125 # -10.018 -10.018 -10.008 -10.008 -10.007 -10.007 -10.006 -10.005

1126 # 1 Bu 1 Ag 2 Bu 2 Ag 3 Bu 3 Ag 4 Bu 4 Ag

1127 # -9.992 -9.992 -0.818 -0.755 -0.721 -0.704 -0.670 -0.585

1128 # 5 Ag 5 Bu 6 Ag 6 Bu 7 Ag 7 Bu 8 Bu 8 Ag

1129 # -0.561 -0.532 -0.512 -0.462 -0.439 -0.410 -0.400 -0.397

1130 # 9 Ag 9 Bu 10 Ag 11 Ag 10 Bu 11 Bu 12 Bu 12 Ag

1131 # -0.376 -0.358 -0.349 -0.330 -0.305 -0.295 -0.281 -0.263

1132 # 13 Bu 14 Bu 13 Ag 1 Au 15 Bu 14 Ag 15 Ag 1 Bg

1133 # -0.216 -0.198 -0.160

1134 # 2 Au 2 Bg 3 Bg

1135 # -- Virtual --

1136 # 0.050 0.091 0.116 0.181 0.280 0.319 0.330 0.365

1137 # 3 Au 4 Au 4 Bg 5 Au 5 Bg 16 Ag 16 Bu 17 Bu

1138 # 0.370 0.413 0.416 0.422 0.446 0.469 0.496 0.539

1139 # 17 Ag 18 Bu 18 Ag 19 Bu 19 Ag 20 Bu 20 Ag 21 Ag

1140 # 0.571 0.587 0.610 0.627 0.646 0.693 0.743 0.806

1141 # 21 Bu 22 Ag 22 Bu 23 Bu 23 Ag 24 Ag 24 Bu 25 Ag

1142 # 0.816

1143 # 25 Bu

1144 #

1145 # Beta MOs, Restricted

1146 # -- Occupied --

1147 # -10.018 -10.018 -10.008 -10.008 -10.007 -10.007 -10.006 -10.005

1148 # 1 Bu 1 Ag 2 Bu 2 Ag 3 Bu 3 Ag 4 Bu 4 Ag

1149 # -9.992 -9.992 -0.818 -0.755 -0.721 -0.704 -0.670 -0.585

1150 # 5 Ag 5 Bu 6 Ag 6 Bu 7 Ag 7 Bu 8 Bu 8 Ag

1151 # -0.561 -0.532 -0.512 -0.462 -0.439 -0.410 -0.400 -0.397

1152 # 9 Ag 9 Bu 10 Ag 11 Ag 10 Bu 11 Bu 12 Bu 12 Ag

1153 # -0.376 -0.358 -0.349 -0.330 -0.305 -0.295 -0.281 -0.263

1154 # 13 Bu 14 Bu 13 Ag 1 Au 15 Bu 14 Ag 15 Ag 1 Bg

1155 # -0.216 -0.198 -0.160

1156 # 2 Au 2 Bg 3 Bg

1157 # -- Virtual --

1158 # 0.050 0.091 0.116 0.181 0.280 0.319 0.330 0.365

1159 # 3 Au 4 Au 4 Bg 5 Au 5 Bg 16 Ag 16 Bu 17 Bu

1160 # 0.370 0.413 0.416 0.422 0.446 0.469 0.496 0.539

1161 # 17 Ag 18 Bu 18 Ag 19 Bu 19 Ag 20 Bu 20 Ag 21 Ag

1162 # 0.571 0.587 0.610 0.627 0.646 0.693 0.743 0.806

1163 # 21 Bu 22 Ag 22 Bu 23 Bu 23 Ag 24 Ag 24 Bu 25 Ag

1164 # 0.816

1165 # 25 Bu

1166 # --------------------------------------------------------------

1167

1168 self.skip_line(inputfile, 'dashes')

1169 line = next(inputfile)

1170 energies_alpha, symbols_alpha, homo_alpha = self.parse_orbital_energies_and_symmetries(inputfile)

1171 # Only look at the second block if doing an unrestricted calculation.

1172 # This might be a problem for ROHF/ROKS.

1173 if self.unrestricted:

1174 energies_beta, symbols_beta, homo_beta = self.parse_orbital_energies_and_symmetries(inputfile)

1175

1176 # For now, only keep the last set of MO energies, even though it is

1177 # printed at every step of geometry optimizations and fragment jobs.

1178 self.set_attribute('moenergies', [numpy.array(energies_alpha)])

1179 self.set_attribute('homos', [homo_alpha])

1180 self.set_attribute('mosyms', [symbols_alpha])

1181 if self.unrestricted:

1182 self.moenergies.append(numpy.array(energies_beta))

1183 self.homos.append(homo_beta)

1184 self.mosyms.append(symbols_beta)

1185

1186 self.set_attribute('nmo', len(self.moenergies[0]))

1187

1188 # Molecular orbital energies, no symmetries.

1189 if line.strip() == 'Orbital Energies (a.u.)':

1190

1191 # In the case of no orbital symmetries, the beta spin block is not

1192 # present for restricted calculations.

1193

1194 # --------------------------------------------------------------

1195 # Orbital Energies (a.u.)

1196 # --------------------------------------------------------------

1197 #

1198 # Alpha MOs

1199 # -- Occupied --

1200 # ******* -38.595 -34.580 -34.579 -34.578 -19.372 -19.372 -19.364

1201 # -19.363 -19.362 -19.362 -4.738 -3.252 -3.250 -3.250 -1.379

1202 # -1.371 -1.369 -1.365 -1.364 -1.362 -0.859 -0.855 -0.849

1203 # -0.846 -0.840 -0.836 -0.810 -0.759 -0.732 -0.729 -0.704

1204 # -0.701 -0.621 -0.610 -0.595 -0.587 -0.584 -0.578 -0.411

1205 # -0.403 -0.355 -0.354 -0.352

1206 # -- Virtual --

1207 # -0.201 -0.117 -0.099 -0.086 0.020 0.031 0.055 0.067

1208 # 0.075 0.082 0.086 0.092 0.096 0.105 0.114 0.148

1209 #

1210 # Beta MOs

1211 # -- Occupied --

1212 # ******* -38.561 -34.550 -34.549 -34.549 -19.375 -19.375 -19.367

1213 # -19.367 -19.365 -19.365 -4.605 -3.105 -3.103 -3.102 -1.385

1214 # -1.376 -1.376 -1.371 -1.370 -1.368 -0.863 -0.858 -0.853

1215 # -0.849 -0.843 -0.839 -0.818 -0.765 -0.738 -0.737 -0.706

1216 # -0.702 -0.624 -0.613 -0.600 -0.591 -0.588 -0.585 -0.291

1217 # -0.291 -0.288 -0.275

1218 # -- Virtual --

1219 # -0.139 -0.122 -0.103 0.003 0.014 0.049 0.049 0.059

1220 # 0.061 0.070 0.076 0.081 0.086 0.090 0.098 0.106

1221 # 0.138

1222 # --------------------------------------------------------------

1223

1224 self.skip_line(inputfile, 'dashes')

1225 line = next(inputfile)

1226 energies_alpha, _, homo_alpha = self.parse_orbital_energies_and_symmetries(inputfile)

1227 # Only look at the second block if doing an unrestricted calculation.

1228 # This might be a problem for ROHF/ROKS.

1229 if self.unrestricted:

1230 energies_beta, _, homo_beta = self.parse_orbital_energies_and_symmetries(inputfile)

1231

1232 # For now, only keep the last set of MO energies, even though it is

1233 # printed at every step of geometry optimizations and fragment jobs.

1234 self.set_attribute('moenergies', [numpy.array(energies_alpha)])

1235 self.set_attribute('homos', [homo_alpha])

1236 if self.unrestricted:

1237 self.moenergies.append(numpy.array(energies_beta))

1238 self.homos.append(homo_beta)

1239

1240 self.set_attribute('nmo', len(self.moenergies[0]))

1241

1242 # Molecular orbital coefficients.

1243

1244 # This block comes from `print_orbitals = true/{int}`. Less

1245 # precision than `scf_final_print >= 2` for `mocoeffs`, but

1246 # important for `aonames` and `atombasis`.

1247

1248 if any(header in line

1249 for header in self.alpha_mo_coefficient_headers):

1250

1251 # If we've asked to display more virtual orbitals than

1252 # there are MOs present in the molecule, fix that now.

1253 if hasattr(self, 'nmo') and hasattr(self, 'nalpha') and hasattr(self, 'nbeta'):

1254 self.norbdisp_alpha_aonames = min(self.norbdisp_alpha_aonames, self.nmo)

1255 self.norbdisp_beta_aonames = min(self.norbdisp_beta_aonames, self.nmo)

1256

1257 if not hasattr(self, 'mocoeffs'):

1258 self.mocoeffs = []

1259 if not hasattr(self, 'atombasis'):

1260 self.atombasis = []

1261 for n in range(self.natom):

1262 self.atombasis.append([])

1263 if not hasattr(self, 'aonames'):

1264 self.aonames = []

1265 # We could also attempt to parse `moenergies` here, but

1266 # nothing is gained by it.

1267

1268 mocoeffs = self.parse_matrix_aonames(inputfile, self.nbasis, self.norbdisp_alpha_aonames)

1269 # Only use these MO coefficients if we don't have them

1270 # from `scf_final_print`.

1271 if len(self.mocoeffs) == 0:

1272 self.mocoeffs.append(mocoeffs.transpose())

1273

1274 # Go back through `aonames` to create `atombasis`.

1275 assert len(self.aonames) == self.nbasis

1276 for aoindex, aoname in enumerate(self.aonames):

1277 atomindex = int(self.re_atomindex.search(aoname).groups()[0]) - 1

1278 self.atombasis[atomindex].append(aoindex)

1279 assert len(self.atombasis) == len(self.atomnos)

1280

1281 if 'BETA MOLECULAR ORBITAL COEFFICIENTS' in line:

1282

1283 mocoeffs = self.parse_matrix_aonames(inputfile, self.nbasis, self.norbdisp_beta_aonames)

1284 if len(self.mocoeffs) == 1:

1285 self.mocoeffs.append(mocoeffs.transpose())

1286

1287 # Population analysis.

1288

1289 if 'Ground-State Mulliken Net Atomic Charges' in line:

1290 self.parse_charge_section(inputfile, 'mulliken')

1291 if 'Hirshfeld Atomic Charges' in line:

1292 self.parse_charge_section(inputfile, 'hirshfeld')

1293 if 'Ground-State ChElPG Net Atomic Charges' in line:

1294 self.parse_charge_section(inputfile, 'chelpg')

1295

1296 # Multipole moments are not printed in lexicographical order,

1297 # so we need to parse and sort them. The units seem OK, but there

1298 # is some uncertainty about the reference point and whether it

1299 # can be changed.

1300 #

1301 # Notice how the letter/coordinate labels change to coordinate ranks

1302 # after hexadecapole moments, and need to be translated. Additionally,

1303 # after 9-th order moments the ranks are not necessarily single digits

1304 # and so there are spaces between them.

1305 #

1306 # -----------------------------------------------------------------

1307 # Cartesian Multipole Moments

1308 # LMN = < X^L Y^M Z^N >

1309 # -----------------------------------------------------------------

1310 # Charge (ESU x 10^10)

1311 # 0.0000

1312 # Dipole Moment (Debye)

1313 # X 0.0000 Y 0.0000 Z 0.0000

1314 # Tot 0.0000

1315 # Quadrupole Moments (Debye-Ang)

1316 # XX -50.9647 XY -0.1100 YY -50.1441

1317 # XZ 0.0000 YZ 0.0000 ZZ -58.5742

1318 # ...

1319 # 5th-Order Moments (Debye-Ang^4)

1320 # 500 0.0159 410 -0.0010 320 0.0005

1321 # 230 0.0000 140 0.0005 050 0.0012

1322 # ...

1323 # -----------------------------------------------------------------

1324 #

1325 if "Cartesian Multipole Moments" in line:

1326

1327 # This line appears not by default, but only when

1328 # `multipole_order` > 4:

1329 line = inputfile.next()

1330 if 'LMN = < X^L Y^M Z^N >' in line:

1331 line = inputfile.next()

1332

1333 # The reference point is always the origin, although normally the molecule

1334 # is moved so that the center of charge is at the origin.

1335 self.reference = [0.0, 0.0, 0.0]

1336 self.moments = [self.reference]

1337

1338 # Watch out! This charge is in statcoulombs without the exponent!

1339 # We should expect very good agreement, however Q-Chem prints

1340 # the charge only with 5 digits, so expect 1e-4 accuracy.

1341 charge_header = inputfile.next()

1342 assert charge_header.split()[0] == "Charge"

1343 charge = float(inputfile.next().strip())

1344 charge = utils.convertor(charge, 'statcoulomb', 'e') * 1e-10

1345 # Allow this to change until fragment jobs are properly implemented.

1346 # assert abs(charge - self.charge) < 1e-4

1347

1348 # This will make sure Debyes are used (not sure if it can be changed).

1349 line = inputfile.next()

1350 assert line.strip() == "Dipole Moment (Debye)"

1351

1352 while "-----" not in line:

1353

1354 # The current multipole element will be gathered here.

1355 multipole = []

1356

1357 line = inputfile.next()

1358 while ("-----" not in line) and ("Moment" not in line):

1359

1360 cols = line.split()

1361

1362 # The total (norm) is printed for dipole but not other multipoles.

1363 if cols[0] == 'Tot':

1364 line = inputfile.next()

1365 continue

1366

1367 # Find and replace any 'stars' with NaN before moving on.

1368 for i in range(len(cols)):

1369 if '***' in cols[i]:

1370 cols[i] = numpy.nan

1371

1372 # The moments come in pairs (label followed by value) up to the 9-th order,

1373 # although above hexadecapoles the labels are digits representing the rank

1374 # in each coordinate. Above the 9-th order, ranks are not always single digits,

1375 # so there are spaces between them, which means moments come in quartets.

1376 if len(self.moments) < 5:

1377 for i in range(len(cols)//2):

1378 lbl = cols[2*i]

1379 m = cols[2*i + 1]

1380 multipole.append([lbl, m])

1381 elif len(self.moments) < 10:

1382 for i in range(len(cols)//2):

1383 lbl = cols[2*i]

1384 lbl = 'X'*int(lbl[0]) + 'Y'*int(lbl[1]) + 'Z'*int(lbl[2])

1385 m = cols[2*i + 1]

1386 multipole.append([lbl, m])

1387 else:

1388 for i in range(len(cols)//4):

1389 lbl = 'X'*int(cols[4*i]) + 'Y'*int(cols[4*i + 1]) + 'Z'*int(cols[4*i + 2])

1390 m = cols[4*i + 3]

1391 multipole.append([lbl, m])

1392

1393 line = inputfile.next()

1394

1395 # Sort should use the first element when sorting lists,

1396 # so this should simply work, and afterwards we just need

1397 # to extract the second element in each list (the actual moment).

1398 multipole.sort()

1399 multipole = [m[1] for m in multipole]

1400 self.moments.append(multipole)

1401

1402 # For `method = force` or geometry optimizations,

1403 # the gradient is printed.

1404 if any(header in line for header in self.gradient_headers):

1405 if not hasattr(self, 'grads'):

1406 self.grads = []

1407 if 'SCF' in line:

1408 ncolsblock = self.ncolsblock

1409 else:

1410 ncolsblock = 5

1411 grad = QChem.parse_matrix(inputfile, 3, self.natom, ncolsblock)

1412 self.grads.append(grad.T)

1413

1414 # (Static) polarizability from frequency calculations.

1415 if 'Polarizability Matrix (a.u.)' in line:

1416 if not hasattr(self, 'polarizabilities'):

1417 self.polarizabilities = []

1418 polarizability = []

1419 self.skip_line(inputfile, 'index header')

1420 for _ in range(3):

1421 line = next(inputfile)

1422 ss = line.strip()[1:]

1423 polarizability.append([ss[0:12], ss[13:24], ss[25:36]])

1424 # For some reason the sign is inverted.

1425 self.polarizabilities.append(-numpy.array(polarizability, dtype=float))

1426

1427 # For IR-related jobs, the Hessian is printed (dim: 3*natom, 3*natom).

1428 # Note that this is *not* the mass-weighted Hessian.

1429 if any(header in line for header in self.hessian_headers):

1430 if not hasattr(self, 'hessian'):

1431 dim = 3*self.natom

1432 self.hessian = QChem.parse_matrix(inputfile, dim, dim, self.ncolsblock)

1433

1434 # Start of the IR/Raman frequency section.

1435 if 'VIBRATIONAL ANALYSIS' in line:

1436

1437 while 'STANDARD THERMODYNAMIC QUANTITIES' not in line:

1438 ## IR, optional Raman:

1439 #

1440 # **********************************************************************

1441 # ** **

1442 # ** VIBRATIONAL ANALYSIS **

1443 # ** -------------------- **

1444 # ** **

1445 # ** VIBRATIONAL FREQUENCIES (CM**-1) AND NORMAL MODES **

1446 # ** FORCE CONSTANTS (mDYN/ANGSTROM) AND REDUCED MASSES (AMU) **

1447 # ** INFRARED INTENSITIES (KM/MOL) **

1448 ##** RAMAN SCATTERING ACTIVITIES (A**4/AMU) AND DEPOLARIZATION RATIOS **

1449 # ** **

1450 # **********************************************************************

1451 #

1452 #

1453 # Mode: 1 2 3

1454 # Frequency: -106.88 -102.91 161.77

1455 # Force Cnst: 0.0185 0.0178 0.0380

1456 # Red. Mass: 2.7502 2.8542 2.4660

1457 # IR Active: NO YES YES

1458 # IR Intens: 0.000 0.000 0.419

1459 # Raman Active: YES NO NO

1460 ##Raman Intens: 2.048 0.000 0.000

1461 ##Depolar: 0.750 0.000 0.000

1462 # X Y Z X Y Z X Y Z

1463 # C 0.000 0.000 -0.100 -0.000 0.000 -0.070 -0.000 -0.000 -0.027

1464 # C 0.000 0.000 0.045 -0.000 0.000 -0.074 0.000 -0.000 -0.109

1465 # C 0.000 0.000 0.148 -0.000 -0.000 -0.074 0.000 0.000 -0.121

1466 # (...)

1467 # H -0.000 -0.000 0.422 -0.000 -0.000 0.499 0.000 0.000 -0.285

1468 # TransDip 0.000 -0.000 -0.000 0.000 -0.000 -0.000 -0.000 0.000 0.021

1469 #

1470 # Mode: 4 5 6

1471 # ...

1472 #

1473 # There isn't any symmetry information for normal modes present

1474 # in Q-Chem.

1475 # if not hasattr(self, 'vibsyms'):

1476 # self.vibsyms = []

1477 if 'Frequency:' in line:

1478 if not hasattr(self, 'vibfreqs'):

1479 self.vibfreqs = []

1480 vibfreqs = map(float, line.split()[1:])

1481 self.vibfreqs.extend(vibfreqs)

1482

1483 if 'Force Cnst:' in line:

1484 if not hasattr(self, 'vibfconsts'):

1485 self.vibfconsts = []

1486 vibfconsts = map(float, line.split()[2:])

1487 self.vibfconsts.extend(vibfconsts)

1488

1489 if 'Red. Mass' in line:

1490 if not hasattr(self, 'vibrmasses'):

1491 self.vibrmasses = []

1492 vibrmasses = map(float, line.split()[2:])

1493 self.vibrmasses.extend(vibrmasses)

1494

1495 if 'IR Intens:' in line:

1496 if not hasattr(self, 'vibirs'):

1497 self.vibirs = []

1498 vibirs = map(float, line.split()[2:])

1499 self.vibirs.extend(vibirs)

1500

1501 if 'Raman Intens:' in line:

1502 if not hasattr(self, 'vibramans'):

1503 self.vibramans = []

1504 vibramans = map(float, line.split()[2:])

1505 self.vibramans.extend(vibramans)

1506

1507 # This is the start of the displacement block.

1508 if line.split()[0:3] == ['X', 'Y', 'Z']:

1509 if not hasattr(self, 'vibdisps'):

1510 self.vibdisps = []

1511 disps = []

1512 for k in range(self.natom):

1513 line = next(inputfile)

1514 numbers = list(map(float, line.split()[1:]))

1515 N = len(numbers) // 3

1516 if not disps:

1517 for n in range(N):

1518 disps.append([])

1519 for n in range(N):

1520 disps[n].append(numbers[3*n:(3*n)+3])

1521 self.vibdisps.extend(disps)

1522

1523 line = next(inputfile)

1524

1525 # Anharmonic vibrational analysis.

1526 # Q-Chem includes 3 theories: VPT2, TOSH, and VCI.

1527 # For now, just take the VPT2 results.

1528

1529 # if 'VIBRATIONAL ANHARMONIC ANALYSIS' in line:

1530

1531 # while list(set(line.strip())) != ['=']:

1532 # if 'VPT2' in line:

1533 # if not hasattr(self, 'vibanharms'):

1534 # self.vibanharms = []

1535 # self.vibanharms.append(float(line.split()[-1]))

1536 # line = next(inputfile)

1537

1538 if 'STANDARD THERMODYNAMIC QUANTITIES AT' in line:

1539

1540 if not hasattr(self, 'temperature'):

1541 self.temperature = float(line.split()[4])

1542 # Not supported yet.

1543 if not hasattr(self, 'pressure'):

1544 self.pressure = float(line.split()[7])

1545 self.skip_line(inputfile, 'blank')

1546

1547 line = next(inputfile)

1548 if self.natom == 1:

1549 assert 'Translational Enthalpy' in line

1550 else:

1551 assert 'Imaginary Frequencies' in line

1552 line = next(inputfile)

1553 # Not supported yet.

1554 assert 'Zero point vibrational energy' in line

1555 if not hasattr(self, 'zpe'):

1556 # Convert from kcal/mol to Hartree/particle.

1557 self.zpe = utils.convertor(float(line.split()[4]),

1558 'kcal/mol', 'hartree')

1559 atommasses = []

1560 while 'Translational Enthalpy' not in line:

1561 if 'Has Mass' in line:

1562 atommass = float(line.split()[6])

1563 atommasses.append(atommass)

1564 line = next(inputfile)

1565 if not hasattr(self, 'atommasses'):

1566 self.atommasses = numpy.array(atommasses)

1567

1568 while line.strip():

1569 line = next(inputfile)

1570

1571 line = next(inputfile)

1572 assert 'Total Enthalpy' in line

1573 if not hasattr(self, 'enthalpy'):

1574 enthalpy = float(line.split()[2])

1575 self.enthalpy = utils.convertor(enthalpy,

1576 'kcal/mol', 'hartree')

1577 line = next(inputfile)

1578 assert 'Total Entropy' in line

1579 if not hasattr(self, 'entropy'):

1580 entropy = float(line.split()[2]) * self.temperature / 1000

1581 # This is the *temperature dependent* entropy.

1582 self.entropy = utils.convertor(entropy,

1583 'kcal/mol', 'hartree')

1584 if not hasattr(self, 'freeenergy'):

1585 self.freeenergy = self.enthalpy - self.entropy

1586

1587 if line[:16] == ' Total job time:':

1588 self.metadata['success'] = True

1589

1590 # TODO:

1591 # 'enthalpy' (incorrect)

1592 # 'entropy' (incorrect)

1593 # 'freeenergy' (incorrect)

1594 # 'nocoeffs'

1595 # 'nooccnos'

1596 # 'vibanharms'

Coverage for cclib/parser/qchemparser.py : 98%

822 statements

Coverage for cclib/parser/qchemparser.py : 98%

822 statements 808 run 14 missing 0 excluded

822 statements