Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2# 

3# Copyright (c) 2020, the cclib development team 

4# 

5# This file is part of cclib (http://cclib.github.io) and is distributed under 

6# the terms of the BSD 3-Clause License. 

7 

8"""Parser for Q-Chem output files""" 

9 

10import itertools 

11import math 

12import re 

13 

14import numpy 

15 

16from cclib.parser import logfileparser 

17from cclib.parser import utils 

18 

19 

20class QChem(logfileparser.Logfile): 

21 """A Q-Chem log file.""" 

22 

23 def __init__(self, *args, **kwargs): 

24 

25 # Call the __init__ method of the superclass 

26 super(QChem, self).__init__(logname="QChem", *args, **kwargs) 

27 

28 def __str__(self): 

29 """Return a string representation of the object.""" 

30 return "QChem log file %s" % (self.filename) 

31 

32 def __repr__(self): 

33 """Return a representation of the object.""" 

34 return 'QChem("%s")' % (self.filename) 

35 

36 def normalisesym(self, label): 

37 """Q-Chem does not require normalizing symmetry labels.""" 

38 return label 

39 

40 def before_parsing(self): 

41 

42 # Keep track of whether or not we're performing an 

43 # (un)restricted calculation. 

44 self.unrestricted = False 

45 self.is_rohf = False 

46 

47 # Keep track of whether or not this is a fragment calculation, 

48 # so that only the supersystem is parsed. 

49 self.is_fragment_section = False 

50 # These headers identify when a fragment section is 

51 # entered/exited. 

52 self.fragment_section_headers = ( 

53 'Guess MOs from converged MOs on fragments', 

54 'CP correction for fragment', 

55 ) 

56 self.supersystem_section_headers = ( 

57 'Done with SCF on isolated fragments', 

58 'Done with counterpoise correction on fragments', 

59 ) 

60 

61 # Compile the dashes-and-or-spaces-only regex. 

62 self.re_dashes_and_spaces = re.compile(r'^[\s-]+$') 

63 

64 # Compile the regex for extracting the atomic index from an 

65 # aoname. 

66 self.re_atomindex = re.compile(r'(\d+)_') 

67 

68 # QChem changed the number of spaces from version 5.1 to 5.2 

69 # D( 35) --> V( 3) amplitude = 0.0644 

70 # S( 1) --> V( 1) amplitude = -0.1628 alpha 

71 # D(189) --> S( 1) amplitude = -0.0120 beta 

72 self.re_tddft = re.compile(r'[SD]\( *(\d+)\) --> [VS]\( *(\d+)\) amplitude = *([^ ]*)( (alpha|beta))?') 

73 

74 # A maximum of 6 columns per block when printing matrices. The 

75 # Fock matrix is 4. 

76 self.ncolsblock = 6 

77 

78 # By default, when asked to print orbitals via 

79 # `scf_print`/`scf_final_print` and/or `print_orbitals`, 

80 # Q-Chem will print all occupieds and the first 5 virtuals. 

81 # 

82 # When the number is set for `print_orbitals`, that section of 

83 # the output will display (NOcc + that many virtual) MOs, but 

84 # any other sections present due to 

85 # `scf_print`/`scf_final_print` will still only display (NOcc 

86 # + 5) MOs. It is the `print_orbitals` section that `aonames` 

87 # is parsed from. 

88 # 

89 # Note that the (AO basis) density matrix is always (NBasis * 

90 # NBasis)! 

91 self.norbdisp_alpha = self.norbdisp_beta = 5 

92 self.norbdisp_alpha_aonames = self.norbdisp_beta_aonames = 5 

93 self.norbdisp_set = False 

94 

95 self.alpha_mo_coefficient_headers = ( 

96 'RESTRICTED (RHF) MOLECULAR ORBITAL COEFFICIENTS', 

97 'ALPHA MOLECULAR ORBITAL COEFFICIENTS' 

98 ) 

99 

100 self.gradient_headers = ( 

101 'Full Analytical Gradient', 

102 'Gradient of SCF Energy', 

103 'Gradient of MP2 Energy', 

104 ) 

105 

106 self.hessian_headers = ( 

107 'Hessian of the SCF Energy', 

108 'Final Hessian.', 

109 ) 

110 

111 self.wfn_method = [ 

112 'HF', 

113 'MP2', 'RI-MP2', 'LOCAL_MP2', 'MP4', 

114 'CCD', 'CCSD', 'CCSD(T)', 

115 'QCISD', 'QCISD(T)' 

116 ] 

117 

118 def after_parsing(self): 

119 

120 # If parsing a fragment job, each of the geometries appended to 

121 # `atomcoords` may be of different lengths, which will prevent 

122 # conversion from a list to NumPy array. 

123 # Take the length of the first geometry as correct, and remove 

124 # all others with different lengths. 

125 if len(self.atomcoords) > 1: 

126 correctlen = len(self.atomcoords[0]) 

127 self.atomcoords[:] = [coords for coords in self.atomcoords 

128 if len(coords) == correctlen] 

129 # At the moment, there is no similar correction for other properties! 

130 

131 # QChem does not print all MO coefficients by default, but rather 

132 # up to HOMO+5. So, fill up the missing values with NaNs. If there are 

133 # other cases where coefficient are missing, but different ones, this 

134 # general afterthought might not be appropriate and the fix will 

135 # need to be done while parsing. 

136 if hasattr(self, 'mocoeffs'): 

137 for im in range(len(self.mocoeffs)): 

138 _nmo, _nbasis = self.mocoeffs[im].shape 

139 if (_nmo, _nbasis) != (self.nmo, self.nbasis): 

140 coeffs = numpy.empty((self.nmo, self.nbasis)) 

141 coeffs[:] = numpy.nan 

142 coeffs[0:_nmo, 0:_nbasis] = self.mocoeffs[im] 

143 self.mocoeffs[im] = coeffs 

144 

145 # When parsing the 'MOLECULAR ORBITAL COEFFICIENTS' block for 

146 # `aonames`, Q-Chem doesn't print the principal quantum number 

147 # for each shell; this needs to be added. 

148 if hasattr(self, 'aonames') and hasattr(self, 'atombasis'): 

149 angmom = ('', 'S', 'P', 'D', 'F', 'G', 'H', 'I') 

150 for atom in self.atombasis: 

151 bfcounts = dict() 

152 for bfindex in atom: 

153 atomname, bfname = self.aonames[bfindex].split('_') 

154 # Keep track of how many times each shell type has 

155 # appeared. 

156 if bfname in bfcounts: 

157 bfcounts[bfname] += 1 

158 else: 

159 # Make sure the starting number for type of 

160 # angular momentum begins at the appropriate 

161 # principal quantum number (1S, 2P, 3D, 4F, 

162 # ...). 

163 bfcounts[bfname] = angmom.index(bfname[0]) 

164 newbfname = '{}{}'.format(bfcounts[bfname], bfname) 

165 self.aonames[bfindex] = '_'.join([atomname, newbfname]) 

166 

167 # Assign the number of core electrons replaced by ECPs. 

168 if hasattr(self, 'user_input') and self.user_input.get('rem') is not None: 

169 if self.user_input['rem'].get('ecp') is not None: 

170 ecp_is_gen = (self.user_input['rem']['ecp'] == 'gen') 

171 if ecp_is_gen: 

172 assert 'ecp' in self.user_input 

173 has_iprint = hasattr(self, 'possible_ecps') 

174 

175 if not ecp_is_gen and not has_iprint: 

176 msg = """ECPs are present, but the number of core \ 

177electrons isn't printed at all. Rerun with "iprint >= 100" to get \ 

178coreelectrons.""" 

179 self.logger.warning(msg) 

180 self.incorrect_coreelectrons = True 

181 elif ecp_is_gen and not has_iprint: 

182 nmissing = sum(ncore == 0 

183 for (_, _, ncore) in self.user_input['ecp']) 

184 if nmissing > 1: 

185 msg = """ECPs are present, but coreelectrons can only \ 

186be guessed for one element at most. Rerun with "iprint >= 100" to get \ 

187coreelectrons.""" 

188 self.logger.warning(msg) 

189 self.incorrect_coreelectrons = True 

190 elif self.user_input['molecule'].get('charge') is None: 

191 msg = """ECPs are present, but the total charge \ 

192cannot be determined. Rerun without `$molecule read`.""" 

193 self.logger.warning(msg) 

194 self.incorrect_coreelectrons = True 

195 else: 

196 user_charge = self.user_input['molecule']['charge'] 

197 # First, assign the entries given 

198 # explicitly. 

199 for entry in self.user_input['ecp']: 

200 element, _, ncore = entry 

201 if ncore > 0: 

202 self._assign_coreelectrons_to_element(element, ncore) 

203 # Because of how the charge is calculated 

204 # during extract(), this is the number of 

205 # remaining core electrons that need to be 

206 # assigned ECP centers. Filter out the 

207 # remaining entries, of which there should 

208 # only be one. 

209 core_sum = self.coreelectrons.sum() if hasattr(self, 'coreelectrons') else 0 

210 remainder = self.charge - user_charge - core_sum 

211 entries = [entry 

212 for entry in self.user_input['ecp'] 

213 if entry[2] == 0] 

214 if len(entries) != 0: 

215 assert len(entries) == 1 

216 element, _, ncore = entries[0] 

217 assert ncore == 0 

218 self._assign_coreelectrons_to_element( 

219 element, remainder, ncore_is_total_count=True) 

220 elif not ecp_is_gen and has_iprint: 

221 atomsymbols = [self.table.element[atomno] for atomno in self.atomnos] 

222 for i in range(self.natom): 

223 if atomsymbols[i] in self.possible_ecps: 

224 self.coreelectrons[i] = self.possible_ecps[atomsymbols[i]] 

225 else: 

226 assert ecp_is_gen and has_iprint 

227 for entry in self.user_input['ecp']: 

228 element, _, ncore = entry 

229 # If ncore is non-zero, then it must be 

230 # user-defined, and we take that 

231 # value. Otherwise, look it up. 

232 if ncore == 0: 

233 ncore = self.possible_ecps[element] 

234 self._assign_coreelectrons_to_element(element, ncore) 

235 

236 # Check to see if the charge is consistent with the input 

237 # section. It may not be if using an ECP. 

238 if hasattr(self, 'user_input'): 

239 if self.user_input.get('molecule') is not None: 

240 user_charge = self.user_input['molecule'].get('charge') 

241 if user_charge is not None: 

242 self.set_attribute('charge', user_charge) 

243 

244 def parse_charge_section(self, inputfile, chargetype): 

245 """Parse the population analysis charge block.""" 

246 self.skip_line(inputfile, 'blank') 

247 line = next(inputfile) 

248 has_spins = False 

249 if 'Spin' in line: 

250 if not hasattr(self, 'atomspins'): 

251 self.atomspins = dict() 

252 has_spins = True 

253 spins = [] 

254 self.skip_line(inputfile, 'dashes') 

255 if not hasattr(self, 'atomcharges'): 

256 self.atomcharges = dict() 

257 charges = [] 

258 line = next(inputfile) 

259 

260 while list(set(line.strip())) != ['-']: 

261 elements = line.split() 

262 charge = utils.float(elements[2]) 

263 charges.append(charge) 

264 if has_spins: 

265 spin = utils.float(elements[3]) 

266 spins.append(spin) 

267 line = next(inputfile) 

268 

269 self.atomcharges[chargetype] = numpy.array(charges) 

270 if has_spins: 

271 self.atomspins[chargetype] = numpy.array(spins) 

272 

273 @staticmethod 

274 def parse_matrix(inputfile, nrows, ncols, ncolsblock): 

275 """Q-Chem prints most matrices in a standard format; parse the matrix 

276 into a NumPy array of the appropriate shape. 

277 """ 

278 nparray = numpy.empty(shape=(nrows, ncols)) 

279 line = next(inputfile) 

280 assert len(line.split()) == min(ncolsblock, ncols) 

281 colcounter = 0 

282 while colcounter < ncols: 

283 # If the line is just the column header (indices)... 

284 if line[:5].strip() == '': 

285 line = next(inputfile) 

286 rowcounter = 0 

287 while rowcounter < nrows: 

288 row = list(map(float, line.split()[1:])) 

289 assert len(row) == min(ncolsblock, (ncols - colcounter)) 

290 nparray[rowcounter][colcounter:colcounter + ncolsblock] = row 

291 line = next(inputfile) 

292 rowcounter += 1 

293 colcounter += ncolsblock 

294 return nparray 

295 

296 def parse_matrix_aonames(self, inputfile, nrows, ncols): 

297 """Q-Chem prints most matrices in a standard format; parse the matrix 

298 into a preallocated NumPy array of the appropriate shape. 

299 

300 Rather than have one routine for parsing all general matrices 

301 and the 'MOLECULAR ORBITAL COEFFICIENTS' block, use a second 

302 which handles `aonames`. 

303 """ 

304 bigmom = ('d', 'f', 'g', 'h') 

305 nparray = numpy.empty(shape=(nrows, ncols)) 

306 line = next(inputfile) 

307 assert len(line.split()) == min(self.ncolsblock, ncols) 

308 colcounter = 0 

309 split_fixed = utils.WidthSplitter((4, 4, 4, 6, 10, 10, 10, 10, 10, 10)) 

310 while colcounter < ncols: 

311 # If the line is just the column header (indices)... 

312 if line[:5].strip() == '': 

313 line = next(inputfile) 

314 # Do nothing for now. 

315 if 'eigenvalues' in line: 

316 line = next(inputfile) 

317 rowcounter = 0 

318 while rowcounter < nrows: 

319 row = split_fixed.split(line) 

320 # Only take the AO names on the first time through. 

321 if colcounter == 0: 

322 if len(self.aonames) != self.nbasis: 

323 # Apply the offset for rows where there is 

324 # more than one atom of any element in the 

325 # molecule. 

326 offset = 1 

327 if row[2] != '': 

328 name = self.atommap.get(row[1] + str(row[2])) 

329 else: 

330 name = self.atommap.get(row[1] + '1') 

331 # For l > 1, there is a space between l and 

332 # m_l when using spherical functions. 

333 shell = row[2 + offset] 

334 if shell in bigmom: 

335 shell = ''.join([shell, row[3 + offset]]) 

336 aoname = ''.join([name, '_', shell.upper()]) 

337 self.aonames.append(aoname) 

338 row = list(map(float, row[-min(self.ncolsblock, (ncols - colcounter)):])) 

339 nparray[rowcounter][colcounter:colcounter + self.ncolsblock] = row 

340 line = next(inputfile) 

341 rowcounter += 1 

342 colcounter += self.ncolsblock 

343 return nparray 

344 

345 def parse_orbital_energies_and_symmetries(self, inputfile): 

346 """Parse the 'Orbital Energies (a.u.)' block appearing after SCF converges, 

347 which optionally includes MO symmetries. Based upon the 

348 Occupied/Virtual labeling, the HOMO is also parsed. 

349 """ 

350 energies = [] 

351 symbols = [] 

352 

353 line = next(inputfile) 

354 # Sometimes Q-Chem gets a little confused... 

355 while "MOs" not in line: 

356 line = next(inputfile) 

357 line = next(inputfile) 

358 

359 # The end of the block is either a blank line or only dashes. 

360 while not self.re_dashes_and_spaces.search(line) \ 

361 and not 'Warning : Irrep of orbital' in line: 

362 if 'Occupied' in line or 'Virtual' in line: 

363 # A nice trick to find where the HOMO is. 

364 if 'Virtual' in line: 

365 homo = len(energies) - 1 

366 line = next(inputfile) 

367 tokens = line.split() 

368 # If the line contains letters, it must be the MO 

369 # symmetries. Otherwise, it's the energies. 

370 if re.search("[a-zA-Z]", line): 

371 symbols.extend(tokens[1::2]) 

372 else: 

373 for e in tokens: 

374 try: 

375 energy = utils.convertor(utils.float(e), 'hartree', 'eV') 

376 except ValueError: 

377 energy = numpy.nan 

378 energies.append(energy) 

379 line = next(inputfile) 

380 

381 # MO symmetries are either not present or there is one for each MO 

382 # (energy). 

383 assert len(symbols) in (0, len(energies)) 

384 

385 return energies, symbols, homo 

386 

387 

388 def generate_atom_map(self): 

389 """Generate the map to go from Q-Chem atom numbering: 

390 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'H1', 'H2', 'H3', 'H4', 'C7', ... 

391 to cclib atom numbering: 

392 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'H7', 'H8', 'H9', 'H10', 'C11', ... 

393 for later use. 

394 """ 

395 

396 # Generate the desired order. 

397 order_proper = [element + str(num) 

398 for element, num in zip(self.atomelements, 

399 itertools.count(start=1))] 

400 # We need separate counters for each element. 

401 element_counters = {element: itertools.count(start=1) 

402 for element in set(self.atomelements)} 

403 # Generate the Q-Chem printed order. 

404 order_qchem = [element + str(next(element_counters[element])) 

405 for element in self.atomelements] 

406 # Combine the orders into a mapping. 

407 atommap = {k: v for k, v, in zip(order_qchem, order_proper)} 

408 return atommap 

409 

410 def generate_formula_histogram(self): 

411 """From the atomnos, generate a histogram that represents the 

412 molecular formula. 

413 """ 

414 

415 histogram = dict() 

416 for element in self.atomelements: 

417 if element in histogram.keys(): 

418 histogram[element] += 1 

419 else: 

420 histogram[element] = 1 

421 return histogram 

422 

423 def extract(self, inputfile, line): 

424 """Extract information from the file object inputfile.""" 

425 

426 # Extract the version number and optionally the version 

427 # control info. 

428 if any(version_trigger in line for version_trigger in ("Q-Chem", "Unrecognized platform", "Version")): 

429 # Part 1 matches 

430 # - `Q-Chem 4.3.0 for Intel X86 EM64T Linux` 

431 # Part 2 matches 

432 # - `Unrecognized platform!!! 4.0.0.1` 

433 # Part 3 matches 

434 # - `Intel X86 EM64T Linux Version 4.1.0.1 ` 

435 # but not 

436 # - `Additional authors for Version 3.1:` 

437 # - `Q-Chem, Version 4.1, Q-Chem, Inc., Pittsburgh, PA (2013).` 

438 match = re.search( 

439 r"Q-Chem\s([\d\.]*)\sfor|" 

440 r"Unrecognized platform!!!\s([\d\.]*)\b|" 

441 r"Version\s([\d\.]*)\s*$", 

442 line 

443 ) 

444 if match: 

445 groups = [s for s in match.groups() if s is not None] 

446 assert len(groups) == 1 

447 package_version = groups[0] 

448 self.metadata["package_version"] = package_version 

449 self.metadata["legacy_package_version"] = package_version 

450 # Avoid "Last SVN revision" entry. 

451 if "SVN revision" in line and "Last" not in line: 

452 svn_revision = line.split()[3] 

453 line = next(inputfile) 

454 svn_branch = line.split()[3].replace("/", "_") 

455 if "package_version" in self.metadata: 

456 self.metadata["package_version"] = "{}dev+{}-{}".format( 

457 self.metadata["package_version"], svn_branch, svn_revision 

458 ) 

459 

460 # Disable/enable parsing for fragment sections. 

461 if any(message in line for message in self.fragment_section_headers): 

462 self.is_fragment_section = True 

463 if any(message in line for message in self.supersystem_section_headers): 

464 self.is_fragment_section = False 

465 

466 if not self.is_fragment_section: 

467 

468 # If the input section is repeated back, parse the $rem and 

469 # $molecule sections. 

470 if line[0:11] == 'User input:': 

471 self.user_input = dict() 

472 self.skip_line(inputfile, 'd') 

473 while list(set(line.strip())) != ['-']: 

474 

475 if line.strip().lower() == '$rem': 

476 

477 self.user_input['rem'] = dict() 

478 

479 while line.strip().lower() != '$end': 

480 

481 line = next(inputfile).lower() 

482 if line.strip() == '$end': 

483 break 

484 # Apparently calculations can run without 

485 # a matching $end...this terminates the 

486 # user input section no matter what. 

487 if line.strip() == ('-' * 62): 

488 break 

489 

490 tokens = line.split() 

491 # Allow blank lines. 

492 if len(tokens) == 0: 

493 continue 

494 # Entries may be separated by an equals 

495 # sign, and can have comments, for example: 

496 # ecp gen 

497 # ecp = gen 

498 # ecp gen ! only on first chlorine 

499 # ecp = gen only on first chlorine 

500 assert len(tokens) >= 2 

501 keyword = tokens[0] 

502 if tokens[1] == '=': 

503 option = tokens[2] 

504 else: 

505 option = tokens[1] 

506 self.user_input['rem'][keyword] = option 

507 

508 if keyword == 'method': 

509 method = option.upper() 

510 if method in self.wfn_method: 

511 self.metadata["methods"].append(method) 

512 else: 

513 self.metadata["methods"].append('DFT') 

514 self.metadata["functional"] = method 

515 

516 if keyword == 'exchange': 

517 self.metadata["methods"].append('DFT') 

518 self.metadata["functional"] = option 

519 

520 if keyword == 'print_orbitals': 

521 # Stay with the default value if a number isn't 

522 # specified. 

523 if option in ('true', 'false'): 

524 continue 

525 else: 

526 norbdisp_aonames = int(option) 

527 self.norbdisp_alpha_aonames = norbdisp_aonames 

528 self.norbdisp_beta_aonames = norbdisp_aonames 

529 self.norbdisp_set = True 

530 

531 if line.strip().lower() == '$ecp': 

532 

533 self.user_input['ecp'] = [] 

534 line = next(inputfile) 

535 

536 while line.strip().lower() != '$end': 

537 

538 while list(set(line.strip())) != ['*']: 

539 

540 # Parse the element for this ECP 

541 # entry. If only the element is on 

542 # this line, or the 2nd token is 0, it 

543 # applies to all atoms; if it's > 0, 

544 # then it indexes (1-based) that 

545 # specific atom in the whole molecule. 

546 tokens = line.split() 

547 assert len(tokens) > 0 

548 element = tokens[0][0].upper() + tokens[0][1:].lower() 

549 assert element in self.table.element 

550 if len(tokens) > 1: 

551 assert len(tokens) == 2 

552 index = int(tokens[1]) - 1 

553 else: 

554 index = -1 

555 line = next(inputfile) 

556 

557 # Next comes the ECP definition. If 

558 # the line contains only a single 

559 # item, it's a built-in ECP, otherwise 

560 # it's a full definition. 

561 tokens = line.split() 

562 if len(tokens) == 1: 

563 ncore = 0 

564 line = next(inputfile) 

565 else: 

566 assert len(tokens) == 3 

567 ncore = int(tokens[2]) 

568 # Don't parse the remainder of the 

569 # ECP definition. 

570 while list(set(line.strip())) != ['*']: 

571 line = next(inputfile) 

572 

573 entry = (element, index, ncore) 

574 self.user_input['ecp'].append(entry) 

575 

576 line = next(inputfile) 

577 

578 if line.strip().lower() == '$end': 

579 break 

580 

581 if line.strip().lower() == '$molecule': 

582 

583 self.user_input['molecule'] = dict() 

584 line = next(inputfile) 

585 

586 # Don't read the molecule, only the 

587 # supersystem charge and multiplicity. 

588 if line.split()[0].lower() == 'read': 

589 pass 

590 else: 

591 charge, mult = [int(x) for x in line.split()] 

592 self.user_input['molecule']['charge'] = charge 

593 self.user_input['molecule']['mult'] = mult 

594 

595 line = next(inputfile).lower() 

596 

597 # Parse the basis set name 

598 if 'Requested basis set' in line: 

599 self.metadata["basis_set"] = line.split()[-1] 

600 

601 # Parse the general basis for `gbasis`, in the style used by 

602 # Gaussian. 

603 if 'Basis set in general basis input format:' in line: 

604 self.skip_lines(inputfile, ['d', '$basis']) 

605 line = next(inputfile) 

606 if not hasattr(self, 'gbasis'): 

607 self.gbasis = [] 

608 # The end of the general basis block. 

609 while '$end' not in line: 

610 atom = [] 

611 # 1. Contains element symbol and atomic index of 

612 # basis functions; if 0, applies to all atoms of 

613 # same element. 

614 assert len(line.split()) == 2 

615 line = next(inputfile) 

616 # The end of each atomic block. 

617 while '****' not in line: 

618 # 2. Contains the type of basis function {S, SP, 

619 # P, D, F, G, H, ...}, the number of primitives, 

620 # and the weight of the final contracted function. 

621 bfsplitline = line.split() 

622 assert len(bfsplitline) == 3 

623 bftype = bfsplitline[0] 

624 nprim = int(bfsplitline[1]) 

625 line = next(inputfile) 

626 # 3. The primitive basis functions that compose 

627 # the contracted basis function; there are `nprim` 

628 # of them. The first value is the exponent, and 

629 # the second value is the contraction 

630 # coefficient. If `bftype == 'SP'`, the primitives 

631 # are for both S- and P-type basis functions but 

632 # with separate contraction coefficients, 

633 # resulting in three columns. 

634 if bftype == 'SP': 

635 primitives_S = [] 

636 primitives_P = [] 

637 else: 

638 primitives = [] 

639 # For each primitive in the contracted basis 

640 # function... 

641 for iprim in range(nprim): 

642 primsplitline = line.split() 

643 exponent = float(primsplitline[0]) 

644 if bftype == 'SP': 

645 assert len(primsplitline) == 3 

646 coefficient_S = float(primsplitline[1]) 

647 coefficient_P = float(primsplitline[2]) 

648 primitives_S.append((exponent, coefficient_S)) 

649 primitives_P.append((exponent, coefficient_P)) 

650 else: 

651 assert len(primsplitline) == 2 

652 coefficient = float(primsplitline[1]) 

653 primitives.append((exponent, coefficient)) 

654 line = next(inputfile) 

655 if bftype == 'SP': 

656 bf_S = ('S', primitives_S) 

657 bf_P = ('P', primitives_P) 

658 atom.append(bf_S) 

659 atom.append(bf_P) 

660 else: 

661 bf = (bftype, primitives) 

662 atom.append(bf) 

663 # Move to the next contracted basis function 

664 # as long as we don't hit the '****' atom 

665 # delimiter. 

666 self.gbasis.append(atom) 

667 line = next(inputfile) 

668 

669 if line.strip() == 'The following effective core potentials will be applied': 

670 

671 # Keep track of all elements that may have an ECP on 

672 # them. *Which* centers have an ECP can't be 

673 # determined here, so just take the number of valence 

674 # electrons, then later later figure out the centers 

675 # and do core = Z - valence. 

676 self.possible_ecps = dict() 

677 # This will fail if an element has more than one kind 

678 # of ECP. 

679 

680 split_fixed = utils.WidthSplitter((4, 13, 20, 2, 14, 14)) 

681 

682 self.skip_lines(inputfile, ['d', 'header', 'header', 'd']) 

683 line = next(inputfile) 

684 while list(set(line.strip())) != ['-']: 

685 tokens = split_fixed.split(line) 

686 if tokens[0] != '': 

687 element = tokens[0] 

688 valence = int(tokens[1]) 

689 ncore = self.table.number[element] - valence 

690 self.possible_ecps[element] = ncore 

691 line = next(inputfile) 

692 

693 if 'TIME STEP #' in line: 

694 tokens = line.split() 

695 self.append_attribute('time', float(tokens[8])) 

696 

697 if line.strip() == "Adding empirical dispersion correction": 

698 while "energy" not in line: 

699 line = next(inputfile) 

700 self.append_attribute( 

701 "dispersionenergies", 

702 utils.convertor(utils.float(line.split()[-2]), "hartree", "eV") 

703 ) 

704 

705 # Extract the atomic numbers and coordinates of the atoms. 

706 if 'Standard Nuclear Orientation' in line: 

707 if "Angstroms" in line: 

708 convertor = lambda x: x 

709 elif 'Bohr' in line: 

710 convertor = lambda x: utils.convertor(x, 'bohr', 'Angstrom') 

711 else: 

712 raise ValueError("Unknown units in coordinate header: {}".format(line)) 

713 self.skip_lines(inputfile, ['cols', 'dashes']) 

714 atomelements = [] 

715 atomcoords = [] 

716 line = next(inputfile) 

717 while list(set(line.strip())) != ['-']: 

718 entry = line.split() 

719 atomelements.append(entry[1]) 

720 atomcoords.append([convertor(float(value)) for value in entry[2:]]) 

721 line = next(inputfile) 

722 

723 self.append_attribute('atomcoords', atomcoords) 

724 

725 # We calculate and handle atomnos no matter what, since in 

726 # the case of fragment calculations the atoms may change, 

727 # along with the charge and spin multiplicity. 

728 self.atomnos = [] 

729 self.atomelements = [] 

730 for atomelement in atomelements: 

731 self.atomelements.append(atomelement) 

732 if atomelement == 'GH': 

733 self.atomnos.append(0) 

734 else: 

735 self.atomnos.append(self.table.number[atomelement]) 

736 self.natom = len(self.atomnos) 

737 self.atommap = self.generate_atom_map() 

738 self.formula_histogram = self.generate_formula_histogram() 

739 

740 # Number of electrons. 

741 # Useful for determining the number of occupied/virtual orbitals. 

742 if 'Nuclear Repulsion Energy' in line: 

743 line = next(inputfile) 

744 nelec_re_string = r'There are(\s+[0-9]+) alpha and(\s+[0-9]+) beta electrons' 

745 match = re.findall(nelec_re_string, line.strip()) 

746 self.set_attribute('nalpha', int(match[0][0].strip())) 

747 self.set_attribute('nbeta', int(match[0][1].strip())) 

748 self.norbdisp_alpha += self.nalpha 

749 self.norbdisp_alpha_aonames += self.nalpha 

750 self.norbdisp_beta += self.nbeta 

751 self.norbdisp_beta_aonames += self.nbeta 

752 # Calculate the spin multiplicity (2S + 1), where S is the 

753 # total spin of the system. 

754 S = (self.nalpha - self.nbeta) / 2 

755 mult = int(2 * S + 1) 

756 self.set_attribute('mult', mult) 

757 # Calculate the molecular charge as the difference between 

758 # the atomic numbers and the number of electrons. 

759 if hasattr(self, 'atomnos'): 

760 charge = sum(self.atomnos) - (self.nalpha + self.nbeta) 

761 self.set_attribute('charge', charge) 

762 

763 # Number of basis functions. 

764 if 'basis functions' in line: 

765 if not hasattr(self, 'nbasis'): 

766 self.set_attribute('nbasis', int(line.split()[-3])) 

767 # In the case that there are fewer basis functions 

768 # (and therefore MOs) than default number of MOs 

769 # displayed, reset the display values. 

770 self.norbdisp_alpha = min(self.norbdisp_alpha, self.nbasis) 

771 self.norbdisp_alpha_aonames = min(self.norbdisp_alpha_aonames, self.nbasis) 

772 self.norbdisp_beta = min(self.norbdisp_beta, self.nbasis) 

773 self.norbdisp_beta_aonames = min(self.norbdisp_beta_aonames, self.nbasis) 

774 

775 # Check for whether or not we're peforming an 

776 # (un)restricted calculation. 

777 if 'calculation will be' in line: 

778 if ' restricted' in line: 

779 self.unrestricted = False 

780 if 'unrestricted' in line: 

781 self.unrestricted = True 

782 if hasattr(self, 'nalpha') and hasattr(self, 'nbeta'): 

783 if self.nalpha != self.nbeta: 

784 self.unrestricted = True 

785 self.is_rohf = True 

786 

787 # Section with SCF iterations goes like this: 

788 # 

789 # SCF converges when DIIS error is below 1.0E-05 

790 # --------------------------------------- 

791 # Cycle Energy DIIS Error 

792 # --------------------------------------- 

793 # 1 -381.9238072190 1.39E-01 

794 # 2 -382.2937212775 3.10E-03 

795 # 3 -382.2939780242 3.37E-03 

796 # ... 

797 # 

798 scf_success_messages = ( 

799 'Convergence criterion met', 

800 'corrected energy' 

801 ) 

802 scf_failure_messages = ( 

803 'SCF failed to converge', 

804 'Convergence failure' 

805 ) 

806 if 'SCF converges when ' in line: 

807 if not hasattr(self, 'scftargets'): 

808 self.scftargets = [] 

809 target = float(line.split()[-1]) 

810 self.scftargets.append([target]) 

811 

812 # We should have the header between dashes now, 

813 # but sometimes there are lines before the first dashes. 

814 while not 'Cycle Energy' in line: 

815 line = next(inputfile) 

816 self.skip_line(inputfile, 'd') 

817 

818 values = [] 

819 iter_counter = 1 

820 line = next(inputfile) 

821 while not any(message in line for message in scf_success_messages): 

822 

823 # Some trickery to avoid a lot of printing that can occur 

824 # between each SCF iteration. 

825 entry = line.split() 

826 if len(entry) > 0: 

827 if entry[0] == str(iter_counter): 

828 # Q-Chem only outputs one error metric. 

829 error = float(entry[2]) 

830 values.append([error]) 

831 iter_counter += 1 

832 

833 try: 

834 line = next(inputfile) 

835 # Is this the end of the file for some reason? 

836 except StopIteration: 

837 self.logger.warning('File terminated before end of last SCF! Last error: {}'.format(error)) 

838 break 

839 

840 # We've converged, but still need the last iteration. 

841 if any(message in line for message in scf_success_messages): 

842 entry = line.split() 

843 error = float(entry[2]) 

844 values.append([error]) 

845 iter_counter += 1 

846 

847 # This is printed in regression QChem4.2/dvb_sp_unconverged.out 

848 # so use it to bail out when convergence fails. 

849 if any(message in line for message in scf_failure_messages): 

850 break 

851 

852 if not hasattr(self, 'scfvalues'): 

853 self.scfvalues = [] 

854 self.scfvalues.append(numpy.array(values)) 

855 

856 # Molecular orbital coefficients. 

857 

858 # Try parsing them from this block (which comes from 

859 # `scf_final_print = 2``) rather than the combined 

860 # aonames/mocoeffs/moenergies block (which comes from 

861 # `print_orbitals = true`). 

862 if 'Final Alpha MO Coefficients' in line: 

863 if not hasattr(self, 'mocoeffs'): 

864 self.mocoeffs = [] 

865 mocoeffs = QChem.parse_matrix(inputfile, self.nbasis, self.norbdisp_alpha, self.ncolsblock) 

866 self.mocoeffs.append(mocoeffs.transpose()) 

867 

868 if 'Final Beta MO Coefficients' in line: 

869 mocoeffs = QChem.parse_matrix(inputfile, self.nbasis, self.norbdisp_beta, self.ncolsblock) 

870 self.mocoeffs.append(mocoeffs.transpose()) 

871 

872 if 'Total energy in the final basis set' in line: 

873 if not hasattr(self, 'scfenergies'): 

874 self.scfenergies = [] 

875 scfenergy = float(line.split()[-1]) 

876 self.scfenergies.append(utils.convertor(scfenergy, 'hartree', 'eV')) 

877 

878 # Geometry optimization. 

879 

880 if 'Maximum Tolerance Cnvgd?' in line: 

881 line_g = next(inputfile).split()[1:3] 

882 line_d = next(inputfile).split()[1:3] 

883 line_e = next(inputfile).split()[2:4] 

884 

885 if not hasattr(self, 'geotargets'): 

886 self.geotargets = [line_g[1], line_d[1], utils.float(line_e[1])] 

887 if not hasattr(self, 'geovalues'): 

888 self.geovalues = [] 

889 maxg = utils.float(line_g[0]) 

890 maxd = utils.float(line_d[0]) 

891 ediff = utils.float(line_e[0]) 

892 geovalues = [maxg, maxd, ediff] 

893 self.geovalues.append(geovalues) 

894 

895 if '** OPTIMIZATION CONVERGED **' in line: 

896 if not hasattr(self, 'optdone'): 

897 self.optdone = [] 

898 self.optdone.append(len(self.atomcoords)) 

899 

900 if '** MAXIMUM OPTIMIZATION CYCLES REACHED **' in line: 

901 if not hasattr(self, 'optdone'): 

902 self.optdone = [] 

903 

904 # Moller-Plesset corrections. 

905 

906 # There are multiple modules in Q-Chem for calculating MPn energies: 

907 # cdman, ccman, and ccman2, all with different output. 

908 # 

909 # MP2, RI-MP2, and local MP2 all default to cdman, which has a simple 

910 # block of output after the regular SCF iterations. 

911 # 

912 # MP3 is handled by ccman2. 

913 # 

914 # MP4 and variants are handled by ccman. 

915 

916 # This is the MP2/cdman case. 

917 if 'MP2 total energy' in line: 

918 if not hasattr(self, 'mpenergies'): 

919 self.mpenergies = [] 

920 mp2energy = float(line.split()[4]) 

921 mp2energy = utils.convertor(mp2energy, 'hartree', 'eV') 

922 self.mpenergies.append([mp2energy]) 

923 

924 # This is the MP3/ccman2 case. 

925 if line[1:11] == 'MP2 energy' and line[12:19] != 'read as': 

926 if not hasattr(self, 'mpenergies'): 

927 self.mpenergies = [] 

928 mpenergies = [] 

929 mp2energy = float(line.split()[3]) 

930 mpenergies.append(mp2energy) 

931 line = next(inputfile) 

932 line = next(inputfile) 

933 # Just a safe check. 

934 if 'MP3 energy' in line: 

935 mp3energy = float(line.split()[3]) 

936 mpenergies.append(mp3energy) 

937 mpenergies = [utils.convertor(mpe, 'hartree', 'eV') 

938 for mpe in mpenergies] 

939 self.mpenergies.append(mpenergies) 

940 

941 # This is the MP4/ccman case. 

942 if 'EHF' in line: 

943 if not hasattr(self, 'mpenergies'): 

944 self.mpenergies = [] 

945 mpenergies = [] 

946 

947 while list(set(line.strip())) != ['-']: 

948 

949 if 'EMP2' in line: 

950 mp2energy = float(line.split()[2]) 

951 mpenergies.append(mp2energy) 

952 if 'EMP3' in line: 

953 mp3energy = float(line.split()[2]) 

954 mpenergies.append(mp3energy) 

955 if 'EMP4SDQ' in line: 

956 mp4sdqenergy = float(line.split()[2]) 

957 mpenergies.append(mp4sdqenergy) 

958 # This is really MP4SD(T)Q. 

959 if 'EMP4 ' in line: 

960 mp4sdtqenergy = float(line.split()[2]) 

961 mpenergies.append(mp4sdtqenergy) 

962 

963 line = next(inputfile) 

964 

965 mpenergies = [utils.convertor(mpe, 'hartree', 'eV') 

966 for mpe in mpenergies] 

967 self.mpenergies.append(mpenergies) 

968 

969 # Coupled cluster corrections. 

970 # Hopefully we only have to deal with ccman2 here. 

971 

972 if 'CCD total energy' in line: 

973 if not hasattr(self, 'ccenergies'): 

974 self.ccenergies = [] 

975 ccdenergy = float(line.split()[-1]) 

976 ccdenergy = utils.convertor(ccdenergy, 'hartree', 'eV') 

977 self.ccenergies.append(ccdenergy) 

978 if 'CCSD total energy' in line: 

979 has_triples = False 

980 if not hasattr(self, 'ccenergies'): 

981 self.ccenergies = [] 

982 ccsdenergy = float(line.split()[-1]) 

983 # Make sure we aren't actually doing CCSD(T). 

984 line = next(inputfile) 

985 line = next(inputfile) 

986 if 'CCSD(T) total energy' in line: 

987 has_triples = True 

988 ccsdtenergy = float(line.split()[-1]) 

989 ccsdtenergy = utils.convertor(ccsdtenergy, 'hartree', 'eV') 

990 self.ccenergies.append(ccsdtenergy) 

991 if not has_triples: 

992 ccsdenergy = utils.convertor(ccsdenergy, 'hartree', 'eV') 

993 self.ccenergies.append(ccsdenergy) 

994 

995 if line[:11] == " CCSD T1^2": 

996 t1_squared = float(line.split()[3]) 

997 t1_norm = math.sqrt(t1_squared) 

998 self.metadata["t1_diagnostic"] = t1_norm / math.sqrt(2 * (self.nalpha + self.nbeta)) 

999 

1000 # Electronic transitions. Works for both CIS and TDDFT. 

1001 if 'Excitation Energies' in line: 

1002 

1003 # Restricted: 

1004 # --------------------------------------------------- 

1005 # TDDFT/TDA Excitation Energies 

1006 # --------------------------------------------------- 

1007 # 

1008 # Excited state 1: excitation energy (eV) = 3.6052 

1009 # Total energy for state 1: -382.167872200685 

1010 # Multiplicity: Triplet 

1011 # Trans. Mom.: 0.0000 X 0.0000 Y 0.0000 Z 

1012 # Strength : 0.0000 

1013 # D( 33) --> V( 3) amplitude = 0.2618 

1014 # D( 34) --> V( 2) amplitude = 0.2125 

1015 # D( 35) --> V( 1) amplitude = 0.9266 

1016 # 

1017 # Unrestricted: 

1018 # Excited state 2: excitation energy (eV) = 2.3156 

1019 # Total energy for state 2: -381.980177630969 

1020 # <S**2> : 0.7674 

1021 # Trans. Mom.: -2.7680 X -0.1089 Y 0.0000 Z 

1022 # Strength : 0.4353 

1023 # S( 1) --> V( 1) amplitude = -0.3105 alpha 

1024 # D( 34) --> S( 1) amplitude = 0.9322 beta 

1025 

1026 self.skip_lines(inputfile, ['dashes', 'blank']) 

1027 line = next(inputfile) 

1028 

1029 etenergies = [] 

1030 etsyms = [] 

1031 etoscs = [] 

1032 etsecs = [] 

1033 spinmap = {'alpha': 0, 'beta': 1} 

1034 

1035 while list(set(line.strip())) != ['-']: 

1036 

1037 # Take the total energy for the state and subtract from the 

1038 # ground state energy, rather than just the EE; 

1039 # this will be more accurate. 

1040 if 'Total energy for state' in line: 

1041 energy = utils.convertor(float(line.split()[5]), 'hartree', 'wavenumber') 

1042 etenergy = energy - utils.convertor(self.scfenergies[-1], 'eV', 'wavenumber') 

1043 etenergies.append(etenergy) 

1044 # if 'excitation energy' in line: 

1045 # etenergy = utils.convertor(float(line.split()[-1]), 'eV', 'wavenumber') 

1046 # etenergies.append(etenergy) 

1047 if 'Multiplicity' in line: 

1048 etsym = line.split()[1] 

1049 etsyms.append(etsym) 

1050 if 'Strength' in line: 

1051 strength = float(line.split()[-1]) 

1052 etoscs.append(strength) 

1053 

1054 # This is the list of transitions. 

1055 if 'amplitude' in line: 

1056 sec = [] 

1057 while line.strip() != '': 

1058 re_match = self.re_tddft.search(line) 

1059 if self.unrestricted: 

1060 spin = spinmap[re_match.group(5)] 

1061 else: 

1062 spin = 0 

1063 

1064 # There is a subtle difference between TDA and RPA calcs, 

1065 # because in the latter case each transition line is 

1066 # preceeded by the type of vector: X or Y, name excitation 

1067 # or deexcitation (see #154 for details). For deexcitations, 

1068 # we will need to reverse the MO indices. Note also that Q-Chem 

1069 # starts reindexing virtual orbitals at 1. 

1070 if line[5] == '(': 

1071 ttype = 'X' 

1072 else: 

1073 assert line[5] == ":" 

1074 ttype = line[4] 

1075 startidx = int(re_match.group(1)) - 1 

1076 endidx = int(re_match.group(2)) - 1 + self.nalpha 

1077 contrib = float(re_match.group(3)) 

1078 

1079 start = (startidx, spin) 

1080 end = (endidx, spin) 

1081 if ttype == 'X': 

1082 sec.append([start, end, contrib]) 

1083 elif ttype == 'Y': 

1084 sec.append([end, start, contrib]) 

1085 else: 

1086 raise ValueError('Unknown transition type: %s' % ttype) 

1087 line = next(inputfile) 

1088 etsecs.append(sec) 

1089 

1090 line = next(inputfile) 

1091 

1092 self.set_attribute('etenergies', etenergies) 

1093 self.set_attribute('etsyms', etsyms) 

1094 self.set_attribute('etoscs', etoscs) 

1095 self.set_attribute('etsecs', etsecs) 

1096 

1097 # Static and dynamic polarizability from mopropman. 

1098 if 'Polarizability (a.u.)' in line: 

1099 if not hasattr(self, 'polarizabilities'): 

1100 self.polarizabilities = [] 

1101 while 'Full Tensor' not in line: 

1102 line = next(inputfile) 

1103 self.skip_line(inputfile, 'blank') 

1104 polarizability = [next(inputfile).split() for _ in range(3)] 

1105 self.polarizabilities.append(numpy.array(polarizability)) 

1106 

1107 # Static polarizability from finite difference or 

1108 # responseman. 

1109 if line.strip() in ('Static polarizability tensor [a.u.]', 

1110 'Polarizability tensor [a.u.]'): 

1111 if not hasattr(self, 'polarizabilities'): 

1112 self.polarizabilities = [] 

1113 polarizability = [next(inputfile).split() for _ in range(3)] 

1114 self.polarizabilities.append(numpy.array(polarizability)) 

1115 

1116 # Molecular orbital energies and symmetries. 

1117 if line.strip() == 'Orbital Energies (a.u.) and Symmetries': 

1118 

1119 # -------------------------------------------------------------- 

1120 # Orbital Energies (a.u.) and Symmetries 

1121 # -------------------------------------------------------------- 

1122 # 

1123 # Alpha MOs, Restricted 

1124 # -- Occupied -- 

1125 # -10.018 -10.018 -10.008 -10.008 -10.007 -10.007 -10.006 -10.005 

1126 # 1 Bu 1 Ag 2 Bu 2 Ag 3 Bu 3 Ag 4 Bu 4 Ag 

1127 # -9.992 -9.992 -0.818 -0.755 -0.721 -0.704 -0.670 -0.585 

1128 # 5 Ag 5 Bu 6 Ag 6 Bu 7 Ag 7 Bu 8 Bu 8 Ag 

1129 # -0.561 -0.532 -0.512 -0.462 -0.439 -0.410 -0.400 -0.397 

1130 # 9 Ag 9 Bu 10 Ag 11 Ag 10 Bu 11 Bu 12 Bu 12 Ag 

1131 # -0.376 -0.358 -0.349 -0.330 -0.305 -0.295 -0.281 -0.263 

1132 # 13 Bu 14 Bu 13 Ag 1 Au 15 Bu 14 Ag 15 Ag 1 Bg 

1133 # -0.216 -0.198 -0.160 

1134 # 2 Au 2 Bg 3 Bg 

1135 # -- Virtual -- 

1136 # 0.050 0.091 0.116 0.181 0.280 0.319 0.330 0.365 

1137 # 3 Au 4 Au 4 Bg 5 Au 5 Bg 16 Ag 16 Bu 17 Bu 

1138 # 0.370 0.413 0.416 0.422 0.446 0.469 0.496 0.539 

1139 # 17 Ag 18 Bu 18 Ag 19 Bu 19 Ag 20 Bu 20 Ag 21 Ag 

1140 # 0.571 0.587 0.610 0.627 0.646 0.693 0.743 0.806 

1141 # 21 Bu 22 Ag 22 Bu 23 Bu 23 Ag 24 Ag 24 Bu 25 Ag 

1142 # 0.816 

1143 # 25 Bu 

1144 # 

1145 # Beta MOs, Restricted 

1146 # -- Occupied -- 

1147 # -10.018 -10.018 -10.008 -10.008 -10.007 -10.007 -10.006 -10.005 

1148 # 1 Bu 1 Ag 2 Bu 2 Ag 3 Bu 3 Ag 4 Bu 4 Ag 

1149 # -9.992 -9.992 -0.818 -0.755 -0.721 -0.704 -0.670 -0.585 

1150 # 5 Ag 5 Bu 6 Ag 6 Bu 7 Ag 7 Bu 8 Bu 8 Ag 

1151 # -0.561 -0.532 -0.512 -0.462 -0.439 -0.410 -0.400 -0.397 

1152 # 9 Ag 9 Bu 10 Ag 11 Ag 10 Bu 11 Bu 12 Bu 12 Ag 

1153 # -0.376 -0.358 -0.349 -0.330 -0.305 -0.295 -0.281 -0.263 

1154 # 13 Bu 14 Bu 13 Ag 1 Au 15 Bu 14 Ag 15 Ag 1 Bg 

1155 # -0.216 -0.198 -0.160 

1156 # 2 Au 2 Bg 3 Bg 

1157 # -- Virtual -- 

1158 # 0.050 0.091 0.116 0.181 0.280 0.319 0.330 0.365 

1159 # 3 Au 4 Au 4 Bg 5 Au 5 Bg 16 Ag 16 Bu 17 Bu 

1160 # 0.370 0.413 0.416 0.422 0.446 0.469 0.496 0.539 

1161 # 17 Ag 18 Bu 18 Ag 19 Bu 19 Ag 20 Bu 20 Ag 21 Ag 

1162 # 0.571 0.587 0.610 0.627 0.646 0.693 0.743 0.806 

1163 # 21 Bu 22 Ag 22 Bu 23 Bu 23 Ag 24 Ag 24 Bu 25 Ag 

1164 # 0.816 

1165 # 25 Bu 

1166 # -------------------------------------------------------------- 

1167 

1168 self.skip_line(inputfile, 'dashes') 

1169 line = next(inputfile) 

1170 energies_alpha, symbols_alpha, homo_alpha = self.parse_orbital_energies_and_symmetries(inputfile) 

1171 # Only look at the second block if doing an unrestricted calculation. 

1172 # This might be a problem for ROHF/ROKS. 

1173 if self.unrestricted: 

1174 energies_beta, symbols_beta, homo_beta = self.parse_orbital_energies_and_symmetries(inputfile) 

1175 

1176 # For now, only keep the last set of MO energies, even though it is 

1177 # printed at every step of geometry optimizations and fragment jobs. 

1178 self.set_attribute('moenergies', [numpy.array(energies_alpha)]) 

1179 self.set_attribute('homos', [homo_alpha]) 

1180 self.set_attribute('mosyms', [symbols_alpha]) 

1181 if self.unrestricted: 

1182 self.moenergies.append(numpy.array(energies_beta)) 

1183 self.homos.append(homo_beta) 

1184 self.mosyms.append(symbols_beta) 

1185 

1186 self.set_attribute('nmo', len(self.moenergies[0])) 

1187 

1188 # Molecular orbital energies, no symmetries. 

1189 if line.strip() == 'Orbital Energies (a.u.)': 

1190 

1191 # In the case of no orbital symmetries, the beta spin block is not 

1192 # present for restricted calculations. 

1193 

1194 # -------------------------------------------------------------- 

1195 # Orbital Energies (a.u.) 

1196 # -------------------------------------------------------------- 

1197 # 

1198 # Alpha MOs 

1199 # -- Occupied -- 

1200 # ******* -38.595 -34.580 -34.579 -34.578 -19.372 -19.372 -19.364 

1201 # -19.363 -19.362 -19.362 -4.738 -3.252 -3.250 -3.250 -1.379 

1202 # -1.371 -1.369 -1.365 -1.364 -1.362 -0.859 -0.855 -0.849 

1203 # -0.846 -0.840 -0.836 -0.810 -0.759 -0.732 -0.729 -0.704 

1204 # -0.701 -0.621 -0.610 -0.595 -0.587 -0.584 -0.578 -0.411 

1205 # -0.403 -0.355 -0.354 -0.352 

1206 # -- Virtual -- 

1207 # -0.201 -0.117 -0.099 -0.086 0.020 0.031 0.055 0.067 

1208 # 0.075 0.082 0.086 0.092 0.096 0.105 0.114 0.148 

1209 # 

1210 # Beta MOs 

1211 # -- Occupied -- 

1212 # ******* -38.561 -34.550 -34.549 -34.549 -19.375 -19.375 -19.367 

1213 # -19.367 -19.365 -19.365 -4.605 -3.105 -3.103 -3.102 -1.385 

1214 # -1.376 -1.376 -1.371 -1.370 -1.368 -0.863 -0.858 -0.853 

1215 # -0.849 -0.843 -0.839 -0.818 -0.765 -0.738 -0.737 -0.706 

1216 # -0.702 -0.624 -0.613 -0.600 -0.591 -0.588 -0.585 -0.291 

1217 # -0.291 -0.288 -0.275 

1218 # -- Virtual -- 

1219 # -0.139 -0.122 -0.103 0.003 0.014 0.049 0.049 0.059 

1220 # 0.061 0.070 0.076 0.081 0.086 0.090 0.098 0.106 

1221 # 0.138 

1222 # -------------------------------------------------------------- 

1223 

1224 self.skip_line(inputfile, 'dashes') 

1225 line = next(inputfile) 

1226 energies_alpha, _, homo_alpha = self.parse_orbital_energies_and_symmetries(inputfile) 

1227 # Only look at the second block if doing an unrestricted calculation. 

1228 # This might be a problem for ROHF/ROKS. 

1229 if self.unrestricted: 

1230 energies_beta, _, homo_beta = self.parse_orbital_energies_and_symmetries(inputfile) 

1231 

1232 # For now, only keep the last set of MO energies, even though it is 

1233 # printed at every step of geometry optimizations and fragment jobs. 

1234 self.set_attribute('moenergies', [numpy.array(energies_alpha)]) 

1235 self.set_attribute('homos', [homo_alpha]) 

1236 if self.unrestricted: 

1237 self.moenergies.append(numpy.array(energies_beta)) 

1238 self.homos.append(homo_beta) 

1239 

1240 self.set_attribute('nmo', len(self.moenergies[0])) 

1241 

1242 # Molecular orbital coefficients. 

1243 

1244 # This block comes from `print_orbitals = true/{int}`. Less 

1245 # precision than `scf_final_print >= 2` for `mocoeffs`, but 

1246 # important for `aonames` and `atombasis`. 

1247 

1248 if any(header in line 

1249 for header in self.alpha_mo_coefficient_headers): 

1250 

1251 # If we've asked to display more virtual orbitals than 

1252 # there are MOs present in the molecule, fix that now. 

1253 if hasattr(self, 'nmo') and hasattr(self, 'nalpha') and hasattr(self, 'nbeta'): 

1254 self.norbdisp_alpha_aonames = min(self.norbdisp_alpha_aonames, self.nmo) 

1255 self.norbdisp_beta_aonames = min(self.norbdisp_beta_aonames, self.nmo) 

1256 

1257 if not hasattr(self, 'mocoeffs'): 

1258 self.mocoeffs = [] 

1259 if not hasattr(self, 'atombasis'): 

1260 self.atombasis = [] 

1261 for n in range(self.natom): 

1262 self.atombasis.append([]) 

1263 if not hasattr(self, 'aonames'): 

1264 self.aonames = [] 

1265 # We could also attempt to parse `moenergies` here, but 

1266 # nothing is gained by it. 

1267 

1268 mocoeffs = self.parse_matrix_aonames(inputfile, self.nbasis, self.norbdisp_alpha_aonames) 

1269 # Only use these MO coefficients if we don't have them 

1270 # from `scf_final_print`. 

1271 if len(self.mocoeffs) == 0: 

1272 self.mocoeffs.append(mocoeffs.transpose()) 

1273 

1274 # Go back through `aonames` to create `atombasis`. 

1275 assert len(self.aonames) == self.nbasis 

1276 for aoindex, aoname in enumerate(self.aonames): 

1277 atomindex = int(self.re_atomindex.search(aoname).groups()[0]) - 1 

1278 self.atombasis[atomindex].append(aoindex) 

1279 assert len(self.atombasis) == len(self.atomnos) 

1280 

1281 if 'BETA MOLECULAR ORBITAL COEFFICIENTS' in line: 

1282 

1283 mocoeffs = self.parse_matrix_aonames(inputfile, self.nbasis, self.norbdisp_beta_aonames) 

1284 if len(self.mocoeffs) == 1: 

1285 self.mocoeffs.append(mocoeffs.transpose()) 

1286 

1287 # Population analysis. 

1288 

1289 if 'Ground-State Mulliken Net Atomic Charges' in line: 

1290 self.parse_charge_section(inputfile, 'mulliken') 

1291 if 'Hirshfeld Atomic Charges' in line: 

1292 self.parse_charge_section(inputfile, 'hirshfeld') 

1293 if 'Ground-State ChElPG Net Atomic Charges' in line: 

1294 self.parse_charge_section(inputfile, 'chelpg') 

1295 

1296 # Multipole moments are not printed in lexicographical order, 

1297 # so we need to parse and sort them. The units seem OK, but there 

1298 # is some uncertainty about the reference point and whether it 

1299 # can be changed. 

1300 # 

1301 # Notice how the letter/coordinate labels change to coordinate ranks 

1302 # after hexadecapole moments, and need to be translated. Additionally, 

1303 # after 9-th order moments the ranks are not necessarily single digits 

1304 # and so there are spaces between them. 

1305 # 

1306 # ----------------------------------------------------------------- 

1307 # Cartesian Multipole Moments 

1308 # LMN = < X^L Y^M Z^N > 

1309 # ----------------------------------------------------------------- 

1310 # Charge (ESU x 10^10) 

1311 # 0.0000 

1312 # Dipole Moment (Debye) 

1313 # X 0.0000 Y 0.0000 Z 0.0000 

1314 # Tot 0.0000 

1315 # Quadrupole Moments (Debye-Ang) 

1316 # XX -50.9647 XY -0.1100 YY -50.1441 

1317 # XZ 0.0000 YZ 0.0000 ZZ -58.5742 

1318 # ... 

1319 # 5th-Order Moments (Debye-Ang^4) 

1320 # 500 0.0159 410 -0.0010 320 0.0005 

1321 # 230 0.0000 140 0.0005 050 0.0012 

1322 # ... 

1323 # ----------------------------------------------------------------- 

1324 # 

1325 if "Cartesian Multipole Moments" in line: 

1326 

1327 # This line appears not by default, but only when 

1328 # `multipole_order` > 4: 

1329 line = inputfile.next() 

1330 if 'LMN = < X^L Y^M Z^N >' in line: 

1331 line = inputfile.next() 

1332 

1333 # The reference point is always the origin, although normally the molecule 

1334 # is moved so that the center of charge is at the origin. 

1335 self.reference = [0.0, 0.0, 0.0] 

1336 self.moments = [self.reference] 

1337 

1338 # Watch out! This charge is in statcoulombs without the exponent! 

1339 # We should expect very good agreement, however Q-Chem prints 

1340 # the charge only with 5 digits, so expect 1e-4 accuracy. 

1341 charge_header = inputfile.next() 

1342 assert charge_header.split()[0] == "Charge" 

1343 charge = float(inputfile.next().strip()) 

1344 charge = utils.convertor(charge, 'statcoulomb', 'e') * 1e-10 

1345 # Allow this to change until fragment jobs are properly implemented. 

1346 # assert abs(charge - self.charge) < 1e-4 

1347 

1348 # This will make sure Debyes are used (not sure if it can be changed). 

1349 line = inputfile.next() 

1350 assert line.strip() == "Dipole Moment (Debye)" 

1351 

1352 while "-----" not in line: 

1353 

1354 # The current multipole element will be gathered here. 

1355 multipole = [] 

1356 

1357 line = inputfile.next() 

1358 while ("-----" not in line) and ("Moment" not in line): 

1359 

1360 cols = line.split() 

1361 

1362 # The total (norm) is printed for dipole but not other multipoles. 

1363 if cols[0] == 'Tot': 

1364 line = inputfile.next() 

1365 continue 

1366 

1367 # Find and replace any 'stars' with NaN before moving on. 

1368 for i in range(len(cols)): 

1369 if '***' in cols[i]: 

1370 cols[i] = numpy.nan 

1371 

1372 # The moments come in pairs (label followed by value) up to the 9-th order, 

1373 # although above hexadecapoles the labels are digits representing the rank 

1374 # in each coordinate. Above the 9-th order, ranks are not always single digits, 

1375 # so there are spaces between them, which means moments come in quartets. 

1376 if len(self.moments) < 5: 

1377 for i in range(len(cols)//2): 

1378 lbl = cols[2*i] 

1379 m = cols[2*i + 1] 

1380 multipole.append([lbl, m]) 

1381 elif len(self.moments) < 10: 

1382 for i in range(len(cols)//2): 

1383 lbl = cols[2*i] 

1384 lbl = 'X'*int(lbl[0]) + 'Y'*int(lbl[1]) + 'Z'*int(lbl[2]) 

1385 m = cols[2*i + 1] 

1386 multipole.append([lbl, m]) 

1387 else: 

1388 for i in range(len(cols)//4): 

1389 lbl = 'X'*int(cols[4*i]) + 'Y'*int(cols[4*i + 1]) + 'Z'*int(cols[4*i + 2]) 

1390 m = cols[4*i + 3] 

1391 multipole.append([lbl, m]) 

1392 

1393 line = inputfile.next() 

1394 

1395 # Sort should use the first element when sorting lists, 

1396 # so this should simply work, and afterwards we just need 

1397 # to extract the second element in each list (the actual moment). 

1398 multipole.sort() 

1399 multipole = [m[1] for m in multipole] 

1400 self.moments.append(multipole) 

1401 

1402 # For `method = force` or geometry optimizations, 

1403 # the gradient is printed. 

1404 if any(header in line for header in self.gradient_headers): 

1405 if not hasattr(self, 'grads'): 

1406 self.grads = [] 

1407 if 'SCF' in line: 

1408 ncolsblock = self.ncolsblock 

1409 else: 

1410 ncolsblock = 5 

1411 grad = QChem.parse_matrix(inputfile, 3, self.natom, ncolsblock) 

1412 self.grads.append(grad.T) 

1413 

1414 # (Static) polarizability from frequency calculations. 

1415 if 'Polarizability Matrix (a.u.)' in line: 

1416 if not hasattr(self, 'polarizabilities'): 

1417 self.polarizabilities = [] 

1418 polarizability = [] 

1419 self.skip_line(inputfile, 'index header') 

1420 for _ in range(3): 

1421 line = next(inputfile) 

1422 ss = line.strip()[1:] 

1423 polarizability.append([ss[0:12], ss[13:24], ss[25:36]]) 

1424 # For some reason the sign is inverted. 

1425 self.polarizabilities.append(-numpy.array(polarizability, dtype=float)) 

1426 

1427 # For IR-related jobs, the Hessian is printed (dim: 3*natom, 3*natom). 

1428 # Note that this is *not* the mass-weighted Hessian. 

1429 if any(header in line for header in self.hessian_headers): 

1430 if not hasattr(self, 'hessian'): 

1431 dim = 3*self.natom 

1432 self.hessian = QChem.parse_matrix(inputfile, dim, dim, self.ncolsblock) 

1433 

1434 # Start of the IR/Raman frequency section. 

1435 if 'VIBRATIONAL ANALYSIS' in line: 

1436 

1437 while 'STANDARD THERMODYNAMIC QUANTITIES' not in line: 

1438 ## IR, optional Raman: 

1439 # 

1440 # ********************************************************************** 

1441 # ** ** 

1442 # ** VIBRATIONAL ANALYSIS ** 

1443 # ** -------------------- ** 

1444 # ** ** 

1445 # ** VIBRATIONAL FREQUENCIES (CM**-1) AND NORMAL MODES ** 

1446 # ** FORCE CONSTANTS (mDYN/ANGSTROM) AND REDUCED MASSES (AMU) ** 

1447 # ** INFRARED INTENSITIES (KM/MOL) ** 

1448 ##** RAMAN SCATTERING ACTIVITIES (A**4/AMU) AND DEPOLARIZATION RATIOS ** 

1449 # ** ** 

1450 # ********************************************************************** 

1451 # 

1452 # 

1453 # Mode: 1 2 3 

1454 # Frequency: -106.88 -102.91 161.77 

1455 # Force Cnst: 0.0185 0.0178 0.0380 

1456 # Red. Mass: 2.7502 2.8542 2.4660 

1457 # IR Active: NO YES YES 

1458 # IR Intens: 0.000 0.000 0.419 

1459 # Raman Active: YES NO NO 

1460 ##Raman Intens: 2.048 0.000 0.000 

1461 ##Depolar: 0.750 0.000 0.000 

1462 # X Y Z X Y Z X Y Z 

1463 # C 0.000 0.000 -0.100 -0.000 0.000 -0.070 -0.000 -0.000 -0.027 

1464 # C 0.000 0.000 0.045 -0.000 0.000 -0.074 0.000 -0.000 -0.109 

1465 # C 0.000 0.000 0.148 -0.000 -0.000 -0.074 0.000 0.000 -0.121 

1466 # (...) 

1467 # H -0.000 -0.000 0.422 -0.000 -0.000 0.499 0.000 0.000 -0.285 

1468 # TransDip 0.000 -0.000 -0.000 0.000 -0.000 -0.000 -0.000 0.000 0.021 

1469 # 

1470 # Mode: 4 5 6 

1471 # ... 

1472 # 

1473 # There isn't any symmetry information for normal modes present 

1474 # in Q-Chem. 

1475 # if not hasattr(self, 'vibsyms'): 

1476 # self.vibsyms = [] 

1477 if 'Frequency:' in line: 

1478 if not hasattr(self, 'vibfreqs'): 

1479 self.vibfreqs = [] 

1480 vibfreqs = map(float, line.split()[1:]) 

1481 self.vibfreqs.extend(vibfreqs) 

1482 

1483 if 'Force Cnst:' in line: 

1484 if not hasattr(self, 'vibfconsts'): 

1485 self.vibfconsts = [] 

1486 vibfconsts = map(float, line.split()[2:]) 

1487 self.vibfconsts.extend(vibfconsts) 

1488 

1489 if 'Red. Mass' in line: 

1490 if not hasattr(self, 'vibrmasses'): 

1491 self.vibrmasses = [] 

1492 vibrmasses = map(float, line.split()[2:]) 

1493 self.vibrmasses.extend(vibrmasses) 

1494 

1495 if 'IR Intens:' in line: 

1496 if not hasattr(self, 'vibirs'): 

1497 self.vibirs = [] 

1498 vibirs = map(float, line.split()[2:]) 

1499 self.vibirs.extend(vibirs) 

1500 

1501 if 'Raman Intens:' in line: 

1502 if not hasattr(self, 'vibramans'): 

1503 self.vibramans = [] 

1504 vibramans = map(float, line.split()[2:]) 

1505 self.vibramans.extend(vibramans) 

1506 

1507 # This is the start of the displacement block. 

1508 if line.split()[0:3] == ['X', 'Y', 'Z']: 

1509 if not hasattr(self, 'vibdisps'): 

1510 self.vibdisps = [] 

1511 disps = [] 

1512 for k in range(self.natom): 

1513 line = next(inputfile) 

1514 numbers = list(map(float, line.split()[1:])) 

1515 N = len(numbers) // 3 

1516 if not disps: 

1517 for n in range(N): 

1518 disps.append([]) 

1519 for n in range(N): 

1520 disps[n].append(numbers[3*n:(3*n)+3]) 

1521 self.vibdisps.extend(disps) 

1522 

1523 line = next(inputfile) 

1524 

1525 # Anharmonic vibrational analysis. 

1526 # Q-Chem includes 3 theories: VPT2, TOSH, and VCI. 

1527 # For now, just take the VPT2 results. 

1528 

1529 # if 'VIBRATIONAL ANHARMONIC ANALYSIS' in line: 

1530 

1531 # while list(set(line.strip())) != ['=']: 

1532 # if 'VPT2' in line: 

1533 # if not hasattr(self, 'vibanharms'): 

1534 # self.vibanharms = [] 

1535 # self.vibanharms.append(float(line.split()[-1])) 

1536 # line = next(inputfile) 

1537 

1538 if 'STANDARD THERMODYNAMIC QUANTITIES AT' in line: 

1539 

1540 if not hasattr(self, 'temperature'): 

1541 self.temperature = float(line.split()[4]) 

1542 # Not supported yet. 

1543 if not hasattr(self, 'pressure'): 

1544 self.pressure = float(line.split()[7]) 

1545 self.skip_line(inputfile, 'blank') 

1546 

1547 line = next(inputfile) 

1548 if self.natom == 1: 

1549 assert 'Translational Enthalpy' in line 

1550 else: 

1551 assert 'Imaginary Frequencies' in line 

1552 line = next(inputfile) 

1553 # Not supported yet. 

1554 assert 'Zero point vibrational energy' in line 

1555 if not hasattr(self, 'zpe'): 

1556 # Convert from kcal/mol to Hartree/particle. 

1557 self.zpe = utils.convertor(float(line.split()[4]), 

1558 'kcal/mol', 'hartree') 

1559 atommasses = [] 

1560 while 'Translational Enthalpy' not in line: 

1561 if 'Has Mass' in line: 

1562 atommass = float(line.split()[6]) 

1563 atommasses.append(atommass) 

1564 line = next(inputfile) 

1565 if not hasattr(self, 'atommasses'): 

1566 self.atommasses = numpy.array(atommasses) 

1567 

1568 while line.strip(): 

1569 line = next(inputfile) 

1570 

1571 line = next(inputfile) 

1572 assert 'Total Enthalpy' in line 

1573 if not hasattr(self, 'enthalpy'): 

1574 enthalpy = float(line.split()[2]) 

1575 self.enthalpy = utils.convertor(enthalpy, 

1576 'kcal/mol', 'hartree') 

1577 line = next(inputfile) 

1578 assert 'Total Entropy' in line 

1579 if not hasattr(self, 'entropy'): 

1580 entropy = float(line.split()[2]) * self.temperature / 1000 

1581 # This is the *temperature dependent* entropy. 

1582 self.entropy = utils.convertor(entropy, 

1583 'kcal/mol', 'hartree') 

1584 if not hasattr(self, 'freeenergy'): 

1585 self.freeenergy = self.enthalpy - self.entropy 

1586 

1587 if line[:16] == ' Total job time:': 

1588 self.metadata['success'] = True 

1589 

1590 # TODO: 

1591 # 'enthalpy' (incorrect) 

1592 # 'entropy' (incorrect) 

1593 # 'freeenergy' (incorrect) 

1594 # 'nocoeffs' 

1595 # 'nooccnos' 

1596 # 'vibanharms'