Coverage for cclib/parser/nwchemparser.py: 97%

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1# -*- coding: utf-8 -*-

5# This file is part of cclib (http://cclib.github.io) and is distributed under

6# the terms of the BSD 3-Clause License.

8"""Parser for NWChem output files"""

11import itertools

12import re

14import numpy

16from cclib.parser import logfileparser

17from cclib.parser import utils

20class NWChem(logfileparser.Logfile):

21 """An NWChem log file."""

23 def __init__(self, *args, **kwargs):

25 # Call the __init__ method of the superclass

26 super(NWChem, self).__init__(logname="NWChem", *args, **kwargs)

28 def __str__(self):

29 """Return a string representation of the object."""

30 return "NWChem log file %s" % (self.filename)

32 def __repr__(self):

33 """Return a representation of the object."""

34 return 'NWChem("%s")' % (self.filename)

36 def normalisesym(self, label):

37 """NWChem does not require normalizing symmetry labels."""

38 return label

40 name2element = lambda self, lbl: "".join(itertools.takewhile(str.isalpha, str(lbl)))

42 def extract(self, inputfile, line):

43 """Extract information from the file object inputfile."""

45 # Extract the version number and the version control information, if

46 # it exists.

47 if "nwchem branch" in line:

48 base_package_version = line.split()[3]

49 self.metadata["legacy_package_version"] = base_package_version

50 self.metadata["package_version"] = base_package_version

51 line = next(inputfile)

52 if "nwchem revision" in line:

53 self.metadata["package_version"] = "{}+{}".format(

54 self.metadata["package_version"], line.split()[3].split("-")[-1]

55 )

57 # This is printed in the input module, so should always be the first coordinates,

58 # and contains some basic information we want to parse as well. However, this is not

59 # the only place where the coordinates are printed during geometry optimization,

60 # since the gradients module has a separate coordinate printout, which happens

61 # alongside the coordinate gradients. This geometry printout happens at the

62 # beginning of each optimization step only.

63 if line.strip() == 'Geometry "geometry" -> ""' or line.strip() == 'Geometry "geometry" -> "geometry"':

65 self.skip_lines(inputfile, ['dashes', 'blank', 'units', 'blank', 'header', 'dashes'])

67 if not hasattr(self, 'atomcoords'):

68 self.atomcoords = []

70 line = next(inputfile)

71 coords = []

72 atomnos = []

73 while line.strip():

74 # The column labeled 'tag' is usually empty, but I'm not sure whether it can have spaces,

75 # so for now assume that it can and that there will be seven columns in that case.

76 if len(line.split()) == 6:

77 index, atomname, nuclear, x, y, z = line.split()

78 else:

79 index, atomname, tag, nuclear, x, y, z = line.split()

80 coords.append(list(map(float, [x, y, z])))

81 atomnos.append(int(float(nuclear)))

82 line = next(inputfile)

84 self.atomcoords.append(coords)

86 self.set_attribute('atomnos', atomnos)

88 # If the geometry is printed in XYZ format, it will have the number of atoms.

89 if line[12:31] == "XYZ format geometry":

91 self.skip_line(inputfile, 'dashes')

92 natom = int(next(inputfile).strip())

93 self.set_attribute('natom', natom)

95 if line.strip() == "NWChem Geometry Optimization":

96 self.skip_lines(inputfile, ['d', 'b', 'b', 'b', 'b', 'title', 'b', 'b'])

97 line = next(inputfile)

98 while line.strip():

99 if "maximum gradient threshold" in line:

100 gmax = float(line.split()[-1])

101 if "rms gradient threshold" in line:

102 grms = float(line.split()[-1])

103 if "maximum cartesian step threshold" in line:

104 xmax = float(line.split()[-1])

105 if "rms cartesian step threshold" in line:

106 xrms = float(line.split()[-1])

107 line = next(inputfile)

108

109 self.set_attribute('geotargets', [gmax, grms, xmax, xrms])

110

111 # NWChem does not normally print the basis set for each atom, but rather

112 # chooses the concise option of printing Gaussian coefficients for each

113 # atom type/element only once. Therefore, we need to first parse those

114 # coefficients and afterwards build the appropriate gbasis attribute based

115 # on that and atom types/elements already parsed (atomnos). However, if atom

116 # are given different names (number after element, like H1 and H2), then NWChem

117 # generally prints the gaussian parameters for all unique names, like this:

118 #

119 # Basis "ao basis" -> "ao basis" (cartesian)

120 # -----

121 # O (Oxygen)

122 # ----------

123 # Exponent Coefficients

124 # -------------- ---------------------------------------------------------

125 # 1 S 1.30709320E+02 0.154329

126 # 1 S 2.38088610E+01 0.535328

127 # (...)

128 #

129 # H1 (Hydrogen)

130 # -------------

131 # Exponent Coefficients

132 # -------------- ---------------------------------------------------------

133 # 1 S 3.42525091E+00 0.154329

134 # (...)

135 #

136 # H2 (Hydrogen)

137 # -------------

138 # Exponent Coefficients

139 # -------------- ---------------------------------------------------------

140 # 1 S 3.42525091E+00 0.154329

141 # (...)

142 #

143 # This current parsing code below assumes all atoms of the same element

144 # use the same basis set, but that might not be true, and this will probably

145 # need to be considered in the future when such a logfile appears.

146 if line.strip() == """Basis "ao basis" -> "ao basis" (cartesian)""":

147 self.skip_line(inputfile, 'dashes')

148 gbasis_dict = {}

149 line = next(inputfile)

150 while line.strip():

151 atomname = line.split()[0]

152 atomelement = self.name2element(atomname)

153 gbasis_dict[atomelement] = []

154 self.skip_lines(inputfile, ['d', 'labels', 'd'])

155 shells = []

156 line = next(inputfile)

157 while line.strip() and line.split()[0].isdigit():

158 shell = None

159 while line.strip():

160 nshell, type, exp, coeff = line.split()

161 nshell = int(nshell)

162 assert len(shells) == nshell - 1

163 if not shell:

164 shell = (type, [])

165 else:

166 assert shell[0] == type

167 exp = float(exp)

168 coeff = float(coeff)

169 shell[1].append((exp, coeff))

170 line = next(inputfile)

171 shells.append(shell)

172 line = next(inputfile)

173 gbasis_dict[atomelement].extend(shells)

174

175 gbasis = []

176 for i in range(self.natom):

177 atomtype = self.table.element[self.atomnos[i]]

178 gbasis.append(gbasis_dict[atomtype])

179

180 self.set_attribute('gbasis', gbasis)

181

182 # Normally the indexes of AOs assigned to specific atoms are also not printed,

183 # so we need to infer that. We could do that from the previous section,

184 # it might be worthwhile to take numbers from two different places, hence

185 # the code below, which builds atombasis based on the number of functions

186 # listed in this summary of the AO basis. Similar to previous section, here

187 # we assume all atoms of the same element have the same basis sets, but

188 # this will probably need to be revised later.

189

190 # The section we can glean info about aonmaes looks like:

191 #

192 # Summary of "ao basis" -> "ao basis" (cartesian)

193 # ------------------------------------------------------------------------------

194 # Tag Description Shells Functions and Types

195 # ---------------- ------------------------------ ------ ---------------------

196 # C sto-3g 3 5 2s1p

197 # H sto-3g 1 1 1s

198 #

199 # However, we need to make sure not to match the following entry lines:

200 #

201 # * Summary of "ao basis" -> "" (cartesian)

202 # * Summary of allocated global arrays

203 #

204 # Unfortantely, "ao basis" isn't unique because it can be renamed to anything for

205 # later reference: http://www.nwchem-sw.org/index.php/Basis

206 # It also appears that we have to handle cartesian vs. spherical

207

208 if line[1:11] == "Summary of":

209 match = re.match(r' Summary of "([^\"]*)" -> "([^\"]*)" \((.+)\)', line)

210

211 if match and match.group(1) == match.group(2):

212

213 self.skip_lines(inputfile, ['d', 'title', 'd'])

214

215 self.shells = {}

216 self.shells["type"] = match.group(3)

217

218 atombasis_dict = {}

219

220 line = next(inputfile)

221 while line.strip():

222 atomname, desc, shells, funcs, types = line.split()

223 atomelement = self.name2element(atomname)

224 self.metadata["basis_set"] = desc

225

226 self.shells[atomname] = types

227 atombasis_dict[atomelement] = int(funcs)

228 line = next(inputfile)

229

230 last = 0

231 atombasis = []

232 for atom in self.atomnos:

233 atomelement = self.table.element[atom]

234 nfuncs = atombasis_dict[atomelement]

235 atombasis.append(list(range(last, last+nfuncs)))

236 last = atombasis[-1][-1] + 1

237

238 self.set_attribute('atombasis', atombasis)

239

240 # This section contains general parameters for Hartree-Fock calculations,

241 # which do not contain the 'General Information' section like most jobs.

242 if line.strip() == "NWChem SCF Module":

243 # If the calculation doesn't have a title specified, there

244 # aren't as many lines to skip here.

245 self.skip_lines(inputfile, ['d', 'b', 'b'])

246 line = next(inputfile)

247 if line.strip():

248 self.skip_lines(inputfile, ['b', 'b', 'b'])

249 line = next(inputfile)

250 while line.strip():

251 if line[2:8] == "charge":

252 charge = int(float(line.split()[-1]))

253 self.set_attribute('charge', charge)

254 if line[2:13] == "open shells":

255 unpaired = int(line.split()[-1])

256 self.set_attribute('mult', 2*unpaired + 1)

257 if line[2:7] == "atoms":

258 natom = int(line.split()[-1])

259 self.set_attribute('natom', natom)

260 if line[2:11] == "functions":

261 nfuncs = int(line.split()[-1])

262 self.set_attribute("nbasis", nfuncs)

263 line = next(inputfile)

264

265 # This section contains general parameters for DFT calculations, as well as

266 # for the many-electron theory module.

267 if line.strip() == "General Information":

268

269 if hasattr(self, 'linesearch') and self.linesearch:

270 return

271

272 while line.strip():

273

274 if "No. of atoms" in line:

275 self.set_attribute('natom', int(line.split()[-1]))

276 if "Charge" in line:

277 self.set_attribute('charge', int(line.split()[-1]))

278 if "Spin multiplicity" in line:

279 mult = line.split()[-1]

280 if mult == "singlet":

281 mult = 1

282 self.set_attribute('mult', int(mult))

283 if "AO basis - number of function" in line:

284 nfuncs = int(line.split()[-1])

285 self.set_attribute('nbasis', nfuncs)

286

287 # These will be present only in the DFT module.

288 if "Convergence on energy requested" in line:

289 target_energy = utils.float(line.split()[-1])

290 if "Convergence on density requested" in line:

291 target_density = utils.float(line.split()[-1])

292 if "Convergence on gradient requested" in line:

293 target_gradient = utils.float(line.split()[-1])

294

295 line = next(inputfile)

296

297 # Pretty nasty temporary hack to set scftargets only in the SCF module.

298 if "target_energy" in dir() and "target_density" in dir() and "target_gradient" in dir():

299 if not hasattr(self, 'scftargets'):

300 self.scftargets = []

301 self.scftargets.append([target_energy, target_density, target_gradient])

302

303 #DFT functional information

304 if "XC Information" in line:

305 line = next(inputfile)

306 line = next(inputfile)

307 self.metadata["functional"] = line.split()[0]

308

309 # If the full overlap matrix is printed, it looks like this:

310 #

311 # global array: Temp Over[1:60,1:60], handle: -996

312 #

313 # 1 2 3 4 5 6

314 # ----------- ----------- ----------- ----------- ----------- -----------

315 # 1 1.00000 0.24836 -0.00000 -0.00000 0.00000 0.00000

316 # 2 0.24836 1.00000 0.00000 -0.00000 0.00000 0.00030

317 # 3 -0.00000 0.00000 1.00000 0.00000 0.00000 -0.00014

318 # ...

319 if "global array: Temp Over[" in line:

320

321 self.set_attribute('nbasis', int(line.split('[')[1].split(',')[0].split(':')[1]))

322 self.set_attribute('nmo', int(line.split(']')[0].split(',')[1].split(':')[1]))

323

324 aooverlaps = []

325 while len(aooverlaps) < self.nbasis:

326

327 self.skip_line(inputfile, 'blank')

328

329 indices = [int(i) for i in inputfile.next().split()]

330 assert indices[0] == len(aooverlaps) + 1

331

332 self.skip_line(inputfile, "dashes")

333 data = [inputfile.next().split() for i in range(self.nbasis)]

334 indices = [int(d[0]) for d in data]

335 assert indices == list(range(1, self.nbasis+1))

336

337 for i in range(1, len(data[0])):

338 vector = [float(d[i]) for d in data]

339 aooverlaps.append(vector)

340

341 self.set_attribute('aooverlaps', aooverlaps)

342

343 if line.strip() in ("The SCF is already converged", "The DFT is already converged"):

344 if self.linesearch:

345 return

346 if hasattr(self, 'scftargets'):

347 self.scftargets.append(self.scftargets[-1])

348 if hasattr(self, 'scfvalues'):

349 self.scfvalues.append(self.scfvalues[-1])

350

351 # The default (only?) SCF algorithm for Hartree-Fock is a preconditioned conjugate

352 # gradient method that apparently "always" converges, so this header should reliably

353 # signal a start of the SCF cycle. The convergence targets are also printed here.

354 if line.strip() == "Quadratically convergent ROHF":

355

356 if hasattr(self, 'linesearch') and self.linesearch:

357 return

358

359 while not "Final" in line:

360

361 # Only the norm of the orbital gradient is used to test convergence.

362 if line[:22] == " Convergence threshold":

363 target = float(line.split()[-1])

364 if not hasattr(self, "scftargets"):

365 self.scftargets = []

366 self.scftargets.append([target])

367

368 # This is critical for the stop condition of the section,

369 # because the 'Final Fock-matrix accuracy' is along the way.

370 # It would be prudent to find a more robust stop condition.

371 while list(set(line.strip())) != ["-"]:

372 line = next(inputfile)

373

374 if line.split() == ['iter', 'energy', 'gnorm', 'gmax', 'time']:

375 values = []

376 self.skip_line(inputfile, 'dashes')

377 line = next(inputfile)

378 while line.strip():

379 it, energy, gnorm, gmax, time = line.split()

380 gnorm = utils.float(gnorm)

381 values.append([gnorm])

382 try:

383 line = next(inputfile)

384 # Is this the end of the file for some reason?

385 except StopIteration:

386 self.logger.warning('File terminated before end of last SCF! Last gradient norm: {}'.format(gnorm))

387 break

388 if not hasattr(self, 'scfvalues'):

389 self.scfvalues = []

390 self.scfvalues.append(values)

391

392 try:

393 line = next(inputfile)

394 except StopIteration:

395 self.logger.warning('File terminated?')

396 break

397

398 # The SCF for DFT does not use the same algorithm as Hartree-Fock, but always

399 # seems to use the following format to report SCF convergence:

400 # convergence iter energy DeltaE RMS-Dens Diis-err time

401 # ---------------- ----- ----------------- --------- --------- --------- ------

402 # d= 0,ls=0.0,diis 1 -382.2544324446 -8.28D+02 1.42D-02 3.78D-01 23.2

403 # d= 0,ls=0.0,diis 2 -382.3017298534 -4.73D-02 6.99D-03 3.82D-02 39.3

404 # d= 0,ls=0.0,diis 3 -382.2954343173 6.30D-03 4.21D-03 7.95D-02 55.3

405 # ...

406 if line.split() == ['convergence', 'iter', 'energy', 'DeltaE', 'RMS-Dens', 'Diis-err', 'time']:

407

408 if hasattr(self, 'linesearch') and self.linesearch:

409 return

410

411 self.skip_line(inputfile, 'dashes')

412 line = next(inputfile)

413 values = []

414 while line.strip():

415

416 # Sometimes there are things in between iterations with fewer columns,

417 # and we want to skip those lines, most probably. An exception might

418 # unrestricted calcualtions, which show extra RMS density and DIIS

419 # errors, although it is not clear yet whether these are for the

420 # beta orbitals or somethine else. The iterations look like this in that case:

421 # convergence iter energy DeltaE RMS-Dens Diis-err time

422 # ---------------- ----- ----------------- --------- --------- --------- ------

423 # d= 0,ls=0.0,diis 1 -382.0243202601 -8.28D+02 7.77D-03 1.04D-01 30.0

424 # 7.68D-03 1.02D-01

425 # d= 0,ls=0.0,diis 2 -382.0647539758 -4.04D-02 4.64D-03 1.95D-02 59.2

426 # 5.39D-03 2.36D-02

427 # ...

428 if len(line[17:].split()) == 6:

429 iter, energy, deltaE, dens, diis, time = line[17:].split()

430 val_energy = utils.float(deltaE)

431 val_density = utils.float(dens)

432 val_gradient = utils.float(diis)

433 values.append([val_energy, val_density, val_gradient])

434

435 try:

436 line = next(inputfile)

437 # Is this the end of the file for some reason?

438 except StopIteration:

439 self.logger.warning('File terminated before end of last SCF! Last error: {}'.format(diis))

440 break

441

442 if not hasattr(self, 'scfvalues'):

443 self.scfvalues = []

444

445 self.scfvalues.append(values)

446

447 # These triggers are supposed to catch the current step in a geometry optimization search

448 # and determine whether we are currently in the main (initial) SCF cycle of that step

449 # or in the subsequent line search. The step is printed between dashes like this:

450 #

451 # --------

452 # Step 0

453 # --------

454 #

455 # and the summary lines that describe the main SCF cycle for the frsit step look like this:

456 #

457 #@ Step Energy Delta E Gmax Grms Xrms Xmax Walltime

458 #@ ---- ---------------- -------- -------- -------- -------- -------- --------

459 #@ 0 -379.76896249 0.0D+00 0.04567 0.01110 0.00000 0.00000 4.2

460 # ok ok

461 #

462 # However, for subsequent step the format is a bit different:

463 #

464 # Step Energy Delta E Gmax Grms Xrms Xmax Walltime

465 # ---- ---------------- -------- -------- -------- -------- -------- --------

466 #@ 2 -379.77794602 -7.4D-05 0.00118 0.00023 0.00440 0.01818 14.8

467 # ok

468 #

469 # There is also a summary of the line search (which we don't use now), like this:

470 #

471 # Line search:

472 # step= 1.00 grad=-1.8D-05 hess= 8.9D-06 energy= -379.777955 mode=accept

473 # new step= 1.00 predicted energy= -379.777955

474 #

475 if line[10:14] == "Step":

476 self.geostep = int(line.split()[-1])

477 self.skip_line(inputfile, 'dashes')

478 self.linesearch = False

479 if line[0] == "@" and line.split()[1] == "Step":

480 at_and_dashes = next(inputfile)

481 line = next(inputfile)

482 assert int(line.split()[1]) == self.geostep == 0

483 gmax = float(line.split()[4])

484 grms = float(line.split()[5])

485 xrms = float(line.split()[6])

486 xmax = float(line.split()[7])

487 if not hasattr(self, 'geovalues'):

488 self.geovalues = []

489 self.geovalues.append([gmax, grms, xmax, xrms])

490 self.linesearch = True

491 if line[2:6] == "Step":

492 self.skip_line(inputfile, 'dashes')

493 line = next(inputfile)

494 assert int(line.split()[1]) == self.geostep

495 if self.linesearch:

496 #print(line)

497 return

498 gmax = float(line.split()[4])

499 grms = float(line.split()[5])

500 xrms = float(line.split()[6])

501 xmax = float(line.split()[7])

502 if not hasattr(self, 'geovalues'):

503 self.geovalues = []

504 self.geovalues.append([gmax, grms, xmax, xrms])

505 self.linesearch = True

506

507 # There is a clear message when the geometry optimization has converged:

508 #

509 # ----------------------

510 # Optimization converged

511 # ----------------------

512 #

513 if line.strip() == "Optimization converged":

514 self.skip_line(inputfile, 'dashes')

515 if not hasattr(self, 'optdone'):

516 self.optdone = []

517 self.optdone.append(len(self.geovalues) - 1)

518

519 if "Failed to converge" in line and hasattr(self, 'geovalues'):

520 if not hasattr(self, 'optdone'):

521 self.optdone = []

522

523 # extract the theoretical method

524 if "Total SCF energy" in line:

525 self.metadata["methods"].append("HF")

526 if "Total DFT energy" in line:

527 self.metadata["methods"].append("DFT")

528

529 # The line containing the final SCF energy seems to be always identifiable like this.

530 if "Total SCF energy" in line or "Total DFT energy" in line:

531

532 # NWChem often does a line search during geometry optimization steps, reporting

533 # the SCF information but not the coordinates (which are not necessarily 'intermediate'

534 # since the step size can become smaller). We want to skip these SCF cycles,

535 # unless the coordinates can also be extracted (possibly from the gradients?).

536 if hasattr(self, 'linesearch') and self.linesearch:

537 return

538

539 if not hasattr(self, "scfenergies"):

540 self.scfenergies = []

541 energy = float(line.split()[-1])

542 energy = utils.convertor(energy, "hartree", "eV")

543 self.scfenergies.append(energy)

544

545 if "Dispersion correction" in line:

546 dispersion = utils.convertor(float(line.split()[-1]), "hartree", "eV")

547 self.append_attribute("dispersionenergies", dispersion)

548

549 # The final MO orbitals are printed in a simple list, but apparently not for

550 # DFT calcs, and often this list does not contain all MOs, so make sure to

551 # parse them from the MO analysis below if possible. This section will be like this:

552 #

553 # Symmetry analysis of molecular orbitals - final

554 # -----------------------------------------------

555 #

556 # Numbering of irreducible representations:

557 #

558 # 1 ag 2 au 3 bg 4 bu

559 #

560 # Orbital symmetries:

561 #

562 # 1 bu 2 ag 3 bu 4 ag 5 bu

563 # 6 ag 7 bu 8 ag 9 bu 10 ag

564 # ...

565 if line.strip() == "Symmetry analysis of molecular orbitals - final":

566

567 self.skip_lines(inputfile, ['d', 'b', 'numbering', 'b', 'reps', 'b', 'syms', 'b'])

568

569 if not hasattr(self, 'mosyms'):

570 self.mosyms = [[None]*self.nbasis]

571 line = next(inputfile)

572 while line.strip():

573 ncols = len(line.split())

574 assert ncols % 2 == 0

575 for i in range(ncols//2):

576 index = int(line.split()[i*2]) - 1

577 sym = line.split()[i*2+1]

578 sym = sym[0].upper() + sym[1:]

579 if self.mosyms[0][index]:

580 if self.mosyms[0][index] != sym:

581 self.logger.warning("Symmetry of MO %i has changed" % (index+1))

582 self.mosyms[0][index] = sym

583 line = next(inputfile)

584

585 # The same format is used for HF and DFT molecular orbital analysis. We want to parse

586 # the MO energies from this section, although it is printed already before this with

587 # less precision (might be useful to parse that if this is not available). Also, this

588 # section contains coefficients for the leading AO contributions, so it might also

589 # be useful to parse and use those values if the full vectors are not printed.

590 #

591 # The block looks something like this (two separate alpha/beta blocks in the unrestricted case):

592 #

593 # ROHF Final Molecular Orbital Analysis

594 # -------------------------------------

595 #

596 # Vector 1 Occ=2.000000D+00 E=-1.104059D+01 Symmetry=bu

597 # MO Center= 1.4D-17, 0.0D+00, -6.5D-37, r^2= 2.1D+00

598 # Bfn. Coefficient Atom+Function Bfn. Coefficient Atom+Function

599 # ----- ------------ --------------- ----- ------------ ---------------

600 # 1 0.701483 1 C s 6 -0.701483 2 C s

601 #

602 # Vector 2 Occ=2.000000D+00 E=-1.104052D+01 Symmetry=ag

603 # ...

604 # Vector 12 Occ=2.000000D+00 E=-1.020253D+00 Symmetry=bu

605 # MO Center= -1.4D-17, -5.6D-17, 2.9D-34, r^2= 7.9D+00

606 # Bfn. Coefficient Atom+Function Bfn. Coefficient Atom+Function

607 # ----- ------------ --------------- ----- ------------ ---------------

608 # 36 -0.298699 11 C s 41 0.298699 12 C s

609 # 2 0.270804 1 C s 7 -0.270804 2 C s

610 # 48 -0.213655 15 C s 53 0.213655 16 C s

611 # ...

612 #

613 if "Final" in line and "Molecular Orbital Analysis" in line:

614

615 # Unrestricted jobs have two such blocks, for alpha and beta orbitals, and

616 # we need to keep track of which one we're parsing (always alpha in restricted case).

617 unrestricted = ("Alpha" in line) or ("Beta" in line)

618 alphabeta = int("Beta" in line)

619

620 self.skip_lines(inputfile, ['dashes', 'blank'])

621

622 nvectors = []

623 mooccnos = []

624 energies = []

625 symmetries = [None]*self.nbasis

626 line = next(inputfile)

627 while line[:7] == " Vector":

628

629 # Note: the vector count starts from 1 in NWChem.

630 nvector = int(line[7:12])

631 nvectors.append(nvector)

632

633 # A nonzero occupancy for SCF jobs means the orbital is occupied.

634 mooccno = int(utils.float(line[18:30]))

635 mooccnos.append(mooccno)

636

637 # If the printout does not start from the first MO, assume None for all previous orbitals.

638 if len(energies) == 0 and nvector > 1:

639 for i in range(1, nvector):

640 energies.append(None)

641

642 energy = utils.float(line[34:47])

643 energy = utils.convertor(energy, "hartree", "eV")

644 energies.append(energy)

645

646 # When symmetry is not used, this part of the line is missing.

647 if line[47:58].strip() == "Symmetry=":

648 sym = line[58:].strip()

649 sym = sym[0].upper() + sym[1:]

650 symmetries[nvector-1] = sym

651

652 line = next(inputfile)

653 if "MO Center" in line:

654 line = next(inputfile)

655 if "Bfn." in line:

656 line = next(inputfile)

657 if "-----" in line:

658 line = next(inputfile)

659 while line.strip():

660 line = next(inputfile)

661 line = next(inputfile)

662

663 self.set_attribute('nmo', nvector)

664

665 if not hasattr(self, 'moenergies') or (len(self.moenergies) > alphabeta):

666 self.moenergies = []

667 self.moenergies.append(energies)

668

669 if not hasattr(self, 'mosyms') or (len(self.mosyms) > alphabeta):

670 self.mosyms = []

671 self.mosyms.append(symmetries)

672

673 if not hasattr(self, 'homos') or (len(self.homos) > alphabeta):

674 self.homos = []

675 nvector_index = mooccnos.index(0) - 1

676 if nvector_index > -1:

677 self.homos.append(nvectors[nvector_index] - 1)

678 else:

679 self.homos.append(-1)

680 # If this was a restricted open-shell calculation, append

681 # to HOMOs twice since only one Molecular Orbital Analysis

682 # section is in the output file.

683 if (not unrestricted) and (1 in mooccnos):

684 nvector_index = mooccnos.index(1) - 1

685 if nvector_index > -1:

686 self.homos.append(nvectors[nvector_index] - 1)

687 else:

688 self.homos.append(-1)

689

690 # This is where the full MO vectors are printed, but a special

691 # directive is needed for it in the `scf` or `dft` block:

692 # print "final vectors" "final vectors analysis"

693 # which gives:

694 #

695 # Final MO vectors

696 # ----------------

697 #

698 #

699 # global array: alpha evecs[1:60,1:60], handle: -995

700 #

701 # 1 2 3 4 5 6

702 # ----------- ----------- ----------- ----------- ----------- -----------

703 # 1 -0.69930 -0.69930 -0.02746 -0.02769 -0.00313 -0.02871

704 # 2 -0.03156 -0.03135 0.00410 0.00406 0.00078 0.00816

705 # 3 0.00002 -0.00003 0.00067 0.00065 -0.00526 -0.00120

706 # ...

707 #

708 if line.strip() == "Final MO vectors":

709

710 if not hasattr(self, 'mocoeffs'):

711 self.mocoeffs = []

712

713 self.skip_lines(inputfile, ['d', 'b', 'b'])

714

715 # The columns are MOs, rows AOs, but that's and educated guess since no

716 # atom information is printed alongside the indices. This next line gives

717 # the dimensions, which we can check. if set before this. Also, this line

718 # specifies whether we are dealing with alpha or beta vectors.

719 array_info = next(inputfile)

720 while ("global array" in array_info):

721 alphabeta = int(line.split()[2] == "beta")

722 size = array_info.split('[')[1].split(']')[0]

723 nbasis = int(size.split(',')[0].split(':')[1])

724 nmo = int(size.split(',')[1].split(':')[1])

725 self.set_attribute('nbasis', nbasis)

726 self.set_attribute('nmo', nmo)

727

728 self.skip_line(inputfile, 'blank')

729 mocoeffs = []

730 while len(mocoeffs) < self.nmo:

731 nmos = list(map(int, next(inputfile).split()))

732 assert len(mocoeffs) == nmos[0] - 1

733 for n in nmos:

734 mocoeffs.append([])

735 self.skip_line(inputfile, 'dashes')

736 for nb in range(nbasis):

737 line = next(inputfile)

738 index = int(line.split()[0])

739 assert index == nb+1

740 coefficients = list(map(float, line.split()[1:]))

741 assert len(coefficients) == len(nmos)

742 for i, c in enumerate(coefficients):

743 mocoeffs[nmos[i]-1].append(c)

744 self.skip_line(inputfile, 'blank')

745 self.mocoeffs.append(mocoeffs)

746

747 array_info = next(inputfile)

748

749 # For Hartree-Fock, the atomic Mulliken charges are typically printed like this:

750 #

751 # Mulliken analysis of the total density

752 # --------------------------------------

753 #

754 # Atom Charge Shell Charges

755 # ----------- ------ -------------------------------------------------------

756 # 1 C 6 6.00 1.99 1.14 2.87

757 # 2 C 6 6.00 1.99 1.14 2.87

758 # ...

759 if line.strip() == "Mulliken analysis of the total density":

760

761 if not hasattr(self, "atomcharges"):

762 self.atomcharges = {}

763

764 self.skip_lines(inputfile, ['d', 'b', 'header', 'd'])

765

766 charges = []

767 line = next(inputfile)

768 while line.strip():

769 index, atomname, nuclear, atom = line.split()[:4]

770 shells = line.split()[4:]

771 charges.append(float(atom)-float(nuclear))

772 line = next(inputfile)

773 self.atomcharges['mulliken'] = charges

774

775 # Not the the 'overlap population' as printed in the Mulliken population analysis,

776 # is not the same thing as the 'overlap matrix'. In fact, it is the overlap matrix

777 # multiplied elementwise times the density matrix.

778 #

779 # ----------------------------

780 # Mulliken population analysis

781 # ----------------------------

782 #

783 # ----- Total overlap population -----

784 #

785 # 1 2 3 4 5 6 7

786 #

787 # 1 1 C s 2.0694818227 -0.0535883400 -0.0000000000 -0.0000000000 -0.0000000000 -0.0000000000 0.0000039991

788 # 2 1 C s -0.0535883400 0.8281341291 0.0000000000 -0.0000000000 0.0000000000 0.0000039991 -0.0009906747

789 # ...

790 #

791 # DFT does not seem to print the separate listing of Mulliken charges

792 # by default, but they are printed by this modules later on. They are also print

793 # for Hartree-Fock runs, though, so in that case make sure they are consistent.

794 if line.strip() == "Mulliken population analysis":

795

796 self.skip_lines(inputfile, ['d', 'b', 'total_overlap_population', 'b'])

797

798 overlaps = []

799 line = next(inputfile)

800 while all([c.isdigit() for c in line.split()]):

801

802 # There is always a line with the MO indices printed in thie block.

803 indices = [int(i)-1 for i in line.split()]

804 for i in indices:

805 overlaps.append([])

806

807 # There is usually a blank line after the MO indices, but

808 # there are exceptions, so check if line is blank first.

809 line = next(inputfile)

810 if not line.strip():

811 line = next(inputfile)

812

813 # Now we can iterate or atomic orbitals.

814 for nao in range(self.nbasis):

815 data = list(map(float, line.split()[4:]))

816 for i, d in enumerate(data):

817 overlaps[indices[i]].append(d)

818 line = next(inputfile)

819

820 line = next(inputfile)

821

822 # This header should be printed later, before the charges are print, which of course

823 # are just sums of the overlaps and could be calculated. But we just go ahead and

824 # parse them, make sure they're consistent with previously parsed values and

825 # use these since they are more precise (previous precision could have been just 0.01).

826 while "Total gross population on atoms" not in line:

827 line = next(inputfile)

828 self.skip_line(inputfile, 'blank')

829 charges = []

830 for i in range(self.natom):

831 line = next(inputfile)

832 iatom, element, ncharge, epop = line.split()

833 iatom = int(iatom)

834 ncharge = float(ncharge)

835 epop = float(epop)

836 assert iatom == (i+1)

837 charges.append(epop-ncharge)

838

839 if not hasattr(self, 'atomcharges'):

840 self.atomcharges = {}

841 if not "mulliken" in self.atomcharges:

842 self.atomcharges['mulliken'] = charges

843 else:

844 assert max(self.atomcharges['mulliken'] - numpy.array(charges)) < 0.01

845 self.atomcharges['mulliken'] = charges

846

847 # NWChem prints the dipole moment in atomic units first, and we could just fast forward

848 # to the values in Debye, which are also printed. But we can also just convert them

849 # right away and so parse a little bit less. Note how the reference point is print

850 # here within the block nicely, as it is for all moment later.

851 #

852 # -------------

853 # Dipole Moment

854 # -------------

855 #

856 # Center of charge (in au) is the expansion point

857 # X = 0.0000000 Y = 0.0000000 Z = 0.0000000

858 #

859 # Dipole moment 0.0000000000 Debye(s)

860 # DMX 0.0000000000 DMXEFC 0.0000000000

861 # DMY 0.0000000000 DMYEFC 0.0000000000

862 # DMZ -0.0000000000 DMZEFC 0.0000000000

863 #

864 # ...

865 #

866 if line.strip() == "Dipole Moment":

867

868 self.skip_lines(inputfile, ['d', 'b'])

869

870 reference_comment = next(inputfile)

871 assert "(in au)" in reference_comment

872 reference = next(inputfile).split()

873 self.reference = [reference[-7], reference[-4], reference[-1]]

874 self.reference = numpy.array([float(x) for x in self.reference])

875 self.reference = utils.convertor(self.reference, 'bohr', 'Angstrom')

876

877 self.skip_line(inputfile, 'blank')

878

879 magnitude = next(inputfile)

880 assert magnitude.split()[-1] == "A.U."

881

882 dipole = []

883 for i in range(3):

884 line = next(inputfile)

885 dipole.append(float(line.split()[1]))

886

887 dipole = utils.convertor(numpy.array(dipole), "ebohr", "Debye")

888

889 if not hasattr(self, 'moments'):

890 self.moments = [self.reference, dipole]

891 else:

892 self.moments[1] == dipole

893

894 # The quadrupole moment is pretty straightforward to parse. There are several

895 # blocks printed, and the first one called 'second moments' contains the raw

896 # moments, and later traceless values are printed. The moments, however, are

897 # not in lexicographical order, so we need to sort them. Also, the first block

898 # is in atomic units, so remember to convert to Buckinghams along the way.

899 #

900 # -----------------

901 # Quadrupole Moment

902 # -----------------

903 #

904 # Center of charge (in au) is the expansion point

905 # X = 0.0000000 Y = 0.0000000 Z = 0.0000000

906 #

907 # < R**2 > = ********** a.u. ( 1 a.u. = 0.280023 10**(-16) cm**2 )

908 # ( also called diamagnetic susceptibility )

909 #

910 # Second moments in atomic units

911 #

912 # Component Electronic+nuclear Point charges Total

913 # --------------------------------------------------------------------------

914 # XX -38.3608511210 0.0000000000 -38.3608511210

915 # YY -39.0055467347 0.0000000000 -39.0055467347

916 # ...

917 #

918 if line.strip() == "Quadrupole Moment":

919

920 self.skip_lines(inputfile, ['d', 'b'])

921

922 reference_comment = next(inputfile)

923 assert "(in au)" in reference_comment

924 reference = next(inputfile).split()

925 self.reference = [reference[-7], reference[-4], reference[-1]]

926 self.reference = numpy.array([float(x) for x in self.reference])

927 self.reference = utils.convertor(self.reference, 'bohr', 'Angstrom')

928

929 self.skip_lines(inputfile, ['b', 'units', 'susc', 'b'])

930

931 line = next(inputfile)

932 assert line.strip() == "Second moments in atomic units"

933

934 self.skip_lines(inputfile, ['b', 'header', 'd'])

935

936 # Parse into a dictionary and then sort by the component key.

937 quadrupole = {}

938 for i in range(6):

939 line = next(inputfile)

940 quadrupole[line.split()[0]] = float(line.split()[-1])

941 lex = sorted(quadrupole.keys())

942 quadrupole = [quadrupole[key] for key in lex]

943

944 quadrupole = utils.convertor(numpy.array(quadrupole), "ebohr2", "Buckingham")

945

946 # The checking of potential previous values if a bit more involved here,

947 # because it turns out NWChem has separate keywords for dipole, quadrupole

948 # and octupole output. So, it is perfectly possible to print the quadrupole

949 # and not the dipole... if that is the case set the former to None and

950 # issue a warning. Also, a regression has been added to cover this case.

951 if not hasattr(self, 'moments') or len(self.moments) < 2:

952 self.logger.warning("Found quadrupole moments but no previous dipole")

953 self.moments = [self.reference, None, quadrupole]

954 else:

955 if len(self.moments) == 2:

956 self.moments.append(quadrupole)

957 else:

958 assert self.moments[2] == quadrupole

959

960 # The octupole moment is analogous to the quadrupole, but there are more components

961 # and the checking of previously parsed dipole and quadrupole moments is more involved,

962 # with a corresponding test also added to regressions.

963 #

964 # ---------------

965 # Octupole Moment

966 # ---------------

967 #

968 # Center of charge (in au) is the expansion point

969 # X = 0.0000000 Y = 0.0000000 Z = 0.0000000

970 #

971 # Third moments in atomic units

972 #

973 # Component Electronic+nuclear Point charges Total

974 # --------------------------------------------------------------------------

975 # XXX -0.0000000000 0.0000000000 -0.0000000000

976 # YYY -0.0000000000 0.0000000000 -0.0000000000

977 # ...

978 #

979 if line.strip() == "Octupole Moment":

980

981 self.skip_lines(inputfile, ['d', 'b'])

982

983 reference_comment = next(inputfile)

984 assert "(in au)" in reference_comment

985 reference = next(inputfile).split()

986 self.reference = [reference[-7], reference[-4], reference[-1]]

987 self.reference = numpy.array([float(x) for x in self.reference])

988 self.reference = utils.convertor(self.reference, 'bohr', 'Angstrom')

989

990 self.skip_line(inputfile, 'blank')

991

992 line = next(inputfile)

993 assert line.strip() == "Third moments in atomic units"

994

995 self.skip_lines(inputfile, ['b', 'header', 'd'])

996

997 octupole = {}

998 for i in range(10):

999 line = next(inputfile)

1000 octupole[line.split()[0]] = float(line.split()[-1])

1001 lex = sorted(octupole.keys())

1002 octupole = [octupole[key] for key in lex]

1003

1004 octupole = utils.convertor(numpy.array(octupole), "ebohr3", "Debye.ang2")

1005

1006 if not hasattr(self, 'moments') or len(self.moments) < 2:

1007 self.logger.warning("Found octupole moments but no previous dipole or quadrupole moments")

1008 self.moments = [self.reference, None, None, octupole]

1009 elif len(self.moments) == 2:

1010 self.logger.warning("Found octupole moments but no previous quadrupole moments")

1011 self.moments.append(None)

1012 self.moments.append(octupole)

1013 else:

1014 if len(self.moments) == 3:

1015 self.moments.append(octupole)

1016 else:

1017 assert self.moments[3] == octupole

1018

1019 if "Total MP2 energy" in line:

1020 self.metadata["methods"].append("MP2")

1021 mpenerg = float(line.split()[-1])

1022 if not hasattr(self, "mpenergies"):

1023 self.mpenergies = []

1024 self.mpenergies.append([])

1025 self.mpenergies[-1].append(utils.convertor(mpenerg, "hartree", "eV"))

1026

1027 if line.strip() == "NWChem Extensible Many-Electron Theory Module":

1028 ccenergies = []

1029 while "Parallel integral file used" not in line:

1030 line = next(inputfile)

1031 if "CCSD total energy / hartree" in line or "total CCSD energy:" in line:

1032 self.metadata["methods"].append("CCSD")

1033 ccenergies.append(float(line.split()[-1]))

1034 if "CCSD(T) total energy / hartree" in line:

1035 self.metadata["methods"].append("CCSD(T)")

1036 ccenergies.append(float(line.split()[-1]))

1037 if ccenergies:

1038 self.append_attribute(

1039 "ccenergies", utils.convertor(ccenergies[-1], "hartree", "eV")

1040 )

1041

1042 # Static and dynamic polarizability.

1043 if "Linear Response polarizability / au" in line:

1044 if not hasattr(self, "polarizabilities"):

1045 self.polarizabilities = []

1046 polarizability = []

1047 line = next(inputfile)

1048 assert line.split()[0] == "Frequency"

1049 line = next(inputfile)

1050 assert line.split()[0] == "Wavelength"

1051 self.skip_lines(inputfile, ['coordinates', 'd'])

1052 for _ in range(3):

1053 line = next(inputfile)

1054 polarizability.append(line.split()[1:])

1055 self.polarizabilities.append(numpy.array(polarizability))

1056

1057 if line[:18] == ' Total times cpu:':

1058 self.metadata['success'] = True

1059

1060 if line.strip() == "NWChem QMD Module":

1061 self.is_BOMD = True

1062

1063 # Born-Oppenheimer molecular dynamics (BOMD): time.

1064 if "QMD Run Information" in line:

1065 self.skip_line(inputfile, 'd')

1066 line = next(inputfile)

1067 assert "Time elapsed (fs)" in line

1068 time = float(line.split()[4])

1069 self.append_attribute('time', time)

1070

1071 # BOMD: geometry coordinates when `print low`.

1072 if line.strip() == "DFT ENERGY GRADIENTS":

1073 if self.is_BOMD:

1074 self.skip_lines(inputfile, ['b', 'atom coordinates gradient', 'xyzxyz'])

1075 line = next(inputfile)

1076 atomcoords_step = []

1077 while line.strip():

1078 tokens = line.split()

1079 assert len(tokens) == 8

1080 atomcoords_step.append([float(c) for c in tokens[2:5]])

1081 line = next(inputfile)

1082 self.atomcoords.append(atomcoords_step)

1083

1084 def before_parsing(self):

1085 """NWChem-specific routines performed before parsing a file.

1086 """

1087

1088 # The only reason we need this identifier is if `print low` is

1089 # set in the input file, which we assume is likely for a BOMD

1090 # trajectory. This will enable parsing coordinates from the

1091 # 'DFT ENERGY GRADIENTS' section.

1092 self.is_BOMD = False

1093

1094 def after_parsing(self):

1095 """NWChem-specific routines for after parsing a file.

1096

1097 Currently, expands self.shells() into self.aonames.

1098 """

1099

1100 # setup a few necessary things, including a regular expression

1101 # for matching the shells

1102 table = utils.PeriodicTable()

1103 elements = [table.element[x] for x in self.atomnos]

1104 pattern = re.compile(r"(\ds)+(\dp)*(\dd)*(\df)*(\dg)*")

1105

1106 labels = {}

1107 labels['s'] = ["%iS"]

1108 labels['p'] = ["%iPX", "%iPY", "%iPZ"]

1109 if self.shells['type'] == 'spherical':

1110 labels['d'] = ['%iD-2', '%iD-1', '%iD0', '%iD1', '%iD2']

1111 labels['f'] = ['%iF-3', '%iF-2', '%iF-1', '%iF0',

1112 '%iF1', '%iF2', '%iF3']

1113 labels['g'] = ['%iG-4', '%iG-3', '%iG-2', '%iG-1', '%iG0',

1114 '%iG1', '%iG2', '%iG3', '%iG4']

1115 elif self.shells['type'] == 'cartesian':

1116 labels['d'] = ['%iDXX', '%iDXY', '%iDXZ',

1117 '%iDYY', '%iDYZ',

1118 '%iDZZ']

1119 labels['f'] = ['%iFXXX', '%iFXXY', '%iFXXZ',

1120 '%iFXYY', '%iFXYZ', '%iFXZZ',

1121 '%iFYYY', '%iFYYZ', '%iFYZZ',

1122 '%iFZZZ']

1123 labels['g'] = ['%iGXXXX', '%iGXXXY', '%iGXXXZ',

1124 '%iGXXYY', '%iGXXYZ', '%iGXXZZ',

1125 '%iGXYYY', '%iGXYYZ', '%iGXYZZ',

1126 '%iGXZZZ', '%iGYYYY', '%iGYYYZ',

1127 '%iGYYZZ', '%iGYZZZ', '%iGZZZZ']

1128 else:

1129 self.logger.warning("Found a non-standard aoname representation type.")

1130 return

1131

1132 # now actually build aonames

1133 # involves expanding 2s1p into appropriate types

1134

1135 self.aonames = []

1136 for i, element in enumerate(elements):

1137 try:

1138 shell_text = self.shells[element]

1139 except KeyError:

1140 del self.aonames

1141 msg = "Cannot determine aonames for at least one atom."

1142 self.logger.warning(msg)

1143 break

1144

1145 prefix = "%s%i_" % (element, i + 1) # (e.g. C1_)

1146

1147 matches = pattern.match(shell_text)

1148 for j, group in enumerate(matches.groups()):

1149 if group is None:

1150 continue

1151

1152 count = int(group[:-1])

1153 label = group[-1]

1154

1155 for k in range(count):

1156 temp = [x % (j + k + 1) for x in labels[label]]

1157 self.aonames.extend([prefix + x for x in temp])

1158

1159 # If we parsed a BOMD trajectory, the first two parsed

1160 # geometries are identical, and all from the second onward are

1161 # in Bohr. Delete the first one and perform the unit

1162 # conversion.

1163 if self.is_BOMD:

1164 self.atomcoords = utils.convertor(numpy.asarray(self.atomcoords)[1:, ...],

1165 'bohr', 'Angstrom')

Coverage for cclib/parser/nwchemparser.py : 97%

606 statements

Coverage for cclib/parser/nwchemparser.py : 97%

606 statements 589 run 17 missing 0 excluded

606 statements