Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2# 

3# Copyright (c) 2020, the cclib development team 

4# 

5# This file is part of cclib (http://cclib.github.io) and is distributed under 

6# the terms of the BSD 3-Clause License. 

7"""Generic output file parser and related tools""" 

8 

9 

10import bz2 

11import fileinput 

12import gzip 

13import inspect 

14import io 

15import logging 

16import os 

17import random 

18import sys 

19import zipfile 

20from abc import ABC, abstractmethod 

21 

22import numpy 

23 

24from cclib.parser import utils 

25from cclib.parser.data import ccData 

26from cclib.parser.data import ccData_optdone_bool 

27 

28 

# Tell the logging module not to collect multiprocessing information for
# log records. This seems to avoid a problem with Avogadro.
logging.logMultiprocessing = 0

31 

32 

class myBZ2File(bz2.BZ2File):
    """A bz2 file whose iteration yields text lines rather than bytes.

    Each line is decoded as ASCII; bytes that cannot be decoded are
    substituted with the Unicode replacement character.
    """

    def __next__(self):
        # Deliberately start the lookup *after* BZ2File in the MRO, so the
        # generic line iteration of the underlying I/O base class is used.
        raw_line = super(bz2.BZ2File, self).__next__()
        text_line = raw_line.decode("ascii", "replace")
        return text_line

    def next(self):
        """Python 2 style spelling of the iterator protocol."""
        return self.__next__()

42 

43 

class myGzipFile(gzip.GzipFile):
    """A gzip file whose iteration yields text lines rather than bytes.

    Each line is decoded as ASCII; bytes that cannot be decoded are
    substituted with the Unicode replacement character.
    """

    def __next__(self):
        parent = super(gzip.GzipFile, self)
        # Depending on the gzip implementation, the parent exposes the
        # iterator step either as next() or as __next__(); prefer next().
        if hasattr(parent, 'next'):
            fetch = parent.next
        else:
            fetch = parent.__next__
        return fetch().decode("ascii", "replace")

    def next(self):
        """Python 2 style spelling of the iterator protocol."""
        return self.__next__()

58 

59 

class FileWrapper:
    """Wrap a file-like object or stream with some custom tweaks.

    The wrapper iterates over the lines of the wrapped source while keeping
    track of:
        size      - total size of the source, when it can be determined
        pos       - running position, advanced by len(line) for every line read
        last_line - the most recent line returned by the iterator (or None)
    """

    def __init__(self, source, pos=0):
        """Wrap `source` and try to determine its size.

        Inputs:
            source - file-like object or stream that can be iterated line by line
            pos - position to start from (and to seek to, when seeking works)
        """

        self.src = source

        # Most file-like objects have seek and tell methods, but some streams
        # (for example those returned by urllib) either lack them, which raises
        # AttributeError, or have them and raise io.UnsupportedOperation.
        try:
            self.src.seek(0, 2)
            self.size = self.src.tell()
            self.src.seek(pos, 0)

        except (AttributeError, IOError, io.UnsupportedOperation):
            # Stream returned by urllib should have size information.
            if hasattr(self.src, 'headers') and 'content-length' in self.src.headers:
                self.size = int(self.src.headers['content-length'])
            else:
                self.size = pos

        # Assume the position is what was passed to the constructor.
        self.pos = pos

        self.last_line = None

    def next(self):
        """Return the next line of the source, updating pos and last_line."""
        line = next(self.src)
        self.pos += len(line)
        self.last_line = line
        return line

    def __next__(self):
        return self.next()

    def __iter__(self):
        return self

    def close(self):
        """Close the wrapped source."""
        self.src.close()

    def seek(self, pos, ref=0):
        """Seek in the wrapped source and keep self.pos in sync.

        Inputs:
            pos - offset, interpreted relative to `ref`
            ref - 0 (start, the default), 1 (current position) or 2 (end)

        When the source cannot seek and `ref` is 2, seeking to the end is
        emulated by reading the remainder of the source. For any other `ref`
        the original error is re-raised.
        """

        # We cannot be very specific about the exception type (see __init__),
        # but catching Exception instead of using a bare except makes sure that
        # KeyboardInterrupt and SystemExit are never swallowed here.
        try:
            self.src.seek(pos, ref)
        except Exception:
            if ref == 2:
                self.src.read()
            else:
                raise

        if ref == 0:
            self.pos = pos
        elif ref == 1:
            self.pos += pos
        elif ref == 2 and hasattr(self, 'size'):
            self.pos = self.size

124 

125 

def openlogfile(filename, object=None):
    """Return a file-like object for a log file, decompressing it if needed.

    Inputs:
        filename - a single filename (str) or an iterable of filenames
        object - optional, already available content for `filename`; it is
            passed to the decompressor as the underlying file object for
            .gz, .zip and .bz/.bz2 names, and decoded as UTF-8 bytes otherwise

    Returns:
        For a single filename, a file-like object chosen by the extension:
        a myGzipFile (.gz), an io.StringIO holding the only member of the
        archive (.zip), a myBZ2File (.bz, .bz2), or for anything else an
        io.StringIO (when `object` is given) or a FileWrapper around the
        opened file.
        For a non-empty iterable of filenames, a FileInput object, which can
        be used for seamless iteration without concatenation.
        None for an empty iterable or an unsupported `filename` type.

    Raises:
        AssertionError - if a zip archive does not contain exactly one file.
    """

    # A single filename. isinstance (rather than an exact type comparison)
    # makes sure str subclasses are not mistaken for a list of filenames.
    if isinstance(filename, str):

        extension = os.path.splitext(filename)[1]

        if extension == ".gz":
            return myGzipFile(filename, "r", fileobj=object)

        if extension == ".zip":
            # The context manager releases the archive once its only member
            # has been read into memory.
            with zipfile.ZipFile(object if object else filename, "r") as archive:
                members = archive.namelist()
                # Raised explicitly (not via assert) so that the check is not
                # stripped under -O; the exception type is kept for callers.
                if len(members) != 1:
                    raise AssertionError("ERROR: Zip file contains more than 1 file")
                return io.StringIO(archive.read(members[0]).decode("ascii", "ignore"))

        if extension in ('.bz', '.bz2'):
            # Module 'bz2' is not always importable.
            if bz2 is None:
                raise AssertionError("ERROR: module bz2 cannot be imported")
            return myBZ2File(object if object else filename, "r")

        # Assuming that object is text file encoded in utf-8
        if object:
            return io.StringIO(object.decode('utf-8'))
        return FileWrapper(io.open(filename, "r", errors='ignore'))

    if hasattr(filename, "__iter__"):

        # This is needed, because fileinput will assume stdin when filename is empty.
        if len(filename) == 0:
            return None

        return fileinput.input(filename, openhook=fileinput.hook_compressed)

    # Neither a filename nor an iterable of filenames.
    return None

169 

170 

class Logfile(ABC):
    """Abstract class for logfile objects.

    Subclasses defined by cclib:
        ADF, DALTON, GAMESS, GAMESSUK, Gaussian, Jaguar, Molpro, MOPAC,
        NWChem, ORCA, Psi, Q-Chem

    A subclass must implement normalisesym() and provide an
    extract(inputfile, line) method, which parse() calls for every line.
    """

    def __init__(self, source, loglevel=logging.ERROR, logname="Log",
                 logstream=sys.stderr, datatype=ccData_optdone_bool, **kwds):
        """Initialise the Logfile object.

        This should be called by a subclass in its own __init__ method.

        Inputs:
            source - a logfile, list of logfiles, or stream with at least a read method
            loglevel - integer corresponding to a log level from the logging module
            logname - name of the source logfile passed to this constructor
            logstream - where to output the logging information
            datatype - class to use for gathering data attributes
            kwds - recognised keys are "optdone_as_list" and "future"

        Raises:
            ValueError - if the type of source is not supported.
        """

        # Set the filename to source if it is a string or a list of strings, which are
        # assumed to be filenames. Otherwise, assume the source is a file-like object
        # if it has a read method, and we will try to use it like a stream.
        self.isfileinput = False
        if isinstance(source, str):
            self.filename = source
            self.isstream = False
        elif isinstance(source, list) and all(isinstance(s, str) for s in source):
            self.filename = source
            self.isstream = False
        elif isinstance(source, fileinput.FileInput):
            self.filename = source
            self.isstream = False
            self.isfileinput = True
        elif hasattr(source, "read"):
            self.filename = "stream %s" % str(type(source))
            self.isstream = True
            self.stream = source
        else:
            raise ValueError("Unexpected source type.")

        # Set up the logger.
        # Note that calling logging.getLogger() with one name always returns the same instance.
        # Loggers are keyed on the log name and the source, which means that care needs
        # to be taken not to duplicate handlers.
        self.loglevel = loglevel
        self.logname = logname
        self.logger = logging.getLogger('%s %s' % (self.logname, self.filename))
        self.logger.setLevel(self.loglevel)
        if not self.logger.handlers:
            handler = logging.StreamHandler(logstream)
            handler.setFormatter(logging.Formatter("[%(name)s %(levelname)s] %(message)s"))
            self.logger.addHandler(handler)

        # Set up the metadata, unless a subclass has already done so.
        if not hasattr(self, "metadata"):
            self.metadata = {}
            self.metadata["package"] = self.logname
            self.metadata["methods"] = []
            # Indicate if the computation has completed successfully
            self.metadata['success'] = False

        # Periodic table of elements.
        self.table = utils.PeriodicTable()

        # This is the class that will be used in the data object returned by parse(), and should
        # normally be ccData or a subclass of it.
        self.datatype = datatype

        # Change the class used if we want optdone to be a list or if the 'future' option
        # is used, which might have more consequences in the future.
        optdone_as_list = kwds.get("optdone_as_list", False) or kwds.get("future", False)
        # Only a genuine bool is accepted as a switch; anything else counts as False.
        optdone_as_list = optdone_as_list if isinstance(optdone_as_list, bool) else False
        if optdone_as_list:
            self.datatype = ccData

        # Parsing of Natural Orbitals and Natural Spin Orbitals into one attribute
        self.unified_no_nso = kwds.get("future", False)

    def __setattr__(self, name, value):
        """Set an attribute, logging the creation of new cclib attributes."""

        # Send info to logger if the attribute is in the list of attributes.
        if name in ccData._attrlist and hasattr(self, "logger"):

            # Call logger.info() only if the attribute is new.
            if not hasattr(self, name):
                if type(value) in [numpy.ndarray, list]:
                    self.logger.info("Creating attribute %s[]" % name)
                else:
                    self.logger.info("Creating attribute %s: %s" % (name, str(value)))

        # Set the attribute.
        object.__setattr__(self, name, value)

    def parse(self, progress=None, fupdate=0.05, cupdate=0.002):
        """Parse the logfile, using the assumed extract method of the child.

        Inputs:
            progress - optional progress reporter; it is used only when the input
                is not a compressed gzip/bz2 file, and must provide initialize()
                and update() methods
            fupdate - stored as self.fupdate for the duration of the parsing
            cupdate - probability with which a line triggers a progress update

        Returns:
            An instance of self.datatype holding the parsed attributes.

        Raises:
            AttributeError - if extract is missing, not callable, or does not take
                exactly (self, inputfile, line).
            Any exception raised by extract() other than StopIteration is logged
            and re-raised.
        """

        # Check that the sub-class has an extract attribute,
        # that is callable with the proper number of arguments.
        if not hasattr(self, "extract"):
            raise AttributeError("Class %s has no extract() method." % self.__class__.__name__)
        if not callable(self.extract):
            raise AttributeError("Method %s._extract not callable." % self.__class__.__name__)
        # inspect.getargspec was removed in Python 3.11. Like getargspec,
        # getfullargspec includes the bound 'self' of a method in the arguments.
        if len(inspect.getfullargspec(self.extract).args) != 3:
            raise AttributeError("Method %s._extract takes wrong number of arguments." % self.__class__.__name__)

        # Save the current set of attributes to keep after parsing.
        # The dict of self should be the same after parsing.
        _nodelete = set(self.__dict__)

        # Initiate the FileInput object for the input files.
        # Remember that self.filename can be a list of files.
        if self.isstream:
            inputfile = FileWrapper(self.stream)
        elif self.isfileinput:
            inputfile = self.filename
        else:
            inputfile = openlogfile(self.filename)

        # Initialize self.progress
        is_compressed = isinstance(inputfile, (myGzipFile, myBZ2File))
        if progress and not is_compressed:
            self.progress = progress
            self.progress.initialize(inputfile.size)
            self.progress.step = 0
        self.fupdate = fupdate
        self.cupdate = cupdate

        try:
            # Maybe the sub-class has something to do before parsing.
            self.before_parsing()

            # Loop over lines in the file object and call extract().
            # This is where the actual parsing is done.
            for line in inputfile:
                self.updateprogress(inputfile, "Unsupported information", cupdate)

                # This call should check if the line begins a section of extracted data.
                # If it does, it parses some lines and sets the relevant attributes (to self).
                # Any attributes can be freely set and used across calls, however only those
                # in data._attrlist will be moved to final data object that is returned.
                try:
                    self.extract(inputfile, line)
                except StopIteration:
                    self.logger.error("Unexpectedly encountered end of logfile.")
                    break
                except Exception:
                    self.logger.error("Encountered error when parsing.")
                    # Only FileWrapper tracks last_line; for other input objects fall
                    # back to the current line so that the original error is not
                    # masked by an AttributeError.
                    self.logger.error("Last line read: %s" % getattr(inputfile, "last_line", line))
                    raise
        finally:
            # Close input file object, also when parsing failed.
            if not self.isstream:
                inputfile.close()

        # Maybe the sub-class has something to do after parsing.
        self.after_parsing()

        # If atomcoords were not parsed, but some input coordinates were ("inputcoords").
        # This is originally from the Gaussian parser, a regression fix.
        if not hasattr(self, "atomcoords") and hasattr(self, "inputcoords"):
            self.atomcoords = numpy.array(self.inputcoords, 'd')

        # Set nmo if not set already - to nbasis.
        if not hasattr(self, "nmo") and hasattr(self, "nbasis"):
            self.nmo = self.nbasis

        # Create a default coreelectrons array, unless it's impossible
        # to determine.
        if not hasattr(self, "coreelectrons") and hasattr(self, "natom"):
            self.coreelectrons = numpy.zeros(self.natom, "i")
        if hasattr(self, "incorrect_coreelectrons"):
            self.__delattr__("coreelectrons")

        # Create the data object we want to return. This is normally ccData, but can be changed
        # by passing the datatype argument to the constructor. All supported cclib attributes
        # are copied to this object, but beware that in order to be moved an attribute must be
        # included in the data._attrlist of ccData (or whatever else).
        data = self.datatype(attributes=self.__dict__)

        # Now make sure that the cclib attributes in the data object are all the correct type,
        # including arrays and lists of arrays.
        data.arrayify()

        # Remember the progress reporter before the cleanup below removes it, otherwise
        # the final "Done" update would never be sent.
        reporter = getattr(self, "progress", None)

        # Delete all temporary attributes (including cclib attributes).
        # All attributes should have been moved to a data object, which will be returned.
        for attr in list(self.__dict__):
            if attr not in _nodelete:
                self.__delattr__(attr)

        # Perform final checks on values of attributes.
        data.check_values(logger=self.logger)

        # Update progress as done.
        if reporter is not None:
            reporter.update(inputfile.size, "Done")

        return data

    def before_parsing(self):
        """Set parser-specific variables and do other initial things here."""
        pass

    def after_parsing(self):
        """Correct data or do parser-specific validation after parsing is finished."""
        pass

    def updateprogress(self, inputfile, msg, xupdate=0.05):
        """Update progress.

        The update is sent with probability xupdate, and only when a progress
        reporter is set and the position in inputfile has changed.
        """

        if hasattr(self, "progress") and random.random() < xupdate:
            newstep = inputfile.pos
            if newstep != self.progress.step:
                self.progress.update(newstep, msg)
                self.progress.step = newstep

    @abstractmethod
    def normalisesym(self, symlabel):
        """Standardise the symmetry labels between parsers."""

    def new_internal_job(self):
        """Delete attributes that can be problematic in multistep jobs.

        TODO: instead of this hack, parse each job in a multistep computation
        as a different ccData object (this is for 2.x).

        Some computations are actually sequences of several jobs, and some
        attributes won't work well if parsed across jobs. These include:
            mpenergies: if different jobs go to different orders then
                        these won't be consistent and can't be converted
                        to an array easily
        """
        for name in ("mpenergies",):
            if hasattr(self, name):
                delattr(self, name)

    def set_attribute(self, name, value, check_change=True):
        """Set an attribute and perform an optional check when it already exists.

        Note that this can be used for scalars and lists alike, whenever we want
        to set a value for an attribute.

        Parameters
        ----------
        name: str
            The name of the attribute.
        value: object
            The value for the attribute (scalar, list, array, ...).
        check_change: bool
            By default we want to check that the value does not change
            if the attribute already exists. A change is only reported
            with a warning, the new value is set regardless.
        """
        if check_change and hasattr(self, name):
            try:
                numpy.testing.assert_equal(getattr(self, name), value)
            except AssertionError:
                self.logger.warning("Attribute %s changed value (%s -> %s)" % (name, getattr(self, name), value))

        setattr(self, name, value)

    def append_attribute(self, name, value):
        """Appends a value to an attribute, creating it as a list if needed."""

        if not hasattr(self, name):
            self.set_attribute(name, [])
        getattr(self, name).append(value)

    def extend_attribute(self, name, values):
        """Appends an iterable of values to an attribute, creating it as a list if needed."""

        if not hasattr(self, name):
            self.set_attribute(name, [])
        getattr(self, name).extend(values)

    def _assign_coreelectrons_to_element(self, element, ncore,
                                         ncore_is_total_count=False):
        """Assign core electrons to all instances of the element.

        It's usually reasonable to do this for all atoms of a given element,
        because mixed usage isn't normally allowed within elements.

        Parameters
        ----------
        element: str
            the chemical element to set coreelectrons for
        ncore: int
            the number of core electrons
        ncore_is_total_count: bool
            whether the ncore argument is the total count, in which case it is
            divided by the number of atoms of this element
        """
        atomsymbols = [self.table.element[atomno] for atomno in self.atomnos]
        indices = [i for i, el in enumerate(atomsymbols) if el == element]
        if ncore_is_total_count:
            ncore = ncore // len(indices)

        if not hasattr(self, 'coreelectrons'):
            self.coreelectrons = numpy.zeros(self.natom, 'i')
        self.coreelectrons[indices] = ncore

    def skip_lines(self, inputfile, sequence):
        """Read trivial line types and check they are what they are supposed to be.

        This function will read len(sequence) lines and do certain checks on them,
        when the elements of sequence have the appropriate values. Currently the
        following elements trigger checks:
            'blank' or 'b'      - the line should be blank
            'dashes' or 'd'     - the line should contain only dashes (or spaces)
            'equals' or 'e'     - the line should contain only equal signs (or spaces)
            'stars' or 's'      - the line should contain only stars (or spaces)

        A failed check only produces a warning that names the file and line
        number of the caller. Returns the list of all lines read.
        """

        expected_characters = {
            '-': ['dashes', 'd'],
            '=': ['equals', 'e'],
            '*': ['stars', 's'],
        }

        lines = []
        for expected in sequence:

            # Read the line we want to skip.
            line = next(inputfile)
            stripped = line.strip()

            # Description of the failed check, if any. Plain conditions are used
            # instead of assert statements so the checks also run under -O.
            problem = None

            # Blank lines are perhaps the most common thing we want to check for.
            if expected in ["blank", "b"]:
                if stripped != "":
                    problem = "line not blank as expected"

            # All cases of homogeneous lines can be dealt with by the same code.
            for character, keys in expected_characters.items():
                if expected in keys:
                    if not all(c == character for c in stripped if c != ' '):
                        problem = "line not all %s as expected" % keys[0]

            if problem is not None:
                # Index 1 is the frame that called skip_lines.
                caller = inspect.getouterframes(inspect.currentframe())[1]
                # os.path.basename also handles Windows path separators.
                parser = os.path.basename(caller[1])
                msg = "In %s, line %i, %s: %s" % (parser, caller[2], problem, stripped)
                self.logger.warning(msg)

            # Save the skipped line, and we will return the whole list.
            lines.append(line)

        return lines

    def skip_line(self, inputfile, expected):
        """Read and check a single line; see skip_lines for the accepted checks."""
        return self.skip_lines(inputfile, [expected])