Skip to content

Commit

Permalink
PpCalculation: Make parsing of output files optional (aiidateam#1029)
Browse files Browse the repository at this point in the history
The `parse_data_files` option is added. When set to `False`, the
parser will not parse the output data files but just keep the raw files. The
existing option `keep_plot_file` is deprecated in favor of the renamed
`keep_data_files` option, to make it consistent with the new option.
  • Loading branch information
yakutovicha authored and bastonero committed Jan 6, 2025
1 parent 2c564e2 commit 70b38f6
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 47 deletions.
18 changes: 14 additions & 4 deletions src/aiida_quantumespresso/calculations/pp.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# -*- coding: utf-8 -*-
"""`CalcJob` implementation for the pp.x code of Quantum ESPRESSO."""
import os
import warnings

from aiida import orm
from aiida.common import datastructures, exceptions
from aiida.common.warnings import AiidaDeprecationWarning

from aiida_quantumespresso.calculations import _lowercase_dict, _uppercase_dict
from aiida_quantumespresso.utils.convert import convert_input_to_namelist_entry
Expand Down Expand Up @@ -82,7 +84,9 @@ def define(cls, spec):
spec.input('metadata.options.output_filename', valid_type=str, default=cls._DEFAULT_OUTPUT_FILE)
spec.input('metadata.options.parser_name', valid_type=str, default='quantumespresso.pp')
spec.input('metadata.options.withmpi', valid_type=bool, default=True)
spec.input('metadata.options.keep_plot_file', valid_type=bool, default=False)
spec.input('metadata.options.keep_plot_file', valid_type=bool, required=False)
spec.input('metadata.options.keep_data_files', valid_type=bool, default=False)
spec.input('metadata.options.parse_data_files', valid_type=bool, default=True)

spec.output('output_parameters', valid_type=orm.Dict)
spec.output('output_data', valid_type=orm.ArrayData)
Expand Down Expand Up @@ -218,10 +222,16 @@ def prepare_for_submission(self, folder): # pylint: disable=too-many-branches,t
# distinguish them from one another. The `fileout` filename will be the full data filename with the `fileout`
# value as a suffix.
retrieve_tuples = [self._FILEOUT, (f'{self._FILPLOT}_*{self._FILEOUT}', '.', 0)]

if self.inputs.metadata.options.keep_plot_file:
if 'keep_plot_file' in self.inputs.metadata.options:
self.inputs.metadata.options.keep_data_files = self.inputs.metadata.options.keep_plot_file
warnings.warn(
"The input parameter 'keep_plot_file' is deprecated and will be removed in version 5.0.0. "
"Please use 'keep_data_files' instead.", AiidaDeprecationWarning
)
if self.inputs.metadata.options.keep_data_files:
calcinfo.retrieve_list.extend(retrieve_tuples)
else:
# If we do not want to parse the retrieved files, temporary retrieval is meaningless
elif self.inputs.metadata.options.parse_data_files:
calcinfo.retrieve_temporary_list.extend(retrieve_tuples)

return calcinfo
58 changes: 29 additions & 29 deletions src/aiida_quantumespresso/parsers/pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,35 +117,35 @@ def get_key_from_filename(filename):
matches = re.search(pattern, filename)
return matches.group(1)

for filename in filenames:
# Directly parse the retrieved files after reading them to memory (`data_raw`). The raw data
# of each file is released from memory after parsing, to improve memory usage.
if filename.endswith(filename_suffix):
# Read the file to memory
try:
with file_opener(filename) as handle:
data_raw = handle.read()
except OSError:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_READ.format(filename=filename)
# Parse the file
try:
key = get_key_from_filename(filename)
data_parsed.append((key, parsers[iflag](data_raw, self.units_dict[parsed_data['plot_num']])))
del data_raw
except Exception as exception: # pylint: disable=broad-except
return self.exit_codes.ERROR_OUTPUT_DATAFILE_PARSE.format(filename=filename, exception=exception)

# If we don't have any parsed files, we exit. Note that this will not catch the case where there should be more
# than one file, but the engine did not retrieve all of them. Since often we anyway don't know how many files
# should be retrieved there really is no way to check this explicitly.
if not data_parsed:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_MISSING.format(filename=filename_prefix)

# Create output nodes
if len(data_parsed) == 1:
self.out('output_data', data_parsed[0][1])
else:
self.out('output_data_multiple', dict(data_parsed))
if self.node.base.attributes.get('parse_data_files'):
for filename in filenames:
# Directly parse the retrieved files after reading them to memory (`data_raw`). The raw data
# of each file is released from memory after parsing, to improve memory usage.
if filename.endswith(filename_suffix):
# Read the file to memory
try:
with file_opener(filename) as handle:
data_raw = handle.read()
except OSError:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_READ.format(filename=filename)
# Parse the file
try:
key = get_key_from_filename(filename)
data_parsed.append((key, parsers[iflag](data_raw, self.units_dict[parsed_data['plot_num']])))
del data_raw
except Exception as exception: # pylint: disable=broad-except
return self.exit_codes.ERROR_OUTPUT_DATAFILE_PARSE.format(filename=filename, exception=exception)

# If we don't have any parsed files, we exit. Note that this will not catch the case where there should be more
# than one file, but the engine did not retrieve all of them. Since often we anyway don't know how many files
# should be retrieved there really is no way to check this explicitly.
if not data_parsed:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_MISSING.format(filename=filename_prefix)

if len(data_parsed) == 1:
self.out('output_data', data_parsed[0][1])
else:
self.out('output_data_multiple', dict(data_parsed))

return self.exit(logs=logs)

Expand Down
24 changes: 22 additions & 2 deletions tests/calculations/test_pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,11 @@ def test_pp_default(fixture_sandbox, generate_calc_job, generate_inputs, file_re
file_regression.check(input_written, encoding='utf-8', extension='.in')


def test_pp_keep_plot_file(fixture_sandbox, generate_calc_job, generate_inputs):
def test_pp_keep_data_files(fixture_sandbox, generate_calc_job, generate_inputs):
"""Test a `PpCalculation` where we want to retrieve the plot file."""
entry_point_name = 'quantumespresso.pp'
inputs = generate_inputs()
inputs.metadata.options.keep_plot_file = True
inputs.metadata.options.keep_data_files = True

calc_info = generate_calc_job(fixture_sandbox, entry_point_name, inputs)
retrieve_list = ['aiida.out', 'aiida.fileout', ('aiida.filplot_*aiida.fileout', '.', 0)]
Expand All @@ -80,6 +80,26 @@ def test_pp_keep_plot_file(fixture_sandbox, generate_calc_job, generate_inputs):
assert element in calc_info.retrieve_list


def test_pp_parse_data_files(fixture_sandbox, generate_calc_job, generate_inputs):
    """Test a `PpCalculation` with `parse_data_files=False` and `keep_data_files` left at its default.

    With neither option requesting the data files, the retrieve list is expected to hold a single entry
    (`aiida.out`) and both the temporary retrieve list and the local copy list are expected to be empty.
    """
    inputs = generate_inputs()
    inputs.metadata.options.parse_data_files = False

    calc_info = generate_calc_job(fixture_sandbox, 'quantumespresso.pp', inputs)

    assert isinstance(calc_info, datastructures.CalcInfo)

    # Nothing is copied locally and nothing is retrieved temporarily.
    assert sorted(calc_info.local_copy_list) == []
    assert sorted(calc_info.retrieve_temporary_list) == []

    # The data files are not pulled: exactly one entry is retrieved, and it is `aiida.out`.
    assert len(calc_info.retrieve_list) == 1
    assert 'aiida.out' in calc_info.retrieve_list


def test_pp_cmdline_setting(fixture_sandbox, generate_calc_job, generate_inputs):
"""Test a `PpCalculation` with user-defined cmdline settings."""
entry_point_name = 'quantumespresso.pp'
Expand Down
75 changes: 63 additions & 12 deletions tests/parsers/test_pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,11 @@ def test_pp_default_1d(
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'

node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'default_1d', generate_inputs_1d)
attributes = {'keep_data_files': False, 'parse_data_files': True}

node = generate_calc_job_node(
entry_point_calc_job, fixture_localhost, 'default_1d', generate_inputs_1d, attributes=attributes
)
parser = generate_parser(entry_point_parser)
results, calcfunction = parser.parse_from_node(node, store_provenance=False)

Expand Down Expand Up @@ -157,9 +161,13 @@ def test_pp_default_1d_spherical(
"""Test a default `pp.x` calculation producing a 1D data set with spherical averaging."""
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'

attributes = {'keep_data_files': False, 'parse_data_files': True}
node = generate_calc_job_node(
entry_point_calc_job, fixture_localhost, 'default_1d_spherical', generate_inputs_1d_spherical
entry_point_calc_job,
fixture_localhost,
'default_1d_spherical',
generate_inputs_1d_spherical,
attributes=attributes
)
parser = generate_parser(entry_point_parser)
results, calcfunction = parser.parse_from_node(node, store_provenance=False)
Expand Down Expand Up @@ -200,8 +208,11 @@ def test_pp_default_2d(
"""Test a default `pp.x` calculation producing a 2D data set."""
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'
attributes = {'keep_data_files': False, 'parse_data_files': True}

node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'default_2d', generate_inputs_2d)
node = generate_calc_job_node(
entry_point_calc_job, fixture_localhost, 'default_2d', generate_inputs_2d, attributes=attributes
)
parser = generate_parser(entry_point_parser)
results, calcfunction = parser.parse_from_node(node, store_provenance=False)

Expand Down Expand Up @@ -237,8 +248,11 @@ def test_pp_default_polar(
"""Test a default `pp.x` calculation producing a polar coordinates data set."""
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'
attributes = {'keep_data_files': False, 'parse_data_files': True}

node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'default_polar', generate_inputs_polar)
node = generate_calc_job_node(
entry_point_calc_job, fixture_localhost, 'default_polar', generate_inputs_polar, attributes=attributes
)
parser = generate_parser(entry_point_parser)
results, calcfunction = parser.parse_from_node(node, store_provenance=False)

Expand Down Expand Up @@ -267,8 +281,11 @@ def test_pp_default_3d(
"""Test a default `pp.x` calculation producing a 3D data set."""
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'
attributes = {'keep_data_files': False, 'parse_data_files': True}

node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'default_3d', generate_inputs_3d)
node = generate_calc_job_node(
entry_point_calc_job, fixture_localhost, 'default_3d', generate_inputs_3d, attributes=attributes
)
parser = generate_parser(entry_point_parser)
results, calcfunction = parser.parse_from_node(node, store_provenance=False)

Expand Down Expand Up @@ -297,12 +314,16 @@ def test_pp_default_3d(
})


def test_pp_default_3d_keep_plot_file(generate_calc_job_node, generate_parser, generate_inputs_3d, tmpdir):
"""Test a `pp.x` calculation where `keep_plot_file=False` meaning files will be parsed from temporary directory."""
def test_pp_default_3d_keep_data_files(generate_calc_job_node, generate_parser, generate_inputs_3d, tmpdir):
"""Test a `pp.x` calculation where `keep_data_files=False` meaning files will be parsed from temporary directory."""
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'

attributes = {'options': {'keep_plot_file': False}, 'retrieve_temporary_list': ['aiida.fileout']}
attributes = {
'keep_data_files': False,
'parse_data_files': True,
'retrieve_temporary_list': ['aiida.fileout'],
}
node = generate_calc_job_node(
entry_point_calc_job,
test_name='default_3d',
Expand All @@ -320,12 +341,36 @@ def test_pp_default_3d_keep_plot_file(generate_calc_job_node, generate_parser, g
assert len(results['output_data'].get_arraynames()) == 4


def test_pp_default_3d_parse_data_files(generate_calc_job_node, generate_parser, generate_inputs_3d, tmpdir):
    """Check that a `pp.x` run with `parse_data_files=False` parses successfully without an `output_data` node."""
    node = generate_calc_job_node(
        'quantumespresso.pp',
        test_name='default_3d',
        inputs=generate_inputs_3d,
        attributes={'keep_data_files': False, 'parse_data_files': False},
    )
    outputs, process = generate_parser('quantumespresso.pp').parse_from_node(
        node, store_provenance=False, retrieved_temporary_folder=tmpdir
    )

    # The parsing itself must complete and report success.
    assert process.is_finished, process.exception
    assert process.is_finished_ok, process.exit_message

    # The parameters are still produced, but no data output is attached.
    assert 'output_parameters' in outputs
    assert 'output_data' not in outputs


def test_pp_default_3d_multiple(generate_calc_job_node, generate_parser, generate_inputs_3d):
"""Test a default `pp.x` calculation producing multiple files in 3D format."""
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'
attributes = {'keep_data_files': False, 'parse_data_files': True}

node = generate_calc_job_node(entry_point_calc_job, test_name='default_3d_multiple', inputs=generate_inputs_3d)
node = generate_calc_job_node(
entry_point_calc_job, test_name='default_3d_multiple', inputs=generate_inputs_3d, attributes=attributes
)
parser = generate_parser(entry_point_parser)
results, calcfunction = parser.parse_from_node(node, store_provenance=False)

Expand Down Expand Up @@ -364,9 +409,14 @@ def test_pp_default_3d_failed_missing_data(
"""Test a default `pp.x` calculation where the aiida.fileout file is missing."""
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'
attributes = {'keep_data_files': False, 'parse_data_files': True}

node = generate_calc_job_node(
entry_point_calc_job, fixture_localhost, 'default_3d_failed_missing_data', generate_inputs_3d
entry_point_calc_job,
fixture_localhost,
'default_3d_failed_missing_data',
generate_inputs_3d,
attributes=attributes
)
parser = generate_parser(entry_point_parser)
_, calcfunction = parser.parse_from_node(node, store_provenance=False)
Expand Down Expand Up @@ -398,9 +448,10 @@ def test_pp_default_3d_failed_format(fixture_localhost, generate_calc_job_node,
"""Test a default `pp.x` calculation where an unsupported output file format is used."""
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'
attributes = {'keep_data_files': False, 'parse_data_files': True}

node = generate_calc_job_node(
entry_point_calc_job, fixture_localhost, 'default_3d_failed_format', generate_inputs_3d
entry_point_calc_job, fixture_localhost, 'default_3d_failed_format', generate_inputs_3d, attributes=attributes
)
parser = generate_parser(entry_point_parser)
_, calcfunction = parser.parse_from_node(node, store_provenance=False)
Expand Down

0 comments on commit 70b38f6

Please sign in to comment.