[med-svn] [Git][med-team/qiime][upstream] New upstream version 2024.5.0
Andreas Tille (@tille)
gitlab at salsa.debian.org
Thu Jun 6 14:45:06 BST 2024
Andreas Tille pushed to branch upstream at Debian Med / qiime
Commits:
f438cdd7 by Andreas Tille at 2024-06-06T15:33:01+02:00
New upstream version 2024.5.0
- - - - -
18 changed files:
- qiime2/_version.py
- qiime2/core/archive/provenance_lib/replay.py
- qiime2/core/archive/provenance_lib/tests/test_checksum_validator.py
- qiime2/core/archive/provenance_lib/tests/test_replay.py
- qiime2/core/cache.py
- qiime2/core/testing/plugin.py
- qiime2/core/type/parse.py
- qiime2/core/type/signature.py
- qiime2/core/type/tests/test_parse.py
- qiime2/metadata/io.py
- qiime2/metadata/metadata.py
- qiime2/plugin/testing.py
- qiime2/plugin/tests/test_tests.py
- qiime2/sdk/action.py
- qiime2/sdk/result.py
- + qiime2/sdk/tests/data/singleint.txt
- qiime2/sdk/tests/test_action.py
- qiime2/sdk/tests/test_pipeline.py
Changes:
=====================================
qiime2/_version.py
=====================================
@@ -23,9 +23,9 @@ def get_keywords():
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
- git_refnames = " (tag: 2024.2.0, Release-2024.2)"
- git_full = "512d740cc815b10c3045014a20059bc2e85affe0"
- git_date = "2024-02-16 21:49:30 +0000"
+ git_refnames = " (tag: 2024.5.0, Release-2024.5)"
+ git_full = "981462dc70891c067625d8a24f1c185eeb32ef0f"
+ git_date = "2024-05-29 04:20:00 +0000"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
=====================================
qiime2/core/archive/provenance_lib/replay.py
=====================================
@@ -14,9 +14,8 @@ import pkg_resources
import shutil
import tempfile
from uuid import uuid4
-from collections import UserDict
from dataclasses import dataclass, field
-from typing import Dict, Iterator, List, Optional, Set, Tuple, Union
+from typing import Dict, Iterator, List, Optional, Tuple, Union
from .archive_parser import ProvNode
from .parse import ProvDAG
@@ -90,152 +89,354 @@ class ActionCollections():
no_provenance_nodes: List[str] = field(default_factory=list)
-class UsageVarsDict(UserDict):
+@dataclass
+class UsageVariableRecord:
+ name: str
+ variable: UsageVariable = None
+
+
+@dataclass
+class ResultCollectionRecord:
+ collection_uuid: str
+ members: Dict[str, str]
+
+
+class ReplayNamespaces:
'''
- Mapping of uuid -> unique variable name.
-
- A dict where values are also unique. Used here as a UUID-queryable
- "namespace" of strings that can be passed to usage drivers for rendering
- into unique variable names. Non-unique values cause namespace collisions.
-
- For consistency and simplicity, all str values are suffixed with _n when
- added, such that n is some int. When potentially colliding values are
- added, n is incremented as needed until collision is avoided.
- UsageVarsDicts mutate ALL str values they receive.
-
- Best practice is generally to add the UUID: variable-name pair to this,
- create the usage variable using the stored name,
- then store the usage variable in a separate {UUID: UsageVar}. This
- ensures that UsageVariable.name is unique, preventing namespace collisions.
- NamespaceCollections (below) exist to group these related structures.
-
- Note: it's not necessary (and may break the mechanism of uniqueness here)
- to maintain parity between variable names in this namespace and in the
- usage variable store. The keys in both stores, however, must match.
+ A dataclass collection of objects that each track some useful bit of
+ information relevant to replay/usage namespaces.
+
+ Attributes
+ ----------
+ _usg_var_ns : dict
+ The central usage variable namespace that ensures no namespace clashes.
+ Maps artifact uuid to `UsageVariableRecord`.
+ _action_ns : set of str
+ A collection of unique action strings that look like
+ `{plugin}_{action}_{sequential int}`.
+ result_collection_ns : dict
+ Used to keep track of result collection members during usage rendering.
+ Structure is as follows:
+
+ {
+ action-id: {
+ output-name: {
+ 'collection_uuid': uuid,
+ 'artifacts': {
+ uuid: key-in-collection,
+ (...),
+ }
+ },
+ (...),
+ }
+ }
+
+ where the action-id and the output-name uniquely identify a result
+ collection that came from some action, the `collection_uuid` key stores
+ a uuid for the entire collection needed for querying the
+ `usg_var_namespace`, and the `artifacts` key stores all result
+ collection members along with their keys so they can be accessed
+ properly.
'''
- def __setitem__(self, key: str, item: str) -> None:
- unique_item = self._uniquify(item)
- return super().__setitem__(key, unique_item)
+ def __init__(self, dag=None):
+ self._usg_var_ns = {}
+ self._action_ns = set()
+ if dag:
+ self.result_collection_ns = \
+ self.make_result_collection_namespace(dag)
+ self.artifact_uuid_to_rc_uuid, self.rc_contents_to_rc_uuid = \
+ self.make_result_collection_mappings()
+ else:
+ self.result_collection_ns = {}
+ self.artifact_uuid_to_rc_uuid = {}
+ self.rc_contents_to_rc_uuid = {}
- def _uniquify(self, var_name: str) -> str:
+ def add_usg_var_record(self, uuid, name, variable=None):
'''
- Appends _<some int> to var_name, such that the returned name won't
- collide with any variable-name values that already exist in the dict.
+ Given a uuid, name, and optionally a usage variable, create a usage
+ variable record and add it to the namespace.
Parameters
----------
- var_name : str
- The variable name to make unique.
+ uuid : str
+ The uuid of the the artifact or result collection.
+ name : str
+ The not-yet-unique name of the artifact or result collection.
+ variable : UsageVariable or None
+ The optional UsageVariable instance to add to the record.
Returns
-------
str
- The unique integer-appended variable name.
+ The now-unique name of the artifact or result collection.
'''
- some_int = 0
- unique_name = f'{var_name}_{some_int}'
- values = self.data.values()
-
- # no-prov nodes are stored with angle brackets around them, but
- # those brackets shouldn't be considered on uniqueness check
- while unique_name in values or f'<{unique_name}>' in values:
- some_int += 1
- unique_name = f"{var_name}_{some_int}"
+ unique_name = self._make_unique_name(name)
+
+ self._usg_var_ns[uuid] = UsageVariableRecord(unique_name, variable)
+
return unique_name
- def get_key(self, value: str):
+ def update_usg_var_record(self, uuid, variable):
+ '''
+ Given a uuid update the record to contain the passed usage variable.
+ The record is assumed to already be present in the namespace.
+
+ Parameters
+ ----------
+ uuid : str
+ The uuid of the artifact or result collection for which to update
+ the usage variable instance.
+ variable : UsageVariable
+ The usage variable to add to the record.
+ '''
+ self._usg_var_ns[uuid].variable = variable
+
+ def get_usg_var_record(self, uuid):
+ '''
+ Given a uuid, return the corresponding usage variable record, or none
+ if the uuid is not in the namespace.
+
+ Parameters
+ ----------
+ uuid : str
+ The uuid of the artifact or result collection for which to return
+ the record.
+
+ Returns
+ -------
+ UsageVariableRecord or None
+ The record if the uuid was found, otherwise None.
+ '''
+ try:
+ return self._usg_var_ns[uuid]
+ except KeyError:
+ return None
+
+ def get_usg_var_uuid(self, name: str) -> str:
'''
- Given some value in the dict, returns its key.
+ Given a usage variable name, return its uuid, or raise KeyError if the
+ name is not in the namespace.
Parameters
----------
- value : str
- The value to query.
+ name : str
+ The name of the usage variable record of interest.
Returns
-------
str
- The key (uuid) corresponding to the value (variable name).
+ The corresponding uuid of the record.
Raises
------
KeyError
- If the value is not found.
+ If the name is not found in the namespace.
'''
- for key, val in self.items():
- if value == val:
- return key
- raise KeyError(f'passed value \'{value}\' does not exist in the dict.')
+ for uuid, record in self._usg_var_ns.items():
+ if name == record.name:
+ return uuid
+
+ raise KeyError(
+ f'The queried name \'{name}\' does not exist in the namespace.'
+ )
- def wrap_val_in_angle_brackets(self, key: str):
+ def _make_unique_name(self, name: str) -> str:
'''
- Wraps the variable name pointed to by `key` in brackets `<>`.
+ Appends `_<some int>` to name, such that the returned name won't
+ collide with any variable names that already exist in `usg_var_ns`.
Parameters
----------
- key : str
- The key (uuid) of the variable name to wrap in brackets.
+ name : str
+ The variable name to make unique.
- Notes
- -----
- If this is accidentally run twice, it will break things.
+ Returns
+ -------
+ str
+ The unique integer-appended variable name.
'''
- super().__setitem__(key, f'<{self.data[key]}>')
+ counter = 0
+ unique_name = f'{name}_{counter}'
+ names = [record.name for record in self._usg_var_ns.values()]
+ # no-provenance nodes are stored with angle brackets around them
+ while unique_name in names or f'<{unique_name}>' in names:
+ counter += 1
+ unique_name = f'{name}_{counter}'
-@dataclass
-class NamespaceCollections:
- '''
- A dataclass collection of objects that each track some useful bit of
- information relevant to usage namespaces.
+ return unique_name
- Attributes
- ----------
- usg_var_namespace : UsageVarsDict
- A uuid -> variable-name mapping that ensures that variable names remain
- unique in the dictionary.
- usg_vars : dict
- A uuid -> UsageVariable mapping. The names of the UsageVariables here
- do not necessarily match those in `usg_var_namespace`.
- action_namespace : set of str
- A collection of unique action strings that look like
- `{plugin}_{action}_{sequential int}`.
- result_collection_ns : dict
- Used to keep track of result collection members during usage rendering.
- Structure is as follows:
+ def make_result_collection_namespace(self, dag: nx.digraph) -> dict:
+ '''
+ Constructs the result collections namespaces from the parsed digraph.
- {
- action-id: {
- output-name: {
- 'collection_uuid': uuid,
- 'artifacts': {
- uuid: key-in-collection,
- (...),
- }
- },
- (...),
+ Parameters
+ ----------
+ dag : nx.digraph
+ The digraph representing the parsed provenance.
+
+ Returns
+ -------
+ dict
+ The result collection namespace.
+ '''
+ rc_ns = {}
+ for node in dag:
+ provnode = dag.get_node_data(node)
+ rc_key = provnode.action.result_collection_key
+ if rc_key:
+ # output result collection
+ action_id = provnode.action.action_id
+ output_name = provnode.action.output_name
+ if action_id not in rc_ns:
+ rc_ns[action_id] = {}
+ if output_name not in rc_ns[action_id]:
+ artifacts = {rc_key: provnode._uuid}
+ rc_ns[action_id][output_name] = ResultCollectionRecord(
+ collection_uuid=str(uuid4()), members=artifacts
+ )
+ else:
+ rc_ns[action_id][output_name].members[rc_key] = \
+ provnode._uuid
+
+ return rc_ns
+
+ def make_result_collection_mappings(self) -> Tuple[Dict]:
+ '''
+ Builds two mappings:
+ - one from artifact uuid to a tuple of the uuid of the result
+ collection of which it is a member and its key in the collection
+ - one from the hash of the result collection contents (both with
+ and without keys) to the uuid of the result collection
+
+ Returns
+ -------
+ tuple of dict
+ The two result collection mappings.
+ '''
+ a_to_c = {} # artifact uuid -> collection uuid
+ c_to_c = {} # hash of collection contents -> collection uuid
+ for action_id in self.result_collection_ns:
+ for output_name in self.result_collection_ns[action_id]:
+ record = self.result_collection_ns[action_id][output_name]
+ for key, uuid in record.members.items():
+ a_to_c[uuid] = (record.collection_uuid, key)
+
+ hashed_contents = self.hash_result_collection(record.members)
+ hashed_contents_with_keys = \
+ self.hash_result_collection_with_keys(record.members)
+
+ c_to_c[hashed_contents] = record.collection_uuid
+ c_to_c[hashed_contents_with_keys] = record.collection_uuid
+
+ return a_to_c, c_to_c
+
+ def hash_result_collection_with_keys(self, members: Dict) -> int:
+ '''
+ Hashes the contents of a result collection. Useful for finding
+ corresponding usage variables when rendering the replay of result
+ collections. Order of the input result collection is not taken into
+ account (the result collections are ordered alphabetically by key).
+
+ Parameters
+ ----------
+ members : dict
+ The contents of a result collection, looks like:
+
+ {
+ 'a': some-uuid,
+ 'b': some-other-uuid,
+ (...)
}
- }
- where the action-id and the output-name uniquely identify a result
- collection that came from some action, the `collection_uuid` key stores
- a uuid for the entire collection needed for querying the
- `usg_var_namespace`, and the `artifacts` key stores all result
- collection members along with their keys so they can be accessed
- properly.
- '''
- usg_var_namespace: UsageVarsDict = field(default_factory=UsageVarsDict)
- usg_vars: Dict[str, UsageVariable] = field(default_factory=dict)
- action_namespace: Set[str] = field(default_factory=set)
- result_collection_ns: Dict = field(default_factory=dict)
- rc_contents_to_rc_uuid: Dict = field(default_factory=dict)
- artifact_uuid_to_rc_uuid: Dict = field(default_factory=dict)
+ Returns
+ -------
+ int
+ The hashed contents.
+ '''
+ sorted_members = {key: members[key] for key in sorted(members)}
+ hashable_members_with_keys = tuple(
+ (key, value) for key, value in sorted_members.items()
+ )
+ return hash(hashable_members_with_keys)
-@dataclass
-class ResultCollectionRecord:
- collection_uuid: str
- members: Dict[str, str]
+ def hash_result_collection(self, members: Union[Dict, List]) -> int:
+ '''
+ Hashes a list of uuids. Useful for finding corresponding result
+ collections that may have been cast to list of uuids. If a dict is
+ input it is first converted to a list of values (uuids).
+
+ Parameters
+ ----------
+ members : dict or list
+ The contents of a result collection, either as a dict or list.
+
+ Returns
+ -------
+ int
+ The hashed contents.
+ '''
+ if type(members) is dict:
+ members = list(members.values())
+
+ sorted_members = list(sorted(members))
+ hashable_members = tuple(uuid for uuid in sorted_members)
+
+ return hash(hashable_members)
+
+ def add_rc_member_to_ns(self, uuid, name, use):
+ '''
+ Accesses a result collection member of interest and adds it the
+ central usage variable namespace.
+
+ Parameters
+ ----------
+ uuid : str
+ The uuid of the artifact of interest.
+ name : str
+ The desired name of the to-be-made usage variable.
+ use : Usage
+ The currently executing usage driver.
+ '''
+ collection_uuid, key = self.artifact_uuid_to_rc_uuid[uuid]
+ collection_var = self.get_usg_var_record(collection_uuid).variable
+
+ var_name = self.add_usg_var_record(uuid, name)
+
+ usg_var = use.get_artifact_collection_member(
+ var_name, collection_var, key
+ )
+
+ self.update_usg_var_record(uuid, usg_var)
+
+ def uniquify_action_name(self, plugin: str, action: str) -> str:
+ '''
+ Creates a unique name by concatenating plugin, action, and a counter,
+ and adds this name to _action_ns before returning it.
+
+ Parameters
+ ----------
+ plugin : str
+ The name of the plugin.
+ action : str
+ The name of the action.
+ action_namespace : set of str
+ The collection of unqiue action names.
+
+ Returns
+ -------
+ str
+ The unique action name.
+ '''
+ counter = 0
+ plg_action_name = f'{plugin}_{action}_{counter}'
+ while plg_action_name in self._action_ns:
+ counter += 1
+ plg_action_name = f'{plugin}_{action}_{counter}'
+ self._action_ns.add(plg_action_name)
+
+ return plg_action_name
def replay_provenance(
@@ -326,14 +527,7 @@ def replay_provenance(
verbose=verbose, md_out_dir=md_out_dir
)
- result_collection_ns = make_result_collection_namespace(dag)
- artifact_uuid_to_rc_uuid, rc_contents_to_rc_uuid = \
- make_result_collection_mappings(result_collection_ns)
- ns = NamespaceCollections(
- result_collection_ns=result_collection_ns,
- artifact_uuid_to_rc_uuid=artifact_uuid_to_rc_uuid,
- rc_contents_to_rc_uuid=rc_contents_to_rc_uuid
- )
+ ns = ReplayNamespaces(dag)
build_usage_examples(dag, cfg, ns)
if not suppress_header:
@@ -348,122 +542,50 @@ def replay_provenance(
out_fh.write(output)
-def make_result_collection_namespace(dag: nx.digraph) -> dict:
- '''
- Constructs the result collections namespaces from the parsed digraph.
-
- Parameters
- ----------
- dag : nx.digraph
- The digraph representing the parsed provenance.
-
- Returns
- -------
- dict
- A fleshed-out dict to be attached to
- `NamsepaceCollections.result_collection_ns`.
- '''
- rc_ns = {}
- for node in dag:
- provnode = dag.get_node_data(node)
- rc_key = provnode.action.result_collection_key
- if rc_key:
- # output result collection
- action_id = provnode.action.action_id
- output_name = provnode.action.output_name
- if action_id not in rc_ns:
- rc_ns[action_id] = {}
- if output_name not in rc_ns[action_id]:
- artifacts = {rc_key: provnode._uuid}
- rc_ns[action_id][output_name] = ResultCollectionRecord(
- collection_uuid=str(uuid4()), members=artifacts
- )
- else:
- rc_ns[action_id][output_name].members[rc_key] = provnode._uuid
-
- # TODO: maybe handle input result collections here
- return rc_ns
-
-
-def make_result_collection_mappings(result_collection_ns: Dict) -> Tuple[Dict]:
- '''
- '''
- a_to_c = {} # artifact uuid -> collection uuid
- c_to_c = {} # hash of collection contents -> collection uuids
- for action_id in result_collection_ns:
- for output_name in result_collection_ns[action_id]:
- record = result_collection_ns[action_id][output_name]
- for key, uuid in record.members.items():
- a_to_c[uuid] = (record.collection_uuid, key)
-
- hashed_contents = hash_result_collection(record.members)
- hashed_contents_with_keys = \
- hash_result_collection_with_keys(record.members)
-
- c_to_c[hashed_contents] = record.collection_uuid
- c_to_c[hashed_contents_with_keys] = record.collection_uuid
-
- return a_to_c, c_to_c
-
-
-def hash_result_collection_with_keys(members: Dict) -> int:
+def build_usage_examples(
+ dag: ProvDAG, cfg: ReplayConfig, ns: ReplayNamespaces
+):
'''
- Hashes the contents of a result collection. Useful for finding
- corresponding usage variables when rendering the replay of result
- collections. Order of the input result collection is not taken into
- account (the result collections are ordered alphabetically by key).
+ Builds a chained usage example representing the analysis `dag`.
Parameters
----------
- members : dict
- The contents of a result collection, looks like:
-
- {
- 'a': some-uuid,
- 'b': some-other-uuid,
- (...)
- }
-
- Returns
- -------
- int
- The hashed contents.
- '''
- sorted_members = {key: members[key] for key in sorted(members)}
- hashable_members_with_keys = tuple(
- (key, value) for key, value in sorted_members.items()
- )
-
- return hash(hashable_members_with_keys)
-
-
-def hash_result_collection(members: Union[Dict, List]) -> int:
+ dag : ProvDAG
+ The dag representation of parsed provenance.
+ cfg : ReplayConfig
+ Replay configuration options.
+ ns : ReplayNamespaces
+ Info tracking usage and result collection namespaces.
'''
- Hashes a list of uuids. Useful for finding corresponding result collections
- that may have been cast to list of uuids. If a dict is input it is first
- converted to a list of values (uuids).
-
- Parameters
- ----------
- members : dict or list
- The contents of a result collection, either as a dict or list.
+ sorted_nodes = nx.topological_sort(dag.collapsed_view)
+ actions = group_by_action(dag, sorted_nodes, ns)
- Returns
- -------
- int
- The hashed contents.
- '''
- if type(members) is dict:
- members = list(members.values())
+ for node_id in actions.no_provenance_nodes:
+ node = dag.get_node_data(node_id)
+ build_no_provenance_node_usage(node, node_id, ns, cfg)
- sorted_members = list(sorted(members))
- hashable_members = tuple(uuid for uuid in sorted_members)
+ for action_id in (std_actions := actions.std_actions):
+ # we are replaying actions not nodes, so any associated node works
+ try:
+ some_node_id = next(iter(std_actions[action_id]))
+ node = dag.get_node_data(some_node_id)
+ except KeyError:
+ # we have result collection
+ some_output_name = next(iter(ns.result_collection_ns[action_id]))
+ some_node_id = next(iter(
+ ns.result_collection_ns[action_id][
+ some_output_name].members.values()
+ ))
+ node = dag.get_node_data(some_node_id)
- return hash(hashable_members)
+ if node.action.action_type == 'import':
+ build_import_usage(node, ns, cfg)
+ else:
+ build_action_usage(node, ns, std_actions, action_id, cfg)
def group_by_action(
- dag: ProvDAG, nodes: Iterator[str], ns: NamespaceCollections
+ dag: ProvDAG, nodes: Iterator[str], ns: ReplayNamespaces
) -> ActionCollections:
'''
This groups the nodes from a DAG by action, returning an ActionCollections
@@ -482,7 +604,7 @@ def group_by_action(
The dag representation of parsed provenance.
nodes : iterator of str
An iterator over node uuids.
- ns : NamespaceCollections
+ ns : ReplayNamespaces
Info tracking usage and result collection namespaces.
Returns
@@ -514,52 +636,10 @@ def group_by_action(
return actions
-def build_usage_examples(
- dag: ProvDAG, cfg: ReplayConfig, ns: NamespaceCollections
-):
- '''
- Builds a chained usage example representing the analysis `dag`.
-
- Parameters
- ----------
- dag : ProvDAG
- The dag representation of parsed provenance.
- cfg : ReplayConfig
- Replay configuration options.
- ns : NamespaceCollections
- Info tracking usage and result collection namespaces.
- '''
- sorted_nodes = nx.topological_sort(dag.collapsed_view)
- actions = group_by_action(dag, sorted_nodes, ns)
-
- for node_id in actions.no_provenance_nodes:
- node = dag.get_node_data(node_id)
- build_no_provenance_node_usage(node, node_id, ns, cfg)
-
- for action_id in (std_actions := actions.std_actions):
- # we are replaying actions not nodes, so any associated node works
- try:
- some_node_id = next(iter(std_actions[action_id]))
- node = dag.get_node_data(some_node_id)
- except KeyError:
- # we have result collection
- some_output_name = next(iter(ns.result_collection_ns[action_id]))
- some_node_id = next(iter(
- ns.result_collection_ns[action_id][
- some_output_name].members.values()
- ))
- node = dag.get_node_data(some_node_id)
-
- if node.action.action_type == 'import':
- build_import_usage(node, ns, cfg)
- else:
- build_action_usage(node, ns, std_actions, action_id, cfg)
-
-
def build_no_provenance_node_usage(
node: Optional[ProvNode],
uuid: str,
- ns: NamespaceCollections,
+ ns: ReplayNamespaces,
cfg: ReplayConfig
):
'''
@@ -574,7 +654,7 @@ def build_no_provenance_node_usage(
is available.
uuid : str
The uuid of the node/result.
- ns : NamespaceCollections
+ ns : ReplayNamespaces
Info tracking usage and result collection namespaces.
cfg : ReplayConfig
Replay configuration options. Contains the modified usage driver.
@@ -595,21 +675,22 @@ def build_no_provenance_node_usage(
var_name = 'no-provenance-node'
else:
var_name = camel_to_snake(node.type)
- ns.usg_var_namespace.update({uuid: var_name})
- ns.usg_var_namespace.wrap_val_in_angle_brackets(uuid)
+
+ ns.add_usg_var_record(uuid, var_name)
# make a usage variable for downstream consumption
empty_var = cfg.use.usage_variable(
- ns.usg_var_namespace[uuid], lambda: None, 'artifact'
+ ns.get_usg_var_record(uuid).name, lambda: None, 'artifact'
)
- ns.usg_vars.update({uuid: empty_var})
+ ns.update_usg_var_record(uuid, empty_var)
# log the no-prov node
- cfg.use.comment(f"{uuid} {ns.usg_vars[uuid].to_interface_name()}")
+ usg_var = ns.get_usg_var_record(uuid).variable
+ cfg.use.comment(f"{uuid} {usg_var.to_interface_name()}")
def build_import_usage(
- node: ProvNode, ns: NamespaceCollections, cfg: ReplayConfig
+ node: ProvNode, ns: ReplayNamespaces, cfg: ReplayConfig
):
'''
Given a ProvNode, adds an import usage example for it, roughly
@@ -629,27 +710,29 @@ def build_import_usage(
----------
node : ProvNode
The imported node of interest.
- ns : NamespaceCollections
+ ns : ReplayNamespaces
Info tracking usage and result collection namespaces.
cfg : ReplayConfig
Replay configuration options. Contains the modified usage driver.
'''
format_id = node._uuid + '_f'
- ns.usg_var_namespace.update({format_id: camel_to_snake(node.type) + '_f'})
+ ns.add_usg_var_record(format_id, camel_to_snake(node.type) + '_f')
+
format_for_import = cfg.use.init_format(
- ns.usg_var_namespace[format_id], lambda: None
+ ns.get_usg_var_record(format_id).name, lambda: None
)
- ns.usg_var_namespace.update({node._uuid: camel_to_snake(node.type)})
+ var_name = ns.add_usg_var_record(node._uuid, camel_to_snake(node.type))
+
use_var = cfg.use.import_from_format(
- ns.usg_var_namespace[node._uuid], node.type, format_for_import
+ var_name, node.type, format_for_import
)
- ns.usg_vars.update({node._uuid: use_var})
+ ns.update_usg_var_record(node._uuid, use_var)
def build_action_usage(
node: ProvNode,
- ns: NamespaceCollections,
+ ns: ReplayNamespaces,
std_actions: Dict[str, Dict[str, str]],
action_id: str,
cfg: ReplayConfig
@@ -669,7 +752,7 @@ def build_action_usage(
----------
node : ProvNode
The node the creating action of which is of interest.
- ns : NamespaceCollections
+ ns : ReplayNamespaces
Info tracking usage and result collection namespaces.
std_actions : dict
Expalained in ActionCollections.
@@ -681,7 +764,7 @@ def build_action_usage(
command_specific_md_context_has_been_printed = False
plugin = node.action.plugin
action = node.action.action_name
- plg_action_name = uniquify_action_name(plugin, action, ns.action_namespace)
+ plg_action_name = ns.uniquify_action_name(plugin, action)
inputs = _collect_action_inputs(cfg.use, ns, node)
@@ -697,11 +780,11 @@ def build_action_usage(
continue
if isinstance(param_val, MetadataInfo):
- unique_md_id = ns.usg_var_namespace[node._uuid] + '_' + param_name
- ns.usg_var_namespace.update(
- {unique_md_id: camel_to_snake(param_name)}
+ unique_md_id = ns.get_usg_var_record(node._uuid).name \
+ + '_' + param_name
+ md_fn = ns.add_usg_var_record(
+ unique_md_id, camel_to_snake(param_name)
)
- md_fn = ns.usg_var_namespace[unique_md_id]
if cfg.dump_recorded_metadata:
md_with_ext = md_fn + '.tsv'
dump_recorded_md_file(
@@ -712,12 +795,7 @@ def build_action_usage(
# the local dir and fp where md will be saved (if at all) is:
md_fn = f'{plg_action_name}/{md_fn}'
md = init_md_from_recorded_md(
- node,
- param_name,
- unique_md_id,
- ns.usg_var_namespace,
- cfg,
- md_fn
+ node, param_name, unique_md_id, ns, cfg, md_fn
)
else:
if not cfg.md_context_has_been_printed:
@@ -744,11 +822,7 @@ def build_action_usage(
if not param_val.input_artifact_uuids:
md = init_md_from_md_file(
- node,
- param_name,
- unique_md_id,
- ns.usg_var_namespace,
- cfg
+ node, param_name, unique_md_id, ns, cfg
)
else:
md = init_md_from_artifacts(param_val, ns, cfg)
@@ -763,14 +837,14 @@ def build_action_usage(
cfg.use.UsageOutputNames(**outputs)
)
- # write the usage vars into the UsageVars dict so we can use em downstream
+ # add the usage variable(s) to the namespace
for res in usg_var:
- uuid_key = ns.usg_var_namespace.get_key(value=res.name)
- ns.usg_vars[uuid_key] = res
+ uuid_key = ns.get_usg_var_uuid(res.name)
+ ns.update_usg_var_record(uuid_key, res)
def _collect_action_inputs(
- use: Usage, ns: NamespaceCollections, node: ProvNode
+ use: Usage, ns: ReplayNamespaces, node: ProvNode
) -> dict:
'''
Returns a dict containing the action Inputs for a ProvNode.
@@ -780,7 +854,7 @@ def _collect_action_inputs(
----------
use : Usage
The currently executing usage driver.
- ns : NamespaceCollections
+ ns : ReplayNamespaces
Info tracking usage and result collection namespaces.
node : ProvNode
The node the creating action of which's inputs are of interest.
@@ -796,56 +870,67 @@ def _collect_action_inputs(
# this as it was not provided
if input_value is None:
continue
+
# Received a single artifact
if type(input_value) is str:
- if input_value not in ns.usg_vars:
- ns.usg_vars[input_value] = _get_rc_member(
- use, ns, input_value, input_name)
+ if ns.get_usg_var_record(input_value) is None:
+ ns.add_rc_member_to_ns(input_value, input_name, use)
+
+ resolved_input = ns.get_usg_var_record(input_value).variable
- resolved_input = ns.usg_vars[input_value]
# Received a list of artifacts
elif type(input_value) is list:
# may be rc cast to list so search for equivalent rc
# if not then follow algorithm for single str for each
- input_hash = hash_result_collection(input_value)
+ input_hash = ns.hash_result_collection(input_value)
if collection_uuid := ns.rc_contents_to_rc_uuid.get(input_hash):
# corresponding rc found
- resolved_input = ns.usg_vars[collection_uuid]
+ resolved_input = ns.get_usg_var_record(
+ collection_uuid
+ ).variable
else:
# find each artifact and assemble into a list
input_list = []
for input_value in input_value:
- if input_value not in ns.usg_vars:
- ns.usg_vars[input_value] = _get_rc_member(
- use, ns, input_value, input_name)
+ if ns.get_usg_var_record(input_value) is None:
+ ns.add_rc_member_to_ns(input_value, input_name, use)
+
+ input_list.append(
+ ns.get_usg_var_record(input_value).variable
+ )
- input_list.append(ns.usg_vars[input_value])
resolved_input = input_list
+
# Received a dict of artifacts (ResultCollection)
elif type(input_value) is dict:
- # rc -- search for equivalent rc if not found then create new rc
+ # search for equivalent rc if not found then create new rc
rc = input_value
- input_hash = hash_result_collection_with_keys(rc)
+ input_hash = ns.hash_result_collection_with_keys(rc)
if collection_uuid := ns.rc_contents_to_rc_uuid.get(input_hash):
# corresponding rc found
- resolved_input = ns.usg_vars[collection_uuid]
+ resolved_input = ns.get_usg_var_record(
+ collection_uuid
+ ).variable
else:
# build new rc
new_rc = {}
for key, input_value in rc.items():
- if input_value not in ns.usg_vars:
- ns.usg_vars[input_value] = _get_rc_member(
- use, ns, input_value, input_name)
+ if ns.get_usg_var_record(input_value) is None:
+ ns.add_rc_member_to_ns(input_value, input_name, use)
- new_rc[key] = ns.usg_vars[input_value]
+ new_rc[key] = ns.get_usg_var_record(input_value).variable
# make new rc usg var
new_collection_uuid = uuid4()
- ns.usg_var_namespace[new_collection_uuid] = input_name
- var_name = ns.usg_var_namespace[new_collection_uuid]
+ var_name = ns.add_usg_var_record(
+ new_collection_uuid, input_name
+ )
usg_var = use.construct_artifact_collection(var_name, new_rc)
- ns.usg_vars[new_collection_uuid] = usg_var
- resolved_input = ns.usg_vars[new_collection_uuid]
+ ns.update_usg_var_record(new_collection_uuid, usg_var)
+ resolved_input = ns.get_usg_var_record(
+ new_collection_uuid
+ ).variable
+
# If we ever mess with inputs again and add a new type here this should
# trip otherwise we should never see it
else:
@@ -859,24 +944,8 @@ def _collect_action_inputs(
return inputs_dict
-def _get_rc_member(use, ns, uuid, input_name):
- '''
- '''
- # find in rc and render destructure
- collection_uuid, key = ns.artifact_uuid_to_rc_uuid[uuid]
- collection_name = ns.usg_vars[collection_uuid]
-
- ns.usg_var_namespace[uuid] = input_name
- var_name = ns.usg_var_namespace[uuid]
- usg_var = use.get_artifact_collection_member(
- var_name, collection_name, key
- )
-
- return usg_var
-
-
def _uniquify_output_names(
- ns: NamespaceCollections, raw_outputs: dict
+ ns: ReplayNamespaces, raw_outputs: dict
) -> dict:
'''
Returns a dict containing the uniquified output names from a ProvNode.
@@ -884,7 +953,7 @@ def _uniquify_output_names(
Parameters
----------
- ns : NamespaceCollections
+ ns : ReplayNamespaces
Info tracking usage and result collection namespaces.
raw_outputs : dict
Mapping of node uuid to output-name as seen in action.yaml.
@@ -896,9 +965,9 @@ def _uniquify_output_names(
'''
outputs = {}
for uuid, output_name in raw_outputs:
- ns.usg_var_namespace.update({uuid: output_name})
- uniquified_output_name = ns.usg_var_namespace[uuid]
- outputs.update({output_name: uniquified_output_name})
+ var_name = ns.add_usg_var_record(uuid, output_name)
+ outputs.update({output_name: var_name})
+
return outputs
@@ -906,7 +975,7 @@ def init_md_from_recorded_md(
node: ProvNode,
param_name: str,
md_id: str,
- ns: UsageVarsDict,
+ ns: ReplayNamespaces,
cfg: ReplayConfig,
md_fn: str
) -> UsageVariable:
@@ -922,8 +991,8 @@ def init_md_from_recorded_md(
The name of the parameter to which metadata was passed.
md_id : str
Looks like: f'{node uuid}_{param name}'.
- ns : UsageVarsDict
- Mapping of uuid -> unique variable name.
+ ns : ReplayNamespaces
+ Namespaces associated with provenance replay.
cfg : ReplayConfig
Replay configuration options. Contains the executing usage driver.
md_fn : str
@@ -956,14 +1025,18 @@ def init_md_from_recorded_md(
else:
fn = str(cwd / 'recorded_metadata' / md_fn)
- md = cfg.use.init_metadata(ns[md_id], factory, dumped_md_fn=fn)
+ md = cfg.use.init_metadata(
+ ns.get_usg_var_record(md_id).name, factory, dumped_md_fn=fn
+ )
plugin = node.action.plugin
action = node.action.action_name
+
if param_is_metadata_column(cfg, param_name, plugin, action):
mdc_id = node._uuid + '_mdc'
- mdc_name = ns[md_id] + '_mdc'
- ns.update({mdc_id: mdc_name})
- md = cfg.use.get_metadata_column(ns[mdc_id], '<column name>', md)
+ mdc_name = ns.get_usg_var_record(md_id).name + '_mdc'
+ var_name = ns.add_usg_var_record(mdc_id, mdc_name)
+ md = cfg.use.get_metadata_column(var_name, '<column name>', md)
+
return md
@@ -971,7 +1044,7 @@ def init_md_from_md_file(
node: ProvNode,
param_name: str,
md_id: str,
- ns: UsageVarsDict,
+ ns: ReplayNamespaces,
cfg: ReplayConfig
) -> UsageVariable:
'''
@@ -986,8 +1059,8 @@ def init_md_from_md_file(
The parameter name to which the metadata file was passed.
md_id : str
Looks like: f'{node uuid}_{param name}'.
- ns : UsageVarsDict
- Mapping of uuid -> unique variable name.
+ ns : ReplayNamespaces
+ Namespaces associated with provenance replay.
cfg : ReplayConfig
Replay configuration options. Contains the executing usage driver.
@@ -998,17 +1071,19 @@ def init_md_from_md_file(
'''
plugin = node.action.plugin
action = node.action.action_name
- md = cfg.use.init_metadata(ns[md_id], lambda: None)
+ md = cfg.use.init_metadata(ns.get_usg_var_record(md_id).name, lambda: None)
+
if param_is_metadata_column(cfg, param_name, plugin, action):
mdc_id = node._uuid + '_mdc'
- mdc_name = ns[md_id] + '_mdc'
- ns.update({mdc_id: mdc_name})
- md = cfg.use.get_metadata_column(ns[mdc_id], '<column name>', md)
+ mdc_name = ns.get_usg_var_record(md_id).name + '_mdc'
+ var_name = ns.add_usg_var_record(mdc_id, mdc_name)
+ md = cfg.use.get_metadata_column(var_name, '<column name>', md)
+
return md
def init_md_from_artifacts(
- md_inf: MetadataInfo, ns: NamespaceCollections, cfg: ReplayConfig
+ md_inf: MetadataInfo, ns: ReplayNamespaces, cfg: ReplayConfig
) -> UsageVariable:
'''
Initializes and returns a Metadata UsageVariable with no real data,
@@ -1023,7 +1098,7 @@ def init_md_from_artifacts(
Named tuple with fields `input_artifact_uuids` which is a list of
uuids and `relative_fp` which is the filename of the metadata file.
These are parsed from a !metadata tag in action.yaml.
- ns : NamespaceCollections
+ ns : ReplayNamespaces
Info tracking usage and result collection namespaces.
cfg: ReplayConfig
Replay configuration options. Contains the executing usage driver.
@@ -1047,26 +1122,28 @@ def init_md_from_artifacts(
md_files_in = []
for artifact_uuid in md_inf.input_artifact_uuids:
amd_id = artifact_uuid + '_a'
- var_name = ns.usg_vars[artifact_uuid].name + '_a'
- if amd_id not in ns.usg_var_namespace:
- ns.usg_var_namespace.update({amd_id: var_name})
+ var_name = ns.get_usg_var_record(artifact_uuid).variable.name + '_a'
+ if ns.get_usg_var_record(amd_id) is None:
+ var_name = ns.add_usg_var_record(amd_id, var_name)
art_as_md = cfg.use.view_as_metadata(
- ns.usg_var_namespace[amd_id], ns.usg_vars[artifact_uuid]
+ var_name, ns.get_usg_var_record(artifact_uuid).variable
)
- ns.usg_vars.update({amd_id: art_as_md})
+ ns.update_usg_var_record(amd_id, art_as_md)
else:
- art_as_md = ns.usg_vars[amd_id]
+ art_as_md = ns.get_usg_var_record(amd_id).variable
+
md_files_in.append(art_as_md)
if len(md_inf.input_artifact_uuids) > 1:
# we can't uniquify this normally, because one uuid can be merged with
# combinations of others
merge_id = '-'.join(md_inf.input_artifact_uuids)
- ns.usg_var_namespace.update({merge_id: 'merged_artifacts'})
+ var_name = ns.add_usg_var_record(merge_id, 'merged_artifacts')
merged_md = cfg.use.merge_metadata(
- ns.usg_var_namespace[merge_id], *md_files_in
+ var_name, *md_files_in
)
- ns.usg_vars.update({merge_id: merged_md})
+ ns.update_usg_var_record(merge_id, merged_md)
+
return art_as_md
@@ -1171,36 +1248,6 @@ def param_is_metadata_column(
return 'MetadataColumn' in str(param_spec.qiime_type)
-def uniquify_action_name(
- plugin: str, action: str, action_namespace: Set[str]
-) -> str:
- '''
- Creates a unique name by concatenating plugin, action, and a counter,
- and adds this name to action_ns before returning it.
-
- Parameters
- ----------
- plugin : str
- The name of the plugin.
- action : str
- The name of the action.
- action_namespace : set of str
- The collection of unqiue action names.
-
- Returns
- -------
- str
- The unique action name.
- '''
- counter = 0
- plg_action_name = f'{plugin}_{action}_{counter}'
- while plg_action_name in action_namespace:
- counter += 1
- plg_action_name = f'{plugin}_{action}_{counter}'
- action_namespace.add(plg_action_name)
- return plg_action_name
-
-
def collect_citations(
dag: ProvDAG, deduplicate: bool = True
) -> bp.bibdatabase.BibDatabase:
=====================================
qiime2/core/archive/provenance_lib/tests/test_checksum_validator.py
=====================================
@@ -62,9 +62,7 @@ class ValidateChecksumTests(unittest.TestCase):
uuid = os.listdir(tempdir)[0]
root_dir = os.path.join(tempdir, uuid)
- print(os.listdir(root_dir))
os.remove(os.path.join(root_dir, 'metadata.yaml'))
- print(os.listdir(root_dir))
with open(os.path.join(root_dir, 'tamper.txt'), 'w') as fh:
pass
citations_path = \
=====================================
qiime2/core/archive/provenance_lib/tests/test_replay.py
=====================================
@@ -22,45 +22,22 @@ from qiime2.plugins import ArtifactAPIUsageVariable
from ..parse import ProvDAG
from ..replay import (
- ActionCollections, BibContent, NamespaceCollections, ReplayConfig,
- UsageVarsDict,
+ ActionCollections, BibContent, ReplayConfig, ReplayNamespaces,
+ UsageVariableRecord,
build_no_provenance_node_usage, build_import_usage, build_action_usage,
build_usage_examples, collect_citations, dedupe_citations,
dump_recorded_md_file, group_by_action, init_md_from_artifacts,
init_md_from_md_file, init_md_from_recorded_md, replay_provenance,
- uniquify_action_name, replay_citations
-)
-from .testing_utilities import (
- CustomAssertions, DummyArtifacts
+ replay_citations
)
+from .testing_utilities import CustomAssertions, DummyArtifacts
from ..usage_drivers import ReplayPythonUsage
from ...provenance import MetadataInfo
from qiime2.sdk.util import camel_to_snake
-class UsageVarsDictTests(unittest.TestCase):
- def test_uniquify(self):
- collision_val = 'emp_single_end_sequences'
- unique_val = 'some_prime'
- ns = UsageVarsDict({'123': collision_val})
- self.assertEqual(ns.data, {'123': 'emp_single_end_sequences_0'})
- ns.update({'456': collision_val, 'unique': unique_val})
- self.assertEqual(ns['456'], 'emp_single_end_sequences_1')
- self.assertEqual(ns['unique'], 'some_prime_0')
- ns['789'] = collision_val
- self.assertEqual(ns.pop('789'), 'emp_single_end_sequences_2')
-
- def test_get_key(self):
- ns = UsageVarsDict({'123': 'some_name'})
- self.assertEqual('123', ns.get_key('some_name_0'))
- with self.assertRaisesRegex(
- KeyError, "passed value 'fake_key' does not exist"
- ):
- ns.get_key('fake_key')
-
-
-class NamespaceCollectionTests(unittest.TestCase):
+class ReplayNamespacesTests(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.das = DummyArtifacts()
@@ -70,33 +47,47 @@ class NamespaceCollectionTests(unittest.TestCase):
def tearDownClass(cls):
cls.das.free()
+ def test_make_unique_name(self):
+ ns = ReplayNamespaces()
+ self.assertEqual('name_0', ns._make_unique_name('name'))
+ ns.add_usg_var_record('uuid1', 'name')
+ self.assertEqual('name_1', ns._make_unique_name('name'))
+
+ def test_get_usg_var_uuid(self):
+ ns = ReplayNamespaces()
+ ns.add_usg_var_record('uuid1', 'name')
+ self.assertEqual('uuid1', ns.get_usg_var_uuid('name_0'))
+
def test_add_usage_var_workflow(self):
"""
Smoke tests a common workflow with this data structure
- - Create a unique variable name by adding to .usg_var_namespace
+ - Create a unique variable name by adding to the namespace
- Create a UsageVariable with that name
- use the name to get the UUID (when we have Results, we have no UUIDs)
- - add the correctly-named UsageVariable to .usg_vars
+ - add the correctly-named UsageVariable to the namespace
"""
use = Usage()
uuid = self.das.concated_ints.uuid
base_name = 'concated_ints'
exp_name = base_name + '_0'
- ns = NamespaceCollections()
- ns.usg_var_namespace.update({uuid: base_name})
- self.assertEqual(ns.usg_var_namespace[uuid], exp_name)
+ ns = ReplayNamespaces()
+ ns.add_usg_var_record(uuid, base_name)
+ self.assertEqual(ns.get_usg_var_record(uuid).name, exp_name)
def factory(): # pragma: no cover
return Artifact.load(self.das.concated_ints.filepath)
- u_var = use.init_artifact(ns.usg_var_namespace[uuid], factory)
+
+ u_var = use.init_artifact(ns.get_usg_var_record(uuid).name, factory)
self.assertEqual(u_var.name, exp_name)
- actual_uuid = ns.usg_var_namespace.get_key(u_var.name)
+ actual_uuid = ns.get_usg_var_uuid(u_var.name)
self.assertEqual(actual_uuid, uuid)
- ns.usg_vars[uuid] = u_var
- self.assertIsInstance(ns.usg_vars[uuid], UsageVariable)
- self.assertEqual(ns.usg_vars[uuid].name, exp_name)
+ ns.update_usg_var_record(uuid, u_var)
+ self.assertIsInstance(
+ ns.get_usg_var_record(uuid).variable, UsageVariable
+ )
+ self.assertEqual(ns.get_usg_var_record(uuid).name, exp_name)
class ReplayProvenanceTests(unittest.TestCase):
@@ -398,10 +389,13 @@ class BuildUsageExamplesTests(unittest.TestCase):
@patch('qiime2.core.archive.provenance_lib.replay.'
'build_no_provenance_node_usage')
def test_build_usage_examples(self, n_p_builder, imp_builder, act_builder):
- ns = NamespaceCollections()
+ ns = ReplayNamespaces()
dag = self.das.concated_ints_with_md.dag
- cfg = ReplayConfig(use=ReplayPythonUsage(),
- use_recorded_metadata=False, pm=self.pm)
+ cfg = ReplayConfig(
+ use=ReplayPythonUsage(),
+ use_recorded_metadata=False,
+ pm=self.pm
+ )
build_usage_examples(dag, cfg, ns)
n_p_builder.assert_not_called()
@@ -415,15 +409,18 @@ class BuildUsageExamplesTests(unittest.TestCase):
def test_build_usage_examples_lone_v0(
self, n_p_builder, imp_builder, act_builder
):
- ns = NamespaceCollections()
+ ns = ReplayNamespaces()
uuid = self.das.table_v0.uuid
with self.assertWarnsRegex(
UserWarning, f'(:?)Art.*{uuid}.*prior.*incomplete'
):
dag = ProvDAG(self.das.table_v0.filepath)
- cfg = ReplayConfig(use=ReplayPythonUsage(),
- use_recorded_metadata=False, pm=self.pm)
+ cfg = ReplayConfig(
+ use=ReplayPythonUsage(),
+ use_recorded_metadata=False,
+ pm=self.pm
+ )
build_usage_examples(dag, cfg, ns)
# This is a single v0 archive, so should have only one np node
@@ -443,15 +440,18 @@ class BuildUsageExamplesTests(unittest.TestCase):
shutil.copy(self.das.table_v0.filepath, mixed_dir)
shutil.copy(self.das.concated_ints_v6.filepath, mixed_dir)
- ns = NamespaceCollections()
+ ns = ReplayNamespaces()
v0_uuid = self.das.table_v0.uuid
with self.assertWarnsRegex(
UserWarning, f'(:?)Art.*{v0_uuid}.*prior.*incomplete'
):
dag = ProvDAG(mixed_dir)
- cfg = ReplayConfig(use=ReplayPythonUsage(),
- use_recorded_metadata=False, pm=self.pm)
+ cfg = ReplayConfig(
+ use=ReplayPythonUsage(),
+ use_recorded_metadata=False,
+ pm=self.pm
+ )
build_usage_examples(dag, cfg, ns)
n_p_builder.assert_called_once()
@@ -471,10 +471,13 @@ class BuildUsageExamplesTests(unittest.TestCase):
shutil.copy(self.das.splitted_ints.filepath, many_dir)
shutil.copy(self.das.pipeline_viz.filepath, many_dir)
- ns = NamespaceCollections()
+ ns = ReplayNamespaces()
dag = ProvDAG(many_dir)
- cfg = ReplayConfig(use=ReplayPythonUsage(),
- use_recorded_metadata=False, pm=self.pm)
+ cfg = ReplayConfig(
+ use=ReplayPythonUsage(),
+ use_recorded_metadata=False,
+ pm=self.pm
+ )
build_usage_examples(dag, cfg, ns)
n_p_builder.assert_not_called()
@@ -496,16 +499,19 @@ class MiscHelperFnTests(unittest.TestCase):
cls.das.free()
def test_uniquify_action_name(self):
- ns = set()
+ ns = ReplayNamespaces()
p1 = 'dummy_plugin'
a1 = 'action_jackson'
p2 = 'dummy_plugin'
a2 = 'missing_in_action'
- unique1 = uniquify_action_name(p1, a1, ns)
+
+ unique1 = ns.uniquify_action_name(p1, a1)
self.assertEqual(unique1, 'dummy_plugin_action_jackson_0')
- unique2 = uniquify_action_name(p2, a2, ns)
+
+ unique2 = ns.uniquify_action_name(p2, a2)
self.assertEqual(unique2, 'dummy_plugin_missing_in_action_0')
- duplicate = uniquify_action_name(p1, a1, ns)
+
+ duplicate = ns.uniquify_action_name(p1, a1)
self.assertEqual(duplicate, 'dummy_plugin_action_jackson_1')
def test_dump_recorded_md_file_no_md(self):
@@ -539,7 +545,7 @@ class GroupByActionTests(unittest.TestCase):
def test_gba_with_provenance(self):
self.maxDiff = None
- ns = NamespaceCollections()
+ ns = ReplayNamespaces()
dag = self.das.concated_ints_v6.dag
sorted_nodes = nx.topological_sort(dag.collapsed_view)
actual = group_by_action(dag, sorted_nodes, ns)
@@ -558,7 +564,7 @@ class GroupByActionTests(unittest.TestCase):
self.assertEqual(actual.no_provenance_nodes, [])
def test_gba_no_provenance(self):
- ns = NamespaceCollections()
+ ns = ReplayNamespaces()
dag = self.das.table_v0.dag
uuid = self.das.table_v0.uuid
@@ -573,7 +579,7 @@ class GroupByActionTests(unittest.TestCase):
shutil.copy(self.das.table_v0.filepath, mixed_dir)
shutil.copy(self.das.concated_ints_v6.filepath, mixed_dir)
- ns = NamespaceCollections()
+ ns = ReplayNamespaces()
v0_uuid = self.das.table_v0.uuid
with self.assertWarnsRegex(
UserWarning, f'(:?)Art.*{v0_uuid}.*prior.*incomplete'
@@ -640,25 +646,33 @@ class InitializerTests(unittest.TestCase):
cls.das.free()
def test_init_md_from_artifacts_no_artifacts(self):
- cfg = ReplayConfig(use=ReplayPythonUsage(),
- use_recorded_metadata=False, pm=self.pm)
- usg_vars = {}
+ cfg = ReplayConfig(
+ use=ReplayPythonUsage(),
+ use_recorded_metadata=False,
+ pm=self.pm
+ )
+ ns = ReplayNamespaces
+
# create dummy hash '0', not relevant here
md_info = MetadataInfo([], 'hmm.tsv', '0')
with self.assertRaisesRegex(
ValueError, "not.*used.*input_artifact_uuids.*empty"
):
- init_md_from_artifacts(md_info, usg_vars, cfg)
+ init_md_from_artifacts(md_info, ns, cfg)
def test_init_md_from_artifacts_one_art(self):
# This helper doesn't capture real data, so we're only smoke testing,
# checking type, and confirming the repr looks reasonable.
- cfg = ReplayConfig(use=ReplayPythonUsage(),
- use_recorded_metadata=False, pm=self.pm)
+ cfg = ReplayConfig(
+ use=ReplayPythonUsage(),
+ use_recorded_metadata=False,
+ pm=self.pm
+ )
# We expect artifact vars have already been added to the namespace
a1 = cfg.use.init_artifact(name='thing1', factory=lambda: None)
- ns = NamespaceCollections(usg_vars={'uuid1': a1})
+ ns = ReplayNamespaces()
+ ns._usg_var_ns = {'uuid1': UsageVariableRecord('thing1', a1)}
# create dummy hash '0', not relevant here
md_info = MetadataInfo(['uuid1'], 'hmm.tsv', '0')
@@ -673,15 +687,23 @@ class InitializerTests(unittest.TestCase):
def test_init_md_from_artifacts_many(self):
# This helper doesn't capture real data, so we're only smoke testing,
# checking type, and confirming the repr looks reasonable.
- cfg = ReplayConfig(use=ReplayPythonUsage(),
- use_recorded_metadata=False, pm=self.pm)
+ cfg = ReplayConfig(
+ use=ReplayPythonUsage(),
+ use_recorded_metadata=False,
+ pm=self.pm
+ )
# We expect artifact vars have already been added to the namespace
a1 = cfg.use.init_artifact(name='thing1', factory=lambda: None)
a2 = cfg.use.init_artifact(name='thing2', factory=lambda: None)
a3 = cfg.use.init_artifact(name='thing3', factory=lambda: None)
- ns = NamespaceCollections(
- usg_vars={'uuid1': a1, 'uuid2': a2, 'uuid3': a3})
+
+ ns = ReplayNamespaces()
+ ns._usg_var_ns = {
+ 'uuid1': UsageVariableRecord('thing1', a1),
+ 'uuid2': UsageVariableRecord('thing2', a2),
+ 'uuid3': UsageVariableRecord('thing3', a3),
+ }
# create dummy hash '0', not relevant here
md_info = MetadataInfo(['uuid1', 'uuid2', 'uuid3'], 'hmm.tsv', '0')
@@ -694,9 +716,11 @@ class InitializerTests(unittest.TestCase):
self.assertIn('thing1_a_0_md = thing1.view(Metadata)', rendered)
self.assertIn('thing2_a_0_md = thing2.view(Metadata)', rendered)
self.assertIn('thing3_a_0_md = thing3.view(Metadata)', rendered)
- self.assertIn('merged_artifacts_0_md = '
- 'thing1_a_0_md.merge(thing2_a_0_md, thing3_a_0_md)',
- rendered)
+ self.assertIn(
+ 'merged_artifacts_0_md = '
+ 'thing1_a_0_md.merge(thing2_a_0_md, thing3_a_0_md)',
+ rendered
+ )
def test_init_md_from_md_file(self):
dag = self.das.concated_ints_with_md.dag
@@ -704,9 +728,14 @@ class InitializerTests(unittest.TestCase):
md_id = 'whatevs'
param_name = 'metadata'
- ns = UsageVarsDict({md_id: param_name})
- cfg = ReplayConfig(use=ReplayPythonUsage(),
- use_recorded_metadata=False, pm=self.pm)
+ ns = ReplayNamespaces()
+ ns.add_usg_var_record(md_id, param_name)
+
+ cfg = ReplayConfig(
+ use=ReplayPythonUsage(),
+ use_recorded_metadata=False,
+ pm=self.pm
+ )
var = init_md_from_md_file(md_node, param_name, md_id, ns, cfg)
@@ -724,9 +753,15 @@ class InitializerTests(unittest.TestCase):
var_name = 'metadata_0'
param_name = 'metadata'
- ns = UsageVarsDict({var_name: param_name})
- cfg = ReplayConfig(use=ReplayPythonUsage(),
- use_recorded_metadata=False, pm=self.pm)
+ ns = ReplayNamespaces()
+ ns.add_usg_var_record(var_name, param_name)
+
+ cfg = ReplayConfig(
+ use=ReplayPythonUsage(),
+ use_recorded_metadata=False,
+ pm=self.pm
+ )
+
md_fn = 'identity_with_metadata/metadata_0'
with self.assertRaisesRegex(ValueError, 'only.*call.*if.*metadata'):
@@ -743,8 +778,9 @@ class InitializerTests(unittest.TestCase):
rendered = cfg.use.render()
self.assertIn('from qiime2 import Metadata', rendered)
self.assertIn('metadata_0_md = Metadata.load', rendered)
- self.assertIn('recorded_metadata/identity_with_metadata/'
- 'metadata_0', rendered)
+ self.assertIn(
+ 'recorded_metadata/identity_with_metadata/metadata_0', rendered
+ )
def test_init_md_from_recorded_mdc(self):
dag = self.das.concated_ints_with_md_column.dag
@@ -753,9 +789,15 @@ class InitializerTests(unittest.TestCase):
var_name = 'metadata_0'
param_name = 'metadata'
- ns = UsageVarsDict({var_name: param_name})
- cfg = ReplayConfig(use=ReplayPythonUsage(),
- use_recorded_metadata=False, pm=self.pm)
+ ns = ReplayNamespaces()
+ ns.add_usg_var_record(var_name, param_name)
+
+ cfg = ReplayConfig(
+ use=ReplayPythonUsage(),
+ use_recorded_metadata=False,
+ pm=self.pm
+ )
+
md_fn = 'identity_with_metadata_column/metadata_0'
with self.assertRaisesRegex(ValueError, 'only.*call.*if.*metadata'):
@@ -789,7 +831,7 @@ class BuildNoProvenanceUsageTests(CustomAssertions):
cls.das.free()
def test_build_no_provenance_node_usage_w_complete_node(self):
- ns = NamespaceCollections()
+ ns = ReplayNamespaces()
cfg = ReplayConfig(use=ReplayPythonUsage(),
use_recorded_metadata=False, pm=self.pm)
uuid = self.das.table_v0.uuid
@@ -797,8 +839,9 @@ class BuildNoProvenanceUsageTests(CustomAssertions):
v0_node = dag.get_node_data(uuid)
build_no_provenance_node_usage(v0_node, uuid, ns, cfg)
- out_var_name = '<feature_table_frequency_0>'
- self.assertEqual(ns.usg_var_namespace, {uuid: out_var_name})
+ out_var_name = 'feature_table_frequency_0'
+ self.assertIn(uuid, ns._usg_var_ns)
+ self.assertEqual(ns._usg_var_ns[uuid].name, out_var_name)
rendered = cfg.use.render()
# Confirm the initial context comment is present once.
@@ -807,20 +850,24 @@ class BuildNoProvenanceUsageTests(CustomAssertions):
self.assertREAppearsOnlyOnce(rendered, header)
# Confirm expected values have been rendered
- exp_v0 = f'# {uuid} _feature_table_frequency_0_'
+ exp_v0 = f'# {uuid} feature_table_frequency_0'
self.assertRegex(rendered, exp_v0)
def test_build_no_provenance_node_usage_uuid_only_node(self):
- ns = NamespaceCollections()
- cfg = ReplayConfig(use=ReplayPythonUsage(),
- use_recorded_metadata=False, pm=self.pm)
+ ns = ReplayNamespaces()
+ cfg = ReplayConfig(
+ use=ReplayPythonUsage(),
+ use_recorded_metadata=False,
+ pm=self.pm
+ )
uuid = 'some-uuid'
node = None
build_no_provenance_node_usage(node, uuid, ns, cfg)
- out_var_name = '<no-provenance-node_0>'
- self.assertEqual(ns.usg_var_namespace, {uuid: out_var_name})
+ out_var_name = 'no-provenance-node_0'
+ self.assertIn(uuid, ns._usg_var_ns)
+ self.assertEqual(ns._usg_var_ns[uuid].name, out_var_name)
rendered = cfg.use.render()
# Confirm the initial context comment is present once.
@@ -829,11 +876,11 @@ class BuildNoProvenanceUsageTests(CustomAssertions):
self.assertREAppearsOnlyOnce(rendered, header)
# Confirm expected values have been rendered
- exp_v0 = f'# {uuid} _no_provenance_node_0_'
+ exp_v0 = f'# {uuid} no_provenance_node_0'
self.assertRegex(rendered, exp_v0)
def test_build_no_provenance_node_usage_many(self):
- ns = NamespaceCollections()
+ ns = ReplayNamespaces()
cfg = ReplayConfig(
use=ReplayPythonUsage(),
use_recorded_metadata=False, pm=self.pm
@@ -849,12 +896,14 @@ class BuildNoProvenanceUsageTests(CustomAssertions):
build_no_provenance_node_usage(v0_node, uuid, ns, cfg)
build_no_provenance_node_usage(dummy_node, dummy_node_uuid, ns, cfg)
- self.assertIn(uuid, ns.usg_var_namespace)
- self.assertIn(dummy_node_uuid, ns.usg_var_namespace)
- self.assertEqual(ns.usg_var_namespace[uuid],
- '<feature_table_frequency_0>')
- self.assertEqual(ns.usg_var_namespace[dummy_node_uuid],
- '<feature_table_frequency_1>')
+ self.assertIn(uuid, ns._usg_var_ns)
+ self.assertIn(dummy_node_uuid, ns._usg_var_ns)
+ self.assertEqual(
+ ns._usg_var_ns[uuid].name, 'feature_table_frequency_0'
+ )
+ self.assertEqual(
+ ns._usg_var_ns[dummy_node_uuid].name, 'feature_table_frequency_1'
+ )
rendered = cfg.use.render()
# Confirm the initial context isn't repeated.
@@ -863,8 +912,8 @@ class BuildNoProvenanceUsageTests(CustomAssertions):
self.assertREAppearsOnlyOnce(rendered, header)
# Confirm expected values have been rendered
- exp_og = f'# {uuid} _feature_table_frequency_0_'
- exp_dummy = f'# {uuid}-dummy _feature_table_frequency_1_'
+ exp_og = f'# {uuid} feature_table_frequency_0'
+ exp_dummy = f'# {uuid}-dummy feature_table_frequency_1'
self.assertRegex(rendered, exp_og)
self.assertRegex(rendered, exp_dummy)
@@ -881,22 +930,27 @@ class BuildImportUsageTests(CustomAssertions):
cls.das.free()
def test_build_import_usage_python(self):
- ns = NamespaceCollections()
- cfg = ReplayConfig(use=ReplayPythonUsage(),
- use_recorded_metadata=False, pm=self.pm)
+ ns = ReplayNamespaces()
+ cfg = ReplayConfig(
+ use=ReplayPythonUsage(),
+ use_recorded_metadata=False,
+ pm=self.pm
+ )
+
dag = self.das.concated_ints_v6.dag
import_uuid = '8dea2f1a-2164-4a85-9f7d-e0641b1db22b'
import_node = dag.get_node_data(import_uuid)
c_to_s_type = camel_to_snake(import_node.type)
unq_var_nm = c_to_s_type + '_0'
build_import_usage(import_node, ns, cfg)
- rendered = cfg.use.render()
- vars = ns.usg_vars
- out_name = vars[import_uuid].to_interface_name()
- self.assertIsInstance(vars[import_uuid], UsageVariable)
- self.assertEqual(vars[import_uuid].var_type, 'artifact')
- self.assertEqual(vars[import_uuid].name, unq_var_nm)
+ usg_var = ns.get_usg_var_record(import_uuid).variable
+ self.assertIsInstance(usg_var, UsageVariable)
+ self.assertEqual(usg_var.var_type, 'artifact')
+ self.assertEqual(usg_var.name, unq_var_nm)
+
+ rendered = cfg.use.render()
+ out_name = usg_var.to_interface_name()
self.assertRegex(rendered, 'from qiime2 import Artifact')
self.assertRegex(rendered, rf'{out_name} = Artifact.import_data\(')
self.assertRegex(rendered, import_node.type)
@@ -1150,10 +1204,13 @@ class BuildActionUsageTests(CustomAssertions):
def test_build_action_usage_python(self):
plugin = 'dummy_plugin'
action = 'concatenate_ints'
- cfg = ReplayConfig(use=ReplayPythonUsage(),
- use_recorded_metadata=False, pm=self.pm)
+ cfg = ReplayConfig(
+ use=ReplayPythonUsage(),
+ use_recorded_metadata=False,
+ pm=self.pm
+ )
- ns = NamespaceCollections()
+ ns = ReplayNamespaces()
import_var_1 = ArtifactAPIUsageVariable(
'imported_ints_0', lambda: None, 'artifact', cfg.use
)
@@ -1162,10 +1219,8 @@ class BuildActionUsageTests(CustomAssertions):
)
import_uuid_1 = '8dea2f1a-2164-4a85-9f7d-e0641b1db22b'
import_uuid_2 = '7727c060-5384-445d-b007-b64b41a090ee'
- ns.usg_vars = {
- import_uuid_1: import_var_1,
- import_uuid_2: import_var_2
- }
+ ns.add_usg_var_record(import_uuid_1, 'imported_ints', import_var_1)
+ ns.add_usg_var_record(import_uuid_2, 'imported_ints', import_var_2)
dag = self.das.concated_ints_v6.dag
action_uuid = '5035a60e-6f9a-40d4-b412-48ae52255bb5'
@@ -1176,14 +1231,15 @@ class BuildActionUsageTests(CustomAssertions):
)
unique_var_name = node.action.output_name + '_0'
build_action_usage(node, ns, actions.std_actions, action_uuid, cfg)
- rendered = cfg.use.render()
- out_name = ns.usg_vars[node_uuid].to_interface_name()
- vars = ns.usg_vars
- self.assertIsInstance(vars[node_uuid], UsageVariable)
- self.assertEqual(vars[node_uuid].var_type, 'artifact')
- self.assertEqual(vars[node_uuid].name, unique_var_name)
+ usg_var = ns.get_usg_var_record(node_uuid).variable
+ out_name = usg_var.to_interface_name()
+ self.assertIsInstance(usg_var, UsageVariable)
+ self.assertEqual(usg_var.var_type, 'artifact')
+ self.assertEqual(usg_var.name, unique_var_name)
+
+ rendered = cfg.use.render()
self.assertRegex(
rendered, f"import.*{plugin}.actions as {plugin}_actions"
)
@@ -1207,7 +1263,7 @@ class BuildActionUsageTests(CustomAssertions):
dag = self.das.concated_ints_with_md.dag
node = dag.get_node_data(node_uuid)
- ns = NamespaceCollections()
+ ns = ReplayNamespaces()
mapping_var = ArtifactAPIUsageVariable(
'imported_mapping_0', lambda: None, 'artifact', cfg.use
)
@@ -1220,23 +1276,28 @@ class BuildActionUsageTests(CustomAssertions):
mapping_import_uuid = '8f71b73d-b028-4cbc-9894-738bdfe718bf'
intseq_import_uuid_1 = '0bb6d731-155a-4dd0-8a1e-98827bc4e0bf'
intseq_import_uuid_2 = 'e6b37bae-3a14-40f7-87b4-52cf5c7c7a1d'
- ns.usg_vars = {
- mapping_import_uuid: mapping_var,
- intseq_import_uuid_1: intseq_var_1,
- intseq_import_uuid_2: intseq_var_2,
- }
+ ns.add_usg_var_record(
+ mapping_import_uuid, 'imported_mapping', mapping_var
+ )
+ ns.add_usg_var_record(
+ intseq_import_uuid_1, 'imported_ints', intseq_var_1
+ )
+ ns.add_usg_var_record(
+ intseq_import_uuid_2, 'imported_ints', intseq_var_2
+ )
actions = ActionCollections(
std_actions={action_uuid: {node_uuid: 'out'}}
)
build_action_usage(node, ns, actions.std_actions, action_uuid, cfg)
- rendered = cfg.use.render()
- vars = ns.usg_vars
- self.assertIsInstance(vars[node_uuid], UsageVariable)
- self.assertEqual(vars[node_uuid].var_type, 'artifact')
- self.assertEqual(vars[node_uuid].name, 'out_0')
+ usg_var = ns.get_usg_var_record(node_uuid).variable
+
+ self.assertIsInstance(usg_var, UsageVariable)
+ self.assertEqual(usg_var.var_type, 'artifact')
+ self.assertEqual(usg_var.name, 'out_0')
+ rendered = cfg.use.render()
self.assertIn('from qiime2 import Metadata', rendered)
self.assertIn('.view(Metadata)', rendered)
self.assertIn(f'.{action}(', rendered)
=====================================
qiime2/core/cache.py
=====================================
@@ -567,6 +567,32 @@ class Cache:
version_file = fh.read()
return regex.match(version_file) is not None
+ @classmethod
+ def validate_key(cls, key):
+ """Validates that the given key is a valid Python identifier with the
+ exception that - is allowed.
+
+ Parameters
+ ----------
+ key : str
+ The name of the key to validate.
+
+ Raises
+ ------
+ ValueError
+ If the key passed in is not a valid Python identifier. We enforce
+ this to ensure no one creates keys that cause issues when we try to
+ load them.
+ """
+ validation_key = key.replace('-', '_')
+ if not validation_key.isidentifier():
+ raise ValueError(f"Key '{key}' is not a valid Python identifier. "
+ "Keys may contain '-' characters but must "
+ "otherwise be valid Python identifiers. Python "
+ "identifier rules may be found here "
+ "https://www.askpython.com/python/"
+ "python-identifiers-rules-best-practices")
+
def _create_cache_contents(self):
"""Create the cache directory, all sub directories, and the version
file.
@@ -859,24 +885,8 @@ class Cache:
The path to the data or pool we are keying.
pool : bool
Whether we are keying a pool or not.
-
- Raises
- ------
- ValueError
- If the key passed in is not a valid Python identifier. We enforce
- this to ensure no one creates keys that cause issues when we try to
- load them.
"""
- # We require keys to be valid Python identifiers with the single caveat
- # that they may also contain dashes
- validation_key = key.replace('-', '_')
- if not validation_key.isidentifier():
- raise ValueError("Keys may contain '-' characters but must "
- "otherwise be valid Python identifiers. Python "
- "identifier rules may be found here "
- "https://www.askpython.com/python/"
- "python-identifiers-rules-best-practices")
-
+ self.validate_key(key)
key_fp = self.keys / key
key_dict = {}
=====================================
qiime2/core/testing/plugin.py
=====================================
@@ -1120,3 +1120,9 @@ other_plugin.methods.register_function(
name='Concatenate integers',
description='Some description'
)
+
+
+@other_plugin.register_transformer
+def _9999999(ff: SingleIntFormat) -> str:
+ with ff.open() as fh:
+ return fh.read()
=====================================
qiime2/core/type/parse.py
=====================================
@@ -44,7 +44,7 @@ def _expr(expr):
return _build_predicate(expr.func.id, args, kwargs)
if node is ast.Subscript:
- field_expr = expr.slice.value
+ field_expr = expr.slice
if type(field_expr) is ast.Tuple:
field_expr = field_expr.elts
@@ -219,7 +219,14 @@ def ast_to_type(json_ast, scope=None):
name = json_ast['name']
if not json_ast['builtin']:
- base_template = semantic.SemanticType(name).template
+ base_template = semantic.SemanticType(
+ name,
+ field_names=['field' + str(i) for i in range(len(fields))],
+ field_members={
+ ('field' + str(i)):
+ [child] for i, child in enumerate(fields)
+ },
+ ).template
elif name == 'Visualization':
return visualization.Visualization
elif name in {'List', 'Set', 'Tuple', 'Collection'}:
=====================================
qiime2/core/type/signature.py
=====================================
@@ -302,7 +302,8 @@ class PipelineSignature:
for output_name, spec in outputs.items():
if not (is_semantic_type(spec.qiime_type) or
- spec.qiime_type == Visualization):
+ spec.qiime_type == Visualization or
+ spec.qiime_type == Collection[Visualization]):
raise TypeError(
"Output %r must be a semantic QIIME type or "
"Visualization, not %r"
=====================================
qiime2/core/type/tests/test_parse.py
=====================================
@@ -99,6 +99,30 @@ class TestParsing(unittest.TestCase):
self.assertIs(V1.mapping, W1.mapping)
self.assertIs(W1.mapping, X1.mapping)
+ def test_TypeMap_with_properties(self):
+ I, OU = TypeMap({
+ C1[Foo % Properties(['A', 'B', 'C'])]: Str,
+ C1[Foo % Properties(['A', 'B'])]: Str,
+ C1[Foo % Properties(['A', 'C'])]: Str,
+ C1[Foo % Properties(['B', 'C'])]: Str,
+ C1[Foo % Properties(['A'])]: Str,
+ C1[Foo % Properties(['B'])]: Str,
+ C1[Foo % Properties(['C'])]: Str,
+ })
+
+ scope = {}
+ i = ast_to_type(I.to_ast(), scope=scope)
+ o = ast_to_type(OU.to_ast(), scope=scope)
+
+ self.assertEqual(scope[id(I.mapping)], [i, o])
+ self.assertEqual(len(scope), 1)
+
+ # Assert mapping is the same after ast_to_type call
+ self.assertEqual(I.mapping.lifted, i.mapping.lifted)
+
+ # Assert that the mapping object is the same in both i and o
+ self.assertIs(i.mapping, o.mapping)
+
def test_syntax_error(self):
with self.assertRaisesRegex(ValueError, "could not be parsed"):
string_to_ast('$')
=====================================
qiime2/metadata/io.py
=====================================
@@ -415,7 +415,7 @@ class MetadataWriter:
df = md.to_dataframe(encode_missing=True)
df.fillna('', inplace=True)
- df = df.applymap(self._format)
+ df = df.map(self._format)
tsv_writer.writerows(df.itertuples(index=True))
def _non_default_missing(self, missing_directive):
=====================================
qiime2/metadata/metadata.py
=====================================
@@ -1286,7 +1286,7 @@ class CategoricalMetadataColumn(MetadataColumn):
"%r of type %r in column %r." %
(cls.__name__, value, type(value), series.name))
- norm_series = series.apply(normalize, convert_dtype=False)
+ norm_series = series.apply(normalize).astype(object)
norm_series.index = norm_series.index.str.strip()
norm_series.name = norm_series.name.strip()
return norm_series
=====================================
qiime2/plugin/testing.py
=====================================
@@ -16,6 +16,7 @@ import itertools
import qiime2
from qiime2.sdk import usage
+from qiime2.plugin.util import transform
from qiime2.plugin.model.base import FormatBase
@@ -241,8 +242,7 @@ class TestPluginBase(unittest.TestCase):
shutil.copy(filepath, source_path)
input = source_format(source_path, mode='r')
- transformer = self.get_transformer(source_format, target)
- obs = transformer(input)
+ obs = transform(input, from_type=source_format, to_type=target)
if issubclass(target, FormatBase):
self.assertIsInstance(obs, (type(pathlib.Path()), str, target))
=====================================
qiime2/plugin/tests/test_tests.py
=====================================
@@ -9,11 +9,15 @@
import unittest
import tempfile
+
+from qiime2.core.testing.format import SingleIntFormat
from qiime2.core.testing.util import get_dummy_plugin
from qiime2.plugin.testing import TestPluginBase
class TestTesting(TestPluginBase):
+ package = 'qiime2.sdk.tests'
+
def setUp(self):
self.plugin = get_dummy_plugin()
@@ -24,6 +28,12 @@ class TestTesting(TestPluginBase):
def tearDown(self):
self.test_dir.cleanup()
+ def test_transformer_in_other_plugin(self):
+ _, obs = self.transform_format(SingleIntFormat, str,
+ filename='singleint.txt')
+
+ self.assertEqual('42', obs)
+
def test_examples(self):
self.execute_examples()
=====================================
qiime2/sdk/action.py
=====================================
@@ -438,13 +438,23 @@ class Action(metaclass=abc.ABCMeta):
# pipeline that calls two other pipelines within it and execute both of
# those internal pipelines simultaneously.
if isinstance(self, qiime2.sdk.action.Pipeline):
+ # If ctx._parent is None then this is the root pipeline and we want
+ # to dispatch it to a join_app
execution_ctx['parsl_type'] = 'DFK'
- # NOTE: Do not make this a python_app(join=True). We need it to run
- # in the parsl main thread
- future = join_app()(
- _run_parsl_action)(self, ctx, execution_ctx,
- mapped_args, mapped_kwargs,
- inputs=futures)
+ if ctx._parent is None:
+ # NOTE: Do not make this a python_app(join=True). We need it to
+ # run in the parsl main thread
+ future = join_app()(
+ _run_parsl_action)(self, ctx, execution_ctx,
+ mapped_args, mapped_kwargs,
+ inputs=futures)
+ # If there is a parent then this is not the root pipeline and we
+ # want to just _bind it with a parallel context. The fact that
+ # parallel is set on the context will cause ctx.get_action calls in
+ # the pipeline to use the action's _bind_parsl method.
+ else:
+ return self._bind(lambda: qiime2.sdk.Context(ctx),
+ execution_ctx=execution_ctx)(*args, **kwargs)
else:
execution_ctx['parsl_type'] = \
ctx.executor_name_type_mapping[executor]
=====================================
qiime2/sdk/result.py
=====================================
@@ -614,11 +614,14 @@ class ResultCollection:
os.makedirs(directory)
+ order_string = ''
+ for name, result in self.collection.items():
+ result_fp = os.path.join(directory, name)
+ result.save(result_fp)
+ order_string += f'{name}\n'
+
with open(os.path.join(directory, '.order'), 'w') as fh:
- for name, result in self.collection.items():
- result_fp = os.path.join(directory, name)
- result.save(result_fp)
- fh.write(f'{name}\n')
+ fh.write(order_string)
# Do this to give us a unified API with Result.save
return directory
=====================================
qiime2/sdk/tests/data/singleint.txt
=====================================
@@ -0,0 +1 @@
+42
\ No newline at end of file
=====================================
qiime2/sdk/tests/test_action.py
=====================================
@@ -30,7 +30,6 @@ from qiime2.metadata.tests.test_io import get_data_path
class TestBadInputs(TestPluginBase):
-
def make_provenance_capture(self):
# importing visualizations is not supported, but we do that here to
# simplify testing machinery
=====================================
qiime2/sdk/tests/test_pipeline.py
=====================================
@@ -16,6 +16,7 @@ import qiime2.sdk
from qiime2.core.testing.util import get_dummy_plugin
from qiime2.core.testing.type import IntSequence1, SingleInt, Mapping
from qiime2.plugin import Visualization, Int, Bool
+import qiime2.sdk.parallel_config
class TestPipeline(unittest.TestCase):
@@ -339,6 +340,21 @@ class TestPipeline(unittest.TestCase):
self.assertEqual(obs, exp)
+ def test_nested_pipeline_parallel(self):
+ ''' This test basically just validates that nested pipelines in
+ parallel don't blow anything up. It was added concurrently with
+ nested parallel pipelines being flattened.
+ '''
+ pipeline = self.plugin.pipelines['pipelines_in_pipeline']
+
+ ints = qiime2.Artifact.import_data(IntSequence1, [1, 2, 3])
+ mapping = qiime2.Artifact.import_data(Mapping, {'foo': '42'})
+
+ with qiime2.sdk.parallel_config.ParallelConfig():
+ pipeline.parallel(ints, mapping)._result()
+
+ self.assertTrue(True)
+
def test_failing_from_arity(self):
for call in self.iter_callables('failing_pipeline'):
with self.assertRaisesRegex(TypeError, 'match number.*3.*1'):
View it on GitLab: https://salsa.debian.org/med-team/qiime/-/commit/f438cdd7afe2cd7976069e1b67fe26cbb15fac3a
--
This project does not include diff previews in email notifications.
View it on GitLab: https://salsa.debian.org/med-team/qiime/-/commit/f438cdd7afe2cd7976069e1b67fe26cbb15fac3a
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240606/a2679870/attachment-0001.htm>
More information about the debian-med-commit mailing list