[med-svn] [Git][med-team/qiime][master] 6 commits: New upstream version 2024.5.0

Andreas Tille (@tille) gitlab at salsa.debian.org
Thu Jun 6 14:44:52 BST 2024

Andreas Tille pushed to branch master at Debian Med / qiime

f438cdd7 by Andreas Tille at 2024-06-06T15:33:01+02:00
New upstream version 2024.5.0
- - - - -
3a667d26 by Andreas Tille at 2024-06-06T15:33:01+02:00
routine-update: New upstream version

- - - - -
b5646faa by Andreas Tille at 2024-06-06T15:33:03+02:00
Update upstream source from tag 'upstream/2024.5.0'

Update to upstream version '2024.5.0'
with Debian dir 1e41feab7fa8a560b9c790c5e0f4a2e7fce19bc9
- - - - -
b32f862d by Andreas Tille at 2024-06-06T15:33:04+02:00
routine-update: Standards-Version: 4.7.0

- - - - -
9c060246 by Andreas Tille at 2024-06-06T15:36:27+02:00
Refresh patches

- - - - -
ae2c4da4 by Andreas Tille at 2024-06-06T15:38:47+02:00
Replace python3-appdirs by python3-platformdirs

- - - - -

23 changed files:

- debian/changelog
- debian/control
- debian/patches/python3.10.patch
- − debian/patches/python3.9.patch
- debian/patches/series
- qiime2/_version.py
- qiime2/core/archive/provenance_lib/replay.py
- qiime2/core/archive/provenance_lib/tests/test_checksum_validator.py
- qiime2/core/archive/provenance_lib/tests/test_replay.py
- qiime2/core/cache.py
- qiime2/core/testing/plugin.py
- qiime2/core/type/parse.py
- qiime2/core/type/signature.py
- qiime2/core/type/tests/test_parse.py
- qiime2/metadata/io.py
- qiime2/metadata/metadata.py
- qiime2/plugin/testing.py
- qiime2/plugin/tests/test_tests.py
- qiime2/sdk/action.py
- qiime2/sdk/result.py
- + qiime2/sdk/tests/data/singleint.txt
- qiime2/sdk/tests/test_action.py
- qiime2/sdk/tests/test_pipeline.py


@@ -1,14 +1,16 @@
-qiime (2024.2.0-2) UNRELEASED; urgency=medium
-  Team upload.
+qiime (2024.5.0-1) UNRELEASED; urgency=medium
   [ Andreas Tille ]
+  * New upstream version
   * Fix issue with self._raw_path for Python3.12
+  * Standards-Version: 4.7.0 (routine-update)
+  * Replace python3-appdirs by python3-platformdirs
+    Closes: #1068006
   [ Athos Ribeiro ]
   * d/p/configparser.patch: support python 3.12 configparser
- -- Andreas Tille <tille at debian.org>  Fri, 23 Feb 2024 16:24:14 +0100
+ -- Andreas Tille <tille at debian.org>  Thu, 06 Jun 2024 15:33:01 +0200
 qiime (2024.2.0-1) unstable; urgency=medium

@@ -21,7 +21,7 @@ Build-Depends: debhelper-compat (= 13),
                python3-tzlocal <!nocheck>,
                python3-yaml <!nocheck>,
                python3-networkx <!nocheck>
-Standards-Version: 4.6.2
+Standards-Version: 4.7.0
 Vcs-Browser: https://salsa.debian.org/med-team/qiime
 Vcs-Git: https://salsa.debian.org/med-team/qiime.git
 Homepage: https://qiime2.org
@@ -32,7 +32,7 @@ Architecture: all
 Depends: ${shlibs:Depends},
-         python3-appdirs,
+         python3-platformdirs,

@@ -81,7 +81,7 @@ This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
              self.assertEqual(parameters, exp_parameters)
 --- a/qiime2/sdk/tests/test_pipeline.py
 +++ b/qiime2/sdk/tests/test_pipeline.py
-@@ -61,13 +61,13 @@ class TestPipeline(unittest.TestCase):
+@@ -62,13 +62,13 @@ class TestPipeline(unittest.TestCase):
          kind = inspect.Parameter.POSITIONAL_OR_KEYWORD
          exp_parameters = [
              ('int_sequence', inspect.Parameter(

debian/patches/python3.9.patch deleted
@@ -1,23 +0,0 @@
-Description: python3.9.patch
-Author: Étienne Mollier <etienne.mollier at mailoo.org>
-Bug: https://github.com/qiime2/qiime2/issues/520
-Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=950842
-Last-Update: 2020-11-30
-This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
-Index: qiime/qiime2/core/type/parse.py
---- qiime.orig/qiime2/core/type/parse.py
-+++ qiime/qiime2/core/type/parse.py
-@@ -44,7 +44,10 @@ def _expr(expr):
-         return _build_predicate(expr.func.id, args, kwargs)
-     if node is ast.Subscript:
--        field_expr = expr.slice.value
-+        try:
-+            field_expr = expr.slice.value
-+        except AttributeError:
-+            field_expr = expr.slice
-         if type(field_expr) is ast.Tuple:
-             field_expr = field_expr.elts

@@ -1,5 +1,4 @@

@@ -23,9 +23,9 @@ def get_keywords():
     # setup.py/versioneer.py will grep for the variable names, so they must
     # each be defined on a line of their own. _version.py will just call
     # get_keywords().
-    git_refnames = " (tag: 2024.2.0, Release-2024.2)"
-    git_full = "512d740cc815b10c3045014a20059bc2e85affe0"
-    git_date = "2024-02-16 21:49:30 +0000"
+    git_refnames = " (tag: 2024.5.0, Release-2024.5)"
+    git_full = "981462dc70891c067625d8a24f1c185eeb32ef0f"
+    git_date = "2024-05-29 04:20:00 +0000"
     keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
     return keywords

@@ -14,9 +14,8 @@ import pkg_resources
 import shutil
 import tempfile
 from uuid import uuid4
-from collections import UserDict
 from dataclasses import dataclass, field
-from typing import Dict, Iterator, List, Optional, Set, Tuple, Union
+from typing import Dict, Iterator, List, Optional, Tuple, Union
 from .archive_parser import ProvNode
 from .parse import ProvDAG
@@ -90,152 +89,354 @@ class ActionCollections():
     no_provenance_nodes: List[str] = field(default_factory=list)
-class UsageVarsDict(UserDict):
+@dataclass
+class UsageVariableRecord:
+    name: str
+    variable: UsageVariable = None
+@dataclass
+class ResultCollectionRecord:
+    collection_uuid: str
+    members: Dict[str, str]
+class ReplayNamespaces:
-    Mapping of uuid -> unique variable name.
-    A dict where values are also unique. Used here as a UUID-queryable
-    "namespace" of strings that can be passed to usage drivers for rendering
-    into unique variable names. Non-unique values cause namespace collisions.
-    For consistency and simplicity, all str values are suffixed with _n when
-    added, such that n is some int. When potentially colliding values are
-    added, n is incremented as needed until collision is avoided.
-    UsageVarsDicts mutate ALL str values they receive.
-    Best practice is generally to add the UUID: variable-name pair to this,
-    create the usage variable using the stored name,
-    then store the usage variable in a separate {UUID: UsageVar}. This
-    ensures that UsageVariable.name is unique, preventing namespace collisions.
-    NamespaceCollections (below) exist to group these related structures.
-    Note: it's not necessary (and may break the mechanism of uniqueness here)
-    to maintain parity between variable names in this namespace and in the
-    usage variable store. The keys in both stores, however, must match.
+    A dataclass collection of objects that each track some useful bit of
+    information relevant to replay/usage namespaces.
+    Attributes
+    ----------
+    _usg_var_ns : dict
+        The central usage variable namespace that ensures no namespace clashes.
+        Maps artifact uuid to `UsageVariableRecord`.
+    _action_ns : set of str
+        A collection of unique action strings that look like
+        `{plugin}_{action}_{sequential int}`.
+    result_collection_ns : dict
+        Used to keep track of result collection members during usage rendering.
+        Structure is as follows:
+        {
+            action-id: {
+                output-name: {
+                    'collection_uuid': uuid,
+                    'artifacts': {
+                        uuid: key-in-collection,
+                        (...),
+                    }
+                },
+                (...),
+            }
+        }
+        where the action-id and the output-name uniquely identify a result
+        collection that came from some action, the `collection_uuid` key stores
+        a uuid for the entire collection needed for querying the
+        `usg_var_namespace`, and the `artifacts` key stores all result
+        collection members along with their keys so they can be accessed
+        properly.
-    def __setitem__(self, key: str, item: str) -> None:
-        unique_item = self._uniquify(item)
-        return super().__setitem__(key, unique_item)
+    def __init__(self, dag=None):
+        self._usg_var_ns = {}
+        self._action_ns = set()
+        if dag:
+            self.result_collection_ns = \
+                self.make_result_collection_namespace(dag)
+            self.artifact_uuid_to_rc_uuid, self.rc_contents_to_rc_uuid = \
+                self.make_result_collection_mappings()
+        else:
+            self.result_collection_ns = {}
+            self.artifact_uuid_to_rc_uuid = {}
+            self.rc_contents_to_rc_uuid = {}
-    def _uniquify(self, var_name: str) -> str:
+    def add_usg_var_record(self, uuid, name, variable=None):
-        Appends _<some int> to var_name, such that the returned name won't
-        collide with any variable-name values that already exist in the dict.
+        Given a uuid, name, and optionally a usage variable, create a usage
+        variable record and add it to the namespace.
-        var_name : str
-            The variable name to make unique.
+        uuid : str
+            The uuid of the the artifact or result collection.
+        name : str
+            The not-yet-unique name of the artifact or result collection.
+        variable : UsageVariable or None
+            The optional UsageVariable instance to add to the record.
-            The unique integer-appended variable name.
+            The now-unique name of the artifact or result collection.
-        some_int = 0
-        unique_name = f'{var_name}_{some_int}'
-        values = self.data.values()
-        # no-prov nodes are stored with angle brackets around them, but
-        # those brackets shouldn't be considered on uniqueness check
-        while unique_name in values or f'<{unique_name}>' in values:
-            some_int += 1
-            unique_name = f"{var_name}_{some_int}"
+        unique_name = self._make_unique_name(name)
+        self._usg_var_ns[uuid] = UsageVariableRecord(unique_name, variable)
         return unique_name
-    def get_key(self, value: str):
+    def update_usg_var_record(self, uuid, variable):
+        '''
+        Given a uuid update the record to contain the passed usage variable.
+        The record is assumed to already be present in the namespace.
+        Parameters
+        ----------
+        uuid : str
+            The uuid of the artifact or result collection for which to update
+            the usage variable instance.
+        variable : UsageVariable
+            The usage variable to add to the record.
+        '''
+        self._usg_var_ns[uuid].variable = variable
+    def get_usg_var_record(self, uuid):
+        '''
+        Given a uuid, return the corresponding usage variable record, or none
+        if the uuid is not in the namespace.
+        Parameters
+        ----------
+        uuid : str
+            The uuid of the artifact or result collection for which to return
+            the record.
+        Returns
+        -------
+        UsageVariableRecord or None
+            The record if the uuid was found, otherwise None.
+        '''
+        try:
+            return self._usg_var_ns[uuid]
+        except KeyError:
+            return None
+    def get_usg_var_uuid(self, name: str) -> str:
-        Given some value in the dict, returns its key.
+        Given a usage variable name, return its uuid, or raise KeyError if the
+        name is not in the namespace.
-        value : str
-            The value to query.
+        name : str
+            The name of the usage variable record of interest.
-            The key (uuid) corresponding to the value (variable name).
+            The corresponding uuid of the record.
-            If the value is not found.
+            If the name is not found in the namespace.
-        for key, val in self.items():
-            if value == val:
-                return key
-        raise KeyError(f'passed value \'{value}\' does not exist in the dict.')
+        for uuid, record in self._usg_var_ns.items():
+            if name == record.name:
+                return uuid
+        raise KeyError(
+            f'The queried name \'{name}\' does not exist in the namespace.'
+        )
-    def wrap_val_in_angle_brackets(self, key: str):
+    def _make_unique_name(self, name: str) -> str:
-        Wraps the variable name pointed to by `key` in brackets `<>`.
+        Appends `_<some int>` to name, such that the returned name won't
+        collide with any variable names that already exist in `usg_var_ns`.
-        key : str
-            The key (uuid) of the variable name to wrap in brackets.
+        name : str
+            The variable name to make unique.
-        Notes
-        -----
-        If this is accidentally run twice, it will break things.
+        Returns
+        -------
+        str
+            The unique integer-appended variable name.
-        super().__setitem__(key, f'<{self.data[key]}>')
+        counter = 0
+        unique_name = f'{name}_{counter}'
+        names = [record.name for record in self._usg_var_ns.values()]
+        # no-provenance nodes are stored with angle brackets around them
+        while unique_name in names or f'<{unique_name}>' in names:
+            counter += 1
+            unique_name = f'{name}_{counter}'
-@dataclass
-class NamespaceCollections:
-    '''
-    A dataclass collection of objects that each track some useful bit of
-    information relevant to usage namespaces.
+        return unique_name
-    Attributes
-    ----------
-    usg_var_namespace : UsageVarsDict
-        A uuid -> variable-name mapping that ensures that variable names remain
-        unique in the dictionary.
-    usg_vars : dict
-        A uuid -> UsageVariable mapping. The names of the UsageVariables here
-        do not necessarily match those in `usg_var_namespace`.
-    action_namespace : set of str
-        A collection of unique action strings that look like
-        `{plugin}_{action}_{sequential int}`.
-    result_collection_ns : dict
-        Used to keep track of result collection members during usage rendering.
-        Structure is as follows:
+    def make_result_collection_namespace(self, dag: nx.digraph) -> dict:
+        '''
+        Constructs the result collections namespaces from the parsed digraph.
-        {
-            action-id: {
-                output-name: {
-                    'collection_uuid': uuid,
-                    'artifacts': {
-                        uuid: key-in-collection,
-                        (...),
-                    }
-                },
-                (...),
+        Parameters
+        ----------
+        dag : nx.digraph
+            The digraph representing the parsed provenance.
+        Returns
+        -------
+        dict
+            The result collection namespace.
+        '''
+        rc_ns = {}
+        for node in dag:
+            provnode = dag.get_node_data(node)
+            rc_key = provnode.action.result_collection_key
+            if rc_key:
+                # output result collection
+                action_id = provnode.action.action_id
+                output_name = provnode.action.output_name
+                if action_id not in rc_ns:
+                    rc_ns[action_id] = {}
+                if output_name not in rc_ns[action_id]:
+                    artifacts = {rc_key: provnode._uuid}
+                    rc_ns[action_id][output_name] = ResultCollectionRecord(
+                        collection_uuid=str(uuid4()), members=artifacts
+                    )
+                else:
+                    rc_ns[action_id][output_name].members[rc_key] = \
+                        provnode._uuid
+        return rc_ns
+    def make_result_collection_mappings(self) -> Tuple[Dict]:
+        '''
+        Builds two mappings:
+            - one from artifact uuid to a tuple of the uuid of the result
+              collection of which it is a member and its key in the collection
+            - one from the hash of the result collection contents (both with
+              and without keys) to the uuid of the result collection
+        Returns
+        -------
+        tuple of dict
+            The two result collection mappings.
+        '''
+        a_to_c = {}  # artifact uuid -> collection uuid
+        c_to_c = {}  # hash of collection contents -> collection uuid
+        for action_id in self.result_collection_ns:
+            for output_name in self.result_collection_ns[action_id]:
+                record = self.result_collection_ns[action_id][output_name]
+                for key, uuid in record.members.items():
+                    a_to_c[uuid] = (record.collection_uuid, key)
+                hashed_contents = self.hash_result_collection(record.members)
+                hashed_contents_with_keys = \
+                    self.hash_result_collection_with_keys(record.members)
+                c_to_c[hashed_contents] = record.collection_uuid
+                c_to_c[hashed_contents_with_keys] = record.collection_uuid
+        return a_to_c, c_to_c
+    def hash_result_collection_with_keys(self, members: Dict) -> int:
+        '''
+        Hashes the contents of a result collection. Useful for finding
+        corresponding usage variables when rendering the replay of result
+        collections. Order of the input result collection is not taken into
+        account (the result collections are ordered alphabetically by key).
+        Parameters
+        ----------
+        members : dict
+            The contents of a result collection, looks like:
+            {
+                'a': some-uuid,
+                'b': some-other-uuid,
+                (...)
-        }
-        where the action-id and the output-name uniquely identify a result
-        collection that came from some action, the `collection_uuid` key stores
-        a uuid for the entire collection needed for querying the
-        `usg_var_namespace`, and the `artifacts` key stores all result
-        collection members along with their keys so they can be accessed
-        properly.
-    '''
-    usg_var_namespace: UsageVarsDict = field(default_factory=UsageVarsDict)
-    usg_vars: Dict[str, UsageVariable] = field(default_factory=dict)
-    action_namespace: Set[str] = field(default_factory=set)
-    result_collection_ns: Dict = field(default_factory=dict)
-    rc_contents_to_rc_uuid: Dict = field(default_factory=dict)
-    artifact_uuid_to_rc_uuid: Dict = field(default_factory=dict)
+        Returns
+        -------
+        int
+            The hashed contents.
+        '''
+        sorted_members = {key: members[key] for key in sorted(members)}
+        hashable_members_with_keys = tuple(
+            (key, value) for key, value in sorted_members.items()
+        )
+        return hash(hashable_members_with_keys)
-@dataclass
-class ResultCollectionRecord:
-    collection_uuid: str
-    members: Dict[str, str]
+    def hash_result_collection(self, members: Union[Dict, List]) -> int:
+        '''
+        Hashes a list of uuids. Useful for finding corresponding result
+        collections that may have been cast to list of uuids. If a dict is
+        input it is first converted to a list of values (uuids).
+        Parameters
+        ----------
+        members : dict or list
+            The contents of a result collection, either as a dict or list.
+        Returns
+        -------
+        int
+            The hashed contents.
+        '''
+        if type(members) is dict:
+            members = list(members.values())
+        sorted_members = list(sorted(members))
+        hashable_members = tuple(uuid for uuid in sorted_members)
+        return hash(hashable_members)
+    def add_rc_member_to_ns(self, uuid, name, use):
+        '''
+        Accesses a result collection member of interest and adds it the
+        central usage variable namespace.
+        Parameters
+        ----------
+        uuid : str
+            The uuid of the artifact of interest.
+        name : str
+            The desired name of the to-be-made usage variable.
+        use : Usage
+            The currently executing usage driver.
+        '''
+        collection_uuid, key = self.artifact_uuid_to_rc_uuid[uuid]
+        collection_var = self.get_usg_var_record(collection_uuid).variable
+        var_name = self.add_usg_var_record(uuid, name)
+        usg_var = use.get_artifact_collection_member(
+            var_name, collection_var, key
+        )
+        self.update_usg_var_record(uuid, usg_var)
+    def uniquify_action_name(self, plugin: str, action: str) -> str:
+        '''
+        Creates a unique name by concatenating plugin, action, and a counter,
+        and adds this name to _action_ns before returning it.
+        Parameters
+        ----------
+        plugin : str
+            The name of the plugin.
+        action : str
+            The name of the action.
+        action_namespace : set of str
+            The collection of unqiue action names.
+        Returns
+        -------
+        str
+            The unique action name.
+        '''
+        counter = 0
+        plg_action_name = f'{plugin}_{action}_{counter}'
+        while plg_action_name in self._action_ns:
+            counter += 1
+            plg_action_name = f'{plugin}_{action}_{counter}'
+        self._action_ns.add(plg_action_name)
+        return plg_action_name
 def replay_provenance(
@@ -326,14 +527,7 @@ def replay_provenance(
         verbose=verbose, md_out_dir=md_out_dir
-    result_collection_ns = make_result_collection_namespace(dag)
-    artifact_uuid_to_rc_uuid, rc_contents_to_rc_uuid = \
-        make_result_collection_mappings(result_collection_ns)
-    ns = NamespaceCollections(
-        result_collection_ns=result_collection_ns,
-        artifact_uuid_to_rc_uuid=artifact_uuid_to_rc_uuid,
-        rc_contents_to_rc_uuid=rc_contents_to_rc_uuid
-    )
+    ns = ReplayNamespaces(dag)
     build_usage_examples(dag, cfg, ns)
     if not suppress_header:
@@ -348,122 +542,50 @@ def replay_provenance(
-def make_result_collection_namespace(dag: nx.digraph) -> dict:
-    '''
-    Constructs the result collections namespaces from the parsed digraph.
-    Parameters
-    ----------
-    dag : nx.digraph
-        The digraph representing the parsed provenance.
-    Returns
-    -------
-    dict
-        A fleshed-out dict to be attached to
-        `NamsepaceCollections.result_collection_ns`.
-    '''
-    rc_ns = {}
-    for node in dag:
-        provnode = dag.get_node_data(node)
-        rc_key = provnode.action.result_collection_key
-        if rc_key:
-            # output result collection
-            action_id = provnode.action.action_id
-            output_name = provnode.action.output_name
-            if action_id not in rc_ns:
-                rc_ns[action_id] = {}
-            if output_name not in rc_ns[action_id]:
-                artifacts = {rc_key: provnode._uuid}
-                rc_ns[action_id][output_name] = ResultCollectionRecord(
-                    collection_uuid=str(uuid4()), members=artifacts
-                )
-            else:
-                rc_ns[action_id][output_name].members[rc_key] = provnode._uuid
-    # TODO: maybe handle input result collections here
-    return rc_ns
-def make_result_collection_mappings(result_collection_ns: Dict) -> Tuple[Dict]:
-    '''
-    '''
-    a_to_c = {}  # artifact uuid -> collection uuid
-    c_to_c = {}  # hash of collection contents -> collection uuids
-    for action_id in result_collection_ns:
-        for output_name in result_collection_ns[action_id]:
-            record = result_collection_ns[action_id][output_name]
-            for key, uuid in record.members.items():
-                a_to_c[uuid] = (record.collection_uuid, key)
-            hashed_contents = hash_result_collection(record.members)
-            hashed_contents_with_keys = \
-                hash_result_collection_with_keys(record.members)
-            c_to_c[hashed_contents] = record.collection_uuid
-            c_to_c[hashed_contents_with_keys] = record.collection_uuid
-    return a_to_c, c_to_c
-def hash_result_collection_with_keys(members: Dict) -> int:
+def build_usage_examples(
+    dag: ProvDAG, cfg: ReplayConfig, ns: ReplayNamespaces
-    Hashes the contents of a result collection. Useful for finding
-    corresponding usage variables when rendering the replay of result
-    collections. Order of the input result collection is not taken into
-    account (the result collections are ordered alphabetically by key).
+    Builds a chained usage example representing the analysis `dag`.
-    members : dict
-        The contents of a result collection, looks like:
-        {
-            'a': some-uuid,
-            'b': some-other-uuid,
-            (...)
-        }
-    Returns
-    -------
-    int
-        The hashed contents.
-    '''
-    sorted_members = {key: members[key] for key in sorted(members)}
-    hashable_members_with_keys = tuple(
-        (key, value) for key, value in sorted_members.items()
-    )
-    return hash(hashable_members_with_keys)
-def hash_result_collection(members: Union[Dict, List]) -> int:
+    dag : ProvDAG
+       The dag representation of parsed provenance.
+    cfg : ReplayConfig
+        Replay configuration options.
+    ns : ReplayNamespaces
+        Info tracking usage and result collection namespaces.
-    Hashes a list of uuids. Useful for finding corresponding result collections
-    that may have been cast to list of uuids. If a dict is input it is first
-    converted to a list of values (uuids).
-    Parameters
-    ----------
-    members : dict or list
-        The contents of a result collection, either as a dict or list.
+    sorted_nodes = nx.topological_sort(dag.collapsed_view)
+    actions = group_by_action(dag, sorted_nodes, ns)
-    Returns
-    -------
-    int
-        The hashed contents.
-    '''
-    if type(members) is dict:
-        members = list(members.values())
+    for node_id in actions.no_provenance_nodes:
+        node = dag.get_node_data(node_id)
+        build_no_provenance_node_usage(node, node_id, ns, cfg)
-    sorted_members = list(sorted(members))
-    hashable_members = tuple(uuid for uuid in sorted_members)
+    for action_id in (std_actions := actions.std_actions):
+        # we are replaying actions not nodes, so any associated node works
+        try:
+            some_node_id = next(iter(std_actions[action_id]))
+            node = dag.get_node_data(some_node_id)
+        except KeyError:
+            # we have result collection
+            some_output_name = next(iter(ns.result_collection_ns[action_id]))
+            some_node_id = next(iter(
+                ns.result_collection_ns[action_id][
+                    some_output_name].members.values()
+            ))
+            node = dag.get_node_data(some_node_id)
-    return hash(hashable_members)
+        if node.action.action_type == 'import':
+            build_import_usage(node, ns, cfg)
+        else:
+            build_action_usage(node, ns, std_actions, action_id, cfg)
 def group_by_action(
-    dag: ProvDAG, nodes: Iterator[str], ns: NamespaceCollections
+    dag: ProvDAG, nodes: Iterator[str], ns: ReplayNamespaces
 ) -> ActionCollections:
     This groups the nodes from a DAG by action, returning an ActionCollections
@@ -482,7 +604,7 @@ def group_by_action(
         The dag representation of parsed provenance.
     nodes : iterator of str
         An iterator over node uuids.
-    ns : NamespaceCollections
+    ns : ReplayNamespaces
         Info tracking usage and result collection namespaces.
@@ -514,52 +636,10 @@ def group_by_action(
     return actions
-def build_usage_examples(
-    dag: ProvDAG, cfg: ReplayConfig, ns: NamespaceCollections
-    '''
-    Builds a chained usage example representing the analysis `dag`.
-    Parameters
-    ----------
-    dag : ProvDAG
-       The dag representation of parsed provenance.
-    cfg : ReplayConfig
-        Replay configuration options.
-    ns : NamespaceCollections
-        Info tracking usage and result collection namespaces.
-    '''
-    sorted_nodes = nx.topological_sort(dag.collapsed_view)
-    actions = group_by_action(dag, sorted_nodes, ns)
-    for node_id in actions.no_provenance_nodes:
-        node = dag.get_node_data(node_id)
-        build_no_provenance_node_usage(node, node_id, ns, cfg)
-    for action_id in (std_actions := actions.std_actions):
-        # we are replaying actions not nodes, so any associated node works
-        try:
-            some_node_id = next(iter(std_actions[action_id]))
-            node = dag.get_node_data(some_node_id)
-        except KeyError:
-            # we have result collection
-            some_output_name = next(iter(ns.result_collection_ns[action_id]))
-            some_node_id = next(iter(
-                ns.result_collection_ns[action_id][
-                    some_output_name].members.values()
-            ))
-            node = dag.get_node_data(some_node_id)
-        if node.action.action_type == 'import':
-            build_import_usage(node, ns, cfg)
-        else:
-            build_action_usage(node, ns, std_actions, action_id, cfg)
 def build_no_provenance_node_usage(
     node: Optional[ProvNode],
     uuid: str,
-    ns: NamespaceCollections,
+    ns: ReplayNamespaces,
     cfg: ReplayConfig
@@ -574,7 +654,7 @@ def build_no_provenance_node_usage(
         is available.
     uuid : str
         The uuid of the node/result.
-    ns : NamespaceCollections
+    ns : ReplayNamespaces
         Info tracking usage and result collection namespaces.
     cfg : ReplayConfig
         Replay configuration options. Contains the modified usage driver.
@@ -595,21 +675,22 @@ def build_no_provenance_node_usage(
         var_name = 'no-provenance-node'
         var_name = camel_to_snake(node.type)
-    ns.usg_var_namespace.update({uuid: var_name})
-    ns.usg_var_namespace.wrap_val_in_angle_brackets(uuid)
+    ns.add_usg_var_record(uuid, var_name)
     # make a usage variable for downstream consumption
     empty_var = cfg.use.usage_variable(
-        ns.usg_var_namespace[uuid], lambda: None, 'artifact'
+        ns.get_usg_var_record(uuid).name, lambda: None, 'artifact'
-    ns.usg_vars.update({uuid: empty_var})
+    ns.update_usg_var_record(uuid, empty_var)
     # log the no-prov node
-    cfg.use.comment(f"{uuid}   {ns.usg_vars[uuid].to_interface_name()}")
+    usg_var = ns.get_usg_var_record(uuid).variable
+    cfg.use.comment(f"{uuid}   {usg_var.to_interface_name()}")
 def build_import_usage(
-        node: ProvNode, ns: NamespaceCollections, cfg: ReplayConfig
+    node: ProvNode, ns: ReplayNamespaces, cfg: ReplayConfig
     Given a ProvNode, adds an import usage example for it, roughly
@@ -629,27 +710,29 @@ def build_import_usage(
     node : ProvNode
         The imported node of interest.
-    ns : NamespaceCollections
+    ns : ReplayNamespaces
         Info tracking usage and result collection namespaces.
     cfg : ReplayConfig
         Replay configuration options. Contains the modified usage driver.
     format_id = node._uuid + '_f'
-    ns.usg_var_namespace.update({format_id: camel_to_snake(node.type) + '_f'})
+    ns.add_usg_var_record(format_id, camel_to_snake(node.type) + '_f')
     format_for_import = cfg.use.init_format(
-        ns.usg_var_namespace[format_id], lambda: None
+        ns.get_usg_var_record(format_id).name, lambda: None
-    ns.usg_var_namespace.update({node._uuid: camel_to_snake(node.type)})
+    var_name = ns.add_usg_var_record(node._uuid, camel_to_snake(node.type))
     use_var = cfg.use.import_from_format(
-        ns.usg_var_namespace[node._uuid], node.type, format_for_import
+        var_name, node.type, format_for_import
-    ns.usg_vars.update({node._uuid: use_var})
+    ns.update_usg_var_record(node._uuid, use_var)
 def build_action_usage(
     node: ProvNode,
-    ns: NamespaceCollections,
+    ns: ReplayNamespaces,
     std_actions: Dict[str, Dict[str, str]],
     action_id: str,
     cfg: ReplayConfig
@@ -669,7 +752,7 @@ def build_action_usage(
     node : ProvNode
         The node the creating action of which is of interest.
-    ns : NamespaceCollections
+    ns : ReplayNamespaces
         Info tracking usage and result collection namespaces.
     std_actions : dict
         Explained in ActionCollections.
@@ -681,7 +764,7 @@ def build_action_usage(
     command_specific_md_context_has_been_printed = False
     plugin = node.action.plugin
     action = node.action.action_name
-    plg_action_name = uniquify_action_name(plugin, action, ns.action_namespace)
+    plg_action_name = ns.uniquify_action_name(plugin, action)
     inputs = _collect_action_inputs(cfg.use, ns, node)
@@ -697,11 +780,11 @@ def build_action_usage(
         if isinstance(param_val, MetadataInfo):
-            unique_md_id = ns.usg_var_namespace[node._uuid] + '_' + param_name
-            ns.usg_var_namespace.update(
-                {unique_md_id: camel_to_snake(param_name)}
+            unique_md_id = ns.get_usg_var_record(node._uuid).name \
+                           + '_' + param_name
+            md_fn = ns.add_usg_var_record(
+                unique_md_id, camel_to_snake(param_name)
-            md_fn = ns.usg_var_namespace[unique_md_id]
             if cfg.dump_recorded_metadata:
                 md_with_ext = md_fn + '.tsv'
@@ -712,12 +795,7 @@ def build_action_usage(
                 # the local dir and fp where md will be saved (if at all) is:
                 md_fn = f'{plg_action_name}/{md_fn}'
                 md = init_md_from_recorded_md(
-                    node,
-                    param_name,
-                    unique_md_id,
-                    ns.usg_var_namespace,
-                    cfg,
-                    md_fn
+                    node, param_name, unique_md_id, ns, cfg, md_fn
                 if not cfg.md_context_has_been_printed:
@@ -744,11 +822,7 @@ def build_action_usage(
                 if not param_val.input_artifact_uuids:
                     md = init_md_from_md_file(
-                        node,
-                        param_name,
-                        unique_md_id,
-                        ns.usg_var_namespace,
-                        cfg
+                        node, param_name, unique_md_id, ns, cfg
                     md = init_md_from_artifacts(param_val, ns, cfg)
@@ -763,14 +837,14 @@ def build_action_usage(
-    # write the usage vars into the UsageVars dict so we can use em downstream
+    # add the usage variable(s) to the namespace
     for res in usg_var:
-        uuid_key = ns.usg_var_namespace.get_key(value=res.name)
-        ns.usg_vars[uuid_key] = res
+        uuid_key = ns.get_usg_var_uuid(res.name)
+        ns.update_usg_var_record(uuid_key, res)
 def _collect_action_inputs(
-    use: Usage, ns: NamespaceCollections, node: ProvNode
+    use: Usage, ns: ReplayNamespaces, node: ProvNode
 ) -> dict:
     Returns a dict containing the action Inputs for a ProvNode.
@@ -780,7 +854,7 @@ def _collect_action_inputs(
     use : Usage
         The currently executing usage driver.
-    ns : NamespaceCollections
+    ns : ReplayNamespaces
         Info tracking usage and result collection namespaces.
     node : ProvNode
         The node whose creating action's inputs are of interest.
@@ -796,56 +870,67 @@ def _collect_action_inputs(
         # this as it was not provided
         if input_value is None:
         # Received a single artifact
         if type(input_value) is str:
-            if input_value not in ns.usg_vars:
-                ns.usg_vars[input_value] = _get_rc_member(
-                    use, ns, input_value, input_name)
+            if ns.get_usg_var_record(input_value) is None:
+                ns.add_rc_member_to_ns(input_value, input_name, use)
+            resolved_input = ns.get_usg_var_record(input_value).variable
-            resolved_input = ns.usg_vars[input_value]
         # Received a list of artifacts
         elif type(input_value) is list:
             # may be rc cast to list so search for equivalent rc
             # if not then follow algorithm for single str for each
-            input_hash = hash_result_collection(input_value)
+            input_hash = ns.hash_result_collection(input_value)
             if collection_uuid := ns.rc_contents_to_rc_uuid.get(input_hash):
                 # corresponding rc found
-                resolved_input = ns.usg_vars[collection_uuid]
+                resolved_input = ns.get_usg_var_record(
+                    collection_uuid
+                ).variable
                 # find each artifact and assemble into a list
                 input_list = []
                 for input_value in input_value:
-                    if input_value not in ns.usg_vars:
-                        ns.usg_vars[input_value] = _get_rc_member(
-                            use, ns, input_value, input_name)
+                    if ns.get_usg_var_record(input_value) is None:
+                        ns.add_rc_member_to_ns(input_value, input_name, use)
+                    input_list.append(
+                        ns.get_usg_var_record(input_value).variable
+                    )
-                    input_list.append(ns.usg_vars[input_value])
                 resolved_input = input_list
         # Received a dict of artifacts (ResultCollection)
         elif type(input_value) is dict:
-            # rc -- search for equivalent rc if not found then create new rc
+            # search for equivalent rc if not found then create new rc
             rc = input_value
-            input_hash = hash_result_collection_with_keys(rc)
+            input_hash = ns.hash_result_collection_with_keys(rc)
             if collection_uuid := ns.rc_contents_to_rc_uuid.get(input_hash):
                 # corresponding rc found
-                resolved_input = ns.usg_vars[collection_uuid]
+                resolved_input = ns.get_usg_var_record(
+                    collection_uuid
+                ).variable
                 # build new rc
                 new_rc = {}
                 for key, input_value in rc.items():
-                    if input_value not in ns.usg_vars:
-                        ns.usg_vars[input_value] = _get_rc_member(
-                            use, ns, input_value, input_name)
+                    if ns.get_usg_var_record(input_value) is None:
+                        ns.add_rc_member_to_ns(input_value, input_name, use)
-                    new_rc[key] = ns.usg_vars[input_value]
+                    new_rc[key] = ns.get_usg_var_record(input_value).variable
                 # make new rc usg var
                 new_collection_uuid = uuid4()
-                ns.usg_var_namespace[new_collection_uuid] = input_name
-                var_name = ns.usg_var_namespace[new_collection_uuid]
+                var_name = ns.add_usg_var_record(
+                    new_collection_uuid, input_name
+                )
                 usg_var = use.construct_artifact_collection(var_name, new_rc)
-                ns.usg_vars[new_collection_uuid] = usg_var
-                resolved_input = ns.usg_vars[new_collection_uuid]
+                ns.update_usg_var_record(new_collection_uuid, usg_var)
+                resolved_input = ns.get_usg_var_record(
+                    new_collection_uuid
+                ).variable
         # If we ever mess with inputs again and add a new type here this should
         # trip otherwise we should never see it
@@ -859,24 +944,8 @@ def _collect_action_inputs(
     return inputs_dict
-def _get_rc_member(use, ns, uuid, input_name):
-    '''
-    '''
-    # find in rc and render destructure
-    collection_uuid, key = ns.artifact_uuid_to_rc_uuid[uuid]
-    collection_name = ns.usg_vars[collection_uuid]
-    ns.usg_var_namespace[uuid] = input_name
-    var_name = ns.usg_var_namespace[uuid]
-    usg_var = use.get_artifact_collection_member(
-        var_name, collection_name, key
-    )
-    return usg_var
 def _uniquify_output_names(
-    ns: NamespaceCollections, raw_outputs: dict
+    ns: ReplayNamespaces, raw_outputs: dict
 ) -> dict:
     Returns a dict containing the uniquified output names from a ProvNode.
@@ -884,7 +953,7 @@ def _uniquify_output_names(
-    ns : NamespaceCollections
+    ns : ReplayNamespaces
         Info tracking usage and result collection namespaces.
     raw_outputs : dict
         Mapping of node uuid to output-name as seen in action.yaml.
@@ -896,9 +965,9 @@ def _uniquify_output_names(
     outputs = {}
     for uuid, output_name in raw_outputs:
-        ns.usg_var_namespace.update({uuid: output_name})
-        uniquified_output_name = ns.usg_var_namespace[uuid]
-        outputs.update({output_name: uniquified_output_name})
+        var_name = ns.add_usg_var_record(uuid, output_name)
+        outputs.update({output_name: var_name})
     return outputs
@@ -906,7 +975,7 @@ def init_md_from_recorded_md(
     node: ProvNode,
     param_name: str,
     md_id: str,
-    ns: UsageVarsDict,
+    ns: ReplayNamespaces,
     cfg: ReplayConfig,
     md_fn: str
 ) -> UsageVariable:
@@ -922,8 +991,8 @@ def init_md_from_recorded_md(
         The name of the parameter to which metadata was passed.
     md_id : str
         Looks like: f'{node uuid}_{param name}'.
-    ns : UsageVarsDict
-        Mapping of uuid -> unique variable name.
+    ns : ReplayNamespaces
+        Namespaces associated with provenance replay.
     cfg : ReplayConfig
         Replay configuration options. Contains the executing usage driver.
     md_fn : str
@@ -956,14 +1025,18 @@ def init_md_from_recorded_md(
         fn = str(cwd / 'recorded_metadata' / md_fn)
-    md = cfg.use.init_metadata(ns[md_id], factory, dumped_md_fn=fn)
+    md = cfg.use.init_metadata(
+        ns.get_usg_var_record(md_id).name, factory, dumped_md_fn=fn
+    )
     plugin = node.action.plugin
     action = node.action.action_name
     if param_is_metadata_column(cfg, param_name, plugin, action):
         mdc_id = node._uuid + '_mdc'
-        mdc_name = ns[md_id] + '_mdc'
-        ns.update({mdc_id: mdc_name})
-        md = cfg.use.get_metadata_column(ns[mdc_id], '<column name>', md)
+        mdc_name = ns.get_usg_var_record(md_id).name + '_mdc'
+        var_name = ns.add_usg_var_record(mdc_id, mdc_name)
+        md = cfg.use.get_metadata_column(var_name, '<column name>', md)
     return md
@@ -971,7 +1044,7 @@ def init_md_from_md_file(
     node: ProvNode,
     param_name: str,
     md_id: str,
-    ns: UsageVarsDict,
+    ns: ReplayNamespaces,
     cfg: ReplayConfig
 ) -> UsageVariable:
@@ -986,8 +1059,8 @@ def init_md_from_md_file(
         The parameter name to which the metadata file was passed.
     md_id : str
         Looks like: f'{node uuid}_{param name}'.
-    ns : UsageVarsDict
-        Mapping of uuid -> unique variable name.
+    ns : ReplayNamespaces
+        Namespaces associated with provenance replay.
     cfg : ReplayConfig
         Replay configuration options. Contains the executing usage driver.
@@ -998,17 +1071,19 @@ def init_md_from_md_file(
     plugin = node.action.plugin
     action = node.action.action_name
-    md = cfg.use.init_metadata(ns[md_id], lambda: None)
+    md = cfg.use.init_metadata(ns.get_usg_var_record(md_id).name, lambda: None)
     if param_is_metadata_column(cfg, param_name, plugin, action):
         mdc_id = node._uuid + '_mdc'
-        mdc_name = ns[md_id] + '_mdc'
-        ns.update({mdc_id: mdc_name})
-        md = cfg.use.get_metadata_column(ns[mdc_id], '<column name>', md)
+        mdc_name = ns.get_usg_var_record(md_id).name + '_mdc'
+        var_name = ns.add_usg_var_record(mdc_id, mdc_name)
+        md = cfg.use.get_metadata_column(var_name, '<column name>', md)
     return md
 def init_md_from_artifacts(
-        md_inf: MetadataInfo, ns: NamespaceCollections, cfg: ReplayConfig
+        md_inf: MetadataInfo, ns: ReplayNamespaces, cfg: ReplayConfig
 ) -> UsageVariable:
     Initializes and returns a Metadata UsageVariable with no real data,
@@ -1023,7 +1098,7 @@ def init_md_from_artifacts(
         Named tuple with fields `input_artifact_uuids` which is a list of
         uuids and `relative_fp` which is the filename of the metadata file.
         These are parsed from a !metadata tag in action.yaml.
-    ns : NamespaceCollections
+    ns : ReplayNamespaces
         Info tracking usage and result collection namespaces.
     cfg: ReplayConfig
         Replay configuration options. Contains the executing usage driver.
@@ -1047,26 +1122,28 @@ def init_md_from_artifacts(
     md_files_in = []
     for artifact_uuid in md_inf.input_artifact_uuids:
         amd_id = artifact_uuid + '_a'
-        var_name = ns.usg_vars[artifact_uuid].name + '_a'
-        if amd_id not in ns.usg_var_namespace:
-            ns.usg_var_namespace.update({amd_id: var_name})
+        var_name = ns.get_usg_var_record(artifact_uuid).variable.name + '_a'
+        if ns.get_usg_var_record(amd_id) is None:
+            var_name = ns.add_usg_var_record(amd_id, var_name)
             art_as_md = cfg.use.view_as_metadata(
-                ns.usg_var_namespace[amd_id], ns.usg_vars[artifact_uuid]
+                var_name, ns.get_usg_var_record(artifact_uuid).variable
-            ns.usg_vars.update({amd_id: art_as_md})
+            ns.update_usg_var_record(amd_id, art_as_md)
-            art_as_md = ns.usg_vars[amd_id]
+            art_as_md = ns.get_usg_var_record(amd_id).variable
     if len(md_inf.input_artifact_uuids) > 1:
         # we can't uniquify this normally, because one uuid can be merged with
         # combinations of others
         merge_id = '-'.join(md_inf.input_artifact_uuids)
-        ns.usg_var_namespace.update({merge_id: 'merged_artifacts'})
+        var_name = ns.add_usg_var_record(merge_id, 'merged_artifacts')
         merged_md = cfg.use.merge_metadata(
-            ns.usg_var_namespace[merge_id], *md_files_in
+            var_name, *md_files_in
-        ns.usg_vars.update({merge_id: merged_md})
+        ns.update_usg_var_record(merge_id, merged_md)
     return art_as_md
@@ -1171,36 +1248,6 @@ def param_is_metadata_column(
     return 'MetadataColumn' in str(param_spec.qiime_type)
-def uniquify_action_name(
-    plugin: str, action: str, action_namespace: Set[str]
-) -> str:
-    '''
-    Creates a unique name by concatenating plugin, action, and a counter,
-    and adds this name to action_ns before returning it.
-    Parameters
-    ----------
-    plugin : str
-        The name of the plugin.
-    action : str
-        The name of the action.
-    action_namespace : set of str
-        The collection of unqiue action names.
-    Returns
-    -------
-    str
-        The unique action name.
-    '''
-    counter = 0
-    plg_action_name = f'{plugin}_{action}_{counter}'
-    while plg_action_name in action_namespace:
-        counter += 1
-        plg_action_name = f'{plugin}_{action}_{counter}'
-    action_namespace.add(plg_action_name)
-    return plg_action_name
 def collect_citations(
     dag: ProvDAG, deduplicate: bool = True
 ) -> bp.bibdatabase.BibDatabase:

@@ -62,9 +62,7 @@ class ValidateChecksumTests(unittest.TestCase):
             uuid = os.listdir(tempdir)[0]
             root_dir = os.path.join(tempdir, uuid)
-            print(os.listdir(root_dir))
             os.remove(os.path.join(root_dir, 'metadata.yaml'))
-            print(os.listdir(root_dir))
             with open(os.path.join(root_dir, 'tamper.txt'), 'w') as fh:
             citations_path = \

@@ -22,45 +22,22 @@ from qiime2.plugins import ArtifactAPIUsageVariable
 from ..parse import ProvDAG
 from ..replay import (
-    ActionCollections, BibContent, NamespaceCollections, ReplayConfig,
-    UsageVarsDict,
+    ActionCollections, BibContent, ReplayConfig, ReplayNamespaces,
+    UsageVariableRecord,
     build_no_provenance_node_usage, build_import_usage, build_action_usage,
     build_usage_examples, collect_citations, dedupe_citations,
     dump_recorded_md_file, group_by_action, init_md_from_artifacts,
     init_md_from_md_file, init_md_from_recorded_md, replay_provenance,
-    uniquify_action_name, replay_citations
-from .testing_utilities import (
-    CustomAssertions, DummyArtifacts
+    replay_citations
+from .testing_utilities import CustomAssertions, DummyArtifacts
 from ..usage_drivers import ReplayPythonUsage
 from ...provenance import MetadataInfo
 from qiime2.sdk.util import camel_to_snake
-class UsageVarsDictTests(unittest.TestCase):
-    def test_uniquify(self):
-        collision_val = 'emp_single_end_sequences'
-        unique_val = 'some_prime'
-        ns = UsageVarsDict({'123': collision_val})
-        self.assertEqual(ns.data, {'123': 'emp_single_end_sequences_0'})
-        ns.update({'456': collision_val, 'unique': unique_val})
-        self.assertEqual(ns['456'], 'emp_single_end_sequences_1')
-        self.assertEqual(ns['unique'], 'some_prime_0')
-        ns['789'] = collision_val
-        self.assertEqual(ns.pop('789'), 'emp_single_end_sequences_2')
-    def test_get_key(self):
-        ns = UsageVarsDict({'123': 'some_name'})
-        self.assertEqual('123', ns.get_key('some_name_0'))
-        with self.assertRaisesRegex(
-            KeyError, "passed value 'fake_key' does not exist"
-        ):
-            ns.get_key('fake_key')
-class NamespaceCollectionTests(unittest.TestCase):
+class ReplayNamespacesTests(unittest.TestCase):
     def setUpClass(cls):
         cls.das = DummyArtifacts()
@@ -70,33 +47,47 @@ class NamespaceCollectionTests(unittest.TestCase):
     def tearDownClass(cls):
+    def test_make_unique_name(self):
+        ns = ReplayNamespaces()
+        self.assertEqual('name_0', ns._make_unique_name('name'))
+        ns.add_usg_var_record('uuid1', 'name')
+        self.assertEqual('name_1', ns._make_unique_name('name'))
+    def test_get_usg_var_uuid(self):
+        ns = ReplayNamespaces()
+        ns.add_usg_var_record('uuid1', 'name')
+        self.assertEqual('uuid1', ns.get_usg_var_uuid('name_0'))
     def test_add_usage_var_workflow(self):
         Smoke tests a common workflow with this data structure
-        - Create a unique variable name by adding to .usg_var_namespace
+        - Create a unique variable name by adding to the namespace
         - Create a UsageVariable with that name
         - use the name to get the UUID (when we have Results, we have no UUIDs)
-        - add the correctly-named UsageVariable to .usg_vars
+        - add the correctly-named UsageVariable to the namespace
         use = Usage()
         uuid = self.das.concated_ints.uuid
         base_name = 'concated_ints'
         exp_name = base_name + '_0'
-        ns = NamespaceCollections()
-        ns.usg_var_namespace.update({uuid: base_name})
-        self.assertEqual(ns.usg_var_namespace[uuid], exp_name)
+        ns = ReplayNamespaces()
+        ns.add_usg_var_record(uuid, base_name)
+        self.assertEqual(ns.get_usg_var_record(uuid).name, exp_name)
         def factory():  # pragma: no cover
             return Artifact.load(self.das.concated_ints.filepath)
-        u_var = use.init_artifact(ns.usg_var_namespace[uuid], factory)
+        u_var = use.init_artifact(ns.get_usg_var_record(uuid).name, factory)
         self.assertEqual(u_var.name, exp_name)
-        actual_uuid = ns.usg_var_namespace.get_key(u_var.name)
+        actual_uuid = ns.get_usg_var_uuid(u_var.name)
         self.assertEqual(actual_uuid, uuid)
-        ns.usg_vars[uuid] = u_var
-        self.assertIsInstance(ns.usg_vars[uuid], UsageVariable)
-        self.assertEqual(ns.usg_vars[uuid].name, exp_name)
+        ns.update_usg_var_record(uuid, u_var)
+        self.assertIsInstance(
+            ns.get_usg_var_record(uuid).variable, UsageVariable
+        )
+        self.assertEqual(ns.get_usg_var_record(uuid).name, exp_name)
 class ReplayProvenanceTests(unittest.TestCase):
@@ -398,10 +389,13 @@ class BuildUsageExamplesTests(unittest.TestCase):
     def test_build_usage_examples(self, n_p_builder, imp_builder, act_builder):
-        ns = NamespaceCollections()
+        ns = ReplayNamespaces()
         dag = self.das.concated_ints_with_md.dag
-        cfg = ReplayConfig(use=ReplayPythonUsage(),
-                           use_recorded_metadata=False, pm=self.pm)
+        cfg = ReplayConfig(
+            use=ReplayPythonUsage(),
+            use_recorded_metadata=False,
+            pm=self.pm
+        )
         build_usage_examples(dag, cfg, ns)
@@ -415,15 +409,18 @@ class BuildUsageExamplesTests(unittest.TestCase):
     def test_build_usage_examples_lone_v0(
             self, n_p_builder, imp_builder, act_builder
-        ns = NamespaceCollections()
+        ns = ReplayNamespaces()
         uuid = self.das.table_v0.uuid
         with self.assertWarnsRegex(
                 UserWarning, f'(:?)Art.*{uuid}.*prior.*incomplete'
             dag = ProvDAG(self.das.table_v0.filepath)
-        cfg = ReplayConfig(use=ReplayPythonUsage(),
-                           use_recorded_metadata=False, pm=self.pm)
+        cfg = ReplayConfig(
+            use=ReplayPythonUsage(),
+            use_recorded_metadata=False,
+            pm=self.pm
+        )
         build_usage_examples(dag, cfg, ns)
         # This is a single v0 archive, so should have only one np node
@@ -443,15 +440,18 @@ class BuildUsageExamplesTests(unittest.TestCase):
         shutil.copy(self.das.table_v0.filepath, mixed_dir)
         shutil.copy(self.das.concated_ints_v6.filepath, mixed_dir)
-        ns = NamespaceCollections()
+        ns = ReplayNamespaces()
         v0_uuid = self.das.table_v0.uuid
         with self.assertWarnsRegex(
                 UserWarning, f'(:?)Art.*{v0_uuid}.*prior.*incomplete'
             dag = ProvDAG(mixed_dir)
-        cfg = ReplayConfig(use=ReplayPythonUsage(),
-                           use_recorded_metadata=False, pm=self.pm)
+        cfg = ReplayConfig(
+            use=ReplayPythonUsage(),
+            use_recorded_metadata=False,
+            pm=self.pm
+        )
         build_usage_examples(dag, cfg, ns)
@@ -471,10 +471,13 @@ class BuildUsageExamplesTests(unittest.TestCase):
         shutil.copy(self.das.splitted_ints.filepath, many_dir)
         shutil.copy(self.das.pipeline_viz.filepath, many_dir)
-        ns = NamespaceCollections()
+        ns = ReplayNamespaces()
         dag = ProvDAG(many_dir)
-        cfg = ReplayConfig(use=ReplayPythonUsage(),
-                           use_recorded_metadata=False, pm=self.pm)
+        cfg = ReplayConfig(
+            use=ReplayPythonUsage(),
+            use_recorded_metadata=False,
+            pm=self.pm
+        )
         build_usage_examples(dag, cfg, ns)
@@ -496,16 +499,19 @@ class MiscHelperFnTests(unittest.TestCase):
     def test_uniquify_action_name(self):
-        ns = set()
+        ns = ReplayNamespaces()
         p1 = 'dummy_plugin'
         a1 = 'action_jackson'
         p2 = 'dummy_plugin'
         a2 = 'missing_in_action'
-        unique1 = uniquify_action_name(p1, a1, ns)
+        unique1 = ns.uniquify_action_name(p1, a1)
         self.assertEqual(unique1, 'dummy_plugin_action_jackson_0')
-        unique2 = uniquify_action_name(p2, a2, ns)
+        unique2 = ns.uniquify_action_name(p2, a2)
         self.assertEqual(unique2, 'dummy_plugin_missing_in_action_0')
-        duplicate = uniquify_action_name(p1, a1, ns)
+        duplicate = ns.uniquify_action_name(p1, a1)
         self.assertEqual(duplicate, 'dummy_plugin_action_jackson_1')
     def test_dump_recorded_md_file_no_md(self):
@@ -539,7 +545,7 @@ class GroupByActionTests(unittest.TestCase):
     def test_gba_with_provenance(self):
         self.maxDiff = None
-        ns = NamespaceCollections()
+        ns = ReplayNamespaces()
         dag = self.das.concated_ints_v6.dag
         sorted_nodes = nx.topological_sort(dag.collapsed_view)
         actual = group_by_action(dag, sorted_nodes, ns)
@@ -558,7 +564,7 @@ class GroupByActionTests(unittest.TestCase):
         self.assertEqual(actual.no_provenance_nodes, [])
     def test_gba_no_provenance(self):
-        ns = NamespaceCollections()
+        ns = ReplayNamespaces()
         dag = self.das.table_v0.dag
         uuid = self.das.table_v0.uuid
@@ -573,7 +579,7 @@ class GroupByActionTests(unittest.TestCase):
         shutil.copy(self.das.table_v0.filepath, mixed_dir)
         shutil.copy(self.das.concated_ints_v6.filepath, mixed_dir)
-        ns = NamespaceCollections()
+        ns = ReplayNamespaces()
         v0_uuid = self.das.table_v0.uuid
         with self.assertWarnsRegex(
                 UserWarning, f'(:?)Art.*{v0_uuid}.*prior.*incomplete'
@@ -640,25 +646,33 @@ class InitializerTests(unittest.TestCase):
     def test_init_md_from_artifacts_no_artifacts(self):
-        cfg = ReplayConfig(use=ReplayPythonUsage(),
-                           use_recorded_metadata=False, pm=self.pm)
-        usg_vars = {}
+        cfg = ReplayConfig(
+            use=ReplayPythonUsage(),
+            use_recorded_metadata=False,
+            pm=self.pm
+        )
+        ns = ReplayNamespaces
         # create dummy hash '0', not relevant here
         md_info = MetadataInfo([], 'hmm.tsv', '0')
         with self.assertRaisesRegex(
             ValueError, "not.*used.*input_artifact_uuids.*empty"
-            init_md_from_artifacts(md_info, usg_vars, cfg)
+            init_md_from_artifacts(md_info, ns, cfg)
     def test_init_md_from_artifacts_one_art(self):
         # This helper doesn't capture real data, so we're only smoke testing,
         # checking type, and confirming the repr looks reasonable.
-        cfg = ReplayConfig(use=ReplayPythonUsage(),
-                           use_recorded_metadata=False, pm=self.pm)
+        cfg = ReplayConfig(
+            use=ReplayPythonUsage(),
+            use_recorded_metadata=False,
+            pm=self.pm
+        )
         # We expect artifact vars have already been added to the namespace
         a1 = cfg.use.init_artifact(name='thing1', factory=lambda: None)
-        ns = NamespaceCollections(usg_vars={'uuid1': a1})
+        ns = ReplayNamespaces()
+        ns._usg_var_ns = {'uuid1': UsageVariableRecord('thing1', a1)}
         # create dummy hash '0', not relevant here
         md_info = MetadataInfo(['uuid1'], 'hmm.tsv', '0')
@@ -673,15 +687,23 @@ class InitializerTests(unittest.TestCase):
     def test_init_md_from_artifacts_many(self):
         # This helper doesn't capture real data, so we're only smoke testing,
         # checking type, and confirming the repr looks reasonable.
-        cfg = ReplayConfig(use=ReplayPythonUsage(),
-                           use_recorded_metadata=False, pm=self.pm)
+        cfg = ReplayConfig(
+            use=ReplayPythonUsage(),
+            use_recorded_metadata=False,
+            pm=self.pm
+        )
         # We expect artifact vars have already been added to the namespace
         a1 = cfg.use.init_artifact(name='thing1', factory=lambda: None)
         a2 = cfg.use.init_artifact(name='thing2', factory=lambda: None)
         a3 = cfg.use.init_artifact(name='thing3', factory=lambda: None)
-        ns = NamespaceCollections(
-            usg_vars={'uuid1': a1, 'uuid2': a2, 'uuid3': a3})
+        ns = ReplayNamespaces()
+        ns._usg_var_ns = {
+            'uuid1': UsageVariableRecord('thing1', a1),
+            'uuid2': UsageVariableRecord('thing2', a2),
+            'uuid3': UsageVariableRecord('thing3', a3),
+        }
         # create dummy hash '0', not relevant here
         md_info = MetadataInfo(['uuid1', 'uuid2', 'uuid3'], 'hmm.tsv', '0')
@@ -694,9 +716,11 @@ class InitializerTests(unittest.TestCase):
         self.assertIn('thing1_a_0_md = thing1.view(Metadata)', rendered)
         self.assertIn('thing2_a_0_md = thing2.view(Metadata)', rendered)
         self.assertIn('thing3_a_0_md = thing3.view(Metadata)', rendered)
-        self.assertIn('merged_artifacts_0_md = '
-                      'thing1_a_0_md.merge(thing2_a_0_md, thing3_a_0_md)',
-                      rendered)
+        self.assertIn(
+            'merged_artifacts_0_md = '
+            'thing1_a_0_md.merge(thing2_a_0_md, thing3_a_0_md)',
+            rendered
+        )
     def test_init_md_from_md_file(self):
         dag = self.das.concated_ints_with_md.dag
@@ -704,9 +728,14 @@ class InitializerTests(unittest.TestCase):
         md_id = 'whatevs'
         param_name = 'metadata'
-        ns = UsageVarsDict({md_id: param_name})
-        cfg = ReplayConfig(use=ReplayPythonUsage(),
-                           use_recorded_metadata=False, pm=self.pm)
+        ns = ReplayNamespaces()
+        ns.add_usg_var_record(md_id, param_name)
+        cfg = ReplayConfig(
+            use=ReplayPythonUsage(),
+            use_recorded_metadata=False,
+            pm=self.pm
+        )
         var = init_md_from_md_file(md_node, param_name, md_id, ns, cfg)
@@ -724,9 +753,15 @@ class InitializerTests(unittest.TestCase):
         var_name = 'metadata_0'
         param_name = 'metadata'
-        ns = UsageVarsDict({var_name: param_name})
-        cfg = ReplayConfig(use=ReplayPythonUsage(),
-                           use_recorded_metadata=False, pm=self.pm)
+        ns = ReplayNamespaces()
+        ns.add_usg_var_record(var_name, param_name)
+        cfg = ReplayConfig(
+            use=ReplayPythonUsage(),
+            use_recorded_metadata=False,
+            pm=self.pm
+        )
         md_fn = 'identity_with_metadata/metadata_0'
         with self.assertRaisesRegex(ValueError, 'only.*call.*if.*metadata'):
@@ -743,8 +778,9 @@ class InitializerTests(unittest.TestCase):
         rendered = cfg.use.render()
         self.assertIn('from qiime2 import Metadata', rendered)
         self.assertIn('metadata_0_md = Metadata.load', rendered)
-        self.assertIn('recorded_metadata/identity_with_metadata/'
-                      'metadata_0', rendered)
+        self.assertIn(
+            'recorded_metadata/identity_with_metadata/metadata_0', rendered
+        )
     def test_init_md_from_recorded_mdc(self):
         dag = self.das.concated_ints_with_md_column.dag
@@ -753,9 +789,15 @@ class InitializerTests(unittest.TestCase):
         var_name = 'metadata_0'
         param_name = 'metadata'
-        ns = UsageVarsDict({var_name: param_name})
-        cfg = ReplayConfig(use=ReplayPythonUsage(),
-                           use_recorded_metadata=False, pm=self.pm)
+        ns = ReplayNamespaces()
+        ns.add_usg_var_record(var_name, param_name)
+        cfg = ReplayConfig(
+            use=ReplayPythonUsage(),
+            use_recorded_metadata=False,
+            pm=self.pm
+        )
         md_fn = 'identity_with_metadata_column/metadata_0'
         with self.assertRaisesRegex(ValueError, 'only.*call.*if.*metadata'):
@@ -789,7 +831,7 @@ class BuildNoProvenanceUsageTests(CustomAssertions):
     def test_build_no_provenance_node_usage_w_complete_node(self):
-        ns = NamespaceCollections()
+        ns = ReplayNamespaces()
         cfg = ReplayConfig(use=ReplayPythonUsage(),
                            use_recorded_metadata=False, pm=self.pm)
         uuid = self.das.table_v0.uuid
@@ -797,8 +839,9 @@ class BuildNoProvenanceUsageTests(CustomAssertions):
         v0_node = dag.get_node_data(uuid)
         build_no_provenance_node_usage(v0_node, uuid, ns, cfg)
-        out_var_name = '<feature_table_frequency_0>'
-        self.assertEqual(ns.usg_var_namespace, {uuid: out_var_name})
+        out_var_name = 'feature_table_frequency_0'
+        self.assertIn(uuid, ns._usg_var_ns)
+        self.assertEqual(ns._usg_var_ns[uuid].name, out_var_name)
         rendered = cfg.use.render()
         # Confirm the initial context comment is present once.
@@ -807,20 +850,24 @@ class BuildNoProvenanceUsageTests(CustomAssertions):
         self.assertREAppearsOnlyOnce(rendered, header)
         # Confirm expected values have been rendered
-        exp_v0 = f'# {uuid}   _feature_table_frequency_0_'
+        exp_v0 = f'# {uuid}   feature_table_frequency_0'
         self.assertRegex(rendered, exp_v0)
     def test_build_no_provenance_node_usage_uuid_only_node(self):
-        ns = NamespaceCollections()
-        cfg = ReplayConfig(use=ReplayPythonUsage(),
-                           use_recorded_metadata=False, pm=self.pm)
+        ns = ReplayNamespaces()
+        cfg = ReplayConfig(
+            use=ReplayPythonUsage(),
+            use_recorded_metadata=False,
+            pm=self.pm
+        )
         uuid = 'some-uuid'
         node = None
         build_no_provenance_node_usage(node, uuid, ns, cfg)
-        out_var_name = '<no-provenance-node_0>'
-        self.assertEqual(ns.usg_var_namespace, {uuid: out_var_name})
+        out_var_name = 'no-provenance-node_0'
+        self.assertIn(uuid, ns._usg_var_ns)
+        self.assertEqual(ns._usg_var_ns[uuid].name, out_var_name)
         rendered = cfg.use.render()
         # Confirm the initial context comment is present once.
@@ -829,11 +876,11 @@ class BuildNoProvenanceUsageTests(CustomAssertions):
         self.assertREAppearsOnlyOnce(rendered, header)
         # Confirm expected values have been rendered
-        exp_v0 = f'# {uuid}   _no_provenance_node_0_'
+        exp_v0 = f'# {uuid}   no_provenance_node_0'
         self.assertRegex(rendered, exp_v0)
     def test_build_no_provenance_node_usage_many(self):
-        ns = NamespaceCollections()
+        ns = ReplayNamespaces()
         cfg = ReplayConfig(
             use_recorded_metadata=False, pm=self.pm
@@ -849,12 +896,14 @@ class BuildNoProvenanceUsageTests(CustomAssertions):
         build_no_provenance_node_usage(v0_node, uuid, ns, cfg)
         build_no_provenance_node_usage(dummy_node, dummy_node_uuid, ns, cfg)
-        self.assertIn(uuid, ns.usg_var_namespace)
-        self.assertIn(dummy_node_uuid, ns.usg_var_namespace)
-        self.assertEqual(ns.usg_var_namespace[uuid],
-                         '<feature_table_frequency_0>')
-        self.assertEqual(ns.usg_var_namespace[dummy_node_uuid],
-                         '<feature_table_frequency_1>')
+        self.assertIn(uuid, ns._usg_var_ns)
+        self.assertIn(dummy_node_uuid, ns._usg_var_ns)
+        self.assertEqual(
+            ns._usg_var_ns[uuid].name, 'feature_table_frequency_0'
+        )
+        self.assertEqual(
+            ns._usg_var_ns[dummy_node_uuid].name, 'feature_table_frequency_1'
+        )
         rendered = cfg.use.render()
         # Confirm the initial context isn't repeated.
@@ -863,8 +912,8 @@ class BuildNoProvenanceUsageTests(CustomAssertions):
         self.assertREAppearsOnlyOnce(rendered, header)
         # Confirm expected values have been rendered
-        exp_og = f'# {uuid}   _feature_table_frequency_0_'
-        exp_dummy = f'# {uuid}-dummy   _feature_table_frequency_1_'
+        exp_og = f'# {uuid}   feature_table_frequency_0'
+        exp_dummy = f'# {uuid}-dummy   feature_table_frequency_1'
         self.assertRegex(rendered, exp_og)
         self.assertRegex(rendered, exp_dummy)
@@ -881,22 +930,27 @@ class BuildImportUsageTests(CustomAssertions):
     def test_build_import_usage_python(self):
-        ns = NamespaceCollections()
-        cfg = ReplayConfig(use=ReplayPythonUsage(),
-                           use_recorded_metadata=False, pm=self.pm)
+        ns = ReplayNamespaces()
+        cfg = ReplayConfig(
+            use=ReplayPythonUsage(),
+            use_recorded_metadata=False,
+            pm=self.pm
+        )
         dag = self.das.concated_ints_v6.dag
         import_uuid = '8dea2f1a-2164-4a85-9f7d-e0641b1db22b'
         import_node = dag.get_node_data(import_uuid)
         c_to_s_type = camel_to_snake(import_node.type)
         unq_var_nm = c_to_s_type + '_0'
         build_import_usage(import_node, ns, cfg)
-        rendered = cfg.use.render()
-        vars = ns.usg_vars
-        out_name = vars[import_uuid].to_interface_name()
-        self.assertIsInstance(vars[import_uuid], UsageVariable)
-        self.assertEqual(vars[import_uuid].var_type, 'artifact')
-        self.assertEqual(vars[import_uuid].name, unq_var_nm)
+        usg_var = ns.get_usg_var_record(import_uuid).variable
+        self.assertIsInstance(usg_var, UsageVariable)
+        self.assertEqual(usg_var.var_type, 'artifact')
+        self.assertEqual(usg_var.name, unq_var_nm)
+        rendered = cfg.use.render()
+        out_name = usg_var.to_interface_name()
         self.assertRegex(rendered, 'from qiime2 import Artifact')
         self.assertRegex(rendered, rf'{out_name} = Artifact.import_data\(')
         self.assertRegex(rendered, import_node.type)
@@ -1150,10 +1204,13 @@ class BuildActionUsageTests(CustomAssertions):
     def test_build_action_usage_python(self):
         plugin = 'dummy_plugin'
         action = 'concatenate_ints'
-        cfg = ReplayConfig(use=ReplayPythonUsage(),
-                           use_recorded_metadata=False, pm=self.pm)
+        cfg = ReplayConfig(
+            use=ReplayPythonUsage(),
+            use_recorded_metadata=False,
+            pm=self.pm
+        )
-        ns = NamespaceCollections()
+        ns = ReplayNamespaces()
         import_var_1 = ArtifactAPIUsageVariable(
             'imported_ints_0', lambda: None, 'artifact', cfg.use
@@ -1162,10 +1219,8 @@ class BuildActionUsageTests(CustomAssertions):
         import_uuid_1 = '8dea2f1a-2164-4a85-9f7d-e0641b1db22b'
         import_uuid_2 = '7727c060-5384-445d-b007-b64b41a090ee'
-        ns.usg_vars = {
-            import_uuid_1: import_var_1,
-            import_uuid_2: import_var_2
-        }
+        ns.add_usg_var_record(import_uuid_1, 'imported_ints', import_var_1)
+        ns.add_usg_var_record(import_uuid_2, 'imported_ints', import_var_2)
         dag = self.das.concated_ints_v6.dag
         action_uuid = '5035a60e-6f9a-40d4-b412-48ae52255bb5'
@@ -1176,14 +1231,15 @@ class BuildActionUsageTests(CustomAssertions):
         unique_var_name = node.action.output_name + '_0'
         build_action_usage(node, ns, actions.std_actions, action_uuid, cfg)
-        rendered = cfg.use.render()
-        out_name = ns.usg_vars[node_uuid].to_interface_name()
-        vars = ns.usg_vars
-        self.assertIsInstance(vars[node_uuid], UsageVariable)
-        self.assertEqual(vars[node_uuid].var_type, 'artifact')
-        self.assertEqual(vars[node_uuid].name, unique_var_name)
+        usg_var = ns.get_usg_var_record(node_uuid).variable
+        out_name = usg_var.to_interface_name()
+        self.assertIsInstance(usg_var, UsageVariable)
+        self.assertEqual(usg_var.var_type, 'artifact')
+        self.assertEqual(usg_var.name, unique_var_name)
+        rendered = cfg.use.render()
             rendered, f"import.*{plugin}.actions as {plugin}_actions"
@@ -1207,7 +1263,7 @@ class BuildActionUsageTests(CustomAssertions):
             dag = self.das.concated_ints_with_md.dag
             node = dag.get_node_data(node_uuid)
-            ns = NamespaceCollections()
+            ns = ReplayNamespaces()
             mapping_var = ArtifactAPIUsageVariable(
                 'imported_mapping_0', lambda: None, 'artifact', cfg.use
@@ -1220,23 +1276,28 @@ class BuildActionUsageTests(CustomAssertions):
             mapping_import_uuid = '8f71b73d-b028-4cbc-9894-738bdfe718bf'
             intseq_import_uuid_1 = '0bb6d731-155a-4dd0-8a1e-98827bc4e0bf'
             intseq_import_uuid_2 = 'e6b37bae-3a14-40f7-87b4-52cf5c7c7a1d'
-            ns.usg_vars = {
-                mapping_import_uuid: mapping_var,
-                intseq_import_uuid_1: intseq_var_1,
-                intseq_import_uuid_2: intseq_var_2,
-            }
+            ns.add_usg_var_record(
+                mapping_import_uuid, 'imported_mapping', mapping_var
+            )
+            ns.add_usg_var_record(
+                intseq_import_uuid_1, 'imported_ints', intseq_var_1
+            )
+            ns.add_usg_var_record(
+                intseq_import_uuid_2, 'imported_ints', intseq_var_2
+            )
             actions = ActionCollections(
                 std_actions={action_uuid: {node_uuid: 'out'}}
             build_action_usage(node, ns, actions.std_actions, action_uuid, cfg)
-            rendered = cfg.use.render()
-            vars = ns.usg_vars
-            self.assertIsInstance(vars[node_uuid], UsageVariable)
-            self.assertEqual(vars[node_uuid].var_type, 'artifact')
-            self.assertEqual(vars[node_uuid].name, 'out_0')
+            usg_var = ns.get_usg_var_record(node_uuid).variable
+            self.assertIsInstance(usg_var, UsageVariable)
+            self.assertEqual(usg_var.var_type, 'artifact')
+            self.assertEqual(usg_var.name, 'out_0')
+            rendered = cfg.use.render()
             self.assertIn('from qiime2 import Metadata', rendered)
             self.assertIn('.view(Metadata)', rendered)
             self.assertIn(f'.{action}(', rendered)

@@ -567,6 +567,32 @@ class Cache:
             version_file = fh.read()
             return regex.match(version_file) is not None
+    @classmethod
+    def validate_key(cls, key):
+        """Validates that the given key is a valid Python idenitifier with the
+        exception that - is allowed.
+        Parameters
+        ----------
+        key : str
+            The name of the key to validate.
+        Raises
+        ------
+        ValueError
+            If the key passed in is not a valid Python identifier. We enforce
+            this to ensure no one creates keys that cause issues when we try to
+            load them.
+        """
+        validation_key = key.replace('-', '_')
+        if not validation_key.isidentifier():
+            raise ValueError(f"Key '{key}' is not a valid Python identifier. "
+                             "Keys may contain '-' characters but must "
+                             "otherwise be valid Python identifiers. Python "
+                             "identifier rules may be found here "
+                             "https://www.askpython.com/python/"
+                             "python-identifiers-rules-best-practices")
     def _create_cache_contents(self):
         """Create the cache directory, all sub directories, and the version
@@ -859,24 +885,8 @@ class Cache:
             The path to the data or pool we are keying.
         pool : bool
             Whether we are keying a pool or not.
-        Raises
-        ------
-        ValueError
-            If the key passed in is not a valid Python identifier. We enforce
-            this to ensure no one creates keys that cause issues when we try to
-            load them.
-        # We require keys to be valid Python identifiers with the single caveat
-        # that they may also contain dashes
-        validation_key = key.replace('-', '_')
-        if not validation_key.isidentifier():
-            raise ValueError("Keys may contain '-' characters but must "
-                             "otherwise be valid Python identifiers. Python "
-                             "identifier rules may be found here "
-                             "https://www.askpython.com/python/"
-                             "python-identifiers-rules-best-practices")
+        self.validate_key(key)
         key_fp = self.keys / key
         key_dict = {}

@@ -1120,3 +1120,9 @@ other_plugin.methods.register_function(
     name='Concatenate integers',
     description='Some description'
+@other_plugin.register_transformer
+def _9999999(ff: SingleIntFormat) -> str:
+    with ff.open() as fh:
+        return fh.read()

@@ -44,7 +44,7 @@ def _expr(expr):
         return _build_predicate(expr.func.id, args, kwargs)
     if node is ast.Subscript:
-        field_expr = expr.slice.value
+        field_expr = expr.slice
         if type(field_expr) is ast.Tuple:
             field_expr = field_expr.elts
@@ -219,7 +219,14 @@ def ast_to_type(json_ast, scope=None):
         name = json_ast['name']
         if not json_ast['builtin']:
-            base_template = semantic.SemanticType(name).template
+            base_template = semantic.SemanticType(
+                name,
+                field_names=['field' + str(i) for i in range(len(fields))],
+                field_members={
+                    ('field' + str(i)):
+                    [child] for i, child in enumerate(fields)
+                },
+            ).template
         elif name == 'Visualization':
             return visualization.Visualization
         elif name in {'List', 'Set', 'Tuple', 'Collection'}:

@@ -302,7 +302,8 @@ class PipelineSignature:
         for output_name, spec in outputs.items():
             if not (is_semantic_type(spec.qiime_type) or
-                    spec.qiime_type == Visualization):
+                    spec.qiime_type == Visualization or
+                    spec.qiime_type == Collection[Visualization]):
                 raise TypeError(
                     "Output %r must be a semantic QIIME type or "
                     "Visualization, not %r"

@@ -99,6 +99,30 @@ class TestParsing(unittest.TestCase):
         self.assertIs(V1.mapping, W1.mapping)
         self.assertIs(W1.mapping, X1.mapping)
+    def test_TypeMap_with_properties(self):
+        I, OU = TypeMap({
+            C1[Foo % Properties(['A', 'B', 'C'])]: Str,
+            C1[Foo % Properties(['A', 'B'])]: Str,
+            C1[Foo % Properties(['A', 'C'])]: Str,
+            C1[Foo % Properties(['B', 'C'])]: Str,
+            C1[Foo % Properties(['A'])]: Str,
+            C1[Foo % Properties(['B'])]: Str,
+            C1[Foo % Properties(['C'])]: Str,
+        })
+        scope = {}
+        i = ast_to_type(I.to_ast(), scope=scope)
+        o = ast_to_type(OU.to_ast(), scope=scope)
+        self.assertEqual(scope[id(I.mapping)], [i, o])
+        self.assertEqual(len(scope), 1)
+        # Assert mapping is the same after ast_to_type call
+        self.assertEqual(I.mapping.lifted, i.mapping.lifted)
+        # Assert that the mapping object is the same in both i and o
+        self.assertIs(i.mapping, o.mapping)
     def test_syntax_error(self):
         with self.assertRaisesRegex(ValueError, "could not be parsed"):

@@ -415,7 +415,7 @@ class MetadataWriter:
             df = md.to_dataframe(encode_missing=True)
             df.fillna('', inplace=True)
-            df = df.applymap(self._format)
+            df = df.map(self._format)
     def _non_default_missing(self, missing_directive):

@@ -1286,7 +1286,7 @@ class CategoricalMetadataColumn(MetadataColumn):
                     "%r of type %r in column %r." %
                     (cls.__name__, value, type(value), series.name))
-        norm_series = series.apply(normalize, convert_dtype=False)
+        norm_series = series.apply(normalize).astype(object)
         norm_series.index = norm_series.index.str.strip()
         norm_series.name = norm_series.name.strip()
         return norm_series

@@ -16,6 +16,7 @@ import itertools
 import qiime2
 from qiime2.sdk import usage
+from qiime2.plugin.util import transform
 from qiime2.plugin.model.base import FormatBase
@@ -241,8 +242,7 @@ class TestPluginBase(unittest.TestCase):
                 shutil.copy(filepath, source_path)
         input = source_format(source_path, mode='r')
-        transformer = self.get_transformer(source_format, target)
-        obs = transformer(input)
+        obs = transform(input, from_type=source_format, to_type=target)
         if issubclass(target, FormatBase):
             self.assertIsInstance(obs, (type(pathlib.Path()), str, target))

@@ -9,11 +9,15 @@
 import unittest
 import tempfile
+from qiime2.core.testing.format import SingleIntFormat
 from qiime2.core.testing.util import get_dummy_plugin
 from qiime2.plugin.testing import TestPluginBase
 class TestTesting(TestPluginBase):
+    package = 'qiime2.sdk.tests'
     def setUp(self):
         self.plugin = get_dummy_plugin()
@@ -24,6 +28,12 @@ class TestTesting(TestPluginBase):
     def tearDown(self):
+    def test_transformer_in_other_plugin(self):
+        _, obs = self.transform_format(SingleIntFormat, str,
+                                       filename='singleint.txt')
+        self.assertEqual('42', obs)
     def test_examples(self):

@@ -438,13 +438,23 @@ class Action(metaclass=abc.ABCMeta):
         # pipeline that calls two other pipelines within it and execute both of
         # those internal pipelines simultaneously.
         if isinstance(self, qiime2.sdk.action.Pipeline):
+            # If ctx._parent is None then this is the root pipeline and we want
+            # to dispatch it to a join_app
             execution_ctx['parsl_type'] = 'DFK'
-            # NOTE: Do not make this a python_app(join=True). We need it to run
-            # in the parsl main thread
-            future = join_app()(
-                    _run_parsl_action)(self, ctx, execution_ctx,
-                                       mapped_args, mapped_kwargs,
-                                       inputs=futures)
+            if ctx._parent is None:
+                # NOTE: Do not make this a python_app(join=True). We need it to
+                # run in the parsl main thread
+                future = join_app()(
+                        _run_parsl_action)(self, ctx, execution_ctx,
+                                           mapped_args, mapped_kwargs,
+                                           inputs=futures)
+            # If there is a parent then this is not the root pipeline and we
+            # want to just _bind it with a parallel context. The fact that
+            # parallel is set on the context will cause ctx.get_action calls in
+            # the pipeline to use the action's _bind_parsl method.
+            else:
+                return self._bind(lambda: qiime2.sdk.Context(ctx),
+                                  execution_ctx=execution_ctx)(*args, **kwargs)
             execution_ctx['parsl_type'] = \

@@ -614,11 +614,14 @@ class ResultCollection:
+        order_string = ''
+        for name, result in self.collection.items():
+            result_fp = os.path.join(directory, name)
+            result.save(result_fp)
+            order_string += f'{name}\n'
         with open(os.path.join(directory, '.order'), 'w') as fh:
-            for name, result in self.collection.items():
-                result_fp = os.path.join(directory, name)
-                result.save(result_fp)
-                fh.write(f'{name}\n')
+            fh.write(order_string)
         # Do this to give us a unified API with Result.save
         return directory

@@ -0,0 +1 @@
+42
\ No newline at end of file

@@ -30,7 +30,6 @@ from qiime2.metadata.tests.test_io import get_data_path
 class TestBadInputs(TestPluginBase):
     def make_provenance_capture(self):
         # importing visualizations is not supported, but we do that here to
         # simplify testing machinery

@@ -16,6 +16,7 @@ import qiime2.sdk
 from qiime2.core.testing.util import get_dummy_plugin
 from qiime2.core.testing.type import IntSequence1, SingleInt, Mapping
 from qiime2.plugin import Visualization, Int, Bool
+import qiime2.sdk.parallel_config
 class TestPipeline(unittest.TestCase):
@@ -339,6 +340,21 @@ class TestPipeline(unittest.TestCase):
         self.assertEqual(obs, exp)
+    def test_nested_pipeline_parallel(self):
+        ''' This test basically just validates that nested pipelines in
+            parallel don't blow anything up. It was added concurrently with
+            nested parallel pipelines being flattened.
+        '''
+        pipeline = self.plugin.pipelines['pipelines_in_pipeline']
+        ints = qiime2.Artifact.import_data(IntSequence1, [1, 2, 3])
+        mapping = qiime2.Artifact.import_data(Mapping, {'foo': '42'})
+        with qiime2.sdk.parallel_config.ParallelConfig():
+            pipeline.parallel(ints, mapping)._result()
+        self.assertTrue(True)
     def test_failing_from_arity(self):
         for call in self.iter_callables('failing_pipeline'):
             with self.assertRaisesRegex(TypeError, 'match number.*3.*1'):

View it on GitLab: https://salsa.debian.org/med-team/qiime/-/compare/085813b0deb68faab2abc37d44a2fdcb303fc86c...ae2c4da4ebe511dd67cdd67bb19d9229546a26eb

This project does not include diff previews in email notifications.
View it on GitLab: https://salsa.debian.org/med-team/qiime/-/compare/085813b0deb68faab2abc37d44a2fdcb303fc86c...ae2c4da4ebe511dd67cdd67bb19d9229546a26eb
You're receiving this email because of your account on salsa.debian.org.

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240606/4985e24f/attachment-0001.htm>

More information about the debian-med-commit mailing list