# Source code for jwst.associations.lib.constraint

"""Constraints - use these to define the rules governing association candidate types."""

import abc
import collections
from copy import deepcopy
from itertools import chain
import logging
import re

from .process_list import ListCategory, ProcessList
from .utilities import evaluate, getattr_from_list, is_iterable
from ..pool import PoolRow

# Names exported by ``from <this module> import *``.
__all__ = [
    "AttrConstraint",
    "Constraint",
    "ConstraintTrue",
    "SimpleConstraint",
]

# Configure logging
# The module-level logger gets a NullHandler so that it always has at least
# one handler attached; this module installs no other handlers itself.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())


class SimpleConstraintABC(abc.ABC):
    """
    Abstract base class for leaf ("simple") constraints.

    Every attribute whose name does not start with an underscore is kept
    in a private dictionary rather than on the instance itself. That
    dictionary is what `preserve` and `restore` snapshot and roll back.

    Parameters
    ----------
    init : dict or None
        Dictionary where the key:value pairs define the constraint
        attributes. When given, `kwargs` is ignored.

    value : object or None
        Value that must be matched.

    name : str or None
        Optional name for the constraint.

    **kwargs : key:value pairs
        Other initialization parameters. Only used when `init` is None.

    Attributes
    ----------
    found_values : set(str[,...])
        Set of actual found values for this condition. True SimpleConstraints
        do not normally set this; the value is not different than `value`.

    matched : bool
        Result of the last call to `check_and_set`.
    """

    # Attributes to show in the string representation.
    _str_attrs: tuple = ("name", "value")

    def __new__(cls, *args, **kwargs):  # noqa: ARG003
        """
        Create the attribute storage before any other attribute is set.

        `__setattr__` writes into the attribute dictionary, so it has to
        exist before `__init__` (of this class or a subclass) runs.

        Returns
        -------
        ~jwst.associations.lib.constraint.SimpleConstraintABC
            New instance of class.
        """
        instance = super().__new__(cls)
        instance._constraint_attributes = {}  # noqa: SLF001
        instance._ca_history = collections.deque()  # noqa: SLF001
        return instance

    def __init__(self, init=None, value=None, name=None, **kwargs):
        # Defined attributes. The order is significant: it fixes the
        # key order of the attribute dictionary shown by `__repr__`.
        defaults = (
            ("value", value),
            ("name", name),
            ("matched", False),
            ("found_values", set()),
        )
        for attribute, initial in defaults:
            setattr(self, attribute, initial)

        # `init`, when present, wins over the loose keyword arguments.
        self._constraint_attributes.update(kwargs if init is None else init)

    def __getattr__(self, name):
        """
        Retrieve user defined attribute.

        Only called when normal attribute lookup has failed.

        Returns
        -------
        any
            Attribute corresponding to provided name.

        Raises
        ------
        AttributeError
            If no attribute of that name is defined.
        """
        if name.startswith("_"):
            # Private names never live in the attribute dictionary.
            return super().__getattribute__(name)

        attributes = self._constraint_attributes
        if name not in attributes:
            raise AttributeError(f"No such attribute {name}")
        return attributes[name]

    def __setattr__(self, name, value):
        """Store all non-private attributes in the user dictionary."""
        if name.startswith("_"):
            object.__setattr__(self, name, value)
            return
        self._constraint_attributes[name] = value

    @abc.abstractmethod
    def check_and_set(self, item):
        """
        Check and set the constraint.

        This base implementation unconditionally matches and records
        `value` as a found value.

        Returns
        -------
        success, reprocess : bool, [~jwst.associations.ProcessList[,...]]
            Returns 2-tuple of

                - True if check is successful.
                - List of `~jwst.associations.ProcessList`.
        """
        self.matched = True
        self.found_values.add(self.value)
        return self.matched, []

    @property
    def dup_names(self):
        """
        Return dictionary of constraints with duplicate names.

        A leaf constraint cannot contain duplicates, so the result is
        always empty. This method is meant to be overridden by classes
        that need to traverse a list of constraints.

        Returns
        -------
        dups : {str: [constraint[,...]][,...]}
            Returns a mapping between the duplicated name
            and all the constraints that define that name.
        """
        return {}

    @property
    def id(self):
        """
        Return identifier for the constraint.

        Returns
        -------
        id : str
            The identifier, of the form ``ClassName:name``.
        """
        return "{}:{}".format(type(self).__name__, self.name)

    def copy(self):
        """
        Copy self.

        Returns
        -------
        object
            Deepcopy of self.
        """
        return deepcopy(self)

    def get_all_attr(self, attribute, name=None):
        """
        Return the specified attribute.

        This method exists solely to support `Constraint.get_all_attr`.
        This obviates the need for class/method checking.

        Parameters
        ----------
        attribute : str
            The attribute to retrieve

        name : str or None
            Only return attribute if the name of the current constraint
            matches the requested named constraints. If None, always
            return value.

        Returns
        -------
        [(self, value)] : [(SimpleConstraint, object)]
            The value of the attribute, paired with this constraint, in a
            one-element list. If the name does not match, or the attribute
            is undefined, None, or an empty list/set, an empty list is
            returned.
        """
        if name is not None and name != self.name:
            return []

        found = getattr(self, attribute, None)
        if found is None:
            return []

        # Empty lists and sets count as "no value".
        if isinstance(found, (list, set)) and not len(found):
            return []

        return [(self, found)]

    def restore(self):
        """Restore constraint state from the most recent `preserve`."""
        history = self._ca_history
        if not history:
            logger.debug("No more attribute history to restore from. restore is a NOOP")
            return
        self._constraint_attributes = history.pop()

    def preserve(self):
        """Save the current state of the constraints."""
        current = self._constraint_attributes
        snapshot = current.copy()
        # The set is mutated in place by `check_and_set`, so the snapshot
        # needs its own copy.
        snapshot["found_values"] = current["found_values"].copy()
        self._ca_history.append(snapshot)

    # Make iterable to work with `Constraint`.
    # Since this is a leaf, simple return ourselves.
    def __iter__(self):
        yield self

    def __repr__(self):
        return f"{type(self).__name__}({self._constraint_attributes!s})"

    def __str__(self):
        shown = {str_attr: getattr(self, str_attr) for str_attr in self._str_attrs}
        return f"{type(self).__name__}({shown})"

class ConstraintTrue(SimpleConstraintABC):
    """Constraint that is satisfied by every item."""

    def check_and_set(self, item):
        """
        Match unconditionally by deferring to the parent implementation.

        The parent method is abstract, so it has to be overridden here
        even though nothing is added to it.

        Parameters
        ----------
        item : object
            The item being checked; passed through to the parent method.

        Returns
        -------
        success, reprocess : bool, list
            The result of the parent abstract method: ``(True, [])``.
        """
        return super().check_and_set(item)
class SimpleConstraint(SimpleConstraintABC):
    """
    A basic constraint.

    Examples
    --------
    Create a constraint where the attribute `attr` of an object matches
    the value `my_value`:

    >>> c = SimpleConstraint(value="my_value")
    >>> print(c)
    SimpleConstraint({'name': None, 'value': 'my_value'})

    To check a constraint, call `check_and_set`. A successful match
    will return a tuple of `True` and a reprocess list.

    >>> item = "my_value"
    >>> c.check_and_set(item)
    (True, [])

    If it doesn't match, `False` will be returned.

    >>> bad_item = "not_my_value"
    >>> c.check_and_set(bad_item)
    (False, [])

    A `SimpleConstraint` can also be initialized by a `dict`
    of the relevant parameters:

    >>> init = {"value": "my_value"}
    >>> c = SimpleConstraint(init)
    >>> print(c)
    SimpleConstraint({'name': None, 'value': 'my_value'})

    If the value to check is `None`, the `SimpleConstraint` will
    successfully match whatever object given. The constraint is then
    modified in place: its `value` is set to whatever the source value
    of the object was.

    >>> c = SimpleConstraint(value=None)
    >>> matched, reprocess = c.check_and_set(item)
    >>> print(c)
    SimpleConstraint({'name': None, 'value': 'my_value'})

    This behavior can be overridden by the `force_unique` parameter:

    >>> c = SimpleConstraint(value=None, force_unique=False)
    >>> matched, reprocess = c.check_and_set(item)
    >>> print(c)
    SimpleConstraint({'name': None, 'value': None})
    """

    def __init__(
        self,
        init=None,
        sources=None,
        force_unique=True,
        test=None,
        reprocess_on_match=False,
        reprocess_on_fail=False,
        work_over=ListCategory.BOTH,
        reprocess_rules=None,
        **kwargs,
    ):
        """
        Initialize a new SimpleConstraint.

        Parameters
        ----------
        init : dict
            Dictionary where the key:value pairs define
            the following parameters. Entries given here override
            the corresponding keyword arguments.

        sources : func(item) or None
            Function taking `item` as argument used to
            retrieve a value to check against.
            If None, the item itself is used as the value.

        force_unique : bool
            If the constraint is satisfied, reset `value`
            to the value of the source.

        test : function
            The test function for the constraint.
            Takes two arguments:

                - constraint
                - object to compare against.

            Returns a boolean.
            Default is `SimpleConstraint.eq`

        reprocess_on_match : bool
            Reprocess the item if the constraint is satisfied.

        reprocess_on_fail : bool
            Reprocess the item if the constraint is not satisfied.

        work_over : ListCategory.[BOTH, EXISTING, RULES]
            The condition on which this constraint should operate.

        reprocess_rules : [rule[,..]] or None
            List of rules to be applied to.
            If None, calling function will determine the ruleset.
            If empty, [], all rules will be used.

        **kwargs : key:value pairs
            Passed on to the base class (e.g. `value`, `name`).
        """
        # Defined attributes
        self.sources = sources
        self.force_unique = force_unique
        self.test = test
        self.reprocess_on_match = reprocess_on_match
        self.reprocess_on_fail = reprocess_on_fail
        self.work_over = work_over
        self.reprocess_rules = reprocess_rules
        super().__init__(init=init, **kwargs)

        # Give defaults some real meaning.
        # Inspect the stored attributes rather than the call arguments:
        # `init` may have supplied `sources` or `test`, and those must not
        # be overwritten by the defaults.
        if self.sources is None:
            self.sources = lambda item: item
        if self.test is None:
            self.test = self.eq

    def check_and_set(self, item):
        """
        Check and set the constraint.

        Parameters
        ----------
        item : object
            The item to check. The value compared is ``self.sources(item)``.

        Returns
        -------
        success, reprocess : bool, [~jwst.associations.ProcessList[,...]]
            Returns 2-tuple of

                - True if check is successful.
                - List of `~jwst.associations.ProcessList`.
        """
        source_value = self.sources(item)

        # A constraint without a value matches anything.
        satisfied = True
        if self.value is not None:
            satisfied = self.test(self.value, source_value)
        self.matched = satisfied

        if self.matched:
            if self.force_unique:
                # Lock the constraint onto the value that just matched.
                self.value = source_value
            self.found_values.add(self.value)

        # Determine reprocessing
        reprocess = []
        if (self.matched and self.reprocess_on_match) or (
            not self.matched and self.reprocess_on_fail
        ):
            reprocess.append(
                ProcessList(
                    items=[item],
                    work_over=self.work_over,
                    rules=self.reprocess_rules,
                    trigger_constraints=[self.id],
                )
            )

        return self.matched, reprocess

    def eq(self, value1, value2):
        """
        Check if constraint.value and item are equal.

        Parameters
        ----------
        value1 : any
            The first value to compare.
        value2 : any
            The second value to compare.

        Returns
        -------
        bool
            True if the two values are deemed equal.
        """
        return value1 == value2
class AttrConstraint(SimpleConstraintABC):
    """
    Test attribute of an item.

    Attributes
    ----------
    found_values : set(str[,...])
        Set of actual found values for this condition.
    matched : bool
        Last result of `check_and_set`
    """

    # Attributes to show in the string representation.
    _str_attrs = ("name", "sources", "value")

    def __init__(
        self,
        init=None,
        sources=None,
        evaluate=False,
        force_reprocess=False,
        force_undefined=False,
        force_unique=True,
        invalid_values=None,
        only_on_match=False,
        onlyif=None,
        required=True,
        **kwargs,
    ):
        """
        Initialize a new AttrConstraint.

        Parameters
        ----------
        init : dict or None
            Dictionary where the key:value pairs define
            the following parameters. Entries given here override
            the corresponding keyword arguments.

        sources : [str[,...]]
            List of attributes to query

        evaluate : bool
            Evaluate the item's value before checking condition.

        force_reprocess : ListCategory.state or False
            Add item back onto the reprocess list using
            the specified `~jwst.associations.ProcessList` work over state.

        force_undefined : bool
            If True, the constraint is satisfied only when none of the
            sources can be retrieved from the item; an item that does
            define a source fails the constraint.

        force_unique : bool
            If the initial value is `None` or a list of possible values,
            the constraint will be modified to be the value first matched.

        invalid_values : [str[,...]]
            List of values that are invalid in an item.
            Will cause a non-match.

        only_on_match : bool
            If `force_reprocess`, only do the reprocess
            if the entire constraint is satisfied.

        onlyif : function
            Boolean function that takes `item` as argument.
            If True, the rest of the condition is checked. Otherwise
            return as a matched condition

        required : bool
            One of the sources must exist. Otherwise,
            return as a matched constraint.

        **kwargs : key:value pairs
            Passed on to the base class (e.g. `value`, `name`).
        """
        # Attributes
        self.sources = sources
        self.evaluate = evaluate
        self.force_reprocess = force_reprocess
        self.force_undefined = force_undefined
        self.force_unique = force_unique
        self.invalid_values = invalid_values
        self.only_on_match = only_on_match
        self.onlyif = onlyif
        self.required = required
        super().__init__(init=init, **kwargs)

        # Give some defaults real meaning.
        # Inspect the stored attributes rather than the call arguments:
        # `init` may have supplied them, and those values must survive.
        if self.invalid_values is None:
            self.invalid_values = []
        if self.onlyif is None:
            self.onlyif = lambda _item: True

        # Haven't actually matched anything yet.
        self.found_values = set()
        self.matched = False

    def _reprocess_item(self, item):
        """Build the `ProcessList` that puts `item` back on the queue per `force_reprocess`."""
        return ProcessList(
            items=[item],
            work_over=self.force_reprocess,
            only_on_match=self.only_on_match,
            trigger_constraints=[self.id],
        )

    def check_and_set(self, item):
        """
        Check and set constraints based on item.

        Parameters
        ----------
        item : dict
            The item to check on.

        Returns
        -------
        success, reprocess : bool, [~jwst.associations.ProcessList[,...]]
            Returns 2-tuple of

                - True if check is successful.
                - List of `~jwst.associations.ProcessList`.
        """
        reprocess = []

        # Only perform check on specified `onlyif` condition
        if not self.onlyif(item):
            if self.force_reprocess:
                reprocess.append(self._reprocess_item(item))
            self.matched = True
            return self.matched, reprocess

        # Get the condition information.
        try:
            source, value = getattr_from_list(
                item, self.sources, invalid_values=self.invalid_values
            )
        except KeyError:
            # None of the sources is available. That is a failure only
            # when a source is required and absence was not asked for.
            self.matched = not (self.required and not self.force_undefined)
            return self.matched, reprocess
        else:
            if self.force_undefined:
                # A source exists, but the constraint demands it not be.
                self.matched = False
                return self.matched, reprocess

        evaled = value
        if self.evaluate:
            evaled = evaluate(value)

        # If the constraint has no value to check against, and given
        # value evaluates to a list, the item must be duplicated,
        # with each value from its list, and all the new items reprocessed.
        # Otherwise, the value is the value to set the constraint by.
        if self.value is None:
            if is_iterable(evaled):
                reprocess.append(reprocess_multivalue(item, source, evaled, self))
                self.matched = False
                return self.matched, reprocess
            value = str(evaled)

        # Else, the constraint does have a value. Check against it.
        else:
            if callable(self.value):
                match_value = self.value()
            else:
                match_value = self.value
            if not is_iterable(evaled):
                evaled = [evaled]
            for evaled_item in evaled:
                value = str(evaled_item)
                if meets_conditions(value, match_value):
                    break
            else:
                # The condition is not matched, leave now.
                self.matched = False
                return self.matched, reprocess

            # A match was found. If there is a list of potential values,
            # set them up for reprocessing.
            next_evaleds = [next_evaled for next_evaled in evaled if next_evaled != evaled_item]
            if next_evaleds:
                reprocess.append(reprocess_multivalue(item, source, next_evaleds, self))

        # At this point, the constraint has passed.
        # Fix the conditions. The value is escaped because it is used
        # as a regular expression on subsequent checks.
        escaped_value = re.escape(value)
        self.found_values.add(escaped_value)
        if self.force_unique:
            self.value = escaped_value
            self.sources = [source]
            self.force_unique = False

        # If required to reprocess, add to the reprocess list.
        if self.force_reprocess:
            reprocess.append(self._reprocess_item(item))

        # That's all folks
        self.matched = True
        return self.matched, reprocess
class Constraint:
    """
    Constraint that is made up of SimpleConstraints.

    Attributes
    ----------
    constraints : [Constraint[,...]]
        List of `Constraint` or `SimpleConstraint` that
        make this constraint.

    matched : bool
        Result of the last `check_and_set`

    reduce : function
        A reduction function with signature `x(item, constraints)`
        where `constraints` is the `constraints` list. Returns a 2-tuple of
        a boolean indicating the state of the components and a list of
        reprocess lists.
        Predefined functions are:

            - `all`: True if all components return True
            - `any`: True if any component returns True
            - `notall`: Negation of `all`
            - `notany`: Negation of `any`

    Notes
    -----
    Named constraints can be accessed directly through indexing:

    >>> c = Constraint(SimpleConstraint(name="simple", value="a_value"))
    >>> c["simple"]  # doctest: +SKIP
    SimpleConstraint({'sources': <function SimpleConstraint.__init__.<locals>.<lambda>,
                      'force_unique': True,
                      'test': <bound method SimpleConstraint.eq of SimpleConstraint({...})>,
                      'reprocess_on_match': False,
                      'reprocess_on_fail': False,
                      'work_over': 1,
                      'reprocess_rules': None,
                      'value': 'a_value',
                      'name': 'simple',
                      'matched': False})
    """

    def __init__(
        self,
        init=None,
        reduce=None,
        name=None,
        reprocess_on_match=False,
        reprocess_on_fail=False,
        work_over=ListCategory.BOTH,
        reprocess_rules=None,
    ):
        """
        Initialize a new Constraint.

        Parameters
        ----------
        init : object or [object[,...]]
            A single object or list of objects where the
            objects are as follows.

                - SimpleConstraint or subclass
                - Constraint

            A list is used as-is (not copied). A `Constraint` is cloned:
            its settings replace the keyword arguments given here and its
            constraints are deep-copied.

        reduce : function
            A reduction function with signature `x(item, constraints)`
            where `constraints` is the `constraints` list. Returns a
            2-tuple of boolean and list of reprocess lists.
            Default value is `Constraint.all`

        name : str or None
            Optional name for constraint.

        reprocess_on_match : bool
            Reprocess the item if the constraint is satisfied.

        reprocess_on_fail : bool
            Reprocess the item if the constraint is not satisfied.

        work_over : ListCategory.[BOTH, EXISTING, RULES]
            The condition on which this constraint should operate.

        reprocess_rules : [rule[,..]] or None
            List of rules to be applied to.
            If None, calling function will determine the ruleset.
            If empty, [], all rules will be used.

        Raises
        ------
        TypeError
            If `init` is not one of the supported types.
        """
        self.constraints = []

        # Initialize from named parameters
        self.reduce = reduce
        self.name = name
        self.reprocess_on_match = reprocess_on_match
        self.reprocess_on_fail = reprocess_on_fail
        self.work_over = work_over
        self.reprocess_rules = reprocess_rules

        # Initialize from a structure.
        if init is None:
            pass
        elif isinstance(init, list):
            self.constraints = init
        elif isinstance(init, Constraint):
            self.reduce = init.reduce
            self.name = init.name
            self.reprocess_on_match = init.reprocess_on_match
            self.reprocess_on_fail = init.reprocess_on_fail
            self.work_over = init.work_over
            self.reprocess_rules = init.reprocess_rules
            self.constraints = deepcopy(init.constraints)
        elif isinstance(init, SimpleConstraintABC):
            self.constraints = [init]
        else:
            raise TypeError(
                f"Invalid initialization value type {type(init)}."
                "\nValid types are `SimpleConstraint`, `Constraint`,"
                "\nor subclass."
            )

        # Give some defaults real meaning.
        self.matched = False
        if self.reduce is None:
            self.reduce = self.all

    @property
    def dup_names(self):
        """
        Return dictionary of constraints with duplicate names.

        Returns
        -------
        dups : {str: [constraint[,...]][,...]}
            Returns a mapping between the duplicated name
            and all the constraints that define that name.
            Empty if no name is used more than once, including when
            no constraint is named at all.
        """
        # Work on the (constraint, name) pairs directly; unzipping them
        # would fail on an empty list.
        named = self.get_all_attr("name")
        counts = collections.Counter(name for _, name in named)

        result = collections.defaultdict(list)
        for constraint, name in named:
            if counts[name] > 1:
                result[name].append(constraint)

        # Turn off the defaultdict factory.
        result.default_factory = None
        return result

    @property
    def id(self):
        """
        Return identifier for the constraint.

        Returns
        -------
        id : str
            The identifier
        """
        return f"{self.__class__.__name__}:{self.name}"

    def append(self, constraint):
        """Append a new constraint."""
        self.constraints.append(constraint)

    def check_and_set(self, item, work_over=ListCategory.BOTH):
        """
        Check and set the constraint.

        Parameters
        ----------
        item : object
            The item to check against the member constraints.

        work_over : ListCategory.[BOTH, EXISTING, RULES]
            The category currently being worked over. Unless it is `BOTH`
            or equal to this constraint's own `work_over`, nothing is
            checked and ``(False, [])`` is returned.

        Returns
        -------
        success, reprocess : bool, [~jwst.associations.ProcessList[,...]]
            Returns 2-tuple of

                - success : True if check is successful.
                - List of `~jwst.associations.ProcessList`.
        """
        if work_over not in (self.work_over, ListCategory.BOTH):
            return False, []

        # Do we have positive?
        self.matched, reprocess = self.reduce(item, self.constraints)

        # Determine reprocessing
        if (self.matched and self.reprocess_on_match) or (
            not self.matched and self.reprocess_on_fail
        ):
            reprocess.append(
                [
                    ProcessList(
                        items=[item],
                        work_over=self.work_over,
                        rules=self.reprocess_rules,
                        trigger_constraints=[self.id],
                    )
                ]
            )

        # `reprocess` is a list of lists; hand back a flat list.
        return self.matched, list(chain(*reprocess))

    def copy(self):
        """
        Copy ourselves.

        Returns
        -------
        object
            Deepcopy of self.
        """
        return deepcopy(self)

    def get_all_attr(self, attribute, name=None):
        """
        Return the specified attribute for specified constraints.

        Parameters
        ----------
        attribute : str
            The attribute to retrieve

        name : str or None
            Only return attribute if the name of the current constraint
            matches the requested named constraints. If None, always
            return value.

        Returns
        -------
        result : [(SimpleConstraint or Constraint, object)[,...]]
            The list of (constraint, value) tuples, for this constraint
            and, recursively, all of its members. Constraints where the
            attribute is missing or None contribute nothing; if none
            contribute, an empty list is returned.
        """
        result = []
        if name is None or name == self.name:
            value = getattr(self, attribute, None)
            if value is not None:
                result = [(self, value)]
        for constraint in self.constraints:
            result.extend(constraint.get_all_attr(attribute, name=name))

        return result

    def preserve(self):
        """Preserve all constraint states."""
        for constraint in self.constraints:
            constraint.preserve()

    def restore(self):
        """Restore all constraint states."""
        for constraint in self.constraints:
            constraint.restore()

    @staticmethod
    def all(item, constraints):
        """
        Return positive only if all results are positive.

        Parameters
        ----------
        item : ACID
            The candidate.

        constraints : list[Constraint, ...]
            The list of constraints to check.

        Returns
        -------
        bool, list
            True if all constraints are positive, with the list of each
            constraint's reprocess list.
            If there are no constraints, False and an empty list.
            Otherwise False, with either the reprocess list of the first
            failing constraint (wrapped in a list), or an empty list if
            any failing constraint asked for no reprocessing.
        """
        # If there are no constraints, there is nothing to match.
        # Result is false.
        if len(constraints) == 0:
            return False, []

        # Find all negatives. Note first negative
        # that requires reprocessing and how many
        # negatives do not.
        all_match = True
        negative_reprocess = None
        to_reprocess = []
        for constraint in constraints:
            match, reprocess = constraint.check_and_set(item)
            if match:
                if all_match:
                    to_reprocess.append(reprocess)
            else:
                all_match = False

                # If not match and no reprocessing, then fail
                # completely. However, if there is reprocessing, take
                # the first one. Continue to check to ensure
                # there is no further complete fail.
                if len(reprocess) == 0:
                    negative_reprocess = None
                    break
                elif negative_reprocess is None:
                    negative_reprocess = [reprocess]

        if not all_match:
            if negative_reprocess is not None:
                to_reprocess = negative_reprocess
            else:
                to_reprocess = []

        return all_match, to_reprocess

    @staticmethod
    def any(item, constraints):
        """
        Return the first successful constraint.

        Parameters
        ----------
        item : ACID
            The candidate.

        constraints : list[Constraint, ...]
            The list of constraints to check.

        Returns
        -------
        bool, list
            False, [] if there are no constraints.
            True and the reprocess list of the first matching constraint
            (wrapped in a list) if a match is found; checking stops there.
            Otherwise False and the reprocess lists of all the constraints.
        """
        # If there are no constraints, there is nothing to match.
        # Result is false.
        if len(constraints) == 0:
            return False, []

        to_reprocess = []
        for constraint in constraints:
            match, reprocess = constraint.check_and_set(item)
            if match:
                to_reprocess = [reprocess]
                break
            to_reprocess.append(reprocess)

        return match, to_reprocess

    @staticmethod
    def notany(item, constraints):
        """
        Check if none of the constraints match; true if none do.

        Parameters
        ----------
        item : ACID
            The candidate.

        constraints : list[Constraint, ...]
            The list of constraints to check.

        Returns
        -------
        bool, list
            The negated match result of `Constraint.any`, together with
            the reprocess lists `Constraint.any` produced.
        """
        match, to_reprocess = Constraint.any(item, constraints)
        return not match, to_reprocess

    @staticmethod
    def notall(item, constraints):
        """
        Check if not all of the constraints match; true if not all do.

        Parameters
        ----------
        item : ACID
            The candidate.

        constraints : list[Constraint, ...]
            The list of constraints to check.

        Returns
        -------
        bool, list
            The negated match result of `Constraint.all`, together with
            the reprocess lists `Constraint.all` produced.
        """
        match, to_reprocess = Constraint.all(item, constraints)
        return not match, to_reprocess

    def __delitem__(self, key):
        """Not implemented."""
        raise NotImplementedError("Cannot delete a constraint by index.")

    # Make iterable
    # Iteration flattens the tree: it yields what each member yields.
    def __iter__(self):
        yield from chain(*map(iter, self.constraints))

    # Index implementation
    def __getitem__(self, key):
        """
        Retrieve a named constraint.

        Members are searched in order, descending into each member
        before moving on to the next.

        Parameters
        ----------
        key : str
            The key to retrieve a value with.

        Returns
        -------
        jwst.associations.lib.constraint.Constraint
            The constraint to be retrieved.

        Raises
        ------
        KeyError
            If no constraint with that name is found.
        """
        for constraint in self.constraints:
            name = getattr(constraint, "name", None)
            if name is not None and name == key:
                return constraint
            try:
                found = constraint[key]
            except (KeyError, TypeError):
                # KeyError: not found in a nested constraint.
                # TypeError: the member does not support indexing.
                pass
            else:
                return found
        raise KeyError(f"Constraint {key} not found")

    def __repr__(self):
        result = "{}(name={}).{}([{}])".format(
            self.__class__.__name__,
            str(getattr(self, "name", None)),
            str(self.reduce.__name__),
            "".join([repr(constraint) for constraint in self.constraints]),
        )
        return result

    def __setitem__(self, key, value):
        """Not implemented."""
        raise NotImplementedError("Cannot set constraints by index.")

    def __str__(self):
        result = "\n".join([str(constraint) for constraint in self if constraint.name is not None])
        return result
# Utilities
def meets_conditions(value, conditions):
    """
    Check whether value meets any of the provided conditions.

    Each condition must match the value in its entirety, ignoring case.

    Parameters
    ----------
    value : str
        The value to be checked.

    conditions : str or iterable of str
        Regular expression(s) to match against.

    Returns
    -------
    bool
        True if any condition is met.
    """
    if not is_iterable(conditions):
        conditions = [conditions]
    for condition in conditions:
        # Group the condition before anchoring it. Without the group,
        # an alternation such as "a|b" would become "^a|b$", which anchors
        # only the first and last alternatives and so accepts any value
        # that merely starts with "a".
        anchored = "".join(["^(?:", condition, ")$"])
        if re.match(anchored, value, flags=re.IGNORECASE):
            return True
    return False


def reprocess_multivalue(item, source, values, constraint):
    """
    Complete reprocessing of items that have a list of values.

    One copy of the item is made per value, with the multi-valued
    attribute replaced by that single value as a string.

    Parameters
    ----------
    item : dict
        The item.

    source : str
        The attribute which has the multi-values.

    values : list
        The list of values

    constraint : Constraint
        The constraint which is triggering the reprocessing.

    Returns
    -------
    process_list : ProcessList
        The process list to put on the reprocess queue
    """
    reprocess_items = []
    for value in values:
        new_item = PoolRow(item)
        new_item[source] = str(value)
        reprocess_items.append(new_item)
    return ProcessList(items=reprocess_items, trigger_constraints=[constraint.id])