Edit on GitHub

DAG

A DAG, or directed acyclic graph, is a graph whose edges are directed and that contains no directed cycles: following the edges from any node can never lead back to that same node. SQLMesh uses a DAG to keep track of a project's models. This allows SQLMesh to easily determine a model's lineage and to identify upstream and downstream dependencies.

  1"""
  2# DAG
  3
  4A DAG, or directed acyclic graph, is a graph where the edges are directional and there are no cycles with
  5all the edges pointing in the same direction. SQLMesh uses a DAG to keep track of a project's models. This
  6allows SQLMesh to easily determine a model's lineage and to identify upstream and downstream dependencies.
  7"""
  8from __future__ import annotations
  9
 10import typing as t
 11
 12T = t.TypeVar("T", bound=t.Hashable)
 13
 14
 15class DAG(t.Generic[T]):
 16    def __init__(self, graph: t.Optional[t.Dict[T, t.Set[T]]] = None):
 17        self._graph: t.Dict[T, t.Set[T]] = {}
 18        for node, dependencies in (graph or {}).items():
 19            self.add(node, dependencies)
 20
 21    def add(self, node: T, dependencies: t.Optional[t.Iterable[T]] = None) -> None:
 22        """Add a node to the graph with an optional upstream dependency.
 23
 24        Args:
 25            node: The node to add.
 26            dependencies: Optional dependencies to add to the node.
 27        """
 28        if node not in self._graph:
 29            self._graph[node] = set()
 30        if dependencies:
 31            self._graph[node].update(dependencies)
 32            for d in dependencies:
 33                self.add(d)
 34
 35    @property
 36    def reversed(self) -> DAG[T]:
 37        """Returns a copy of this DAG with all its edges reversed."""
 38        result = DAG[T]()
 39
 40        for node, deps in self._graph.items():
 41            result.add(node)
 42            for dep in deps:
 43                result.add(dep, [node])
 44
 45        return result
 46
 47    def subdag(self, *nodes: T) -> DAG[T]:
 48        """Create a new subdag given node(s).
 49
 50        Args:
 51            nodes: The nodes of the new subdag.
 52
 53        Returns:
 54            A new dag consisting of the specified nodes.
 55        """
 56        queue = set(nodes)
 57        graph = {}
 58
 59        while queue:
 60            node = queue.pop()
 61            deps = self._graph.get(node, set())
 62            graph[node] = deps
 63            queue.update(deps)
 64
 65        return DAG(graph)
 66
 67    def upstream(self, node: T) -> t.List[T]:
 68        """Returns all upstream dependencies in topologically sorted order."""
 69        return self.subdag(node).sorted()[:-1]
 70
 71    @property
 72    def leaves(self) -> t.Set[T]:
 73        """Returns all nodes in the graph without any upstream dependencies."""
 74        return {dep for deps in self._graph.values() for dep in deps if dep not in self._graph}
 75
 76    @property
 77    def graph(self) -> t.Dict[T, t.Set[T]]:
 78        graph = {}
 79        for node, deps in self._graph.items():
 80            graph[node] = deps.copy()
 81        return graph
 82
 83    def sorted(self) -> t.List[T]:
 84        """Returns a list of nodes sorted in topological order."""
 85        result: t.List[T] = []
 86
 87        unprocessed_nodes = self.graph
 88        while unprocessed_nodes:
 89            next_nodes = {node for node, deps in unprocessed_nodes.items() if not deps}
 90
 91            for node in next_nodes:
 92                unprocessed_nodes.pop(node)
 93
 94            for deps in unprocessed_nodes.values():
 95                deps -= next_nodes
 96
 97            result.extend(next_nodes)
 98
 99        return result
100
101    def downstream(self, node: T) -> t.List[T]:
102        """Get all nodes that have the input node as an upstream dependency.
103
104        Args:
105            node: The ancestor node.
106
107        Returns:
108            A list of descendant nodes sorted in topological order.
109        """
110        sorted_nodes = self.sorted()
111        try:
112            node_index = sorted_nodes.index(node)
113        except ValueError:
114            return []
115
116        def visit() -> t.Iterator[T]:
117            """Visit topologically sorted nodes after input node and yield downstream dependants."""
118            downstream = {node}
119            for current_node in sorted_nodes[node_index + 1 :]:
120                upstream = self._graph.get(current_node, set())
121                if not upstream.isdisjoint(downstream):
122                    downstream.add(current_node)
123                    yield current_node
124
125        return list(visit())
126
127    def lineage(self, node: T) -> DAG[T]:
128        """Get a dag of the node and its upstream dependencies and downstream dependents.
129
130        Args:
131            node: The node used to determine lineage.
132
133        Returns:
134            A new dag consisting of the dependent and descendant nodes.
135        """
136        return self.subdag(node, *self.downstream(node))
class DAG(typing.Generic[~T]):
 16class DAG(t.Generic[T]):
 17    def __init__(self, graph: t.Optional[t.Dict[T, t.Set[T]]] = None):
 18        self._graph: t.Dict[T, t.Set[T]] = {}
 19        for node, dependencies in (graph or {}).items():
 20            self.add(node, dependencies)
 21
 22    def add(self, node: T, dependencies: t.Optional[t.Iterable[T]] = None) -> None:
 23        """Add a node to the graph with an optional upstream dependency.
 24
 25        Args:
 26            node: The node to add.
 27            dependencies: Optional dependencies to add to the node.
 28        """
 29        if node not in self._graph:
 30            self._graph[node] = set()
 31        if dependencies:
 32            self._graph[node].update(dependencies)
 33            for d in dependencies:
 34                self.add(d)
 35
 36    @property
 37    def reversed(self) -> DAG[T]:
 38        """Returns a copy of this DAG with all its edges reversed."""
 39        result = DAG[T]()
 40
 41        for node, deps in self._graph.items():
 42            result.add(node)
 43            for dep in deps:
 44                result.add(dep, [node])
 45
 46        return result
 47
 48    def subdag(self, *nodes: T) -> DAG[T]:
 49        """Create a new subdag given node(s).
 50
 51        Args:
 52            nodes: The nodes of the new subdag.
 53
 54        Returns:
 55            A new dag consisting of the specified nodes.
 56        """
 57        queue = set(nodes)
 58        graph = {}
 59
 60        while queue:
 61            node = queue.pop()
 62            deps = self._graph.get(node, set())
 63            graph[node] = deps
 64            queue.update(deps)
 65
 66        return DAG(graph)
 67
 68    def upstream(self, node: T) -> t.List[T]:
 69        """Returns all upstream dependencies in topologically sorted order."""
 70        return self.subdag(node).sorted()[:-1]
 71
 72    @property
 73    def leaves(self) -> t.Set[T]:
 74        """Returns all nodes in the graph without any upstream dependencies."""
 75        return {dep for deps in self._graph.values() for dep in deps if dep not in self._graph}
 76
 77    @property
 78    def graph(self) -> t.Dict[T, t.Set[T]]:
 79        graph = {}
 80        for node, deps in self._graph.items():
 81            graph[node] = deps.copy()
 82        return graph
 83
 84    def sorted(self) -> t.List[T]:
 85        """Returns a list of nodes sorted in topological order."""
 86        result: t.List[T] = []
 87
 88        unprocessed_nodes = self.graph
 89        while unprocessed_nodes:
 90            next_nodes = {node for node, deps in unprocessed_nodes.items() if not deps}
 91
 92            for node in next_nodes:
 93                unprocessed_nodes.pop(node)
 94
 95            for deps in unprocessed_nodes.values():
 96                deps -= next_nodes
 97
 98            result.extend(next_nodes)
 99
100        return result
101
102    def downstream(self, node: T) -> t.List[T]:
103        """Get all nodes that have the input node as an upstream dependency.
104
105        Args:
106            node: The ancestor node.
107
108        Returns:
109            A list of descendant nodes sorted in topological order.
110        """
111        sorted_nodes = self.sorted()
112        try:
113            node_index = sorted_nodes.index(node)
114        except ValueError:
115            return []
116
117        def visit() -> t.Iterator[T]:
118            """Visit topologically sorted nodes after input node and yield downstream dependants."""
119            downstream = {node}
120            for current_node in sorted_nodes[node_index + 1 :]:
121                upstream = self._graph.get(current_node, set())
122                if not upstream.isdisjoint(downstream):
123                    downstream.add(current_node)
124                    yield current_node
125
126        return list(visit())
127
128    def lineage(self, node: T) -> DAG[T]:
129        """Get a dag of the node and its upstream dependencies and downstream dependents.
130
131        Args:
132            node: The node used to determine lineage.
133
134        Returns:
135            A new dag consisting of the dependent and descendant nodes.
136        """
137        return self.subdag(node, *self.downstream(node))

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

DAG(graph: Optional[Dict[~T, Set[~T]]] = None)
17    def __init__(self, graph: t.Optional[t.Dict[T, t.Set[T]]] = None):
18        self._graph: t.Dict[T, t.Set[T]] = {}
19        for node, dependencies in (graph or {}).items():
20            self.add(node, dependencies)
def add(self, node: ~T, dependencies: Optional[Iterable[~T]] = None) -> None:
22    def add(self, node: T, dependencies: t.Optional[t.Iterable[T]] = None) -> None:
23        """Add a node to the graph with an optional upstream dependency.
24
25        Args:
26            node: The node to add.
27            dependencies: Optional dependencies to add to the node.
28        """
29        if node not in self._graph:
30            self._graph[node] = set()
31        if dependencies:
32            self._graph[node].update(dependencies)
33            for d in dependencies:
34                self.add(d)

Add a node to the graph with an optional upstream dependency.

Arguments:
  • node: The node to add.
  • dependencies: Optional dependencies to add to the node.
reversed: sqlmesh.utils.dag.DAG[~T]

Returns a copy of this DAG with all its edges reversed.

def subdag(self, *nodes: ~T) -> sqlmesh.utils.dag.DAG[~T]:
48    def subdag(self, *nodes: T) -> DAG[T]:
49        """Create a new subdag given node(s).
50
51        Args:
52            nodes: The nodes of the new subdag.
53
54        Returns:
55            A new dag consisting of the specified nodes.
56        """
57        queue = set(nodes)
58        graph = {}
59
60        while queue:
61            node = queue.pop()
62            deps = self._graph.get(node, set())
63            graph[node] = deps
64            queue.update(deps)
65
66        return DAG(graph)

Create a new subdag given node(s).

Arguments:
  • nodes: The nodes of the new subdag.
Returns:

A new dag consisting of the specified nodes.

def upstream(self, node: ~T) -> List[~T]:
68    def upstream(self, node: T) -> t.List[T]:
69        """Returns all upstream dependencies in topologically sorted order."""
70        return self.subdag(node).sorted()[:-1]

Returns all upstream dependencies in topologically sorted order.

leaves: Set[~T]

Returns all nodes in the graph without any upstream dependencies.

def sorted(self) -> List[~T]:
 84    def sorted(self) -> t.List[T]:
 85        """Returns a list of nodes sorted in topological order."""
 86        result: t.List[T] = []
 87
 88        unprocessed_nodes = self.graph
 89        while unprocessed_nodes:
 90            next_nodes = {node for node, deps in unprocessed_nodes.items() if not deps}
 91
 92            for node in next_nodes:
 93                unprocessed_nodes.pop(node)
 94
 95            for deps in unprocessed_nodes.values():
 96                deps -= next_nodes
 97
 98            result.extend(next_nodes)
 99
100        return result

Returns a list of nodes sorted in topological order.

def downstream(self, node: ~T) -> List[~T]:
102    def downstream(self, node: T) -> t.List[T]:
103        """Get all nodes that have the input node as an upstream dependency.
104
105        Args:
106            node: The ancestor node.
107
108        Returns:
109            A list of descendant nodes sorted in topological order.
110        """
111        sorted_nodes = self.sorted()
112        try:
113            node_index = sorted_nodes.index(node)
114        except ValueError:
115            return []
116
117        def visit() -> t.Iterator[T]:
118            """Visit topologically sorted nodes after input node and yield downstream dependants."""
119            downstream = {node}
120            for current_node in sorted_nodes[node_index + 1 :]:
121                upstream = self._graph.get(current_node, set())
122                if not upstream.isdisjoint(downstream):
123                    downstream.add(current_node)
124                    yield current_node
125
126        return list(visit())

Get all nodes that have the input node as an upstream dependency.

Arguments:
  • node: The ancestor node.
Returns:

A list of descendant nodes sorted in topological order.

def lineage(self, node: ~T) -> sqlmesh.utils.dag.DAG[~T]:
128    def lineage(self, node: T) -> DAG[T]:
129        """Get a dag of the node and its upstream dependencies and downstream dependents.
130
131        Args:
132            node: The node used to determine lineage.
133
134        Returns:
135            A new dag consisting of the dependent and descendant nodes.
136        """
137        return self.subdag(node, *self.downstream(node))

Get a dag of the node and its upstream dependencies and downstream dependents.

Arguments:
  • node: The node used to determine lineage.
Returns:

A new dag consisting of the dependent and descendant nodes.