DAG
A DAG, or directed acyclic graph, is a graph whose edges are directed and which contains no directed cycles: following the edges in their direction never leads back to the node you started from. SQLMesh uses a DAG to keep track of a project's models. This allows SQLMesh to easily determine a model's lineage and to identify upstream and downstream dependencies.
"""
# DAG

A DAG, or directed acyclic graph, is a graph whose edges are directed and which contains no directed
cycles. SQLMesh uses a DAG to keep track of a project's models. This allows SQLMesh to easily determine
a model's lineage and to identify upstream and downstream dependencies.
"""
from __future__ import annotations

import typing as t

T = t.TypeVar("T", bound=t.Hashable)


class DAG(t.Generic[T]):
    """Graph that maps every node to the set of its direct upstream dependencies.

    Every dependency passed to `add` is itself registered as a node, so the keys of the
    internal mapping are the complete set of nodes in the graph.
    """

    def __init__(self, graph: t.Optional[t.Dict[T, t.Set[T]]] = None):
        """Create a DAG, optionally seeded from a node -> dependencies mapping.

        Args:
            graph: Optional mapping of each node to its upstream dependencies.
        """
        self._graph: t.Dict[T, t.Set[T]] = {}
        for node, dependencies in (graph or {}).items():
            self.add(node, dependencies)

    def add(self, node: T, dependencies: t.Optional[t.Iterable[T]] = None) -> None:
        """Add a node to the graph with optional upstream dependencies.

        Args:
            node: The node to add.
            dependencies: Optional dependencies to add to the node. Each dependency is
                also registered as a node of the graph.
        """
        node_deps = self._graph.setdefault(node, set())
        if not dependencies:
            return

        # Materialize once: a one-shot iterable (e.g. a generator) would otherwise be
        # exhausted by the set update and the dependencies would never become nodes.
        new_deps = tuple(dependencies)
        node_deps.update(new_deps)
        for dep in new_deps:
            self.add(dep)

    @property
    def reversed(self) -> DAG[T]:
        """Returns a copy of this DAG with all its edges reversed."""
        result = DAG[T]()

        for node, deps in self._graph.items():
            # Register the node even if nothing ends up pointing at it.
            result.add(node)
            for dep in deps:
                result.add(dep, [node])

        return result

    def subdag(self, *nodes: T) -> DAG[T]:
        """Create a new subdag given node(s).

        Args:
            nodes: The nodes of the new subdag.

        Returns:
            A new dag consisting of the specified nodes and all of their transitive
            upstream dependencies.
        """
        pending = set(nodes)
        collected: t.Dict[T, t.Set[T]] = {}

        while pending:
            node = pending.pop()
            deps = self._graph.get(node, set())
            collected[node] = deps
            # Only queue unseen nodes so shared ancestors are visited once and a cyclic
            # graph cannot keep the loop running forever.
            pending.update(dep for dep in deps if dep not in collected)

        # The constructor copies the dependency sets, so the new DAG shares no state.
        return DAG(collected)

    def upstream(self, node: T) -> t.List[T]:
        """Returns all upstream dependencies in topologically sorted order."""
        # Within its own subdag the node depends on every other node, so it sorts last.
        return self.subdag(node).sorted()[:-1]

    @property
    def leaves(self) -> t.Set[T]:
        """Returns all nodes in the graph without any upstream dependencies."""
        # Every dependency is registered as a node, so scanning the dependency sets for
        # names missing from the graph can never match; check for empty sets instead.
        return {node for node, deps in self._graph.items() if not deps}

    @property
    def graph(self) -> t.Dict[T, t.Set[T]]:
        """Returns a copy of the node -> dependencies mapping that is safe to mutate."""
        return {node: deps.copy() for node, deps in self._graph.items()}

    def sorted(self) -> t.List[T]:
        """Returns a list of nodes sorted in topological order.

        Raises:
            ValueError: If the remaining nodes all wait on each other (a cycle), so no
                topological order exists.
        """
        result: t.List[T] = []

        remaining = self.graph
        while remaining:
            ready = {node for node, deps in remaining.items() if not deps}
            if not ready:
                # Without this check the loop would spin forever on a cycle.
                raise ValueError(
                    f"Cannot sort the DAG, a cycle was detected among: {list(remaining)}"
                )

            for node in ready:
                del remaining[node]

            for deps in remaining.values():
                deps -= ready

            result.extend(ready)

        return result

    def downstream(self, node: T) -> t.List[T]:
        """Get all nodes that have the input node as an upstream dependency.

        Args:
            node: The ancestor node.

        Returns:
            A list of descendant nodes sorted in topological order.
        """
        sorted_nodes = self.sorted()
        try:
            start = sorted_nodes.index(node) + 1
        except ValueError:
            return []

        # Descendants can only appear after the node in topological order, so a single
        # forward pass that tracks the nodes reached so far is enough.
        reached = {node}
        descendants: t.List[T] = []
        for candidate in sorted_nodes[start:]:
            if self._graph.get(candidate, set()).isdisjoint(reached):
                continue
            reached.add(candidate)
            descendants.append(candidate)

        return descendants

    def lineage(self, node: T) -> DAG[T]:
        """Get a dag of the node and its upstream dependencies and downstream dependents.

        Args:
            node: The node used to determine lineage.

        Returns:
            A new dag containing the node, its downstream dependents and the upstream
            dependencies of all of those nodes.
        """
        return self.subdag(node, *self.downstream(node))
class DAG(t.Generic[T]):
    """Graph that maps every node to the set of its direct upstream dependencies.

    Every dependency passed to `add` is itself registered as a node, so the keys of the
    internal mapping are the complete set of nodes in the graph.
    """

    def __init__(self, graph: t.Optional[t.Dict[T, t.Set[T]]] = None):
        """Create a DAG, optionally seeded from a node -> dependencies mapping.

        Args:
            graph: Optional mapping of each node to its upstream dependencies.
        """
        self._graph: t.Dict[T, t.Set[T]] = {}
        for node, dependencies in (graph or {}).items():
            self.add(node, dependencies)

    def add(self, node: T, dependencies: t.Optional[t.Iterable[T]] = None) -> None:
        """Add a node to the graph with optional upstream dependencies.

        Args:
            node: The node to add.
            dependencies: Optional dependencies to add to the node. Each dependency is
                also registered as a node of the graph.
        """
        node_deps = self._graph.setdefault(node, set())
        if not dependencies:
            return

        # Materialize once: a one-shot iterable (e.g. a generator) would otherwise be
        # exhausted by the set update and the dependencies would never become nodes.
        new_deps = tuple(dependencies)
        node_deps.update(new_deps)
        for dep in new_deps:
            self.add(dep)

    @property
    def reversed(self) -> DAG[T]:
        """Returns a copy of this DAG with all its edges reversed."""
        result = DAG[T]()

        for node, deps in self._graph.items():
            # Register the node even if nothing ends up pointing at it.
            result.add(node)
            for dep in deps:
                result.add(dep, [node])

        return result

    def subdag(self, *nodes: T) -> DAG[T]:
        """Create a new subdag given node(s).

        Args:
            nodes: The nodes of the new subdag.

        Returns:
            A new dag consisting of the specified nodes and all of their transitive
            upstream dependencies.
        """
        pending = set(nodes)
        collected: t.Dict[T, t.Set[T]] = {}

        while pending:
            node = pending.pop()
            deps = self._graph.get(node, set())
            collected[node] = deps
            # Only queue unseen nodes so shared ancestors are visited once and a cyclic
            # graph cannot keep the loop running forever.
            pending.update(dep for dep in deps if dep not in collected)

        # The constructor copies the dependency sets, so the new DAG shares no state.
        return DAG(collected)

    def upstream(self, node: T) -> t.List[T]:
        """Returns all upstream dependencies in topologically sorted order."""
        # Within its own subdag the node depends on every other node, so it sorts last.
        return self.subdag(node).sorted()[:-1]

    @property
    def leaves(self) -> t.Set[T]:
        """Returns all nodes in the graph without any upstream dependencies."""
        # Every dependency is registered as a node, so scanning the dependency sets for
        # names missing from the graph can never match; check for empty sets instead.
        return {node for node, deps in self._graph.items() if not deps}

    @property
    def graph(self) -> t.Dict[T, t.Set[T]]:
        """Returns a copy of the node -> dependencies mapping that is safe to mutate."""
        return {node: deps.copy() for node, deps in self._graph.items()}

    def sorted(self) -> t.List[T]:
        """Returns a list of nodes sorted in topological order.

        Raises:
            ValueError: If the remaining nodes all wait on each other (a cycle), so no
                topological order exists.
        """
        result: t.List[T] = []

        remaining = self.graph
        while remaining:
            ready = {node for node, deps in remaining.items() if not deps}
            if not ready:
                # Without this check the loop would spin forever on a cycle.
                raise ValueError(
                    f"Cannot sort the DAG, a cycle was detected among: {list(remaining)}"
                )

            for node in ready:
                del remaining[node]

            for deps in remaining.values():
                deps -= ready

            result.extend(ready)

        return result

    def downstream(self, node: T) -> t.List[T]:
        """Get all nodes that have the input node as an upstream dependency.

        Args:
            node: The ancestor node.

        Returns:
            A list of descendant nodes sorted in topological order.
        """
        sorted_nodes = self.sorted()
        try:
            start = sorted_nodes.index(node) + 1
        except ValueError:
            return []

        # Descendants can only appear after the node in topological order, so a single
        # forward pass that tracks the nodes reached so far is enough.
        reached = {node}
        descendants: t.List[T] = []
        for candidate in sorted_nodes[start:]:
            if self._graph.get(candidate, set()).isdisjoint(reached):
                continue
            reached.add(candidate)
            descendants.append(candidate)

        return descendants

    def lineage(self, node: T) -> DAG[T]:
        """Get a dag of the node and its upstream dependencies and downstream dependents.

        Args:
            node: The node used to determine lineage.

        Returns:
            A new dag containing the node, its downstream dependents and the upstream
            dependencies of all of those nodes.
        """
        return self.subdag(node, *self.downstream(node))
Abstract base class for generic types.
A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::
    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.
This class can then be used as follows::
    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default
def add(self, node: T, dependencies: t.Optional[t.Iterable[T]] = None) -> None:
    """Add a node to the graph with optional upstream dependencies.

    Args:
        node: The node to add.
        dependencies: Optional dependencies to add to the node. Each dependency is
            also registered as a node of the graph.
    """
    node_deps = self._graph.setdefault(node, set())
    if not dependencies:
        return

    # Materialize once: a one-shot iterable (e.g. a generator) would otherwise be
    # exhausted by the set update and the dependencies would never become nodes.
    new_deps = tuple(dependencies)
    node_deps.update(new_deps)
    for dep in new_deps:
        self.add(dep)
Add a node to the graph with an optional upstream dependency.
Arguments:
- node: The node to add.
- dependencies: Optional dependencies to add to the node.
def subdag(self, *nodes: T) -> DAG[T]:
    """Create a new subdag given node(s).

    Args:
        nodes: The nodes of the new subdag.

    Returns:
        A new dag consisting of the specified nodes and all of their transitive
        upstream dependencies.
    """
    pending = set(nodes)
    collected: t.Dict[T, t.Set[T]] = {}

    while pending:
        node = pending.pop()
        deps = self._graph.get(node, set())
        collected[node] = deps
        # Only queue unseen nodes so shared ancestors are visited once and a cyclic
        # graph cannot keep the loop running forever.
        pending.update(dep for dep in deps if dep not in collected)

    return DAG(collected)
Create a new subdag given node(s).
Arguments:
- nodes: The nodes of the new subdag.
Returns:
A new dag consisting of the specified nodes and all of their transitive upstream dependencies.
def upstream(self, node: T) -> t.List[T]:
    """Returns all upstream dependencies in topologically sorted order."""
    ordered = self.subdag(node).sorted()
    # The final entry of the sorted subdag is dropped; in the node's own subdag that
    # entry is the node itself, because it depends on every other member.
    return ordered[:-1]
Returns all upstream dependencies in topologically sorted order.
def sorted(self) -> t.List[T]:
    """Returns a list of nodes sorted in topological order.

    Raises:
        ValueError: If the remaining nodes all wait on each other (a cycle), so no
            topological order exists.
    """
    result: t.List[T] = []

    # `self.graph` is consumed destructively below.
    remaining = self.graph
    while remaining:
        ready = {node for node, deps in remaining.items() if not deps}
        if not ready:
            # Without this check the loop would spin forever on a cycle.
            raise ValueError(
                f"Cannot sort the DAG, a cycle was detected among: {list(remaining)}"
            )

        for node in ready:
            del remaining[node]

        for deps in remaining.values():
            deps -= ready

        result.extend(ready)

    return result
Returns a list of nodes sorted in topological order.
def downstream(self, node: T) -> t.List[T]:
    """Get all nodes that have the input node as an upstream dependency.

    Args:
        node: The ancestor node.

    Returns:
        A list of descendant nodes sorted in topological order.
    """
    ordering = self.sorted()
    try:
        start = ordering.index(node) + 1
    except ValueError:
        # The node is not part of the graph, so it has no descendants.
        return []

    # Walk the nodes that sort after the input node and keep each one that depends on
    # the node itself or on something already identified as a descendant.
    reached = {node}
    descendants: t.List[T] = []
    for candidate in ordering[start:]:
        parents = self._graph.get(candidate, set())
        if parents.isdisjoint(reached):
            continue
        reached.add(candidate)
        descendants.append(candidate)

    return descendants
Get all nodes that have the input node as an upstream dependency.
Arguments:
- node: The ancestor node.
Returns:
A list of descendant nodes sorted in topological order.
def lineage(self, node: T) -> DAG[T]:
    """Get a dag of the node and its upstream dependencies and downstream dependents.

    Args:
        node: The node used to determine lineage.

    Returns:
        A new dag built by `subdag` from the node together with every node
        returned by `downstream` for it.
    """
    descendants = self.downstream(node)
    return self.subdag(node, *descendants)
Get a dag of the node and its upstream dependencies and downstream dependents.
Arguments:
- node: The node used to determine lineage.
Returns:
A new dag containing the node, its downstream dependents, and the upstream dependencies of all of those nodes.