Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 81 additions & 5 deletions linkml_runtime/utils/schemaview.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import uuid
import warnings
from collections import defaultdict, deque
from collections.abc import Callable, Mapping
from copy import copy, deepcopy
from dataclasses import dataclass
from enum import Enum
Expand Down Expand Up @@ -45,7 +44,7 @@
from linkml_runtime.utils.pattern import PatternResolver

if TYPE_CHECKING:
from collections.abc import Mapping
from collections.abc import Callable, Iterable, Mapping
from types import NotImplementedType

from linkml_runtime.utils.metamodelcore import URI, URIorCURIE
Expand Down Expand Up @@ -92,13 +91,90 @@ class OrderedBy(Enum):
"""


WHITE = 0
GREY = 1
BLACK = 2


def detect_cycles(f: Callable[[Any], Iterable[Any] | None], x: Any) -> None:
"""Detect cycles in a graph, using function `f` to walk the graph, starting at node `x`.

Uses the classic white/grey/black colour coding algorithm to track which nodes have been explored. In this
case, "node" refers to any element in a schema and "neighbours" are elements that can be reached from that
node by executing function `f`.

WHITE: unexplored
GREY: node is being processed; processing includes exploring all neighbours reachable via f(node)
BLACK: node and all of its neighbours (and their neighbours, etc.) have been processed
Comment on lines +102 to +108
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This naming system could be changed (e.g. to TODO / PROCESSING / DONE or something similar) but the white/grey/black nomenclature is used in numerous places where this algorithm is described, so may as well stick with it.


A directed cycle reachable from node `x` raises a ValueError.

:param f: function that returns an iterable of neighbouring nodes (parents or children)
:type f: Callable[[Any], Iterable[Any] | None]
:param x: graph node
:type x: Any
:raises ValueError: if a cycle is discovered through repeated calls to f(x)
"""
# keep track of the processing state of nodes in the graph
processing_state: dict[Any, int] = {}

# Stack entries are (node, processed_flag).
# processed_flag == True means all neighbours (nodes generated by running `f(node)`)
# have been added to the todo stack and the node can be marked BLACK.
todo: list[tuple[Any, bool]] = [(x, False)]

while todo:
node, processed_flag = todo.pop()

if processed_flag:
# all neighbours have been processed
processing_state[node] = BLACK
continue

# check the state of this node
node_state = processing_state.get(node, WHITE)

if node_state == GREY:
# this node was already being processed
# we have discovered an edge back to that node - i.e. a cycle
err_msg = f"Cycle detected at node {node!r}"
raise ValueError(err_msg)

if node_state == BLACK:
# already fully explored - nothing to do
continue

# mark the node as being processed (GREY) and set the processed_flag to True
processing_state[node] = GREY
todo.append((node, True))

# push the neighbours on to the processing stack
todo.extend((child, False) for child in f(node) or [])


def _closure(
f: Callable,
x,
f: Callable[[Any], Iterable[Any] | None],
x: Any,
reflexive: bool = True,
depth_first: bool = True,
**kwargs: dict[str, Any] | None, # noqa: ARG001
**kwargs: dict[str, Any] | None,
) -> list[str | ElementName | ClassDefinitionName | EnumDefinitionName | SlotDefinitionName | TypeDefinitionName]:
"""Walk the graph using function `f` and generate the closure.

:param f: function that returns an iterable of neighbouring nodes (parents or children)
:type f: Callable[[Any], Iterable[Any] | None]
:param x: start node
:type x: Any
:param reflexive: assume the graph is reflexive, defaults to True
:type reflexive: bool, optional
:param depth_first: depth first traversal, defaults to True
:type depth_first: bool, optional
:return: list of nodes
:rtype: list[str | ElementName | ClassDefinitionName | EnumDefinitionName | SlotDefinitionName | TypeDefinitionName]
"""
if kwargs and kwargs.get("detect_cycles"):
detect_cycles(f, x)
Comment on lines +175 to +176
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Allows users to run `schemaview.type_descendants("my_type", detect_cycles=True)` or `schemaview.class_ancestors("CoolClass", detect_cycles=True)`.


rv = [x] if reflexive else []
visited = []
todo = [x]
Expand Down
189 changes: 189 additions & 0 deletions tests/test_utils/input/cycles.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
# yaml-language-server: $schema=https://linkml.io/linkml-model/linkml_model/jsonschema/meta.schema.json
id: https://example.org/test-cycle-schema
name: TestCycleSchema
description: |
Schema with intentional cycles in classes, mixins, slot ranges, and types.
prefixes:
ex: https://example.org/schema/

default_prefix: ex
default_range: string

slots:
identifier_slot:
range: string
identifier: true
noncycle_slot:
slot_a:
slot_b:
slot_c:
mixed_in_slot_a:
mixed_in_slot_b:

classes:
# Non-cyclic base class with some children
BaseClass:
description: Simple base class with no cycles.
slots:
- noncycle_slot
- identifier_slot

MixinA:
mixin: true
slots:
- mixed_in_slot_a

MixinB:
mixin: true
slots:
- mixed_in_slot_b

NonCycleClassA:
is_a: BaseClass

NonCycleClassB:
mixins:
- MixinA
is_a: NonCycleClassA

NonCycleClassC:
mixins:
- MixinB
is_a: NonCycleClassA

# cycle in the range of a slot
IdentifierCycleClassA:
description: Class with a cycle in the range for the identifier slot.
attributes:
id_slot:
range: IdentifierCycleClassB
identifier: true

IdentifierCycleClassB:
description: Class with a cycle in the range for the identifier slot.
attributes:
id_slot:
range: IdentifierCycleClassC
identifier: true

IdentifierCycleClassC:
description: Class with a cycle in the range for the identifier slot.
attributes:
id_slot:
range: IdentifierCycleClassA
identifier: true

IdentifierCycleClassD:
description: Class with itself as the range for the identifier slot.
attributes:
id_slot:
range: IdentifierCycleClassD
identifier: true

# Cycle: ClassA -> ClassB -> ClassC -> ClassA
# ClassD and ClassE have the misfortune of inheriting all this nonsense.
ClassA:
is_a: ClassB
description: Part of a subclass inheritance cycle (A -> B).
slots:
- slot_a

ClassB:
is_a: ClassC
description: Part of a subclass inheritance cycle (B -> C).
slots:
- slot_b

ClassC:
is_a: ClassA
description: Part of a subclass inheritance cycle (C -> A).
slots:
- slot_c

ClassD:
is_a: ClassA

ClassE:
is_a: ClassD

# Cycle: ClassF -> ClassF
# ClassG is_a ClassF so inherits the cycle
ClassF:
is_a: ClassF

ClassG:
is_a: ClassF

# Mixin cycle (mixins reference each other)
Mixin1:
description: Mixin that pulls in Mixin2, forming a mixin cycle.
mixin: true
mixins:
- Mixin2

Mixin2:
description: Mixin that pulls in Mixin1, forming a mixin cycle.
mixin: true
mixins:
- Mixin1

MixedClass:
description: Class that applies both Mixin1 and Mixin2, thus inheriting the mixin cycle.
mixins:
- Mixin1
- Mixin2


types:
# string and subtypes
string:
uri: ex:string
base: str
description: A character string

super_string:
typeof: string
description: Type with ancestors, no cycles

supreme_string:
typeof: super_string
description: Type with ancestors, no cycles

integer:
uri: ex:integer
base: int
description: An integer

boolean:
uri: ex:boolean
base: Bool
repr: bool
description: Your basic bool.

# in a cycle with itself!
circle:
typeof: circle
uri: ex:circle

circle_of_life:
typeof: circle

# cycle between type_circular and circular_type
circular_type:
typeof: type_circular
uri: ex:circ
description: Type in a cycle

type_circular:
typeof: circular_type
uri: ex:circ
description: Type in a cycle

# inherit the type_circular/circular_type confusion
semi_circular_type:
typeof: circular_type
description: Type with cyclic ancestors

curve_type:
typeof: semi_circular_type
description: Type with cyclic ancestors
Loading
Loading