Skip to content

Graph

NodeType

Bases: str, Enum

Enumeration of node types in the knowledge graph.

Currently supported node types are: UNKNOWN, DOCUMENT, CHUNK

Node

Bases: BaseModel

Represents a node in the knowledge graph.

Attributes:

Name Type Description
id UUID

Unique identifier for the node.

properties dict

Dictionary of properties associated with the node.

type NodeType

Type of the node.

add_property

add_property(key: str, value: Any)

Adds a property to the node.

Raises:

Type Description
ValueError

If the property already exists.

Source code in src/ragas/testset/graph.py
def add_property(self, key: str, value: t.Any):
    """
    Store a new property on the node under a case-insensitive key.

    The key is lower-cased before it is checked and stored, so ``"Title"``
    and ``"title"`` refer to the same property.

    Parameters
    ----------
    key : str
        Name of the property.
    value : Any
        Value to associate with the property.

    Raises
    ------
    ValueError
        If a property with the same (lower-cased) key already exists.
    """
    normalized = key.lower()
    if normalized in self.properties:
        # The message reports the key exactly as the caller supplied it.
        raise ValueError(f"Property {key} already exists")
    self.properties[normalized] = value

get_property

get_property(key: str) -> Optional[Any]

Retrieves a property value by key.

Notes

The key is case-insensitive.

Source code in src/ragas/testset/graph.py
def get_property(self, key: str) -> t.Optional[t.Any]:
    """
    Look up a property of the node by name.

    Parameters
    ----------
    key : str
        Name of the property; it is lower-cased before the lookup.

    Returns
    -------
    Any or None
        The stored value, or None when no property is stored under the
        lower-cased key.

    Notes
    -----
    The key is case-insensitive.
    """
    lookup_key = key.lower()
    return self.properties.get(lookup_key)

Relationship

Bases: BaseModel

Represents a relationship between two nodes in a knowledge graph.

Attributes:

Name Type Description
id (UUID, optional)

Unique identifier for the relationship. Defaults to a new UUID.

type str

The type of the relationship.

source Node

The source node of the relationship.

target Node

The target node of the relationship.

bidirectional (bool, optional)

Whether the relationship is bidirectional. Defaults to False.

properties (dict, optional)

Dictionary of properties associated with the relationship. Defaults to an empty dict.

get_property

get_property(key: str) -> Optional[Any]

Retrieves a property value by key. The key is case-insensitive.

Source code in src/ragas/testset/graph.py
def get_property(self, key: str) -> t.Optional[t.Any]:
    """
    Look up a property of the relationship by name.

    The key is case-insensitive: it is lower-cased before the lookup.
    Returns None when no property is stored under the lower-cased key.
    """
    stored = self.properties
    return stored.get(key.lower())

KnowledgeGraph dataclass

KnowledgeGraph(nodes: List[Node] = list(), relationships: List[Relationship] = list())

Represents a knowledge graph containing nodes and relationships.

Attributes:

Name Type Description
nodes List[Node]

List of nodes in the knowledge graph.

relationships List[Relationship]

List of relationships in the knowledge graph.

add

add(item: Union[Node, Relationship])

Adds a node or relationship to the knowledge graph.

Raises:

Type Description
ValueError

If the item type is not Node or Relationship.

Source code in src/ragas/testset/graph.py
def add(self, item: t.Union[Node, Relationship]):
    """
    Add a node or a relationship to the knowledge graph.

    The item is routed by its runtime type: Node instances go to
    ``_add_node`` and Relationship instances go to ``_add_relationship``.

    Parameters
    ----------
    item : Union[Node, Relationship]
        The object to add.

    Raises
    ------
    ValueError
        If the item type is not Node or Relationship.
    """
    if isinstance(item, Node):
        self._add_node(item)
        return
    if isinstance(item, Relationship):
        self._add_relationship(item)
        return
    raise ValueError(f"Invalid item type: {type(item)}")

save

save(path: Union[str, Path])

Saves the knowledge graph to a JSON file.

Source code in src/ragas/testset/graph.py
def save(self, path: t.Union[str, Path]):
    """Saves the knowledge graph to a JSON file.

    Parameters
    ----------
    path : Union[str, Path]
        Destination of the JSON file.

    Notes
    -----
    Nodes and relationships are serialised through their ``model_dump()``
    output under the ``"nodes"`` and ``"relationships"`` keys.

    The file is always written as UTF-8. Non-ASCII characters are emitted
    verbatim (``ensure_ascii=False``), so relying on the platform default
    encoding could raise ``UnicodeEncodeError`` or produce a file that is
    not portable between machines.
    """
    if isinstance(path, str):
        path = Path(path)

    data = {
        "nodes": [node.model_dump() for node in self.nodes],
        "relationships": [rel.model_dump() for rel in self.relationships],
    }
    # Explicit encoding: the output must not depend on the locale.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, cls=UUIDEncoder, indent=2, ensure_ascii=False)

load classmethod

load(path: Union[str, Path]) -> KnowledgeGraph

Loads a knowledge graph from a path.

Source code in src/ragas/testset/graph.py
@classmethod
def load(cls, path: t.Union[str, Path]) -> "KnowledgeGraph":
    """Loads a knowledge graph from a path.

    Parameters
    ----------
    path : Union[str, Path]
        Location of a JSON file containing ``"nodes"`` and
        ``"relationships"`` lists.

    Returns
    -------
    KnowledgeGraph
        A new instance of ``cls`` whose ``nodes`` and ``relationships``
        lists are extended with the deserialised objects.

    Notes
    -----
    The file is decoded as UTF-8, the standard encoding for JSON. If the
    bytes are not valid UTF-8, the file is re-read with the platform default
    encoding so that files written on a non-UTF-8 locale remain loadable.
    """
    if isinstance(path, str):
        path = Path(path)

    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except UnicodeDecodeError:
        # Legacy file written with the locale encoding: retry with the default.
        with open(path, "r") as f:
            data = json.load(f)

    nodes = [Node(**node_data) for node_data in data["nodes"]]
    relationships = [Relationship(**rel_data) for rel_data in data["relationships"]]

    kg = cls()
    kg.nodes.extend(nodes)
    kg.relationships.extend(relationships)
    return kg

find_indirect_clusters

find_indirect_clusters(relationship_condition: Callable[[Relationship], bool] = lambda _: True, depth_limit: int = 3) -> List[Set[Node]]

Finds indirect clusters of nodes in the knowledge graph based on a relationship condition. Here if A -> B -> C -> D, then A, B, C, and D form a cluster. If there's also a path A -> B -> C -> E, it will form a separate cluster.

Parameters:

Name Type Description Default
relationship_condition Callable[[Relationship], bool]

A function that takes a Relationship and returns a boolean, by default lambda _: True

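lambda _: True
depth_limit int

Maximum number of nodes on any explored path; traversal along a path stops once it contains this many nodes, so no cluster is larger than this value.

3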

Returns:

Type Description
List[Set[Node]]

A list of sets, where each set contains nodes that form a cluster.

Source code in src/ragas/testset/graph.py
def find_indirect_clusters(
    self,
    relationship_condition: t.Callable[[Relationship], bool] = lambda _: True,
    depth_limit: int = 3,
) -> t.List[t.Set[Node]]:
    """
    Collect clusters of nodes that are connected through chains of relationships.

    Starting from every node, relationships accepted by
    `relationship_condition` are followed depth-first (source -> target, and
    also target -> source for bidirectional relationships). Every explored
    path of two or more nodes contributes the set of its nodes as a cluster,
    so for A -> B -> C both {A, B} and {A, B, C} are reported (given a large
    enough `depth_limit`). A path A -> B -> E yields its own, separate cluster.

    Parameters
    ----------
    relationship_condition : Callable[[Relationship], bool], optional
        A function that takes a Relationship and returns a boolean, by default lambda _: True
    depth_limit : int, optional
        Maximum number of nodes on a path; a path is not extended beyond
        this many nodes. By default 3.

    Returns
    -------
    List[Set[Node]]
        A list of sets, where each set contains nodes that form a cluster.
        Duplicate clusters are removed; the order of the list is unspecified.
    """
    eligible = [edge for edge in self.relationships if relationship_condition(edge)]
    found = []
    seen_trails = set()

    def next_hop(edge, current, members):
        # Node reachable from `current` over `edge` that is not yet in the
        # cluster, or None if the edge does not lead anywhere new.
        if edge.source == current and edge.target not in members:
            return edge.target
        if (
            edge.bidirectional
            and edge.target == current
            and edge.source not in members
        ):
            return edge.source
        return None

    def walk(current: Node, members: t.Set[Node], level: int, trail: t.Tuple[Node, ...]):
        if level >= depth_limit:
            return
        # The same node sequence can be reached twice (e.g. parallel edges);
        # explore it only once.
        if trail in seen_trails:
            return
        seen_trails.add(trail)
        members.add(current)

        for edge in eligible:
            hop = next_hop(edge, current, members)
            if hop is None:
                continue
            # Each branch gets its own copy so sibling paths stay independent.
            walk(hop, members.copy(), level + 1, trail + (hop,))

        # A lone start node is not a cluster.
        if len(members) > 1:
            found.append(members)

    for start in self.nodes:
        walk(start, set(), 0, (start,))

    # De-duplicate via frozensets, then hand back ordinary mutable sets.
    distinct = {frozenset(group) for group in found}
    return [set(group) for group in distinct]

remove_node

remove_node(node: Node, inplace: bool = True) -> Optional[KnowledgeGraph]

Removes a node and its associated relationships from the knowledge graph.

Parameters:

Name Type Description Default
node Node

The node to be removed from the knowledge graph.

required
inplace bool

If True, modifies the knowledge graph in place. If False, returns a modified copy with the node removed.

True

Returns:

Type Description
KnowledgeGraph or None

Returns a modified copy of the knowledge graph if inplace is False. Returns None if inplace is True.

Raises:

Type Description
ValueError

If the node is not present in the knowledge graph.

Source code in src/ragas/testset/graph.py
def remove_node(
    self, node: Node, inplace: bool = True
) -> t.Optional["KnowledgeGraph"]:
    """
    Delete a node, and every relationship that starts or ends at it, from the graph.

    Parameters
    ----------
    node : Node
        The node to be removed from the knowledge graph.
    inplace : bool, optional
        If True (the default), this graph itself is modified.
        If False, a deep copy is modified and returned; this graph is left untouched.

    Returns
    -------
    KnowledgeGraph or None
        The modified deep copy when `inplace` is False, otherwise None.

    Raises
    ------
    ValueError
        If the node is not present in the knowledge graph.
    """
    if node not in self.nodes:
        raise ValueError("Node is not present in the knowledge graph.")

    # Pick the graph to operate on once; the removal steps are the same either way.
    graph = self if inplace else deepcopy(self)

    graph.nodes.remove(node)
    graph.relationships = [
        edge
        for edge in graph.relationships
        if edge.source != node and edge.target != node
    ]

    return None if inplace else graph

find_direct_clusters

find_direct_clusters(relationship_condition: Callable[[Relationship], bool] = lambda _: True) -> Dict[Node, List[Set[Node]]]

Finds direct clusters of nodes in the knowledge graph based on a relationship condition. Here if A->B, and A->C, then A, B, and C form a cluster.

Parameters:

Name Type Description Default
relationship_condition Callable[[Relationship], bool]

A function that takes a Relationship and returns a boolean, by default lambda _: True

lambda _: True

Returns:

Type Description
Dict[Node, Set[Node]]

A dictionary with one entry per cluster: the key is one node of the cluster and the value is the set of the cluster's remaining nodes.

Source code in src/ragas/testset/graph.py
def find_direct_clusters(
    self, relationship_condition: t.Callable[[Relationship], bool] = lambda _: True
) -> t.Dict[Node, t.Set[Node]]:
    """
    Finds direct clusters of nodes in the knowledge graph based on a relationship condition.
    Here if A->B, and A->C, then A, B, and C form a cluster built around A.

    For every node a cluster is formed from the node itself, the targets of
    the accepted relationships it is the source of, and, for bidirectional
    relationships, the sources of the relationships it is the target of.
    Single-node clusters, duplicate clusters and clusters that are strict
    subsets of another cluster are discarded.

    Parameters
    ----------
    relationship_condition : Callable[[Relationship], bool], optional
        A function that takes a Relationship and returns a boolean, by default lambda _: True

    Returns
    -------
    Dict[Node, Set[Node]]
        One entry per surviving cluster. The key is the node the cluster was
        built around (the first such node in ``self.nodes`` when several
        nodes produce the same cluster) and the value is the set of the
        other nodes in the cluster.
    """
    relationships = [
        rel for rel in self.relationships if relationship_condition(rel)
    ]

    # (center, members) pairs, one per distinct cluster, in node order.
    centered = []
    seen = set()
    for node in self.nodes:
        cluster = {node}
        for rel in relationships:
            if rel.source == node:
                cluster.add(rel.target)
            elif rel.bidirectional and rel.target == node:
                cluster.add(rel.source)

        if len(cluster) < 2:
            continue
        fingerprint = frozenset(cluster)
        if fingerprint in seen:
            continue
        seen.add(fingerprint)
        centered.append((node, cluster))

    all_clusters = [cluster for _, cluster in centered]

    # Key each cluster by its own center. A center yields exactly one cluster
    # and duplicates were dropped above, so keys can never collide. Popping an
    # arbitrary member as the key could make two clusters share a key and
    # silently lose one of them.
    return {
        center: cluster - {center}
        for center, cluster in centered
        # Drop clusters strictly contained in another cluster.
        if not any(cluster < other for other in all_clusters)
    }