Generation

TestsetGenerator dataclass

TestsetGenerator(llm: BaseRagasLLM, knowledge_graph: KnowledgeGraph = KnowledgeGraph(), persona_list: Optional[List[Persona]] = None)

Generates an evaluation dataset based on given scenarios and parameters.

Attributes:

llm : BaseRagasLLM
    The language model to use for the generation process.
knowledge_graph : KnowledgeGraph, default empty
    The knowledge graph to use for the generation process.
persona_list : Optional[List[Persona]], default None
    Personas to use when generating queries. If None, personas are generated
    from the knowledge graph during generate().
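
A minimal construction sketch, assuming `langchain-openai` for the generator model and the `Persona` model from `ragas.testset.persona`; the model name and persona wording are illustrative:

# Sketch: construct a TestsetGenerator directly with a wrapped LLM and an
# explicit persona list. Model name and persona wording are illustrative
# assumptions; OPENAI_API_KEY must be set.
from langchain_openai import ChatOpenAI

from ragas.llms import LangchainLLMWrapper
from ragas.testset import TestsetGenerator
from ragas.testset.persona import Persona

generator = TestsetGenerator(
    llm=LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini")),
    persona_list=[
        Persona(
            name="new joiner",
            role_description="A new employee looking for onboarding information.",
        )
    ],
)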

from_langchain classmethod

from_langchain(llm: BaseLanguageModel, knowledge_graph: Optional[KnowledgeGraph] = None) -> TestsetGenerator

Creates a TestsetGenerator from a Langchain LLM.

Source code in src/ragas/testset/synthesizers/generate.py
@classmethod
def from_langchain(
    cls,
    llm: LangchainLLM,
    knowledge_graph: t.Optional[KnowledgeGraph] = None,
) -> TestsetGenerator:
    """
    Creates a `TestsetGenerator` from a Langchain LLM.
    """
    knowledge_graph = knowledge_graph or KnowledgeGraph()
    return cls(
        LangchainLLMWrapper(llm),
        knowledge_graph,
    )
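
For example, a sketch assuming `langchain-openai` is installed; any LangChain `BaseLanguageModel` can be passed:

# Sketch: build a TestsetGenerator from a LangChain chat model.
# The model name is illustrative.
from langchain_openai import ChatOpenAI

from ragas.testset import TestsetGenerator

generator = TestsetGenerator.from_langchain(ChatOpenAI(model="gpt-4o-mini"))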

from_llama_index classmethod

from_llama_index(llm: BaseLLM, knowledge_graph: Optional[KnowledgeGraph] = None) -> TestsetGenerator

Creates a TestsetGenerator from a LlamaIndex LLM.

Source code in src/ragas/testset/synthesizers/generate.py
@classmethod
def from_llama_index(
    cls,
    llm: LlamaIndexLLM,
    knowledge_graph: t.Optional[KnowledgeGraph] = None,
) -> TestsetGenerator:
    """
    Creates a `TestsetGenerator` from a LlamaIndex LLM.
    """
    knowledge_graph = knowledge_graph or KnowledgeGraph()
    return cls(
        LlamaIndexLLMWrapper(llm),
        knowledge_graph,
    )
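
For example, a sketch assuming the `llama-index-llms-openai` package; any LlamaIndex LLM can be passed:

# Sketch: build a TestsetGenerator from a LlamaIndex LLM.
# The model name is illustrative.
from llama_index.llms.openai import OpenAI

from ragas.testset import TestsetGenerator

generator = TestsetGenerator.from_llama_index(OpenAI(model="gpt-4o-mini"))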

generate_with_langchain_docs

generate_with_langchain_docs(documents: Sequence[Document], testset_size: int, transforms: Optional[Transforms] = None, transforms_llm: Optional[BaseRagasLLM] = None, transforms_embedding_model: Optional[BaseRagasEmbeddings] = None, query_distribution: Optional[QueryDistribution] = None, run_config: Optional[RunConfig] = None, callbacks: Optional[Callbacks] = None, with_debugging_logs=False, raise_exceptions: bool = True) -> Testset

Generates an evaluation dataset based on given Langchain documents and parameters.

Parameters:

documents : Sequence[Document]
    A sequence of Langchain documents to use as source material. Required.
testset_size : int
    The number of test samples to generate. Required.
transforms : Optional[Transforms]
    Custom transforms to apply to the documents, by default None.
transforms_llm : Optional[BaseRagasLLM]
    LLM to use for transforms if different from instance LLM, by default None.
transforms_embedding_model : Optional[BaseRagasEmbeddings]
    Embedding model to use for transforms if different from instance model, by default None.
query_distribution : Optional[QueryDistribution]
    Distribution of query types to generate, by default None.
run_config : Optional[RunConfig]
    Configuration for the generation run, by default None.
callbacks : Optional[Callbacks]
    Callbacks to use during generation, by default None.
with_debugging_logs : bool
    Whether to include debug logs, by default False.
raise_exceptions : bool
    Whether to raise exceptions during generation, by default True.

Returns:

Testset
    The generated evaluation dataset.

Raises:

ValueError
    If no LLM or embedding model is provided either during initialization or as arguments.

Source code in src/ragas/testset/synthesizers/generate.py
def generate_with_langchain_docs(
    self,
    documents: t.Sequence[LCDocument],
    testset_size: int,
    transforms: t.Optional[Transforms] = None,
    transforms_llm: t.Optional[BaseRagasLLM] = None,
    transforms_embedding_model: t.Optional[BaseRagasEmbeddings] = None,
    query_distribution: t.Optional[QueryDistribution] = None,
    run_config: t.Optional[RunConfig] = None,
    callbacks: t.Optional[Callbacks] = None,
    with_debugging_logs=False,
    raise_exceptions: bool = True,
) -> Testset:
    """
    Generates an evaluation dataset based on given Langchain documents and parameters.

    Parameters
    ----------
    documents : Sequence[LCDocument]
        A sequence of Langchain documents to use as source material
    testset_size : int
        The number of test samples to generate
    transforms : Optional[Transforms], optional
        Custom transforms to apply to the documents, by default None
    transforms_llm : Optional[BaseRagasLLM], optional
        LLM to use for transforms if different from instance LLM, by default None
    transforms_embedding_model : Optional[BaseRagasEmbeddings], optional
        Embedding model to use for transforms if different from instance model, by default None
    query_distribution : Optional[QueryDistribution], optional
        Distribution of query types to generate, by default None
    run_config : Optional[RunConfig], optional
        Configuration for the generation run, by default None
    callbacks : Optional[Callbacks], optional
        Callbacks to use during generation, by default None
    with_debugging_logs : bool, optional
        Whether to include debug logs, by default False
    raise_exceptions : bool, optional
        Whether to raise exceptions during generation, by default True

    Returns
    -------
    Testset
        The generated evaluation dataset

    Raises
    ------
    ValueError
        If no LLM or embedding model is provided either during initialization or as arguments
    """

    # force the user to provide an llm and embedding client to prevent use of default LLMs
    if not self.llm and not transforms_llm:
        raise ValueError(
            """An llm client was not provided.
                   Provide an LLM on TestsetGenerator instantiation or as an argument for transforms_llm parameter.
                   Alternatively you can provide your own transforms through the `transforms` parameter."""
        )
    if not transforms_embedding_model:
        raise ValueError(
            """An embedding client was not provided. Provide an embedding through the transforms_embedding_model parameter. Alternatively you can provide your own transforms through the `transforms` parameter."""
        )

    if not transforms:
        transforms = default_transforms(
            llm=transforms_llm or self.llm,
            embedding_model=transforms_embedding_model,
        )

    # convert the documents to Ragas nodes
    nodes = []
    for doc in documents:
        node = Node(
            type=NodeType.DOCUMENT,
            properties={
                "page_content": doc.page_content,
                "document_metadata": doc.metadata,
            },
        )
        nodes.append(node)

    kg = KnowledgeGraph(nodes=nodes)

    # apply transforms and update the knowledge graph
    apply_transforms(kg, transforms)
    self.knowledge_graph = kg

    return self.generate(
        testset_size=testset_size,
        query_distribution=query_distribution,
        run_config=run_config,
        callbacks=callbacks,
        with_debugging_logs=with_debugging_logs,
        raise_exceptions=raise_exceptions,
    )
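
A usage sketch; the loader, path, and model names are illustrative assumptions, and the embedding model is wrapped with `LangchainEmbeddingsWrapper` to satisfy the `BaseRagasEmbeddings` parameter type:

# Sketch: generate a testset from LangChain documents. Loader, path, and
# model names are illustrative assumptions (DirectoryLoader needs the
# appropriate loader dependencies installed).
from langchain_community.document_loaders import DirectoryLoader
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from ragas.testset import TestsetGenerator

docs = DirectoryLoader("data/").load()
generator = TestsetGenerator(
    llm=LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini")),
)
testset = generator.generate_with_langchain_docs(
    documents=docs,
    testset_size=10,
    transforms_embedding_model=LangchainEmbeddingsWrapper(OpenAIEmbeddings()),
)
df = testset.to_pandas()  # inspect the generated samples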

generate_with_llamaindex_docs

generate_with_llamaindex_docs(documents: Sequence[Document], testset_size: int, transforms: Optional[Transforms] = None, transforms_llm: Optional[BaseLLM] = None, transforms_embedding_model: Optional[BaseEmbedding] = None, query_distribution: Optional[QueryDistribution] = None, run_config: Optional[RunConfig] = None, callbacks: Optional[Callbacks] = None, with_debugging_logs=False, raise_exceptions: bool = True)

Generates an evaluation dataset based on given LlamaIndex documents and parameters.

Source code in src/ragas/testset/synthesizers/generate.py
def generate_with_llamaindex_docs(
    self,
    documents: t.Sequence[LlamaIndexDocument],
    testset_size: int,
    transforms: t.Optional[Transforms] = None,
    transforms_llm: t.Optional[LlamaIndexLLM] = None,
    transforms_embedding_model: t.Optional[LlamaIndexEmbedding] = None,
    query_distribution: t.Optional[QueryDistribution] = None,
    run_config: t.Optional[RunConfig] = None,
    callbacks: t.Optional[Callbacks] = None,
    with_debugging_logs=False,
    raise_exceptions: bool = True,
):
    """
    Generates an evaluation dataset based on given LlamaIndex documents and parameters.
    """

    run_config = run_config or RunConfig()

    # force the user to provide an llm and embedding client to prevent use of default LLMs
    if not self.llm and not transforms_llm:
        raise ValueError(
            "An llm client was not provided. Provide an LLM on TestsetGenerator instantiation or as an argument for transforms_llm parameter. Alternatively you can provide your own transforms through the `transforms` parameter."
        )
    if not transforms_embedding_model:
        raise ValueError(
            "An embedding client was not provided. Provide an embedding through the transforms_embedding_model parameter. Alternatively you can provide your own transforms through the `transforms` parameter."
        )

    if not transforms:
        if transforms_llm is None:
            llm_for_transforms = self.llm
        else:
            llm_for_transforms = LlamaIndexLLMWrapper(transforms_llm)
        embedding_model_for_transforms = LlamaIndexEmbeddingsWrapper(
            transforms_embedding_model
        )
        transforms = default_transforms(
            llm=llm_for_transforms,
            embedding_model=embedding_model_for_transforms,
        )

    # convert the documents to Ragas nodes
    nodes = []
    for doc in documents:
        if doc.text is not None and doc.text.strip() != "":
            node = Node(
                type=NodeType.DOCUMENT,
                properties={
                    "page_content": doc.text,
                    "document_metadata": doc.metadata,
                },
            )
            nodes.append(node)

    kg = KnowledgeGraph(nodes=nodes)

    # apply transforms and update the knowledge graph
    apply_transforms(kg, transforms, run_config)
    self.knowledge_graph = kg

    return self.generate(
        testset_size=testset_size,
        query_distribution=query_distribution,
        run_config=run_config,
        callbacks=callbacks,
        with_debugging_logs=with_debugging_logs,
        raise_exceptions=raise_exceptions,
    )
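
A usage sketch; the reader, path, and model names are illustrative assumptions. Note that here `transforms_llm` and `transforms_embedding_model` take raw LlamaIndex objects, which the method wraps internally:

# Sketch: generate a testset from LlamaIndex documents. Path and model
# names are illustrative assumptions.
from llama_index.core import SimpleDirectoryReader
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

from ragas.testset import TestsetGenerator

docs = SimpleDirectoryReader("data/").load_data()
generator = TestsetGenerator.from_llama_index(OpenAI(model="gpt-4o-mini"))
testset = generator.generate_with_llamaindex_docs(
    documents=docs,
    testset_size=10,
    transforms_embedding_model=OpenAIEmbedding(model="text-embedding-3-small"),
)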

generate

generate(testset_size: int, query_distribution: Optional[QueryDistribution] = None, num_personas: int = 3, run_config: Optional[RunConfig] = None, batch_size: Optional[int] = None, callbacks: Optional[Callbacks] = None, token_usage_parser: Optional[TokenUsageParser] = None, with_debugging_logs=False, raise_exceptions: bool = True) -> Testset

Generate an evaluation dataset based on given scenarios and parameters.

Parameters:

testset_size : int
    The number of samples to generate. Required.
query_distribution : Optional[QueryDistribution]
    A list of tuples containing scenario simulators and their probabilities.
    If None, default simulators will be used. By default None.
num_personas : int
    The number of personas to generate or use from the persona_list, by default 3.
run_config : Optional[RunConfig]
    Configuration for running the generation process, by default None.
batch_size : Optional[int]
    How large batches should be. If set to None (default), no batching is done.
callbacks : Optional[Callbacks]
    Langchain style callbacks to use for the generation process. You can use
    this to log the generation process or add other metadata. By default None.
token_usage_parser : Optional[TokenUsageParser]
    Parse the LLMResult object and return a TokenUsage object. This is used to
    calculate the cost of the generation process. By default None.
with_debugging_logs : bool
    If True, enable debug logging for various components, by default False.
raise_exceptions : bool
    If True, raise exceptions during the generation process, by default True.

Returns:

Testset
    A dataset containing the generated TestsetSamples.

Notes

This function performs the following steps:

1. Set up scenarios and debug logging if required.
2. Generate scenarios using an Executor.
3. Calculate split values for different scenario types.
4. Generate samples for each scenario.
5. Compile the results into an EvaluationDataset.

Source code in src/ragas/testset/synthesizers/generate.py
def generate(
    self,
    testset_size: int,
    query_distribution: t.Optional[QueryDistribution] = None,
    num_personas: int = 3,
    run_config: t.Optional[RunConfig] = None,
    batch_size: t.Optional[int] = None,
    callbacks: t.Optional[Callbacks] = None,
    token_usage_parser: t.Optional[TokenUsageParser] = None,
    with_debugging_logs=False,
    raise_exceptions: bool = True,
) -> Testset:
    """
    Generate an evaluation dataset based on given scenarios and parameters.

    Parameters
    ----------
    testset_size : int
        The number of samples to generate.
    query_distribution : Optional[QueryDistribution], optional
        A list of tuples containing scenario simulators and their probabilities.
        If None, default simulators will be used.
    num_personas : int, default 3
        The number of personas to generate or use from the persona_list.
    run_config : Optional[RunConfig], optional
        Configuration for running the generation process.
    batch_size: int, optional
        How large should batches be.  If set to None (default), no batching is done.
    callbacks : Optional[Callbacks], optional
        Langchain style callbacks to use for the generation process. You can use
        this to log the generation process or add other metadata.
    token_usage_parser : Optional[TokenUsageParser], optional
        Parse the LLMResult object and return a TokenUsage object. This is used to
        calculate the cost of the generation process.
    with_debugging_logs : bool, default False
        If True, enable debug logging for various components.
    raise_exceptions : bool, default True
        If True, raise exceptions during the generation process.

    Returns
    -------
    Testset
        A dataset containing the generated TestsetSamples.

    Notes
    -----
    This function performs the following steps:
    1. Set up scenarios and debug logging if required.
    2. Generate scenarios using an Executor.
    3. Calculate split values for different scenario types.
    4. Generate samples for each scenario.
    5. Compile the results into an EvaluationDataset.
    """
    if run_config is not None:
        self.llm.set_run_config(run_config)

    query_distribution = query_distribution or default_query_distribution(self.llm)
    callbacks = callbacks or []

    # dict to store any callbacks we define
    ragas_callbacks = {}
    # set the token usage parser
    if token_usage_parser is not None:
        from ragas.cost import CostCallbackHandler

        cost_cb = CostCallbackHandler(token_usage_parser=token_usage_parser)
        ragas_callbacks["cost_cb"] = cost_cb
    else:
        cost_cb = None

    # append all the ragas_callbacks to the callbacks
    for cb in ragas_callbacks.values():
        if isinstance(callbacks, BaseCallbackManager):
            callbacks.add_handler(cb)
        else:
            callbacks.append(cb)

    # new group for Testset Generation
    testset_generation_rm, testset_generation_grp = new_group(
        name=RAGAS_TESTSET_GENERATION_GROUP_NAME,
        inputs={"testset_size": testset_size},
        callbacks=callbacks,
    )

    if with_debugging_logs:
        # TODO: Edit this before pre-release
        from ragas.utils import patch_logger

        patch_logger("ragas.experimental.testset.synthesizers", logging.DEBUG)
        patch_logger("ragas.experimental.testset.graph", logging.DEBUG)
        patch_logger("ragas.experimental.testset.transforms", logging.DEBUG)

    if self.persona_list is None:
        self.persona_list = generate_personas_from_kg(
            llm=self.llm,
            kg=self.knowledge_graph,
            num_personas=num_personas,
            callbacks=callbacks,
        )
    else:
        random.shuffle(self.persona_list)

    splits, _ = calculate_split_values(
        [prob for _, prob in query_distribution], testset_size
    )
    # new group for Generation of Scenarios
    scenario_generation_rm, scenario_generation_grp = new_group(
        name="Scenario Generation",
        inputs={"splits": splits},
        callbacks=testset_generation_grp,
    )

    # generate scenarios
    exec = Executor(
        "Generating Scenarios",
        raise_exceptions=raise_exceptions,
        run_config=run_config,
        keep_progress_bar=False,
        batch_size=batch_size,
    )
    # generate samples
    splits, _ = calculate_split_values(
        [prob for _, prob in query_distribution], testset_size
    )
    for i, (scenario, _) in enumerate(query_distribution):
        exec.submit(
            scenario.generate_scenarios,
            n=splits[i],
            knowledge_graph=self.knowledge_graph,
            persona_list=self.persona_list[:num_personas],
            callbacks=scenario_generation_grp,
        )

    try:
        scenario_sample_list: t.List[t.List[BaseScenario]] = exec.results()
    except Exception as e:
        scenario_generation_rm.on_chain_error(e)
        raise e
    else:
        scenario_generation_rm.on_chain_end(
            outputs={"scenario_sample_list": scenario_sample_list}
        )

    # new group for Generation of Samples
    sample_generation_rm, sample_generation_grp = new_group(
        name="Sample Generation",
        inputs={"scenario_sample_list": scenario_sample_list},
        callbacks=testset_generation_grp,
    )
    exec = Executor(
        "Generating Samples",
        raise_exceptions=raise_exceptions,
        run_config=run_config,
        keep_progress_bar=True,
        batch_size=batch_size,
    )
    additional_testset_info: t.List[t.Dict] = []
    for i, (synthesizer, _) in enumerate(query_distribution):
        for sample in scenario_sample_list[i]:
            exec.submit(
                synthesizer.generate_sample,
                scenario=sample,
                callbacks=sample_generation_grp,
            )
            # fill out the additional info for the TestsetSample
            additional_testset_info.append(
                {
                    "synthesizer_name": synthesizer.name,
                }
            )

    try:
        eval_samples = exec.results()
    except Exception as e:
        sample_generation_rm.on_chain_error(e)
        raise e
    else:
        sample_generation_rm.on_chain_end(outputs={"eval_samples": eval_samples})

    # build the testset
    testsets = []
    for sample, additional_info in zip(eval_samples, additional_testset_info):
        testsets.append(TestsetSample(eval_sample=sample, **additional_info))
    testset = Testset(samples=testsets, cost_cb=cost_cb)
    testset_generation_rm.on_chain_end({"testset": testset})

    # tracking how many samples were generated
    track(
        TestsetGenerationEvent(
            event_type="testset_generation",
            evolution_names=[
                e.__class__.__name__.lower() for e, _ in query_distribution
            ],
            evolution_percentages=[p for _, p in query_distribution],
            num_rows=testset_size,
            language="english",
        )
    )
    return testset
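
A usage sketch for calling generate directly on a previously built knowledge graph, with cost tracking via a token usage parser; the file path, model name, and the `get_token_usage_for_openai` helper are assumptions to verify against your ragas version:

# Sketch: generate from a saved knowledge graph and track token usage.
# The file path and model name are illustrative; get_token_usage_for_openai
# is assumed to be available in ragas.cost.
from langchain_openai import ChatOpenAI

from ragas.cost import get_token_usage_for_openai
from ragas.llms import LangchainLLMWrapper
from ragas.testset import TestsetGenerator
from ragas.testset.graph import KnowledgeGraph

kg = KnowledgeGraph.load("knowledge_graph.json")
generator = TestsetGenerator(
    llm=LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini")),
    knowledge_graph=kg,
)
testset = generator.generate(
    testset_size=10,
    token_usage_parser=get_token_usage_for_openai,
)
df = testset.to_pandas()  # inspect the generated samples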