
LLMs

BaseRagasLLM dataclass

BaseRagasLLM(run_config: RunConfig = RunConfig(), multiple_completion_supported: bool = False, cache: Optional[CacheInterface] = None)

Bases: ABC

get_temperature

get_temperature(n: int) -> float

Return the temperature to use for completion based on n.

Source code in src/ragas/llms/base.py
def get_temperature(self, n: int) -> float:
    """Return the temperature to use for completion based on n."""
    return 0.3 if n > 1 else 1e-8
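
A minimal usage sketch (assuming OPENAI_API_KEY is set, since llm_factory, documented below, builds a ChatOpenAI-backed LLM):

from ragas.llms import llm_factory

llm = llm_factory()          # default OpenAI-backed LLM (see llm_factory below)
llm.get_temperature(1)       # 1e-8 — near-greedy for a single completion
llm.get_temperature(4)       # 0.3  — adds diversity when n > 1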

generate async

generate(prompt: PromptValue, n: int = 1, temperature: Optional[float] = None, stop: Optional[List[str]] = None, callbacks: Callbacks = None) -> LLMResult

Generate text using the given event loop.

Source code in src/ragas/llms/base.py
async def generate(
    self,
    prompt: PromptValue,
    n: int = 1,
    temperature: t.Optional[float] = None,
    stop: t.Optional[t.List[str]] = None,
    callbacks: Callbacks = None,
) -> LLMResult:
    """Generate text using the given event loop."""

    if temperature is None:
        temperature = self.get_temperature(n)

    agenerate_text_with_retry = add_async_retry(
        self.agenerate_text, self.run_config
    )
    result = await agenerate_text_with_retry(
        prompt=prompt,
        n=n,
        temperature=temperature,
        stop=stop,
        callbacks=callbacks,
    )

    # check there are no max_token issues
    if not self.is_finished(result):
        raise LLMDidNotFinishException()
    return result
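
A minimal usage sketch of generate, assuming OPENAI_API_KEY is set; StringPromptValue from langchain_core is used here as a simple PromptValue:

import asyncio

from langchain_core.prompt_values import StringPromptValue
from ragas.llms import llm_factory

async def main():
    llm = llm_factory("gpt-4o-mini")
    # temperature defaults to get_temperature(n); is_finished() is checked internally
    result = await llm.generate(StringPromptValue(text="Say hello."), n=1)
    print(result.generations[0][0].text)

asyncio.run(main())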

LangchainLLMWrapper

LangchainLLMWrapper(langchain_llm: BaseLanguageModel[BaseMessage], run_config: Optional[RunConfig] = None, is_finished_parser: Optional[Callable[[LLMResult], bool]] = None, cache: Optional[CacheInterface] = None)

Bases: BaseRagasLLM

A simple base class for RagasLLMs based on LangChain's BaseLanguageModel interface. It implements two functions:

- generate_text: generate text from a given PromptValue
- agenerate_text: generate text from a given PromptValue asynchronously

Source code in src/ragas/llms/base.py
def __init__(
    self,
    langchain_llm: BaseLanguageModel[BaseMessage],
    run_config: t.Optional[RunConfig] = None,
    is_finished_parser: t.Optional[t.Callable[[LLMResult], bool]] = None,
    cache: t.Optional[CacheInterface] = None,
):
    super().__init__(cache=cache)
    self.langchain_llm = langchain_llm
    if run_config is None:
        run_config = RunConfig()
    self.set_run_config(run_config)
    self.is_finished_parser = is_finished_parser
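
A minimal construction sketch, wrapping a LangChain chat model (ChatOpenAI here, which assumes langchain-openai is installed and OPENAI_API_KEY is set):

from langchain_openai import ChatOpenAI
from ragas.llms import LangchainLLMWrapper
from ragas.run_config import RunConfig

evaluator_llm = LangchainLLMWrapper(
    ChatOpenAI(model="gpt-4o-mini"),
    run_config=RunConfig(timeout=60),
)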

is_finished

is_finished(response: LLMResult) -> bool

Parse the response to check if the LLM finished by checking the finish_reason or stop_reason. Supports OpenAI and Vertex AI models.

Source code in src/ragas/llms/base.py
def is_finished(self, response: LLMResult) -> bool:
    """
    Parse the response to check if the LLM finished by checking the finish_reason
    or stop_reason. Supports OpenAI and Vertex AI models.
    """
    if self.is_finished_parser is not None:
        return self.is_finished_parser(response)
    # if no parser is provided default to our own

    is_finished_list = []
    for g in response.flatten():
        resp = g.generations[0][0]
        if resp.generation_info is not None:
            # generation_info is provided - so we parse that
            finish_reason = resp.generation_info.get("finish_reason")
            if finish_reason is not None:
                # OpenAI uses "stop"
                # Vertex AI uses "STOP" or "MAX_TOKENS"
                # WatsonX AI uses "eos_token"
                is_finished_list.append(
                    finish_reason in ["stop", "STOP", "MAX_TOKENS", "eos_token"]
                )

            # provide more conditions here
            # https://github.com/explodinggradients/ragas/issues/1548

        # if generation_info is empty, we parse the response_metadata
        # this is less reliable

        elif (
            isinstance(resp, ChatGeneration)
            and t.cast(ChatGeneration, resp).message is not None
        ):
            resp_message: BaseMessage = t.cast(ChatGeneration, resp).message
            if resp_message.response_metadata.get("finish_reason") is not None:
                finish_reason = resp_message.response_metadata.get("finish_reason")
                is_finished_list.append(
                    finish_reason in ["stop", "STOP", "MAX_TOKENS", "eos_token"]
                )
            elif resp_message.response_metadata.get("stop_reason") is not None:
                stop_reason = resp_message.response_metadata.get("stop_reason")
                is_finished_list.append(
                    stop_reason
                    in ["end_turn", "stop", "STOP", "MAX_TOKENS", "eos_token"]
                )
        # default to True
        else:
            is_finished_list.append(True)
    return all(is_finished_list)
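
If a provider reports finish reasons that the default checks above do not cover, a custom parser can be supplied via is_finished_parser. The parser below is a hypothetical, permissive rule (any non-empty generation counts as finished), shown only as a sketch:

from langchain_core.outputs import LLMResult

def permissive_is_finished(response: LLMResult) -> bool:
    # Hypothetical rule: treat every non-empty generation as finished.
    return all(gens[0].text.strip() != "" for gens in response.generations)

# evaluator_llm = LangchainLLMWrapper(chat_model, is_finished_parser=permissive_is_finished)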

LlamaIndexLLMWrapper

LlamaIndexLLMWrapper(llm: BaseLLM, run_config: Optional[RunConfig] = None, cache: Optional[CacheInterface] = None)

Bases: BaseRagasLLM

An adapter for LlamaIndex LLMs.

Source code in src/ragas/llms/base.py
def __init__(
    self,
    llm: BaseLLM,
    run_config: t.Optional[RunConfig] = None,
    cache: t.Optional[CacheInterface] = None,
):
    super().__init__(cache=cache)
    self.llm = llm

    try:
        self._signature = type(self.llm).__name__.lower()
    except AttributeError:
        self._signature = ""

    if run_config is None:
        run_config = RunConfig()
    self.set_run_config(run_config)
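
A minimal construction sketch, assuming llama-index-llms-openai is installed and OPENAI_API_KEY is set:

from llama_index.llms.openai import OpenAI
from ragas.llms import LlamaIndexLLMWrapper

evaluator_llm = LlamaIndexLLMWrapper(OpenAI(model="gpt-4o-mini"))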

HaystackLLMWrapper

HaystackLLMWrapper(haystack_generator: Any, run_config: Optional[RunConfig] = None, cache: Optional[CacheInterface] = None)

Bases: BaseRagasLLM

A wrapper class for using Haystack LLM generators within the Ragas framework.

This class integrates Haystack's LLM components (e.g., OpenAIGenerator, HuggingFaceAPIGenerator) into Ragas, enabling both synchronous and asynchronous text generation.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| haystack_generator | AzureOpenAIGenerator, HuggingFaceAPIGenerator, HuggingFaceLocalGenerator, or OpenAIGenerator | An instance of a Haystack generator. | required |
| run_config | RunConfig | Configuration object to manage LLM execution settings, by default None. | None |
| cache | CacheInterface | A cache instance for storing results, by default None. | None |

Source code in src/ragas/llms/haystack_wrapper.py
def __init__(
    self,
    haystack_generator: t.Any,
    run_config: t.Optional[RunConfig] = None,
    cache: t.Optional[CacheInterface] = None,
):
    super().__init__(cache=cache)

    # Lazy Import of required Haystack components
    try:
        from haystack import AsyncPipeline
        from haystack.components.generators import (
            AzureOpenAIGenerator,
            HuggingFaceAPIGenerator,
            HuggingFaceLocalGenerator,
            OpenAIGenerator,
        )
    except ImportError as exc:
        raise ImportError(
            "Haystack is not installed. Please install it using `pip install haystack-ai`."
        ) from exc

    # Validate haystack_generator type
    if not isinstance(
        haystack_generator,
        (
            AzureOpenAIGenerator,
            HuggingFaceAPIGenerator,
            HuggingFaceLocalGenerator,
            OpenAIGenerator,
        ),
    ):
        raise TypeError(
            "Expected 'haystack_generator' to be one of: "
            "AzureOpenAIGenerator, HuggingFaceAPIGenerator, "
            "HuggingFaceLocalGenerator, or OpenAIGenerator, but received "
            f"{type(haystack_generator).__name__}."
        )

    # Set up Haystack pipeline and generator
    self.generator = haystack_generator
    self.async_pipeline = AsyncPipeline()
    self.async_pipeline.add_component("llm", self.generator)

    if run_config is None:
        run_config = RunConfig()
    self.set_run_config(run_config)
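
A minimal construction sketch, assuming haystack-ai is installed and OPENAI_API_KEY is set:

from haystack.components.generators import OpenAIGenerator
from ragas.llms.haystack_wrapper import HaystackLLMWrapper

evaluator_llm = HaystackLLMWrapper(OpenAIGenerator(model="gpt-4o-mini"))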

llm_factory

llm_factory(model: str = 'gpt-4o-mini', run_config: Optional[RunConfig] = None, default_headers: Optional[Dict[str, str]] = None, base_url: Optional[str] = None) -> BaseRagasLLM

Create and return a BaseRagasLLM instance. Used for the default OpenAI-based LLMs in Ragas.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| model | str | The name of the model to use, by default "gpt-4o-mini". | 'gpt-4o-mini' |
| run_config | RunConfig | Configuration for the run, by default None. | None |
| default_headers | dict of str | Default headers to be used in API requests, by default None. | None |
| base_url | str | Base URL for the API, by default None. | None |

Returns:

| Type | Description |
| --- | --- |
| BaseRagasLLM | An instance of BaseRagasLLM configured with the specified parameters. |

Source code in src/ragas/llms/base.py
def llm_factory(
    model: str = "gpt-4o-mini",
    run_config: t.Optional[RunConfig] = None,
    default_headers: t.Optional[t.Dict[str, str]] = None,
    base_url: t.Optional[str] = None,
) -> BaseRagasLLM:
    """
    Create and return a BaseRagasLLM instance. Used for running default LLMs used
    in Ragas (OpenAI).

    Parameters
    ----------
    model : str, optional
        The name of the model to use, by default "gpt-4o-mini".
    run_config : RunConfig, optional
        Configuration for the run, by default None.
    default_headers : dict of str, optional
        Default headers to be used in API requests, by default None.
    base_url : str, optional
        Base URL for the API, by default None.

    Returns
    -------
    BaseRagasLLM
        An instance of BaseRagasLLM configured with the specified parameters.
    """
    timeout = None
    if run_config is not None:
        timeout = run_config.timeout

    # if helicone is enabled, use the helicone
    if helicone_config.is_enabled:
        default_headers = helicone_config.default_headers()
        base_url = helicone_config.base_url

    openai_model = ChatOpenAI(
        model=model, timeout=timeout, default_headers=default_headers, base_url=base_url
    )
    return LangchainLLMWrapper(openai_model, run_config)
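
A minimal usage sketch (assumes OPENAI_API_KEY is set):

from ragas.llms import llm_factory
from ragas.run_config import RunConfig

llm = llm_factory(model="gpt-4o-mini", run_config=RunConfig(timeout=120))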