Skip to content

LLMs

BaseRagasLLM dataclass

BaseRagasLLM(run_config: RunConfig = RunConfig(), multiple_completion_supported: bool = False, cache: Optional[CacheInterface] = None)

Bases: ABC

get_temperature

get_temperature(n: int) -> float

Return the temperature to use for completion based on n.

Source code in src/ragas/llms/base.py
def get_temperature(self, n: int) -> float:
    """Pick the sampling temperature for a request of ``n`` completions.

    Returns 0.3 when more than one completion is requested and 0.01
    otherwise (including ``n <= 1``).
    """
    if n > 1:
        return 0.3
    return 0.01

is_finished abstractmethod

is_finished(response: LLMResult) -> bool

Check if the LLM response is finished/complete.

Source code in src/ragas/llms/base.py
@abstractmethod
def is_finished(self, response: LLMResult) -> bool:
    """Report whether the LLM response is finished/complete.

    Concrete subclasses must override this method. The base ``generate``
    coroutine calls it on every result and raises
    ``LLMDidNotFinishException`` when it returns a falsy value.
    """

generate async

generate(prompt: PromptValue, n: int = 1, temperature: Optional[float] = 0.01, stop: Optional[List[str]] = None, callbacks: Callbacks = None) -> LLMResult

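Asynchronously generate text for the given prompt. If temperature is None, it is derived from n via get_temperature. The underlying agenerate_text call is wrapped with the retry helper using this instance's run_config, and LLMDidNotFinishException is raised when is_finished reports that the response is incomplete.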

Source code in src/ragas/llms/base.py
async def generate(
    self,
    prompt: PromptValue,
    n: int = 1,
    temperature: t.Optional[float] = 0.01,
    stop: t.Optional[t.List[str]] = None,
    callbacks: Callbacks = None,
) -> LLMResult:
    """Asynchronously generate completions for ``prompt``.

    The request is delegated to ``self.agenerate_text`` after wrapping it
    with ``add_async_retry`` and this instance's ``run_config``.

    Passing ``temperature=None`` makes the temperature depend on ``n``
    (see ``get_temperature``); any other value is forwarded unchanged.

    Raises ``LLMDidNotFinishException`` when ``is_finished`` rejects the
    result.
    """
    # An explicit None asks for the n-dependent default temperature.
    effective_temperature = (
        self.get_temperature(n) if temperature is None else temperature
    )

    retrying_agenerate = add_async_retry(self.agenerate_text, self.run_config)
    llm_result = await retrying_agenerate(
        prompt=prompt,
        n=n,
        temperature=effective_temperature,
        stop=stop,
        callbacks=callbacks,
    )

    # Guard against truncated output (e.g. max_token issues).
    if self.is_finished(llm_result):
        return llm_result
    raise LLMDidNotFinishException()

InstructorBaseRagasLLM

Bases: ABC

Base class for LLMs using the Instructor library pattern.

generate abstractmethod

generate(prompt: str, response_model: Type[InstructorTypeVar]) -> InstructorTypeVar

Generate a response using the configured LLM.

For async clients, this will run the async method in the appropriate event loop.

Source code in src/ragas/llms/base.py
@abstractmethod
def generate(
    self, prompt: str, response_model: t.Type[InstructorTypeVar]
) -> InstructorTypeVar:
    """Generate a response using the configured LLM.

    For async clients, this will run the async method in the appropriate event loop.

    Args:
        prompt: Prompt text to send to the LLM.
        response_model: Type of the structured response to produce.

    Returns:
        The response, annotated as an instance of ``response_model``.
    """

agenerate abstractmethod async

agenerate(prompt: str, response_model: Type[InstructorTypeVar]) -> InstructorTypeVar

Asynchronously generate a response using the configured LLM.

Source code in src/ragas/llms/base.py
@abstractmethod
async def agenerate(
    self, prompt: str, response_model: t.Type[InstructorTypeVar]
) -> InstructorTypeVar:
    """Asynchronously generate a response using the configured LLM.

    Args:
        prompt: Prompt text to send to the LLM.
        response_model: Type of the structured response to produce.

    Returns:
        The response, annotated as an instance of ``response_model``.
    """

InstructorLLM

InstructorLLM(client: Any, model: str, provider: str, **model_args)

Bases: InstructorBaseRagasLLM

LLM wrapper using the Instructor library for structured outputs.

Source code in src/ragas/llms/base.py
def __init__(self, client: t.Any, model: str, provider: str, **model_args):
    """Store the client, model identifiers and extra request arguments.

    Args:
        client: Client object whose ``chat.completions.create`` is used
            to issue requests.
        model: Model name forwarded with every request.
        provider: Provider name, stored as given.
        **model_args: Extra keyword arguments forwarded with every request.
    """
    self.client = client
    self.model = model
    self.provider = provider
    self.model_args = model_args if model_args else {}
    # Decide once, at construction time, whether the client is async-capable.
    self.is_async = self._check_client_async()

generate

generate(prompt: str, response_model: Type[InstructorTypeVar]) -> InstructorTypeVar

Generate a response using the configured LLM.

For async clients, this will run the async method in the appropriate event loop.

Source code in src/ragas/llms/base.py
def generate(
    self, prompt: str, response_model: t.Type[InstructorTypeVar]
) -> InstructorTypeVar:
    """Generate a structured response for ``prompt``.

    With an async client the ``agenerate`` coroutine is handed to
    ``_run_async_in_current_loop``; with a sync client the request is
    issued directly through ``client.chat.completions.create``.

    Args:
        prompt: Text sent as a single ``user`` message.
        response_model: Type forwarded to the client as ``response_model``.

    Returns:
        Whatever the client (or the async runner) returns.
    """
    if self.is_async:
        # Async client: delegate to the coroutine and let the helper run it.
        pending = self.agenerate(prompt, response_model)
        return self._run_async_in_current_loop(pending)

    # Sync client: build the single-message conversation and call directly.
    user_messages = [{"role": "user", "content": prompt}]
    return self.client.chat.completions.create(
        model=self.model,
        messages=user_messages,
        response_model=response_model,
        **self.model_args,
    )

agenerate async

agenerate(prompt: str, response_model: Type[InstructorTypeVar]) -> InstructorTypeVar

Asynchronously generate a response using the configured LLM.

Source code in src/ragas/llms/base.py
async def agenerate(
    self, prompt: str, response_model: t.Type[InstructorTypeVar]
) -> InstructorTypeVar:
    """Asynchronously generate a structured response for ``prompt``.

    Args:
        prompt: Text sent as a single ``user`` message.
        response_model: Type forwarded to the client as ``response_model``.

    Returns:
        The awaited result of ``client.chat.completions.create``.

    Raises:
        TypeError: If the wrapped client is synchronous.
    """
    # Fail fast with a helpful message when the client cannot be awaited.
    if not self.is_async:
        raise TypeError(
            "Cannot use agenerate() with a synchronous client. Use generate() instead."
        )

    user_messages = [{"role": "user", "content": prompt}]
    response = await self.client.chat.completions.create(
        model=self.model,
        messages=user_messages,
        response_model=response_model,
        **self.model_args,
    )
    return response

LangchainLLMWrapper

LangchainLLMWrapper(langchain_llm: BaseLanguageModel[BaseMessage], run_config: Optional[RunConfig] = None, is_finished_parser: Optional[Callable[[LLMResult], bool]] = None, cache: Optional[CacheInterface] = None, bypass_temperature: bool = False)

Bases: BaseRagasLLM

A simple base class for RagasLLMs that is based on Langchain's BaseLanguageModel interface. It implements two functions:

- generate_text: generates text from a given PromptValue
- agenerate_text: generates text from a given PromptValue asynchronously

Source code in src/ragas/llms/base.py
def __init__(
    self,
    langchain_llm: BaseLanguageModel[BaseMessage],
    run_config: t.Optional[RunConfig] = None,
    is_finished_parser: t.Optional[t.Callable[[LLMResult], bool]] = None,
    cache: t.Optional[CacheInterface] = None,
    bypass_temperature: bool = False,
):
    """Wrap a Langchain language model.

    Args:
        langchain_llm: The Langchain model to wrap.
        run_config: Run configuration; a default ``RunConfig()`` is created
            when omitted.
        is_finished_parser: Optional callable that replaces the built-in
            ``is_finished`` logic.
        cache: Optional cache passed to the base class.
        bypass_temperature: Stored as-is; intended for models that do not
            support temperature (e.g., OpenAI o1 series).
    """
    super().__init__(cache=cache)
    self.langchain_llm = langchain_llm
    # The wrapped model is assigned before the run config is applied.
    self.set_run_config(RunConfig() if run_config is None else run_config)
    self.is_finished_parser = is_finished_parser
    # Certain LLMs (e.g., OpenAI o1 series) do not support temperature
    self.bypass_temperature = bypass_temperature

is_finished

is_finished(response: LLMResult) -> bool

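Parse the response to check if the LLM finished by checking the finish_reason or stop_reason. The default parser accepts the values "stop" (OpenAI), "STOP" and "MAX_TOKENS" (Vertex AI) and "eos_token" (WatsonX AI), plus "end_turn" when reading stop_reason. A custom is_finished_parser, if provided, is used instead.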

Source code in src/ragas/llms/base.py
def is_finished(self, response: LLMResult) -> bool:
    """
    Decide whether every generation in ``response`` ended normally.

    A user-supplied ``is_finished_parser`` takes precedence. Otherwise the
    first generation of each flattened result is inspected:
    ``generation_info["finish_reason"]`` first, then - for chat generations
    without ``generation_info`` - the message's ``response_metadata``
    (``finish_reason``, falling back to ``stop_reason``). Results that carry
    no reason at all do not count against the response.
    """
    custom_parser = self.is_finished_parser
    if custom_parser is not None:
        return custom_parser(response)

    # No custom parser: fall back to the built-in rules.
    # OpenAI uses "stop"
    # Vertex AI uses "STOP" or "MAX_TOKENS"
    # WatsonX AI uses "eos_token"
    accepted_finish_reasons = ["stop", "STOP", "MAX_TOKENS", "eos_token"]
    # stop_reason additionally accepts "end_turn"
    accepted_stop_reasons = ["end_turn", "stop", "STOP", "MAX_TOKENS", "eos_token"]

    verdicts = []
    for flat_result in response.flatten():
        generation = flat_result.generations[0][0]
        info = generation.generation_info

        if info is not None:
            # generation_info is provided - it is the preferred source
            reason = info.get("finish_reason")
            if reason is not None:
                verdicts.append(reason in accepted_finish_reasons)
            # provide more conditions here
            # https://github.com/explodinggradients/ragas/issues/1548
            continue

        # Without generation_info, fall back to the message's
        # response_metadata; this is less reliable.
        if isinstance(generation, ChatGeneration) and generation.message is not None:
            metadata = generation.message.response_metadata
            finish_reason = metadata.get("finish_reason")
            if finish_reason is not None:
                verdicts.append(finish_reason in accepted_finish_reasons)
                continue
            stop_reason = metadata.get("stop_reason")
            if stop_reason is not None:
                verdicts.append(stop_reason in accepted_stop_reasons)
            continue

        # Nothing to inspect: default to True
        verdicts.append(True)

    return all(verdicts)

LlamaIndexLLMWrapper

LlamaIndexLLMWrapper(llm: BaseLLM, run_config: Optional[RunConfig] = None, cache: Optional[CacheInterface] = None, bypass_temperature: bool = False)

Bases: BaseRagasLLM

An adaptor for LlamaIndex LLMs.

Source code in src/ragas/llms/base.py
def __init__(
    self,
    llm: BaseLLM,
    run_config: t.Optional[RunConfig] = None,
    cache: t.Optional[CacheInterface] = None,
    bypass_temperature: bool = False,
):
    """Wrap a LlamaIndex LLM.

    Args:
        llm: The LlamaIndex model to wrap.
        run_config: Run configuration; a default ``RunConfig()`` is created
            when omitted.
        cache: Optional cache passed to the base class.
        bypass_temperature: Stored as-is; intended for models that do not
            support temperature (e.g., OpenAI o1 series).
    """
    super().__init__(cache=cache)
    self.llm = llm
    # Certain LLMs (e.g., OpenAI o1 series) do not support temperature
    self.bypass_temperature = bypass_temperature

    # Lower-cased class name of the wrapped model; empty if unavailable.
    try:
        signature = type(self.llm).__name__.lower()
    except AttributeError:
        signature = ""
    self._signature = signature

    self.set_run_config(RunConfig() if run_config is None else run_config)

HaystackLLMWrapper

HaystackLLMWrapper(haystack_generator: Any, run_config: Optional[RunConfig] = None, cache: Optional[CacheInterface] = None)

Bases: BaseRagasLLM

A wrapper class for using Haystack LLM generators within the Ragas framework.

This class integrates Haystack's LLM components (e.g., OpenAIGenerator, HuggingFaceAPIGenerator, etc.) into Ragas, enabling both synchronous and asynchronous text generation.

Parameters:

Name Type Description Default
haystack_generator AzureOpenAIGenerator | HuggingFaceAPIGenerator | HuggingFaceLocalGenerator | OpenAIGenerator

An instance of a Haystack generator.

required
run_config RunConfig

Configuration object to manage LLM execution settings, by default None.

None
cache CacheInterface

A cache instance for storing results, by default None.

None
Source code in src/ragas/llms/haystack_wrapper.py
def __init__(
    self,
    haystack_generator: t.Any,
    run_config: t.Optional[RunConfig] = None,
    cache: t.Optional[CacheInterface] = None,
):
    """Validate the Haystack generator and register it in an ``AsyncPipeline``.

    Args:
        haystack_generator: Instance of ``AzureOpenAIGenerator``,
            ``HuggingFaceAPIGenerator``, ``HuggingFaceLocalGenerator`` or
            ``OpenAIGenerator``.
        run_config: Run configuration; a default ``RunConfig()`` is created
            when omitted.
        cache: Optional cache passed to the base class.

    Raises:
        ImportError: If the Haystack imports fail.
        TypeError: If ``haystack_generator`` is not one of the supported
            generator types.
    """
    super().__init__(cache=cache)

    # Haystack is imported here, inside the constructor, not at module level.
    try:
        from haystack import AsyncPipeline
        from haystack.components.generators.azure import AzureOpenAIGenerator
        from haystack.components.generators.hugging_face_api import (
            HuggingFaceAPIGenerator,
        )
        from haystack.components.generators.hugging_face_local import (
            HuggingFaceLocalGenerator,
        )
        from haystack.components.generators.openai import OpenAIGenerator
    except ImportError as exc:
        raise ImportError(
            "Haystack is not installed. Please install it using `pip install haystack-ai`."
        ) from exc

    # Only these generator classes are accepted.
    supported_generators = (
        AzureOpenAIGenerator,
        HuggingFaceAPIGenerator,
        HuggingFaceLocalGenerator,
        OpenAIGenerator,
    )
    if not isinstance(haystack_generator, supported_generators):
        received_name = type(haystack_generator).__name__
        raise TypeError(
            "Expected 'haystack_generator' to be one of: "
            "AzureOpenAIGenerator, HuggingFaceAPIGenerator, "
            "HuggingFaceLocalGenerator, or OpenAIGenerator, but received "
            f"{received_name}."
        )

    # Register the generator as the "llm" component of an async pipeline.
    self.generator = haystack_generator
    self.async_pipeline = AsyncPipeline()
    self.async_pipeline.add_component("llm", self.generator)

    self.set_run_config(RunConfig() if run_config is None else run_config)

instructor_llm_factory

instructor_llm_factory(provider: str, model: Optional[str] = None, client: Optional[Any] = None, **kwargs: Any) -> InstructorBaseRagasLLM

Factory function to create an InstructorLLM instance based on the provider.

Args:

- provider (str): The name of the LLM provider or a provider/model string (e.g., "openai", "openai/gpt-4").
- model (str, optional): The model name to use for generation.
- client (Any, optional): Pre-initialized client for the provider.
- **kwargs: Additional arguments for the LLM (model_args).

Returns: InstructorBaseRagasLLM: An instance of the specified LLM provider.

Examples:

# OpenAI with separate parameters
llm = instructor_llm_factory("openai", "gpt-4", client=openai_client)

# OpenAI with provider/model string
llm = instructor_llm_factory("openai/gpt-4", client=openai_client)

# Anthropic
llm = instructor_llm_factory("anthropic", "claude-3-sonnet-20240229", client=anthropic_client)

# Cohere
llm = instructor_llm_factory("cohere", "command-r-plus", client=cohere_client)

# Gemini
llm = instructor_llm_factory("gemini", "gemini-pro", client=gemini_client)

# LiteLLM (supports 100+ models)
llm = instructor_llm_factory("litellm", "gpt-4", client=litellm_client)

Raises: ValueError: If provider is unsupported or required parameters are missing.

Source code in src/ragas/llms/base.py
def instructor_llm_factory(
    provider: str,
    model: t.Optional[str] = None,
    client: t.Optional[t.Any] = None,
    **kwargs: t.Any,
) -> InstructorBaseRagasLLM:
    """
    Build an InstructorLLM around an instructor-patched provider client.

    Args:
        provider (str): Provider name, or a combined "provider/model" string
                       (e.g., "openai", "openai/gpt-4"). The combined form is
                       only split when ``model`` is not given.
        model (str, optional): Model name to use for generation.
        client (Any, optional): Pre-initialized client for the provider.
        **kwargs: Additional arguments stored on the LLM (model_args).

    Returns:
        InstructorBaseRagasLLM: An InstructorLLM for the requested provider.

    Examples:
        # Separate provider and model
        llm = instructor_llm_factory("openai", "gpt-4", client=openai_client)

        # Combined provider/model string
        llm = instructor_llm_factory("openai/gpt-4", client=openai_client)

        # Other supported providers: anthropic, cohere, gemini, litellm
        llm = instructor_llm_factory("anthropic", "claude-3-sonnet-20240229", client=anthropic_client)

    Raises:
        ValueError: If the model name or client is missing, or the provider
            is not supported.
    """
    # Accept the "provider/model" shorthand when no explicit model was given.
    if "/" in provider and model is None:
        provider, model = provider.split("/", 1)

    if not model:
        raise ValueError(
            "Model name is required. Either provide it as a separate parameter "
            "or use provider/model format (e.g., 'openai/gpt-4')"
        )

    if not client:
        raise ValueError(f"{provider.title()} provider requires a client instance")

    # Provider name (lower-cased) -> name of the instructor patch function.
    # The function is looked up by name only after the provider is known, so
    # only the requested provider's patcher has to exist on the module.
    patcher_names = {
        "openai": "from_openai",
        "anthropic": "from_anthropic",
        "cohere": "from_cohere",
        "gemini": "from_gemini",
        "litellm": "from_litellm",
    }
    patcher_name = patcher_names.get(provider.lower())
    if patcher_name is None:
        raise ValueError(
            f"Unsupported provider: {provider}. "
            f"Supported providers: openai, anthropic, cohere, gemini, litellm"
        )

    patched_client = getattr(instructor, patcher_name)(client)
    return InstructorLLM(
        client=patched_client, model=model, provider=provider, **kwargs
    )

llm_factory

llm_factory(model: str = 'gpt-4o-mini', run_config: Optional[RunConfig] = None, default_headers: Optional[Dict[str, str]] = None, base_url: Optional[str] = None) -> BaseRagasLLM

Create and return a BaseRagasLLM instance. Used for running default LLMs used in Ragas (OpenAI).

Parameters:

Name Type Description Default
model str

The name of the model to use, by default "gpt-4o-mini".

'gpt-4o-mini'
run_config RunConfig

Configuration for the run, by default None.

None
default_headers dict of str

Default headers to be used in API requests, by default None.

None
base_url str

Base URL for the API, by default None.

None

Returns:

Type Description
BaseRagasLLM

An instance of BaseRagasLLM configured with the specified parameters.

Source code in src/ragas/llms/base.py
def llm_factory(
    model: str = "gpt-4o-mini",
    run_config: t.Optional[RunConfig] = None,
    default_headers: t.Optional[t.Dict[str, str]] = None,
    base_url: t.Optional[str] = None,
) -> BaseRagasLLM:
    """
    Build the default Ragas LLM: a ``ChatOpenAI`` model wrapped in
    ``LangchainLLMWrapper``.

    Parameters
    ----------
    model : str, optional
        The name of the model to use, by default "gpt-4o-mini".
    run_config : RunConfig, optional
        Configuration for the run, by default None. When given, its
        ``timeout`` is passed to the OpenAI model.
    default_headers : dict of str, optional
        Default headers to be used in API requests, by default None.
        Replaced by the Helicone headers when Helicone is enabled.
    base_url : str, optional
        Base URL for the API, by default None. Replaced by the Helicone
        base URL when Helicone is enabled.

    Returns
    -------
    BaseRagasLLM
        The wrapped OpenAI chat model.
    """
    request_timeout = run_config.timeout if run_config is not None else None

    # When Helicone is enabled its headers and base URL take over,
    # overriding any values passed by the caller.
    if helicone_config.is_enabled:
        default_headers = helicone_config.default_headers()
        base_url = helicone_config.base_url

    chat_model = ChatOpenAI(
        model=model,
        timeout=request_timeout,
        default_headers=default_headers,
        base_url=base_url,
    )
    return LangchainLLMWrapper(chat_model, run_config)