Skip to content

LLMs

BaseRagasLLM dataclass

BaseRagasLLM(run_config: RunConfig = RunConfig(), multiple_completion_supported: bool = False, cache: Optional[CacheInterface] = None)

Bases: ABC

get_temperature

get_temperature(n: int) -> float

Return the temperature to use for completion based on n.

Source code in src/ragas/llms/base.py
def get_temperature(self, n: int) -> float:
    """Pick the sampling temperature for a request of ``n`` completions.

    Returns 0.3 when more than one completion is requested and 0.01
    otherwise.
    """
    if n > 1:
        return 0.3
    return 0.01

is_finished abstractmethod

is_finished(response: LLMResult) -> bool

Check if the LLM response is finished/complete.

Source code in src/ragas/llms/base.py
@abstractmethod
def is_finished(self, response: LLMResult) -> bool:
    """Check if the LLM response is finished/complete.

    Subclasses must implement this. ``generate`` calls it on every result
    and raises ``LLMDidNotFinishException`` when it returns ``False``.

    Parameters
    ----------
    response : LLMResult
        The result produced by the underlying model call.

    Returns
    -------
    bool
        ``True`` if the response is complete, ``False`` otherwise.
    """
    ...

generate async

generate(prompt: PromptValue, n: int = 1, temperature: Optional[float] = 0.01, stop: Optional[List[str]] = None, callbacks: Callbacks = None) -> LLMResult

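Generate text asynchronously: the call to agenerate_text is wrapped with retries driven by run_config, and LLMDidNotFinishException is raised if the response is not finished.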

Source code in src/ragas/llms/base.py
async def generate(
    self,
    prompt: PromptValue,
    n: int = 1,
    temperature: t.Optional[float] = 0.01,
    stop: t.Optional[t.List[str]] = None,
    callbacks: Callbacks = None,
) -> LLMResult:
    """Asynchronously generate completions for ``prompt``, with retries.

    Parameters
    ----------
    prompt : PromptValue
        The prompt forwarded to ``agenerate_text``.
    n : int, optional
        Number of completions to request, by default 1.
    temperature : float, optional
        Sampling temperature, by default 0.01. Passing ``None`` selects the
        value returned by ``get_temperature(n)``.
    stop : list of str, optional
        Stop sequences forwarded to ``agenerate_text``.
    callbacks : Callbacks, optional
        Callbacks forwarded to ``agenerate_text``.

    Returns
    -------
    LLMResult
        The result returned by ``agenerate_text``.

    Raises
    ------
    LLMDidNotFinishException
        If ``is_finished`` reports the result as incomplete.
    """
    # An explicit None means "derive the temperature from n".
    effective_temperature = (
        self.get_temperature(n) if temperature is None else temperature
    )

    # Wrap the raw call so it is retried according to this LLM's run_config.
    retrying_call = add_async_retry(self.agenerate_text, self.run_config)
    llm_result = await retrying_call(
        prompt=prompt,
        n=n,
        temperature=effective_temperature,
        stop=stop,
        callbacks=callbacks,
    )

    # An unfinished response (e.g. a max_token issue) is surfaced as an error.
    if self.is_finished(llm_result):
        return llm_result
    raise LLMDidNotFinishException()

InstructorBaseRagasLLM

Bases: ABC

Base class for LLMs using the Instructor library pattern.

generate abstractmethod

generate(prompt: str, response_model: Type[InstructorTypeVar]) -> InstructorTypeVar

Generate a response using the configured LLM.

For async clients, this will run the async method in the appropriate event loop.

Source code in src/ragas/llms/base.py
@abstractmethod
def generate(
    self, prompt: str, response_model: t.Type[InstructorTypeVar]
) -> InstructorTypeVar:
    """Generate a response using the configured LLM.

    For async clients, this will run the async method in the appropriate event loop.

    Args:
        prompt: The prompt text to send to the LLM.
        response_model: The type the response should be returned as.

    Returns:
        An instance of ``response_model``.
    """

agenerate abstractmethod async

agenerate(prompt: str, response_model: Type[InstructorTypeVar]) -> InstructorTypeVar

Asynchronously generate a response using the configured LLM.

Source code in src/ragas/llms/base.py
@abstractmethod
async def agenerate(
    self, prompt: str, response_model: t.Type[InstructorTypeVar]
) -> InstructorTypeVar:
    """Asynchronously generate a response using the configured LLM.

    Args:
        prompt: The prompt text to send to the LLM.
        response_model: The type the response should be returned as.

    Returns:
        An instance of ``response_model``.
    """

InstructorLLM

InstructorLLM(client: Any, model: str, provider: str, **model_args)

Bases: InstructorBaseRagasLLM

LLM wrapper using the Instructor library for structured outputs.

Source code in src/ragas/llms/base.py
def __init__(self, client: t.Any, model: str, provider: str, **model_args):
    """Store the client, the model identity and extra per-call arguments.

    Args:
        client: Client object whose ``chat.completions.create`` is used for
            generation.
        model: Model name passed to the client on every call.
        provider: Provider name reported in usage tracking.
        **model_args: Extra keyword arguments forwarded to every
            completion call.
    """
    self.client, self.model, self.provider = client, model, provider
    self.model_args = model_args if model_args else {}
    # Detect once, up front, whether the client is async-capable.
    self.is_async = self._check_client_async()

generate

generate(prompt: str, response_model: Type[InstructorTypeVar]) -> InstructorTypeVar

Generate a response using the configured LLM.

For async clients, this will run the async method in the appropriate event loop.

Source code in src/ragas/llms/base.py
def generate(
    self, prompt: str, response_model: t.Type[InstructorTypeVar]
) -> InstructorTypeVar:
    """Generate a response using the configured LLM.

    For async clients, this runs ``agenerate`` through
    ``_run_async_in_current_loop``. For sync clients, the client's
    ``chat.completions.create`` is called directly.

    Usage is tracked exactly once per call. ``agenerate`` emits its own
    usage event, so only the synchronous path tracks here; tracking after
    delegating would count every async-backed call twice.

    Args:
        prompt: The prompt text, sent as a single ``user`` message.
        response_model: The type passed to the client as ``response_model``.

    Returns:
        Whatever the client (or ``agenerate``) returns for ``response_model``.
    """
    if self.is_async:
        # agenerate() records the usage event itself; do not track again.
        return self._run_async_in_current_loop(
            self.agenerate(prompt, response_model)
        )

    # Regular sync client: call it directly.
    result = self.client.chat.completions.create(
        model=self.model,
        messages=[{"role": "user", "content": prompt}],
        response_model=response_model,
        **self.model_args,
    )

    # Track the usage of the synchronous call.
    track(
        LLMUsageEvent(
            provider=self.provider,
            model=self.model,
            llm_type="instructor",
            num_requests=1,
            is_async=self.is_async,
        )
    )
    return result

agenerate async

agenerate(prompt: str, response_model: Type[InstructorTypeVar]) -> InstructorTypeVar

Asynchronously generate a response using the configured LLM.

Source code in src/ragas/llms/base.py
async def agenerate(
    self, prompt: str, response_model: t.Type[InstructorTypeVar]
) -> InstructorTypeVar:
    """Asynchronously generate a response using the configured LLM.

    Args:
        prompt: The prompt text, sent as a single ``user`` message.
        response_model: The type passed to the client as ``response_model``.

    Returns:
        The awaited result of the client's ``chat.completions.create``.

    Raises:
        TypeError: If the wrapped client is synchronous.
    """
    # A sync client cannot be awaited; fail with a helpful message.
    if not self.is_async:
        raise TypeError(
            "Cannot use agenerate() with a synchronous client. Use generate() instead."
        )

    completion = await self.client.chat.completions.create(
        model=self.model,
        messages=[{"role": "user", "content": prompt}],
        response_model=response_model,
        **self.model_args,
    )

    # Record one async request for this provider/model.
    usage_event = LLMUsageEvent(
        provider=self.provider,
        model=self.model,
        llm_type="instructor",
        num_requests=1,
        is_async=True,
    )
    track(usage_event)
    return completion

HaystackLLMWrapper

HaystackLLMWrapper(haystack_generator: Any, run_config: Optional[RunConfig] = None, cache: Optional[CacheInterface] = None)

Bases: BaseRagasLLM

A wrapper class for using Haystack LLM generators within the Ragas framework.

This class integrates Haystack's LLM components (e.g., OpenAIGenerator, HuggingFaceAPIGenerator, etc.) into Ragas, enabling both synchronous and asynchronous text generation.

Parameters:

Name Type Description Default
haystack_generator AzureOpenAIGenerator | HuggingFaceAPIGenerator | HuggingFaceLocalGenerator | OpenAIGenerator

An instance of a Haystack generator.

required
run_config RunConfig

Configuration object to manage LLM execution settings, by default None.

None
cache CacheInterface

A cache instance for storing results, by default None.

None
Source code in src/ragas/llms/haystack_wrapper.py
def __init__(
    self,
    haystack_generator: t.Any,
    run_config: t.Optional[RunConfig] = None,
    cache: t.Optional[CacheInterface] = None,
):
    """Wrap a Haystack generator and register it in an async pipeline.

    Parameters
    ----------
    haystack_generator : Any
        Must be an AzureOpenAIGenerator, HuggingFaceAPIGenerator,
        HuggingFaceLocalGenerator or OpenAIGenerator instance.
    run_config : RunConfig, optional
        Run configuration; a fresh ``RunConfig()`` is used when None.
    cache : CacheInterface, optional
        Cache instance passed to the base class, by default None.

    Raises
    ------
    ImportError
        If the Haystack components cannot be imported.
    TypeError
        If ``haystack_generator`` is not one of the supported types.
    """
    super().__init__(cache=cache)

    # Haystack is imported lazily so it is only needed when this wrapper is used.
    try:
        from haystack import AsyncPipeline
        from haystack.components.generators.azure import AzureOpenAIGenerator
        from haystack.components.generators.hugging_face_api import (
            HuggingFaceAPIGenerator,
        )
        from haystack.components.generators.hugging_face_local import (
            HuggingFaceLocalGenerator,
        )
        from haystack.components.generators.openai import OpenAIGenerator
    except ImportError as exc:
        raise ImportError(
            "Haystack is not installed. Please install it using `pip install haystack-ai`."
        ) from exc

    supported_generators = (
        AzureOpenAIGenerator,
        HuggingFaceAPIGenerator,
        HuggingFaceLocalGenerator,
        OpenAIGenerator,
    )
    if not isinstance(haystack_generator, supported_generators):
        received = type(haystack_generator).__name__
        raise TypeError(
            "Expected 'haystack_generator' to be one of: "
            "AzureOpenAIGenerator, HuggingFaceAPIGenerator, "
            "HuggingFaceLocalGenerator, or OpenAIGenerator, but received "
            f"{received}."
        )

    # The generator is registered in the pipeline under the name "llm".
    self.generator = haystack_generator
    self.async_pipeline = AsyncPipeline()
    self.async_pipeline.add_component("llm", self.generator)

    self.set_run_config(RunConfig() if run_config is None else run_config)

OCIGenAIWrapper

OCIGenAIWrapper(model_id: str, compartment_id: str, config: Optional[Dict[str, Any]] = None, endpoint_id: Optional[str] = None, run_config: Optional[RunConfig] = None, cache: Optional[Any] = None, default_system_prompt: Optional[str] = None, client: Optional[Any] = None)

Bases: BaseRagasLLM

OCI Gen AI LLM wrapper for Ragas.

This wrapper provides direct integration with Oracle Cloud Infrastructure Generative AI services without requiring LangChain or LlamaIndex.

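Args:

- model_id: The OCI model ID to use for generation
- compartment_id: The OCI compartment ID
- config: OCI configuration dictionary (optional, uses default if not provided)
- endpoint_id: Optional endpoint ID for the model
- run_config: Ragas run configuration
- cache: Optional cache backend
- default_system_prompt: Optional default system prompt stored on the wrapper
- client: Optional pre-initialized client; if neither a client nor an endpoint_id is given, the OCI SDK must be installed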

Source code in src/ragas/llms/oci_genai_wrapper.py
def __init__(
    self,
    model_id: str,
    compartment_id: str,
    config: t.Optional[t.Dict[str, t.Any]] = None,
    endpoint_id: t.Optional[str] = None,
    run_config: t.Optional[RunConfig] = None,
    cache: t.Optional[t.Any] = None,
    default_system_prompt: t.Optional[str] = None,
    client: t.Optional[t.Any] = None,
):
    """
    Initialize OCI Gen AI wrapper.

    Args:
        model_id: The OCI model ID to use for generation
        compartment_id: The OCI compartment ID
        config: OCI configuration dictionary (optional); stored for later,
            lazy client initialization
        endpoint_id: Optional endpoint ID for the model
        run_config: Ragas run configuration; ``RunConfig()`` is used when None
        cache: Optional cache backend, passed to the base class
        default_system_prompt: Optional default system prompt, stored on the
            instance
        client: Optional pre-built client, stored on the instance

    Raises:
        ImportError: If no client is given, the OCI SDK is unavailable and
            no ``endpoint_id`` is provided.
    """
    super().__init__(cache=cache)

    self.model_id, self.compartment_id = model_id, compartment_id
    self.endpoint_id = endpoint_id
    self.default_system_prompt = default_system_prompt

    # Keep the client/config as given; the SDK stays import-optional because
    # the client is initialized lazily.
    self.client = client
    self._oci_config = config

    # Fail early only when nothing usable is available: no client, no SDK
    # and no endpoint fallback.
    nothing_usable = (
        client is None and GenerativeAiClient is None and endpoint_id is None
    )  # type: ignore
    if nothing_usable:
        raise ImportError(
            "OCI SDK not found. Please install it with: pip install oci"
        )

    self.set_run_config(RunConfig() if run_config is None else run_config)

    # Record the initialization as a usage event.
    init_event = LLMUsageEvent(
        provider="oci_genai",
        model=model_id,
        llm_type="oci_wrapper",
        num_requests=1,
        is_async=False,
    )
    track(init_event)

generate_text

generate_text(prompt: PromptValue, n: int = 1, temperature: Optional[float] = 0.01, stop: Optional[List[str]] = None, callbacks: Optional[Any] = None) -> LLMResult

Generate text using OCI Gen AI.

Source code in src/ragas/llms/oci_genai_wrapper.py
def generate_text(
    self,
    prompt: PromptValue,
    n: int = 1,
    temperature: t.Optional[float] = 0.01,
    stop: t.Optional[t.List[str]] = None,
    callbacks: t.Optional[t.Any] = None,
) -> LLMResult:
    """Generate text synchronously using OCI Gen AI.

    Args:
        prompt: Prompt converted to messages via ``_convert_prompt_to_messages``.
        n: Number of requests to issue, one after another.
        temperature: Sampling temperature; ``None`` selects
            ``get_temperature(n)``.
        stop: Stop sequences forwarded to ``_create_generation_request``.
        callbacks: Accepted for interface compatibility; not used here.

    Returns:
        An ``LLMResult`` whose ``generations`` holds one single-item list
        per request.

    Raises:
        Exception: Any error raised while generating is logged and re-raised.
    """
    sampling_temperature = (
        self.get_temperature(n) if temperature is None else temperature
    )

    messages = self._convert_prompt_to_messages(prompt)
    collected = []

    try:
        for _ in range(n):
            request = self._create_generation_request(
                messages, sampling_temperature, stop=stop
            )
            response = self._get_client().generate_text(**request)

            # Pull the text out of whichever response shape came back.
            payload = response.data
            choices = getattr(payload, "choices", None)
            if choices:
                text = choices[0].message.content
            elif hasattr(payload, "text"):
                text = payload.text
            else:
                text = str(payload)

            # NOTE(review): each completion is wrapped in its own inner list,
            # so n > 1 yields n single-item lists - confirm callers expect
            # this shape rather than one list of n generations.
            collected.append([Generation(text=text)])

        # Usage is recorded only after every request succeeded.
        usage_event = LLMUsageEvent(
            provider="oci_genai",
            model=self.model_id,
            llm_type="oci_wrapper",
            num_requests=n,
            is_async=False,
        )
        track(usage_event)

        return LLMResult(generations=collected)

    except Exception as e:
        logger.error(f"Error generating text with OCI Gen AI: {e}")
        raise

agenerate_text async

agenerate_text(prompt: PromptValue, n: int = 1, temperature: Optional[float] = 0.01, stop: Optional[List[str]] = None, callbacks: Optional[Any] = None) -> LLMResult

Generate text asynchronously using OCI Gen AI.

Source code in src/ragas/llms/oci_genai_wrapper.py
async def agenerate_text(
    self,
    prompt: PromptValue,
    n: int = 1,
    temperature: t.Optional[float] = 0.01,
    stop: t.Optional[t.List[str]] = None,
    callbacks: t.Optional[t.Any] = None,
) -> LLMResult:
    """Generate text asynchronously using OCI Gen AI.

    The client call is synchronous, so each request is run in the event
    loop's default executor and awaited. The ``n`` requests are issued one
    after another.

    Args:
        prompt: Prompt converted to messages via ``_convert_prompt_to_messages``.
        n: Number of requests to issue.
        temperature: Sampling temperature; ``None`` selects
            ``get_temperature(n)``.
        stop: Stop sequences forwarded to ``_create_generation_request``.
        callbacks: Accepted for interface compatibility; not used here.

    Returns:
        An ``LLMResult`` whose ``generations`` holds one single-item list
        per request.

    Raises:
        Exception: Any error raised while generating is logged and re-raised.
    """
    if temperature is None:
        temperature = self.get_temperature(n)

    messages = self._convert_prompt_to_messages(prompt)
    generations = []

    try:
        # Inside a coroutine a loop is always running; get_running_loop()
        # is the documented way to obtain it (get_event_loop() is
        # discouraged in coroutines).
        loop = asyncio.get_running_loop()

        for _ in range(n):
            request = self._create_generation_request(
                messages, temperature, stop=stop
            )

            # Bind this iteration's request explicitly so the callable does
            # not depend on the loop variable's later value.
            response = await loop.run_in_executor(
                None,
                lambda req=request: self._get_client().generate_text(**req),
            )

            # Extract text from whichever response shape came back.
            if hasattr(response.data, "choices") and response.data.choices:
                text = response.data.choices[0].message.content
            elif hasattr(response.data, "text"):
                text = response.data.text
            else:
                text = str(response.data)

            generation = Generation(text=text)
            generations.append([generation])

        # Usage is recorded only after every request succeeded.
        track(
            LLMUsageEvent(
                provider="oci_genai",
                model=self.model_id,
                llm_type="oci_wrapper",
                num_requests=n,
                is_async=True,
            )
        )

        return LLMResult(generations=generations)

    except Exception as e:
        logger.error(f"Error generating text with OCI Gen AI: {e}")
        raise

is_finished

is_finished(response: LLMResult) -> bool

Check if the LLM response is finished/complete.

Source code in src/ragas/llms/oci_genai_wrapper.py
def is_finished(self, response: LLMResult) -> bool:
    """Report whether every generation in ``response`` has non-blank text.

    Returns ``False`` as soon as any generation has missing, empty or
    whitespace-only text, and also when inspecting the response raises.
    A response with no generations counts as finished.
    """
    try:
        return all(
            generation.text and generation.text.strip() != ""
            for candidates in response.generations
            for generation in candidates
        )
    except Exception:
        # A response that cannot be inspected is treated as unfinished.
        return False

instructor_llm_factory

instructor_llm_factory(provider: str, model: Optional[str] = None, client: Optional[Any] = None, **kwargs: Any) -> InstructorBaseRagasLLM

Factory function to create an InstructorLLM instance based on the provider.

Args:

- provider (str): The name of the LLM provider or provider/model string (e.g., "openai", "openai/gpt-4").
- model (str, optional): The model name to use for generation.
- client (Any): Pre-initialized client for the provider. It defaults to None in the signature, but a ValueError is raised if no client is supplied.
- **kwargs: Additional arguments for the LLM (model_args).

Returns: InstructorBaseRagasLLM: An instance of the specified LLM provider.

Examples:

# OpenAI with separate parameters
llm = instructor_llm_factory("openai", "gpt-4", client=openai_client)

# OpenAI with provider/model string
llm = instructor_llm_factory("openai/gpt-4", client=openai_client)

# Anthropic
llm = instructor_llm_factory("anthropic", "claude-3-sonnet-20240229", client=anthropic_client)

# Cohere
llm = instructor_llm_factory("cohere", "command-r-plus", client=cohere_client)

# Google
llm = instructor_llm_factory(provider="google", model="gemini-2.0-flash", client=google_client)

# LiteLLM (supports 100+ models)
llm = instructor_llm_factory("litellm", "gpt-4", client=litellm_client)

Raises: ValueError: If provider is unsupported or required parameters are missing.

Source code in src/ragas/llms/base.py
def instructor_llm_factory(
    provider: str,
    model: t.Optional[str] = None,
    client: t.Optional[t.Any] = None,
    **kwargs: t.Any,
) -> InstructorBaseRagasLLM:
    """
    Build an InstructorLLM around an instructor-patched provider client.

    Args:
        provider (str): The name of the LLM provider or provider/model string
                       (e.g., "openai", "openai/gpt-4"). The provider/model
                       form is only split when ``model`` is None.
        model (str, optional): The model name to use for generation.
        client (Any): Pre-initialized client for the provider. Defaults to
                       None in the signature, but a ValueError is raised
                       when no client is supplied.
        **kwargs: Additional arguments for the LLM (model_args).

    Returns:
        InstructorBaseRagasLLM: An instance of the specified LLM provider.

    Examples:
        # OpenAI with separate parameters
        llm = instructor_llm_factory("openai", "gpt-4", client=openai_client)

        # OpenAI with provider/model string
        llm = instructor_llm_factory("openai/gpt-4", client=openai_client)

        # Anthropic
        llm = instructor_llm_factory("anthropic", "claude-3-sonnet-20240229", client=anthropic_client)

        # Cohere
        llm = instructor_llm_factory("cohere", "command-r-plus", client=cohere_client)

        # Google
        llm = instructor_llm_factory(provider="google", model="gemini-2.0-flash", client=google_client)

        # LiteLLM (supports 100+ models)
        llm = instructor_llm_factory("litellm", "gpt-4", client=litellm_client)

    Raises:
        ValueError: If provider is unsupported or required parameters are missing.
    """
    # "provider/model" shorthand applies only when no explicit model is given.
    if "/" in provider and model is None:
        provider, model = provider.split("/", 1)

    if not model:
        raise ValueError(
            "Model name is required. Either provide it as a separate parameter "
            "or use provider/model format (e.g., 'openai/gpt-4')"
        )

    if not client:
        raise ValueError(f"{provider.title()} provider requires a client instance")

    # Provider name (case-insensitive) -> name of the instructor patch
    # function. The attribute is resolved lazily so that only the selected
    # provider's function has to exist on the instructor module.
    patcher_names = {
        "openai": "from_openai",
        "anthropic": "from_anthropic",
        "cohere": "from_cohere",
        "google": "from_genai",
        "litellm": "from_litellm",
    }
    patcher_name = patcher_names.get(provider.lower())
    if patcher_name is None:
        raise ValueError(
            f"Unsupported provider: {provider}. "
            f"Supported providers: openai, anthropic, cohere, google, litellm"
        )
    instructor_patched_client = getattr(instructor, patcher_name)(client)

    # Record that the factory was used for this provider/model.
    factory_event = LLMUsageEvent(
        provider=provider,
        model=model,
        llm_type="instructor_factory",
        num_requests=1,
        is_async=False,
    )
    track(factory_event)

    return InstructorLLM(
        client=instructor_patched_client, model=model, provider=provider, **kwargs
    )

llm_factory

llm_factory(model: str = 'gpt-4o-mini', run_config: Optional[RunConfig] = None, default_headers: Optional[Dict[str, str]] = None, base_url: Optional[str] = None) -> BaseRagasLLM

Create and return a BaseRagasLLM instance. Used for running default LLMs used in Ragas (OpenAI).

Parameters:

Name Type Description Default
model str

The name of the model to use, by default "gpt-4o-mini".

'gpt-4o-mini'
run_config RunConfig

Configuration for the run, by default None.

None
default_headers dict of str

Default headers to be used in API requests, by default None.

None
base_url str

Base URL for the API, by default None.

None

Returns:

Type Description
BaseRagasLLM

An instance of BaseRagasLLM configured with the specified parameters.

Source code in src/ragas/llms/base.py
def llm_factory(
    model: str = "gpt-4o-mini",
    run_config: t.Optional[RunConfig] = None,
    default_headers: t.Optional[t.Dict[str, str]] = None,
    base_url: t.Optional[str] = None,
) -> BaseRagasLLM:
    """
    Build the default (OpenAI-backed) LLM used in Ragas.

    Parameters
    ----------
    model : str, optional
        The name of the model to use, by default "gpt-4o-mini".
    run_config : RunConfig, optional
        Configuration for the run, by default None. When given, its
        ``timeout`` is passed to the chat model.
    default_headers : dict of str, optional
        Default headers to be used in API requests, by default None.
        Replaced by the Helicone headers when Helicone is enabled.
    base_url : str, optional
        Base URL for the API, by default None. Replaced by the Helicone
        base URL when Helicone is enabled.

    Returns
    -------
    BaseRagasLLM
        A ``LangchainLLMWrapper`` around a ``ChatOpenAI`` model configured
        with the specified parameters.
    """
    timeout = run_config.timeout if run_config is not None else None

    # When Helicone is enabled its settings take over the headers and URL.
    if helicone_config.is_enabled:
        default_headers, base_url = (
            helicone_config.default_headers(),
            helicone_config.base_url,
        )

    chat_model = ChatOpenAI(
        model=model,
        timeout=timeout,
        default_headers=default_headers,
        base_url=base_url,
    )

    # Record that the factory was used for this model.
    factory_event = LLMUsageEvent(
        provider="openai",
        model=model,
        llm_type="factory",
        num_requests=1,
        is_async=False,
    )
    track(factory_event)

    return LangchainLLMWrapper(chat_model, run_config)

oci_genai_factory

oci_genai_factory(model_id: str, compartment_id: str, config: Optional[Dict[str, Any]] = None, endpoint_id: Optional[str] = None, run_config: Optional[RunConfig] = None, cache: Optional[Any] = None, default_system_prompt: Optional[str] = None, client: Optional[Any] = None) -> OCIGenAIWrapper

Factory function to create an OCI Gen AI LLM instance.

Args:

- model_id: The OCI model ID to use for generation
- compartment_id: The OCI compartment ID
- config: OCI configuration dictionary (optional)
- endpoint_id: Optional endpoint ID for the model
- run_config: Ragas run configuration
- cache: Optional cache backend
- default_system_prompt: Optional default system prompt
- client: Optional pre-initialized client

All arguments are passed through to OCIGenAIWrapper.

Returns: OCIGenAIWrapper: An instance of the OCI Gen AI LLM wrapper

Examples:

# Basic usage with default config
llm = oci_genai_factory(
    model_id="cohere.command",
    compartment_id="ocid1.compartment.oc1..example"
)

# With custom config
llm = oci_genai_factory(
    model_id="cohere.command",
    compartment_id="ocid1.compartment.oc1..example",
    config={"user": "user_ocid", "key_file": "~/.oci/private_key.pem"}
)
Source code in src/ragas/llms/oci_genai_wrapper.py
def oci_genai_factory(
    model_id: str,
    compartment_id: str,
    config: t.Optional[t.Dict[str, t.Any]] = None,
    endpoint_id: t.Optional[str] = None,
    run_config: t.Optional[RunConfig] = None,
    cache: t.Optional[t.Any] = None,
    default_system_prompt: t.Optional[str] = None,
    client: t.Optional[t.Any] = None,
) -> OCIGenAIWrapper:
    """
    Create an OCI Gen AI LLM instance.

    Every argument is passed through unchanged to ``OCIGenAIWrapper``.

    Args:
        model_id: The OCI model ID to use for generation
        compartment_id: The OCI compartment ID
        config: OCI configuration dictionary (optional)
        endpoint_id: Optional endpoint ID for the model
        run_config: Ragas run configuration
        cache: Optional cache backend
        default_system_prompt: Optional default system prompt
        client: Optional pre-initialized client

    Returns:
        OCIGenAIWrapper: An instance of the OCI Gen AI LLM wrapper

    Examples:
        # Basic usage with default config
        llm = oci_genai_factory(
            model_id="cohere.command",
            compartment_id="ocid1.compartment.oc1..example"
        )

        # With custom config
        llm = oci_genai_factory(
            model_id="cohere.command",
            compartment_id="ocid1.compartment.oc1..example",
            config={"user": "user_ocid", "key_file": "~/.oci/private_key.pem"}
        )
    """
    wrapper_options = {
        "model_id": model_id,
        "compartment_id": compartment_id,
        "config": config,
        "endpoint_id": endpoint_id,
        "run_config": run_config,
        "cache": cache,
        "default_system_prompt": default_system_prompt,
        "client": client,
    }
    return OCIGenAIWrapper(**wrapper_options)