
LLMs

BaseRagasLLM dataclass

BaseRagasLLM(run_config: RunConfig = RunConfig(), multiple_completion_supported: bool = False, cache: Optional[CacheInterface] = None)

Bases: ABC

get_temperature

get_temperature(n: int) -> float

Return the temperature to use for completion based on n.

Source code in src/ragas/llms/base.py
def get_temperature(self, n: int) -> float:
    """Return the temperature to use for completion based on n."""
    return 0.3 if n > 1 else 1e-8
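
A minimal usage sketch (assuming OPENAI_API_KEY is set, since llm_factory, documented below, builds a ChatOpenAI-backed LLM):

from ragas.llms import llm_factory

llm = llm_factory()          # default OpenAI-backed LLM (see llm_factory below)
llm.get_temperature(1)       # 1e-8 — near-greedy for a single completion
llm.get_temperature(4)       # 0.3  — adds diversity when n > 1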

generate async

generate(prompt: PromptValue, n: int = 1, temperature: Optional[float] = None, stop: Optional[List[str]] = None, callbacks: Callbacks = None) -> LLMResult

Generate text using the given event loop.

Source code in src/ragas/llms/base.py
async def generate(
    self,
    prompt: PromptValue,
    n: int = 1,
    temperature: t.Optional[float] = None,
    stop: t.Optional[t.List[str]] = None,
    callbacks: Callbacks = None,
) -> LLMResult:
    """Generate text using the given event loop."""

    if temperature is None:
        temperature = self.get_temperature(n)

    agenerate_text_with_retry = add_async_retry(
        self.agenerate_text, self.run_config
    )
    result = await agenerate_text_with_retry(
        prompt=prompt,
        n=n,
        temperature=temperature,
        stop=stop,
        callbacks=callbacks,
    )

    # check there are no max_token issues
    if not self.is_finished(result):
        raise LLMDidNotFinishException()
    return result
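
A minimal usage sketch of generate, assuming OPENAI_API_KEY is set; StringPromptValue from langchain_core is used here as a simple PromptValue:

import asyncio

from langchain_core.prompt_values import StringPromptValue
from ragas.llms import llm_factory

async def main():
    llm = llm_factory("gpt-4o-mini")
    # temperature defaults to get_temperature(n); is_finished() is checked internally
    result = await llm.generate(StringPromptValue(text="Say hello."), n=1)
    print(result.generations[0][0].text)

asyncio.run(main())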

LangchainLLMWrapper

LangchainLLMWrapper(langchain_llm: BaseLanguageModel[BaseMessage], run_config: Optional[RunConfig] = None, is_finished_parser: Optional[Callable[[LLMResult], bool]] = None, cache: Optional[CacheInterface] = None)

Bases: BaseRagasLLM

A simple base class for RagasLLMs based on LangChain's BaseLanguageModel interface. It implements two functions:

- generate_text: generate text from a given PromptValue
- agenerate_text: generate text from a given PromptValue asynchronously

Source code in src/ragas/llms/base.py
def __init__(
    self,
    langchain_llm: BaseLanguageModel[BaseMessage],
    run_config: t.Optional[RunConfig] = None,
    is_finished_parser: t.Optional[t.Callable[[LLMResult], bool]] = None,
    cache: t.Optional[CacheInterface] = None,
):
    super().__init__(cache=cache)
    self.langchain_llm = langchain_llm
    if run_config is None:
        run_config = RunConfig()
    self.set_run_config(run_config)
    self.is_finished_parser = is_finished_parser
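
A minimal construction sketch, wrapping a LangChain chat model (ChatOpenAI here, which assumes langchain-openai is installed and OPENAI_API_KEY is set):

from langchain_openai import ChatOpenAI
from ragas.llms import LangchainLLMWrapper
from ragas.run_config import RunConfig

evaluator_llm = LangchainLLMWrapper(
    ChatOpenAI(model="gpt-4o-mini"),
    run_config=RunConfig(timeout=60),
)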

is_finished

is_finished(response: LLMResult) -> bool

Parse the response to check if the LLM finished by checking the finish_reason or stop_reason. Supports OpenAI and Vertex AI models.

Source code in src/ragas/llms/base.py
def is_finished(self, response: LLMResult) -> bool:
    """
    Parse the response to check if the LLM finished by checking the finish_reason
    or stop_reason. Supports OpenAI and Vertex AI models.
    """
    if self.is_finished_parser is not None:
        return self.is_finished_parser(response)
    # if no parser is provided default to our own

    is_finished_list = []
    for g in response.flatten():
        resp = g.generations[0][0]
        if resp.generation_info is not None:
            # generation_info is provided - so we parse that
            finish_reason = resp.generation_info.get("finish_reason")
            if finish_reason is not None:
                # OpenAI uses "stop"
                # Vertex AI uses "STOP" or "MAX_TOKENS"
                # WatsonX AI uses "eos_token"
                is_finished_list.append(
                    finish_reason in ["stop", "STOP", "MAX_TOKENS", "eos_token"]
                )

            # provide more conditions here
            # https://github.com/explodinggradients/ragas/issues/1548

        # if generation_info is empty, we parse the response_metadata
        # this is less reliable

        elif (
            isinstance(resp, ChatGeneration)
            and t.cast(ChatGeneration, resp).message is not None
        ):
            resp_message: BaseMessage = t.cast(ChatGeneration, resp).message
            if resp_message.response_metadata.get("finish_reason") is not None:
                finish_reason = resp_message.response_metadata.get("finish_reason")
                is_finished_list.append(
                    finish_reason in ["stop", "STOP", "MAX_TOKENS", "eos_token"]
                )
            elif resp_message.response_metadata.get("stop_reason") is not None:
                stop_reason = resp_message.response_metadata.get("stop_reason")
                is_finished_list.append(
                    stop_reason
                    in ["end_turn", "stop", "STOP", "MAX_TOKENS", "eos_token"]
                )
        # default to True
        else:
            is_finished_list.append(True)
    return all(is_finished_list)
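
If a provider reports finish reasons that the default checks above do not cover, a custom parser can be supplied via is_finished_parser. The parser below is a hypothetical, permissive rule (any non-empty generation counts as finished), shown only as a sketch:

from langchain_core.outputs import LLMResult

def permissive_is_finished(response: LLMResult) -> bool:
    # Hypothetical rule: treat every non-empty generation as finished.
    return all(gens[0].text.strip() != "" for gens in response.generations)

# evaluator_llm = LangchainLLMWrapper(chat_model, is_finished_parser=permissive_is_finished)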

LlamaIndexLLMWrapper

LlamaIndexLLMWrapper(llm: BaseLLM, run_config: Optional[RunConfig] = None, cache: Optional[CacheInterface] = None)

Bases: BaseRagasLLM

An adapter for LlamaIndex LLMs.

Source code in src/ragas/llms/base.py
def __init__(
    self,
    llm: BaseLLM,
    run_config: t.Optional[RunConfig] = None,
    cache: t.Optional[CacheInterface] = None,
):
    super().__init__(cache=cache)
    self.llm = llm

    try:
        self._signature = type(self.llm).__name__.lower()
    except AttributeError:
        self._signature = ""

    if run_config is None:
        run_config = RunConfig()
    self.set_run_config(run_config)
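
A minimal construction sketch, assuming llama-index-llms-openai is installed and OPENAI_API_KEY is set:

from llama_index.llms.openai import OpenAI
from ragas.llms import LlamaIndexLLMWrapper

evaluator_llm = LlamaIndexLLMWrapper(OpenAI(model="gpt-4o-mini"))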

HaystackLLMWrapper

HaystackLLMWrapper(haystack_generator: Any, run_config: Optional[RunConfig] = None, cache: Optional[CacheInterface] = None)

Bases: BaseRagasLLM

A wrapper class for using Haystack LLM generators within the Ragas framework.

This class integrates Haystack's LLM components (e.g., OpenAIGenerator, HuggingFaceAPIGenerator) into Ragas, enabling both synchronous and asynchronous text generation.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| haystack_generator | AzureOpenAIGenerator, HuggingFaceAPIGenerator, HuggingFaceLocalGenerator, or OpenAIGenerator | An instance of a Haystack generator. | required |
| run_config | RunConfig | Configuration object to manage LLM execution settings, by default None. | None |
| cache | CacheInterface | A cache instance for storing results, by default None. | None |

Source code in src/ragas/llms/haystack_wrapper.py
def __init__(
    self,
    haystack_generator: t.Any,
    run_config: t.Optional[RunConfig] = None,
    cache: t.Optional[CacheInterface] = None,
):
    super().__init__(cache=cache)

    # Lazy Import of required Haystack components
    try:
        from haystack import AsyncPipeline
        from haystack.components.generators import (
            AzureOpenAIGenerator,
            HuggingFaceAPIGenerator,
            HuggingFaceLocalGenerator,
            OpenAIGenerator,
        )
    except ImportError as exc:
        raise ImportError(
            "Haystack is not installed. Please install it using `pip install haystack-ai`."
        ) from exc

    # Validate haystack_generator type
    if not isinstance(
        haystack_generator,
        (
            AzureOpenAIGenerator,
            HuggingFaceAPIGenerator,
            HuggingFaceLocalGenerator,
            OpenAIGenerator,
        ),
    ):
        raise TypeError(
            "Expected 'haystack_generator' to be one of: "
            "AzureOpenAIGenerator, HuggingFaceAPIGenerator, "
            "HuggingFaceLocalGenerator, or OpenAIGenerator, but received "
            f"{type(haystack_generator).__name__}."
        )

    # Set up Haystack pipeline and generator
    self.generator = haystack_generator
    self.async_pipeline = AsyncPipeline()
    self.async_pipeline.add_component("llm", self.generator)

    if run_config is None:
        run_config = RunConfig()
    self.set_run_config(run_config)
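
A minimal construction sketch, assuming haystack-ai is installed and OPENAI_API_KEY is set:

from haystack.components.generators import OpenAIGenerator
from ragas.llms.haystack_wrapper import HaystackLLMWrapper

evaluator_llm = HaystackLLMWrapper(OpenAIGenerator(model="gpt-4o-mini"))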

llm_factory

llm_factory(model: str = 'gpt-4o-mini', run_config: Optional[RunConfig] = None, default_headers: Optional[Dict[str, str]] = None, base_url: Optional[str] = None) -> BaseRagasLLM

Create and return a BaseRagasLLM instance. Used for the default OpenAI-based LLMs in Ragas.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| model | str | The name of the model to use, by default "gpt-4o-mini". | 'gpt-4o-mini' |
| run_config | RunConfig | Configuration for the run, by default None. | None |
| default_headers | dict of str | Default headers to be used in API requests, by default None. | None |
| base_url | str | Base URL for the API, by default None. | None |

Returns:

| Type | Description |
| --- | --- |
| BaseRagasLLM | An instance of BaseRagasLLM configured with the specified parameters. |

Source code in src/ragas/llms/base.py
def llm_factory(
    model: str = "gpt-4o-mini",
    run_config: t.Optional[RunConfig] = None,
    default_headers: t.Optional[t.Dict[str, str]] = None,
    base_url: t.Optional[str] = None,
) -> BaseRagasLLM:
    """
    Create and return a BaseRagasLLM instance. Used for running default LLMs used
    in Ragas (OpenAI).

    Parameters
    ----------
    model : str, optional
        The name of the model to use, by default "gpt-4o-mini".
    run_config : RunConfig, optional
        Configuration for the run, by default None.
    default_headers : dict of str, optional
        Default headers to be used in API requests, by default None.
    base_url : str, optional
        Base URL for the API, by default None.

    Returns
    -------
    BaseRagasLLM
        An instance of BaseRagasLLM configured with the specified parameters.
    """
    timeout = None
    if run_config is not None:
        timeout = run_config.timeout

    # if helicone is enabled, use the helicone
    if helicone_config.is_enabled:
        default_headers = helicone_config.default_headers()
        base_url = helicone_config.base_url

    openai_model = ChatOpenAI(
        model=model, timeout=timeout, default_headers=default_headers, base_url=base_url
    )
    return LangchainLLMWrapper(openai_model, run_config)
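
A minimal usage sketch (assumes OPENAI_API_KEY is set):

from ragas.llms import llm_factory
from ragas.run_config import RunConfig

llm = llm_factory(model="gpt-4o-mini", run_config=RunConfig(timeout=120))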