Skip to content

Embeddings

BaseRagasEmbeddings

Bases: Embeddings, ABC

Abstract base class for Ragas embeddings.

This class extends the Embeddings class and provides methods for embedding text and managing run configurations.

Attributes: run_config (RunConfig): Configuration for running the embedding operations.

embed_text async

embed_text(text: str, is_async=True) -> List[float]

Embed a single text string.

Source code in src/ragas/embeddings/base.py
async def embed_text(self, text: str, is_async=True) -> List[float]:
    """
    Embed a single text string.

    The text is wrapped in a one-element list and handed to
    ``embed_texts``; the first vector of that result is returned.

    Parameters
    ----------
    text : str
        The text to embed.
    is_async : bool, optional
        Forwarded unchanged to ``embed_texts``, by default True.

    Returns
    -------
    List[float]
        The first entry of the list returned by ``embed_texts``.
    """
    # Single-item batch: reuse the batch path and unwrap its only result.
    return (await self.embed_texts([text], is_async=is_async))[0]

embed_texts async

embed_texts(texts: List[str], is_async: bool = True) -> List[List[float]]

Embed multiple texts.

Source code in src/ragas/embeddings/base.py
async def embed_texts(
    self, texts: List[str], is_async: bool = True
) -> t.List[t.List[float]]:
    """
    Embed multiple texts.

    Parameters
    ----------
    texts : List[str]
        The texts to embed.
    is_async : bool, optional
        When True (the default), ``self.aembed_documents`` is wrapped with
        ``add_async_retry`` and awaited. When False, the synchronous
        ``self.embed_documents`` is wrapped with ``add_retry`` and run in
        the running event loop's default executor.

    Returns
    -------
    List[List[float]]
        The result of the underlying embedding call for ``texts``.
    """
    if is_async:
        aembed_documents_with_retry = add_async_retry(
            self.aembed_documents, self.run_config
        )
        return await aembed_documents_with_retry(texts)

    embed_documents_with_retry = add_retry(self.embed_documents, self.run_config)
    # We are inside a coroutine, so a loop is guaranteed to be running;
    # get_running_loop() is the documented way to obtain it here, whereas
    # get_event_loop() has policy-dependent behaviour.
    loop = asyncio.get_running_loop()
    # Passing None selects the loop's default executor.
    return await loop.run_in_executor(None, embed_documents_with_retry, texts)

set_run_config

set_run_config(run_config: RunConfig)

Set the run configuration for the embedding operations.

Source code in src/ragas/embeddings/base.py
def set_run_config(self, run_config: RunConfig):
    """
    Set the run configuration for the embedding operations.

    Parameters
    ----------
    run_config : RunConfig
        The configuration object to store on this instance as
        ``self.run_config``; it replaces any previously stored value.
    """
    self.run_config = run_config

HuggingfaceEmbeddings

Bases: BaseRagasEmbeddings

Hugging Face embeddings class for generating embeddings using pre-trained models.

This class provides functionality to load and use Hugging Face models for generating embeddings of text inputs.

Parameters:

Name Type Description Default
model_name str

Name of the pre-trained model to use, by default DEFAULT_MODEL_NAME.

required
cache_folder str

Path to store downloaded models. Can also be set by SENTENCE_TRANSFORMERS_HOME environment variable.

required
model_kwargs dict

Additional keyword arguments to pass to the model.

required
encode_kwargs dict

Additional keyword arguments to pass to the encoding method.

required

Attributes:

Name Type Description
model Union[SentenceTransformer, CrossEncoder]

The loaded Hugging Face model.

is_cross_encoder bool

Flag indicating whether the model is a cross-encoder.

Methods:

Name Description
embed_query

Embed a single query text.

embed_documents

Embed multiple documents.

predict

Make predictions using a cross-encoder model.

Notes

This class requires the sentence_transformers and transformers packages to be installed.

Examples:

>>> embeddings = HuggingfaceEmbeddings(model_name="bert-base-uncased")
>>> query_embedding = embeddings.embed_query("What is the capital of France?")
>>> doc_embeddings = embeddings.embed_documents(["Paris is the capital of France.", "London is the capital of the UK."])

embed_query

embed_query(text: str) -> List[float]

Embed a single query text.

Source code in src/ragas/embeddings/base.py
def embed_query(self, text: str) -> List[float]:
    """
    Embed a single query text.

    Parameters
    ----------
    text : str
        The query to embed.

    Returns
    -------
    List[float]
        The first vector returned by ``embed_documents`` for ``[text]``.
    """
    # Route through the batch method with a one-element batch.
    vectors = self.embed_documents([text])
    return vectors[0]

embed_documents

embed_documents(texts: List[str]) -> List[List[float]]

Embed multiple documents.

Source code in src/ragas/embeddings/base.py
def embed_documents(self, texts: List[str]) -> List[List[float]]:
    """
    Embed multiple documents.

    Parameters
    ----------
    texts : List[str]
        The documents to embed.

    Returns
    -------
    List[List[float]]
        The encoder output converted with ``tolist()``.

    Raises
    ------
    AssertionError
        If ``self.model`` is not a ``SentenceTransformer`` or the encoder
        output is not a ``torch.Tensor``.
    """
    from sentence_transformers.SentenceTransformer import SentenceTransformer
    from torch import Tensor

    assert isinstance(
        self.model, SentenceTransformer
    ), "Model is not of the type Bi-encoder"

    # Build one kwargs dict instead of passing ``normalize_embeddings=True``
    # next to ``**self.encode_kwargs``: if the caller's encode_kwargs already
    # contains ``normalize_embeddings`` the original call raised
    # "got multiple values for keyword argument". Normalisation stays on by
    # default; an explicit caller value now takes precedence.
    encode_options = {"normalize_embeddings": True, **self.encode_kwargs}
    embeddings = self.model.encode(texts, **encode_options)

    # NOTE(review): this presumably relies on encode_kwargs requesting tensor
    # output (e.g. convert_to_tensor=True) -- confirm where encode_kwargs is
    # populated.
    assert isinstance(embeddings, Tensor)
    return embeddings.tolist()

predict

predict(texts: List[List[str]]) -> List[List[float]]

Make predictions using a cross-encoder model.

Source code in src/ragas/embeddings/base.py
def predict(self, texts: List[List[str]]) -> List[List[float]]:
    """
    Make predictions using a cross-encoder model.

    Parameters
    ----------
    texts : List[List[str]]
        The inputs handed to the cross-encoder's ``predict`` method.

    Returns
    -------
    List[List[float]]
        The model output converted with ``tolist()``.

    Raises
    ------
    AssertionError
        If ``self.model`` is not a ``CrossEncoder`` or the model output is
        not a ``torch.Tensor``.
    """
    from sentence_transformers.cross_encoder import CrossEncoder
    from torch import Tensor

    cross_encoder = self.model
    assert isinstance(
        cross_encoder, CrossEncoder
    ), "Model is not of the type CrossEncoder"

    # The stored encode_kwargs are forwarded verbatim to the model.
    scores = cross_encoder.predict(texts, **self.encode_kwargs)
    assert isinstance(scores, Tensor)

    return scores.tolist()

LangchainEmbeddingsWrapper

LangchainEmbeddingsWrapper(embeddings: Embeddings, run_config: Optional[RunConfig] = None)

Bases: BaseRagasEmbeddings

Wrapper for any embeddings from langchain.

Source code in src/ragas/embeddings/base.py
def __init__(
    self, embeddings: Embeddings, run_config: t.Optional[RunConfig] = None
):
    """
    Wrap a langchain embeddings object.

    Parameters
    ----------
    embeddings : Embeddings
        The langchain embeddings instance to delegate to.
    run_config : RunConfig, optional
        Run configuration to apply; a fresh ``RunConfig()`` is created
        when None is given.
    """
    # Store the wrapped object first: set_run_config is called afterwards.
    self.embeddings = embeddings
    effective_config = RunConfig() if run_config is None else run_config
    self.set_run_config(effective_config)

embed_query

embed_query(text: str) -> List[float]

Embed a single query text.

Source code in src/ragas/embeddings/base.py
def embed_query(self, text: str) -> List[float]:
    """
    Embed a single query text.

    Delegates directly to ``embed_query`` of the wrapped ``self.embeddings``
    object and returns its result unchanged.
    """
    return self.embeddings.embed_query(text)

embed_documents

embed_documents(texts: List[str]) -> List[List[float]]

Embed multiple documents.

Source code in src/ragas/embeddings/base.py
def embed_documents(self, texts: List[str]) -> List[List[float]]:
    """
    Embed multiple documents.

    Delegates directly to ``embed_documents`` of the wrapped
    ``self.embeddings`` object and returns its result unchanged.
    """
    return self.embeddings.embed_documents(texts)

aembed_query async

aembed_query(text: str) -> List[float]

Asynchronously embed a single query text.

Source code in src/ragas/embeddings/base.py
async def aembed_query(self, text: str) -> List[float]:
    """
    Asynchronously embed a single query text.

    Awaits ``aembed_query`` of the wrapped ``self.embeddings`` object and
    returns its result unchanged.
    """
    return await self.embeddings.aembed_query(text)

aembed_documents async

aembed_documents(texts: List[str]) -> List[List[float]]

Asynchronously embed multiple documents.

Source code in src/ragas/embeddings/base.py
async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
    """
    Asynchronously embed multiple documents.

    Awaits ``aembed_documents`` of the wrapped ``self.embeddings`` object
    and returns its result unchanged.
    """
    return await self.embeddings.aembed_documents(texts)

set_run_config

set_run_config(run_config: RunConfig)

Set the run configuration for the embedding operations.

Source code in src/ragas/embeddings/base.py
def set_run_config(self, run_config: RunConfig):
    """
    Set the run configuration for the embedding operations.

    The configuration is stored as ``self.run_config``. When the wrapped
    embeddings object is an ``OpenAIEmbeddings`` instance, its
    ``request_timeout`` is set from ``run_config.timeout`` and
    ``run_config.exception_types`` is set to ``openai.RateLimitError``.
    Note that this mutates the ``run_config`` object passed in.

    Parameters
    ----------
    run_config : RunConfig
        The configuration to store and, for OpenAI embeddings, to adjust.

    Raises
    ------
    ImportError
        If the wrapped object is an ``OpenAIEmbeddings`` instance but the
        ``openai`` package cannot be imported.
    """
    self.run_config = run_config

    # run configurations specially for OpenAI
    if isinstance(self.embeddings, OpenAIEmbeddings):
        try:
            from openai import RateLimitError
        except ImportError as err:
            # Name the symbol that is actually imported above and chain the
            # original failure so its cause stays visible in the traceback.
            raise ImportError(
                "openai.RateLimitError not found. Please install openai package as `pip install openai`"
            ) from err
        self.embeddings.request_timeout = run_config.timeout
        self.run_config.exception_types = RateLimitError

LlamaIndexEmbeddingsWrapper

LlamaIndexEmbeddingsWrapper(embeddings: BaseEmbedding, run_config: Optional[RunConfig] = None)

Bases: BaseRagasEmbeddings

Wrapper for any embeddings from llama-index.

This class provides a wrapper for llama-index embeddings, allowing them to be used within the Ragas framework. It supports both synchronous and asynchronous embedding operations for queries and documents.

Parameters:

Name Type Description Default
embeddings BaseEmbedding

The llama-index embedding model to be wrapped.

required
run_config RunConfig

Configuration for the run. If not provided, a default RunConfig will be used.

None

Attributes:

Name Type Description
embeddings BaseEmbedding

The wrapped llama-index embedding model.

Examples:

>>> from llama_index.embeddings import OpenAIEmbedding
>>> from ragas.embeddings import LlamaIndexEmbeddingsWrapper
>>> llama_embeddings = OpenAIEmbedding()
>>> wrapped_embeddings = LlamaIndexEmbeddingsWrapper(llama_embeddings)
>>> query_embedding = wrapped_embeddings.embed_query("What is the capital of France?")
>>> document_embeddings = wrapped_embeddings.embed_documents(["Paris is the capital of France.", "London is the capital of the UK."])
Source code in src/ragas/embeddings/base.py
def __init__(
    self, embeddings: BaseEmbedding, run_config: t.Optional[RunConfig] = None
):
    """
    Wrap a llama-index embedding model.

    Parameters
    ----------
    embeddings : BaseEmbedding
        The llama-index embedding model to delegate to.
    run_config : RunConfig, optional
        Run configuration to apply; a fresh ``RunConfig()`` is created
        when None is given.
    """
    self.embeddings = embeddings
    effective_config = RunConfig() if run_config is None else run_config
    self.set_run_config(effective_config)

embedding_factory

embedding_factory(model: str = 'text-embedding-ada-002', run_config: Optional[RunConfig] = None) -> BaseRagasEmbeddings

Create and return a BaseRagasEmbeddings instance. This factory provides the default (OpenAI) embeddings used in Ragas.

This factory function creates an OpenAIEmbeddings instance and wraps it with LangchainEmbeddingsWrapper to provide a BaseRagasEmbeddings compatible object.

Parameters:

Name Type Description Default
model str

The name of the OpenAI embedding model to use, by default "text-embedding-ada-002".

'text-embedding-ada-002'
run_config RunConfig

Configuration for the run, by default None.

None

Returns:

Type Description
BaseRagasEmbeddings

An instance of BaseRagasEmbeddings configured with the specified parameters.

Source code in src/ragas/embeddings/base.py
def embedding_factory(
    model: str = "text-embedding-ada-002", run_config: t.Optional[RunConfig] = None
) -> BaseRagasEmbeddings:
    """
    Build the default (OpenAI-backed) embeddings object used in Ragas.

    An ``OpenAIEmbeddings`` instance is created for ``model`` and returned
    wrapped in a ``LangchainEmbeddingsWrapper``.

    Parameters
    ----------
    model : str, optional
        The name of the OpenAI embedding model to use, by default
        "text-embedding-ada-002".
    run_config : RunConfig, optional
        Configuration for the run. When given, its ``timeout`` is copied to
        the OpenAI client's ``request_timeout``; when None, a fresh
        ``RunConfig()`` is used instead.

    Returns
    -------
    BaseRagasEmbeddings
        The ``LangchainEmbeddingsWrapper`` around the OpenAI embeddings.
    """
    backend = OpenAIEmbeddings(model=model)

    if run_config is None:
        # No caller configuration: fall back to the defaults.
        run_config = RunConfig()
    else:
        backend.request_timeout = run_config.timeout

    return LangchainEmbeddingsWrapper(backend, run_config=run_config)