Source code for langvae.encoders.automodel_presets

from dataclasses import dataclass
from typing import Type
from enum import StrEnum, auto
from transformers import AutoModel, AutoModelForTextEncoding


[docs]class PoolingMethod(StrEnum): MEAN = auto() LAST = auto() CLS = auto()
@dataclass
class AutoModelPreset:
    """
    Predefined settings class for encoder models loaded with AutoModel.

    Attributes:
        cls (str): Name of the class used for loading the model
            [AutoModel | AutoModelForTextEncoding].
        pooling_method (PoolingMethod): Method used for pooling the token
            embeddings [MEAN | LAST | CLS].
        normalize (bool): Whether the embeddings are to be normalized or not.
    """

    cls: str = "AutoModelForTextEncoding"
    pooling_method: PoolingMethod = PoolingMethod.MEAN
    normalize: bool = False

    @property
    def cls_type(self) -> Type:
        """Resolve the ``cls`` name to the corresponding loader class.

        Returns:
            ``AutoModel`` or ``AutoModelForTextEncoding``, selected by the
            string stored in ``cls``.

        Raises:
            KeyError: If ``cls`` is not one of the supported class names.
        """
        loaders = {
            "AutoModel": AutoModel,
            "AutoModelForTextEncoding": AutoModelForTextEncoding,
        }
        try:
            return loaders[self.cls]
        except KeyError:
            # Still a KeyError (what callers may already catch), but with a
            # message that names the bad value and the accepted ones.
            raise KeyError(
                f"Unknown loader class {self.cls!r}; "
                f"expected one of {sorted(loaders)}"
            ) from None
# Per-model preset settings, keyed by model identifier string.
# Each value is a plain dict whose keys match the AutoModelPreset field
# names: "cls" (loader class name), "pooling_method" and "normalize".
# NOTE(review): the keys look like Hugging Face Hub model ids, and the values
# are presumably unpacked into AutoModelPreset by the caller -- confirm.
AUTOMODEL_MAP = {
    "Salesforce/SFR-Embedding-2_R": {
        "cls": "AutoModel",
        "pooling_method": PoolingMethod.LAST,
        "normalize": True,
    },
    "intfloat/multilingual-e5-large-instruct": {
        "cls": "AutoModel",
        "pooling_method": PoolingMethod.MEAN,
        "normalize": True,
    },
    "Alibaba-NLP/gte-Qwen2-1.5B-instruct": {
        "cls": "AutoModel",
        "pooling_method": PoolingMethod.LAST,
        "normalize": True,
    },
    "NovaSearch/stella_en_1.5B_v5": {
        "cls": "AutoModel",
        "pooling_method": PoolingMethod.MEAN,
        "normalize": True,
    },
}