
vllm.renderers.hf

_PROCESSOR_CHAT_TEMPLATES module-attribute

_PROCESSOR_CHAT_TEMPLATES = dict[
    tuple[str, bool], str | None
]()

Used in _try_get_processor_chat_template to avoid calling cached_get_processor again if the processor fails to load.

This is needed because lru_cache does not cache calls that raise an exception.
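
A minimal standalone sketch of that behavior, using only the standard library: lru_cache memoizes successful results, but a call that raises is re-executed every time, which is why failures are recorded separately in _PROCESSOR_CHAT_TEMPLATES.

from functools import lru_cache

calls = 0

@lru_cache
def flaky(x: int) -> int:
    global calls
    calls += 1
    if x < 0:
        raise ValueError("negative input")
    return x * 2

flaky(2)
flaky(2)  # cache hit; the function body is not re-run
for _ in range(2):
    try:
        flaky(-1)  # raises both times; lru_cache never caches the failure
    except ValueError:
        pass

assert calls == 3  # one successful call plus two repeated failing calls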

_cached_resolve_chat_template_kwargs module-attribute

_cached_resolve_chat_template_kwargs = lru_cache(
    _resolve_chat_template_kwargs
)

logger module-attribute

logger = init_logger(__name__)

AssistantTracker

Bases: Extension

Source code in vllm/renderers/hf.py
class AssistantTracker(jinja2.ext.Extension):
    tags = {"generation"}

    def parse(self, parser: jinja2.parser.Parser) -> jinja2.nodes.Node:
        lineno = next(parser.stream).lineno
        body = parser.parse_statements(("name:endgeneration",), drop_needle=True)
        call = self.call_method("_generation_support")
        call_block = jinja2.nodes.CallBlock(call, [], [], body)
        return call_block.set_lineno(lineno)

tags class-attribute instance-attribute

tags = {'generation'}

parse

parse(parser: Parser) -> Node
Source code in vllm/renderers/hf.py
def parse(self, parser: jinja2.parser.Parser) -> jinja2.nodes.Node:
    lineno = next(parser.stream).lineno
    body = parser.parse_statements(("name:endgeneration",), drop_needle=True)
    call = self.call_method("_generation_support")
    call_block = jinja2.nodes.CallBlock(call, [], [], body)
    return call_block.set_lineno(lineno)
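
A hedged sketch of what this extension enables: chat templates written for Hugging Face's assistant-token masking wrap assistant turns in a {% generation %} ... {% endgeneration %} tag, which a vanilla Jinja environment rejects as an unknown tag, while an environment with AssistantTracker registered parses it. The import path below follows this page and is an assumption.

import jinja2
import jinja2.sandbox

from vllm.renderers.hf import AssistantTracker  # path as documented on this page

template = (
    "{% for message in messages %}"
    "{% if message['role'] == 'assistant' %}"
    "{% generation %}{{ message['content'] }}{% endgeneration %}"
    "{% else %}{{ message['content'] }}{% endif %}"
    "{% endfor %}"
)

# Without the extension, the unknown `generation` tag is a syntax error.
plain_env = jinja2.sandbox.ImmutableSandboxedEnvironment()
try:
    plain_env.parse(template)
except jinja2.TemplateSyntaxError as exc:
    print("without extension:", exc)

# With the extension registered, the template parses cleanly.
tracked_env = jinja2.sandbox.ImmutableSandboxedEnvironment(
    extensions=[AssistantTracker],
)
print(type(tracked_env.parse(template)))  # <class 'jinja2.nodes.Template'>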

HfRenderer

Bases: RendererLike

Source code in vllm/renderers/hf.py
class HfRenderer(RendererLike):
    @classmethod
    def from_config(
        cls,
        config: ModelConfig,
        tokenizer_kwargs: dict[str, Any],
    ) -> "RendererLike":
        return cls(config, tokenizer_kwargs)

    def __init__(
        self,
        config: ModelConfig,
        tokenizer_kwargs: dict[str, Any],
    ) -> None:
        super().__init__()

        self.config = config

        if config.skip_tokenizer_init:
            tokenizer = None
        else:
            tokenizer = cast(
                HfTokenizer,
                cached_get_tokenizer(
                    tokenizer_cls=CachedHfTokenizer,  # type: ignore[type-abstract]
                    **tokenizer_kwargs,
                ),
            )

        self._tokenizer = tokenizer

    @property
    def tokenizer(self) -> HfTokenizer | None:
        return self._tokenizer

    def get_tokenizer(self) -> HfTokenizer:
        tokenizer = self.tokenizer
        if tokenizer is None:
            raise ValueError("Tokenizer not available when `skip_tokenizer_init=True`")

        return tokenizer

    def render_messages(
        self,
        messages: list[ChatCompletionMessageParam],
        chat_template_content_format: ChatTemplateContentFormatOption = "auto",
        **kwargs,
    ) -> tuple[list[ConversationMessage], TextPrompt | TokensPrompt]:
        model_config = self.config
        tokenizer = self.get_tokenizer()

        conversation, mm_data, mm_uuids = parse_chat_messages(
            messages,
            model_config,
            content_format=resolve_chat_template_content_format(
                chat_template=kwargs.get("chat_template"),
                tools=kwargs.get("tools"),
                given_format=chat_template_content_format,
                tokenizer=tokenizer,
                model_config=model_config,
            ),
        )

        prompt_raw = safe_apply_chat_template(
            model_config,
            tokenizer,
            conversation,
            **kwargs,
        )

        prompt = (
            TextPrompt(prompt=prompt_raw)
            if isinstance(prompt_raw, str)
            else TokensPrompt(prompt_token_ids=prompt_raw)
        )
        if mm_data is not None:
            prompt["multi_modal_data"] = mm_data
        if mm_uuids is not None:
            prompt["multi_modal_uuids"] = mm_uuids

        return conversation, prompt  # type: ignore[return-value]

    async def render_messages_async(
        self,
        messages: list[ChatCompletionMessageParam],
        chat_template_content_format: ChatTemplateContentFormatOption = "auto",
        **kwargs,
    ) -> tuple[list[ConversationMessage], TextPrompt | TokensPrompt]:
        model_config = self.config
        tokenizer = self.get_tokenizer()

        conversation, mm_data, mm_uuids = await parse_chat_messages_async(
            messages,
            model_config,
            content_format=resolve_chat_template_content_format(
                chat_template=kwargs.get("chat_template"),
                tools=kwargs.get("tools"),
                given_format=chat_template_content_format,
                tokenizer=tokenizer,
                model_config=model_config,
            ),
        )

        prompt_raw = safe_apply_chat_template(
            model_config,
            tokenizer,
            conversation,
            **kwargs,
        )

        prompt = (
            TextPrompt(prompt=prompt_raw)
            if isinstance(prompt_raw, str)
            else TokensPrompt(prompt_token_ids=prompt_raw)
        )
        if mm_data is not None:
            prompt["multi_modal_data"] = mm_data
        if mm_uuids is not None:
            prompt["multi_modal_uuids"] = mm_uuids

        return conversation, prompt  # type: ignore[return-value]

_tokenizer instance-attribute

_tokenizer = tokenizer

config instance-attribute

config = config

tokenizer property

tokenizer: HfTokenizer | None

__init__

__init__(
    config: ModelConfig, tokenizer_kwargs: dict[str, Any]
) -> None
Source code in vllm/renderers/hf.py
def __init__(
    self,
    config: ModelConfig,
    tokenizer_kwargs: dict[str, Any],
) -> None:
    super().__init__()

    self.config = config

    if config.skip_tokenizer_init:
        tokenizer = None
    else:
        tokenizer = cast(
            HfTokenizer,
            cached_get_tokenizer(
                tokenizer_cls=CachedHfTokenizer,  # type: ignore[type-abstract]
                **tokenizer_kwargs,
            ),
        )

    self._tokenizer = tokenizer

from_config classmethod

from_config(
    config: ModelConfig, tokenizer_kwargs: dict[str, Any]
) -> RendererLike
Source code in vllm/renderers/hf.py
@classmethod
def from_config(
    cls,
    config: ModelConfig,
    tokenizer_kwargs: dict[str, Any],
) -> "RendererLike":
    return cls(config, tokenizer_kwargs)

get_tokenizer

get_tokenizer() -> HfTokenizer
Source code in vllm/renderers/hf.py
def get_tokenizer(self) -> HfTokenizer:
    tokenizer = self.tokenizer
    if tokenizer is None:
        raise ValueError("Tokenizer not available when `skip_tokenizer_init=True`")

    return tokenizer

render_messages

render_messages(
    messages: list[ChatCompletionMessageParam],
    chat_template_content_format: ChatTemplateContentFormatOption = "auto",
    **kwargs,
) -> tuple[
    list[ConversationMessage], TextPrompt | TokensPrompt
]
Source code in vllm/renderers/hf.py
def render_messages(
    self,
    messages: list[ChatCompletionMessageParam],
    chat_template_content_format: ChatTemplateContentFormatOption = "auto",
    **kwargs,
) -> tuple[list[ConversationMessage], TextPrompt | TokensPrompt]:
    model_config = self.config
    tokenizer = self.get_tokenizer()

    conversation, mm_data, mm_uuids = parse_chat_messages(
        messages,
        model_config,
        content_format=resolve_chat_template_content_format(
            chat_template=kwargs.get("chat_template"),
            tools=kwargs.get("tools"),
            given_format=chat_template_content_format,
            tokenizer=tokenizer,
            model_config=model_config,
        ),
    )

    prompt_raw = safe_apply_chat_template(
        model_config,
        tokenizer,
        conversation,
        **kwargs,
    )

    prompt = (
        TextPrompt(prompt=prompt_raw)
        if isinstance(prompt_raw, str)
        else TokensPrompt(prompt_token_ids=prompt_raw)
    )
    if mm_data is not None:
        prompt["multi_modal_data"] = mm_data
    if mm_uuids is not None:
        prompt["multi_modal_uuids"] = mm_uuids

    return conversation, prompt  # type: ignore[return-value]

render_messages_async async

render_messages_async(
    messages: list[ChatCompletionMessageParam],
    chat_template_content_format: ChatTemplateContentFormatOption = "auto",
    **kwargs,
) -> tuple[
    list[ConversationMessage], TextPrompt | TokensPrompt
]
Source code in vllm/renderers/hf.py
async def render_messages_async(
    self,
    messages: list[ChatCompletionMessageParam],
    chat_template_content_format: ChatTemplateContentFormatOption = "auto",
    **kwargs,
) -> tuple[list[ConversationMessage], TextPrompt | TokensPrompt]:
    model_config = self.config
    tokenizer = self.get_tokenizer()

    conversation, mm_data, mm_uuids = await parse_chat_messages_async(
        messages,
        model_config,
        content_format=resolve_chat_template_content_format(
            chat_template=kwargs.get("chat_template"),
            tools=kwargs.get("tools"),
            given_format=chat_template_content_format,
            tokenizer=tokenizer,
            model_config=model_config,
        ),
    )

    prompt_raw = safe_apply_chat_template(
        model_config,
        tokenizer,
        conversation,
        **kwargs,
    )

    prompt = (
        TextPrompt(prompt=prompt_raw)
        if isinstance(prompt_raw, str)
        else TokensPrompt(prompt_token_ids=prompt_raw)
    )
    if mm_data is not None:
        prompt["multi_modal_data"] = mm_data
    if mm_uuids is not None:
        prompt["multi_modal_uuids"] = mm_uuids

    return conversation, prompt  # type: ignore[return-value]

_detect_content_format cached

_detect_content_format(
    chat_template: str,
    *,
    default: ChatTemplateContentFormat,
) -> ChatTemplateContentFormat
Source code in vllm/renderers/hf.py
@lru_cache(maxsize=32)
def _detect_content_format(
    chat_template: str,
    *,
    default: ChatTemplateContentFormat,
) -> ChatTemplateContentFormat:
    jinja_ast = _try_extract_ast(chat_template)
    if jinja_ast is None:
        return default

    try:
        next(_iter_nodes_assign_content_item(jinja_ast))
    except StopIteration:
        return "string"
    except Exception:
        logger.exception("Error when parsing AST of Jinja template")
        return default
    else:
        return "openai"
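
A hedged illustration of the heuristic on two toy templates, calling the private helper directly (the import path follows this page and may change): a template that renders message['content'] as a plain string is detected as 'string', while one that iterates over the content items is detected as 'openai'.

from vllm.renderers.hf import _detect_content_format  # private helper, path per this page

string_template = (
    "{% for message in messages %}{{ message['content'] }}{% endfor %}"
)
openai_template = (
    "{% for message in messages %}"
    "{% for item in message['content'] %}{{ item['text'] }}{% endfor %}"
    "{% endfor %}"
)

print(_detect_content_format(string_template, default="string"))  # 'string'
print(_detect_content_format(openai_template, default="string"))  # 'openai'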

_get_hf_base_chat_template_params cached

_get_hf_base_chat_template_params() -> frozenset[str]
Source code in vllm/renderers/hf.py
@lru_cache
def _get_hf_base_chat_template_params() -> frozenset[str]:
    from transformers import PreTrainedTokenizer

    # Get standard parameters from HuggingFace's base tokenizer class.
    # This dynamically extracts parameters from PreTrainedTokenizer's
    # apply_chat_template method, ensuring compatibility with tokenizers
    # that use **kwargs to receive standard parameters.

    # Read signature from HF's base class - the single source of truth
    base_sig = inspect.signature(PreTrainedTokenizer.apply_chat_template)

    # Exclude VAR_KEYWORD (**kwargs) and VAR_POSITIONAL (*args) placeholders
    return frozenset(
        p.name
        for p in base_sig.parameters.values()
        if p.kind
        not in (inspect.Parameter.VAR_KEYWORD, inspect.Parameter.VAR_POSITIONAL)
    )
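
For reference, a hedged usage sketch; the exact parameter names depend on the installed transformers version.

from vllm.renderers.hf import _get_hf_base_chat_template_params  # private helper, path per this page

params = _get_hf_base_chat_template_params()
# Typically includes names such as 'add_generation_prompt', 'tokenize',
# 'continue_final_message' and 'return_dict' (version-dependent).
print(sorted(params))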

_is_attr_access

_is_attr_access(node: Node, varname: str, key: str) -> bool
Source code in vllm/renderers/hf.py
def _is_attr_access(node: jinja2.nodes.Node, varname: str, key: str) -> bool:
    if isinstance(node, jinja2.nodes.Getitem):
        return (
            _is_var_access(node.node, varname)
            and isinstance(node.arg, jinja2.nodes.Const)
            and node.arg.value == key
        )

    if isinstance(node, jinja2.nodes.Getattr):
        return _is_var_access(node.node, varname) and node.attr == key

    return False

_is_var_access

_is_var_access(node: Node, varname: str) -> bool
Source code in vllm/renderers/hf.py
def _is_var_access(node: jinja2.nodes.Node, varname: str) -> bool:
    if isinstance(node, jinja2.nodes.Name):
        return node.ctx == "load" and node.name == varname

    return False

_is_var_or_elems_access

_is_var_or_elems_access(
    node: Node, varname: str, key: str | None = None
) -> bool
Source code in vllm/renderers/hf.py
def _is_var_or_elems_access(
    node: jinja2.nodes.Node,
    varname: str,
    key: str | None = None,
) -> bool:
    if isinstance(node, jinja2.nodes.Filter):
        return node.node is not None and _is_var_or_elems_access(
            node.node, varname, key
        )
    if isinstance(node, jinja2.nodes.Test):
        return _is_var_or_elems_access(node.node, varname, key)

    if isinstance(node, jinja2.nodes.Getitem) and isinstance(
        node.arg, jinja2.nodes.Slice
    ):
        return _is_var_or_elems_access(node.node, varname, key)

    return _is_attr_access(node, varname, key) if key else _is_var_access(node, varname)

_iter_nodes_assign_content_item

_iter_nodes_assign_content_item(root: Node)
Source code in vllm/renderers/hf.py
def _iter_nodes_assign_content_item(root: jinja2.nodes.Node):
    message_varnames = [
        varname for _, varname in _iter_nodes_assign_messages_item(root)
    ]

    # Search for {%- for content in message['content'] -%} loops
    for loop_ast in root.find_all(jinja2.nodes.For):
        loop_iter = loop_ast.iter
        loop_target = loop_ast.target

        for varname in message_varnames:
            if _is_var_or_elems_access(loop_iter, varname, "content"):
                assert isinstance(loop_target, jinja2.nodes.Name)
                yield loop_ast, loop_target.name
                break

_iter_nodes_assign_messages_item

_iter_nodes_assign_messages_item(root: Node)
Source code in vllm/renderers/hf.py
def _iter_nodes_assign_messages_item(root: jinja2.nodes.Node):
    messages_varnames = [
        varname for _, varname in _iter_nodes_assign_var_or_elems(root, "messages")
    ]

    # Search for {%- for message in messages -%} loops
    for loop_ast in root.find_all(jinja2.nodes.For):
        loop_iter = loop_ast.iter
        loop_target = loop_ast.target

        for varname in messages_varnames:
            if _is_var_or_elems_access(loop_iter, varname):
                assert isinstance(loop_target, jinja2.nodes.Name)
                yield loop_ast, loop_target.name
                break

_iter_nodes_assign_var_or_elems

_iter_nodes_assign_var_or_elems(root: Node, varname: str)
Source code in vllm/renderers/hf.py
def _iter_nodes_assign_var_or_elems(root: jinja2.nodes.Node, varname: str):
    # Global variable that is implicitly defined at the root
    yield root, varname

    # Iterative BFS
    related_varnames = deque([varname])
    while related_varnames:
        related_varname = related_varnames.popleft()

        for assign_ast in root.find_all(jinja2.nodes.Assign):
            lhs = assign_ast.target
            rhs = assign_ast.node

            if _is_var_or_elems_access(rhs, related_varname):
                assert isinstance(lhs, jinja2.nodes.Name)
                yield assign_ast, lhs.name

                # Avoid infinite looping for self-assignment
                if lhs.name != related_varname:
                    related_varnames.append(lhs.name)

_log_chat_template_content_format cached

_log_chat_template_content_format(
    chat_template: str | None,
    given_format: ChatTemplateContentFormatOption,
    detected_format: ChatTemplateContentFormatOption,
)
Source code in vllm/renderers/hf.py
@lru_cache
def _log_chat_template_content_format(
    chat_template: str | None,  # For caching purposes
    given_format: ChatTemplateContentFormatOption,
    detected_format: ChatTemplateContentFormatOption,
):
    logger.info(
        "Detected the chat template content format to be '%s'. "
        "You can set `--chat-template-content-format` to override this.",
        detected_format,
    )

    if given_format != "auto" and given_format != detected_format:
        logger.warning(
            "You specified `--chat-template-content-format %s` "
            "which is different from the detected format '%s'. "
            "If our automatic detection is incorrect, please consider "
            "opening a GitHub issue so that we can improve it: "
            "https://github.com/vllm-project/vllm/issues/new/choose",
            given_format,
            detected_format,
        )

_resolve_chat_template_content_format

_resolve_chat_template_content_format(
    chat_template: str | None,
    tools: list[dict[str, Any]] | None,
    tokenizer: HfTokenizer,
    *,
    model_config: ModelConfig,
) -> ChatTemplateContentFormat
Source code in vllm/renderers/hf.py
def _resolve_chat_template_content_format(
    chat_template: str | None,
    tools: list[dict[str, Any]] | None,
    tokenizer: HfTokenizer,
    *,
    model_config: "ModelConfig",
) -> ChatTemplateContentFormat:
    resolved_chat_template = resolve_chat_template(
        tokenizer,
        chat_template=chat_template,
        tools=tools,
        model_config=model_config,
    )

    jinja_text = (
        resolved_chat_template
        if isinstance(resolved_chat_template, str)
        else load_chat_template(chat_template, is_literal=True)
    )

    detected_format = (
        "string"
        if jinja_text is None
        else _detect_content_format(jinja_text, default="string")
    )

    return detected_format

_resolve_chat_template_kwargs

_resolve_chat_template_kwargs(
    chat_template: str,
) -> Set[str]
Source code in vllm/renderers/hf.py
def _resolve_chat_template_kwargs(chat_template: str) -> Set[str]:
    env = jinja2.sandbox.ImmutableSandboxedEnvironment(
        trim_blocks=True,
        lstrip_blocks=True,
        extensions=[AssistantTracker, jinja2.ext.loopcontrols],
    )
    parsed_content = env.parse(chat_template)
    template_vars = jinja2.meta.find_undeclared_variables(parsed_content)
    return template_vars
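
A small self-contained sketch of the underlying jinja2.meta call: undeclared names in the template (including custom flags such as the hypothetical enable_thinking below) are reported, while loop targets such as message are not.

import jinja2.meta
import jinja2.sandbox

env = jinja2.sandbox.ImmutableSandboxedEnvironment(
    trim_blocks=True,
    lstrip_blocks=True,
)
chat_template = (
    "{{ bos_token }}"
    "{% for message in messages %}{{ message['content'] }}{% endfor %}"
    "{% if enable_thinking %}<think>{% endif %}"
)
parsed = env.parse(chat_template)
print(jinja2.meta.find_undeclared_variables(parsed))
# {'bos_token', 'messages', 'enable_thinking'} (set order is arbitrary)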

_try_extract_ast

_try_extract_ast(chat_template: str) -> Template | None
Source code in vllm/renderers/hf.py
def _try_extract_ast(chat_template: str) -> jinja2.nodes.Template | None:
    import transformers.utils.chat_template_utils as hf_chat_utils

    try:
        jinja_compiled = hf_chat_utils._compile_jinja_template(chat_template)
        return jinja_compiled.environment.parse(chat_template)
    except Exception:
        logger.exception("Error when compiling Jinja template")
        return None

_try_get_processor_chat_template

_try_get_processor_chat_template(
    tokenizer: HfTokenizer, *, trust_remote_code: bool
) -> str | None
Source code in vllm/renderers/hf.py
def _try_get_processor_chat_template(
    tokenizer: HfTokenizer,
    *,
    trust_remote_code: bool,
) -> str | None:
    cache_key = (tokenizer.name_or_path, trust_remote_code)
    if cache_key in _PROCESSOR_CHAT_TEMPLATES:
        return _PROCESSOR_CHAT_TEMPLATES[cache_key]

    from transformers import (
        PreTrainedTokenizer,
        PreTrainedTokenizerFast,
        ProcessorMixin,
    )

    try:
        processor = cached_get_processor(
            tokenizer.name_or_path,
            processor_cls=(
                PreTrainedTokenizer,
                PreTrainedTokenizerFast,
                ProcessorMixin,
            ),
            trust_remote_code=trust_remote_code,
        )
        if (
            isinstance(processor, ProcessorMixin)
            and hasattr(processor, "chat_template")
            and (chat_template := processor.chat_template) is not None
        ):
            _PROCESSOR_CHAT_TEMPLATES[cache_key] = chat_template
            return chat_template
    except Exception:
        logger.debug(
            "Failed to load AutoProcessor chat template for %s",
            tokenizer.name_or_path,
            exc_info=True,
        )

    _PROCESSOR_CHAT_TEMPLATES[cache_key] = None
    return None

resolve_chat_template

resolve_chat_template(
    tokenizer: HfTokenizer,
    chat_template: str | None,
    tools: list[dict[str, Any]] | None,
    *,
    model_config: ModelConfig,
) -> str | None
Source code in vllm/renderers/hf.py
def resolve_chat_template(
    tokenizer: HfTokenizer,
    chat_template: str | None,
    tools: list[dict[str, Any]] | None,
    *,
    model_config: "ModelConfig",
) -> str | None:
    # 1st priority: The given chat template
    if chat_template is not None:
        return chat_template

    # 2nd priority: AutoProcessor chat template, unless tool calling is enabled
    if tools is None:
        chat_template = _try_get_processor_chat_template(
            tokenizer,
            trust_remote_code=model_config.trust_remote_code,
        )
        if chat_template is not None:
            return chat_template

    # 3rd priority: AutoTokenizer chat template
    try:
        return tokenizer.get_chat_template(chat_template, tools=tools)
    except Exception:
        logger.debug(
            "Failed to load AutoTokenizer chat template for %s",
            tokenizer.name_or_path,
            exc_info=True,
        )

    # 4th priority: Predefined fallbacks
    path = get_chat_template_fallback_path(
        model_type=model_config.hf_config.model_type,
        tokenizer_name_or_path=tokenizer.name_or_path,
    )
    if path is not None:
        logger.info_once(
            "Loading chat template fallback for %s as there isn't one "
            "defined on HF Hub.",
            tokenizer.name_or_path,
        )
        chat_template = load_chat_template(path)
    else:
        logger.debug_once(
            "There is no chat template fallback for %s", tokenizer.name_or_path
        )

    return chat_template

resolve_chat_template_content_format

resolve_chat_template_content_format(
    chat_template: str | None,
    tools: list[dict[str, Any]] | None,
    given_format: ChatTemplateContentFormatOption,
    tokenizer: HfTokenizer,
    *,
    model_config: ModelConfig,
) -> ChatTemplateContentFormat
Source code in vllm/renderers/hf.py
def resolve_chat_template_content_format(
    chat_template: str | None,
    tools: list[dict[str, Any]] | None,
    given_format: ChatTemplateContentFormatOption,
    tokenizer: HfTokenizer,
    *,
    model_config: "ModelConfig",
) -> ChatTemplateContentFormat:
    if given_format != "auto":
        return given_format

    detected_format = _resolve_chat_template_content_format(
        chat_template,
        tools,
        tokenizer,
        model_config=model_config,
    )

    _log_chat_template_content_format(
        chat_template,
        given_format=given_format,
        detected_format=detected_format,
    )

    return detected_format

resolve_chat_template_kwargs

resolve_chat_template_kwargs(
    tokenizer: HfTokenizer,
    chat_template: str,
    chat_template_kwargs: dict[str, Any],
    raise_on_unexpected: bool = True,
) -> dict[str, Any]
Source code in vllm/renderers/hf.py
def resolve_chat_template_kwargs(
    tokenizer: HfTokenizer,
    chat_template: str,
    chat_template_kwargs: dict[str, Any],
    raise_on_unexpected: bool = True,
) -> dict[str, Any]:
    # We exclude chat_template from kwargs here, because
    # chat template has been already resolved at this stage
    unexpected_vars = {"chat_template", "tokenize"}
    if raise_on_unexpected and (
        unexpected_in_kwargs := unexpected_vars & chat_template_kwargs.keys()
    ):
        raise ValueError(
            "Found unexpected chat template kwargs from request: "
            f"{unexpected_in_kwargs}"
        )

    fn_kw = {
        k
        for k in chat_template_kwargs
        if supports_kw(tokenizer.apply_chat_template, k, allow_var_kwargs=False)
    }
    template_vars = _cached_resolve_chat_template_kwargs(chat_template)

    # Allow standard HF parameters even if tokenizer uses **kwargs to receive them
    hf_base_params = _get_hf_base_chat_template_params()

    accept_vars = (fn_kw | template_vars | hf_base_params) - unexpected_vars
    return {k: v for k, v in chat_template_kwargs.items() if k in accept_vars}
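
A hedged usage sketch: the model name is purely illustrative, enable_thinking is only kept if the resolved chat template actually references it, and keys that neither the tokenizer signature nor the template accepts are dropped. The import path follows this page and is an assumption.

from transformers import AutoTokenizer

from vllm.renderers.hf import resolve_chat_template_kwargs  # path per this page

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")  # illustrative model
chat_template = tokenizer.get_chat_template()

resolved = resolve_chat_template_kwargs(
    tokenizer,
    chat_template=chat_template,
    chat_template_kwargs={
        "add_generation_prompt": True,  # standard HF parameter -> kept
        "enable_thinking": False,       # kept only if the template uses this variable
        "totally_unknown_flag": 1,      # neither -> silently dropped
    },
)
print(resolved)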

safe_apply_chat_template

safe_apply_chat_template(
    model_config: ModelConfig,
    tokenizer: HfTokenizer,
    conversation: list[ConversationMessage],
    *,
    tools: list[dict[str, Any]] | None = None,
    chat_template: str | None = None,
    tokenize: bool = True,
    **kwargs,
) -> str | list[int]
Source code in vllm/renderers/hf.py
def safe_apply_chat_template(
    model_config: "ModelConfig",
    tokenizer: HfTokenizer,
    conversation: list[ConversationMessage],
    *,
    tools: list[dict[str, Any]] | None = None,
    chat_template: str | None = None,
    tokenize: bool = True,
    **kwargs,
) -> str | list[int]:
    chat_template = resolve_chat_template(
        tokenizer,
        chat_template=chat_template,
        tools=tools,
        model_config=model_config,
    )
    if chat_template is None:
        raise ChatTemplateResolutionError(
            "As of transformers v4.44, default chat template is no longer "
            "allowed, so you must provide a chat template if the tokenizer "
            "does not define one."
        )

    resolved_kwargs = resolve_chat_template_kwargs(
        tokenizer=tokenizer,
        chat_template=chat_template,
        chat_template_kwargs=kwargs,
    )

    try:
        return tokenizer.apply_chat_template(
            conversation=conversation,  # type: ignore[arg-type]
            tools=tools,  # type: ignore[arg-type]
            chat_template=chat_template,
            tokenize=tokenize,
            **resolved_kwargs,
        )
    # External library exceptions can sometimes occur despite the framework's
    # internal exception management capabilities.
    except Exception as e:
        # Log and report any library-related exceptions for further
        # investigation.
        logger.exception(
            "An error occurred in `transformers` while applying chat template"
        )
        raise ValueError(str(e)) from e