Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 164 additions & 0 deletions pylock.toml

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number | Diff line number | Diff line change
Expand Up @@ -74,11 +74,11 @@ dependencies = [

[project.optional-dependencies]
# Meta Extras
all = ["guidellm[perf,openai,audio,vision]"]
recommended = ["guidellm[perf,openai]"]
all = ["guidellm[perf,tokenizers,audio,vision]"]
recommended = ["guidellm[perf,tokenizers]"]
# Feature Extras
perf = ["orjson", "msgpack", "msgspec", "uvloop"]
openai = ["tiktoken>=0.11.0", "blobfile>=3.1.0"]
tokenizers = ["tiktoken", "blobfile", "mistral-common"]
audio = [
# Lowest version with full torchcodec support
"datasets[audio]>=4.1.0",
Expand Down
2 changes: 1 addition & 1 deletion src/guidellm/data/processor.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,7 +3,7 @@
from pathlib import Path
from typing import Any

from transformers import AutoTokenizer, PreTrainedTokenizerBase # type: ignore[import]
from transformers import AutoTokenizer, PreTrainedTokenizerBase

__all__ = ["ProcessorFactory"]

Expand Down
4 changes: 2 additions & 2 deletions src/guidellm/mock_server/handlers/tokenizer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,7 +13,7 @@
from sanic import response
from sanic.request import Request
from sanic.response import HTTPResponse
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers import AutoTokenizer

from guidellm.mock_server.config import MockServerConfig
from guidellm.mock_server.models import (
Expand Down Expand Up @@ -55,7 +55,7 @@ def __init__(self, config: MockServerConfig) -> None:
self.tokenizer = (
MockTokenizer()
if config.processor is None
else PreTrainedTokenizer.from_pretrained(config.processor)
else AutoTokenizer.from_pretrained(config.processor)
)

async def tokenize(self, request: Request) -> HTTPResponse:
Expand Down
12 changes: 8 additions & 4 deletions src/guidellm/mock_server/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,7 +14,11 @@
from collections.abc import Generator

from faker import Faker
from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer, TextInput
from transformers.tokenization_utils_base import (
AddedToken,
PreTrainedTokenizerBase,
TextInput,
)

__all__ = [
"MockTokenizer",
Expand All @@ -25,7 +29,7 @@
]


class MockTokenizer(PreTrainedTokenizer):
class MockTokenizer(PreTrainedTokenizerBase):
"""
Mock tokenizer implementation for testing text processing workflows.
Expand Down Expand Up @@ -207,7 +211,7 @@ def decode( # type: ignore[override]

def create_fake_text(
num_tokens: int,
processor: PreTrainedTokenizer,
processor: PreTrainedTokenizerBase,
seed: int = 42,
fake: Faker | None = None,
) -> str:
Expand All @@ -229,7 +233,7 @@ def create_fake_text(

def create_fake_tokens_str(
num_tokens: int,
processor: PreTrainedTokenizer,
processor: PreTrainedTokenizerBase,
seed: int = 42,
fake: Faker | None = None,
) -> list[str]:
Expand Down
Loading