Skip to content

Commit

Permalink
fix(core): chat-device should be used for checking against model (Tab…
Browse files Browse the repository at this point in the history
…byML#2036)

* fix(core): chat-device should be used for checking against model
downloading.

* update

* fix demo mode

* read from secrets

* update
  • Loading branch information
wsxiaoys authored May 3, 2024
1 parent 5ed1080 commit acec5c8
Show file tree
Hide file tree
Showing 11 changed files with 42 additions and 43 deletions.
5 changes: 2 additions & 3 deletions crates/tabby/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,11 @@ authors.workspace = true
homepage.workspace = true

[features]
default = ["ee", "experimental-http"]
default = ["ee"]
ee = ["dep:tabby-webserver"]
cuda = ["llama-cpp-bindings/cuda"]
rocm = ["llama-cpp-bindings/rocm"]
vulkan = ["llama-cpp-bindings/vulkan"]
experimental-http = ["dep:http-api-bindings"]
# If compiling on a system without OpenSSL installed, or cross-compiling for a different
# architecture, enable this feature to compile OpenSSL as part of the build.
# See https://docs.rs/openssl/#vendored for more.
Expand Down Expand Up @@ -43,7 +42,7 @@ tantivy = { workspace = true }
anyhow = { workspace = true }
sysinfo = "0.29.8"
nvml-wrapper = "0.9.0"
http-api-bindings = { path = "../http-api-bindings", optional = true } # included when build with `experimental-http` feature
http-api-bindings = { path = "../http-api-bindings" }
async-stream = { workspace = true }
minijinja = { version = "1.0.8", features = ["loader"] }
textdistance = "1.0.2"
Expand Down
1 change: 0 additions & 1 deletion crates/tabby/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ pub enum Device {
#[strum(serialize = "vulkan")]
Vulkan,

#[cfg(feature = "experimental-http")]
#[strum(serialize = "experimental_http")]
#[clap(hide = true)]
ExperimentalHttp,
Expand Down
20 changes: 9 additions & 11 deletions crates/tabby/src/serve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,6 @@ pub struct ServeArgs {
}

pub async fn main(config: &Config, args: &ServeArgs) {
#[cfg(feature = "experimental-http")]
if args.device == Device::ExperimentalHttp {
tracing::warn!("HTTP device is unstable and does not comply with semver expectations.");
} else {
load_model(args).await;
}
#[cfg(not(feature = "experimental-http"))]
load_model(args).await;

info!("Starting server, this might take a few minutes...");
Expand Down Expand Up @@ -175,12 +168,17 @@ pub async fn main(config: &Config, args: &ServeArgs) {
}

async fn load_model(args: &ServeArgs) {
if let Some(model) = &args.model {
download_model_if_needed(model).await;
if args.device != Device::ExperimentalHttp {
if let Some(model) = &args.model {
download_model_if_needed(model).await;
}
}

if let Some(chat_model) = &args.chat_model {
download_model_if_needed(chat_model).await
let chat_device = args.chat_device.as_ref().unwrap_or(&args.device);
if chat_device != &Device::ExperimentalHttp {
if let Some(chat_model) = &args.chat_model {
download_model_if_needed(chat_model).await
}
}
}

Expand Down
2 changes: 0 additions & 2 deletions crates/tabby/src/services/model/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ pub async fn load_chat_completion(
device: &Device,
parallelism: u8,
) -> Arc<dyn ChatCompletionStream> {
#[cfg(feature = "experimental-http")]
if device == &Device::ExperimentalHttp {
return http_api_bindings::create_chat(model_id);
}
Expand Down Expand Up @@ -47,7 +46,6 @@ async fn load_completion(
device: &Device,
parallelism: u8,
) -> (Arc<dyn CompletionStream>, PromptInfo) {
#[cfg(feature = "experimental-http")]
if device == &Device::ExperimentalHttp {
let (engine, prompt_template, chat_template) = http_api_bindings::create(model_id);
return (
Expand Down
2 changes: 1 addition & 1 deletion ee/tabby-schema/src/env.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
pub fn demo_mode() -> bool {
pub fn is_demo_mode() -> bool {
std::env::var("TABBY_WEBSERVER_DEMO_MODE").is_ok()
}
2 changes: 1 addition & 1 deletion ee/tabby-schema/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ mod schema;

pub mod juniper;
pub use dao::*;
pub use env::demo_mode;
pub use env::is_demo_mode;
pub use schema::*;

#[macro_export]
Expand Down
2 changes: 1 addition & 1 deletion ee/tabby-schema/src/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ impl Query {
is_chat_enabled: ctx.locator.worker().is_chat_enabled().await?,
is_email_configured: ctx.locator.email().read_setting().await?.is_some(),
allow_self_signup: ctx.locator.auth().allow_self_signup().await?,
is_demo_mode: env::demo_mode(),
is_demo_mode: env::is_demo_mode(),
})
}

Expand Down
10 changes: 5 additions & 5 deletions ee/tabby-webserver/src/service/auth.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ use tabby_schema::{
OAuthResponse, RefreshTokenResponse, RegisterResponse, RequestInvitationInput,
TokenAuthResponse, UpdateOAuthCredentialInput, User,
},
demo_mode,
email::EmailService,
is_demo_mode,
license::{LicenseInfo, LicenseService},
setting::SettingService,
AsID, AsRowid, CoreError, DbEnum, Result,
Expand Down Expand Up @@ -63,7 +63,7 @@ impl AuthenticationService for AuthenticationServiceImpl {
invitation_code: Option<String>,
) -> Result<RegisterResponse> {
let is_admin_initialized = self.is_admin_initialized().await?;
if is_admin_initialized && demo_mode() {
if is_admin_initialized && is_demo_mode() {
bail!("Registering new users is disabled in demo mode");
}
let invitation =
Expand Down Expand Up @@ -166,7 +166,7 @@ impl AuthenticationService for AuthenticationServiceImpl {
old_password: Option<&str>,
new_password: &str,
) -> Result<()> {
if demo_mode() {
if is_demo_mode() {
bail!("Changing passwords is disabled in demo mode");
}

Expand Down Expand Up @@ -322,7 +322,7 @@ impl AuthenticationService for AuthenticationServiceImpl {
}

async fn create_invitation(&self, email: String) -> Result<Invitation> {
if demo_mode() {
if is_demo_mode() {
bail!("Inviting users is disabled in demo mode");
}
let license = self.license.read().await?;
Expand Down Expand Up @@ -523,7 +523,7 @@ async fn get_or_create_oauth_user(
.map_err(|x| OAuthError::Other(x.into()))?
.can_register_without_invitation(email)
{
if demo_mode() {
if is_demo_mode() {
bail!("Registering new users is disabled in demo mode");
}
// it's ok to set password to null here, because
Expand Down
6 changes: 3 additions & 3 deletions ee/tabby-webserver/src/service/license.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use lazy_static::lazy_static;
use serde::Deserialize;
use tabby_db::DbConn;
use tabby_schema::{
demo_mode,
is_demo_mode,
license::{LicenseInfo, LicenseService, LicenseStatus, LicenseType},
Result,
};
Expand Down Expand Up @@ -129,7 +129,7 @@ fn license_info_from_raw(raw: LicenseJWTPayload, seats_used: usize) -> Result<Li
#[async_trait]
impl LicenseService for LicenseServiceImpl {
async fn read(&self) -> Result<LicenseInfo> {
if demo_mode() {
if is_demo_mode() {
return self.make_demo_license().await;
}

Expand All @@ -145,7 +145,7 @@ impl LicenseService for LicenseServiceImpl {
}

async fn update(&self, license: String) -> Result<()> {
if demo_mode() {
if is_demo_mode() {
bail!("Modifying license is disabled in demo mode");
}

Expand Down
17 changes: 4 additions & 13 deletions ee/tabby-webserver/src/service/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ use tabby_db::DbConn;
use tabby_schema::{
analytic::AnalyticService,
auth::AuthenticationService,
demo_mode,
email::EmailService,
is_demo_mode,
job::JobService,
license::{IsLicenseValid, LicenseService},
repository::RepositoryService,
Expand Down Expand Up @@ -116,13 +116,6 @@ impl ServerContext {
/// Returns whether a request is authorized to access the content, and the user ID if authentication was used.
async fn authorize_request(&self, uri: &Uri, headers: &HeaderMap) -> (bool, Option<ID>) {
let path = uri.path();
if demo_mode()
&& (path.starts_with("/v1/completions")
|| path.starts_with("/v1/chat/completions")
|| path.starts_with("/v1beta/chat/completions"))
{
return (false, None);
}
if !(path.starts_with("/v1/") || path.starts_with("/v1beta/")) {
return (true, None);
}
Expand All @@ -146,12 +139,10 @@ impl ServerContext {
}

let is_license_valid = self.license.read().await.ensure_valid_license().is_ok();
let requires_owner = !is_license_valid || is_demo_mode();

// If there's no valid license, only allows owner access.
match self
.db_conn
.verify_auth_token(token, !is_license_valid)
.await
{
match self.db_conn.verify_auth_token(token, requires_owner).await {
Ok(id) => (true, Some(id.as_id())),
Err(_) => (false, None),
}
Expand Down
18 changes: 16 additions & 2 deletions experimental/demo/app.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
from modal import Image, Stub, gpu, asgi_app, Volume
from modal import Image, Stub, gpu, asgi_app, Volume, Secret

IMAGE_NAME = os.environ.get("TABBY_IMAGE", "tabbyml/tabby")

Expand All @@ -21,10 +21,12 @@
container_idle_timeout=600*2,
timeout=600,
volumes = {"/data": volume},
_allow_background_volume_commits=True
_allow_background_volume_commits=True,
secrets=[Secret.from_name("deepseek-openapi-key")]
)
@asgi_app()
def entry():
import json
import socket
import subprocess
import time
Expand All @@ -34,12 +36,24 @@ def entry():
# Child-process environment: a copy of the current environment with usage
# collection disabled and the webserver demo mode switched on.
env = os.environ.copy()
env["TABBY_DISABLE_USAGE_COLLECTION"] = "1"
env["TABBY_WEBSERVER_DEMO_MODE"] = "1"

# Description of the remote chat backend; it is serialized to JSON and passed
# as the value of `--chat-model` below.
# NOTE(review): OPENAI_API_KEY is presumably injected by the Modal secret
# attached to this function -- confirm the secret exposes that variable name.
chat_model = dict(
    kind="openai-chat",
    model_name="deepseek-coder",
    api_endpoint="https://api.deepseek.com/v1",
    # Falls back to an empty key when the variable is not set.
    api_key=env.get("OPENAI_API_KEY", ""),
)

launcher = subprocess.Popen(
    [
        "/opt/tabby/bin/tabby-cpu",
        "serve",
        "--port",
        "8000",
        "--chat-device",
        # The trailing comma is required: without it Python concatenates this
        # literal with the next one into "experimental-http--chat-model",
        # which corrupts the device value and drops the --chat-model flag.
        "experimental-http",
        "--chat-model",
        json.dumps(chat_model),
    ],
    env=env,
)
Expand Down

0 comments on commit acec5c8

Please sign in to comment.