diff --git a/experimental/modal-deploy/app.py b/experimental/modal-deploy/app.py
new file mode 100644
index 000000000000..794a80b201a2
--- /dev/null
+++ b/experimental/modal-deploy/app.py
@@ -0,0 +1,84 @@
+"""
+Run a Tabby server on Modal behind an ASGI proxy. Usage: modal serve app.py
+"""
+
+from pathlib import Path
+
+import modal
+from modal import Image, Mount, Secret, Stub, asgi_app, gpu, method
+
+IMAGE_NAME = "tabbyml/tabby:0.3.1"  # Tabby container image tag, as given to Image.from_registry
+MODEL_ID = "TabbyML/StarCoder-1B"  # model passed to `tabby download` and `tabby serve`
+GPU_CONFIG = gpu.T4()  # GPU configuration requested for the serving function
+
+
+def download_model():
+    """Fetch MODEL_ID with the Tabby CLI; invoked through `Image.run_function`.
+
+    Raises:
+        subprocess.CalledProcessError: if `tabby download` exits non-zero.
+    """
+    import subprocess
+
+    # check=True: a failed download must abort instead of being silently ignored.
+    download_cmd = ["/opt/tabby/bin/tabby", "download", "--model", MODEL_ID]
+    subprocess.run(download_cmd, check=True)
+
+
+# Container image: the Tabby base image plus Python 3.11, the model, and the proxy lib.
+image = (
+    Image.from_registry(IMAGE_NAME, add_python="3.11")
+    # Reset the base image's ENTRYPOINT (presumably so Modal can run its own process).
+    .dockerfile_commands("ENTRYPOINT []")
+    .run_function(download_model)
+    # asgi-proxy-lib supplies the `asgi_proxy` module imported inside app().
+    .pip_install("asgi-proxy-lib")
+)
+
+stub = Stub("tabby-server-" + MODEL_ID.split("/")[-1], image=image)
+
+
+@stub.function(
+    gpu=GPU_CONFIG,
+    allow_concurrent_inputs=10,
+    container_idle_timeout=120,
+    timeout=360,
+)
+@asgi_app()
+def app():
+    """Start `tabby serve` as a subprocess and return an ASGI proxy to it.
+
+    Blocks until the Tabby webserver accepts TCP connections.
+
+    Raises:
+        RuntimeError: if the Tabby process exits before it starts listening.
+    """
+    import socket
+    import subprocess
+    import time
+    from asgi_proxy import asgi_proxy
+
+    # Single source for the address: port feeds --port; host/port feed probe and proxy.
+    host, port = "127.0.0.1", 8000
+    serve_cmd = ["/opt/tabby/bin/tabby", "serve", "--model", MODEL_ID]
+    serve_cmd += ["--port", str(port), "--device", "cuda"]
+    launcher = subprocess.Popen(serve_cmd)
+
+    def webserver_ready():
+        """Return True once host:port accepts a TCP connection."""
+        try:
+            socket.create_connection((host, port), timeout=1).close()
+        except (socket.timeout, ConnectionRefusedError):
+            # If the launcher has exited, a connection can never be made.
+            retcode = launcher.poll()
+            if retcode is not None:
+                raise RuntimeError(f"launcher exited unexpectedly with code {retcode}")
+            return False
+        return True
+
+    # Poll until the webserver accepts connections before running inputs.
+    while not webserver_ready():
+        time.sleep(1.0)
+
+    print("Tabby server ready!")
+    return asgi_proxy(f"http://{host}:{port}")