From 79f440f302fb283bbfebdd4b9d3cc20c3159cb0a Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Sat, 25 May 2024 14:43:35 -0700
Subject: [PATCH] refac: byebye litellm

---
 .env.example                                  |   6 +-
 Dockerfile                                    |  13 +-
 backend/apps/litellm/main.py                  | 388 ------------------
 backend/apps/web/models/modelfiles.py         | 144 -------
 backend/config.py                             |  21 +-
 backend/main.py                               |  18 -
 backend/requirements.txt                      |   2 -
 backend/space/litellm_config.yaml             |  43 --
 backend/start.sh                              |   5 -
 src/lib/apis/litellm/index.ts                 | 151 -------
 src/lib/components/chat/Chat.svelte           |  11 +-
 .../components/workspace/Playground.svelte    |  19 +-
 src/lib/constants.ts                          |   1 -
 13 files changed, 14 insertions(+), 808 deletions(-)
 delete mode 100644 backend/apps/litellm/main.py
 delete mode 100644 backend/apps/web/models/modelfiles.py
 delete mode 100644 backend/space/litellm_config.yaml
 delete mode 100644 src/lib/apis/litellm/index.ts

diff --git a/.env.example b/.env.example
index 2d782fce1..c38bf88bf 100644
--- a/.env.example
+++ b/.env.example
@@ -10,8 +10,4 @@ OPENAI_API_KEY=''
 # DO NOT TRACK
 SCARF_NO_ANALYTICS=true
 DO_NOT_TRACK=true
-ANONYMIZED_TELEMETRY=false
-
-# Use locally bundled version of the LiteLLM cost map json
-# to avoid repetitive startup connections
-LITELLM_LOCAL_MODEL_COST_MAP="True"
\ No newline at end of file
+ANONYMIZED_TELEMETRY=false
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index c2c42aa17..52987b5a6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -59,11 +59,6 @@ ENV OPENAI_API_KEY="" \
     DO_NOT_TRACK=true \
     ANONYMIZED_TELEMETRY=false
 
-# Use locally bundled version of the LiteLLM cost map json
-# to avoid repetitive startup connections
-ENV LITELLM_LOCAL_MODEL_COST_MAP="True"
-
-
 #### Other models #########################################################
 ## whisper TTS model settings ##
 ENV WHISPER_MODEL="base" \
@@ -83,10 +78,10 @@ WORKDIR /app/backend
 ENV HOME /root
 # Create user and group if not root
 RUN if [ $UID -ne 0 ]; then \
-      if [ $GID -ne 0 ]; then \
-        addgroup --gid $GID app; \
-      fi; \
-      adduser --uid $UID --gid $GID --home $HOME --disabled-password --no-create-home app; \
+    if [ $GID -ne 0 ]; then \
+    addgroup --gid $GID app; \
+    fi; \
+    adduser --uid $UID --gid $GID --home $HOME --disabled-password --no-create-home app; \
     fi
 
 RUN mkdir -p $HOME/.cache/chroma
diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
deleted file mode 100644
index 2b771d5c6..000000000
--- a/backend/apps/litellm/main.py
+++ /dev/null
@@ -1,388 +0,0 @@
-import sys
-from contextlib import asynccontextmanager
-
-from fastapi import FastAPI, Depends, HTTPException
-from fastapi.routing import APIRoute
-from fastapi.middleware.cors import CORSMiddleware
-
-import logging
-from fastapi import FastAPI, Request, Depends, status, Response
-from fastapi.responses import JSONResponse
-
-from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
-from starlette.responses import StreamingResponse
-import json
-import time
-import requests
-
-from pydantic import BaseModel, ConfigDict
-from typing import Optional, List
-
-from apps.web.models.models import Models
-from utils.utils import get_verified_user, get_current_user, get_admin_user
-from config import SRC_LOG_LEVELS
-from constants import MESSAGES
-
-import os
-
-log = logging.getLogger(__name__)
-log.setLevel(SRC_LOG_LEVELS["LITELLM"])
-
-
-from config import (
-    ENABLE_LITELLM,
-    ENABLE_MODEL_FILTER,
-    MODEL_FILTER_LIST,
-    DATA_DIR,
-    LITELLM_PROXY_PORT,
-    LITELLM_PROXY_HOST,
-)
-
-import warnings
-
-warnings.simplefilter("ignore")
-
-from litellm.utils import get_llm_provider
-
-import asyncio
-import subprocess
-import yaml
-
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    log.info("startup_event")
-    # TODO: Check config.yaml file and create one
-    asyncio.create_task(start_litellm_background())
-    yield
-
-
-app = FastAPI(lifespan=lifespan)
-
-origins = ["*"]
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=origins,
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-
-LITELLM_CONFIG_DIR = f"{DATA_DIR}/litellm/config.yaml"
-
-with open(LITELLM_CONFIG_DIR, "r") as file:
-    litellm_config = yaml.safe_load(file)
-
-
-app.state.ENABLE_MODEL_FILTER = ENABLE_MODEL_FILTER.value
-app.state.MODEL_FILTER_LIST = MODEL_FILTER_LIST.value
-app.state.MODEL_CONFIG = Models.get_all_models()
-
-app.state.ENABLE = ENABLE_LITELLM
-app.state.CONFIG = litellm_config
-
-# Global variable to store the subprocess reference
-background_process = None
-
-CONFLICT_ENV_VARS = [
-    # Uvicorn uses PORT, so LiteLLM might use it as well
-    "PORT",
-    # LiteLLM uses DATABASE_URL for Prisma connections
-    "DATABASE_URL",
-]
-
-
-async def run_background_process(command):
-    global background_process
-    log.info("run_background_process")
-
-    try:
-        # Log the command to be executed
-        log.info(f"Executing command: {command}")
-        # Filter environment variables known to conflict with litellm
-        env = {k: v for k, v in os.environ.items() if k not in CONFLICT_ENV_VARS}
-        # Execute the command and create a subprocess
-        process = await asyncio.create_subprocess_exec(
-            *command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env
-        )
-        background_process = process
-        log.info("Subprocess started successfully.")
-
-        # Capture STDERR for debugging purposes
-        stderr_output = await process.stderr.read()
-        stderr_text = stderr_output.decode().strip()
-        if stderr_text:
-            log.info(f"Subprocess STDERR: {stderr_text}")
-
-        # log.info output line by line
-        async for line in process.stdout:
-            log.info(line.decode().strip())
-
-        # Wait for the process to finish
-        returncode = await process.wait()
-        log.info(f"Subprocess exited with return code {returncode}")
-    except Exception as e:
-        log.error(f"Failed to start subprocess: {e}")
-        raise  # Optionally re-raise the exception if you want it to propagate
-
-
-async def start_litellm_background():
-    log.info("start_litellm_background")
-    # Command to run in the background
-    command = [
-        "litellm",
-        "--port",
-        str(LITELLM_PROXY_PORT),
-        "--host",
-        LITELLM_PROXY_HOST,
-        "--telemetry",
-        "False",
-        "--config",
-        LITELLM_CONFIG_DIR,
-    ]
-
-    await run_background_process(command)
-
-
-async def shutdown_litellm_background():
-    log.info("shutdown_litellm_background")
-    global background_process
-    if background_process:
-        background_process.terminate()
-        await background_process.wait()  # Ensure the process has terminated
-        log.info("Subprocess terminated")
-        background_process = None
-
-
-@app.get("/")
-async def get_status():
-    return {"status": True}
-
-
-async def restart_litellm():
-    """
-    Endpoint to restart the litellm background service.
-    """
-    log.info("Requested restart of litellm service.")
-    try:
-        # Shut down the existing process if it is running
-        await shutdown_litellm_background()
-        log.info("litellm service shutdown complete.")
-
-        # Restart the background service
-
-        asyncio.create_task(start_litellm_background())
-        log.info("litellm service restart complete.")
-
-        return {
-            "status": "success",
-            "message": "litellm service restarted successfully.",
-        }
-    except Exception as e:
-        log.info(f"Error restarting litellm service: {e}")
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)
-        )
-
-
-@app.get("/restart")
-async def restart_litellm_handler(user=Depends(get_admin_user)):
-    return await restart_litellm()
-
-
-@app.get("/config")
-async def get_config(user=Depends(get_admin_user)):
-    return app.state.CONFIG
-
-
-class LiteLLMConfigForm(BaseModel):
-    general_settings: Optional[dict] = None
-    litellm_settings: Optional[dict] = None
-    model_list: Optional[List[dict]] = None
-    router_settings: Optional[dict] = None
-
-    model_config = ConfigDict(protected_namespaces=())
-
-
-@app.post("/config/update")
-async def update_config(form_data: LiteLLMConfigForm, user=Depends(get_admin_user)):
-    app.state.CONFIG = form_data.model_dump(exclude_none=True)
-
-    with open(LITELLM_CONFIG_DIR, "w") as file:
-        yaml.dump(app.state.CONFIG, file)
-
-    await restart_litellm()
-    return app.state.CONFIG
-
-
-@app.get("/models")
-@app.get("/v1/models")
-async def get_models(user=Depends(get_current_user)):
-
-    if app.state.ENABLE:
-        while not background_process:
-            await asyncio.sleep(0.1)
-
-        url = f"http://localhost:{LITELLM_PROXY_PORT}/v1"
-        r = None
-        try:
-            r = requests.request(method="GET", url=f"{url}/models")
-            r.raise_for_status()
-
-            data = r.json()
-
-            if app.state.ENABLE_MODEL_FILTER:
-                if user and user.role == "user":
-                    data["data"] = list(
-                        filter(
-                            lambda model: model["id"] in app.state.MODEL_FILTER_LIST,
-                            data["data"],
-                        )
-                    )
-
-            return data
-        except Exception as e:
-
-            log.exception(e)
-            error_detail = "Open WebUI: Server Connection Error"
-            if r is not None:
-                try:
-                    res = r.json()
-                    if "error" in res:
-                        error_detail = f"External: {res['error']}"
-                except:
-                    error_detail = f"External: {e}"
-
-            return {
-                "data": [
-                    {
-                        "id": model["model_name"],
-                        "object": "model",
-                        "created": int(time.time()),
-                        "owned_by": "openai",
-                        "custom_info": next(
-                            (
-                                item
-                                for item in app.state.MODEL_CONFIG
-                                if item.id == model["model_name"]
-                            ),
-                            None,
-                        ),
-                    }
-                    for model in app.state.CONFIG["model_list"]
-                ],
-                "object": "list",
-            }
-    else:
-        return {
-            "data": [],
-            "object": "list",
-        }
-
-
-@app.get("/model/info")
-async def get_model_list(user=Depends(get_admin_user)):
-    return {"data": app.state.CONFIG["model_list"]}
-
-
-class AddLiteLLMModelForm(BaseModel):
-    model_name: str
-    litellm_params: dict
-
-    model_config = ConfigDict(protected_namespaces=())
-
-
-@app.post("/model/new")
-async def add_model_to_config(
-    form_data: AddLiteLLMModelForm, user=Depends(get_admin_user)
-):
-    try:
-        get_llm_provider(model=form_data.model_name)
-        app.state.CONFIG["model_list"].append(form_data.model_dump())
-
-        with open(LITELLM_CONFIG_DIR, "w") as file:
-            yaml.dump(app.state.CONFIG, file)
-
-        await restart_litellm()
-
-        return {"message": MESSAGES.MODEL_ADDED(form_data.model_name)}
-    except Exception as e:
-        print(e)
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)
-        )
-
-
-class DeleteLiteLLMModelForm(BaseModel):
-    id: str
-
-
-@app.post("/model/delete")
-async def delete_model_from_config(
-    form_data: DeleteLiteLLMModelForm, user=Depends(get_admin_user)
-):
-    app.state.CONFIG["model_list"] = [
-        model
-        for model in app.state.CONFIG["model_list"]
-        if model["model_name"] != form_data.id
-    ]
-
-    with open(LITELLM_CONFIG_DIR, "w") as file:
-        yaml.dump(app.state.CONFIG, file)
-
-    await restart_litellm()
-
-    return {"message": MESSAGES.MODEL_DELETED(form_data.id)}
-
-
-@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
-async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
-    body = await request.body()
-
-    url = f"http://localhost:{LITELLM_PROXY_PORT}"
-
-    target_url = f"{url}/{path}"
-
-    headers = {}
-    # headers["Authorization"] = f"Bearer {key}"
-    headers["Content-Type"] = "application/json"
-
-    r = None
-
-    try:
-        r = requests.request(
-            method=request.method,
-            url=target_url,
-            data=body,
-            headers=headers,
-            stream=True,
-        )
-
-        r.raise_for_status()
-
-        # Check if response is SSE
-        if "text/event-stream" in r.headers.get("Content-Type", ""):
-            return StreamingResponse(
-                r.iter_content(chunk_size=8192),
-                status_code=r.status_code,
-                headers=dict(r.headers),
-            )
-        else:
-            response_data = r.json()
-            return response_data
-    except Exception as e:
-        log.exception(e)
-        error_detail = "Open WebUI: Server Connection Error"
-        if r is not None:
-            try:
-                res = r.json()
-                if "error" in res:
-                    error_detail = f"External: {res['error']['message'] if 'message' in res['error'] else res['error']}"
-            except:
-                error_detail = f"External: {e}"
-
-        raise HTTPException(
-            status_code=r.status_code if r else 500, detail=error_detail
-        )
diff --git a/backend/apps/web/models/modelfiles.py b/backend/apps/web/models/modelfiles.py
deleted file mode 100644
index fe278ed5f..000000000
--- a/backend/apps/web/models/modelfiles.py
+++ /dev/null
@@ -1,144 +0,0 @@
-################################################################################
-#                              DEPRECATION NOTICE                              #
-#                                                                              #
-# This file has been deprecated since version 0.2.0.                           #
-#                                                                              #
-################################################################################
-
-
-from pydantic import BaseModel
-from peewee import *
-from playhouse.shortcuts import model_to_dict
-from typing import List, Union, Optional
-import time
-
-from utils.utils import decode_token
-from utils.misc import get_gravatar_url
-
-from apps.web.internal.db import DB
-
-import json
-
-####################
-# Modelfile DB Schema
-####################
-
-
-class Modelfile(Model):
-    tag_name = CharField(unique=True)
-    user_id = CharField()
-    modelfile = TextField()
-    timestamp = BigIntegerField()
-
-    class Meta:
-        database = DB
-
-
-class ModelfileModel(BaseModel):
-    tag_name: str
-    user_id: str
-    modelfile: str
-    timestamp: int  # timestamp in epoch
-
-
-####################
-# Forms
-####################
-
-
-class ModelfileForm(BaseModel):
-    modelfile: dict
-
-
-class ModelfileTagNameForm(BaseModel):
-    tag_name: str
-
-
-class ModelfileUpdateForm(ModelfileForm, ModelfileTagNameForm):
-    pass
-
-
-class ModelfileResponse(BaseModel):
-    tag_name: str
-    user_id: str
-    modelfile: dict
-    timestamp: int  # timestamp in epoch
-
-
-class ModelfilesTable:
-
-    def __init__(self, db):
-        self.db = db
-        self.db.create_tables([Modelfile])
-
-    def insert_new_modelfile(
-        self, user_id: str, form_data: ModelfileForm
-    ) -> Optional[ModelfileModel]:
-        if "tagName" in form_data.modelfile:
-            modelfile = ModelfileModel(
-                **{
-                    "user_id": user_id,
-                    "tag_name": form_data.modelfile["tagName"],
-                    "modelfile": json.dumps(form_data.modelfile),
-                    "timestamp": int(time.time()),
-                }
-            )
-
-            try:
-                result = Modelfile.create(**modelfile.model_dump())
-                if result:
-                    return modelfile
-                else:
-                    return None
-            except:
-                return None
-
-        else:
-            return None
-
-    def get_modelfile_by_tag_name(self, tag_name: str) -> Optional[ModelfileModel]:
-        try:
-            modelfile = Modelfile.get(Modelfile.tag_name == tag_name)
-            return ModelfileModel(**model_to_dict(modelfile))
-        except:
-            return None
-
-    def get_modelfiles(self, skip: int = 0, limit: int = 50) -> List[ModelfileResponse]:
-        return [
-            ModelfileResponse(
-                **{
-                    **model_to_dict(modelfile),
-                    "modelfile": json.loads(modelfile.modelfile),
-                }
-            )
-            for modelfile in Modelfile.select()
-            # .limit(limit).offset(skip)
-        ]
-
-    def update_modelfile_by_tag_name(
-        self, tag_name: str, modelfile: dict
-    ) -> Optional[ModelfileModel]:
-        try:
-            query = Modelfile.update(
-                modelfile=json.dumps(modelfile),
-                timestamp=int(time.time()),
-            ).where(Modelfile.tag_name == tag_name)
-
-            query.execute()
-
-            modelfile = Modelfile.get(Modelfile.tag_name == tag_name)
-            return ModelfileModel(**model_to_dict(modelfile))
-        except:
-            return None
-
-    def delete_modelfile_by_tag_name(self, tag_name: str) -> bool:
-        try:
-            query = Modelfile.delete().where((Modelfile.tag_name == tag_name))
-            query.execute()  # Remove the rows, return number of rows removed.
-
-            return True
-        except:
-            return False
-
-
-Modelfiles = ModelfilesTable(DB)
diff --git a/backend/config.py b/backend/config.py
index 0b18eab43..81f8afcad 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -56,7 +56,6 @@ log_sources = [
     "CONFIG",
     "DB",
     "IMAGES",
-    "LITELLM",
     "MAIN",
     "MODELS",
     "OLLAMA",
@@ -374,10 +373,10 @@ def create_config_file(file_path):
 
 LITELLM_CONFIG_PATH = f"{DATA_DIR}/litellm/config.yaml"
 
-if not os.path.exists(LITELLM_CONFIG_PATH):
-    log.info("Config file doesn't exist. Creating...")
-    create_config_file(LITELLM_CONFIG_PATH)
-    log.info("Config file created successfully.")
+# NOTE: LiteLLM support has been removed, so the default LiteLLM config file
+# is no longer created automatically here.
+# TODO(review): LITELLM_CONFIG_PATH and create_config_file() appear to have no
+# remaining users after this change; confirm, then delete both.
 
 
 ####################################
@@ -826,18 +825,6 @@ AUDIO_OPENAI_API_VOICE = PersistentConfig(
     os.getenv("AUDIO_OPENAI_API_VOICE", "alloy"),
 )
 
-####################################
-# LiteLLM
-####################################
-
-
-ENABLE_LITELLM = os.environ.get("ENABLE_LITELLM", "True").lower() == "true"
-
-LITELLM_PROXY_PORT = int(os.getenv("LITELLM_PROXY_PORT", "14365"))
-if LITELLM_PROXY_PORT < 0 or LITELLM_PROXY_PORT > 65535:
-    raise ValueError("Invalid port number for LITELLM_PROXY_PORT")
-LITELLM_PROXY_HOST = os.getenv("LITELLM_PROXY_HOST", "127.0.0.1")
-
 
 ####################################
 # Database
diff --git a/backend/main.py b/backend/main.py
index aa3004865..d9ebe922c 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -22,13 +22,6 @@ from starlette.responses import StreamingResponse, Response
 from apps.ollama.main import app as ollama_app, get_all_models as get_ollama_models
 from apps.openai.main import app as openai_app, get_all_models as get_openai_models
 
-from apps.litellm.main import (
-    app as litellm_app,
-    start_litellm_background,
-    shutdown_litellm_background,
-)
-
-
 from apps.audio.main import app as audio_app
 from apps.images.main import app as images_app
 from apps.rag.main import app as rag_app
@@ -55,7 +48,6 @@ from config import (
     STATIC_DIR,
     ENABLE_OPENAI_API,
     ENABLE_OLLAMA_API,
-    ENABLE_LITELLM,
     ENABLE_MODEL_FILTER,
     MODEL_FILTER_LIST,
     GLOBAL_LOG_LEVEL,
@@ -100,11 +92,7 @@ https://github.com/open-webui/open-webui
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    if ENABLE_LITELLM:
-        asyncio.create_task(start_litellm_background())
     yield
-    if ENABLE_LITELLM:
-        await shutdown_litellm_background()
 
 
 app = FastAPI(
@@ -262,9 +250,6 @@ async def update_embedding_function(request: Request, call_next):
     return response
 
 
-# TODO: Deprecate LiteLLM
-app.mount("/litellm/api", litellm_app)
-
 app.mount("/ollama", ollama_app)
 app.mount("/openai", openai_app)
 
@@ -407,9 +392,6 @@ async def update_model_filter_config(
     openai_app.state.config.ENABLE_MODEL_FILTER = app.state.config.ENABLE_MODEL_FILTER
     openai_app.state.config.MODEL_FILTER_LIST = app.state.config.MODEL_FILTER_LIST
 
-    litellm_app.state.ENABLE_MODEL_FILTER = app.state.config.ENABLE_MODEL_FILTER
-    litellm_app.state.MODEL_FILTER_LIST = app.state.config.MODEL_FILTER_LIST
-
     return {
         "enabled": app.state.config.ENABLE_MODEL_FILTER,
         "models": app.state.config.MODEL_FILTER_LIST,
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 29e37f8b8..7a3668428 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -18,8 +18,6 @@ psycopg2-binary==2.9.9
 PyMySQL==1.1.1
 bcrypt==4.1.3
 
-litellm[proxy]==1.37.20
-
 boto3==1.34.110
 
 argon2-cffi==23.1.0
diff --git a/backend/space/litellm_config.yaml b/backend/space/litellm_config.yaml
deleted file mode 100644
index af4f880b9..000000000
--- a/backend/space/litellm_config.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-litellm_settings:
-  drop_params: true
-model_list:
-  - model_name: 'HuggingFace: Mistral: Mistral 7B Instruct v0.1'
-    litellm_params:
-      model: huggingface/mistralai/Mistral-7B-Instruct-v0.1
-      api_key: os.environ/HF_TOKEN
-      max_tokens: 1024
-  - model_name: 'HuggingFace: Mistral: Mistral 7B Instruct v0.2'
-    litellm_params:
-      model: huggingface/mistralai/Mistral-7B-Instruct-v0.2
-      api_key: os.environ/HF_TOKEN
-      max_tokens: 1024
-  - model_name: 'HuggingFace: Meta: Llama 3 8B Instruct'
-    litellm_params:
-      model: huggingface/meta-llama/Meta-Llama-3-8B-Instruct
-      api_key: os.environ/HF_TOKEN
-      max_tokens: 2047
-  - model_name: 'HuggingFace: Mistral: Mixtral 8x7B Instruct v0.1'
-    litellm_params:
-      model: huggingface/mistralai/Mixtral-8x7B-Instruct-v0.1
-      api_key: os.environ/HF_TOKEN
-      max_tokens: 8192
-  - model_name: 'HuggingFace: Microsoft: Phi-3 Mini-4K-Instruct'
-    litellm_params:
-      model: huggingface/microsoft/Phi-3-mini-4k-instruct
-      api_key: os.environ/HF_TOKEN
-      max_tokens: 1024
-  - model_name: 'HuggingFace: Google: Gemma 7B 1.1'
-    litellm_params:
-      model: huggingface/google/gemma-1.1-7b-it
-      api_key: os.environ/HF_TOKEN
-      max_tokens: 1024
-  - model_name: 'HuggingFace: Yi-1.5 34B Chat'
-    litellm_params:
-      model: huggingface/01-ai/Yi-1.5-34B-Chat
-      api_key: os.environ/HF_TOKEN
-      max_tokens: 1024
-  - model_name: 'HuggingFace: Nous Research: Nous Hermes 2 Mixtral 8x7B DPO'
-    litellm_params:
-      model: huggingface/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO
-      api_key: os.environ/HF_TOKEN
-      max_tokens: 2048
diff --git a/backend/start.sh b/backend/start.sh
index ba7741e1d..15fc568d3 100755
--- a/backend/start.sh
+++ b/backend/start.sh
@@ -34,11 +34,6 @@ fi
 # Check if SPACE_ID is set, if so, configure for space
 if [ -n "$SPACE_ID" ]; then
   echo "Configuring for HuggingFace Space deployment"
-  
-  # Copy litellm_config.yaml with specified ownership
-  echo "Copying litellm_config.yaml to the desired location with specified ownership..."
-  cp -f ./space/litellm_config.yaml ./data/litellm/config.yaml
-
   if [ -n "$ADMIN_USER_EMAIL" ] && [ -n "$ADMIN_USER_PASSWORD" ]; then
     echo "Admin user configured, creating"
     WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" uvicorn main:app --host "$HOST" --port "$PORT" --forwarded-allow-ips '*' &
diff --git a/src/lib/apis/litellm/index.ts b/src/lib/apis/litellm/index.ts
deleted file mode 100644
index b1c24c5bd..000000000
--- a/src/lib/apis/litellm/index.ts
+++ /dev/null
@@ -1,151 +0,0 @@
-import { LITELLM_API_BASE_URL } from '$lib/constants';
-
-export const getLiteLLMModels = async (token: string = '') => {
-	let error = null;
-
-	const res = await fetch(`${LITELLM_API_BASE_URL}/v1/models`, {
-		method: 'GET',
-		headers: {
-			Accept: 'application/json',
-			'Content-Type': 'application/json',
-			...(token && { authorization: `Bearer ${token}` })
-		}
-	})
-		.then(async (res) => {
-			if (!res.ok) throw await res.json();
-			return res.json();
-		})
-		.catch((err) => {
-			console.log(err);
-			error = `LiteLLM: ${err?.error?.message ?? 'Network Problem'}`;
-			return [];
-		});
-
-	if (error) {
-		throw error;
-	}
-
-	const models = Array.isArray(res) ? res : res?.data ?? null;
-
-	return models
-		? models
-				.map((model) => ({
-					id: model.id,
-					name: model.name ?? model.id,
-					external: true,
-					source: 'LiteLLM',
-					custom_info: model.custom_info
-				}))
-				.sort((a, b) => {
-					return a.name.localeCompare(b.name);
-				})
-		: models;
-};
-
-export const getLiteLLMModelInfo = async (token: string = '') => {
-	let error = null;
-
-	const res = await fetch(`${LITELLM_API_BASE_URL}/model/info`, {
-		method: 'GET',
-		headers: {
-			Accept: 'application/json',
-			'Content-Type': 'application/json',
-			...(token && { authorization: `Bearer ${token}` })
-		}
-	})
-		.then(async (res) => {
-			if (!res.ok) throw await res.json();
-			return res.json();
-		})
-		.catch((err) => {
-			console.log(err);
-			error = `LiteLLM: ${err?.error?.message ?? 'Network Problem'}`;
-			return [];
-		});
-
-	if (error) {
-		throw error;
-	}
-
-	const models = Array.isArray(res) ? res : res?.data ?? null;
-
-	return models;
-};
-
-type AddLiteLLMModelForm = {
-	name: string;
-	model: string;
-	api_base: string;
-	api_key: string;
-	rpm: string;
-	max_tokens: string;
-};
-
-export const addLiteLLMModel = async (token: string = '', payload: AddLiteLLMModelForm) => {
-	let error = null;
-
-	const res = await fetch(`${LITELLM_API_BASE_URL}/model/new`, {
-		method: 'POST',
-		headers: {
-			Accept: 'application/json',
-			'Content-Type': 'application/json',
-			...(token && { authorization: `Bearer ${token}` })
-		},
-		body: JSON.stringify({
-			model_name: payload.name,
-			litellm_params: {
-				model: payload.model,
-				...(payload.api_base === '' ? {} : { api_base: payload.api_base }),
-				...(payload.api_key === '' ? {} : { api_key: payload.api_key }),
-				...(isNaN(parseInt(payload.rpm)) ? {} : { rpm: parseInt(payload.rpm) }),
-				...(payload.max_tokens === '' ? {} : { max_tokens: payload.max_tokens })
-			}
-		})
-	})
-		.then(async (res) => {
-			if (!res.ok) throw await res.json();
-			return res.json();
-		})
-		.catch((err) => {
-			console.log(err);
-			error = `LiteLLM: ${err?.error?.message ?? 'Network Problem'}`;
-			return [];
-		});
-
-	if (error) {
-		throw error;
-	}
-
-	return res;
-};
-
-export const deleteLiteLLMModel = async (token: string = '', id: string) => {
-	let error = null;
-
-	const res = await fetch(`${LITELLM_API_BASE_URL}/model/delete`, {
-		method: 'POST',
-		headers: {
-			Accept: 'application/json',
-			'Content-Type': 'application/json',
-			...(token && { authorization: `Bearer ${token}` })
-		},
-		body: JSON.stringify({
-			id: id
-		})
-	})
-		.then(async (res) => {
-			if (!res.ok) throw await res.json();
-			return res.json();
-		})
-		.catch((err) => {
-			console.log(err);
-			error = `LiteLLM: ${err?.error?.message ?? 'Network Problem'}`;
-			return [];
-		});
-
-	if (error) {
-		throw error;
-	}
-
-	return res;
-};
diff --git a/src/lib/components/chat/Chat.svelte b/src/lib/components/chat/Chat.svelte
index ff025868c..507aa785d 100644
--- a/src/lib/components/chat/Chat.svelte
+++ b/src/lib/components/chat/Chat.svelte
@@ -35,12 +35,7 @@
 	import MessageInput from '$lib/components/chat/MessageInput.svelte';
 	import Messages from '$lib/components/chat/Messages.svelte';
 	import Navbar from '$lib/components/layout/Navbar.svelte';
-	import {
-		LITELLM_API_BASE_URL,
-		OLLAMA_API_BASE_URL,
-		OPENAI_API_BASE_URL,
-		WEBUI_BASE_URL
-	} from '$lib/constants';
+	import { OLLAMA_API_BASE_URL, OPENAI_API_BASE_URL, WEBUI_BASE_URL } from '$lib/constants';
 	import { createOpenAITextStream } from '$lib/apis/streaming';
 	import { queryMemory } from '$lib/apis/memories';
 	import type { Writable } from 'svelte/store';
@@ -733,9 +728,7 @@
 					docs: docs.length > 0 ? docs : undefined,
 					citations: docs.length > 0
 				},
-				model?.source?.toLowerCase() === 'litellm'
-					? `${LITELLM_API_BASE_URL}/v1`
-					: `${OPENAI_API_BASE_URL}`
+				`${OPENAI_API_BASE_URL}`
 			);
 
 			// Wait until history/message have been updated
diff --git a/src/lib/components/workspace/Playground.svelte b/src/lib/components/workspace/Playground.svelte
index 2142b15d6..476ce774d 100644
--- a/src/lib/components/workspace/Playground.svelte
+++ b/src/lib/components/workspace/Playground.svelte
@@ -5,12 +5,7 @@
 
 	import { toast } from 'svelte-sonner';
 
-	import {
-		LITELLM_API_BASE_URL,
-		OLLAMA_API_BASE_URL,
-		OPENAI_API_BASE_URL,
-		WEBUI_API_BASE_URL
-	} from '$lib/constants';
+	import { OLLAMA_API_BASE_URL, OPENAI_API_BASE_URL, WEBUI_API_BASE_URL } from '$lib/constants';
 	import { WEBUI_NAME, config, user, models, settings } from '$lib/stores';
 
 	import { cancelOllamaRequest, generateChatCompletion } from '$lib/apis/ollama';
@@ -79,11 +74,7 @@
 					}
 				]
 			},
-			model.external
-				? model.source === 'litellm'
-					? `${LITELLM_API_BASE_URL}/v1`
-					: `${OPENAI_API_BASE_URL}`
-				: `${OLLAMA_API_BASE_URL}/v1`
+			model?.owned_by === 'openai' ? `${OPENAI_API_BASE_URL}` : `${OLLAMA_API_BASE_URL}/v1`
 		);
 
 		if (res && res.ok) {
@@ -150,11 +141,7 @@
 					...messages
 				].filter((message) => message)
 			},
-			model.external
-				? model.source === 'litellm'
-					? `${LITELLM_API_BASE_URL}/v1`
-					: `${OPENAI_API_BASE_URL}`
-				: `${OLLAMA_API_BASE_URL}/v1`
+			model?.owned_by === 'openai' ? `${OPENAI_API_BASE_URL}` : `${OLLAMA_API_BASE_URL}/v1`
 		);
 
 		let responseMessage;
diff --git a/src/lib/constants.ts b/src/lib/constants.ts
index 3ae424440..9f6070bee 100644
--- a/src/lib/constants.ts
+++ b/src/lib/constants.ts
@@ -6,7 +6,6 @@ export const WEBUI_BASE_URL = browser ? (dev ? `http://${location.hostname}:8080
 
 export const WEBUI_API_BASE_URL = `${WEBUI_BASE_URL}/api/v1`;
 
-export const LITELLM_API_BASE_URL = `${WEBUI_BASE_URL}/litellm/api`;
 export const OLLAMA_API_BASE_URL = `${WEBUI_BASE_URL}/ollama`;
 export const OPENAI_API_BASE_URL = `${WEBUI_BASE_URL}/openai`;
 export const AUDIO_API_BASE_URL = `${WEBUI_BASE_URL}/audio/api/v1`;