mirror of https://github.com/open-webui/open-webui
Merge branch 'dev'
This commit is contained in:
commit
7354e8ebae
|
@ -8,3 +8,294 @@ node_modules
|
|||
!.env.example
|
||||
vite.config.js.timestamp-*
|
||||
vite.config.ts.timestamp-*
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
lerna-debug.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# Diagnostic reports (https://nodejs.org/api/report.html)
|
||||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
||||
|
||||
# Runtime data
|
||||
pids
|
||||
*.pid
|
||||
*.seed
|
||||
*.pid.lock
|
||||
|
||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||
lib-cov
|
||||
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage
|
||||
*.lcov
|
||||
|
||||
# nyc test coverage
|
||||
.nyc_output
|
||||
|
||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
||||
.grunt
|
||||
|
||||
# Bower dependency directory (https://bower.io/)
|
||||
bower_components
|
||||
|
||||
# node-waf configuration
|
||||
.lock-wscript
|
||||
|
||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
||||
build/Release
|
||||
|
||||
# Dependency directories
|
||||
node_modules/
|
||||
jspm_packages/
|
||||
|
||||
# Snowpack dependency directory (https://snowpack.dev/)
|
||||
web_modules/
|
||||
|
||||
# TypeScript cache
|
||||
*.tsbuildinfo
|
||||
|
||||
# Optional npm cache directory
|
||||
.npm
|
||||
|
||||
# Optional eslint cache
|
||||
.eslintcache
|
||||
|
||||
# Optional stylelint cache
|
||||
.stylelintcache
|
||||
|
||||
# Microbundle cache
|
||||
.rpt2_cache/
|
||||
.rts2_cache_cjs/
|
||||
.rts2_cache_es/
|
||||
.rts2_cache_umd/
|
||||
|
||||
# Optional REPL history
|
||||
.node_repl_history
|
||||
|
||||
# Output of 'npm pack'
|
||||
*.tgz
|
||||
|
||||
# Yarn Integrity file
|
||||
.yarn-integrity
|
||||
|
||||
# dotenv environment variable files
|
||||
.env
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
.env.local
|
||||
|
||||
# parcel-bundler cache (https://parceljs.org/)
|
||||
.cache
|
||||
.parcel-cache
|
||||
|
||||
# Next.js build output
|
||||
.next
|
||||
out
|
||||
|
||||
# Nuxt.js build / generate output
|
||||
.nuxt
|
||||
dist
|
||||
|
||||
# Gatsby files
|
||||
.cache/
|
||||
# Comment in the public line in if your project uses Gatsby and not Next.js
|
||||
# https://nextjs.org/blog/next-9-1#public-directory-support
|
||||
# public
|
||||
|
||||
# vuepress build output
|
||||
.vuepress/dist
|
||||
|
||||
# vuepress v2.x temp and cache directory
|
||||
.temp
|
||||
.cache
|
||||
|
||||
# Docusaurus cache and generated files
|
||||
.docusaurus
|
||||
|
||||
# Serverless directories
|
||||
.serverless/
|
||||
|
||||
# FuseBox cache
|
||||
.fusebox/
|
||||
|
||||
# DynamoDB Local files
|
||||
.dynamodb/
|
||||
|
||||
# TernJS port file
|
||||
.tern-port
|
||||
|
||||
# Stores VSCode versions used for testing VSCode extensions
|
||||
.vscode-test
|
||||
|
||||
# yarn v2
|
||||
.yarn/cache
|
||||
.yarn/unplugged
|
||||
.yarn/build-state.yml
|
||||
.yarn/install-state.gz
|
||||
.pnp.*
|
50
README.md
50
README.md
|
@ -27,7 +27,7 @@ Also check our sibling project, [OllamaHub](https://ollamahub.com/), where you c
|
|||
|
||||
- ⚡ **Swift Responsiveness**: Enjoy fast and responsive performance.
|
||||
|
||||
- 🚀 **Effortless Setup**: Install seamlessly using Docker for a hassle-free experience.
|
||||
- 🚀 **Effortless Setup**: Install seamlessly using Docker or Kubernetes (kubectl, kustomize or helm) for a hassle-free experience.
|
||||
|
||||
- 💻 **Code Syntax Highlighting**: Enjoy enhanced code readability with our syntax highlighting feature.
|
||||
|
||||
|
@ -81,30 +81,50 @@ Don't forget to explore our sibling project, [OllamaHub](https://ollamahub.com/)
|
|||
|
||||
### Installing Both Ollama and Ollama Web UI Using Docker Compose
|
||||
|
||||
If you don't have Ollama installed yet, you can use the provided Docker Compose file for a hassle-free installation. Simply run the following command:
|
||||
If you don't have Ollama installed yet, you can use the provided bash script for a hassle-free installation. Simply run the following command:
|
||||
|
||||
For cpu-only container
|
||||
```bash
|
||||
docker compose up -d --build
|
||||
chmod +x run-compose.sh && ./run-compose.sh
|
||||
```
|
||||
|
||||
This command will install both Ollama and Ollama Web UI on your system.
|
||||
|
||||
#### Enable GPU
|
||||
|
||||
Use the additional Docker Compose file designed to enable GPU support by running the following command:
|
||||
|
||||
For gpu-enabled container (to enable this you must have your gpu driver for docker, it mostly works with nvidia so this is the official install guide: [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html))
|
||||
```bash
|
||||
docker compose -f docker-compose.yml -f docker-compose.gpu.yml up -d --build
|
||||
chmod +x run-compose.sh && ./run-compose.sh --enable-gpu[count=1]
|
||||
```
|
||||
|
||||
#### Expose Ollama API outside the container stack
|
||||
|
||||
Deploy the service with an additional Docker Compose file designed for API exposure:
|
||||
|
||||
Note that both the above commands will use the latest production docker image in repository, to be able to build the latest local version you'll need to append the `--build` parameter, for example:
|
||||
```bash
|
||||
docker compose -f docker-compose.yml -f docker-compose.api.yml up -d --build
|
||||
./run-compose.sh --build --enable-gpu[count=1]
|
||||
```
|
||||
|
||||
### Installing Both Ollama and Ollama Web UI Using Kustomize
|
||||
For cpu-only pod
|
||||
```bash
|
||||
kubectl apply -f ./kubernetes/manifest/base
|
||||
```
|
||||
For gpu-enabled pod
|
||||
```bash
|
||||
kubectl apply -k ./kubernetes/manifest
|
||||
```
|
||||
|
||||
### Installing Both Ollama and Ollama Web UI Using Helm
|
||||
Package Helm file first
|
||||
```bash
|
||||
helm package ./kubernetes/helm/
|
||||
```
|
||||
|
||||
For cpu-only pod
|
||||
```bash
|
||||
helm install ollama-webui ./ollama-webui-*.tgz
|
||||
```
|
||||
For gpu-enabled pod
|
||||
```bash
|
||||
helm install ollama-webui ./ollama-webui-*.tgz --set ollama.resources.limits.nvidia.com/gpu="1"
|
||||
```
|
||||
|
||||
Check the `kubernetes/helm/values.yaml` file to know which parameters are available for customization
|
||||
|
||||
### Installing Ollama Web UI Only
|
||||
|
||||
#### Prerequisites
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
version: '3.8'
|
||||
|
||||
services:
|
||||
ollama:
|
||||
ports:
|
||||
- ${OLLAMA_WEBAPI_PORT-11434}:11434
|
|
@ -0,0 +1,6 @@
|
|||
version: '3.8'
|
||||
|
||||
services:
|
||||
ollama:
|
||||
volumes:
|
||||
- ${OLLAMA_DATA_DIR-./ollama-data}:/root/.ollama
|
|
@ -0,0 +1,12 @@
|
|||
version: '3.8'
|
||||
|
||||
services:
|
||||
ollama:
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: ${OLLAMA_GPU_DRIVER-nvidia}
|
||||
count: ${OLLAMA_GPU_COUNT-1}
|
||||
capabilities:
|
||||
- gpu
|
|
@ -1,4 +1,4 @@
|
|||
version: '3.6'
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
ollama:
|
||||
|
@ -16,14 +16,14 @@ services:
|
|||
args:
|
||||
OLLAMA_API_BASE_URL: '/ollama/api'
|
||||
dockerfile: Dockerfile
|
||||
image: ollama-webui:latest
|
||||
image: ghcr.io/ollama-webui/ollama-webui:main
|
||||
container_name: ollama-webui
|
||||
volumes:
|
||||
- ollama-webui:/app/backend/data
|
||||
depends_on:
|
||||
- ollama
|
||||
ports:
|
||||
- 3000:8080
|
||||
- ${OLLAMA_WEBUI_PORT-3000}:8080
|
||||
environment:
|
||||
- "OLLAMA_API_BASE_URL=http://ollama:11434/api"
|
||||
extra_hosts:
|
|
@ -0,0 +1,5 @@
|
|||
apiVersion: v2
|
||||
name: ollama-webui
|
||||
description: "Ollama Web UI: A User-Friendly Web Interface for Chat Interactions 👋"
|
||||
version: 1.0.0
|
||||
icon: https://raw.githubusercontent.com/ollama-webui/ollama-webui/main/static/favicon.png
|
|
@ -0,0 +1,4 @@
|
|||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: {{ .Values.namespace }}
|
|
@ -0,0 +1,12 @@
|
|||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: ollama-service
|
||||
namespace: {{ .Values.namespace }}
|
||||
spec:
|
||||
selector:
|
||||
app: ollama
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: {{ .Values.ollama.servicePort }}
|
||||
targetPort: {{ .Values.ollama.servicePort }}
|
|
@ -0,0 +1,38 @@
|
|||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: ollama
|
||||
namespace: {{ .Values.namespace }}
|
||||
spec:
|
||||
serviceName: "ollama"
|
||||
replicas: {{ .Values.ollama.replicaCount }}
|
||||
selector:
|
||||
matchLabels:
|
||||
app: ollama
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: ollama
|
||||
spec:
|
||||
containers:
|
||||
- name: ollama
|
||||
image: {{ .Values.ollama.image }}
|
||||
ports:
|
||||
- containerPort: {{ .Values.ollama.servicePort }}
|
||||
resources:
|
||||
limits:
|
||||
cpu: {{ .Values.ollama.resources.limits.cpu }}
|
||||
memory: {{ .Values.ollama.resources.limits.memory }}
|
||||
nvidia.com/gpu: {{ .Values.ollama.resources.limits.gpu }}
|
||||
volumeMounts:
|
||||
- name: ollama-volume
|
||||
mountPath: /root/.ollama
|
||||
tty: true
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: ollama-volume
|
||||
spec:
|
||||
accessModes: [ "ReadWriteOnce" ]
|
||||
resources:
|
||||
requests:
|
||||
storage: 1Gi
|
|
@ -0,0 +1,28 @@
|
|||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: ollama-webui-deployment
|
||||
namespace: {{ .Values.namespace }}
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: ollama-webui
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: ollama-webui
|
||||
spec:
|
||||
containers:
|
||||
- name: ollama-webui
|
||||
image: ghcr.io/ollama-webui/ollama-webui:main
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
resources:
|
||||
limits:
|
||||
cpu: "500m"
|
||||
memory: "500Mi"
|
||||
env:
|
||||
- name: OLLAMA_API_BASE_URL
|
||||
value: "http://ollama-service.{{ .Values.namespace }}.svc.cluster.local:{{ .Values.ollama.servicePort }}/api"
|
||||
tty: true
|
|
@ -0,0 +1,20 @@
|
|||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: ollama-webui-ingress
|
||||
namespace: {{ .Values.namespace }}
|
||||
#annotations:
|
||||
# Use appropriate annotations for your Ingress controller, e.g., for NGINX:
|
||||
# nginx.ingress.kubernetes.io/rewrite-target: /
|
||||
spec:
|
||||
rules:
|
||||
- host: {{ .Values.webui.ingress.host }}
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: ollama-webui-service
|
||||
port:
|
||||
number: {{ .Values.webui.servicePort }}
|
|
@ -0,0 +1,15 @@
|
|||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: ollama-webui-service
|
||||
namespace: {{ .Values.namespace }}
|
||||
spec:
|
||||
type: NodePort # Use LoadBalancer if you're on a cloud that supports it
|
||||
selector:
|
||||
app: ollama-webui
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: {{ .Values.webui.servicePort }}
|
||||
targetPort: {{ .Values.webui.servicePort }}
|
||||
# If using NodePort, you can optionally specify the nodePort:
|
||||
# nodePort: 30000
|
|
@ -0,0 +1,23 @@
|
|||
namespace: ollama-namespace
|
||||
|
||||
ollama:
|
||||
replicaCount: 1
|
||||
image: ollama/ollama:latest
|
||||
servicePort: 11434
|
||||
resources:
|
||||
limits:
|
||||
cpu: "2000m"
|
||||
memory: "2Gi"
|
||||
nvidia.com/gpu: "0"
|
||||
volumeSize: 1Gi
|
||||
|
||||
webui:
|
||||
replicaCount: 1
|
||||
image: ghcr.io/ollama-webui/ollama-webui:main
|
||||
servicePort: 8080
|
||||
resources:
|
||||
limits:
|
||||
cpu: "500m"
|
||||
memory: "500Mi"
|
||||
ingress:
|
||||
host: ollama.minikube.local
|
|
@ -0,0 +1,4 @@
|
|||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: ollama-namespace
|
|
@ -0,0 +1,12 @@
|
|||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: ollama-service
|
||||
namespace: ollama-namespace
|
||||
spec:
|
||||
selector:
|
||||
app: ollama
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 11434
|
||||
targetPort: 11434
|
|
@ -0,0 +1,37 @@
|
|||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: ollama
|
||||
namespace: ollama-namespace
|
||||
spec:
|
||||
serviceName: "ollama"
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: ollama
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: ollama
|
||||
spec:
|
||||
containers:
|
||||
- name: ollama
|
||||
image: ollama/ollama:latest
|
||||
ports:
|
||||
- containerPort: 11434
|
||||
resources:
|
||||
limits:
|
||||
cpu: "2000m"
|
||||
memory: "2Gi"
|
||||
volumeMounts:
|
||||
- name: ollama-volume
|
||||
mountPath: /root/.ollama
|
||||
tty: true
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: ollama-volume
|
||||
spec:
|
||||
accessModes: [ "ReadWriteOnce" ]
|
||||
resources:
|
||||
requests:
|
||||
storage: 1Gi
|
|
@ -0,0 +1,28 @@
|
|||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: ollama-webui-deployment
|
||||
namespace: ollama-namespace
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: ollama-webui
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: ollama-webui
|
||||
spec:
|
||||
containers:
|
||||
- name: ollama-webui
|
||||
image: ghcr.io/ollama-webui/ollama-webui:main
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
resources:
|
||||
limits:
|
||||
cpu: "500m"
|
||||
memory: "500Mi"
|
||||
env:
|
||||
- name: OLLAMA_API_BASE_URL
|
||||
value: "http://ollama-service.ollama-namespace.svc.cluster.local:11434/api"
|
||||
tty: true
|
|
@ -0,0 +1,20 @@
|
|||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: ollama-webui-ingress
|
||||
namespace: ollama-namespace
|
||||
#annotations:
|
||||
# Use appropriate annotations for your Ingress controller, e.g., for NGINX:
|
||||
# nginx.ingress.kubernetes.io/rewrite-target: /
|
||||
spec:
|
||||
rules:
|
||||
- host: ollama.minikube.local
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: ollama-webui-service
|
||||
port:
|
||||
number: 8080
|
|
@ -0,0 +1,15 @@
|
|||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: ollama-webui-service
|
||||
namespace: ollama-namespace
|
||||
spec:
|
||||
type: NodePort # Use LoadBalancer if you're on a cloud that supports it
|
||||
selector:
|
||||
app: ollama-webui
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
targetPort: 8080
|
||||
# If using NodePort, you can optionally specify the nodePort:
|
||||
# nodePort: 30000
|
|
@ -0,0 +1,12 @@
|
|||
resources:
|
||||
- base/ollama-namespace.yaml
|
||||
- base/ollama-service.yaml
|
||||
- base/ollama-statefulset.yaml
|
||||
- base/webui-deployment.yaml
|
||||
- base/webui-service.yaml
|
||||
- base/webui-ingress.yaml
|
||||
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
patches:
|
||||
- path: patches/ollama-statefulset-gpu.yaml
|
|
@ -0,0 +1,17 @@
|
|||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: ollama
|
||||
namespace: ollama-namespace
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: ollama
|
||||
serviceName: "ollama"
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: ollama
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: "1"
|
|
@ -0,0 +1,237 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Define color and formatting codes
|
||||
BOLD='\033[1m'
|
||||
GREEN='\033[1;32m'
|
||||
WHITE='\033[1;37m'
|
||||
RED='\033[0;31m'
|
||||
NC='\033[0m' # No Color
|
||||
# Unicode character for tick mark
|
||||
TICK='\u2713'
|
||||
|
||||
# Detect GPU driver
|
||||
get_gpu_driver() {
|
||||
# Detect NVIDIA GPUs
|
||||
if lspci | grep -i nvidia >/dev/null; then
|
||||
echo "nvidia"
|
||||
return
|
||||
fi
|
||||
|
||||
# Detect AMD GPUs (including GCN architecture check for amdgpu vs radeon)
|
||||
if lspci | grep -i amd >/dev/null; then
|
||||
# List of known GCN and later architecture cards
|
||||
# This is a simplified list, and in a real-world scenario, you'd want a more comprehensive one
|
||||
local gcn_and_later=("Radeon HD 7000" "Radeon HD 8000" "Radeon R5" "Radeon R7" "Radeon R9" "Radeon RX")
|
||||
|
||||
# Get GPU information
|
||||
local gpu_info=$(lspci | grep -i 'vga.*amd')
|
||||
|
||||
for model in "${gcn_and_later[@]}"; do
|
||||
if echo "$gpu_info" | grep -iq "$model"; then
|
||||
echo "amdgpu"
|
||||
return
|
||||
fi
|
||||
done
|
||||
|
||||
# Default to radeon if no GCN or later architecture is detected
|
||||
echo "radeon"
|
||||
return
|
||||
fi
|
||||
|
||||
# Detect Intel GPUs
|
||||
if lspci | grep -i intel >/dev/null; then
|
||||
echo "i915"
|
||||
return
|
||||
fi
|
||||
|
||||
# If no known GPU is detected
|
||||
echo "Unknown or unsupported GPU driver"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Function for rolling animation
|
||||
show_loading() {
|
||||
local spin='-\|/'
|
||||
local i=0
|
||||
|
||||
printf " "
|
||||
|
||||
while kill -0 $1 2>/dev/null; do
|
||||
i=$(( (i+1) %4 ))
|
||||
printf "\b${spin:$i:1}"
|
||||
sleep .1
|
||||
done
|
||||
|
||||
# Replace the spinner with a tick
|
||||
printf "\b${GREEN}${TICK}${NC}"
|
||||
}
|
||||
|
||||
# Usage information
|
||||
usage() {
|
||||
echo "Usage: $0 [OPTIONS]"
|
||||
echo "Options:"
|
||||
echo " --enable-gpu[count=COUNT] Enable GPU support with the specified count."
|
||||
echo " --enable-api[port=PORT] Enable API and expose it on the specified port."
|
||||
echo " --webui[port=PORT] Set the port for the web user interface."
|
||||
echo " --data[folder=PATH] Bind mount for ollama data folder (by default will create the 'ollama' volume)."
|
||||
echo " --build Build the docker image before running the compose project."
|
||||
echo " --drop Drop the compose project."
|
||||
echo " -q, --quiet Run script in headless mode."
|
||||
echo " -h, --help Show this help message."
|
||||
echo ""
|
||||
echo "Examples:"
|
||||
echo " ./$0 --drop"
|
||||
echo " ./$0 --enable-gpu[count=1]"
|
||||
echo " ./$0 --enable-api[port=11435]"
|
||||
echo " ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
|
||||
echo " ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data]"
|
||||
echo " ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data] --build"
|
||||
echo ""
|
||||
echo "This script configures and runs a docker-compose setup with optional GPU support, API exposure, and web UI configuration."
|
||||
echo "About the gpu to use, the script automatically detects it using the "lspci" command."
|
||||
echo "In this case the gpu detected is: $(get_gpu_driver)"
|
||||
}
|
||||
|
||||
# Default values
|
||||
gpu_count=1
|
||||
api_port=11435
|
||||
webui_port=3000
|
||||
headless=false
|
||||
build_image=false
|
||||
kill_compose=false
|
||||
|
||||
# Function to extract value from the parameter
|
||||
extract_value() {
|
||||
echo "$1" | sed -E 's/.*\[.*=(.*)\].*/\1/; t; s/.*//'
|
||||
}
|
||||
|
||||
# Parse arguments
|
||||
while [[ $# -gt 0 ]]; do
|
||||
key="$1"
|
||||
|
||||
case $key in
|
||||
--enable-gpu*)
|
||||
enable_gpu=true
|
||||
value=$(extract_value "$key")
|
||||
gpu_count=${value:-1}
|
||||
;;
|
||||
--enable-api*)
|
||||
enable_api=true
|
||||
value=$(extract_value "$key")
|
||||
api_port=${value:-11435}
|
||||
;;
|
||||
--webui*)
|
||||
value=$(extract_value "$key")
|
||||
webui_port=${value:-3000}
|
||||
;;
|
||||
--data*)
|
||||
value=$(extract_value "$key")
|
||||
data_dir=${value:-"./ollama-data"}
|
||||
;;
|
||||
--drop)
|
||||
kill_compose=true
|
||||
;;
|
||||
--build)
|
||||
build_image=true
|
||||
;;
|
||||
-q|--quiet)
|
||||
headless=true
|
||||
;;
|
||||
-h|--help)
|
||||
usage
|
||||
exit
|
||||
;;
|
||||
*)
|
||||
# Unknown option
|
||||
echo "Unknown option: $key"
|
||||
usage
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
shift # past argument or value
|
||||
done
|
||||
|
||||
if [[ $kill_compose == true ]]; then
|
||||
docker compose down --remove-orphans
|
||||
echo -e "${GREEN}${BOLD}Compose project dropped successfully.${NC}"
|
||||
exit
|
||||
else
|
||||
DEFAULT_COMPOSE_COMMAND="docker compose -f docker-compose.yaml"
|
||||
if [[ $enable_gpu == true ]]; then
|
||||
# Validate and process command-line arguments
|
||||
if [[ -n $gpu_count ]]; then
|
||||
if ! [[ $gpu_count =~ ^[0-9]+$ ]]; then
|
||||
echo "Invalid GPU count: $gpu_count"
|
||||
exit 1
|
||||
fi
|
||||
echo "Enabling GPU with $gpu_count GPUs"
|
||||
# Add your GPU allocation logic here
|
||||
export OLLAMA_GPU_DRIVER=$(get_gpu_driver)
|
||||
export OLLAMA_GPU_COUNT=$gpu_count # Set OLLAMA_GPU_COUNT environment variable
|
||||
fi
|
||||
DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.gpu.yaml"
|
||||
fi
|
||||
if [[ $enable_api == true ]]; then
|
||||
DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.api.yaml"
|
||||
if [[ -n $api_port ]]; then
|
||||
export OLLAMA_WEBAPI_PORT=$api_port # Set OLLAMA_WEBAPI_PORT environment variable
|
||||
fi
|
||||
fi
|
||||
if [[ -n $data_dir ]]; then
|
||||
DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.data.yaml"
|
||||
export OLLAMA_DATA_DIR=$data_dir # Set OLLAMA_DATA_DIR environment variable
|
||||
fi
|
||||
DEFAULT_COMPOSE_COMMAND+=" up -d"
|
||||
DEFAULT_COMPOSE_COMMAND+=" --remove-orphans"
|
||||
DEFAULT_COMPOSE_COMMAND+=" --force-recreate"
|
||||
if [[ $build_image == true ]]; then
|
||||
DEFAULT_COMPOSE_COMMAND+=" --build"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Recap of environment variables
|
||||
echo
|
||||
echo -e "${WHITE}${BOLD}Current Setup:${NC}"
|
||||
echo -e " ${GREEN}${BOLD}GPU Driver:${NC} ${OLLAMA_GPU_DRIVER:-Not Enabled}"
|
||||
echo -e " ${GREEN}${BOLD}GPU Count:${NC} ${OLLAMA_GPU_COUNT:-Not Enabled}"
|
||||
echo -e " ${GREEN}${BOLD}WebAPI Port:${NC} ${OLLAMA_WEBAPI_PORT:-Not Enabled}"
|
||||
echo -e " ${GREEN}${BOLD}Data Folder:${NC} ${data_dir:-Using ollama volume}"
|
||||
echo -e " ${GREEN}${BOLD}WebUI Port:${NC} $webui_port"
|
||||
echo
|
||||
|
||||
if [[ $headless == true ]]; then
|
||||
echo -ne "${WHITE}${BOLD}Running in headless mode... ${NC}"
|
||||
choice="y"
|
||||
else
|
||||
# Ask for user acceptance
|
||||
echo -ne "${WHITE}${BOLD}Do you want to proceed with current setup? (Y/n): ${NC}"
|
||||
read -n1 -s choice
|
||||
fi
|
||||
|
||||
echo
|
||||
|
||||
if [[ $choice == "" || $choice == "y" ]]; then
|
||||
# Execute the command with the current user
|
||||
eval "$DEFAULT_COMPOSE_COMMAND" &
|
||||
|
||||
# Capture the background process PID
|
||||
PID=$!
|
||||
|
||||
# Display the loading animation
|
||||
#show_loading $PID
|
||||
|
||||
# Wait for the command to finish
|
||||
wait $PID
|
||||
|
||||
echo
|
||||
# Check exit status
|
||||
if [ $? -eq 0 ]; then
|
||||
echo -e "${GREEN}${BOLD}Compose project started successfully.${NC}"
|
||||
else
|
||||
echo -e "${RED}${BOLD}There was an error starting the compose project.${NC}"
|
||||
fi
|
||||
else
|
||||
echo "Aborted."
|
||||
fi
|
||||
|
||||
echo
|
Loading…
Reference in New Issue