Forgejo build
All checks were successful
Build and Push Docker Image / build (push) Successful in 51m41s

This commit is contained in:
Tom Foster 2025-07-14 17:20:44 +01:00
parent 97868dbbb0
commit 2c902a1369
6 changed files with 185 additions and 47 deletions

View file

@ -0,0 +1,53 @@
# Builds the Moshi Docker image with Buildx and publishes it to the Forgejo
# container registry. Runs on pushes to main, on pull requests targeting main,
# and when a release is published.
name: Build and Push Docker Image

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  release:
    types: [published]

env:
  # Registry host shared by the login step, the image name and the build cache refs.
  REGISTRY: git.tomfos.tr

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: https://github.com/actions/checkout@v4

      - name: Set up Docker Buildx
        uses: https://github.com/docker/setup-buildx-action@v3

      - name: Login to Container Registry
        uses: https://github.com/docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ gitea.actor }}
          password: ${{ secrets.FORGEJO_TOKEN }}

      # Derives image tags and OCI labels from the triggering ref:
      # branch name, PR number, semver (full and major.minor), and `latest`
      # on the default branch.
      - name: Extract metadata
        id: meta
        uses: https://github.com/docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/tom/kyutai-moshi
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Build and push Docker image
        uses: https://github.com/docker/build-push-action@v5
        with:
          context: .
          file: ./moshi/Dockerfile
          # Pull requests only validate that the image builds; images are
          # published for pushes to main and for releases.
          push: ${{ gitea.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ gitea.repository }}:build-cache
          cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ gitea.repository }}:build-cache,mode=max

View file

@ -0,0 +1,46 @@
# Server configuration exposing a single batched speech-to-text (ASR) module.
# NOTE(review): log_dir and instance_name below say "tts" although the only
# module configured in this file is ASR — confirm whether that is intentional.
static_dir = "./static/"
log_dir = "$HOME/tmp/tts-logs"
instance_name = "tts"
# presumably the tokens clients must present to use the API — verify against the server
authorized_ids = ["public_token"]
# ASR module of type "BatchedAsr", mounted at /api/asr-streaming.
[modules.asr]
path = "/api/asr-streaming"
type = "BatchedAsr"
# Model weights and tokenizers are referenced with hf:// URIs
# (presumably resolved from the Hugging Face Hub — confirm).
lm_model_file = "hf://kyutai/stt-1b-en_fr-candle/model.safetensors"
text_tokenizer_file = "hf://kyutai/stt-1b-en_fr-candle/tokenizer_en_fr_audio_8000.model"
audio_tokenizer_file = "hf://kyutai/stt-1b-en_fr-candle/mimi-pytorch-e351c8d8@125.safetensors"
asr_delay_in_tokens = 6
batch_size = 64
conditioning_learnt_padding = true
temperature = 0.0
# Vocabulary sizes and codebook count for the ASR language model.
[modules.asr.model]
audio_vocab_size = 2049
text_in_vocab_size = 8001
text_out_vocab_size = 8000
audio_codebooks = 32
# Transformer hyperparameters; these presumably have to match the checkpoint
# named in lm_model_file — verify before changing any value.
[modules.asr.model.transformer]
d_model = 2048
num_heads = 16
num_layers = 16
dim_feedforward = 8192
causal = true
norm_first = true
bias_ff = false
bias_attn = false
context = 750
max_period = 100000
use_conv_block = false
use_conv_bias = true
gating = "silu"
norm = "RmsNorm"
positional_embedding = "Rope"
conv_layout = false
conv_kernel_size = 3
kv_repeat = 1
max_seq_len = 40960
# Additional output heads on top of the transformer.
[modules.asr.model.extra_heads]
num_heads = 4
dim = 6

20
configs/config-tts.toml Normal file
View file

@ -0,0 +1,20 @@
# Server configuration exposing a single text-to-speech module.
static_dir = "./static/"
log_dir = "$HOME/tmp/tts-logs"
instance_name = "tts"
# presumably the tokens clients must present to use the API — verify against the server
authorized_ids = ["public_token"]
# TTS module of type "Py", mounted at /api/tts_streaming.
[modules.tts_py]
type = "Py"
path = "/api/tts_streaming"
text_tokenizer_file = "hf://kyutai/tts-1.6b-en_fr/tokenizer_spm_8k_en_fr_audio.model"
batch_size = 8 # Adjust to your GPU memory capacity
text_bos_token = 1
# Settings nested under the module's `py` table.
# Note that log_folder here is a different directory from the top-level log_dir.
[modules.tts_py.py]
log_folder = "$HOME/tmp/moshi-server-logs"
# Glob over a repository snapshot (hf-snapshot:// scheme) selecting the voice files.
voice_folder = "hf-snapshot://kyutai/tts-voices/**/*.safetensors"
default_voice = "unmute-prod-website/default_voice.wav"
cfg_coef = 2.0
cfg_is_no_text = true
padding_between = 1
n_q = 24

View file

@ -1,24 +1,45 @@
# Use an official Python runtime as a parent image
FROM python:3.10
# Build stage
FROM python:3-slim-bookworm AS builder
# Set the working directory in the container
# Install build dependencies and the Rust toolchain in a single layer.
# --no-install-recommends keeps optional packages out of the builder stage.
# The rustup installer is downloaded to a file before it is executed, so a
# failed download aborts the build instead of piping an empty script into sh
# (the default /bin/sh does not propagate failures from the left side of a pipe).
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        libopus-dev \
        pkg-config \
    && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh \
    && sh /tmp/rustup-init.sh -y \
    && rm -f /tmp/rustup-init.sh \
    && rm -rf /var/lib/apt/lists/*
ENV PATH="/root/.cargo/bin:${PATH}"
# Set the working directory
WORKDIR /app
# Copy the current directory contents into the container at /app
COPY . /app
# Copy only necessary files for installation
COPY moshi/ ./moshi/
# Install any needed packages specified in requirements.txt
# Assuming you have a requirements.txt file in the moshi directory
RUN pip install --no-cache-dir -r requirements.txt
# Install the requirements file and the local moshi package into /app/wheels
# with a single pip invocation, so dependencies are resolved and downloaded
# once and both inputs are satisfied by one consistent set of versions.
# (Two separate `--target` installs resolve twice and the second one runs
# into the already-populated target directory.)
RUN pip install --no-cache-dir --target /app/wheels \
        -r ./moshi/requirements.txt \
        ./moshi/.
# Install Moshi and gradio
RUN pip install --no-cache-dir moshi gradio
# Runtime stage
# NOTE(review): packages are installed in the builder stage and copied here via
# /app/wheels, so this image must resolve to the same Python minor version as the
# builder's base image. Both stages use the floating `python:3` tag — consider
# pinning both to one explicit minor version (and confirm any shared libraries
# the copied packages load are present in this slim image).
FROM python:3-slim-bookworm
# Expose the port used by the server
# Set the working directory
WORKDIR /app
# Copy configs and installed packages from appropriate sources
COPY --from=builder /app/wheels /app/wheels
COPY configs/ /app/configs/
# Set environment variables
# CONFIG_PATH and HF_REPO are expanded by the shell in CMD, so both can be
# overridden at `docker run` time with -e.
# PYTHONPATH points at the packages copied from the builder stage.
ENV CONFIG_PATH="/app/configs/moshi_7b_202409.json" \
HF_REPO="kyutai/moshiko-pytorch-bf16" \
PYTHONPATH="/app/wheels"
# Expose the port
EXPOSE 8998
# Set environment variable for the model (with a default value)
ENV HF_REPO=kyutai/moshiko-pytorch-bf16
# Run the server when the container launches
CMD python -m moshi.server --gradio-tunnel --hf-repo $HF_REPO
# Run the server. A shell is required to expand HF_REPO and CONFIG_PATH;
# `exec` replaces that shell with the Python process so the server receives
# stop signals (e.g. SIGTERM from `docker stop`) directly. The expansions are
# quoted so a value containing whitespace is passed as a single argument.
CMD ["sh", "-c", "exec python -m moshi.server --hf-repo \"$HF_REPO\" --config-path \"$CONFIG_PATH\""]

View file

@ -3,21 +3,20 @@ name = "moshi"
requires-python = ">= 3.10"
description = "Moshi is moshi"
dependencies = [
"numpy >= 1.26, < 2.3",
"safetensors >= 0.4.0, < 0.6",
"huggingface-hub >= 0.24, < 0.34",
"bitsandbytes >= 0.45, < 0.46; sys_platform == 'linux'",
"einops >= 0.7, < 0.9",
"sentencepiece == 0.2",
"sounddevice == 0.5",
"sphn >= 0.1.4, < 0.2.0",
"torch >= 2.2.0, < 2.8",
"aiohttp>=3.10.5, <3.12",
"pytest >= 8.3.3",
"numpy>=1.26.4",
"safetensors>=0.4.4",
"huggingface-hub>=0.24, <0.34",
"einops>=0.7.0",
"sentencepiece>=0.2.0",
"sounddevice>=0.5.0",
"soundfile>=0.12.1",
"sphn>=0.1.4",
"torch>=2.2.0",
"aiohttp>=3.10.5, <3.11",
]
authors = [{name="Laurent Mazaré", email="laurent@kyutai.org"}]
maintainers = [{name="Laurent Mazaré", email="laurent@kyutai.org"}]
license = {text = "MIT"}
authors = [{ name = "Laurent Mazaré", email = "laurent@kyutai.org" }]
maintainers = [{ name = "Laurent Mazaré", email = "laurent@kyutai.org" }]
license = { text = "MIT" }
dynamic = ["version"]
readme = "README.md"
@ -28,17 +27,16 @@ moshi-inference = "moshi.run_inference:main"
moshi-tts = "moshi.run_tts:main"
[tool.setuptools.dynamic]
version = {attr = "moshi.__version__"}
version = { attr = "moshi.__version__" }
[tool.setuptools.packages.find]
where = ["."]
include = ["moshi*"]
exclude = ["wheels"]
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
[project.optional-dependencies]
dev = [
"pyright",
"pytest",
"flake8",
"pre-commit",
"gradio-webrtc>=0.0.18"
]
dev = ["pyright", "pytest", "flake8", "pre-commit", "gradio-webrtc>=0.0.18"]

View file

@ -1,11 +1,11 @@
einops==0.7.0
safetensors==0.4.4
sentencepiece==0.2.0
sounddevice==0.5.0
soundfile==0.12.1
sphn==0.1.4
torch==2.2.0
numpy==1.26.4
einops>=0.7.0
safetensors>=0.4.4
sentencepiece>=0.2.0
sounddevice>=0.5.0
soundfile>=0.12.1
sphn>=0.1.4
torch>=2.2.0
numpy>=1.26.4
aiohttp>=3.10.5, <3.11
huggingface-hub>=0.24, <0.34
pytest==8.3.3
pytest>=8.3.3