Forgejo build

2025-07-14 17:20:44 +01:00 · 2025-07-14 17:20:44 +01:00 · 2c902a1369
commit 2c902a1369
parent 97868dbbb0
6 changed files with 185 additions and 47 deletions
--- a/.forgejo/workflows/docker-build.yml
+++ b/.forgejo/workflows/docker-build.yml
@@ -0,0 +1,53 @@
+name: Build and Push Docker Image # builds ./moshi/Dockerfile and publishes it to the registry in env.REGISTRY
+
+on:
+  push:
+    branches: [main]
+  pull_request: # PRs are built for validation only; see the push gate on the build step
+    branches: [main]
+  release:
+    types: [published]
+
+env:
+  REGISTRY: git.tomfos.tr
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: https://github.com/actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: https://github.com/docker/setup-buildx-action@v3
+
+      - name: Login to Container Registry
+        uses: https://github.com/docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ gitea.actor }}
+          password: ${{ secrets.FORGEJO_TOKEN }}
+
+      - name: Extract metadata
+        id: meta
+        uses: https://github.com/docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/tom/kyutai-moshi # same registry host as the login step
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=raw,value=latest,enable={{is_default_branch}}
+
+      - name: Build and push Docker image
+        uses: https://github.com/docker/build-push-action@v5
+        with:
+          context: . # repository root, so the Dockerfile can COPY both moshi/ and configs/
+          file: ./moshi/Dockerfile
+          push: ${{ gitea.event_name != 'pull_request' }} # never publish images built from unmerged PR code
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          platforms: linux/amd64,linux/arm64
+          cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ gitea.repository }}:build-cache
+          cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ gitea.repository }}:build-cache,mode=max
--- a/configs/config-stt-en_fr-hf.toml
+++ b/configs/config-stt-en_fr-hf.toml
@@ -0,0 +1,46 @@
+static_dir = "./static/"
+log_dir = "$HOME/tmp/tts-logs"  # NOTE(review): "tts" in an STT config, same value as configs/config-tts.toml — confirm intended; TOML does not expand $HOME itself
+instance_name = "tts"  # NOTE(review): same instance name as configs/config-tts.toml — confirm intended
+authorized_ids = ["public_token"]
+
+[modules.asr]  # ASR module; all three model files below come from hf://kyutai/stt-1b-en_fr-candle
+path = "/api/asr-streaming"
+type = "BatchedAsr"
+lm_model_file = "hf://kyutai/stt-1b-en_fr-candle/model.safetensors"
+text_tokenizer_file = "hf://kyutai/stt-1b-en_fr-candle/tokenizer_en_fr_audio_8000.model"
+audio_tokenizer_file = "hf://kyutai/stt-1b-en_fr-candle/mimi-pytorch-e351c8d8@125.safetensors"
+asr_delay_in_tokens = 6
+batch_size = 64
+conditioning_learnt_padding = true
+temperature = 0.0  # presumably selects deterministic (greedy) decoding — TODO confirm against the server
+
+[modules.asr.model]
+audio_vocab_size = 2049
+text_in_vocab_size = 8001  # one more than text_out_vocab_size — presumably an extra input-only token; TODO confirm
+text_out_vocab_size = 8000
+audio_codebooks = 32
+
+[modules.asr.model.transformer]
+d_model = 2048
+num_heads = 16
+num_layers = 16
+dim_feedforward = 8192
+causal = true
+norm_first = true
+bias_ff = false
+bias_attn = false
+context = 750
+max_period = 100000
+use_conv_block = false
+use_conv_bias = true
+gating = "silu"
+norm = "RmsNorm"
+positional_embedding = "Rope"
+conv_layout = false
+conv_kernel_size = 3
+kv_repeat = 1
+max_seq_len = 40960
+
+[modules.asr.model.extra_heads]  # separate table from [modules.asr.model.transformer]; its num_heads is independent of the 16 above
+num_heads = 4
+dim = 6
--- a/configs/config-tts.toml
+++ b/configs/config-tts.toml
@@ -0,0 +1,20 @@
+static_dir = "./static/"
+log_dir = "$HOME/tmp/tts-logs"  # TOML does not expand $HOME itself; presumably the server does — TODO confirm
+instance_name = "tts"
+authorized_ids = ["public_token"]
+
+[modules.tts_py]  # TTS module of type "Py"; its nested settings live in [modules.tts_py.py] below
+type = "Py"
+path = "/api/tts_streaming"  # note the underscore; the STT config uses a hyphen ("/api/asr-streaming")
+text_tokenizer_file = "hf://kyutai/tts-1.6b-en_fr/tokenizer_spm_8k_en_fr_audio.model"
+batch_size = 8  # Adjust to your GPU memory capacity
+text_bos_token = 1
+
+[modules.tts_py.py]
+log_folder = "$HOME/tmp/moshi-server-logs"  # distinct from the top-level log_dir ("$HOME/tmp/tts-logs")
+voice_folder = "hf-snapshot://kyutai/tts-voices/**/*.safetensors"
+default_voice = "unmute-prod-website/default_voice.wav"  # NOTE(review): .wav name while voice_folder globs *.safetensors — presumably mapped to a matching file; confirm
+cfg_coef = 2.0
+cfg_is_no_text = true
+padding_between = 1
+n_q = 24
--- a/moshi/Dockerfile
+++ b/moshi/Dockerfile
@@ -1,24 +1,45 @@
-# Use an official Python runtime as a parent image
-FROM python:3.10
+# Build stage: installs the Python packages into /app/wheels for the runtime stage to copy
+FROM python:3-slim-bookworm AS builder

-# Set the working directory in the container
+# Install the C/C++ toolchain, cmake, libopus development files and Rust (via rustup) in a single layer
+RUN apt-get update && apt-get install -y \
+  build-essential \
+  cmake \
+  curl \
+  libopus-dev \
+  pkg-config \
+  && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y \
+  && rm -rf /var/lib/apt/lists/*
+
+ENV PATH="/root/.cargo/bin:${PATH}"
+
+# Set the working directory
 WORKDIR /app

-# Copy the current directory contents into the container at /app
-COPY . /app
+# Copy only the moshi/ sources needed for installation (path is relative to the build context)
+COPY moshi/ ./moshi/

-# Install any needed packages specified in requirements.txt
-# Assuming you have a requirements.txt file in the moshi directory
-RUN pip install --no-cache-dir -r requirements.txt
+# Install the requirements and the moshi package itself into /app/wheels (pip --target) in a single layer
+RUN pip install --no-cache-dir -r ./moshi/requirements.txt --target /app/wheels \
+  && pip install --no-cache-dir ./moshi/. --target /app/wheels

-# Install Moshi and gradio
-RUN pip install --no-cache-dir moshi gradio
+# Runtime stage: fresh slim image that only receives the prebuilt packages and the configs
+FROM python:3-slim-bookworm

-# Expose the port used by the server
+# Set the working directory
+WORKDIR /app
+
+# Copy installed packages from the builder stage and configs from the build context
+COPY --from=builder /app/wheels /app/wheels
+COPY configs/ /app/configs/
+
+# Defaults consumed by the CMD below; PYTHONPATH makes the pip --target directory importable
+ENV CONFIG_PATH="/app/configs/moshi_7b_202409.json" \
+  HF_REPO="kyutai/moshiko-pytorch-bf16" \
+  PYTHONPATH="/app/wheels"
+
+# Expose the port. NOTE(review): --gradio-tunnel was dropped; confirm moshi.server listens on a non-loopback address, otherwise this port is unreachable
 EXPOSE 8998

-# Set environment variable for the model (with a default value)
-ENV HF_REPO=kyutai/moshiko-pytorch-bf16
-
-# Run the server when the container launches
-CMD python -m moshi.server --gradio-tunnel --hf-repo $HF_REPO
+# Run the server; exec replaces the shell so python is PID 1 and receives stop signals, and quoting keeps each value a single argument
+CMD ["sh", "-c", "exec python -m moshi.server --hf-repo \"$HF_REPO\" --config-path \"$CONFIG_PATH\""]
--- a/moshi/pyproject.toml
+++ b/moshi/pyproject.toml
@@ -3,21 +3,20 @@ name = "moshi"
 requires-python = ">= 3.10"
 description = "Moshi is moshi"
 dependencies = [
-    "numpy >= 1.26, < 2.3",
-    "safetensors >= 0.4.0, < 0.6",
-    "huggingface-hub >= 0.24, < 0.34",
-    "bitsandbytes >= 0.45, < 0.46; sys_platform == 'linux'",
-    "einops >= 0.7, < 0.9",
-    "sentencepiece == 0.2",
-    "sounddevice == 0.5",
-    "sphn >= 0.1.4, < 0.2.0",
-    "torch >= 2.2.0, < 2.8",
-    "aiohttp>=3.10.5, <3.12",
-    "pytest >= 8.3.3",
+  "numpy>=1.26.4",
+  "safetensors>=0.4.4",
+  "huggingface-hub>=0.24, <0.34",
+  "einops>=0.7.0",
+  "sentencepiece>=0.2.0",
+  "sounddevice>=0.5.0",
+  "soundfile>=0.12.1",
+  "sphn>=0.1.4",
+  "torch>=2.2.0",  # NOTE(review): the previous "< 2.8" cap was dropped — confirm newer torch releases are supported
+  "aiohttp>=3.10.5, <3.11",  # NOTE(review): cap tightened from <3.12; now matches moshi/requirements.txt — confirm intentional
 ]
-authors = [{name="Laurent Mazaré", email="laurent@kyutai.org"}]
-maintainers = [{name="Laurent Mazaré", email="laurent@kyutai.org"}]
-license = {text = "MIT"}
+authors = [{ name = "Laurent Mazaré", email = "laurent@kyutai.org" }]
+maintainers = [{ name = "Laurent Mazaré", email = "laurent@kyutai.org" }]
+license = { text = "MIT" }
 dynamic = ["version"]
 readme = "README.md"

@@ -28,17 +27,16 @@ moshi-inference = "moshi.run_inference:main"
 moshi-tts = "moshi.run_tts:main"

 [tool.setuptools.dynamic]
-version = {attr = "moshi.__version__"}
+version = { attr = "moshi.__version__" }  # version is read from the moshi package's __version__ attribute
+
+[tool.setuptools.packages.find]  # explicit package discovery, rooted at the directory containing this file
+where = ["."]
+include = ["moshi*"]
+exclude = ["wheels"]  # NOTE(review): "wheels" does not match include = ["moshi*"], so this looks redundant — confirm

 [build-system]
 requires = ["setuptools"]
 build-backend = "setuptools.build_meta"

 [project.optional-dependencies]
-dev = [
-    "pyright",
-    "pytest",
-    "flake8",
-    "pre-commit",
-    "gradio-webrtc>=0.0.18"
-]
+dev = ["pyright", "pytest", "flake8", "pre-commit", "gradio-webrtc>=0.0.18"]
--- a/moshi/requirements.txt
+++ b/moshi/requirements.txt
@@ -1,11 +1,11 @@
-einops==0.7.0
-safetensors==0.4.4
-sentencepiece==0.2.0
-sounddevice==0.5.0
-soundfile==0.12.1
-sphn==0.1.4
-torch==2.2.0
-numpy==1.26.4
+einops>=0.7.0  # NOTE(review): exact pins relaxed to lower bounds throughout this file, so image builds are no longer reproducible — consider a constraints/lock file
+safetensors>=0.4.4
+sentencepiece>=0.2.0
+sounddevice>=0.5.0
+soundfile>=0.12.1
+sphn>=0.1.4
+torch>=2.2.0
+numpy>=1.26.4
 aiohttp>=3.10.5, <3.11
 huggingface-hub>=0.24, <0.34
-pytest==8.3.3
+pytest>=8.3.3  # NOTE(review): test-only; dropped from pyproject.toml dependencies in this commit but still installed into the image via moshi/Dockerfile — confirm