kyutai-moshi (sha256:5c0776baf55d7e4e7a99f83ba840682ebf5398b08c71af5a7d97287d336e959e)

Published 2025-09-22 13:03:57 +01:00 by tom in tom/kyutai-moshi

Pull the image from the command line:

docker pull git.tomfos.tr/tom/kyutai-moshi@sha256:5c0776baf55d7e4e7a99f83ba840682ebf5398b08c71af5a7d97287d336e959e

Digest

sha256:5c0776baf55d7e4e7a99f83ba840682ebf5398b08c71af5a7d97287d336e959e

For more information on the Container registry, see the documentation.

Moshi is a speech-text foundation model and full-duplex spoken dialogue framework. It uses Mimi, a state-of-the-art streaming neural audio codec.

# debian.sh --arch 'arm64' out/ 'trixie' '@1757289600'

# Search path for executables, with /usr/local/bin listed first.
# /usr/local/bin also appears a second time further along the list; the repeat
# is redundant because lookups stop at the first directory that matches.
ENV PATH=/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin

# Install the minimal runtime packages (CA certificates, netbase, tzdata) and
# clear the apt caches in the same layer.
# The "/bin/sh -c" prefix shown by the image history is the wrapper Docker adds
# to shell-form RUN by itself; written literally it would hand only `set` to
# the inner shell, and the remaining commands would run without -eux.
RUN set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        netbase \
        tzdata \
    ; \
    apt-get dist-clean

# Signing-key fingerprint, version and tarball checksum read by the CPython
# build step to download and verify the source archive.
ENV GPG_KEY=7169605F62C751356D054A26A821E680E5FA6305 \
    PYTHON_VERSION=3.13.7 \
    PYTHON_SHA256=5462f9099dfd30e238def83c71d91897d8caa5ff6ebc7a50f14d4802cdaaa79a

# Build CPython from the upstream source tarball and install it under
# /usr/local, all in one layer so the build toolchain never persists:
#   1. remember the manually installed packages, then install build deps;
#   2. download the tarball, check its SHA-256 and its GPG signature;
#   3. configure, build and install (the `python` binary is relinked with an
#      $ORIGIN-relative rpath before `make install`);
#   4. delete test directories, bytecode files and static libpython archives;
#   5. mark every package as auto-installed, re-mark the saved set plus the
#      packages owning shared libraries the new binaries link against, and
#      purge the rest;
#   6. print the python3 and pip3 versions as a smoke test.
# The `find` expressions are written with balanced \( \) groups; the image
# history had rendered several of those parentheses as a bare `$` and split
# `-name idle_test` across two lines, which `find` would reject.
RUN set -eux; \
    \
    savedAptMark="$(apt-mark showmanual)"; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        dpkg-dev \
        gcc \
        gnupg \
        libbluetooth-dev \
        libbz2-dev \
        libc6-dev \
        libdb-dev \
        libffi-dev \
        libgdbm-dev \
        liblzma-dev \
        libncursesw5-dev \
        libreadline-dev \
        libsqlite3-dev \
        libssl-dev \
        make \
        tk-dev \
        uuid-dev \
        wget \
        xz-utils \
        zlib1g-dev \
    ; \
    \
    wget -O python.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz"; \
    echo "$PYTHON_SHA256 *python.tar.xz" | sha256sum -c -; \
    wget -O python.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc"; \
    GNUPGHOME="$(mktemp -d)"; export GNUPGHOME; \
    gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys "$GPG_KEY"; \
    gpg --batch --verify python.tar.xz.asc python.tar.xz; \
    gpgconf --kill all; \
    rm -rf "$GNUPGHOME" python.tar.xz.asc; \
    mkdir -p /usr/src/python; \
    tar --extract --directory /usr/src/python --strip-components=1 --file python.tar.xz; \
    rm python.tar.xz; \
    \
    cd /usr/src/python; \
    gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)"; \
    ./configure \
        --build="$gnuArch" \
        --enable-loadable-sqlite-extensions \
        --enable-optimizations \
        --enable-option-checking=fatal \
        --enable-shared \
        $(test "${gnuArch%%-*}" != 'riscv64' && echo '--with-lto') \
        --with-ensurepip \
    ; \
    nproc="$(nproc)"; \
    EXTRA_CFLAGS="$(dpkg-buildflags --get CFLAGS)"; \
    LDFLAGS="$(dpkg-buildflags --get LDFLAGS)"; \
    LDFLAGS="${LDFLAGS:--Wl},--strip-all"; \
    arch="$(dpkg --print-architecture)"; arch="${arch##*-}"; \
    case "$arch" in \
        amd64|arm64) \
            EXTRA_CFLAGS="${EXTRA_CFLAGS:-} -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer"; \
            ;; \
        i386) \
            ;; \
        *) \
            EXTRA_CFLAGS="${EXTRA_CFLAGS:-} -fno-omit-frame-pointer"; \
            ;; \
    esac; \
    make -j "$nproc" \
        "EXTRA_CFLAGS=${EXTRA_CFLAGS:-}" \
        "LDFLAGS=${LDFLAGS:-}" \
    ; \
    rm python; \
    make -j "$nproc" \
        "EXTRA_CFLAGS=${EXTRA_CFLAGS:-}" \
        "LDFLAGS=${LDFLAGS:--Wl},-rpath='\$\$ORIGIN/../lib'" \
        python \
    ; \
    make install; \
    \
    cd /; \
    rm -rf /usr/src/python; \
    \
    find /usr/local -depth \
        \( \
            \( -type d -a \( -name test -o -name tests -o -name idle_test \) \) \
            -o \( -type f -a \( -name '*.pyc' -o -name '*.pyo' -o -name 'libpython*.a' \) \) \
        \) -exec rm -rf '{}' + \
    ; \
    \
    ldconfig; \
    \
    apt-mark auto '.*' > /dev/null; \
    apt-mark manual $savedAptMark; \
    find /usr/local -type f -executable -not \( -name '*tkinter*' \) -exec ldd '{}' ';' \
        | awk '/=>/ { so = $(NF-1); if (index(so, "/usr/local/") == 1) { next }; gsub("^/(usr/)?", "", so); printf "*%s\n", so }' \
        | sort -u \
        | xargs -rt dpkg-query --search \
        | awk 'sub(":$", "", $1) { print $1 }' \
        | sort -u \
        | xargs -r apt-mark manual \
    ; \
    apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false; \
    apt-get dist-clean; \
    \
    export PYTHONDONTWRITEBYTECODE=1; \
    python3 --version; \
    pip3 --version

# Create unversioned aliases (idle, pip, pydoc, python, python-config) next to
# the versioned executables in /usr/local/bin. Each iteration first checks that
# the versioned file exists and is non-empty and that the alias name is still
# free; with `set -e` a failed check aborts the build.
# The "/bin/sh -c" prefix shown by the image history is omitted: written
# literally it would keep `set -eux` from applying to the loop.
RUN set -eux; \
    for src in idle3 pip3 pydoc3 python3 python3-config; do \
        dst="$(echo "$src" | tr -d 3)"; \
        [ -s "/usr/local/bin/$src" ]; \
        [ ! -e "/usr/local/bin/$dst" ]; \
        ln -svT "$src" "/usr/local/bin/$dst"; \
    done

# Default command left by the Python layers: start the interpreter.
# A later CMD in this listing replaces it, since only the last CMD of an image
# takes effect.
CMD ["python3"]

# Build argument naming the target CPU architecture, defaulting to arm64.
# NOTE(review): TARGETARCH is one of BuildKit's automatic platform arguments;
# the explicit default is presumably the resolved value recorded in the image
# history rather than something hard-coded in the original Dockerfile — confirm.
ARG TARGETARCH=arm64

# OCI image metadata. Every value is quoted: the description and authors
# contain spaces, and an unquoted space ends the value so the following words
# would be parsed as malformed key=value pairs.
LABEL org.opencontainers.image.title="kyutai-moshi" \
      org.opencontainers.image.description="Moshi speech-text foundation model for real-time dialogue" \
      org.opencontainers.image.url="https://git.tomfos.tr/tom/kyutai-moshi" \
      org.opencontainers.image.source="https://git.tomfos.tr/tom/kyutai-moshi" \
      org.opencontainers.image.documentation="https://git.tomfos.tr/tom/kyutai-moshi/src/branch/main/README.md" \
      org.opencontainers.image.vendor="git.tomfos.tr" \
      org.opencontainers.image.licenses="MIT" \
      org.opencontainers.image.authors="Kyutai Labs"

# Application root; WORKDIR creates the directory when it does not exist.
WORKDIR /app

# Dockerfile comments are only recognised at the start of a line, so the
# trailing "# buildkit" history annotations are dropped from the COPY lines:
# left in place they would be parsed as additional COPY arguments.
# NOTE(review): the image history shows no --from=<stage> flag; the wheels
# presumably come from an earlier build stage — confirm against the original
# Dockerfile.
COPY /app/wheels /app/wheels

# Model configuration files from the build context.
COPY configs/ /app/configs/

# Runtime defaults: the config file and repository name that the final CMD
# passes to moshi.server, plus the copied wheels directory on Python's module
# search path.
ENV CONFIG_PATH=/app/configs/moshi_7b_202409.json \
    HF_REPO=kyutai/moshiko-pytorch-bf16 \
    PYTHONPATH=/app/wheels

# NOTE(review): the registry page printed this instruction's port list as a raw
# Go struct dump, so the real value cannot be recovered from this listing.
# 8998 is the default port of `moshi.server` — confirm against the original
# Dockerfile before relying on it.
EXPOSE 8998

# Start the Moshi server. A shell is used so that $HF_REPO and $CONFIG_PATH are
# expanded when the container starts; `exec` replaces that shell with the
# Python process so it runs as PID 1 and receives stop signals directly.
# The array elements are comma-separated as exec form requires; without the
# commas the line is not valid JSON and Docker treats it as a shell-form string.
# The variables are quoted so values containing spaces stay single arguments.
CMD ["sh", "-c", "exec python -m moshi.server --hf-repo \"$HF_REPO\" --config-path \"$CONFIG_PATH\""]

Key	Value
org.opencontainers.image.authors	Kyutai Labs
org.opencontainers.image.created	2025-09-22T11:13:26.336Z
org.opencontainers.image.description	Moshi is a speech-text foundation model and full-duplex spoken dialogue framework. It uses Mimi, a state-of-the-art streaming neural audio codec.
org.opencontainers.image.documentation	https://git.tomfos.tr/tom/kyutai-moshi/src/branch/main/README.md
org.opencontainers.image.licenses
org.opencontainers.image.revision	b98c28747ac93e994a4e8f4fd2ae129480aae992
org.opencontainers.image.source	https://git.tomfos.tr/tom/kyutai-moshi
org.opencontainers.image.title	kyutai-moshi
org.opencontainers.image.url	https://git.tomfos.tr/tom/kyutai-moshi
org.opencontainers.image.vendor	git.tomfos.tr
org.opencontainers.image.version	main