kyutai-moshi (sha256:5c0776baf55d7e4e7a99f83ba840682ebf5398b08c71af5a7d97287d336e959e)
Installation
docker pull git.tomfos.tr/tom/kyutai-moshi@sha256:5c0776baf55d7e4e7a99f83ba840682ebf5398b08c71af5a7d97287d336e959e
sha256:5c0776baf55d7e4e7a99f83ba840682ebf5398b08c71af5a7d97287d336e959e
About this package
Moshi is a speech-text foundation model and full-duplex spoken dialogue framework. It uses Mimi, a state-of-the-art streaming neural audio codec.
Image layers
# debian.sh --arch 'arm64' out/ 'trixie' '@1757289600' |
ENV PATH=/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin |
RUN /bin/sh -c set -eux; apt-get update; apt-get install -y --no-install-recommends ca-certificates netbase tzdata ; apt-get dist-clean # buildkit |
ENV GPG_KEY=7169605F62C751356D054A26A821E680E5FA6305 |
ENV PYTHON_VERSION=3.13.7 |
ENV PYTHON_SHA256=5462f9099dfd30e238def83c71d91897d8caa5ff6ebc7a50f14d4802cdaaa79a |
RUN /bin/sh -c set -eux; savedAptMark="$(apt-mark showmanual)"; apt-get update; apt-get install -y --no-install-recommends dpkg-dev gcc gnupg libbluetooth-dev libbz2-dev libc6-dev libdb-dev libffi-dev libgdbm-dev liblzma-dev libncursesw5-dev libreadline-dev libsqlite3-dev libssl-dev make tk-dev uuid-dev wget xz-utils zlib1g-dev ; wget -O python.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz"; echo "$PYTHON_SHA256 *python.tar.xz" | sha256sum -c -; wget -O python.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc"; GNUPGHOME="$(mktemp -d)"; export GNUPGHOME; gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys "$GPG_KEY"; gpg --batch --verify python.tar.xz.asc python.tar.xz; gpgconf --kill all; rm -rf "$GNUPGHOME" python.tar.xz.asc; mkdir -p /usr/src/python; tar --extract --directory /usr/src/python --strip-components=1 --file python.tar.xz; rm python.tar.xz; cd /usr/src/python; gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)"; ./configure --build="$gnuArch" --enable-loadable-sqlite-extensions --enable-optimizations --enable-option-checking=fatal --enable-shared $(test "${gnuArch%%-*}" != 'riscv64' && echo '--with-lto') --with-ensurepip ; nproc="$(nproc)"; EXTRA_CFLAGS="$(dpkg-buildflags --get CFLAGS)"; LDFLAGS="$(dpkg-buildflags --get LDFLAGS)"; LDFLAGS="${LDFLAGS:--Wl},--strip-all"; arch="$(dpkg --print-architecture)"; arch="${arch##*-}"; case "$arch" in amd64|arm64) EXTRA_CFLAGS="${EXTRA_CFLAGS:-} -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer"; ;; i386) ;; *) EXTRA_CFLAGS="${EXTRA_CFLAGS:-} -fno-omit-frame-pointer"; ;; esac; make -j "$nproc" "EXTRA_CFLAGS=${EXTRA_CFLAGS:-}" "LDFLAGS=${LDFLAGS:-}" ; rm python; make -j "$nproc" "EXTRA_CFLAGS=${EXTRA_CFLAGS:-}" "LDFLAGS=${LDFLAGS:--Wl},-rpath='\$\$ORIGIN/../lib'" python ; make install; cd /; rm -rf /usr/src/python; find /usr/local -depth \( \( -type d -a \( -name test -o -name tests -o 
-name idle_test \) \) -o \( -type f -a \( -name '*.pyc' -o -name '*.pyo' -o -name 'libpython*.a' \) \) \) -exec rm -rf '{}' + ; ldconfig; apt-mark auto '.*' > /dev/null; apt-mark manual $savedAptMark; find /usr/local -type f -executable -not \( -name '*tkinter*' \) -exec ldd '{}' ';' | awk '/=>/ { so = $(NF-1); if (index(so, "/usr/local/") == 1) { next }; gsub("^/(usr/)?", "", so); printf "*%s\n", so }' | sort -u | xargs -rt dpkg-query --search | awk 'sub(":$", "", $1) { print $1 }' | sort -u | xargs -r apt-mark manual ; apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false; apt-get dist-clean; export PYTHONDONTWRITEBYTECODE=1; python3 --version; pip3 --version # buildkit |
RUN /bin/sh -c set -eux; for src in idle3 pip3 pydoc3 python3 python3-config; do dst="$(echo "$src" | tr -d 3)"; [ -s "/usr/local/bin/$src" ]; [ ! -e "/usr/local/bin/$dst" ]; ln -svT "$src" "/usr/local/bin/$dst"; done # buildkit |
CMD ["python3"] |
ARG TARGETARCH=arm64 |
LABEL org.opencontainers.image.title=kyutai-moshi org.opencontainers.image.description="Moshi speech-text foundation model for real-time dialogue" org.opencontainers.image.url=https://git.tomfos.tr/tom/kyutai-moshi org.opencontainers.image.source=https://git.tomfos.tr/tom/kyutai-moshi org.opencontainers.image.documentation=https://git.tomfos.tr/tom/kyutai-moshi/src/branch/main/README.md org.opencontainers.image.vendor=git.tomfos.tr org.opencontainers.image.licenses=MIT org.opencontainers.image.authors="Kyutai Labs" |
WORKDIR /app |
COPY /app/wheels /app/wheels # buildkit |
COPY configs/ /app/configs/ # buildkit |
ENV CONFIG_PATH=/app/configs/moshi_7b_202409.json HF_REPO=kyutai/moshiko-pytorch-bf16 PYTHONPATH=/app/wheels |
EXPOSE &{[{{71 0} {71 0}}] 0xc00a761f40} |
CMD ["sh", "-c", "python -m moshi.server --hf-repo $HF_REPO --config-path $CONFIG_PATH"] |
Labels
Key | Value |
---|---|
org.opencontainers.image.authors | Kyutai Labs |
org.opencontainers.image.created | 2025-09-22T11:13:26.336Z |
org.opencontainers.image.description | Moshi is a speech-text foundation model and full-duplex spoken dialogue framework. It uses Mimi, a state-of-the-art streaming neural audio codec. |
org.opencontainers.image.documentation | https://git.tomfos.tr/tom/kyutai-moshi/src/branch/main/README.md |
org.opencontainers.image.licenses | |
org.opencontainers.image.revision | b98c28747ac93e994a4e8f4fd2ae129480aae992 |
org.opencontainers.image.source | https://git.tomfos.tr/tom/kyutai-moshi |
org.opencontainers.image.title | kyutai-moshi |
org.opencontainers.image.url | https://git.tomfos.tr/tom/kyutai-moshi |
org.opencontainers.image.vendor | git.tomfos.tr |
org.opencontainers.image.version | main |
Details
2025-09-22 13:03:57 +01:00
Versions (2)
View all
Container
0
OCI / Docker
linux/arm64
Kyutai Labs
227 MiB