Skip to content

Instantly share code, notes, and snippets.

@lucataco
Created October 9, 2025 23:23
Show Gist options
  • Select an option

  • Save lucataco/14b2dd12db49cfea20c6cadbc8151887 to your computer and use it in GitHub Desktop.

Select an option

Save lucataco/14b2dd12db49cfea20c6cadbc8151887 to your computer and use it in GitHub Desktop.
local CLI tool to run Kokoro TTS on Apple silicon (MBP)
#!/bin/bash
# Local CLI command that lets you use Kokoro TTS on a MacBook Pro.
#
# Requires you to first run the Kokoro docker container:
#   docker run -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-cpu:latest
# Then save this file to /usr/local/bin
# Finally you can test:
#   kokoro "The quick brown fox jumped over the lazy dog"
# Or even pipe from a stream like:
#   llm "tell me a joke" | kokoro
# See the Examples in the help message below for more.
#
# NOTE: the example commands above must stay commented out; left as bare
# statements they would be executed every time the script runs.

# Default values (overridable with -p/--port and -v/--voice)
PORT=8880
VOICE="af_sky"
# Help message
# Writes the usage text to stdout, one line per printf argument, and then
# terminates the script with status 0.
show_help() {
  printf '%s\n' \
    'Usage: kokoro [OPTIONS] [TEXT]' \
    'Send text to local Kokoro TTS server and play the audio.' \
    'Supports streaming - processes sentences as they arrive!' \
    'Options:' \
    '-p, --port PORT Port number (default: 8880)' \
    '-v, --voice VOICE Voice to use (default: af_sky)' \
    '-h, --help Show this help message' \
    'Examples:' \
    'kokoro "Hello world"' \
    'echo "Hello world" | kokoro' \
    'llm "tell me a joke" | kokoro' \
    'llm "explain quantum physics" | kokoro --voice af_bella'
  exit 0
}
# Parse arguments
TEXT=""

# Parse the command line into the globals PORT, VOICE and TEXT.
#   -p|--port PORT    sets PORT
#   -v|--voice VOICE  sets VOICE
#   -h|--help         prints the usage text via show_help (which exits)
#   anything else     is appended to TEXT, separated by single spaces, so
#                     `kokoro hello world` speaks both words
# Exits with status 1 when -p/-v is given without a value. Without that
# check `shift 2` fails without shifting anything and the loop never ends.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -p|--port)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires an argument" >&2
          exit 1
        fi
        PORT="$2"
        shift 2
        ;;
      -v|--voice)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires an argument" >&2
          exit 1
        fi
        VOICE="$2"
        shift 2
        ;;
      -h|--help)
        show_help
        ;;
      *)
        # Join positional words instead of keeping only the last one.
        TEXT="${TEXT:+$TEXT }$1"
        shift
        ;;
    esac
  done
}

parse_args "$@"
# Function to generate audio for one piece of text
# Globals:   PORT (read), VOICE (read)
# Arguments: $1 - text to synthesize
#            $2 - path of the WAV file to write
# Returns:   1 if the JSON payload could not be built, otherwise curl's
#            exit status (non-zero on connection failure or HTTP error)
generate_audio() {
  local text="$1"
  local output_file="$2"
  local payload

  # Build the whole request body with json.dumps so that both the text and
  # the voice name are escaped. The values are passed as arguments rather
  # than through `echo |`, which would append a newline to the text and
  # mangle inputs such as "-n".
  payload=$(python3 -c 'import json, sys; print(json.dumps({"input": sys.argv[1], "voice": sys.argv[2], "response_format": "wav"}))' "$text" "$VOICE") || return 1

  # --fail makes curl return non-zero on an HTTP error instead of saving the
  # server's error body as if it were audio.
  # stderr is silenced on purpose: callers decide what to report.
  curl -s --fail -X POST "http://localhost:${PORT}/v1/audio/speech" \
    -H "Content-Type: application/json" \
    -d "$payload" \
    -o "$output_file" 2>/dev/null
}
# Function to play audio file
# Arguments: $1 - path of the audio file to play
# Outputs:   an error on stderr when no supported player is installed
# Returns:   1 if the file is missing/empty or no player is available,
#            otherwise the exit status of the player that was run
play_audio() {
  local file="$1"

  # Nothing to play (generation failed or produced no data).
  if [ ! -s "$file" ]; then
    return 1
  fi

  # Players are tried in order; the first one found on this machine is used.
  # afplay ships with macOS, so it covers a Mac with nothing else installed.
  if command -v ffplay &> /dev/null; then
    ffplay -nodisp -autoexit -loglevel quiet "$file" 2>/dev/null
  elif command -v afplay &> /dev/null; then
    afplay "$file" 2>/dev/null
  elif command -v aplay &> /dev/null; then
    aplay -q "$file" 2>/dev/null
  elif command -v paplay &> /dev/null; then
    paplay "$file" 2>/dev/null
  elif command -v mpg123 &> /dev/null; then
    mpg123 -q "$file" 2>/dev/null
  else
    echo "Error: No audio player found" >&2
    return 1
  fi
}
# If text provided as argument, process it directly
if [ -n "$TEXT" ]; then
  # Create a private directory and put the clip inside it. A template with a
  # suffix after the X's ("kokoro.XXXXXX.wav") is not randomised by BSD/macOS
  # mktemp, which would give a fixed, predictable file name.
  temp_dir=$(mktemp -d "${TMPDIR:-/tmp}/kokoro.XXXXXX") || {
    echo "Error: could not create temporary directory" >&2
    exit 1
  }
  temp_file="$temp_dir/speech.wav"

  # Remove the clip on every exit path, including Ctrl+C during playback.
  trap 'rm -rf "$temp_dir"' EXIT
  trap 'exit 130' INT TERM

  # Report failure through the exit status instead of always exiting 0.
  status=0
  generate_audio "$TEXT" "$temp_file" || status=$?
  if [ "$status" -eq 0 ]; then
    play_audio "$temp_file" || status=$?
  fi
  exit "$status"
fi
# Streaming mode - process from stdin
# Text is read line by line and split into sentences at '.', '!' or '?'.
# Each sentence is synthesized by its own background job, and a single
# player job plays the clips in sentence order as they become ready.
if [ ! -t 0 ]; then
  echo "🎤 Streaming mode active... (Press Ctrl+C to stop)" >&2

  # Directory for audio queue. For sentence N a generator writes audio_N.wav
  # and then creates done_N to tell the player that the clip is complete.
  QUEUE_DIR=$(mktemp -d "${TMPDIR:-/tmp}/kokoro_queue.XXXXXX") || {
    echo "Error: could not create queue directory" >&2
    exit 1
  }
  PLAYER_PID=""
  GENERATOR_PIDS=()

  # Cleanup function: stop all jobs and remove the queue on Ctrl+C / TERM.
  cleanup() {
    echo "" >&2
    echo "🛑 Stopping playback..." >&2
    # Kill all child processes
    pkill -P $$ 2>/dev/null
    # Kill player if it exists
    if [ -n "$PLAYER_PID" ]; then
      kill "$PLAYER_PID" 2>/dev/null
    fi
    # Remove queue directory
    rm -rf "$QUEUE_DIR" 2>/dev/null
    exit 0
  }

  # Set up trap for Ctrl+C
  trap cleanup SIGINT SIGTERM

  counter=0

  # Start player process
  (
    while true; do
      # Look for next audio file to play
      audio_file="$QUEUE_DIR/audio_$counter.wav"
      done_file="$QUEUE_DIR/done_$counter"

      # Wait for file to be ready
      while [ ! -f "$done_file" ]; do
        sleep 0.05
        # The "finished" marker is only created after every generator has
        # ended, so a missing done file at that point means nothing is left.
        if [ -f "$QUEUE_DIR/finished" ] && [ ! -f "$done_file" ]; then
          exit 0
        fi
      done

      # Play the audio (skipped when generation produced no file)
      if [ -f "$audio_file" ]; then
        play_audio "$audio_file"
        rm -f "$audio_file" "$done_file"
      fi
      counter=$((counter + 1))
    done
  ) &
  PLAYER_PID=$!

  # Buffer for accumulating text
  buffer=""
  sentence_counter=0
  # Shortest prefix that ends in a sentence terminator plus trailing spaces.
  sentence_re='([^.!?]*[.!?][[:space:]]*)'

  # Read and process input (the `|| [ -n ... ]` keeps a final line that has
  # no trailing newline)
  while IFS= read -r line || [ -n "$line" ]; do
    buffer="$buffer$line "

    # Extract complete sentences
    while [[ "$buffer" =~ $sentence_re ]]; do
      sentence="${BASH_REMATCH[1]}"
      # The inner quotes make the sentence a literal prefix rather than a
      # glob pattern; unquoted, text such as "a[b]c." is never removed and
      # this loop never ends.
      buffer="${buffer#"$sentence"}"

      # Trim leading/trailing whitespace without echo, which would mangle
      # sentences such as "-n".
      sentence="${sentence#"${sentence%%[![:space:]]*}"}"
      sentence="${sentence%"${sentence##*[![:space:]]}"}"

      if [ -n "$sentence" ]; then
        # Generate audio in background
        (
          idx=$sentence_counter
          audio_file="$QUEUE_DIR/audio_$idx.wav"
          done_file="$QUEUE_DIR/done_$idx"
          generate_audio "$sentence" "$audio_file"
          touch "$done_file"
        ) &
        GENERATOR_PIDS+=("$!")
        sentence_counter=$((sentence_counter + 1))
      fi
    done
  done

  # Process remaining buffer (text after the last sentence terminator)
  buffer="${buffer#"${buffer%%[![:space:]]*}"}"
  buffer="${buffer%"${buffer##*[![:space:]]}"}"
  if [ -n "$buffer" ]; then
    audio_file="$QUEUE_DIR/audio_$sentence_counter.wav"
    done_file="$QUEUE_DIR/done_$sentence_counter"
    generate_audio "$buffer" "$audio_file"
    touch "$done_file"
  fi

  # Wait for all generation to complete. Only the generator jobs are waited
  # for: a bare `wait` would also wait for the player, which cannot exit
  # until the "finished" marker below exists, and the script would hang.
  for generator_pid in "${GENERATOR_PIDS[@]}"; do
    wait "$generator_pid" 2>/dev/null
  done

  # Signal player to finish
  touch "$QUEUE_DIR/finished"
  wait "$PLAYER_PID" 2>/dev/null

  # Cleanup
  rm -rf "$QUEUE_DIR"

  # Remove trap
  trap - SIGINT SIGTERM
else
  # Diagnostics go to stderr so they never mix with piped output.
  echo "Error: No text provided" >&2
  echo "Try 'kokoro --help' for more information" >&2
  exit 1
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment