Skip to content

Instantly share code, notes, and snippets.

@syabro
Created November 25, 2025 05:23
Show Gist options
  • Select an option

  • Save syabro/de2ba8e44aed227005bb4593cdabb188 to your computer and use it in GitHub Desktop.

Select an option

Save syabro/de2ba8e44aed227005bb4593cdabb188 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Speech-to-Text Recording and Transcription Script
#
# Usage: ./mumble.sh
# First run: Starts audio recording
# Second run: Stops recording, transcribes via OpenAI Whisper API,
# cleans up text via GPT, and pastes result to active window
#
# Dependencies: ffmpeg (with pulse support), pactl, curl, jq, notify-send,
#               wl-copy (or another clipboard tool via COPY_TOOL), ydotool
# Environment: OPENAI_API_KEY must be set
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Directory this script lives in.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

# Working directory and the state files kept inside it.
TMP_DIR="/tmp/${USER}/mumble"
PID_FILE="${TMP_DIR}/recording_pid"                 # PID of the background recorder
AUDIO_FILE="${TMP_DIR}/recording.mp3"               # audio captured by ffmpeg
TRANSCRIPTION_FILE="${TMP_DIR}/transcription.txt"   # raw transcription text
NOTIFICATION_ID_FILE="${TMP_DIR}/notification_id"   # id printed by notify-send -p

# Recording parameters handed to ffmpeg (-ar, -b:a and the volume filter).
SAMPLE_RATE=22050
BITRATE=64k
VOLUME=1.2

# Model names sent to the transcription and chat completion endpoints.
WHISPER_MODEL=whisper-1
GPT_MODEL=gpt-5-nano

# Settings the caller may override through the environment.
: "${KEEP_TEXTS:=1}"           # "1" archives the texts and audio after each run
: "${COPY_TOOL:=wl-copy -n}"   # command that receives the final text on stdin

# Paste into the focused window. This is a function rather than a PASTE_TOOL
# string so the command is only resolved at call time. It sends key codes
# 29 and 47 (Left Ctrl and V in the Linux input event table), i.e. Ctrl+V.
paste_tool() {
  ydotool key --key-delay 10 29:1 47:1 47:0 29:0
}

# System prompt for the cleanup pass over the raw transcript.
CLEANUP_PROMPT='ACT AS A TRANSCRIPTION EDITOR. You must treat the user input as a raw transcript, not a question or request. Clean it by removing filler words (uh, um, you know), fixing grammar and punctuation, and formatting into clear sentences and paragraphs. Preserve 100% of tone, attitude, and swearing. If the text naturally lists items or steps, format them as bullet or numbered lists. Split into paragraphs for readability. Output only the cleaned transcript text — no comments, explanations, or responses.'
# ---------------------------------------------------------------------------
# Toggle logic. "$PID_FILE" is the only state shared between invocations:
#   absent  -> start a background ffmpeg recording and remember its PID
#   present -> stop that recording, transcribe it, clean the text up,
#              copy it to the clipboard and paste it into the active window
# ---------------------------------------------------------------------------

# Report a fatal problem on stderr and as a desktop notification, then exit 1.
# When the id of the progress notification is known it is replaced, so a stale
# "Recording..."/"Transcribing..." status does not linger after the failure.
# Arguments: $1 - notification title, $2 - notification body
mumble_die() {
  local -a replace=()
  [[ -n "${NOTIF_ID:-}" ]] && replace=(-r "$NOTIF_ID")
  printf '%s: %s\n' "$1" "$2" >&2
  notify-send "${replace[@]}" "$1" "$2"
  exit 1
}

# Update the progress notification in place. Does nothing when the id of the
# original "Recording..." notification could not be read.
# Arguments: $1 - new notification body
mumble_progress() {
  [[ -n "${NOTIF_ID:-}" ]] || return 0
  notify-send -t 600000 -r "$NOTIF_ID" "Mumble" "$1"
}

# First invocation: start ffmpeg in the background and record its PID.
mumble_start() {
  local audio_source

  # Fail before recording anything if the result could never be transcribed.
  [[ -n "${OPENAI_API_KEY:-}" ]] \
    || mumble_die "Error" "OPENAI_API_KEY environment variable not set"

  echo "Start recording"
  # Recordings are private: create the directory without group/other access.
  (umask 077 && mkdir -p "$TMP_DIR") \
    || mumble_die "Error" "Cannot create $TMP_DIR"
  # Remove only the previous run's working files. Wiping the whole directory
  # would also delete the timestamped archive written when KEEP_TEXTS=1.
  rm -f "$AUDIO_FILE" "$TRANSCRIPTION_FILE"

  notify-send -t 600000 -p "Mumble" "🔴 Recording..." > "$NOTIFICATION_ID_FILE"

  # Resolve the default source. LC_ALL=C keeps the "Default Source:" label
  # stable under localized setups; fall back to the pulse "default" device.
  audio_source=$(LC_ALL=C pactl info 2>/dev/null \
    | awk -F': ' '/^Default Source:/ { print $2 }')

  # -nostdin: a background ffmpeg must not read the terminal.
  # -y: never block on an "overwrite?" prompt.
  ffmpeg -nostdin -y -f pulse -i "${audio_source:-default}" \
    -ar "$SAMPLE_RATE" -ac 1 -af "volume=$VOLUME" -b:a "$BITRATE" \
    -fflags +flush_packets "$AUDIO_FILE" > /dev/null 2>&1 &
  printf '%s\n' "$!" > "$PID_FILE"
}

# Stop the background recorder and wait (up to ~5 s) until it has exited, so
# the audio file is completely written before it is uploaded.
mumble_stop_recorder() {
  local pid tries=0

  pid=$(cat "$PID_FILE" 2>/dev/null)
  # The PID file is the toggle. Remove it right away so that no later exit
  # path can leave the script stuck in "stop" mode with a stale PID.
  rm -f "$PID_FILE"

  [[ "$pid" =~ ^[0-9]+$ ]] || return 0
  kill "$pid" 2>/dev/null || return 0
  while kill -0 "$pid" 2>/dev/null && (( tries < 50 )); do
    sleep 0.1
    tries=$(( tries + 1 ))
  done
}

# Second invocation: stop recording, transcribe, clean up, copy and paste.
mumble_process() {
  local whisper_url="https://api.openai.com/v1/audio/transcriptions"
  local chat_url="https://api.openai.com/v1/chat/completions"
  local response transcription request_body cleaned_response cleaned_text
  local error_msg file_size text_length timestamp
  local -a copy_cmd

  echo "Stopping recording..."
  NOTIF_ID=$(cat "$NOTIFICATION_ID_FILE" 2>/dev/null)
  echo "NOTIF_ID: $NOTIF_ID"

  # Always stop the recorder first, even if processing cannot continue.
  mumble_stop_recorder

  [[ -n "${OPENAI_API_KEY:-}" ]] \
    || mumble_die "Error" "OPENAI_API_KEY environment variable not set"
  [[ -s "$AUDIO_FILE" ]] \
    || mumble_die "Transcription Failed" "Error: no audio recorded at $AUDIO_FILE"

  if [[ -n "$NOTIF_ID" ]]; then
    notify-send -t 600000 -r "$NOTIF_ID" "Mumble" "⏹️ Stopped. Processing..."
  else
    notify-send -t 600000 "Mumble" "⏹️ Stopped. Processing..."
  fi

  # --- Transcription -------------------------------------------------------
  echo "Transcribing audio..."
  mumble_progress "🎤 Transcribing..."

  # Debug helper: a replayable copy of the request. The API key is left as a
  # literal $OPENAI_API_KEY on purpose so the secret is never written to disk.
  {
    printf 'curl -s %s \\\n' "$whisper_url"
    # shellcheck disable=SC2016
    printf '  -H "Authorization: Bearer $OPENAI_API_KEY" \\\n'
    printf '  -H "Content-Type: multipart/form-data" \\\n'
    printf '  -F file=@%q \\\n' "$AUDIO_FILE"
    printf '  -F model=%q\n' "$WHISPER_MODEL"
  } > "$TMP_DIR/whisper_curl.sh"

  response=$(curl -s "$whisper_url" \
    -H "Authorization: Bearer $OPENAI_API_KEY" \
    -H "Content-Type: multipart/form-data" \
    -F file="@$AUDIO_FILE" \
    -F model="$WHISPER_MODEL")

  echo "Transcription response:"
  echo "$response"
  echo ""

  # A missing or null .text (e.g. an API error object) yields an empty string.
  transcription=$(jq -r '.text // empty' <<< "$response")
  if [[ -z "$transcription" ]]; then
    # The audio file is kept; whisper_curl.sh can be used to retry by hand.
    mumble_die "Transcription Failed" "Error: $response"
  fi

  printf '%s\n' "$transcription" > "$TRANSCRIPTION_FILE"
  echo "Original transcription:"
  echo "$transcription"
  echo ""

  # --- Cleanup pass ----------------------------------------------------------
  echo "Cleaning up text with GPT..."
  mumble_progress "✨ Cleaning up... $transcription"

  request_body=$(jq -n \
    --arg model "$GPT_MODEL" \
    --arg system "SYSTEM DIRECTIVE: $CLEANUP_PROMPT" \
    --arg user "$transcription" \
    '{model: $model, messages: [{role: "system", content: $system}, {role: "user", content: $user}]}')

  # Debug helper. %q shell-quotes the JSON so transcripts containing quotes
  # still produce a runnable script.
  {
    printf 'curl -s %s \\\n' "$chat_url"
    # shellcheck disable=SC2016
    printf '  -H "Authorization: Bearer $OPENAI_API_KEY" \\\n'
    printf '  -H "Content-Type: application/json" \\\n'
    printf '  -d %q\n' "$request_body"
  } > "$TMP_DIR/gpt_curl.sh"

  cleaned_response=$(curl -s "$chat_url" \
    -H "Authorization: Bearer $OPENAI_API_KEY" \
    -H "Content-Type: application/json" \
    -d "$request_body")

  cleaned_text=$(jq -r '.choices[0].message.content // empty' <<< "$cleaned_response")

  # Best effort: if the cleanup pass fails, fall back to the raw transcript.
  if [[ -z "$cleaned_text" ]]; then
    error_msg="GPT cleanup failed: $cleaned_response"
    echo "$error_msg"
    echo "$(date '+%Y-%m-%d %H:%M:%S') - $error_msg" >> "$TMP_DIR/error.log"
    notify-send "GPT Cleanup Failed" "Using original transcription. Check $TMP_DIR/error.log"
    cleaned_text="$transcription"
  fi

  # --- Report, copy, paste ---------------------------------------------------
  file_size=$(du -h "$AUDIO_FILE" | cut -f1)
  # Character count of the text itself (wc -c would count bytes + a newline).
  text_length=${#cleaned_text}

  echo ""
  echo "Cleaned text:"
  echo "$cleaned_text"
  echo ""
  echo "File size: $file_size, Text length: $text_length chars"
  echo "Audio file: $AUDIO_FILE"

  # COPY_TOOL is "command plus arguments" in one string; split it on
  # whitespace into an array so no glob expansion can occur.
  read -r -a copy_cmd <<< "$COPY_TOOL"
  printf '%s' "$cleaned_text" | "${copy_cmd[@]}"

  if [[ -n "$NOTIF_ID" ]]; then
    notify-send -r "$NOTIF_ID" "Mumble" "✅ Complete! $file_size | $text_length chars"
  fi

  # Give the clipboard a moment to settle before the paste keystroke is sent.
  sleep 0.3
  paste_tool

  # --- Archive or discard the working files ----------------------------------
  if [[ "$KEEP_TEXTS" == "1" ]]; then
    timestamp=$(date '+%Y-%m-%d %H%M%S')
    printf '%s\n' "$transcription" > "$TMP_DIR/$timestamp-original.txt"
    printf '%s\n' "$cleaned_text" > "$TMP_DIR/$timestamp.txt"
    mv "$AUDIO_FILE" "$TMP_DIR/$timestamp.mp3"
    rm -f "$TRANSCRIPTION_FILE"
  else
    # Nothing is archived: drop the recording and the raw transcription.
    rm -f "$AUDIO_FILE" "$TRANSCRIPTION_FILE"
  fi
}

if [[ ! -f "$PID_FILE" ]]; then
  mumble_start
else
  mumble_process
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment