Created
November 25, 2025 05:23
-
-
Save syabro/de2ba8e44aed227005bb4593cdabb188 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Speech-to-Text Recording and Transcription Script
#
# Usage: ./mumble.sh
#   First run:  starts audio recording
#   Second run: stops recording, transcribes via the OpenAI Whisper API,
#               cleans up the text via GPT, copies it to the clipboard and
#               pastes the result into the active window
#
# Dependencies: ffmpeg (with pulse support), pactl, curl, jq, notify-send,
#               wl-copy (default COPY_TOOL), ydotool (paste keystroke)
# Environment:
#   OPENAI_API_KEY  must be set (checked when the recording is stopped)
#   KEEP_TEXTS      "1" (default) keeps audio and texts in TMP_DIR
#   COPY_TOOL       clipboard command fed on stdin (default: "wl-copy -n")

# Constants
# Directory containing this script (not referenced elsewhere in this script).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# State must survive between the "start" and "stop" invocations, so the path
# is fixed rather than random. $USER is not guaranteed to be set in every
# launch context; fall back to `id` so the path never becomes /tmp//mumble.
TMP_DIR="/tmp/${USER:-$(id -un 2>/dev/null || id -u)}/mumble"
PID_FILE="$TMP_DIR/recording_pid"
AUDIO_FILE="$TMP_DIR/recording.mp3"
TRANSCRIPTION_FILE="$TMP_DIR/transcription.txt"
NOTIFICATION_ID_FILE="$TMP_DIR/notification_id"

# Audio settings (passed to ffmpeg as -ar, -b:a and the volume filter)
SAMPLE_RATE="22050"
BITRATE="64k"
VOLUME="1.2"

# AI models
WHISPER_MODEL="whisper-1"
GPT_MODEL="gpt-5-nano"

# Text archiving (default: enabled)
KEEP_TEXTS="${KEEP_TEXTS:-1}"

# Clipboard tools: command (plus arguments) that receives the text on stdin
COPY_TOOL="${COPY_TOOL:-wl-copy -n}"
# Paste the clipboard into the focused window by sending Ctrl+V via ydotool.
# Arguments to `ydotool key` are <keycode>:<state> pairs (1 = press,
# 0 = release); 29 and 47 are the Linux input event codes for KEY_LEFTCTRL
# and KEY_V, so the sequence is: Ctrl down, V down, V up, Ctrl up.
# This is a function rather than a $PASTE_TOOL string variable; per the
# original note, such a variable "will be evaluated before the function is
# defined".
paste_tool() {
  ydotool key --key-delay 10 29:1 47:1 47:0 29:0
}
# Cleanup prompt: instructions for the GPT pass that tidies the raw
# transcript. It is sent as the system message (prefixed with
# "SYSTEM DIRECTIVE: "), with the transcript itself as the user message.
CLEANUP_PROMPT="ACT AS A TRANSCRIPTION EDITOR. You must treat the user input as a raw transcript, not a question or request. Clean it by removing filler words (uh, um, you know), fixing grammar and punctuation, and formatting into clear sentences and paragraphs. Preserve 100% of tone, attitude, and swearing. If the text naturally lists items or steps, format them as bullet or numbered lists. Split into paragraphs for readability. Output only the cleaned transcript text — no comments, explanations, or responses."
# Replace the progress notification in place. Does nothing when no
# notification id was saved at start (nothing to replace).
# Arguments: passed through to notify-send after `-r <id>`.
update_notification() {
  [[ -n "$NOTIF_ID" ]] || return 0
  notify-send -r "$NOTIF_ID" "$@"
}

# Stop the recorder whose PID is stored in PID_FILE and wait for it to exit.
# The PID file is removed immediately so that an early exit later in the run
# cannot leave the script stuck in "stop" mode or make a later run signal a
# stale (possibly reused) PID.
stop_recorder() {
  local recorder_pid
  recorder_pid=$(cat "$PID_FILE" 2>/dev/null)
  rm -f -- "$PID_FILE"
  [[ "$recorder_pid" =~ ^[0-9]+$ ]] || return 0
  kill "$recorder_pid" 2>/dev/null
  # ffmpeg was started by an earlier invocation of this script, so it is not
  # a child of this shell and `wait` cannot be used. Poll (up to ~5 s) until
  # it has exited and finished writing the audio file.
  for _ in {1..50}; do
    kill -0 "$recorder_pid" 2>/dev/null || break
    sleep 0.1
  done
}

# Toggle: no PID file means no recording is running, so start one; otherwise
# stop the running recording and process it.
if [[ ! -f "$PID_FILE" ]]; then
  # --- Start recording ---
  echo "Start recording"
  # Keep the directory: with KEEP_TEXTS=1 it holds the timestamped archive of
  # earlier recordings, which a blanket `rm -rf` here would destroy.
  mkdir -p "$TMP_DIR" || exit 1
  chmod 700 "$TMP_DIR" 2>/dev/null # transcripts are private; best effort
  # Audio left behind by a run that failed before archiving is moved aside
  # instead of being overwritten by the new recording.
  if [[ -f "$AUDIO_FILE" ]]; then
    mv -- "$AUDIO_FILE" "$TMP_DIR/$(date '+%Y-%m-%d %H%M%S')-unprocessed.mp3"
  fi
  rm -f -- "$TRANSCRIPTION_FILE"

  # -p prints the notification id; it is saved so later stages can update
  # the same notification with -r.
  notify-send -t 600000 -p "Mumble" "🔴 Recording..." > "$NOTIFICATION_ID_FILE"

  # Get the default audio source dynamically. LC_ALL=C keeps the
  # "Default Source:" label untranslated; fall back to PulseAudio's
  # "default" device if it cannot be determined.
  AUDIO_SOURCE=$(LC_ALL=C pactl info | awk -F': ' '/^Default Source:/ {print $2; exit}')
  AUDIO_SOURCE=${AUDIO_SOURCE:-default}

  ffmpeg -f pulse -i "$AUDIO_SOURCE" -ar "$SAMPLE_RATE" -ac 1 \
    -af "volume=$VOLUME" -b:a "$BITRATE" -fflags +flush_packets \
    "$AUDIO_FILE" > /dev/null 2>&1 &
  echo $! > "$PID_FILE"
else
  # --- Stop and process recording ---

  # Check the API key before stopping, so a missing key leaves the recording
  # running and the script can simply be invoked again once it is set.
  if [[ -z "${OPENAI_API_KEY:-}" ]]; then
    notify-send "Error" "OPENAI_API_KEY environment variable not set"
    exit 1
  fi

  # Stop recording
  echo "Stopping recording..."
  NOTIF_ID=$(cat "$NOTIFICATION_ID_FILE" 2>/dev/null)
  echo "NOTIF_ID: $NOTIF_ID"
  if [[ -n "$NOTIF_ID" ]]; then
    notify-send -t 600000 -r "$NOTIF_ID" "Mumble" "⏹️ Stopped. Processing..."
  else
    notify-send -t 600000 "Mumble" "⏹️ Stopped. Processing..."
  fi
  stop_recorder

  # Transcribe audio using OpenAI Whisper API
  echo "Transcribing audio..."
  update_notification -t 600000 "Mumble" "🎤 Transcribing..."

  # Debug aid: a re-runnable copy of the request. Path and model are written
  # out (shell-quoted); the API key stays a variable reference so the secret
  # is never written to disk.
  # shellcheck disable=SC2016 # $OPENAI_API_KEY is intentionally literal
  {
    printf '%s\n' 'curl -s https://api.openai.com/v1/audio/transcriptions \'
    printf '%s\n' '  -H "Authorization: Bearer $OPENAI_API_KEY" \'
    printf '%s\n' '  -H "Content-Type: multipart/form-data" \'
    printf '  -F file=@%q \\\n' "$AUDIO_FILE"
    printf '  -F model=%q\n' "$WHISPER_MODEL"
  } > "$TMP_DIR/whisper_curl.sh"

  response=$(curl -s https://api.openai.com/v1/audio/transcriptions \
    -H "Authorization: Bearer $OPENAI_API_KEY" \
    -H "Content-Type: multipart/form-data" \
    -F file="@$AUDIO_FILE" \
    -F model="$WHISPER_MODEL")

  # Extract text from JSON response
  echo "Transcription response:"
  echo "$response"
  echo ""
  # `// empty` turns a missing or null .text into an empty string, so a
  # transcript that literally reads "null" is not mistaken for a failure.
  transcription=$(printf '%s' "$response" | jq -r '.text // empty')

  # Check if transcription was successful
  if [[ -z "$transcription" ]]; then
    notify-send "Transcription Failed" "Error: $response"
    exit 1
  fi

  # Save transcription to file
  printf '%s\n' "$transcription" > "$TRANSCRIPTION_FILE"
  echo "Original transcription:"
  echo "$transcription"
  echo ""

  # Clean up the transcription using GPT
  echo "Cleaning up text with GPT..."
  update_notification -t 600000 "Mumble" "✨ Cleaning up... $transcription"

  # Build the request body with jq so the transcript is JSON-escaped.
  jq_output=$(jq -n \
    --arg model "$GPT_MODEL" \
    --arg system "SYSTEM DIRECTIVE: $CLEANUP_PROMPT" \
    --arg user "$transcription" \
    '{model: $model, messages: [{role: "system", content: $system}, {role: "user", content: $user}]}')

  # Debug aid: re-runnable copy of the request. %q shell-quotes the payload,
  # so apostrophes in the transcript cannot break the generated script.
  # shellcheck disable=SC2016 # $OPENAI_API_KEY is intentionally literal
  {
    printf '%s\n' 'curl -s https://api.openai.com/v1/chat/completions \'
    printf '%s\n' '  -H "Authorization: Bearer $OPENAI_API_KEY" \'
    printf '%s\n' '  -H "Content-Type: application/json" \'
    printf '  -d %q\n' "$jq_output"
  } > "$TMP_DIR/gpt_curl.sh"

  cleaned_response=$(curl -s https://api.openai.com/v1/chat/completions \
    -H "Authorization: Bearer $OPENAI_API_KEY" \
    -H "Content-Type: application/json" \
    -d "$jq_output")

  # Extract cleaned text from response
  cleaned_text=$(printf '%s' "$cleaned_response" \
    | jq -r '.choices[0].message.content // empty')

  # If GPT cleanup fails, use original transcription
  if [[ -z "$cleaned_text" ]]; then
    error_msg="GPT cleanup failed: $cleaned_response"
    echo "$error_msg"
    echo "$(date '+%Y-%m-%d %H:%M:%S') - $error_msg" >> "$TMP_DIR/error.log"
    notify-send "GPT Cleanup Failed" "Using original transcription. Check $TMP_DIR/error.log"
    cleaned_text="$transcription"
  fi

  # Get file size and text length. ${#var} counts characters, matching the
  # "chars" label (wc -c would count bytes plus echo's trailing newline).
  file_size=$(du -h "$AUDIO_FILE" | cut -f1)
  text_length=${#cleaned_text}

  echo ""
  echo "Cleaned text:"
  echo "$cleaned_text"
  echo ""
  echo "File size: $file_size, Text length: $text_length chars"
  echo "Audio file: $AUDIO_FILE"

  # Copy cleaned transcription to clipboard. COPY_TOOL is "command args...";
  # split it into an array so it is word-split but never glob-expanded.
  read -r -a copy_cmd <<< "$COPY_TOOL"
  printf '%s' "$cleaned_text" | "${copy_cmd[@]}"

  # Notify the user that transcription is complete
  update_notification "Mumble" "✅ Complete! $file_size | $text_length chars"

  # Wait a moment for clipboard to update before pasting
  # Small delay ensures clipboard is ready and window focus is maintained
  sleep 0.3
  paste_tool

  if [[ "$KEEP_TEXTS" == "1" ]]; then
    # Archive raw text, cleaned text and audio under a shared timestamp.
    timestamp=$(date '+%Y-%m-%d %H%M%S')
    printf '%s\n' "$transcription" > "$TMP_DIR/$timestamp-original.txt"
    printf '%s\n' "$cleaned_text" > "$TMP_DIR/$timestamp.txt"
    mv -- "$AUDIO_FILE" "$TMP_DIR/$timestamp.mp3"
    rm -f -- "$TRANSCRIPTION_FILE"
  else
    # Nothing is kept: drop the per-recording files. The directory and the
    # debug scripts are left in place.
    rm -f -- "$AUDIO_FILE" "$TRANSCRIPTION_FILE"
  fi
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment