Skip to content

Instantly share code, notes, and snippets.

@fuyi
Created July 30, 2025 10:24
Show Gist options
  • Select an option

  • Save fuyi/f8e1801a1fd3d77adbd59c1408e96157 to your computer and use it in GitHub Desktop.

Select an option

Save fuyi/f8e1801a1fd3d77adbd59c1408e96157 to your computer and use it in GitHub Desktop.
import boto3
import struct
# Number of leading bytes fetched from S3. Larger than the canonical 44-byte
# header so that chunks placed before ``fmt `` can be skipped over.
_WAV_HEADER_WINDOW = 4096

# Size of the canonical RIFF/WAVE header (RIFF descriptor + fmt chunk + data
# chunk header). Anything shorter is rejected outright.
_MIN_WAV_HEADER_SIZE = 44


def _parse_wav_header(header_data):
    """Validate the leading bytes of a WAV file and extract its audio parameters.

    Returns the same ``(ok, details)`` tuple as ``validate_wav_header_from_s3``.
    """
    if len(header_data) < _MIN_WAV_HEADER_SIZE:
        return False, "File too small to be WAV"
    if header_data[:4] != b'RIFF':
        return False, "Not a RIFF file"
    if header_data[8:12] != b'WAVE':
        return False, "Not a WAVE file"

    # Walk the chunk list that follows the 12-byte RIFF/WAVE descriptor.
    # Each chunk is: 4-byte id, 4-byte little-endian size, payload, and one
    # pad byte when the size is odd.
    offset = 12
    while offset + 8 <= len(header_data):
        chunk_id, chunk_size = struct.unpack_from('<4sL', header_data, offset)
        if chunk_id == b'fmt ':
            # Need the first 8 payload bytes: format tag, channels, sample rate.
            if offset + 16 > len(header_data):
                break
            audio_format, channels, sample_rate = struct.unpack_from(
                '<HHL', header_data, offset + 8
            )
            return True, {
                'format': audio_format,
                'channels': channels,
                'sample_rate': sample_rate,
                'validated': 'header_only',
            }
        offset += 8 + chunk_size + (chunk_size & 1)

    return False, "Invalid format chunk"


def validate_wav_header_from_s3(bucket_name, object_key, *,
                                profile_name='product-admin', s3_client=None):
    """Validate a WAV object in S3 by reading only the start of the file.

    Only the first ``_WAV_HEADER_WINDOW`` bytes are requested (via an HTTP
    Range request), so the audio payload itself is never downloaded. The
    ``fmt `` chunk is located by walking the RIFF chunk list, so files that
    carry other chunks ahead of ``fmt `` are accepted as long as ``fmt ``
    starts within the fetched window.

    Args:
        bucket_name: Name of the S3 bucket.
        object_key: Key of the object inside the bucket.
        profile_name: AWS credential profile used to build a client when
            ``s3_client`` is not supplied.
        s3_client: Optional object exposing ``get_object(Bucket=, Key=,
            Range=)``; when given, no boto3 session is created.

    Returns:
        ``(True, info)`` where ``info`` is a dict with the keys ``format``,
        ``channels``, ``sample_rate`` and ``validated``; or
        ``(False, message)`` where ``message`` describes why validation
        failed.
    """
    if s3_client is None:
        # Kept outside the try block: credential/profile errors propagate
        # to the caller, as they always have.
        session = boto3.Session(profile_name=profile_name)
        s3_client = session.client('s3')

    try:
        response = s3_client.get_object(
            Bucket=bucket_name,
            Key=object_key,
            Range=f'bytes=0-{_WAV_HEADER_WINDOW - 1}',
        )
        body = response['Body']
        try:
            header_data = body.read()
        finally:
            # Release the underlying HTTP connection even if read() fails.
            body.close()
        return _parse_wav_header(header_data)
    except Exception as e:
        # Deliberate catch-all: this function reports every download or
        # parse failure through its (ok, details) return value.
        return False, f"Validation error: {str(e)}"
# Example invocation: validate one specific object and print the outcome.
# NOTE: this runs at import time and performs a real S3 request.
bucket_name = "astrid01-prod-astrid-speech-sessions-raw"
object_key = "by-tenant-name/guldbrev/dstny-downloads/2e5e3f79-9027-4d35-add1-b7806392dd2c/80073065.wav"
is_valid, details = validate_wav_header_from_s3(
    bucket_name=bucket_name,
    object_key=object_key,
)
print(f"Valid WAV: {is_valid}, Details: {details}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment