Skip to content

Instantly share code, notes, and snippets.

@matsubo
Last active December 4, 2025 07:02
Show Gist options
  • Select an option

  • Save matsubo/c1366c36b1045484a00d0756ab082f3a to your computer and use it in GitHub Desktop.

Select an option

Save matsubo/c1366c36b1045484a00d0756ab082f3a to your computer and use it in GitHub Desktop.
#!/bin/bash
# Reproduction driver: creates the test input, downloads the PoC script and
# runs it.
#
# Please ensure GOOGLE_API_KEY is set.
# The key can be found: https://aistudio.google.com/apps

# Stop at the first failing step so a failed download is never executed.
set -euo pipefail

# Create test.xlsx file
# NOTE(review): the base64 payload here is empty, so this writes a zero-byte
# test.xlsx. Paste the base64 of a real workbook between the quotes if the
# upload needs valid .xlsx content.
echo "" | base64 -d > test.xlsx

# Download PoC script
# --fail: treat HTTP errors as failures instead of saving the error page as
# the .py file; --location: follow redirects; --silent/--show-error: no
# progress meter, but still print errors.
curl --fail --silent --show-error --location \
  --output poc_mime_type_issue.py \
  https://gist.githubusercontent.com/matsubo/c1366c36b1045484a00d0756ab082f3a/raw/poc_mime_type_issue.py

# Execute
python poc_mime_type_issue.py
"""
Proof of Concept: MIME Type Validation issue in uploadToFileSearchStore API
This script demonstrates a server-side validation issue where the API rejects a
valid MIME type for Excel files (.xlsx).
Error: 400 INVALID_ARGUMENT
Message: "When provided, MIME type must be in a valid type/subtype format"
MIME Type Used: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
Issue: The MIME type IS in valid type/subtype format, but the API incorrectly rejects it.
"""
from google import genai
from google.genai import types
import time
import os
from dotenv import load_dotenv
# Load environment variables
load_dotenv()

# Get API key; fail fast with a clear message when it is missing instead of
# letting the first API call fail.
api_key = os.getenv('GOOGLE_API_KEY')
if not api_key:
    raise ValueError("GOOGLE_API_KEY not found in environment variables")

client = genai.Client(api_key=api_key)

print("=" * 80)
print("PoC: MIME Type issue in uploadToFileSearchStore API")
print("=" * 80)

# Preparation: Create a file search store that the three steps below upload
# or import into.
print("\nPreparation: Creating file search store...")
file_search_store = client.file_search_stores.create(
    config={'display_name': 'poc-mime-type-xlsx'}
)
print(f" Created: {file_search_store.name}")
# Step 1: Attempt to upload an Excel file with the correct MIME type
print("\n1. Uploading Excel file (.xlsx) with correct MIME type...")
test_file = "test.xlsx"
mime_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
print(f" File: {test_file}")
print(f" MIME Type: {mime_type}")
print(" Note: This MIME type is the official IANA-registered type for .xlsx files")
try:
    with open(test_file, 'rb') as f:
        operation = client.file_search_stores.upload_to_file_search_store(
            file=f,
            file_search_store_name=file_search_store.name,
            config={
                'display_name': 'test.xlsx',
                'mime_type': mime_type,  # VALID MIME TYPE
            },
        )
    # Wait for completion
    while not operation.done:
        time.sleep(2)
        operation = client.operations.get(operation)
    # A finished operation is not necessarily a successful one.
    # NOTE(review): assumes the operation object exposes an `error` field
    # that is empty on success - confirm against the SDK.
    operation_error = getattr(operation, "error", None)
    if operation_error:
        raise RuntimeError(f"Upload operation finished with error: {operation_error}")
    print("\n ✓ SUCCESS: File uploaded successfully")
except Exception as e:
    print("\n ✗ ERROR: Upload failed")
    print(f"\n{e}")
    print("\n" + "=" * 80)
    # Only claim the issue is confirmed when the failure is the one this PoC
    # is about. Any other failure (missing test.xlsx, network error, auth
    # error, ...) must not be reported as a 400 INVALID_ARGUMENT.
    if "INVALID_ARGUMENT" in str(e):
        print("ISSUE CONFIRMED")
        print("=" * 80)
        print("The API incorrectly rejects the MIME type:")
        print(f" {mime_type}")
        print("\nThis is a valid IANA-registered MIME type in 'type/subtype' format.")
        print("Expected: API should accept this MIME type")
        print("Actual: API returns 400 INVALID_ARGUMENT")
    else:
        print("ISSUE NOT CONFIRMED")
        print("=" * 80)
        print("The upload failed, but not with the expected INVALID_ARGUMENT error.")
        print("See the error printed above for the actual cause.")
    print("=" * 80)
# Step 2: Try without mime_type to see if it works
# NOTE(review): the recorded run output in this gist shows this step failing
# with "Could not determine the mimetype for your file ... please set the
# `mime_type` argument". That looks like a client-side check rather than a
# server response - confirm before drawing conclusions from this step.
print("\n\n2. Testing upload WITHOUT mime_type parameter...")
try:
    with open(test_file, 'rb') as f:
        operation = client.file_search_stores.upload_to_file_search_store(
            file=f,
            file_search_store_name=file_search_store.name,
            config={
                'display_name': 'test-without-mimetype.xlsx'
                # NO mime_type specified
            },
        )
    # Wait for completion
    while not operation.done:
        time.sleep(2)
        operation = client.operations.get(operation)
    # A finished operation is not necessarily a successful one.
    # NOTE(review): assumes the operation object exposes an `error` field
    # that is empty on success - confirm against the SDK.
    operation_error = getattr(operation, "error", None)
    if operation_error:
        raise RuntimeError(f"Upload operation finished with error: {operation_error}")
    print(" ✓ SUCCESS: File uploaded without mime_type parameter")
    print("\n This confirms the issue: The API works when mime_type is omitted,")
    print(" but fails when the correct mime_type is provided.")
except Exception as e:
    print(f"\n ✗ Also failed without mime_type: {e}")
# Step 3: Try alternative approach - upload via File API then import
print("\n\n3. Testing alternative approach: File API upload → import to store...")
print(" This tests if using client.files.upload() + import_file() works better")
try:
    # Upload file using File API
    with open(test_file, 'rb') as f:
        file_ref = client.files.upload(
            file=f,
            config=types.UploadFileConfig(
                display_name='test-via-file-api.xlsx',
                mime_type=mime_type,
            ),
        )
    print(f" ✓ Uploaded via File API: {file_ref.name}")
    # Import the uploaded file into the file search store
    import_op = client.file_search_stores.import_file(
        file_search_store_name=file_search_store.name,
        file_name=file_ref.name,
    )
    print(f" File import started: {import_op.name}")
    print(" Waiting for import to complete", end="")
    # Re-fetch the operation on every iteration; print one dot per poll.
    while not (import_op := client.operations.get(import_op)).done:
        time.sleep(1)
        print(".", end="", flush=True)
    print()
    # A finished operation is not necessarily a successful one.
    # NOTE(review): assumes the operation object exposes an `error` field
    # that is empty on success - confirm against the SDK.
    import_error = getattr(import_op, "error", None)
    if import_error:
        raise RuntimeError(f"Import operation finished with error: {import_error}")
    print(" ✓ SUCCESS: File imported successfully via File API → import_file()")
    print("\n This approach works! Use File API upload + import as a workaround.")
except Exception as e:
    print(f"\n ✗ Failed with File API approach: {e}")
# Closing banner followed by a fixed recap of the three experiments. The
# recap text is static; it does not reflect the outcome of this run.
rule = "=" * 80
recap = (
    " Step 1 (direct upload with mime_type): Expected to fail",
    " Step 2 (direct upload without mime_type): May work",
    " Step 3 (File API + import): Recommended workaround",
)
print(f"\n{rule}")
print("PoC Complete")
print(rule)
print("\nSUMMARY:")
for entry in recap:
    print(entry)
print(rule)
@matsubo
Copy link
Author

matsubo commented Dec 4, 2025

Output

❯ GOOGLE_API_KEY=xxxxxxxxxxxxx uv run python poc_mime_type_issue.py
================================================================================
PoC: MIME Type issue in uploadToFileSearchStore API
================================================================================

Preparation: Creating file search store...
   Created: fileSearchStores/pocmimetypexlsx-34khecxjwyia

1. Uploading Excel file (.xlsx) with correct MIME type...
   File: test.xlsx
   MIME Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
   Note: This MIME type is the official IANA-registered type for .xlsx files

   ✗ ERROR: Upload failed

400 INVALID_ARGUMENT. {'error': {'code': 400, 'message': "* UploadToFileSearchStoreRequest.mime_type: When provided, MIME type must be in a valid type/subtype format (e.g., 'text/plain', 'application/pdf').\n", 'status': 'INVALID_ARGUMENT'}}

================================================================================
ISSUE CONFIRMED
================================================================================
The API incorrectly rejects the MIME type:
  application/vnd.openxmlformats-officedocument.spreadsheetml.sheet

This is a valid IANA-registered MIME type in 'type/subtype' format.
Expected: API should accept this MIME type
Actual: API returns 400 INVALID_ARGUMENT
================================================================================


2. Testing upload WITHOUT mime_type parameter...

   ✗ Also failed without mime_type: Unknown mime type: Could not determine the mimetype for your file
 please set the `mime_type` argument


3. Testing alternative approach: File API upload → import to store...
   This tests if using client.files.upload() + import_file() works better
   ✓ Uploaded via File API: files/u129b2vloblf
   File import started: fileSearchStores/pocmimetypexlsx-34khecxjwyia/operations/u129b2vloblf-fxng7npnzfdz
   Waiting for import to complete.
   ✓ SUCCESS: File imported successfully via File API → import_file()

   This approach works! Use File API upload + import as a workaround.

================================================================================
PoC Complete
================================================================================

SUMMARY:
  Step 1 (direct upload with mime_type): Expected to fail
  Step 2 (direct upload without mime_type): May work
  Step 3 (File API + import): Recommended workaround
================================================================================

Package versions

❯ uv run pip list
Package                 Version    Editable project location
----------------------- ---------- --------------------------------------------------------------------
annotated-types         0.7.0
anyio                   4.12.0
cachetools              6.2.2
certifi                 2025.11.12
charset-normalizer      3.4.4
cobble                  0.1.4
google-auth             2.43.0
google-genai            1.53.0
h11                     0.16.0
httpcore                1.0.9
httpx                   0.28.1
idna                    3.11
mammoth                 1.11.0
pip                     24.3.1
pyasn1                  0.6.1
pyasn1_modules          0.4.2
pydantic                2.12.5
pydantic_core           2.41.5
python-dotenv           1.2.1
requests                2.32.5
rsa                     4.9.1
tenacity                9.1.2
typing_extensions       4.15.0
typing-inspection       0.4.2
urllib3                 2.5.0
websockets              15.0.1
xlsx2csv                0.8.4

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment