# import_csv_endpoint
#
# Three iterations of a FastAPI endpoint that bootstraps a BayBE Campaign from an
# uploaded CSV. The imports below are what the snippets need; validation helpers
# (_validate_headers, _validate_and_cast, _default_campaign, _csv_hash, State,
# _append_recommendations, _csv_response, ...) are defined elsewhere in the app.
from io import StringIO
from typing import Optional
from uuid import uuid4

import pandas as pd
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import JSONResponse

from baybe import Campaign
from baybe.objectives import DesirabilityObjective, SingleTargetObjective
from baybe.searchspace import SearchSpace

app = FastAPI()

# In-memory stores used by the endpoints (assumed to be plain dicts here)
CAMPAIGNS: dict = {}
LAST_BATCH: dict = {}
LAST_REC: dict = {}
STATES: dict = {}
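# The request models referenced below (ObjectiveSpec, InitBody, InitMeta) are not part
# of this excerpt. A minimal sketch of what they might look like, inferred from how the
# endpoints use them; these are assumptions, not the gist's actual definitions:
from typing import List, Literal

from pydantic import BaseModel


class ObjectiveSpec(BaseModel):
    name: str
    direction: Literal["max", "min"] = "max"


class InitBody(BaseModel):
    objectives: List[ObjectiveSpec]
    batch_size: Optional[int] = 3


class InitMeta(BaseModel):
    # Used by the second variant; defaults allow calling the endpoint without meta_json.
    objectives: List[ObjectiveSpec] = []
    batch_size: Optional[int] = 3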
# --- Variant 1: generic CSV; parameters are inferred, objectives come from meta_json ---
@app.post("/campaigns/import_csv")
async def import_csv(
    file: UploadFile = File(..., description="CSV with headers; includes parameters + objectives"),
    meta_json: Optional[str] = Form(None, description='Tiny JSON, e.g. {"objectives":[{"name":"y","direction":"max"}],"batch_size":3}'),
):
    # 1) Load CSV
    try:
        text = (await file.read()).decode("utf-8")
        df = pd.read_csv(StringIO(text))
    except Exception as e:
        raise HTTPException(400, f"Failed to parse CSV: {e}")
    if df.shape[1] < 2:
        raise HTTPException(400, "CSV must contain at least one parameter column and one objective column")

    # 2) Parse the tiny meta JSON
    if meta_json:
        try:
            meta = InitBody.model_validate_json(meta_json)
        except Exception as e:
            raise HTTPException(400, f"Bad meta_json: {e}")
    else:
        # Default: treat the last column as a single objective to maximize
        last = df.columns[-1]
        meta = InitBody(objectives=[ObjectiveSpec(name=last, direction="max")], batch_size=3)

    objective_names = [o.name for o in meta.objectives]
    for name in objective_names:
        if name not in df.columns:
            raise HTTPException(400, f'Objective column "{name}" not found in CSV headers')
    # 3) Build the BayBE campaign. Campaign takes a SearchSpace and an Objective,
    #    not raw parameter/target lists.
    params = _infer_parameters(df, objective_names)
    targets = _build_targets(meta.objectives)
    objective = (
        SingleTargetObjective(targets[0])
        if len(targets) == 1
        else DesirabilityObjective(targets)  # multi-objective requires bounded targets
    )
    campaign = Campaign(searchspace=SearchSpace.from_product(params), objective=objective)

    # 4) Seed with any rows that already have observed objective values.
    #    add_measurements expects a DataFrame holding the parameter and target columns.
    y_df = df[objective_names]
    mask = ~y_df.isnull().any(axis=1)
    if mask.any():
        seed_df = df.loc[mask, [p.name for p in params] + objective_names].reset_index(drop=True)
        campaign.add_measurements(seed_df)

    # 5) Store the campaign and return a first recommendation batch
    cid = str(uuid4())
    CAMPAIGNS[cid] = campaign
    batch = campaign.recommend(batch_size=meta.batch_size or 1)  # returns a DataFrame; batch_size is required
    LAST_BATCH[cid] = batch
    return JSONResponse({
        "campaign_id": cid,
        "inferred_parameters": [p.name for p in params],
        "objectives": objective_names,
        "initial_points_added": int(mask.sum()),
        "first_recommendation_batch": batch.to_dict(orient="records"),  # a DataFrame is not JSON-serializable as-is
    })
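# _infer_parameters and _build_targets are not shown in this excerpt. A rough sketch of
# what they could look like (assumptions, not the gist's actual implementations), using
# the pre-1.0 BayBE parameter/target API; newer releases may differ:
from baybe.parameters import CategoricalParameter, NumericalDiscreteParameter
from baybe.targets import NumericalTarget


def _infer_parameters(df: pd.DataFrame, objective_names: list[str]) -> list:
    """Guess a BayBE parameter type for each non-objective column from its dtype."""
    params = []
    for col in df.columns:
        if col in objective_names:
            continue
        values = df[col].dropna().unique().tolist()
        # Note: BayBE parameters need more than one distinct value; columns with a
        # single observed value would need special handling.
        if pd.api.types.is_numeric_dtype(df[col]):
            params.append(NumericalDiscreteParameter(name=col, values=sorted(values)))
        else:
            params.append(CategoricalParameter(name=col, values=sorted(str(v) for v in values)))
    return params


def _build_targets(objectives: list) -> list:
    """Map ObjectiveSpec entries onto BayBE NumericalTargets."""
    return [
        NumericalTarget(name=o.name, mode="MAX" if o.direction == "max" else "MIN")
        for o in objectives
    ]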
# --- Variant 2: fixed template ("Data" sheet) with known parameter/target columns ---
@app.post("/campaigns/import_csv")
async def import_csv(
    file: UploadFile = File(..., description="CSV exported from the 'Data' sheet of the template"),
    meta_json: Optional[str] = Form(None, description='Optional JSON: {"objectives":[...],"batch_size":3}'),
):
    # 1) Read CSV
    try:
        raw = (await file.read()).decode("utf-8")
        df = pd.read_csv(StringIO(raw))
    except Exception as e:
        raise HTTPException(400, f"Failed to read CSV: {e}")

    # 2) Validate structure and values
    _validate_headers(df)
    df = _validate_and_cast(df)

    # 3) Split params/targets and collect seed rows with complete targets
    param_cols = ["solvent", "temp_C", "pressure_bar", "resin_pct"]
    target_cols = ["y_yield_pct", "y_defect_rate"]
    have_y = df[target_cols].notna().all(axis=1)
    seed_df = df.loc[have_y, param_cols + target_cols].reset_index(drop=True)

    # 4) Build BayBE components (SearchSpace, Objective, Recommender) and the Campaign
    try:
        meta = InitMeta.model_validate_json(meta_json) if meta_json else InitMeta()
    except Exception as e:
        raise HTTPException(400, f"Bad meta_json: {e}")
    searchspace = _build_searchspace(df)
    objective = _build_objective(meta)
    recommender = _default_recommender()
    campaign = Campaign(searchspace=searchspace, objective=objective, recommender=recommender)
    # Per the BayBE docs/README, add_measurements accepts a DataFrame with parameter + target columns
    if not seed_df.empty:
        campaign.add_measurements(seed_df)

    # 5) Recommend the next batch (batch_size is mandatory in recent releases)
    batch_size = meta.batch_size or 1
    rec_df = campaign.recommend(batch_size=batch_size)
    rec_records = rec_df.to_dict(orient="records")

    # 6) Stash and return
    cid = str(uuid4())
    CAMPAIGNS[cid] = campaign
    LAST_REC[cid] = rec_records
    return JSONResponse({
        "campaign_id": cid,
        "initial_points_added": int(seed_df.shape[0]),
        "recommendations": rec_records,
        "parameters": param_cols,
        "objectives": target_cols,
    })
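# _build_searchspace, _build_objective and _default_recommender are likewise not shown.
# One possible shape for them, assuming the fixed template columns above, bounds taken
# from the uploaded data, and a single yield target (all of which are assumptions):
from baybe.parameters import NumericalContinuousParameter
from baybe.recommenders import BotorchRecommender, FPSRecommender, TwoPhaseMetaRecommender


def _build_searchspace(df: pd.DataFrame) -> SearchSpace:
    parameters = [
        CategoricalParameter(name="solvent", values=sorted(df["solvent"].astype(str).unique().tolist())),
        NumericalContinuousParameter(name="temp_C", bounds=(df["temp_C"].min(), df["temp_C"].max())),
        NumericalContinuousParameter(name="pressure_bar", bounds=(df["pressure_bar"].min(), df["pressure_bar"].max())),
        NumericalContinuousParameter(name="resin_pct", bounds=(df["resin_pct"].min(), df["resin_pct"].max())),
    ]
    return SearchSpace.from_product(parameters)


def _build_objective(meta: InitMeta):
    # Default to maximizing yield; a request-supplied objective overrides name/direction.
    # Handling y_defect_rate as a second target would need a DesirabilityObjective with
    # bounded targets instead.
    spec = meta.objectives[0] if meta.objectives else ObjectiveSpec(name="y_yield_pct", direction="max")
    mode = "MAX" if spec.direction == "max" else "MIN"
    return SingleTargetObjective(NumericalTarget(name=spec.name, mode=mode))


def _default_recommender():
    # BayBE's usual two-phase setup: space-filling initial picks, then a BoTorch-based model.
    return TwoPhaseMetaRecommender(initial_recommender=FPSRecommender(), recommender=BotorchRecommender())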
# --- Variant 3: stateful CSV round-trip; recommendations are appended and returned as CSV ---
@app.post("/campaigns/import_csv")
async def import_csv(file: UploadFile = File(...), batch_size: int = Form(3)):
    raw = await file.read()
    try:
        df = pd.read_csv(StringIO(raw.decode("utf-8")))
    except Exception as e:
        raise HTTPException(400, f"Failed to read CSV: {e}")
    _validate_headers(df)
    df = _validate_and_cast(df)

    cid = str(uuid4())
    state = State(_default_campaign(), df.copy(), _csv_hash(raw))
    STATES[cid] = state

    # Append the first recommendations and return the CSV directly
    _append_recommendations(state, batch_size=batch_size)
    return _csv_response(state.csv_df)
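# _append_recommendations and _csv_response are defined elsewhere in the gist. One way
# they could work, assuming State exposes the campaign as `state.campaign` and the sheet
# as `state.csv_df`, with recommendations appended as rows whose target cells stay empty:
from fastapi.responses import Response


def _append_recommendations(state, batch_size: int) -> None:
    rec_df = state.campaign.recommend(batch_size=batch_size)  # parameter columns only
    state.csv_df = pd.concat([state.csv_df, rec_df], ignore_index=True)


def _csv_response(df: pd.DataFrame) -> Response:
    # Hand the sheet back to the client as a downloadable CSV.
    return Response(
        content=df.to_csv(index=False),
        media_type="text/csv",
        headers={"Content-Disposition": 'attachment; filename="campaign.csv"'},
    )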
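# Example client call for the first two variants (illustrative; assumes the app runs on
# localhost:8000 and a local file named experiments.csv):
#
#   import json, requests
#   meta = {"objectives": [{"name": "y_yield_pct", "direction": "max"}], "batch_size": 3}
#   with open("experiments.csv", "rb") as f:
#       r = requests.post(
#           "http://localhost:8000/campaigns/import_csv",
#           files={"file": ("experiments.csv", f, "text/csv")},
#           data={"meta_json": json.dumps(meta)},
#       )
#   print(r.json())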