Skip to content

Instantly share code, notes, and snippets.

@yoursunny
Last active January 24, 2026 23:51
Show Gist options
  • Select an option

  • Save yoursunny/e2cc67d5931272de17799bf5407dd85d to your computer and use it in GitHub Desktop.

Select an option

Save yoursunny/e2cc67d5931272de17799bf5407dd85d to your computer and use it in GitHub Desktop.
__pycache__/
*.py[codz]
*$py.class
/.venv
/*.csv
/*.sh
/missing
/output

水源社区个人图片下载

# 个人资料 - 偏好设置 - 请求归档
# 下载 ZIP,解压 `user_archive.csv`

python3 -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt

python main.py >1.sh

# 进入浏览器开发者工具,复制 Cookie _t 值
# 打开 1.sh,将 Cookie _t 值写入 COOKIE_T 变量

bash 1.sh

# 成功下载的文件都在 output 里
# 下载失败的文件名在 missing 里,表示帖子已被删除
import csv
import re
import shlex
from pathlib import Path
from typing import Literal, TypedDict

from bs4 import BeautifulSoup
# Origin (scheme + host) of the site. It is stripped from absolute image URLs
# and prepended again to site-relative paths in the generated download script.
SERVER = "https://shuiyuan.sjtu.edu.cn"
class Record(TypedDict):
    """One row of the archive CSV as produced by ``csv.DictReader``.

    ``csv.DictReader`` yields every column as text, so all fields are typed
    ``str`` (the counters are numeric *strings*, not ``int``).
    """

    topic_title: str
    categories: str
    # "是" means yes (the row is a private message), "否" means no.
    is_pm: Literal["否", "是"]
    # Raw post source; scanned for ``upload://`` references.
    post_raw: str
    # Rendered post HTML; searched for the download URL of each upload.
    post_cooked: str
    like_count: str
    reply_count: str
    url: str
    created_at: str
# Matches an ``upload://`` reference up to the next whitespace, double quote
# or closing parenthesis.
re_upload = re.compile(r'upload://[^\s\"\)]+')


def process_row(row: Record) -> None:
    """Handle every ``upload://`` reference found in one archive row.

    Rows flagged as private messages are ignored. For all other rows, each
    reference found in ``post_raw`` is handed to ``process_upload`` together
    with the rendered HTML and a per-row cache list for the parsed document.
    """
    if row["is_pm"] != "是":
        # Shared across all uploads of this row so the HTML is parsed at most once.
        parsed_cache = []
        scheme_len = len("upload://")
        for match in re_upload.finditer(row["post_raw"]):
            reference = match.group(0)
            process_upload(reference[scheme_len:], row["post_cooked"], parsed_cache)
def process_upload(filename: str, cooked: str, cooked_bs: list[BeautifulSoup]) -> None:
    """Print a ``download <filename> <url>`` shell command for one upload.

    The URL is taken from the rendered HTML: first by looking for the
    short-url download link built from the file's stem, then by looking for
    an ``<img>`` whose ``data-base62-sha1`` attribute equals that stem.
    Nothing is printed when no usable URL is found.

    Args:
        filename: Upload name with the ``upload://`` scheme already removed.
        cooked: Rendered HTML of the post.
        cooked_bs: Caller-owned cache list; the parsed HTML is appended on
            first use so several uploads of one post share a single parse.
    """
    import shlex

    basename = Path(filename).stem
    short_url = f"/uploads/short-url/{basename}.jpeg?dl=1"
    if short_url in cooked:
        url = short_url
    else:
        if not cooked_bs:
            cooked_bs.append(BeautifulSoup(cooked, features="html.parser"))
        found = cooked_bs[0].find("img", {"data-base62-sha1": basename})
        if found is None:
            return
        # An <img> without a usable src would otherwise raise KeyError or
        # emit a command with a missing argument.
        url = found.get("src")
        if not isinstance(url, str) or not url:
            return
        if url.startswith(SERVER):
            # The generated script prepends the server itself.
            url = url[len(SERVER):]
    # Both values end up in a bash script: quote them so characters such as
    # "?", ";" or "$" are passed literally instead of being interpreted.
    print(f"download {shlex.quote(filename)} {shlex.quote(url)}")
def main(filename: str) -> None:
    """Write a bash download script for the archive CSV to standard output.

    The script header defines an empty ``COOKIE_T`` variable (to be filled in
    by the user), creates the ``output/`` and ``missing/`` directories, and
    declares a ``download`` function that skips files already present in
    either directory and moves a failed download into ``missing/``. One
    ``download`` call per upload is then emitted by ``process_row``.

    Args:
        filename: Path of the archive CSV file to read.
    """
    print("#!/bin/bash")
    print("set -euo pipefail")
    print("COOKIE_T=")
    print("mkdir -p output/ missing/")
    print("download() {")
    print(" if [[ -f output/$1 ]] || [[ -f missing/$1 ]]; then")
    print(" return")
    print(" fi")
    # Expansions in command position are double-quoted so file names and
    # URLs are never word-split or glob-expanded (URLs may contain "?").
    print(
        f' if ! http -F -d -o "output/$1" GET "{SERVER}$2" "Cookie:_t=$COOKIE_T"; then')
    print(' mv "output/$1" "missing/$1"')
    print(" fi")
    print("}")
    print("")
    # Explicit UTF-8: the archive contains non-ASCII text and must not be
    # decoded with a platform-dependent default encoding.
    with open(filename, newline="", encoding="utf-8") as csvfile:
        for row in csv.DictReader(csvfile):
            process_row(row)


if __name__ == "__main__":
    main("user_archive.csv")
beautifulsoup4>=4.14.3,<5
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment