Skip to content

Instantly share code, notes, and snippets.

@Wybxc
Last active July 28, 2024 01:23
Show Gist options
  • Select an option

  • Save Wybxc/546f6d8f3d0c97428242129cdb7932ff to your computer and use it in GitHub Desktop.

Select an option

Save Wybxc/546f6d8f3d0c97428242129cdb7932ff to your computer and use it in GitHub Desktop.
https://github.com/CosmosShadow/gptpdf 的一个命令行界面
import os
from gptpdf import parse_pdf
from GeneralAgent import Agent
import fire
def parse(
    pdf_path: str,
    output_dir: str = "./",
    prompt: dict | None = None,
    api_key: str | None = None,
    base_url: str | None = None,
    model: str = "gpt-4o-mini",
    verbose: bool = False,
    gpt_worker: int = 1,
):
    """Run gptpdf's ``parse_pdf`` on a PDF file.

    Every argument is forwarded to ``parse_pdf`` under the same keyword name;
    see the gptpdf documentation for their exact meaning.

    Args:
        pdf_path: Path of the PDF to parse.
        output_dir: Output directory handed to ``parse_pdf``.
        prompt: Optional prompt overrides handed to ``parse_pdf``.
        api_key: API key. When ``None``, the value of the ``OPENAI_API_KEY``
            environment variable is used instead (which may itself be unset).
        base_url: Optional API base URL handed to ``parse_pdf``.
        model: Model name handed to ``parse_pdf``.
        verbose: Verbosity flag handed to ``parse_pdf``.
        gpt_worker: Worker count handed to ``parse_pdf``.

    Returns:
        None. The return value of ``parse_pdf`` is discarded.
    """
    # Only a missing key (None) triggers the environment fallback; an
    # explicitly supplied value is always passed through untouched.
    resolved_key = os.environ.get("OPENAI_API_KEY") if api_key is None else api_key

    options = {
        "pdf_path": pdf_path,
        "output_dir": output_dir,
        "prompt": prompt,
        "api_key": resolved_key,
        "base_url": base_url,
        "model": model,
        "verbose": verbose,
        "gpt_worker": gpt_worker,
    }
    parse_pdf(**options)
def translate(
    markdown_path: str,
    api_key: str | None = None,
    prompt: str | None = None,
    base_url: str | None = None,
    model: str = "gpt-4o-mini",
    verbose: bool = False,
):
    """Translate a markdown file with a GeneralAgent ``Agent`` and save the result.

    The file at ``markdown_path`` is read as UTF-8, appended to the prompt
    (separated by a blank line) and sent to the agent in a single ``run``
    call. The agent's answer is written, as UTF-8, to ``output.zh.md`` in the
    same directory as the input file.

    Args:
        markdown_path: Path of the markdown file to translate.
        api_key: API key. When ``None``, the value of the ``OPENAI_API_KEY``
            environment variable is used instead (which may itself be unset).
        prompt: Instruction text placed before the document. When ``None``, a
            built-in prompt asking for a Chinese translation is used.
        base_url: Optional API base URL handed to ``Agent``.
        model: Model name handed to ``Agent``.
        verbose: Forwarded to ``Agent.run`` as its ``display`` argument.

    Returns:
        None.
    """
    # Built-in instructions. The continuation lines sit at column 0 on purpose
    # so that no indentation leaks into the text sent to the model.
    default_prompt = """使用markdown语法,这篇文章翻译为中文,并调整其中可能存在的格式错误。你必须做到:
1. 只翻译文章的正文内容,例如,代码块中的内容不要翻译。
2. 不要解释和输出无关的文字,直接输出翻译后的内容。例如,严禁输出 “以下是我根据对论文部分内容的翻译:”这样的例子,而是应该直接输出markdown。
3. 内容不要包含在```markdown ```中、段落公式使用 $$ $$ 的形式、行内公式使用 $ $ 的形式。
再次强调,不要解释和输出无关的文字,直接输出翻译后的内容。
"""

    # Only a missing value (None) triggers a fallback; explicit values win.
    resolved_key = os.environ.get("OPENAI_API_KEY") if api_key is None else api_key
    instructions = default_prompt if prompt is None else prompt

    # The agent is created before the input file is touched, as in the
    # original flow.
    translator = Agent(
        "你是一个PDF文档翻译器,使用markdown和latex语法输出文档的内容。",
        api_key=resolved_key,
        base_url=base_url,
        disable_python_run=True,
        model=model,
    )

    with open(markdown_path, "r", encoding="utf-8") as source_file:
        document = source_file.read()

    # Instructions first, then a blank line, then the full document.
    translated = translator.run([instructions + "\n\n" + document], display=verbose)

    # The output always lands next to the input under a fixed file name.
    target_dir = os.path.dirname(markdown_path)
    target_path = os.path.join(target_dir, "output.zh.md")
    with open(target_path, "w", encoding="utf-8") as target_file:
        target_file.write(translated)
if __name__ == "__main__":
    # Expose both functions as Fire sub-commands: `parse` and `translate`.
    fire.Fire(dict(parse=parse, translate=translate))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment