Last active
July 28, 2024 01:23
-
-
Save Wybxc/546f6d8f3d0c97428242129cdb7932ff to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| from gptpdf import parse_pdf | |
| from GeneralAgent import Agent | |
| import fire | |
def parse(
    pdf_path: str,
    output_dir: str = "./",
    prompt: dict | None = None,
    api_key: str | None = None,
    base_url: str | None = None,
    model: str = "gpt-4o-mini",
    verbose: bool = False,
    gpt_worker: int = 1,
):
    """Forward a PDF-parsing request to ``gptpdf.parse_pdf``.

    Every argument is passed through to ``parse_pdf`` under the same keyword
    name. The only local logic is the API-key fallback: when ``api_key`` is
    ``None`` it is looked up in the ``OPENAI_API_KEY`` environment variable
    (and remains ``None`` if that variable is unset).

    Whatever ``parse_pdf`` returns is discarded; this function returns
    ``None``.
    """
    # An explicitly supplied key always wins over the environment.
    resolved_key = (
        os.environ.get("OPENAI_API_KEY") if api_key is None else api_key
    )

    options = {
        "pdf_path": pdf_path,
        "output_dir": output_dir,
        "prompt": prompt,
        "api_key": resolved_key,
        "base_url": base_url,
        "model": model,
        "verbose": verbose,
        "gpt_worker": gpt_worker,
    }
    parse_pdf(**options)
def translate(
    markdown_path: str,
    api_key: str | None = None,
    prompt: str | None = None,
    base_url: str | None = None,
    model: str = "gpt-4o-mini",
    verbose: bool = False,
):
    """Translate a markdown file with an LLM agent and save the result.

    The file at ``markdown_path`` is read as UTF-8, appended to ``prompt``
    (separated by a blank line) and sent to a ``GeneralAgent.Agent`` in a
    single ``run`` call. The returned text is written as UTF-8 to
    ``output.zh.md`` in the same directory as the input file.

    Args:
        markdown_path: Path of the markdown file to translate.
        api_key: API key for the agent; when ``None`` it is read from the
            ``OPENAI_API_KEY`` environment variable.
        prompt: Instruction text placed before the document. When ``None``
            a built-in Chinese prompt is used that asks for a translation
            into Chinese as plain markdown.
        base_url: Passed to ``Agent`` unchanged.
        model: Passed to ``Agent`` unchanged.
        verbose: Passed to ``Agent.run`` as ``display``.
    """
    # An explicitly supplied key always wins over the environment.
    resolved_key = (
        os.environ.get("OPENAI_API_KEY") if api_key is None else api_key
    )

    instructions = prompt
    if instructions is None:
        instructions = """使用markdown语法,这篇文章翻译为中文,并调整其中可能存在的格式错误。你必须做到:
1. 只翻译文章的正文内容,例如,代码块中的内容不要翻译。
2. 不要解释和输出无关的文字,直接输出翻译后的内容。例如,严禁输出 “以下是我根据对论文部分内容的翻译:”这样的例子,而是应该直接输出markdown。
3. 内容不要包含在```markdown ```中、段落公式使用 $$ $$ 的形式、行内公式使用 $ $ 的形式。
再次强调,不要解释和输出无关的文字,直接输出翻译后的内容。
"""

    # The agent is created before the input file is opened, as in the
    # original statement order.
    translator = Agent(
        "你是一个PDF文档翻译器,使用markdown和latex语法输出文档的内容。",
        api_key=resolved_key,
        base_url=base_url,
        disable_python_run=True,
        model=model,
    )

    with open(markdown_path, "r", encoding="utf-8") as source_file:
        source_text = source_file.read()

    # Instructions first, then a blank line, then the document itself.
    request = instructions + "\n\n" + source_text
    translated = translator.run([request], display=verbose)

    # The output always lands next to the input under a fixed file name.
    target_dir = os.path.dirname(markdown_path)
    target_path = os.path.join(target_dir, "output.zh.md")
    with open(target_path, "w", encoding="utf-8") as target_file:
        target_file.write(translated)
if __name__ == "__main__":
    # Expose the two functions as Fire subcommands named after themselves,
    # e.g. `script.py parse ...` and `script.py translate ...`.
    fire.Fire(dict(parse=parse, translate=translate))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment