Created
October 15, 2025 09:33
-
-
Save reachlin/9f3c28063ca58736d9989ab4616158b9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Demo of my local screen-OCR MCP server. | |
| # The MCP server takes a screenshot, then extracts the text in it together with its coordinates. | |
| # The agent uses the returned tuple (x, y, width, height) to move the mouse to the specified text. | |
| # This agent can easily be integrated into a test-automation system or a screen reader for blind people. | |
| from agents.mcp import MCPServer, MCPServerSse | |
| from agents import Agent, Runner, function_tool | |
| import pyautogui | |
@function_tool
def move_mouse_to(x: int, y: int, width: int, height: int):
    """Move the mouse pointer to a point given in a scaled coordinate space.

    The point (x, y) is interpreted inside a coordinate space that is
    ``width`` by ``height`` units in size and is rescaled proportionally to
    the real screen size before the pointer is moved.

    Args:
        x: Horizontal position of the target inside the coordinate space.
        y: Vertical position of the target inside the coordinate space.
        width: Total width of the coordinate space that x refers to.
        height: Total height of the coordinate space that y refers to.

    Raises:
        ValueError: If width or height is not a positive number.
    """
    # NOTE(review): width/height are presumably the dimensions of the
    # screenshot returned by the OCR MCP server -- confirm against the server.
    if width <= 0 or height <= 0:
        # Fail with a readable message instead of a bare ZeroDivisionError so
        # the calling model can see what it passed wrong.
        raise ValueError(
            f"width and height must be positive, got {width} x {height}"
        )

    sw, sh = pyautogui.size()
    # Rescale from the source coordinate space to actual screen coordinates.
    cx = x * sw / width
    cy = y * sh / height
    print(f"move_mouse_to: {cx}, {cy}")
    # Third positional argument is the duration of the movement in seconds.
    pyautogui.moveTo(cx, cy, 2)
# Alternative prompt, kept for reference:
#q = "take a screenshot and summarize its contents. the screenshot will be a big array and each item is a set of coordinates followed by text recognized."
q = "then, find 'File' menu on my screen and move my mouse to it."


async def main():
    """Connect to the MCP server over SSE and run the agent on prompt ``q``.

    The agent is given the tools exposed by the MCP server plus the local
    ``move_mouse_to`` tool, and its final output is printed.
    """
    from agents.model_settings import ModelSettings

    async with MCPServerSse(
        name="My SSE Python Server",
        client_session_timeout_seconds=100,
        params={
            "url": "http://localhost:8000/sse",
            # "timeout": 100.0,
            # "sse_read_timeout": 100.0,
        },
    ) as mcp_server:
        agent = Agent(
            name="Assistant",
            instructions="Use the tools to answer the questions.",
            mcp_servers=[mcp_server],
            model_settings=ModelSettings(tool_choice="required"),
            tools=[move_mouse_to],
        )
        result = await Runner.run(agent, input=q)
        print(result.final_output)


# `async with` / `await` are only legal inside a coroutine in a regular Python
# module, so the script body lives in main() and is driven by asyncio.run().
# In a notebook or async REPL (where a loop is already running) call
# `await main()` instead.
if __name__ == "__main__":
    import asyncio

    asyncio.run(main())
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
sample outputs: