whitead · November 26, 2025 18:49
diff --git a/checker.py b/checker.py
 # /// script
 # dependencies = [
 #   "anthropic",
 #   "edison-client",
 # ]
 # ///

 import anthropic
 import asyncio
 import base64
 import csv
 import os
 import sys
 from dataclasses import dataclass
 from datetime import datetime
 from edison_client import EdisonClient, JobNames

 # Terminal colors and styles
 class Style:
    """ANSI escape sequences used to style terminal output."""

    # Text attributes (SGR codes 1, 2 and 0).
    BOLD = "\033[1m"
    DIM = "\033[2m"
    RESET = "\033[0m"  # turns off every attribute and color set so far
    # Bright foreground colors (SGR codes 91-96).
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    RED = "\033[91m"
    BLUE = "\033[94m"
    CYAN = "\033[96m"
    MAGENTA = "\033[95m"
    # Foreground color 208 from the 256-color palette.
    ORANGE = "\033[38;5;208m"

 # Rubric score -> (colored two-column icon, short description).
 SCORE_ICONS = {
    0: (f"{Style.GREEN}✓✓{Style.RESET}", "Strong confirmatory"),
    1: (f"{Style.GREEN}✓ {Style.RESET}", "Confirmatory"),
    2: (f"{Style.DIM}○ {Style.RESET}", "No evidence"),
    3: (f"{Style.ORANGE}✗ {Style.RESET}", "Some contradicting"),
    4: (f"{Style.RED}✗✗{Style.RESET}", "Strong contradicting"),
 }

 # Braille glyphs shown one after another to animate a claim in progress.
 SPINNER_FRAMES = list("⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏")
 # Marker drawn after the spinner on a claim that is being processed.
 CARROT = f"{Style.CYAN}▶{Style.RESET}"

 @dataclass
 class ClaimResult:
    """Outcome of checking one extracted claim."""

    claim: str  # the claim text that was checked
    score: int  # rubric score read from the start of the answer; 2 when it cannot be parsed
    task_id: str  # task identifier; write_results appends it to the trajectory URL
    snippet: str  # first 100 characters of the answer text

 def clear_line():
    """Erase the current terminal line and put the cursor back at its start."""
    erase_line = "\033[2K"
    to_first_column = "\033[G"
    print(erase_line + to_first_column, end="", flush=True)

 def move_up(n=1):
    """Move the terminal cursor up ``n`` lines.

    Nothing is written when ``n`` is zero or negative: terminals read a
    zero parameter in ``ESC[nA`` as the default of one line, so emitting it
    would still move the cursor.
    """
    if n <= 0:
        return
    print(f"\033[{n}A", end="", flush=True)

 def print_header():
    """Print the boxed program banner with a blank line above and below."""
    frame = Style.BOLD + Style.CYAN
    title = "          🔬 Scientific Claim Checker 🔬                    "
    banner = [
        "",
        frame + "╔════════════════════════════════════════════════════════════╗" + Style.RESET,
        frame + "║" + Style.RESET + title + frame + "║" + Style.RESET,
        frame + "╚════════════════════════════════════════════════════════════╝" + Style.RESET,
        "",
    ]
    for row in banner:
        print(row)

 def get_api_key(name: str, env_var: str) -> str:
    """Return an API key from the environment, prompting for it if unset.

    Args:
        name: Label shown to the user for this key.
        env_var: Environment variable that is read first and, after a
            successful prompt, set to the entered key.

    Returns:
        The non-empty key.

    Exits the process with status 1 when no key is supplied.
    """
    import getpass  # local import: only the prompt path needs it

    key = os.environ.get(env_var)
    if key:
        print(f"  {Style.GREEN}✓{Style.RESET} {name} found in environment")
        return key

    print(f"  {Style.YELLOW}?{Style.RESET} {name} not found in environment")
    try:
        # Read without echo so the secret does not stay visible on screen.
        key = getpass.getpass(f"    Enter {name}: ").strip()
    except EOFError:
        # No input available (closed or exhausted stdin): treat as missing.
        key = ""
    if not key:
        print(f"  {Style.RED}✗{Style.RESET} {name} is required")
        sys.exit(1)
    os.environ[env_var] = key
    return key

 def read_pdf(path: str) -> str:
    """Read the file at ``path`` and return its bytes base64-encoded.

    Args:
        path: Location of the PDF on disk.

    Returns:
        The file content as a base64 string.

    Prints an error and exits with status 1 when the file is missing or
    cannot be read (for example a directory or a permission problem).
    """
    try:
        # Open directly instead of checking existence first: this also
        # covers directories and unreadable files.
        with open(path, "rb") as f:
            raw = f.read()
    except FileNotFoundError:
        print(f"  {Style.RED}✗{Style.RESET} File not found: {path}")
        sys.exit(1)
    except OSError as err:
        print(f"  {Style.RED}✗{Style.RESET} Could not read {path}: {err}")
        sys.exit(1)
    return base64.standard_b64encode(raw).decode("utf-8")

 def extract_claims(pdf_data: str, api_key: str) -> list[str]:
    """Extract scientific claims from PDF using Claude.

    Args:
        pdf_data: Base64-encoded PDF content.
        api_key: Anthropic API key.

    Returns:
        The non-blank lines of the reply, each stripped of surrounding
        whitespace. Empty when the reply contains no text.
    """
    print(f"\n{Style.BOLD}📄 Extracting claims from PDF...{Style.RESET}")

    client = anthropic.Anthropic(api_key=api_key)
    message = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=4096,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "document",
                        "source": {
                            "type": "base64",
                            "media_type": "application/pdf",
                            "data": pdf_data,
                        },
                    },
                    {
                        "type": "text",
                        "text": ("What are the main scientific claims of this document. "
                                 "List one per line in plain text. Do not number them or use bullet points."),
                    },
                ],
            }
        ],
    )

    # Collect every text block instead of indexing content[0]: the reply
    # may be empty or may not start with a text block.
    reply = "".join(
        block.text
        for block in message.content
        if getattr(block, "type", None) == "text"
    )
    claims = [line for raw in reply.splitlines() if (line := raw.strip())]
    print(f"  {Style.GREEN}✓{Style.RESET} Found {len(claims)} claims\n")
    return claims

 class ClaimDisplay:
    """Manages the terminal display for claim processing.

    Draws one line per claim and redraws the whole list in place as claims
    move from "pending" to "processing" to "done".
    """

    def __init__(self, claims: list[str]):
        self.claims = claims
        self.statuses = ["pending"] * len(claims)  # pending, processing, done
        # Result per claim; stays None until done, and on error.
        self.results = [None] * len(claims)
        self.spinner_idx = 0
        # Held while redrawing so concurrent redraws cannot interleave.
        self.lock = asyncio.Lock()

    def truncate(self, text: str, max_len: int = 55) -> str:
        """Shorten ``text`` to at most ``max_len`` characters.

        Text that is cut ends in "..."; when ``max_len`` is below 3 there is
        no room for the ellipsis and the text is simply cut.
        """
        if len(text) <= max_len:
            return text
        if max_len < 3:
            # A negative slice bound would otherwise keep most of the text.
            return text[:max(max_len, 0)]
        return text[:max_len - 3] + "..."

    def render(self) -> str:
        """Render all claims with their current status, one line each."""
        lines = []
        for i, claim in enumerate(self.claims):
            status = self.statuses[i]
            truncated = self.truncate(claim)

            if status == "pending":
                icon = Style.DIM + "○ " + Style.RESET
                lines.append(f"  {icon} {Style.DIM}{truncated}{Style.RESET}")
            elif status == "processing":
                spinner = Style.MAGENTA + SPINNER_FRAMES[self.spinner_idx] + Style.RESET
                lines.append(f"  {spinner} {CARROT} {truncated}")
            else:  # done
                result = self.results[i]
                if result is not None:
                    icon, _ = SCORE_ICONS.get(result.score, (Style.DIM + "? " + Style.RESET, "Unknown"))
                    lines.append(f"  {icon} {truncated}")
                else:
                    lines.append(f"  {Style.RED}✗ {Style.RESET}{truncated} {Style.DIM}(error){Style.RESET}")

        return "\n".join(lines)

    def print_initial(self):
        """Print the heading and the initial state of every claim."""
        print(Style.BOLD + "🔍 Checking claims against literature:" + Style.RESET)
        print(self.render())

    async def update_display(self):
        """Move the cursor up over the claim list and redraw it."""
        if not self.claims:
            # Nothing was drawn, so there is nothing to move over or repaint.
            return
        async with self.lock:
            move_up(len(self.claims))
            # Erase each line before repainting it: a "done" row is shorter
            # than the "processing" row it replaces and would otherwise
            # leave stale characters at the end of the line.
            frame = "\n".join("\033[2K" + row for row in self.render().split("\n"))
            print(frame, flush=True)

    def set_processing(self, idx: int):
        """Mark claim ``idx`` as being processed."""
        self.statuses[idx] = "processing"

    def set_done(self, idx: int, result: "ClaimResult | None"):
        """Mark claim ``idx`` as done and store its result (None on error)."""
        self.statuses[idx] = "done"
        self.results[idx] = result

    def advance_spinner(self):
        """Step to the next spinner frame, wrapping at the end."""
        self.spinner_idx = (self.spinner_idx + 1) % len(SPINNER_FRAMES)

 def _parse_score(answer) -> int:
    """Return the rubric score (0-4) that opens ``answer``, or 2 if none."""
    # Skip whitespace and markdown decoration that may precede the digit.
    text = str(answer if answer is not None else "").lstrip(" \t\r\n*_#>`\"'([")
    lead = text[:1]
    # A digit followed by another digit (such as a year) is not a score.
    if lead in ("0", "1", "2", "3", "4") and not text[1:2].isdigit():
        return int(lead)
    return 2  # Default to "No evidence" if parsing fails

 async def check_claim(client: EdisonClient, claim: str, idx: int, display: ClaimDisplay, sem: asyncio.Semaphore) -> ClaimResult | None:
    """Check a single claim using Edison API.

    Args:
        client: Edison client used to run the literature task.
        claim: Claim text to check.
        idx: Position of the claim in ``display``.
        display: Terminal display updated as the claim changes state.
        sem: Semaphore limiting how many claims are checked at once.

    Returns:
        The ClaimResult, or None when the task raised an exception.
    """
    rubric = [
        "Strong confirmatory evidence",
        "Confirmatory evidence",
        "No evidence one way or another",
        "Some contradicting evidence",
        "Strong contradicting evidence"
    ]
    rubric_text = "\n".join(f"{i}: {s}" for i, s in enumerate(rubric))

    task_data = {
        "name": JobNames.LITERATURE,
        "query": f'Has anyone provided concrete evidence disproving this claim:\n"{claim}"\n\nImportant: Start your answer response with an integer from 0 to 4 according to this rubric:\n{rubric_text}',
    }

    async with sem:
        display.set_processing(idx)
        await display.update_display()

        try:
            task_response = await client.arun_tasks_until_done(task_data)
            first = task_response[0]
            result = ClaimResult(
                claim=claim,
                score=_parse_score(first.answer),
                task_id=first.task_id,
                snippet=str(first.answer)[:100],
            )
        except Exception:
            # Best effort: a failed lookup is shown as an error row and
            # must not abort the other claims.
            result = None

        display.set_done(idx, result)
        await display.update_display()
        return result

 async def spinner_task(display: ClaimDisplay, stop_event: asyncio.Event):
    """Advance the spinner every 0.1 s until ``stop_event`` is set.

    The display is redrawn after each step only while at least one claim is
    in the "processing" state.
    """
    tick_seconds = 0.1
    while True:
        if stop_event.is_set():
            break
        await asyncio.sleep(tick_seconds)
        display.advance_spinner()
        if any(state == "processing" for state in display.statuses):
            await display.update_display()

 async def process_claims(claims: list[str], edison_key: str) -> list[ClaimResult]:
    """Process all claims with concurrent Edison API calls.

    Args:
        claims: Claim texts to check.
        edison_key: API key passed to the Edison client.

    Returns:
        Results for the claims that were checked successfully, in claim
        order; failed claims are left out.
    """
    client = EdisonClient(api_key=edison_key)
    display = ClaimDisplay(claims)
    sem = asyncio.Semaphore(5)  # Max 5 concurrent calls

    display.print_initial()

    stop_event = asyncio.Event()
    spinner = asyncio.create_task(spinner_task(display, stop_event))

    try:
        results = await asyncio.gather(
            *(check_claim(client, claim, i, display, sem) for i, claim in enumerate(claims))
        )
    finally:
        # Always stop the animation and release the client, also when the
        # gather is cancelled or raises.
        stop_event.set()
        try:
            await spinner
        finally:
            client.close()

    return [r for r in results if r is not None]

 def write_results(results: list[ClaimResult], pdf_path: str):
    """Save the results as a timestamped CSV in the current directory.

    The file is named after the PDF (without its extension) and holds one
    row per result: claim, score, score description and trajectory URL.
    Returns the name of the file that was written.
    """
    stem, _ext = os.path.splitext(os.path.basename(pdf_path))
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = f"{stem}_claims_{stamp}.csv"

    with open(output_file, "w", newline="", encoding="utf-8") as handle:
        writer = csv.writer(handle)
        writer.writerow(["Claim", "Score", "Score Description", "Trajectory URL"])
        writer.writerows(
            [
                item.claim,
                item.score,
                SCORE_ICONS.get(item.score, ("?", "Unknown"))[1],
                f"https://platform.edisonscientific.com/trajectories/{item.task_id}",
            ]
            for item in results
        )

    return output_file

 def print_summary(results: list[ClaimResult]):
    """Print how many results fall under each score, lowest score first.

    Each score gets one line with its icon, description, a bar of one
    block per result, and the count.
    """
    print(f"\n{Style.BOLD}📊 Summary:{Style.RESET}")

    tally = {}
    for item in results:
        tally.setdefault(item.score, 0)
        tally[item.score] += 1

    for level in sorted(tally):
        icon, desc = SCORE_ICONS.get(level, ("?", "Unknown"))
        count = tally[level]
        bar = f"{Style.CYAN}{'█' * count}{Style.RESET}"
        print(f"  {icon} {desc}: {bar} {count}")

 def print_legend():
    """Print every score with its icon and description, lowest score first."""
    print(f"\n{Style.BOLD}📋 Score Legend:{Style.RESET}")
    for score in sorted(SCORE_ICONS):
        icon, desc = SCORE_ICONS[score]
        print(f"  {icon} {score}: {desc}")

 async def main():
    """Run the checker on the PDF named on the command line.

    Steps: validate the arguments, obtain both API keys, read the PDF,
    extract its claims, check each claim, then print a summary and legend
    and write the results to a CSV file. Exits with status 1 on a usage
    error; returns early, writing no CSV, when no claims were extracted.
    """
    print_header()

    # Check arguments
    if len(sys.argv) != 2:
        # Name the script as it was invoked rather than a fixed file name.
        script = os.path.basename(sys.argv[0]) or "claim_checker.py"
        print(f"  {Style.RED}✗{Style.RESET} Usage: python {script} <path_to_pdf>")
        sys.exit(1)

    pdf_path = sys.argv[1]

    # Get API keys
    print(Style.BOLD + "🔑 Checking API keys:" + Style.RESET)
    anthropic_key = get_api_key("ANTHROPIC_API_KEY", "ANTHROPIC_API_KEY")
    edison_key = get_api_key("EDISON_API_KEY", "EDISON_API_KEY")

    # Read PDF
    print(f"\n{Style.BOLD}📂 Reading PDF:{Style.RESET}")
    print(f"  {Style.DIM}{pdf_path}{Style.RESET}")
    pdf_data = read_pdf(pdf_path)
    print(f"  {Style.GREEN}✓{Style.RESET} PDF loaded successfully")

    # Extract claims
    claims = extract_claims(pdf_data, anthropic_key)
    if not claims:
        # Nothing to look up: skip the literature check and the CSV.
        print(f"  {Style.YELLOW}?{Style.RESET} No claims were extracted; nothing to check")
        return

    # Process claims
    print()
    results = await process_claims(claims, edison_key)

    # Print summary
    print_summary(results)
    print_legend()

    # Write CSV
    output_file = write_results(results, pdf_path)
    print(f"\n{Style.BOLD}💾 Results saved:{Style.RESET}")
    print(f"  {Style.GREEN}✓{Style.RESET} {output_file}")
    print()

 # Entry point when executed as a script: run main() on a new event loop.
 if __name__ == "__main__":
    asyncio.run(main())
	# /// script
	# dependencies = [
	# "anthropic",
	# "edison-client",
	# ]
	# ///

	import anthropic
	import asyncio
	import base64
	import csv
	import os
	import sys
	from dataclasses import dataclass
	from datetime import datetime
	from edison_client import EdisonClient, JobNames

	# Terminal colors and styles
	class Style:
	BOLD = "\033[1m"
	DIM = "\033[2m"
	RESET = "\033[0m"
	GREEN = "\033[92m"
	YELLOW = "\033[93m"
	RED = "\033[91m"
	BLUE = "\033[94m"
	CYAN = "\033[96m"
	MAGENTA = "\033[95m"
	ORANGE = "\033[38;5;208m"

	# Icons for different score levels
	SCORE_ICONS = {
	0: (Style.GREEN + "✓✓" + Style.RESET, "Strong confirmatory"),
	1: (Style.GREEN + "✓ " + Style.RESET, "Confirmatory"),
	2: (Style.DIM + "○ " + Style.RESET, "No evidence"),
	3: (Style.ORANGE + "✗ " + Style.RESET, "Some contradicting"),
	4: (Style.RED + "✗✗" + Style.RESET, "Strong contradicting"),
	}

	SPINNER_FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
	CARROT = Style.CYAN + "▶" + Style.RESET

	@dataclass
	class ClaimResult:
	claim: str
	score: int
	task_id: str
	snippet: str

	def clear_line():
	print("\033[2K\033[G", end="", flush=True)

	def move_up(n=1):
	print(f"\033[{n}A", end="", flush=True)

	def print_header():
	print()
	print(Style.BOLD + Style.CYAN + "╔════════════════════════════════════════════════════════════╗" + Style.RESET)
	print(Style.BOLD + Style.CYAN + "║" + Style.RESET + " 🔬 Scientific Claim Checker 🔬 " + Style.BOLD + Style.CYAN + "║" + Style.RESET)
	print(Style.BOLD + Style.CYAN + "╚════════════════════════════════════════════════════════════╝" + Style.RESET)
	print()

	def get_api_key(name: str, env_var: str) -> str:
	"""Prompt for API key if not in environment."""
	key = os.environ.get(env_var)
	if key:
	print(f" {Style.GREEN}✓{Style.RESET} {name} found in environment")
	return key

	print(f" {Style.YELLOW}?{Style.RESET} {name} not found in environment")
	key = input(f" Enter {name}: ").strip()
	if not key:
	print(f" {Style.RED}✗{Style.RESET} {name} is required")
	sys.exit(1)
	os.environ[env_var] = key
	return key

	def read_pdf(path: str) -> str:
	"""Read and encode PDF to base64."""
	if not os.path.exists(path):
	print(f" {Style.RED}✗{Style.RESET} File not found: {path}")
	sys.exit(1)

	with open(path, "rb") as f:
	return base64.standard_b64encode(f.read()).decode("utf-8")

	def extract_claims(pdf_data: str, api_key: str) -> list[str]:
	"""Extract scientific claims from PDF using Claude."""
	print(f"\n{Style.BOLD}📄 Extracting claims from PDF...{Style.RESET}")

	client = anthropic.Anthropic(api_key=api_key)
	message = client.messages.create(
	model="claude-sonnet-4-5-20250929",
	max_tokens=4096,
	messages=[
	{
	"role": "user",
	"content": [
	{
	"type": "document",
	"source": {
	"type": "base64",
	"media_type": "application/pdf",
	"data": pdf_data,
	},
	},
	{
	"type": "text",
	"text": ("What are the main scientific claims of this document. "
	"List one per line in plain text. Do not number them or use bullet points."),
	},
	],
	}
	],
	)

	claims = [c.strip() for c in message.content[0].text.strip().split("\n") if c.strip()]
	print(f" {Style.GREEN}✓{Style.RESET} Found {len(claims)} claims\n")
	return claims

	class ClaimDisplay:
	"""Manages the terminal display for claim processing."""

	def __init__(self, claims: list[str]):
	self.claims = claims
	self.statuses = ["pending"] * len(claims) # pending, processing, done
	self.results = [None] * len(claims)
	self.spinner_idx = 0
	self.lock = asyncio.Lock()

	def truncate(self, text: str, max_len: int = 55) -> str:
	if len(text) <= max_len:
	return text
	return text[:max_len-3] + "..."

	def render(self):
	"""Render all claims with their current status."""
	lines = []
	for i, claim in enumerate(self.claims):
	status = self.statuses[i]
	truncated = self.truncate(claim)

	if status == "pending":
	icon = Style.DIM + "○ " + Style.RESET
	lines.append(f" {icon} {Style.DIM}{truncated}{Style.RESET}")
	elif status == "processing":
	spinner = Style.MAGENTA + SPINNER_FRAMES[self.spinner_idx] + Style.RESET
	lines.append(f" {spinner} {CARROT} {truncated}")
	else: # done
	result = self.results[i]
	if result:
	icon, _ = SCORE_ICONS.get(result.score, (Style.DIM + "? " + Style.RESET, "Unknown"))
	lines.append(f" {icon} {truncated}")
	else:
	lines.append(f" {Style.RED}✗ {Style.RESET}{truncated} {Style.DIM}(error){Style.RESET}")

	return "\n".join(lines)

	def print_initial(self):
	"""Print the initial state."""
	print(Style.BOLD + "🔍 Checking claims against literature:" + Style.RESET)
	print(self.render())

	async def update_display(self):
	"""Move cursor up and redraw."""
	async with self.lock:
	move_up(len(self.claims))
	print(self.render())

	def set_processing(self, idx: int):
	self.statuses[idx] = "processing"

	def set_done(self, idx: int, result: ClaimResult \| None):
	self.statuses[idx] = "done"
	self.results[idx] = result

	def advance_spinner(self):
	self.spinner_idx = (self.spinner_idx + 1) % len(SPINNER_FRAMES)

	async def check_claim(client: EdisonClient, claim: str, idx: int, display: ClaimDisplay, sem: asyncio.Semaphore) -> ClaimResult \| None:
	"""Check a single claim using Edison API."""
	rubric = [
	"Strong confirmatory evidence",
	"Confirmatory evidence",
	"No evidence one way or another",
	"Some contradicting evidence",
	"Strong contradicting evidence"
	]
	rubric_text = "\n".join([f"{i}: {s}" for i, s in enumerate(rubric)])

	task_data = {
	"name": JobNames.LITERATURE,
	"query": f'Has anyone provided concrete evidence disproving this claim:\n"{claim}"\n\nImportant: Start your answer response with an integer from 0 to 4 according to this rubric:\n{rubric_text}',
	}

	async with sem:
	display.set_processing(idx)
	await display.update_display()

	try:
	task_response = await client.arun_tasks_until_done(task_data)
	try:
	score = int(task_response[0].answer[0])
	except ValueError:
	score = 2 # Default to "No evidence" if parsing fails
	task_id = task_response[0].task_id
	snippet = str(task_response[0].answer)[:100]

	result = ClaimResult(claim=claim, score=score, task_id=task_id, snippet=snippet)
	display.set_done(idx, result)
	await display.update_display()
	return result
	except Exception as e:
	display.set_done(idx, None)
	await display.update_display()
	return None

	async def spinner_task(display: ClaimDisplay, stop_event: asyncio.Event):
	"""Animate the spinner while processing."""
	while not stop_event.is_set():
	await asyncio.sleep(0.1)
	display.advance_spinner()
	if "processing" in display.statuses:
	await display.update_display()

	async def process_claims(claims: list[str], edison_key: str) -> list[ClaimResult]:
	"""Process all claims with concurrent Edison API calls."""
	client = EdisonClient(api_key=edison_key)
	display = ClaimDisplay(claims)
	sem = asyncio.Semaphore(5) # Max 5 concurrent calls

	display.print_initial()

	stop_event = asyncio.Event()
	spinner = asyncio.create_task(spinner_task(display, stop_event))

	tasks = [check_claim(client, claim, i, display, sem) for i, claim in enumerate(claims)]
	results = await asyncio.gather(*tasks)

	# close client
	client.close()

	stop_event.set()
	await spinner

	return [r for r in results if r is not None]

	def write_results(results: list[ClaimResult], pdf_path: str):
	"""Write results to CSV file."""
	timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	base_name = os.path.splitext(os.path.basename(pdf_path))[0]
	output_file = f"{base_name}_claims_{timestamp}.csv"

	with open(output_file, "w", newline="", encoding="utf-8") as f:
	writer = csv.writer(f)
	writer.writerow(["Claim", "Score", "Score Description", "Trajectory URL"])

	for r in results:
	_, desc = SCORE_ICONS.get(r.score, ("?", "Unknown"))
	url = f"https://platform.edisonscientific.com/trajectories/{r.task_id}"
	writer.writerow([r.claim, r.score, desc, url])

	return output_file

	def print_summary(results: list[ClaimResult]):
	"""Print a summary of the results."""
	print(f"\n{Style.BOLD}📊 Summary:{Style.RESET}")

	score_counts = {}
	for r in results:
	score_counts[r.score] = score_counts.get(r.score, 0) + 1

	for score in sorted(score_counts.keys()):
	icon, desc = SCORE_ICONS.get(score, ("?", "Unknown"))
	count = score_counts[score]
	bar = Style.CYAN + "█" * count + Style.RESET
	print(f" {icon} {desc}: {bar} {count}")

	def print_legend():
	"""Print the score legend."""
	print(f"\n{Style.BOLD}📋 Score Legend:{Style.RESET}")
	for score, (icon, desc) in sorted(SCORE_ICONS.items()):
	print(f" {icon} {score}: {desc}")

	async def main():
	print_header()

	# Check arguments
	if len(sys.argv) != 2:
	print(f" {Style.RED}✗{Style.RESET} Usage: python claim_checker.py <path_to_pdf>")
	sys.exit(1)

	pdf_path = sys.argv[1]

	# Get API keys
	print(Style.BOLD + "🔑 Checking API keys:" + Style.RESET)
	anthropic_key = get_api_key("ANTHROPIC_API_KEY", "ANTHROPIC_API_KEY")
	edison_key = get_api_key("EDISON_API_KEY", "EDISON_API_KEY")

	# Read PDF
	print(f"\n{Style.BOLD}📂 Reading PDF:{Style.RESET}")
	print(f" {Style.DIM}{pdf_path}{Style.RESET}")
	pdf_data = read_pdf(pdf_path)
	print(f" {Style.GREEN}✓{Style.RESET} PDF loaded successfully")

	# Extract claims
	claims = extract_claims(pdf_data, anthropic_key)

	# Process claims
	print()
	results = await process_claims(claims, edison_key)

	# Print summary
	print_summary(results)
	print_legend()

	# Write CSV
	output_file = write_results(results, pdf_path)
	print(f"\n{Style.BOLD}💾 Results saved:{Style.RESET}")
	print(f" {Style.GREEN}✓{Style.RESET} {output_file}")
	print()

	if __name__ == "__main__":
	asyncio.run(main())
No results found