Created
September 7, 2025 14:20
-
-
Save esc5221/47d91a711210b4a0c892f909b07c2949 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| 최근 N시간 Claude 대화를 raw 형태로 추출 (extract the last N hours of Claude conversations in raw form) | |
| python3 extract_conversation.py --hours 24 --output chat.txt | |
| """ | |
import argparse
import json
import os
import sys
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from pathlib import Path
def truncate_text(text, max_length=800):
    """Shorten long text by keeping its head and tail around a marker.

    Text no longer than ``max_length`` is returned unchanged. Otherwise the
    first ~70% and the last ~30% of the ``max_length`` budget are kept (the
    head gets the larger share), whitespace adjacent to the cut is stripped,
    and the two parts are joined by a ``... [truncated] ...`` marker. The
    marker is not counted against ``max_length``, so the result may be
    longer than ``max_length``.

    Args:
        text: The string to shorten.
        max_length: Character budget shared by the kept head and tail.
            Negative values are treated as 0.

    Returns:
        ``text`` itself when it fits, otherwise the shortened form.
    """
    if len(text) <= max_length:
        return text

    budget = max(max_length, 0)
    front_length = int(budget * 0.7)
    back_length = int(budget * 0.3)

    front_part = text[:front_length].rstrip()
    # A tail length of 0 must yield an empty tail: ``text[-0:]`` is the whole
    # string, which would put the full text back into the "truncated" result.
    back_part = text[-back_length:].lstrip() if back_length > 0 else ""

    return f"{front_part}\n\n... [truncated] ...\n\n{back_part}"
def extract_text_content(content, hide_file_tools=False):
    """Flatten a message ``content`` value into one line of text.

    A string is returned stripped. A list is scanned for dict items:
    ``text`` items contribute their text, ``tool_use`` items contribute a
    short bracketed summary such as ``[Bash: <command>]``, and every other
    item is ignored; the pieces are joined with single spaces. Any other
    value is converted with ``str()`` and stripped.

    Args:
        content: A string, a list of content items, or any other value.
        hide_file_tools: When true, ``tool_use`` items for Read, Edit, Write
            and TodoWrite are left out of the result.

    Returns:
        The extracted text with surrounding whitespace removed.
    """
    if isinstance(content, str):
        return content.strip()
    if not isinstance(content, list):
        return str(content).strip()

    hidden_tools = ('Read', 'Edit', 'Write', 'TodoWrite')
    # tool name -> (input key shown in the summary, max characters kept)
    summary_fields = {
        'Bash': ('command', 120),
        'Grep': ('pattern', 80),
        'Read': ('file_path', 100),
        'Edit': ('file_path', 100),
        'Write': ('file_path', 100),
        'Glob': ('pattern', 80),
    }

    pieces = []
    for block in content:
        if not isinstance(block, dict):
            continue

        kind = block.get('type')
        if kind == 'text':
            pieces.append(block.get('text', ''))
            continue
        if kind != 'tool_use':
            continue

        name = block.get('name', 'unknown')
        params = block.get('input', {})
        if hide_file_tools and name in hidden_tools:
            continue

        # Only string names can match a table entry; anything else falls
        # through to the bare "[name]" form.
        field = summary_fields.get(name) if isinstance(name, str) else None
        if field is None:
            pieces.append(f"[{name}]")
        else:
            key, limit = field
            detail = params.get(key, '')[:limit]
            pieces.append(f"[{name}: {detail}]")

    return ' '.join(pieces).strip()
def _parse_iso_timestamp(value):
    """Parse an ISO-8601 string into a timezone-aware datetime.

    A trailing ``Z`` is rewritten to ``+00:00`` before parsing. A timestamp
    that carries no UTC offset is assumed to be UTC so that it can be
    compared with an aware cutoff.

    Returns:
        The aware ``datetime``, or ``None`` when ``value`` is not a string
        or cannot be parsed.
    """
    if not isinstance(value, str):
        return None
    try:
        parsed = datetime.fromisoformat(value.replace('Z', '+00:00'))
    except ValueError:
        return None
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def get_session_conversations(projects_dir, hours=8):
    """Collect recent user/assistant messages from JSONL logs, grouped by session.

    Every sub-directory of ``projects_dir`` is scanned for ``*.jsonl`` files
    and each line is parsed as one JSON record. A record is kept when its
    ``type`` is ``user`` or ``assistant`` and it has a ``message`` key.
    Records whose ``timestamp`` is older than ``hours`` before the current
    UTC time, or whose ``timestamp`` cannot be parsed, are skipped; records
    with no ``timestamp`` are not time-filtered. Blank lines, invalid JSON
    and JSON values that are not objects are skipped line by line, so one
    bad line does not discard the rest of its file.

    Args:
        projects_dir: Directory whose sub-directories hold the JSONL files.
            If it is not an existing directory the result is empty.
        hours: Size of the look-back window in hours.

    Returns:
        A ``defaultdict(list)`` mapping each session id (``'unknown'`` when
        the record has no ``sessionId``) to a list of dicts with the keys
        ``timestamp``, ``type``, ``message``, ``sessionId``, ``cwd`` and
        ``project``, in the order the records were read.
    """
    cutoff_time = datetime.now(timezone.utc) - timedelta(hours=hours)
    sessions = defaultdict(list)

    root = Path(projects_dir)
    if not root.is_dir():
        # Nothing to scan (e.g. the tool has never been run on this machine).
        return sessions

    for project_dir in root.iterdir():
        if not project_dir.is_dir():
            continue

        for jsonl_file in project_dir.glob("*.jsonl"):
            try:
                with open(jsonl_file, 'r', encoding='utf-8') as f:
                    for line in f:
                        try:
                            data = json.loads(line.strip())
                        except json.JSONDecodeError:
                            continue
                        if not isinstance(data, dict):
                            continue

                        # Time filter: applied only when a timestamp is present.
                        timestamp_str = data.get('timestamp', '')
                        if timestamp_str:
                            timestamp = _parse_iso_timestamp(timestamp_str)
                            if timestamp is None or timestamp < cutoff_time:
                                continue

                        # Keep user/assistant messages only.
                        if data.get('type') in ('user', 'assistant') and 'message' in data:
                            session_id = data.get('sessionId', 'unknown')
                            sessions[session_id].append({
                                'timestamp': timestamp_str,
                                'type': data['type'],
                                'message': data['message'],
                                'sessionId': session_id,
                                'cwd': data.get('cwd', ''),
                                'project': project_dir.name,
                            })
            except Exception as e:
                # Deliberate best-effort boundary: an unreadable file is
                # reported and skipped. The report goes to stderr so it never
                # mixes with the extracted conversation on stdout.
                print(f"ํ์ผ ์ฝ๊ธฐ ์ค๋ฅ: {jsonl_file} - {e}", file=sys.stderr)
                continue

    return sessions
def _parse_message_timestamp(value):
    """Parse an ISO-8601 timestamp string, reading a trailing ``Z`` as ``+00:00``."""
    return datetime.fromisoformat(value.replace('Z', '+00:00'))


def format_conversation(sessions, max_text_length=800, hide_file_tools=False):
    """Render sessions as a plain-text transcript.

    Messages inside each session are ordered by their ``timestamp`` string,
    and sessions are ordered by the timestamp of their first message, oldest
    first. Each session starts with a header line holding the ``cwd`` of its
    first message and the ``HH:MM-HH:MM`` range of its first and last
    timestamps (``unknown`` when they cannot be parsed; no timezone
    conversion is applied). Each message with non-empty extracted text is
    written as ``user: ...`` or ``assistant: ...`` followed by a blank line.
    A ``---`` separator is inserted when 10 or more minutes passed since the
    previously written message.

    The lists in ``sessions`` are not modified.

    Args:
        sessions: Mapping of session id to a list of message dicts carrying
            ``timestamp``, ``type``, ``message`` and ``cwd``.
        max_text_length: Passed to ``truncate_text`` for every message.
        hide_file_tools: Passed to ``extract_text_content``.

    Returns:
        The transcript as a single newline-joined string.
    """
    # Everything a malformed or missing timestamp can raise while being
    # parsed or subtracted (bad format, non-string value, naive/aware mix).
    timestamp_errors = (ValueError, TypeError, AttributeError)

    ordered_sessions = []
    for messages in sessions.values():
        if not messages:
            continue
        # sorted() instead of list.sort(): the caller's lists stay untouched.
        chronological = sorted(messages, key=lambda m: m.get('timestamp', ''))
        first_msg = chronological[0]
        ordered_sessions.append({
            'messages': chronological,
            'start_time': first_msg.get('timestamp', ''),
            'end_time': chronological[-1].get('timestamp', ''),
            'cwd': first_msg.get('cwd', ''),
        })

    # Oldest session first.
    ordered_sessions.sort(key=lambda s: s['start_time'])

    output_lines = []
    for session in ordered_sessions:
        # Session header.
        cwd = session['cwd']
        try:
            start_dt = _parse_message_timestamp(session['start_time'])
            end_dt = _parse_message_timestamp(session['end_time'])
            time_range = f"{start_dt.strftime('%H:%M')}-{end_dt.strftime('%H:%M')}"
        except timestamp_errors:
            time_range = "unknown"

        output_lines.append(f"=== ์ธ์ : {cwd} ({time_range}) ===")
        output_lines.append("")

        # Conversation body.
        prev_timestamp = None
        for msg in session['messages']:
            msg_type = msg['type']
            message = msg['message']
            # A message that is not a dict has no 'content' field; it is
            # treated as empty and skipped instead of raising.
            raw_content = message.get('content', '') if isinstance(message, dict) else ''
            content = extract_text_content(raw_content, hide_file_tools)
            if not content.strip():
                continue

            # Mark pauses of 10 minutes or more between written messages.
            current_timestamp_str = msg.get('timestamp', '')
            if prev_timestamp and current_timestamp_str:
                try:
                    elapsed = (
                        _parse_message_timestamp(current_timestamp_str)
                        - _parse_message_timestamp(prev_timestamp)
                    )
                    gap_minutes = elapsed.total_seconds() / 60
                except timestamp_errors:
                    # Unparseable timestamps simply produce no separator.
                    gap_minutes = 0
                if gap_minutes >= 10:
                    output_lines.append("---")
                    output_lines.append("")

            # Limit the text length.
            content = truncate_text(content, max_text_length)

            if msg_type == 'user':
                output_lines.append(f"user: {content}")
            elif msg_type == 'assistant':
                output_lines.append(f"assistant: {content}")
            output_lines.append("")

            prev_timestamp = current_timestamp_str

        output_lines.append("")  # blank line between sessions

    return '\n'.join(output_lines)
def main():
    """Command-line entry point: extract recent conversations and emit them.

    Parses ``--hours``, ``--projects-dir``, ``--output``, ``--max-length``
    and ``--hide-file-tools``, collects the matching sessions, formats them,
    and writes the transcript to the ``--output`` file or, when no file is
    given, to stdout. Progress and status messages go to stderr so that
    stdout carries only the transcript and can be piped or redirected
    cleanly.
    """
    # NOTE(review): the user-facing strings below look mis-encoded (Korean
    # mojibake) in this copy of the file; they are kept verbatim — confirm
    # against the original source.
    parser = argparse.ArgumentParser(description='Claude ๋ํ๋ฅผ raw ํํ๋ก ์ถ์ถ')
    parser.add_argument('--hours',
                        type=int,
                        default=8,
                        help='์ถ์ถํ ์๊ฐ ๋ฒ์ (๊ธฐ๋ณธ: 8์๊ฐ)')
    parser.add_argument('--projects-dir',
                        default=str(Path.home() / '.claude' / 'projects'),
                        help='Claude projects ๋๋ ํ ๋ฆฌ ๊ฒฝ๋ก')
    parser.add_argument('--output',
                        help='์ถ๋ ฅ ํ์ผ ๊ฒฝ๋ก (๋ฏธ์ง์ ์ stdout)')
    parser.add_argument('--max-length',
                        type=int,
                        default=800,
                        help='ํ ์คํธ ์ต๋ ๊ธธ์ด (๊ธฐ๋ณธ: 800์)')
    parser.add_argument('--hide-file-tools',
                        action='store_true',
                        help='Read, Edit, Write, TodoWrite ํด ํ์ ์จ๊ธฐ๊ธฐ')
    args = parser.parse_args()

    print(f"์ต๊ทผ {args.hours}์๊ฐ ๋ํ ์ถ์ถ ์ค...", file=sys.stderr)

    # Collect the conversations.
    sessions = get_session_conversations(args.projects_dir, args.hours)
    if not sessions:
        print("์ถ์ถ๋ ๋ํ๊ฐ ์์ต๋๋ค.", file=sys.stderr)
        return

    # Format them.
    conversation_text = format_conversation(sessions, args.max_length, args.hide_file_tools)

    # Emit the result.
    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(conversation_text)
        print(f"๋ํ ๋ด์ฉ์ {args.output}์ ์ ์ฅํ์ต๋๋ค.", file=sys.stderr)
    else:
        print(conversation_text)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment