Skip to content

Instantly share code, notes, and snippets.

@esc5221
Created September 7, 2025 14:20
Show Gist options
  • Select an option

  • Save esc5221/47d91a711210b4a0c892f909b07c2949 to your computer and use it in GitHub Desktop.

Select an option

Save esc5221/47d91a711210b4a0c892f909b07c2949 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
Extract the last N hours of Claude conversations as a raw text transcript.
python3 extract_conversation.py --hours 24 --output chat.txt
"""
import json
import os
from pathlib import Path
from datetime import datetime, timedelta, timezone
import argparse
from collections import defaultdict
def truncate_text(text, max_length=800):
    """Shorten long text to its head and tail, joined by a truncation marker.

    Text of at most ``max_length`` characters is returned unchanged. Longer
    text keeps roughly the first 70% and the last 30% of ``max_length``
    characters (the head is deliberately the longer part); whitespace at the
    cut points is trimmed. The marker is added on top of the kept characters,
    so the result can be somewhat longer than ``max_length``.

    Args:
        text: The string to shorten.
        max_length: Character budget shared by head and tail. Negative values
            are treated as 0.

    Returns:
        ``text`` itself, or ``"<head>\\n\\n... [truncated] ...\\n\\n<tail>"``.
    """
    if len(text) <= max_length:
        return text

    budget = max(max_length, 0)
    front_length = int(budget * 0.7)
    back_length = int(budget * 0.3)

    front_part = text[:front_length].rstrip()
    # text[-0:] is the *whole* string, so a zero-length tail needs its own case.
    back_part = text[-back_length:].lstrip() if back_length > 0 else ""
    return f"{front_part}\n\n... [truncated] ...\n\n{back_part}"
# Tool calls left out of the transcript when ``hide_file_tools`` is set.
_FILE_TOOL_NAMES = frozenset({'Read', 'Edit', 'Write', 'TodoWrite'})

# tool name -> (input key shown in the summary, maximum characters kept)
_TOOL_SUMMARY_FIELDS = {
    'Bash': ('command', 120),
    'Grep': ('pattern', 80),
    'Glob': ('pattern', 80),
    'Read': ('file_path', 100),
    'Edit': ('file_path', 100),
    'Write': ('file_path', 100),
}


def _summarize_tool_use(item):
    """Render one tool_use item as ``[Name: detail]``, or ``[Name]`` if no detail applies."""
    tool_name = item.get('name', 'unknown')
    field = _TOOL_SUMMARY_FIELDS.get(tool_name) if isinstance(tool_name, str) else None
    if field is None:
        return f"[{tool_name}]"

    key, limit = field
    input_data = item.get('input')
    # Malformed records may carry a non-dict input or a non-string value.
    value = input_data.get(key, '') if isinstance(input_data, dict) else ''
    if value is None:
        value = ''
    elif not isinstance(value, str):
        value = str(value)
    return f"[{tool_name}: {value[:limit]}]"


def extract_text_content(content, hide_file_tools=False):
    """Extract the displayable text from a message's ``content`` value.

    Args:
        content: A plain string, a list of content items, or anything else.
            In a list, dict items of type ``'text'`` contribute their text and
            items of type ``'tool_use'`` contribute a short bracketed summary;
            every other item is ignored.
        hide_file_tools: When true, Read/Edit/Write/TodoWrite tool calls are
            omitted from the result.

    Returns:
        The stripped text. List parts are joined with single spaces. ``None``
        yields an empty string; any other type is passed through ``str()``.
    """
    if content is None:
        # Avoid rendering a missing content field as the literal "None".
        return ''
    if isinstance(content, str):
        return content.strip()
    if not isinstance(content, list):
        return str(content).strip()

    text_parts = []
    for item in content:
        if not isinstance(item, dict):
            continue
        item_type = item.get('type')
        if item_type == 'text':
            text = item.get('text', '')
            if text is None:
                text = ''
            elif not isinstance(text, str):
                text = str(text)
            text_parts.append(text)
        elif item_type == 'tool_use':
            tool_name = item.get('name', 'unknown')
            if (hide_file_tools and isinstance(tool_name, str)
                    and tool_name in _FILE_TOOL_NAMES):
                continue
            text_parts.append(_summarize_tool_use(item))
    return ' '.join(text_parts).strip()
def get_session_conversations(projects_dir, hours=8):
    """Collect user/assistant messages from the last ``hours`` hours, grouped by session.

    Every ``*.jsonl`` file directly inside each sub-directory of
    ``projects_dir`` is read line by line. Lines that are not valid JSON,
    are not JSON objects, or carry an unparseable timestamp are skipped.
    Records without a timestamp are kept, because they cannot be
    age-filtered. Timestamps without a UTC offset are assumed to be UTC.

    Args:
        projects_dir: Directory containing one sub-directory per project.
        hours: Size of the look-back window, in hours.

    Returns:
        A ``defaultdict(list)`` mapping session id (``'unknown'`` when the
        record has none) to message dicts with the keys ``timestamp``,
        ``type``, ``message``, ``sessionId``, ``cwd`` and ``project``, in file
        order. It is empty when ``projects_dir`` is not a directory.
    """
    cutoff_time = datetime.now(timezone.utc) - timedelta(hours=hours)
    sessions = defaultdict(list)

    root = Path(projects_dir)
    if not root.is_dir():
        return sessions

    # Walk every project directory.
    for project_dir in root.iterdir():
        if not project_dir.is_dir():
            continue

        # Process every JSONL file in the project.
        for jsonl_file in project_dir.glob("*.jsonl"):
            try:
                with open(jsonl_file, 'r', encoding='utf-8') as f:
                    for line in f:
                        line = line.strip()
                        if not line:
                            continue
                        try:
                            data = json.loads(line)
                        except ValueError:  # includes json.JSONDecodeError
                            continue
                        if not isinstance(data, dict):
                            continue

                        # Time filter.
                        timestamp_str = data.get('timestamp') or ''
                        if timestamp_str:
                            try:
                                timestamp = datetime.fromisoformat(
                                    timestamp_str.replace('Z', '+00:00'))
                            except (ValueError, TypeError, AttributeError):
                                continue
                            if timestamp.tzinfo is None:
                                # Comparing naive with aware datetimes raises
                                # TypeError, which used to abort the whole file.
                                timestamp = timestamp.replace(tzinfo=timezone.utc)
                            if timestamp < cutoff_time:
                                continue

                        # Keep only user/assistant messages.
                        if data.get('type') in ('user', 'assistant') and 'message' in data:
                            session_id = data.get('sessionId', 'unknown')
                            sessions[session_id].append({
                                'timestamp': timestamp_str,
                                'type': data['type'],
                                'message': data['message'],
                                'sessionId': session_id,
                                'cwd': data.get('cwd', ''),
                                'project': project_dir.name,
                            })
            except Exception as e:
                # Best effort: one unreadable file must not abort the export.
                print(f"ํŒŒ์ผ ์ฝ๊ธฐ ์˜ค๋ฅ˜: {jsonl_file} - {e}")
                continue

    return sessions
def _parse_iso_timestamp(value):
    """Parse an ISO-8601 string (a trailing 'Z' is accepted); return None on failure."""
    if not isinstance(value, str) or not value:
        return None
    try:
        return datetime.fromisoformat(value.replace('Z', '+00:00'))
    except ValueError:
        return None


def format_conversation(sessions, max_text_length=800, hide_file_tools=False):
    """Render the collected sessions as a plain-text transcript.

    Sessions are emitted oldest first (ordered by their first message), and
    the messages inside a session are ordered by timestamp. Each session
    starts with a header showing its working directory and its HH:MM-HH:MM
    time range ("unknown" if either end cannot be parsed). A ``---`` line is
    inserted whenever two consecutive printed messages are 10 minutes or
    more apart. Messages with no extractable text, or whose ``message`` field
    is not a dict, are skipped.

    Args:
        sessions: Mapping of session id to a list of message dicts as built
            by ``get_session_conversations``. The mapping and its lists are
            not modified.
        max_text_length: Per-message length budget passed to ``truncate_text``.
        hide_file_tools: Passed through to ``extract_text_content``.

    Returns:
        The transcript as one newline-joined string ('' when there is
        nothing to show).
    """
    output_lines = []

    # Order the messages inside each session chronologically. sorted() is
    # used so the caller's lists are left untouched.
    ordered_sessions = [
        sorted(messages, key=lambda m: m.get('timestamp') or '')
        for messages in sessions.values()
        if messages
    ]
    # Order the sessions chronologically too (oldest first).
    ordered_sessions.sort(key=lambda msgs: msgs[0].get('timestamp') or '')

    for messages in ordered_sessions:
        # Session header.
        first_msg = messages[0]
        last_msg = messages[-1]
        cwd = first_msg.get('cwd', '')
        start_dt = _parse_iso_timestamp(first_msg.get('timestamp'))
        end_dt = _parse_iso_timestamp(last_msg.get('timestamp'))
        if start_dt is not None and end_dt is not None:
            time_range = f"{start_dt.strftime('%H:%M')}-{end_dt.strftime('%H:%M')}"
        else:
            time_range = "unknown"

        output_lines.append(f"=== ์„ธ์…˜: {cwd} ({time_range}) ===")
        output_lines.append("")

        # Conversation body.
        prev_dt = None
        for msg in messages:
            message = msg.get('message')
            if not isinstance(message, dict):
                continue
            content = extract_text_content(message.get('content', ''), hide_file_tools)
            if not content.strip():
                continue

            # Mark gaps of 10 minutes or more between printed messages.
            current_dt = _parse_iso_timestamp(msg.get('timestamp'))
            if prev_dt is not None and current_dt is not None:
                try:
                    gap_minutes = (current_dt - prev_dt).total_seconds() / 60
                except TypeError:
                    # Mixing naive and aware timestamps: no gap can be computed.
                    gap_minutes = 0
                if gap_minutes >= 10:
                    output_lines.append("---")
                    output_lines.append("")

            # Enforce the per-message length budget.
            content = truncate_text(content, max_text_length)

            msg_type = msg.get('type')
            if msg_type == 'user':
                output_lines.append(f"user: {content}")
            elif msg_type == 'assistant':
                output_lines.append(f"assistant: {content}")
            output_lines.append("")
            prev_dt = current_dt

        output_lines.append("")  # blank line between sessions

    return '\n'.join(output_lines)
def main():
    """Command-line entry point: parse options, extract, format, then emit.

    The transcript is written to the ``--output`` file, or to stdout when no
    file is given. Progress and status messages go to stderr so that they
    never end up inside a transcript that is piped or redirected from stdout.
    """
    import sys  # local import: only this entry point needs it

    parser = argparse.ArgumentParser(description='Claude ๋Œ€ํ™”๋ฅผ raw ํ˜•ํƒœ๋กœ ์ถ”์ถœ')
    parser.add_argument('--hours',
                        type=int,
                        default=8,
                        help='์ถ”์ถœํ•  ์‹œ๊ฐ„ ๋ฒ”์œ„ (๊ธฐ๋ณธ: 8์‹œ๊ฐ„)')
    parser.add_argument('--projects-dir',
                        default=str(Path.home() / '.claude' / 'projects'),
                        help='Claude projects ๋””๋ ‰ํ† ๋ฆฌ ๊ฒฝ๋กœ')
    parser.add_argument('--output',
                        help='์ถœ๋ ฅ ํŒŒ์ผ ๊ฒฝ๋กœ (๋ฏธ์ง€์ •์‹œ stdout)')
    parser.add_argument('--max-length',
                        type=int,
                        default=800,
                        help='ํ…์ŠคํŠธ ์ตœ๋Œ€ ๊ธธ์ด (๊ธฐ๋ณธ: 800์ž)')
    parser.add_argument('--hide-file-tools',
                        action='store_true',
                        help='Read, Edit, Write, TodoWrite ํˆด ํ‘œ์‹œ ์ˆจ๊ธฐ๊ธฐ')
    args = parser.parse_args()

    print(f"์ตœ๊ทผ {args.hours}์‹œ๊ฐ„ ๋Œ€ํ™” ์ถ”์ถœ ์ค‘...", file=sys.stderr)

    # Extract the conversations.
    sessions = get_session_conversations(args.projects_dir, args.hours)
    if not sessions:
        print("์ถ”์ถœ๋œ ๋Œ€ํ™”๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", file=sys.stderr)
        return

    # Format them.
    conversation_text = format_conversation(sessions, args.max_length, args.hide_file_tools)

    # Emit the result.
    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(conversation_text)
        print(f"๋Œ€ํ™” ๋‚ด์šฉ์„ {args.output}์— ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.", file=sys.stderr)
    else:
        print(conversation_text)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment