maycuatroi1 · February 6, 2026 07:01
diff --git a/gistfile1.py b/gistfile1.py
 from transformers import AutoModel, AutoTokenizer
 import torch

 # Identifier handed to both from_pretrained() calls below.
 model_name = 'deepseek-ai/DeepSeek-OCR-2'

 # Document mode with grounding. The prompt does not depend on the model
 # objects, so it is defined before anything is loaded.
 prompt = "<image>\n<|grounding|>Convert the document to markdown."

 # Both loaders are called with trust_remote_code=True.
 # NOTE(review): per the transformers docs this runs modelling code shipped
 # with the checkpoint -- confirm the repository is trusted.
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 model = AutoModel.from_pretrained(
     model_name,
     trust_remote_code=True,
     _attn_implementation='flash_attention_2',
     use_safetensors=True,
 )

 # Same three steps as the chained form, one per line: switch to eval mode,
 # move to the CUDA device, then cast to bfloat16.
 model = model.eval()
 model = model.cuda()
 model = model.to(torch.bfloat16)

 # infer() comes from the checkpoint's own code, so its exact contract is not
 # visible here; presumably results land under output/ because
 # save_results=True -- TODO confirm against the model card.
 result = model.infer(
     tokenizer,
     prompt=prompt,
     image_file='test.jpg',
     output_path='output/',
     base_size=1024,
     image_size=768,
     crop_mode=True,
     save_results=True,
 )
	from transformers import AutoModel, AutoTokenizer
	import torch

	# Identifier handed to both from_pretrained() calls below.
	model_name = 'deepseek-ai/DeepSeek-OCR-2'

	# Both loaders are called with trust_remote_code=True.
	# NOTE(review): per the transformers docs this runs modelling code shipped
	# with the checkpoint -- confirm the repository is trusted.
	tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
	model = AutoModel.from_pretrained(
	    model_name,
	    trust_remote_code=True,
	    _attn_implementation='flash_attention_2',
	    use_safetensors=True
	)
	# Switch to eval mode, move to the CUDA device, then cast to bfloat16.
	model = model.eval().cuda().to(torch.bfloat16)

	# Document mode with grounding.
	# The marker must be spelled "<|grounding|>" with bare pipes: the
	# backslash-escaped form ("<\|grounding\|>") is a markdown rendering
	# artifact that puts literal backslashes into the prompt and is an invalid
	# Python escape sequence.
	prompt = "<image>\n<|grounding|>Convert the document to markdown."

	# infer() comes from the checkpoint's own code, so its exact contract is not
	# visible here; presumably results land under output/ because
	# save_results=True -- TODO confirm against the model card.
	result = model.infer(
	    tokenizer,
	    prompt=prompt,
	    image_file='test.jpg',
	    output_path='output/',
	    base_size=1024,
	    image_size=768,
	    crop_mode=True,
	    save_results=True
	)
No results found