Created
November 28, 2025 01:14
-
-
Save rodjjo/b18dfe6df8022d5c2bcb36534bcee702 to your computer and use it in GitHub Desktop.
generate_caption_lmstudio.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import base64 | |
| import lmstudio as lms | |
| import sys | |
| from PIL import Image | |
| import math | |
| import io | |
| import os | |
| import signal | |
| from tqdm import tqdm | |
| MAX_PIXELS = 768 * 768 # 589,824 | |
| SHOUD_STOP = False | |
def handle_signal(_signum, _frame):
    """Signal handler: request a graceful stop of the captioning loop.

    Flips the module-level SHOUD_STOP flag so the main loop finishes the
    image it is currently processing and then exits cleanly.
    """
    global SHOUD_STOP
    SHOUD_STOP = True
def resize_by_total_pixels(path, max_pixels=MAX_PIXELS, out_path="temp.jpg"):
    """Downscale an image so width * height <= max_pixels, keeping aspect ratio.

    The (possibly resized) image is always written to ``out_path`` as a JPEG,
    so callers get a normalized file even when no resize was needed.

    Args:
        path: Path of the source image.
        max_pixels: Upper bound on the output's total pixel count.
        out_path: Destination JPEG path (default "temp.jpg", matching the
            original hard-coded behavior).

    Returns:
        The path the JPEG was written to.
    """
    with Image.open(path) as img:
        w, h = img.size
        total = w * h
        if total > max_pixels:
            # sqrt scales both axes equally so the area hits the pixel budget.
            scale = math.sqrt(max_pixels / total)
            img = img.resize((int(w * scale), int(h * scale)), Image.LANCZOS)
        # JPEG cannot store alpha or palette modes (e.g. RGBA/P PNGs, which
        # this script accepts) — convert to RGB before saving to avoid OSError.
        if img.mode != "RGB":
            img = img.convert("RGB")
        img.save(out_path, format="JPEG")
    return out_path
if __name__ == '__main__':
    # Register signal handlers so Ctrl+C / SIGTERM stop the loop gracefully
    # after the image currently being captioned.
    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)

    model = lms.llm("llama-joycaption-beta-one-hf-llava-mmproj")

    directory = sys.argv[1]
    assert os.path.isdir(directory), "expected a directory"

    # Collect images that do not yet have a sibling .txt caption.
    # lower() makes the extension check case-insensitive (.JPG, .PNG, ...)
    # and sorted() gives a deterministic processing order across runs.
    extensions = ('.jpg', '.jpeg', '.png')
    image_paths = []
    for name in sorted(os.listdir(directory)):
        if not name.lower().endswith(extensions):
            continue
        image_path = os.path.join(directory, name)
        text_path = image_path.rsplit('.', maxsplit=1)[0] + ".txt"
        if os.path.exists(text_path):
            continue  # already captioned; skip so reruns resume where they left off
        image_paths.append(image_path)

    with lms.Client() as client:
        for image_path in tqdm(image_paths):
            if SHOUD_STOP:
                break  # a signal requested a graceful stop
            text_path = image_path.rsplit('.', maxsplit=1)[0] + ".txt"
            # resize_by_total_pixels(image_path)
            # Prepare the image for the model.
            image_handle = client.files.prepare_image(image_path)
            # Fresh chat per image so captions don't leak between files.
            chat = lms.Chat("You are a helpful image captioner.")
            chat.add_user_message(
                "Output a short stable diffusion prompt that is indistinguishable from a real stable diffusion prompt.",
                images=[image_handle]
            )
            # Get the response from the VLM.
            response = model.respond(
                chat,
                on_message=chat.append,
            )
            # Normalize and strip a phrase the model tends to hallucinate.
            contents = response.content.lower().replace("jpeg artifacts", "")
            with open(text_path, "w") as fp:
                fp.write(contents)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment