Created
June 18, 2025 22:03
-
-
Save mseri/7427b254f4d184e5f901699469bd51eb to your computer and use it in GitHub Desktop.
Small Swift script to extract text from images (using Apple's Vision framework)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/swift | |
| // Started from the code in https://terminalbytes.com/iphone-8-solar-powered-vision-ocr-server/ | |
| // Edited from Mistral generated code: https://chat.mistral.ai/chat/563cacdf-6def-49e4-9df6-ee8e263978c5 | |
| import AppKit | |
| import CoreGraphics | |
| import Foundation | |
| import SwiftUI | |
| import Vision | |
/// Runs text recognition on the image file at `imagePath` and returns the recognized text.
///
/// The top candidate string of every text observation is collected and the strings are
/// joined with newlines.
///
/// - Parameter imagePath: File-system path of the image to read.
/// - Returns: The recognized text (possibly empty), or `nil` when the image cannot be
///   decoded, the Vision request fails, or the request yields no text observations array.
func processImageSync(imagePath: String) -> String? {
    let fileURL = URL(fileURLWithPath: imagePath)
    guard let imageSource = CGImageSourceCreateWithURL(fileURL as CFURL, nil),
          let cgImage = CGImageSourceCreateImageAtIndex(imageSource, 0, nil)
    else {
        return nil
    }

    var recognizedText: String?
    let request = VNRecognizeTextRequest { request, _ in
        guard let observations = request.results as? [VNRecognizedTextObservation] else {
            return
        }
        recognizedText = observations
            .compactMap { $0.topCandidates(1).first?.string }
            .joined(separator: "\n")
    }
    request.recognitionLevel = .accurate
    request.usesLanguageCorrection = true

    let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
    do {
        // `perform` is synchronous: the completion handler above has already run by the
        // time it returns, so no semaphore is needed. Waiting on one could block forever
        // if the handler were never invoked after a failure.
        try handler.perform([request])
    } catch {
        let message = "Text recognition failed for \(imagePath): \(error.localizedDescription)\n"
        FileHandle.standardError.write(Data(message.utf8))
        return nil
    }
    return recognizedText
}
/// Main window content: a drop target for an image file, an editable text area showing the
/// text recognized in that image, and "Clear", "Copy Text" and "Close App" buttons.
struct ContentView: View {
    /// The most recently dropped image, or `nil` when nothing is loaded.
    @State private var image: NSImage? = nil
    /// Contents of the text editor: recognized text, or an ERROR/WARNING status message.
    @State private var recognizedText = ""

    var body: some View {
        VStack(spacing: 20) {
            ZStack {
                RoundedRectangle(cornerRadius: 10)
                    .stroke(Color.gray, lineWidth: 2)
                    .frame(width: 300, height: 200)
                    .overlay(
                        Text("Drag and Drop Image Here")
                            .foregroundColor(.gray)
                    )
                // Bind the optional instead of force-unwrapping it.
                if let image = image {
                    Image(nsImage: image)
                        .resizable()
                        .scaledToFit()
                        .frame(width: 300, height: 200)
                }
            }
            .onDrop(of: ["public.file-url"], isTargeted: nil) { providers -> Bool in
                // Reject the drop when there is nothing to load.
                guard let provider = providers.first else { return false }
                provider.loadItem(forTypeIdentifier: "public.file-url", options: nil) { data, _ in
                    // State is only touched on the main queue.
                    DispatchQueue.main.async {
                        guard let data = data as? Data,
                              let url = URL(dataRepresentation: data, relativeTo: nil, isAbsolute: true),
                              let loadedImage = NSImage(contentsOf: url)
                        else {
                            self.recognizedText = "ERROR: Failed to load the dropped file as an image."
                            return
                        }
                        self.image = loadedImage
                        self.recognizeText(from: loadedImage)
                    }
                }
                return true
            }
            TextEditor(text: $recognizedText)
                .frame(
                    minWidth: 300, idealWidth: 300, maxWidth: .infinity,
                    minHeight: 200, idealHeight: 200, maxHeight: .infinity, alignment: .center
                )
                .border(Color.gray, width: 1)
            HStack(spacing: 20) {
                actionButton("Clear", background: .blue, foreground: .white) {
                    image = nil
                    recognizedText = ""
                }
                actionButton("Copy Text", background: .green, foreground: .black) {
                    copyToClipboard(text: recognizedText)
                }
                actionButton("Close App", background: .red, foreground: .white) {
                    NSApplication.shared.terminate(nil)
                }
            }
        }
        .padding()
    }

    /// Builds one of the full-width, colored buttons shown in the bottom row.
    private func actionButton(
        _ title: String,
        background: Color,
        foreground: Color,
        action: @escaping () -> Void
    ) -> some View {
        Button(action: action) {
            Text(title)
                .padding()
                .frame(maxWidth: .infinity)
                .background(background)
                .foregroundColor(foreground)
                .cornerRadius(8)
        }
        .buttonStyle(PlainButtonStyle())  // Ensure the entire button area is clickable
    }

    /// Recognizes text in `nsImage` and stores the result, or a status message, in the editor.
    ///
    /// The Vision request is performed on a background queue so that accurate-level
    /// recognition does not block the UI; every state update is dispatched to the main queue.
    ///
    /// - Parameter nsImage: The image to run text recognition on.
    func recognizeText(from nsImage: NSImage) {
        guard let cgImage = nsImage.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
            recognizedText = "ERROR: Failed to process image."
            return
        }

        let request = VNRecognizeTextRequest { request, _ in
            guard let observations = request.results as? [VNRecognizedTextObservation] else {
                DispatchQueue.main.async {
                    self.recognizedText = "WARNING: No text recognized."
                }
                return
            }
            let text = observations
                .compactMap { $0.topCandidates(1).first?.string }
                .joined(separator: "\n")
            DispatchQueue.main.async {
                self.recognizedText = text
            }
        }
        request.recognitionLevel = .accurate
        request.usesLanguageCorrection = true

        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
        DispatchQueue.global(qos: .userInitiated).async {
            do {
                try handler.perform([request])
            } catch {
                // Surface the failure instead of silently discarding it.
                let message = "ERROR: Text recognition failed: \(error.localizedDescription)"
                DispatchQueue.main.async {
                    self.recognizedText = message
                }
            }
        }
    }

    /// Replaces the contents of the general pasteboard with `text`.
    func copyToClipboard(text: String) {
        let pasteboard = NSPasteboard.general
        pasteboard.clearContents()
        pasteboard.setString(text, forType: .string)
    }
}
/// SwiftUI application used in `--gui` mode: presents a single `ContentView` window and
/// terminates the process once that view disappears.
struct GUIApp: App {
    init() {
        // Ask the shared application to activate, even if another app is currently active.
        let application = NSApplication.shared
        application.activate(ignoringOtherApps: true)
    }

    var body: some Scene {
        WindowGroup {
            ContentView()
                .onDisappear {
                    // The window was closed, so quit instead of lingering without a window.
                    NSApplication.shared.terminate(nil)
                }
        }
    }
}
/// Command-line entry point: recognizes the text in the image named by the first argument
/// and prints it to standard output.
///
/// Diagnostics go to standard error so that standard output only ever carries recognized
/// text. The process exits with `EX_USAGE` when no image path is given and with status 1
/// when recognition fails.
func runCommandLine() {
    /// Writes `message` followed by a newline to standard error.
    func reportError(_ message: String) {
        FileHandle.standardError.write(Data((message + "\n").utf8))
    }

    let arguments = CommandLine.arguments
    guard arguments.count > 1 else {
        reportError("Usage: swift TextRecognizer.swift <image-path> or swift TextRecognizer.swift --gui")
        exit(EX_USAGE)
    }

    let imagePath = arguments[1]
    guard let recognizedText = processImageSync(imagePath: imagePath) else {
        reportError("ERROR: could not extract text from \(imagePath)")
        exit(1)
    }
    print(recognizedText)
}
// Script entry point: start the SwiftUI app when `--gui` appears anywhere in the
// arguments, otherwise process the image path given on the command line.
let wantsGUI = CommandLine.arguments.contains("--gui")
switch wantsGUI {
case true:
    GUIApp.main()
case false:
    runCommandLine()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment