-
-
Save kltng/c25422538e15e155bccef0e289ea3faa to your computer and use it in GitHub Desktop.
/**
 * Runs OCR on every JPEG in a Drive folder, producing one Google Doc per
 * image, then writes each Doc's name and text into a spreadsheet.
 *
 * Uses Drive.Files.insert, so the Advanced Drive Service (Drive API v2) must
 * be enabled for the script project.
 *
 * NOTE: every run converts every JPEG in the folder again and lists every
 * Google Doc in the folder, so repeated runs leave duplicate Docs and rows.
 *
 * @throws {Error} If no Drive folder named folderName exists.
 */
function extractTextOnOpen() {
  //ADD YOUR VALUES BELOW
  var folderName = "[YOUR PROJECT FOLDER]";
  var sheetId = "[YOUR SHEET ID]";

  //Define folder
  // Check hasNext() first: calling next() on an empty iterator fails with the
  // unhelpful "Cannot retrieve the next object: iterator has reached the end."
  var folders = DriveApp.getFoldersByName(folderName);
  if (!folders.hasNext()) {
    throw new Error('No Drive folder named "' + folderName + '" was found. ' +
        'Set folderName to the exact name of your project folder.');
  }
  var folder = folders.next();
  var folderId = folder.getId();

  //Find all jpegs in folder
  var images = folder.getFilesByType("image/jpeg");
  while (images.hasNext()) {
    //Convert each jpeg to a Google Doc with OCR
    var image = images.next();
    var imageName = image.getName();
    // Strip only the final extension so "scan.page1.jpg" becomes "scan.page1".
    var lastDot = imageName.lastIndexOf(".");
    var docName = lastDot > 0 ? imageName.substring(0, lastDot) : imageName;
    var resource = {
      title: docName,
      mimeType: "image/jpeg",
      // Create the Doc directly in the project folder. This avoids looking the
      // new file up by name afterwards, which could match an unrelated file
      // or find nothing at all.
      parents: [{ id: folderId }]
    };
    Drive.Files.insert(resource, image, { ocr: true });
  }

  //Find all Google Docs in folder
  var docs = folder.getFilesByType("application/vnd.google-apps.document");

  //Set up spreadsheet
  var ss = SpreadsheetApp.openById(sheetId);
  SpreadsheetApp.setActiveSpreadsheet(ss);
  Logger.log('File name: ' + ss.getName());
  var sheet = SpreadsheetApp.getActiveSheet();
  // Start from an empty sheet so earlier results are not mixed with this run.
  sheet.clear();
  sheet.appendRow(["Filename", "Text"]);

  //Populate spreadsheet with OCR text
  while (docs.hasNext()) {
    var docFile = docs.next();
    var doc = DocumentApp.openById(docFile.getId());
    //Add item data to spreadsheet
    sheet.appendRow([doc.getName(), doc.getBody().getText()]);
  }
}
That's great. Thank you.
Do you know if there is a way to extract vertical Japanese text as well? I have managed to extract Japanese, but it is always detected as horizontal text.
Striking. Thanks!
This worked for me thank you! However, I am only wanting to extract certain text from the OCR into my sheet. The JPEG contains lots of information but I want it to selectively put only certain information into the google sheet... does anyone know how I would do this? Thank you
You can try regular expressions if the data has certain patterns.
Hi, can you help me with my error? "Exception: Cannot retrieve the next object: iterator has reached the end."
Not working for me: it returns "Exception: Cannot retrieve the next object: iterator has reached the end.
extractTextOnOpen" :(
Excellent, Thank you