Skip to content

Instantly share code, notes, and snippets.

@DamianEdwards
Created January 17, 2026 20:49
Show Gist options
  • Select an option

  • Save DamianEdwards/2c3d4f34421b6fc052dc39ab19ed78a2 to your computer and use it in GitHub Desktop.

Select an option

Save DamianEdwards/2c3d4f34421b6fc052dc39ab19ed78a2 to your computer and use it in GitHub Desktop.
Batch OCR PDFs using NAPS2 with auto-acquisition of portable NAPS2 CLI
# OCR-Pdfs.ps1
# Batch-OCR PDFs using NAPS2.Console.exe
<#
.SYNOPSIS
Batch-OCRs a single PDF, or every PDF found under a folder, using NAPS2.Console.exe.
.DESCRIPTION
Locates NAPS2.Console.exe (acquiring a portable copy when no installation is found),
runs it once per input PDF, and writes the results plus a timestamped log file to the
output folder. PDFs whose output file already exists are skipped.
.PARAMETER InputPath
Path to a PDF file, or to a folder that is searched recursively for *.pdf files.
The path must exist (validated with Test-Path).
.PARAMETER OutputPath
Folder that receives the OCR output and the log file. When omitted, an "OCR" folder
inside the input folder (or next to the input file) is used.
.EXAMPLE
.\OCR-Pdfs.ps1 -InputPath C:\Scans
.EXAMPLE
.\OCR-Pdfs.ps1 -InputPath C:\Scans\invoice.pdf -OutputPath D:\Searchable
#>
[CmdletBinding()]
param(
[Parameter(Mandatory = $true, HelpMessage = "Path to a PDF file or folder containing PDFs to OCR.")]
[ValidateScript({ Test-Path $_ })]
[string]$InputPath,
[Parameter(Mandatory = $false, HelpMessage = "Path to the output folder. Defaults to InputPath\OCR (or parent folder\OCR for single file).")]
[string]$OutputPath
)
# Turn non-terminating cmdlet errors into terminating ones so the try/catch blocks
# throughout the script can handle them.
$ErrorActionPreference = "Stop"
#region NAPS2 Auto-Acquisition
function Get-Naps2DownloadInfo {
    <#
    .SYNOPSIS
    Looks up the latest NAPS2 release on GitHub and describes its portable ZIP asset.
    .DESCRIPTION
    Calls the GitHub "latest release" API for cyanfish/naps2 and picks the first asset
    whose name matches naps2-<anything>-win-<arch>.zip for the current architecture.
    .OUTPUTS
    Hashtable with the keys Version (release tag), DownloadUrl and FileName.
    Throws when the API call fails or no matching asset exists.
    #>
    [CmdletBinding()]
    param()

    # x64 is the default (also used as the fallback on a 32-bit OS); only a 64-bit OS
    # that reports ARM64 selects the arm64 build.
    $arch = "x64"
    if ([System.Environment]::Is64BitOperatingSystem -and $env:PROCESSOR_ARCHITECTURE -eq "ARM64") {
        $arch = "arm64"
    }

    $releaseApiUrl = "https://api.github.com/repos/cyanfish/naps2/releases/latest"
    $requestHeaders = @{ "User-Agent" = "NAPS2-AutoAcquire" }

    Write-Verbose "Fetching latest NAPS2 release info from GitHub..."
    try {
        $release = Invoke-RestMethod -Uri $releaseApiUrl -Headers $requestHeaders -ErrorAction Stop
    }
    catch {
        throw "Failed to query GitHub for NAPS2 releases: $_"
    }

    # Portable ZIP asset names look like naps2-8.2.1-win-x64.zip
    $pattern = "naps2-.*-win-$arch\.zip$"
    $asset = $null
    foreach ($candidate in $release.assets) {
        if ($candidate.name -match $pattern) {
            $asset = $candidate
            break
        }
    }

    if (-not $asset) {
        throw "Could not find portable NAPS2 ZIP for architecture '$arch' in release $($release.tag_name)"
    }

    $downloadInfo = @{}
    $downloadInfo.Version = $release.tag_name
    $downloadInfo.DownloadUrl = $asset.browser_download_url
    $downloadInfo.FileName = $asset.name
    return $downloadInfo
}
function Install-Naps2Portable {
    <#
    .SYNOPSIS
    Downloads and extracts the portable NAPS2 to a local directory.
    .DESCRIPTION
    Downloads the latest NAPS2 portable ZIP from GitHub releases (as described by
    Get-Naps2DownloadInfo), extracts it into the install directory, records the release
    tag in a ".version" file there, and then asks NAPS2.Console.exe to install the
    English OCR language pack. A failed language-pack install only produces a warning.
    .PARAMETER InstallPath
    Directory to extract into. Defaults to $env:LOCALAPPDATA\NAPS2-Portable and is
    created when missing.
    .OUTPUTS
    The full path to NAPS2.Console.exe. Throws when the download or extraction fails or
    the executable cannot be found afterwards.
    #>
    [CmdletBinding()]
    param(
        [string]$InstallPath = (Join-Path $env:LOCALAPPDATA "NAPS2-Portable")
    )

    Write-Host "NAPS2 not found. Auto-acquiring portable version..." -ForegroundColor Cyan

    # Get download info
    $info = Get-Naps2DownloadInfo
    Write-Verbose "Found NAPS2 $($info.Version): $($info.FileName)"

    # Create install directory
    if (-not (Test-Path $InstallPath)) {
        New-Item -ItemType Directory -Path $InstallPath -Force | Out-Null
    }

    $zipPath = Join-Path $InstallPath $info.FileName
    $versionFile = Join-Path $InstallPath ".version"

    try {
        # Download the ZIP
        Write-Host "Downloading $($info.FileName)..." -ForegroundColor Cyan
        try {
            $ProgressPreference = 'SilentlyContinue' # Speeds up Invoke-WebRequest significantly
            # -UseBasicParsing avoids the Internet Explorer engine dependency of
            # Windows PowerShell 5.1; newer PowerShell versions accept and ignore it.
            Invoke-WebRequest -Uri $info.DownloadUrl -OutFile $zipPath -UseBasicParsing -ErrorAction Stop
        }
        catch {
            throw "Failed to download NAPS2: $_"
        }

        # Extract the ZIP (overwrite existing files)
        Write-Host "Extracting to $InstallPath..." -ForegroundColor Cyan
        try {
            Expand-Archive -Path $zipPath -DestinationPath $InstallPath -Force -ErrorAction Stop
        }
        catch {
            throw "Failed to extract NAPS2 archive: $_"
        }
    }
    finally {
        # Clean up the ZIP file on success and on failure, so a partial or corrupt
        # download is never left behind in the install directory.
        Remove-Item -Path $zipPath -Force -ErrorAction SilentlyContinue
    }

    # Write version file for future update checks
    $info.Version | Out-File -FilePath $versionFile -Encoding UTF8 -Force

    # Locate NAPS2.Console.exe
    $consolePath = Join-Path $InstallPath "NAPS2.Console.exe"
    if (-not (Test-Path $consolePath)) {
        # Some versions may extract to a subfolder
        $consolePath = Get-ChildItem -Path $InstallPath -Filter "NAPS2.Console.exe" -Recurse -ErrorAction SilentlyContinue |
            Select-Object -First 1 -ExpandProperty FullName
    }
    if (-not $consolePath -or -not (Test-Path $consolePath)) {
        throw "NAPS2.Console.exe not found after extraction. Check $InstallPath"
    }

    # Install English OCR language pack
    Write-Host "Installing English OCR language pack..." -ForegroundColor Cyan
    $savedErrorActionPreference = $ErrorActionPreference
    try {
        # With "Stop" in effect, Windows PowerShell can turn redirected stderr output of
        # a native program into a terminating error even when the program succeeds, so
        # relax the preference for this call and judge success by the exit code instead.
        $ErrorActionPreference = 'Continue'
        & $consolePath --install ocr-eng 2>&1 | Out-Null
        # A native command never throws on failure by itself; check its exit code.
        if ($LASTEXITCODE -ne 0) {
            throw "NAPS2.Console.exe exited with code $LASTEXITCODE"
        }
    }
    catch {
        Write-Warning "Failed to install OCR language pack. OCR may not work: $_"
    }
    finally {
        $ErrorActionPreference = $savedErrorActionPreference
    }

    Write-Host "NAPS2 $($info.Version) installed successfully." -ForegroundColor Green
    return $consolePath
}
function Get-Naps2ConsolePath {
    <#
    .SYNOPSIS
    Finds NAPS2.Console.exe, auto-acquiring if necessary.
    .DESCRIPTION
    Checks the two Program Files install locations and the portable folder under
    $env:LOCALAPPDATA\NAPS2-Portable, in that order. If none of those files exists, the
    portable folder is searched recursively, because the portable archive may extract
    into a subfolder. Only when nothing is found is Install-Naps2Portable invoked.
    .OUTPUTS
    The full path to NAPS2.Console.exe.
    #>
    [CmdletBinding()]
    param()

    $portablePath = Join-Path $env:LOCALAPPDATA "NAPS2-Portable"

    # Candidate paths in order of preference
    $candidatePaths = @(
        "C:\Program Files\NAPS2\NAPS2.Console.exe",
        "C:\Program Files (x86)\NAPS2\NAPS2.Console.exe",
        (Join-Path $portablePath "NAPS2.Console.exe")
    )
    $found = $candidatePaths |
        Where-Object { Test-Path -LiteralPath $_ -PathType Leaf } |
        Select-Object -First 1

    # A previous portable install may live in a subfolder of the portable directory.
    # Without this lookup such an install would never be rediscovered and NAPS2 would
    # be downloaded again on every run.
    if (-not $found -and (Test-Path -LiteralPath $portablePath -PathType Container)) {
        $found = Get-ChildItem -LiteralPath $portablePath -Filter "NAPS2.Console.exe" -File -Recurse -ErrorAction SilentlyContinue |
            Select-Object -First 1 -ExpandProperty FullName
    }

    if ($found) {
        Write-Verbose "Found NAPS2 at: $found"
        return $found
    }

    # Not found - auto-acquire
    return Install-Naps2Portable -InstallPath $portablePath
}
#endregion NAPS2 Auto-Acquisition
# Appends one line to the run log and echoes it to the output stream. Every log write
# goes through Out-File with an explicit UTF-8 encoding so the file is never a mix of
# encodings (Tee-Object -Append uses a different default on Windows PowerShell).
function Write-OcrLog {
    param([string]$Message)
    $Message | Out-File -LiteralPath $LogPath -Append -Encoding UTF8
    $Message
}

$separators = [char[]]@([System.IO.Path]::DirectorySeparatorChar, [System.IO.Path]::AltDirectorySeparatorChar)

# Determine if input is a file or folder, and resolve the input root to an absolute
# path. The PDFs are later addressed by their absolute FullName, so a relative
# -InputPath (or a bare file name, whose parent is an empty string) must not be used
# for prefix arithmetic or Join-Path.
$isFile = Test-Path $InputPath -PathType Leaf
if ($isFile) {
    $pdfs = @(Get-Item -Path $InputPath | Where-Object { -not $_.PSIsContainer })
    $InputRoot = $pdfs[0].DirectoryName
} else {
    $InputRoot = (Get-Item -Path $InputPath | Select-Object -First 1).FullName
}

$OutputRoot = if ($OutputPath) { $OutputPath } else { Join-Path $InputRoot "OCR" }
# Make the output root absolute as well (it may not exist yet, so it cannot go through
# Resolve-Path); the exclusion filter below compares it against absolute file paths.
$OutputRoot = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($OutputRoot)

# Find or acquire NAPS2
$Naps2 = Get-Naps2ConsolePath

# Create output root
New-Item -ItemType Directory -Path $OutputRoot -Force | Out-Null
$LogPath = Join-Path $OutputRoot ("ocr-log-{0}.txt" -f (Get-Date -Format "yyyyMMdd-HHmmss"))
"Started: $(Get-Date)" | Out-File -LiteralPath $LogPath -Encoding UTF8

# Find PDFs (excluding the OCR output folder itself)
if (-not $isFile) {
    # Plain prefix comparison instead of -like, so characters such as [ ] in the
    # output path are not interpreted as wildcards.
    $outputPrefix = $OutputRoot.TrimEnd($separators) + [System.IO.Path]::DirectorySeparatorChar
    $pdfs = @(
        Get-ChildItem -LiteralPath $InputRoot -Filter *.pdf -File -Recurse |
            Where-Object { -not $_.FullName.StartsWith($outputPrefix, [System.StringComparison]::OrdinalIgnoreCase) }
    )
}

if ($pdfs.Count -eq 0) {
    Write-OcrLog "No PDFs found under '$InputRoot'."
    exit 0
}

$processed = 0
$skipped = 0
$failed = 0

foreach ($pdf in $pdfs) {
    # Compute relative path under input root (fall back to the bare file name if the
    # file unexpectedly does not live under the input root).
    if ($pdf.FullName.StartsWith($InputRoot, [System.StringComparison]::OrdinalIgnoreCase)) {
        $relativePath = $pdf.FullName.Substring($InputRoot.Length).TrimStart($separators)
    } else {
        $relativePath = $pdf.Name
    }

    # Output path mirrors input path under OCR folder
    $outPath = Join-Path $OutputRoot $relativePath
    $outDir = Split-Path -Parent $outPath

    # Skip if already OCR'd output exists (-LiteralPath: file names may contain [ ])
    if (Test-Path -LiteralPath $outPath) {
        $skipped++
        Write-OcrLog "SKIP: $($pdf.FullName) -> (exists) $outPath"
        continue
    }

    New-Item -ItemType Directory -Path $outDir -Force | Out-Null

    try {
        Write-OcrLog "OCR : $($pdf.FullName) -> $outPath"

        # Run NAPS2 CLI (--noprofile prevents scanning, -n 0 sets scan count to 0)
        $naps2Arguments = @(
            '--noprofile',
            '-n', '0',
            '-i', $pdf.FullName,
            '-o', $outPath,
            '--ocrlang', 'eng'
        )
        & $Naps2 @naps2Arguments | Out-Null

        # A native command does not throw on failure, so verify the result explicitly;
        # otherwise every failed conversion would be counted as processed.
        if ($LASTEXITCODE -ne 0) {
            throw "NAPS2.Console.exe exited with code $LASTEXITCODE."
        }
        if (-not (Test-Path -LiteralPath $outPath)) {
            throw "NAPS2.Console.exe finished but did not create the output file."
        }

        $processed++
    }
    catch {
        $failed++
        Write-OcrLog "FAIL: $($pdf.FullName) -> $outPath`n$($_.Exception.Message)"
    }
}

"Finished: $(Get-Date)" | Out-File -LiteralPath $LogPath -Append -Encoding UTF8
Write-OcrLog "Processed: $processed Skipped: $skipped Failed: $failed"
Write-Host "Done. Processed=$processed Skipped=$skipped Failed=$failed"
Write-Host "Log: $LogPath"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment