Created
October 21, 2025 11:13
-
-
Save antronic/88d07781b97d15f5113c3362c5bee895 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env mongosh | |
| /** | |
| * MongoDB Cluster Document Count Comparison Script | |
| * | |
| * This script compares document counts in collections between two MongoDB clusters | |
| * Features: | |
| * - Database selection support | |
| * - Batched processing of collections (batch size set by maxConcurrency) | |
| * - Comprehensive reporting | |
| * - Error handling and logging | |
| * | |
| * Usage: | |
| * mongosh --file mongo-cluster-compare.js | |
| */ | |
// ---------------------------------------------------------------------------
// Settings - adjust the values below to suit your environment
// ---------------------------------------------------------------------------
const config = {
  // Cluster whose databases and collections drive the comparison
  sourceCluster: {
    uri: 'mongodb://localhost:27017', // Replace with your source cluster URI
    // Example: 'mongodb://username:password@host1:27017,host2:27017/admin?replicaSet=rs0'
  },

  // Cluster whose counts are compared against the source
  targetCluster: {
    uri: 'mongodb://localhost:27018', // Replace with your target cluster URI
    // Example: 'mongodb://username:password@host3:27017,host4:27017/admin?replicaSet=rs1'
  },

  // Restrict the run to these databases; leave empty to include every database
  databasesToCompare: [], // e.g., ['myapp', 'analytics', 'logs']

  // Databases that are always skipped
  excludeDatabases: ['admin', 'config', 'local'],

  // How many collections are handled per batch
  maxConcurrency: 10,

  // Operation timeout in milliseconds
  operationTimeout: 30000,

  // Report format: 'table', 'json' or 'csv'
  outputFormat: 'table',
  // When true, the table output hides rows whose status is MATCH
  showOnlyDifferences: false,

  // Minimum severity that gets printed: 'debug', 'info', 'warn' or 'error'
  logLevel: 'info',
};

// Module-level state shared by the functions below
let sourceConnection = null; // assigned once the source cluster is connected
let targetConnection = null; // assigned once the target cluster is connected
const results = [];
const errors = [];
// Utility functions

/**
 * Print a timestamped, colour-coded message if `level` is at least as severe
 * as `config.logLevel`.
 *
 * @param {string} level - One of "debug", "info", "warn", "error". Any other
 *   value is never printed.
 * @param {string} message - Text to print after the timestamp and level tag.
 * @param {*} [data=null] - Optional payload, printed as indented JSON on the
 *   following line when truthy.
 */
function log(level, message, data = null) {
  const levels = { debug: 0, info: 1, warn: 2, error: 3 };

  // `??` instead of `||`: "debug" maps to 0, which `||` would treat as missing
  // and silently replace with the info threshold.
  const threshold = levels[config.logLevel] ?? levels.info;
  if (!(levels[level] >= threshold)) {
    return;
  }

  const timestamp = new Date().toISOString();
  const logMessage = `[${timestamp}] [${level.toUpperCase()}] ${message}`;

  // ANSI colours: red for errors, yellow for warnings, green for info
  if (level === 'error') {
    print(`\x1b[31m${logMessage}\x1b[0m`);
  } else if (level === 'warn') {
    print(`\x1b[33m${logMessage}\x1b[0m`);
  } else if (level === 'info') {
    print(`\x1b[32m${logMessage}\x1b[0m`);
  } else {
    print(logMessage);
  }

  if (data) {
    // An Error's name and message are not enumerable, so JSON.stringify would
    // print "{}"; copy them explicitly alongside any enumerable extras.
    const printable = data instanceof Error
      ? { name: data.name, message: data.message, ...data }
      : data;
    print(JSON.stringify(printable, null, 2));
  }
}
/**
 * Format a number using the runtime's default locale conventions.
 *
 * @param {number} num - Value to format.
 * @returns {string} The formatted value.
 */
function formatNumber(num) {
  const formatter = new Intl.NumberFormat();
  return formatter.format(num);
}
// Connection management

/**
 * Open a connection to a cluster, logging the attempt and its outcome.
 * Everything between "//" and the last "@" of the URI is masked in the log line.
 *
 * @param {string} uri - Connection string passed to `new Mongo(...)`.
 * @param {string} name - Label used in log messages (e.g. "source").
 * @returns {Mongo} The connection object.
 * @throws Rethrows whatever error occurred, after logging it.
 */
function connectToCluster(uri, name) {
  let connection;
  try {
    const maskedUri = uri.replace(/\/\/.*@/, '//***@');
    log('info', `Connecting to ${name} cluster: ${maskedUri}`);
    connection = new Mongo(uri);
    log('info', `Successfully connected to ${name} cluster`);
  } catch (connectError) {
    log('error', `Failed to connect to ${name} cluster`, connectError);
    throw connectError;
  }
  return connection;
}
// Database discovery

/**
 * List the database names on a cluster, applying the include list
 * (`config.databasesToCompare`, when non-empty) and the exclude list
 * (`config.excludeDatabases`).
 *
 * @param {Mongo} connection - Connection to query.
 * @returns {string[]} Names of the databases left after filtering.
 * @throws Rethrows any failure (including a non-ok listDatabases reply)
 *   after logging it.
 */
function getDatabaseList(connection) {
  try {
    const reply = connection.getDB('admin').runCommand('listDatabases');
    if (!reply.ok) {
      throw new Error('Failed to list databases');
    }

    const allNames = reply.databases.map((entry) => entry.name);

    // An empty include list means "no restriction"
    const included = config.databasesToCompare.length > 0
      ? allNames.filter((name) => config.databasesToCompare.includes(name))
      : allNames;
    const selected = included.filter((name) => !config.excludeDatabases.includes(name));

    log('info', `Found ${selected.length} databases to process`, selected);
    return selected;
  } catch (listError) {
    log('error', 'Failed to get database list', listError);
    throw listError;
  }
}
// Collection discovery

/**
 * Return the collection names of one database.
 *
 * @param {Mongo} connection - Connection to query.
 * @param {string} dbName - Database to inspect.
 * @returns {string[]} Collection names, or an empty array when the lookup
 *   fails (the failure is logged, not thrown).
 */
function getCollectionList(connection, dbName) {
  let names;
  try {
    names = connection.getDB(dbName).getCollectionNames();
    log('debug', `Database ${dbName} has ${names.length} collections`);
  } catch (lookupError) {
    log('error', `Failed to get collections for database ${dbName}`, lookupError);
    return [];
  }
  return names;
}
// Document counting

/**
 * Count the documents in one collection.
 *
 * Tries `estimatedDocumentCount()` first and falls back to
 * `countDocuments({})` if the estimate throws.
 *
 * @param {Mongo} connection - Connection to query.
 * @param {string} dbName - Database name.
 * @param {string} collectionName - Collection name.
 * @returns {number} The count, or -1 when counting failed (the failure is
 *   logged, not thrown).
 */
function getDocumentCount(connection, dbName, collectionName) {
  let total;
  try {
    const coll = connection.getDB(dbName).getCollection(collectionName);
    try {
      total = coll.estimatedDocumentCount();
    } catch (estimateError) {
      log('debug', `Estimated count failed for ${dbName}.${collectionName}, using exact count`);
      total = coll.countDocuments({});
    }
    log('debug', `${dbName}.${collectionName}: ${formatNumber(total)} documents`);
  } catch (countError) {
    log('error', `Failed to count documents in ${dbName}.${collectionName}`, countError);
    return -1; // sentinel understood by the comparison code
  }
  return total;
}
// Process a single collection comparison

/**
 * Compare the document count of one collection between the source and target
 * connections.
 *
 * @param {string} dbName - Database name.
 * @param {string} collectionName - Collection name.
 * @returns {Promise<Object>} Always resolves (never rejects) with
 *   `{ database, collection, sourceCount, targetCount, difference, status }`.
 *   `status` is 'ERROR' if either count is negative (the failure sentinel),
 *   otherwise 'MATCH' or 'DIFF'. `difference` is source minus target, or
 *   null on error. An `error` message is added when an exception was caught.
 */
function compareCollection(dbName, collectionName) {
  let result;
  try {
    const sourceCount = getDocumentCount(sourceConnection, dbName, collectionName);
    const targetCount = getDocumentCount(targetConnection, dbName, collectionName);
    const bothCounted = sourceCount >= 0 && targetCount >= 0;

    // Check for failures BEFORE equality: two failed counts are both -1 and
    // would otherwise compare equal and be reported as a MATCH.
    let status;
    if (!bothCounted) {
      status = 'ERROR';
    } else if (sourceCount === targetCount) {
      status = 'MATCH';
    } else {
      status = 'DIFF';
    }

    result = {
      database: dbName,
      collection: collectionName,
      sourceCount,
      targetCount,
      difference: bothCounted ? sourceCount - targetCount : null,
      status,
    };
  } catch (error) {
    log('error', `Error comparing collection ${dbName}.${collectionName}`, error);
    result = {
      database: dbName,
      collection: collectionName,
      sourceCount: -1,
      targetCount: -1,
      difference: null,
      status: 'ERROR',
      error: error.message,
    };
  }
  return Promise.resolve(result);
}
// Process collections in batches

/**
 * Run `compareCollection` over every task, `config.maxConcurrency` tasks per
 * batch, pausing 100 ms between batches.
 *
 * @param {Array<{database: string, collection: string}>} tasks - Collections
 *   to compare.
 * @returns {Promise<Object[]>} Comparison results in the same order as `tasks`.
 */
async function processCollectionsConcurrently(tasks) {
  // A batch size of 0, a negative number or NaN would make the loop below
  // never advance (or never run); fall back to one task per batch instead.
  const requested = Math.floor(Number(config.maxConcurrency));
  const batchSize = requested >= 1 ? requested : 1;
  if (batchSize !== config.maxConcurrency) {
    log('warn', `Invalid maxConcurrency (${config.maxConcurrency}); using ${batchSize}`);
  }

  const totalBatches = Math.ceil(tasks.length / batchSize);
  const collected = [];

  for (let i = 0; i < tasks.length; i += batchSize) {
    const batch = tasks.slice(i, i + batchSize);
    log('info', `Processing batch ${Math.floor(i / batchSize) + 1}/${totalBatches} (${batch.length} collections)`);

    const batchResults = await Promise.all(
      batch.map((task) => compareCollection(task.database, task.collection))
    );
    collected.push(...batchResults);

    // Small delay between batches to avoid overwhelming the database
    if (i + batchSize < tasks.length) {
      await new Promise((resolve) => setTimeout(resolve, 100));
    }
  }

  return collected;
}
// Output formatting functions

/**
 * Print the comparison results as a fixed-width, colour-coded table.
 * Rows with status MATCH are skipped when `config.showOnlyDifferences` is set.
 *
 * @param {Object[]} results - Comparison results to print.
 */
function outputAsTable(results) {
  const RULE_WIDTH = 120;
  const ROW_FORMAT = '%-20s %-30s %15s %15s %15s %-8s';
  // ANSI colour prefix per status: green, yellow, red
  const colourByStatus = new Map([
    ['MATCH', '\x1b[32m'],
    ['DIFF', '\x1b[33m'],
    ['ERROR', '\x1b[31m'],
  ]);
  const countOrError = (count) => (count >= 0 ? formatNumber(count) : 'ERROR');

  print('\n' + '='.repeat(RULE_WIDTH));
  print('MONGODB CLUSTER COMPARISON RESULTS');
  print('='.repeat(RULE_WIDTH));

  // Header
  print(sprintf(ROW_FORMAT,
    'Database', 'Collection', 'Source Count', 'Target Count', 'Difference', 'Status'));
  print('-'.repeat(RULE_WIDTH));

  // Data rows
  for (const entry of results) {
    const hidden = config.showOnlyDifferences && entry.status === 'MATCH';
    if (!hidden) {
      const sourceText = countOrError(entry.sourceCount);
      const targetText = countOrError(entry.targetCount);

      let diffText = 'N/A';
      if (entry.difference !== null) {
        // Positive differences get an explicit sign
        const sign = entry.difference > 0 ? '+' : '';
        diffText = sign + formatNumber(entry.difference);
      }

      // Names longer than the column are cut to 27 characters plus an ellipsis
      const shownName = entry.collection.length > 30
        ? entry.collection.substring(0, 27) + '...'
        : entry.collection;

      const line = sprintf(ROW_FORMAT,
        entry.database, shownName, sourceText, targetText, diffText, entry.status);

      const colour = colourByStatus.get(entry.status);
      print(colour === undefined ? line : `${colour}${line}\x1b[0m`);
    }
  }

  print('-'.repeat(RULE_WIDTH));
}
/**
 * Print the comparison results as one indented JSON document containing a
 * `metadata` section (timestamp, masked cluster URIs, collection total) and
 * the `results` array.
 *
 * @param {Object[]} results - Comparison results to print.
 */
function outputAsJSON(results) {
  // Mask everything between "//" and the last "@" so credentials are not printed
  const maskUri = (uri) => uri.replace(/\/\/.*@/, '//***@');

  const metadata = {
    comparisonDate: new Date().toISOString(),
    sourceCluster: maskUri(config.sourceCluster.uri),
    targetCluster: maskUri(config.targetCluster.uri),
    totalCollections: results.length,
  };
  const report = { metadata, results };

  print(JSON.stringify(report, null, 2));
}
/**
 * Print the comparison results as CSV: a header line followed by one fully
 * quoted row per result.
 *
 * @param {Object[]} results - Comparison results to print.
 */
function outputAsCSV(results) {
  // Quote every field and double any embedded quote so names containing
  // `"` or `,` cannot break the row structure.
  const toCsvField = (value) => `"${String(value).replace(/"/g, '""')}"`;

  print('Database,Collection,SourceCount,TargetCount,Difference,Status,Error');

  for (const result of results) {
    const fields = [
      result.database,
      result.collection,
      result.sourceCount,
      result.targetCount,
      // `??` rather than `||`: a difference of 0 is a real value and must be
      // printed; only null/undefined become an empty field.
      result.difference ?? '',
      result.status,
      result.error ?? '',
    ];
    print(fields.map(toCsvField).join(','));
  }
}
// Summary statistics

/**
 * Print totals for the comparison: the number of collections per status (with
 * percentages) and the summed source/target document counts. Negative counts
 * (failed lookups) are left out of the document totals.
 *
 * @param {Object[]} results - Comparison results to summarise.
 */
function printSummary(results) {
  const countByStatus = (status) => results.filter((r) => r.status === status).length;
  const sumOf = (key) => results.reduce((sum, r) => sum + (r[key] >= 0 ? r[key] : 0), 0);

  const summary = {
    total: results.length,
    matches: countByStatus('MATCH'),
    differences: countByStatus('DIFF'),
    errors: countByStatus('ERROR'),
    totalSourceDocs: sumOf('sourceCount'),
    totalTargetDocs: sumOf('targetCount'),
  };

  // With no results the ratio would be 0/0; print 0.0% instead of NaN%
  const percent = (count) =>
    (summary.total > 0 ? (count / summary.total) * 100 : 0).toFixed(1);

  print('\n' + '='.repeat(60));
  print('COMPARISON SUMMARY');
  print('='.repeat(60));
  print(`Total Collections: ${formatNumber(summary.total)}`);
  print(`Matches: ${formatNumber(summary.matches)} (${percent(summary.matches)}%)`);
  print(`Differences: ${formatNumber(summary.differences)} (${percent(summary.differences)}%)`);
  print(`Errors: ${formatNumber(summary.errors)} (${percent(summary.errors)}%)`);
  print(`Total Source Docs: ${formatNumber(summary.totalSourceDocs)}`);
  print(`Total Target Docs: ${formatNumber(summary.totalTargetDocs)}`);
  print(`Total Difference: ${formatNumber(summary.totalSourceDocs - summary.totalTargetDocs)}`);
  print('='.repeat(60));
}
// Main execution function

/**
 * Run the whole comparison: connect to both clusters, gather the databases
 * and collections to compare, compare their document counts, print the report
 * in the configured format and log the overall outcome.
 *
 * Databases and collections are taken from BOTH clusters, so data that exists
 * only on the target is reported as well instead of being silently omitted.
 *
 * Both connections are closed before returning, whether or not an error
 * occurred.
 *
 * @returns {Promise<void>}
 * @throws Rethrows any fatal error (connection or database listing failure,
 *   or anything thrown while reporting) after logging it.
 */
async function main() {
  const startTime = new Date();
  log('info', 'Starting MongoDB cluster comparison');

  try {
    // Establish connections
    sourceConnection = connectToCluster(config.sourceCluster.uri, 'source');
    targetConnection = connectToCluster(config.targetCluster.uri, 'target');

    // Union of the database names on both clusters (source order first)
    const databases = [
      ...new Set([
        ...getDatabaseList(sourceConnection),
        ...getDatabaseList(targetConnection),
      ]),
    ];
    if (databases.length === 0) {
      log('warn', 'No databases found to compare');
      return;
    }

    // Collect every collection seen on either cluster; the Set removes the
    // names both sides share.
    const tasks = [];
    for (const dbName of databases) {
      const collections = new Set([
        ...getCollectionList(sourceConnection, dbName),
        ...getCollectionList(targetConnection, dbName),
      ]);
      for (const collectionName of collections) {
        tasks.push({ database: dbName, collection: collectionName });
      }
    }

    log('info', `Found ${tasks.length} collections across ${databases.length} databases`);
    if (tasks.length === 0) {
      log('warn', 'No collections found to compare');
      return;
    }

    // Process comparisons in batches
    log('info', `Starting comparison with max concurrency: ${config.maxConcurrency}`);
    const comparisonResults = await processCollectionsConcurrently(tasks);

    // Sort by database, then collection, for readability
    comparisonResults.sort((a, b) => {
      if (a.database !== b.database) return a.database.localeCompare(b.database);
      return a.collection.localeCompare(b.collection);
    });

    // Output results; the summary is printed only alongside the table so the
    // json and csv reports are not followed by extra summary text
    const outputFormat = config.outputFormat.toLowerCase();
    switch (outputFormat) {
      case 'json':
        outputAsJSON(comparisonResults);
        break;
      case 'csv':
        outputAsCSV(comparisonResults);
        break;
      default:
        outputAsTable(comparisonResults);
        break;
    }
    if (outputFormat === 'table') {
      printSummary(comparisonResults);
    }

    const duration = (new Date() - startTime) / 1000;
    log('info', `Comparison completed in ${duration.toFixed(2)} seconds`);

    // Log the overall outcome (no process exit code is set here)
    const hasErrors = comparisonResults.some((r) => r.status === 'ERROR');
    const hasDifferences = comparisonResults.some((r) => r.status === 'DIFF');
    if (hasErrors) {
      log('warn', 'Comparison completed with errors');
    } else if (hasDifferences) {
      log('info', 'Comparison completed - differences found');
    } else {
      log('info', 'Comparison completed - all collections match');
    }
  } catch (error) {
    log('error', 'Fatal error during comparison', error);
    throw error;
  } finally {
    // Best-effort cleanup: a failing close must not mask the real outcome,
    // but it is logged rather than silently discarded.
    const openConnections = [
      ['source', sourceConnection],
      ['target', targetConnection],
    ];
    for (const [label, connection] of openConnections) {
      if (!connection) continue;
      try {
        connection.close();
      } catch (closeError) {
        log('debug', `Ignoring error while closing ${label} connection`, closeError);
      }
    }
  }
}
// Helper function for string formatting (simple sprintf implementation)

/**
 * Minimal sprintf supporting `%s` and `%d` with an optional flag and width.
 *
 * - `%Ns` / `%Nd` right-justify the value in N columns (space padded).
 * - `%-Ns` / `%-Nd` left-justify the value in N columns.
 * - The `+`, `0` and space flags and a `*` width are accepted by the pattern
 *   but have no effect; `*` does not consume an argument.
 * - `%d` converts the argument with base-10 `parseInt`; unparsable values
 *   become 0.
 * - A placeholder with no remaining argument is left in the output as-is.
 *
 * @param {string} format - Format string.
 * @param {...*} args - Values substituted for the placeholders, in order.
 * @returns {string} The formatted string.
 */
function sprintf(format, ...args) {
  let nextArg = 0;
  return format.replace(/%([-+0 ])?(\*|\d+)?([sd])/g, (match, flag, width, type) => {
    if (nextArg >= args.length) return match;

    let value = args[nextArg++];
    if (type === 'd') value = Number.parseInt(value, 10) || 0;
    const text = String(value);

    if (!width || width === '*') return text;

    const columns = Number.parseInt(width, 10);
    // "-" means left-justify (pad on the right); everything else pads on the left
    return flag === '-' ? text.padEnd(columns) : text.padStart(columns);
  });
}
// Runtime guard (not a polyfill): stop early when Promise is unavailable
if (typeof Promise === 'undefined') {
  log('error', 'This script requires Promise support. Please use a newer version of mongosh.');
  quit(1);
}

// Entry point: run the comparison and exit with status 1 if it rejects
main().catch((failure) => {
  log('error', 'Script execution failed', failure);
  quit(1);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment