Skip to content

Instantly share code, notes, and snippets.

@antronic
Created October 21, 2025 11:13
Show Gist options
  • Select an option

  • Save antronic/88d07781b97d15f5113c3362c5bee895 to your computer and use it in GitHub Desktop.

Select an option

Save antronic/88d07781b97d15f5113c3362c5bee895 to your computer and use it in GitHub Desktop.
#!/usr/bin/env mongosh
/**
* MongoDB Cluster Document Count Comparison Script
*
* This script compares document counts in collections between two MongoDB clusters
* Features:
* - Database selection support
* - Batched processing of collections (batch size set by maxConcurrency)
* - Comprehensive reporting
* - Error handling and logging
*
* Usage:
* mongosh --file mongo-cluster-compare.js
*/
// Configuration object - modify these values as needed.
// Every function in this script reads its settings from this single object.
const config = {
// Source cluster connection (its database and collection lists drive the comparison)
sourceCluster: {
uri: "mongodb://localhost:27017", // Replace with your source cluster URI
// uri: "mongodb://username:password@host1:27017,host2:27017/admin?replicaSet=rs0",
},
// Target cluster connection (only counted for collections found on the source)
targetCluster: {
uri: "mongodb://localhost:27018", // Replace with your target cluster URI
// uri: "mongodb://username:password@host3:27017,host4:27017/admin?replicaSet=rs1",
},
// Databases to compare (empty array means all databases)
databasesToCompare: [], // e.g., ["myapp", "analytics", "logs"]
// Databases to exclude from comparison (applied after databasesToCompare)
excludeDatabases: ["admin", "config", "local"],
// Batch size: how many collection comparisons are started together before
// the script waits for all of them to finish
maxConcurrency: 10,
// Timeout for operations (in milliseconds)
// NOTE(review): this value is not read anywhere in this script
operationTimeout: 30000,
// Output options
outputFormat: "table", // "table", "json", "csv"
// Only honoured by the table output: hides rows whose status is MATCH
showOnlyDifferences: false,
// Logging level: "debug", "info", "warn", "error"
logLevel: "info"
};
// Global variables
// Connection handles: assigned in main(), read by compareCollection(), and
// closed in main()'s finally block.
let sourceConnection = null;
let targetConnection = null;
// NOTE(review): these two arrays are never referenced in this script; the
// functions below use their own local/parameter named `results`.
const results = [];
const errors = [];
// Utility functions
/**
 * Print a timestamped, colour-coded log line when `level` is at or above the
 * threshold named by `config.logLevel`.
 *
 * @param {string} level - One of "debug", "info", "warn", "error". Any other
 *   value is never printed.
 * @param {string} message - Text of the log line.
 * @param {*} [data=null] - Optional payload, printed as indented JSON on the
 *   following line. Error instances are expanded to their name and message.
 */
function log(level, message, data = null) {
  const levels = { debug: 0, info: 1, warn: 2, error: 3 };
  const colors = { error: '\x1b[31m', warn: '\x1b[33m', info: '\x1b[32m' };

  // "debug" maps to 0, so the fallback must test for undefined explicitly;
  // a truthiness test (`|| 1`) would make the debug level impossible to enable.
  const configured = levels[config.logLevel];
  const threshold = configured === undefined ? levels.info : configured;
  if (!(levels[level] >= threshold)) {
    return;
  }

  const timestamp = new Date().toISOString();
  const logMessage = `[${timestamp}] [${level.toUpperCase()}] ${message}`;
  const color = colors[level];
  print(color ? `${color}${logMessage}\x1b[0m` : logMessage);

  if (data === null || data === undefined) {
    return;
  }

  // Error's name/message are not enumerable, so JSON.stringify(error) alone
  // yields "{}". Copy them explicitly and keep any enumerable extras.
  const payload = data instanceof Error
    ? Object.assign({ name: data.name, message: data.message }, data)
    : data;
  try {
    print(JSON.stringify(payload, null, 2));
  } catch (serializationError) {
    // Circular references or BigInt values cannot be serialised; fall back
    // to the plain string form rather than failing inside the logger.
    print(String(data));
  }
}
/**
 * Format a number for display using the runtime's default locale.
 *
 * @param {number} num - Value to format.
 * @returns {string} The value formatted by a default Intl.NumberFormat.
 */
function formatNumber(num) {
  const formatter = new Intl.NumberFormat();
  return formatter.format(num);
}
// Connection management
/**
 * Open a connection to one cluster, logging the attempt and its outcome.
 * Everything between "//" and the last "@" of the URI is masked in the log.
 *
 * @param {string} uri - Connection string passed unchanged to `new Mongo()`.
 * @param {string} name - Label used in log messages (e.g. "source").
 * @returns {Mongo} The object returned by `new Mongo(uri)`.
 * @throws Re-throws any error raised while connecting, after logging it.
 */
function connectToCluster(uri, name) {
  let connection;
  try {
    const maskedUri = uri.replace(/\/\/.*@/, '//***@');
    log('info', `Connecting to ${name} cluster: ${maskedUri}`);
    connection = new Mongo(uri);
    log('info', `Successfully connected to ${name} cluster`);
  } catch (error) {
    log('error', `Failed to connect to ${name} cluster`, error);
    throw error;
  }
  return connection;
}
// Get list of databases to process
/**
 * List the database names on a cluster, filtered by the configuration:
 * when `config.databasesToCompare` is non-empty only those names are kept,
 * then every name in `config.excludeDatabases` is dropped.
 *
 * @param {Mongo} connection - Connection whose `admin` database is queried.
 * @returns {string[]} Database names to process, in server order.
 * @throws Re-throws any failure (including a non-ok reply) after logging it.
 */
function getDatabaseList(connection) {
  try {
    const reply = connection.getDB('admin').runCommand('listDatabases');
    if (!reply.ok) {
      throw new Error('Failed to list databases');
    }

    const allNames = reply.databases.map((entry) => entry.name);
    const restrictToAllowList = config.databasesToCompare.length > 0;

    const databases = [];
    for (const name of allNames) {
      if (restrictToAllowList && !config.databasesToCompare.includes(name)) {
        continue;
      }
      if (config.excludeDatabases.includes(name)) {
        continue;
      }
      databases.push(name);
    }

    log('info', `Found ${databases.length} databases to process`, databases);
    return databases;
  } catch (error) {
    log('error', 'Failed to get database list', error);
    throw error;
  }
}
// Get collections in a database
/**
 * Return the collection names of one database.
 *
 * @param {Mongo} connection - Connection to read from.
 * @param {string} dbName - Database whose collections are listed.
 * @returns {string[]} The names from `getCollectionNames()`, or an empty
 *   array when the lookup fails (the failure is logged, not thrown).
 */
function getCollectionList(connection, dbName) {
  let names;
  try {
    names = connection.getDB(dbName).getCollectionNames();
    log('debug', `Database ${dbName} has ${names.length} collections`);
  } catch (error) {
    log('error', `Failed to get collections for database ${dbName}`, error);
    return [];
  }
  return names;
}
// Get document count for a collection
/**
 * Count the documents in one collection.
 *
 * Tries `estimatedDocumentCount()` first and, if that call throws, falls
 * back to `countDocuments({})`.
 *
 * @param {Mongo} connection - Connection to read from.
 * @param {string} dbName - Database name.
 * @param {string} collectionName - Collection name.
 * @returns {number} The count, or -1 when counting failed (failure is logged).
 */
function getDocumentCount(connection, dbName, collectionName) {
  try {
    const target = connection.getDB(dbName).getCollection(collectionName);

    let documentTotal;
    try {
      documentTotal = target.estimatedDocumentCount();
    } catch (estimateError) {
      log('debug', `Estimated count failed for ${dbName}.${collectionName}, using exact count`);
      documentTotal = target.countDocuments({});
    }

    log('debug', `${dbName}.${collectionName}: ${formatNumber(documentTotal)} documents`);
    return documentTotal;
  } catch (error) {
    log('error', `Failed to count documents in ${dbName}.${collectionName}`, error);
    // Sentinel understood by the callers as "count unavailable".
    return -1;
  }
}
// Process a single collection comparison
/**
 * Compare the document count of one collection between the source and
 * target connections held in the module-level `sourceConnection` and
 * `targetConnection`.
 *
 * @param {string} dbName - Database name.
 * @param {string} collectionName - Collection name.
 * @returns {Promise<Object>} Always resolves (never rejects) with
 *   `{ database, collection, sourceCount, targetCount, difference, status }`.
 *   `status` is 'ERROR' when either count is unavailable, 'MATCH' when both
 *   counts are equal, otherwise 'DIFF'. `difference` is source minus target,
 *   or null on error. An `error` message is added when comparing threw.
 */
function compareCollection(dbName, collectionName) {
  return new Promise((resolve) => {
    try {
      const sourceCount = getDocumentCount(sourceConnection, dbName, collectionName);
      const targetCount = getDocumentCount(targetConnection, dbName, collectionName);
      const bothCounted = sourceCount >= 0 && targetCount >= 0;

      // The error check must come first: getDocumentCount reports failure
      // as -1, so two failed counts are "equal" and would otherwise be
      // reported as a MATCH.
      let status;
      if (!bothCounted) {
        status = 'ERROR';
      } else if (sourceCount === targetCount) {
        status = 'MATCH';
      } else {
        status = 'DIFF';
      }

      resolve({
        database: dbName,
        collection: collectionName,
        sourceCount: sourceCount,
        targetCount: targetCount,
        difference: bothCounted ? sourceCount - targetCount : null,
        status: status
      });
    } catch (error) {
      log('error', `Error comparing collection ${dbName}.${collectionName}`, error);
      resolve({
        database: dbName,
        collection: collectionName,
        sourceCount: -1,
        targetCount: -1,
        difference: null,
        status: 'ERROR',
        error: error.message
      });
    }
  });
}
// Process collections with concurrency control
/**
 * Run compareCollection() for every task, in batches of
 * `config.maxConcurrency`, waiting for each batch to settle before the next
 * one starts and pausing 100 ms between batches.
 *
 * @param {Array<{database: string, collection: string}>} tasks - Collections
 *   to compare.
 * @returns {Promise<Object[]>} Comparison results in the same order as `tasks`.
 */
async function processCollectionsConcurrently(tasks) {
  // A batch size of 0, a negative number or a non-number would stop the loop
  // index from advancing and spin forever; fall back to one task at a time.
  const requested = Number(config.maxConcurrency);
  const isUsable = Number.isFinite(requested) && requested >= 1;
  const batchSize = isUsable ? Math.floor(requested) : 1;
  if (!isUsable) {
    log('warn', `Invalid maxConcurrency (${config.maxConcurrency}); processing one collection at a time`);
  }

  const totalBatches = Math.ceil(tasks.length / batchSize);
  const collected = [];

  for (let start = 0; start < tasks.length; start += batchSize) {
    const batch = tasks.slice(start, start + batchSize);
    const batchNumber = Math.floor(start / batchSize) + 1;
    log('info', `Processing batch ${batchNumber}/${totalBatches} (${batch.length} collections)`);

    const batchResults = await Promise.all(
      batch.map((task) => compareCollection(task.database, task.collection))
    );
    collected.push(...batchResults);

    // Small delay to prevent overwhelming the database; skipped after the
    // final batch.
    if (start + batchSize < tasks.length) {
      await new Promise((resolve) => setTimeout(resolve, 100));
    }
  }

  return collected;
}
// Output formatting functions
/**
 * Print the comparison results as a fixed-width, colour-coded table.
 *
 * Rows with status MATCH are skipped when `config.showOnlyDifferences` is
 * set. Collection names longer than 30 characters are cut to 27 characters
 * plus "...". Negative counts are shown as "ERROR" and a null difference
 * as "N/A".
 *
 * @param {Object[]} results - Result objects as produced by compareCollection().
 */
function outputAsTable(results) {
  const rowFormat = '%-20s %-30s %15s %15s %15s %-8s';
  const heavyRule = '='.repeat(120);
  const lightRule = '-'.repeat(120);
  // ANSI colour prefix per status: green, yellow, red.
  const colorByStatus = new Map([
    ['MATCH', '\x1b[32m'],
    ['DIFF', '\x1b[33m'],
    ['ERROR', '\x1b[31m'],
  ]);
  const countCell = (count) => (count >= 0 ? formatNumber(count) : 'ERROR');

  print('\n' + heavyRule);
  print('MONGODB CLUSTER COMPARISON RESULTS');
  print(heavyRule);
  print(sprintf(rowFormat,
    'Database', 'Collection', 'Source Count', 'Target Count', 'Difference', 'Status'));
  print(lightRule);

  for (const entry of results) {
    const hidden = config.showOnlyDifferences && entry.status === 'MATCH';
    if (hidden) {
      continue;
    }

    const sourceCell = countCell(entry.sourceCount);
    const targetCell = countCell(entry.targetCount);

    let diffCell = 'N/A';
    if (entry.difference !== null) {
      const formatted = formatNumber(entry.difference);
      // Positive differences get an explicit sign.
      diffCell = entry.difference > 0 ? '+' + formatted : formatted;
    }

    let collectionCell = entry.collection;
    if (collectionCell.length > 30) {
      collectionCell = collectionCell.substring(0, 27) + '...';
    }

    const line = sprintf(rowFormat,
      entry.database, collectionCell, sourceCell, targetCell, diffCell, entry.status);

    const color = colorByStatus.get(entry.status);
    print(color === undefined ? line : `${color}${line}\x1b[0m`);
  }

  print(lightRule);
}
/**
 * Print the comparison results as one pretty-printed JSON document with a
 * `metadata` section (timestamp, credential-masked cluster URIs, result
 * count) followed by the `results` array.
 *
 * @param {Object[]} results - Result objects as produced by compareCollection().
 */
function outputAsJSON(results) {
  // Mask everything between "//" and the last "@" so credentials never
  // reach the report.
  const maskCredentials = (uri) => uri.replace(/\/\/.*@/, '//***@');

  const metadata = {
    comparisonDate: new Date().toISOString(),
    sourceCluster: maskCredentials(config.sourceCluster.uri),
    targetCluster: maskCredentials(config.targetCluster.uri),
    totalCollections: results.length
  };
  const report = { metadata: metadata, results: results };

  print(JSON.stringify(report, null, 2));
}
/**
 * Print the comparison results as CSV: a header line followed by one fully
 * quoted row per result.
 *
 * @param {Object[]} results - Result objects as produced by compareCollection().
 */
function outputAsCSV(results) {
  // Missing values become empty fields; embedded double quotes are doubled
  // so that names containing '"' do not break the row.
  const quoteField = (field) => {
    const text = field === null || field === undefined ? '' : String(field);
    return `"${text.replace(/"/g, '""')}"`;
  };

  print('Database,Collection,SourceCount,TargetCount,Difference,Status,Error');
  for (const result of results) {
    const fields = [
      result.database,
      result.collection,
      result.sourceCount,
      result.targetCount,
      // Passed through as-is: a difference of 0 is real data and must not be
      // blanked by a truthiness test; only null/undefined become empty.
      result.difference,
      result.status,
      result.error
    ];
    print(fields.map(quoteField).join(','));
  }
}
// Summary statistics
/**
 * Print aggregate statistics for a comparison run: number of collections,
 * how many matched / differed / errored (with percentages), and the total
 * document counts on each side. Negative (failed) counts are excluded from
 * the document totals.
 *
 * @param {Object[]} results - Result objects as produced by compareCollection().
 */
function printSummary(results) {
  const summary = {
    total: results.length,
    matches: 0,
    differences: 0,
    errors: 0,
    totalSourceDocs: 0,
    totalTargetDocs: 0
  };

  for (const r of results) {
    if (r.status === 'MATCH') {
      summary.matches += 1;
    } else if (r.status === 'DIFF') {
      summary.differences += 1;
    } else if (r.status === 'ERROR') {
      summary.errors += 1;
    }
    if (r.sourceCount >= 0) {
      summary.totalSourceDocs += r.sourceCount;
    }
    if (r.targetCount >= 0) {
      summary.totalTargetDocs += r.targetCount;
    }
  }

  // With no results the share would be 0/0 and print "NaN%"; report 0.0%.
  const percentOfTotal = (count) =>
    (summary.total > 0 ? (count / summary.total) * 100 : 0).toFixed(1);

  print('\n' + '='.repeat(60));
  print('COMPARISON SUMMARY');
  print('='.repeat(60));
  print(`Total Collections: ${formatNumber(summary.total)}`);
  print(`Matches: ${formatNumber(summary.matches)} (${percentOfTotal(summary.matches)}%)`);
  print(`Differences: ${formatNumber(summary.differences)} (${percentOfTotal(summary.differences)}%)`);
  print(`Errors: ${formatNumber(summary.errors)} (${percentOfTotal(summary.errors)}%)`);
  print(`Total Source Docs: ${formatNumber(summary.totalSourceDocs)}`);
  print(`Total Target Docs: ${formatNumber(summary.totalTargetDocs)}`);
  print(`Total Difference: ${formatNumber(summary.totalSourceDocs - summary.totalTargetDocs)}`);
  print('='.repeat(60));
}
// Main execution function
/**
 * Script entry point: connect to both clusters, enumerate the databases and
 * collections of the SOURCE cluster, compare document counts against the
 * target, print the report in the configured format and close both
 * connections.
 *
 * Only collections that exist on the source are compared.
 *
 * @returns {Promise<void>} Resolves when the report has been printed (or
 *   when there was nothing to compare).
 * @throws Re-throws any fatal error after logging it; connections are closed
 *   either way.
 */
async function main() {
  const startTime = new Date();
  log('info', 'Starting MongoDB cluster comparison');
  try {
    // Establish connections
    sourceConnection = connectToCluster(config.sourceCluster.uri, 'source');
    targetConnection = connectToCluster(config.targetCluster.uri, 'target');

    // Get list of databases from source cluster
    const databases = getDatabaseList(sourceConnection);
    if (databases.length === 0) {
      log('warn', 'No databases found to compare');
      return;
    }

    // Collect all collections to compare
    const tasks = [];
    for (const dbName of databases) {
      for (const collectionName of getCollectionList(sourceConnection, dbName)) {
        tasks.push({ database: dbName, collection: collectionName });
      }
    }
    log('info', `Found ${tasks.length} collections across ${databases.length} databases`);
    if (tasks.length === 0) {
      log('warn', 'No collections found to compare');
      return;
    }

    // Process comparisons in batches
    log('info', `Starting comparison with max concurrency: ${config.maxConcurrency}`);
    const comparisonResults = await processCollectionsConcurrently(tasks);

    // Sort results for better readability
    comparisonResults.sort((a, b) => {
      if (a.database !== b.database) return a.database.localeCompare(b.database);
      return a.collection.localeCompare(b.collection);
    });

    // Output results. The summary accompanies every table rendering,
    // including the fallback taken for an unrecognised format.
    const outputFormat = config.outputFormat.toLowerCase();
    let renderedAsTable = false;
    switch (outputFormat) {
      case 'json':
        outputAsJSON(comparisonResults);
        break;
      case 'csv':
        outputAsCSV(comparisonResults);
        break;
      case 'table':
        outputAsTable(comparisonResults);
        renderedAsTable = true;
        break;
      default:
        log('warn', `Unknown outputFormat "${config.outputFormat}", falling back to table`);
        outputAsTable(comparisonResults);
        renderedAsTable = true;
        break;
    }
    if (renderedAsTable) {
      printSummary(comparisonResults);
    }

    const endTime = new Date();
    const duration = (endTime - startTime) / 1000;
    log('info', `Comparison completed in ${duration.toFixed(2)} seconds`);

    // Report the overall outcome. This only logs; the process exit status
    // is not changed by differences or per-collection errors.
    const hasErrors = comparisonResults.some(r => r.status === 'ERROR');
    const hasDifferences = comparisonResults.some(r => r.status === 'DIFF');
    if (hasErrors) {
      log('warn', 'Comparison completed with errors');
    } else if (hasDifferences) {
      log('info', 'Comparison completed - differences found');
    } else {
      log('info', 'Comparison completed - all collections match');
    }
  } catch (error) {
    log('error', 'Fatal error during comparison', error);
    throw error;
  } finally {
    // Clean up connections. Closing is best-effort: a failure here must not
    // mask the real outcome, but it is logged instead of silently dropped.
    const openConnections = [
      ['source', sourceConnection],
      ['target', targetConnection]
    ];
    for (const [name, connection] of openConnections) {
      if (!connection) {
        continue;
      }
      try {
        connection.close();
      } catch (closeError) {
        log('debug', `Failed to close ${name} connection`, closeError);
      }
    }
  }
}
// Helper function for string formatting (simple sprintf implementation)
/**
 * Minimal sprintf supporting `%s` and `%d` with an optional width and the
 * `-` (left-justify) flag, e.g. `%-20s` or `%15s`.
 *
 * - Without `-` the value is padded on the left (right-aligned).
 * - Values longer than the width are never truncated.
 * - `%d` converts its argument with a base-10 parseInt, using 0 when the
 *   result is NaN.
 * - The `+`, `0` and space flags and a `*` width are accepted by the pattern
 *   but have no effect.
 * - Placeholders with no remaining argument are left in the output as-is.
 *
 * @param {string} format - Format string.
 * @param {...*} args - Values substituted in order.
 * @returns {string} The formatted string.
 */
function sprintf(format, ...args) {
  let nextArg = 0;
  return format.replace(/%([-+0 ])?(\*|\d+)?([sd])/g, (match, flag, width, type) => {
    if (nextArg >= args.length) {
      return match;
    }

    let value = args[nextArg++];
    if (type === 'd') {
      value = parseInt(value, 10) || 0;
    }
    const text = String(value);

    if (!width || width === '*') {
      return text;
    }
    const columns = parseInt(width, 10);
    if (!(columns > 0)) {
      return text;
    }

    // The '-' flag selects left-justification (pad on the right); the
    // default is right-justification (pad on the left).
    return flag === '-' ? text.padEnd(columns, ' ') : text.padStart(columns, ' ');
  });
}
// Promise feature check (not a polyfill - nothing is installed): if the
// runtime has no global Promise, log an error and call quit(1).
if (typeof Promise === 'undefined') {
log('error', 'This script requires Promise support. Please use a newer version of mongosh.');
quit(1);
}
// Run the main function. main() logs and re-throws fatal errors, so a
// rejection arriving here is logged a second time before quit(1) is called.
main().catch(error => {
log('error', 'Script execution failed', error);
quit(1);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment