Last active
August 29, 2015 13:55
-
-
Save normgraham/8717228 to your computer and use it in GitHub Desktop.
Scans a copy of the system.profile collection named sp and for each document that has a query field inserts a queryHash field that's a hash of the field names and orderby components of the query. Essentially creates a signature for the query. Includes a sample aggregation pipeline for analysis.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Assuming that sp the name of collection that's a copy of the system.profile collection, | |
| // the following code tags each document containing a query field with a queryHash value | |
| // based on the shape of the query and orderby (or sort). There's also a sample aggregation | |
| // pipeline that uses the new queryHash field. E.g. tagQueries("sp") or aggQueries("sp") | |
| var tagQueries = function (sp) { | |
| var queryToHash = function (doc) { | |
| var props = new Array(); | |
| // Recursively walk the query or sort document pushing fieldnames onto the props array. | |
| var pushProps = function(doc, path) { | |
| for (var propName in doc) { | |
| var prop = doc[propName]; | |
| if (typeof prop === 'object' && prop !== null && | |
| !( prop instanceof Date || prop instanceof Timestamp || prop instanceof ObjectId || | |
| prop instanceof NumberInt || prop instanceof NumberLong ) ) | |
| pushProps(prop, path + propName + ".."); | |
| else | |
| props.push(path + propName); | |
| } | |
| } | |
| // Since fieldnames in system.profile documents may contain special | |
| // characters like "." and "$", we have to avoid creating new objects | |
| // with those fieldnames. Reading the fieldnames is no problem, so we | |
| // just cherry pick the fieldnames we're interested in. | |
| // A query in a normal CRUD operation. Two flavor: with and without sorts. | |
| if ("query" in doc) { | |
| if ("query" in doc.query && "orderby" in doc.query) { | |
| pushProps(doc.query.query, "Q.."); | |
| pushProps(doc.query.orderby, "S.."); | |
| } | |
| else | |
| pushProps(doc.query, "Q.."); | |
| } | |
| // A query in a command. Two flavors: non-aggregation and aggregation commands. | |
| else if ("command" in doc) { | |
| if ("query" in doc.command) { | |
| pushProps(doc.command.query, "Q.."); | |
| if ("sort" in doc.command) | |
| pushProps(doc.command.sort, "S.."); | |
| } | |
| else if ("pipeline" in doc.command) { | |
| // OK, this is very hacky, but we're basically looking for the 1st | |
| // $match and $sort that occurs before any of $project, $unwind, or $group | |
| var pipeline = doc.command.pipeline; | |
| var match = -1; | |
| var sort = -1; | |
| for (var i = pipeline.length-1; i >= 0; i--) { | |
| if ("$match" in pipeline[i]) | |
| match = i; | |
| else if ("$sort" in pipeline[i]) | |
| sort = i; | |
| else if ("$project" in pipeline[i] || "$unwind" in pipeline[i] || "$group" in pipeline[i]) { | |
| match = -1; | |
| sort = -1; | |
| } | |
| } | |
| if (match != -1) | |
| pushProps(doc.command.pipeline[match]["$match"], "Q.."); | |
| if (sort != -1) | |
| pushProps(doc.command.pipeline[sort]["$sort"], "S..") | |
| } | |
| } | |
| var hashChar = function(a,c) { | |
| a = ((a<<5)-a)+c.charCodeAt(0); | |
| return a & a; | |
| } | |
| return props.sort().join(';').split("").reduce(hashChar, 0); | |
| } | |
| var topLevelQuery = {"query" : {$exists : true}}; | |
| var commandLevelQuery = {"command.query" : {$exists : 1}}; | |
| var pipelineLevelQuery = {"command.pipeline" : {$exists : 1}}; | |
| var cur = db[sp].find({$or : [topLevelQuery, commandLevelQuery, pipelineLevelQuery]}); | |
| cur.forEach(function(doc) { db[sp].update({_id : doc._id}, {$set : {queryHash : queryToHash(doc)}}) }); | |
| } | |
| var aggQueries = function (sp) { | |
| db[sp].aggregate( | |
| { $match : { queryHash : {$exists : 1} } }, | |
| { $group : { | |
| _id : {queryHash : "$queryHash", ns : "$ns"}, | |
| "queryHash" : {$first:"$queryHash"}, | |
| "sample query" : {$first:"$query"}, | |
| "sample command" : {$first:"$command"}, | |
| "ns" : {$first:"$ns"}, | |
| "count" : {$sum : 1}, | |
| "max millis" : {$max:"$millis"}, | |
| "avg millis" : {$avg:"$millis"}, | |
| "max numYield" : {$max:"$numYield"}, | |
| "avg numYield" : {$avg:"$numYield"}, | |
| "max nscanned" : {$max:"$nscanned"}, | |
| "avg nscanned" : {$avg:"$nscanned"}, | |
| "max nreturned" : {$max:"$nreturned"}, | |
| "avg nreturned" : {$avg:"$nreturned"} } }, | |
| { $sort : {count:-1} }, | |
| { $project : { | |
| "_id" : 0, | |
| "queryHash" : 1, | |
| "sample query" : {$ifNull : ["$sample query", "$sample command"]}, | |
| "ns" : 1, | |
| "count" : 1, | |
| "max millis" : 1, | |
| "avg millis" : 1, | |
| "max numYield" : 1, | |
| "avg numYield" : 1, | |
| "max nscanned" : 1, | |
| "avg nscanned" : 1, | |
| "max nreturned" : 1, | |
| "avg nreturned" : 1 } } | |
| ).result.forEach(printjson); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment