Skip to content

Instantly share code, notes, and snippets.

@normgraham
Last active August 29, 2015 13:55
Show Gist options
  • Select an option

  • Save normgraham/8717228 to your computer and use it in GitHub Desktop.

Select an option

Save normgraham/8717228 to your computer and use it in GitHub Desktop.
Scans a copy of the system.profile collection named sp and for each document that has a query field inserts a queryHash field that's a hash of the field names and orderby components of the query. Essentially creates a signature for the query. Includes a sample aggregation pipeline for analysis.
// Assuming that sp the name of collection that's a copy of the system.profile collection,
// the following code tags each document containing a query field with a queryHash value
// based on the shape of the query and orderby (or sort). There's also a sample aggregation
// pipeline that uses the new queryHash field. E.g. tagQueries("sp") or aggQueries("sp")
var tagQueries = function (sp) {
var queryToHash = function (doc) {
var props = new Array();
// Recursively walk the query or sort document pushing fieldnames onto the props array.
var pushProps = function(doc, path) {
for (var propName in doc) {
var prop = doc[propName];
if (typeof prop === 'object' && prop !== null &&
!( prop instanceof Date || prop instanceof Timestamp || prop instanceof ObjectId ||
prop instanceof NumberInt || prop instanceof NumberLong ) )
pushProps(prop, path + propName + "..");
else
props.push(path + propName);
}
}
// Since fieldnames in system.profile documents may contain special
// characters like "." and "$", we have to avoid creating new objects
// with those fieldnames. Reading the fieldnames is no problem, so we
// just cherry pick the fieldnames we're interested in.
// A query in a normal CRUD operation. Two flavor: with and without sorts.
if ("query" in doc) {
if ("query" in doc.query && "orderby" in doc.query) {
pushProps(doc.query.query, "Q..");
pushProps(doc.query.orderby, "S..");
}
else
pushProps(doc.query, "Q..");
}
// A query in a command. Two flavors: non-aggregation and aggregation commands.
else if ("command" in doc) {
if ("query" in doc.command) {
pushProps(doc.command.query, "Q..");
if ("sort" in doc.command)
pushProps(doc.command.sort, "S..");
}
else if ("pipeline" in doc.command) {
// OK, this is very hacky, but we're basically looking for the 1st
// $match and $sort that occurs before any of $project, $unwind, or $group
var pipeline = doc.command.pipeline;
var match = -1;
var sort = -1;
for (var i = pipeline.length-1; i >= 0; i--) {
if ("$match" in pipeline[i])
match = i;
else if ("$sort" in pipeline[i])
sort = i;
else if ("$project" in pipeline[i] || "$unwind" in pipeline[i] || "$group" in pipeline[i]) {
match = -1;
sort = -1;
}
}
if (match != -1)
pushProps(doc.command.pipeline[match]["$match"], "Q..");
if (sort != -1)
pushProps(doc.command.pipeline[sort]["$sort"], "S..")
}
}
var hashChar = function(a,c) {
a = ((a<<5)-a)+c.charCodeAt(0);
return a & a;
}
return props.sort().join(';').split("").reduce(hashChar, 0);
}
var topLevelQuery = {"query" : {$exists : true}};
var commandLevelQuery = {"command.query" : {$exists : 1}};
var pipelineLevelQuery = {"command.pipeline" : {$exists : 1}};
var cur = db[sp].find({$or : [topLevelQuery, commandLevelQuery, pipelineLevelQuery]});
cur.forEach(function(doc) { db[sp].update({_id : doc._id}, {$set : {queryHash : queryToHash(doc)}}) });
}
var aggQueries = function (sp) {
db[sp].aggregate(
{ $match : { queryHash : {$exists : 1} } },
{ $group : {
_id : {queryHash : "$queryHash", ns : "$ns"},
"queryHash" : {$first:"$queryHash"},
"sample query" : {$first:"$query"},
"sample command" : {$first:"$command"},
"ns" : {$first:"$ns"},
"count" : {$sum : 1},
"max millis" : {$max:"$millis"},
"avg millis" : {$avg:"$millis"},
"max numYield" : {$max:"$numYield"},
"avg numYield" : {$avg:"$numYield"},
"max nscanned" : {$max:"$nscanned"},
"avg nscanned" : {$avg:"$nscanned"},
"max nreturned" : {$max:"$nreturned"},
"avg nreturned" : {$avg:"$nreturned"} } },
{ $sort : {count:-1} },
{ $project : {
"_id" : 0,
"queryHash" : 1,
"sample query" : {$ifNull : ["$sample query", "$sample command"]},
"ns" : 1,
"count" : 1,
"max millis" : 1,
"avg millis" : 1,
"max numYield" : 1,
"avg numYield" : 1,
"max nscanned" : 1,
"avg nscanned" : 1,
"max nreturned" : 1,
"avg nreturned" : 1 } }
).result.forEach(printjson);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment