Skip to content

Instantly share code, notes, and snippets.

@LouisJenkinsCS
Forked from novoselrok/kmeans-dist.chpl
Last active January 21, 2019 04:27
Show Gist options
  • Select an option

  • Save LouisJenkinsCS/e0f525f212d475ca2e6a5af79999e840 to your computer and use it in GitHub Desktop.

Select an option

Save LouisJenkinsCS/e0f525f212d475ca2e6a5af79999e840 to your computer and use it in GitHub Desktop.
use Random;
use Time;
use BlockDist;
// use CommDiagnostics;
// See: https://chapel-lang.org/docs/master/modules/standard/CommDiagnostics.html
/* Config variables */
// All of these may be overridden on the command line, e.g. `--k=4 --maxIter=100`.
config const filename = "../data/test_10_10_2.chpl.txt"; // input file of points
config const outFilename = "out.txt";                    // output: one cluster label per point
config const k = 2;                                      // number of clusters
config const maxIter = 10;                               // fixed iteration count (no convergence test)
config const numPoints = 10;                             // number of points expected in the input file
/* Datatypes */
// A point is a homogeneous 10-tuple of reals (Chapel `N * T` tuple syntax);
// tuple arithmetic (+, -, **, /) below operates componentwise.
type Point = 10 * real;
// Note: This is a bit of information that may help influence the decision to use a 'record' or a 'class'
// A 'class' is equivalent to a heap-allocated object that is globally visible to all compute-nodes; essentially
// your run-of-the-mill pointer to a 'struct'. A 'record' on the other-hand is a stack-allocated object that exists
// on the compute-node it was allocated on. Hence if you initialize a local variable to a 'record' (which will invoke
// the initializer) and then access it from an 'on' statement (e.g. from another compute-node), by default all accesses
// will be by-reference. Note that a store/load to a 'wide' reference, being a reference to remote memory, is resolved
// by the compiler (with runtime assistance) into a PUT/GET respectively. Note as well that in Chapel, invoking a method
// is not implicitly handled in a Remote-Method-Invocation (RMI), it will invoke the method with the implicit 'this'
// treated as an wide-reference, meaning any and all accesses to fields will be resolved remotely into, you guessed it,
// PUT/GET. If you *need* to continue execution on the locale it is allocated on, use an `on this` statement to wrap the
// rest of code being executed.
// A k-means cluster: accumulators (count + componentwise point sum) plus the
// current centroid. Declared as a 'record' (value type) so each locale can
// accumulate into its own local copy; partials are merged afterwards via
// addCluster / the '+' overload.
record Cluster {
  var size: int;       // number of points assigned this iteration
  var pointSum: Point; // componentwise sum of the assigned points
  var mean: Point;     // current centroid

  proc init() {}

  // Seed a cluster with an initial centroid; accumulators start at zero.
  proc init(point: Point) {
    mean = point;
  }

  // Euclidean distance from this cluster's centroid to point 'p'.
  proc distance(p: Point) {
    const squaredDiff = (mean - p) ** 2; // componentwise squared differences
    // Data-parallel sum over the tuple components (replaces the serial loop).
    return sqrt(+ reduce squaredDiff);
  }

  // Merge another cluster's accumulators into this one (used when reducing
  // per-locale partial clusters). 'const ref' avoids copying the record.
  proc addCluster(const ref other: Cluster) {
    size += other.size;
    pointSum += other.pointSum;
  }

  // Accumulate one point into this cluster's running sum.
  proc addPoint(ref p: Point) {
    pointSum += p;
    size += 1;
  }

  proc setMean(ref p: Point) {
    mean = p;
  }

  // Recompute the centroid from the accumulators. Guard against an empty
  // cluster (size == 0) so we do not divide by zero and poison the centroid
  // with NaN/inf; an empty cluster simply keeps its previous mean.
  proc calcMean() {
    if size != 0 then
      mean = pointSum / size;
  }
}
// Used when reducing per-locale partial clusters.
// Pure combiner: builds and returns a fresh Cluster instead of mutating the
// left operand (the original called c1.addCluster(c2) and returned c1, which
// is surprising for a '+' overload and unsafe inside a '+ reduce', where the
// reduction framework assumes operands are not side-effected).
proc +(c1: Cluster, c2: Cluster): Cluster {
  var combined: Cluster;
  combined.size = c1.size + c2.size;
  combined.pointSum = c1.pointSum + c2.pointSum;
  combined.mean = c1.mean; // carried along; recomputed later via calcMean()
  return combined;
}
// Distributed k-means: read numPoints points, pick k random initial centroids,
// run maxIter assignment/update iterations across all locales, then write one
// cluster label per point to outFilename.
proc main() {
  // startVerboseComm();

  /* Points and labels, block-distributed across locales */
  var pointsSpace = {0..#numPoints};
  var pointsDomain = pointsSpace dmapped Block(boundingBox=pointsSpace);
  var points: [pointsDomain] Point;
  var labels: [pointsDomain] int;

  // Note: this array lives on Locale#0; remote accesses resolve to PUT/GET.
  var clustersDomain = {0..#k};
  var clusters: [clustersDomain] Cluster;

  /* Read input file */
  var f = open(filename, iomode.r);
  var reader = f.reader();
  for i in pointsDomain {
    points[i] = reader.read(Point);
  }
  // Bug fix: close the reader channel BEFORE the underlying file. The
  // original closed the file first, leaving a live channel on a closed file.
  reader.close();
  f.close();

  var watch: Timer;
  watch.start();

  /* Algorithm: seed the k clusters with randomly chosen input points */
  var randStream = new owned RandomStream(real);
  // randStream is a serialized single-node object, so getNext() in a loop is a
  // bottleneck under distribution; for large k prefer the data-parallel form:
  //   forall (rng, cluster) in zip(randStream.iterate(clustersDomain), clusters) do
  //     cluster = new Cluster(points[(rng * numPoints):int]);
  for i in clustersDomain {
    var idx: int = (randStream.getNext() * numPoints):int;
    clusters[i] = new Cluster(points[idx]);
  }

  for iteration in 0..#maxIter {
    // One set of partial clusters per locale so each locale accumulates
    // locally, avoiding contention on a single remote array.
    var newClustersPerLocale: [0..#numLocales][clustersDomain] Cluster;

    // Assignment step: each locale labels its local chunk of 'points'.
    // (Equivalently: `forall (label, point) in zip(labels, points)` — array
    // iteration yields by reference and respects locality.)
    coforall L in Locales {
      on L {
        const indices = points.localSubdomain();
        for i in indices {
          // Tuples copy by value; bind a reference to avoid the copy.
          ref point = points[i];
          var minIndex = 0;
          var minValue = Math.INFINITY;
          // Serial scan over the k clusters; for large k consider
          //   min reduce [(idx, c) in zip(clustersDomain, clusters)]
          //     (c.distance(point), idx)
          for (idx, cluster) in zip(clusters.domain, clusters) {
            var distance = cluster.distance(point);
            if (distance < minValue) {
              minValue = distance;
              minIndex = idx;
            }
          }
          labels[i] = minIndex;
          newClustersPerLocale[here.id][minIndex].addPoint(point);
        }
      }
    }

    // Update step: merge per-locale partials, then recompute centroids.
    var newClusters = + reduce newClustersPerLocale;
    for cluster in newClusters {
      cluster.calcMean();
    }
    clusters = newClusters;
  }

  writeln(watch.elapsed());

  /* Output labels, one per line, in point order */
  var outf = open(outFilename, iomode.cw);
  var writer = outf.writer();
  for lbl in labels {
    writer.writeln(lbl);
  }
  writer.close();
  outf.close();
  // stopVerboseComm();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment