-
-
Save LouisJenkinsCS/e0f525f212d475ca2e6a5af79999e840 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| use Random; | |
| use Time; | |
| use BlockDist; | |
| // use CommDiagnostics; | |
| // See: https://chapel-lang.org/docs/master/modules/standard/CommDiagnostics.html | |
/* Config variables (overridable on the command line, e.g. --k=4) */
config const filename = "../data/test_10_10_2.chpl.txt"; // input points file
config const outFilename = "out.txt";                    // output labels file
config const k = 2;                                      // number of clusters
config const maxIter = 10;                               // k-means iterations
config const numPoints = 10;                             // points in the input file

/* Datatypes */
// A point is a fixed-arity tuple of 10 reals (one coordinate per dimension).
type Point = 10 * real;
| // Note: This is a bit of information that may help influence the decision to use a 'record' or a 'class' | |
| // A 'class' is equivalent to a heap-allocated object that is globally visible to all compute-nodes; essentially | |
| // your run-of-the-mill pointer to a 'struct'. A 'record' on the other-hand is a stack-allocated object that exists | |
| // on the compute-node it was allocated on. Hence if you initialize a local variable to a 'record' (which will invoke | |
| // the initializer) and then access it from an 'on' statement (e.g. from another compute-node), by default all accesses | |
| // will be by-reference. Note that a store/load to a 'wide' reference, being a reference to remote memory, is resolved | |
| // by the compiler (with runtime assistance) into a PUT/GET respectively. Note as well that in Chapel, invoking a method | |
| // is not implicitly handled in a Remote-Method-Invocation (RMI), it will invoke the method with the implicit 'this' | |
| // treated as an wide-reference, meaning any and all accesses to fields will be resolved remotely into, you guessed it, | |
| // PUT/GET. If you *need* to continue execution on the locale it is allocated on, use an `on this` statement to wrap the | |
| // rest of code being executed. | |
// A k-means cluster: running accumulation of assigned points plus the
// current centroid. Stack-allocated record; accessed remotely by reference.
record Cluster {
  var size: int;        // number of points assigned this iteration
  var pointSum: Point;  // componentwise sum of assigned points
  var mean: Point;      // current centroid

  proc init() {}

  // Seed a cluster with an initial centroid.
  proc init(point: Point) {
    mean = point;
  }

  // Euclidean distance from this cluster's centroid to 'p'.
  proc distance(p: Point) {
    const squaredDiff = (mean - p) ** 2;
    // Data-parallel reduction over the componentwise squared differences
    // (replaces the original serial accumulation loop).
    return sqrt(+ reduce squaredDiff);
  }

  // Merge another cluster's accumulated state into this one.
  // 'const ref' avoids copying the record by value (per the review note).
  proc addCluster(const ref other: Cluster) {
    size += other.size;
    pointSum += other.pointSum;
  }

  // Account for a point newly assigned to this cluster.
  proc addPoint(ref p: Point) {
    pointSum += p;
    size += 1;
  }

  proc setMean(ref p: Point) {
    mean = p;
  }

  // Recompute the centroid from the accumulated sum.
  // Guard against an empty cluster: the original divided unconditionally,
  // so size == 0 produced divide-by-zero / non-finite centroid components.
  proc calcMean() {
    if size != 0 then
      mean = pointSum / size;
  }
}
// Combine two clusters into a brand-new Cluster. Used by '+ reduce' over
// arrays of Clusters in main().
//
// The original implementation mutated its LHS (c1.addCluster(c2)) and
// returned it -- exactly the side effect the inline review comment warned
// about: '+' must be pure so reductions behave deterministically. This
// version leaves both operands untouched.
proc +(c1: Cluster, c2: Cluster) {
  var combined: Cluster;
  combined.size = c1.size + c2.size;
  combined.pointSum = c1.pointSum + c2.pointSum;
  // Carry the LHS mean forward (matches the original's observable result;
  // callers recompute it via calcMean() after the reduction anyway).
  combined.mean = c1.mean;
  return combined;
}
// Distributed k-means: read numPoints Points, run maxIter assignment/update
// iterations over k clusters, then write one label per point to outFilename.
proc main() {
  // startVerboseComm();
  var pointsSpace = {0..#numPoints};
  var pointsDomain = pointsSpace dmapped Block(boundingBox=pointsSpace);
  var points: [pointsDomain] Point;   // block-distributed input points
  var labels: [pointsDomain] int;     // cluster index assigned to each point
  // Note: allocated on Locale#0; remote accesses resolve to PUT/GET.
  var clustersDomain = {0..#k};
  var clusters: [clustersDomain] Cluster;

  /* Read input file */
  var f = open(filename, iomode.r);
  var reader = f.reader();
  for i in pointsDomain {
    points[i] = reader.read(Point);
  }
  // BUG FIX: close the reader channel before its underlying file. The
  // original closed the file first, invalidating the still-open channel.
  reader.close();
  f.close();

  var watch: Timer;
  watch.start();

  /* Algorithm */
  var randStream = new owned RandomStream(real);
  // Seed each cluster with a randomly chosen input point.
  // (Review note: randStream.iterate(clustersDomain) zipped with clusters
  // would do this in parallel without contending on the stream's lock.)
  for i in clustersDomain {
    var idx: int = (randStream.getNext() * numPoints):int;
    clusters[i] = new Cluster(points[idx]);
  }

  for iteration in 0..#maxIter {
    // Per-locale accumulators, merged below with '+ reduce' to avoid
    // cross-locale contention during the assignment phase.
    var newClustersPerLocale: [0..#numLocales][clustersDomain] Cluster;
    // Review note: 'forall (label, point) in zip(labels, points)' would
    // express this assignment phase more idiomatically and in parallel.
    coforall L in Locales {
      on L {
        const indices = points.localSubdomain();
        for i in indices {
          // Bind by reference: tuples copy by value otherwise.
          ref point = points[i];
          var minIndex = 0;
          var minValue = Math.INFINITY;
          // Find the nearest cluster centroid. (Review note: this serial
          // scan over clusters -- resident on Locale#0 -- is a likely
          // bottleneck; a distributed 'min reduce' would scale better.)
          for (idx, cluster) in zip(clusters.domain, clusters) {
            var distance = cluster.distance(point);
            if (distance < minValue) {
              minValue = distance;
              minIndex = idx;
            }
          }
          labels[i] = minIndex;
          newClustersPerLocale[here.id][minIndex].addPoint(point);
        }
      }
    }
    // Merge the per-locale partial clusters, then recompute centroids.
    var newClusters = + reduce newClustersPerLocale;
    for cluster in newClusters {
      cluster.calcMean();
    }
    clusters = newClusters;
  }

  writeln(watch.elapsed());

  // Output labels, one per line, in point order.
  var outf = open(outFilename, iomode.cw);
  var writer = outf.writer();
  for lbl in labels {
    writer.writeln(lbl);
  }
  writer.close();
  outf.close();
  // stopVerboseComm();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment