using UnityEngine;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System;

namespace DigitalOpus.MB.Core
{
    [Serializable]
    public class MB3_AgglomerativeClustering
    {
        public List<item_s> items = new List<item_s>();

        public ClusterNode[] clusters;

        public bool wasCanceled;

        [Serializable]
        public class ClusterNode
        {
            public item_s leaf;
            public ClusterNode cha;
            public ClusterNode chb;
            public int height;                 /* height of node from the bottom */
            public float distToMergedCentroid;
            public Vector3 centroid;           /* centroid of this cluster */
            public int[] leafs;                /* indexes of the leaf clusters merged under this node */
            public int idx;                    // index in clusters list
            public bool isUnclustered = true;

            public ClusterNode(item_s ii, int index)
            {
                leaf = ii;
                idx = index;
                leafs = new int[1];
                leafs[0] = index;
                centroid = ii.coord;
                height = 0;
            }

            public ClusterNode(ClusterNode a, ClusterNode b, int index, int h, float dist, ClusterNode[] clusters)
            {
                cha = a;
                chb = b;
                idx = index;
                leafs = new int[a.leafs.Length + b.leafs.Length];
                Array.Copy(a.leafs, leafs, a.leafs.Length);
                Array.Copy(b.leafs, 0, leafs, a.leafs.Length, b.leafs.Length);
                Vector3 c = Vector3.zero;
                for (int i = 0; i < leafs.Length; i++)
                {
                    c += clusters[leafs[i]].centroid;
                }
                centroid = c / leafs.Length;
                height = h;
                distToMergedCentroid = dist;
            }
        };

        [Serializable]
        public class item_s
        {
            public GameObject go;
            public Vector3 coord;              /* coordinate of the input data point */
        };

        float euclidean_distance(Vector3 a, Vector3 b)
        {
            return Vector3.Distance(a, b);
        }
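
        // --- Added illustrative sketch (not part of the original Mesh Baker API) ---
        // Each ClusterNode stores the indexes of its leaf clusters in 'leafs', and the leaf clusters
        // occupy slots 0 .. items.Count-1 of the 'clusters' array, each wrapping one item_s. A minimal
        // sketch of how the GameObjects grouped under any node can be recovered, assuming agglomerate()
        // has already been run:
        public List<GameObject> GetGameObjectsInCluster(ClusterNode node)
        {
            List<GameObject> gos = new List<GameObject>();
            if (node == null || clusters == null) return gos;
            for (int i = 0; i < node.leafs.Length; i++)
            {
                // node.leafs[i] indexes a leaf ClusterNode whose 'leaf' field holds the original item.
                ClusterNode leafNode = clusters[node.leafs[i]];
                if (leafNode != null && leafNode.leaf != null && leafNode.leaf.go != null)
                {
                    gos.Add(leafNode.leaf.go);
                }
            }
            return gos;
        }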
        public bool agglomerate(ProgressUpdateCancelableDelegate progFunc)
        {
            wasCanceled = false;
            if (progFunc != null) wasCanceled = progFunc("Filling Priority Queue:", 0);
            if (items.Count <= 1)
            {
                clusters = new ClusterNode[0];
                return false;
            }

            clusters = new ClusterNode[items.Count * 2 - 1];
            for (int i = 0; i < items.Count; i++)
            {
                clusters[i] = new ClusterNode(items[i], i);
            }

            int numClusters = items.Count;
            List<ClusterNode> unclustered = new List<ClusterNode>();
            for (int i = 0; i < numClusters; i++)
            {
                clusters[i].isUnclustered = true;
                unclustered.Add(clusters[i]);
            }

            int height = 0;
            System.Diagnostics.Stopwatch timer = new System.Diagnostics.Stopwatch();
            timer.Start();
            float largestDistInQ = 0;
            long usedMemory = GC.GetTotalMemory(false) / 1000000;
            PriorityQueue<float, ClusterDistance> pq = new PriorityQueue<float, ClusterDistance>();
            int numRefills = 0;
            while (unclustered.Count > 1)
            {
                int numToFindClosestPair = 0;
                height++;

                // Find the closest pair. Refill the priority queue if it has been exhausted.
                if (pq.Count == 0)
                {
                    numRefills++;
                    usedMemory = GC.GetTotalMemory(false) / 1000000;
                    if (progFunc != null) wasCanceled = progFunc("Refilling Q:" + ((float)(items.Count - unclustered.Count) * 100) / items.Count +
                        " unclustered:" + unclustered.Count + " inQ:" + pq.Count + " usedMem:" + usedMemory,
                        ((float)(items.Count - unclustered.Count)) / items.Count);
                    largestDistInQ = _RefillPriorityQWithSome(pq, unclustered, clusters, progFunc);
                    if (pq.Count == 0) break;
                }

                KeyValuePair<float, ClusterDistance> closestPair = pq.Dequeue();

                // Only consider unclustered pairs. It is more efficient to discard nodes that have already
                // been clustered as they are popped off the Q than to try to remove them from the Q when
                // they are clustered.
                while (!closestPair.Value.a.isUnclustered || !closestPair.Value.b.isUnclustered)
                {
                    if (pq.Count == 0)
                    {
                        numRefills++;
                        usedMemory = GC.GetTotalMemory(false) / 1000000;
                        if (progFunc != null) wasCanceled = progFunc("Creating clusters:" + ((float)(items.Count - unclustered.Count) * 100) / items.Count +
                            " unclustered:" + unclustered.Count + " inQ:" + pq.Count + " usedMem:" + usedMemory,
                            ((float)(items.Count - unclustered.Count)) / items.Count);
                        largestDistInQ = _RefillPriorityQWithSome(pq, unclustered, clusters, progFunc);
                        if (pq.Count == 0) break;
                    }
                    closestPair = pq.Dequeue();
                    numToFindClosestPair++;
                }

                // Make a new cluster with the pair as children and set the merge height.
                numClusters++;
                ClusterNode cn = new ClusterNode(closestPair.Value.a, closestPair.Value.b, numClusters - 1, height, closestPair.Key, clusters);

                // Remove children from unclustered.
                unclustered.Remove(closestPair.Value.a);
                unclustered.Remove(closestPair.Value.b);

                // We NEED TO REMOVE ALL DISTANCE PAIRS THAT INVOLVE A AND B FROM THE PRIORITY Q. However,
                // searching for all these pairs and removing them is very slow. Instead we leave them in
                // the queue, flag the clusters as isUnclustered = false, and discard them as they are
                // popped from the Q, which is an O(1) operation.
                closestPair.Value.a.isUnclustered = false;
                closestPair.Value.b.isUnclustered = false;

                // Add the new cluster to unclustered.
                int newIdx = numClusters - 1;
                if (newIdx == clusters.Length)
                {
                    Debug.LogError("how did this happen");
                }
                clusters[newIdx] = cn;
                unclustered.Add(cn);
                cn.isUnclustered = true;

                // Update the new cluster's distances to the other unclustered nodes.
                for (int i = 0; i < unclustered.Count - 1; i++)
                {
                    float dist = euclidean_distance(cn.centroid, unclustered[i].centroid);
                    if (dist < largestDistInQ) // avoid cluttering the Q with distant pairs
                    {
                        pq.Add(new KeyValuePair<float, ClusterDistance>(dist, new ClusterDistance(cn, unclustered[i])));
                    }
                }

                if (wasCanceled) break;
                usedMemory = GC.GetTotalMemory(false) / 1000000;
                if (progFunc != null) wasCanceled = progFunc("Creating clusters:" + ((float)(items.Count - unclustered.Count) * 100) / items.Count +
                    " unclustered:" + unclustered.Count + " inQ:" + pq.Count + " usedMem:" + usedMemory,
                    ((float)(items.Count - unclustered.Count)) / items.Count);
            }

            if (progFunc != null) wasCanceled = progFunc("Finished clustering:", 100);
            //Debug.Log("Time " + timer.Elapsed);
            if (wasCanceled)
            {
                return false;
            }
            else
            {
                return true;
            }
        }
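
        // --- Added illustrative sketch (not part of the original Mesh Baker API) ---
        // agglomerate() builds a dendrogram: leaves occupy clusters[0 .. items.Count-1] and each merge
        // node records the centroid distance at which its children were joined (distToMergedCentroid).
        // One common way to consume such a tree is to "cut" it at a distance threshold: walk down from
        // the root and keep the first nodes whose merge distance is within the threshold. A minimal
        // sketch, assuming agglomerate() ran to completion so the root sits in the last slot:
        public List<ClusterNode> CutAtDistance(float maxMergeDistance)
        {
            List<ClusterNode> result = new List<ClusterNode>();
            if (clusters == null || clusters.Length == 0) return result;
            ClusterNode root = clusters[clusters.Length - 1];
            if (root == null) return result;
            Stack<ClusterNode> stack = new Stack<ClusterNode>();
            stack.Push(root);
            while (stack.Count > 0)
            {
                ClusterNode n = stack.Pop();
                // Leaves (no children) and merges at or below the threshold become flat clusters.
                if (n.cha == null || n.chb == null || n.distToMergedCentroid <= maxMergeDistance)
                {
                    result.Add(n);
                }
                else
                {
                    stack.Push(n.cha);
                    stack.Push(n.chb);
                }
            }
            return result;
        }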
        const int MAX_PRIORITY_Q_SIZE = 2048;

        float _RefillPriorityQWithSome(PriorityQueue<float, ClusterDistance> pq, List<ClusterNode> unclustered, ClusterNode[] clusters, ProgressUpdateCancelableDelegate progFunc)
        {
            // Find the nth smallest of the distances between all unclustered pairs.
            List<float> allDist = new List<float>(2048);
            for (int i = 0; i < unclustered.Count; i++)
            {
                for (int j = i + 1; j < unclustered.Count; j++)
                {
                    allDist.Add(euclidean_distance(unclustered[i].centroid, unclustered[j].centroid));
                }
                if (progFunc != null) wasCanceled = progFunc("Refilling Queue Part A:", i / (unclustered.Count * 2f));
                if (wasCanceled) return 10f;
            }
            if (allDist.Count == 0)
            {
                return 10e10f;
            }
            float nthSmallest = NthSmallestElement(allDist, MAX_PRIORITY_Q_SIZE);

            // Load up the Q with the pairs no further apart than the nth smallest distance.
            for (int i = 0; i < unclustered.Count; i++)
            {
                for (int j = i + 1; j < unclustered.Count; j++)
                {
                    int idxa = unclustered[i].idx;
                    int idxb = unclustered[j].idx;
                    float newDist = euclidean_distance(unclustered[i].centroid, unclustered[j].centroid);
                    if (newDist <= nthSmallest)
                    {
                        pq.Add(new KeyValuePair<float, ClusterDistance>(newDist, new ClusterDistance(clusters[idxa], clusters[idxb])));
                    }
                }
                if (progFunc != null) wasCanceled = progFunc("Refilling Queue Part B:", (unclustered.Count + i) / (unclustered.Count * 2f));
                if (wasCanceled) return 10f;
            }
            return nthSmallest;
        }

        public int TestRun(List<GameObject> gos)
        {
            List<item_s> its = new List<item_s>();
            for (int i = 0; i < gos.Count; i++)
            {
                item_s ii = new item_s();
                ii.go = gos[i];
                ii.coord = gos[i].transform.position;
                its.Add(ii);
            }
            items = its;
            if (items.Count > 0)
            {
                agglomerate(null);
            }
            return 0;
        }

        //------
        // We need to be able to find the smallest distance between unclustered pairs quickly.
        // Do this by maintaining a fixed-length PriorityQueue (len = MAX_PRIORITY_Q_SIZE):
        //   the Q stores the minimum distances between cluster pairs
        //   'unclustered' stores the list of unclustered nodes
        // GetMin:
        //   if Q is empty
        //       build Q from unclustered O(n^2), tracking largestDistanceInQ
        //   if unclustered is empty we are done
        //   else
        //       q.Dequeue O(1)
        //
        // When creating a new merged cluster, calc the dist to all other unclustered nodes and add these
        // distances to the priority Q if less than largestDistanceInQ O(n).

        public class ClusterDistance
        {
            public ClusterNode a;
            public ClusterNode b;

            public ClusterDistance(ClusterNode aa, ClusterNode bb)
            {
                a = aa;
                b = bb;
            }
        }

        public static void Main()
        {
            List<float> inputArray = new List<float>();
            inputArray.AddRange(new float[] { 19, 18, 17, 16, 15, 10, 11, 12, 13, 14 });
            Debug.Log("Quick select the smallest element.");
            Debug.Log(NthSmallestElement(inputArray, 0));
        }

        // n is 0 indexed
        public static T NthSmallestElement<T>(List<T> array, int n) where T : IComparable<T>
        {
            if (n < 0) n = 0;
            if (n > array.Count - 1) n = array.Count - 1;
            if (array.Count == 0) throw new ArgumentException("Array is empty.", "array");
            if (array.Count == 1) return array[0];
            return QuickSelectSmallest(array, n)[n];
        }
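
        // --- Added illustrative sketch (not part of the original Mesh Baker API) ---
        // PriorityQueue<TPriority, TValue> and ProgressUpdateCancelableDelegate are defined elsewhere in
        // the MB core library. This file only assumes a min-queue exposing Count, Add(KeyValuePair) and
        // Dequeue(), plus a delegate of the form bool(string message, float progress) that returns true
        // when the user cancels. The hypothetical class below sketches that assumed queue behaviour with
        // buckets in a SortedDictionary; it is not the implementation used by agglomerate().
        public class ExampleMinPriorityQueue<TPriority, TValue> where TPriority : IComparable<TPriority>
        {
            SortedDictionary<TPriority, Queue<TValue>> _buckets = new SortedDictionary<TPriority, Queue<TValue>>();
            int _count;

            public int Count { get { return _count; } }

            public void Add(KeyValuePair<TPriority, TValue> pair)
            {
                Queue<TValue> bucket;
                if (!_buckets.TryGetValue(pair.Key, out bucket))
                {
                    bucket = new Queue<TValue>();
                    _buckets.Add(pair.Key, bucket);
                }
                bucket.Enqueue(pair.Value);
                _count++;
            }

            public KeyValuePair<TPriority, TValue> Dequeue()
            {
                if (_count == 0) throw new InvalidOperationException("Queue is empty.");
                // SortedDictionary enumerates keys in ascending order, so the first bucket holds the minimum.
                TPriority minKey = default(TPriority);
                foreach (TPriority k in _buckets.Keys) { minKey = k; break; }
                Queue<TValue> bucket = _buckets[minKey];
                TValue v = bucket.Dequeue();
                if (bucket.Count == 0) _buckets.Remove(minKey);
                _count--;
                return new KeyValuePair<TPriority, TValue>(minKey, v);
            }
        }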
        private static List<T> QuickSelectSmallest<T>(List<T> input, int n) where T : IComparable<T>
        {
            // Note: partitioning is done in place, so the input list is reordered.
            var partiallySortedArray = input;

            // Initially execute quick select over the entire array.
            var startIndex = 0;
            var endIndex = input.Count - 1;

            // Selecting the initial pivot. Maybe we are lucky and the array is sorted initially?
            var pivotIndex = n;

            // Loop until there is nothing left to partition (we should find our value before we run out of values).
            var r = new System.Random();
            while (endIndex > startIndex)
            {
                pivotIndex = QuickSelectPartition(partiallySortedArray, startIndex, endIndex, pivotIndex);
                if (pivotIndex == n)
                    // We found our nth smallest value - it is stored at the pivot index.
                    break;
                if (pivotIndex > n)
                    // The part of the array before the pivot index has more elements than we are looking for.
                    endIndex = pivotIndex - 1;
                else
                    // The part of the array before the pivot index has fewer elements than we are looking for.
                    startIndex = pivotIndex + 1;

                // Randomly select a new pivot index between the start and end indexes
                // (there are other methods, this is just the simplest).
                pivotIndex = r.Next(startIndex, endIndex);
            }
            return partiallySortedArray;
        }

        private static int QuickSelectPartition<T>(List<T> array, int startIndex, int endIndex, int pivotIndex) where T : IComparable<T>
        {
            var pivotValue = array[pivotIndex];
            // Assume the value at the pivot index is the largest and move it to the end
            // (this also makes the for loop more straightforward).
            Swap(array, pivotIndex, endIndex);
            for (var i = startIndex; i < endIndex; i++)
            {
                if (array[i].CompareTo(pivotValue) > 0) continue;

                // The value stored at i was smaller than or equal to the pivot value - move it to the start.
                Swap(array, i, startIndex);

                // Move the start one index forward.
                startIndex++;
            }
            // The start index now points to the position where the pivot value (currently at the end) belongs.
            Swap(array, endIndex, startIndex);
            return startIndex;
        }

        private static void Swap<T>(List<T> array, int index1, int index2)
        {
            if (index1 == index2) return;
            var temp = array[index1];
            array[index1] = array[index2];
            array[index2] = temp;
        }
    }
}
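
// --- Added illustrative usage sketch (not part of the original Mesh Baker source) ---
// A hypothetical component showing how the clusterer might be driven from a scene: collect some
// GameObjects, run TestRun (which fills 'items' and calls agglomerate), then inspect the resulting
// cluster tree. The class name and the use of child objects as input are assumptions for the example.
namespace DigitalOpus.MB.Core
{
    public class MB3_AgglomerativeClusteringExampleUsage : MonoBehaviour
    {
        void Start()
        {
            // Cluster the direct children of this object by world position.
            List<GameObject> gos = new List<GameObject>();
            foreach (Transform child in transform) gos.Add(child.gameObject);
            if (gos.Count < 2) return;

            MB3_AgglomerativeClustering clustering = new MB3_AgglomerativeClustering();
            clustering.TestRun(gos);

            // The last slot in 'clusters' holds the root of the dendrogram when clustering completed.
            MB3_AgglomerativeClustering.ClusterNode root =
                clustering.clusters[clustering.clusters.Length - 1];
            if (root != null)
            {
                Debug.Log("Root cluster contains " + root.leafs.Length +
                          " leaves, merged at distance " + root.distToMergedCentroid);
            }
        }
    }
}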