123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387 |
- using UnityEngine;
- using System.Collections;
- using System.Collections.Generic;
- using System.Text;
- using System;
- namespace DigitalOpus.MB.Core
- {
- [Serializable]
- public class MB3_AgglomerativeClustering
- {
- public List<item_s> items = new List<item_s>();
- public ClusterNode[] clusters;
- public bool wasCanceled;
- [Serializable]
- public class ClusterNode
- {
- public item_s leaf;
- public ClusterNode cha;
- public ClusterNode chb;
- public int height; /* height of node from the bottom */
- public float distToMergedCentroid;
- public Vector3 centroid; /* centroid of this cluster */
- public int[] leafs; /* indexes of root clusters merged */
- public int idx; //index in clusters list
- public bool isUnclustered = true;
- public ClusterNode(item_s ii, int index)
- {
- leaf = ii;
- idx = index;
- leafs = new int[1];
- leafs[0] = index;
- centroid = ii.coord;
- height = 0;
- }
- public ClusterNode(ClusterNode a, ClusterNode b, int index, int h, float dist, ClusterNode[] clusters)
- {
- cha = a;
- chb = b;
- idx = index;
- leafs = new int[a.leafs.Length + b.leafs.Length];
- Array.Copy(a.leafs, leafs, a.leafs.Length);
- Array.Copy(b.leafs, 0, leafs, a.leafs.Length, b.leafs.Length);
- Vector3 c = Vector3.zero;
- for (int i = 0; i < leafs.Length; i++)
- {
- c += clusters[leafs[i]].centroid;
- }
- centroid = c / leafs.Length;
- height = h;
- distToMergedCentroid = dist;
- }
- };
- [Serializable]
- public class item_s
- {
- public GameObject go;
- public Vector3 coord; /* coordinate of the input data point */
- };
- float euclidean_distance(Vector3 a, Vector3 b)
- {
- return Vector3.Distance(a, b);
- }
- public bool agglomerate(ProgressUpdateCancelableDelegate progFunc)
- {
- wasCanceled = true;
- if (progFunc != null) wasCanceled = progFunc("Filling Priority Queue:", 0);
- if (items.Count <= 1)
- {
- clusters = new ClusterNode[0];
- return false;
- //yield break;
- }
- clusters = new ClusterNode[items.Count * 2 - 1];
- for (int i = 0; i < items.Count; i++)
- {
- clusters[i] = new ClusterNode(items[i], i);
- }
- int numClussters = items.Count;
- List<ClusterNode> unclustered = new List<ClusterNode>();
- for (int i = 0; i < numClussters; i++)
- {
- clusters[i].isUnclustered = true;
- unclustered.Add(clusters[i]);
- }
- int height = 0;
- System.Diagnostics.Stopwatch timer = new System.Diagnostics.Stopwatch();
- timer.Start();
- float largestDistInQ = 0;
- long usedMemory = GC.GetTotalMemory(false) / 1000000;
- PriorityQueue < float, ClusterDistance > pq = new PriorityQueue<float, ClusterDistance>();
- //largestDistInQ = _RefillPriorityQWithSome(pq, unclustered, clusters /*,null,null*/);
- int numRefills = 0;
- while (unclustered.Count > 1)
- {
-
- int numToFindClosetPair = 0;
- height++;
- //find closest pair
-
- if (pq.Count == 0)
- {
- numRefills++;
- usedMemory = GC.GetTotalMemory(false) / 1000000;
- if (progFunc != null) wasCanceled = progFunc("Refilling Q:" + ((float)(items.Count - unclustered.Count) * 100) / items.Count + " unclustered:" + unclustered.Count + " inQ:" + pq.Count + " usedMem:" + usedMemory,
- ((float)(items.Count - unclustered.Count)) / items.Count);
- largestDistInQ = _RefillPriorityQWithSome(pq, unclustered, clusters, progFunc);
- if (pq.Count == 0) break;
- }
- KeyValuePair<float, ClusterDistance> closestPair = pq.Dequeue();
- // should only consider unclustered pairs. It is more effecient to discard nodes that have already been clustered as they are popped off the Q
- // than to try to remove them from the Q when they have been clustered.
- while (!closestPair.Value.a.isUnclustered || !closestPair.Value.b.isUnclustered) {
- if (pq.Count == 0)
- {
- numRefills++;
- usedMemory = GC.GetTotalMemory(false) / 1000000;
- if (progFunc != null) wasCanceled = progFunc("Creating clusters:" + ((float)(items.Count - unclustered.Count) * 100) / items.Count + " unclustered:" + unclustered.Count + " inQ:" + pq.Count + " usedMem:" + usedMemory,
- ((float)(items.Count - unclustered.Count)) / items.Count);
- largestDistInQ = _RefillPriorityQWithSome(pq, unclustered, clusters, progFunc);
- if (pq.Count == 0) break;
- }
- closestPair = pq.Dequeue();
- numToFindClosetPair++;
- }
- //make a new cluster with pair as children set merge height
- numClussters++;
- ClusterNode cn = new ClusterNode(closestPair.Value.a, closestPair.Value.b, numClussters - 1, height, closestPair.Key, clusters);
- //remove children from unclustered
- unclustered.Remove(closestPair.Value.a);
- unclustered.Remove(closestPair.Value.b);
- //We NEED TO REMOVE ALL DISTANCE PAIRS THAT INVOLVE A AND B FROM PRIORITY Q. However searching for all these pairs and removing is very slow.
- // Instead we will leave them in the Queue and flag the clusters as isUnclustered = false and discard them as they are popped from the Q which is O(1) operation.
- closestPair.Value.a.isUnclustered = false;
- closestPair.Value.b.isUnclustered = false;
- //add new cluster to unclustered
- int newIdx = numClussters - 1;
- if (newIdx == clusters.Length)
- {
- Debug.LogError("how did this happen");
- }
- clusters[newIdx] = cn;
- unclustered.Add(cn);
- cn.isUnclustered = true;
- //update new clusteres distance
- for (int i = 0; i < unclustered.Count - 1; i++)
- {
- float dist = euclidean_distance(cn.centroid, unclustered[i].centroid);
- if (dist < largestDistInQ) //avoid cluttering Qwith
- {
- pq.Add(new KeyValuePair<float, ClusterDistance>(dist, new ClusterDistance(cn, unclustered[i])));
- }
- }
- //if (timer.Interval > .2f)
- //{
- // yield return null;
- // timer.Start();
- //}
- if (wasCanceled) break;
- usedMemory = GC.GetTotalMemory(false) / 1000000;
- if (progFunc != null) wasCanceled = progFunc("Creating clusters:" + ((float)(items.Count - unclustered.Count)*100) / items.Count + " unclustered:" + unclustered.Count + " inQ:" + pq.Count + " usedMem:" + usedMemory,
- ((float)(items.Count - unclustered.Count)) / items.Count);
- }
- if (progFunc != null) wasCanceled = progFunc("Finished clustering:", 100);
- //Debug.Log("Time " + timer.Elapsed);
- if (wasCanceled)
- {
- return false;
- }
- else
- {
- return true;
- }
- }
- const int MAX_PRIORITY_Q_SIZE = 2048;
- float _RefillPriorityQWithSome(PriorityQueue<float, ClusterDistance> pq, List<ClusterNode> unclustered, ClusterNode[] clusters, ProgressUpdateCancelableDelegate progFunc)
- {
- //find nthSmallest point of distances between pairs
- List<float> allDist = new List<float>(2048);
- for (int i = 0; i < unclustered.Count; i++)
- {
- for (int j = i+1; j < unclustered.Count; j++)
- {
-
- // if (unclustered[i] == omitA || unclustered[i] == omitB ||
- // unclustered[j] == omitA || unclustered[j] == omitB)
- // {
-
- // } else
- // {
-
- allDist.Add(euclidean_distance(unclustered[i].centroid, unclustered[j].centroid));
- // }
- }
- wasCanceled = progFunc("Refilling Queue Part A:", i / (unclustered.Count * 2f));
- if (wasCanceled) return 10f;
- }
-
- if (allDist.Count == 0)
- {
- return 10e10f;
- }
- float nthSmallest = NthSmallestElement(allDist, MAX_PRIORITY_Q_SIZE);
- //load up Q with up to nthSmallest distance pairs
- for (int i = 0; i < unclustered.Count; i++)
- {
- for (int j = i + 1; j < unclustered.Count; j++)
- {
- int idxa = unclustered[i].idx;
- int idxb = unclustered[j].idx;
- float newDist = euclidean_distance(unclustered[i].centroid, unclustered[j].centroid);
- if (newDist <= nthSmallest)
- {
- pq.Add(new KeyValuePair<float, ClusterDistance>(newDist, new ClusterDistance(clusters[idxa], clusters[idxb])));
- }
- }
- wasCanceled = progFunc("Refilling Queue Part B:", (unclustered.Count + i) / (unclustered.Count * 2f));
- if (wasCanceled) return 10f;
- }
- return nthSmallest;
- }
- public int TestRun(List<GameObject> gos)
- {
- List<item_s> its = new List<item_s>();
- for (int i = 0; i < gos.Count; i++)
- {
- item_s ii = new item_s();
- ii.go = gos[i];
- ii.coord = gos[i].transform.position;
- its.Add(ii);
- }
- items = its;
- if (items.Count > 0)
- {
- agglomerate(null);
- }
- return 0;
- }
- //------
- // Unclustered
- //need to be able to find the smallest distance between unclustered pairs quickly
- //Do this by maintaining a fixed length PriorityQueue (len = 1000)
- // Q stores min distances between cluster pairs
- // unlclustered stores list of unclustered
- //GetMin
- // if Q is empty
- // build Q from unclustered O(n2)
- // track the largestDistanceInQ
- // if unclustered is empty we are done
- // else
- // q.DeQueue O(1)
- //
- // when creating new merged cluster, calc dist to all other unclustered add these distances to priority Q if less than largestDistanceInQ O(N)
- //
- public class ClusterDistance
- {
- public ClusterNode a;
- public ClusterNode b;
- public ClusterDistance(ClusterNode aa, ClusterNode bb)
- {
- a = aa;
- b = bb;
- }
- }
- public static void Main()
- {
- List<float> inputArray = new List<float>();
- inputArray.AddRange(new float[] { 19, 18, 17, 16, 15, 10, 11, 12, 13, 14 });
- // Loop 10 times
- Debug.Log("Loop quick select 10 times.");
- Debug.Log(NthSmallestElement(inputArray, 0));
-
- }
- // n is 0 indexed
- public static T NthSmallestElement<T>(List<T> array, int n) where T : IComparable<T>
- {
- if (n < 0)
- n = 0;
- if (n > array.Count - 1)
- n = array.Count - 1;
- if (array.Count == 0)
- throw new ArgumentException("Array is empty.", "array");
- if (array.Count == 1)
- return array[0];
- return QuickSelectSmallest(array, n)[n];
- }
- private static List<T> QuickSelectSmallest<T>(List<T> input, int n) where T : IComparable<T>
- {
- // Let's not mess up with our input array
- // For very large arrays - we should optimize this somehow - or just mess up with our input
- var partiallySortedArray = input;
- // Initially we are going to execute quick select to entire array
- var startIndex = 0;
- var endIndex = input.Count - 1;
- // Selecting initial pivot
- // Maybe we are lucky and array is sorted initially?
- var pivotIndex = n;
- // Loop until there is nothing to loop (this actually shouldn't happen - we should find our value before we run out of values)
- var r = new System.Random();
- while (endIndex > startIndex)
- {
- pivotIndex = QuickSelectPartition(partiallySortedArray, startIndex, endIndex, pivotIndex);
- if (pivotIndex == n)
- // We found our n:th smallest value - it is stored to pivot index
- break;
- if (pivotIndex > n)
- // Array before our pivot index have more elements that we are looking for
- endIndex = pivotIndex - 1;
- else
- // Array before our pivot index has less elements that we are looking for
- startIndex = pivotIndex + 1;
- // Omnipotent beings don't need to roll dices - but we do...
- // Randomly select a new pivot index between end and start indexes (there are other methods, this is just most brutal and simplest)
- pivotIndex = r.Next(startIndex, endIndex);
- }
- return partiallySortedArray;
- }
- private static int QuickSelectPartition<T>(List<T> array, int startIndex, int endIndex, int pivotIndex) where T : IComparable<T>
- {
- var pivotValue = array[pivotIndex];
- // Initially we just assume that value in pivot index is largest - so we move it to end (makes also for loop more straight forward)
- Swap(array, pivotIndex, endIndex);
- for (var i = startIndex; i < endIndex; i++)
- {
- if (array[i].CompareTo(pivotValue) > 0)
- continue;
- // Value stored to i was smaller than or equal with pivot value - let's move it to start
- Swap(array, i, startIndex);
- // Move start one index forward
- startIndex++;
- }
- // Start index is now pointing to index where we should store our pivot value from end of array
- Swap(array, endIndex, startIndex);
- return startIndex;
- }
- private static void Swap<T>(List<T> array, int index1, int index2)
- {
- if (index1 == index2)
- return;
- var temp = array[index1];
- array[index1] = array[index2];
- array[index2] = temp;
- }
-
- }
- }
|