Question: To generate a K-subset in a uniformly random manner from N elements.
We have two slightly different solutions. The first one is quite straightforward: Â shuffle the elements, and then return the first K elements. In such solution, the space complexity would be O(N).
The second method would use the “inside-out” version of the shuffle. In this implementation, we travel the input from begin to end, rather than from end to begin. Every element has only one chance to enter the K-subset, when in its scanning stage. Otherwise, every processed element can only be swapped into latter position. As a result, once an element is swapped out of the K-subset, it could NEVER come back. And we do not need to keep it in record. The space complexity of this solution is O(K).
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 | /************************************************************************* * Compilation: javac RandomSubset.java * Execution: java RandomSubset * * Generate a subset in uniformly random manner. * *************************************************************************/ import java.util.Scanner; import java.util.Random; /** * <i>Random Subset Generator</i>. This class generates an K-subset from * N elements in uniformly random manner, with time complexity O(N) and * space complexity O(K). * * @author Sheng Yu * @version 0.1 */ public class RandomSubset<Item> { // The container of the input elements. Stored them in a stack manner. private Item[] contents = null; // The index for the first unused space in the array. private int end = 0; /** * construct an empty array with default size 64 */ public RandomSubset() { contents = (Item[]) new Object[64]; } /** * construct an empty array with specific size * @param capacity The original size of the array * @exception IllegalArgumentException * Occurs when the capacity is less * than one, or is odd number. */ public RandomSubset(int capacity) { if (capacity < 1 || capacity % 2 == 1) { throw new IllegalArgumentException(); } contents = (Item[]) new Object[capacity]; } /** * Resize the storing array. In both extending and * shrinking, the original contents will be kept. * * @param capacity The new size of the array. It must be * a even positive number. * @exception IllegalArgumentException * Occurs when the capacity is less than * one, or is odd number. */ private void resize(int capacity) { if (capacity < 1 || capacity % 2 == 1) { throw new IllegalArgumentException(); } Item[] copy = (Item[]) new Object[capacity]; for (int i = 0; i < end; i++) { copy[i] = contents[i]; } contents = copy; return; } /** * Test whether the contents is empty. * * @return true if there are some data in the array, * false otherwise. */ public boolean isEmpty() { return end == 0; } /** * @return Return the number of items in the array. */ public int size() { return end; } /** * Append an item to the array. * @param item The new item to append. * @exception NullPointerException * Occurs when the given item is null. */ public void feed(Item item) { if (item == null) { throw new NullPointerException(); } if (end == contents.length) { resize(contents.length*2); } contents[end] = item; end++; return; } /** * Generate a K-subset in an uniformly random manner * from existing N elements. * * @param size The size of generating subset. * @return Item[] randomly chosen K-subset * @exception IndexOutOfBoundsException * Occurs when the wanted subset size * is greater than contents' size. */ public Item[] sample(int size) { if (size > end) { throw new IndexOutOfBoundsException(); } // Pseudorandom Number Generators, for demo only!!! Random random = new Random(System.currentTimeMillis()); Item[] subset = (Item[]) new Object[size]; int swapPos = 0; Item swapTemp = null; for (int i = 0; i < size; i++) { subset[i] = contents[i]; swapPos = random.nextInt(i+1); if (swapPos != i) { swapTemp = subset[swapPos]; subset[swapPos] = subset[i]; subset[i] = swapTemp; } } for (int i = size; i < end; i++) { swapPos = random.nextInt(i+1); /*** * If the element is swapped to the position after K, * it will NEVER have chance to come back to K-subset. * So we do not need to store the elements after * position K. */ if (swapPos < size) { subset[swapPos] = contents[i]; } } return subset; } /** * Main function for unit test. * @param args the command line arguments. Not used. */ public static void main(String[] args) { Scanner scanner = new Scanner(System.in); RandomSubset<String> test = new RandomSubset<String>(); String element; int sampleSize = 0; System.out.print("Please input the size of subset: "); sampleSize = scanner.nextInt(); System.out.println("Input example (MUST ends with SAMPLE)" + " A B3 C 1 FG 4 H SAMPLE"); System.out.print("Please input the contents: "); while (true) { element = scanner.next(); if ("SAMPLE".equals(element)) { break; } test.feed(element); } Object[] subset = test.sample(sampleSize); for (Object item : subset) { System.out.printf("%s ", (String) item); } return; } } |