Question: if you hava an array with n integers. These integers are in random order. And each interger is between 1 and n-1, including 1 and n-1. Additionally, there is one and only one integer, which appears twice, while all others only occur once. Find out the duplicate integer with O(n) time and O(1) space.
Solution of C programming language could be:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | #include <stdio.h> #include <stdlib.h> #include <errno.h> #include <stdbool.h> #include <time.h> bool DEBUG = true; void shuffle(long int *contents, long int count){ // Shuffle the contents with number of items being "count" // Using the algorithm "Fisher–Yates shuffle" // Algorithm detial is available on // https://en.wikipedia.org/wiki/Fisher-Yates_shuffle long int index = count-1, swapindex = 0; srand(time(NULL)); while(index >= 0 ){ swapindex = rand() % (index + 1); if(swapindex != index){ // Swap the item at position index and swapindex contents[index] ^= contents[swapindex]; contents[swapindex] ^= contents[index]; contents[index] ^= contents[swapindex]; } --index; } return; } long int find_duplicate(long int *contents, long int count){ // Return the duplicate item in contents long int duplicate = 0,index =0; // After all the XOR computation, the result should be // duplicate = 0 ^ (duplicate value) ^ (1 ^ 1) ^ (2 ^2) ^ // ... ^ (count-1 ^ count-1) = 0 ^ (duplicate value) // = duplicate value // The order of XOR computations does not influent the // result. if(count <=2){ return -1; } while(index < count-1){ // The following statement, in most cases, is equal to // duplicate += contents[index] - (index+1); // But the minus method might lead to overflow or // downflow in some cases. While the XOR never fails. duplicate ^= contents[index] ^ (index+1); ++index; } duplicate ^= contents[index]; return duplicate; } int main(int argc, char **argv){ long int range=0, duplicate=0, result = 0, *contents=NULL; long int index = 0; // Read the arguments if(argc != 3){ printf("Usage: %s range duplicaten",argv[0]); return 2; } errno = 0; range = strtol(argv[1], NULL, 10); if(range <=0 || errno == ERANGE){ printf("Invalid argument for range: %s.n",argv[1]); return 1; } errno = 0; duplicate = strtol(argv[2], NULL, 10); if(duplicate <=0 || errno == ERANGE || duplicate >range){ printf("Invalid argument for duplicate: %s.n",argv[2]); return 1; } // Allocate the memory and fill with data and shuffle // Allocate the memory to store the test date contents = (long int *)malloc(sizeof(long int)*(range+1)); if(contents == NULL){ printf("Fail to allocate the memory.n"); return 3; } // Fill the data with sequential numbers from 1 to range // And add the duplicate number at the end index = 0; while(index < range){ contents[index] = index + 1; ++index; } contents[index] = duplicate; if(DEBUG){ printf("The contents before shuffle are:n"); index = 0; while(index < range){ printf("%ld ",contents[index]); ++index; } printf("%ldn",contents[index]); } // Shuffle the array shuffle(contents, range+1); if(DEBUG){ printf("The contents after shuffle are:n"); index = 0; while(index < range){ printf("%ld ",contents[index]); ++index; } printf("%ldn",contents[index]); } // Find out the duplicate data without additional space (only two // long int variable needed) and with O(n) time. result = find_duplicate(contents, range+1); if(result < 0){ printf("Error in findind the duplicate item.n"); } else{ printf("The duplicate number is %ld.n",result); } free(contents); return 0; } |
 And the Python solution (only include the core function) would be:
1 2 3 4 5 6 7 8 | def find_duplicate(contents): contents_len = len(contents) if contents_len < 2: raise ValueError() duplicate = contents[0] for i in range(1, contents_len): duplicate ^= i^ contents[i] return duplicate |
Couldn’t you just use Gauss’s algorithm?
Calculate what the sum of 1 … n-1 would be using 0.5*(n-1)*(n), then subtract this from the sum of all the ints in the input array and you should be left over the the duplicate number.
*what’s left over should be the duplicated number
It will lead to overflow.