Find a Duplicate Item In an Array of Shuffled Consecutive Integers

Question: if you hava an array with n integers. These integers are in random order. And each interger is between 1 and n-1, including 1 and n-1. Additionally, there is one and only one integer, which appears twice, while all others only occur once. Find out the duplicate integer with O(n) time and O(1) space.

Solution of C programming language could be:

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

#include <stdio.h>

#include <stdlib.h>

#include <errno.h>

#include <stdbool.h>

#include <time.h>

bool DEBUG = true;

void shuffle(long int *contents, long int count){

// Shuffle the contents with number of items being "count"

// Using the algorithm "Fisher–Yates shuffle"

// Algorithm detial is available on

// https://en.wikipedia.org/wiki/Fisher-Yates_shuffle

long int index = count-1, swapindex = 0;

srand(time(NULL));

while(index >= 0 ){

swapindex = rand() % (index + 1);

if(swapindex != index){

// Swap the item at position index and swapindex

contents[index] ^= contents[swapindex];

contents[swapindex] ^= contents[index];

contents[index] ^= contents[swapindex];

}

--index;

}

return;

}

long int find_duplicate(long int *contents, long int count){

// Return the duplicate item in contents

long int duplicate = 0,index =0;

// After all the XOR computation, the result should be

// duplicate = 0 ^ (duplicate value) ^ (1 ^ 1) ^ (2 ^2) ^

// ... ^ (count-1 ^ count-1) = 0 ^ (duplicate value)

// = duplicate value

// The order of XOR computations does not influent the

// result.

if(count <=2){

return -1;

}

while(index < count-1){

// The following statement, in most cases, is equal to

// duplicate += contents[index] - (index+1);

// But the minus method might lead to overflow or

// downflow in some cases. While the XOR never fails.

duplicate ^= contents[index] ^ (index+1);

++index;

}

duplicate ^= contents[index];

return duplicate;

}

int main(int argc, char **argv){

long int range=0, duplicate=0, result = 0, *contents=NULL;

long int index = 0;

// Read the arguments

if(argc != 3){

printf("Usage: %s range duplicaten",argv[0]);

return 2;

}

errno = 0;

range = strtol(argv[1], NULL, 10);

if(range <=0 || errno == ERANGE){

printf("Invalid argument for range: %s.n",argv[1]);

return 1;

}

errno = 0;

duplicate = strtol(argv[2], NULL, 10);

if(duplicate <=0 || errno == ERANGE || duplicate >range){

printf("Invalid argument for duplicate: %s.n",argv[2]);

return 1;

}

// Allocate the memory and fill with data and shuffle

// Allocate the memory to store the test date

contents = (long int *)malloc(sizeof(long int)*(range+1));

if(contents == NULL){

printf("Fail to allocate the memory.n");

return 3;

}

// Fill the data with sequential numbers from 1 to range

// And add the duplicate number at the end

index = 0;

while(index < range){

contents[index] = index + 1;

++index;

}

contents[index] = duplicate;

if(DEBUG){

printf("The contents before shuffle are:n");

index = 0;

while(index < range){

printf("%ld ",contents[index]);

++index;

}

printf("%ldn",contents[index]);

}

// Shuffle the array

shuffle(contents, range+1);

if(DEBUG){

printf("The contents after shuffle are:n");

index = 0;

while(index < range){

printf("%ld ",contents[index]);

++index;

}

printf("%ldn",contents[index]);

}

// Find out the duplicate data without additional space (only two

// long int variable needed) and with O(n) time.

result = find_duplicate(contents, range+1);

if(result < 0){

printf("Error in findind the duplicate item.n");

}

else{

printf("The duplicate number is %ld.n",result);

}

free(contents);

return 0;

}

And the Python solution (only include the core function) would be:

def find_duplicate(contents):

contents_len = len(contents)

if contents_len < 2:

raise ValueError()

duplicate = contents[0]

for i in range(1, contents_len):

duplicate ^= i^ contents[i]

return duplicate

3 Replies to “Find a Duplicate Item In an Array of Shuffled Consecutive Integers”

Ash says:
October 16, 2014 at 12:23 pm
Couldn’t you just use Gauss’s algorithm?
Calculate what the sum of 1 … n-1 would be using 0.5*(n-1)*(n), then subtract this from the sum of all the ints in the input array and you should be left over the the duplicate number.
- Ash says:
  October 16, 2014 at 12:24 pm
  *what’s left over should be the duplicated number
- Sheng says:
  October 16, 2014 at 2:22 pm
  It will lead to overflow.

3 Replies to “Find a Duplicate Item In an Array of Shuffled Consecutive Integers”

Leave a Reply Cancel reply