Prepping for an interview. I am trying to practice by solving the following problem: Given an input array of NSNumbers where some of the numbers are duplicated, how can you create another array that only has the unique values in the original array.
I see 2 approaches:
Brute-force: Loop through each element in the array, while at a element compare it against the set of numbers in the unique list, if there is a match, don't store it, else add it to the unique list. O(n^2) worst case time?
Hash-table based approach: Have a hash-table of length N. Each element of the has-table is NSSet. Every number is mapped to 0,...N-1 using a hashing function. If it is exists in the NSSet corresponding to the "mapped-index", it is not added to "unique array". if not, it is added to set and unique array.
Is this O(N) complexity?
- I looked two ways to implement approach 2 A. NSMutableArray with size of N all initialized to [NSNull null] objects at start. B. NSMutableDictionary where key = hashed mapping integer
Code for each approach is below.
I am noticing that
i. Running time of 2A (array approach) is half of that of 2B (Mutabledictionary approach) for the input array of length 403 shown below(0.055ms vs .12ms).
ii. Running time of 1 is ~ 5 times worse 0.25ms. If there are not any duplicates, this discrepancy is even worse.
My Qs are:
- Is there a better algorithm than 2?
- Is there a better implementation of algorithm 2?
- Why is dictionary approach slower? How can I answer this for myself using Instruments profiling. I.e how can I know exact time taken by each step using Instruments?
Code
Hashcode function
#define NUM_BUCKETS 127
#define RANDOMIZER 11
#define NUM_ITER 40000
int hashcode(int value)
{
int retVal = (value*RANDOMIZER)%NUM_BUCKETS ;
if(retVal<0)
{
retVal+=NUM_BUCKETS ;
}
return retVal ;
}
1. Brute-Force Approach
NSMutableArray *smooshedArr=[[NSMutableArray alloc] init] ;
double startTime ;
startTime=CFAbsoluteTimeGetCurrent() ;
for(int iter=0;iter<=NUM_ITER;iter++)
{
[smooshedArr removeAllObjects] ;
[smooshedArr addObject:ints[0]] ;
int i,j ;
for(i=1;i<[ints count];i++)
{
for(j=0;j<[smooshedArr count];j++)
{
if([ints[i] intValue] == [smooshedArr[j] intValue])
{
break ;
}
}
if(j==[smooshedArr count])
{
[smooshedArr addObject:ints[i]] ;
}
}
}
NSLog(@"Bruteforce took %.3fms to remove duplicates from array of length %lu",(CFAbsoluteTimeGetCurrent()-startTime)*1000/NUM_ITER,(unsigned long)[ints count]) ;
NSLog(@"Smooshed arary is %@",smooshedArr) ;
2A. Array based hash table
NSMutableArray *hashTable = [[NSMutableArray alloc] init] ;
startTime=CFAbsoluteTimeGetCurrent() ;
for(int iter=0;iter<=NUM_ITER;iter++)
{
[smooshedArr removeAllObjects];
for (NSInteger i = 0; i < NUM_BUCKETS; ++i)
{
[hashTable addObject:[NSNull null]];
}
[smooshedArr addObject:ints[0]] ;
int indexToInsert = hashcode([ints[0] intValue]) ;
hashTable[indexToInsert]=[[NSMutableSet alloc] init] ;
[hashTable[indexToInsert] addObject:ints[0]] ;
int i ;
for(i=1;i<[ints count];i++)
{
//Find hascode of element i
//If the list at index = hashcode in hashCodeArary is empty, then create a NSMutableSet, set toInsert = True
//If not empty, check if the element exists in the set. If yes, setToInsert=False. If no, setToInsert=True
int indexToInsert = hashcode([ints[i] intValue]) ;
BOOL toInsert=false ;
if(hashTable[indexToInsert] == [NSNull null])
{
hashTable[indexToInsert]=[[NSMutableSet alloc] init] ;
toInsert=true ;
}
else
{
if(![hashTable[indexToInsert] containsObject:ints[i]])
toInsert=true ;
}
if(toInsert)
{
[hashTable[indexToInsert] addObject:ints[i]] ;
[smooshedArr addObject:ints[i]] ;
}
}
}
NSLog(@"MutableArray (no cheat) took %.3fms to remove duplicates from array of length %lu",(CFAbsoluteTimeGetCurrent()-startTime)*1000/NUM_ITER,(unsigned long)[ints count]) ;
2B. Dictionary based hash table
NSMutableDictionary *hashDict = [[NSMutableDictionary alloc] init] ;
//NSLog(@"Start of hashcode approach %.6f", CFAbsoluteTimeGetCurrent()) ;
startTime=CFAbsoluteTimeGetCurrent() ;
for(int iter=0;iter<=NUM_ITER;iter++)
{
//if(iter <4) NSLog(@"iter start: %.6f", CFAbsoluteTimeGetCurrent()) ;
//if(iter <4) NSLog(@"init start: %.6f", CFAbsoluteTimeGetCurrent()) ;
[smooshedArr removeAllObjects];
[hashDict removeAllObjects] ;
//if (iter<4) NSLog(@"init end: %.6f", CFAbsoluteTimeGetCurrent()) ;
[smooshedArr addObject:ints[0]] ;
int indexToInsert = hashcode([ints[0] intValue]) ;
hashDict[@(indexToInsert)]=[[NSMutableSet alloc] init] ;
[hashDict[@(indexToInsert)] addObject:ints[0]] ;
int i ;
for(i=1;i<[ints count];i++)
{
//Find hascode of element i
//If the list at index = hashcode in hashCodeArary is empty, then create a NSMutableSet, set toInsert = True
//If not empty, check if the element exists in the set. If yes, setToInsert=False. If no, setToInsert=True
int indexToInsert = hashcode([ints[i] intValue]) ;
BOOL toInsert=false ;
if(hashDict[@(indexToInsert)] == nil)
{
hashDict[@(indexToInsert)]=[[NSMutableSet alloc] init] ;
toInsert=true ;
}
else
{
if(![hashDict[@(indexToInsert)] containsObject:ints[i]])
toInsert=true ;
}
if(toInsert)
{
[hashDict[@(indexToInsert)] addObject:ints[i]] ;
[smooshedArr addObject:ints[i]] ;
}
}
}
NSLog(@"Dictionary approach: %.3fms to remove duplicates from array of length %lu",(CFAbsoluteTimeGetCurrent()-startTime)*1000/NUM_ITER,(unsigned long)[ints count]) ;
Input tested ON, 430 elements with some dups and averaged over 40000 iterations
NSArray *ints = @[@(2),@(3),@(4),@(1),@(6),@(9),@(2),@(2),@(3),@(21),@(22),@(45454),@(464642),@(65),@(45454),@(1234),@(3421),@(123),@(1234),@(11111),@(2727272),@(112),@(3),@(4),@(1),@(612211),@(9),@(2),@(2),@(3),@(21),@(22),@(45454),@(464642),@(65),@(45454),@(1234),@(3421),@(123),@(1234),@(11111),@(7272),@(1232),@(3),@(4),@(1),@(60),@(9),@(2),@(2),@(3),@(21),@(22),@(45454),@(464642),@(65),@(45454),@(1234),@(3421),@(123),@(1234),@(11111),@(2727272),@(2),@(3),@(4),@(1),@(6),@(9),@(2),@(2),@(3),@(21),@(22),@(45454),@(464642),@(65),@(45454),@(1234),@(3421),@(123),@(1234),@(11111),@(2727272),@(2),@(3),@(4),@(1),@(6),@(9),@(2),@(2),@(3),@(21),@(22),@(45454),@(464642),@(65),@(45454),@(1234),@(3421),@(123),@(1234),@(11111),@(72),@(2),@(3),@(4),@(1),@(6),@(9),@(2),@(2),@(3),@(21),@(22),@(45454),@(464642),@(65),@(45454),@(1234),@(3421),@(123),@(1234),@(11111),@(13272),@(2),@(3),@(4),@(18),@(6),@(9),@(2),@(2),@(3),@(21),@(22),@(45454),@(464642),@(65),@(45454),@(1234),@(3421),@(123),@(1234),@(11111),@(972),@(2),@(3),@(4),@(1),@(6),@(9),@(2),@(2),@(3),@(21),@(22),@(45454),@(464642),@(65),@(45454),@(1234),@(3421),@(123),@(1234),@(11111),@(3272),@(2),@(3),@(4),@(1),@(69),@(9),@(2),@(2),@(3),@(21),@(22),@(45454),@(464642),@(65),@(45454),@(1234),@(3421),@(123),@(1234),@(11111),@(1272),@(2),@(3),@(4),@(1),@(6),@(9),@(2),@(2),@(3),@(21),@(22),@(45454),@(464642),@(65),@(45454),@(1234),@(3421),@(123),@(1234),@(11111),@(2272),@(2),@(3),@(4),@(1),@(6),@(91),@(2),@(2),@(3),@(21),@(22),@(45454),@(464642),@(65),@(45454),@(1234),@(3421),@(123),@(1234),@(11111),@(7272),@(2),@(3),@(4),@(12),@(6),@(9),@(2),@(2),@(3),@(21),@(22),@(45454),@(464642),@(65),@(45454),@(1234),@(3421),@(123),@(1234),@(111),@(27272),@(2),@(321),@(4),@(1),@(6),@(9),@(2),@(2),@(3),@(21),@(22),@(45454),@(464642),@(65),@(4545411),@(12341),@(34210),@(123),@(1234),@(1111),@(727272),@(11187),@(9086),@(876543),@(74532),@(464642),@(65),@(45454),@(1234),@(3421),@(123),@(1234),@(11111),@(13272),@(2),@(3),@(4),@(18),@(6),@(9),@(2),@(2),@(3),@(21),@(22),@(45454),@(464642),@(658),@(45454),@(12934),@(38421),@(1243),@(12345),@(1112),@(72),@(52),@(3),@(498),@(1),@(6),@(9),@(2),@(2),@(3),@(21),@(22),@(45454),@(464642),@(65),@(45454),@(1234),@(650),@(45454),@(1234),@(3421),@(123),@(1234),@(111),@(27272),@(2),@(321),@(4),@(1),@(6),@(9),@(2),@(2),@(3),@(21),@(22),@(45454),@(464642),@(65),@(4545411),@(12341),@(34210),@(123),@(1234),@(1111),@(727272),@(11187),@(9086),@(876543),@(74532),@(464642),@(65),@(45454),@(1234),@(3421),@(123),@(1234),@(11111),@(13272),@(2),@(3),@(4),@(18),@(6),@(9),@(2),@(2),@(3),@(21),@(22),@(45454),@(464642),@(658),@(45454),@(12934),@(38421),@(1243),@(19992345),@(119875412),@(72),@(52),@(3),@(498),@(1),@(6),@(9),@(2),@(2),@(3),@(21),@(22),@(450454),@(46908764642),@(6753435),@(45498754),@(100234),@(65)] ;