Question 1
The goal of this problem is to implement a variant of the 2-SUM algorithm (covered in the Week 6 lecture on hash table applications).
The file contains 1 million integers, both positive and negative (there might be some repetitions!). This is your array of integers, with the $i$-th row of the file specifying the $i$-th entry of the array.
Your task is to compute the number of target values $t$ in the interval $[-10000, 10000]$ (inclusive) such that there are distinct numbers $x, y$ in the input file that satisfy $x + y = t$. (NOTE: ensuring distinctness requires a one-line addition to the algorithm from lecture.)
Write your numeric answer (an integer between 0 and 20001) in the space provided.
OPTIONAL CHALLENGE: If this problem is too easy for you, try implementing your own hash table for it. For example, you could compare performance under the chaining and open addressing approaches to resolving collisions.
Question 2
The goal of this problem is to implement the "Median Maintenance" algorithm (covered in the Week 5 lecture on heap applications). The text file contains a list of the integers from 1 to 10000 in unsorted order; you should treat this as a stream of numbers, arriving one by one. Letting $x_i$ denote the $i$-th number of the file, the $k$-th median $m_k$ is defined as the median of the numbers $x_1, \ldots, x_k$. (So, if $k$ is odd, then $m_k$ is the $((k+1)/2)$-th smallest number among $x_1, \ldots, x_k$; if $k$ is even, then $m_k$ is the $(k/2)$-th smallest number among $x_1, \ldots, x_k$.)
In the box below you should type the sum of these 10000 medians, modulo 10000 (i.e., only the last 4 digits). That is, you should compute $(m_1 + m_2 + m_3 + \cdots + m_{10000}) \bmod 10000$.
OPTIONAL EXERCISE: Compare the performance achieved by heap-based and search-tree-based implementations of the algorithm.
#include <iostream>
#include <fstream>
#define MIN 10000
using namespace std;
// Direct-address occurrence table: readData() increments hash2[v] for each
// accepted input value v, and hashMap() tests whether a slot is non-zero.
// The array has MIN + 1 slots, i.e. valid indices are 0..MIN.
int hash2[MIN + 1] = { 0 };
// Number of targets for which main() found a matching pair.
int count = 0;
// Reads whitespace-separated integers from "2sum.txt" and tallies every value
// that fits the direct-address table hash2 (indices 0..MIN).
//
// Values outside 0..MIN are skipped: hash2 has exactly MIN + 1 slots, so using
// a negative or larger value as an index would write outside the array.
// The upper bound is inclusive because slot MIN exists and hashMap() accepts
// n == MIN.
//
// If the file cannot be opened, the first extraction fails and the table is
// left untouched.
void readData()
{
    std::ifstream fin("2sum.txt");
    int temp = 0;
    while(fin >> temp)
    {
        if(temp >= 0 && temp <= MIN)
            hash2[temp]++;
    }
}
// Returns true when the value n was recorded in hash2, false otherwise.
//
// Any n outside the table's index range 0..MIN is reported as absent.
// The lower bound matters: main() probes negative values, and indexing
// hash2 with a negative n would read outside the array.
bool hashMap(int n)
{
    if(n < 0 || n > MIN)
        return false;
    return hash2[n] != 0;
}
int main()
{
readData();
for(int i = -10000; i <= 10000; i++)
{
for(int j = -10000; j <= 10000; j++)
{
if(hashMap(j) && hashMap(i - j))
{
count++;
break;
}
}
}
cout<<count<<endl;
return 0;
}
#include <stdio.h>
#include <time.h>
#define MAX 10000
// Array-backed binary heap with room for MAX elements.
// The helpers in this file address elements by 1-based position, so the
// element at position p is stored in data[p - 1] and the root is data[0].
struct heap
{
int size;       // number of elements currently stored
int data[MAX];  // heap contents
};
// read_data() puts values not greater than max_heap's root into max_heap and
// the rest into min_heap, and reports max_heap's root as the current median.
struct heap min_heap, max_heap;
// Running sum of the medians reported by read_data().
long long count = 0;
// Forward declarations for the helpers defined after main().
// Heap setup and the insert/extract operations for each heap flavour:
void init();
void minheap_insert(int, struct heap *);
int extract_min(struct heap *);
void maxheap_insert(int, struct heap *);
int extract_max(struct heap *);
// Small utilities used by the heap operations (positions are 1-based):
void swap(int *, int *);
int parent(int);
int smallest_child(int, struct heap *);
int biggest_child(int, struct heap *);
// Debug printing and the driver that reads the input file:
void display(struct heap *);
void read_data(char *);
// Resets both heaps, processes the input file, and prints the sum of all
// medians modulo 10000.
int main(void)
{
    init();
    read_data("D:\\Median.txt");
    // count is a long long; passing it straight to %d is undefined behaviour.
    // The remainder always fits in an int, so convert it explicitly.
    printf("The count mod 10000 is : %d\n", (int)(count % 10000));
    return 0;
}
// Puts both global heaps into the empty state: every storage slot is zeroed
// and both element counts are reset to 0.
void init()
{
    int idx;
    for(idx = 0; idx < MAX; ++idx)
    {
        min_heap.data[idx] = 0;
        max_heap.data[idx] = 0;
    }
    max_heap.size = 0;
    min_heap.size = 0;
}
void minheap_insert(int label, struct heap * temp)
{
temp->size += 1;
int i = temp->size;
*(temp->data + i - 1) = label;
int *parent_i_th, *i_th;
i_th = temp->data + i - 1;
parent_i_th = temp->data + parent(i) - 1;
while(*parent_i_th > *i_th)
{
swap(parent_i_th, i_th);
i = parent(i);
i_th = temp->data + i - 1;
parent_i_th = temp->data + parent(i) - 1;
}
}
void maxheap_insert(int label, struct heap * temp)
{
temp->size += 1;
int i = temp->size;
*(temp->data + i - 1) = label;
int *parent_i_th, *i_th;
i_th = temp->data + i - 1;
parent_i_th = temp->data + parent(i) - 1;
while(*parent_i_th < *i_th)
{
swap(parent_i_th, i_th);
i = parent(i);
i_th = temp->data + i - 1;
parent_i_th = temp->data + parent(i) - 1;
}
if(label == 1260)
{
printf("1260 Test: ");
display(temp);
}
}
// Removes the root (smallest element) of the min-heap *temp and returns it.
// On an empty heap it prints "No more element!" and returns -1.
int extract_min(struct heap * temp)
{
    if(temp->size == 0)
    {
        printf("No more element!\n");
        return -1;
    }
    int *cells = temp->data;
    const int top = cells[0];
    // Move the last element to the root, then shrink the heap by one.
    cells[0] = cells[temp->size - 1];
    temp->size -= 1;
    int pos = 1;
    for(;;)
    {
        // smallest_child() returns pos itself when pos has no children, which
        // makes the comparison below false and ends the loop.
        const int next = smallest_child(pos, temp);
        if(!(cells[pos - 1] > cells[next - 1]))
            break;
        swap(&cells[pos - 1], &cells[next - 1]);
        pos = next;
    }
    return top;
}
// Removes the root (largest element) of the max-heap *temp and returns it.
// On an empty heap it prints "No more element!" and returns -1.
int extract_max(struct heap * temp)
{
    if(temp->size == 0)
    {
        printf("No more element!\n");
        return -1;
    }
    int *cells = temp->data;
    const int top = cells[0];
    // Move the last element to the root, then shrink the heap by one.
    cells[0] = cells[temp->size - 1];
    temp->size -= 1;
    int pos = 1;
    for(;;)
    {
        // biggest_child() returns pos itself when pos has no children, which
        // makes the comparison below false and ends the loop.
        const int next = biggest_child(pos, temp);
        if(!(cells[pos - 1] < cells[next - 1]))
            break;
        swap(&cells[pos - 1], &cells[next - 1]);
        pos = next;
    }
    return top;
}
// Exchanges the two integers that label1 and label2 point to.
void swap(int * label1, int * label2)
{
    const int first = *label1;
    *label1 = *label2;
    *label2 = first;
}
// Returns the 1-based position of the parent of position label.
// The root (position 1) is reported as its own parent.
int parent(int label)
{
    return (label == 1) ? 1 : label / 2;
}
// Returns the 1-based position of the smaller child of position label in
// *temp. Returns label itself when it has no children, and the left child
// when that is the only one. On a tie the left child is chosen.
int smallest_child(int label, struct heap * temp)
{
    const int left = 2 * label;
    const int right = left + 1;
    if(left > temp->size)
        return label;
    if(left == temp->size)
        return left;
    return (temp->data[left - 1] <= temp->data[right - 1]) ? left : right;
}
// Returns the 1-based position of the larger child of position label in
// *temp. Returns label itself when it has no children, and the left child
// when that is the only one. On a tie the right child is chosen.
int biggest_child(int label, struct heap * temp)
{
    const int left = 2 * label;
    const int right = left + 1;
    if(left > temp->size)
        return label;
    if(left == temp->size)
        return left;
    return (temp->data[left - 1] > temp->data[right - 1]) ? left : right;
}
// Prints the stored elements of *temp in array order, each followed by
// "-> ", and then a newline.
void display(struct heap * temp)
{
    int idx = 0;
    while(idx < temp->size)
    {
        printf("%d-> ", temp->data[idx]);
        idx++;
    }
    printf("\n");
}
// Streams integers from the text file named by `file` and, after each one,
// reports the median of everything read so far.
//
// max_heap receives values not greater than its root and min_heap receives
// the rest. After every insertion the heaps are rebalanced so that max_heap
// holds either the same number of elements as min_heap or one more, and the
// root of max_heap is taken as the median. Each median is added to the global
// `count`, printed to stdout, and appended to "D:\test.csv" when that file
// can be opened.
//
// If the input file cannot be opened, an error is printed to stderr and the
// function returns without touching the heaps. Reading stops at end of file
// or at the first token that is not an integer.
void read_data(char * file)
{
    FILE *fp = fopen(file, "rb");
    if(fp == NULL)
    {
        fprintf(stderr, "Cannot open input file: %s\n", file);
        return;
    }
    // The CSV log is optional: keep going without it if it cannot be created.
    FILE *wp = fopen("D:\\test.csv", "wb");
    if(wp == NULL)
        fprintf(stderr, "Cannot open D:\\test.csv, medians will not be written to it\n");

    int label;
    int i = 0;
    // Drive the loop by fscanf's result rather than feof(): this handles an
    // empty file (no iteration with an uninitialised label) and cannot spin
    // forever on input that is not a number.
    while(fscanf(fp, "%d", &label) == 1)
    {
        if(max_heap.size == 0 || label <= *(max_heap.data))
            maxheap_insert(label, &max_heap);
        else
            minheap_insert(label, &min_heap);

        // Rebalance: max_heap may be at most one element larger ...
        if(max_heap.size - min_heap.size > 1)
        {
            label = extract_max(&max_heap);
            minheap_insert(label, &min_heap);
        }
        // ... and never smaller than min_heap.
        if(max_heap.size < min_heap.size)
        {
            label = extract_min(&min_heap);
            maxheap_insert(label, &max_heap);
        }

        int temp = *(max_heap.data);
        count += temp;
        if(wp != NULL)
            fprintf(wp, "%d\n", temp);
        printf("%d th Median: %d \n", ++i, temp);
    }
    if(wp != NULL)
        fclose(wp);
    fclose(fp);
}