Merge M sorted arrays

© Parineeth M R

Question 45. Given m sorted arrays each of which has a size n, merge the arrays in sorted order into a single array of size m*n


To efficiently solve the problem we use a heap which has a maximum size of m (number of sorted arrays). If the arrays are sorted in non-decreasing order, then we maintain a min heap otherwise we maintain a max heap. The algorithm is as follows

1. Pick the first element from each array, add it to the heap and construct the heap using the heapify function

2. Add the topmost element in the heap to the result array. Then replace the topmost element of the heap with the next element from the same array as the topmost element. Re-adjust the heap using the heapify function. Suppose all elements in the same array are over, then add a MAX_INT for non-decreasing order (MIN_INT for non-increasing order) into the root of the heap and re-adjust the heap using heapify. Repeat this step until all elements in all the arrays are processed.

Inserting an element into a heap of size m takes O(logm). Since we have n*m elements, the time complexity of this approach is O(nm * logm).

The code for merging k sorted lists is given below.

C/C++


/*
arrays: the arrays to be merged. arrays[0] has the first array, arrays[1] has 
		the second array and so on
n: number of elements in each array
k: number of arrays
result: the merged results are passed back in this array
*/
void merge_k_sorted_arrays(int arrays[][MAX_NUM_ELEMENTS], int n, int k, 
			int *result)
{
	struct node *heap = (struct node*) calloc(k, sizeof(struct node));
	int *arr_pos = (int*) calloc(k, sizeof(int));
	int i, pos, res_index, array_no;

	/*Store the first element in each array into the heap*/
	for (i = 0; i < k; ++i) {
		heap[i].value = arrays[i][0];
		heap[i].array_no = i;
		arr_pos[i] = 1;
	}

	/*Construct the initial heap using the heapify procedure*/
	for (i = k - 1; i >= 0; --i)
		heapify(heap, i, k);
	
	/*
	Process the remaining elements in the arrays. When all elements in the 
	arrays have been processed, MAX_INT will be present at root of heap
	*/
	res_index = 0;
	while (heap[0].value != MAX_INT) {
		/*
		root of the heap will have the lowest value. So store
		it into the result
		*/
		result[res_index++] = heap[0].value;

		array_no = heap[0].array_no;
		pos = arr_pos[array_no];

		/*
		If the root belongs to array x, then replace the root with
		the next element in array x
		*/
		if (pos >= n) {
			/*If we have exhausted all elements in the array, 
			then insert MAX_INT into the heap*/
			heap[0].value = MAX_INT;
			heap[0].array_no = array_no;
		} else {
			heap[0].value = arrays[array_no][pos];
			heap[0].array_no = array_no;
		}

		/*Re-adjust the heap after replacing the root*/
		heapify(heap, 0, k);

		arr_pos[array_no]++;
	}

	free(arr_pos);
	free(heap);
}


/* Helper function to perform heapify
heap: min heap.  Maximum number of elements in heap is k
pos: position of the heap that may need to be fixed
heap_size: current number of nodes in the heap
*/
void heapify(int heap[], int pos, int heap_size)
{
	int left = 2 * pos;
	int right = (2 * pos) + 1;
	int ix_of_smallest = pos;

	/*Find which of the three are the smallest: heap[pos] OR left child
	OR right child*/
	if (left < heap_size && heap[pos] > heap[left])
		ix_of_smallest = left;
	if (right < heap_size && heap[ix_of_smallest] > heap[right])
		ix_of_smallest = right;

	if (ix_of_smallest != pos) {
		/*
		If pos doesn't contain the smallest value,
		then swap the smallest value into pos 
		*/
		int temp = heap[pos];
		heap[pos] = heap[ix_of_smallest];
		heap[ix_of_smallest] = temp;

		/*Recursively re-adjust the heap*/
		heapify(heap, ix_of_smallest, heap_size);
	}
}



Java


/*
arrays: the arrays to be merged. arrays[0] has the first array, arrays[1] has
		the second array and so on
Return value: the merged results are passed back in this array
*/
public static int[] mergeKSortedArrays(int[][] arrays) {
	int k = arrays.length;	/*number of arrays*/
	int n = arrays[0].length; /*number of elements in each array*/
	Node[] heap = new Node[k];
	int[] arrPos = new int[k];
	int[] result = new int[k * n];

	/*Store the first element in each array into the heap*/
	int i;
	for (i = 0; i < k; ++i) {
		heap[i] = new Node();
		heap[i].value = arrays[i][0];
		heap[i].arrayNo = i;
		arrPos[i] = 1;
	}

	/*Construct the initial heap using the heapify procedure*/
	for (i = k - 1; i >= 0; --i)
		heapify(heap, i, k);

	
	/*
	Process the remaining elements in the arrays. When all elements in the
	arrays have been processed, MAX_INT will be present at root of heap
	*/
	int resIndex = 0;
	while (heap[0].value != MAX_INT) {
		/*
		root of the heap will have the lowest value. So store
		it into the result
		*/
		result[resIndex++] = heap[0].value;

		int arrayNo = heap[0].arrayNo;
		int pos = arrPos[arrayNo];

		/*
		If the root belongs to array x, then replace the root with
		the next element in array x
		*/
		if (pos >= n) {
			/*If we have exhausted all elements in the array, 
			then insert MAX_INT into the heap*/
			heap[0].value = MAX_INT;
			heap[0].arrayNo = arrayNo;
		} else {
			heap[0].value = arrays[arrayNo][pos];
			heap[0].arrayNo = arrayNo;
		}

		/*Re-adjust the heap after replacing the root*/
		heapify(heap, 0, k);

		arrPos[arrayNo]++;
	}

	return result;
}

/* Helper function to perform heapify
heap: min heap.  Maximum number of elements in heap is k
pos: position of the heap that may need to be fixed
heapSize: current number of nodes in the heap
*/
public static void heapify(int[] heap, int pos, int heapSize) {
	int left = 2 * pos;
	int right = (2 * pos) + 1;
	int ixOfSmallest = pos;

	/*Find which of the three are the smallest - heap[pos] OR left child
	OR right child*/
	if (left < heapSize && heap[pos] > heap[left])
		ixOfSmallest = left;
	if (right < heapSize && heap[ixOfSmallest] > heap[right])
		ixOfSmallest = right;


	if (ixOfSmallest != pos) {
		/*
		If pos doesn't contain the smallest value,
		then swap the smallest value into pos 
		*/
		int temp = heap[pos];
		heap[pos] = heap[ixOfSmallest];
		heap[ixOfSmallest] = temp;

		/*Recursively re-adjust the heap*/
		heapify(heap, ixOfSmallest, heapSize);
	}
}




Python


#lists:  the lists to be merged. lists[0] has the first list, lists[1] has 
#		the second list and so on
#Return value:  the merged results are passed back in this list
def merge_k_sorted_lists(lists) :
	k = len(lists)	#number of lists
	n = len(lists[0]) #number of elements in each list
	heap = []
	arr_pos = []

	#Store the first element in each list into the heap
	for  i in range(0, k):
		new_node = Node()
		new_node.value = lists[i][0]
		new_node.list_no = i
		heap.append(new_node)
		arr_pos.append(1)
	
	#Construct the initial heap using the heapify procedure
	for  i in range(k - 1, -1,-1):
		heapify(heap, i, k)

	#Process the remaining elements in the lists. When all elements in  
	#the lists have been processed, MAX_INT will be present at root of heap
	result = [] 
	while (heap[0].value != MAX_INT) :
		#root of the heap will have the lowest value. So store
		#it into the result
		result.append(heap[0].value)

		list_no = heap[0].list_no
		pos = arr_pos[list_no]

		#If the root belongs to list x, then replace the root with
		#the next element in list x
		if (pos >= n) :
			#If we have exhausted all elements in the list, 
			#then insert MAX_INT into the heap
			heap[0].value = MAX_INT
			heap[0].list_no = list_no
		else :
			heap[0].value = lists[list_no][pos]
			heap[0].list_no = list_no
		

		#Re-adjust the heap after replacing the root
		heapify(heap, 0, k)

		arr_pos[list_no] += 1
	
	return result

#Helper function to perform heapify
#heap: min heap.  Maximum number of elements in heap is k
#pos: position of the heap that may need to be fixed
#heap_size: current number of nodes in the heap
def heapify(heap, pos, heap_size) :
	left = 2 * pos
	right = (2 * pos) + 1
	ix_of_smallest = pos

	#Find which of the three are the smallest - value at pos OR left child
	#OR right child
	if (left < heap_size and heap[pos] > heap[left]):
		ix_of_smallest = left
	if (right < heap_size and heap[ix_of_smallest] > heap[right]):
		ix_of_smallest = right
 
	if (ix_of_smallest != pos) :
		#If pos doesn't contain the smallest value,
		#then swap the smallest value into pos 
		heap[pos], heap[ix_of_smallest] = heap[ix_of_smallest], heap[pos]

		#Recursively readjust the heap
		heapify(heap, ix_of_smallest, heap_size)