HARD 0004. Median of Two Sorted Arrays

Problem Statement

Leetcode Link

Given two sorted arrays nums1 and nums2 of size m and n respectively, return the median of the two sorted arrays.

Constraints

nums1.length == m
nums2.length == n
0 <= m <= 1000
0 <= n <= 1000
1 <= m + n <= 2000
-106 <= nums1[i], nums2[i] <= 106

Follow up: The overall run time complexity should be \(O(log (m+n))\).

Tip

Remember that the median of a list is that item that divides the list into two equal halves, by population.

Tip

This problem uses principles I’ve come across when learning the Merge Sort algorithm. It should help to re-read that section before trying to solve this problem.

Examples

Solution

This is a good explanation targeting this problem: Binary Search - Median of Two Sorted Arrays.

  1. Partition the two arrays into two other arrays which are of equal halves.

  2. Ensure that every element on the left side is less than or equal to every element on the right side.

Solution
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
"""Solution for Leetcode #0004 - Median of Two Sorted Arrays
https://leetcode.com/problems/median-of-two-sorted-arrays/
"""

import statistics


def median_two_sorted_arrays(nums1: list[int], nums2: list[int]) -> int:
    """Return a median of two sorted arrays in O(log(m+n))
    """
    # Note: Please read the explanation / watch the linked video
    # *before* trying to understand this.
    if 0 < len(nums1) < len(nums2):
        smaller_array = nums1
        larger_array = nums2
    elif 0 < len(nums2) < len(nums1):
        smaller_array = nums2
        larger_array = nums1
    elif 0 < len(nums1) == len(nums2):
        smaller_array = nums1
        larger_array = nums2
    elif len(nums2) == 0 and len(nums1) > 0:
        return statistics.median(nums1)
    elif len(nums1) == 0 and len(nums2) > 0:
        return statistics.median(nums2)
    else:
        return None

    # perform binary search on the smaller array.
    start = 0
    end = len(smaller_array) - 1

    found_partition = False
    counter = 0
    while not found_partition:
        counter += 1
        partition_smaller_array = (end + start) // 2

        partition_larger_array = (
            len(smaller_array) + len(larger_array) + 1
        ) // 2 - partition_smaller_array

        left_smaller = smaller_array[:partition_smaller_array]
        left_larger = larger_array[:partition_larger_array]
        right_smaller = smaller_array[partition_smaller_array:]
        right_larger = larger_array[partition_larger_array:]

        max_left_smaller = left_smaller[-1] if len(
            left_smaller) > 0 else -float('inf')
        min_right_larger = right_larger[0] if len(
            right_larger) > 0 else float('inf')
        max_left_larger = left_larger[-1] if len(
            left_larger) > 0 else -float("inf")
        min_right_smaller = right_smaller[0] if len(
            right_smaller) > 0 else float("inf")

        if max_left_smaller <= min_right_larger and max_left_larger <= min_right_smaller:
            found_partition = True
        elif max_left_smaller > min_right_larger:
            end -= 1
        else:
            start += 1

            # now the median will be in the last four digits:
    total_length = len(smaller_array) + len(larger_array)

    if total_length % 2 == 0:  # this is even
        median = (max(max_left_smaller, max_left_larger
                      ) + min(min_right_larger, min_right_smaller))/2
    else:
        median = max(max_left_smaller, max_left_larger)

    return median


def test_base():
    import random
    import statistics

    list_1 = [random.randint(0, 100)
              for _ in range(random.randint(10**5, 10**6))]
    list_2 = [random.randint(0, 100)
              for _ in range(random.randint(10**5, 10**6))]

    list_1.sort()
    list_2.sort()

    calculated_median = median_two_sorted_arrays(list_1, list_2)

    merged_list = sorted(list_1 + list_2)

    median = statistics.median(merged_list)
    assert calculated_median == median, f"Returned value {calculated_median=} is wrong! {median=}"


def test_edgecase():
    list_1 = [1, 3]
    list_2 = [2]

    calculated_median = median_two_sorted_arrays(list_1, list_2)
    median = 2
    assert calculated_median == median, f"Returned value {calculated_median=} is wrong! {median=}"


def test_equal_case():
    list_1 = list_2 = [1, 2, 3, 4]

    calculated_median = median_two_sorted_arrays(list_1, list_2)
    median = statistics.median(list_1)
    assert calculated_median == median, f"Returned value {calculated_median=} is wrong! {median=}"