def find_median(data_points):
# Data will be mapped category: population
# For example:
"""
{
20: 2324, # $20 income, 2324 people in category
60: 4108, # $60 income, 4108 people in category
}
"""
# List of all categories
keys = []
total_population = 0
for key in data_points.keys():
keys.append(key)
total_population += data_points[key]
# Sort in ascending order (needed to find median)
keys = sorted(keys)
# An even total population means we must calculate the median ourselves
if total_population % 2 == 0:
# Median will be midpoint between these two numbers
# In most cases they'll probably be equal
left_median = (total_population // 2) - 1
right_median = (total_population // 2) + 1
population_seen = 0
median = [None, None]
for key in keys:
if population_seen <= left_median <= population_seen + data_points[key]:
median[0] = key
if population_seen <= right_median <= population_seen + data_points[key]:
median[1] = key
# If we've found both medians, no more work to do
if median[0] is not None and median[1] is not None:
break
population_seen += data_points[key]
# Make sure we actually found the relevant points
assert median[0] is not None
assert median[1] is not None
# The mean between two middle points is the median
return round(sum(median) / 2)
else:
median_person = (total_population + 1) // 2
population_seen = 0
for key in keys:
if population_seen <= median_person <= population_seen + data_points[key]:
return key
population_seen += data_points[key]