Because the data is stored in different ways, we have to return a mapping from median rent/income to population, rather than a separate population_data() function.
4YKXEBAVKKIWUC6EIG4MGSTZOHD7XJSO5O4YNU3OK5SPKMIWX7YQC
# Extract the median rent (midpoint of each range) for each column
median_rents = self.median_rents()
# Extract the median household income (midpoint of each range) for each bracket
median_incomes = self.median_incomes()
# Extract the actual table
population_data = self.population_data()
from pprint import pprint
pprint(population_data)
assert rows[0].startswith("Negative income")
assert rows[1].startswith("Nil income")
assert rows[-3].startswith('"$2,000 or more"')
assert rows[-2].startswith("Partial income stated(a)")
assert rows[-1].startswith("All incomes not stated(b)")
assert rows[0][0] == "Negative income"
assert rows[1][0] == "Nil income"
assert rows[-3][0] == "$2000 or more"
assert rows[-2][0] == "Partial income stated(a)"
assert rows[-1][0] == "All incomes not stated(b)"
# Two dollar amounts separated by a hyphen ('-')
# row[0] will be values such as: '$1-$39'
[start, end] = row[0].split("-")
(start, end) = parse_financial_bracket(row[0])
median_rent = (start + end + 1) // 2
# Make sure we've got data in the right shape
assert start.startswith("$")
assert end.startswith("$")
assert should_keep(row[0])
# Strip prefixes
start = start[1:]
end = end[1:]
# Convert to integers and calculate the midpoint
start = int(start)
end = int(end)
incomes.append((start + end + 1) // 2)
# The last column is a 'total' amount
population = int(row[-1])
incomes[median_rent] = population