The main drawback is that this does not support the actual population data, as getting that will be a bit more complicated.
2AE4VE7UAQOEXBTOWOSXSEC2Q5PII4PNZH6MZFCLGNY3LGJOGKIAC
def extract_sections(relative_path, separator):
    """Read a data file and split it into its non-empty sections.

    Args:
        relative_path: Path of the file, relative to ``DATA_DIRECTORY``.
        separator: String that marks the boundary between two sections.

    Returns:
        A list of the sections in file order, each stripped of leading and
        trailing whitespace. Chunks that are empty after stripping are
        omitted.
    """
    # Use a context manager so the file handle is closed even if reading fails
    with open(f"{DATA_DIRECTORY}/{relative_path}", "r") as data_file:
        contents = data_file.read()
    # Some chunks are just section breaks, so drop anything that strips to ""
    stripped = (chunk.strip() for chunk in contents.split(separator))
    return [chunk for chunk in stripped if chunk != ""]
# TODO: use this everywhere
def tabulate(text):
    """Split comma-separated text into a table of whitespace-stripped cells.

    Every line of ``text`` becomes one row, and every row is the list of
    that line's comma-separated cells with surrounding whitespace removed.
    """
    return [
        [cell.strip() for cell in line.split(",")]
        for line in text.splitlines()
    ]
def parse_financial_bracket(start, end):
    """Parse the two '$'-prefixed bounds of a dollar bracket into integers.

    Args:
        start: Lower bound as text, e.g. ``"$1"``.
        end: Upper bound as text, e.g. ``"$49"``.

    Returns:
        A ``(start, end)`` tuple of ints.

    Raises:
        AssertionError: If a bound lacks the '$' prefix or ``should_keep``
            rejects the bracket.
        ValueError: If a bound is not a valid integer after the prefix.
    """
    assert start.startswith("$")
    assert end.startswith("$")
    # Strip '$' prefix
    start = start[1:]
    end = end[1:]
    # Make sure we have filtered out bad categories such as 'Not stated'
    assert should_keep(start + end)
    # Convert the bounds from str -> int and hand them back; callers unpack
    # the result as a (start, end) pair, so falling off the end (returning
    # None) would break them.
    return (int(start), int(end))
def __init__(self, filename):
    """Read ``filename`` and store its non-empty sections in ``self.sections``.

    Args:
        filename: Path of the file to read.
    """
    # Use a context manager so the file handle is closed even if reading fails
    with open(filename, "r") as data_file:
        contents = data_file.read()
    # Each section is separated by this string
    chunks = (chunk.strip() for chunk in contents.split(",,,,,,,,,,,,,"))
    # Some chunks are just section breaks, so ignore those
    self.sections = [chunk for chunk in chunks if chunk != ""]
def __init__(self):
# Splits the 1996 income-by-rent CSV into sections, then derives the
# start/end rows of the weekly rent heading.
# NOTE(review): `sections` is not used in the lines visible here, and
# self.WEEKLY_RENT_HEADING is read but never assigned here -- presumably it
# is set from `sections` in code not shown; confirm.
sections = extract_sections("1996/1996_income_by_rent.csv", ",,,,,,,,,,,,,")
# Remove commas inside of quotes so as not to break parsing
weekly_rent_heading = remove_quoted_commas(self.WEEKLY_RENT_HEADING)
# The heading must be exactly three lines: a title line followed by the
# start-range line and the end-range line
assert weekly_rent_heading.startswith(",,,,,, Weekly rent,,,,,,,")
assert weekly_rent_heading.count("\n") == 2
assert self.WEEKLY_RENT_HEADING.startswith(",,,,,, Weekly rent,,,,,,,")
assert self.WEEKLY_RENT_HEADING.count("\n") == 2
[start_ranges, end_ranges] = weekly_rent_heading.splitlines()[1:]
# Remove comma prefix and split into columns
start_ranges = start_ranges[1:].split(",")
end_ranges = end_ranges[1:].split(",")
# NOTE(review): the assignment below overwrites the start_ranges/end_ranges
# computed above, so only the tabulate-based values survive. This looks like
# an old and a new version of the same step both being present -- confirm
# which one should be kept.
[start_ranges, end_ranges] = tabulate(self.WEEKLY_RENT_HEADING)[1:]
# tabulate() returns each row as a list of cells, so this drops the first
# cell of each row rather than a leading comma character
start_ranges = start_ranges[1:]
end_ranges = end_ranges[1:]
def parse_rent_bracket(self, start, end):
    """Turn the text bounds of a rent bracket into a pair of integers.

    Args:
        start: Lower bound, '$'-prefixed and '-'-suffixed.
        end: Upper bound, '$'-prefixed.

    Returns:
        A ``(start, end)`` tuple of ints.
    """
    assert start.startswith("$")
    assert start.endswith("-")
    assert end.startswith("$")
    # Drop the leading '$' from both bounds, and the trailing '-' from the
    # lower one
    lower_text = start[1:-1]
    upper_text = end[1:]
    # Bad categories such as 'Not stated' must already have been filtered out
    assert should_keep(lower_text + upper_text)
    # Convert both bounds from str to int
    return (int(lower_text), int(upper_text))
class Census2001(CensusDataset):
    """Rent and income bracket data read from the 2001 census CSV files."""

    def __init__(self):
        """Load the income and rent tables from their CSV files."""
        income_sections = extract_sections("2001/Income_2001.csv", ",,,")
        # The table of interest is the sixth section of each file
        self.INCOME_DATA = remove_quoted_commas(income_sections[5])
        rent_sections = extract_sections("2001/Rent_2001.csv", ",,,,,")
        self.RENT_DATA = remove_quoted_commas(rent_sections[5])

    def census_year(self):
        """Return the year of this census."""
        return 2001

    @staticmethod
    def _bracket_midpoints(brackets):
        """Return the midpoint of each '$low-$high' bracket, rounded up."""
        midpoints = []
        for bracket in brackets:
            # Brackets are 2 dollar amounts separated by a hyphen ('-')
            # Example bracket: $1-$49
            [start, end] = bracket.split("-")
            (start, end) = parse_financial_bracket(start, end)
            midpoints.append((start + end + 1) // 2)
        return midpoints

    def filtered_rent_brackets(self):
        """Return the rent bracket labels that have both a lower and upper bound.

        The last two rows ('$500 or more' and 'Not stated') are dropped.
        """
        rows = tabulate(self.RENT_DATA)
        assert rows[-2][0] == "$500 or more"
        assert rows[-1][0] == "Not stated"
        return [row[0] for row in rows[:-2]]

    def median_rents(self):
        """Return the midpoint of every kept rent bracket."""
        return self._bracket_midpoints(self.filtered_rent_brackets())

    def filtered_income_brackets(self):
        """Return the income bracket labels that have both a lower and upper bound.

        The first row ('Negative/Nil income') and the last three rows
        ('$2000 or more' and the two 'not stated' categories) are dropped.
        """
        # NOTE(review): this method's header and the source of `rows` were
        # missing; both are reconstructed by analogy with
        # filtered_rent_brackets() -- confirm INCOME_DATA is the right table.
        rows = tabulate(self.INCOME_DATA)
        assert rows[0][0] == "Negative/Nil income"
        assert rows[-3][0] == "$2000 or more"
        assert rows[-2][0] == "Partial income stated(b)"
        assert rows[-1][0] == "All incomes not stated(c)"
        return [row[0] for row in rows[1:-3]]

    def median_incomes(self):
        """Return the midpoint of every kept income bracket."""
        return self._bracket_midpoints(self.filtered_income_brackets())