The main drawback is that this does not support the actual population data, as getting that will be a bit more complicated.
# NOTE(review): identifier that preceded this definition in the original text,
# kept verbatim: 2AE4VE7UAQOEXBTOWOSXSEC2Q5PII4PNZH6MZFCLGNY3LGJOGKIAC


def extract_sections(relative_path, separator):
    """Read a data file and split it into its non-empty sections.

    Args:
        relative_path: Path of the file, relative to ``DATA_DIRECTORY``.
        separator: String that separates one section from the next.

    Returns:
        A list of whitespace-stripped section strings, in file order, with
        empty sections (pure section breaks) removed.
    """
    # A context manager guarantees the handle is closed once it has been read.
    with open(f"{DATA_DIRECTORY}/{relative_path}", "r") as handle:
        contents = handle.read()

    # Each section is separated by this string
    stripped = (chunk.strip() for chunk in contents.split(separator))
    # Some chunks are just section breaks, so ignore those
    return [chunk for chunk in stripped if chunk != ""]


# TODO: use this everywhere
def tabulate(text):
    """Split comma-separated text into a list of rows of stripped cells.

    Args:
        text: Text with one row per line and cells separated by ``,``.

    Returns:
        A list with one entry per line of ``text``; each entry is the list of
        that line's cells with surrounding whitespace removed.
    """
    return [
        # Remove any extraneous whitespace around every cell
        [cell.strip() for cell in line.split(",")]
        for line in text.splitlines()
    ]


def parse_financial_bracket(start, end):
    """Convert the two ends of a dollar bracket from strings to integers.

    Args:
        start: Lower end of the bracket, prefixed with ``$`` (e.g. ``"$1"``).
        end: Upper end of the bracket, prefixed with ``$`` (e.g. ``"$49"``).

    Returns:
        The tuple ``(start, end)`` as integers.

    Raises:
        AssertionError: If either value lacks the ``$`` prefix, or if
            ``should_keep`` rejects the bracket.
        ValueError: If what follows the ``$`` is not an integer.
    """
    assert start.startswith("$")
    assert end.startswith("$")

    # Strip '$' prefix
    start = start[1:]
    end = end[1:]

    # Make sure we have filtered out bad categories such as 'Not stated'
    assert should_keep(start + end)

    # Downcast ranges from str -> int. Callers unpack the result as a
    # (start, end) pair, so the converted values have to be returned.
    return (int(start), int(end))
def __init__(self, filename):
    """Read ``filename`` and store its non-empty sections on ``self.sections``.

    Args:
        filename: Path of the file to read.
    """
    # A context manager guarantees the handle is closed once it has been read.
    with open(filename, "r") as handle:
        contents = handle.read()

    # Each section is separated by this string
    separator = ",,,,,,,,,,,,,"
    stripped = (chunk.strip() for chunk in contents.split(separator))

    # Some chunks are just section breaks, so ignore those
    self.sections = [chunk for chunk in stripped if chunk != ""]
def __init__(self):sections = extract_sections("1996/1996_income_by_rent.csv", ",,,,,,,,,,,,,")
# Remove commas inside of quotes as to not break parsingweekly_rent_heading = remove_quoted_commas(self.WEEKLY_RENT_HEADING)assert weekly_rent_heading.startswith(",,,,,, Weekly rent,,,,,,,")assert weekly_rent_heading.count("\n") == 2
assert self.WEEKLY_RENT_HEADING.startswith(",,,,,, Weekly rent,,,,,,,")assert self.WEEKLY_RENT_HEADING.count("\n") == 2
[start_ranges, end_ranges] = weekly_rent_heading.splitlines()[1:]# Remove comma prefix and split into columnsstart_ranges = start_ranges[1:].split(",")end_ranges = end_ranges[1:].split(",")
[start_ranges, end_ranges] = tabulate(self.WEEKLY_RENT_HEADING)[1:]# Remove comma prefixstart_ranges = start_ranges[1:]end_ranges = end_ranges[1:]
def parse_rent_bracket(self, start, end):
    """Convert the two ends of a rent bracket from strings to integers.

    Args:
        start: Lower end of the bracket, prefixed with ``$`` and suffixed
            with ``-`` (e.g. ``"$1-"``).
        end: Upper end of the bracket, prefixed with ``$`` (e.g. ``"$49"``).

    Returns:
        The tuple ``(start, end)`` as integers.

    Raises:
        AssertionError: If the expected prefix/suffix is missing, or if
            ``should_keep`` rejects the bracket.
        ValueError: If the remaining text is not an integer.
    """
    assert start.startswith("$")
    assert start.endswith("-")
    assert end.startswith("$")

    # Strip '$' prefix and '-' suffix
    start = start[1:-1]
    # Strip '$' prefix
    end = end[1:]

    # Make sure we have filtered out bad categories such as 'Not stated'
    assert should_keep(start + end)

    # Downcast both ends from str -> int; no midpoint is computed here.
    start = int(start)
    end = int(end)
    return (start, end)
class Census2001(CensusDataset):
    """Income and rent tables loaded from the 2001 data files."""

    def __init__(self):
        """Load the income and rent tables used by the other methods."""
        income_sections = extract_sections("2001/Income_2001.csv", ",,,")
        # Remove commas inside of quotes so they do not break parsing
        self.INCOME_DATA = remove_quoted_commas(income_sections[5])

        rent_sections = extract_sections("2001/Rent_2001.csv", ",,,,,")
        self.RENT_DATA = remove_quoted_commas(rent_sections[5])

    def census_year(self):
        """Return the year of this dataset."""
        return 2001

    def filtered_rent_brackets(self):
        """Return the rent bracket labels, without the two trailing rows.

        Returns:
            The first column of every row of ``RENT_DATA`` except the last
            two, which are asserted to be ``"$500 or more"`` and
            ``"Not stated"``.
        """
        rows = tabulate(self.RENT_DATA)

        # The open-ended and unstated categories have no usable range.
        assert rows[-2][0] == "$500 or more"
        assert rows[-1][0] == "Not stated"

        return [row[0] for row in rows[:-2]]

    def median_rents(self):
        """Return one representative rent per bracket.

        Returns:
            For each bracket, ``(start + end + 1) // 2``, i.e. the midpoint of
            the bracket rounded up to a whole dollar.
        """
        medians = []
        for bracket in self.filtered_rent_brackets():
            # Rent brackets are 2 dollar amounts separated by a hyphen ('-')
            # Example rent bracket: $1-$49
            start, end = bracket.split("-")
            start, end = parse_financial_bracket(start, end)
            medians.append((start + end + 1) // 2)
        return medians
assert rows[0][0] == "Negative/Nil income"assert rows[-3][0] == "$2000 or more"assert rows[-2][0] == "Partial income stated(b)"assert rows[-1][0] == "All incomes not stated(c)"rows = rows[1:-3]incomes = []for row in rows:incomes.append(row[0])return incomesdef median_incomes(self):incomes = self.filtered_income_brackets()for income in range(len(incomes)):[start, end] = incomes[income].split("-")(start, end) = parse_financial_bracket(start, end)incomes[income] = (start + end + 1) // 2return incomes