Add support for 2001 census
Dependencies
- [2] QU7MDQED Add basic support for 1996 census data
- [3] FCHAPZLD Implement simpler file merging function
- [4] 7ZN6HHL2 Extract relevant census data
- [*] QDEBWR2R Add missing state time series data
Change contents
- edit in project.py at line 1[3.35][6.0]
# Directory prefix prepended to every data file path opened by extract_sections.
DATA_DIRECTORY = "Data"

# - edit in project.py at line 53


def extract_sections(relative_path, separator):
    """Read a data file and split it into its non-empty sections.

    Args:
        relative_path: Path of the file, relative to ``DATA_DIRECTORY``.
        separator: String that separates one section from the next.

    Returns:
        A list of section strings in file order, each stripped of surrounding
        whitespace, with empty sections dropped.
    """
    # A context manager closes the handle as soon as the contents are read,
    # instead of leaving it open until garbage collection.
    with open(f"{DATA_DIRECTORY}/{relative_path}", "r") as handle:
        contents = handle.read()

    # Each section is separated by `separator`
    stripped = (section.strip() for section in contents.split(separator))

    # Some pieces are just section breaks, so ignore those
    return [section for section in stripped if section != ""]


# TODO: use this everywhere
def tabulate(text):
    """Split comma-separated text into rows of whitespace-stripped cells.

    The split is a plain ``str.split(",")``: quotes are not interpreted, so
    any commas inside quoted values must be removed before calling this.

    Args:
        text: Multi-line string with one row per line and cells separated by
            commas.

    Returns:
        A list with one entry per line of ``text``; each entry is the list of
        that line's cells with extraneous whitespace removed. Empty text
        yields an empty list.
    """
    return [
        [cell.strip() for cell in line.split(",")]
        for line in text.splitlines()
    ]


def parse_financial_bracket(start, end):
    """Convert a pair of '$'-prefixed amounts into a tuple of integers.

    Args:
        start: Lower bound of the bracket as a string, e.g. ``"$1"``.
        end: Upper bound of the bracket as a string, e.g. ``"$49"``.

    Returns:
        ``(start, end)`` with both bounds converted to ``int``.

    Raises:
        AssertionError: If either bound lacks the ``$`` prefix, or if
            ``should_keep`` rejects the two bounds concatenated.
        ValueError: If a bound is not a valid integer once ``$`` is removed.
    """
    # NOTE(review): these checks are `assert` statements, so they are skipped
    # when Python runs with -O; kept as-is so callers see the same exception.
    assert start.startswith("$")
    assert end.startswith("$")

    # Strip '$' prefix
    start = start[1:]
    end = end[1:]

    # Make sure we have filtered out bad categories such as 'Not stated'
    # (should_keep is defined elsewhere in project.py)
    assert should_keep(start + end)

    # - edit in project.py at line 99
    # NOTE(review): the patch splits this function across two hunks; any
    # unchanged lines between them are not shown in the change listing.

    # Downcast ranges from str -> int
    return (int(start), int(end))


# - edit in project.py at line 101
- replacement in project.py at line 107
def median_incomes(self):def median_rents(self): - replacement in project.py at line 110
def median_rents(self):def median_incomes(self): - replacement in project.py at line 115
def __init__(self, filename):file = open(filename, "r").read()# Split the file up into sectionsself.sections = []# Each section is separated by this stringfor section in file.split(",,,,,,,,,,,,,"):section = section.strip()# Some lines are just section breaks, so ignore thoseif section != "":self.sections.append(section)def __init__(self):sections = extract_sections("1996/1996_income_by_rent.csv", ",,,,,,,,,,,,,") - replacement in project.py at line 118
self.WEEKLY_RENT_HEADING = self.sections[4]self.POPULATION_BY_INCOME = self.sections[5]self.WEEKLY_RENT_HEADING = remove_quoted_commas(sections[4])self.POPULATION_BY_INCOME = remove_quoted_commas(sections[5]) - replacement in project.py at line 136
# Remove commas inside of quotes as to not break parsingweekly_rent_heading = remove_quoted_commas(self.WEEKLY_RENT_HEADING)assert weekly_rent_heading.startswith(",,,,,, Weekly rent,,,,,,,")assert weekly_rent_heading.count("\n") == 2assert self.WEEKLY_RENT_HEADING.startswith(",,,,,, Weekly rent,,,,,,,")assert self.WEEKLY_RENT_HEADING.count("\n") == 2 - replacement in project.py at line 142
[start_ranges, end_ranges] = weekly_rent_heading.splitlines()[1:]# Remove comma prefix and split into columnsstart_ranges = start_ranges[1:].split(",")end_ranges = end_ranges[1:].split(",")[start_ranges, end_ranges] = tabulate(self.WEEKLY_RENT_HEADING)[1:]# Remove comma prefixstart_ranges = start_ranges[1:]end_ranges = end_ranges[1:] - edit in project.py at line 162
def parse_rent_bracket(self, start, end):assert start.startswith("$")assert start.endswith("-")assert end.startswith("$")# Strip '$' prefix and '-' suffixstart = start[1:-1]# Strip '$' prefixend = end[1:]# Make sure we have filtered out bad categories such as 'Not stated'assert should_keep(start + end)# Downcast ranges and calculate midpointstart = int(start)end = int(end)return (start, end) - replacement in project.py at line 170
(start, end) = self.parse_rent_bracket(start, end)# Strip hypen suffix from `start`start = start[:-1](start, end) = parse_financial_bracket(start, end) - edit in project.py at line 191
def extract_income_columns(self, line):# Convert '"$1,000"' -> '$1000'line = remove_quoted_commas(line)# Split into columnsline = line.split(",")return line - replacement in project.py at line 195
for line in self.filter_income_brackets():line = self.extract_income_columns(line)for row in self.filter_income_brackets(): - replacement in project.py at line 198
[start, end] = line[0].split("-")[start, end] = row[0].split("-") - replacement in project.py at line 203
assert should_keep(line[0])assert should_keep(row[0]) - edit in project.py at line 230
class Census2001(CensusDataset):
    """Income and rent brackets read from the 2001 census CSV files.

    NOTE(review): the patch adds this class in three hunks (project.py lines
    230, 268 and 271); any unchanged lines lying between those hunks are not
    shown in the change listing and therefore are not reproduced here.
    """

    def __init__(self):
        """Load the income and rent tables from the 2001 data files."""
        # Only the section at index 5 of each file is kept; commas inside
        # quotes are removed so that tabulate() can split on plain commas.
        income_sections = extract_sections("2001/Income_2001.csv", ",,,")
        self.INCOME_DATA = remove_quoted_commas(income_sections[5])

        rent_sections = extract_sections("2001/Rent_2001.csv", ",,,,,")
        self.RENT_DATA = remove_quoted_commas(rent_sections[5])

    def census_year(self):
        """Return the year of this census (2001)."""
        return 2001

    def filtered_rent_brackets(self):
        """Return the rent bracket labels that have two dollar bounds.

        The last two rows ("$500 or more" and "Not stated") are dropped
        after asserting that they are where they are expected to be.
        """
        rows = tabulate(self.RENT_DATA)

        assert rows[-2][0] == "$500 or more"
        assert rows[-1][0] == "Not stated"

        # The bracket label is the first column of each remaining row
        return [row[0] for row in rows[:-2]]

    def median_rents(self):
        """Return the midpoint of every rent bracket, in table order."""
        # Rent brackets are 2 dollar amounts separated by a hyphen ('-')
        # Example rent bracket: $1-$49
        return self._bracket_midpoints(self.filtered_rent_brackets())

    # - edit in project.py at line 268
    def filtered_income_brackets(self):
        """Return the income bracket labels that have two dollar bounds.

        The first row ("Negative/Nil income") and the last three rows
        ("$2000 or more", "Partial income stated(b)" and
        "All incomes not stated(c)") are dropped after asserting that they
        are where they are expected to be.
        """
        rows = tabulate(self.INCOME_DATA)

        # - edit in project.py at line 271
        assert rows[0][0] == "Negative/Nil income"
        assert rows[-3][0] == "$2000 or more"
        assert rows[-2][0] == "Partial income stated(b)"
        assert rows[-1][0] == "All incomes not stated(c)"

        # The bracket label is the first column of each remaining row
        return [row[0] for row in rows[1:-3]]

    def median_incomes(self):
        """Return the midpoint of every income bracket, in table order."""
        return self._bracket_midpoints(self.filtered_income_brackets())

    @staticmethod
    def _bracket_midpoints(brackets):
        """Map '$start-$end' labels to their integer midpoints.

        Each midpoint is ``(start + end + 1) // 2``, i.e. the average of the
        two bounds rounded up when it is not a whole number.
        """
        midpoints = []
        for bracket in brackets:
            # Unpacking raises ValueError unless there is exactly one hyphen
            start, end = bracket.split("-")
            start, end = parse_financial_bracket(start, end)
            midpoints.append((start + end + 1) // 2)
        return midpoints


# - replacement in project.py at line 523
data = Census1996("Data/1996/1996_income_by_rent.csv")# data = Census1996()data = Census2001().median_incomes()