# Source code for parsons.scytl.scytl

import zipfile
import csv
import requests
import xml.etree.ElementTree as ET
import typing as t
from datetime import datetime
from dateutil.parser import parse as parsedate
from pytz import timezone
from io import BytesIO, StringIO
from dataclasses import dataclass

# Base URL that every template below is built on.
CLARITY_URL = "https://results.enr.clarityelections.com/"

# The doubled braces survive the f-string as literal `{name}` placeholders,
# which are filled in later with `str.format`.
CURRENT_VERSION_URL_TEMPLATE = f"{CLARITY_URL}{{administrator}}/{{election_id}}/current_ver.txt"
SUMMARY_CSV_ZIP_URL_TEMPLATE = (
    f"{CLARITY_URL}{{administrator}}/{{election_id}}/{{version_num}}/reports/summary.zip"
)
DETAIL_XML_ZIP_URL_TEMPLATE = (
    f"{CLARITY_URL}{{administrator}}/{{election_id}}/{{version_num}}/reports/detailxml.zip"
)
COUNTY_DETAIL_XML_ZIP_URL_TEMPLATE = (
    f"{CLARITY_URL}{{state}}/{{county_name}}/{{county_election_id}}"
    "/{county_version_num}/reports/detailxml.zip"
)
ELECTION_SETTINGS_JSON_URL_TEMPLATE = (
    f"{CLARITY_URL}{{state}}/{{election_id}}/{{version_num}}/json/en/electionsettings.json"
)

# Request headers carrying a desktop-browser User-Agent string.
BROWSER_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"
    )
}

# US timezone abbreviations mapped to UTC-offset strings.
TZ_INFO = dict(
    EST="UTC-5",
    EDT="UTC-4",
    CST="UTC-6",
    CDT="UTC-5",
    MST="UTC-7",
    MDT="UTC-6",
    PST="UTC-8",
    PDT="UTC-7",
    AKST="UTC-9",
    AKDT="UTC-8",
    HST="UTC-10",
    HDT="UTC-9",
)


@dataclass
class CountyDetails:
    """
    A class for keeping track of County election details.

    A dataclass is a decorator that adds special functions, including an
    automatic __init__ function. See more here: https://docs.python.org/3/library/dataclasses.html
    """

    # State value for the county (positional field 1).
    state: str
    # Name of the county.
    county_name: str
    # Identifier of the county's election.
    county_election_id: str
    # Version number of the county's results.
    county_version_num: str
    # When the county results were last updated; None when not supplied.
    county_update_date: t.Optional[datetime] = None


class Scytl:
    """
    Instantiate a Scytl connector.

    `Args:`
        state: str
            The two letter code of the state publishing the election results.
            ex: GA
        election_id: str
            The numeric identifier for the election found in the url
            of the election's website.
            ex: "114729"
        county: str (optional)
            The name of the county publishing the results.
            ex: Clarke
    """

    def __init__(self, state: str, election_id: str, county=""):
        self.state = state
        # Spaces in the county name are replaced with underscores before it is used in URLs.
        self.county = county.replace(" ", "_")
        self.administrator = f"{self.state}/{self.county}" if self.county else self.state
        self.election_id = election_id

        # Bookkeeping used to skip work when nothing changed since the last fetch.
        self.previous_summary_version_num = None
        self.previous_details_version_num = None
        self.previous_county_details_version_num = None
        self.previous_county_details_list = None
        self.previously_fetched_counties = set()

    def _parse_date_to_utc(self, input_dt: t.Optional[str]) -> t.Optional[datetime]:
        """
        Parse datetime string as datetime in UTC

        `Args:`
            input_dt: str
                The datetime string to be parsed
        `Returns:`
            datetime | None
                The parsed datetime converted to UTC, or None if the input is None
        """

        if input_dt is None:
            return None

        # TZ_INFO lets the parser resolve abbreviations such as "EST".
        parsed = parsedate(input_dt, tzinfos=TZ_INFO)
        return parsed.astimezone(timezone("UTC"))

    def _get_version(self, administrator: str, election_id: str) -> str:
        """
        Fetch the latest version of the election results from the Clarity site

        `Args:`
            administrator: str
                The url code for the election administrator, either the two-letter
                state code or the state code and the county, separated by a slash
            election_id: str
                The election id for the given election as a string
        `Returns:`
            str
                The version id as a string
        `Raises:`
            requests.exceptions.HTTPError
                If the site answers with an HTTP error status
        """

        config_version_url = CURRENT_VERSION_URL_TEMPLATE.format(
            administrator=administrator, election_id=election_id
        )

        res = requests.get(config_version_url, headers=BROWSER_HEADERS)
        # Without this check an error page body would be returned as the "version".
        res.raise_for_status()

        # The version is interpolated into later URLs, so drop stray whitespace.
        return res.text.strip()

    def _parse_file_from_zip_url(self, zipfile_url: str, file_name: str) -> bytes:
        """
        Fetch a zip file from the given url and unzip to a byte array

        `Args:`
            zipfile_url: str
                The url where the zip file can be found
            file_name: str
                The expected name of the file in the zipfile to read
        `Returns:`
            bytes
                The unzipped file as bytes
        `Raises:`
            requests.exceptions.HTTPError
                If the site answers with an HTTP error status
        """

        with requests.get(zipfile_url, headers=BROWSER_HEADERS) as res:
            # Surface HTTP errors as RequestException subclasses so callers that
            # catch RequestException actually see a missing file.
            res.raise_for_status()
            zip_bytes = res.content

        with zipfile.ZipFile(BytesIO(zip_bytes)) as archive:
            with archive.open(file_name) as member:
                return member.read()

    def _get_latest_counties_scytl_info(
        self, state: str, election_id: str, version_num: str
    ) -> t.Dict[str, CountyDetails]:
        """
        Fetch the settings JSON file for the election and parse the county details
        for participating counties in a state election.

        `Args:`
            state: str
                The two-letter state code for the state
            election_id: str
                The election ID for the given election
            version_num: str
                The latest version ID of the election as a string
        `Returns:`
            dict[str, CountyDetails]
                A dictionary mapping county names to their sub-election information
        `Raises:`
            requests.exceptions.HTTPError
                If the site answers with an HTTP error status
        """

        config_settings_json_url = ELECTION_SETTINGS_JSON_URL_TEMPLATE.format(
            state=state, election_id=election_id, version_num=version_num
        )

        settings_json_res = requests.get(config_settings_json_url, headers=BROWSER_HEADERS)
        settings_json_res.raise_for_status()
        settings_json = settings_json_res.json()

        participating_counties = settings_json["settings"]["electiondetails"][
            "participatingcounties"
        ]

        county_dict = {}

        for county_row in participating_counties:
            # Each row is pipe-delimited: name|election id|version|update date
            county_info = county_row.split("|")
            source_county_name = county_info[0]

            county_dict[source_county_name] = CountyDetails(
                state,
                source_county_name,
                county_info[1],
                county_info[2],
                self._parse_date_to_utc(county_info[3]),
            )

        return county_dict

    def _parse_county_xml_data_to_precincts(
        self, county_data: bytes, county_details: CountyDetails
    ) -> t.List[t.Dict]:
        """
        Parse a detail XML file for a county into a list of election results
        by precinct and vote method.

        `Args:`
            county_data: bytes
                The detail XML file for a county as bytes
            county_details: CountyDetails
                The details class for the county, including name, id,
                and last updated datetime
        `Returns:`
            list[dict]
                The list of election results by precinct and vote method in the file.
        """

        root = ET.fromstring(county_data)

        precinct_dict = {}
        precinct_votes = []

        for child in root:
            if child.tag == "VoterTurnout":
                # The first child of VoterTurnout holds the per-precinct rows.
                for precinct in child[0]:
                    data = precinct.attrib
                    precinct_dict[data.get("name")] = {
                        "total_voters": data.get("totalVoters"),
                        "ballots_cast": data.get("ballotsCast"),
                        "voter_turnout": data.get("voterTurnout"),
                        "percent_reporting": data.get("percentReporting"),
                    }

            elif child.tag == "Contest":
                office = child.attrib["text"]

                for choice in child:
                    # Direct VoteType children of a Contest are not candidate rows.
                    if choice.tag == "VoteType":
                        continue

                    cand_name = choice.attrib.get("text")
                    cand_party = choice.attrib.get("party")

                    for vote_type in choice:
                        vote_type_label = vote_type.attrib["name"]

                        for precinct in vote_type:
                            precinct_name = precinct.attrib["name"]
                            # Turnout is only known for precincts seen in an
                            # earlier VoterTurnout element; otherwise fields are None.
                            precinct_turnout = precinct_dict.get(precinct_name, {})

                            precinct_votes.append(
                                {
                                    "state": county_details.state,
                                    "county_name": county_details.county_name,
                                    "county_id": county_details.county_election_id,
                                    "office": office,
                                    "ballots_cast": precinct_turnout.get("ballots_cast"),
                                    "reg_voters": precinct_turnout.get("total_voters"),
                                    "vote_method": vote_type_label,
                                    "candidate_name": cand_name,
                                    "candidate_party": cand_party,
                                    "precinct_name": precinct_name,
                                    "recorded_votes": int(precinct.attrib["votes"]),
                                    "voter_turnout": precinct_turnout.get("voter_turnout"),
                                    "percent_reporting": precinct_turnout.get(
                                        "percent_reporting"
                                    ),
                                    "timestamp_last_updated": county_details.county_update_date,
                                }
                            )

        return precinct_votes

    def _parse_state_xml_data_to_counties(self, state_data: bytes, state: str) -> t.List[t.Dict]:
        """
        Parse a detail XML file for a state into a list of election results
        by county and vote method.

        `Args:`
            state_data: bytes
                The detail XML file for a state as bytes
            state: str
                The two-letter state code for the state associated with the file
        `Returns:`
            list[dict]
                The list of election results by county and vote method in the file.
        """

        root = ET.fromstring(state_data)

        county_dict = {}
        county_votes = []
        timestamp = None

        for child in root:
            if child.tag == "Timestamp":
                # <Timestamp>1/5/2021 3:22:30 PM EST</Timestamp>
                timestamp = self._parse_date_to_utc(child.text)

            elif child.tag == "ElectionVoterTurnout":
                # The first child of ElectionVoterTurnout holds the per-county rows.
                for county in child[0]:
                    data = county.attrib
                    county_dict[data["name"]] = data

            elif child.tag == "Contest":
                office = child.attrib["text"]

                for choice in child:
                    # ParticipatingCounties children of a Contest are not candidate rows.
                    if choice.tag == "ParticipatingCounties":
                        continue

                    cand_name = choice.attrib.get("text")
                    cand_party = choice.attrib.get("party")

                    for vote_type in choice:
                        vote_type_label = vote_type.attrib["name"]

                        for county in vote_type:
                            county_name = county.attrib["name"]
                            # Turnout is only known for counties seen in an earlier
                            # ElectionVoterTurnout element; otherwise fields are None.
                            county_turnout = county_dict.get(county_name, {})

                            county_votes.append(
                                {
                                    "state": state,
                                    "county_name": county_name,
                                    "office": office,
                                    "ballots_cast": county_turnout.get("ballotsCast"),
                                    "reg_voters": county_turnout.get("totalVoters"),
                                    "precincts_reporting": county_turnout.get(
                                        "precinctsReported"
                                    ),
                                    "total_precincts": county_turnout.get(
                                        "precinctsParticipating"
                                    ),
                                    "vote_method": vote_type_label,
                                    "candidate_name": cand_name,
                                    "candidate_party": cand_party,
                                    "recorded_votes": int(county.attrib["votes"]),
                                    "timestamp_last_updated": timestamp,
                                }
                            )

        return county_votes

    def _fetch_and_parse_summary_results(
        self, administrator: str, election_id: str, version_num: str, county=""
    ) -> t.List[t.Dict]:
        """
        Fetches the summary results CSV file from the Scytl site and parses it
        into a list of election results by candidate.

        `Args:`
            administrator: str
                The url code for the election administrator, either the two-letter
                state code or the state code and the county, separated by a slash
            election_id: str
                The election id for the given election as a string
            version_num: str
                The latest version ID of the election as a string
            county: str
                The name of the county associated with the summary file
        `Returns:`
            list[dict]
                The list of election results by candidate.
        """

        summary_csv_zip_url = SUMMARY_CSV_ZIP_URL_TEMPLATE.format(
            administrator=administrator,
            election_id=election_id,
            version_num=version_num,
        )

        zip_bytes = self._parse_file_from_zip_url(summary_csv_zip_url, "summary.csv")

        string_buffer = StringIO(zip_bytes.decode("latin-1"))
        csv_data = csv.DictReader(string_buffer, delimiter=",")

        # Columns the file does not provide come back as None via `.get`.
        return [
            {
                "state": self.state,
                "county_name": county or self.county,
                "office": row.get("contest name"),
                "ballots_cast": row.get("ballots cast"),
                "reg_voters": row.get("registered voters"),
                "counties_reporting": row.get("num Area rptg"),
                "total_counties": row.get("num Area total"),
                "precincts_reporting": row.get("num Precinct rptg"),
                "total_precincts": row.get("num Precinct total"),
                "candidate_name": row.get("choice name"),
                "candidate_party": row.get("party name"),
                "recorded_votes": row.get("total votes"),
            }
            for row in csv_data
        ]

    def get_summary_results(self, force_update=False) -> t.Optional[t.List[t.Dict]]:
        """
        Fetch the latest summary results for the given election, across all contests.

        Please note that all electoral entities administer their elections differently,
        so not all values will be populated if the entity doesn't provide them.

        `Args:`
            force_update: bool
                If this is False, the connector will check to see if the current
                version matches the previously fetched version of the results.
                If the version has not been changed, no results will be fetched or returned.
                Default: false
        `Returns:`
            list[dict] | None
                None when the version is unchanged and force_update is False.
                Otherwise the list should contain entries for each candidate in
                each office. Each row will contain the following:

                - state
                - county_name (if applicable)
                - office
                - ballots_cast (in the contest)
                - reg_voters (eligible for the contest)
                - counties_reporting
                - total_counties
                - precincts_reporting
                - total_precincts
                - candidate_name
                - candidate_party (many administrators do not use this feature
                  and instead include the party in the candidate name)
                - recorded_votes (votes cast for the candidate)
        """

        version_num = self._get_version(self.administrator, self.election_id)

        if not force_update and version_num == self.previous_summary_version_num:
            return None

        data = self._fetch_and_parse_summary_results(
            self.administrator, self.election_id, version_num
        )

        # Only remember the version once the fetch has succeeded.
        self.previous_summary_version_num = version_num

        return data

    def get_detailed_results(self, force_update=False) -> t.Optional[t.List[t.Dict]]:
        """
        Fetch the latest detailed results by geography for the given election,
        across all contests.

        Please note that all electoral entities administer their elections differently,
        so not all values will be populated if the entity doesn't provide them.

        `Args:`
            force_update: bool
                If this is False, the connector will check to see if the current
                version matches the previously fetched version of the results.
                If the version has not been changed, no results will be fetched or returned.
                Default: false
        `Returns:`
            list[dict] | None
                None when the version is unchanged and force_update is False.
                Otherwise the list should contain entries for each candidate in
                each office, per vote method and per county.

                If fetching for a state, results will look like:

                - state
                - county_name
                - office
                - ballots_cast
                - reg_voters
                - precincts_reporting
                - total_precincts
                - vote_method (note: some administrators choose to differentiate
                  results by vote method, while others do not)
                - candidate_name
                - candidate_party (many administrators do not use this feature
                  and instead include the party in the candidate name)
                - recorded_votes (votes cast for the candidate with this
                  vote method in this county)
                - timestamp_last_updated

                If fetching for a county, results will look like:

                - state
                - county_name
                - county_id
                - office
                - ballots_cast
                - reg_voters
                - vote_method (note: some administrators choose to differentiate
                  results by vote method, while others do not)
                - candidate_name
                - candidate_party (many administrators do not use this feature
                  and instead include the party in the candidate name)
                - precinct_name
                - recorded_votes (votes cast for the candidate with this
                  vote method in this county)
                - voter_turnout
                - percent_reporting
                - timestamp_last_updated
        """

        version_num = self._get_version(self.administrator, self.election_id)

        if not force_update and version_num == self.previous_details_version_num:
            return None

        detail_xml_url = DETAIL_XML_ZIP_URL_TEMPLATE.format(
            administrator=self.administrator,
            election_id=self.election_id,
            version_num=version_num,
        )

        detail_data = self._parse_file_from_zip_url(detail_xml_url, "detail.xml")

        if self.county:
            # County-level connector: the detail file is broken out by precinct.
            county_details = CountyDetails(self.state, self.county, self.election_id, version_num)
            parsed_data = self._parse_county_xml_data_to_precincts(detail_data, county_details)
        else:
            # State-level connector: the detail file is broken out by county.
            parsed_data = self._parse_state_xml_data_to_counties(detail_data, self.state)

        # Only remember the version once the fetch has succeeded.
        self.previous_details_version_num = version_num

        return parsed_data

    def get_detailed_results_for_participating_counties(
        self, county_names: t.Optional[t.List[str]] = None, force_update=False
    ) -> t.Tuple[t.List[str], t.List[t.Dict]]:
        """
        Fetch the latest detailed results for the given election for all participating
        counties with detailed results, across all contests.

        Some counties may not have detailed results. If so, this will attempt
        to fetch the summary results for that county. If no results exist for
        either, the county name will be appended to the missing_counties list.

        After the first fetch, only the counties with updates will be returned,
        previous results will not be included.

        Please note that all electoral entities administer their elections differently,
        so not all values will be populated if the entity doesn't provide them.

        `Args:`
            county_names: list[str]
                The list of counties to get precinct-level results for.
                Default: None (get all counties)
            force_update: bool
                If this is False, the connector will check to see if the current
                version matches the previously fetched version of the results.
                If the version has not been changed, no results will be fetched or returned.
                Default: false
        `Returns:`
            list[str]
                The list of county names that could not be fetched
            list[dict]
                The list should contain entries for each candidate in each office,
                per vote method, county, and precinct.
                Each row will contain the following:

                - state
                - county_name
                - county_id
                - office
                - ballots_cast
                - reg_voters
                - vote_method (note: some administrators choose to differentiate
                  results by vote method, while others do not)
                - candidate_name
                - candidate_party (many administrators do not use this feature
                  and instead include the party in the candidate name)
                - precinct_name
                - recorded_votes (votes cast for the candidate with this
                  vote method in this county)
                - voter_turnout
                - percent_reporting
                - timestamp_last_updated
        """

        version_num = self._get_version(self.administrator, self.election_id)

        if not force_update and version_num == self.previous_county_details_version_num:
            return [], []

        county_details_list = self._get_latest_counties_scytl_info(
            self.state, self.election_id, version_num
        )

        # An empty or missing filter means "all counties"; build the lookup set once.
        wanted_counties = set(county_names) if county_names else None

        parsed_data = []
        fetched_counties = []
        missing_counties = []

        for county_name, county_details in county_details_list.items():
            if wanted_counties is not None and county_name not in wanted_counties:
                continue

            # Skip counties fetched last time whose update date has not advanced.
            if (
                not force_update
                and county_name in self.previously_fetched_counties
                and self.previous_county_details_list
                and county_details.county_update_date
                <= self.previous_county_details_list[county_name].county_update_date
            ):
                continue

            detail_xml_url = COUNTY_DETAIL_XML_ZIP_URL_TEMPLATE.format(
                state=county_details.state,
                county_name=county_details.county_name,
                county_election_id=county_details.county_election_id,
                county_version_num=county_details.county_version_num,
            )

            try:
                county_data = self._parse_file_from_zip_url(detail_xml_url, "detail.xml")

            except requests.exceptions.RequestException:
                # No detail file for this county: fall back to its summary file.
                try:
                    summary_data = self._fetch_and_parse_summary_results(
                        f"{self.state}/{county_name}",
                        county_details.county_election_id,
                        county_details.county_version_num,
                        county_name,
                    )

                except requests.exceptions.RequestException:
                    missing_counties.append(county_name)

                else:
                    if summary_data:
                        parsed_data += summary_data

            else:
                parsed_data += self._parse_county_xml_data_to_precincts(
                    county_data, county_details
                )

                # Only counties whose detail XML was parsed are recorded as fetched.
                fetched_counties.append(county_name)

        self.previous_county_details_version_num = version_num
        self.previous_county_details_list = county_details_list
        # NOTE(review): this replaces (not extends) the previous set, so a county
        # skipped in this call is no longer "previously fetched" on the next one.
        self.previously_fetched_counties = set(fetched_counties)

        return missing_counties, parsed_data