import pandas
general = pandas.read_csv("2012_general.csv")
primary = pandas.read_csv("2012_primary.csv")
general
# get rid of whitespace
general = general.replace({r'\s+':' '},regex=True)
primary = primary.replace({r'\s+':' '},regex=True)
len((general['WARD'].map(str) + " " + general['DIVISION'].map(str)).unique())
In this tweet, Godfrey mentions 1686. I'm not sure where my extra one comes from, but I'll consider this close enough.
gen_sum = general.groupby(['WARD', 'DIVISION', 'OFFICE','NAME','PARTY'])[['VOTES']].sum().add_suffix('_sum').reset_index()
gen_sum_romney = gen_sum[gen_sum.NAME == "MITT ROMNEY"]
gen_sum_romney_none = gen_sum_romney[gen_sum_romney.VOTES_sum == 0]
print 'number of divisions with no A, M, or P votes for Romney: ', len(gen_sum_romney_none)
gen_AM = general[(general.TYPE == "A") | (general.TYPE == "M")]
gen_AM_sum = gen_AM.groupby(['WARD', 'DIVISION', 'OFFICE','NAME','PARTY'])[['VOTES']].sum().add_suffix('_sum').reset_index()
gen_AM_sum_romney = gen_AM_sum[gen_AM_sum.NAME == "MITT ROMNEY"]
gen_AM_sum_romney_none = gen_AM_sum_romney[gen_AM_sum_romney.VOTES_sum == 0]
print 'number of divisions with no A or M votes for Romney: ', len(gen_AM_sum_romney_none)
gen_machine_romney = general[(general.NAME == "MITT ROMNEY") & (general.TYPE == "M")]
gen_machine_romney_none = gen_machine_romney[gen_machine_romney.VOTES == 0]
print 'number of divisions with no M votes for Romney: ', len(gen_machine_romney_none)
gen_machine_romney_none
We'll restrict to M types from now on
I'm not exactly sure why, but Philadelphia doesn't give what ballot was used (e.g. Republican or Democratic), only the office and the party of the person voted for. So if we want to get write-in votes counted toward votes in a division, which I think we do, we need to separate the Republican primary votes by race, instead of simply by party of votee. This is a bit cumbersome, but at least the Republican primary races are easily identified as such.
A wrinkle: it looks like primary ballots for both parties include various "SPECIAL ELECTION"s. I did not count these toward either primary, because I couldn't tell which one they belonged to (i.e. a Democrat could've voted for a Republican or vice versa).
primary["OFFICE"].unique()
republican_primary_races = [
'ALTERNATE DELEGATE TO THE REPUBLICAN NATIONAL CONVENTION 1ST DIST',
'ATTORNEY GENERAL-REP',
'AUDITOR GENERAL-REP',
'DELEGATE TO THE REPUBLICAN NATIONAL CONVENTION 1ST DIST',
'PRESIDENT OF THE UNITED STATES-REP',
'REPRESENTATIVE IN CONGRESS - 1ST DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 184TH DIST - REP',
'SENATOR IN THE GENERAL ASSEMBLY - 1ST DIST - REP',
'STATE TREASURER-REP',
'UNITED STATES SENATOR -REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 175TH DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 182ND DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 186TH DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 191ST DIST - REP',
'ALTERNATE DELEGATE TO THE REPUBLICAN NATIONAL CONVENTION 2ND DIST',
'DELEGATE TO THE REPUBLICAN NATIONAL CONVENTION 2ND DIST',
'REPRESENTATIVE IN CONGRESS - 2ND DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 190TH DIST - REP',
'SENATOR IN THE GENERAL ASSEMBLY - 7TH DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 192ND DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 181ST DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 195TH DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 180TH DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 200TH DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 198TH DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 203RD DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 197TH DIST - REP',
'SENATOR IN THE GENERAL ASSEMBLY - 3RD DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 201ST DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 202ND DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 194TH DIST - REP',
'ALTERNATE DELEGATE TO THE REPUBLICAN NATIONAL CONVENTION 13TH DIST',
'DELEGATE TO THE REPUBLICAN NATIONAL CONVENTION 13TH DIST',
'REPRESENTATIVE IN CONGRESS - 13TH DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 179TH DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 177TH DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 185TH DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 188TH DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 154TH DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 170TH DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 173RD DIST - REP',
'SENATOR IN THE GENERAL ASSEMBLY - 5TH DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 174TH DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 172ND DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 169TH DIST - REP',
'REPRESENTATIVE IN THE GENERAL ASSEMBLY - 152ND DIST - REP'
]
republican_primary = primary[primary.OFFICE.isin(republican_primary_races)]
relevant_primary = pandas.merge(gen_machine_romney_none[['WARD','DIVISION','TYPE']], republican_primary, on=['WARD','DIVISION','TYPE'], how='inner', suffixes=('_GENERAL', '_PRIMARY'))
relevant_primary
#this way of joining doesn't allow for write-ins!
#relevant_primary = pandas.merge(gen_machine_romney_none[['WARD','DIVISION','TYPE','PARTY']], primary, on=['WARD','DIVISION','TYPE','PARTY'], how='inner', suffixes=('_GENERAL', '_PRIMARY'))
Because votes are aggregated at candidate level, but voters can vote in multiple races, we can only place lower and upper bounds on the number of voters. To place a lower bound, we can take the maximum number of votes in any given Republican primary race for each division. To place an upper bound, we can sum the votes for all Republican primary races for each division.
relevant_primary_office = relevant_primary.groupby(['WARD','DIVISION','OFFICE'])[['VOTES']].sum().reset_index()
relevant_primary_office_atleast = relevant_primary.groupby(['WARD','DIVISION'])[['VOTES']].max().reset_index()
print '# of voters, # of divisions with at least that many voters\n',relevant_primary_office_atleast['VOTES'].value_counts()
relevant_primary_office_atleast
relevant_primary_atmost = relevant_primary.groupby(['WARD','DIVISION'])[['VOTES']].sum().reset_index()
print '# of voters, # of divisions with at most that many voters\n',relevant_primary_atmost['VOTES'].value_counts()
relevant_primary_atmost
relevant_primary_prez = relevant_primary[relevant_primary.OFFICE == 'PRESIDENT OF THE UNITED STATES-REP']
relevant_primary_prez_sum = relevant_primary_prez.groupby(['WARD','DIVISION','OFFICE','PARTY'])[['VOTES']].sum().reset_index()
print '# of voters, # of divisions with that many voters\n',relevant_primary_prez_sum['VOTES'].value_counts()
relevant_primary_prez_sum
relevant_primary_prez_romney = relevant_primary_prez[relevant_primary_prez.NAME == 'MITT ROMNEY']
print '# of voters, # of divisions with that many voters\n',relevant_primary_prez_romney['VOTES'].value_counts()
relevant_primary_prez_romney
relevant_primary[relevant_primary.OFFICE == "PRESIDENT OF THE UNITED STATES-REP"].groupby(["NAME"])[["VOTES"]].sum().reset_index()