# Combine county voter counts from various Missouri Secretary of State # onlines files. # Franklin Center for Government & Public Integrity # Earl F Glynn # efg, 29 July 2012 ################################################################################ ### Setup County.Count <- 114 library(RODBC) setwd("F:/Voter-Registration/Missouri/Census-Registration-Comparison/SOS-Data/") # CapLeading attributed to Christian Hoffmann in R mailing list CapLeading <- function(string) { fn <- function(x) { v <- unlist(strsplit(x, split = " ")) u <- sapply(v, function(x){ x <- tolower(x) substring(x, 1, 1) <- toupper(substring(x, 1, 1)) x}) paste(u, collapse = " ") } sapply(string, fn) } ################################################################################ ### PDF files downloaded Missouri Secretary of State site, converted to ### Excel using Able2Extract Professional, and manually edited. files <- list.files(path=".", pattern="^.*\\.xlsx$") # One row per county, with totals at bottom # + extra rows for KC and St. Louis City (not to be confused with St. Louis County) voters.active <- data.frame(matrix(0, County.Count+3, length(files))) voters.total <- data.frame(matrix(0, County.Count+3, length(files))) for (i in 1:length(files)) { cat(files[i], "\n") channel <- odbcConnectExcel2007(files[i]) #print(sqlTables(channel)) d <- sqlFetch(channel, "Sheet1", as.is=TRUE) odbcClose(channel) # Fix county names to match spelling by U.S. Census # (except for periods use in St. and Ste.) d$County <- CapLeading(d$County) d$County[d$County == "Dekalb"] <- "DeKalb" d$County[d$County == "St Louis City"] <- "St Louis city" # Let's make sure counties are in the same order in all files if (i == 1) { counties <- d$County rownames(voters.active) <- counties rownames(voters.total) <- counties } stopifnot( all(counties == d$county) ) # Grab yyyy-mm-dd from filename and format as # Dyyyy.mm.dd to keep R happy name <- gsub("-","\\.", paste("D", substr(files[i],1,10), sep="")) colnames(voters.active)[i] <- name colnames(voters.total)[i] <- name voters.active[d$County,i] <- d$Active voters.total[d$County,i] <- d$Registered } ################################################################################ ### Save to files to be used for plots # To standardize with name used by census match routine rownames(voters.active)[nrow(voters.active)] <- "TOTAL" rownames(voters.total)[nrow(voters.total)] <- "TOTAL" write.csv(voters.active, "Missouri-County-Voter-Counts-2000-2010-Stats-Active.csv") write.csv(voters.total, "Missouri-County-Voter-Counts-2000-2010-Stats-Total.csv")