# Combine two files of Virginia voter registration data while enforcing
# some standardization.

# Franklin Center for Government & Public Integrity
# Earl F Glynn
# 22 August 2013.

################################################################################
### Setup

library(XML)

# NOTE(review): machine-specific working directory; every file name below is
# resolved relative to it.
setwd("F:/Voter-Registration/Virginia/census-registration-comparison/Board-of-Elections/")

# CapLeading attributed to Christian Hoffmann in R mailing list
#
# Convert each string to "leading caps": every space-separated word is
# lower-cased and then its first character is upper-cased.
#
# Args:
#   string: character vector of strings to convert.
#
# Returns:
#   A character vector of the same length as `string` (named by the input
#   strings).  Missing values stay NA instead of becoming the text "NA",
#   and zero-length input gives character(0).
CapLeading <- function(string) {
  fn <- function(x) {
    # paste() would otherwise turn a missing value into the literal "NA".
    if (is.na(x)) {
      return(NA_character_)
    }
    words <- tolower(unlist(strsplit(x, split = " ")))
    capped <- paste0(toupper(substring(words, 1, 1)), substring(words, 2))
    paste(capped, collapse = " ")
  }
  # vapply() guarantees a character result whatever the input length.
  vapply(string, fn, character(1))
}

################################################################################

# 2004 - 2006 data
d1 <- read.csv("Virginia-Voter-Registration-2004-2006.csv", as.is = TRUE)
d1$Date <- as.Date(d1$Date)

# 2008 - 2012 data
d2 <- read.csv("Virginia-Voter-Registration-2008-2012.csv", as.is = TRUE)
colnames(d2)[3] <- "Locality"
d2$Date <- as.Date(d2$Date, "%m/%d/%Y")

d2$Locality <- CapLeading(d2$Locality)
d2$Locality[d2$Locality == "Total"] <- "TOTAL"

# Standardize with names used by US Census
d2$Locality[d2$Locality == "Isle Of Wight County"] <- "Isle of Wight County"
d2$Locality[d2$Locality == "King & Queen County"] <- "King and Queen County"

# Check consistency of Locality names (iterate with fixes above until all
# match).  Set differences are used rather than an element-by-element
# comparison so the check still works when the two files contain a different
# number of localities; both results are empty when the names agree.
u1 <- sort(unique(d1$Locality))
u2 <- sort(unique(d2$Locality))
setdiff(u1, u2)  # names found only in the 2004 - 2006 file
setdiff(u2, u1)  # names found only in the 2008 - 2012 file

# Combine files
keep <- c("Date", "Locality", "Active", "Inactive", "Total")
d <- rbind(d1[, keep], d2[, keep])

# Sort by Locality and Date
d <- d[order(d$Locality, d$Date), ]

write.csv(d, "Virginia-Voter-Registration-2004-2012.csv", row.names = FALSE)