## this file will create all line graphs related to resiliency metrics ## do not need to run any other file in order to run this library(plyr) library(dplyr) library(readxl) library(openxlsx) library(lubridate) library(ggplot2) library(extrafont) library(gridExtra) font_import() loadfonts() setwd("/Users/amanankit/Documents/Research/Data EIA + OE/Data/") files <- list.files(pattern = '.xls') customerDataEIA <- lapply(files, read_excel, col_names = TRUE, skip = 2) customerDataEIA <- rbind.fill(customerDataEIA) ## automatically import the OE files and stores them as lists files <- list.files(pattern = '.txt') customerDataOE <- lapply(files, read.table, header = TRUE, quote = "", sep = "\t", skip = 1, fill = T) NERC_regions <- read_excel('/Users/amanankit/Documents/Research/Data EIA + OE/NERC_regions.xlsx') ## combine the lists into one dataframe customerDataOE <- rbind.fill(customerDataOE) ## 50 states state <- c("Alabama","Alaska","Arizona","Arkansas","California","Colorado","Connecticut","Delaware","Florida","Georgia","Hawaii","Idaho", "Illinois","Indiana","Iowa","Kansas","Kentucky","Louisiana","Maine","Maryland","Massachusetts","Michigan","Minnesota", "Mississippi","Missouri","Montana","Nebraska","Nevada","New Hampshire","New Jersey","New Mexico","New York","North Carolina", "North Dakota","Ohio","Oklahoma","Oregon","Pennsylvania","Rhode Island","South Carolina","South Dakota","Tennessee","Texas", "Utah","Vermont","Virginia","Washington","West Virginia","Wisconsin","Wyoming") stateAbbrev <- c("AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID","IL","IN","IA","KS","KY","LA","ME","MD","MA","MI","MN","MS", "MO","MT","NE","NV","NH","NJ","NM","NY","NC","ND","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VT","VA","WA","WV", "WI","WY") ## isolates relevant columns customerStateAndPopEIA <- na.omit(customerDataEIA[,c("YEAR", "State", "Count__4")]) ## assigns column names colnames(customerStateAndPopEIA) <- c("year", "state", "population") ## removes commas from the population customerStateAndPopEIA$population <- as.numeric(gsub(",", "", customerStateAndPopEIA$population)) customerStateAndPopEIA <- na.omit(customerStateAndPopEIA) ## isolates relevant columns customerStateAndPopOE <- customerDataOE[,c("Date.Event.Began", "Time.Event.Began", "Date.of.Restoration", "Time.of.Restoration", "Area.Affected", "Event.Type", "Number.of.Customers.Affected", "NERC.Region")] ## assigns column names colnames(customerStateAndPopOE) <- c("dayStart", "timeStart", "dayEnd", "timeEnd", "state", "cause", "population", "NERC_regions") ## removes extra characters (commas, quotes) customerStateAndPopOE$population <- gsub(",", "", customerStateAndPopOE$population) customerStateAndPopOE$population <- gsub("\"", "", customerStateAndPopOE$population) customerStateAndPopOE$state <- gsub("\"", "", customerStateAndPopOE$state) ## converts time to 24 hour style to elimante AM and PM notation customerStateAndPopOE$timeStart <- format(strptime(customerStateAndPopOE$timeStart, "%I:%M %p"), format = "%H:%M:%S") customerStateAndPopOE$timeEnd <- format(strptime(customerStateAndPopOE$timeEnd, "%I:%M %p"), format = "%H:%M:%S") ## manually parses the dates customerStateAndPopOE$dayStart <- parse_date_time(customerStateAndPopOE$dayStart, orders = "mdy") customerStateAndPopOE$dayEnd <- parse_date_time(customerStateAndPopOE$dayEnd, orders = "mdy") ## merges date and time into one column customerStateAndPopOE$dayStart <- paste(customerStateAndPopOE$dayStart, customerStateAndPopOE$timeStart) customerStateAndPopOE$dayEnd <- paste(customerStateAndPopOE$dayEnd, customerStateAndPopOE$timeEnd) ## converts characters to POSIXct to allow for calculations customerStateAndPopOE$dayStart <- parse_date_time(customerStateAndPopOE$dayStart, "Ymd HMS") customerStateAndPopOE$dayEnd <- parse_date_time(customerStateAndPopOE$dayEnd, "Ymd HMS") ## calculates the time elapsed time.interval <- customerStateAndPopOE$dayStart %--% customerStateAndPopOE$dayEnd customerStateAndPopOE["Time.Elapsed"] <- NA ## converts time elapsed to days: remove " / 1440" to revert back to minutes customerStateAndPopOE$Time.Elapsed <- as.double((as.duration(time.interval) / 60) / 1440) ## removes rows that have NA value customerStateAndPopOE$population <- as.numeric(customerStateAndPopOE$population) customerStateAndPopOE <- na.omit(customerStateAndPopOE) ## converts negative values to positive values verify negative populations???? customerStateAndPopEIA$population <- abs(as.numeric(customerStateAndPopEIA$population)) ## extract year and make into new column customerStateAndPopOE$Year <- format(as.Date(customerStateAndPopOE$dayStart, format="%y/%m/%d:%h:%m:%s"), "%Y") customerStateAndPopOE$cause <- gsub("deliberate", "Operations", customerStateAndPopOE$cause) customerStateAndPopOE$cause <- gsub("natural", "Natural", customerStateAndPopOE$cause) customerStateAndPopOE$cause <- gsub("human attack", "Human attack", customerStateAndPopOE$cause) customerStateAndPopOE$cause <- gsub("mechanical", "Mechanical failure", customerStateAndPopOE$cause) customerStateAndPopOE$cause <- gsub("System Operations", "Operations", customerStateAndPopOE$cause) customerStateAndPopOE$cause <- gsub("operations", "Operations", customerStateAndPopOE$cause) customerStateAndPopOE$cause <- gsub("Unit Tripped", "Mechanical failure", customerStateAndPopOE$cause) customerStateAndPopOE$cause <- gsub("Units Tripped", "Mechanical failure", customerStateAndPopOE$cause) customerStateAndPopOE$cause <- gsub("Transmission Tripped", "Mechanical failure", customerStateAndPopOE$cause) customerStateAndPopOE$cause <- gsub("Shed Load", "Operations", customerStateAndPopOE$cause) customerStateAndPopOE <- customerStateAndPopOE %>% filter(cause %in% c("Operations", "Natural", "Human attack", "Mechanical failure")) # combines the population counts for each year for each state customer_state_and_pop_EIA_sorted <- with(customerStateAndPopEIA, aggregate(list(population = population), list(year = year, state = state), sum)) # extracts the state from the state column states_pattern <- paste0(paste0(".*\\b(", paste0(state.name, collapse="|")), ")\\b.*") customerStateAndPopOE$stateIsolated <- sub(states_pattern, "\\1", customerStateAndPopOE$state) # add state abbreviations customerStateAndPopOE$stateIsolatedAbbrev <- state.abb[match(customerStateAndPopOE$stateIsolated, state.name)] customerStateAndPopOE <- subset(customerStateAndPopOE, Time.Elapsed >= 0) customerStateAndPopOE <- customerStateAndPopOE[complete.cases(customerStateAndPopOE), ] customerStateAndPopOE <- merge(customerStateAndPopOE, customer_state_and_pop_EIA_sorted, by.x = c("stateIsolatedAbbrev", "Year"), by.y = c("state", "year"), all.x = FALSE, all.y = TRUE) customerStateAndPopOE <- customerStateAndPopOE[complete.cases(customerStateAndPopOE), ] colnames(customerStateAndPopOE)[9] <- "populationAffected" colnames(customerStateAndPopOE)[13] <- "customersInState" customerStateAndPopOE <- customerStateAndPopOE %>% mutate("SAIDI" = (Time.Elapsed * populationAffected) / customersInState) customerStateAndPopOE <- customerStateAndPopOE %>% mutate("SAIFI" = populationAffected / customersInState) customerStateAndPopOE <- customerStateAndPopOE %>% mutate("CAIDI" = SAIDI / SAIFI) #write.xlsx(customerStateAndPopOE, "/Users/amanankit/Documents/Research/Data EIA + OE/Resilience per Event.xlsx") customerStateAndPopOE$NERC_regions <- gsub("\"", "", customerStateAndPopOE$NERC_regions) customerStateAndPopOE$NERC_regions <- gsub(" ", "", customerStateAndPopOE$NERC_regions) customerStateAndPopOE$NERC_regions <- gsub(";", ",", customerStateAndPopOE$NERC_regions) customerStateAndPopOE$NERC_regions <- gsub(",", ", ", customerStateAndPopOE$NERC_regions) customerStateAndPopOE$NERC_regions <- gsub("\\", "RFC", customerStateAndPopOE$NERC_regions) customerStateAndPopOE$NERC_regions <- gsub("WeEnergiesMAIN", "MAIN", customerStateAndPopOE$NERC_regions) customerStateAndPopOE$NERC_regions <- gsub("ERCOT", "TRE", customerStateAndPopOE$NERC_regions) customerStateAndPopOE$NERC_regions <- gsub("WSCC", "WECC", customerStateAndPopOE$NERC_regions) customerStateAndPopOE$NERC_regions <- gsub("MAPP", "MRO", customerStateAndPopOE$NERC_regions) # for hawaii only customerStateAndPopOE$NERC_regions <- gsub("N/A", "HI", customerStateAndPopOE$NERC_regions) NERC_regions <- unique(customerStateAndPopOE$NERC_regions) customerStateAndPopOE <- customerStateAndPopOE[complete.cases(customerStateAndPopOE), ] # removes the row and the row number customerStateAndPopOE$Key <- 1:length(customerStateAndPopOE$stateIsolated) # create a key for easy data matching customerStateAndPopOE <- customerStateAndPopOE[, colnames(customerStateAndPopOE)[c(17,1:16)]] customerStateAndPopOE$SAIDI_NERC <- as.numeric(NA) customerStateAndPopOE$SAIFI_NERC <- as.numeric(NA) customerStateAndPopOE$CAIDI_NERC <- as.numeric(NA) calculate_NERC_resilience <- function(NERC_region, year, metric) { filtered <- customerStateAndPopOE %>% filter(NERC_regions == NERC_region & Year == year) if (length(filtered$Key > 0)) { total_customer_per_year_per_NERC <- sum(unique(filtered$customersInState), na.rm = TRUE) if (metric == "SAIDI") { value <- sum(filtered$Time.Elapsed * filtered$populationAffected) / total_customer_per_year_per_NERC } else if (metric == "SAIFI") { value <- sum(filtered$populationAffected) / total_customer_per_year_per_NERC } for (i in 1:length(filtered$Key)) { row <- as.numeric(filtered$Key[i]) col <- paste0(metric, "_NERC") customerStateAndPopOE[customerStateAndPopOE$Key == row, col] <<- value # have to use <<- to update the dataframe because it is stored in global environment } } } for (i in 1:length(NERC_regions)) { curr_NERC_region <- NERC_regions[i] for (curr_year in 2002:2017) { calculate_NERC_resilience(curr_NERC_region, curr_year, "SAIDI") calculate_NERC_resilience(curr_NERC_region, curr_year, "SAIFI") } } customerStateAndPopOE <- customerStateAndPopOE %>% mutate(CAIDI_NERC = SAIDI_NERC / SAIFI_NERC) write.xlsx(customerStateAndPopOE, "/Users/amanankit/Documents/Research/Data EIA + OE/Resilience per NERC per year.xlsx") ggplot(customerStateAndPopOE, aes(x = Year, y = SAIDI_NERC, color = NERC_regions, group = NERC_regions)) + geom_point() + geom_line() ggplot(customerStateAndPopOE, aes(x = Year, y = SAIFI_NERC, color = NERC_regions, group = NERC_regions)) + geom_point() + geom_line() ggplot(customerStateAndPopOE, aes(x = Year, y = CAIDI_NERC, color = NERC_regions, group = NERC_regions)) + geom_point() + geom_line() unloadNamespace("plyr") year <- as.data.frame(2002:2017) cause_operations <- as.data.frame(rep(c("Operations"), length(year$`2002:2017`))) colnames(cause_operations) <- c("cause") cause_mechanical <- as.data.frame(rep(c("Mechanical failure"), length(year$`2002:2017`))) colnames(cause_mechanical) <- c("cause") cause_natural <- as.data.frame(rep(c("Natural"), length(year$`2002:2017`))) colnames(cause_natural) <- c("cause") cause_human_attk <- as.data.frame(rep(c("Human attack"), length(year$`2002:2017`))) colnames(cause_human_attk) <- c("cause") metric_by_cause <- cbind(bind_rows(cause_operations, cause_mechanical, cause_natural, cause_human_attk), year) colnames(metric_by_cause) <- c("cause", "year") calculate_resilience_cause_US <- function(cause_param, metric, year) { year_isolated <- customerStateAndPopOE %>% filter(Year == year, cause == cause_param) total_cust_served <- sum(unique(year_isolated$customersInState)) if (length(year_isolated$Key) > 0) { if (metric == "SAIDI") { restoration_time_x_cust_interrupted <- sum(year_isolated$Time.Elapsed * year_isolated$populationAffected) SAIDI <- restoration_time_x_cust_interrupted / total_cust_served return (SAIDI) } else if(metric == "SAIFI") { cust_interrupted <- sum(year_isolated$populationAffected) SAIFI <- cust_interrupted / total_cust_served return (SAIFI) } else if (metric == "CAIDI") { CAIDI <- calculate_resilience_cause_US(cause_param, "SAIDI", year) / calculate_resilience_cause_US(cause_param, "SAIFI", year) return (CAIDI) } } return (NA) } causes <- c("Operations", "Natural", "Human attack", "Mechanical failure") metrics <- c("SAIDI", "SAIFI", "CAIDI") metric_by_cause$Key <- paste0(metric_by_cause[,1], metric_by_cause[,2]) metric_by_cause <- metric_by_cause[, colnames(metric_by_cause)[c(3,1:2)]] metric_by_cause$SAIDI <- as.numeric(NA) metric_by_cause$SAIFI <- as.numeric(NA) metric_by_cause$CAIDI <- as.numeric(NA) for (cause in 1:length(causes)) { for (year in 2002:2017) { for (metric in 1:length(metrics)) { curr_cause <- causes[cause] curr_metric <- metrics[metric] value <- calculate_resilience_cause_US(curr_cause, curr_metric, year) if (!is.na(value)) { if (metrics[metric] == "SAIDI") { col <- 4 } else if (metrics[metric] == "SAIFI") { col <- 5 } else if (metrics[metric] == "CAIDI") { col <- 6 } key <- paste0(causes[cause], year) metric_by_cause[metric_by_cause$Key == key, col] <- value } } } } # figure_3a <- ggplot(metric_by_cause, aes(x=factor(year), y=SAIDI, color = cause, group = cause)) + geom_point() + geom_line() + ggtitle('SAIDI values for 2002 - 2017 by outage type') + xlab('Year') + ylab('SAIDI [hours per customer]') + guides(color=guide_legend('Cause for outage')) + theme_bw() + theme(text=element_text(family="CMU Serif")) # figure_3b <- ggplot(metric_by_cause, aes(x=factor(year), y=SAIFI, color = cause, group = cause)) + geom_point() + geom_line() + ggtitle('SAIFI values for 2002 - 2017 by outage type') + xlab('Year') + ylab('SAIFI [interruptions per customer]') + guides(color=guide_legend('Cause for outage')) + theme_bw() + theme(text=element_text(family="CMU Serif")) # figure_3c <- ggplot(metric_by_cause, aes(x=factor(year), y=CAIDI, color = cause, group = cause)) + geom_point() + geom_line() + ggtitle('CAIDI values for 2002 - 2017 by outage type') + xlab('Year') + ylab('CAIDI [hours per outage]') + guides(color=guide_legend('Cause for outage')) + theme_bw() + theme(text=element_text(family="CMU Serif"), legend.position = "bottom") figure_3a <- ggplot(metric_by_cause, aes(x=factor(year), y=SAIDI, color = cause, group = cause)) + geom_point() + geom_line() + xlab('Year') + ylab('SAIDI [hours per customer]') + guides(color=guide_legend('Cause for outage')) + theme_bw() + theme(text=element_text(family="CMU Serif"), legend.position = "bottom", axis.title=element_text(size=14), legend.text = element_text(size=14), legend.title = element_text(size=13.7)) + theme(axis.text = element_text(size = 14)) + theme(axis.text.x = element_text(angle = 30, vjust = 0.5)) figure_3b <- ggplot(metric_by_cause, aes(x=factor(year), y=SAIFI, color = cause, group = cause)) + geom_point() + geom_line() + xlab('Year') + ylab('SAIFI [interruptions per customer]') + guides(color=guide_legend('Cause for outage')) + theme_bw() + theme(text=element_text(family="CMU Serif"), legend.position = "bottom", axis.title=element_text(size=14)) + theme(axis.text = element_text(size = 14)) + theme(axis.text.x = element_text(angle = 30, vjust = 0.5)) figure_3c <- ggplot(metric_by_cause, aes(x=factor(year), y=CAIDI, color = cause, group = cause)) + geom_point() + geom_line() + xlab('Year') + ylab('CAIDI [hours per outage]') + theme_bw() + theme(text=element_text(family="CMU Serif"), legend.position = "bottom", axis.title=element_text(size=14), legend.text = element_text(size=12), legend.title = element_text(size=12)) + theme(axis.text = element_text(size = 14)) + theme(axis.text.x = element_text(angle = 30, vjust = 0.5)) grid.arrange(figure_3c, figure_3a, figure_3b, nrow=1) # customerStateAndPopOE_all_time_per_NERC <- customerStateAndPopOE # calculate_NERC_resilience_all_time <- function(NERC_region, cause_param, metric) { # filtered <- customerStateAndPopOE_all_time_per_NERC %>% filter(NERC_regions == NERC_region, cause == cause_param) # if (length(filtered$Key > 0)) { # total_customer_per_NERC <- mean(unique(filtered$customersInState), na.rm = TRUE) # using mean since it is over multiple years # if (metric == "SAIDI") { # value <- sum(filtered$Time.Elapsed * filtered$populationAffected) / total_customer_per_NERC # } else if (metric == "SAIFI") { # value <- sum(filtered$populationAffected) / total_customer_per_NERC # } # for (i in 1:length(filtered$Key)) { # row <- as.numeric(filtered$Key[i]) # col <- paste0(metric, "_NERC") # customerStateAndPopOE_all_time_per_NERC[customerStateAndPopOE_all_time_per_NERC$Key == row, col] <<- value # have to use <<- to update the dataframe because it is stored in global environment # } # } # } # # for (i in 1:length(NERC_regions)) { # curr_NERC_region <- NERC_regions[i] # calculate_NERC_resilience_all_time(curr_NERC_region, "SAIDI") # calculate_NERC_resilience_all_time(curr_NERC_region, "SAIFI") # # } # # for (i in 1:length(NERC_regions)) { # curr_NERC_region <- NERC_regions[i] # for (cause in causes) { # calculate_NERC_resilience_all_time(curr_NERC_region, cause, "SAIDI") # calculate_NERC_resilience_all_time(curr_NERC_region, cause, "SAIFI") # } # } # customerStateAndPopOE_all_time_per_NERC <- customerStateAndPopOE_all_time_per_NERC %>% mutate(CAIDI_NERC = SAIDI_NERC / SAIFI_NERC) # cause_order <- factor(customerStateAndPopOE_all_time_per_NERC$cause, level = c("Operations", "Mechanical failure", "Human attack", "Natural")) # # ggplot(customerStateAndPopOE_all_time_per_NERC, aes(x = cause_order, y = SAIDI_NERC, color = NERC_regions, group = NERC_regions)) + # geom_point() + geom_line() # # ggplot(customerStateAndPopOE_all_time_per_NERC, aes(x = cause_order, y = SAIFI_NERC, color = NERC_regions, group = NERC_regions)) + # geom_point() + geom_line() # # ggplot(customerStateAndPopOE_all_time_per_NERC, aes(x = cause_order, y = CAIDI_NERC, color = NERC_regions, group = NERC_regions)) + # geom_point() + geom_line()