## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----include = FALSE, echo = FALSE, warning = FALSE, message = FALSE---------- library(dplyr) library(purrr) library(tidyr) library(ggplot2) library(patchwork) # Create example_migration # Define names of fake counties fake_counties = c("Greenridge","Windermoor","Bramblewood","Silverlake", "Thornbury","Maplewood","Hawthorne","Pinehurst", "Riverton","Meadowbrook","Fairhaven","Oakdale","Stonebridge", "Brookfield","Ashford","Glenville","Sunnyvale","Westfield") # Create region county lookup tables rc_lkp = data.frame(region = c(rep("North",3),rep("Midlands",5), rep("South West",4),rep("South East",6)), county = fake_counties) og_lkp = rc_lkp %>% setNames(c("Origin Region" ,"Origin County" )) dn_lkp = rc_lkp %>% setNames(c("Destination Region","Destination County")) # Create dataframe of fake migration data set.seed(1234) example_migration = expand.grid(fake_counties,fake_counties) %>% setNames(paste(c("Origin","Destination"),"County",sep=" ")) %>% full_join(og_lkp) %>% full_join(dn_lkp) %>% mutate(Migration = (1/rgamma(18*18, shape = 17, rate = 0.5)) %>% {. * 1000} %>% round()) example_migration[example_migration$`Origin County` == example_migration$`Destination County`,"Migration"] = example_migration[example_migration$`Origin County` == example_migration$`Destination County`,"Migration"] * 10 # Define names of fake counties fake_counties = c("Greenridge","Windermoor","Bramblewood","Silverlake", "Thornbury","Maplewood","Hawthorne","Pinehurst", "Riverton","Meadowbrook","Fairhaven","Oakdale","Stonebridge", "Brookfield","Ashford","Glenville","Sunnyvale","Westfield") # Create example time_series # Create dataframe of fake migration data set.seed(1234) example_time_series = data.frame(region = c(rep("North",3),rep("Midlands",5), rep("South West",4),rep("South East",6)), county = fake_counties, year_2011 = sample(1:10000,length(fake_counties)), year_2012 = sample(1:10000,length(fake_counties)), year_2013 = sample(1:10000,length(fake_counties)), year_2014 = sample(1:10000,length(fake_counties)), year_2015 = sample(1:10000,length(fake_counties))) %>% setNames(c("Region","County",2011:2015)) %>% pivot_longer(cols = `2011`:`2015`, names_to = "Year", values_to = "Immigration") %>% mutate(Year = as.numeric(Year)) example_time_series[sample(1:(length(fake_counties)*5),5),"Immigration"] = NA # Define cg cg = function(colour1, colour2, n = 15) { # Create a color palette function colour_func <- grDevices::colorRampPalette(c(colour1, colour2)) # Generate the color gradient colour_gradient <- colour_func(n) # Return colour gradient return(colour_gradient) } # Define decimal places decimalplaces <- function(x) { if (abs(x - round(x)) > .Machine$double.eps^0.5) { nchar(strsplit(sub('0+$', '', as.character(x)), ".", fixed = TRUE)[[1]][[2]]) } else { return(0) } } # Define exp_seq exp_seq = function(n,ln=15,exponent=2,round_values=TRUE,rmv_extremes=TRUE) { # Check variables have been entered correctly if (!is.numeric(exponent) || exponent < 1) { stop("`exponent` must be a numer greater than or equal to 1.") } # Create a sequence from 1 to n. # If `n` is specified as 1, the vector will be scaled to between 0 and 1. if (n == 1) { seq = seq(0, 1000, length.out = ln) round_values = FALSE } else { seq = seq(0, n, length.out = ln) } # Apply the logarithm to the sequence exp_seq = seq %>% {.^exponent} # Scale the sequence to the range [0, 1] min_val = min(exp_seq) max_val = max(exp_seq) one_seq = ((exp_seq - min_val) / (max_val - min_val)) # Scale sequence to n if (round_values) { scaled_one_seq = one_seq %>% {. * n} %>% round() } else if (n == 1) { scaled_one_seq = one_seq } else { scaled_one_seq = one_seq %>% {. * n} } # Option to remove zero and the maximum value (i.e. `n`) from the beginning and the end of the vector if (rmv_extremes) { scaled_one_seq = scaled_one_seq %>% .[2:(length(.)-1)] } # Return scaled sequence return(scaled_one_seq) } # Define hhm hhm = function(df,ylower,yupper,xlower,xupper,values,rm_diag=F,lgttl=NULL,bins=NULL,cbrks=NULL,cclrs=NULL,norm_lgd=F,lgdps=0,xttl_height=0.15,yttl_width=0.15) { # Define max value supplied to `values` if (rm_diag) { max_value = max(df[df[[xlower]] != df[[ylower]],values], na.rm = TRUE) } else { max_value = max(df[[values]], na.rm = TRUE) } # Check that supplied model inputs are compatible and won't cause errors if (!is.null(bins) && !is.null(cbrks)) { stop("The inputs bins and cbrks should not be supplied at the same time. bins is used to break the data into a specific number of groups with equal intervals between the min and max values. cbrks is used to manually break the data into groups based on the supplied thresholds. Please provide either one or the other.") } if (!is.null(bins) && !is.null(cclrs)) { if (bins != length(cclrs)) { stop("If both bins and cclrs are provideds, bins and cclrs must both be vectors with cclrs having a length equal to the value of bins.") } } if (!is.null(cbrks) && rm_diag) { if ( (min(cbrks) <= 0) || (max(cbrks) >= max_value) ) { stop(paste0("All values in cbrks must be between 0 and the largest value provided to `values`. In this instance rm_diag == TRUE, so only values not on the diagonal are considered. All values provided to cbrks should therefore be between greater than 0 and less than ",max_value,".")) } } if (!is.null(cbrks) && rm_diag == F) { if ( !is.null(cbrks) && (min(cbrks) <= 0) || (max(cbrks) >= max_value) ) { stop(paste0("All values in cbrks must be between 0 and the largest value provided to `values`. In this instance all values provided to cbrks should therefore be between greater than 0 and less than ",max_value,".")) } } if (!is.null(cbrks) && !is.null(cclrs)) { if ( length(cbrks) != (length(cclrs)-2) ) { stop("If both cbrks and cclrs are provided, cbrks and cclrs must both be vectors with cclrs having a length two longer than cbrks.") } } if (!is.null(cbrks) && norm_lgd) { if (min(range(cbrks)) < 0 || max(range(cbrks)) > 1) { stop("If normalising the values (norm_lgd == TRUE), all breaks provided to cbrks must be between 0 and 1.") } } if (!is.null(cbrks) && (cbrks %>% diff() %>% {. <= 0} %>% sum() %>% {. > 0})) { stop("Please ensure the values in cbrks are provided in ascending order.") } # Remove unwanted rows and format origin so geographies appear in alphabetical order df = df[,c(ylower,xlower,yupper,xupper,values)] # Define the groups to be shown along the x and y axes # If ordering of groups already defined via factor ordering, take this as the order # the groups should appear (top to bottom / left to right). # Otherwise, order groups alphabetically if (!is.null(df[[xupper]] %>% levels())) { xgrps = df[[xupper]] %>% levels() } else { xgrps = df[[xupper]] %>% unique() %>% sort() } if (!is.null(df[[yupper]] %>% levels())) { ygrps = df[[yupper]] %>% levels() } else { ygrps = df[[yupper]] %>% unique() %>% sort() } # If user specified to remove diagonal values, set all observations where ylower and xlower are identical to zero if (rm_diag) { df[df[[ylower]] == df[[xlower]],values] = NA } # Option to normalise values between 0-1 (only works if all values are positive) if (norm_lgd) { # If any values are negative, return error message if ((df[[values]] < 0) %>% sum(na.rm = TRUE) %>% {. > 0}) {stop("norm_lgd is only designed to be used if all values used to populate the heatmap are positive.")} # Otherwise normalise values df[[values]] = df[[values]] / max(df[[values]], na.rm = TRUE) # Unless a value other than zero is supplied (i.e. the user has manually specified a non-default value), set the number of decimal points shown in the legend to 3 if (lgdps == 0) { lgdps = 3 } } # Option to split legend into custom categories if (!is.null(cbrks)) { # If cbrks provided # Add the smallest value possible in R as a lower threshold to cbrks. # This ensures anything that is equal to, or less than, zero is included in the first group. cbrks = c(.Machine$double.xmin,cbrks) # Define names of custom breaks if (lgdps == 0) { brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0 ,. ), cbrks %>% .[2:length(.)] %>% c(.,max_value), sep = "-")) } else if (norm_lgd) { brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0 ,. ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')), cbrks %>% .[2:length(.)] %>% c(.,1 ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')), sep = "-")) } else { brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0 ,. ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')), cbrks %>% .[2:length(.)] %>% c(.,max_value) %>% sprintf(fmt = paste0('%#.',lgdps,'f')), sep = "-")) } # Create discrete scale based on these custom breaks df[[values]] = df[[values]] %>% findInterval(cbrks) %>% {. + 1} %>% addNA() %>% factor(levels = 1:length(brk_nms), labels = brk_nms) } else if (!is.null(bins)) { # If bins provided # Assign breaks to be equidistant thresholds between zero and maximum observed values # Also add the minimum possible value above zero as the first break in the sequence cbrks = seq(0, max(df[[values]], na.rm = TRUE), length.out = bins - 1) %>% .[2:(length(.)-1)] %>% c(.Machine$double.xmin,.) # Define names of custom breaks if (lgdps == 0) { # If set to show whole numbers # Round all values other than the first one (which is the minimum possible value above zero), up to the nearest whole number cbrks = c(.Machine$double.xmin, cbrks %>% .[2:length(.)] %>% ceiling()) # Assign break names between zero and the maximum observed value in the data brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0 ,. ), cbrks %>% .[2:length(.)] %>% c(.,max_value), sep = "-")) } else if (norm_lgd) { # If data has been normalised # Assign break names between 0 and 1 to the specified number of decimal points (lgdps) brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0 ,. ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')), cbrks %>% .[2:length(.)] %>% c(.,1 ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')), sep = "-")) } else { # If using non-rounded, non-normalised data # Assign break names between zero and the maximum observed value in the data to the specified number of decimal points (lgdps) brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0 ,. ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')), cbrks %>% .[2:length(.)] %>% c(.,max_value) %>% sprintf(fmt = paste0('%#.',lgdps,'f')), sep = "-")) } # Create discrete scale based on these custom breaks df[[values]] = df[[values]] %>% findInterval(cbrks) %>% replace(. == length(cbrks), (length(cbrks)-.Machine$double.xmin)) %>% {. + 1} %>% addNA() %>% factor(levels = 1:length(brk_nms), labels = brk_nms) } else { # Otherwise define consistent legend scale range lg_lims = df[[values]] %>% range(na.rm = TRUE) } # Define legend title (if not defined by user) if (is.null(lgttl) && norm_lgd) { lgttl = "Normalised\nValues" } else if (is.null(lgttl) && norm_lgd == F) { lgttl = "Values" } # Create empty list to populate with ggplot heatmaps pl = list() # Define vectors capturing the number of lower categories in each upper group xglns = df %>% group_split(!!rlang::sym(xupper)) %>% purrr::map(~ .[[xlower]] %>% unique() %>% length()) %>% unlist() yglns = df %>% group_split(!!rlang::sym(yupper)) %>% purrr::map(~ .[[ylower]] %>% unique() %>% length()) %>% unlist() # Counter to keep track of interations of nested for loop i = 0 # For each y-axis group for (ygrp in 1:length(ygrps)) { # For each x-axis group for (xgrp in 1:length(xgrps)) { # Increase interature counter by 1 i = i + 1 # Filter group-level migration data to only include origin and destination regions of interest sdf = df[df[[yupper]] == ygrps[ygrp] & df[[xupper]] == xgrps[xgrp],] # Order lower categories alphabetically sdf[[xlower]] = factor(sdf[[xlower]], levels = sdf[[xlower]] %>% unique() %>% sort() ) sdf[[ylower]] = factor(sdf[[ylower]], levels = sdf[[ylower]] %>% unique() %>% sort() %>% rev() ) # Define main plot p = ggplot(sdf, aes(.data[[xlower]], .data[[ylower]])) + geom_tile(aes(fill = .data[[values]]), show.legend = TRUE) + theme(plot.margin = unit(rep(0,4), "cm"), axis.text.x = element_text(angle = 90, hjust = 1.0, vjust = 0.3), axis.title.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5), axis.title.y = element_text(angle = 0, hjust = 0.5, vjust = 0.5), axis.ticks = element_blank()) + labs(x = xgrps[xgrp], y = ygrps[ygrp]) # Define colour scale if (!is.null(cbrks) && !is.null(cclrs)) { p = p + scale_fill_manual(name = lgttl, values = cclrs , drop = F, na.value = "white") } else if (!is.null(cbrks) && is.null(cclrs)) { p = p + scale_fill_manual(name = lgttl, values = cg("white","#08306B",(length(cbrks)+1)), drop = F, na.value = "white") } else if (is.null(cbrks) && !is.null(cclrs) && norm_lgd) { p = p + scale_fill_gradientn(name = lgttl, colours = cclrs , limits = c(0,1) , na.value = "white") } else if (is.null(cbrks) && !is.null(cclrs) && norm_lgd == F) { p = p + scale_fill_gradientn(name = lgttl, colours = cclrs , limits = lg_lims, na.value = "white") } else if (is.null(cbrks) && is.null(cclrs) && norm_lgd) { p = p + scale_fill_gradientn(name = lgttl, colours = cg("white","#08306B"), limits = c(0,1) , na.value = "white") } else if (is.null(cbrks) && is.null(cclrs) && norm_lgd == F) { p = p + scale_fill_gradientn(name = lgttl, colours = cg("white","#08306B"), limits = lg_lims, na.value = "white") } # To prevent legend showing NA values if rm_diag set to TRUE (in which case, diagonal set to NA), only show legend for plots that are not on the diagonal if (rm_diag && (sdf[[values]] %>% is.na() %>% sum() %>% {. > 0}) && (ygrp == xgrp) ) { p = p + theme(legend.position = "none") } # If bottom-left plot if (ygrp == length(ygrps) & xgrp == 1) { # Include provincia names on both axes p = p + theme(axis.title.x = element_blank(), axis.title.y = element_blank()) } else if (ygrp < length(ygrps) & xgrp == 1) { # If left-hand plot # Include provincia names on y-axis p = p + theme(axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text.x = element_blank()) } else if (ygrp == length(ygrps) & xgrp > 1) { # If bottom plot # Include provincia names on x-axis p = p + theme(axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text.y = element_blank()) } else { # If plot not on left of bottom edges of multiplot # Remove provincia names p = p + theme(axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text.x = element_blank(), axis.text.y = element_blank()) } # Add ggplot to plot list pl[[i]] = p } } # Define plot heights and widths (including group titles) wds = c((sum(xglns)*yttl_width),xglns) hts = c(yglns,(sum(yglns)*xttl_height)) # Define plot spacer ps = plot_spacer() # Create empty lists to be populated with plot titles xttls = list() yttls = list() # Define plot titles for (xgrp in 1:length(xgrps)) { xttls[[xgrp]] = plt_ttl(xgrps[xgrp]) } for (ygrp in 1:length(ygrps)) { yttls[[ygrp]] = plt_ttl(ygrps[ygrp],axs="y") } # Create empty list to populate with both plot title and heatmap tiles in the correct order plts = list() # Define counters for subsetting plot title and heatmap lists, to ensure they are ordered correctly i = 1 j = 1 # For each group (row), assign each plot title, then the heatmap tiles within that row to plts list for (ygrp in 1:length(yttls)) { # Add plot title to list plts[[i]] = yttls[[ygrp]] # Add heatmap plots to list plts[(i+1):(i+length(xttls))] = pl[j:(j+length(xttls)-1)] # Adjust counters i = i + 1 + length(xttls) j = j + length(xttls) } # Add x-axis plots plts[[length(plts)+1]] = ps plts[(length(plts)+1):(length(plts)+length(xttls))] = xttls # Define final plot plt = patchwork::wrap_plots(plts, widths = wds, heights = hts, guides = "collect") # Return final plot return(plt) } # Define log_seq log_seq = function(n,ln=15,round_values=T,rmv_extremes=F) { # Create a sequence from 1 to n. # If `n` is specified as 1, the vector will be scaled to between 0 and 1. if (n == 1) { seq = seq(1, 1000, length.out = ln) round_values = F } else { seq = seq(1, n, length.out = ln-1) } # Apply the logarithm to the sequence log_seq = log(seq) # Scale the sequence to the range [0, 1] min_val = min(log_seq) max_val = max(log_seq) one_seq = ((log_seq - min_val) / (max_val - min_val)) # Reverse pattern of scale so breaks are focussed on lower rather than upper end of scale one_seq_rev = one_seq %>% {. - max(.)} %>% {. * -1} %>% rev() # Scale sequence to n if (round_values) { scaled_one_seq = one_seq_rev %>% {. * n} %>% round() %>% .[2:length(.)] %>% c(0,1,.) } else if (n == 1) { scaled_one_seq = one_seq_rev } else { scaled_one_seq = one_seq_rev %>% {. * n} %>% .[2:length(.)] %>% c(0,.Machine$double.xmin,.) } # Option to remove zero and the maximum value (i.e. `n`) from the beginning and the end of the vector if (rmv_extremes) { scaled_one_seq = scaled_one_seq %>% .[2:(length(.)-1)] } # Return scaled sequence return(scaled_one_seq) } # Define plt_ttl plt_ttl = function(ttl,axs="x",rotate_title=TRUE) { # If plotting on x-axis if (axs == "x") { # Place at top of plot p = ggplot(data.frame(x = 0:1, y = 0:1), aes(x = .data[["x"]], y = .data[["y"]])) + geom_point(col = "white") + coord_cartesian(xlim = c(0,1), ylim = c(0,1)) + theme_void() + theme(plot.margin = unit(rep(0,4), "cm")) # Speficy wehther title should be roated 90 degrees or not if (rotate_title) { p = p + geom_text(x = 0.495, y = 1.0, angle = 90, label = ttl, size = 4, hjust = 1) } else if (rotate_title == FALSE) { p = p + geom_text(x = 0.495, y = 0.5, angle = 0, label = ttl, size = 4, hjust = 1) } else { stop("rotate_title must be TRUE or FALSE.") } } else if (axs == "y") { # If plotting on y-axis # Place at left of plot p = ggplot(data.frame(x = 0:1, y = 0:1), aes(x = .data[["x"]], y = .data[["y"]])) + geom_point(col = "white") + coord_cartesian(xlim = c(0,1), ylim = c(0,1)) + theme_void() + theme(plot.margin = unit(rep(0,4), "cm")) # Speficy wehther title should be roated 90 degrees or not if (rotate_title) { p = p + geom_text(x = 1, y = 0.51, angle = 0, label = ttl, size = 4, hjust = 1) } else if (rotate_title == FALSE) { p = p + geom_text(x = 1, y = 0.51, angle = 90, label = ttl, size = 4, hjust = 1) } else { stop("rotate_title must be TRUE or FALSE.") } } # Return region name plot return(p) } # Define tshhm tshhm = function(df,lower,upper,times,values,sort_lower="alphabetical",lgttl=NULL,bins=NULL,cbrks=NULL,cclrs=NULL,norm_lgd=F,lgdps=0,na_colour=NULL,xttl_height=0.05,yttl_width=0.15) { # Define max value supplied to `values` max_value = max(df[[values]], na.rm = TRUE) # Check that supplied model inputs are compatible and won't cause errors if (!is.null(bins) && !is.null(cbrks)) { stop("The inputs bins and cbrks should not be supplied at the same time. bins is used to break the data into a specific number of groups with equal intervals between the min and max values. cbrks is used to manually break the data into groups based on the supplied thresholds. Please provide either one or the other.") } if (!is.null(bins) && !is.null(cclrs)) { if (bins != length(cclrs)) { stop("If both bins and cclrs are provideds, bins and cclrs must both be vectors with cclrs having a length equal to the value of bins.") } } if (!is.null(cbrks) && !is.null(cclrs)) { if ( length(cbrks) != (length(cclrs)-2) ) { stop("If both cbrks and cclrs are provided, cbrks and cclrs must both be vectors with cclrs having a length two longer than cbrks.") } } if (!is.null(cbrks) && norm_lgd) { if (min(range(cbrks)) < 0 || max(range(cbrks)) > 1) { stop("If normalising the values (norm_lgd == TRUE), all breaks provided to cbrks must be between 0 and 1.") } } if (!is.null(cbrks) && (cbrks %>% diff() %>% {. <= 0} %>% sum() %>% {. > 0})) { stop("Please ensure the values in cbrks are provided in ascending order.") } # Remove unwanted rows and format origin so geographies appear in alphabetical order df = df[,c(lower,upper,times,values)] # If no colour has been assigned for NA values, then remove them from the dataset if (is.null(na_colour)) { df = df %>% filter(!is.na(!!rlang::sym(values))) } # Define the groups to be shown along the y-axis # If ordering of groups already defined via factor ordering, take this as the order # the groups should appear (top to bottom). Otherwise, order groups alphabetically if (!is.null(df[[upper]] %>% levels())) { ygrps = df[[upper]] %>% levels() } else { ygrps = df[[upper]] %>% unique() %>% sort() } # Option to normalise values between 0-1 (only works if all values are positive) if (norm_lgd) { # If any values are negative, return error message if ((df[[values]] < 0) %>% sum(na.rm = TRUE) %>% {. > 0}) {stop("norm_lgd is only designed to be used if all values used to populate the heatmap are positive.")} # Otherwise normalise values df[[values]] = df[[values]] / max(df[[values]], na.rm = TRUE) # Unless a value other than zero is supplied (i.e. the user has manually specified a non-default value), set the number of decimal points shown in the legend to 3 if (lgdps == 0) { lgdps = 3 } } # Option to split legend into custom categories if (!is.null(cbrks)) { # If cbrks provided # Add the smallest value possible in R as a lower threshold to cbrks. # This ensures anything that is equal to, or less than, zero is included in the first group. cbrks = c(.Machine$double.xmin,cbrks) # Define names of custom breaks if (lgdps == 0) { brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0 ,. ), cbrks %>% .[2:length(.)] %>% c(.,max_value), sep = "-")) } else if (norm_lgd) { brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0 ,. ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')), cbrks %>% .[2:length(.)] %>% c(.,1 ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')), sep = "-")) } else { brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0 ,. ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')), cbrks %>% .[2:length(.)] %>% c(.,max_value) %>% sprintf(fmt = paste0('%#.',lgdps,'f')), sep = "-")) } # Create backup version of origin variable so rows can still be ordered by row total or row means if (sort_lower != "alphabetical") { df[[paste0(values,"_old")]] = df[[values]] } # Create discrete scale based on these custom breaks df[[values]] = df[[values]] %>% findInterval(cbrks) %>% {. + 1} %>% addNA() %>% factor(levels = 1:length(brk_nms), labels = brk_nms) } else if (!is.null(bins)) { # If bins provided # Assign breaks to be equidistant thresholds between zero and maximum observed values # Also add the minimum possible value above zero as the first break in the sequence cbrks = seq(0, max(df[[values]], na.rm = TRUE), length.out = bins - 1) %>% .[2:(length(.)-1)] %>% c(.Machine$double.xmin,.) # Define names of custom breaks if (lgdps == 0) { # If set to show whole numbers # Round all values other than the first one (which is the minimum possible value above zero), up to the nearest whole number cbrks = c(.Machine$double.xmin, cbrks %>% .[2:length(.)] %>% ceiling()) # Assign break names between zero and the maximum observed value in the data brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0 ,. ), cbrks %>% .[2:length(.)] %>% c(.,max_value), sep = "-")) } else if (norm_lgd) { # If data has been normalised # Assign break names between 0 and 1 to the specified number of decimal points (lgdps) brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0 ,. ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')), cbrks %>% .[2:length(.)] %>% c(.,1 ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')), sep = "-")) } else { # If using non-rounded, non-normalised data # Assign break names between zero and the maximum observed value in the data to the specified number of decimal points (lgdps) brk_nms = c(0, paste(cbrks %>% .[2:length(.)] %>% c(0 ,. ) %>% sprintf(fmt = paste0('%#.',lgdps,'f')), cbrks %>% .[2:length(.)] %>% c(.,max_value) %>% sprintf(fmt = paste0('%#.',lgdps,'f')), sep = "-")) } # Create backup version of origin variable so rows can still be ordered by row total or row means if (sort_lower != "alphabetical") { df[[paste0(values,"_old")]] = df[[values]] } # Create discrete scale based on these custom breaks df[[values]] = df[[values]] %>% findInterval(cbrks) %>% replace(. == length(cbrks), (length(cbrks)-.Machine$double.xmin)) %>% {. + 1} %>% addNA() %>% factor(levels = 1:length(brk_nms), labels = brk_nms) } else { # Otherwise define consistent legend scale range lg_lims = df[[values]] %>% range(na.rm = TRUE) } # Define legend title (if not defined by user) if (is.null(lgttl) && norm_lgd) { lgttl = "Normalised\nValues" } else if (is.null(lgttl) && norm_lgd == F) { lgttl = "Values" } # Create empty list to populate with ggplot heatmaps pl = list() # Define vectors capturing the number of lower categories in each upper group yglns = df %>% group_split(!!rlang::sym(upper)) %>% purrr::map(~ .[[lower]] %>% unique() %>% length()) %>% unlist() # For each y-axis group for (ygrp in 1:length(ygrps)) { # Filter group-level migration data to only include origin and destination regions of interest sdf = df[df[[upper]] == ygrps[ygrp],] # If breaking data into categories, and sorting values by row values, then use continuous version of values to sort rows if (is.null(cbrks) && sort_lower != "alphabetical") { sort_var = values } else if (!is.null(cbrks) && sort_lower != "alphabetical") { sort_var = paste0(values,"_old") } # Define how rows of the lower categories should be arranged if (sort_lower == "alphabetical") { sdf[[lower]] = factor(sdf[[lower]], levels = sdf[[lower]] %>% unique() %>% sort() %>% rev() ) } else if (sort_lower == "sum_ascend") { sdf[[lower]] = factor(sdf[[lower]], levels = sdf %>% group_by(!!rlang::sym(lower)) %>% summarise(sum = sum(!!rlang::sym(sort_var), na.rm = TRUE)) %>% arrange(sum) %>% .[[lower]] %>% rev() ) } else if (sort_lower == "sum_descend") { sdf[[lower]] = factor(sdf[[lower]], levels = sdf %>% group_by(!!rlang::sym(lower)) %>% summarise(sum = sum(!!rlang::sym(sort_var), na.rm = TRUE)) %>% arrange(sum) %>% .[[lower]] ) } else if (sort_lower == "mean_ascend") { sdf[[lower]] = factor(sdf[[lower]], levels = sdf %>% group_by(!!rlang::sym(lower)) %>% summarise(mean = mean(!!rlang::sym(sort_var), na.rm = TRUE)) %>% arrange(mean) %>% .[[lower]] %>% rev() ) } else if (sort_lower == "mean_descend") { sdf[[lower]] = factor(sdf[[lower]], levels = sdf %>% group_by(!!rlang::sym(lower)) %>% summarise(mean = mean(!!rlang::sym(sort_var), na.rm = TRUE)) %>% arrange(mean) %>% .[[lower]] ) } else { stop("The variable sort_lower should be defined as one of the following: alphabetical, sum_ascend, sum_descend, mean_ascend, mean_descend. See `?tshm` for details.") } # Define main plot p = ggplot(sdf, aes(.data[[times]], .data[[lower]])) + geom_tile(aes(fill = .data[[values]]), show.legend = TRUE) + theme_minimal() + theme(plot.margin = unit(rep(0,4), "cm"), axis.title.y = element_text(angle = 0, hjust = 20.0, vjust = 0.5), axis.ticks = element_blank()) + #labs(y = ygrps[ygrp]) labs(x = NULL, y = NULL) # Define colour scale if (is.null(na_colour)) { if (!is.null(cbrks) && !is.null(cclrs)) { p = p + scale_fill_manual(name = lgttl, values = cclrs , drop = F, na.translate = FALSE) } else if (!is.null(cbrks) && is.null(cclrs)) { p = p + scale_fill_manual(name = lgttl, values = cg("grey90","#08306B",(length(cbrks)+1)), drop = F, na.translate = FALSE) } else if (is.null(cbrks) && !is.null(cclrs) && norm_lgd) { p = p + scale_fill_gradientn(name = lgttl, colours = cclrs , limits = c(0,1) ) } else if (is.null(cbrks) && !is.null(cclrs) && norm_lgd == F) { p = p + scale_fill_gradientn(name = lgttl, colours = cclrs , limits = lg_lims) } else if (is.null(cbrks) && is.null(cclrs) && norm_lgd) { p = p + scale_fill_gradientn(name = lgttl, colours = cg("grey90","#08306B"), limits = c(0,1) ) } else if (is.null(cbrks) && is.null(cclrs) && norm_lgd == F) { p = p + scale_fill_gradientn(name = lgttl, colours = cg("grey90","#08306B"), limits = lg_lims) } } else { if (!is.null(cbrks) && !is.null(cclrs)) { p = p + scale_fill_manual(name = lgttl, values = cclrs , drop = F, na.value = na_colour) } else if (!is.null(cbrks) && is.null(cclrs)) { p = p + scale_fill_manual(name = lgttl, values = cg("grey90","#08306B",(length(cbrks)+1)), drop = F, na.value = na_colour) } else if (is.null(cbrks) && !is.null(cclrs) && norm_lgd) { p = p + scale_fill_gradientn(name = lgttl, colours = cclrs , limits = c(0,1) , na.value = na_colour) } else if (is.null(cbrks) && !is.null(cclrs) && norm_lgd == F) { p = p + scale_fill_gradientn(name = lgttl, colours = cclrs , limits = lg_lims, na.value = na_colour) } else if (is.null(cbrks) && is.null(cclrs) && norm_lgd) { p = p + scale_fill_gradientn(name = lgttl, colours = cg("grey90","#08306B"), limits = c(0,1) , na.value = na_colour) } else if (is.null(cbrks) && is.null(cclrs) && norm_lgd == F) { p = p + scale_fill_gradientn(name = lgttl, colours = cg("grey90","#08306B"), limits = lg_lims, na.value = na_colour) } } # To avoid creating multiple legends, if the user has specified a colour for NA values then do not show any legends # for plots where no NA values occured. This assumed that at least one plot the multiplot contains NA values, which seems # reasonable, as why else would the user bother to specify the colour of NA values) if (!is.null(na_colour) && (sdf[[values]] %>% is.na() %>% sum() %>% {. == 0}) ) { p = p + theme(legend.position = "none") } # If not bottom plot, remove x-axis text and title if (ygrp != length(ygrps)) { p = p + theme(axis.title.x = element_blank(), axis.text.x = element_blank()) } # Add ggplot to plot list pl[[ygrp]] = p } # Define plot heights and widths (including group titles) wds = c(yttl_width,1) hts = c(yglns,(sum(yglns)*xttl_height)) # Define plot spacer ps = plot_spacer() # Create empty list to be populated with plot titles yttls = list() # Define plot titles for (ygrp in 1:length(ygrps)) { yttls[[ygrp]] = plt_ttl(ygrps[ygrp],axs="y") } # Create empty list to populate with both plot title and heatmap tiles in the correct order plts = list() # Define counters for subsetting plot title and heatmap lists, to ensure they are ordered correctly i = 1 j = 1 # For each group (row), assign each plot title, then the heatmap tiles within that row to plts list for (ygrp in 1:length(yttls)) { # Add plot title to list plts[[i]] = yttls[[ygrp]] # Add heatmap plots to list plts[i+1] = pl[j] # Adjust counters i = i + 2 j = j + 1 } # Add x-axis plots plts[[length(plts)+1]] = ps plts[[length(plts)+1]] = plt_ttl(times, rotate_title = FALSE) # Define final plot plt = patchwork::wrap_plots(plts, widths = wds, heights = hts, guides = "collect") # Return final plot return(plt) } ## ----warning = FALSE, message = FALSE----------------------------------------- # Import dplyr for data cleaning library(dplyr) # Summarise hierarchical data structure example_migration %>% group_by(`Origin Region`) %>% reframe(`Origin County` = unique(`Origin County`)) %>% setNames(c("Region","County")) ## ----------------------------------------------------------------------------- # Show data head(example_migration) ## ----fig.width=7.15, fig.height=6.15------------------------------------------ # Create Intial heatmap hierarchical_heatmap = hhm(df = example_migration, ylower = "Origin County", xlower = "Destination County", yupper = "Origin Region", xupper = "Destination Region", values = "Migration", yttl_width = 0.22, xttl_height = 0.22) # View result hierarchical_heatmap ## ----fig.width=7.15, fig.height=6.15------------------------------------------ # Remove diagonal from heatmap (i.e. hide static populations) removed_diag = hhm(df = example_migration, ylower = "Origin County", xlower = "Destination County", yupper = "Origin Region", xupper = "Destination Region", values = "Migration", yttl_width = 0.22, xttl_height = 0.22, rm_diag = TRUE) # View result removed_diag ## ----fig.width=7.15, fig.height=6.15------------------------------------------ # Nomalise the legend normalised_lgd = hhm(df = example_migration, ylower = "Origin County", xlower = "Destination County", yupper = "Origin Region", xupper = "Destination Region", values = "Migration", yttl_width = 0.22, xttl_height = 0.22, rm_diag = TRUE, norm_lgd = TRUE) # View result normalised_lgd ## ----fig.width=7.15, fig.height=6.15------------------------------------------ # Manually define colour scheme for heatmap (uses viridis colour scheme) viridis_12 = c("#440154FF","#482173FF","#433E85FF","#38598CFF","#2D708EFF","#25858EFF", "#1E9B8AFF","#2BB07FFF","#51C56AFF","#85D54AFF","#C2DF23FF","#FDE725FF") # Assign continuous colour scheme cont_clrs = hhm(df = example_migration, ylower = "Origin County", xlower = "Destination County", yupper = "Origin Region", xupper = "Destination Region", values = "Migration", yttl_width = 0.22, xttl_height = 0.22, rm_diag = TRUE, norm_lgd = TRUE, cclrs = viridis_12) # View result cont_clrs ## ----fig.width=7.15, fig.height=6.15------------------------------------------ # Break legends into a specified number of bins # (of equal intervals between 0 and the maximum value in `values`) bins_15 = hhm(df = example_migration, ylower = "Origin County", xlower = "Destination County", yupper = "Origin Region", xupper = "Destination Region", values = "Migration", yttl_width = 0.22, xttl_height = 0.22, rm_diag = TRUE, bins = 15) # View result bins_15 ## ----------------------------------------------------------------------------- # Manually break data into categories using user-specified intervals. cbrks = log_seq(example_migration[example_migration[["Origin County" ]] != example_migration[["Destination County"]],] %>% .$Migration %>% max(), 12, rmv_extremes = TRUE) # Show interval breaks cbrks ## ----fig.width=7.15, fig.height=6.15------------------------------------------ # Manually assign legend categories legend_cats = hhm(df = example_migration, ylower = "Origin County", xlower = "Destination County", yupper = "Origin Region", xupper = "Destination Region", values = "Migration", yttl_width = 0.22, xttl_height = 0.22, rm_diag = TRUE, cbrks = cbrks) # View result legend_cats ## ----fig.width=7.15, fig.height=6.15------------------------------------------ # Manually assign colours to legend categories cat_clrs = hhm(df = example_migration, ylower = "Origin County", xlower = "Destination County", yupper = "Origin Region", xupper = "Destination Region", values = "Migration", yttl_width = 0.22, xttl_height = 0.22, rm_diag = TRUE, cbrks = cbrks, cclrs = viridis_12) # View result cat_clrs ## ----------------------------------------------------------------------------- # Show data head(example_time_series) ## ----fig.width=7.15, fig.height=4.5------------------------------------------- # Intial heatmap time_series_heatmap = tshhm(df = example_time_series, lower = "County", upper = "Region", times = "Year", values = "Immigration", yttl_width = 0.25) # View result time_series_heatmap ## ----fig.width=7.15, fig.height=4.5------------------------------------------- # Arrange counties within each region by total number of immigrants # across all five years (ascending from top to bottom) sort_ascending = tshhm(df = example_time_series, lower = "County", upper = "Region", times = "Year", values = "Immigration", sort_lower = "sum_ascend", yttl_width = 0.25) # View result sort_ascending ## ----fig.width=7.15, fig.height=4.5------------------------------------------- # Nomalise the legend normalised_lgd = tshhm(df = example_time_series, lower = "County", upper = "Region", times = "Year", values = "Immigration", sort_lower = "sum_ascend", norm_lgd = TRUE, yttl_width = 0.25) # View result normalised_lgd ## ----fig.width=7.15, fig.height=4.5------------------------------------------- # Assign continuous colour scheme cont_clrs = tshhm(df = example_time_series, lower = "County", upper = "Region", times = "Year", values = "Immigration", sort_lower = "sum_ascend", norm_lgd = TRUE, cclrs = viridis_12, yttl_width = 0.25) # View result cont_clrs ## ----fig.width=7.15, fig.height=4.5------------------------------------------- # Assign colour for NA values na_clrs = tshhm(df = example_time_series, lower = "County", upper = "Region", times = "Year", values = "Immigration", sort_lower = "sum_ascend", norm_lgd = TRUE, cclrs = viridis_12, na_colour = "grey80", yttl_width = 0.25) # View result na_clrs ## ----fig.width=7.15, fig.height=4.5------------------------------------------- # Break legends into a specified number of bins # (of equal intervals between 0 and the maximum value in `values`) bins_15 = tshhm(df = example_time_series, lower = "County", upper = "Region", times = "Year", values = "Immigration", sort_lower = "sum_ascend", bins = 15, yttl_width = 0.25) # View result bins_15 ## ----------------------------------------------------------------------------- # Manually break data into categories using user-specified intervals. cbrks = log_seq(example_time_series %>% .$Immigration %>% max(na.rm = TRUE), 12, rmv_extremes = TRUE) # Show breaks cbrks ## ----fig.width=7.15, fig.height=4.5------------------------------------------- # Manually assign legend categories legend_cats = tshhm(df = example_time_series, lower = "County", upper = "Region", times = "Year", values = "Immigration", sort_lower = "sum_ascend", cbrks = cbrks, yttl_width = 0.25) # View result legend_cats ## ----fig.width=7.15, fig.height=4.5------------------------------------------- # Manually assign colours to legend categories cat_clrs = tshhm(df = example_time_series, lower = "County", upper = "Region", times = "Year", values = "Immigration", sort_lower = "sum_ascend", cbrks = cbrks, cclrs = viridis_12, na_colour = "grey80", yttl_width = 0.25) # View result cat_clrs ## ----fig.width=7.15, fig.height=4.5------------------------------------------- # Manually define order of x-axis and groups using factor levels new_time_series = example_time_series %>% mutate(Year = factor(Year, levels = c(2012,2011,2014, 2013,2015)), Region = factor(Region, levels = c("North","Midlands", "South West", "South East"))) # Manually define order of x-axis and groups rearrange_axes = tshhm(df = new_time_series, lower = "County", upper = "Region", times = "Year", values = "Immigration", sort_lower = "sum_ascend", cbrks = cbrks, cclrs = viridis_12, na_colour = "grey80", yttl_width = 0.25) # View result rearrange_axes