1  Nourishment episodes

Show the code
###### Nourishment data

d_raw_nourishment <- read_xlsx(here("data/raw/PSDS-NC-20240705.xlsx")) |>
  clean_names() |>
  mutate(across(where(is.character), ~ map_chr(.x, iconv, "UTF-8", "UTF-8", sub=''))) # just in case

d_nourishment <- d_raw_nourishment |>
  mutate(place = str_trim(str_extract(location, "([[:alpha:]][[:space:]]?)+")),
         place = case_when(
           str_detect(place, "Topsail")         ~ "Topsail",
           place == "Emerald Isle Point"        ~ "Emerald Isle",
           place == "Cape Hatteras"             ~ "Hatteras Island",
           .default = place
         )) |> 
  relocate(place, .after = location) |>
  mutate(justification = case_match(
    justification,
    "Section 111"          ~ "Shore Protection",
    "FEMA Emergency"       ~ "Emergency",
    .default = justification
  ))

d_rank_n <- d_nourishment |>
  count(place) |>
  arrange(desc(n), place) %>%
  mutate(rank_n = nrow(.) - rank(n, ties.method = "min") + 1)

d_rank_cy <- d_nourishment |>
  count(place, wt = volume_cy, name = "cy") |>
  arrange(desc(cy), place) %>%
  mutate(rank_n = nrow(.) - rank(cy, ties.method = "min") + 1)

d_rank_total_cost <- d_nourishment |>
  count(place, wt = adjusted_cost_2022, name = "total_cost_2022") |>
  arrange(desc(total_cost_2022), place) %>%
  mutate(rank_n = nrow(.) - rank(total_cost_2022) + 1)


###### Location data

d_beach_locations <- read_xlsx(here("data/raw/beach-coordinates-geonames-dmoul.xlsx"),
                               skip = 3) |>
  mutate(across(where(is.character), ~ map_chr(.x, iconv, "UTF-8", "UTF-8", sub=''))) |> # just in case
  filter(!is.na(longitude),
         !is.na(latitude)) |>
  mutate(place = str_trim(str_to_title(str_extract(place, "([[:alpha:]][[:space:]]?)+"))),
         place = case_when(
           str_detect(place, "Topsail")                   ~ "Topsail",
           place == "Cape Hatteras"                       ~ "Hatteras Island",
           place == "Emerald Isle Point"                  ~ "Emerald Isle",
           str_detect(place, "Figure Eight")              ~ "Figure Eight Island", # get rid of north end / south end
           .default = place
         )) |> 
  distinct(place, .keep_all = TRUE) |>
  st_as_sf(coords = c("longitude", "latitude"),
           crs = "WGS84")
  
         
nc_coast_bbox <- st_bbox(d_beach_locations)

d_combined <- left_join(d_beach_locations, d_nourishment,
                        by = "place") |>
  mutate(primary_funding_source = factor(primary_funding_source, 
                                         levels = c("Private", "Local", "State", "Federal"))
         )

year_first <- min(d_combined$year_completed)
year_last <- max(d_combined$year_completed)
n_all_episodes <- nrow(d_combined)

d_combined_with_est <- d_combined |>
  st_drop_geometry() |>
  mutate(adjusted_cost_2022 = if_else(adjusted_cost_2022 == 0,
                                    NA,
                                    adjusted_cost_2022),
         adjusted_cost_cy = if_else(adjusted_cost_cy == 0 | adjusted_cost_cy > 100,
                                    NA,
                                    adjusted_cost_cy),
         length_ft = if_else(length_ft == 0,
                                    NA,
                                    length_ft)
         ) |>
  mutate(med_adjusted_cost_2022 = median(adjusted_cost_2022, na.rm = TRUE),
         med_adjusted_cost_cy = median(adjusted_cost_cy, na.rm = TRUE),
         med_length_ft = median(length_ft, na.rm = TRUE),
         .by = place) |>
  mutate(adjusted_cost_2022_new = med_adjusted_cost_cy * volume_cy,
         pct_dif_adjusted_cost_2022_est = adjusted_cost_2022_new / med_adjusted_cost_2022,
         pct_dif_adjusted_cost_2022 = adjusted_cost_2022_new / adjusted_cost_2022) |>
  # Now choose best cost estimate
  mutate(adjusted_cost_2022_est = if_else(is.na(adjusted_cost_2022),
                                          adjusted_cost_2022_new,
                                          adjusted_cost_2022),
         adjusted_cost_cy_est = if_else(is.na(adjusted_cost_cy),
                                          med_adjusted_cost_cy,
                                          adjusted_cost_cy),
         est_cost = if_else(is.na(adjusted_cost_2022),
                            "Y",
                            "")
  ) |>
  # While we're at it, we will need length estimates too
  mutate(length_ft_est = if_else(is.na(length_ft),
                                 med_length_ft,
                                 length_ft),
         est_length = if_else(is.na(length_ft),
                            "Y",
                            ""),
  )

n_episodes <- nrow(d_combined)
n_episodes_cost_missing <- d_combined_with_est |>
  filter(is.na(adjusted_cost_2022)) |>
  nrow()
pct_episodes_missing_cost <- percent(n_episodes_cost_missing / n_episodes)
my_caption_missing_append <- glue("Estimated episode costs for {n_episodes_cost_missing}", 
                                  " of {n_episodes} episodes ({pct_episodes_missing_cost})",
                                  "\nindicated by the rug marks at bottom of plot")

my_caption_missing_length_append <- glue("Includes estimated episode lengths where data is missing")


volume_at_80pct <- quantile(d_combined$volume_cy, na.rm = TRUE,
                            probs = 0.8)
length_at_80pct <- quantile(d_combined_with_est$length_ft_est, na.rm = TRUE,
                            probs = 0.8)


##### County boundaries

# v2020 <- tidycensus::load_variables(year = 2020,
#                                     dataset = "pl")

d_raw_county <- tidycensus::get_decennial(geography = "county",
                                          variables = "P1_001N",
                                          year = 2020,
                                          state = "NC",
                                          geometry = TRUE) |>
  st_transform(crs = "WGS84")


###### Coastline

d_nc_border <- st_union(d_beach_locations)

fname <- here("data/raw/noaa-shoreline/gshhg-shp-2.3.7/GSHHS_shp/h/")
d_raw_h1 <- st_read(fname,
                   layer = "GSHHS_h_L1",
                   quiet = TRUE)
d_raw_h2 <- st_read(fname,
                   layer = "GSHHS_h_L2",
                   quiet = TRUE)


bbox_eps <- 0.1 # degrees

my_xmin = min(st_coordinates(d_nc_border)[, 1]) - bbox_eps
my_xmax = max(st_coordinates(d_nc_border)[, 1]) + 100 * bbox_eps
my_ymin = min(st_coordinates(d_nc_border)[, 2]) - 10 * bbox_eps
my_ymax = max(st_coordinates(d_nc_border)[, 2]) + bbox_eps

plot_bbox <- st_bbox(c(xmin = my_xmin, xmax = my_xmax, ymax = my_ymax, ymin = my_ymin), 
                     crs = st_crs("WGS84")
                     )

plot_bbox <- st_bbox(c(xmin = -79, xmax = -70, ymax = 36.8, ymin = 32), 
                     crs = st_crs("WGS84")
                     )

nc_coastline <- st_crop(d_raw_h1, plot_bbox)
# nc_rivers <- st_crop(d_raw_h2, plot_bbox) # doesn't include enough river data for NC

nc_counties <- st_crop(d_raw_county, plot_bbox)

###### Counts

d_combined_count <- count(d_combined, place)

d_combined_volume_cy <- d_combined |>
  filter(!is.na(volume_cy)) |>
  count(place, wt = volume_cy,
        name = "total_volume_cy")

n_missing_costs <- d_combined |>
  filter(adjusted_cost_2022 == 0) |>
  nrow()

The data set includes 286 nourishment episodes on or near ocean shorelines in North Carolina from 1939 through sometime in the first half of 2024 (I downloaded the data on 2024-07-05). While the costs for a minority of episodes (106, 37%) are not in the data set, the cumulative cost in 2022 dollars for the events that include costs is $1.3B. Using estimates for the missing costs, I estimate the cumulative cost to be $1.6B.

1.1 How much sand, how much money, and with what justifications?

Episodes were distributed along nearly the whole length of NC coastline. Carolina Beach, one of the older beach communities, received 32 nourishment episodes delivering over 20M cubic yards–more than any other place. See the summary Table 1.1.1

1.1.1 Volume of beach nourishment episodes

Show the code
d_combined_volume_cy |>
  ggplot() +
  geom_sf(data = nc_coastline,
          alpha = 1) +
  geom_sf(data = nc_counties,
          linewidth = 0.05,
          color = "grey40",
          fill = NA,
          alpha = 0.1) +
  geom_sf(aes(size = total_volume_cy), 
          color = "firebrick",
          alpha = 0.4) +
  geom_text_repel(
    aes(label = place, geometry = geometry),
    stat = "sf_coordinates",
    min.segment.length = 1,
    force = 20,
    max.overlaps = 20,
    seed = 9999
  ) +
  coord_sf(xlim = c(NA, -74.8)) +
  scale_x_continuous(expand = expansion(mult = c(0, 0))) +
  scale_y_continuous(expand = expansion(mult = c(0, 0))) +
  scale_size_continuous(range = c(4, 20),
                        labels = label_number(scale_cut = cut_short_scale())
  ) +
  guides(size = guide_legend(position = "inside")) +
  theme(axis.title = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#00008B33"),
        legend.position.inside = c(0.1, 0.8),
        legend.key = element_rect(fill = "white"),
        legend.background = element_rect(fill = "white"),) +
  labs(
    title = glue("The data set includes {n_all_episodes} beach nourishment epsiodes in NC", 
                 "\nbetween {year_first} and the first half of {year_last}",
                 "\nmoving nearly {round(sum(d_combined_volume_cy$total_volume_cy) / 1e6)} million cubic yards of sand"),
    caption = my_caption_geo
  )
Figure 1.1: NC beach nourishment map


I distinguish three three location clusters by their first beach nourishment episode as seen in Figure 1.2:

  • Some early beach communities started nourishment before 1980 including Carolina Beach, Wrightsville Beach, and Atlantic Beach
  • The majority started nourishment in the eighties, nineties or early 2000s
  • Some places started nourishment since 2018, mainly funded locally, and probably at least in part in response to Hurricane Florence
Show the code
dta_for_plot <- d_combined_with_est |>
  select(place, year_completed, volume_cy, primary_funding_source) |>
  arrange(place, year_completed) |>
  mutate(n_episodes = n(),
         idx = row_number(),
         cum_cy = cumsum(volume_cy),
         .by = place)

dta_for_plot_labels <- dta_for_plot |>
  slice_max(order_by = cum_cy,
            n = 1,
            by = place) |>
  slice_max(order_by = cum_cy,
            n = 10) |>
  mutate(plot_label = glue("{place} ({idx})"))

dta_for_plot |>
  ggplot(aes(x = year_completed, y = cum_cy, group = place)) +
  geom_vline(xintercept = c(1980, 2015), 
             lty = 2, linewidth = 0.15, alpha = 0.5) +
  geom_line(linewidth = 0.15,
            alpha = 0.6) +
  geom_point(aes(color = primary_funding_source),
             size = 1.5, alpha = 0.6) +
  geom_text(data = dta_for_plot_labels,
            aes(x = year_completed + 2,
                y = cum_cy,
                label = plot_label),
            hjust = 0,
            size = 2,
            check_overlap = TRUE) +
  scale_y_continuous(labels = label_number(scale_cut = cut_short_scale()),
                     expand = expansion(mult = c(0.005, 0.02))) +
  scale_size_continuous() +
  expand_limits(x = 2040) +
  guides(color = guide_legend(override.aes = list(size = 4),
                              position = "inside")) +
  theme(legend.position.inside = c(0.15, 0.8)) +
  labs(
    title = glue("In general, the beach communities receiving", 
                 "\nthe most episodes received the most", 
                 "\ncumulative volume of beach nourishment"),
    subtitle = "Once you start it's hard to stop",
    x = NULL,
    y = "Volume (cubic yards)",
    color = "Primary funding\nsource",
    caption = my_caption
  )
Figure 1.2: Cumulative volume by place


Show the code
dta_for_plot_tmp <- d_combined_with_est |>
  select(place, year_completed, volume_cy, primary_funding_source) |>
  arrange(place, year_completed) |>
  mutate(n_episodes = n(),
         idx = row_number(),
         cum_cy = cumsum(volume_cy),
         .by = place)

dta_for_plot_summary <- dta_for_plot_tmp |>
  summarize(year_first = min(year_completed),
            year_last = max(year_completed),
            n_episodes  = n(),
            total_volume_cy = sum(volume_cy),
            .by = place) |>
  mutate(place_label = glue("{place} ({n_episodes})"),
         place_label = fct_reorder(place_label, n_episodes))
  
dta_for_plot_tmp |>
  left_join(dta_for_plot_summary |>
              select(place, place_label),
            by = "place") |>
  ggplot() +
  geom_vline(xintercept = c(1980, 2015), 
             lty = 2, linewidth = 0.15, alpha = 0.5) +
  geom_segment(data = dta_for_plot_summary,
               aes(x = year_first, xend = year_last, y = place_label),
  alpha = 0.5, linewidth = 0.2) +
  geom_point(aes(x = year_completed, y = place_label, size = volume_cy, color = primary_funding_source),
             alpha = 0.15) +
  scale_x_continuous(expand = expansion(mult = c(0.02, 0.02))) +
  scale_y_discrete(expand = expansion(mult = c(0.04, 0.04))) +
  scale_size_continuous(range = c(1, 15),
                        breaks = c(0.1e6, 1e6, 2e6, 3e6),
                        labels = label_number(scale_cut = cut_short_scale())) +
  guides(size = guide_legend(position = "inside"),
         color = guide_legend(position = "inside",
                              override.aes = list(size = 3))) +
  theme(legend.position.inside = c(0.15, 0.35),
        plot.title.position = "plot",
        plot.subtitle.position = "plot") +
  labs(
    title = "There is a wide range in the number and size\nof beach nourishment episodes",
    subtitle = "Timing and relative size of nourishment episodes",
    x = NULL,
    y = NULL,
    size = "Volume\ncubic yards",
    color = "Primary\nfunding source",
    caption = my_caption
  )
Figure 1.3: Episodes and relative volume by place

I see that all of the privately funded nourishment episodes were for the private gated Figure Eight Island.


Eighty percent of the episodes delivered volumes of less than 817K cubic yards (panel C). Nags Head and Atlantic Beach are notable outliers (panel A).

Show the code
p1 <- d_combined_with_est |>
  summarize(avg_volume_cy = mean(volume_cy),
            med_volume_cy = median(volume_cy),
            n_episodes  = n(),
            total_volume_cy = sum(volume_cy),
            .by = place) |>
  mutate(place_label = glue("{place} ({n_episodes})"),
         place_label = fct_reorder(place_label, n_episodes)) |>
  ggplot() +
  geom_segment(aes(x = avg_volume_cy, xend = med_volume_cy, y = place_label),
             alpha = 0.5, linewidth = 0.2) +
  geom_point(aes(x = med_volume_cy, y = place_label),
             alpha = 0.5, color = "dodgerblue", size = 2) +
  geom_point(aes(x = avg_volume_cy, y = place_label),
             alpha = 0.5, color = "firebrick", size = 2) +
  scale_x_continuous(labels = label_number(scale_cut = cut_short_scale()),
                     expand = expansion(mult = c(0, 0.02))) +
  scale_size_continuous(breaks = c(1, 10, 20, 30)) +
  expand_limits(x = 0) +
  labs(
    subtitle = "A: Median (blue) average (red) volume",
    x = "Volume (cubic yards)",
    y = NULL
  )

p2 <- d_combined_with_est |>
  ggplot() +
  geom_density(aes(x = volume_cy),
             alpha = 0.5, linewidth = 0.2) +
  scale_x_continuous(labels = label_number(scale_cut = cut_short_scale()),
                     expand = expansion(mult = c(0, 0.02))) +
  scale_y_continuous(labels = label_number(scale_cut = cut_short_scale()),
                     expand = expansion(mult = c(0, 0.02))) +
  theme(axis.text.y = element_blank()) +
  labs(
    subtitle = "B: Density of episode volumes",
    x = "Volume (cubic yards)",
    y = NULL
  )

p3 <- d_combined_with_est |>
  ggplot() +
  stat_ecdf(aes(x = volume_cy),
             alpha = 0.5, linewidth = 0.2) +
  annotate("point", x = volume_at_80pct, y = 0.8, size = 2, color = "firebrick", alpha = 0.6) +
  scale_x_continuous(labels = label_number(scale_cut = cut_short_scale()),
                     expand = expansion(mult = c(0, 0.02))) +
  scale_y_continuous(labels = label_percent(scale_cut = cut_short_scale()),
                     expand = expansion(mult = c(0, 0.02))) +
  labs(
    subtitle = "C: Distribution of episode volumes",
    x = "Volume (cubic yards)",
    y = "Percent of all episodes"
  )

p1 + (p2 / p3) +
  plot_annotation(
    title = "Episode volumes",
    caption = my_caption
  )
Figure 1.4: Volume per episode with median and average volume for each place


1.1.2 Length of beach nourishment episodes

Eighty percent of the episodes delivered nourishment along less than about 12K linear feet, which is 2.3 miles (panel C). Nags Head and Pine Knoll Shores are notable outliers (panel A).

Show the code
p1 <- d_combined_with_est |>
  summarize(avg_length_ft = mean(length_ft_est),
            med_length_ft = median(length_ft_est),
            n_episodes  = n(),
            total_length_ft = sum(length_ft_est),
            .by = place) |>
  mutate(place_label = glue("{place} ({n_episodes})"),
         place_label = fct_reorder(place_label, n_episodes)) |>
  filter(!is.na(total_length_ft)) |>
  ggplot() +
  geom_segment(aes(x = avg_length_ft, xend = med_length_ft, y = place_label),
             alpha = 0.5, linewidth = 0.2) +
  geom_point(aes(x = med_length_ft, y = place_label),
             alpha = 0.5, color = "dodgerblue", size = 2) +
  geom_point(aes(x = avg_length_ft, y = place_label),
             alpha = 0.5, color = "firebrick", size = 2) +
  scale_x_continuous(labels = label_number(scale_cut = cut_short_scale()),
                     expand = expansion(mult = c(0, 0.02))) +
  scale_size_continuous(breaks = c(1, 10, 20, 30)) +
  expand_limits(x = 0) +
  labs(
    subtitle = "A: Median (blue) average (red) length",
    x = "Length (feet)",
    y = NULL
  )

p2 <- d_combined_with_est |>
  ggplot() +
  geom_density(aes(x = length_ft_est),
             alpha = 0.5, linewidth = 0.2) +
  scale_x_continuous(labels = label_number(scale_cut = cut_short_scale()),
                     expand = expansion(mult = c(0.005, 0.02))) +
  scale_y_continuous(labels = label_number(scale_cut = cut_short_scale()),
                     expand = expansion(mult = c(0, 0.02))) +
  theme(axis.text.y = element_blank()) +
  labs(
    subtitle = "B: Density of episode length",
    x = "Length (feet)",
    y = NULL
  )

p3 <- d_combined_with_est |>
  ggplot() +
  stat_ecdf(aes(x = length_ft_est),
             alpha = 0.5, linewidth = 0.2) +
  annotate("point", x = length_at_80pct, y = 0.8, size = 2, color = "firebrick", alpha = 0.6) +
  scale_x_continuous(labels = label_number(scale_cut = cut_short_scale()),
                     expand = expansion(mult = c(0, 0.02))) +
  scale_y_continuous(labels = label_percent(scale_cut = cut_short_scale()),
                     expand = expansion(mult = c(0, 0.02))) +
  labs(
    subtitle = "C: Distribution of episode length",
    x = "Length (feet)",
    y = "Percent of all episodes"
  )

p1 + (p2 / p3) +
  plot_annotation(
    title = "Episode lengths",
    caption = paste0(my_caption, "\n", my_caption_missing_length_append)
  )
Figure 1.5: Length of shoreline nourished per episode with median and average for each place


Show the code
dta_for_plot <- d_combined_with_est |>
  select(place, year_completed, length_ft_est, primary_funding_source, justification) |>
  arrange(place, year_completed) |>
  mutate(n_episodes = n(),
         idx = row_number(),
         cum_length = cumsum(length_ft_est),
         .by = place)

dta_for_plot_labels <- dta_for_plot |>
  slice_max(order_by = cum_length,
            n = 1,
            by = place) |>
  slice_max(order_by = cum_length,
            n = 10) |>
  mutate(plot_label = glue("{place} ({idx})"))

p1 <- dta_for_plot |>
  ggplot(aes(x = year_completed, y = cum_length, group = place)) +
  geom_vline(xintercept = c(1980, 2015), 
             lty = 2, linewidth = 0.15, alpha = 0.5) +
  geom_line(linewidth = 0.15,
            alpha = 0.6) +
  geom_point(aes(color = primary_funding_source),
             size = 1.5, alpha = 0.6) +
  geom_text(data = dta_for_plot_labels,
            aes(x = 2026,
                y = cum_length,
                label = plot_label),
            hjust = 0,
            size = 2,
            check_overlap = TRUE) +
  scale_y_continuous(labels = label_number(scale_cut = cut_short_scale()),
                     expand = expansion(mult = c(0.005, 0.02))) +
  scale_size_continuous() +
  expand_limits(x = 2040) +
  guides(color = guide_legend(override.aes = list(size = 4),
                              position = "inside")) +
  theme(legend.position.inside = c(0.15, 0.8)) +
  labs(
    subtitle = "A: By primary funding source",
    x = NULL,
    y = "Length (linear feet)",
    color = "Primary funding\nsource"
  )

p2 <- dta_for_plot |>
  ggplot(aes(x = year_completed, y = cum_length, group = place)) +
  geom_vline(xintercept = c(1980, 2015), 
             lty = 2, linewidth = 0.15, alpha = 0.5) +
  geom_line(linewidth = 0.15,
            alpha = 0.6) +
  geom_point(aes(color = justification),
             size = 1.5, alpha = 0.6) +
  geom_text(data = dta_for_plot_labels,
            aes(x = 2026,
                y = cum_length,
                label = plot_label),
            hjust = 0,
            size = 2,
            check_overlap = TRUE) +
  scale_y_continuous(labels = label_number(scale_cut = cut_short_scale()),
                     expand = expansion(mult = c(0.005, 0.02))) +
  scale_size_continuous() +
  expand_limits(x = 2040) +
  guides(color = guide_legend(override.aes = list(size = 4),
                              position = "inside")) +
  theme(legend.position.inside = c(0.2, 0.8)) +
  labs(
    subtitle = "B: By justification",
    x = NULL,
    y = "Length (linear feet)",
    color = "Justification"
  )

p1 + p2 +
  plot_annotation(
    title = "The oldest beach communities have received the most episodes\nand most linear feet of beach nourishment",
    
    caption = paste0(my_caption, "\n", my_caption_missing_length_append)
  )
Figure 1.6: Cumulative length by place


There is a wide range of nourishment lengths and volumes–and their ratios (Figure 1.7). At some locations (example: Carolina Beach) there have been episodes of widely varying volume deposited on very similar lengths of beach while at other locations (example: Topsail beaches) it’s the reverse. See Table 1.1.

Show the code
model1 <- d_combined_with_est |>
  mutate(volume_cyk = volume_cy / 1000) |>
  lm(length_ft_est ~ volume_cyk,
     data = _) |>
  broom::tidy()

model1_estimate <- round(model1[2, 2], digits = 1)
model1_std_error <- round(model1[2, 3], digits = 1)
one_ft_volume <- round(1000 / model1_estimate)

p1 <- d_combined_with_est |>
  st_drop_geometry() |>
  mutate(primary_funding_source = factor(primary_funding_source, 
                                         levels = c("Private", "Local", "State", "Federal"))
         ) |>
  ggplot(aes(x = volume_cy, y = length_ft_est)) +
  geom_point(aes(size = adjusted_cost_2022_est,
                 color = primary_funding_source),
             alpha = 0.6,
             show.legend = TRUE) +
  geom_smooth(method = 'lm', formula = 'y ~ x', se = TRUE,
              span = 0.95) +
  scale_x_continuous(labels = label_number(scale_cut = cut_short_scale()),
                     expand = expansion(mult = c(0.001, 0.02))) +
  scale_y_continuous(labels = label_number(scale_cut = cut_short_scale()),
                     expand = expansion(mult = c(0.005, 0.02))) +
  scale_size_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
  guides(color = "none") +
  labs(
    subtitle = "A: Linear volume scale",
    x = "Volume",
    y = "Length",
    size = "Cost",
    color = "Primary funding\nsource"
  )

p2 <- d_combined_with_est |>
  st_drop_geometry() |>
  mutate(primary_funding_source = factor(primary_funding_source, 
                                         levels = c("Private", "Local", "State", "Federal"))
         ) |>
  ggplot(aes(x = volume_cy, y = length_ft_est)) +
  geom_point(aes(size = adjusted_cost_2022_est,
                 color = primary_funding_source),
             alpha = 0.6,
             show.legend = TRUE) +
  geom_smooth(method = 'lm', formula = 'y ~ x', se = FALSE,
              span = 0.95) +
  scale_x_log10(labels = label_number(scale_cut = cut_short_scale()),
                expand = expansion(mult = c(0.001, 0.02))) +
  scale_y_log10(labels = label_number(scale_cut = cut_short_scale()),
                expand = expansion(mult = c(0.005, 0.02))) +
  scale_size_continuous(labels = label_dollar(scale_cut = cut_short_scale())) +
  guides(color = guide_legend(override.aes = list(size = 4))) +
  labs(
    subtitle = "B: Log10 volume scale",
    x = "Volume",
    y = NULL,
    size = "Cost",
    color = "Primary funding\nsource"
  )

p1 + p2 +
  plot_annotation(
    title = "What is the relationship between volume and length?",
    subtitle = glue("For every 1000 cubic yards, {model1_estimate} ± {model1_std_error} feet of beach is nourished.",
                    " In other words, on average about {one_ft_volume} cubic yards nourishes one linear foot of shoreline,",
                    "\nhowever the range varies a lot."),
    caption = paste0(my_caption, "\n", my_caption_missing_length_append)
  ) +
  plot_layout(
    guides = "collect"
  )
Figure 1.7: The relationship of length of shoreline nourished by volume


The mix of justifications has shifted from mostly navigation to mostly shore protection.

Show the code
dta_for_plot <- d_combined_with_est |>
  select(place, year_completed, length_ft_est, primary_funding_source, justification) |>
  mutate(decade = floor(year_completed / 10) * 10) |>
  arrange(place, year_completed) |>
  summarize(n_episodes = n(),
            .by = c(decade, justification)) |>
  mutate(pct_in_decade = n_episodes / sum(n_episodes),
         .by = c(decade))

dta_for_plot |>
  ggplot(aes(x = decade, y = pct_in_decade, color = justification)) +
  geom_bump(linewidth = 0.25,
            show.legend = FALSE) +
  geom_point(aes(size = n_episodes),
             alpha = 0.6) +
  scale_x_continuous(breaks = 1930 + 1:12 * 10,
                     expand = expansion(mult = c(0.01, 0.04))) +
  scale_y_continuous(labels = label_percent(),
                     expand = expansion(mult = c(0.005, 0.02))) +
  scale_size_continuous(range = c(1, 15),
                        breaks = c(1, 10, 20, 30)) +
  guides(color = guide_legend(override.aes = list(size = 4))) +
  labs(
    title = glue("Shore protection has replaced navigation", 
                 "\nas the primary justification"),
    subtitle = "Most 'emergency' episodes are probably shore protection too",
    x = NULL,
    y = "Percent of episodes in decade",
    color = "Justification",
    caption = paste0(my_caption, "\n", my_caption_missing_length_append)
  )
Figure 1.8: Changing mix of justifications


1.3 Appendix

All places and episodes

Show the code
episode_all <- d_combined_with_est |>
  st_drop_geometry() |>
  reframe(n_episodes = n(),
          volume_cy = sum(volume_cy, na.rm = TRUE),
          length_ft = sum(length_ft_est, na.rm = TRUE),
          .by = place) |>
  arrange(desc(n_episodes), desc(volume_cy))

episode_all_distribution <- d_combined_with_est |>
  st_drop_geometry() |>
  summarize(volume_cy_dist = list(volume_cy),
            length_ft_dist = list(length_ft_est),
            .by = place)

dta_for_table_tmp <- left_join(episode_all, episode_all_distribution,
                           by = "place")

dta_for_table_tmp |>
  rows_append(tibble(place = "Total",
                     n_episodes = sum(dta_for_table_tmp$n_episodes),
                     volume_cy = sum(dta_for_table_tmp$volume_cy),
                     length_ft = sum(dta_for_table_tmp$length_ft)
                     )) |>
  gt() |>
  tab_header(md(glue("**All places and episodes**<br>with distribution of volume (cubic yards)", 
                     " and length (feet)<br>for each place's episodes"))) |>
  tab_options(table.font.size = 10) |>
  fmt_number(columns = contains("volume_cy"),
             decimals = 1,
             scale_by = 1e-6,
             pattern = "{x}M") |>
  fmt_number(columns = contains("length_ft"),
             decimals = 0,
             scale_by = 1e-3,
             pattern = "{x}K") |>
  gt_plt_dist(column = volume_cy_dist) |>
  gt_plt_dist(column = length_ft_dist)
Table 1.1: All places and episodes with volume and length distributions for each place

All places and episodes
with distribution of volume (cubic yards) and length (feet)
for each place’s episodes

place n_episodes volume_cy length_ft volume_cy_dist length_ft_dist
Carolina Beach 32 20.8M 220K
Topsail 29 6.5M 346K
Wrightsville Beach 27 17.0M 234K
Holden Beach 27 4.6M 142K
Emerald Isle 22 7.8M 237K
Figure Eight Island 20 5.7M 56K
Pea Island 17 9.4M 55K
Ocean Isle Beach 16 5.4M 91K
Bald Head Island 15 15.3M 189K
Fort Macon 11 5.7M 68K
Hatteras Island 10 2.7M 16K
Oak Island 9 4.6M 89K
Atlantic Beach 8 12.6M 114K
Kure Beach 7 3.2M 81K
Pine Knoll Shores 5 4.5M 188K
Ocracoke Island 5 0.5M 6K
Indian Beach 4 1.8M 54K
Nags Head 3 9.2M 129K
Masonboro Island 3 2.9M 11K
Southern Shores 3 1.1M 43K
Duck 2 1.8M 18K
Kill Devil Hills 2 1.3M 27K
Caswell Beach 2 1.2M 4K
Buxton 1 1.2M 15K
Eagle Island 1 1.0M 5K
Avon 1 1.0M 13K
Mason 1 0.5M 0K
Bogue Inlet 1 0.2M 6K
West Onslow Beach 1 0.1M 4K
Cape Lookout 1 0.1M 3K
Total 286 149.6M 2,462K

For those figures in which I estimated costs for one or more episodes (Figure 1.12, Figure 1.13) there is more detail below. The episodes missing costs were below average volume for nearly all places.

Show the code
episode_no_costs <- d_not_included |>
  st_drop_geometry() |>
  reframe(n_episodes = n(),
          volume_cy = sum(volume_cy),
          .by = place) |>
  arrange(desc(n_episodes))

episode_all <- d_combined |>
  st_drop_geometry() |>
  reframe(total_n_episodes = n(),
          total_volume_cy = sum(volume_cy),
          .by = place) |>
  arrange(desc(total_n_episodes))

dta_for_table_tmp <- left_join(episode_no_costs, episode_all,
                           by = "place")

dta_for_table_tmp |>
  rows_append(tibble(place = "Total",
                     n_episodes = sum(dta_for_table_tmp$n_episodes),
                     volume_cy = sum(dta_for_table_tmp$volume_cy),
                     total_n_episodes = sum(dta_for_table_tmp$total_n_episodes),
                     total_volume_cy = sum(dta_for_table_tmp$total_volume_cy)
                     )) |>
  mutate(pct_episodes = 100 * n_episodes / total_n_episodes,
         pct_volume = 100 * volume_cy / total_volume_cy) |>
  gt() |>
  tab_header(md("**Places with at least one episode missing cost data**")) |>
  tab_options(table.font.size = 10) |>
  fmt_number(columns = contains("volume_cy"),
             decimals = 0) |>
  gt_plt_bar_pct(column = pct_episodes, 
                 scaled = TRUE, fill = "forestgreen", labels = TRUE,
                 width = 25) |>
  gt_plt_bar_pct(column = pct_volume, 
             scaled = TRUE, fill = "forestgreen", labels = TRUE,
             width = 25) |>
  tab_spanner(
    label = "Episodes missing costs",
    columns = c(n_episodes, volume_cy)
  ) |>
  tab_spanner(
    label = "All episodes for place",
    columns = c(total_n_episodes, total_volume_cy)
  ) |>
  tab_spanner(
    label = "Percent missing",
    columns = c(pct_episodes, pct_volume)
  )
Table 1.2: Places with episodes that don’t include costs

Places with at least one episode missing cost data

place Episodes missing costs All episodes for place Percent missing
n_episodes volume_cy total_n_episodes total_volume_cy pct_episodes pct_volume
Holden Beach 22 2,115,040 27 4,575,242
81.5%
46.2%
Figure Eight Island 17 4,794,679 20 5,685,251
85%
84.3%
Carolina Beach 12 1,113,111 32 20,783,368
37.5%
5.4%
Emerald Isle 11 434,000 22 7,756,286
50%
5.6%
Wrightsville Beach 11 1,322,416 27 16,956,391
40.7%
7.8%
Ocean Isle Beach 7 311,785 16 5,415,985
43.8%
5.8%
Bald Head Island 6 4,179,800 15 15,256,300
40%
27.4%
Pea Island 6 2,733,307 17 9,367,902
35.3%
29.2%
Fort Macon 4 507,148 11 5,726,836
36.4%
8.9%
Topsail 2 211,715 29 6,468,208
6.9%
3.3%
Hatteras Island 2 512,000 10 2,699,801
20%
19%
Masonboro Island 2 2,359,530 3 2,939,530
66.7%
80.3%
Oak Island 2 226,103 9 4,627,369
22.2%
4.9%
Caswell Beach 1 133,200 2 1,223,200
50%
10.9%
Southern Shores 1 50,000 3 1,102,510
33.3%
4.5%
Total 106 21,003,834 243 110,584,179
43.6%
19%


1.3.1 How good are the cost estimates?

Where total cost and median cost are missing, volume_cy is typically available (and varies considerably among each place’s episodes). So using \(median(cost\_cy) * volume\_cy\) for each place (pct_diff_med_cy_cost_times_cy) is a better estimate than simply using \(median(cost)\) for each place (pct_diff_med_cost).

As an error check, the following plots compare the two estimation methods with the actual costs for those episodes that include costs (and thus in these cases I did not use estimated values in plots above). I assume the performance of the estimators is similar for episodes lacking cost data.

Show the code
dta_for_plot <- d_combined_with_est |>
  select(place, year_completed, adjusted_cost_2022, adjusted_cost_cy, med_adjusted_cost_2022, med_adjusted_cost_cy, 
         adjusted_cost_2022_new) |>
  filter(!is.na(adjusted_cost_2022),
         !is.na(adjusted_cost_cy)) |>
  mutate(pct_diff_med_cost = (med_adjusted_cost_2022 - adjusted_cost_2022) / adjusted_cost_2022,
         pct_diff_med_cy_cost_times_cy = (adjusted_cost_2022_new - adjusted_cost_2022) / adjusted_cost_2022
) |>
  pivot_longer(cols = contains("pct_diff"),
               names_to = "pct",
               values_to = "value")

d_fun_a <- dta_for_plot |>
  filter(pct == "pct_diff_med_cy_cost_times_cy") |>
  pull(value) |>
  ecdf()
prob_a <- d_fun_a(1) - d_fun_a(-1)

d_fun_b <- dta_for_plot |>
  filter(pct == "pct_diff_med_cost") |>
  pull(value) |>
  ecdf()
prob_b <- d_fun_b(1) - d_fun_b(-1)

p1 <- dta_for_plot |>
  ggplot() +
  geom_vline(xintercept = c(-1, 0, 1), 
             lty = 2, linewidth = 0.15, alpha = 0.5) +
  geom_density(aes(value, color = pct, fill = pct),
               alpha = 0.1,
               show.legend = FALSE) +
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.02))) +
  scale_color_manual(values = c("firebrick", "dodgerblue")) +
  coord_cartesian(xlim = c(NA, 10)) +
  theme(legend.position = "bottom") +
  labs(
    subtitle = "A: Density",
    x = "Difference in estimate and original data as percent of original data",
    y = "Density"
  )

p2 <- dta_for_plot|>
  ggplot() +
  geom_vline(xintercept = c(-1, 0, 1), 
             lty = 2, linewidth = 0.15, alpha = 0.5) +
  stat_ecdf(aes(value, color = pct),
            pad = FALSE,
            alpha = 1) +
  scale_y_continuous(expand = expansion(mult = c(0.005, 0.02)),
                     labels = label_percent()) +
  scale_color_manual(values = c("firebrick", "dodgerblue")) +
  guides(color = guide_legend(override.aes = list(linewidth = 3))) +
  coord_cartesian(xlim = c(NA, 10)) +
  expand_limits(y = 0) +
  theme(legend.position = "bottom") +
  labs(
    subtitle = "B: ECDF",
    x = "Difference in estimate and original data as percent of original data",
    y = "Percent of all episodes\nwithout missing data"
  )

caption_extra2 <- glue("\nPlots truncated to make them more readible (`pct_diff_med_cost` extends to 40x).")

p1 + p2 + 
  plot_annotation(
    title = glue("The selected estimation method is good enough",
                 "\nand is superior to a simpler method I used initially"),
    subtitle = glue("Selected method captures {percent(prob_a)} of the episode values within ±1.", 
                    "\nand avoids large errors present when using median episode cost.",
                    "\nMedian episode cost captures {percent(prob_b)} and generates more extreme outliers.", 
                    "\nDashed lines are ±1 a.k.a. estimate off by 100%."),
    caption = paste0(my_caption, "\n", caption_extra2)
  ) +
  plot_layout(guides = "collect",
              axes = "collect") &
  theme(legend.position = "bottom")
Figure 1.15: The selected method of cost estimation is good enough



  1. Note that I combined some locations in the source data as described in Section 2.4 Simplifying Categories.↩︎