|
| 1 | +#' Check transit service availability by date |
| 2 | +#' |
| 3 | +#' @description |
| 4 | +#' This function checks the number and proportion of public transport services from the GTFS feeds |
| 5 | +#' that are active on specified dates. This is useful to verify that the selected departure |
| 6 | +#' dates for routing analysis are valid and have adequate service levels. When routing with public transport, it is crucial to use a departure date where services are operational, as indicated in the GTFS `calendar.txt` file. |
| 7 | +#' |
| 8 | +#' @details |
| 9 | +#' You can specify the dates to check in two ways: |
| 10 | +#' \itemize{ |
| 11 | +#' \item Using the `dates` argument to provide a vector of specific dates. |
| 12 | +#' \item Using the `start_date` and `end_date` arguments to provide a continuous date range. |
| 13 | +#' } |
| 14 | +#' You must use one of these two methods, but not both in the same function call. |
| 15 | +#' |
| 16 | +#' @param r5r_network A routable transport network created with `build_network()`. |
| 17 | +#' @param r5r_core The `r5r_core` argument is deprecated as of r5r v2.3.0. Please use the `r5r_network` argument instead. |
| 18 | +#' @param dates A vector of specific dates to be checked. Can be character strings in |
| 19 | +#' "YYYY-MM-DD" format, or objects of class `Date`. This argument cannot be used with `start_date` or `end_date`. |
| 20 | +#' @param start_date The start date for a continuous date range. Must be a single |
| 21 | +#' character string in "YYYY-MM-DD" format or a `Date` object. Must be used with `end_date`. |
| 22 | +#' @param end_date The end date for a continuous date range. Must be a single |
| 23 | +#' character string in "YYYY-MM-DD" format or a `Date` object. Must be used with `start_date`. |
| 24 | +#' @return A `data.table` with four columns: `date`, `total_services`, |
| 25 | +#' `active_services`, and `pct_active` (the proportion of active services). |
| 26 | +#' |
| 27 | +#' @examplesIf identical(tolower(Sys.getenv("NOT_CRAN")), "true") |
| 28 | +#' library(r5r) |
| 29 | +#' data_path <- system.file("extdata/poa", package = "r5r") |
| 30 | +#' r5r_network <- build_network(data_path) |
| 31 | +#' |
| 32 | +#' # Example 1: Check a vector of specific dates |
| 33 | +#' # Let's check a regular weekday and a Sunday, where service may differ. |
| 34 | +#' dates_to_check <- c("2019-05-13", "2019-05-19") |
| 35 | +#' availability1 <- check_transit_availability(r5r_network, dates = dates_to_check) |
| 36 | +#' availability1 |
| 37 | +#' #> date total_services active_services pct_active |
| 38 | +#' #> 1: 2019-05-13 118 116 0.983050847 |
| 39 | +#' #> 2: 2019-05-19 118 1 0.008474576 |
| 40 | +#' |
| 41 | +#' # Example 2: Check a continuous date range using start_date and end_date |
| 42 | +#' availability2 <- check_transit_availability( |
| 43 | +#' r5r_network, |
| 44 | +#' start_date = "2019-01-01", |
| 45 | +#' end_date = "2019-12-31" |
| 46 | +#' ) |
| 47 | +#' availability2[121:124,] |
| 48 | +#' #> date total_services active_services pct_active |
| 49 | +#' #> <Date> <int> <int> <num> |
| 50 | +#' #> 1: 2019-05-01 118 62 0.525423729 |
| 51 | +#' #> 2: 2019-05-02 118 116 0.983050847 |
| 52 | +#' #> 3: 2019-05-03 118 116 0.983050847 |
| 53 | +#' #> 4: 2019-05-04 118 1 0.008474576 |
| 54 | +#' |
| 55 | +#' # plot availability over the year |
| 56 | +#' library(ggplot2) |
| 57 | +#' ggplot(availability2, aes(x = date, y = pct_active)) + |
| 58 | +#' geom_line() + |
| 59 | +#' geom_point() + |
| 60 | +#' theme_classic(base_size = 16) |
| 61 | +#' |
| 62 | +#' stop_r5(r5r_network) |
| 63 | +#' @export |
| 64 | +check_transit_availability <- function( |
| 65 | + r5r_network, |
| 66 | + r5r_core = deprecated(), |
| 67 | + dates = NULL, |
| 68 | + start_date = NULL, |
| 69 | + end_date = NULL |
| 70 | +) { |
| 71 | + # deprecating r5r_core -------------------------------------- |
| 72 | + if (lifecycle::is_present(r5r_core)) { |
| 73 | + cli::cli_warn(c( |
| 74 | + "!" = "The `r5r_core` argument is deprecated as of r5r v2.3.0.", |
| 75 | + "i" = "Please use the `r5r_network` argument instead." |
| 76 | + )) |
| 77 | + r5r_network <- r5r_core |
| 78 | + } |
| 79 | + |
| 80 | + # Check inputs |
| 81 | + checkmate::assert_class(r5r_network, "r5r_network") |
| 82 | + jcore <- r5r_network@jcore |
| 83 | + |
| 84 | + # Argument validation for date inputs (CONSOLIDATED) |
| 85 | + is_valid_dates_vector <- !is.null(dates) && |
| 86 | + is.null(start_date) && |
| 87 | + is.null(end_date) |
| 88 | + is_valid_date_range <- is.null(dates) && |
| 89 | + !is.null(start_date) && |
| 90 | + !is.null(end_date) |
| 91 | + |
| 92 | + if (!is_valid_dates_vector && !is_valid_date_range) { |
| 93 | + cli::cli_abort( |
| 94 | + c( |
| 95 | + "Incorrect date arguments provided.", |
| 96 | + "i" = "Please specify dates using one of the following methods:", |
| 97 | + "*" = "Use the {.arg dates} argument to provide a vector of specific dates.", |
| 98 | + "*" = "Use both {.arg start_date} and {.arg end_date} to provide a continuous date range.", |
| 99 | + "x" = "You cannot mix these methods or provide an incomplete date range." |
| 100 | + ) |
| 101 | + ) |
| 102 | + } |
| 103 | + |
| 104 | + # Helper function to parse and validate date inputs |
| 105 | + parse_date_input <- function(date_input, arg_name) { |
| 106 | + # Pass Date objects through directly |
| 107 | + if (inherits(date_input, "Date")) { |
| 108 | + return(date_input) |
| 109 | + } |
| 110 | + |
| 111 | + if (!is.character(date_input)) { |
| 112 | + cli::cli_abort( |
| 113 | + "{.arg {arg_name}} must be a vector of character strings or Date objects." |
| 114 | + ) |
| 115 | + } |
| 116 | + |
| 117 | + # Use regex to strictly check for "YYYY-MM-DD" format |
| 118 | + is_iso_format <- grepl("^\\d{4}-\\d{2}-\\d{2}$", date_input) |
| 119 | + if (any(!is_iso_format)) { |
| 120 | + cli::cli_abort(c( |
| 121 | + "x" = "Invalid date format found in {.arg {arg_name}}.", |
| 122 | + "i" = "Please use the strict {.val 'YYYY-MM-DD'} format for all date strings." |
| 123 | + )) |
| 124 | + } |
| 125 | + |
| 126 | + # Use a tryCatch block to convert potential errors from as.Date() into NAs |
| 127 | + parsed_dates_list <- lapply(date_input, function(d) { |
| 128 | + tryCatch( |
| 129 | + { |
| 130 | + as.Date(d) |
| 131 | + }, |
| 132 | + error = function(e) { |
| 133 | + # If as.Date fails, return NA instead of throwing an error |
| 134 | + as.Date(NA) |
| 135 | + } |
| 136 | + ) |
| 137 | + }) |
| 138 | + parsed_dates <- do.call("c", parsed_dates_list) |
| 139 | + |
| 140 | + # Final check for NAs, which now correctly indicate logically impossible dates |
| 141 | + if (anyNA(parsed_dates)) { |
| 142 | + cli::cli_abort(c( |
| 143 | + "x" = "Could not parse all values in {.arg {arg_name}}.", |
| 144 | + "i" = "One or more dates are logically invalid (e.g., '2025-02-29')." |
| 145 | + )) |
| 146 | + } |
| 147 | + |
| 148 | + return(parsed_dates) |
| 149 | + } |
| 150 | + |
| 151 | + # Prepare the list of dates to check |
| 152 | + if (!is.null(dates)) { |
| 153 | + dates_as_date <- parse_date_input(dates, "dates") |
| 154 | + dates_formatted <- format(dates_as_date, "%Y-%m-%d") |
| 155 | + } else { |
| 156 | + start_d <- parse_date_input(start_date, "start_date") |
| 157 | + end_d <- parse_date_input(end_date, "end_date") |
| 158 | + |
| 159 | + if (length(start_d) > 1 || length(end_d) > 1) { |
| 160 | + cli::cli_abort( |
| 161 | + "{.arg start_date} and {.arg end_date} must each be a single date." |
| 162 | + ) |
| 163 | + } |
| 164 | + if (start_d > end_d) { |
| 165 | + cli::cli_abort( |
| 166 | + "{.arg start_date} must be before or the same as {.arg end_date}." |
| 167 | + ) |
| 168 | + } |
| 169 | + |
| 170 | + date_sequence <- seq(from = start_d, to = end_d, by = "day") |
| 171 | + dates_formatted <- format(date_sequence, "%Y-%m-%d") |
| 172 | + } |
| 173 | + |
| 174 | + # Function to process a single date by querying the Java object |
| 175 | + process_single_date <- function(date_str) { |
| 176 | + services_java <- jcore$getTransitServicesByDate(date_str) |
| 177 | + services_dt <- java_to_dt(services_java) |
| 178 | + |
| 179 | + if (nrow(services_dt) == 0) { |
| 180 | + return(data.table::data.table( |
| 181 | + date = as.Date(date_str), |
| 182 | + total_services = 0L, |
| 183 | + active_services = 0L, |
| 184 | + pct_active = 0.0 |
| 185 | + )) |
| 186 | + } |
| 187 | + |
| 188 | + total_s <- nrow(services_dt) |
| 189 | + active_s <- sum(services_dt$active_on_date, na.rm = TRUE) |
| 190 | + pct_s <- if (total_s > 0) active_s / total_s else 0.0 |
| 191 | + |
| 192 | + return(data.table::data.table( |
| 193 | + date = as.Date(date_str), |
| 194 | + total_services = total_s, |
| 195 | + active_services = active_s, |
| 196 | + pct_active = pct_s |
| 197 | + )) |
| 198 | + } |
| 199 | + |
| 200 | + # Apply function to all dates and bind results into a single data.table |
| 201 | + results_list <- lapply(dates_formatted, process_single_date) |
| 202 | + final_dt <- data.table::rbindlist(results_list) |
| 203 | + |
| 204 | + return(final_dt) |
| 205 | +} |
0 commit comments