|
#'
#' Reads a raw Tecan Spark export together with a plate layout, joins every
#' measurement block to the layout by well and writes the tidy result next to
#' the input file as `<data_csv without its extension>_parsed.csv`.
#'
#' @param data_csv path to csv file from Tecan Spark plate reader
#' @param layout_csv path to csv file containing plate layout information.
#'   Must contain a `well` column.
#' @param timeseries Boolean flag indicating whether the data is a timeseries
#'   or a single recording. The Tecan software outputs the two scenarios
#'   differently.
#'
#' @return a data.frame containing the parsed plate reader data
#' @export
#' @importFrom rlang .data
#'
spark_parse <- function(data_csv, layout_csv, timeseries = FALSE) {
  timeseries <- as.logical(timeseries)
  if (length(timeseries) != 1L || is.na(timeseries)) {
    stop("`timeseries` must be a single TRUE or FALSE.", call. = FALSE)
  }

  data <- utils::read.table(data_csv, sep = ",", blank.lines.skip = TRUE,
                            header = FALSE, stringsAsFactors = FALSE)

  plate_layout <- utils::read.csv(layout_csv)
  if (!"well" %in% names(plate_layout)) {
    stop("`layout_csv` must contain a 'well' column.", call. = FALSE)
  }

  parsed <- if (timeseries) {
    .spark_parse_timeseries(data, plate_layout)
  } else {
    .spark_parse_single(data, plate_layout)
  }

  # write parsed data to csv ------------------------------------------------
  # Build the name from the path without its extension rather than with a
  # regex substitution: a substitution that fails to match would leave the
  # name unchanged and overwrite the raw export.
  out_name <- paste0(tools::file_path_sans_ext(data_csv), "_parsed.csv")
  utils::write.csv(x = parsed, file = out_name, row.names = FALSE)

  parsed
}

# Parse a time-series export: a sequence of measurement blocks that follows
# the last "Start Time" row, each block closed by a blank row, with an
# "End Time" row after the final block.
.spark_parse_timeseries <- function(data, plate_layout) {
  first_col <- data[, 1]
  n_rows <- nrow(data)

  start_idx <- which(first_col == "Start Time")
  if (length(start_idx) == 0L) {
    stop("No 'Start Time' row found in the plate reader file.", call. = FALSE)
  }
  # only the last "Start Time" row is used to locate the measurement blocks
  start_idx <- start_idx[length(start_idx)]

  blank_idx <- which(is.na(first_col) | first_col == "")

  blocks <- list()
  block_start <- start_idx + 2L
  repeat {
    if (block_start > n_rows) {
      stop("Reached the end of the file without finding an 'End Time' row.",
           call. = FALSE)
    }

    # find what is being measured
    block_name <- first_col[block_start]

    # tolerate extra blank rows between blocks
    if (is.na(block_name) || block_name == "") {
      block_start <- block_start + 1L
      next
    }

    # check if we are at the end of the measurements
    if (block_name == "End Time") {
      break
    }

    # find where the end of the current measurement block is
    block_end <- blank_idx[blank_idx > block_start][1]
    if (is.na(block_end)) {
      stop("Measurement block '", block_name,
           "' is not terminated by a blank row.", call. = FALSE)
    }
    if (block_end - block_start < 4L) {
      stop("Measurement block '", block_name,
           "' is too short to contain its three header rows.", call. = FALSE)
    }

    # grab the data only for that measurement
    new_block <- data[(block_start + 1L):(block_end - 1L), ]

    # trim unnecessary readings i.e. temp and cycle number, and rename the
    # columns after the values in the block's second row (the time points)
    times <- as.character(new_block[2, ])
    new_block <- new_block[-(1:3), ]
    names(new_block) <- times
    names(new_block)[1] <- "well"

    # add info for each well
    joined_block <- dplyr::full_join(plate_layout, new_block, by = "well")
    joined_block$measure <- block_name

    blocks[[length(blocks) + 1L]] <- joined_block
    block_start <- block_end + 1L
  }

  if (length(blocks) == 0L) {
    stop("No measurement blocks found between 'Start Time' and 'End Time'.",
         call. = FALSE)
  }
  all_data <- do.call(rbind, blocks)

  # rearrange data ----------------------------------------------------------
  # every column after `well`, except the trailing `measure`, is a time point
  well_idx <- which(names(all_data) == "well")
  id_cols <- c(seq_len(well_idx), ncol(all_data))
  time_cols <- setdiff(seq_along(all_data), id_cols)
  # gather()/spread() are kept so the shape of existing output is unchanged
  gathered_data <- tidyr::gather(all_data, key = "time", value = "p",
                                 dplyr::all_of(time_cols))
  gathered_data$time <- as.numeric(gathered_data$time)
  gathered_data$p <- as.numeric(gathered_data$p)
  spread_data <- tidyr::spread(gathered_data, key = "measure", value = "p")

  spread_data <- .spark_add_well_position(spread_data)
  dplyr::arrange(spread_data, .data$time, .data$row, .data$column)
}

# Parse a single-recording export: one block per "Start Time"/"End Time" pair,
# named by the matching "Name" row.
.spark_parse_single <- function(data, plate_layout) {
  first_col <- data[, 1]

  start_idx <- which(first_col == "Start Time")
  end_idx <- which(first_col == "End Time")
  # drop the first "Name" entry, which just details the plate type
  name_idx <- which(first_col == "Name")[-1]

  n_blocks <- length(start_idx)
  if (n_blocks == 0L) {
    stop("No 'Start Time' row found in the plate reader file.", call. = FALSE)
  }
  if (length(end_idx) < n_blocks || length(name_idx) < n_blocks) {
    stop("Every 'Start Time' row needs a matching 'Name' and 'End Time' row.",
         call. = FALSE)
  }

  blocks <- lapply(seq_len(n_blocks), function(i) {
    # record name of what is being measured
    block_name <- data[name_idx[i], 2]

    # well rows sit between fixed offsets from the start and end markers
    first_row <- start_idx[i] + 4L
    last_row <- end_idx[i] - 3L
    if (last_row < first_row) {
      stop("Measurement block '", block_name, "' contains no well rows.",
           call. = FALSE)
    }

    # grab and name the data
    new_block <- data[first_row:last_row, 1:2]
    names(new_block) <- c("well", "value")
    new_block$value <- as.numeric(new_block$value)

    # join to plate layout, add measurement category
    joined_block <- dplyr::full_join(plate_layout, new_block, by = "well")
    joined_block$measure <- block_name
    joined_block
  })
  all_data <- do.call(rbind, blocks)

  # rearrange data ----------------------------------------------------------
  spread_data <- tidyr::pivot_wider(all_data, names_from = "measure",
                                    values_from = "value")

  spread_data <- .spark_add_well_position(spread_data)
  dplyr::arrange(spread_data, .data$row, .data$column)
}

# Split the `well` label into `row` (its first character) and `column` (the
# remaining characters, converted to a number).
.spark_add_well_position <- function(parsed) {
  well <- as.character(parsed$well)
  parsed$row <- substr(x = well, start = 1, stop = 1)
  parsed$column <- as.numeric(substr(x = well, start = 2, stop = nchar(well)))
  parsed
}
|
80 | 130 |
|
81 | 131 | #' Parser for Tecan Spark plate reader data
|
|
0 commit comments