Here is an extensive list of R CLI commands, techniques, and advanced methods for working with R in various environments.
-
Start R:
R
-
Execute R script:
Rscript script.R
-
Install package:
install.packages("package_name")
-
Load package:
library(package_name)
-
Get help for a function:
?function_name
-
Quit R:
q()
-
Read CSV file:
data <- read.csv("file.csv")
-
Write CSV file:
write.csv(data, "file.csv")
-
Read Excel file:
install.packages("readxl"); library(readxl); data <- read_excel("file.xlsx")
-
Read data from URL:
data <- read.csv("http://example.com/file.csv")
-
View first few rows of data:
head(data)
-
View structure of data:
str(data)
-
Filter data:
subset_data <- subset(data, column_name == "value")
-
Select columns:
selected_data <- data[, c("column1", "column2")]
-
Sort data:
sorted_data <- data[order(data$column_name),]
-
Merge datasets:
merged_data <- merge(data1, data2, by = "common_column")
-
Plot data:
plot(data$column1, data$column2)
-
Histogram:
hist(data$column_name)
-
Boxplot:
boxplot(data$column_name)
-
Barplot:
barplot(table(data$column_name))
-
Advanced plotting with ggplot2:
install.packages("ggplot2") library(ggplot2) ggplot(data, aes(x = column1, y = column2)) + geom_point()
-
Summary statistics:
summary(data)
-
Correlation:
cor(data$column1, data$column2)
-
Linear regression:
model <- lm(column1 ~ column2, data = data) summary(model)
-
ANOVA:
aov_result <- aov(column1 ~ factor_column, data = data) summary(aov_result)
-
t-test:
t.test(data$column1, data$column2)
-
Install dplyr:
install.packages("dplyr") library(dplyr)
-
Select columns:
selected_data <- select(data, column1, column2)
-
Filter rows:
filtered_data <- filter(data, column_name == "value")
-
Arrange rows:
arranged_data <- arrange(data, column_name)
-
Mutate (create new columns):
mutated_data <- mutate(data, new_column = column1 + column2)
-
Summarize data:
summary_data <- summarize(data, mean_value = mean(column_name))
-
Group by and summarize:
grouped_data <- data %>% group_by(group_column) %>% summarize(mean_value = mean(column_name))
-
Scatter plot:
ggplot(data, aes(x = column1, y = column2)) + geom_point()
-
Line plot:
ggplot(data, aes(x = column1, y = column2)) + geom_line()
-
Bar plot:
ggplot(data, aes(x = factor_column, y = column_name)) + geom_bar(stat = "identity")
-
Histogram:
ggplot(data, aes(x = column_name)) + geom_histogram(binwidth = 10)
-
Boxplot:
ggplot(data, aes(x = factor_column, y = column_name)) + geom_boxplot()
-
Facet wrap:
ggplot(data, aes(x = column1, y = column2)) + geom_point() + facet_wrap(~ factor_column)
-
Install and load time series package:
install.packages("forecast") library(forecast)
-
Create time series object:
ts_data <- ts(data$column_name, start = c(2020, 1), frequency = 12)
-
Plot time series:
plot(ts_data)
-
Decompose time series:
decomposed <- decompose(ts_data) plot(decomposed)
-
Forecasting:
fit <- auto.arima(ts_data) forecast_data <- forecast(fit, h = 12) plot(forecast_data)
-
Install R Markdown:
install.packages("rmarkdown")
-
Create R Markdown document:
rmarkdown::draft("report.Rmd", template = "html_document", package = "rmarkdown")
-
Render R Markdown to HTML:
rmarkdown::render("report.Rmd")
-
List installed packages:
installed.packages()
-
Update packages:
update.packages()
-
Remove a package:
remove.packages("package_name")
-
Check for package updates:
old.packages()
-
Install package from GitHub:
install.packages("devtools") devtools::install_github("username/repository")
-
Install parallel package:
install.packages("parallel") library(parallel)
-
Parallel apply:
mclapply(1:10, function(x) x^2, mc.cores = 4)
-
Parallel map with furrr:
install.packages("furrr"); library(furrr); plan(multisession, workers = 4); future_map(1:10, ~ .x^2)
-
Install database packages:
install.packages(c("DBI", "RSQLite", "dplyr")) library(DBI) library(RSQLite) library(dplyr)
-
Connect to SQLite database:
con <- dbConnect(RSQLite::SQLite(), "database.sqlite")
-
List tables:
dbListTables(con)
-
Read table into data frame:
data <- dbReadTable(con, "table_name")
-
Write data frame to table:
dbWriteTable(con, "table_name", data)
-
Execute SQL query:
result <- dbGetQuery(con, "SELECT * FROM table_name WHERE column_name = 'value'")
-
Disconnect from database:
dbDisconnect(con)
-
Install caret package:
install.packages("caret") library(caret)
-
Train-test split:
set.seed(123); trainIndex <- createDataPartition(data$target, p = 0.8, list = FALSE); trainData <- data[trainIndex, ]; testData <- data[-trainIndex, ]
-
Train model:
model <- train(target ~ ., data = trainData, method = "rpart")
-
Make predictions:
predictions <- predict(model, testData)
-
Evaluate Model:
confusionMatrix(predictions, testData$target)
-
Anonymous functions:
sapply(1:10, function(x) x^2)
-
Apply family functions:
lapply(data, mean) sapply(data, mean) apply(matrix_data, 1, sum)
-
Higher-order functions with purrr:
install.packages("purrr") library(purrr) map(1:10, ~ .x^2)
-
Install tidyr:
install.packages("tidyr") library(tidyr)
-
Gather (wide to long format; superseded by pivot_longer() in current tidyr):
long_data <- gather(data, key = "variable", value = "value", -id)
-
Spread (long to wide format; superseded by pivot_wider() in current tidyr):
wide_data <- spread(long_data, key = "variable", value = "value")
-
Separate columns:
separated_data <- separate(data, col = "column_name", into = c("col1", "col2"), sep = "_")
-
Unite columns:
united_data <- unite(data, new_column, col1, col2, sep = "_")
-
Install Shiny:
install.packages("shiny") library(shiny)
-
Create Shiny app:
ui <- fluidPage(
  titlePanel("Simple Shiny App"),
  sidebarLayout(
    sidebarPanel(
      sliderInput("bins", "Number of bins:", min = 1, max = 50, value = 30)
    ),
    mainPanel(
      plotOutput("distPlot")
    )
  )
)
server <- function(input, output) {
  output$distPlot <- renderPlot({
    x <- faithful$eruptions
    bins <- seq(min(x), max(x), length.out = input$bins + 1)
    hist(x, breaks = bins, col = 'darkgray', border = 'white')
  })
}
shinyApp(ui = ui, server = server)
-
Install Plotly:
install.packages("plotly") library(plotly)
-
Create interactive plot:
plot_ly(data, x = ~column1, y = ~column2, type = 'scatter', mode = 'lines+markers')
-
3D plot:
plot_ly(data, x = ~column1, y = ~column2, z = ~column3, type = 'scatter3d', mode = 'markers')
-
Install knitr:
install.packages("knitr") library(knitr)
-
Knit R Markdown to HTML (knit() alone only produces Markdown):
knitr::knit2html("report.Rmd")
-
Embed R code in Markdown:
```{r}
summary(data)
```
-
Debug a function:
debug(function_name) function_name(args) undebug(function_name)
-
Profiling code with Rprof:
Rprof("profile_data.out") # Your code here Rprof(NULL) summaryRprof("profile_data.out")
-
Advanced debugging with the debug package:
install.packages("debug"); library(debug); mtrace(function_name)
-
Install httr package:
install.packages("httr") library(httr)
-
GET request:
response <- GET("https://api.example.com/data") content <- content(response, "text")
-
POST request:
response <- POST("https://api.example.com/data", body = list(key = "value")) content <- content(response, "text")
-
JSON parsing:
install.packages("jsonlite") library(jsonlite) json_data <- fromJSON(content)
-
Install future and furrr:
install.packages(c("future", "furrr")) library(future) library(furrr)
-
Set up parallel plan:
plan(multisession, workers = 4)
-
Parallel mapping:
results <- future_map(1:10, ~ .x^2)
-
Shut down parallel workers:
plan(sequential)
-
Preprocessing data:
preProc <- preProcess(trainData, method = c("center", "scale")) trainData <- predict(preProc, trainData) testData <- predict(preProc, testData)
-
Cross-validation:
trainControl <- trainControl(method = "cv", number = 10) model <- train(target ~ ., data = trainData, method = "rpart", trControl = trainControl)
-
Hyperparameter tuning:
grid <- expand.grid(cp = seq(0.01, 0.1, by = 0.01)) model <- train(target ~ ., data = trainData, method = "rpart", trControl = trainControl, tuneGrid = grid)
-
Install text mining packages:
install.packages(c("tm", "SnowballC")) library(tm) library(SnowballC)
-
Create corpus:
docs <- Corpus(VectorSource(text_data))
-
Text preprocessing:
docs <- tm_map(docs, content_transformer(tolower)) docs <- tm_map(docs, removePunctuation) docs <- tm_map(docs, removeNumbers) docs <- tm_map(docs, removeWords, stopwords("en"))
-
Term-document matrix:
dtm <- DocumentTermMatrix(docs)
-
Word cloud (freq is derived from the term-document matrix built above):
install.packages("wordcloud"); library(wordcloud); freq <- colSums(as.matrix(dtm)); wordcloud(words = names(freq), freq = freq, min.freq = 1, scale = c(3, 0.5))
-
Sentiment analysis with syuzhet:
install.packages("syuzhet"); library(syuzhet); sentiments <- get_nrc_sentiment(text_data)
-
Install data.table:
install.packages("data.table") library(data.table)
-
Convert data frame to data.table:
dt <- as.data.table(data)
-
Fast data filtering:
filtered_data <- dt[column_name == "value"]
-
Fast aggregation:
aggregated_data <- dt[, .(mean_value = mean(column_name)), by = group_column]
-
Joining tables:
merged_data <- merge(dt1, dt2, by = "common_column")
This extensive list provides a comprehensive overview of R CLI commands and advanced techniques across various domains, including data manipulation, visualization, statistical analysis, machine learning, parallel computing, and more. It covers basic operations, package management, advanced data wrangling with dplyr and tidyr, web applications with Shiny, interactive plotting with Plotly, reproducible research with knitr, debugging, working with APIs, and advanced machine learning with caret, among other topics.