From 3c53d0705a0287906b4002cadb62caf50927fd16 Mon Sep 17 00:00:00 2001 From: Katrin Date: Sat, 20 Sep 2014 19:49:54 +0200 Subject: [PATCH] sort output by revenue & clearer comments --- summarize-flattr-reports.R | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/summarize-flattr-reports.R b/summarize-flattr-reports.R index 7dbfb1f..b495c38 100644 --- a/summarize-flattr-reports.R +++ b/summarize-flattr-reports.R @@ -1,12 +1,12 @@ # Creates Flattr summary from "Monthly Revenue" reports # Started by Katrin from the KonScience Podcast in Sept. 2014 -# IMPORTANT -# Adjust folder path that contains all the downloaded .csv files here - -path_to_flattr_reports <- "/Users/YOU/Flattr/" +# IMPORTANT: Adjust folder path that contains all the downloaded .csv files here # Windows: Please use double backslashes like C:\\User\\YOU... # Linux & Mac: /Users/YOU/ can be abbreviated as '~/...' +path_to_flattr_reports <- "/Users/YOU/Flattr/" + + # saves original working directory and sets new one as provided above original_wd <- getwd() @@ -14,24 +14,24 @@ setwd(path_to_flattr_reports) # get filenames of Flattr Monthly Revenue CSVs Flattr_filenames <- list.files(path_to_flattr_reports, - pattern = "flattr-revenue-[0-9]*.csv" # RegEx learned from http://www.regexr.com/ - ) + pattern = "flattr-revenue-[0-9]*.csv" + ) # // TODO find easier way to select path, e.g. # - auto-use folder where script runs from // TODO adjust ReadMe.md # - prompt user for path, all files (file.choose selects only one) or 1st file and find others in same folder // TODO remove original_wd code -# read data from CSVs into dataframe +# read data from CSVs into data frame raw_Flattrs <- do.call("rbind", lapply(Flattr_filenames, read.csv, - sep = ";", # column separator + sep = ";", dec = ",", # convert decimal separator from , to . 
for following calculations - stringsAsFactors = FALSE + stringsAsFactors = FALSE # ) ) # Function structure learned from https://stat.ethz.ch/pipermail/r-help/2010-October/255593.html -# load plyr package +# load plyr package for data frame summarization (ddply) library(plyr) # summarizes raw data by title, thus accounting for changes in Flattr Thing ID and URLs @@ -42,14 +42,18 @@ all_Flattrs <- ddply(raw_Flattrs, na.rm = TRUE), # removes NA / empty data all_revenue = sum(revenue, na.rm = TRUE) - ) # // TODO sort output + ) + +# order Flattr Things by revenue +ordered_Flattrs <- all_Flattrs[order(all_Flattrs$all_revenue, decreasing = TRUE),] # exports summary to same folder -write.table(all_Flattrs, - file = "flattr-revenue-summary.csv", # Change only in combination with RegEx pattern "flattr-revenue-[0-9]*.csv" above! If exported file must not be read into Flattr_filenames object. - sep = ";", # reset column separator to Flattr default - dec = ",", # reset decimal separator to Flattr default - row.names = FALSE # +write.table(ordered_Flattrs[2:4], + file = "flattr-revenue-summary.csv", # Change only in combination with RegEx pattern "flattr-revenue-[0-9]*.csv" above! Summary file must not be imported on next run of script. + # restore column and decimal separators to Flattr defaults + sep = ";", + dec = ",", + row.names = FALSE ) # restore original working directory