# R example script
# Author: Blase Ur, with some content borrowed from Saranga Komanduri

# Session-wide options for this script:
#   stringsAsFactors = FALSE  keeps text columns as character vectors so they
#                             can be recoded before being turned into factors
#   width = 400               lets wide tables print on one line in the output
# TRUE/FALSE are spelled out because the shorthands T and F are ordinary
# variables that can be reassigned.
options(stringsAsFactors = FALSE,
        width = 400)

# Helper function - written by Saranga Komanduri
RecodeVector <- function(vector, oldvalues, newvalues) {
  # Recode the values of a vector using parallel vectors of old and new values.
  #
  # Args:
  #   vector:    the vector whose values should be recoded
  #   oldvalues: the values to look for
  #   newvalues: the replacements; newvalues[i] replaces oldvalues[i]
  #
  # Returns:
  #   A copy of `vector` in which every element equal to some oldvalues[i] has
  #   been replaced by newvalues[i]. Elements that match nothing in oldvalues
  #   are left alone.
  #
  # Ex. oldvalues = c("Male", "Female") and newvalues = c("M", "F") changes all
  # instances of "Male" and "Female" to "M" and "F".
  #
  # Stops with an error if oldvalues and newvalues differ in length.
  if (length(oldvalues) != length(newvalues)) {
    stop("oldvalues and newvalues must be the same length!")
  }

  # Look every element up against the ORIGINAL values in a single pass.
  # Replacing one old value at a time on the working copy would recode an
  # element twice whenever a new value is also a later old value (for example
  # swapping "A" and "B" would turn everything into "A").
  positions <- match(vector, oldvalues)
  matched <- !is.na(positions)

  vec2 <- vector    # Work on a copy so the caller's vector is untouched
  vec2[matched] <- newvalues[positions[matched]]
  vec2
}

###############################################################################
# Initialize data
# Assumes the working directory is the folder that holds this script and
# ponies.csv. The input must be a CSV file whose first row contains the column
# headings; make sure the headings don't contain spaces.
data <- read.csv("ponies.csv", header = TRUE)

# Send R output to Results.txt instead of the console. append = FALSE
# overwrites any existing file, and type = "output" diverts normal output only.
# The diversion stays active until sink() is called again with no arguments.
sink(file = "Results.txt",
     append = FALSE,
     type = "output")

################################################################################
# Data cleaning/transformations

# Collapse the LikePonies answers into two bins: "Yes" stays "Yes", while
# "Maybe" and "No" both become "NonYes". The same approach works for binning
# Likert-scale responses.
answers_before <- c("Yes", "Maybe", "No")
answers_after <- c("Yes", "NonYes", "NonYes")
data$LikePoniesBinary <- RecodeVector(data$LikePonies, answers_before, answers_after)

# Tell R that Gender, LikePonies and LikePoniesBinary are categorical variables
# (factors).
# !!! NOTE: The conversion deliberately happens AFTER the recoding. Assigning
# "NonYes" into a column that is already a factor would not work, because
# "NonYes" is not one of that factor's existing categories. So recode first and
# convert to factors last.
data$Gender <- factor(data$Gender)
data$LikePonies <- factor(data$LikePonies)
data$LikePoniesBinary <- factor(data$LikePoniesBinary)

################################################################################
# Print out the counts and percentages for each categorical variable

cat("\n\n============Print counts for categorical variables:\n")

# The same three steps are needed for every categorical column, so loop over
# the column names instead of repeating the code.
for (variable in c("Gender", "LikePonies", "LikePoniesBinary")) {
  cat("\n\n====", variable, ":\n", sep = "")
  counts <- table(data[[variable]])
  # Share of each category, rounded to two decimals and suffixed with "%"
  percentages <- paste0(round(100 * prop.table(counts), 2), "%")
  both <- cbind(counts, percentages)
  # Explicit print(): a bare object name is only auto-printed when the script
  # is run with echo, so source() would otherwise drop the table from the
  # captured output.
  print(both)
}

################################################################################
# Prints a table comparing LikePoniesBinary responses by gender

cat("\n\n============Print contingency table in advance of chi-square test:\n")

cat("\n\n====Compare LikePoniesBinary responses by gender:\n")
# The first argument (Gender) forms the rows and the second (LikePoniesBinary)
# forms the columns.
contingencytable <- table(data$Gender, data$LikePoniesBinary)
# Explicit print() so the table is written even when the script is run via
# source() without echo.
print(contingencytable)

################################################################################
# Conducts Fisher's Exact Test (chi-square equivalent when you have small values in cells)
# where gender is the independent (input) variable
# and LikePoniesBinary is the dependent (output) variable

cat("\n\n============Conduct chi-square equivalent (Fisher's Exact Test):\n")

# Explicit print() so the test result is written even when the script is run
# via source() without echo.
print(fisher.test(contingencytable))
# this is equivalent to: fisher.test(data$Gender, data$LikePoniesBinary)


################################################################################
# Summarize the continuous variable PoniesOwned (overall and by gender) and
# check normality before running the ANOVA

cat("\n\n============Print summaries of continutous variables in advance of ANOVA:\n")

# summary() has no na.rm argument; it reports missing values in its own "NA's"
# entry. The by-gender subsets use %in% rather than == so that rows with a
# missing Gender are left out instead of being counted as extra NA's.
# Each result is wrapped in print() so it is written even when the script is
# run via source() without echo.
cat("\n====Summary statistics for PoniesOwned:\n")
print(summary(data$PoniesOwned))
cat("\n====Summary statistics for PoniesOwned for Males only:\n")
print(summary(data$PoniesOwned[data$Gender %in% "Male"]))
cat("\n====Summary statistics for PoniesOwned for Females only:\n")
print(summary(data$PoniesOwned[data$Gender %in% "Female"]))


cat("\n\n\n====Test normality of the distributions to check appropriateness of ANOVA test:\n")
# shapiro.test() discards missing values itself, so the subsetting expressions
# are kept as they were; this also keeps the "data:" label in the printed
# result unchanged.
print(shapiro.test(data$PoniesOwned[data$Gender == "Male"]))
print(shapiro.test(data$PoniesOwned[data$Gender == "Female"]))
# p values < .05 indicate distributions that are *not* normal. Check the
# p values printed for your own data before relying on the ANOVA below.

################################################################################
# Run an ANOVA test

cat("\n\n============ANOVA test of whether number of ponies owned differs by gender:\n")

# One-way ANOVA with PoniesOwned as the response and Gender as the grouping
# factor; the data frame is passed by name for clarity.
fit <- aov(PoniesOwned ~ Gender, data = data)
# Explicit print() so the ANOVA table is written even when the script is run
# via source() without echo.
print(anova(fit))

# Close capture: sink() with no arguments ends the output diversion, so
# later output goes to the console again
sink()