# R example script
# Author: Blase Ur, with some content borrowed from Saranga Komanduri

# Set options that make R not use strings as factors and have a maximum print
# width of 400
options(stringsAsFactors = FALSE, width = 400)

# Helper function - written by Saranga Komanduri
RecodeVector <- function(vector, oldvalues, newvalues) {
  # Recode the values of a vector using parallel vectors of old and new values.
  # Ex. oldvalues = c("Male", "Female") and newvalues = c("M", "F") will change
  # all instances of "Male" and "Female" in the given vector to "M" and "F",
  # leaving the other values alone.
  #
  # Args:
  #   vector:    the vector whose values should be recoded
  #   oldvalues: the values to look for in `vector`
  #   newvalues: the replacements; newvalues[i] replaces oldvalues[i]
  #
  # Returns:
  #   A copy of `vector` in which every element found in `oldvalues` has been
  #   replaced by the corresponding element of `newvalues`.
  #
  # Stops with an error if `oldvalues` and `newvalues` differ in length.
  if (length(oldvalues) != length(newvalues)) {
    stop("oldvalues and newvalues must be the same length!")
  }
  if (length(oldvalues) == 0L) {
    return(vector) # nothing to recode
  }

  # Look every element up against the ORIGINAL values in one pass. Recoding one
  # pair at a time on the partially recoded copy would recode a value twice
  # whenever a new value is also a (later) old value, e.g. when swapping labels.
  idx <- match(vector, oldvalues)
  hit <- !is.na(idx)

  vec2 <- vector # Make copy of vector and replace the matched values
  vec2[hit] <- newvalues[idx[hit]]
  vec2
}

###############################################################################
# Initialize data

# Assume current directory is the source file location
# Load data into table. This assumes that you have a CSV file!!!
# Make sure that column headings (first row) don't have spaces
data <- read.csv("ponies.csv", header = TRUE)

# Send R output to the following file
sink(file = "Results.txt", append = FALSE, type = "output")

################################################################################
# Data cleaning/transformations

# Let's bin all instances of "Maybe" and "No" into "NonYes"
# This is the kind of thing you might want to do with your Likert-scale responses
data$LikePoniesBinary <- RecodeVector(
  data$LikePonies,
  c("Yes", "Maybe", "No"),
  c("Yes", "NonYes", "NonYes")
)

# Now, we need to let R know that Gender and LikePonies are categorical variables,
# which are known as factors in R. We'll specify that "Gender" and "LikePonies" and
# "LikePoniesBinary" are all categorical.
# !!!
# NOTE: If you tried to do the recoding above after turning data$LikePonies into
# a factor, then it would ignore all data that's recoded as "NonYes" since it
# won't recognize that as a valid category. Instead, turn them into factors at the end
data$Gender <- factor(data$Gender)
data$LikePonies <- factor(data$LikePonies)
data$LikePoniesBinary <- factor(data$LikePoniesBinary)

################################################################################
# Print out the counts and percentages for our three categorical variables

CountsWithPercentages <- function(x) {
  # Tabulate a categorical vector.
  #
  # Args:
  #   x: a categorical vector (e.g. a factor column of the data)
  #
  # Returns:
  #   A character matrix with one row per category and two columns:
  #   "counts" (number of occurrences) and "percentages" (share of the total,
  #   rounded to 2 decimals, with a trailing "%").
  counts <- table(x)
  percentages <- paste0(round(100 * prop.table(counts), 2), "%")
  cbind(counts, percentages)
}

cat("\n\n============Print counts for categorical variables:\n")
# Results are print()ed explicitly: a bare expression at top level is only
# auto-printed under Rscript / at the console, not when the file is source()d.
for (column in c("Gender", "LikePonies", "LikePoniesBinary")) {
  cat("\n\n====", column, ":\n", sep = "")
  print(CountsWithPercentages(data[[column]]))
}

################################################################################
# Prints a table comparing LikePoniesBinary responses by gender
cat("\n\n============Print contingency table in advance of chi-square test:\n")
cat("\n\n====Compare LikePoniesBinary responses by gender:\n")
# In table(A, B), A will be rows and B will be columns
contingencytable <- table(data$Gender, data$LikePoniesBinary)
print(contingencytable)

################################################################################
# Conducts Fisher's Exact Test (chi-square equivalent when you have small values in cells)
# where gender is the independent (input) variable
# and LikePoniesBinary is the dependent (output) variable
cat("\n\n============Conduct chi-square equivalent (Fisher's Exact Test):\n")
print(fisher.test(contingencytable)) # this is equivalent
# to: fisher.test(data$Gender, data$LikePoniesBinary)

################################################################################
# Summarize the continuous variable PoniesOwned, overall and by gender
# NOTE(review): in the collapsed source this header cat() directly follows a
# lone "#"; it is treated as live code, like every other section header -
# confirm it was not meant to be commented out.
cat("\n\n============Print summaries of continutous variables in advance of ANOVA:\n")

# Results are print()ed explicitly: a bare expression at top level is only
# auto-printed under Rscript / at the console, not when the file is source()d.
# summary() reports missing values on its own, so no na.rm argument is passed
# (it would be silently ignored).
cat("\n====Summary statistics for PoniesOwned:\n")
print(summary(data$PoniesOwned))

# %in% never yields NA, so rows with a missing Gender are left out of the
# subset instead of showing up as extra NA's in the per-gender summary.
cat("\n====Summary statistics for PoniesOwned for Males only:\n")
print(summary(data$PoniesOwned[data$Gender %in% "Male"]))
cat("\n====Summary statistics for PoniesOwned for Females only:\n")
print(summary(data$PoniesOwned[data$Gender %in% "Female"]))

cat("\n\n\n====Test normality of the distributions to check appropriateness of ANOVA test:\n")
# The argument expressions are kept as written because shapiro.test() echoes
# them in its "data:" line; it discards missing values itself.
print(shapiro.test(data$PoniesOwned[data$Gender == "Male"]))
print(shapiro.test(data$PoniesOwned[data$Gender == "Female"]))
# p values < .05 indicate distributions that are *not* normal. In this case, they're all fine.

################################################################################
# Run an ANOVA test
cat("\n\n============ANOVA test of whether number of ponies owned differs by gender:\n")
fit <- aov(PoniesOwned ~ Gender, data = data)
print(anova(fit)) # print it out

# Close capture
sink()