-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_analysis.R
More file actions
118 lines (88 loc) · 4.57 KB
/
run_analysis.R
File metadata and controls
118 lines (88 loc) · 4.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
## Entry point: build the tidy summary of the UCI HAR data set.
##
## Fetches the raw data when it is missing, stacks the test and training
## partitions, replaces the numeric activity codes with readable labels, and
## averages every measurement for each subject/activity pair.
##
## Takes no arguments. Returns the data frame produced by aggregate():
## subject, activity, and one column per measurement holding its mean.
go <- function() {
  ## Make sure the raw files are on disk before anything is read
  downloadfiles()

  ## The readers already keep only the mean / standard deviation
  ## measurements (Step 2) and give the columns descriptive names (Step 4).
  ## Step 1: stack the two partitions, test rows first, then training rows.
  combined <- rbind(readtest(), readtrain())

  ## Step 3: turn activity codes 1..6 into descriptive activity names
  activity_labels <- c("Walking", "Walking Upstairs", "Walking Downstairs",
                       "Sitting", "Standing", "Laying")
  combined$activity <- factor(combined$activity, levels = 1:6,
                              labels = activity_labels)

  ## Step 5: average of each variable for each activity and each subject.
  ## The result is returned rather than written to disk, because the script
  ## is meant to output the tidy data set. The submission file was produced
  ## from it with (.txt extension because the web page rejects .csv):
  ## write.table(tidy, "UCI-HAR-Dataset-Tidy.txt", sep=",")
  aggregate(. ~ subject + activity, data = combined, FUN = mean)
}
## Shared reader for one partition ("test" or "train") of the UCI HAR data.
##
## Reads X_<partition>.txt, names its columns from features.txt, keeps the
## columns whose feature name contains "mean" or "std", and prepends the
## matching subject and activity columns.
##
## partition: "test" or "train"; selects both the sub-directory and the file
##            name suffix under "UCI HAR Dataset" in the working directory.
##
## Returns a data frame with columns subject, activity, then the selected
## measurements (all "mean" matches first, then all "std" matches) under
## cleaned-up names. Stops if features.txt does not provide exactly one name
## per measurement column.
read_har_partition <- function(partition) {
  root <- "UCI HAR Dataset"
  partition_file <- function(stem) {
    file.path(root, partition, paste0(stem, "_", partition, ".txt"))
  }

  ## read features; these name the measurement columns
  features <- read.table(file.path(root, "features.txt"), sep = " ",
                         col.names = c("x", "feature"),
                         stringsAsFactors = FALSE)

  ## read X: one whitespace-separated numeric reading per feature. Splitting
  ## on whitespace avoids hard-coding the file's field widths.
  x <- read.table(partition_file("X"), colClasses = "numeric")
  if (ncol(x) != nrow(features)) {
    stop("features.txt lists ", nrow(features), " features but ",
         partition_file("X"), " has ", ncol(x), " columns", call. = FALSE)
  }
  colnames(x) <- features$feature

  ## choose desired columns: mean or std. The match is a case-sensitive
  ## substring test, so names such as "...-meanFreq()" are kept as well.
  ## Selecting by position keeps the choice unambiguous even if features.txt
  ## repeats a name; drop = FALSE keeps a data frame for a single match.
  mean_idx <- grep("mean", features$feature, fixed = TRUE)
  std_idx <- grep("std", features$feature, fixed = TRUE)
  measurements <- x[, c(mean_idx, std_idx), drop = FALSE]

  ## read corresponding lists of activities and subjects
  y <- read.table(partition_file("y"), col.names = "activity")
  s <- read.table(partition_file("subject"), col.names = "subject")

  ## merge tables: subject, activity, then the chosen measurements
  m <- cbind(s, y, measurements)

  ## clean up column names by removing dashes and parens, then spell out the
  ## leading t / f domain prefix
  cleaned <- names(m)
  cleaned <- gsub("-", "", cleaned, fixed = TRUE)
  cleaned <- gsub("()", "", cleaned, fixed = TRUE)
  cleaned <- sub("^t", "time", cleaned)
  cleaned <- sub("^f", "freq", cleaned)
  names(m) <- cleaned
  m
}

## Read test files, assign column names, and merge with activities and subjects
readtest <- function() {
  read_har_partition("test")
}

## Read train files, assign column names, and merge with activities and subjects
readtrain <- function() {
  read_har_partition("train")
}
## Download the zip file and extract it, if the data set does not already
## exist in the working directory.
##
## Takes no arguments and is called for its side effects only: when
## "UCI HAR Dataset" is missing from the working directory, the archive is
## saved there as "getdata-projectfiles-UCI-HAR-Dataset.zip" and unpacked in
## place. Stops with an error if the download reports failure or if the data
## directory is still missing after extraction. Returns NULL invisibly.
downloadfiles <- function() {
  if (!file.exists("UCI HAR Dataset")) {
    zipfile <- "getdata-projectfiles-UCI-HAR-Dataset.zip"

    ## Use R's default download method rather than requiring an external
    ## curl binary; mode = "wb" keeps the archive byte-exact on Windows.
    status <- download.file(
      "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip",
      destfile = zipfile, mode = "wb")
    if (status != 0) {
      stop("download of ", zipfile, " failed with status ", status,
           call. = FALSE)
    }

    ## utils::unzip() replaces the shell call to an external unzip program
    utils::unzip(zipfile)
    if (!file.exists("UCI HAR Dataset")) {
      stop("extracting ", zipfile, " did not create 'UCI HAR Dataset'",
           call. = FALSE)
    }
  }
  invisible(NULL)
}