-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path3. Analyzing and Visualizing The Data.sql
More file actions
124 lines (114 loc) · 3.12 KB
/
3. Analyzing and Visualizing The Data.sql
File metadata and controls
124 lines (114 loc) · 3.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
-- Title: Data Analysis
-- Total trips by customer type
SELECT
member_casual,
COUNT(*) AS total_trips,
CAST(COUNT(*) AS FLOAT64) / SUM(COUNT(*)) OVER () * 100 AS percentage
FROM
`pristine-sphere-456607-r7`.Case_Study.final_cleaned_combined_data
GROUP BY 1;
-- average ride length by customer type
-- Note: ride_length is in minutes
SELECT
member_casual,
AVG(ride_length) AS average_ride_length
FROM
`pristine-sphere-456607-r7`.Case_Study.final_cleaned_combined_data
GROUP BY 1;
-- total rides by customer type and hour of the day
SELECT
member_casual,
EXTRACT(HOUR FROM started_at) AS hour_of_day,
COUNT(*) AS num_trips
FROM
`pristine-sphere-456607-r7`.Case_Study.final_cleaned_combined_data
GROUP BY 1, 2
ORDER BY hour_of_day;
-- average ride length and total trips
-- by customer type and day of the week
SELECT
member_casual,
FORMAT_DATE('%A', DATE(started_at)) AS day_of_week,
AVG(ride_length) AS average_ride_length,
COUNT(*) AS num_trips
FROM
`pristine-sphere-456607-r7`.Case_Study.final_cleaned_combined_data
GROUP BY 1, 2
ORDER BY CASE day_of_week
WHEN 'Monday' THEN 1
WHEN 'Tuesday' THEN 2
WHEN 'Wednesday' THEN 3
WHEN 'Thursday' THEN 4
WHEN 'Friday' THEN 5
WHEN 'Saturday' THEN 6
WHEN 'Sunday' THEN 7
ELSE 8
END;
-- total trips by customer type and month of the year
SELECT
member_casual,
FORMAT_DATE('%B', DATE(started_at)) AS month_of_year,
COUNT(*) AS num_trips
FROM
`pristine-sphere-456607-r7`.Case_Study.final_cleaned_combined_data
GROUP BY 1, 2
ORDER BY CASE month_of_year
WHEN 'January' THEN 1
WHEN 'February' THEN 2
WHEN 'March' THEN 3
WHEN 'April' THEN 4
WHEN 'May' THEN 5
WHEN 'June' THEN 6
WHEN 'July' THEN 7
WHEN 'August' THEN 8
WHEN 'September' THEN 9
WHEN 'October' THEN 10
WHEN 'November' THEN 11
WHEN 'December' THEN 12
ELSE 13
END;
-- total rides by customer type and bike type
-- Note: rideable_type is the bike type
-- Note: bike types are 'electric_bike', 'docked_bike', 'classic_bike'
SELECT
member_casual,
rideable_type,
COUNT(*) AS total_rides
FROM
`pristine-sphere-456607-r7`.Case_Study.final_cleaned_combined_data
GROUP BY 1, 2;
-- casual riders that ride more than 2 standard deviations above the mean
SELECT
ride_id,
start_station_name,
end_station_name,
ride_length,
day,
month
FROM
`pristine-sphere-456607-r7`.Case_Study.final_cleaned_combined_data
WHERE
ride_length > (
SELECT
AVG(ride_length) + 2 * STDDEV(ride_length)
FROM
`pristine-sphere-456607-r7`.Case_Study.final_cleaned_combined_data
) AND start_station_name IS NOT NULL AND end_station_name IS NOT NULL AND member_casual = 'casual'
ORDER BY ride_length;
-- top 10 days where casual riders (non-members) took the longest total rides
-- grouped by the start station for each day.
SELECT
day,
member_casual,
start_station_name,
SUM(ride_length) AS total_ride_length,
AVG(ride_length) AS average_ride_length
FROM
`pristine-sphere-456607-r7`.Case_Study.final_cleaned_combined_data
WHERE
member_casual = 'casual'
GROUP BY 1, 2, 3
ORDER BY day, total_ride_length DESC
LIMIT 10;
-- lets continue to visualizing the data
-- i am using tableau