BigQuery for data analysts quest
Big Data Analysis to a Slide Presentation
Big Data Analysis to a slide presentation Qwiklab
Learning goals:
- How to use Google Apps Script with multiple Google services
- How to use Google BigQuery to perform a big data analysis
- How to create a Google Sheet and populate data into it, as well as how to create a chart with spreadsheet data
- How to transfer the spreadsheet chart and data into separate slides of a Google Slides presentation
Resources:
- Google Apps Script Overview
- Google Apps Script Tutorials
- Google Apps Script Video examples
- Built in Google Services
- Advanced Google Services
- Extending Google Sheets with Apps Scripts
- Extending Google Slides with Apps Scripts
- Big data to slides github repo
A built-in service provides high-level methods that you can use to access to G Suite or Google product data, or other useful utility methods
An advanced service is merely a thin wrapper around a G Suite or Google REST API.
/**
* Copyright 2018 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Filename for data results
var QUERY_NAME = "Most common words in all of Shakespeare's works";
// Replace this value with your Google Cloud API project ID
//var PROJECT_ID = '<YOUR_PROJECT_ID>';
var PROJECT_ID = 'qwiklabs-gcp-01-e643bc98ef64';
if (!PROJECT_ID) throw Error('Project ID is required in setup');
/**
* Runs a BigQuery query, adds data and a chart in a Sheet.
*/
function createBigQueryPresentation() {
var spreadsheet = runQuery();
Logger.log('Results spreadsheet created: %s', spreadsheet.getUrl());
createColumnChart(spreadsheet);
}
/**
* Runs a BigQuery query; puts results into Sheet. You must enable
* the BigQuery advanced service before you can run this code.
* @see http://developers.google.com/apps-script/advanced/bigquery#run_query
* @see http://github.com/gsuitedevs/apps-script-samples/blob/master/advanced/bigquery.gs
*
* @returns {Spreadsheet} Returns a spreadsheet with BigQuery results
* @see http://developers.google.com/apps-script/reference/spreadsheet/spreadsheet
*/
function runQuery() {
// Replace sample with your own BigQuery query.
var request = {
query:
'SELECT ' +
'LOWER(word) AS word, ' +
'SUM(word_count) AS count ' +
'FROM [bigquery-public-data:samples.shakespeare] ' +
'GROUP BY word ' +
'ORDER BY count ' +
'DESC LIMIT 10'
};
var queryResults = BigQuery.Jobs.query(request, PROJECT_ID);
var jobId = queryResults.jobReference.jobId;
// Wait for BQ job completion (with exponential backoff).
var sleepTimeMs = 500;
while (!queryResults.jobComplete) {
Utilities.sleep(sleepTimeMs);
sleepTimeMs *= 2;
queryResults = BigQuery.Jobs.getQueryResults(PROJECT_ID, jobId);
}
// Get all results from BigQuery.
var rows = queryResults.rows;
while (queryResults.pageToken) {
queryResults = BigQuery.Jobs.getQueryResults(PROJECT_ID, jobId, {
pageToken: queryResults.pageToken
});
rows = rows.concat(queryResults.rows);
}
// Return null if no data returned.
if (!rows) {
return Logger.log('No rows returned.');
}
// Create the new results spreadsheet.
var spreadsheet = SpreadsheetApp.create(QUERY_NAME);
var sheet = spreadsheet.getActiveSheet();
// Add headers to Sheet.
var headers = queryResults.schema.fields.map(function(field) {
return field.name.toUpperCase();
});
sheet.appendRow(headers);
// Append the results.
var data = new Array(rows.length);
for (var i = 0; i < rows.length; i++) {
var cols = rows[i].f;
data[i] = new Array(cols.length);
for (var j = 0; j < cols.length; j++) {
data[i][j] = cols[j].v;
}
}
// Start storing data in row 2, col 1
var START_ROW = 2; // skip header row
var START_COL = 1;
sheet.getRange(START_ROW, START_COL, rows.length, headers.length).setValues(data);
// Return the spreadsheet object for later use.
return spreadsheet;
}
/**
* Uses spreadsheet data to create columnar chart.
* @param {Spreadsheet} Spreadsheet containing results data
* @returns {EmbeddedChart} visualizing the results
* @see http://developers.google.com/apps-script/reference/spreadsheet/embedded-chart
*/
function createColumnChart(spreadsheet) {
// Retrieve the populated (first and only) Sheet.
var sheet = spreadsheet.getSheets()[0];
// Data range in Sheet is from cell A2 to B11
var START_CELL = 'A2'; // skip header row
var END_CELL = 'B11';
// Place chart on Sheet starting on cell E5.
var START_ROW = 5; // row 5
var START_COL = 5; // col E
var OFFSET = 0;
// Create & place chart on the Sheet using above params.
var chart = sheet.newChart()
.setChartType(Charts.ChartType.COLUMN)
.addRange(sheet.getRange(START_CELL + ':' + END_CELL))
.setPosition(START_ROW, START_COL, OFFSET, OFFSET)
.build();
sheet.insertChart(chart);
}
BigQuery Qwik Start - Command Line
BigQuery Qwik Start Commant Line Qwiklab
BigQuery Qwik Start - Console
BigQuery Qwik Start - Console Qwiklab
Explore and Create Reports with Data Studio
Explore and Create Reports with Data Studio Qwiklab
Introduction to SQL for BigQuery and Cloud
Introduction to SQL for BigQuery and Cloud Qwiklab
Learning Goals:
- Understand SQL keywords and how they are used
- Learn to navigate BigQuery
- Create a Cloud SQL instance with data inside.
Structured data is normally comprised of tables of data.
Unstructured data includes images and video.
Playing with the London Bicycles dataset
SELECT end_station_name
FROM `bigquery-public-data.london_bicycles.cycle_hire`;
How many bike rides were longer than 20 minutes?
SELECT *
FROM `bigquery-public-data.london_bicycles.cycle_hire`
WHERE duration>=1200;
How many stations are there?
SELECT start_station_name
FROM `bigquery-public-data.london_bicycles.cycle_hire`
GROUP BY start_station_name;
How many trips start at each station?
SELECT start_station_name, COUNT(*) AS num_starts
FROM `bigquery-public-data.london_bicycles.cycle_hire`
GROUP BY start_station_name;
How many trips start at each station sorted alphabetically?
SELECT start_station_name, COUNT(*) AS num
FROM `bigquery-public-data.london_bicycles.cycle_hire`
GROUP BY start_station_name
ORDER BY start_station_name;
How many trips start at each station sorted by number of trips in ascending order?
SELECT start_station_name, COUNT(*) AS num
FROM `bigquery-public-data.london_bicycles.cycle_hire`
GROUP BY start_station_name
ORDER BY num;
How many trips start at each station sorted by number of trips in descending order?
SELECT start_station_name, COUNT(*) AS num
FROM `bigquery-public-data.london_bicycles.cycle_hire`
GROUP BY start_station_name
ORDER BY num DESC;
How many trips end at each station sorted by number of trips in descending order?
SELECT end_station_name, COUNT(*) AS num
FROM `bigquery-public-data.london_bicycles.cycle_hire`
GROUP BY end_station_name
ORDER BY num DESC;
Cloud shell terminal
Check active user profile
gcloud auth list
Show active project
gcloud config list project
Connect to created sql instance
gcloud sql connect qwiklabs-demo --user=root
While connected to sql instance:
create database
CREATE DATABASE bike;
Create 2 tables in the database.
USE bike;
CREATE TABLE london1 (start_station_name VARCHAR(255), num INT);
USE bike;
CREATE TABLE london2 (end_station_name VARCHAR(255), num INT);
Check tables have been created
SELECT * FROM london1;
SELECT * FROM london2;
Delete header rows which are part of the table data
DELETE FROM london1 WHERE num=0;
DELETE FROM london2 WHERE num=0;
Test insert data
INSERT INTO london1 (start_station_name, num) VALUES ("test destination", 1);
perform a union query to see top drop off and start locations
SELECT start_station_name AS top_stations, num FROM london1 WHERE num>100000
UNION
SELECT end_station_name, num FROM london2 WHERE num>100000
ORDER BY top_stations DESC;
Troubleshooting Common SQL Errors
Troubleshooting Common SQL Errors Qwiklab