Guides:
Steps
-
Download the file
-
Run the install script (this will update the ~/.zshrc file)
> cd Downloads
> ./google-cloud-sdk/install.sh
- Initialise the cloud CLI (select default project and login)
./google-cloud-sdk/bin/gcloud init
- Delete the downloaded folder
- Run `gcloud auth login` in the terminal (or a Jupyter notebook) to connect to your GCP account
- Setup the correct GCP project
gcloud config set project prd-data-analytics
# Install package
python -m pip install google-cloud-bigquery
python -m pip install 'google-cloud-bigquery[pandas]'
# Test connection with GCP
def implicit():
    """Smoke-test GCP connectivity using Application Default Credentials.

    Lists the buckets visible to the default project and prints them.
    Raises if the environment has no usable credentials (e.g. `gcloud auth
    login` has not been run).
    """
    from google.cloud import storage

    # If you don't specify credentials when constructing the client, the
    # client library will look for credentials in the environment.
    storage_client = storage.Client()

    # Make an authenticated API request.
    buckets = list(storage_client.list_buckets())
    print(buckets)


# Run the connectivity check.
implicit()
from google.cloud import bigquery

# Construct a BigQuery client object (uses Application Default Credentials
# and the project configured via `gcloud config set project ...`).
client = bigquery.Client()

# Top 20 most common names in Texas, 1910-2013, from a public dataset.
query = """
SELECT name, SUM(number) as total_people
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = 'TX'
GROUP BY name, state
ORDER BY total_people DESC
LIMIT 20
"""
query_job = client.query(query)  # Make an API request.

print("The query data:")
for row in query_job:
    # Row values can be accessed by field name or index.
    print("name={}, count={}".format(row[0], row["total_people"]))
from google.cloud import bigquery

# Explicitly target the analytics project rather than the gcloud default.
project_id = 'prd-mfp-analytics'
client = bigquery.Client(project=project_id)

# Query template: `{start_date}` / `{end_date}` are filled in via str.format
# below, which is why the regex quantifier braces are doubled (`{{6,}}`).
# The CTE pairs each fault event with the NEXT event of the same fault type
# (per site/robot) using LEAD(); the outer query keeps only rows where a
# fault opened (value = 1) and was later closed (next_fault_value = 0),
# returning the fault duration in seconds.
# NOTE(review): faults still open at the end of the window are dropped by
# the next_fault_value = 0 filter — presumably intentional; confirm.
query = """
with filtered_faults AS (
SELECT
site_name
,robot_id
,msg.attribute AS fault_type
,msg.value
,time
,msg.header.stamp.sec+msg.header.stamp.nsec/1000000000 AS start_time
,LEAD(msg.header.stamp.sec+msg.header.stamp.nsec/1000000000) OVER (PARTITION BY site_name, robot_id, msg.attribute ORDER BY msg.header.stamp.sec+msg.header.stamp.nsec/1000000000 ASC) AS end_time
,LEAD(msg.value) OVER (PARTITION BY site_name, robot_id, msg.attribute ORDER BY msg.header.stamp.sec+msg.header.stamp.nsec/1000000000 ASC) AS next_fault_value
FROM `prd-mfp-analytics.analytics.info_events`
WHERE 1=1
AND time between TIMESTAMP('{start_date}') and TIMESTAMP('{end_date}')
AND msg.entity = 'FAULT'
AND site_name IS NOT NULL
AND NOT REGEXP_CONTAINS(site_name, r".{{6,}}sim|.{{6,}}tst|simulation|techops|qa_stress|simsolutionsmovement|wagedwtrn") --remove simumation sites
)
-- filter to remove unclosed faults and return a duration for each remaining fault
SELECT
end_time - start_time AS seconds_faulted
FROM filtered_faults
WHERE 1=1
AND value = 1
AND next_fault_value = 0
"""

# Date window for this run (inclusive start, exclusive-ish end per TIMESTAMP bounds above).
start_date_1 = '2021-06-05'
end_date_1 = '2021-06-08'

# Substitute the window into the template, run it, and pull the result into
# a pandas DataFrame (requires google-cloud-bigquery[pandas]).
formatted_query_1 = query.format(start_date = start_date_1, end_date = end_date_1)
fault_time_df_1 = client.query(formatted_query_1).to_dataframe()
print(fault_time_df_1)
You provide the name of the destination DataFrame by passing it as an argument to the %%bigquery magic command.
# Run this cell to load bigquery magic
%load_ext google.cloud.bigquery
%%bigquery regions_by_country
-- Cell magic: runs this query and saves the result to the
-- `regions_by_country` DataFrame in the notebook namespace.
-- Counts distinct reporting regions per country for yesterday's refresh
-- of the public international Google Trends dataset.
SELECT
country_code,
country_name,
COUNT(DISTINCT region_code) AS num_regions
FROM
`bigquery-public-data.google_trends.international_top_terms`
WHERE
refresh_date = DATE_SUB(CURRENT_DATE, INTERVAL 1 DAY)
GROUP BY
country_code, country_name
ORDER BY
num_regions DESC;