Skip to content

Commit 5104d8b

Browse files
committed
updated source
1 parent 032d79a commit 5104d8b

8 files changed

Lines changed: 116 additions & 120 deletions

File tree

.DS_Store

0 Bytes
Binary file not shown.

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,7 @@
11
HackersAndSlackers-d2a47db89384.json
22
HackersAndSlackers-d2a47db89384.json
3+
HackersAndSlackers-4893023543f3.json
4+
HackersAndSlackers-59ed81beb2ea.json
5+
HackersAndSlackers-aec129ee8154.json
6+
.env
7+
.DS_Store

Pipfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ verify_ssl = true
66
[dev-packages]
77

88
[packages]
9-
google-cloud-storage="*"
10-
google-cloud-bigquery="*"
11-
pprint="*"
9+
google-cloud-storage = "*"
10+
google-cloud-bigquery = "*"
11+
pprint = "*"
1212

1313
[requires]
1414
python_version = "3.7"

Pipfile.lock

Lines changed: 46 additions & 46 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
# Google Big Query: Python SDK Tutorial
22

33

4-
![Python](https://img.shields.io/badge/Python-v^3.7-blue.svg?logo=python&longCache=true&logoColor=white&colorB=23a8e2&style=flat-square&colorA=36363e)
5-
![Google Cloud BigQuery](https://img.shields.io/badge/google--cloud--bigquery-v1.11.2-blue.svg?logo=Google&longCache=true&logoColor=white&colorB=23a8e2&style=flat-square&colorA=36363e)
6-
![Google Cloud Storage](https://img.shields.io/badge/Google--Cloud--Storage-v1.15.0-blue.svg?logo=Google&longCache=true&logoColor=white&colorB=23a8e2&style=flat-square&colorA=36363e)
7-
![Pprint](https://img.shields.io/badge/Pprint-v0.1-blue.svg?logo=python&longCache=true&logoColor=white&colorB=23a8e2&style=flat-square&colorA=36363e)
8-
![GitHub Last Commit](https://img.shields.io/github/last-commit/google/skia.svg?style=flat-square&colorA=36363e)
9-
[![GitHub Issues](https://img.shields.io/github/issues/toddbirchard/tableau-extraction.svg?style=flat-square&colorA=36363e)](https://github.com/hackersandslackers/bigquery-python-tutorial/issues)
10-
[![GitHub Stars](https://img.shields.io/github/stars/toddbirchard/tableau-extraction.svg?style=flat-square&colorB=e3bb18&colorA=36363e)](https://github.com/hackersandslackers/bigquery-python-tutorial/stargazers)
11-
[![GitHub Forks](https://img.shields.io/github/forks/toddbirchard/tableau-extraction.svg?style=flat-square&colorA=36363e)](https://github.com/hackersandslackers/bigquery-python-tutorial/network)
4+
![Python](https://img.shields.io/badge/Python-v3.7-blue.svg?logo=python&longCache=true&logoColor=white&colorB=5e81ac&style=flat-square&colorA=2e3440)
5+
![Google Cloud BigQuery](https://img.shields.io/badge/Google--BigQuery-v1.11.2-blue.svg?logo=Google&longCache=true&logoColor=white&colorB=5e81ac&style=flat-square&colorA=2e3440)
6+
![Google Cloud Storage](https://img.shields.io/badge/Google--Cloud--Storage-v1.15.0-blue.svg?logo=Google&longCache=true&logoColor=white&colorB=5e81ac&style=flat-square&colorA=2e3440)
7+
![GitHub Last Commit](https://img.shields.io/github/last-commit/google/skia.svg?style=flat-square&colorA=2e3440&colorB=a3be8c)
8+
[![GitHub Issues](https://img.shields.io/github/issues/toddbirchard/tableau-extraction.svg?style=flat-square&colorA=2e3440&colorB=d08770)](https://github.com/hackersandslackers/bigquery-python-tutorial/issues)
9+
[![GitHub Stars](https://img.shields.io/github/stars/toddbirchard/tableau-extraction.svg?style=flat-square&colorB=d08770&colorA=2e3440)](https://github.com/hackersandslackers/bigquery-python-tutorial/stargazers)
10+
[![GitHub Forks](https://img.shields.io/github/forks/toddbirchard/tableau-extraction.svg?style=flat-square&colorA=2e3440&colorB=5e81ac)](https://github.com/hackersandslackers/bigquery-python-tutorial/network)
11+
12+
13+
Source for the accompanying tutorial: https://hackersandslackers.com/getting-started-google-big-query-python/

config.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
"""BigQuery Upload Configuration."""
2+
from os import environ
3+
4+
5+
# Google Cloud Storage
6+
bucketURI = environ.get('GCP_BUCKET_URI')
7+
bucketName = environ.get('GCP_BUCKET_NAME')
8+
9+
# Google BigQuery
10+
bigqueryDataset = environ.get('GCP_BIGQUERY_DATASET')
11+
bigqueryTable = environ.get('GCP_BIGQUERY_TABLE')
12+
13+
# Data
14+
localDataFile = environ.get('LOCAL_DATA_TARGET')
15+
destinationBlobName = environ.get('DESTINATION_BLOB_NAME')

main.py

Lines changed: 27 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,71 +1,39 @@
1+
"""Programatically create a BigQuery table from a CSV."""
12
from google.cloud import storage
23
from google.cloud import bigquery
4+
from config import bucketURI, bucketName, bigqueryDataset, bigqueryTable, localDataFile, destinationBlobName
35
import pprint
46

5-
bucket_uri = 'gs://your-bucket/'
6-
bucket_name = 'your-bucket'
7-
bucket_target = 'datasets/data_upload.csv'
8-
local_dataset = 'data/test.csv'
9-
bucket_target_uri = bucket_uri + bucket_target
10-
bigquery_dataset = 'uploadtest'
11-
bigquery_table = 'my_table'
127

13-
14-
def upload_blob(bucket_name, source_file_name, destination_blob_name):
15-
"""Upload a CSV to Google Cloud Storage.
16-
17-
1. Retrieve the target bucket.
18-
2. Set destination of data to be uploaded.
19-
3. Upload local CSV.
20-
"""
8+
def storage_upload_blob(bucketName, source_file_name, destinationBlobName):
    """Upload a CSV to Google Cloud Storage.

    :param bucketName: Name of the target GCS bucket.
    :param source_file_name: Path of the local file to upload.
    :param destinationBlobName: Blob name the file is stored under in the bucket.
    :return: Human-readable confirmation message.
    """
    client = storage.Client()
    # Resolve the bucket, then the destination blob within it.
    target_bucket = client.get_bucket(bucketName)
    target_blob = target_bucket.blob(destinationBlobName)
    target_blob.upload_from_filename(source_file_name)
    confirmation = 'File {} uploaded to {}.'.format(source_file_name,
                                                    destinationBlobName)
    return confirmation
2916

3017

31-
def insert_bigquery(target_uri, dataset_id, table_id):
32-
"""Insert CSV from Google Storage to BigQuery Table.
33-
34-
1. Specify target dataset within BigQuery.
35-
2. Create a Job configuration.
36-
3. Specify that we are autodetecting datatypes.
37-
4. Reserve row #1 for headers.
38-
5. Specify the source format of the file (defaults to CSV).
39-
6. Pass the URI of the data storage on Google Cloud Storage from.
40-
7. Load BigQuery Job.
41-
8. Execute BigQuery Job.
42-
"""
18+
def bigquery_insert_data(bucketURI, destinationBlobName, dataset_id, table_id):
    """Insert CSV from Google Storage to BigQuery Table.

    :param bucketURI: URI prefix of the source bucket (joined with the blob name).
    :param destinationBlobName: Blob name appended to bucketURI to form the source URI.
    :param dataset_id: Target BigQuery dataset.
    :param table_id: Target BigQuery table.
    :return: Completion message once the load job has finished.
    """
    client = bigquery.Client()
    dataset_ref = client.dataset(dataset_id)
    # Load-job settings: CSV source, row 1 treated as a header row,
    # column types autodetected from the data.
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.skip_leading_rows = 1
    job_config.autodetect = True
    source_uri = bucketURI + destinationBlobName
    load_job = client.load_table_from_uri(source_uri,
                                          dataset_ref.table(table_id),
                                          job_config=job_config)
    print('Starting job {}'.format(load_job.job_id))
    load_job.result()  # Blocks until the table load completes.
    return 'Job finished.'
5933

60-
def get_schema(dataset_id, table_id):
61-
"""Get BigQuery Table Schema.
6234

63-
1. Specify target dataset within BigQuery.
64-
2. Specify target table within given dataset.
65-
3. Create Table class instance from existing BigQuery Table.
66-
4. Print results to console.
67-
5. Return the schema dict.
68-
"""
35+
def get_table_schema(dataset_id, table_id):
36+
"""Get BigQuery Table Schema."""
6937
bigquery_client = bigquery.Client()
7038
dataset_ref = bigquery_client.dataset(dataset_id)
7139
bg_tableref = bigquery.table.TableReference(dataset_ref, table_id)
@@ -76,6 +44,12 @@ def get_schema(dataset_id, table_id):
7644
return bg_table.schema
7745

7846

79-
upload_blob(bucket_name, local_dataset, bucket_target)
80-
insert_bigquery(bucket_target_uri, bigquery_dataset, bigquery_table)
81-
bigquery_table_schema = get_schema(bigquery_dataset, bigquery_table)
47+
# Runs on import: push the local CSV to Cloud Storage, load it into the
# configured BigQuery table, then fetch the resulting table schema.
storage_upload_blob(bucketName, localDataFile, destinationBlobName)
bigquery_insert_data(bucketURI, destinationBlobName, bigqueryDataset, bigqueryTable)
bigqueryTableSchema = get_table_schema(bigqueryDataset, bigqueryTable)

requirements.txt

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
1-
cachetools==3.1.0
1+
cachetools==3.1.1
22
certifi==2019.3.9
33
chardet==3.0.4
4-
google-api-core==1.9.0
4+
google-api-core==1.11.1
55
google-auth==1.6.3
6-
google-cloud-bigquery==1.11.2
7-
google-cloud-core==0.29.1
8-
google-cloud-storage==1.15.0
6+
google-cloud-bigquery==1.14.0
7+
google-cloud-core==1.0.1
8+
google-cloud-storage==1.16.1
99
google-resumable-media==0.3.2
10-
googleapis-common-protos==1.5.9
10+
googleapis-common-protos==1.6.0
1111
idna==2.8
1212
pprint==0.1
13-
protobuf==3.7.1
13+
protobuf==3.8.0
1414
pyasn1==0.4.5
15-
pyasn1-modules==0.2.4
15+
pyasn1-modules==0.2.5
1616
pytz==2019.1
17-
requests==2.21.0
17+
requests==2.22.0
1818
rsa==4.0
1919
six==1.12.0
20-
urllib3==1.24.2
20+
urllib3==1.25.3

0 commit comments

Comments
 (0)