Skip to content

Commit 701703d

Browse files
committed
first commit
0 parents  commit 701703d

7 files changed

Lines changed: 381 additions & 0 deletions

File tree

.DS_Store

6 KB
Binary file not shown.

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
HackersAndSlackers-d2a47db89384.json

Pipfile

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[[source]]
2+
name = "pypi"
3+
url = "https://pypi.org/simple"
4+
verify_ssl = true
5+
6+
[dev-packages]
7+
8+
[packages]
9+
google-cloud-storage="*"
10+
google-cloud-bigquery="*"
# NOTE: pprint is part of the Python standard library — no PyPI install is
# needed (the "pprint" package on PyPI is an unrelated placeholder).
12+
13+
[requires]
14+
python_version = "3.7"

Pipfile.lock

Lines changed: 268 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# bigquery-python-tutorial

data/test.csv

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
id,initiated,hiredate,email,firstname,lastname,title,department,location,country,type
2+
100035435,2015-12-11T09:16:20.722-08:00,3/22/67,GretchenRMorrow@jourrapide.com,Gretchen,Morrow,Power plant operator,Physical Product,Britling Cafeterias,United Kingdom,Employee
3+
100056435,2015-12-15T10:11:24.604-08:00,6/22/99,ElizabethLSnow@armyspy.com,Elizabeth,Snow,Oxygen therapist,Physical Product,Grade A Investment,United States of America,Employee
4+
100037955,2015-12-16T14:31:32.765-08:00,5/31/74,AlbertMPeterson@einrot.com,Albert,Peterson,Psychologist,Physical Product,Grass Roots Yard Services,United States of America,Employee
5+
100035435,2016-01-20T11:15:47.249-08:00,9/9/69,JohnMLynch@dayrep.com,John,Lynch,Environmental hydrologist,Physical Product,Waccamaw's Homeplace,United States of America,Employee
6+
100057657,2016-01-21T12:45:38.261-08:00,4/9/83,TheresaJCahoon@teleworm.us,Theresa,Cahoon,Personal chef,Physical Product,Cala Foods,United States of America,Employee
7+
100056747,2016-02-01T11:25:39.317-08:00,6/26/98,KennethHPayne@dayrep.com,Kenneth,Payne,Central office operator,Frontline,Magna Consulting,United States of America,Employee
8+
100035435,2016-02-01T11:28:11.953-08:00,4/16/82,LeifTSpeights@fleckens.hu,Leif,Speights,Staff development director,Frontline,Rivera Property Maintenance,United States of America,Employee
9+
100035435,2016-02-01T12:21:01.756-08:00,8/6/80,JamesSRobinson@teleworm.us,James,Robinson,Scheduling clerk,Frontline,Discount Furniture Showcase,United States of America,Employee
10+
100074688,2016-02-01T13:29:19.147-08:00,12/14/74,AnnaDMoberly@jourrapide.com,Anna,Moberly,Playwright,Physical Product,The Wiz,United States of America,Employee
11+
100665778,2016-02-04T14:40:05.223-08:00,9/13/66,MarjorieBCrawford@armyspy.com,Marjorie,Crawford,"Court, municipal, and license clerk",Physical Product,The Serendipity Dip,United Kingdom,Employee
12+
100876876,2016-02-24T12:39:25.872-08:00,12/19/67,LyleCHackett@fleckens.hu,Lyle,Hackett,Airframe mechanic,Physical Product,Infinity Investment Plan,United States of America,Employee
13+
100658565,2016-02-29T15:52:12.933-08:00,11/17/83,MaryJDensmore@jourrapide.com,Mary,Densmore,Employer relations representative,Frontline,One-Up Realtors,United States of America,Employee
14+
100766547,2016-03-01T12:32:53.357-08:00,10/1/87,CindyRDiaz@armyspy.com,Cindy,Diaz,Student affairs administrator,Physical Product,Mr. AG's,United States of America,Employee
15+
100045677,2016-03-02T12:07:44.264-08:00,8/16/65,AndreaTLigon@einrot.com,Andrea,Ligon,Railroad engineer,Central Growth,Robinson Furniture,United States of America,Employee

main.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
from google.cloud import storage
from google.cloud import bigquery
import pprint

# --- Configuration -------------------------------------------------------
# NOTE(review): placeholder bucket values — replace with a real GCS bucket
# before running.
bucket_uri = 'gs://your-bucket/'            # bucket URI with gs:// scheme
bucket_name = 'your-bucket'                 # bare bucket name (no scheme)
bucket_target = 'datasets/data_upload.csv'  # destination object path inside the bucket
local_dataset = 'data/test.csv'             # local CSV file to upload
bucket_target_uri = bucket_uri + bucket_target  # full gs:// URI of the uploaded object
bigquery_dataset = 'uploadtest'             # target BigQuery dataset id
bigquery_table = 'my_table'                 # target BigQuery table id
13+
14+
def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Copy a local CSV file into a Google Cloud Storage bucket.

    Resolves the bucket by name, points a blob at the destination
    object path, and streams the local file's contents into it,
    printing a confirmation line when the upload returns.
    """
    client = storage.Client()
    target_blob = client.get_bucket(bucket_name).blob(destination_blob_name)
    # Commence Upload
    target_blob.upload_from_filename(source_file_name)
    print('File {} uploaded to {}.'.format(
        source_file_name,
        destination_blob_name))
29+
30+
31+
def insert_bigquery(target_uri, dataset_id, table_id):
    """Load a CSV stored on Google Cloud Storage into a BigQuery table.

    Configures a load job (autodetected schema, header row skipped,
    CSV source format), starts it against the given dataset/table,
    and blocks until the load completes.
    """
    client = bigquery.Client()
    config = bigquery.LoadJobConfig()
    config.autodetect = True                       # let BigQuery infer column types
    config.skip_leading_rows = 1                   # row 1 holds the CSV header
    config.source_format = bigquery.SourceFormat.CSV
    destination = client.dataset(dataset_id).table(table_id)
    job = client.load_table_from_uri(
        target_uri,
        destination,
        job_config=config)  # API request
    print('Starting job {}'.format(job.job_id))
    job.result()  # waits for the table load to complete
    print('Job finished.')
58+
59+
60+
def get_schema(dataset_id, table_id):
    """Fetch, pretty-print, and return a BigQuery table's schema.

    Builds a TableReference for the given dataset/table, retrieves
    the table metadata from BigQuery, prints the schema to the
    console, and returns the schema list.
    """
    client = bigquery.Client()
    table_ref = bigquery.table.TableReference(
        client.dataset(dataset_id), table_id)
    table = client.get_table(table_ref)
    # Print Schema to Console
    pprint.PrettyPrinter(indent=4).pprint(table.schema)
    return table.schema
77+
78+
79+
# Run the pipeline: upload the local CSV to Cloud Storage, load it into
# BigQuery, then fetch and print the resulting (autodetected) table schema.
upload_blob(bucket_name, local_dataset, bucket_target)
insert_bigquery(bucket_target_uri, bigquery_dataset, bigquery_table)
bigquery_table_schema = get_schema(bigquery_dataset, bigquery_table)

0 commit comments

Comments
 (0)