Skip to content

Commit f8cd6b5

Browse files
dmosorast and Kyle Allan authored
Wal2json format v2 (#91)
* wip - using wal2json format version 2
* Removing debug comments and ipdb's
* Pylint
* Catch DataError on trying format v2
* Try to read a message to check version 2 support
* Fix small logic issue
* Set message_format to switch implementation
* Add sleep to test theory
* Saving progress on backoff pattern, reverting next
* Move to conn_info message_format
* Make config parameter more explicit
* move tests into tap-postgres repo
* Fix unittests imports
* Move tests to integration subdirectory
* Revert previous commit and fix test discovery issue
* Change to correct integration tests path
* Clean up comments, add explicit connection property for wal2json, copy test for wal2json v2
* Change new test name to match canonicalized version
* Add log line when using format-version 2
* Pull new message-format out of config
* Move to docker postgres host
* Fix tests, bugs and incorrect expectations
* Ensure DB exists for unittests
* Review comments
* Remove .sample and pylint

Co-authored-by: Kyle Allan <kyle@stitchdata.com>
1 parent f4dab2d commit f8cd6b5

26 files changed

Lines changed: 5109 additions & 73 deletions

.circleci/Dockerfile

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
FROM postgres:9.6
2+
3+
# Git SHA of v2.2
4+
ENV WAL2JSON_COMMIT_ID=9f9762315062888f7f7f4f0a115073a33ad1275e
5+
6+
# Compile the plugins from sources and install
7+
RUN apt-get update && apt-get install -y postgresql-server-dev-9.6 gcc git make pkgconf \
8+
&& git clone https://github.com/eulerto/wal2json -b master --single-branch \
9+
&& (cd /wal2json && git checkout $WAL2JSON_COMMIT_ID && make && make install) \
10+
&& rm -rf wal2json
11+
12+
# Copy the custom configuration which will be passed down to the server
13+
COPY postgresql.conf /usr/local/share/postgresql/postgresql.conf
14+
15+
# Copy the script which will initialize the replication permissions
16+
COPY /docker-entrypoint-initdb.d /docker-entrypoint-initdb.d

.circleci/config.yml

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,12 @@ version: 2
22
jobs:
33
build:
44
docker:
5-
- image: 218546966473.dkr.ecr.us-east-1.amazonaws.com/circle-ci:tap-tester
5+
- image: 218546966473.dkr.ecr.us-east-1.amazonaws.com/circle-ci:tap-tester-v4
6+
- image: singerio/postgres:9.6-wal2json-2.2
7+
environment:
8+
POSTGRES_USER: postgres
9+
POSTGRES_PASSWORD: password
10+
command: [postgres, -c, config_file=/usr/local/share/postgresql/postgresql.conf]
611
steps:
712
- checkout
813
- run:
@@ -25,18 +30,19 @@ jobs:
2530
command: |
2631
source dev_env.sh
2732
source /usr/local/share/virtualenvs/tap-tester/bin/activate
28-
run-a-test --tap=tap-postgres \
29-
--target=target-stitch \
30-
--orchestrator=stitch-orchestrator \
31-
--email=harrison+sandboxtest@stitchdata.com \
32-
--password=$SANDBOX_PASSWORD \
33-
--client-id=50 \
34-
tap_tester.suites.postgres
33+
run-test --tap=tap-postgres \
34+
--target=target-stitch \
35+
--orchestrator=stitch-orchestrator \
36+
--email=harrison+sandboxtest@stitchdata.com \
37+
--password=$SANDBOX_PASSWORD \
38+
--client-id=50 \
39+
tests
3540
workflows:
3641
version: 2
3742
commit:
3843
jobs:
39-
- build
44+
- build:
45+
context: circleci-user
4046
build_daily:
4147
triggers:
4248
- schedule:
@@ -46,4 +52,5 @@ workflows:
4652
only:
4753
- master
4854
jobs:
49-
- build
55+
- build:
56+
context: circleci-user
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/bin/bash
2+
set -e
3+
4+
{ echo "host replication $POSTGRES_USER 0.0.0.0/0 trust"; } >> "$PGDATA/pg_hba.conf"

.circleci/postgresql.conf

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# LOGGING
2+
log_min_error_statement = fatal
3+
4+
# CONNECTION
5+
listen_addresses = '*'
6+
7+
# MODULES
8+
#shared_preload_libraries = 'decoderbufs'
9+
10+
# REPLICATION
11+
wal_level = logical # minimal, archive, hot_standby, or logical (change requires restart)
12+
max_wal_senders = 5 # max number of walsender processes (change requires restart)
13+
#wal_keep_segments = 4 # in logfile segments, 16MB each; 0 disables
14+
#wal_sender_timeout = 60s # in milliseconds; 0 disables
15+
max_replication_slots = 5 # max number of replication slots (change requires restart)

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
test:
2-
nosetests -v
2+
nosetests -v tests/unittests

bin/test-db

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#!/usr/bin/env python3
2+
import os
3+
import sys
4+
import argparse
5+
import subprocess
6+
import time
7+
from argparse import RawTextHelpFormatter
8+
9+
full_image_name = "singerio/postgres:9.6-wal2json-2.2"
10+
11+
def start_container(name):
12+
START_COMMAND = """
13+
sudo docker run -e "POSTGRES_USER={0}" -e "POSTGRES_PASSWORD={1}" -p {2}:{2} --name {3} -d {4} \
14+
postgres -c config_file=/usr/local/share/postgresql/postgresql.conf
15+
""".format(os.getenv('TAP_POSTGRES_USER'),
16+
os.getenv('TAP_POSTGRES_PASSWORD'),
17+
5432,
18+
name,
19+
full_image_name)
20+
21+
print("Starting Docker process {} using command: {}".format(name, START_COMMAND))
22+
23+
proc = subprocess.run(START_COMMAND, shell=True)
24+
if proc.returncode != 0:
25+
sys.exit("Exited with code: {}, the docker process failed to start.".format(proc.returncode))
26+
print("Process started successfully.")
27+
28+
def get_ip_addr(name):
29+
IP_ADDR_COMMAND = "docker inspect {} | jq -r .[].NetworkSettings.IPAddress"
30+
print("Retrieving IP addr of postgres container")
31+
ip_addr = subprocess.check_output(IP_ADDR_COMMAND.format(name), shell=True).decode('utf-8').rstrip()
32+
print(ip_addr)
33+
return ip_addr
34+
35+
36+
def stop_container(name):
37+
STOP_COMMAND = "sudo docker stop {0} && sudo docker rm {0}"
38+
39+
print("Stopping Docker process {}".format(name))
40+
proc = subprocess.run(STOP_COMMAND.format(name), shell=True)
41+
if proc.returncode != 0:
42+
sys.exit("Exited with code: {}, the docker process failed to stop.".format(proc.returncode))
43+
print("Process stopped successfully")
44+
45+
def connect_to_db(name):
46+
CONNECT_COMMAND = 'docker run -it --rm -e "PGPASSWORD={}" {} psql --host {} -U {}'
47+
48+
ip_addr = get_ip_addr(name)
49+
50+
print("Attempting to connect to running container using a postgres container via psql")
51+
connect_command_format = CONNECT_COMMAND.format(os.getenv('TAP_POSTGRES_PASSWORD'),
52+
full_image_name,
53+
ip_addr,
54+
os.getenv('TAP_POSTGRES_USER'))
55+
print(connect_command_format)
56+
# NB: Using call instead of run here because it is blocking
57+
# This returns only an exit code.
58+
returncode = subprocess.call(connect_command_format,
59+
shell=True)
60+
if returncode != 0:
61+
sys.exit("Exited with code: {}, could not connect.".format(returncode))
62+
63+
DESCRIPTION = """
64+
Manage docker instance for tap-postgres testing.
65+
66+
Uses environment variables:
67+
TAP_POSTGRES_USER
68+
TAP_POSTGRES_PASSWORD
69+
"""
70+
parser = argparse.ArgumentParser(description=DESCRIPTION, formatter_class=RawTextHelpFormatter)
71+
parser.add_argument('action', choices=['start','stop', 'connect'], help='action to perform with the container')
72+
parser.add_argument('--name', help="name assigned to running docker process", default='postgres1')
73+
74+
def main():
75+
parsed_args = parser.parse_args()
76+
# Potential arguments to add: pull, changing docker cointainer, changing password
77+
if parsed_args.action == 'start':
78+
start_container(parsed_args.name)
79+
elif parsed_args.action == 'stop':
80+
stop_container(parsed_args.name)
81+
elif parsed_args.action == 'connect':
82+
connect_to_db(parsed_args.name)
83+
84+
if __name__ == "__main__":
85+
main()

tap_postgres/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -683,7 +683,8 @@ def main_impl():
683683
'dbname' : args.config['dbname'],
684684
'filter_dbs' : args.config.get('filter_dbs'),
685685
'debug_lsn' : args.config.get('debug_lsn') == 'true',
686-
'logical_poll_total_seconds': float(args.config.get('logical_poll_total_seconds', 0))}
686+
'logical_poll_total_seconds': float(args.config.get('logical_poll_total_seconds', 0)),
687+
'wal2json_message_format': args.config.get('wal2json_message_format')}
687688

688689
if args.config.get('ssl') == 'true':
689690
conn_config['sslmode'] = 'require'

tap_postgres/sync_strategies/logical_replication.py

Lines changed: 92 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -217,14 +217,68 @@ def row_to_singer_message(stream, row, version, columns, time_extracted, md_map,
217217
version=version,
218218
time_extracted=time_extracted)
219219

220-
def consume_message(streams, state, msg, time_extracted, conn_info, end_lsn):
221-
payload = json.loads(msg.payload)
222-
lsn = msg.data_start
220+
def consume_message_format_2(payload, conn_info, streams_lookup, state, time_extracted, lsn):
221+
## Action Types:
222+
# I = Insert
223+
# U = Update
224+
# D = Delete
225+
# B = Begin Transaction
226+
# C = Commit Transaction
227+
# M = Message
228+
# T = Truncate
229+
action = payload['action']
230+
if action not in ['U', 'I', 'D']:
231+
LOGGER.debug("Skipping message of type %s", action)
232+
yield None
233+
else:
234+
tap_stream_id = post_db.compute_tap_stream_id(conn_info['dbname'], payload['schema'], payload['table'])
235+
if streams_lookup.get(tap_stream_id) is None:
236+
yield None
237+
else:
238+
target_stream = streams_lookup[tap_stream_id]
239+
stream_version = get_stream_version(target_stream['tap_stream_id'], state)
240+
stream_md_map = metadata.to_map(target_stream['metadata'])
223241

224-
streams_lookup = {}
225-
for s in streams:
226-
streams_lookup[s['tap_stream_id']] = s
242+
desired_columns = [col for col in target_stream['schema']['properties'].keys() if sync_common.should_sync_column(stream_md_map, col)]
227243

244+
col_names = []
245+
col_vals = []
246+
if payload['action'] in ['I', 'U']:
247+
for column in payload['columns']:
248+
if column['name'] in set(desired_columns):
249+
col_names.append(column['name'])
250+
col_vals.append(column['value'])
251+
252+
col_names = col_names + ['_sdc_deleted_at']
253+
col_vals = col_vals + [None]
254+
255+
if conn_info.get('debug_lsn'):
256+
col_names = col_names + ['_sdc_lsn']
257+
col_vals = col_vals + [str(lsn)]
258+
259+
elif payload['action'] == 'D':
260+
for column in payload['identity']:
261+
if column['name'] in set(desired_columns):
262+
col_names.append(column['name'])
263+
col_vals.append(column['value'])
264+
265+
col_names = col_names + ['_sdc_deleted_at']
266+
col_vals = col_vals + [singer.utils.strftime(singer.utils.strptime_to_utc(payload['timestamp']))]
267+
268+
if conn_info.get('debug_lsn'):
269+
col_vals = col_vals + [str(lsn)]
270+
col_names = col_names + ['_sdc_lsn']
271+
272+
# Yield 1 record to match the API of V1
273+
yield row_to_singer_message(target_stream, col_vals, stream_version, col_names, time_extracted, stream_md_map, conn_info)
274+
275+
state = singer.write_bookmark(state,
276+
target_stream['tap_stream_id'],
277+
'lsn',
278+
lsn)
279+
280+
# message-format v1
281+
def consume_message_format_1(payload, conn_info, streams_lookup, state, time_extracted, lsn):
228282
for c in payload['change']:
229283
tap_stream_id = post_db.compute_tap_stream_id(conn_info['dbname'], c['schema'], c['table'])
230284
if streams_lookup.get(tap_stream_id) is None:
@@ -288,15 +342,33 @@ def consume_message(streams, state, msg, time_extracted, conn_info, end_lsn):
288342
raise Exception("unrecognized replication operation: {}".format(c['kind']))
289343

290344

291-
singer.write_message(record_message)
345+
yield record_message
292346
state = singer.write_bookmark(state,
293347
target_stream['tap_stream_id'],
294348
'lsn',
295349
lsn)
350+
351+
352+
def consume_message(streams, state, msg, time_extracted, conn_info, end_lsn, message_format="1"):
353+
payload = json.loads(msg.payload)
354+
lsn = msg.data_start
355+
356+
streams_lookup = {s['tap_stream_id']: s for s in streams}
357+
358+
if message_format == "1":
359+
records = consume_message_format_1(payload, conn_info, streams_lookup, state, time_extracted, lsn)
360+
elif message_format == "2":
361+
records = consume_message_format_2(payload, conn_info, streams_lookup, state, time_extracted, lsn)
362+
else:
363+
raise Exception("Unknown wal2json message format version: {}".format(message_format))
364+
365+
for record_message in records:
366+
if record_message:
367+
singer.write_message(record_message)
368+
# Pulled out of refactor so we send a keep-alive per-record
296369
LOGGER.debug("sending feedback to server with NO flush_lsn. just a keep-alive")
297370
msg.cursor.send_feedback()
298371

299-
300372
LOGGER.debug("sending feedback to server. flush_lsn = %s", msg.data_start)
301373
if msg.data_start > end_lsn:
302374
raise Exception("incorrectly attempting to flush an lsn({}) > end_lsn({})".format(msg.data_start, end_lsn))
@@ -339,8 +411,17 @@ def sync_tables(conn_info, logical_streams, state, end_lsn):
339411
with post_db.open_connection(conn_info, True) as conn:
340412
with conn.cursor() as cur:
341413
LOGGER.info("Starting Logical Replication for %s(%s): %s -> %s. poll_total_seconds: %s", list(map(lambda s: s['tap_stream_id'], logical_streams)), slot, start_lsn, end_lsn, poll_total_seconds)
414+
415+
replication_params = {"slot_name": slot,
416+
"decode": True,
417+
"start_lsn": start_lsn}
418+
message_format = conn_info.get("wal2json_message_format") or "1"
419+
if message_format == "2":
420+
LOGGER.info("Using wal2json format-version 2")
421+
replication_params["options"] = {"format-version": 2, "include-timestamp": True}
422+
342423
try:
343-
cur.start_replication(slot_name=slot, decode=True, start_lsn=start_lsn)
424+
cur.start_replication(**replication_params)
344425
except psycopg2.ProgrammingError:
345426
raise Exception("unable to start replication with logical replication slot {}".format(slot))
346427

@@ -358,13 +439,13 @@ def sync_tables(conn_info, logical_streams, state, end_lsn):
358439
LOGGER.info("gone past end_lsn %s for run. breaking", end_lsn)
359440
break
360441

361-
state = consume_message(logical_streams, state, msg, time_extracted, conn_info, end_lsn)
442+
state = consume_message(logical_streams, state, msg, time_extracted,
443+
conn_info, end_lsn, message_format=message_format)
362444
#msg has been consumed. it has been processed
363445
last_lsn_processed = msg.data_start
364446
rows_saved = rows_saved + 1
365447
if rows_saved % UPDATE_BOOKMARK_PERIOD == 0:
366448
singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
367-
368449
else:
369450
now = datetime.datetime.now()
370451
timeout = keep_alive_time - (now - cur.io_timestamp).total_seconds()

tests/db_utils.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import os
2+
import psycopg2
3+
4+
def ensure_db(dbname=os.getenv('TAP_POSTGRES_DBNAME')):
5+
# Create database dev if not exists
6+
with get_test_connection() as conn:
7+
conn.autocommit = True
8+
with conn.cursor() as cur:
9+
cur.execute("SELECT 1 FROM pg_database WHERE datname = '{}'".format(dbname))
10+
exists = cur.fetchone()
11+
if not exists:
12+
print("Creating database {}".format(dbname))
13+
cur.execute("CREATE DATABASE {}".format(dbname))
14+
15+
def get_test_connection(dbname=os.getenv('TAP_POSTGRES_DBNAME'), logical_replication=False):
16+
conn_string = "host='{}' dbname='{}' user='{}' password='{}' port='{}'".format(os.getenv('TAP_POSTGRES_HOST'),
17+
dbname,
18+
os.getenv('TAP_POSTGRES_USER'),
19+
os.getenv('TAP_POSTGRES_PASSWORD'),
20+
os.getenv('TAP_POSTGRES_PORT'))
21+
if logical_replication:
22+
return psycopg2.connect(conn_string, connection_factory=psycopg2.extras.LogicalReplicationConnection)
23+
else:
24+
return psycopg2.connect(conn_string)

0 commit comments

Comments
 (0)