Skip to content

Commit dae88f7

Browse files
authored
fix(bq): split BigQuery modules.sql when exceeding 1MB query limit (#599)
1 parent 98ba445 commit dae88f7

3 files changed

Lines changed: 54 additions & 16 deletions

File tree

clouds/bigquery/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ endif
6161
build-modules:
6262
mkdir -p $(BUILD_DIR)
6363
$(MAKE) -C modules build
64-
cp modules/build/modules.sql $(BUILD_DIR)
64+
cp modules/build/modules*.sql $(BUILD_DIR)
6565

6666
deploy:
6767
$(MAKE) deploy-libraries
@@ -105,7 +105,7 @@ create-package:
105105

106106
rm -rf $(DIST_DIR)
107107
mkdir -p $(DIST_DIR)/$(PACKAGE_NAME)
108-
cp $(BUILD_DIR)/modules.sql $(DIST_DIR)/$(PACKAGE_NAME)/
108+
cp $(BUILD_DIR)/modules*.sql $(DIST_DIR)/$(PACKAGE_NAME)/
109109
cp -r $(BUILD_DIR)/libs $(DIST_DIR)/$(PACKAGE_NAME)/libs
110110

111111
$(MAKE) extra-package

clouds/bigquery/common/build_modules.js

Lines changed: 48 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -151,13 +151,9 @@ function add (f, include) {
151151
functions.forEach(f => add(f));
152152

153153
// Replace environment variables
154-
let separator;
155-
if (argv.production) {
156-
separator = '\n';
157-
} else {
158-
separator = '\n-->\n'; // marker to future SQL split
159-
}
160-
let content = output.map(f => f.content).join(separator);
154+
const internalSeparator = '\n-->\n'; // marker to split SQL statements
155+
const outputSeparator = argv.production ? '\n' : internalSeparator;
156+
let content = output.map(f => f.content).join(internalSeparator);
161157

162158
function apply_replacements (text) {
163159
const libraries = [... new Set(content.match(new RegExp('@@BQ_LIBRARY_[^@]*?_BUCKET@@', 'g')))];
@@ -190,14 +186,54 @@ function apply_replacements (text) {
190186

191187
if (argv.dropfirst) {
192188
const header = fs.readFileSync(path.resolve(__dirname, 'DROP_FUNCTIONS.sql')).toString();
193-
content = header + separator + content
189+
content = header + internalSeparator + content
194190
}
195191

196192
const footer = fs.readFileSync(path.resolve(__dirname, 'VERSION.sql')).toString();
197-
content += separator + footer;
193+
content += internalSeparator + footer;
198194

199195
content = apply_replacements(content);
200196

201-
// Write modules.sql file
202-
fs.writeFileSync(path.join(outputDir, 'modules.sql'), content);
203-
console.log(`Write ${outputDir}/modules.sql`);
197+
// Split into individual statements and write output files,
198+
// chunking into multiple files if the BigQuery query size limit is exceeded
199+
// BigQuery hard limit is 1,024,000 characters per query.
200+
// Using 768,000 characters to keep files well under the limit and reduce future splits.
201+
const BQ_QUERY_CHAR_LIMIT = 1024 * 1000; // 1,024,000 characters
202+
const SAFE_LIMIT = 768 * 1000; // 768,000 characters
203+
204+
const statements = content.split(internalSeparator).filter(q => q.trim());
205+
const singleContent = statements.join(outputSeparator);
206+
207+
if (singleContent.length <= SAFE_LIMIT) {
208+
fs.writeFileSync(path.join(outputDir, 'modules.sql'), singleContent);
209+
console.log(`Write ${outputDir}/modules.sql`);
210+
} else {
211+
const chunks = [];
212+
let currentStatements = [];
213+
let currentSize = 0;
214+
215+
for (const stmt of statements) {
216+
if (stmt.length > BQ_QUERY_CHAR_LIMIT) {
217+
console.log(`ERROR: Single statement exceeds BigQuery limit (${stmt.length} chars)`);
218+
process.exit(1);
219+
}
220+
const addedSize = stmt.length + outputSeparator.length;
221+
if (currentSize + addedSize > SAFE_LIMIT && currentStatements.length > 0) {
222+
chunks.push(currentStatements.join(outputSeparator));
223+
currentStatements = [stmt];
224+
currentSize = addedSize;
225+
} else {
226+
currentStatements.push(stmt);
227+
currentSize += addedSize;
228+
}
229+
}
230+
if (currentStatements.length > 0) {
231+
chunks.push(currentStatements.join(outputSeparator));
232+
}
233+
234+
for (let i = 0; i < chunks.length; i++) {
235+
const filename = `modules_${String(i + 1).padStart(2, '0')}.sql`;
236+
fs.writeFileSync(path.join(outputDir, filename), chunks[i]);
237+
console.log(`Write ${outputDir}/${filename}`);
238+
}
239+
}

clouds/bigquery/modules/Makefile

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,10 @@ build: $(NODE_MODULES_DEV)
5858
deploy: check build
5959
echo "Deploying modules..."
6060
$(MAKE) dataset-create
61-
GOOGLE_APPLICATION_CREDENTIALS=$(GOOGLE_APPLICATION_CREDENTIALS) \
62-
$(COMMON_DIR)/run-script.js $(BUILD_DIR)/modules.sql
61+
for f in $(BUILD_DIR)/modules*.sql; do \
62+
GOOGLE_APPLICATION_CREDENTIALS=$(GOOGLE_APPLICATION_CREDENTIALS) \
63+
$(COMMON_DIR)/run-script.js $$f || exit 1; \
64+
done
6365
ifdef BQ_PERMISSIONS
6466
BQ_PERMISSIONS_TARGET_DATASET=$(BQ_DEPLOY_DATASET) $(COMMON_DIR)/$(MODULE_PERMISSIONS_BASH)
6567
endif

0 commit comments

Comments (0)