Skip to content

Commit 76b4b6c

Browse files
authored
perf(gen-schema-view): update schema view query to eliminate unnecessary window function calls, enhancing efficiency (#2759)
* perf(gen-schema-view): update the schema view query to eliminate unnecessary window function calls, improving efficiency
* fix(gen-schema-view): replace ROW_NUMBER with RANK in the schema view query to address a pointer from Gemini
* fix(gen-schema-view): update SQL snapshot fixtures to use RANK instead of ROW_NUMBER
* chore(gen-schema-view): update an outdated comment and remove unused code
1 parent eea0e40 commit 76b4b6c

4 files changed

Lines changed: 63 additions & 209 deletions

File tree

firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/fixtures/sql/emptySchemaLatest.sql

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,23 +15,15 @@ FROM
1515
SELECT
1616
document_name,
1717
document_id,
18-
FIRST_VALUE(timestamp) OVER(
19-
PARTITION BY document_name
20-
ORDER BY
21-
timestamp DESC
22-
) AS timestamp,
23-
FIRST_VALUE(operation) OVER(
24-
PARTITION BY document_name
25-
ORDER BY
26-
timestamp DESC
27-
) AS operation,
28-
FIRST_VALUE(operation) OVER(
18+
timestamp,
19+
operation,
20+
operation = "DELETE" AS is_deleted
21+
FROM
22+
`test.test_dataset.test_table` QUALIFY RANK() OVER(
2923
PARTITION BY document_name
3024
ORDER BY
3125
timestamp DESC
32-
) = "DELETE" AS is_deleted
33-
FROM
34-
`test.test_dataset.test_table`
26+
) = 1
3527
)
3628
WHERE
3729
NOT is_deleted

firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/fixtures/sql/fullSchemaLatest.sql

Lines changed: 13 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -28,68 +28,26 @@ FROM
2828
SELECT
2929
document_name,
3030
document_id,
31-
FIRST_VALUE(timestamp) OVER(
32-
PARTITION BY document_name
33-
ORDER BY
34-
timestamp DESC
35-
) AS timestamp,
36-
FIRST_VALUE(operation) OVER(
37-
PARTITION BY document_name
38-
ORDER BY
39-
timestamp DESC
40-
) AS operation,
41-
FIRST_VALUE(operation) OVER(
42-
PARTITION BY document_name
43-
ORDER BY
44-
timestamp DESC
45-
) = "DELETE" AS is_deleted,
46-
FIRST_VALUE(JSON_EXTRACT_SCALAR(data, '$.name')) OVER(
47-
PARTITION BY document_name
48-
ORDER BY
49-
timestamp DESC
50-
) AS name,
51-
`test.test_dataset.firestoreArray`(
52-
FIRST_VALUE(JSON_EXTRACT(data, '$.favorite_numbers')) OVER(
53-
PARTITION BY document_name
54-
ORDER BY
55-
timestamp DESC
56-
)
57-
) AS favorite_numbers,
58-
`test.test_dataset.firestoreTimestamp`(
59-
FIRST_VALUE(JSON_EXTRACT(data, '$.last_login')) OVER(
60-
PARTITION BY document_name
61-
ORDER BY
62-
timestamp DESC
63-
)
64-
) AS last_login,
65-
`test.test_dataset.firestoreGeopoint`(
66-
FIRST_VALUE(JSON_EXTRACT(data, '$.last_location')) OVER(
67-
PARTITION BY document_name
68-
ORDER BY
69-
timestamp DESC
70-
)
71-
) AS last_location,
31+
timestamp,
32+
operation,
33+
operation = "DELETE" AS is_deleted,
34+
JSON_EXTRACT_SCALAR(data, '$.name') AS name,
35+
`test.test_dataset.firestoreArray`(JSON_EXTRACT(data, '$.favorite_numbers')) AS favorite_numbers,
36+
`test.test_dataset.firestoreTimestamp`(JSON_EXTRACT(data, '$.last_login')) AS last_login,
37+
`test.test_dataset.firestoreGeopoint`(JSON_EXTRACT(data, '$.last_location')) AS last_location,
7238
SAFE_CAST(
73-
FIRST_VALUE(JSON_EXTRACT_SCALAR(data, '$.last_location._latitude')) OVER(
74-
PARTITION BY document_name
75-
ORDER BY
76-
timestamp DESC
77-
) AS NUMERIC
39+
JSON_EXTRACT_SCALAR(data, '$.last_location._latitude') AS NUMERIC
7840
) AS last_location_latitude,
7941
SAFE_CAST(
80-
FIRST_VALUE(JSON_EXTRACT_SCALAR(data, '$.last_location._longitude')) OVER(
81-
PARTITION BY document_name
82-
ORDER BY
83-
timestamp DESC
84-
) AS NUMERIC
42+
JSON_EXTRACT_SCALAR(data, '$.last_location._longitude') AS NUMERIC
8543
) AS last_location_longitude,
86-
FIRST_VALUE(JSON_EXTRACT_SCALAR(data, '$.friends.name')) OVER(
44+
JSON_EXTRACT_SCALAR(data, '$.friends.name') AS friends_name
45+
FROM
46+
`test.test_dataset.test_table` QUALIFY RANK() OVER(
8747
PARTITION BY document_name
8848
ORDER BY
8949
timestamp DESC
90-
) AS friends_name
91-
FROM
92-
`test.test_dataset.test_table`
50+
) = 1
9351
)
9452
WHERE
9553
NOT is_deleted

firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/fixtures/sql/viewColumnRenameSchema.sql

Lines changed: 26 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -44,145 +44,41 @@ FROM
4444
SELECT
4545
document_name,
4646
document_id,
47-
FIRST_VALUE(timestamp) OVER(
48-
PARTITION BY document_name
49-
ORDER BY
50-
timestamp DESC
51-
) AS timestamp,
52-
FIRST_VALUE(operation) OVER(
53-
PARTITION BY document_name
54-
ORDER BY
55-
timestamp DESC
56-
) AS operation,
57-
FIRST_VALUE(operation) OVER(
58-
PARTITION BY document_name
59-
ORDER BY
60-
timestamp DESC
61-
) = "DELETE" AS is_deleted,
62-
`test.test_dataset.firestoreArray`(
63-
FIRST_VALUE(JSON_EXTRACT(data, '$.order')) OVER(
64-
PARTITION BY document_name
65-
ORDER BY
66-
timestamp DESC
67-
)
68-
) AS newArray,
69-
FIRST_VALUE(JSON_EXTRACT_SCALAR(data, '$.limit')) OVER(
70-
PARTITION BY document_name
71-
ORDER BY
72-
timestamp DESC
73-
) AS newString,
74-
`test.test_dataset.firestoreNumber`(
75-
FIRST_VALUE(JSON_EXTRACT_SCALAR(data, '$.from')) OVER(
76-
PARTITION BY document_name
77-
ORDER BY
78-
timestamp DESC
79-
)
80-
) AS newNumber,
81-
`test.test_dataset.firestoreBoolean`(
82-
FIRST_VALUE(JSON_EXTRACT_SCALAR(data, '$.select')) OVER(
83-
PARTITION BY document_name
84-
ORDER BY
85-
timestamp DESC
86-
)
87-
) AS newBoolean,
88-
`test.test_dataset.firestoreGeopoint`(
89-
FIRST_VALUE(JSON_EXTRACT(data, '$.where')) OVER(
90-
PARTITION BY document_name
91-
ORDER BY
92-
timestamp DESC
93-
)
94-
) AS newGeopoint,
47+
timestamp,
48+
operation,
49+
operation = "DELETE" AS is_deleted,
50+
`test.test_dataset.firestoreArray`(JSON_EXTRACT(data, '$.order')) AS newArray,
51+
JSON_EXTRACT_SCALAR(data, '$.limit') AS newString,
52+
`test.test_dataset.firestoreNumber`(JSON_EXTRACT_SCALAR(data, '$.from')) AS newNumber,
53+
`test.test_dataset.firestoreBoolean`(JSON_EXTRACT_SCALAR(data, '$.select')) AS newBoolean,
54+
`test.test_dataset.firestoreGeopoint`(JSON_EXTRACT(data, '$.where')) AS newGeopoint,
9555
SAFE_CAST(
96-
FIRST_VALUE(JSON_EXTRACT_SCALAR(data, '$.where._latitude')) OVER(
97-
PARTITION BY document_name
98-
ORDER BY
99-
timestamp DESC
100-
) AS NUMERIC
56+
JSON_EXTRACT_SCALAR(data, '$.where._latitude') AS NUMERIC
10157
) AS newGeopoint_latitude,
10258
SAFE_CAST(
103-
FIRST_VALUE(JSON_EXTRACT_SCALAR(data, '$.where._longitude')) OVER(
104-
PARTITION BY document_name
105-
ORDER BY
106-
timestamp DESC
107-
) AS NUMERIC
59+
JSON_EXTRACT_SCALAR(data, '$.where._longitude') AS NUMERIC
10860
) AS newGeopoint_longitude,
109-
`test.test_dataset.firestoreTimestamp`(
110-
FIRST_VALUE(JSON_EXTRACT(data, '$.between')) OVER(
111-
PARTITION BY document_name
112-
ORDER BY
113-
timestamp DESC
114-
)
115-
) AS newTimestamp,
116-
FIRST_VALUE(JSON_EXTRACT_SCALAR(data, '$.like')) OVER(
117-
PARTITION BY document_name
118-
ORDER BY
119-
timestamp DESC
120-
) AS newReference,
121-
FIRST_VALUE(JSON_EXTRACT_SCALAR(data, '$.and.like')) OVER(
122-
PARTITION BY document_name
123-
ORDER BY
124-
timestamp DESC
125-
) AS newMapColumnName_referenceMap,
126-
`test.test_dataset.firestoreArray`(
127-
FIRST_VALUE(JSON_EXTRACT(data, '$.and.order')) OVER(
128-
PARTITION BY document_name
129-
ORDER BY
130-
timestamp DESC
131-
)
132-
) AS newMapColumnName_arrayMap,
133-
FIRST_VALUE(JSON_EXTRACT_SCALAR(data, '$.and.limit')) OVER(
134-
PARTITION BY document_name
135-
ORDER BY
136-
timestamp DESC
137-
) AS newMapColumnName_stringMap,
138-
`test.test_dataset.firestoreNumber`(
139-
FIRST_VALUE(JSON_EXTRACT_SCALAR(data, '$.and.from')) OVER(
140-
PARTITION BY document_name
141-
ORDER BY
142-
timestamp DESC
143-
)
144-
) AS newMapColumnName_numberMap,
145-
`test.test_dataset.firestoreBoolean`(
146-
FIRST_VALUE(JSON_EXTRACT_SCALAR(data, '$.and.select')) OVER(
147-
PARTITION BY document_name
148-
ORDER BY
149-
timestamp DESC
150-
)
151-
) AS newMapColumnName_booleanMap,
152-
`test.test_dataset.firestoreGeopoint`(
153-
FIRST_VALUE(JSON_EXTRACT(data, '$.and.where')) OVER(
154-
PARTITION BY document_name
155-
ORDER BY
156-
timestamp DESC
157-
)
158-
) AS newMapColumnName_geopointMap,
61+
`test.test_dataset.firestoreTimestamp`(JSON_EXTRACT(data, '$.between')) AS newTimestamp,
62+
JSON_EXTRACT_SCALAR(data, '$.like') AS newReference,
63+
JSON_EXTRACT_SCALAR(data, '$.and.like') AS newMapColumnName_referenceMap,
64+
`test.test_dataset.firestoreArray`(JSON_EXTRACT(data, '$.and.order')) AS newMapColumnName_arrayMap,
65+
JSON_EXTRACT_SCALAR(data, '$.and.limit') AS newMapColumnName_stringMap,
66+
`test.test_dataset.firestoreNumber`(JSON_EXTRACT_SCALAR(data, '$.and.from')) AS newMapColumnName_numberMap,
67+
`test.test_dataset.firestoreBoolean`(JSON_EXTRACT_SCALAR(data, '$.and.select')) AS newMapColumnName_booleanMap,
68+
`test.test_dataset.firestoreGeopoint`(JSON_EXTRACT(data, '$.and.where')) AS newMapColumnName_geopointMap,
15969
SAFE_CAST(
160-
FIRST_VALUE(
161-
JSON_EXTRACT_SCALAR(data, '$.and.where._latitude')
162-
) OVER(
163-
PARTITION BY document_name
164-
ORDER BY
165-
timestamp DESC
166-
) AS NUMERIC
70+
JSON_EXTRACT_SCALAR(data, '$.and.where._latitude') AS NUMERIC
16771
) AS newMapColumnName_geopointMap_latitude,
16872
SAFE_CAST(
169-
FIRST_VALUE(
170-
JSON_EXTRACT_SCALAR(data, '$.and.where._longitude')
171-
) OVER(
172-
PARTITION BY document_name
173-
ORDER BY
174-
timestamp DESC
175-
) AS NUMERIC
73+
JSON_EXTRACT_SCALAR(data, '$.and.where._longitude') AS NUMERIC
17674
) AS newMapColumnName_geopointMap_longitude,
177-
`test.test_dataset.firestoreTimestamp`(
178-
FIRST_VALUE(JSON_EXTRACT(data, '$.and.between')) OVER(
179-
PARTITION BY document_name
180-
ORDER BY
181-
timestamp DESC
182-
)
183-
) AS newMapColumnName_timestampMap
75+
`test.test_dataset.firestoreTimestamp`(JSON_EXTRACT(data, '$.and.between')) AS newMapColumnName_timestampMap
18476
FROM
185-
`test.test_dataset.test_table`
77+
`test.test_dataset.test_table` QUALIFY RANK() OVER(
78+
PARTITION BY document_name
79+
ORDER BY
80+
timestamp DESC
81+
) = 1
18682
)
18783
WHERE
18884
NOT is_deleted

firestore-bigquery-export/scripts/gen-schema-view/src/snapshot.ts

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -66,15 +66,19 @@ export const buildLatestSchemaSnapshotViewQuery = (
6666
schema: FirestoreSchema,
6767
useNewSqlSyntax = false
6868
): any => {
69-
const firstValue = (selector: string, isArrayType?: boolean) => {
70-
if (isArrayType) return selector;
71-
return `FIRST_VALUE(${selector}) OVER(PARTITION BY document_name ORDER BY timestamp DESC)`;
72-
};
69+
// Use identity transformer - no FIRST_VALUE wrapping needed
70+
// We'll use QUALIFY RANK() = 1 instead to filter to latest row
71+
const identitySelector = (selector: string) => selector;
7372

7473
// We need to pass the dataset id into the parser so that we can call the
7574
// fully qualified json2array persistent user-defined function in the proper
7675
// scope.
77-
const result = processFirestoreSchema(datasetId, "data", schema, firstValue);
76+
const result = processFirestoreSchema(
77+
datasetId,
78+
"data",
79+
schema,
80+
identitySelector
81+
);
7882

7983
const [
8084
schemaFieldExtractors,
@@ -135,6 +139,9 @@ export const buildLatestSchemaSnapshotViewQuery = (
135139
})
136140
.join(" ");
137141

142+
// Use QUALIFY with single RANK() instead of multiple FIRST_VALUE() calls
143+
// This dramatically improves performance for wide schemas (200+ columns)
144+
// by using only ONE window function regardless of field count
138145
let query = `
139146
SELECT
140147
document_name,
@@ -146,14 +153,15 @@ export const buildLatestSchemaSnapshotViewQuery = (
146153
SELECT
147154
document_name,
148155
document_id,
149-
${firstValue(`timestamp`)} AS timestamp,
150-
${firstValue(`operation`)} AS operation,
151-
${firstValue(`operation`)} = "DELETE" AS is_deleted${
152-
fieldValueSelectorClauses.length > 0 ? `,` : ``
153-
}
156+
timestamp,
157+
operation,
158+
operation = "DELETE" AS is_deleted${
159+
fieldValueSelectorClauses.length > 0 ? `,` : ``
160+
}
154161
${fieldValueSelectorClauses}
155162
FROM \`${process.env.PROJECT_ID}.${datasetId}.${rawViewName}\`
156163
${offsetJoins}
164+
QUALIFY RANK() OVER(PARTITION BY document_name ORDER BY timestamp DESC) = 1
157165
)
158166
WHERE NOT is_deleted
159167
`;

0 commit comments

Comments
 (0)