Skip to content

Commit ee39bb9

Browse files
committed
refactor: avoid intermediate list in decodeDictV1 rep-level path
1 parent 3aa44b6 commit ee39bb9

1 file changed

Lines changed: 26 additions & 28 deletions

File tree

src/DataFrame/IO/Parquet/Dictionary.hs

Lines changed: 26 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,31 @@ applyDictToColumn dict idxs maxDef defLvls
192192
then DI.fromVector vec -- VB.Vector (Maybe a) → OptionalColumn
193193
else DI.fromVector (V.map fromJust vec) -- VB.Vector a → BoxedColumn/UnboxedColumn
194194

195+
-- | Resolve dictionary indices and pass the resulting values to the
-- type-specific rep-level stitcher.
--
-- Every entry of @idxs@ is looked up in the dictionary with 'V.!', so the
-- indices are assumed to be in range for the dictionary page. The looked-up
-- values are produced as a list straight from a right fold over the index
-- vector: the stitchers take a list, and no intermediate vector of decoded
-- values is built along the way.
applyDictWithRepStitch ::
    -- | Decoded dictionary page; the constructor selects the stitcher.
    DictVals ->
    -- | Dictionary indices, one per looked-up value.
    VU.Vector Int ->
    -- | Maximum repetition level, forwarded to the stitcher.
    Int ->
    -- | Maximum definition level, forwarded to the stitcher.
    Int ->
    -- | Repetition levels, forwarded to the stitcher.
    [Int] ->
    -- | Definition levels, forwarded to the stitcher.
    [Int] ->
    DI.Column
applyDictWithRepStitch dictVals idxs maxRep maxDef repLvls defLvls =
    case dictVals of
        DBool ds ->
            stitchForRepBool maxRep maxDef repLvls defLvls (lookupAll ds idxs)
        DInt32 ds ->
            stitchForRepInt32 maxRep maxDef repLvls defLvls (lookupAll ds idxs)
        DInt64 ds ->
            stitchForRepInt64 maxRep maxDef repLvls defLvls (lookupAll ds idxs)
        DInt96 ds ->
            stitchForRepUTCTime maxRep maxDef repLvls defLvls (lookupAll ds idxs)
        DFloat ds ->
            stitchForRepFloat maxRep maxDef repLvls defLvls (lookupAll ds idxs)
        DDouble ds ->
            stitchForRepDouble maxRep maxDef repLvls defLvls (lookupAll ds idxs)
        DText ds ->
            stitchForRepText maxRep maxDef repLvls defLvls (lookupAll ds idxs)
  where
    -- Only the 'Int' indices need to be unboxable here. Mapping with
    -- 'VU.map' instead would demand an 'Unbox' instance for the looked-up
    -- element type, which boxed dictionary values (e.g. text) do not have.
    -- The helper takes the index vector as an argument rather than capturing
    -- @idxs@, so it stays polymorphic in the element type across branches.
    lookupAll ds ixs = VU.foldr (\i rest -> ds V.! i : rest) [] ixs
219+
195220
decodeDictV1 ::
196221
Maybe DictVals ->
197222
Int ->
@@ -214,34 +239,7 @@ decodeDictV1 dictValsM maxDef maxRep repLvls defLvls nPresent bytes =
214239
++ ", expected "
215240
++ show nPresent
216241
if maxRep > 0
217-
then do
218-
case dictVals of
219-
DBool ds ->
220-
pure $
221-
stitchForRepBool maxRep maxDef repLvls defLvls (map (ds V.!) (VU.toList idxs))
222-
DInt32 ds ->
223-
pure $
224-
stitchForRepInt32 maxRep maxDef repLvls defLvls (map (ds V.!) (VU.toList idxs))
225-
DInt64 ds ->
226-
pure $
227-
stitchForRepInt64 maxRep maxDef repLvls defLvls (map (ds V.!) (VU.toList idxs))
228-
DInt96 ds ->
229-
pure $
230-
stitchForRepUTCTime
231-
maxRep
232-
maxDef
233-
repLvls
234-
defLvls
235-
(map (ds V.!) (VU.toList idxs))
236-
DFloat ds ->
237-
pure $
238-
stitchForRepFloat maxRep maxDef repLvls defLvls (map (ds V.!) (VU.toList idxs))
239-
DDouble ds ->
240-
pure $
241-
stitchForRepDouble maxRep maxDef repLvls defLvls (map (ds V.!) (VU.toList idxs))
242-
DText ds ->
243-
pure $
244-
stitchForRepText maxRep maxDef repLvls defLvls (map (ds V.!) (VU.toList idxs))
242+
then pure $ applyDictWithRepStitch dictVals idxs maxRep maxDef repLvls defLvls
245243
else case dictVals of
246244
-- Fast path: unboxable types, no nulls — one allocation via VU.map
247245
DInt32 ds | maxDef == 0 -> pure $ DI.fromUnboxedVector (VU.map (ds V.!) idxs)

0 commit comments

Comments
 (0)