Skip to content

Commit 235d043

Browse files
committed
custom reader wip
1 parent 03a366d commit 235d043

1 file changed

Lines changed: 118 additions & 53 deletions

File tree

src/main/clojure/clojure/data/json.clj

Lines changed: 118 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,70 @@
1515
(:import (java.io PrintWriter PushbackReader StringWriter
1616
Writer StringReader EOFException)))
1717

18+
;; CUSTOM PUSHBACK READER
19+
20+
(set! *warn-on-reflection* true)
21+
22+
;; Internal reader abstraction used by the JSON parser so that it can run
;; over either a java.io.PushbackReader or an in-memory String.
(definterface InternalPBR
  ;; Reads one character and returns it as an int; implementations in this
  ;; file return a negative value once the input is exhausted.
  (^int readChar [])

  ;; Bulk read into `buffer` starting at index `start`, at most `bufflen`
  ;; chars. Returns the number of chars read, or a negative value at end of
  ;; input.
  (^long readChars [^chars buffer ^long start ^long bufflen])

  ;; Pushes a single, previously read character back onto the input.
  (^void unreadChar [^int c])

  ;; Pushes back `bufflen` chars taken from `buffer` starting at `off`.
  (^void unreadChars [^chars buffer ^int off ^int bufflen])

  ;; Exposes the not-yet-consumed input as a java.io.Reader.
  (^java.io.Reader toReader []))
28+
29+
;; InternalPBR backed by a java.io.PushbackReader. Every operation is
;; forwarded unchanged to the wrapped reader, so the reader's own semantics
;; (return values, push-back capacity, exceptions) apply.
(deftype ReaderPBR [^PushbackReader rdr]
  InternalPBR
  ;; Single-character read.
  (readChar [this]
    (.read rdr))
  ;; Bulk read of up to `n` chars into `cbuf`, starting at `offset`.
  (readChars [this cbuf offset n]
    (.read rdr ^chars cbuf offset n))
  ;; Single-character push-back.
  (unreadChar [this ch]
    (.unread rdr ch))
  ;; Push-back of `n` chars from `cbuf`, starting at `offset`.
  (unreadChars [this cbuf offset n]
    (.unread rdr cbuf offset n))
  ;; The wrapped reader itself is handed out; nothing is copied.
  (toReader [this]
    rdr))
41+
42+
(comment
43+
(compile 'clojure.data.json)
44+
)
45+
46+
;; InternalPBR over an in-memory String.
;;
;; `pos` is the index of the next character to hand out and `len` is the
;; length of `s`. Pushing characters back only moves `pos` backwards; the
;; characters supplied to the unread methods are not inspected, so callers
;; must only push back what they have just read.
(deftype StringPBR [^String s ^:unsynchronized-mutable ^long pos ^long len]
  InternalPBR
  ;; Returns the character at `pos` as an int and advances by one, or -1
  ;; (without advancing) once `pos` has reached `len`.
  (readChar [_]
    (if (< pos len)
      (let [p pos]
        (set! pos (unchecked-inc p))
        (int (.charAt s p)))
      (int -1)))

  ;; Copies up to `bufflen` chars into `buffer` at index `start`.
  ;; Returns the number of chars copied, -1 when no input is left, and 0 when
  ;; input remains but nothing was requested (mirrors java.io.Reader#read).
  (readChars [_ buffer start bufflen]
    (let [remaining (- len pos)
          n (Math/min remaining bufflen)]
      (if (pos? remaining)
        (if (pos? n)
          (let [p pos
                end (+ p n)]
            (set! pos end)
            (.getChars s p end ^chars buffer start)
            n)
          0)
        -1)))

  ;; Steps back one character. The parser also hands back the -1 end-of-input
  ;; marker (e.g. after a number that ends the input); readChar did not
  ;; advance when it produced that marker, so it must not move the cursor,
  ;; otherwise already-consumed input would be read a second time.
  (unreadChar [_ c]
    (when-not (neg? c)
      (set! pos (unchecked-dec pos)))
    nil)

  ;; Steps back `bufflen` characters.
  (unreadChars [_ buffer start bufflen]
    (set! pos (unchecked-subtract pos bufflen))
    nil)

  ;; Returns the unread remainder of the string. It is wrapped in a
  ;; PushbackReader (capacity 64) because that is what read-str used to pass
  ;; to :extra-data-fn before this abstraction was introduced.
  (toReader [_]
    (PushbackReader. (StringReader. (.substring s (int pos))) 64)))
73+
74+
(defn- pushback-pbr
  ;; Wraps a java.io.PushbackReader in the ReaderPBR adapter.
  [^PushbackReader r]
  (ReaderPBR. r))
77+
78+
(defn- string-pbr
  ;; Builds a StringPBR positioned at the first character of `s`.
  [^String s]
  (let [n (long (.length s))]
    (StringPBR. s 0 n)))
81+
1882
;;; JSON READER
1983

2084
(set! *warn-on-reflection* true)
@@ -50,23 +114,23 @@
50114
~@(when (odd? (count clauses))
51115
[(last clauses)])))
52116

53-
(defn- read-hex-char [^InternalPBR stream]
  ;; Expects to be called with the head of the stream AFTER the
  ;; initial "\u". Reads the next four characters from the stream and
  ;; returns the char whose code unit they spell in hexadecimal.
  ;;
  ;; Throws EOFException if the input ends inside the escape, and
  ;; NumberFormatException if any of the four characters is not an ASCII
  ;; hex digit.
  (let [a (.readChar stream)
        b (.readChar stream)
        c (.readChar stream)
        d (.readChar stream)]
    (when (or (neg? a) (neg? b) (neg? c) (neg? d))
      (throw (EOFException.
              "JSON error (end-of-file inside Unicode character escape)")))
    (let [s (str (char a) (char b) (char c) (char d))]
      ;; Integer/parseInt on its own accepts a leading sign and non-ASCII
      ;; digits, so "\u+041" would be decoded and "\u-001" would fail later
      ;; with an unrelated range error. Validate the digits up front.
      (when-not (re-matches #"[0-9a-fA-F]{4}" s)
        (throw (NumberFormatException.
                (str "JSON error (invalid Unicode character escape): \\u" s))))
      (char (Integer/parseInt s 16)))))
65129

66-
(defn- read-escaped-char [^PushbackReader stream]
130+
(defn- read-escaped-char [^InternalPBR stream]
67131
;; Expects to be called with the head of the stream AFTER the
68132
;; initial backslash.
69-
(let [c (.read stream)]
133+
(let [c (.readChar stream)]
70134
(when (neg? c)
71135
(throw (EOFException. "JSON error (end-of-file inside escaped char)")))
72136
(codepoint-case c
@@ -78,10 +142,10 @@
78142
\t \tab
79143
\u (read-hex-char stream))))
80144

81-
(defn- slow-read-string [^PushbackReader stream ^String already-read]
145+
(defn- slow-read-string [^InternalPBR stream ^String already-read]
82146
(let [buffer (StringBuilder. already-read)]
83147
(loop []
84-
(let [c (.read stream)]
148+
(let [c (.readChar stream)]
85149
(when (neg? c)
86150
(throw (EOFException. "JSON error (end-of-file inside string)")))
87151
(codepoint-case c
@@ -91,11 +155,11 @@
91155
(do (.append buffer (char c))
92156
(recur)))))))
93157

94-
(defn- read-quoted-string [^PushbackReader stream]
158+
(defn- read-quoted-string [^InternalPBR stream]
95159
;; Expects to be called with the head of the stream AFTER the
96160
;; opening quotation mark.
97161
(let [buffer ^chars (char-array 64)
98-
read (.read stream buffer 0 64)
162+
read (.readChars stream buffer 0 64)
99163
end-index (unchecked-dec-int read)]
100164
(when (neg? read)
101165
(throw (EOFException. "JSON error (end-of-file inside string)")))
@@ -104,14 +168,14 @@
104168
(codepoint-case c
105169
\" (let [off (unchecked-inc-int i)
106170
len (unchecked-subtract-int read off)]
107-
(.unread stream buffer off len)
171+
(.unreadChars stream buffer off len)
108172
(String. buffer 0 i))
109173
\\ (let [off i
110174
len (unchecked-subtract-int read off)]
111-
(.unread stream buffer off len)
175+
(.unreadChars stream buffer off len)
112176
(slow-read-string stream (String. buffer 0 i)))
113177
(if (= i end-index)
114-
(do (.unread stream c)
178+
(do (.unreadChar stream c)
115179
(slow-read-string stream (String. buffer 0 i)))
116180
(recur (unchecked-inc-int i))))))))
117181

@@ -127,10 +191,10 @@
127191
(bigdec string)
128192
(Double/valueOf string)))
129193

130-
(defn- read-number [^PushbackReader stream bigdec?]
194+
(defn- read-number [^InternalPBR stream bigdec?]
131195
(let [buffer (StringBuilder.)
132196
decimal? (loop [stage :minus]
133-
(let [c (.read stream)]
197+
(let [c (.readChar stream)]
134198
(case stage
135199
:minus
136200
(codepoint-case c
@@ -168,10 +232,10 @@
168232
(recur :exp-symbol))
169233
;; early exit
170234
:whitespace
171-
(do (.unread stream c)
235+
(do (.unreadChar stream c)
172236
false)
173237
(\, \] \} -1)
174-
(do (.unread stream c)
238+
(do (.unreadChar stream c)
175239
false)
176240
(throw (Exception. "JSON error (invalid number literal)")))
177241
;; previous character is a "0"
@@ -185,10 +249,10 @@
185249
(recur :exp-symbol))
186250
;; early exit
187251
:whitespace
188-
(do (.unread stream c)
252+
(do (.unreadChar stream c)
189253
false)
190254
(\, \] \} -1)
191-
(do (.unread stream c)
255+
(do (.unreadChar stream c)
192256
false)
193257
;; Disallow zero-padded numbers or invalid characters
194258
(throw (Exception. "JSON error (invalid number literal)")))
@@ -210,10 +274,10 @@
210274
(recur :exp-symbol))
211275
;; early exit
212276
:whitespace
213-
(do (.unread stream c)
277+
(do (.unreadChar stream c)
214278
true)
215279
(\, \] \} -1)
216-
(do (.unread stream c)
280+
(do (.unreadChar stream c)
217281
true)
218282
(throw (Exception. "JSON error (invalid number literal)")))
219283
;; previous character is a "e" or "E"
@@ -240,28 +304,28 @@
240304
(do (.append buffer (char c))
241305
(recur :exp-digit))
242306
:whitespace
243-
(do (.unread stream c)
307+
(do (.unreadChar stream c)
244308
true)
245309
(\, \] \} -1)
246-
(do (.unread stream c)
310+
(do (.unreadChar stream c)
247311
true)
248312
(throw (Exception. "JSON error (invalid number literal)"))))))]
249313
(if decimal?
250314
(read-decimal (str buffer) bigdec?)
251315
(read-integer (str buffer)))))
252316

253-
(defn- next-token [^InternalPBR stream]
  ;; Skips whitespace and returns the next code point from the stream,
  ;; or -1 when the input is exhausted.
  (loop [c (.readChar stream)]
    (if (< 32 c)
      ;; Fast path: anything above the space character is a token start.
      (int c)
      (codepoint-case (int c)
        :whitespace (recur (.readChar stream))
        -1 -1
        ;; Any other value <= 32 is handed back to the caller so that it is
        ;; reported through the caller's own JSON error handling. Without
        ;; this default the underlying `case` has no matching clause and
        ;; fails with an IllegalArgumentException.
        (int c)))))
260324

261325
(defn invalid-array-exception
  "Returns (does not throw) a new Exception describing a malformed JSON array."
  []
  (new Exception "JSON error (invalid array)"))
263327

264-
(defn- read-array* [^PushbackReader stream options]
328+
(defn- read-array* [^InternalPBR stream options]
265329
;; Handles all array values after the first.
266330
(loop [result (transient [])]
267331
(let [r (conj! result (-read stream true nil options))]
@@ -270,18 +334,18 @@
270334
\, (recur r)
271335
(throw (invalid-array-exception))))))
272336

273-
(defn- read-array [^InternalPBR stream options]
  ;; Expects to be called with the head of the stream AFTER the
  ;; opening bracket.
  ;; Only handles array value.
  ;;
  ;; Returns [] for an empty array; otherwise pushes the first token back and
  ;; delegates to read-array*. Throws the invalid-array exception on a
  ;; leading comma and EOFException when the input ends right after the
  ;; opening bracket.
  (let [c (int (next-token stream))]
    (codepoint-case c
      \] []
      \, (throw (invalid-array-exception))
      ;; Never push the -1 end-of-input marker back: it is not a character
      ;; that was read, and a string-backed stream that rewinds on unread
      ;; would present the opening bracket again and recurse without end.
      -1 (throw (EOFException. "JSON error (end-of-file inside array)"))
      (do (.unreadChar stream c)
          (read-array* stream options)))))
283347

284-
(defn- read-key [^PushbackReader stream]
348+
(defn- read-key [^InternalPBR stream]
285349
(let [c (int (next-token stream))]
286350
(if (= c (codepoint \"))
287351
(let [key (read-quoted-string stream)]
@@ -292,7 +356,7 @@
292356
nil
293357
(throw (Exception. (str "JSON error (non-string key in object), found `" (char c) "`, expected `\"`")))))))
294358

295-
(defn- read-object [^PushbackReader stream options]
359+
(defn- read-object [^InternalPBR stream options]
296360
;; Expects to be called with the head of the stream AFTER the
297361
;; opening bracket.
298362
(let [key-fn (get options :key-fn)
@@ -317,36 +381,36 @@
317381
(throw (Exception. "JSON error empty entry in object is not allowed"))))))))
318382

319383
(defn- -read
320-
[^PushbackReader stream eof-error? eof-value options]
384+
[^InternalPBR stream eof-error? eof-value options]
321385
(let [c (int (next-token stream))]
322386
(codepoint-case c
323387
;; Read numbers
324388
(\- \0 \1 \2 \3 \4 \5 \6 \7 \8 \9)
325-
(do (.unread stream c)
389+
(do (.unreadChar stream c)
326390
(read-number stream (:bigdec options)))
327391

328392
;; Read strings
329393
\" (read-quoted-string stream)
330394

331395
;; Read null as nil
332-
\n (if (and (= (codepoint \u) (.read stream))
333-
(= (codepoint \l) (.read stream))
334-
(= (codepoint \l) (.read stream)))
396+
\n (if (and (= (codepoint \u) (.readChar stream))
397+
(= (codepoint \l) (.readChar stream))
398+
(= (codepoint \l) (.readChar stream)))
335399
nil
336400
(throw (Exception. "JSON error (expected null)")))
337401

338402
;; Read true
339-
\t (if (and (= (codepoint \r) (.read stream))
340-
(= (codepoint \u) (.read stream))
341-
(= (codepoint \e) (.read stream)))
403+
\t (if (and (= (codepoint \r) (.readChar stream))
404+
(= (codepoint \u) (.readChar stream))
405+
(= (codepoint \e) (.readChar stream)))
342406
true
343407
(throw (Exception. "JSON error (expected true)")))
344408

345409
;; Read false
346-
\f (if (and (= (codepoint \a) (.read stream))
347-
(= (codepoint \l) (.read stream))
348-
(= (codepoint \s) (.read stream))
349-
(= (codepoint \e) (.read stream)))
410+
\f (if (and (= (codepoint \a) (.readChar stream))
411+
(= (codepoint \l) (.readChar stream))
412+
(= (codepoint \s) (.readChar stream))
413+
(= (codepoint \e) (.readChar stream)))
350414
false
351415
(throw (Exception. "JSON error (expected false)")))
352416

@@ -364,16 +428,16 @@
364428
(str "JSON error (unexpected character): " (char c))))))))
365429

366430
(defn- -read1
  ;; Reads one JSON value from the stream. When options contains an
  ;; :extra-data-fn and the value is not the caller's EOF sentinel, peeks one
  ;; character ahead: if input remains, the character is pushed back and
  ;; :extra-data-fn is called with the value and a Reader over the rest.
  [^InternalPBR stream eof-error? eof-value options]
  (let [result   (-read stream eof-error? eof-value options)
        on-extra (:extra-data-fn options)]
    (if (and on-extra
             (or eof-error? (not (identical? eof-value result))))
      (let [peeked (.readChar stream)]
        (if (neg? peeked)
          ;; Nothing follows the value.
          result
          (do
            (.unreadChar stream peeked)
            (on-extra result (.toReader stream)))))
      result)))
379443

@@ -386,7 +450,7 @@
386450
(defn on-extra-throw-remaining
  "Pass as :extra-data-fn to `read` or `read-str` to throw if data is found
  after the first object and return the remaining data in ex-data :remaining."
  [val rdr]
  (let [leftover (slurp rdr)
        message  (str "Found extra data after json object: " leftover)
        details  {:val val, :remaining leftover}]
    (throw (ex-info message details))))
@@ -443,9 +507,10 @@
443507
[reader & {:as options}]
444508
(let [{:keys [eof-error? eof-value]
445509
:or {eof-error? true}} options
446-
pbr (if (instance? PushbackReader reader)
447-
reader
448-
(PushbackReader. reader 64))]
510+
pbr (pushback-pbr
511+
(if (instance? PushbackReader reader)
512+
reader
513+
(PushbackReader. reader 64)))]
449514
(->> options
450515
(merge default-read-options)
451516
(-read1 pbr eof-error? eof-value))))
@@ -458,7 +523,7 @@
458523
:or {eof-error? true}} options]
459524
(->> options
460525
(merge default-read-options)
461-
(-read1 (PushbackReader. (StringReader. string) 64) eof-error? eof-value))))
526+
(-read1 (string-pbr string) eof-error? eof-value))))
462527

463528
;;; JSON WRITER
464529

0 commit comments

Comments
 (0)