1 """Implementation of JSONDecoder
7 from simplejson.scanner import make_scanner
9 from simplejson._speedups import scanstring as c_scanstring
13 __all__ = ['JSONDecoder']
15 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
17 def _floatconstants():
18 _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
19 if sys.byteorder != 'big':
20 _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
21 nan, inf = struct.unpack('dd', _BYTES)
24 NaN, PosInf, NegInf = _floatconstants()
27 def linecol(doc, pos):
28 lineno = doc.count('\n', 0, pos) + 1
32 colno = pos - doc.rindex('\n', 0, pos)
36 def errmsg(msg, doc, pos, end=None):
37 # Note that this function is called from _speedups
38 lineno, colno = linecol(doc, pos)
40 #fmt = '{0}: line {1} column {2} (char {3})'
41 #return fmt.format(msg, lineno, colno, pos)
42 fmt = '%s: line %d column %d (char %d)'
43 return fmt % (msg, lineno, colno, pos)
44 endlineno, endcolno = linecol(doc, end)
45 #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
46 #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
47 fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
48 return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
57 STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
59 '"': u'"', '\\': u'\\', '/': u'/',
60 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
63 DEFAULT_ENCODING = "utf-8"
65 def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
66 """Scan the string s for a JSON string. End is the index of the
67 character in s after the quote that started the JSON string.
68 Unescapes all valid JSON string escape sequences and raises ValueError
69 on attempt to decode an invalid string. If strict is False then literal
70 control characters are allowed in the string.
72 Returns a tuple of the decoded string and the index of the character in s
73 after the end quote."""
75 encoding = DEFAULT_ENCODING
77 _append = chunks.append
83 errmsg("Unterminated string starting at", s, begin))
85 content, terminator = chunk.groups()
86 # Content contains zero or more unescaped string characters
88 if not isinstance(content, unicode):
89 content = unicode(content, encoding)
91 # Terminator is the end of string, a literal control character,
92 # or a backslash denoting that an escape sequence follows
95 elif terminator != '\\':
97 msg = "Invalid control character %r at" % (terminator,)
98 #msg = "Invalid control character {0!r} at".format(terminator)
99 raise ValueError(errmsg(msg, s, end))
107 errmsg("Unterminated string starting at", s, begin))
108 # If not a unicode escape sequence, must be in the lookup table
113 msg = "Invalid \\escape: " + repr(esc)
114 raise ValueError(errmsg(msg, s, end))
117 # Unicode escape sequence
118 esc = s[end + 1:end + 5]
121 msg = "Invalid \\uXXXX escape"
122 raise ValueError(errmsg(msg, s, end))
124 # Check for surrogate pair on UCS-4 systems
125 if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
126 msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
127 if not s[end + 5:end + 7] == '\\u':
128 raise ValueError(errmsg(msg, s, end))
129 esc2 = s[end + 7:end + 11]
131 raise ValueError(errmsg(msg, s, end))
133 uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
137 # Append the unescaped character
139 return u''.join(chunks), end
142 # Use speedup if available
143 scanstring = c_scanstring or py_scanstring
145 WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
146 WHITESPACE_STR = ' \t\n\r'
148 def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
150 # Use a slice to prevent IndexError from being raised; the following
151 # check will raise a more specific ValueError if the string is empty.
152 nextchar = s[end:end + 1]
153 # Normally we expect nextchar == '"'
156 end = _w(s, end).end()
157 nextchar = s[end:end + 1]
158 # Trivial empty object
160 return pairs, end + 1
161 elif nextchar != '"':
162 raise ValueError(errmsg("Expecting property name", s, end))
165 key, end = scanstring(s, end, encoding, strict)
167 # To skip some function call overhead we optimize the fast paths where
168 # the JSON key separator is ": " or just ":".
169 if s[end:end + 1] != ':':
170 end = _w(s, end).end()
171 if s[end:end + 1] != ':':
172 raise ValueError(errmsg("Expecting : delimiter", s, end))
180 end = _w(s, end + 1).end()
185 value, end = scan_once(s, end)
186 except StopIteration:
187 raise ValueError(errmsg("Expecting object", s, end))
193 end = _w(s, end + 1).end()
201 elif nextchar != ',':
202 raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
210 end = _w(s, end + 1).end()
217 raise ValueError(errmsg("Expecting property name", s, end - 1))
219 if object_hook is not None:
220 pairs = object_hook(pairs)
223 def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
225 nextchar = s[end:end + 1]
227 end = _w(s, end + 1).end()
228 nextchar = s[end:end + 1]
229 # Look-ahead for trivial empty array
231 return values, end + 1
232 _append = values.append
235 value, end = scan_once(s, end)
236 except StopIteration:
237 raise ValueError(errmsg("Expecting object", s, end))
239 nextchar = s[end:end + 1]
241 end = _w(s, end + 1).end()
242 nextchar = s[end:end + 1]
246 elif nextchar != ',':
247 raise ValueError(errmsg("Expecting , delimiter", s, end))
253 end = _w(s, end + 1).end()
259 class JSONDecoder(object):
260 """Simple JSON <http://json.org> decoder
262 Performs the following translations in decoding by default:
264 +---------------+-------------------+
266 +===============+===================+
268 +---------------+-------------------+
270 +---------------+-------------------+
272 +---------------+-------------------+
273 | number (int) | int, long |
274 +---------------+-------------------+
275 | number (real) | float |
276 +---------------+-------------------+
278 +---------------+-------------------+
280 +---------------+-------------------+
282 +---------------+-------------------+
284 It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
285 their corresponding ``float`` values, which is outside the JSON spec.
289 def __init__(self, encoding=None, object_hook=None, parse_float=None,
290 parse_int=None, parse_constant=None, strict=True):
291 """``encoding`` determines the encoding used to interpret any ``str``
292 objects decoded by this instance (utf-8 by default). It has no
293 effect when decoding ``unicode`` objects.
295 Note that currently only encodings that are a superset of ASCII work;
296 strings of other encodings should be passed in as ``unicode``.
298 ``object_hook``, if specified, will be called with the result
299 of every JSON object decoded and its return value will be used in
300 place of the given ``dict``. This can be used to provide custom
301 deserializations (e.g. to support JSON-RPC class hinting).
303 ``parse_float``, if specified, will be called with the string
304 of every JSON float to be decoded. By default this is equivalent to
305 float(num_str). This can be used to use another datatype or parser
306 for JSON floats (e.g. decimal.Decimal).
308 ``parse_int``, if specified, will be called with the string
309 of every JSON int to be decoded. By default this is equivalent to
310 int(num_str). This can be used to use another datatype or parser
311 for JSON integers (e.g. float).
313 ``parse_constant``, if specified, will be called with one of the
314 following strings: -Infinity, Infinity, NaN.
315 This can be used to raise an exception if invalid JSON numbers
319 self.encoding = encoding
320 self.object_hook = object_hook
321 self.parse_float = parse_float or float
322 self.parse_int = parse_int or int
323 self.parse_constant = parse_constant or _CONSTANTS.__getitem__
325 self.parse_object = JSONObject
326 self.parse_array = JSONArray
327 self.parse_string = scanstring
328 self.scan_once = make_scanner(self)
330 def decode(self, s, _w=WHITESPACE.match):
331 """Return the Python representation of ``s`` (a ``str`` or ``unicode``
332 instance containing a JSON document)
335 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
336 end = _w(s, end).end()
338 raise ValueError(errmsg("Extra data", s, end, len(s)))
341 def raw_decode(self, s, idx=0):
342 """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
343 with a JSON document) and return a 2-tuple of the Python
344 representation and the index in ``s`` where the document ended.
346 This can be used to decode a JSON document from a string that may
347 have extraneous data at the end.
351 obj, end = self.scan_once(s, idx)
352 except StopIteration:
353 raise ValueError("No JSON object could be decoded")