1 # Copyright (c) 2010, 2011 Nicira Networks
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
19 escapes = {ord('"'): u"\\\"",
28 escapes[i] = u"\\u%04x" % i
def __dump_string(stream, s):
    """Writes 's' to 'stream' enclosed in double quotes.  Each character of
    's' whose code point has an entry in the module-level 'escapes' table is
    replaced by that entry; every other character is written unchanged."""
    pieces = [escapes.get(ord(ch), ch) for ch in s]
    stream.write(u'"%s"' % ''.join(pieces))
33 def to_stream(obj, stream, pretty=False, sort_keys=True):
37 stream.write(u"false")
40 elif type(obj) in (int, long):
41 stream.write(u"%d" % obj)
42 elif type(obj) == float:
43 stream.write("%.15g" % obj)
44 elif type(obj) == unicode:
45 __dump_string(stream, obj)
46 elif type(obj) == str:
47 __dump_string(stream, unicode(obj))
48 elif type(obj) == dict:
51 items = sorted(obj.items())
53 items = obj.iteritems()
54 for i, (key, value) in enumerate(items):
57 __dump_string(stream, unicode(key))
59 to_stream(value, stream, pretty, sort_keys)
61 elif type(obj) in (list, tuple):
63 for i, value in enumerate(obj):
66 to_stream(value, stream, pretty, sort_keys)
69 raise Exception("can't serialize %s as JSON" % obj)
71 def to_file(obj, name, pretty=False, sort_keys=True):
72 stream = open(name, "w")
74 to_stream(obj, stream, pretty, sort_keys)
78 def to_string(obj, pretty=False, sort_keys=True):
79 output = StringIO.StringIO()
80 to_stream(obj, output, pretty, sort_keys)
85 def from_stream(stream):
86 p = Parser(check_trailer=True)
88 buf = stream.read(4096)
89 if buf == "" or p.feed(buf) != len(buf):
94 stream = open(name, "r")
96 return from_stream(stream)
102 s = unicode(s, 'utf-8')
103 except UnicodeDecodeError, e:
104 seq = ' '.join(["0x%2x" % ord(c)
105 for c in e.object[e.start:e.end] if ord(c) >= 0x80])
106 return ("not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq)
107 p = Parser(check_trailer=True)
111 class Parser(object):
112 ## Maximum height of parsing stack. ##
115 def __init__(self, check_trailer=False):
116 self.check_trailer = check_trailer
119 self.lex_state = Parser.__lex_start
122 self.column_number = 0
126 self.parse_state = Parser.__parse_start
128 self.member_name = None
134 def __lex_start_space(self, c):
136 def __lex_start_alpha(self, c):
138 self.lex_state = Parser.__lex_keyword
139 def __lex_start_token(self, c):
140 self.__parser_input(c)
141 def __lex_start_number(self, c):
143 self.lex_state = Parser.__lex_number
144 def __lex_start_string(self, c):
145 self.lex_state = Parser.__lex_string
146 def __lex_start_error(self, c):
147 if ord(c) >= 32 and ord(c) < 128:
148 self.__error("invalid character '%s'" % c)
150 self.__error("invalid character U+%04x" % ord(c))
152 __lex_start_actions = {}
154 __lex_start_actions[c] = __lex_start_space
155 for c in "abcdefghijklmnopqrstuvwxyz":
156 __lex_start_actions[c] = __lex_start_alpha
158 __lex_start_actions[c] = __lex_start_token
159 for c in "-0123456789":
160 __lex_start_actions[c] = __lex_start_number
161 __lex_start_actions['"'] = __lex_start_string
162 def __lex_start(self, c):
163 Parser.__lex_start_actions.get(
164 c, Parser.__lex_start_error)(self, c)
168 for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
169 __lex_alpha[c] = True
170 def __lex_finish_keyword(self):
171 if self.buffer == "false":
172 self.__parser_input(False)
173 elif self.buffer == "true":
174 self.__parser_input(True)
175 elif self.buffer == "null":
176 self.__parser_input(None)
178 self.__error("invalid keyword '%s'" % self.buffer)
179 def __lex_keyword(self, c):
180 if c in Parser.__lex_alpha:
184 self.__lex_finish_keyword()
# Matches a complete number.  The four groups are, in order: the optional
# minus sign, the integer part, the optional fraction digits, and the
# optional exponent (with its own optional sign).  Written as a raw string
# so that "\." reaches the regex engine as-is instead of relying on Python
# passing an unrecognized string escape through.
__number_re = re.compile(
    r"(-)?(0|[1-9][0-9]*)(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")
188 def __lex_finish_number(self):
190 m = Parser.__number_re.match(s)
192 sign, integer, fraction, exp = m.groups()
193 if (exp is not None and
194 (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
195 self.__error("exponent outside valid range")
198 if fraction is not None and len(fraction.lstrip('0')) == 0:
202 if fraction is not None:
203 sig_string += fraction
204 significand = int(sig_string)
207 if fraction is not None:
208 pow10 -= len(fraction)
213 self.__parser_input(0)
215 elif significand <= 2**63:
216 while pow10 > 0 and significand <= 2*63:
219 while pow10 < 0 and significand % 10 == 0:
223 ((not sign and significand < 2**63) or
224 (sign and significand <= 2**63))):
226 self.__parser_input(-significand)
228 self.__parser_input(significand)
232 if value == float("inf") or value == float("-inf"):
233 self.__error("number outside valid range")
236 # Suppress negative zero.
238 self.__parser_input(value)
239 elif re.match("-?0[0-9]", s):
240 self.__error("leading zeros not allowed")
241 elif re.match("-([^0-9]|$)", s):
242 self.__error("'-' must be followed by digit")
243 elif re.match("-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
244 self.__error("decimal point must be followed by digit")
245 elif re.search("e[-+]?([^0-9]|$)", s):
246 self.__error("exponent must contain at least one digit")
248 self.__error("syntax error in number")
250 def __lex_number(self, c):
251 if c in ".0123456789eE-+":
255 self.__lex_finish_number()
258 __4hex_re = re.compile("[0-9a-fA-F]{4}")
259 def __lex_4hex(self, s):
261 self.__error("quoted string ends within \\u escape")
262 elif not Parser.__4hex_re.match(s):
263 self.__error("malformed \\u escape")
265 self.__error("null bytes not supported in quoted strings")
269 def __is_leading_surrogate(c):
270 """Returns true if 'c' is a Unicode code point for a leading
272 return c >= 0xd800 and c <= 0xdbff
274 def __is_trailing_surrogate(c):
275 """Returns true if 'c' is a Unicode code point for a trailing
277 return c >= 0xdc00 and c <= 0xdfff
279 def __utf16_decode_surrogate_pair(leading, trailing):
280 """Returns the unicode code point corresponding to leading surrogate
281 'leading' and trailing surrogate 'trailing'. The return value will not
282 make any sense if 'leading' or 'trailing' are not in the correct ranges
283 for leading or trailing surrogates."""
284 # Leading surrogate: 110110wwwwxxxxxx
285 # Trailing surrogate: 110111xxxxxxxxxx
286 # Code point: 000uuuuuxxxxxxxxxxxxxxxx
287 w = (leading >> 6) & 0xf
290 x1 = trailing & 0x3ff
291 return (u << 16) | (x0 << 10) | x1
292 __unescape = {'"': u'"',
300 def __lex_finish_string(self):
304 backslash = inp.find('\\')
308 out += inp[:backslash]
309 inp = inp[backslash + 1:]
311 self.__error("quoted string may not end with backslash")
314 replacement = Parser.__unescape.get(inp[0])
315 if replacement is not None:
320 self.__error("bad escape \\%s" % inp[0])
323 c0 = self.__lex_4hex(inp[1:5])
328 if Parser.__is_leading_surrogate(c0):
329 if inp[:2] != u'\\u':
330 self.__error("malformed escaped surrogate pair")
332 c1 = self.__lex_4hex(inp[2:6])
335 if not Parser.__is_trailing_surrogate(c1):
336 self.__error("second half of escaped surrogate pair is "
337 "not trailing surrogate")
339 code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
343 out += unichr(code_point)
344 self.__parser_input('string', out)
346 def __lex_string_escape(self, c):
348 self.lex_state = Parser.__lex_string
350 def __lex_string(self, c):
353 self.lex_state = Parser.__lex_string_escape
355 self.__lex_finish_string()
359 self.__error("U+%04X must be escaped in quoted string" % ord(c))
362 def __lex_input(self, c):
363 self.byte_number += 1
365 self.column_number = 0
366 self.line_number += 1
368 self.column_number += 1
370 eat = self.lex_state(self, c)
371 assert eat is True or eat is False
374 def __parse_start(self, token, string):
380 self.__error("syntax error at beginning of input")
381 def __parse_end(self, token, string):
382 self.__error("trailing garbage at end of input")
383 def __parse_object_init(self, token, string):
387 self.__parse_object_name(token, string)
388 def __parse_object_name(self, token, string):
389 if token == 'string':
390 self.member_name = string
391 self.parse_state = Parser.__parse_object_colon
393 self.__error("syntax error parsing object expecting string")
394 def __parse_object_colon(self, token, string):
396 self.parse_state = Parser.__parse_object_value
398 self.__error("syntax error parsing object expecting ':'")
399 def __parse_object_value(self, token, string):
400 self.__parse_value(token, string, Parser.__parse_object_next)
401 def __parse_object_next(self, token, string):
403 self.parse_state = Parser.__parse_object_name
407 self.__error("syntax error expecting '}' or ','")
408 def __parse_array_init(self, token, string):
412 self.__parse_array_value(token, string)
413 def __parse_array_value(self, token, string):
414 self.__parse_value(token, string, Parser.__parse_array_next)
415 def __parse_array_next(self, token, string):
417 self.parse_state = Parser.__parse_array_value
421 self.__error("syntax error expecting ']' or ','")
422 def __parser_input(self, token, string=None):
423 self.lex_state = Parser.__lex_start
425 #old_state = self.parse_state
426 self.parse_state(self, token, string)
427 #print ("token=%s string=%s old_state=%s new_state=%s"
428 # % (token, string, old_state, self.parse_state))
430 def __put_value(self, value):
432 if type(top) == dict:
433 top[self.member_name] = value
437 def __parser_push(self, new_json, next_state):
438 if len(self.stack) < Parser.MAX_HEIGHT:
439 if len(self.stack) > 0:
440 self.__put_value(new_json)
441 self.stack.append(new_json)
442 self.parse_state = next_state
444 self.__error("input exceeds maximum nesting depth %d" %
446 def __push_object(self):
447 self.__parser_push({}, Parser.__parse_object_init)
448 def __push_array(self):
449 self.__parser_push([], Parser.__parse_array_init)
451 def __parser_pop(self):
452 if len(self.stack) == 1:
453 self.parse_state = Parser.__parse_end
454 if not self.check_trailer:
459 if type(top) == list:
460 self.parse_state = Parser.__parse_array_next
462 self.parse_state = Parser.__parse_object_next
464 def __parse_value(self, token, string, next_state):
465 if token in [False, None, True] or type(token) in [int, long, float]:
466 self.__put_value(token)
467 elif token == 'string':
468 self.__put_value(string)
475 self.__error("syntax error expecting value")
477 self.parse_state = next_state
479 def __error(self, message):
480 if self.error is None:
481 self.error = ("line %d, column %d, byte %d: %s"
482 % (self.line_number, self.column_number,
483 self.byte_number, message))
489 if self.done or i >= len(s):
491 if self.__lex_input(s[i]):
498 if self.lex_state == Parser.__lex_start:
500 elif self.lex_state in (Parser.__lex_string,
501 Parser.__lex_string_escape):
502 self.__error("unexpected end of input in quoted string")
504 self.__lex_input(" ")
506 if self.parse_state == Parser.__parse_start:
507 self.__error("empty input stream")
508 elif self.parse_state != Parser.__parse_end:
509 self.__error("unexpected end of input")
511 if self.error == None:
512 assert len(self.stack) == 1
513 return self.stack.pop()