1 # Copyright (c) 2010, 2011 Nicira Networks
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
19 escapes = {ord('"'): u"\\\"",
28 escapes[i] = u"\\u%04x" % i
30 def __dump_string(stream, s):
34 escape = escapes.get(x)
41 def to_stream(obj, stream, pretty=False, sort_keys=True):
45 stream.write(u"false")
48 elif type(obj) in (int, long):
49 stream.write(u"%d" % obj)
50 elif type(obj) == float:
51 stream.write("%.15g" % obj)
52 elif type(obj) == unicode:
53 __dump_string(stream, obj)
54 elif type(obj) == str:
55 __dump_string(stream, unicode(obj))
56 elif type(obj) == dict:
59 items = sorted(obj.items())
61 items = obj.iteritems()
63 for key, value in items:
67 __dump_string(stream, unicode(key))
69 to_stream(value, stream, pretty, sort_keys)
71 elif type(obj) in (list, tuple):
78 to_stream(value, stream, pretty, sort_keys)
81 raise Error("can't serialize %s as JSON" % obj)
83 def to_file(obj, name, pretty=False, sort_keys=True):
84 stream = open(name, "w")
86 to_stream(obj, stream, pretty, sort_keys)
90 def to_string(obj, pretty=False, sort_keys=True):
91 output = StringIO.StringIO()
92 to_stream(obj, output, pretty, sort_keys)
97 def from_stream(stream):
98 p = Parser(check_trailer=True)
100 buf = stream.read(4096)
101 if buf == "" or p.feed(buf) != len(buf):
106 stream = open(name, "r")
108 return from_stream(stream)
114 s = unicode(s, 'utf-8')
115 except UnicodeDecodeError, e:
116 seq = ' '.join(["0x%2x" % ord(c)
117 for c in e.object[e.start:e.end] if ord(c) >= 0x80])
118 return ("not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq)
119 p = Parser(check_trailer=True)
123 class Parser(object):
124 ## Maximum height of parsing stack. ##
127 def __init__(self, check_trailer=False):
128 self.check_trailer = check_trailer
131 self.lex_state = Parser.__lex_start
134 self.column_number = 0
138 self.parse_state = Parser.__parse_start
140 self.member_name = None
146 def __lex_start_space(self, c):
148 def __lex_start_alpha(self, c):
150 self.lex_state = Parser.__lex_keyword
def __lex_start_token(self, c):
    """Passes 'c' to the parser as a token.

    This is one of the actions stored in __lex_start_actions: the
    character is itself the complete token, so nothing is buffered."""
    self.__parser_input(c)
153 def __lex_start_number(self, c):
155 self.lex_state = Parser.__lex_number
def __lex_start_string(self, c):
    """Switches the lexer to the __lex_string state.

    This is the action that __lex_start_actions associates with '"'.  The
    character 'c' itself is not stored."""
    self.lex_state = Parser.__lex_string
158 def __lex_start_error(self, c):
159 if ord(c) >= 32 and ord(c) < 128:
160 self.__error("invalid character '%s'" % c)
162 self.__error("invalid character U+%04x" % ord(c))
164 __lex_start_actions = {}
166 __lex_start_actions[c] = __lex_start_space
167 for c in "abcdefghijklmnopqrstuvwxyz":
168 __lex_start_actions[c] = __lex_start_alpha
170 __lex_start_actions[c] = __lex_start_token
171 for c in "-0123456789":
172 __lex_start_actions[c] = __lex_start_number
173 __lex_start_actions['"'] = __lex_start_string
174 def __lex_start(self, c):
175 Parser.__lex_start_actions.get(
176 c, Parser.__lex_start_error)(self, c)
180 for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
181 __lex_alpha[c] = True
182 def __lex_finish_keyword(self):
183 if self.buffer == "false":
184 self.__parser_input(False)
185 elif self.buffer == "true":
186 self.__parser_input(True)
187 elif self.buffer == "null":
188 self.__parser_input(None)
190 self.__error("invalid keyword '%s'" % self.buffer)
191 def __lex_keyword(self, c):
192 if c in Parser.__lex_alpha:
196 self.__lex_finish_keyword()
199 __number_re = re.compile("(-)?(0|[1-9][0-9]*)(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")
200 def __lex_finish_number(self):
202 m = Parser.__number_re.match(s)
204 sign, integer, fraction, exp = m.groups()
205 if (exp is not None and
206 (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
207 self.__error("exponent outside valid range")
210 if fraction is not None and len(fraction.lstrip('0')) == 0:
214 if fraction is not None:
215 sig_string += fraction
216 significand = int(sig_string)
219 if fraction is not None:
220 pow10 -= len(fraction)
225 self.__parser_input(0)
227 elif significand <= 2**63:
228 while pow10 > 0 and significand <= 2*63:
231 while pow10 < 0 and significand % 10 == 0:
235 ((not sign and significand < 2**63) or
236 (sign and significand <= 2**63))):
238 self.__parser_input(-significand)
240 self.__parser_input(significand)
244 if value == float("inf") or value == float("-inf"):
245 self.__error("number outside valid range")
248 # Suppress negative zero.
250 self.__parser_input(value)
251 elif re.match("-?0[0-9]", s):
252 self.__error("leading zeros not allowed")
253 elif re.match("-([^0-9]|$)", s):
254 self.__error("'-' must be followed by digit")
255 elif re.match("-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
256 self.__error("decimal point must be followed by digit")
257 elif re.search("e[-+]?([^0-9]|$)", s):
258 self.__error("exponent must contain at least one digit")
260 self.__error("syntax error in number")
262 def __lex_number(self, c):
263 if c in ".0123456789eE-+":
267 self.__lex_finish_number()
270 __4hex_re = re.compile("[0-9a-fA-F]{4}")
271 def __lex_4hex(self, s):
273 self.__error("quoted string ends within \\u escape")
274 elif not Parser.__4hex_re.match(s):
275 self.__error("malformed \\u escape")
277 self.__error("null bytes not supported in quoted strings")
281 def __is_leading_surrogate(c):
282 """Returns true if 'c' is a Unicode code point for a leading
284 return c >= 0xd800 and c <= 0xdbff
286 def __is_trailing_surrogate(c):
287 """Returns true if 'c' is a Unicode code point for a trailing
289 return c >= 0xdc00 and c <= 0xdfff
291 def __utf16_decode_surrogate_pair(leading, trailing):
292 """Returns the unicode code point corresponding to leading surrogate
293 'leading' and trailing surrogate 'trailing'. The return value will not
294 make any sense if 'leading' or 'trailing' are not in the correct ranges
295 for leading or trailing surrogates."""
296 # Leading surrogate: 110110wwwwxxxxxx
297 # Trailing surrogate: 110111xxxxxxxxxx
298 # Code point: 000uuuuuxxxxxxxxxxxxxxxx
299 w = (leading >> 6) & 0xf
302 x1 = trailing & 0x3ff
303 return (u << 16) | (x0 << 10) | x1
304 __unescape = {'"': u'"',
312 def __lex_finish_string(self):
316 backslash = inp.find('\\')
320 out += inp[:backslash]
321 inp = inp[backslash + 1:]
323 self.__error("quoted string may not end with backslash")
326 replacement = Parser.__unescape.get(inp[0])
327 if replacement is not None:
332 self.__error("bad escape \\%s" % inp[0])
335 c0 = self.__lex_4hex(inp[1:5])
340 if Parser.__is_leading_surrogate(c0):
341 if inp[:2] != u'\\u':
342 self.__error("malformed escaped surrogate pair")
344 c1 = self.__lex_4hex(inp[2:6])
347 if not Parser.__is_trailing_surrogate(c1):
348 self.__error("second half of escaped surrogate pair is "
349 "not trailing surrogate")
351 code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
355 out += unichr(code_point)
356 self.__parser_input('string', out)
358 def __lex_string_escape(self, c):
360 self.lex_state = Parser.__lex_string
362 def __lex_string(self, c):
365 self.lex_state = Parser.__lex_string_escape
367 self.__lex_finish_string()
371 self.__error("U+%04X must be escaped in quoted string" % ord(c))
374 def __lex_input(self, c):
375 self.byte_number += 1
377 self.column_number = 0
378 self.line_number += 1
380 self.column_number += 1
382 eat = self.lex_state(self, c)
383 assert eat is True or eat is False
386 def __parse_start(self, token, string):
392 self.__error("syntax error at beginning of input")
def __parse_end(self, token, string):
    """Parser state in which any further token is an error.

    'token' and 'string' are ignored; every call reports trailing garbage
    through __error()."""
    self.__error("trailing garbage at end of input")
395 def __parse_object_init(self, token, string):
399 self.__parse_object_name(token, string)
400 def __parse_object_name(self, token, string):
401 if token == 'string':
402 self.member_name = string
403 self.parse_state = Parser.__parse_object_colon
405 self.__error("syntax error parsing object expecting string")
406 def __parse_object_colon(self, token, string):
408 self.parse_state = Parser.__parse_object_value
410 self.__error("syntax error parsing object expecting ':'")
def __parse_object_value(self, token, string):
    """Parser state that expects the value of an object member.

    Delegates to __parse_value(), naming __parse_object_next as the state
    to continue with."""
    self.__parse_value(token, string, Parser.__parse_object_next)
413 def __parse_object_next(self, token, string):
415 self.parse_state = Parser.__parse_object_name
419 self.__error("syntax error expecting '}' or ','")
420 def __parse_array_init(self, token, string):
424 self.__parse_array_value(token, string)
def __parse_array_value(self, token, string):
    """Parser state that expects an array element.

    Delegates to __parse_value(), naming __parse_array_next as the state
    to continue with."""
    self.__parse_value(token, string, Parser.__parse_array_next)
427 def __parse_array_next(self, token, string):
429 self.parse_state = Parser.__parse_array_value
433 self.__error("syntax error expecting ']' or ','")
434 def __parser_input(self, token, string=None):
435 self.lex_state = Parser.__lex_start
437 #old_state = self.parse_state
438 self.parse_state(self, token, string)
439 #print ("token=%s string=%s old_state=%s new_state=%s"
440 # % (token, string, old_state, self.parse_state))
442 def __put_value(self, value):
444 if type(top) == dict:
445 top[self.member_name] = value
449 def __parser_push(self, new_json, next_state):
450 if len(self.stack) < Parser.MAX_HEIGHT:
451 if len(self.stack) > 0:
452 self.__put_value(new_json)
453 self.stack.append(new_json)
454 self.parse_state = next_state
456 self.__error("input exceeds maximum nesting depth %d" %
def __push_object(self):
    """Begins a JSON object: hands a new empty dict to __parser_push(),
    with __parse_object_init as the next parse state."""
    self.__parser_push({}, Parser.__parse_object_init)
def __push_array(self):
    """Begins a JSON array: hands a new empty list to __parser_push(),
    with __parse_array_init as the next parse state."""
    self.__parser_push([], Parser.__parse_array_init)
463 def __parser_pop(self):
464 if len(self.stack) == 1:
465 self.parse_state = Parser.__parse_end
466 if not self.check_trailer:
471 if type(top) == list:
472 self.parse_state = Parser.__parse_array_next
474 self.parse_state = Parser.__parse_object_next
476 def __parse_value(self, token, string, next_state):
477 if token in [False, None, True] or type(token) in [int, long, float]:
478 self.__put_value(token)
479 elif token == 'string':
480 self.__put_value(string)
487 self.__error("syntax error expecting value")
489 self.parse_state = next_state
491 def __error(self, message):
492 if self.error is None:
493 self.error = ("line %d, column %d, byte %d: %s"
494 % (self.line_number, self.column_number,
495 self.byte_number, message))
501 if self.done or i >= len(s):
503 if self.__lex_input(s[i]):
510 if self.lex_state == Parser.__lex_start:
512 elif self.lex_state in (Parser.__lex_string,
513 Parser.__lex_string_escape):
514 self.__error("unexpected end of input in quoted string")
516 self.__lex_input(" ")
518 if self.parse_state == Parser.__parse_start:
519 self.__error("empty input stream")
520 elif self.parse_state != Parser.__parse_end:
521 self.__error("unexpected end of input")
523 if self.error == None:
524 assert len(self.stack) == 1
525 return self.stack.pop()