1 # Copyright (c) 2010, 2011 Nicira Networks
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
# Escape table consulted by __dump_string(): the keys are code points (ord()
# values) and the values are the escaped text written in place of that
# character.
# NOTE(review): only the first entry and one generated "\uXXXX" assignment are
# visible here; the remaining entries and the loop that binds 'i' are not, so
# the full set of escaped characters should be confirmed.
19 escapes = {ord('"'): u"\\\"",
28 escapes[i] = u"\\u%04x" % i
# Write 's' to 'stream' enclosed in double quotes.  Each character whose code
# point has an entry in 'escapes' is replaced by that entry; every other
# character is written unchanged.
30 def __dump_string(stream, s):
31 stream.write(u'"%s"' % ''.join(escapes.get(ord(c), c) for c in s))
# Serialize 'obj' as JSON text onto 'stream'.  Dispatch is on the exact type
# of 'obj' (type() comparisons, so subclasses of these types do not match).
# Dict values and list/tuple elements are handled by recursing with the same
# 'pretty' and 'sort_keys' arguments.  A value that cannot be serialized is
# reported by raising Error.
# NOTE(review): this is Python 2 code (long, unicode, iteritems).  Several
# branch heads and whatever 'pretty' controls are not visible here, so only
# the visible branches are annotated.
33 def to_stream(obj, stream, pretty=False, sort_keys=True):
37 stream.write(u"false")
# Integers are written in decimal.
40 elif type(obj) in (int, long):
41 stream.write(u"%d" % obj)
# Floats are written with up to 15 significant digits.  Unlike the other
# visible writes, this format string is not a u"" literal.
42 elif type(obj) == float:
43 stream.write("%.15g" % obj)
44 elif type(obj) == unicode:
45 __dump_string(stream, obj)
# Byte strings are converted with unicode() before being written.
46 elif type(obj) == str:
47 __dump_string(stream, unicode(obj))
48 elif type(obj) == dict:
# Two ways of obtaining the items are visible: sorted and unsorted.
# Presumably 'sort_keys' selects between them -- the condition is not
# visible here; confirm.
51 items = sorted(obj.items())
53 items = obj.iteritems()
55 for key, value in items:
# Keys are converted with unicode() and written as JSON strings.
59 __dump_string(stream, unicode(key))
61 to_stream(value, stream, pretty, sort_keys)
63 elif type(obj) in (list, tuple):
70 to_stream(value, stream, pretty, sort_keys)
73 raise Error("can't serialize %s as JSON" % obj)
# Serialize 'obj' as JSON into the file called 'name', which is opened in
# text write mode ("w"), by delegating to to_stream().
# NOTE(review): the code that closes 'stream' is not visible here; confirm
# that the file is closed even when to_stream() raises.
75 def to_file(obj, name, pretty=False, sort_keys=True):
76 stream = open(name, "w")
78 to_stream(obj, stream, pretty, sort_keys)
# Serialize 'obj' as JSON into an in-memory StringIO buffer using
# to_stream().
# NOTE(review): the lines that read the buffer back and return the result
# are not visible here.
82 def to_string(obj, pretty=False, sort_keys=True):
83 output = StringIO.StringIO()
84 to_stream(obj, output, pretty, sort_keys)
# Parse JSON read from 'stream' using a Parser created with
# check_trailer=True.  Input is read in chunks of at most 4096 characters and
# handed to p.feed().
# NOTE(review): the enclosing loop, the action taken when the condition below
# holds, and the return value are not visible here.
89 def from_stream(stream):
90 p = Parser(check_trailer=True)
92 buf = stream.read(4096)
# True at end of input (empty read) or when p.feed() returns something other
# than len(buf) (presumably the number of characters it consumed).
93 if buf == "" or p.feed(buf) != len(buf):
# NOTE(review): fragment -- the 'def' line of this function is not visible
# here (presumably from_file(name); confirm).  It opens the file called
# 'name' for reading and returns whatever from_stream() returns for it.  No
# close of 'stream' is visible.
98 stream = open(name, "r")
100 return from_stream(stream)
# NOTE(review): fragment -- the 'def' and 'try' lines are not visible here
# (presumably from_string(s); confirm).  The visible part decodes 's' as
# UTF-8.  On a decode failure it returns (rather than raises) an error
# message string that lists, in hex, the bytes >= 0x80 from the span reported
# by the UnicodeDecodeError.  On the visible path after decoding, a Parser is
# created with check_trailer=True; how it is used is not visible.
# The 'except X, e' form is Python 2 syntax.
106 s = unicode(s, 'utf-8')
107 except UnicodeDecodeError, e:
108 seq = ' '.join(["0x%2x" % ord(c)
109 for c in e.object[e.start:e.end] if ord(c) >= 0x80])
110 return ("not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq)
111 p = Parser(check_trailer=True)
# Character-at-a-time JSON parser.  It is driven by two pieces of state, each
# stored as a reference to the function that handles the next input:
# self.lex_state (called with one character) and self.parse_state (called
# with one token).  Errors are recorded in self.error as message strings.
# NOTE(review): the constant described by the comment below (referenced
# elsewhere as Parser.MAX_HEIGHT) is not visible here.
115 class Parser(object):
116 ## Maximum height of parsing stack. ##
# Set up a fresh parser.  'check_trailer' is stored and later consulted by
# __parser_pop() when the outermost value has been completed.  The lexer
# starts in __lex_start and the parser in __parse_start.
# NOTE(review): other attributes used by the methods of this class (e.g.
# stack, error, done, buffer, line_number, byte_number) are presumably
# initialized on lines that are not visible here.
119 def __init__(self, check_trailer=False):
120 self.check_trailer = check_trailer
123 self.lex_state = Parser.__lex_start
126 self.column_number = 0
130 self.parse_state = Parser.__parse_start
132 self.member_name = None
# Start-state action registered in __lex_start_actions.
# NOTE(review): neither the body nor the loop header that decides which
# characters map to this action is visible here (presumably whitespace, which
# is skipped; confirm).
138 def __lex_start_space(self, c):
# Start-state action for lowercase ASCII letters (see __lex_start_actions):
# switches the lexer to the keyword state.
# NOTE(review): one line of the body is not visible here (presumably it
# starts self.buffer with 'c'; confirm).
140 def __lex_start_alpha(self, c):
142 self.lex_state = Parser.__lex_keyword
# Start-state action for single-character tokens: the character itself is
# passed to the parser as the token.  Which characters map to this action is
# decided in __lex_start_actions (that loop header is not visible here).
143 def __lex_start_token(self, c):
144 self.__parser_input(c)
# Start-state action for '-' and the decimal digits (see
# __lex_start_actions): switches the lexer to the number state.
# NOTE(review): one line of the body is not visible here (presumably it
# starts self.buffer with 'c'; confirm).
145 def __lex_start_number(self, c):
147 self.lex_state = Parser.__lex_number
# Start-state action for the double-quote character (see
# __lex_start_actions): switches the lexer to the string state.
148 def __lex_start_string(self, c):
149 self.lex_state = Parser.__lex_string
# Fallback start-state action, used by __lex_start() for any character that
# has no entry in __lex_start_actions.  It records an "invalid character"
# error: characters with code points 32..127 are shown literally, and the
# other message form shows the code point as U+XXXX (presumably the 'else'
# branch, whose header is not visible here).
150 def __lex_start_error(self, c):
151 if ord(c) >= 32 and ord(c) < 128:
152 self.__error("invalid character '%s'" % c)
154 self.__error("invalid character U+%04x" % ord(c))
# Dispatch table for the lexer's start state: maps an input character to the
# function that handles it.  Lowercase letters go to __lex_start_alpha, '-'
# and digits to __lex_start_number, and '"' to __lex_start_string.
# NOTE(review): the loop headers that choose the characters for
# __lex_start_space and __lex_start_token are not visible here.
156 __lex_start_actions = {}
158 __lex_start_actions[c] = __lex_start_space
159 for c in "abcdefghijklmnopqrstuvwxyz":
160 __lex_start_actions[c] = __lex_start_alpha
162 __lex_start_actions[c] = __lex_start_token
163 for c in "-0123456789":
164 __lex_start_actions[c] = __lex_start_number
165 __lex_start_actions['"'] = __lex_start_string
# Lexer start state: look up 'c' in __lex_start_actions, falling back to
# __lex_start_error for characters with no entry, and call the chosen action
# with (self, c).
# NOTE(review): __lex_input() asserts that a lexer state returns True or
# False; the return statement of this state is not visible here.
166 def __lex_start(self, c):
167 Parser.__lex_start_actions.get(
168 c, Parser.__lex_start_error)(self, c)
# Membership table of the ASCII letters (both cases), stored as dictionary
# keys with the value True; __lex_keyword() tests characters against it.
# NOTE(review): the statement that creates __lex_alpha is not visible here.
172 for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
173 __lex_alpha[c] = True
# Called when a keyword token is complete.  The text accumulated in
# self.buffer is mapped to a token and passed to the parser: "false" ->
# False, "true" -> True, "null" -> None.  The error message is for any other
# text (presumably the 'else' branch, whose header is not visible here).
174 def __lex_finish_keyword(self):
175 if self.buffer == "false":
176 self.__parser_input(False)
177 elif self.buffer == "true":
178 self.__parser_input(True)
179 elif self.buffer == "null":
180 self.__parser_input(None)
182 self.__error("invalid keyword '%s'" % self.buffer)
# Lexer state while inside a keyword.  'c' is tested against the ASCII
# letters in __lex_alpha, and __lex_finish_keyword() ends the keyword.
# NOTE(review): the body of the letter branch, the header of the other
# branch, and the return values are not visible here (presumably letters are
# appended to self.buffer and anything else ends the keyword; confirm).
183 def __lex_keyword(self, c):
184 if c in Parser.__lex_alpha:
188 self.__lex_finish_keyword()
# Grammar of a complete JSON number, matched against the whole token (used
# with .match() and ending in '$').  Groups, in order: optional '-' sign;
# integer part (either "0" or digits not starting with 0); optional fraction
# digits; optional exponent with optional sign.
# NOTE(review): the pattern is a non-raw string containing "\."; a raw string
# would express the same pattern without relying on an unrecognized escape.
191 __number_re = re.compile("(-)?(0|[1-9][0-9]*)(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")
# Called when a number token is complete.  The token text 's' is matched
# against __number_re.  A well-formed number is passed to the parser either
# as an exact integer or as 'value'; a malformed one is classified by the
# regular expressions at the end to pick a specific error message.
# NOTE(review): many lines of this method are not visible here, including
# the assignments of 's', 'sig_string', 'pow10' and 'value' and several
# branch heads and early returns, so the comments below describe only what
# the visible lines establish.  Python 2 only (long, sys.maxint).
192 def __lex_finish_number(self):
194 m = Parser.__number_re.match(s)
196 sign, integer, fraction, exp = m.groups()
# Reject exponents that do not fit in the platform's native int range.
197 if (exp is not None and
198 (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
199 self.__error("exponent outside valid range")
# A fraction made up only of zeros is detected here (what is then done with
# it is not visible).
202 if fraction is not None and len(fraction.lstrip('0')) == 0:
# The significand is the integer digits followed by the fraction digits,
# read as one integer; pow10 is lowered by the number of fraction digits to
# compensate.
206 if fraction is not None:
207 sig_string += fraction
208 significand = int(sig_string)
211 if fraction is not None:
212 pow10 -= len(fraction)
217 self.__parser_input(0)
# Integer path: only attempted while the significand is at most 2**63.
219 elif significand <= 2**63:
# NOTE(review): '2*63' evaluates to 126, unlike the '2**63' bound used on the
# surrounding lines; this looks like a typo for 2**63 -- confirm.
220 while pow10 > 0 and significand <= 2*63:
223 while pow10 < 0 and significand % 10 == 0:
# The two visible range conditions accept a non-negative significand below
# 2**63 and a negative one with magnitude up to 2**63, i.e. the signed 64-bit
# integer range.
227 ((not sign and significand < 2**63) or
228 (sign and significand <= 2**63))):
230 self.__parser_input(-significand)
232 self.__parser_input(significand)
# Otherwise 'value' (presumably a float conversion of 's') is used; an
# infinite result is reported as out of range.
236 if value == float("inf") or value == float("-inf"):
237 self.__error("number outside valid range")
240 # Suppress negative zero.
242 self.__parser_input(value)
# Diagnostics for text that is not a well-formed number.
243 elif re.match("-?0[0-9]", s):
244 self.__error("leading zeros not allowed")
245 elif re.match("-([^0-9]|$)", s):
246 self.__error("'-' must be followed by digit")
247 elif re.match("-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
248 self.__error("decimal point must be followed by digit")
249 elif re.search("e[-+]?([^0-9]|$)", s):
250 self.__error("exponent must contain at least one digit")
252 self.__error("syntax error in number")
# Lexer state while inside a number.  'c' is tested against the characters
# that may appear in a number (digits, '.', 'e', 'E', '-', '+'), and
# __lex_finish_number() ends the number.
# NOTE(review): the body of the first branch, the header of the other branch,
# and the return values are not visible here (presumably number characters
# are appended to self.buffer and anything else ends the number; confirm).
254 def __lex_number(self, c):
255 if c in ".0123456789eE-+":
259 self.__lex_finish_number()
# Matches four hexadecimal digits.  __lex_4hex() uses it with .match(), so
# the match is anchored at the start of the string but not at the end.
262 __4hex_re = re.compile("[0-9a-fA-F]{4}")
# Validate the hexadecimal digits of a \u escape given in 's'.  Three errors
# can be recorded: the string ends inside the escape, the digits are
# malformed (no match for __4hex_re), or the escape denotes a null.
# NOTE(review): the conditions for the first and last error and the return
# statement are not visible here.  Callers assign the result to c0/c1 and use
# it as a code point, so presumably the parsed integer is returned; confirm.
263 def __lex_4hex(self, s):
265 self.__error("quoted string ends within \\u escape")
266 elif not Parser.__4hex_re.match(s):
267 self.__error("malformed \\u escape")
269 self.__error("null bytes not supported in quoted strings")
# Range tests for the two halves of a UTF-16 surrogate pair.  The first
# function is true for code points 0xd800..0xdbff (leading surrogates) and
# the second for 0xdc00..0xdfff (trailing surrogates).  Both take only the
# code point 'c' and are called through the class (Parser.__is_...), not
# through an instance.
# NOTE(review): any decorator lines and the closing lines of the two
# docstrings are not visible here.
273 def __is_leading_surrogate(c):
274 """Returns true if 'c' is a Unicode code point for a leading
276 return c >= 0xd800 and c <= 0xdbff
278 def __is_trailing_surrogate(c):
279 """Returns true if 'c' is a Unicode code point for a trailing
281 return c >= 0xdc00 and c <= 0xdfff
# Combine a UTF-16 surrogate pair into a single code point.  Called through
# the class with the two 16-bit values; takes no 'self'.
# NOTE(review): the lines that compute 'u' and 'x0' are not visible here
# (any decorator line is not visible either).
283 def __utf16_decode_surrogate_pair(leading, trailing):
284 """Returns the unicode code point corresponding to leading surrogate
285 'leading' and trailing surrogate 'trailing'. The return value will not
286 make any sense if 'leading' or 'trailing' are not in the correct ranges
287 for leading or trailing surrogates."""
288 # Leading surrogate: 110110wwwwxxxxxx
289 # Trailing surrogate: 110111xxxxxxxxxx
290 # Code point: 000uuuuuxxxxxxxxxxxxxxxx
# 'w' is the four 'w' bits of the leading surrogate (bits 6..9).
291 w = (leading >> 6) & 0xf
# 'x1' is the low ten bits of the trailing surrogate.
294 x1 = trailing & 0x3ff
295 return (u << 16) | (x0 << 10) | x1
# Table used by __lex_finish_string(): maps the character that follows a
# backslash to the text it stands for.
# NOTE(review): only the first entry is visible here.
296 __unescape = {'"': u'"',
# Called when a quoted string is complete.  The raw text 'inp' is scanned for
# backslashes; text before each backslash is copied to 'out' and the escape
# after it is decoded.  The finished text is passed to the parser as
# ('string', out).
# NOTE(review): the initialization of 'inp' and 'out', the loop header, and
# several branch heads and early returns are not visible here.
304 def __lex_finish_string(self):
308 backslash = inp.find('\\')
# Copy the literal text before the backslash, then drop it and the backslash
# from the remaining input.
312 out += inp[:backslash]
313 inp = inp[backslash + 1:]
315 self.__error("quoted string may not end with backslash")
# Single-character escapes are looked up in __unescape.
318 replacement = Parser.__unescape.get(inp[0])
319 if replacement is not None:
324 self.__error("bad escape \\%s" % inp[0])
# \u escape: the four characters after the 'u' are decoded by __lex_4hex().
327 c0 = self.__lex_4hex(inp[1:5])
# A leading surrogate must be followed immediately by a second \u escape
# that holds a trailing surrogate; the pair is then combined into one code
# point.
332 if Parser.__is_leading_surrogate(c0):
333 if inp[:2] != u'\\u':
334 self.__error("malformed escaped surrogate pair")
336 c1 = self.__lex_4hex(inp[2:6])
339 if not Parser.__is_trailing_surrogate(c1):
340 self.__error("second half of escaped surrogate pair is "
341 "not trailing surrogate")
343 code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
347 out += unichr(code_point)
348 self.__parser_input('string', out)
# Lexer state for the character right after a backslash inside a string:
# the lexer goes back to the ordinary string state.
# NOTE(review): one body line and the return statement are not visible here
# (presumably 'c' is appended to self.buffer; confirm).
350 def __lex_string_escape(self, c):
352 self.lex_state = Parser.__lex_string
# Lexer state while inside a quoted string.  Three outcomes are visible:
# switching to the escape state, finishing the string, and recording an error
# for a character that must be escaped.
# NOTE(review): the conditions that select these outcomes and the return
# values are not visible here (presumably backslash, closing quote, and a
# control character respectively; confirm).
354 def __lex_string(self, c):
357 self.lex_state = Parser.__lex_string_escape
359 self.__lex_finish_string()
363 self.__error("U+%04X must be escaped in quoted string" % ord(c))
# Feed one character to the lexer.  byte_number is incremented once per
# call; line_number and column_number are updated by one of two visible
# paths (new line: column reset and line incremented; otherwise column
# incremented).  The current lexer state is then called with the character
# and must return True or False, which is enforced by the assertion.
# NOTE(review): the condition that selects the new-line path (presumably
# c == '\n') and what this method returns are not visible here.
366 def __lex_input(self, c):
367 self.byte_number += 1
369 self.column_number = 0
370 self.line_number += 1
372 self.column_number += 1
374 eat = self.lex_state(self, c)
375 assert eat is True or eat is False
# Initial parser state (installed by __init__).  The visible line records a
# syntax error for a token that cannot begin the input.
# NOTE(review): the branches for the tokens that are accepted here are not
# visible.
378 def __parse_start(self, token, string):
384 self.__error("syntax error at beginning of input")
# Parser state installed by __parser_pop() once the outermost container has
# been closed: any token that still arrives is reported as trailing garbage.
385 def __parse_end(self, token, string):
386 self.__error("trailing garbage at end of input")
# Parser state installed by __push_object() right after an object is
# opened.  On the visible path the token is handled exactly as a member name.
# NOTE(review): the other branch is not visible here (presumably an
# immediate '}' closing an empty object; confirm).
387 def __parse_object_init(self, token, string):
391 self.__parse_object_name(token, string)
# Parser state expecting an object member name.  A 'string' token is
# remembered in self.member_name and the parser then expects a colon.  The
# error message is for any other token (presumably the 'else' branch, whose
# header is not visible here).
392 def __parse_object_name(self, token, string):
393 if token == 'string':
394 self.member_name = string
395 self.parse_state = Parser.__parse_object_colon
397 self.__error("syntax error parsing object expecting string")
# Parser state expecting the ':' between a member name and its value; on
# success the parser moves on to the member value.
# NOTE(review): the condition and the 'else' header are not visible here.
398 def __parse_object_colon(self, token, string):
400 self.parse_state = Parser.__parse_object_value
402 self.__error("syntax error parsing object expecting ':'")
# Parser state expecting a member value: delegates to __parse_value(),
# naming __parse_object_next as the state to continue in afterwards.
403 def __parse_object_value(self, token, string):
404 self.__parse_value(token, string, Parser.__parse_object_next)
# Parser state after a member value: per the error message, either ',' or
# '}' is expected.  On the visible success path the parser goes back to
# expecting a member name.
# NOTE(review): the conditions and the handling of '}' are not visible here.
405 def __parse_object_next(self, token, string):
407 self.parse_state = Parser.__parse_object_name
411 self.__error("syntax error expecting '}' or ','")
# Parser state installed by __push_array() right after an array is opened.
# On the visible path the token is handled exactly as an array element.
# NOTE(review): the other branch is not visible here (presumably an
# immediate ']' closing an empty array; confirm).
412 def __parse_array_init(self, token, string):
416 self.__parse_array_value(token, string)
# Parser state expecting an array element: delegates to __parse_value(),
# naming __parse_array_next as the state to continue in afterwards.
417 def __parse_array_value(self, token, string):
418 self.__parse_value(token, string, Parser.__parse_array_next)
# Parser state after an array element: per the error message, either ',' or
# ']' is expected.  On the visible success path the parser goes back to
# expecting an element.
# NOTE(review): the conditions and the handling of ']' are not visible here.
419 def __parse_array_next(self, token, string):
421 self.parse_state = Parser.__parse_array_value
425 self.__error("syntax error expecting ']' or ','")
# Deliver one token from the lexer to the parser.  The lexer is first reset
# to its start state, then the current parser state is called with the token
# and, for 'string' tokens, the string text ('string' is None otherwise).
# The commented-out lines are a debugging trace of the state transition.
426 def __parser_input(self, token, string=None):
427 self.lex_state = Parser.__lex_start
429 #old_state = self.parse_state
430 self.parse_state(self, token, string)
431 #print ("token=%s string=%s old_state=%s new_state=%s"
432 # % (token, string, old_state, self.parse_state))
# Store 'value' into the container 'top'.  When 'top' is a dict the value is
# stored under the most recently parsed member name.
# NOTE(review): the assignment of 'top' (presumably the top of self.stack)
# and the branch for non-dict containers are not visible here.
434 def __put_value(self, value):
436 if type(top) == dict:
437 top[self.member_name] = value
# Begin a new container 'new_json' (a dict or a list).  While the stack is
# below Parser.MAX_HEIGHT the container is first stored into its parent (when
# the stack is not empty), then pushed, and the parser moves to
# 'next_state'.  The error message is for input nested too deeply.
# NOTE(review): the 'else' header and the argument that completes the error
# message's format expression are not visible here.
441 def __parser_push(self, new_json, next_state):
442 if len(self.stack) < Parser.MAX_HEIGHT:
443 if len(self.stack) > 0:
444 self.__put_value(new_json)
445 self.stack.append(new_json)
446 self.parse_state = next_state
448 self.__error("input exceeds maximum nesting depth %d" %
# Begin a new JSON object: push an empty dict and continue in
# __parse_object_init.
450 def __push_object(self):
451 self.__parser_push({}, Parser.__parse_object_init)
# Begin a new JSON array: push an empty list and continue in
# __parse_array_init.
452 def __push_array(self):
453 self.__parser_push([], Parser.__parse_array_init)
# Called when a container is closed.  If it is the outermost one (the stack
# holds a single entry) the parser moves to __parse_end, and something
# further happens when check_trailer is false.  Otherwise the parser resumes
# according to the type of 'top': __parse_array_next for a list and
# __parse_object_next on the other visible path.
# NOTE(review): the body of the check_trailer test (presumably marking the
# parse as done), the assignment of 'top' (presumably the enclosing
# container after popping) and the 'else' headers are not visible here.
455 def __parser_pop(self):
456 if len(self.stack) == 1:
457 self.parse_state = Parser.__parse_end
458 if not self.check_trailer:
463 if type(top) == list:
464 self.parse_state = Parser.__parse_array_next
466 self.parse_state = Parser.__parse_object_next
# Handle a token where a JSON value is expected.  Keyword and numeric tokens
# are stored directly; for a 'string' token the accompanying text is stored.
# The parser then continues in 'next_state'.  The error message is for a
# token that cannot start a value.
# NOTE(review): the branches between the 'string' case and the error
# (presumably the tokens that open objects and arrays) are not visible here,
# nor is it visible which paths reach the final assignment.
# NOTE(review): 'token in [False, None, True]' compares with ==, so it is
# also true for the tokens 0 and 1; those are accepted by the type test
# anyway.
468 def __parse_value(self, token, string, next_state):
469 if token in [False, None, True] or type(token) in [int, long, float]:
470 self.__put_value(token)
471 elif token == 'string':
472 self.__put_value(string)
479 self.__error("syntax error expecting value")
481 self.parse_state = next_state
# Record a parse error.  Only the first error is kept: if self.error is
# already set, the new message is not stored.  The stored text is prefixed
# with the current line, column and byte position.
# NOTE(review): any further statements of this method (e.g. marking the
# parser as done) are not visible here.
483 def __error(self, message):
484 if self.error is None:
485 self.error = ("line %d, column %d, byte %d: %s"
486 % (self.line_number, self.column_number,
487 self.byte_number, message))
# NOTE(review): fragment -- the 'def' line, the loop and the return
# statements are not visible here (presumably feed(self, s), which
# from_stream() calls; confirm).  The first test stops when the parser is
# done or index 'i' has reached the end of 's'; the second feeds s[i] to the
# lexer and branches on its boolean result.
493 if self.done or i >= len(s):
495 if self.__lex_input(s[i]):
# NOTE(review): fragment -- the 'def' line is not visible here (presumably
# finish(self); confirm), and the code may continue past the last visible
# line.
# End-of-input handling in three steps:
#  1. Flush the lexer: nothing is visible for the start state; ending inside
#     a quoted string is an error; the remaining visible action feeds a
#     single space to the lexer (presumably so a pending keyword or number
#     is terminated).
#  2. Check the parser: still being in __parse_start means the input was
#     empty, and any state other than __parse_end means the input ended
#     early.
#  3. When no error was recorded, the single remaining stack entry is
#     popped and returned as the result.
502 if self.lex_state == Parser.__lex_start:
504 elif self.lex_state in (Parser.__lex_string,
505 Parser.__lex_string_escape):
506 self.__error("unexpected end of input in quoted string")
508 self.__lex_input(" ")
510 if self.parse_state == Parser.__parse_start:
511 self.__error("empty input stream")
512 elif self.parse_state != Parser.__parse_end:
513 self.__error("unexpected end of input")
# NOTE(review): '== None' here, whereas __error() tests 'is None'; the
# identity test is the idiomatic form.
515 if self.error == None:
516 assert len(self.stack) == 1
517 return self.stack.pop()