1 # Copyright (c) 2010 Nicira Networks
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
19 escapes = {ord('"'): u"\\\"",
28 escapes[i] = u"\\u%04x" % i
30 def __dump_string(stream, s):
34 escape = escapes.get(x)
41 def to_stream(obj, stream, pretty=False, sort_keys=True):
45 stream.write(u"false")
48 elif type(obj) in (int, long):
49 stream.write(u"%d" % obj)
50 elif type(obj) == float:
51 stream.write("%.15g" % obj)
52 elif type(obj) == unicode:
53 __dump_string(stream, obj)
54 elif type(obj) == str:
55 __dump_string(stream, unicode(obj))
56 elif type(obj) == dict:
59 items = sorted(obj.items())
61 items = obj.iteritems()
63 for key, value in items:
67 __dump_string(stream, unicode(key))
69 to_stream(value, stream, pretty, sort_keys)
71 elif type(obj) in (list, tuple):
78 to_stream(value, stream, pretty, sort_keys)
81 raise Error("can't serialize %s as JSON" % obj)
83 def to_file(obj, name, pretty=False, sort_keys=True):
84 stream = open(name, "w")
86 to_stream(obj, stream, pretty, sort_keys)
90 def to_string(obj, pretty=False, sort_keys=True):
91 output = StringIO.StringIO()
92 to_stream(obj, output, pretty, sort_keys)
97 def from_stream(stream):
98 p = Parser(check_trailer=True)
100 buf = stream.read(4096)
101 if buf == "" or p.feed(buf) != len(buf):
106 stream = open(name, "r")
108 return from_stream(stream)
114 s = unicode(s, 'utf-8')
115 except UnicodeDecodeError, e:
116 seq = ' '.join(["0x%2x" % ord(c) for c in e.object[e.start:e.end]])
117 raise Error("\"%s\" is not a valid UTF-8 string: "
118 "invalid UTF-8 sequence %s" % (s, seq),
119 tag="constraint violation")
120 p = Parser(check_trailer=True)
124 class Parser(object):
125 ## Maximum height of parsing stack. ##
128 def __init__(self, check_trailer=False):
129 self.check_trailer = check_trailer
132 self.lex_state = Parser.__lex_start
135 self.column_number = 0
139 self.parse_state = Parser.__parse_start
141 self.member_name = None
147 def __lex_start_space(self, c):
149 def __lex_start_alpha(self, c):
151 self.lex_state = Parser.__lex_keyword
152 def __lex_start_token(self, c):
153 self.__parser_input(c)
154 def __lex_start_number(self, c):
156 self.lex_state = Parser.__lex_number
157 def __lex_start_string(self, c):
158 self.lex_state = Parser.__lex_string
159 def __lex_start_error(self, c):
160 if ord(c) >= 32 and ord(c) < 128:
161 self.__error("invalid character '%s'" % c)
163 self.__error("invalid character U+%04x" % ord(c))
165 __lex_start_actions = {}
167 __lex_start_actions[c] = __lex_start_space
168 for c in "abcdefghijklmnopqrstuvwxyz":
169 __lex_start_actions[c] = __lex_start_alpha
171 __lex_start_actions[c] = __lex_start_token
172 for c in "-0123456789":
173 __lex_start_actions[c] = __lex_start_number
174 __lex_start_actions['"'] = __lex_start_string
175 def __lex_start(self, c):
176 Parser.__lex_start_actions.get(
177 c, Parser.__lex_start_error)(self, c)
181 for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
182 __lex_alpha[c] = True
183 def __lex_finish_keyword(self):
184 if self.buffer == "false":
185 self.__parser_input(False)
186 elif self.buffer == "true":
187 self.__parser_input(True)
188 elif self.buffer == "null":
189 self.__parser_input(None)
191 self.__error("invalid keyword '%s'" % self.buffer)
192 def __lex_keyword(self, c):
193 if c in Parser.__lex_alpha:
197 self.__lex_finish_keyword()
200 __number_re = re.compile("(-)?(0|[1-9][0-9]*)(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")
# Lexer action for a finished number token.  The text 's' is matched against
# Parser.__number_re; on a match the sign, integer part, fraction digits and
# exponent are unpacked and a numeric value is handed to __parser_input().
# When the text does not match, the elif chain at the bottom selects the most
# specific error message to report through __error().
# NOTE(review): several lines of this method, including the one that binds
# 's', are not visible in this excerpt; presumably 's' is the accumulated
# token text -- confirm against the full file.
201 def __lex_finish_number(self):
203 m = Parser.__number_re.match(s)
205 sign, integer, fraction, exp = m.groups()
# Reject exponents outside the range [-sys.maxint - 1, sys.maxint].
206 if (exp is not None and
207 (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
208 self.__error("exponent outside valid range")
# A fraction made up entirely of '0' digits is special-cased (the action taken
# is on a line not shown here).
211 if fraction is not None and len(fraction.lstrip('0')) == 0:
# The fraction digits are appended to the digit string before it is converted
# to the integer significand.
215 if fraction is not None:
216 sig_string += fraction
217 significand = int(sig_string)
# Each fraction digit lowers the decimal exponent 'pow10' by one.
220 if fraction is not None:
221 pow10 -= len(fraction)
226 self.__parser_input(0)
228 elif significand <= 2**63:
# NOTE(review): '2*63' evaluates to 126, while every other bound in this
# method is '2**63'.  This looks like a typo for '2**63'; as written, the loop
# condition is false as soon as the significand exceeds 126.  Left unchanged
# here because the loop body is not visible in this excerpt.
229 while pow10 > 0 and significand <= 2*63:
232 while pow10 < 0 and significand % 10 == 0:
# The bounds below are those of a signed 64-bit integer: magnitude below 2**63
# without a sign, at most 2**63 with one.
236 ((not sign and significand < 2**63) or
237 (sign and significand <= 2**63))):
239 self.__parser_input(-significand)
241 self.__parser_input(significand)
# An infinite 'value' is reported as out of range.
245 if value == float("inf") or value == float("-inf"):
246 self.__error("number outside valid range")
249 # Suppress negative zero.
251 self.__parser_input(value)
# No match against __number_re: diagnose the most likely mistake.
252 elif re.match("-?0[0-9]", s):
253 self.__error("leading zeros not allowed")
254 elif re.match("-([^0-9]|$)", s):
255 self.__error("'-' must be followed by digit")
256 elif re.match("-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
257 self.__error("decimal point must be followed by digit")
258 elif re.search("e[-+]?([^0-9]|$)", s):
259 self.__error("exponent must contain at least one digit")
261 self.__error("syntax error in number")
263 def __lex_number(self, c):
264 if c in ".0123456789eE-+":
268 self.__lex_finish_number()
271 __4hex_re = re.compile("[0-9a-fA-F]{4}")
272 def __lex_4hex(self, s):
274 self.__error("quoted string ends within \\u escape")
275 elif not Parser.__4hex_re.match(s):
276 self.__error("malformed \\u escape")
278 self.__error("null bytes not supported in quoted strings")
282 def __is_leading_surrogate(c):
283 """Returns true if 'c' is a Unicode code point for a leading
285 return c >= 0xd800 and c <= 0xdbff
287 def __is_trailing_surrogate(c):
288 """Returns true if 'c' is a Unicode code point for a trailing
290 return c >= 0xdc00 and c <= 0xdfff
292 def __utf16_decode_surrogate_pair(leading, trailing):
293 """Returns the unicode code point corresponding to leading surrogate
294 'leading' and trailing surrogate 'trailing'. The return value will not
295 make any sense if 'leading' or 'trailing' are not in the correct ranges
296 for leading or trailing surrogates."""
297 # Leading surrogate: 110110wwwwxxxxxx
298 # Trailing surrogate: 110111xxxxxxxxxx
299 # Code point: 000uuuuuxxxxxxxxxxxxxxxx
300 w = (leading >> 6) & 0xf
303 x1 = trailing & 0x3ff
304 return (u << 16) | (x0 << 10) | x1
305 __unescape = {'"': u'"',
313 def __lex_finish_string(self):
317 backslash = inp.find('\\')
321 out += inp[:backslash]
322 inp = inp[backslash + 1:]
324 self.__error("quoted string may not end with backslash")
327 replacement = Parser.__unescape.get(inp[0])
328 if replacement is not None:
333 self.__error("bad escape \\%s" % inp[0])
336 c0 = self.__lex_4hex(inp[1:5])
341 if Parser.__is_leading_surrogate(c0):
342 if inp[:2] != u'\\u':
343 self.__error("malformed escaped surrogate pair")
345 c1 = self.__lex_4hex(inp[2:6])
348 if not Parser.__is_trailing_surrogate(c1):
349 self.__error("second half of escaped surrogate pair is "
350 "not trailing surrogate")
352 code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
356 out += unichr(code_point)
357 self.__parser_input('string', out)
359 def __lex_string_escape(self, c):
361 self.lex_state = Parser.__lex_string
363 def __lex_string(self, c):
366 self.lex_state = Parser.__lex_string_escape
368 self.__lex_finish_string()
372 self.__error("U+%04X must be escaped in quoted string" % ord(c))
375 def __lex_input(self, c):
376 self.byte_number += 1
378 self.column_number = 0
379 self.line_number += 1
381 self.column_number += 1
383 eat = self.lex_state(self, c)
384 assert eat is True or eat is False
387 def __parse_start(self, token, string):
393 self.__error("syntax error at beginning of input")
394 def __parse_end(self, token, string):
395 self.__error("trailing garbage at end of input")
396 def __parse_object_init(self, token, string):
400 self.__parse_object_name(token, string)
401 def __parse_object_name(self, token, string):
402 if token == 'string':
403 self.member_name = string
404 self.parse_state = Parser.__parse_object_colon
406 self.__error("syntax error parsing object expecting string")
407 def __parse_object_colon(self, token, string):
409 self.parse_state = Parser.__parse_object_value
411 self.__error("syntax error parsing object expecting ':'")
412 def __parse_object_value(self, token, string):
413 self.__parse_value(token, string, Parser.__parse_object_next)
414 def __parse_object_next(self, token, string):
416 self.parse_state = Parser.__parse_object_name
420 self.__error("syntax error expecting '}' or ','")
421 def __parse_array_init(self, token, string):
425 self.__parse_array_value(token, string)
426 def __parse_array_value(self, token, string):
427 self.__parse_value(token, string, Parser.__parse_array_next)
428 def __parse_array_next(self, token, string):
430 self.parse_state = Parser.__parse_array_value
434 self.__error("syntax error expecting ']' or ','")
435 def __parser_input(self, token, string=None):
436 self.lex_state = Parser.__lex_start
438 #old_state = self.parse_state
439 self.parse_state(self, token, string)
440 #print ("token=%s string=%s old_state=%s new_state=%s"
441 # % (token, string, old_state, self.parse_state))
443 def __put_value(self, value):
445 if type(top) == dict:
446 top[self.member_name] = value
450 def __parser_push(self, new_json, next_state):
451 if len(self.stack) < Parser.MAX_HEIGHT:
452 if len(self.stack) > 0:
453 self.__put_value(new_json)
454 self.stack.append(new_json)
455 self.parse_state = next_state
457 self.__error("input exceeds maximum nesting depth %d" %
459 def __push_object(self):
460 self.__parser_push({}, Parser.__parse_object_init)
461 def __push_array(self):
462 self.__parser_push([], Parser.__parse_array_init)
464 def __parser_pop(self):
465 if len(self.stack) == 1:
466 self.parse_state = Parser.__parse_end
467 if not self.check_trailer:
472 if type(top) == list:
473 self.parse_state = Parser.__parse_array_next
475 self.parse_state = Parser.__parse_object_next
477 def __parse_value(self, token, string, next_state):
478 if token in [False, None, True] or type(token) in [int, long, float]:
479 self.__put_value(token)
480 elif token == 'string':
481 self.__put_value(string)
488 self.__error("syntax error expecting value")
490 self.parse_state = next_state
492 def __error(self, message):
493 if self.error is None:
494 self.error = ("line %d, column %d, byte %d: %s"
495 % (self.line_number, self.column_number,
496 self.byte_number, message))
502 if self.done or i >= len(s):
504 if self.__lex_input(s[i]):
511 if self.lex_state == Parser.__lex_start:
513 elif self.lex_state in (Parser.__lex_string,
514 Parser.__lex_string_escape):
515 self.__error("unexpected end of input in quoted string")
517 self.__lex_input(" ")
519 if self.parse_state == Parser.__parse_start:
520 self.__error("empty input stream")
521 elif self.parse_state != Parser.__parse_end:
522 self.__error("unexpected end of input")
524 if self.error == None:
525 assert len(self.stack) == 1
526 return self.stack.pop()