1 # Copyright (c) 2010, 2011 Nicira Networks
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
19 escapes = {ord('"'): u"\\\"",
28 escapes[i] = u"\\u%04x" % i
30 def __dump_string(stream, s):
34 escape = escapes.get(x)
41 def to_stream(obj, stream, pretty=False, sort_keys=True):
45 stream.write(u"false")
48 elif type(obj) in (int, long):
49 stream.write(u"%d" % obj)
50 elif type(obj) == float:
51 stream.write("%.15g" % obj)
52 elif type(obj) == unicode:
53 __dump_string(stream, obj)
54 elif type(obj) == str:
55 __dump_string(stream, unicode(obj))
56 elif type(obj) == dict:
59 items = sorted(obj.items())
61 items = obj.iteritems()
63 for key, value in items:
67 __dump_string(stream, unicode(key))
69 to_stream(value, stream, pretty, sort_keys)
71 elif type(obj) in (list, tuple):
78 to_stream(value, stream, pretty, sort_keys)
81 raise Error("can't serialize %s as JSON" % obj)
83 def to_file(obj, name, pretty=False, sort_keys=True):
84 stream = open(name, "w")
86 to_stream(obj, stream, pretty, sort_keys)
90 def to_string(obj, pretty=False, sort_keys=True):
91 output = StringIO.StringIO()
92 to_stream(obj, output, pretty, sort_keys)
97 def from_stream(stream):
98 p = Parser(check_trailer=True)
100 buf = stream.read(4096)
101 if buf == "" or p.feed(buf) != len(buf):
106 stream = open(name, "r")
108 return from_stream(stream)
114 s = unicode(s, 'utf-8')
115 except UnicodeDecodeError, e:
116 seq = ' '.join(["0x%2x" % ord(c) for c in e.object[e.start:e.end]])
117 return ("not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq)
118 p = Parser(check_trailer=True)
122 class Parser(object):
123 ## Maximum height of parsing stack. ##
126 def __init__(self, check_trailer=False):
127 self.check_trailer = check_trailer
130 self.lex_state = Parser.__lex_start
133 self.column_number = 0
137 self.parse_state = Parser.__parse_start
139 self.member_name = None
145 def __lex_start_space(self, c):
147 def __lex_start_alpha(self, c):
149 self.lex_state = Parser.__lex_keyword
def __lex_start_token(self, c):
    """Lexer start-state action that hands the single character 'c' to the
    parser as a token in its own right."""
    token = c
    self.__parser_input(token)
152 def __lex_start_number(self, c):
154 self.lex_state = Parser.__lex_number
def __lex_start_string(self, c):
    """Lexer start-state action registered for '"': switches the lexer to
    the quoted-string state.  'c' itself is not used."""
    string_state = Parser.__lex_string
    self.lex_state = string_state
def __lex_start_error(self, c):
    """Lexer start-state action used for any character that has no entry
    in the start-action table: records an "invalid character" error.

    Characters with code points 32 through 127 are quoted literally in the
    message; any other character is reported as U+XXXX."""
    code = ord(c)
    if 32 <= code < 128:
        self.__error("invalid character '%s'" % c)
    else:
        self.__error("invalid character U+%04x" % code)
163 __lex_start_actions = {}
165 __lex_start_actions[c] = __lex_start_space
166 for c in "abcdefghijklmnopqrstuvwxyz":
167 __lex_start_actions[c] = __lex_start_alpha
169 __lex_start_actions[c] = __lex_start_token
170 for c in "-0123456789":
171 __lex_start_actions[c] = __lex_start_number
172 __lex_start_actions['"'] = __lex_start_string
173 def __lex_start(self, c):
174 Parser.__lex_start_actions.get(
175 c, Parser.__lex_start_error)(self, c)
179 for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
180 __lex_alpha[c] = True
def __lex_finish_keyword(self):
    """Ends a keyword token.

    Passes False, True or None to the parser when self.buffer holds
    "false", "true" or "null" respectively; records an "invalid keyword"
    error for any other buffered word."""
    keyword = self.buffer
    if keyword == "false":
        self.__parser_input(False)
    elif keyword == "true":
        self.__parser_input(True)
    elif keyword == "null":
        self.__parser_input(None)
    else:
        # Only reached for words that are not JSON keywords.
        self.__error("invalid keyword '%s'" % keyword)
190 def __lex_keyword(self, c):
191 if c in Parser.__lex_alpha:
195 self.__lex_finish_keyword()
198 __number_re = re.compile("(-)?(0|[1-9][0-9]*)(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")
199 def __lex_finish_number(self):
201 m = Parser.__number_re.match(s)
203 sign, integer, fraction, exp = m.groups()
204 if (exp is not None and
205 (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
206 self.__error("exponent outside valid range")
209 if fraction is not None and len(fraction.lstrip('0')) == 0:
213 if fraction is not None:
214 sig_string += fraction
215 significand = int(sig_string)
218 if fraction is not None:
219 pow10 -= len(fraction)
224 self.__parser_input(0)
226 elif significand <= 2**63:
227 while pow10 > 0 and significand <= 2*63:
230 while pow10 < 0 and significand % 10 == 0:
234 ((not sign and significand < 2**63) or
235 (sign and significand <= 2**63))):
237 self.__parser_input(-significand)
239 self.__parser_input(significand)
243 if value == float("inf") or value == float("-inf"):
244 self.__error("number outside valid range")
247 # Suppress negative zero.
249 self.__parser_input(value)
250 elif re.match("-?0[0-9]", s):
251 self.__error("leading zeros not allowed")
252 elif re.match("-([^0-9]|$)", s):
253 self.__error("'-' must be followed by digit")
254 elif re.match("-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
255 self.__error("decimal point must be followed by digit")
256 elif re.search("e[-+]?([^0-9]|$)", s):
257 self.__error("exponent must contain at least one digit")
259 self.__error("syntax error in number")
261 def __lex_number(self, c):
262 if c in ".0123456789eE-+":
266 self.__lex_finish_number()
269 __4hex_re = re.compile("[0-9a-fA-F]{4}")
270 def __lex_4hex(self, s):
272 self.__error("quoted string ends within \\u escape")
273 elif not Parser.__4hex_re.match(s):
274 self.__error("malformed \\u escape")
276 self.__error("null bytes not supported in quoted strings")
def __is_leading_surrogate(c):
    """Returns true if 'c' is a Unicode code point for a leading
    surrogate, that is, if it lies in 0xd800 through 0xdbff inclusive."""
    return 0xd800 <= c <= 0xdbff
def __is_trailing_surrogate(c):
    """Returns true if 'c' is a Unicode code point for a trailing
    surrogate, that is, if it lies in 0xdc00 through 0xdfff inclusive."""
    return 0xdc00 <= c <= 0xdfff
def __utf16_decode_surrogate_pair(leading, trailing):
    """Returns the unicode code point corresponding to leading surrogate
    'leading' and trailing surrogate 'trailing'.  The return value will not
    make any sense if 'leading' or 'trailing' are not in the correct ranges
    for leading or trailing surrogates."""
    #  Leading surrogate:         110110wwwwxxxxxx
    # Trailing surrogate:         110111xxxxxxxxxx
    #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
    # where uuuuu = wwww + 1 (UTF-16 stores the plane number minus one).
    w = (leading >> 6) & 0xf
    u = w + 1
    # x0: the 6 low bits of the leading surrogate (high part of the x field).
    x0 = leading & 0x3f
    # x1: the 10 low bits of the trailing surrogate (low part of the x field).
    x1 = trailing & 0x3ff
    return (u << 16) | (x0 << 10) | x1
303 __unescape = {'"': u'"',
311 def __lex_finish_string(self):
315 backslash = inp.find('\\')
319 out += inp[:backslash]
320 inp = inp[backslash + 1:]
322 self.__error("quoted string may not end with backslash")
325 replacement = Parser.__unescape.get(inp[0])
326 if replacement is not None:
331 self.__error("bad escape \\%s" % inp[0])
334 c0 = self.__lex_4hex(inp[1:5])
339 if Parser.__is_leading_surrogate(c0):
340 if inp[:2] != u'\\u':
341 self.__error("malformed escaped surrogate pair")
343 c1 = self.__lex_4hex(inp[2:6])
346 if not Parser.__is_trailing_surrogate(c1):
347 self.__error("second half of escaped surrogate pair is "
348 "not trailing surrogate")
350 code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
354 out += unichr(code_point)
355 self.__parser_input('string', out)
357 def __lex_string_escape(self, c):
359 self.lex_state = Parser.__lex_string
361 def __lex_string(self, c):
364 self.lex_state = Parser.__lex_string_escape
366 self.__lex_finish_string()
370 self.__error("U+%04X must be escaped in quoted string" % ord(c))
373 def __lex_input(self, c):
374 self.byte_number += 1
376 self.column_number = 0
377 self.line_number += 1
379 self.column_number += 1
381 eat = self.lex_state(self, c)
382 assert eat is True or eat is False
385 def __parse_start(self, token, string):
391 self.__error("syntax error at beginning of input")
def __parse_end(self, token, string):
    """Parser state that __parser_pop installs when a single element
    remains on the stack: any token arriving in this state is reported as
    an error.  'token' and 'string' are ignored."""
    message = "trailing garbage at end of input"
    self.__error(message)
394 def __parse_object_init(self, token, string):
398 self.__parse_object_name(token, string)
def __parse_object_name(self, token, string):
    """Parser state expecting the name of an object member.

    For a 'string' token, saves 'string' in self.member_name and moves to
    the state that expects ':'.  Any other token is a syntax error."""
    if token != 'string':
        self.__error("syntax error parsing object expecting string")
        return
    self.member_name = string
    self.parse_state = Parser.__parse_object_colon
405 def __parse_object_colon(self, token, string):
407 self.parse_state = Parser.__parse_object_value
409 self.__error("syntax error parsing object expecting ':'")
def __parse_object_value(self, token, string):
    """Parser state expecting the value of an object member: delegates to
    __parse_value, naming __parse_object_next as the state to continue
    in."""
    self.__parse_value(token, string,
                       next_state=Parser.__parse_object_next)
412 def __parse_object_next(self, token, string):
414 self.parse_state = Parser.__parse_object_name
418 self.__error("syntax error expecting '}' or ','")
419 def __parse_array_init(self, token, string):
423 self.__parse_array_value(token, string)
def __parse_array_value(self, token, string):
    """Parser state expecting an array element: delegates to
    __parse_value, naming __parse_array_next as the state to continue
    in."""
    self.__parse_value(token, string,
                       next_state=Parser.__parse_array_next)
426 def __parse_array_next(self, token, string):
428 self.parse_state = Parser.__parse_array_value
432 self.__error("syntax error expecting ']' or ','")
433 def __parser_input(self, token, string=None):
434 self.lex_state = Parser.__lex_start
436 #old_state = self.parse_state
437 self.parse_state(self, token, string)
438 #print ("token=%s string=%s old_state=%s new_state=%s"
439 # % (token, string, old_state, self.parse_state))
441 def __put_value(self, value):
443 if type(top) == dict:
444 top[self.member_name] = value
448 def __parser_push(self, new_json, next_state):
449 if len(self.stack) < Parser.MAX_HEIGHT:
450 if len(self.stack) > 0:
451 self.__put_value(new_json)
452 self.stack.append(new_json)
453 self.parse_state = next_state
455 self.__error("input exceeds maximum nesting depth %d" %
def __push_object(self):
    """Starts a new JSON object: pushes an empty dict through
    __parser_push, with __parse_object_init as the next parse state."""
    empty_object = {}
    self.__parser_push(empty_object, Parser.__parse_object_init)
def __push_array(self):
    """Starts a new JSON array: pushes an empty list through
    __parser_push, with __parse_array_init as the next parse state."""
    empty_array = []
    self.__parser_push(empty_array, Parser.__parse_array_init)
462 def __parser_pop(self):
463 if len(self.stack) == 1:
464 self.parse_state = Parser.__parse_end
465 if not self.check_trailer:
470 if type(top) == list:
471 self.parse_state = Parser.__parse_array_next
473 self.parse_state = Parser.__parse_object_next
475 def __parse_value(self, token, string, next_state):
476 if token in [False, None, True] or type(token) in [int, long, float]:
477 self.__put_value(token)
478 elif token == 'string':
479 self.__put_value(string)
486 self.__error("syntax error expecting value")
488 self.parse_state = next_state
490 def __error(self, message):
491 if self.error is None:
492 self.error = ("line %d, column %d, byte %d: %s"
493 % (self.line_number, self.column_number,
494 self.byte_number, message))
500 if self.done or i >= len(s):
502 if self.__lex_input(s[i]):
509 if self.lex_state == Parser.__lex_start:
511 elif self.lex_state in (Parser.__lex_string,
512 Parser.__lex_string_escape):
513 self.__error("unexpected end of input in quoted string")
515 self.__lex_input(" ")
517 if self.parse_state == Parser.__parse_start:
518 self.__error("empty input stream")
519 elif self.parse_state != Parser.__parse_end:
520 self.__error("unexpected end of input")
522 if self.error == None:
523 assert len(self.stack) == 1
524 return self.stack.pop()