1 # Copyright (c) 2010, 2011 Nicira Networks
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
19 __pychecker__ = 'no-stringiter'
21 escapes = {ord('"'): u"\\\"",
29 if esc not in escapes:
30 escapes[esc] = u"\\u%04x" % esc
35 class _Serializer(object):
36 def __init__(self, stream, pretty, sort_keys):
39 self.sort_keys = sort_keys
def __serialize_string(self, s):
    """Writes 's' to the output stream as a double-quoted JSON string.
    Each character whose code point has an entry in 'escapes' is replaced
    by that entry; every other character is written unchanged."""
    pieces = []
    for ch in s:
        pieces.append(escapes.get(ord(ch), ch))
    self.stream.write(u'"%s"' % ''.join(pieces))
45 def __indent_line(self):
47 self.stream.write('\n')
48 self.stream.write(' ' * (SPACES_PER_LEVEL * self.depth))
50 def serialize(self, obj):
52 self.stream.write(u"null")
54 self.stream.write(u"false")
56 self.stream.write(u"true")
57 elif type(obj) in (int, long):
58 self.stream.write(u"%d" % obj)
59 elif type(obj) == float:
60 self.stream.write("%.15g" % obj)
61 elif type(obj) == unicode:
62 self.__serialize_string(obj)
63 elif type(obj) == str:
64 self.__serialize_string(unicode(obj))
65 elif type(obj) == dict:
66 self.stream.write(u"{")
72 items = sorted(obj.items())
74 items = obj.iteritems()
75 for i, (key, value) in enumerate(items):
77 self.stream.write(u",")
79 self.__serialize_string(unicode(key))
80 self.stream.write(u":")
82 self.stream.write(u' ')
85 self.stream.write(u"}")
87 elif type(obj) in (list, tuple):
88 self.stream.write(u"[")
94 for i, value in enumerate(obj):
96 self.stream.write(u",")
101 self.stream.write(u"]")
103 raise Exception("can't serialize %s as JSON" % obj)
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serializes 'obj' onto 'stream' by handing it to a _Serializer that
    is constructed from 'stream', 'pretty', and 'sort_keys'."""
    serializer = _Serializer(stream, pretty, sort_keys)
    serializer.serialize(obj)
110 def to_file(obj, name, pretty=False, sort_keys=True):
111 stream = open(name, "w")
113 to_stream(obj, stream, pretty, sort_keys)
118 def to_string(obj, pretty=False, sort_keys=True):
119 output = StringIO.StringIO()
120 to_stream(obj, output, pretty, sort_keys)
121 s = output.getvalue()
126 def from_stream(stream):
127 p = Parser(check_trailer=True)
129 buf = stream.read(4096)
130 if buf == "" or p.feed(buf) != len(buf):
136 stream = open(name, "r")
138 return from_stream(stream)
145 s = unicode(s, 'utf-8')
146 except UnicodeDecodeError, e:
147 seq = ' '.join(["0x%2x" % ord(c)
148 for c in e.object[e.start:e.end] if ord(c) >= 0x80])
149 return ("not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq)
150 p = Parser(check_trailer=True)
155 class Parser(object):
156 ## Maximum height of parsing stack. ##
159 def __init__(self, check_trailer=False):
160 self.check_trailer = check_trailer
163 self.lex_state = Parser.__lex_start
166 self.column_number = 0
170 self.parse_state = Parser.__parse_start
172 self.member_name = None
178 def __lex_start_space(self, c):
181 def __lex_start_alpha(self, c):
183 self.lex_state = Parser.__lex_keyword
def __lex_start_token(self, c):
    """Start-state action that passes the character 'c' straight to the
    parser as a token."""
    token = c
    self.__parser_input(token)
188 def __lex_start_number(self, c):
190 self.lex_state = Parser.__lex_number
def __lex_start_string(self, _):
    """Start-state action that switches the lexer into the
    Parser.__lex_string state.  The triggering character is ignored."""
    string_state = Parser.__lex_string
    self.lex_state = string_state
195 def __lex_start_error(self, c):
196 if ord(c) >= 32 and ord(c) < 128:
197 self.__error("invalid character '%s'" % c)
199 self.__error("invalid character U+%04x" % ord(c))
201 __lex_start_actions = {}
203 __lex_start_actions[c] = __lex_start_space
204 for c in "abcdefghijklmnopqrstuvwxyz":
205 __lex_start_actions[c] = __lex_start_alpha
207 __lex_start_actions[c] = __lex_start_token
208 for c in "-0123456789":
209 __lex_start_actions[c] = __lex_start_number
210 __lex_start_actions['"'] = __lex_start_string
212 def __lex_start(self, c):
213 Parser.__lex_start_actions.get(
214 c, Parser.__lex_start_error)(self, c)
218 for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
219 __lex_alpha[c] = True
221 def __lex_finish_keyword(self):
222 if self.buffer == "false":
223 self.__parser_input(False)
224 elif self.buffer == "true":
225 self.__parser_input(True)
226 elif self.buffer == "null":
227 self.__parser_input(None)
229 self.__error("invalid keyword '%s'" % self.buffer)
231 def __lex_keyword(self, c):
232 if c in Parser.__lex_alpha:
236 self.__lex_finish_keyword()
# Matches a complete JSON number and captures, in order: the optional
# leading '-', the integer digits (no leading zeros), the optional fraction
# digits, and the optional signed exponent digits.  Raw string literals are
# used so that the backslash in "\." reaches the regex engine as written
# instead of relying on an unrecognized string escape being passed through.
__number_re = re.compile(r"(-)?(0|[1-9][0-9]*)"
                         r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")
242 def __lex_finish_number(self):
244 m = Parser.__number_re.match(s)
246 sign, integer, fraction, exp = m.groups()
247 if (exp is not None and
248 (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
249 self.__error("exponent outside valid range")
252 if fraction is not None and len(fraction.lstrip('0')) == 0:
256 if fraction is not None:
257 sig_string += fraction
258 significand = int(sig_string)
261 if fraction is not None:
262 pow10 -= len(fraction)
267 self.__parser_input(0)
269 elif significand <= 2 ** 63:
270 while pow10 > 0 and significand <= 2 ** 63:
273 while pow10 < 0 and significand % 10 == 0:
277 ((not sign and significand < 2 ** 63) or
278 (sign and significand <= 2 ** 63))):
280 self.__parser_input(-significand)
282 self.__parser_input(significand)
286 if value == float("inf") or value == float("-inf"):
287 self.__error("number outside valid range")
290 # Suppress negative zero.
292 self.__parser_input(value)
293 elif re.match("-?0[0-9]", s):
294 self.__error("leading zeros not allowed")
295 elif re.match("-([^0-9]|$)", s):
296 self.__error("'-' must be followed by digit")
297 elif re.match("-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
298 self.__error("decimal point must be followed by digit")
299 elif re.search("e[-+]?([^0-9]|$)", s):
300 self.__error("exponent must contain at least one digit")
302 self.__error("syntax error in number")
304 def __lex_number(self, c):
305 if c in ".0123456789eE-+":
309 self.__lex_finish_number()
312 __4hex_re = re.compile("[0-9a-fA-F]{4}")
314 def __lex_4hex(self, s):
316 self.__error("quoted string ends within \\u escape")
317 elif not Parser.__4hex_re.match(s):
318 self.__error("malformed \\u escape")
320 self.__error("null bytes not supported in quoted strings")
def __is_leading_surrogate(c):
    """Returns true if code point 'c' lies in the leading surrogate
    range, 0xd800 through 0xdbff inclusive."""
    return 0xd800 <= c <= 0xdbff
def __is_trailing_surrogate(c):
    """Returns true if code point 'c' lies in the trailing surrogate
    range, 0xdc00 through 0xdfff inclusive."""
    return 0xdc00 <= c <= 0xdfff
337 def __utf16_decode_surrogate_pair(leading, trailing):
338 """Returns the unicode code point corresponding to leading surrogate
339 'leading' and trailing surrogate 'trailing'. The return value will not
340 make any sense if 'leading' or 'trailing' are not in the correct ranges
341 for leading or trailing surrogates."""
342 # Leading surrogate: 110110wwwwxxxxxx
343 # Trailing surrogate: 110111xxxxxxxxxx
344 # Code point: 000uuuuuxxxxxxxxxxxxxxxx
345 w = (leading >> 6) & 0xf
348 x1 = trailing & 0x3ff
349 return (u << 16) | (x0 << 10) | x1
350 __unescape = {'"': u'"',
359 def __lex_finish_string(self):
363 backslash = inp.find('\\')
367 out += inp[:backslash]
368 inp = inp[backslash + 1:]
370 self.__error("quoted string may not end with backslash")
373 replacement = Parser.__unescape.get(inp[0])
374 if replacement is not None:
379 self.__error("bad escape \\%s" % inp[0])
382 c0 = self.__lex_4hex(inp[1:5])
387 if Parser.__is_leading_surrogate(c0):
388 if inp[:2] != u'\\u':
389 self.__error("malformed escaped surrogate pair")
391 c1 = self.__lex_4hex(inp[2:6])
394 if not Parser.__is_trailing_surrogate(c1):
395 self.__error("second half of escaped surrogate pair is "
396 "not trailing surrogate")
398 code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
402 out += unichr(code_point)
403 self.__parser_input('string', out)
405 def __lex_string_escape(self, c):
407 self.lex_state = Parser.__lex_string
410 def __lex_string(self, c):
413 self.lex_state = Parser.__lex_string_escape
415 self.__lex_finish_string()
419 self.__error("U+%04X must be escaped in quoted string" % ord(c))
422 def __lex_input(self, c):
423 self.byte_number += 1
425 self.column_number = 0
426 self.line_number += 1
428 self.column_number += 1
430 eat = self.lex_state(self, c)
431 assert eat is True or eat is False
434 def __parse_start(self, token, unused_string):
440 self.__error("syntax error at beginning of input")
def __parse_end(self, unused_token, unused_string):
    """Parse state that reports any token it receives as trailing
    garbage.  Both arguments are ignored."""
    message = "trailing garbage at end of input"
    self.__error(message)
445 def __parse_object_init(self, token, string):
449 self.__parse_object_name(token, string)
451 def __parse_object_name(self, token, string):
452 if token == 'string':
453 self.member_name = string
454 self.parse_state = Parser.__parse_object_colon
456 self.__error("syntax error parsing object expecting string")
458 def __parse_object_colon(self, token, unused_string):
460 self.parse_state = Parser.__parse_object_value
462 self.__error("syntax error parsing object expecting ':'")
def __parse_object_value(self, token, string):
    """Parse state for an object member's value: delegates to
    __parse_value, naming Parser.__parse_object_next as the state that
    follows the value."""
    after_value = Parser.__parse_object_next
    self.__parse_value(token, string, after_value)
467 def __parse_object_next(self, token, unused_string):
469 self.parse_state = Parser.__parse_object_name
473 self.__error("syntax error expecting '}' or ','")
475 def __parse_array_init(self, token, string):
479 self.__parse_array_value(token, string)
def __parse_array_value(self, token, string):
    """Parse state for an array element: delegates to __parse_value,
    naming Parser.__parse_array_next as the state that follows the
    element."""
    after_value = Parser.__parse_array_next
    self.__parse_value(token, string, after_value)
484 def __parse_array_next(self, token, unused_string):
486 self.parse_state = Parser.__parse_array_value
490 self.__error("syntax error expecting ']' or ','")
492 def __parser_input(self, token, string=None):
493 self.lex_state = Parser.__lex_start
495 self.parse_state(self, token, string)
497 def __put_value(self, value):
499 if type(top) == dict:
500 top[self.member_name] = value
504 def __parser_push(self, new_json, next_state):
505 if len(self.stack) < Parser.MAX_HEIGHT:
506 if len(self.stack) > 0:
507 self.__put_value(new_json)
508 self.stack.append(new_json)
509 self.parse_state = next_state
511 self.__error("input exceeds maximum nesting depth %d" %
def __push_object(self):
    """Hands a new empty dict to __parser_push, with
    Parser.__parse_object_init as the next parse state."""
    new_object = {}
    self.__parser_push(new_object, Parser.__parse_object_init)
def __push_array(self):
    """Hands a new empty list to __parser_push, with
    Parser.__parse_array_init as the next parse state."""
    new_array = []
    self.__parser_push(new_array, Parser.__parse_array_init)
520 def __parser_pop(self):
521 if len(self.stack) == 1:
522 self.parse_state = Parser.__parse_end
523 if not self.check_trailer:
528 if type(top) == list:
529 self.parse_state = Parser.__parse_array_next
531 self.parse_state = Parser.__parse_object_next
533 def __parse_value(self, token, string, next_state):
534 if token in [False, None, True] or type(token) in [int, long, float]:
535 self.__put_value(token)
536 elif token == 'string':
537 self.__put_value(string)
544 self.__error("syntax error expecting value")
546 self.parse_state = next_state
548 def __error(self, message):
549 if self.error is None:
550 self.error = ("line %d, column %d, byte %d: %s"
551 % (self.line_number, self.column_number,
552 self.byte_number, message))
558 if self.done or i >= len(s):
560 if self.__lex_input(s[i]):
567 if self.lex_state == Parser.__lex_start:
569 elif self.lex_state in (Parser.__lex_string,
570 Parser.__lex_string_escape):
571 self.__error("unexpected end of input in quoted string")
573 self.__lex_input(" ")
575 if self.parse_state == Parser.__parse_start:
576 self.__error("empty input stream")
577 elif self.parse_state != Parser.__parse_end:
578 self.__error("unexpected end of input")
580 if self.error == None:
581 assert len(self.stack) == 1
582 return self.stack.pop()