1 # Copyright (c) 2010, 2011 Nicira Networks
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
19 escapes = {ord('"'): u"\\\"",
27 if esc not in escapes:
28 escapes[esc] = u"\\u%04x" % esc
33 class _Serializer(object):
34 def __init__(self, stream, pretty, sort_keys):
37 self.sort_keys = sort_keys
def __serialize_string(self, s):
    """Writes 's' to the output stream as a double-quoted JSON string.
    Each character whose code point has an entry in the module-level
    'escapes' table is replaced by that entry; every other character is
    copied through unchanged."""
    pieces = []
    for ch in s:
        pieces.append(escapes.get(ord(ch), ch))
    self.stream.write(u'"%s"' % ''.join(pieces))
43 def __indent_line(self):
45 self.stream.write('\n')
46 self.stream.write(' ' * (SPACES_PER_LEVEL * self.depth))
48 def serialize(self, obj):
50 self.stream.write(u"null")
52 self.stream.write(u"false")
54 self.stream.write(u"true")
55 elif type(obj) in (int, long):
56 self.stream.write(u"%d" % obj)
57 elif type(obj) == float:
58 self.stream.write("%.15g" % obj)
59 elif type(obj) == unicode:
60 self.__serialize_string(obj)
61 elif type(obj) == str:
62 self.__serialize_string(unicode(obj))
63 elif type(obj) == dict:
64 self.stream.write(u"{")
70 items = sorted(obj.items())
72 items = obj.iteritems()
73 for i, (key, value) in enumerate(items):
75 self.stream.write(u",")
77 self.__serialize_string(unicode(key))
78 self.stream.write(u":")
80 self.stream.write(u' ')
83 self.stream.write(u"}")
85 elif type(obj) in (list, tuple):
86 self.stream.write(u"[")
92 for i, value in enumerate(obj):
94 self.stream.write(u",")
99 self.stream.write(u"]")
101 raise Exception("can't serialize %s as JSON" % obj)
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON onto 'stream' using a fresh _Serializer.
    'pretty' and 'sort_keys' are handed to the serializer unchanged."""
    serializer = _Serializer(stream, pretty, sort_keys)
    serializer.serialize(obj)
107 def to_file(obj, name, pretty=False, sort_keys=True):
108 stream = open(name, "w")
110 to_stream(obj, stream, pretty, sort_keys)
114 def to_string(obj, pretty=False, sort_keys=True):
115 output = StringIO.StringIO()
116 to_stream(obj, output, pretty, sort_keys)
117 s = output.getvalue()
121 def from_stream(stream):
122 p = Parser(check_trailer=True)
124 buf = stream.read(4096)
125 if buf == "" or p.feed(buf) != len(buf):
130 stream = open(name, "r")
132 return from_stream(stream)
138 s = unicode(s, 'utf-8')
139 except UnicodeDecodeError, e:
140 seq = ' '.join(["0x%2x" % ord(c)
141 for c in e.object[e.start:e.end] if ord(c) >= 0x80])
142 return ("not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq)
143 p = Parser(check_trailer=True)
147 class Parser(object):
148 ## Maximum height of parsing stack. ##
151 def __init__(self, check_trailer=False):
152 self.check_trailer = check_trailer
155 self.lex_state = Parser.__lex_start
158 self.column_number = 0
162 self.parse_state = Parser.__parse_start
164 self.member_name = None
170 def __lex_start_space(self, c):
172 def __lex_start_alpha(self, c):
174 self.lex_state = Parser.__lex_keyword
def __lex_start_token(self, c):
    """Start-state action that passes 'c' straight to the parser as a
    token, without touching the lexer buffer."""
    token = c
    self.__parser_input(token)
177 def __lex_start_number(self, c):
179 self.lex_state = Parser.__lex_number
def __lex_start_string(self, _):
    """Start-state action registered for '"': switches the lexer into the
    quoted-string state.  The character argument is ignored."""
    string_state = Parser.__lex_string
    self.lex_state = string_state
def __lex_start_error(self, c):
    """Fallback start-state action: records an "invalid character" error
    for 'c'.  Characters with code points in 32...127 are quoted
    literally in the message; anything else is shown as U+xxxx."""
    code = ord(c)
    if code < 32 or code >= 128:
        self.__error("invalid character U+%04x" % code)
    else:
        self.__error("invalid character '%s'" % c)
188 __lex_start_actions = {}
190 __lex_start_actions[c] = __lex_start_space
191 for c in "abcdefghijklmnopqrstuvwxyz":
192 __lex_start_actions[c] = __lex_start_alpha
194 __lex_start_actions[c] = __lex_start_token
195 for c in "-0123456789":
196 __lex_start_actions[c] = __lex_start_number
197 __lex_start_actions['"'] = __lex_start_string
198 def __lex_start(self, c):
199 Parser.__lex_start_actions.get(
200 c, Parser.__lex_start_error)(self, c)
204 for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
205 __lex_alpha[c] = True
def __lex_finish_keyword(self):
    """Turns the buffered keyword into a token.  "false", "true" and
    "null" are passed to the parser as False, True and None; any other
    text is reported as an invalid keyword."""
    keyword = self.buffer
    # A sequence of pairs rather than a dict with .get(): None is itself one
    # of the legitimate values, so it cannot double as a "missing" marker.
    for text, value in (("false", False), ("true", True), ("null", None)):
        if keyword == text:
            self.__parser_input(value)
            return
    self.__error("invalid keyword '%s'" % keyword)
215 def __lex_keyword(self, c):
216 if c in Parser.__lex_alpha:
220 self.__lex_finish_keyword()
223 __number_re = re.compile("(-)?(0|[1-9][0-9]*)(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")
def __lex_finish_number(self):
    """Converts the number text accumulated in self.buffer into a token.

    If the text matches Parser.__number_re it is passed to the parser as
    an integer when its value is integral and fits in the range
    -2**63...2**63-1, and as a float otherwise.  Exponents that do not
    fit in a machine int and values that overflow to infinity are
    reported as errors.  If the text does not match, the most specific
    applicable syntax error is reported instead."""
    s = self.buffer
    m = Parser.__number_re.match(s)
    if m:
        sign, integer, fraction, exp = m.groups()
        if (exp is not None and
            (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
            self.__error("exponent outside valid range")
            return

        # A fraction made only of zeros carries no information; dropping
        # it lets values such as "1.000" take the integer path below.
        if fraction is not None and len(fraction.lstrip('0')) == 0:
            fraction = None

        # Represent the value as significand * 10**pow10.
        sig_string = integer
        if fraction is not None:
            sig_string += fraction
        significand = int(sig_string)

        pow10 = 0
        if fraction is not None:
            pow10 -= len(fraction)
        if exp is not None:
            pow10 += long(exp)

        if significand == 0:
            self.__parser_input(0)
            return
        elif significand <= 2**63:
            # Fold the exponent into the significand.  The bound has to
            # be 2**63 (it was mistyped as 2*63, i.e. 126), otherwise
            # integral inputs such as "2e3" stop scaling early and are
            # returned as floats.
            while pow10 > 0 and significand <= 2**63:
                significand *= 10
                pow10 -= 1
            # '//' keeps this an exact integer division regardless of
            # the division mode in effect.
            while pow10 < 0 and significand % 10 == 0:
                significand //= 10
                pow10 += 1
            if (pow10 == 0 and
                ((not sign and significand < 2**63) or
                 (sign and significand <= 2**63))):
                if sign:
                    self.__parser_input(-significand)
                else:
                    self.__parser_input(significand)
                return

        value = float(s)
        if value == float("inf") or value == float("-inf"):
            self.__error("number outside valid range")
            return
        if value == 0:
            # Suppress negative zero.
            value = 0
        self.__parser_input(value)
    elif re.match("-?0[0-9]", s):
        self.__error("leading zeros not allowed")
    elif re.match("-([^0-9]|$)", s):
        self.__error("'-' must be followed by digit")
    elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
        self.__error("decimal point must be followed by digit")
    elif re.search("[eE][-+]?([^0-9]|$)", s):
        # The lexer accepts both 'e' and 'E', so diagnose both.
        self.__error("exponent must contain at least one digit")
    else:
        self.__error("syntax error in number")
286 def __lex_number(self, c):
287 if c in ".0123456789eE-+":
291 self.__lex_finish_number()
294 __4hex_re = re.compile("[0-9a-fA-F]{4}")
295 def __lex_4hex(self, s):
297 self.__error("quoted string ends within \\u escape")
298 elif not Parser.__4hex_re.match(s):
299 self.__error("malformed \\u escape")
301 self.__error("null bytes not supported in quoted strings")
305 def __is_leading_surrogate(c):
306 """Returns true if 'c' is a Unicode code point for a leading
308 return c >= 0xd800 and c <= 0xdbff
310 def __is_trailing_surrogate(c):
311 """Returns true if 'c' is a Unicode code point for a trailing
313 return c >= 0xdc00 and c <= 0xdfff
315 def __utf16_decode_surrogate_pair(leading, trailing):
316 """Returns the unicode code point corresponding to leading surrogate
317 'leading' and trailing surrogate 'trailing'. The return value will not
318 make any sense if 'leading' or 'trailing' are not in the correct ranges
319 for leading or trailing surrogates."""
320 # Leading surrogate: 110110wwwwxxxxxx
321 # Trailing surrogate: 110111xxxxxxxxxx
322 # Code point: 000uuuuuxxxxxxxxxxxxxxxx
323 w = (leading >> 6) & 0xf
326 x1 = trailing & 0x3ff
327 return (u << 16) | (x0 << 10) | x1
328 __unescape = {'"': u'"',
336 def __lex_finish_string(self):
340 backslash = inp.find('\\')
344 out += inp[:backslash]
345 inp = inp[backslash + 1:]
347 self.__error("quoted string may not end with backslash")
350 replacement = Parser.__unescape.get(inp[0])
351 if replacement is not None:
356 self.__error("bad escape \\%s" % inp[0])
359 c0 = self.__lex_4hex(inp[1:5])
364 if Parser.__is_leading_surrogate(c0):
365 if inp[:2] != u'\\u':
366 self.__error("malformed escaped surrogate pair")
368 c1 = self.__lex_4hex(inp[2:6])
371 if not Parser.__is_trailing_surrogate(c1):
372 self.__error("second half of escaped surrogate pair is "
373 "not trailing surrogate")
375 code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
379 out += unichr(code_point)
380 self.__parser_input('string', out)
382 def __lex_string_escape(self, c):
384 self.lex_state = Parser.__lex_string
386 def __lex_string(self, c):
389 self.lex_state = Parser.__lex_string_escape
391 self.__lex_finish_string()
395 self.__error("U+%04X must be escaped in quoted string" % ord(c))
398 def __lex_input(self, c):
399 self.byte_number += 1
401 self.column_number = 0
402 self.line_number += 1
404 self.column_number += 1
406 eat = self.lex_state(self, c)
407 assert eat is True or eat is False
410 def __parse_start(self, token, unused_string):
416 self.__error("syntax error at beginning of input")
def __parse_end(self, unused_token, unused_string):
    """Parser state in which no further token is acceptable: whatever
    arrives is reported as trailing garbage.  Both arguments are
    ignored."""
    message = "trailing garbage at end of input"
    self.__error(message)
419 def __parse_object_init(self, token, string):
423 self.__parse_object_name(token, string)
def __parse_object_name(self, token, string):
    """Parser state expecting an object member name.  A 'string' token
    is remembered in self.member_name and the parser moves on to expect
    the colon; any other token is a syntax error."""
    if token != 'string':
        self.__error("syntax error parsing object expecting string")
        return
    self.member_name = string
    self.parse_state = Parser.__parse_object_colon
430 def __parse_object_colon(self, token, unused_string):
432 self.parse_state = Parser.__parse_object_value
434 self.__error("syntax error parsing object expecting ':'")
def __parse_object_value(self, token, string):
    """Parser state expecting an object member's value: delegates to
    __parse_value, naming __parse_object_next as the state to follow."""
    after_value = Parser.__parse_object_next
    self.__parse_value(token, string, after_value)
437 def __parse_object_next(self, token, unused_string):
439 self.parse_state = Parser.__parse_object_name
443 self.__error("syntax error expecting '}' or ','")
444 def __parse_array_init(self, token, string):
448 self.__parse_array_value(token, string)
def __parse_array_value(self, token, string):
    """Parser state expecting an array element: delegates to
    __parse_value, naming __parse_array_next as the state to follow."""
    after_value = Parser.__parse_array_next
    self.__parse_value(token, string, after_value)
451 def __parse_array_next(self, token, unused_string):
453 self.parse_state = Parser.__parse_array_value
457 self.__error("syntax error expecting ']' or ','")
458 def __parser_input(self, token, string=None):
459 self.lex_state = Parser.__lex_start
461 self.parse_state(self, token, string)
463 def __put_value(self, value):
465 if type(top) == dict:
466 top[self.member_name] = value
470 def __parser_push(self, new_json, next_state):
471 if len(self.stack) < Parser.MAX_HEIGHT:
472 if len(self.stack) > 0:
473 self.__put_value(new_json)
474 self.stack.append(new_json)
475 self.parse_state = next_state
477 self.__error("input exceeds maximum nesting depth %d" %
def __push_object(self):
    """Pushes a new, empty dict onto the parse stack and makes
    __parse_object_init the next parser state."""
    empty_object = dict()
    self.__parser_push(empty_object, Parser.__parse_object_init)
def __push_array(self):
    """Pushes a new, empty list onto the parse stack and makes
    __parse_array_init the next parser state."""
    empty_array = list()
    self.__parser_push(empty_array, Parser.__parse_array_init)
484 def __parser_pop(self):
485 if len(self.stack) == 1:
486 self.parse_state = Parser.__parse_end
487 if not self.check_trailer:
492 if type(top) == list:
493 self.parse_state = Parser.__parse_array_next
495 self.parse_state = Parser.__parse_object_next
497 def __parse_value(self, token, string, next_state):
498 if token in [False, None, True] or type(token) in [int, long, float]:
499 self.__put_value(token)
500 elif token == 'string':
501 self.__put_value(string)
508 self.__error("syntax error expecting value")
510 self.parse_state = next_state
512 def __error(self, message):
513 if self.error is None:
514 self.error = ("line %d, column %d, byte %d: %s"
515 % (self.line_number, self.column_number,
516 self.byte_number, message))
522 if self.done or i >= len(s):
524 if self.__lex_input(s[i]):
531 if self.lex_state == Parser.__lex_start:
533 elif self.lex_state in (Parser.__lex_string,
534 Parser.__lex_string_escape):
535 self.__error("unexpected end of input in quoted string")
537 self.__lex_input(" ")
539 if self.parse_state == Parser.__parse_start:
540 self.__error("empty input stream")
541 elif self.parse_state != Parser.__parse_end:
542 self.__error("unexpected end of input")
544 if self.error == None:
545 assert len(self.stack) == 1
546 return self.stack.pop()