5e7d8fab9ed8adff45478a284b08379af0f95d7b
[openvswitch] / python / ovs / json.py
1 # Copyright (c) 2010, 2011 Nicira Networks
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
6 #
7 #     http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 import re
16 import StringIO
17 import sys
18
# Maps the code point of each character that must be escaped inside a JSON
# string to the escape sequence that replaces it.
escapes = {
    0x22: u"\\\"",      # Double quote.
    0x5c: u"\\\\",      # Backslash.
    0x08: u"\\b",
    0x0c: u"\\f",
    0x0a: u"\\n",
    0x0d: u"\\r",
    0x09: u"\\t",
}
# The remaining control characters (below U+0020) have no short form and are
# written as \uXXXX escapes.
for i in range(32):
    escapes.setdefault(i, u"\\u%04x" % i)
29
def __dump_string(stream, s):
    """Writes 's' to 'stream' as a double-quoted JSON string, replacing each
    character that has an entry in 'escapes' with its escape sequence."""
    pieces = []
    for c in s:
        pieces.append(escapes.get(ord(c), c))
    stream.write(u'"%s"' % ''.join(pieces))
32
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON text and writes it to 'stream'.

    'obj' may be None, True, False, an int or long, a float, a str or
    unicode string, or a dict, list, or tuple whose contents are themselves
    serializable.  Types are compared exactly, so instances of subclasses of
    these types are rejected.  Dictionary keys are converted to strings with
    unicode().

    'stream' is any object with a write() method.

    If 'sort_keys' is true, the members of each object are written in sorted
    order; otherwise they are written in the dictionary's iteration order.

    'pretty' is accepted and passed along to nested calls, but it currently
    has no effect on the output.

    Raises TypeError if 'obj', or a value nested inside it, has an
    unsupported type."""
    if obj is None:
        stream.write(u"null")
    elif obj is False:
        stream.write(u"false")
    elif obj is True:
        stream.write(u"true")
    elif type(obj) in (int, long):
        stream.write(u"%d" % obj)
    elif type(obj) == float:
        # NOTE(review): infinities and NaN are formatted as "inf" and "nan",
        # which are not valid JSON.
        stream.write(u"%.15g" % obj)
    elif type(obj) == unicode:
        __dump_string(stream, obj)
    elif type(obj) == str:
        __dump_string(stream, unicode(obj))
    elif type(obj) == dict:
        stream.write(u"{")
        if sort_keys:
            items = sorted(obj.items())
        else:
            items = obj.iteritems()
        for i, (key, value) in enumerate(items):
            if i > 0:
                stream.write(u",")
            __dump_string(stream, unicode(key))
            stream.write(u":")
            to_stream(value, stream, pretty, sort_keys)
        stream.write(u"}")
    elif type(obj) in (list, tuple):
        stream.write(u"[")
        for i, value in enumerate(obj):
            if i > 0:
                stream.write(u",")
            to_stream(value, stream, pretty, sort_keys)
        stream.write(u"]")
    else:
        # Wrap 'obj' in a 1-tuple so that an instance of a tuple subclass is
        # formatted as a single value instead of being unpacked by '%'.
        raise TypeError("can't serialize %s as JSON" % (obj,))
70
def to_file(obj, name, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON into the file named 'name', which is opened
    for writing.  'pretty' and 'sort_keys' are passed to to_stream().  The
    file is closed even if serialization fails."""
    out = open(name, "w")
    try:
        to_stream(obj, out, pretty=pretty, sort_keys=sort_keys)
    finally:
        out.close()
77
def to_string(obj, pretty=False, sort_keys=True):
    """Returns 'obj' serialized as a JSON string.  'pretty' and 'sort_keys'
    are passed to to_stream()."""
    sink = StringIO.StringIO()
    to_stream(obj, sink, pretty=pretty, sort_keys=sort_keys)
    text = sink.getvalue()
    sink.close()
    return text
84
def from_stream(stream):
    """Parses JSON read from 'stream' in 4096-character chunks, stopping at
    end of input or as soon as the parser declines to consume a whole chunk.
    Returns whatever Parser.finish() returns: the parsed value on success or
    an error message string on failure."""
    parser = Parser(check_trailer=True)
    chunk = stream.read(4096)
    while chunk != "":
        if parser.feed(chunk) != len(chunk):
            break
        chunk = stream.read(4096)
    return parser.finish()
92
def from_file(name):
    """Parses the JSON in the file named 'name' and returns the result of
    from_stream() on it.  The file is closed before returning, even if
    reading or parsing raises an exception."""
    infile = open(name, "r")
    try:
        result = from_stream(infile)
    finally:
        infile.close()
    return result
99
100 def from_string(s):
101     try:
102         s = unicode(s, 'utf-8')
103     except UnicodeDecodeError, e:
104         seq = ' '.join(["0x%2x" % ord(c)
105                         for c in e.object[e.start:e.end] if ord(c) >= 0x80])
106         return ("not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq)
107     p = Parser(check_trailer=True)
108     p.feed(s)
109     return p.finish()
110
class Parser(object):
    """Incremental JSON parser.

    Supply input in pieces with feed(), then call finish() to obtain the
    result.  Only an object or an array is accepted as the top-level value.

    If 'check_trailer' is true, the parser keeps reading after the top-level
    value is complete and reports any further token as an error.  Otherwise
    the parser is done as soon as the top-level value is complete."""

    ## Maximum height of parsing stack. ##
    MAX_HEIGHT = 1000

    def __init__(self, check_trailer=False):
        """Initializes an empty parser.  See the class comment for the
        meaning of 'check_trailer'."""
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start     # Handles next character.
        self.buffer = ""                        # Text of token in progress.
        self.line_number = 0
        self.column_number = 0
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start  # Handles next token.
        self.stack = []             # Objects and arrays being built.
        self.member_name = None     # Name of object member awaiting a value.

        # Parse status.
        self.done = False           # Once true, feed() consumes no more.
        self.error = None           # First error message, or None.

    # Lexer states.
    #
    # A lexer state is called as state(self, c) with the next input character
    # 'c'.  It returns True if it consumed 'c', or False if 'c' must be
    # presented again to whatever lexer state is then current.

    # Handlers for the first character of a token.  __lex_start() selects one
    # of them through the __lex_start_actions table below.
    def __lex_start_space(self, c):
        # White space between tokens is ignored.
        pass

    def __lex_start_alpha(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        # A punctuation character is a complete token by itself.
        self.__parser_input(c)

    def __lex_start_number(self, c):
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, c):
        # The opening quote is not stored in the buffer.
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        if ord(c) >= 32 and ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    __lex_start_actions = {}
    for c in " \t\n\r":
        __lex_start_actions[c] = __lex_start_space
    for c in "abcdefghijklmnopqrstuvwxyz":
        __lex_start_actions[c] = __lex_start_alpha
    for c in "[{]}:,":
        __lex_start_actions[c] = __lex_start_token
    for c in "-0123456789":
        __lex_start_actions[c] = __lex_start_number
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        """Lexer state between tokens.  Dispatches on 'c', the first
        character of the next token, and always consumes it."""
        Parser.__lex_start_actions.get(
            c, Parser.__lex_start_error)(self, c)
        return True

    # Characters that may continue a keyword.
    __lex_alpha = {}
    for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
        __lex_alpha[c] = True

    def __lex_finish_keyword(self):
        """Passes the keyword collected in self.buffer to the parser as
        False, True, or None, or reports an error if it is not one of
        "false", "true", or "null"."""
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        """Lexer state inside a keyword.  A letter extends the keyword; any
        other character ends it and is not consumed."""
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            self.__lex_finish_keyword()
            return False

    # Groups: sign, integer part, fraction digits, exponent (with its sign).
    __number_re = re.compile(
        r"(-)?(0|[1-9][0-9]*)(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Converts the number text collected in self.buffer into a token
        and passes it to the parser.

        If the digits, scaled by the exponent, form an integer in the range
        -2**63...2**63 - 1, the token is that integer, even if the number
        was written with a fraction or an exponent.  Otherwise the token is
        a float.  Reports an error if the text is not a well-formed number,
        if the exponent is out of range, or if the value is too large for a
        float."""
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            if (exp is not None and
                (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction that consists only of zeros does not affect the
            # value, so drop it.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            # Express the number as significand * 10**pow10, where
            # 'significand' is a nonnegative integer.
            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += long(exp)

            if significand == 0:
                # Any form of zero, including "-0", yields integer 0.
                self.__parser_input(0)
                return
            elif significand <= 2**63:
                # Try to reduce pow10 to 0.  The first loop stops as soon as
                # the significand is too big to be a 64-bit integer, so a
                # huge exponent cannot make it run for long.
                while pow10 > 0 and significand <= 2**63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    significand //= 10
                    pow10 += 1
                if (pow10 == 0 and
                    ((not sign and significand < 2**63) or
                     (sign and significand <= 2**63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match("-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match("-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search("[eE][-+]?([^0-9]|$)", s):
            # The grammar above accepts both 'e' and 'E', so diagnose both.
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        """Lexer state inside a number.  Any character that can appear in a
        number extends it; any other character ends it and is not
        consumed.  The collected text is validated only when it ends."""
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            self.__lex_finish_number()
            return False

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Returns the value of the 4 hexadecimal digits in 's', as found
        after "\\u" in a quoted string.  Reports an error and returns None if
        's' is too short, is not hexadecimal, or is "0000"."""
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will not
        make any sense if 'leading' or 'trailing' are not in the correct ranges
        for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Single-character escapes, keyed by the character after the backslash.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Decodes the backslash escapes in the string body collected in
        self.buffer and passes the result to the parser as a 'string' token.
        Reports an error, without producing a token, if an escape is
        malformed."""
        inp = self.buffer
        out = u""
        while len(inp):
            backslash = inp.find('\\')
            if backslash == -1:
                out += inp
                break
            out += inp[:backslash]
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                out += replacement
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            # "\uXXXX" escape.
            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed immediately by an
                # escaped trailing surrogate; together they encode a single
                # code point.
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            # NOTE(review): unichr() raises ValueError for code points above
            # U+FFFF on narrow Python builds; confirm whether that matters
            # for the builds this runs on.
            out += unichr(code_point)
        self.__parser_input('string', out)

    def __lex_string_escape(self, c):
        """Lexer state just after a backslash in a quoted string.  Stores
        'c' unexamined, so that an escaped double quote does not end the
        string, and returns to the ordinary string state."""
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        """Lexer state inside a quoted string.  Collects the string body,
        escapes included, until the closing double quote."""
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        """Updates the position counters for 'c' and passes it to the
        current lexer state.  Returns True if 'c' was consumed, False if it
        must be presented again."""
        # NOTE(review): when a lexer state declines to consume 'c', feed()
        # passes the same character here a second time, so that character is
        # counted twice in the position reported by later error messages.
        self.byte_number += 1
        if c == '\n':
            self.column_number = 0
            self.line_number += 1
        else:
            self.column_number += 1

        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    # Parser states.
    #
    # A parser state is called as state(self, token, string).  'token' is a
    # punctuation character, the value False, True, or None, a number, or
    # 'string'; in the last case 'string' holds the decoded text.

    def __parse_start(self, token, string):
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, token, string):
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        # Just after '{': the object may be empty.
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, string):
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, string):
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        # Just after '[': the array may be empty.
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, string):
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Resets the lexer for the next token and passes 'token' (and
        'string', for a 'string' token) to the current parser state."""
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Stores 'value' in the innermost container being built: under
        self.member_name if it is an object, or at the end if it is an
        array."""
        top = self.stack[-1]
        if isinstance(top, dict):
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        """Begins building the empty container 'new_json'.  It is stored in
        the enclosing container, if there is one, and pushed on the stack,
        and the parser moves to 'next_state'.  Reports an error instead if
        the stack already holds MAX_HEIGHT containers."""
        if len(self.stack) < Parser.MAX_HEIGHT:
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        """Finishes the innermost container.  The top-level container is
        left on the stack for finish() to return; otherwise the parser
        resumes after a value in the enclosing container."""
        if len(self.stack) == 1:
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if isinstance(top, list):
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        """Handles 'token' where a value is expected.  A scalar or string is
        stored in the innermost container and the parser moves to
        'next_state'.  '{' or '[' begins a nested container, in which case
        'next_state' is not used: __parser_pop() selects the state that
        follows the nested container."""
        if token in [False, None, True] or type(token) in [int, long, float]:
            self.__put_value(token)
        elif token == 'string':
            self.__put_value(string)
        else:
            if token == '{':
                self.__push_object()
            elif token == '[':
                self.__push_array()
            else:
                self.__error("syntax error expecting value")
            return
        self.parse_state = next_state

    def __error(self, message):
        """Records 'message', prefixed with the current input position, as
        the parse error and marks the parser done.  Only the first error is
        kept; later ones are ignored."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    def feed(self, s):
        """Feeds the characters of 's' to the parser.  Returns the number of
        characters consumed, which is less than len(s) if the parser became
        done (through completion or an error) before reaching the end of
        's'."""
        i = 0
        while True:
            if self.done or i >= len(s):
                return i
            if self.__lex_input(s[i]):
                i += 1

    def is_done(self):
        """Returns true if the parser will not consume any more input,
        either because of an error or because the top-level value is
        complete and the parser was created with 'check_trailer' false."""
        return self.done

    def finish(self):
        """Signals the end of input.  Returns the parsed top-level object or
        array if parsing succeeded, otherwise a string that describes the
        first error encountered."""
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # A keyword or number is in progress; a space terminates it.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error