97ff1bbdfb2a8f2e522470d9fdf1f99262a09f0d
[openvswitch] / python / ovs / json.py
1 # Copyright (c) 2010, 2011 Nicira Networks
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
6 #
7 #     http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 import re
16 import StringIO
17 import sys
18
# Maps the code point of every character that must be escaped inside a JSON
# string to its replacement text: the short two-character escapes first, then
# a \uXXXX escape for each remaining control character below U+0020.
escapes = dict((ord(raw), cooked) for raw, cooked in (
    ('"', u"\\\""),
    ("\\", u"\\\\"),
    ("\b", u"\\b"),
    ("\f", u"\\f"),
    ("\n", u"\\n"),
    ("\r", u"\\r"),
    ("\t", u"\\t"),
))
for i in range(32):
    # setdefault() leaves the short escapes registered above untouched.
    escapes.setdefault(i, u"\\u%04x" % i)
29
def __dump_string(stream, s):
    """Writes 's' to 'stream' as a double-quoted JSON string.  Each character
    whose code point has an entry in 'escapes' is replaced by that entry;
    every other character is written unchanged."""
    pieces = []
    for ch in s:
        pieces.append(escapes.get(ord(ch), ch))
    stream.write(u'"%s"' % ''.join(pieces))
32
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON text and writes it to 'stream' through
    stream.write().

    'obj' may be None, True, False, an int or long, a float, a str or
    unicode string, or a dict, list or tuple whose contents are themselves
    serializable.  Types are matched exactly, so instances of subclasses of
    these types are rejected.  Dict keys are converted with unicode() and
    written as JSON strings.

    If 'sort_keys' is true, the members of each dict are written in sorted
    order; otherwise they are written in the dict's own iteration order.

    'pretty' is accepted and handed down to nested values, but it does not
    currently change the output.

    Raises TypeError if 'obj', or a value nested inside it, has a type that
    cannot be serialized."""
    if obj is None:
        stream.write(u"null")
    elif obj is False:
        stream.write(u"false")
    elif obj is True:
        stream.write(u"true")
    elif type(obj) in (int, long):
        stream.write(u"%d" % obj)
    elif type(obj) == float:
        # Written as unicode like every other branch, so that streams which
        # only accept unicode text work for floats too.
        stream.write(u"%.15g" % obj)
    elif type(obj) == unicode:
        __dump_string(stream, obj)
    elif type(obj) == str:
        __dump_string(stream, unicode(obj))
    elif type(obj) == dict:
        stream.write(u"{")
        if sort_keys:
            items = sorted(obj.items())
        else:
            items = obj.iteritems()
        first = True
        for key, value in items:
            if not first:
                stream.write(u",")
            first = False
            __dump_string(stream, unicode(key))
            stream.write(u":")
            to_stream(value, stream, pretty, sort_keys)
        stream.write(u"}")
    elif type(obj) in (list, tuple):
        stream.write(u"[")
        first = True
        for value in obj:
            if not first:
                stream.write(u",")
            first = False
            to_stream(value, stream, pretty, sort_keys)
        stream.write(u"]")
    else:
        # 'obj' is wrapped in a 1-tuple so that an object that is itself a
        # tuple subclass cannot be mistaken for the list of format arguments.
        raise TypeError("can't serialize %s as JSON" % (obj,))
74
def to_file(obj, name, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON and writes the result, encoded as UTF-8, to
    the file named 'name', which is opened for writing (so any previous
    contents are replaced).

    'pretty' and 'sort_keys' are passed through to to_stream().  The file is
    closed before returning, whether or not serialization succeeded."""
    # Imported locally so that this function does not rely on the module's
    # import list.
    import codecs

    # to_stream() writes unicode strings.  A file object from plain open()
    # would encode them with the default ASCII codec and fail on the first
    # non-ASCII character, so use a writer that encodes to UTF-8 instead.
    # Output that is pure ASCII is byte-for-byte the same either way.
    stream = codecs.open(name, "w", "utf-8")
    try:
        to_stream(obj, stream, pretty, sort_keys)
    finally:
        stream.close()
81
def to_string(obj, pretty=False, sort_keys=True):
    """Returns the JSON text that to_stream() writes for 'obj', collected in
    an in-memory buffer.  'pretty' and 'sort_keys' are passed through to
    to_stream()."""
    buf = StringIO.StringIO()
    to_stream(obj, buf, pretty, sort_keys)
    text = buf.getvalue()
    buf.close()
    return text
88
def from_stream(stream):
    """Reads JSON text from 'stream' in chunks of up to 4096 characters and
    parses it.  Reading stops at end of input or as soon as the parser
    declines to consume an entire chunk.  Returns whatever Parser.finish()
    returns: the parsed value on success or an error message string."""
    parser = Parser(check_trailer=True)
    chunk = stream.read(4096)
    while chunk != "":
        if parser.feed(chunk) != len(chunk):
            # The parser stopped early, so there is no point reading more.
            break
        chunk = stream.read(4096)
    return parser.finish()
96
def from_file(name):
    """Opens the file named 'name' for reading, parses its contents with
    from_stream(), and returns the result.  The file is opened with plain
    open(), so the parser receives exactly what the file object's read()
    returns.  The file is closed before returning, even if parsing raises."""
    source = open(name, "r")
    try:
        result = from_stream(source)
    finally:
        source.close()
    return result
103
104 def from_string(s):
105     try:
106         s = unicode(s, 'utf-8')
107     except UnicodeDecodeError, e:
108         seq = ' '.join(["0x%2x" % ord(c)
109                         for c in e.object[e.start:e.end] if ord(c) >= 0x80])
110         return ("not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq)
111     p = Parser(check_trailer=True)
112     p.feed(s)
113     return p.finish()
114
class Parser(object):
    """Incremental parser for JSON text.

    Create a Parser, pass the input to feed() in one or more pieces, and
    then call finish(), which returns either the parsed value or a string
    that describes the first error encountered.  The top-level value must
    be an object or an array, so a successful result is always a dict or a
    list.

    Internally the class is a pair of state machines.  The lexer
    ('lex_state') consumes characters and produces tokens; the parser
    ('parse_state') consumes tokens and builds the result on 'stack'.  A
    token is one of the punctuation characters "[{]}:,", one of the values
    False, True or None, an int, long or float, or the word 'string'
    accompanied by the string's contents."""

    ## Maximum height of parsing stack. ##
    MAX_HEIGHT = 1000

    def __init__(self, check_trailer=False):
        """Initializes the parser.

        If 'check_trailer' is false, the parser is done as soon as the
        top-level value is complete, and feed() consumes nothing beyond it.
        If 'check_trailer' is true, the parser keeps accepting input after
        the top-level value and reports any further token as trailing
        garbage."""
        self.check_trailer = check_trailer

        # Lexical analysis.
        self.lex_state = Parser.__lex_start
        self.buffer = ""            # Text of the token being accumulated.
        self.line_number = 0        # Position counters, used only in
        self.column_number = 0      # error messages.
        self.byte_number = 0

        # Parsing.
        self.parse_state = Parser.__parse_start
        self.stack = []             # Containers currently being filled in.
        self.member_name = None     # Name of the object member in progress.

        # Parse status.
        self.done = False
        self.error = None

    # Lexer: start state.
    #
    # Each handler below is the action for one class of character seen
    # between tokens.  __lex_start_actions maps characters to handlers.

    def __lex_start_space(self, c):
        """Ignores white space between tokens."""
        pass

    def __lex_start_alpha(self, c):
        """Begins accumulating a keyword with 'c' as its first letter."""
        self.buffer = c
        self.lex_state = Parser.__lex_keyword

    def __lex_start_token(self, c):
        """Passes the punctuation character 'c' to the parser as a token."""
        self.__parser_input(c)

    def __lex_start_number(self, c):
        """Begins accumulating a number with 'c' as its first character."""
        self.buffer = c
        self.lex_state = Parser.__lex_number

    def __lex_start_string(self, c):
        """Begins a quoted string.  The opening quote is not stored."""
        self.lex_state = Parser.__lex_string

    def __lex_start_error(self, c):
        """Reports 'c' as a character that cannot begin any token."""
        if ord(c) >= 32 and ord(c) < 128:
            self.__error("invalid character '%s'" % c)
        else:
            self.__error("invalid character U+%04x" % ord(c))

    __lex_start_actions = {}
    for c in " \t\n\r":
        __lex_start_actions[c] = __lex_start_space
    for c in "abcdefghijklmnopqrstuvwxyz":
        __lex_start_actions[c] = __lex_start_alpha
    for c in "[{]}:,":
        __lex_start_actions[c] = __lex_start_token
    for c in "-0123456789":
        __lex_start_actions[c] = __lex_start_number
    __lex_start_actions['"'] = __lex_start_string

    def __lex_start(self, c):
        """Lexer state between tokens: dispatches on 'c' and always consumes
        it."""
        Parser.__lex_start_actions.get(
            c, Parser.__lex_start_error)(self, c)
        return True

    # Lexer: keywords.

    __lex_alpha = {}
    for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
        __lex_alpha[c] = True

    def __lex_finish_keyword(self):
        """Converts the accumulated keyword into a False, True or None
        token, or reports an error if it is not a JSON keyword."""
        if self.buffer == "false":
            self.__parser_input(False)
        elif self.buffer == "true":
            self.__parser_input(True)
        elif self.buffer == "null":
            self.__parser_input(None)
        else:
            self.__error("invalid keyword '%s'" % self.buffer)

    def __lex_keyword(self, c):
        """Lexer state inside a keyword.  Consumes letters; any other
        character ends the keyword and is left for the next state."""
        if c in Parser.__lex_alpha:
            self.buffer += c
            return True
        else:
            self.__lex_finish_keyword()
            return False

    # Lexer: numbers.

    __number_re = re.compile(
        r"(-)?(0|[1-9][0-9]*)(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")

    def __lex_finish_number(self):
        """Converts the accumulated number text into a token.

        A number whose value is an integer that fits in 64 bits is passed
        to the parser as an int or long, even if it was written with a
        fraction or an exponent (e.g. "1.5e1" or "200e1").  Any other
        number is passed as a float.  Malformed numbers, exponents beyond
        the range of sys.maxint, and values that overflow to infinity are
        reported as errors."""
        s = self.buffer
        m = Parser.__number_re.match(s)
        if m:
            sign, integer, fraction, exp = m.groups()
            if (exp is not None and
                (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
                self.__error("exponent outside valid range")
                return

            # A fraction consisting only of zeros contributes nothing.
            if fraction is not None and len(fraction.lstrip('0')) == 0:
                fraction = None

            # Express the number as significand * 10**pow10, with an
            # integer significand.
            sig_string = integer
            if fraction is not None:
                sig_string += fraction
            significand = int(sig_string)

            pow10 = 0
            if fraction is not None:
                pow10 -= len(fraction)
            if exp is not None:
                pow10 += long(exp)

            if significand == 0:
                self.__parser_input(0)
                return
            elif significand <= 2**63:
                # Try to reduce pow10 to zero by folding it into the
                # significand.  The bound on the first loop keeps a huge
                # exponent from producing an enormous integer: once the
                # significand exceeds 2**63 the number cannot be emitted as
                # an integer anyway.
                while pow10 > 0 and significand <= 2**63:
                    significand *= 10
                    pow10 -= 1
                while pow10 < 0 and significand % 10 == 0:
                    significand //= 10
                    pow10 += 1
                # Accept the range of a signed 64-bit integer:
                # -2**63 through 2**63 - 1.
                if (pow10 == 0 and
                    ((not sign and significand < 2**63) or
                     (sign and significand <= 2**63))):
                    if sign:
                        self.__parser_input(-significand)
                    else:
                        self.__parser_input(significand)
                    return

            value = float(s)
            if value == float("inf") or value == float("-inf"):
                self.__error("number outside valid range")
                return
            if value == 0:
                # Suppress negative zero.
                value = 0
            self.__parser_input(value)
        elif re.match(r"-?0[0-9]", s):
            self.__error("leading zeros not allowed")
        elif re.match(r"-([^0-9]|$)", s):
            self.__error("'-' must be followed by digit")
        elif re.match(r"-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
            self.__error("decimal point must be followed by digit")
        elif re.search(r"e[-+]?([^0-9]|$)", s):
            self.__error("exponent must contain at least one digit")
        else:
            self.__error("syntax error in number")

    def __lex_number(self, c):
        """Lexer state inside a number.  Consumes any character that can
        appear in a number; any other character ends the number and is left
        for the next state.  Validation happens when the number ends."""
        if c in ".0123456789eE-+":
            self.buffer += c
            return True
        else:
            self.__lex_finish_number()
            return False

    # Lexer: strings.

    __4hex_re = re.compile("[0-9a-fA-F]{4}")

    def __lex_4hex(self, s):
        """Parses 's' as the four hexadecimal digits of a \\u escape.
        Returns the value as an integer, or reports an error and returns
        None if 's' is too short, is not hexadecimal, or is "0000"."""
        if len(s) < 4:
            self.__error("quoted string ends within \\u escape")
        elif not Parser.__4hex_re.match(s):
            self.__error("malformed \\u escape")
        elif s == "0000":
            self.__error("null bytes not supported in quoted strings")
        else:
            return int(s, 16)

    @staticmethod
    def __is_leading_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a leading
        surrogate."""
        return c >= 0xd800 and c <= 0xdbff

    @staticmethod
    def __is_trailing_surrogate(c):
        """Returns true if 'c' is a Unicode code point for a trailing
        surrogate."""
        return c >= 0xdc00 and c <= 0xdfff

    @staticmethod
    def __utf16_decode_surrogate_pair(leading, trailing):
        """Returns the unicode code point corresponding to leading surrogate
        'leading' and trailing surrogate 'trailing'.  The return value will not
        make any sense if 'leading' or 'trailing' are not in the correct ranges
        for leading or trailing surrogates."""
        #  Leading surrogate:         110110wwwwxxxxxx
        # Trailing surrogate:         110111xxxxxxxxxx
        #         Code point: 000uuuuuxxxxxxxxxxxxxxxx
        w = (leading >> 6) & 0xf
        u = w + 1
        x0 = leading & 0x3f
        x1 = trailing & 0x3ff
        return (u << 16) | (x0 << 10) | x1

    # Single-character escapes, keyed by the character after the backslash.
    __unescape = {'"': u'"',
                  "\\": u"\\",
                  "/": u"/",
                  "b": u"\b",
                  "f": u"\f",
                  "n": u"\n",
                  "r": u"\r",
                  "t": u"\t"}

    def __lex_finish_string(self):
        """Decodes the escape sequences in the accumulated string body and
        passes the result to the parser as a 'string' token.  Reports an
        error instead if an escape sequence is invalid."""
        inp = self.buffer
        out = u""
        while len(inp):
            backslash = inp.find('\\')
            if backslash == -1:
                out += inp
                break
            out += inp[:backslash]
            inp = inp[backslash + 1:]
            if inp == "":
                self.__error("quoted string may not end with backslash")
                return

            replacement = Parser.__unescape.get(inp[0])
            if replacement is not None:
                out += replacement
                inp = inp[1:]
                continue
            elif inp[0] != u'u':
                self.__error("bad escape \\%s" % inp[0])
                return

            # \uXXXX escape.
            c0 = self.__lex_4hex(inp[1:5])
            if c0 is None:
                return
            inp = inp[5:]

            if Parser.__is_leading_surrogate(c0):
                # A leading surrogate must be followed immediately by an
                # escaped trailing surrogate; together they encode a single
                # code point.
                if inp[:2] != u'\\u':
                    self.__error("malformed escaped surrogate pair")
                    return
                c1 = self.__lex_4hex(inp[2:6])
                if c1 is None:
                    return
                if not Parser.__is_trailing_surrogate(c1):
                    self.__error("second half of escaped surrogate pair is "
                                 "not trailing surrogate")
                    return
                code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
                inp = inp[6:]
            else:
                code_point = c0
            out += unichr(code_point)
        self.__parser_input('string', out)

    def __lex_string_escape(self, c):
        """Lexer state just after a backslash inside a string.  Stores 'c'
        whatever it is, so that an escaped quote does not end the string,
        and returns to the ordinary string state."""
        self.buffer += c
        self.lex_state = Parser.__lex_string
        return True

    def __lex_string(self, c):
        """Lexer state inside a quoted string.  Always consumes 'c'.  The
        string body is stored undecoded; escapes are processed when the
        closing quote arrives."""
        if c == '\\':
            self.buffer += c
            self.lex_state = Parser.__lex_string_escape
        elif c == '"':
            self.__lex_finish_string()
        elif ord(c) >= 0x20:
            self.buffer += c
        else:
            self.__error("U+%04X must be escaped in quoted string" % ord(c))
        return True

    def __lex_input(self, c):
        """Updates the position counters for 'c' and hands it to the
        current lexer state.  Returns True if the state consumed 'c', or
        False if 'c' ended a token and must be offered again.  The counters
        advance on every call, including when the same character is offered
        a second time."""
        self.byte_number += 1
        if c == '\n':
            self.column_number = 0
            self.line_number += 1
        else:
            self.column_number += 1

        eat = self.lex_state(self, c)
        assert eat is True or eat is False
        return eat

    # Parser states.  Each takes the token and, for a 'string' token, the
    # string's contents in 'string'.

    def __parse_start(self, token, string):
        """State before any token: only '{' or '[' may begin the input."""
        if token == '{':
            self.__push_object()
        elif token == '[':
            self.__push_array()
        else:
            self.__error("syntax error at beginning of input")

    def __parse_end(self, token, string):
        """State after the top-level value: any further token is an
        error."""
        self.__error("trailing garbage at end of input")

    def __parse_object_init(self, token, string):
        """State just after '{': accepts '}' or the first member name."""
        if token == '}':
            self.__parser_pop()
        else:
            self.__parse_object_name(token, string)

    def __parse_object_name(self, token, string):
        """State where an object member name is required."""
        if token == 'string':
            self.member_name = string
            self.parse_state = Parser.__parse_object_colon
        else:
            self.__error("syntax error parsing object expecting string")

    def __parse_object_colon(self, token, string):
        """State after a member name: requires ':'."""
        if token == ":":
            self.parse_state = Parser.__parse_object_value
        else:
            self.__error("syntax error parsing object expecting ':'")

    def __parse_object_value(self, token, string):
        """State after ':': requires the member's value."""
        self.__parse_value(token, string, Parser.__parse_object_next)

    def __parse_object_next(self, token, string):
        """State after a member value: requires ',' or '}'."""
        if token == ",":
            self.parse_state = Parser.__parse_object_name
        elif token == "}":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting '}' or ','")

    def __parse_array_init(self, token, string):
        """State just after '[': accepts ']' or the first element."""
        if token == ']':
            self.__parser_pop()
        else:
            self.__parse_array_value(token, string)

    def __parse_array_value(self, token, string):
        """State where an array element is required."""
        self.__parse_value(token, string, Parser.__parse_array_next)

    def __parse_array_next(self, token, string):
        """State after an array element: requires ',' or ']'."""
        if token == ",":
            self.parse_state = Parser.__parse_array_value
        elif token == "]":
            self.__parser_pop()
        else:
            self.__error("syntax error expecting ']' or ','")

    def __parser_input(self, token, string=None):
        """Delivers a completed token from the lexer to the parser.  Resets
        the lexer to its start state first, so that the parser state's
        effects are the last word."""
        self.lex_state = Parser.__lex_start
        self.buffer = ""
        self.parse_state(self, token, string)

    def __put_value(self, value):
        """Stores 'value' in the container on top of the stack: under
        'member_name' if it is a dict, or appended if it is a list."""
        top = self.stack[-1]
        if type(top) == dict:
            top[self.member_name] = value
        else:
            top.append(value)

    def __parser_push(self, new_json, next_state):
        """Begins a nested container 'new_json', attaching it to its parent
        (if any) and making it the top of the stack, then switches to
        'next_state'.  Reports an error instead if the stack already holds
        MAX_HEIGHT containers."""
        if len(self.stack) < Parser.MAX_HEIGHT:
            if len(self.stack) > 0:
                self.__put_value(new_json)
            self.stack.append(new_json)
            self.parse_state = next_state
        else:
            self.__error("input exceeds maximum nesting depth %d" %
                         Parser.MAX_HEIGHT)

    def __push_object(self):
        """Begins a new JSON object."""
        self.__parser_push({}, Parser.__parse_object_init)

    def __push_array(self):
        """Begins a new JSON array."""
        self.__parser_push([], Parser.__parse_array_init)

    def __parser_pop(self):
        """Ends the container on top of the stack.  The outermost container
        is left on the stack so that finish() can return it; closing it
        completes the parse unless trailing input is being checked."""
        if len(self.stack) == 1:
            self.parse_state = Parser.__parse_end
            if not self.check_trailer:
                self.done = True
        else:
            self.stack.pop()
            top = self.stack[-1]
            if type(top) == list:
                self.parse_state = Parser.__parse_array_next
            else:
                self.parse_state = Parser.__parse_object_next

    def __parse_value(self, token, string, next_state):
        """Handles a token in a position where a value is required.  A
        scalar is stored in the current container and the parser moves to
        'next_state'.  '{' or '[' begins a nested container, which chooses
        the next state itself.  Anything else is an error."""
        if token in [False, None, True] or type(token) in [int, long, float]:
            self.__put_value(token)
        elif token == 'string':
            self.__put_value(string)
        else:
            if token == '{':
                self.__push_object()
            elif token == '[':
                self.__push_array()
            else:
                self.__error("syntax error expecting value")
            return
        self.parse_state = next_state

    def __error(self, message):
        """Records 'message', prefixed with the current input position, as
        the parse error and marks the parser done.  Only the first error is
        kept."""
        if self.error is None:
            self.error = ("line %d, column %d, byte %d: %s"
                          % (self.line_number, self.column_number,
                             self.byte_number, message))
            self.done = True

    def feed(self, s):
        """Feeds the characters of 's' to the parser.  Returns the number
        of characters consumed, which is less than len(s) if the parser
        became done (because of an error or, without trailer checking,
        because the top-level value ended) before reaching the end."""
        i = 0
        while True:
            if self.done or i >= len(s):
                return i
            # A False return means that s[i] ended a token without being
            # consumed, so it is offered again to the next lexer state.
            if self.__lex_input(s[i]):
                i += 1

    def is_done(self):
        """Returns true if the parser will not consume any more input."""
        return self.done

    def finish(self):
        """Signals the end of input.  Returns the parsed top-level value
        (a dict or a list) on success, or an error message string if the
        input was invalid or incomplete."""
        if self.lex_state == Parser.__lex_start:
            pass
        elif self.lex_state in (Parser.__lex_string,
                                Parser.__lex_string_escape):
            self.__error("unexpected end of input in quoted string")
        else:
            # A keyword or number is still pending; a space terminates it.
            self.__lex_input(" ")

        if self.parse_state == Parser.__parse_start:
            self.__error("empty input stream")
        elif self.parse_state != Parser.__parse_end:
            self.__error("unexpected end of input")

        if self.error is None:
            assert len(self.stack) == 1
            return self.stack.pop()
        else:
            return self.error