2 #include "structmember.h"
3 #if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
4 #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
6 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
7 typedef int Py_ssize_t;
8 #define PY_SSIZE_T_MAX INT_MAX
9 #define PY_SSIZE_T_MIN INT_MIN
10 #define PyInt_FromSsize_t PyInt_FromLong
11 #define PyInt_AsSsize_t PyInt_AsLong
14 #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
18 #define UNUSED __attribute__((__unused__))
23 #define DEFAULT_ENCODING "utf-8"
25 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
30 static PyTypeObject PyScannerType;
31 static PyTypeObject PyEncoderType;
33 typedef struct _PyScannerObject {
37 PyObject *object_hook;
38 PyObject *parse_float;
40 PyObject *parse_constant;
43 static PyMemberDef scanner_members[] = {
44 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
45 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
46 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
47 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
48 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
49 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
53 typedef struct _PyEncoderObject {
59 PyObject *key_separator;
60 PyObject *item_separator;
67 static PyMemberDef encoder_members[] = {
68 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
69 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
70 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
71 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
72 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
73 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
74 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
75 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
80 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
82 ascii_escape_unicode(PyObject *pystr);
84 ascii_escape_str(PyObject *pystr);
86 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
87 void init_speedups(void);
89 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
91 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
93 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
95 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
97 scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
99 scanner_dealloc(PyObject *self);
101 scanner_clear(PyObject *self);
103 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
105 encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
107 encoder_dealloc(PyObject *self);
109 encoder_clear(PyObject *self);
111 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
113 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
115 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
117 _encoded_const(PyObject *const);
119 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
121 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
123 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
125 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
127 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
/* True when c lies in the printable ASCII range ' ' through '~' and is
   neither a backslash nor a double quote. The argument is parenthesized so
   that expression arguments expand correctly; it is still evaluated more
   than once, so it must not have side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True when c is a space, tab, newline, or carriage return. The argument is
   evaluated more than once, so it must not have side effects. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
132 #define MIN_EXPANSION 6
133 #ifdef Py_UNICODE_WIDE
134 #define MAX_EXPANSION (2 * MIN_EXPANSION)
136 #define MAX_EXPANSION MIN_EXPANSION
140 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
142 /* PyObject to Py_ssize_t converter */
143 *size_ptr = PyInt_AsSsize_t(o);
144 if (*size_ptr == -1 && PyErr_Occurred());
150 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
152 /* Py_ssize_t to PyObject converter */
153 return PyInt_FromSsize_t(*size_ptr);
157 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
159 /* Escape unicode code point c to ASCII escape sequences
160 in char *output. output must have at least 12 bytes unused to
161 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
162 output[chars++] = '\\';
164 case '\\': output[chars++] = (char)c; break;
165 case '"': output[chars++] = (char)c; break;
166 case '\b': output[chars++] = 'b'; break;
167 case '\f': output[chars++] = 'f'; break;
168 case '\n': output[chars++] = 'n'; break;
169 case '\r': output[chars++] = 'r'; break;
170 case '\t': output[chars++] = 't'; break;
172 #ifdef Py_UNICODE_WIDE
174 /* UTF-16 surrogate pair */
175 Py_UNICODE v = c - 0x10000;
176 c = 0xd800 | ((v >> 10) & 0x3ff);
177 output[chars++] = 'u';
178 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
179 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
180 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
181 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
182 c = 0xdc00 | (v & 0x3ff);
183 output[chars++] = '\\';
186 output[chars++] = 'u';
187 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
188 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
189 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
190 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
196 ascii_escape_unicode(PyObject *pystr)
198 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
200 Py_ssize_t input_chars;
201 Py_ssize_t output_size;
202 Py_ssize_t max_output_size;
206 Py_UNICODE *input_unicode;
208 input_chars = PyUnicode_GET_SIZE(pystr);
209 input_unicode = PyUnicode_AS_UNICODE(pystr);
211 /* One char input can be up to 6 chars output, estimate 4 of these */
212 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
213 max_output_size = 2 + (input_chars * MAX_EXPANSION);
214 rval = PyString_FromStringAndSize(NULL, output_size);
218 output = PyString_AS_STRING(rval);
220 output[chars++] = '"';
221 for (i = 0; i < input_chars; i++) {
222 Py_UNICODE c = input_unicode[i];
224 output[chars++] = (char)c;
227 chars = ascii_escape_char(c, output, chars);
229 if (output_size - chars < (1 + MAX_EXPANSION)) {
230 /* There's more than four, so let's resize by a lot */
231 Py_ssize_t new_output_size = output_size * 2;
232 /* This is an upper bound */
233 if (new_output_size > max_output_size) {
234 new_output_size = max_output_size;
236 /* Make sure that the output size changed before resizing */
237 if (new_output_size != output_size) {
238 output_size = new_output_size;
239 if (_PyString_Resize(&rval, output_size) == -1) {
242 output = PyString_AS_STRING(rval);
246 output[chars++] = '"';
247 if (_PyString_Resize(&rval, chars) == -1) {
254 ascii_escape_str(PyObject *pystr)
256 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
258 Py_ssize_t input_chars;
259 Py_ssize_t output_size;
265 input_chars = PyString_GET_SIZE(pystr);
266 input_str = PyString_AS_STRING(pystr);
268 /* Fast path for a string that's already ASCII */
269 for (i = 0; i < input_chars; i++) {
270 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
272 /* If we have to escape something, scan the string for unicode */
274 for (j = i; j < input_chars; j++) {
275 c = (Py_UNICODE)(unsigned char)input_str[j];
277 /* We hit a non-ASCII character, bail to unicode mode */
279 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
283 rval = ascii_escape_unicode(uni);
292 if (i == input_chars) {
293 /* Input is already ASCII */
294 output_size = 2 + input_chars;
297 /* One char input can be up to 6 chars output, estimate 4 of these */
298 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
300 rval = PyString_FromStringAndSize(NULL, output_size);
304 output = PyString_AS_STRING(rval);
307 /* We know that everything up to i is ASCII already */
309 memcpy(&output[1], input_str, i);
311 for (; i < input_chars; i++) {
312 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
314 output[chars++] = (char)c;
317 chars = ascii_escape_char(c, output, chars);
319 /* An ASCII char can't possibly expand to a surrogate! */
320 if (output_size - chars < (1 + MIN_EXPANSION)) {
321 /* There's more than four, so let's resize by a lot */
323 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
324 output_size = 2 + (input_chars * MIN_EXPANSION);
326 if (_PyString_Resize(&rval, output_size) == -1) {
329 output = PyString_AS_STRING(rval);
332 output[chars++] = '"';
333 if (_PyString_Resize(&rval, chars) == -1) {
340 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
342 /* Use the Python function simplejson.decoder.errmsg to raise a nice
343 looking ValueError exception */
344 static PyObject *errmsg_fn = NULL;
346 if (errmsg_fn == NULL) {
347 PyObject *decoder = PyImport_ImportModule("simplejson.decoder");
350 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
352 if (errmsg_fn == NULL)
355 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
357 PyErr_SetObject(PyExc_ValueError, pymsg);
363 join_list_unicode(PyObject *lst)
365 /* return u''.join(lst) */
366 static PyObject *joinfn = NULL;
367 if (joinfn == NULL) {
368 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
372 joinfn = PyObject_GetAttrString(ustr, "join");
377 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
381 join_list_string(PyObject *lst)
383 /* return ''.join(lst) */
384 static PyObject *joinfn = NULL;
385 if (joinfn == NULL) {
386 PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
390 joinfn = PyObject_GetAttrString(ustr, "join");
395 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
399 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
400 /* return (rval, idx) tuple, stealing reference to rval */
404 steal a reference to rval, returns (rval, idx)
409 pyidx = PyInt_FromSsize_t(idx);
414 tpl = PyTuple_New(2);
420 PyTuple_SET_ITEM(tpl, 0, rval);
421 PyTuple_SET_ITEM(tpl, 1, pyidx);
426 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
428 /* Read the JSON string from PyString pystr.
429 end is the index of the first character after the quote.
430 encoding is the encoding of pystr (must be an ASCII superset)
431 if strict is zero then literal control characters are allowed
432 *next_end_ptr is a return-by-reference index of the character
435 Return value is a new PyString (if ASCII-only) or PyUnicode
438 Py_ssize_t len = PyString_GET_SIZE(pystr);
439 Py_ssize_t begin = end - 1;
440 Py_ssize_t next = begin;
442 char *buf = PyString_AS_STRING(pystr);
443 PyObject *chunks = PyList_New(0);
444 if (chunks == NULL) {
447 if (end < 0 || len <= end) {
448 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
452 /* Find the end of the string or the next escape */
454 PyObject *chunk = NULL;
455 for (next = end; next < len; next++) {
456 c = (unsigned char)buf[next];
457 if (c == '"' || c == '\\') {
460 else if (strict && c <= 0x1f) {
461 raise_errmsg("Invalid control character at", pystr, next);
468 if (!(c == '"' || c == '\\')) {
469 raise_errmsg("Unterminated string starting at", pystr, begin);
472 /* Pick up this chunk if it's not zero length */
474 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
475 if (strchunk == NULL) {
479 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
488 if (PyList_Append(chunks, chunk)) {
500 raise_errmsg("Unterminated string starting at", pystr, begin);
505 /* Non-unicode backslash escapes */
511 case 'b': c = '\b'; break;
512 case 'f': c = '\f'; break;
513 case 'n': c = '\n'; break;
514 case 'r': c = '\r'; break;
515 case 't': c = '\t'; break;
519 raise_errmsg("Invalid \\escape", pystr, end - 2);
528 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
531 /* Decode 4 hex digits */
532 for (; next < end; next++) {
533 Py_UNICODE digit = buf[next];
536 case '0': case '1': case '2': case '3': case '4':
537 case '5': case '6': case '7': case '8': case '9':
538 c |= (digit - '0'); break;
539 case 'a': case 'b': case 'c': case 'd': case 'e':
541 c |= (digit - 'a' + 10); break;
542 case 'A': case 'B': case 'C': case 'D': case 'E':
544 c |= (digit - 'A' + 10); break;
546 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
550 #ifdef Py_UNICODE_WIDE
552 if ((c & 0xfc00) == 0xd800) {
554 if (end + 6 >= len) {
555 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
558 if (buf[next++] != '\\' || buf[next++] != 'u') {
559 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
563 /* Decode 4 hex digits */
564 for (; next < end; next++) {
566 Py_UNICODE digit = buf[next];
568 case '0': case '1': case '2': case '3': case '4':
569 case '5': case '6': case '7': case '8': case '9':
570 c2 |= (digit - '0'); break;
571 case 'a': case 'b': case 'c': case 'd': case 'e':
573 c2 |= (digit - 'a' + 10); break;
574 case 'A': case 'B': case 'C': case 'D': case 'E':
576 c2 |= (digit - 'A' + 10); break;
578 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
582 if ((c2 & 0xfc00) != 0xdc00) {
583 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
586 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
588 else if ((c & 0xfc00) == 0xdc00) {
589 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
598 chunk = PyUnicode_FromUnicode(&c, 1);
604 char c_char = Py_CHARMASK(c);
605 chunk = PyString_FromStringAndSize(&c_char, 1);
610 if (PyList_Append(chunks, chunk)) {
617 rval = join_list_string(chunks);
632 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
634 /* Read the JSON string from PyUnicode pystr.
635 end is the index of the first character after the quote.
636 if strict is zero then literal control characters are allowed
637 *next_end_ptr is a return-by-reference index of the character
640 Return value is a new PyUnicode
643 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
644 Py_ssize_t begin = end - 1;
645 Py_ssize_t next = begin;
646 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
647 PyObject *chunks = PyList_New(0);
648 if (chunks == NULL) {
651 if (end < 0 || len <= end) {
652 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
656 /* Find the end of the string or the next escape */
658 PyObject *chunk = NULL;
659 for (next = end; next < len; next++) {
661 if (c == '"' || c == '\\') {
664 else if (strict && c <= 0x1f) {
665 raise_errmsg("Invalid control character at", pystr, next);
669 if (!(c == '"' || c == '\\')) {
670 raise_errmsg("Unterminated string starting at", pystr, begin);
673 /* Pick up this chunk if it's not zero length */
675 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
679 if (PyList_Append(chunks, chunk)) {
691 raise_errmsg("Unterminated string starting at", pystr, begin);
696 /* Non-unicode backslash escapes */
702 case 'b': c = '\b'; break;
703 case 'f': c = '\f'; break;
704 case 'n': c = '\n'; break;
705 case 'r': c = '\r'; break;
706 case 't': c = '\t'; break;
710 raise_errmsg("Invalid \\escape", pystr, end - 2);
719 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
722 /* Decode 4 hex digits */
723 for (; next < end; next++) {
724 Py_UNICODE digit = buf[next];
727 case '0': case '1': case '2': case '3': case '4':
728 case '5': case '6': case '7': case '8': case '9':
729 c |= (digit - '0'); break;
730 case 'a': case 'b': case 'c': case 'd': case 'e':
732 c |= (digit - 'a' + 10); break;
733 case 'A': case 'B': case 'C': case 'D': case 'E':
735 c |= (digit - 'A' + 10); break;
737 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
741 #ifdef Py_UNICODE_WIDE
743 if ((c & 0xfc00) == 0xd800) {
745 if (end + 6 >= len) {
746 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
749 if (buf[next++] != '\\' || buf[next++] != 'u') {
750 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
754 /* Decode 4 hex digits */
755 for (; next < end; next++) {
757 Py_UNICODE digit = buf[next];
759 case '0': case '1': case '2': case '3': case '4':
760 case '5': case '6': case '7': case '8': case '9':
761 c2 |= (digit - '0'); break;
762 case 'a': case 'b': case 'c': case 'd': case 'e':
764 c2 |= (digit - 'a' + 10); break;
765 case 'A': case 'B': case 'C': case 'D': case 'E':
767 c2 |= (digit - 'A' + 10); break;
769 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
773 if ((c2 & 0xfc00) != 0xdc00) {
774 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
777 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
779 else if ((c & 0xfc00) == 0xdc00) {
780 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
785 chunk = PyUnicode_FromUnicode(&c, 1);
789 if (PyList_Append(chunks, chunk)) {
796 rval = join_list_unicode(chunks);
809 PyDoc_STRVAR(pydoc_scanstring,
810 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
812 "Scan the string s for a JSON string. End is the index of the\n"
813 "character in s after the quote that started the JSON string.\n"
814 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
815 "on attempt to decode an invalid string. If strict is False then literal\n"
816 "control characters are allowed in the string.\n"
818 "Returns a tuple of the decoded string and the index of the character in s\n"
819 "after the end quote."
823 py_scanstring(PyObject* self UNUSED, PyObject *args)
828 Py_ssize_t next_end = -1;
829 char *encoding = NULL;
831 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
834 if (encoding == NULL) {
835 encoding = DEFAULT_ENCODING;
837 if (PyString_Check(pystr)) {
838 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
840 else if (PyUnicode_Check(pystr)) {
841 rval = scanstring_unicode(pystr, end, strict, &next_end);
844 PyErr_Format(PyExc_TypeError,
845 "first argument must be a string, not %.80s",
846 Py_TYPE(pystr)->tp_name);
849 return _build_rval_index_tuple(rval, next_end);
852 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
853 "encode_basestring_ascii(basestring) -> str\n"
855 "Return an ASCII-only JSON representation of a Python string"
859 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
861 /* Return an ASCII-only JSON representation of a Python string */
863 if (PyString_Check(pystr)) {
864 return ascii_escape_str(pystr);
866 else if (PyUnicode_Check(pystr)) {
867 return ascii_escape_unicode(pystr);
870 PyErr_Format(PyExc_TypeError,
871 "first argument must be a string, not %.80s",
872 Py_TYPE(pystr)->tp_name);
878 scanner_dealloc(PyObject *self)
880 /* Deallocate scanner object */
882 Py_TYPE(self)->tp_free(self);
886 scanner_traverse(PyObject *self, visitproc visit, void *arg)
889 assert(PyScanner_Check(self));
890 s = (PyScannerObject *)self;
891 Py_VISIT(s->encoding);
893 Py_VISIT(s->object_hook);
894 Py_VISIT(s->parse_float);
895 Py_VISIT(s->parse_int);
896 Py_VISIT(s->parse_constant);
901 scanner_clear(PyObject *self)
904 assert(PyScanner_Check(self));
905 s = (PyScannerObject *)self;
906 Py_CLEAR(s->encoding);
908 Py_CLEAR(s->object_hook);
909 Py_CLEAR(s->parse_float);
910 Py_CLEAR(s->parse_int);
911 Py_CLEAR(s->parse_constant);
916 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
917 /* Read a JSON object from PyString pystr.
918 idx is the index of the first character after the opening curly brace.
919 *next_idx_ptr is a return-by-reference index to the first character after
920 the closing curly brace.
922 Returns a new PyObject (usually a dict, but object_hook can change that)
924 char *str = PyString_AS_STRING(pystr);
925 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
926 PyObject *rval = PyDict_New();
927 PyObject *key = NULL;
928 PyObject *val = NULL;
929 char *encoding = PyString_AS_STRING(s->encoding);
930 int strict = PyObject_IsTrue(s->strict);
935 /* skip whitespace after { */
936 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
938 /* only loop if the object is non-empty */
939 if (idx <= end_idx && str[idx] != '}') {
940 while (idx <= end_idx) {
942 if (str[idx] != '"') {
943 raise_errmsg("Expecting property name", pystr, idx);
946 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
951 /* skip whitespace between key and : delimiter, read :, skip whitespace */
952 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
953 if (idx > end_idx || str[idx] != ':') {
954 raise_errmsg("Expecting : delimiter", pystr, idx);
958 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
960 /* read any JSON data type */
961 val = scan_once_str(s, pystr, idx, &next_idx);
965 if (PyDict_SetItem(rval, key, val) == -1)
972 /* skip whitespace before } or , */
973 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
975 /* bail if the object is closed or we didn't get the , delimiter */
976 if (idx > end_idx) break;
977 if (str[idx] == '}') {
980 else if (str[idx] != ',') {
981 raise_errmsg("Expecting , delimiter", pystr, idx);
986 /* skip whitespace after , delimiter */
987 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
990 /* verify that idx <= end_idx, str[idx] should be '}' */
991 if (idx > end_idx || str[idx] != '}') {
992 raise_errmsg("Expecting object", pystr, end_idx);
995 /* if object_hook is not None: rval = object_hook(rval) */
996 if (s->object_hook != Py_None) {
997 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1004 *next_idx_ptr = idx + 1;
1014 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1015 /* Read a JSON object from PyUnicode pystr.
1016 idx is the index of the first character after the opening curly brace.
1017 *next_idx_ptr is a return-by-reference index to the first character after
1018 the closing curly brace.
1020 Returns a new PyObject (usually a dict, but object_hook can change that)
1022 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1023 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1024 PyObject *val = NULL;
1025 PyObject *rval = PyDict_New();
1026 PyObject *key = NULL;
1027 int strict = PyObject_IsTrue(s->strict);
1028 Py_ssize_t next_idx;
1032 /* skip whitespace after { */
1033 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1035 /* only loop if the object is non-empty */
1036 if (idx <= end_idx && str[idx] != '}') {
1037 while (idx <= end_idx) {
1039 if (str[idx] != '"') {
1040 raise_errmsg("Expecting property name", pystr, idx);
1043 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1048 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1049 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1050 if (idx > end_idx || str[idx] != ':') {
1051 raise_errmsg("Expecting : delimiter", pystr, idx);
1055 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1057 /* read any JSON term */
1058 val = scan_once_unicode(s, pystr, idx, &next_idx);
1062 if (PyDict_SetItem(rval, key, val) == -1)
1069 /* skip whitespace before } or , */
1070 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1072 /* bail if the object is closed or we didn't get the , delimiter */
1073 if (idx > end_idx) break;
1074 if (str[idx] == '}') {
1077 else if (str[idx] != ',') {
1078 raise_errmsg("Expecting , delimiter", pystr, idx);
1083 /* skip whitespace after , delimiter */
1084 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1088 /* verify that idx <= end_idx, str[idx] should be '}' */
1089 if (idx > end_idx || str[idx] != '}') {
1090 raise_errmsg("Expecting object", pystr, end_idx);
1094 /* if object_hook is not None: rval = object_hook(rval) */
1095 if (s->object_hook != Py_None) {
1096 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1103 *next_idx_ptr = idx + 1;
1113 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1114 /* Read a JSON array from PyString pystr.
1115 idx is the index of the first character after the opening bracket.
1116 *next_idx_ptr is a return-by-reference index to the first character after
1119 Returns a new PyList
1121 char *str = PyString_AS_STRING(pystr);
1122 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1123 PyObject *val = NULL;
1124 PyObject *rval = PyList_New(0);
1125 Py_ssize_t next_idx;
1129 /* skip whitespace after [ */
1130 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1132 /* only loop if the array is non-empty */
1133 if (idx <= end_idx && str[idx] != ']') {
1134 while (idx <= end_idx) {
1136 /* read any JSON term; the index after it comes back through next_idx */
1137 val = scan_once_str(s, pystr, idx, &next_idx);
1141 if (PyList_Append(rval, val) == -1)
1147 /* skip whitespace between term and , */
1148 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1150 /* bail if the array is closed or we didn't get the , delimiter */
1151 if (idx > end_idx) break;
1152 if (str[idx] == ']') {
1155 else if (str[idx] != ',') {
1156 raise_errmsg("Expecting , delimiter", pystr, idx);
1161 /* skip whitespace after , */
1162 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1166 /* verify that idx <= end_idx, str[idx] should be ']' */
1167 if (idx > end_idx || str[idx] != ']') {
1168 raise_errmsg("Expecting object", pystr, end_idx);
1171 *next_idx_ptr = idx + 1;
1180 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1181 /* Read a JSON array from PyUnicode pystr.
1182 idx is the index of the first character after the opening bracket.
1183 *next_idx_ptr is a return-by-reference index to the first character after
1186 Returns a new PyList
1188 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1189 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1190 PyObject *val = NULL;
1191 PyObject *rval = PyList_New(0);
1192 Py_ssize_t next_idx;
1196 /* skip whitespace after [ */
1197 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1199 /* only loop if the array is non-empty */
1200 if (idx <= end_idx && str[idx] != ']') {
1201 while (idx <= end_idx) {
1203 /* read any JSON term */
1204 val = scan_once_unicode(s, pystr, idx, &next_idx);
1208 if (PyList_Append(rval, val) == -1)
1214 /* skip whitespace between term and , */
1215 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1217 /* bail if the array is closed or we didn't get the , delimiter */
1218 if (idx > end_idx) break;
1219 if (str[idx] == ']') {
1222 else if (str[idx] != ',') {
1223 raise_errmsg("Expecting , delimiter", pystr, idx);
1228 /* skip whitespace after , */
1229 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1233 /* verify that idx <= end_idx, str[idx] should be ']' */
1234 if (idx > end_idx || str[idx] != ']') {
1235 raise_errmsg("Expecting object", pystr, end_idx);
1238 *next_idx_ptr = idx + 1;
1247 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1248 /* Read a JSON constant from PyString pystr.
1249 constant is the constant string that was found
1250 ("NaN", "Infinity", "-Infinity").
1251 idx is the index of the first character of the constant
1252 *next_idx_ptr is a return-by-reference index to the first character after
1255 Returns the result of parse_constant
1259 /* constant is "NaN", "Infinity", or "-Infinity" */
1260 cstr = PyString_InternFromString(constant);
1264 /* rval = parse_constant(constant) */
1265 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1266 idx += PyString_GET_SIZE(cstr);
1268 *next_idx_ptr = idx;
1273 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1274 /* Read a JSON number from PyString pystr.
1275 start is the index of the first character of the number
1276 *next_idx_ptr is a return-by-reference index to the first character after
1279 Returns a new PyObject representation of that number:
1280 PyInt, PyLong, or PyFloat.
1281 May return other types if parse_int or parse_float are set
1283 char *str = PyString_AS_STRING(pystr);
1284 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1285 Py_ssize_t idx = start;
1290 /* read a sign if it's there, make sure it's not the end of the string */
1291 if (str[idx] == '-') {
1293 if (idx > end_idx) {
1294 PyErr_SetNone(PyExc_StopIteration);
1299 /* read as many integer digits as we find as long as it doesn't start with 0 */
1300 if (str[idx] >= '1' && str[idx] <= '9') {
1302 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1304 /* if it starts with 0 we only expect one integer digit */
1305 else if (str[idx] == '0') {
1308 /* no integer digits, error */
1310 PyErr_SetNone(PyExc_StopIteration);
1314 /* if the next char is '.' followed by a digit then read all float digits */
1315 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1318 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1321 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1322 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1324 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1325 Py_ssize_t e_start = idx;
1328 /* read an exponent sign if present */
1329 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1331 /* read all digits */
1332 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1334 /* if we got a digit, then parse as float. if not, backtrack */
1335 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1343 /* copy the section we determined to be a number */
1344 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1348 /* parse as a float using a fast path if available, otherwise call user defined method */
1349 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1350 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1353 rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr)));
1357 /* parse as an int using a fast path if available, otherwise call user defined method */
1358 if (s->parse_int != (PyObject *)&PyInt_Type) {
1359 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1362 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1366 *next_idx_ptr = idx;
1371 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1372 /* Read a JSON number from PyUnicode pystr.
1373 start is the index of the first character of the number
1374 *next_idx_ptr is a return-by-reference index to the first character after
1377 Returns a new PyObject representation of that number:
1378 PyInt, PyLong, or PyFloat.
1379 May return other types if parse_int or parse_float are set
1381 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1382 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1383 Py_ssize_t idx = start;
1388 /* read a sign if it's there, make sure it's not the end of the string */
1389 if (str[idx] == '-') {
1391 if (idx > end_idx) {
1392 PyErr_SetNone(PyExc_StopIteration);
1397 /* read as many integer digits as we find as long as it doesn't start with 0 */
1398 if (str[idx] >= '1' && str[idx] <= '9') {
1400 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1402 /* if it starts with 0 we only expect one integer digit */
1403 else if (str[idx] == '0') {
1406 /* no integer digits, error */
1408 PyErr_SetNone(PyExc_StopIteration);
1412 /* if the next char is '.' followed by a digit then read all float digits */
1413 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1416 while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1419 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1420 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1421 Py_ssize_t e_start = idx;
1424 /* read an exponent sign if present */
1425 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1427 /* read all digits */
1428 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1430 /* if we got a digit, then parse as float. if not, backtrack */
1431 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1439 /* copy the section we determined to be a number */
1440 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1444 /* parse as a float using a fast path if available, otherwise call user defined method */
1445 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1446 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1449 rval = PyFloat_FromString(numstr, NULL);
1453 /* no fast path for unicode -> int, just call */
1454 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1457 *next_idx_ptr = idx;
/*
 * Decode the single JSON term that begins at index idx of the byte
 * string pystr.
 *
 * StopIteration is set when idx is at or past the end of the string.
 * Otherwise, per the visible calls, the term is handled by one of:
 *   - scanstring_str, given idx + 1, s->encoding as a C string and the
 *     truth value of s->strict
 *   - _parse_object_str / _parse_array_str, both starting at idx + 1
 *   - the literals null / true / false, which set *next_idx_ptr to
 *     idx + 4 or idx + 5
 *   - _parse_constant for "NaN", "Infinity" and "-Infinity"
 *   - _match_number_str when none of the above matched
 * Each `idx + k < length` test guarantees that every character compared
 * on the same line lies inside the string.
 *
 * NOTE(review): the statement that selects a case from str[idx] is not
 * visible here; presumably a switch -- confirm.
 * NOTE(review): PyObject_IsTrue(s->strict) can return -1 on error and its
 * result is passed straight through as an argument -- confirm that this
 * is acceptable.
 */
1462 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1464 /* Read one JSON term (of any kind) from PyString pystr.
1465 idx is the index of the first character of the term
1466 *next_idx_ptr is a return-by-reference index to the first character after
1469 Returns a new PyObject representation of the term.
1471 char *str = PyString_AS_STRING(pystr);
1472 Py_ssize_t length = PyString_GET_SIZE(pystr);
1473 if (idx >= length) {
1474 PyErr_SetNone(PyExc_StopIteration);
1480 return scanstring_str(pystr, idx + 1,
1481 PyString_AS_STRING(s->encoding),
1482 PyObject_IsTrue(s->strict),
1486 return _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1489 return _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1492 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1494 *next_idx_ptr = idx + 4;
1500 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1502 *next_idx_ptr = idx + 4;
1508 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1509 Py_INCREF(Py_False);
1510 *next_idx_ptr = idx + 5;
1516 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1517 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1522 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1523 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1528 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1529 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1533 /* Didn't find a string, object, array, or named constant. Look for a number. */
1534 return _match_number_str(s, pystr, idx, next_idx_ptr);
/*
 * Decode the single JSON term that begins at index idx of the unicode
 * object pystr.
 *
 * StopIteration is set when idx is at or past the end of the string.
 * Otherwise, per the visible calls, the term is handled by one of:
 *   - scanstring_unicode, given idx + 1 and the truth value of s->strict
 *   - _parse_object_unicode / _parse_array_unicode, both starting at
 *     idx + 1
 *   - the literals null / true / false, which set *next_idx_ptr to
 *     idx + 4 or idx + 5
 *   - _parse_constant for "NaN", "Infinity" and "-Infinity"
 *   - _match_number_unicode when none of the above matched
 * Each `idx + k < length` test guarantees that every character compared
 * on the same line lies inside the string.
 *
 * NOTE(review): the statement that selects a case from str[idx] is not
 * visible here; presumably a switch -- confirm.
 * NOTE(review): PyObject_IsTrue(s->strict) can return -1 on error and its
 * result is passed straight through as an argument -- confirm that this
 * is acceptable.
 */
1538 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1540 /* Read one JSON term (of any kind) from PyUnicode pystr.
1541 idx is the index of the first character of the term
1542 *next_idx_ptr is a return-by-reference index to the first character after
1545 Returns a new PyObject representation of the term.
1547 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1548 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1549 if (idx >= length) {
1550 PyErr_SetNone(PyExc_StopIteration);
1556 return scanstring_unicode(pystr, idx + 1,
1557 PyObject_IsTrue(s->strict),
1561 return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1564 return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1567 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1569 *next_idx_ptr = idx + 4;
1575 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1577 *next_idx_ptr = idx + 4;
1583 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1584 Py_INCREF(Py_False);
1585 *next_idx_ptr = idx + 5;
1591 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1592 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1597 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1598 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1603 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1604 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1608 /* Didn't find a string, object, array, or named constant. Look for a number. */
1609 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
/*
 * Call handler of the Scanner type, invoked as scanner(string, idx).
 *
 * Parses the two arguments (idx is converted by
 * _convertPyInt_AsSsize_t), runs scan_once_str for a byte string or
 * scan_once_unicode for a unicode object, and sets TypeError naming the
 * offending type for anything else.  The result is produced by
 * _build_rval_index_tuple from the scanned value and the index that
 * follows it.
 */
1613 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1615 /* Python callable interface to scan_once_{str,unicode} */
/* -1 until one of the scan functions stores the real index through
   the pointer it is given */
1619 Py_ssize_t next_idx = -1;
1620 static char *kwlist[] = {"string", "idx", NULL};
1622 assert(PyScanner_Check(self));
1623 s = (PyScannerObject *)self;
1624 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
/* dispatch on the concrete string type of the input */
1627 if (PyString_Check(pystr)) {
1628 rval = scan_once_str(s, pystr, idx, &next_idx);
1630 else if (PyUnicode_Check(pystr)) {
1631 rval = scan_once_unicode(s, pystr, idx, &next_idx);
/* %.80s caps the type name at 80 characters in the message */
1634 PyErr_Format(PyExc_TypeError,
1635 "first argument must be a string, not %.80s",
1636 Py_TYPE(pystr)->tp_name);
1639 return _build_rval_index_tuple(rval, next_idx);
/*
 * Allocation function for Scanner objects.
 *
 * Obtains the object from type->tp_alloc, sets the object fields shown
 * below to NULL and returns it as a PyObject pointer.  args and kwds are
 * not used on the visible lines.
 *
 * NOTE(review): the handling of a failed tp_alloc is not visible here --
 * confirm that s is tested before the fields are written.
 */
1643 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1646 s = (PyScannerObject *)type->tp_alloc(type, 0);
1650 s->object_hook = NULL;
1651 s->parse_float = NULL;
1652 s->parse_int = NULL;
1653 s->parse_constant = NULL;
1655 return (PyObject *)s;
/*
 * Initializer of the Scanner type, invoked as make_scanner(context).
 *
 * Copies six attributes of `context` into the object: encoding, strict,
 * object_hook, parse_float, parse_int and parse_constant.  The encoding
 * is normalised first: None becomes the interned DEFAULT_ENCODING string
 * and a unicode value is converted with PyUnicode_AsEncodedString; the
 * result must be a byte string.  The Py_CLEAR sequence at the end
 * releases every field; presumably it is the shared failure path reached
 * from the checks above -- confirm, the jumps are not visible here.
 */
1659 scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1661 /* Initialize Scanner object */
1663 static char *kwlist[] = {"context", NULL};
1666 assert(PyScanner_Check(self));
1667 s = (PyScannerObject *)self;
1669 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1672 /* PyString_AS_STRING is used on encoding */
/* NOTE(review): PyObject_GetAttrString returns NULL when the attribute
   is missing.  On the visible lines the NULL test comes only after the
   comparison with Py_None and the PyUnicode_Check call, and
   PyUnicode_Check reads the type of its argument -- confirm that a NULL
   result cannot reach it. */
1673 s->encoding = PyObject_GetAttrString(ctx, "encoding");
1674 if (s->encoding == Py_None) {
1676 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1678 else if (PyUnicode_Check(s->encoding)) {
/* NULL encoding and NULL errors: the default codec settings are used */
1679 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1680 Py_DECREF(s->encoding);
1683 if (s->encoding == NULL || !PyString_Check(s->encoding))
1686 /* All of these will fail "gracefully" so we don't need to verify them */
1687 s->strict = PyObject_GetAttrString(ctx, "strict");
1688 if (s->strict == NULL)
1690 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1691 if (s->object_hook == NULL)
1693 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1694 if (s->parse_float == NULL)
1696 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1697 if (s->parse_int == NULL)
1699 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1700 if (s->parse_constant == NULL)
/* release whatever was stored so far; Py_CLEAR also resets each field
   to NULL */
1706 Py_CLEAR(s->encoding);
1707 Py_CLEAR(s->strict);
1708 Py_CLEAR(s->object_hook);
1709 Py_CLEAR(s->parse_float);
1710 Py_CLEAR(s->parse_int);
1711 Py_CLEAR(s->parse_constant);
/*
 * Docstring and type object of the Scanner type.
 *
 * The slots filled on the visible lines are: deallocation
 * (scanner_dealloc), call (scanner_call), garbage-collection support
 * (Py_TPFLAGS_HAVE_GC with scanner_traverse and scanner_clear), the
 * read-only member table (scanner_members), initialisation
 * (scanner_init) and allocation (scanner_new).  Slots given as 0 with a
 * commented-out name are left unset in this initializer.
 */
1715 PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1718 PyTypeObject PyScannerType = {
1719 PyObject_HEAD_INIT(NULL)
1720 0, /* tp_internal */
1721 "simplejson._speedups.Scanner", /* tp_name */
1722 sizeof(PyScannerObject), /* tp_basicsize */
1723 0, /* tp_itemsize */
1724 scanner_dealloc, /* tp_dealloc */
1730 0, /* tp_as_number */
1731 0, /* tp_as_sequence */
1732 0, /* tp_as_mapping */
1734 scanner_call, /* tp_call */
1736 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1737 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1738 0, /* tp_as_buffer */
1739 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1740 scanner_doc, /* tp_doc */
1741 scanner_traverse, /* tp_traverse */
1742 scanner_clear, /* tp_clear */
1743 0, /* tp_richcompare */
1744 0, /* tp_weaklistoffset */
1746 0, /* tp_iternext */
1748 scanner_members, /* tp_members */
1752 0, /* tp_descr_get */
1753 0, /* tp_descr_set */
1754 0, /* tp_dictoffset */
1755 scanner_init, /* tp_init */
1756 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1757 scanner_new, /* tp_new */
1758 0,/* PyObject_GC_Del, */ /* tp_free */
/*
 * Allocation function for Encoder objects.
 *
 * Obtains the object from type->tp_alloc, sets the object fields shown
 * below to NULL and returns it as a PyObject pointer.  args and kwds are
 * not used on the visible lines.
 *
 * NOTE(review): the handling of a failed tp_alloc is not visible here --
 * confirm that s is tested before the fields are written.
 */
1762 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1765 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1768 s->defaultfn = NULL;
1771 s->key_separator = NULL;
1772 s->item_separator = NULL;
1773 s->sort_keys = NULL;
1776 return (PyObject *)s;
/*
 * Initializer of the Encoder type, invoked as make_encoder(markers,
 * default, encoder, indent, key_separator, item_separator, sort_keys,
 * skipkeys, allow_nan).
 *
 * Stores the first eight arguments in the object and takes a reference
 * to each.  Two derived flags are computed: fast_encode is true when
 * `encoder` is the C function py_encode_basestring_ascii itself, and
 * allow_nan holds the truth value of the last argument.
 */
1780 encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1782 /* initialize Encoder object */
1783 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1786 PyObject *allow_nan;
1788 assert(PyEncoder_Check(self));
1789 s = (PyEncoderObject *)self;
/* NOTE(review): the "O" format stores borrowed references, and here they
   are written directly into the object fields before any Py_INCREF.  If
   parsing fails part-way, or this initializer runs on an object that
   already holds references, the fields end up holding pointers the
   object does not own (or the previous values are overwritten without a
   decref).  Parsing into locals first would avoid that -- confirm. */
1791 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
1792 &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan))
/* turn the borrowed references into owned ones */
1795 Py_INCREF(s->markers);
1796 Py_INCREF(s->defaultfn);
1797 Py_INCREF(s->encoder);
1798 Py_INCREF(s->indent);
1799 Py_INCREF(s->key_separator);
1800 Py_INCREF(s->item_separator);
1801 Py_INCREF(s->sort_keys);
1802 Py_INCREF(s->skipkeys);
/* true only when `encoder` wraps the C implementation, which can then be
   called directly instead of through the Python call protocol */
1803 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
/* NOTE(review): PyObject_IsTrue returns -1 on error; the result is
   stored without a check on the visible lines -- confirm. */
1804 s->allow_nan = PyObject_IsTrue(allow_nan);
/*
 * Call handler of the Encoder type, invoked as
 * encoder(obj, _current_indent_level).
 *
 * Parses the two arguments (the indent level is converted by
 * _convertPyInt_AsSsize_t), creates an empty list and lets
 * encoder_listencode_obj append the JSON fragments for obj to it.  A
 * non-zero result from encoder_listencode_obj signals failure; what
 * happens to the list on that path, and the final return, are not
 * visible here.
 */
1809 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1811 /* Python callable interface to encode_listencode_obj */
1812 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1815 Py_ssize_t indent_level;
1817 assert(PyEncoder_Check(self));
1818 s = (PyEncoderObject *)self;
1819 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1820 &obj, _convertPyInt_AsSsize_t, &indent_level))
/* output accumulator: the encoder appends string fragments to it */
1822 rval = PyList_New(0);
1825 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
/*
 * Map None, True and False to the JSON words "null", "true" and "false".
 *
 * Each word is created once as an interned string and kept in a
 * function-local static, so later calls reuse the same object.  Any other
 * argument sets ValueError("not a const").
 *
 * NOTE(review): the return statements and any reference-count
 * adjustment on the cached strings are not visible here -- confirm what
 * kind of reference the caller receives.
 */
1833 _encoded_const(PyObject *obj)
1835 /* Return the JSON string representation of None, True, False */
1836 if (obj == Py_None) {
1837 static PyObject *s_null = NULL;
/* create the cached string on first use */
1838 if (s_null == NULL) {
1839 s_null = PyString_InternFromString("null");
1844 else if (obj == Py_True) {
1845 static PyObject *s_true = NULL;
1846 if (s_true == NULL) {
1847 s_true = PyString_InternFromString("true");
1852 else if (obj == Py_False) {
1853 static PyObject *s_false = NULL;
1854 if (s_false == NULL) {
1855 s_false = PyString_InternFromString("false");
1861 PyErr_SetString(PyExc_ValueError, "not a const");
/*
 * Produce the JSON text for the Python float obj.
 *
 * A finite value is rendered with PyObject_Repr.  A non-finite value
 * sets ValueError when s->allow_nan is false; otherwise one of the
 * strings "Infinity", "-Infinity" or "NaN" is returned.  The conditions
 * that choose between those three strings are not visible here.
 */
1867 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1869 /* Return the JSON representation of a PyFloat */
/* read the C double without a type check; obj must already be a float */
1870 double i = PyFloat_AS_DOUBLE(obj);
1871 if (!Py_IS_FINITE(i)) {
1872 if (!s->allow_nan) {
1873 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1877 return PyString_FromString("Infinity");
1880 return PyString_FromString("-Infinity");
1883 return PyString_FromString("NaN");
1886 /* Use a better float format here? */
1887 return PyObject_Repr(obj);
/*
 * Produce the JSON text for the string obj.
 *
 * Two routes are visible: a direct call of the C function
 * py_encode_basestring_ascii, and a call of the Python-level callable
 * s->encoder.  The condition choosing between them is not visible here;
 * presumably it is s->fast_encode -- confirm.
 */
1891 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1893 /* Return the JSON representation of a string */
1895 return py_encode_basestring_ascii(NULL, obj);
1897 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
/*
 * Append `stolen` to the list `lst`, taking over the caller's reference
 * to `stolen` as the existing comment describes.  The status of
 * PyList_Append is kept in rval; presumably it is what the function
 * returns -- the decrement and the return are not visible here.
 */
1901 _steal_list_append(PyObject *lst, PyObject *stolen)
1903 /* Append stolen and then decrement its reference count */
1904 int rval = PyList_Append(lst, stolen);
/*
 * Append the JSON fragments for the Python object obj to the list rval.
 *
 * The type of obj is tested in this order:
 *   None / True / False -> _encoded_const
 *   str / unicode       -> encoder_encode_string
 *   int / long          -> PyObject_Str
 *   float               -> encoder_encode_float
 *   list / tuple        -> encoder_listencode_list
 *   dict                -> encoder_listencode_dict
 *   anything else       -> s->defaultfn(obj), whose result is encoded by
 *                          a recursive call
 * For the last case, when s->markers is not None, the address of obj is
 * used as a key in s->markers for the duration of the call so that an
 * object that is already being encoded is reported as
 * ValueError("Circular reference detected").
 */
1910 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1912 /* Encode Python object obj to a JSON term, rval is a PyList */
1916 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1917 PyObject *cstr = _encoded_const(obj);
1920 return _steal_list_append(rval, cstr);
1922 else if (PyString_Check(obj) || PyUnicode_Check(obj))
1924 PyObject *encoded = encoder_encode_string(s, obj);
1925 if (encoded == NULL)
1927 return _steal_list_append(rval, encoded);
1929 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
/* the decimal text of an integer is used as its JSON form */
1930 PyObject *encoded = PyObject_Str(obj);
1931 if (encoded == NULL)
1933 return _steal_list_append(rval, encoded);
1935 else if (PyFloat_Check(obj)) {
1936 PyObject *encoded = encoder_encode_float(s, obj);
1937 if (encoded == NULL)
1939 return _steal_list_append(rval, encoded);
1941 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1942 return encoder_listencode_list(s, rval, obj, indent_level);
1944 else if (PyDict_Check(obj)) {
1945 return encoder_listencode_dict(s, rval, obj, indent_level);
1948 PyObject *ident = NULL;
1949 if (s->markers != Py_None) {
/* the object's address serves as its identity key in markers */
1951 ident = PyLong_FromVoidPtr(obj);
1954 has_key = PyDict_Contains(s->markers, ident);
1957 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1961 if (PyDict_SetItem(s->markers, ident, obj)) {
/* ask the user-supplied fallback for an encodable replacement */
1966 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
1967 if (newobj == NULL) {
1971 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
/* encoding of this object is finished: remove its marker again */
1977 if (ident != NULL) {
1978 if (PyDict_DelItem(s->markers, ident)) {
/*
 * Append the JSON fragments for the dict dct to the list rval.
 *
 * An empty dict appends the single fragment "{}".  Otherwise "{" is
 * appended, then for every entry the encoded key, s->key_separator and
 * the encoded value (through encoder_listencode_obj), with
 * s->item_separator appended as well (the condition guarding it is not
 * visible here), and finally "}".
 *
 * Keys are converted to a string first: str / unicode keys are used as
 * they are, floats go through encoder_encode_float, ints and longs
 * through PyObject_Str, True / False / None through _encoded_const.  Any
 * other key is either handled by the skipkeys branch or sets
 * ValueError("keys must be a string").
 *
 * When s->markers is not None the address of dct is kept in s->markers
 * while its entries are encoded, so that a dict that is already being
 * encoded is reported as ValueError("Circular reference detected").
 */
1989 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
1991 /* Encode Python dict dct a JSON term, rval is a PyList */
1992 static PyObject *open_dict = NULL;
1993 static PyObject *close_dict = NULL;
1994 static PyObject *empty_dict = NULL;
1995 PyObject *kstr = NULL;
1996 PyObject *ident = NULL;
1997 PyObject *key, *value;
/* create the shared punctuation strings on first use */
2002 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2003 open_dict = PyString_InternFromString("{");
2004 close_dict = PyString_InternFromString("}");
2005 empty_dict = PyString_InternFromString("{}");
2006 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2009 if (PyDict_Size(dct) == 0)
2010 return PyList_Append(rval, empty_dict);
2012 if (s->markers != Py_None) {
/* the dict's address serves as its identity key in markers */
2014 ident = PyLong_FromVoidPtr(dct);
2017 has_key = PyDict_Contains(s->markers, ident);
2020 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2023 if (PyDict_SetItem(s->markers, ident, dct)) {
2028 if (PyList_Append(rval, open_dict))
2031 if (s->indent != Py_None) {
2032 /* TODO: DOES NOT RUN */
/* NOTE(review): the next three lines read as Python pseudo-code rather
   than C; presumably they belong inside a block comment whose delimiters
   are not visible here -- confirm. */
2035 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2036 separator = _item_separator + newline_indent
2037 buf += newline_indent
2041 /* TODO: C speedup not implemented for sort_keys */
/* NOTE(review): PyObject_IsTrue returns -1 on error; no check of the
   result is visible here -- confirm. */
2044 skipkeys = PyObject_IsTrue(s->skipkeys);
2046 while (PyDict_Next(dct, &pos, &key, &value)) {
/* reduce the key to a string object (kstr) according to its type */
2049 if (PyString_Check(key) || PyUnicode_Check(key)) {
2053 else if (PyFloat_Check(key)) {
2054 kstr = encoder_encode_float(s, key);
2058 else if (PyInt_Check(key) || PyLong_Check(key)) {
2059 kstr = PyObject_Str(key);
2063 else if (key == Py_True || key == Py_False || key == Py_None) {
2064 kstr = _encoded_const(key);
2068 else if (skipkeys) {
2072 /* TODO: include repr of key */
2073 PyErr_SetString(PyExc_ValueError, "keys must be a string");
2078 if (PyList_Append(rval, s->item_separator))
/* the key text is escaped and quoted like any other JSON string */
2082 encoded = encoder_encode_string(s, kstr);
2084 if (encoded == NULL)
2086 if (PyList_Append(rval, encoded)) {
2091 if (PyList_Append(rval, s->key_separator))
2093 if (encoder_listencode_obj(s, rval, value, indent_level))
/* all entries are encoded: remove this dict's marker again */
2097 if (ident != NULL) {
2098 if (PyDict_DelItem(s->markers, ident))
2102 if (s->indent != Py_None) {
2103 /* TODO: DOES NOT RUN */
/* NOTE(review): Python pseudo-code, presumably from a block comment
   whose delimiters are not visible here -- confirm. */
2106 yield '\n' + (' ' * (_indent * _current_indent_level))
2109 if (PyList_Append(rval, close_dict))
/*
 * Append the JSON fragments for the list or tuple seq to the list rval.
 *
 * seq is first converted with PySequence_Fast so that its items can be
 * read as a C array.  An empty sequence appends the single fragment
 * "[]".  Otherwise "[" is appended, every item is encoded through
 * encoder_listencode_obj, with s->item_separator appended as well (the
 * condition guarding it is not visible here), and finally "]".
 *
 * When s->markers is not None the address of seq is kept in s->markers
 * while its items are encoded, so that a sequence that is already being
 * encoded is reported as ValueError("Circular reference detected").
 *
 * NOTE(review): the release of s_fast on the various exit paths is not
 * visible here -- confirm that every path drops that reference.
 */
2121 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2123 /* Encode Python list seq to a JSON term, rval is a PyList */
2124 static PyObject *open_array = NULL;
2125 static PyObject *close_array = NULL;
2126 static PyObject *empty_array = NULL;
2127 PyObject *ident = NULL;
2128 PyObject *s_fast = NULL;
2129 Py_ssize_t num_items;
2130 PyObject **seq_items;
/* create the shared punctuation strings on first use */
2133 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2134 open_array = PyString_InternFromString("[");
2135 close_array = PyString_InternFromString("]");
2136 empty_array = PyString_InternFromString("[]");
2137 if (open_array == NULL || close_array == NULL || empty_array == NULL)
/* the string is the TypeError message used when seq is not a sequence */
2141 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2144 num_items = PySequence_Fast_GET_SIZE(s_fast);
2145 if (num_items == 0) {
2147 return PyList_Append(rval, empty_array);
2150 if (s->markers != Py_None) {
/* the sequence's address serves as its identity key in markers */
2152 ident = PyLong_FromVoidPtr(seq);
2155 has_key = PyDict_Contains(s->markers, ident);
2158 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2161 if (PyDict_SetItem(s->markers, ident, seq)) {
2166 seq_items = PySequence_Fast_ITEMS(s_fast);
2167 if (PyList_Append(rval, open_array))
2169 if (s->indent != Py_None) {
2170 /* TODO: DOES NOT RUN */
/* NOTE(review): the next three lines read as Python pseudo-code rather
   than C; presumably they belong inside a block comment whose delimiters
   are not visible here -- confirm. */
2173 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2174 separator = _item_separator + newline_indent
2175 buf += newline_indent
2178 for (i = 0; i < num_items; i++) {
2179 PyObject *obj = seq_items[i];
2181 if (PyList_Append(rval, s->item_separator))
2184 if (encoder_listencode_obj(s, rval, obj, indent_level))
/* all items are encoded: remove this sequence's marker again */
2187 if (ident != NULL) {
2188 if (PyDict_DelItem(s->markers, ident))
2192 if (s->indent != Py_None) {
2193 /* TODO: DOES NOT RUN */
/* NOTE(review): Python pseudo-code, presumably from a block comment
   whose delimiters are not visible here -- confirm. */
2196 yield '\n' + (' ' * (_indent * _current_indent_level))
2199 if (PyList_Append(rval, close_array))
/*
 * Destructor of the Encoder type: releases the references held by the
 * object through encoder_clear, then frees the object's memory with the
 * tp_free function of its type.
 */
2211 encoder_dealloc(PyObject *self)
2213 /* Deallocate Encoder */
2214 encoder_clear(self);
2215 Py_TYPE(self)->tp_free(self);
/*
 * Garbage-collector traversal of the Encoder type: reports each object
 * reference listed below to the `visit` callback via Py_VISIT, which
 * passes `arg` along and returns early if the callback yields non-zero.
 */
2219 encoder_traverse(PyObject *self, visitproc visit, void *arg)
2222 assert(PyEncoder_Check(self));
2223 s = (PyEncoderObject *)self;
2224 Py_VISIT(s->markers);
2225 Py_VISIT(s->defaultfn);
2226 Py_VISIT(s->encoder);
2227 Py_VISIT(s->indent);
2228 Py_VISIT(s->key_separator);
2229 Py_VISIT(s->item_separator);
2230 Py_VISIT(s->sort_keys);
2231 Py_VISIT(s->skipkeys);
/*
 * Release every object reference held by the Encoder.  Py_CLEAR sets
 * each field to NULL before dropping the reference, so running this more
 * than once on the same object is harmless.  It is called from
 * encoder_dealloc and is installed as the tp_clear slot of the type.
 */
2236 encoder_clear(PyObject *self)
2238 /* Deallocate Encoder */
2240 assert(PyEncoder_Check(self));
2241 s = (PyEncoderObject *)self;
2242 Py_CLEAR(s->markers);
2243 Py_CLEAR(s->defaultfn);
2244 Py_CLEAR(s->encoder);
2245 Py_CLEAR(s->indent);
2246 Py_CLEAR(s->key_separator);
2247 Py_CLEAR(s->item_separator);
2248 Py_CLEAR(s->sort_keys);
2249 Py_CLEAR(s->skipkeys);
/*
 * Docstring and type object of the Encoder type.
 *
 * The slots filled on the visible lines are: deallocation
 * (encoder_dealloc), call (encoder_call), garbage-collection support
 * (Py_TPFLAGS_HAVE_GC with encoder_traverse and encoder_clear), the
 * read-only member table (encoder_members), initialisation
 * (encoder_init) and allocation (encoder_new).
 */
2253 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2256 PyTypeObject PyEncoderType = {
2257 PyObject_HEAD_INIT(NULL)
2258 0, /* tp_internal */
2259 "simplejson._speedups.Encoder", /* tp_name */
2260 sizeof(PyEncoderObject), /* tp_basicsize */
2261 0, /* tp_itemsize */
2262 encoder_dealloc, /* tp_dealloc */
2268 0, /* tp_as_number */
2269 0, /* tp_as_sequence */
2270 0, /* tp_as_mapping */
2272 encoder_call, /* tp_call */
2274 0, /* tp_getattro */
2275 0, /* tp_setattro */
2276 0, /* tp_as_buffer */
2277 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2278 encoder_doc, /* tp_doc */
2279 encoder_traverse, /* tp_traverse */
2280 encoder_clear, /* tp_clear */
2281 0, /* tp_richcompare */
2282 0, /* tp_weaklistoffset */
2284 0, /* tp_iternext */
2286 encoder_members, /* tp_members */
2290 0, /* tp_descr_get */
2291 0, /* tp_descr_set */
2292 0, /* tp_dictoffset */
2293 encoder_init, /* tp_init */
2295 encoder_new, /* tp_new */
/*
 * Module-level function table.  Two C functions are exposed on the
 * visible lines, py_encode_basestring_ascii (as
 * "encode_basestring_ascii") and py_scanstring; the all-NULL entry
 * terminates the table.
 */
2299 static PyMethodDef speedups_methods[] = {
2300 {"encode_basestring_ascii",
2301 (PyCFunction)py_encode_basestring_ascii,
2303 pydoc_encode_basestring_ascii},
2305 (PyCFunction)py_scanstring,
2308 {NULL, NULL, 0, NULL}
/* Docstring of the extension module; passed to Py_InitModule3 when the
   module is created. */
2311 PyDoc_STRVAR(module_doc,
2312 "simplejson speedups\n");
/*
 * Module initialisation: finalises the Scanner and Encoder types with
 * PyType_Ready, creates the "_speedups" module and publishes the two
 * types under the names make_scanner and make_encoder.  Each type gets
 * an extra reference first because PyModule_AddObject steals one.
 *
 * NOTE(review): the enclosing function header is not visible here;
 * presumably this is the module's init function -- confirm.
 * NOTE(review): tp_new is overwritten with PyType_GenericNew for both
 * types although their static initializers name scanner_new and
 * encoder_new, so those two functions would never be used -- confirm
 * which of the two is intended.
 * NOTE(review): the result of Py_InitModule3 is used without a NULL
 * check on the visible lines -- confirm.
 */
2318 PyScannerType.tp_new = PyType_GenericNew;
2319 if (PyType_Ready(&PyScannerType) < 0)
2321 PyEncoderType.tp_new = PyType_GenericNew;
2322 if (PyType_Ready(&PyEncoderType) < 0)
2324 m = Py_InitModule3("_speedups", speedups_methods, module_doc);
2325 Py_INCREF((PyObject*)&PyScannerType);
2326 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2327 Py_INCREF((PyObject*)&PyEncoderType);
2328 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);