root/cpsskins/branches/jmo-perspectives/minjson.py

Revision 2415, 14.2 kB (checked in by jmorliaguet, 3 years ago)

- upgraded to the latest version of minjson.py

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
Line 
1 ##############################################################################
2 #
3 # Copyright (c) 2005 Jim Washington and Contributors.
4 # All Rights Reserved.
5 #
6 # This software is subject to the provisions of the Zope Public License,
7 # Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
8 # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
9 # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
10 # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
11 # FOR A PARTICULAR PURPOSE.
12 #
13 ##############################################################################
14
15 # minjson.py
16 # reads minimal javascript objects.
17 # str's objects and fixes the text to write javascript.
18
19 #UNICODE USAGE:  Minjson tries hard to accommodate naive usage in a
20 #"Do what I mean" manner.  Real applications should handle unicode separately.
21 # The "right" way to use minjson in an application is to provide minjson a
22 # python unicode string for reading and accept a unicode output from minjson's
23 # writing.  That way, the assumptions for unicode are yours and not minjson's.
24
25 # That said, the minjson code has some (optional) unicode handling that you
26 # may look at as a model for the unicode handling your application may need.
27
28 # Thanks to Patrick Logan for starting the json-py project and making so many
29 # good test cases.
30
31 # Additional thanks to Balazs Ree for replacing the writing module.
32
33 # Jim Washington 30 Dec 2005.
34
35 # 2005-12-30 writing now traverses the object tree instead of relying on
36 #            str() or unicode()
37 # 2005-10-10 on reading, looks for \\uxxxx and replaces with u'\uxxxx'
38 # 2005-10-09 now tries hard to make all strings unicode when reading.
39 # 2005-10-07 got rid of eval() completely, makes object as found by the
40 #            tokenizer.
41 # 2005-09-06 imported parsing constants from tokenize; they changed a bit from
42 #            python2.3 to 2.4
43 # 2005-08-22 replaced the read sanity code
44 # 2005-08-21 Search for exploits on eval() yielded more default bad operators.
45 # 2005-08-18 Added optional code from Koen van de Sande to escape
46 #            outgoing unicode chars above 128
47
48
49 from re import compile, sub, search, DOTALL
50 from token import ENDMARKER, NAME, NUMBER, STRING, OP, ERRORTOKEN
51 from tokenize import tokenize, TokenError, NL
52
# Last-resort codec: safeRead() falls back to it when a byte string cannot be
# decoded with the system default encoding and the caller gave no encoding.
# Usually, utf-8 will work; set this to utf-16 if you dare.
emergencyEncoding = 'utf-8'
55
class ReadException(Exception):
    """Raised when text cannot be accepted as a JSON expression."""

class WriteException(Exception):
    """Raised when an object cannot be written out as JSON."""
61
62 #################################
63 #      read JSON object         #
64 #################################
65
# /* ... */ comments.  DOTALL lets one comment span several lines and the
# non-greedy .*? stops at the first closing */.
slashstarcomment = compile(r'/\*.*?\*/',DOTALL)
# // comments: matches from the // through the newline that ends the line, so
# a // comment with no newline after it is not matched.
# NOTE(review): neither pattern knows about string literals, so comment-like
# text inside a quoted value (e.g. "http://..." followed by a newline) matches
# as well.
doubleslashcomment = compile(r'//.*\n')

# a \uXXXX escape: backslash, 'u', then exactly four hex digits.
unichrRE = compile(r"\\u[0-9a-fA-F]{4,4}")
70
71 def unichrReplace(match):
72     return unichr(int(match.group()[2:],16))
73
# (character, escape sequence) pairs for the short backslash escapes: the
# first item of each pair is the real character, the second is how it is
# spelled inside a quoted string.  The backslash pair is listed first.
escapeStrs = (('\\','\\\\'),('\n',r'\n'),('\b',r'\b'),
    ('\f',r'\f'),('\t',r'\t'),('\r',r'\r'), ('"',r'\"')
    )
77
class DictToken:
    """Stack marker pushed by JSONReader when it reads '{'."""
    __slots__=[]

class ListToken:
    """Stack marker pushed by JSONReader when it reads '['."""
    __slots__=[]

class ColonToken:
    """Stack marker pushed by JSONReader when it reads ':'."""
    __slots__=[]

class CommaToken:
    """Stack marker pushed by JSONReader when it reads ','."""
    __slots__=[]

class JSONReader(object):
    """raise SyntaxError if it is not JSON, and make the object available

    The text is run through Python's tokenizer.  readTokens() receives every
    token and keeps self.objects as a stack of finished values and marker
    tokens; a closing ']' or '}' folds the top of the stack into a list or a
    dict.  Once the constructor returns, output() hands back the one value
    left on the stack.
    """

    # the only operator tokens that may appear
    _allowedOps = ('[', '{', '}', ']', ',', ':', '-')
    # a backslash together with the single character that follows it
    _escapeRE = compile(r'\\(.)', DOTALL)
    # character after the backslash -> character the escape stands for
    _escapeChars = {'\\': '\\', '"': '"', '/': '/', 'n': '\n', 'b': '\b',
        'f': '\f', 't': '\t', 'r': '\r'}
    # the bare words that are values
    _names = {'true': True, 'false': False, 'null': None}

    def __init__(self,data):
        """Parse data (one string) right away; raises SyntaxError if bad."""
        self.stop = False
        #make an iterator of data so that next() works in tokenize.
        self._data = iter([data])
        self.lastOp = None
        self.objects = []
        self.tokenize()

    def tokenize(self):
        """Feed the text to the tokenizer with readTokens as its callback."""
        try:
            tokenize(self._data.next,self.readTokens)
        except TokenError:
            raise SyntaxError

    def _pop(self):
        """Pop the top of the stack; an empty stack is a syntax error."""
        try:
            return self.objects.pop()
        except IndexError:
            raise SyntaxError

    def _isMarker(self, item):
        """True if item is a structural marker rather than a real value."""
        return isinstance(item, (DictToken, ListToken, ColonToken, CommaToken))

    def _unescape(self, match):
        """Translate one backslash escape; unknown escapes stay as they are."""
        return self._escapeChars.get(match.group(1), match.group(0))

    def _readString(self, token):
        """Return the text of a quoted string token with escapes decoded.

        Raises SyntaxError for prefixed (u"..", r"..") and triple-quoted
        Python strings, which the tokenizer accepts but which cannot be
        unquoted by dropping one character from each end.
        """
        quote = token[:1]
        if quote not in ('"', "'") or len(token) < 2 or token[-1] != quote:
            raise SyntaxError
        if len(token) >= 6 and token[:3] == quote * 3:
            raise SyntaxError
        # one left-to-right pass, so an escaped backslash followed by 'n'
        # stays a backslash and an 'n' instead of becoming a newline
        return self._escapeRE.sub(self._unescape, token[1:-1])

    def resolveList(self):
        """Fold the stack back to the nearest ListToken into a list.

        Walking backwards the stack must read value, comma, value, ... and
        then the ListToken; anything else (empty stack, marker where a value
        belongs, missing or extra comma) raises SyntaxError.
        """
        theList = []
        item = self._pop()
        #an immediate ListToken is the empty list
        if not isinstance(item, ListToken):
            while True:
                if self._isMarker(item):
                    raise SyntaxError
                theList.append(item)
                separator = self._pop()
                if isinstance(separator, ListToken):
                    break
                if not isinstance(separator, CommaToken):
                    raise SyntaxError
                item = self._pop()
        #values were collected last-to-first
        theList.reverse()
        self.objects.append(theList)

    def resolveDict(self):
        """Fold the stack back to the nearest DictToken into a dict.

        Walking backwards the stack must read value, colon, string key,
        optionally a comma and another such triple, ... and then the
        DictToken; anything else raises SyntaxError.
        """
        theList = []
        item = self._pop()
        #an immediate DictToken is the empty dict
        if not isinstance(item, DictToken):
            while True:
                #value (we're going backwards!)
                value = item
                if self._isMarker(value):
                    raise SyntaxError
                if not isinstance(self._pop(), ColonToken):
                    raise SyntaxError
                key = self._pop()
                if not isinstance(key,basestring):
                    raise SyntaxError
                theList.append((key,value))
                separator = self._pop()
                if isinstance(separator, DictToken):
                    break
                if not isinstance(separator, CommaToken):
                    raise SyntaxError
                item = self._pop()
        theDict = {}
        # pairs are stored last-to-first, so when a key is repeated the
        # occurrence that comes first in the text is assigned last and wins
        for key, value in theList:
            theDict[key] = value
        self.objects.append(theDict)

    def readTokens(self, type, token, start, end, line):
        """Tokenizer callback: handle one token or raise SyntaxError.

        type is the token type constant and token its text; start and end are
        the (row, column) positions the tokenizer reports and line is the
        source line the token came from.
        """
        # UPPERCASE consts from tokens.py or tokenize.py
        if type == OP:
            if token not in self._allowedOps:
                raise SyntaxError
            self.lastOp = token
            if token == '[':
                self.objects.append(ListToken())
            elif token == '{':
                self.objects.append(DictToken())
            elif token == ']':
                self.resolveList()
            elif token == '}':
                self.resolveDict()
            elif token == ':':
                self.objects.append(ColonToken())
            elif token == ',':
                self.objects.append(CommaToken())
            # '-' pushes nothing; it is remembered in lastOp for the number
        elif type == STRING:
            self.objects.append(self._readString(token))
        elif type == NUMBER:
            if self.lastOp == '-':
                factor = -1
            else:
                factor = 1
            try:
                number = int(token)
            except ValueError:
                try:
                    number = float(token)
                except ValueError:
                    # a Python number spelling (hex, 1L, 1j ...), not JSON
                    raise SyntaxError
            self.objects.append(factor * number)
        elif type == NAME:
            try:
                self.objects.append(self._names[token])
            except KeyError:
                raise SyntaxError
        elif type == ENDMARKER:
            pass
        elif type == NL:
            pass
        elif type == ERRORTOKEN:
            #a char at the end of the line is (mostly) harmless.
            if end[1] != len(line):
                raise SyntaxError
        else:
            raise SyntaxError

    def output(self):
        """Return the parsed object.

        Raises SyntaxError unless exactly one finished value is on the stack.
        """
        # explicit test rather than assert, which python -O strips out
        if len(self.objects) != 1 or self._isMarker(self.objects[0]):
            raise SyntaxError
        return self.objects[0]
253
def safeRead(aString, encoding=None):
    """read the js, first sanitizing a bit and removing any c-style comments

    If the input is a unicode string, great.  That's preferred.  If the input
    is a byte string, strings in the object will be produced as unicode anyway.

    aString is the JSON text.  encoding, if given, names the codec used to
    decode a byte string; it must fit the data, because a UnicodeDecodeError
    from it is not caught.  Without it the system default encoding is tried
    first and emergencyEncoding second.

    Returns the Python object the text describes.  Raises ReadException if
    the text is not an acceptable JSON expression.
    """
    # get rid of trailing null. Konqueror appends this.
    CHR0 = chr(0)
    while aString.endswith(CHR0):
        aString = aString[:-1]
    # strip leading and trailing whitespace
    aString = aString.strip()
    # zap /* ... */ comments
    aString = slashstarcomment.sub('',aString)
    # zap // comments
    aString = doubleslashcomment.sub('',aString)
    #if it's already unicode, we won't try to decode it
    if isinstance(aString, unicode):
        s = aString
    elif encoding:
        # note: no "try" here.  the encoding provided must work for the
        # incoming byte string.  Often, it will be best not to provide the
        # encoding and allow the default
        s = unicode(aString, encoding)
    else:
        # let's try to decode to unicode in system default encoding
        try:
            s = unicode(aString)
        except UnicodeDecodeError:
            # last choice: handle as emergencyEncoding
            s = unicode(aString, emergencyEncoding)
    # replace \uXXXX escapes only now that the text is unicode: substituting
    # unicode characters into a byte string that holds non-ASCII bytes makes
    # Python coerce those bytes as ASCII and fail with UnicodeDecodeError.
    s = unichrRE.sub(unichrReplace, s)
    # parse and get the object.
    try:
        data = JSONReader(s).output()
    except (SyntaxError, IndexError, ValueError):
        # IndexError and ValueError are included because the reader indexes
        # its stack and converts numbers without guarding every case
        raise ReadException('Unacceptable JSON expression: %s' % aString)
    return data

read = safeRead
304
305 #################################
306 #   write object as JSON        #
307 #################################
308
309 import re, codecs
310 from cStringIO import StringIO
311
312 ### Codec error handler
313
def jsonreplace_handler(exc):
    '''Error handler for json

    If encoding fails, \\uxxxx must be emitted. This
    is similar to the "backslashreplace" handler, only
    that we never emit \\xnn since this is not legal
    according to the JSON syntax specs.

    exc is the exception the codec machinery passes in.  For a
    UnicodeEncodeError the return value is the usual (replacement text,
    position to resume at) pair, covering one character per call; any other
    exception is raised again.
    '''
    if not isinstance(exc, UnicodeEncodeError):
        raise exc
    code = ord(exc.object[exc.start])
    if code > 0xFFFF:
        # \u takes exactly four hex digits, so a character beyond the basic
        # plane has to be written as a UTF-16 surrogate pair
        code -= 0x10000
        text = u'\\u%04x\\u%04x' % (0xD800 + (code >> 10),
                                    0xDC00 + (code & 0x3FF))
    else:
        text = u'\\u%04x' % code
    return text, exc.start+1

# register the error handler
codecs.register_error('jsonreplace', jsonreplace_handler)
331
332 ### Writer
333
def write(input, encoding='utf-8', outputEncoding=None):
    """Return input written out as JSON text.

    encoding is handed to JsonWriter as the encoding of any byte strings in
    input; outputEncoding is handed over as the encoding of the result.
    """
    jsonWriter = JsonWriter(output_encoding=outputEncoding,
                            input_encoding=encoding)
    jsonWriter.write(input)
    result = jsonWriter.getvalue()
    return result
338
# every character that may not appear raw inside a JSON string: the double
# quote, the backslash and all control characters below U+0020
re_strmangle = re.compile(r'["\\\x00-\x1f]')

# the characters that have a two-character escape of their own
_strmangle_short = {
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
    '\\': '\\\\',
    }

def func_strmangle(match):
    """Return the JSON escape for the single character matched."""
    char = match.group(0)
    try:
        return _strmangle_short[char]
    except KeyError:
        # a control character without a short form; JSON requires \u00XX
        return '\\u%04x' % ord(char)

def strmangle(text):
    """Return text with every character JSON forbids in a string escaped.

    The surrounding double quotes are not added here.
    """
    return re_strmangle.sub(func_strmangle, text)
354
class JsonStream(object):
    """In-memory stream: keeps every chunk written and joins them on demand."""

    def __init__(self):
        # chunks, in the order they were written
        self.buf = []

    def write(self, text):
        """Add one chunk to the end of the buffer."""
        self.buf += [text]

    def getvalue(self):
        """Return all chunks written so far, concatenated."""
        separator = ''
        return separator.join(self.buf)
365
class JsonWriter(object):
    """Write Python objects to a stream as JSON text.

    Lists and tuples become arrays, dicts become objects, True/False/None
    become true/false/null, strings are quoted and escaped, and anything else
    is written as its str() text (floats as their repr()).
    """

    def __init__(self, stream=None, input_encoding='utf-8', output_encoding=None):
        '''
        - stream is optional, if specified must also give output_encoding
        - The input strings can be unicode or in input_encoding
        - output_encoding is optional, if omitted, result will be unicode

        Raises WriteException if a stream is given without output_encoding.
        '''
        if stream is not None:
            if output_encoding is None:
                raise WriteException('If a stream is given, output encoding must also be provided')
        else:
            stream = JsonStream()
        self.stream = stream
        self.input_encoding = input_encoding
        self.output_encoding = output_encoding

    def write(self, obj):
        """Write obj, and everything inside it, to the stream.

        Raises WriteException if some part of obj cannot be written.
        """
        if isinstance(obj, (list, tuple)):
            self.stream.write('[')
            first = True
            for elem in obj:
                if first:
                    first = False
                else:
                    self.stream.write(',')
                self.write(elem)
            self.stream.write(']')
        elif isinstance(obj, dict):
            self.stream.write('{')
            first = True
            for key, value in obj.iteritems():
                if first:
                    first = False
                else:
                    self.stream.write(',')
                self._writeKey(key)
                self.stream.write(':')
                self.write(value)
            self.stream.write('}')
        elif isinstance(obj, basestring):
            self._writeString(obj)
        else:
            self.stream.write(self._scalarText(obj))

    def _writeKey(self, key):
        """Write a dict key; JSON object keys are always quoted strings."""
        if isinstance(key, basestring):
            self._writeString(key)
        elif isinstance(key, (list, tuple, dict)):
            raise WriteException('Cannot use a %s as a JSON object key'
                                 % type(key).__name__)
        else:
            # numbers, booleans, None etc.: quote their text so that the
            # result is legal JSON
            self._writeString(self._scalarText(key))

    def _scalarText(self, obj):
        """Return the unquoted JSON text for an object that is no string."""
        if obj is True:
            return 'true'
        if obj is False:
            return 'false'
        if obj is None:
            return 'null'
        if isinstance(obj, float):
            # x - x is 0 for every finite float and NaN for NaN/infinity,
            # neither of which JSON can express
            if obj - obj != 0:
                raise WriteException('Cannot write non-finite number %r' % obj)
            # repr keeps the digits that str drops
            return repr(obj)
        try:
            return str(obj)
        except Exception:
            # sys.exc_info() instead of "except ..., exc" / "except ... as"
            # so the line is valid on every Python version
            import sys
            exc = sys.exc_info()[1]
            raise WriteException('Cannot write object (%s: %s)' % (exc.__class__, exc))

    def _writeString(self, obj):
        """Write a string quoted, escaped and in the output encoding."""
        # convert to unicode first
        if not isinstance(obj, unicode):
            try:
                obj = unicode(obj, self.input_encoding)
            except (UnicodeDecodeError, UnicodeTranslateError):
                obj = unicode(obj, 'utf-8', 'replace')
        # do the mangling
        obj = strmangle(obj)
        # make the encoding
        if self.output_encoding is not None:
            obj = obj.encode(self.output_encoding, 'jsonreplace')
        self.stream.write('"')
        self.stream.write(obj)
        self.stream.write('"')

    def getvalue(self):
        """Return whatever the stream's own getvalue() returns."""
        return self.stream.getvalue()
Note: See TracBrowser for help on using the browser.