Changesets can be listed by changeset number.
The Git repository is here.
- Revision:
- 297
- Log:
Updated to AWStats 7.0.
- Author:
- rool
- Date:
- Fri Mar 18 13:33:29 +0000 2011
- Size:
- 57685 Bytes
- Properties:
- Property svn:executable is set to *
1 | // Copyright (C) 2006 Google Inc. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | |
16 | /** |
17 | * @fileoverview |
18 | * some functions for browser-side pretty printing of code contained in html. |
19 | * <p> |
20 | * |
21 | * For a fairly comprehensive set of languages see the |
22 | * <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a> |
23 | * file that came with this source. At a minimum, the lexer should work on a |
24 | * number of languages including C and friends, Java, Python, Bash, SQL, HTML, |
25 | * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk |
26 | * and a subset of Perl, but, because of commenting conventions, doesn't work on |
27 | * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class. |
28 | * <p> |
29 | * Usage: <ol> |
30 | * <li> include this source file in an html page via |
31 | * {@code <script type="text/javascript" src="/path/to/prettify.js"></script>} |
32 | * <li> define style rules. See the example page for examples. |
33 | * <li> mark the {@code <pre>} and {@code <code>} tags in your source with |
34 | * {@code class=prettyprint.} |
35 | * You can also use the (html deprecated) {@code <xmp>} tag, but the pretty |
36 | * printer needs to do more substantial DOM manipulations to support that, so |
37 | * some css styles may not be preserved. |
38 | * </ol> |
39 | * That's it. I wanted to keep the API as simple as possible, so there's no |
40 | * need to specify which language the code is in, but if you wish, you can add |
41 | * another class to the {@code <pre>} or {@code <code>} element to specify the |
42 | * language, as in {@code <pre class="prettyprint lang-java">}. Any class that |
43 | * starts with "lang-" followed by a file extension, specifies the file type. |
44 | * See the "lang-*.js" files in this directory for code that implements |
45 | * per-language file handlers. |
46 | * <p> |
47 | * Change log:<br> |
48 | * cbeust, 2006/08/22 |
49 | * <blockquote> |
50 | * Java annotations (start with "@") are now captured as literals ("lit") |
51 | * </blockquote> |
52 | * @requires console |
53 | * @overrides window |
54 | */ |
55 | |
56 | // JSLint declarations |
57 | /*global console, document, navigator, setTimeout, window */ |
58 | |
/**
 * When true, {@code prettyPrint} is split across multiple timeouts so that it
 * does not interfere with UI events.  When set to {@code false},
 * {@code prettyPrint()} is synchronous.
 */
window['PR_SHOULD_USE_CONTINUATION'] = true;

/** The number of characters between tab columns. */
window['PR_TAB_WIDTH'] = 8;

// Declare the public entry points up front, each initialised to undefined.
// NOTE(review): presumably the real implementations are assigned further down
// in this file -- that code is not visible from here.

/**
 * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
 * {@code class=prettyprint} and prettify them.
 * @param {Function?} opt_whenDone if specified, called when the last entry
 *     has been finished.
 */
window['prettyPrint'] = void 0;

/**
 * Pretty print a chunk of code.
 *
 * @param {string} sourceCodeHtml code as html
 * @return {string} code as html, but prettier
 */
window['prettyPrintOne'] = void 0;

/**
 * Contains functions for creating and registering new language handlers.
 * @type {Object}
 */
window['PR'] = void 0;

/**
 * Walks the DOM returning a properly escaped version of innerHTML.
 * @param {Node} node
 * @param {Array.<string>} out output buffer that receives chunks of HTML.
 */
window['PR_normalizedHtml'] = void 0;
92 | |
/**
 * Browser detection.  Despite the name, this recognises MSIE 6, 7 and 8.
 * The first call inspects the user agent string and then replaces itself with
 * a function that simply returns the cached answer.
 * @extern
 * @returns {number|boolean} the major version (6, 7 or 8) when the user agent
 *     contains "MSIE 6.", "MSIE 7." or "MSIE 8."; otherwise false.
 */
window['_pr_isIE6'] = function () {
  var userAgent = navigator && navigator.userAgent;
  var found = userAgent && userAgent.match(/\bMSIE ([678])\./);
  var version = found ? +found[1] : false;
  // Memoize: later callers get the cached value without re-running the match.
  window['_pr_isIE6'] = function () { return version; };
  return version;
};
101 | |
102 | |
103 | (function () { |
// Keyword lists for various languages.  Every list is a string of
// space-separated words that ends with a single trailing space, so that lists
// can be concatenated directly.
var FLOW_CONTROL_KEYWORDS = [
  "break continue do else for if return while"
].join(" ") + " ";
var C_KEYWORDS = FLOW_CONTROL_KEYWORDS + [
  "auto case char const default",
  "double enum extern float goto int long register short signed sizeof",
  "static struct switch typedef union unsigned void volatile"
].join(" ") + " ";
var COMMON_KEYWORDS = C_KEYWORDS + [
  "catch class delete false import",
  "new operator private protected public this throw true try typeof"
].join(" ") + " ";
var CPP_KEYWORDS = COMMON_KEYWORDS + [
  "alignof align_union asm axiom bool",
  "concept concept_map const_cast constexpr decltype",
  "dynamic_cast explicit export friend inline late_check",
  "mutable namespace nullptr reinterpret_cast static_assert static_cast",
  "template typeid typename using virtual wchar_t where"
].join(" ") + " ";
var JAVA_KEYWORDS = COMMON_KEYWORDS + [
  "abstract boolean byte extends final finally implements import",
  "instanceof null native package strictfp super synchronized throws",
  "transient"
].join(" ") + " ";
var CSHARP_KEYWORDS = JAVA_KEYWORDS + [
  "as base by checked decimal delegate descending event",
  "fixed foreach from group implicit in interface internal into is lock",
  "object out override orderby params partial readonly ref sbyte sealed",
  "stackalloc string select uint ulong unchecked unsafe ushort var"
].join(" ") + " ";
var JSCRIPT_KEYWORDS = COMMON_KEYWORDS + [
  "debugger eval export function get null set undefined var with",
  "Infinity NaN"
].join(" ") + " ";
var PERL_KEYWORDS = [
  "caller delete die do dump elsif eval exit foreach for",
  "goto if import last local my next no our print package redo require",
  "sub undef unless until use wantarray while BEGIN END"
].join(" ") + " ";
var PYTHON_KEYWORDS = FLOW_CONTROL_KEYWORDS + [
  "and as assert class def del",
  "elif except exec finally from global import in is lambda",
  "nonlocal not or pass print raise try with yield",
  "False True None"
].join(" ") + " ";
var RUBY_KEYWORDS = FLOW_CONTROL_KEYWORDS + [
  "alias and begin case class def",
  "defined elsif end ensure false in module next nil not or redo rescue",
  "retry self super then true undef unless until when yield BEGIN END"
].join(" ") + " ";
var SH_KEYWORDS = FLOW_CONTROL_KEYWORDS + [
  "case done elif esac eval fi",
  "function in local set then until"
].join(" ") + " ";
// The union of every list above (duplicates are not removed).
var ALL_KEYWORDS = [
  CPP_KEYWORDS, CSHARP_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS,
  PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS
].join("");
144 | |
// Token style names.  Each value corresponds to a CSS class.

var PR_STRING = 'str';        // a string literal
var PR_KEYWORD = 'kwd';       // a keyword
var PR_COMMENT = 'com';       // a comment
var PR_TYPE = 'typ';          // a type
var PR_LITERAL = 'lit';       // a literal value, e.g. 1, null, true
var PR_PUNCTUATION = 'pun';   // a punctuation string
var PR_PLAIN = 'pln';         // plain text that has no more specific style

var PR_TAG = 'tag';           // an sgml tag
var PR_DECLARATION = 'dec';   // a markup declaration such as a DOCTYPE
var PR_SOURCE = 'src';        // embedded source
var PR_ATTRIB_NAME = 'atn';   // an sgml attribute name
var PR_ATTRIB_VALUE = 'atv';  // an sgml attribute value

/**
 * A class that indicates a section of markup that is not code, e.g. to allow
 * embedding of line numbers within code listings.
 */
var PR_NOCODE = 'nocode';
177 | |
/**
 * Source text of a regular expression fragment matching the tokens that can
 * precede a regular expression literal in javascript.
 * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full
 * list, but ones that might be problematic when seen in languages that don't
 * support regular expression literals have been removed.
 *
 * <p>Specifically, keywords that can't precede a regexp literal in a
 * syntactically legal javascript program are left out, as is the "in"
 * keyword, since it's not a keyword in many languages and might be used as a
 * count of inches.
 *
 * <p>The link above does not accurately describe EcmaScript rules since it
 * fails to distinguish between (a=++/b/i) and (a++/b/i), but it works very
 * well in practice.
 *
 * <p>CAVEAT: this does not properly handle the case where a regular
 * expression immediately follows another, since a regular expression may
 * have flags for case-sensitivity and the like.  Having regexp tokens
 * adjacent is not valid in any language the original author was aware of.
 * TODO: maybe style special characters inside a regexp as punctuation.
 *
 * @private
 */
var REGEXP_PRECEDER_PATTERN = function () {
  var tokens = [
    "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=",
    "&=", "(", "*", "*=", /* "+", */ "+=", ",", /* "-", */ "-=",
    "->", /*".", "..", "...", handled below */ "/", "/=", ":", "::", ";",
    "<", "<<", "<<=", "<=", "=", "==", "===", ">",
    ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[",
    "^", "^=", "^^", "^^=", "{", "|", "|=", "||",
    "||=", "~" /* handles =~ and !~ */,
    "break", "case", "continue", "delete",
    "do", "else", "finally", "instanceof",
    "return", "throw", "try", "typeof"
  ];
  // The first two alternatives are fixed; "+" and "-" are covered by the
  // character class rather than by entries in the token list.
  var alternatives = ['(?:^^', '[+-]'];
  for (var k = 0, count = tokens.length; k < count; ++k) {
    // Backslash-escape every character except = < > : & and lower-case letters.
    alternatives.push(tokens[k].replace(/([^=<>:&a-z])/g, '\\$1'));
  }
  // The trailing \s* matches at end, and matches the empty string.
  return alternatives.join('|') + ')\\s*';
}();
220 | |
// Define regexps here so that the interpreter doesn't have to create an
// object each time the function containing them is called.
// The language spec requires a new object created even if you don't access
// the $1 members.
var pr_amp = /&/g;
var pr_lt = /</g;
var pr_gt = />/g;
var pr_quot = /\"/g;

/**
 * Like textToHtml but also escapes double quotes so that the result is safe
 * to place inside a double-quoted attribute value.
 * @param {string} str plain text.
 * @return {string} str with &, <, > and " replaced by character entities.
 */
function attribToHtml(str) {
  // '&' must be escaped first so the entities produced below are not
  // themselves re-escaped.
  return str.replace(pr_amp, '&amp;')
      .replace(pr_lt, '&lt;')
      .replace(pr_gt, '&gt;')
      .replace(pr_quot, '&quot;');
}

/**
 * Escapes html special characters to html.
 * @param {string} str plain text.
 * @return {string} str with &, < and > replaced by character entities.
 */
function textToHtml(str) {
  // '&' must be escaped first so the entities produced below are not
  // themselves re-escaped.
  return str.replace(pr_amp, '&amp;')
      .replace(pr_lt, '&lt;')
      .replace(pr_gt, '&gt;');
}
243 | |
244 | |
var pr_ltEnt = /&lt;/g;
var pr_gtEnt = /&gt;/g;
var pr_aposEnt = /&apos;/g;
var pr_quotEnt = /&quot;/g;
var pr_ampEnt = /&amp;/g;
var pr_nbspEnt = /&nbsp;/g;

/**
 * Unescapes html to plain text.
 *
 * Decodes decimal (&#65;) and hexadecimal (&#x41; or &#X41;) numeric character
 * references, then the named entities lt, gt, apos, quot, nbsp and amp.
 * Malformed numeric references are left untouched.
 *
 * @param {string} html text containing character entities.
 * @return {string} the decoded text.
 */
function htmlToText(html) {
  var pos = html.indexOf('&');
  if (pos < 0) { return html; }

  var decimalDigits = /^[0-9]+$/;
  var hexDigits = /^[0-9a-fA-F]+$/;

  // Handle numeric entities specially.  We can't use functional substitution
  // since that doesn't work in older versions of Safari.
  // These should be rare since most browsers convert them to normal chars.
  for (--pos; (pos = html.indexOf('&#', pos + 1)) >= 0;) {
    var end = html.indexOf(';', pos);
    // No ';' after this reference means none after any later one either.
    if (end < 0) { break; }

    // The digits start right after the two characters '&#'.
    var num = html.substring(pos + 2, end);
    var radix = 10;
    var lead = num.charAt(0);
    if (lead === 'x' || lead === 'X') {
      num = num.substring(1);
      radix = 16;
    }
    // parseInt is lenient ("12abc" parses as 12), so validate the digits
    // explicitly to avoid swallowing text that is not part of a reference.
    if (!(radix === 16 ? hexDigits : decimalDigits).test(num)) { continue; }

    var codePoint = parseInt(num, radix);
    if (codePoint > 0x10FFFF) { continue; }  // not a Unicode code point

    var decoded;
    if (codePoint > 0xFFFF) {
      // String.fromCharCode truncates to 16 bits, so spell out the
      // surrogate pair for code points beyond the BMP.
      var offset = codePoint - 0x10000;
      decoded = String.fromCharCode(
          0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
    } else {
      decoded = String.fromCharCode(codePoint);
    }
    html = html.substring(0, pos) + decoded + html.substring(end + 1);
  }

  // '&amp;' is decoded last so that text such as '&amp;lt;' becomes '&lt;'
  // rather than '<'.
  return html.replace(pr_ltEnt, '<')
      .replace(pr_gtEnt, '>')
      .replace(pr_aposEnt, "'")
      .replace(pr_quotEnt, '"')
      .replace(pr_nbspEnt, ' ')
      .replace(pr_ampEnt, '&');
}
282 | |
/**
 * Is the given node's innerHTML normally unescaped?
 * @param {Node} node
 * @return {boolean} true only when the node's tagName is exactly 'XMP'.
 */
function isRawContent(node) {
  var tag = node.tagName;
  return tag === 'XMP';
}
287 | |
var newlineRe = /[\r\n]/g;
/**
 * Are newlines and adjacent spaces significant in the given node's innerHTML?
 *
 * @param {Node} node the element whose content is being examined.
 * @param {string} content the node's innerHTML.
 * @return {boolean} true if the node is a PRE, if content has no line breaks
 *     (so the question is moot), if no white-space style could be determined,
 *     or if that style is 'pre'.
 */
function isPreformatted(node, content) {
  // PRE means preformatted, and is a very common case, so don't create
  // unnecessary computed style objects.
  if ('PRE' === node.tagName) { return true; }
  // newlineRe is global, so test() resumes from lastIndex.  Reset it, or a
  // match left over from a previous call makes this check skip the start of
  // content and wrongly report that there are no newlines.
  newlineRe.lastIndex = 0;
  if (!newlineRe.test(content)) { return true; }  // Don't care
  var whitespace = '';
  // For disconnected nodes, IE has no currentStyle.
  if (node.currentStyle) {
    whitespace = node.currentStyle.whiteSpace;
  } else if (window.getComputedStyle) {
    // Firefox makes a best guess if node is disconnected whereas Safari
    // returns the empty string.
    whitespace = window.getComputedStyle(node, null).whiteSpace;
  }
  return !whitespace || whitespace === 'pre';
}
308 | |
/**
 * Appends an HTML serialization of node to out, recursing through element
 * attributes and children.  Node types other than element (1), attribute (2),
 * text (3) and CDATA (4) contribute nothing.
 * @param {Node} node
 * @param {Array.<string>} out output buffer that receives chunks of HTML.
 */
function normalizedHtml(node, out) {
  var type = node.nodeType;

  if (type === 1) {  // an element
    var tag = node.tagName.toLowerCase();
    out.push('<', tag);
    var attrs = node.attributes;
    for (var a = 0; a < attrs.length; ++a) {
      var attr = attrs[a];
      // Only attributes flagged as specified are serialized.
      if (attr.specified) {
        out.push(' ');
        normalizedHtml(attr, out);
      }
    }
    out.push('>');
    var kid = node.firstChild;
    while (kid) {
      normalizedHtml(kid, out);
      kid = kid.nextSibling;
    }
    // br, link and img get no end tag unless they somehow have children.
    if (node.firstChild || !/^(?:br|link|img)$/.test(tag)) {
      out.push('<\/', tag, '>');
    }
  } else if (type === 2) {  // an attribute
    out.push(node.name.toLowerCase(), '="', attribToHtml(node.value), '"');
  } else if (type === 3 || type === 4) {  // text
    out.push(textToHtml(node.nodeValue));
  }
}
336 | |
/**
 * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
 * matches the union of the sets of strings matched by the input RegExps.
 * Since it matches globally, if the input strings have a start-of-input
 * anchor (/^.../), it is ignored for the purposes of unioning.
 *
 * Capturing groups that are never back-referenced are rewritten as
 * non-capturing groups, and the remaining groups and their back-references
 * are renumbered so that they stay consistent in the combined pattern.  When
 * case-insensitive and case-sensitive inputs are mixed, the case-insensitive
 * ones are expanded (e.g. a becomes [Aa]) and the result is case-sensitive.
 *
 * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
 * @return {RegExp} a global regex.
 * @throws {Error} if any input regex is global or multiline.
 */
function combinePrefixPatterns(regexs) {
  // Number of capturing groups emitted so far across all rewritten regexs.
  var capturedGroupIndex = 0;

  // Case folding is needed as soon as one input is case-sensitive and
  // contains a letter outside of an escape sequence.
  var needToFoldCase = false;
  var ignoreCase = false;
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.ignoreCase) {
      ignoreCase = true;
    } else if (/[a-z]/i.test(regex.source.replace(
                   /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
      needToFoldCase = true;
      ignoreCase = false;
      break;
    }
  }

  /** Returns the character code denoted by one part of a character set. */
  function decodeEscape(charsetPart) {
    if (charsetPart.charAt(0) !== '\\') { return charsetPart.charCodeAt(0); }
    switch (charsetPart.charAt(1)) {
      case 'b': return 8;
      case 't': return 9;
      case 'n': return 0xa;
      case 'v': return 0xb;
      case 'f': return 0xc;
      case 'r': return 0xd;
      case 'u': case 'x':
        return parseInt(charsetPart.substring(2), 16)
            || charsetPart.charCodeAt(1);
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7':
        return parseInt(charsetPart.substring(1), 8);
      default: return charsetPart.charCodeAt(1);
    }
  }

  /** Returns text that denotes charCode when placed inside a character set. */
  function encodeEscape(charCode) {
    if (charCode < 0x20) {
      return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
    }
    var ch = String.fromCharCode(charCode);
    if (ch === '\\' || ch === '-' || ch === '[' || ch === ']') {
      ch = '\\' + ch;
    }
    return ch;
  }

  /**
   * Rewrites a character set such as [a-c] so that it also matches the other
   * case of every ASCII letter it contains, e.g. [A-Ca-c].
   */
  function caseFoldCharset(charSet) {
    // match() returns null for the empty set "[]"; treat that as no parts.
    var charsetParts = charSet.substring(1, charSet.length - 1).match(
        new RegExp(
            '\\\\u[0-9A-Fa-f]{4}'
            + '|\\\\x[0-9A-Fa-f]{2}'
            + '|\\\\[0-3][0-7]{0,2}'
            + '|\\\\[0-7]{1,2}'
            + '|\\\\[\\s\\S]'
            + '|-'
            + '|[^-\\\\]',
            'g')) || [];
    var groups = [];
    var ranges = [];
    var inverse = charsetParts[0] === '^';
    for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
      var p = charsetParts[i];
      switch (p) {
        case '\\B': case '\\b':
        case '\\D': case '\\d':
        case '\\S': case '\\s':
        case '\\W': case '\\w':
          // Character class escapes are copied through unchanged.
          groups.push(p);
          continue;
      }
      var start = decodeEscape(p);
      var end;
      if (i + 2 < n && '-' === charsetParts[i + 1]) {
        end = decodeEscape(charsetParts[i + 2]);
        i += 2;
      } else {
        end = start;
      }
      ranges.push([start, end]);
      // If the range might intersect letters, then expand it.
      if (!(end < 65 || start > 122)) {
        if (!(end < 65 || start > 90)) {
          // Upper-case portion: add the lower-case equivalent.
          ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
        }
        if (!(end < 97 || start > 122)) {
          // Lower-case portion: add the upper-case equivalent.
          ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
        }
      }
    }

    // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
    // -> [[1, 12], [14, 14], [16, 17]]
    ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); });
    var consolidatedRanges = [];
    var lastRange = [NaN, NaN];
    for (var i = 0; i < ranges.length; ++i) {
      var range = ranges[i];
      if (range[0] <= lastRange[1] + 1) {
        lastRange[1] = Math.max(lastRange[1], range[1]);
      } else {
        consolidatedRanges.push(lastRange = range);
      }
    }

    var out = ['['];
    if (inverse) { out.push('^'); }
    out.push.apply(out, groups);
    for (var i = 0; i < consolidatedRanges.length; ++i) {
      var range = consolidatedRanges[i];
      out.push(encodeEscape(range[0]));
      if (range[1] > range[0]) {
        out.push('-', encodeEscape(range[1]));
      }
    }
    out.push(']');
    return out.join('');
  }

  function allowAnywhereFoldCaseAndRenumberGroups(regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    var parts = regex.source.match(
        new RegExp(
            '(?:'
            + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
            + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
            + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
            + '|\\\\[0-9]+'  // a back-reference or octal escape
            + '|\\\\[^ux0-9]'  // other escape sequence
            + '|\\(\\?[:!=]'  // start of a non-capturing group
            + '|[\\(\\)\\^]'  // start/end of a group, or line start
            + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
            + ')',
            'g'));
    var n = parts.length;

    // Maps captured group numbers to the number they will occupy in
    // the output or to -1 if that has not been determined, or to
    // undefined if they need not be capturing in the output.
    var capturedGroups = [];

    // Walk over and identify back references to build the capturedGroups
    // mapping.
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        // groups are 1-indexed, so max group index is count of '('
        ++groupIndex;
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          capturedGroups[decimalValue] = -1;
        }
      }
    }

    // Renumber groups and reduce capturing groups to non-capturing groups
    // where possible.
    for (var i = 1; i < capturedGroups.length; ++i) {
      if (-1 === capturedGroups[i]) {
        capturedGroups[i] = ++capturedGroupIndex;
      }
    }
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        ++groupIndex;
        if (capturedGroups[groupIndex] === undefined) {
          parts[i] = '(?:';
        }
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          // Look up the new number of the group being referenced, not of
          // the most recently opened group.
          parts[i] = '\\' + capturedGroups[decimalValue];
        }
      }
    }

    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (var i = 0; i < n; ++i) {
      if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; }
    }

    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if (regex.ignoreCase && needToFoldCase) {
      for (var i = 0; i < n; ++i) {
        var p = parts[i];
        var ch0 = p.charAt(0);
        if (p.length >= 2 && ch0 === '[') {
          parts[i] = caseFoldCharset(p);
        } else if (ch0 !== '\\') {
          // TODO: handle letters in numeric escapes.
          parts[i] = p.replace(
              /[a-zA-Z]/g,
              function (ch) {
                var cc = ch.charCodeAt(0);
                return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
              });
        }
      }
    }

    return parts.join('');
  }

  var rewritten = [];
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.global || regex.multiline) { throw new Error('' + regex); }
    rewritten.push(
        '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')');
  }

  return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
}
565 | |
// Tri-state cache: null until the first call has probed the browser.
var PR_innerHtmlWorks = null;

/**
 * Returns the markup inside node, working around browsers whose innerHTML
 * is unreliable.
 * @param {Node} node
 * @return {string} the node's content as html.
 */
function getInnerHtml(node) {
  // inner html is hopelessly broken in Safari 2.0.4 when the content is
  // an html description of well formed XML and the containing tag is a PRE
  // tag, so we detect that case and emulate innerHTML.
  if (PR_innerHtmlWorks === null) {
    var probe = document.createElement('PRE');
    var probeText =
        document.createTextNode('<!DOCTYPE foo PUBLIC "foo bar">\n<foo />');
    probe.appendChild(probeText);
    // A working innerHTML escapes the text, leaving no literal '<'.
    PR_innerHtmlWorks = !/</.test(probe.innerHTML);
  }

  if (!PR_innerHtmlWorks) {
    // Emulate innerHTML by serializing each child in turn.
    var pieces = [];
    var kid = node.firstChild;
    while (kid) {
      normalizedHtml(kid, pieces);
      kid = kid.nextSibling;
    }
    return pieces.join('');
  }

  var html = node.innerHTML;
  // XMP tags contain unescaped entities so require special handling.
  if (isRawContent(node)) {
    return textToHtml(html);
  }
  if (!isPreformatted(node, html)) {
    // Line breaks are not significant here: drop those that follow a <br>
    // and collapse the rest, with any following indentation, to one space.
    html = html.replace(/(<br\s*\/?>)[\r\n]+/g, '$1');
    html = html.replace(/(?:[\r\n]+[ \t]*)+/g, ' ');
  }
  return html;
}
596 | |
/**
 * Creates a function that expands tabs to spaces.  The returned function can
 * be fed successive chunks of text; it remembers the current column between
 * calls so that tab stops stay aligned across chunks.
 * @param {number} tabWidth the number of characters between tab columns.
 * @return {function (string) : string} a function that takes
 *   plain text and returns the text with tabs expanded.
 * @private
 */
function makeTabExpander(tabWidth) {
  var SPACES = ' ';
  // Column of the next character on the current line; persists across calls.
  var column = 0;

  return function (plainText) {
    // Allocated lazily so that text without tabs is returned untouched.
    var pieces = null;
    // Index just past the last character already copied into pieces.
    var copiedUpTo = 0;
    var length = plainText.length;

    for (var idx = 0; idx < length; ++idx) {
      var c = plainText.charAt(idx);
      if (c === '\t') {
        if (!pieces) { pieces = []; }
        pieces.push(plainText.substring(copiedUpTo, idx));
        // Padding needed to reach the next column that is a multiple of
        // tabWidth.
        var pad = tabWidth - (column % tabWidth);
        column += pad;
        // Emit the padding in slices no longer than SPACES.
        while (pad >= 0) {
          pieces.push(SPACES.substring(0, pad));
          pad -= SPACES.length;
        }
        copiedUpTo = idx + 1;
      } else if (c === '\n') {
        column = 0;
      } else {
        ++column;
      }
    }

    if (!pieces) { return plainText; }
    pieces.push(plainText.substring(copiedUpTo));
    return pieces.join('');
  };
}
644 | |
// Splits markup into chunks.  The alternatives are tried in the order listed.
var pr_chunkPattern = new RegExp(
    [
      '[^<]+',  // A run of characters other than '<'
      '<\!--[\\s\\S]*?--\>',  // an HTML comment
      '<!\\[CDATA\\[[\\s\\S]*?\\]\\]>',  // a CDATA section
      // a probable tag that should not be highlighted
      '<\/?[a-zA-Z](?:[^>\"\']|\'[^\']*\'|\"[^\"]*\")*>',
      '<'  // A '<' that does not begin a larger chunk
    ].join('|'),
    'g');
// Recognizers for the kind of chunk that pr_chunkPattern produced.
var pr_commentPrefix = /^<\!--/;
var pr_cdataPrefix = /^<!\[CDATA\[/;
var pr_brPrefix = /^<br\b/i;
// Group 1 is '/' for an end tag (otherwise empty); group 2 is the tag name.
var pr_tagNameRe = /^<(\/?)([a-zA-Z][a-zA-Z0-9]*)/;
657 | |
/**
 * Separates markup into the plain source text it contains and the html tags
 * embedded in it.  Tags that are significant for tokenization (<br>) are
 * converted into their textual equivalent, comments are dropped, and CDATA
 * sections contribute their content verbatim.
 *
 * @param {string} s html where whitespace is considered significant.
 * @return {Object} an object whose {@code source} is the extracted text and
 *     whose {@code tags} is a flat list of [position, tag html] pairs, where
 *     position is the length of the source text that precedes the tag.
 * @private
 */
function extractTags(s) {
  // since the pattern has the 'g' modifier and defines no capturing groups,
  // this returns a list of all chunks, or null when there are none.
  var chunks = s.match(pr_chunkPattern);
  var count = chunks ? chunks.length : 0;
  var textParts = [];
  var textLength = 0;
  var tags = [];

  for (var i = 0; i < count; ++i) {
    var chunk = chunks[i];

    // Anything that is not markup is literal text (this includes a lone '<').
    if (!(chunk.length > 1 && chunk.charAt(0) === '<')) {
      var literalText = htmlToText(chunk);
      textParts.push(literalText);
      textLength += literalText.length;
      continue;
    }

    if (pr_commentPrefix.test(chunk)) { continue; }

    if (pr_cdataPrefix.test(chunk)) {
      // strip CDATA prefix and suffix.  Don't unescape since it's CDATA
      textParts.push(chunk.substring(9, chunk.length - 3));
      textLength += chunk.length - 12;
      continue;
    }

    if (pr_brPrefix.test(chunk)) {
      // <br> tags are lexically significant so convert them to text.
      // This is undone later.
      textParts.push('\n');
      ++textLength;
      continue;
    }

    if (!(chunk.indexOf(PR_NOCODE) >= 0 && isNoCodeTag(chunk))) {
      tags.push(textLength, chunk);
      continue;
    }

    // A <span class="nocode"> will start a section that should be
    // ignored.  Continue walking the list until we see a matching end
    // tag, keeping track of nested tags with the same name.
    var tagName = chunk.match(pr_tagNameRe)[2];
    var depth = 1;
    var j = i + 1;
    for (; j < count; ++j) {
      var other = chunks[j].match(pr_tagNameRe);
      if (!other || other[2] !== tagName) { continue; }
      if (other[1] === '/') {
        if (--depth === 0) { break; }
      } else {
        ++depth;
      }
    }

    if (j < count) {
      // Keep the whole section, end tag included, as one extracted tag.
      tags.push(textLength, chunks.slice(i, j + 1).join(''));
      i = j;
    } else {  // Ignore unclosed sections.
      tags.push(textLength, chunk);
    }
  }

  return { source: textParts.join(''), tags: tags };
}
727 | |
/**
 * True if the given tag contains a class attribute with the nocode class.
 * @param {string} tag the html of a start tag, e.g. '<span class="nocode">'.
 * @return {boolean}
 */
function isNoCodeTag(tag) {
  // First canonicalize the representation of attributes so that every
  // recognized attribute reads: space, name, ="value".
  var canonical = tag.replace(
      /\s(\w+)\s*=\s*(?:\"([^\"]*)\"|'([^\']*)'|(\S+))/g,
      ' $1="$2$3$4"');
  // Then look for the attribute we want.  The leading \s ensures that the
  // name is exactly "class" and not merely the tail of another attribute
  // name such as data-class.
  return /\s[cC][lL][aA][sS][sS]=\"[^\"]*\bnocode\b/.test(canonical);
}
737 | |
/**
 * Runs langHandler over sourceCode and appends the decorations it produces
 * to out.  Does nothing when sourceCode is empty.
 * @param {number} basePos the index of sourceCode within the chunk of source
 *    whose decorations are already present on out.
 * @param {string} sourceCode the text to decorate.
 * @param {function (Object)} langHandler called with a job object carrying
 *    {@code source} and {@code basePos}; its results are read back from the
 *    job's {@code decorations} property.
 * @param {Array} out receives the decorations.
 */
function appendDecorations(basePos, sourceCode, langHandler, out) {
  if (sourceCode) {
    var request = { source: sourceCode, basePos: basePos };
    langHandler(request);
    out.push.apply(out, request.decorations);
  }
}
753 | |
/**
 * Builds a lexing function from lists of style patterns.
 *
 * Each pattern is an array of the form
 *   [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
 *
 * The returned function splits the source into tokens and produces a
 * decoration list of the form
 *   [index_0, style_0, index_1, style_1, ..., index_n, style_n]
 * where each index is a position in the source and each style applies from
 * its index up to the next index.
 *
 * Style is either a style constant such as PR_PLAIN, or a string of the form
 * 'lang-FOO'.  In the latter case capture group 1 of the pattern is handed to
 * the handler registered for language FOO, while the text on either side of
 * group 1 is lexed again by this same lexer.  Patterns must therefore be
 * written so that the surrounding text cannot match the same rule again,
 * otherwise the lexer would recurse forever.  If a pattern also has a group
 * 2, it is taken to be the text to the right of the embedded source, which
 * lets group 1 be empty without being mislocated at the start of the token.
 *
 * Pattern must only match prefixes (i.e. it starts with ^); a match is
 * treated as one token carrying the pattern's style.
 *
 * Shortcut is an optional string of characters.  When a token starts with
 * one of them, only that pattern is consulted for the token.
 *
 * @param {Array} shortcutStylePatterns patterns that always start with a
 *   known character.  Must have a shortcut string.
 * @param {Array} fallthroughStylePatterns patterns that are tried in order
 *   when no shortcut applies.  May have shortcuts.
 *
 * @return {function (Object)} a function that reads job.source and
 *   job.basePos and stores the decoration list in job.decorations.
 */
function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
  // First character of a token -> the pattern entry to use for that token.
  var shortcuts = {};

  // One global regex, built from every distinct pattern, that chops the
  // source into tokens.
  var tokenizer = (function () {
    var everyPattern = shortcutStylePatterns.concat(fallthroughStylePatterns);
    var distinctRegexes = [];
    var seenRegexes = {};
    for (var p = 0, count = everyPattern.length; p < count; ++p) {
      var entry = everyPattern[p];
      var shortcutChars = entry[3];
      if (shortcutChars) {
        for (var c = 0; c < shortcutChars.length; ++c) {
          shortcuts[shortcutChars.charAt(c)] = entry;
        }
      }
      var regex = entry[1];
      var regexKey = '' + regex;
      if (!seenRegexes.hasOwnProperty(regexKey)) {
        seenRegexes[regexKey] = null;
        distinctRegexes.push(regex);
      }
    }
    // Catch-all so that a character no pattern claims still forms a token.
    distinctRegexes.push(/[\0-\uffff]/);
    return combinePrefixPatterns(distinctRegexes);
  })();

  var fallthroughCount = fallthroughStylePatterns.length;

  /**
   * Lexes job.source and stores in job.decorations an array of style classes,
   * each preceded by the position at which it starts, in ascending order.
   *
   * @param {Object} job an object like {@code
   *    source: {string} plain source text,
   *    basePos: {int} position of job.source within the larger chunk of
   *        source code; added to every emitted position.
   * }
   */
  var decorate = function (job) {
    var sourceCode = job.source;
    var basePos = job.basePos;
    /** Even entries are positions, odd entries are the styles that start
      * there.
      * @type {Array.<number|string>}
      */
    var decorations = [basePos, PR_PLAIN];
    var tokens = sourceCode.match(tokenizer) || [];
    // token text -> style, for tokens that were not embedded languages.
    var styleCache = {};
    var pos = 0;  // index into sourceCode

    for (var t = 0, tokenCount = tokens.length; t < tokenCount; ++t) {
      var token = tokens[t];
      var tokenBase = basePos + pos;
      pos += token.length;

      var style = styleCache[token];
      if (typeof style === 'string') {
        // Seen this exact token before; reuse its style.
        decorations.push(tokenBase, style);
        continue;
      }

      var match = void 0;
      var entry = shortcuts[token.charAt(0)];
      if (entry) {
        match = token.match(entry[1]);
        style = entry[0];
      } else {
        // Falls back to plain text when nothing matches, so that the lexer
        // always makes progress.
        style = PR_PLAIN;
        for (var f = 0; f < fallthroughCount; ++f) {
          var candidate = fallthroughStylePatterns[f];
          match = token.match(candidate[1]);
          if (match) {
            style = candidate[0];
            break;
          }
        }
      }

      var isLangStyle = style.length >= 5 && style.substring(0, 5) === 'lang-';
      var hasEmbeddedGroup = match && typeof match[1] === 'string';
      if (isLangStyle && !hasEmbeddedGroup) {
        // A 'lang-' rule without a usable group 1 cannot be delegated.
        style = PR_SOURCE;
      }

      if (!(isLangStyle && hasEmbeddedGroup)) {
        styleCache[token] = style;
        decorations.push(tokenBase, style);
        continue;
      }

      // Treat group 1 as an embedded block of source code.
      var embeddedSource = match[1];
      var embeddedStart = token.indexOf(embeddedSource);
      var embeddedEnd = embeddedStart + embeddedSource.length;
      if (match[2]) {
        // If embeddedSource can be blank, indexOf would find it at the start
        // of the token and we would recurse on the entire token forever, so
        // locate it relative to the right context captured in group 2.
        embeddedEnd = token.length - match[2].length;
        embeddedStart = embeddedEnd - embeddedSource.length;
      }
      var lang = style.substring(5);
      // Text to the left of the embedded source, lexed by this lexer.
      appendDecorations(
          tokenBase,
          token.substring(0, embeddedStart),
          decorate, decorations);
      // The embedded source itself, lexed by the handler for its language.
      appendDecorations(
          tokenBase + embeddedStart,
          embeddedSource,
          langHandlerForExtension(lang, embeddedSource),
          decorations);
      // Text to the right of the embedded source, lexed by this lexer.
      appendDecorations(
          tokenBase + embeddedEnd,
          token.substring(embeddedEnd),
          decorate, decorations);
    }
    job.decorations = decorations;
  };
  return decorate;
}
928 | |
/**
 * Returns a function that produces a list of decorations from source text.
 *
 * The lexer is assembled from the following options, each of which is read
 * by name from {@code options}:
 *   'tripleQuotedStrings': recognise '''...''' and """...""" as well as
 *       single- and double-quoted strings.
 *   'multiLineStrings': otherwise, let ', " and ` delimited strings span
 *       lines.  Without either option only single-line ' and " strings are
 *       recognised.
 *   'verbatimStrings': recognise C# style @"..." strings.
 *   'hashComments': treat # as the start of a line comment.  Combined with
 *       'cStyleComments', C preprocessor directives and <header.h> style
 *       include targets are recognised too.
 *   'cStyleComments': recognise // line comments and slash-star block
 *       comments.
 *   'regexLiterals': recognise /.../ regular expression literals.
 *   'keywords': a whitespace separated string of keywords.  May be omitted
 *       or blank, in which case no keyword rule is added.
 *
 * \ is treated as a string escape.  There is no special handling for perl's
 * qq() style strings or for doubled-delimiter escapes as in basic, so such
 * literals may be broken up into several adjacent string literals.
 *
 * @param {Object} options a set of optional parameters.
 * @return {function (Object)} a function that examines the source code
 *     in the input job and builds the decoration list.
 */
function sourceDecorator(options) {
  var shortcutStylePatterns = [], fallthroughStylePatterns = [];
  if (options['tripleQuotedStrings']) {
    // '''multi-line-string''', 'single-line-string', and double-quoted
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
         null, '\'"']);
  } else if (options['multiLineStrings']) {
    // 'multi-line-string', "multi-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
         null, '\'"`']);
  } else {
    // 'single-line-string', "single-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,
         /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
         null, '"\'']);
  }
  if (options['verbatimStrings']) {
    // verbatim-string-literal production from the C# grammar.  See issue 93.
    fallthroughStylePatterns.push(
        [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]);
  }
  if (options['hashComments']) {
    if (options['cStyleComments']) {
      // Stop C preprocessor declarations at an unclosed open comment
      shortcutStylePatterns.push(
          [PR_COMMENT, /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/,
           null, '#']);
      // The <path/to/header.h> or <name> target of an #include.
      fallthroughStylePatterns.push(
          [PR_STRING,
           /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h|[a-z]\w*)>/,
           null]);
    } else {
      shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
    }
  }
  if (options['cStyleComments']) {
    fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
    fallthroughStylePatterns.push(
        [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
  }
  if (options['regexLiterals']) {
    var REGEX_LITERAL = (
        // A regular expression literal starts with a slash that is
        // not followed by * or / so that it is not confused with
        // comments.
        '/(?=[^/*])'
        // and then contains any number of raw characters,
        + '(?:[^/\\x5B\\x5C]'
        // escape sequences (\x5C),
        +    '|\\x5C[\\s\\S]'
        // or non-nesting character sets (\x5B\x5D);
        +    '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+'
        // finally closed by a /.
        + '/');
    fallthroughStylePatterns.push(
        ['lang-regex',
         new RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')
         ]);
  }

  // Tolerate a missing 'keywords' option instead of throwing on .replace.
  var keywords = (options['keywords'] || '').replace(/^\s+|\s+$/g, '');
  if (keywords.length) {
    fallthroughStylePatterns.push(
        [PR_KEYWORD,
         new RegExp('^(?:' + keywords.replace(/\s+/g, '|') + ')\\b'), null]);
  }

  shortcutStylePatterns.push([PR_PLAIN,       /^\s+/, null, ' \r\n\t\xA0']);
  fallthroughStylePatterns.push(
      // TODO(mikesamuel): recognize non-latin letters and numerals in idents
      [PR_LITERAL,     /^@[a-z_$][a-z_$@0-9]*/i, null],
      [PR_TYPE,        /^@?[A-Z]+[a-z][A-Za-z_$@0-9]*/, null],
      [PR_PLAIN,       /^[a-z_$][a-z_$@0-9]*/i, null],
      [PR_LITERAL,
       new RegExp(
           '^(?:'
           // A hex number
           + '0x[a-f0-9]+'
           // or an octal or decimal number, including one that starts with
           // the decimal point such as .5 (one or more digits after the
           // point; the + is a quantifier, not a literal plus sign),
           + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d+)'
           // possibly in scientific notation
           + '(?:e[+\\-]?\\d+)?'
           + ')'
           // with an optional modifier like UL for unsigned long
           + '[a-z]*', 'i'),
       null, '0123456789'],
      [PR_PUNCTUATION, /^.[^\s\w\.$@\'\"\`\/\#]*/, null]);

  return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
}
1037 | |
// Lexer used for code in an unknown language: it knows every keyword set and
// accepts hash comments, C style comments, multi-line strings and regular
// expression literals.
var decorateSource = sourceDecorator({
      'regexLiterals': true,
      'multiLineStrings': true,
      'cStyleComments': true,
      'hashComments': true,
      'keywords': ALL_KEYWORDS
    });
1045 | |
/** Breaks {@code job.source} around style boundaries in
  * {@code job.decorations} while re-interleaving {@code job.extractedTags},
  * and leaves the result in {@code job.prettyPrintedHtml}.
  * @param {Object} job like {
  *    source: {string} source as plain text,
  *    extractedTags: {Array.<number|string>} extractedTags chunks of raw
  *                   html preceded by their position in {@code job.source}
  *                   in order
  *    decorations: {Array.<number|string>} an array of style classes preceded
  *                 by the position at which they start in job.source in order
  * }
  * @private
  */
function recombineTagsAndDecorations(job) {
  var sourceText = job.source;
  var extractedTags = job.extractedTags;
  var decorations = job.decorations;

  var html = [];
  // index past the last char in sourceText written to html
  var outputIdx = 0;

  var openDecoration = null;     // style of the <span> currently open, if any
  var currentDecoration = null;  // style that applies to the next text chunk
  var tagPos = 0;  // index into extractedTags
  var decPos = 0;  // index into decorations
  var tabExpander = makeTabExpander(window['PR_TAB_WIDTH']);

  var adjacentSpaceRe = /([\r\n ]) /g;
  var startOrSpaceRe = /(^| ) /gm;
  var newlineRe = /\r\n?|\n/g;
  var trailingSpaceRe = /[ \r\n]$/;
  var lastWasSpace = true;  // the last text chunk emitted ended with a space.

  // A helper function that is responsible for opening sections of decoration
  // and outputing properly escaped chunks of source
  function emitTextUpTo(sourceIdx) {
    if (sourceIdx > outputIdx) {
      if (openDecoration && openDecoration !== currentDecoration) {
        // Close the current decoration
        html.push('</span>');
        openDecoration = null;
      }
      if (!openDecoration && currentDecoration) {
        openDecoration = currentDecoration;
        html.push('<span class="', openDecoration, '">');
      }
      // A space that follows another space or a line break (or that starts
      // the chunk when the previous chunk ended in whitespace) is replaced
      // by an explicit &nbsp; entity so that it is not collapsed when the
      // html is assigned to innerHTML.
      // This interacts badly with some wikis which introduces paragraph tags
      // into pre blocks for some strange reason.
      // It's necessary for IE though which seems to lose the preformattedness
      // of <pre> tags when their innerHTML is assigned.
      // http://stud3.tuwien.ac.at/~e0226430/innerHtmlQuirk.html
      // and it serves to undo the conversion of <br>s to newlines done in
      // chunkify.
      var htmlChunk = textToHtml(
          tabExpander(sourceText.substring(outputIdx, sourceIdx)))
          .replace(lastWasSpace
                   ? startOrSpaceRe
                   : adjacentSpaceRe, '$1&nbsp;');
      // Keep track of whether we need to escape space at the beginning of the
      // next chunk.
      lastWasSpace = trailingSpaceRe.test(htmlChunk);
      // IE collapses multiple adjacent <br>s into 1 line break.
      // Prefixing every <br> with '&nbsp;' prevents that behavior.
      var lineBreakHtml = window['_pr_isIE6']() ? '&nbsp;<br />' : '<br />';
      html.push(htmlChunk.replace(newlineRe, lineBreakHtml));
      outputIdx = sourceIdx;
    }
  }

  while (true) {
    // Determine if we're going to consume a tag this time around.  Otherwise
    // we consume a decoration or exit.
    var outputTag;
    if (tagPos < extractedTags.length) {
      if (decPos < decorations.length) {
        // Pick one giving preference to extractedTags since we shouldn't open
        // a new style that we're going to have to immediately close in order
        // to output a tag.
        outputTag = extractedTags[tagPos] <= decorations[decPos];
      } else {
        outputTag = true;
      }
    } else {
      outputTag = false;
    }
    // Consume either a decoration or a tag or exit.
    if (outputTag) {
      emitTextUpTo(extractedTags[tagPos]);
      if (openDecoration) {
        // Close the current decoration so that the tag is not nested in it.
        html.push('</span>');
        openDecoration = null;
      }
      html.push(extractedTags[tagPos + 1]);
      tagPos += 2;
    } else if (decPos < decorations.length) {
      emitTextUpTo(decorations[decPos]);
      currentDecoration = decorations[decPos + 1];
      decPos += 2;
    } else {
      break;
    }
  }
  // Flush whatever text follows the last tag or decoration boundary.
  emitTextUpTo(sourceText.length);
  if (openDecoration) {
    html.push('</span>');
  }
  job.prettyPrintedHtml = html.join('');
}
1156 | |
/** Language handlers keyed by the file extension they were registered for. */
var langHandlerRegistry = {};
/** Registers a language handler under each of the given file extensions.
  * An extension that already has a handler keeps it; the attempt is reported
  * through console.warn when a console is available.
  * @param {function (Object)} handler a function from source code to a list
  *      of decorations.  Takes a single argument job which describes the
  *      state of the computation.   The single parameter has the form
  *      {@code {
  *        source: {string} as plain text.
  *        decorations: {Array.<number|string>} an array of style classes
  *                     preceded by the position at which they start in
  *                     job.source in order.
  *                     The language handler should assign this field.
  *        basePos: {int} the position of source in the larger source chunk.
  *                 All positions in the output decorations array are relative
  *                 to the larger source chunk.
  *      } }
  * @param {Array.<string>} fileExtensions
  */
function registerLangHandler(handler, fileExtensions) {
  // Walk from the last extension to the first.
  var idx = fileExtensions.length;
  while (idx-- > 0) {
    var extension = fileExtensions[idx];
    if (langHandlerRegistry.hasOwnProperty(extension)) {
      if ('console' in window) {
        console.warn('cannot override language handler %s', extension);
      }
      continue;
    }
    langHandlerRegistry[extension] = handler;
  }
}
/** Looks up the handler registered for a language extension.
  * When the extension is missing or unknown, the source is treated as markup
  * if its first non-whitespace character is a <, and as code otherwise.
  * @param {string} extension the language extension; may be empty or null.
  * @param {string} source the text that is going to be decorated.
  * @return {function (Object)} the handler found in the registry.
  */
function langHandlerForExtension(extension, source) {
  if (extension && langHandlerRegistry.hasOwnProperty(extension)) {
    return langHandlerRegistry[extension];
  }
  var fallback = /^\s*</.test(source) ? 'default-markup' : 'default-code';
  return langHandlerRegistry[fallback];
}
// Fallback for code whose language is not known.
registerLangHandler(decorateSource, ['default-code']);

// Markup.  Also the fallback for unknown content that starts with a <.
// A style of 'lang-' carries an empty language name, so the embedded text is
// given to whichever default handler langHandlerForExtension picks for it.
registerLangHandler(
    createSimpleLexer(
        [],
        [
         [PR_PLAIN,       /^[^<?]+/],
         [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/],
         [PR_COMMENT,     /^<\!--[\s\S]*?(?:-\->|$)/],
         // Unescaped content in an unknown language
         ['lang-',        /^<\?([\s\S]+?)(?:\?>|$)/],
         ['lang-',        /^<%([\s\S]+?)(?:%>|$)/],
         [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/],
         ['lang-',        /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i],
         // Unescaped content in javascript.  (Or possibly vbscript).
         ['lang-js',      /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i],
         // Contains unescaped stylesheet content
         ['lang-css',     /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i],
         // Any other tag is lexed by the 'in.tag' handler registered below.
         ['lang-in.tag',  /^(<\/?[a-z][^<>]*>)/i]
        ]),
    ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']);

// The inside of a single tag: tag name, attribute names and values, and
// embedded script or style held in on* and style attributes.
registerLangHandler(
    createSimpleLexer(
        [
         [PR_PLAIN,        /^[\s]+/, null, ' \t\r\n'],
         [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\'']
         ],
        [
         [PR_TAG,          /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i],
         [PR_ATTRIB_NAME,  /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],
         ['lang-uq.val',   /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/],
         [PR_PUNCTUATION,  /^[=<>\/]+/],
         ['lang-js',       /^on\w+\s*=\s*\"([^\"]+)\"/i],
         ['lang-js',       /^on\w+\s*=\s*\'([^\']+)\'/i],
         ['lang-js',       /^on\w+\s*=\s*([^\"\'>\s]+)/i],
         ['lang-css',      /^style\s*=\s*\"([^\"]+)\"/i],
         ['lang-css',      /^style\s*=\s*\'([^\']+)\'/i],
         ['lang-css',      /^style\s*=\s*([^\"\'>\s]+)/i]
         ]),
    ['in.tag']);

// An unquoted attribute value: the whole text is one attribute value.
registerLangHandler(
    createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]), ['uq.val']);

// C and friends.
registerLangHandler(sourceDecorator({
      'keywords': CPP_KEYWORDS,
      'hashComments': true,
      'cStyleComments': true
    }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']);

// JSON.
registerLangHandler(sourceDecorator({
      'keywords': 'null true false'
    }), ['json']);

// C#.
registerLangHandler(sourceDecorator({
      'keywords': CSHARP_KEYWORDS,
      'hashComments': true,
      'cStyleComments': true,
      'verbatimStrings': true
    }), ['cs']);

// Java.
registerLangHandler(sourceDecorator({
      'keywords': JAVA_KEYWORDS,
      'cStyleComments': true
    }), ['java']);

// Shell scripts.
registerLangHandler(sourceDecorator({
      'keywords': SH_KEYWORDS,
      'hashComments': true,
      'multiLineStrings': true
    }), ['bsh', 'csh', 'sh']);

// Python.
registerLangHandler(sourceDecorator({
      'keywords': PYTHON_KEYWORDS,
      'hashComments': true,
      'multiLineStrings': true,
      'tripleQuotedStrings': true
    }), ['cv', 'py']);

// Perl.
registerLangHandler(sourceDecorator({
      'keywords': PERL_KEYWORDS,
      'hashComments': true,
      'multiLineStrings': true,
      'regexLiterals': true
    }), ['perl', 'pl', 'pm']);

// Ruby.
registerLangHandler(sourceDecorator({
      'keywords': RUBY_KEYWORDS,
      'hashComments': true,
      'multiLineStrings': true,
      'regexLiterals': true
    }), ['rb']);

// JavaScript.
registerLangHandler(sourceDecorator({
      'keywords': JSCRIPT_KEYWORDS,
      'cStyleComments': true,
      'regexLiterals': true
    }), ['js']);

// The body of a regular expression literal: styled as one string.
registerLangHandler(
    createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);
1284 | |
/** Pretty prints {@code job.sourceCodeHtml} and stores the result in
  * {@code job.prettyPrintedHtml}.  If any step throws, the error is logged
  * when a console is available and prettyPrintedHtml is left holding the
  * unmodified input.
  * @param {Object} job like {
  *    sourceCodeHtml: {string} the html to pretty print,
  *    langExtension: {string} optional language extension used to pick the
  *        language handler
  * }
  * On return the job also carries source, basePos and extractedTags, plus
  * whatever the language handler stored on it.
  */
function applyDecorator(job) {
  var originalHtml = job.sourceCodeHtml;
  var requestedLang = job.langExtension;

  // Start from the untouched input so that a failure below still leaves
  // usable output.
  job.prettyPrintedHtml = originalHtml;

  try {
    // Separate the markup from the text: the text is lexed and the tags are
    // put back afterwards.
    var extracted = extractTags(originalHtml);
    /** Plain text. @type {string} */
    var plainText = extracted.source;
    job.source = plainText;
    job.basePos = 0;

    /** Even entries are positions in source in ascending order.  Odd entries
      * are tags that were extracted at that position.
      * @type {Array.<number|string>}
      */
    job.extractedTags = extracted.tags;

    // Let the handler chosen for the language decorate the job.
    var handler = langHandlerForExtension(requestedLang, plainText);
    handler(job);
    // Weave decorations and tags back together; this fills in
    // job.prettyPrintedHtml.
    recombineTagsAndDecorations(job);
  } catch (e) {
    if ('console' in window) {
      console.log(e);
      console.trace();
    }
  }
}
1318 | |
/** Pretty prints a single chunk of html.
  * @param {string} sourceCodeHtml the html to pretty print.
  * @param {string} opt_langExtension optional language extension used to pick
  *     the language handler.
  * @return {string} the value left in prettyPrintedHtml by applyDecorator.
  */
function prettyPrintOne(sourceCodeHtml, opt_langExtension) {
  var request = {};
  request.sourceCodeHtml = sourceCodeHtml;
  request.langExtension = opt_langExtension;
  applyDecorator(request);
  return request.prettyPrintedHtml;
}
1327 | |
/** Pretty prints every <pre>, <code> and <xmp> element in the document whose
  * class name contains 'prettyprint', replacing its content with the
  * decorated html.  Elements nested inside another such element are skipped.
  * When window['PR_SHOULD_USE_CONTINUATION'] is set, the work is done in
  * slices of roughly 250ms joined by setTimeout so that a large page does not
  * make the browser unresponsive.
  * @param {Function} opt_whenDone optional callback, invoked with no
  *     arguments once every element has been processed.
  */
function prettyPrint(opt_whenDone) {
  // NOTE(review): the result is used both as a flag and compared with 6, so
  // it presumably is an IE major version or a falsy value -- confirm against
  // the definition of _pr_isIE6.
  var isIE678 = window['_pr_isIE6']();
  var ieNewline = isIE678 === 6 ? '\r\n' : '\r';
  // See bug 71 and http://stackoverflow.com/questions/136443/why-doesnt-ie7-

  // tagName is upper case in HTML documents and keeps the case of the source
  // in XML documents, so container tags are matched case-insensitively.
  var preCodeXmpRe = /^(?:pre|code|xmp)$/i;

  // fetch a list of nodes to rewrite
  var codeSegments = [
      document.getElementsByTagName('pre'),
      document.getElementsByTagName('code'),
      document.getElementsByTagName('xmp') ];
  var elements = [];
  for (var i = 0; i < codeSegments.length; ++i) {
    for (var j = 0, n = codeSegments[i].length; j < n; ++j) {
      elements.push(codeSegments[i][j]);
    }
  }
  codeSegments = null;

  var clock = Date;
  if (!clock['now']) {
    clock = { 'now': function () { return (new Date).getTime(); } };
  }

  // The loop is broken into a series of continuations to make sure that we
  // don't make the browser unresponsive when rewriting a large page.
  var k = 0;
  var prettyPrintingJob;

  function doWork() {
    var endTime = (window['PR_SHOULD_USE_CONTINUATION'] ?
                   clock.now() + 250 /* ms */ :
                   Infinity);
    for (; k < elements.length && clock.now() < endTime; k++) {
      var cs = elements[k];
      if (cs.className && cs.className.indexOf('prettyprint') >= 0) {
        // If the classes includes a language extensions, use it.
        // Language extensions can be specified like
        //     <pre class="prettyprint lang-cpp">
        // the language extension "cpp" is used to find a language handler as
        // passed to PR_registerLangHandler.
        var langExtension = cs.className.match(/\blang-(\w+)\b/);
        if (langExtension) { langExtension = langExtension[1]; }

        // make sure this is not nested in an already prettified element
        var nested = false;
        for (var p = cs.parentNode; p; p = p.parentNode) {
          if (p.tagName && preCodeXmpRe.test(p.tagName) &&
              p.className && p.className.indexOf('prettyprint') >= 0) {
            nested = true;
            break;
          }
        }
        if (!nested) {
          // fetch the content as a snippet of properly escaped HTML.
          // Firefox adds newlines at the end.
          var content = getInnerHtml(cs);
          content = content.replace(/(?:\r\n?|\n)$/, '');

          // do the pretty printing
          prettyPrintingJob = {
            sourceCodeHtml: content,
            langExtension: langExtension,
            sourceNode: cs
          };
          applyDecorator(prettyPrintingJob);
          replaceWithPrettyPrintedHtml();
        }
      }
    }
    if (k < elements.length) {
      // finish up in a continuation
      setTimeout(doWork, 250);
    } else if (opt_whenDone) {
      opt_whenDone();
    }
  }

  // Writes the html produced for prettyPrintingJob back into its source node.
  function replaceWithPrettyPrintedHtml() {
    var newContent = prettyPrintingJob.prettyPrintedHtml;
    if (!newContent) { return; }
    var cs = prettyPrintingJob.sourceNode;

    // push the prettified html back into the tag.
    if (!isRawContent(cs)) {
      // just replace the old html with the new
      cs.innerHTML = newContent;
    } else {
      // we need to change the tag to a <pre> since <xmp>s do not allow
      // embedded tags such as the span tags used to attach styles to
      // sections of source code.
      var pre = document.createElement('PRE');
      for (var i = 0; i < cs.attributes.length; ++i) {
        var a = cs.attributes[i];
        if (a.specified) {
          var aname = a.name.toLowerCase();
          if (aname === 'class') {
            pre.className = a.value;  // For IE 6
          } else {
            pre.setAttribute(a.name, a.value);
          }
        }
      }
      pre.innerHTML = newContent;

      // remove the old
      cs.parentNode.replaceChild(pre, cs);
      cs = pre;
    }

    // Replace <br>s with line-feeds so that copying and pasting works
    // on IE 6.
    // Doing this on other browsers breaks lots of stuff since \r\n is
    // treated as two newlines on Firefox, and doing this also slows
    // down rendering.
    if (isIE678 && cs.tagName === 'PRE') {
      var lineBreaks = cs.getElementsByTagName('br');
      for (var j = lineBreaks.length; --j >= 0;) {
        var lineBreak = lineBreaks[j];
        lineBreak.parentNode.replaceChild(
            document.createTextNode(ieNewline), lineBreak);
      }
    }
  }

  doWork();
}
1455 | |
// Public entry points.  Every name is a quoted string key on window.
window['PR_normalizedHtml'] = normalizedHtml;
window['prettyPrintOne'] = prettyPrintOne;
window['prettyPrint'] = prettyPrint;

// Building blocks and style constants, exposed under window.PR.
window['PR'] = {
      // lexer construction and registration
      'combinePrefixPatterns': combinePrefixPatterns,
      'createSimpleLexer': createSimpleLexer,
      'registerLangHandler': registerLangHandler,
      'sourceDecorator': sourceDecorator,
      // style constants
      'PR_ATTRIB_NAME': PR_ATTRIB_NAME,
      'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE,
      'PR_COMMENT': PR_COMMENT,
      'PR_DECLARATION': PR_DECLARATION,
      'PR_KEYWORD': PR_KEYWORD,
      'PR_LITERAL': PR_LITERAL,
      'PR_NOCODE': PR_NOCODE,
      'PR_PLAIN': PR_PLAIN,
      'PR_PUNCTUATION': PR_PUNCTUATION,
      'PR_SOURCE': PR_SOURCE,
      'PR_STRING': PR_STRING,
      'PR_TAG': PR_TAG,
      'PR_TYPE': PR_TYPE
    };
1478 | })(); |