sax.js 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642
  // XML 1.0 name productions that the patterns below approximate:
  //[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
  //[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
  //[5] Name ::= NameStartChar (NameChar)*
  // Note: unlike production [4], this character class contains no ":" (the colon is
  // handled separately by tagNamePattern) and no astral range \u10000-\uEFFFF.
  var nameStartChar = /[A-Z_a-z\xC0-\xD6\xD8-\xF6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/ // \u10000-\uEFFFF is not included
  // NameChar: built by re-opening the nameStartChar class (slice(1,-1) strips its
  // surrounding brackets) and adding "-", ".", digits and the combining ranges.
  var nameChar = new RegExp("[\\-\\.0-9"+nameStartChar.source.slice(1,-1)+"\\u00B7\\u0300-\\u036F\\u203F-\\u2040]");
  // A whole name: Name, optionally followed by a single ":" and another Name.
  // Used to validate both tag names and attribute names.
  var tagNamePattern = new RegExp('^'+nameStartChar.source+nameChar.source+'*(?:\:'+nameStartChar.source+nameChar.source+'*)?$');
  // Earlier ASCII-only version of the pattern, kept for reference:
  //var tagNamePattern = /^[a-zA-Z_][\w\-\.]*(?:\:[a-zA-Z_][\w\-\.]*)?$/
  //var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',')

  // States of the start-tag scanner (see parseElementStartPart):
  //S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
  //S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
  var S_TAG = 0;//reading the tag name
  var S_ATTR = 1;//reading an attribute name
  var S_ATTR_SPACE=2;//attribute name ended, whitespace seen after it
  var S_EQ = 3;//"=" seen, optional whitespace may follow
  var S_ATTR_NOQUOT_VALUE = 4;//reading an attribute value that has no opening quote
  var S_ATTR_END = 5;//quoted attribute value just ended, no whitespace seen yet
  var S_TAG_SPACE = 6;//whitespace seen after the tag name or after an attribute value
  var S_TAG_CLOSE = 7;//"/" of a self-closing element seen, e.g. <el />
  /**
   * Error type that the SAX parser (XMLReader) deliberately lets escape:
   * the parse loop rethrows instances of it instead of reporting them
   * through the error handler.
   *
   * @param {string} message Human readable description of the problem.
   * @param {any?} locator Optional details about the position in the source.
   * @constructor
   */
  function ParseError(message, locator) {
    var self = this;
    self.message = message;
    self.locator = locator;
    // captureStackTrace is not available on every engine, so probe for it first.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(self, ParseError);
    }
  }
  ParseError.prototype = new Error();
  ParseError.prototype.name = ParseError.name;
  /**
   * SAX-style reader. Before calling `parse`, the instance is expected to carry
   * a `domBuilder` and an `errorHandler` property (both are read from `this`).
   *
   * @constructor
   */
  function XMLReader(){
  }
  XMLReader.prototype = {
    /**
     * Parses `source`, wrapping the run in startDocument()/endDocument() calls
     * on the DOM builder.
     *
     * @param {string} source The markup to parse.
     * @param {Object} defaultNSMap Initial prefix-to-URI map; it is copied, never mutated.
     * @param {Object} entityMap Entity name to replacement text.
     */
    parse:function(source,defaultNSMap,entityMap){
      var builder = this.domBuilder;
      var nsMapCopy = {};
      builder.startDocument();
      // Work on a private copy so the caller's namespace map stays untouched.
      _copy(defaultNSMap, nsMapCopy);
      parse(source, nsMapCopy, entityMap, builder, this.errorHandler);
      builder.endDocument();
    }
  }
  /**
   * Main SAX loop: scans `source` for "<", emits the text in between as
   * character data and dispatches on the character after "<" to handle end
   * tags, processing instructions, "<!" constructs and start tags.
   *
   * Errors thrown while handling one construct are reported through
   * `errorHandler.error` and the offending "<" is emitted as text, except for
   * ParseError instances, which are rethrown.
   *
   * @param {string} source The markup to parse.
   * @param {Object} defaultNSMapCopy Prefix-to-URI map used for the document root.
   * @param {Object} entityMap Entity name to replacement text. Only own
   *        properties are treated as entities.
   * @param {Object} domBuilder Receiver of the SAX events.
   * @param {Object} errorHandler Object with warning/error/fatalError methods.
   */
  function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){
    function fixedFromCharCode(code) {
      // String.fromCharCode cannot produce code points above 0xFFFF directly,
      // so those are emitted as a UTF-16 surrogate pair.
      if (code > 0xffff) {
        code -= 0x10000;
        var surrogate1 = 0xd800 + (code >> 10)
          , surrogate2 = 0xdc00 + (code & 0x3ff);
        return String.fromCharCode(surrogate1, surrogate2);
      } else {
        return String.fromCharCode(code);
      }
    }
    // Replacement callback for /&#?\w+;/ matches; `a` is the full "&name;" text.
    function entityReplacer(a){
      var k = a.slice(1,-1);
      // Own-property check: a plain `in` test would also match inherited members
      // such as "constructor" or "toString" and inject them into the output.
      if(Object.prototype.hasOwnProperty.call(entityMap, k)){
        return entityMap[k];
      }else if(k.charAt(0) === '#'){
        // "#123" is decimal; "#x7B" becomes "0x7B" so parseInt reads it as hex.
        return fixedFromCharCode(parseInt(k.substr(1).replace('x','0x')))
      }else{
        errorHandler.error('entity not found:'+a);
        return a;
      }
    }
    // Emits source[start, end) as character data and advances `start`.
    function appendText(end){//has some bugs
      if(end>start){
        var xt = source.substring(start,end).replace(/&#?\w+;/g,entityReplacer);
        locator&&position(start);
        domBuilder.characters(xt,0,end-start);
        start = end
      }
    }
    // Moves the locator forward so that it describes source offset `p`.
    function position(p,m){
      while(p>=lineEnd && (m = linePattern.exec(source))){
        lineStart = m.index;
        lineEnd = lineStart + m[0].length;
        locator.lineNumber++;
      }
      locator.columnNumber = p-lineStart+1;
    }
    var lineStart = 0;
    var lineEnd = 0;
    var linePattern = /.*(?:\r\n?|\n)|.*$/g
    var locator = domBuilder.locator;
    // Stack of open elements; the bottom entry only carries the root namespace map.
    var parseStack = [{currentNSMap:defaultNSMapCopy}]
    var closeMap = {};
    var start = 0;
    while(true){
      try{
        var tagStart = source.indexOf('<',start);
        if(tagStart<0){
          // No more markup: any non-whitespace remainder becomes a text node
          // appended directly to the document.
          if(!source.substr(start).match(/^\s*$/)){
            var doc = domBuilder.doc;
            var text = doc.createTextNode(source.substr(start));
            doc.appendChild(text);
            domBuilder.currentElement = text;
          }
          return;
        }
        if(tagStart>start){
          appendText(tagStart);
        }
        switch(source.charAt(tagStart+1)){
        case '/':
          // end element
          var end = source.indexOf('>',tagStart+3);
          var tagName = source.substring(tagStart+2,end);
          var config = parseStack.pop();
          if(end<0){
            tagName = source.substring(tagStart+2).replace(/[\s<].*/,'');
            errorHandler.error("end tag name: "+tagName+' is not complete:'+config.tagName);
            end = tagStart+1+tagName.length;
          }else if(tagName.match(/\s</)){
            tagName = tagName.replace(/[\s<].*/,'');
            errorHandler.error("end tag name: "+tagName+' maybe not complete');
            end = tagStart+1+tagName.length;
          }
          var localNSMap = config.localNSMap;
          var endMatch = config.tagName == tagName;
          var endIgnoreCaseMach = endMatch || config.tagName&&config.tagName.toLowerCase() == tagName.toLowerCase()
          if(endIgnoreCaseMach){
            domBuilder.endElement(config.uri,config.localName,tagName);
            if(localNSMap){
              for(var prefix in localNSMap){
                domBuilder.endPrefixMapping(prefix) ;
              }
            }
            if(!endMatch){
              errorHandler.fatalError("end tag name: "+tagName+' is not match the current start tagName:'+config.tagName ); // No known test case
            }
          }else{
            // The end tag does not belong to the innermost open element: keep it open.
            parseStack.push(config)
          }
          end++;
          break;
        case '?':// <?...?>
          locator&&position(tagStart);
          end = parseInstruction(source,tagStart,domBuilder);
          break;
        case '!':// <!doctype,<![CDATA,<!--
          locator&&position(tagStart);
          end = parseDCC(source,tagStart,domBuilder,errorHandler);
          break;
        default:
          locator&&position(tagStart);
          var el = new ElementAttributes();
          var currentNSMap = parseStack[parseStack.length-1].currentNSMap;
          //elStartEnd
          var end = parseElementStartPart(source,tagStart,el,currentNSMap,entityReplacer,errorHandler);
          var len = el.length;
          if(!el.closed && fixSelfClosed(source,end,el.tagName,closeMap)){
            el.closed = true;
            if(!entityMap.nbsp){
              errorHandler.warning('unclosed xml attribute');
            }
          }
          if(locator && len){
            // Give every attribute its own locator snapshot, then restore the
            // element's position for the startElement event.
            var locator2 = copyLocator(locator,{});
            for(var i = 0;i<len;i++){
              var a = el[i];
              position(a.offset);
              a.locator = copyLocator(locator,{});
            }
            domBuilder.locator = locator2
            if(appendElement(el,domBuilder,currentNSMap)){
              parseStack.push(el)
            }
            domBuilder.locator = locator;
          }else{
            if(appendElement(el,domBuilder,currentNSMap)){
              parseStack.push(el)
            }
          }
          if(el.uri === 'http://www.w3.org/1999/xhtml' && !el.closed){
            end = parseHtmlSpecialContent(source,end,el.tagName,entityReplacer,domBuilder)
          }else{
            end++;
          }
        }
      }catch(e){
        if (e instanceof ParseError) {
          throw e;
        }
        errorHandler.error('element parse error: '+e)
        end = -1;
      }
      if(end>start){
        start = end;
      }else{
        //TODO: the SAX position may move backwards here, so reported positions can be wrong
        appendText(Math.max(tagStart,start)+1);
      }
    }
  }
  /**
   * Copies the line and column of locator `f` onto `t`.
   *
   * @param {Object} f Locator to read lineNumber/columnNumber from.
   * @param {Object} t Object that receives both values.
   * @return {Object} `t`, to allow inline use.
   */
  function copyLocator(f,t){
    var fields = ['lineNumber', 'columnNumber'];
    for (var idx = 0; idx < fields.length; idx++) {
      t[fields[idx]] = f[fields[idx]];
    }
    return t;
  }
  /**
   * Scans one start tag, beginning at the "<" found at `start`, and records the
   * tag name and attributes on `el`. Implemented as a character-driven state
   * machine over the S_* states.
   *
   * @param {string} source The full markup.
   * @param {number} start Index of the "<" that opens the tag.
   * @param {Object} el Receives the results through setTagName/addValue and `closed`.
   * @param {Object} currentNSMap Namespace map in scope; only its '' entry is read,
   *        to suppress "missed value" warnings for some XHTML boolean attributes.
   * @param {function} entityReplacer Replacement callback applied to attribute values.
   * @param {Object} errorHandler Object with warning/error/fatalError methods.
   * @return {number} Index of the terminating ">", or the index just past the
   *         end of `source` when the input ends inside the tag.
   * @throws {Error} For malformed tags (misplaced "=", missing closing quote,
   *         missing value, "/" not followed by ">", ...).
   */
  function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,errorHandler){
    /**
     * Records one attribute on `el`, reporting a fatal error first when an
     * attribute of that name was already recorded.
     *
     * @param {string} qname
     * @param {string} value
     * @param {number} startIndex
     */
    function addAttribute(qname, value, startIndex) {
      if (qname in el.attributeNames) errorHandler.fatalError('Attribute ' + qname + ' redefined')
      el.addValue(qname, value, startIndex)
    }
    var attrName;
    var value;
    // `p` is the scan position; `start` marks the beginning of the current token.
    var p = ++start;
    var s = S_TAG;//status
    while(true){
      var c = source.charAt(p);
      switch(c){
      case '=':
        if(s === S_ATTR){//attrName
          // The attribute name ends right at the "=".
          attrName = source.slice(start,p);
          s = S_EQ;
        }else if(s === S_ATTR_SPACE){
          // The name was already captured when the whitespace was seen.
          s = S_EQ;
        }else{
          //fatalError: "=" must follow an attribute name (or whitespace after one)
          throw new Error('attribute equal must after attrName'); // No known test case
        }
        break;
      case '\'':
      case '"':
        if(s === S_EQ || s === S_ATTR //|| s == S_ATTR_SPACE
          ){//equal
          if(s === S_ATTR){
            // A quote directly after the name: the "=" is missing, tolerate it.
            errorHandler.warning('attribute value must after "="')
            attrName = source.slice(start,p)
          }
          // Jump straight to the matching closing quote.
          start = p+1;
          p = source.indexOf(c,start)
          if(p>0){
            value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
            // start-1 is the position of the opening quote.
            addAttribute(attrName, value, start-1);
            s = S_ATTR_END;
          }else{
            //fatalError: no matching closing quote
            throw new Error('attribute value no end \''+c+'\' match');
          }
        }else if(s == S_ATTR_NOQUOT_VALUE){
          // An unquoted value ran into a quote: treat that quote as the end of the value.
          value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
          //console.log(attrName,value,start,p)
          addAttribute(attrName, value, start);
          //console.dir(el)
          errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!');
          start = p+1;
          s = S_ATTR_END
        }else{
          //fatalError: no "=" before the value
          throw new Error('attribute value must after "="'); // No known test case
        }
        break;
      case '/':
        // The cases below fall through on purpose.
        switch(s){
        case S_TAG:
          el.setTagName(source.slice(start,p));
        case S_ATTR_END:
        case S_TAG_SPACE:
        case S_TAG_CLOSE:
          s =S_TAG_CLOSE;
          el.closed = true;
        case S_ATTR_NOQUOT_VALUE:
        case S_ATTR:
        case S_ATTR_SPACE:
          // In these three states the "/" is simply skipped; the state is unchanged.
          break;
        //case S_EQ:
        default:
          throw new Error("attribute invalid close char('/')") // No known test case
        }
        break;
      case ''://end document
        // charAt() past the end of the string yields '': the input ended inside the tag.
        errorHandler.error('unexpected end of input');
        if(s == S_TAG){
          el.setTagName(source.slice(start,p));
        }
        return p;
      case '>':
        // The cases below fall through on purpose.
        switch(s){
        case S_TAG:
          el.setTagName(source.slice(start,p));
        case S_ATTR_END:
        case S_TAG_SPACE:
        case S_TAG_CLOSE:
          break;//normal
        case S_ATTR_NOQUOT_VALUE://Compatible state
        case S_ATTR:
          // The pending token is either an unquoted value or a bare attribute name.
          value = source.slice(start,p);
          if(value.slice(-1) === '/'){
            // A trailing "/" belongs to the self-closing marker, not to the token.
            el.closed = true;
            value = value.slice(0,-1)
          }
        case S_ATTR_SPACE:
          if(s === S_ATTR_SPACE){
            value = attrName;
          }
          if(s == S_ATTR_NOQUOT_VALUE){
            errorHandler.warning('attribute "'+value+'" missed quot(")!');
            addAttribute(attrName, value.replace(/&#?\w+;/g,entityReplacer), start)
          }else{
            // Attribute without a value: its own name is used as the value. The warning
            // is skipped for disabled/checked/selected when the default namespace is XHTML.
            if(currentNSMap[''] !== 'http://www.w3.org/1999/xhtml' || !value.match(/^(?:disabled|checked|selected)$/i)){
              errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!')
            }
            addAttribute(value, value, start)
          }
          break;
        case S_EQ:
          throw new Error('attribute value missed!!');
        }
        // console.log(tagName,tagNamePattern,tagNamePattern.test(tagName))
        return p;
      /*xml space '\x20' | #x9 | #xD | #xA; */
      case '\u0080':
        // U+0080 is handled like a space: falls through into the default branch.
        c = ' ';
      default:
        if(c<= ' '){//space
          switch(s){
          case S_TAG:
            el.setTagName(source.slice(start,p));//tagName
            s = S_TAG_SPACE;
            break;
          case S_ATTR:
            attrName = source.slice(start,p)
            s = S_ATTR_SPACE;
            break;
          case S_ATTR_NOQUOT_VALUE:
            // Whitespace terminates an unquoted value; falls through to S_ATTR_END.
            var value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
            errorHandler.warning('attribute "'+value+'" missed quot(")!!');
            addAttribute(attrName, value, start)
          case S_ATTR_END:
            s = S_TAG_SPACE;
            break;
          //case S_TAG_SPACE:
          //case S_EQ:
          //case S_ATTR_SPACE:
          //	void();break;
          //case S_TAG_CLOSE:
            //ignore warning
          }
        }else{//not space
  //S_TAG,	S_ATTR,	S_EQ,	S_ATTR_NOQUOT_VALUE
  //S_ATTR_SPACE,	S_ATTR_END,	S_TAG_SPACE, S_TAG_CLOSE
          switch(s){
          //case S_TAG:void();break;
          //case S_ATTR:void();break;
          //case S_ATTR_NOQUOT_VALUE:void();break;
          case S_ATTR_SPACE:
            // A new name starts although the previous attribute never got a value:
            // record the previous one with its own name as the value.
            var tagName =  el.tagName;
            if(currentNSMap[''] !== 'http://www.w3.org/1999/xhtml' || !attrName.match(/^(?:disabled|checked|selected)$/i)){
              errorHandler.warning('attribute "'+attrName+'" missed value!! "'+attrName+'" instead2!!')
            }
            addAttribute(attrName, attrName, start);
            start = p;
            s = S_ATTR;
            break;
          case S_ATTR_END:
            // The next attribute begins directly after a closing quote; falls through.
            errorHandler.warning('attribute space is required"'+attrName+'"!!')
          case S_TAG_SPACE:
            s = S_ATTR;
            start = p;
            break;
          case S_EQ:
            // A value that does not start with a quote.
            s = S_ATTR_NOQUOT_VALUE;
            start = p;
            break;
          case S_TAG_CLOSE:
            throw new Error("elements closed character '/' and '>' must be connected to");
          }
        }
      }//end outer switch
      //console.log('p++',p)
      p++;
    }
  }
  /**
   * Resolves the namespaces of an element and its attributes, then emits the
   * matching SAX events (startPrefixMapping, startElement and, for closed
   * elements, endElement and endPrefixMapping).
   *
   * @param {Object} el Array-like attribute list that also carries `tagName` and
   *        `closed`; prefix/localName/uri fields are filled in on it and on its items.
   * @param {Object} domBuilder Receiver of the SAX events.
   * @param {Object} currentNSMap Prefix-to-URI map in scope. It is never mutated:
   *        when the element declares namespaces, a copy is extended instead.
   * @return {true|undefined} true when the element stays open (the caller must
   *         keep it on its stack), undefined when it was closed right away.
   */
  function appendElement(el,domBuilder,currentNSMap){
    var tagName = el.tagName;
    var localNSMap = null;
    var i = el.length;
    // First pass: split qualified names and collect namespace declarations.
    while(i--){
      var a = el[i];
      var qName = a.qName;
      var value = a.value;
      var nsp = qName.indexOf(':');
      if(nsp>0){
        var prefix = a.prefix = qName.slice(0,nsp);
        var localName = qName.slice(nsp+1);
        var nsPrefix = prefix === 'xmlns' && localName
      }else{
        localName = qName;
        prefix = null
        nsPrefix = qName === 'xmlns' && ''
      }
      //can not set prefix,because prefix !== ''
      a.localName = localName ;
      //prefix == null for no ns prefix attribute
      // nsPrefix is false for ordinary attributes, otherwise the declared prefix
      // ('' for the default namespace).
      if(nsPrefix !== false){//hack!!
        if(localNSMap == null){
          localNSMap = {}
          // Copy-on-write: continue with a private copy of the inherited map.
          _copy(currentNSMap,currentNSMap={})
        }
        currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value;
        a.uri = 'http://www.w3.org/2000/xmlns/'
        domBuilder.startPrefixMapping(nsPrefix, value)
      }
    }
    // Second pass: now that all declarations are known, resolve attribute prefixes.
    i = el.length;
    while(i--){
      a = el[i];
      var prefix = a.prefix;
      if(prefix){//no prefix attribute has no namespace
        if(prefix === 'xml'){
          // The xml prefix is reserved; it must not be looked up in (and possibly
          // overwritten by) the namespace map.
          a.uri = 'http://www.w3.org/XML/1998/namespace';
        }else if(prefix !== 'xmlns'){
          a.uri = currentNSMap[prefix || '']
        }
      }
    }
    var nsp = tagName.indexOf(':');
    if(nsp>0){
      prefix = el.prefix = tagName.slice(0,nsp);
      localName = el.localName = tagName.slice(nsp+1);
    }else{
      prefix = null;//important!!
      localName = el.localName = tagName;
    }
    //no prefix element has default namespace
    var ns = el.uri = currentNSMap[prefix || ''];
    domBuilder.startElement(ns,localName,tagName,el);
    //endPrefixMapping and startPrefixMapping have not any help for dom builder
    //localNSMap = null
    if(el.closed){
      domBuilder.endElement(ns,localName,tagName);
      if(localNSMap){
        for(prefix in localNSMap){
          domBuilder.endPrefixMapping(prefix)
        }
      }
    }else{
      // Remember the maps so the matching end tag can close the prefix mappings.
      el.currentNSMap = currentNSMap;
      el.localNSMap = localNSMap;
      return true;
    }
  }
  /**
   * Special handling for the content of XHTML <script> and <textarea> elements:
   * when that content contains "&" or "<" it is emitted as one block of
   * character data instead of being parsed as markup. Entities are replaced
   * for textarea content, script content is passed through verbatim.
   *
   * @param {string} source The full markup.
   * @param {number} elStartEnd Index of the ">" that ends the start tag.
   * @param {string} tagName Tag name as written in the start tag.
   * @param {function} entityReplacer Replacement callback for entity references.
   * @param {Object} domBuilder Receiver of the characters() event.
   * @return {number} Index of the matching "</tagName>" when the content was
   *         consumed here, otherwise elStartEnd + 1 (content is parsed normally).
   */
  function parseHtmlSpecialContent(source,elStartEnd,tagName,entityReplacer,domBuilder){
    if(/^(?:script|textarea)$/i.test(tagName)){
      var elEndStart =  source.indexOf('</'+tagName+'>',elStartEnd);
      if(elEndStart<0){
        // No exact closing tag: substring(x, -1) would swap its arguments and
        // return text from *before* the element, so fall back to normal parsing.
        return elStartEnd+1;
      }
      var text = source.substring(elStartEnd+1,elEndStart);
      if(/[&<]/.test(text)){
        if(!/^script$/i.test(tagName)){
          // textarea: entity references are resolved
          text = text.replace(/&#?\w+;/g,entityReplacer);
        }
        domBuilder.characters(text,0,text.length);
        return elEndStart;
      }
    }
    return elStartEnd+1;
  }
  /**
   * Tells whether an element that was not written as self-closing has no end
   * tag after its start tag, in which case the caller treats it as closed.
   *
   * The position of the last end tag is looked up once per tag name and cached
   * in `closeMap`.
   *
   * @param {string} source The full markup.
   * @param {number} elStartEnd Index where the start tag ends.
   * @param {string} tagName Tag name to look for.
   * @param {Object} closeMap Cache: tag name to index of its last end tag.
   * @return {boolean} true when the last end tag lies before `elStartEnd`.
   */
  function fixSelfClosed(source,elStartEnd,tagName,closeMap){
    var lastClose = closeMap[tagName];
    if(lastClose == null){
      var closeTagStart = '</'+tagName;
      lastClose = source.lastIndexOf(closeTagStart+'>');
      if(lastClose<elStartEnd){
        // The author may have forgotten the ">": retry without it.
        lastClose = source.lastIndexOf(closeTagStart);
      }
      closeMap[tagName] = lastClose;
    }
    return lastClose<elStartEnd;
  }
  /**
   * Shallow-copies every enumerable property of `source` (inherited ones
   * included, as for..in visits them) onto `target`.
   *
   * @param {Object} source Object to read from.
   * @param {Object} target Object to write to.
   */
  function _copy(source,target){
    var key;
    for(key in source){
      target[key] = source[key];
    }
  }
  /**
   * Handles the constructs that start with "<!": comments, CDATA sections and
   * the DOCTYPE declaration.
   *
   * @param {string} source The full markup.
   * @param {number} start Index of the "<" of "<!".
   * @param {Object} domBuilder Receiver of comment / CDATA / DTD events.
   * @param {Object} errorHandler Object with an error() method.
   * @return {number} Index just past the construct, or -1 when it could not be
   *         parsed (unclosed comment, unclosed CDATA, unknown declaration).
   */
  function parseDCC(source,start,domBuilder,errorHandler){//sure start with '<!'
    var next= source.charAt(start+2)
    switch(next){
    case '-':
      if(source.charAt(start + 3) === '-'){
        var end = source.indexOf('-->',start+4);
        //append comment source.substring(4,end)//<!--
        if(end>start){
          domBuilder.comment(source,start+4,end-start-4);
          return end+3;
        }else{
          errorHandler.error("Unclosed comment");
          return -1;
        }
      }else{
        //error: "<!-" that is not followed by a second "-"
        return -1;
      }
    default:
      if(source.substr(start+3,6) == 'CDATA['){
        var end = source.indexOf(']]>',start+9);
        if(end<0){
          // Without this guard a negative length would be passed to characters()
          // and a bogus position would be returned.
          errorHandler.error("Unclosed CDATA");
          return -1;
        }
        domBuilder.startCDATA();
        domBuilder.characters(source,start+9,end-start-9);
        domBuilder.endCDATA()
        return end+3;
      }
      //<!DOCTYPE
      //startDTD(java.lang.String name, java.lang.String publicId, java.lang.String systemId)
      var matchs = split(source,start);
      var len = matchs.length;
      if(len>1 && /!doctype/i.test(matchs[0][0])){
        var name = matchs[1][0];
        var pubid = false;
        var sysid = false;
        if(len>3){
          if(/^public$/i.test(matchs[2][0])){
            pubid = matchs[3][0];
            sysid = len>4 && matchs[4][0];
          }else if(/^system$/i.test(matchs[2][0])){
            sysid = matchs[3][0];
          }
        }
        // The last token is the one that terminated the declaration.
        var lastMatch = matchs[len-1]
        domBuilder.startDTD(name, pubid, sysid);
        domBuilder.endDTD();
        return lastMatch.index+lastMatch[0].length
      }
    }
    return -1;
  }
  /**
   * Parses a processing instruction ("<?target data?>") and reports it through
   * domBuilder.processingInstruction(target, data).
   *
   * @param {string} source The full markup.
   * @param {number} start Index of the "<" of "<?".
   * @param {Object} domBuilder Receiver of the processingInstruction event.
   * @return {number} Index just past the closing "?>", or -1 when the
   *         instruction is not terminated or cannot be parsed.
   */
  function parseInstruction(source,start,domBuilder){
    var end = source.indexOf('?>',start);
    // indexOf signals "not found" with -1, which is truthy, so test it explicitly;
    // otherwise substring(start, -1) would hand back text from before `start`.
    if(end<0){
      return -1;
    }
    var match = source.substring(start,end).match(/^<\?(\S*)\s*([\s\S]*?)\s*$/);
    if(!match){//error
      return -1;
    }
    domBuilder.processingInstruction(match[1], match[2]) ;
    return end+2;
  }
  /**
   * Array-like collection of the attributes of one start tag. It also carries
   * the tag name and is handed to the DOM builder in startElement().
   *
   * `attributeNames` maps each recorded qualified name to its index. It is
   * created without a prototype so that membership tests such as
   * `name in attributeNames` never match inherited members like "constructor"
   * or "toString" (which would wrongly flag such attributes as redefined).
   *
   * @constructor
   */
  function ElementAttributes(){
    this.attributeNames = Object.create(null)
  }
  ElementAttributes.prototype = {
    /**
     * Validates and stores the tag name.
     * @param {string} tagName
     * @throws {Error} when the name does not match tagNamePattern
     */
    setTagName:function(tagName){
      if(!tagNamePattern.test(tagName)){
        throw new Error('invalid tagName:'+tagName)
      }
      this.tagName = tagName
    },
    /**
     * Appends one attribute.
     * @param {string} qName Qualified attribute name.
     * @param {string} value Attribute value.
     * @param {number} offset Position in the source, stored for locator bookkeeping.
     * @throws {Error} when the name does not match tagNamePattern
     */
    addValue:function(qName, value, offset) {
      if(!tagNamePattern.test(qName)){
        throw new Error('invalid attribute:'+qName)
      }
      this.attributeNames[qName] = this.length;
      this[this.length++] = {qName:qName,value:value,offset:offset}
    },
    // Starts at 0 on the prototype; the first addValue() creates an own property.
    length:0,
    getLocalName:function(i){return this[i].localName},
    getLocator:function(i){return this[i].locator},
    getQName:function(i){return this[i].qName},
    getURI:function(i){return this[i].uri},
    getValue:function(i){return this[i].value}
  //	,getIndex:function(uri, localName)){
  //		if(localName){
  //
  //		}else{
  //			var qName = uri
  //		}
  //	},
  //	getValue:function(){return this.getValue(this.getIndex.apply(this,arguments))},
  //	getType:function(uri,localName){}
  //	getType:function(i){},
  }
  /**
   * Tokenizes a declaration such as "<!DOCTYPE ...>" starting at `start`.
   * Tokens are quoted strings, bare words (optionally ending in "=") and the
   * delimiters "<", ">" and "/>".
   *
   * @param {string} source The full markup.
   * @param {number} start Index of the opening "<", which is skipped.
   * @return {Array|undefined} The regex match objects up to and including the
   *         first delimiter token; undefined when the input ends before one.
   */
  function split(source,start){
    var tokenPattern = /'[^']+'|"[^"]+"|[^\s<>\/=]+=?|(\/?\s*>|<)/g;
    var tokens = [];
    var token;
    tokenPattern.lastIndex = start;
    tokenPattern.exec(source);//skip <
    for(;;){
      token = tokenPattern.exec(source);
      if(!token){
        // Ran out of input without meeting a delimiter.
        return;
      }
      tokens.push(token);
      if(token[1]){
        return tokens;
      }
    }
  }
  // Public API of this module (CommonJS): the SAX reader and the error type
  // that the reader rethrows instead of routing it to the error handler.
  exports.XMLReader = XMLReader;
  exports.ParseError = ParseError;