Newer
Older
dub_jkp / source / dub / internal / undead / xml.d
  1. // Written in the D programming language.
  2.  
  3. /**
  4. $(RED Warning: This module is considered out-dated and not up to Phobos'
  5. current standards. It will remain until we have a suitable replacement,
  6. but be aware that it will not remain long term.)
  7.  
  8. Classes and functions for creating and parsing XML
  9.  
  10. The basic architecture of this module is that there are standalone functions,
  11. classes for constructing an XML document from scratch (Tag, Element and
  12. Document), and also classes for parsing a pre-existing XML file (ElementParser
  13. and DocumentParser). The parsing classes <i>may</i> be used to build a
  14. Document, but that is not their primary purpose. The handling capabilities of
  15. DocumentParser and ElementParser are sufficiently customizable that you can
  16. make them do pretty much whatever you want.
  17.  
  18. Example: This example creates a DOM (Document Object Model) tree
  19. from an XML file.
  20. ------------------------------------------------------------------------------
  21. import dub.internal.undead.xml;
  22. import std.stdio;
  23. import std.string;
  24. import std.file;
  25.  
  26. // books.xml is used in various samples throughout the Microsoft XML Core
  27. // Services (MSXML) SDK.
  28. //
  29. // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx
  30.  
  31. void main()
  32. {
  33. string s = cast(string) std.file.read("books.xml");
  34.  
  35. // Check for well-formedness
  36. check(s);
  37.  
  38. // Make a DOM tree
  39. auto doc = new Document(s);
  40.  
  41. // Plain-print it
  42. writeln(doc);
  43. }
  44. ------------------------------------------------------------------------------
  45.  
  46. Example: This example does much the same thing, except that the file is
  47. deconstructed and reconstructed by hand. This is more work, but the
  48. techniques involved offer vastly more power.
  49. ------------------------------------------------------------------------------
  import dub.internal.undead.xml;
  import std.file;
  import std.stdio;
  import std.string;
  53.  
  54. struct Book
  55. {
  56. string id;
  57. string author;
  58. string title;
  59. string genre;
  60. string price;
  61. string pubDate;
  62. string description;
  63. }
  64.  
  65. void main()
  66. {
  67. string s = cast(string) std.file.read("books.xml");
  68.  
  69. // Check for well-formedness
  70. check(s);
  71.  
  72. // Take it apart
  73. Book[] books;
  74.  
  75. auto xml = new DocumentParser(s);
  76. xml.onStartTag["book"] = (ElementParser xml)
  77. {
  78. Book book;
  79. book.id = xml.tag.attr["id"];
  80.  
  81. xml.onEndTag["author"] = (in Element e) { book.author = e.text(); };
  82. xml.onEndTag["title"] = (in Element e) { book.title = e.text(); };
  83. xml.onEndTag["genre"] = (in Element e) { book.genre = e.text(); };
  84. xml.onEndTag["price"] = (in Element e) { book.price = e.text(); };
  85. xml.onEndTag["publish-date"] = (in Element e) { book.pubDate = e.text(); };
  86. xml.onEndTag["description"] = (in Element e) { book.description = e.text(); };
  87.  
  88. xml.parse();
  89.  
  90. books ~= book;
  91. };
  92. xml.parse();
  93.  
  94. // Put it back together again;
  95. auto doc = new Document(new Tag("catalog"));
  96. foreach (book;books)
  97. {
  98. auto element = new Element("book");
  99. element.tag.attr["id"] = book.id;
  100.  
  101. element ~= new Element("author", book.author);
  102. element ~= new Element("title", book.title);
  103. element ~= new Element("genre", book.genre);
  104. element ~= new Element("price", book.price);
  105. element ~= new Element("publish-date",book.pubDate);
  106. element ~= new Element("description", book.description);
  107.  
  108. doc ~= element;
  109. }
  110.  
  111. // Pretty-print it
  writeln(join(doc.pretty(3),"\n"));
  113. }
  114. -------------------------------------------------------------------------------
  115. Copyright: Copyright Janice Caron 2008 - 2009.
  116. License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
  117. Authors: Janice Caron
  118. Source: $(PHOBOSSRC std/xml.d)
  119. */
  120. /*
  121. Copyright Janice Caron 2008 - 2009.
  122. Distributed under the Boost Software License, Version 1.0.
  123. (See accompanying file LICENSE_1_0.txt or copy at
  124. http://www.boost.org/LICENSE_1_0.txt)
  125. */
  126. module dub.internal.undead.xml;
  127.  
  /// Opening delimiter of an XML CDATA section.
  enum cdata = "<![CDATA[";
  129.  
  /**
   * Tests whether a code point is a legal XML character.
   *
   * Accepted are tab, line feed, carriage return, every code point from
   * U+0020 up to U+D7FF, and every code point from U+E000 up to U+10FFFF
   * except U+FFFE and U+FFFF.
   *
   * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
   *
   * Params:
   *    c = the character to be tested
   *
   * Returns: true if c is an XML character, false otherwise
   */
  bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
  {
      // Below the surrogate block: everything from space upwards, plus the
      // three permitted control characters.
      if (c <= 0xD7FF)
          return c >= 0x20 || c == 0x9 || c == 0xA || c == 0xD;

      // Surrogates (U+D800 .. U+DFFF) and values beyond the last code point.
      if (c < 0xE000 || c > 0x10FFFF)
          return false;

      // Within U+E000 .. U+10FFFF only these two code points are rejected.
      return c != 0xFFFE && c != 0xFFFF;
  }
  161.  
  @safe @nogc nothrow pure unittest
  {
      // Code points on either side of every boundary isChar distinguishes.
      static immutable uint[] accepted = [
          0x9, 0xA, 0xD, 0x20, 'J', 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF
      ];
      static immutable uint[] rejected = [
          0x8, 0xB, 0xC, 0xE, 0x1F, 0xD800, 0xDFFF, 0xFFFE, 0xFFFF, 0x110000
      ];

      foreach (codePoint; accepted)
          assert( isChar(cast(dchar) codePoint));
      foreach (codePoint; rejected)
          assert(!isChar(cast(dchar) codePoint));

      // Optional exhaustive comparison against the lookup table.
      debug (stdxml_TestHardcodedChecks)
      {
          foreach (c; 0 .. dchar.max + 1)
              assert(isChar(c) == lookup(CharTable, c));
      }
  }
  191.  
  /**
   * Tests whether a character is XML whitespace.
   *
   * XML recognises exactly four whitespace characters: space, tab,
   * line feed and carriage return.
   *
   * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
   *
   * Params:
   *    c = the character to be tested
   *
   * Returns: true if c is one of the four whitespace characters
   */
  bool isSpace(dchar c) @safe @nogc pure nothrow
  {
      switch (c)
      {
          case 0x20: // space
          case 0x09: // tab
          case 0x0A: // line feed
          case 0x0D: // carriage return
              return true;
          default:
              return false;
      }
  }
  207.  
  /**
   * Tests whether a character is a digit according to the XML standard.
   *
   * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
   *
   * Params:
   *    c = the character to be tested
   *
   * Returns: true if c is a digit
   */
  bool isDigit(dchar c) @safe @nogc pure nothrow
  {
      // U+0030 .. U+0039 is answered directly; every other code point is
      // looked up in DigitTable.
      immutable bool asciiDigit = c >= 0x0030 && c <= 0x0039;
      return asciiDigit || lookup(DigitTable, c);
  }
  223.  
  @safe @nogc nothrow pure unittest
  {
      // Only active when the hard-coded checks are being validated: isDigit
      // must agree with DigitTable for every value in the dchar range.
      debug (stdxml_TestHardcodedChecks)
      {
          foreach (codePoint; 0 .. dchar.max + 1)
          {
              assert(isDigit(codePoint) == lookup(DigitTable, codePoint));
          }
      }
  }
  232.  
  /**
   * Tests whether a character is a letter according to the XML standard.
   *
   * A letter is either an ideographic character or a base character.
   *
   * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
   *
   * Params:
   *    c = the character to be tested
   *
   * Returns: true if c is a letter
   */
  bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
  {
      // The range-based ideographic test runs first; the base-character
      // lookup is only consulted when it fails.
      if (isIdeographic(c))
          return true;
      return isBaseChar(c);
  }
  245.  
  /**
   * Tests whether a character is an ideographic character according to the
   * XML standard.
   *
   * The ideographic characters are U+3007, U+3021 .. U+3029 and
   * U+4E00 .. U+9FA5.
   *
   * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
   *
   * Params:
   *    c = the character to be tested
   *
   * Returns: true if c is an ideographic character
   */
  bool isIdeographic(dchar c) @safe @nogc nothrow pure
  {
      return c == 0x3007
          || (0x3021 <= c && c <= 0x3029)
          || (0x4E00 <= c && c <= 0x9FA5);
  }
  265.  
  @safe @nogc nothrow pure unittest
  {
      // The single code point and both ends of each accepted range.
      static immutable uint[] boundaries = [0x4E00, 0x9FA5, 0x3007, 0x3021, 0x3029];
      foreach (codePoint; boundaries)
          assert(isIdeographic(cast(dchar) codePoint));

      // Optional exhaustive comparison against the lookup table.
      debug (stdxml_TestHardcodedChecks)
      {
          foreach (c; 0 .. dchar.max + 1)
              assert(isIdeographic(c) == lookup(IdeographicTable, c));
      }
  }
  280.  
  /**
   * Tests whether a character is a base character according to the XML
   * standard.
   *
   * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
   *
   * Params:
   *    c = the character to be tested
   *
   * Returns: the result of looking c up in BaseCharTable
   */
  bool isBaseChar(dchar c) @safe @nogc nothrow pure
  {
      immutable bool listed = lookup(BaseCharTable, c);
      return listed;
  }
  294.  
  /**
   * Tests whether a character is a combining character according to the
   * XML standard.
   *
   * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
   *
   * Params:
   *    c = the character to be tested
   *
   * Returns: the result of looking c up in CombiningCharTable
   */
  bool isCombiningChar(dchar c) @safe @nogc nothrow pure
  {
      immutable bool listed = lookup(CombiningCharTable, c);
      return listed;
  }
  308.  
  /**
   * Tests whether a character is an extender according to the XML standard.
   *
   * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
   *
   * Params:
   *    c = the character to be tested
   *
   * Returns: the result of looking c up in ExtenderTable
   */
  bool isExtender(dchar c) @safe @nogc nothrow pure
  {
      immutable bool listed = lookup(ExtenderTable, c);
      return listed;
  }
  321.  
  /**
   * Escapes a string for inclusion in XML by replacing the five special
   * characters with their predefined entities.
   *
   * The characters replaced are ampersand, quote, apostrophe, less-than and
   * greater-than; decode() performs the reverse mapping. Both functions are
   * conveniences only: the undead.xml classes encode and decode
   * automatically, so they are not needed when using those classes.
   *
   * When the input contains none of the five characters, the original
   * string is returned unchanged.
   *
   * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
   *
   * Params:
   *    s = The string to be encoded
   *
   * Returns: The encoded string
   *
   * Example:
   * --------------
   * writeln(encode("a > b")); // writes "a &gt; b"
   * --------------
   */
  S encode(S)(S s)
  {
      import std.array : appender;

      auto escaped = appender!S();
      size_t copiedUpTo;   // index just past the last character replaced
      bool changed;        // becomes true once any entity has been written

      foreach (idx, ch; s)
      {
          string entity;
          switch (ch)
          {
              case '<':  entity = "&lt;";   break;
              case '>':  entity = "&gt;";   break;
              case '&':  entity = "&amp;";  break;
              case '"':  entity = "&quot;"; break;
              case '\'': entity = "&apos;"; break;
              default:   continue;          // ordinary character: copied later in bulk
          }

          // Flush the untouched run that precedes this character, then the entity.
          escaped.put(s[copiedUpTo .. idx]);
          escaped.put(entity);
          copiedUpTo = idx + 1;
          changed = true;
      }

      // Nothing needed escaping: hand back the caller's own string.
      if (!changed)
          return s;

      escaped.put(s[copiedUpTo .. $]);
      return escaped.data;
  }
  375.  
  @safe pure unittest
  {
      // Input with nothing to escape must come back as the very same string.
      auto untouched = "hello";
      assert(encode(untouched) is untouched);

      // One case per escaped character: [input, expected output].
      static immutable string[2][] cases = [
          ["a > b", "a &gt; b"],
          ["a < b", "a &lt; b"],
          ["don't", "don&apos;t"],
          ["\"hi\"", "&quot;hi&quot;"],
          ["cat & dog", "cat &amp; dog"]
      ];
      foreach (pair; cases)
      {
          string input = pair[0];
          string expected = pair[1];
          assert(encode(input) == expected, encode(input));
      }
  }
  386.  
  /**
   * Mode to use for decoding.
   */
  enum DecodeMode
  {
      NONE,   /// Do not decode
      LOOSE,  /// Decode, but ignore errors
      STRICT  /// Decode, and throw exception on error
  }
  398.  
  /**
   * Unescapes the predefined XML entities and character references in a
   * string.
   *
   * This is the inverse of encode(). Like encode(), it is a convenience
   * only: the undead.xml classes encode and decode automatically, so it is
   * not needed when using those classes.
   *
   * The entities recognised are &amp;amp;, &amp;quot;, &amp;apos;,
   * &amp;lt; and &amp;gt;, together with decimal and hexadecimal character
   * references such as &amp;#x20AC;
   *
   * If the string does not contain an ampersand, the original will be
   * returned.
   *
   * The mode selects how the input is treated: DecodeMode.NONE returns it
   * untouched, DecodeMode.LOOSE decodes what it can and copies any
   * unrecognised ampersand through literally, and DecodeMode.STRICT throws
   * a DecodeException on the first ampersand that does not start a valid
   * entity or character reference.
   *
   * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
   *
   * Params:
   *    s = The string to be decoded
   *    mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
   *
   * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
   *
   * Returns: The decoded string
   *
   * Example:
   * --------------
   * writeln(decode("a &gt; b")); // writes "a > b"
   * --------------
   */
  string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure
  {
      import std.algorithm.searching : startsWith;

      if (mode == DecodeMode.NONE)
          return s;

      // The predefined entities and the characters they stand for, index-aligned.
      static immutable string[5] entityNames = ["&amp;", "&quot;", "&apos;", "&lt;", "&gt;"];
      static immutable char[5] entityChars = ['&', '"', '\'', '<', '>'];

      // Stays empty until the first ampersand is met, so input without any
      // ampersand is never copied.
      string decoded;

      for (size_t pos = 0; pos < s.length; ++pos)
      {
          immutable char ch = s[pos];
          if (ch != '&')
          {
              if (decoded.length != 0)
                  decoded ~= ch;
              continue;
          }

          // First ampersand: seed the output with everything before it.
          if (decoded.length == 0)
              decoded = s[0 .. pos].dup;

          string rest = s[pos .. $];

          if (startsWith(rest, "&#"))
          {
              try
              {
                  dchar codePoint;
                  string remaining = rest;
                  checkCharRef(remaining, codePoint);
                  char[4] utf8;
                  import std.utf : encode;
                  decoded ~= utf8[0 .. encode(utf8, codePoint)];
                  // Land on the last character consumed; the loop increment
                  // then moves past it.
                  pos = s.length - remaining.length - 1;
              }
              catch (Err e)
              {
                  if (mode == DecodeMode.STRICT)
                      throw new DecodeException("Unescaped &");
                  decoded ~= '&';
              }
              continue;
          }

          bool matched = false;
          foreach (k, entity; entityNames)
          {
              if (startsWith(rest, entity))
              {
                  decoded ~= entityChars[k];
                  // Skip the rest of the entity; the loop increment accounts
                  // for the ampersand itself.
                  pos += entity.length - 1;
                  matched = true;
                  break;
              }
          }

          if (!matched)
          {
              if (mode == DecodeMode.STRICT)
                  throw new DecodeException("Unescaped &");
              decoded ~= '&';
          }
      }

      return (decoded.length == 0) ? s : decoded;
  }
  487.  
  @safe pure unittest
  {
      // Fails unless strict decoding of `input` raises a DecodeException.
      void assertNot(string input) pure
      {
          bool threw = false;
          try { decode(input, DecodeMode.STRICT); }
          catch (DecodeException e) { threw = true; }
          assert(threw, input);
      }

      // Input without an ampersand comes back as the very same string.
      auto untouched = "hello";
      assert(decode(untouched, DecodeMode.STRICT) is untouched);

      // Well-formed input in strict mode: [input, expected output].
      static immutable string[2][] wellFormed = [
          ["a &gt; b", "a > b"],
          ["a &lt; b", "a < b"],
          ["don&apos;t", "don't"],
          ["&quot;hi&quot;", "\"hi\""],
          ["cat &amp; dog", "cat & dog"],
          ["&#42;", "*"],
          ["&#x2A;", "*"]
      ];
      foreach (pair; wellFormed)
      {
          string input = pair[0];
          string expected = pair[1];
          assert(decode(input, DecodeMode.STRICT) == expected);
      }

      // Malformed input: loose mode passes it through unchanged ...
      static immutable string[] malformed = [
          "cat & dog", "a &gt b", "&#;", "&#x;", "&#2G;", "&#x2G;"
      ];
      foreach (entry; malformed)
      {
          string input = entry;
          assert(decode(input, DecodeMode.LOOSE) == input);
      }

      // ... and strict mode rejects it.
      foreach (entry; malformed)
      {
          string input = entry;
          assertNot(input);
      }
  }
  523.  
  /**
   * Class representing an XML document.
   *
   * A Document is the root Element plus the text surrounding it: the
   * prolog before the root element and the epilog after it.
   *
   * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
   *
   */
  class Document : Element
  {
      /**
       * Contains all text which occurs before the root element.
       * Defaults to &lt;?xml version="1.0"?&gt;
       */
      string prolog = "<?xml version=\"1.0\"?>";

      /**
       * Contains all text which occurs after the root element.
       * Defaults to the empty string
       */
      string epilog;

      /**
       * Constructs a Document by parsing XML text.
       *
       * This function creates a complete DOM (Document Object Model) tree.
       *
       * The input to this function MUST be valid XML, and must not be
       * empty (checked by this constructor's in contract).
       *
       * Params:
       *    s = the complete XML text.
       */
      this(string s)
      in
      {
          assert(s.length != 0);
      }
      do
      {
          auto parser = new DocumentParser(s);
          string rootTagText = parser.tag.tagString;

          this(parser.tag);

          // The prolog is everything in s that precedes the root tag's text.
          immutable prologLength = rootTagText.ptr - s.ptr;
          prolog = s[0 .. prologLength];

          parse(parser);

          // What the parser has not consumed after the root element.
          epilog = *parser.s;
      }

      /**
       * Constructs a Document from a Tag.
       *
       * Params:
       *    tag = the start tag of the document.
       */
      this(const(Tag) tag)
      {
          super(tag);
      }

      /**
       * Compares two Documents for equality
       *
       * Documents are equal when their prologs, their element content and
       * their epilogs are all equal.
       *
       * Example:
       * --------------
       * Document d1,d2;
       * if (d1 == d2) { }
       * --------------
       */
      override bool opEquals(scope const Object o) const
      {
          const scope other = toType!(const Document)(o);
          if (prolog != other.prolog)
              return false;
          if (!(cast(const) this).Element.opEquals(cast(const) other))
              return false;
          return epilog == other.epilog;
      }

      /**
       * Compares two Documents
       *
       * The prolog is compared first, then the element content, then the
       * epilog.
       *
       * You should rarely need to call this function. It exists so that
       * Documents can be used as associative array keys.
       *
       * Example:
       * --------------
       * Document d1,d2;
       * if (d1 < d2) { }
       * --------------
       */
      override int opCmp(scope const Object o) scope const
      {
          const scope other = toType!(const Document)(o);

          if (prolog != other.prolog)
              return prolog < other.prolog ? -1 : 1;

          immutable elementOrder = this.Element.opCmp(other);
          if (elementOrder != 0)
              return elementOrder;

          if (epilog != other.epilog)
              return epilog < other.epilog ? -1 : 1;

          return 0;
      }

      /**
       * Returns the hash of a Document
       *
       * You should rarely need to call this function. It exists so that
       * Documents can be used as associative array keys.
       */
      override size_t toHash() scope const @trusted
      {
          // Fold the element hash with the epilog, then with the prolog.
          immutable elementHash = (cast() this).Element.toHash();
          immutable withEpilog = hash(epilog, elementHash);
          return hash(prolog, withEpilog);
      }

      /**
       * Returns the string representation of a Document. (That is, the
       * complete XML of a document).
       */
      override string toString() scope const @safe
      {
          string rendered = prolog;
          rendered ~= super.toString();
          rendered ~= epilog;
          return rendered;
      }
  }
  645.  
  @system unittest
  {
      // https://issues.dlang.org/show_bug.cgi?id=14966
      string source = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

      auto first = new Document(source);
      auto second = new Document(source);

      // Two documents parsed from the same text are equal and unordered ...
      assert(first == second);
      assert(!(first < second));

      // ... and interchangeable as associative array keys.
      int[Document] byDocument;
      byDocument[first] = 1;
      assert(byDocument[second] == 1);

      // Appending a child makes the second document sort after the first.
      second ~= new Element("b");
      assert(first < second);
      assert(second > first);
  }
  663.  
  /**
   * Class representing an XML element.
   *
   * An element owns its start tag and an ordered list of child items. Each
   * appended item is also recorded in a per-kind list (texts, cdatas,
   * comments, pis, elements).
   *
   * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
   */
  class Element : Item
  {
      Tag tag; /// The start tag of the element
      Item[] items; /// The element's items, in the order they were appended
      Text[] texts; /// The element's text items
      CData[] cdatas; /// The element's CData items
      Comment[] comments; /// The element's comments
      ProcessingInstruction[] pis; /// The element's processing instructions
      Element[] elements; /// The element's child elements

      /**
       * Constructs an Element given a name and a string to be used as a Text
       * interior.
       *
       * Params:
       *    name = the name of the element.
       *    interior = (optional) the string interior. No Text item is
       *               added when it is empty.
       *
       * Example:
       * -------------------------------------------------------
       * auto element = new Element("title","Serenity");
       * // constructs the element <title>Serenity</title>
       * -------------------------------------------------------
       */
      this(string name, string interior=null) @safe pure
      {
          this(new Tag(name));
          if (interior.length != 0) opOpAssign!("~")(new Text(interior));
      }

      /**
       * Constructs an Element from a Tag.
       *
       * The tag is copied (name, attributes and tag string); the new
       * element's tag starts out as TagType.EMPTY and is switched to
       * TagType.START once a non-empty item is appended.
       *
       * Params:
       *    tag_ = the start or empty tag of the element.
       */
      this(const(Tag) tag_) @safe pure
      {
          this.tag = new Tag(tag_.name);
          tag.type = TagType.EMPTY;
          foreach (k,v;tag_.attr) tag.attr[k] = v;
          tag.tagString = tag_.tagString;
      }

      /**
       * Append a text item to the interior of this element
       *
       * Params:
       *    item = the item you wish to append.
       *
       * Example:
       * --------------
       * auto element = new Element("p");
       * element ~= new Text("hello");
       * --------------
       */
      void opOpAssign(string op)(Text item) @safe pure
      if (op == "~")
      {
          texts ~= item;
          appendItem(item);
      }

      /**
       * Append a CData item to the interior of this element
       *
       * Params:
       *    item = the item you wish to append.
       *
       * Example:
       * --------------
       * auto element = new Element("p");
       * element ~= new CData("hello");
       * --------------
       */
      void opOpAssign(string op)(CData item) @safe pure
      if (op == "~")
      {
          cdatas ~= item;
          appendItem(item);
      }

      /**
       * Append a comment to the interior of this element
       *
       * Params:
       *    item = the item you wish to append.
       *
       * Example:
       * --------------
       * auto element = new Element("p");
       * element ~= new Comment("hello");
       * --------------
       */
      void opOpAssign(string op)(Comment item) @safe pure
      if (op == "~")
      {
          comments ~= item;
          appendItem(item);
      }

      /**
       * Append a processing instruction to the interior of this element
       *
       * Params:
       *    item = the item you wish to append.
       *
       * Example:
       * --------------
       * auto element = new Element("p");
       * element ~= new ProcessingInstruction("hello");
       * --------------
       */
      void opOpAssign(string op)(ProcessingInstruction item) @safe pure
      if (op == "~")
      {
          pis ~= item;
          appendItem(item);
      }

      /**
       * Append a complete element to the interior of this element
       *
       * Params:
       *    item = the item you wish to append.
       *
       * Example:
       * --------------
       * auto element = new Element("p");
       * Element other = new Element("br");
       * element ~= other;
       * // appends element representing <br />
       * --------------
       */
      void opOpAssign(string op)(Element item) @safe pure
      if (op == "~")
      {
          elements ~= item;
          appendItem(item);
      }

      // Records item in the ordered item list and promotes the tag from
      // EMPTY to START as soon as the element holds a non-empty item.
      private void appendItem(Item item) @safe pure
      {
          items ~= item;
          if (tag.type == TagType.EMPTY && !item.isEmptyXML)
              tag.type = TagType.START;
      }

      // Fills this element from the parser: installs handlers that append
      // each text, CDATA, comment and processing instruction, recurses into
      // every child start tag, then runs the parser.
      private void parse(ElementParser xml)
      {
          xml.onText = (string s) { opOpAssign!("~")(new Text(s)); };
          xml.onCData = (string s) { opOpAssign!("~")(new CData(s)); };
          xml.onComment = (string s) { opOpAssign!("~")(new Comment(s)); };
          xml.onPI = (string s) { opOpAssign!("~")(new ProcessingInstruction(s)); };

          xml.onStartTag[null] = (ElementParser xml)
          {
              auto e = new Element(xml.tag);
              e.parse(xml);
              opOpAssign!("~")(e);
          };

          xml.parse();
      }

      /**
       * Compares two Elements for equality
       *
       * Two elements are equal when they hold the same number of items and
       * every pair of corresponding items compares equal. The tag is not
       * consulted.
       *
       * Example:
       * --------------
       * Element e1,e2;
       * if (e1 == e2) { }
       * --------------
       */
      override bool opEquals(scope const Object o) const
      {
          const scope element = toType!(const Element)(o);
          immutable len = items.length;
          if (len != element.items.length) return false;
          foreach (i; 0 .. len)
          {
              if (!items[i].opEquals(element.items[i])) return false;
          }
          return true;
      }

      /**
       * Compares two Elements
       *
       * Items are compared pairwise in order; the first pair that is not
       * equal decides the result. If one item list is a prefix of the
       * other, the shorter element sorts first.
       *
       * You should rarely need to call this function. It exists so that Elements
       * can be used as associative array keys.
       *
       * Example:
       * --------------
       * Element e1,e2;
       * if (e1 < e2) { }
       * --------------
       */
      override int opCmp(scope const Object o) @safe const
      {
          const scope element = toType!(const Element)(o);
          // size_t, not uint: the index must be able to reach items.length.
          for (size_t i = 0; ; ++i)
          {
              if (i == items.length && i == element.items.length) return 0;
              if (i == items.length) return -1;
              if (i == element.items.length) return 1;
              if (!items[i].opEquals(element.items[i]))
                  return items[i].opCmp(element.items[i]);
          }
      }

      /**
       * Returns the hash of an Element
       *
       * The hash is built from the items only. opEquals compares nothing
       * but the items, so mixing the tag into the hash would allow two
       * elements that compare equal to hash differently, which breaks
       * lookup when elements are used as associative array keys.
       *
       * You should rarely need to call this function. It exists so that Elements
       * can be used as associative array keys.
       */
      override size_t toHash() scope const @safe
      {
          size_t hash = 0;
          foreach (item;items) hash += item.toHash();
          return hash;
      }

      const
      {
          /**
           * Returns the decoded interior of an element.
           *
           * The element is assumed to contain text <i>only</i>. So, for
           * example, given XML such as "&lt;title&gt;Good &amp;amp;
           * Bad&lt;/title&gt;", will return "Good &amp; Bad".
           *
           * Params:
           *    mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
           *
           * Throws: DecodeException if decode fails, or if the element
           *         holds an item that is not a Text
           */
          string text(DecodeMode mode=DecodeMode.LOOSE)
          {
              string buffer;
              foreach (item;items)
              {
                  // Cast to const(Text): this method is const, so there is
                  // no reason to strip the qualifier from the item.
                  auto t = cast(const(Text)) item;
                  if (t is null) throw new DecodeException(item.toString());
                  buffer ~= decode(t.toString(),mode);
              }
              return buffer;
          }

          /**
           * Returns an indented string representation of this item
           *
           * An empty element yields its empty tag; an element holding a
           * single Text yields one line; otherwise the start tag, each
           * child's lines shifted right by indent, and the end tag are
           * returned as separate lines.
           *
           * Params:
           *    indent = (optional) number of spaces by which to indent this
           *             element. Defaults to 2.
           */
          override string[] pretty(uint indent=2) scope
          {
              import std.algorithm.searching : count;
              import std.string : rightJustify;

              if (isEmptyXML) return [ tag.toEmptyString() ];

              if (items.length == 1)
              {
                  auto t = cast(const(Text))(items[0]);
                  if (t !is null)
                  {
                      return [tag.toStartString() ~ t.toString() ~ tag.toEndString()];
                  }
              }

              string[] a = [ tag.toStartString() ];
              foreach (item;items)
              {
                  string[] b = item.pretty(indent);
                  foreach (s;b)
                  {
                      // Padding to (length + indent) prefixes indent spaces.
                      a ~= rightJustify(s,count(s) + indent);
                  }
              }
              a ~= tag.toEndString();
              return a;
          }

          /**
           * Returns the string representation of an Element
           *
           * Example:
           * --------------
           * auto element = new Element("br");
           * writeln(element.toString()); // writes "<br />"
           * --------------
           */
          override string toString() scope @safe
          {
              if (isEmptyXML) return tag.toEmptyString();

              string buffer = tag.toStartString();
              foreach (item;items) { buffer ~= item.toString(); }
              buffer ~= tag.toEndString();
              return buffer;
          }

          /// True when the element has no items at all.
          override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; }
      }
  }
  977.  
  /**
   * Tag types.
   */
  enum TagType
  {
      START, /// Used for start tags
      END,   /// Used for end tags
      EMPTY  /// Used for empty tags
  }
  987.  
  988. /**
  989. * Class representing an XML tag.
  990. *
  991. * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
  992. *
  993. * The class invariant guarantees
  994. * <ul>
  995. * <li> that $(B type) is a valid enum TagType value</li>
  996. * <li> that $(B name) consists of valid characters</li>
  997. * <li> that each attribute name consists of valid characters</li>
  998. * </ul>
  999. */
  1000. class Tag
  1001. {
  1002. TagType type = TagType.START; /// Type of tag
  1003. string name; /// Tag name
  1004. string[string] attr; /// Associative array of attributes
  1005. private string tagString;
  1006.  
  1007. invariant()
  1008. {
  1009. string s;
  1010. string t;
  1011.  
  1012. assert(type == TagType.START
  1013. || type == TagType.END
  1014. || type == TagType.EMPTY);
  1015.  
  1016. s = name;
  1017. try { checkName(s,t); }
  1018. catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }
  1019.  
  1020. foreach (k,v;attr)
  1021. {
  1022. s = k;
  1023. try { checkName(s,t); }
  1024. catch (Err e)
  1025. { assert(false,"Invalid attribute name:" ~ e.toString()); }
  1026. }
  1027. }
  1028.  
  1029. /**
  1030. * Constructs an instance of Tag with a specified name and type
  1031. *
  1032. * The constructor does not initialize the attributes. To initialize the
  1033. * attributes, you access the $(B attr) member variable.
  1034. *
  1035. * Params:
  1036. * name = the Tag's name
  1037. * type = (optional) the Tag's type. If omitted, defaults to
  1038. * TagType.START.
  1039. *
  1040. * Example:
  1041. * --------------
  1042. * auto tag = new Tag("img",Tag.EMPTY);
  1043. * tag.attr["src"] = "http://example.com/example.jpg";
  1044. * --------------
  1045. */
  1046. this(string name, TagType type=TagType.START) @safe pure
  1047. {
  1048. this.name = name;
  1049. this.type = type;
  1050. }
  1051.  
  1052. /* Private constructor (so don't ddoc this!)
  1053. *
  1054. * Constructs a Tag by parsing the string representation, e.g. "<html>".
  1055. *
  1056. * The string is passed by reference, and is advanced over all characters
  1057. * consumed.
  1058. *
  1059. * The second parameter is a dummy parameter only, required solely to
  1060. * distinguish this constructor from the public one.
  1061. */
  1062. private this(ref string s, bool dummy) @safe pure
  1063. {
  1064. import std.algorithm.searching : countUntil;
  1065. import std.ascii : isWhite;
  1066. import std.utf : byCodeUnit;
  1067.  
  1068. tagString = s;
  1069. try
  1070. {
  1071. reqc(s,'<');
  1072. if (optc(s,'/')) type = TagType.END;
  1073. ptrdiff_t i = s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f");
  1074. name = s[0 .. i];
  1075. s = s[i .. $];
  1076.  
  1077. i = s.byCodeUnit.countUntil!(a => !isWhite(a));
  1078. s = s[i .. $];
  1079.  
  1080. while (s.length > 0 && s[0] != '>' && s[0] != '/')
  1081. {
  1082. i = s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f");
  1083. string key = s[0 .. i];
  1084. s = s[i .. $];
  1085.  
  1086. i = s.byCodeUnit.countUntil!(a => !isWhite(a));
  1087. s = s[i .. $];
  1088. reqc(s,'=');
  1089. i = s.byCodeUnit.countUntil!(a => !isWhite(a));
  1090. s = s[i .. $];
  1091.  
  1092. immutable char quote = requireOneOf(s,"'\"");
  1093. i = s.byCodeUnit.countUntil(quote);
  1094. string val = decode(s[0 .. i], DecodeMode.LOOSE);
  1095. s = s[i .. $];
  1096. reqc(s,quote);
  1097.  
  1098. i = s.byCodeUnit.countUntil!(a => !isWhite(a));
  1099. s = s[i .. $];
  1100. attr[key] = val;
  1101. }
  1102. if (optc(s,'/'))
  1103. {
  1104. if (type == TagType.END) throw new TagException("");
  1105. type = TagType.EMPTY;
  1106. }
  1107. reqc(s,'>');
  1108. tagString.length = tagString.length - s.length;
  1109. }
  1110. catch (XMLException e)
  1111. {
  1112. tagString.length = tagString.length - s.length;
  1113. throw new TagException(tagString);
  1114. }
  1115. }
  1116.  
  1117. const
  1118. {
  1119. /**
  1120. * Compares two Tags for equality
  1121. *
  1122. * You should rarely need to call this function. It exists so that Tags
  1123. * can be used as associative array keys.
  1124. *
  1125. * Example:
  1126. * --------------
  1127. * Tag tag1,tag2
  1128. * if (tag1 == tag2) { }
  1129. * --------------
  1130. */
  1131. override bool opEquals(scope Object o)
  1132. {
  1133. const tag = toType!(const Tag)(o);
  1134. return
  1135. (name != tag.name) ? false : (
  1136. (attr != tag.attr) ? false : (
  1137. (type != tag.type) ? false : (
  1138. true )));
  1139. }
  1140.  
  1141. /**
  1142. * Compares two Tags
  1143. *
  1144. * Example:
  1145. * --------------
  1146. * Tag tag1,tag2
  1147. * if (tag1 < tag2) { }
  1148. * --------------
  1149. */
  1150. override int opCmp(Object o)
  1151. {
  1152. const tag = toType!(const Tag)(o);
  1153. // Note that attr is an AA, so the comparison is nonsensical (bug 10381)
  1154. return
  1155. ((name != tag.name) ? ( name < tag.name ? -1 : 1 ) :
  1156. ((attr != tag.attr) ? ( cast(void *) attr < cast(void*) tag.attr ? -1 : 1 ) :
  1157. ((type != tag.type) ? ( type < tag.type ? -1 : 1 ) :
  1158. 0 )));
  1159. }
  1160.  
  1161. /**
  1162. * Returns the hash of a Tag
  1163. *
  1164. * You should rarely need to call this function. It exists so that Tags
  1165. * can be used as associative array keys.
  1166. */
  1167. override size_t toHash()
  1168. {
  1169. return .hashOf(name);
  1170. }
  1171.  
  1172. /**
  1173. * Returns the string representation of a Tag
  1174. *
  1175. * Example:
  1176. * --------------
  1177. * auto tag = new Tag("book",TagType.START);
  1178. * writefln(tag.toString()); // writes "<book>"
  1179. * --------------
  1180. */
  1181. override string toString() @safe
  1182. {
  1183. if (isEmpty) return toEmptyString();
  1184. return (isEnd) ? toEndString() : toStartString();
  1185. }
  1186.  
  1187. private
  1188. {
  1189. string toNonEndString() @safe
  1190. {
  1191. import std.format : format;
  1192.  
  1193. string s = "<" ~ name;
  1194. foreach (key,val;attr)
  1195. s ~= format(" %s=\"%s\"",key,encode(val));
  1196. return s;
  1197. }
  1198.  
  1199. string toStartString() @safe { return toNonEndString() ~ ">"; }
  1200.  
  1201. string toEndString() @safe { return "</" ~ name ~ ">"; }
  1202.  
  1203. string toEmptyString() @safe { return toNonEndString() ~ " />"; }
  1204. }
  1205.  
  1206. /**
  1207. * Returns true if the Tag is a start tag
  1208. *
  1209. * Example:
  1210. * --------------
  1211. * if (tag.isStart) { }
  1212. * --------------
  1213. */
  1214. @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; }
  1215.  
  1216. /**
  1217. * Returns true if the Tag is an end tag
  1218. *
  1219. * Example:
  1220. * --------------
  1221. * if (tag.isEnd) { }
  1222. * --------------
  1223. */
  1224. @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END; }
  1225.  
  1226. /**
  1227. * Returns true if the Tag is an empty tag
  1228. *
  1229. * Example:
  1230. * --------------
  1231. * if (tag.isEmpty) { }
  1232. * --------------
  1233. */
  1234. @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; }
  1235. }
  1236. }
  1237.  
  1238. /**
  1239. * Class representing a comment
  1240. */
  1241. class Comment : Item
  1242. {
  1243. private string content;
  1244.  
  1245. /**
  1246. * Construct a comment
  1247. *
  1248. * Params:
  1249. * content = the body of the comment
  1250. *
  1251. * Throws: CommentException if the comment body is illegal (contains "--"
  1252. * or exactly equals "-")
  1253. *
  1254. * Example:
  1255. * --------------
  1256. * auto item = new Comment("This is a comment");
  1257. * // constructs <!--This is a comment-->
  1258. * --------------
  1259. */
  1260. this(string content) @safe pure
  1261. {
  1262. import std.string : indexOf;
  1263.  
  1264. if (content == "-" || content.indexOf("--") != -1)
  1265. throw new CommentException(content);
  1266. this.content = content;
  1267. }
  1268.  
  1269. /**
  1270. * Compares two comments for equality
  1271. *
  1272. * Example:
  1273. * --------------
  1274. * Comment item1,item2;
  1275. * if (item1 == item2) { }
  1276. * --------------
  1277. */
  1278. override bool opEquals(scope const Object o) const
  1279. {
  1280. const scope item = toType!(const Item)(o);
  1281. const t = cast(const Comment) item;
  1282. return t !is null && content == t.content;
  1283. }
  1284.  
  1285. /**
  1286. * Compares two comments
  1287. *
  1288. * You should rarely need to call this function. It exists so that Comments
  1289. * can be used as associative array keys.
  1290. *
  1291. * Example:
  1292. * --------------
  1293. * Comment item1,item2;
  1294. * if (item1 < item2) { }
  1295. * --------------
  1296. */
  1297. override int opCmp(scope const Object o) scope const
  1298. {
  1299. const scope item = toType!(const Item)(o);
  1300. const t = cast(const Comment) item;
  1301. return t !is null && (content != t.content
  1302. ? (content < t.content ? -1 : 1 ) : 0 );
  1303. }
  1304.  
  1305. /**
  1306. * Returns the hash of a Comment
  1307. *
  1308. * You should rarely need to call this function. It exists so that Comments
  1309. * can be used as associative array keys.
  1310. */
  1311. override size_t toHash() scope const nothrow { return hash(content); }
  1312.  
  1313. /**
  1314. * Returns a string representation of this comment
  1315. */
  1316. override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; }
  1317.  
  1318. override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
  1319. }
  1320.  
  1321. @safe unittest // issue 16241
  1322. {
  1323. import std.exception : assertThrown;
  1324. auto c = new Comment("==");
  1325. assert(c.content == "==");
  1326. assertThrown!CommentException(new Comment("--"));
  1327. }
  1328.  
  1329. /**
  1330. * Class representing a Character Data section
  1331. */
  1332. class CData : Item
  1333. {
  1334. private string content;
  1335.  
  1336. /**
  1337. * Construct a character data section
  1338. *
  1339. * Params:
  1340. * content = the body of the character data segment
  1341. *
  1342. * Throws: CDataException if the segment body is illegal (contains "]]>")
  1343. *
  1344. * Example:
  1345. * --------------
  1346. * auto item = new CData("<b>hello</b>");
  1347. * // constructs <![CDATA[<b>hello</b>]]>
  1348. * --------------
  1349. */
  1350. this(string content) @safe pure
  1351. {
  1352. import std.string : indexOf;
  1353. if (content.indexOf("]]>") != -1) throw new CDataException(content);
  1354. this.content = content;
  1355. }
  1356.  
  1357. /**
  1358. * Compares two CDatas for equality
  1359. *
  1360. * Example:
  1361. * --------------
  1362. * CData item1,item2;
  1363. * if (item1 == item2) { }
  1364. * --------------
  1365. */
  1366. override bool opEquals(scope const Object o) const
  1367. {
  1368. const scope item = toType!(const Item)(o);
  1369. const t = cast(const CData) item;
  1370. return t !is null && content == t.content;
  1371. }
  1372.  
  1373. /**
  1374. * Compares two CDatas
  1375. *
  1376. * You should rarely need to call this function. It exists so that CDatas
  1377. * can be used as associative array keys.
  1378. *
  1379. * Example:
  1380. * --------------
  1381. * CData item1,item2;
  1382. * if (item1 < item2) { }
  1383. * --------------
  1384. */
  1385. override int opCmp(scope const Object o) scope const
  1386. {
  1387. const scope item = toType!(const Item)(o);
  1388. const t = cast(const CData) item;
  1389. return t !is null && (content != t.content
  1390. ? (content < t.content ? -1 : 1 ) : 0 );
  1391. }
  1392.  
  1393. /**
  1394. * Returns the hash of a CData
  1395. *
  1396. * You should rarely need to call this function. It exists so that CDatas
  1397. * can be used as associative array keys.
  1398. */
  1399. override size_t toHash() scope const nothrow { return hash(content); }
  1400.  
  1401. /**
  1402. * Returns a string representation of this CData section
  1403. */
  1404. override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }
  1405.  
  1406. override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
  1407. }
  1408.  
  1409. /**
  1410. * Class representing a text (aka Parsed Character Data) section
  1411. */
  1412. class Text : Item
  1413. {
  1414. private string content;
  1415.  
  1416. /**
  1417. * Construct a text (aka PCData) section
  1418. *
  1419. * Params:
  1420. * content = the text. This function encodes the text before
  1421. * insertion, so it is safe to insert any text
  1422. *
  1423. * Example:
  1424. * --------------
  1425. * auto Text = new CData("a < b");
  1426. * // constructs a &lt; b
  1427. * --------------
  1428. */
  1429. this(string content) @safe pure
  1430. {
  1431. this.content = encode(content);
  1432. }
  1433.  
  1434. /**
  1435. * Compares two text sections for equality
  1436. *
  1437. * Example:
  1438. * --------------
  1439. * Text item1,item2;
  1440. * if (item1 == item2) { }
  1441. * --------------
  1442. */
  1443. override bool opEquals(scope const Object o) const
  1444. {
  1445. const scope item = toType!(const Item)(o);
  1446. const t = cast(const Text) item;
  1447. return t !is null && content == t.content;
  1448. }
  1449.  
  1450. /**
  1451. * Compares two text sections
  1452. *
  1453. * You should rarely need to call this function. It exists so that Texts
  1454. * can be used as associative array keys.
  1455. *
  1456. * Example:
  1457. * --------------
  1458. * Text item1,item2;
  1459. * if (item1 < item2) { }
  1460. * --------------
  1461. */
  1462. override int opCmp(scope const Object o) scope const
  1463. {
  1464. const scope item = toType!(const Item)(o);
  1465. const t = cast(const Text) item;
  1466. return t !is null
  1467. && (content != t.content ? (content < t.content ? -1 : 1 ) : 0 );
  1468. }
  1469.  
  1470. /**
  1471. * Returns the hash of a text section
  1472. *
  1473. * You should rarely need to call this function. It exists so that Texts
  1474. * can be used as associative array keys.
  1475. */
  1476. override size_t toHash() scope const nothrow { return hash(content); }
  1477.  
  1478. /**
  1479. * Returns a string representation of this Text section
  1480. */
  1481. override string toString() scope const @safe @nogc pure nothrow { return content; }
  1482.  
  1483. /**
  1484. * Returns true if the content is the empty string
  1485. */
  1486. override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; }
  1487. }
  1488.  
  1489. /**
  1490. * Class representing an XML Instruction section
  1491. */
  1492. class XMLInstruction : Item
  1493. {
  1494. private string content;
  1495.  
  1496. /**
  1497. * Construct an XML Instruction section
  1498. *
  1499. * Params:
  1500. * content = the body of the instruction segment
  1501. *
  1502. * Throws: XIException if the segment body is illegal (contains ">")
  1503. *
  1504. * Example:
  1505. * --------------
  1506. * auto item = new XMLInstruction("ATTLIST");
  1507. * // constructs <!ATTLIST>
  1508. * --------------
  1509. */
  1510. this(string content) @safe pure
  1511. {
  1512. import std.string : indexOf;
  1513. if (content.indexOf(">") != -1) throw new XIException(content);
  1514. this.content = content;
  1515. }
  1516.  
  1517. /**
  1518. * Compares two XML instructions for equality
  1519. *
  1520. * Example:
  1521. * --------------
  1522. * XMLInstruction item1,item2;
  1523. * if (item1 == item2) { }
  1524. * --------------
  1525. */
  1526. override bool opEquals(scope const Object o) const
  1527. {
  1528. const scope item = toType!(const Item)(o);
  1529. const t = cast(const XMLInstruction) item;
  1530. return t !is null && content == t.content;
  1531. }
  1532.  
  1533. /**
  1534. * Compares two XML instructions
  1535. *
  1536. * You should rarely need to call this function. It exists so that
  1537. * XmlInstructions can be used as associative array keys.
  1538. *
  1539. * Example:
  1540. * --------------
  1541. * XMLInstruction item1,item2;
  1542. * if (item1 < item2) { }
  1543. * --------------
  1544. */
  1545. override int opCmp(scope const Object o) scope const
  1546. {
  1547. const scope item = toType!(const Item)(o);
  1548. const t = cast(const XMLInstruction) item;
  1549. return t !is null
  1550. && (content != t.content ? (content < t.content ? -1 : 1 ) : 0 );
  1551. }
  1552.  
  1553. /**
  1554. * Returns the hash of an XMLInstruction
  1555. *
  1556. * You should rarely need to call this function. It exists so that
  1557. * XmlInstructions can be used as associative array keys.
  1558. */
  1559. override size_t toHash() scope const nothrow { return hash(content); }
  1560.  
  1561. /**
  1562. * Returns a string representation of this XmlInstruction
  1563. */
  1564. override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }
  1565.  
  1566. override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
  1567. }
  1568.  
  1569. /**
  1570. * Class representing a Processing Instruction section
  1571. */
  1572. class ProcessingInstruction : Item
  1573. {
  1574. private string content;
  1575.  
  1576. /**
  1577. * Construct a Processing Instruction section
  1578. *
  1579. * Params:
  1580. * content = the body of the instruction segment
  1581. *
  1582. * Throws: PIException if the segment body is illegal (contains "?>")
  1583. *
  1584. * Example:
  1585. * --------------
  1586. * auto item = new ProcessingInstruction("php");
  1587. * // constructs <?php?>
  1588. * --------------
  1589. */
  1590. this(string content) @safe pure
  1591. {
  1592. import std.string : indexOf;
  1593. if (content.indexOf("?>") != -1) throw new PIException(content);
  1594. this.content = content;
  1595. }
  1596.  
  1597. /**
  1598. * Compares two processing instructions for equality
  1599. *
  1600. * Example:
  1601. * --------------
  1602. * ProcessingInstruction item1,item2;
  1603. * if (item1 == item2) { }
  1604. * --------------
  1605. */
  1606. override bool opEquals(scope const Object o) const
  1607. {
  1608. const scope item = toType!(const Item)(o);
  1609. const t = cast(const ProcessingInstruction) item;
  1610. return t !is null && content == t.content;
  1611. }
  1612.  
  1613. /**
  1614. * Compares two processing instructions
  1615. *
  1616. * You should rarely need to call this function. It exists so that
  1617. * ProcessingInstructions can be used as associative array keys.
  1618. *
  1619. * Example:
  1620. * --------------
  1621. * ProcessingInstruction item1,item2;
  1622. * if (item1 < item2) { }
  1623. * --------------
  1624. */
  1625. override int opCmp(scope const Object o) scope const
  1626. {
  1627. const scope item = toType!(const Item)(o);
  1628. const t = cast(const ProcessingInstruction) item;
  1629. return t !is null
  1630. && (content != t.content ? (content < t.content ? -1 : 1 ) : 0 );
  1631. }
  1632.  
  1633. /**
  1634. * Returns the hash of a ProcessingInstruction
  1635. *
  1636. * You should rarely need to call this function. It exists so that
  1637. * ProcessingInstructions can be used as associative array keys.
  1638. */
  1639. override size_t toHash() scope const nothrow { return hash(content); }
  1640.  
  1641. /**
  1642. * Returns a string representation of this ProcessingInstruction
  1643. */
  1644. override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }
  1645.  
  1646. override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always
  1647. }
  1648.  
  1649. /**
  1650. * Abstract base class for XML items
  1651. */
  1652. abstract class Item
  1653. {
  1654. /// Compares with another Item of same type for equality
  1655. abstract override bool opEquals(scope const Object o) @safe const;
  1656.  
  1657. /// Compares with another Item of same type
  1658. abstract override int opCmp(scope const Object o) @safe const;
  1659.  
  1660. /// Returns the hash of this item
  1661. abstract override size_t toHash() @safe scope const;
  1662.  
  1663. /// Returns a string representation of this item
  1664. abstract override string toString() @safe scope const;
  1665.  
  1666. /**
  1667. * Returns an indented string representation of this item
  1668. *
  1669. * Params:
  1670. * indent = number of spaces by which to indent child elements
  1671. */
  1672. string[] pretty(uint indent) @safe scope const
  1673. {
  1674. import std.string : strip;
  1675. string s = strip(toString());
  1676. return s.length == 0 ? [] : [ s ];
  1677. }
  1678.  
  1679. /// Returns true if the item represents empty XML text
  1680. abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
  1681. }
  1682.  
  1683. /**
  1684. * Class for parsing an XML Document.
  1685. *
  1686. * This is a subclass of ElementParser. Most of the useful functions are
  1687. * documented there.
  1688. *
  1689. * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
  1690. *
  1691. * Bugs:
  1692. * Currently only supports UTF documents.
  1693. *
  1694. * If there is an encoding attribute in the prolog, it is ignored.
  1695. *
  1696. */
  1697. class DocumentParser : ElementParser
  1698. {
  1699. string xmlText;
  1700.  
  1701. /**
  1702. * Constructs a DocumentParser.
  1703. *
  1704. * The input to this function MUST be valid XML.
  1705. * This is enforced by the function's in contract.
  1706. *
  1707. * Params:
  1708. * xmlText_ = the entire XML document as text
  1709. *
  1710. */
  1711. this(string xmlText_)
  1712. in
  1713. {
  1714. assert(xmlText_.length != 0);
  1715. try
  1716. {
  1717. // Confirm that the input is valid XML
  1718. check(xmlText_);
  1719. }
  1720. catch (CheckException e)
  1721. {
  1722. // And if it's not, tell the user why not
  1723. assert(false, "\n" ~ e.toString());
  1724. }
  1725. }
  1726. do
  1727. {
  1728. xmlText = xmlText_;
  1729. s = &xmlText;
  1730. super(); // Initialize everything
  1731. parse(); // Parse through the root tag (but not beyond)
  1732. }
  1733. }
  1734.  
  1735. @system unittest
  1736. {
  1737. auto doc = new Document("<root><child><grandchild/></child></root>");
  1738. assert(doc.elements.length == 1);
  1739. assert(doc.elements[0].tag.name == "child");
  1740. assert(doc.items == doc.elements);
  1741. }
  1742.  
  1743. /**
  1744. * Class for parsing an XML element.
  1745. *
  1746. * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
  1747. *
  1748. * Note that you cannot construct instances of this class directly. You can
  1749. * construct a DocumentParser (which is a subclass of ElementParser), but
  1750. * otherwise, Instances of ElementParser will be created for you by the
  1751. * library, and passed your way via onStartTag handlers.
  1752. *
  1753. */
  1754. class ElementParser
  1755. {
  1756. alias Handler = void delegate(string);
  1757. alias ElementHandler = void delegate(in Element element);
  1758. alias ParserHandler = void delegate(ElementParser parser);
  1759.  
  1760. private
  1761. {
  1762. Tag tag_;
  1763. string elementStart;
  1764. string* s;
  1765.  
  1766. Handler commentHandler = null;
  1767. Handler cdataHandler = null;
  1768. Handler xiHandler = null;
  1769. Handler piHandler = null;
  1770. Handler rawTextHandler = null;
  1771. Handler textHandler = null;
  1772.  
  1773. // Private constructor for start tags
  1774. this(ElementParser parent) @safe @nogc pure nothrow
  1775. {
  1776. s = parent.s;
  1777. this();
  1778. tag_ = parent.tag_;
  1779. }
  1780.  
  1781. // Private constructor for empty tags
  1782. this(Tag tag, string* t) @safe @nogc pure nothrow
  1783. {
  1784. s = t;
  1785. this();
  1786. tag_ = tag;
  1787. }
  1788. }
  1789.  
  1790. /**
  1791. * The Tag at the start of the element being parsed. You can read this to
  1792. * determine the tag's name and attributes.
  1793. */
  1794. @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; }
  1795.  
  1796. /**
  1797. * Register a handler which will be called whenever a start tag is
  1798. * encountered which matches the specified name. You can also pass null as
  1799. * the name, in which case the handler will be called for any unmatched
  1800. * start tag.
  1801. *
  1802. * Example:
  1803. * --------------
  1804. * // Call this function whenever a <podcast> start tag is encountered
  1805. * onStartTag["podcast"] = (ElementParser xml)
  1806. * {
  1807. * // Your code here
  1808. * //
  1809. * // This is a closure, so code here may reference
  1810. * // variables which are outside of this scope
  1811. * };
  1812. *
  1813. * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
  1814. * // start tag is encountered
  1815. * onStartTag["episode"] = &myEpisodeStartHandler;
  1816. *
  1817. * // call delegate dg for all other start tags
  1818. * onStartTag[null] = dg;
  1819. * --------------
  1820. *
  1821. * This library will supply your function with a new instance of
  1822. * ElementParser, which may be used to parse inside the element whose
  1823. * start tag was just found, or to identify the tag attributes of the
  1824. * element, etc.
  1825. *
  1826. * Note that your function will be called for both start tags and empty
  1827. * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
  1828. * and &lt;br/&gt;.
  1829. */
  1830. ParserHandler[string] onStartTag;
  1831.  
  1832. /**
  1833. * Register a handler which will be called whenever an end tag is
  1834. * encountered which matches the specified name. You can also pass null as
  1835. * the name, in which case the handler will be called for any unmatched
  1836. * end tag.
  1837. *
  1838. * Example:
  1839. * --------------
  1840. * // Call this function whenever a </podcast> end tag is encountered
  1841. * onEndTag["podcast"] = (in Element e)
  1842. * {
  1843. * // Your code here
  1844. * //
  1845. * // This is a closure, so code here may reference
  1846. * // variables which are outside of this scope
  1847. * };
  1848. *
  1849. * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
  1850. * // end tag is encountered
  1851. * onEndTag["episode"] = &myEpisodeEndHandler;
  1852. *
  1853. * // call delegate dg for all other end tags
  1854. * onEndTag[null] = dg;
  1855. * --------------
  1856. *
  1857. * Note that your function will be called for both end tags and empty
  1858. * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
  1859. * and &lt;br/&gt;.
  1860. */
  1861. ElementHandler[string] onEndTag;
  1862.  
  1863. protected this() @safe @nogc pure nothrow
  1864. {
  1865. elementStart = *s;
  1866. }
  1867.  
  1868. /**
  1869. * Register a handler which will be called whenever text is encountered.
  1870. *
  1871. * Example:
  1872. * --------------
  1873. * // Call this function whenever text is encountered
  1874. * onText = (string s)
  1875. * {
  1876. * // Your code here
  1877. *
  1878. * // The passed parameter s will have been decoded by the time you see
  1879. * // it, and so may contain any character.
  1880. * //
  1881. * // This is a a closure, so code here may reference
  1882. * // variables which are outside of this scope
  1883. * };
  1884. * --------------
  1885. */
  1886. @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; }
  1887.  
  1888. /**
  1889. * Register an alternative handler which will be called whenever text
  1890. * is encountered. This differs from onText in that onText will decode
  1891. * the text, whereas onTextRaw will not. This allows you to make design
  1892. * choices, since onText will be more accurate, but slower, while
  1893. * onTextRaw will be faster, but less accurate. Of course, you can
  1894. * still call decode() within your handler, if you want, but you'd
  1895. * probably want to use onTextRaw only in circumstances where you
  1896. * know that decoding is unnecessary.
  1897. *
  1898. * Example:
  1899. * --------------
  1900. * // Call this function whenever text is encountered
  1901. * onText = (string s)
  1902. * {
  1903. * // Your code here
  1904. *
  1905. * // The passed parameter s will NOT have been decoded.
  1906. * //
  1907. * // This is a a closure, so code here may reference
  1908. * // variables which are outside of this scope
  1909. * };
  1910. * --------------
  1911. */
  1912. @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; }
  1913.  
  1914. /**
  1915. * Register a handler which will be called whenever a character data
  1916. * segment is encountered.
  1917. *
  1918. * Example:
  1919. * --------------
  1920. * // Call this function whenever a CData section is encountered
  1921. * onCData = (string s)
  1922. * {
  1923. * // Your code here
  1924. *
  1925. * // The passed parameter s does not include the opening <![CDATA[
  1926. * // nor closing ]]>
  1927. * //
  1928. * // This is a a closure, so code here may reference
  1929. * // variables which are outside of this scope
  1930. * };
  1931. * --------------
  1932. */
  1933. @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; }
  1934.  
  1935. /**
  1936. * Register a handler which will be called whenever a comment is
  1937. * encountered.
  1938. *
  1939. * Example:
  1940. * --------------
  1941. * // Call this function whenever a comment is encountered
  1942. * onComment = (string s)
  1943. * {
  1944. * // Your code here
  1945. *
  1946. * // The passed parameter s does not include the opening <!-- nor
  1947. * // closing -->
  1948. * //
  1949. * // This is a a closure, so code here may reference
  1950. * // variables which are outside of this scope
  1951. * };
  1952. * --------------
  1953. */
  1954. @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; }
  1955.  
  1956. /**
  1957. * Register a handler which will be called whenever a processing
  1958. * instruction is encountered.
  1959. *
  1960. * Example:
  1961. * --------------
  1962. * // Call this function whenever a processing instruction is encountered
  1963. * onPI = (string s)
  1964. * {
  1965. * // Your code here
  1966. *
  1967. * // The passed parameter s does not include the opening <? nor
  1968. * // closing ?>
  1969. * //
  1970. * // This is a a closure, so code here may reference
  1971. * // variables which are outside of this scope
  1972. * };
  1973. * --------------
  1974. */
  1975. @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; }
  1976.  
  1977. /**
  1978. * Register a handler which will be called whenever an XML instruction is
  1979. * encountered.
  1980. *
  1981. * Example:
  1982. * --------------
  1983. * // Call this function whenever an XML instruction is encountered
  1984. * // (Note: XML instructions may only occur preceding the root tag of a
  1985. * // document).
  1986. * onPI = (string s)
  1987. * {
  1988. * // Your code here
  1989. *
  1990. * // The passed parameter s does not include the opening <! nor
  1991. * // closing >
  1992. * //
  1993. * // This is a a closure, so code here may reference
  1994. * // variables which are outside of this scope
  1995. * };
  1996. * --------------
  1997. */
  1998. @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; }
  1999.  
/**
 * Parse an XML element.
 *
 * Parsing will continue until the end of the current element. Any items
 * encountered for which a handler has been registered will invoke that
 * handler.
 *
 * Input is consumed from the front of the string that `s` points to. On
 * each iteration the loop recognises, in this order: a comment, a CDATA
 * section, any other "<!" item, a processing instruction, a tag, and
 * otherwise plain text running up to the next '<'.
 *
 * Throws: various kinds of XMLException
 */
void parse()
{
    import std.algorithm.searching : startsWith;
    import std.string : indexOf;

    string t;
    // The tag this parser was entered with; it stays fixed while tag_ is
    // overwritten by every tag read below.
    const Tag root = tag_;
    // Most recent start tag seen for each element name, so that an end tag
    // can find where its element's content began.
    Tag[string] startTags;
    if (tag_ !is null) startTags[tag_.name] = tag_;

    while (s.length != 0)
    {
        // In the four branches below, chop() treats a failed indexOf (-1)
        // as "take everything that is left".
        if (startsWith(*s,"<!--"))
        {
            chop(*s,4);
            t = chop(*s,indexOf(*s,"-->"));
            if (commentHandler.funcptr !is null) commentHandler(t);
            chop(*s,3);
        }
        else if (startsWith(*s,"<![CDATA["))
        {
            chop(*s,9);
            t = chop(*s,indexOf(*s,"]]>"));
            if (cdataHandler.funcptr !is null) cdataHandler(t);
            chop(*s,3);
        }
        else if (startsWith(*s,"<!"))
        {
            chop(*s,2);
            t = chop(*s,indexOf(*s,">"));
            if (xiHandler.funcptr !is null) xiHandler(t);
            chop(*s,1);
        }
        else if (startsWith(*s,"<?"))
        {
            chop(*s,2);
            t = chop(*s,indexOf(*s,"?>"));
            if (piHandler.funcptr !is null) piHandler(t);
            chop(*s,2);
        }
        else if (startsWith(*s,"<"))
        {
            tag_ = new Tag(*s,true);
            if (root is null)
                return; // Return to constructor of derived class

            if (tag_.isStart)
            {
                startTags[tag_.name] = tag_;

                auto parser = new ElementParser(this);

                // Prefer the handler registered for this tag name; fall back
                // to the one registered under the null key, if any.
                auto handler = tag_.name in onStartTag;
                if (handler !is null) (*handler)(parser);
                else
                {
                    handler = null in onStartTag;
                    if (handler !is null) (*handler)(parser);
                }
            }
            else if (tag_.isEnd)
            {
                // NOTE(review): a plain AA index; an end tag whose name was
                // never seen as a start tag is not handled here.
                const startTag = startTags[tag_.name];
                string text;

                if (startTag.tagString.length == 0)
                    assert(0);

                // The element text is the span between the end of the start
                // tag's source text and the beginning of the end tag's source
                // text. NOTE(review): this presumes both tagString slices
                // refer to the same underlying buffer - confirm in Tag.
                immutable(char)* p = startTag.tagString.ptr
                    + startTag.tagString.length;
                immutable(char)* q = &tag_.tagString[0];
                text = decode(p[0..(q-p)], DecodeMode.LOOSE);

                auto element = new Element(startTag);
                if (text.length != 0) element ~= new Text(text);

                // Same name-then-null-key fallback as for start tags.
                auto handler = tag_.name in onEndTag;
                if (handler !is null) (*handler)(element);
                else
                {
                    handler = null in onEndTag;
                    if (handler !is null) (*handler)(element);
                }

                // The end tag of the element we were entered with ends the
                // parse.
                if (tag_.name == root.name) return;
            }
            else if (tag_.isEmpty)
            {
                // An empty-element tag is reported as a start tag followed
                // immediately by an end tag.
                Tag startTag = new Tag(tag_.name);

                // FIX by hed010gy, for bug 2979
                // http://d.puremagic.com/issues/show_bug.cgi?id=2979
                if (tag_.attr.length > 0)
                    foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv;
                // END FIX

                // Handle the pretend start tag
                // (the nested parser is given an empty string of its own)
                string s2;
                auto parser = new ElementParser(startTag,&s2);
                auto handler1 = startTag.name in onStartTag;
                if (handler1 !is null) (*handler1)(parser);
                else
                {
                    handler1 = null in onStartTag;
                    if (handler1 !is null) (*handler1)(parser);
                }

                // Handle the pretend end tag
                auto element = new Element(startTag);
                auto handler2 = tag_.name in onEndTag;
                if (handler2 !is null) (*handler2)(element);
                else
                {
                    handler2 = null in onEndTag;
                    if (handler2 !is null) (*handler2)(element);
                }
            }
        }
        else
        {
            // Plain text up to the next tag. The raw-text handler, when set,
            // takes precedence and receives the undecoded text; otherwise the
            // text handler receives the decoded text.
            t = chop(*s,indexOf(*s,"<"));
            if (rawTextHandler.funcptr !is null)
                rawTextHandler(t);
            else if (textHandler.funcptr !is null)
                textHandler(decode(t,DecodeMode.LOOSE));
        }
    }
}
  2137.  
  2138. /**
  2139. * Returns that part of the element which has already been parsed
  2140. */
  2141. override string toString() const @nogc @safe pure nothrow
  2142. {
  2143. assert(elementStart.length >= s.length);
  2144. return elementStart[0 .. elementStart.length - s.length];
  2145. }
  2146.  
  2147. }
  2148.  
  2149. private
  2150. {
  2151. template Check(string msg)
  2152. {
  2153. string old = s;
  2154.  
  2155. void fail() @safe pure
  2156. {
  2157. s = old;
  2158. throw new Err(s,msg);
  2159. }
  2160.  
  2161. void fail(Err e) @safe pure
  2162. {
  2163. s = old;
  2164. throw new Err(s,msg,e);
  2165. }
  2166.  
  2167. void fail(string msg2) @safe pure
  2168. {
  2169. fail(new Err(s,msg2));
  2170. }
  2171. }
  2172.  
  2173. void checkMisc(ref string s) @safe pure // rule 27
  2174. {
  2175. import std.algorithm.searching : startsWith;
  2176.  
  2177. mixin Check!("Misc");
  2178.  
  2179. try
  2180. {
  2181. if (s.startsWith("<!--")) { checkComment(s); }
  2182. else if (s.startsWith("<?")) { checkPI(s); }
  2183. else { checkSpace(s); }
  2184. }
  2185. catch (Err e) { fail(e); }
  2186. }
  2187.  
  2188. void checkDocument(ref string s) @safe pure // rule 1
  2189. {
  2190. mixin Check!("Document");
  2191. try
  2192. {
  2193. checkProlog(s);
  2194. checkElement(s);
  2195. star!(checkMisc)(s);
  2196. }
  2197. catch (Err e) { fail(e); }
  2198. }
  2199.  
  2200. void checkChars(ref string s) @safe pure // rule 2
  2201. {
  2202. // TO DO - Fix std.utf stride and decode functions, then use those
  2203. // instead
  2204. import std.format : format;
  2205.  
  2206. mixin Check!("Chars");
  2207.  
  2208. dchar c;
  2209. ptrdiff_t n = -1;
  2210. // 'i' must not be smaller than size_t because size_t is used internally in
  2211. // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
  2212. foreach (size_t i, dchar d; s)
  2213. {
  2214. if (!isChar(d))
  2215. {
  2216. c = d;
  2217. n = i;
  2218. break;
  2219. }
  2220. }
  2221. if (n != -1)
  2222. {
  2223. s = s[n..$];
  2224. fail(format("invalid character: U+%04X",c));
  2225. }
  2226. }
  2227.  
  2228. void checkSpace(ref string s) @safe pure // rule 3
  2229. {
  2230. import std.algorithm.searching : countUntil;
  2231. import std.ascii : isWhite;
  2232. import std.utf : byCodeUnit;
  2233.  
  2234. mixin Check!("Whitespace");
  2235. ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a));
  2236. if (i == -1 && s.length > 0 && isWhite(s[0]))
  2237. s = s[$ .. $];
  2238. else if (i > -1)
  2239. s = s[i .. $];
  2240. if (s is old) fail();
  2241. }
  2242.  
  2243. void checkName(ref string s, out string name) @safe pure // rule 5
  2244. {
  2245. mixin Check!("Name");
  2246.  
  2247. if (s.length == 0) fail();
  2248. ptrdiff_t n;
  2249. // 'i' must not be smaller than size_t because size_t is used internally in
  2250. // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
  2251. foreach (size_t i, dchar c; s)
  2252. {
  2253. if (c == '_' || c == ':' || isLetter(c)) continue;
  2254. if (i == 0) fail();
  2255. if (c == '-' || c == '.' || isDigit(c)
  2256. || isCombiningChar(c) || isExtender(c)) continue;
  2257. n = i;
  2258. break;
  2259. }
  2260. name = s[0 .. n];
  2261. s = s[n..$];
  2262. }
  2263.  
  2264. void checkAttValue(ref string s) @safe pure // rule 10
  2265. {
  2266. import std.algorithm.searching : countUntil;
  2267. import std.utf : byCodeUnit;
  2268.  
  2269. mixin Check!("AttValue");
  2270.  
  2271. if (s.length == 0) fail();
  2272. char c = s[0];
  2273. if (c != '\u0022' && c != '\u0027')
  2274. fail("attribute value requires quotes");
  2275. s = s[1..$];
  2276. for (;;)
  2277. {
  2278. s = s[s.byCodeUnit.countUntil(c) .. $];
  2279. if (s.length == 0) fail("unterminated attribute value");
  2280. if (s[0] == '<') fail("< found in attribute value");
  2281. if (s[0] == c) break;
  2282. try { checkReference(s); } catch (Err e) { fail(e); }
  2283. }
  2284. s = s[1..$];
  2285. }
  2286.  
  2287. void checkCharData(ref string s) @safe pure // rule 14
  2288. {
  2289. import std.algorithm.searching : startsWith;
  2290.  
  2291. mixin Check!("CharData");
  2292.  
  2293. while (s.length != 0)
  2294. {
  2295. if (s.startsWith("&")) break;
  2296. if (s.startsWith("<")) break;
  2297. if (s.startsWith("]]>")) fail("]]> found within char data");
  2298. s = s[1..$];
  2299. }
  2300. }
  2301.  
  2302. void checkComment(ref string s) @safe pure // rule 15
  2303. {
  2304. import std.string : indexOf;
  2305.  
  2306. mixin Check!("Comment");
  2307.  
  2308. try { checkLiteral("<!--",s); } catch (Err e) { fail(e); }
  2309. ptrdiff_t n = s.indexOf("--");
  2310. if (n == -1) fail("unterminated comment");
  2311. s = s[n..$];
  2312. try { checkLiteral("-->",s); } catch (Err e) { fail(e); }
  2313. }
  2314.  
  2315. void checkPI(ref string s) @safe pure // rule 16
  2316. {
  2317. mixin Check!("PI");
  2318.  
  2319. try
  2320. {
  2321. checkLiteral("<?",s);
  2322. checkEnd("?>",s);
  2323. }
  2324. catch (Err e) { fail(e); }
  2325. }
  2326.  
  2327. void checkCDSect(ref string s) @safe pure // rule 18
  2328. {
  2329. mixin Check!("CDSect");
  2330.  
  2331. try
  2332. {
  2333. checkLiteral(cdata,s);
  2334. checkEnd("]]>",s);
  2335. }
  2336. catch (Err e) { fail(e); }
  2337. }
  2338.  
  2339. void checkProlog(ref string s) @safe pure // rule 22
  2340. {
  2341. mixin Check!("Prolog");
  2342.  
  2343. try
  2344. {
  2345. /* The XML declaration is optional
  2346. * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
  2347. */
  2348. opt!(checkXMLDecl)(s);
  2349.  
  2350. star!(checkMisc)(s);
  2351. opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
  2352. }
  2353. catch (Err e) { fail(e); }
  2354. }
  2355.  
  2356. void checkXMLDecl(ref string s) @safe pure // rule 23
  2357. {
  2358. mixin Check!("XMLDecl");
  2359.  
  2360. try
  2361. {
  2362. checkLiteral("<?xml",s);
  2363. checkVersionInfo(s);
  2364. opt!(checkEncodingDecl)(s);
  2365. opt!(checkSDDecl)(s);
  2366. opt!(checkSpace)(s);
  2367. checkLiteral("?>",s);
  2368. }
  2369. catch (Err e) { fail(e); }
  2370. }
  2371.  
  2372. void checkVersionInfo(ref string s) @safe pure // rule 24
  2373. {
  2374. mixin Check!("VersionInfo");
  2375.  
  2376. try
  2377. {
  2378. checkSpace(s);
  2379. checkLiteral("version",s);
  2380. checkEq(s);
  2381. quoted!(checkVersionNum)(s);
  2382. }
  2383. catch (Err e) { fail(e); }
  2384. }
  2385.  
  2386. void checkEq(ref string s) @safe pure // rule 25
  2387. {
  2388. mixin Check!("Eq");
  2389.  
  2390. try
  2391. {
  2392. opt!(checkSpace)(s);
  2393. checkLiteral("=",s);
  2394. opt!(checkSpace)(s);
  2395. }
  2396. catch (Err e) { fail(e); }
  2397. }
  2398.  
  2399. void checkVersionNum(ref string s) @safe pure // rule 26
  2400. {
  2401. import std.algorithm.searching : countUntil;
  2402. import std.utf : byCodeUnit;
  2403.  
  2404. mixin Check!("VersionNum");
  2405.  
  2406. s = s[s.byCodeUnit.countUntil('\"') .. $];
  2407. if (s is old) fail();
  2408. }
  2409.  
  2410. void checkDocTypeDecl(ref string s) @safe pure // rule 28
  2411. {
  2412. mixin Check!("DocTypeDecl");
  2413.  
  2414. try
  2415. {
  2416. checkLiteral("<!DOCTYPE",s);
  2417. //
  2418. // TO DO -- ensure DOCTYPE is well formed
  2419. // (But not yet. That's one of our "future directions")
  2420. //
  2421. checkEnd(">",s);
  2422. }
  2423. catch (Err e) { fail(e); }
  2424. }
  2425.  
  2426. void checkSDDecl(ref string s) @safe pure // rule 32
  2427. {
  2428. import std.algorithm.searching : startsWith;
  2429.  
  2430. mixin Check!("SDDecl");
  2431.  
  2432. try
  2433. {
  2434. checkSpace(s);
  2435. checkLiteral("standalone",s);
  2436. checkEq(s);
  2437. }
  2438. catch (Err e) { fail(e); }
  2439.  
  2440. int n = 0;
  2441. if (s.startsWith("'yes'") || s.startsWith("\"yes\"")) n = 5;
  2442. else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" )) n = 4;
  2443. else fail("standalone attribute value must be 'yes', \"yes\","~
  2444. " 'no' or \"no\"");
  2445. s = s[n..$];
  2446. }
  2447.  
  2448. void checkElement(ref string s) @safe pure // rule 39
  2449. {
  2450. mixin Check!("Element");
  2451.  
  2452. string sname,ename,t;
  2453. try { checkTag(s,t,sname); } catch (Err e) { fail(e); }
  2454.  
  2455. if (t == "STag")
  2456. {
  2457. try
  2458. {
  2459. checkContent(s);
  2460. t = s;
  2461. checkETag(s,ename);
  2462. }
  2463. catch (Err e) { fail(e); }
  2464.  
  2465. if (sname != ename)
  2466. {
  2467. s = t;
  2468. fail("end tag name \"" ~ ename
  2469. ~ "\" differs from start tag name \""~sname~"\"");
  2470. }
  2471. }
  2472. }
  2473.  
  2474. // rules 40 and 44
  2475. void checkTag(ref string s, out string type, out string name) @safe pure
  2476. {
  2477. mixin Check!("Tag");
  2478.  
  2479. try
  2480. {
  2481. type = "STag";
  2482. checkLiteral("<",s);
  2483. checkName(s,name);
  2484. star!(seq!(checkSpace,checkAttribute))(s);
  2485. opt!(checkSpace)(s);
  2486. if (s.length != 0 && s[0] == '/')
  2487. {
  2488. s = s[1..$];
  2489. type = "ETag";
  2490. }
  2491. checkLiteral(">",s);
  2492. }
  2493. catch (Err e) { fail(e); }
  2494. }
  2495.  
  2496. void checkAttribute(ref string s) @safe pure // rule 41
  2497. {
  2498. mixin Check!("Attribute");
  2499.  
  2500. try
  2501. {
  2502. string name;
  2503. checkName(s,name);
  2504. checkEq(s);
  2505. checkAttValue(s);
  2506. }
  2507. catch (Err e) { fail(e); }
  2508. }
  2509.  
  2510. void checkETag(ref string s, out string name) @safe pure // rule 42
  2511. {
  2512. mixin Check!("ETag");
  2513.  
  2514. try
  2515. {
  2516. checkLiteral("</",s);
  2517. checkName(s,name);
  2518. opt!(checkSpace)(s);
  2519. checkLiteral(">",s);
  2520. }
  2521. catch (Err e) { fail(e); }
  2522. }
  2523.  
  2524. void checkContent(ref string s) @safe pure // rule 43
  2525. {
  2526. import std.algorithm.searching : startsWith;
  2527.  
  2528. mixin Check!("Content");
  2529.  
  2530. try
  2531. {
  2532. while (s.length != 0)
  2533. {
  2534. old = s;
  2535. if (s.startsWith("&")) { checkReference(s); }
  2536. else if (s.startsWith("<!--")) { checkComment(s); }
  2537. else if (s.startsWith("<?")) { checkPI(s); }
  2538. else if (s.startsWith(cdata)) { checkCDSect(s); }
  2539. else if (s.startsWith("</")) { break; }
  2540. else if (s.startsWith("<")) { checkElement(s); }
  2541. else { checkCharData(s); }
  2542. }
  2543. }
  2544. catch (Err e) { fail(e); }
  2545. }
  2546.  
  2547. void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
  2548. {
  2549. import std.format : format;
  2550.  
  2551. mixin Check!("CharRef");
  2552.  
  2553. c = 0;
  2554. try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
  2555. int radix = 10;
  2556. if (s.length != 0 && s[0] == 'x')
  2557. {
  2558. s = s[1..$];
  2559. radix = 16;
  2560. }
  2561. if (s.length == 0) fail("unterminated character reference");
  2562. if (s[0] == ';')
  2563. fail("character reference must have at least one digit");
  2564. while (s.length != 0)
  2565. {
  2566. immutable char d = s[0];
  2567. int n = 0;
  2568. switch (d)
  2569. {
  2570. case 'F','f': ++n; goto case;
  2571. case 'E','e': ++n; goto case;
  2572. case 'D','d': ++n; goto case;
  2573. case 'C','c': ++n; goto case;
  2574. case 'B','b': ++n; goto case;
  2575. case 'A','a': ++n; goto case;
  2576. case '9': ++n; goto case;
  2577. case '8': ++n; goto case;
  2578. case '7': ++n; goto case;
  2579. case '6': ++n; goto case;
  2580. case '5': ++n; goto case;
  2581. case '4': ++n; goto case;
  2582. case '3': ++n; goto case;
  2583. case '2': ++n; goto case;
  2584. case '1': ++n; goto case;
  2585. case '0': break;
  2586. default: n = 100; break;
  2587. }
  2588. if (n >= radix) break;
  2589. c *= radix;
  2590. c += n;
  2591. s = s[1..$];
  2592. }
  2593. if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
  2594. if (s.length == 0 || s[0] != ';') fail("expected ;");
  2595. else s = s[1..$];
  2596. }
  2597.  
  2598. void checkReference(ref string s) @safe pure // rule 67
  2599. {
  2600. import std.algorithm.searching : startsWith;
  2601.  
  2602. mixin Check!("Reference");
  2603.  
  2604. try
  2605. {
  2606. dchar c;
  2607. if (s.startsWith("&#")) checkCharRef(s,c);
  2608. else checkEntityRef(s);
  2609. }
  2610. catch (Err e) { fail(e); }
  2611. }
  2612.  
  2613. void checkEntityRef(ref string s) @safe pure // rule 68
  2614. {
  2615. mixin Check!("EntityRef");
  2616.  
  2617. try
  2618. {
  2619. string name;
  2620. checkLiteral("&",s);
  2621. checkName(s,name);
  2622. checkLiteral(";",s);
  2623. }
  2624. catch (Err e) { fail(e); }
  2625. }
  2626.  
  2627. void checkEncName(ref string s) @safe pure // rule 81
  2628. {
  2629. import std.algorithm.searching : countUntil;
  2630. import std.ascii : isAlpha;
  2631. import std.utf : byCodeUnit;
  2632.  
  2633. mixin Check!("EncName");
  2634.  
  2635. s = s[s.byCodeUnit.countUntil!(a => !isAlpha(a)) .. $];
  2636. if (s is old) fail();
  2637. s = s[s.byCodeUnit.countUntil('\"', '\'') .. $];
  2638. }
  2639.  
  2640. void checkEncodingDecl(ref string s) @safe pure // rule 80
  2641. {
  2642. mixin Check!("EncodingDecl");
  2643.  
  2644. try
  2645. {
  2646. checkSpace(s);
  2647. checkLiteral("encoding",s);
  2648. checkEq(s);
  2649. quoted!(checkEncName)(s);
  2650. }
  2651. catch (Err e) { fail(e); }
  2652. }
  2653.  
  2654. // Helper functions
  2655.  
  2656. void checkLiteral(string literal,ref string s) @safe pure
  2657. {
  2658. import std.string : startsWith;
  2659.  
  2660. mixin Check!("Literal");
  2661.  
  2662. if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\"");
  2663. s = s[literal.length..$];
  2664. }
  2665.  
  2666. void checkEnd(string end,ref string s) @safe pure
  2667. {
  2668. import std.string : indexOf;
  2669. // Deliberately no mixin Check here.
  2670.  
  2671. auto n = s.indexOf(end);
  2672. if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\"");
  2673. s = s[n..$];
  2674. checkLiteral(end,s);
  2675. }
  2676.  
  2677. // Metafunctions -- none of these use mixin Check
  2678.  
  2679. void opt(alias f)(ref string s)
  2680. {
  2681. try { f(s); } catch (Err e) {}
  2682. }
  2683.  
  2684. void plus(alias f)(ref string s)
  2685. {
  2686. f(s);
  2687. star!(f)(s);
  2688. }
  2689.  
  2690. void star(alias f)(ref string s)
  2691. {
  2692. while (s.length != 0)
  2693. {
  2694. try { f(s); }
  2695. catch (Err e) { return; }
  2696. }
  2697. }
  2698.  
  2699. void quoted(alias f)(ref string s)
  2700. {
  2701. import std.string : startsWith;
  2702.  
  2703. if (s.startsWith("'"))
  2704. {
  2705. checkLiteral("'",s);
  2706. f(s);
  2707. checkLiteral("'",s);
  2708. }
  2709. else
  2710. {
  2711. checkLiteral("\"",s);
  2712. f(s);
  2713. checkLiteral("\"",s);
  2714. }
  2715. }
  2716.  
  2717. void seq(alias f,alias g)(ref string s)
  2718. {
  2719. f(s);
  2720. g(s);
  2721. }
  2722. }
  2723.  
  2724. /**
  2725. * Check an entire XML document for well-formedness
  2726. *
  2727. * Params:
  2728. * s = the document to be checked, passed as a string
  2729. *
  2730. * Throws: CheckException if the document is not well formed
  2731. *
  2732. * CheckException's toString() method will yield the complete hierarchy of
  2733. * parse failure (the XML equivalent of a stack trace), giving the line and
  2734. * column number of every failure at every level.
  2735. */
  2736. void check(string s) @safe pure
  2737. {
  2738. try
  2739. {
  2740. checkChars(s);
  2741. checkDocument(s);
  2742. if (s.length != 0) throw new Err(s,"Junk found after document");
  2743. }
  2744. catch (Err e)
  2745. {
  2746. e.complete(s);
  2747. throw e;
  2748. }
  2749. }
  2750.  
  2751. @system pure unittest
  2752. {
  2753. import std.string : indexOf;
  2754.  
  2755. try
  2756. {
  2757. check(q"[<?xml version="1.0"?>
  2758. <catalog>
  2759. <book id="bk101">
  2760. <author>Gambardella, Matthew</author>
  2761. <title>XML Developer's Guide</title>
  2762. <genre>Computer</genre>
  2763. <price>44.95</price>
  2764. <publish_date>2000-10-01</publish_date>
  2765. <description>An in-depth look at creating applications
  2766. with XML.</description>
  2767. </book>
  2768. <book id="bk102">
  2769. <author>Ralls, Kim</author>
  2770. <title>Midnight Rain</title>
  2771. <genre>Fantasy</genres>
  2772. <price>5.95</price>
  2773. <publish_date>2000-12-16</publish_date>
  2774. <description>A former architect battles corporate zombies,
  2775. an evil sorceress, and her own childhood to become queen
  2776. of the world.</description>
  2777. </book>
  2778. <book id="bk103">
  2779. <author>Corets, Eva</author>
  2780. <title>Maeve Ascendant</title>
  2781. <genre>Fantasy</genre>
  2782. <price>5.95</price>
  2783. <publish_date>2000-11-17</publish_date>
  2784. <description>After the collapse of a nanotechnology
  2785. society in England, the young survivors lay the
  2786. foundation for a new society.</description>
  2787. </book>
  2788. </catalog>
  2789. ]");
  2790. assert(false);
  2791. }
  2792. catch (CheckException e)
  2793. {
  2794. auto n = e.toString().indexOf("end tag name \"genres\" differs"~
  2795. " from start tag name \"genre\"");
  2796. assert(n != -1);
  2797. }
  2798. }
  2799.  
  2800. @system unittest
  2801. {
  2802. string s = q"EOS
  2803. <?xml version="1.0"?>
  2804. <set>
  2805. <one>A</one>
  2806. <!-- comment -->
  2807. <two>B</two>
  2808. </set>
  2809. EOS";
  2810. try
  2811. {
  2812. check(s);
  2813. }
  2814. catch (CheckException e)
  2815. {
  2816. assert(0, e.toString());
  2817. }
  2818. }
  2819.  
  2820. @system unittest
  2821. {
  2822. string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
  2823. xmlns:stream="http://etherx.'jabber'.org/streams"
  2824. xmlns="jabber:'client'" from='jid.pl' id="587a5767"
  2825. xml:lang="en" version="1.0" attr='a"b"c'>
  2826. </stream:stream></r>`;
  2827.  
  2828. DocumentParser parser = new DocumentParser(test_xml);
  2829. bool tested = false;
  2830. parser.onStartTag["stream:stream"] = (ElementParser p) {
  2831. assert(p.tag.attr["xmlns"] == "jabber:'client'");
  2832. assert(p.tag.attr["from"] == "jid.pl");
  2833. assert(p.tag.attr["attr"] == "a\"b\"c");
  2834. tested = true;
  2835. };
  2836. parser.parse();
  2837. assert(tested);
  2838. }
  2839.  
  2840. @system unittest
  2841. {
  2842. string s = q"EOS
  2843. <?xml version="1.0" encoding="utf-8"?> <Tests>
  2844. <Test thing="What &amp; Up">What &amp; Up Second</Test>
  2845. </Tests>
  2846. EOS";
  2847. auto xml = new DocumentParser(s);
  2848.  
  2849. xml.onStartTag["Test"] = (ElementParser xml) {
  2850. assert(xml.tag.attr["thing"] == "What & Up");
  2851. };
  2852.  
  2853. xml.onEndTag["Test"] = (in Element e) {
  2854. assert(e.text() == "What & Up Second");
  2855. };
  2856. xml.parse();
  2857. }
  2858.  
  2859. @system unittest
  2860. {
  2861. string s = `<tag attr="&quot;value&gt;" />`;
  2862. auto doc = new Document(s);
  2863. assert(doc.toString() == s);
  2864. }
  2865.  
  2866. /** The base class for exceptions thrown by this module */
  2867. class XMLException : Exception { this(string msg) @safe pure { super(msg); } }
  2868.  
  2869. // Other exceptions
  2870.  
  2871. /// Thrown during Comment constructor
  2872. class CommentException : XMLException
  2873. { private this(string msg) @safe pure { super(msg); } }
  2874.  
  2875. /// Thrown during CData constructor
  2876. class CDataException : XMLException
  2877. { private this(string msg) @safe pure { super(msg); } }
  2878.  
  2879. /// Thrown during XMLInstruction constructor
  2880. class XIException : XMLException
  2881. { private this(string msg) @safe pure { super(msg); } }
  2882.  
  2883. /// Thrown during ProcessingInstruction constructor
  2884. class PIException : XMLException
  2885. { private this(string msg) @safe pure { super(msg); } }
  2886.  
  2887. /// Thrown during Text constructor
  2888. class TextException : XMLException
  2889. { private this(string msg) @safe pure { super(msg); } }
  2890.  
  2891. /// Thrown during decode()
  2892. class DecodeException : XMLException
  2893. { private this(string msg) @safe pure { super(msg); } }
  2894.  
  2895. /// Thrown if comparing with wrong type
  2896. class InvalidTypeException : XMLException
  2897. { private this(string msg) @safe pure { super(msg); } }
  2898.  
  2899. /// Thrown when parsing for Tags
  2900. class TagException : XMLException
  2901. { private this(string msg) @safe pure { super(msg); } }
  2902.  
  2903. /**
  2904. * Thrown during check()
  2905. */
  2906. class CheckException : XMLException
  2907. {
  2908. CheckException err; /// Parent in hierarchy
  2909. private string tail;
  2910. /**
  2911. * Name of production rule which failed to parse,
  2912. * or specific error message
  2913. */
  2914. string msg;
  2915. size_t line = 0; /// Line number at which parse failure occurred
  2916. size_t column = 0; /// Column number at which parse failure occurred
  2917.  
  2918. private this(string tail,string msg,Err err=null) @safe pure
  2919. {
  2920. super(null);
  2921. this.tail = tail;
  2922. this.msg = msg;
  2923. this.err = err;
  2924. }
  2925.  
  2926. private void complete(string entire) @safe pure
  2927. {
  2928. import std.string : count, lastIndexOf;
  2929. import std.utf : toUTF32;
  2930.  
  2931. string head = entire[0..$-tail.length];
  2932. ptrdiff_t n = head.lastIndexOf('\n') + 1;
  2933. line = head.count("\n") + 1;
  2934. dstring t = toUTF32(head[n..$]);
  2935. column = t.length + 1;
  2936. if (err !is null) err.complete(entire);
  2937. }
  2938.  
  2939. override string toString() const @safe pure
  2940. {
  2941. import std.format : format;
  2942.  
  2943. string s;
  2944. if (line != 0) s = format("Line %d, column %d: ",line,column);
  2945. s ~= msg;
  2946. s ~= '\n';
  2947. if (err !is null) s = err.toString() ~ s;
  2948. return s;
  2949. }
  2950. }
  2951.  
// Short module-private name for CheckException, used by the check* functions
// and by CheckException's own constructor.
private alias Err = CheckException;
  2953.  
  2954. // Private helper functions
  2955.  
  2956. private
  2957. {
  2958. inout(T) toType(T)(scope return inout Object o)
  2959. {
  2960. T t = cast(T)(o);
  2961. if (t is null)
  2962. {
  2963. throw new InvalidTypeException("Attempt to compare a "
  2964. ~ T.stringof ~ " with an instance of another type");
  2965. }
  2966. return t;
  2967. }
  2968.  
  2969. string chop(ref string s, size_t n) @safe pure nothrow
  2970. {
  2971. if (n == -1) n = s.length;
  2972. string t = s[0 .. n];
  2973. s = s[n..$];
  2974. return t;
  2975. }
  2976.  
  2977. bool optc(ref string s, char c) @safe pure nothrow
  2978. {
  2979. immutable bool b = s.length != 0 && s[0] == c;
  2980. if (b) s = s[1..$];
  2981. return b;
  2982. }
  2983.  
  2984. void reqc(ref string s, char c) @safe pure
  2985. {
  2986. if (s.length == 0 || s[0] != c) throw new TagException("");
  2987. s = s[1..$];
  2988. }
  2989.  
  2990. char requireOneOf(ref string s, string chars) @safe pure
  2991. {
  2992. import std.string : indexOf;
  2993.  
  2994. if (s.length == 0 || indexOf(chars,s[0]) == -1)
  2995. throw new TagException("");
  2996. immutable char ch = s[0];
  2997. s = s[1..$];
  2998. return ch;
  2999. }
  3000.  
// Module-private shorthand for the global hashOf (the leading dot selects
// the module-scope symbol). Its uses lie outside this part of the file.
alias hash = .hashOf;
  3002.  
// Definitions from the XML specification
//
// Each table below is a flat list of code-point pairs: an even index holds
// the low end of a range and the following index its high end, in ascending
// order. That is the layout lookup() searches.
// NOTE(review): presumably these back isChar, isLetter, isDigit and friends
// defined elsewhere in the file - confirm.
immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
0x10000,0x10FFFF];
// Code-point ranges for base characters, stored as consecutive
// [low, high] pairs in ascending order.
immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
// Code-point ranges for ideographic characters, as [low, high] pairs.
immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
// Code-point ranges for combining characters, stored as consecutive
// [low, high] pairs.
immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
0x3099,0x3099,0x309A,0x309A];
// Code-point ranges for digits, as [low, high] pairs; the first pair is the
// ASCII digits '0'..'9'.
immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
0x0ED9,0x0F20,0x0F29];
// Code-point ranges for extender characters, as [low, high] pairs.
immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
0x3035,0x309D,0x309E,0x30FC,0x30FE];
  3075.  
  3076. bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
  3077. {
  3078. while (table.length != 0)
  3079. {
  3080. auto m = (table.length >> 1) & ~1;
  3081. if (c < table[m])
  3082. {
  3083. table = table[0 .. m];
  3084. }
  3085. else if (c > table[m+1])
  3086. {
  3087. table = table[m+2..$];
  3088. }
  3089. else return true;
  3090. }
  3091. return false;
  3092. }
  3093.  
  3094. string startOf(string s) @safe nothrow pure
  3095. {
  3096. string r;
  3097. foreach (char c;s)
  3098. {
  3099. r ~= (c < 0x20 || c > 0x7F) ? '.' : c;
  3100. if (r.length >= 40) { r ~= "___"; break; }
  3101. }
  3102. return r;
  3103. }
  3104.  
  3105. void exit(string s=null)
  3106. {
  3107. throw new XMLException(s);
  3108. }
  3109. }