Newer
Older
dub_jkp / source / dub / internal / sdlang / parser.d
  1. // SDLang-D
  2. // Written in the D programming language.
  3.  
  4. module dub.internal.sdlang.parser;
  5.  
  6. version (Have_sdlang_d) public import sdlang.parser;
  7. else:
  8.  
  9. import std.file;
  10.  
  11. import dub.internal.libInputVisitor;
  12.  
  13. import dub.internal.sdlang.ast;
  14. import dub.internal.sdlang.exception;
  15. import dub.internal.sdlang.lexer;
  16. import dub.internal.sdlang.symbol;
  17. import dub.internal.sdlang.token;
  18. import dub.internal.sdlang.util;
  19.  
  20. import dub.internal.dyaml.stdsumtype;
  21.  
  22. /// Returns root tag.
  23. Tag parseFile(string filename)
  24. {
  25. auto source = cast(string)read(filename);
  26. return parseSource(source, filename);
  27. }
  28.  
  29. /// Returns root tag. The optional 'filename' parameter can be included
  30. /// so that the SDL document's filename (if any) can be displayed with
  31. /// any syntax error messages.
  32. Tag parseSource(string source, string filename=null)
  33. {
  34. auto lexer = new Lexer(source, filename);
  35. auto parser = DOMParser(lexer);
  36. return parser.parseRoot();
  37. }
  38.  
  39. /++
  40. Parses an SDL document using StAX/Pull-style. Returns an InputRange with
  41. element type ParserEvent.
  42.  
  43. The pullParseFile version reads a file and parses it, while pullParseSource
  44. parses a string passed in. The optional 'filename' parameter in pullParseSource
  45. can be included so that the SDL document's filename (if any) can be displayed
  46. with any syntax error messages.
  47.  
  48. Warning! The FileStartEvent and FileEndEvent events *might* be removed later.
  49. See $(LINK https://github.com/Abscissa/SDLang-D/issues/17)
  50.  
  51. Example:
  52. ------------------
  53. parent 12 attr="q" {
  54. childA 34
  55. childB 56
  56. }
  57. lastTag
  58. ------------------
  59.  
  60. The ParserEvent sequence emitted for that SDL document would be as
  61. follows (indented for readability):
  62. ------------------
  63. FileStartEvent
  64. TagStartEvent (parent)
  65. ValueEvent (12)
  66. AttributeEvent (attr, "q")
  67. TagStartEvent (childA)
  68. ValueEvent (34)
  69. TagEndEvent
  70. TagStartEvent (childB)
  71. ValueEvent (56)
  72. TagEndEvent
  73. TagEndEvent
  74. TagStartEvent (lastTag)
  75. TagEndEvent
  76. FileEndEvent
  77. ------------------
  78.  
  79. Example:
  80. ------------------
foreach(event; pullParseFile("stuff.sdl"))
{
	import std.stdio;

	// ParserEvent is a SumType, so events are inspected with match!.
	// Every event type must be handled; the compiler enforces this.
	event.match!(
		(FileStartEvent _) => writeln("FileStartEvent, starting! "),
		(FileEndEvent _)   => writeln("FileEndEvent, done! "),
		(TagStartEvent e)  => writeln("TagStartEvent: ", e.namespace, ":", e.name, " @ ", e.location),
		(TagEndEvent _)    => writeln("TagEndEvent"),
		(ValueEvent e)     => writeln("ValueEvent: ", e.value),
		(AttributeEvent e) => writeln("AttributeEvent: ", e.namespace, ":", e.name, "=", e.value),
	);
}
  106. ------------------
  107. +/
  108. auto pullParseFile(string filename)
  109. {
  110. auto source = cast(string)read(filename);
  111. return parseSource(source, filename);
  112. }
  113.  
  114. ///ditto
  115. auto pullParseSource(string source, string filename=null)
  116. {
  117. auto lexer = new Lexer(source, filename);
  118. auto parser = PullParser(lexer);
  119. return inputVisitor!ParserEvent( parser );
  120. }
  121.  
  122. /// The element of the InputRange returned by pullParseFile and pullParseSource:
  123. alias ParserEvent = SumType!(
  124. FileStartEvent,
  125. FileEndEvent,
  126. TagStartEvent,
  127. TagEndEvent,
  128. ValueEvent,
  129. AttributeEvent,
  130. );
  131.  
  132. /// Event: Start of file
  133. struct FileStartEvent
  134. {
  135. Location location;
  136. }
  137.  
  138. /// Event: End of file
  139. struct FileEndEvent
  140. {
  141. Location location;
  142. }
  143.  
  144. /// Event: Start of tag
  145. struct TagStartEvent
  146. {
  147. Location location;
  148. string namespace;
  149. string name;
  150. }
  151.  
  152. /// Event: End of tag
  153. struct TagEndEvent
  154. {
  155. //Location location;
  156. }
  157.  
  158. /// Event: Found a Value in the current tag
  159. struct ValueEvent
  160. {
  161. Location location;
  162. Value value;
  163. }
  164.  
  165. /// Event: Found an Attribute in the current tag
  166. struct AttributeEvent
  167. {
  168. Location location;
  169. string namespace;
  170. string name;
  171. Value value;
  172. }
  173.  
  174. // The actual pull parser
  175. private struct PullParser
  176. {
  177. private Lexer lexer;
  178.  
  179. private struct IDFull
  180. {
  181. string namespace;
  182. string name;
  183. }
  184.  
  185. private void error(string msg)
  186. {
  187. error(lexer.front.location, msg);
  188. }
  189.  
  190. private void error(Location loc, string msg)
  191. {
  192. throw new SDLangParseException(loc, "Error: "~msg);
  193. }
  194.  
  195. private InputVisitor!(PullParser, ParserEvent) v;
  196.  
  197. void visit(InputVisitor!(PullParser, ParserEvent) v)
  198. {
  199. this.v = v;
  200. parseRoot();
  201. }
  202.  
  203. private void emit(Event)(Event event)
  204. {
  205. v.yield( ParserEvent(event) );
  206. }
  207.  
  208. /// <Root> ::= <Tags> EOF (Lookaheads: Anything)
  209. private void parseRoot()
  210. {
  211. //trace("Starting parse of file: ", lexer.filename);
  212. //trace(__FUNCTION__, ": <Root> ::= <Tags> EOF (Lookaheads: Anything)");
  213.  
  214. auto startLocation = Location(lexer.filename, 0, 0, 0);
  215. emit( FileStartEvent(startLocation) );
  216.  
  217. parseTags();
  218.  
  219. auto token = lexer.front;
  220. if(!token.matches!"EOF"())
  221. error("Expected end-of-file, not " ~ token.symbol.name);
  222.  
  223. emit( FileEndEvent(token.location) );
  224. }
  225.  
  226. /// <Tags> ::= <Tag> <Tags> (Lookaheads: Ident Value)
  227. /// | EOL <Tags> (Lookaheads: EOL)
  228. /// | {empty} (Lookaheads: Anything else, except '{')
  229. void parseTags()
  230. {
  231. //trace("Enter ", __FUNCTION__);
  232. while(true)
  233. {
  234. auto token = lexer.front;
  235. if(token.matches!"Ident"() || token.matches!"Value"())
  236. {
  237. //trace(__FUNCTION__, ": <Tags> ::= <Tag> <Tags> (Lookaheads: Ident Value)");
  238. parseTag();
  239. continue;
  240. }
  241. else if(token.matches!"EOL"())
  242. {
  243. //trace(__FUNCTION__, ": <Tags> ::= EOL <Tags> (Lookaheads: EOL)");
  244. lexer.popFront();
  245. continue;
  246. }
  247. else if(token.matches!"{"())
  248. {
  249. error("Anonymous tags must have at least one value. They cannot just have children and attributes only.");
  250. }
  251. else
  252. {
  253. //trace(__FUNCTION__, ": <Tags> ::= {empty} (Lookaheads: Anything else, except '{')");
  254. break;
  255. }
  256. }
  257. }
  258.  
  259. /// <Tag>
  260. /// ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Ident)
  261. /// | <Value> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Value)
  262. void parseTag()
  263. {
  264. auto token = lexer.front;
  265. if(token.matches!"Ident"())
  266. {
  267. //trace(__FUNCTION__, ": <Tag> ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Ident)");
  268. //trace("Found tag named: ", tag.fullName);
  269. auto id = parseIDFull();
  270. emit( TagStartEvent(token.location, id.namespace, id.name) );
  271. }
  272. else if(token.matches!"Value"())
  273. {
  274. //trace(__FUNCTION__, ": <Tag> ::= <Value> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Value)");
  275. //trace("Found anonymous tag.");
  276. emit( TagStartEvent(token.location, null, null) );
  277. }
  278. else
  279. error("Expected tag name or value, not " ~ token.symbol.name);
  280.  
  281. if(lexer.front.matches!"="())
  282. error("Anonymous tags must have at least one value. They cannot just have attributes and children only.");
  283.  
  284. parseValues();
  285. parseAttributes();
  286. parseOptChild();
  287. parseTagTerminator();
  288.  
  289. emit( TagEndEvent() );
  290. }
  291.  
  292. /// <IDFull> ::= Ident <IDSuffix> (Lookaheads: Ident)
  293. IDFull parseIDFull()
  294. {
  295. auto token = lexer.front;
  296. if(token.matches!"Ident"())
  297. {
  298. //trace(__FUNCTION__, ": <IDFull> ::= Ident <IDSuffix> (Lookaheads: Ident)");
  299. lexer.popFront();
  300. return parseIDSuffix(token.data);
  301. }
  302. else
  303. {
  304. error("Expected namespace or identifier, not " ~ token.symbol.name);
  305. assert(0);
  306. }
  307. }
  308.  
  309. /// <IDSuffix>
  310. /// ::= ':' Ident (Lookaheads: ':')
  311. /// ::= {empty} (Lookaheads: Anything else)
  312. IDFull parseIDSuffix(string firstIdent)
  313. {
  314. auto token = lexer.front;
  315. if(token.matches!":"())
  316. {
  317. //trace(__FUNCTION__, ": <IDSuffix> ::= ':' Ident (Lookaheads: ':')");
  318. lexer.popFront();
  319. token = lexer.front;
  320. if(token.matches!"Ident"())
  321. {
  322. lexer.popFront();
  323. return IDFull(firstIdent, token.data);
  324. }
  325. else
  326. {
  327. error("Expected name, not " ~ token.symbol.name);
  328. assert(0);
  329. }
  330. }
  331. else
  332. {
  333. //trace(__FUNCTION__, ": <IDSuffix> ::= {empty} (Lookaheads: Anything else)");
  334. return IDFull("", firstIdent);
  335. }
  336. }
  337.  
  338. /// <Values>
  339. /// ::= Value <Values> (Lookaheads: Value)
  340. /// | {empty} (Lookaheads: Anything else)
  341. void parseValues()
  342. {
  343. while(true)
  344. {
  345. auto token = lexer.front;
  346. if(token.matches!"Value"())
  347. {
  348. //trace(__FUNCTION__, ": <Values> ::= Value <Values> (Lookaheads: Value)");
  349. parseValue();
  350. continue;
  351. }
  352. else
  353. {
  354. //trace(__FUNCTION__, ": <Values> ::= {empty} (Lookaheads: Anything else)");
  355. break;
  356. }
  357. }
  358. }
  359.  
  360. /// Handle Value terminals that aren't part of an attribute
  361. void parseValue()
  362. {
  363. auto token = lexer.front;
  364. if(token.matches!"Value"())
  365. {
  366. //trace(__FUNCTION__, ": (Handle Value terminals that aren't part of an attribute)");
  367. auto value = token.value;
  368. //trace("In tag '", parent.fullName, "', found value: ", value);
  369. emit( ValueEvent(token.location, value) );
  370.  
  371. lexer.popFront();
  372. }
  373. else
  374. error("Expected value, not "~token.symbol.name);
  375. }
  376.  
  377. /// <Attributes>
  378. /// ::= <Attribute> <Attributes> (Lookaheads: Ident)
  379. /// | {empty} (Lookaheads: Anything else)
  380. void parseAttributes()
  381. {
  382. while(true)
  383. {
  384. auto token = lexer.front;
  385. if(token.matches!"Ident"())
  386. {
  387. //trace(__FUNCTION__, ": <Attributes> ::= <Attribute> <Attributes> (Lookaheads: Ident)");
  388. parseAttribute();
  389. continue;
  390. }
  391. else
  392. {
  393. //trace(__FUNCTION__, ": <Attributes> ::= {empty} (Lookaheads: Anything else)");
  394. break;
  395. }
  396. }
  397. }
  398.  
  399. /// <Attribute> ::= <IDFull> '=' Value (Lookaheads: Ident)
  400. void parseAttribute()
  401. {
  402. //trace(__FUNCTION__, ": <Attribute> ::= <IDFull> '=' Value (Lookaheads: Ident)");
  403. auto token = lexer.front;
  404. if(!token.matches!"Ident"())
  405. error("Expected attribute name, not "~token.symbol.name);
  406.  
  407. auto id = parseIDFull();
  408.  
  409. token = lexer.front;
  410. if(!token.matches!"="())
  411. error("Expected '=' after attribute name, not "~token.symbol.name);
  412.  
  413. lexer.popFront();
  414. token = lexer.front;
  415. if(!token.matches!"Value"())
  416. error("Expected attribute value, not "~token.symbol.name);
  417.  
  418. //trace("In tag '", parent.fullName, "', found attribute '", attr.fullName, "'");
  419. emit( AttributeEvent(token.location, id.namespace, id.name, token.value) );
  420.  
  421. lexer.popFront();
  422. }
  423.  
  424. /// <OptChild>
  425. /// ::= '{' EOL <Tags> '}' (Lookaheads: '{')
  426. /// | {empty} (Lookaheads: Anything else)
  427. void parseOptChild()
  428. {
  429. auto token = lexer.front;
  430. if(token.matches!"{")
  431. {
  432. //trace(__FUNCTION__, ": <OptChild> ::= '{' EOL <Tags> '}' (Lookaheads: '{')");
  433. lexer.popFront();
  434. token = lexer.front;
  435. if(!token.matches!"EOL"())
  436. error("Expected newline or semicolon after '{', not "~token.symbol.name);
  437.  
  438. lexer.popFront();
  439. parseTags();
  440.  
  441. token = lexer.front;
  442. if(!token.matches!"}"())
  443. error("Expected '}' after child tags, not "~token.symbol.name);
  444. lexer.popFront();
  445. }
  446. else
  447. {
  448. //trace(__FUNCTION__, ": <OptChild> ::= {empty} (Lookaheads: Anything else)");
  449. // Do nothing, no error.
  450. }
  451. }
  452.  
  453. /// <TagTerminator>
  454. /// ::= EOL (Lookahead: EOL)
  455. /// | {empty} (Lookahead: EOF)
  456. void parseTagTerminator()
  457. {
  458. auto token = lexer.front;
  459. if(token.matches!"EOL")
  460. {
  461. //trace(__FUNCTION__, ": <TagTerminator> ::= EOL (Lookahead: EOL)");
  462. lexer.popFront();
  463. }
  464. else if(token.matches!"EOF")
  465. {
  466. //trace(__FUNCTION__, ": <TagTerminator> ::= {empty} (Lookahead: EOF)");
  467. // Do nothing
  468. }
  469. else
  470. error("Expected end of tag (newline, semicolon or end-of-file), not " ~ token.symbol.name);
  471. }
  472. }
  473.  
  474. private struct DOMParser
  475. {
  476. Lexer lexer;
  477.  
  478. Tag parseRoot()
  479. {
  480. auto currTag = new Tag(null, null, "root");
  481. currTag.location = Location(lexer.filename, 0, 0, 0);
  482.  
  483. auto parser = PullParser(lexer);
  484. auto eventRange = inputVisitor!ParserEvent( parser );
  485. foreach(event; eventRange)
  486. {
  487. event.match!(
  488. (TagStartEvent e)
  489. {
  490. auto newTag = new Tag(currTag, e.namespace, e.name);
  491. newTag.location = e.location;
  492.  
  493. currTag = newTag;
  494. },
  495. (TagEndEvent _)
  496. {
  497. currTag = currTag.parent;
  498.  
  499. if(!currTag)
  500. parser.error("Internal Error: Received an extra TagEndEvent");
  501. },
  502. (ValueEvent e)
  503. {
  504. currTag.add(e.value);
  505. },
  506. (AttributeEvent e)
  507. {
  508. auto attr = new Attribute(e.namespace, e.name, e.value, e.location);
  509. currTag.add(attr);
  510. },
  511. (FileStartEvent _)
  512. {
  513. // Do nothing
  514. },
  515. (FileEndEvent _)
  516. {
  517. // There shouldn't be another parent.
  518. if(currTag.parent)
  519. parser.error("Internal Error: Unexpected end of file, not enough TagEndEvent");
  520. }
  521. );
  522. }
  523.  
  524. return currTag;
  525. }
  526. }
  527.  
  528. // Other parser tests are part of the AST's tests over in the ast module.
  529.  
  530. // Regression test, issue #16: https://github.com/Abscissa/SDLang-D/issues/16
  531. version(sdlangUnittest)
  532. unittest
  533. {
  534. import std.stdio;
  535. writeln("parser: Regression test issue #16...");
  536. stdout.flush();
  537.  
  538. // Shouldn't crash
  539. foreach(event; pullParseSource(`tag "data"`))
  540. {
  541. event.peek!FileStartEvent();
  542. }
  543. }
  544.  
  545. // Regression test, issue #31: https://github.com/Abscissa/SDLang-D/issues/31
  546. // "Escape sequence results in range violation error"
  547. version(sdlangUnittest)
  548. unittest
  549. {
  550. import std.stdio;
  551. writeln("parser: Regression test issue #31...");
  552. stdout.flush();
  553.  
  554. // Shouldn't get a Range violation
  555. parseSource(`test "\"foo\""`);
  556. }