Newer
Older
dub_jkp / source / dub / internal / sdlang / parser.d
@Sebastian Wilzbach Sebastian Wilzbach on 23 Feb 2017 12 KB Remove all trailing whitespace
  1. // SDLang-D
  2. // Written in the D programming language.
  3.  
  4. module dub.internal.sdlang.parser;
  5.  
  6. version (Have_sdlang_d) public import sdlang.parser;
  7. else:
  8.  
  9. import std.file;
  10.  
  11. import dub.internal.libInputVisitor;
  12.  
  13. import dub.internal.sdlang.ast;
  14. import dub.internal.sdlang.exception;
  15. import dub.internal.sdlang.lexer;
  16. import dub.internal.sdlang.symbol;
  17. import dub.internal.sdlang.token;
  18. import dub.internal.sdlang.util;
  19.  
  20. /// Returns root tag.
  21. Tag parseFile(string filename)
  22. {
  23. auto source = cast(string)read(filename);
  24. return parseSource(source, filename);
  25. }
  26.  
  27. /// Returns root tag. The optional 'filename' parameter can be included
  28. /// so that the SDL document's filename (if any) can be displayed with
  29. /// any syntax error messages.
  30. Tag parseSource(string source, string filename=null)
  31. {
  32. auto lexer = new Lexer(source, filename);
  33. auto parser = DOMParser(lexer);
  34. return parser.parseRoot();
  35. }
  36.  
  37. /++
  38. Parses an SDL document using StAX/Pull-style. Returns an InputRange with
  39. element type ParserEvent.
  40.  
  41. The pullParseFile version reads a file and parses it, while pullParseSource
  42. parses a string passed in. The optional 'filename' parameter in pullParseSource
  43. can be included so that the SDL document's filename (if any) can be displayed
  44. with any syntax error messages.
  45.  
  46. Warning! The FileStartEvent and FileEndEvent events *might* be removed later.
  47. See $(LINK https://github.com/Abscissa/SDLang-D/issues/17)
  48.  
  49. Example:
  50. ------------------
  51. parent 12 attr="q" {
  52. childA 34
  53. childB 56
  54. }
  55. lastTag
  56. ------------------
  57.  
  58. The ParserEvent sequence emitted for that SDL document would be as
  59. follows (indented for readability):
  60. ------------------
  61. FileStartEvent
  62. TagStartEvent (parent)
  63. ValueEvent (12)
  64. AttributeEvent (attr, "q")
  65. TagStartEvent (childA)
  66. ValueEvent (34)
  67. TagEndEvent
  68. TagStartEvent (childB)
  69. ValueEvent (56)
  70. TagEndEvent
  71. TagEndEvent
  72. TagStartEvent (lastTag)
  73. TagEndEvent
  74. FileEndEvent
  75. ------------------
  76.  
  77. Example:
  78. ------------------
  79. foreach(event; pullParseFile("stuff.sdl"))
  80. {
  81. import std.stdio;
  82.  
  83. if(event.peek!FileStartEvent())
  84. writeln("FileStartEvent, starting! ");
  85.  
  86. else if(event.peek!FileEndEvent())
  87. writeln("FileEndEvent, done! ");
  88.  
  89. else if(auto e = event.peek!TagStartEvent())
  90. writeln("TagStartEvent: ", e.namespace, ":", e.name, " @ ", e.location);
  91.  
  92. else if(event.peek!TagEndEvent())
  93. writeln("TagEndEvent");
  94.  
  95. else if(auto e = event.peek!ValueEvent())
  96. writeln("ValueEvent: ", e.value);
  97.  
  98. else if(auto e = event.peek!AttributeEvent())
  99. writeln("AttributeEvent: ", e.namespace, ":", e.name, "=", e.value);
  100.  
  101. else // Shouldn't happen
  102. throw new Exception("Received unknown parser event");
  103. }
  104. ------------------
  105. +/
  106. auto pullParseFile(string filename)
  107. {
  108. auto source = cast(string)read(filename);
  109. return parseSource(source, filename);
  110. }
  111.  
  112. ///ditto
  113. auto pullParseSource(string source, string filename=null)
  114. {
  115. auto lexer = new Lexer(source, filename);
  116. auto parser = PullParser(lexer);
  117. return inputVisitor!ParserEvent( parser );
  118. }
  119.  
/// The element of the InputRange returned by pullParseFile and pullParseSource.
///
/// Each ParserEvent holds one of the event structs listed here. The parser
/// wraps every event it emits as `ParserEvent(event)`, and consumers in this
/// module identify the concrete kind with `event.peek!SomeEvent()`.
alias ParserEvent = std.variant.Algebraic!(
	FileStartEvent,
	FileEndEvent,
	TagStartEvent,
	TagEndEvent,
	ValueEvent,
	AttributeEvent,
);
  129.  
/// Event: Start of file.
///
/// Emitted once by the pull parser before any tags are parsed.
struct FileStartEvent
{
	// The parser fills this with Location(lexer.filename, 0, 0, 0).
	Location location;
}
  135.  
/// Event: End of file.
///
/// Emitted once by the pull parser after all top-level tags, when the
/// current token is EOF.
struct FileEndEvent
{
	// Location of the EOF token.
	Location location;
}
  141.  
/// Event: Start of tag.
///
/// Every TagStartEvent is eventually followed by a matching TagEndEvent;
/// values, attributes and child tags of the tag are emitted in between.
struct TagStartEvent
{
	// Location of the tag's first token (its identifier, or its first value
	// when the tag is anonymous).
	Location location;

	// Namespace part of the tag name. The parser passes "" when the name has
	// no `namespace:` prefix and null for anonymous tags.
	string namespace;

	// Tag name; the parser passes null for anonymous tags.
	string name;
}
  149.  
/// Event: End of tag.
///
/// Carries no data: it closes the most recently started tag that has not
/// been closed yet.
struct TagEndEvent
{
	// The location field is currently disabled.
	//Location location;
}
  155.  
/// Event: Found a Value in the current tag.
///
/// Only emitted for values that are not part of an attribute; attribute
/// values are delivered inside AttributeEvent instead.
struct ValueEvent
{
	// Location of the value token.
	Location location;

	// The value carried by that token.
	Value value;
}
  162.  
/// Event: Found an Attribute in the current tag.
struct AttributeEvent
{
	// NOTE: the parser fills this with the location of the attribute's *value*
	// token (the token after '='), not the location of the attribute name.
	Location location;

	// Namespace part of the attribute name; "" when there is no
	// `namespace:` prefix.
	string namespace;

	// Attribute name.
	string name;

	// The attribute's value.
	Value value;
}
  171.  
  172. // The actual pull parser
  173. private struct PullParser
  174. {
  175. private Lexer lexer;
  176.  
  177. private struct IDFull
  178. {
  179. string namespace;
  180. string name;
  181. }
  182.  
  183. private void error(string msg)
  184. {
  185. error(lexer.front.location, msg);
  186. }
  187.  
  188. private void error(Location loc, string msg)
  189. {
  190. throw new SDLangParseException(loc, "Error: "~msg);
  191. }
  192.  
  193. private InputVisitor!(PullParser, ParserEvent) v;
  194.  
  195. void visit(InputVisitor!(PullParser, ParserEvent) v)
  196. {
  197. this.v = v;
  198. parseRoot();
  199. }
  200.  
  201. private void emit(Event)(Event event)
  202. {
  203. v.yield( ParserEvent(event) );
  204. }
  205.  
  206. /// <Root> ::= <Tags> EOF (Lookaheads: Anything)
  207. private void parseRoot()
  208. {
  209. //trace("Starting parse of file: ", lexer.filename);
  210. //trace(__FUNCTION__, ": <Root> ::= <Tags> EOF (Lookaheads: Anything)");
  211.  
  212. auto startLocation = Location(lexer.filename, 0, 0, 0);
  213. emit( FileStartEvent(startLocation) );
  214.  
  215. parseTags();
  216.  
  217. auto token = lexer.front;
  218. if(!token.matches!"EOF"())
  219. error("Expected end-of-file, not " ~ token.symbol.name);
  220.  
  221. emit( FileEndEvent(token.location) );
  222. }
  223.  
  224. /// <Tags> ::= <Tag> <Tags> (Lookaheads: Ident Value)
  225. /// | EOL <Tags> (Lookaheads: EOL)
  226. /// | {empty} (Lookaheads: Anything else, except '{')
  227. void parseTags()
  228. {
  229. //trace("Enter ", __FUNCTION__);
  230. while(true)
  231. {
  232. auto token = lexer.front;
  233. if(token.matches!"Ident"() || token.matches!"Value"())
  234. {
  235. //trace(__FUNCTION__, ": <Tags> ::= <Tag> <Tags> (Lookaheads: Ident Value)");
  236. parseTag();
  237. continue;
  238. }
  239. else if(token.matches!"EOL"())
  240. {
  241. //trace(__FUNCTION__, ": <Tags> ::= EOL <Tags> (Lookaheads: EOL)");
  242. lexer.popFront();
  243. continue;
  244. }
  245. else if(token.matches!"{"())
  246. {
  247. error("Anonymous tags must have at least one value. They cannot just have children and attributes only.");
  248. }
  249. else
  250. {
  251. //trace(__FUNCTION__, ": <Tags> ::= {empty} (Lookaheads: Anything else, except '{')");
  252. break;
  253. }
  254. }
  255. }
  256.  
  257. /// <Tag>
  258. /// ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Ident)
  259. /// | <Value> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Value)
  260. void parseTag()
  261. {
  262. auto token = lexer.front;
  263. if(token.matches!"Ident"())
  264. {
  265. //trace(__FUNCTION__, ": <Tag> ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Ident)");
  266. //trace("Found tag named: ", tag.fullName);
  267. auto id = parseIDFull();
  268. emit( TagStartEvent(token.location, id.namespace, id.name) );
  269. }
  270. else if(token.matches!"Value"())
  271. {
  272. //trace(__FUNCTION__, ": <Tag> ::= <Value> <Values> <Attributes> <OptChild> <TagTerminator> (Lookaheads: Value)");
  273. //trace("Found anonymous tag.");
  274. emit( TagStartEvent(token.location, null, null) );
  275. }
  276. else
  277. error("Expected tag name or value, not " ~ token.symbol.name);
  278.  
  279. if(lexer.front.matches!"="())
  280. error("Anonymous tags must have at least one value. They cannot just have attributes and children only.");
  281.  
  282. parseValues();
  283. parseAttributes();
  284. parseOptChild();
  285. parseTagTerminator();
  286.  
  287. emit( TagEndEvent() );
  288. }
  289.  
  290. /// <IDFull> ::= Ident <IDSuffix> (Lookaheads: Ident)
  291. IDFull parseIDFull()
  292. {
  293. auto token = lexer.front;
  294. if(token.matches!"Ident"())
  295. {
  296. //trace(__FUNCTION__, ": <IDFull> ::= Ident <IDSuffix> (Lookaheads: Ident)");
  297. lexer.popFront();
  298. return parseIDSuffix(token.data);
  299. }
  300. else
  301. {
  302. error("Expected namespace or identifier, not " ~ token.symbol.name);
  303. assert(0);
  304. }
  305. }
  306.  
  307. /// <IDSuffix>
  308. /// ::= ':' Ident (Lookaheads: ':')
  309. /// ::= {empty} (Lookaheads: Anything else)
  310. IDFull parseIDSuffix(string firstIdent)
  311. {
  312. auto token = lexer.front;
  313. if(token.matches!":"())
  314. {
  315. //trace(__FUNCTION__, ": <IDSuffix> ::= ':' Ident (Lookaheads: ':')");
  316. lexer.popFront();
  317. token = lexer.front;
  318. if(token.matches!"Ident"())
  319. {
  320. lexer.popFront();
  321. return IDFull(firstIdent, token.data);
  322. }
  323. else
  324. {
  325. error("Expected name, not " ~ token.symbol.name);
  326. assert(0);
  327. }
  328. }
  329. else
  330. {
  331. //trace(__FUNCTION__, ": <IDSuffix> ::= {empty} (Lookaheads: Anything else)");
  332. return IDFull("", firstIdent);
  333. }
  334. }
  335.  
  336. /// <Values>
  337. /// ::= Value <Values> (Lookaheads: Value)
  338. /// | {empty} (Lookaheads: Anything else)
  339. void parseValues()
  340. {
  341. while(true)
  342. {
  343. auto token = lexer.front;
  344. if(token.matches!"Value"())
  345. {
  346. //trace(__FUNCTION__, ": <Values> ::= Value <Values> (Lookaheads: Value)");
  347. parseValue();
  348. continue;
  349. }
  350. else
  351. {
  352. //trace(__FUNCTION__, ": <Values> ::= {empty} (Lookaheads: Anything else)");
  353. break;
  354. }
  355. }
  356. }
  357.  
  358. /// Handle Value terminals that aren't part of an attribute
  359. void parseValue()
  360. {
  361. auto token = lexer.front;
  362. if(token.matches!"Value"())
  363. {
  364. //trace(__FUNCTION__, ": (Handle Value terminals that aren't part of an attribute)");
  365. auto value = token.value;
  366. //trace("In tag '", parent.fullName, "', found value: ", value);
  367. emit( ValueEvent(token.location, value) );
  368.  
  369. lexer.popFront();
  370. }
  371. else
  372. error("Expected value, not "~token.symbol.name);
  373. }
  374.  
  375. /// <Attributes>
  376. /// ::= <Attribute> <Attributes> (Lookaheads: Ident)
  377. /// | {empty} (Lookaheads: Anything else)
  378. void parseAttributes()
  379. {
  380. while(true)
  381. {
  382. auto token = lexer.front;
  383. if(token.matches!"Ident"())
  384. {
  385. //trace(__FUNCTION__, ": <Attributes> ::= <Attribute> <Attributes> (Lookaheads: Ident)");
  386. parseAttribute();
  387. continue;
  388. }
  389. else
  390. {
  391. //trace(__FUNCTION__, ": <Attributes> ::= {empty} (Lookaheads: Anything else)");
  392. break;
  393. }
  394. }
  395. }
  396.  
  397. /// <Attribute> ::= <IDFull> '=' Value (Lookaheads: Ident)
  398. void parseAttribute()
  399. {
  400. //trace(__FUNCTION__, ": <Attribute> ::= <IDFull> '=' Value (Lookaheads: Ident)");
  401. auto token = lexer.front;
  402. if(!token.matches!"Ident"())
  403. error("Expected attribute name, not "~token.symbol.name);
  404.  
  405. auto id = parseIDFull();
  406.  
  407. token = lexer.front;
  408. if(!token.matches!"="())
  409. error("Expected '=' after attribute name, not "~token.symbol.name);
  410.  
  411. lexer.popFront();
  412. token = lexer.front;
  413. if(!token.matches!"Value"())
  414. error("Expected attribute value, not "~token.symbol.name);
  415.  
  416. //trace("In tag '", parent.fullName, "', found attribute '", attr.fullName, "'");
  417. emit( AttributeEvent(token.location, id.namespace, id.name, token.value) );
  418.  
  419. lexer.popFront();
  420. }
  421.  
  422. /// <OptChild>
  423. /// ::= '{' EOL <Tags> '}' (Lookaheads: '{')
  424. /// | {empty} (Lookaheads: Anything else)
  425. void parseOptChild()
  426. {
  427. auto token = lexer.front;
  428. if(token.matches!"{")
  429. {
  430. //trace(__FUNCTION__, ": <OptChild> ::= '{' EOL <Tags> '}' (Lookaheads: '{')");
  431. lexer.popFront();
  432. token = lexer.front;
  433. if(!token.matches!"EOL"())
  434. error("Expected newline or semicolon after '{', not "~token.symbol.name);
  435.  
  436. lexer.popFront();
  437. parseTags();
  438.  
  439. token = lexer.front;
  440. if(!token.matches!"}"())
  441. error("Expected '}' after child tags, not "~token.symbol.name);
  442. lexer.popFront();
  443. }
  444. else
  445. {
  446. //trace(__FUNCTION__, ": <OptChild> ::= {empty} (Lookaheads: Anything else)");
  447. // Do nothing, no error.
  448. }
  449. }
  450.  
  451. /// <TagTerminator>
  452. /// ::= EOL (Lookahead: EOL)
  453. /// | {empty} (Lookahead: EOF)
  454. void parseTagTerminator()
  455. {
  456. auto token = lexer.front;
  457. if(token.matches!"EOL")
  458. {
  459. //trace(__FUNCTION__, ": <TagTerminator> ::= EOL (Lookahead: EOL)");
  460. lexer.popFront();
  461. }
  462. else if(token.matches!"EOF")
  463. {
  464. //trace(__FUNCTION__, ": <TagTerminator> ::= {empty} (Lookahead: EOF)");
  465. // Do nothing
  466. }
  467. else
  468. error("Expected end of tag (newline, semicolon or end-of-file), not " ~ token.symbol.name);
  469. }
  470. }
  471.  
  472. private struct DOMParser
  473. {
  474. Lexer lexer;
  475.  
  476. Tag parseRoot()
  477. {
  478. auto currTag = new Tag(null, null, "root");
  479. currTag.location = Location(lexer.filename, 0, 0, 0);
  480.  
  481. auto parser = PullParser(lexer);
  482. auto eventRange = inputVisitor!ParserEvent( parser );
  483. foreach(event; eventRange)
  484. {
  485. if(auto e = event.peek!TagStartEvent())
  486. {
  487. auto newTag = new Tag(currTag, e.namespace, e.name);
  488. newTag.location = e.location;
  489.  
  490. currTag = newTag;
  491. }
  492. else if(event.peek!TagEndEvent())
  493. {
  494. currTag = currTag.parent;
  495.  
  496. if(!currTag)
  497. parser.error("Internal Error: Received an extra TagEndEvent");
  498. }
  499. else if(auto e = event.peek!ValueEvent())
  500. {
  501. currTag.add(e.value);
  502. }
  503. else if(auto e = event.peek!AttributeEvent())
  504. {
  505. auto attr = new Attribute(e.namespace, e.name, e.value, e.location);
  506. currTag.add(attr);
  507. }
  508. else if(event.peek!FileStartEvent())
  509. {
  510. // Do nothing
  511. }
  512. else if(event.peek!FileEndEvent())
  513. {
  514. // There shouldn't be another parent.
  515. if(currTag.parent)
  516. parser.error("Internal Error: Unexpected end of file, not enough TagEndEvent");
  517. }
  518. else
  519. parser.error("Internal Error: Received unknown parser event");
  520. }
  521.  
  522. return currTag;
  523. }
  524. }
  525.  
  526. // Other parser tests are part of the AST's tests over in the ast module.
  527.  
// Regression test, issue #16: https://github.com/Abscissa/SDLang-D/issues/16
// Iterates the complete event range for a one-tag document and peeks at every
// event; the test passes as long as nothing crashes. Only compiled when the
// `sdlangUnittest` version identifier is set.
version(sdlangUnittest)
unittest
{
	import std.stdio;
	writeln("parser: Regression test issue #16...");
	stdout.flush();

	// Shouldn't crash
	foreach(event; pullParseSource(`tag "data"`))
	{
		// The result is deliberately ignored; only the call itself matters.
		event.peek!FileStartEvent();
	}
}
  542.  
// Regression test, issue #31: https://github.com/Abscissa/SDLang-D/issues/31
// "Escape sequence results in range violation error"
// Parses a tag whose string value begins and ends with an escaped double
// quote; the test passes as long as parsing completes. Only compiled when the
// `sdlangUnittest` version identifier is set.
version(sdlangUnittest)
unittest
{
	import std.stdio;
	writeln("parser: Regression test issue #31...");
	stdout.flush();

	// Shouldn't get a Range violation
	parseSource(`test "\"foo\""`);
}