Newer
Older
dub_jkp / source / dub / internal / sdlang / lexer.d
@WebFreak001 WebFreak001 on 4 Feb 2023 61 KB review adjustments
  1. // SDLang-D
  2. // Written in the D programming language.
  3.  
  4. module dub.internal.sdlang.lexer;
  5.  
  6. version (Have_sdlang_d) public import sdlang.lexer;
  7. else:
  8.  
  9. import std.algorithm;
  10. import std.array;
  11. import std.base64;
  12. import std.bigint;
  13. import std.conv;
  14. import std.datetime;
  15. import std.file;
  16. import std.traits;
  17. import std.typecons;
  18. import std.uni;
  19. import std.utf;
  20.  
  21. import dub.internal.sdlang.exception;
  22. import dub.internal.sdlang.symbol;
  23. import dub.internal.sdlang.token;
  24. import dub.internal.sdlang.util;
  25.  
  26. alias dub.internal.sdlang.util.startsWith startsWith;
  27.  
  28. Token[] lexFile(string filename)
  29. {
  30. auto source = cast(string)read(filename);
  31. return lexSource(source, filename);
  32. }
  33.  
  34. Token[] lexSource(string source, string filename=null)
  35. {
  36. auto lexer = scoped!Lexer(source, filename);
  37.  
  38. // Can't use 'std.array.array(Range)' because 'lexer' is scoped
  39. // and therefore cannot have its reference copied.
  40. Appender!(Token[]) tokens;
  41. foreach(tok; lexer)
  42. tokens.put(tok);
  43.  
  44. return tokens.data;
  45. }
  46.  
// Kind of a poor-man's yield, but fast.
// Only to be used inside Lexer.popFront (and Lexer.this).
//
// Each 'accept' instantiation expands (via mixin) to a statement block
// that fills in '_front' with the finished token and returns from the
// enclosing function. The newlines are stripped so the mixin stays a
// single source line (keeps error line numbers sane).
private template accept(string symbolName)
{
	static assert(symbolName != "Value", "Value symbols must also take a value.");
	enum accept = acceptImpl!(symbolName, "null");
}
private template accept(string symbolName, string value)
{
	static assert(symbolName == "Value", "Only a Value symbol can take a value.");
	enum accept = acceptImpl!(symbolName, value);
}
// Variant allowing explicit start/end expressions for the token's
// location and data slice; an empty string selects the defaults
// (tokenStart / location.index respectively).
private template accept(string symbolName, string value, string startLocation, string endLocation)
{
	static assert(symbolName == "Value", "Only a Value symbol can take a value.");
	enum accept = ("
		{
			_front = makeToken!"~symbolName.stringof~";
			_front.value = "~value~";
			_front.location = "~(startLocation==""? "tokenStart" : startLocation)~";
			_front.data = source[
				"~(startLocation==""? "tokenStart.index" : startLocation)~"
				..
				"~(endLocation==""? "location.index" : endLocation)~"
			];
			return;
		}
	").replace("\n", "");
}
private template acceptImpl(string symbolName, string value)
{
	enum acceptImpl = ("
		{
			_front = makeToken!"~symbolName.stringof~";
			_front.value = "~value~";
			return;
		}
	").replace("\n", "");
}
  86.  
class Lexer
{
	string source;     /// The (UTF-8) source text being lexed
	string filename;   /// Used for Location info and error messages
	Location location; /// Location of current character in source

	private dchar ch;         // Current character
	private dchar nextCh;     // Lookahead character
	private size_t nextPos;   // Position of lookahead character (an index into source)
	private bool hasNextCh;   // If false, then there's no more lookahead, just EOF
	private size_t posAfterLookahead; // Position after lookahead character (an index into source)

	private Location tokenStart; // The starting location of the token being lexed

	// Length so far of the token being lexed, not including current char
	private size_t tokenLength;   // Length in UTF-8 code units
	private size_t tokenLength32; // Length in UTF-32 code units

	// Slight kludge:
	// If a numeric fragment is found after a Date (separated by arbitrary
	// whitespace), it could be the "hours" part of a DateTime, or it could
	// be a separate numeric literal that simply follows a plain Date. If the
	// latter, then the Date must be emitted, but the numeric fragment that was
	// found after it needs to be saved for the lexer's next iteration.
	//
	// It's a slight kludge, and could instead be implemented as a slightly
	// kludgey parser hack, but it's the only situation where SDL's lexing
	// needs to lookahead more than one character, so this is good enough.
	private struct LookaheadTokenInfo
	{
		bool exists            = false; // Is a saved numeric fragment pending?
		string numericFragment = "";    // The digits lexed after the Date
		bool isNegative        = false; // Was the fragment preceded by '-'?
		Location tokenStart;            // Where the saved fragment began
	}
	private LookaheadTokenInfo lookaheadTokenInfo;
  123.  
	/// Set up the lexer over 'source' and prime the first token.
	///
	/// A leading UTF-8 BOM is stripped; any other BOM is rejected
	/// because the SDL spec only allows UTF-8.
	this(string source=null, string filename=null)
	{
		this.filename = filename;
		this.source = source;

		_front = Token(symbol!"Error", Location());
		lookaheadTokenInfo = LookaheadTokenInfo.init;

		if( source.startsWith( ByteOrderMarks[BOM.UTF8] ) )
		{
			source = source[ ByteOrderMarks[BOM.UTF8].length .. $ ];
			this.source = source;
		}

		foreach(bom; ByteOrderMarks)
			if( source.startsWith(bom) )
				error(Location(filename,0,0,0), "SDL spec only supports UTF-8, not UTF-16 or UTF-32");

		if(source == "")
			mixin(accept!"EOF");

		// Prime everything: decode the first code point as lookahead,
		// shift it into 'ch' via advanceChar, then lex the first token.
		hasNextCh = true;
		nextCh = source.decode(posAfterLookahead);
		advanceChar(ErrorOnEOF.Yes);
		location = Location(filename, 0, 0, 0);
		popFront();
	}
  152.  
	/// Range interface: the lexer is exhausted once it has emitted EOF.
	@property bool empty()
	{
		return _front.symbol == symbol!"EOF";
	}

	Token _front; // The most recently lexed token (what 'front' returns)
	/// Range interface: the current token.
	@property Token front()
	{
		return _front;
	}

	/// Has the entire source been consumed? A pending lookahead numeric
	/// fragment (see LookaheadTokenInfo) still counts as remaining input.
	@property bool isEOF()
	{
		return location.index == source.length && !lookaheadTokenInfo.exists;
	}
  168.  
	/// Throw a parse error at the current character's location.
	private void error(string msg)
	{
		error(location, msg);
	}

	/// Throw a parse error at an explicit location. Never returns.
	private void error(Location loc, string msg)
	{
		throw new SDLangParseException(loc, "Error: "~msg);
	}
  178.  
  179. private Token makeToken(string symbolName)()
  180. {
  181. auto tok = Token(symbol!symbolName, tokenStart);
  182. tok.data = tokenData;
  183. return tok;
  184. }
  185.  
  186. private @property string tokenData()
  187. {
  188. return source[ tokenStart.index .. location.index ];
  189. }
  190.  
  191. /// Check the lookahead character
  192. private bool lookahead(dchar ch)
  193. {
  194. return hasNextCh && nextCh == ch;
  195. }
  196.  
  197. private bool lookahead(bool function(dchar) condition)
  198. {
  199. return hasNextCh && condition(nextCh);
  200. }
  201.  
  202. private static bool isNewline(dchar ch)
  203. {
  204. return ch == '\n' || ch == '\r' || ch == lineSep || ch == paraSep;
  205. }
  206.  
  207. /// Returns the length of the newline sequence, or zero if the current
  208. /// character is not a newline
  209. ///
  210. /// Note that there are only single character sequences and the two
  211. /// character sequence `\r\n` as used on Windows.
  212. private size_t isAtNewline()
  213. {
  214. if(ch == '\n' || ch == lineSep || ch == paraSep) return 1;
  215. else if(ch == '\r') return lookahead('\n') ? 2 : 1;
  216. else return 0;
  217. }
  218.  
  219. /// Is 'ch' a valid base 64 character?
  220. private bool isBase64(dchar ch)
  221. {
  222. if(ch >= 'A' && ch <= 'Z')
  223. return true;
  224.  
  225. if(ch >= 'a' && ch <= 'z')
  226. return true;
  227.  
  228. if(ch >= '0' && ch <= '9')
  229. return true;
  230.  
  231. return ch == '+' || ch == '/' || ch == '=';
  232. }
  233.  
  234. /// Is the current character one that's allowed
  235. /// immediately *after* an int/float literal?
  236. private bool isEndOfNumber()
  237. {
  238. if(isEOF)
  239. return true;
  240.  
  241. return !isDigit(ch) && ch != ':' && ch != '_' && !isAlpha(ch);
  242. }
  243.  
	/// Is current character the last one in an ident?
	// Cached because this can be queried repeatedly for the same
	// character; the cache is cleared when a new lookahead character
	// is decoded (see advanceChar) and at the start of each token.
	private bool isEndOfIdentCached = false;
	private bool _isEndOfIdent;
	private bool isEndOfIdent()
	{
		if(!isEndOfIdentCached)
		{
			if(!hasNextCh)
				_isEndOfIdent = true; // EOF always terminates an ident
			else
				_isEndOfIdent = !isIdentChar(nextCh);

			isEndOfIdentCached = true;
		}

		return _isEndOfIdent;
	}
  261.  
  262. /// Is 'ch' a character that's allowed *somewhere* in an identifier?
  263. private bool isIdentChar(dchar ch)
  264. {
  265. if(isAlpha(ch))
  266. return true;
  267.  
  268. else if(isNumber(ch))
  269. return true;
  270.  
  271. else
  272. return
  273. ch == '-' ||
  274. ch == '_' ||
  275. ch == '.' ||
  276. ch == '$';
  277. }
  278.  
  279. private bool isDigit(dchar ch)
  280. {
  281. return ch >= '0' && ch <= '9';
  282. }
  283.  
	private enum KeywordResult
	{
		Accept,   // Keyword is matched
		Continue, // Keyword is not matched *yet*
		Failed,   // Keyword doesn't match
	}
	/// Incrementally match the current token against 'keyword32', one
	/// character per call (driven by lexIdentKeyword). 'tokenLength32'
	/// selects which character of the keyword the current char must match.
	private KeywordResult checkKeyword(dstring keyword32)
	{
		// Still within length of keyword
		if(tokenLength32 < keyword32.length)
		{
			if(ch == keyword32[tokenLength32])
				return KeywordResult.Continue;
			else
				return KeywordResult.Failed;
		}

		// At position after keyword: only accept if the keyword isn't
		// just a prefix of a longer identifier (e.g. "trueish").
		else if(tokenLength32 == keyword32.length)
		{
			if(isEOF || !isIdentChar(ch))
			{
				debug assert(tokenData == to!string(keyword32));
				return KeywordResult.Accept;
			}
			else
				return KeywordResult.Failed;
		}

		assert(0, "Fell off end of keyword to check");
	}
  315.  
	enum ErrorOnEOF { No, Yes }

	/// Advance one code point.
	///
	/// Maintains line/column tracking, shifts the lookahead character
	/// into 'ch', and decodes the next lookahead code point (if any).
	private void advanceChar(ErrorOnEOF errorOnEOF)
	{
		if(auto cnt = isAtNewline())
		{
			// For "\r\n", isAtNewline() returns 2 while on '\r' and 1
			// while on '\n'; bumping the line only when cnt == 1 avoids
			// counting the two-character sequence twice.
			if (cnt == 1)
				location.line++;
			location.col = 0;
		}
		else
			location.col++;

		location.index = nextPos;

		nextPos = posAfterLookahead;
		ch = nextCh;

		if(!hasNextCh)
		{
			if(errorOnEOF == ErrorOnEOF.Yes)
				error("Unexpected end of file");

			return;
		}

		tokenLength32++;
		tokenLength = location.index - tokenStart.index;

		if(nextPos == source.length)
		{
			// No more lookahead; 'ch' is now the last character.
			nextCh = dchar.init;
			hasNextCh = false;
			return;
		}

		nextCh = source.decode(posAfterLookahead);
		isEndOfIdentCached = false;
	}
  356.  
  357. /// Advances the specified amount of characters
  358. private void advanceChar(size_t count, ErrorOnEOF errorOnEOF)
  359. {
  360. while(count-- > 0)
  361. advanceChar(errorOnEOF);
  362. }
  363.  
	/// Range interface: lex the next token into '_front'.
	///
	/// Dispatches on the current character to the appropriate lex*
	/// helper, after first consuming whitespace and any numeric
	/// fragment saved by lexDate (see LookaheadTokenInfo).
	void popFront()
	{
		// -- Main Lexer -------------

		eatWhite();

		if(isEOF)
			mixin(accept!"EOF");

		tokenStart = location;
		tokenLength = 0;
		tokenLength32 = 0;
		isEndOfIdentCached = false;

		// A pending numeric fragment means lexDate found a plain Date
		// followed by a separate number: resume lexing that number now.
		if(lookaheadTokenInfo.exists)
		{
			tokenStart = lookaheadTokenInfo.tokenStart;

			auto prevLATokenInfo = lookaheadTokenInfo;
			lookaheadTokenInfo = LookaheadTokenInfo.init;
			lexNumeric(prevLATokenInfo);
			return;
		}

		if(ch == '=')
		{
			advanceChar(ErrorOnEOF.No);
			mixin(accept!"=");
		}

		else if(ch == '{')
		{
			advanceChar(ErrorOnEOF.No);
			mixin(accept!"{");
		}

		else if(ch == '}')
		{
			advanceChar(ErrorOnEOF.No);
			mixin(accept!"}");
		}

		else if(ch == ':')
		{
			advanceChar(ErrorOnEOF.No);
			mixin(accept!":");
		}

		// ';' is equivalent to a newline (both end a tag).
		else if(ch == ';')
		{
			advanceChar(ErrorOnEOF.No);
			mixin(accept!"EOL");
		}

		else if(auto cnt = isAtNewline())
		{
			advanceChar(cnt, ErrorOnEOF.No);
			mixin(accept!"EOL");
		}

		else if(isAlpha(ch) || ch == '_')
			lexIdentKeyword();

		else if(ch == '"')
			lexRegularString();

		else if(ch == '`')
			lexRawString();

		else if(ch == '\'')
			lexCharacter();

		else if(ch == '[')
			lexBinary();

		else if(ch == '-' || ch == '.' || isDigit(ch))
			lexNumeric();

		else
		{
			advanceChar(ErrorOnEOF.No);
			error("Syntax error");
		}
	}
  448.  
	/// Lex Ident or Keyword
	///
	/// Runs all keyword candidates ("true", "false", "on", "off",
	/// "null") in parallel against the input, one character per loop
	/// iteration; falls back to lexIdent once every candidate has
	/// failed. A matching keyword is emitted as a Value token.
	private void lexIdentKeyword()
	{
		assert(isAlpha(ch) || ch == '_');

		// Keyword
		struct Key
		{
			dstring name;
			Value value;
			bool failed = false;
		}
		static Key[5] keywords;
		static keywordsInited = false;
		if(!keywordsInited)
		{
			// Value (as a std.variant-based type) can't be statically initialized
			keywords[0] = Key("true", Value(true ));
			keywords[1] = Key("false", Value(false));
			keywords[2] = Key("on", Value(true ));
			keywords[3] = Key("off", Value(false));
			keywords[4] = Key("null", Value(null ));
			keywordsInited = true;
		}

		// Reset the per-token match state (the array itself is static).
		foreach(ref key; keywords)
			key.failed = false;

		auto numKeys = keywords.length;

		do
		{
			foreach(ref key; keywords)
			if(!key.failed)
			{
				final switch(checkKeyword(key.name))
				{
				case KeywordResult.Accept:
					// 'accept' mixes in a return, ending the lex here.
					mixin(accept!("Value", "key.value"));

				case KeywordResult.Continue:
					break;

				case KeywordResult.Failed:
					key.failed = true;
					numKeys--;
					break;
				}
			}

			if(numKeys == 0)
			{
				lexIdent();
				return;
			}

			advanceChar(ErrorOnEOF.No);

		} while(!isEOF);

		// EOF reached: accept any keyword the input matched completely.
		foreach(ref key; keywords)
		if(!key.failed)
		if(key.name.length == tokenLength32+1)
			mixin(accept!("Value", "key.value"));

		mixin(accept!"Ident");
	}
  516.  
  517. /// Lex Ident
  518. private void lexIdent()
  519. {
  520. if(tokenLength == 0)
  521. assert(isAlpha(ch) || ch == '_');
  522.  
  523. while(!isEOF && isIdentChar(ch))
  524. advanceChar(ErrorOnEOF.No);
  525.  
  526. mixin(accept!"Ident");
  527. }
  528.  
	/// Lex regular (double-quoted, escape-processing) string.
	///
	/// Plain characters are copied in spans: 'spanStart' marks the start
	/// of the current run of unescaped characters, which is flushed to
	/// 'buf' whenever an escape interrupts it.
	private void lexRegularString()
	{
		assert(ch == '"');

		Appender!string buf;
		size_t spanStart = nextPos;

		// Flush the accumulated plain-character span into 'buf'.
		// Doesn't include current character.
		void updateBuf()
		{
			if(location.index == spanStart)
				return;

			buf.put( source[spanStart..location.index] );
		}

		advanceChar(ErrorOnEOF.Yes);
		while(ch != '"')
		{
			if(ch == '\\')
			{
				updateBuf();

				bool wasEscSequence = true;
				if(hasNextCh)
				{
					switch(nextCh)
					{
					case 'n': buf.put('\n'); break;
					case 'r': buf.put('\r'); break;
					case 't': buf.put('\t'); break;
					case '"': buf.put('\"'); break;
					case '\\': buf.put('\\'); break;
					default: wasEscSequence = false; break;
					}
				}

				if(wasEscSequence)
				{
					// Move onto the escaped character; the advanceChar at
					// the bottom of the loop then consumes it, so the next
					// span begins after it.
					advanceChar(ErrorOnEOF.Yes);
					spanStart = nextPos;
				}
				else
				{
					// Backslash not followed by a recognized escape:
					// skip whitespace after it (NOTE(review): presumably a
					// line-continuation; eatWhite is defined outside this
					// view — confirm its 'false' parameter's meaning).
					eatWhite(false);
					spanStart = location.index;
				}
			}

			else if(isNewline(ch))
				error("Unescaped newlines are only allowed in raw strings, not regular strings.");

			advanceChar(ErrorOnEOF.Yes);
		}

		updateBuf();
		advanceChar(ErrorOnEOF.No); // Skip closing double-quote
		mixin(accept!("Value", "buf.data"));
	}
  589.  
  590. /// Lex raw string
  591. private void lexRawString()
  592. {
  593. assert(ch == '`');
  594.  
  595. do
  596. advanceChar(ErrorOnEOF.Yes);
  597. while(ch != '`');
  598.  
  599. advanceChar(ErrorOnEOF.No); // Skip closing back-tick
  600. mixin(accept!("Value", "tokenData[1..$-1]"));
  601. }
  602.  
	/// Lex character literal: 'x' or an escape such as '\n'.
	/// Emits a Value token holding the dchar.
	private void lexCharacter()
	{
		assert(ch == '\'');
		advanceChar(ErrorOnEOF.Yes); // Skip opening single-quote

		dchar value;
		if(ch == '\\')
		{
			advanceChar(ErrorOnEOF.Yes); // Skip escape backslash
			switch(ch)
			{
			case 'n': value = '\n'; break;
			case 'r': value = '\r'; break;
			case 't': value = '\t'; break;
			case '\'': value = '\''; break;
			case '\\': value = '\\'; break;
			default: error("Invalid escape sequence.");
			}
		}
		else if(isNewline(ch))
			error("Newline not allowed in character literal.");
		else
			value = ch;
		advanceChar(ErrorOnEOF.Yes); // Skip the character itself

		if(ch == '\'')
			advanceChar(ErrorOnEOF.No); // Skip closing single-quote
		else
			error("Expected closing single-quote.");

		mixin(accept!("Value", "value"));
	}
  636.  
	/// Lex base64 binary literal: '[' base64-data ']'.
	/// Emits a Value token holding the decoded bytes.
	private void lexBinary()
	{
		assert(ch == '[');
		advanceChar(ErrorOnEOF.Yes);

		// Skip whitespace (including newlines) between base64 chars.
		void eatBase64Whitespace()
		{
			while(!isEOF && isWhite(ch))
			{
				if(isNewline(ch))
					advanceChar(ErrorOnEOF.Yes);

				if(!isEOF && isWhite(ch))
					eatWhite();
			}
		}

		eatBase64Whitespace();

		// Iterates all valid base64 characters, ending at ']'.
		// Skips all whitespace. Throws on invalid chars.
		// Tracks the input length mod 4 so truncated encodings are
		// reported before decoding.
		struct Base64InputRange
		{
			Lexer lexer;
			private bool isInited = false;
			private int numInputCharsMod4 = 0;

			@property bool empty()
			{
				if(lexer.ch == ']')
				{
					if(numInputCharsMod4 != 0)
						lexer.error("Length of Base64 encoding must be a multiple of 4. ("~to!string(numInputCharsMod4)~")");

					return true;
				}

				return false;
			}

			@property dchar front()
			{
				return lexer.ch;
			}

			void popFront()
			{
				auto lex = lexer;

				// The very first character is only counted here, on the
				// first call; later characters are counted below.
				if(!isInited)
				{
					if(lexer.isBase64(lexer.ch))
					{
						numInputCharsMod4++;
						numInputCharsMod4 %= 4;
					}

					isInited = true;
				}

				lex.advanceChar(lex.ErrorOnEOF.Yes);

				eatBase64Whitespace();

				if(lex.isEOF)
					lex.error("Unexpected end of file.");

				if(lex.ch != ']')
				{
					if(!lex.isBase64(lex.ch))
						lex.error("Invalid character in base64 binary literal.");

					numInputCharsMod4++;
					numInputCharsMod4 %= 4;
				}
			}
		}

		// This is a slow ugly hack. It's necessary because Base64.decode
		// currently requires the source to have known length.
		//TODO: Remove this when DMD issue #9543 is fixed.
		dchar[] tmpBuf = array(Base64InputRange(this));

		Appender!(ubyte[]) outputBuf;
		// Ugly workaround for DMD issue #9102
		//TODO: Remove this when DMD #9102 is fixed
		struct OutputBuf
		{
			void put(ubyte ch)
			{
				outputBuf.put(ch);
			}
		}

		try
			//Base64.decode(Base64InputRange(this), OutputBuf());
			Base64.decode(tmpBuf, OutputBuf());

		//TODO: Starting with dmd 2.062, this should be a Base64Exception
		catch(Exception e)
			error("Invalid character in base64 binary literal.");

		advanceChar(ErrorOnEOF.No); // Skip ']'
		mixin(accept!("Value", "outputBuf.data"));
	}
  743.  
  744. private BigInt toBigInt(bool isNegative, string absValue)
  745. {
  746. auto num = BigInt(absValue);
  747. assert(num >= 0);
  748.  
  749. if(isNegative)
  750. num = -num;
  751.  
  752. return num;
  753. }
  754.  
  755. /// Lex [0-9]+, but without emitting a token.
  756. /// This is used by the other numeric parsing functions.
  757. private string lexNumericFragment()
  758. {
  759. if(!isDigit(ch))
  760. error("Expected a digit 0-9.");
  761.  
  762. auto spanStart = location.index;
  763.  
  764. do
  765. {
  766. advanceChar(ErrorOnEOF.No);
  767. } while(!isEOF && isDigit(ch));
  768.  
  769. return source[spanStart..location.index];
  770. }
  771.  
	/// Lex anything that starts with 0-9 or '-'. Ints, floats, dates, etc.
	///
	/// If 'laTokenInfo.exists', resumes from a numeric fragment
	/// previously saved by lexDate instead of reading a fresh one
	/// from the source.
	private void lexNumeric(LookaheadTokenInfo laTokenInfo = LookaheadTokenInfo.init)
	{
		bool isNegative;
		string firstFragment;
		if(laTokenInfo.exists)
		{
			firstFragment = laTokenInfo.numericFragment;
			isNegative = laTokenInfo.isNegative;
		}
		else
		{
			assert(ch == '-' || ch == '.' || isDigit(ch));

			// Check for negative
			isNegative = ch == '-';
			if(isNegative)
				advanceChar(ErrorOnEOF.Yes);

			// Some floating point with omitted leading zero?
			if(ch == '.')
			{
				lexFloatingPoint("");
				return;
			}

			firstFragment = lexNumericFragment();
		}

		// Long integer (64-bit signed)?
		if(ch == 'L' || ch == 'l')
		{
			advanceChar(ErrorOnEOF.No);

			// BigInt(long.min) is a workaround for DMD issue #9548
			auto num = toBigInt(isNegative, firstFragment);
			if(num < BigInt(long.min) || num > long.max)
				error(tokenStart, "Value doesn't fit in 64-bit signed long integer: "~to!string(num));

			mixin(accept!("Value", "num.toLong()"));
		}

		// Float (32-bit signed)?
		else if(ch == 'F' || ch == 'f')
		{
			auto value = to!float(tokenData);
			advanceChar(ErrorOnEOF.No);
			mixin(accept!("Value", "value"));
		}

		// Double float (64-bit signed) with suffix?
		// (A 'd' followed by ':' is a time span's day count, not a suffix.)
		else if((ch == 'D' || ch == 'd') && !lookahead(':')
		)
		{
			auto value = to!double(tokenData);
			advanceChar(ErrorOnEOF.No);
			mixin(accept!("Value", "value"));
		}

		// Decimal (128+ bits signed)? Suffix "BD"/"bd" (mixed case too).
		else if(
			(ch == 'B' || ch == 'b') &&
			(lookahead('D') || lookahead('d'))
		)
		{
			auto value = to!real(tokenData);
			advanceChar(ErrorOnEOF.No);
			advanceChar(ErrorOnEOF.No);
			mixin(accept!("Value", "value"));
		}

		// Some floating point?
		else if(ch == '.')
			lexFloatingPoint(firstFragment);

		// Some date?
		else if(ch == '/' && hasNextCh && isDigit(nextCh))
			lexDate(isNegative, firstFragment);

		// Some time span?
		else if(ch == ':' || ch == 'd')
			lexTimeSpan(isNegative, firstFragment);

		// Integer (32-bit signed)?
		else if(isEndOfNumber())
		{
			auto num = toBigInt(isNegative, firstFragment);
			if(num < int.min || num > int.max)
				error(tokenStart, "Value doesn't fit in 32-bit signed integer: "~to!string(num));

			mixin(accept!("Value", "num.toInt()"));
		}

		// Invalid suffix
		else
			error("Invalid integer suffix.");
	}
  869.  
	/// Lex any floating-point literal (after the initial numeric fragment was lexed)
	///
	/// 'firstPart' is the integer portion already consumed ("" when the
	/// literal began with '.'). On entry the current character is '.'.
	private void lexFloatingPoint(string firstPart)
	{
		assert(ch == '.');
		advanceChar(ErrorOnEOF.No);

		// Side effect: advances past the fractional digits. The actual
		// text converted below is 'tokenData', so the return value is unused.
		auto secondPart = lexNumericFragment();

		try
		{
			// Double float (64-bit signed) with suffix?
			if(ch == 'D' || ch == 'd')
			{
				auto value = to!double(tokenData);
				advanceChar(ErrorOnEOF.No);
				mixin(accept!("Value", "value"));
			}

			// Float (32-bit signed)?
			else if(ch == 'F' || ch == 'f')
			{
				auto value = to!float(tokenData);
				advanceChar(ErrorOnEOF.No);
				mixin(accept!("Value", "value"));
			}

			// Decimal (128+ bits signed)? Requires the full "BD" suffix.
			else if(ch == 'B' || ch == 'b')
			{
				auto value = to!real(tokenData);
				advanceChar(ErrorOnEOF.Yes);

				if(!isEOF && (ch == 'D' || ch == 'd'))
				{
					advanceChar(ErrorOnEOF.No);
					if(isEndOfNumber())
						mixin(accept!("Value", "value"));
				}

				error("Invalid floating point suffix.");
			}

			// Double float (64-bit signed) without suffix?
			else if(isEOF || !isIdentChar(ch))
			{
				auto value = to!double(tokenData);
				mixin(accept!("Value", "value"));
			}

			// Invalid suffix
			else
				error("Invalid floating point suffix.");
		}
		catch(ConvException e)
			error("Invalid floating point literal.");
	}
  926.  
  927. private Date makeDate(bool isNegative, string yearStr, string monthStr, string dayStr)
  928. {
  929. BigInt biTmp;
  930.  
  931. biTmp = BigInt(yearStr);
  932. if(isNegative)
  933. biTmp = -biTmp;
  934. if(biTmp < int.min || biTmp > int.max)
  935. error(tokenStart, "Date's year is out of range. (Must fit within a 32-bit signed int.)");
  936. auto year = biTmp.toInt();
  937.  
  938. biTmp = BigInt(monthStr);
  939. if(biTmp < 1 || biTmp > 12)
  940. error(tokenStart, "Date's month is out of range.");
  941. auto month = biTmp.toInt();
  942.  
  943. biTmp = BigInt(dayStr);
  944. if(biTmp < 1 || biTmp > 31)
  945. error(tokenStart, "Date's month is out of range.");
  946. auto day = biTmp.toInt();
  947.  
  948. return Date(year, month, day);
  949. }
  950.  
	/// Combine a Date with lexed time-of-day fragments into a DateTimeFrac.
	///
	/// 'secondStr' and 'millisecondStr' may be "" (treated as zero).
	/// If 'isNegative', the whole time-of-day offset (and fractional
	/// seconds) is subtracted from the date instead of added.
	private DateTimeFrac makeDateTimeFrac(
		bool isNegative, Date date, string hourStr, string minuteStr,
		string secondStr, string millisecondStr
	)
	{
		BigInt biTmp;

		biTmp = BigInt(hourStr);
		if(biTmp < int.min || biTmp > int.max)
			error(tokenStart, "Datetime's hour is out of range.");
		auto numHours = biTmp.toInt();

		biTmp = BigInt(minuteStr);
		if(biTmp < 0 || biTmp > int.max)
			error(tokenStart, "Datetime's minute is out of range.");
		auto numMinutes = biTmp.toInt();

		int numSeconds = 0;
		if(secondStr != "")
		{
			biTmp = BigInt(secondStr);
			if(biTmp < 0 || biTmp > int.max)
				error(tokenStart, "Datetime's second is out of range.");
			numSeconds = biTmp.toInt();
		}

		int millisecond = 0;
		if(millisecondStr != "")
		{
			biTmp = BigInt(millisecondStr);
			if(biTmp < 0 || biTmp > int.max)
				error(tokenStart, "Datetime's millisecond is out of range.");
			millisecond = biTmp.toInt();

			// Scale short fragments as if right-padded with zeros:
			// ".5" means 500 msecs, ".05" means 50 msecs.
			if(millisecondStr.length == 1)
				millisecond *= 100;
			else if(millisecondStr.length == 2)
				millisecond *= 10;
		}

		Duration fracSecs = millisecond.msecs;

		auto offset = hours(numHours) + minutes(numMinutes) + seconds(numSeconds);

		if(isNegative)
		{
			offset = -offset;
			fracSecs = -fracSecs;
		}

		return DateTimeFrac(DateTime(date) + offset, fracSecs);
	}
  1003.  
	/// Build a Duration from lexed time-span fragments.
	///
	/// 'dayStr' and 'millisecondStr' may be "" (treated as zero).
	/// Short millisecond fragments scale as if right-padded with zeros
	/// (".5" means 500 msecs). 'isNegative' negates the whole span.
	private Duration makeDuration(
		bool isNegative, string dayStr,
		string hourStr, string minuteStr, string secondStr,
		string millisecondStr
	)
	{
		BigInt biTmp;

		long day = 0;
		if(dayStr != "")
		{
			biTmp = BigInt(dayStr);
			if(biTmp < long.min || biTmp > long.max)
				error(tokenStart, "Time span's day is out of range.");
			day = biTmp.toLong();
		}

		biTmp = BigInt(hourStr);
		if(biTmp < long.min || biTmp > long.max)
			error(tokenStart, "Time span's hour is out of range.");
		auto hour = biTmp.toLong();

		biTmp = BigInt(minuteStr);
		if(biTmp < long.min || biTmp > long.max)
			error(tokenStart, "Time span's minute is out of range.");
		auto minute = biTmp.toLong();

		biTmp = BigInt(secondStr);
		if(biTmp < long.min || biTmp > long.max)
			error(tokenStart, "Time span's second is out of range.");
		auto second = biTmp.toLong();

		long millisecond = 0;
		if(millisecondStr != "")
		{
			biTmp = BigInt(millisecondStr);
			if(biTmp < long.min || biTmp > long.max)
				error(tokenStart, "Time span's millisecond is out of range.");
			millisecond = biTmp.toLong();

			if(millisecondStr.length == 1)
				millisecond *= 100;
			else if(millisecondStr.length == 2)
				millisecond *= 10;
		}

		auto duration =
			dur!"days" (day) +
			dur!"hours" (hour) +
			dur!"minutes"(minute) +
			dur!"seconds"(second) +
			dur!"msecs" (millisecond);

		if(isNegative)
			duration = -duration;

		return duration;
	}
  1062.  
	// This has to reproduce some weird corner case behaviors from the
	// original Java version of SDL. So some of this may seem weird.
	//
	// Returns a null Nullable for any string it can't interpret as a
	// "+HH:MM"-style offset ("unknown timezone").
	private Nullable!Duration getTimeZoneOffset(string str)
	{
		if(str.length < 2)
			return Nullable!Duration(); // Unknown timezone

		if(str[0] != '+' && str[0] != '-')
			return Nullable!Duration(); // Unknown timezone

		auto isNegative = str[0] == '-';

		// Split into hour and minute parts. When non-empty,
		// numMinutesStr retains its leading ':'.
		string numHoursStr;
		string numMinutesStr;
		if(str[1] == ':')
		{
			numMinutesStr = str[1..$];
			numHoursStr = "";
		}
		else
		{
			numMinutesStr = str.find(':');
			numHoursStr = str[1 .. $-numMinutesStr.length];
		}

		long numHours = 0;
		long numMinutes = 0;
		bool isUnknown = false;
		try
		{
			switch(numHoursStr.length)
			{
			case 0:
				if(numMinutesStr.length == 3)
				{
					numHours = 0;
					numMinutes = to!long(numMinutesStr[1..$]);
				}
				else
					isUnknown = true;
				break;

			case 1:
			case 2:
				if(numMinutesStr.length == 0)
				{
					numHours = to!long(numHoursStr);
					numMinutes = 0;
				}
				else if(numMinutesStr.length == 3)
				{
					numHours = to!long(numHoursStr);
					numMinutes = to!long(numMinutesStr[1..$]);
				}
				else
					isUnknown = true;
				break;

			default:
				if(numMinutesStr.length == 0)
				{
					// Yes, this is correct (a corner case inherited from
					// the original Java SDL implementation).
					numHours = 0;
					numMinutes = to!long(numHoursStr[1..$]);
				}
				else
					isUnknown = true;
				break;
			}
		}
		catch(ConvException e)
			isUnknown = true;

		if(isUnknown)
			return Nullable!Duration(); // Unknown timezone

		auto timeZoneOffset = hours(numHours) + minutes(numMinutes);
		if(isNegative)
			timeZoneOffset = -timeZoneOffset;

		// Timezone valid
		return Nullable!Duration(timeZoneOffset);
	}
  1146.  
/// Lex date or datetime (after the initial numeric fragment was lexed)
///
/// Entered with 'ch' on the '/' that follows the year digits. Lexes the
/// remainder of the date and, if a time portion follows on the same line,
/// the time and optional timezone as well. Emits (via the accept mixin,
/// which ends this call) one of: Date, DateTimeFrac,
/// DateTimeFracUnknownZone, or SysTime.
private void lexDate(bool isDateNegative, string yearStr)
{
	assert(ch == '/');

	// Lex months
	advanceChar(ErrorOnEOF.Yes); // Skip '/'
	auto monthStr = lexNumericFragment();

	// Lex days
	if(ch != '/')
		error("Invalid date format: Missing days.");
	advanceChar(ErrorOnEOF.Yes); // Skip '/'
	auto dayStr = lexNumericFragment();

	auto date = makeDate(isDateNegative, yearStr, monthStr, dayStr);

	if(!isEndOfNumber() && ch != '/')
		error("Dates cannot have suffixes.");

	// Date? (end of input right after the date)
	if(isEOF)
		mixin(accept!("Value", "date"));

	// Remember where the date itself ended, in case we emit a plain Date
	// token below whose end location must exclude trailing whitespace.
	auto endOfDate = location;

	// Skip whitespace/comments between the date and a possible time portion.
	// A newline only continues the scan when escaped by a backslash.
	while(
		!isEOF &&
		( ch == '\\' || ch == '/' || (isWhite(ch) && !isNewline(ch)) )
	)
	{
		if(ch == '\\' && hasNextCh && isNewline(nextCh))
		{
			advanceChar(ErrorOnEOF.Yes);
			if(isAtNewline())
				advanceChar(ErrorOnEOF.Yes);
			advanceChar(ErrorOnEOF.No);
		}

		eatWhite();
	}

	// Date? (nothing that could start a time follows)
	if(isEOF || (!isDigit(ch) && ch != '-'))
		mixin(accept!("Value", "date", "", "endOfDate.index"));

	auto startOfTime = location;

	// Is time negative?
	bool isTimeNegative = ch == '-';
	if(isTimeNegative)
		advanceChar(ErrorOnEOF.Yes);

	// Lex hours
	auto hourStr = ch == '.'? "" : lexNumericFragment();

	// Lex minutes
	if(ch != ':')
	{
		// No minutes found. Therefore we had a plain Date followed
		// by a numeric literal, not a DateTime. Stash the already-lexed
		// fragment so the main lexer can re-emit it as the next token.
		lookaheadTokenInfo.exists          = true;
		lookaheadTokenInfo.numericFragment = hourStr;
		lookaheadTokenInfo.isNegative      = isTimeNegative;
		lookaheadTokenInfo.tokenStart      = startOfTime;
		mixin(accept!("Value", "date", "", "endOfDate.index"));
	}
	advanceChar(ErrorOnEOF.Yes); // Skip ':'
	auto minuteStr = lexNumericFragment();

	// Lex seconds, if exists
	string secondStr;
	if(ch == ':')
	{
		advanceChar(ErrorOnEOF.Yes); // Skip ':'
		secondStr = lexNumericFragment();
	}

	// Lex milliseconds, if exists
	string millisecondStr;
	if(ch == '.')
	{
		advanceChar(ErrorOnEOF.Yes); // Skip '.'
		millisecondStr = lexNumericFragment();
	}

	auto dateTimeFrac = makeDateTimeFrac(isTimeNegative, date, hourStr, minuteStr, secondStr, millisecondStr);

	// Lex zone, if exists
	if(ch == '-')
	{
		advanceChar(ErrorOnEOF.Yes); // Skip '-'
		auto timezoneStart = location;

		if(!isAlpha(ch))
			error("Invalid timezone format.");

		// Timezone name runs to the next whitespace (or EOF).
		while(!isEOF && !isWhite(ch))
			advanceChar(ErrorOnEOF.No);

		auto timezoneStr = source[timezoneStart.index..location.index];

		// "GMT"-prefixed zone: try to parse the rest as an ISO-style offset.
		if(timezoneStr.startsWith("GMT"))
		{
			auto isoPart = timezoneStr["GMT".length..$];
			auto offset = getTimeZoneOffset(isoPart);

			if(offset.isNull())
			{
				// Unknown time zone
				mixin(accept!("Value", "DateTimeFracUnknownZone(dateTimeFrac.dateTime, dateTimeFrac.fracSecs, timezoneStr)"));
			}
			else
			{
				auto timezone = new immutable SimpleTimeZone(offset.get());
				auto fsecs = dateTimeFrac.fracSecs;
				mixin(accept!("Value", "SysTime(dateTimeFrac.dateTime, fsecs, timezone)"));
			}
		}

		// Not a GMT offset: try the name as a POSIX timezone database entry.
		try
		{
			auto timezone = PosixTimeZone.getTimeZone(timezoneStr);
			if (timezone) {
				auto fsecs = dateTimeFrac.fracSecs;
				mixin(accept!("Value", "SysTime(dateTimeFrac.dateTime, fsecs, timezone)"));
			}
		}
		catch(TimeException e)
		{
			// Time zone not found. So just move along to "Unknown time zone" below.
		}

		// Unknown time zone
		mixin(accept!("Value", "DateTimeFracUnknownZone(dateTimeFrac.dateTime, dateTimeFrac.fracSecs, timezoneStr)"));
	}

	if(!isEndOfNumber())
		error("Date-Times cannot have suffixes.");

	mixin(accept!("Value", "dateTimeFrac"));
}
  1288.  
  1289. /// Lex time span (after the initial numeric fragment was lexed)
  1290. private void lexTimeSpan(bool isNegative, string firstPart)
  1291. {
  1292. assert(ch == ':' || ch == 'd');
  1293.  
  1294. string dayStr = "";
  1295. string hourStr;
  1296.  
  1297. // Lexed days?
  1298. bool hasDays = ch == 'd';
  1299. if(hasDays)
  1300. {
  1301. dayStr = firstPart;
  1302. advanceChar(ErrorOnEOF.Yes); // Skip 'd'
  1303.  
  1304. // Lex hours
  1305. if(ch != ':')
  1306. error("Invalid time span format: Missing hours.");
  1307. advanceChar(ErrorOnEOF.Yes); // Skip ':'
  1308. hourStr = lexNumericFragment();
  1309. }
  1310. else
  1311. hourStr = firstPart;
  1312.  
  1313. // Lex minutes
  1314. if(ch != ':')
  1315. error("Invalid time span format: Missing minutes.");
  1316. advanceChar(ErrorOnEOF.Yes); // Skip ':'
  1317. auto minuteStr = lexNumericFragment();
  1318.  
  1319. // Lex seconds
  1320. if(ch != ':')
  1321. error("Invalid time span format: Missing seconds.");
  1322. advanceChar(ErrorOnEOF.Yes); // Skip ':'
  1323. auto secondStr = lexNumericFragment();
  1324.  
  1325. // Lex milliseconds, if exists
  1326. string millisecondStr = "";
  1327. if(ch == '.')
  1328. {
  1329. advanceChar(ErrorOnEOF.Yes); // Skip '.'
  1330. millisecondStr = lexNumericFragment();
  1331. }
  1332.  
  1333. if(!isEndOfNumber())
  1334. error("Time spans cannot have suffixes.");
  1335.  
  1336. auto duration = makeDuration(isNegative, dayStr, hourStr, minuteStr, secondStr, millisecondStr);
  1337. mixin(accept!("Value", "duration"));
  1338. }
  1339.  
/// Advances past whitespace and comments
///
/// Stops on the first significant character. A bare newline ends the
/// scan; after a '\' line continuation, whitespace through the escaped
/// newline is consumed instead. Pass allowComments=false to stop at
/// '#', '//', '--' and '/*' rather than skipping them.
private void eatWhite(bool allowComments=true)
{
	// -- Comment/Whitespace Lexer -------------

	enum State
	{
		normal,
		lineComment,  // Got "#" or "//" or "--", Eating everything until newline
		blockComment, // Got "/*", Eating everything until "*/"
	}

	if(isEOF)
		return;

	Location commentStart;
	State state = State.normal;
	bool consumeNewlines = false;    // Saw '\': the next newline belongs to the whitespace
	bool hasConsumedNewline = false; // That continued newline has now been consumed
	while(true)
	{
		final switch(state)
		{
		case State.normal:

			if(ch == '\\')
			{
				// Start of a line continuation; remember where, for error reporting.
				commentStart = location;
				consumeNewlines = true;
				hasConsumedNewline = false;
			}

			else if(ch == '#')
			{
				if(!allowComments)
					return;

				commentStart = location;
				state = State.lineComment;
				continue;
			}

			else if(ch == '/' || ch == '-')
			{
				commentStart = location;
				// Doubled char ("//" or "--") starts a line comment.
				if(lookahead(ch))
				{
					if(!allowComments)
						return;

					advanceChar(ErrorOnEOF.No);
					state = State.lineComment;
					continue;
				}
				else if(ch == '/' && lookahead('*'))
				{
					if(!allowComments)
						return;

					advanceChar(ErrorOnEOF.No);
					state = State.blockComment;
					continue;
				}
				else
					return; // Done
			}
			else if(isAtNewline())
			{
				if(consumeNewlines)
					hasConsumedNewline = true;
				else
					return; // Done
			}
			else if(!isWhite(ch))
			{
				if(consumeNewlines)
				{
					if(hasConsumedNewline)
						return; // Done
					else
						error("Only whitespace can come between a line-continuation backslash and the following newline.");
				}
				else
					return; // Done
			}

			break;

		case State.lineComment:
			// Line comment ends just before the newline (the newline itself
			// is handled by the normal state on the next iteration).
			if(lookahead(&isNewline))
				state = State.normal;
			break;

		case State.blockComment:
			if(ch == '*' && lookahead('/'))
			{
				advanceChar(ErrorOnEOF.No);
				state = State.normal;
			}
			break;
		}

		advanceChar(ErrorOnEOF.No);
		if(isEOF)
		{
			// Reached EOF

			if(consumeNewlines && !hasConsumedNewline)
				error("Missing newline after line-continuation backslash.");

			else if(state == State.blockComment)
				error(commentStart, "Unterminated block comment.");

			else
				return; // Done, reached EOF
		}
	}
}
  1458. }
  1459.  
version(sdlangUnittest)
{
	import std.stdio;

	// Shared dummy locations used to construct expected tokens below.
	private auto loc  = Location("filename", 0, 0, 0);
	private auto loc2 = Location("a", 1, 1, 1);

	unittest
	{
		// Token equality ignores location and the trailing data string:
		// the second assertion holds despite loc != loc2 and "A" != "B".
		assert([Token(symbol!"EOL",loc) ] == [Token(symbol!"EOL",loc) ] );
		assert([Token(symbol!"EOL",loc,Value(7),"A")] == [Token(symbol!"EOL",loc2,Value(7),"B")] );
	}

	// Failure tally: testLex/testLexThrows log to stderr and increment
	// this instead of throwing, so one run reports all failures.
	private int numErrors = 0;
  1474. private void testLex(string source, Token[] expected, bool test_locations = false, string file=__FILE__, size_t line=__LINE__)
  1475. {
  1476. Token[] actual;
  1477. try
  1478. actual = lexSource(source, "filename");
  1479. catch(SDLangParseException e)
  1480. {
  1481. numErrors++;
  1482. stderr.writeln(file, "(", line, "): testLex failed on: ", source);
  1483. stderr.writeln(" Expected:");
  1484. stderr.writeln(" ", expected);
  1485. stderr.writeln(" Actual: SDLangParseException thrown:");
  1486. stderr.writeln(" ", e.msg);
  1487. return;
  1488. }
  1489.  
  1490. bool is_same = actual == expected;
  1491. if (is_same && test_locations) {
  1492. is_same = actual.map!(t => t.location).equal(expected.map!(t => t.location));
  1493. }
  1494.  
  1495. if(!is_same)
  1496. {
  1497. numErrors++;
  1498. stderr.writeln(file, "(", line, "): testLex failed on: ", source);
  1499. stderr.writeln(" Expected:");
  1500. stderr.writeln(" ", expected);
  1501. stderr.writeln(" Actual:");
  1502. stderr.writeln(" ", actual);
  1503.  
  1504. if(expected.length > 1 || actual.length > 1)
  1505. {
  1506. stderr.writeln(" expected.length: ", expected.length);
  1507. stderr.writeln(" actual.length: ", actual.length);
  1508.  
  1509. if(actual.length == expected.length)
  1510. foreach(i; 0..actual.length)
  1511. if(actual[i] != expected[i])
  1512. {
  1513. stderr.writeln(" Unequal at index #", i, ":");
  1514. stderr.writeln(" Expected:");
  1515. stderr.writeln(" ", expected[i]);
  1516. stderr.writeln(" Actual:");
  1517. stderr.writeln(" ", actual[i]);
  1518. }
  1519. }
  1520. }
  1521. }
  1522.  
  1523. private void testLexThrows(string file=__FILE__, size_t line=__LINE__)(string source)
  1524. {
  1525. bool hadException = false;
  1526. Token[] actual;
  1527. try
  1528. actual = lexSource(source, "filename");
  1529. catch(SDLangParseException e)
  1530. hadException = true;
  1531.  
  1532. if(!hadException)
  1533. {
  1534. numErrors++;
  1535. stderr.writeln(file, "(", line, "): testLex failed on: ", source);
  1536. stderr.writeln(" Expected SDLangParseException");
  1537. stderr.writeln(" Actual:");
  1538. stderr.writeln(" ", actual);
  1539. }
  1540. }
  1541. }
  1542.  
  1543. version(sdlangUnittest)
  1544. unittest
  1545. {
  1546. writeln("Unittesting sdlang lexer...");
  1547. stdout.flush();
  1548.  
  1549. testLex("", []);
  1550. testLex(" ", []);
  1551. testLex("\\\n", []);
  1552. testLex("/*foo*/", []);
  1553. testLex("/* multiline \n comment */", []);
  1554. testLex("/* * */", []);
  1555. testLexThrows("/* ");
  1556.  
  1557. testLex(":", [ Token(symbol!":", loc) ]);
  1558. testLex("=", [ Token(symbol!"=", loc) ]);
  1559. testLex("{", [ Token(symbol!"{", loc) ]);
  1560. testLex("}", [ Token(symbol!"}", loc) ]);
  1561. testLex(";", [ Token(symbol!"EOL",loc) ]);
  1562. testLex("\n", [ Token(symbol!"EOL",loc) ]);
  1563.  
  1564. testLex("foo", [ Token(symbol!"Ident",loc,Value(null),"foo") ]);
  1565. testLex("_foo", [ Token(symbol!"Ident",loc,Value(null),"_foo") ]);
  1566. testLex("foo.bar", [ Token(symbol!"Ident",loc,Value(null),"foo.bar") ]);
  1567. testLex("foo-bar", [ Token(symbol!"Ident",loc,Value(null),"foo-bar") ]);
  1568. testLex("foo.", [ Token(symbol!"Ident",loc,Value(null),"foo.") ]);
  1569. testLex("foo-", [ Token(symbol!"Ident",loc,Value(null),"foo-") ]);
  1570. testLexThrows(".foo");
  1571.  
  1572. testLex("foo bar", [
  1573. Token(symbol!"Ident",loc,Value(null),"foo"),
  1574. Token(symbol!"Ident",loc,Value(null),"bar"),
  1575. ]);
  1576. testLex("foo \\ \n \n bar", [
  1577. Token(symbol!"Ident",loc,Value(null),"foo"),
  1578. Token(symbol!"Ident",loc,Value(null),"bar"),
  1579. ]);
  1580. testLex("foo \\ \n \\ \n bar", [
  1581. Token(symbol!"Ident",loc,Value(null),"foo"),
  1582. Token(symbol!"Ident",loc,Value(null),"bar"),
  1583. ]);
  1584. testLexThrows("foo \\ ");
  1585. testLexThrows("foo \\ bar");
  1586. testLexThrows("foo \\ \n \\ ");
  1587. testLexThrows("foo \\ \n \\ bar");
  1588.  
  1589. testLex("foo : = { } ; \n bar \n", [
  1590. Token(symbol!"Ident",loc,Value(null),"foo"),
  1591. Token(symbol!":",loc),
  1592. Token(symbol!"=",loc),
  1593. Token(symbol!"{",loc),
  1594. Token(symbol!"}",loc),
  1595. Token(symbol!"EOL",loc),
  1596. Token(symbol!"EOL",loc),
  1597. Token(symbol!"Ident",loc,Value(null),"bar"),
  1598. Token(symbol!"EOL",loc),
  1599. ]);
  1600.  
  1601. testLexThrows("<");
  1602. testLexThrows("*");
  1603. testLexThrows(`\`);
  1604.  
  1605. // Integers
  1606. testLex( "7", [ Token(symbol!"Value",loc,Value(cast( int) 7)) ]);
  1607. testLex( "-7", [ Token(symbol!"Value",loc,Value(cast( int)-7)) ]);
  1608. testLex( "7L", [ Token(symbol!"Value",loc,Value(cast(long) 7)) ]);
  1609. testLex( "7l", [ Token(symbol!"Value",loc,Value(cast(long) 7)) ]);
  1610. testLex("-7L", [ Token(symbol!"Value",loc,Value(cast(long)-7)) ]);
  1611. testLex( "0", [ Token(symbol!"Value",loc,Value(cast( int) 0)) ]);
  1612. testLex( "-0", [ Token(symbol!"Value",loc,Value(cast( int) 0)) ]);
  1613.  
  1614. testLex("7/**/", [ Token(symbol!"Value",loc,Value(cast( int) 7)) ]);
  1615. testLex("7#", [ Token(symbol!"Value",loc,Value(cast( int) 7)) ]);
  1616.  
  1617. testLex("7 A", [
  1618. Token(symbol!"Value",loc,Value(cast(int)7)),
  1619. Token(symbol!"Ident",loc,Value( null),"A"),
  1620. ]);
  1621. testLexThrows("7A");
  1622. testLexThrows("-A");
  1623. testLexThrows(`-""`);
  1624.  
  1625. testLex("7;", [
  1626. Token(symbol!"Value",loc,Value(cast(int)7)),
  1627. Token(symbol!"EOL",loc),
  1628. ]);
  1629.  
  1630. // Floats
  1631. testLex("1.2F" , [ Token(symbol!"Value",loc,Value(cast( float)1.2)) ]);
  1632. testLex("1.2f" , [ Token(symbol!"Value",loc,Value(cast( float)1.2)) ]);
  1633. testLex("1.2" , [ Token(symbol!"Value",loc,Value(cast(double)1.2)) ]);
  1634. testLex("1.2D" , [ Token(symbol!"Value",loc,Value(cast(double)1.2)) ]);
  1635. testLex("1.2d" , [ Token(symbol!"Value",loc,Value(cast(double)1.2)) ]);
  1636. testLex("1.2BD", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]);
  1637. testLex("1.2bd", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]);
  1638. testLex("1.2Bd", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]);
  1639. testLex("1.2bD", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]);
  1640.  
  1641. testLex(".2F" , [ Token(symbol!"Value",loc,Value(cast( float)0.2)) ]);
  1642. testLex(".2" , [ Token(symbol!"Value",loc,Value(cast(double)0.2)) ]);
  1643. testLex(".2D" , [ Token(symbol!"Value",loc,Value(cast(double)0.2)) ]);
  1644. testLex(".2BD", [ Token(symbol!"Value",loc,Value(cast( real)0.2)) ]);
  1645.  
  1646. testLex("-1.2F" , [ Token(symbol!"Value",loc,Value(cast( float)-1.2)) ]);
  1647. testLex("-1.2" , [ Token(symbol!"Value",loc,Value(cast(double)-1.2)) ]);
  1648. testLex("-1.2D" , [ Token(symbol!"Value",loc,Value(cast(double)-1.2)) ]);
  1649. testLex("-1.2BD", [ Token(symbol!"Value",loc,Value(cast( real)-1.2)) ]);
  1650.  
  1651. testLex("-.2F" , [ Token(symbol!"Value",loc,Value(cast( float)-0.2)) ]);
  1652. testLex("-.2" , [ Token(symbol!"Value",loc,Value(cast(double)-0.2)) ]);
  1653. testLex("-.2D" , [ Token(symbol!"Value",loc,Value(cast(double)-0.2)) ]);
  1654. testLex("-.2BD", [ Token(symbol!"Value",loc,Value(cast( real)-0.2)) ]);
  1655.  
  1656. testLex( "0.0" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]);
  1657. testLex( "0.0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]);
  1658. testLex( "0.0BD", [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]);
  1659. testLex("-0.0" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]);
  1660. testLex("-0.0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]);
  1661. testLex("-0.0BD", [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]);
  1662. testLex( "7F" , [ Token(symbol!"Value",loc,Value(cast( float)7.0)) ]);
  1663. testLex( "7D" , [ Token(symbol!"Value",loc,Value(cast(double)7.0)) ]);
  1664. testLex( "7BD" , [ Token(symbol!"Value",loc,Value(cast( real)7.0)) ]);
  1665. testLex( "0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]);
  1666. testLex( "0D" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]);
  1667. testLex( "0BD" , [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]);
  1668. testLex("-0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]);
  1669. testLex("-0D" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]);
  1670. testLex("-0BD" , [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]);
  1671.  
  1672. testLex("1.2 F", [
  1673. Token(symbol!"Value",loc,Value(cast(double)1.2)),
  1674. Token(symbol!"Ident",loc,Value( null),"F"),
  1675. ]);
  1676. testLexThrows("1.2A");
  1677. testLexThrows("1.2B");
  1678. testLexThrows("1.2BDF");
  1679.  
  1680. testLex("1.2;", [
  1681. Token(symbol!"Value",loc,Value(cast(double)1.2)),
  1682. Token(symbol!"EOL",loc),
  1683. ]);
  1684.  
  1685. testLex("1.2F;", [
  1686. Token(symbol!"Value",loc,Value(cast(float)1.2)),
  1687. Token(symbol!"EOL",loc),
  1688. ]);
  1689.  
  1690. testLex("1.2BD;", [
  1691. Token(symbol!"Value",loc,Value(cast(real)1.2)),
  1692. Token(symbol!"EOL",loc),
  1693. ]);
  1694.  
  1695. // Booleans and null
  1696. testLex("true", [ Token(symbol!"Value",loc,Value( true)) ]);
  1697. testLex("false", [ Token(symbol!"Value",loc,Value(false)) ]);
  1698. testLex("on", [ Token(symbol!"Value",loc,Value( true)) ]);
  1699. testLex("off", [ Token(symbol!"Value",loc,Value(false)) ]);
  1700. testLex("null", [ Token(symbol!"Value",loc,Value( null)) ]);
  1701.  
  1702. testLex("TRUE", [ Token(symbol!"Ident",loc,Value(null),"TRUE") ]);
  1703. testLex("true ", [ Token(symbol!"Value",loc,Value(true)) ]);
  1704. testLex("true ", [ Token(symbol!"Value",loc,Value(true)) ]);
  1705. testLex("tru", [ Token(symbol!"Ident",loc,Value(null),"tru") ]);
  1706. testLex("truX", [ Token(symbol!"Ident",loc,Value(null),"truX") ]);
  1707. testLex("trueX", [ Token(symbol!"Ident",loc,Value(null),"trueX") ]);
  1708.  
  1709. // Raw Backtick Strings
  1710. testLex("`hello world`", [ Token(symbol!"Value",loc,Value(`hello world` )) ]);
  1711. testLex("` hello world `", [ Token(symbol!"Value",loc,Value(` hello world ` )) ]);
  1712. testLex("`hello \\t world`", [ Token(symbol!"Value",loc,Value(`hello \t world`)) ]);
  1713. testLex("`hello \\n world`", [ Token(symbol!"Value",loc,Value(`hello \n world`)) ]);
  1714. testLex("`hello \n world`", [ Token(symbol!"Value",loc,Value("hello \n world")) ]);
  1715. testLex("`hello \r\n world`", [ Token(symbol!"Value",loc,Value("hello \r\n world")) ]);
  1716. testLex("`hello \"world\"`", [ Token(symbol!"Value",loc,Value(`hello "world"` )) ]);
  1717.  
  1718. testLexThrows("`foo");
  1719. testLexThrows("`");
  1720.  
  1721. // Double-Quote Strings
  1722. testLex(`"hello world"`, [ Token(symbol!"Value",loc,Value("hello world" )) ]);
  1723. testLex(`" hello world "`, [ Token(symbol!"Value",loc,Value(" hello world " )) ]);
  1724. testLex(`"hello \t world"`, [ Token(symbol!"Value",loc,Value("hello \t world")) ]);
  1725. testLex(`"hello \n world"`, [ Token(symbol!"Value",loc,Value("hello \n world")) ]);
  1726. testLex("\"hello \\\n world\"", [ Token(symbol!"Value",loc,Value("hello world" )) ]);
  1727. testLex("\"hello \\ \n world\"", [ Token(symbol!"Value",loc,Value("hello world" )) ]);
  1728. testLex("\"hello \\ \n\n world\"", [ Token(symbol!"Value",loc,Value("hello world" )) ]);
  1729. testLex(`"\"hello world\""`, [ Token(symbol!"Value",loc,Value(`"hello world"` )) ]);
  1730.  
  1731. testLexThrows("\"hello \n world\"");
  1732. testLexThrows(`"foo`);
  1733. testLexThrows(`"`);
  1734.  
  1735. // Characters
  1736. testLex("'a'", [ Token(symbol!"Value",loc,Value(cast(dchar) 'a')) ]);
  1737. testLex("'\\n'", [ Token(symbol!"Value",loc,Value(cast(dchar)'\n')) ]);
  1738. testLex("'\\t'", [ Token(symbol!"Value",loc,Value(cast(dchar)'\t')) ]);
  1739. testLex("'\t'", [ Token(symbol!"Value",loc,Value(cast(dchar)'\t')) ]);
  1740. testLex("'\\''", [ Token(symbol!"Value",loc,Value(cast(dchar)'\'')) ]);
  1741. testLex(`'\\'`, [ Token(symbol!"Value",loc,Value(cast(dchar)'\\')) ]);
  1742.  
  1743. testLexThrows("'a");
  1744. testLexThrows("'aa'");
  1745. testLexThrows("''");
  1746. testLexThrows("'\\\n'");
  1747. testLexThrows("'\n'");
  1748. testLexThrows(`'\`);
  1749. testLexThrows(`'\'`);
  1750. testLexThrows("'");
  1751.  
  1752. // Unicode
  1753. testLex("日本語", [ Token(symbol!"Ident",loc,Value(null), "日本語") ]);
  1754. testLex("`おはよう、日本。`", [ Token(symbol!"Value",loc,Value(`おはよう、日本。`)) ]);
  1755. testLex(`"おはよう、日本。"`, [ Token(symbol!"Value",loc,Value(`おはよう、日本。`)) ]);
  1756. testLex("'月'", [ Token(symbol!"Value",loc,Value("月"d.dup[0])) ]);
  1757.  
  1758. // Base64 Binary
  1759. testLex("[aGVsbG8gd29ybGQ=]", [ Token(symbol!"Value",loc,Value(cast(ubyte[])"hello world".dup))]);
  1760. testLex("[ aGVsbG8gd29ybGQ= ]", [ Token(symbol!"Value",loc,Value(cast(ubyte[])"hello world".dup))]);
  1761. testLex("[\n aGVsbG8g \n \n d29ybGQ= \n]", [ Token(symbol!"Value",loc,Value(cast(ubyte[])"hello world".dup))]);
  1762.  
  1763. testLexThrows("[aGVsbG8gd29ybGQ]"); // Ie: Not multiple of 4
  1764. testLexThrows("[ aGVsbG8gd29ybGQ ]");
  1765.  
  1766. // Date
  1767. testLex( "1999/12/5", [ Token(symbol!"Value",loc,Value(Date( 1999, 12, 5))) ]);
  1768. testLex( "2013/2/22", [ Token(symbol!"Value",loc,Value(Date( 2013, 2, 22))) ]);
  1769. testLex("-2013/2/22", [ Token(symbol!"Value",loc,Value(Date(-2013, 2, 22))) ]);
  1770.  
  1771. testLexThrows("7/");
  1772. testLexThrows("2013/2/22a");
  1773. testLexThrows("2013/2/22f");
  1774.  
  1775. testLex("1999/12/5\n", [
  1776. Token(symbol!"Value",loc,Value(Date(1999, 12, 5))),
  1777. Token(symbol!"EOL",loc),
  1778. ]);
  1779.  
  1780. // DateTime, no timezone
  1781. testLex( "2013/2/22 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
  1782. testLex( "2013/2/22 \t 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
  1783. testLex( "2013/2/22/*foo*/07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
  1784. testLex( "2013/2/22 /*foo*/ \\\n /*bar*/ 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
  1785. testLex( "2013/2/22 /*foo*/ \\\n\n \n /*bar*/ 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
  1786. testLex( "2013/2/22 /*foo*/ \\\n\\\n \\\n /*bar*/ 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
  1787. testLex( "2013/2/22/*foo*/\\\n/*bar*/07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
  1788. testLex("-2013/2/22 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime(-2013, 2, 22, 7, 53, 0)))) ]);
  1789. testLex( "2013/2/22 -07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53)))) ]);
  1790. testLex("-2013/2/22 -07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime(-2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53)))) ]);
  1791. testLex( "2013/2/22 07:53:34", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34)))) ]);
  1792. testLex( "2013/2/22 07:53:34.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs))) ]);
  1793. testLex( "2013/2/22 07:53:34.12", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34), 120.msecs))) ]);
  1794. testLex( "2013/2/22 07:53:34.1", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34), 100.msecs))) ]);
  1795. testLex( "2013/2/22 07:53.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs))) ]);
  1796.  
  1797. testLex( "2013/2/22 34:65", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) + hours(34) + minutes(65) + seconds( 0)))) ]);
  1798. testLex( "2013/2/22 34:65:77.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) + hours(34) + minutes(65) + seconds(77), 123.msecs))) ]);
  1799. testLex( "2013/2/22 34:65.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) + hours(34) + minutes(65) + seconds( 0), 123.msecs))) ]);
  1800.  
  1801. testLex( "2013/2/22 -34:65", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds( 0)))) ]);
  1802. testLex( "2013/2/22 -34:65:77.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds(77), -123.msecs))) ]);
  1803. testLex( "2013/2/22 -34:65.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds( 0), -123.msecs))) ]);
  1804.  
  1805. testLexThrows("2013/2/22 07:53a");
  1806. testLexThrows("2013/2/22 07:53f");
  1807. testLexThrows("2013/2/22 07:53:34.123a");
  1808. testLexThrows("2013/2/22 07:53:34.123f");
  1809. testLexThrows("2013/2/22a 07:53");
  1810.  
  1811. testLex(`2013/2/22 "foo"`, [
  1812. Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
  1813. Token(symbol!"Value",loc,Value("foo")),
  1814. ]);
  1815.  
  1816. testLex("2013/2/22 07", [
  1817. Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
  1818. Token(symbol!"Value",loc,Value(cast(int)7)),
  1819. ]);
  1820.  
  1821. testLex("2013/2/22 1.2F", [
  1822. Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
  1823. Token(symbol!"Value",loc,Value(cast(float)1.2)),
  1824. ]);
  1825.  
  1826. testLex("2013/2/22 .2F", [
  1827. Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
  1828. Token(symbol!"Value",loc,Value(cast(float)0.2)),
  1829. ]);
  1830.  
  1831. testLex("2013/2/22 -1.2F", [
  1832. Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
  1833. Token(symbol!"Value",loc,Value(cast(float)-1.2)),
  1834. ]);
  1835.  
  1836. testLex("2013/2/22 -.2F", [
  1837. Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
  1838. Token(symbol!"Value",loc,Value(cast(float)-0.2)),
  1839. ]);
  1840.  
  1841. // DateTime, with known timezone
  1842. testLex( "2013/2/22 07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone( hours(0) )))) ]);
  1843. testLex("-2013/2/22 07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime(-2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone( hours(0) )))) ]);
  1844. testLex( "2013/2/22 -07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), new immutable SimpleTimeZone( hours(0) )))) ]);
  1845. testLex("-2013/2/22 -07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime(-2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), new immutable SimpleTimeZone( hours(0) )))) ]);
  1846. testLex( "2013/2/22 07:53-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]);
  1847. testLex( "2013/2/22 07:53-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]);
  1848. testLex( "2013/2/22 07:53:34-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), new immutable SimpleTimeZone( hours(0) )))) ]);
  1849. testLex( "2013/2/22 07:53:34-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]);
  1850. testLex( "2013/2/22 07:53:34-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]);
  1851. testLex( "2013/2/22 07:53:34.123-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs, new immutable SimpleTimeZone( hours(0) )))) ]);
  1852. testLex( "2013/2/22 07:53:34.123-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs, new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]);
  1853. testLex( "2013/2/22 07:53:34.123-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs, new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]);
  1854. testLex( "2013/2/22 07:53.123-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs, new immutable SimpleTimeZone( hours(0) )))) ]);
  1855. testLex( "2013/2/22 07:53.123-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs, new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]);
  1856. testLex( "2013/2/22 07:53.123-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs, new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]);
  1857.  
  1858. testLex( "2013/2/22 -34:65-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds( 0), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]);
  1859.  
  1860. // DateTime, with Java SDL's occasionally weird interpretation of some
  1861. // "not quite ISO" variations of the "GMT with offset" timezone strings.
  1862. Token testTokenSimpleTimeZone(Duration d)
  1863. {
  1864. auto dateTime = DateTime(2013, 2, 22, 7, 53, 0);
  1865. auto tz = new immutable SimpleTimeZone(d);
  1866. return Token( symbol!"Value", loc, Value(SysTime(dateTime,tz)) );
  1867. }
  1868. Token testTokenUnknownTimeZone(string tzName)
  1869. {
  1870. auto dateTime = DateTime(2013, 2, 22, 7, 53, 0);
  1871. auto frac = 0.msecs;
  1872. return Token( symbol!"Value", loc, Value(DateTimeFracUnknownZone(dateTime,frac,tzName)) );
  1873. }
	// GMT offset suffix matrix. The expected tokens below demonstrate the
	// recognition rules:
	//   - "HH:MM" (1-2 hour digits, exactly 2 minute digits) -> SimpleTimeZone
	//   - "HH" alone (1-2 digits) -> SimpleTimeZone of whole hours
	//   - a 3+ digit run with no colon -> SimpleTimeZone, digits read as minutes
	//   - anything else (dangling ':', wrong minute width, trailing junk)
	//     falls back to an unknown-timezone token preserving the text verbatim.
	testLex("2013/2/22 07:53-GMT+", [ testTokenUnknownTimeZone("GMT+") ]);
	testLex("2013/2/22 07:53-GMT+:", [ testTokenUnknownTimeZone("GMT+:") ]);
	testLex("2013/2/22 07:53-GMT+:3", [ testTokenUnknownTimeZone("GMT+:3") ]);
	testLex("2013/2/22 07:53-GMT+:03", [ testTokenSimpleTimeZone(minutes(3)) ]);
	testLex("2013/2/22 07:53-GMT+:003", [ testTokenUnknownTimeZone("GMT+:003") ]);

	testLex("2013/2/22 07:53-GMT+4", [ testTokenSimpleTimeZone(hours(4)) ]);
	testLex("2013/2/22 07:53-GMT+4:", [ testTokenUnknownTimeZone("GMT+4:") ]);
	testLex("2013/2/22 07:53-GMT+4:3", [ testTokenUnknownTimeZone("GMT+4:3") ]);
	testLex("2013/2/22 07:53-GMT+4:03", [ testTokenSimpleTimeZone(hours(4)+minutes(3)) ]);
	testLex("2013/2/22 07:53-GMT+4:003", [ testTokenUnknownTimeZone("GMT+4:003") ]);

	testLex("2013/2/22 07:53-GMT+04", [ testTokenSimpleTimeZone(hours(4)) ]);
	testLex("2013/2/22 07:53-GMT+04:", [ testTokenUnknownTimeZone("GMT+04:") ]);
	testLex("2013/2/22 07:53-GMT+04:3", [ testTokenUnknownTimeZone("GMT+04:3") ]);
	testLex("2013/2/22 07:53-GMT+04:03", [ testTokenSimpleTimeZone(hours(4)+minutes(3)) ]);
	testLex("2013/2/22 07:53-GMT+04:03abc", [ testTokenUnknownTimeZone("GMT+04:03abc") ]);
	testLex("2013/2/22 07:53-GMT+04:003", [ testTokenUnknownTimeZone("GMT+04:003") ]);

	// 3+ digits without a colon: interpreted as a minute count (004 -> 4 min).
	testLex("2013/2/22 07:53-GMT+004", [ testTokenSimpleTimeZone(minutes(4)) ]);
	testLex("2013/2/22 07:53-GMT+004:", [ testTokenUnknownTimeZone("GMT+004:") ]);
	testLex("2013/2/22 07:53-GMT+004:3", [ testTokenUnknownTimeZone("GMT+004:3") ]);
	testLex("2013/2/22 07:53-GMT+004:03", [ testTokenUnknownTimeZone("GMT+004:03") ]);
	testLex("2013/2/22 07:53-GMT+004:003", [ testTokenUnknownTimeZone("GMT+004:003") ]);

	testLex("2013/2/22 07:53-GMT+0004", [ testTokenSimpleTimeZone(minutes(4)) ]);
	testLex("2013/2/22 07:53-GMT+0004:", [ testTokenUnknownTimeZone("GMT+0004:") ]);
	testLex("2013/2/22 07:53-GMT+0004:3", [ testTokenUnknownTimeZone("GMT+0004:3") ]);
	testLex("2013/2/22 07:53-GMT+0004:03", [ testTokenUnknownTimeZone("GMT+0004:03") ]);
	testLex("2013/2/22 07:53-GMT+0004:003", [ testTokenUnknownTimeZone("GMT+0004:003") ]);

	testLex("2013/2/22 07:53-GMT+00004", [ testTokenSimpleTimeZone(minutes(4)) ]);
	testLex("2013/2/22 07:53-GMT+00004:", [ testTokenUnknownTimeZone("GMT+00004:") ]);
	testLex("2013/2/22 07:53-GMT+00004:3", [ testTokenUnknownTimeZone("GMT+00004:3") ]);
	testLex("2013/2/22 07:53-GMT+00004:03", [ testTokenUnknownTimeZone("GMT+00004:03") ]);
	testLex("2013/2/22 07:53-GMT+00004:003", [ testTokenUnknownTimeZone("GMT+00004:003") ]);
  1910.  
	// DateTime, with unknown timezone.
	// A negative year and/or a negative time-of-day are both accepted; a
	// negative time is applied by subtracting from midnight of that date.
	testLex( "2013/2/22 07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 0), 0.msecs, "Bogus/Foo")), "2013/2/22 07:53-Bogus/Foo") ]);
	testLex("-2013/2/22 07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime(-2013, 2, 22, 7, 53, 0), 0.msecs, "Bogus/Foo"))) ]);
	testLex( "2013/2/22 -07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), 0.msecs, "Bogus/Foo"))) ]);
	testLex("-2013/2/22 -07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime(-2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), 0.msecs, "Bogus/Foo"))) ]);
	testLex( "2013/2/22 07:53:34-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 34), 0.msecs, "Bogus/Foo"))) ]);
	testLex( "2013/2/22 07:53:34.123-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs, "Bogus/Foo"))) ]);
	testLex( "2013/2/22 07:53.123-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs, "Bogus/Foo"))) ]);

	// Time Span: "[-][Dd:]HH:MM:SS[.fff]" lexes as a Duration value.
	// Fractional seconds shorter than 3 digits are right-padded (".5" -> 500ms).
	testLex( "12:14:42", [ Token(symbol!"Value",loc,Value( days( 0)+hours(12)+minutes(14)+seconds(42)+msecs( 0))) ]);
	testLex("-12:14:42", [ Token(symbol!"Value",loc,Value(-days( 0)-hours(12)-minutes(14)-seconds(42)-msecs( 0))) ]);
	testLex( "00:09:12", [ Token(symbol!"Value",loc,Value( days( 0)+hours( 0)+minutes( 9)+seconds(12)+msecs( 0))) ]);
	testLex( "00:00:01.023", [ Token(symbol!"Value",loc,Value( days( 0)+hours( 0)+minutes( 0)+seconds( 1)+msecs( 23))) ]);
	testLex( "23d:05:21:23.532", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs(532))) ]);
	testLex( "23d:05:21:23.53", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs(530))) ]);
	testLex( "23d:05:21:23.5", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs(500))) ]);
	testLex("-23d:05:21:23.532", [ Token(symbol!"Value",loc,Value(-days(23)-hours( 5)-minutes(21)-seconds(23)-msecs(532))) ]);
	testLex("-23d:05:21:23.5", [ Token(symbol!"Value",loc,Value(-days(23)-hours( 5)-minutes(21)-seconds(23)-msecs(500))) ]);
	testLex( "23d:05:21:23", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs( 0))) ]);

	// Trailing non-numeric characters after a time span are a lex error.
	testLexThrows("12:14:42a");
	testLexThrows("23d:05:21:23.532a");
	testLexThrows("23d:05:21:23.532f");
  1935.  
	// Combination
	// An identifier may end in '.', so "foo." lexes as a single Ident token.
	testLex("foo. 7", [
		Token(symbol!"Ident",loc,Value( null),"foo."),
		Token(symbol!"Value",loc,Value(cast(int)7))
	]);

	// End-to-end sample exercising namespaces ("ns:ident"), attributes,
	// string/int/long/date values, nested child blocks, and all three
	// comment styles (//, --, /* */). Note that ';' is emitted as an EOL
	// token just like '\n', and comments themselves produce no tokens.
	testLex(`
		namespace:person "foo" "bar" 1 23L name.first="ひとみ" name.last="Smith" {
			namespace:age 37; namespace:favorite_color "blue" // comment
			somedate 2013/2/22 07:53 -- comment

			inventory /* comment */ {
				socks
			}
		}
		`,
	[
		Token(symbol!"EOL",loc,Value(null),"\n"),

		Token(symbol!"Ident", loc, Value( null ), "namespace"),
		Token(symbol!":", loc, Value( null ), ":"),
		Token(symbol!"Ident", loc, Value( null ), "person"),
		Token(symbol!"Value", loc, Value( "foo" ), `"foo"`),
		Token(symbol!"Value", loc, Value( "bar" ), `"bar"`),
		Token(symbol!"Value", loc, Value( cast( int) 1 ), "1"),
		Token(symbol!"Value", loc, Value( cast(long)23 ), "23L"),
		Token(symbol!"Ident", loc, Value( null ), "name.first"),
		Token(symbol!"=", loc, Value( null ), "="),
		Token(symbol!"Value", loc, Value( "ひとみ" ), `"ひとみ"`),
		Token(symbol!"Ident", loc, Value( null ), "name.last"),
		Token(symbol!"=", loc, Value( null ), "="),
		Token(symbol!"Value", loc, Value( "Smith" ), `"Smith"`),
		Token(symbol!"{", loc, Value( null ), "{"),
		Token(symbol!"EOL", loc, Value( null ), "\n"),

		Token(symbol!"Ident", loc, Value( null ), "namespace"),
		Token(symbol!":", loc, Value( null ), ":"),
		Token(symbol!"Ident", loc, Value( null ), "age"),
		Token(symbol!"Value", loc, Value( cast(int)37 ), "37"),
		Token(symbol!"EOL", loc, Value( null ), ";"),
		Token(symbol!"Ident", loc, Value( null ), "namespace"),
		Token(symbol!":", loc, Value( null ), ":"),
		Token(symbol!"Ident", loc, Value( null ), "favorite_color"),
		Token(symbol!"Value", loc, Value( "blue" ), `"blue"`),
		Token(symbol!"EOL", loc, Value( null ), "\n"),

		Token(symbol!"Ident", loc, Value( null ), "somedate"),
		Token(symbol!"Value", loc, Value( DateTimeFrac(DateTime(2013, 2, 22, 7, 53, 0)) ), "2013/2/22 07:53"),
		Token(symbol!"EOL", loc, Value( null ), "\n"),
		Token(symbol!"EOL", loc, Value( null ), "\n"),

		Token(symbol!"Ident", loc, Value(null), "inventory"),
		Token(symbol!"{", loc, Value(null), "{"),
		Token(symbol!"EOL", loc, Value(null), "\n"),

		Token(symbol!"Ident", loc, Value(null), "socks"),
		Token(symbol!"EOL", loc, Value(null), "\n"),

		Token(symbol!"}", loc, Value(null), "}"),
		Token(symbol!"EOL", loc, Value(null), "\n"),

		Token(symbol!"}", loc, Value(null), "}"),
		Token(symbol!"EOL", loc, Value(null), "\n"),
	]);

	// testLex accumulates failures in numErrors instead of throwing;
	// report the final count once at the end of the unittest.
	if(numErrors > 0)
		stderr.writeln(numErrors, " failed test(s)");
}
  2004.  
  2005. version(sdlangUnittest)
  2006. unittest
  2007. {
  2008. writeln("lexer: Regression test issue #8...");
  2009. stdout.flush();
  2010.  
  2011. testLex(`"\n \n"`, [ Token(symbol!"Value",loc,Value("\n \n"),`"\n \n"`) ]);
  2012. testLex(`"\t\t"`, [ Token(symbol!"Value",loc,Value("\t\t"),`"\t\t"`) ]);
  2013. testLex(`"\n\n"`, [ Token(symbol!"Value",loc,Value("\n\n"),`"\n\n"`) ]);
  2014. }
  2015.  
  2016. version(sdlangUnittest)
  2017. unittest
  2018. {
  2019. writeln("lexer: Regression test issue #11...");
  2020. stdout.flush();
  2021.  
  2022. void test(string input)
  2023. {
  2024. testLex(
  2025. input,
  2026. [
  2027. Token(symbol!"EOL", loc, Value(null), "\n"),
  2028. Token(symbol!"Ident",loc,Value(null), "a")
  2029. ]
  2030. );
  2031. }
  2032.  
  2033. test("//X\na");
  2034. test("//\na");
  2035. test("--\na");
  2036. test("#\na");
  2037. }
  2038.  
  2039. version(sdlangUnittest)
  2040. unittest
  2041. {
  2042. writeln("lexer: Regression test issue #28...");
  2043. stdout.flush();
  2044.  
  2045. enum offset = 1; // workaround for an of-by-one error for line numbers
  2046. testLex("test", [
  2047. Token(symbol!"Ident", Location("filename", 0, 0, 0), Value(null), "test")
  2048. ], true);
  2049. testLex("\ntest", [
  2050. Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\n"),
  2051. Token(symbol!"Ident", Location("filename", 1, 0, 1), Value(null), "test")
  2052. ], true);
  2053. testLex("\rtest", [
  2054. Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r"),
  2055. Token(symbol!"Ident", Location("filename", 1, 0, 1), Value(null), "test")
  2056. ], true);
  2057. testLex("\r\ntest", [
  2058. Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r\n"),
  2059. Token(symbol!"Ident", Location("filename", 1, 0, 2), Value(null), "test")
  2060. ], true);
  2061. testLex("\r\n\ntest", [
  2062. Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r\n"),
  2063. Token(symbol!"EOL", Location("filename", 1, 0, 2), Value(null), "\n"),
  2064. Token(symbol!"Ident", Location("filename", 2, 0, 3), Value(null), "test")
  2065. ], true);
  2066. testLex("\r\r\ntest", [
  2067. Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r"),
  2068. Token(symbol!"EOL", Location("filename", 1, 0, 1), Value(null), "\r\n"),
  2069. Token(symbol!"Ident", Location("filename", 2, 0, 3), Value(null), "test")
  2070. ], true);
  2071. }