// File: dub/source/dub/internal/sdlang/lexer.d
  1. // SDLang-D
  2. // Written in the D programming language.
  3.  
  4. module dub.internal.sdlang.lexer;
  5.  
  6. version (Have_sdlang_d) public import sdlang.lexer;
  7. else:
  8.  
  9. import std.algorithm;
  10. import std.array;
  11. import std.base64;
  12. import std.bigint;
  13. import std.conv;
  14. import std.datetime;
  15. import std.file;
  16. import std.traits;
  17. import std.typecons;
  18. import std.uni;
  19. import std.utf;
  20. import std.variant;
  21.  
  22. import dub.internal.sdlang.exception;
  23. import dub.internal.sdlang.symbol;
  24. import dub.internal.sdlang.token;
  25. import dub.internal.sdlang.util;
  26.  
  27. alias dub.internal.sdlang.util.startsWith startsWith;
  28.  
  29. Token[] lexFile(string filename)
  30. {
  31. auto source = cast(string)read(filename);
  32. return lexSource(source, filename);
  33. }
  34.  
  35. Token[] lexSource(string source, string filename=null)
  36. {
  37. auto lexer = scoped!Lexer(source, filename);
  38.  
  39. // Can't use 'std.array.array(Range)' because 'lexer' is scoped
  40. // and therefore cannot have its reference copied.
  41. Appender!(Token[]) tokens;
  42. foreach(tok; lexer)
  43. tokens.put(tok);
  44.  
  45. return tokens.data;
  46. }
  47.  
// Kind of a poor-man's yield, but fast.
// Only to be used inside Lexer.popFront (and Lexer.this).
//
// Each `accept` instantiation produces a code string (mixed in by the
// caller) that assigns the finished token to `_front` and then `return`s
// from the enclosing function.
private template accept(string symbolName)
{
	static assert(symbolName != "Value", "Value symbols must also take a value.");
	enum accept = acceptImpl!(symbolName, "null");
}
// Two-argument form: emits a Value token. `value` is a D expression,
// spliced verbatim into the generated code.
private template accept(string symbolName, string value)
{
	static assert(symbolName == "Value", "Only a Value symbol can take a value.");
	enum accept = acceptImpl!(symbolName, value);
}
// Four-argument form: emits a Value token whose reported location and
// source slice can be overridden. Used when the token's true extent was
// only discovered after lookahead (e.g. a Date followed by a separate
// numeric literal). Passing "" for startLocation/endLocation selects the
// defaults (`tokenStart` / `location.index`).
private template accept(string symbolName, string value, string startLocation, string endLocation)
{
	static assert(symbolName == "Value", "Only a Value symbol can take a value.");
	// Newlines are stripped so the mixin is a single line (keeps error
	// line numbers in the mixing-in function meaningful).
	enum accept = ("
{
_front = makeToken!"~symbolName.stringof~";
_front.value = "~value~";
_front.location = "~(startLocation==""? "tokenStart" : startLocation)~";
_front.data = source[
"~(startLocation==""? "tokenStart.index" : startLocation)~"
..
"~(endLocation==""? "location.index" : endLocation)~"
];
return;
}
").replace("\n", "");
}
// Shared implementation for the accept templates: builds the code string
// that stores the token into `_front` and returns from the caller.
private template acceptImpl(string symbolName, string value)
{
	enum acceptImpl = ("
{
_front = makeToken!"~symbolName.stringof~";
_front.value = "~value~";
return;
}
").replace("\n", "");
}
  87.  
  88. class Lexer
  89. {
string source;    // Entire source text being lexed (UTF-8)
string filename;  // For error messages; may be null
Location location; /// Location of current character in source

private dchar ch;       // Current character
private dchar nextCh;   // Lookahead character
private size_t nextPos; // Position of lookahead character (an index into source)
private bool hasNextCh; // If false, then there's no more lookahead, just EOF
private size_t posAfterLookahead; // Position after lookahead character (an index into source)

private Location tokenStart; // The starting location of the token being lexed

// Length so far of the token being lexed, not including current char
private size_t tokenLength;   // Length in UTF-8 code units
private size_t tokenLength32; // Length in UTF-32 code units

// Slight kludge:
// If a numeric fragment is found after a Date (separated by arbitrary
// whitespace), it could be the "hours" part of a DateTime, or it could
// be a separate numeric literal that simply follows a plain Date. If the
// latter, then the Date must be emitted, but numeric fragment that was
// found after it needs to be saved for the the lexer's next iteration.
//
// It's a slight kludge, and could instead be implemented as a slightly
// kludgey parser hack, but it's the only situation where SDL's lexing
// needs to lookahead more than one character, so this is good enough.
private struct LookaheadTokenInfo
{
	bool exists            = false; // Is a deferred numeric fragment pending?
	string numericFragment = "";    // The digit characters already consumed
	bool isNegative        = false; // Was the fragment preceded by '-'?
	Location tokenStart;            // Where the deferred fragment began
}
private LookaheadTokenInfo lookaheadTokenInfo;
  124.  
/// Set up the lexer over `source` and immediately lex the first token
/// into `_front`. `filename` is used only for error locations.
this(string source=null, string filename=null)
{
	this.filename = filename;
	this.source = source;

	_front = Token(symbol!"Error", Location());
	lookaheadTokenInfo = LookaheadTokenInfo.init;

	// A UTF-8 BOM is tolerated: strip it before lexing.
	if( source.startsWith( ByteOrderMarks[BOM.UTF8] ) )
	{
		source = source[ ByteOrderMarks[BOM.UTF8].length .. $ ];
		this.source = source;
	}

	// Any remaining BOM indicates a non-UTF-8 encoding, which is rejected.
	foreach(bom; ByteOrderMarks)
		if( source.startsWith(bom) )
			error(Location(filename,0,0,0), "SDL spec only supports UTF-8, not UTF-16 or UTF-32");

	if(source == "")
		mixin(accept!"EOF"); // Expands to code that ends with `return`

	// Prime everything: decode the first code point into the lookahead
	// slot, then advance once so `ch` holds the first character.
	hasNextCh = true;
	nextCh = source.decode(posAfterLookahead);
	advanceChar(ErrorOnEOF.Yes);
	location = Location(filename, 0, 0, 0);
	popFront();
}
  153.  
/// Range interface: the lexer is exhausted once the EOF token is emitted.
@property bool empty()
{
	return _front.symbol == symbol!"EOF";
}

Token _front; // Most recently lexed token (the range's front)
@property Token front()
{
	return _front;
}

/// True when every source character has been consumed AND no deferred
/// numeric fragment (see LookaheadTokenInfo) is still pending.
@property bool isEOF()
{
	return location.index == source.length && !lookaheadTokenInfo.exists;
}
  169.  
/// Report an error at the current character's location. Never returns.
private void error(string msg)
{
	error(location, msg);
}

/// Report an error at an explicit location by throwing
/// SDLangParseException. Never returns.
private void error(Location loc, string msg)
{
	throw new SDLangParseException(loc, "Error: "~msg);
}
  179.  
/// Build a token of the given symbol, located at tokenStart and carrying
/// the source text lexed so far (tokenData) as its data.
private Token makeToken(string symbolName)()
{
	auto tok = Token(symbol!symbolName, tokenStart);
	tok.data = tokenData;
	return tok;
}

/// Raw source slice of the token currently being lexed
/// (from tokenStart up to, but not including, the current character).
private @property string tokenData()
{
	return source[ tokenStart.index .. location.index ];
}
  191.  
/// Check the lookahead character against a specific character.
/// Returns false at end of input (no lookahead available).
private bool lookahead(dchar ch)
{
	return hasNextCh && nextCh == ch;
}

/// Check the lookahead character against a predicate.
private bool lookahead(bool function(dchar) condition)
{
	return hasNextCh && condition(nextCh);
}
  202.  
  203. private static bool isNewline(dchar ch)
  204. {
  205. return ch == '\n' || ch == '\r' || ch == lineSep || ch == paraSep;
  206. }
  207.  
  208. /// Returns the length of the newline sequence, or zero if the current
  209. /// character is not a newline
  210. ///
  211. /// Note that there are only single character sequences and the two
  212. /// character sequence `\r\n` as used on Windows.
  213. private size_t isAtNewline()
  214. {
  215. if(ch == '\n' || ch == lineSep || ch == paraSep) return 1;
  216. else if(ch == '\r') return lookahead('\n') ? 2 : 1;
  217. else return 0;
  218. }
  219.  
  220. /// Is 'ch' a valid base 64 character?
  221. private bool isBase64(dchar ch)
  222. {
  223. if(ch >= 'A' && ch <= 'Z')
  224. return true;
  225.  
  226. if(ch >= 'a' && ch <= 'z')
  227. return true;
  228.  
  229. if(ch >= '0' && ch <= '9')
  230. return true;
  231.  
  232. return ch == '+' || ch == '/' || ch == '=';
  233. }
  234.  
/// Is the current character one that's allowed
/// immediately *after* an int/float literal?
/// Digits, ':', '_' and letters would instead continue a numeric,
/// time-span, or suffixed token, so they are rejected here.
private bool isEndOfNumber()
{
	if(isEOF)
		return true;

	return !isDigit(ch) && ch != ':' && ch != '_' && !isAlpha(ch);
}
  244.  
/// Is current character the last one in an ident?
/// The answer is cached; the cache is invalidated when the lexer
/// advances (see advanceChar and popFront).
private bool isEndOfIdentCached = false; // Is _isEndOfIdent valid?
private bool _isEndOfIdent;              // Cached answer
private bool isEndOfIdent()
{
	if(!isEndOfIdentCached)
	{
		if(!hasNextCh)
			_isEndOfIdent = true; // No lookahead: current char must be the last
		else
			_isEndOfIdent = !isIdentChar(nextCh);

		isEndOfIdentCached = true;
	}

	return _isEndOfIdent;
}
  262.  
  263. /// Is 'ch' a character that's allowed *somewhere* in an identifier?
  264. private bool isIdentChar(dchar ch)
  265. {
  266. if(isAlpha(ch))
  267. return true;
  268.  
  269. else if(isNumber(ch))
  270. return true;
  271.  
  272. else
  273. return
  274. ch == '-' ||
  275. ch == '_' ||
  276. ch == '.' ||
  277. ch == '$';
  278. }
  279.  
  280. private bool isDigit(dchar ch)
  281. {
  282. return ch >= '0' && ch <= '9';
  283. }
  284.  
// Result of feeding one more character to the incremental keyword matcher.
private enum KeywordResult
{
	Accept,   // Keyword is matched
	Continue, // Keyword is not matched *yet*
	Failed,   // Keyword doesn't match
}

/// Incrementally match the token lexed so far against `keyword32`.
/// Called once per consumed character; relies on tokenLength32 being the
/// number of characters already consumed for the current token.
private KeywordResult checkKeyword(dstring keyword32)
{
	// Still within length of keyword
	if(tokenLength32 < keyword32.length)
	{
		if(ch == keyword32[tokenLength32])
			return KeywordResult.Continue;
		else
			return KeywordResult.Failed;
	}

	// At position after keyword: only accept if the keyword isn't merely
	// a prefix of a longer identifier (e.g. "trueish").
	else if(tokenLength32 == keyword32.length)
	{
		if(isEOF || !isIdentChar(ch))
		{
			debug assert(tokenData == to!string(keyword32));
			return KeywordResult.Accept;
		}
		else
			return KeywordResult.Failed;
	}

	assert(0, "Fell off end of keyword to check");
}
  316.  
enum ErrorOnEOF { No, Yes } // Should advanceChar throw when it hits EOF?

/// Advance one code point.
/// Updates line/column bookkeeping, shifts the lookahead character into
/// `ch`, and decodes the next lookahead code point (if any remains).
private void advanceChar(ErrorOnEOF errorOnEOF)
{
	if(auto cnt = isAtNewline())
	{
		// For "\r\n" (cnt == 2) the line counter is bumped only when the
		// '\n' half is processed (it reports cnt == 1), so each logical
		// newline increments the line exactly once.
		if (cnt == 1)
			location.line++;
		location.col = 0;
	}
	else
		location.col++;

	location.index = nextPos;

	nextPos = posAfterLookahead;
	ch = nextCh;

	if(!hasNextCh)
	{
		// Already past the last character: nothing left to decode.
		if(errorOnEOF == ErrorOnEOF.Yes)
			error("Unexpected end of file");

		return;
	}

	tokenLength32++;
	tokenLength = location.index - tokenStart.index;

	if(nextPos == source.length)
	{
		// The character just shifted into `ch` is the last one.
		// NOTE(review): isEndOfIdentCached is not cleared on this path,
		// unlike the normal path below — confirm a stale cached value
		// can't leak to the final position (popFront does reset it per
		// token, which may be what makes this safe).
		nextCh = dchar.init;
		hasNextCh = false;
		return;
	}

	nextCh = source.decode(posAfterLookahead);
	isEndOfIdentCached = false;
}
  357.  
  358. /// Advances the specified amount of characters
  359. private void advanceChar(size_t count, ErrorOnEOF errorOnEOF)
  360. {
  361. while(count-- > 0)
  362. advanceChar(errorOnEOF);
  363. }
  364.  
/// Lex the next token into `_front`.
/// Every branch ends via an accept!… mixin (which assigns _front and
/// returns) or by delegating to a lex* helper that does the same.
void popFront()
{
	// -- Main Lexer -------------

	eatWhite();

	if(isEOF)
		mixin(accept!"EOF");

	tokenStart    = location;
	tokenLength   = 0;
	tokenLength32 = 0;
	isEndOfIdentCached = false;

	// A numeric fragment deferred while disambiguating Date vs DateTime?
	// (See LookaheadTokenInfo.) Resume lexing it as its own number token.
	if(lookaheadTokenInfo.exists)
	{
		tokenStart = lookaheadTokenInfo.tokenStart;

		auto prevLATokenInfo = lookaheadTokenInfo;
		lookaheadTokenInfo = LookaheadTokenInfo.init;
		lexNumeric(prevLATokenInfo);
		return;
	}

	if(ch == '=')
	{
		advanceChar(ErrorOnEOF.No);
		mixin(accept!"=");
	}

	else if(ch == '{')
	{
		advanceChar(ErrorOnEOF.No);
		mixin(accept!"{");
	}

	else if(ch == '}')
	{
		advanceChar(ErrorOnEOF.No);
		mixin(accept!"}");
	}

	else if(ch == ':')
	{
		advanceChar(ErrorOnEOF.No);
		mixin(accept!":");
	}

	// ';' is treated as an end-of-line, same as a newline.
	else if(ch == ';')
	{
		advanceChar(ErrorOnEOF.No);
		mixin(accept!"EOL");
	}

	else if(auto cnt = isAtNewline())
	{
		advanceChar(cnt, ErrorOnEOF.No);
		mixin(accept!"EOL");
	}

	else if(isAlpha(ch) || ch == '_')
		lexIdentKeyword();

	else if(ch == '"')
		lexRegularString();

	else if(ch == '`')
		lexRawString();

	else if(ch == '\'')
		lexCharacter();

	else if(ch == '[')
		lexBinary();

	else if(ch == '-' || ch == '.' || isDigit(ch))
		lexNumeric();

	else
	{
		advanceChar(ErrorOnEOF.No);
		error("Syntax error");
	}
}
  449.  
/// Lex Ident or Keyword
/// Runs all keyword candidates in parallel, one character per loop
/// iteration, and falls back to lexIdent() once every candidate fails.
private void lexIdentKeyword()
{
	assert(isAlpha(ch) || ch == '_');

	// Keyword candidate being matched
	struct Key
	{
		dstring name;
		Value value;         // Token value to emit if this keyword matches
		bool failed = false; // Candidate ruled out for the current token?
	}
	static Key[5] keywords;
	static keywordsInited = false;
	if(!keywordsInited)
	{
		// Value (as a std.variant-based type) can't be statically inited
		keywords[0] = Key("true",  Value(true ));
		keywords[1] = Key("false", Value(false));
		keywords[2] = Key("on",    Value(true ));
		keywords[3] = Key("off",   Value(false));
		keywords[4] = Key("null",  Value(null ));
		keywordsInited = true;
	}

	// Reset per-token match state (the static array is reused across calls).
	foreach(ref key; keywords)
		key.failed = false;

	auto numKeys = keywords.length;

	do
	{
		foreach(ref key; keywords)
		if(!key.failed)
		{
			final switch(checkKeyword(key.name))
			{
				case KeywordResult.Accept:
					// accept!… expands to code ending in `return`,
					// so there is no fallthrough here.
					mixin(accept!("Value", "key.value"));

				case KeywordResult.Continue:
					break;

				case KeywordResult.Failed:
					key.failed = true;
					numKeys--;
					break;
			}
		}

		if(numKeys == 0)
		{
			// No keyword can match anymore: lex as a plain identifier.
			lexIdent();
			return;
		}

		advanceChar(ErrorOnEOF.No);

	} while(!isEOF);

	// Reached EOF: a keyword ending exactly at EOF is still a match.
	// NOTE(review): the +1 appears to compensate for the final character
	// not being counted by advanceChar once hasNextCh is false — confirm.
	foreach(ref key; keywords)
	if(!key.failed)
	if(key.name.length == tokenLength32+1)
		mixin(accept!("Value", "key.value"));

	mixin(accept!"Ident");
}
  517.  
/// Lex Ident
/// Consumes ident characters until a non-ident character or EOF, then
/// emits an Ident token. May be entered mid-token from lexIdentKeyword,
/// which is why tokenLength can already be non-zero here.
private void lexIdent()
{
	if(tokenLength == 0)
		assert(isAlpha(ch) || ch == '_');

	while(!isEOF && isIdentChar(ch))
		advanceChar(ErrorOnEOF.No);

	mixin(accept!"Ident");
}
  529.  
/// Lex regular string
/// Double-quoted, with backslash escape sequences. Runs of un-escaped
/// characters are copied into `buf` in bulk (via updateBuf) instead of
/// character-by-character.
private void lexRegularString()
{
	assert(ch == '"');

	Appender!string buf;
	size_t spanStart = nextPos; // Start of the pending un-escaped span

	// Flush the pending span [spanStart .. current position) into buf.
	// Doesn't include current character
	void updateBuf()
	{
		if(location.index == spanStart)
			return;

		buf.put( source[spanStart..location.index] );
	}

	advanceChar(ErrorOnEOF.Yes);
	while(ch != '"')
	{
		if(ch == '\\')
		{
			updateBuf();

			bool wasEscSequence = true;
			if(hasNextCh)
			{
				switch(nextCh)
				{
					case 'n':  buf.put('\n'); break;
					case 'r':  buf.put('\r'); break;
					case 't':  buf.put('\t'); break;
					case '"':  buf.put('\"'); break;
					case '\\': buf.put('\\'); break;
					default: wasEscSequence = false; break;
				}
			}

			if(wasEscSequence)
			{
				// Skip the backslash; the escape character itself is
				// consumed by the advanceChar at the bottom of the loop.
				advanceChar(ErrorOnEOF.Yes);
				spanStart = nextPos;
			}
			else
			{
				// Not a recognized escape — presumably a backslash line
				// continuation: eatWhite (defined elsewhere in this file)
				// skips the following whitespace. TODO confirm semantics
				// of the `false` argument.
				eatWhite(false);
				spanStart = location.index;
			}
		}

		else if(isNewline(ch))
			error("Unescaped newlines are only allowed in raw strings, not regular strings.");

		advanceChar(ErrorOnEOF.Yes);
	}

	updateBuf();
	advanceChar(ErrorOnEOF.No); // Skip closing double-quote
	mixin(accept!("Value", "buf.data"));
}
  590.  
  591. /// Lex raw string
  592. private void lexRawString()
  593. {
  594. assert(ch == '`');
  595.  
  596. do
  597. advanceChar(ErrorOnEOF.Yes);
  598. while(ch != '`');
  599.  
  600. advanceChar(ErrorOnEOF.No); // Skip closing back-tick
  601. mixin(accept!("Value", "tokenData[1..$-1]"));
  602. }
  603.  
  604. /// Lex character literal
  605. private void lexCharacter()
  606. {
  607. assert(ch == '\'');
  608. advanceChar(ErrorOnEOF.Yes); // Skip opening single-quote
  609.  
  610. dchar value;
  611. if(ch == '\\')
  612. {
  613. advanceChar(ErrorOnEOF.Yes); // Skip escape backslash
  614. switch(ch)
  615. {
  616. case 'n': value = '\n'; break;
  617. case 'r': value = '\r'; break;
  618. case 't': value = '\t'; break;
  619. case '\'': value = '\''; break;
  620. case '\\': value = '\\'; break;
  621. default: error("Invalid escape sequence.");
  622. }
  623. }
  624. else if(isNewline(ch))
  625. error("Newline not alowed in character literal.");
  626. else
  627. value = ch;
  628. advanceChar(ErrorOnEOF.Yes); // Skip the character itself
  629.  
  630. if(ch == '\'')
  631. advanceChar(ErrorOnEOF.No); // Skip closing single-quote
  632. else
  633. error("Expected closing single-quote.");
  634.  
  635. mixin(accept!("Value", "value"));
  636. }
  637.  
/// Lex base64 binary literal
/// Form: '[' base64-chars-with-arbitrary-whitespace ']', decoded into a
/// ubyte[] Value token.
private void lexBinary()
{
	assert(ch == '[');
	advanceChar(ErrorOnEOF.Yes);

	// Skip whitespace (including newlines) between base64 characters.
	void eatBase64Whitespace()
	{
		while(!isEOF && isWhite(ch))
		{
			if(isNewline(ch))
				advanceChar(ErrorOnEOF.Yes);

			if(!isEOF && isWhite(ch))
				eatWhite();
		}
	}

	eatBase64Whitespace();

	// Iterates all valid base64 characters, ending at ']'.
	// Skips all whitespace. Throws on invalid chars.
	struct Base64InputRange
	{
		Lexer lexer;
		private bool isInited = false;     // First character counted yet?
		private int numInputCharsMod4 = 0; // For the multiple-of-4 length check

		@property bool empty()
		{
			if(lexer.ch == ']')
			{
				// Base64 data must come in groups of 4 characters.
				if(numInputCharsMod4 != 0)
					lexer.error("Length of Base64 encoding must be a multiple of 4. ("~to!string(numInputCharsMod4)~")");

				return true;
			}

			return false;
		}

		@property dchar front()
		{
			return lexer.ch;
		}

		void popFront()
		{
			auto lex = lexer;

			// Count the very first character lazily, because front is
			// read before the first popFront call.
			if(!isInited)
			{
				if(lexer.isBase64(lexer.ch))
				{
					numInputCharsMod4++;
					numInputCharsMod4 %= 4;
				}

				isInited = true;
			}

			lex.advanceChar(lex.ErrorOnEOF.Yes);

			eatBase64Whitespace();

			if(lex.isEOF)
				lex.error("Unexpected end of file.");

			if(lex.ch != ']')
			{
				if(!lex.isBase64(lex.ch))
					lex.error("Invalid character in base64 binary literal.");

				numInputCharsMod4++;
				numInputCharsMod4 %= 4;
			}
		}
	}

	// This is a slow ugly hack. It's necessary because Base64.decode
	// currently requires the source to have known length.
	//TODO: Remove this when DMD issue #9543 is fixed.
	dchar[] tmpBuf = array(Base64InputRange(this));

	Appender!(ubyte[]) outputBuf;
	// Ugly workaround for DMD issue #9102
	//TODO: Remove this when DMD #9102 is fixed
	struct OutputBuf
	{
		void put(ubyte ch)
		{
			outputBuf.put(ch);
		}
	}

	try
		//Base64.decode(Base64InputRange(this), OutputBuf());
		Base64.decode(tmpBuf, OutputBuf());

	//TODO: Starting with dmd 2.062, this should be a Base64Exception
	catch(Exception e)
		error("Invalid character in base64 binary literal.");

	advanceChar(ErrorOnEOF.No); // Skip ']'
	mixin(accept!("Value", "outputBuf.data"));
}
  744.  
  745. private BigInt toBigInt(bool isNegative, string absValue)
  746. {
  747. auto num = BigInt(absValue);
  748. assert(num >= 0);
  749.  
  750. if(isNegative)
  751. num = -num;
  752.  
  753. return num;
  754. }
  755.  
/// Lex [0-9]+, but without emitting a token.
/// This is used by the other numeric parsing functions.
/// Returns the slice of `source` holding the digits; errors out if the
/// current character isn't a digit.
private string lexNumericFragment()
{
	if(!isDigit(ch))
		error("Expected a digit 0-9.");

	auto spanStart = location.index;

	do
	{
		advanceChar(ErrorOnEOF.No);
	} while(!isEOF && isDigit(ch));

	return source[spanStart..location.index];
}
  772.  
/// Lex anything that starts with 0-9 or '-'. Ints, floats, dates, etc.
/// When laTokenInfo.exists, resumes from a numeric fragment previously
/// saved by lexDate (Date-vs-DateTime disambiguation) instead of reading
/// fresh input.
private void lexNumeric(LookaheadTokenInfo laTokenInfo = LookaheadTokenInfo.init)
{
	bool isNegative;
	string firstFragment;
	if(laTokenInfo.exists)
	{
		firstFragment = laTokenInfo.numericFragment;
		isNegative = laTokenInfo.isNegative;
	}
	else
	{
		assert(ch == '-' || ch == '.' || isDigit(ch));

		// Check for negative
		isNegative = ch == '-';
		if(isNegative)
			advanceChar(ErrorOnEOF.Yes);

		// Some floating point with omitted leading zero?
		if(ch == '.')
		{
			lexFloatingPoint("");
			return;
		}

		firstFragment = lexNumericFragment();
	}

	// The digits are consumed; the suffix / next character decides the type.

	// Long integer (64-bit signed)?
	if(ch == 'L' || ch == 'l')
	{
		advanceChar(ErrorOnEOF.No);

		// BigInt(long.min) is a workaround for DMD issue #9548
		auto num = toBigInt(isNegative, firstFragment);
		if(num < BigInt(long.min) || num > long.max)
			error(tokenStart, "Value doesn't fit in 64-bit signed long integer: "~to!string(num));

		mixin(accept!("Value", "num.toLong()"));
	}

	// Float (32-bit signed)?
	else if(ch == 'F' || ch == 'f')
	{
		// tokenData ends at the current character, so the suffix is excluded.
		auto value = to!float(tokenData);
		advanceChar(ErrorOnEOF.No);
		mixin(accept!("Value", "value"));
	}

	// Double float (64-bit signed) with suffix?
	// (A 'd' followed by ':' is instead a time span's day component —
	// see the time-span branch below.)
	else if((ch == 'D' || ch == 'd') && !lookahead(':')
	)
	{
		auto value = to!double(tokenData);
		advanceChar(ErrorOnEOF.No);
		mixin(accept!("Value", "value"));
	}

	// Decimal (128+ bits signed)? Suffix "BD"/"bd" in any case mix.
	// NOTE(review): parsed with to!real, so precision is platform `real`,
	// not an actual 128-bit decimal.
	else if(
		(ch == 'B' || ch == 'b') &&
		(lookahead('D') || lookahead('d'))
	)
	{
		auto value = to!real(tokenData);
		advanceChar(ErrorOnEOF.No);
		advanceChar(ErrorOnEOF.No);
		mixin(accept!("Value", "value"));
	}

	// Some floating point?
	else if(ch == '.')
		lexFloatingPoint(firstFragment);

	// Some date?
	else if(ch == '/' && hasNextCh && isDigit(nextCh))
		lexDate(isNegative, firstFragment);

	// Some time span?
	else if(ch == ':' || ch == 'd')
		lexTimeSpan(isNegative, firstFragment);

	// Integer (32-bit signed)?
	else if(isEndOfNumber())
	{
		auto num = toBigInt(isNegative, firstFragment);
		if(num < int.min || num > int.max)
			error(tokenStart, "Value doesn't fit in 32-bit signed integer: "~to!string(num));

		mixin(accept!("Value", "num.toInt()"));
	}

	// Invalid suffix
	else
		error("Invalid integer suffix.");
}
  870.  
/// Lex any floating-point literal (after the initial numeric fragment was lexed)
/// Note: `firstPart` (and the lexed `secondPart`) are not used directly;
/// the full literal text is re-read via tokenData, which spans from
/// tokenStart up to the current character.
private void lexFloatingPoint(string firstPart)
{
	assert(ch == '.');
	advanceChar(ErrorOnEOF.No);

	auto secondPart = lexNumericFragment();

	try
	{
		// Double float (64-bit signed) with suffix?
		if(ch == 'D' || ch == 'd')
		{
			auto value = to!double(tokenData);
			advanceChar(ErrorOnEOF.No);
			mixin(accept!("Value", "value"));
		}

		// Float (32-bit signed)?
		else if(ch == 'F' || ch == 'f')
		{
			auto value = to!float(tokenData);
			advanceChar(ErrorOnEOF.No);
			mixin(accept!("Value", "value"));
		}

		// Decimal (128+ bits signed)? Suffix is "BD"/"bd" in any case mix.
		else if(ch == 'B' || ch == 'b')
		{
			auto value = to!real(tokenData);
			advanceChar(ErrorOnEOF.Yes);

			if(!isEOF && (ch == 'D' || ch == 'd'))
			{
				advanceChar(ErrorOnEOF.No);
				if(isEndOfNumber())
					mixin(accept!("Value", "value"));
			}

			// 'B'/'b' not followed by 'D'/'d' and end-of-number is invalid.
			error("Invalid floating point suffix.");
		}

		// Double float (64-bit signed) without suffix?
		else if(isEOF || !isIdentChar(ch))
		{
			auto value = to!double(tokenData);
			mixin(accept!("Value", "value"));
		}

		// Invalid suffix
		else
			error("Invalid floating point suffix.");
	}
	catch(ConvException e)
		error("Invalid floating point literal.");
}
  927.  
  928. private Date makeDate(bool isNegative, string yearStr, string monthStr, string dayStr)
  929. {
  930. BigInt biTmp;
  931.  
  932. biTmp = BigInt(yearStr);
  933. if(isNegative)
  934. biTmp = -biTmp;
  935. if(biTmp < int.min || biTmp > int.max)
  936. error(tokenStart, "Date's year is out of range. (Must fit within a 32-bit signed int.)");
  937. auto year = biTmp.toInt();
  938.  
  939. biTmp = BigInt(monthStr);
  940. if(biTmp < 1 || biTmp > 12)
  941. error(tokenStart, "Date's month is out of range.");
  942. auto month = biTmp.toInt();
  943.  
  944. biTmp = BigInt(dayStr);
  945. if(biTmp < 1 || biTmp > 31)
  946. error(tokenStart, "Date's month is out of range.");
  947. auto day = biTmp.toInt();
  948.  
  949. return Date(year, month, day);
  950. }
  951.  
/// Combine an already-parsed Date with lexed time-of-day fragments into
/// a DateTimeFrac (a project type pairing a DateTime with fractional
/// seconds). secondStr and millisecondStr may be "" when omitted in the
/// source. `isNegative` negates the whole time-of-day offset (hours,
/// minutes, seconds, and fractional seconds), not the date.
private DateTimeFrac makeDateTimeFrac(
	bool isNegative, Date date, string hourStr, string minuteStr,
	string secondStr, string millisecondStr
)
{
	BigInt biTmp;

	biTmp = BigInt(hourStr);
	if(biTmp < int.min || biTmp > int.max)
		error(tokenStart, "Datetime's hour is out of range.");
	auto numHours = biTmp.toInt();

	biTmp = BigInt(minuteStr);
	if(biTmp < 0 || biTmp > int.max)
		error(tokenStart, "Datetime's minute is out of range.");
	auto numMinutes = biTmp.toInt();

	int numSeconds = 0;
	if(secondStr != "")
	{
		biTmp = BigInt(secondStr);
		if(biTmp < 0 || biTmp > int.max)
			error(tokenStart, "Datetime's second is out of range.");
		numSeconds = biTmp.toInt();
	}

	int millisecond = 0;
	if(millisecondStr != "")
	{
		biTmp = BigInt(millisecondStr);
		if(biTmp < 0 || biTmp > int.max)
			error(tokenStart, "Datetime's millisecond is out of range.");
		millisecond = biTmp.toInt();

		// One or two digits after the '.' are fractional-second digits:
		// scale them to whole milliseconds (".5" -> 500ms, ".05" -> 50ms).
		if(millisecondStr.length == 1)
			millisecond *= 100;
		else if(millisecondStr.length == 2)
			millisecond *= 10;
	}

	Duration fracSecs = millisecond.msecs;

	auto offset = hours(numHours) + minutes(numMinutes) + seconds(numSeconds);

	if(isNegative)
	{
		offset = -offset;
		fracSecs = -fracSecs;
	}

	return DateTimeFrac(DateTime(date) + offset, fracSecs);
}
  1004.  
  1005. private Duration makeDuration(
  1006. bool isNegative, string dayStr,
  1007. string hourStr, string minuteStr, string secondStr,
  1008. string millisecondStr
  1009. )
  1010. {
  1011. BigInt biTmp;
  1012.  
  1013. long day = 0;
  1014. if(dayStr != "")
  1015. {
  1016. biTmp = BigInt(dayStr);
  1017. if(biTmp < long.min || biTmp > long.max)
  1018. error(tokenStart, "Time span's day is out of range.");
  1019. day = biTmp.toLong();
  1020. }
  1021.  
  1022. biTmp = BigInt(hourStr);
  1023. if(biTmp < long.min || biTmp > long.max)
  1024. error(tokenStart, "Time span's hour is out of range.");
  1025. auto hour = biTmp.toLong();
  1026.  
  1027. biTmp = BigInt(minuteStr);
  1028. if(biTmp < long.min || biTmp > long.max)
  1029. error(tokenStart, "Time span's minute is out of range.");
  1030. auto minute = biTmp.toLong();
  1031.  
  1032. biTmp = BigInt(secondStr);
  1033. if(biTmp < long.min || biTmp > long.max)
  1034. error(tokenStart, "Time span's second is out of range.");
  1035. auto second = biTmp.toLong();
  1036.  
  1037. long millisecond = 0;
  1038. if(millisecondStr != "")
  1039. {
  1040. biTmp = BigInt(millisecondStr);
  1041. if(biTmp < long.min || biTmp > long.max)
  1042. error(tokenStart, "Time span's millisecond is out of range.");
  1043. millisecond = biTmp.toLong();
  1044.  
  1045. if(millisecondStr.length == 1)
  1046. millisecond *= 100;
  1047. else if(millisecondStr.length == 2)
  1048. millisecond *= 10;
  1049. }
  1050.  
  1051. auto duration =
  1052. dur!"days" (day) +
  1053. dur!"hours" (hour) +
  1054. dur!"minutes"(minute) +
  1055. dur!"seconds"(second) +
  1056. dur!"msecs" (millisecond);
  1057.  
  1058. if(isNegative)
  1059. duration = -duration;
  1060.  
  1061. return duration;
  1062. }
  1063.  
// This has to reproduce some weird corner case behaviors from the
// original Java version of SDL. So some of this may seem weird.
/// Parse a timezone offset string such as "+HH:MM", "-HH", or "+:MM".
/// Returns a null Nullable for anything unrecognized (the caller treats
/// that as an unknown timezone).
private Nullable!Duration getTimeZoneOffset(string str)
{
	if(str.length < 2)
		return Nullable!Duration(); // Unknown timezone

	if(str[0] != '+' && str[0] != '-')
		return Nullable!Duration(); // Unknown timezone

	auto isNegative = str[0] == '-';

	// Split (after the sign) into hour and minute parts. Note that
	// numMinutesStr keeps its leading ':' — hence the `[1..$]` slices
	// and the length-3 (":MM") checks below.
	string numHoursStr;
	string numMinutesStr;
	if(str[1] == ':')
	{
		numMinutesStr = str[1..$];
		numHoursStr = "";
	}
	else
	{
		numMinutesStr = str.find(':');
		numHoursStr = str[1 .. $-numMinutesStr.length];
	}

	long numHours = 0;
	long numMinutes = 0;
	bool isUnknown = false;
	try
	{
		switch(numHoursStr.length)
		{
			case 0:
				// "+:MM" form: hours implicitly zero.
				if(numMinutesStr.length == 3)
				{
					numHours = 0;
					numMinutes = to!long(numMinutesStr[1..$]);
				}
				else
					isUnknown = true;
				break;

			case 1:
			case 2:
				// "+H" / "+HH", optionally followed by ":MM".
				if(numMinutesStr.length == 0)
				{
					numHours = to!long(numHoursStr);
					numMinutes = 0;
				}
				else if(numMinutesStr.length == 3)
				{
					numHours = to!long(numHoursStr);
					numMinutes = to!long(numMinutesStr[1..$]);
				}
				else
					isUnknown = true;
				break;

			default:
				// 3+ hour digits with no minutes: hours are forced to 0
				// and the digits after the first become the minutes —
				// a compatibility quirk carried over from Java SDL.
				if(numMinutesStr.length == 0)
				{
					// Yes, this is correct
					numHours = 0;
					numMinutes = to!long(numHoursStr[1..$]);
				}
				else
					isUnknown = true;
				break;
		}
	}
	catch(ConvException e)
		isUnknown = true;

	if(isUnknown)
		return Nullable!Duration(); // Unknown timezone

	auto timeZoneOffset = hours(numHours) + minutes(numMinutes);
	if(isNegative)
		timeZoneOffset = -timeZoneOffset;

	// Timezone valid
	return Nullable!Duration(timeZoneOffset);
}
  1147.  
/// Lex date or datetime (after the initial numeric fragment was lexed)
///
/// Entered with 'ch' on the '/' that follows the year digits (asserted below).
/// Lexes the month/day, then looks ahead for an optional time-of-day and
/// timezone. Depending on what is found, emits (via the 'accept' mixin, which
/// produces the token and leaves this function) one of:
///   - a plain Date value,
///   - a DateTimeFrac value (no timezone),
///   - a SysTime value (recognized timezone),
///   - a DateTimeFracUnknownZone value (unrecognized timezone string).
private void lexDate(bool isDateNegative, string yearStr)
{
	assert(ch == '/');

	// Lex months
	advanceChar(ErrorOnEOF.Yes); // Skip '/'
	auto monthStr = lexNumericFragment();

	// Lex days
	if(ch != '/')
		error("Invalid date format: Missing days.");
	advanceChar(ErrorOnEOF.Yes); // Skip '/'
	auto dayStr = lexNumericFragment();

	auto date = makeDate(isDateNegative, yearStr, monthStr, dayStr);

	if(!isEndOfNumber() && ch != '/')
		error("Dates cannot have suffixes.");

	// Date?
	// At EOF there can be no time component: emit the plain date.
	if(isEOF)
		mixin(accept!("Value", "date"));

	auto endOfDate = location;

	// Skip any whitespace, comments, and line-continuations ('\' + newline)
	// that may separate the date from a time-of-day on the same logical line.
	while(
		!isEOF &&
		( ch == '\\' || ch == '/' || (isWhite(ch) && !isNewline(ch)) )
	)
	{
		if(ch == '\\' && hasNextCh && isNewline(nextCh))
		{
			advanceChar(ErrorOnEOF.Yes);
			if(isAtNewline())
				advanceChar(ErrorOnEOF.Yes);
			advanceChar(ErrorOnEOF.No);
		}

		eatWhite();
	}

	// Date?
	// If what follows is neither digits nor '-', there is no time component:
	// emit a plain Date token ending where the date text itself ended.
	if(isEOF || (!isDigit(ch) && ch != '-'))
		mixin(accept!("Value", "date", "", "endOfDate.index"));

	auto startOfTime = location;

	// Is time negative?
	bool isTimeNegative = ch == '-';
	if(isTimeNegative)
		advanceChar(ErrorOnEOF.Yes);

	// Lex hours
	auto hourStr = ch == '.'? "" : lexNumericFragment();

	// Lex minutes
	if(ch != ':')
	{
		// No minutes found. Therefore we had a plain Date followed
		// by a numeric literal, not a DateTime.
		// Stash the fragment we already consumed so the lexer can
		// re-issue it as a separate numeric token after the date.
		lookaheadTokenInfo.exists = true;
		lookaheadTokenInfo.numericFragment = hourStr;
		lookaheadTokenInfo.isNegative = isTimeNegative;
		lookaheadTokenInfo.tokenStart = startOfTime;
		mixin(accept!("Value", "date", "", "endOfDate.index"));
	}
	advanceChar(ErrorOnEOF.Yes); // Skip ':'
	auto minuteStr = lexNumericFragment();

	// Lex seconds, if exists
	string secondStr;
	if(ch == ':')
	{
		advanceChar(ErrorOnEOF.Yes); // Skip ':'
		secondStr = lexNumericFragment();
	}

	// Lex milliseconds, if exists
	string millisecondStr;
	if(ch == '.')
	{
		advanceChar(ErrorOnEOF.Yes); // Skip '.'
		millisecondStr = lexNumericFragment();
	}

	auto dateTimeFrac = makeDateTimeFrac(isTimeNegative, date, hourStr, minuteStr, secondStr, millisecondStr);

	// Lex zone, if exists
	if(ch == '-')
	{
		advanceChar(ErrorOnEOF.Yes); // Skip '-'
		auto timezoneStart = location;

		if(!isAlpha(ch))
			error("Invalid timezone format.");

		// The timezone name runs to the next whitespace (or EOF).
		while(!isEOF && !isWhite(ch))
			advanceChar(ErrorOnEOF.No);

		auto timezoneStr = source[timezoneStart.index..location.index];
		// "GMT..." names carry an inline offset (e.g. "GMT+04:30"),
		// parsed by getTimeZoneOffset.
		if(timezoneStr.startsWith("GMT"))
		{
			auto isoPart = timezoneStr["GMT".length..$];
			auto offset = getTimeZoneOffset(isoPart);

			if(offset.isNull())
			{
				// Unknown time zone
				mixin(accept!("Value", "DateTimeFracUnknownZone(dateTimeFrac.dateTime, dateTimeFrac.fracSecs, timezoneStr)"));
			}
			else
			{
				auto timezone = new immutable SimpleTimeZone(offset.get());
				auto fsecs = dateTimeFrac.fracSecs;
				mixin(accept!("Value", "SysTime(dateTimeFrac.dateTime, fsecs, timezone)"));
			}
		}

		// Otherwise, try resolving the name via the POSIX timezone database.
		try
		{
			auto timezone = PosixTimeZone.getTimeZone(timezoneStr);
			if (timezone) {
				auto fsecs = dateTimeFrac.fracSecs;
				mixin(accept!("Value", "SysTime(dateTimeFrac.dateTime, fsecs, timezone)"));
			}
		}
		catch(TimeException e)
		{
			// Time zone not found. So just move along to "Unknown time zone" below.
		}

		// Unknown time zone
		mixin(accept!("Value", "DateTimeFracUnknownZone(dateTimeFrac.dateTime, dateTimeFrac.fracSecs, timezoneStr)"));
	}

	if(!isEndOfNumber())
		error("Date-Times cannot have suffixes.");

	mixin(accept!("Value", "dateTimeFrac"));
}
  1289.  
  1290. /// Lex time span (after the initial numeric fragment was lexed)
  1291. private void lexTimeSpan(bool isNegative, string firstPart)
  1292. {
  1293. assert(ch == ':' || ch == 'd');
  1294.  
  1295. string dayStr = "";
  1296. string hourStr;
  1297.  
  1298. // Lexed days?
  1299. bool hasDays = ch == 'd';
  1300. if(hasDays)
  1301. {
  1302. dayStr = firstPart;
  1303. advanceChar(ErrorOnEOF.Yes); // Skip 'd'
  1304.  
  1305. // Lex hours
  1306. if(ch != ':')
  1307. error("Invalid time span format: Missing hours.");
  1308. advanceChar(ErrorOnEOF.Yes); // Skip ':'
  1309. hourStr = lexNumericFragment();
  1310. }
  1311. else
  1312. hourStr = firstPart;
  1313.  
  1314. // Lex minutes
  1315. if(ch != ':')
  1316. error("Invalid time span format: Missing minutes.");
  1317. advanceChar(ErrorOnEOF.Yes); // Skip ':'
  1318. auto minuteStr = lexNumericFragment();
  1319.  
  1320. // Lex seconds
  1321. if(ch != ':')
  1322. error("Invalid time span format: Missing seconds.");
  1323. advanceChar(ErrorOnEOF.Yes); // Skip ':'
  1324. auto secondStr = lexNumericFragment();
  1325.  
  1326. // Lex milliseconds, if exists
  1327. string millisecondStr = "";
  1328. if(ch == '.')
  1329. {
  1330. advanceChar(ErrorOnEOF.Yes); // Skip '.'
  1331. millisecondStr = lexNumericFragment();
  1332. }
  1333.  
  1334. if(!isEndOfNumber())
  1335. error("Time spans cannot have suffixes.");
  1336.  
  1337. auto duration = makeDuration(isNegative, dayStr, hourStr, minuteStr, secondStr, millisecondStr);
  1338. mixin(accept!("Value", "duration"));
  1339. }
  1340.  
/// Advances past whitespace and comments
///
/// Small state machine that consumes whitespace, line comments
/// ("#", "//", "--"), block comments ("/*...*/"), and line-continuation
/// backslashes. Returns with 'ch' on the first character that is not
/// whitespace or comment (or at EOF). With allowComments=false, stops
/// as soon as a comment opener is seen instead of consuming it.
private void eatWhite(bool allowComments=true)
{
	// -- Comment/Whitepace Lexer -------------

	enum State
	{
		normal,
		lineComment, // Got "#" or "//" or "--", Eating everything until newline
		blockComment, // Got "/*", Eating everything until "*/"
	}

	if(isEOF)
		return;

	Location commentStart;
	State state = State.normal;
	bool consumeNewlines = false; // Set once a line-continuation '\' was seen
	bool hasConsumedNewline = false; // Set once the newline owed to that '\' was eaten
	while(true)
	{
		final switch(state)
		{
		case State.normal:

			// Line-continuation: make the next newline part of the
			// whitespace instead of terminating at it.
			if(ch == '\\')
			{
				commentStart = location;
				consumeNewlines = true;
				hasConsumedNewline = false;
			}

			else if(ch == '#')
			{
				if(!allowComments)
					return;

				commentStart = location;
				state = State.lineComment;
				continue;
			}

			else if(ch == '/' || ch == '-')
			{
				commentStart = location;
				// "//" or "--" opens a line comment (lookahead matches
				// a second copy of the same character).
				if(lookahead(ch))
				{
					if(!allowComments)
						return;

					advanceChar(ErrorOnEOF.No);
					state = State.lineComment;
					continue;
				}
				else if(ch == '/' && lookahead('*'))
				{
					if(!allowComments)
						return;

					advanceChar(ErrorOnEOF.No);
					state = State.blockComment;
					continue;
				}
				else
					return; // Done
			}
			else if(isAtNewline())
			{
				// A newline only counts as whitespace if a
				// line-continuation backslash preceded it.
				if(consumeNewlines)
					hasConsumedNewline = true;
				else
					return; // Done
			}
			else if(!isWhite(ch))
			{
				if(consumeNewlines)
				{
					// After '\', only whitespace may appear before the
					// newline; once the newline was eaten we're done.
					if(hasConsumedNewline)
						return; // Done
					else
						error("Only whitespace can come between a line-continuation backslash and the following newline.");
				}
				else
					return; // Done
			}

			break;

		case State.lineComment:
			// Stay in the comment until the char *before* a newline,
			// so the newline itself is handled by State.normal.
			if(lookahead(&isNewline))
				state = State.normal;
			break;

		case State.blockComment:
			if(ch == '*' && lookahead('/'))
			{
				advanceChar(ErrorOnEOF.No);
				state = State.normal;
			}
			break;
		}

		advanceChar(ErrorOnEOF.No);
		if(isEOF)
		{
			// Reached EOF

			// A '\' with no following newline is malformed input.
			if(consumeNewlines && !hasConsumedNewline)
				error("Missing newline after line-continuation backslash.");

			else if(state == State.blockComment)
				error(commentStart, "Unterminated block comment.");

			else
				return; // Done, reached EOF
		}
	}
}
  1459. }
  1460.  
  1461. version(sdlangUnittest)
  1462. {
  1463. import std.stdio;
  1464.  
  1465. private auto loc = Location("filename", 0, 0, 0);
  1466. private auto loc2 = Location("a", 1, 1, 1);
  1467.  
// Sanity-check of Token equality used by the tests below: tokens with the
// same symbol compare equal, and — as the second assert shows — equality
// apparently disregards differing location, and differing data strings when
// the values match. NOTE(review): confirm against Token.opEquals in token.d.
unittest
{
	assert([Token(symbol!"EOL",loc) ] == [Token(symbol!"EOL",loc) ] );
	assert([Token(symbol!"EOL",loc,Value(7),"A")] == [Token(symbol!"EOL",loc2,Value(7),"B")] );
}
  1473.  
  1474. private int numErrors = 0;
  1475. private void testLex(string source, Token[] expected, bool test_locations = false, string file=__FILE__, size_t line=__LINE__)
  1476. {
  1477. Token[] actual;
  1478. try
  1479. actual = lexSource(source, "filename");
  1480. catch(SDLangParseException e)
  1481. {
  1482. numErrors++;
  1483. stderr.writeln(file, "(", line, "): testLex failed on: ", source);
  1484. stderr.writeln(" Expected:");
  1485. stderr.writeln(" ", expected);
  1486. stderr.writeln(" Actual: SDLangParseException thrown:");
  1487. stderr.writeln(" ", e.msg);
  1488. return;
  1489. }
  1490.  
  1491. bool is_same = actual == expected;
  1492. if (is_same && test_locations) {
  1493. is_same = actual.map!(t => t.location).equal(expected.map!(t => t.location));
  1494. }
  1495.  
  1496. if(!is_same)
  1497. {
  1498. numErrors++;
  1499. stderr.writeln(file, "(", line, "): testLex failed on: ", source);
  1500. stderr.writeln(" Expected:");
  1501. stderr.writeln(" ", expected);
  1502. stderr.writeln(" Actual:");
  1503. stderr.writeln(" ", actual);
  1504.  
  1505. if(expected.length > 1 || actual.length > 1)
  1506. {
  1507. stderr.writeln(" expected.length: ", expected.length);
  1508. stderr.writeln(" actual.length: ", actual.length);
  1509.  
  1510. if(actual.length == expected.length)
  1511. foreach(i; 0..actual.length)
  1512. if(actual[i] != expected[i])
  1513. {
  1514. stderr.writeln(" Unequal at index #", i, ":");
  1515. stderr.writeln(" Expected:");
  1516. stderr.writeln(" ", expected[i]);
  1517. stderr.writeln(" Actual:");
  1518. stderr.writeln(" ", actual[i]);
  1519. }
  1520. }
  1521. }
  1522. }
  1523.  
  1524. private void testLexThrows(string file=__FILE__, size_t line=__LINE__)(string source)
  1525. {
  1526. bool hadException = false;
  1527. Token[] actual;
  1528. try
  1529. actual = lexSource(source, "filename");
  1530. catch(SDLangParseException e)
  1531. hadException = true;
  1532.  
  1533. if(!hadException)
  1534. {
  1535. numErrors++;
  1536. stderr.writeln(file, "(", line, "): testLex failed on: ", source);
  1537. stderr.writeln(" Expected SDLangParseException");
  1538. stderr.writeln(" Actual:");
  1539. stderr.writeln(" ", actual);
  1540. }
  1541. }
  1542. }
  1543.  
  1544. version(sdlangUnittest)
  1545. unittest
  1546. {
  1547. writeln("Unittesting sdlang lexer...");
  1548. stdout.flush();
  1549.  
  1550. testLex("", []);
  1551. testLex(" ", []);
  1552. testLex("\\\n", []);
  1553. testLex("/*foo*/", []);
  1554. testLex("/* multiline \n comment */", []);
  1555. testLex("/* * */", []);
  1556. testLexThrows("/* ");
  1557.  
  1558. testLex(":", [ Token(symbol!":", loc) ]);
  1559. testLex("=", [ Token(symbol!"=", loc) ]);
  1560. testLex("{", [ Token(symbol!"{", loc) ]);
  1561. testLex("}", [ Token(symbol!"}", loc) ]);
  1562. testLex(";", [ Token(symbol!"EOL",loc) ]);
  1563. testLex("\n", [ Token(symbol!"EOL",loc) ]);
  1564.  
  1565. testLex("foo", [ Token(symbol!"Ident",loc,Value(null),"foo") ]);
  1566. testLex("_foo", [ Token(symbol!"Ident",loc,Value(null),"_foo") ]);
  1567. testLex("foo.bar", [ Token(symbol!"Ident",loc,Value(null),"foo.bar") ]);
  1568. testLex("foo-bar", [ Token(symbol!"Ident",loc,Value(null),"foo-bar") ]);
  1569. testLex("foo.", [ Token(symbol!"Ident",loc,Value(null),"foo.") ]);
  1570. testLex("foo-", [ Token(symbol!"Ident",loc,Value(null),"foo-") ]);
  1571. testLexThrows(".foo");
  1572.  
  1573. testLex("foo bar", [
  1574. Token(symbol!"Ident",loc,Value(null),"foo"),
  1575. Token(symbol!"Ident",loc,Value(null),"bar"),
  1576. ]);
  1577. testLex("foo \\ \n \n bar", [
  1578. Token(symbol!"Ident",loc,Value(null),"foo"),
  1579. Token(symbol!"Ident",loc,Value(null),"bar"),
  1580. ]);
  1581. testLex("foo \\ \n \\ \n bar", [
  1582. Token(symbol!"Ident",loc,Value(null),"foo"),
  1583. Token(symbol!"Ident",loc,Value(null),"bar"),
  1584. ]);
  1585. testLexThrows("foo \\ ");
  1586. testLexThrows("foo \\ bar");
  1587. testLexThrows("foo \\ \n \\ ");
  1588. testLexThrows("foo \\ \n \\ bar");
  1589.  
  1590. testLex("foo : = { } ; \n bar \n", [
  1591. Token(symbol!"Ident",loc,Value(null),"foo"),
  1592. Token(symbol!":",loc),
  1593. Token(symbol!"=",loc),
  1594. Token(symbol!"{",loc),
  1595. Token(symbol!"}",loc),
  1596. Token(symbol!"EOL",loc),
  1597. Token(symbol!"EOL",loc),
  1598. Token(symbol!"Ident",loc,Value(null),"bar"),
  1599. Token(symbol!"EOL",loc),
  1600. ]);
  1601.  
  1602. testLexThrows("<");
  1603. testLexThrows("*");
  1604. testLexThrows(`\`);
  1605.  
  1606. // Integers
  1607. testLex( "7", [ Token(symbol!"Value",loc,Value(cast( int) 7)) ]);
  1608. testLex( "-7", [ Token(symbol!"Value",loc,Value(cast( int)-7)) ]);
  1609. testLex( "7L", [ Token(symbol!"Value",loc,Value(cast(long) 7)) ]);
  1610. testLex( "7l", [ Token(symbol!"Value",loc,Value(cast(long) 7)) ]);
  1611. testLex("-7L", [ Token(symbol!"Value",loc,Value(cast(long)-7)) ]);
  1612. testLex( "0", [ Token(symbol!"Value",loc,Value(cast( int) 0)) ]);
  1613. testLex( "-0", [ Token(symbol!"Value",loc,Value(cast( int) 0)) ]);
  1614.  
  1615. testLex("7/**/", [ Token(symbol!"Value",loc,Value(cast( int) 7)) ]);
  1616. testLex("7#", [ Token(symbol!"Value",loc,Value(cast( int) 7)) ]);
  1617.  
  1618. testLex("7 A", [
  1619. Token(symbol!"Value",loc,Value(cast(int)7)),
  1620. Token(symbol!"Ident",loc,Value( null),"A"),
  1621. ]);
  1622. testLexThrows("7A");
  1623. testLexThrows("-A");
  1624. testLexThrows(`-""`);
  1625.  
  1626. testLex("7;", [
  1627. Token(symbol!"Value",loc,Value(cast(int)7)),
  1628. Token(symbol!"EOL",loc),
  1629. ]);
  1630.  
  1631. // Floats
  1632. testLex("1.2F" , [ Token(symbol!"Value",loc,Value(cast( float)1.2)) ]);
  1633. testLex("1.2f" , [ Token(symbol!"Value",loc,Value(cast( float)1.2)) ]);
  1634. testLex("1.2" , [ Token(symbol!"Value",loc,Value(cast(double)1.2)) ]);
  1635. testLex("1.2D" , [ Token(symbol!"Value",loc,Value(cast(double)1.2)) ]);
  1636. testLex("1.2d" , [ Token(symbol!"Value",loc,Value(cast(double)1.2)) ]);
  1637. testLex("1.2BD", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]);
  1638. testLex("1.2bd", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]);
  1639. testLex("1.2Bd", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]);
  1640. testLex("1.2bD", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]);
  1641.  
  1642. testLex(".2F" , [ Token(symbol!"Value",loc,Value(cast( float)0.2)) ]);
  1643. testLex(".2" , [ Token(symbol!"Value",loc,Value(cast(double)0.2)) ]);
  1644. testLex(".2D" , [ Token(symbol!"Value",loc,Value(cast(double)0.2)) ]);
  1645. testLex(".2BD", [ Token(symbol!"Value",loc,Value(cast( real)0.2)) ]);
  1646.  
  1647. testLex("-1.2F" , [ Token(symbol!"Value",loc,Value(cast( float)-1.2)) ]);
  1648. testLex("-1.2" , [ Token(symbol!"Value",loc,Value(cast(double)-1.2)) ]);
  1649. testLex("-1.2D" , [ Token(symbol!"Value",loc,Value(cast(double)-1.2)) ]);
  1650. testLex("-1.2BD", [ Token(symbol!"Value",loc,Value(cast( real)-1.2)) ]);
  1651.  
  1652. testLex("-.2F" , [ Token(symbol!"Value",loc,Value(cast( float)-0.2)) ]);
  1653. testLex("-.2" , [ Token(symbol!"Value",loc,Value(cast(double)-0.2)) ]);
  1654. testLex("-.2D" , [ Token(symbol!"Value",loc,Value(cast(double)-0.2)) ]);
  1655. testLex("-.2BD", [ Token(symbol!"Value",loc,Value(cast( real)-0.2)) ]);
  1656.  
  1657. testLex( "0.0" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]);
  1658. testLex( "0.0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]);
  1659. testLex( "0.0BD", [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]);
  1660. testLex("-0.0" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]);
  1661. testLex("-0.0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]);
  1662. testLex("-0.0BD", [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]);
  1663. testLex( "7F" , [ Token(symbol!"Value",loc,Value(cast( float)7.0)) ]);
  1664. testLex( "7D" , [ Token(symbol!"Value",loc,Value(cast(double)7.0)) ]);
  1665. testLex( "7BD" , [ Token(symbol!"Value",loc,Value(cast( real)7.0)) ]);
  1666. testLex( "0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]);
  1667. testLex( "0D" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]);
  1668. testLex( "0BD" , [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]);
  1669. testLex("-0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]);
  1670. testLex("-0D" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]);
  1671. testLex("-0BD" , [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]);
  1672.  
  1673. testLex("1.2 F", [
  1674. Token(symbol!"Value",loc,Value(cast(double)1.2)),
  1675. Token(symbol!"Ident",loc,Value( null),"F"),
  1676. ]);
  1677. testLexThrows("1.2A");
  1678. testLexThrows("1.2B");
  1679. testLexThrows("1.2BDF");
  1680.  
  1681. testLex("1.2;", [
  1682. Token(symbol!"Value",loc,Value(cast(double)1.2)),
  1683. Token(symbol!"EOL",loc),
  1684. ]);
  1685.  
  1686. testLex("1.2F;", [
  1687. Token(symbol!"Value",loc,Value(cast(float)1.2)),
  1688. Token(symbol!"EOL",loc),
  1689. ]);
  1690.  
  1691. testLex("1.2BD;", [
  1692. Token(symbol!"Value",loc,Value(cast(real)1.2)),
  1693. Token(symbol!"EOL",loc),
  1694. ]);
  1695.  
  1696. // Booleans and null
  1697. testLex("true", [ Token(symbol!"Value",loc,Value( true)) ]);
  1698. testLex("false", [ Token(symbol!"Value",loc,Value(false)) ]);
  1699. testLex("on", [ Token(symbol!"Value",loc,Value( true)) ]);
  1700. testLex("off", [ Token(symbol!"Value",loc,Value(false)) ]);
  1701. testLex("null", [ Token(symbol!"Value",loc,Value( null)) ]);
  1702.  
  1703. testLex("TRUE", [ Token(symbol!"Ident",loc,Value(null),"TRUE") ]);
  1704. testLex("true ", [ Token(symbol!"Value",loc,Value(true)) ]);
  1705. testLex("true ", [ Token(symbol!"Value",loc,Value(true)) ]);
  1706. testLex("tru", [ Token(symbol!"Ident",loc,Value(null),"tru") ]);
  1707. testLex("truX", [ Token(symbol!"Ident",loc,Value(null),"truX") ]);
  1708. testLex("trueX", [ Token(symbol!"Ident",loc,Value(null),"trueX") ]);
  1709.  
  1710. // Raw Backtick Strings
  1711. testLex("`hello world`", [ Token(symbol!"Value",loc,Value(`hello world` )) ]);
  1712. testLex("` hello world `", [ Token(symbol!"Value",loc,Value(` hello world ` )) ]);
  1713. testLex("`hello \\t world`", [ Token(symbol!"Value",loc,Value(`hello \t world`)) ]);
  1714. testLex("`hello \\n world`", [ Token(symbol!"Value",loc,Value(`hello \n world`)) ]);
  1715. testLex("`hello \n world`", [ Token(symbol!"Value",loc,Value("hello \n world")) ]);
  1716. testLex("`hello \r\n world`", [ Token(symbol!"Value",loc,Value("hello \r\n world")) ]);
  1717. testLex("`hello \"world\"`", [ Token(symbol!"Value",loc,Value(`hello "world"` )) ]);
  1718.  
  1719. testLexThrows("`foo");
  1720. testLexThrows("`");
  1721.  
  1722. // Double-Quote Strings
  1723. testLex(`"hello world"`, [ Token(symbol!"Value",loc,Value("hello world" )) ]);
  1724. testLex(`" hello world "`, [ Token(symbol!"Value",loc,Value(" hello world " )) ]);
  1725. testLex(`"hello \t world"`, [ Token(symbol!"Value",loc,Value("hello \t world")) ]);
  1726. testLex(`"hello \n world"`, [ Token(symbol!"Value",loc,Value("hello \n world")) ]);
  1727. testLex("\"hello \\\n world\"", [ Token(symbol!"Value",loc,Value("hello world" )) ]);
  1728. testLex("\"hello \\ \n world\"", [ Token(symbol!"Value",loc,Value("hello world" )) ]);
  1729. testLex("\"hello \\ \n\n world\"", [ Token(symbol!"Value",loc,Value("hello world" )) ]);
  1730. testLex(`"\"hello world\""`, [ Token(symbol!"Value",loc,Value(`"hello world"` )) ]);
  1731.  
  1732. testLexThrows("\"hello \n world\"");
  1733. testLexThrows(`"foo`);
  1734. testLexThrows(`"`);
  1735.  
  1736. // Characters
  1737. testLex("'a'", [ Token(symbol!"Value",loc,Value(cast(dchar) 'a')) ]);
  1738. testLex("'\\n'", [ Token(symbol!"Value",loc,Value(cast(dchar)'\n')) ]);
  1739. testLex("'\\t'", [ Token(symbol!"Value",loc,Value(cast(dchar)'\t')) ]);
  1740. testLex("'\t'", [ Token(symbol!"Value",loc,Value(cast(dchar)'\t')) ]);
  1741. testLex("'\\''", [ Token(symbol!"Value",loc,Value(cast(dchar)'\'')) ]);
  1742. testLex(`'\\'`, [ Token(symbol!"Value",loc,Value(cast(dchar)'\\')) ]);
  1743.  
  1744. testLexThrows("'a");
  1745. testLexThrows("'aa'");
  1746. testLexThrows("''");
  1747. testLexThrows("'\\\n'");
  1748. testLexThrows("'\n'");
  1749. testLexThrows(`'\`);
  1750. testLexThrows(`'\'`);
  1751. testLexThrows("'");
  1752.  
  1753. // Unicode
  1754. testLex("日本語", [ Token(symbol!"Ident",loc,Value(null), "日本語") ]);
  1755. testLex("`おはよう、日本。`", [ Token(symbol!"Value",loc,Value(`おはよう、日本。`)) ]);
  1756. testLex(`"おはよう、日本。"`, [ Token(symbol!"Value",loc,Value(`おはよう、日本。`)) ]);
  1757. testLex("'月'", [ Token(symbol!"Value",loc,Value("月"d.dup[0])) ]);
  1758.  
  1759. // Base64 Binary
  1760. testLex("[aGVsbG8gd29ybGQ=]", [ Token(symbol!"Value",loc,Value(cast(ubyte[])"hello world".dup))]);
  1761. testLex("[ aGVsbG8gd29ybGQ= ]", [ Token(symbol!"Value",loc,Value(cast(ubyte[])"hello world".dup))]);
  1762. testLex("[\n aGVsbG8g \n \n d29ybGQ= \n]", [ Token(symbol!"Value",loc,Value(cast(ubyte[])"hello world".dup))]);
  1763.  
  1764. testLexThrows("[aGVsbG8gd29ybGQ]"); // Ie: Not multiple of 4
  1765. testLexThrows("[ aGVsbG8gd29ybGQ ]");
  1766.  
  1767. // Date
  1768. testLex( "1999/12/5", [ Token(symbol!"Value",loc,Value(Date( 1999, 12, 5))) ]);
  1769. testLex( "2013/2/22", [ Token(symbol!"Value",loc,Value(Date( 2013, 2, 22))) ]);
  1770. testLex("-2013/2/22", [ Token(symbol!"Value",loc,Value(Date(-2013, 2, 22))) ]);
  1771.  
  1772. testLexThrows("7/");
  1773. testLexThrows("2013/2/22a");
  1774. testLexThrows("2013/2/22f");
  1775.  
  1776. testLex("1999/12/5\n", [
  1777. Token(symbol!"Value",loc,Value(Date(1999, 12, 5))),
  1778. Token(symbol!"EOL",loc),
  1779. ]);
  1780.  
  1781. // DateTime, no timezone
  1782. testLex( "2013/2/22 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
  1783. testLex( "2013/2/22 \t 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
  1784. testLex( "2013/2/22/*foo*/07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
  1785. testLex( "2013/2/22 /*foo*/ \\\n /*bar*/ 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
  1786. testLex( "2013/2/22 /*foo*/ \\\n\n \n /*bar*/ 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
  1787. testLex( "2013/2/22 /*foo*/ \\\n\\\n \\\n /*bar*/ 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
  1788. testLex( "2013/2/22/*foo*/\\\n/*bar*/07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
  1789. testLex("-2013/2/22 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime(-2013, 2, 22, 7, 53, 0)))) ]);
  1790. testLex( "2013/2/22 -07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53)))) ]);
  1791. testLex("-2013/2/22 -07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime(-2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53)))) ]);
  1792. testLex( "2013/2/22 07:53:34", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34)))) ]);
  1793. testLex( "2013/2/22 07:53:34.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34), FracSec.from!"msecs"(123)))) ]);
  1794. testLex( "2013/2/22 07:53:34.12", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34), FracSec.from!"msecs"(120)))) ]);
  1795. testLex( "2013/2/22 07:53:34.1", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34), FracSec.from!"msecs"(100)))) ]);
  1796. testLex( "2013/2/22 07:53.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0), FracSec.from!"msecs"(123)))) ]);
  1797.  
  1798. testLex( "2013/2/22 34:65", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) + hours(34) + minutes(65) + seconds( 0)))) ]);
  1799. testLex( "2013/2/22 34:65:77.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) + hours(34) + minutes(65) + seconds(77), FracSec.from!"msecs"(123)))) ]);
  1800. testLex( "2013/2/22 34:65.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) + hours(34) + minutes(65) + seconds( 0), FracSec.from!"msecs"(123)))) ]);
  1801.  
  1802. testLex( "2013/2/22 -34:65", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds( 0)))) ]);
  1803. testLex( "2013/2/22 -34:65:77.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds(77), FracSec.from!"msecs"(-123)))) ]);
  1804. testLex( "2013/2/22 -34:65.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds( 0), FracSec.from!"msecs"(-123)))) ]);
  1805.  
  1806. testLexThrows("2013/2/22 07:53a");
  1807. testLexThrows("2013/2/22 07:53f");
  1808. testLexThrows("2013/2/22 07:53:34.123a");
  1809. testLexThrows("2013/2/22 07:53:34.123f");
  1810. testLexThrows("2013/2/22a 07:53");
  1811.  
  1812. testLex(`2013/2/22 "foo"`, [
  1813. Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
  1814. Token(symbol!"Value",loc,Value("foo")),
  1815. ]);
  1816.  
  1817. testLex("2013/2/22 07", [
  1818. Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
  1819. Token(symbol!"Value",loc,Value(cast(int)7)),
  1820. ]);
  1821.  
  1822. testLex("2013/2/22 1.2F", [
  1823. Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
  1824. Token(symbol!"Value",loc,Value(cast(float)1.2)),
  1825. ]);
  1826.  
  1827. testLex("2013/2/22 .2F", [
  1828. Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
  1829. Token(symbol!"Value",loc,Value(cast(float)0.2)),
  1830. ]);
  1831.  
  1832. testLex("2013/2/22 -1.2F", [
  1833. Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
  1834. Token(symbol!"Value",loc,Value(cast(float)-1.2)),
  1835. ]);
  1836.  
  1837. testLex("2013/2/22 -.2F", [
  1838. Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
  1839. Token(symbol!"Value",loc,Value(cast(float)-0.2)),
  1840. ]);
  1841.  
  1842. // DateTime, with known timezone
  1843. testLex( "2013/2/22 07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone( hours(0) )))) ]);
  1844. testLex("-2013/2/22 07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime(-2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone( hours(0) )))) ]);
  1845. testLex( "2013/2/22 -07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), new immutable SimpleTimeZone( hours(0) )))) ]);
  1846. testLex("-2013/2/22 -07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime(-2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), new immutable SimpleTimeZone( hours(0) )))) ]);
  1847. testLex( "2013/2/22 07:53-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]);
  1848. testLex( "2013/2/22 07:53-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]);
  1849. testLex( "2013/2/22 07:53:34-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), new immutable SimpleTimeZone( hours(0) )))) ]);
  1850. testLex( "2013/2/22 07:53:34-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]);
  1851. testLex( "2013/2/22 07:53:34-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]);
  1852. testLex( "2013/2/22 07:53:34.123-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), FracSec.from!"msecs"(123), new immutable SimpleTimeZone( hours(0) )))) ]);
  1853. testLex( "2013/2/22 07:53:34.123-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), FracSec.from!"msecs"(123), new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]);
  1854. testLex( "2013/2/22 07:53:34.123-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), FracSec.from!"msecs"(123), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]);
  1855. testLex( "2013/2/22 07:53.123-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), FracSec.from!"msecs"(123), new immutable SimpleTimeZone( hours(0) )))) ]);
  1856. testLex( "2013/2/22 07:53.123-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), FracSec.from!"msecs"(123), new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]);
  1857. testLex( "2013/2/22 07:53.123-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), FracSec.from!"msecs"(123), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]);
  1858.  
  1859. testLex( "2013/2/22 -34:65-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds( 0), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]);
  1860.  
  1861. // DateTime, with Java SDL's occasionally weird interpretation of some
  1862. // "not quite ISO" variations of the "GMT with offset" timezone strings.
  1863. Token testTokenSimpleTimeZone(Duration d)
  1864. {
  1865. auto dateTime = DateTime(2013, 2, 22, 7, 53, 0);
  1866. auto tz = new immutable SimpleTimeZone(d);
  1867. return Token( symbol!"Value", loc, Value(SysTime(dateTime,tz)) );
  1868. }
  1869. Token testTokenUnknownTimeZone(string tzName)
  1870. {
  1871. auto dateTime = DateTime(2013, 2, 22, 7, 53, 0);
  1872. auto frac = FracSec.from!"msecs"(0);
  1873. return Token( symbol!"Value", loc, Value(DateTimeFracUnknownZone(dateTime,frac,tzName)) );
  1874. }
// No digits after "GMT+", or a colon whose minutes part is not exactly two
// digits, is not a recognized offset: the raw string becomes an unknown zone.
testLex("2013/2/22 07:53-GMT+", [ testTokenUnknownTimeZone("GMT+") ]);
testLex("2013/2/22 07:53-GMT+:", [ testTokenUnknownTimeZone("GMT+:") ]);
testLex("2013/2/22 07:53-GMT+:3", [ testTokenUnknownTimeZone("GMT+:3") ]);
// ...but a colon followed by exactly two digits parses as a minutes-only offset.
testLex("2013/2/22 07:53-GMT+:03", [ testTokenSimpleTimeZone(minutes(3)) ]);
testLex("2013/2/22 07:53-GMT+:003", [ testTokenUnknownTimeZone("GMT+:003") ]);

// One or two digits before the (optional) colon are read as hours; a
// following ":mm" must have exactly two digits to contribute minutes.
testLex("2013/2/22 07:53-GMT+4", [ testTokenSimpleTimeZone(hours(4)) ]);
testLex("2013/2/22 07:53-GMT+4:", [ testTokenUnknownTimeZone("GMT+4:") ]);
testLex("2013/2/22 07:53-GMT+4:3", [ testTokenUnknownTimeZone("GMT+4:3") ]);
testLex("2013/2/22 07:53-GMT+4:03", [ testTokenSimpleTimeZone(hours(4)+minutes(3)) ]);
testLex("2013/2/22 07:53-GMT+4:003", [ testTokenUnknownTimeZone("GMT+4:003") ]);

testLex("2013/2/22 07:53-GMT+04", [ testTokenSimpleTimeZone(hours(4)) ]);
testLex("2013/2/22 07:53-GMT+04:", [ testTokenUnknownTimeZone("GMT+04:") ]);
testLex("2013/2/22 07:53-GMT+04:3", [ testTokenUnknownTimeZone("GMT+04:3") ]);
testLex("2013/2/22 07:53-GMT+04:03", [ testTokenSimpleTimeZone(hours(4)+minutes(3)) ]);
// Trailing garbage after an otherwise valid offset invalidates the whole zone.
testLex("2013/2/22 07:53-GMT+04:03abc", [ testTokenUnknownTimeZone("GMT+04:03abc") ]);
testLex("2013/2/22 07:53-GMT+04:003", [ testTokenUnknownTimeZone("GMT+04:003") ]);

// Three or more digits with no colon are read as a single minutes count
// ("004"/"0004"/"00004" all mean 4 minutes) — a Java SDL quirk. Any colon
// after such a run makes the zone unknown.
testLex("2013/2/22 07:53-GMT+004", [ testTokenSimpleTimeZone(minutes(4)) ]);
testLex("2013/2/22 07:53-GMT+004:", [ testTokenUnknownTimeZone("GMT+004:") ]);
testLex("2013/2/22 07:53-GMT+004:3", [ testTokenUnknownTimeZone("GMT+004:3") ]);
testLex("2013/2/22 07:53-GMT+004:03", [ testTokenUnknownTimeZone("GMT+004:03") ]);
testLex("2013/2/22 07:53-GMT+004:003", [ testTokenUnknownTimeZone("GMT+004:003") ]);

testLex("2013/2/22 07:53-GMT+0004", [ testTokenSimpleTimeZone(minutes(4)) ]);
testLex("2013/2/22 07:53-GMT+0004:", [ testTokenUnknownTimeZone("GMT+0004:") ]);
testLex("2013/2/22 07:53-GMT+0004:3", [ testTokenUnknownTimeZone("GMT+0004:3") ]);
testLex("2013/2/22 07:53-GMT+0004:03", [ testTokenUnknownTimeZone("GMT+0004:03") ]);
testLex("2013/2/22 07:53-GMT+0004:003", [ testTokenUnknownTimeZone("GMT+0004:003") ]);

testLex("2013/2/22 07:53-GMT+00004", [ testTokenSimpleTimeZone(minutes(4)) ]);
testLex("2013/2/22 07:53-GMT+00004:", [ testTokenUnknownTimeZone("GMT+00004:") ]);
testLex("2013/2/22 07:53-GMT+00004:3", [ testTokenUnknownTimeZone("GMT+00004:3") ]);
testLex("2013/2/22 07:53-GMT+00004:03", [ testTokenUnknownTimeZone("GMT+00004:03") ]);
testLex("2013/2/22 07:53-GMT+00004:003", [ testTokenUnknownTimeZone("GMT+00004:003") ]);

// DateTime, with unknown timezone
// A non-GMT zone name like "Bogus/Foo" is preserved verbatim; negative year
// and negative time-of-day variants use Duration math like the tests above.
testLex( "2013/2/22 07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 0), FracSec.from!"msecs"( 0), "Bogus/Foo")), "2013/2/22 07:53-Bogus/Foo") ]);
testLex("-2013/2/22 07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime(-2013, 2, 22, 7, 53, 0), FracSec.from!"msecs"( 0), "Bogus/Foo"))) ]);
testLex( "2013/2/22 -07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), FracSec.from!"msecs"( 0), "Bogus/Foo"))) ]);
testLex("-2013/2/22 -07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime(-2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), FracSec.from!"msecs"( 0), "Bogus/Foo"))) ]);
testLex( "2013/2/22 07:53:34-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 34), FracSec.from!"msecs"( 0), "Bogus/Foo"))) ]);
testLex( "2013/2/22 07:53:34.123-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 34), FracSec.from!"msecs"(123), "Bogus/Foo"))) ]);
testLex( "2013/2/22 07:53.123-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 0), FracSec.from!"msecs"(123), "Bogus/Foo"))) ]);

// Time Span
// Format is [Nd:]HH:MM:SS[.fff]; a fractional part shorter than three digits
// is right-padded with zeros (".53" -> 530 msecs, ".5" -> 500 msecs).
testLex( "12:14:42", [ Token(symbol!"Value",loc,Value( days( 0)+hours(12)+minutes(14)+seconds(42)+msecs( 0))) ]);
testLex("-12:14:42", [ Token(symbol!"Value",loc,Value(-days( 0)-hours(12)-minutes(14)-seconds(42)-msecs( 0))) ]);
testLex( "00:09:12", [ Token(symbol!"Value",loc,Value( days( 0)+hours( 0)+minutes( 9)+seconds(12)+msecs( 0))) ]);
testLex( "00:00:01.023", [ Token(symbol!"Value",loc,Value( days( 0)+hours( 0)+minutes( 0)+seconds( 1)+msecs( 23))) ]);
testLex( "23d:05:21:23.532", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs(532))) ]);
testLex( "23d:05:21:23.53", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs(530))) ]);
testLex( "23d:05:21:23.5", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs(500))) ]);
testLex("-23d:05:21:23.532", [ Token(symbol!"Value",loc,Value(-days(23)-hours( 5)-minutes(21)-seconds(23)-msecs(532))) ]);
testLex("-23d:05:21:23.5", [ Token(symbol!"Value",loc,Value(-days(23)-hours( 5)-minutes(21)-seconds(23)-msecs(500))) ]);
testLex( "23d:05:21:23", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs( 0))) ]);

// Trailing letters after a time span are a lex error (note: 'f' is not a
// valid suffix here even though it is for floating-point literals).
testLexThrows("12:14:42a");
testLexThrows("23d:05:21:23.532a");
testLexThrows("23d:05:21:23.532f");
// Combination
// An identifier ending in '.' stays a single Ident token ("foo.") and the
// following number lexes separately as an int Value.
testLex("foo. 7", [
Token(symbol!"Ident",loc,Value( null),"foo."),
Token(symbol!"Value",loc,Value(cast(int)7))
]);

// Full-document smoke test: namespaced tags, several value types, attributes,
// nested blocks, and all three comment styles (//, --, /* */) plus ';' as a
// statement separator (which lexes as an EOL token).
testLex(`
namespace:person "foo" "bar" 1 23L name.first="ひとみ" name.last="Smith" {
namespace:age 37; namespace:favorite_color "blue" // comment
somedate 2013/2/22 07:53 -- comment

inventory /* comment */ {
socks
}
}
`,
[
Token(symbol!"EOL",loc,Value(null),"\n"),

Token(symbol!"Ident", loc, Value( null ), "namespace"),
Token(symbol!":", loc, Value( null ), ":"),
Token(symbol!"Ident", loc, Value( null ), "person"),
Token(symbol!"Value", loc, Value( "foo" ), `"foo"`),
Token(symbol!"Value", loc, Value( "bar" ), `"bar"`),
Token(symbol!"Value", loc, Value( cast( int) 1 ), "1"),
Token(symbol!"Value", loc, Value( cast(long)23 ), "23L"),
Token(symbol!"Ident", loc, Value( null ), "name.first"),
Token(symbol!"=", loc, Value( null ), "="),
Token(symbol!"Value", loc, Value( "ひとみ" ), `"ひとみ"`),
Token(symbol!"Ident", loc, Value( null ), "name.last"),
Token(symbol!"=", loc, Value( null ), "="),
Token(symbol!"Value", loc, Value( "Smith" ), `"Smith"`),
Token(symbol!"{", loc, Value( null ), "{"),
Token(symbol!"EOL", loc, Value( null ), "\n"),

Token(symbol!"Ident", loc, Value( null ), "namespace"),
Token(symbol!":", loc, Value( null ), ":"),
Token(symbol!"Ident", loc, Value( null ), "age"),
Token(symbol!"Value", loc, Value( cast(int)37 ), "37"),
// ';' yields an EOL token whose source text is ";".
Token(symbol!"EOL", loc, Value( null ), ";"),
Token(symbol!"Ident", loc, Value( null ), "namespace"),
Token(symbol!":", loc, Value( null ), ":"),
Token(symbol!"Ident", loc, Value( null ), "favorite_color"),
Token(symbol!"Value", loc, Value( "blue" ), `"blue"`),
Token(symbol!"EOL", loc, Value( null ), "\n"),

Token(symbol!"Ident", loc, Value( null ), "somedate"),
Token(symbol!"Value", loc, Value( DateTimeFrac(DateTime(2013, 2, 22, 7, 53, 0)) ), "2013/2/22 07:53"),
Token(symbol!"EOL", loc, Value( null ), "\n"),
Token(symbol!"EOL", loc, Value( null ), "\n"),

Token(symbol!"Ident", loc, Value(null), "inventory"),
Token(symbol!"{", loc, Value(null), "{"),
Token(symbol!"EOL", loc, Value(null), "\n"),

Token(symbol!"Ident", loc, Value(null), "socks"),
Token(symbol!"EOL", loc, Value(null), "\n"),

Token(symbol!"}", loc, Value(null), "}"),
Token(symbol!"EOL", loc, Value(null), "\n"),

Token(symbol!"}", loc, Value(null), "}"),
Token(symbol!"EOL", loc, Value(null), "\n"),
]);

// NOTE(review): numErrors is presumably incremented by testLex on a failed
// comparison — its declaration is earlier in this file; confirm there.
if(numErrors > 0)
stderr.writeln(numErrors, " failed test(s)");
  2004. }
  2005.  
  2006. version(sdlangUnittest)
  2007. unittest
  2008. {
  2009. writeln("lexer: Regression test issue #8...");
  2010. stdout.flush();
  2011.  
  2012. testLex(`"\n \n"`, [ Token(symbol!"Value",loc,Value("\n \n"),`"\n \n"`) ]);
  2013. testLex(`"\t\t"`, [ Token(symbol!"Value",loc,Value("\t\t"),`"\t\t"`) ]);
  2014. testLex(`"\n\n"`, [ Token(symbol!"Value",loc,Value("\n\n"),`"\n\n"`) ]);
  2015. }
  2016.  
  2017. version(sdlangUnittest)
  2018. unittest
  2019. {
  2020. writeln("lexer: Regression test issue #11...");
  2021. stdout.flush();
  2022.  
  2023. void test(string input)
  2024. {
  2025. testLex(
  2026. input,
  2027. [
  2028. Token(symbol!"EOL", loc, Value(null), "\n"),
  2029. Token(symbol!"Ident",loc,Value(null), "a")
  2030. ]
  2031. );
  2032. }
  2033.  
  2034. test("//X\na");
  2035. test("//\na");
  2036. test("--\na");
  2037. test("#\na");
  2038. }
  2039.  
  2040. version(sdlangUnittest)
  2041. unittest
  2042. {
  2043. writeln("lexer: Regression test issue #28...");
  2044. stdout.flush();
  2045.  
  2046. enum offset = 1; // workaround for an of-by-one error for line numbers
  2047. testLex("test", [
  2048. Token(symbol!"Ident", Location("filename", 0, 0, 0), Value(null), "test")
  2049. ], true);
  2050. testLex("\ntest", [
  2051. Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\n"),
  2052. Token(symbol!"Ident", Location("filename", 1, 0, 1), Value(null), "test")
  2053. ], true);
  2054. testLex("\rtest", [
  2055. Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r"),
  2056. Token(symbol!"Ident", Location("filename", 1, 0, 1), Value(null), "test")
  2057. ], true);
  2058. testLex("\r\ntest", [
  2059. Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r\n"),
  2060. Token(symbol!"Ident", Location("filename", 1, 0, 2), Value(null), "test")
  2061. ], true);
  2062. testLex("\r\n\ntest", [
  2063. Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r\n"),
  2064. Token(symbol!"EOL", Location("filename", 1, 0, 2), Value(null), "\n"),
  2065. Token(symbol!"Ident", Location("filename", 2, 0, 3), Value(null), "test")
  2066. ], true);
  2067. testLex("\r\r\ntest", [
  2068. Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r"),
  2069. Token(symbol!"EOL", Location("filename", 1, 0, 1), Value(null), "\r\n"),
  2070. Token(symbol!"Ident", Location("filename", 2, 0, 3), Value(null), "test")
  2071. ], true);
  2072. }