diff --git a/source/dub/internal/utils.d b/source/dub/internal/utils.d index d0026c9..9ff0c43 100644 --- a/source/dub/internal/utils.d +++ b/source/dub/internal/utils.d @@ -609,34 +609,102 @@ /** * Search for module keyword in D Code + * A primitive parser to skip comments and whitespace to get + * the module's name from the module declaration. */ string getModuleNameFromContent(string content) { - import std.regex; - import std.string; + import std.ascii: isAlpha, isAlphaNum, isWhite; + import std.algorithm: among; + import core.exception: RangeError; - content = content.strip; - if (!content.length) return null; + enum keyword = "module"; - static bool regex_initialized = false; - static Regex!char comments_pattern, module_pattern; + size_t i = 0; + size_t startIndex = 0, endIndex = 0; + auto foundKeyword = false; - if (!regex_initialized) { - comments_pattern = regex(`//[^\r\n]*\r?\n?|/\*.*?\*/|/\+.*\+/`, "gs"); - module_pattern = regex(`module\s+([\w\.]+)\s*;`, "g"); - regex_initialized = true; + auto ch() { + return content[i]; } - content = replaceAll(content, comments_pattern, " "); - auto result = matchFirst(content, module_pattern); + static bool isIdentChar(in char c) { + return !isWhite(c) && c != '/' && c != ';'; + } - if (!result.empty) return result[1]; + try { + while(i < content.length) { + if(ch == keyword[0] && content[i .. i + keyword.length] == keyword) { + // -1 because the end of the loop will advance by 1 + i += keyword.length - 1; + foundKeyword = true; + } + else if(ch == '/') { + ++i; + // line comment? + if(ch == '/') { + while(ch != '\n') + ++i; + } + // block comment? + else if(ch == '*') { + ++i; + while(ch != '*' || content[i + 1] != '/') + ++i; + ++i; // skip over closing '/' + } + // nested comment? + else if(ch == '+') { + ++i; - return null; + size_t level = 1; + + while(level > 0) { + if(ch == '/') { + ++i; + if(ch == '+') { + ++i; + ++level; + } + } + if(ch == '+') { + ++i; + if(ch == '/') { + --level; + } else continue; + } + ++i; + } + } + } + else if(isIdentChar(ch) && foundKeyword) { + if(startIndex == 0) + startIndex = i; + ++i; // skip the first char of the name + while(isIdentChar(ch)) { + ++i; + } + // when we get here, either we're at the end of the module's identifier, + // or there are comments afterwards + if(endIndex == 0) { + endIndex = i; + } + if(!isIdentChar(ch)) + return content[startIndex .. endIndex]; + else continue; + } else if(!isIdentChar(ch) && foundKeyword && startIndex != 0) { + return content[startIndex .. endIndex]; + } + ++i; + } + return ""; + } catch(RangeError) { + return ""; + } } unittest { assert(getModuleNameFromContent("") == ""); - assert(getModuleNameFromContent("module myPackage.myModule;") == "myPackage.myModule"); + assert(getModuleNameFromContent("module myPackage.myModule;") == "myPackage.myModule", getModuleNameFromContent("module myPackage.myModule;")); assert(getModuleNameFromContent("module \t\n myPackage.myModule \t\r\n;") == "myPackage.myModule"); assert(getModuleNameFromContent("// foo\nmodule bar;") == "bar"); assert(getModuleNameFromContent("/*\nfoo\n*/\nmodule bar;") == "bar"); @@ -648,11 +716,20 @@ assert(getModuleNameFromContent("/+ module foo; +/\nmodule bar;") == "bar"); assert(getModuleNameFromContent("/+ /+ module foo; +/ +/\nmodule bar;") == "bar"); assert(getModuleNameFromContent("// module foo;\nmodule bar; // module foo;") == "bar"); + assert(getModuleNameFromContent("// module foo;\nmodule// module foo;\nbar//module foo;\n;// module foo;") == "bar"); + assert(getModuleNameFromContent("/* module foo; */\nmodule/*module foo;*/bar/*module foo;*/;") == "bar", getModuleNameFromContent("/* module foo; */\nmodule/*module foo;*/bar/*module foo;*/;")); assert(getModuleNameFromContent("/+ /+ module foo; +/ module foo; +/ module bar;") == "bar"); - //assert(getModuleNameFromContent("/+ /+ module foo; +/ module foo; +/ module bar/++/;") == "bar"); // nested comments require a context-free parser! + assert(getModuleNameFromContent("/+ /+ module foo; +/ module foo; +/ module bar/++/;") == "bar"); assert(getModuleNameFromContent("/*\nmodule sometest;\n*/\n\nmodule fakemath;\n") == "fakemath"); + assert(getModuleNameFromContent("module foo_bar;") == "foo_bar"); + assert(getModuleNameFromContent("module _foo_bar;") == "_foo_bar"); + assert(getModuleNameFromContent("/++ ++/\nmodule foo;") == "foo"); + assert(getModuleNameFromContent("module pokémon;") == "pokémon"); + assert(getModuleNameFromContent("module éclair;") == "éclair"); + assert(getModuleNameFromContent("/** module foo*/ module bar;") == "bar"); + assert(getModuleNameFromContent("/* / module foo*/ module bar;") == "bar"); } /**