diff --git a/build-files.txt b/build-files.txt index 59a0b51..893d9c8 100644 --- a/build-files.txt +++ b/build-files.txt @@ -1,9 +1,4 @@ source/app.d -source/configy/Attributes.d -source/configy/Exceptions.d -source/configy/FieldRef.d -source/configy/Read.d -source/configy/Utils.d source/dub/commandline.d source/dub/compilers/buildsettings.d source/dub/compilers/compiler.d @@ -22,6 +17,34 @@ source/dub/generators/targetdescription.d source/dub/generators/visuald.d source/dub/init.d +source/dub/internal/configy/Attributes.d +source/dub/internal/configy/Exceptions.d +source/dub/internal/configy/FieldRef.d +source/dub/internal/configy/Read.d +source/dub/internal/configy/Utils.d +source/dub/internal/dyaml/composer.d +source/dub/internal/dyaml/constructor.d +source/dub/internal/dyaml/dumper.d +source/dub/internal/dyaml/emitter.d +source/dub/internal/dyaml/encoding.d +source/dub/internal/dyaml/escapes.d +source/dub/internal/dyaml/event.d +source/dub/internal/dyaml/exception.d +source/dub/internal/dyaml/linebreak.d +source/dub/internal/dyaml/loader.d +source/dub/internal/dyaml/node.d +source/dub/internal/dyaml/package.d +source/dub/internal/dyaml/parser.d +source/dub/internal/dyaml/queue.d +source/dub/internal/dyaml/reader.d +source/dub/internal/dyaml/representer.d +source/dub/internal/dyaml/resolver.d +source/dub/internal/dyaml/scanner.d +source/dub/internal/dyaml/serializer.d +source/dub/internal/dyaml/stdsumtype.d +source/dub/internal/dyaml/style.d +source/dub/internal/dyaml/tagdirective.d +source/dub/internal/dyaml/token.d source/dub/internal/git.d source/dub/internal/libInputVisitor.d source/dub/internal/sdlang/ast.d @@ -32,6 +55,7 @@ source/dub/internal/sdlang/symbol.d source/dub/internal/sdlang/token.d source/dub/internal/sdlang/util.d +source/dub/internal/tinyendian.d source/dub/internal/undead/xml.d source/dub/internal/utils.d source/dub/internal/vibecompat/core/file.d @@ -63,27 +87,3 @@ source/dub/recipe/sdl.d source/dub/semver.d source/dub/version_.d 
-source/dyaml/composer.d -source/dyaml/constructor.d -source/dyaml/dumper.d -source/dyaml/emitter.d -source/dyaml/encoding.d -source/dyaml/escapes.d -source/dyaml/event.d -source/dyaml/exception.d -source/dyaml/linebreak.d -source/dyaml/loader.d -source/dyaml/node.d -source/dyaml/package.d -source/dyaml/parser.d -source/dyaml/queue.d -source/dyaml/reader.d -source/dyaml/representer.d -source/dyaml/resolver.d -source/dyaml/scanner.d -source/dyaml/serializer.d -source/dyaml/stdsumtype.d -source/dyaml/style.d -source/dyaml/tagdirective.d -source/dyaml/token.d -source/tinyendian.d diff --git a/source/configy/Attributes.d b/source/configy/Attributes.d deleted file mode 100644 index c8f090f..0000000 --- a/source/configy/Attributes.d +++ /dev/null @@ -1,315 +0,0 @@ -/******************************************************************************* - - Define UDAs that can be applied to a configuration struct - - This module is stand alone (a leaf module) to allow importing the UDAs - without importing the whole configuration parsing code. - - Copyright: - Copyright (c) 2019-2022 BOSAGORA Foundation - All rights reserved. - - License: - MIT License. See LICENSE for details. - -*******************************************************************************/ - -module configy.Attributes; - -import std.traits; - -/******************************************************************************* - - An optional parameter with an initial value of `T.init` - - The config parser automatically recognize non-default initializer, - so that the following: - ``` - public struct Config - { - public string greeting = "Welcome home"; - } - ``` - Will not error out if `greeting` is not defined in the config file. - However, this relies on the initializer of the field (`greeting`) being - different from the type initializer (`string.init` is `null`). - In some cases, the default value is also the desired initializer, e.g.: - ``` - public struct Config - { - /// Maximum number of connections. 
0 means unlimited. - public uint connections_limit = 0; - } - ``` - In this case, one can add `@Optional` to the field to inform the parser. - -*******************************************************************************/ - -public struct Optional {} - -/******************************************************************************* - - Inform the config filler that this sequence is to be read as a mapping - - On some occasions, one might want to read a mapping as an array. - One reason to do so may be to provide a better experience to the user, - e.g. having to type: - ``` - interfaces: - eth0: - ip: "192.168.0.1" - private: true - wlan0: - ip: "1.2.3.4" - ``` - Instead of the slightly more verbose: - ``` - interfaces: - - name: eth0 - ip: "192.168.0.1" - private: true - - name: wlan0 - ip: "1.2.3.4" - ``` - - The former would require to be expressed as an associative arrays. - However, one major drawback of associative arrays is that they can't have - an initializer, which makes them cumbersome to use in the context of the - config filler. To remediate this issue, one may use `@Key("name")` - on a field (here, `interfaces`) so that the mapping is flattened - to an array. If `name` is `null`, the key will be discarded. - -*******************************************************************************/ - -public struct Key -{ - /// - public string name; -} - -/******************************************************************************* - - Look up the provided name in the YAML node, instead of the field name. - - By default, the config filler will look up the field name of a mapping in - the YAML node. If this is not desired, an explicit `Name` attribute can - be given. This is especially useful for names which are keyword. 
- - ``` - public struct Config - { - public @Name("delete") bool remove; - } - ``` - -*******************************************************************************/ - -public struct Name -{ - /// - public string name; - - /// - public bool startsWith; -} - -/// Short hand syntax -public Name StartsWith(string name) @safe pure nothrow @nogc -{ - return Name(name, true); -} - -/******************************************************************************* - - A field which carries informations about whether it was set or not - - Some configurations may need to know which fields were set explicitly while - keeping defaults. An example of this is a `struct` where at least one field - needs to be set, such as the following: - ``` - public struct ProtoDuration - { - public @Optional long weeks; - public @Optional long days; - public @Optional long hours; - public @Optional long minutes; - public long seconds = 42; - public @Optional long msecs; - public @Optional long usecs; - public @Optional long hnsecs; - public @Optional long nsecs; - } - ``` - In this case, it would be impossible to know if any field was explicitly - provided. Hence, the struct should be written as: - ``` - public struct ProtoDuration - { - public SetInfo!long weeks; - public SetInfo!long days; - public SetInfo!long hours; - public SetInfo!long minutes; - public SetInfo!long seconds = 42; - public SetInfo!long msecs; - public SetInfo!long usecs; - public SetInfo!long hnsecs; - public SetInfo!long nsecs; - } - ``` - Note that `SetInfo` implies `Optional`, and supports default values. - -*******************************************************************************/ - -public struct SetInfo (T) -{ - /*************************************************************************** - - Allow initialization as a field - - This sets the field as having been set, so that: - ``` - struct Config { SetInfo!Duration timeout; } - - Config myConf = { timeout: 10.minutes } - ``` - Will behave as if set explicitly. 
If this behavior is not wanted, - pass `false` as second argument: - ``` - Config myConf = { timeout: SetInfo!Duration(10.minutes, false) } - ``` - - ***************************************************************************/ - - public this (T initVal, bool isSet = true) @safe pure nothrow @nogc - { - this.value = initVal; - this.set = isSet; - } - - /// Underlying data - public T value; - - /// - alias value this; - - /// Whether this field was set or not - public bool set; -} - -/******************************************************************************* - - Provides a means to convert a field from a `Node` to a complex type - - When filling the config, it might be useful to store types which are - not only simple `string` and integer, such as `URL`, `BigInt`, or any other - library type not directly under the user's control. - - To allow reading those values from the config file, a `Converter` may - be used. The converter will tell the `ConfigFiller` how to convert from - `Node` to the desired type `T`. - - If the type is under the user's control, one can also add a constructor - accepting a single string, or define the `fromString` method, both of which - are tried if no `Converter` is found. - - For types not under the user's control, there might be different ways - to parse the same type within the same struct, or neither the ctor nor - the `fromString` method may be defined under that name. - The exmaple below uses `parse` in place of `fromString`, for example. - - ``` - /// Complex structure representing the age of a person based on its birthday - public struct Age - { - /// - public uint birth_year; - /// - public uint birth_month; - /// - public uint birth_day; - - /// Note that this will be picked up automatically if named `fromString` - /// but this struct might be a library type. 
- public static Age parse (string value) { /+ Magic +/ } - } - - public struct Person - { - /// - @Converter!Age((Node value) => Age.parse(value.as!string)) - public Age age; - } - ``` - - Note that some fields may also be of multiple YAML types, such as DUB's - `dependencies`, which is either a simple string (`"vibe-d": "~>1.0 "`), - or an in its complex form (`"vibe-d": { "version": "~>1.0" }`). - For those use cases, a `Converter` is the best approach. - - To avoid repeating the field type, a convenience function is provided: - ``` - public struct Age - { - public uint birth_year; - public uint birth_month; - public uint birth_day; - public static Age parse (string value) { /+ Magic +/ } - } - - public struct Person - { - /// Here `converter` will deduct the type from the delegate argument, - /// and return an instance of `Converter`. Mind the case. - @converter((Node value) => Age.parse(value.as!string)) - public Age age; - } - ``` - -*******************************************************************************/ - -public struct Converter (T) -{ - /// - public alias ConverterFunc = T function (scope ConfigParser!T context); - - /// - public ConverterFunc converter; -} - -/// Ditto -public auto converter (FT) (FT func) -{ - static assert(isFunctionPointer!FT, - "Error: Argument to `converter` should be a function pointer, not: " - ~ FT.stringof); - - alias RType = ReturnType!FT; - static assert(!is(RType == void), - "Error: Converter needs to be of the return type of the field, not `void`"); - return Converter!RType(func); -} - -public interface ConfigParser (T) -{ - import dyaml.node; - import configy.FieldRef : StructFieldRef; - import configy.Read : Context, parseField; - - /// Returns: the node being processed - public inout(Node) node () inout @safe pure nothrow @nogc; - - /// Returns: current location we are parsing - public string path () const @safe pure nothrow @nogc; - - /// - public final auto parseAs (OtherType) - (auto ref OtherType defaultValue = 
OtherType.init) - { - alias TypeFieldRef = StructFieldRef!OtherType; - return this.node().parseField!(TypeFieldRef)( - this.path(), defaultValue, this.context()); - } - - /// Internal use only - protected const(Context) context () const @safe pure nothrow @nogc; -} diff --git a/source/configy/DubTest.d b/source/configy/DubTest.d deleted file mode 100644 index 8a4f6f8..0000000 --- a/source/configy/DubTest.d +++ /dev/null @@ -1,83 +0,0 @@ -/******************************************************************************* - - Contains tests for dub-specific extensions - - Whenever integrating changes from upstream configy, most conflicts tend - to be on `configy.Test`, and as the structure is very similar, - the default diff algorithms are useless. Having a separate module simplify - this greatly. - - License: - MIT License. See LICENSE for details. - -*******************************************************************************/ - -module configy.DubTest; - -import configy.Attributes; -import configy.Read; - -import dyaml.node; - -/// Test name pattern matching -unittest -{ - static struct Config - { - @StartsWith("names") - string[][string] names_; - } - - auto c = parseConfigString!Config("names-x86:\n - John\n - Luca\nnames:\n - Marie", "/dev/null"); - assert(c.names_[null] == [ "Marie" ]); - assert(c.names_["x86"] == [ "John", "Luca" ]); -} - -/// Test our `fromYAML` extension -unittest -{ - static struct PackageDef - { - string name; - @Optional string target; - int build = 42; - } - - static struct Package - { - string path; - PackageDef def; - - public static Package fromYAML (scope ConfigParser!Package parser) - { - if (parser.node.nodeID == NodeID.mapping) - return Package(null, parser.parseAs!PackageDef); - else - return Package(parser.parseAs!string); - } - } - - static struct Config - { - string name; - Package[] deps; - } - - auto c = parseConfigString!Config( -` -name: myPkg -deps: - - /foo/bar - - name: foo - target: bar - build: 24 - - name: fur - - 
/one/last/path -`, "/dev/null"); - assert(c.name == "myPkg"); - assert(c.deps.length == 4); - assert(c.deps[0] == Package("/foo/bar")); - assert(c.deps[1] == Package(null, PackageDef("foo", "bar", 24))); - assert(c.deps[2] == Package(null, PackageDef("fur", null, 42))); - assert(c.deps[3] == Package("/one/last/path")); -} diff --git a/source/configy/Exceptions.d b/source/configy/Exceptions.d deleted file mode 100644 index 5e1f98c..0000000 --- a/source/configy/Exceptions.d +++ /dev/null @@ -1,382 +0,0 @@ -/******************************************************************************* - - Definitions for Exceptions used by the config module. - - Copyright: - Copyright (c) 2019-2022 BOSAGORA Foundation - All rights reserved. - - License: - MIT License. See LICENSE for details. - -*******************************************************************************/ - -module configy.Exceptions; - -import configy.Utils; - -import dyaml.exception; -import dyaml.node; - -import std.algorithm : filter, map; -import std.format; -import std.string : soundexer; - -/******************************************************************************* - - Base exception type thrown by the config parser - - Whenever dealing with Exceptions thrown by the config parser, catching - this type will allow to optionally format with colors: - ``` - try - { - auto conf = parseConfigFile!Config(cmdln); - // ... - } - catch (ConfigException exc) - { - writeln("Parsing the config file failed:"); - writelfln(isOutputATTY() ? 
"%S" : "%s", exc); - } - ``` - -*******************************************************************************/ - -public abstract class ConfigException : Exception -{ - /// Position at which the error happened - public Mark yamlPosition; - - /// The path at which the key resides - public string path; - - /// If non-empty, the key under 'path' which triggered the error - /// If empty, the key should be considered part of 'path' - public string key; - - /// Constructor - public this (string path, string key, Mark position, - string file = __FILE__, size_t line = __LINE__) - @safe pure nothrow @nogc - { - super(null, file, line); - this.path = path; - this.key = key; - this.yamlPosition = position; - } - - /// Ditto - public this (string path, Mark position, - string file = __FILE__, size_t line = __LINE__) - @safe pure nothrow @nogc - { - this(path, null, position, file, line); - } - - /*************************************************************************** - - Overrides `Throwable.toString` and its sink overload - - It is quite likely that errors from this module may be printed directly - to the end user, who might not have technical knowledge. - - This format the error in a nicer format (e.g. with colors), - and will additionally provide a stack-trace if the `ConfigFillerDebug` - `debug` version was provided. - - Format_chars: - The default format char ("%s") will print a regular message. - If an uppercase 's' is used ("%S"), colors will be used. 
- - Params: - sink = The sink to send the piece-meal string to - spec = See https://dlang.org/phobos/std_format_spec.html - - ***************************************************************************/ - - public override string toString () scope - { - // Need to be overriden otherwise the overload is shadowed - return super.toString(); - } - - /// Ditto - public override void toString (scope void delegate(in char[]) sink) const scope - @trusted - { - // This breaks the type system, as it blindly trusts a delegate - // However, the type system lacks a way to sanely build an utility - // which accepts a delegate with different qualifiers, so this is the - // less evil approach. - this.toString(cast(SinkType) sink, FormatSpec!char("%s")); - } - - /// Ditto - public void toString (scope SinkType sink, in FormatSpec!char spec) - const scope @safe - { - import core.internal.string : unsignedToTempString; - - const useColors = spec.spec == 'S'; - char[20] buffer = void; - - if (useColors) sink(Yellow); - sink(this.yamlPosition.name); - if (useColors) sink(Reset); - - sink("("); - if (useColors) sink(Cyan); - sink(unsignedToTempString(this.yamlPosition.line, buffer)); - if (useColors) sink(Reset); - sink(":"); - if (useColors) sink(Cyan); - sink(unsignedToTempString(this.yamlPosition.column, buffer)); - if (useColors) sink(Reset); - sink("): "); - - if (this.path.length || this.key.length) - { - if (useColors) sink(Yellow); - sink(this.path); - if (this.path.length && this.key.length) - sink("."); - sink(this.key); - if (useColors) sink(Reset); - sink(": "); - } - - this.formatMessage(sink, spec); - - debug (ConfigFillerDebug) - { - sink("\n\tError originated from: "); - sink(this.file); - sink("("); - sink(unsignedToTempString(line, buffer)); - sink(")"); - - if (!this.info) - return; - - () @trusted nothrow - { - try - { - sink("\n----------------"); - foreach (t; info) - { - sink("\n"); sink(t); - } - } - // ignore more errors - catch (Throwable) {} - }(); - } - } - - 
/// Hook called by `toString` to simplify coloring - protected abstract void formatMessage ( - scope SinkType sink, in FormatSpec!char spec) - const scope @safe; -} - -/// A configuration exception that is only a single message -package final class ConfigExceptionImpl : ConfigException -{ - public this (string msg, Mark position, - string file = __FILE__, size_t line = __LINE__) - @safe pure nothrow @nogc - { - this(msg, null, null, position, file, line); - } - - public this (string msg, string path, string key, Mark position, - string file = __FILE__, size_t line = __LINE__) - @safe pure nothrow @nogc - { - super(path, key, position, file, line); - this.msg = msg; - } - - protected override void formatMessage ( - scope SinkType sink, in FormatSpec!char spec) - const scope @safe - { - sink(this.msg); - } -} - -/// Exception thrown when the type of the YAML node does not match the D type -package final class TypeConfigException : ConfigException -{ - /// The actual (in the YAML document) type of the node - public string actual; - - /// The expected (as specified in the D type) type - public string expected; - - /// Constructor - public this (Node node, string expected, string path, string key = null, - string file = __FILE__, size_t line = __LINE__) - @safe nothrow - { - this(node.nodeTypeString(), expected, path, key, node.startMark(), - file, line); - } - - /// Ditto - public this (string actual, string expected, string path, string key, - Mark position, string file = __FILE__, size_t line = __LINE__) - @safe pure nothrow @nogc - { - super(path, key, position, file, line); - this.actual = actual; - this.expected = expected; - } - - /// Format the message with or without colors - protected override void formatMessage ( - scope SinkType sink, in FormatSpec!char spec) - const scope @safe - { - const useColors = spec.spec == 'S'; - - const fmt = "Expected to be of type %s, but is a %s"; - - if (useColors) - formattedWrite(sink, fmt, this.expected.paint(Green), 
this.actual.paint(Red)); - else - formattedWrite(sink, fmt, this.expected, this.actual); - } -} - -/// Similar to a `TypeConfigException`, but specific to `Duration` -package final class DurationTypeConfigException : ConfigException -{ - /// The list of valid fields - public immutable string[] DurationSuffixes = [ - "weeks", "days", "hours", "minutes", "seconds", - "msecs", "usecs", "hnsecs", "nsecs", - ]; - - /// Actual type of the node - public string actual; - - /// Constructor - public this (Node node, string path, string file = __FILE__, size_t line = __LINE__) - @safe nothrow - { - super(path, null, node.startMark(), file, line); - this.actual = node.nodeTypeString(); - } - - /// Format the message with or without colors - protected override void formatMessage ( - scope SinkType sink, in FormatSpec!char spec) - const scope @safe - { - const useColors = spec.spec == 'S'; - - const fmt = "Field is of type %s, but expected a mapping with at least one of: %-(%s, %)"; - if (useColors) - formattedWrite(sink, fmt, this.actual.paint(Red), - this.DurationSuffixes.map!(s => s.paint(Green))); - else - formattedWrite(sink, fmt, this.actual, this.DurationSuffixes); - } -} - -/// Exception thrown when an unknown key is found in strict mode -public class UnknownKeyConfigException : ConfigException -{ - /// The list of valid field names - public immutable string[] fieldNames; - - /// Constructor - public this (string path, string key, immutable string[] fieldNames, - Mark position, string file = __FILE__, size_t line = __LINE__) - @safe pure nothrow @nogc - { - super(path, key, position, file, line); - this.fieldNames = fieldNames; - } - - /// Format the message with or without colors - protected override void formatMessage ( - scope SinkType sink, in FormatSpec!char spec) - const scope @safe - { - const useColors = spec.spec == 'S'; - - // Try to find a close match, as the error is likely a typo - // This is especially important when the config file has a large - // number 
of fields, where the message is otherwise near-useless. - const origSound = soundexer(this.key); - auto matches = this.fieldNames.filter!(f => f.soundexer == origSound); - const hasMatch = !matches.save.empty; - - if (hasMatch) - { - const fmt = "Key is not a valid member of this section. Did you mean: %-(%s, %)"; - if (useColors) - formattedWrite(sink, fmt, matches.map!(f => f.paint(Green))); - else - formattedWrite(sink, fmt, matches); - } - else - { - // No match, just print everything - const fmt = "Key is not a valid member of this section. There are %s valid keys: %-(%s, %)"; - if (useColors) - formattedWrite(sink, fmt, this.fieldNames.length.paint(Yellow), - this.fieldNames.map!(f => f.paint(Green))); - else - formattedWrite(sink, fmt, this.fieldNames.length, this.fieldNames); - } - } -} - -/// Exception thrown when a required key is missing -public class MissingKeyException : ConfigException -{ - /// Constructor - public this (string path, string key, Mark position, - string file = __FILE__, size_t line = __LINE__) - @safe pure nothrow @nogc - { - super(path, key, position, file, line); - } - - /// Format the message with or without colors - protected override void formatMessage ( - scope SinkType sink, in FormatSpec!char spec) - const scope @safe - { - sink("Required key was not found in configuration or command line arguments"); - } -} - -/// Wrap an user-thrown Exception that happened in a Converter/ctor/fromString -public class ConstructionException : ConfigException -{ - /// Constructor - public this (Exception next, string path, Mark position, - string file = __FILE__, size_t line = __LINE__) - @safe pure nothrow @nogc - { - super(path, position, file, line); - this.next = next; - } - - /// Format the message with or without colors - protected override void formatMessage ( - scope SinkType sink, in FormatSpec!char spec) - const scope @trusted - { - if (auto dyn = cast(ConfigException) this.next) - dyn.toString(sink, spec); - else - 
sink(this.next.message); - } -} diff --git a/source/configy/FieldRef.d b/source/configy/FieldRef.d deleted file mode 100644 index 96f91b1..0000000 --- a/source/configy/FieldRef.d +++ /dev/null @@ -1,201 +0,0 @@ -/******************************************************************************* - - Implement a template to keep track of a field references - - Passing field references by `alias` template parameter creates many problem, - and is extremely cumbersome to work with. Instead, we pass an instance of - a `FieldRef` around, which also contains structured information. - - Copyright: - Copyright (c) 2019-2022 BOSAGORA Foundation - All rights reserved. - - License: - MIT License. See LICENSE for details. - -*******************************************************************************/ - -module configy.FieldRef; - -// Renamed imports as the names exposed by `FieldRef` shadow the imported ones. -import configy.Attributes : CAName = Name, CAOptional = Optional, SetInfo; - -import std.meta; -import std.traits; - -/******************************************************************************* - - A reference to a field in a `struct` - - The compiler sometimes rejects passing fields by `alias`, or complains about - missing `this` (meaning it tries to evaluate the value). Sometimes, it also - discards the UDAs. - - To prevent this from happening, we always pass around a `FieldRef`, - which wraps the parent struct type (`T`), the name of the field - as `FieldName`, and other informations. - - To avoid any issue, eponymous usage is also avoided, hence the reference - needs to be accessed using `Ref`. 
- -*******************************************************************************/ - -package template FieldRef (alias T, string name, bool forceOptional = false) -{ - /// The reference to the field - public alias Ref = __traits(getMember, T, name); - - /// Type of the field - public alias Type = typeof(Ref); - - /// The name of the field in the struct itself - public alias FieldName = name; - - /// The name used in the configuration field (taking `@Name` into account) - static if (hasUDA!(Ref, CAName)) - { - static assert (getUDAs!(Ref, CAName).length == 1, - "Field `" ~ fullyQualifiedName!(Ref) ~ - "` cannot have more than one `Name` attribute"); - - public immutable Name = getUDAs!(Ref, CAName)[0].name; - - public immutable Pattern = getUDAs!(Ref, CAName)[0].startsWith; - } - else - { - public immutable Name = FieldName; - public immutable Pattern = false; - } - - /// Default value of the field (may or may not be `Type.init`) - public enum Default = __traits(getMember, T.init, name); - - /// Evaluates to `true` if this field is to be considered optional - /// (does not need to be present in the YAML document) - public enum Optional = forceOptional || - hasUDA!(Ref, CAOptional) || - is(immutable(Type) == immutable(bool)) || - is(Type : SetInfo!FT, FT) || - (Default != Type.init); -} - -unittest -{ - import configy.Attributes : Name; - - static struct Config1 - { - int integer2 = 42; - @Name("notStr2") - @(42) string str2; - } - - static struct Config2 - { - Config1 c1dup = { 42, "Hello World" }; - string message = "Something"; - } - - static struct Config3 - { - Config1 c1; - int integer; - string str; - Config2 c2 = { c1dup: { integer2: 69 } }; - } - - static assert(is(FieldRef!(Config3, "c2").Type == Config2)); - static assert(FieldRef!(Config3, "c2").Default != Config2.init); - static assert(FieldRef!(Config2, "message").Default == Config2.init.message); - alias NFR1 = FieldRef!(Config3, "c2"); - alias NFR2 = FieldRef!(NFR1.Ref, "c1dup"); - alias NFR3 = 
FieldRef!(NFR2.Ref, "integer2"); - alias NFR4 = FieldRef!(NFR2.Ref, "str2"); - static assert(hasUDA!(NFR4.Ref, int)); - - static assert(FieldRefTuple!(Config3)[1].Name == "integer"); - static assert(FieldRefTuple!(FieldRefTuple!(Config3)[0].Type)[1].Name == "notStr2"); -} - -/// A pseudo `FieldRef` used for structs which are not fields (top-level) -package template StructFieldRef (ST, string DefaultName = null) -{ - /// - public enum Ref = ST.init; - - /// - public alias Type = ST; - - /// - public enum Default = ST.init; - - /// - public enum Optional = false; - - /// Some places reference their parent's Name / FieldName - public enum Name = DefaultName; - /// Ditto - public enum FieldName = DefaultName; -} - -/// A pseudo `FieldRef` for nested types (e.g. arrays / associative arrays) -package template NestedFieldRef (ElemT, alias FR) -{ - /// - public enum Ref = ElemT.init; - /// - public alias Type = ElemT; - /// - public enum Name = FR.Name; - /// - public enum FieldName = FR.FieldName; - /// Element or keys are never optional - public enum Optional = false; - -} - -/// Get a tuple of `FieldRef` from a `struct` -package template FieldRefTuple (T) -{ - static assert(is(T == struct), - "Argument " ~ T.stringof ~ " to `FieldRefTuple` should be a `struct`"); - - /// - static if (__traits(getAliasThis, T).length == 0) - public alias FieldRefTuple = staticMap!(Pred, FieldNameTuple!T); - else - { - /// Tuple of strings of aliased fields - /// As of DMD v2.100.0, only a single alias this is supported in D. 
- private immutable AliasedFieldNames = __traits(getAliasThis, T); - static assert(AliasedFieldNames.length == 1, "Multiple `alias this` are not supported"); - - // Ignore alias to functions (if it's a property we can't do anything) - static if (isSomeFunction!(__traits(getMember, T, AliasedFieldNames))) - public alias FieldRefTuple = staticMap!(Pred, FieldNameTuple!T); - else - { - /// "Base" field names minus aliased ones - private immutable BaseFields = Erase!(AliasedFieldNames, FieldNameTuple!T); - static assert(BaseFields.length == FieldNameTuple!(T).length - 1); - - public alias FieldRefTuple = AliasSeq!( - staticMap!(Pred, BaseFields), - FieldRefTuple!(typeof(__traits(getMember, T, AliasedFieldNames)))); - } - } - - private alias Pred (string name) = FieldRef!(T, name); -} - -/// Returns: An alias sequence of field names, taking UDAs (`@Name` et al) into account -package alias FieldsName (T) = staticMap!(FieldRefToName, FieldRefTuple!T); - -/// Helper template for `staticMap` used for strict mode -private enum FieldRefToName (alias FR) = FR.Name; - -/// Dub extension -package enum IsPattern (alias FR) = FR.Pattern; -/// Dub extension -package alias Patterns (T) = staticMap!(FieldRefToName, Filter!(IsPattern, FieldRefTuple!T)); diff --git a/source/configy/Read.d b/source/configy/Read.d deleted file mode 100644 index 1e30d53..0000000 --- a/source/configy/Read.d +++ /dev/null @@ -1,1075 +0,0 @@ -/******************************************************************************* - - Utilities to fill a struct representing the configuration with the content - of a YAML document. - - The main function of this module is `parseConfig`. Convenience functions - `parseConfigString` and `parseConfigFile` are also available. - - The type parameter to those three functions must be a struct and is used - to drive the processing of the YAML node. When an error is encountered, - an `Exception` will be thrown, with a descriptive message. 
- The rules by which the struct is filled are designed to be - as intuitive as possible, and are described below. - - Optional_Fields: - One of the major convenience offered by this utility is its handling - of optional fields. A field is detected as optional if it has - an initializer that is different from its type `init` value, - for example `string field = "Something";` is an optional field, - but `int count = 0;` is not. - To mark a field as optional even with its default value, - use the `Optional` UDA: `@Optional int count = 0;`. - - Converter: - Because config structs may contain complex types such as - a Phobos type, a user-defined `Amount`, or Vibe.d's `URL`, - one may need to apply a converter to a struct's field. - Converters are functions that take a YAML `Node` as argument - and return a type that is implicitly convertible to the field type - (usually just the field type). They offer the most power to users, - as they can inspect the YAML structure, but should be used as a last resort. - - Composite_Types: - Processing starts from a `struct` at the top level, and recurse into - every fields individually. If a field is itself a struct, - the filler will attempt the following, in order: - - If the field has no value and is not optional, an Exception will - be thrown with an error message detailing where the issue happened. - - If the field has no value and is optional, the default value will - be used. - - If the field has a value, the filler will first check for a converter - and use it if present. - - If the type has a `static` method named `fromString` whose sole argument - is a `string`, it will be used. - - If the type has a constructor whose sole argument is a `string`, - it will be used; - - Finally, the filler will attempt to deserialize all struct members - one by one and pass them to the default constructor, if there is any. - - If none of the above succeeded, a `static assert` will trigger. 
- - Alias_this: - If a `struct` contains an `alias this`, the field that is aliased will be - ignored, instead the config parser will parse nested fields as if they - were part of the enclosing structure. This allow to re-use a single `struct` - in multiple place without having to resort to a `mixin template`. - Having an initializer will make all fields in the aliased struct optional. - The aliased field cannot have attributes other than `@Optional`, - which will then apply to all fields it exposes. - - Duration_parsing: - If the config field is of type `core.time.Duration`, special parsing rules - will apply. There are two possible forms in which a Duration field may - be expressed. In the first form, the YAML node should be a mapping, - and it will be checked for fields matching the supported units - in `core.time`: `weeks`, `days`, `hours`, `minutes`, `seconds`, `msecs`, - `usecs`, `hnsecs`, `nsecs`. Strict parsing option will be respected. - The values of the fields will then be added together, so the following - YAML usages are equivalent: - --- - // sleepFor: - // hours: 8 - // minutes: 30 - --- - and: - --- - // sleepFor: - // minutes: 510 - --- - Provided that the definition of the field is: - --- - public Duration sleepFor; - --- - - In the second form, the field should have a suffix composed of an - underscore ('_'), followed by a unit name as defined in `core.time`. - This can be either the field name directly, or a name override. - The latter is recommended to avoid confusion when using the field in code. - In this form, the YAML node is expected to be a scalar. 
- So the previous example, using this form, would be expressed as: - --- - sleepFor_minutes: 510 - --- - and the field definition should be one of those two: - --- - public @Name("sleepFor_minutes") Duration sleepFor; /// Prefer this - public Duration sleepFor_minutes; /// This works too - --- - - Those forms are mutually exclusive, so a field with a unit suffix - will error out if a mapping is used. This prevents surprises and ensures - that the error message, if any, is consistent accross user input. - - To disable or change this behavior, one may use a `Converter` instead. - - Strict_Parsing: - When strict parsing is enabled, the config filler will also validate - that the YAML nodes do not contains entry which are not present in the - mapping (struct) being processed. - This can be useful to catch typos or outdated configuration options. - - Post_Validation: - Some configuration will require validation accross multiple sections. - For example, two sections may be mutually exclusive as a whole, - or may have fields which are mutually exclusive with another section's - field(s). This kind of dependence is hard to account for declaratively, - and does not affect parsing. For this reason, the preferred way to - handle those cases is to define a `validate` member method on the - affected config struct(s), which will be called once - parsing for that mapping is completed. - If an error is detected, this method should throw an Exception. - - Enabled_or_disabled_field: - While most complex logic validation should be handled post-parsing, - some section may be optional by default, but if provided, will have - required fields. To support this use case, if a field with the name - `enabled` is present in a struct, the parser will first process it. - If it is `false`, the parser will not attempt to process the struct - further, and the other fields will have their default value. 
- Likewise, if a field named `disabled` exists, the struct will not - be processed if it is set to `true`. - - Copyright: - Copyright (c) 2019-2022 BOSAGORA Foundation - All rights reserved. - - License: - MIT License. See LICENSE for details. - -*******************************************************************************/ - -module configy.Read; - -public import configy.Attributes; -public import configy.Exceptions : ConfigException; -import configy.Exceptions; -import configy.FieldRef; -import configy.Utils; - -import dyaml.exception; -import dyaml.node; -import dyaml.loader; - -import std.algorithm; -import std.conv; -import std.datetime; -import std.format; -import std.getopt; -import std.meta; -import std.range; -import std.traits; -import std.typecons : Nullable, nullable, tuple; - -static import core.time; - -// Dub-specific adjustments for output -import dub.internal.logging; - -/// Command-line arguments -public struct CLIArgs -{ - /// Path to the config file - public string config_path = "config.yaml"; - - /// Overrides for config options - public string[][string] overrides; - - /// Helper to add items to `overrides` - public void overridesHandler (string, string value) - { - import std.string; - const idx = value.indexOf('='); - if (idx < 0) return; - string k = value[0 .. idx], v = value[idx + 1 .. $]; - if (auto val = k in this.overrides) - (*val) ~= v; - else - this.overrides[k] = [ v ]; - } - - /*************************************************************************** - - Parses the base command line arguments - - This can be composed with the program argument. 
- For example, consider a program which wants to expose a `--version` - switch, the definition could look like this: - --- - public struct ProgramCLIArgs - { - public CLIArgs base; // This struct - - public alias base this; // For convenience - - public bool version_; // Program-specific part - } - --- - Then, an application-specific configuration routine would be: - --- - public GetoptResult parse (ref ProgramCLIArgs clargs, ref string[] args) - { - auto r = clargs.base.parse(args); - if (r.helpWanted) return r; - return getopt( - args, - "version", "Print the application version, &clargs.version_"); - } - --- - - Params: - args = The command line args to parse (parsed options will be removed) - passThrough = Whether to enable `config.passThrough` and - `config.keepEndOfOptions`. `true` by default, to allow - composability. If your program doesn't have other - arguments, pass `false`. - - Returns: - The result of calling `getopt` - - ***************************************************************************/ - - public GetoptResult parse (ref string[] args, bool passThrough = true) - { - return getopt( - args, - // `caseInsensistive` is the default, but we need something - // with the same type for the ternary - passThrough ? config.keepEndOfOptions : config.caseInsensitive, - // Also the default, same reasoning - passThrough ? config.passThrough : config.noPassThrough, - "config|c", - "Path to the config file. 
Defaults to: " ~ this.config_path, - &this.config_path, - - "override|O", - "Override a config file value\n" ~ - "Example: -O foo.bar=true -o dns=1.1.1.1 -o dns=2.2.2.2\n" ~ - "Array values are additive, other items are set to the last override", - &this.overridesHandler, - ); - } -} - -/******************************************************************************* - - Attempt to read and process the config file at `path`, print any error - - This 'simple' overload of the more detailed `parseConfigFile` will attempt - to read the file at `path`, and return a `Nullable` instance of it. - If an error happens, either because the file isn't readable or - the configuration has an issue, a message will be printed to `stderr`, - with colors if the output is a TTY, and a `null` instance will be returned. - - The calling code can hence just read a config file via: - ``` - int main () - { - auto configN = parseConfigFileSimple!Config("config.yaml"); - if (configN.isNull()) return 1; // Error path - auto config = configN.get(); - // Rest of the program ... - } - ``` - An overload accepting `CLIArgs args` also exists. - - Params: - path = Path of the file to read from - args = Command line arguments on which `parse` has been called - strict = Whether the parsing should reject unknown keys in the - document, warn, or ignore them (default: `StrictMode.Error`) - - Returns: - An initialized `Config` instance if reading/parsing was successful; - a `null` instance otherwise. 
- -*******************************************************************************/ - -public Nullable!T parseConfigFileSimple (T) (string path, StrictMode strict = StrictMode.Error) -{ - return parseConfigFileSimple!(T)(CLIArgs(path), strict); -} - - -/// Ditto -public Nullable!T parseConfigFileSimple (T) (in CLIArgs args, StrictMode strict = StrictMode.Error) -{ - try - { - Node root = Loader.fromFile(args.config_path).load(); - return nullable(parseConfig!T(args, root, strict)); - } - catch (ConfigException exc) - { - exc.printException(); - return typeof(return).init; - } - catch (Exception exc) - { - // Other Exception type may be thrown by D-YAML, - // they won't include rich information. - logWarn("%s", exc.message()); - return typeof(return).init; - } -} - -/******************************************************************************* - - Print an Exception, potentially with colors on - - Trusted because of `stderr` usage. - -*******************************************************************************/ - -private void printException (scope ConfigException exc) @trusted -{ - import dub.internal.logging; - - if (hasColors) - logWarn("%S", exc); - else - logWarn("%s", exc.message()); -} - -/******************************************************************************* - - Parses the config file or string and returns a `Config` instance. - - Params: - cmdln = command-line arguments (containing the path to the config) - path = When parsing a string, the path corresponding to it - strict = Whether the parsing should reject unknown keys in the - document, warn, or ignore them (default: `StrictMode.Error`) - - Throws: - `Exception` if parsing the config file failed. 
- - Returns: - `Config` instance - -*******************************************************************************/ - -public T parseConfigFile (T) (in CLIArgs cmdln, StrictMode strict = StrictMode.Error) -{ - Node root = Loader.fromFile(cmdln.config_path).load(); - return parseConfig!T(cmdln, root, strict); -} - -/// ditto -public T parseConfigString (T) (string data, string path, StrictMode strict = StrictMode.Error) -{ - CLIArgs cmdln = { config_path: path }; - auto loader = Loader.fromString(data); - loader.name = path; - Node root = loader.load(); - return parseConfig!T(cmdln, root, strict); -} - -/******************************************************************************* - - Process the content of the YAML document described by `node` into an - instance of the struct `T`. - - See the module description for a complete overview of this function. - - Params: - T = Type of the config struct to fill - cmdln = Command line arguments - node = The root node matching `T` - strict = Action to take when encountering unknown keys in the document - - Returns: - An instance of `T` filled with the content of `node` - - Throws: - If the content of `node` cannot satisfy the requirements set by `T`, - or if `node` contain extra fields and `strict` is `true`. - -*******************************************************************************/ - -public T parseConfig (T) ( - in CLIArgs cmdln, Node node, StrictMode strict = StrictMode.Error) -{ - static assert(is(T == struct), "`" ~ __FUNCTION__ ~ - "` should only be called with a `struct` type as argument, not: `" ~ - fullyQualifiedName!T ~ "`"); - - final switch (node.nodeID) - { - case NodeID.mapping: - dbgWrite("Parsing config '%s', strict: %s", - fullyQualifiedName!T, - strict == StrictMode.Warn ? 
- strict.paint(Yellow) : strict.paintIf(!!strict, Green, Red)); - return node.parseMapping!(StructFieldRef!T)( - null, T.init, const(Context)(cmdln, strict), null); - case NodeID.sequence: - case NodeID.scalar: - case NodeID.invalid: - throw new TypeConfigException(node, "mapping (object)", "document root"); - } -} - -/******************************************************************************* - - The behavior to have when encountering a field in YAML not present - in the config definition. - -*******************************************************************************/ - -public enum StrictMode -{ - /// Issue an error by throwing an `UnknownKeyConfigException` - Error = 0, - /// Write a message to `stderr`, but continue processing the file - Warn = 1, - /// Be silent and do nothing - Ignore = 2, -} - -/// Used to pass around configuration -package struct Context -{ - /// - private CLIArgs cmdln; - - /// - private StrictMode strict; -} - -/******************************************************************************* - - Parse a mapping from `node` into an instance of `T` - - Params: - TLFR = Top level field reference for this mapping - node = The YAML node object matching the struct being read - path = The runtime path to this mapping, used for nested types - defaultValue = The default value to use for `T`, which can be different - from `T.init` when recursing into fields with initializers. 
- ctx = A context where properties that need to be conserved during - recursion are stored - fieldDefaults = Default value for some fields, used for `Key` recursion - -*******************************************************************************/ -private TLFR.Type parseMapping (alias TLFR) - (Node node, string path, auto ref TLFR.Type defaultValue, - in Context ctx, in Node[string] fieldDefaults) -{ - static assert(is(TLFR.Type == struct), "`parseMapping` called with wrong type (should be a `struct`)"); - assert(node.nodeID == NodeID.mapping, "Internal error: parseMapping shouldn't have been called"); - - dbgWrite("%s: `parseMapping` called for '%s' (node entries: %s)", - TLFR.Type.stringof.paint(Cyan), path.paint(Cyan), - node.length.paintIf(!!node.length, Green, Red)); - - static foreach (FR; FieldRefTuple!(TLFR.Type)) - { - static if (FR.Name != FR.FieldName && hasMember!(TLFR.Type, FR.Name) && - !is(typeof(mixin("TLFR.Type.", FR.Name)) == function)) - static assert (FieldRef!(TLFR.Type, FR.Name).Name != FR.Name, - "Field `" ~ FR.FieldName ~ "` `@Name` attribute shadows field `" ~ - FR.Name ~ "` in `" ~ TLFR.Type.stringof ~ "`: Add a `@Name` attribute to `" ~ - FR.Name ~ "` or change that of `" ~ FR.FieldName ~ "`"); - } - - if (ctx.strict != StrictMode.Ignore) - { - /// First, check that all the sections found in the mapping are present in the type - /// If not, the user might have made a typo. - immutable string[] fieldNames = [ FieldsName!(TLFR.Type) ]; - immutable string[] patterns = [ Patterns!(TLFR.Type) ]; - FIELD: foreach (const ref Node key, const ref Node value; node) - { - const k = key.as!string; - if (!fieldNames.canFind(k)) - { - foreach (p; patterns) - if (k.startsWith(p)) - // Require length because `0` would match `canFind` - // and we don't want to allow `$PATTERN-` - if (k[p.length .. 
$].length > 1 && k[p.length] == '-') - continue FIELD; - - if (ctx.strict == StrictMode.Warn) - { - scope exc = new UnknownKeyConfigException( - path, key.as!string, fieldNames, key.startMark()); - exc.printException(); - } - else - throw new UnknownKeyConfigException( - path, key.as!string, fieldNames, key.startMark()); - } - } - } - - const enabledState = node.isMappingEnabled!(TLFR.Type)(defaultValue); - - if (enabledState.field != EnabledState.Field.None) - dbgWrite("%s: Mapping is enabled: %s", TLFR.Type.stringof.paint(Cyan), (!!enabledState).paintBool()); - - auto convertField (alias FR) () - { - static if (FR.Name != FR.FieldName) - dbgWrite("Field name `%s` will use YAML field `%s`", - FR.FieldName.paint(Yellow), FR.Name.paint(Green)); - // Using exact type here matters: we could get a qualified type - // (e.g. `immutable(string)`) if the field is qualified, - // which causes problems. - FR.Type default_ = __traits(getMember, defaultValue, FR.FieldName); - - // If this struct is disabled, do not attempt to parse anything besides - // the `enabled` / `disabled` field. 
- if (!enabledState) - { - // Even this is too noisy - version (none) - dbgWrite("%s: %s field of disabled struct, default: %s", - path.paint(Cyan), "Ignoring".paint(Yellow), default_); - - static if (FR.Name == "enabled") - return false; - else static if (FR.Name == "disabled") - return true; - else - return default_; - } - - if (auto ptr = FR.FieldName in fieldDefaults) - { - dbgWrite("Found %s (%s.%s) in `fieldDefaults`", - FR.Name.paint(Cyan), path.paint(Cyan), FR.FieldName.paint(Cyan)); - - if (ctx.strict && FR.FieldName in node) - throw new ConfigExceptionImpl("'Key' field is specified twice", path, FR.FieldName, node.startMark()); - return (*ptr).parseField!(FR)(path.addPath(FR.FieldName), default_, ctx) - .dbgWriteRet("Using value '%s' from fieldDefaults for field '%s'", - FR.FieldName.paint(Cyan)); - } - - // This, `FR.Pattern`, and the field in `@Name` are special support for `dub` - static if (FR.Pattern) - { - static if (is(FR.Type : V[K], K, V)) - { - alias AAFieldRef = NestedFieldRef!(V, FR); - static assert(is(K : string), "Key type should be string-like"); - } - else - static assert(0, "Cannot have pattern on non-AA field"); - - AAFieldRef.Type[string] result; - foreach (pair; node.mapping) - { - const key = pair.key.as!string; - if (!key.startsWith(FR.Name)) - continue; - string suffix = key[FR.Name.length .. $]; - if (suffix.length) - { - if (suffix[0] == '-') suffix = suffix[1 .. $]; - else continue; - } - - result[suffix] = pair.value.parseField!(AAFieldRef)( - path.addPath(key), default_.get(key, AAFieldRef.Type.init), ctx); - } - bool hack = true; - if (hack) return result; - } - - if (auto ptr = FR.Name in node) - { - dbgWrite("%s: YAML field is %s in node%s", - FR.Name.paint(Cyan), "present".paint(Green), - (FR.Name == FR.FieldName ? 
"" : " (note that field name is overriden)").paint(Yellow)); - return (*ptr).parseField!(FR)(path.addPath(FR.Name), default_, ctx) - .dbgWriteRet("Using value '%s' from YAML document for field '%s'", - FR.FieldName.paint(Cyan)); - } - - dbgWrite("%s: Field is %s from node%s", - FR.Name.paint(Cyan), "missing".paint(Red), - (FR.Name == FR.FieldName ? "" : " (note that field name is overriden)").paint(Yellow)); - - // A field is considered optional if it has an initializer that is different - // from its default value, or if it has the `Optional` UDA. - // In that case, just return this value. - static if (FR.Optional) - return default_ - .dbgWriteRet("Using default value '%s' for optional field '%s'", FR.FieldName.paint(Cyan)); - - // The field is not present, but it could be because it is an optional section. - // For example, the section could be defined as: - // --- - // struct RequestLimit { size_t reqs = 100; } - // struct Config { RequestLimit limits; } - // --- - // In this case we need to recurse into `RequestLimit` to check if any - // of its field is required. 
- else static if (mightBeOptional!FR) - { - const npath = path.addPath(FR.Name); - string[string] aa; - return Node(aa).parseMapping!(FR)(npath, default_, ctx, null); - } - else - throw new MissingKeyException(path, FR.Name, node.startMark()); - } - - FR.Type convert (alias FR) () - { - static if (__traits(getAliasThis, TLFR.Type).length == 1 && - __traits(getAliasThis, TLFR.Type)[0] == FR.FieldName) - { - static assert(FR.Name == FR.FieldName, - "Field `" ~ fullyQualifiedName!(FR.Ref) ~ - "` is the target of an `alias this` and cannot have a `@Name` attribute"); - static assert(!hasConverter!(FR.Ref), - "Field `" ~ fullyQualifiedName!(FR.Ref) ~ - "` is the target of an `alias this` and cannot have a `@Converter` attribute"); - - alias convertW(string FieldName) = convert!(FieldRef!(FR.Type, FieldName, FR.Optional)); - return FR.Type(staticMap!(convertW, FieldNameTuple!(FR.Type))); - } - else - return convertField!(FR)(); - } - - debug (ConfigFillerDebug) - { - indent++; - scope (exit) indent--; - } - - TLFR.Type doValidation (TLFR.Type result) - { - static if (is(typeof(result.validate()))) - { - if (enabledState) - { - dbgWrite("%s: Calling `%s` method", - TLFR.Type.stringof.paint(Cyan), "validate()".paint(Green)); - result.validate(); - } - else - { - dbgWrite("%s: Ignoring `%s` method on disabled mapping", - TLFR.Type.stringof.paint(Cyan), "validate()".paint(Green)); - } - } - else if (enabledState) - dbgWrite("%s: No `%s` method found", - TLFR.Type.stringof.paint(Cyan), "validate()".paint(Yellow)); - - return result; - } - - // This might trigger things like "`this` is not accessible". - // In this case, the user most likely needs to provide a converter. 
- alias convertWrapper(string FieldName) = convert!(FieldRef!(TLFR.Type, FieldName)); - return doValidation(TLFR.Type(staticMap!(convertWrapper, FieldNameTuple!(TLFR.Type)))); -} - -/******************************************************************************* - - Parse a field, trying to match up the compile-time expectation with - the run time value of the Node (`nodeID`). - - This is the central point which does "type conversion", from the YAML node - to the field type. Whenever adding support for a new type, things should - happen here. - - Because a `struct` can be filled from either a mapping or a scalar, - this function will first try the converter / fromString / string ctor - methods before defaulting to fieldwise construction. - - Note that optional fields are checked before recursion happens, - so this method does not do this check. - -*******************************************************************************/ - -package FR.Type parseField (alias FR) - (Node node, string path, auto ref FR.Type defaultValue, in Context ctx) -{ - if (node.nodeID == NodeID.invalid) - throw new TypeConfigException(node, "valid", path); - - // If we reached this, it means the field is set, so just recurse - // to peel the type - static if (is(FR.Type : SetInfo!FT, FT)) - return FR.Type( - parseField!(FieldRef!(FR.Type, "value"))(node, path, defaultValue, ctx), - true); - - else static if (hasConverter!(FR.Ref)) - return wrapException(node.viaConverter!(FR)(path, ctx), path, node.startMark()); - - else static if (hasFromYAML!(FR.Type)) - { - scope impl = new ConfigParserImpl!(FR.Type)(node, path, ctx); - return wrapException(FR.Type.fromYAML(impl), path, node.startMark()); - } - - else static if (hasFromString!(FR.Type)) - return wrapException(FR.Type.fromString(node.as!string), path, node.startMark()); - - else static if (hasStringCtor!(FR.Type)) - return wrapException(FR.Type(node.as!string), path, node.startMark()); - - else static if (is(immutable(FR.Type) == 
immutable(core.time.Duration))) - { - if (node.nodeID != NodeID.mapping) - throw new DurationTypeConfigException(node, path); - return node.parseMapping!(StructFieldRef!DurationMapping)( - path, DurationMapping.make(defaultValue), ctx, null).opCast!Duration; - } - - else static if (is(FR.Type == struct)) - { - if (node.nodeID != NodeID.mapping) - throw new TypeConfigException(node, "mapping (object)", path); - return node.parseMapping!(FR)(path, defaultValue, ctx, null); - } - - // Handle string early as they match the sequence rule too - else static if (isSomeString!(FR.Type)) - // Use `string` type explicitly because `Variant` thinks - // `immutable(char)[]` (aka `string`) and `immutable(char[])` - // (aka `immutable(string)`) are not compatible. - return node.parseScalar!(string)(path); - // Enum too, as their base type might be an array (including strings) - else static if (is(FR.Type == enum)) - return node.parseScalar!(FR.Type)(path); - - else static if (is(FR.Type : E[K], E, K)) - { - if (node.nodeID != NodeID.mapping) - throw new TypeConfigException(node, "mapping (associative array)", path); - - // Note: As of June 2022 (DMD v2.100.0), associative arrays cannot - // have initializers, hence their UX for config is less optimal. 
- return node.mapping().map!( - (Node.Pair pair) { - return tuple( - pair.key.get!K, - pair.value.parseField!(NestedFieldRef!(E, FR))( - format("%s[%s]", path, pair.key.as!string), E.init, ctx)); - }).assocArray(); - - } - else static if (is(FR.Type : E[], E)) - { - static if (hasUDA!(FR.Ref, Key)) - { - static assert(getUDAs!(FR.Ref, Key).length == 1, - "`" ~ fullyQualifiedName!(FR.Ref) ~ - "` field shouldn't have more than one `Key` attribute"); - static assert(is(E == struct), - "Field `" ~ fullyQualifiedName!(FR.Ref) ~ - "` has a `Key` attribute, but is a sequence of `" ~ - fullyQualifiedName!E ~ "`, not a sequence of `struct`"); - - string key = getUDAs!(FR.Ref, Key)[0].name; - - if (node.nodeID != NodeID.mapping && node.nodeID != NodeID.sequence) - throw new TypeConfigException(node, "mapping (object) or sequence", path); - - if (node.nodeID == NodeID.mapping) return node.mapping().map!( - (Node.Pair pair) { - if (pair.value.nodeID != NodeID.mapping) - throw new TypeConfigException( - "sequence of " ~ pair.value.nodeTypeString(), - "sequence of mapping (array of objects)", - path, null, node.startMark()); - - return pair.value.parseMapping!(StructFieldRef!E)( - path.addPath(pair.key.as!string), - E.init, ctx, key.length ? [ key: pair.key ] : null); - }).array(); - } - if (node.nodeID != NodeID.sequence) - throw new TypeConfigException(node, "sequence (array)", path); - - // We pass `E.init` as default value as it is not going to be used: - // Either there is something in the YAML document, and that will be - // converted, or `sequence` will not iterate. - return node.sequence.enumerate.map!( - kv => kv.value.parseField!(NestedFieldRef!(E, FR))( - format("%s[%s]", path, kv.index), E.init, ctx)) - .array(); - } - else - { - static assert (!is(FR.Type == union), - "`union` are not supported. 
Use a converter instead"); - return node.parseScalar!(FR.Type)(path); - } -} - -/// Parse a node as a scalar -private T parseScalar (T) (Node node, string path) -{ - if (node.nodeID != NodeID.scalar) - throw new TypeConfigException(node, "scalar (value)", path); - - static if (is(T == enum)) - return node.as!string.to!(T); - else - return node.as!(T); -} - -/******************************************************************************* - - Write a potentially throwing user-provided expression in ConfigException - - The user-provided hooks may throw (e.g. `fromString / the constructor), - and the error may or may not be clear. We can't do anything about a bad - message but we can wrap the thrown exception in a `ConfigException` - to provide the location in the yaml file where the error happened. - - Params: - exp = The expression that may throw - path = Path within the config file of the field - position = Position of the node in the YAML file - file = Call site file (otherwise the message would point to this function) - line = Call site line (see `file` reasoning) - - Returns: - The result of `exp` evaluation. 
- -*******************************************************************************/ - -private T wrapException (T) (lazy T exp, string path, Mark position, - string file = __FILE__, size_t line = __LINE__) -{ - try - return exp; - catch (ConfigException exc) - throw exc; - catch (Exception exc) - throw new ConstructionException(exc, path, position, file, line); -} - -/// Allows us to reuse parseMapping and strict parsing -private struct DurationMapping -{ - public SetInfo!long weeks; - public SetInfo!long days; - public SetInfo!long hours; - public SetInfo!long minutes; - public SetInfo!long seconds; - public SetInfo!long msecs; - public SetInfo!long usecs; - public SetInfo!long hnsecs; - public SetInfo!long nsecs; - - private static DurationMapping make (Duration def) @safe pure nothrow @nogc - { - typeof(return) result; - auto fullSplit = def.split(); - result.weeks = SetInfo!long(fullSplit.weeks, fullSplit.weeks != 0); - result.days = SetInfo!long(fullSplit.days, fullSplit.days != 0); - result.hours = SetInfo!long(fullSplit.hours, fullSplit.hours != 0); - result.minutes = SetInfo!long(fullSplit.minutes, fullSplit.minutes != 0); - result.seconds = SetInfo!long(fullSplit.seconds, fullSplit.seconds != 0); - result.msecs = SetInfo!long(fullSplit.msecs, fullSplit.msecs != 0); - result.usecs = SetInfo!long(fullSplit.usecs, fullSplit.usecs != 0); - result.hnsecs = SetInfo!long(fullSplit.hnsecs, fullSplit.hnsecs != 0); - // nsecs is ignored by split as it's not representable in `Duration` - return result; - } - - /// - public void validate () const @safe - { - // That check should never fail, as the YAML parser would error out, - // but better be safe than sorry. 
- foreach (field; this.tupleof) - if (field.set) - return; - - throw new Exception( - "Expected at least one of the components (weeks, days, hours, " ~ - "minutes, seconds, msecs, usecs, hnsecs, nsecs) to be set"); - } - - /// Allow conversion to a `Duration` - public Duration opCast (T : Duration) () const scope @safe pure nothrow @nogc - { - return core.time.weeks(this.weeks) + core.time.days(this.days) + - core.time.hours(this.hours) + core.time.minutes(this.minutes) + - core.time.seconds(this.seconds) + core.time.msecs(this.msecs) + - core.time.usecs(this.usecs) + core.time.hnsecs(this.hnsecs) + - core.time.nsecs(this.nsecs); - } -} - -/// Evaluates to `true` if we should recurse into the struct via `parseMapping` -private enum mightBeOptional (alias FR) = is(FR.Type == struct) && - !is(immutable(FR.Type) == immutable(core.time.Duration)) && - !hasConverter!(FR.Ref) && !hasFromString!(FR.Type) && - !hasStringCtor!(FR.Type) && !hasFromYAML!(FR.Type); - -/// Convenience template to check for the presence of converter(s) -private enum hasConverter (alias Field) = hasUDA!(Field, Converter); - -/// Provided a field reference `FR` which is known to have at least one converter, -/// perform basic checks and return the value after applying the converter. 
-private auto viaConverter (alias FR) (Node node, string path, in Context context) -{ - enum Converters = getUDAs!(FR.Ref, Converter); - static assert (Converters.length, - "Internal error: `viaConverter` called on field `" ~ - FR.FieldName ~ "` with no converter"); - - static assert(Converters.length == 1, - "Field `" ~ FR.FieldName ~ "` cannot have more than one `Converter`"); - - scope impl = new ConfigParserImpl!(FR.Type)(node, path, context); - return Converters[0].converter(impl); -} - -private final class ConfigParserImpl (T) : ConfigParser!T -{ - private Node node_; - private string path_; - private const(Context) context_; - - /// Ctor - public this (Node n, string p, const Context c) scope @safe pure nothrow @nogc - { - this.node_ = n; - this.path_ = p; - this.context_ = c; - } - - public final override inout(Node) node () inout @safe pure nothrow @nogc - { - return this.node_; - } - - public final override string path () const @safe pure nothrow @nogc - { - return this.path_; - } - - protected final override const(Context) context () const @safe pure nothrow @nogc - { - return this.context_; - } -} - -/// Helper predicate -private template NameIs (string searching) -{ - enum bool Pred (alias FR) = (searching == FR.Name); -} - -/// Returns whether or not the field has a `enabled` / `disabled` field, -/// and its value. If it does not, returns `true`. 
-private EnabledState isMappingEnabled (M) (Node node, auto ref M default_) -{ - import std.meta : Filter; - - alias EMT = Filter!(NameIs!("enabled").Pred, FieldRefTuple!M); - alias DMT = Filter!(NameIs!("disabled").Pred, FieldRefTuple!M); - - static if (EMT.length) - { - static assert (DMT.length == 0, - "`enabled` field `" ~ EMT[0].FieldName ~ - "` conflicts with `disabled` field `" ~ DMT[0].FieldName ~ "`"); - - if (auto ptr = "enabled" in node) - return EnabledState(EnabledState.Field.Enabled, (*ptr).as!bool); - return EnabledState(EnabledState.Field.Enabled, __traits(getMember, default_, EMT[0].FieldName)); - } - else static if (DMT.length) - { - if (auto ptr = "disabled" in node) - return EnabledState(EnabledState.Field.Disabled, (*ptr).as!bool); - return EnabledState(EnabledState.Field.Disabled, __traits(getMember, default_, DMT[0].FieldName)); - } - else - { - return EnabledState(EnabledState.Field.None); - } -} - -/// Retun value of `isMappingEnabled` -private struct EnabledState -{ - /// Used to determine which field controls a mapping enabled state - private enum Field - { - /// No such field, the mapping is considered enabled - None, - /// The field is named 'enabled' - Enabled, - /// The field is named 'disabled' - Disabled, - } - - /// Check if the mapping is considered enabled - public bool opCast () const scope @safe pure @nogc nothrow - { - return this.field == Field.None || - (this.field == Field.Enabled && this.fieldValue) || - (this.field == Field.Disabled && !this.fieldValue); - } - - /// Type of field found - private Field field; - - /// Value of the field, interpretation depends on `field` - private bool fieldValue; -} - -/// Evaluates to `true` if `T` is a `struct` with a default ctor -private enum hasFieldwiseCtor (T) = (is(T == struct) && is(typeof(() => T(T.init.tupleof)))); - -/// Evaluates to `true` if `T` has a static method that is designed to work with this library -private enum hasFromYAML (T) = 
is(typeof(T.fromYAML(ConfigParser!(T).init)) : T); - -/// Evaluates to `true` if `T` has a static method that accepts a `string` and returns a `T` -private enum hasFromString (T) = is(typeof(T.fromString(string.init)) : T); - -/// Evaluates to `true` if `T` is a `struct` which accepts a single string as argument -private enum hasStringCtor (T) = (is(T == struct) && is(typeof(T.__ctor)) && - Parameters!(T.__ctor).length == 1 && - is(typeof(() => T(string.init)))); - -unittest -{ - static struct Simple - { - int value; - string otherValue; - } - - static assert( hasFieldwiseCtor!Simple); - static assert(!hasStringCtor!Simple); - - static struct PubKey - { - ubyte[] data; - - this (string hex) @safe pure nothrow @nogc{} - } - - static assert(!hasFieldwiseCtor!PubKey); - static assert( hasStringCtor!PubKey); - - static assert(!hasFieldwiseCtor!string); - static assert(!hasFieldwiseCtor!int); - static assert(!hasStringCtor!string); - static assert(!hasStringCtor!int); -} - -/// Convenience function to extend a YAML path -private string addPath (string opath, string newPart) -in(newPart.length) -do { - return opath.length ? format("%s.%s", opath, newPart) : newPart; -} diff --git a/source/configy/Test.d b/source/configy/Test.d deleted file mode 100644 index a8a36a6..0000000 --- a/source/configy/Test.d +++ /dev/null @@ -1,694 +0,0 @@ -/******************************************************************************* - Contains all the tests for this library. - - Copyright: - Copyright (c) 2019-2022 BOSAGORA Foundation - All rights reserved. - - License: - MIT License. See LICENSE for details. 
- -*******************************************************************************/ - -module configy.Test; - -import configy.Attributes; -import configy.Exceptions; -import configy.Read; -import configy.Utils; - -import dyaml.node; - -import std.format; - -import core.time; - -/// Basic usage tests -unittest -{ - static struct Address - { - string address; - string city; - bool accessible; - } - - static struct Nested - { - Address address; - } - - static struct Config - { - bool enabled = true; - - string name = "Jessie"; - int age = 42; - double ratio = 24.42; - - Address address = { address: "Yeoksam-dong", city: "Seoul", accessible: true }; - - Nested nested = { address: { address: "Gangnam-gu", city: "Also Seoul", accessible: false } }; - } - - auto c1 = parseConfigString!Config("enabled: false", "/dev/null"); - assert(!c1.enabled); - assert(c1.name == "Jessie"); - assert(c1.age == 42); - assert(c1.ratio == 24.42); - - assert(c1.address.address == "Yeoksam-dong"); - assert(c1.address.city == "Seoul"); - assert(c1.address.accessible); - - assert(c1.nested.address.address == "Gangnam-gu"); - assert(c1.nested.address.city == "Also Seoul"); - assert(!c1.nested.address.accessible); -} - -// Tests for SetInfo -unittest -{ - static struct Address - { - string address; - string city; - bool accessible; - } - - static struct Config - { - SetInfo!int value; - SetInfo!int answer = 42; - SetInfo!string name = SetInfo!string("Lorene", false); - - SetInfo!Address address; - } - - auto c1 = parseConfigString!Config("value: 24", "/dev/null"); - assert(c1.value == 24); - assert(c1.value.set); - - assert(c1.answer.set); - assert(c1.answer == 42); - - assert(!c1.name.set); - assert(c1.name == "Lorene"); - - assert(!c1.address.set); - - auto c2 = parseConfigString!Config(` -name: Lorene -address: - address: Somewhere - city: Over the rainbow -`, "/dev/null"); - - assert(!c2.value.set); - assert(c2.name == "Lorene"); - assert(c2.name.set); - assert(c2.address.set); - 
assert(c2.address.address == "Somewhere"); - assert(c2.address.city == "Over the rainbow"); -} - -unittest -{ - static struct Nested { core.time.Duration timeout; } - static struct Config { Nested node; } - - try - { - auto result = parseConfigString!Config("node:\n timeout:", "/dev/null"); - assert(0); - } - catch (Exception exc) - { - assert(exc.toString() == "/dev/null(1:10): node.timeout: Field is of type scalar, " ~ - "but expected a mapping with at least one of: weeks, days, hours, minutes, " ~ - "seconds, msecs, usecs, hnsecs, nsecs"); - } - - { - auto result = parseConfigString!Nested("timeout:\n days: 10\n minutes: 100\n hours: 3\n", "/dev/null"); - assert(result.timeout == 10.days + 4.hours + 40.minutes); - } -} - -unittest -{ - static struct Config { string required; } - try - auto result = parseConfigString!Config("value: 24", "/dev/null"); - catch (ConfigException e) - { - assert(format("%s", e) == - "/dev/null(0:0): value: Key is not a valid member of this section. There are 1 valid keys: required"); - assert(format("%S", e) == - format("%s/dev/null%s(%s0%s:%s0%s): %svalue%s: Key is not a valid member of this section. 
" ~ - "There are %s1%s valid keys: %srequired%s", Yellow, Reset, Cyan, Reset, Cyan, Reset, - Yellow, Reset, Yellow, Reset, Green, Reset)); - } -} - -// Test for various type errors -unittest -{ - static struct Mapping - { - string value; - } - - static struct Config - { - @Optional Mapping map; - @Optional Mapping[] array; - int scalar; - } - - try - { - auto result = parseConfigString!Config("map: Hello World", "/dev/null"); - assert(0); - } - catch (ConfigException exc) - { - assert(exc.toString() == "/dev/null(0:5): map: Expected to be of type mapping (object), but is a scalar"); - } - - try - { - auto result = parseConfigString!Config("map:\n - Hello\n - World", "/dev/null"); - assert(0); - } - catch (ConfigException exc) - { - assert(exc.toString() == "/dev/null(1:2): map: Expected to be of type mapping (object), but is a sequence"); - } - - try - { - auto result = parseConfigString!Config("scalar:\n - Hello\n - World", "/dev/null"); - assert(0); - } - catch (ConfigException exc) - { - assert(exc.toString() == "/dev/null(1:2): scalar: Expected to be of type scalar (value), but is a sequence"); - } - - try - { - auto result = parseConfigString!Config("scalar:\n hello:\n World", "/dev/null"); - assert(0); - } - catch (ConfigException exc) - { - assert(exc.toString() == "/dev/null(1:2): scalar: Expected to be of type scalar (value), but is a mapping"); - } -} - -// Test for strict mode -unittest -{ - static struct Config - { - string value; - string valhu; - string halvue; - } - - try - { - auto result = parseConfigString!Config("valeu: This is a typo", "/dev/null"); - assert(0); - } - catch (ConfigException exc) - { - assert(exc.toString() == "/dev/null(0:0): valeu: Key is not a valid member of this section. 
Did you mean: value, valhu"); - } -} - -// Test for required key -unittest -{ - static struct Nested - { - string required; - string optional = "Default"; - } - - static struct Config - { - Nested inner; - } - - try - { - auto result = parseConfigString!Config("inner:\n optional: Not the default value", "/dev/null"); - assert(0); - } - catch (ConfigException exc) - { - assert(exc.toString() == "/dev/null(1:2): inner.required: Required key was not found in configuration or command line arguments"); - } -} - -// Testing 'validate()' on nested structures -unittest -{ - __gshared int validateCalls0 = 0; - __gshared int validateCalls1 = 1; - __gshared int validateCalls2 = 2; - - static struct SecondLayer - { - string value = "default"; - - public void validate () const - { - validateCalls2++; - } - } - - static struct FirstLayer - { - bool enabled = true; - SecondLayer ltwo; - - public void validate () const - { - validateCalls1++; - } - } - - static struct Config - { - FirstLayer lone; - - public void validate () const - { - validateCalls0++; - } - } - - auto r1 = parseConfigString!Config("lone:\n ltwo:\n value: Something\n", "/dev/null"); - - assert(r1.lone.ltwo.value == "Something"); - // `validateCalls` are given different value to avoid false-positive - // if they are set to 0 / mixed up - assert(validateCalls0 == 1); - assert(validateCalls1 == 2); - assert(validateCalls2 == 3); - - auto r2 = parseConfigString!Config("lone:\n enabled: false\n", "/dev/null"); - assert(validateCalls0 == 2); // + 1 - assert(validateCalls1 == 2); // Other are disabled - assert(validateCalls2 == 3); -} - -// Test the throwing ctor / fromString -unittest -{ - static struct ThrowingFromString - { - public static ThrowingFromString fromString (scope const(char)[] value) - @safe pure - { - throw new Exception("Some meaningful error message"); - } - - public int value; - } - - static struct ThrowingCtor - { - public this (scope const(char)[] value) - @safe pure - { - throw new 
Exception("Something went wrong... Obviously"); - } - - public int value; - } - - static struct InnerConfig - { - public int value; - @Optional ThrowingCtor ctor; - @Optional ThrowingFromString fromString; - - @Converter!int( - (scope ConfigParser!int parser) { - // We have to trick DMD a bit so that it infers an `int` return - // type but doesn't emit a "Statement is not reachable" warning - if (parser.node is Node.init || parser.node !is Node.init ) - throw new Exception("You shall not pass"); - return 42; - }) - @Optional int converter; - } - - static struct Config - { - public InnerConfig config; - } - - try - { - auto result = parseConfigString!Config("config:\n value: 42\n ctor: 42", "/dev/null"); - assert(0); - } - catch (ConfigException exc) - { - assert(exc.toString() == "/dev/null(2:8): config.ctor: Something went wrong... Obviously"); - } - - try - { - auto result = parseConfigString!Config("config:\n value: 42\n fromString: 42", "/dev/null"); - assert(0); - } - catch (ConfigException exc) - { - assert(exc.toString() == "/dev/null(2:14): config.fromString: Some meaningful error message"); - } - - try - { - auto result = parseConfigString!Config("config:\n value: 42\n converter: 42", "/dev/null"); - assert(0); - } - catch (ConfigException exc) - { - assert(exc.toString() == "/dev/null(2:13): config.converter: You shall not pass"); - } - - // We also need to test with arrays, to ensure they are correctly called - static struct InnerArrayConfig - { - @Optional int value; - @Optional ThrowingCtor ctor; - @Optional ThrowingFromString fromString; - } - - static struct ArrayConfig - { - public InnerArrayConfig[] configs; - } - - try - { - auto result = parseConfigString!ArrayConfig("configs:\n - ctor: something", "/dev/null"); - assert(0); - } - catch (ConfigException exc) - { - assert(exc.toString() == "/dev/null(1:10): configs[0].ctor: Something went wrong... 
Obviously"); - } - - try - { - auto result = parseConfigString!ArrayConfig( - "configs:\n - value: 42\n - fromString: something", "/dev/null"); - assert(0); - } - catch (ConfigException exc) - { - assert(exc.toString() == "/dev/null(2:16): configs[1].fromString: Some meaningful error message"); - } -} - -// Test duplicate fields detection -unittest -{ - static struct Config - { - @Name("shadow") int value; - @Name("value") int shadow; - } - - auto result = parseConfigString!Config("shadow: 42\nvalue: 84\n", "/dev/null"); - assert(result.value == 42); - assert(result.shadow == 84); - - static struct BadConfig - { - int value; - @Name("value") int something; - } - - // Cannot test the error message, so this is as good as it gets - static assert(!is(typeof(() { - auto r = parseConfigString!BadConfig("shadow: 42\nvalue: 84\n", "/dev/null"); - }))); -} - -// Test a renamed `enabled` / `disabled` -unittest -{ - static struct ConfigA - { - @Name("enabled") bool shouldIStay; - int value; - } - - static struct ConfigB - { - @Name("disabled") bool orShouldIGo; - int value; - } - - { - auto c = parseConfigString!ConfigA("enabled: true\nvalue: 42", "/dev/null"); - assert(c.shouldIStay == true); - assert(c.value == 42); - } - - { - auto c = parseConfigString!ConfigB("disabled: false\nvalue: 42", "/dev/null"); - assert(c.orShouldIGo == false); - assert(c.value == 42); - } -} - -// Test for 'mightBeOptional' & missing key -unittest -{ - static struct RequestLimit { size_t reqs = 100; } - static struct Nested { @Name("jay") int value; } - static struct Config { @Name("chris") Nested value; RequestLimit limits; } - - auto r = parseConfigString!Config("chris:\n jay: 42", "/dev/null"); - assert(r.limits.reqs == 100); - - try - { - auto _ = parseConfigString!Config("limits:\n reqs: 42", "/dev/null"); - } - catch (ConfigException exc) - { - assert(exc.toString() == "(0:0): chris.jay: Required key was not found in configuration or command line arguments"); - } -} - -// Support for 
associative arrays -unittest -{ - static struct Nested - { - int[string] answers; - } - - static struct Parent - { - Nested[string] questions; - string[int] names; - } - - auto c = parseConfigString!Parent( -`names: - 42: "Forty two" - 97: "Quatre vingt dix sept" -questions: - first: - answers: - # Need to use quotes here otherwise it gets interpreted as - # true / false, perhaps a dyaml issue ? - 'yes': 42 - 'no': 24 - second: - answers: - maybe: 69 - whynot: 20 -`, "/dev/null"); - - assert(c.names == [42: "Forty two", 97: "Quatre vingt dix sept"]); - assert(c.questions.length == 2); - assert(c.questions["first"] == Nested(["yes": 42, "no": 24])); - assert(c.questions["second"] == Nested(["maybe": 69, "whynot": 20])); -} - -unittest -{ - static struct FlattenMe - { - int value; - string name; - } - - static struct Config - { - FlattenMe flat = FlattenMe(24, "Four twenty"); - alias flat this; - - FlattenMe not_flat; - } - - auto c = parseConfigString!Config( - "value: 42\nname: John\nnot_flat:\n value: 69\n name: Henry", - "/dev/null"); - assert(c.flat.value == 42); - assert(c.flat.name == "John"); - assert(c.not_flat.value == 69); - assert(c.not_flat.name == "Henry"); - - auto c2 = parseConfigString!Config( - "not_flat:\n value: 69\n name: Henry", "/dev/null"); - assert(c2.flat.value == 24); - assert(c2.flat.name == "Four twenty"); - - static struct OptConfig - { - @Optional FlattenMe flat; - alias flat this; - - int value; - } - auto c3 = parseConfigString!OptConfig("value: 69\n", "/dev/null"); - assert(c3.value == 69); -} - -unittest -{ - static struct Config - { - @Name("names") - string[] names_; - - size_t names () const scope @safe pure nothrow @nogc - { - return this.names_.length; - } - } - - auto c = parseConfigString!Config("names:\n - John\n - Luca\n", "/dev/null"); - assert(c.names_ == [ "John", "Luca" ]); - assert(c.names == 2); -} - -unittest -{ - static struct BuildTemplate - { - string targetName; - string platform; - } - static struct BuildConfig 
- { - BuildTemplate config; - alias config this; - } - static struct Config - { - string name; - - @Optional BuildConfig config; - alias config this; - } - - auto c = parseConfigString!Config("name: dummy\n", "/dev/null"); - assert(c.name == "dummy"); - - auto c2 = parseConfigString!Config("name: dummy\nplatform: windows\n", "/dev/null"); - assert(c2.name == "dummy"); - assert(c2.config.platform == "windows"); -} - -// Make sure unions don't compile -unittest -{ - static union MyUnion - { - string value; - int number; - } - - static struct Config - { - MyUnion hello; - } - - static assert(!is(typeof(parseConfigString!Config("hello: world\n", "/dev/null")))); - static assert(!is(typeof(parseConfigString!MyUnion("hello: world\n", "/dev/null")))); -} - -// Test the `@Key` attribute -unittest -{ - static struct Interface - { - string name; - string static_ip; - } - - static struct Config - { - string profile; - - @Key("name") - immutable(Interface)[] ifaces = [ - Interface("lo", "127.0.0.1"), - ]; - } - - auto c = parseConfigString!Config(`profile: default -ifaces: - eth0: - static_ip: "192.168.1.42" - lo: - static_ip: "127.0.0.42" -`, "/dev/null"); - assert(c.ifaces.length == 2); - assert(c.ifaces == [ Interface("eth0", "192.168.1.42"), Interface("lo", "127.0.0.42")]); -} - -// Nested ConstructionException -unittest -{ - static struct WillFail - { - string name; - this (string value) @safe pure - { - throw new Exception("Parsing failed!"); - } - } - - static struct Container - { - WillFail[] array; - } - - static struct Config - { - Container data; - } - - try auto c = parseConfigString!Config(`data: - array: - - Not - - Working -`, "/dev/null"); - catch (Exception exc) - assert(exc.toString() == `/dev/null(2:6): data.array[0]: Parsing failed!`); -} diff --git a/source/configy/Utils.d b/source/configy/Utils.d deleted file mode 100644 index f2ce79d..0000000 --- a/source/configy/Utils.d +++ /dev/null @@ -1,124 +0,0 @@ 
-/******************************************************************************* - - Utilities used internally by the config parser. - - Compile this library with `-debug=ConfigFillerDebug` to get verbose output. - This can be achieved with `debugVersions` in dub, or by depending on the - `debug` configuration provided by `dub.json`. - - Copyright: - Copyright (c) 2019-2022 BOSAGORA Foundation - All rights reserved. - - License: - MIT License. See LICENSE for details. - -*******************************************************************************/ - -module configy.Utils; - -import std.format; - -/// Type of sink used by the `toString` -package alias SinkType = void delegate (in char[]) @safe; - -/******************************************************************************* - - Debugging utility for config filler - - Since this module does a lot of meta-programming, some things can easily - go wrong. For example, a condition being false might happen because it is - genuinely false or because the condition is buggy. - - To make figuring out if a config is properly parsed or not, a little utility - (config-dumper) exists, which will provide a verbose output of what the - config filler does. To do this, `config-dumper` is compiled with - the below `debug` version. - -*******************************************************************************/ - -debug (ConfigFillerDebug) -{ - /// A thin wrapper around `stderr.writefln` with indentation - package void dbgWrite (Args...) (string fmt, Args args) - { - import std.stdio; - stderr.write(IndentChars[0 .. indent >= IndentChars.length ? $ : indent]); - stderr.writefln(fmt, args); - } - - /// Log a value that is to be returned - /// The value will be the first argument and painted yellow - package T dbgWriteRet (T, Args...) 
(auto ref T return_, string fmt, Args args) - { - dbgWrite(fmt, return_.paint(Yellow), args); - return return_; - } - - /// The current indentation - package size_t indent; - - /// Helper for indentation (who needs more than 16 levels of indent?) - private immutable IndentChars = "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"; -} -else -{ - /// No-op - package void dbgWrite (Args...) (string fmt, lazy Args args) {} - - /// Ditto - package T dbgWriteRet (T, Args...) (auto ref T return_, string fmt, lazy Args args) - { - return return_; - } -} - -/// Thin wrapper to simplify colorization -package struct Colored (T) -{ - /// Color used - private string color; - - /// Value to print - private T value; - - /// Hook for `formattedWrite` - public void toString (scope SinkType sink) - { - static if (is(typeof(T.init.length) : size_t)) - if (this.value.length == 0) return; - - formattedWrite(sink, "%s%s%s", this.color, this.value, Reset); - } -} - -/// Ditto -package Colored!T paint (T) (T arg, string color) -{ - return Colored!T(color, arg); -} - -/// Paint `arg` in color `ifTrue` if `cond` evaluates to `true`, use color `ifFalse` otherwise -package Colored!T paintIf (T) (T arg, bool cond, string ifTrue, string ifFalse) -{ - return Colored!T(cond ? ifTrue : ifFalse, arg); -} - -/// Paint a boolean in green if `true`, red otherwise, unless `reverse` is set to `true`, -/// in which case the colors are swapped -package Colored!bool paintBool (bool value, bool reverse = false) -{ - return value.paintIf(reverse ^ value, Green, Red); -} - -/// Reset the foreground color used -package immutable Reset = "\u001b[0m"; -/// Set the foreground color to red, used for `false`, missing, errors, etc... -package immutable Red = "\u001b[31m"; -/// Set the foreground color to red, used for warnings and other things -/// that should draw attention but do not pose an immediate issue -package immutable Yellow = "\u001b[33m"; -/// Set the foreground color to green, used for `true`, present, etc... 
-package immutable Green = "\u001b[32m"; -/// Set the foreground color to green, used field names / path -package immutable Cyan = "\u001b[36m"; diff --git a/source/dub/commandline.d b/source/dub/commandline.d index 4be40b5..e9f4a10 100644 --- a/source/dub/commandline.d +++ b/source/dub/commandline.d @@ -21,7 +21,7 @@ import dub.project; import dub.internal.utils : getDUBVersion, getClosestMatch, getTempFile; -import dyaml.stdsumtype; +import dub.internal.dyaml.stdsumtype; import std.algorithm; import std.array; diff --git a/source/dub/compilers/buildsettings.d b/source/dub/compilers/buildsettings.d index c9d6b7d..315dec0 100644 --- a/source/dub/compilers/buildsettings.d +++ b/source/dub/compilers/buildsettings.d @@ -9,7 +9,7 @@ import dub.internal.vibecompat.inet.path; -import configy.Attributes; +import dub.internal.configy.Attributes; import std.array : array; import std.algorithm : filter, any; @@ -425,7 +425,7 @@ */ public static Flags!T fromYAML (scope ConfigParser!(Flags!T) p) { - import dyaml.node; + import dub.internal.dyaml.node; import std.exception; import std.conv; @@ -439,7 +439,7 @@ unittest { - import configy.Read; + import dub.internal.configy.Read; static struct Config { diff --git a/source/dub/dependency.d b/source/dub/dependency.d index a4f18c0..2169b8f 100644 --- a/source/dub/dependency.d +++ b/source/dub/dependency.d @@ -15,7 +15,7 @@ import dub.semver; import dub.internal.logging; -import dyaml.stdsumtype; +import dub.internal.dyaml.stdsumtype; import std.algorithm; import std.array; diff --git a/source/dub/dub.d b/source/dub/dub.d index 92c7a4b..67c100d 100644 --- a/source/dub/dub.d +++ b/source/dub/dub.d @@ -221,7 +221,7 @@ */ protected void loadConfig() { - import configy.Read; + import dub.internal.configy.Read; void readSettingsFile (NativePath path_) { @@ -1864,7 +1864,7 @@ * non-additive are marked as `SetInfo`. 
*/ private struct UserConfiguration { - import configy.Attributes; + import dub.internal.configy.Attributes : Optional, SetInfo; @Optional string[] registryUrls; @Optional NativePath[] customCachePaths; @@ -1916,7 +1916,7 @@ } unittest { - import configy.Read; + import dub.internal.configy.Read; const str1 = `{ "registryUrls": [ "http://foo.bar\/optional\/escape" ], diff --git a/source/dub/internal/configy/Attributes.d b/source/dub/internal/configy/Attributes.d new file mode 100644 index 0000000..7823471 --- /dev/null +++ b/source/dub/internal/configy/Attributes.d @@ -0,0 +1,315 @@ +/******************************************************************************* + + Define UDAs that can be applied to a configuration struct + + This module is stand alone (a leaf module) to allow importing the UDAs + without importing the whole configuration parsing code. + + Copyright: + Copyright (c) 2019-2022 BOSAGORA Foundation + All rights reserved. + + License: + MIT License. See LICENSE for details. + +*******************************************************************************/ + +module dub.internal.configy.Attributes; + +import std.traits; + +/******************************************************************************* + + An optional parameter with an initial value of `T.init` + + The config parser automatically recognize non-default initializer, + so that the following: + ``` + public struct Config + { + public string greeting = "Welcome home"; + } + ``` + Will not error out if `greeting` is not defined in the config file. + However, this relies on the initializer of the field (`greeting`) being + different from the type initializer (`string.init` is `null`). + In some cases, the default value is also the desired initializer, e.g.: + ``` + public struct Config + { + /// Maximum number of connections. 0 means unlimited. + public uint connections_limit = 0; + } + ``` + In this case, one can add `@Optional` to the field to inform the parser. 
+ +*******************************************************************************/ + +public struct Optional {} + +/******************************************************************************* + + Inform the config filler that this sequence is to be read as a mapping + + On some occasions, one might want to read a mapping as an array. + One reason to do so may be to provide a better experience to the user, + e.g. having to type: + ``` + interfaces: + eth0: + ip: "192.168.0.1" + private: true + wlan0: + ip: "1.2.3.4" + ``` + Instead of the slightly more verbose: + ``` + interfaces: + - name: eth0 + ip: "192.168.0.1" + private: true + - name: wlan0 + ip: "1.2.3.4" + ``` + + The former would require to be expressed as an associative arrays. + However, one major drawback of associative arrays is that they can't have + an initializer, which makes them cumbersome to use in the context of the + config filler. To remediate this issue, one may use `@Key("name")` + on a field (here, `interfaces`) so that the mapping is flattened + to an array. If `name` is `null`, the key will be discarded. + +*******************************************************************************/ + +public struct Key +{ + /// + public string name; +} + +/******************************************************************************* + + Look up the provided name in the YAML node, instead of the field name. + + By default, the config filler will look up the field name of a mapping in + the YAML node. If this is not desired, an explicit `Name` attribute can + be given. This is especially useful for names which are keyword. 
+ + ``` + public struct Config + { + public @Name("delete") bool remove; + } + ``` + +*******************************************************************************/ + +public struct Name +{ + /// + public string name; + + /// + public bool startsWith; +} + +/// Short hand syntax +public Name StartsWith(string name) @safe pure nothrow @nogc +{ + return Name(name, true); +} + +/******************************************************************************* + + A field which carries informations about whether it was set or not + + Some configurations may need to know which fields were set explicitly while + keeping defaults. An example of this is a `struct` where at least one field + needs to be set, such as the following: + ``` + public struct ProtoDuration + { + public @Optional long weeks; + public @Optional long days; + public @Optional long hours; + public @Optional long minutes; + public long seconds = 42; + public @Optional long msecs; + public @Optional long usecs; + public @Optional long hnsecs; + public @Optional long nsecs; + } + ``` + In this case, it would be impossible to know if any field was explicitly + provided. Hence, the struct should be written as: + ``` + public struct ProtoDuration + { + public SetInfo!long weeks; + public SetInfo!long days; + public SetInfo!long hours; + public SetInfo!long minutes; + public SetInfo!long seconds = 42; + public SetInfo!long msecs; + public SetInfo!long usecs; + public SetInfo!long hnsecs; + public SetInfo!long nsecs; + } + ``` + Note that `SetInfo` implies `Optional`, and supports default values. + +*******************************************************************************/ + +public struct SetInfo (T) +{ + /*************************************************************************** + + Allow initialization as a field + + This sets the field as having been set, so that: + ``` + struct Config { SetInfo!Duration timeout; } + + Config myConf = { timeout: 10.minutes } + ``` + Will behave as if set explicitly. 
If this behavior is not wanted, + pass `false` as second argument: + ``` + Config myConf = { timeout: SetInfo!Duration(10.minutes, false) } + ``` + + ***************************************************************************/ + + public this (T initVal, bool isSet = true) @safe pure nothrow @nogc + { + this.value = initVal; + this.set = isSet; + } + + /// Underlying data + public T value; + + /// + alias value this; + + /// Whether this field was set or not + public bool set; +} + +/******************************************************************************* + + Provides a means to convert a field from a `Node` to a complex type + + When filling the config, it might be useful to store types which are + not only simple `string` and integer, such as `URL`, `BigInt`, or any other + library type not directly under the user's control. + + To allow reading those values from the config file, a `Converter` may + be used. The converter will tell the `ConfigFiller` how to convert from + `Node` to the desired type `T`. + + If the type is under the user's control, one can also add a constructor + accepting a single string, or define the `fromString` method, both of which + are tried if no `Converter` is found. + + For types not under the user's control, there might be different ways + to parse the same type within the same struct, or neither the ctor nor + the `fromString` method may be defined under that name. + The exmaple below uses `parse` in place of `fromString`, for example. + + ``` + /// Complex structure representing the age of a person based on its birthday + public struct Age + { + /// + public uint birth_year; + /// + public uint birth_month; + /// + public uint birth_day; + + /// Note that this will be picked up automatically if named `fromString` + /// but this struct might be a library type. 
+ public static Age parse (string value) { /+ Magic +/ } + } + + public struct Person + { + /// + @Converter!Age((Node value) => Age.parse(value.as!string)) + public Age age; + } + ``` + + Note that some fields may also be of multiple YAML types, such as DUB's + `dependencies`, which is either a simple string (`"vibe-d": "~>1.0 "`), + or an in its complex form (`"vibe-d": { "version": "~>1.0" }`). + For those use cases, a `Converter` is the best approach. + + To avoid repeating the field type, a convenience function is provided: + ``` + public struct Age + { + public uint birth_year; + public uint birth_month; + public uint birth_day; + public static Age parse (string value) { /+ Magic +/ } + } + + public struct Person + { + /// Here `converter` will deduct the type from the delegate argument, + /// and return an instance of `Converter`. Mind the case. + @converter((Node value) => Age.parse(value.as!string)) + public Age age; + } + ``` + +*******************************************************************************/ + +public struct Converter (T) +{ + /// + public alias ConverterFunc = T function (scope ConfigParser!T context); + + /// + public ConverterFunc converter; +} + +/// Ditto +public auto converter (FT) (FT func) +{ + static assert(isFunctionPointer!FT, + "Error: Argument to `converter` should be a function pointer, not: " + ~ FT.stringof); + + alias RType = ReturnType!FT; + static assert(!is(RType == void), + "Error: Converter needs to be of the return type of the field, not `void`"); + return Converter!RType(func); +} + +public interface ConfigParser (T) +{ + import dub.internal.dyaml.node; + import dub.internal.configy.FieldRef : StructFieldRef; + import dub.internal.configy.Read : Context, parseField; + + /// Returns: the node being processed + public inout(Node) node () inout @safe pure nothrow @nogc; + + /// Returns: current location we are parsing + public string path () const @safe pure nothrow @nogc; + + /// + public final auto parseAs 
(OtherType) + (auto ref OtherType defaultValue = OtherType.init) + { + alias TypeFieldRef = StructFieldRef!OtherType; + return this.node().parseField!(TypeFieldRef)( + this.path(), defaultValue, this.context()); + } + + /// Internal use only + protected const(Context) context () const @safe pure nothrow @nogc; +} diff --git a/source/dub/internal/configy/DubTest.d b/source/dub/internal/configy/DubTest.d new file mode 100644 index 0000000..8c1ead1 --- /dev/null +++ b/source/dub/internal/configy/DubTest.d @@ -0,0 +1,83 @@ +/******************************************************************************* + + Contains tests for dub-specific extensions + + Whenever integrating changes from upstream configy, most conflicts tend + to be on `configy.Test`, and as the structure is very similar, + the default diff algorithms are useless. Having a separate module simplify + this greatly. + + License: + MIT License. See LICENSE for details. + +*******************************************************************************/ + +module dub.internal.configy.DubTest; + +import dub.internal.configy.Attributes; +import dub.internal.configy.Read; + +import dub.internal.dyaml.node; + +/// Test name pattern matching +unittest +{ + static struct Config + { + @StartsWith("names") + string[][string] names_; + } + + auto c = parseConfigString!Config("names-x86:\n - John\n - Luca\nnames:\n - Marie", "/dev/null"); + assert(c.names_[null] == [ "Marie" ]); + assert(c.names_["x86"] == [ "John", "Luca" ]); +} + +/// Test our `fromYAML` extension +unittest +{ + static struct PackageDef + { + string name; + @Optional string target; + int build = 42; + } + + static struct Package + { + string path; + PackageDef def; + + public static Package fromYAML (scope ConfigParser!Package parser) + { + if (parser.node.nodeID == NodeID.mapping) + return Package(null, parser.parseAs!PackageDef); + else + return Package(parser.parseAs!string); + } + } + + static struct Config + { + string name; + Package[] deps; + 
} + + auto c = parseConfigString!Config( +` +name: myPkg +deps: + - /foo/bar + - name: foo + target: bar + build: 24 + - name: fur + - /one/last/path +`, "/dev/null"); + assert(c.name == "myPkg"); + assert(c.deps.length == 4); + assert(c.deps[0] == Package("/foo/bar")); + assert(c.deps[1] == Package(null, PackageDef("foo", "bar", 24))); + assert(c.deps[2] == Package(null, PackageDef("fur", null, 42))); + assert(c.deps[3] == Package("/one/last/path")); +} diff --git a/source/dub/internal/configy/Exceptions.d b/source/dub/internal/configy/Exceptions.d new file mode 100644 index 0000000..e225303 --- /dev/null +++ b/source/dub/internal/configy/Exceptions.d @@ -0,0 +1,382 @@ +/******************************************************************************* + + Definitions for Exceptions used by the config module. + + Copyright: + Copyright (c) 2019-2022 BOSAGORA Foundation + All rights reserved. + + License: + MIT License. See LICENSE for details. + +*******************************************************************************/ + +module dub.internal.configy.Exceptions; + +import dub.internal.configy.Utils; + +import dub.internal.dyaml.exception; +import dub.internal.dyaml.node; + +import std.algorithm : filter, map; +import std.format; +import std.string : soundexer; + +/******************************************************************************* + + Base exception type thrown by the config parser + + Whenever dealing with Exceptions thrown by the config parser, catching + this type will allow to optionally format with colors: + ``` + try + { + auto conf = parseConfigFile!Config(cmdln); + // ... + } + catch (ConfigException exc) + { + writeln("Parsing the config file failed:"); + writelfln(isOutputATTY() ? 
"%S" : "%s", exc); + } + ``` + +*******************************************************************************/ + +public abstract class ConfigException : Exception +{ + /// Position at which the error happened + public Mark yamlPosition; + + /// The path at which the key resides + public string path; + + /// If non-empty, the key under 'path' which triggered the error + /// If empty, the key should be considered part of 'path' + public string key; + + /// Constructor + public this (string path, string key, Mark position, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + super(null, file, line); + this.path = path; + this.key = key; + this.yamlPosition = position; + } + + /// Ditto + public this (string path, Mark position, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + this(path, null, position, file, line); + } + + /*************************************************************************** + + Overrides `Throwable.toString` and its sink overload + + It is quite likely that errors from this module may be printed directly + to the end user, who might not have technical knowledge. + + This format the error in a nicer format (e.g. with colors), + and will additionally provide a stack-trace if the `ConfigFillerDebug` + `debug` version was provided. + + Format_chars: + The default format char ("%s") will print a regular message. + If an uppercase 's' is used ("%S"), colors will be used. 
+ + Params: + sink = The sink to send the piece-meal string to + spec = See https://dlang.org/phobos/std_format_spec.html + + ***************************************************************************/ + + public override string toString () scope + { + // Need to be overriden otherwise the overload is shadowed + return super.toString(); + } + + /// Ditto + public override void toString (scope void delegate(in char[]) sink) const scope + @trusted + { + // This breaks the type system, as it blindly trusts a delegate + // However, the type system lacks a way to sanely build an utility + // which accepts a delegate with different qualifiers, so this is the + // less evil approach. + this.toString(cast(SinkType) sink, FormatSpec!char("%s")); + } + + /// Ditto + public void toString (scope SinkType sink, in FormatSpec!char spec) + const scope @safe + { + import core.internal.string : unsignedToTempString; + + const useColors = spec.spec == 'S'; + char[20] buffer = void; + + if (useColors) sink(Yellow); + sink(this.yamlPosition.name); + if (useColors) sink(Reset); + + sink("("); + if (useColors) sink(Cyan); + sink(unsignedToTempString(this.yamlPosition.line, buffer)); + if (useColors) sink(Reset); + sink(":"); + if (useColors) sink(Cyan); + sink(unsignedToTempString(this.yamlPosition.column, buffer)); + if (useColors) sink(Reset); + sink("): "); + + if (this.path.length || this.key.length) + { + if (useColors) sink(Yellow); + sink(this.path); + if (this.path.length && this.key.length) + sink("."); + sink(this.key); + if (useColors) sink(Reset); + sink(": "); + } + + this.formatMessage(sink, spec); + + debug (ConfigFillerDebug) + { + sink("\n\tError originated from: "); + sink(this.file); + sink("("); + sink(unsignedToTempString(line, buffer)); + sink(")"); + + if (!this.info) + return; + + () @trusted nothrow + { + try + { + sink("\n----------------"); + foreach (t; info) + { + sink("\n"); sink(t); + } + } + // ignore more errors + catch (Throwable) {} + }(); + } + } + + 
/// Hook called by `toString` to simplify coloring + protected abstract void formatMessage ( + scope SinkType sink, in FormatSpec!char spec) + const scope @safe; +} + +/// A configuration exception that is only a single message +package final class ConfigExceptionImpl : ConfigException +{ + public this (string msg, Mark position, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + this(msg, null, null, position, file, line); + } + + public this (string msg, string path, string key, Mark position, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + super(path, key, position, file, line); + this.msg = msg; + } + + protected override void formatMessage ( + scope SinkType sink, in FormatSpec!char spec) + const scope @safe + { + sink(this.msg); + } +} + +/// Exception thrown when the type of the YAML node does not match the D type +package final class TypeConfigException : ConfigException +{ + /// The actual (in the YAML document) type of the node + public string actual; + + /// The expected (as specified in the D type) type + public string expected; + + /// Constructor + public this (Node node, string expected, string path, string key = null, + string file = __FILE__, size_t line = __LINE__) + @safe nothrow + { + this(node.nodeTypeString(), expected, path, key, node.startMark(), + file, line); + } + + /// Ditto + public this (string actual, string expected, string path, string key, + Mark position, string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + super(path, key, position, file, line); + this.actual = actual; + this.expected = expected; + } + + /// Format the message with or without colors + protected override void formatMessage ( + scope SinkType sink, in FormatSpec!char spec) + const scope @safe + { + const useColors = spec.spec == 'S'; + + const fmt = "Expected to be of type %s, but is a %s"; + + if (useColors) + formattedWrite(sink, fmt, this.expected.paint(Green), 
this.actual.paint(Red)); + else + formattedWrite(sink, fmt, this.expected, this.actual); + } +} + +/// Similar to a `TypeConfigException`, but specific to `Duration` +package final class DurationTypeConfigException : ConfigException +{ + /// The list of valid fields + public immutable string[] DurationSuffixes = [ + "weeks", "days", "hours", "minutes", "seconds", + "msecs", "usecs", "hnsecs", "nsecs", + ]; + + /// Actual type of the node + public string actual; + + /// Constructor + public this (Node node, string path, string file = __FILE__, size_t line = __LINE__) + @safe nothrow + { + super(path, null, node.startMark(), file, line); + this.actual = node.nodeTypeString(); + } + + /// Format the message with or without colors + protected override void formatMessage ( + scope SinkType sink, in FormatSpec!char spec) + const scope @safe + { + const useColors = spec.spec == 'S'; + + const fmt = "Field is of type %s, but expected a mapping with at least one of: %-(%s, %)"; + if (useColors) + formattedWrite(sink, fmt, this.actual.paint(Red), + this.DurationSuffixes.map!(s => s.paint(Green))); + else + formattedWrite(sink, fmt, this.actual, this.DurationSuffixes); + } +} + +/// Exception thrown when an unknown key is found in strict mode +public class UnknownKeyConfigException : ConfigException +{ + /// The list of valid field names + public immutable string[] fieldNames; + + /// Constructor + public this (string path, string key, immutable string[] fieldNames, + Mark position, string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + super(path, key, position, file, line); + this.fieldNames = fieldNames; + } + + /// Format the message with or without colors + protected override void formatMessage ( + scope SinkType sink, in FormatSpec!char spec) + const scope @safe + { + const useColors = spec.spec == 'S'; + + // Try to find a close match, as the error is likely a typo + // This is especially important when the config file has a large + // number 
of fields, where the message is otherwise near-useless. + const origSound = soundexer(this.key); + auto matches = this.fieldNames.filter!(f => f.soundexer == origSound); + const hasMatch = !matches.save.empty; + + if (hasMatch) + { + const fmt = "Key is not a valid member of this section. Did you mean: %-(%s, %)"; + if (useColors) + formattedWrite(sink, fmt, matches.map!(f => f.paint(Green))); + else + formattedWrite(sink, fmt, matches); + } + else + { + // No match, just print everything + const fmt = "Key is not a valid member of this section. There are %s valid keys: %-(%s, %)"; + if (useColors) + formattedWrite(sink, fmt, this.fieldNames.length.paint(Yellow), + this.fieldNames.map!(f => f.paint(Green))); + else + formattedWrite(sink, fmt, this.fieldNames.length, this.fieldNames); + } + } +} + +/// Exception thrown when a required key is missing +public class MissingKeyException : ConfigException +{ + /// Constructor + public this (string path, string key, Mark position, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + super(path, key, position, file, line); + } + + /// Format the message with or without colors + protected override void formatMessage ( + scope SinkType sink, in FormatSpec!char spec) + const scope @safe + { + sink("Required key was not found in configuration or command line arguments"); + } +} + +/// Wrap an user-thrown Exception that happened in a Converter/ctor/fromString +public class ConstructionException : ConfigException +{ + /// Constructor + public this (Exception next, string path, Mark position, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + super(path, position, file, line); + this.next = next; + } + + /// Format the message with or without colors + protected override void formatMessage ( + scope SinkType sink, in FormatSpec!char spec) + const scope @trusted + { + if (auto dyn = cast(ConfigException) this.next) + dyn.toString(sink, spec); + else + 
sink(this.next.message); + } +} diff --git a/source/dub/internal/configy/FieldRef.d b/source/dub/internal/configy/FieldRef.d new file mode 100644 index 0000000..a04a23c --- /dev/null +++ b/source/dub/internal/configy/FieldRef.d @@ -0,0 +1,201 @@ +/******************************************************************************* + + Implement a template to keep track of a field references + + Passing field references by `alias` template parameter creates many problem, + and is extremely cumbersome to work with. Instead, we pass an instance of + a `FieldRef` around, which also contains structured information. + + Copyright: + Copyright (c) 2019-2022 BOSAGORA Foundation + All rights reserved. + + License: + MIT License. See LICENSE for details. + +*******************************************************************************/ + +module dub.internal.configy.FieldRef; + +// Renamed imports as the names exposed by `FieldRef` shadow the imported ones. +import dub.internal.configy.Attributes : CAName = Name, CAOptional = Optional, SetInfo; + +import std.meta; +import std.traits; + +/******************************************************************************* + + A reference to a field in a `struct` + + The compiler sometimes rejects passing fields by `alias`, or complains about + missing `this` (meaning it tries to evaluate the value). Sometimes, it also + discards the UDAs. + + To prevent this from happening, we always pass around a `FieldRef`, + which wraps the parent struct type (`T`), the name of the field + as `FieldName`, and other informations. + + To avoid any issue, eponymous usage is also avoided, hence the reference + needs to be accessed using `Ref`. 
+ +*******************************************************************************/ + +package template FieldRef (alias T, string name, bool forceOptional = false) +{ + /// The reference to the field + public alias Ref = __traits(getMember, T, name); + + /// Type of the field + public alias Type = typeof(Ref); + + /// The name of the field in the struct itself + public alias FieldName = name; + + /// The name used in the configuration field (taking `@Name` into account) + static if (hasUDA!(Ref, CAName)) + { + static assert (getUDAs!(Ref, CAName).length == 1, + "Field `" ~ fullyQualifiedName!(Ref) ~ + "` cannot have more than one `Name` attribute"); + + public immutable Name = getUDAs!(Ref, CAName)[0].name; + + public immutable Pattern = getUDAs!(Ref, CAName)[0].startsWith; + } + else + { + public immutable Name = FieldName; + public immutable Pattern = false; + } + + /// Default value of the field (may or may not be `Type.init`) + public enum Default = __traits(getMember, T.init, name); + + /// Evaluates to `true` if this field is to be considered optional + /// (does not need to be present in the YAML document) + public enum Optional = forceOptional || + hasUDA!(Ref, CAOptional) || + is(immutable(Type) == immutable(bool)) || + is(Type : SetInfo!FT, FT) || + (Default != Type.init); +} + +unittest +{ + import dub.internal.configy.Attributes : Name; + + static struct Config1 + { + int integer2 = 42; + @Name("notStr2") + @(42) string str2; + } + + static struct Config2 + { + Config1 c1dup = { 42, "Hello World" }; + string message = "Something"; + } + + static struct Config3 + { + Config1 c1; + int integer; + string str; + Config2 c2 = { c1dup: { integer2: 69 } }; + } + + static assert(is(FieldRef!(Config3, "c2").Type == Config2)); + static assert(FieldRef!(Config3, "c2").Default != Config2.init); + static assert(FieldRef!(Config2, "message").Default == Config2.init.message); + alias NFR1 = FieldRef!(Config3, "c2"); + alias NFR2 = FieldRef!(NFR1.Ref, "c1dup"); + alias 
NFR3 = FieldRef!(NFR2.Ref, "integer2"); + alias NFR4 = FieldRef!(NFR2.Ref, "str2"); + static assert(hasUDA!(NFR4.Ref, int)); + + static assert(FieldRefTuple!(Config3)[1].Name == "integer"); + static assert(FieldRefTuple!(FieldRefTuple!(Config3)[0].Type)[1].Name == "notStr2"); +} + +/// A pseudo `FieldRef` used for structs which are not fields (top-level) +package template StructFieldRef (ST, string DefaultName = null) +{ + /// + public enum Ref = ST.init; + + /// + public alias Type = ST; + + /// + public enum Default = ST.init; + + /// + public enum Optional = false; + + /// Some places reference their parent's Name / FieldName + public enum Name = DefaultName; + /// Ditto + public enum FieldName = DefaultName; +} + +/// A pseudo `FieldRef` for nested types (e.g. arrays / associative arrays) +package template NestedFieldRef (ElemT, alias FR) +{ + /// + public enum Ref = ElemT.init; + /// + public alias Type = ElemT; + /// + public enum Name = FR.Name; + /// + public enum FieldName = FR.FieldName; + /// Element or keys are never optional + public enum Optional = false; + +} + +/// Get a tuple of `FieldRef` from a `struct` +package template FieldRefTuple (T) +{ + static assert(is(T == struct), + "Argument " ~ T.stringof ~ " to `FieldRefTuple` should be a `struct`"); + + /// + static if (__traits(getAliasThis, T).length == 0) + public alias FieldRefTuple = staticMap!(Pred, FieldNameTuple!T); + else + { + /// Tuple of strings of aliased fields + /// As of DMD v2.100.0, only a single alias this is supported in D. 
+ private immutable AliasedFieldNames = __traits(getAliasThis, T); + static assert(AliasedFieldNames.length == 1, "Multiple `alias this` are not supported"); + + // Ignore alias to functions (if it's a property we can't do anything) + static if (isSomeFunction!(__traits(getMember, T, AliasedFieldNames))) + public alias FieldRefTuple = staticMap!(Pred, FieldNameTuple!T); + else + { + /// "Base" field names minus aliased ones + private immutable BaseFields = Erase!(AliasedFieldNames, FieldNameTuple!T); + static assert(BaseFields.length == FieldNameTuple!(T).length - 1); + + public alias FieldRefTuple = AliasSeq!( + staticMap!(Pred, BaseFields), + FieldRefTuple!(typeof(__traits(getMember, T, AliasedFieldNames)))); + } + } + + private alias Pred (string name) = FieldRef!(T, name); +} + +/// Returns: An alias sequence of field names, taking UDAs (`@Name` et al) into account +package alias FieldsName (T) = staticMap!(FieldRefToName, FieldRefTuple!T); + +/// Helper template for `staticMap` used for strict mode +private enum FieldRefToName (alias FR) = FR.Name; + +/// Dub extension +package enum IsPattern (alias FR) = FR.Pattern; +/// Dub extension +package alias Patterns (T) = staticMap!(FieldRefToName, Filter!(IsPattern, FieldRefTuple!T)); diff --git a/source/dub/internal/configy/Read.d b/source/dub/internal/configy/Read.d new file mode 100644 index 0000000..f807456 --- /dev/null +++ b/source/dub/internal/configy/Read.d @@ -0,0 +1,1075 @@ +/******************************************************************************* + + Utilities to fill a struct representing the configuration with the content + of a YAML document. + + The main function of this module is `parseConfig`. Convenience functions + `parseConfigString` and `parseConfigFile` are also available. + + The type parameter to those three functions must be a struct and is used + to drive the processing of the YAML node. When an error is encountered, + an `Exception` will be thrown, with a descriptive message. 
+ The rules by which the struct is filled are designed to be + as intuitive as possible, and are described below. + + Optional_Fields: + One of the major convenience offered by this utility is its handling + of optional fields. A field is detected as optional if it has + an initializer that is different from its type `init` value, + for example `string field = "Something";` is an optional field, + but `int count = 0;` is not. + To mark a field as optional even with its default value, + use the `Optional` UDA: `@Optional int count = 0;`. + + Converter: + Because config structs may contain complex types such as + a Phobos type, a user-defined `Amount`, or Vibe.d's `URL`, + one may need to apply a converter to a struct's field. + Converters are functions that take a YAML `Node` as argument + and return a type that is implicitly convertible to the field type + (usually just the field type). They offer the most power to users, + as they can inspect the YAML structure, but should be used as a last resort. + + Composite_Types: + Processing starts from a `struct` at the top level, and recurse into + every fields individually. If a field is itself a struct, + the filler will attempt the following, in order: + - If the field has no value and is not optional, an Exception will + be thrown with an error message detailing where the issue happened. + - If the field has no value and is optional, the default value will + be used. + - If the field has a value, the filler will first check for a converter + and use it if present. + - If the type has a `static` method named `fromString` whose sole argument + is a `string`, it will be used. + - If the type has a constructor whose sole argument is a `string`, + it will be used; + - Finally, the filler will attempt to deserialize all struct members + one by one and pass them to the default constructor, if there is any. + - If none of the above succeeded, a `static assert` will trigger. 
+ + Alias_this: + If a `struct` contains an `alias this`, the field that is aliased will be + ignored, instead the config parser will parse nested fields as if they + were part of the enclosing structure. This allow to re-use a single `struct` + in multiple place without having to resort to a `mixin template`. + Having an initializer will make all fields in the aliased struct optional. + The aliased field cannot have attributes other than `@Optional`, + which will then apply to all fields it exposes. + + Duration_parsing: + If the config field is of type `core.time.Duration`, special parsing rules + will apply. There are two possible forms in which a Duration field may + be expressed. In the first form, the YAML node should be a mapping, + and it will be checked for fields matching the supported units + in `core.time`: `weeks`, `days`, `hours`, `minutes`, `seconds`, `msecs`, + `usecs`, `hnsecs`, `nsecs`. Strict parsing option will be respected. + The values of the fields will then be added together, so the following + YAML usages are equivalent: + --- + // sleepFor: + // hours: 8 + // minutes: 30 + --- + and: + --- + // sleepFor: + // minutes: 510 + --- + Provided that the definition of the field is: + --- + public Duration sleepFor; + --- + + In the second form, the field should have a suffix composed of an + underscore ('_'), followed by a unit name as defined in `core.time`. + This can be either the field name directly, or a name override. + The latter is recommended to avoid confusion when using the field in code. + In this form, the YAML node is expected to be a scalar. 
+ So the previous example, using this form, would be expressed as: + --- + sleepFor_minutes: 510 + --- + and the field definition should be one of those two: + --- + public @Name("sleepFor_minutes") Duration sleepFor; /// Prefer this + public Duration sleepFor_minutes; /// This works too + --- + + Those forms are mutually exclusive, so a field with a unit suffix + will error out if a mapping is used. This prevents surprises and ensures + that the error message, if any, is consistent accross user input. + + To disable or change this behavior, one may use a `Converter` instead. + + Strict_Parsing: + When strict parsing is enabled, the config filler will also validate + that the YAML nodes do not contains entry which are not present in the + mapping (struct) being processed. + This can be useful to catch typos or outdated configuration options. + + Post_Validation: + Some configuration will require validation accross multiple sections. + For example, two sections may be mutually exclusive as a whole, + or may have fields which are mutually exclusive with another section's + field(s). This kind of dependence is hard to account for declaratively, + and does not affect parsing. For this reason, the preferred way to + handle those cases is to define a `validate` member method on the + affected config struct(s), which will be called once + parsing for that mapping is completed. + If an error is detected, this method should throw an Exception. + + Enabled_or_disabled_field: + While most complex logic validation should be handled post-parsing, + some section may be optional by default, but if provided, will have + required fields. To support this use case, if a field with the name + `enabled` is present in a struct, the parser will first process it. + If it is `false`, the parser will not attempt to process the struct + further, and the other fields will have their default value. 
+ Likewise, if a field named `disabled` exists, the struct will not + be processed if it is set to `true`. + + Copyright: + Copyright (c) 2019-2022 BOSAGORA Foundation + All rights reserved. + + License: + MIT License. See LICENSE for details. + +*******************************************************************************/ + +module dub.internal.configy.Read; + +public import dub.internal.configy.Attributes; +public import dub.internal.configy.Exceptions : ConfigException; +import dub.internal.configy.Exceptions; +import dub.internal.configy.FieldRef; +import dub.internal.configy.Utils; + +import dub.internal.dyaml.exception; +import dub.internal.dyaml.node; +import dub.internal.dyaml.loader; + +import std.algorithm; +import std.conv; +import std.datetime; +import std.format; +import std.getopt; +import std.meta; +import std.range; +import std.traits; +import std.typecons : Nullable, nullable, tuple; + +static import core.time; + +// Dub-specific adjustments for output +import dub.internal.logging; + +/// Command-line arguments +public struct CLIArgs +{ + /// Path to the config file + public string config_path = "config.yaml"; + + /// Overrides for config options + public string[][string] overrides; + + /// Helper to add items to `overrides` + public void overridesHandler (string, string value) + { + import std.string; + const idx = value.indexOf('='); + if (idx < 0) return; + string k = value[0 .. idx], v = value[idx + 1 .. $]; + if (auto val = k in this.overrides) + (*val) ~= v; + else + this.overrides[k] = [ v ]; + } + + /*************************************************************************** + + Parses the base command line arguments + + This can be composed with the program argument. 
+ For example, consider a program which wants to expose a `--version` + switch, the definition could look like this: + --- + public struct ProgramCLIArgs + { + public CLIArgs base; // This struct + + public alias base this; // For convenience + + public bool version_; // Program-specific part + } + --- + Then, an application-specific configuration routine would be: + --- + public GetoptResult parse (ref ProgramCLIArgs clargs, ref string[] args) + { + auto r = clargs.base.parse(args); + if (r.helpWanted) return r; + return getopt( + args, + "version", "Print the application version, &clargs.version_"); + } + --- + + Params: + args = The command line args to parse (parsed options will be removed) + passThrough = Whether to enable `config.passThrough` and + `config.keepEndOfOptions`. `true` by default, to allow + composability. If your program doesn't have other + arguments, pass `false`. + + Returns: + The result of calling `getopt` + + ***************************************************************************/ + + public GetoptResult parse (ref string[] args, bool passThrough = true) + { + return getopt( + args, + // `caseInsensistive` is the default, but we need something + // with the same type for the ternary + passThrough ? config.keepEndOfOptions : config.caseInsensitive, + // Also the default, same reasoning + passThrough ? config.passThrough : config.noPassThrough, + "config|c", + "Path to the config file. 
Defaults to: " ~ this.config_path, + &this.config_path, + + "override|O", + "Override a config file value\n" ~ + "Example: -O foo.bar=true -o dns=1.1.1.1 -o dns=2.2.2.2\n" ~ + "Array values are additive, other items are set to the last override", + &this.overridesHandler, + ); + } +} + +/******************************************************************************* + + Attempt to read and process the config file at `path`, print any error + + This 'simple' overload of the more detailed `parseConfigFile` will attempt + to read the file at `path`, and return a `Nullable` instance of it. + If an error happens, either because the file isn't readable or + the configuration has an issue, a message will be printed to `stderr`, + with colors if the output is a TTY, and a `null` instance will be returned. + + The calling code can hence just read a config file via: + ``` + int main () + { + auto configN = parseConfigFileSimple!Config("config.yaml"); + if (configN.isNull()) return 1; // Error path + auto config = configN.get(); + // Rest of the program ... + } + ``` + An overload accepting `CLIArgs args` also exists. + + Params: + path = Path of the file to read from + args = Command line arguments on which `parse` has been called + strict = Whether the parsing should reject unknown keys in the + document, warn, or ignore them (default: `StrictMode.Error`) + + Returns: + An initialized `Config` instance if reading/parsing was successful; + a `null` instance otherwise. 
+ +*******************************************************************************/ + +public Nullable!T parseConfigFileSimple (T) (string path, StrictMode strict = StrictMode.Error) +{ + return parseConfigFileSimple!(T)(CLIArgs(path), strict); +} + + +/// Ditto +public Nullable!T parseConfigFileSimple (T) (in CLIArgs args, StrictMode strict = StrictMode.Error) +{ + try + { + Node root = Loader.fromFile(args.config_path).load(); + return nullable(parseConfig!T(args, root, strict)); + } + catch (ConfigException exc) + { + exc.printException(); + return typeof(return).init; + } + catch (Exception exc) + { + // Other Exception type may be thrown by D-YAML, + // they won't include rich information. + logWarn("%s", exc.message()); + return typeof(return).init; + } +} + +/******************************************************************************* + + Print an Exception, potentially with colors on + + Trusted because of `stderr` usage. + +*******************************************************************************/ + +private void printException (scope ConfigException exc) @trusted +{ + import dub.internal.logging; + + if (hasColors) + logWarn("%S", exc); + else + logWarn("%s", exc.message()); +} + +/******************************************************************************* + + Parses the config file or string and returns a `Config` instance. + + Params: + cmdln = command-line arguments (containing the path to the config) + path = When parsing a string, the path corresponding to it + strict = Whether the parsing should reject unknown keys in the + document, warn, or ignore them (default: `StrictMode.Error`) + + Throws: + `Exception` if parsing the config file failed. 
+ + Returns: + `Config` instance + +*******************************************************************************/ + +public T parseConfigFile (T) (in CLIArgs cmdln, StrictMode strict = StrictMode.Error) +{ + Node root = Loader.fromFile(cmdln.config_path).load(); + return parseConfig!T(cmdln, root, strict); +} + +/// ditto +public T parseConfigString (T) (string data, string path, StrictMode strict = StrictMode.Error) +{ + CLIArgs cmdln = { config_path: path }; + auto loader = Loader.fromString(data); + loader.name = path; + Node root = loader.load(); + return parseConfig!T(cmdln, root, strict); +} + +/******************************************************************************* + + Process the content of the YAML document described by `node` into an + instance of the struct `T`. + + See the module description for a complete overview of this function. + + Params: + T = Type of the config struct to fill + cmdln = Command line arguments + node = The root node matching `T` + strict = Action to take when encountering unknown keys in the document + + Returns: + An instance of `T` filled with the content of `node` + + Throws: + If the content of `node` cannot satisfy the requirements set by `T`, + or if `node` contain extra fields and `strict` is `true`. + +*******************************************************************************/ + +public T parseConfig (T) ( + in CLIArgs cmdln, Node node, StrictMode strict = StrictMode.Error) +{ + static assert(is(T == struct), "`" ~ __FUNCTION__ ~ + "` should only be called with a `struct` type as argument, not: `" ~ + fullyQualifiedName!T ~ "`"); + + final switch (node.nodeID) + { + case NodeID.mapping: + dbgWrite("Parsing config '%s', strict: %s", + fullyQualifiedName!T, + strict == StrictMode.Warn ? 
+ strict.paint(Yellow) : strict.paintIf(!!strict, Green, Red)); + return node.parseMapping!(StructFieldRef!T)( + null, T.init, const(Context)(cmdln, strict), null); + case NodeID.sequence: + case NodeID.scalar: + case NodeID.invalid: + throw new TypeConfigException(node, "mapping (object)", "document root"); + } +} + +/******************************************************************************* + + The behavior to have when encountering a field in YAML not present + in the config definition. + +*******************************************************************************/ + +public enum StrictMode +{ + /// Issue an error by throwing an `UnknownKeyConfigException` + Error = 0, + /// Write a message to `stderr`, but continue processing the file + Warn = 1, + /// Be silent and do nothing + Ignore = 2, +} + +/// Used to pass around configuration +package struct Context +{ + /// + private CLIArgs cmdln; + + /// + private StrictMode strict; +} + +/******************************************************************************* + + Parse a mapping from `node` into an instance of `T` + + Params: + TLFR = Top level field reference for this mapping + node = The YAML node object matching the struct being read + path = The runtime path to this mapping, used for nested types + defaultValue = The default value to use for `T`, which can be different + from `T.init` when recursing into fields with initializers. 
+ ctx = A context where properties that need to be conserved during + recursion are stored + fieldDefaults = Default value for some fields, used for `Key` recursion + +*******************************************************************************/ +private TLFR.Type parseMapping (alias TLFR) + (Node node, string path, auto ref TLFR.Type defaultValue, + in Context ctx, in Node[string] fieldDefaults) +{ + static assert(is(TLFR.Type == struct), "`parseMapping` called with wrong type (should be a `struct`)"); + assert(node.nodeID == NodeID.mapping, "Internal error: parseMapping shouldn't have been called"); + + dbgWrite("%s: `parseMapping` called for '%s' (node entries: %s)", + TLFR.Type.stringof.paint(Cyan), path.paint(Cyan), + node.length.paintIf(!!node.length, Green, Red)); + + static foreach (FR; FieldRefTuple!(TLFR.Type)) + { + static if (FR.Name != FR.FieldName && hasMember!(TLFR.Type, FR.Name) && + !is(typeof(mixin("TLFR.Type.", FR.Name)) == function)) + static assert (FieldRef!(TLFR.Type, FR.Name).Name != FR.Name, + "Field `" ~ FR.FieldName ~ "` `@Name` attribute shadows field `" ~ + FR.Name ~ "` in `" ~ TLFR.Type.stringof ~ "`: Add a `@Name` attribute to `" ~ + FR.Name ~ "` or change that of `" ~ FR.FieldName ~ "`"); + } + + if (ctx.strict != StrictMode.Ignore) + { + /// First, check that all the sections found in the mapping are present in the type + /// If not, the user might have made a typo. + immutable string[] fieldNames = [ FieldsName!(TLFR.Type) ]; + immutable string[] patterns = [ Patterns!(TLFR.Type) ]; + FIELD: foreach (const ref Node key, const ref Node value; node) + { + const k = key.as!string; + if (!fieldNames.canFind(k)) + { + foreach (p; patterns) + if (k.startsWith(p)) + // Require length because `0` would match `canFind` + // and we don't want to allow `$PATTERN-` + if (k[p.length .. 
$].length > 1 && k[p.length] == '-') + continue FIELD; + + if (ctx.strict == StrictMode.Warn) + { + scope exc = new UnknownKeyConfigException( + path, key.as!string, fieldNames, key.startMark()); + exc.printException(); + } + else + throw new UnknownKeyConfigException( + path, key.as!string, fieldNames, key.startMark()); + } + } + } + + const enabledState = node.isMappingEnabled!(TLFR.Type)(defaultValue); + + if (enabledState.field != EnabledState.Field.None) + dbgWrite("%s: Mapping is enabled: %s", TLFR.Type.stringof.paint(Cyan), (!!enabledState).paintBool()); + + auto convertField (alias FR) () + { + static if (FR.Name != FR.FieldName) + dbgWrite("Field name `%s` will use YAML field `%s`", + FR.FieldName.paint(Yellow), FR.Name.paint(Green)); + // Using exact type here matters: we could get a qualified type + // (e.g. `immutable(string)`) if the field is qualified, + // which causes problems. + FR.Type default_ = __traits(getMember, defaultValue, FR.FieldName); + + // If this struct is disabled, do not attempt to parse anything besides + // the `enabled` / `disabled` field. 
+ if (!enabledState) + { + // Even this is too noisy + version (none) + dbgWrite("%s: %s field of disabled struct, default: %s", + path.paint(Cyan), "Ignoring".paint(Yellow), default_); + + static if (FR.Name == "enabled") + return false; + else static if (FR.Name == "disabled") + return true; + else + return default_; + } + + if (auto ptr = FR.FieldName in fieldDefaults) + { + dbgWrite("Found %s (%s.%s) in `fieldDefaults`", + FR.Name.paint(Cyan), path.paint(Cyan), FR.FieldName.paint(Cyan)); + + if (ctx.strict && FR.FieldName in node) + throw new ConfigExceptionImpl("'Key' field is specified twice", path, FR.FieldName, node.startMark()); + return (*ptr).parseField!(FR)(path.addPath(FR.FieldName), default_, ctx) + .dbgWriteRet("Using value '%s' from fieldDefaults for field '%s'", + FR.FieldName.paint(Cyan)); + } + + // This, `FR.Pattern`, and the field in `@Name` are special support for `dub` + static if (FR.Pattern) + { + static if (is(FR.Type : V[K], K, V)) + { + alias AAFieldRef = NestedFieldRef!(V, FR); + static assert(is(K : string), "Key type should be string-like"); + } + else + static assert(0, "Cannot have pattern on non-AA field"); + + AAFieldRef.Type[string] result; + foreach (pair; node.mapping) + { + const key = pair.key.as!string; + if (!key.startsWith(FR.Name)) + continue; + string suffix = key[FR.Name.length .. $]; + if (suffix.length) + { + if (suffix[0] == '-') suffix = suffix[1 .. $]; + else continue; + } + + result[suffix] = pair.value.parseField!(AAFieldRef)( + path.addPath(key), default_.get(key, AAFieldRef.Type.init), ctx); + } + bool hack = true; + if (hack) return result; + } + + if (auto ptr = FR.Name in node) + { + dbgWrite("%s: YAML field is %s in node%s", + FR.Name.paint(Cyan), "present".paint(Green), + (FR.Name == FR.FieldName ? 
"" : " (note that field name is overriden)").paint(Yellow)); + return (*ptr).parseField!(FR)(path.addPath(FR.Name), default_, ctx) + .dbgWriteRet("Using value '%s' from YAML document for field '%s'", + FR.FieldName.paint(Cyan)); + } + + dbgWrite("%s: Field is %s from node%s", + FR.Name.paint(Cyan), "missing".paint(Red), + (FR.Name == FR.FieldName ? "" : " (note that field name is overriden)").paint(Yellow)); + + // A field is considered optional if it has an initializer that is different + // from its default value, or if it has the `Optional` UDA. + // In that case, just return this value. + static if (FR.Optional) + return default_ + .dbgWriteRet("Using default value '%s' for optional field '%s'", FR.FieldName.paint(Cyan)); + + // The field is not present, but it could be because it is an optional section. + // For example, the section could be defined as: + // --- + // struct RequestLimit { size_t reqs = 100; } + // struct Config { RequestLimit limits; } + // --- + // In this case we need to recurse into `RequestLimit` to check if any + // of its field is required. 
+ else static if (mightBeOptional!FR) + { + const npath = path.addPath(FR.Name); + string[string] aa; + return Node(aa).parseMapping!(FR)(npath, default_, ctx, null); + } + else + throw new MissingKeyException(path, FR.Name, node.startMark()); + } + + FR.Type convert (alias FR) () + { + static if (__traits(getAliasThis, TLFR.Type).length == 1 && + __traits(getAliasThis, TLFR.Type)[0] == FR.FieldName) + { + static assert(FR.Name == FR.FieldName, + "Field `" ~ fullyQualifiedName!(FR.Ref) ~ + "` is the target of an `alias this` and cannot have a `@Name` attribute"); + static assert(!hasConverter!(FR.Ref), + "Field `" ~ fullyQualifiedName!(FR.Ref) ~ + "` is the target of an `alias this` and cannot have a `@Converter` attribute"); + + alias convertW(string FieldName) = convert!(FieldRef!(FR.Type, FieldName, FR.Optional)); + return FR.Type(staticMap!(convertW, FieldNameTuple!(FR.Type))); + } + else + return convertField!(FR)(); + } + + debug (ConfigFillerDebug) + { + indent++; + scope (exit) indent--; + } + + TLFR.Type doValidation (TLFR.Type result) + { + static if (is(typeof(result.validate()))) + { + if (enabledState) + { + dbgWrite("%s: Calling `%s` method", + TLFR.Type.stringof.paint(Cyan), "validate()".paint(Green)); + result.validate(); + } + else + { + dbgWrite("%s: Ignoring `%s` method on disabled mapping", + TLFR.Type.stringof.paint(Cyan), "validate()".paint(Green)); + } + } + else if (enabledState) + dbgWrite("%s: No `%s` method found", + TLFR.Type.stringof.paint(Cyan), "validate()".paint(Yellow)); + + return result; + } + + // This might trigger things like "`this` is not accessible". + // In this case, the user most likely needs to provide a converter. 
+ alias convertWrapper(string FieldName) = convert!(FieldRef!(TLFR.Type, FieldName)); + return doValidation(TLFR.Type(staticMap!(convertWrapper, FieldNameTuple!(TLFR.Type)))); +} + +/******************************************************************************* + + Parse a field, trying to match up the compile-time expectation with + the run time value of the Node (`nodeID`). + + This is the central point which does "type conversion", from the YAML node + to the field type. Whenever adding support for a new type, things should + happen here. + + Because a `struct` can be filled from either a mapping or a scalar, + this function will first try the converter / fromString / string ctor + methods before defaulting to fieldwise construction. + + Note that optional fields are checked before recursion happens, + so this method does not do this check. + +*******************************************************************************/ + +package FR.Type parseField (alias FR) + (Node node, string path, auto ref FR.Type defaultValue, in Context ctx) +{ + if (node.nodeID == NodeID.invalid) + throw new TypeConfigException(node, "valid", path); + + // If we reached this, it means the field is set, so just recurse + // to peel the type + static if (is(FR.Type : SetInfo!FT, FT)) + return FR.Type( + parseField!(FieldRef!(FR.Type, "value"))(node, path, defaultValue, ctx), + true); + + else static if (hasConverter!(FR.Ref)) + return wrapException(node.viaConverter!(FR)(path, ctx), path, node.startMark()); + + else static if (hasFromYAML!(FR.Type)) + { + scope impl = new ConfigParserImpl!(FR.Type)(node, path, ctx); + return wrapException(FR.Type.fromYAML(impl), path, node.startMark()); + } + + else static if (hasFromString!(FR.Type)) + return wrapException(FR.Type.fromString(node.as!string), path, node.startMark()); + + else static if (hasStringCtor!(FR.Type)) + return wrapException(FR.Type(node.as!string), path, node.startMark()); + + else static if (is(immutable(FR.Type) == 
immutable(core.time.Duration))) + { + if (node.nodeID != NodeID.mapping) + throw new DurationTypeConfigException(node, path); + return node.parseMapping!(StructFieldRef!DurationMapping)( + path, DurationMapping.make(defaultValue), ctx, null).opCast!Duration; + } + + else static if (is(FR.Type == struct)) + { + if (node.nodeID != NodeID.mapping) + throw new TypeConfigException(node, "mapping (object)", path); + return node.parseMapping!(FR)(path, defaultValue, ctx, null); + } + + // Handle string early as they match the sequence rule too + else static if (isSomeString!(FR.Type)) + // Use `string` type explicitly because `Variant` thinks + // `immutable(char)[]` (aka `string`) and `immutable(char[])` + // (aka `immutable(string)`) are not compatible. + return node.parseScalar!(string)(path); + // Enum too, as their base type might be an array (including strings) + else static if (is(FR.Type == enum)) + return node.parseScalar!(FR.Type)(path); + + else static if (is(FR.Type : E[K], E, K)) + { + if (node.nodeID != NodeID.mapping) + throw new TypeConfigException(node, "mapping (associative array)", path); + + // Note: As of June 2022 (DMD v2.100.0), associative arrays cannot + // have initializers, hence their UX for config is less optimal. 
+ return node.mapping().map!( + (Node.Pair pair) { + return tuple( + pair.key.get!K, + pair.value.parseField!(NestedFieldRef!(E, FR))( + format("%s[%s]", path, pair.key.as!string), E.init, ctx)); + }).assocArray(); + + } + else static if (is(FR.Type : E[], E)) + { + static if (hasUDA!(FR.Ref, Key)) + { + static assert(getUDAs!(FR.Ref, Key).length == 1, + "`" ~ fullyQualifiedName!(FR.Ref) ~ + "` field shouldn't have more than one `Key` attribute"); + static assert(is(E == struct), + "Field `" ~ fullyQualifiedName!(FR.Ref) ~ + "` has a `Key` attribute, but is a sequence of `" ~ + fullyQualifiedName!E ~ "`, not a sequence of `struct`"); + + string key = getUDAs!(FR.Ref, Key)[0].name; + + if (node.nodeID != NodeID.mapping && node.nodeID != NodeID.sequence) + throw new TypeConfigException(node, "mapping (object) or sequence", path); + + if (node.nodeID == NodeID.mapping) return node.mapping().map!( + (Node.Pair pair) { + if (pair.value.nodeID != NodeID.mapping) + throw new TypeConfigException( + "sequence of " ~ pair.value.nodeTypeString(), + "sequence of mapping (array of objects)", + path, null, node.startMark()); + + return pair.value.parseMapping!(StructFieldRef!E)( + path.addPath(pair.key.as!string), + E.init, ctx, key.length ? [ key: pair.key ] : null); + }).array(); + } + if (node.nodeID != NodeID.sequence) + throw new TypeConfigException(node, "sequence (array)", path); + + // We pass `E.init` as default value as it is not going to be used: + // Either there is something in the YAML document, and that will be + // converted, or `sequence` will not iterate. + return node.sequence.enumerate.map!( + kv => kv.value.parseField!(NestedFieldRef!(E, FR))( + format("%s[%s]", path, kv.index), E.init, ctx)) + .array(); + } + else + { + static assert (!is(FR.Type == union), + "`union` are not supported. 
Use a converter instead"); + return node.parseScalar!(FR.Type)(path); + } +} + +/// Parse a node as a scalar +private T parseScalar (T) (Node node, string path) +{ + if (node.nodeID != NodeID.scalar) + throw new TypeConfigException(node, "scalar (value)", path); + + static if (is(T == enum)) + return node.as!string.to!(T); + else + return node.as!(T); +} + +/******************************************************************************* + + Write a potentially throwing user-provided expression in ConfigException + + The user-provided hooks may throw (e.g. `fromString / the constructor), + and the error may or may not be clear. We can't do anything about a bad + message but we can wrap the thrown exception in a `ConfigException` + to provide the location in the yaml file where the error happened. + + Params: + exp = The expression that may throw + path = Path within the config file of the field + position = Position of the node in the YAML file + file = Call site file (otherwise the message would point to this function) + line = Call site line (see `file` reasoning) + + Returns: + The result of `exp` evaluation. 
+ +*******************************************************************************/ + +private T wrapException (T) (lazy T exp, string path, Mark position, + string file = __FILE__, size_t line = __LINE__) +{ + try + return exp; + catch (ConfigException exc) + throw exc; + catch (Exception exc) + throw new ConstructionException(exc, path, position, file, line); +} + +/// Allows us to reuse parseMapping and strict parsing +private struct DurationMapping +{ + public SetInfo!long weeks; + public SetInfo!long days; + public SetInfo!long hours; + public SetInfo!long minutes; + public SetInfo!long seconds; + public SetInfo!long msecs; + public SetInfo!long usecs; + public SetInfo!long hnsecs; + public SetInfo!long nsecs; + + private static DurationMapping make (Duration def) @safe pure nothrow @nogc + { + typeof(return) result; + auto fullSplit = def.split(); + result.weeks = SetInfo!long(fullSplit.weeks, fullSplit.weeks != 0); + result.days = SetInfo!long(fullSplit.days, fullSplit.days != 0); + result.hours = SetInfo!long(fullSplit.hours, fullSplit.hours != 0); + result.minutes = SetInfo!long(fullSplit.minutes, fullSplit.minutes != 0); + result.seconds = SetInfo!long(fullSplit.seconds, fullSplit.seconds != 0); + result.msecs = SetInfo!long(fullSplit.msecs, fullSplit.msecs != 0); + result.usecs = SetInfo!long(fullSplit.usecs, fullSplit.usecs != 0); + result.hnsecs = SetInfo!long(fullSplit.hnsecs, fullSplit.hnsecs != 0); + // nsecs is ignored by split as it's not representable in `Duration` + return result; + } + + /// + public void validate () const @safe + { + // That check should never fail, as the YAML parser would error out, + // but better be safe than sorry. 
+ foreach (field; this.tupleof) + if (field.set) + return; + + throw new Exception( + "Expected at least one of the components (weeks, days, hours, " ~ + "minutes, seconds, msecs, usecs, hnsecs, nsecs) to be set"); + } + + /// Allow conversion to a `Duration` + public Duration opCast (T : Duration) () const scope @safe pure nothrow @nogc + { + return core.time.weeks(this.weeks) + core.time.days(this.days) + + core.time.hours(this.hours) + core.time.minutes(this.minutes) + + core.time.seconds(this.seconds) + core.time.msecs(this.msecs) + + core.time.usecs(this.usecs) + core.time.hnsecs(this.hnsecs) + + core.time.nsecs(this.nsecs); + } +} + +/// Evaluates to `true` if we should recurse into the struct via `parseMapping` +private enum mightBeOptional (alias FR) = is(FR.Type == struct) && + !is(immutable(FR.Type) == immutable(core.time.Duration)) && + !hasConverter!(FR.Ref) && !hasFromString!(FR.Type) && + !hasStringCtor!(FR.Type) && !hasFromYAML!(FR.Type); + +/// Convenience template to check for the presence of converter(s) +private enum hasConverter (alias Field) = hasUDA!(Field, Converter); + +/// Provided a field reference `FR` which is known to have at least one converter, +/// perform basic checks and return the value after applying the converter. 
+private auto viaConverter (alias FR) (Node node, string path, in Context context) +{ + enum Converters = getUDAs!(FR.Ref, Converter); + static assert (Converters.length, + "Internal error: `viaConverter` called on field `" ~ + FR.FieldName ~ "` with no converter"); + + static assert(Converters.length == 1, + "Field `" ~ FR.FieldName ~ "` cannot have more than one `Converter`"); + + scope impl = new ConfigParserImpl!(FR.Type)(node, path, context); + return Converters[0].converter(impl); +} + +private final class ConfigParserImpl (T) : ConfigParser!T +{ + private Node node_; + private string path_; + private const(Context) context_; + + /// Ctor + public this (Node n, string p, const Context c) scope @safe pure nothrow @nogc + { + this.node_ = n; + this.path_ = p; + this.context_ = c; + } + + public final override inout(Node) node () inout @safe pure nothrow @nogc + { + return this.node_; + } + + public final override string path () const @safe pure nothrow @nogc + { + return this.path_; + } + + protected final override const(Context) context () const @safe pure nothrow @nogc + { + return this.context_; + } +} + +/// Helper predicate +private template NameIs (string searching) +{ + enum bool Pred (alias FR) = (searching == FR.Name); +} + +/// Returns whether or not the field has a `enabled` / `disabled` field, +/// and its value. If it does not, returns `true`. 
+private EnabledState isMappingEnabled (M) (Node node, auto ref M default_) +{ + import std.meta : Filter; + + alias EMT = Filter!(NameIs!("enabled").Pred, FieldRefTuple!M); + alias DMT = Filter!(NameIs!("disabled").Pred, FieldRefTuple!M); + + static if (EMT.length) + { + static assert (DMT.length == 0, + "`enabled` field `" ~ EMT[0].FieldName ~ + "` conflicts with `disabled` field `" ~ DMT[0].FieldName ~ "`"); + + if (auto ptr = "enabled" in node) + return EnabledState(EnabledState.Field.Enabled, (*ptr).as!bool); + return EnabledState(EnabledState.Field.Enabled, __traits(getMember, default_, EMT[0].FieldName)); + } + else static if (DMT.length) + { + if (auto ptr = "disabled" in node) + return EnabledState(EnabledState.Field.Disabled, (*ptr).as!bool); + return EnabledState(EnabledState.Field.Disabled, __traits(getMember, default_, DMT[0].FieldName)); + } + else + { + return EnabledState(EnabledState.Field.None); + } +} + +/// Retun value of `isMappingEnabled` +private struct EnabledState +{ + /// Used to determine which field controls a mapping enabled state + private enum Field + { + /// No such field, the mapping is considered enabled + None, + /// The field is named 'enabled' + Enabled, + /// The field is named 'disabled' + Disabled, + } + + /// Check if the mapping is considered enabled + public bool opCast () const scope @safe pure @nogc nothrow + { + return this.field == Field.None || + (this.field == Field.Enabled && this.fieldValue) || + (this.field == Field.Disabled && !this.fieldValue); + } + + /// Type of field found + private Field field; + + /// Value of the field, interpretation depends on `field` + private bool fieldValue; +} + +/// Evaluates to `true` if `T` is a `struct` with a default ctor +private enum hasFieldwiseCtor (T) = (is(T == struct) && is(typeof(() => T(T.init.tupleof)))); + +/// Evaluates to `true` if `T` has a static method that is designed to work with this library +private enum hasFromYAML (T) = 
is(typeof(T.fromYAML(ConfigParser!(T).init)) : T); + +/// Evaluates to `true` if `T` has a static method that accepts a `string` and returns a `T` +private enum hasFromString (T) = is(typeof(T.fromString(string.init)) : T); + +/// Evaluates to `true` if `T` is a `struct` which accepts a single string as argument +private enum hasStringCtor (T) = (is(T == struct) && is(typeof(T.__ctor)) && + Parameters!(T.__ctor).length == 1 && + is(typeof(() => T(string.init)))); + +unittest +{ + static struct Simple + { + int value; + string otherValue; + } + + static assert( hasFieldwiseCtor!Simple); + static assert(!hasStringCtor!Simple); + + static struct PubKey + { + ubyte[] data; + + this (string hex) @safe pure nothrow @nogc{} + } + + static assert(!hasFieldwiseCtor!PubKey); + static assert( hasStringCtor!PubKey); + + static assert(!hasFieldwiseCtor!string); + static assert(!hasFieldwiseCtor!int); + static assert(!hasStringCtor!string); + static assert(!hasStringCtor!int); +} + +/// Convenience function to extend a YAML path +private string addPath (string opath, string newPart) +in(newPart.length) +do { + return opath.length ? format("%s.%s", opath, newPart) : newPart; +} diff --git a/source/dub/internal/configy/Test.d b/source/dub/internal/configy/Test.d new file mode 100644 index 0000000..a334b6d --- /dev/null +++ b/source/dub/internal/configy/Test.d @@ -0,0 +1,694 @@ +/******************************************************************************* + Contains all the tests for this library. + + Copyright: + Copyright (c) 2019-2022 BOSAGORA Foundation + All rights reserved. + + License: + MIT License. See LICENSE for details. 
+ +*******************************************************************************/ + +module dub.internal.configy.Test; + +import dub.internal.configy.Attributes; +import dub.internal.configy.Exceptions; +import dub.internal.configy.Read; +import dub.internal.configy.Utils; + +import dub.internal.dyaml.node; + +import std.format; + +import core.time; + +/// Basic usage tests +unittest +{ + static struct Address + { + string address; + string city; + bool accessible; + } + + static struct Nested + { + Address address; + } + + static struct Config + { + bool enabled = true; + + string name = "Jessie"; + int age = 42; + double ratio = 24.42; + + Address address = { address: "Yeoksam-dong", city: "Seoul", accessible: true }; + + Nested nested = { address: { address: "Gangnam-gu", city: "Also Seoul", accessible: false } }; + } + + auto c1 = parseConfigString!Config("enabled: false", "/dev/null"); + assert(!c1.enabled); + assert(c1.name == "Jessie"); + assert(c1.age == 42); + assert(c1.ratio == 24.42); + + assert(c1.address.address == "Yeoksam-dong"); + assert(c1.address.city == "Seoul"); + assert(c1.address.accessible); + + assert(c1.nested.address.address == "Gangnam-gu"); + assert(c1.nested.address.city == "Also Seoul"); + assert(!c1.nested.address.accessible); +} + +// Tests for SetInfo +unittest +{ + static struct Address + { + string address; + string city; + bool accessible; + } + + static struct Config + { + SetInfo!int value; + SetInfo!int answer = 42; + SetInfo!string name = SetInfo!string("Lorene", false); + + SetInfo!Address address; + } + + auto c1 = parseConfigString!Config("value: 24", "/dev/null"); + assert(c1.value == 24); + assert(c1.value.set); + + assert(c1.answer.set); + assert(c1.answer == 42); + + assert(!c1.name.set); + assert(c1.name == "Lorene"); + + assert(!c1.address.set); + + auto c2 = parseConfigString!Config(` +name: Lorene +address: + address: Somewhere + city: Over the rainbow +`, "/dev/null"); + + assert(!c2.value.set); + assert(c2.name 
== "Lorene"); + assert(c2.name.set); + assert(c2.address.set); + assert(c2.address.address == "Somewhere"); + assert(c2.address.city == "Over the rainbow"); +} + +unittest +{ + static struct Nested { core.time.Duration timeout; } + static struct Config { Nested node; } + + try + { + auto result = parseConfigString!Config("node:\n timeout:", "/dev/null"); + assert(0); + } + catch (Exception exc) + { + assert(exc.toString() == "/dev/null(1:10): node.timeout: Field is of type scalar, " ~ + "but expected a mapping with at least one of: weeks, days, hours, minutes, " ~ + "seconds, msecs, usecs, hnsecs, nsecs"); + } + + { + auto result = parseConfigString!Nested("timeout:\n days: 10\n minutes: 100\n hours: 3\n", "/dev/null"); + assert(result.timeout == 10.days + 4.hours + 40.minutes); + } +} + +unittest +{ + static struct Config { string required; } + try + auto result = parseConfigString!Config("value: 24", "/dev/null"); + catch (ConfigException e) + { + assert(format("%s", e) == + "/dev/null(0:0): value: Key is not a valid member of this section. There are 1 valid keys: required"); + assert(format("%S", e) == + format("%s/dev/null%s(%s0%s:%s0%s): %svalue%s: Key is not a valid member of this section. 
" ~ + "There are %s1%s valid keys: %srequired%s", Yellow, Reset, Cyan, Reset, Cyan, Reset, + Yellow, Reset, Yellow, Reset, Green, Reset)); + } +} + +// Test for various type errors +unittest +{ + static struct Mapping + { + string value; + } + + static struct Config + { + @Optional Mapping map; + @Optional Mapping[] array; + int scalar; + } + + try + { + auto result = parseConfigString!Config("map: Hello World", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(0:5): map: Expected to be of type mapping (object), but is a scalar"); + } + + try + { + auto result = parseConfigString!Config("map:\n - Hello\n - World", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(1:2): map: Expected to be of type mapping (object), but is a sequence"); + } + + try + { + auto result = parseConfigString!Config("scalar:\n - Hello\n - World", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(1:2): scalar: Expected to be of type scalar (value), but is a sequence"); + } + + try + { + auto result = parseConfigString!Config("scalar:\n hello:\n World", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(1:2): scalar: Expected to be of type scalar (value), but is a mapping"); + } +} + +// Test for strict mode +unittest +{ + static struct Config + { + string value; + string valhu; + string halvue; + } + + try + { + auto result = parseConfigString!Config("valeu: This is a typo", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(0:0): valeu: Key is not a valid member of this section. 
Did you mean: value, valhu"); + } +} + +// Test for required key +unittest +{ + static struct Nested + { + string required; + string optional = "Default"; + } + + static struct Config + { + Nested inner; + } + + try + { + auto result = parseConfigString!Config("inner:\n optional: Not the default value", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(1:2): inner.required: Required key was not found in configuration or command line arguments"); + } +} + +// Testing 'validate()' on nested structures +unittest +{ + __gshared int validateCalls0 = 0; + __gshared int validateCalls1 = 1; + __gshared int validateCalls2 = 2; + + static struct SecondLayer + { + string value = "default"; + + public void validate () const + { + validateCalls2++; + } + } + + static struct FirstLayer + { + bool enabled = true; + SecondLayer ltwo; + + public void validate () const + { + validateCalls1++; + } + } + + static struct Config + { + FirstLayer lone; + + public void validate () const + { + validateCalls0++; + } + } + + auto r1 = parseConfigString!Config("lone:\n ltwo:\n value: Something\n", "/dev/null"); + + assert(r1.lone.ltwo.value == "Something"); + // `validateCalls` are given different value to avoid false-positive + // if they are set to 0 / mixed up + assert(validateCalls0 == 1); + assert(validateCalls1 == 2); + assert(validateCalls2 == 3); + + auto r2 = parseConfigString!Config("lone:\n enabled: false\n", "/dev/null"); + assert(validateCalls0 == 2); // + 1 + assert(validateCalls1 == 2); // Other are disabled + assert(validateCalls2 == 3); +} + +// Test the throwing ctor / fromString +unittest +{ + static struct ThrowingFromString + { + public static ThrowingFromString fromString (scope const(char)[] value) + @safe pure + { + throw new Exception("Some meaningful error message"); + } + + public int value; + } + + static struct ThrowingCtor + { + public this (scope const(char)[] value) + @safe pure + { + throw new 
Exception("Something went wrong... Obviously"); + } + + public int value; + } + + static struct InnerConfig + { + public int value; + @Optional ThrowingCtor ctor; + @Optional ThrowingFromString fromString; + + @Converter!int( + (scope ConfigParser!int parser) { + // We have to trick DMD a bit so that it infers an `int` return + // type but doesn't emit a "Statement is not reachable" warning + if (parser.node is Node.init || parser.node !is Node.init ) + throw new Exception("You shall not pass"); + return 42; + }) + @Optional int converter; + } + + static struct Config + { + public InnerConfig config; + } + + try + { + auto result = parseConfigString!Config("config:\n value: 42\n ctor: 42", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(2:8): config.ctor: Something went wrong... Obviously"); + } + + try + { + auto result = parseConfigString!Config("config:\n value: 42\n fromString: 42", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(2:14): config.fromString: Some meaningful error message"); + } + + try + { + auto result = parseConfigString!Config("config:\n value: 42\n converter: 42", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(2:13): config.converter: You shall not pass"); + } + + // We also need to test with arrays, to ensure they are correctly called + static struct InnerArrayConfig + { + @Optional int value; + @Optional ThrowingCtor ctor; + @Optional ThrowingFromString fromString; + } + + static struct ArrayConfig + { + public InnerArrayConfig[] configs; + } + + try + { + auto result = parseConfigString!ArrayConfig("configs:\n - ctor: something", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(1:10): configs[0].ctor: Something went wrong... 
Obviously"); + } + + try + { + auto result = parseConfigString!ArrayConfig( + "configs:\n - value: 42\n - fromString: something", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(2:16): configs[1].fromString: Some meaningful error message"); + } +} + +// Test duplicate fields detection +unittest +{ + static struct Config + { + @Name("shadow") int value; + @Name("value") int shadow; + } + + auto result = parseConfigString!Config("shadow: 42\nvalue: 84\n", "/dev/null"); + assert(result.value == 42); + assert(result.shadow == 84); + + static struct BadConfig + { + int value; + @Name("value") int something; + } + + // Cannot test the error message, so this is as good as it gets + static assert(!is(typeof(() { + auto r = parseConfigString!BadConfig("shadow: 42\nvalue: 84\n", "/dev/null"); + }))); +} + +// Test a renamed `enabled` / `disabled` +unittest +{ + static struct ConfigA + { + @Name("enabled") bool shouldIStay; + int value; + } + + static struct ConfigB + { + @Name("disabled") bool orShouldIGo; + int value; + } + + { + auto c = parseConfigString!ConfigA("enabled: true\nvalue: 42", "/dev/null"); + assert(c.shouldIStay == true); + assert(c.value == 42); + } + + { + auto c = parseConfigString!ConfigB("disabled: false\nvalue: 42", "/dev/null"); + assert(c.orShouldIGo == false); + assert(c.value == 42); + } +} + +// Test for 'mightBeOptional' & missing key +unittest +{ + static struct RequestLimit { size_t reqs = 100; } + static struct Nested { @Name("jay") int value; } + static struct Config { @Name("chris") Nested value; RequestLimit limits; } + + auto r = parseConfigString!Config("chris:\n jay: 42", "/dev/null"); + assert(r.limits.reqs == 100); + + try + { + auto _ = parseConfigString!Config("limits:\n reqs: 42", "/dev/null"); + } + catch (ConfigException exc) + { + assert(exc.toString() == "(0:0): chris.jay: Required key was not found in configuration or command line arguments"); + } +} + +// Support for 
associative arrays +unittest +{ + static struct Nested + { + int[string] answers; + } + + static struct Parent + { + Nested[string] questions; + string[int] names; + } + + auto c = parseConfigString!Parent( +`names: + 42: "Forty two" + 97: "Quatre vingt dix sept" +questions: + first: + answers: + # Need to use quotes here otherwise it gets interpreted as + # true / false, perhaps a dyaml issue ? + 'yes': 42 + 'no': 24 + second: + answers: + maybe: 69 + whynot: 20 +`, "/dev/null"); + + assert(c.names == [42: "Forty two", 97: "Quatre vingt dix sept"]); + assert(c.questions.length == 2); + assert(c.questions["first"] == Nested(["yes": 42, "no": 24])); + assert(c.questions["second"] == Nested(["maybe": 69, "whynot": 20])); +} + +unittest +{ + static struct FlattenMe + { + int value; + string name; + } + + static struct Config + { + FlattenMe flat = FlattenMe(24, "Four twenty"); + alias flat this; + + FlattenMe not_flat; + } + + auto c = parseConfigString!Config( + "value: 42\nname: John\nnot_flat:\n value: 69\n name: Henry", + "/dev/null"); + assert(c.flat.value == 42); + assert(c.flat.name == "John"); + assert(c.not_flat.value == 69); + assert(c.not_flat.name == "Henry"); + + auto c2 = parseConfigString!Config( + "not_flat:\n value: 69\n name: Henry", "/dev/null"); + assert(c2.flat.value == 24); + assert(c2.flat.name == "Four twenty"); + + static struct OptConfig + { + @Optional FlattenMe flat; + alias flat this; + + int value; + } + auto c3 = parseConfigString!OptConfig("value: 69\n", "/dev/null"); + assert(c3.value == 69); +} + +unittest +{ + static struct Config + { + @Name("names") + string[] names_; + + size_t names () const scope @safe pure nothrow @nogc + { + return this.names_.length; + } + } + + auto c = parseConfigString!Config("names:\n - John\n - Luca\n", "/dev/null"); + assert(c.names_ == [ "John", "Luca" ]); + assert(c.names == 2); +} + +unittest +{ + static struct BuildTemplate + { + string targetName; + string platform; + } + static struct BuildConfig 
+ { + BuildTemplate config; + alias config this; + } + static struct Config + { + string name; + + @Optional BuildConfig config; + alias config this; + } + + auto c = parseConfigString!Config("name: dummy\n", "/dev/null"); + assert(c.name == "dummy"); + + auto c2 = parseConfigString!Config("name: dummy\nplatform: windows\n", "/dev/null"); + assert(c2.name == "dummy"); + assert(c2.config.platform == "windows"); +} + +// Make sure unions don't compile +unittest +{ + static union MyUnion + { + string value; + int number; + } + + static struct Config + { + MyUnion hello; + } + + static assert(!is(typeof(parseConfigString!Config("hello: world\n", "/dev/null")))); + static assert(!is(typeof(parseConfigString!MyUnion("hello: world\n", "/dev/null")))); +} + +// Test the `@Key` attribute +unittest +{ + static struct Interface + { + string name; + string static_ip; + } + + static struct Config + { + string profile; + + @Key("name") + immutable(Interface)[] ifaces = [ + Interface("lo", "127.0.0.1"), + ]; + } + + auto c = parseConfigString!Config(`profile: default +ifaces: + eth0: + static_ip: "192.168.1.42" + lo: + static_ip: "127.0.0.42" +`, "/dev/null"); + assert(c.ifaces.length == 2); + assert(c.ifaces == [ Interface("eth0", "192.168.1.42"), Interface("lo", "127.0.0.42")]); +} + +// Nested ConstructionException +unittest +{ + static struct WillFail + { + string name; + this (string value) @safe pure + { + throw new Exception("Parsing failed!"); + } + } + + static struct Container + { + WillFail[] array; + } + + static struct Config + { + Container data; + } + + try auto c = parseConfigString!Config(`data: + array: + - Not + - Working +`, "/dev/null"); + catch (Exception exc) + assert(exc.toString() == `/dev/null(2:6): data.array[0]: Parsing failed!`); +} diff --git a/source/dub/internal/configy/Utils.d b/source/dub/internal/configy/Utils.d new file mode 100644 index 0000000..be981bd --- /dev/null +++ b/source/dub/internal/configy/Utils.d @@ -0,0 +1,124 @@ 
+/******************************************************************************* + + Utilities used internally by the config parser. + + Compile this library with `-debug=ConfigFillerDebug` to get verbose output. + This can be achieved with `debugVersions` in dub, or by depending on the + `debug` configuration provided by `dub.json`. + + Copyright: + Copyright (c) 2019-2022 BOSAGORA Foundation + All rights reserved. + + License: + MIT License. See LICENSE for details. + +*******************************************************************************/ + +module dub.internal.configy.Utils; + +import std.format; + +/// Type of sink used by the `toString` +package alias SinkType = void delegate (in char[]) @safe; + +/******************************************************************************* + + Debugging utility for config filler + + Since this module does a lot of meta-programming, some things can easily + go wrong. For example, a condition being false might happen because it is + genuinely false or because the condition is buggy. + + To make figuring out if a config is properly parsed or not, a little utility + (config-dumper) exists, which will provide a verbose output of what the + config filler does. To do this, `config-dumper` is compiled with + the below `debug` version. + +*******************************************************************************/ + +debug (ConfigFillerDebug) +{ + /// A thin wrapper around `stderr.writefln` with indentation + package void dbgWrite (Args...) (string fmt, Args args) + { + import std.stdio; + stderr.write(IndentChars[0 .. indent >= IndentChars.length ? $ : indent]); + stderr.writefln(fmt, args); + } + + /// Log a value that is to be returned + /// The value will be the first argument and painted yellow + package T dbgWriteRet (T, Args...) 
(auto ref T return_, string fmt, Args args) + { + dbgWrite(fmt, return_.paint(Yellow), args); + return return_; + } + + /// The current indentation + package size_t indent; + + /// Helper for indentation (who needs more than 16 levels of indent?) + private immutable IndentChars = "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"; +} +else +{ + /// No-op + package void dbgWrite (Args...) (string fmt, lazy Args args) {} + + /// Ditto + package T dbgWriteRet (T, Args...) (auto ref T return_, string fmt, lazy Args args) + { + return return_; + } +} + +/// Thin wrapper to simplify colorization +package struct Colored (T) +{ + /// Color used + private string color; + + /// Value to print + private T value; + + /// Hook for `formattedWrite` + public void toString (scope SinkType sink) + { + static if (is(typeof(T.init.length) : size_t)) + if (this.value.length == 0) return; + + formattedWrite(sink, "%s%s%s", this.color, this.value, Reset); + } +} + +/// Ditto +package Colored!T paint (T) (T arg, string color) +{ + return Colored!T(color, arg); +} + +/// Paint `arg` in color `ifTrue` if `cond` evaluates to `true`, use color `ifFalse` otherwise +package Colored!T paintIf (T) (T arg, bool cond, string ifTrue, string ifFalse) +{ + return Colored!T(cond ? ifTrue : ifFalse, arg); +} + +/// Paint a boolean in green if `true`, red otherwise, unless `reverse` is set to `true`, +/// in which case the colors are swapped +package Colored!bool paintBool (bool value, bool reverse = false) +{ + return value.paintIf(reverse ^ value, Green, Red); +} + +/// Reset the foreground color used +package immutable Reset = "\u001b[0m"; +/// Set the foreground color to red, used for `false`, missing, errors, etc... +package immutable Red = "\u001b[31m"; +/// Set the foreground color to red, used for warnings and other things +/// that should draw attention but do not pose an immediate issue +package immutable Yellow = "\u001b[33m"; +/// Set the foreground color to green, used for `true`, present, etc... 
+package immutable Green = "\u001b[32m"; +/// Set the foreground color to green, used field names / path +package immutable Cyan = "\u001b[36m"; diff --git a/source/dub/internal/dyaml/composer.d b/source/dub/internal/dyaml/composer.d new file mode 100644 index 0000000..867885c --- /dev/null +++ b/source/dub/internal/dyaml/composer.d @@ -0,0 +1,402 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * Composes nodes from YAML events provided by parser. + * Code based on PyYAML: http://www.pyyaml.org + */ +module dub.internal.dyaml.composer; + +import core.memory; + +import std.algorithm; +import std.array; +import std.conv; +import std.exception; +import std.format; +import std.range; +import std.typecons; + +import dub.internal.dyaml.constructor; +import dub.internal.dyaml.event; +import dub.internal.dyaml.exception; +import dub.internal.dyaml.node; +import dub.internal.dyaml.parser; +import dub.internal.dyaml.resolver; + + +package: +/** + * Exception thrown at composer errors. + * + * See_Also: MarkedYAMLException + */ +class ComposerException : MarkedYAMLException +{ + mixin MarkedExceptionCtors; +} + +///Composes YAML documents from events provided by a Parser. +struct Composer +{ + private: + ///Parser providing YAML events. + Parser parser_; + ///Resolver resolving tags (data types). + Resolver resolver_; + ///Nodes associated with anchors. Used by YAML aliases. + Node[string] anchors_; + + ///Used to reduce allocations when creating pair arrays. + /// + ///We need one appender for each nesting level that involves + ///a pair array, as the inner levels are processed as a + ///part of the outer levels. Used as a stack. + Appender!(Node.Pair[])[] pairAppenders_; + ///Used to reduce allocations when creating node arrays. 
+ /// + ///We need one appender for each nesting level that involves + ///a node array, as the inner levels are processed as a + ///part of the outer levels. Used as a stack. + Appender!(Node[])[] nodeAppenders_; + + public: + /** + * Construct a composer. + * + * Params: parser = Parser to provide YAML events. + * resolver = Resolver to resolve tags (data types). + */ + this(Parser parser, Resolver resolver) @safe + { + parser_ = parser; + resolver_ = resolver; + } + + /** + * Determine if there are any nodes left. + * + * Must be called before loading as it handles the stream start event. + */ + bool checkNode() @safe + { + // If next event is stream start, skip it + parser_.skipOver!"a.id == b"(EventID.streamStart); + + //True if there are more documents available. + return parser_.front.id != EventID.streamEnd; + } + + ///Get a YAML document as a node (the root of the document). + Node getNode() @safe + { + //Get the root node of the next document. + assert(parser_.front.id != EventID.streamEnd, + "Trying to get a node from Composer when there is no node to " ~ + "get. use checkNode() to determine if there is a node."); + + return composeDocument(); + } + + private: + + void skipExpected(const EventID id) @safe + { + const foundExpected = parser_.skipOver!"a.id == b"(id); + assert(foundExpected, text("Expected ", id, " not found.")); + } + ///Ensure that appenders for specified nesting levels exist. + /// + ///Params: pairAppenderLevel = Current level in the pair appender stack. + /// nodeAppenderLevel = Current level the node appender stack. + void ensureAppendersExist(const uint pairAppenderLevel, const uint nodeAppenderLevel) + @safe + { + while(pairAppenders_.length <= pairAppenderLevel) + { + pairAppenders_ ~= appender!(Node.Pair[])(); + } + while(nodeAppenders_.length <= nodeAppenderLevel) + { + nodeAppenders_ ~= appender!(Node[])(); + } + } + + ///Compose a YAML document and return its root node. 
+ Node composeDocument() @safe + { + skipExpected(EventID.documentStart); + + //Compose the root node. + Node node = composeNode(0, 0); + + skipExpected(EventID.documentEnd); + + anchors_.destroy(); + return node; + } + + /// Compose a node. + /// + /// Params: pairAppenderLevel = Current level of the pair appender stack. + /// nodeAppenderLevel = Current level of the node appender stack. + Node composeNode(const uint pairAppenderLevel, const uint nodeAppenderLevel) @safe + { + if(parser_.front.id == EventID.alias_) + { + const event = parser_.front; + parser_.popFront(); + const anchor = event.anchor; + enforce((anchor in anchors_) !is null, + new ComposerException("Found undefined alias: " ~ anchor, + event.startMark)); + + //If the node referenced by the anchor is uninitialized, + //it's not finished, i.e. we're currently composing it + //and trying to use it recursively here. + enforce(anchors_[anchor] != Node(), + new ComposerException("Found recursive alias: " ~ anchor, + event.startMark)); + + return anchors_[anchor]; + } + + const event = parser_.front; + const anchor = event.anchor; + if((anchor !is null) && (anchor in anchors_) !is null) + { + throw new ComposerException("Found duplicate anchor: " ~ anchor, + event.startMark); + } + + Node result; + //Associate the anchor, if any, with an uninitialized node. + //used to detect duplicate and recursive anchors. + if(anchor !is null) + { + anchors_[anchor] = Node(); + } + + switch (parser_.front.id) + { + case EventID.scalar: + result = composeScalarNode(); + break; + case EventID.sequenceStart: + result = composeSequenceNode(pairAppenderLevel, nodeAppenderLevel); + break; + case EventID.mappingStart: + result = composeMappingNode(pairAppenderLevel, nodeAppenderLevel); + break; + default: assert(false, "This code should never be reached"); + } + + if(anchor !is null) + { + anchors_[anchor] = result; + } + return result; + } + + ///Compose a scalar node. 
+ Node composeScalarNode() @safe + { + const event = parser_.front; + parser_.popFront(); + const tag = resolver_.resolve(NodeID.scalar, event.tag, event.value, + event.implicit); + + Node node = constructNode(event.startMark, event.endMark, tag, + event.value); + node.scalarStyle = event.scalarStyle; + + return node; + } + + /// Compose a sequence node. + /// + /// Params: pairAppenderLevel = Current level of the pair appender stack. + /// nodeAppenderLevel = Current level of the node appender stack. + Node composeSequenceNode(const uint pairAppenderLevel, const uint nodeAppenderLevel) + @safe + { + ensureAppendersExist(pairAppenderLevel, nodeAppenderLevel); + auto nodeAppender = &(nodeAppenders_[nodeAppenderLevel]); + + const startEvent = parser_.front; + parser_.popFront(); + const tag = resolver_.resolve(NodeID.sequence, startEvent.tag, null, + startEvent.implicit); + + while(parser_.front.id != EventID.sequenceEnd) + { + nodeAppender.put(composeNode(pairAppenderLevel, nodeAppenderLevel + 1)); + } + + Node node = constructNode(startEvent.startMark, parser_.front.endMark, + tag, nodeAppender.data.dup); + node.collectionStyle = startEvent.collectionStyle; + parser_.popFront(); + nodeAppender.clear(); + + return node; + } + + /** + * Flatten a node, merging it with nodes referenced through YAMLMerge data type. + * + * Node must be a mapping or a sequence of mappings. + * + * Params: root = Node to flatten. + * startMark = Start position of the node. + * endMark = End position of the node. + * pairAppenderLevel = Current level of the pair appender stack. + * nodeAppenderLevel = Current level of the node appender stack. + * + * Returns: Flattened mapping as pairs. + */ + Node.Pair[] flatten(ref Node root, const Mark startMark, const Mark endMark, + const uint pairAppenderLevel, const uint nodeAppenderLevel) @safe + { + void error(Node node) + { + //this is Composer, but the code is related to Constructor. 
+ throw new ConstructorException("While constructing a mapping, " ~ + "expected a mapping or a list of " ~ + "mappings for merging, but found: " ~ + text(node.type) ~ + " NOTE: line/column shows topmost parent " ~ + "to which the content is being merged", + startMark, endMark); + } + + ensureAppendersExist(pairAppenderLevel, nodeAppenderLevel); + auto pairAppender = &(pairAppenders_[pairAppenderLevel]); + + final switch (root.nodeID) + { + case NodeID.mapping: + Node[] toMerge; + toMerge.reserve(root.length); + foreach (ref Node key, ref Node value; root) + { + if(key.type == NodeType.merge) + { + toMerge ~= value; + } + else + { + auto temp = Node.Pair(key, value); + pairAppender.put(temp); + } + } + foreach (node; toMerge) + { + pairAppender.put(flatten(node, startMark, endMark, + pairAppenderLevel + 1, nodeAppenderLevel)); + } + break; + case NodeID.sequence: + foreach (ref Node node; root) + { + if (node.nodeID != NodeID.mapping) + { + error(node); + } + pairAppender.put(flatten(node, startMark, endMark, + pairAppenderLevel + 1, nodeAppenderLevel)); + } + break; + case NodeID.scalar: + case NodeID.invalid: + error(root); + break; + } + + auto flattened = pairAppender.data.dup; + pairAppender.clear(); + + return flattened; + } + + /// Compose a mapping node. + /// + /// Params: pairAppenderLevel = Current level of the pair appender stack. + /// nodeAppenderLevel = Current level of the node appender stack. 
+ Node composeMappingNode(const uint pairAppenderLevel, const uint nodeAppenderLevel) + @safe + { + ensureAppendersExist(pairAppenderLevel, nodeAppenderLevel); + const startEvent = parser_.front; + parser_.popFront(); + const tag = resolver_.resolve(NodeID.mapping, startEvent.tag, null, + startEvent.implicit); + auto pairAppender = &(pairAppenders_[pairAppenderLevel]); + + Tuple!(Node, Mark)[] toMerge; + while(parser_.front.id != EventID.mappingEnd) + { + auto pair = Node.Pair(composeNode(pairAppenderLevel + 1, nodeAppenderLevel), + composeNode(pairAppenderLevel + 1, nodeAppenderLevel)); + + //Need to flatten and merge the node referred by YAMLMerge. + if(pair.key.type == NodeType.merge) + { + toMerge ~= tuple(pair.value, cast(Mark)parser_.front.endMark); + } + //Not YAMLMerge, just add the pair. + else + { + pairAppender.put(pair); + } + } + foreach(node; toMerge) + { + merge(*pairAppender, flatten(node[0], startEvent.startMark, node[1], + pairAppenderLevel + 1, nodeAppenderLevel)); + } + + auto sorted = pairAppender.data.dup.sort!((x,y) => x.key > y.key); + if (sorted.length) { + foreach (index, const ref value; sorted[0 .. 
$ - 1].enumerate) + if (value.key == sorted[index + 1].key) { + const message = () @trusted { + return format("Key '%s' appears multiple times in mapping (first: %s)", + value.key.get!string, value.key.startMark); + }(); + throw new ComposerException(message, sorted[index + 1].key.startMark); + } + } + + Node node = constructNode(startEvent.startMark, parser_.front.endMark, + tag, pairAppender.data.dup); + node.collectionStyle = startEvent.collectionStyle; + parser_.popFront(); + + pairAppender.clear(); + return node; + } +} + +// Provide good error message on multiple keys (which JSON supports) +// DUB: This unittest is `@safe` from v2.100 as `message` was made `@safe`, not before +unittest +{ + import dub.internal.dyaml.loader : Loader; + + const str = `{ + "comment": "This is a common technique", + "name": "foobar", + "comment": "To write down comments pre-JSON5" +}`; + + try + auto node = Loader.fromString(str).load(); + catch (ComposerException exc) + assert(exc.message() == + "Key 'comment' appears multiple times in mapping " ~ + "(first: file ,line 2,column 5)\nfile ,line 4,column 5"); +} diff --git a/source/dub/internal/dyaml/constructor.d b/source/dub/internal/dyaml/constructor.d new file mode 100644 index 0000000..2a660a6 --- /dev/null +++ b/source/dub/internal/dyaml/constructor.d @@ -0,0 +1,611 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * Class that processes YAML mappings, sequences and scalars into nodes. + * This can be used to add custom data types. A tutorial can be found + * $(LINK2 https://dlang-community.github.io/D-YAML/, here). 
+ */ +module dub.internal.dyaml.constructor; + + +import std.array; +import std.algorithm; +import std.base64; +import std.container; +import std.conv; +import std.datetime; +import std.exception; +import std.regex; +import std.string; +import std.typecons; +import std.utf; + +import dub.internal.dyaml.node; +import dub.internal.dyaml.exception; +import dub.internal.dyaml.style; + +package: + +// Exception thrown at constructor errors. +class ConstructorException : YAMLException +{ + /// Construct a ConstructorException. + /// + /// Params: msg = Error message. + /// start = Start position of the error context. + /// end = End position of the error context. + this(string msg, Mark start, Mark end, string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow + { + super(msg ~ "\nstart: " ~ start.toString() ~ "\nend: " ~ end.toString(), + file, line); + } +} + +/** Constructs YAML values. + * + * Each YAML scalar, sequence or mapping has a tag specifying its data type. + * Constructor uses user-specifyable functions to create a node of desired + * data type from a scalar, sequence or mapping. + * + * + * Each of these functions is associated with a tag, and can process either + * a scalar, a sequence, or a mapping. The constructor passes each value to + * the function with corresponding tag, which then returns the resulting value + * that can be stored in a node. + * + * If a tag is detected with no known constructor function, it is considered an error. + */ +/* + * Construct a node. + * + * Params: start = Start position of the node. + * end = End position of the node. + * tag = Tag (data type) of the node. + * value = Value to construct node from (string, nodes or pairs). + * style = Style of the node (scalar or collection style). + * + * Returns: Constructed node. 
+ */ +Node constructNode(T)(const Mark start, const Mark end, const string tag, + T value) @safe + if((is(T : string) || is(T == Node[]) || is(T == Node.Pair[]))) +{ + Node newNode; + try + { + switch(tag) + { + case "tag:yaml.org,2002:null": + newNode = Node(YAMLNull(), tag); + break; + case "tag:yaml.org,2002:bool": + static if(is(T == string)) + { + newNode = Node(constructBool(value), tag); + break; + } + else throw new Exception("Only scalars can be bools"); + case "tag:yaml.org,2002:int": + static if(is(T == string)) + { + newNode = Node(constructLong(value), tag); + break; + } + else throw new Exception("Only scalars can be ints"); + case "tag:yaml.org,2002:float": + static if(is(T == string)) + { + newNode = Node(constructReal(value), tag); + break; + } + else throw new Exception("Only scalars can be floats"); + case "tag:yaml.org,2002:binary": + static if(is(T == string)) + { + newNode = Node(constructBinary(value), tag); + break; + } + else throw new Exception("Only scalars can be binary data"); + case "tag:yaml.org,2002:timestamp": + static if(is(T == string)) + { + newNode = Node(constructTimestamp(value), tag); + break; + } + else throw new Exception("Only scalars can be timestamps"); + case "tag:yaml.org,2002:str": + static if(is(T == string)) + { + newNode = Node(constructString(value), tag); + break; + } + else throw new Exception("Only scalars can be strings"); + case "tag:yaml.org,2002:value": + static if(is(T == string)) + { + newNode = Node(constructString(value), tag); + break; + } + else throw new Exception("Only scalars can be values"); + case "tag:yaml.org,2002:omap": + static if(is(T == Node[])) + { + newNode = Node(constructOrderedMap(value), tag); + break; + } + else throw new Exception("Only sequences can be ordered maps"); + case "tag:yaml.org,2002:pairs": + static if(is(T == Node[])) + { + newNode = Node(constructPairs(value), tag); + break; + } + else throw new Exception("Only sequences can be pairs"); + case "tag:yaml.org,2002:set": 
+ static if(is(T == Node.Pair[])) + { + newNode = Node(constructSet(value), tag); + break; + } + else throw new Exception("Only mappings can be sets"); + case "tag:yaml.org,2002:seq": + static if(is(T == Node[])) + { + newNode = Node(constructSequence(value), tag); + break; + } + else throw new Exception("Only sequences can be sequences"); + case "tag:yaml.org,2002:map": + static if(is(T == Node.Pair[])) + { + newNode = Node(constructMap(value), tag); + break; + } + else throw new Exception("Only mappings can be maps"); + case "tag:yaml.org,2002:merge": + newNode = Node(YAMLMerge(), tag); + break; + default: + newNode = Node(value, tag); + break; + } + } + catch(Exception e) + { + throw new ConstructorException("Error constructing " ~ typeid(T).toString() + ~ ":\n" ~ e.msg, start, end); + } + + newNode.startMark_ = start; + + return newNode; +} + +private: +// Construct a boolean _node. +bool constructBool(const string str) @safe +{ + string value = str.toLower(); + if(value.among!("yes", "true", "on")){return true;} + if(value.among!("no", "false", "off")){return false;} + throw new Exception("Unable to parse boolean value: " ~ value); +} + +// Construct an integer (long) _node. +long constructLong(const string str) @safe +{ + string value = str.replace("_", ""); + const char c = value[0]; + const long sign = c != '-' ? 1 : -1; + if(c == '-' || c == '+') + { + value = value[1 .. $]; + } + + enforce(value != "", new Exception("Unable to parse float value: " ~ value)); + + long result; + try + { + //Zero. + if(value == "0") {result = cast(long)0;} + //Binary. + else if(value.startsWith("0b")){result = sign * to!int(value[2 .. $], 2);} + //Hexadecimal. + else if(value.startsWith("0x")){result = sign * to!int(value[2 .. $], 16);} + //Octal. + else if(value[0] == '0') {result = sign * to!int(value, 8);} + //Sexagesimal. 
+ else if(value.canFind(":")) + { + long val; + long base = 1; + foreach_reverse(digit; value.split(":")) + { + val += to!long(digit) * base; + base *= 60; + } + result = sign * val; + } + //Decimal. + else{result = sign * to!long(value);} + } + catch(ConvException e) + { + throw new Exception("Unable to parse integer value: " ~ value); + } + + return result; +} +@safe unittest +{ + string canonical = "685230"; + string decimal = "+685_230"; + string octal = "02472256"; + string hexadecimal = "0x_0A_74_AE"; + string binary = "0b1010_0111_0100_1010_1110"; + string sexagesimal = "190:20:30"; + + assert(685230 == constructLong(canonical)); + assert(685230 == constructLong(decimal)); + assert(685230 == constructLong(octal)); + assert(685230 == constructLong(hexadecimal)); + assert(685230 == constructLong(binary)); + assert(685230 == constructLong(sexagesimal)); +} + +// Construct a floating point (real) _node. +real constructReal(const string str) @safe +{ + string value = str.replace("_", "").toLower(); + const char c = value[0]; + const real sign = c != '-' ? 1.0 : -1.0; + if(c == '-' || c == '+') + { + value = value[1 .. $]; + } + + enforce(value != "" && value != "nan" && value != "inf" && value != "-inf", + new Exception("Unable to parse float value: " ~ value)); + + real result; + try + { + //Infinity. + if (value == ".inf"){result = sign * real.infinity;} + //Not a Number. + else if(value == ".nan"){result = real.nan;} + //Sexagesimal. + else if(value.canFind(":")) + { + real val = 0.0; + real base = 1.0; + foreach_reverse(digit; value.split(":")) + { + val += to!real(digit) * base; + base *= 60.0; + } + result = sign * val; + } + //Plain floating point. 
+ else{result = sign * to!real(value);} + } + catch(ConvException e) + { + throw new Exception("Unable to parse float value: \"" ~ value ~ "\""); + } + + return result; +} +@safe unittest +{ + bool eq(real a, real b, real epsilon = 0.2) @safe + { + return a >= (b - epsilon) && a <= (b + epsilon); + } + + string canonical = "6.8523015e+5"; + string exponential = "685.230_15e+03"; + string fixed = "685_230.15"; + string sexagesimal = "190:20:30.15"; + string negativeInf = "-.inf"; + string NaN = ".NaN"; + + assert(eq(685230.15, constructReal(canonical))); + assert(eq(685230.15, constructReal(exponential))); + assert(eq(685230.15, constructReal(fixed))); + assert(eq(685230.15, constructReal(sexagesimal))); + assert(eq(-real.infinity, constructReal(negativeInf))); + assert(to!string(constructReal(NaN)) == "nan"); +} + +// Construct a binary (base64) _node. +ubyte[] constructBinary(const string value) @safe +{ + import std.ascii : newline; + import std.array : array; + + // For an unknown reason, this must be nested to work (compiler bug?). + try + { + return Base64.decode(value.representation.filter!(c => !newline.canFind(c)).array); + } + catch(Base64Exception e) + { + throw new Exception("Unable to decode base64 value: " ~ e.msg); + } +} + +@safe unittest +{ + auto test = "The Answer: 42".representation; + char[] buffer; + buffer.length = 256; + string input = Base64.encode(test, buffer).idup; + const value = constructBinary(input); + assert(value == test); + assert(value == [84, 104, 101, 32, 65, 110, 115, 119, 101, 114, 58, 32, 52, 50]); +} + +// Construct a timestamp (SysTime) _node. +SysTime constructTimestamp(const string str) @safe +{ + string value = str; + + auto YMDRegexp = regex("^([0-9][0-9][0-9][0-9])-([0-9][0-9]?)-([0-9][0-9]?)"); + auto HMSRegexp = regex("^[Tt \t]+([0-9][0-9]?):([0-9][0-9]):([0-9][0-9])(\\.[0-9]*)?"); + auto TZRegexp = regex("^[ \t]*Z|([-+][0-9][0-9]?)(:[0-9][0-9])?"); + + try + { + // First, get year, month and day. 
+ auto matches = match(value, YMDRegexp); + + enforce(!matches.empty, + new Exception("Unable to parse timestamp value: " ~ value)); + + auto captures = matches.front.captures; + const year = to!int(captures[1]); + const month = to!int(captures[2]); + const day = to!int(captures[3]); + + // If available, get hour, minute, second and fraction, if present. + value = matches.front.post; + matches = match(value, HMSRegexp); + if(matches.empty) + { + return SysTime(DateTime(year, month, day), UTC()); + } + + captures = matches.front.captures; + const hour = to!int(captures[1]); + const minute = to!int(captures[2]); + const second = to!int(captures[3]); + const hectonanosecond = cast(int)(to!real("0" ~ captures[4]) * 10_000_000); + + // If available, get timezone. + value = matches.front.post; + matches = match(value, TZRegexp); + if(matches.empty || matches.front.captures[0] == "Z") + { + // No timezone. + return SysTime(DateTime(year, month, day, hour, minute, second), + hectonanosecond.dur!"hnsecs", UTC()); + } + + // We have a timezone, so parse it. + captures = matches.front.captures; + int sign = 1; + int tzHours; + if(!captures[1].empty) + { + if(captures[1][0] == '-') {sign = -1;} + tzHours = to!int(captures[1][1 .. $]); + } + const tzMinutes = (!captures[2].empty) ? to!int(captures[2][1 .. 
$]) : 0; + const tzOffset = dur!"minutes"(sign * (60 * tzHours + tzMinutes)); + + return SysTime(DateTime(year, month, day, hour, minute, second), + hectonanosecond.dur!"hnsecs", + new immutable SimpleTimeZone(tzOffset)); + } + catch(ConvException e) + { + throw new Exception("Unable to parse timestamp value " ~ value ~ " : " ~ e.msg); + } + catch(DateTimeException e) + { + throw new Exception("Invalid timestamp value " ~ value ~ " : " ~ e.msg); + } + + assert(false, "This code should never be reached"); +} +@safe unittest +{ + string timestamp(string value) + { + return constructTimestamp(value).toISOString(); + } + + string canonical = "2001-12-15T02:59:43.1Z"; + string iso8601 = "2001-12-14t21:59:43.10-05:00"; + string spaceSeparated = "2001-12-14 21:59:43.10 -5"; + string noTZ = "2001-12-15 2:59:43.10"; + string noFraction = "2001-12-15 2:59:43"; + string ymd = "2002-12-14"; + + assert(timestamp(canonical) == "20011215T025943.1Z"); + //avoiding float conversion errors + assert(timestamp(iso8601) == "20011214T215943.0999999-05:00" || + timestamp(iso8601) == "20011214T215943.1-05:00"); + assert(timestamp(spaceSeparated) == "20011214T215943.0999999-05:00" || + timestamp(spaceSeparated) == "20011214T215943.1-05:00"); + assert(timestamp(noTZ) == "20011215T025943.0999999Z" || + timestamp(noTZ) == "20011215T025943.1Z"); + assert(timestamp(noFraction) == "20011215T025943Z"); + assert(timestamp(ymd) == "20021214T000000Z"); +} + +// Construct a string _node. +string constructString(const string str) @safe +{ + return str; +} + +// Convert a sequence of single-element mappings into a sequence of pairs. 
+Node.Pair[] getPairs(string type, const Node[] nodes) @safe +{ + Node.Pair[] pairs; + pairs.reserve(nodes.length); + foreach(node; nodes) + { + enforce(node.nodeID == NodeID.mapping && node.length == 1, + new Exception("While constructing " ~ type ~ + ", expected a mapping with single element")); + + pairs ~= node.as!(Node.Pair[]); + } + + return pairs; +} + +// Construct an ordered map (ordered sequence of key:value pairs without duplicates) _node. +Node.Pair[] constructOrderedMap(const Node[] nodes) @safe +{ + auto pairs = getPairs("ordered map", nodes); + + //Detect duplicates. + //TODO this should be replaced by something with deterministic memory allocation. + auto keys = new RedBlackTree!Node(); + foreach(ref pair; pairs) + { + enforce(!(pair.key in keys), + new Exception("Duplicate entry in an ordered map: " + ~ pair.key.debugString())); + keys.insert(pair.key); + } + return pairs; +} +@safe unittest +{ + Node[] alternateTypes(uint length) @safe + { + Node[] pairs; + foreach(long i; 0 .. length) + { + auto pair = (i % 2) ? Node.Pair(i.to!string, i) : Node.Pair(i, i.to!string); + pairs ~= Node([pair]); + } + return pairs; + } + + Node[] sameType(uint length) @safe + { + Node[] pairs; + foreach(long i; 0 .. length) + { + auto pair = Node.Pair(i.to!string, i); + pairs ~= Node([pair]); + } + return pairs; + } + + assertThrown(constructOrderedMap(alternateTypes(8) ~ alternateTypes(2))); + assertNotThrown(constructOrderedMap(alternateTypes(8))); + assertThrown(constructOrderedMap(sameType(64) ~ sameType(16))); + assertThrown(constructOrderedMap(alternateTypes(64) ~ alternateTypes(16))); + assertNotThrown(constructOrderedMap(sameType(64))); + assertNotThrown(constructOrderedMap(alternateTypes(64))); +} + +// Construct a pairs (ordered sequence of key: value pairs allowing duplicates) _node. +Node.Pair[] constructPairs(const Node[] nodes) @safe +{ + return getPairs("pairs", nodes); +} + +// Construct a set _node. 
+Node[] constructSet(const Node.Pair[] pairs) @safe +{ + // In future, the map here should be replaced with something with deterministic + // memory allocation if possible. + // Detect duplicates. + ubyte[Node] map; + Node[] nodes; + nodes.reserve(pairs.length); + foreach(pair; pairs) + { + enforce((pair.key in map) is null, new Exception("Duplicate entry in a set")); + map[pair.key] = 0; + nodes ~= pair.key; + } + + return nodes; +} +@safe unittest +{ + Node.Pair[] set(uint length) @safe + { + Node.Pair[] pairs; + foreach(long i; 0 .. length) + { + pairs ~= Node.Pair(i.to!string, YAMLNull()); + } + + return pairs; + } + + auto DuplicatesShort = set(8) ~ set(2); + auto noDuplicatesShort = set(8); + auto DuplicatesLong = set(64) ~ set(4); + auto noDuplicatesLong = set(64); + + bool eq(Node.Pair[] a, Node[] b) + { + if(a.length != b.length){return false;} + foreach(i; 0 .. a.length) + { + if(a[i].key != b[i]) + { + return false; + } + } + return true; + } + + auto nodeDuplicatesShort = DuplicatesShort.dup; + auto nodeNoDuplicatesShort = noDuplicatesShort.dup; + auto nodeDuplicatesLong = DuplicatesLong.dup; + auto nodeNoDuplicatesLong = noDuplicatesLong.dup; + + assertThrown(constructSet(nodeDuplicatesShort)); + assertNotThrown(constructSet(nodeNoDuplicatesShort)); + assertThrown(constructSet(nodeDuplicatesLong)); + assertNotThrown(constructSet(nodeNoDuplicatesLong)); +} + +// Construct a sequence (array) _node. +Node[] constructSequence(Node[] nodes) @safe +{ + return nodes; +} + +// Construct an unordered map (unordered set of key:value _pairs without duplicates) _node. +Node.Pair[] constructMap(Node.Pair[] pairs) @safe +{ + //Detect duplicates. + //TODO this should be replaced by something with deterministic memory allocation. 
+ auto keys = new RedBlackTree!Node(); + foreach(ref pair; pairs) + { + enforce(!(pair.key in keys), + new Exception("Duplicate entry in a map: " ~ pair.key.debugString())); + keys.insert(pair.key); + } + return pairs; +} diff --git a/source/dub/internal/dyaml/dumper.d b/source/dub/internal/dyaml/dumper.d new file mode 100644 index 0000000..02b8cc2 --- /dev/null +++ b/source/dub/internal/dyaml/dumper.d @@ -0,0 +1,298 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * YAML dumper. + * + * Code based on $(LINK2 http://www.pyyaml.org, PyYAML). + */ +module dub.internal.dyaml.dumper; + +import std.array; +import std.range.primitives; +import std.typecons; + +import dub.internal.dyaml.emitter; +import dub.internal.dyaml.event; +import dub.internal.dyaml.exception; +import dub.internal.dyaml.linebreak; +import dub.internal.dyaml.node; +import dub.internal.dyaml.representer; +import dub.internal.dyaml.resolver; +import dub.internal.dyaml.serializer; +import dub.internal.dyaml.style; +import dub.internal.dyaml.tagdirective; + + +/** + * Dumps YAML documents to files or streams. + * + * User specified Representer and/or Resolver can be used to support new + * tags / data types. + * + * Setters are provided to affect output details (style, etc.). + */ +auto dumper() +{ + auto dumper = Dumper(); + dumper.resolver = Resolver.withDefaultResolvers; + return dumper; +} + +struct Dumper +{ + private: + //Indentation width. + int indent_ = 2; + //Tag directives to use. + TagDirective[] tags_; + public: + //Resolver to resolve tags. + Resolver resolver; + //Write scalars in canonical form? + bool canonical; + //Preferred text width. + uint textWidth = 80; + //Line break to use. Unix by default. + LineBreak lineBreak = LineBreak.unix; + //YAML version string. Default is 1.1. 
+ string YAMLVersion = "1.1"; + //Always explicitly write document start? Default is no explicit start. + bool explicitStart = false; + //Always explicitly write document end? Default is no explicit end. + bool explicitEnd = false; + + //Name of the output file or stream, used in error messages. + string name = ""; + + // Default style for scalar nodes. If style is $(D ScalarStyle.invalid), the _style is chosen automatically. + ScalarStyle defaultScalarStyle = ScalarStyle.invalid; + // Default style for collection nodes. If style is $(D CollectionStyle.invalid), the _style is chosen automatically. + CollectionStyle defaultCollectionStyle = CollectionStyle.invalid; + + @disable bool opEquals(ref Dumper); + @disable int opCmp(ref Dumper); + + ///Set indentation width. 2 by default. Must not be zero. + @property void indent(uint indent) pure @safe nothrow + in + { + assert(indent != 0, "Can't use zero YAML indent width"); + } + do + { + indent_ = indent; + } + + /** + * Specify tag directives. + * + * A tag directive specifies a shorthand notation for specifying _tags. + * Each tag directive associates a handle with a prefix. This allows for + * compact tag notation. + * + * Each handle specified MUST start and end with a '!' character + * (a single character "!" handle is allowed as well). + * + * Only alphanumeric characters, '-', and '__' may be used in handles. + * + * Each prefix MUST not be empty. + * + * The "!!" handle is used for default YAML _tags with prefix + * "tag:yaml.org,2002:". This can be overridden. + * + * Params: tags = Tag directives (keys are handles, values are prefixes). + */ + @property void tagDirectives(string[string] tags) pure @safe + { + TagDirective[] t; + foreach(handle, prefix; tags) + { + assert(handle.length >= 1 && handle[0] == '!' && handle[$ - 1] == '!', + "A tag handle is empty or does not start and end with a " ~ + "'!' 
character : " ~ handle); + assert(prefix.length >= 1, "A tag prefix is empty"); + t ~= TagDirective(handle, prefix); + } + tags_ = t; + } + /// + @safe unittest + { + auto dumper = dumper(); + string[string] directives; + directives["!short!"] = "tag:long.org,2011:"; + //This will emit tags starting with "tag:long.org,2011" + //with a "!short!" prefix instead. + dumper.tagDirectives(directives); + dumper.dump(new Appender!string(), Node("foo")); + } + + /** + * Dump one or more YAML _documents to the file/stream. + * + * Note that while you can call dump() multiple times on the same + * dumper, you will end up writing multiple YAML "files" to the same + * file/stream. + * + * Params: documents = Documents to _dump (root nodes of the _documents). + * + * Throws: YAMLException on error (e.g. invalid nodes, + * unable to write to file/stream). + */ + void dump(CharacterType = char, Range)(Range range, Node[] documents ...) + if (isOutputRange!(Range, CharacterType) && + isOutputRange!(Range, char) || isOutputRange!(Range, wchar) || isOutputRange!(Range, dchar)) + { + try + { + auto emitter = new Emitter!(Range, CharacterType)(range, canonical, indent_, textWidth, lineBreak); + auto serializer = Serializer(resolver, explicitStart ? Yes.explicitStart : No.explicitStart, + explicitEnd ? 
Yes.explicitEnd : No.explicitEnd, YAMLVersion, tags_); + serializer.startStream(emitter); + foreach(ref document; documents) + { + auto data = representData(document, defaultScalarStyle, defaultCollectionStyle); + serializer.serialize(emitter, data); + } + serializer.endStream(emitter); + } + catch(YAMLException e) + { + throw new YAMLException("Unable to dump YAML to stream " + ~ name ~ " : " ~ e.msg, e.file, e.line); + } + } +} +///Write to a file +@safe unittest +{ + auto node = Node([1, 2, 3, 4, 5]); + dumper().dump(new Appender!string(), node); +} +///Write multiple YAML documents to a file +@safe unittest +{ + auto node1 = Node([1, 2, 3, 4, 5]); + auto node2 = Node("This document contains only one string"); + dumper().dump(new Appender!string(), node1, node2); + //Or with an array: + dumper().dump(new Appender!string(), [node1, node2]); +} +///Write to memory +@safe unittest +{ + auto stream = new Appender!string(); + auto node = Node([1, 2, 3, 4, 5]); + dumper().dump(stream, node); +} +///Use a custom resolver to support custom data types and/or implicit tags +@safe unittest +{ + import std.regex : regex; + auto node = Node([1, 2, 3, 4, 5]); + auto dumper = dumper(); + dumper.resolver.addImplicitResolver("!tag", regex("A.*"), "A"); + dumper.dump(new Appender!string(), node); +} +/// Set default scalar style +@safe unittest +{ + auto stream = new Appender!string(); + auto node = Node("Hello world!"); + auto dumper = dumper(); + dumper.defaultScalarStyle = ScalarStyle.singleQuoted; + dumper.dump(stream, node); +} +/// Set default collection style +@safe unittest +{ + auto stream = new Appender!string(); + auto node = Node(["Hello", "world!"]); + auto dumper = dumper(); + dumper.defaultCollectionStyle = CollectionStyle.flow; + dumper.dump(stream, node); +} +// Make sure the styles are actually used +@safe unittest +{ + auto stream = new Appender!string(); + auto node = Node([Node("Hello world!"), Node(["Hello", "world!"])]); + auto dumper = dumper(); + 
dumper.defaultScalarStyle = ScalarStyle.singleQuoted; + dumper.defaultCollectionStyle = CollectionStyle.flow; + dumper.explicitEnd = false; + dumper.explicitStart = false; + dumper.YAMLVersion = null; + dumper.dump(stream, node); + assert(stream.data == "['Hello world!', ['Hello', 'world!']]\n"); +} +// Explicit document start/end markers +@safe unittest +{ + auto stream = new Appender!string(); + auto node = Node([1, 2, 3, 4, 5]); + auto dumper = dumper(); + dumper.explicitEnd = true; + dumper.explicitStart = true; + dumper.YAMLVersion = null; + dumper.dump(stream, node); + //Skip version string + assert(stream.data[0..3] == "---"); + //account for newline at end + assert(stream.data[$-4..$-1] == "..."); +} +@safe unittest +{ + auto stream = new Appender!string(); + auto node = Node([Node("Te, st2")]); + auto dumper = dumper(); + dumper.explicitStart = true; + dumper.explicitEnd = false; + dumper.YAMLVersion = null; + dumper.dump(stream, node); + assert(stream.data == "--- ['Te, st2']\n"); +} +// No explicit document start/end markers +@safe unittest +{ + auto stream = new Appender!string(); + auto node = Node([1, 2, 3, 4, 5]); + auto dumper = dumper(); + dumper.explicitEnd = false; + dumper.explicitStart = false; + dumper.YAMLVersion = null; + dumper.dump(stream, node); + //Skip version string + assert(stream.data[0..3] != "---"); + //account for newline at end + assert(stream.data[$-4..$-1] != "..."); +} +// Windows, macOS line breaks +@safe unittest +{ + auto node = Node(0); + { + auto stream = new Appender!string(); + auto dumper = dumper(); + dumper.explicitEnd = true; + dumper.explicitStart = true; + dumper.YAMLVersion = null; + dumper.lineBreak = LineBreak.windows; + dumper.dump(stream, node); + assert(stream.data == "--- 0\r\n...\r\n"); + } + { + auto stream = new Appender!string(); + auto dumper = dumper(); + dumper.explicitEnd = true; + dumper.explicitStart = true; + dumper.YAMLVersion = null; + dumper.lineBreak = LineBreak.macintosh; + 
dumper.dump(stream, node); + assert(stream.data == "--- 0\r...\r"); + } +} diff --git a/source/dub/internal/dyaml/emitter.d b/source/dub/internal/dyaml/emitter.d new file mode 100644 index 0000000..ee8bc80 --- /dev/null +++ b/source/dub/internal/dyaml/emitter.d @@ -0,0 +1,1690 @@ +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * YAML emitter. + * Code based on PyYAML: http://www.pyyaml.org + */ +module dub.internal.dyaml.emitter; + + +import std.algorithm; +import std.array; +import std.ascii; +import std.conv; +import std.encoding; +import std.exception; +import std.format; +import std.range; +import std.string; +import std.system; +import std.typecons; +import std.utf; + +import dub.internal.dyaml.encoding; +import dub.internal.dyaml.escapes; +import dub.internal.dyaml.event; +import dub.internal.dyaml.exception; +import dub.internal.dyaml.linebreak; +import dub.internal.dyaml.queue; +import dub.internal.dyaml.scanner; +import dub.internal.dyaml.style; +import dub.internal.dyaml.tagdirective; + + +package: + +//Stores results of analysis of a scalar, determining e.g. what scalar style to use. +struct ScalarAnalysis +{ + //Scalar itself. + string scalar; + + enum AnalysisFlags + { + empty = 1<<0, + multiline = 1<<1, + allowFlowPlain = 1<<2, + allowBlockPlain = 1<<3, + allowSingleQuoted = 1<<4, + allowDoubleQuoted = 1<<5, + allowBlock = 1<<6, + isNull = 1<<7 + } + + ///Analysis results. 
+ BitFlags!AnalysisFlags flags; +} + +private alias isNewLine = among!('\n', '\u0085', '\u2028', '\u2029'); + +private alias isSpecialChar = among!('#', ',', '[', ']', '{', '}', '&', '*', '!', '|', '>', '\\', '\'', '"', '%', '@', '`'); + +private alias isFlowIndicator = among!(',', '?', '[', ']', '{', '}'); + +private alias isSpace = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029', ' ', '\t'); + +//Emits YAML events into a file/stream. +struct Emitter(Range, CharType) if (isOutputRange!(Range, CharType)) +{ + private: + ///Default tag handle shortcuts and replacements. + static TagDirective[] defaultTagDirectives_ = + [TagDirective("!", "!"), TagDirective("!!", "tag:yaml.org,2002:")]; + + ///Stream to write to. + Range stream_; + + /// Type used for upcoming emitter steps + alias EmitterFunction = void function(scope typeof(this)*) @safe; + + ///Stack of states. + Appender!(EmitterFunction[]) states_; + + ///Current state. + EmitterFunction state_; + + ///Event queue. + Queue!Event events_; + ///Event we're currently emitting. + Event event_; + + ///Stack of previous indentation levels. + Appender!(int[]) indents_; + ///Current indentation level. + int indent_ = -1; + + ///Level of nesting in flow context. If 0, we're in block context. + uint flowLevel_ = 0; + + /// Describes context (where we are in the document). + enum Context + { + /// Root node of a document. + root, + /// Sequence. + sequence, + /// Mapping. + mappingNoSimpleKey, + /// Mapping, in a simple key. + mappingSimpleKey, + } + /// Current context. + Context context_; + + ///Characteristics of the last emitted character: + + ///Line. + uint line_ = 0; + ///Column. + uint column_ = 0; + ///Whitespace character? + bool whitespace_ = true; + ///indentation space, '-', '?', or ':'? + bool indentation_ = true; + + ///Does the document require an explicit document indicator? + bool openEnded_; + + ///Formatting details. + + ///Canonical scalar format? + bool canonical_; + ///Best indentation width. 
+ uint bestIndent_ = 2; + ///Best text width. + uint bestWidth_ = 80; + ///Best line break character/s. + LineBreak bestLineBreak_; + + ///Tag directive handle - prefix pairs. + TagDirective[] tagDirectives_; + + ///Anchor/alias to process. + string preparedAnchor_ = null; + ///Tag to process. + string preparedTag_ = null; + + ///Analysis result of the current scalar. + ScalarAnalysis analysis_; + ///Style of the current scalar. + ScalarStyle style_ = ScalarStyle.invalid; + + public: + @disable int opCmp(ref Emitter); + @disable bool opEquals(ref Emitter); + + /** + * Construct an emitter. + * + * Params: stream = Output range to write to. + * canonical = Write scalars in canonical form? + * indent = Indentation width. + * lineBreak = Line break character/s. + */ + this(Range stream, const bool canonical, const int indent, const int width, + const LineBreak lineBreak) @safe + { + states_.reserve(32); + indents_.reserve(32); + stream_ = stream; + canonical_ = canonical; + nextExpected!"expectStreamStart"(); + + if(indent > 1 && indent < 10){bestIndent_ = indent;} + if(width > bestIndent_ * 2) {bestWidth_ = width;} + bestLineBreak_ = lineBreak; + + analysis_.flags.isNull = true; + } + + ///Emit an event. + void emit(Event event) @safe + { + events_.push(event); + while(!needMoreEvents()) + { + event_ = events_.pop(); + callNext(); + event_.destroy(); + } + } + + private: + ///Pop and return the newest state in states_. + EmitterFunction popState() @safe + in(states_.data.length > 0, + "Emitter: Need to pop a state but there are no states left") + { + const result = states_.data[$-1]; + states_.shrinkTo(states_.data.length - 1); + return result; + } + + void pushState(string D)() @safe + { + states_ ~= mixin("function(typeof(this)* self) { self."~D~"(); }"); + } + + ///Pop and return the newest indent in indents_. 
+ int popIndent() @safe + in(indents_.data.length > 0, + "Emitter: Need to pop an indent level but there" ~ + " are no indent levels left") + { + const result = indents_.data[$-1]; + indents_.shrinkTo(indents_.data.length - 1); + return result; + } + + ///Write a string to the file/stream. + void writeString(const scope char[] str) @safe + { + static if(is(CharType == char)) + { + copy(str, stream_); + } + static if(is(CharType == wchar)) + { + const buffer = to!wstring(str); + copy(buffer, stream_); + } + static if(is(CharType == dchar)) + { + const buffer = to!dstring(str); + copy(buffer, stream_); + } + } + + ///In some cases, we wait for a few next events before emitting. + bool needMoreEvents() @safe nothrow + { + if(events_.length == 0){return true;} + + const event = events_.peek(); + if(event.id == EventID.documentStart){return needEvents(1);} + if(event.id == EventID.sequenceStart){return needEvents(2);} + if(event.id == EventID.mappingStart) {return needEvents(3);} + + return false; + } + + ///Determines if we need specified number of more events. + bool needEvents(in uint count) @safe nothrow + { + int level; + + foreach(const event; events_.range) + { + if(event.id.among!(EventID.documentStart, EventID.sequenceStart, EventID.mappingStart)) {++level;} + else if(event.id.among!(EventID.documentEnd, EventID.sequenceEnd, EventID.mappingEnd)) {--level;} + else if(event.id == EventID.streamStart){level = -1;} + + if(level < 0) + { + return false; + } + } + + return events_.length < (count + 1); + } + + ///Increase indentation level. + void increaseIndent(const Flag!"flow" flow = No.flow, const bool indentless = false) @safe + { + indents_ ~= indent_; + if(indent_ == -1) + { + indent_ = flow ? bestIndent_ : 0; + } + else if(!indentless) + { + indent_ += bestIndent_; + } + } + + ///Determines if the type of current event is as specified. Throws if no event. 
+ bool eventTypeIs(in EventID id) const pure @safe + in(!event_.isNull, "Expected an event, but no event is available.") + { + return event_.id == id; + } + + + //States. + + + //Stream handlers. + + ///Handle start of a file/stream. + void expectStreamStart() @safe + in(eventTypeIs(EventID.streamStart), + "Expected streamStart, but got " ~ event_.idString) + { + + writeStreamStart(); + nextExpected!"expectDocumentStart!(Yes.first)"(); + } + + ///Expect nothing, throwing if we still have something. + void expectNothing() @safe + { + assert(0, "Expected nothing, but got " ~ event_.idString); + } + + //Document handlers. + + ///Handle start of a document. + void expectDocumentStart(Flag!"first" first)() @safe + in(eventTypeIs(EventID.documentStart) || eventTypeIs(EventID.streamEnd), + "Expected documentStart or streamEnd, but got " ~ event_.idString) + { + + if(event_.id == EventID.documentStart) + { + const YAMLVersion = event_.value; + auto tagDirectives = event_.tagDirectives; + if(openEnded_ && (YAMLVersion !is null || tagDirectives !is null)) + { + writeIndicator("...", Yes.needWhitespace); + writeIndent(); + } + + if(YAMLVersion !is null) + { + writeVersionDirective(prepareVersion(YAMLVersion)); + } + + if(tagDirectives !is null) + { + tagDirectives_ = tagDirectives; + sort!"icmp(a.handle, b.handle) < 0"(tagDirectives_); + + foreach(ref pair; tagDirectives_) + { + writeTagDirective(prepareTagHandle(pair.handle), + prepareTagPrefix(pair.prefix)); + } + } + + bool eq(ref TagDirective a, ref TagDirective b){return a.handle == b.handle;} + //Add any default tag directives that have not been overriden. 
+ foreach(ref def; defaultTagDirectives_) + { + if(!std.algorithm.canFind!eq(tagDirectives_, def)) + { + tagDirectives_ ~= def; + } + } + + const implicit = first && !event_.explicitDocument && !canonical_ && + YAMLVersion is null && tagDirectives is null && + !checkEmptyDocument(); + if(!implicit) + { + writeIndent(); + writeIndicator("---", Yes.needWhitespace); + if(canonical_){writeIndent();} + } + nextExpected!"expectRootNode"(); + } + else if(event_.id == EventID.streamEnd) + { + if(openEnded_) + { + writeIndicator("...", Yes.needWhitespace); + writeIndent(); + } + writeStreamEnd(); + nextExpected!"expectNothing"(); + } + } + + ///Handle end of a document. + void expectDocumentEnd() @safe + in(eventTypeIs(EventID.documentEnd), + "Expected DocumentEnd, but got " ~ event_.idString) + { + + writeIndent(); + if(event_.explicitDocument) + { + writeIndicator("...", Yes.needWhitespace); + writeIndent(); + } + nextExpected!"expectDocumentStart!(No.first)"(); + } + + ///Handle the root node of a document. + void expectRootNode() @safe + { + pushState!"expectDocumentEnd"(); + expectNode(Context.root); + } + + ///Handle a mapping node. + // + //Params: simpleKey = Are we in a simple key? + void expectMappingNode(const bool simpleKey = false) @safe + { + expectNode(simpleKey ? Context.mappingSimpleKey : Context.mappingNoSimpleKey); + } + + ///Handle a sequence node. + void expectSequenceNode() @safe + { + expectNode(Context.sequence); + } + + ///Handle a new node. Context specifies where in the document we are. 
+ void expectNode(const Context context) @safe + { + context_ = context; + + const flowCollection = event_.collectionStyle == CollectionStyle.flow; + + switch(event_.id) + { + case EventID.alias_: expectAlias(); break; + case EventID.scalar: + processAnchor("&"); + processTag(); + expectScalar(); + break; + case EventID.sequenceStart: + processAnchor("&"); + processTag(); + if(flowLevel_ > 0 || canonical_ || flowCollection || checkEmptySequence()) + { + expectFlowSequence(); + } + else + { + expectBlockSequence(); + } + break; + case EventID.mappingStart: + processAnchor("&"); + processTag(); + if(flowLevel_ > 0 || canonical_ || flowCollection || checkEmptyMapping()) + { + expectFlowMapping(); + } + else + { + expectBlockMapping(); + } + break; + default: + assert(0, "Expected alias_, scalar, sequenceStart or " ~ + "mappingStart, but got: " ~ event_.idString); + } + } + ///Handle an alias. + void expectAlias() @safe + in(event_.anchor != "", "Anchor is not specified for alias") + { + processAnchor("*"); + nextExpected(popState()); + } + + ///Handle a scalar. + void expectScalar() @safe + { + increaseIndent(Yes.flow); + processScalar(); + indent_ = popIndent(); + nextExpected(popState()); + } + + //Flow sequence handlers. + + ///Handle a flow sequence. + void expectFlowSequence() @safe + { + writeIndicator("[", Yes.needWhitespace, Yes.whitespace); + ++flowLevel_; + increaseIndent(Yes.flow); + nextExpected!"expectFlowSequenceItem!(Yes.first)"(); + } + + ///Handle a flow sequence item. 
+ void expectFlowSequenceItem(Flag!"first" first)() @safe + { + if(event_.id == EventID.sequenceEnd) + { + indent_ = popIndent(); + --flowLevel_; + static if(!first) if(canonical_) + { + writeIndicator(",", No.needWhitespace); + writeIndent(); + } + writeIndicator("]", No.needWhitespace); + nextExpected(popState()); + return; + } + static if(!first){writeIndicator(",", No.needWhitespace);} + if(canonical_ || column_ > bestWidth_){writeIndent();} + pushState!"expectFlowSequenceItem!(No.first)"(); + expectSequenceNode(); + } + + //Flow mapping handlers. + + ///Handle a flow mapping. + void expectFlowMapping() @safe + { + writeIndicator("{", Yes.needWhitespace, Yes.whitespace); + ++flowLevel_; + increaseIndent(Yes.flow); + nextExpected!"expectFlowMappingKey!(Yes.first)"(); + } + + ///Handle a key in a flow mapping. + void expectFlowMappingKey(Flag!"first" first)() @safe + { + if(event_.id == EventID.mappingEnd) + { + indent_ = popIndent(); + --flowLevel_; + static if (!first) if(canonical_) + { + writeIndicator(",", No.needWhitespace); + writeIndent(); + } + writeIndicator("}", No.needWhitespace); + nextExpected(popState()); + return; + } + + static if(!first){writeIndicator(",", No.needWhitespace);} + if(canonical_ || column_ > bestWidth_){writeIndent();} + if(!canonical_ && checkSimpleKey()) + { + pushState!"expectFlowMappingSimpleValue"(); + expectMappingNode(true); + return; + } + + writeIndicator("?", Yes.needWhitespace); + pushState!"expectFlowMappingValue"(); + expectMappingNode(); + } + + ///Handle a simple value in a flow mapping. + void expectFlowMappingSimpleValue() @safe + { + writeIndicator(":", No.needWhitespace); + pushState!"expectFlowMappingKey!(No.first)"(); + expectMappingNode(); + } + + ///Handle a complex value in a flow mapping. 
+ void expectFlowMappingValue() @safe + { + if(canonical_ || column_ > bestWidth_){writeIndent();} + writeIndicator(":", Yes.needWhitespace); + pushState!"expectFlowMappingKey!(No.first)"(); + expectMappingNode(); + } + + //Block sequence handlers. + + ///Handle a block sequence. + void expectBlockSequence() @safe + { + const indentless = (context_ == Context.mappingNoSimpleKey || + context_ == Context.mappingSimpleKey) && !indentation_; + increaseIndent(No.flow, indentless); + nextExpected!"expectBlockSequenceItem!(Yes.first)"(); + } + + ///Handle a block sequence item. + void expectBlockSequenceItem(Flag!"first" first)() @safe + { + static if(!first) if(event_.id == EventID.sequenceEnd) + { + indent_ = popIndent(); + nextExpected(popState()); + return; + } + + writeIndent(); + writeIndicator("-", Yes.needWhitespace, No.whitespace, Yes.indentation); + pushState!"expectBlockSequenceItem!(No.first)"(); + expectSequenceNode(); + } + + //Block mapping handlers. + + ///Handle a block mapping. + void expectBlockMapping() @safe + { + increaseIndent(No.flow); + nextExpected!"expectBlockMappingKey!(Yes.first)"(); + } + + ///Handle a key in a block mapping. + void expectBlockMappingKey(Flag!"first" first)() @safe + { + static if(!first) if(event_.id == EventID.mappingEnd) + { + indent_ = popIndent(); + nextExpected(popState()); + return; + } + + writeIndent(); + if(checkSimpleKey()) + { + pushState!"expectBlockMappingSimpleValue"(); + expectMappingNode(true); + return; + } + + writeIndicator("?", Yes.needWhitespace, No.whitespace, Yes.indentation); + pushState!"expectBlockMappingValue"(); + expectMappingNode(); + } + + ///Handle a simple value in a block mapping. + void expectBlockMappingSimpleValue() @safe + { + writeIndicator(":", No.needWhitespace); + pushState!"expectBlockMappingKey!(No.first)"(); + expectMappingNode(); + } + + ///Handle a complex value in a block mapping. 
+ void expectBlockMappingValue() @safe + { + writeIndent(); + writeIndicator(":", Yes.needWhitespace, No.whitespace, Yes.indentation); + pushState!"expectBlockMappingKey!(No.first)"(); + expectMappingNode(); + } + + //Checkers. + + ///Check if an empty sequence is next. + bool checkEmptySequence() const @safe pure nothrow + { + return event_.id == EventID.sequenceStart && events_.length > 0 + && events_.peek().id == EventID.sequenceEnd; + } + + ///Check if an empty mapping is next. + bool checkEmptyMapping() const @safe pure nothrow + { + return event_.id == EventID.mappingStart && events_.length > 0 + && events_.peek().id == EventID.mappingEnd; + } + + ///Check if an empty document is next. + bool checkEmptyDocument() const @safe pure nothrow + { + if(event_.id != EventID.documentStart || events_.length == 0) + { + return false; + } + + const event = events_.peek(); + const emptyScalar = event.id == EventID.scalar && (event.anchor is null) && + (event.tag is null) && event.implicit && event.value == ""; + return emptyScalar; + } + + ///Check if a simple key is next. 
+ bool checkSimpleKey() @safe + { + uint length; + const id = event_.id; + const scalar = id == EventID.scalar; + const collectionStart = id == EventID.mappingStart || + id == EventID.sequenceStart; + + if((id == EventID.alias_ || scalar || collectionStart) + && (event_.anchor !is null)) + { + if(preparedAnchor_ is null) + { + preparedAnchor_ = prepareAnchor(event_.anchor); + } + length += preparedAnchor_.length; + } + + if((scalar || collectionStart) && (event_.tag !is null)) + { + if(preparedTag_ is null){preparedTag_ = prepareTag(event_.tag);} + length += preparedTag_.length; + } + + if(scalar) + { + if(analysis_.flags.isNull){analysis_ = analyzeScalar(event_.value);} + length += analysis_.scalar.length; + } + + if(length >= 128){return false;} + + return id == EventID.alias_ || + (scalar && !analysis_.flags.empty && !analysis_.flags.multiline) || + checkEmptySequence() || + checkEmptyMapping(); + } + + ///Process and write a scalar. + void processScalar() @safe + { + if(analysis_.flags.isNull){analysis_ = analyzeScalar(event_.value);} + if(style_ == ScalarStyle.invalid) + { + style_ = chooseScalarStyle(); + } + + //if(analysis_.flags.multiline && (context_ != Context.mappingSimpleKey) && + // ([ScalarStyle.invalid, ScalarStyle.plain, ScalarStyle.singleQuoted, ScalarStyle.doubleQuoted) + // .canFind(style_)) + //{ + // writeIndent(); + //} + auto writer = ScalarWriter!(Range, CharType)(&this, analysis_.scalar, + context_ != Context.mappingSimpleKey); + final switch(style_) + { + case ScalarStyle.invalid: assert(false); + case ScalarStyle.doubleQuoted: writer.writeDoubleQuoted(); break; + case ScalarStyle.singleQuoted: writer.writeSingleQuoted(); break; + case ScalarStyle.folded: writer.writeFolded(); break; + case ScalarStyle.literal: writer.writeLiteral(); break; + case ScalarStyle.plain: writer.writePlain(); break; + } + analysis_.flags.isNull = true; + style_ = ScalarStyle.invalid; + } + + ///Process and write an anchor/alias. 
+ void processAnchor(const string indicator) @safe + { + if(event_.anchor is null) + { + preparedAnchor_ = null; + return; + } + if(preparedAnchor_ is null) + { + preparedAnchor_ = prepareAnchor(event_.anchor); + } + if(preparedAnchor_ !is null && preparedAnchor_ != "") + { + writeIndicator(indicator, Yes.needWhitespace); + writeString(preparedAnchor_); + } + preparedAnchor_ = null; + } + + ///Process and write a tag. + void processTag() @safe + { + string tag = event_.tag; + + if(event_.id == EventID.scalar) + { + if(style_ == ScalarStyle.invalid){style_ = chooseScalarStyle();} + if((!canonical_ || (tag is null)) && + ((tag == "tag:yaml.org,2002:str") || (style_ == ScalarStyle.plain ? event_.implicit : !event_.implicit && (tag is null)))) + { + preparedTag_ = null; + return; + } + if(event_.implicit && (tag is null)) + { + tag = "!"; + preparedTag_ = null; + } + } + else if((!canonical_ || (tag is null)) && event_.implicit) + { + preparedTag_ = null; + return; + } + + assert(tag != "", "Tag is not specified"); + if(preparedTag_ is null){preparedTag_ = prepareTag(tag);} + if(preparedTag_ !is null && preparedTag_ != "") + { + writeIndicator(preparedTag_, Yes.needWhitespace); + } + preparedTag_ = null; + } + + ///Determine style to write the current scalar in. + ScalarStyle chooseScalarStyle() @safe + { + if(analysis_.flags.isNull){analysis_ = analyzeScalar(event_.value);} + + const style = event_.scalarStyle; + const invalidOrPlain = style == ScalarStyle.invalid || style == ScalarStyle.plain; + const block = style == ScalarStyle.literal || style == ScalarStyle.folded; + const singleQuoted = style == ScalarStyle.singleQuoted; + const doubleQuoted = style == ScalarStyle.doubleQuoted; + + const allowPlain = flowLevel_ > 0 ? 
analysis_.flags.allowFlowPlain + : analysis_.flags.allowBlockPlain; + //simple empty or multiline scalars can't be written in plain style + const simpleNonPlain = (context_ == Context.mappingSimpleKey) && + (analysis_.flags.empty || analysis_.flags.multiline); + + if(doubleQuoted || canonical_) + { + return ScalarStyle.doubleQuoted; + } + + if(invalidOrPlain && event_.implicit && !simpleNonPlain && allowPlain) + { + return ScalarStyle.plain; + } + + if(block && flowLevel_ == 0 && context_ != Context.mappingSimpleKey && + analysis_.flags.allowBlock) + { + return style; + } + + if((invalidOrPlain || singleQuoted) && + analysis_.flags.allowSingleQuoted && + !(context_ == Context.mappingSimpleKey && analysis_.flags.multiline)) + { + return ScalarStyle.singleQuoted; + } + + return ScalarStyle.doubleQuoted; + } + + ///Prepare YAML version string for output. + static string prepareVersion(const string YAMLVersion) @safe + in(YAMLVersion.split(".")[0] == "1", + "Unsupported YAML version: " ~ YAMLVersion) + { + return YAMLVersion; + } + + ///Encode an Unicode character for tag directive and write it to writer. + static void encodeChar(Writer)(ref Writer writer, in dchar c) @safe + { + char[4] data; + const bytes = encode(data, c); + //For each byte add string in format %AB , where AB are hex digits of the byte. + foreach(const char b; data[0 .. bytes]) + { + formattedWrite(writer, "%%%02X", cast(ubyte)b); + } + } + + ///Prepare tag directive handle for output. + static string prepareTagHandle(const string handle) @safe + in(handle != "", "Tag handle must not be empty") + in(handle.drop(1).dropBack(1).all!(c => isAlphaNum(c) || c.among!('-', '_')), + "Tag handle contains invalid characters") + { + return handle; + } + + ///Prepare tag directive prefix for output. 
+ static string prepareTagPrefix(const string prefix) @safe + in(prefix != "", "Tag prefix must not be empty") + { + auto appender = appender!string(); + const int offset = prefix[0] == '!'; + size_t start, end; + + foreach(const size_t i, const dchar c; prefix) + { + const size_t idx = i + offset; + if(isAlphaNum(c) || c.among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '_', '.', '!', '~', '*', '\\', '\'', '(', ')', '[', ']', '%')) + { + end = idx + 1; + continue; + } + + if(start < idx){appender.put(prefix[start .. idx]);} + start = end = idx + 1; + + encodeChar(appender, c); + } + + end = min(end, prefix.length); + if(start < end){appender.put(prefix[start .. end]);} + return appender.data; + } + + ///Prepare tag for output. + string prepareTag(in string tag) @safe + in(tag != "", "Tag must not be empty") + { + + string tagString = tag; + if (tagString == "!") return "!"; + string handle; + string suffix = tagString; + + //Sort lexicographically by prefix. + sort!"icmp(a.prefix, b.prefix) < 0"(tagDirectives_); + foreach(ref pair; tagDirectives_) + { + auto prefix = pair.prefix; + if(tagString.startsWith(prefix) && + (prefix != "!" || prefix.length < tagString.length)) + { + handle = pair.handle; + suffix = tagString[prefix.length .. $]; + } + } + + auto appender = appender!string(); + appender.put(handle !is null && handle != "" ? handle : "!<"); + size_t start, end; + foreach(const dchar c; suffix) + { + if(isAlphaNum(c) || c.among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '_', '.', '~', '*', '\\', '\'', '(', ')', '[', ']') || + (c == '!' && handle != "!")) + { + ++end; + continue; + } + if(start < end){appender.put(suffix[start .. end]);} + start = end = end + 1; + + encodeChar(appender, c); + } + + if(start < end){appender.put(suffix[start .. end]);} + if(handle is null || handle == ""){appender.put(">");} + + return appender.data; + } + + ///Prepare anchor for output. 
+ static string prepareAnchor(const string anchor) @safe + in(anchor != "", "Anchor must not be empty") + in(anchor.all!isNSAnchorName, "Anchor contains invalid characters") + { + return anchor; + } + + ///Analyze specifed scalar and return the analysis result. + static ScalarAnalysis analyzeScalar(string scalar) @safe + { + ScalarAnalysis analysis; + analysis.flags.isNull = false; + analysis.scalar = scalar; + + //Empty scalar is a special case. + if(scalar is null || scalar == "") + { + with(ScalarAnalysis.AnalysisFlags) + analysis.flags = + empty | + allowBlockPlain | + allowSingleQuoted | + allowDoubleQuoted; + return analysis; + } + + //Indicators and special characters (All false by default). + bool blockIndicators, flowIndicators, lineBreaks, specialCharacters; + + //Important whitespace combinations (All false by default). + bool leadingSpace, leadingBreak, trailingSpace, trailingBreak, + breakSpace, spaceBreak; + + //Check document indicators. + if(scalar.startsWith("---", "...")) + { + blockIndicators = flowIndicators = true; + } + + //First character or preceded by a whitespace. + bool preceededByWhitespace = true; + + //Last character or followed by a whitespace. + bool followedByWhitespace = scalar.length == 1 || + scalar[1].among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + + //The previous character is a space/break (false by default). + bool previousSpace, previousBreak; + + foreach(const size_t index, const dchar c; scalar) + { + //Check for indicators. + if(index == 0) + { + //Leading indicators are special characters. + if(c.isSpecialChar) + { + flowIndicators = blockIndicators = true; + } + if(':' == c || '?' == c) + { + flowIndicators = true; + if(followedByWhitespace){blockIndicators = true;} + } + if(c == '-' && followedByWhitespace) + { + flowIndicators = blockIndicators = true; + } + } + else + { + //Some indicators cannot appear within a scalar as well. 
+ if(c.isFlowIndicator){flowIndicators = true;} + if(c == ':') + { + flowIndicators = true; + if(followedByWhitespace){blockIndicators = true;} + } + if(c == '#' && preceededByWhitespace) + { + flowIndicators = blockIndicators = true; + } + } + + //Check for line breaks, special, and unicode characters. + if(c.isNewLine){lineBreaks = true;} + if(!(c == '\n' || (c >= '\x20' && c <= '\x7E')) && + !((c == '\u0085' || (c >= '\xA0' && c <= '\uD7FF') || + (c >= '\uE000' && c <= '\uFFFD')) && c != '\uFEFF')) + { + specialCharacters = true; + } + + //Detect important whitespace combinations. + if(c == ' ') + { + if(index == 0){leadingSpace = true;} + if(index == scalar.length - 1){trailingSpace = true;} + if(previousBreak){breakSpace = true;} + previousSpace = true; + previousBreak = false; + } + else if(c.isNewLine) + { + if(index == 0){leadingBreak = true;} + if(index == scalar.length - 1){trailingBreak = true;} + if(previousSpace){spaceBreak = true;} + previousSpace = false; + previousBreak = true; + } + else + { + previousSpace = previousBreak = false; + } + + //Prepare for the next character. + preceededByWhitespace = c.isSpace != 0; + followedByWhitespace = index + 2 >= scalar.length || + scalar[index + 2].isSpace; + } + + with(ScalarAnalysis.AnalysisFlags) + { + //Let's decide what styles are allowed. + analysis.flags |= allowFlowPlain | allowBlockPlain | allowSingleQuoted | + allowDoubleQuoted | allowBlock; + + //Leading and trailing whitespaces are bad for plain scalars. + if(leadingSpace || leadingBreak || trailingSpace || trailingBreak) + { + analysis.flags &= ~(allowFlowPlain | allowBlockPlain); + } + + //We do not permit trailing spaces for block scalars. + if(trailingSpace) + { + analysis.flags &= ~allowBlock; + } + + //Spaces at the beginning of a new line are only acceptable for block + //scalars. 
+ if(breakSpace) + { + analysis.flags &= ~(allowFlowPlain | allowBlockPlain | allowSingleQuoted); + } + + //Spaces followed by breaks, as well as special character are only + //allowed for double quoted scalars. + if(spaceBreak || specialCharacters) + { + analysis.flags &= ~(allowFlowPlain | allowBlockPlain | allowSingleQuoted | allowBlock); + } + + //Although the plain scalar writer supports breaks, we never emit + //multiline plain scalars. + if(lineBreaks) + { + analysis.flags &= ~(allowFlowPlain | allowBlockPlain); + analysis.flags |= multiline; + } + + //Flow indicators are forbidden for flow plain scalars. + if(flowIndicators) + { + analysis.flags &= ~allowFlowPlain; + } + + //Block indicators are forbidden for block plain scalars. + if(blockIndicators) + { + analysis.flags &= ~allowBlockPlain; + } + } + return analysis; + } + + @safe unittest + { + with(analyzeScalar("").flags) + { + // workaround for empty being std.range.primitives.empty here + alias empty = ScalarAnalysis.AnalysisFlags.empty; + assert(empty && allowBlockPlain && allowSingleQuoted && allowDoubleQuoted); + } + with(analyzeScalar("a").flags) + { + assert(allowFlowPlain && allowBlockPlain && allowSingleQuoted && allowDoubleQuoted && allowBlock); + } + with(analyzeScalar(" ").flags) + { + assert(allowSingleQuoted && allowDoubleQuoted); + } + with(analyzeScalar(" a").flags) + { + assert(allowSingleQuoted && allowDoubleQuoted); + } + with(analyzeScalar("a ").flags) + { + assert(allowSingleQuoted && allowDoubleQuoted); + } + with(analyzeScalar("\na").flags) + { + assert(allowSingleQuoted && allowDoubleQuoted); + } + with(analyzeScalar("a\n").flags) + { + assert(allowSingleQuoted && allowDoubleQuoted); + } + with(analyzeScalar("\n").flags) + { + assert(multiline && allowSingleQuoted && allowDoubleQuoted && allowBlock); + } + with(analyzeScalar(" \n").flags) + { + assert(multiline && allowDoubleQuoted); + } + with(analyzeScalar("\n a").flags) + { + assert(multiline && allowDoubleQuoted && 
allowBlock); + } + } + + //Writers. + + ///Start the YAML stream (write the unicode byte order mark). + void writeStreamStart() @safe + { + //Write BOM (except for UTF-8) + static if(is(CharType == wchar) || is(CharType == dchar)) + { + stream_.put(cast(CharType)'\uFEFF'); + } + } + + ///End the YAML stream. + void writeStreamEnd() @safe {} + + ///Write an indicator (e.g. ":", "[", ">", etc.). + void writeIndicator(const scope char[] indicator, + const Flag!"needWhitespace" needWhitespace, + const Flag!"whitespace" whitespace = No.whitespace, + const Flag!"indentation" indentation = No.indentation) @safe + { + const bool prefixSpace = !whitespace_ && needWhitespace; + whitespace_ = whitespace; + indentation_ = indentation_ && indentation; + openEnded_ = false; + column_ += indicator.length; + if(prefixSpace) + { + ++column_; + writeString(" "); + } + writeString(indicator); + } + + ///Write indentation. + void writeIndent() @safe + { + const indent = indent_ == -1 ? 0 : indent_; + + if(!indentation_ || column_ > indent || (column_ == indent && !whitespace_)) + { + writeLineBreak(); + } + if(column_ < indent) + { + whitespace_ = true; + + //Used to avoid allocation of arbitrary length strings. + static immutable spaces = " "; + size_t numSpaces = indent - column_; + column_ = indent; + while(numSpaces >= spaces.length) + { + writeString(spaces); + numSpaces -= spaces.length; + } + writeString(spaces[0 .. numSpaces]); + } + } + + ///Start new line. + void writeLineBreak(const scope char[] data = null) @safe + { + whitespace_ = indentation_ = true; + ++line_; + column_ = 0; + writeString(data is null ? lineBreak(bestLineBreak_) : data); + } + + ///Write a YAML version directive. + void writeVersionDirective(const string versionText) @safe + { + writeString("%YAML "); + writeString(versionText); + writeLineBreak(); + } + + ///Write a tag directive. 
+ void writeTagDirective(const string handle, const string prefix) @safe + { + writeString("%TAG "); + writeString(handle); + writeString(" "); + writeString(prefix); + writeLineBreak(); + } + void nextExpected(string D)() @safe + { + state_ = mixin("function(typeof(this)* self) { self."~D~"(); }"); + } + void nextExpected(EmitterFunction f) @safe + { + state_ = f; + } + void callNext() @safe + { + state_(&this); + } +} + + +private: + +///RAII struct used to write out scalar values. +struct ScalarWriter(Range, CharType) +{ + invariant() + { + assert(emitter_.bestIndent_ > 0 && emitter_.bestIndent_ < 10, + "Emitter bestIndent must be 1 to 9 for one-character indent hint"); + } + + private: + @disable int opCmp(ref Emitter!(Range, CharType)); + @disable bool opEquals(ref Emitter!(Range, CharType)); + + ///Used as "null" UTF-32 character. + static immutable dcharNone = dchar.max; + + ///Emitter used to emit the scalar. + Emitter!(Range, CharType)* emitter_; + + ///UTF-8 encoded text of the scalar to write. + string text_; + + ///Can we split the scalar into multiple lines? + bool split_; + ///Are we currently going over spaces in the text? + bool spaces_; + ///Are we currently going over line breaks in the text? + bool breaks_; + + ///Start and end byte of the text range we're currently working with. + size_t startByte_, endByte_; + ///End byte of the text range including the currently processed character. + size_t nextEndByte_; + ///Start and end character of the text range we're currently working with. + long startChar_, endChar_; + + public: + ///Construct a ScalarWriter using emitter to output text. + this(Emitter!(Range, CharType)* emitter, string text, const bool split = true) @safe nothrow + { + emitter_ = emitter; + text_ = text; + split_ = split; + } + + ///Write text as single quoted scalar. 
+ void writeSingleQuoted() @safe + { + emitter_.writeIndicator("\'", Yes.needWhitespace); + spaces_ = breaks_ = false; + resetTextPosition(); + + do + { + const dchar c = nextChar(); + if(spaces_) + { + if(c != ' ' && tooWide() && split_ && + startByte_ != 0 && endByte_ != text_.length) + { + writeIndent(Flag!"ResetSpace".no); + updateRangeStart(); + } + else if(c != ' ') + { + writeCurrentRange(Flag!"UpdateColumn".yes); + } + } + else if(breaks_) + { + if(!c.isNewLine) + { + writeStartLineBreak(); + writeLineBreaks(); + emitter_.writeIndent(); + } + } + else if((c == dcharNone || c == '\'' || c == ' ' || c.isNewLine) + && startChar_ < endChar_) + { + writeCurrentRange(Flag!"UpdateColumn".yes); + } + if(c == '\'') + { + emitter_.column_ += 2; + emitter_.writeString("\'\'"); + startByte_ = endByte_ + 1; + startChar_ = endChar_ + 1; + } + updateBreaks(c, Flag!"UpdateSpaces".yes); + }while(endByte_ < text_.length); + + emitter_.writeIndicator("\'", No.needWhitespace); + } + + ///Write text as double quoted scalar. + void writeDoubleQuoted() @safe + { + resetTextPosition(); + emitter_.writeIndicator("\"", Yes.needWhitespace); + do + { + const dchar c = nextChar(); + //handle special characters + if(c == dcharNone || c.among!('\"', '\\', '\u0085', '\u2028', '\u2029', '\uFEFF') || + !((c >= '\x20' && c <= '\x7E') || + ((c >= '\xA0' && c <= '\uD7FF') || (c >= '\uE000' && c <= '\uFFFD')))) + { + if(startChar_ < endChar_) + { + writeCurrentRange(Flag!"UpdateColumn".yes); + } + if(c != dcharNone) + { + auto appender = appender!string(); + if(const dchar es = toEscape(c)) + { + appender.put('\\'); + appender.put(es); + } + else + { + //Write an escaped Unicode character. + const format = c <= 255 ? "\\x%02X": + c <= 65535 ? 
"\\u%04X": "\\U%08X"; + formattedWrite(appender, format, cast(uint)c); + } + + emitter_.column_ += appender.data.length; + emitter_.writeString(appender.data); + startChar_ = endChar_ + 1; + startByte_ = nextEndByte_; + } + } + if((endByte_ > 0 && endByte_ < text_.length - strideBack(text_, text_.length)) + && (c == ' ' || startChar_ >= endChar_) + && (emitter_.column_ + endChar_ - startChar_ > emitter_.bestWidth_) + && split_) + { + //text_[2:1] is ok in Python but not in D, so we have to use min() + emitter_.writeString(text_[min(startByte_, endByte_) .. endByte_]); + emitter_.writeString("\\"); + emitter_.column_ += startChar_ - endChar_ + 1; + startChar_ = max(startChar_, endChar_); + startByte_ = max(startByte_, endByte_); + + writeIndent(Flag!"ResetSpace".yes); + if(charAtStart() == ' ') + { + emitter_.writeString("\\"); + ++emitter_.column_; + } + } + }while(endByte_ < text_.length); + emitter_.writeIndicator("\"", No.needWhitespace); + } + + ///Write text as folded block scalar. + void writeFolded() @safe + { + initBlock('>'); + bool leadingSpace = true; + spaces_ = false; + breaks_ = true; + resetTextPosition(); + + do + { + const dchar c = nextChar(); + if(breaks_) + { + if(!c.isNewLine) + { + if(!leadingSpace && c != dcharNone && c != ' ') + { + writeStartLineBreak(); + } + leadingSpace = (c == ' '); + writeLineBreaks(); + if(c != dcharNone){emitter_.writeIndent();} + } + } + else if(spaces_) + { + if(c != ' ' && tooWide()) + { + writeIndent(Flag!"ResetSpace".no); + updateRangeStart(); + } + else if(c != ' ') + { + writeCurrentRange(Flag!"UpdateColumn".yes); + } + } + else if(c == dcharNone || c.isNewLine || c == ' ') + { + writeCurrentRange(Flag!"UpdateColumn".yes); + if(c == dcharNone){emitter_.writeLineBreak();} + } + updateBreaks(c, Flag!"UpdateSpaces".yes); + }while(endByte_ < text_.length); + } + + ///Write text as literal block scalar. 
+ void writeLiteral() @safe + { + initBlock('|'); + breaks_ = true; + resetTextPosition(); + + do + { + const dchar c = nextChar(); + if(breaks_) + { + if(!c.isNewLine) + { + writeLineBreaks(); + if(c != dcharNone){emitter_.writeIndent();} + } + } + else if(c == dcharNone || c.isNewLine) + { + writeCurrentRange(Flag!"UpdateColumn".no); + if(c == dcharNone){emitter_.writeLineBreak();} + } + updateBreaks(c, Flag!"UpdateSpaces".no); + }while(endByte_ < text_.length); + } + + ///Write text as plain scalar. + void writePlain() @safe + { + if(emitter_.context_ == Emitter!(Range, CharType).Context.root){emitter_.openEnded_ = true;} + if(text_ == ""){return;} + if(!emitter_.whitespace_) + { + ++emitter_.column_; + emitter_.writeString(" "); + } + emitter_.whitespace_ = emitter_.indentation_ = false; + spaces_ = breaks_ = false; + resetTextPosition(); + + do + { + const dchar c = nextChar(); + if(spaces_) + { + if(c != ' ' && tooWide() && split_) + { + writeIndent(Flag!"ResetSpace".yes); + updateRangeStart(); + } + else if(c != ' ') + { + writeCurrentRange(Flag!"UpdateColumn".yes); + } + } + else if(breaks_) + { + if(!c.isNewLine) + { + writeStartLineBreak(); + writeLineBreaks(); + writeIndent(Flag!"ResetSpace".yes); + } + } + else if(c == dcharNone || c.isNewLine || c == ' ') + { + writeCurrentRange(Flag!"UpdateColumn".yes); + } + updateBreaks(c, Flag!"UpdateSpaces".yes); + }while(endByte_ < text_.length); + } + + private: + ///Get next character and move end of the text range to it. + @property dchar nextChar() pure @safe + { + ++endChar_; + endByte_ = nextEndByte_; + if(endByte_ >= text_.length){return dcharNone;} + const c = text_[nextEndByte_]; + //c is ascii, no need to decode. + if(c < 0x80) + { + ++nextEndByte_; + return c; + } + return decode(text_, nextEndByte_); + } + + ///Get character at start of the text range. + @property dchar charAtStart() const pure @safe + { + size_t idx = startByte_; + return decode(text_, idx); + } + + ///Is the current line too wide? 
+ @property bool tooWide() const pure @safe nothrow + { + return startChar_ + 1 == endChar_ && + emitter_.column_ > emitter_.bestWidth_; + } + + ///Determine hints (indicators) for block scalar. + size_t determineBlockHints(char[] hints, uint bestIndent) const pure @safe + { + size_t hintsIdx; + if(text_.length == 0) + return hintsIdx; + + dchar lastChar(const string str, ref size_t end) + { + size_t idx = end = end - strideBack(str, end); + return decode(text_, idx); + } + + size_t end = text_.length; + const last = lastChar(text_, end); + const secondLast = end > 0 ? lastChar(text_, end) : 0; + + if(text_[0].isNewLine || text_[0] == ' ') + { + hints[hintsIdx++] = cast(char)('0' + bestIndent); + } + if(!last.isNewLine) + { + hints[hintsIdx++] = '-'; + } + else if(std.utf.count(text_) == 1 || secondLast.isNewLine) + { + hints[hintsIdx++] = '+'; + } + return hintsIdx; + } + + ///Initialize for block scalar writing with specified indicator. + void initBlock(const char indicator) @safe + { + char[4] hints; + hints[0] = indicator; + const hintsLength = 1 + determineBlockHints(hints[1 .. $], emitter_.bestIndent_); + emitter_.writeIndicator(hints[0 .. hintsLength], Yes.needWhitespace); + if(hints.length > 0 && hints[$ - 1] == '+') + { + emitter_.openEnded_ = true; + } + emitter_.writeLineBreak(); + } + + ///Write out the current text range. + void writeCurrentRange(const Flag!"UpdateColumn" updateColumn) @safe + { + emitter_.writeString(text_[startByte_ .. endByte_]); + if(updateColumn){emitter_.column_ += endChar_ - startChar_;} + updateRangeStart(); + } + + ///Write line breaks in the text range. + void writeLineBreaks() @safe + { + foreach(const dchar br; text_[startByte_ .. endByte_]) + { + if(br == '\n'){emitter_.writeLineBreak();} + else + { + char[4] brString; + const bytes = encode(brString, br); + emitter_.writeLineBreak(brString[0 .. bytes]); + } + } + updateRangeStart(); + } + + ///Write line break if start of the text range is a newline. 
+ void writeStartLineBreak() @safe + { + if(charAtStart == '\n'){emitter_.writeLineBreak();} + } + + ///Write indentation, optionally resetting whitespace/indentation flags. + void writeIndent(const Flag!"ResetSpace" resetSpace) @safe + { + emitter_.writeIndent(); + if(resetSpace) + { + emitter_.whitespace_ = emitter_.indentation_ = false; + } + } + + ///Move start of text range to its end. + void updateRangeStart() pure @safe nothrow + { + startByte_ = endByte_; + startChar_ = endChar_; + } + + ///Update the line breaks_ flag, optionally updating the spaces_ flag. + void updateBreaks(in dchar c, const Flag!"UpdateSpaces" updateSpaces) pure @safe + { + if(c == dcharNone){return;} + breaks_ = (c.isNewLine != 0); + if(updateSpaces){spaces_ = c == ' ';} + } + + ///Move to the beginning of text. + void resetTextPosition() pure @safe nothrow + { + startByte_ = endByte_ = nextEndByte_ = 0; + startChar_ = endChar_ = -1; + } +} diff --git a/source/dub/internal/dyaml/encoding.d b/source/dub/internal/dyaml/encoding.d new file mode 100644 index 0000000..88df984 --- /dev/null +++ b/source/dub/internal/dyaml/encoding.d @@ -0,0 +1,11 @@ +// Copyright Ferdinand Majerech 2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module dub.internal.dyaml.encoding; + + +import dub.internal.tinyendian; + +alias Encoding = dub.internal.tinyendian.UTFEncoding; diff --git a/source/dub/internal/dyaml/escapes.d b/source/dub/internal/dyaml/escapes.d new file mode 100644 index 0000000..1ddebcb --- /dev/null +++ b/source/dub/internal/dyaml/escapes.d @@ -0,0 +1,106 @@ + + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module dub.internal.dyaml.escapes; + +package: + +import std.meta : AliasSeq; +alias escapes = AliasSeq!('0', 'a', 'b', 't', '\t', 'n', 'v', 'f', 'r', 'e', ' ', + '/', '\"', '\\', 'N', '_', 'L', 'P'); + +/// YAML hex codes specifying the length of the hex number. +alias escapeHexCodeList = AliasSeq!('x', 'u', 'U'); + +/// Convert a YAML escape to a dchar. +dchar fromEscape(dchar escape) @safe pure nothrow @nogc +{ + switch(escape) + { + case '0': return '\0'; + case 'a': return '\x07'; + case 'b': return '\x08'; + case 't': return '\x09'; + case '\t': return '\x09'; + case 'n': return '\x0A'; + case 'v': return '\x0B'; + case 'f': return '\x0C'; + case 'r': return '\x0D'; + case 'e': return '\x1B'; + case '/': return '/'; + case ' ': return '\x20'; + case '\"': return '\"'; + case '\\': return '\\'; + case 'N': return '\x85'; //'\u0085'; + case '_': return '\xA0'; + case 'L': return '\u2028'; + case 'P': return '\u2029'; + default: assert(false, "No such YAML escape"); + } +} + +/** + * Convert a dchar to a YAML escape. + * + * Params: + * value = The possibly escapable character. + * + * Returns: + * If the character passed as parameter can be escaped, returns the matching + * escape, otherwise returns a null character. + */ +dchar toEscape(dchar value) @safe pure nothrow @nogc +{ + switch(value) + { + case '\0': return '0'; + case '\x07': return 'a'; + case '\x08': return 'b'; + case '\x09': return 't'; + case '\x0A': return 'n'; + case '\x0B': return 'v'; + case '\x0C': return 'f'; + case '\x0D': return 'r'; + case '\x1B': return 'e'; + case '\"': return '\"'; + case '\\': return '\\'; + case '\xA0': return '_'; + case '\x85': return 'N'; + case '\u2028': return 'L'; + case '\u2029': return 'P'; + default: return 0; + } +} + +/// Get the length of a hexadecimal number determined by its hex code. +/// +/// Need a function as associative arrays don't work with @nogc. 
+/// (And this may be even faster with a function.) +uint escapeHexLength(dchar hexCode) @safe pure nothrow @nogc +{ + switch(hexCode) + { + case 'x': return 2; + case 'u': return 4; + case 'U': return 8; + default: assert(false, "No such YAML hex code"); + } +} + +// Issue #302: Support optional escaping of forward slashes in string +// for JSON compatibility +@safe unittest +{ + import dub.internal.dyaml.loader : Loader; + + const str = `{ + "forward/slashes": "can\/be\/optionally\/escaped" +}`; + + auto node = Loader.fromString(str).load(); + assert(node["forward/slashes"] == "can/be/optionally/escaped"); +} diff --git a/source/dub/internal/dyaml/event.d b/source/dub/internal/dyaml/event.d new file mode 100644 index 0000000..2adc792 --- /dev/null +++ b/source/dub/internal/dyaml/event.d @@ -0,0 +1,243 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * YAML events. + * Code based on PyYAML: http://www.pyyaml.org + */ +module dub.internal.dyaml.event; + +import std.array; +import std.conv; + +import dub.internal.dyaml.exception; +import dub.internal.dyaml.reader; +import dub.internal.dyaml.tagdirective; +import dub.internal.dyaml.style; + + +package: +///Event types. +enum EventID : ubyte +{ + invalid = 0, /// Invalid (uninitialized) event. + streamStart, /// Stream start + streamEnd, /// Stream end + documentStart, /// Document start + documentEnd, /// Document end + alias_, /// Alias + scalar, /// Scalar + sequenceStart, /// Sequence start + sequenceEnd, /// Sequence end + mappingStart, /// Mapping start + mappingEnd /// Mapping end +} + +/** + * YAML event produced by parser. + * + * 48 bytes on 64bit. + */ +struct Event +{ + @disable int opCmp(ref Event); + + ///Value of the event, if any. + string value; + ///Start position of the event in file/stream. 
+ Mark startMark; + ///End position of the event in file/stream. + Mark endMark; + union + { + struct + { + ///Anchor of the event, if any. + string _anchor; + ///Tag of the event, if any. + string _tag; + } + ///Tag directives, if this is a DocumentStart. + //TagDirectives tagDirectives; + TagDirective[] _tagDirectives; + } + ///Event type. + EventID id = EventID.invalid; + ///Style of scalar event, if this is a scalar event. + ScalarStyle scalarStyle = ScalarStyle.invalid; + union + { + ///Should the tag be implicitly resolved? + bool implicit; + /** + * Is this document event explicit? + * + * Used if this is a DocumentStart or DocumentEnd. + */ + bool explicitDocument; + } + ///Collection style, if this is a SequenceStart or MappingStart. + CollectionStyle collectionStyle = CollectionStyle.invalid; + + ///Is this a null (uninitialized) event? + @property bool isNull() const pure @safe nothrow {return id == EventID.invalid;} + + ///Get string representation of the token ID. + @property string idString() const @safe {return to!string(id);} + + auto ref anchor() inout @trusted pure { + assert(id != EventID.documentStart, "DocumentStart events cannot have anchors."); + return _anchor; + } + + auto ref tag() inout @trusted pure { + assert(id != EventID.documentStart, "DocumentStart events cannot have tags."); + return _tag; + } + + auto ref tagDirectives() inout @trusted pure { + assert(id == EventID.documentStart, "Only DocumentStart events have tag directives."); + return _tagDirectives; + } +} + +/** + * Construct a simple event. + * + * Params: start = Start position of the event in the file/stream. + * end = End position of the event in the file/stream. + * anchor = Anchor, if this is an alias event. 
+ */ +Event event(EventID id)(const Mark start, const Mark end, const string anchor = null) + @safe + in(!(id == EventID.alias_ && anchor == ""), "Missing anchor for alias event") +{ + Event result; + result.startMark = start; + result.endMark = end; + result.anchor = anchor; + result.id = id; + return result; +} + +/** + * Construct a collection (mapping or sequence) start event. + * + * Params: start = Start position of the event in the file/stream. + * end = End position of the event in the file/stream. + * anchor = Anchor of the sequence, if any. + * tag = Tag of the sequence, if specified. + * implicit = Should the tag be implicitly resolved? + * style = Style to use when outputting document. + */ +Event collectionStartEvent(EventID id) + (const Mark start, const Mark end, const string anchor, const string tag, + const bool implicit, const CollectionStyle style) pure @safe nothrow +{ + static assert(id == EventID.sequenceStart || id == EventID.sequenceEnd || + id == EventID.mappingStart || id == EventID.mappingEnd); + Event result; + result.startMark = start; + result.endMark = end; + result.anchor = anchor; + result.tag = tag; + result.id = id; + result.implicit = implicit; + result.collectionStyle = style; + return result; +} + +/** + * Construct a stream start event. + * + * Params: start = Start position of the event in the file/stream. + * end = End position of the event in the file/stream. + */ +Event streamStartEvent(const Mark start, const Mark end) + pure @safe nothrow +{ + Event result; + result.startMark = start; + result.endMark = end; + result.id = EventID.streamStart; + return result; +} + +///Aliases for simple events. +alias streamEndEvent = event!(EventID.streamEnd); +alias aliasEvent = event!(EventID.alias_); +alias sequenceEndEvent = event!(EventID.sequenceEnd); +alias mappingEndEvent = event!(EventID.mappingEnd); + +///Aliases for collection start events. 
+alias sequenceStartEvent = collectionStartEvent!(EventID.sequenceStart); +alias mappingStartEvent = collectionStartEvent!(EventID.mappingStart); + +/** + * Construct a document start event. + * + * Params: start = Start position of the event in the file/stream. + * end = End position of the event in the file/stream. + * explicit = Is this an explicit document start? + * YAMLVersion = YAML version string of the document. + * tagDirectives = Tag directives of the document. + */ +Event documentStartEvent(const Mark start, const Mark end, const bool explicit, string YAMLVersion, + TagDirective[] tagDirectives) pure @safe nothrow +{ + Event result; + result.value = YAMLVersion; + result.startMark = start; + result.endMark = end; + result.id = EventID.documentStart; + result.explicitDocument = explicit; + result.tagDirectives = tagDirectives; + return result; +} + +/** + * Construct a document end event. + * + * Params: start = Start position of the event in the file/stream. + * end = End position of the event in the file/stream. + * explicit = Is this an explicit document end? + */ +Event documentEndEvent(const Mark start, const Mark end, const bool explicit) pure @safe nothrow +{ + Event result; + result.startMark = start; + result.endMark = end; + result.id = EventID.documentEnd; + result.explicitDocument = explicit; + return result; +} + +/// Construct a scalar event. +/// +/// Params: start = Start position of the event in the file/stream. +/// end = End position of the event in the file/stream. +/// anchor = Anchor of the scalar, if any. +/// tag = Tag of the scalar, if specified. +/// implicit = Should the tag be implicitly resolved? +/// value = String value of the scalar. +/// style = Scalar style. 
+Event scalarEvent(const Mark start, const Mark end, const string anchor, const string tag, + const bool implicit, const string value, + const ScalarStyle style = ScalarStyle.invalid) @safe pure nothrow @nogc +{ + Event result; + result.value = value; + result.startMark = start; + result.endMark = end; + + result.anchor = anchor; + result.tag = tag; + + result.id = EventID.scalar; + result.scalarStyle = style; + result.implicit = implicit; + return result; +} diff --git a/source/dub/internal/dyaml/exception.d b/source/dub/internal/dyaml/exception.d new file mode 100644 index 0000000..10e2b8e --- /dev/null +++ b/source/dub/internal/dyaml/exception.d @@ -0,0 +1,171 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +///Exceptions thrown by D:YAML and _exception related code. +module dub.internal.dyaml.exception; + + +import std.algorithm; +import std.array; +import std.string; +import std.conv; + + +/// Base class for all exceptions thrown by D:YAML. +class YAMLException : Exception +{ + /// Construct a YAMLException with specified message and position where it was thrown. + public this(string msg, string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + super(msg, file, line); + } +} + +/// Position in a YAML stream, used for error messages. +struct Mark +{ + package: + /// File name. + string name_; + /// Line number. + ushort line_; + /// Column number. + ushort column_; + + public: + /// Construct a Mark with specified line and column in the file. + this(string name, const uint line, const uint column) @safe pure nothrow @nogc + { + name_ = name; + line_ = cast(ushort)min(ushort.max, line); + // This *will* overflow on extremely wide files but saves CPU time + // (mark ctor takes ~5% of time) + column_ = cast(ushort)column; + } + + /// Get a file name. 
+ @property string name() @safe pure nothrow @nogc const + { + return name_; + } + + /// Get a line number. + @property ushort line() @safe pure nothrow @nogc const + { + return line_; + } + + /// Get a column number. + @property ushort column() @safe pure nothrow @nogc const + { + return column_; + } + + /// Duplicate a mark + Mark dup () const scope @safe pure nothrow + { + return Mark(this.name_.idup, this.line_, this.column_); + } + + /// Get a string representation of the mark. + string toString() const scope @safe pure nothrow + { + // Line/column numbers start at zero internally, make them start at 1. + static string clamped(ushort v) @safe pure nothrow + { + return text(v + 1, v == ushort.max ? " or higher" : ""); + } + return "file " ~ name_ ~ ",line " ~ clamped(line_) ~ ",column " ~ clamped(column_); + } +} + +// Base class of YAML exceptions with marked positions of the problem. +abstract class MarkedYAMLException : YAMLException +{ + /// Position of the error. + Mark mark; + + // Construct a MarkedYAMLException with specified context and problem. + this(string context, scope const Mark contextMark, + string problem, scope const Mark problemMark, + string file = __FILE__, size_t line = __LINE__) @safe pure nothrow + { + const msg = context ~ '\n' ~ + (contextMark != problemMark ? contextMark.toString() ~ '\n' : "") ~ + problem ~ '\n' ~ problemMark.toString() ~ '\n'; + super(msg, file, line); + mark = problemMark.dup; + } + + // Construct a MarkedYAMLException with specified problem. + this(string problem, scope const Mark problemMark, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow + { + super(problem ~ '\n' ~ problemMark.toString(), file, line); + mark = problemMark.dup; + } + + /// Construct a MarkedYAMLException from a struct storing constructor parameters. 
+ this(ref const(MarkedYAMLExceptionData) data) @safe pure nothrow + { + with(data) this(context, contextMark, problem, problemMark); + } +} + +package: +// A struct storing parameters to the MarkedYAMLException constructor. +struct MarkedYAMLExceptionData +{ + // Context of the error. + string context; + // Position of the context in a YAML buffer. + Mark contextMark; + // The error itself. + string problem; + // Position if the error. + Mark problemMark; +} + +// Constructors of YAML exceptions are mostly the same, so we use a mixin. +// +// See_Also: YAMLException +template ExceptionCtors() +{ + public this(string msg, string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow + { + super(msg, file, line); + } +} + +// Constructors of marked YAML exceptions are mostly the same, so we use a mixin. +// +// See_Also: MarkedYAMLException +template MarkedExceptionCtors() +{ + public: + this(string context, const Mark contextMark, string problem, + const Mark problemMark, string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow + { + super(context, contextMark, problem, problemMark, + file, line); + } + + this(string problem, const Mark problemMark, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow + { + super(problem, problemMark, file, line); + } + + this(ref const(MarkedYAMLExceptionData) data) @safe pure nothrow + { + super(data); + } +} diff --git a/source/dub/internal/dyaml/linebreak.d b/source/dub/internal/dyaml/linebreak.d new file mode 100644 index 0000000..e64edd3 --- /dev/null +++ b/source/dub/internal/dyaml/linebreak.d @@ -0,0 +1,32 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module dub.internal.dyaml.linebreak; + + +///Enumerates platform specific line breaks. +enum LineBreak +{ + ///Unix line break ("\n"). + unix, + ///Windows line break ("\r\n"). 
+ windows, + ///Macintosh line break ("\r"). + macintosh +} + +package: + +//Get line break string for specified line break. +string lineBreak(in LineBreak b) pure @safe nothrow +{ + final switch(b) + { + case LineBreak.unix: return "\n"; + case LineBreak.windows: return "\r\n"; + case LineBreak.macintosh: return "\r"; + } +} diff --git a/source/dub/internal/dyaml/loader.d b/source/dub/internal/dyaml/loader.d new file mode 100644 index 0000000..d801c38 --- /dev/null +++ b/source/dub/internal/dyaml/loader.d @@ -0,0 +1,413 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/// Class used to load YAML documents. +module dub.internal.dyaml.loader; + + +import std.exception; +import std.file; +import std.stdio : File; +import std.string; + +import dub.internal.dyaml.composer; +import dub.internal.dyaml.constructor; +import dub.internal.dyaml.event; +import dub.internal.dyaml.exception; +import dub.internal.dyaml.node; +import dub.internal.dyaml.parser; +import dub.internal.dyaml.reader; +import dub.internal.dyaml.resolver; +import dub.internal.dyaml.scanner; +import dub.internal.dyaml.token; + + +/** Loads YAML documents from files or char[]. + * + * User specified Constructor and/or Resolver can be used to support new + * tags / data types. + */ +struct Loader +{ + private: + // Processes character data to YAML tokens. + Scanner scanner_; + // Processes tokens to YAML events. + Parser parser_; + // Resolves tags (data types). + Resolver resolver_; + // Name of the input file or stream, used in error messages. + string name_ = ""; + // Are we done loading? + bool done_; + // Last node read from stream + Node currentNode; + // Has the range interface been initialized yet? 
+ bool rangeInitialized; + + public: + @disable this(); + @disable int opCmp(ref Loader); + @disable bool opEquals(ref Loader); + + /** Construct a Loader to load YAML from a file. + * + * Params: filename = Name of the file to load from. + * file = Already-opened file to load from. + * + * Throws: YAMLException if the file could not be opened or read. + */ + static Loader fromFile(string filename) @trusted + { + try + { + auto loader = Loader(std.file.read(filename), filename); + return loader; + } + catch(FileException e) + { + throw new YAMLException("Unable to open file %s for YAML loading: %s" + .format(filename, e.msg), e.file, e.line); + } + } + /// ditto + static Loader fromFile(File file) @system + { + auto loader = Loader(file.byChunk(4096).join, file.name); + return loader; + } + + /** Construct a Loader to load YAML from a string. + * + * Params: + * data = String to load YAML from. The char[] version $(B will) + * overwrite its input during parsing as D:YAML reuses memory. + * filename = The filename to give to the Loader, defaults to `""` + * + * Returns: Loader loading YAML from given string. + * + * Throws: + * + * YAMLException if data could not be read (e.g. a decoding error) + */ + static Loader fromString(char[] data, string filename = "") @safe + { + return Loader(cast(ubyte[])data, filename); + } + /// Ditto + static Loader fromString(string data, string filename = "") @safe + { + return fromString(data.dup, filename); + } + /// Load a char[]. + @safe unittest + { + assert(Loader.fromString("42".dup).load().as!int == 42); + } + /// Load a string. + @safe unittest + { + assert(Loader.fromString("42").load().as!int == 42); + } + + /** Construct a Loader to load YAML from a buffer. + * + * Params: yamlData = Buffer with YAML data to load. This may be e.g. a file + * loaded to memory or a string with YAML data. Note that + * buffer $(B will) be overwritten, as D:YAML minimizes + * memory allocations by reusing the input _buffer. 
+ * $(B Must not be deleted or modified by the user as long + * as nodes loaded by this Loader are in use!) - Nodes may + * refer to data in this buffer. + * + * Note that D:YAML looks for byte-order-marks YAML files encoded in + * UTF-16/UTF-32 (and sometimes UTF-8) use to specify the encoding and + * endianness, so it should be enough to load an entire file to a buffer and + * pass it to D:YAML, regardless of Unicode encoding. + * + * Throws: YAMLException if yamlData contains data illegal in YAML. + */ + static Loader fromBuffer(ubyte[] yamlData) @safe + { + return Loader(yamlData); + } + /// Ditto + static Loader fromBuffer(void[] yamlData) @system + { + return Loader(yamlData); + } + /// Ditto + private this(void[] yamlData, string name = "") @system + { + this(cast(ubyte[])yamlData, name); + } + /// Ditto + private this(ubyte[] yamlData, string name = "") @safe + { + resolver_ = Resolver.withDefaultResolvers; + name_ = name; + try + { + auto reader_ = new Reader(yamlData, name); + scanner_ = Scanner(reader_); + parser_ = new Parser(scanner_); + } + catch(YAMLException e) + { + throw new YAMLException("Unable to open %s for YAML loading: %s" + .format(name_, e.msg), e.file, e.line); + } + } + + + /// Set stream _name. Used in debugging messages. + void name(string name) pure @safe nothrow @nogc + { + name_ = name; + scanner_.name = name; + } + + /// Specify custom Resolver to use. + auto ref resolver() pure @safe nothrow @nogc + { + return resolver_; + } + + /** Load single YAML document. + * + * If none or more than one YAML document is found, this throws a YAMLException. + * + * This can only be called once; this is enforced by contract. + * + * Returns: Root node of the document. + * + * Throws: YAMLException if there wasn't exactly one document + * or on a YAML parsing error. 
+ */ + Node load() @safe + { + enforce!YAMLException(!empty, "Zero documents in stream"); + auto output = front; + popFront(); + enforce!YAMLException(empty, "More than one document in stream"); + return output; + } + + /** Implements the empty range primitive. + * + * If there's no more documents left in the stream, this will be true. + * + * Returns: `true` if no more documents left, `false` otherwise. + */ + bool empty() @safe + { + // currentNode and done_ are both invalid until popFront is called once + if (!rangeInitialized) + { + popFront(); + } + return done_; + } + /** Implements the popFront range primitive. + * + * Reads the next document from the stream, if possible. + */ + void popFront() @safe + { + // Composer initialization is done here in case the constructor is + // modified, which is a pretty common case. + static Composer composer; + if (!rangeInitialized) + { + composer = Composer(parser_, resolver_); + rangeInitialized = true; + } + assert(!done_, "Loader.popFront called on empty range"); + if (composer.checkNode()) + { + currentNode = composer.getNode(); + } + else + { + done_ = true; + } + } + /** Implements the front range primitive. + * + * Returns: the current document as a Node. + */ + Node front() @safe + { + // currentNode and done_ are both invalid until popFront is called once + if (!rangeInitialized) + { + popFront(); + } + return currentNode; + } + + // Scan all tokens, throwing them away. Used for benchmarking. + void scanBench() @safe + { + try + { + while(!scanner_.empty) + { + scanner_.popFront(); + } + } + catch(YAMLException e) + { + throw new YAMLException("Unable to scan YAML from stream " ~ + name_ ~ " : " ~ e.msg, e.file, e.line); + } + } + + + // Parse and return all events. Used for debugging. 
+ auto parse() @safe + { + return parser_; + } +} +/// Load single YAML document from a file: +@safe unittest +{ + write("example.yaml", "Hello world!"); + auto rootNode = Loader.fromFile("example.yaml").load(); + assert(rootNode == "Hello world!"); +} +/// Load single YAML document from an already-opened file: +@system unittest +{ + // Open a temporary file + auto file = File.tmpfile; + // Write valid YAML + file.write("Hello world!"); + // Return to the beginning + file.seek(0); + // Load document + auto rootNode = Loader.fromFile(file).load(); + assert(rootNode == "Hello world!"); +} +/// Load all YAML documents from a file: +@safe unittest +{ + import std.array : array; + import std.file : write; + write("example.yaml", + "---\n"~ + "Hello world!\n"~ + "...\n"~ + "---\n"~ + "Hello world 2!\n"~ + "...\n" + ); + auto nodes = Loader.fromFile("example.yaml").array; + assert(nodes.length == 2); +} +/// Iterate over YAML documents in a file, lazily loading them: +@safe unittest +{ + import std.file : write; + write("example.yaml", + "---\n"~ + "Hello world!\n"~ + "...\n"~ + "---\n"~ + "Hello world 2!\n"~ + "...\n" + ); + auto loader = Loader.fromFile("example.yaml"); + + foreach(ref node; loader) + { + //Do something + } +} +/// Load YAML from a string: +@safe unittest +{ + string yaml_input = ("red: '#ff0000'\n" ~ + "green: '#00ff00'\n" ~ + "blue: '#0000ff'"); + + auto colors = Loader.fromString(yaml_input).load(); + + foreach(string color, string value; colors) + { + // Do something with the color and its value... + } +} + +/// Load a file into a buffer in memory and then load YAML from that buffer: +@safe unittest +{ + import std.file : read, write; + import std.stdio : writeln; + // Create a yaml document + write("example.yaml", + "---\n"~ + "Hello world!\n"~ + "...\n"~ + "---\n"~ + "Hello world 2!\n"~ + "...\n" + ); + try + { + string buffer = readText("example.yaml"); + auto yamlNode = Loader.fromString(buffer); + + // Read data from yamlNode here... 
+ } + catch(FileException e) + { + writeln("Failed to read file 'example.yaml'"); + } +} +/// Use a custom resolver to support custom data types and/or implicit tags: +@safe unittest +{ + import std.file : write; + // Create a yaml document + write("example.yaml", + "---\n"~ + "Hello world!\n"~ + "...\n" + ); + + auto loader = Loader.fromFile("example.yaml"); + + // Add resolver expressions here... + // loader.resolver.addImplicitResolver(...); + + auto rootNode = loader.load(); +} + +//Issue #258 - https://github.com/dlang-community/D-YAML/issues/258 +@safe unittest +{ + auto yaml = "{\n\"root\": {\n\t\"key\": \"value\"\n }\n}"; + auto doc = Loader.fromString(yaml).load(); + assert(doc.isValid); +} + +@safe unittest +{ + import std.exception : collectException; + + auto yaml = q"EOS + value: invalid: string +EOS"; + auto filename = "invalid.yml"; + auto loader = Loader.fromString(yaml); + loader.name = filename; + + Node unused; + auto e = loader.load().collectException!ScannerException(unused); + assert(e.mark.name == filename); +} diff --git a/source/dub/internal/dyaml/node.d b/source/dub/internal/dyaml/node.d new file mode 100644 index 0000000..45a25b7 --- /dev/null +++ b/source/dub/internal/dyaml/node.d @@ -0,0 +1,2641 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/// Node of a YAML document. Used to read YAML data once it's loaded, +/// and to prepare data to emit. +module dub.internal.dyaml.node; + + +import std.algorithm; +import std.array; +import std.conv; +import std.datetime; +import std.exception; +import std.format; +import std.math; +import std.meta : AliasSeq; +import std.range; +import std.string; +import std.traits; +import std.typecons; + +// FIXME: Switch back to upstream's when v2.101 is the oldest +// supported version (recommended: after v2.111 release). 
+import dub.internal.dyaml.stdsumtype; + +import dub.internal.dyaml.event; +import dub.internal.dyaml.exception; +import dub.internal.dyaml.style; + +/// Exception thrown at node related errors. +class NodeException : MarkedYAMLException +{ + package: + // Construct a NodeException. + // + // Params: msg = Error message. + // start = Start position of the node. + this(string msg, const scope Mark start, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow + { + super(msg, start, file, line); + } +} + +// Node kinds. +enum NodeID : ubyte +{ + scalar, + sequence, + mapping, + invalid +} + +/// Null YAML type. Used in nodes with _null values. +struct YAMLNull +{ + /// Used for string conversion. + string toString() const pure @safe nothrow {return "null";} +} + +/// Invalid YAML type, used internally by SumType +private struct YAMLInvalid {} + +// Merge YAML type, used to support "tag:yaml.org,2002:merge". +package struct YAMLMerge{} + +// Key-value pair of YAML nodes, used in mappings. +private struct Pair +{ + public: + /// Key node. + Node key; + /// Value node. + Node value; + + /// Construct a Pair from two values. Will be converted to Nodes if needed. + this(K, V)(K key, V value) + { + static if(is(Unqual!K == Node)){this.key = key;} + else {this.key = Node(key);} + static if(is(Unqual!V == Node)){this.value = value;} + else {this.value = Node(value);} + } + + /// Equality test with another Pair. + bool opEquals(const ref Pair rhs) const scope @safe + { + return key == rhs.key && value == rhs.value; + } + + // Comparison with another Pair. + int opCmp(const scope ref Pair rhs) const scope @safe + { + const keyCmp = key.opCmp(rhs.key); + return keyCmp != 0 ? 
keyCmp + : value.opCmp(rhs.value); + } + + /// + public void toString (scope void delegate(scope const(char)[]) @safe sink) + const scope @safe + { + // formattedWrite does not accept `scope` parameters + () @trusted { + formattedWrite(sink, "%s: %s", this.key, this.value); + }(); + } +} + +enum NodeType +{ + null_, + merge, + boolean, + integer, + decimal, + binary, + timestamp, + string, + mapping, + sequence, + invalid +} + +/** YAML node. + * + * This is a pseudo-dynamic type that can store any YAML value, including a + * sequence or mapping of nodes. You can get data from a Node directly or + * iterate over it if it's a collection. + */ +struct Node +{ + public: + alias Pair = .Pair; + + package: + // YAML value type. + alias Value = SumType!( + YAMLInvalid, YAMLNull, YAMLMerge, + bool, long, real, ubyte[], SysTime, string, + Node.Pair[], Node[]); + + // Can Value hold this type naturally? + enum allowed(T) = isIntegral!T || + isFloatingPoint!T || + isSomeString!T || + is(typeof({ Value i = T.init; })); + + // Stored value. + Value value_; + // Start position of the node. + Mark startMark_; + + // Tag of the node. + string tag_; + // Node scalar style. Used to remember style this node was loaded with. + ScalarStyle scalarStyle = ScalarStyle.invalid; + // Node collection style. Used to remember style this node was loaded with. + CollectionStyle collectionStyle = CollectionStyle.invalid; + + public: + /** Construct a Node from a value. + * + * Any type except for Node can be stored in a Node, but default YAML + * types (integers, floats, strings, timestamps, etc.) will be stored + * more efficiently. To create a node representing a null value, + * construct it from YAMLNull. + * + * If value is a node, its value will be copied directly. The tag and + * other information attached to the original node will be discarded. + * + * If value is an array of nodes or pairs, it is stored directly. 
+ * Otherwise, every value in the array is converted to a node, and + * those nodes are stored. + * + * Note that to emit any non-default types you store + * in a node, you need a Representer to represent them in YAML - + * otherwise emitting will fail. + * + * Params: value = Value to store in the node. + * tag = Overrides tag of the node when emitted, regardless + * of tag determined by Representer. Representer uses + * this to determine YAML data type when a D data type + * maps to multiple different YAML data types. Tag must + * be in full form, e.g. "tag:yaml.org,2002:int", not + * a shortcut, like "!!int". + */ + this(T)(T value, const string tag = null) @safe + if (allowed!T || isArray!T || isAssociativeArray!T || is(Unqual!T == Node) || castableToNode!T) + { + tag_ = tag; + + //Unlike with assignment, we're just copying the value. + static if (is(Unqual!T == Node)) + { + setValue(value.value_); + } + else static if(isSomeString!T) + { + setValue(value.to!string); + } + else static if(is(Unqual!T == bool)) + { + setValue(cast(bool)value); + } + else static if(isIntegral!T) + { + setValue(cast(long)value); + } + else static if(isFloatingPoint!T) + { + setValue(cast(real)value); + } + else static if (isArray!T) + { + alias ElementT = Unqual!(ElementType!T); + // Construction from raw node or pair array. + static if(is(ElementT == Node) || is(ElementT == Node.Pair)) + { + setValue(value); + } + // Need to handle byte buffers separately. + else static if(is(ElementT == byte) || is(ElementT == ubyte)) + { + setValue(cast(ubyte[]) value); + } + else + { + Node[] nodes; + foreach(ref v; value) + { + nodes ~= Node(v); + } + setValue(nodes); + } + } + else static if (isAssociativeArray!T) + { + Node.Pair[] pairs; + foreach(k, ref v; value) + { + pairs ~= Pair(k, v); + } + setValue(pairs); + } + // User defined type. 
+ else + { + setValue(value); + } + } + /// Construct a scalar node + @safe unittest + { + // Integer + { + auto node = Node(5); + } + // String + { + auto node = Node("Hello world!"); + } + // Floating point + { + auto node = Node(5.0f); + } + // Boolean + { + auto node = Node(true); + } + // Time + { + auto node = Node(SysTime(DateTime(2005, 6, 15, 20, 0, 0), UTC())); + } + // Integer, dumped as a string + { + auto node = Node(5, "tag:yaml.org,2002:str"); + } + } + /// Construct a sequence node + @safe unittest + { + // Will be emitted as a sequence (default for arrays) + { + auto seq = Node([1, 2, 3, 4, 5]); + } + // Will be emitted as a set (overridden tag) + { + auto set = Node([1, 2, 3, 4, 5], "tag:yaml.org,2002:set"); + } + // Can also store arrays of arrays + { + auto node = Node([[1,2], [3,4]]); + } + } + /// Construct a mapping node + @safe unittest + { + // Will be emitted as an unordered mapping (default for mappings) + auto map = Node([1 : "a", 2 : "b"]); + // Will be emitted as an ordered map (overridden tag) + auto omap = Node([1 : "a", 2 : "b"], "tag:yaml.org,2002:omap"); + // Will be emitted as pairs (overridden tag) + auto pairs = Node([1 : "a", 2 : "b"], "tag:yaml.org,2002:pairs"); + } + @safe unittest + { + { + auto node = Node(42); + assert(node.nodeID == NodeID.scalar); + assert(node.as!int == 42 && node.as!float == 42.0f && node.as!string == "42"); + } + + { + auto node = Node("string"); + assert(node.as!string == "string"); + } + } + @safe unittest + { + with(Node([1, 2, 3])) + { + assert(nodeID == NodeID.sequence); + assert(length == 3); + assert(opIndex(2).as!int == 3); + } + + } + @safe unittest + { + int[string] aa; + aa["1"] = 1; + aa["2"] = 2; + with(Node(aa)) + { + assert(nodeID == NodeID.mapping); + assert(length == 2); + assert(opIndex("2").as!int == 2); + } + } + @safe unittest + { + auto node = Node(Node(4, "tag:yaml.org,2002:str")); + assert(node == 4); + assert(node.tag_ == ""); + } + + /** Construct a node from arrays of _keys 
and _values. + * + * Constructs a mapping node with key-value pairs from + * _keys and _values, keeping their order. Useful when order + * is important (ordered maps, pairs). + * + * + * keys and values must have equal length. + * + * + * If _keys and/or _values are nodes, they are stored directly/ + * Otherwise they are converted to nodes and then stored. + * + * Params: keys = Keys of the mapping, from first to last pair. + * values = Values of the mapping, from first to last pair. + * tag = Overrides tag of the node when emitted, regardless + * of tag determined by Representer. Representer uses + * this to determine YAML data type when a D data type + * maps to multiple different YAML data types. + * This is used to differentiate between YAML unordered + * mappings ("!!map"), ordered mappings ("!!omap"), and + * pairs ("!!pairs") which are all internally + * represented as an array of node pairs. Tag must be + * in full form, e.g. "tag:yaml.org,2002:omap", not a + * shortcut, like "!!omap". + * + */ + this(K, V)(K[] keys, V[] values, const string tag = null) + if(!(isSomeString!(K[]) || isSomeString!(V[]))) + in(keys.length == values.length, + "Lengths of keys and values arrays to construct " ~ + "a YAML node from don't match") + { + tag_ = tag; + + Node.Pair[] pairs; + foreach(i; 0 .. keys.length){pairs ~= Pair(keys[i], values[i]);} + setValue(pairs); + } + /// + @safe unittest + { + // Will be emitted as an unordered mapping (default for mappings) + auto map = Node([1, 2], ["a", "b"]); + // Will be emitted as an ordered map (overridden tag) + auto omap = Node([1, 2], ["a", "b"], "tag:yaml.org,2002:omap"); + // Will be emitted as pairs (overriden tag) + auto pairs = Node([1, 2], ["a", "b"], "tag:yaml.org,2002:pairs"); + } + @safe unittest + { + with(Node(["1", "2"], [1, 2])) + { + assert(nodeID == NodeID.mapping); + assert(length == 2); + assert(opIndex("2").as!int == 2); + } + + } + + /// Is this node valid (initialized)? 
+ @property bool isValid() const scope @safe pure nothrow @nogc + { + return value_.match!((const YAMLInvalid _) => false, _ => true); + } + + /// Return tag of the node. + @property string tag() const return scope @safe pure nothrow @nogc + { + return tag_; + } + + /// Return the start position of the node. + @property Mark startMark() const return scope @safe pure nothrow @nogc + { + return startMark_; + } + + /** Equality test. + * + * If T is Node, recursively compares all subnodes. + * This might be quite expensive if testing entire documents. + * + * If T is not Node, gets a value of type T from the node and tests + * equality with that. + * + * To test equality with a null YAML value, use YAMLNull. + * + * Params: rhs = Variable to test equality with. + * + * Returns: true if equal, false otherwise. + */ + bool opEquals(const scope Node rhs) const scope @safe + { + return opCmp(rhs) == 0; + } + bool opEquals(T)(const scope auto ref T rhs) const @safe + { + try + { + auto stored = get!(T, No.stringConversion); + // NaNs aren't normally equal to each other, but we'll pretend they are. + static if(isFloatingPoint!T) + { + return rhs == stored || (isNaN(rhs) && isNaN(stored)); + } + else + { + return rhs == stored; + } + } + catch(NodeException e) + { + return false; + } + } + /// + @safe unittest + { + auto node = Node(42); + + assert(node == 42); + assert(node != "42"); + assert(node != "43"); + + auto node2 = Node(YAMLNull()); + assert(node2 == YAMLNull()); + + const node3 = Node(42); + assert(node3 == 42); + } + + /// Shortcut for get(). + alias as = get; + + /** Get the value of the node as specified type. + * + * If the specifed type does not match type in the node, + * conversion is attempted. The stringConversion template + * parameter can be used to disable conversion from non-string + * types to strings. + * + * Numeric values are range checked, throwing if out of range of + * requested type. + * + * Timestamps are stored as std.datetime.SysTime. 
+ * Binary values are decoded and stored as ubyte[]. + * + * To get a null value, use get!YAMLNull . This is to + * prevent getting null values for types such as strings or classes. + * + * $(BR)$(B Mapping default values:) + * + * $(PBR + * The '=' key can be used to denote the default value of a mapping. + * This can be used when a node is scalar in early versions of a program, + * but is replaced by a mapping later. Even if the node is a mapping, the + * get method can be used as if it was a scalar if it has a default value. + * This way, new YAML files where the node is a mapping can still be read + * by old versions of the program, which expect the node to be a scalar. + * ) + * + * Returns: Value of the node as specified type. + * + * Throws: NodeException if unable to convert to specified type, or if + * the value is out of range of requested type. + */ + inout(T) get(T, Flag!"stringConversion" stringConversion = Yes.stringConversion)() inout @safe return scope + { + static assert (allowed!(Unqual!T) || + hasNodeConstructor!(inout(Unqual!T)) || + (!hasIndirections!(Unqual!T) && hasNodeConstructor!(Unqual!T))); + + static if(!allowed!(Unqual!T)) + { + static if (hasSimpleNodeConstructor!(Unqual!T) || hasSimpleNodeConstructor!(inout(Unqual!T))) + { + alias params = AliasSeq!(this); + } + else static if (hasExpandedNodeConstructor!(Unqual!T) || hasExpandedNodeConstructor!(inout(Unqual!T))) + { + alias params = AliasSeq!(this, tag_); + } + else + { + static assert(0, "Unknown Node constructor?"); + } + + static if (is(T == class)) + { + return new inout T(params); + } + else static if (is(T == struct)) + { + return T(params); + } + else + { + static assert(0, "Unhandled user type"); + } + } else { + static if (canBeType!T) + if (isType!(Unqual!T)) { return getValue!T; } + + // If we're getting from a mapping and we're not getting Node.Pair[], + // we're getting the default value. 
+ if(nodeID == NodeID.mapping){return this["="].get!( T, stringConversion);} + + static if(isSomeString!T) + { + static if(!stringConversion) + { + enforce(type == NodeType.string, new NodeException( + "Node stores unexpected type: " ~ text(type) ~ + ". Expected: " ~ typeid(T).toString(), startMark_)); + return to!T(getValue!string); + } + else + { + // Try to convert to string. + try + { + return coerceValue!T().dup; + } + catch (MatchException e) + { + throw new NodeException("Unable to convert node value to string", startMark_); + } + } + } + else static if(isFloatingPoint!T) + { + final switch (type) + { + case NodeType.integer: + return to!T(getValue!long); + case NodeType.decimal: + return to!T(getValue!real); + case NodeType.binary: + case NodeType.string: + case NodeType.boolean: + case NodeType.null_: + case NodeType.merge: + case NodeType.invalid: + case NodeType.timestamp: + case NodeType.mapping: + case NodeType.sequence: + throw new NodeException("Node stores unexpected type: " ~ text(type) ~ + ". Expected: " ~ typeid(T).toString, startMark_); + } + } + else static if(isIntegral!T) + { + enforce(type == NodeType.integer, new NodeException("Node stores unexpected type: " ~ text(type) ~ + ". Expected: " ~ typeid(T).toString, startMark_)); + immutable temp = getValue!long; + enforce(temp >= T.min && temp <= T.max, + new NodeException("Integer value of type " ~ typeid(T).toString() ~ + " out of range. Value: " ~ to!string(temp), startMark_)); + return temp.to!T; + } + else throw new NodeException("Node stores unexpected type: " ~ text(type) ~ + ". 
Expected: " ~ typeid(T).toString, startMark_); + } + } + /// ditto + T get(T)() const + if (hasIndirections!(Unqual!T) && hasNodeConstructor!(Unqual!T) && (!hasNodeConstructor!(inout(Unqual!T)))) + { + static if (hasSimpleNodeConstructor!T) + { + alias params = AliasSeq!(this); + } + else static if (hasExpandedNodeConstructor!T) + { + alias params = AliasSeq!(this, tag_); + } + else + { + static assert(0, "Unknown Node constructor?"); + } + static if (is(T == class)) + { + return new T(params); + } + else static if (is(T == struct)) + { + return T(params); + } + else + { + static assert(0, "Unhandled user type"); + } + } + /// Automatic type conversion + @safe unittest + { + auto node = Node(42); + + assert(node.get!int == 42); + assert(node.get!string == "42"); + assert(node.get!double == 42.0); + } + /// Scalar node to struct and vice versa + @safe unittest + { + import dub.internal.dyaml.dumper : dumper; + import dub.internal.dyaml.loader : Loader; + static struct MyStruct + { + int x, y, z; + + this(int x, int y, int z) @safe + { + this.x = x; + this.y = y; + this.z = z; + } + + this(scope const Node node) @safe + { + // `std.array.split` is not marked as taking a `scope` range, + // but we don't escape a reference. + scope parts = () @trusted { return node.as!string().split(":"); }(); + x = parts[0].to!int; + y = parts[1].to!int; + z = parts[2].to!int; + } + + Node opCast(T: Node)() @safe + { + //Using custom scalar format, x:y:z. + auto scalar = format("%s:%s:%s", x, y, z); + //Representing as a scalar, with custom tag to specify this data type. 
+ return Node(scalar, "!mystruct.tag"); + } + } + + auto appender = new Appender!string; + + // Dump struct to yaml document + dumper().dump(appender, Node(MyStruct(1,2,3))); + + // Read yaml document back as a MyStruct + auto loader = Loader.fromString(appender.data); + Node node = loader.load(); + assert(node.as!MyStruct == MyStruct(1,2,3)); + } + /// Sequence node to struct and vice versa + @safe unittest + { + import dub.internal.dyaml.dumper : dumper; + import dub.internal.dyaml.loader : Loader; + static struct MyStruct + { + int x, y, z; + + this(int x, int y, int z) @safe + { + this.x = x; + this.y = y; + this.z = z; + } + + this(Node node) @safe + { + x = node[0].as!int; + y = node[1].as!int; + z = node[2].as!int; + } + + Node opCast(T: Node)() + { + return Node([x, y, z], "!mystruct.tag"); + } + } + + auto appender = new Appender!string; + + // Dump struct to yaml document + dumper().dump(appender, Node(MyStruct(1,2,3))); + + // Read yaml document back as a MyStruct + auto loader = Loader.fromString(appender.data); + Node node = loader.load(); + assert(node.as!MyStruct == MyStruct(1,2,3)); + } + /// Mapping node to struct and vice versa + @safe unittest + { + import dub.internal.dyaml.dumper : dumper; + import dub.internal.dyaml.loader : Loader; + static struct MyStruct + { + int x, y, z; + + Node opCast(T: Node)() + { + auto pairs = [Node.Pair("x", x), + Node.Pair("y", y), + Node.Pair("z", z)]; + return Node(pairs, "!mystruct.tag"); + } + + this(int x, int y, int z) + { + this.x = x; + this.y = y; + this.z = z; + } + + this(Node node) @safe + { + x = node["x"].as!int; + y = node["y"].as!int; + z = node["z"].as!int; + } + } + + auto appender = new Appender!string; + + // Dump struct to yaml document + dumper().dump(appender, Node(MyStruct(1,2,3))); + + // Read yaml document back as a MyStruct + auto loader = Loader.fromString(appender.data); + Node node = loader.load(); + assert(node.as!MyStruct == MyStruct(1,2,3)); + } + /// Classes can be used too + 
@system unittest { + import dub.internal.dyaml.dumper : dumper; + import dub.internal.dyaml.loader : Loader; + + static class MyClass + { + int x, y, z; + + this(int x, int y, int z) + { + this.x = x; + this.y = y; + this.z = z; + } + + this(scope const Node node) @safe inout + { + // `std.array.split` is not marked as taking a `scope` range, + // but we don't escape a reference. + scope parts = () @trusted { return node.as!string().split(":"); }(); + x = parts[0].to!int; + y = parts[1].to!int; + z = parts[2].to!int; + } + + ///Useful for Node.as!string. + override string toString() + { + return format("MyClass(%s, %s, %s)", x, y, z); + } + + Node opCast(T: Node)() @safe + { + //Using custom scalar format, x:y:z. + auto scalar = format("%s:%s:%s", x, y, z); + //Representing as a scalar, with custom tag to specify this data type. + return Node(scalar, "!myclass.tag"); + } + override bool opEquals(Object o) + { + if (auto other = cast(MyClass)o) + { + return (other.x == x) && (other.y == y) && (other.z == z); + } + return false; + } + } + auto appender = new Appender!string; + + // Dump class to yaml document + dumper().dump(appender, Node(new MyClass(1,2,3))); + + // Read yaml document back as a MyClass + auto loader = Loader.fromString(appender.data); + Node node = loader.load(); + assert(node.as!MyClass == new MyClass(1,2,3)); + } + // Make sure custom tags and styles are kept. 
+ @safe unittest + { + static struct MyStruct + { + Node opCast(T: Node)() + { + auto node = Node("hi", "!mystruct.tag"); + node.setStyle(ScalarStyle.doubleQuoted); + return node; + } + } + + auto node = Node(MyStruct.init); + assert(node.tag == "!mystruct.tag"); + assert(node.scalarStyle == ScalarStyle.doubleQuoted); + } + // ditto, but for collection style + @safe unittest + { + static struct MyStruct + { + Node opCast(T: Node)() + { + auto node = Node(["hi"], "!mystruct.tag"); + node.setStyle(CollectionStyle.flow); + return node; + } + } + + auto node = Node(MyStruct.init); + assert(node.tag == "!mystruct.tag"); + assert(node.collectionStyle == CollectionStyle.flow); + } + @safe unittest + { + assertThrown!NodeException(Node("42").get!int); + assertThrown!NodeException(Node("42").get!double); + assertThrown!NodeException(Node(long.max).get!ushort); + Node(YAMLNull()).get!YAMLNull; + } + @safe unittest + { + const node = Node(42); + assert(node.get!int == 42); + assert(node.get!string == "42"); + assert(node.get!double == 42.0); + + immutable node2 = Node(42); + assert(node2.get!int == 42); + assert(node2.get!(const int) == 42); + assert(node2.get!(immutable int) == 42); + assert(node2.get!string == "42"); + assert(node2.get!(const string) == "42"); + assert(node2.get!(immutable string) == "42"); + assert(node2.get!double == 42.0); + assert(node2.get!(const double) == 42.0); + assert(node2.get!(immutable double) == 42.0); + } + + /** If this is a collection, return its _length. + * + * Otherwise, throw NodeException. + * + * Returns: Number of elements in a sequence or key-value pairs in a mapping. + * + * Throws: NodeException if this is not a sequence nor a mapping. 
+ */ + @property size_t length() const @safe + { + final switch(nodeID) + { + case NodeID.sequence: + return getValue!(Node[]).length; + case NodeID.mapping: + return getValue!(Pair[]).length; + case NodeID.scalar: + case NodeID.invalid: + throw new NodeException("Trying to get length of a " ~ nodeTypeString ~ " node", + startMark_); + } + } + @safe unittest + { + auto node = Node([1,2,3]); + assert(node.length == 3); + const cNode = Node([1,2,3]); + assert(cNode.length == 3); + immutable iNode = Node([1,2,3]); + assert(iNode.length == 3); + } + + /** Get the element at specified index. + * + * If the node is a sequence, index must be integral. + * + * + * If the node is a mapping, return the value corresponding to the first + * key equal to index. containsKey() can be used to determine if a mapping + * has a specific key. + * + * To get element at a null index, use YAMLNull for index. + * + * Params: index = Index to use. + * + * Returns: Value corresponding to the index. + * + * Throws: NodeException if the index could not be found, + * non-integral index is used with a sequence or the node is + * not a collection. + */ + ref inout(Node) opIndex(T)(T index) inout return scope @safe + { + final switch (nodeID) + { + case NodeID.sequence: + checkSequenceIndex(index); + static if(isIntegral!T) + { + return getValue!(Node[])[index]; + } + else + { + assert(false, "Only integers may index sequence nodes"); + } + case NodeID.mapping: + auto idx = findPair(index); + if(idx >= 0) + { + return getValue!(Pair[])[idx].value; + } + + string msg = "Mapping index not found" ~ (isSomeString!T ? 
": " ~ to!string(index) : ""); + throw new NodeException(msg, startMark_); + case NodeID.scalar: + case NodeID.invalid: + throw new NodeException("Trying to index a " ~ nodeTypeString ~ " node", startMark_); + } + } + /// + @safe unittest + { + Node narray = Node([11, 12, 13, 14]); + Node nmap = Node(["11", "12", "13", "14"], [11, 12, 13, 14]); + + assert(narray[0].as!int == 11); + assert(null !is collectException(narray[42])); + assert(nmap["11"].as!int == 11); + assert(nmap["14"].as!int == 14); + } + @safe unittest + { + Node narray = Node([11, 12, 13, 14]); + Node nmap = Node(["11", "12", "13", "14"], [11, 12, 13, 14]); + + assert(narray[0].as!int == 11); + assert(null !is collectException(narray[42])); + assert(nmap["11"].as!int == 11); + assert(nmap["14"].as!int == 14); + assert(null !is collectException(nmap["42"])); + + narray.add(YAMLNull()); + nmap.add(YAMLNull(), "Nothing"); + assert(narray[4].as!YAMLNull == YAMLNull()); + assert(nmap[YAMLNull()].as!string == "Nothing"); + + assertThrown!NodeException(nmap[11]); + assertThrown!NodeException(nmap[14]); + } + + /** Determine if a collection contains specified value. + * + * If the node is a sequence, check if it contains the specified value. + * If it's a mapping, check if it has a value that matches specified value. + * + * Params: rhs = Item to look for. Use YAMLNull to check for a null value. + * + * Returns: true if rhs was found, false otherwise. + * + * Throws: NodeException if the node is not a collection. + */ + bool contains(T)(T rhs) const + { + return contains_!(T, No.key, "contains")(rhs); + } + @safe unittest + { + auto mNode = Node(["1", "2", "3"]); + assert(mNode.contains("2")); + const cNode = Node(["1", "2", "3"]); + assert(cNode.contains("2")); + immutable iNode = Node(["1", "2", "3"]); + assert(iNode.contains("2")); + } + + + /** Determine if a mapping contains specified key. + * + * Params: rhs = Key to look for. Use YAMLNull to check for a null key. 
+ * + * Returns: true if rhs was found, false otherwise. + * + * Throws: NodeException if the node is not a mapping. + */ + bool containsKey(T)(T rhs) const + { + return contains_!(T, Yes.key, "containsKey")(rhs); + } + + // Unittest for contains() and containsKey(). + @safe unittest + { + auto seq = Node([1, 2, 3, 4, 5]); + assert(seq.contains(3)); + assert(seq.contains(5)); + assert(!seq.contains("5")); + assert(!seq.contains(6)); + assert(!seq.contains(float.nan)); + assertThrown!NodeException(seq.containsKey(5)); + + auto seq2 = Node(["1", "2"]); + assert(seq2.contains("1")); + assert(!seq2.contains(1)); + + auto map = Node(["1", "2", "3", "4"], [1, 2, 3, 4]); + assert(map.contains(1)); + assert(!map.contains("1")); + assert(!map.contains(5)); + assert(!map.contains(float.nan)); + assert(map.containsKey("1")); + assert(map.containsKey("4")); + assert(!map.containsKey(1)); + assert(!map.containsKey("5")); + + assert(!seq.contains(YAMLNull())); + assert(!map.contains(YAMLNull())); + assert(!map.containsKey(YAMLNull())); + seq.add(YAMLNull()); + map.add("Nothing", YAMLNull()); + assert(seq.contains(YAMLNull())); + assert(map.contains(YAMLNull())); + assert(!map.containsKey(YAMLNull())); + map.add(YAMLNull(), "Nothing"); + assert(map.containsKey(YAMLNull())); + + auto map2 = Node([1, 2, 3, 4], [1, 2, 3, 4]); + assert(!map2.contains("1")); + assert(map2.contains(1)); + assert(!map2.containsKey("1")); + assert(map2.containsKey(1)); + + // scalar + assertThrown!NodeException(Node(1).contains(4)); + assertThrown!NodeException(Node(1).containsKey(4)); + + auto mapNan = Node([1.0, 2, double.nan], [1, double.nan, 5]); + + assert(mapNan.contains(double.nan)); + assert(mapNan.containsKey(double.nan)); + } + + /// Assignment (shallow copy) by value. 
+ void opAssign()(auto ref Node rhs) + { + assumeWontThrow(setValue(rhs.value_)); + startMark_ = rhs.startMark_; + tag_ = rhs.tag_; + scalarStyle = rhs.scalarStyle; + collectionStyle = rhs.collectionStyle; + } + // Unittest for opAssign(). + @safe unittest + { + auto seq = Node([1, 2, 3, 4, 5]); + auto assigned = seq; + assert(seq == assigned, + "Node.opAssign() doesn't produce an equivalent copy"); + } + + /** Set element at specified index in a collection. + * + * This method can only be called on collection nodes. + * + * If the node is a sequence, index must be integral. + * + * If the node is a mapping, sets the _value corresponding to the first + * key matching index (including conversion, so e.g. "42" matches 42). + * + * If the node is a mapping and no key matches index, a new key-value + * pair is added to the mapping. In sequences the index must be in + * range. This ensures behavior siilar to D arrays and associative + * arrays. + * + * To set element at a null index, use YAMLNull for index. + * + * Params: + * value = Value to assign. + * index = Index of the value to set. + * + * Throws: NodeException if the node is not a collection, index is out + * of range or if a non-integral index is used on a sequence node. 
+ */ + void opIndexAssign(K, V)(V value, K index) + { + final switch (nodeID) + { + case NodeID.sequence: + checkSequenceIndex(index); + static if(isIntegral!K || is(Unqual!K == bool)) + { + auto nodes = getValue!(Node[]); + static if(is(Unqual!V == Node)){nodes[index] = value;} + else {nodes[index] = Node(value);} + setValue(nodes); + return; + } + assert(false, "Only integers may index sequence nodes"); + case NodeID.mapping: + const idx = findPair(index); + if(idx < 0){add(index, value);} + else + { + auto pairs = as!(Node.Pair[])(); + static if(is(Unqual!V == Node)){pairs[idx].value = value;} + else {pairs[idx].value = Node(value);} + setValue(pairs); + } + return; + case NodeID.scalar: + case NodeID.invalid: + throw new NodeException("Trying to index a " ~ nodeTypeString ~ " node", startMark_); + } + } + @safe unittest + { + with(Node([1, 2, 3, 4, 3])) + { + opIndexAssign(42, 3); + assert(length == 5); + assert(opIndex(3).as!int == 42); + + opIndexAssign(YAMLNull(), 0); + assert(opIndex(0) == YAMLNull()); + } + with(Node(["1", "2", "3"], [4, 5, 6])) + { + opIndexAssign(42, "3"); + opIndexAssign(123, 456); + assert(length == 4); + assert(opIndex("3").as!int == 42); + assert(opIndex(456).as!int == 123); + + opIndexAssign(43, 3); + //3 and "3" should be different + assert(length == 5); + assert(opIndex("3").as!int == 42); + assert(opIndex(3).as!int == 43); + + opIndexAssign(YAMLNull(), "2"); + assert(opIndex("2") == YAMLNull()); + } + } + + /** Return a range object iterating over a sequence, getting each + * element as T. + * + * If T is Node, simply iterate over the nodes in the sequence. + * Otherwise, convert each node to T during iteration. + * + * Throws: NodeException if the node is not a sequence or an element + * could not be converted to specified type. + */ + template sequence(T = Node) + { + struct Range(N) + { + N subnodes; + size_t position; + + this(N nodes) + { + subnodes = nodes; + position = 0; + } + + /* Input range functionality. 
*/ + bool empty() const @property { return position >= subnodes.length; } + + void popFront() + { + enforce(!empty, "Attempted to popFront an empty sequence"); + position++; + } + + T front() const @property + { + enforce(!empty, "Attempted to take the front of an empty sequence"); + static if (is(Unqual!T == Node)) + return subnodes[position]; + else + return subnodes[position].as!T; + } + + /* Forward range functionality. */ + Range save() { return this; } + + /* Bidirectional range functionality. */ + void popBack() + { + enforce(!empty, "Attempted to popBack an empty sequence"); + subnodes = subnodes[0 .. $ - 1]; + } + + T back() + { + enforce(!empty, "Attempted to take the back of an empty sequence"); + static if (is(Unqual!T == Node)) + return subnodes[$ - 1]; + else + return subnodes[$ - 1].as!T; + } + + /* Random-access range functionality. */ + size_t length() const @property { return subnodes.length; } + T opIndex(size_t index) + { + static if (is(Unqual!T == Node)) + return subnodes[index]; + else + return subnodes[index].as!T; + } + + static assert(isInputRange!Range); + static assert(isForwardRange!Range); + static assert(isBidirectionalRange!Range); + static assert(isRandomAccessRange!Range); + } + auto sequence() + { + enforce(nodeID == NodeID.sequence, + new NodeException("Trying to 'sequence'-iterate over a " ~ nodeTypeString ~ " node", + startMark_)); + return Range!(Node[])(get!(Node[])); + } + auto sequence() const + { + enforce(nodeID == NodeID.sequence, + new NodeException("Trying to 'sequence'-iterate over a " ~ nodeTypeString ~ " node", + startMark_)); + return Range!(const(Node)[])(get!(Node[])); + } + } + @safe unittest + { + Node n1 = Node([1, 2, 3, 4]); + int[int] array; + Node n2 = Node(array); + const n3 = Node([1, 2, 3, 4]); + + auto r = n1.sequence!int.map!(x => x * 10); + assert(r.equal([10, 20, 30, 40])); + + assertThrown(n2.sequence); + + auto r2 = n3.sequence!int.map!(x => x * 10); + assert(r2.equal([10, 20, 30, 40])); + } + + 
/** Return a range object iterating over mapping's pairs. + * + * Throws: NodeException if the node is not a mapping. + * + */ + template mapping() + { + struct Range(T) + { + T pairs; + size_t position; + + this(T pairs) @safe + { + this.pairs = pairs; + position = 0; + } + + /* Input range functionality. */ + bool empty() @safe { return position >= pairs.length; } + + void popFront() @safe + { + enforce(!empty, "Attempted to popFront an empty mapping"); + position++; + } + + auto front() @safe + { + enforce(!empty, "Attempted to take the front of an empty mapping"); + return pairs[position]; + } + + /* Forward range functionality. */ + Range save() @safe { return this; } + + /* Bidirectional range functionality. */ + void popBack() @safe + { + enforce(!empty, "Attempted to popBack an empty mapping"); + pairs = pairs[0 .. $ - 1]; + } + + auto back() @safe + { + enforce(!empty, "Attempted to take the back of an empty mapping"); + return pairs[$ - 1]; + } + + /* Random-access range functionality. 
*/ + size_t length() const @property @safe { return pairs.length; } + auto opIndex(size_t index) @safe { return pairs[index]; } + + static assert(isInputRange!Range); + static assert(isForwardRange!Range); + static assert(isBidirectionalRange!Range); + static assert(isRandomAccessRange!Range); + } + + auto mapping() + { + enforce(nodeID == NodeID.mapping, + new NodeException("Trying to 'mapping'-iterate over a " + ~ nodeTypeString ~ " node", startMark_)); + return Range!(Node.Pair[])(get!(Node.Pair[])); + } + auto mapping() const + { + enforce(nodeID == NodeID.mapping, + new NodeException("Trying to 'mapping'-iterate over a " + ~ nodeTypeString ~ " node", startMark_)); + return Range!(const(Node.Pair)[])(get!(Node.Pair[])); + } + } + @safe unittest + { + int[int] array; + Node n = Node(array); + n[1] = "foo"; + n[2] = "bar"; + n[3] = "baz"; + + string[int] test; + foreach (pair; n.mapping) + test[pair.key.as!int] = pair.value.as!string.idup; + + assert(test[1] == "foo"); + assert(test[2] == "bar"); + assert(test[3] == "baz"); + + int[int] constArray = [1: 2, 3: 4]; + const x = Node(constArray); + foreach (pair; x.mapping) + assert(pair.value == constArray[pair.key.as!int]); + } + + /** Return a range object iterating over mapping's keys. + * + * If K is Node, simply iterate over the keys in the mapping. + * Otherwise, convert each key to T during iteration. + * + * Throws: NodeException if the nodes is not a mapping or an element + * could not be converted to specified type. 
+ */ + auto mappingKeys(K = Node)() const + { + enforce(nodeID == NodeID.mapping, + new NodeException("Trying to 'mappingKeys'-iterate over a " + ~ nodeTypeString ~ " node", startMark_)); + static if (is(Unqual!K == Node)) + return mapping.map!(pair => pair.key); + else + return mapping.map!(pair => pair.key.as!K); + } + @safe unittest + { + int[int] array; + Node m1 = Node(array); + m1["foo"] = 2; + m1["bar"] = 3; + + assert(m1.mappingKeys.equal(["foo", "bar"]) || m1.mappingKeys.equal(["bar", "foo"])); + + const cm1 = Node(["foo": 2, "bar": 3]); + + assert(cm1.mappingKeys.equal(["foo", "bar"]) || cm1.mappingKeys.equal(["bar", "foo"])); + } + + /** Return a range object iterating over mapping's values. + * + * If V is Node, simply iterate over the values in the mapping. + * Otherwise, convert each key to V during iteration. + * + * Throws: NodeException if the nodes is not a mapping or an element + * could not be converted to specified type. + */ + auto mappingValues(V = Node)() const + { + enforce(nodeID == NodeID.mapping, + new NodeException("Trying to 'mappingValues'-iterate over a " + ~ nodeTypeString ~ " node", startMark_)); + static if (is(Unqual!V == Node)) + return mapping.map!(pair => pair.value); + else + return mapping.map!(pair => pair.value.as!V); + } + @safe unittest + { + int[int] array; + Node m1 = Node(array); + m1["foo"] = 2; + m1["bar"] = 3; + + assert(m1.mappingValues.equal([2, 3]) || m1.mappingValues.equal([3, 2])); + + const cm1 = Node(["foo": 2, "bar": 3]); + + assert(cm1.mappingValues.equal([2, 3]) || cm1.mappingValues.equal([3, 2])); + } + + + /** Foreach over a sequence, getting each element as T. + * + * If T is Node, simply iterate over the nodes in the sequence. + * Otherwise, convert each node to T during iteration. + * + * Throws: NodeException if the node is not a sequence or an + * element could not be converted to specified type. 
+ */ + int opApply(D)(D dg) if (isDelegate!D && (Parameters!D.length == 1)) + { + enforce(nodeID == NodeID.sequence, + new NodeException("Trying to sequence-foreach over a " ~ nodeTypeString ~ " node", + startMark_)); + + int result; + foreach(ref node; get!(Node[])) + { + static if(is(Unqual!(Parameters!D[0]) == Node)) + { + result = dg(node); + } + else + { + Parameters!D[0] temp = node.as!(Parameters!D[0]); + result = dg(temp); + } + if(result){break;} + } + return result; + } + /// ditto + int opApply(D)(D dg) const if (isDelegate!D && (Parameters!D.length == 1)) + { + enforce(nodeID == NodeID.sequence, + new NodeException("Trying to sequence-foreach over a " ~ nodeTypeString ~ " node", + startMark_)); + + int result; + foreach(ref node; get!(Node[])) + { + static if(is(Unqual!(Parameters!D[0]) == Node)) + { + result = dg(node); + } + else + { + Parameters!D[0] temp = node.as!(Parameters!D[0]); + result = dg(temp); + } + if(result){break;} + } + return result; + } + @safe unittest + { + Node n1 = Node(11); + Node n2 = Node(12); + Node n3 = Node(13); + Node n4 = Node(14); + Node narray = Node([n1, n2, n3, n4]); + const cNArray = narray; + + int[] array, array2, array3; + foreach(int value; narray) + { + array ~= value; + } + foreach(Node node; narray) + { + array2 ~= node.as!int; + } + foreach (const Node node; cNArray) + { + array3 ~= node.as!int; + } + assert(array == [11, 12, 13, 14]); + assert(array2 == [11, 12, 13, 14]); + assert(array3 == [11, 12, 13, 14]); + } + @safe unittest + { + string[] testStrs = ["1", "2", "3"]; + auto node1 = Node(testStrs); + int i = 0; + foreach (string elem; node1) + { + assert(elem == testStrs[i]); + i++; + } + const node2 = Node(testStrs); + i = 0; + foreach (string elem; node2) + { + assert(elem == testStrs[i]); + i++; + } + immutable node3 = Node(testStrs); + i = 0; + foreach (string elem; node3) + { + assert(elem == testStrs[i]); + i++; + } + } + @safe unittest + { + auto node = Node(["a":1, "b":2, "c":3]); + const cNode = 
node; + assertThrown({foreach (Node n; node) {}}()); + assertThrown({foreach (const Node n; cNode) {}}()); + } + + /** Foreach over a mapping, getting each key/value as K/V. + * + * If the K and/or V is Node, simply iterate over the nodes in the mapping. + * Otherwise, convert each key/value to T during iteration. + * + * Throws: NodeException if the node is not a mapping or an + * element could not be converted to specified type. + */ + int opApply(DG)(DG dg) if (isDelegate!DG && (Parameters!DG.length == 2)) + { + alias K = Parameters!DG[0]; + alias V = Parameters!DG[1]; + enforce(nodeID == NodeID.mapping, + new NodeException("Trying to mapping-foreach over a " ~ nodeTypeString ~ " node", + startMark_)); + + int result; + foreach(ref pair; get!(Node.Pair[])) + { + static if(is(Unqual!K == Node) && is(Unqual!V == Node)) + { + result = dg(pair.key, pair.value); + } + else static if(is(Unqual!K == Node)) + { + V tempValue = pair.value.as!V; + result = dg(pair.key, tempValue); + } + else static if(is(Unqual!V == Node)) + { + K tempKey = pair.key.as!K; + result = dg(tempKey, pair.value); + } + else + { + K tempKey = pair.key.as!K; + V tempValue = pair.value.as!V; + result = dg(tempKey, tempValue); + } + + if(result){break;} + } + return result; + } + /// ditto + int opApply(DG)(DG dg) const if (isDelegate!DG && (Parameters!DG.length == 2)) + { + alias K = Parameters!DG[0]; + alias V = Parameters!DG[1]; + enforce(nodeID == NodeID.mapping, + new NodeException("Trying to mapping-foreach over a " ~ nodeTypeString ~ " node", + startMark_)); + + int result; + foreach(ref pair; get!(Node.Pair[])) + { + static if(is(Unqual!K == Node) && is(Unqual!V == Node)) + { + result = dg(pair.key, pair.value); + } + else static if(is(Unqual!K == Node)) + { + V tempValue = pair.value.as!V; + result = dg(pair.key, tempValue); + } + else static if(is(Unqual!V == Node)) + { + K tempKey = pair.key.as!K; + result = dg(tempKey, pair.value); + } + else + { + K tempKey = pair.key.as!K; + V 
tempValue = pair.value.as!V; + result = dg(tempKey, tempValue); + } + + if(result){break;} + } + return result; + } + @safe unittest + { + Node n1 = Node(cast(long)11); + Node n2 = Node(cast(long)12); + Node n3 = Node(cast(long)13); + Node n4 = Node(cast(long)14); + + Node k1 = Node("11"); + Node k2 = Node("12"); + Node k3 = Node("13"); + Node k4 = Node("14"); + + Node nmap1 = Node([Pair(k1, n1), + Pair(k2, n2), + Pair(k3, n3), + Pair(k4, n4)]); + + int[string] expected = ["11" : 11, + "12" : 12, + "13" : 13, + "14" : 14]; + int[string] array; + foreach(string key, int value; nmap1) + { + array[key] = value; + } + assert(array == expected); + + Node nmap2 = Node([Pair(k1, Node(cast(long)5)), + Pair(k2, Node(true)), + Pair(k3, Node(cast(real)1.0)), + Pair(k4, Node("yarly"))]); + + // DUB: `scope` in `foreach` not supported before 2.098 + int dummy; // Otherwise the delegate is infered as a function + nmap2.opApply((scope string key, scope Node value) + { + switch(key) + { + case "11": assert(value.as!int == 5 ); break; + case "12": assert(value.as!bool == true ); break; + case "13": assert(value.as!float == 1.0 ); break; + case "14": assert(value.as!string == "yarly"); break; + default: assert(false); + } + return dummy; + }); + const nmap3 = nmap2; + + foreach(const Node key, const Node value; nmap3) + { + switch(key.as!string) + { + case "11": assert(value.as!int == 5 ); break; + case "12": assert(value.as!bool == true ); break; + case "13": assert(value.as!float == 1.0 ); break; + case "14": assert(value.as!string == "yarly"); break; + default: assert(false); + } + } + } + @safe unittest + { + string[int] testStrs = [0: "1", 1: "2", 2: "3"]; + auto node1 = Node(testStrs); + foreach (const int i, string elem; node1) + { + assert(elem == testStrs[i]); + } + const node2 = Node(testStrs); + foreach (const int i, string elem; node2) + { + assert(elem == testStrs[i]); + } + immutable node3 = Node(testStrs); + foreach (const int i, string elem; node3) + { + assert(elem 
== testStrs[i]); + } + } + @safe unittest + { + auto node = Node(["a", "b", "c"]); + const cNode = node; + assertThrown({foreach (Node a, Node b; node) {}}()); + assertThrown({foreach (const Node a, const Node b; cNode) {}}()); + } + + /** Add an element to a sequence. + * + * This method can only be called on sequence nodes. + * + * If value is a node, it is copied to the sequence directly. Otherwise + * value is converted to a node and then stored in the sequence. + * + * $(P When emitting, all values in the sequence will be emitted. When + * using the !!set tag, the user needs to ensure that all elements in + * the sequence are unique, otherwise $(B invalid) YAML code will be + * emitted.) + * + * Params: value = Value to _add to the sequence. + */ + void add(T)(T value) + { + if (!isValid) + { + setValue(Node[].init); + } + enforce(nodeID == NodeID.sequence, + new NodeException("Trying to add an element to a " ~ nodeTypeString ~ " node", startMark_)); + + auto nodes = get!(Node[])(); + static if(is(Unqual!T == Node)){nodes ~= value;} + else {nodes ~= Node(value);} + setValue(nodes); + } + @safe unittest + { + with(Node([1, 2, 3, 4])) + { + add(5.0f); + assert(opIndex(4).as!float == 5.0f); + } + with(Node()) + { + add(5.0f); + assert(opIndex(0).as!float == 5.0f); + } + with(Node(5.0f)) + { + assertThrown!NodeException(add(5.0f)); + } + with(Node([5.0f : true])) + { + assertThrown!NodeException(add(5.0f)); + } + } + + /** Add a key-value pair to a mapping. + * + * This method can only be called on mapping nodes. + * + * If key and/or value is a node, it is copied to the mapping directly. + * Otherwise it is converted to a node and then stored in the mapping. + * + * $(P It is possible for the same key to be present more than once in a + * mapping. When emitting, all key-value pairs will be emitted. + * This is useful with the "!!pairs" tag, but will result in + * $(B invalid) YAML with "!!map" and "!!omap" tags.) + * + * Params: key = Key to _add. 
+ * value = Value to _add. + */ + void add(K, V)(K key, V value) + { + if (!isValid) + { + setValue(Node.Pair[].init); + } + enforce(nodeID == NodeID.mapping, + new NodeException("Trying to add a key-value pair to a " ~ + nodeTypeString ~ " node", + startMark_)); + + auto pairs = get!(Node.Pair[])(); + pairs ~= Pair(key, value); + setValue(pairs); + } + @safe unittest + { + with(Node([1, 2], [3, 4])) + { + add(5, "6"); + assert(opIndex(5).as!string == "6"); + } + with(Node()) + { + add(5, "6"); + assert(opIndex(5).as!string == "6"); + } + with(Node(5.0f)) + { + assertThrown!NodeException(add(5, "6")); + } + with(Node([5.0f])) + { + assertThrown!NodeException(add(5, "6")); + } + } + + /** Determine whether a key is in a mapping, and access its value. + * + * This method can only be called on mapping nodes. + * + * Params: key = Key to search for. + * + * Returns: A pointer to the value (as a Node) corresponding to key, + * or null if not found. + * + * Note: Any modification to the node can invalidate the returned + * pointer. + * + * See_Also: contains + */ + inout(Node*) opBinaryRight(string op, K)(K key) inout + if (op == "in") + { + enforce(nodeID == NodeID.mapping, new NodeException("Trying to use 'in' on a " ~ + nodeTypeString ~ " node", startMark_)); + + auto idx = findPair(key); + if(idx < 0) + { + return null; + } + else + { + return &(get!(Node.Pair[])[idx].value); + } + } + @safe unittest + { + auto mapping = Node(["foo", "baz"], ["bar", "qux"]); + assert("bad" !in mapping && ("bad" in mapping) is null); + Node* foo = "foo" in mapping; + assert(foo !is null); + assert(*foo == Node("bar")); + assert(foo.get!string == "bar"); + *foo = Node("newfoo"); + assert(mapping["foo"] == Node("newfoo")); + } + @safe unittest + { + auto mNode = Node(["a": 2]); + assert("a" in mNode); + const cNode = Node(["a": 2]); + assert("a" in cNode); + immutable iNode = Node(["a": 2]); + assert("a" in iNode); + } + + /** Remove first (if any) occurence of a value in a collection. 
+ * + * This method can only be called on collection nodes. + * + * If the node is a sequence, the first node matching value is removed. + * If the node is a mapping, the first key-value pair where _value + * matches specified value is removed. + * + * Params: rhs = Value to _remove. + * + * Throws: NodeException if the node is not a collection. + */ + void remove(T)(T rhs) + { + remove_!(T, No.key, "remove")(rhs); + } + @safe unittest + { + with(Node([1, 2, 3, 4, 3])) + { + remove(3); + assert(length == 4); + assert(opIndex(2).as!int == 4); + assert(opIndex(3).as!int == 3); + + add(YAMLNull()); + assert(length == 5); + remove(YAMLNull()); + assert(length == 4); + } + with(Node(["1", "2", "3"], [4, 5, 6])) + { + remove(4); + assert(length == 2); + add("nullkey", YAMLNull()); + assert(length == 3); + remove(YAMLNull()); + assert(length == 2); + } + } + + /** Remove element at the specified index of a collection. + * + * This method can only be called on collection nodes. + * + * If the node is a sequence, index must be integral. + * + * If the node is a mapping, remove the first key-value pair where + * key matches index. + * + * If the node is a mapping and no key matches index, nothing is removed + * and no exception is thrown. This ensures behavior siilar to D arrays + * and associative arrays. + * + * Params: index = Index to remove at. + * + * Throws: NodeException if the node is not a collection, index is out + * of range or if a non-integral index is used on a sequence node. 
+ */ + void removeAt(T)(T index) + { + remove_!(T, Yes.key, "removeAt")(index); + } + @safe unittest + { + with(Node([1, 2, 3, 4, 3])) + { + removeAt(3); + assertThrown!NodeException(removeAt("3")); + assert(length == 4); + assert(opIndex(3).as!int == 3); + } + with(Node(["1", "2", "3"], [4, 5, 6])) + { + // no integer 2 key, so don't remove anything + removeAt(2); + assert(length == 3); + removeAt("2"); + assert(length == 2); + add(YAMLNull(), "nullval"); + assert(length == 3); + removeAt(YAMLNull()); + assert(length == 2); + } + } + + /// Compare with another _node. + int opCmp(const scope ref Node rhs) const scope @safe + { + const bool hasNullTag = this.tag_ is null; + // Only one of them is null: we can order nodes + if ((hasNullTag) ^ (rhs.tag is null)) + return hasNullTag ? -1 : 1; + // Either both `null` or both have a value + if (!hasNullTag) + if (int result = std.algorithm.comparison.cmp(tag_, rhs.tag_)) + return result; + + static int cmp(T1, T2)(T1 a, T2 b) + { + return a > b ? 1 : + a < b ? -1 : + 0; + } + + // Compare validity: if both valid, we have to compare further. + if (!this.isValid()) + return rhs.isValid() ? -1 : 0; + if (!rhs.isValid()) + return 1; + if (const typeCmp = cmp(type, rhs.type)) + return typeCmp; + + static int compareCollections(T)(const scope ref Node lhs, const scope ref Node rhs) + { + const c1 = lhs.getValue!T; + const c2 = rhs.getValue!T; + if(c1 is c2){return 0;} + if(c1.length != c2.length) + { + return cmp(c1.length, c2.length); + } + // Equal lengths, compare items. + foreach(i; 0 .. c1.length) + { + const itemCmp = c1[i].opCmp(c2[i]); + if(itemCmp != 0){return itemCmp;} + } + return 0; + } + + final switch(type) + { + case NodeType.string: + return std.algorithm.cmp(getValue!string, + rhs.getValue!string); + case NodeType.integer: + return cmp(getValue!long, rhs.getValue!long); + case NodeType.boolean: + const b1 = getValue!bool; + const b2 = rhs.getValue!bool; + return b1 ? b2 ? 0 : 1 + : b2 ? 
-1 : 0; + case NodeType.binary: + const b1 = getValue!(ubyte[]); + const b2 = rhs.getValue!(ubyte[]); + return std.algorithm.cmp(b1, b2); + case NodeType.null_: + return 0; + case NodeType.decimal: + const r1 = getValue!real; + const r2 = rhs.getValue!real; + if(isNaN(r1)) + { + return isNaN(r2) ? 0 : -1; + } + if(isNaN(r2)) + { + return 1; + } + // Fuzzy equality. + if(r1 <= r2 + real.epsilon && r1 >= r2 - real.epsilon) + { + return 0; + } + return cmp(r1, r2); + case NodeType.timestamp: + const t1 = getValue!SysTime; + const t2 = rhs.getValue!SysTime; + return cmp(t1, t2); + case NodeType.mapping: + return compareCollections!(Pair[])(this, rhs); + case NodeType.sequence: + return compareCollections!(Node[])(this, rhs); + case NodeType.merge: + assert(false, "Cannot compare merge nodes"); + case NodeType.invalid: + assert(false, "Cannot compare invalid nodes"); + } + } + + // Ensure opCmp is symmetric for collections + @safe unittest + { + auto node1 = Node( + [ + Node("New York Yankees", "tag:yaml.org,2002:str"), + Node("Atlanta Braves", "tag:yaml.org,2002:str") + ], "tag:yaml.org,2002:seq" + ); + auto node2 = Node( + [ + Node("Detroit Tigers", "tag:yaml.org,2002:str"), + Node("Chicago cubs", "tag:yaml.org,2002:str") + ], "tag:yaml.org,2002:seq" + ); + assert(node1 > node2); + assert(node2 < node1); + } + + // Compute hash of the node. + hash_t toHash() nothrow const @trusted + { + const valueHash = value_.match!(v => hashOf(v)); + + return tag_ is null ? valueHash : tag_.hashOf(valueHash); + } + @safe unittest + { + assert(Node(42).toHash() != Node(41).toHash()); + assert(Node(42).toHash() != Node(42, "some-tag").toHash()); + } + + /// Get type of the node value. 
+ @property NodeType type() const scope @safe pure nothrow @nogc + { + return this.value_.match!( + (const bool _) => NodeType.boolean, + (const long _) => NodeType.integer, + (const Node[] _) => NodeType.sequence, + (const ubyte[] _) => NodeType.binary, + (const string _) => NodeType.string, + (const Node.Pair[] _) => NodeType.mapping, + (const SysTime _) => NodeType.timestamp, + (const YAMLNull _) => NodeType.null_, + (const YAMLMerge _) => NodeType.merge, + (const real _) => NodeType.decimal, + (const YAMLInvalid _) => NodeType.invalid, + ); + } + + /// Get the kind of node this is. + @property NodeID nodeID() const scope @safe pure nothrow @nogc + { + final switch (type) + { + case NodeType.sequence: + return NodeID.sequence; + case NodeType.mapping: + return NodeID.mapping; + case NodeType.boolean: + case NodeType.integer: + case NodeType.binary: + case NodeType.string: + case NodeType.timestamp: + case NodeType.null_: + case NodeType.merge: + case NodeType.decimal: + return NodeID.scalar; + case NodeType.invalid: + return NodeID.invalid; + } + } + package: + + // Get a string representation of the node tree. Used for debugging. + // + // Params: level = Level of the node in the tree. + // + // Returns: String representing the node tree. + @property string debugString(uint level = 0) const scope @safe + { + string indent; + foreach(i; 0 .. level){indent ~= " ";} + + final switch (nodeID) + { + case NodeID.invalid: + return indent ~ "invalid"; + case NodeID.sequence: + string result = indent ~ "sequence:\n"; + foreach(ref node; get!(Node[])) + { + result ~= node.debugString(level + 1); + } + return result; + case NodeID.mapping: + string result = indent ~ "mapping:\n"; + foreach(ref pair; get!(Node.Pair[])) + { + result ~= indent ~ " pair\n"; + result ~= pair.key.debugString(level + 2); + result ~= pair.value.debugString(level + 2); + } + return result; + case NodeID.scalar: + return indent ~ "scalar(" ~ + (convertsTo!string ? 
get!string : text(type)) ~ ")\n"; + } + } + + + public: + @property string nodeTypeString() const scope @safe pure nothrow @nogc + { + final switch (nodeID) + { + case NodeID.mapping: + return "mapping"; + case NodeID.sequence: + return "sequence"; + case NodeID.scalar: + return "scalar"; + case NodeID.invalid: + return "invalid"; + } + } + + // Determine if the value can be converted to specified type. + @property bool convertsTo(T)() const + { + if(isType!T){return true;} + + // Every type allowed in Value should be convertible to string. + static if(isSomeString!T) {return true;} + else static if(isFloatingPoint!T){return type.among!(NodeType.integer, NodeType.decimal);} + else static if(isIntegral!T) {return type == NodeType.integer;} + else static if(is(Unqual!T==bool)){return type == NodeType.boolean;} + else {return false;} + } + /** + * Sets the style of this node when dumped. + * + * Params: style = Any valid style. + */ + void setStyle(CollectionStyle style) @safe + { + enforce(!isValid || (nodeID.among(NodeID.mapping, NodeID.sequence)), new NodeException( + "Cannot set collection style for non-collection nodes", startMark_)); + collectionStyle = style; + } + /// Ditto + void setStyle(ScalarStyle style) @safe + { + enforce(!isValid || (nodeID == NodeID.scalar), new NodeException( + "Cannot set scalar style for non-scalar nodes", startMark_)); + scalarStyle = style; + } + /// + @safe unittest + { + import dub.internal.dyaml.dumper; + auto stream = new Appender!string(); + auto node = Node([1, 2, 3, 4, 5]); + node.setStyle(CollectionStyle.block); + + auto dumper = dumper(); + dumper.dump(stream, node); + } + /// + @safe unittest + { + import dub.internal.dyaml.dumper; + auto stream = new Appender!string(); + auto node = Node(4); + node.setStyle(ScalarStyle.literal); + + auto dumper = dumper(); + dumper.dump(stream, node); + } + @safe unittest + { + assertThrown!NodeException(Node(4).setStyle(CollectionStyle.block)); + 
assertThrown!NodeException(Node([4]).setStyle(ScalarStyle.literal)); + } + @safe unittest + { + import dub.internal.dyaml.dumper; + { + auto stream = new Appender!string(); + auto node = Node([1, 2, 3, 4, 5]); + node.setStyle(CollectionStyle.block); + auto dumper = dumper(); + dumper.explicitEnd = false; + dumper.explicitStart = false; + dumper.YAMLVersion = null; + dumper.dump(stream, node); + + //Block style should start with a hyphen. + assert(stream.data[0] == '-'); + } + { + auto stream = new Appender!string(); + auto node = Node([1, 2, 3, 4, 5]); + node.setStyle(CollectionStyle.flow); + auto dumper = dumper(); + dumper.explicitEnd = false; + dumper.explicitStart = false; + dumper.YAMLVersion = null; + dumper.dump(stream, node); + + //Flow style should start with a bracket. + assert(stream.data[0] == '['); + } + { + auto stream = new Appender!string(); + auto node = Node(1); + node.setStyle(ScalarStyle.singleQuoted); + auto dumper = dumper(); + dumper.explicitEnd = false; + dumper.explicitStart = false; + dumper.YAMLVersion = null; + dumper.dump(stream, node); + + assert(stream.data == "!!int '1'\n"); + } + { + auto stream = new Appender!string(); + auto node = Node(1); + node.setStyle(ScalarStyle.doubleQuoted); + auto dumper = dumper(); + dumper.explicitEnd = false; + dumper.explicitStart = false; + dumper.YAMLVersion = null; + dumper.dump(stream, node); + + assert(stream.data == "!!int \"1\"\n"); + } + } + + private: + // Determine if the value stored by the node is of specified type. + // + // This only works for default YAML types, not for user defined types. + @property bool isType(T)() const + { + return value_.match!( + (const T _) => true, + _ => false, + ); + } + + /// Check at compile time if a type is stored natively + enum canBeType (T) = is(typeof({ value_.match!((const T _) => true, _ => false); })); + + + // Implementation of contains() and containsKey(). 
+ bool contains_(T, Flag!"key" key, string func)(T rhs) const + { + final switch (nodeID) + { + case NodeID.mapping: + return findPair!(T, key)(rhs) >= 0; + case NodeID.sequence: + static if(!key) + { + foreach(ref node; getValue!(Node[])) + { + if(node == rhs){return true;} + } + return false; + } + else + { + throw new NodeException("Trying to use " ~ func ~ "() on a " ~ nodeTypeString ~ " node", + startMark_); + } + case NodeID.scalar: + case NodeID.invalid: + throw new NodeException("Trying to use " ~ func ~ "() on a " ~ nodeTypeString ~ " node", + startMark_); + } + + } + + // Implementation of remove() and removeAt() + void remove_(T, Flag!"key" key, string func)(T rhs) + { + static void removeElem(E, I)(ref Node node, I index) + { + auto elems = node.getValue!(E[]); + moveAll(elems[cast(size_t)index + 1 .. $], elems[cast(size_t)index .. $ - 1]); + elems.length = elems.length - 1; + node.setValue(elems); + } + + final switch (nodeID) + { + case NodeID.mapping: + const index = findPair!(T, key)(rhs); + if(index >= 0){removeElem!Pair(this, index);} + break; + case NodeID.sequence: + static long getIndex(ref Node node, ref T rhs) + { + foreach(idx, ref elem; node.get!(Node[])) + { + if(elem.convertsTo!T && elem.as!(T, No.stringConversion) == rhs) + { + return idx; + } + } + return -1; + } + + const index = select!key(rhs, getIndex(this, rhs)); + + // This throws if the index is not integral. + checkSequenceIndex(index); + + static if(isIntegral!(typeof(index))){removeElem!Node(this, index); break; } + else {assert(false, "Non-integral sequence index");} + case NodeID.scalar: + case NodeID.invalid: + throw new NodeException("Trying to " ~ func ~ "() from a " ~ nodeTypeString ~ " node", + startMark_); + } + } + + // Get index of pair with key (or value, if key is false) matching index. 
+ // Cannot be inferred @safe due to https://issues.dlang.org/show_bug.cgi?id=16528 + sizediff_t findPair(T, Flag!"key" key = Yes.key)(const scope ref T index) + const scope @safe + { + const pairs = getValue!(Pair[])(); + const(Node)* node; + foreach(idx, ref const(Pair) pair; pairs) + { + static if(key){node = &pair.key;} + else {node = &pair.value;} + + + const bool typeMatch = (isFloatingPoint!T && (node.type.among!(NodeType.integer, NodeType.decimal))) || + (isIntegral!T && node.type == NodeType.integer) || + (is(Unqual!T==bool) && node.type == NodeType.boolean) || + (isSomeString!T && node.type == NodeType.string) || + (node.isType!T); + if(typeMatch && *node == index) + { + return idx; + } + } + return -1; + } + + // Check if index is integral and in range. + void checkSequenceIndex(T)(T index) const scope @safe + { + assert(nodeID == NodeID.sequence, + "checkSequenceIndex() called on a " ~ nodeTypeString ~ " node"); + + static if(!isIntegral!T) + { + throw new NodeException("Indexing a sequence with a non-integral type.", startMark_); + } + else + { + enforce(index >= 0 && index < getValue!(Node[]).length, + new NodeException("Sequence index out of range: " ~ to!string(index), + startMark_)); + } + } + // Safe wrapper for getting a value out of the variant. + inout(T) getValue(T)() @safe return scope inout + { + alias RType = typeof(return); + return value_.tryMatch!((RType r) => r); + } + // Safe wrapper for coercing a value out of the variant. 
+ inout(T) coerceValue(T)() @trusted scope return inout + { + alias RType = typeof(return); + static if (is(typeof({ RType rt = T.init; T t = RType.init; }))) + alias TType = T; + else // `inout` matters (indirection) + alias TType = RType; + + // `inout(Node[]).to!string` apparently is not safe: + // struct SumTypeBug { + // import std.conv; + // Node[] data; + // + // string bug () inout @safe + // { + // return this.data.to!string; + // } + // } + // Doesn't compile with DMD v2.100.0 + return this.value_.tryMatch!( + (inout bool v) @safe => v.to!TType, + (inout long v) @safe => v.to!TType, + (inout Node[] v) @trusted => v.to!TType, + (inout ubyte[] v) @safe => v.to!TType, + (inout string v) @safe => v.to!TType, + (inout Node.Pair[] v) @trusted => v.to!TType, + (inout SysTime v) @trusted => v.to!TType, + (inout real v) @safe => v.to!TType, + (inout YAMLNull v) @safe => null.to!TType, + ); + } + // Safe wrapper for setting a value for the variant. + void setValue(T)(T value) @trusted + { + static if (allowed!T) + { + value_ = value; + } + else + { + auto tmpNode = cast(Node)value; + tag_ = tmpNode.tag; + scalarStyle = tmpNode.scalarStyle; + collectionStyle = tmpNode.collectionStyle; + value_ = tmpNode.value_; + } + } + + /// + public void toString (DGT) (scope DGT sink) + const scope @safe + { + this.value_.match!( + (const bool v) => formattedWrite(sink, v ? 
"true" : "false"), + (const long v) => formattedWrite(sink, "%s", v), + (const Node[] v) => formattedWrite(sink, "[%(%s, %)]", v), + (const ubyte[] v) => formattedWrite(sink, "%s", v), + (const string v) => formattedWrite(sink, `"%s"`, v), + (const Node.Pair[] v) => formattedWrite(sink, "{%(%s, %)}", v), + (const SysTime v) => formattedWrite(sink, "%s", v), + (const YAMLNull v) => formattedWrite(sink, "%s", v), + (const YAMLMerge v) => formattedWrite(sink, "%s", v), + (const real v) => formattedWrite(sink, "%s", v), + (const YAMLInvalid v) => formattedWrite(sink, "%s", v), + ); + } +} + +package: +// Merge pairs into an array of pairs based on merge rules in the YAML spec. +// +// Any new pair will only be added if there is not already a pair +// with the same key. +// +// Params: pairs = Appender managing the array of pairs to merge into. +// toMerge = Pairs to merge. +void merge(ref Appender!(Node.Pair[]) pairs, Node.Pair[] toMerge) @safe +{ + bool eq(ref Node.Pair a, ref Node.Pair b) @safe + { + return a.key == b.key; + } + + foreach(ref pair; toMerge) if(!canFind!eq(pairs.data, pair)) + { + pairs.put(pair); + } +} + +enum hasNodeConstructor(T) = hasSimpleNodeConstructor!T || hasExpandedNodeConstructor!T; +template hasSimpleNodeConstructor(T) +{ + static if (is(T == struct)) + { + enum hasSimpleNodeConstructor = is(typeof(T(Node.init))); + } + else static if (is(T == class)) + { + enum hasSimpleNodeConstructor = is(typeof(new T(Node.init))); + } + else enum hasSimpleNodeConstructor = false; +} +template hasExpandedNodeConstructor(T) +{ + static if (is(T == struct)) + { + enum hasExpandedNodeConstructor = is(typeof(T(Node.init, ""))); + } + else static if (is(T == class)) + { + enum hasExpandedNodeConstructor = is(typeof(new T(Node.init, ""))); + } + else enum hasExpandedNodeConstructor = false; +} +enum castableToNode(T) = (is(T == struct) || is(T == class)) && is(typeof(T.opCast!Node()) : Node); + +@safe unittest +{ + import dub.internal.dyaml : Loader, Node; + 
+ static struct Foo + { + string[] bars; + + this(const Node node) + { + foreach(value; node["bars"].sequence) + { + bars ~= value.as!string.idup; + } + } + } + + Loader.fromString(`{ bars: ["a", "b"] }`) + .load + .as!(Foo); +} +@safe unittest +{ + import dub.internal.dyaml : Loader, Node; + import std : split, to; + + static class MyClass + { + int x, y, z; + + this(Node node) + { + auto parts = node.as!string().split(":"); + x = parts[0].to!int; + y = parts[1].to!int; + z = parts[2].to!int; + } + } + + auto loader = Loader.fromString(`"1:2:3"`); + Node node = loader.load(); + auto mc = node.get!MyClass; +} +@safe unittest +{ + import dub.internal.dyaml : Loader, Node; + import std : split, to; + + static class MyClass + { + int x, y, z; + + this(Node node) + { + auto parts = node.as!string().split(":"); + x = parts[0].to!int; + y = parts[1].to!int; + z = parts[2].to!int; + } + } + + auto loader = Loader.fromString(`"1:2:3"`); + const node = loader.load(); + auto mc = node.get!MyClass; +} diff --git a/source/dub/internal/dyaml/package.d b/source/dub/internal/dyaml/package.d new file mode 100644 index 0000000..b9c953c --- /dev/null +++ b/source/dub/internal/dyaml/package.d @@ -0,0 +1,15 @@ +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module dub.internal.dyaml; + +public import dub.internal.dyaml.dumper; +public import dub.internal.dyaml.encoding; +public import dub.internal.dyaml.exception; +public import dub.internal.dyaml.linebreak; +public import dub.internal.dyaml.loader; +public import dub.internal.dyaml.resolver; +public import dub.internal.dyaml.style; +public import dub.internal.dyaml.node; diff --git a/source/dub/internal/dyaml/parser.d b/source/dub/internal/dyaml/parser.d new file mode 100644 index 0000000..b4e3d1b --- /dev/null +++ b/source/dub/internal/dyaml/parser.d @@ -0,0 +1,958 @@ + +// Copyright Ferdinand Majerech 2011-2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * YAML parser. + * Code based on PyYAML: http://www.pyyaml.org + */ +module dub.internal.dyaml.parser; + + +import std.algorithm; +import std.array; +import std.conv; +import std.exception; +import std.typecons; + +import dub.internal.dyaml.event; +import dub.internal.dyaml.exception; +import dub.internal.dyaml.scanner; +import dub.internal.dyaml.style; +import dub.internal.dyaml.token; +import dub.internal.dyaml.tagdirective; + + +/** + * The following YAML grammar is LL(1) and is parsed by a recursive descent + * parser. + * + * stream ::= STREAM-START implicit_document? explicit_document* STREAM-END + * implicit_document ::= block_node DOCUMENT-END* + * explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* + * block_node_or_indentless_sequence ::= + * ALIAS + * | properties (block_content | indentless_block_sequence)? + * | block_content + * | indentless_block_sequence + * block_node ::= ALIAS + * | properties block_content? + * | block_content + * flow_node ::= ALIAS + * | properties flow_content? + * | flow_content + * properties ::= TAG ANCHOR? | ANCHOR TAG? 
+ * block_content ::= block_collection | flow_collection | SCALAR + * flow_content ::= flow_collection | SCALAR + * block_collection ::= block_sequence | block_mapping + * flow_collection ::= flow_sequence | flow_mapping + * block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END + * indentless_sequence ::= (BLOCK-ENTRY block_node?)+ + * block_mapping ::= BLOCK-MAPPING_START + * ((KEY block_node_or_indentless_sequence?)? + * (VALUE block_node_or_indentless_sequence?)?)* + * BLOCK-END + * flow_sequence ::= FLOW-SEQUENCE-START + * (flow_sequence_entry FLOW-ENTRY)* + * flow_sequence_entry? + * FLOW-SEQUENCE-END + * flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + * flow_mapping ::= FLOW-MAPPING-START + * (flow_mapping_entry FLOW-ENTRY)* + * flow_mapping_entry? + * FLOW-MAPPING-END + * flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + * + * FIRST sets: + * + * stream: { STREAM-START } + * explicit_document: { DIRECTIVE DOCUMENT-START } + * implicit_document: FIRST(block_node) + * block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } + * flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } + * block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } + * flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } + * block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } + * flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } + * block_sequence: { BLOCK-SEQUENCE-START } + * block_mapping: { BLOCK-MAPPING-START } + * block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } + * indentless_sequence: { ENTRY } + * flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } + * flow_sequence: { FLOW-SEQUENCE-START } + * flow_mapping: { FLOW-MAPPING-START } 
+ * flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } + * flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } + */ + + +/** + * Marked exception thrown at parser errors. + * + * See_Also: MarkedYAMLException + */ +class ParserException : MarkedYAMLException +{ + mixin MarkedExceptionCtors; +} + +package: +/// Generates events from tokens provided by a Scanner. +/// +/// While Parser receives tokens with non-const character slices, the events it +/// produces are immutable strings, which are usually the same slices, cast to string. +/// Parser is the last layer of D:YAML that may possibly do any modifications to these +/// slices. +final class Parser +{ + private: + ///Default tag handle shortcuts and replacements. + static TagDirective[] defaultTagDirectives_ = + [TagDirective("!", "!"), TagDirective("!!", "tag:yaml.org,2002:")]; + + ///Scanner providing YAML tokens. + Scanner scanner_; + + ///Event produced by the most recent state. + Event currentEvent_; + + ///YAML version string. + string YAMLVersion_ = null; + ///Tag handle shortcuts and replacements. + TagDirective[] tagDirectives_; + + ///Stack of states. + Appender!(Event delegate() @safe[]) states_; + ///Stack of marks used to keep track of extents of e.g. YAML collections. + Appender!(Mark[]) marks_; + + ///Current state. + Event delegate() @safe state_; + + public: + ///Construct a Parser using specified Scanner. + this(Scanner scanner) @safe + { + state_ = &parseStreamStart; + scanner_ = scanner; + states_.reserve(32); + marks_.reserve(32); + } + + /** + * Check if any events are left. May have side effects in some cases. + */ + bool empty() @safe + { + ensureState(); + return currentEvent_.isNull; + } + + /** + * Return the current event. + * + * Must not be called if there are no events left. 
+ */ + Event front() @safe + { + ensureState(); + assert(!currentEvent_.isNull, "No event left to peek"); + return currentEvent_; + } + + /** + * Skip to the next event. + * + * Must not be called if there are no events left. + */ + void popFront() @safe + { + currentEvent_.id = EventID.invalid; + ensureState(); + } + + private: + /// If current event is invalid, load the next valid one if possible. + void ensureState() @safe + { + if(currentEvent_.isNull && state_ !is null) + { + currentEvent_ = state_(); + } + } + ///Pop and return the newest state in states_. + Event delegate() @safe popState() @safe + { + enforce(states_.data.length > 0, + new YAMLException("Parser: Need to pop state but no states left to pop")); + const result = states_.data.back; + states_.shrinkTo(states_.data.length - 1); + return result; + } + + ///Pop and return the newest mark in marks_. + Mark popMark() @safe + { + enforce(marks_.data.length > 0, + new YAMLException("Parser: Need to pop mark but no marks left to pop")); + const result = marks_.data.back; + marks_.shrinkTo(marks_.data.length - 1); + return result; + } + + /// Push a state on the stack + void pushState(Event delegate() @safe state) @safe + { + states_ ~= state; + } + /// Push a mark on the stack + void pushMark(Mark mark) @safe + { + marks_ ~= mark; + } + + /** + * stream ::= STREAM-START implicit_document? explicit_document* STREAM-END + * implicit_document ::= block_node DOCUMENT-END* + * explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* + */ + + ///Parse stream start. + Event parseStreamStart() @safe + { + const token = scanner_.front; + scanner_.popFront(); + state_ = &parseImplicitDocumentStart; + return streamStartEvent(token.startMark, token.endMark); + } + + /// Parse implicit document start, unless explicit detected: if so, parse explicit. + Event parseImplicitDocumentStart() @safe + { + // Parse an implicit document. 
+ if(!scanner_.front.id.among!(TokenID.directive, TokenID.documentStart, + TokenID.streamEnd)) + { + tagDirectives_ = defaultTagDirectives_; + const token = scanner_.front; + + pushState(&parseDocumentEnd); + state_ = &parseBlockNode; + + return documentStartEvent(token.startMark, token.endMark, false, null, null); + } + return parseDocumentStart(); + } + + ///Parse explicit document start. + Event parseDocumentStart() @safe + { + //Parse any extra document end indicators. + while(scanner_.front.id == TokenID.documentEnd) + { + scanner_.popFront(); + } + + //Parse an explicit document. + if(scanner_.front.id != TokenID.streamEnd) + { + const startMark = scanner_.front.startMark; + + auto tagDirectives = processDirectives(); + enforce(scanner_.front.id == TokenID.documentStart, + new ParserException("Expected document start but found " ~ + scanner_.front.idString, + scanner_.front.startMark)); + + const endMark = scanner_.front.endMark; + scanner_.popFront(); + pushState(&parseDocumentEnd); + state_ = &parseDocumentContent; + return documentStartEvent(startMark, endMark, true, YAMLVersion_, tagDirectives); + } + else + { + //Parse the end of the stream. + const token = scanner_.front; + scanner_.popFront(); + assert(states_.data.length == 0); + assert(marks_.data.length == 0); + state_ = null; + return streamEndEvent(token.startMark, token.endMark); + } + } + + ///Parse document end (explicit or implicit). + Event parseDocumentEnd() @safe + { + Mark startMark = scanner_.front.startMark; + const bool explicit = scanner_.front.id == TokenID.documentEnd; + Mark endMark = startMark; + if (explicit) + { + endMark = scanner_.front.endMark; + scanner_.popFront(); + } + + state_ = &parseDocumentStart; + + return documentEndEvent(startMark, endMark, explicit); + } + + ///Parse document content. 
+ Event parseDocumentContent() @safe + { + if(scanner_.front.id.among!(TokenID.directive, TokenID.documentStart, + TokenID.documentEnd, TokenID.streamEnd)) + { + state_ = popState(); + return processEmptyScalar(scanner_.front.startMark); + } + return parseBlockNode(); + } + + /// Process directives at the beginning of a document. + TagDirective[] processDirectives() @safe + { + // Destroy version and tag handles from previous document. + YAMLVersion_ = null; + tagDirectives_.length = 0; + + // Process directives. + while(scanner_.front.id == TokenID.directive) + { + const token = scanner_.front; + scanner_.popFront(); + string value = token.value.idup; + if(token.directive == DirectiveType.yaml) + { + enforce(YAMLVersion_ is null, + new ParserException("Duplicate YAML directive", token.startMark)); + const minor = value.split(".")[0]; + enforce(minor == "1", + new ParserException("Incompatible document (version 1.x is required)", + token.startMark)); + YAMLVersion_ = value; + } + else if(token.directive == DirectiveType.tag) + { + auto handle = value[0 .. token.valueDivider]; + + foreach(ref pair; tagDirectives_) + { + // handle + const h = pair.handle; + enforce(h != handle, new ParserException("Duplicate tag handle: " ~ handle, + token.startMark)); + } + tagDirectives_ ~= + TagDirective(handle, value[token.valueDivider .. $]); + } + // Any other directive type is ignored (only YAML and TAG are in YAML + // 1.1/1.2, any other directives are "reserved") + } + + TagDirective[] value = tagDirectives_; + + //Add any default tag handles that haven't been overridden. + foreach(ref defaultPair; defaultTagDirectives_) + { + bool found; + foreach(ref pair; tagDirectives_) if(defaultPair.handle == pair.handle) + { + found = true; + break; + } + if(!found) {tagDirectives_ ~= defaultPair; } + } + + return value; + } + + /** + * block_node_or_indentless_sequence ::= ALIAS + * | properties (block_content | indentless_block_sequence)? 
+ * | block_content + * | indentless_block_sequence + * block_node ::= ALIAS + * | properties block_content? + * | block_content + * flow_node ::= ALIAS + * | properties flow_content? + * | flow_content + * properties ::= TAG ANCHOR? | ANCHOR TAG? + * block_content ::= block_collection | flow_collection | SCALAR + * flow_content ::= flow_collection | SCALAR + * block_collection ::= block_sequence | block_mapping + * flow_collection ::= flow_sequence | flow_mapping + */ + + ///Parse a node. + Event parseNode(const Flag!"block" block, + const Flag!"indentlessSequence" indentlessSequence = No.indentlessSequence) + @trusted + { + if(scanner_.front.id == TokenID.alias_) + { + const token = scanner_.front; + scanner_.popFront(); + state_ = popState(); + return aliasEvent(token.startMark, token.endMark, + cast(string)token.value); + } + + string anchor; + string tag; + Mark startMark, endMark, tagMark; + bool invalidMarks = true; + // The index in the tag string where tag handle ends and tag suffix starts. + uint tagHandleEnd; + + //Get anchor/tag if detected. Return false otherwise. + bool get(const TokenID id, const Flag!"first" first, ref string target) @safe + { + if(scanner_.front.id != id){return false;} + invalidMarks = false; + const token = scanner_.front; + scanner_.popFront(); + if(first){startMark = token.startMark;} + if(id == TokenID.tag) + { + tagMark = token.startMark; + tagHandleEnd = token.valueDivider; + } + endMark = token.endMark; + target = token.value.idup; + return true; + } + + //Anchor and/or tag can be in any order. 
+ if(get(TokenID.anchor, Yes.first, anchor)){get(TokenID.tag, No.first, tag);} + else if(get(TokenID.tag, Yes.first, tag)) {get(TokenID.anchor, No.first, anchor);} + + if(tag !is null){tag = processTag(tag, tagHandleEnd, startMark, tagMark);} + + if(invalidMarks) + { + startMark = endMark = scanner_.front.startMark; + } + + bool implicit = (tag is null || tag == "!"); + + if(indentlessSequence && scanner_.front.id == TokenID.blockEntry) + { + state_ = &parseIndentlessSequenceEntry; + return sequenceStartEvent + (startMark, scanner_.front.endMark, anchor, + tag, implicit, CollectionStyle.block); + } + + if(scanner_.front.id == TokenID.scalar) + { + auto token = scanner_.front; + scanner_.popFront(); + auto value = token.style == ScalarStyle.doubleQuoted + ? handleDoubleQuotedScalarEscapes(token.value) + : cast(string)token.value; + + implicit = (token.style == ScalarStyle.plain && tag is null) || tag == "!"; + state_ = popState(); + return scalarEvent(startMark, token.endMark, anchor, tag, + implicit, value, token.style); + } + + if(scanner_.front.id == TokenID.flowSequenceStart) + { + endMark = scanner_.front.endMark; + state_ = &parseFlowSequenceEntry!(Yes.first); + return sequenceStartEvent(startMark, endMark, anchor, tag, + implicit, CollectionStyle.flow); + } + + if(scanner_.front.id == TokenID.flowMappingStart) + { + endMark = scanner_.front.endMark; + state_ = &parseFlowMappingKey!(Yes.first); + return mappingStartEvent(startMark, endMark, anchor, tag, + implicit, CollectionStyle.flow); + } + + if(block && scanner_.front.id == TokenID.blockSequenceStart) + { + endMark = scanner_.front.endMark; + state_ = &parseBlockSequenceEntry!(Yes.first); + return sequenceStartEvent(startMark, endMark, anchor, tag, + implicit, CollectionStyle.block); + } + + if(block && scanner_.front.id == TokenID.blockMappingStart) + { + endMark = scanner_.front.endMark; + state_ = &parseBlockMappingKey!(Yes.first); + return mappingStartEvent(startMark, endMark, anchor, tag, + implicit, 
CollectionStyle.block); + } + + if(anchor !is null || tag !is null) + { + state_ = popState(); + + //PyYAML uses a tuple(implicit, false) for the second last arg here, + //but the second bool is never used after that - so we don't use it. + + //Empty scalars are allowed even if a tag or an anchor is specified. + return scalarEvent(startMark, endMark, anchor, tag, + implicit , ""); + } + + const token = scanner_.front; + throw new ParserException("While parsing a " ~ (block ? "block" : "flow") ~ " node", + startMark, "expected node content, but found: " + ~ token.idString, token.startMark); + } + + /// Handle escape sequences in a double quoted scalar. + /// + /// Moved here from scanner as it can't always be done in-place with slices. + string handleDoubleQuotedScalarEscapes(const(char)[] tokenValue) const @safe + { + string notInPlace; + bool inEscape; + auto appender = appender!(string)(); + for(const(char)[] oldValue = tokenValue; !oldValue.empty();) + { + const dchar c = oldValue.front(); + oldValue.popFront(); + + if(!inEscape) + { + if(c != '\\') + { + if(notInPlace is null) { appender.put(c); } + else { notInPlace ~= c; } + continue; + } + // Escape sequence starts with a '\' + inEscape = true; + continue; + } + + import dub.internal.dyaml.escapes; + scope(exit) { inEscape = false; } + + // 'Normal' escape sequence. + if(c.among!(escapes)) + { + if(notInPlace is null) + { + // \L and \C can't be handled in place as the expand into + // many-byte unicode chars + if(c != 'L' && c != 'P') + { + appender.put(dub.internal.dyaml.escapes.fromEscape(c)); + continue; + } + // Need to duplicate as we won't fit into + // token.value - which is what appender uses + notInPlace = appender.data.dup; + notInPlace ~= dub.internal.dyaml.escapes.fromEscape(c); + continue; + } + notInPlace ~= dub.internal.dyaml.escapes.fromEscape(c); + continue; + } + + // Unicode char written in hexadecimal in an escape sequence. 
+ if(c.among!(escapeHexCodeList)) + { + // Scanner has already checked that the hex string is valid. + + const hexLength = dub.internal.dyaml.escapes.escapeHexLength(c); + // Any hex digits are 1-byte so this works. + const(char)[] hex = oldValue[0 .. hexLength]; + oldValue = oldValue[hexLength .. $]; + import std.ascii : isHexDigit; + assert(!hex.canFind!(d => !d.isHexDigit), + "Scanner must ensure the hex string is valid"); + + const decoded = cast(dchar)parse!int(hex, 16u); + if(notInPlace is null) { appender.put(decoded); } + else { notInPlace ~= decoded; } + continue; + } + + assert(false, "Scanner must handle unsupported escapes"); + } + + return notInPlace is null ? appender.data : notInPlace; + } + + /** + * Process a tag string retrieved from a tag token. + * + * Params: tag = Tag before processing. + * handleEnd = Index in tag where tag handle ends and tag suffix + * starts. + * startMark = Position of the node the tag belongs to. + * tagMark = Position of the tag. + */ + string processTag(const string tag, const uint handleEnd, + const Mark startMark, const Mark tagMark) + const @safe + { + const handle = tag[0 .. handleEnd]; + const suffix = tag[handleEnd .. $]; + + if(handle.length > 0) + { + string replacement; + foreach(ref pair; tagDirectives_) + { + if(pair.handle == handle) + { + replacement = pair.prefix; + break; + } + } + //handle must be in tagDirectives_ + enforce(replacement !is null, + new ParserException("While parsing a node", startMark, + "found undefined tag handle: " ~ handle, tagMark)); + return replacement ~ suffix; + } + return suffix; + } + + ///Wrappers to parse nodes. + Event parseBlockNode() @safe {return parseNode(Yes.block);} + Event parseFlowNode() @safe {return parseNode(No.block);} + Event parseBlockNodeOrIndentlessSequence() @safe {return parseNode(Yes.block, Yes.indentlessSequence);} + + ///block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END + + ///Parse an entry of a block sequence. 
If first is true, this is the first entry. + Event parseBlockSequenceEntry(Flag!"first" first)() @safe + { + static if(first) + { + pushMark(scanner_.front.startMark); + scanner_.popFront(); + } + + if(scanner_.front.id == TokenID.blockEntry) + { + const token = scanner_.front; + scanner_.popFront(); + if(!scanner_.front.id.among!(TokenID.blockEntry, TokenID.blockEnd)) + { + pushState(&parseBlockSequenceEntry!(No.first)); + return parseBlockNode(); + } + + state_ = &parseBlockSequenceEntry!(No.first); + return processEmptyScalar(token.endMark); + } + + if(scanner_.front.id != TokenID.blockEnd) + { + const token = scanner_.front; + throw new ParserException("While parsing a block collection", marks_.data.back, + "expected block end, but found " ~ token.idString, + token.startMark); + } + + state_ = popState(); + popMark(); + const token = scanner_.front; + scanner_.popFront(); + return sequenceEndEvent(token.startMark, token.endMark); + } + + ///indentless_sequence ::= (BLOCK-ENTRY block_node?)+ + + ///Parse an entry of an indentless sequence. + Event parseIndentlessSequenceEntry() @safe + { + if(scanner_.front.id == TokenID.blockEntry) + { + const token = scanner_.front; + scanner_.popFront(); + + if(!scanner_.front.id.among!(TokenID.blockEntry, TokenID.key, + TokenID.value, TokenID.blockEnd)) + { + pushState(&parseIndentlessSequenceEntry); + return parseBlockNode(); + } + + state_ = &parseIndentlessSequenceEntry; + return processEmptyScalar(token.endMark); + } + + state_ = popState(); + const token = scanner_.front; + return sequenceEndEvent(token.startMark, token.endMark); + } + + /** + * block_mapping ::= BLOCK-MAPPING_START + * ((KEY block_node_or_indentless_sequence?)? + * (VALUE block_node_or_indentless_sequence?)?)* + * BLOCK-END + */ + + ///Parse a key in a block mapping. If first is true, this is the first key. 
+ Event parseBlockMappingKey(Flag!"first" first)() @safe + { + static if(first) + { + pushMark(scanner_.front.startMark); + scanner_.popFront(); + } + + if(scanner_.front.id == TokenID.key) + { + const token = scanner_.front; + scanner_.popFront(); + + if(!scanner_.front.id.among!(TokenID.key, TokenID.value, TokenID.blockEnd)) + { + pushState(&parseBlockMappingValue); + return parseBlockNodeOrIndentlessSequence(); + } + + state_ = &parseBlockMappingValue; + return processEmptyScalar(token.endMark); + } + + if(scanner_.front.id != TokenID.blockEnd) + { + const token = scanner_.front; + throw new ParserException("While parsing a block mapping", marks_.data.back, + "expected block end, but found: " ~ token.idString, + token.startMark); + } + + state_ = popState(); + popMark(); + const token = scanner_.front; + scanner_.popFront(); + return mappingEndEvent(token.startMark, token.endMark); + } + + ///Parse a value in a block mapping. + Event parseBlockMappingValue() @safe + { + if(scanner_.front.id == TokenID.value) + { + const token = scanner_.front; + scanner_.popFront(); + + if(!scanner_.front.id.among!(TokenID.key, TokenID.value, TokenID.blockEnd)) + { + pushState(&parseBlockMappingKey!(No.first)); + return parseBlockNodeOrIndentlessSequence(); + } + + state_ = &parseBlockMappingKey!(No.first); + return processEmptyScalar(token.endMark); + } + + state_= &parseBlockMappingKey!(No.first); + return processEmptyScalar(scanner_.front.startMark); + } + + /** + * flow_sequence ::= FLOW-SEQUENCE-START + * (flow_sequence_entry FLOW-ENTRY)* + * flow_sequence_entry? + * FLOW-SEQUENCE-END + * flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + * + * Note that while production rules for both flow_sequence_entry and + * flow_mapping_entry are equal, their interpretations are different. + * For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` + * generate an inline mapping (set syntax). + */ + + ///Parse an entry in a flow sequence. 
If first is true, this is the first entry. + Event parseFlowSequenceEntry(Flag!"first" first)() @safe + { + static if(first) + { + pushMark(scanner_.front.startMark); + scanner_.popFront(); + } + + if(scanner_.front.id != TokenID.flowSequenceEnd) + { + static if(!first) + { + if(scanner_.front.id == TokenID.flowEntry) + { + scanner_.popFront(); + } + else + { + const token = scanner_.front; + throw new ParserException("While parsing a flow sequence", marks_.data.back, + "expected ',' or ']', but got: " ~ + token.idString, token.startMark); + } + } + + if(scanner_.front.id == TokenID.key) + { + const token = scanner_.front; + state_ = &parseFlowSequenceEntryMappingKey; + return mappingStartEvent(token.startMark, token.endMark, + null, null, true, CollectionStyle.flow); + } + else if(scanner_.front.id != TokenID.flowSequenceEnd) + { + pushState(&parseFlowSequenceEntry!(No.first)); + return parseFlowNode(); + } + } + + const token = scanner_.front; + scanner_.popFront(); + state_ = popState(); + popMark(); + return sequenceEndEvent(token.startMark, token.endMark); + } + + ///Parse a key in flow context. + Event parseFlowKey(Event delegate() @safe nextState) @safe + { + const token = scanner_.front; + scanner_.popFront(); + + if(!scanner_.front.id.among!(TokenID.value, TokenID.flowEntry, + TokenID.flowSequenceEnd)) + { + pushState(nextState); + return parseFlowNode(); + } + + state_ = nextState; + return processEmptyScalar(token.endMark); + } + + ///Parse a mapping key in an entry in a flow sequence. + Event parseFlowSequenceEntryMappingKey() @safe + { + return parseFlowKey(&parseFlowSequenceEntryMappingValue); + } + + ///Parse a mapping value in a flow context. 
+ Event parseFlowValue(TokenID checkId, Event delegate() @safe nextState) + @safe + { + if(scanner_.front.id == TokenID.value) + { + const token = scanner_.front; + scanner_.popFront(); + if(!scanner_.front.id.among(TokenID.flowEntry, checkId)) + { + pushState(nextState); + return parseFlowNode(); + } + + state_ = nextState; + return processEmptyScalar(token.endMark); + } + + state_ = nextState; + return processEmptyScalar(scanner_.front.startMark); + } + + ///Parse a mapping value in an entry in a flow sequence. + Event parseFlowSequenceEntryMappingValue() @safe + { + return parseFlowValue(TokenID.flowSequenceEnd, + &parseFlowSequenceEntryMappingEnd); + } + + ///Parse end of a mapping in a flow sequence entry. + Event parseFlowSequenceEntryMappingEnd() @safe + { + state_ = &parseFlowSequenceEntry!(No.first); + const token = scanner_.front; + return mappingEndEvent(token.startMark, token.startMark); + } + + /** + * flow_mapping ::= FLOW-MAPPING-START + * (flow_mapping_entry FLOW-ENTRY)* + * flow_mapping_entry? + * FLOW-MAPPING-END + * flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + */ + + ///Parse a key in a flow mapping. 
+ Event parseFlowMappingKey(Flag!"first" first)() @safe + { + static if(first) + { + pushMark(scanner_.front.startMark); + scanner_.popFront(); + } + + if(scanner_.front.id != TokenID.flowMappingEnd) + { + static if(!first) + { + if(scanner_.front.id == TokenID.flowEntry) + { + scanner_.popFront(); + } + else + { + const token = scanner_.front; + throw new ParserException("While parsing a flow mapping", marks_.data.back, + "expected ',' or '}', but got: " ~ + token.idString, token.startMark); + } + } + + if(scanner_.front.id == TokenID.key) + { + return parseFlowKey(&parseFlowMappingValue); + } + + if(scanner_.front.id != TokenID.flowMappingEnd) + { + pushState(&parseFlowMappingEmptyValue); + return parseFlowNode(); + } + } + + const token = scanner_.front; + scanner_.popFront(); + state_ = popState(); + popMark(); + return mappingEndEvent(token.startMark, token.endMark); + } + + ///Parse a value in a flow mapping. + Event parseFlowMappingValue() @safe + { + return parseFlowValue(TokenID.flowMappingEnd, &parseFlowMappingKey!(No.first)); + } + + ///Parse an empty value in a flow mapping. + Event parseFlowMappingEmptyValue() @safe + { + state_ = &parseFlowMappingKey!(No.first); + return processEmptyScalar(scanner_.front.startMark); + } + + ///Return an empty scalar. + Event processEmptyScalar(const Mark mark) @safe pure nothrow const @nogc + { + return scalarEvent(mark, mark, null, null, true, ""); + } +} diff --git a/source/dub/internal/dyaml/queue.d b/source/dub/internal/dyaml/queue.d new file mode 100644 index 0000000..269065c --- /dev/null +++ b/source/dub/internal/dyaml/queue.d @@ -0,0 +1,272 @@ + +// Copyright Ferdinand Majerech 2011-2014. +// Distributed under the Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module dub.internal.dyaml.queue; + + +import std.traits : hasMember, hasIndirections; + +package: + +/// Simple queue implemented as a singly linked list with a tail pointer. +/// +/// Needed in some D:YAML code that needs a queue-like structure without too much +/// reallocation that goes with an array. +/// +/// Allocations are non-GC and are damped by a free-list based on the nodes +/// that are removed. Note that elements lifetime must be managed +/// outside. +struct Queue(T) +if (!hasMember!(T, "__xdtor")) +{ + +private: + + // Linked list node containing one element and pointer to the next node. + struct Node + { + T payload_; + Node* next_; + } + + // Start of the linked list - first element added in time (end of the queue). + Node* first_; + // Last element of the linked list - last element added in time (start of the queue). + Node* last_; + // free-list + Node* stock; + + // Length of the queue. + size_t length_; + + // allocate a new node or recycle one from the stock. + Node* makeNewNode(T thePayload, Node* theNext = null) @trusted nothrow @nogc + { + import std.experimental.allocator : make; + import std.experimental.allocator.mallocator : Mallocator; + + Node* result; + if (stock !is null) + { + result = stock; + stock = result.next_; + result.payload_ = thePayload; + result.next_ = theNext; + } + else + { + result = Mallocator.instance.make!(Node)(thePayload, theNext); + // GC can dispose T managed member if it thinks they are no used... + static if (hasIndirections!T) + { + import core.memory : GC; + GC.addRange(result, Node.sizeof); + } + } + return result; + } + + // free the stock of available free nodes. 
+ void freeStock() @trusted @nogc nothrow + { + import std.experimental.allocator.mallocator : Mallocator; + + while (stock !is null) + { + Node* toFree = stock; + stock = stock.next_; + static if (hasIndirections!T) + { + import core.memory : GC; + GC.removeRange(toFree); + } + Mallocator.instance.deallocate((cast(ubyte*) toFree)[0 .. Node.sizeof]); + } + } + +public: + + @disable void opAssign(ref Queue); + @disable bool opEquals(ref Queue); + @disable int opCmp(ref Queue); + + this(this) @safe nothrow @nogc + { + auto node = first_; + first_ = null; + last_ = null; + while (node !is null) + { + Node* newLast = makeNewNode(node.payload_); + if (last_ !is null) + last_.next_ = newLast; + if (first_ is null) + first_ = newLast; + last_ = newLast; + node = node.next_; + } + } + + ~this() @safe nothrow @nogc + { + freeStock(); + stock = first_; + freeStock(); + } + + /// Returns a forward range iterating over this queue. + auto range() @safe pure nothrow @nogc + { + static struct Result + { + private Node* cursor; + + void popFront() @safe pure nothrow @nogc + { + cursor = cursor.next_; + } + ref T front() @safe pure nothrow @nogc + in(cursor !is null) + { + return cursor.payload_; + } + bool empty() @safe pure nothrow @nogc const + { + return cursor is null; + } + } + return Result(first_); + } + + /// Push a new item to the queue. + void push(T item) @nogc @safe nothrow + { + Node* newLast = makeNewNode(item); + if (last_ !is null) + last_.next_ = newLast; + if (first_ is null) + first_ = newLast; + last_ = newLast; + ++length_; + } + + /// Insert a new item putting it to specified index in the linked list. + void insert(T item, const size_t idx) @safe nothrow + in + { + assert(idx <= length_); + } + do + { + if (idx == 0) + { + first_ = makeNewNode(item, first_); + ++length_; + } + // Adding before last added element, so we can just push. + else if (idx == length_) + { + push(item); + } + else + { + // Get the element before one we're inserting. 
+ Node* current = first_; + foreach (i; 1 .. idx) + current = current.next_; + + assert(current); + // Insert a new node after current, and put current.next_ behind it. + current.next_ = makeNewNode(item, current.next_); + ++length_; + } + } + + /// Returns: The next element in the queue and remove it. + T pop() @safe nothrow + in + { + assert(!empty, "Trying to pop an element from an empty queue"); + } + do + { + T result = peek(); + + Node* oldStock = stock; + Node* old = first_; + first_ = first_.next_; + + // start the stock from the popped element + stock = old; + old.next_ = null; + // add the existing "old" stock to the new first stock element + if (oldStock !is null) + stock.next_ = oldStock; + + if (--length_ == 0) + { + assert(first_ is null); + last_ = null; + } + + return result; + } + + /// Returns: The next element in the queue. + ref inout(T) peek() @safe pure nothrow inout @nogc + in + { + assert(!empty, "Trying to peek at an element in an empty queue"); + } + do + { + return first_.payload_; + } + + /// Returns: true of the queue empty, false otherwise. + bool empty() @safe pure nothrow const @nogc + { + return first_ is null; + } + + /// Returns: The number of elements in the queue. + size_t length() @safe pure nothrow const @nogc + { + return length_; + } +} + +@safe nothrow unittest +{ + auto queue = Queue!int(); + assert(queue.empty); + foreach (i; 0 .. 65) + { + queue.push(5); + assert(queue.pop() == 5); + assert(queue.empty); + assert(queue.length_ == 0); + } + + int[] array = [1, -1, 2, -2, 3, -3, 4, -4, 5, -5]; + foreach (i; array) + { + queue.push(i); + } + + array = 42 ~ array[0 .. 3] ~ 42 ~ array[3 .. 
$] ~ 42; + queue.insert(42, 3); + queue.insert(42, 0); + queue.insert(42, queue.length); + + int[] array2; + while (!queue.empty) + { + array2 ~= queue.pop(); + } + + assert(array == array2); +} diff --git a/source/dub/internal/dyaml/reader.d b/source/dub/internal/dyaml/reader.d new file mode 100644 index 0000000..c294ab0 --- /dev/null +++ b/source/dub/internal/dyaml/reader.d @@ -0,0 +1,909 @@ + +// Copyright Ferdinand Majerech 2011-2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module dub.internal.dyaml.reader; + + +import core.stdc.stdlib; +import core.stdc.string; +import core.thread; + +import std.algorithm; +import std.array; +import std.conv; +import std.exception; +import std.range; +import std.string; +import std.system; +import std.typecons; +import std.utf; + +import dub.internal.tinyendian; + +import dub.internal.dyaml.encoding; +import dub.internal.dyaml.exception; + +alias isBreak = among!('\n', '\u0085', '\u2028', '\u2029'); + +package: + + +///Exception thrown at Reader errors. +class ReaderException : YAMLException +{ + this(string msg, string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow + { + super("Reader error: " ~ msg, file, line); + } +} + +/// Provides an API to read characters from a UTF-8 buffer and build slices into that +/// buffer to avoid allocations (see SliceBuilder). +final class Reader +{ + private: + // Buffer of currently loaded characters. + char[] buffer_; + + // Current position within buffer. Only data after this position can be read. + size_t bufferOffset_; + + // Index of the current character in the buffer. + size_t charIndex_; + // Number of characters (code points) in buffer_. + size_t characterCount_; + + // File name + string name_; + // Current line in file. + uint line_; + // Current column in file. + uint column_; + + // Original Unicode encoding of the data. 
+ Encoding encoding_; + + version(unittest) + { + // Endianness of the input before it was converted (for testing) + Endian endian_; + } + + // The number of consecutive ASCII characters starting at bufferOffset_. + // + // Used to minimize UTF-8 decoding. + size_t upcomingASCII_; + + // Index to buffer_ where the last decoded character starts. + size_t lastDecodedBufferOffset_; + // Offset, relative to charIndex_, of the last decoded character, + // in code points, not chars. + size_t lastDecodedCharOffset_; + + public: + /// Construct a Reader. + /// + /// Params: buffer = Buffer with YAML data. This may be e.g. the entire + /// contents of a file or a string. $(B will) be modified by + /// the Reader and other parts of D:YAML (D:YAML tries to + /// reuse the buffer to minimize memory allocations) + /// name = File name if the buffer is the contents of a file or + /// `""` if the buffer is the contents of a string. + /// + /// Throws: ReaderException on a UTF decoding error or if there are + /// nonprintable Unicode characters illegal in YAML. + this(ubyte[] buffer, string name = "") @safe pure + { + name_ = name; + auto endianResult = fixUTFByteOrder(buffer); + if(endianResult.bytesStripped > 0) + { + throw new ReaderException("Size of UTF-16 or UTF-32 input not aligned " ~ + "to 2 or 4 bytes, respectively"); + } + + version(unittest) { endian_ = endianResult.endian; } + encoding_ = endianResult.encoding; + + auto utf8Result = toUTF8(endianResult.array, endianResult.encoding); + const msg = utf8Result.errorMessage; + if(msg !is null) + { + throw new ReaderException("Error when converting to UTF-8: " ~ msg); + } + + buffer_ = utf8Result.utf8; + + characterCount_ = utf8Result.characterCount; + // Check that all characters in buffer are printable. 
+ enforce(isPrintableValidUTF8(buffer_), + new ReaderException("Special unicode characters are not allowed")); + + this.sliceBuilder = SliceBuilder(this); + checkASCII(); + } + + /// Get character at specified index relative to current position. + /// + /// Params: index = Index of the character to get relative to current position + /// in the buffer. Can point outside of the buffer; In that + /// case, '\0' will be returned. + /// + /// Returns: Character at specified position or '\0' if outside of the buffer. + /// + // XXX removed; search for 'risky' to find why. + // Throws: ReaderException if trying to read past the end of the buffer. + dchar peek(const size_t index) @safe pure + { + if(index < upcomingASCII_) { return buffer_[bufferOffset_ + index]; } + if(characterCount_ <= charIndex_ + index) + { + // XXX This is risky; revert this if bugs are introduced. We rely on + // the assumption that Reader only uses peek() to detect end of buffer. + // The test suite passes. + // Revert this case here and in other peek() versions if this causes + // errors. + // throw new ReaderException("Trying to read past the end of the buffer"); + return '\0'; + } + + // Optimized path for Scanner code that peeks chars in linear order to + // determine the length of some sequence. + if(index == lastDecodedCharOffset_) + { + ++lastDecodedCharOffset_; + const char b = buffer_[lastDecodedBufferOffset_]; + // ASCII + if(b < 0x80) + { + ++lastDecodedBufferOffset_; + return b; + } + return decode(buffer_, lastDecodedBufferOffset_); + } + + // 'Slow' path where we decode everything up to the requested character. + const asciiToTake = min(upcomingASCII_, index); + lastDecodedCharOffset_ = asciiToTake; + lastDecodedBufferOffset_ = bufferOffset_ + asciiToTake; + dchar d; + while(lastDecodedCharOffset_ <= index) + { + d = decodeNext(); + } + + return d; + } + + /// Optimized version of peek() for the case where peek index is 0. 
+ dchar peek() @safe pure + { + if(upcomingASCII_ > 0) { return buffer_[bufferOffset_]; } + if(characterCount_ <= charIndex_) { return '\0'; } + + lastDecodedCharOffset_ = 0; + lastDecodedBufferOffset_ = bufferOffset_; + return decodeNext(); + } + + /// Get byte at specified index relative to current position. + /// + /// Params: index = Index of the byte to get relative to current position + /// in the buffer. Can point outside of the buffer; In that + /// case, '\0' will be returned. + /// + /// Returns: Byte at specified position or '\0' if outside of the buffer. + char peekByte(const size_t index) @safe pure nothrow @nogc + { + return characterCount_ > (charIndex_ + index) ? buffer_[bufferOffset_ + index] : '\0'; + } + + /// Optimized version of peekByte() for the case where peek byte index is 0. + char peekByte() @safe pure nothrow @nogc + { + return characterCount_ > charIndex_ ? buffer_[bufferOffset_] : '\0'; + } + + + /// Get specified number of characters starting at current position. + /// + /// Note: This gets only a "view" into the internal buffer, which will be + /// invalidated after other Reader calls. Use SliceBuilder to build slices + /// for permanent use. + /// + /// Params: length = Number of characters (code points, not bytes) to get. May + /// reach past the end of the buffer; in that case the returned + /// slice will be shorter. + /// + /// Returns: Characters starting at current position or an empty slice if out of bounds. + char[] prefix(const size_t length) @safe pure + { + return slice(length); + } + + /// Get specified number of bytes, not code points, starting at current position. + /// + /// Note: This gets only a "view" into the internal buffer, which will be + /// invalidated after other Reader calls. Use SliceBuilder to build slices + /// for permanent use. + /// + /// Params: length = Number bytes (not code points) to get. May NOT reach past + /// the end of the buffer; should be used with peek() to avoid + /// this. 
+ /// + /// Returns: Bytes starting at current position. + char[] prefixBytes(const size_t length) @safe pure nothrow @nogc + in(length == 0 || bufferOffset_ + length <= buffer_.length, "prefixBytes out of bounds") + { + return buffer_[bufferOffset_ .. bufferOffset_ + length]; + } + + /// Get a slice view of the internal buffer, starting at the current position. + /// + /// Note: This gets only a "view" into the internal buffer, + /// which get invalidated after other Reader calls. + /// + /// Params: end = End of the slice relative to current position. May reach past + /// the end of the buffer; in that case the returned slice will + /// be shorter. + /// + /// Returns: Slice into the internal buffer or an empty slice if out of bounds. + char[] slice(const size_t end) @safe pure + { + // Fast path in case the caller has already peek()ed all the way to end. + if(end == lastDecodedCharOffset_) + { + return buffer_[bufferOffset_ .. lastDecodedBufferOffset_]; + } + + const asciiToTake = min(upcomingASCII_, end, buffer_.length); + lastDecodedCharOffset_ = asciiToTake; + lastDecodedBufferOffset_ = bufferOffset_ + asciiToTake; + + // 'Slow' path - decode everything up to end. + while(lastDecodedCharOffset_ < end && + lastDecodedBufferOffset_ < buffer_.length) + { + decodeNext(); + } + + return buffer_[bufferOffset_ .. lastDecodedBufferOffset_]; + } + + /// Get the next character, moving buffer position beyond it. + /// + /// Returns: Next character. + /// + /// Throws: ReaderException if trying to read past the end of the buffer + /// or if invalid data is read. + dchar get() @safe pure + { + const result = peek(); + forward(); + return result; + } + + /// Get specified number of characters, moving buffer position beyond them. + /// + /// Params: length = Number or characters (code points, not bytes) to get. + /// + /// Returns: Characters starting at current position. 
+ char[] get(const size_t length) @safe pure + { + auto result = slice(length); + forward(length); + return result; + } + + /// Move current position forward. + /// + /// Params: length = Number of characters to move position forward. + void forward(size_t length) @safe pure + { + while(length > 0) + { + auto asciiToTake = min(upcomingASCII_, length); + charIndex_ += asciiToTake; + length -= asciiToTake; + upcomingASCII_ -= asciiToTake; + + for(; asciiToTake > 0; --asciiToTake) + { + const c = buffer_[bufferOffset_++]; + // c is ASCII, do we only need to check for ASCII line breaks. + if(c == '\n' || (c == '\r' && buffer_[bufferOffset_] != '\n')) + { + ++line_; + column_ = 0; + continue; + } + ++column_; + } + + // If we have used up all upcoming ASCII chars, the next char is + // non-ASCII even after this returns, so upcomingASCII_ doesn't need to + // be updated - it's zero. + if(length == 0) { break; } + + assert(upcomingASCII_ == 0, + "Running unicode handling code but we haven't run out of ASCII chars"); + assert(bufferOffset_ < buffer_.length, + "Attempted to decode past the end of YAML buffer"); + assert(buffer_[bufferOffset_] >= 0x80, + "ASCII must be handled by preceding code"); + + ++charIndex_; + const c = decode(buffer_, bufferOffset_); + + // New line. (can compare with '\n' without decoding since it's ASCII) + if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n')) + { + ++line_; + column_ = 0; + } + else if(c != '\uFEFF') { ++column_; } + --length; + checkASCII(); + } + + lastDecodedBufferOffset_ = bufferOffset_; + lastDecodedCharOffset_ = 0; + } + + /// Move current position forward by one character. 
+ void forward() @safe pure + { + ++charIndex_; + lastDecodedBufferOffset_ = bufferOffset_; + lastDecodedCharOffset_ = 0; + + // ASCII + if(upcomingASCII_ > 0) + { + --upcomingASCII_; + const c = buffer_[bufferOffset_++]; + + if(c == '\n' || (c == '\r' && buffer_[bufferOffset_] != '\n')) + { + ++line_; + column_ = 0; + return; + } + ++column_; + return; + } + + // UTF-8 + assert(bufferOffset_ < buffer_.length, + "Attempted to decode past the end of YAML buffer"); + assert(buffer_[bufferOffset_] >= 0x80, + "ASCII must be handled by preceding code"); + + const c = decode(buffer_, bufferOffset_); + + // New line. (can compare with '\n' without decoding since it's ASCII) + if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n')) + { + ++line_; + column_ = 0; + } + else if(c != '\uFEFF') { ++column_; } + + checkASCII(); + } + + /// Used to build slices of read data in Reader; to avoid allocations. + SliceBuilder sliceBuilder; + + /// Get a string describing current buffer position, used for error messages. + Mark mark() const pure nothrow @nogc @safe { return Mark(name_, line_, column_); } + + /// Get file name. + string name() const @safe pure nothrow @nogc { return name_; } + + /// Set file name. + void name(string name) pure @safe nothrow @nogc { name_ = name; } + + /// Get current line number. + uint line() const @safe pure nothrow @nogc { return line_; } + + /// Get current column number. + uint column() const @safe pure nothrow @nogc { return column_; } + + /// Get index of the current character in the buffer. + size_t charIndex() const @safe pure nothrow @nogc { return charIndex_; } + + /// Get encoding of the input buffer. + Encoding encoding() const @safe pure nothrow @nogc { return encoding_; } + +private: + // Update upcomingASCII_ (should be called forward()ing over a UTF-8 sequence) + void checkASCII() @safe pure nothrow @nogc + { + upcomingASCII_ = countASCII(buffer_[bufferOffset_ .. 
$]); + } + + // Decode the next character relative to + // lastDecodedCharOffset_/lastDecodedBufferOffset_ and update them. + // + // Does not advance the buffer position. Used in peek() and slice(). + dchar decodeNext() @safe pure + { + assert(lastDecodedBufferOffset_ < buffer_.length, + "Attempted to decode past the end of YAML buffer"); + const char b = buffer_[lastDecodedBufferOffset_]; + ++lastDecodedCharOffset_; + // ASCII + if(b < 0x80) + { + ++lastDecodedBufferOffset_; + return b; + } + + return decode(buffer_, lastDecodedBufferOffset_); + } +} + +/// Used to build slices of already read data in Reader buffer, avoiding allocations. +/// +/// Usually these slices point to unchanged Reader data, but sometimes the data is +/// changed due to how YAML interprets certain characters/strings. +/// +/// See begin() documentation. +struct SliceBuilder +{ +private: + // No copying by the user. + @disable this(this); + @disable void opAssign(ref SliceBuilder); + + // Reader this builder works in. + Reader reader_; + + // Start of the slice om reader_.buffer_ (size_t.max while no slice being build) + size_t start_ = size_t.max; + // End of the slice om reader_.buffer_ (size_t.max while no slice being build) + size_t end_ = size_t.max; + + // Stack of slice ends to revert to (see Transaction) + // + // Very few levels as we don't want arbitrarily nested transactions. + size_t[4] endStack_; + // The number of elements currently in endStack_. + size_t endStackUsed_; + + @safe const pure nothrow @nogc invariant() + { + if(!inProgress) { return; } + assert(end_ <= reader_.bufferOffset_, "Slice ends after buffer position"); + assert(start_ <= end_, "Slice start after slice end"); + } + + // Is a slice currently being built? + bool inProgress() @safe const pure nothrow @nogc + in(start_ == size_t.max ? end_ == size_t.max : end_ != size_t.max, "start_/end_ are not consistent") + { + return start_ != size_t.max; + } + +public: + /// Begin building a slice. 
+ /// + /// Only one slice can be built at any given time; before beginning a new slice, + /// finish the previous one (if any). + /// + /// The slice starts at the current position in the Reader buffer. It can only be + /// extended up to the current position in the buffer; Reader methods get() and + /// forward() move the position. E.g. it is valid to extend a slice by write()-ing + /// a string just returned by get() - but not one returned by prefix() unless the + /// position has changed since the prefix() call. + void begin() @safe pure nothrow @nogc + in(!inProgress, "Beginning a slice while another slice is being built") + in(endStackUsed_ == 0, "Slice stack not empty at slice begin") + { + + start_ = reader_.bufferOffset_; + end_ = reader_.bufferOffset_; + } + + /// Finish building a slice and return it. + /// + /// Any Transactions on the slice must be committed or destroyed before the slice + /// is finished. + /// + /// Returns a string; once a slice is finished it is definitive that its contents + /// will not be changed. + char[] finish() @safe pure nothrow @nogc + in(inProgress, "finish called without begin") + in(endStackUsed_ == 0, "Finishing a slice with running transactions.") + { + + auto result = reader_.buffer_[start_ .. end_]; + start_ = end_ = size_t.max; + return result; + } + + /// Write a string to the slice being built. + /// + /// Data can only be written up to the current position in the Reader buffer. + /// + /// If str is a string returned by a Reader method, and str starts right after the + /// end of the slice being built, the slice is extended (trivial operation). + /// + /// See_Also: begin + void write(scope char[] str) @safe pure nothrow @nogc + { + assert(inProgress, "write called without begin"); + assert(end_ <= reader_.bufferOffset_, + "AT START: Slice ends after buffer position"); + + // Nothing? Already done. 
+ if (str.length == 0) { return; } + // If str starts at the end of the slice (is a string returned by a Reader + // method), just extend the slice to contain str. + if(&str[0] == &reader_.buffer_[end_]) + { + end_ += str.length; + } + // Even if str does not start at the end of the slice, it still may be returned + // by a Reader method and point to buffer. So we need to memmove. + else + { + copy(str, reader_.buffer_[end_..end_ + str.length * char.sizeof]); + end_ += str.length; + } + } + + /// Write a character to the slice being built. + /// + /// Data can only be written up to the current position in the Reader buffer. + /// + /// See_Also: begin + void write(dchar c) @safe pure + in(inProgress, "write called without begin") + { + if(c < 0x80) + { + reader_.buffer_[end_++] = cast(char)c; + return; + } + + // We need to encode a non-ASCII dchar into UTF-8 + char[4] encodeBuf; + const bytes = encode(encodeBuf, c); + reader_.buffer_[end_ .. end_ + bytes] = encodeBuf[0 .. bytes]; + end_ += bytes; + } + + /// Insert a character to a specified position in the slice. + /// + /// Enlarges the slice by 1 char. Note that the slice can only extend up to the + /// current position in the Reader buffer. + /// + /// Params: + /// + /// c = The character to insert. + /// position = Position to insert the character at in code units, not code points. + /// Must be less than slice length(); a previously returned length() + /// can be used. + void insert(const dchar c, const size_t position) @safe pure + in(inProgress, "insert called without begin") + in(start_ + position <= end_, "Trying to insert after the end of the slice") + { + + const point = start_ + position; + const movedLength = end_ - point; + + // Encode c into UTF-8 + char[4] encodeBuf; + if(c < 0x80) { encodeBuf[0] = cast(char)c; } + const size_t bytes = c < 0x80 ? 
1 : encode(encodeBuf, c); + + if(movedLength > 0) + { + copy(reader_.buffer_[point..point + movedLength * char.sizeof], + reader_.buffer_[point + bytes..point + bytes + movedLength * char.sizeof]); + } + reader_.buffer_[point .. point + bytes] = encodeBuf[0 .. bytes]; + end_ += bytes; + } + + /// Get the current length of the slice. + size_t length() @safe const pure nothrow @nogc + { + return end_ - start_; + } + + /// A slice building transaction. + /// + /// Can be used to save and revert back to slice state. + struct Transaction + { + private: + // The slice builder affected by the transaction. + SliceBuilder* builder_; + // Index of the return point of the transaction in StringBuilder.endStack_. + size_t stackLevel_; + // True after commit() has been called. + bool committed_; + + public: + /// Begins a transaction on a SliceBuilder object. + /// + /// The transaction must end $(B after) any transactions created within the + /// transaction but $(B before) the slice is finish()-ed. A transaction can be + /// ended either by commit()-ing or reverting through the destructor. + /// + /// Saves the current state of a slice. + this(SliceBuilder* builder) @safe pure nothrow @nogc + { + builder_ = builder; + stackLevel_ = builder_.endStackUsed_; + builder_.push(); + } + + /// Commit changes to the slice. + /// + /// Ends the transaction - can only be called once, and removes the possibility + /// to revert slice state. + /// + /// Does nothing for a default-initialized transaction (the transaction has not + /// been started yet). + void commit() @safe pure nothrow @nogc + in(!committed_, "Can't commit a transaction more than once") + { + + if(builder_ is null) { return; } + assert(builder_.endStackUsed_ == stackLevel_ + 1, + "Parent transactions don't fully contain child transactions"); + builder_.apply(); + committed_ = true; + } + + /// Destroy the transaction and revert it if it hasn't been committed yet. 
+ void end() @safe pure nothrow @nogc + in(builder_ && builder_.endStackUsed_ == stackLevel_ + 1, "Parent transactions don't fully contain child transactions") + { + builder_.pop(); + builder_ = null; + } + + } + +private: + // Push the current end of the slice so we can revert to it if needed. + // + // Used by Transaction. + void push() @safe pure nothrow @nogc + in(inProgress, "push called without begin") + in(endStackUsed_ < endStack_.length, "Slice stack overflow") + { + endStack_[endStackUsed_++] = end_; + } + + // Pop the current end of endStack_ and set the end of the slice to the popped + // value, reverting changes since the old end was pushed. + // + // Used by Transaction. + void pop() @safe pure nothrow @nogc + in(inProgress, "pop called without begin") + in(endStackUsed_ > 0, "Trying to pop an empty slice stack") + { + end_ = endStack_[--endStackUsed_]; + } + + // Pop the current end of endStack_, but keep the current end of the slice, applying + // changes made since pushing the old end. + // + // Used by Transaction. + void apply() @safe pure nothrow @nogc + in(inProgress, "apply called without begin") + in(endStackUsed_ > 0, "Trying to apply an empty slice stack") + { + --endStackUsed_; + } +} + + +private: + +// Convert a UTF-8/16/32 buffer to UTF-8, in-place if possible. +// +// Params: +// +// input = Buffer with UTF-8/16/32 data to decode. May be overwritten by the +// conversion, in which case the result will be a slice of this buffer. +// encoding = Encoding of input. +// +// Returns: +// +// A struct with the following members: +// +// $(D string errorMessage) In case of an error, the error message is stored here. If +// there was no error, errorMessage is NULL. Always check +// this first. +// $(D char[] utf8) input converted to UTF-8. May be a slice of input. +// $(D size_t characterCount) Number of characters (code points) in input. 
auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow
{
    // Documented in function ddoc.
    struct Result
    {
        string errorMessage;
        char[] utf8;
        size_t characterCount;
    }

    Result result;

    // Encode input_ into UTF-8 if it's encoded as UTF-16 or UTF-32.
    //
    // Params:
    //
    // buffer = The input buffer to encode.
    // result = A Result struct to put encoded result and any error messages to.
    //
    // On error, result.errorMessage will be set.
    static void encode(C)(C[] input, ref Result result) @safe pure
    {
        // We can do UTF-32->UTF-8 in place because all UTF-8 sequences are 4 or
        // less bytes: the write position can never overtake the read position.
        static if(is(C == dchar))
        {
            char[4] encodeBuf;
            auto utf8 = cast(char[])input;
            // size_t, not int: this is a byte index into the output buffer.
            size_t length = 0;
            foreach(dchar c; input)
            {
                ++result.characterCount;
                // ASCII fast path: a single byte, no encoding needed.
                if(c < 0x80)
                {
                    utf8[length++] = cast(char)c;
                    continue;
                }

                // std.utf.encode returns the number of code units written, so
                // a separate codeLength!char(c) lookup is unnecessary.
                const bytes = std.utf.encode(encodeBuf, c);
                utf8[length .. length + bytes] = encodeBuf[0 .. bytes];
                length += bytes;
            }
            result.utf8 = utf8[0 .. length];
        }
        // Unfortunately we can't do UTF-16 in place so we just use std.conv.to
        else
        {
            result.characterCount = std.utf.count(input);
            result.utf8 = input.to!(char[]);
        }
    }

    try final switch(encoding)
    {
        case UTFEncoding.UTF_8:
            // Already UTF-8; just validate and count code points.
            result.utf8 = cast(char[])input;
            result.utf8.validate();
            result.characterCount = std.utf.count(result.utf8);
            break;
        case UTFEncoding.UTF_16:
            assert(input.length % 2 == 0, "UTF-16 buffer size must be even");
            encode(cast(wchar[])input, result);
            break;
        case UTFEncoding.UTF_32:
            assert(input.length % 4 == 0, "UTF-32 buffer size must be a multiple of 4");
            encode(cast(dchar[])input, result);
            break;
    }
    // Invalid UTF sequences are reported through Result rather than thrown,
    // which is what lets this function be nothrow.
    catch(ConvException e) { result.errorMessage = e.msg; }
    catch(UTFException e)  { result.errorMessage = e.msg; }
    catch(Exception e)
    {
        assert(false, "Unexpected exception in encode(): " ~ e.msg);
    }

    return result;
}

/// Determine if all characters (code points, not bytes) in a string are printable.
///
/// A code point is accepted if it is a valid dchar and is either non-control or
/// whitespace (control whitespace such as '\n' and '\t' is allowed).
bool isPrintableValidUTF8(const char[] chars) @safe pure
{
    import std.uni : isControl, isWhite;
    foreach (dchar chr; chars)
    {
        if (!chr.isValidDchar || (chr.isControl && !chr.isWhite))
        {
            return false;
        }
    }
    return true;
}

/// Counts the number of ASCII characters in buffer until the first UTF-8 sequence.
///
/// Used to determine how many characters we can process without decoding.
size_t countASCII(const(char)[] buffer) @safe pure nothrow @nogc
{
    // byCodeUnit avoids auto-decoding; stop at the first byte >= 0x80.
    return buffer.byCodeUnit.until!(x => x > 0x7F).walkLength;
}
// Unittests.
+ +void testEndian(R)() +{ + void endian_test(ubyte[] data, Encoding encoding_expected, Endian endian_expected) + { + auto reader = new R(data); + assert(reader.encoding == encoding_expected); + assert(reader.endian_ == endian_expected); + } + ubyte[] little_endian_utf_16 = [0xFF, 0xFE, 0x7A, 0x00]; + ubyte[] big_endian_utf_16 = [0xFE, 0xFF, 0x00, 0x7A]; + endian_test(little_endian_utf_16, Encoding.UTF_16, Endian.littleEndian); + endian_test(big_endian_utf_16, Encoding.UTF_16, Endian.bigEndian); +} + +void testPeekPrefixForward(R)() +{ + import std.encoding; + ubyte[] data = bomTable[BOM.utf8].sequence ~ cast(ubyte[])"data"; + auto reader = new R(data); + assert(reader.peek() == 'd'); + assert(reader.peek(1) == 'a'); + assert(reader.peek(2) == 't'); + assert(reader.peek(3) == 'a'); + assert(reader.peek(4) == '\0'); + assert(reader.prefix(4) == "data"); + // assert(reader.prefix(6) == "data\0"); + reader.forward(2); + assert(reader.peek(1) == 'a'); + // assert(collectException(reader.peek(3))); +} + +void testUTF(R)() +{ + import std.encoding; + dchar[] data = cast(dchar[])"data"; + void utf_test(T)(T[] data, BOM bom) + { + ubyte[] bytes = bomTable[bom].sequence ~ + (cast(ubyte[])data)[0 .. data.length * T.sizeof]; + auto reader = new R(bytes); + assert(reader.peek() == 'd'); + assert(reader.peek(1) == 'a'); + assert(reader.peek(2) == 't'); + assert(reader.peek(3) == 'a'); + } + utf_test!char(to!(char[])(data), BOM.utf8); + utf_test!wchar(to!(wchar[])(data), endian == Endian.bigEndian ? BOM.utf16be : BOM.utf16le); + utf_test(data, endian == Endian.bigEndian ? 
BOM.utf32be : BOM.utf32le); +} + +void test1Byte(R)() +{ + ubyte[] data = [97]; + + auto reader = new R(data); + assert(reader.peek() == 'a'); + assert(reader.peek(1) == '\0'); + // assert(collectException(reader.peek(2))); +} + +@system unittest +{ + testEndian!Reader(); + testPeekPrefixForward!Reader(); + testUTF!Reader(); + test1Byte!Reader(); +} +//Issue 257 - https://github.com/dlang-community/D-YAML/issues/257 +@safe unittest +{ + import dub.internal.dyaml.loader : Loader; + auto yaml = "hello "; + auto root = Loader.fromString(yaml).load(); + + assert(root.isValid); +} diff --git a/source/dub/internal/dyaml/representer.d b/source/dub/internal/dyaml/representer.d new file mode 100644 index 0000000..0132da9 --- /dev/null +++ b/source/dub/internal/dyaml/representer.d @@ -0,0 +1,517 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * YAML node _representer. Prepares YAML nodes for output. A tutorial can be + * found $(LINK2 ../tutorials/custom_types.html, here). + * + * Code based on $(LINK2 http://www.pyyaml.org, PyYAML). + */ +module dub.internal.dyaml.representer; + + +import std.algorithm; +import std.array; +import std.base64; +import std.container; +import std.conv; +import std.datetime; +import std.exception; +import std.format; +import std.math; +import std.typecons; +import std.string; + +import dub.internal.dyaml.exception; +import dub.internal.dyaml.node; +import dub.internal.dyaml.serializer; +import dub.internal.dyaml.style; + +package: +///Exception thrown on Representer errors. +class RepresenterException : YAMLException +{ + mixin ExceptionCtors; +} + +/** + * Represents YAML nodes as scalar, sequence and mapping nodes ready for output. 
+ */ +Node representData(const Node data, ScalarStyle defaultScalarStyle, CollectionStyle defaultCollectionStyle) @safe +{ + Node result; + final switch(data.type) + { + case NodeType.null_: + result = representNull(); + break; + case NodeType.merge: + break; + case NodeType.boolean: + result = representBool(data); + break; + case NodeType.integer: + result = representLong(data); + break; + case NodeType.decimal: + result = representReal(data); + break; + case NodeType.binary: + result = representBytes(data); + break; + case NodeType.timestamp: + result = representSysTime(data); + break; + case NodeType.string: + result = representString(data); + break; + case NodeType.mapping: + result = representPairs(data, defaultScalarStyle, defaultCollectionStyle); + break; + case NodeType.sequence: + result = representNodes(data, defaultScalarStyle, defaultCollectionStyle); + break; + case NodeType.invalid: + assert(0); + } + + final switch (result.nodeID) + { + case NodeID.scalar: + if (result.scalarStyle == ScalarStyle.invalid) + { + result.scalarStyle = defaultScalarStyle; + } + break; + case NodeID.sequence, NodeID.mapping: + if (defaultCollectionStyle != CollectionStyle.invalid) + { + result.collectionStyle = defaultCollectionStyle; + } + break; + case NodeID.invalid: + break; + } + + + //Override tag if specified. + if(data.tag_ !is null){result.tag_ = data.tag_;} + + //Remember style if this was loaded before. + if(data.scalarStyle != ScalarStyle.invalid) + { + result.scalarStyle = data.scalarStyle; + } + if(data.collectionStyle != CollectionStyle.invalid) + { + result.collectionStyle = data.collectionStyle; + } + return result; +} + +@safe unittest +{ + // We don't emit yaml merge nodes. 
+ assert(representData(Node(YAMLMerge()), ScalarStyle.invalid, CollectionStyle.invalid) == Node.init); +} + +@safe unittest +{ + assert(representData(Node(YAMLNull()), ScalarStyle.invalid, CollectionStyle.invalid) == Node("null", "tag:yaml.org,2002:null")); +} + +@safe unittest +{ + assert(representData(Node(cast(string)null), ScalarStyle.invalid, CollectionStyle.invalid) == Node("", "tag:yaml.org,2002:str")); + assert(representData(Node("Hello world!"), ScalarStyle.invalid, CollectionStyle.invalid) == Node("Hello world!", "tag:yaml.org,2002:str")); +} + +@safe unittest +{ + assert(representData(Node(64), ScalarStyle.invalid, CollectionStyle.invalid) == Node("64", "tag:yaml.org,2002:int")); +} + +@safe unittest +{ + assert(representData(Node(true), ScalarStyle.invalid, CollectionStyle.invalid) == Node("true", "tag:yaml.org,2002:bool")); + assert(representData(Node(false), ScalarStyle.invalid, CollectionStyle.invalid) == Node("false", "tag:yaml.org,2002:bool")); +} + +@safe unittest +{ + // Float comparison is pretty unreliable... 
+ auto result = representData(Node(1.0), ScalarStyle.invalid, CollectionStyle.invalid); + assert(isClose(result.as!string.to!real, 1.0)); + assert(result.tag == "tag:yaml.org,2002:float"); + + assert(representData(Node(real.nan), ScalarStyle.invalid, CollectionStyle.invalid) == Node(".nan", "tag:yaml.org,2002:float")); + assert(representData(Node(real.infinity), ScalarStyle.invalid, CollectionStyle.invalid) == Node(".inf", "tag:yaml.org,2002:float")); + assert(representData(Node(-real.infinity), ScalarStyle.invalid, CollectionStyle.invalid) == Node("-.inf", "tag:yaml.org,2002:float")); +} + +@safe unittest +{ + assert(representData(Node(SysTime(DateTime(2000, 3, 14, 12, 34, 56), UTC())), ScalarStyle.invalid, CollectionStyle.invalid) == Node("2000-03-14T12:34:56Z", "tag:yaml.org,2002:timestamp")); +} + +@safe unittest +{ + assert(representData(Node(Node[].init, "tag:yaml.org,2002:set"), ScalarStyle.invalid, CollectionStyle.invalid) == Node(Node.Pair[].init, "tag:yaml.org,2002:set")); + assert(representData(Node(Node[].init, "tag:yaml.org,2002:seq"), ScalarStyle.invalid, CollectionStyle.invalid) == Node(Node[].init, "tag:yaml.org,2002:seq")); + { + auto nodes = [ + Node("a"), + Node("b"), + Node("c"), + ]; + assert(representData(Node(nodes, "tag:yaml.org,2002:set"), ScalarStyle.invalid, CollectionStyle.invalid) == + Node([ + Node.Pair( + Node("a", "tag:yaml.org,2002:str"), + Node("null", "tag:yaml.org,2002:null") + ), + Node.Pair( + Node("b", "tag:yaml.org,2002:str"), + Node("null", "tag:yaml.org,2002:null") + ), + Node.Pair( + Node("c", "tag:yaml.org,2002:str"), + Node("null", "tag:yaml.org,2002:null") + ) + ], "tag:yaml.org,2002:set")); + } + { + auto nodes = [ + Node("a"), + Node("b"), + Node("c"), + ]; + assert(representData(Node(nodes, "tag:yaml.org,2002:seq"), ScalarStyle.invalid, CollectionStyle.invalid) == + Node([ + Node("a", "tag:yaml.org,2002:str"), + Node("b", "tag:yaml.org,2002:str"), + Node("c", "tag:yaml.org,2002:str") + ], "tag:yaml.org,2002:seq")); + 
} +} + +@safe unittest +{ + assert(representData(Node(Node.Pair[].init, "tag:yaml.org,2002:omap"), ScalarStyle.invalid, CollectionStyle.invalid) == Node(Node[].init, "tag:yaml.org,2002:omap")); + assert(representData(Node(Node.Pair[].init, "tag:yaml.org,2002:pairs"), ScalarStyle.invalid, CollectionStyle.invalid) == Node(Node[].init, "tag:yaml.org,2002:pairs")); + assert(representData(Node(Node.Pair[].init, "tag:yaml.org,2002:map"), ScalarStyle.invalid, CollectionStyle.invalid) == Node(Node.Pair[].init, "tag:yaml.org,2002:map")); + { + auto nodes = [ + Node.Pair("a", "b"), + Node.Pair("a", "c") + ]; + assertThrown(representData(Node(nodes, "tag:yaml.org,2002:omap"), ScalarStyle.invalid, CollectionStyle.invalid)); + } + // Yeah, this gets ugly really fast. + { + auto nodes = [ + Node.Pair("a", "b"), + Node.Pair("a", "c") + ]; + assert(representData(Node(nodes, "tag:yaml.org,2002:pairs"), ScalarStyle.invalid, CollectionStyle.invalid) == + Node([ + Node( + [Node.Pair( + Node("a", "tag:yaml.org,2002:str"), + Node("b", "tag:yaml.org,2002:str") + )], + "tag:yaml.org,2002:map"), + Node( + [Node.Pair( + Node("a", "tag:yaml.org,2002:str"), + Node("c", "tag:yaml.org,2002:str") + )], + "tag:yaml.org,2002:map"), + ], "tag:yaml.org,2002:pairs")); + } + { + auto nodes = [ + Node.Pair("a", "b"), + Node.Pair("a", "c") + ]; + assertThrown(representData(Node(nodes, "tag:yaml.org,2002:map"), ScalarStyle.invalid, CollectionStyle.invalid)); + } + { + auto nodes = [ + Node.Pair("a", "b"), + Node.Pair("c", "d") + ]; + assert(representData(Node(nodes, "tag:yaml.org,2002:omap"), ScalarStyle.invalid, CollectionStyle.invalid) == + Node([ + Node([ + Node.Pair( + Node("a", "tag:yaml.org,2002:str"), + Node("b", "tag:yaml.org,2002:str") + ) + ], "tag:yaml.org,2002:map"), + Node([ + Node.Pair( + Node("c", "tag:yaml.org,2002:str"), + Node("d", "tag:yaml.org,2002:str") + ) + ], "tag:yaml.org,2002:map" + )], "tag:yaml.org,2002:omap")); + } + { + auto nodes = [ + Node.Pair("a", "b"), + Node.Pair("c", 
"d") + ]; + assert(representData(Node(nodes, "tag:yaml.org,2002:map"), ScalarStyle.invalid, CollectionStyle.invalid) == + Node([ + Node.Pair( + Node("a", "tag:yaml.org,2002:str"), + Node("b", "tag:yaml.org,2002:str") + ), + Node.Pair( + Node("c", "tag:yaml.org,2002:str"), + Node("d", "tag:yaml.org,2002:str") + ), + ], "tag:yaml.org,2002:map")); + } +} + +private: + +//Represent a _null _node as a _null YAML value. +Node representNull() @safe +{ + return Node("null", "tag:yaml.org,2002:null"); +} + +//Represent a string _node as a string scalar. +Node representString(const Node node) @safe +{ + string value = node.as!string; + return Node(value, "tag:yaml.org,2002:str"); +} + +//Represent a bytes _node as a binary scalar. +Node representBytes(const Node node) @safe +{ + const ubyte[] value = node.as!(ubyte[]); + if(value is null){return Node("null", "tag:yaml.org,2002:null");} + + auto newNode = Node(Base64.encode(value).idup, "tag:yaml.org,2002:binary"); + newNode.scalarStyle = ScalarStyle.literal; + return newNode; +} + +//Represent a bool _node as a bool scalar. +Node representBool(const Node node) @safe +{ + return Node(node.as!bool ? "true" : "false", "tag:yaml.org,2002:bool"); +} + +//Represent a long _node as an integer scalar. +Node representLong(const Node node) @safe +{ + return Node(node.as!long.to!string, "tag:yaml.org,2002:int"); +} + +//Represent a real _node as a floating point scalar. +Node representReal(const Node node) @safe +{ + real f = node.as!real; + string value = isNaN(f) ? ".nan": + f == real.infinity ? ".inf": + f == -1.0 * real.infinity ? "-.inf": + {auto a = appender!string(); + formattedWrite(a, "%12f", f); + return a.data.strip();}(); + + return Node(value, "tag:yaml.org,2002:float"); +} + +//Represent a SysTime _node as a timestamp. +Node representSysTime(const Node node) @safe +{ + return Node(node.as!SysTime.toISOExtString(), "tag:yaml.org,2002:timestamp"); +} + +//Represent a sequence _node as sequence/set. 
+Node representNodes(const Node node, ScalarStyle defaultScalarStyle, CollectionStyle defaultCollectionStyle) @safe +{ + auto nodes = node.as!(Node[]); + if(node.tag_ == "tag:yaml.org,2002:set") + { + //YAML sets are mapping with null values. + Node.Pair[] pairs; + pairs.length = nodes.length; + + foreach(idx, key; nodes) + { + pairs[idx] = Node.Pair(key, Node("null", "tag:yaml.org,2002:null")); + } + Node.Pair[] value; + value.length = pairs.length; + + auto bestStyle = CollectionStyle.flow; + foreach(idx, pair; pairs) + { + value[idx] = Node.Pair(representData(pair.key, defaultScalarStyle, defaultCollectionStyle), representData(pair.value, defaultScalarStyle, defaultCollectionStyle)); + if(value[idx].shouldUseBlockStyle) + { + bestStyle = CollectionStyle.block; + } + } + + auto newNode = Node(value, node.tag_); + newNode.collectionStyle = bestStyle; + return newNode; + } + else + { + Node[] value; + value.length = nodes.length; + + auto bestStyle = CollectionStyle.flow; + foreach(idx, item; nodes) + { + value[idx] = representData(item, defaultScalarStyle, defaultCollectionStyle); + const isScalar = value[idx].nodeID == NodeID.scalar; + const s = value[idx].scalarStyle; + if(!isScalar || (s != ScalarStyle.invalid && s != ScalarStyle.plain)) + { + bestStyle = CollectionStyle.block; + } + } + + auto newNode = Node(value, "tag:yaml.org,2002:seq"); + newNode.collectionStyle = bestStyle; + return newNode; + } +} + +bool shouldUseBlockStyle(const Node value) @safe +{ + const isScalar = value.nodeID == NodeID.scalar; + const s = value.scalarStyle; + return (!isScalar || (s != ScalarStyle.invalid && s != ScalarStyle.plain)); +} +bool shouldUseBlockStyle(const Node.Pair value) @safe +{ + const keyScalar = value.key.nodeID == NodeID.scalar; + const valScalar = value.value.nodeID == NodeID.scalar; + const keyStyle = value.key.scalarStyle; + const valStyle = value.value.scalarStyle; + if(!keyScalar || + (keyStyle != ScalarStyle.invalid && keyStyle != ScalarStyle.plain)) + { + 
return true; + } + if(!valScalar || + (valStyle != ScalarStyle.invalid && valStyle != ScalarStyle.plain)) + { + return true; + } + return false; +} + +//Represent a mapping _node as map/ordered map/pairs. +Node representPairs(const Node node, ScalarStyle defaultScalarStyle, CollectionStyle defaultCollectionStyle) @safe +{ + auto pairs = node.as!(Node.Pair[]); + + bool hasDuplicates(const Node.Pair[] pairs) @safe + { + //TODO this should be replaced by something with deterministic memory allocation. + auto keys = redBlackTree!Node(); + foreach(pair; pairs) + { + if(pair.key in keys){return true;} + keys.insert(pair.key); + } + return false; + } + + Node[] mapToSequence(const Node.Pair[] pairs) @safe + { + Node[] nodes; + nodes.length = pairs.length; + foreach(idx, pair; pairs) + { + Node.Pair value; + + auto bestStyle = value.shouldUseBlockStyle ? CollectionStyle.block : CollectionStyle.flow; + value = Node.Pair(representData(pair.key, defaultScalarStyle, defaultCollectionStyle), representData(pair.value, defaultScalarStyle, defaultCollectionStyle)); + + auto newNode = Node([value], "tag:yaml.org,2002:map"); + newNode.collectionStyle = bestStyle; + nodes[idx] = newNode; + } + return nodes; + } + + if(node.tag_ == "tag:yaml.org,2002:omap") + { + enforce(!hasDuplicates(pairs), + new RepresenterException("Duplicate entry in an ordered map")); + auto sequence = mapToSequence(pairs); + Node[] value; + value.length = sequence.length; + + auto bestStyle = CollectionStyle.flow; + foreach(idx, item; sequence) + { + value[idx] = representData(item, defaultScalarStyle, defaultCollectionStyle); + if(value[idx].shouldUseBlockStyle) + { + bestStyle = CollectionStyle.block; + } + } + + auto newNode = Node(value, node.tag_); + newNode.collectionStyle = bestStyle; + return newNode; + } + else if(node.tag_ == "tag:yaml.org,2002:pairs") + { + auto sequence = mapToSequence(pairs); + Node[] value; + value.length = sequence.length; + + auto bestStyle = CollectionStyle.flow; + 
foreach(idx, item; sequence) + { + value[idx] = representData(item, defaultScalarStyle, defaultCollectionStyle); + if(value[idx].shouldUseBlockStyle) + { + bestStyle = CollectionStyle.block; + } + } + + auto newNode = Node(value, node.tag_); + newNode.collectionStyle = bestStyle; + return newNode; + } + else + { + enforce(!hasDuplicates(pairs), + new RepresenterException("Duplicate entry in an unordered map")); + Node.Pair[] value; + value.length = pairs.length; + + auto bestStyle = CollectionStyle.flow; + foreach(idx, pair; pairs) + { + value[idx] = Node.Pair(representData(pair.key, defaultScalarStyle, defaultCollectionStyle), representData(pair.value, defaultScalarStyle, defaultCollectionStyle)); + if(value[idx].shouldUseBlockStyle) + { + bestStyle = CollectionStyle.block; + } + } + + auto newNode = Node(value, "tag:yaml.org,2002:map"); + newNode.collectionStyle = bestStyle; + return newNode; + } +} diff --git a/source/dub/internal/dyaml/resolver.d b/source/dub/internal/dyaml/resolver.d new file mode 100644 index 0000000..f3cbfe0 --- /dev/null +++ b/source/dub/internal/dyaml/resolver.d @@ -0,0 +1,260 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * Implements a class that resolves YAML tags. This can be used to implicitly + * resolve tags for custom data types, removing the need to explicitly + * specify tags in YAML. A tutorial can be found + * $(LINK2 ../tutorials/custom_types.html, here). + * + * Code based on $(LINK2 http://www.pyyaml.org, PyYAML). 
+ */ +module dub.internal.dyaml.resolver; + + +import std.conv; +import std.regex; +import std.typecons; +import std.utf; + +import dub.internal.dyaml.node; +import dub.internal.dyaml.exception; + + +/// Type of `regexes` +private alias RegexType = Tuple!(string, "tag", const Regex!char, "regexp", string, "chars"); + +private immutable RegexType[] regexes = [ + RegexType("tag:yaml.org,2002:bool", + regex(r"^(?:yes|Yes|YES|no|No|NO|true|True|TRUE" ~ + "|false|False|FALSE|on|On|ON|off|Off|OFF)$"), + "yYnNtTfFoO"), + RegexType("tag:yaml.org,2002:float", + regex(r"^(?:[-+]?([0-9][0-9_]*)\\.[0-9_]*" ~ + "(?:[eE][-+][0-9]+)?|[-+]?(?:[0-9][0-9_]" ~ + "*)?\\.[0-9_]+(?:[eE][-+][0-9]+)?|[-+]?" ~ + "[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]" ~ + "*|[-+]?\\.(?:inf|Inf|INF)|\\." ~ + "(?:nan|NaN|NAN))$"), + "-+0123456789."), + RegexType("tag:yaml.org,2002:int", + regex(r"^(?:[-+]?0b[0-1_]+" ~ + "|[-+]?0[0-7_]+" ~ + "|[-+]?(?:0|[1-9][0-9_]*)" ~ + "|[-+]?0x[0-9a-fA-F_]+" ~ + "|[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$"), + "-+0123456789"), + RegexType("tag:yaml.org,2002:merge", regex(r"^<<$"), "<"), + RegexType("tag:yaml.org,2002:null", + regex(r"^$|^(?:~|null|Null|NULL)$"), "~nN\0"), + RegexType("tag:yaml.org,2002:timestamp", + regex(r"^[0-9][0-9][0-9][0-9]-[0-9][0-9]-" ~ + "[0-9][0-9]|[0-9][0-9][0-9][0-9]-[0-9]" ~ + "[0-9]?-[0-9][0-9]?[Tt]|[ \t]+[0-9]" ~ + "[0-9]?:[0-9][0-9]:[0-9][0-9]" ~ + "(?:\\.[0-9]*)?(?:[ \t]*Z|[-+][0-9]" ~ + "[0-9]?(?::[0-9][0-9])?)?$"), + "0123456789"), + RegexType("tag:yaml.org,2002:value", regex(r"^=$"), "="), + + //The following resolver is only for documentation purposes. It cannot work + //because plain scalars cannot start with '!', '&', or '*'. + RegexType("tag:yaml.org,2002:yaml", regex(r"^(?:!|&|\*)$"), "!&*"), +]; + +/** + * Resolves YAML tags (data types). + * + * Can be used to implicitly resolve custom data types of scalar values. + */ +struct Resolver +{ + private: + // Default tag to use for scalars. 
+ string defaultScalarTag_ = "tag:yaml.org,2002:str"; + // Default tag to use for sequences. + string defaultSequenceTag_ = "tag:yaml.org,2002:seq"; + // Default tag to use for mappings. + string defaultMappingTag_ = "tag:yaml.org,2002:map"; + + /* + * Arrays of scalar resolver tuples indexed by starting character of a scalar. + * + * Each tuple stores regular expression the scalar must match, + * and tag to assign to it if it matches. + */ + Tuple!(string, const Regex!char)[][dchar] yamlImplicitResolvers_; + + package: + static auto withDefaultResolvers() @safe + { + Resolver resolver; + foreach(pair; regexes) + { + resolver.addImplicitResolver(pair.tag, pair.regexp, pair.chars); + } + return resolver; + } + + public: + @disable bool opEquals(ref Resolver); + @disable int opCmp(ref Resolver); + + /** + * Add an implicit scalar resolver. + * + * If a scalar matches regexp and starts with any character in first, + * its _tag is set to tag. If it matches more than one resolver _regexp + * resolvers added _first override ones added later. Default resolvers + * override any user specified resolvers, but they can be disabled in + * Resolver constructor. + * + * If a scalar is not resolved to anything, it is assigned the default + * YAML _tag for strings. + * + * Params: tag = Tag to resolve to. + * regexp = Regular expression the scalar must match to have this _tag. + * first = String of possible starting characters of the scalar. 
+ * + */ + void addImplicitResolver(string tag, const Regex!char regexp, string first) + pure @safe + { + foreach(const dchar c; first) + { + if((c in yamlImplicitResolvers_) is null) + { + yamlImplicitResolvers_[c] = []; + } + yamlImplicitResolvers_[c] ~= tuple(tag, regexp); + } + } + /// Resolve scalars starting with 'A' to !_tag + @safe unittest + { + import std.file : write; + import std.regex : regex; + import dub.internal.dyaml.loader : Loader; + import dub.internal.dyaml.resolver : Resolver; + + write("example.yaml", "A"); + + auto loader = Loader.fromFile("example.yaml"); + loader.resolver.addImplicitResolver("!tag", regex("A.*"), "A"); + + auto node = loader.load(); + assert(node.tag == "!tag"); + } + + package: + /** + * Resolve tag of a node. + * + * Params: kind = Type of the node. + * tag = Explicit tag of the node, if any. + * value = Value of the node, if any. + * implicit = Should the node be implicitly resolved? + * + * If the tag is already specified and not non-specific, that tag will + * be returned. + * + * Returns: Resolved tag. + */ + string resolve(const NodeID kind, const string tag, scope string value, + const bool implicit) @safe + { + import std.array : empty, front; + if((tag !is null) && (tag != "!")) + { + return tag; + } + + final switch (kind) + { + case NodeID.scalar: + if(!implicit) + { + return defaultScalarTag_; + } + + //Get the first char of the value. + const dchar first = value.empty ? '\0' : value.front; + + auto resolvers = (first in yamlImplicitResolvers_) is null ? + [] : yamlImplicitResolvers_[first]; + + //If regexp matches, return tag. 
+ foreach(resolver; resolvers) + { + // source/dyaml/resolver.d(192,35): Error: scope variable `__tmpfordtorXXX` + // assigned to non-scope parameter `this` calling + // `std.regex.RegexMatch!string.RegexMatch.~this` + bool isEmpty = () @trusted { + return match(value, resolver[1]).empty; + }(); + if(!isEmpty) + { + return resolver[0]; + } + } + return defaultScalarTag_; + case NodeID.sequence: + return defaultSequenceTag_; + case NodeID.mapping: + return defaultMappingTag_; + case NodeID.invalid: + assert(false, "Cannot resolve an invalid node"); + } + } + @safe unittest + { + auto resolver = Resolver.withDefaultResolvers; + + bool tagMatch(string tag, string[] values) @safe + { + const string expected = tag; + foreach(value; values) + { + const string resolved = resolver.resolve(NodeID.scalar, null, value, true); + if(expected != resolved) + { + return false; + } + } + return true; + } + + assert(tagMatch("tag:yaml.org,2002:bool", + ["yes", "NO", "True", "on"])); + assert(tagMatch("tag:yaml.org,2002:float", + ["6.8523015e+5", "685.230_15e+03", "685_230.15", + "190:20:30.15", "-.inf", ".NaN"])); + assert(tagMatch("tag:yaml.org,2002:int", + ["685230", "+685_230", "02472256", "0x_0A_74_AE", + "0b1010_0111_0100_1010_1110", "190:20:30"])); + assert(tagMatch("tag:yaml.org,2002:merge", ["<<"])); + assert(tagMatch("tag:yaml.org,2002:null", ["~", "null", ""])); + assert(tagMatch("tag:yaml.org,2002:str", + ["abcd", "9a8b", "9.1adsf"])); + assert(tagMatch("tag:yaml.org,2002:timestamp", + ["2001-12-15T02:59:43.1Z", + "2001-12-14t21:59:43.10-05:00", + "2001-12-14 21:59:43.10 -5", + "2001-12-15 2:59:43.10", + "2002-12-14"])); + assert(tagMatch("tag:yaml.org,2002:value", ["="])); + assert(tagMatch("tag:yaml.org,2002:yaml", ["!", "&", "*"])); + } + + ///Returns: Default scalar tag. + @property string defaultScalarTag() const pure @safe nothrow {return defaultScalarTag_;} + + ///Returns: Default sequence tag. 
+ @property string defaultSequenceTag() const pure @safe nothrow {return defaultSequenceTag_;} + + ///Returns: Default mapping tag. + @property string defaultMappingTag() const pure @safe nothrow {return defaultMappingTag_;} +} diff --git a/source/dub/internal/dyaml/scanner.d b/source/dub/internal/dyaml/scanner.d new file mode 100644 index 0000000..3be567f --- /dev/null +++ b/source/dub/internal/dyaml/scanner.d @@ -0,0 +1,1809 @@ + +// Copyright Ferdinand Majerech 2011-2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/// YAML scanner. +/// Code based on PyYAML: http://www.pyyaml.org +module dub.internal.dyaml.scanner; + + +import core.stdc.string; + +import std.algorithm; +import std.array; +import std.conv; +import std.ascii : isAlphaNum, isDigit, isHexDigit; +import std.exception; +import std.string; +import std.typecons; +import std.traits : Unqual; +import std.utf; + +import dub.internal.dyaml.escapes; +import dub.internal.dyaml.exception; +import dub.internal.dyaml.queue; +import dub.internal.dyaml.reader; +import dub.internal.dyaml.style; +import dub.internal.dyaml.token; + +package: +/// Scanner produces tokens of the following types: +/// STREAM-START +/// STREAM-END +/// DIRECTIVE(name, value) +/// DOCUMENT-START +/// DOCUMENT-END +/// BLOCK-SEQUENCE-START +/// BLOCK-MAPPING-START +/// BLOCK-END +/// FLOW-SEQUENCE-START +/// FLOW-MAPPING-START +/// FLOW-SEQUENCE-END +/// FLOW-MAPPING-END +/// BLOCK-ENTRY +/// FLOW-ENTRY +/// KEY +/// VALUE +/// ALIAS(value) +/// ANCHOR(value) +/// TAG(value) +/// SCALAR(value, plain, style) + +alias isBreak = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isBreakOrSpace = among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isWhiteSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isNonLinebreakWhitespace = among!(' ', '\t'); + +alias 
isNonScalarStartCharacter = among!('-', '?', ':', ',', '[', ']', '{', '}', + '#', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`', ' ', '\t', '\0', '\n', + '\r', '\u0085', '\u2028', '\u2029'); + +alias isURIChar = among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', + '_', '.', '!', '~', '*', '\'', '(', ')', '[', ']', '%'); + +alias isNSChar = among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isBChar = among!('\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isFlowScalarBreakSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029', '\'', '"', '\\'); + +alias isNSAnchorName = c => !c.isWhiteSpace && !c.among!('[', ']', '{', '}', ',', '\uFEFF'); + +/// Marked exception thrown at scanner errors. +/// +/// See_Also: MarkedYAMLException +class ScannerException : MarkedYAMLException +{ + mixin MarkedExceptionCtors; +} + +/// Generates tokens from data provided by a Reader. +struct Scanner +{ + private: + /// A simple key is a key that is not denoted by the '?' indicator. + /// For example: + /// --- + /// block simple key: value + /// ? not a simple key: + /// : { flow simple key: value } + /// We emit the KEY token before all keys, so when we find a potential simple + /// key, we try to locate the corresponding ':' indicator. Simple keys should be + /// limited to a single line and 1024 characters. + /// + /// 16 bytes on 64-bit. + static struct SimpleKey + { + /// Character index in reader where the key starts. + uint charIndex = uint.max; + /// Index of the key token from start (first token scanned being 0). + uint tokenIndex; + /// Line the key starts at. + uint line; + /// Column the key starts at. + ushort column; + /// Is this required to be a simple key? + bool required; + /// Is this struct "null" (invalid)?. + bool isNull; + } + + /// Block chomping types. + enum Chomping + { + /// Strip all trailing line breaks. '-' indicator. + strip, + /// Line break of the last line is preserved, others discarded. Default. 
+ clip, + /// All trailing line breaks are preserved. '+' indicator. + keep + } + + /// Reader used to read from a file/stream. + Reader reader_; + /// Are we done scanning? + bool done_; + + /// Level of nesting in flow context. If 0, we're in block context. + uint flowLevel_; + /// Current indentation level. + int indent_ = -1; + /// Past indentation levels. Used as a stack. + Appender!(int[]) indents_; + + /// Processed tokens not yet emitted. Used as a queue. + Queue!Token tokens_; + + /// Number of tokens emitted through the getToken method. + uint tokensTaken_; + + /// Can a simple key start at the current position? A simple key may start: + /// - at the beginning of the line, not counting indentation spaces + /// (in block context), + /// - after '{', '[', ',' (in the flow context), + /// - after '?', ':', '-' (in the block context). + /// In the block context, this flag also signifies if a block collection + /// may start at the current position. + bool allowSimpleKey_ = true; + + /// Possible simple keys indexed by flow levels. + SimpleKey[] possibleSimpleKeys_; + + public: + /// Construct a Scanner using specified Reader. + this(Reader reader) @safe nothrow + { + // Return the next token, but do not delete it from the queue + reader_ = reader; + fetchStreamStart(); + } + + /// Advance to the next token + void popFront() @safe + { + ++tokensTaken_; + tokens_.pop(); + } + + /// Return the current token + const(Token) front() @safe + { + enforce(!empty, "No token left to peek"); + return tokens_.peek(); + } + + /// Return whether there are any more tokens left. + bool empty() @safe + { + while (needMoreTokens()) + { + fetchToken(); + } + return tokens_.empty; + } + + /// Set file name. + void name(string name) @safe pure nothrow @nogc + { + reader_.name = name; + } + + private: + /// Most scanning error messages have the same format; so build them with this + /// function. 
+ string expected(T)(string expected, T found) + { + return text("expected ", expected, ", but found ", found); + } + + /// Determine whether or not we need to fetch more tokens before peeking/getting a token. + bool needMoreTokens() @safe pure + { + if(done_) { return false; } + if(tokens_.empty) { return true; } + + /// The current token may be a potential simple key, so we need to look further. + stalePossibleSimpleKeys(); + return nextPossibleSimpleKey() == tokensTaken_; + } + + /// Fetch at token, adding it to tokens_. + void fetchToken() @safe + { + // Eat whitespaces and comments until we reach the next token. + scanToNextToken(); + + // Remove obsolete possible simple keys. + stalePossibleSimpleKeys(); + + // Compare current indentation and column. It may add some tokens + // and decrease the current indentation level. + unwindIndent(reader_.column); + + // Get the next character. + const dchar c = reader_.peekByte(); + + // Fetch the token. + if(c == '\0') { return fetchStreamEnd(); } + if(checkDirective()) { return fetchDirective(); } + if(checkDocumentStart()) { return fetchDocumentStart(); } + if(checkDocumentEnd()) { return fetchDocumentEnd(); } + // Order of the following checks is NOT significant. 
+ switch(c) + { + case '[': return fetchFlowSequenceStart(); + case '{': return fetchFlowMappingStart(); + case ']': return fetchFlowSequenceEnd(); + case '}': return fetchFlowMappingEnd(); + case ',': return fetchFlowEntry(); + case '!': return fetchTag(); + case '\'': return fetchSingle(); + case '\"': return fetchDouble(); + case '*': return fetchAlias(); + case '&': return fetchAnchor(); + case '?': if(checkKey()) { return fetchKey(); } goto default; + case ':': if(checkValue()) { return fetchValue(); } goto default; + case '-': if(checkBlockEntry()) { return fetchBlockEntry(); } goto default; + case '|': if(flowLevel_ == 0) { return fetchLiteral(); } break; + case '>': if(flowLevel_ == 0) { return fetchFolded(); } break; + default: if(checkPlain()) { return fetchPlain(); } + } + + throw new ScannerException("While scanning for the next token, found character " ~ + "\'%s\', index %s that cannot start any token" + .format(c, to!int(c)), reader_.mark); + } + + + /// Return the token number of the nearest possible simple key. + uint nextPossibleSimpleKey() @safe pure nothrow @nogc + { + uint minTokenNumber = uint.max; + foreach(k, ref simpleKey; possibleSimpleKeys_) + { + if(simpleKey.isNull) { continue; } + minTokenNumber = min(minTokenNumber, simpleKey.tokenIndex); + } + return minTokenNumber; + } + + /// Remove entries that are no longer possible simple keys. + /// + /// According to the YAML specification, simple keys + /// - should be limited to a single line, + /// - should be no longer than 1024 characters. + /// Disabling this will allow simple keys of any length and + /// height (may cause problems if indentation is broken though). 
+ void stalePossibleSimpleKeys() @safe pure + { + foreach(level, ref key; possibleSimpleKeys_) + { + if(key.isNull) { continue; } + if(key.line != reader_.line || reader_.charIndex - key.charIndex > 1024) + { + enforce(!key.required, + new ScannerException("While scanning a simple key", + Mark(reader_.name, key.line, key.column), + "could not find expected ':'", reader_.mark)); + key.isNull = true; + } + } + } + + /// Check if the next token starts a possible simple key and if so, save its position. + /// + /// This function is called for ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. + void savePossibleSimpleKey() @safe pure + { + // Check if a simple key is required at the current position. + const required = (flowLevel_ == 0 && indent_ == reader_.column); + assert(allowSimpleKey_ || !required, "A simple key is required only if it is " ~ + "the first token in the current line. Therefore it is always allowed."); + + if(!allowSimpleKey_) { return; } + + // The next token might be a simple key, so save its number and position. + removePossibleSimpleKey(); + const tokenCount = tokensTaken_ + cast(uint)tokens_.length; + + const line = reader_.line; + const column = reader_.column; + const key = SimpleKey(cast(uint)reader_.charIndex, tokenCount, line, + cast(ushort)min(column, ushort.max), required); + + if(possibleSimpleKeys_.length <= flowLevel_) + { + const oldLength = possibleSimpleKeys_.length; + possibleSimpleKeys_.length = flowLevel_ + 1; + //No need to initialize the last element, it's already done in the next line. + possibleSimpleKeys_[oldLength .. flowLevel_] = SimpleKey.init; + } + possibleSimpleKeys_[flowLevel_] = key; + } + + /// Remove the saved possible key position at the current flow level. 
+ void removePossibleSimpleKey() @safe pure + { + if(possibleSimpleKeys_.length <= flowLevel_) { return; } + + if(!possibleSimpleKeys_[flowLevel_].isNull) + { + const key = possibleSimpleKeys_[flowLevel_]; + enforce(!key.required, + new ScannerException("While scanning a simple key", + Mark(reader_.name, key.line, key.column), + "could not find expected ':'", reader_.mark)); + possibleSimpleKeys_[flowLevel_].isNull = true; + } + } + + /// Decrease indentation, removing entries in indents_. + /// + /// Params: column = Current column in the file/stream. + void unwindIndent(const int column) @safe + { + if(flowLevel_ > 0) + { + // In flow context, tokens should respect indentation. + // The condition should be `indent >= column` according to the spec. + // But this condition will prohibit intuitively correct + // constructions such as + // key : { + // } + + // In the flow context, indentation is ignored. We make the scanner less + // restrictive than what the specification requires. + // if(pedantic_ && flowLevel_ > 0 && indent_ > column) + // { + // throw new ScannerException("Invalid intendation or unclosed '[' or '{'", + // reader_.mark) + // } + return; + } + + // In block context, we may need to issue the BLOCK-END tokens. + while(indent_ > column) + { + indent_ = indents_.data.back; + assert(indents_.data.length); + indents_.shrinkTo(indents_.data.length - 1); + tokens_.push(blockEndToken(reader_.mark, reader_.mark)); + } + } + + /// Increase indentation if needed. + /// + /// Params: column = Current column in the file/stream. + /// + /// Returns: true if the indentation was increased, false otherwise. + bool addIndent(int column) @safe + { + if(indent_ >= column){return false;} + indents_ ~= indent_; + indent_ = column; + return true; + } + + + /// Add STREAM-START token. + void fetchStreamStart() @safe nothrow + { + tokens_.push(streamStartToken(reader_.mark, reader_.mark, reader_.encoding)); + } + + ///Add STREAM-END token. 
+ void fetchStreamEnd() @safe + { + //Set intendation to -1 . + unwindIndent(-1); + removePossibleSimpleKey(); + allowSimpleKey_ = false; + possibleSimpleKeys_.destroy; + + tokens_.push(streamEndToken(reader_.mark, reader_.mark)); + done_ = true; + } + + /// Add DIRECTIVE token. + void fetchDirective() @safe + { + // Set intendation to -1 . + unwindIndent(-1); + // Reset simple keys. + removePossibleSimpleKey(); + allowSimpleKey_ = false; + + auto directive = scanDirective(); + tokens_.push(directive); + } + + /// Add DOCUMENT-START or DOCUMENT-END token. + void fetchDocumentIndicator(TokenID id)() + if(id == TokenID.documentStart || id == TokenID.documentEnd) + { + // Set indentation to -1 . + unwindIndent(-1); + // Reset simple keys. Note that there can't be a block collection after '---'. + removePossibleSimpleKey(); + allowSimpleKey_ = false; + + Mark startMark = reader_.mark; + reader_.forward(3); + tokens_.push(simpleToken!id(startMark, reader_.mark)); + } + + /// Aliases to add DOCUMENT-START or DOCUMENT-END token. + alias fetchDocumentStart = fetchDocumentIndicator!(TokenID.documentStart); + alias fetchDocumentEnd = fetchDocumentIndicator!(TokenID.documentEnd); + + /// Add FLOW-SEQUENCE-START or FLOW-MAPPING-START token. + void fetchFlowCollectionStart(TokenID id)() @safe + { + // '[' and '{' may start a simple key. + savePossibleSimpleKey(); + // Simple keys are allowed after '[' and '{'. + allowSimpleKey_ = true; + ++flowLevel_; + + Mark startMark = reader_.mark; + reader_.forward(); + tokens_.push(simpleToken!id(startMark, reader_.mark)); + } + + /// Aliases to add FLOW-SEQUENCE-START or FLOW-MAPPING-START token. + alias fetchFlowSequenceStart = fetchFlowCollectionStart!(TokenID.flowSequenceStart); + alias fetchFlowMappingStart = fetchFlowCollectionStart!(TokenID.flowMappingStart); + + /// Add FLOW-SEQUENCE-START or FLOW-MAPPING-START token. + void fetchFlowCollectionEnd(TokenID id)() + { + // Reset possible simple key on the current level. 
+ removePossibleSimpleKey(); + // No simple keys after ']' and '}'. + allowSimpleKey_ = false; + --flowLevel_; + + Mark startMark = reader_.mark; + reader_.forward(); + tokens_.push(simpleToken!id(startMark, reader_.mark)); + } + + /// Aliases to add FLOW-SEQUENCE-START or FLOW-MAPPING-START token/ + alias fetchFlowSequenceEnd = fetchFlowCollectionEnd!(TokenID.flowSequenceEnd); + alias fetchFlowMappingEnd = fetchFlowCollectionEnd!(TokenID.flowMappingEnd); + + /// Add FLOW-ENTRY token; + void fetchFlowEntry() @safe + { + // Reset possible simple key on the current level. + removePossibleSimpleKey(); + // Simple keys are allowed after ','. + allowSimpleKey_ = true; + + Mark startMark = reader_.mark; + reader_.forward(); + tokens_.push(flowEntryToken(startMark, reader_.mark)); + } + + /// Additional checks used in block context in fetchBlockEntry and fetchKey. + /// + /// Params: type = String representing the token type we might need to add. + /// id = Token type we might need to add. + void blockChecks(string type, TokenID id)() + { + enum context = type ~ " keys are not allowed here"; + // Are we allowed to start a key (not neccesarily a simple one)? + enforce(allowSimpleKey_, new ScannerException(context, reader_.mark)); + + if(addIndent(reader_.column)) + { + tokens_.push(simpleToken!id(reader_.mark, reader_.mark)); + } + } + + /// Add BLOCK-ENTRY token. Might add BLOCK-SEQUENCE-START in the process. + void fetchBlockEntry() @safe + { + if(flowLevel_ == 0) { blockChecks!("Sequence", TokenID.blockSequenceStart)(); } + + // It's an error for the block entry to occur in the flow context, + // but we let the parser detect this. + + // Reset possible simple key on the current level. + removePossibleSimpleKey(); + // Simple keys are allowed after '-'. + allowSimpleKey_ = true; + + Mark startMark = reader_.mark; + reader_.forward(); + tokens_.push(blockEntryToken(startMark, reader_.mark)); + } + + /// Add KEY token. Might add BLOCK-MAPPING-START in the process. 
+ void fetchKey() @safe + { + if(flowLevel_ == 0) { blockChecks!("Mapping", TokenID.blockMappingStart)(); } + + // Reset possible simple key on the current level. + removePossibleSimpleKey(); + // Simple keys are allowed after '?' in the block context. + allowSimpleKey_ = (flowLevel_ == 0); + + Mark startMark = reader_.mark; + reader_.forward(); + tokens_.push(keyToken(startMark, reader_.mark)); + } + + /// Add VALUE token. Might add KEY and/or BLOCK-MAPPING-START in the process. + void fetchValue() @safe + { + //Do we determine a simple key? + if(possibleSimpleKeys_.length > flowLevel_ && + !possibleSimpleKeys_[flowLevel_].isNull) + { + const key = possibleSimpleKeys_[flowLevel_]; + possibleSimpleKeys_[flowLevel_].isNull = true; + Mark keyMark = Mark(reader_.name, key.line, key.column); + const idx = key.tokenIndex - tokensTaken_; + + assert(idx >= 0); + + // Add KEY. + // Manually inserting since tokens are immutable (need linked list). + tokens_.insert(keyToken(keyMark, keyMark), idx); + + // If this key starts a new block mapping, we need to add BLOCK-MAPPING-START. + if(flowLevel_ == 0 && addIndent(key.column)) + { + tokens_.insert(blockMappingStartToken(keyMark, keyMark), idx); + } + + // There cannot be two simple keys in a row. + allowSimpleKey_ = false; + } + // Part of a complex key + else + { + // We can start a complex value if and only if we can start a simple key. + enforce(flowLevel_ > 0 || allowSimpleKey_, + new ScannerException("Mapping values are not allowed here", reader_.mark)); + + // If this value starts a new block mapping, we need to add + // BLOCK-MAPPING-START. It'll be detected as an error later by the parser. + if(flowLevel_ == 0 && addIndent(reader_.column)) + { + tokens_.push(blockMappingStartToken(reader_.mark, reader_.mark)); + } + + // Reset possible simple key on the current level. + removePossibleSimpleKey(); + // Simple keys are allowed after ':' in the block context. + allowSimpleKey_ = (flowLevel_ == 0); + } + + // Add VALUE. 
+ Mark startMark = reader_.mark; + reader_.forward(); + tokens_.push(valueToken(startMark, reader_.mark)); + } + + /// Add ALIAS or ANCHOR token. + void fetchAnchor_(TokenID id)() @safe + if(id == TokenID.alias_ || id == TokenID.anchor) + { + // ALIAS/ANCHOR could be a simple key. + savePossibleSimpleKey(); + // No simple keys after ALIAS/ANCHOR. + allowSimpleKey_ = false; + + auto anchor = scanAnchor(id); + tokens_.push(anchor); + } + + /// Aliases to add ALIAS or ANCHOR token. + alias fetchAlias = fetchAnchor_!(TokenID.alias_); + alias fetchAnchor = fetchAnchor_!(TokenID.anchor); + + /// Add TAG token. + void fetchTag() @safe + { + //TAG could start a simple key. + savePossibleSimpleKey(); + //No simple keys after TAG. + allowSimpleKey_ = false; + + tokens_.push(scanTag()); + } + + /// Add block SCALAR token. + void fetchBlockScalar(ScalarStyle style)() @safe + if(style == ScalarStyle.literal || style == ScalarStyle.folded) + { + // Reset possible simple key on the current level. + removePossibleSimpleKey(); + // A simple key may follow a block scalar. + allowSimpleKey_ = true; + + auto blockScalar = scanBlockScalar(style); + tokens_.push(blockScalar); + } + + /// Aliases to add literal or folded block scalar. + alias fetchLiteral = fetchBlockScalar!(ScalarStyle.literal); + alias fetchFolded = fetchBlockScalar!(ScalarStyle.folded); + + /// Add quoted flow SCALAR token. + void fetchFlowScalar(ScalarStyle quotes)() + { + // A flow scalar could be a simple key. + savePossibleSimpleKey(); + // No simple keys after flow scalars. + allowSimpleKey_ = false; + + // Scan and add SCALAR. + auto scalar = scanFlowScalar(quotes); + tokens_.push(scalar); + } + + /// Aliases to add single or double quoted block scalar. + alias fetchSingle = fetchFlowScalar!(ScalarStyle.singleQuoted); + alias fetchDouble = fetchFlowScalar!(ScalarStyle.doubleQuoted); + + /// Add plain SCALAR token. 
+ void fetchPlain() @safe + { + // A plain scalar could be a simple key + savePossibleSimpleKey(); + // No simple keys after plain scalars. But note that scanPlain() will + // change this flag if the scan is finished at the beginning of the line. + allowSimpleKey_ = false; + auto plain = scanPlain(); + + // Scan and add SCALAR. May change allowSimpleKey_ + tokens_.push(plain); + } + + pure: + + ///Check if the next token is DIRECTIVE: ^ '%' ... + bool checkDirective() @safe + { + return reader_.peekByte() == '%' && reader_.column == 0; + } + + /// Check if the next token is DOCUMENT-START: ^ '---' (' '|'\n') + bool checkDocumentStart() @safe + { + // Check one char first, then all 3, to prevent reading outside the buffer. + return reader_.column == 0 && + reader_.peekByte() == '-' && + reader_.prefix(3) == "---" && + reader_.peek(3).isWhiteSpace; + } + + /// Check if the next token is DOCUMENT-END: ^ '...' (' '|'\n') + bool checkDocumentEnd() @safe + { + // Check one char first, then all 3, to prevent reading outside the buffer. + return reader_.column == 0 && + reader_.peekByte() == '.' && + reader_.prefix(3) == "..." && + reader_.peek(3).isWhiteSpace; + } + + /// Check if the next token is BLOCK-ENTRY: '-' (' '|'\n') + bool checkBlockEntry() @safe + { + return !!reader_.peek(1).isWhiteSpace; + } + + /// Check if the next token is KEY(flow context): '?' + /// + /// or KEY(block context): '?' (' '|'\n') + bool checkKey() @safe + { + return (flowLevel_ > 0 || reader_.peek(1).isWhiteSpace); + } + + /// Check if the next token is VALUE(flow context): ':' + /// + /// or VALUE(block context): ':' (' '|'\n') + bool checkValue() @safe + { + return flowLevel_ > 0 || reader_.peek(1).isWhiteSpace; + } + + /// Check if the next token is a plain scalar. + /// + /// A plain scalar may start with any non-space character except: + /// '-', '?', ':', ',', '[', ']', '{', '}', + /// '#', '&', '*', '!', '|', '>', '\'', '\"', + /// '%', '@', '`'. 
+ /// + /// It may also start with + /// '-', '?', ':' + /// if it is followed by a non-space character. + /// + /// Note that we limit the last rule to the block context (except the + /// '-' character) because we want the flow context to be space + /// independent. + bool checkPlain() @safe + { + const c = reader_.peek(); + if(!c.isNonScalarStartCharacter) + { + return true; + } + return !reader_.peek(1).isWhiteSpace && + (c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':'))); + } + + /// Move to the next non-space character. + void findNextNonSpace() @safe + { + while(reader_.peekByte() == ' ') { reader_.forward(); } + } + + /// Scan a string of alphanumeric or "-_" characters. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanAlphaNumericToSlice(string name)(const Mark startMark) + { + size_t length; + dchar c = reader_.peek(); + while(c.isAlphaNum || c.among!('-', '_')) { c = reader_.peek(++length); } + + enforce(length > 0, new ScannerException("While scanning " ~ name, + startMark, expected("alphanumeric, '-' or '_'", c), reader_.mark)); + + reader_.sliceBuilder.write(reader_.get(length)); + } + + /// Scan a string. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanAnchorAliasToSlice(const Mark startMark) @safe + { + size_t length; + dchar c = reader_.peek(); + while (c.isNSAnchorName) + { + c = reader_.peek(++length); + } + + enforce(length > 0, new ScannerException("While scanning an anchor or alias", + startMark, expected("a printable character besides '[', ']', '{', '}' and ','", c), reader_.mark)); + + reader_.sliceBuilder.write(reader_.get(length)); + } + + /// Scan and throw away all characters until next line break. + void scanToNextBreak() @safe + { + while(!reader_.peek().isBreak) { reader_.forward(); } + } + + /// Scan all characters until next line break. 
+ /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanToNextBreakToSlice() @safe + { + uint length; + while(!reader_.peek(length).isBreak) + { + ++length; + } + reader_.sliceBuilder.write(reader_.get(length)); + } + + + /// Move to next token in the file/stream. + /// + /// We ignore spaces, line breaks and comments. + /// If we find a line break in the block context, we set + /// allowSimpleKey` on. + /// + /// We do not yet support BOM inside the stream as the + /// specification requires. Any such mark will be considered as a part + /// of the document. + void scanToNextToken() @safe + { + // TODO(PyYAML): We need to make tab handling rules more sane. A good rule is: + // Tabs cannot precede tokens + // BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, + // KEY(block), VALUE(block), BLOCK-ENTRY + // So the checking code is + // if : + // allowSimpleKey_ = false + // We also need to add the check for `allowSimpleKey_ == true` to + // `unwindIndent` before issuing BLOCK-END. + // Scanners for block, flow, and plain scalars need to be modified. + + for(;;) + { + //All whitespace in flow context is ignored, even whitespace + // not allowed in other contexts + if (flowLevel_ > 0) + { + while(reader_.peekByte().isNonLinebreakWhitespace) { reader_.forward(); } + } + else + { + findNextNonSpace(); + } + if(reader_.peekByte() == '#') { scanToNextBreak(); } + if(scanLineBreak() != '\0') + { + if(flowLevel_ == 0) { allowSimpleKey_ = true; } + } + else + { + break; + } + } + } + + /// Scan directive token. + Token scanDirective() @safe + { + Mark startMark = reader_.mark; + // Skip the '%'. + reader_.forward(); + + // Scan directive name + reader_.sliceBuilder.begin(); + scanDirectiveNameToSlice(startMark); + const name = reader_.sliceBuilder.finish(); + + reader_.sliceBuilder.begin(); + + // Index where tag handle ends and suffix starts in a tag directive value. 
+ uint tagHandleEnd = uint.max; + if(name == "YAML") { scanYAMLDirectiveValueToSlice(startMark); } + else if(name == "TAG") { tagHandleEnd = scanTagDirectiveValueToSlice(startMark); } + char[] value = reader_.sliceBuilder.finish(); + + Mark endMark = reader_.mark; + + DirectiveType directive; + if(name == "YAML") { directive = DirectiveType.yaml; } + else if(name == "TAG") { directive = DirectiveType.tag; } + else + { + directive = DirectiveType.reserved; + scanToNextBreak(); + } + + scanDirectiveIgnoredLine(startMark); + + return directiveToken(startMark, endMark, value, directive, tagHandleEnd); + } + + /// Scan name of a directive token. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanDirectiveNameToSlice(const Mark startMark) @safe + { + // Scan directive name. + scanAlphaNumericToSlice!"a directive"(startMark); + + enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), + new ScannerException("While scanning a directive", startMark, + expected("alphanumeric, '-' or '_'", reader_.peek()), reader_.mark)); + } + + /// Scan value of a YAML directive token. Returns major, minor version separated by '.'. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanYAMLDirectiveValueToSlice(const Mark startMark) @safe + { + findNextNonSpace(); + + scanYAMLDirectiveNumberToSlice(startMark); + + enforce(reader_.peekByte() == '.', + new ScannerException("While scanning a directive", startMark, + expected("digit or '.'", reader_.peek()), reader_.mark)); + // Skip the '.'. 
+ reader_.forward(); + + reader_.sliceBuilder.write('.'); + scanYAMLDirectiveNumberToSlice(startMark); + + enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), + new ScannerException("While scanning a directive", startMark, + expected("digit or '.'", reader_.peek()), reader_.mark)); + } + + /// Scan a number from a YAML directive. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanYAMLDirectiveNumberToSlice(const Mark startMark) @safe + { + enforce(isDigit(reader_.peek()), + new ScannerException("While scanning a directive", startMark, + expected("digit", reader_.peek()), reader_.mark)); + + // Already found the first digit in the enforce(), so set length to 1. + uint length = 1; + while(reader_.peek(length).isDigit) { ++length; } + + reader_.sliceBuilder.write(reader_.get(length)); + } + + /// Scan value of a tag directive. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + /// + /// Returns: Length of tag handle (which is before tag prefix) in scanned data + uint scanTagDirectiveValueToSlice(const Mark startMark) @safe + { + findNextNonSpace(); + const startLength = reader_.sliceBuilder.length; + scanTagDirectiveHandleToSlice(startMark); + const handleLength = cast(uint)(reader_.sliceBuilder.length - startLength); + findNextNonSpace(); + scanTagDirectivePrefixToSlice(startMark); + + return handleLength; + } + + /// Scan handle of a tag directive. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanTagDirectiveHandleToSlice(const Mark startMark) @safe + { + scanTagHandleToSlice!"directive"(startMark); + enforce(reader_.peekByte() == ' ', + new ScannerException("While scanning a directive handle", startMark, + expected("' '", reader_.peek()), reader_.mark)); + } + + /// Scan prefix of a tag directive. 
+ /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanTagDirectivePrefixToSlice(const Mark startMark) @safe + { + scanTagURIToSlice!"directive"(startMark); + enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), + new ScannerException("While scanning a directive prefix", startMark, + expected("' '", reader_.peek()), reader_.mark)); + } + + /// Scan (and ignore) ignored line after a directive. + void scanDirectiveIgnoredLine(const Mark startMark) @safe + { + findNextNonSpace(); + if(reader_.peekByte() == '#') { scanToNextBreak(); } + enforce(reader_.peek().isBreak, + new ScannerException("While scanning a directive", startMark, + expected("comment or a line break", reader_.peek()), reader_.mark)); + scanLineBreak(); + } + + + /// Scan an alias or an anchor. + /// + /// The specification does not restrict characters for anchors and + /// aliases. This may lead to problems, for instance, the document: + /// [ *alias, value ] + /// can be interpteted in two ways, as + /// [ "value" ] + /// and + /// [ *alias , "value" ] + /// Therefore we restrict aliases to ASCII alphanumeric characters. + Token scanAnchor(const TokenID id) @safe + { + const startMark = reader_.mark; + reader_.forward(); // The */& character was only peeked, so we drop it now + + reader_.sliceBuilder.begin(); + scanAnchorAliasToSlice(startMark); + // On error, value is discarded as we return immediately + char[] value = reader_.sliceBuilder.finish(); + + assert(!reader_.peek().isNSAnchorName, "Anchor/alias name not fully scanned"); + + if(id == TokenID.alias_) + { + return aliasToken(startMark, reader_.mark, value); + } + if(id == TokenID.anchor) + { + return anchorToken(startMark, reader_.mark, value); + } + assert(false, "This code should never be reached"); + } + + /// Scan a tag token. 
+ Token scanTag() @safe + { + const startMark = reader_.mark; + dchar c = reader_.peek(1); + + reader_.sliceBuilder.begin(); + scope(failure) { reader_.sliceBuilder.finish(); } + // Index where tag handle ends and tag suffix starts in the tag value + // (slice) we will produce. + uint handleEnd; + + if(c == '<') + { + reader_.forward(2); + + handleEnd = 0; + scanTagURIToSlice!"tag"(startMark); + enforce(reader_.peekByte() == '>', + new ScannerException("While scanning a tag", startMark, + expected("'>'", reader_.peek()), reader_.mark)); + reader_.forward(); + } + else if(c.isWhiteSpace) + { + reader_.forward(); + handleEnd = 0; + reader_.sliceBuilder.write('!'); + } + else + { + uint length = 1; + bool useHandle; + + while(!c.isBreakOrSpace) + { + if(c == '!') + { + useHandle = true; + break; + } + ++length; + c = reader_.peek(length); + } + + if(useHandle) + { + scanTagHandleToSlice!"tag"(startMark); + handleEnd = cast(uint)reader_.sliceBuilder.length; + } + else + { + reader_.forward(); + reader_.sliceBuilder.write('!'); + handleEnd = cast(uint)reader_.sliceBuilder.length; + } + + scanTagURIToSlice!"tag"(startMark); + } + + enforce(reader_.peek().isBreakOrSpace, + new ScannerException("While scanning a tag", startMark, expected("' '", reader_.peek()), + reader_.mark)); + + char[] slice = reader_.sliceBuilder.finish(); + return tagToken(startMark, reader_.mark, slice, handleEnd); + } + + /// Scan a block scalar token with specified style. + Token scanBlockScalar(const ScalarStyle style) @safe + { + const startMark = reader_.mark; + + // Scan the header. + reader_.forward(); + + const indicators = scanBlockScalarIndicators(startMark); + + const chomping = indicators[0]; + const increment = indicators[1]; + scanBlockScalarIgnoredLine(startMark); + + // Determine the indentation level and go to the first non-empty line. 
+ Mark endMark; + uint indent = max(1, indent_ + 1); + + reader_.sliceBuilder.begin(); + alias Transaction = SliceBuilder.Transaction; + // Used to strip the last line breaks written to the slice at the end of the + // scalar, which may be needed based on chomping. + Transaction breaksTransaction = Transaction(&reader_.sliceBuilder); + // Read the first indentation/line breaks before the scalar. + size_t startLen = reader_.sliceBuilder.length; + if(increment == int.min) + { + auto indentation = scanBlockScalarIndentationToSlice(); + endMark = indentation[1]; + indent = max(indent, indentation[0]); + } + else + { + indent += increment - 1; + endMark = scanBlockScalarBreaksToSlice(indent); + } + + // int.max means there's no line break (int.max is outside UTF-32). + dchar lineBreak = cast(dchar)int.max; + + // Scan the inner part of the block scalar. + while(reader_.column == indent && reader_.peekByte() != '\0') + { + breaksTransaction.commit(); + const bool leadingNonSpace = !reader_.peekByte().among!(' ', '\t'); + // This is where the 'interesting' non-whitespace data gets read. + scanToNextBreakToSlice(); + lineBreak = scanLineBreak(); + + + // This transaction serves to rollback data read in the + // scanBlockScalarBreaksToSlice() call. + breaksTransaction = Transaction(&reader_.sliceBuilder); + startLen = reader_.sliceBuilder.length; + // The line breaks should actually be written _after_ the if() block + // below. We work around that by inserting + endMark = scanBlockScalarBreaksToSlice(indent); + + // This will not run during the last iteration (see the if() vs the + // while()), hence breaksTransaction rollback (which happens after this + // loop) will never roll back data written in this if() block. + if(reader_.column == indent && reader_.peekByte() != '\0') + { + // Unfortunately, folding rules are ambiguous. 
+ + // This is the folding according to the specification: + if(style == ScalarStyle.folded && lineBreak == '\n' && + leadingNonSpace && !reader_.peekByte().among!(' ', '\t')) + { + // No breaks were scanned; no need to insert the space in the + // middle of slice. + if(startLen == reader_.sliceBuilder.length) + { + reader_.sliceBuilder.write(' '); + } + } + else + { + // We need to insert in the middle of the slice in case any line + // breaks were scanned. + reader_.sliceBuilder.insert(lineBreak, startLen); + } + + ////this is Clark Evans's interpretation (also in the spec + ////examples): + // + //if(style == ScalarStyle.folded && lineBreak == '\n') + //{ + // if(startLen == endLen) + // { + // if(!" \t"d.canFind(reader_.peekByte())) + // { + // reader_.sliceBuilder.write(' '); + // } + // else + // { + // chunks ~= lineBreak; + // } + // } + //} + //else + //{ + // reader_.sliceBuilder.insertBack(lineBreak, endLen - startLen); + //} + } + else + { + break; + } + } + + // If chompint is Keep, we keep (commit) the last scanned line breaks + // (which are at the end of the scalar). Otherwise re remove them (end the + // transaction). + if(chomping == Chomping.keep) { breaksTransaction.commit(); } + else { breaksTransaction.end(); } + if(chomping != Chomping.strip && lineBreak != int.max) + { + // If chomping is Keep, we keep the line break but the first line break + // that isn't stripped (since chomping isn't Strip in this branch) must + // be inserted _before_ the other line breaks. 
+ if(chomping == Chomping.keep) + { + reader_.sliceBuilder.insert(lineBreak, startLen); + } + // If chomping is not Keep, breaksTransaction was cancelled so we can + // directly write the first line break (as it isn't stripped - chomping + // is not Strip) + else + { + reader_.sliceBuilder.write(lineBreak); + } + } + + char[] slice = reader_.sliceBuilder.finish(); + return scalarToken(startMark, endMark, slice, style); + } + + /// Scan chomping and indentation indicators of a scalar token. + Tuple!(Chomping, int) scanBlockScalarIndicators(const Mark startMark) @safe + { + auto chomping = Chomping.clip; + int increment = int.min; + dchar c = reader_.peek(); + + /// Indicators can be in any order. + if(getChomping(c, chomping)) + { + getIncrement(c, increment, startMark); + } + else + { + const gotIncrement = getIncrement(c, increment, startMark); + if(gotIncrement) { getChomping(c, chomping); } + } + + enforce(c.among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), + new ScannerException("While scanning a block scalar", startMark, + expected("chomping or indentation indicator", c), reader_.mark)); + + return tuple(chomping, increment); + } + + /// Get chomping indicator, if detected. Return false otherwise. + /// + /// Used in scanBlockScalarIndicators. + /// + /// Params: + /// + /// c = The character that may be a chomping indicator. + /// chomping = Write the chomping value here, if detected. + bool getChomping(ref dchar c, ref Chomping chomping) @safe + { + if(!c.among!('+', '-')) { return false; } + chomping = c == '+' ? Chomping.keep : Chomping.strip; + reader_.forward(); + c = reader_.peek(); + return true; + } + + /// Get increment indicator, if detected. Return false otherwise. + /// + /// Used in scanBlockScalarIndicators. + /// + /// Params: + /// + /// c = The character that may be an increment indicator. + /// If an increment indicator is detected, this will be updated to + /// the next character in the Reader. 
+ /// increment = Write the increment value here, if detected. + /// startMark = Mark for error messages. + bool getIncrement(ref dchar c, ref int increment, const Mark startMark) @safe + { + if(!c.isDigit) { return false; } + // Convert a digit to integer. + increment = c - '0'; + assert(increment < 10 && increment >= 0, "Digit has invalid value"); + + enforce(increment > 0, + new ScannerException("While scanning a block scalar", startMark, + expected("indentation indicator in range 1-9", "0"), reader_.mark)); + + reader_.forward(); + c = reader_.peek(); + return true; + } + + /// Scan (and ignore) ignored line in a block scalar. + void scanBlockScalarIgnoredLine(const Mark startMark) @safe + { + findNextNonSpace(); + if(reader_.peekByte()== '#') { scanToNextBreak(); } + + enforce(reader_.peek().isBreak, + new ScannerException("While scanning a block scalar", startMark, + expected("comment or line break", reader_.peek()), reader_.mark)); + + scanLineBreak(); + } + + /// Scan indentation in a block scalar, returning line breaks, max indent and end mark. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + Tuple!(uint, Mark) scanBlockScalarIndentationToSlice() @safe + { + uint maxIndent; + Mark endMark = reader_.mark; + + while(reader_.peek().among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029')) + { + if(reader_.peekByte() != ' ') + { + reader_.sliceBuilder.write(scanLineBreak()); + endMark = reader_.mark; + continue; + } + reader_.forward(); + maxIndent = max(reader_.column, maxIndent); + } + + return tuple(maxIndent, endMark); + } + + /// Scan line breaks at lower or specified indentation in a block scalar. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. 
+ Mark scanBlockScalarBreaksToSlice(const uint indent) @safe
+ {
+ Mark endMark = reader_.mark;
+
+ for(;;)
+ {
+ while(reader_.column < indent && reader_.peekByte() == ' ') { reader_.forward(); }
+ if(!reader_.peek().among!('\n', '\r', '\u0085', '\u2028', '\u2029')) { break; }
+ reader_.sliceBuilder.write(scanLineBreak());
+ endMark = reader_.mark;
+ }
+
+ return endMark;
+ }
+
+ /// Scan a quoted flow scalar token with specified quotes.
+ Token scanFlowScalar(const ScalarStyle quotes) @safe
+ {
+ const startMark = reader_.mark;
+ const quote = reader_.get();
+
+ reader_.sliceBuilder.begin();
+
+ scanFlowScalarNonSpacesToSlice(quotes, startMark);
+
+ while(reader_.peek() != quote)
+ {
+ scanFlowScalarSpacesToSlice(startMark);
+ scanFlowScalarNonSpacesToSlice(quotes, startMark);
+ }
+ reader_.forward();
+
+ auto slice = reader_.sliceBuilder.finish();
+ return scalarToken(startMark, reader_.mark, slice, quotes);
+ }
+
+ /// Scan nonspace characters in a flow scalar.
+ ///
+ /// Assumes that the caller is building a slice in Reader, and puts the scanned
+ /// characters into that slice.
+ void scanFlowScalarNonSpacesToSlice(const ScalarStyle quotes, const Mark startMark) + @safe + { + for(;;) + { + dchar c = reader_.peek(); + + size_t numCodePoints; + while(!reader_.peek(numCodePoints).isFlowScalarBreakSpace) { ++numCodePoints; } + + if (numCodePoints > 0) { reader_.sliceBuilder.write(reader_.get(numCodePoints)); } + + c = reader_.peek(); + if(quotes == ScalarStyle.singleQuoted && c == '\'' && reader_.peek(1) == '\'') + { + reader_.forward(2); + reader_.sliceBuilder.write('\''); + } + else if((quotes == ScalarStyle.doubleQuoted && c == '\'') || + (quotes == ScalarStyle.singleQuoted && c.among!('"', '\\'))) + { + reader_.forward(); + reader_.sliceBuilder.write(c); + } + else if(quotes == ScalarStyle.doubleQuoted && c == '\\') + { + reader_.forward(); + c = reader_.peek(); + if(c.among!(escapes)) + { + reader_.forward(); + // Escaping has been moved to Parser as it can't be done in + // place (in a slice) in case of '\P' and '\L' (very uncommon, + // but we don't want to break the spec) + char[2] escapeSequence = ['\\', cast(char)c]; + reader_.sliceBuilder.write(escapeSequence); + } + else if(c.among!(escapeHexCodeList)) + { + const hexLength = dub.internal.dyaml.escapes.escapeHexLength(c); + reader_.forward(); + + foreach(i; 0 .. 
hexLength) { + enforce(reader_.peek(i).isHexDigit, + new ScannerException("While scanning a double quoted scalar", startMark, + expected("escape sequence of hexadecimal numbers", + reader_.peek(i)), reader_.mark)); + } + char[] hex = reader_.get(hexLength); + + enforce((hex.length > 0) && (hex.length <= 8), + new ScannerException("While scanning a double quoted scalar", startMark, + "overflow when parsing an escape sequence of " ~ + "hexadecimal numbers.", reader_.mark)); + + char[2] escapeStart = ['\\', cast(char) c]; + reader_.sliceBuilder.write(escapeStart); + reader_.sliceBuilder.write(hex); + + } + else if(c.among!('\n', '\r', '\u0085', '\u2028', '\u2029')) + { + scanLineBreak(); + scanFlowScalarBreaksToSlice(startMark); + } + else + { + throw new ScannerException("While scanning a double quoted scalar", startMark, + text("found unsupported escape character ", c), + reader_.mark); + } + } + else { return; } + } + } + + /// Scan space characters in a flow scalar. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// spaces into that slice. + void scanFlowScalarSpacesToSlice(const Mark startMark) @safe + { + // Increase length as long as we see whitespace. + size_t length; + while(reader_.peekByte(length).among!(' ', '\t')) { ++length; } + auto whitespaces = reader_.prefixBytes(length); + + // Can check the last byte without striding because '\0' is ASCII + const c = reader_.peek(length); + enforce(c != '\0', + new ScannerException("While scanning a quoted scalar", startMark, + "found unexpected end of buffer", reader_.mark)); + + // Spaces not followed by a line break. + if(!c.among!('\n', '\r', '\u0085', '\u2028', '\u2029')) + { + reader_.forward(length); + reader_.sliceBuilder.write(whitespaces); + return; + } + + // There's a line break after the spaces. 
+ reader_.forward(length); + const lineBreak = scanLineBreak(); + + if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); } + + // If we have extra line breaks after the first, scan them into the + // slice. + const bool extraBreaks = scanFlowScalarBreaksToSlice(startMark); + + // No extra breaks, one normal line break. Replace it with a space. + if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); } + } + + /// Scan line breaks in a flow scalar. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// line breaks into that slice. + bool scanFlowScalarBreaksToSlice(const Mark startMark) @safe + { + // True if at least one line break was found. + bool anyBreaks; + for(;;) + { + // Instead of checking indentation, we check for document separators. + const prefix = reader_.prefix(3); + enforce(!(prefix == "---" || prefix == "...") || + !reader_.peek(3).isWhiteSpace, + new ScannerException("While scanning a quoted scalar", startMark, + "found unexpected document separator", reader_.mark)); + + // Skip any whitespaces. + while(reader_.peekByte().among!(' ', '\t')) { reader_.forward(); } + + // Encountered a non-whitespace non-linebreak character, so we're done. + if(!reader_.peek().among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029')) { break; } + + const lineBreak = scanLineBreak(); + anyBreaks = true; + reader_.sliceBuilder.write(lineBreak); + } + return anyBreaks; + } + + /// Scan plain scalar token (no block, no quotes). + Token scanPlain() @safe + { + // We keep track of the allowSimpleKey_ flag here. + // Indentation rules are loosed for the flow context + const startMark = reader_.mark; + Mark endMark = startMark; + const indent = indent_ + 1; + + // We allow zero indentation for scalars, but then we need to check for + // document separators at the beginning of the line. 
+ // if(indent == 0) { indent = 1; } + + reader_.sliceBuilder.begin(); + + alias Transaction = SliceBuilder.Transaction; + Transaction spacesTransaction; + // Stop at a comment. + while(reader_.peekByte() != '#') + { + // Scan the entire plain scalar. + size_t length; + dchar c = reader_.peek(length); + for(;;) + { + const cNext = reader_.peek(length + 1); + if(c.isWhiteSpace || + (flowLevel_ == 0 && c == ':' && cNext.isWhiteSpace) || + (flowLevel_ > 0 && c.among!(',', ':', '?', '[', ']', '{', '}'))) + { + break; + } + ++length; + c = cNext; + } + + // It's not clear what we should do with ':' in the flow context. + enforce(flowLevel_ == 0 || c != ':' || + reader_.peek(length + 1).isWhiteSpace || + reader_.peek(length + 1).among!(',', '[', ']', '{', '}'), + new ScannerException("While scanning a plain scalar", startMark, + "found unexpected ':' . Please check " ~ + "http://pyyaml.org/wiki/YAMLColonInFlowContext for details.", + reader_.mark)); + + if(length == 0) { break; } + + allowSimpleKey_ = false; + + reader_.sliceBuilder.write(reader_.get(length)); + + endMark = reader_.mark; + + spacesTransaction.commit(); + spacesTransaction = Transaction(&reader_.sliceBuilder); + + const startLength = reader_.sliceBuilder.length; + scanPlainSpacesToSlice(); + if(startLength == reader_.sliceBuilder.length || + (flowLevel_ == 0 && reader_.column < indent)) + { + break; + } + } + + spacesTransaction.end(); + char[] slice = reader_.sliceBuilder.finish(); + + return scalarToken(startMark, endMark, slice, ScalarStyle.plain); + } + + /// Scan spaces in a plain scalar. + /// + /// Assumes that the caller is building a slice in Reader, and puts the spaces + /// into that slice. + void scanPlainSpacesToSlice() @safe + { + // The specification is really confusing about tabs in plain scalars. + // We just forbid them completely. Do not use tabs in YAML! + + // Get as many plain spaces as there are. 
+ size_t length; + while(reader_.peekByte(length) == ' ') { ++length; } + char[] whitespaces = reader_.prefixBytes(length); + reader_.forward(length); + + const dchar c = reader_.peek(); + if(!c.isNSChar) + { + // We have spaces, but no newline. + if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); } + return; + } + + // Newline after the spaces (if any) + const lineBreak = scanLineBreak(); + allowSimpleKey_ = true; + + static bool end(Reader reader_) @safe pure + { + const prefix = reader_.prefix(3); + return ("---" == prefix || "..." == prefix) + && reader_.peek(3).among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + } + + if(end(reader_)) { return; } + + bool extraBreaks; + + alias Transaction = SliceBuilder.Transaction; + auto transaction = Transaction(&reader_.sliceBuilder); + if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); } + while(reader_.peek().isNSChar) + { + if(reader_.peekByte() == ' ') { reader_.forward(); } + else + { + const lBreak = scanLineBreak(); + extraBreaks = true; + reader_.sliceBuilder.write(lBreak); + + if(end(reader_)) { return; } + } + } + transaction.commit(); + + // No line breaks, only a space. + if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); } + } + + /// Scan handle of a tag token. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. 
+ void scanTagHandleToSlice(string name)(const Mark startMark) + { + dchar c = reader_.peek(); + enum contextMsg = "While scanning a " ~ name; + enforce(c == '!', + new ScannerException(contextMsg, startMark, expected("'!'", c), reader_.mark)); + + uint length = 1; + c = reader_.peek(length); + if(c != ' ') + { + while(c.isAlphaNum || c.among!('-', '_')) + { + ++length; + c = reader_.peek(length); + } + enforce(c == '!', + new ScannerException(contextMsg, startMark, expected("'!'", c), reader_.mark)); + ++length; + } + + reader_.sliceBuilder.write(reader_.get(length)); + } + + /// Scan URI in a tag token. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanTagURIToSlice(string name)(const Mark startMark) + { + // Note: we do not check if URI is well-formed. + dchar c = reader_.peek(); + const startLen = reader_.sliceBuilder.length; + { + uint length; + while(c.isAlphaNum || c.isURIChar) + { + if(c == '%') + { + auto chars = reader_.get(length); + reader_.sliceBuilder.write(chars); + length = 0; + scanURIEscapesToSlice!name(startMark); + } + else { ++length; } + c = reader_.peek(length); + } + if(length > 0) + { + auto chars = reader_.get(length); + reader_.sliceBuilder.write(chars); + length = 0; + } + } + // OK if we scanned something, error otherwise. + enum contextMsg = "While parsing a " ~ name; + enforce(reader_.sliceBuilder.length > startLen, + new ScannerException(contextMsg, startMark, expected("URI", c), reader_.mark)); + } + + // Not @nogc yet because std.utf.decode is not @nogc + /// Scan URI escape sequences. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanURIEscapesToSlice(string name)(const Mark startMark) + { + import core.exception : UnicodeException; + // URI escapes encode a UTF-8 string. We store UTF-8 code units here for + // decoding into UTF-32. 
+ Appender!string buffer; + + + enum contextMsg = "While scanning a " ~ name; + while(reader_.peekByte() == '%') + { + reader_.forward(); + char[2] nextByte = [reader_.peekByte(), reader_.peekByte(1)]; + + enforce(nextByte[0].isHexDigit && nextByte[1].isHexDigit, + new ScannerException(contextMsg, startMark, + expected("URI escape sequence of 2 hexadecimal " ~ + "numbers", nextByte), reader_.mark)); + + buffer ~= nextByte[].to!ubyte(16); + + reader_.forward(2); + } + try + { + foreach (dchar chr; buffer.data) + { + reader_.sliceBuilder.write(chr); + } + } + catch (UnicodeException) + { + throw new ScannerException(contextMsg, startMark, + "Invalid UTF-8 data encoded in URI escape sequence", + reader_.mark); + } + } + + + /// Scan a line break, if any. + /// + /// Transforms: + /// '\r\n' : '\n' + /// '\r' : '\n' + /// '\n' : '\n' + /// '\u0085' : '\n' + /// '\u2028' : '\u2028' + /// '\u2029 : '\u2029' + /// no break : '\0' + dchar scanLineBreak() @safe + { + // Fast path for ASCII line breaks. + const b = reader_.peekByte(); + if(b < 0x80) + { + if(b == '\n' || b == '\r') + { + if(reader_.prefix(2) == "\r\n") { reader_.forward(2); } + else { reader_.forward(); } + return '\n'; + } + return '\0'; + } + + const c = reader_.peek(); + if(c == '\x85') + { + reader_.forward(); + return '\n'; + } + if(c == '\u2028' || c == '\u2029') + { + reader_.forward(); + return c; + } + return '\0'; + } +} diff --git a/source/dub/internal/dyaml/serializer.d b/source/dub/internal/dyaml/serializer.d new file mode 100644 index 0000000..89402f4 --- /dev/null +++ b/source/dub/internal/dyaml/serializer.d @@ -0,0 +1,322 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * YAML serializer. 
+ * Code based on PyYAML: http://www.pyyaml.org + */ +module dub.internal.dyaml.serializer; + + +import std.array; +import std.format; +import std.typecons; + +import dub.internal.dyaml.emitter; +import dub.internal.dyaml.event; +import dub.internal.dyaml.exception; +import dub.internal.dyaml.node; +import dub.internal.dyaml.resolver; +import dub.internal.dyaml.tagdirective; +import dub.internal.dyaml.token; + + +package: + +///Serializes represented YAML nodes, generating events which are then emitted by Emitter. +struct Serializer +{ + private: + ///Resolver used to determine which tags are automaticaly resolvable. + Resolver resolver_; + + ///Do all document starts have to be specified explicitly? + Flag!"explicitStart" explicitStart_; + ///Do all document ends have to be specified explicitly? + Flag!"explicitEnd" explicitEnd_; + ///YAML version string. + string YAMLVersion_; + + ///Tag directives to emit. + TagDirective[] tagDirectives_; + + //TODO Use something with more deterministic memory usage. + ///Nodes with assigned anchors. + string[Node] anchors_; + ///Nodes with assigned anchors that are already serialized. + bool[Node] serializedNodes_; + ///ID of the last anchor generated. + uint lastAnchorID_ = 0; + + public: + /** + * Construct a Serializer. + * + * Params: + * resolver = Resolver used to determine which tags are automaticaly resolvable. + * explicitStart = Do all document starts have to be specified explicitly? + * explicitEnd = Do all document ends have to be specified explicitly? + * YAMLVersion = YAML version string. + * tagDirectives = Tag directives to emit. + */ + this(Resolver resolver, + const Flag!"explicitStart" explicitStart, + const Flag!"explicitEnd" explicitEnd, string YAMLVersion, + TagDirective[] tagDirectives) @safe + { + resolver_ = resolver; + explicitStart_ = explicitStart; + explicitEnd_ = explicitEnd; + YAMLVersion_ = YAMLVersion; + tagDirectives_ = tagDirectives; + } + + ///Begin the stream. 
+ void startStream(EmitterT)(ref EmitterT emitter) @safe + { + emitter.emit(streamStartEvent(Mark(), Mark())); + } + + ///End the stream. + void endStream(EmitterT)(ref EmitterT emitter) @safe + { + emitter.emit(streamEndEvent(Mark(), Mark())); + } + + ///Serialize a node, emitting it in the process. + void serialize(EmitterT)(ref EmitterT emitter, ref Node node) @safe + { + emitter.emit(documentStartEvent(Mark(), Mark(), explicitStart_, + YAMLVersion_, tagDirectives_)); + anchorNode(node); + serializeNode(emitter, node); + emitter.emit(documentEndEvent(Mark(), Mark(), explicitEnd_)); + serializedNodes_.destroy(); + anchors_.destroy(); + string[Node] emptyAnchors; + anchors_ = emptyAnchors; + lastAnchorID_ = 0; + } + + private: + /** + * Determine if it's a good idea to add an anchor to a node. + * + * Used to prevent associating every single repeating scalar with an + * anchor/alias - only nodes long enough can use anchors. + * + * Params: node = Node to check for anchorability. + * + * Returns: True if the node is anchorable, false otherwise. + */ + static bool anchorable(ref Node node) @safe + { + if(node.nodeID == NodeID.scalar) + { + return (node.type == NodeType.string) ? node.as!string.length > 64 : + (node.type == NodeType.binary) ? 
node.as!(ubyte[]).length > 64 : + false; + } + return node.length > 2; + } + + @safe unittest + { + import std.string : representation; + auto shortString = "not much"; + auto longString = "A fairly long string that would be a good idea to add an anchor to"; + auto node1 = Node(shortString); + auto node2 = Node(shortString.representation.dup); + auto node3 = Node(longString); + auto node4 = Node(longString.representation.dup); + auto node5 = Node([node1]); + auto node6 = Node([node1, node2, node3, node4]); + assert(!anchorable(node1)); + assert(!anchorable(node2)); + assert(anchorable(node3)); + assert(anchorable(node4)); + assert(!anchorable(node5)); + assert(anchorable(node6)); + } + + ///Add an anchor to the node if it's anchorable and not anchored yet. + void anchorNode(ref Node node) @safe + { + if(!anchorable(node)){return;} + + if((node in anchors_) !is null) + { + if(anchors_[node] is null) + { + anchors_[node] = generateAnchor(); + } + return; + } + + anchors_.remove(node); + final switch (node.nodeID) + { + case NodeID.mapping: + foreach(ref Node key, ref Node value; node) + { + anchorNode(key); + anchorNode(value); + } + break; + case NodeID.sequence: + foreach(ref Node item; node) + { + anchorNode(item); + } + break; + case NodeID.invalid: + assert(0); + case NodeID.scalar: + } + } + + ///Generate and return a new anchor. + string generateAnchor() @safe + { + ++lastAnchorID_; + auto appender = appender!string(); + formattedWrite(appender, "id%03d", lastAnchorID_); + return appender.data; + } + + ///Serialize a node and all its subnodes. + void serializeNode(EmitterT)(ref EmitterT emitter, ref Node node) @safe + { + //If the node has an anchor, emit an anchor (as aliasEvent) on the + //first occurrence, save it in serializedNodes_, and emit an alias + //if it reappears. 
+ string aliased; + if(anchorable(node) && (node in anchors_) !is null) + { + aliased = anchors_[node]; + if((node in serializedNodes_) !is null) + { + emitter.emit(aliasEvent(Mark(), Mark(), aliased)); + return; + } + serializedNodes_[node] = true; + } + final switch (node.nodeID) + { + case NodeID.mapping: + const defaultTag = resolver_.defaultMappingTag; + const implicit = node.tag_ == defaultTag; + emitter.emit(mappingStartEvent(Mark(), Mark(), aliased, node.tag_, + implicit, node.collectionStyle)); + foreach(ref Node key, ref Node value; node) + { + serializeNode(emitter, key); + serializeNode(emitter, value); + } + emitter.emit(mappingEndEvent(Mark(), Mark())); + return; + case NodeID.sequence: + const defaultTag = resolver_.defaultSequenceTag; + const implicit = node.tag_ == defaultTag; + emitter.emit(sequenceStartEvent(Mark(), Mark(), aliased, node.tag_, + implicit, node.collectionStyle)); + foreach(ref Node item; node) + { + serializeNode(emitter, item); + } + emitter.emit(sequenceEndEvent(Mark(), Mark())); + return; + case NodeID.scalar: + assert(node.type == NodeType.string, "Scalar node type must be string before serialized"); + auto value = node.as!string; + const detectedTag = resolver_.resolve(NodeID.scalar, null, value, true); + const bool isDetected = node.tag_ == detectedTag; + + emitter.emit(scalarEvent(Mark(), Mark(), aliased, node.tag_, + isDetected, value.idup, node.scalarStyle)); + return; + case NodeID.invalid: + assert(0); + } + } +} + +// Issue #244 +@safe unittest +{ + import dub.internal.dyaml.dumper : dumper; + auto node = Node([ + Node.Pair( + Node(""), + Node([ + Node([ + Node.Pair( + Node("d"), + Node([ + Node([ + Node.Pair( + Node("c"), + Node("") + ), + Node.Pair( + Node("b"), + Node("") + ), + Node.Pair( + Node(""), + Node("") + ) + ]) + ]) + ), + ]), + Node([ + Node.Pair( + Node("d"), + Node([ + Node(""), + Node(""), + Node([ + Node.Pair( + Node("c"), + Node("") + ), + Node.Pair( + Node("b"), + Node("") + ), + Node.Pair( + 
Node(""), + Node("") + ) + ]) + ]) + ), + Node.Pair( + Node("z"), + Node("") + ), + Node.Pair( + Node(""), + Node("") + ) + ]), + Node("") + ]) + ), + Node.Pair( + Node("g"), + Node("") + ), + Node.Pair( + Node("h"), + Node("") + ), + ]); + + auto stream = appender!string(); + dumper().dump(stream, node); +} diff --git a/source/dub/internal/dyaml/stdsumtype.d b/source/dub/internal/dyaml/stdsumtype.d new file mode 100644 index 0000000..e27a734 --- /dev/null +++ b/source/dub/internal/dyaml/stdsumtype.d @@ -0,0 +1,2643 @@ +/++ + This module was copied from Phobos at commit 87c6e7e35 (2022-07-06). + This is necessary to include https://github.com/dlang/phobos/pull/8501 + which is a fix needed for DIP1000 compatibility. A couple minor changes + where also required to deal with `package(std)` imports. + +[SumType] is a generic discriminated union implementation that uses +design-by-introspection to generate safe and efficient code. Its features +include: + +* [Pattern matching.][match] +* Support for self-referential types. +* Full attribute correctness (`pure`, `@safe`, `@nogc`, and `nothrow` are + inferred whenever possible). +* A type-safe and memory-safe API compatible with DIP 1000 (`scope`). +* No dependency on runtime type information (`TypeInfo`). +* Compatibility with BetterC. + +License: Boost License 1.0 +Authors: Paul Backus +Source: $(PHOBOSSRC std/sumtype.d) ++/ +module dub.internal.dyaml.stdsumtype; + +/// $(DIVID basic-usage,$(H3 Basic usage)) +version (D_BetterC) {} else +@safe unittest +{ + import std.math : isClose; + + struct Fahrenheit { double degrees; } + struct Celsius { double degrees; } + struct Kelvin { double degrees; } + + alias Temperature = SumType!(Fahrenheit, Celsius, Kelvin); + + // Construct from any of the member types. + Temperature t1 = Fahrenheit(98.6); + Temperature t2 = Celsius(100); + Temperature t3 = Kelvin(273); + + // Use pattern matching to access the value. 
+ Fahrenheit toFahrenheit(Temperature t) + { + return Fahrenheit( + t.match!( + (Fahrenheit f) => f.degrees, + (Celsius c) => c.degrees * 9.0/5 + 32, + (Kelvin k) => k.degrees * 9.0/5 - 459.4 + ) + ); + } + + assert(toFahrenheit(t1).degrees.isClose(98.6)); + assert(toFahrenheit(t2).degrees.isClose(212)); + assert(toFahrenheit(t3).degrees.isClose(32)); + + // Use ref to modify the value in place. + void freeze(ref Temperature t) + { + t.match!( + (ref Fahrenheit f) => f.degrees = 32, + (ref Celsius c) => c.degrees = 0, + (ref Kelvin k) => k.degrees = 273 + ); + } + + freeze(t1); + assert(toFahrenheit(t1).degrees.isClose(32)); + + // Use a catch-all handler to give a default result. + bool isFahrenheit(Temperature t) + { + return t.match!( + (Fahrenheit f) => true, + _ => false + ); + } + + assert(isFahrenheit(t1)); + assert(!isFahrenheit(t2)); + assert(!isFahrenheit(t3)); +} + +/** $(DIVID introspection-based-matching, $(H3 Introspection-based matching)) + * + * In the `length` and `horiz` functions below, the handlers for `match` do not + * specify the types of their arguments. Instead, matching is done based on how + * the argument is used in the body of the handler: any type with `x` and `y` + * properties will be matched by the `rect` handlers, and any type with `r` and + * `theta` properties will be matched by the `polar` handlers. 
+ */ +version (D_BetterC) {} else +@safe unittest +{ + import std.math : isClose; + import std.math : cos; + import std.math : PI; + import std.math : sqrt; + + struct Rectangular { double x, y; } + struct Polar { double r, theta; } + alias Vector = SumType!(Rectangular, Polar); + + double length(Vector v) + { + return v.match!( + rect => sqrt(rect.x^^2 + rect.y^^2), + polar => polar.r + ); + } + + double horiz(Vector v) + { + return v.match!( + rect => rect.x, + polar => polar.r * cos(polar.theta) + ); + } + + Vector u = Rectangular(1, 1); + Vector v = Polar(1, PI/4); + + assert(length(u).isClose(sqrt(2.0))); + assert(length(v).isClose(1)); + assert(horiz(u).isClose(1)); + assert(horiz(v).isClose(sqrt(0.5))); +} + +/** $(DIVID arithmetic-expression-evaluator, $(H3 Arithmetic expression evaluator)) + * + * This example makes use of the special placeholder type `This` to define a + * [recursive data type](https://en.wikipedia.org/wiki/Recursive_data_type): an + * [abstract syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) for + * representing simple arithmetic expressions. + */ +version (D_BetterC) {} else +@system unittest +{ + import std.functional : partial; + import std.traits : EnumMembers; + import std.typecons : Tuple; + + enum Op : string + { + Plus = "+", + Minus = "-", + Times = "*", + Div = "/" + } + + // An expression is either + // - a number, + // - a variable, or + // - a binary operation combining two sub-expressions. + alias Expr = SumType!( + double, + string, + Tuple!(Op, "op", This*, "lhs", This*, "rhs") + ); + + // Shorthand for Tuple!(Op, "op", Expr*, "lhs", Expr*, "rhs"), + // the Tuple type above with Expr substituted for This. 
+ alias BinOp = Expr.Types[2]; + + // Factory function for number expressions + Expr* num(double value) + { + return new Expr(value); + } + + // Factory function for variable expressions + Expr* var(string name) + { + return new Expr(name); + } + + // Factory function for binary operation expressions + Expr* binOp(Op op, Expr* lhs, Expr* rhs) + { + return new Expr(BinOp(op, lhs, rhs)); + } + + // Convenience wrappers for creating BinOp expressions + alias sum = partial!(binOp, Op.Plus); + alias diff = partial!(binOp, Op.Minus); + alias prod = partial!(binOp, Op.Times); + alias quot = partial!(binOp, Op.Div); + + // Evaluate expr, looking up variables in env + double eval(Expr expr, double[string] env) + { + return expr.match!( + (double num) => num, + (string var) => env[var], + (BinOp bop) + { + double lhs = eval(*bop.lhs, env); + double rhs = eval(*bop.rhs, env); + final switch (bop.op) + { + static foreach (op; EnumMembers!Op) + { + case op: + return mixin("lhs" ~ op ~ "rhs"); + } + } + } + ); + } + + // Return a "pretty-printed" representation of expr + string pprint(Expr expr) + { + import std.format : format; + + return expr.match!( + (double num) => "%g".format(num), + (string var) => var, + (BinOp bop) => "(%s %s %s)".format( + pprint(*bop.lhs), + cast(string) bop.op, + pprint(*bop.rhs) + ) + ); + } + + Expr* myExpr = sum(var("a"), prod(num(2), var("b"))); + double[string] myEnv = ["a":3, "b":4, "c":7]; + + assert(eval(*myExpr, myEnv) == 11); + assert(pprint(*myExpr) == "(a + (2 * b))"); +} + +import std.format : FormatSpec, singleSpec; +import std.meta : AliasSeq, Filter, IndexOf = staticIndexOf, Map = staticMap; +import std.meta : NoDuplicates; +import std.meta : anySatisfy, allSatisfy; +import std.traits : hasElaborateCopyConstructor, hasElaborateDestructor; +import std.traits : isAssignable, isCopyable, isStaticArray; +import std.traits : ConstOf, ImmutableOf, InoutOf, TemplateArgsOf; + +// FIXME: std.sumtype : `std.traits : DeducedParameterType` and 
`std.conv : toCtString` +// are `package(std)` but trivial, hence copied below +import std.traits : CommonType, /*DeducatedParameterType*/ Unqual; +private template DeducedParameterType(T) +{ + static if (is(T == U*, U) || is(T == U[], U)) + alias DeducedParameterType = Unqual!T; + else + alias DeducedParameterType = T; +} + +/// Compatibility with < v2.095.0 +private struct __InoutWorkaroundStruct{} +private @property T rvalueOf(T)(inout __InoutWorkaroundStruct = __InoutWorkaroundStruct.init); +private @property ref T lvalueOf(T)(inout __InoutWorkaroundStruct = __InoutWorkaroundStruct.init); +private enum isRvalueAssignable(Lhs, Rhs = Lhs) = __traits(compiles, { lvalueOf!Lhs = rvalueOf!Rhs; }); + +import std.typecons : ReplaceTypeUnless; +import std.typecons : Flag; +//import std.conv : toCtString; +private enum toCtString(ulong n) = n.stringof[0 .. $ - "LU".length]; + +/// Placeholder used to refer to the enclosing [SumType]. +struct This {} + +// True if a variable of type T can appear on the lhs of an assignment +private enum isAssignableTo(T) = + isAssignable!T || (!isCopyable!T && isRvalueAssignable!T); + +// toHash is required by the language spec to be nothrow and @safe +private enum isHashable(T) = __traits(compiles, + () nothrow @safe { hashOf(T.init); } +); + +private enum hasPostblit(T) = __traits(hasPostblit, T); + +private enum isInout(T) = is(T == inout); + +/** + * A [tagged union](https://en.wikipedia.org/wiki/Tagged_union) that can hold a + * single value from any of a specified set of types. + * + * The value in a `SumType` can be operated on using [pattern matching][match]. + * + * To avoid ambiguity, duplicate types are not allowed (but see the + * ["basic usage" example](#basic-usage) for a workaround). + * + * The special type `This` can be used as a placeholder to create + * self-referential types, just like with `Algebraic`. See the + * ["Arithmetic expression evaluator" example](#arithmetic-expression-evaluator) for + * usage. 
+ * + * A `SumType` is initialized by default to hold the `.init` value of its + * first member type, just like a regular union. The version identifier + * `SumTypeNoDefaultCtor` can be used to disable this behavior. + * + * See_Also: $(REF Algebraic, std,variant) + */ +struct SumType(Types...) +if (is(NoDuplicates!Types == Types) && Types.length > 0) +{ + /// The types a `SumType` can hold. + alias Types = AliasSeq!( + ReplaceTypeUnless!(isSumTypeInstance, This, typeof(this), TemplateArgsOf!SumType) + ); + +private: + + enum bool canHoldTag(T) = Types.length <= T.max; + alias unsignedInts = AliasSeq!(ubyte, ushort, uint, ulong); + + alias Tag = Filter!(canHoldTag, unsignedInts)[0]; + + union Storage + { + // Workaround for https://issues.dlang.org/show_bug.cgi?id=20068 + template memberName(T) + if (IndexOf!(T, Types) >= 0) + { + enum tid = IndexOf!(T, Types); + mixin("enum memberName = `values_", toCtString!tid, "`;"); + } + + static foreach (T; Types) + { + mixin("T ", memberName!T, ";"); + } + } + + Storage storage; + Tag tag; + + /* Accesses the value stored in a SumType. + * + * This method is memory-safe, provided that: + * + * 1. A SumType's tag is always accurate. + * 2. A SumType cannot be assigned to in @safe code if that assignment + * could cause unsafe aliasing. + * + * All code that accesses a SumType's tag or storage directly, including + * @safe code in this module, must be manually checked to ensure that it + * does not violate either of the above requirements. 
+ */ + @trusted + ref inout(T) get(T)() inout + if (IndexOf!(T, Types) >= 0) + { + enum tid = IndexOf!(T, Types); + assert(tag == tid, + "This `" ~ SumType.stringof ~ + "` does not contain a(n) `" ~ T.stringof ~ "`" + ); + return __traits(getMember, storage, Storage.memberName!T); + } + +public: + + // Workaround for https://issues.dlang.org/show_bug.cgi?id=21399 + version (StdDdoc) + { + // Dummy type to stand in for loop variable + private struct T; + + /// Constructs a `SumType` holding a specific value. + this(T value); + + /// ditto + this(const(T) value) const; + + /// ditto + this(immutable(T) value) immutable; + + /// ditto + this(Value)(Value value) inout + if (is(Value == DeducedParameterType!(inout(T)))); + } + + static foreach (tid, T; Types) + { + /// Constructs a `SumType` holding a specific value. + this(T value) + { + import core.lifetime : forward; + + static if (isCopyable!T) + { + // Workaround for https://issues.dlang.org/show_bug.cgi?id=21542 + if (__ctfe) + __traits(getMember, storage, Storage.memberName!T) = value; + else + __traits(getMember, storage, Storage.memberName!T) = forward!value; + } + else + { + __traits(getMember, storage, Storage.memberName!T) = forward!value; + } + + tag = tid; + } + + // DUB: Those traits compile work around bugs in < v2.098 + static if (!__traits(compiles, { T c = const(T).init; })) + { + static if (isCopyable!(const(T))) + { + static if (IndexOf!(const(T), Map!(ConstOf, Types)) == tid) + { + /// ditto + this(const(T) value) const + { + __traits(getMember, storage, Storage.memberName!T) = value; + tag = tid; + } + } + } + else + { + @disable this(const(T) value) const; + } + } + + static if (!__traits(compiles, { T c = immutable(T).init; })) + { + static if (isCopyable!(immutable(T))) + { + static if (IndexOf!(immutable(T), Map!(ImmutableOf, Types)) == tid) + { + /// ditto + this(immutable(T) value) immutable + { + __traits(getMember, storage, Storage.memberName!T) = value; + tag = tid; + } + } + } + else + { 
+ @disable this(immutable(T) value) immutable; + } + } + + static if (isCopyable!(inout(T))) + { + static if (IndexOf!(inout(T), Map!(InoutOf, Types)) == tid) + { + /// ditto + this(Value)(Value value) inout + if (is(Value == DeducedParameterType!(inout(T)))) + { + __traits(getMember, storage, Storage.memberName!T) = value; + tag = tid; + } + } + } + else + { + @disable this(Value)(Value value) inout + if (is(Value == DeducedParameterType!(inout(T)))); + } + } + + static if (anySatisfy!(hasElaborateCopyConstructor, Types)) + { + static if + ( + allSatisfy!(isCopyable, Map!(InoutOf, Types)) + && !anySatisfy!(hasPostblit, Map!(InoutOf, Types)) + && allSatisfy!(isInout, Map!(InoutOf, Types)) + ) + { + /// Constructs a `SumType` that's a copy of another `SumType`. + this(ref inout(SumType) other) inout + { + storage = other.match!((ref value) { + alias OtherTypes = Map!(InoutOf, Types); + enum tid = IndexOf!(typeof(value), OtherTypes); + alias T = Types[tid]; + + mixin("inout(Storage) newStorage = { ", + Storage.memberName!T, ": value", + " };"); + + return newStorage; + }); + + tag = other.tag; + } + } + else + { + static if (allSatisfy!(isCopyable, Types)) + { + /// ditto + this(ref SumType other) + { + storage = other.match!((ref value) { + alias T = typeof(value); + + mixin("Storage newStorage = { ", + Storage.memberName!T, ": value", + " };"); + + return newStorage; + }); + + tag = other.tag; + } + } + else + { + @disable this(ref SumType other); + } + + static if (allSatisfy!(isCopyable, Map!(ConstOf, Types))) + { + /// ditto + this(ref const(SumType) other) const + { + storage = other.match!((ref value) { + alias OtherTypes = Map!(ConstOf, Types); + enum tid = IndexOf!(typeof(value), OtherTypes); + alias T = Types[tid]; + + mixin("const(Storage) newStorage = { ", + Storage.memberName!T, ": value", + " };"); + + return newStorage; + }); + + tag = other.tag; + } + } + else + { + @disable this(ref const(SumType) other) const; + } + + static if 
(allSatisfy!(isCopyable, Map!(ImmutableOf, Types))) + { + /// ditto + this(ref immutable(SumType) other) immutable + { + storage = other.match!((ref value) { + alias OtherTypes = Map!(ImmutableOf, Types); + enum tid = IndexOf!(typeof(value), OtherTypes); + alias T = Types[tid]; + + mixin("immutable(Storage) newStorage = { ", + Storage.memberName!T, ": value", + " };"); + + return newStorage; + }); + + tag = other.tag; + } + } + else + { + @disable this(ref immutable(SumType) other) immutable; + } + } + } + + version (SumTypeNoDefaultCtor) + { + @disable this(); + } + + // Workaround for https://issues.dlang.org/show_bug.cgi?id=21399 + version (StdDdoc) + { + // Dummy type to stand in for loop variable + private struct T; + + /** + * Assigns a value to a `SumType`. + * + * If any of the `SumType`'s members other than the one being assigned + * to contain pointers or references, it is possible for the assignment + * to cause memory corruption (see the + * ["Memory corruption" example](#memory-corruption) below for an + * illustration of how). Therefore, such assignments are considered + * `@system`. + * + * An individual assignment can be `@trusted` if the caller can + * guarantee that there are no outstanding references to any `SumType` + * members that contain pointers or references at the time the + * assignment occurs. + * + * Examples: + * + * $(DIVID memory-corruption, $(H3 Memory corruption)) + * + * This example shows how assignment to a `SumType` can be used to + * cause memory corruption in `@system` code. In `@safe` code, the + * assignment `s = 123` would not be allowed. + * + * --- + * SumType!(int*, int) s = new int; + * s.tryMatch!( + * (ref int* p) { + * s = 123; // overwrites `p` + * return *p; // undefined behavior + * } + * ); + * --- + */ + ref SumType opAssign(T rhs); + } + + static foreach (tid, T; Types) + { + static if (isAssignableTo!T) + { + /** + * Assigns a value to a `SumType`. 
+ * + * If any of the `SumType`'s members other than the one being assigned + * to contain pointers or references, it is possible for the assignment + * to cause memory corruption (see the + * ["Memory corruption" example](#memory-corruption) below for an + * illustration of how). Therefore, such assignments are considered + * `@system`. + * + * An individual assignment can be `@trusted` if the caller can + * guarantee that there are no outstanding references to any `SumType` + * members that contain pointers or references at the time the + * assignment occurs. + * + * Examples: + * + * $(DIVID memory-corruption, $(H3 Memory corruption)) + * + * This example shows how assignment to a `SumType` can be used to + * cause memory corruption in `@system` code. In `@safe` code, the + * assignment `s = 123` would not be allowed. + * + * --- + * SumType!(int*, int) s = new int; + * s.tryMatch!( + * (ref int* p) { + * s = 123; // overwrites `p` + * return *p; // undefined behavior + * } + * ); + * --- + */ + ref SumType opAssign(T rhs) + { + import core.lifetime : forward; + import std.traits : hasIndirections, hasNested; + import std.meta : AliasSeq, Or = templateOr; + + alias OtherTypes = + AliasSeq!(Types[0 .. tid], Types[tid + 1 .. $]); + enum unsafeToOverwrite = + anySatisfy!(Or!(hasIndirections, hasNested), OtherTypes); + + static if (unsafeToOverwrite) + { + cast(void) () @system {}(); + } + + this.match!destroyIfOwner; + + static if (isCopyable!T) + { + // Workaround for https://issues.dlang.org/show_bug.cgi?id=21542 + mixin("Storage newStorage = { ", + Storage.memberName!T, ": __ctfe ? rhs : forward!rhs", + " };"); + } + else + { + mixin("Storage newStorage = { ", + Storage.memberName!T, ": forward!rhs", + " };"); + } + + storage = newStorage; + tag = tid; + + return this; + } + } + } + + static if (allSatisfy!(isAssignableTo, Types)) + { + static if (allSatisfy!(isCopyable, Types)) + { + /** + * Copies the value from another `SumType` into this one. 
+ * + * See the value-assignment overload for details on `@safe`ty. + * + * Copy assignment is `@disable`d if any of `Types` is non-copyable. + */ + ref SumType opAssign(ref SumType rhs) + { + rhs.match!((ref value) { this = value; }); + return this; + } + } + else + { + @disable ref SumType opAssign(ref SumType rhs); + } + + /** + * Moves the value from another `SumType` into this one. + * + * See the value-assignment overload for details on `@safe`ty. + */ + ref SumType opAssign(SumType rhs) + { + import core.lifetime : move; + + rhs.match!((ref value) { + static if (isCopyable!(typeof(value))) + { + // Workaround for https://issues.dlang.org/show_bug.cgi?id=21542 + this = __ctfe ? value : move(value); + } + else + { + this = move(value); + } + }); + return this; + } + } + + /** + * Compares two `SumType`s for equality. + * + * Two `SumType`s are equal if they are the same kind of `SumType`, they + * contain values of the same type, and those values are equal. + */ + bool opEquals(this This, Rhs)(auto ref Rhs rhs) + if (!is(CommonType!(This, Rhs) == void)) + { + static if (is(This == Rhs)) + { + return AliasSeq!(this, rhs).match!((ref value, ref rhsValue) { + static if (is(typeof(value) == typeof(rhsValue))) + { + return value == rhsValue; + } + else + { + return false; + } + }); + } + else + { + alias CommonSumType = CommonType!(This, Rhs); + return cast(CommonSumType) this == cast(CommonSumType) rhs; + } + } + + // Workaround for https://issues.dlang.org/show_bug.cgi?id=19407 + static if (__traits(compiles, anySatisfy!(hasElaborateDestructor, Types))) + { + // If possible, include the destructor only when it's needed + private enum includeDtor = anySatisfy!(hasElaborateDestructor, Types); + } + else + { + // If we can't tell, always include it, even when it does nothing + private enum includeDtor = true; + } + + static if (includeDtor) + { + /// Calls the destructor of the `SumType`'s current value. 
+ ~this() + { + this.match!destroyIfOwner; + } + } + + invariant + { + this.match!((ref value) { + static if (is(typeof(value) == class)) + { + if (value !is null) + { + assert(value); + } + } + else static if (is(typeof(value) == struct)) + { + assert(&value); + } + }); + } + + // Workaround for https://issues.dlang.org/show_bug.cgi?id=21400 + version (StdDdoc) + { + /** + * Returns a string representation of the `SumType`'s current value. + * + * Not available when compiled with `-betterC`. + */ + string toString(this This)(); + + /** + * Handles formatted writing of the `SumType`'s current value. + * + * Not available when compiled with `-betterC`. + * + * Params: + * sink = Output range to write to. + * fmt = Format specifier to use. + * + * See_Also: $(REF formatValue, std,format) + */ + void toString(this This, Sink, Char)(ref Sink sink, const ref FormatSpec!Char fmt); + } + + version (D_BetterC) {} else + /** + * Returns a string representation of the `SumType`'s current value. + * + * Not available when compiled with `-betterC`. + */ + string toString(this This)() + { + import std.conv : to; + + return this.match!(to!string); + } + + version (D_BetterC) {} else + /** + * Handles formatted writing of the `SumType`'s current value. + * + * Not available when compiled with `-betterC`. + * + * Params: + * sink = Output range to write to. + * fmt = Format specifier to use. + * + * See_Also: $(REF formatValue, std,format) + */ + void toString(this This, Sink, Char)(ref Sink sink, const ref FormatSpec!Char fmt) + { + import std.format : formatValue; + + this.match!((ref value) { + formatValue(sink, value, fmt); + }); + } + + static if (allSatisfy!(isHashable, Map!(ConstOf, Types))) + { + // Workaround for https://issues.dlang.org/show_bug.cgi?id=21400 + version (StdDdoc) + { + /** + * Returns the hash of the `SumType`'s current value. + * + * Not available when compiled with `-betterC`. 
+ */ + size_t toHash() const; + } + + // Workaround for https://issues.dlang.org/show_bug.cgi?id=20095 + version (D_BetterC) {} else + /** + * Returns the hash of the `SumType`'s current value. + * + * Not available when compiled with `-betterC`. + */ + size_t toHash() const + { + return this.match!hashOf; + } + } +} + +// Construction +@safe unittest +{ + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + MySum y = MySum(3.14); +} + +// Assignment +@safe unittest +{ + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + x = 3.14; +} + +// Self assignment +@safe unittest +{ + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + MySum y = MySum(3.14); + y = x; +} + +// Equality +@safe unittest +{ + alias MySum = SumType!(int, float); + + assert(MySum(123) == MySum(123)); + assert(MySum(123) != MySum(456)); + assert(MySum(123) != MySum(123.0)); + assert(MySum(123) != MySum(456.0)); + +} + +// Equality of differently-qualified SumTypes +// Disabled in BetterC due to use of dynamic arrays +version (D_BetterC) {} else +@safe unittest +{ + alias SumA = SumType!(int, float); + alias SumB = SumType!(const(int[]), int[]); + alias SumC = SumType!(int[], const(int[])); + + int[] ma = [1, 2, 3]; + const(int[]) ca = [1, 2, 3]; + + assert(const(SumA)(123) == SumA(123)); + assert(const(SumB)(ma[]) == SumB(ca[])); + assert(const(SumC)(ma[]) == SumC(ca[])); +} + +// Imported types +@safe unittest +{ + import std.typecons : Tuple; + + alias MySum = SumType!(Tuple!(int, int)); +} + +// const and immutable types +@safe unittest +{ + alias MySum = SumType!(const(int[]), immutable(float[])); +} + +// Recursive types +@safe unittest +{ + alias MySum = SumType!(This*); + assert(is(MySum.Types[0] == MySum*)); +} + +// Allowed types +@safe unittest +{ + import std.meta : AliasSeq; + + alias MySum = SumType!(int, float, This*); + + assert(is(MySum.Types == AliasSeq!(int, float, MySum*))); +} + +// Types with destructors and postblits +@system unittest +{ + int 
copies; + + static struct Test + { + bool initialized = false; + int* copiesPtr; + + this(this) { (*copiesPtr)++; } + ~this() { if (initialized) (*copiesPtr)--; } + } + + alias MySum = SumType!(int, Test); + + Test t = Test(true, &copies); + + { + MySum x = t; + assert(copies == 1); + } + assert(copies == 0); + + { + MySum x = 456; + assert(copies == 0); + } + assert(copies == 0); + + { + MySum x = t; + assert(copies == 1); + x = 456; + assert(copies == 0); + } + + { + MySum x = 456; + assert(copies == 0); + x = t; + assert(copies == 1); + } + + { + MySum x = t; + MySum y = x; + assert(copies == 2); + } + + { + MySum x = t; + MySum y; + y = x; + assert(copies == 2); + } +} + +// Doesn't destroy reference types +// Disabled in BetterC due to use of classes +version (D_BetterC) {} else +@system unittest +{ + bool destroyed; + + class C + { + ~this() + { + destroyed = true; + } + } + + struct S + { + ~this() {} + } + + alias MySum = SumType!(S, C); + + C c = new C(); + { + MySum x = c; + destroyed = false; + } + assert(!destroyed); + + { + MySum x = c; + destroyed = false; + x = S(); + assert(!destroyed); + } +} + +// Types with @disable this() +@safe unittest +{ + static struct NoInit + { + @disable this(); + } + + alias MySum = SumType!(NoInit, int); + + assert(!__traits(compiles, MySum())); + auto _ = MySum(42); +} + +// const SumTypes +version (D_BetterC) {} else // not @nogc, https://issues.dlang.org/show_bug.cgi?id=22117 +@safe unittest +{ + auto _ = const(SumType!(int[]))([1, 2, 3]); +} + +// Equality of const SumTypes +@safe unittest +{ + alias MySum = SumType!int; + + auto _ = const(MySum)(123) == const(MySum)(456); +} + +// Compares reference types using value equality +@safe unittest +{ + import std.array : staticArray; + + static struct Field {} + static struct Struct { Field[] fields; } + alias MySum = SumType!Struct; + + static arr1 = staticArray([Field()]); + static arr2 = staticArray([Field()]); + + auto a = MySum(Struct(arr1[])); + auto b = 
MySum(Struct(arr2[])); + + assert(a == b); +} + +// toString +// Disabled in BetterC due to use of std.conv.text +version (D_BetterC) {} else +@safe unittest +{ + import std.conv : text; + + static struct Int { int i; } + static struct Double { double d; } + alias Sum = SumType!(Int, Double); + + assert(Sum(Int(42)).text == Int(42).text, Sum(Int(42)).text); + assert(Sum(Double(33.3)).text == Double(33.3).text, Sum(Double(33.3)).text); + assert((const(Sum)(Int(42))).text == (const(Int)(42)).text, (const(Sum)(Int(42))).text); +} + +// string formatting +// Disabled in BetterC due to use of std.format.format +version (D_BetterC) {} else +@safe unittest +{ + import std.format : format; + + SumType!int x = 123; + + assert(format!"%s"(x) == format!"%s"(123)); + assert(format!"%x"(x) == format!"%x"(123)); +} + +// string formatting of qualified SumTypes +// Disabled in BetterC due to use of std.format.format and dynamic arrays +version (D_BetterC) {} else +@safe unittest +{ + import std.format : format; + + int[] a = [1, 2, 3]; + const(SumType!(int[])) x = a; + + assert(format!"%(%d, %)"(x) == format!"%(%s, %)"(a)); +} + +// Github issue #16 +// Disabled in BetterC due to use of dynamic arrays +version (D_BetterC) {} else +@safe unittest +{ + alias Node = SumType!(This[], string); + + // override inference of @system attribute for cyclic functions + assert((() @trusted => + Node([Node([Node("x")])]) + == + Node([Node([Node("x")])]) + )()); +} + +// Github issue #16 with const +// Disabled in BetterC due to use of dynamic arrays +version (D_BetterC) {} else +@safe unittest +{ + alias Node = SumType!(const(This)[], string); + + // override inference of @system attribute for cyclic functions + assert((() @trusted => + Node([Node([Node("x")])]) + == + Node([Node([Node("x")])]) + )()); +} + +// Stale pointers +// Disabled in BetterC due to use of dynamic arrays +version (D_BetterC) {} else +@system unittest +{ + alias MySum = SumType!(ubyte, void*[2]); + + MySum x = [null, 
cast(void*) 0x12345678]; + void** p = &x.get!(void*[2])[1]; + x = ubyte(123); + + assert(*p != cast(void*) 0x12345678); +} + +// Exception-safe assignment +// Disabled in BetterC due to use of exceptions +version (D_BetterC) {} else +@safe unittest +{ + static struct A + { + int value = 123; + } + + static struct B + { + int value = 456; + this(this) { throw new Exception("oops"); } + } + + alias MySum = SumType!(A, B); + + MySum x; + try + { + x = B(); + } + catch (Exception e) {} + + assert( + (x.tag == 0 && x.get!A.value == 123) || + (x.tag == 1 && x.get!B.value == 456) + ); +} + +// Types with @disable this(this) +@safe unittest +{ + import core.lifetime : move; + + static struct NoCopy + { + @disable this(this); + } + + alias MySum = SumType!NoCopy; + + NoCopy lval = NoCopy(); + + MySum x = NoCopy(); + MySum y = NoCopy(); + + + assert(!__traits(compiles, SumType!NoCopy(lval))); + + y = NoCopy(); + y = move(x); + assert(!__traits(compiles, y = lval)); + assert(!__traits(compiles, y = x)); + + bool b = x == y; +} + +// Github issue #22 +// Disabled in BetterC due to use of std.typecons.Nullable +version (D_BetterC) {} else +@safe unittest +{ + import std.typecons; + + static struct A + { + SumType!(Nullable!int) a = Nullable!int.init; + } +} + +// Static arrays of structs with postblits +// Disabled in BetterC due to use of dynamic arrays +version (D_BetterC) {} else +@safe unittest +{ + static struct S + { + int n; + this(this) { n++; } + } + + SumType!(S[1]) x = [S(0)]; + SumType!(S[1]) y = x; + + auto xval = x.get!(S[1])[0].n; + auto yval = y.get!(S[1])[0].n; + + assert(xval != yval); +} + +// Replacement does not happen inside SumType +// Disabled in BetterC due to use of associative arrays +version (D_BetterC) {} else +@safe unittest +{ + import std.typecons : Tuple, ReplaceTypeUnless; + alias A = Tuple!(This*,SumType!(This*))[SumType!(This*,string)[This]]; + alias TR = ReplaceTypeUnless!(isSumTypeInstance, This, int, A); + static assert(is(TR == 
Tuple!(int*,SumType!(This*))[SumType!(This*, string)[int]])); +} + +// Supports nested self-referential SumTypes +@safe unittest +{ + import std.typecons : Tuple, Flag; + alias Nat = SumType!(Flag!"0", Tuple!(This*)); + alias Inner = SumType!Nat; + alias Outer = SumType!(Nat*, Tuple!(This*, This*)); +} + +// Self-referential SumTypes inside Algebraic +// Disabled in BetterC due to use of std.variant.Algebraic +version (D_BetterC) {} else +@safe unittest +{ + import std.variant : Algebraic; + + alias T = Algebraic!(SumType!(This*)); + + assert(is(T.AllowedTypes[0].Types[0] == T.AllowedTypes[0]*)); +} + +// Doesn't call @system postblits in @safe code +@safe unittest +{ + static struct SystemCopy { @system this(this) {} } + SystemCopy original; + + assert(!__traits(compiles, () @safe + { + SumType!SystemCopy copy = original; + })); + + assert(!__traits(compiles, () @safe + { + SumType!SystemCopy copy; copy = original; + })); +} + +// Doesn't overwrite pointers in @safe code +@safe unittest +{ + alias MySum = SumType!(int*, int); + + MySum x; + + assert(!__traits(compiles, () @safe + { + x = 123; + })); + + assert(!__traits(compiles, () @safe + { + x = MySum(123); + })); +} + +// Types with invariants +// Disabled in BetterC due to use of exceptions +version (D_BetterC) {} else +version (D_Invariants) +@system unittest +{ + import std.exception : assertThrown; + import core.exception : AssertError; + + struct S + { + int i; + invariant { assert(i >= 0); } + } + + class C + { + int i; + invariant { assert(i >= 0); } + } + + SumType!S x; + x.match!((ref v) { v.i = -1; }); + assertThrown!AssertError(assert(&x)); + + SumType!C y = new C(); + y.match!((ref v) { v.i = -1; }); + assertThrown!AssertError(assert(&y)); +} + +// Calls value postblit on self-assignment +@safe unittest +{ + static struct S + { + int n; + this(this) { n++; } + } + + SumType!S x = S(); + SumType!S y; + y = x; + + auto xval = x.get!S.n; + auto yval = y.get!S.n; + + assert(xval != yval); +} + +// 
Github issue #29 +@safe unittest +{ + alias A = SumType!string; + + @safe A createA(string arg) + { + return A(arg); + } + + @safe void test() + { + A a = createA(""); + } +} + +// SumTypes as associative array keys +// Disabled in BetterC due to use of associative arrays +version (D_BetterC) {} else +@safe unittest +{ + int[SumType!(int, string)] aa; +} + +// toString with non-copyable types +// Disabled in BetterC due to use of std.conv.to (in toString) +version (D_BetterC) {} else +@safe unittest +{ + struct NoCopy + { + @disable this(this); + } + + SumType!NoCopy x; + + auto _ = x.toString(); +} + +// Can use the result of assignment +@safe unittest +{ + alias MySum = SumType!(int, float); + + MySum a = MySum(123); + MySum b = MySum(3.14); + + assert((a = b) == b); + assert((a = MySum(123)) == MySum(123)); + assert((a = 3.14) == MySum(3.14)); + assert(((a = b) = MySum(123)) == MySum(123)); +} + +// Types with copy constructors +@safe unittest +{ + static struct S + { + int n; + + this(ref return scope inout S other) inout + { + n = other.n + 1; + } + } + + SumType!S x = S(); + SumType!S y = x; + + auto xval = x.get!S.n; + auto yval = y.get!S.n; + + assert(xval != yval); +} + +// Copyable by generated copy constructors +@safe unittest +{ + static struct Inner + { + ref this(ref inout Inner other) {} + } + + static struct Outer + { + SumType!Inner inner; + } + + Outer x; + Outer y = x; +} + +// Types with qualified copy constructors +@safe unittest +{ + static struct ConstCopy + { + int n; + this(inout int n) inout { this.n = n; } + this(ref const typeof(this) other) const { this.n = other.n; } + } + + static struct ImmutableCopy + { + int n; + this(inout int n) inout { this.n = n; } + this(ref immutable typeof(this) other) immutable { this.n = other.n; } + } + + const SumType!ConstCopy x = const(ConstCopy)(1); + immutable SumType!ImmutableCopy y = immutable(ImmutableCopy)(1); +} + +// Types with disabled opEquals +@safe unittest +{ + static struct S + { + 
@disable bool opEquals(const S rhs) const; + } + + auto _ = SumType!S(S()); +} + +// Types with non-const opEquals +@safe unittest +{ + static struct S + { + int i; + bool opEquals(S rhs) { return i == rhs.i; } + } + + auto _ = SumType!S(S(123)); +} + +// Incomparability of different SumTypes +@safe unittest +{ + SumType!(int, string) x = 123; + SumType!(string, int) y = 123; + + assert(!__traits(compiles, x != y)); +} + +// Self-reference in return/parameter type of function pointer member +// Disabled in BetterC due to use of delegates +version (D_BetterC) {} else +@safe unittest +{ + alias T = SumType!(int, This delegate(This)); +} + +// Construction and assignment from implicitly-convertible lvalue +@safe unittest +{ + alias MySum = SumType!bool; + + const(bool) b = true; + + MySum x = b; + MySum y; y = b; +} + +// @safe assignment to the only pointer type in a SumType +@safe unittest +{ + SumType!(string, int) sm = 123; + sm = "this should be @safe"; +} + +// Immutable member type with copy constructor +// https://issues.dlang.org/show_bug.cgi?id=22572 +@safe unittest +{ + static struct CopyConstruct + { + this(ref inout CopyConstruct other) inout {} + } + + static immutable struct Value + { + CopyConstruct c; + } + + SumType!Value s; +} + +// Construction of inout-qualified SumTypes +// https://issues.dlang.org/show_bug.cgi?id=22901 +@safe unittest +{ + static inout(SumType!(int[])) example(inout(int[]) arr) + { + return inout(SumType!(int[]))(arr); + } +} + +// Assignment of struct with overloaded opAssign in CTFE +// https://issues.dlang.org/show_bug.cgi?id=23182 +@safe unittest +{ + static struct HasOpAssign + { + void opAssign(HasOpAssign rhs) {} + } + + static SumType!HasOpAssign test() + { + SumType!HasOpAssign s; + // Test both overloads + s = HasOpAssign(); + s = SumType!HasOpAssign(); + return s; + } + + // Force CTFE + enum result = test(); +} + +/// True if `T` is an instance of the `SumType` template, otherwise false. 
+private enum bool isSumTypeInstance(T) = is(T == SumType!Args, Args...); + +@safe unittest +{ + static struct Wrapper + { + SumType!int s; + alias s this; + } + + assert(isSumTypeInstance!(SumType!int)); + assert(!isSumTypeInstance!Wrapper); +} + +/// True if `T` is a [SumType] or implicitly converts to one, otherwise false. +enum bool isSumType(T) = is(T : SumType!Args, Args...); + +/// +@safe unittest +{ + static struct ConvertsToSumType + { + SumType!int payload; + alias payload this; + } + + static struct ContainsSumType + { + SumType!int payload; + } + + assert(isSumType!(SumType!int)); + assert(isSumType!ConvertsToSumType); + assert(!isSumType!ContainsSumType); +} + +/** + * Calls a type-appropriate function with the value held in a [SumType]. + * + * For each possible type the [SumType] can hold, the given handlers are + * checked, in order, to see whether they accept a single argument of that type. + * The first one that does is chosen as the match for that type. (Note that the + * first match may not always be the most exact match. + * See ["Avoiding unintentional matches"](#avoiding-unintentional-matches) for + * one common pitfall.) + * + * Every type must have a matching handler, and every handler must match at + * least one type. This is enforced at compile time. + * + * Handlers may be functions, delegates, or objects with `opCall` overloads. If + * a function with more than one overload is given as a handler, all of the + * overloads are considered as potential matches. + * + * Templated handlers are also accepted, and will match any type for which they + * can be [implicitly instantiated](https://dlang.org/glossary.html#ifti). See + * ["Introspection-based matching"](#introspection-based-matching) for an + * example of templated handler usage. + * + * If multiple [SumType]s are passed to match, their values are passed to the + * handlers as separate arguments, and matching is done for each possible + * combination of value types. 
See ["Multiple dispatch"](#multiple-dispatch) for + * an example. + * + * Returns: + * The value returned from the handler that matches the currently-held type. + * + * See_Also: $(REF visit, std,variant) + */ +template match(handlers...) +{ + import std.typecons : Yes; + + /** + * The actual `match` function. + * + * Params: + * args = One or more [SumType] objects. + */ + auto ref match(SumTypes...)(auto ref SumTypes args) + if (allSatisfy!(isSumType, SumTypes) && args.length > 0) + { + return matchImpl!(Yes.exhaustive, handlers)(args); + } +} + +/** $(DIVID avoiding-unintentional-matches, $(H3 Avoiding unintentional matches)) + * + * Sometimes, implicit conversions may cause a handler to match more types than + * intended. The example below shows two solutions to this problem. + */ +@safe unittest +{ + alias Number = SumType!(double, int); + + Number x; + + // Problem: because int implicitly converts to double, the double + // handler is used for both types, and the int handler never matches. + assert(!__traits(compiles, + x.match!( + (double d) => "got double", + (int n) => "got int" + ) + )); + + // Solution 1: put the handler for the "more specialized" type (in this + // case, int) before the handler for the type it converts to. + assert(__traits(compiles, + x.match!( + (int n) => "got int", + (double d) => "got double" + ) + )); + + // Solution 2: use a template that only accepts the exact type it's + // supposed to match, instead of any type that implicitly converts to it. + alias exactly(T, alias fun) = function (arg) + { + static assert(is(typeof(arg) == T)); + return fun(arg); + }; + + // Now, even if we put the double handler first, it will only be used for + // doubles, not ints. 
+ assert(__traits(compiles, + x.match!( + exactly!(double, d => "got double"), + exactly!(int, n => "got int") + ) + )); +} + +/** $(DIVID multiple-dispatch, $(H3 Multiple dispatch)) + * + * Pattern matching can be performed on multiple `SumType`s at once by passing + * handlers with multiple arguments. This usually leads to more concise code + * than using nested calls to `match`, as show below. + */ +@safe unittest +{ + struct Point2D { double x, y; } + struct Point3D { double x, y, z; } + + alias Point = SumType!(Point2D, Point3D); + + version (none) + { + // This function works, but the code is ugly and repetitive. + // It uses three separate calls to match! + @safe pure nothrow @nogc + bool sameDimensions(Point p1, Point p2) + { + return p1.match!( + (Point2D _) => p2.match!( + (Point2D _) => true, + _ => false + ), + (Point3D _) => p2.match!( + (Point3D _) => true, + _ => false + ) + ); + } + } + + // This version is much nicer. + @safe pure nothrow @nogc + bool sameDimensions(Point p1, Point p2) + { + alias doMatch = match!( + (Point2D _1, Point2D _2) => true, + (Point3D _1, Point3D _2) => true, + (_1, _2) => false + ); + + return doMatch(p1, p2); + } + + Point a = Point2D(1, 2); + Point b = Point2D(3, 4); + Point c = Point3D(5, 6, 7); + Point d = Point3D(8, 9, 0); + + assert( sameDimensions(a, b)); + assert( sameDimensions(c, d)); + assert(!sameDimensions(a, c)); + assert(!sameDimensions(d, b)); +} + +/** + * Attempts to call a type-appropriate function with the value held in a + * [SumType], and throws on failure. + * + * Matches are chosen using the same rules as [match], but are not required to + * be exhaustive—in other words, a type (or combination of types) is allowed to + * have no matching handler. If a type without a handler is encountered at + * runtime, a [MatchException] is thrown. + * + * Not available when compiled with `-betterC`. 
+ * + * Returns: + * The value returned from the handler that matches the currently-held type, + * if a handler was given for that type. + * + * Throws: + * [MatchException], if the currently-held type has no matching handler. + * + * See_Also: $(REF tryVisit, std,variant) + */ +version (D_Exceptions) +template tryMatch(handlers...) +{ + import std.typecons : No; + + /** + * The actual `tryMatch` function. + * + * Params: + * args = One or more [SumType] objects. + */ + auto ref tryMatch(SumTypes...)(auto ref SumTypes args) + if (allSatisfy!(isSumType, SumTypes) && args.length > 0) + { + return matchImpl!(No.exhaustive, handlers)(args); + } +} + +/** + * Thrown by [tryMatch] when an unhandled type is encountered. + * + * Not available when compiled with `-betterC`. + */ +version (D_Exceptions) +class MatchException : Exception +{ + /// + pure @safe @nogc nothrow + this(string msg, string file = __FILE__, size_t line = __LINE__) + { + super(msg, file, line); + } +} + +/** + * True if `handler` is a potential match for `Ts`, otherwise false. + * + * See the documentation for [match] for a full explanation of how matches are + * chosen. + */ +template canMatch(alias handler, Ts...) 
+if (Ts.length > 0) +{ + enum canMatch = is(typeof((ref Ts args) => handler(args))); +} + +/// +@safe unittest +{ + alias handleInt = (int i) => "got an int"; + + assert( canMatch!(handleInt, int)); + assert(!canMatch!(handleInt, string)); +} + +// Includes all overloads of the given handler +@safe unittest +{ + static struct OverloadSet + { + static void fun(int n) {} + static void fun(double d) {} + } + + assert(canMatch!(OverloadSet.fun, int)); + assert(canMatch!(OverloadSet.fun, double)); +} + +// Like aliasSeqOf!(iota(n)), but works in BetterC +private template Iota(size_t n) +{ + static if (n == 0) + { + alias Iota = AliasSeq!(); + } + else + { + alias Iota = AliasSeq!(Iota!(n - 1), n - 1); + } +} + +@safe unittest +{ + assert(is(Iota!0 == AliasSeq!())); + assert(Iota!1 == AliasSeq!(0)); + assert(Iota!3 == AliasSeq!(0, 1, 2)); +} + +/* The number that the dim-th argument's tag is multiplied by when + * converting TagTuples to and from case indices ("caseIds"). + * + * Named by analogy to the stride that the dim-th index into a + * multidimensional static array is multiplied by to calculate the + * offset of a specific element. + */ +private size_t stride(size_t dim, lengths...)() +{ + import core.checkedint : mulu; + + size_t result = 1; + bool overflow = false; + + static foreach (i; 0 .. dim) + { + result = mulu(result, lengths[i], overflow); + } + + /* The largest number matchImpl uses, numCases, is calculated with + * stride!(SumTypes.length), so as long as this overflow check + * passes, we don't need to check for overflow anywhere else. + */ + assert(!overflow, "Integer overflow"); + return result; +} + +private template matchImpl(Flag!"exhaustive" exhaustive, handlers...) 
+{ + auto ref matchImpl(SumTypes...)(auto ref SumTypes args) + if (allSatisfy!(isSumType, SumTypes) && args.length > 0) + { + alias stride(size_t i) = .stride!(i, Map!(typeCount, SumTypes)); + alias TagTuple = .TagTuple!(SumTypes); + + /* + * A list of arguments to be passed to a handler needed for the case + * labeled with `caseId`. + */ + template handlerArgs(size_t caseId) + { + enum tags = TagTuple.fromCaseId(caseId); + enum argsFrom(size_t i : tags.length) = ""; + enum argsFrom(size_t i) = "args[" ~ toCtString!i ~ "].get!(SumTypes[" ~ toCtString!i ~ "]" ~ + ".Types[" ~ toCtString!(tags[i]) ~ "])(), " ~ argsFrom!(i + 1); + enum handlerArgs = argsFrom!0; + } + + /* An AliasSeq of the types of the member values in the argument list + * returned by `handlerArgs!caseId`. + * + * Note that these are the actual (that is, qualified) types of the + * member values, which may not be the same as the types listed in + * the arguments' `.Types` properties. + */ + template valueTypes(size_t caseId) + { + enum tags = TagTuple.fromCaseId(caseId); + + template getType(size_t i) + { + enum tid = tags[i]; + alias T = SumTypes[i].Types[tid]; + alias getType = typeof(args[i].get!T()); + } + + alias valueTypes = Map!(getType, Iota!(tags.length)); + } + + /* The total number of cases is + * + * Π SumTypes[i].Types.length for 0 ≤ i < SumTypes.length + * + * Or, equivalently, + * + * ubyte[SumTypes[0].Types.length]...[SumTypes[$-1].Types.length].sizeof + * + * Conveniently, this is equal to stride!(SumTypes.length), so we can + * use that function to compute it. + */ + enum numCases = stride!(SumTypes.length); + + /* Guaranteed to never be a valid handler index, since + * handlers.length <= size_t.max. + */ + enum noMatch = size_t.max; + + // An array that maps caseIds to handler indices ("hids"). 
+ enum matches = () + { + size_t[numCases] matches; + + // Workaround for https://issues.dlang.org/show_bug.cgi?id=19561 + foreach (ref match; matches) + { + match = noMatch; + } + + static foreach (caseId; 0 .. numCases) + { + static foreach (hid, handler; handlers) + { + static if (canMatch!(handler, valueTypes!caseId)) + { + if (matches[caseId] == noMatch) + { + matches[caseId] = hid; + } + } + } + } + + return matches; + }(); + + import std.algorithm.searching : canFind; + + // Check for unreachable handlers + static foreach (hid, handler; handlers) + { + static assert(matches[].canFind(hid), + "`handlers[" ~ toCtString!hid ~ "]` " ~ + "of type `" ~ ( __traits(isTemplate, handler) + ? "template" + : typeof(handler).stringof + ) ~ "` " ~ + "never matches" + ); + } + + // Workaround for https://issues.dlang.org/show_bug.cgi?id=19993 + enum handlerName(size_t hid) = "handler" ~ toCtString!hid; + + static foreach (size_t hid, handler; handlers) + { + mixin("alias ", handlerName!hid, " = handler;"); + } + + immutable argsId = TagTuple(args).toCaseId; + + final switch (argsId) + { + static foreach (caseId; 0 .. numCases) + { + case caseId: + static if (matches[caseId] != noMatch) + { + return mixin(handlerName!(matches[caseId]), "(", handlerArgs!caseId, ")"); + } + else + { + static if (exhaustive) + { + static assert(false, + "No matching handler for types `" ~ valueTypes!caseId.stringof ~ "`"); + } + else + { + throw new MatchException( + "No matching handler for types `" ~ valueTypes!caseId.stringof ~ "`"); + } + } + } + } + + assert(false, "unreachable"); + } +} + +private enum typeCount(SumType) = SumType.Types.length; + +/* A TagTuple represents a single possible set of tags that `args` + * could have at runtime. + * + * Because D does not allow a struct to be the controlling expression + * of a switch statement, we cannot dispatch on the TagTuple directly. 
+ * Instead, we must map each TagTuple to a unique integer and generate + * a case label for each of those integers. + * + * This mapping is implemented in `fromCaseId` and `toCaseId`. It uses + * the same technique that's used to map index tuples to memory offsets + * in a multidimensional static array. + * + * For example, when `args` consists of two SumTypes with two member + * types each, the TagTuples corresponding to each case label are: + * + * case 0: TagTuple([0, 0]) + * case 1: TagTuple([1, 0]) + * case 2: TagTuple([0, 1]) + * case 3: TagTuple([1, 1]) + * + * When there is only one argument, the caseId is equal to that + * argument's tag. + */ +private struct TagTuple(SumTypes...) +{ + size_t[SumTypes.length] tags; + alias tags this; + + alias stride(size_t i) = .stride!(i, Map!(typeCount, SumTypes)); + + invariant + { + static foreach (i; 0 .. tags.length) + { + assert(tags[i] < SumTypes[i].Types.length, "Invalid tag"); + } + } + + this(ref const(SumTypes) args) + { + static foreach (i; 0 .. tags.length) + { + tags[i] = args[i].tag; + } + } + + static TagTuple fromCaseId(size_t caseId) + { + TagTuple result; + + // Most-significant to least-significant + static foreach_reverse (i; 0 .. result.length) + { + result[i] = caseId / stride!i; + caseId %= stride!i; + } + + return result; + } + + size_t toCaseId() + { + size_t result; + + static foreach (i; 0 .. 
tags.length) + { + result += tags[i] * stride!i; + } + + return result; + } +} + +// Matching +@safe unittest +{ + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + MySum y = MySum(3.14); + + assert(x.match!((int v) => true, (float v) => false)); + assert(y.match!((int v) => false, (float v) => true)); +} + +// Missing handlers +@safe unittest +{ + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + + assert(!__traits(compiles, x.match!((int x) => true))); + assert(!__traits(compiles, x.match!())); +} + +// Handlers with qualified parameters +// Disabled in BetterC due to use of dynamic arrays +version (D_BetterC) {} else +@safe unittest +{ + alias MySum = SumType!(int[], float[]); + + MySum x = MySum([1, 2, 3]); + MySum y = MySum([1.0, 2.0, 3.0]); + + assert(x.match!((const(int[]) v) => true, (const(float[]) v) => false)); + assert(y.match!((const(int[]) v) => false, (const(float[]) v) => true)); +} + +// Handlers for qualified types +// Disabled in BetterC due to use of dynamic arrays +version (D_BetterC) {} else +@safe unittest +{ + alias MySum = SumType!(immutable(int[]), immutable(float[])); + + MySum x = MySum([1, 2, 3]); + + assert(x.match!((immutable(int[]) v) => true, (immutable(float[]) v) => false)); + assert(x.match!((const(int[]) v) => true, (const(float[]) v) => false)); + // Tail-qualified parameters + assert(x.match!((immutable(int)[] v) => true, (immutable(float)[] v) => false)); + assert(x.match!((const(int)[] v) => true, (const(float)[] v) => false)); + // Generic parameters + assert(x.match!((immutable v) => true)); + assert(x.match!((const v) => true)); + // Unqualified parameters + assert(!__traits(compiles, + x.match!((int[] v) => true, (float[] v) => false) + )); +} + +// Delegate handlers +// Disabled in BetterC due to use of closures +version (D_BetterC) {} else +@safe unittest +{ + alias MySum = SumType!(int, float); + + int answer = 42; + MySum x = MySum(42); + MySum y = MySum(3.14); + + assert(x.match!((int v) => 
v == answer, (float v) => v == answer)); + assert(!y.match!((int v) => v == answer, (float v) => v == answer)); +} + +version (unittest) +{ + version (D_BetterC) + { + // std.math.isClose depends on core.runtime.math, so use a + // libc-based version for testing with -betterC + @safe pure @nogc nothrow + private bool isClose(double lhs, double rhs) + { + import core.stdc.math : fabs; + + return fabs(lhs - rhs) < 1e-5; + } + } + else + { + import std.math : isClose; + } +} + +// Generic handler +@safe unittest +{ + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + MySum y = MySum(3.14); + + assert(x.match!(v => v*2) == 84); + assert(y.match!(v => v*2).isClose(6.28)); +} + +// Fallback to generic handler +// Disabled in BetterC due to use of std.conv.to +version (D_BetterC) {} else +@safe unittest +{ + import std.conv : to; + + alias MySum = SumType!(int, float, string); + + MySum x = MySum(42); + MySum y = MySum("42"); + + assert(x.match!((string v) => v.to!int, v => v*2) == 84); + assert(y.match!((string v) => v.to!int, v => v*2) == 42); +} + +// Multiple non-overlapping generic handlers +@safe unittest +{ + import std.array : staticArray; + + alias MySum = SumType!(int, float, int[], char[]); + + static ints = staticArray([1, 2, 3]); + static chars = staticArray(['a', 'b', 'c']); + + MySum x = MySum(42); + MySum y = MySum(3.14); + MySum z = MySum(ints[]); + MySum w = MySum(chars[]); + + assert(x.match!(v => v*2, v => v.length) == 84); + assert(y.match!(v => v*2, v => v.length).isClose(6.28)); + assert(w.match!(v => v*2, v => v.length) == 3); + assert(z.match!(v => v*2, v => v.length) == 3); +} + +// Structural matching +@safe unittest +{ + static struct S1 { int x; } + static struct S2 { int y; } + alias MySum = SumType!(S1, S2); + + MySum a = MySum(S1(0)); + MySum b = MySum(S2(0)); + + assert(a.match!(s1 => s1.x + 1, s2 => s2.y - 1) == 1); + assert(b.match!(s1 => s1.x + 1, s2 => s2.y - 1) == -1); +} + +// Separate opCall handlers +@safe unittest +{ + 
static struct IntHandler + { + bool opCall(int arg) + { + return true; + } + } + + static struct FloatHandler + { + bool opCall(float arg) + { + return false; + } + } + + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + MySum y = MySum(3.14); + + assert(x.match!(IntHandler.init, FloatHandler.init)); + assert(!y.match!(IntHandler.init, FloatHandler.init)); +} + +// Compound opCall handler +@safe unittest +{ + static struct CompoundHandler + { + bool opCall(int arg) + { + return true; + } + + bool opCall(float arg) + { + return false; + } + } + + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + MySum y = MySum(3.14); + + assert(x.match!(CompoundHandler.init)); + assert(!y.match!(CompoundHandler.init)); +} + +// Ordered matching +@safe unittest +{ + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + + assert(x.match!((int v) => true, v => false)); +} + +// Non-exhaustive matching +version (D_Exceptions) +@system unittest +{ + import std.exception : assertThrown, assertNotThrown; + + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + MySum y = MySum(3.14); + + assertNotThrown!MatchException(x.tryMatch!((int n) => true)); + assertThrown!MatchException(y.tryMatch!((int n) => true)); +} + +// Non-exhaustive matching in @safe code +version (D_Exceptions) +@safe unittest +{ + SumType!(int, float) x; + + auto _ = x.tryMatch!( + (int n) => n + 1, + ); +} + +// Handlers with ref parameters +@safe unittest +{ + alias Value = SumType!(long, double); + + auto value = Value(3.14); + + value.match!( + (long) {}, + (ref double d) { d *= 2; } + ); + + assert(value.get!double.isClose(6.28)); +} + +// Unreachable handlers +@safe unittest +{ + alias MySum = SumType!(int, string); + + MySum s; + + assert(!__traits(compiles, + s.match!( + (int _) => 0, + (string _) => 1, + (double _) => 2 + ) + )); + + assert(!__traits(compiles, + s.match!( + _ => 0, + (int _) => 1 + ) + )); +} + +// Unsafe handlers +@system unittest +{ + SumType!int x; 
+ alias unsafeHandler = (int x) @system { return; }; + + assert(!__traits(compiles, () @safe + { + x.match!unsafeHandler; + })); + + auto test() @system + { + return x.match!unsafeHandler; + } +} + +// Overloaded handlers +@safe unittest +{ + static struct OverloadSet + { + static string fun(int i) { return "int"; } + static string fun(double d) { return "double"; } + } + + alias MySum = SumType!(int, double); + + MySum a = 42; + MySum b = 3.14; + + assert(a.match!(OverloadSet.fun) == "int"); + assert(b.match!(OverloadSet.fun) == "double"); +} + +// Overload sets that include SumType arguments +@safe unittest +{ + alias Inner = SumType!(int, double); + alias Outer = SumType!(Inner, string); + + static struct OverloadSet + { + @safe: + static string fun(int i) { return "int"; } + static string fun(double d) { return "double"; } + static string fun(string s) { return "string"; } + static string fun(Inner i) { return i.match!fun; } + static string fun(Outer o) { return o.match!fun; } + } + + Outer a = Inner(42); + Outer b = Inner(3.14); + Outer c = "foo"; + + assert(OverloadSet.fun(a) == "int"); + assert(OverloadSet.fun(b) == "double"); + assert(OverloadSet.fun(c) == "string"); +} + +// Overload sets with ref arguments +@safe unittest +{ + static struct OverloadSet + { + static void fun(ref int i) { i = 42; } + static void fun(ref double d) { d = 3.14; } + } + + alias MySum = SumType!(int, double); + + MySum x = 0; + MySum y = 0.0; + + x.match!(OverloadSet.fun); + y.match!(OverloadSet.fun); + + assert(x.match!((value) => is(typeof(value) == int) && value == 42)); + assert(y.match!((value) => is(typeof(value) == double) && value == 3.14)); +} + +// Overload sets with templates +@safe unittest +{ + import std.traits : isNumeric; + + static struct OverloadSet + { + static string fun(string arg) + { + return "string"; + } + + static string fun(T)(T arg) + if (isNumeric!T) + { + return "numeric"; + } + } + + alias MySum = SumType!(int, string); + + MySum x = 123; + MySum y 
= "hello"; + + assert(x.match!(OverloadSet.fun) == "numeric"); + assert(y.match!(OverloadSet.fun) == "string"); +} + +// Github issue #24 +@safe unittest +{ + void test() @nogc + { + int acc = 0; + SumType!int(1).match!((int x) => acc += x); + } +} + +// Github issue #31 +@safe unittest +{ + void test() @nogc + { + int acc = 0; + + SumType!(int, string)(1).match!( + (int x) => acc += x, + (string _) => 0, + ); + } +} + +// Types that `alias this` a SumType +@safe unittest +{ + static struct A {} + static struct B {} + static struct D { SumType!(A, B) value; alias value this; } + + auto _ = D().match!(_ => true); +} + +// Multiple dispatch +@safe unittest +{ + alias MySum = SumType!(int, string); + + static int fun(MySum x, MySum y) + { + import std.meta : Args = AliasSeq; + + return Args!(x, y).match!( + (int xv, int yv) => 0, + (string xv, int yv) => 1, + (int xv, string yv) => 2, + (string xv, string yv) => 3 + ); + } + + assert(fun(MySum(0), MySum(0)) == 0); + assert(fun(MySum(""), MySum(0)) == 1); + assert(fun(MySum(0), MySum("")) == 2); + assert(fun(MySum(""), MySum("")) == 3); +} + +// inout SumTypes +@safe unittest +{ + inout(int[]) fun(inout(SumType!(int[])) x) + { + return x.match!((inout(int[]) a) => a); + } +} + +private void destroyIfOwner(T)(ref T value) +{ + static if (hasElaborateDestructor!T) + { + destroy(value); + } +} diff --git a/source/dub/internal/dyaml/style.d b/source/dub/internal/dyaml/style.d new file mode 100644 index 0000000..c2a105c --- /dev/null +++ b/source/dub/internal/dyaml/style.d @@ -0,0 +1,37 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +///YAML node formatting styles. +module dub.internal.dyaml.style; + + +///Scalar styles. 
+enum ScalarStyle : ubyte +{ + /// Invalid (uninitialized) style + invalid = 0, + /// `|` (Literal block style) + literal, + /// `>` (Folded block style) + folded, + /// Plain scalar + plain, + /// Single quoted scalar + singleQuoted, + /// Double quoted scalar + doubleQuoted +} + +///Collection styles. +enum CollectionStyle : ubyte +{ + /// Invalid (uninitialized) style + invalid = 0, + /// Block style. + block, + /// Flow style. + flow +} diff --git a/source/dub/internal/dyaml/tagdirective.d b/source/dub/internal/dyaml/tagdirective.d new file mode 100644 index 0000000..0ccc0b9 --- /dev/null +++ b/source/dub/internal/dyaml/tagdirective.d @@ -0,0 +1,15 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +///Tag directives. +module dub.internal.dyaml.tagdirective; + +///Single tag directive. handle is the shortcut, prefix is the prefix that replaces it. +struct TagDirective +{ + string handle; + string prefix; +} diff --git a/source/dub/internal/dyaml/token.d b/source/dub/internal/dyaml/token.d new file mode 100644 index 0000000..8120196 --- /dev/null +++ b/source/dub/internal/dyaml/token.d @@ -0,0 +1,172 @@ + +// Copyright Ferdinand Majerech 2011-2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/// YAML tokens. +/// Code based on PyYAML: http://www.pyyaml.org +module dub.internal.dyaml.token; + + +import std.conv; + +import dub.internal.dyaml.encoding; +import dub.internal.dyaml.exception; +import dub.internal.dyaml.reader; +import dub.internal.dyaml.style; + + +package: + +/// Token types. 
+enum TokenID : ubyte +{ + // Invalid (uninitialized) token + invalid = 0, + directive, + documentStart, + documentEnd, + streamStart, + streamEnd, + blockSequenceStart, + blockMappingStart, + blockEnd, + flowSequenceStart, + flowMappingStart, + flowSequenceEnd, + flowMappingEnd, + key, + value, + blockEntry, + flowEntry, + alias_, + anchor, + tag, + scalar +} + +/// Specifies the type of a tag directive token. +enum DirectiveType : ubyte +{ + // YAML version directive. + yaml, + // Tag directive. + tag, + // Any other directive is "reserved" for future YAML versions. + reserved +} + +/// Token produced by scanner. +/// +/// 32 bytes on 64-bit. +struct Token +{ + @disable int opCmp(ref Token); + + // 16B + /// Value of the token, if any. + /// + /// Values are char[] instead of string, as Parser may still change them in a few + /// cases. Parser casts values to strings when producing Events. + char[] value; + // 4B + /// Start position of the token in file/stream. + Mark startMark; + // 4B + /// End position of the token in file/stream. + Mark endMark; + // 1B + /// Token type. + TokenID id; + // 1B + /// Style of scalar token, if this is a scalar token. + ScalarStyle style; + // 1B + /// Encoding, if this is a stream start token. + Encoding encoding; + // 1B + /// Type of directive for directiveToken. + DirectiveType directive; + // 4B + /// Used to split value into 2 substrings for tokens that need 2 values (tagToken) + uint valueDivider; + + /// Get string representation of the token ID. + @property string idString() @safe pure const {return id.to!string;} +} + +/// Construct a directive token. +/// +/// Params: start = Start position of the token. +/// end = End position of the token. +/// value = Value of the token. +/// directive = Directive type (YAML or TAG in YAML 1.1). 
+/// nameEnd = Position of the end of the name +Token directiveToken(const Mark start, const Mark end, char[] value, + DirectiveType directive, const uint nameEnd) @safe pure nothrow @nogc +{ + return Token(value, start, end, TokenID.directive, ScalarStyle.init, Encoding.init, + directive, nameEnd); +} + +/// Construct a simple (no value) token with specified type. +/// +/// Params: id = Type of the token. +/// start = Start position of the token. +/// end = End position of the token. +Token simpleToken(TokenID id)(const Mark start, const Mark end) +{ + return Token(null, start, end, id); +} + +/// Construct a stream start token. +/// +/// Params: start = Start position of the token. +/// end = End position of the token. +/// encoding = Encoding of the stream. +Token streamStartToken(const Mark start, const Mark end, const Encoding encoding) @safe pure nothrow @nogc +{ + return Token(null, start, end, TokenID.streamStart, ScalarStyle.invalid, encoding); +} + +/// Aliases for construction of simple token types. +alias streamEndToken = simpleToken!(TokenID.streamEnd); +alias blockSequenceStartToken = simpleToken!(TokenID.blockSequenceStart); +alias blockMappingStartToken = simpleToken!(TokenID.blockMappingStart); +alias blockEndToken = simpleToken!(TokenID.blockEnd); +alias keyToken = simpleToken!(TokenID.key); +alias valueToken = simpleToken!(TokenID.value); +alias blockEntryToken = simpleToken!(TokenID.blockEntry); +alias flowEntryToken = simpleToken!(TokenID.flowEntry); + +/// Construct a simple token with value with specified type. +/// +/// Params: id = Type of the token. +/// start = Start position of the token. +/// end = End position of the token. +/// value = Value of the token. +/// valueDivider = A hack for TagToken to store 2 values in value; the first +/// value goes up to valueDivider, the second after it. 
+Token simpleValueToken(TokenID id)(const Mark start, const Mark end, char[] value, + const uint valueDivider = uint.max) +{ + return Token(value, start, end, id, ScalarStyle.invalid, Encoding.init, + DirectiveType.init, valueDivider); +} + +/// Alias for construction of tag token. +alias tagToken = simpleValueToken!(TokenID.tag); +alias aliasToken = simpleValueToken!(TokenID.alias_); +alias anchorToken = simpleValueToken!(TokenID.anchor); + +/// Construct a scalar token. +/// +/// Params: start = Start position of the token. +/// end = End position of the token. +/// value = Value of the token. +/// style = Style of the token. +Token scalarToken(const Mark start, const Mark end, char[] value, const ScalarStyle style) @safe pure nothrow @nogc +{ + return Token(value, start, end, TokenID.scalar, style); +} diff --git a/source/dub/internal/tinyendian.d b/source/dub/internal/tinyendian.d new file mode 100644 index 0000000..d9b227a --- /dev/null +++ b/source/dub/internal/tinyendian.d @@ -0,0 +1,213 @@ +// Copyright Ferdinand Majerech 2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/// A minimal library providing functionality for changing the endianness of data. +module dub.internal.tinyendian; + +import std.system : Endian, endian; + +/// Unicode UTF encodings. +enum UTFEncoding : ubyte +{ + UTF_8, + UTF_16, + UTF_32 +} +/// +@safe unittest +{ + const ints = [314, -101]; + int[2] intsSwapBuffer = ints; + swapByteOrder(intsSwapBuffer[]); + swapByteOrder(intsSwapBuffer[]); + assert(ints == intsSwapBuffer, "Lost information when swapping byte order"); + + const floats = [3.14f, 10.1f]; + float[2] floatsSwapBuffer = floats; + swapByteOrder(floatsSwapBuffer[]); + swapByteOrder(floatsSwapBuffer[]); + assert(floats == floatsSwapBuffer, "Lost information when swapping byte order"); +} + +/** Swap byte order of items in an array in place. 
+ * + * Params: + * + * T = Item type. Must be either 2 or 4 bytes long. + * array = Buffer with values to fix byte order of. + */ +void swapByteOrder(T)(T[] array) @trusted @nogc pure nothrow +if (T.sizeof == 2 || T.sizeof == 4) +{ + // Swap the byte order of all read characters. + foreach (ref item; array) + { + static if (T.sizeof == 2) + { + import std.algorithm.mutation : swap; + swap(*cast(ubyte*)&item, *(cast(ubyte*)&item + 1)); + } + else static if (T.sizeof == 4) + { + import core.bitop : bswap; + const swapped = bswap(*cast(uint*)&item); + item = *cast(const(T)*)&swapped; + } + else static assert(false, "Unsupported T: " ~ T.stringof); + } +} + +/// See fixUTFByteOrder. +struct FixUTFByteOrderResult +{ + ubyte[] array; + UTFEncoding encoding; + Endian endian; + uint bytesStripped = 0; +} + +/** Convert byte order of an array encoded in UTF(8/16/32) to system endianness in place. + * + * Uses the UTF byte-order-mark (BOM) to determine UTF encoding. If there is no BOM + * at the beginning of array, UTF-8 is assumed (this is compatible with ASCII). The + * BOM, if any, will be removed from the buffer. + * + * If the encoding is determined to be UTF-16 or UTF-32 and there aren't enough bytes + * for the last code unit (i.e. if array.length is odd for UTF-16 or not divisible by + * 4 for UTF-32), the extra bytes (1 for UTF-16, 1-3 for UTF-32) are stripped. + * + * Note that this function does $(B not) check if the array is a valid UTF string. It + * only works with the BOM and 1,2 or 4-byte items. + * + * Params: + * + * array = The array with UTF-data. + * + * Returns: + * + * A struct with the following members: + * + * $(D ubyte[] array) A slice of the input array containing data in correct + * byte order, without BOM and in case of UTF-16/UTF-32, + * without stripped bytes, if any. + * $(D UTFEncoding encoding) Encoding of the result (UTF-8, UTF-16 or UTF-32) + * $(D std.system.Endian endian) Endianness of the original array. 
+ * $(D uint bytesStripped) Number of bytes stripped from a UTF-16/UTF-32 array, if + * any. This is non-zero only if array.length was not + * divisible by 2 or 4 for UTF-16 and UTF-32, respectively. + * + * Complexity: (BIGOH array.length) + */ +auto fixUTFByteOrder(ubyte[] array) @safe @nogc pure nothrow +{ + // Enumerates UTF BOMs, matching indices to byteOrderMarks/bomEndian. + enum BOM: ubyte + { + UTF_8 = 0, + UTF_16_LE = 1, + UTF_16_BE = 2, + UTF_32_LE = 3, + UTF_32_BE = 4, + None = ubyte.max + } + + // These 2 are from std.stream + static immutable ubyte[][5] byteOrderMarks = [ [0xEF, 0xBB, 0xBF], + [0xFF, 0xFE], + [0xFE, 0xFF], + [0xFF, 0xFE, 0x00, 0x00], + [0x00, 0x00, 0xFE, 0xFF] ]; + static immutable Endian[5] bomEndian = [ endian, + Endian.littleEndian, + Endian.bigEndian, + Endian.littleEndian, + Endian.bigEndian ]; + + // Documented in function ddoc. + + FixUTFByteOrderResult result; + + // Detect BOM, if any, in the bytes we've read. -1 means no BOM. + // Need the last match: First 2 bytes of UTF-32LE BOM match the UTF-16LE BOM. If we + // used the first match, UTF-16LE would be detected when we have a UTF-32LE BOM. + import std.algorithm.searching : startsWith; + BOM bomId = BOM.None; + foreach (i, bom; byteOrderMarks) + if (array.startsWith(bom)) + bomId = cast(BOM)i; + + result.endian = (bomId != BOM.None) ? bomEndian[bomId] : Endian.init; + + // Start of UTF data (after BOM, if any) + size_t start = 0; + // If we've read more than just the BOM, put the rest into the array. 
+ with(BOM) final switch(bomId) + { + case None: result.encoding = UTFEncoding.UTF_8; break; + case UTF_8: + start = 3; + result.encoding = UTFEncoding.UTF_8; + break; + case UTF_16_LE, UTF_16_BE: + result.bytesStripped = array.length % 2; + start = 2; + result.encoding = UTFEncoding.UTF_16; + break; + case UTF_32_LE, UTF_32_BE: + result.bytesStripped = array.length % 4; + start = 4; + result.encoding = UTFEncoding.UTF_32; + break; + } + + // If there's a BOM, we need to move data back to ensure it starts at array[0] + if (start != 0) + { + array = array[start .. $ - result.bytesStripped]; + } + + // We enforce above that array.length is divisible by 2/4 for UTF-16/32 + if (endian != result.endian) + { + if (result.encoding == UTFEncoding.UTF_16) + swapByteOrder(cast(wchar[])array); + else if (result.encoding == UTFEncoding.UTF_32) + swapByteOrder(cast(dchar[])array); + } + + result.array = array; + return result; +} +/// +@safe unittest +{ + { + ubyte[] s = [0xEF, 0xBB, 0xBF, 'a']; + FixUTFByteOrderResult r = fixUTFByteOrder(s); + assert(r.encoding == UTFEncoding.UTF_8); + assert(r.array.length == 1); + assert(r.array == ['a']); + assert(r.endian == Endian.littleEndian); + } + + { + ubyte[] s = ['a']; + FixUTFByteOrderResult r = fixUTFByteOrder(s); + assert(r.encoding == UTFEncoding.UTF_8); + assert(r.array.length == 1); + assert(r.array == ['a']); + assert(r.endian == Endian.bigEndian); + } + + { + // strip 'a' b/c not complete unit + ubyte[] s = [0xFE, 0xFF, 'a']; + FixUTFByteOrderResult r = fixUTFByteOrder(s); + assert(r.encoding == UTFEncoding.UTF_16); + assert(r.array.length == 0); + assert(r.endian == Endian.bigEndian); + } + +} diff --git a/source/dub/package_.d b/source/dub/package_.d index 3303086..522425c 100644 --- a/source/dub/package_.d +++ b/source/dub/package_.d @@ -21,7 +21,7 @@ import dub.internal.vibecompat.data.json; import dub.internal.vibecompat.inet.path; -import configy.Read : StrictMode; +import dub.internal.configy.Read : StrictMode; 
import std.algorithm; import std.array; diff --git a/source/dub/packagemanager.d b/source/dub/packagemanager.d index 7ceedca..058953c 100644 --- a/source/dub/packagemanager.d +++ b/source/dub/packagemanager.d @@ -15,10 +15,10 @@ import dub.internal.logging; import dub.package_; import dub.recipe.io; -import configy.Exceptions; -public import configy.Read : StrictMode; +import dub.internal.configy.Exceptions; +public import dub.internal.configy.Read : StrictMode; -import dyaml.stdsumtype; +import dub.internal.dyaml.stdsumtype; import std.algorithm : countUntil, filter, map, sort, canFind, remove; import std.array; diff --git a/source/dub/project.d b/source/dub/project.d index 87487db..5c06117 100644 --- a/source/dub/project.d +++ b/source/dub/project.d @@ -20,7 +20,7 @@ import dub.packagemanager; import dub.recipe.selection; -import configy.Read; +import dub.internal.configy.Read; import std.algorithm; import std.array; diff --git a/source/dub/recipe/io.d b/source/dub/recipe/io.d index e0d647d..5fedf46 100644 --- a/source/dub/recipe/io.d +++ b/source/dub/recipe/io.d @@ -11,7 +11,7 @@ import dub.internal.logging; import dub.internal.vibecompat.core.file; import dub.internal.vibecompat.inet.path; -import configy.Read; +import dub.internal.configy.Read; /** Reads a package recipe from a file. 
diff --git a/source/dub/recipe/packagerecipe.d b/source/dub/recipe/packagerecipe.d index 7037f06..74ef546 100644 --- a/source/dub/recipe/packagerecipe.d +++ b/source/dub/recipe/packagerecipe.d @@ -15,7 +15,7 @@ import dub.internal.vibecompat.core.file; import dub.internal.vibecompat.inet.path; -import configy.Attributes; +import dub.internal.configy.Attributes; import std.algorithm : findSplit, sort; import std.array : join, split; @@ -193,7 +193,7 @@ */ static SubPackage fromYAML (scope ConfigParser!SubPackage p) { - import dyaml.node; + import dub.internal.dyaml.node; if (p.node.nodeID == NodeID.mapping) return SubPackage(null, p.parseAs!PackageRecipe); @@ -310,7 +310,7 @@ */ static RecipeDependency fromYAML (scope ConfigParser!RecipeDependency p) { - import dyaml.node; + import dub.internal.dyaml.node; if (p.node.nodeID == NodeID.scalar) { auto d = YAMLFormat(p.node.as!string); @@ -703,7 +703,7 @@ private T clone(T)(ref const(T) val) { - import dyaml.stdsumtype; + import dub.internal.dyaml.stdsumtype; import std.traits : isSomeString, isDynamicArray, isAssociativeArray, isBasicType, ValueType; static if (is(T == immutable)) return val; diff --git a/source/dub/recipe/selection.d b/source/dub/recipe/selection.d index ced50e4..8ad0d25 100644 --- a/source/dub/recipe/selection.d +++ b/source/dub/recipe/selection.d @@ -6,7 +6,7 @@ import dub.dependency; import dub.internal.vibecompat.core.file : NativePath; -import configy.Attributes; +import dub.internal.configy.Attributes; import std.exception; @@ -43,7 +43,7 @@ /// Read a `Dependency` from the config file - Required to support both short and long form static SelectedDependency fromYAML (scope ConfigParser!SelectedDependency p) { - import dyaml.node; + import dub.internal.dyaml.node; if (p.node.nodeID == NodeID.scalar) return SelectedDependency(Dependency(Version(p.node.as!string))); @@ -84,7 +84,7 @@ // Ensure we can read all type of dependencies unittest { - import configy.Read : parseConfigString; + import 
dub.internal.configy.Read : parseConfigString; import dub.internal.vibecompat.core.file : NativePath; immutable string content = `{ diff --git a/source/dyaml/composer.d b/source/dyaml/composer.d deleted file mode 100644 index 6d0692a..0000000 --- a/source/dyaml/composer.d +++ /dev/null @@ -1,402 +0,0 @@ - -// Copyright Ferdinand Majerech 2011. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -/** - * Composes nodes from YAML events provided by parser. - * Code based on PyYAML: http://www.pyyaml.org - */ -module dyaml.composer; - -import core.memory; - -import std.algorithm; -import std.array; -import std.conv; -import std.exception; -import std.format; -import std.range; -import std.typecons; - -import dyaml.constructor; -import dyaml.event; -import dyaml.exception; -import dyaml.node; -import dyaml.parser; -import dyaml.resolver; - - -package: -/** - * Exception thrown at composer errors. - * - * See_Also: MarkedYAMLException - */ -class ComposerException : MarkedYAMLException -{ - mixin MarkedExceptionCtors; -} - -///Composes YAML documents from events provided by a Parser. -struct Composer -{ - private: - ///Parser providing YAML events. - Parser parser_; - ///Resolver resolving tags (data types). - Resolver resolver_; - ///Nodes associated with anchors. Used by YAML aliases. - Node[string] anchors_; - - ///Used to reduce allocations when creating pair arrays. - /// - ///We need one appender for each nesting level that involves - ///a pair array, as the inner levels are processed as a - ///part of the outer levels. Used as a stack. - Appender!(Node.Pair[])[] pairAppenders_; - ///Used to reduce allocations when creating node arrays. - /// - ///We need one appender for each nesting level that involves - ///a node array, as the inner levels are processed as a - ///part of the outer levels. Used as a stack. 
- Appender!(Node[])[] nodeAppenders_; - - public: - /** - * Construct a composer. - * - * Params: parser = Parser to provide YAML events. - * resolver = Resolver to resolve tags (data types). - */ - this(Parser parser, Resolver resolver) @safe - { - parser_ = parser; - resolver_ = resolver; - } - - /** - * Determine if there are any nodes left. - * - * Must be called before loading as it handles the stream start event. - */ - bool checkNode() @safe - { - // If next event is stream start, skip it - parser_.skipOver!"a.id == b"(EventID.streamStart); - - //True if there are more documents available. - return parser_.front.id != EventID.streamEnd; - } - - ///Get a YAML document as a node (the root of the document). - Node getNode() @safe - { - //Get the root node of the next document. - assert(parser_.front.id != EventID.streamEnd, - "Trying to get a node from Composer when there is no node to " ~ - "get. use checkNode() to determine if there is a node."); - - return composeDocument(); - } - - private: - - void skipExpected(const EventID id) @safe - { - const foundExpected = parser_.skipOver!"a.id == b"(id); - assert(foundExpected, text("Expected ", id, " not found.")); - } - ///Ensure that appenders for specified nesting levels exist. - /// - ///Params: pairAppenderLevel = Current level in the pair appender stack. - /// nodeAppenderLevel = Current level the node appender stack. - void ensureAppendersExist(const uint pairAppenderLevel, const uint nodeAppenderLevel) - @safe - { - while(pairAppenders_.length <= pairAppenderLevel) - { - pairAppenders_ ~= appender!(Node.Pair[])(); - } - while(nodeAppenders_.length <= nodeAppenderLevel) - { - nodeAppenders_ ~= appender!(Node[])(); - } - } - - ///Compose a YAML document and return its root node. - Node composeDocument() @safe - { - skipExpected(EventID.documentStart); - - //Compose the root node. 
- Node node = composeNode(0, 0); - - skipExpected(EventID.documentEnd); - - anchors_.destroy(); - return node; - } - - /// Compose a node. - /// - /// Params: pairAppenderLevel = Current level of the pair appender stack. - /// nodeAppenderLevel = Current level of the node appender stack. - Node composeNode(const uint pairAppenderLevel, const uint nodeAppenderLevel) @safe - { - if(parser_.front.id == EventID.alias_) - { - const event = parser_.front; - parser_.popFront(); - const anchor = event.anchor; - enforce((anchor in anchors_) !is null, - new ComposerException("Found undefined alias: " ~ anchor, - event.startMark)); - - //If the node referenced by the anchor is uninitialized, - //it's not finished, i.e. we're currently composing it - //and trying to use it recursively here. - enforce(anchors_[anchor] != Node(), - new ComposerException("Found recursive alias: " ~ anchor, - event.startMark)); - - return anchors_[anchor]; - } - - const event = parser_.front; - const anchor = event.anchor; - if((anchor !is null) && (anchor in anchors_) !is null) - { - throw new ComposerException("Found duplicate anchor: " ~ anchor, - event.startMark); - } - - Node result; - //Associate the anchor, if any, with an uninitialized node. - //used to detect duplicate and recursive anchors. - if(anchor !is null) - { - anchors_[anchor] = Node(); - } - - switch (parser_.front.id) - { - case EventID.scalar: - result = composeScalarNode(); - break; - case EventID.sequenceStart: - result = composeSequenceNode(pairAppenderLevel, nodeAppenderLevel); - break; - case EventID.mappingStart: - result = composeMappingNode(pairAppenderLevel, nodeAppenderLevel); - break; - default: assert(false, "This code should never be reached"); - } - - if(anchor !is null) - { - anchors_[anchor] = result; - } - return result; - } - - ///Compose a scalar node. 
- Node composeScalarNode() @safe - { - const event = parser_.front; - parser_.popFront(); - const tag = resolver_.resolve(NodeID.scalar, event.tag, event.value, - event.implicit); - - Node node = constructNode(event.startMark, event.endMark, tag, - event.value); - node.scalarStyle = event.scalarStyle; - - return node; - } - - /// Compose a sequence node. - /// - /// Params: pairAppenderLevel = Current level of the pair appender stack. - /// nodeAppenderLevel = Current level of the node appender stack. - Node composeSequenceNode(const uint pairAppenderLevel, const uint nodeAppenderLevel) - @safe - { - ensureAppendersExist(pairAppenderLevel, nodeAppenderLevel); - auto nodeAppender = &(nodeAppenders_[nodeAppenderLevel]); - - const startEvent = parser_.front; - parser_.popFront(); - const tag = resolver_.resolve(NodeID.sequence, startEvent.tag, null, - startEvent.implicit); - - while(parser_.front.id != EventID.sequenceEnd) - { - nodeAppender.put(composeNode(pairAppenderLevel, nodeAppenderLevel + 1)); - } - - Node node = constructNode(startEvent.startMark, parser_.front.endMark, - tag, nodeAppender.data.dup); - node.collectionStyle = startEvent.collectionStyle; - parser_.popFront(); - nodeAppender.clear(); - - return node; - } - - /** - * Flatten a node, merging it with nodes referenced through YAMLMerge data type. - * - * Node must be a mapping or a sequence of mappings. - * - * Params: root = Node to flatten. - * startMark = Start position of the node. - * endMark = End position of the node. - * pairAppenderLevel = Current level of the pair appender stack. - * nodeAppenderLevel = Current level of the node appender stack. - * - * Returns: Flattened mapping as pairs. - */ - Node.Pair[] flatten(ref Node root, const Mark startMark, const Mark endMark, - const uint pairAppenderLevel, const uint nodeAppenderLevel) @safe - { - void error(Node node) - { - //this is Composer, but the code is related to Constructor. 
- throw new ConstructorException("While constructing a mapping, " ~ - "expected a mapping or a list of " ~ - "mappings for merging, but found: " ~ - text(node.type) ~ - " NOTE: line/column shows topmost parent " ~ - "to which the content is being merged", - startMark, endMark); - } - - ensureAppendersExist(pairAppenderLevel, nodeAppenderLevel); - auto pairAppender = &(pairAppenders_[pairAppenderLevel]); - - final switch (root.nodeID) - { - case NodeID.mapping: - Node[] toMerge; - toMerge.reserve(root.length); - foreach (ref Node key, ref Node value; root) - { - if(key.type == NodeType.merge) - { - toMerge ~= value; - } - else - { - auto temp = Node.Pair(key, value); - pairAppender.put(temp); - } - } - foreach (node; toMerge) - { - pairAppender.put(flatten(node, startMark, endMark, - pairAppenderLevel + 1, nodeAppenderLevel)); - } - break; - case NodeID.sequence: - foreach (ref Node node; root) - { - if (node.nodeID != NodeID.mapping) - { - error(node); - } - pairAppender.put(flatten(node, startMark, endMark, - pairAppenderLevel + 1, nodeAppenderLevel)); - } - break; - case NodeID.scalar: - case NodeID.invalid: - error(root); - break; - } - - auto flattened = pairAppender.data.dup; - pairAppender.clear(); - - return flattened; - } - - /// Compose a mapping node. - /// - /// Params: pairAppenderLevel = Current level of the pair appender stack. - /// nodeAppenderLevel = Current level of the node appender stack. 
- Node composeMappingNode(const uint pairAppenderLevel, const uint nodeAppenderLevel) - @safe - { - ensureAppendersExist(pairAppenderLevel, nodeAppenderLevel); - const startEvent = parser_.front; - parser_.popFront(); - const tag = resolver_.resolve(NodeID.mapping, startEvent.tag, null, - startEvent.implicit); - auto pairAppender = &(pairAppenders_[pairAppenderLevel]); - - Tuple!(Node, Mark)[] toMerge; - while(parser_.front.id != EventID.mappingEnd) - { - auto pair = Node.Pair(composeNode(pairAppenderLevel + 1, nodeAppenderLevel), - composeNode(pairAppenderLevel + 1, nodeAppenderLevel)); - - //Need to flatten and merge the node referred by YAMLMerge. - if(pair.key.type == NodeType.merge) - { - toMerge ~= tuple(pair.value, cast(Mark)parser_.front.endMark); - } - //Not YAMLMerge, just add the pair. - else - { - pairAppender.put(pair); - } - } - foreach(node; toMerge) - { - merge(*pairAppender, flatten(node[0], startEvent.startMark, node[1], - pairAppenderLevel + 1, nodeAppenderLevel)); - } - - auto sorted = pairAppender.data.dup.sort!((x,y) => x.key > y.key); - if (sorted.length) { - foreach (index, const ref value; sorted[0 .. 
$ - 1].enumerate) - if (value.key == sorted[index + 1].key) { - const message = () @trusted { - return format("Key '%s' appears multiple times in mapping (first: %s)", - value.key.get!string, value.key.startMark); - }(); - throw new ComposerException(message, sorted[index + 1].key.startMark); - } - } - - Node node = constructNode(startEvent.startMark, parser_.front.endMark, - tag, pairAppender.data.dup); - node.collectionStyle = startEvent.collectionStyle; - parser_.popFront(); - - pairAppender.clear(); - return node; - } -} - -// Provide good error message on multiple keys (which JSON supports) -// DUB: This unittest is `@safe` from v2.100 as `message` was made `@safe`, not before -unittest -{ - import dyaml.loader : Loader; - - const str = `{ - "comment": "This is a common technique", - "name": "foobar", - "comment": "To write down comments pre-JSON5" -}`; - - try - auto node = Loader.fromString(str).load(); - catch (ComposerException exc) - assert(exc.message() == - "Key 'comment' appears multiple times in mapping " ~ - "(first: file ,line 2,column 5)\nfile ,line 4,column 5"); -} diff --git a/source/dyaml/constructor.d b/source/dyaml/constructor.d deleted file mode 100644 index 4cd1546..0000000 --- a/source/dyaml/constructor.d +++ /dev/null @@ -1,611 +0,0 @@ - -// Copyright Ferdinand Majerech 2011. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -/** - * Class that processes YAML mappings, sequences and scalars into nodes. - * This can be used to add custom data types. A tutorial can be found - * $(LINK2 https://dlang-community.github.io/D-YAML/, here). 
- */ -module dyaml.constructor; - - -import std.array; -import std.algorithm; -import std.base64; -import std.container; -import std.conv; -import std.datetime; -import std.exception; -import std.regex; -import std.string; -import std.typecons; -import std.utf; - -import dyaml.node; -import dyaml.exception; -import dyaml.style; - -package: - -// Exception thrown at constructor errors. -class ConstructorException : YAMLException -{ - /// Construct a ConstructorException. - /// - /// Params: msg = Error message. - /// start = Start position of the error context. - /// end = End position of the error context. - this(string msg, Mark start, Mark end, string file = __FILE__, size_t line = __LINE__) - @safe pure nothrow - { - super(msg ~ "\nstart: " ~ start.toString() ~ "\nend: " ~ end.toString(), - file, line); - } -} - -/** Constructs YAML values. - * - * Each YAML scalar, sequence or mapping has a tag specifying its data type. - * Constructor uses user-specifyable functions to create a node of desired - * data type from a scalar, sequence or mapping. - * - * - * Each of these functions is associated with a tag, and can process either - * a scalar, a sequence, or a mapping. The constructor passes each value to - * the function with corresponding tag, which then returns the resulting value - * that can be stored in a node. - * - * If a tag is detected with no known constructor function, it is considered an error. - */ -/* - * Construct a node. - * - * Params: start = Start position of the node. - * end = End position of the node. - * tag = Tag (data type) of the node. - * value = Value to construct node from (string, nodes or pairs). - * style = Style of the node (scalar or collection style). - * - * Returns: Constructed node. 
- */ -Node constructNode(T)(const Mark start, const Mark end, const string tag, - T value) @safe - if((is(T : string) || is(T == Node[]) || is(T == Node.Pair[]))) -{ - Node newNode; - try - { - switch(tag) - { - case "tag:yaml.org,2002:null": - newNode = Node(YAMLNull(), tag); - break; - case "tag:yaml.org,2002:bool": - static if(is(T == string)) - { - newNode = Node(constructBool(value), tag); - break; - } - else throw new Exception("Only scalars can be bools"); - case "tag:yaml.org,2002:int": - static if(is(T == string)) - { - newNode = Node(constructLong(value), tag); - break; - } - else throw new Exception("Only scalars can be ints"); - case "tag:yaml.org,2002:float": - static if(is(T == string)) - { - newNode = Node(constructReal(value), tag); - break; - } - else throw new Exception("Only scalars can be floats"); - case "tag:yaml.org,2002:binary": - static if(is(T == string)) - { - newNode = Node(constructBinary(value), tag); - break; - } - else throw new Exception("Only scalars can be binary data"); - case "tag:yaml.org,2002:timestamp": - static if(is(T == string)) - { - newNode = Node(constructTimestamp(value), tag); - break; - } - else throw new Exception("Only scalars can be timestamps"); - case "tag:yaml.org,2002:str": - static if(is(T == string)) - { - newNode = Node(constructString(value), tag); - break; - } - else throw new Exception("Only scalars can be strings"); - case "tag:yaml.org,2002:value": - static if(is(T == string)) - { - newNode = Node(constructString(value), tag); - break; - } - else throw new Exception("Only scalars can be values"); - case "tag:yaml.org,2002:omap": - static if(is(T == Node[])) - { - newNode = Node(constructOrderedMap(value), tag); - break; - } - else throw new Exception("Only sequences can be ordered maps"); - case "tag:yaml.org,2002:pairs": - static if(is(T == Node[])) - { - newNode = Node(constructPairs(value), tag); - break; - } - else throw new Exception("Only sequences can be pairs"); - case "tag:yaml.org,2002:set": 
- static if(is(T == Node.Pair[])) - { - newNode = Node(constructSet(value), tag); - break; - } - else throw new Exception("Only mappings can be sets"); - case "tag:yaml.org,2002:seq": - static if(is(T == Node[])) - { - newNode = Node(constructSequence(value), tag); - break; - } - else throw new Exception("Only sequences can be sequences"); - case "tag:yaml.org,2002:map": - static if(is(T == Node.Pair[])) - { - newNode = Node(constructMap(value), tag); - break; - } - else throw new Exception("Only mappings can be maps"); - case "tag:yaml.org,2002:merge": - newNode = Node(YAMLMerge(), tag); - break; - default: - newNode = Node(value, tag); - break; - } - } - catch(Exception e) - { - throw new ConstructorException("Error constructing " ~ typeid(T).toString() - ~ ":\n" ~ e.msg, start, end); - } - - newNode.startMark_ = start; - - return newNode; -} - -private: -// Construct a boolean _node. -bool constructBool(const string str) @safe -{ - string value = str.toLower(); - if(value.among!("yes", "true", "on")){return true;} - if(value.among!("no", "false", "off")){return false;} - throw new Exception("Unable to parse boolean value: " ~ value); -} - -// Construct an integer (long) _node. -long constructLong(const string str) @safe -{ - string value = str.replace("_", ""); - const char c = value[0]; - const long sign = c != '-' ? 1 : -1; - if(c == '-' || c == '+') - { - value = value[1 .. $]; - } - - enforce(value != "", new Exception("Unable to parse float value: " ~ value)); - - long result; - try - { - //Zero. - if(value == "0") {result = cast(long)0;} - //Binary. - else if(value.startsWith("0b")){result = sign * to!int(value[2 .. $], 2);} - //Hexadecimal. - else if(value.startsWith("0x")){result = sign * to!int(value[2 .. $], 16);} - //Octal. - else if(value[0] == '0') {result = sign * to!int(value, 8);} - //Sexagesimal. 
- else if(value.canFind(":")) - { - long val; - long base = 1; - foreach_reverse(digit; value.split(":")) - { - val += to!long(digit) * base; - base *= 60; - } - result = sign * val; - } - //Decimal. - else{result = sign * to!long(value);} - } - catch(ConvException e) - { - throw new Exception("Unable to parse integer value: " ~ value); - } - - return result; -} -@safe unittest -{ - string canonical = "685230"; - string decimal = "+685_230"; - string octal = "02472256"; - string hexadecimal = "0x_0A_74_AE"; - string binary = "0b1010_0111_0100_1010_1110"; - string sexagesimal = "190:20:30"; - - assert(685230 == constructLong(canonical)); - assert(685230 == constructLong(decimal)); - assert(685230 == constructLong(octal)); - assert(685230 == constructLong(hexadecimal)); - assert(685230 == constructLong(binary)); - assert(685230 == constructLong(sexagesimal)); -} - -// Construct a floating point (real) _node. -real constructReal(const string str) @safe -{ - string value = str.replace("_", "").toLower(); - const char c = value[0]; - const real sign = c != '-' ? 1.0 : -1.0; - if(c == '-' || c == '+') - { - value = value[1 .. $]; - } - - enforce(value != "" && value != "nan" && value != "inf" && value != "-inf", - new Exception("Unable to parse float value: " ~ value)); - - real result; - try - { - //Infinity. - if (value == ".inf"){result = sign * real.infinity;} - //Not a Number. - else if(value == ".nan"){result = real.nan;} - //Sexagesimal. - else if(value.canFind(":")) - { - real val = 0.0; - real base = 1.0; - foreach_reverse(digit; value.split(":")) - { - val += to!real(digit) * base; - base *= 60.0; - } - result = sign * val; - } - //Plain floating point. 
- else{result = sign * to!real(value);} - } - catch(ConvException e) - { - throw new Exception("Unable to parse float value: \"" ~ value ~ "\""); - } - - return result; -} -@safe unittest -{ - bool eq(real a, real b, real epsilon = 0.2) @safe - { - return a >= (b - epsilon) && a <= (b + epsilon); - } - - string canonical = "6.8523015e+5"; - string exponential = "685.230_15e+03"; - string fixed = "685_230.15"; - string sexagesimal = "190:20:30.15"; - string negativeInf = "-.inf"; - string NaN = ".NaN"; - - assert(eq(685230.15, constructReal(canonical))); - assert(eq(685230.15, constructReal(exponential))); - assert(eq(685230.15, constructReal(fixed))); - assert(eq(685230.15, constructReal(sexagesimal))); - assert(eq(-real.infinity, constructReal(negativeInf))); - assert(to!string(constructReal(NaN)) == "nan"); -} - -// Construct a binary (base64) _node. -ubyte[] constructBinary(const string value) @safe -{ - import std.ascii : newline; - import std.array : array; - - // For an unknown reason, this must be nested to work (compiler bug?). - try - { - return Base64.decode(value.representation.filter!(c => !newline.canFind(c)).array); - } - catch(Base64Exception e) - { - throw new Exception("Unable to decode base64 value: " ~ e.msg); - } -} - -@safe unittest -{ - auto test = "The Answer: 42".representation; - char[] buffer; - buffer.length = 256; - string input = Base64.encode(test, buffer).idup; - const value = constructBinary(input); - assert(value == test); - assert(value == [84, 104, 101, 32, 65, 110, 115, 119, 101, 114, 58, 32, 52, 50]); -} - -// Construct a timestamp (SysTime) _node. -SysTime constructTimestamp(const string str) @safe -{ - string value = str; - - auto YMDRegexp = regex("^([0-9][0-9][0-9][0-9])-([0-9][0-9]?)-([0-9][0-9]?)"); - auto HMSRegexp = regex("^[Tt \t]+([0-9][0-9]?):([0-9][0-9]):([0-9][0-9])(\\.[0-9]*)?"); - auto TZRegexp = regex("^[ \t]*Z|([-+][0-9][0-9]?)(:[0-9][0-9])?"); - - try - { - // First, get year, month and day. 
- auto matches = match(value, YMDRegexp); - - enforce(!matches.empty, - new Exception("Unable to parse timestamp value: " ~ value)); - - auto captures = matches.front.captures; - const year = to!int(captures[1]); - const month = to!int(captures[2]); - const day = to!int(captures[3]); - - // If available, get hour, minute, second and fraction, if present. - value = matches.front.post; - matches = match(value, HMSRegexp); - if(matches.empty) - { - return SysTime(DateTime(year, month, day), UTC()); - } - - captures = matches.front.captures; - const hour = to!int(captures[1]); - const minute = to!int(captures[2]); - const second = to!int(captures[3]); - const hectonanosecond = cast(int)(to!real("0" ~ captures[4]) * 10_000_000); - - // If available, get timezone. - value = matches.front.post; - matches = match(value, TZRegexp); - if(matches.empty || matches.front.captures[0] == "Z") - { - // No timezone. - return SysTime(DateTime(year, month, day, hour, minute, second), - hectonanosecond.dur!"hnsecs", UTC()); - } - - // We have a timezone, so parse it. - captures = matches.front.captures; - int sign = 1; - int tzHours; - if(!captures[1].empty) - { - if(captures[1][0] == '-') {sign = -1;} - tzHours = to!int(captures[1][1 .. $]); - } - const tzMinutes = (!captures[2].empty) ? to!int(captures[2][1 .. 
$]) : 0; - const tzOffset = dur!"minutes"(sign * (60 * tzHours + tzMinutes)); - - return SysTime(DateTime(year, month, day, hour, minute, second), - hectonanosecond.dur!"hnsecs", - new immutable SimpleTimeZone(tzOffset)); - } - catch(ConvException e) - { - throw new Exception("Unable to parse timestamp value " ~ value ~ " : " ~ e.msg); - } - catch(DateTimeException e) - { - throw new Exception("Invalid timestamp value " ~ value ~ " : " ~ e.msg); - } - - assert(false, "This code should never be reached"); -} -@safe unittest -{ - string timestamp(string value) - { - return constructTimestamp(value).toISOString(); - } - - string canonical = "2001-12-15T02:59:43.1Z"; - string iso8601 = "2001-12-14t21:59:43.10-05:00"; - string spaceSeparated = "2001-12-14 21:59:43.10 -5"; - string noTZ = "2001-12-15 2:59:43.10"; - string noFraction = "2001-12-15 2:59:43"; - string ymd = "2002-12-14"; - - assert(timestamp(canonical) == "20011215T025943.1Z"); - //avoiding float conversion errors - assert(timestamp(iso8601) == "20011214T215943.0999999-05:00" || - timestamp(iso8601) == "20011214T215943.1-05:00"); - assert(timestamp(spaceSeparated) == "20011214T215943.0999999-05:00" || - timestamp(spaceSeparated) == "20011214T215943.1-05:00"); - assert(timestamp(noTZ) == "20011215T025943.0999999Z" || - timestamp(noTZ) == "20011215T025943.1Z"); - assert(timestamp(noFraction) == "20011215T025943Z"); - assert(timestamp(ymd) == "20021214T000000Z"); -} - -// Construct a string _node. -string constructString(const string str) @safe -{ - return str; -} - -// Convert a sequence of single-element mappings into a sequence of pairs. 
-Node.Pair[] getPairs(string type, const Node[] nodes) @safe -{ - Node.Pair[] pairs; - pairs.reserve(nodes.length); - foreach(node; nodes) - { - enforce(node.nodeID == NodeID.mapping && node.length == 1, - new Exception("While constructing " ~ type ~ - ", expected a mapping with single element")); - - pairs ~= node.as!(Node.Pair[]); - } - - return pairs; -} - -// Construct an ordered map (ordered sequence of key:value pairs without duplicates) _node. -Node.Pair[] constructOrderedMap(const Node[] nodes) @safe -{ - auto pairs = getPairs("ordered map", nodes); - - //Detect duplicates. - //TODO this should be replaced by something with deterministic memory allocation. - auto keys = new RedBlackTree!Node(); - foreach(ref pair; pairs) - { - enforce(!(pair.key in keys), - new Exception("Duplicate entry in an ordered map: " - ~ pair.key.debugString())); - keys.insert(pair.key); - } - return pairs; -} -@safe unittest -{ - Node[] alternateTypes(uint length) @safe - { - Node[] pairs; - foreach(long i; 0 .. length) - { - auto pair = (i % 2) ? Node.Pair(i.to!string, i) : Node.Pair(i, i.to!string); - pairs ~= Node([pair]); - } - return pairs; - } - - Node[] sameType(uint length) @safe - { - Node[] pairs; - foreach(long i; 0 .. length) - { - auto pair = Node.Pair(i.to!string, i); - pairs ~= Node([pair]); - } - return pairs; - } - - assertThrown(constructOrderedMap(alternateTypes(8) ~ alternateTypes(2))); - assertNotThrown(constructOrderedMap(alternateTypes(8))); - assertThrown(constructOrderedMap(sameType(64) ~ sameType(16))); - assertThrown(constructOrderedMap(alternateTypes(64) ~ alternateTypes(16))); - assertNotThrown(constructOrderedMap(sameType(64))); - assertNotThrown(constructOrderedMap(alternateTypes(64))); -} - -// Construct a pairs (ordered sequence of key: value pairs allowing duplicates) _node. -Node.Pair[] constructPairs(const Node[] nodes) @safe -{ - return getPairs("pairs", nodes); -} - -// Construct a set _node. 
-Node[] constructSet(const Node.Pair[] pairs) @safe -{ - // In future, the map here should be replaced with something with deterministic - // memory allocation if possible. - // Detect duplicates. - ubyte[Node] map; - Node[] nodes; - nodes.reserve(pairs.length); - foreach(pair; pairs) - { - enforce((pair.key in map) is null, new Exception("Duplicate entry in a set")); - map[pair.key] = 0; - nodes ~= pair.key; - } - - return nodes; -} -@safe unittest -{ - Node.Pair[] set(uint length) @safe - { - Node.Pair[] pairs; - foreach(long i; 0 .. length) - { - pairs ~= Node.Pair(i.to!string, YAMLNull()); - } - - return pairs; - } - - auto DuplicatesShort = set(8) ~ set(2); - auto noDuplicatesShort = set(8); - auto DuplicatesLong = set(64) ~ set(4); - auto noDuplicatesLong = set(64); - - bool eq(Node.Pair[] a, Node[] b) - { - if(a.length != b.length){return false;} - foreach(i; 0 .. a.length) - { - if(a[i].key != b[i]) - { - return false; - } - } - return true; - } - - auto nodeDuplicatesShort = DuplicatesShort.dup; - auto nodeNoDuplicatesShort = noDuplicatesShort.dup; - auto nodeDuplicatesLong = DuplicatesLong.dup; - auto nodeNoDuplicatesLong = noDuplicatesLong.dup; - - assertThrown(constructSet(nodeDuplicatesShort)); - assertNotThrown(constructSet(nodeNoDuplicatesShort)); - assertThrown(constructSet(nodeDuplicatesLong)); - assertNotThrown(constructSet(nodeNoDuplicatesLong)); -} - -// Construct a sequence (array) _node. -Node[] constructSequence(Node[] nodes) @safe -{ - return nodes; -} - -// Construct an unordered map (unordered set of key:value _pairs without duplicates) _node. -Node.Pair[] constructMap(Node.Pair[] pairs) @safe -{ - //Detect duplicates. - //TODO this should be replaced by something with deterministic memory allocation. 
- auto keys = new RedBlackTree!Node(); - foreach(ref pair; pairs) - { - enforce(!(pair.key in keys), - new Exception("Duplicate entry in a map: " ~ pair.key.debugString())); - keys.insert(pair.key); - } - return pairs; -} diff --git a/source/dyaml/dumper.d b/source/dyaml/dumper.d deleted file mode 100644 index 03d3620..0000000 --- a/source/dyaml/dumper.d +++ /dev/null @@ -1,298 +0,0 @@ - -// Copyright Ferdinand Majerech 2011. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -/** - * YAML dumper. - * - * Code based on $(LINK2 http://www.pyyaml.org, PyYAML). - */ -module dyaml.dumper; - -import std.array; -import std.range.primitives; -import std.typecons; - -import dyaml.emitter; -import dyaml.event; -import dyaml.exception; -import dyaml.linebreak; -import dyaml.node; -import dyaml.representer; -import dyaml.resolver; -import dyaml.serializer; -import dyaml.style; -import dyaml.tagdirective; - - -/** - * Dumps YAML documents to files or streams. - * - * User specified Representer and/or Resolver can be used to support new - * tags / data types. - * - * Setters are provided to affect output details (style, etc.). - */ -auto dumper() -{ - auto dumper = Dumper(); - dumper.resolver = Resolver.withDefaultResolvers; - return dumper; -} - -struct Dumper -{ - private: - //Indentation width. - int indent_ = 2; - //Tag directives to use. - TagDirective[] tags_; - public: - //Resolver to resolve tags. - Resolver resolver; - //Write scalars in canonical form? - bool canonical; - //Preferred text width. - uint textWidth = 80; - //Line break to use. Unix by default. - LineBreak lineBreak = LineBreak.unix; - //YAML version string. Default is 1.1. - string YAMLVersion = "1.1"; - //Always explicitly write document start? Default is no explicit start. - bool explicitStart = false; - //Always explicitly write document end? Default is no explicit end. 
- bool explicitEnd = false; - - //Name of the output file or stream, used in error messages. - string name = ""; - - // Default style for scalar nodes. If style is $(D ScalarStyle.invalid), the _style is chosen automatically. - ScalarStyle defaultScalarStyle = ScalarStyle.invalid; - // Default style for collection nodes. If style is $(D CollectionStyle.invalid), the _style is chosen automatically. - CollectionStyle defaultCollectionStyle = CollectionStyle.invalid; - - @disable bool opEquals(ref Dumper); - @disable int opCmp(ref Dumper); - - ///Set indentation width. 2 by default. Must not be zero. - @property void indent(uint indent) pure @safe nothrow - in - { - assert(indent != 0, "Can't use zero YAML indent width"); - } - do - { - indent_ = indent; - } - - /** - * Specify tag directives. - * - * A tag directive specifies a shorthand notation for specifying _tags. - * Each tag directive associates a handle with a prefix. This allows for - * compact tag notation. - * - * Each handle specified MUST start and end with a '!' character - * (a single character "!" handle is allowed as well). - * - * Only alphanumeric characters, '-', and '__' may be used in handles. - * - * Each prefix MUST not be empty. - * - * The "!!" handle is used for default YAML _tags with prefix - * "tag:yaml.org,2002:". This can be overridden. - * - * Params: tags = Tag directives (keys are handles, values are prefixes). - */ - @property void tagDirectives(string[string] tags) pure @safe - { - TagDirective[] t; - foreach(handle, prefix; tags) - { - assert(handle.length >= 1 && handle[0] == '!' && handle[$ - 1] == '!', - "A tag handle is empty or does not start and end with a " ~ - "'!' 
character : " ~ handle); - assert(prefix.length >= 1, "A tag prefix is empty"); - t ~= TagDirective(handle, prefix); - } - tags_ = t; - } - /// - @safe unittest - { - auto dumper = dumper(); - string[string] directives; - directives["!short!"] = "tag:long.org,2011:"; - //This will emit tags starting with "tag:long.org,2011" - //with a "!short!" prefix instead. - dumper.tagDirectives(directives); - dumper.dump(new Appender!string(), Node("foo")); - } - - /** - * Dump one or more YAML _documents to the file/stream. - * - * Note that while you can call dump() multiple times on the same - * dumper, you will end up writing multiple YAML "files" to the same - * file/stream. - * - * Params: documents = Documents to _dump (root nodes of the _documents). - * - * Throws: YAMLException on error (e.g. invalid nodes, - * unable to write to file/stream). - */ - void dump(CharacterType = char, Range)(Range range, Node[] documents ...) - if (isOutputRange!(Range, CharacterType) && - isOutputRange!(Range, char) || isOutputRange!(Range, wchar) || isOutputRange!(Range, dchar)) - { - try - { - auto emitter = new Emitter!(Range, CharacterType)(range, canonical, indent_, textWidth, lineBreak); - auto serializer = Serializer(resolver, explicitStart ? Yes.explicitStart : No.explicitStart, - explicitEnd ? 
Yes.explicitEnd : No.explicitEnd, YAMLVersion, tags_); - serializer.startStream(emitter); - foreach(ref document; documents) - { - auto data = representData(document, defaultScalarStyle, defaultCollectionStyle); - serializer.serialize(emitter, data); - } - serializer.endStream(emitter); - } - catch(YAMLException e) - { - throw new YAMLException("Unable to dump YAML to stream " - ~ name ~ " : " ~ e.msg, e.file, e.line); - } - } -} -///Write to a file -@safe unittest -{ - auto node = Node([1, 2, 3, 4, 5]); - dumper().dump(new Appender!string(), node); -} -///Write multiple YAML documents to a file -@safe unittest -{ - auto node1 = Node([1, 2, 3, 4, 5]); - auto node2 = Node("This document contains only one string"); - dumper().dump(new Appender!string(), node1, node2); - //Or with an array: - dumper().dump(new Appender!string(), [node1, node2]); -} -///Write to memory -@safe unittest -{ - auto stream = new Appender!string(); - auto node = Node([1, 2, 3, 4, 5]); - dumper().dump(stream, node); -} -///Use a custom resolver to support custom data types and/or implicit tags -@safe unittest -{ - import std.regex : regex; - auto node = Node([1, 2, 3, 4, 5]); - auto dumper = dumper(); - dumper.resolver.addImplicitResolver("!tag", regex("A.*"), "A"); - dumper.dump(new Appender!string(), node); -} -/// Set default scalar style -@safe unittest -{ - auto stream = new Appender!string(); - auto node = Node("Hello world!"); - auto dumper = dumper(); - dumper.defaultScalarStyle = ScalarStyle.singleQuoted; - dumper.dump(stream, node); -} -/// Set default collection style -@safe unittest -{ - auto stream = new Appender!string(); - auto node = Node(["Hello", "world!"]); - auto dumper = dumper(); - dumper.defaultCollectionStyle = CollectionStyle.flow; - dumper.dump(stream, node); -} -// Make sure the styles are actually used -@safe unittest -{ - auto stream = new Appender!string(); - auto node = Node([Node("Hello world!"), Node(["Hello", "world!"])]); - auto dumper = dumper(); - 
dumper.defaultScalarStyle = ScalarStyle.singleQuoted; - dumper.defaultCollectionStyle = CollectionStyle.flow; - dumper.explicitEnd = false; - dumper.explicitStart = false; - dumper.YAMLVersion = null; - dumper.dump(stream, node); - assert(stream.data == "['Hello world!', ['Hello', 'world!']]\n"); -} -// Explicit document start/end markers -@safe unittest -{ - auto stream = new Appender!string(); - auto node = Node([1, 2, 3, 4, 5]); - auto dumper = dumper(); - dumper.explicitEnd = true; - dumper.explicitStart = true; - dumper.YAMLVersion = null; - dumper.dump(stream, node); - //Skip version string - assert(stream.data[0..3] == "---"); - //account for newline at end - assert(stream.data[$-4..$-1] == "..."); -} -@safe unittest -{ - auto stream = new Appender!string(); - auto node = Node([Node("Te, st2")]); - auto dumper = dumper(); - dumper.explicitStart = true; - dumper.explicitEnd = false; - dumper.YAMLVersion = null; - dumper.dump(stream, node); - assert(stream.data == "--- ['Te, st2']\n"); -} -// No explicit document start/end markers -@safe unittest -{ - auto stream = new Appender!string(); - auto node = Node([1, 2, 3, 4, 5]); - auto dumper = dumper(); - dumper.explicitEnd = false; - dumper.explicitStart = false; - dumper.YAMLVersion = null; - dumper.dump(stream, node); - //Skip version string - assert(stream.data[0..3] != "---"); - //account for newline at end - assert(stream.data[$-4..$-1] != "..."); -} -// Windows, macOS line breaks -@safe unittest -{ - auto node = Node(0); - { - auto stream = new Appender!string(); - auto dumper = dumper(); - dumper.explicitEnd = true; - dumper.explicitStart = true; - dumper.YAMLVersion = null; - dumper.lineBreak = LineBreak.windows; - dumper.dump(stream, node); - assert(stream.data == "--- 0\r\n...\r\n"); - } - { - auto stream = new Appender!string(); - auto dumper = dumper(); - dumper.explicitEnd = true; - dumper.explicitStart = true; - dumper.YAMLVersion = null; - dumper.lineBreak = LineBreak.macintosh; - 
dumper.dump(stream, node); - assert(stream.data == "--- 0\r...\r"); - } -} diff --git a/source/dyaml/emitter.d b/source/dyaml/emitter.d deleted file mode 100644 index 5aafc0e..0000000 --- a/source/dyaml/emitter.d +++ /dev/null @@ -1,1690 +0,0 @@ -// Copyright Ferdinand Majerech 2011. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -/** - * YAML emitter. - * Code based on PyYAML: http://www.pyyaml.org - */ -module dyaml.emitter; - - -import std.algorithm; -import std.array; -import std.ascii; -import std.conv; -import std.encoding; -import std.exception; -import std.format; -import std.range; -import std.string; -import std.system; -import std.typecons; -import std.utf; - -import dyaml.encoding; -import dyaml.escapes; -import dyaml.event; -import dyaml.exception; -import dyaml.linebreak; -import dyaml.queue; -import dyaml.scanner; -import dyaml.style; -import dyaml.tagdirective; - - -package: - -//Stores results of analysis of a scalar, determining e.g. what scalar style to use. -struct ScalarAnalysis -{ - //Scalar itself. - string scalar; - - enum AnalysisFlags - { - empty = 1<<0, - multiline = 1<<1, - allowFlowPlain = 1<<2, - allowBlockPlain = 1<<3, - allowSingleQuoted = 1<<4, - allowDoubleQuoted = 1<<5, - allowBlock = 1<<6, - isNull = 1<<7 - } - - ///Analysis results. - BitFlags!AnalysisFlags flags; -} - -private alias isNewLine = among!('\n', '\u0085', '\u2028', '\u2029'); - -private alias isSpecialChar = among!('#', ',', '[', ']', '{', '}', '&', '*', '!', '|', '>', '\\', '\'', '"', '%', '@', '`'); - -private alias isFlowIndicator = among!(',', '?', '[', ']', '{', '}'); - -private alias isSpace = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029', ' ', '\t'); - -//Emits YAML events into a file/stream. -struct Emitter(Range, CharType) if (isOutputRange!(Range, CharType)) -{ - private: - ///Default tag handle shortcuts and replacements. 
- static TagDirective[] defaultTagDirectives_ = - [TagDirective("!", "!"), TagDirective("!!", "tag:yaml.org,2002:")]; - - ///Stream to write to. - Range stream_; - - /// Type used for upcoming emitter steps - alias EmitterFunction = void function(scope typeof(this)*) @safe; - - ///Stack of states. - Appender!(EmitterFunction[]) states_; - - ///Current state. - EmitterFunction state_; - - ///Event queue. - Queue!Event events_; - ///Event we're currently emitting. - Event event_; - - ///Stack of previous indentation levels. - Appender!(int[]) indents_; - ///Current indentation level. - int indent_ = -1; - - ///Level of nesting in flow context. If 0, we're in block context. - uint flowLevel_ = 0; - - /// Describes context (where we are in the document). - enum Context - { - /// Root node of a document. - root, - /// Sequence. - sequence, - /// Mapping. - mappingNoSimpleKey, - /// Mapping, in a simple key. - mappingSimpleKey, - } - /// Current context. - Context context_; - - ///Characteristics of the last emitted character: - - ///Line. - uint line_ = 0; - ///Column. - uint column_ = 0; - ///Whitespace character? - bool whitespace_ = true; - ///indentation space, '-', '?', or ':'? - bool indentation_ = true; - - ///Does the document require an explicit document indicator? - bool openEnded_; - - ///Formatting details. - - ///Canonical scalar format? - bool canonical_; - ///Best indentation width. - uint bestIndent_ = 2; - ///Best text width. - uint bestWidth_ = 80; - ///Best line break character/s. - LineBreak bestLineBreak_; - - ///Tag directive handle - prefix pairs. - TagDirective[] tagDirectives_; - - ///Anchor/alias to process. - string preparedAnchor_ = null; - ///Tag to process. - string preparedTag_ = null; - - ///Analysis result of the current scalar. - ScalarAnalysis analysis_; - ///Style of the current scalar. 
- ScalarStyle style_ = ScalarStyle.invalid; - - public: - @disable int opCmp(ref Emitter); - @disable bool opEquals(ref Emitter); - - /** - * Construct an emitter. - * - * Params: stream = Output range to write to. - * canonical = Write scalars in canonical form? - * indent = Indentation width. - * lineBreak = Line break character/s. - */ - this(Range stream, const bool canonical, const int indent, const int width, - const LineBreak lineBreak) @safe - { - states_.reserve(32); - indents_.reserve(32); - stream_ = stream; - canonical_ = canonical; - nextExpected!"expectStreamStart"(); - - if(indent > 1 && indent < 10){bestIndent_ = indent;} - if(width > bestIndent_ * 2) {bestWidth_ = width;} - bestLineBreak_ = lineBreak; - - analysis_.flags.isNull = true; - } - - ///Emit an event. - void emit(Event event) @safe - { - events_.push(event); - while(!needMoreEvents()) - { - event_ = events_.pop(); - callNext(); - event_.destroy(); - } - } - - private: - ///Pop and return the newest state in states_. - EmitterFunction popState() @safe - in(states_.data.length > 0, - "Emitter: Need to pop a state but there are no states left") - { - const result = states_.data[$-1]; - states_.shrinkTo(states_.data.length - 1); - return result; - } - - void pushState(string D)() @safe - { - states_ ~= mixin("function(typeof(this)* self) { self."~D~"(); }"); - } - - ///Pop and return the newest indent in indents_. - int popIndent() @safe - in(indents_.data.length > 0, - "Emitter: Need to pop an indent level but there" ~ - " are no indent levels left") - { - const result = indents_.data[$-1]; - indents_.shrinkTo(indents_.data.length - 1); - return result; - } - - ///Write a string to the file/stream. 
- void writeString(const scope char[] str) @safe - { - static if(is(CharType == char)) - { - copy(str, stream_); - } - static if(is(CharType == wchar)) - { - const buffer = to!wstring(str); - copy(buffer, stream_); - } - static if(is(CharType == dchar)) - { - const buffer = to!dstring(str); - copy(buffer, stream_); - } - } - - ///In some cases, we wait for a few next events before emitting. - bool needMoreEvents() @safe nothrow - { - if(events_.length == 0){return true;} - - const event = events_.peek(); - if(event.id == EventID.documentStart){return needEvents(1);} - if(event.id == EventID.sequenceStart){return needEvents(2);} - if(event.id == EventID.mappingStart) {return needEvents(3);} - - return false; - } - - ///Determines if we need specified number of more events. - bool needEvents(in uint count) @safe nothrow - { - int level; - - foreach(const event; events_.range) - { - if(event.id.among!(EventID.documentStart, EventID.sequenceStart, EventID.mappingStart)) {++level;} - else if(event.id.among!(EventID.documentEnd, EventID.sequenceEnd, EventID.mappingEnd)) {--level;} - else if(event.id == EventID.streamStart){level = -1;} - - if(level < 0) - { - return false; - } - } - - return events_.length < (count + 1); - } - - ///Increase indentation level. - void increaseIndent(const Flag!"flow" flow = No.flow, const bool indentless = false) @safe - { - indents_ ~= indent_; - if(indent_ == -1) - { - indent_ = flow ? bestIndent_ : 0; - } - else if(!indentless) - { - indent_ += bestIndent_; - } - } - - ///Determines if the type of current event is as specified. Throws if no event. - bool eventTypeIs(in EventID id) const pure @safe - in(!event_.isNull, "Expected an event, but no event is available.") - { - return event_.id == id; - } - - - //States. - - - //Stream handlers. - - ///Handle start of a file/stream. 
- void expectStreamStart() @safe - in(eventTypeIs(EventID.streamStart), - "Expected streamStart, but got " ~ event_.idString) - { - - writeStreamStart(); - nextExpected!"expectDocumentStart!(Yes.first)"(); - } - - ///Expect nothing, throwing if we still have something. - void expectNothing() @safe - { - assert(0, "Expected nothing, but got " ~ event_.idString); - } - - //Document handlers. - - ///Handle start of a document. - void expectDocumentStart(Flag!"first" first)() @safe - in(eventTypeIs(EventID.documentStart) || eventTypeIs(EventID.streamEnd), - "Expected documentStart or streamEnd, but got " ~ event_.idString) - { - - if(event_.id == EventID.documentStart) - { - const YAMLVersion = event_.value; - auto tagDirectives = event_.tagDirectives; - if(openEnded_ && (YAMLVersion !is null || tagDirectives !is null)) - { - writeIndicator("...", Yes.needWhitespace); - writeIndent(); - } - - if(YAMLVersion !is null) - { - writeVersionDirective(prepareVersion(YAMLVersion)); - } - - if(tagDirectives !is null) - { - tagDirectives_ = tagDirectives; - sort!"icmp(a.handle, b.handle) < 0"(tagDirectives_); - - foreach(ref pair; tagDirectives_) - { - writeTagDirective(prepareTagHandle(pair.handle), - prepareTagPrefix(pair.prefix)); - } - } - - bool eq(ref TagDirective a, ref TagDirective b){return a.handle == b.handle;} - //Add any default tag directives that have not been overriden. 
- foreach(ref def; defaultTagDirectives_) - { - if(!std.algorithm.canFind!eq(tagDirectives_, def)) - { - tagDirectives_ ~= def; - } - } - - const implicit = first && !event_.explicitDocument && !canonical_ && - YAMLVersion is null && tagDirectives is null && - !checkEmptyDocument(); - if(!implicit) - { - writeIndent(); - writeIndicator("---", Yes.needWhitespace); - if(canonical_){writeIndent();} - } - nextExpected!"expectRootNode"(); - } - else if(event_.id == EventID.streamEnd) - { - if(openEnded_) - { - writeIndicator("...", Yes.needWhitespace); - writeIndent(); - } - writeStreamEnd(); - nextExpected!"expectNothing"(); - } - } - - ///Handle end of a document. - void expectDocumentEnd() @safe - in(eventTypeIs(EventID.documentEnd), - "Expected DocumentEnd, but got " ~ event_.idString) - { - - writeIndent(); - if(event_.explicitDocument) - { - writeIndicator("...", Yes.needWhitespace); - writeIndent(); - } - nextExpected!"expectDocumentStart!(No.first)"(); - } - - ///Handle the root node of a document. - void expectRootNode() @safe - { - pushState!"expectDocumentEnd"(); - expectNode(Context.root); - } - - ///Handle a mapping node. - // - //Params: simpleKey = Are we in a simple key? - void expectMappingNode(const bool simpleKey = false) @safe - { - expectNode(simpleKey ? Context.mappingSimpleKey : Context.mappingNoSimpleKey); - } - - ///Handle a sequence node. - void expectSequenceNode() @safe - { - expectNode(Context.sequence); - } - - ///Handle a new node. Context specifies where in the document we are. 
- void expectNode(const Context context) @safe - { - context_ = context; - - const flowCollection = event_.collectionStyle == CollectionStyle.flow; - - switch(event_.id) - { - case EventID.alias_: expectAlias(); break; - case EventID.scalar: - processAnchor("&"); - processTag(); - expectScalar(); - break; - case EventID.sequenceStart: - processAnchor("&"); - processTag(); - if(flowLevel_ > 0 || canonical_ || flowCollection || checkEmptySequence()) - { - expectFlowSequence(); - } - else - { - expectBlockSequence(); - } - break; - case EventID.mappingStart: - processAnchor("&"); - processTag(); - if(flowLevel_ > 0 || canonical_ || flowCollection || checkEmptyMapping()) - { - expectFlowMapping(); - } - else - { - expectBlockMapping(); - } - break; - default: - assert(0, "Expected alias_, scalar, sequenceStart or " ~ - "mappingStart, but got: " ~ event_.idString); - } - } - ///Handle an alias. - void expectAlias() @safe - in(event_.anchor != "", "Anchor is not specified for alias") - { - processAnchor("*"); - nextExpected(popState()); - } - - ///Handle a scalar. - void expectScalar() @safe - { - increaseIndent(Yes.flow); - processScalar(); - indent_ = popIndent(); - nextExpected(popState()); - } - - //Flow sequence handlers. - - ///Handle a flow sequence. - void expectFlowSequence() @safe - { - writeIndicator("[", Yes.needWhitespace, Yes.whitespace); - ++flowLevel_; - increaseIndent(Yes.flow); - nextExpected!"expectFlowSequenceItem!(Yes.first)"(); - } - - ///Handle a flow sequence item. 
- void expectFlowSequenceItem(Flag!"first" first)() @safe - { - if(event_.id == EventID.sequenceEnd) - { - indent_ = popIndent(); - --flowLevel_; - static if(!first) if(canonical_) - { - writeIndicator(",", No.needWhitespace); - writeIndent(); - } - writeIndicator("]", No.needWhitespace); - nextExpected(popState()); - return; - } - static if(!first){writeIndicator(",", No.needWhitespace);} - if(canonical_ || column_ > bestWidth_){writeIndent();} - pushState!"expectFlowSequenceItem!(No.first)"(); - expectSequenceNode(); - } - - //Flow mapping handlers. - - ///Handle a flow mapping. - void expectFlowMapping() @safe - { - writeIndicator("{", Yes.needWhitespace, Yes.whitespace); - ++flowLevel_; - increaseIndent(Yes.flow); - nextExpected!"expectFlowMappingKey!(Yes.first)"(); - } - - ///Handle a key in a flow mapping. - void expectFlowMappingKey(Flag!"first" first)() @safe - { - if(event_.id == EventID.mappingEnd) - { - indent_ = popIndent(); - --flowLevel_; - static if (!first) if(canonical_) - { - writeIndicator(",", No.needWhitespace); - writeIndent(); - } - writeIndicator("}", No.needWhitespace); - nextExpected(popState()); - return; - } - - static if(!first){writeIndicator(",", No.needWhitespace);} - if(canonical_ || column_ > bestWidth_){writeIndent();} - if(!canonical_ && checkSimpleKey()) - { - pushState!"expectFlowMappingSimpleValue"(); - expectMappingNode(true); - return; - } - - writeIndicator("?", Yes.needWhitespace); - pushState!"expectFlowMappingValue"(); - expectMappingNode(); - } - - ///Handle a simple value in a flow mapping. - void expectFlowMappingSimpleValue() @safe - { - writeIndicator(":", No.needWhitespace); - pushState!"expectFlowMappingKey!(No.first)"(); - expectMappingNode(); - } - - ///Handle a complex value in a flow mapping. 
- void expectFlowMappingValue() @safe - { - if(canonical_ || column_ > bestWidth_){writeIndent();} - writeIndicator(":", Yes.needWhitespace); - pushState!"expectFlowMappingKey!(No.first)"(); - expectMappingNode(); - } - - //Block sequence handlers. - - ///Handle a block sequence. - void expectBlockSequence() @safe - { - const indentless = (context_ == Context.mappingNoSimpleKey || - context_ == Context.mappingSimpleKey) && !indentation_; - increaseIndent(No.flow, indentless); - nextExpected!"expectBlockSequenceItem!(Yes.first)"(); - } - - ///Handle a block sequence item. - void expectBlockSequenceItem(Flag!"first" first)() @safe - { - static if(!first) if(event_.id == EventID.sequenceEnd) - { - indent_ = popIndent(); - nextExpected(popState()); - return; - } - - writeIndent(); - writeIndicator("-", Yes.needWhitespace, No.whitespace, Yes.indentation); - pushState!"expectBlockSequenceItem!(No.first)"(); - expectSequenceNode(); - } - - //Block mapping handlers. - - ///Handle a block mapping. - void expectBlockMapping() @safe - { - increaseIndent(No.flow); - nextExpected!"expectBlockMappingKey!(Yes.first)"(); - } - - ///Handle a key in a block mapping. - void expectBlockMappingKey(Flag!"first" first)() @safe - { - static if(!first) if(event_.id == EventID.mappingEnd) - { - indent_ = popIndent(); - nextExpected(popState()); - return; - } - - writeIndent(); - if(checkSimpleKey()) - { - pushState!"expectBlockMappingSimpleValue"(); - expectMappingNode(true); - return; - } - - writeIndicator("?", Yes.needWhitespace, No.whitespace, Yes.indentation); - pushState!"expectBlockMappingValue"(); - expectMappingNode(); - } - - ///Handle a simple value in a block mapping. - void expectBlockMappingSimpleValue() @safe - { - writeIndicator(":", No.needWhitespace); - pushState!"expectBlockMappingKey!(No.first)"(); - expectMappingNode(); - } - - ///Handle a complex value in a block mapping. 
- void expectBlockMappingValue() @safe - { - writeIndent(); - writeIndicator(":", Yes.needWhitespace, No.whitespace, Yes.indentation); - pushState!"expectBlockMappingKey!(No.first)"(); - expectMappingNode(); - } - - //Checkers. - - ///Check if an empty sequence is next. - bool checkEmptySequence() const @safe pure nothrow - { - return event_.id == EventID.sequenceStart && events_.length > 0 - && events_.peek().id == EventID.sequenceEnd; - } - - ///Check if an empty mapping is next. - bool checkEmptyMapping() const @safe pure nothrow - { - return event_.id == EventID.mappingStart && events_.length > 0 - && events_.peek().id == EventID.mappingEnd; - } - - ///Check if an empty document is next. - bool checkEmptyDocument() const @safe pure nothrow - { - if(event_.id != EventID.documentStart || events_.length == 0) - { - return false; - } - - const event = events_.peek(); - const emptyScalar = event.id == EventID.scalar && (event.anchor is null) && - (event.tag is null) && event.implicit && event.value == ""; - return emptyScalar; - } - - ///Check if a simple key is next. 
- bool checkSimpleKey() @safe - { - uint length; - const id = event_.id; - const scalar = id == EventID.scalar; - const collectionStart = id == EventID.mappingStart || - id == EventID.sequenceStart; - - if((id == EventID.alias_ || scalar || collectionStart) - && (event_.anchor !is null)) - { - if(preparedAnchor_ is null) - { - preparedAnchor_ = prepareAnchor(event_.anchor); - } - length += preparedAnchor_.length; - } - - if((scalar || collectionStart) && (event_.tag !is null)) - { - if(preparedTag_ is null){preparedTag_ = prepareTag(event_.tag);} - length += preparedTag_.length; - } - - if(scalar) - { - if(analysis_.flags.isNull){analysis_ = analyzeScalar(event_.value);} - length += analysis_.scalar.length; - } - - if(length >= 128){return false;} - - return id == EventID.alias_ || - (scalar && !analysis_.flags.empty && !analysis_.flags.multiline) || - checkEmptySequence() || - checkEmptyMapping(); - } - - ///Process and write a scalar. - void processScalar() @safe - { - if(analysis_.flags.isNull){analysis_ = analyzeScalar(event_.value);} - if(style_ == ScalarStyle.invalid) - { - style_ = chooseScalarStyle(); - } - - //if(analysis_.flags.multiline && (context_ != Context.mappingSimpleKey) && - // ([ScalarStyle.invalid, ScalarStyle.plain, ScalarStyle.singleQuoted, ScalarStyle.doubleQuoted) - // .canFind(style_)) - //{ - // writeIndent(); - //} - auto writer = ScalarWriter!(Range, CharType)(&this, analysis_.scalar, - context_ != Context.mappingSimpleKey); - final switch(style_) - { - case ScalarStyle.invalid: assert(false); - case ScalarStyle.doubleQuoted: writer.writeDoubleQuoted(); break; - case ScalarStyle.singleQuoted: writer.writeSingleQuoted(); break; - case ScalarStyle.folded: writer.writeFolded(); break; - case ScalarStyle.literal: writer.writeLiteral(); break; - case ScalarStyle.plain: writer.writePlain(); break; - } - analysis_.flags.isNull = true; - style_ = ScalarStyle.invalid; - } - - ///Process and write an anchor/alias. 
- void processAnchor(const string indicator) @safe - { - if(event_.anchor is null) - { - preparedAnchor_ = null; - return; - } - if(preparedAnchor_ is null) - { - preparedAnchor_ = prepareAnchor(event_.anchor); - } - if(preparedAnchor_ !is null && preparedAnchor_ != "") - { - writeIndicator(indicator, Yes.needWhitespace); - writeString(preparedAnchor_); - } - preparedAnchor_ = null; - } - - ///Process and write a tag. - void processTag() @safe - { - string tag = event_.tag; - - if(event_.id == EventID.scalar) - { - if(style_ == ScalarStyle.invalid){style_ = chooseScalarStyle();} - if((!canonical_ || (tag is null)) && - ((tag == "tag:yaml.org,2002:str") || (style_ == ScalarStyle.plain ? event_.implicit : !event_.implicit && (tag is null)))) - { - preparedTag_ = null; - return; - } - if(event_.implicit && (tag is null)) - { - tag = "!"; - preparedTag_ = null; - } - } - else if((!canonical_ || (tag is null)) && event_.implicit) - { - preparedTag_ = null; - return; - } - - assert(tag != "", "Tag is not specified"); - if(preparedTag_ is null){preparedTag_ = prepareTag(tag);} - if(preparedTag_ !is null && preparedTag_ != "") - { - writeIndicator(preparedTag_, Yes.needWhitespace); - } - preparedTag_ = null; - } - - ///Determine style to write the current scalar in. - ScalarStyle chooseScalarStyle() @safe - { - if(analysis_.flags.isNull){analysis_ = analyzeScalar(event_.value);} - - const style = event_.scalarStyle; - const invalidOrPlain = style == ScalarStyle.invalid || style == ScalarStyle.plain; - const block = style == ScalarStyle.literal || style == ScalarStyle.folded; - const singleQuoted = style == ScalarStyle.singleQuoted; - const doubleQuoted = style == ScalarStyle.doubleQuoted; - - const allowPlain = flowLevel_ > 0 ? 
analysis_.flags.allowFlowPlain - : analysis_.flags.allowBlockPlain; - //simple empty or multiline scalars can't be written in plain style - const simpleNonPlain = (context_ == Context.mappingSimpleKey) && - (analysis_.flags.empty || analysis_.flags.multiline); - - if(doubleQuoted || canonical_) - { - return ScalarStyle.doubleQuoted; - } - - if(invalidOrPlain && event_.implicit && !simpleNonPlain && allowPlain) - { - return ScalarStyle.plain; - } - - if(block && flowLevel_ == 0 && context_ != Context.mappingSimpleKey && - analysis_.flags.allowBlock) - { - return style; - } - - if((invalidOrPlain || singleQuoted) && - analysis_.flags.allowSingleQuoted && - !(context_ == Context.mappingSimpleKey && analysis_.flags.multiline)) - { - return ScalarStyle.singleQuoted; - } - - return ScalarStyle.doubleQuoted; - } - - ///Prepare YAML version string for output. - static string prepareVersion(const string YAMLVersion) @safe - in(YAMLVersion.split(".")[0] == "1", - "Unsupported YAML version: " ~ YAMLVersion) - { - return YAMLVersion; - } - - ///Encode an Unicode character for tag directive and write it to writer. - static void encodeChar(Writer)(ref Writer writer, in dchar c) @safe - { - char[4] data; - const bytes = encode(data, c); - //For each byte add string in format %AB , where AB are hex digits of the byte. - foreach(const char b; data[0 .. bytes]) - { - formattedWrite(writer, "%%%02X", cast(ubyte)b); - } - } - - ///Prepare tag directive handle for output. - static string prepareTagHandle(const string handle) @safe - in(handle != "", "Tag handle must not be empty") - in(handle.drop(1).dropBack(1).all!(c => isAlphaNum(c) || c.among!('-', '_')), - "Tag handle contains invalid characters") - { - return handle; - } - - ///Prepare tag directive prefix for output. 
- static string prepareTagPrefix(const string prefix) @safe - in(prefix != "", "Tag prefix must not be empty") - { - auto appender = appender!string(); - const int offset = prefix[0] == '!'; - size_t start, end; - - foreach(const size_t i, const dchar c; prefix) - { - const size_t idx = i + offset; - if(isAlphaNum(c) || c.among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '_', '.', '!', '~', '*', '\\', '\'', '(', ')', '[', ']', '%')) - { - end = idx + 1; - continue; - } - - if(start < idx){appender.put(prefix[start .. idx]);} - start = end = idx + 1; - - encodeChar(appender, c); - } - - end = min(end, prefix.length); - if(start < end){appender.put(prefix[start .. end]);} - return appender.data; - } - - ///Prepare tag for output. - string prepareTag(in string tag) @safe - in(tag != "", "Tag must not be empty") - { - - string tagString = tag; - if (tagString == "!") return "!"; - string handle; - string suffix = tagString; - - //Sort lexicographically by prefix. - sort!"icmp(a.prefix, b.prefix) < 0"(tagDirectives_); - foreach(ref pair; tagDirectives_) - { - auto prefix = pair.prefix; - if(tagString.startsWith(prefix) && - (prefix != "!" || prefix.length < tagString.length)) - { - handle = pair.handle; - suffix = tagString[prefix.length .. $]; - } - } - - auto appender = appender!string(); - appender.put(handle !is null && handle != "" ? handle : "!<"); - size_t start, end; - foreach(const dchar c; suffix) - { - if(isAlphaNum(c) || c.among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '_', '.', '~', '*', '\\', '\'', '(', ')', '[', ']') || - (c == '!' && handle != "!")) - { - ++end; - continue; - } - if(start < end){appender.put(suffix[start .. end]);} - start = end = end + 1; - - encodeChar(appender, c); - } - - if(start < end){appender.put(suffix[start .. end]);} - if(handle is null || handle == ""){appender.put(">");} - - return appender.data; - } - - ///Prepare anchor for output. 
- static string prepareAnchor(const string anchor) @safe - in(anchor != "", "Anchor must not be empty") - in(anchor.all!isNSAnchorName, "Anchor contains invalid characters") - { - return anchor; - } - - ///Analyze specifed scalar and return the analysis result. - static ScalarAnalysis analyzeScalar(string scalar) @safe - { - ScalarAnalysis analysis; - analysis.flags.isNull = false; - analysis.scalar = scalar; - - //Empty scalar is a special case. - if(scalar is null || scalar == "") - { - with(ScalarAnalysis.AnalysisFlags) - analysis.flags = - empty | - allowBlockPlain | - allowSingleQuoted | - allowDoubleQuoted; - return analysis; - } - - //Indicators and special characters (All false by default). - bool blockIndicators, flowIndicators, lineBreaks, specialCharacters; - - //Important whitespace combinations (All false by default). - bool leadingSpace, leadingBreak, trailingSpace, trailingBreak, - breakSpace, spaceBreak; - - //Check document indicators. - if(scalar.startsWith("---", "...")) - { - blockIndicators = flowIndicators = true; - } - - //First character or preceded by a whitespace. - bool preceededByWhitespace = true; - - //Last character or followed by a whitespace. - bool followedByWhitespace = scalar.length == 1 || - scalar[1].among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); - - //The previous character is a space/break (false by default). - bool previousSpace, previousBreak; - - foreach(const size_t index, const dchar c; scalar) - { - //Check for indicators. - if(index == 0) - { - //Leading indicators are special characters. - if(c.isSpecialChar) - { - flowIndicators = blockIndicators = true; - } - if(':' == c || '?' == c) - { - flowIndicators = true; - if(followedByWhitespace){blockIndicators = true;} - } - if(c == '-' && followedByWhitespace) - { - flowIndicators = blockIndicators = true; - } - } - else - { - //Some indicators cannot appear within a scalar as well. 
- if(c.isFlowIndicator){flowIndicators = true;} - if(c == ':') - { - flowIndicators = true; - if(followedByWhitespace){blockIndicators = true;} - } - if(c == '#' && preceededByWhitespace) - { - flowIndicators = blockIndicators = true; - } - } - - //Check for line breaks, special, and unicode characters. - if(c.isNewLine){lineBreaks = true;} - if(!(c == '\n' || (c >= '\x20' && c <= '\x7E')) && - !((c == '\u0085' || (c >= '\xA0' && c <= '\uD7FF') || - (c >= '\uE000' && c <= '\uFFFD')) && c != '\uFEFF')) - { - specialCharacters = true; - } - - //Detect important whitespace combinations. - if(c == ' ') - { - if(index == 0){leadingSpace = true;} - if(index == scalar.length - 1){trailingSpace = true;} - if(previousBreak){breakSpace = true;} - previousSpace = true; - previousBreak = false; - } - else if(c.isNewLine) - { - if(index == 0){leadingBreak = true;} - if(index == scalar.length - 1){trailingBreak = true;} - if(previousSpace){spaceBreak = true;} - previousSpace = false; - previousBreak = true; - } - else - { - previousSpace = previousBreak = false; - } - - //Prepare for the next character. - preceededByWhitespace = c.isSpace != 0; - followedByWhitespace = index + 2 >= scalar.length || - scalar[index + 2].isSpace; - } - - with(ScalarAnalysis.AnalysisFlags) - { - //Let's decide what styles are allowed. - analysis.flags |= allowFlowPlain | allowBlockPlain | allowSingleQuoted | - allowDoubleQuoted | allowBlock; - - //Leading and trailing whitespaces are bad for plain scalars. - if(leadingSpace || leadingBreak || trailingSpace || trailingBreak) - { - analysis.flags &= ~(allowFlowPlain | allowBlockPlain); - } - - //We do not permit trailing spaces for block scalars. - if(trailingSpace) - { - analysis.flags &= ~allowBlock; - } - - //Spaces at the beginning of a new line are only acceptable for block - //scalars. 
- if(breakSpace) - { - analysis.flags &= ~(allowFlowPlain | allowBlockPlain | allowSingleQuoted); - } - - //Spaces followed by breaks, as well as special character are only - //allowed for double quoted scalars. - if(spaceBreak || specialCharacters) - { - analysis.flags &= ~(allowFlowPlain | allowBlockPlain | allowSingleQuoted | allowBlock); - } - - //Although the plain scalar writer supports breaks, we never emit - //multiline plain scalars. - if(lineBreaks) - { - analysis.flags &= ~(allowFlowPlain | allowBlockPlain); - analysis.flags |= multiline; - } - - //Flow indicators are forbidden for flow plain scalars. - if(flowIndicators) - { - analysis.flags &= ~allowFlowPlain; - } - - //Block indicators are forbidden for block plain scalars. - if(blockIndicators) - { - analysis.flags &= ~allowBlockPlain; - } - } - return analysis; - } - - @safe unittest - { - with(analyzeScalar("").flags) - { - // workaround for empty being std.range.primitives.empty here - alias empty = ScalarAnalysis.AnalysisFlags.empty; - assert(empty && allowBlockPlain && allowSingleQuoted && allowDoubleQuoted); - } - with(analyzeScalar("a").flags) - { - assert(allowFlowPlain && allowBlockPlain && allowSingleQuoted && allowDoubleQuoted && allowBlock); - } - with(analyzeScalar(" ").flags) - { - assert(allowSingleQuoted && allowDoubleQuoted); - } - with(analyzeScalar(" a").flags) - { - assert(allowSingleQuoted && allowDoubleQuoted); - } - with(analyzeScalar("a ").flags) - { - assert(allowSingleQuoted && allowDoubleQuoted); - } - with(analyzeScalar("\na").flags) - { - assert(allowSingleQuoted && allowDoubleQuoted); - } - with(analyzeScalar("a\n").flags) - { - assert(allowSingleQuoted && allowDoubleQuoted); - } - with(analyzeScalar("\n").flags) - { - assert(multiline && allowSingleQuoted && allowDoubleQuoted && allowBlock); - } - with(analyzeScalar(" \n").flags) - { - assert(multiline && allowDoubleQuoted); - } - with(analyzeScalar("\n a").flags) - { - assert(multiline && allowDoubleQuoted && 
allowBlock); - } - } - - //Writers. - - ///Start the YAML stream (write the unicode byte order mark). - void writeStreamStart() @safe - { - //Write BOM (except for UTF-8) - static if(is(CharType == wchar) || is(CharType == dchar)) - { - stream_.put(cast(CharType)'\uFEFF'); - } - } - - ///End the YAML stream. - void writeStreamEnd() @safe {} - - ///Write an indicator (e.g. ":", "[", ">", etc.). - void writeIndicator(const scope char[] indicator, - const Flag!"needWhitespace" needWhitespace, - const Flag!"whitespace" whitespace = No.whitespace, - const Flag!"indentation" indentation = No.indentation) @safe - { - const bool prefixSpace = !whitespace_ && needWhitespace; - whitespace_ = whitespace; - indentation_ = indentation_ && indentation; - openEnded_ = false; - column_ += indicator.length; - if(prefixSpace) - { - ++column_; - writeString(" "); - } - writeString(indicator); - } - - ///Write indentation. - void writeIndent() @safe - { - const indent = indent_ == -1 ? 0 : indent_; - - if(!indentation_ || column_ > indent || (column_ == indent && !whitespace_)) - { - writeLineBreak(); - } - if(column_ < indent) - { - whitespace_ = true; - - //Used to avoid allocation of arbitrary length strings. - static immutable spaces = " "; - size_t numSpaces = indent - column_; - column_ = indent; - while(numSpaces >= spaces.length) - { - writeString(spaces); - numSpaces -= spaces.length; - } - writeString(spaces[0 .. numSpaces]); - } - } - - ///Start new line. - void writeLineBreak(const scope char[] data = null) @safe - { - whitespace_ = indentation_ = true; - ++line_; - column_ = 0; - writeString(data is null ? lineBreak(bestLineBreak_) : data); - } - - ///Write a YAML version directive. - void writeVersionDirective(const string versionText) @safe - { - writeString("%YAML "); - writeString(versionText); - writeLineBreak(); - } - - ///Write a tag directive. 
- void writeTagDirective(const string handle, const string prefix) @safe - { - writeString("%TAG "); - writeString(handle); - writeString(" "); - writeString(prefix); - writeLineBreak(); - } - void nextExpected(string D)() @safe - { - state_ = mixin("function(typeof(this)* self) { self."~D~"(); }"); - } - void nextExpected(EmitterFunction f) @safe - { - state_ = f; - } - void callNext() @safe - { - state_(&this); - } -} - - -private: - -///RAII struct used to write out scalar values. -struct ScalarWriter(Range, CharType) -{ - invariant() - { - assert(emitter_.bestIndent_ > 0 && emitter_.bestIndent_ < 10, - "Emitter bestIndent must be 1 to 9 for one-character indent hint"); - } - - private: - @disable int opCmp(ref Emitter!(Range, CharType)); - @disable bool opEquals(ref Emitter!(Range, CharType)); - - ///Used as "null" UTF-32 character. - static immutable dcharNone = dchar.max; - - ///Emitter used to emit the scalar. - Emitter!(Range, CharType)* emitter_; - - ///UTF-8 encoded text of the scalar to write. - string text_; - - ///Can we split the scalar into multiple lines? - bool split_; - ///Are we currently going over spaces in the text? - bool spaces_; - ///Are we currently going over line breaks in the text? - bool breaks_; - - ///Start and end byte of the text range we're currently working with. - size_t startByte_, endByte_; - ///End byte of the text range including the currently processed character. - size_t nextEndByte_; - ///Start and end character of the text range we're currently working with. - long startChar_, endChar_; - - public: - ///Construct a ScalarWriter using emitter to output text. - this(Emitter!(Range, CharType)* emitter, string text, const bool split = true) @safe nothrow - { - emitter_ = emitter; - text_ = text; - split_ = split; - } - - ///Write text as single quoted scalar. 
- void writeSingleQuoted() @safe - { - emitter_.writeIndicator("\'", Yes.needWhitespace); - spaces_ = breaks_ = false; - resetTextPosition(); - - do - { - const dchar c = nextChar(); - if(spaces_) - { - if(c != ' ' && tooWide() && split_ && - startByte_ != 0 && endByte_ != text_.length) - { - writeIndent(Flag!"ResetSpace".no); - updateRangeStart(); - } - else if(c != ' ') - { - writeCurrentRange(Flag!"UpdateColumn".yes); - } - } - else if(breaks_) - { - if(!c.isNewLine) - { - writeStartLineBreak(); - writeLineBreaks(); - emitter_.writeIndent(); - } - } - else if((c == dcharNone || c == '\'' || c == ' ' || c.isNewLine) - && startChar_ < endChar_) - { - writeCurrentRange(Flag!"UpdateColumn".yes); - } - if(c == '\'') - { - emitter_.column_ += 2; - emitter_.writeString("\'\'"); - startByte_ = endByte_ + 1; - startChar_ = endChar_ + 1; - } - updateBreaks(c, Flag!"UpdateSpaces".yes); - }while(endByte_ < text_.length); - - emitter_.writeIndicator("\'", No.needWhitespace); - } - - ///Write text as double quoted scalar. - void writeDoubleQuoted() @safe - { - resetTextPosition(); - emitter_.writeIndicator("\"", Yes.needWhitespace); - do - { - const dchar c = nextChar(); - //handle special characters - if(c == dcharNone || c.among!('\"', '\\', '\u0085', '\u2028', '\u2029', '\uFEFF') || - !((c >= '\x20' && c <= '\x7E') || - ((c >= '\xA0' && c <= '\uD7FF') || (c >= '\uE000' && c <= '\uFFFD')))) - { - if(startChar_ < endChar_) - { - writeCurrentRange(Flag!"UpdateColumn".yes); - } - if(c != dcharNone) - { - auto appender = appender!string(); - if(const dchar es = toEscape(c)) - { - appender.put('\\'); - appender.put(es); - } - else - { - //Write an escaped Unicode character. - const format = c <= 255 ? "\\x%02X": - c <= 65535 ? 
"\\u%04X": "\\U%08X"; - formattedWrite(appender, format, cast(uint)c); - } - - emitter_.column_ += appender.data.length; - emitter_.writeString(appender.data); - startChar_ = endChar_ + 1; - startByte_ = nextEndByte_; - } - } - if((endByte_ > 0 && endByte_ < text_.length - strideBack(text_, text_.length)) - && (c == ' ' || startChar_ >= endChar_) - && (emitter_.column_ + endChar_ - startChar_ > emitter_.bestWidth_) - && split_) - { - //text_[2:1] is ok in Python but not in D, so we have to use min() - emitter_.writeString(text_[min(startByte_, endByte_) .. endByte_]); - emitter_.writeString("\\"); - emitter_.column_ += startChar_ - endChar_ + 1; - startChar_ = max(startChar_, endChar_); - startByte_ = max(startByte_, endByte_); - - writeIndent(Flag!"ResetSpace".yes); - if(charAtStart() == ' ') - { - emitter_.writeString("\\"); - ++emitter_.column_; - } - } - }while(endByte_ < text_.length); - emitter_.writeIndicator("\"", No.needWhitespace); - } - - ///Write text as folded block scalar. - void writeFolded() @safe - { - initBlock('>'); - bool leadingSpace = true; - spaces_ = false; - breaks_ = true; - resetTextPosition(); - - do - { - const dchar c = nextChar(); - if(breaks_) - { - if(!c.isNewLine) - { - if(!leadingSpace && c != dcharNone && c != ' ') - { - writeStartLineBreak(); - } - leadingSpace = (c == ' '); - writeLineBreaks(); - if(c != dcharNone){emitter_.writeIndent();} - } - } - else if(spaces_) - { - if(c != ' ' && tooWide()) - { - writeIndent(Flag!"ResetSpace".no); - updateRangeStart(); - } - else if(c != ' ') - { - writeCurrentRange(Flag!"UpdateColumn".yes); - } - } - else if(c == dcharNone || c.isNewLine || c == ' ') - { - writeCurrentRange(Flag!"UpdateColumn".yes); - if(c == dcharNone){emitter_.writeLineBreak();} - } - updateBreaks(c, Flag!"UpdateSpaces".yes); - }while(endByte_ < text_.length); - } - - ///Write text as literal block scalar. 
- void writeLiteral() @safe - { - initBlock('|'); - breaks_ = true; - resetTextPosition(); - - do - { - const dchar c = nextChar(); - if(breaks_) - { - if(!c.isNewLine) - { - writeLineBreaks(); - if(c != dcharNone){emitter_.writeIndent();} - } - } - else if(c == dcharNone || c.isNewLine) - { - writeCurrentRange(Flag!"UpdateColumn".no); - if(c == dcharNone){emitter_.writeLineBreak();} - } - updateBreaks(c, Flag!"UpdateSpaces".no); - }while(endByte_ < text_.length); - } - - ///Write text as plain scalar. - void writePlain() @safe - { - if(emitter_.context_ == Emitter!(Range, CharType).Context.root){emitter_.openEnded_ = true;} - if(text_ == ""){return;} - if(!emitter_.whitespace_) - { - ++emitter_.column_; - emitter_.writeString(" "); - } - emitter_.whitespace_ = emitter_.indentation_ = false; - spaces_ = breaks_ = false; - resetTextPosition(); - - do - { - const dchar c = nextChar(); - if(spaces_) - { - if(c != ' ' && tooWide() && split_) - { - writeIndent(Flag!"ResetSpace".yes); - updateRangeStart(); - } - else if(c != ' ') - { - writeCurrentRange(Flag!"UpdateColumn".yes); - } - } - else if(breaks_) - { - if(!c.isNewLine) - { - writeStartLineBreak(); - writeLineBreaks(); - writeIndent(Flag!"ResetSpace".yes); - } - } - else if(c == dcharNone || c.isNewLine || c == ' ') - { - writeCurrentRange(Flag!"UpdateColumn".yes); - } - updateBreaks(c, Flag!"UpdateSpaces".yes); - }while(endByte_ < text_.length); - } - - private: - ///Get next character and move end of the text range to it. - @property dchar nextChar() pure @safe - { - ++endChar_; - endByte_ = nextEndByte_; - if(endByte_ >= text_.length){return dcharNone;} - const c = text_[nextEndByte_]; - //c is ascii, no need to decode. - if(c < 0x80) - { - ++nextEndByte_; - return c; - } - return decode(text_, nextEndByte_); - } - - ///Get character at start of the text range. - @property dchar charAtStart() const pure @safe - { - size_t idx = startByte_; - return decode(text_, idx); - } - - ///Is the current line too wide? 
- @property bool tooWide() const pure @safe nothrow - { - return startChar_ + 1 == endChar_ && - emitter_.column_ > emitter_.bestWidth_; - } - - ///Determine hints (indicators) for block scalar. - size_t determineBlockHints(char[] hints, uint bestIndent) const pure @safe - { - size_t hintsIdx; - if(text_.length == 0) - return hintsIdx; - - dchar lastChar(const string str, ref size_t end) - { - size_t idx = end = end - strideBack(str, end); - return decode(text_, idx); - } - - size_t end = text_.length; - const last = lastChar(text_, end); - const secondLast = end > 0 ? lastChar(text_, end) : 0; - - if(text_[0].isNewLine || text_[0] == ' ') - { - hints[hintsIdx++] = cast(char)('0' + bestIndent); - } - if(!last.isNewLine) - { - hints[hintsIdx++] = '-'; - } - else if(std.utf.count(text_) == 1 || secondLast.isNewLine) - { - hints[hintsIdx++] = '+'; - } - return hintsIdx; - } - - ///Initialize for block scalar writing with specified indicator. - void initBlock(const char indicator) @safe - { - char[4] hints; - hints[0] = indicator; - const hintsLength = 1 + determineBlockHints(hints[1 .. $], emitter_.bestIndent_); - emitter_.writeIndicator(hints[0 .. hintsLength], Yes.needWhitespace); - if(hints.length > 0 && hints[$ - 1] == '+') - { - emitter_.openEnded_ = true; - } - emitter_.writeLineBreak(); - } - - ///Write out the current text range. - void writeCurrentRange(const Flag!"UpdateColumn" updateColumn) @safe - { - emitter_.writeString(text_[startByte_ .. endByte_]); - if(updateColumn){emitter_.column_ += endChar_ - startChar_;} - updateRangeStart(); - } - - ///Write line breaks in the text range. - void writeLineBreaks() @safe - { - foreach(const dchar br; text_[startByte_ .. endByte_]) - { - if(br == '\n'){emitter_.writeLineBreak();} - else - { - char[4] brString; - const bytes = encode(brString, br); - emitter_.writeLineBreak(brString[0 .. bytes]); - } - } - updateRangeStart(); - } - - ///Write line break if start of the text range is a newline. 
- void writeStartLineBreak() @safe - { - if(charAtStart == '\n'){emitter_.writeLineBreak();} - } - - ///Write indentation, optionally resetting whitespace/indentation flags. - void writeIndent(const Flag!"ResetSpace" resetSpace) @safe - { - emitter_.writeIndent(); - if(resetSpace) - { - emitter_.whitespace_ = emitter_.indentation_ = false; - } - } - - ///Move start of text range to its end. - void updateRangeStart() pure @safe nothrow - { - startByte_ = endByte_; - startChar_ = endChar_; - } - - ///Update the line breaks_ flag, optionally updating the spaces_ flag. - void updateBreaks(in dchar c, const Flag!"UpdateSpaces" updateSpaces) pure @safe - { - if(c == dcharNone){return;} - breaks_ = (c.isNewLine != 0); - if(updateSpaces){spaces_ = c == ' ';} - } - - ///Move to the beginning of text. - void resetTextPosition() pure @safe nothrow - { - startByte_ = endByte_ = nextEndByte_ = 0; - startChar_ = endChar_ = -1; - } -} diff --git a/source/dyaml/encoding.d b/source/dyaml/encoding.d deleted file mode 100644 index 50c10b9..0000000 --- a/source/dyaml/encoding.d +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright Ferdinand Majerech 2014. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -module dyaml.encoding; - - -import tinyendian; - -alias Encoding = tinyendian.UTFEncoding; diff --git a/source/dyaml/escapes.d b/source/dyaml/escapes.d deleted file mode 100644 index 36fd744..0000000 --- a/source/dyaml/escapes.d +++ /dev/null @@ -1,106 +0,0 @@ - - -// Copyright Ferdinand Majerech 2011. -// Distributed under the Boost Software License, Version 1.0. 
-// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -module dyaml.escapes; - -package: - -import std.meta : AliasSeq; -alias escapes = AliasSeq!('0', 'a', 'b', 't', '\t', 'n', 'v', 'f', 'r', 'e', ' ', - '/', '\"', '\\', 'N', '_', 'L', 'P'); - -/// YAML hex codes specifying the length of the hex number. -alias escapeHexCodeList = AliasSeq!('x', 'u', 'U'); - -/// Convert a YAML escape to a dchar. -dchar fromEscape(dchar escape) @safe pure nothrow @nogc -{ - switch(escape) - { - case '0': return '\0'; - case 'a': return '\x07'; - case 'b': return '\x08'; - case 't': return '\x09'; - case '\t': return '\x09'; - case 'n': return '\x0A'; - case 'v': return '\x0B'; - case 'f': return '\x0C'; - case 'r': return '\x0D'; - case 'e': return '\x1B'; - case '/': return '/'; - case ' ': return '\x20'; - case '\"': return '\"'; - case '\\': return '\\'; - case 'N': return '\x85'; //'\u0085'; - case '_': return '\xA0'; - case 'L': return '\u2028'; - case 'P': return '\u2029'; - default: assert(false, "No such YAML escape"); - } -} - -/** - * Convert a dchar to a YAML escape. - * - * Params: - * value = The possibly escapable character. - * - * Returns: - * If the character passed as parameter can be escaped, returns the matching - * escape, otherwise returns a null character. - */ -dchar toEscape(dchar value) @safe pure nothrow @nogc -{ - switch(value) - { - case '\0': return '0'; - case '\x07': return 'a'; - case '\x08': return 'b'; - case '\x09': return 't'; - case '\x0A': return 'n'; - case '\x0B': return 'v'; - case '\x0C': return 'f'; - case '\x0D': return 'r'; - case '\x1B': return 'e'; - case '\"': return '\"'; - case '\\': return '\\'; - case '\xA0': return '_'; - case '\x85': return 'N'; - case '\u2028': return 'L'; - case '\u2029': return 'P'; - default: return 0; - } -} - -/// Get the length of a hexadecimal number determined by its hex code. -/// -/// Need a function as associative arrays don't work with @nogc. 
-/// (And this may be even faster with a function.) -uint escapeHexLength(dchar hexCode) @safe pure nothrow @nogc -{ - switch(hexCode) - { - case 'x': return 2; - case 'u': return 4; - case 'U': return 8; - default: assert(false, "No such YAML hex code"); - } -} - -// Issue #302: Support optional escaping of forward slashes in string -// for JSON compatibility -@safe unittest -{ - import dyaml.loader : Loader; - - const str = `{ - "forward/slashes": "can\/be\/optionally\/escaped" -}`; - - auto node = Loader.fromString(str).load(); - assert(node["forward/slashes"] == "can/be/optionally/escaped"); -} diff --git a/source/dyaml/event.d b/source/dyaml/event.d deleted file mode 100644 index f4a747f..0000000 --- a/source/dyaml/event.d +++ /dev/null @@ -1,243 +0,0 @@ - -// Copyright Ferdinand Majerech 2011. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -/** - * YAML events. - * Code based on PyYAML: http://www.pyyaml.org - */ -module dyaml.event; - -import std.array; -import std.conv; - -import dyaml.exception; -import dyaml.reader; -import dyaml.tagdirective; -import dyaml.style; - - -package: -///Event types. -enum EventID : ubyte -{ - invalid = 0, /// Invalid (uninitialized) event. - streamStart, /// Stream start - streamEnd, /// Stream end - documentStart, /// Document start - documentEnd, /// Document end - alias_, /// Alias - scalar, /// Scalar - sequenceStart, /// Sequence start - sequenceEnd, /// Sequence end - mappingStart, /// Mapping start - mappingEnd /// Mapping end -} - -/** - * YAML event produced by parser. - * - * 48 bytes on 64bit. - */ -struct Event -{ - @disable int opCmp(ref Event); - - ///Value of the event, if any. - string value; - ///Start position of the event in file/stream. - Mark startMark; - ///End position of the event in file/stream. - Mark endMark; - union - { - struct - { - ///Anchor of the event, if any. 
- string _anchor; - ///Tag of the event, if any. - string _tag; - } - ///Tag directives, if this is a DocumentStart. - //TagDirectives tagDirectives; - TagDirective[] _tagDirectives; - } - ///Event type. - EventID id = EventID.invalid; - ///Style of scalar event, if this is a scalar event. - ScalarStyle scalarStyle = ScalarStyle.invalid; - union - { - ///Should the tag be implicitly resolved? - bool implicit; - /** - * Is this document event explicit? - * - * Used if this is a DocumentStart or DocumentEnd. - */ - bool explicitDocument; - } - ///Collection style, if this is a SequenceStart or MappingStart. - CollectionStyle collectionStyle = CollectionStyle.invalid; - - ///Is this a null (uninitialized) event? - @property bool isNull() const pure @safe nothrow {return id == EventID.invalid;} - - ///Get string representation of the token ID. - @property string idString() const @safe {return to!string(id);} - - auto ref anchor() inout @trusted pure { - assert(id != EventID.documentStart, "DocumentStart events cannot have anchors."); - return _anchor; - } - - auto ref tag() inout @trusted pure { - assert(id != EventID.documentStart, "DocumentStart events cannot have tags."); - return _tag; - } - - auto ref tagDirectives() inout @trusted pure { - assert(id == EventID.documentStart, "Only DocumentStart events have tag directives."); - return _tagDirectives; - } -} - -/** - * Construct a simple event. - * - * Params: start = Start position of the event in the file/stream. - * end = End position of the event in the file/stream. - * anchor = Anchor, if this is an alias event. - */ -Event event(EventID id)(const Mark start, const Mark end, const string anchor = null) - @safe - in(!(id == EventID.alias_ && anchor == ""), "Missing anchor for alias event") -{ - Event result; - result.startMark = start; - result.endMark = end; - result.anchor = anchor; - result.id = id; - return result; -} - -/** - * Construct a collection (mapping or sequence) start event. 
- * - * Params: start = Start position of the event in the file/stream. - * end = End position of the event in the file/stream. - * anchor = Anchor of the sequence, if any. - * tag = Tag of the sequence, if specified. - * implicit = Should the tag be implicitly resolved? - * style = Style to use when outputting document. - */ -Event collectionStartEvent(EventID id) - (const Mark start, const Mark end, const string anchor, const string tag, - const bool implicit, const CollectionStyle style) pure @safe nothrow -{ - static assert(id == EventID.sequenceStart || id == EventID.sequenceEnd || - id == EventID.mappingStart || id == EventID.mappingEnd); - Event result; - result.startMark = start; - result.endMark = end; - result.anchor = anchor; - result.tag = tag; - result.id = id; - result.implicit = implicit; - result.collectionStyle = style; - return result; -} - -/** - * Construct a stream start event. - * - * Params: start = Start position of the event in the file/stream. - * end = End position of the event in the file/stream. - */ -Event streamStartEvent(const Mark start, const Mark end) - pure @safe nothrow -{ - Event result; - result.startMark = start; - result.endMark = end; - result.id = EventID.streamStart; - return result; -} - -///Aliases for simple events. -alias streamEndEvent = event!(EventID.streamEnd); -alias aliasEvent = event!(EventID.alias_); -alias sequenceEndEvent = event!(EventID.sequenceEnd); -alias mappingEndEvent = event!(EventID.mappingEnd); - -///Aliases for collection start events. -alias sequenceStartEvent = collectionStartEvent!(EventID.sequenceStart); -alias mappingStartEvent = collectionStartEvent!(EventID.mappingStart); - -/** - * Construct a document start event. - * - * Params: start = Start position of the event in the file/stream. - * end = End position of the event in the file/stream. - * explicit = Is this an explicit document start? - * YAMLVersion = YAML version string of the document. 
- * tagDirectives = Tag directives of the document. - */ -Event documentStartEvent(const Mark start, const Mark end, const bool explicit, string YAMLVersion, - TagDirective[] tagDirectives) pure @safe nothrow -{ - Event result; - result.value = YAMLVersion; - result.startMark = start; - result.endMark = end; - result.id = EventID.documentStart; - result.explicitDocument = explicit; - result.tagDirectives = tagDirectives; - return result; -} - -/** - * Construct a document end event. - * - * Params: start = Start position of the event in the file/stream. - * end = End position of the event in the file/stream. - * explicit = Is this an explicit document end? - */ -Event documentEndEvent(const Mark start, const Mark end, const bool explicit) pure @safe nothrow -{ - Event result; - result.startMark = start; - result.endMark = end; - result.id = EventID.documentEnd; - result.explicitDocument = explicit; - return result; -} - -/// Construct a scalar event. -/// -/// Params: start = Start position of the event in the file/stream. -/// end = End position of the event in the file/stream. -/// anchor = Anchor of the scalar, if any. -/// tag = Tag of the scalar, if specified. -/// implicit = Should the tag be implicitly resolved? -/// value = String value of the scalar. -/// style = Scalar style. -Event scalarEvent(const Mark start, const Mark end, const string anchor, const string tag, - const bool implicit, const string value, - const ScalarStyle style = ScalarStyle.invalid) @safe pure nothrow @nogc -{ - Event result; - result.value = value; - result.startMark = start; - result.endMark = end; - - result.anchor = anchor; - result.tag = tag; - - result.id = EventID.scalar; - result.scalarStyle = style; - result.implicit = implicit; - return result; -} diff --git a/source/dyaml/exception.d b/source/dyaml/exception.d deleted file mode 100644 index 145e9c3..0000000 --- a/source/dyaml/exception.d +++ /dev/null @@ -1,171 +0,0 @@ - -// Copyright Ferdinand Majerech 2011. 
-// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -///Exceptions thrown by D:YAML and _exception related code. -module dyaml.exception; - - -import std.algorithm; -import std.array; -import std.string; -import std.conv; - - -/// Base class for all exceptions thrown by D:YAML. -class YAMLException : Exception -{ - /// Construct a YAMLException with specified message and position where it was thrown. - public this(string msg, string file = __FILE__, size_t line = __LINE__) - @safe pure nothrow @nogc - { - super(msg, file, line); - } -} - -/// Position in a YAML stream, used for error messages. -struct Mark -{ - package: - /// File name. - string name_; - /// Line number. - ushort line_; - /// Column number. - ushort column_; - - public: - /// Construct a Mark with specified line and column in the file. - this(string name, const uint line, const uint column) @safe pure nothrow @nogc - { - name_ = name; - line_ = cast(ushort)min(ushort.max, line); - // This *will* overflow on extremely wide files but saves CPU time - // (mark ctor takes ~5% of time) - column_ = cast(ushort)column; - } - - /// Get a file name. - @property string name() @safe pure nothrow @nogc const - { - return name_; - } - - /// Get a line number. - @property ushort line() @safe pure nothrow @nogc const - { - return line_; - } - - /// Get a column number. - @property ushort column() @safe pure nothrow @nogc const - { - return column_; - } - - /// Duplicate a mark - Mark dup () const scope @safe pure nothrow - { - return Mark(this.name_.idup, this.line_, this.column_); - } - - /// Get a string representation of the mark. - string toString() const scope @safe pure nothrow - { - // Line/column numbers start at zero internally, make them start at 1. - static string clamped(ushort v) @safe pure nothrow - { - return text(v + 1, v == ushort.max ? 
" or higher" : ""); - } - return "file " ~ name_ ~ ",line " ~ clamped(line_) ~ ",column " ~ clamped(column_); - } -} - -// Base class of YAML exceptions with marked positions of the problem. -abstract class MarkedYAMLException : YAMLException -{ - /// Position of the error. - Mark mark; - - // Construct a MarkedYAMLException with specified context and problem. - this(string context, scope const Mark contextMark, - string problem, scope const Mark problemMark, - string file = __FILE__, size_t line = __LINE__) @safe pure nothrow - { - const msg = context ~ '\n' ~ - (contextMark != problemMark ? contextMark.toString() ~ '\n' : "") ~ - problem ~ '\n' ~ problemMark.toString() ~ '\n'; - super(msg, file, line); - mark = problemMark.dup; - } - - // Construct a MarkedYAMLException with specified problem. - this(string problem, scope const Mark problemMark, - string file = __FILE__, size_t line = __LINE__) - @safe pure nothrow - { - super(problem ~ '\n' ~ problemMark.toString(), file, line); - mark = problemMark.dup; - } - - /// Construct a MarkedYAMLException from a struct storing constructor parameters. - this(ref const(MarkedYAMLExceptionData) data) @safe pure nothrow - { - with(data) this(context, contextMark, problem, problemMark); - } -} - -package: -// A struct storing parameters to the MarkedYAMLException constructor. -struct MarkedYAMLExceptionData -{ - // Context of the error. - string context; - // Position of the context in a YAML buffer. - Mark contextMark; - // The error itself. - string problem; - // Position if the error. - Mark problemMark; -} - -// Constructors of YAML exceptions are mostly the same, so we use a mixin. -// -// See_Also: YAMLException -template ExceptionCtors() -{ - public this(string msg, string file = __FILE__, size_t line = __LINE__) - @safe pure nothrow - { - super(msg, file, line); - } -} - -// Constructors of marked YAML exceptions are mostly the same, so we use a mixin. 
-// -// See_Also: MarkedYAMLException -template MarkedExceptionCtors() -{ - public: - this(string context, const Mark contextMark, string problem, - const Mark problemMark, string file = __FILE__, size_t line = __LINE__) - @safe pure nothrow - { - super(context, contextMark, problem, problemMark, - file, line); - } - - this(string problem, const Mark problemMark, - string file = __FILE__, size_t line = __LINE__) - @safe pure nothrow - { - super(problem, problemMark, file, line); - } - - this(ref const(MarkedYAMLExceptionData) data) @safe pure nothrow - { - super(data); - } -} diff --git a/source/dyaml/linebreak.d b/source/dyaml/linebreak.d deleted file mode 100644 index 1f0f661..0000000 --- a/source/dyaml/linebreak.d +++ /dev/null @@ -1,32 +0,0 @@ - -// Copyright Ferdinand Majerech 2011. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -module dyaml.linebreak; - - -///Enumerates platform specific line breaks. -enum LineBreak -{ - ///Unix line break ("\n"). - unix, - ///Windows line break ("\r\n"). - windows, - ///Macintosh line break ("\r"). - macintosh -} - -package: - -//Get line break string for specified line break. -string lineBreak(in LineBreak b) pure @safe nothrow -{ - final switch(b) - { - case LineBreak.unix: return "\n"; - case LineBreak.windows: return "\r\n"; - case LineBreak.macintosh: return "\r"; - } -} diff --git a/source/dyaml/loader.d b/source/dyaml/loader.d deleted file mode 100644 index 6638dfc..0000000 --- a/source/dyaml/loader.d +++ /dev/null @@ -1,413 +0,0 @@ - -// Copyright Ferdinand Majerech 2011. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -/// Class used to load YAML documents. 
-module dyaml.loader; - - -import std.exception; -import std.file; -import std.stdio : File; -import std.string; - -import dyaml.composer; -import dyaml.constructor; -import dyaml.event; -import dyaml.exception; -import dyaml.node; -import dyaml.parser; -import dyaml.reader; -import dyaml.resolver; -import dyaml.scanner; -import dyaml.token; - - -/** Loads YAML documents from files or char[]. - * - * User specified Constructor and/or Resolver can be used to support new - * tags / data types. - */ -struct Loader -{ - private: - // Processes character data to YAML tokens. - Scanner scanner_; - // Processes tokens to YAML events. - Parser parser_; - // Resolves tags (data types). - Resolver resolver_; - // Name of the input file or stream, used in error messages. - string name_ = ""; - // Are we done loading? - bool done_; - // Last node read from stream - Node currentNode; - // Has the range interface been initialized yet? - bool rangeInitialized; - - public: - @disable this(); - @disable int opCmp(ref Loader); - @disable bool opEquals(ref Loader); - - /** Construct a Loader to load YAML from a file. - * - * Params: filename = Name of the file to load from. - * file = Already-opened file to load from. - * - * Throws: YAMLException if the file could not be opened or read. - */ - static Loader fromFile(string filename) @trusted - { - try - { - auto loader = Loader(std.file.read(filename), filename); - return loader; - } - catch(FileException e) - { - throw new YAMLException("Unable to open file %s for YAML loading: %s" - .format(filename, e.msg), e.file, e.line); - } - } - /// ditto - static Loader fromFile(File file) @system - { - auto loader = Loader(file.byChunk(4096).join, file.name); - return loader; - } - - /** Construct a Loader to load YAML from a string. - * - * Params: - * data = String to load YAML from. The char[] version $(B will) - * overwrite its input during parsing as D:YAML reuses memory. 
- * filename = The filename to give to the Loader, defaults to `""` - * - * Returns: Loader loading YAML from given string. - * - * Throws: - * - * YAMLException if data could not be read (e.g. a decoding error) - */ - static Loader fromString(char[] data, string filename = "") @safe - { - return Loader(cast(ubyte[])data, filename); - } - /// Ditto - static Loader fromString(string data, string filename = "") @safe - { - return fromString(data.dup, filename); - } - /// Load a char[]. - @safe unittest - { - assert(Loader.fromString("42".dup).load().as!int == 42); - } - /// Load a string. - @safe unittest - { - assert(Loader.fromString("42").load().as!int == 42); - } - - /** Construct a Loader to load YAML from a buffer. - * - * Params: yamlData = Buffer with YAML data to load. This may be e.g. a file - * loaded to memory or a string with YAML data. Note that - * buffer $(B will) be overwritten, as D:YAML minimizes - * memory allocations by reusing the input _buffer. - * $(B Must not be deleted or modified by the user as long - * as nodes loaded by this Loader are in use!) - Nodes may - * refer to data in this buffer. - * - * Note that D:YAML looks for byte-order-marks YAML files encoded in - * UTF-16/UTF-32 (and sometimes UTF-8) use to specify the encoding and - * endianness, so it should be enough to load an entire file to a buffer and - * pass it to D:YAML, regardless of Unicode encoding. - * - * Throws: YAMLException if yamlData contains data illegal in YAML. 
- */ - static Loader fromBuffer(ubyte[] yamlData) @safe - { - return Loader(yamlData); - } - /// Ditto - static Loader fromBuffer(void[] yamlData) @system - { - return Loader(yamlData); - } - /// Ditto - private this(void[] yamlData, string name = "") @system - { - this(cast(ubyte[])yamlData, name); - } - /// Ditto - private this(ubyte[] yamlData, string name = "") @safe - { - resolver_ = Resolver.withDefaultResolvers; - name_ = name; - try - { - auto reader_ = new Reader(yamlData, name); - scanner_ = Scanner(reader_); - parser_ = new Parser(scanner_); - } - catch(YAMLException e) - { - throw new YAMLException("Unable to open %s for YAML loading: %s" - .format(name_, e.msg), e.file, e.line); - } - } - - - /// Set stream _name. Used in debugging messages. - void name(string name) pure @safe nothrow @nogc - { - name_ = name; - scanner_.name = name; - } - - /// Specify custom Resolver to use. - auto ref resolver() pure @safe nothrow @nogc - { - return resolver_; - } - - /** Load single YAML document. - * - * If none or more than one YAML document is found, this throws a YAMLException. - * - * This can only be called once; this is enforced by contract. - * - * Returns: Root node of the document. - * - * Throws: YAMLException if there wasn't exactly one document - * or on a YAML parsing error. - */ - Node load() @safe - { - enforce!YAMLException(!empty, "Zero documents in stream"); - auto output = front; - popFront(); - enforce!YAMLException(empty, "More than one document in stream"); - return output; - } - - /** Implements the empty range primitive. - * - * If there's no more documents left in the stream, this will be true. - * - * Returns: `true` if no more documents left, `false` otherwise. - */ - bool empty() @safe - { - // currentNode and done_ are both invalid until popFront is called once - if (!rangeInitialized) - { - popFront(); - } - return done_; - } - /** Implements the popFront range primitive. - * - * Reads the next document from the stream, if possible. 
- */ - void popFront() @safe - { - // Composer initialization is done here in case the constructor is - // modified, which is a pretty common case. - static Composer composer; - if (!rangeInitialized) - { - composer = Composer(parser_, resolver_); - rangeInitialized = true; - } - assert(!done_, "Loader.popFront called on empty range"); - if (composer.checkNode()) - { - currentNode = composer.getNode(); - } - else - { - done_ = true; - } - } - /** Implements the front range primitive. - * - * Returns: the current document as a Node. - */ - Node front() @safe - { - // currentNode and done_ are both invalid until popFront is called once - if (!rangeInitialized) - { - popFront(); - } - return currentNode; - } - - // Scan all tokens, throwing them away. Used for benchmarking. - void scanBench() @safe - { - try - { - while(!scanner_.empty) - { - scanner_.popFront(); - } - } - catch(YAMLException e) - { - throw new YAMLException("Unable to scan YAML from stream " ~ - name_ ~ " : " ~ e.msg, e.file, e.line); - } - } - - - // Parse and return all events. Used for debugging. 
- auto parse() @safe - { - return parser_; - } -} -/// Load single YAML document from a file: -@safe unittest -{ - write("example.yaml", "Hello world!"); - auto rootNode = Loader.fromFile("example.yaml").load(); - assert(rootNode == "Hello world!"); -} -/// Load single YAML document from an already-opened file: -@system unittest -{ - // Open a temporary file - auto file = File.tmpfile; - // Write valid YAML - file.write("Hello world!"); - // Return to the beginning - file.seek(0); - // Load document - auto rootNode = Loader.fromFile(file).load(); - assert(rootNode == "Hello world!"); -} -/// Load all YAML documents from a file: -@safe unittest -{ - import std.array : array; - import std.file : write; - write("example.yaml", - "---\n"~ - "Hello world!\n"~ - "...\n"~ - "---\n"~ - "Hello world 2!\n"~ - "...\n" - ); - auto nodes = Loader.fromFile("example.yaml").array; - assert(nodes.length == 2); -} -/// Iterate over YAML documents in a file, lazily loading them: -@safe unittest -{ - import std.file : write; - write("example.yaml", - "---\n"~ - "Hello world!\n"~ - "...\n"~ - "---\n"~ - "Hello world 2!\n"~ - "...\n" - ); - auto loader = Loader.fromFile("example.yaml"); - - foreach(ref node; loader) - { - //Do something - } -} -/// Load YAML from a string: -@safe unittest -{ - string yaml_input = ("red: '#ff0000'\n" ~ - "green: '#00ff00'\n" ~ - "blue: '#0000ff'"); - - auto colors = Loader.fromString(yaml_input).load(); - - foreach(string color, string value; colors) - { - // Do something with the color and its value... - } -} - -/// Load a file into a buffer in memory and then load YAML from that buffer: -@safe unittest -{ - import std.file : read, write; - import std.stdio : writeln; - // Create a yaml document - write("example.yaml", - "---\n"~ - "Hello world!\n"~ - "...\n"~ - "---\n"~ - "Hello world 2!\n"~ - "...\n" - ); - try - { - string buffer = readText("example.yaml"); - auto yamlNode = Loader.fromString(buffer); - - // Read data from yamlNode here... 
- } - catch(FileException e) - { - writeln("Failed to read file 'example.yaml'"); - } -} -/// Use a custom resolver to support custom data types and/or implicit tags: -@safe unittest -{ - import std.file : write; - // Create a yaml document - write("example.yaml", - "---\n"~ - "Hello world!\n"~ - "...\n" - ); - - auto loader = Loader.fromFile("example.yaml"); - - // Add resolver expressions here... - // loader.resolver.addImplicitResolver(...); - - auto rootNode = loader.load(); -} - -//Issue #258 - https://github.com/dlang-community/D-YAML/issues/258 -@safe unittest -{ - auto yaml = "{\n\"root\": {\n\t\"key\": \"value\"\n }\n}"; - auto doc = Loader.fromString(yaml).load(); - assert(doc.isValid); -} - -@safe unittest -{ - import std.exception : collectException; - - auto yaml = q"EOS - value: invalid: string -EOS"; - auto filename = "invalid.yml"; - auto loader = Loader.fromString(yaml); - loader.name = filename; - - Node unused; - auto e = loader.load().collectException!ScannerException(unused); - assert(e.mark.name == filename); -} diff --git a/source/dyaml/node.d b/source/dyaml/node.d deleted file mode 100644 index a043e54..0000000 --- a/source/dyaml/node.d +++ /dev/null @@ -1,2641 +0,0 @@ - -// Copyright Ferdinand Majerech 2011. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -/// Node of a YAML document. Used to read YAML data once it's loaded, -/// and to prepare data to emit. -module dyaml.node; - - -import std.algorithm; -import std.array; -import std.conv; -import std.datetime; -import std.exception; -import std.format; -import std.math; -import std.meta : AliasSeq; -import std.range; -import std.string; -import std.traits; -import std.typecons; - -// FIXME: Switch back to upstream's when v2.101 is the oldest -// supported version (recommended: after v2.111 release). 
-import dyaml.stdsumtype; - -import dyaml.event; -import dyaml.exception; -import dyaml.style; - -/// Exception thrown at node related errors. -class NodeException : MarkedYAMLException -{ - package: - // Construct a NodeException. - // - // Params: msg = Error message. - // start = Start position of the node. - this(string msg, const scope Mark start, - string file = __FILE__, size_t line = __LINE__) - @safe pure nothrow - { - super(msg, start, file, line); - } -} - -// Node kinds. -enum NodeID : ubyte -{ - scalar, - sequence, - mapping, - invalid -} - -/// Null YAML type. Used in nodes with _null values. -struct YAMLNull -{ - /// Used for string conversion. - string toString() const pure @safe nothrow {return "null";} -} - -/// Invalid YAML type, used internally by SumType -private struct YAMLInvalid {} - -// Merge YAML type, used to support "tag:yaml.org,2002:merge". -package struct YAMLMerge{} - -// Key-value pair of YAML nodes, used in mappings. -private struct Pair -{ - public: - /// Key node. - Node key; - /// Value node. - Node value; - - /// Construct a Pair from two values. Will be converted to Nodes if needed. - this(K, V)(K key, V value) - { - static if(is(Unqual!K == Node)){this.key = key;} - else {this.key = Node(key);} - static if(is(Unqual!V == Node)){this.value = value;} - else {this.value = Node(value);} - } - - /// Equality test with another Pair. - bool opEquals(const ref Pair rhs) const scope @safe - { - return key == rhs.key && value == rhs.value; - } - - // Comparison with another Pair. - int opCmp(const scope ref Pair rhs) const scope @safe - { - const keyCmp = key.opCmp(rhs.key); - return keyCmp != 0 ? 
keyCmp - : value.opCmp(rhs.value); - } - - /// - public void toString (scope void delegate(scope const(char)[]) @safe sink) - const scope @safe - { - // formattedWrite does not accept `scope` parameters - () @trusted { - formattedWrite(sink, "%s: %s", this.key, this.value); - }(); - } -} - -enum NodeType -{ - null_, - merge, - boolean, - integer, - decimal, - binary, - timestamp, - string, - mapping, - sequence, - invalid -} - -/** YAML node. - * - * This is a pseudo-dynamic type that can store any YAML value, including a - * sequence or mapping of nodes. You can get data from a Node directly or - * iterate over it if it's a collection. - */ -struct Node -{ - public: - alias Pair = .Pair; - - package: - // YAML value type. - alias Value = SumType!( - YAMLInvalid, YAMLNull, YAMLMerge, - bool, long, real, ubyte[], SysTime, string, - Node.Pair[], Node[]); - - // Can Value hold this type naturally? - enum allowed(T) = isIntegral!T || - isFloatingPoint!T || - isSomeString!T || - is(typeof({ Value i = T.init; })); - - // Stored value. - Value value_; - // Start position of the node. - Mark startMark_; - - // Tag of the node. - string tag_; - // Node scalar style. Used to remember style this node was loaded with. - ScalarStyle scalarStyle = ScalarStyle.invalid; - // Node collection style. Used to remember style this node was loaded with. - CollectionStyle collectionStyle = CollectionStyle.invalid; - - public: - /** Construct a Node from a value. - * - * Any type except for Node can be stored in a Node, but default YAML - * types (integers, floats, strings, timestamps, etc.) will be stored - * more efficiently. To create a node representing a null value, - * construct it from YAMLNull. - * - * If value is a node, its value will be copied directly. The tag and - * other information attached to the original node will be discarded. - * - * If value is an array of nodes or pairs, it is stored directly. 
- * Otherwise, every value in the array is converted to a node, and - * those nodes are stored. - * - * Note that to emit any non-default types you store - * in a node, you need a Representer to represent them in YAML - - * otherwise emitting will fail. - * - * Params: value = Value to store in the node. - * tag = Overrides tag of the node when emitted, regardless - * of tag determined by Representer. Representer uses - * this to determine YAML data type when a D data type - * maps to multiple different YAML data types. Tag must - * be in full form, e.g. "tag:yaml.org,2002:int", not - * a shortcut, like "!!int". - */ - this(T)(T value, const string tag = null) @safe - if (allowed!T || isArray!T || isAssociativeArray!T || is(Unqual!T == Node) || castableToNode!T) - { - tag_ = tag; - - //Unlike with assignment, we're just copying the value. - static if (is(Unqual!T == Node)) - { - setValue(value.value_); - } - else static if(isSomeString!T) - { - setValue(value.to!string); - } - else static if(is(Unqual!T == bool)) - { - setValue(cast(bool)value); - } - else static if(isIntegral!T) - { - setValue(cast(long)value); - } - else static if(isFloatingPoint!T) - { - setValue(cast(real)value); - } - else static if (isArray!T) - { - alias ElementT = Unqual!(ElementType!T); - // Construction from raw node or pair array. - static if(is(ElementT == Node) || is(ElementT == Node.Pair)) - { - setValue(value); - } - // Need to handle byte buffers separately. - else static if(is(ElementT == byte) || is(ElementT == ubyte)) - { - setValue(cast(ubyte[]) value); - } - else - { - Node[] nodes; - foreach(ref v; value) - { - nodes ~= Node(v); - } - setValue(nodes); - } - } - else static if (isAssociativeArray!T) - { - Node.Pair[] pairs; - foreach(k, ref v; value) - { - pairs ~= Pair(k, v); - } - setValue(pairs); - } - // User defined type. 
- else - { - setValue(value); - } - } - /// Construct a scalar node - @safe unittest - { - // Integer - { - auto node = Node(5); - } - // String - { - auto node = Node("Hello world!"); - } - // Floating point - { - auto node = Node(5.0f); - } - // Boolean - { - auto node = Node(true); - } - // Time - { - auto node = Node(SysTime(DateTime(2005, 6, 15, 20, 0, 0), UTC())); - } - // Integer, dumped as a string - { - auto node = Node(5, "tag:yaml.org,2002:str"); - } - } - /// Construct a sequence node - @safe unittest - { - // Will be emitted as a sequence (default for arrays) - { - auto seq = Node([1, 2, 3, 4, 5]); - } - // Will be emitted as a set (overridden tag) - { - auto set = Node([1, 2, 3, 4, 5], "tag:yaml.org,2002:set"); - } - // Can also store arrays of arrays - { - auto node = Node([[1,2], [3,4]]); - } - } - /// Construct a mapping node - @safe unittest - { - // Will be emitted as an unordered mapping (default for mappings) - auto map = Node([1 : "a", 2 : "b"]); - // Will be emitted as an ordered map (overridden tag) - auto omap = Node([1 : "a", 2 : "b"], "tag:yaml.org,2002:omap"); - // Will be emitted as pairs (overridden tag) - auto pairs = Node([1 : "a", 2 : "b"], "tag:yaml.org,2002:pairs"); - } - @safe unittest - { - { - auto node = Node(42); - assert(node.nodeID == NodeID.scalar); - assert(node.as!int == 42 && node.as!float == 42.0f && node.as!string == "42"); - } - - { - auto node = Node("string"); - assert(node.as!string == "string"); - } - } - @safe unittest - { - with(Node([1, 2, 3])) - { - assert(nodeID == NodeID.sequence); - assert(length == 3); - assert(opIndex(2).as!int == 3); - } - - } - @safe unittest - { - int[string] aa; - aa["1"] = 1; - aa["2"] = 2; - with(Node(aa)) - { - assert(nodeID == NodeID.mapping); - assert(length == 2); - assert(opIndex("2").as!int == 2); - } - } - @safe unittest - { - auto node = Node(Node(4, "tag:yaml.org,2002:str")); - assert(node == 4); - assert(node.tag_ == ""); - } - - /** Construct a node from arrays of _keys 
and _values. - * - * Constructs a mapping node with key-value pairs from - * _keys and _values, keeping their order. Useful when order - * is important (ordered maps, pairs). - * - * - * keys and values must have equal length. - * - * - * If _keys and/or _values are nodes, they are stored directly/ - * Otherwise they are converted to nodes and then stored. - * - * Params: keys = Keys of the mapping, from first to last pair. - * values = Values of the mapping, from first to last pair. - * tag = Overrides tag of the node when emitted, regardless - * of tag determined by Representer. Representer uses - * this to determine YAML data type when a D data type - * maps to multiple different YAML data types. - * This is used to differentiate between YAML unordered - * mappings ("!!map"), ordered mappings ("!!omap"), and - * pairs ("!!pairs") which are all internally - * represented as an array of node pairs. Tag must be - * in full form, e.g. "tag:yaml.org,2002:omap", not a - * shortcut, like "!!omap". - * - */ - this(K, V)(K[] keys, V[] values, const string tag = null) - if(!(isSomeString!(K[]) || isSomeString!(V[]))) - in(keys.length == values.length, - "Lengths of keys and values arrays to construct " ~ - "a YAML node from don't match") - { - tag_ = tag; - - Node.Pair[] pairs; - foreach(i; 0 .. keys.length){pairs ~= Pair(keys[i], values[i]);} - setValue(pairs); - } - /// - @safe unittest - { - // Will be emitted as an unordered mapping (default for mappings) - auto map = Node([1, 2], ["a", "b"]); - // Will be emitted as an ordered map (overridden tag) - auto omap = Node([1, 2], ["a", "b"], "tag:yaml.org,2002:omap"); - // Will be emitted as pairs (overriden tag) - auto pairs = Node([1, 2], ["a", "b"], "tag:yaml.org,2002:pairs"); - } - @safe unittest - { - with(Node(["1", "2"], [1, 2])) - { - assert(nodeID == NodeID.mapping); - assert(length == 2); - assert(opIndex("2").as!int == 2); - } - - } - - /// Is this node valid (initialized)? 
- @property bool isValid() const scope @safe pure nothrow @nogc - { - return value_.match!((const YAMLInvalid _) => false, _ => true); - } - - /// Return tag of the node. - @property string tag() const return scope @safe pure nothrow @nogc - { - return tag_; - } - - /// Return the start position of the node. - @property Mark startMark() const return scope @safe pure nothrow @nogc - { - return startMark_; - } - - /** Equality test. - * - * If T is Node, recursively compares all subnodes. - * This might be quite expensive if testing entire documents. - * - * If T is not Node, gets a value of type T from the node and tests - * equality with that. - * - * To test equality with a null YAML value, use YAMLNull. - * - * Params: rhs = Variable to test equality with. - * - * Returns: true if equal, false otherwise. - */ - bool opEquals(const scope Node rhs) const scope @safe - { - return opCmp(rhs) == 0; - } - bool opEquals(T)(const scope auto ref T rhs) const @safe - { - try - { - auto stored = get!(T, No.stringConversion); - // NaNs aren't normally equal to each other, but we'll pretend they are. - static if(isFloatingPoint!T) - { - return rhs == stored || (isNaN(rhs) && isNaN(stored)); - } - else - { - return rhs == stored; - } - } - catch(NodeException e) - { - return false; - } - } - /// - @safe unittest - { - auto node = Node(42); - - assert(node == 42); - assert(node != "42"); - assert(node != "43"); - - auto node2 = Node(YAMLNull()); - assert(node2 == YAMLNull()); - - const node3 = Node(42); - assert(node3 == 42); - } - - /// Shortcut for get(). - alias as = get; - - /** Get the value of the node as specified type. - * - * If the specifed type does not match type in the node, - * conversion is attempted. The stringConversion template - * parameter can be used to disable conversion from non-string - * types to strings. - * - * Numeric values are range checked, throwing if out of range of - * requested type. - * - * Timestamps are stored as std.datetime.SysTime. 
- * Binary values are decoded and stored as ubyte[]. - * - * To get a null value, use get!YAMLNull . This is to - * prevent getting null values for types such as strings or classes. - * - * $(BR)$(B Mapping default values:) - * - * $(PBR - * The '=' key can be used to denote the default value of a mapping. - * This can be used when a node is scalar in early versions of a program, - * but is replaced by a mapping later. Even if the node is a mapping, the - * get method can be used as if it was a scalar if it has a default value. - * This way, new YAML files where the node is a mapping can still be read - * by old versions of the program, which expect the node to be a scalar. - * ) - * - * Returns: Value of the node as specified type. - * - * Throws: NodeException if unable to convert to specified type, or if - * the value is out of range of requested type. - */ - inout(T) get(T, Flag!"stringConversion" stringConversion = Yes.stringConversion)() inout @safe return scope - { - static assert (allowed!(Unqual!T) || - hasNodeConstructor!(inout(Unqual!T)) || - (!hasIndirections!(Unqual!T) && hasNodeConstructor!(Unqual!T))); - - static if(!allowed!(Unqual!T)) - { - static if (hasSimpleNodeConstructor!(Unqual!T) || hasSimpleNodeConstructor!(inout(Unqual!T))) - { - alias params = AliasSeq!(this); - } - else static if (hasExpandedNodeConstructor!(Unqual!T) || hasExpandedNodeConstructor!(inout(Unqual!T))) - { - alias params = AliasSeq!(this, tag_); - } - else - { - static assert(0, "Unknown Node constructor?"); - } - - static if (is(T == class)) - { - return new inout T(params); - } - else static if (is(T == struct)) - { - return T(params); - } - else - { - static assert(0, "Unhandled user type"); - } - } else { - static if (canBeType!T) - if (isType!(Unqual!T)) { return getValue!T; } - - // If we're getting from a mapping and we're not getting Node.Pair[], - // we're getting the default value. 
- if(nodeID == NodeID.mapping){return this["="].get!( T, stringConversion);} - - static if(isSomeString!T) - { - static if(!stringConversion) - { - enforce(type == NodeType.string, new NodeException( - "Node stores unexpected type: " ~ text(type) ~ - ". Expected: " ~ typeid(T).toString(), startMark_)); - return to!T(getValue!string); - } - else - { - // Try to convert to string. - try - { - return coerceValue!T().dup; - } - catch (MatchException e) - { - throw new NodeException("Unable to convert node value to string", startMark_); - } - } - } - else static if(isFloatingPoint!T) - { - final switch (type) - { - case NodeType.integer: - return to!T(getValue!long); - case NodeType.decimal: - return to!T(getValue!real); - case NodeType.binary: - case NodeType.string: - case NodeType.boolean: - case NodeType.null_: - case NodeType.merge: - case NodeType.invalid: - case NodeType.timestamp: - case NodeType.mapping: - case NodeType.sequence: - throw new NodeException("Node stores unexpected type: " ~ text(type) ~ - ". Expected: " ~ typeid(T).toString, startMark_); - } - } - else static if(isIntegral!T) - { - enforce(type == NodeType.integer, new NodeException("Node stores unexpected type: " ~ text(type) ~ - ". Expected: " ~ typeid(T).toString, startMark_)); - immutable temp = getValue!long; - enforce(temp >= T.min && temp <= T.max, - new NodeException("Integer value of type " ~ typeid(T).toString() ~ - " out of range. Value: " ~ to!string(temp), startMark_)); - return temp.to!T; - } - else throw new NodeException("Node stores unexpected type: " ~ text(type) ~ - ". 
Expected: " ~ typeid(T).toString, startMark_); - } - } - /// ditto - T get(T)() const - if (hasIndirections!(Unqual!T) && hasNodeConstructor!(Unqual!T) && (!hasNodeConstructor!(inout(Unqual!T)))) - { - static if (hasSimpleNodeConstructor!T) - { - alias params = AliasSeq!(this); - } - else static if (hasExpandedNodeConstructor!T) - { - alias params = AliasSeq!(this, tag_); - } - else - { - static assert(0, "Unknown Node constructor?"); - } - static if (is(T == class)) - { - return new T(params); - } - else static if (is(T == struct)) - { - return T(params); - } - else - { - static assert(0, "Unhandled user type"); - } - } - /// Automatic type conversion - @safe unittest - { - auto node = Node(42); - - assert(node.get!int == 42); - assert(node.get!string == "42"); - assert(node.get!double == 42.0); - } - /// Scalar node to struct and vice versa - @safe unittest - { - import dyaml.dumper : dumper; - import dyaml.loader : Loader; - static struct MyStruct - { - int x, y, z; - - this(int x, int y, int z) @safe - { - this.x = x; - this.y = y; - this.z = z; - } - - this(scope const Node node) @safe - { - // `std.array.split` is not marked as taking a `scope` range, - // but we don't escape a reference. - scope parts = () @trusted { return node.as!string().split(":"); }(); - x = parts[0].to!int; - y = parts[1].to!int; - z = parts[2].to!int; - } - - Node opCast(T: Node)() @safe - { - //Using custom scalar format, x:y:z. - auto scalar = format("%s:%s:%s", x, y, z); - //Representing as a scalar, with custom tag to specify this data type. 
- return Node(scalar, "!mystruct.tag"); - } - } - - auto appender = new Appender!string; - - // Dump struct to yaml document - dumper().dump(appender, Node(MyStruct(1,2,3))); - - // Read yaml document back as a MyStruct - auto loader = Loader.fromString(appender.data); - Node node = loader.load(); - assert(node.as!MyStruct == MyStruct(1,2,3)); - } - /// Sequence node to struct and vice versa - @safe unittest - { - import dyaml.dumper : dumper; - import dyaml.loader : Loader; - static struct MyStruct - { - int x, y, z; - - this(int x, int y, int z) @safe - { - this.x = x; - this.y = y; - this.z = z; - } - - this(Node node) @safe - { - x = node[0].as!int; - y = node[1].as!int; - z = node[2].as!int; - } - - Node opCast(T: Node)() - { - return Node([x, y, z], "!mystruct.tag"); - } - } - - auto appender = new Appender!string; - - // Dump struct to yaml document - dumper().dump(appender, Node(MyStruct(1,2,3))); - - // Read yaml document back as a MyStruct - auto loader = Loader.fromString(appender.data); - Node node = loader.load(); - assert(node.as!MyStruct == MyStruct(1,2,3)); - } - /// Mapping node to struct and vice versa - @safe unittest - { - import dyaml.dumper : dumper; - import dyaml.loader : Loader; - static struct MyStruct - { - int x, y, z; - - Node opCast(T: Node)() - { - auto pairs = [Node.Pair("x", x), - Node.Pair("y", y), - Node.Pair("z", z)]; - return Node(pairs, "!mystruct.tag"); - } - - this(int x, int y, int z) - { - this.x = x; - this.y = y; - this.z = z; - } - - this(Node node) @safe - { - x = node["x"].as!int; - y = node["y"].as!int; - z = node["z"].as!int; - } - } - - auto appender = new Appender!string; - - // Dump struct to yaml document - dumper().dump(appender, Node(MyStruct(1,2,3))); - - // Read yaml document back as a MyStruct - auto loader = Loader.fromString(appender.data); - Node node = loader.load(); - assert(node.as!MyStruct == MyStruct(1,2,3)); - } - /// Classes can be used too - @system unittest { - import dyaml.dumper : dumper; - 
import dyaml.loader : Loader; - - static class MyClass - { - int x, y, z; - - this(int x, int y, int z) - { - this.x = x; - this.y = y; - this.z = z; - } - - this(scope const Node node) @safe inout - { - // `std.array.split` is not marked as taking a `scope` range, - // but we don't escape a reference. - scope parts = () @trusted { return node.as!string().split(":"); }(); - x = parts[0].to!int; - y = parts[1].to!int; - z = parts[2].to!int; - } - - ///Useful for Node.as!string. - override string toString() - { - return format("MyClass(%s, %s, %s)", x, y, z); - } - - Node opCast(T: Node)() @safe - { - //Using custom scalar format, x:y:z. - auto scalar = format("%s:%s:%s", x, y, z); - //Representing as a scalar, with custom tag to specify this data type. - return Node(scalar, "!myclass.tag"); - } - override bool opEquals(Object o) - { - if (auto other = cast(MyClass)o) - { - return (other.x == x) && (other.y == y) && (other.z == z); - } - return false; - } - } - auto appender = new Appender!string; - - // Dump class to yaml document - dumper().dump(appender, Node(new MyClass(1,2,3))); - - // Read yaml document back as a MyClass - auto loader = Loader.fromString(appender.data); - Node node = loader.load(); - assert(node.as!MyClass == new MyClass(1,2,3)); - } - // Make sure custom tags and styles are kept. 
- @safe unittest - { - static struct MyStruct - { - Node opCast(T: Node)() - { - auto node = Node("hi", "!mystruct.tag"); - node.setStyle(ScalarStyle.doubleQuoted); - return node; - } - } - - auto node = Node(MyStruct.init); - assert(node.tag == "!mystruct.tag"); - assert(node.scalarStyle == ScalarStyle.doubleQuoted); - } - // ditto, but for collection style - @safe unittest - { - static struct MyStruct - { - Node opCast(T: Node)() - { - auto node = Node(["hi"], "!mystruct.tag"); - node.setStyle(CollectionStyle.flow); - return node; - } - } - - auto node = Node(MyStruct.init); - assert(node.tag == "!mystruct.tag"); - assert(node.collectionStyle == CollectionStyle.flow); - } - @safe unittest - { - assertThrown!NodeException(Node("42").get!int); - assertThrown!NodeException(Node("42").get!double); - assertThrown!NodeException(Node(long.max).get!ushort); - Node(YAMLNull()).get!YAMLNull; - } - @safe unittest - { - const node = Node(42); - assert(node.get!int == 42); - assert(node.get!string == "42"); - assert(node.get!double == 42.0); - - immutable node2 = Node(42); - assert(node2.get!int == 42); - assert(node2.get!(const int) == 42); - assert(node2.get!(immutable int) == 42); - assert(node2.get!string == "42"); - assert(node2.get!(const string) == "42"); - assert(node2.get!(immutable string) == "42"); - assert(node2.get!double == 42.0); - assert(node2.get!(const double) == 42.0); - assert(node2.get!(immutable double) == 42.0); - } - - /** If this is a collection, return its _length. - * - * Otherwise, throw NodeException. - * - * Returns: Number of elements in a sequence or key-value pairs in a mapping. - * - * Throws: NodeException if this is not a sequence nor a mapping. 
- */ - @property size_t length() const @safe - { - final switch(nodeID) - { - case NodeID.sequence: - return getValue!(Node[]).length; - case NodeID.mapping: - return getValue!(Pair[]).length; - case NodeID.scalar: - case NodeID.invalid: - throw new NodeException("Trying to get length of a " ~ nodeTypeString ~ " node", - startMark_); - } - } - @safe unittest - { - auto node = Node([1,2,3]); - assert(node.length == 3); - const cNode = Node([1,2,3]); - assert(cNode.length == 3); - immutable iNode = Node([1,2,3]); - assert(iNode.length == 3); - } - - /** Get the element at specified index. - * - * If the node is a sequence, index must be integral. - * - * - * If the node is a mapping, return the value corresponding to the first - * key equal to index. containsKey() can be used to determine if a mapping - * has a specific key. - * - * To get element at a null index, use YAMLNull for index. - * - * Params: index = Index to use. - * - * Returns: Value corresponding to the index. - * - * Throws: NodeException if the index could not be found, - * non-integral index is used with a sequence or the node is - * not a collection. - */ - ref inout(Node) opIndex(T)(T index) inout return scope @safe - { - final switch (nodeID) - { - case NodeID.sequence: - checkSequenceIndex(index); - static if(isIntegral!T) - { - return getValue!(Node[])[index]; - } - else - { - assert(false, "Only integers may index sequence nodes"); - } - case NodeID.mapping: - auto idx = findPair(index); - if(idx >= 0) - { - return getValue!(Pair[])[idx].value; - } - - string msg = "Mapping index not found" ~ (isSomeString!T ? 
": " ~ to!string(index) : ""); - throw new NodeException(msg, startMark_); - case NodeID.scalar: - case NodeID.invalid: - throw new NodeException("Trying to index a " ~ nodeTypeString ~ " node", startMark_); - } - } - /// - @safe unittest - { - Node narray = Node([11, 12, 13, 14]); - Node nmap = Node(["11", "12", "13", "14"], [11, 12, 13, 14]); - - assert(narray[0].as!int == 11); - assert(null !is collectException(narray[42])); - assert(nmap["11"].as!int == 11); - assert(nmap["14"].as!int == 14); - } - @safe unittest - { - Node narray = Node([11, 12, 13, 14]); - Node nmap = Node(["11", "12", "13", "14"], [11, 12, 13, 14]); - - assert(narray[0].as!int == 11); - assert(null !is collectException(narray[42])); - assert(nmap["11"].as!int == 11); - assert(nmap["14"].as!int == 14); - assert(null !is collectException(nmap["42"])); - - narray.add(YAMLNull()); - nmap.add(YAMLNull(), "Nothing"); - assert(narray[4].as!YAMLNull == YAMLNull()); - assert(nmap[YAMLNull()].as!string == "Nothing"); - - assertThrown!NodeException(nmap[11]); - assertThrown!NodeException(nmap[14]); - } - - /** Determine if a collection contains specified value. - * - * If the node is a sequence, check if it contains the specified value. - * If it's a mapping, check if it has a value that matches specified value. - * - * Params: rhs = Item to look for. Use YAMLNull to check for a null value. - * - * Returns: true if rhs was found, false otherwise. - * - * Throws: NodeException if the node is not a collection. - */ - bool contains(T)(T rhs) const - { - return contains_!(T, No.key, "contains")(rhs); - } - @safe unittest - { - auto mNode = Node(["1", "2", "3"]); - assert(mNode.contains("2")); - const cNode = Node(["1", "2", "3"]); - assert(cNode.contains("2")); - immutable iNode = Node(["1", "2", "3"]); - assert(iNode.contains("2")); - } - - - /** Determine if a mapping contains specified key. - * - * Params: rhs = Key to look for. Use YAMLNull to check for a null key. 
- * - * Returns: true if rhs was found, false otherwise. - * - * Throws: NodeException if the node is not a mapping. - */ - bool containsKey(T)(T rhs) const - { - return contains_!(T, Yes.key, "containsKey")(rhs); - } - - // Unittest for contains() and containsKey(). - @safe unittest - { - auto seq = Node([1, 2, 3, 4, 5]); - assert(seq.contains(3)); - assert(seq.contains(5)); - assert(!seq.contains("5")); - assert(!seq.contains(6)); - assert(!seq.contains(float.nan)); - assertThrown!NodeException(seq.containsKey(5)); - - auto seq2 = Node(["1", "2"]); - assert(seq2.contains("1")); - assert(!seq2.contains(1)); - - auto map = Node(["1", "2", "3", "4"], [1, 2, 3, 4]); - assert(map.contains(1)); - assert(!map.contains("1")); - assert(!map.contains(5)); - assert(!map.contains(float.nan)); - assert(map.containsKey("1")); - assert(map.containsKey("4")); - assert(!map.containsKey(1)); - assert(!map.containsKey("5")); - - assert(!seq.contains(YAMLNull())); - assert(!map.contains(YAMLNull())); - assert(!map.containsKey(YAMLNull())); - seq.add(YAMLNull()); - map.add("Nothing", YAMLNull()); - assert(seq.contains(YAMLNull())); - assert(map.contains(YAMLNull())); - assert(!map.containsKey(YAMLNull())); - map.add(YAMLNull(), "Nothing"); - assert(map.containsKey(YAMLNull())); - - auto map2 = Node([1, 2, 3, 4], [1, 2, 3, 4]); - assert(!map2.contains("1")); - assert(map2.contains(1)); - assert(!map2.containsKey("1")); - assert(map2.containsKey(1)); - - // scalar - assertThrown!NodeException(Node(1).contains(4)); - assertThrown!NodeException(Node(1).containsKey(4)); - - auto mapNan = Node([1.0, 2, double.nan], [1, double.nan, 5]); - - assert(mapNan.contains(double.nan)); - assert(mapNan.containsKey(double.nan)); - } - - /// Assignment (shallow copy) by value. 
- void opAssign()(auto ref Node rhs) - { - assumeWontThrow(setValue(rhs.value_)); - startMark_ = rhs.startMark_; - tag_ = rhs.tag_; - scalarStyle = rhs.scalarStyle; - collectionStyle = rhs.collectionStyle; - } - // Unittest for opAssign(). - @safe unittest - { - auto seq = Node([1, 2, 3, 4, 5]); - auto assigned = seq; - assert(seq == assigned, - "Node.opAssign() doesn't produce an equivalent copy"); - } - - /** Set element at specified index in a collection. - * - * This method can only be called on collection nodes. - * - * If the node is a sequence, index must be integral. - * - * If the node is a mapping, sets the _value corresponding to the first - * key matching index (including conversion, so e.g. "42" matches 42). - * - * If the node is a mapping and no key matches index, a new key-value - * pair is added to the mapping. In sequences the index must be in - * range. This ensures behavior siilar to D arrays and associative - * arrays. - * - * To set element at a null index, use YAMLNull for index. - * - * Params: - * value = Value to assign. - * index = Index of the value to set. - * - * Throws: NodeException if the node is not a collection, index is out - * of range or if a non-integral index is used on a sequence node. 
- */ - void opIndexAssign(K, V)(V value, K index) - { - final switch (nodeID) - { - case NodeID.sequence: - checkSequenceIndex(index); - static if(isIntegral!K || is(Unqual!K == bool)) - { - auto nodes = getValue!(Node[]); - static if(is(Unqual!V == Node)){nodes[index] = value;} - else {nodes[index] = Node(value);} - setValue(nodes); - return; - } - assert(false, "Only integers may index sequence nodes"); - case NodeID.mapping: - const idx = findPair(index); - if(idx < 0){add(index, value);} - else - { - auto pairs = as!(Node.Pair[])(); - static if(is(Unqual!V == Node)){pairs[idx].value = value;} - else {pairs[idx].value = Node(value);} - setValue(pairs); - } - return; - case NodeID.scalar: - case NodeID.invalid: - throw new NodeException("Trying to index a " ~ nodeTypeString ~ " node", startMark_); - } - } - @safe unittest - { - with(Node([1, 2, 3, 4, 3])) - { - opIndexAssign(42, 3); - assert(length == 5); - assert(opIndex(3).as!int == 42); - - opIndexAssign(YAMLNull(), 0); - assert(opIndex(0) == YAMLNull()); - } - with(Node(["1", "2", "3"], [4, 5, 6])) - { - opIndexAssign(42, "3"); - opIndexAssign(123, 456); - assert(length == 4); - assert(opIndex("3").as!int == 42); - assert(opIndex(456).as!int == 123); - - opIndexAssign(43, 3); - //3 and "3" should be different - assert(length == 5); - assert(opIndex("3").as!int == 42); - assert(opIndex(3).as!int == 43); - - opIndexAssign(YAMLNull(), "2"); - assert(opIndex("2") == YAMLNull()); - } - } - - /** Return a range object iterating over a sequence, getting each - * element as T. - * - * If T is Node, simply iterate over the nodes in the sequence. - * Otherwise, convert each node to T during iteration. - * - * Throws: NodeException if the node is not a sequence or an element - * could not be converted to specified type. - */ - template sequence(T = Node) - { - struct Range(N) - { - N subnodes; - size_t position; - - this(N nodes) - { - subnodes = nodes; - position = 0; - } - - /* Input range functionality. 
*/ - bool empty() const @property { return position >= subnodes.length; } - - void popFront() - { - enforce(!empty, "Attempted to popFront an empty sequence"); - position++; - } - - T front() const @property - { - enforce(!empty, "Attempted to take the front of an empty sequence"); - static if (is(Unqual!T == Node)) - return subnodes[position]; - else - return subnodes[position].as!T; - } - - /* Forward range functionality. */ - Range save() { return this; } - - /* Bidirectional range functionality. */ - void popBack() - { - enforce(!empty, "Attempted to popBack an empty sequence"); - subnodes = subnodes[0 .. $ - 1]; - } - - T back() - { - enforce(!empty, "Attempted to take the back of an empty sequence"); - static if (is(Unqual!T == Node)) - return subnodes[$ - 1]; - else - return subnodes[$ - 1].as!T; - } - - /* Random-access range functionality. */ - size_t length() const @property { return subnodes.length; } - T opIndex(size_t index) - { - static if (is(Unqual!T == Node)) - return subnodes[index]; - else - return subnodes[index].as!T; - } - - static assert(isInputRange!Range); - static assert(isForwardRange!Range); - static assert(isBidirectionalRange!Range); - static assert(isRandomAccessRange!Range); - } - auto sequence() - { - enforce(nodeID == NodeID.sequence, - new NodeException("Trying to 'sequence'-iterate over a " ~ nodeTypeString ~ " node", - startMark_)); - return Range!(Node[])(get!(Node[])); - } - auto sequence() const - { - enforce(nodeID == NodeID.sequence, - new NodeException("Trying to 'sequence'-iterate over a " ~ nodeTypeString ~ " node", - startMark_)); - return Range!(const(Node)[])(get!(Node[])); - } - } - @safe unittest - { - Node n1 = Node([1, 2, 3, 4]); - int[int] array; - Node n2 = Node(array); - const n3 = Node([1, 2, 3, 4]); - - auto r = n1.sequence!int.map!(x => x * 10); - assert(r.equal([10, 20, 30, 40])); - - assertThrown(n2.sequence); - - auto r2 = n3.sequence!int.map!(x => x * 10); - assert(r2.equal([10, 20, 30, 40])); - } - - 
/** Return a range object iterating over mapping's pairs. - * - * Throws: NodeException if the node is not a mapping. - * - */ - template mapping() - { - struct Range(T) - { - T pairs; - size_t position; - - this(T pairs) @safe - { - this.pairs = pairs; - position = 0; - } - - /* Input range functionality. */ - bool empty() @safe { return position >= pairs.length; } - - void popFront() @safe - { - enforce(!empty, "Attempted to popFront an empty mapping"); - position++; - } - - auto front() @safe - { - enforce(!empty, "Attempted to take the front of an empty mapping"); - return pairs[position]; - } - - /* Forward range functionality. */ - Range save() @safe { return this; } - - /* Bidirectional range functionality. */ - void popBack() @safe - { - enforce(!empty, "Attempted to popBack an empty mapping"); - pairs = pairs[0 .. $ - 1]; - } - - auto back() @safe - { - enforce(!empty, "Attempted to take the back of an empty mapping"); - return pairs[$ - 1]; - } - - /* Random-access range functionality. 
*/ - size_t length() const @property @safe { return pairs.length; } - auto opIndex(size_t index) @safe { return pairs[index]; } - - static assert(isInputRange!Range); - static assert(isForwardRange!Range); - static assert(isBidirectionalRange!Range); - static assert(isRandomAccessRange!Range); - } - - auto mapping() - { - enforce(nodeID == NodeID.mapping, - new NodeException("Trying to 'mapping'-iterate over a " - ~ nodeTypeString ~ " node", startMark_)); - return Range!(Node.Pair[])(get!(Node.Pair[])); - } - auto mapping() const - { - enforce(nodeID == NodeID.mapping, - new NodeException("Trying to 'mapping'-iterate over a " - ~ nodeTypeString ~ " node", startMark_)); - return Range!(const(Node.Pair)[])(get!(Node.Pair[])); - } - } - @safe unittest - { - int[int] array; - Node n = Node(array); - n[1] = "foo"; - n[2] = "bar"; - n[3] = "baz"; - - string[int] test; - foreach (pair; n.mapping) - test[pair.key.as!int] = pair.value.as!string.idup; - - assert(test[1] == "foo"); - assert(test[2] == "bar"); - assert(test[3] == "baz"); - - int[int] constArray = [1: 2, 3: 4]; - const x = Node(constArray); - foreach (pair; x.mapping) - assert(pair.value == constArray[pair.key.as!int]); - } - - /** Return a range object iterating over mapping's keys. - * - * If K is Node, simply iterate over the keys in the mapping. - * Otherwise, convert each key to T during iteration. - * - * Throws: NodeException if the nodes is not a mapping or an element - * could not be converted to specified type. 
- */ - auto mappingKeys(K = Node)() const - { - enforce(nodeID == NodeID.mapping, - new NodeException("Trying to 'mappingKeys'-iterate over a " - ~ nodeTypeString ~ " node", startMark_)); - static if (is(Unqual!K == Node)) - return mapping.map!(pair => pair.key); - else - return mapping.map!(pair => pair.key.as!K); - } - @safe unittest - { - int[int] array; - Node m1 = Node(array); - m1["foo"] = 2; - m1["bar"] = 3; - - assert(m1.mappingKeys.equal(["foo", "bar"]) || m1.mappingKeys.equal(["bar", "foo"])); - - const cm1 = Node(["foo": 2, "bar": 3]); - - assert(cm1.mappingKeys.equal(["foo", "bar"]) || cm1.mappingKeys.equal(["bar", "foo"])); - } - - /** Return a range object iterating over mapping's values. - * - * If V is Node, simply iterate over the values in the mapping. - * Otherwise, convert each key to V during iteration. - * - * Throws: NodeException if the nodes is not a mapping or an element - * could not be converted to specified type. - */ - auto mappingValues(V = Node)() const - { - enforce(nodeID == NodeID.mapping, - new NodeException("Trying to 'mappingValues'-iterate over a " - ~ nodeTypeString ~ " node", startMark_)); - static if (is(Unqual!V == Node)) - return mapping.map!(pair => pair.value); - else - return mapping.map!(pair => pair.value.as!V); - } - @safe unittest - { - int[int] array; - Node m1 = Node(array); - m1["foo"] = 2; - m1["bar"] = 3; - - assert(m1.mappingValues.equal([2, 3]) || m1.mappingValues.equal([3, 2])); - - const cm1 = Node(["foo": 2, "bar": 3]); - - assert(cm1.mappingValues.equal([2, 3]) || cm1.mappingValues.equal([3, 2])); - } - - - /** Foreach over a sequence, getting each element as T. - * - * If T is Node, simply iterate over the nodes in the sequence. - * Otherwise, convert each node to T during iteration. - * - * Throws: NodeException if the node is not a sequence or an - * element could not be converted to specified type. 
- */ - int opApply(D)(D dg) if (isDelegate!D && (Parameters!D.length == 1)) - { - enforce(nodeID == NodeID.sequence, - new NodeException("Trying to sequence-foreach over a " ~ nodeTypeString ~ " node", - startMark_)); - - int result; - foreach(ref node; get!(Node[])) - { - static if(is(Unqual!(Parameters!D[0]) == Node)) - { - result = dg(node); - } - else - { - Parameters!D[0] temp = node.as!(Parameters!D[0]); - result = dg(temp); - } - if(result){break;} - } - return result; - } - /// ditto - int opApply(D)(D dg) const if (isDelegate!D && (Parameters!D.length == 1)) - { - enforce(nodeID == NodeID.sequence, - new NodeException("Trying to sequence-foreach over a " ~ nodeTypeString ~ " node", - startMark_)); - - int result; - foreach(ref node; get!(Node[])) - { - static if(is(Unqual!(Parameters!D[0]) == Node)) - { - result = dg(node); - } - else - { - Parameters!D[0] temp = node.as!(Parameters!D[0]); - result = dg(temp); - } - if(result){break;} - } - return result; - } - @safe unittest - { - Node n1 = Node(11); - Node n2 = Node(12); - Node n3 = Node(13); - Node n4 = Node(14); - Node narray = Node([n1, n2, n3, n4]); - const cNArray = narray; - - int[] array, array2, array3; - foreach(int value; narray) - { - array ~= value; - } - foreach(Node node; narray) - { - array2 ~= node.as!int; - } - foreach (const Node node; cNArray) - { - array3 ~= node.as!int; - } - assert(array == [11, 12, 13, 14]); - assert(array2 == [11, 12, 13, 14]); - assert(array3 == [11, 12, 13, 14]); - } - @safe unittest - { - string[] testStrs = ["1", "2", "3"]; - auto node1 = Node(testStrs); - int i = 0; - foreach (string elem; node1) - { - assert(elem == testStrs[i]); - i++; - } - const node2 = Node(testStrs); - i = 0; - foreach (string elem; node2) - { - assert(elem == testStrs[i]); - i++; - } - immutable node3 = Node(testStrs); - i = 0; - foreach (string elem; node3) - { - assert(elem == testStrs[i]); - i++; - } - } - @safe unittest - { - auto node = Node(["a":1, "b":2, "c":3]); - const cNode = 
node; - assertThrown({foreach (Node n; node) {}}()); - assertThrown({foreach (const Node n; cNode) {}}()); - } - - /** Foreach over a mapping, getting each key/value as K/V. - * - * If the K and/or V is Node, simply iterate over the nodes in the mapping. - * Otherwise, convert each key/value to T during iteration. - * - * Throws: NodeException if the node is not a mapping or an - * element could not be converted to specified type. - */ - int opApply(DG)(DG dg) if (isDelegate!DG && (Parameters!DG.length == 2)) - { - alias K = Parameters!DG[0]; - alias V = Parameters!DG[1]; - enforce(nodeID == NodeID.mapping, - new NodeException("Trying to mapping-foreach over a " ~ nodeTypeString ~ " node", - startMark_)); - - int result; - foreach(ref pair; get!(Node.Pair[])) - { - static if(is(Unqual!K == Node) && is(Unqual!V == Node)) - { - result = dg(pair.key, pair.value); - } - else static if(is(Unqual!K == Node)) - { - V tempValue = pair.value.as!V; - result = dg(pair.key, tempValue); - } - else static if(is(Unqual!V == Node)) - { - K tempKey = pair.key.as!K; - result = dg(tempKey, pair.value); - } - else - { - K tempKey = pair.key.as!K; - V tempValue = pair.value.as!V; - result = dg(tempKey, tempValue); - } - - if(result){break;} - } - return result; - } - /// ditto - int opApply(DG)(DG dg) const if (isDelegate!DG && (Parameters!DG.length == 2)) - { - alias K = Parameters!DG[0]; - alias V = Parameters!DG[1]; - enforce(nodeID == NodeID.mapping, - new NodeException("Trying to mapping-foreach over a " ~ nodeTypeString ~ " node", - startMark_)); - - int result; - foreach(ref pair; get!(Node.Pair[])) - { - static if(is(Unqual!K == Node) && is(Unqual!V == Node)) - { - result = dg(pair.key, pair.value); - } - else static if(is(Unqual!K == Node)) - { - V tempValue = pair.value.as!V; - result = dg(pair.key, tempValue); - } - else static if(is(Unqual!V == Node)) - { - K tempKey = pair.key.as!K; - result = dg(tempKey, pair.value); - } - else - { - K tempKey = pair.key.as!K; - V 
tempValue = pair.value.as!V; - result = dg(tempKey, tempValue); - } - - if(result){break;} - } - return result; - } - @safe unittest - { - Node n1 = Node(cast(long)11); - Node n2 = Node(cast(long)12); - Node n3 = Node(cast(long)13); - Node n4 = Node(cast(long)14); - - Node k1 = Node("11"); - Node k2 = Node("12"); - Node k3 = Node("13"); - Node k4 = Node("14"); - - Node nmap1 = Node([Pair(k1, n1), - Pair(k2, n2), - Pair(k3, n3), - Pair(k4, n4)]); - - int[string] expected = ["11" : 11, - "12" : 12, - "13" : 13, - "14" : 14]; - int[string] array; - foreach(string key, int value; nmap1) - { - array[key] = value; - } - assert(array == expected); - - Node nmap2 = Node([Pair(k1, Node(cast(long)5)), - Pair(k2, Node(true)), - Pair(k3, Node(cast(real)1.0)), - Pair(k4, Node("yarly"))]); - - // DUB: `scope` in `foreach` not supported before 2.098 - int dummy; // Otherwise the delegate is infered as a function - nmap2.opApply((scope string key, scope Node value) - { - switch(key) - { - case "11": assert(value.as!int == 5 ); break; - case "12": assert(value.as!bool == true ); break; - case "13": assert(value.as!float == 1.0 ); break; - case "14": assert(value.as!string == "yarly"); break; - default: assert(false); - } - return dummy; - }); - const nmap3 = nmap2; - - foreach(const Node key, const Node value; nmap3) - { - switch(key.as!string) - { - case "11": assert(value.as!int == 5 ); break; - case "12": assert(value.as!bool == true ); break; - case "13": assert(value.as!float == 1.0 ); break; - case "14": assert(value.as!string == "yarly"); break; - default: assert(false); - } - } - } - @safe unittest - { - string[int] testStrs = [0: "1", 1: "2", 2: "3"]; - auto node1 = Node(testStrs); - foreach (const int i, string elem; node1) - { - assert(elem == testStrs[i]); - } - const node2 = Node(testStrs); - foreach (const int i, string elem; node2) - { - assert(elem == testStrs[i]); - } - immutable node3 = Node(testStrs); - foreach (const int i, string elem; node3) - { - assert(elem 
== testStrs[i]); - } - } - @safe unittest - { - auto node = Node(["a", "b", "c"]); - const cNode = node; - assertThrown({foreach (Node a, Node b; node) {}}()); - assertThrown({foreach (const Node a, const Node b; cNode) {}}()); - } - - /** Add an element to a sequence. - * - * This method can only be called on sequence nodes. - * - * If value is a node, it is copied to the sequence directly. Otherwise - * value is converted to a node and then stored in the sequence. - * - * $(P When emitting, all values in the sequence will be emitted. When - * using the !!set tag, the user needs to ensure that all elements in - * the sequence are unique, otherwise $(B invalid) YAML code will be - * emitted.) - * - * Params: value = Value to _add to the sequence. - */ - void add(T)(T value) - { - if (!isValid) - { - setValue(Node[].init); - } - enforce(nodeID == NodeID.sequence, - new NodeException("Trying to add an element to a " ~ nodeTypeString ~ " node", startMark_)); - - auto nodes = get!(Node[])(); - static if(is(Unqual!T == Node)){nodes ~= value;} - else {nodes ~= Node(value);} - setValue(nodes); - } - @safe unittest - { - with(Node([1, 2, 3, 4])) - { - add(5.0f); - assert(opIndex(4).as!float == 5.0f); - } - with(Node()) - { - add(5.0f); - assert(opIndex(0).as!float == 5.0f); - } - with(Node(5.0f)) - { - assertThrown!NodeException(add(5.0f)); - } - with(Node([5.0f : true])) - { - assertThrown!NodeException(add(5.0f)); - } - } - - /** Add a key-value pair to a mapping. - * - * This method can only be called on mapping nodes. - * - * If key and/or value is a node, it is copied to the mapping directly. - * Otherwise it is converted to a node and then stored in the mapping. - * - * $(P It is possible for the same key to be present more than once in a - * mapping. When emitting, all key-value pairs will be emitted. - * This is useful with the "!!pairs" tag, but will result in - * $(B invalid) YAML with "!!map" and "!!omap" tags.) - * - * Params: key = Key to _add. 
- * value = Value to _add. - */ - void add(K, V)(K key, V value) - { - if (!isValid) - { - setValue(Node.Pair[].init); - } - enforce(nodeID == NodeID.mapping, - new NodeException("Trying to add a key-value pair to a " ~ - nodeTypeString ~ " node", - startMark_)); - - auto pairs = get!(Node.Pair[])(); - pairs ~= Pair(key, value); - setValue(pairs); - } - @safe unittest - { - with(Node([1, 2], [3, 4])) - { - add(5, "6"); - assert(opIndex(5).as!string == "6"); - } - with(Node()) - { - add(5, "6"); - assert(opIndex(5).as!string == "6"); - } - with(Node(5.0f)) - { - assertThrown!NodeException(add(5, "6")); - } - with(Node([5.0f])) - { - assertThrown!NodeException(add(5, "6")); - } - } - - /** Determine whether a key is in a mapping, and access its value. - * - * This method can only be called on mapping nodes. - * - * Params: key = Key to search for. - * - * Returns: A pointer to the value (as a Node) corresponding to key, - * or null if not found. - * - * Note: Any modification to the node can invalidate the returned - * pointer. - * - * See_Also: contains - */ - inout(Node*) opBinaryRight(string op, K)(K key) inout - if (op == "in") - { - enforce(nodeID == NodeID.mapping, new NodeException("Trying to use 'in' on a " ~ - nodeTypeString ~ " node", startMark_)); - - auto idx = findPair(key); - if(idx < 0) - { - return null; - } - else - { - return &(get!(Node.Pair[])[idx].value); - } - } - @safe unittest - { - auto mapping = Node(["foo", "baz"], ["bar", "qux"]); - assert("bad" !in mapping && ("bad" in mapping) is null); - Node* foo = "foo" in mapping; - assert(foo !is null); - assert(*foo == Node("bar")); - assert(foo.get!string == "bar"); - *foo = Node("newfoo"); - assert(mapping["foo"] == Node("newfoo")); - } - @safe unittest - { - auto mNode = Node(["a": 2]); - assert("a" in mNode); - const cNode = Node(["a": 2]); - assert("a" in cNode); - immutable iNode = Node(["a": 2]); - assert("a" in iNode); - } - - /** Remove first (if any) occurence of a value in a collection. 
- * - * This method can only be called on collection nodes. - * - * If the node is a sequence, the first node matching value is removed. - * If the node is a mapping, the first key-value pair where _value - * matches specified value is removed. - * - * Params: rhs = Value to _remove. - * - * Throws: NodeException if the node is not a collection. - */ - void remove(T)(T rhs) - { - remove_!(T, No.key, "remove")(rhs); - } - @safe unittest - { - with(Node([1, 2, 3, 4, 3])) - { - remove(3); - assert(length == 4); - assert(opIndex(2).as!int == 4); - assert(opIndex(3).as!int == 3); - - add(YAMLNull()); - assert(length == 5); - remove(YAMLNull()); - assert(length == 4); - } - with(Node(["1", "2", "3"], [4, 5, 6])) - { - remove(4); - assert(length == 2); - add("nullkey", YAMLNull()); - assert(length == 3); - remove(YAMLNull()); - assert(length == 2); - } - } - - /** Remove element at the specified index of a collection. - * - * This method can only be called on collection nodes. - * - * If the node is a sequence, index must be integral. - * - * If the node is a mapping, remove the first key-value pair where - * key matches index. - * - * If the node is a mapping and no key matches index, nothing is removed - * and no exception is thrown. This ensures behavior siilar to D arrays - * and associative arrays. - * - * Params: index = Index to remove at. - * - * Throws: NodeException if the node is not a collection, index is out - * of range or if a non-integral index is used on a sequence node. 
- */ - void removeAt(T)(T index) - { - remove_!(T, Yes.key, "removeAt")(index); - } - @safe unittest - { - with(Node([1, 2, 3, 4, 3])) - { - removeAt(3); - assertThrown!NodeException(removeAt("3")); - assert(length == 4); - assert(opIndex(3).as!int == 3); - } - with(Node(["1", "2", "3"], [4, 5, 6])) - { - // no integer 2 key, so don't remove anything - removeAt(2); - assert(length == 3); - removeAt("2"); - assert(length == 2); - add(YAMLNull(), "nullval"); - assert(length == 3); - removeAt(YAMLNull()); - assert(length == 2); - } - } - - /// Compare with another _node. - int opCmp(const scope ref Node rhs) const scope @safe - { - const bool hasNullTag = this.tag_ is null; - // Only one of them is null: we can order nodes - if ((hasNullTag) ^ (rhs.tag is null)) - return hasNullTag ? -1 : 1; - // Either both `null` or both have a value - if (!hasNullTag) - if (int result = std.algorithm.comparison.cmp(tag_, rhs.tag_)) - return result; - - static int cmp(T1, T2)(T1 a, T2 b) - { - return a > b ? 1 : - a < b ? -1 : - 0; - } - - // Compare validity: if both valid, we have to compare further. - if (!this.isValid()) - return rhs.isValid() ? -1 : 0; - if (!rhs.isValid()) - return 1; - if (const typeCmp = cmp(type, rhs.type)) - return typeCmp; - - static int compareCollections(T)(const scope ref Node lhs, const scope ref Node rhs) - { - const c1 = lhs.getValue!T; - const c2 = rhs.getValue!T; - if(c1 is c2){return 0;} - if(c1.length != c2.length) - { - return cmp(c1.length, c2.length); - } - // Equal lengths, compare items. - foreach(i; 0 .. c1.length) - { - const itemCmp = c1[i].opCmp(c2[i]); - if(itemCmp != 0){return itemCmp;} - } - return 0; - } - - final switch(type) - { - case NodeType.string: - return std.algorithm.cmp(getValue!string, - rhs.getValue!string); - case NodeType.integer: - return cmp(getValue!long, rhs.getValue!long); - case NodeType.boolean: - const b1 = getValue!bool; - const b2 = rhs.getValue!bool; - return b1 ? b2 ? 0 : 1 - : b2 ? 
-1 : 0; - case NodeType.binary: - const b1 = getValue!(ubyte[]); - const b2 = rhs.getValue!(ubyte[]); - return std.algorithm.cmp(b1, b2); - case NodeType.null_: - return 0; - case NodeType.decimal: - const r1 = getValue!real; - const r2 = rhs.getValue!real; - if(isNaN(r1)) - { - return isNaN(r2) ? 0 : -1; - } - if(isNaN(r2)) - { - return 1; - } - // Fuzzy equality. - if(r1 <= r2 + real.epsilon && r1 >= r2 - real.epsilon) - { - return 0; - } - return cmp(r1, r2); - case NodeType.timestamp: - const t1 = getValue!SysTime; - const t2 = rhs.getValue!SysTime; - return cmp(t1, t2); - case NodeType.mapping: - return compareCollections!(Pair[])(this, rhs); - case NodeType.sequence: - return compareCollections!(Node[])(this, rhs); - case NodeType.merge: - assert(false, "Cannot compare merge nodes"); - case NodeType.invalid: - assert(false, "Cannot compare invalid nodes"); - } - } - - // Ensure opCmp is symmetric for collections - @safe unittest - { - auto node1 = Node( - [ - Node("New York Yankees", "tag:yaml.org,2002:str"), - Node("Atlanta Braves", "tag:yaml.org,2002:str") - ], "tag:yaml.org,2002:seq" - ); - auto node2 = Node( - [ - Node("Detroit Tigers", "tag:yaml.org,2002:str"), - Node("Chicago cubs", "tag:yaml.org,2002:str") - ], "tag:yaml.org,2002:seq" - ); - assert(node1 > node2); - assert(node2 < node1); - } - - // Compute hash of the node. - hash_t toHash() nothrow const @trusted - { - const valueHash = value_.match!(v => hashOf(v)); - - return tag_ is null ? valueHash : tag_.hashOf(valueHash); - } - @safe unittest - { - assert(Node(42).toHash() != Node(41).toHash()); - assert(Node(42).toHash() != Node(42, "some-tag").toHash()); - } - - /// Get type of the node value. 
- @property NodeType type() const scope @safe pure nothrow @nogc - { - return this.value_.match!( - (const bool _) => NodeType.boolean, - (const long _) => NodeType.integer, - (const Node[] _) => NodeType.sequence, - (const ubyte[] _) => NodeType.binary, - (const string _) => NodeType.string, - (const Node.Pair[] _) => NodeType.mapping, - (const SysTime _) => NodeType.timestamp, - (const YAMLNull _) => NodeType.null_, - (const YAMLMerge _) => NodeType.merge, - (const real _) => NodeType.decimal, - (const YAMLInvalid _) => NodeType.invalid, - ); - } - - /// Get the kind of node this is. - @property NodeID nodeID() const scope @safe pure nothrow @nogc - { - final switch (type) - { - case NodeType.sequence: - return NodeID.sequence; - case NodeType.mapping: - return NodeID.mapping; - case NodeType.boolean: - case NodeType.integer: - case NodeType.binary: - case NodeType.string: - case NodeType.timestamp: - case NodeType.null_: - case NodeType.merge: - case NodeType.decimal: - return NodeID.scalar; - case NodeType.invalid: - return NodeID.invalid; - } - } - package: - - // Get a string representation of the node tree. Used for debugging. - // - // Params: level = Level of the node in the tree. - // - // Returns: String representing the node tree. - @property string debugString(uint level = 0) const scope @safe - { - string indent; - foreach(i; 0 .. level){indent ~= " ";} - - final switch (nodeID) - { - case NodeID.invalid: - return indent ~ "invalid"; - case NodeID.sequence: - string result = indent ~ "sequence:\n"; - foreach(ref node; get!(Node[])) - { - result ~= node.debugString(level + 1); - } - return result; - case NodeID.mapping: - string result = indent ~ "mapping:\n"; - foreach(ref pair; get!(Node.Pair[])) - { - result ~= indent ~ " pair\n"; - result ~= pair.key.debugString(level + 2); - result ~= pair.value.debugString(level + 2); - } - return result; - case NodeID.scalar: - return indent ~ "scalar(" ~ - (convertsTo!string ? 
get!string : text(type)) ~ ")\n"; - } - } - - - public: - @property string nodeTypeString() const scope @safe pure nothrow @nogc - { - final switch (nodeID) - { - case NodeID.mapping: - return "mapping"; - case NodeID.sequence: - return "sequence"; - case NodeID.scalar: - return "scalar"; - case NodeID.invalid: - return "invalid"; - } - } - - // Determine if the value can be converted to specified type. - @property bool convertsTo(T)() const - { - if(isType!T){return true;} - - // Every type allowed in Value should be convertible to string. - static if(isSomeString!T) {return true;} - else static if(isFloatingPoint!T){return type.among!(NodeType.integer, NodeType.decimal);} - else static if(isIntegral!T) {return type == NodeType.integer;} - else static if(is(Unqual!T==bool)){return type == NodeType.boolean;} - else {return false;} - } - /** - * Sets the style of this node when dumped. - * - * Params: style = Any valid style. - */ - void setStyle(CollectionStyle style) @safe - { - enforce(!isValid || (nodeID.among(NodeID.mapping, NodeID.sequence)), new NodeException( - "Cannot set collection style for non-collection nodes", startMark_)); - collectionStyle = style; - } - /// Ditto - void setStyle(ScalarStyle style) @safe - { - enforce(!isValid || (nodeID == NodeID.scalar), new NodeException( - "Cannot set scalar style for non-scalar nodes", startMark_)); - scalarStyle = style; - } - /// - @safe unittest - { - import dyaml.dumper; - auto stream = new Appender!string(); - auto node = Node([1, 2, 3, 4, 5]); - node.setStyle(CollectionStyle.block); - - auto dumper = dumper(); - dumper.dump(stream, node); - } - /// - @safe unittest - { - import dyaml.dumper; - auto stream = new Appender!string(); - auto node = Node(4); - node.setStyle(ScalarStyle.literal); - - auto dumper = dumper(); - dumper.dump(stream, node); - } - @safe unittest - { - assertThrown!NodeException(Node(4).setStyle(CollectionStyle.block)); - 
assertThrown!NodeException(Node([4]).setStyle(ScalarStyle.literal)); - } - @safe unittest - { - import dyaml.dumper; - { - auto stream = new Appender!string(); - auto node = Node([1, 2, 3, 4, 5]); - node.setStyle(CollectionStyle.block); - auto dumper = dumper(); - dumper.explicitEnd = false; - dumper.explicitStart = false; - dumper.YAMLVersion = null; - dumper.dump(stream, node); - - //Block style should start with a hyphen. - assert(stream.data[0] == '-'); - } - { - auto stream = new Appender!string(); - auto node = Node([1, 2, 3, 4, 5]); - node.setStyle(CollectionStyle.flow); - auto dumper = dumper(); - dumper.explicitEnd = false; - dumper.explicitStart = false; - dumper.YAMLVersion = null; - dumper.dump(stream, node); - - //Flow style should start with a bracket. - assert(stream.data[0] == '['); - } - { - auto stream = new Appender!string(); - auto node = Node(1); - node.setStyle(ScalarStyle.singleQuoted); - auto dumper = dumper(); - dumper.explicitEnd = false; - dumper.explicitStart = false; - dumper.YAMLVersion = null; - dumper.dump(stream, node); - - assert(stream.data == "!!int '1'\n"); - } - { - auto stream = new Appender!string(); - auto node = Node(1); - node.setStyle(ScalarStyle.doubleQuoted); - auto dumper = dumper(); - dumper.explicitEnd = false; - dumper.explicitStart = false; - dumper.YAMLVersion = null; - dumper.dump(stream, node); - - assert(stream.data == "!!int \"1\"\n"); - } - } - - private: - // Determine if the value stored by the node is of specified type. - // - // This only works for default YAML types, not for user defined types. - @property bool isType(T)() const - { - return value_.match!( - (const T _) => true, - _ => false, - ); - } - - /// Check at compile time if a type is stored natively - enum canBeType (T) = is(typeof({ value_.match!((const T _) => true, _ => false); })); - - - // Implementation of contains() and containsKey(). 
- bool contains_(T, Flag!"key" key, string func)(T rhs) const - { - final switch (nodeID) - { - case NodeID.mapping: - return findPair!(T, key)(rhs) >= 0; - case NodeID.sequence: - static if(!key) - { - foreach(ref node; getValue!(Node[])) - { - if(node == rhs){return true;} - } - return false; - } - else - { - throw new NodeException("Trying to use " ~ func ~ "() on a " ~ nodeTypeString ~ " node", - startMark_); - } - case NodeID.scalar: - case NodeID.invalid: - throw new NodeException("Trying to use " ~ func ~ "() on a " ~ nodeTypeString ~ " node", - startMark_); - } - - } - - // Implementation of remove() and removeAt() - void remove_(T, Flag!"key" key, string func)(T rhs) - { - static void removeElem(E, I)(ref Node node, I index) - { - auto elems = node.getValue!(E[]); - moveAll(elems[cast(size_t)index + 1 .. $], elems[cast(size_t)index .. $ - 1]); - elems.length = elems.length - 1; - node.setValue(elems); - } - - final switch (nodeID) - { - case NodeID.mapping: - const index = findPair!(T, key)(rhs); - if(index >= 0){removeElem!Pair(this, index);} - break; - case NodeID.sequence: - static long getIndex(ref Node node, ref T rhs) - { - foreach(idx, ref elem; node.get!(Node[])) - { - if(elem.convertsTo!T && elem.as!(T, No.stringConversion) == rhs) - { - return idx; - } - } - return -1; - } - - const index = select!key(rhs, getIndex(this, rhs)); - - // This throws if the index is not integral. - checkSequenceIndex(index); - - static if(isIntegral!(typeof(index))){removeElem!Node(this, index); break; } - else {assert(false, "Non-integral sequence index");} - case NodeID.scalar: - case NodeID.invalid: - throw new NodeException("Trying to " ~ func ~ "() from a " ~ nodeTypeString ~ " node", - startMark_); - } - } - - // Get index of pair with key (or value, if key is false) matching index. 
- // Cannot be inferred @safe due to https://issues.dlang.org/show_bug.cgi?id=16528 - sizediff_t findPair(T, Flag!"key" key = Yes.key)(const scope ref T index) - const scope @safe - { - const pairs = getValue!(Pair[])(); - const(Node)* node; - foreach(idx, ref const(Pair) pair; pairs) - { - static if(key){node = &pair.key;} - else {node = &pair.value;} - - - const bool typeMatch = (isFloatingPoint!T && (node.type.among!(NodeType.integer, NodeType.decimal))) || - (isIntegral!T && node.type == NodeType.integer) || - (is(Unqual!T==bool) && node.type == NodeType.boolean) || - (isSomeString!T && node.type == NodeType.string) || - (node.isType!T); - if(typeMatch && *node == index) - { - return idx; - } - } - return -1; - } - - // Check if index is integral and in range. - void checkSequenceIndex(T)(T index) const scope @safe - { - assert(nodeID == NodeID.sequence, - "checkSequenceIndex() called on a " ~ nodeTypeString ~ " node"); - - static if(!isIntegral!T) - { - throw new NodeException("Indexing a sequence with a non-integral type.", startMark_); - } - else - { - enforce(index >= 0 && index < getValue!(Node[]).length, - new NodeException("Sequence index out of range: " ~ to!string(index), - startMark_)); - } - } - // Safe wrapper for getting a value out of the variant. - inout(T) getValue(T)() @safe return scope inout - { - alias RType = typeof(return); - return value_.tryMatch!((RType r) => r); - } - // Safe wrapper for coercing a value out of the variant. 
- inout(T) coerceValue(T)() @trusted scope return inout - { - alias RType = typeof(return); - static if (is(typeof({ RType rt = T.init; T t = RType.init; }))) - alias TType = T; - else // `inout` matters (indirection) - alias TType = RType; - - // `inout(Node[]).to!string` apparently is not safe: - // struct SumTypeBug { - // import std.conv; - // Node[] data; - // - // string bug () inout @safe - // { - // return this.data.to!string; - // } - // } - // Doesn't compile with DMD v2.100.0 - return this.value_.tryMatch!( - (inout bool v) @safe => v.to!TType, - (inout long v) @safe => v.to!TType, - (inout Node[] v) @trusted => v.to!TType, - (inout ubyte[] v) @safe => v.to!TType, - (inout string v) @safe => v.to!TType, - (inout Node.Pair[] v) @trusted => v.to!TType, - (inout SysTime v) @trusted => v.to!TType, - (inout real v) @safe => v.to!TType, - (inout YAMLNull v) @safe => null.to!TType, - ); - } - // Safe wrapper for setting a value for the variant. - void setValue(T)(T value) @trusted - { - static if (allowed!T) - { - value_ = value; - } - else - { - auto tmpNode = cast(Node)value; - tag_ = tmpNode.tag; - scalarStyle = tmpNode.scalarStyle; - collectionStyle = tmpNode.collectionStyle; - value_ = tmpNode.value_; - } - } - - /// - public void toString (DGT) (scope DGT sink) - const scope @safe - { - this.value_.match!( - (const bool v) => formattedWrite(sink, v ? 
"true" : "false"), - (const long v) => formattedWrite(sink, "%s", v), - (const Node[] v) => formattedWrite(sink, "[%(%s, %)]", v), - (const ubyte[] v) => formattedWrite(sink, "%s", v), - (const string v) => formattedWrite(sink, `"%s"`, v), - (const Node.Pair[] v) => formattedWrite(sink, "{%(%s, %)}", v), - (const SysTime v) => formattedWrite(sink, "%s", v), - (const YAMLNull v) => formattedWrite(sink, "%s", v), - (const YAMLMerge v) => formattedWrite(sink, "%s", v), - (const real v) => formattedWrite(sink, "%s", v), - (const YAMLInvalid v) => formattedWrite(sink, "%s", v), - ); - } -} - -package: -// Merge pairs into an array of pairs based on merge rules in the YAML spec. -// -// Any new pair will only be added if there is not already a pair -// with the same key. -// -// Params: pairs = Appender managing the array of pairs to merge into. -// toMerge = Pairs to merge. -void merge(ref Appender!(Node.Pair[]) pairs, Node.Pair[] toMerge) @safe -{ - bool eq(ref Node.Pair a, ref Node.Pair b) @safe - { - return a.key == b.key; - } - - foreach(ref pair; toMerge) if(!canFind!eq(pairs.data, pair)) - { - pairs.put(pair); - } -} - -enum hasNodeConstructor(T) = hasSimpleNodeConstructor!T || hasExpandedNodeConstructor!T; -template hasSimpleNodeConstructor(T) -{ - static if (is(T == struct)) - { - enum hasSimpleNodeConstructor = is(typeof(T(Node.init))); - } - else static if (is(T == class)) - { - enum hasSimpleNodeConstructor = is(typeof(new T(Node.init))); - } - else enum hasSimpleNodeConstructor = false; -} -template hasExpandedNodeConstructor(T) -{ - static if (is(T == struct)) - { - enum hasExpandedNodeConstructor = is(typeof(T(Node.init, ""))); - } - else static if (is(T == class)) - { - enum hasExpandedNodeConstructor = is(typeof(new T(Node.init, ""))); - } - else enum hasExpandedNodeConstructor = false; -} -enum castableToNode(T) = (is(T == struct) || is(T == class)) && is(typeof(T.opCast!Node()) : Node); - -@safe unittest -{ - import dyaml : Loader, Node; - - static 
struct Foo - { - string[] bars; - - this(const Node node) - { - foreach(value; node["bars"].sequence) - { - bars ~= value.as!string.idup; - } - } - } - - Loader.fromString(`{ bars: ["a", "b"] }`) - .load - .as!(Foo); -} -@safe unittest -{ - import dyaml : Loader, Node; - import std : split, to; - - static class MyClass - { - int x, y, z; - - this(Node node) - { - auto parts = node.as!string().split(":"); - x = parts[0].to!int; - y = parts[1].to!int; - z = parts[2].to!int; - } - } - - auto loader = Loader.fromString(`"1:2:3"`); - Node node = loader.load(); - auto mc = node.get!MyClass; -} -@safe unittest -{ - import dyaml : Loader, Node; - import std : split, to; - - static class MyClass - { - int x, y, z; - - this(Node node) - { - auto parts = node.as!string().split(":"); - x = parts[0].to!int; - y = parts[1].to!int; - z = parts[2].to!int; - } - } - - auto loader = Loader.fromString(`"1:2:3"`); - const node = loader.load(); - auto mc = node.get!MyClass; -} diff --git a/source/dyaml/package.d b/source/dyaml/package.d deleted file mode 100644 index e61b716..0000000 --- a/source/dyaml/package.d +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright Ferdinand Majerech 2011. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -module dyaml; - -public import dyaml.dumper; -public import dyaml.encoding; -public import dyaml.exception; -public import dyaml.linebreak; -public import dyaml.loader; -public import dyaml.resolver; -public import dyaml.style; -public import dyaml.node; diff --git a/source/dyaml/parser.d b/source/dyaml/parser.d deleted file mode 100644 index befdfa4..0000000 --- a/source/dyaml/parser.d +++ /dev/null @@ -1,958 +0,0 @@ - -// Copyright Ferdinand Majerech 2011-2014. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -/** - * YAML parser. 
- * Code based on PyYAML: http://www.pyyaml.org - */ -module dyaml.parser; - - -import std.algorithm; -import std.array; -import std.conv; -import std.exception; -import std.typecons; - -import dyaml.event; -import dyaml.exception; -import dyaml.scanner; -import dyaml.style; -import dyaml.token; -import dyaml.tagdirective; - - -/** - * The following YAML grammar is LL(1) and is parsed by a recursive descent - * parser. - * - * stream ::= STREAM-START implicit_document? explicit_document* STREAM-END - * implicit_document ::= block_node DOCUMENT-END* - * explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* - * block_node_or_indentless_sequence ::= - * ALIAS - * | properties (block_content | indentless_block_sequence)? - * | block_content - * | indentless_block_sequence - * block_node ::= ALIAS - * | properties block_content? - * | block_content - * flow_node ::= ALIAS - * | properties flow_content? - * | flow_content - * properties ::= TAG ANCHOR? | ANCHOR TAG? - * block_content ::= block_collection | flow_collection | SCALAR - * flow_content ::= flow_collection | SCALAR - * block_collection ::= block_sequence | block_mapping - * flow_collection ::= flow_sequence | flow_mapping - * block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END - * indentless_sequence ::= (BLOCK-ENTRY block_node?)+ - * block_mapping ::= BLOCK-MAPPING_START - * ((KEY block_node_or_indentless_sequence?)? - * (VALUE block_node_or_indentless_sequence?)?)* - * BLOCK-END - * flow_sequence ::= FLOW-SEQUENCE-START - * (flow_sequence_entry FLOW-ENTRY)* - * flow_sequence_entry? - * FLOW-SEQUENCE-END - * flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? - * flow_mapping ::= FLOW-MAPPING-START - * (flow_mapping_entry FLOW-ENTRY)* - * flow_mapping_entry? - * FLOW-MAPPING-END - * flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? 
- * - * FIRST sets: - * - * stream: { STREAM-START } - * explicit_document: { DIRECTIVE DOCUMENT-START } - * implicit_document: FIRST(block_node) - * block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } - * flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } - * block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } - * flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } - * block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } - * flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } - * block_sequence: { BLOCK-SEQUENCE-START } - * block_mapping: { BLOCK-MAPPING-START } - * block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } - * indentless_sequence: { ENTRY } - * flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } - * flow_sequence: { FLOW-SEQUENCE-START } - * flow_mapping: { FLOW-MAPPING-START } - * flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } - * flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } - */ - - -/** - * Marked exception thrown at parser errors. - * - * See_Also: MarkedYAMLException - */ -class ParserException : MarkedYAMLException -{ - mixin MarkedExceptionCtors; -} - -package: -/// Generates events from tokens provided by a Scanner. -/// -/// While Parser receives tokens with non-const character slices, the events it -/// produces are immutable strings, which are usually the same slices, cast to string. -/// Parser is the last layer of D:YAML that may possibly do any modifications to these -/// slices. -final class Parser -{ - private: - ///Default tag handle shortcuts and replacements. 
- static TagDirective[] defaultTagDirectives_ = - [TagDirective("!", "!"), TagDirective("!!", "tag:yaml.org,2002:")]; - - ///Scanner providing YAML tokens. - Scanner scanner_; - - ///Event produced by the most recent state. - Event currentEvent_; - - ///YAML version string. - string YAMLVersion_ = null; - ///Tag handle shortcuts and replacements. - TagDirective[] tagDirectives_; - - ///Stack of states. - Appender!(Event delegate() @safe[]) states_; - ///Stack of marks used to keep track of extents of e.g. YAML collections. - Appender!(Mark[]) marks_; - - ///Current state. - Event delegate() @safe state_; - - public: - ///Construct a Parser using specified Scanner. - this(Scanner scanner) @safe - { - state_ = &parseStreamStart; - scanner_ = scanner; - states_.reserve(32); - marks_.reserve(32); - } - - /** - * Check if any events are left. May have side effects in some cases. - */ - bool empty() @safe - { - ensureState(); - return currentEvent_.isNull; - } - - /** - * Return the current event. - * - * Must not be called if there are no events left. - */ - Event front() @safe - { - ensureState(); - assert(!currentEvent_.isNull, "No event left to peek"); - return currentEvent_; - } - - /** - * Skip to the next event. - * - * Must not be called if there are no events left. - */ - void popFront() @safe - { - currentEvent_.id = EventID.invalid; - ensureState(); - } - - private: - /// If current event is invalid, load the next valid one if possible. - void ensureState() @safe - { - if(currentEvent_.isNull && state_ !is null) - { - currentEvent_ = state_(); - } - } - ///Pop and return the newest state in states_. - Event delegate() @safe popState() @safe - { - enforce(states_.data.length > 0, - new YAMLException("Parser: Need to pop state but no states left to pop")); - const result = states_.data.back; - states_.shrinkTo(states_.data.length - 1); - return result; - } - - ///Pop and return the newest mark in marks_. 
- Mark popMark() @safe - { - enforce(marks_.data.length > 0, - new YAMLException("Parser: Need to pop mark but no marks left to pop")); - const result = marks_.data.back; - marks_.shrinkTo(marks_.data.length - 1); - return result; - } - - /// Push a state on the stack - void pushState(Event delegate() @safe state) @safe - { - states_ ~= state; - } - /// Push a mark on the stack - void pushMark(Mark mark) @safe - { - marks_ ~= mark; - } - - /** - * stream ::= STREAM-START implicit_document? explicit_document* STREAM-END - * implicit_document ::= block_node DOCUMENT-END* - * explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* - */ - - ///Parse stream start. - Event parseStreamStart() @safe - { - const token = scanner_.front; - scanner_.popFront(); - state_ = &parseImplicitDocumentStart; - return streamStartEvent(token.startMark, token.endMark); - } - - /// Parse implicit document start, unless explicit detected: if so, parse explicit. - Event parseImplicitDocumentStart() @safe - { - // Parse an implicit document. - if(!scanner_.front.id.among!(TokenID.directive, TokenID.documentStart, - TokenID.streamEnd)) - { - tagDirectives_ = defaultTagDirectives_; - const token = scanner_.front; - - pushState(&parseDocumentEnd); - state_ = &parseBlockNode; - - return documentStartEvent(token.startMark, token.endMark, false, null, null); - } - return parseDocumentStart(); - } - - ///Parse explicit document start. - Event parseDocumentStart() @safe - { - //Parse any extra document end indicators. - while(scanner_.front.id == TokenID.documentEnd) - { - scanner_.popFront(); - } - - //Parse an explicit document. 
- if(scanner_.front.id != TokenID.streamEnd) - { - const startMark = scanner_.front.startMark; - - auto tagDirectives = processDirectives(); - enforce(scanner_.front.id == TokenID.documentStart, - new ParserException("Expected document start but found " ~ - scanner_.front.idString, - scanner_.front.startMark)); - - const endMark = scanner_.front.endMark; - scanner_.popFront(); - pushState(&parseDocumentEnd); - state_ = &parseDocumentContent; - return documentStartEvent(startMark, endMark, true, YAMLVersion_, tagDirectives); - } - else - { - //Parse the end of the stream. - const token = scanner_.front; - scanner_.popFront(); - assert(states_.data.length == 0); - assert(marks_.data.length == 0); - state_ = null; - return streamEndEvent(token.startMark, token.endMark); - } - } - - ///Parse document end (explicit or implicit). - Event parseDocumentEnd() @safe - { - Mark startMark = scanner_.front.startMark; - const bool explicit = scanner_.front.id == TokenID.documentEnd; - Mark endMark = startMark; - if (explicit) - { - endMark = scanner_.front.endMark; - scanner_.popFront(); - } - - state_ = &parseDocumentStart; - - return documentEndEvent(startMark, endMark, explicit); - } - - ///Parse document content. - Event parseDocumentContent() @safe - { - if(scanner_.front.id.among!(TokenID.directive, TokenID.documentStart, - TokenID.documentEnd, TokenID.streamEnd)) - { - state_ = popState(); - return processEmptyScalar(scanner_.front.startMark); - } - return parseBlockNode(); - } - - /// Process directives at the beginning of a document. - TagDirective[] processDirectives() @safe - { - // Destroy version and tag handles from previous document. - YAMLVersion_ = null; - tagDirectives_.length = 0; - - // Process directives. 
- while(scanner_.front.id == TokenID.directive) - { - const token = scanner_.front; - scanner_.popFront(); - string value = token.value.idup; - if(token.directive == DirectiveType.yaml) - { - enforce(YAMLVersion_ is null, - new ParserException("Duplicate YAML directive", token.startMark)); - const minor = value.split(".")[0]; - enforce(minor == "1", - new ParserException("Incompatible document (version 1.x is required)", - token.startMark)); - YAMLVersion_ = value; - } - else if(token.directive == DirectiveType.tag) - { - auto handle = value[0 .. token.valueDivider]; - - foreach(ref pair; tagDirectives_) - { - // handle - const h = pair.handle; - enforce(h != handle, new ParserException("Duplicate tag handle: " ~ handle, - token.startMark)); - } - tagDirectives_ ~= - TagDirective(handle, value[token.valueDivider .. $]); - } - // Any other directive type is ignored (only YAML and TAG are in YAML - // 1.1/1.2, any other directives are "reserved") - } - - TagDirective[] value = tagDirectives_; - - //Add any default tag handles that haven't been overridden. - foreach(ref defaultPair; defaultTagDirectives_) - { - bool found; - foreach(ref pair; tagDirectives_) if(defaultPair.handle == pair.handle) - { - found = true; - break; - } - if(!found) {tagDirectives_ ~= defaultPair; } - } - - return value; - } - - /** - * block_node_or_indentless_sequence ::= ALIAS - * | properties (block_content | indentless_block_sequence)? - * | block_content - * | indentless_block_sequence - * block_node ::= ALIAS - * | properties block_content? - * | block_content - * flow_node ::= ALIAS - * | properties flow_content? - * | flow_content - * properties ::= TAG ANCHOR? | ANCHOR TAG? - * block_content ::= block_collection | flow_collection | SCALAR - * flow_content ::= flow_collection | SCALAR - * block_collection ::= block_sequence | block_mapping - * flow_collection ::= flow_sequence | flow_mapping - */ - - ///Parse a node. 
- Event parseNode(const Flag!"block" block, - const Flag!"indentlessSequence" indentlessSequence = No.indentlessSequence) - @trusted - { - if(scanner_.front.id == TokenID.alias_) - { - const token = scanner_.front; - scanner_.popFront(); - state_ = popState(); - return aliasEvent(token.startMark, token.endMark, - cast(string)token.value); - } - - string anchor; - string tag; - Mark startMark, endMark, tagMark; - bool invalidMarks = true; - // The index in the tag string where tag handle ends and tag suffix starts. - uint tagHandleEnd; - - //Get anchor/tag if detected. Return false otherwise. - bool get(const TokenID id, const Flag!"first" first, ref string target) @safe - { - if(scanner_.front.id != id){return false;} - invalidMarks = false; - const token = scanner_.front; - scanner_.popFront(); - if(first){startMark = token.startMark;} - if(id == TokenID.tag) - { - tagMark = token.startMark; - tagHandleEnd = token.valueDivider; - } - endMark = token.endMark; - target = token.value.idup; - return true; - } - - //Anchor and/or tag can be in any order. - if(get(TokenID.anchor, Yes.first, anchor)){get(TokenID.tag, No.first, tag);} - else if(get(TokenID.tag, Yes.first, tag)) {get(TokenID.anchor, No.first, anchor);} - - if(tag !is null){tag = processTag(tag, tagHandleEnd, startMark, tagMark);} - - if(invalidMarks) - { - startMark = endMark = scanner_.front.startMark; - } - - bool implicit = (tag is null || tag == "!"); - - if(indentlessSequence && scanner_.front.id == TokenID.blockEntry) - { - state_ = &parseIndentlessSequenceEntry; - return sequenceStartEvent - (startMark, scanner_.front.endMark, anchor, - tag, implicit, CollectionStyle.block); - } - - if(scanner_.front.id == TokenID.scalar) - { - auto token = scanner_.front; - scanner_.popFront(); - auto value = token.style == ScalarStyle.doubleQuoted - ? 
handleDoubleQuotedScalarEscapes(token.value) - : cast(string)token.value; - - implicit = (token.style == ScalarStyle.plain && tag is null) || tag == "!"; - state_ = popState(); - return scalarEvent(startMark, token.endMark, anchor, tag, - implicit, value, token.style); - } - - if(scanner_.front.id == TokenID.flowSequenceStart) - { - endMark = scanner_.front.endMark; - state_ = &parseFlowSequenceEntry!(Yes.first); - return sequenceStartEvent(startMark, endMark, anchor, tag, - implicit, CollectionStyle.flow); - } - - if(scanner_.front.id == TokenID.flowMappingStart) - { - endMark = scanner_.front.endMark; - state_ = &parseFlowMappingKey!(Yes.first); - return mappingStartEvent(startMark, endMark, anchor, tag, - implicit, CollectionStyle.flow); - } - - if(block && scanner_.front.id == TokenID.blockSequenceStart) - { - endMark = scanner_.front.endMark; - state_ = &parseBlockSequenceEntry!(Yes.first); - return sequenceStartEvent(startMark, endMark, anchor, tag, - implicit, CollectionStyle.block); - } - - if(block && scanner_.front.id == TokenID.blockMappingStart) - { - endMark = scanner_.front.endMark; - state_ = &parseBlockMappingKey!(Yes.first); - return mappingStartEvent(startMark, endMark, anchor, tag, - implicit, CollectionStyle.block); - } - - if(anchor !is null || tag !is null) - { - state_ = popState(); - - //PyYAML uses a tuple(implicit, false) for the second last arg here, - //but the second bool is never used after that - so we don't use it. - - //Empty scalars are allowed even if a tag or an anchor is specified. - return scalarEvent(startMark, endMark, anchor, tag, - implicit , ""); - } - - const token = scanner_.front; - throw new ParserException("While parsing a " ~ (block ? "block" : "flow") ~ " node", - startMark, "expected node content, but found: " - ~ token.idString, token.startMark); - } - - /// Handle escape sequences in a double quoted scalar. - /// - /// Moved here from scanner as it can't always be done in-place with slices. 
- string handleDoubleQuotedScalarEscapes(const(char)[] tokenValue) const @safe - { - string notInPlace; - bool inEscape; - auto appender = appender!(string)(); - for(const(char)[] oldValue = tokenValue; !oldValue.empty();) - { - const dchar c = oldValue.front(); - oldValue.popFront(); - - if(!inEscape) - { - if(c != '\\') - { - if(notInPlace is null) { appender.put(c); } - else { notInPlace ~= c; } - continue; - } - // Escape sequence starts with a '\' - inEscape = true; - continue; - } - - import dyaml.escapes; - scope(exit) { inEscape = false; } - - // 'Normal' escape sequence. - if(c.among!(escapes)) - { - if(notInPlace is null) - { - // \L and \C can't be handled in place as the expand into - // many-byte unicode chars - if(c != 'L' && c != 'P') - { - appender.put(dyaml.escapes.fromEscape(c)); - continue; - } - // Need to duplicate as we won't fit into - // token.value - which is what appender uses - notInPlace = appender.data.dup; - notInPlace ~= dyaml.escapes.fromEscape(c); - continue; - } - notInPlace ~= dyaml.escapes.fromEscape(c); - continue; - } - - // Unicode char written in hexadecimal in an escape sequence. - if(c.among!(escapeHexCodeList)) - { - // Scanner has already checked that the hex string is valid. - - const hexLength = dyaml.escapes.escapeHexLength(c); - // Any hex digits are 1-byte so this works. - const(char)[] hex = oldValue[0 .. hexLength]; - oldValue = oldValue[hexLength .. $]; - import std.ascii : isHexDigit; - assert(!hex.canFind!(d => !d.isHexDigit), - "Scanner must ensure the hex string is valid"); - - const decoded = cast(dchar)parse!int(hex, 16u); - if(notInPlace is null) { appender.put(decoded); } - else { notInPlace ~= decoded; } - continue; - } - - assert(false, "Scanner must handle unsupported escapes"); - } - - return notInPlace is null ? appender.data : notInPlace; - } - - /** - * Process a tag string retrieved from a tag token. - * - * Params: tag = Tag before processing. 
- * handleEnd = Index in tag where tag handle ends and tag suffix - * starts. - * startMark = Position of the node the tag belongs to. - * tagMark = Position of the tag. - */ - string processTag(const string tag, const uint handleEnd, - const Mark startMark, const Mark tagMark) - const @safe - { - const handle = tag[0 .. handleEnd]; - const suffix = tag[handleEnd .. $]; - - if(handle.length > 0) - { - string replacement; - foreach(ref pair; tagDirectives_) - { - if(pair.handle == handle) - { - replacement = pair.prefix; - break; - } - } - //handle must be in tagDirectives_ - enforce(replacement !is null, - new ParserException("While parsing a node", startMark, - "found undefined tag handle: " ~ handle, tagMark)); - return replacement ~ suffix; - } - return suffix; - } - - ///Wrappers to parse nodes. - Event parseBlockNode() @safe {return parseNode(Yes.block);} - Event parseFlowNode() @safe {return parseNode(No.block);} - Event parseBlockNodeOrIndentlessSequence() @safe {return parseNode(Yes.block, Yes.indentlessSequence);} - - ///block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END - - ///Parse an entry of a block sequence. If first is true, this is the first entry. 
- Event parseBlockSequenceEntry(Flag!"first" first)() @safe - { - static if(first) - { - pushMark(scanner_.front.startMark); - scanner_.popFront(); - } - - if(scanner_.front.id == TokenID.blockEntry) - { - const token = scanner_.front; - scanner_.popFront(); - if(!scanner_.front.id.among!(TokenID.blockEntry, TokenID.blockEnd)) - { - pushState(&parseBlockSequenceEntry!(No.first)); - return parseBlockNode(); - } - - state_ = &parseBlockSequenceEntry!(No.first); - return processEmptyScalar(token.endMark); - } - - if(scanner_.front.id != TokenID.blockEnd) - { - const token = scanner_.front; - throw new ParserException("While parsing a block collection", marks_.data.back, - "expected block end, but found " ~ token.idString, - token.startMark); - } - - state_ = popState(); - popMark(); - const token = scanner_.front; - scanner_.popFront(); - return sequenceEndEvent(token.startMark, token.endMark); - } - - ///indentless_sequence ::= (BLOCK-ENTRY block_node?)+ - - ///Parse an entry of an indentless sequence. - Event parseIndentlessSequenceEntry() @safe - { - if(scanner_.front.id == TokenID.blockEntry) - { - const token = scanner_.front; - scanner_.popFront(); - - if(!scanner_.front.id.among!(TokenID.blockEntry, TokenID.key, - TokenID.value, TokenID.blockEnd)) - { - pushState(&parseIndentlessSequenceEntry); - return parseBlockNode(); - } - - state_ = &parseIndentlessSequenceEntry; - return processEmptyScalar(token.endMark); - } - - state_ = popState(); - const token = scanner_.front; - return sequenceEndEvent(token.startMark, token.endMark); - } - - /** - * block_mapping ::= BLOCK-MAPPING_START - * ((KEY block_node_or_indentless_sequence?)? - * (VALUE block_node_or_indentless_sequence?)?)* - * BLOCK-END - */ - - ///Parse a key in a block mapping. If first is true, this is the first key. 
- Event parseBlockMappingKey(Flag!"first" first)() @safe - { - static if(first) - { - pushMark(scanner_.front.startMark); - scanner_.popFront(); - } - - if(scanner_.front.id == TokenID.key) - { - const token = scanner_.front; - scanner_.popFront(); - - if(!scanner_.front.id.among!(TokenID.key, TokenID.value, TokenID.blockEnd)) - { - pushState(&parseBlockMappingValue); - return parseBlockNodeOrIndentlessSequence(); - } - - state_ = &parseBlockMappingValue; - return processEmptyScalar(token.endMark); - } - - if(scanner_.front.id != TokenID.blockEnd) - { - const token = scanner_.front; - throw new ParserException("While parsing a block mapping", marks_.data.back, - "expected block end, but found: " ~ token.idString, - token.startMark); - } - - state_ = popState(); - popMark(); - const token = scanner_.front; - scanner_.popFront(); - return mappingEndEvent(token.startMark, token.endMark); - } - - ///Parse a value in a block mapping. - Event parseBlockMappingValue() @safe - { - if(scanner_.front.id == TokenID.value) - { - const token = scanner_.front; - scanner_.popFront(); - - if(!scanner_.front.id.among!(TokenID.key, TokenID.value, TokenID.blockEnd)) - { - pushState(&parseBlockMappingKey!(No.first)); - return parseBlockNodeOrIndentlessSequence(); - } - - state_ = &parseBlockMappingKey!(No.first); - return processEmptyScalar(token.endMark); - } - - state_= &parseBlockMappingKey!(No.first); - return processEmptyScalar(scanner_.front.startMark); - } - - /** - * flow_sequence ::= FLOW-SEQUENCE-START - * (flow_sequence_entry FLOW-ENTRY)* - * flow_sequence_entry? - * FLOW-SEQUENCE-END - * flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? - * - * Note that while production rules for both flow_sequence_entry and - * flow_mapping_entry are equal, their interpretations are different. - * For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` - * generate an inline mapping (set syntax). - */ - - ///Parse an entry in a flow sequence. 
If first is true, this is the first entry. - Event parseFlowSequenceEntry(Flag!"first" first)() @safe - { - static if(first) - { - pushMark(scanner_.front.startMark); - scanner_.popFront(); - } - - if(scanner_.front.id != TokenID.flowSequenceEnd) - { - static if(!first) - { - if(scanner_.front.id == TokenID.flowEntry) - { - scanner_.popFront(); - } - else - { - const token = scanner_.front; - throw new ParserException("While parsing a flow sequence", marks_.data.back, - "expected ',' or ']', but got: " ~ - token.idString, token.startMark); - } - } - - if(scanner_.front.id == TokenID.key) - { - const token = scanner_.front; - state_ = &parseFlowSequenceEntryMappingKey; - return mappingStartEvent(token.startMark, token.endMark, - null, null, true, CollectionStyle.flow); - } - else if(scanner_.front.id != TokenID.flowSequenceEnd) - { - pushState(&parseFlowSequenceEntry!(No.first)); - return parseFlowNode(); - } - } - - const token = scanner_.front; - scanner_.popFront(); - state_ = popState(); - popMark(); - return sequenceEndEvent(token.startMark, token.endMark); - } - - ///Parse a key in flow context. - Event parseFlowKey(Event delegate() @safe nextState) @safe - { - const token = scanner_.front; - scanner_.popFront(); - - if(!scanner_.front.id.among!(TokenID.value, TokenID.flowEntry, - TokenID.flowSequenceEnd)) - { - pushState(nextState); - return parseFlowNode(); - } - - state_ = nextState; - return processEmptyScalar(token.endMark); - } - - ///Parse a mapping key in an entry in a flow sequence. - Event parseFlowSequenceEntryMappingKey() @safe - { - return parseFlowKey(&parseFlowSequenceEntryMappingValue); - } - - ///Parse a mapping value in a flow context. 
- Event parseFlowValue(TokenID checkId, Event delegate() @safe nextState) - @safe - { - if(scanner_.front.id == TokenID.value) - { - const token = scanner_.front; - scanner_.popFront(); - if(!scanner_.front.id.among(TokenID.flowEntry, checkId)) - { - pushState(nextState); - return parseFlowNode(); - } - - state_ = nextState; - return processEmptyScalar(token.endMark); - } - - state_ = nextState; - return processEmptyScalar(scanner_.front.startMark); - } - - ///Parse a mapping value in an entry in a flow sequence. - Event parseFlowSequenceEntryMappingValue() @safe - { - return parseFlowValue(TokenID.flowSequenceEnd, - &parseFlowSequenceEntryMappingEnd); - } - - ///Parse end of a mapping in a flow sequence entry. - Event parseFlowSequenceEntryMappingEnd() @safe - { - state_ = &parseFlowSequenceEntry!(No.first); - const token = scanner_.front; - return mappingEndEvent(token.startMark, token.startMark); - } - - /** - * flow_mapping ::= FLOW-MAPPING-START - * (flow_mapping_entry FLOW-ENTRY)* - * flow_mapping_entry? - * FLOW-MAPPING-END - * flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? - */ - - ///Parse a key in a flow mapping. 
- Event parseFlowMappingKey(Flag!"first" first)() @safe - { - static if(first) - { - pushMark(scanner_.front.startMark); - scanner_.popFront(); - } - - if(scanner_.front.id != TokenID.flowMappingEnd) - { - static if(!first) - { - if(scanner_.front.id == TokenID.flowEntry) - { - scanner_.popFront(); - } - else - { - const token = scanner_.front; - throw new ParserException("While parsing a flow mapping", marks_.data.back, - "expected ',' or '}', but got: " ~ - token.idString, token.startMark); - } - } - - if(scanner_.front.id == TokenID.key) - { - return parseFlowKey(&parseFlowMappingValue); - } - - if(scanner_.front.id != TokenID.flowMappingEnd) - { - pushState(&parseFlowMappingEmptyValue); - return parseFlowNode(); - } - } - - const token = scanner_.front; - scanner_.popFront(); - state_ = popState(); - popMark(); - return mappingEndEvent(token.startMark, token.endMark); - } - - ///Parse a value in a flow mapping. - Event parseFlowMappingValue() @safe - { - return parseFlowValue(TokenID.flowMappingEnd, &parseFlowMappingKey!(No.first)); - } - - ///Parse an empty value in a flow mapping. - Event parseFlowMappingEmptyValue() @safe - { - state_ = &parseFlowMappingKey!(No.first); - return processEmptyScalar(scanner_.front.startMark); - } - - ///Return an empty scalar. - Event processEmptyScalar(const Mark mark) @safe pure nothrow const @nogc - { - return scalarEvent(mark, mark, null, null, true, ""); - } -} diff --git a/source/dyaml/queue.d b/source/dyaml/queue.d deleted file mode 100644 index 57b0d34..0000000 --- a/source/dyaml/queue.d +++ /dev/null @@ -1,272 +0,0 @@ - -// Copyright Ferdinand Majerech 2011-2014. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -module dyaml.queue; - - -import std.traits : hasMember, hasIndirections; - -package: - -/// Simple queue implemented as a singly linked list with a tail pointer. 
-/// -/// Needed in some D:YAML code that needs a queue-like structure without too much -/// reallocation that goes with an array. -/// -/// Allocations are non-GC and are damped by a free-list based on the nodes -/// that are removed. Note that elements lifetime must be managed -/// outside. -struct Queue(T) -if (!hasMember!(T, "__xdtor")) -{ - -private: - - // Linked list node containing one element and pointer to the next node. - struct Node - { - T payload_; - Node* next_; - } - - // Start of the linked list - first element added in time (end of the queue). - Node* first_; - // Last element of the linked list - last element added in time (start of the queue). - Node* last_; - // free-list - Node* stock; - - // Length of the queue. - size_t length_; - - // allocate a new node or recycle one from the stock. - Node* makeNewNode(T thePayload, Node* theNext = null) @trusted nothrow @nogc - { - import std.experimental.allocator : make; - import std.experimental.allocator.mallocator : Mallocator; - - Node* result; - if (stock !is null) - { - result = stock; - stock = result.next_; - result.payload_ = thePayload; - result.next_ = theNext; - } - else - { - result = Mallocator.instance.make!(Node)(thePayload, theNext); - // GC can dispose T managed member if it thinks they are no used... - static if (hasIndirections!T) - { - import core.memory : GC; - GC.addRange(result, Node.sizeof); - } - } - return result; - } - - // free the stock of available free nodes. - void freeStock() @trusted @nogc nothrow - { - import std.experimental.allocator.mallocator : Mallocator; - - while (stock !is null) - { - Node* toFree = stock; - stock = stock.next_; - static if (hasIndirections!T) - { - import core.memory : GC; - GC.removeRange(toFree); - } - Mallocator.instance.deallocate((cast(ubyte*) toFree)[0 .. 
Node.sizeof]); - } - } - -public: - - @disable void opAssign(ref Queue); - @disable bool opEquals(ref Queue); - @disable int opCmp(ref Queue); - - this(this) @safe nothrow @nogc - { - auto node = first_; - first_ = null; - last_ = null; - while (node !is null) - { - Node* newLast = makeNewNode(node.payload_); - if (last_ !is null) - last_.next_ = newLast; - if (first_ is null) - first_ = newLast; - last_ = newLast; - node = node.next_; - } - } - - ~this() @safe nothrow @nogc - { - freeStock(); - stock = first_; - freeStock(); - } - - /// Returns a forward range iterating over this queue. - auto range() @safe pure nothrow @nogc - { - static struct Result - { - private Node* cursor; - - void popFront() @safe pure nothrow @nogc - { - cursor = cursor.next_; - } - ref T front() @safe pure nothrow @nogc - in(cursor !is null) - { - return cursor.payload_; - } - bool empty() @safe pure nothrow @nogc const - { - return cursor is null; - } - } - return Result(first_); - } - - /// Push a new item to the queue. - void push(T item) @nogc @safe nothrow - { - Node* newLast = makeNewNode(item); - if (last_ !is null) - last_.next_ = newLast; - if (first_ is null) - first_ = newLast; - last_ = newLast; - ++length_; - } - - /// Insert a new item putting it to specified index in the linked list. - void insert(T item, const size_t idx) @safe nothrow - in - { - assert(idx <= length_); - } - do - { - if (idx == 0) - { - first_ = makeNewNode(item, first_); - ++length_; - } - // Adding before last added element, so we can just push. - else if (idx == length_) - { - push(item); - } - else - { - // Get the element before one we're inserting. - Node* current = first_; - foreach (i; 1 .. idx) - current = current.next_; - - assert(current); - // Insert a new node after current, and put current.next_ behind it. - current.next_ = makeNewNode(item, current.next_); - ++length_; - } - } - - /// Returns: The next element in the queue and remove it. 
- T pop() @safe nothrow - in - { - assert(!empty, "Trying to pop an element from an empty queue"); - } - do - { - T result = peek(); - - Node* oldStock = stock; - Node* old = first_; - first_ = first_.next_; - - // start the stock from the popped element - stock = old; - old.next_ = null; - // add the existing "old" stock to the new first stock element - if (oldStock !is null) - stock.next_ = oldStock; - - if (--length_ == 0) - { - assert(first_ is null); - last_ = null; - } - - return result; - } - - /// Returns: The next element in the queue. - ref inout(T) peek() @safe pure nothrow inout @nogc - in - { - assert(!empty, "Trying to peek at an element in an empty queue"); - } - do - { - return first_.payload_; - } - - /// Returns: true of the queue empty, false otherwise. - bool empty() @safe pure nothrow const @nogc - { - return first_ is null; - } - - /// Returns: The number of elements in the queue. - size_t length() @safe pure nothrow const @nogc - { - return length_; - } -} - -@safe nothrow unittest -{ - auto queue = Queue!int(); - assert(queue.empty); - foreach (i; 0 .. 65) - { - queue.push(5); - assert(queue.pop() == 5); - assert(queue.empty); - assert(queue.length_ == 0); - } - - int[] array = [1, -1, 2, -2, 3, -3, 4, -4, 5, -5]; - foreach (i; array) - { - queue.push(i); - } - - array = 42 ~ array[0 .. 3] ~ 42 ~ array[3 .. $] ~ 42; - queue.insert(42, 3); - queue.insert(42, 0); - queue.insert(42, queue.length); - - int[] array2; - while (!queue.empty) - { - array2 ~= queue.pop(); - } - - assert(array == array2); -} diff --git a/source/dyaml/reader.d b/source/dyaml/reader.d deleted file mode 100644 index ae44c80..0000000 --- a/source/dyaml/reader.d +++ /dev/null @@ -1,909 +0,0 @@ - -// Copyright Ferdinand Majerech 2011-2014. -// Distributed under the Boost Software License, Version 1.0. 
-// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -module dyaml.reader; - - -import core.stdc.stdlib; -import core.stdc.string; -import core.thread; - -import std.algorithm; -import std.array; -import std.conv; -import std.exception; -import std.range; -import std.string; -import std.system; -import std.typecons; -import std.utf; - -import tinyendian; - -import dyaml.encoding; -import dyaml.exception; - -alias isBreak = among!('\n', '\u0085', '\u2028', '\u2029'); - -package: - - -///Exception thrown at Reader errors. -class ReaderException : YAMLException -{ - this(string msg, string file = __FILE__, size_t line = __LINE__) - @safe pure nothrow - { - super("Reader error: " ~ msg, file, line); - } -} - -/// Provides an API to read characters from a UTF-8 buffer and build slices into that -/// buffer to avoid allocations (see SliceBuilder). -final class Reader -{ - private: - // Buffer of currently loaded characters. - char[] buffer_; - - // Current position within buffer. Only data after this position can be read. - size_t bufferOffset_; - - // Index of the current character in the buffer. - size_t charIndex_; - // Number of characters (code points) in buffer_. - size_t characterCount_; - - // File name - string name_; - // Current line in file. - uint line_; - // Current column in file. - uint column_; - - // Original Unicode encoding of the data. - Encoding encoding_; - - version(unittest) - { - // Endianness of the input before it was converted (for testing) - Endian endian_; - } - - // The number of consecutive ASCII characters starting at bufferOffset_. - // - // Used to minimize UTF-8 decoding. - size_t upcomingASCII_; - - // Index to buffer_ where the last decoded character starts. - size_t lastDecodedBufferOffset_; - // Offset, relative to charIndex_, of the last decoded character, - // in code points, not chars. - size_t lastDecodedCharOffset_; - - public: - /// Construct a Reader. 
- /// - /// Params: buffer = Buffer with YAML data. This may be e.g. the entire - /// contents of a file or a string. $(B will) be modified by - /// the Reader and other parts of D:YAML (D:YAML tries to - /// reuse the buffer to minimize memory allocations) - /// name = File name if the buffer is the contents of a file or - /// `""` if the buffer is the contents of a string. - /// - /// Throws: ReaderException on a UTF decoding error or if there are - /// nonprintable Unicode characters illegal in YAML. - this(ubyte[] buffer, string name = "") @safe pure - { - name_ = name; - auto endianResult = fixUTFByteOrder(buffer); - if(endianResult.bytesStripped > 0) - { - throw new ReaderException("Size of UTF-16 or UTF-32 input not aligned " ~ - "to 2 or 4 bytes, respectively"); - } - - version(unittest) { endian_ = endianResult.endian; } - encoding_ = endianResult.encoding; - - auto utf8Result = toUTF8(endianResult.array, endianResult.encoding); - const msg = utf8Result.errorMessage; - if(msg !is null) - { - throw new ReaderException("Error when converting to UTF-8: " ~ msg); - } - - buffer_ = utf8Result.utf8; - - characterCount_ = utf8Result.characterCount; - // Check that all characters in buffer are printable. - enforce(isPrintableValidUTF8(buffer_), - new ReaderException("Special unicode characters are not allowed")); - - this.sliceBuilder = SliceBuilder(this); - checkASCII(); - } - - /// Get character at specified index relative to current position. - /// - /// Params: index = Index of the character to get relative to current position - /// in the buffer. Can point outside of the buffer; In that - /// case, '\0' will be returned. - /// - /// Returns: Character at specified position or '\0' if outside of the buffer. - /// - // XXX removed; search for 'risky' to find why. - // Throws: ReaderException if trying to read past the end of the buffer. 
- dchar peek(const size_t index) @safe pure - { - if(index < upcomingASCII_) { return buffer_[bufferOffset_ + index]; } - if(characterCount_ <= charIndex_ + index) - { - // XXX This is risky; revert this if bugs are introduced. We rely on - // the assumption that Reader only uses peek() to detect end of buffer. - // The test suite passes. - // Revert this case here and in other peek() versions if this causes - // errors. - // throw new ReaderException("Trying to read past the end of the buffer"); - return '\0'; - } - - // Optimized path for Scanner code that peeks chars in linear order to - // determine the length of some sequence. - if(index == lastDecodedCharOffset_) - { - ++lastDecodedCharOffset_; - const char b = buffer_[lastDecodedBufferOffset_]; - // ASCII - if(b < 0x80) - { - ++lastDecodedBufferOffset_; - return b; - } - return decode(buffer_, lastDecodedBufferOffset_); - } - - // 'Slow' path where we decode everything up to the requested character. - const asciiToTake = min(upcomingASCII_, index); - lastDecodedCharOffset_ = asciiToTake; - lastDecodedBufferOffset_ = bufferOffset_ + asciiToTake; - dchar d; - while(lastDecodedCharOffset_ <= index) - { - d = decodeNext(); - } - - return d; - } - - /// Optimized version of peek() for the case where peek index is 0. - dchar peek() @safe pure - { - if(upcomingASCII_ > 0) { return buffer_[bufferOffset_]; } - if(characterCount_ <= charIndex_) { return '\0'; } - - lastDecodedCharOffset_ = 0; - lastDecodedBufferOffset_ = bufferOffset_; - return decodeNext(); - } - - /// Get byte at specified index relative to current position. - /// - /// Params: index = Index of the byte to get relative to current position - /// in the buffer. Can point outside of the buffer; In that - /// case, '\0' will be returned. - /// - /// Returns: Byte at specified position or '\0' if outside of the buffer. - char peekByte(const size_t index) @safe pure nothrow @nogc - { - return characterCount_ > (charIndex_ + index) ? 
buffer_[bufferOffset_ + index] : '\0'; - } - - /// Optimized version of peekByte() for the case where peek byte index is 0. - char peekByte() @safe pure nothrow @nogc - { - return characterCount_ > charIndex_ ? buffer_[bufferOffset_] : '\0'; - } - - - /// Get specified number of characters starting at current position. - /// - /// Note: This gets only a "view" into the internal buffer, which will be - /// invalidated after other Reader calls. Use SliceBuilder to build slices - /// for permanent use. - /// - /// Params: length = Number of characters (code points, not bytes) to get. May - /// reach past the end of the buffer; in that case the returned - /// slice will be shorter. - /// - /// Returns: Characters starting at current position or an empty slice if out of bounds. - char[] prefix(const size_t length) @safe pure - { - return slice(length); - } - - /// Get specified number of bytes, not code points, starting at current position. - /// - /// Note: This gets only a "view" into the internal buffer, which will be - /// invalidated after other Reader calls. Use SliceBuilder to build slices - /// for permanent use. - /// - /// Params: length = Number bytes (not code points) to get. May NOT reach past - /// the end of the buffer; should be used with peek() to avoid - /// this. - /// - /// Returns: Bytes starting at current position. - char[] prefixBytes(const size_t length) @safe pure nothrow @nogc - in(length == 0 || bufferOffset_ + length <= buffer_.length, "prefixBytes out of bounds") - { - return buffer_[bufferOffset_ .. bufferOffset_ + length]; - } - - /// Get a slice view of the internal buffer, starting at the current position. - /// - /// Note: This gets only a "view" into the internal buffer, - /// which get invalidated after other Reader calls. - /// - /// Params: end = End of the slice relative to current position. May reach past - /// the end of the buffer; in that case the returned slice will - /// be shorter. 
- /// - /// Returns: Slice into the internal buffer or an empty slice if out of bounds. - char[] slice(const size_t end) @safe pure - { - // Fast path in case the caller has already peek()ed all the way to end. - if(end == lastDecodedCharOffset_) - { - return buffer_[bufferOffset_ .. lastDecodedBufferOffset_]; - } - - const asciiToTake = min(upcomingASCII_, end, buffer_.length); - lastDecodedCharOffset_ = asciiToTake; - lastDecodedBufferOffset_ = bufferOffset_ + asciiToTake; - - // 'Slow' path - decode everything up to end. - while(lastDecodedCharOffset_ < end && - lastDecodedBufferOffset_ < buffer_.length) - { - decodeNext(); - } - - return buffer_[bufferOffset_ .. lastDecodedBufferOffset_]; - } - - /// Get the next character, moving buffer position beyond it. - /// - /// Returns: Next character. - /// - /// Throws: ReaderException if trying to read past the end of the buffer - /// or if invalid data is read. - dchar get() @safe pure - { - const result = peek(); - forward(); - return result; - } - - /// Get specified number of characters, moving buffer position beyond them. - /// - /// Params: length = Number or characters (code points, not bytes) to get. - /// - /// Returns: Characters starting at current position. - char[] get(const size_t length) @safe pure - { - auto result = slice(length); - forward(length); - return result; - } - - /// Move current position forward. - /// - /// Params: length = Number of characters to move position forward. - void forward(size_t length) @safe pure - { - while(length > 0) - { - auto asciiToTake = min(upcomingASCII_, length); - charIndex_ += asciiToTake; - length -= asciiToTake; - upcomingASCII_ -= asciiToTake; - - for(; asciiToTake > 0; --asciiToTake) - { - const c = buffer_[bufferOffset_++]; - // c is ASCII, do we only need to check for ASCII line breaks. 
- if(c == '\n' || (c == '\r' && buffer_[bufferOffset_] != '\n')) - { - ++line_; - column_ = 0; - continue; - } - ++column_; - } - - // If we have used up all upcoming ASCII chars, the next char is - // non-ASCII even after this returns, so upcomingASCII_ doesn't need to - // be updated - it's zero. - if(length == 0) { break; } - - assert(upcomingASCII_ == 0, - "Running unicode handling code but we haven't run out of ASCII chars"); - assert(bufferOffset_ < buffer_.length, - "Attempted to decode past the end of YAML buffer"); - assert(buffer_[bufferOffset_] >= 0x80, - "ASCII must be handled by preceding code"); - - ++charIndex_; - const c = decode(buffer_, bufferOffset_); - - // New line. (can compare with '\n' without decoding since it's ASCII) - if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n')) - { - ++line_; - column_ = 0; - } - else if(c != '\uFEFF') { ++column_; } - --length; - checkASCII(); - } - - lastDecodedBufferOffset_ = bufferOffset_; - lastDecodedCharOffset_ = 0; - } - - /// Move current position forward by one character. - void forward() @safe pure - { - ++charIndex_; - lastDecodedBufferOffset_ = bufferOffset_; - lastDecodedCharOffset_ = 0; - - // ASCII - if(upcomingASCII_ > 0) - { - --upcomingASCII_; - const c = buffer_[bufferOffset_++]; - - if(c == '\n' || (c == '\r' && buffer_[bufferOffset_] != '\n')) - { - ++line_; - column_ = 0; - return; - } - ++column_; - return; - } - - // UTF-8 - assert(bufferOffset_ < buffer_.length, - "Attempted to decode past the end of YAML buffer"); - assert(buffer_[bufferOffset_] >= 0x80, - "ASCII must be handled by preceding code"); - - const c = decode(buffer_, bufferOffset_); - - // New line. (can compare with '\n' without decoding since it's ASCII) - if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n')) - { - ++line_; - column_ = 0; - } - else if(c != '\uFEFF') { ++column_; } - - checkASCII(); - } - - /// Used to build slices of read data in Reader; to avoid allocations. 
- SliceBuilder sliceBuilder; - - /// Get a string describing current buffer position, used for error messages. - Mark mark() const pure nothrow @nogc @safe { return Mark(name_, line_, column_); } - - /// Get file name. - string name() const @safe pure nothrow @nogc { return name_; } - - /// Set file name. - void name(string name) pure @safe nothrow @nogc { name_ = name; } - - /// Get current line number. - uint line() const @safe pure nothrow @nogc { return line_; } - - /// Get current column number. - uint column() const @safe pure nothrow @nogc { return column_; } - - /// Get index of the current character in the buffer. - size_t charIndex() const @safe pure nothrow @nogc { return charIndex_; } - - /// Get encoding of the input buffer. - Encoding encoding() const @safe pure nothrow @nogc { return encoding_; } - -private: - // Update upcomingASCII_ (should be called forward()ing over a UTF-8 sequence) - void checkASCII() @safe pure nothrow @nogc - { - upcomingASCII_ = countASCII(buffer_[bufferOffset_ .. $]); - } - - // Decode the next character relative to - // lastDecodedCharOffset_/lastDecodedBufferOffset_ and update them. - // - // Does not advance the buffer position. Used in peek() and slice(). - dchar decodeNext() @safe pure - { - assert(lastDecodedBufferOffset_ < buffer_.length, - "Attempted to decode past the end of YAML buffer"); - const char b = buffer_[lastDecodedBufferOffset_]; - ++lastDecodedCharOffset_; - // ASCII - if(b < 0x80) - { - ++lastDecodedBufferOffset_; - return b; - } - - return decode(buffer_, lastDecodedBufferOffset_); - } -} - -/// Used to build slices of already read data in Reader buffer, avoiding allocations. -/// -/// Usually these slices point to unchanged Reader data, but sometimes the data is -/// changed due to how YAML interprets certain characters/strings. -/// -/// See begin() documentation. -struct SliceBuilder -{ -private: - // No copying by the user. 
- @disable this(this); - @disable void opAssign(ref SliceBuilder); - - // Reader this builder works in. - Reader reader_; - - // Start of the slice om reader_.buffer_ (size_t.max while no slice being build) - size_t start_ = size_t.max; - // End of the slice om reader_.buffer_ (size_t.max while no slice being build) - size_t end_ = size_t.max; - - // Stack of slice ends to revert to (see Transaction) - // - // Very few levels as we don't want arbitrarily nested transactions. - size_t[4] endStack_; - // The number of elements currently in endStack_. - size_t endStackUsed_; - - @safe const pure nothrow @nogc invariant() - { - if(!inProgress) { return; } - assert(end_ <= reader_.bufferOffset_, "Slice ends after buffer position"); - assert(start_ <= end_, "Slice start after slice end"); - } - - // Is a slice currently being built? - bool inProgress() @safe const pure nothrow @nogc - in(start_ == size_t.max ? end_ == size_t.max : end_ != size_t.max, "start_/end_ are not consistent") - { - return start_ != size_t.max; - } - -public: - /// Begin building a slice. - /// - /// Only one slice can be built at any given time; before beginning a new slice, - /// finish the previous one (if any). - /// - /// The slice starts at the current position in the Reader buffer. It can only be - /// extended up to the current position in the buffer; Reader methods get() and - /// forward() move the position. E.g. it is valid to extend a slice by write()-ing - /// a string just returned by get() - but not one returned by prefix() unless the - /// position has changed since the prefix() call. - void begin() @safe pure nothrow @nogc - in(!inProgress, "Beginning a slice while another slice is being built") - in(endStackUsed_ == 0, "Slice stack not empty at slice begin") - { - - start_ = reader_.bufferOffset_; - end_ = reader_.bufferOffset_; - } - - /// Finish building a slice and return it. 
- /// - /// Any Transactions on the slice must be committed or destroyed before the slice - /// is finished. - /// - /// Returns a string; once a slice is finished it is definitive that its contents - /// will not be changed. - char[] finish() @safe pure nothrow @nogc - in(inProgress, "finish called without begin") - in(endStackUsed_ == 0, "Finishing a slice with running transactions.") - { - - auto result = reader_.buffer_[start_ .. end_]; - start_ = end_ = size_t.max; - return result; - } - - /// Write a string to the slice being built. - /// - /// Data can only be written up to the current position in the Reader buffer. - /// - /// If str is a string returned by a Reader method, and str starts right after the - /// end of the slice being built, the slice is extended (trivial operation). - /// - /// See_Also: begin - void write(scope char[] str) @safe pure nothrow @nogc - { - assert(inProgress, "write called without begin"); - assert(end_ <= reader_.bufferOffset_, - "AT START: Slice ends after buffer position"); - - // Nothing? Already done. - if (str.length == 0) { return; } - // If str starts at the end of the slice (is a string returned by a Reader - // method), just extend the slice to contain str. - if(&str[0] == &reader_.buffer_[end_]) - { - end_ += str.length; - } - // Even if str does not start at the end of the slice, it still may be returned - // by a Reader method and point to buffer. So we need to memmove. - else - { - copy(str, reader_.buffer_[end_..end_ + str.length * char.sizeof]); - end_ += str.length; - } - } - - /// Write a character to the slice being built. - /// - /// Data can only be written up to the current position in the Reader buffer. 
- /// - /// See_Also: begin - void write(dchar c) @safe pure - in(inProgress, "write called without begin") - { - if(c < 0x80) - { - reader_.buffer_[end_++] = cast(char)c; - return; - } - - // We need to encode a non-ASCII dchar into UTF-8 - char[4] encodeBuf; - const bytes = encode(encodeBuf, c); - reader_.buffer_[end_ .. end_ + bytes] = encodeBuf[0 .. bytes]; - end_ += bytes; - } - - /// Insert a character to a specified position in the slice. - /// - /// Enlarges the slice by 1 char. Note that the slice can only extend up to the - /// current position in the Reader buffer. - /// - /// Params: - /// - /// c = The character to insert. - /// position = Position to insert the character at in code units, not code points. - /// Must be less than slice length(); a previously returned length() - /// can be used. - void insert(const dchar c, const size_t position) @safe pure - in(inProgress, "insert called without begin") - in(start_ + position <= end_, "Trying to insert after the end of the slice") - { - - const point = start_ + position; - const movedLength = end_ - point; - - // Encode c into UTF-8 - char[4] encodeBuf; - if(c < 0x80) { encodeBuf[0] = cast(char)c; } - const size_t bytes = c < 0x80 ? 1 : encode(encodeBuf, c); - - if(movedLength > 0) - { - copy(reader_.buffer_[point..point + movedLength * char.sizeof], - reader_.buffer_[point + bytes..point + bytes + movedLength * char.sizeof]); - } - reader_.buffer_[point .. point + bytes] = encodeBuf[0 .. bytes]; - end_ += bytes; - } - - /// Get the current length of the slice. - size_t length() @safe const pure nothrow @nogc - { - return end_ - start_; - } - - /// A slice building transaction. - /// - /// Can be used to save and revert back to slice state. - struct Transaction - { - private: - // The slice builder affected by the transaction. - SliceBuilder* builder_; - // Index of the return point of the transaction in StringBuilder.endStack_. - size_t stackLevel_; - // True after commit() has been called. 
- bool committed_; - - public: - /// Begins a transaction on a SliceBuilder object. - /// - /// The transaction must end $(B after) any transactions created within the - /// transaction but $(B before) the slice is finish()-ed. A transaction can be - /// ended either by commit()-ing or reverting through the destructor. - /// - /// Saves the current state of a slice. - this(SliceBuilder* builder) @safe pure nothrow @nogc - { - builder_ = builder; - stackLevel_ = builder_.endStackUsed_; - builder_.push(); - } - - /// Commit changes to the slice. - /// - /// Ends the transaction - can only be called once, and removes the possibility - /// to revert slice state. - /// - /// Does nothing for a default-initialized transaction (the transaction has not - /// been started yet). - void commit() @safe pure nothrow @nogc - in(!committed_, "Can't commit a transaction more than once") - { - - if(builder_ is null) { return; } - assert(builder_.endStackUsed_ == stackLevel_ + 1, - "Parent transactions don't fully contain child transactions"); - builder_.apply(); - committed_ = true; - } - - /// Destroy the transaction and revert it if it hasn't been committed yet. - void end() @safe pure nothrow @nogc - in(builder_ && builder_.endStackUsed_ == stackLevel_ + 1, "Parent transactions don't fully contain child transactions") - { - builder_.pop(); - builder_ = null; - } - - } - -private: - // Push the current end of the slice so we can revert to it if needed. - // - // Used by Transaction. - void push() @safe pure nothrow @nogc - in(inProgress, "push called without begin") - in(endStackUsed_ < endStack_.length, "Slice stack overflow") - { - endStack_[endStackUsed_++] = end_; - } - - // Pop the current end of endStack_ and set the end of the slice to the popped - // value, reverting changes since the old end was pushed. - // - // Used by Transaction. 
- void pop() @safe pure nothrow @nogc - in(inProgress, "pop called without begin") - in(endStackUsed_ > 0, "Trying to pop an empty slice stack") - { - end_ = endStack_[--endStackUsed_]; - } - - // Pop the current end of endStack_, but keep the current end of the slice, applying - // changes made since pushing the old end. - // - // Used by Transaction. - void apply() @safe pure nothrow @nogc - in(inProgress, "apply called without begin") - in(endStackUsed_ > 0, "Trying to apply an empty slice stack") - { - --endStackUsed_; - } -} - - -private: - -// Convert a UTF-8/16/32 buffer to UTF-8, in-place if possible. -// -// Params: -// -// input = Buffer with UTF-8/16/32 data to decode. May be overwritten by the -// conversion, in which case the result will be a slice of this buffer. -// encoding = Encoding of input. -// -// Returns: -// -// A struct with the following members: -// -// $(D string errorMessage) In case of an error, the error message is stored here. If -// there was no error, errorMessage is NULL. Always check -// this first. -// $(D char[] utf8) input converted to UTF-8. May be a slice of input. -// $(D size_t characterCount) Number of characters (code points) in input. -auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow -{ - // Documented in function ddoc. - struct Result - { - string errorMessage; - char[] utf8; - size_t characterCount; - } - - Result result; - - // Encode input_ into UTF-8 if it's encoded as UTF-16 or UTF-32. - // - // Params: - // - // buffer = The input buffer to encode. - // result = A Result struct to put encoded result and any error messages to. - // - // On error, result.errorMessage will be set. - static void encode(C)(C[] input, ref Result result) @safe pure - { - // We can do UTF-32->UTF-8 in place because all UTF-8 sequences are 4 or - // less bytes. 
- static if(is(C == dchar)) - { - char[4] encodeBuf; - auto utf8 = cast(char[])input; - auto length = 0; - foreach(dchar c; input) - { - ++result.characterCount; - // ASCII - if(c < 0x80) - { - utf8[length++] = cast(char)c; - continue; - } - - std.utf.encode(encodeBuf, c); - const bytes = codeLength!char(c); - utf8[length .. length + bytes] = encodeBuf[0 .. bytes]; - length += bytes; - } - result.utf8 = utf8[0 .. length]; - } - // Unfortunately we can't do UTF-16 in place so we just use std.conv.to - else - { - result.characterCount = std.utf.count(input); - result.utf8 = input.to!(char[]); - } - } - - try final switch(encoding) - { - case UTFEncoding.UTF_8: - result.utf8 = cast(char[])input; - result.utf8.validate(); - result.characterCount = std.utf.count(result.utf8); - break; - case UTFEncoding.UTF_16: - assert(input.length % 2 == 0, "UTF-16 buffer size must be even"); - encode(cast(wchar[])input, result); - break; - case UTFEncoding.UTF_32: - assert(input.length % 4 == 0, "UTF-32 buffer size must be a multiple of 4"); - encode(cast(dchar[])input, result); - break; - } - catch(ConvException e) { result.errorMessage = e.msg; } - catch(UTFException e) { result.errorMessage = e.msg; } - catch(Exception e) - { - assert(false, "Unexpected exception in encode(): " ~ e.msg); - } - - return result; -} - -/// Determine if all characters (code points, not bytes) in a string are printable. -bool isPrintableValidUTF8(const char[] chars) @safe pure -{ - import std.uni : isControl, isWhite; - foreach (dchar chr; chars) - { - if (!chr.isValidDchar || (chr.isControl && !chr.isWhite)) - { - return false; - } - } - return true; -} - -/// Counts the number of ASCII characters in buffer until the first UTF-8 sequence. -/// -/// Used to determine how many characters we can process without decoding. -size_t countASCII(const(char)[] buffer) @safe pure nothrow @nogc -{ - return buffer.byCodeUnit.until!(x => x > 0x7F).walkLength; -} -// Unittests. 
- -void testEndian(R)() -{ - void endian_test(ubyte[] data, Encoding encoding_expected, Endian endian_expected) - { - auto reader = new R(data); - assert(reader.encoding == encoding_expected); - assert(reader.endian_ == endian_expected); - } - ubyte[] little_endian_utf_16 = [0xFF, 0xFE, 0x7A, 0x00]; - ubyte[] big_endian_utf_16 = [0xFE, 0xFF, 0x00, 0x7A]; - endian_test(little_endian_utf_16, Encoding.UTF_16, Endian.littleEndian); - endian_test(big_endian_utf_16, Encoding.UTF_16, Endian.bigEndian); -} - -void testPeekPrefixForward(R)() -{ - import std.encoding; - ubyte[] data = bomTable[BOM.utf8].sequence ~ cast(ubyte[])"data"; - auto reader = new R(data); - assert(reader.peek() == 'd'); - assert(reader.peek(1) == 'a'); - assert(reader.peek(2) == 't'); - assert(reader.peek(3) == 'a'); - assert(reader.peek(4) == '\0'); - assert(reader.prefix(4) == "data"); - // assert(reader.prefix(6) == "data\0"); - reader.forward(2); - assert(reader.peek(1) == 'a'); - // assert(collectException(reader.peek(3))); -} - -void testUTF(R)() -{ - import std.encoding; - dchar[] data = cast(dchar[])"data"; - void utf_test(T)(T[] data, BOM bom) - { - ubyte[] bytes = bomTable[bom].sequence ~ - (cast(ubyte[])data)[0 .. data.length * T.sizeof]; - auto reader = new R(bytes); - assert(reader.peek() == 'd'); - assert(reader.peek(1) == 'a'); - assert(reader.peek(2) == 't'); - assert(reader.peek(3) == 'a'); - } - utf_test!char(to!(char[])(data), BOM.utf8); - utf_test!wchar(to!(wchar[])(data), endian == Endian.bigEndian ? BOM.utf16be : BOM.utf16le); - utf_test(data, endian == Endian.bigEndian ? 
BOM.utf32be : BOM.utf32le); -} - -void test1Byte(R)() -{ - ubyte[] data = [97]; - - auto reader = new R(data); - assert(reader.peek() == 'a'); - assert(reader.peek(1) == '\0'); - // assert(collectException(reader.peek(2))); -} - -@system unittest -{ - testEndian!Reader(); - testPeekPrefixForward!Reader(); - testUTF!Reader(); - test1Byte!Reader(); -} -//Issue 257 - https://github.com/dlang-community/D-YAML/issues/257 -@safe unittest -{ - import dyaml.loader : Loader; - auto yaml = "hello "; - auto root = Loader.fromString(yaml).load(); - - assert(root.isValid); -} diff --git a/source/dyaml/representer.d b/source/dyaml/representer.d deleted file mode 100644 index f903b60..0000000 --- a/source/dyaml/representer.d +++ /dev/null @@ -1,517 +0,0 @@ - -// Copyright Ferdinand Majerech 2011. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -/** - * YAML node _representer. Prepares YAML nodes for output. A tutorial can be - * found $(LINK2 ../tutorials/custom_types.html, here). - * - * Code based on $(LINK2 http://www.pyyaml.org, PyYAML). - */ -module dyaml.representer; - - -import std.algorithm; -import std.array; -import std.base64; -import std.container; -import std.conv; -import std.datetime; -import std.exception; -import std.format; -import std.math; -import std.typecons; -import std.string; - -import dyaml.exception; -import dyaml.node; -import dyaml.serializer; -import dyaml.style; - -package: -///Exception thrown on Representer errors. -class RepresenterException : YAMLException -{ - mixin ExceptionCtors; -} - -/** - * Represents YAML nodes as scalar, sequence and mapping nodes ready for output. 
- */ -Node representData(const Node data, ScalarStyle defaultScalarStyle, CollectionStyle defaultCollectionStyle) @safe -{ - Node result; - final switch(data.type) - { - case NodeType.null_: - result = representNull(); - break; - case NodeType.merge: - break; - case NodeType.boolean: - result = representBool(data); - break; - case NodeType.integer: - result = representLong(data); - break; - case NodeType.decimal: - result = representReal(data); - break; - case NodeType.binary: - result = representBytes(data); - break; - case NodeType.timestamp: - result = representSysTime(data); - break; - case NodeType.string: - result = representString(data); - break; - case NodeType.mapping: - result = representPairs(data, defaultScalarStyle, defaultCollectionStyle); - break; - case NodeType.sequence: - result = representNodes(data, defaultScalarStyle, defaultCollectionStyle); - break; - case NodeType.invalid: - assert(0); - } - - final switch (result.nodeID) - { - case NodeID.scalar: - if (result.scalarStyle == ScalarStyle.invalid) - { - result.scalarStyle = defaultScalarStyle; - } - break; - case NodeID.sequence, NodeID.mapping: - if (defaultCollectionStyle != CollectionStyle.invalid) - { - result.collectionStyle = defaultCollectionStyle; - } - break; - case NodeID.invalid: - break; - } - - - //Override tag if specified. - if(data.tag_ !is null){result.tag_ = data.tag_;} - - //Remember style if this was loaded before. - if(data.scalarStyle != ScalarStyle.invalid) - { - result.scalarStyle = data.scalarStyle; - } - if(data.collectionStyle != CollectionStyle.invalid) - { - result.collectionStyle = data.collectionStyle; - } - return result; -} - -@safe unittest -{ - // We don't emit yaml merge nodes. 
- assert(representData(Node(YAMLMerge()), ScalarStyle.invalid, CollectionStyle.invalid) == Node.init); -} - -@safe unittest -{ - assert(representData(Node(YAMLNull()), ScalarStyle.invalid, CollectionStyle.invalid) == Node("null", "tag:yaml.org,2002:null")); -} - -@safe unittest -{ - assert(representData(Node(cast(string)null), ScalarStyle.invalid, CollectionStyle.invalid) == Node("", "tag:yaml.org,2002:str")); - assert(representData(Node("Hello world!"), ScalarStyle.invalid, CollectionStyle.invalid) == Node("Hello world!", "tag:yaml.org,2002:str")); -} - -@safe unittest -{ - assert(representData(Node(64), ScalarStyle.invalid, CollectionStyle.invalid) == Node("64", "tag:yaml.org,2002:int")); -} - -@safe unittest -{ - assert(representData(Node(true), ScalarStyle.invalid, CollectionStyle.invalid) == Node("true", "tag:yaml.org,2002:bool")); - assert(representData(Node(false), ScalarStyle.invalid, CollectionStyle.invalid) == Node("false", "tag:yaml.org,2002:bool")); -} - -@safe unittest -{ - // Float comparison is pretty unreliable... 
- auto result = representData(Node(1.0), ScalarStyle.invalid, CollectionStyle.invalid); - assert(isClose(result.as!string.to!real, 1.0)); - assert(result.tag == "tag:yaml.org,2002:float"); - - assert(representData(Node(real.nan), ScalarStyle.invalid, CollectionStyle.invalid) == Node(".nan", "tag:yaml.org,2002:float")); - assert(representData(Node(real.infinity), ScalarStyle.invalid, CollectionStyle.invalid) == Node(".inf", "tag:yaml.org,2002:float")); - assert(representData(Node(-real.infinity), ScalarStyle.invalid, CollectionStyle.invalid) == Node("-.inf", "tag:yaml.org,2002:float")); -} - -@safe unittest -{ - assert(representData(Node(SysTime(DateTime(2000, 3, 14, 12, 34, 56), UTC())), ScalarStyle.invalid, CollectionStyle.invalid) == Node("2000-03-14T12:34:56Z", "tag:yaml.org,2002:timestamp")); -} - -@safe unittest -{ - assert(representData(Node(Node[].init, "tag:yaml.org,2002:set"), ScalarStyle.invalid, CollectionStyle.invalid) == Node(Node.Pair[].init, "tag:yaml.org,2002:set")); - assert(representData(Node(Node[].init, "tag:yaml.org,2002:seq"), ScalarStyle.invalid, CollectionStyle.invalid) == Node(Node[].init, "tag:yaml.org,2002:seq")); - { - auto nodes = [ - Node("a"), - Node("b"), - Node("c"), - ]; - assert(representData(Node(nodes, "tag:yaml.org,2002:set"), ScalarStyle.invalid, CollectionStyle.invalid) == - Node([ - Node.Pair( - Node("a", "tag:yaml.org,2002:str"), - Node("null", "tag:yaml.org,2002:null") - ), - Node.Pair( - Node("b", "tag:yaml.org,2002:str"), - Node("null", "tag:yaml.org,2002:null") - ), - Node.Pair( - Node("c", "tag:yaml.org,2002:str"), - Node("null", "tag:yaml.org,2002:null") - ) - ], "tag:yaml.org,2002:set")); - } - { - auto nodes = [ - Node("a"), - Node("b"), - Node("c"), - ]; - assert(representData(Node(nodes, "tag:yaml.org,2002:seq"), ScalarStyle.invalid, CollectionStyle.invalid) == - Node([ - Node("a", "tag:yaml.org,2002:str"), - Node("b", "tag:yaml.org,2002:str"), - Node("c", "tag:yaml.org,2002:str") - ], "tag:yaml.org,2002:seq")); - 
} -} - -@safe unittest -{ - assert(representData(Node(Node.Pair[].init, "tag:yaml.org,2002:omap"), ScalarStyle.invalid, CollectionStyle.invalid) == Node(Node[].init, "tag:yaml.org,2002:omap")); - assert(representData(Node(Node.Pair[].init, "tag:yaml.org,2002:pairs"), ScalarStyle.invalid, CollectionStyle.invalid) == Node(Node[].init, "tag:yaml.org,2002:pairs")); - assert(representData(Node(Node.Pair[].init, "tag:yaml.org,2002:map"), ScalarStyle.invalid, CollectionStyle.invalid) == Node(Node.Pair[].init, "tag:yaml.org,2002:map")); - { - auto nodes = [ - Node.Pair("a", "b"), - Node.Pair("a", "c") - ]; - assertThrown(representData(Node(nodes, "tag:yaml.org,2002:omap"), ScalarStyle.invalid, CollectionStyle.invalid)); - } - // Yeah, this gets ugly really fast. - { - auto nodes = [ - Node.Pair("a", "b"), - Node.Pair("a", "c") - ]; - assert(representData(Node(nodes, "tag:yaml.org,2002:pairs"), ScalarStyle.invalid, CollectionStyle.invalid) == - Node([ - Node( - [Node.Pair( - Node("a", "tag:yaml.org,2002:str"), - Node("b", "tag:yaml.org,2002:str") - )], - "tag:yaml.org,2002:map"), - Node( - [Node.Pair( - Node("a", "tag:yaml.org,2002:str"), - Node("c", "tag:yaml.org,2002:str") - )], - "tag:yaml.org,2002:map"), - ], "tag:yaml.org,2002:pairs")); - } - { - auto nodes = [ - Node.Pair("a", "b"), - Node.Pair("a", "c") - ]; - assertThrown(representData(Node(nodes, "tag:yaml.org,2002:map"), ScalarStyle.invalid, CollectionStyle.invalid)); - } - { - auto nodes = [ - Node.Pair("a", "b"), - Node.Pair("c", "d") - ]; - assert(representData(Node(nodes, "tag:yaml.org,2002:omap"), ScalarStyle.invalid, CollectionStyle.invalid) == - Node([ - Node([ - Node.Pair( - Node("a", "tag:yaml.org,2002:str"), - Node("b", "tag:yaml.org,2002:str") - ) - ], "tag:yaml.org,2002:map"), - Node([ - Node.Pair( - Node("c", "tag:yaml.org,2002:str"), - Node("d", "tag:yaml.org,2002:str") - ) - ], "tag:yaml.org,2002:map" - )], "tag:yaml.org,2002:omap")); - } - { - auto nodes = [ - Node.Pair("a", "b"), - Node.Pair("c", 
"d") - ]; - assert(representData(Node(nodes, "tag:yaml.org,2002:map"), ScalarStyle.invalid, CollectionStyle.invalid) == - Node([ - Node.Pair( - Node("a", "tag:yaml.org,2002:str"), - Node("b", "tag:yaml.org,2002:str") - ), - Node.Pair( - Node("c", "tag:yaml.org,2002:str"), - Node("d", "tag:yaml.org,2002:str") - ), - ], "tag:yaml.org,2002:map")); - } -} - -private: - -//Represent a _null _node as a _null YAML value. -Node representNull() @safe -{ - return Node("null", "tag:yaml.org,2002:null"); -} - -//Represent a string _node as a string scalar. -Node representString(const Node node) @safe -{ - string value = node.as!string; - return Node(value, "tag:yaml.org,2002:str"); -} - -//Represent a bytes _node as a binary scalar. -Node representBytes(const Node node) @safe -{ - const ubyte[] value = node.as!(ubyte[]); - if(value is null){return Node("null", "tag:yaml.org,2002:null");} - - auto newNode = Node(Base64.encode(value).idup, "tag:yaml.org,2002:binary"); - newNode.scalarStyle = ScalarStyle.literal; - return newNode; -} - -//Represent a bool _node as a bool scalar. -Node representBool(const Node node) @safe -{ - return Node(node.as!bool ? "true" : "false", "tag:yaml.org,2002:bool"); -} - -//Represent a long _node as an integer scalar. -Node representLong(const Node node) @safe -{ - return Node(node.as!long.to!string, "tag:yaml.org,2002:int"); -} - -//Represent a real _node as a floating point scalar. -Node representReal(const Node node) @safe -{ - real f = node.as!real; - string value = isNaN(f) ? ".nan": - f == real.infinity ? ".inf": - f == -1.0 * real.infinity ? "-.inf": - {auto a = appender!string(); - formattedWrite(a, "%12f", f); - return a.data.strip();}(); - - return Node(value, "tag:yaml.org,2002:float"); -} - -//Represent a SysTime _node as a timestamp. -Node representSysTime(const Node node) @safe -{ - return Node(node.as!SysTime.toISOExtString(), "tag:yaml.org,2002:timestamp"); -} - -//Represent a sequence _node as sequence/set. 
-Node representNodes(const Node node, ScalarStyle defaultScalarStyle, CollectionStyle defaultCollectionStyle) @safe -{ - auto nodes = node.as!(Node[]); - if(node.tag_ == "tag:yaml.org,2002:set") - { - //YAML sets are mapping with null values. - Node.Pair[] pairs; - pairs.length = nodes.length; - - foreach(idx, key; nodes) - { - pairs[idx] = Node.Pair(key, Node("null", "tag:yaml.org,2002:null")); - } - Node.Pair[] value; - value.length = pairs.length; - - auto bestStyle = CollectionStyle.flow; - foreach(idx, pair; pairs) - { - value[idx] = Node.Pair(representData(pair.key, defaultScalarStyle, defaultCollectionStyle), representData(pair.value, defaultScalarStyle, defaultCollectionStyle)); - if(value[idx].shouldUseBlockStyle) - { - bestStyle = CollectionStyle.block; - } - } - - auto newNode = Node(value, node.tag_); - newNode.collectionStyle = bestStyle; - return newNode; - } - else - { - Node[] value; - value.length = nodes.length; - - auto bestStyle = CollectionStyle.flow; - foreach(idx, item; nodes) - { - value[idx] = representData(item, defaultScalarStyle, defaultCollectionStyle); - const isScalar = value[idx].nodeID == NodeID.scalar; - const s = value[idx].scalarStyle; - if(!isScalar || (s != ScalarStyle.invalid && s != ScalarStyle.plain)) - { - bestStyle = CollectionStyle.block; - } - } - - auto newNode = Node(value, "tag:yaml.org,2002:seq"); - newNode.collectionStyle = bestStyle; - return newNode; - } -} - -bool shouldUseBlockStyle(const Node value) @safe -{ - const isScalar = value.nodeID == NodeID.scalar; - const s = value.scalarStyle; - return (!isScalar || (s != ScalarStyle.invalid && s != ScalarStyle.plain)); -} -bool shouldUseBlockStyle(const Node.Pair value) @safe -{ - const keyScalar = value.key.nodeID == NodeID.scalar; - const valScalar = value.value.nodeID == NodeID.scalar; - const keyStyle = value.key.scalarStyle; - const valStyle = value.value.scalarStyle; - if(!keyScalar || - (keyStyle != ScalarStyle.invalid && keyStyle != ScalarStyle.plain)) - { - 
return true; - } - if(!valScalar || - (valStyle != ScalarStyle.invalid && valStyle != ScalarStyle.plain)) - { - return true; - } - return false; -} - -//Represent a mapping _node as map/ordered map/pairs. -Node representPairs(const Node node, ScalarStyle defaultScalarStyle, CollectionStyle defaultCollectionStyle) @safe -{ - auto pairs = node.as!(Node.Pair[]); - - bool hasDuplicates(const Node.Pair[] pairs) @safe - { - //TODO this should be replaced by something with deterministic memory allocation. - auto keys = redBlackTree!Node(); - foreach(pair; pairs) - { - if(pair.key in keys){return true;} - keys.insert(pair.key); - } - return false; - } - - Node[] mapToSequence(const Node.Pair[] pairs) @safe - { - Node[] nodes; - nodes.length = pairs.length; - foreach(idx, pair; pairs) - { - Node.Pair value; - - auto bestStyle = value.shouldUseBlockStyle ? CollectionStyle.block : CollectionStyle.flow; - value = Node.Pair(representData(pair.key, defaultScalarStyle, defaultCollectionStyle), representData(pair.value, defaultScalarStyle, defaultCollectionStyle)); - - auto newNode = Node([value], "tag:yaml.org,2002:map"); - newNode.collectionStyle = bestStyle; - nodes[idx] = newNode; - } - return nodes; - } - - if(node.tag_ == "tag:yaml.org,2002:omap") - { - enforce(!hasDuplicates(pairs), - new RepresenterException("Duplicate entry in an ordered map")); - auto sequence = mapToSequence(pairs); - Node[] value; - value.length = sequence.length; - - auto bestStyle = CollectionStyle.flow; - foreach(idx, item; sequence) - { - value[idx] = representData(item, defaultScalarStyle, defaultCollectionStyle); - if(value[idx].shouldUseBlockStyle) - { - bestStyle = CollectionStyle.block; - } - } - - auto newNode = Node(value, node.tag_); - newNode.collectionStyle = bestStyle; - return newNode; - } - else if(node.tag_ == "tag:yaml.org,2002:pairs") - { - auto sequence = mapToSequence(pairs); - Node[] value; - value.length = sequence.length; - - auto bestStyle = CollectionStyle.flow; - 
foreach(idx, item; sequence) - { - value[idx] = representData(item, defaultScalarStyle, defaultCollectionStyle); - if(value[idx].shouldUseBlockStyle) - { - bestStyle = CollectionStyle.block; - } - } - - auto newNode = Node(value, node.tag_); - newNode.collectionStyle = bestStyle; - return newNode; - } - else - { - enforce(!hasDuplicates(pairs), - new RepresenterException("Duplicate entry in an unordered map")); - Node.Pair[] value; - value.length = pairs.length; - - auto bestStyle = CollectionStyle.flow; - foreach(idx, pair; pairs) - { - value[idx] = Node.Pair(representData(pair.key, defaultScalarStyle, defaultCollectionStyle), representData(pair.value, defaultScalarStyle, defaultCollectionStyle)); - if(value[idx].shouldUseBlockStyle) - { - bestStyle = CollectionStyle.block; - } - } - - auto newNode = Node(value, "tag:yaml.org,2002:map"); - newNode.collectionStyle = bestStyle; - return newNode; - } -} diff --git a/source/dyaml/resolver.d b/source/dyaml/resolver.d deleted file mode 100644 index 16d8419..0000000 --- a/source/dyaml/resolver.d +++ /dev/null @@ -1,260 +0,0 @@ - -// Copyright Ferdinand Majerech 2011. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -/** - * Implements a class that resolves YAML tags. This can be used to implicitly - * resolve tags for custom data types, removing the need to explicitly - * specify tags in YAML. A tutorial can be found - * $(LINK2 ../tutorials/custom_types.html, here). - * - * Code based on $(LINK2 http://www.pyyaml.org, PyYAML). 
- */ -module dyaml.resolver; - - -import std.conv; -import std.regex; -import std.typecons; -import std.utf; - -import dyaml.node; -import dyaml.exception; - - -/// Type of `regexes` -private alias RegexType = Tuple!(string, "tag", const Regex!char, "regexp", string, "chars"); - -private immutable RegexType[] regexes = [ - RegexType("tag:yaml.org,2002:bool", - regex(r"^(?:yes|Yes|YES|no|No|NO|true|True|TRUE" ~ - "|false|False|FALSE|on|On|ON|off|Off|OFF)$"), - "yYnNtTfFoO"), - RegexType("tag:yaml.org,2002:float", - regex(r"^(?:[-+]?([0-9][0-9_]*)\\.[0-9_]*" ~ - "(?:[eE][-+][0-9]+)?|[-+]?(?:[0-9][0-9_]" ~ - "*)?\\.[0-9_]+(?:[eE][-+][0-9]+)?|[-+]?" ~ - "[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]" ~ - "*|[-+]?\\.(?:inf|Inf|INF)|\\." ~ - "(?:nan|NaN|NAN))$"), - "-+0123456789."), - RegexType("tag:yaml.org,2002:int", - regex(r"^(?:[-+]?0b[0-1_]+" ~ - "|[-+]?0[0-7_]+" ~ - "|[-+]?(?:0|[1-9][0-9_]*)" ~ - "|[-+]?0x[0-9a-fA-F_]+" ~ - "|[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$"), - "-+0123456789"), - RegexType("tag:yaml.org,2002:merge", regex(r"^<<$"), "<"), - RegexType("tag:yaml.org,2002:null", - regex(r"^$|^(?:~|null|Null|NULL)$"), "~nN\0"), - RegexType("tag:yaml.org,2002:timestamp", - regex(r"^[0-9][0-9][0-9][0-9]-[0-9][0-9]-" ~ - "[0-9][0-9]|[0-9][0-9][0-9][0-9]-[0-9]" ~ - "[0-9]?-[0-9][0-9]?[Tt]|[ \t]+[0-9]" ~ - "[0-9]?:[0-9][0-9]:[0-9][0-9]" ~ - "(?:\\.[0-9]*)?(?:[ \t]*Z|[-+][0-9]" ~ - "[0-9]?(?::[0-9][0-9])?)?$"), - "0123456789"), - RegexType("tag:yaml.org,2002:value", regex(r"^=$"), "="), - - //The following resolver is only for documentation purposes. It cannot work - //because plain scalars cannot start with '!', '&', or '*'. - RegexType("tag:yaml.org,2002:yaml", regex(r"^(?:!|&|\*)$"), "!&*"), -]; - -/** - * Resolves YAML tags (data types). - * - * Can be used to implicitly resolve custom data types of scalar values. - */ -struct Resolver -{ - private: - // Default tag to use for scalars. 
- string defaultScalarTag_ = "tag:yaml.org,2002:str"; - // Default tag to use for sequences. - string defaultSequenceTag_ = "tag:yaml.org,2002:seq"; - // Default tag to use for mappings. - string defaultMappingTag_ = "tag:yaml.org,2002:map"; - - /* - * Arrays of scalar resolver tuples indexed by starting character of a scalar. - * - * Each tuple stores regular expression the scalar must match, - * and tag to assign to it if it matches. - */ - Tuple!(string, const Regex!char)[][dchar] yamlImplicitResolvers_; - - package: - static auto withDefaultResolvers() @safe - { - Resolver resolver; - foreach(pair; regexes) - { - resolver.addImplicitResolver(pair.tag, pair.regexp, pair.chars); - } - return resolver; - } - - public: - @disable bool opEquals(ref Resolver); - @disable int opCmp(ref Resolver); - - /** - * Add an implicit scalar resolver. - * - * If a scalar matches regexp and starts with any character in first, - * its _tag is set to tag. If it matches more than one resolver _regexp - * resolvers added _first override ones added later. Default resolvers - * override any user specified resolvers, but they can be disabled in - * Resolver constructor. - * - * If a scalar is not resolved to anything, it is assigned the default - * YAML _tag for strings. - * - * Params: tag = Tag to resolve to. - * regexp = Regular expression the scalar must match to have this _tag. - * first = String of possible starting characters of the scalar. 
- * - */ - void addImplicitResolver(string tag, const Regex!char regexp, string first) - pure @safe - { - foreach(const dchar c; first) - { - if((c in yamlImplicitResolvers_) is null) - { - yamlImplicitResolvers_[c] = []; - } - yamlImplicitResolvers_[c] ~= tuple(tag, regexp); - } - } - /// Resolve scalars starting with 'A' to !_tag - @safe unittest - { - import std.file : write; - import std.regex : regex; - import dyaml.loader : Loader; - import dyaml.resolver : Resolver; - - write("example.yaml", "A"); - - auto loader = Loader.fromFile("example.yaml"); - loader.resolver.addImplicitResolver("!tag", regex("A.*"), "A"); - - auto node = loader.load(); - assert(node.tag == "!tag"); - } - - package: - /** - * Resolve tag of a node. - * - * Params: kind = Type of the node. - * tag = Explicit tag of the node, if any. - * value = Value of the node, if any. - * implicit = Should the node be implicitly resolved? - * - * If the tag is already specified and not non-specific, that tag will - * be returned. - * - * Returns: Resolved tag. - */ - string resolve(const NodeID kind, const string tag, scope string value, - const bool implicit) @safe - { - import std.array : empty, front; - if((tag !is null) && (tag != "!")) - { - return tag; - } - - final switch (kind) - { - case NodeID.scalar: - if(!implicit) - { - return defaultScalarTag_; - } - - //Get the first char of the value. - const dchar first = value.empty ? '\0' : value.front; - - auto resolvers = (first in yamlImplicitResolvers_) is null ? - [] : yamlImplicitResolvers_[first]; - - //If regexp matches, return tag. 
- foreach(resolver; resolvers) - { - // source/dyaml/resolver.d(192,35): Error: scope variable `__tmpfordtorXXX` - // assigned to non-scope parameter `this` calling - // `std.regex.RegexMatch!string.RegexMatch.~this` - bool isEmpty = () @trusted { - return match(value, resolver[1]).empty; - }(); - if(!isEmpty) - { - return resolver[0]; - } - } - return defaultScalarTag_; - case NodeID.sequence: - return defaultSequenceTag_; - case NodeID.mapping: - return defaultMappingTag_; - case NodeID.invalid: - assert(false, "Cannot resolve an invalid node"); - } - } - @safe unittest - { - auto resolver = Resolver.withDefaultResolvers; - - bool tagMatch(string tag, string[] values) @safe - { - const string expected = tag; - foreach(value; values) - { - const string resolved = resolver.resolve(NodeID.scalar, null, value, true); - if(expected != resolved) - { - return false; - } - } - return true; - } - - assert(tagMatch("tag:yaml.org,2002:bool", - ["yes", "NO", "True", "on"])); - assert(tagMatch("tag:yaml.org,2002:float", - ["6.8523015e+5", "685.230_15e+03", "685_230.15", - "190:20:30.15", "-.inf", ".NaN"])); - assert(tagMatch("tag:yaml.org,2002:int", - ["685230", "+685_230", "02472256", "0x_0A_74_AE", - "0b1010_0111_0100_1010_1110", "190:20:30"])); - assert(tagMatch("tag:yaml.org,2002:merge", ["<<"])); - assert(tagMatch("tag:yaml.org,2002:null", ["~", "null", ""])); - assert(tagMatch("tag:yaml.org,2002:str", - ["abcd", "9a8b", "9.1adsf"])); - assert(tagMatch("tag:yaml.org,2002:timestamp", - ["2001-12-15T02:59:43.1Z", - "2001-12-14t21:59:43.10-05:00", - "2001-12-14 21:59:43.10 -5", - "2001-12-15 2:59:43.10", - "2002-12-14"])); - assert(tagMatch("tag:yaml.org,2002:value", ["="])); - assert(tagMatch("tag:yaml.org,2002:yaml", ["!", "&", "*"])); - } - - ///Returns: Default scalar tag. - @property string defaultScalarTag() const pure @safe nothrow {return defaultScalarTag_;} - - ///Returns: Default sequence tag. 
- @property string defaultSequenceTag() const pure @safe nothrow {return defaultSequenceTag_;} - - ///Returns: Default mapping tag. - @property string defaultMappingTag() const pure @safe nothrow {return defaultMappingTag_;} -} diff --git a/source/dyaml/scanner.d b/source/dyaml/scanner.d deleted file mode 100644 index 17893d1..0000000 --- a/source/dyaml/scanner.d +++ /dev/null @@ -1,1809 +0,0 @@ - -// Copyright Ferdinand Majerech 2011-2014. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -/// YAML scanner. -/// Code based on PyYAML: http://www.pyyaml.org -module dyaml.scanner; - - -import core.stdc.string; - -import std.algorithm; -import std.array; -import std.conv; -import std.ascii : isAlphaNum, isDigit, isHexDigit; -import std.exception; -import std.string; -import std.typecons; -import std.traits : Unqual; -import std.utf; - -import dyaml.escapes; -import dyaml.exception; -import dyaml.queue; -import dyaml.reader; -import dyaml.style; -import dyaml.token; - -package: -/// Scanner produces tokens of the following types: -/// STREAM-START -/// STREAM-END -/// DIRECTIVE(name, value) -/// DOCUMENT-START -/// DOCUMENT-END -/// BLOCK-SEQUENCE-START -/// BLOCK-MAPPING-START -/// BLOCK-END -/// FLOW-SEQUENCE-START -/// FLOW-MAPPING-START -/// FLOW-SEQUENCE-END -/// FLOW-MAPPING-END -/// BLOCK-ENTRY -/// FLOW-ENTRY -/// KEY -/// VALUE -/// ALIAS(value) -/// ANCHOR(value) -/// TAG(value) -/// SCALAR(value, plain, style) - -alias isBreak = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); - -alias isBreakOrSpace = among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); - -alias isWhiteSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); - -alias isNonLinebreakWhitespace = among!(' ', '\t'); - -alias isNonScalarStartCharacter = among!('-', '?', ':', ',', '[', ']', '{', '}', - '#', '&', '*', '!', '|', '>', '\'', '"', '%', '@', 
'`', ' ', '\t', '\0', '\n', - '\r', '\u0085', '\u2028', '\u2029'); - -alias isURIChar = among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', - '_', '.', '!', '~', '*', '\'', '(', ')', '[', ']', '%'); - -alias isNSChar = among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029'); - -alias isBChar = among!('\n', '\r', '\u0085', '\u2028', '\u2029'); - -alias isFlowScalarBreakSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029', '\'', '"', '\\'); - -alias isNSAnchorName = c => !c.isWhiteSpace && !c.among!('[', ']', '{', '}', ',', '\uFEFF'); - -/// Marked exception thrown at scanner errors. -/// -/// See_Also: MarkedYAMLException -class ScannerException : MarkedYAMLException -{ - mixin MarkedExceptionCtors; -} - -/// Generates tokens from data provided by a Reader. -struct Scanner -{ - private: - /// A simple key is a key that is not denoted by the '?' indicator. - /// For example: - /// --- - /// block simple key: value - /// ? not a simple key: - /// : { flow simple key: value } - /// We emit the KEY token before all keys, so when we find a potential simple - /// key, we try to locate the corresponding ':' indicator. Simple keys should be - /// limited to a single line and 1024 characters. - /// - /// 16 bytes on 64-bit. - static struct SimpleKey - { - /// Character index in reader where the key starts. - uint charIndex = uint.max; - /// Index of the key token from start (first token scanned being 0). - uint tokenIndex; - /// Line the key starts at. - uint line; - /// Column the key starts at. - ushort column; - /// Is this required to be a simple key? - bool required; - /// Is this struct "null" (invalid)?. - bool isNull; - } - - /// Block chomping types. - enum Chomping - { - /// Strip all trailing line breaks. '-' indicator. - strip, - /// Line break of the last line is preserved, others discarded. Default. - clip, - /// All trailing line breaks are preserved. '+' indicator. - keep - } - - /// Reader used to read from a file/stream. 
- Reader reader_; - /// Are we done scanning? - bool done_; - - /// Level of nesting in flow context. If 0, we're in block context. - uint flowLevel_; - /// Current indentation level. - int indent_ = -1; - /// Past indentation levels. Used as a stack. - Appender!(int[]) indents_; - - /// Processed tokens not yet emitted. Used as a queue. - Queue!Token tokens_; - - /// Number of tokens emitted through the getToken method. - uint tokensTaken_; - - /// Can a simple key start at the current position? A simple key may start: - /// - at the beginning of the line, not counting indentation spaces - /// (in block context), - /// - after '{', '[', ',' (in the flow context), - /// - after '?', ':', '-' (in the block context). - /// In the block context, this flag also signifies if a block collection - /// may start at the current position. - bool allowSimpleKey_ = true; - - /// Possible simple keys indexed by flow levels. - SimpleKey[] possibleSimpleKeys_; - - public: - /// Construct a Scanner using specified Reader. - this(Reader reader) @safe nothrow - { - // Return the next token, but do not delete it from the queue - reader_ = reader; - fetchStreamStart(); - } - - /// Advance to the next token - void popFront() @safe - { - ++tokensTaken_; - tokens_.pop(); - } - - /// Return the current token - const(Token) front() @safe - { - enforce(!empty, "No token left to peek"); - return tokens_.peek(); - } - - /// Return whether there are any more tokens left. - bool empty() @safe - { - while (needMoreTokens()) - { - fetchToken(); - } - return tokens_.empty; - } - - /// Set file name. - void name(string name) @safe pure nothrow @nogc - { - reader_.name = name; - } - - private: - /// Most scanning error messages have the same format; so build them with this - /// function. - string expected(T)(string expected, T found) - { - return text("expected ", expected, ", but found ", found); - } - - /// Determine whether or not we need to fetch more tokens before peeking/getting a token. 
- bool needMoreTokens() @safe pure - { - if(done_) { return false; } - if(tokens_.empty) { return true; } - - /// The current token may be a potential simple key, so we need to look further. - stalePossibleSimpleKeys(); - return nextPossibleSimpleKey() == tokensTaken_; - } - - /// Fetch at token, adding it to tokens_. - void fetchToken() @safe - { - // Eat whitespaces and comments until we reach the next token. - scanToNextToken(); - - // Remove obsolete possible simple keys. - stalePossibleSimpleKeys(); - - // Compare current indentation and column. It may add some tokens - // and decrease the current indentation level. - unwindIndent(reader_.column); - - // Get the next character. - const dchar c = reader_.peekByte(); - - // Fetch the token. - if(c == '\0') { return fetchStreamEnd(); } - if(checkDirective()) { return fetchDirective(); } - if(checkDocumentStart()) { return fetchDocumentStart(); } - if(checkDocumentEnd()) { return fetchDocumentEnd(); } - // Order of the following checks is NOT significant. 
- switch(c) - { - case '[': return fetchFlowSequenceStart(); - case '{': return fetchFlowMappingStart(); - case ']': return fetchFlowSequenceEnd(); - case '}': return fetchFlowMappingEnd(); - case ',': return fetchFlowEntry(); - case '!': return fetchTag(); - case '\'': return fetchSingle(); - case '\"': return fetchDouble(); - case '*': return fetchAlias(); - case '&': return fetchAnchor(); - case '?': if(checkKey()) { return fetchKey(); } goto default; - case ':': if(checkValue()) { return fetchValue(); } goto default; - case '-': if(checkBlockEntry()) { return fetchBlockEntry(); } goto default; - case '|': if(flowLevel_ == 0) { return fetchLiteral(); } break; - case '>': if(flowLevel_ == 0) { return fetchFolded(); } break; - default: if(checkPlain()) { return fetchPlain(); } - } - - throw new ScannerException("While scanning for the next token, found character " ~ - "\'%s\', index %s that cannot start any token" - .format(c, to!int(c)), reader_.mark); - } - - - /// Return the token number of the nearest possible simple key. - uint nextPossibleSimpleKey() @safe pure nothrow @nogc - { - uint minTokenNumber = uint.max; - foreach(k, ref simpleKey; possibleSimpleKeys_) - { - if(simpleKey.isNull) { continue; } - minTokenNumber = min(minTokenNumber, simpleKey.tokenIndex); - } - return minTokenNumber; - } - - /// Remove entries that are no longer possible simple keys. - /// - /// According to the YAML specification, simple keys - /// - should be limited to a single line, - /// - should be no longer than 1024 characters. - /// Disabling this will allow simple keys of any length and - /// height (may cause problems if indentation is broken though). 
- void stalePossibleSimpleKeys() @safe pure - { - foreach(level, ref key; possibleSimpleKeys_) - { - if(key.isNull) { continue; } - if(key.line != reader_.line || reader_.charIndex - key.charIndex > 1024) - { - enforce(!key.required, - new ScannerException("While scanning a simple key", - Mark(reader_.name, key.line, key.column), - "could not find expected ':'", reader_.mark)); - key.isNull = true; - } - } - } - - /// Check if the next token starts a possible simple key and if so, save its position. - /// - /// This function is called for ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. - void savePossibleSimpleKey() @safe pure - { - // Check if a simple key is required at the current position. - const required = (flowLevel_ == 0 && indent_ == reader_.column); - assert(allowSimpleKey_ || !required, "A simple key is required only if it is " ~ - "the first token in the current line. Therefore it is always allowed."); - - if(!allowSimpleKey_) { return; } - - // The next token might be a simple key, so save its number and position. - removePossibleSimpleKey(); - const tokenCount = tokensTaken_ + cast(uint)tokens_.length; - - const line = reader_.line; - const column = reader_.column; - const key = SimpleKey(cast(uint)reader_.charIndex, tokenCount, line, - cast(ushort)min(column, ushort.max), required); - - if(possibleSimpleKeys_.length <= flowLevel_) - { - const oldLength = possibleSimpleKeys_.length; - possibleSimpleKeys_.length = flowLevel_ + 1; - //No need to initialize the last element, it's already done in the next line. - possibleSimpleKeys_[oldLength .. flowLevel_] = SimpleKey.init; - } - possibleSimpleKeys_[flowLevel_] = key; - } - - /// Remove the saved possible key position at the current flow level. 
- void removePossibleSimpleKey() @safe pure - { - if(possibleSimpleKeys_.length <= flowLevel_) { return; } - - if(!possibleSimpleKeys_[flowLevel_].isNull) - { - const key = possibleSimpleKeys_[flowLevel_]; - enforce(!key.required, - new ScannerException("While scanning a simple key", - Mark(reader_.name, key.line, key.column), - "could not find expected ':'", reader_.mark)); - possibleSimpleKeys_[flowLevel_].isNull = true; - } - } - - /// Decrease indentation, removing entries in indents_. - /// - /// Params: column = Current column in the file/stream. - void unwindIndent(const int column) @safe - { - if(flowLevel_ > 0) - { - // In flow context, tokens should respect indentation. - // The condition should be `indent >= column` according to the spec. - // But this condition will prohibit intuitively correct - // constructions such as - // key : { - // } - - // In the flow context, indentation is ignored. We make the scanner less - // restrictive than what the specification requires. - // if(pedantic_ && flowLevel_ > 0 && indent_ > column) - // { - // throw new ScannerException("Invalid intendation or unclosed '[' or '{'", - // reader_.mark) - // } - return; - } - - // In block context, we may need to issue the BLOCK-END tokens. - while(indent_ > column) - { - indent_ = indents_.data.back; - assert(indents_.data.length); - indents_.shrinkTo(indents_.data.length - 1); - tokens_.push(blockEndToken(reader_.mark, reader_.mark)); - } - } - - /// Increase indentation if needed. - /// - /// Params: column = Current column in the file/stream. - /// - /// Returns: true if the indentation was increased, false otherwise. - bool addIndent(int column) @safe - { - if(indent_ >= column){return false;} - indents_ ~= indent_; - indent_ = column; - return true; - } - - - /// Add STREAM-START token. - void fetchStreamStart() @safe nothrow - { - tokens_.push(streamStartToken(reader_.mark, reader_.mark, reader_.encoding)); - } - - ///Add STREAM-END token. 
- void fetchStreamEnd() @safe - { - //Set intendation to -1 . - unwindIndent(-1); - removePossibleSimpleKey(); - allowSimpleKey_ = false; - possibleSimpleKeys_.destroy; - - tokens_.push(streamEndToken(reader_.mark, reader_.mark)); - done_ = true; - } - - /// Add DIRECTIVE token. - void fetchDirective() @safe - { - // Set intendation to -1 . - unwindIndent(-1); - // Reset simple keys. - removePossibleSimpleKey(); - allowSimpleKey_ = false; - - auto directive = scanDirective(); - tokens_.push(directive); - } - - /// Add DOCUMENT-START or DOCUMENT-END token. - void fetchDocumentIndicator(TokenID id)() - if(id == TokenID.documentStart || id == TokenID.documentEnd) - { - // Set indentation to -1 . - unwindIndent(-1); - // Reset simple keys. Note that there can't be a block collection after '---'. - removePossibleSimpleKey(); - allowSimpleKey_ = false; - - Mark startMark = reader_.mark; - reader_.forward(3); - tokens_.push(simpleToken!id(startMark, reader_.mark)); - } - - /// Aliases to add DOCUMENT-START or DOCUMENT-END token. - alias fetchDocumentStart = fetchDocumentIndicator!(TokenID.documentStart); - alias fetchDocumentEnd = fetchDocumentIndicator!(TokenID.documentEnd); - - /// Add FLOW-SEQUENCE-START or FLOW-MAPPING-START token. - void fetchFlowCollectionStart(TokenID id)() @safe - { - // '[' and '{' may start a simple key. - savePossibleSimpleKey(); - // Simple keys are allowed after '[' and '{'. - allowSimpleKey_ = true; - ++flowLevel_; - - Mark startMark = reader_.mark; - reader_.forward(); - tokens_.push(simpleToken!id(startMark, reader_.mark)); - } - - /// Aliases to add FLOW-SEQUENCE-START or FLOW-MAPPING-START token. - alias fetchFlowSequenceStart = fetchFlowCollectionStart!(TokenID.flowSequenceStart); - alias fetchFlowMappingStart = fetchFlowCollectionStart!(TokenID.flowMappingStart); - - /// Add FLOW-SEQUENCE-START or FLOW-MAPPING-START token. - void fetchFlowCollectionEnd(TokenID id)() - { - // Reset possible simple key on the current level. 
- removePossibleSimpleKey(); - // No simple keys after ']' and '}'. - allowSimpleKey_ = false; - --flowLevel_; - - Mark startMark = reader_.mark; - reader_.forward(); - tokens_.push(simpleToken!id(startMark, reader_.mark)); - } - - /// Aliases to add FLOW-SEQUENCE-START or FLOW-MAPPING-START token/ - alias fetchFlowSequenceEnd = fetchFlowCollectionEnd!(TokenID.flowSequenceEnd); - alias fetchFlowMappingEnd = fetchFlowCollectionEnd!(TokenID.flowMappingEnd); - - /// Add FLOW-ENTRY token; - void fetchFlowEntry() @safe - { - // Reset possible simple key on the current level. - removePossibleSimpleKey(); - // Simple keys are allowed after ','. - allowSimpleKey_ = true; - - Mark startMark = reader_.mark; - reader_.forward(); - tokens_.push(flowEntryToken(startMark, reader_.mark)); - } - - /// Additional checks used in block context in fetchBlockEntry and fetchKey. - /// - /// Params: type = String representing the token type we might need to add. - /// id = Token type we might need to add. - void blockChecks(string type, TokenID id)() - { - enum context = type ~ " keys are not allowed here"; - // Are we allowed to start a key (not neccesarily a simple one)? - enforce(allowSimpleKey_, new ScannerException(context, reader_.mark)); - - if(addIndent(reader_.column)) - { - tokens_.push(simpleToken!id(reader_.mark, reader_.mark)); - } - } - - /// Add BLOCK-ENTRY token. Might add BLOCK-SEQUENCE-START in the process. - void fetchBlockEntry() @safe - { - if(flowLevel_ == 0) { blockChecks!("Sequence", TokenID.blockSequenceStart)(); } - - // It's an error for the block entry to occur in the flow context, - // but we let the parser detect this. - - // Reset possible simple key on the current level. - removePossibleSimpleKey(); - // Simple keys are allowed after '-'. - allowSimpleKey_ = true; - - Mark startMark = reader_.mark; - reader_.forward(); - tokens_.push(blockEntryToken(startMark, reader_.mark)); - } - - /// Add KEY token. Might add BLOCK-MAPPING-START in the process. 
- void fetchKey() @safe - { - if(flowLevel_ == 0) { blockChecks!("Mapping", TokenID.blockMappingStart)(); } - - // Reset possible simple key on the current level. - removePossibleSimpleKey(); - // Simple keys are allowed after '?' in the block context. - allowSimpleKey_ = (flowLevel_ == 0); - - Mark startMark = reader_.mark; - reader_.forward(); - tokens_.push(keyToken(startMark, reader_.mark)); - } - - /// Add VALUE token. Might add KEY and/or BLOCK-MAPPING-START in the process. - void fetchValue() @safe - { - //Do we determine a simple key? - if(possibleSimpleKeys_.length > flowLevel_ && - !possibleSimpleKeys_[flowLevel_].isNull) - { - const key = possibleSimpleKeys_[flowLevel_]; - possibleSimpleKeys_[flowLevel_].isNull = true; - Mark keyMark = Mark(reader_.name, key.line, key.column); - const idx = key.tokenIndex - tokensTaken_; - - assert(idx >= 0); - - // Add KEY. - // Manually inserting since tokens are immutable (need linked list). - tokens_.insert(keyToken(keyMark, keyMark), idx); - - // If this key starts a new block mapping, we need to add BLOCK-MAPPING-START. - if(flowLevel_ == 0 && addIndent(key.column)) - { - tokens_.insert(blockMappingStartToken(keyMark, keyMark), idx); - } - - // There cannot be two simple keys in a row. - allowSimpleKey_ = false; - } - // Part of a complex key - else - { - // We can start a complex value if and only if we can start a simple key. - enforce(flowLevel_ > 0 || allowSimpleKey_, - new ScannerException("Mapping values are not allowed here", reader_.mark)); - - // If this value starts a new block mapping, we need to add - // BLOCK-MAPPING-START. It'll be detected as an error later by the parser. - if(flowLevel_ == 0 && addIndent(reader_.column)) - { - tokens_.push(blockMappingStartToken(reader_.mark, reader_.mark)); - } - - // Reset possible simple key on the current level. - removePossibleSimpleKey(); - // Simple keys are allowed after ':' in the block context. - allowSimpleKey_ = (flowLevel_ == 0); - } - - // Add VALUE. 
- Mark startMark = reader_.mark; - reader_.forward(); - tokens_.push(valueToken(startMark, reader_.mark)); - } - - /// Add ALIAS or ANCHOR token. - void fetchAnchor_(TokenID id)() @safe - if(id == TokenID.alias_ || id == TokenID.anchor) - { - // ALIAS/ANCHOR could be a simple key. - savePossibleSimpleKey(); - // No simple keys after ALIAS/ANCHOR. - allowSimpleKey_ = false; - - auto anchor = scanAnchor(id); - tokens_.push(anchor); - } - - /// Aliases to add ALIAS or ANCHOR token. - alias fetchAlias = fetchAnchor_!(TokenID.alias_); - alias fetchAnchor = fetchAnchor_!(TokenID.anchor); - - /// Add TAG token. - void fetchTag() @safe - { - //TAG could start a simple key. - savePossibleSimpleKey(); - //No simple keys after TAG. - allowSimpleKey_ = false; - - tokens_.push(scanTag()); - } - - /// Add block SCALAR token. - void fetchBlockScalar(ScalarStyle style)() @safe - if(style == ScalarStyle.literal || style == ScalarStyle.folded) - { - // Reset possible simple key on the current level. - removePossibleSimpleKey(); - // A simple key may follow a block scalar. - allowSimpleKey_ = true; - - auto blockScalar = scanBlockScalar(style); - tokens_.push(blockScalar); - } - - /// Aliases to add literal or folded block scalar. - alias fetchLiteral = fetchBlockScalar!(ScalarStyle.literal); - alias fetchFolded = fetchBlockScalar!(ScalarStyle.folded); - - /// Add quoted flow SCALAR token. - void fetchFlowScalar(ScalarStyle quotes)() - { - // A flow scalar could be a simple key. - savePossibleSimpleKey(); - // No simple keys after flow scalars. - allowSimpleKey_ = false; - - // Scan and add SCALAR. - auto scalar = scanFlowScalar(quotes); - tokens_.push(scalar); - } - - /// Aliases to add single or double quoted block scalar. - alias fetchSingle = fetchFlowScalar!(ScalarStyle.singleQuoted); - alias fetchDouble = fetchFlowScalar!(ScalarStyle.doubleQuoted); - - /// Add plain SCALAR token. 
- void fetchPlain() @safe - { - // A plain scalar could be a simple key - savePossibleSimpleKey(); - // No simple keys after plain scalars. But note that scanPlain() will - // change this flag if the scan is finished at the beginning of the line. - allowSimpleKey_ = false; - auto plain = scanPlain(); - - // Scan and add SCALAR. May change allowSimpleKey_ - tokens_.push(plain); - } - - pure: - - ///Check if the next token is DIRECTIVE: ^ '%' ... - bool checkDirective() @safe - { - return reader_.peekByte() == '%' && reader_.column == 0; - } - - /// Check if the next token is DOCUMENT-START: ^ '---' (' '|'\n') - bool checkDocumentStart() @safe - { - // Check one char first, then all 3, to prevent reading outside the buffer. - return reader_.column == 0 && - reader_.peekByte() == '-' && - reader_.prefix(3) == "---" && - reader_.peek(3).isWhiteSpace; - } - - /// Check if the next token is DOCUMENT-END: ^ '...' (' '|'\n') - bool checkDocumentEnd() @safe - { - // Check one char first, then all 3, to prevent reading outside the buffer. - return reader_.column == 0 && - reader_.peekByte() == '.' && - reader_.prefix(3) == "..." && - reader_.peek(3).isWhiteSpace; - } - - /// Check if the next token is BLOCK-ENTRY: '-' (' '|'\n') - bool checkBlockEntry() @safe - { - return !!reader_.peek(1).isWhiteSpace; - } - - /// Check if the next token is KEY(flow context): '?' - /// - /// or KEY(block context): '?' (' '|'\n') - bool checkKey() @safe - { - return (flowLevel_ > 0 || reader_.peek(1).isWhiteSpace); - } - - /// Check if the next token is VALUE(flow context): ':' - /// - /// or VALUE(block context): ':' (' '|'\n') - bool checkValue() @safe - { - return flowLevel_ > 0 || reader_.peek(1).isWhiteSpace; - } - - /// Check if the next token is a plain scalar. - /// - /// A plain scalar may start with any non-space character except: - /// '-', '?', ':', ',', '[', ']', '{', '}', - /// '#', '&', '*', '!', '|', '>', '\'', '\"', - /// '%', '@', '`'. 
- /// - /// It may also start with - /// '-', '?', ':' - /// if it is followed by a non-space character. - /// - /// Note that we limit the last rule to the block context (except the - /// '-' character) because we want the flow context to be space - /// independent. - bool checkPlain() @safe - { - const c = reader_.peek(); - if(!c.isNonScalarStartCharacter) - { - return true; - } - return !reader_.peek(1).isWhiteSpace && - (c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':'))); - } - - /// Move to the next non-space character. - void findNextNonSpace() @safe - { - while(reader_.peekByte() == ' ') { reader_.forward(); } - } - - /// Scan a string of alphanumeric or "-_" characters. - /// - /// Assumes that the caller is building a slice in Reader, and puts the scanned - /// characters into that slice. - void scanAlphaNumericToSlice(string name)(const Mark startMark) - { - size_t length; - dchar c = reader_.peek(); - while(c.isAlphaNum || c.among!('-', '_')) { c = reader_.peek(++length); } - - enforce(length > 0, new ScannerException("While scanning " ~ name, - startMark, expected("alphanumeric, '-' or '_'", c), reader_.mark)); - - reader_.sliceBuilder.write(reader_.get(length)); - } - - /// Scan a string. - /// - /// Assumes that the caller is building a slice in Reader, and puts the scanned - /// characters into that slice. - void scanAnchorAliasToSlice(const Mark startMark) @safe - { - size_t length; - dchar c = reader_.peek(); - while (c.isNSAnchorName) - { - c = reader_.peek(++length); - } - - enforce(length > 0, new ScannerException("While scanning an anchor or alias", - startMark, expected("a printable character besides '[', ']', '{', '}' and ','", c), reader_.mark)); - - reader_.sliceBuilder.write(reader_.get(length)); - } - - /// Scan and throw away all characters until next line break. - void scanToNextBreak() @safe - { - while(!reader_.peek().isBreak) { reader_.forward(); } - } - - /// Scan all characters until next line break. 
- /// - /// Assumes that the caller is building a slice in Reader, and puts the scanned - /// characters into that slice. - void scanToNextBreakToSlice() @safe - { - uint length; - while(!reader_.peek(length).isBreak) - { - ++length; - } - reader_.sliceBuilder.write(reader_.get(length)); - } - - - /// Move to next token in the file/stream. - /// - /// We ignore spaces, line breaks and comments. - /// If we find a line break in the block context, we set - /// allowSimpleKey` on. - /// - /// We do not yet support BOM inside the stream as the - /// specification requires. Any such mark will be considered as a part - /// of the document. - void scanToNextToken() @safe - { - // TODO(PyYAML): We need to make tab handling rules more sane. A good rule is: - // Tabs cannot precede tokens - // BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, - // KEY(block), VALUE(block), BLOCK-ENTRY - // So the checking code is - // if : - // allowSimpleKey_ = false - // We also need to add the check for `allowSimpleKey_ == true` to - // `unwindIndent` before issuing BLOCK-END. - // Scanners for block, flow, and plain scalars need to be modified. - - for(;;) - { - //All whitespace in flow context is ignored, even whitespace - // not allowed in other contexts - if (flowLevel_ > 0) - { - while(reader_.peekByte().isNonLinebreakWhitespace) { reader_.forward(); } - } - else - { - findNextNonSpace(); - } - if(reader_.peekByte() == '#') { scanToNextBreak(); } - if(scanLineBreak() != '\0') - { - if(flowLevel_ == 0) { allowSimpleKey_ = true; } - } - else - { - break; - } - } - } - - /// Scan directive token. - Token scanDirective() @safe - { - Mark startMark = reader_.mark; - // Skip the '%'. - reader_.forward(); - - // Scan directive name - reader_.sliceBuilder.begin(); - scanDirectiveNameToSlice(startMark); - const name = reader_.sliceBuilder.finish(); - - reader_.sliceBuilder.begin(); - - // Index where tag handle ends and suffix starts in a tag directive value. 
- uint tagHandleEnd = uint.max; - if(name == "YAML") { scanYAMLDirectiveValueToSlice(startMark); } - else if(name == "TAG") { tagHandleEnd = scanTagDirectiveValueToSlice(startMark); } - char[] value = reader_.sliceBuilder.finish(); - - Mark endMark = reader_.mark; - - DirectiveType directive; - if(name == "YAML") { directive = DirectiveType.yaml; } - else if(name == "TAG") { directive = DirectiveType.tag; } - else - { - directive = DirectiveType.reserved; - scanToNextBreak(); - } - - scanDirectiveIgnoredLine(startMark); - - return directiveToken(startMark, endMark, value, directive, tagHandleEnd); - } - - /// Scan name of a directive token. - /// - /// Assumes that the caller is building a slice in Reader, and puts the scanned - /// characters into that slice. - void scanDirectiveNameToSlice(const Mark startMark) @safe - { - // Scan directive name. - scanAlphaNumericToSlice!"a directive"(startMark); - - enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), - new ScannerException("While scanning a directive", startMark, - expected("alphanumeric, '-' or '_'", reader_.peek()), reader_.mark)); - } - - /// Scan value of a YAML directive token. Returns major, minor version separated by '.'. - /// - /// Assumes that the caller is building a slice in Reader, and puts the scanned - /// characters into that slice. - void scanYAMLDirectiveValueToSlice(const Mark startMark) @safe - { - findNextNonSpace(); - - scanYAMLDirectiveNumberToSlice(startMark); - - enforce(reader_.peekByte() == '.', - new ScannerException("While scanning a directive", startMark, - expected("digit or '.'", reader_.peek()), reader_.mark)); - // Skip the '.'. 
- reader_.forward(); - - reader_.sliceBuilder.write('.'); - scanYAMLDirectiveNumberToSlice(startMark); - - enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), - new ScannerException("While scanning a directive", startMark, - expected("digit or '.'", reader_.peek()), reader_.mark)); - } - - /// Scan a number from a YAML directive. - /// - /// Assumes that the caller is building a slice in Reader, and puts the scanned - /// characters into that slice. - void scanYAMLDirectiveNumberToSlice(const Mark startMark) @safe - { - enforce(isDigit(reader_.peek()), - new ScannerException("While scanning a directive", startMark, - expected("digit", reader_.peek()), reader_.mark)); - - // Already found the first digit in the enforce(), so set length to 1. - uint length = 1; - while(reader_.peek(length).isDigit) { ++length; } - - reader_.sliceBuilder.write(reader_.get(length)); - } - - /// Scan value of a tag directive. - /// - /// Assumes that the caller is building a slice in Reader, and puts the scanned - /// characters into that slice. - /// - /// Returns: Length of tag handle (which is before tag prefix) in scanned data - uint scanTagDirectiveValueToSlice(const Mark startMark) @safe - { - findNextNonSpace(); - const startLength = reader_.sliceBuilder.length; - scanTagDirectiveHandleToSlice(startMark); - const handleLength = cast(uint)(reader_.sliceBuilder.length - startLength); - findNextNonSpace(); - scanTagDirectivePrefixToSlice(startMark); - - return handleLength; - } - - /// Scan handle of a tag directive. - /// - /// Assumes that the caller is building a slice in Reader, and puts the scanned - /// characters into that slice. - void scanTagDirectiveHandleToSlice(const Mark startMark) @safe - { - scanTagHandleToSlice!"directive"(startMark); - enforce(reader_.peekByte() == ' ', - new ScannerException("While scanning a directive handle", startMark, - expected("' '", reader_.peek()), reader_.mark)); - } - - /// Scan prefix of a tag directive. 
- /// - /// Assumes that the caller is building a slice in Reader, and puts the scanned - /// characters into that slice. - void scanTagDirectivePrefixToSlice(const Mark startMark) @safe - { - scanTagURIToSlice!"directive"(startMark); - enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), - new ScannerException("While scanning a directive prefix", startMark, - expected("' '", reader_.peek()), reader_.mark)); - } - - /// Scan (and ignore) ignored line after a directive. - void scanDirectiveIgnoredLine(const Mark startMark) @safe - { - findNextNonSpace(); - if(reader_.peekByte() == '#') { scanToNextBreak(); } - enforce(reader_.peek().isBreak, - new ScannerException("While scanning a directive", startMark, - expected("comment or a line break", reader_.peek()), reader_.mark)); - scanLineBreak(); - } - - - /// Scan an alias or an anchor. - /// - /// The specification does not restrict characters for anchors and - /// aliases. This may lead to problems, for instance, the document: - /// [ *alias, value ] - /// can be interpteted in two ways, as - /// [ "value" ] - /// and - /// [ *alias , "value" ] - /// Therefore we restrict aliases to ASCII alphanumeric characters. - Token scanAnchor(const TokenID id) @safe - { - const startMark = reader_.mark; - reader_.forward(); // The */& character was only peeked, so we drop it now - - reader_.sliceBuilder.begin(); - scanAnchorAliasToSlice(startMark); - // On error, value is discarded as we return immediately - char[] value = reader_.sliceBuilder.finish(); - - assert(!reader_.peek().isNSAnchorName, "Anchor/alias name not fully scanned"); - - if(id == TokenID.alias_) - { - return aliasToken(startMark, reader_.mark, value); - } - if(id == TokenID.anchor) - { - return anchorToken(startMark, reader_.mark, value); - } - assert(false, "This code should never be reached"); - } - - /// Scan a tag token. 
- Token scanTag() @safe - { - const startMark = reader_.mark; - dchar c = reader_.peek(1); - - reader_.sliceBuilder.begin(); - scope(failure) { reader_.sliceBuilder.finish(); } - // Index where tag handle ends and tag suffix starts in the tag value - // (slice) we will produce. - uint handleEnd; - - if(c == '<') - { - reader_.forward(2); - - handleEnd = 0; - scanTagURIToSlice!"tag"(startMark); - enforce(reader_.peekByte() == '>', - new ScannerException("While scanning a tag", startMark, - expected("'>'", reader_.peek()), reader_.mark)); - reader_.forward(); - } - else if(c.isWhiteSpace) - { - reader_.forward(); - handleEnd = 0; - reader_.sliceBuilder.write('!'); - } - else - { - uint length = 1; - bool useHandle; - - while(!c.isBreakOrSpace) - { - if(c == '!') - { - useHandle = true; - break; - } - ++length; - c = reader_.peek(length); - } - - if(useHandle) - { - scanTagHandleToSlice!"tag"(startMark); - handleEnd = cast(uint)reader_.sliceBuilder.length; - } - else - { - reader_.forward(); - reader_.sliceBuilder.write('!'); - handleEnd = cast(uint)reader_.sliceBuilder.length; - } - - scanTagURIToSlice!"tag"(startMark); - } - - enforce(reader_.peek().isBreakOrSpace, - new ScannerException("While scanning a tag", startMark, expected("' '", reader_.peek()), - reader_.mark)); - - char[] slice = reader_.sliceBuilder.finish(); - return tagToken(startMark, reader_.mark, slice, handleEnd); - } - - /// Scan a block scalar token with specified style. - Token scanBlockScalar(const ScalarStyle style) @safe - { - const startMark = reader_.mark; - - // Scan the header. - reader_.forward(); - - const indicators = scanBlockScalarIndicators(startMark); - - const chomping = indicators[0]; - const increment = indicators[1]; - scanBlockScalarIgnoredLine(startMark); - - // Determine the indentation level and go to the first non-empty line. 
- Mark endMark; - uint indent = max(1, indent_ + 1); - - reader_.sliceBuilder.begin(); - alias Transaction = SliceBuilder.Transaction; - // Used to strip the last line breaks written to the slice at the end of the - // scalar, which may be needed based on chomping. - Transaction breaksTransaction = Transaction(&reader_.sliceBuilder); - // Read the first indentation/line breaks before the scalar. - size_t startLen = reader_.sliceBuilder.length; - if(increment == int.min) - { - auto indentation = scanBlockScalarIndentationToSlice(); - endMark = indentation[1]; - indent = max(indent, indentation[0]); - } - else - { - indent += increment - 1; - endMark = scanBlockScalarBreaksToSlice(indent); - } - - // int.max means there's no line break (int.max is outside UTF-32). - dchar lineBreak = cast(dchar)int.max; - - // Scan the inner part of the block scalar. - while(reader_.column == indent && reader_.peekByte() != '\0') - { - breaksTransaction.commit(); - const bool leadingNonSpace = !reader_.peekByte().among!(' ', '\t'); - // This is where the 'interesting' non-whitespace data gets read. - scanToNextBreakToSlice(); - lineBreak = scanLineBreak(); - - - // This transaction serves to rollback data read in the - // scanBlockScalarBreaksToSlice() call. - breaksTransaction = Transaction(&reader_.sliceBuilder); - startLen = reader_.sliceBuilder.length; - // The line breaks should actually be written _after_ the if() block - // below. We work around that by inserting - endMark = scanBlockScalarBreaksToSlice(indent); - - // This will not run during the last iteration (see the if() vs the - // while()), hence breaksTransaction rollback (which happens after this - // loop) will never roll back data written in this if() block. - if(reader_.column == indent && reader_.peekByte() != '\0') - { - // Unfortunately, folding rules are ambiguous. 
- - // This is the folding according to the specification: - if(style == ScalarStyle.folded && lineBreak == '\n' && - leadingNonSpace && !reader_.peekByte().among!(' ', '\t')) - { - // No breaks were scanned; no need to insert the space in the - // middle of slice. - if(startLen == reader_.sliceBuilder.length) - { - reader_.sliceBuilder.write(' '); - } - } - else - { - // We need to insert in the middle of the slice in case any line - // breaks were scanned. - reader_.sliceBuilder.insert(lineBreak, startLen); - } - - ////this is Clark Evans's interpretation (also in the spec - ////examples): - // - //if(style == ScalarStyle.folded && lineBreak == '\n') - //{ - // if(startLen == endLen) - // { - // if(!" \t"d.canFind(reader_.peekByte())) - // { - // reader_.sliceBuilder.write(' '); - // } - // else - // { - // chunks ~= lineBreak; - // } - // } - //} - //else - //{ - // reader_.sliceBuilder.insertBack(lineBreak, endLen - startLen); - //} - } - else - { - break; - } - } - - // If chompint is Keep, we keep (commit) the last scanned line breaks - // (which are at the end of the scalar). Otherwise re remove them (end the - // transaction). - if(chomping == Chomping.keep) { breaksTransaction.commit(); } - else { breaksTransaction.end(); } - if(chomping != Chomping.strip && lineBreak != int.max) - { - // If chomping is Keep, we keep the line break but the first line break - // that isn't stripped (since chomping isn't Strip in this branch) must - // be inserted _before_ the other line breaks. 
- if(chomping == Chomping.keep) - { - reader_.sliceBuilder.insert(lineBreak, startLen); - } - // If chomping is not Keep, breaksTransaction was cancelled so we can - // directly write the first line break (as it isn't stripped - chomping - // is not Strip) - else - { - reader_.sliceBuilder.write(lineBreak); - } - } - - char[] slice = reader_.sliceBuilder.finish(); - return scalarToken(startMark, endMark, slice, style); - } - - /// Scan chomping and indentation indicators of a scalar token. - Tuple!(Chomping, int) scanBlockScalarIndicators(const Mark startMark) @safe - { - auto chomping = Chomping.clip; - int increment = int.min; - dchar c = reader_.peek(); - - /// Indicators can be in any order. - if(getChomping(c, chomping)) - { - getIncrement(c, increment, startMark); - } - else - { - const gotIncrement = getIncrement(c, increment, startMark); - if(gotIncrement) { getChomping(c, chomping); } - } - - enforce(c.among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), - new ScannerException("While scanning a block scalar", startMark, - expected("chomping or indentation indicator", c), reader_.mark)); - - return tuple(chomping, increment); - } - - /// Get chomping indicator, if detected. Return false otherwise. - /// - /// Used in scanBlockScalarIndicators. - /// - /// Params: - /// - /// c = The character that may be a chomping indicator. - /// chomping = Write the chomping value here, if detected. - bool getChomping(ref dchar c, ref Chomping chomping) @safe - { - if(!c.among!('+', '-')) { return false; } - chomping = c == '+' ? Chomping.keep : Chomping.strip; - reader_.forward(); - c = reader_.peek(); - return true; - } - - /// Get increment indicator, if detected. Return false otherwise. - /// - /// Used in scanBlockScalarIndicators. - /// - /// Params: - /// - /// c = The character that may be an increment indicator. - /// If an increment indicator is detected, this will be updated to - /// the next character in the Reader. 
- /// increment = Write the increment value here, if detected. - /// startMark = Mark for error messages. - bool getIncrement(ref dchar c, ref int increment, const Mark startMark) @safe - { - if(!c.isDigit) { return false; } - // Convert a digit to integer. - increment = c - '0'; - assert(increment < 10 && increment >= 0, "Digit has invalid value"); - - enforce(increment > 0, - new ScannerException("While scanning a block scalar", startMark, - expected("indentation indicator in range 1-9", "0"), reader_.mark)); - - reader_.forward(); - c = reader_.peek(); - return true; - } - - /// Scan (and ignore) ignored line in a block scalar. - void scanBlockScalarIgnoredLine(const Mark startMark) @safe - { - findNextNonSpace(); - if(reader_.peekByte()== '#') { scanToNextBreak(); } - - enforce(reader_.peek().isBreak, - new ScannerException("While scanning a block scalar", startMark, - expected("comment or line break", reader_.peek()), reader_.mark)); - - scanLineBreak(); - } - - /// Scan indentation in a block scalar, returning line breaks, max indent and end mark. - /// - /// Assumes that the caller is building a slice in Reader, and puts the scanned - /// characters into that slice. - Tuple!(uint, Mark) scanBlockScalarIndentationToSlice() @safe - { - uint maxIndent; - Mark endMark = reader_.mark; - - while(reader_.peek().among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029')) - { - if(reader_.peekByte() != ' ') - { - reader_.sliceBuilder.write(scanLineBreak()); - endMark = reader_.mark; - continue; - } - reader_.forward(); - maxIndent = max(reader_.column, maxIndent); - } - - return tuple(maxIndent, endMark); - } - - /// Scan line breaks at lower or specified indentation in a block scalar. - /// - /// Assumes that the caller is building a slice in Reader, and puts the scanned - /// characters into that slice. 
- Mark scanBlockScalarBreaksToSlice(const uint indent) @safe - { - Mark endMark = reader_.mark; - - for(;;) - { - while(reader_.column < indent && reader_.peekByte() == ' ') { reader_.forward(); } - if(!reader_.peek().among!('\n', '\r', '\u0085', '\u2028', '\u2029')) { break; } - reader_.sliceBuilder.write(scanLineBreak()); - endMark = reader_.mark; - } - - return endMark; - } - - /// Scan a qouted flow scalar token with specified quotes. - Token scanFlowScalar(const ScalarStyle quotes) @safe - { - const startMark = reader_.mark; - const quote = reader_.get(); - - reader_.sliceBuilder.begin(); - - scanFlowScalarNonSpacesToSlice(quotes, startMark); - - while(reader_.peek() != quote) - { - scanFlowScalarSpacesToSlice(startMark); - scanFlowScalarNonSpacesToSlice(quotes, startMark); - } - reader_.forward(); - - auto slice = reader_.sliceBuilder.finish(); - return scalarToken(startMark, reader_.mark, slice, quotes); - } - - /// Scan nonspace characters in a flow scalar. - /// - /// Assumes that the caller is building a slice in Reader, and puts the scanned - /// characters into that slice. 
- void scanFlowScalarNonSpacesToSlice(const ScalarStyle quotes, const Mark startMark) - @safe - { - for(;;) - { - dchar c = reader_.peek(); - - size_t numCodePoints; - while(!reader_.peek(numCodePoints).isFlowScalarBreakSpace) { ++numCodePoints; } - - if (numCodePoints > 0) { reader_.sliceBuilder.write(reader_.get(numCodePoints)); } - - c = reader_.peek(); - if(quotes == ScalarStyle.singleQuoted && c == '\'' && reader_.peek(1) == '\'') - { - reader_.forward(2); - reader_.sliceBuilder.write('\''); - } - else if((quotes == ScalarStyle.doubleQuoted && c == '\'') || - (quotes == ScalarStyle.singleQuoted && c.among!('"', '\\'))) - { - reader_.forward(); - reader_.sliceBuilder.write(c); - } - else if(quotes == ScalarStyle.doubleQuoted && c == '\\') - { - reader_.forward(); - c = reader_.peek(); - if(c.among!(escapes)) - { - reader_.forward(); - // Escaping has been moved to Parser as it can't be done in - // place (in a slice) in case of '\P' and '\L' (very uncommon, - // but we don't want to break the spec) - char[2] escapeSequence = ['\\', cast(char)c]; - reader_.sliceBuilder.write(escapeSequence); - } - else if(c.among!(escapeHexCodeList)) - { - const hexLength = dyaml.escapes.escapeHexLength(c); - reader_.forward(); - - foreach(i; 0 .. 
hexLength) { - enforce(reader_.peek(i).isHexDigit, - new ScannerException("While scanning a double quoted scalar", startMark, - expected("escape sequence of hexadecimal numbers", - reader_.peek(i)), reader_.mark)); - } - char[] hex = reader_.get(hexLength); - - enforce((hex.length > 0) && (hex.length <= 8), - new ScannerException("While scanning a double quoted scalar", startMark, - "overflow when parsing an escape sequence of " ~ - "hexadecimal numbers.", reader_.mark)); - - char[2] escapeStart = ['\\', cast(char) c]; - reader_.sliceBuilder.write(escapeStart); - reader_.sliceBuilder.write(hex); - - } - else if(c.among!('\n', '\r', '\u0085', '\u2028', '\u2029')) - { - scanLineBreak(); - scanFlowScalarBreaksToSlice(startMark); - } - else - { - throw new ScannerException("While scanning a double quoted scalar", startMark, - text("found unsupported escape character ", c), - reader_.mark); - } - } - else { return; } - } - } - - /// Scan space characters in a flow scalar. - /// - /// Assumes that the caller is building a slice in Reader, and puts the scanned - /// spaces into that slice. - void scanFlowScalarSpacesToSlice(const Mark startMark) @safe - { - // Increase length as long as we see whitespace. - size_t length; - while(reader_.peekByte(length).among!(' ', '\t')) { ++length; } - auto whitespaces = reader_.prefixBytes(length); - - // Can check the last byte without striding because '\0' is ASCII - const c = reader_.peek(length); - enforce(c != '\0', - new ScannerException("While scanning a quoted scalar", startMark, - "found unexpected end of buffer", reader_.mark)); - - // Spaces not followed by a line break. - if(!c.among!('\n', '\r', '\u0085', '\u2028', '\u2029')) - { - reader_.forward(length); - reader_.sliceBuilder.write(whitespaces); - return; - } - - // There's a line break after the spaces. 
- reader_.forward(length); - const lineBreak = scanLineBreak(); - - if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); } - - // If we have extra line breaks after the first, scan them into the - // slice. - const bool extraBreaks = scanFlowScalarBreaksToSlice(startMark); - - // No extra breaks, one normal line break. Replace it with a space. - if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); } - } - - /// Scan line breaks in a flow scalar. - /// - /// Assumes that the caller is building a slice in Reader, and puts the scanned - /// line breaks into that slice. - bool scanFlowScalarBreaksToSlice(const Mark startMark) @safe - { - // True if at least one line break was found. - bool anyBreaks; - for(;;) - { - // Instead of checking indentation, we check for document separators. - const prefix = reader_.prefix(3); - enforce(!(prefix == "---" || prefix == "...") || - !reader_.peek(3).isWhiteSpace, - new ScannerException("While scanning a quoted scalar", startMark, - "found unexpected document separator", reader_.mark)); - - // Skip any whitespaces. - while(reader_.peekByte().among!(' ', '\t')) { reader_.forward(); } - - // Encountered a non-whitespace non-linebreak character, so we're done. - if(!reader_.peek().among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029')) { break; } - - const lineBreak = scanLineBreak(); - anyBreaks = true; - reader_.sliceBuilder.write(lineBreak); - } - return anyBreaks; - } - - /// Scan plain scalar token (no block, no quotes). - Token scanPlain() @safe - { - // We keep track of the allowSimpleKey_ flag here. - // Indentation rules are loosed for the flow context - const startMark = reader_.mark; - Mark endMark = startMark; - const indent = indent_ + 1; - - // We allow zero indentation for scalars, but then we need to check for - // document separators at the beginning of the line. 
- // if(indent == 0) { indent = 1; } - - reader_.sliceBuilder.begin(); - - alias Transaction = SliceBuilder.Transaction; - Transaction spacesTransaction; - // Stop at a comment. - while(reader_.peekByte() != '#') - { - // Scan the entire plain scalar. - size_t length; - dchar c = reader_.peek(length); - for(;;) - { - const cNext = reader_.peek(length + 1); - if(c.isWhiteSpace || - (flowLevel_ == 0 && c == ':' && cNext.isWhiteSpace) || - (flowLevel_ > 0 && c.among!(',', ':', '?', '[', ']', '{', '}'))) - { - break; - } - ++length; - c = cNext; - } - - // It's not clear what we should do with ':' in the flow context. - enforce(flowLevel_ == 0 || c != ':' || - reader_.peek(length + 1).isWhiteSpace || - reader_.peek(length + 1).among!(',', '[', ']', '{', '}'), - new ScannerException("While scanning a plain scalar", startMark, - "found unexpected ':' . Please check " ~ - "http://pyyaml.org/wiki/YAMLColonInFlowContext for details.", - reader_.mark)); - - if(length == 0) { break; } - - allowSimpleKey_ = false; - - reader_.sliceBuilder.write(reader_.get(length)); - - endMark = reader_.mark; - - spacesTransaction.commit(); - spacesTransaction = Transaction(&reader_.sliceBuilder); - - const startLength = reader_.sliceBuilder.length; - scanPlainSpacesToSlice(); - if(startLength == reader_.sliceBuilder.length || - (flowLevel_ == 0 && reader_.column < indent)) - { - break; - } - } - - spacesTransaction.end(); - char[] slice = reader_.sliceBuilder.finish(); - - return scalarToken(startMark, endMark, slice, ScalarStyle.plain); - } - - /// Scan spaces in a plain scalar. - /// - /// Assumes that the caller is building a slice in Reader, and puts the spaces - /// into that slice. - void scanPlainSpacesToSlice() @safe - { - // The specification is really confusing about tabs in plain scalars. - // We just forbid them completely. Do not use tabs in YAML! - - // Get as many plain spaces as there are. 
- size_t length; - while(reader_.peekByte(length) == ' ') { ++length; } - char[] whitespaces = reader_.prefixBytes(length); - reader_.forward(length); - - const dchar c = reader_.peek(); - if(!c.isNSChar) - { - // We have spaces, but no newline. - if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); } - return; - } - - // Newline after the spaces (if any) - const lineBreak = scanLineBreak(); - allowSimpleKey_ = true; - - static bool end(Reader reader_) @safe pure - { - const prefix = reader_.prefix(3); - return ("---" == prefix || "..." == prefix) - && reader_.peek(3).among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); - } - - if(end(reader_)) { return; } - - bool extraBreaks; - - alias Transaction = SliceBuilder.Transaction; - auto transaction = Transaction(&reader_.sliceBuilder); - if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); } - while(reader_.peek().isNSChar) - { - if(reader_.peekByte() == ' ') { reader_.forward(); } - else - { - const lBreak = scanLineBreak(); - extraBreaks = true; - reader_.sliceBuilder.write(lBreak); - - if(end(reader_)) { return; } - } - } - transaction.commit(); - - // No line breaks, only a space. - if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); } - } - - /// Scan handle of a tag token. - /// - /// Assumes that the caller is building a slice in Reader, and puts the scanned - /// characters into that slice. 
- void scanTagHandleToSlice(string name)(const Mark startMark) - { - dchar c = reader_.peek(); - enum contextMsg = "While scanning a " ~ name; - enforce(c == '!', - new ScannerException(contextMsg, startMark, expected("'!'", c), reader_.mark)); - - uint length = 1; - c = reader_.peek(length); - if(c != ' ') - { - while(c.isAlphaNum || c.among!('-', '_')) - { - ++length; - c = reader_.peek(length); - } - enforce(c == '!', - new ScannerException(contextMsg, startMark, expected("'!'", c), reader_.mark)); - ++length; - } - - reader_.sliceBuilder.write(reader_.get(length)); - } - - /// Scan URI in a tag token. - /// - /// Assumes that the caller is building a slice in Reader, and puts the scanned - /// characters into that slice. - void scanTagURIToSlice(string name)(const Mark startMark) - { - // Note: we do not check if URI is well-formed. - dchar c = reader_.peek(); - const startLen = reader_.sliceBuilder.length; - { - uint length; - while(c.isAlphaNum || c.isURIChar) - { - if(c == '%') - { - auto chars = reader_.get(length); - reader_.sliceBuilder.write(chars); - length = 0; - scanURIEscapesToSlice!name(startMark); - } - else { ++length; } - c = reader_.peek(length); - } - if(length > 0) - { - auto chars = reader_.get(length); - reader_.sliceBuilder.write(chars); - length = 0; - } - } - // OK if we scanned something, error otherwise. - enum contextMsg = "While parsing a " ~ name; - enforce(reader_.sliceBuilder.length > startLen, - new ScannerException(contextMsg, startMark, expected("URI", c), reader_.mark)); - } - - // Not @nogc yet because std.utf.decode is not @nogc - /// Scan URI escape sequences. - /// - /// Assumes that the caller is building a slice in Reader, and puts the scanned - /// characters into that slice. - void scanURIEscapesToSlice(string name)(const Mark startMark) - { - import core.exception : UnicodeException; - // URI escapes encode a UTF-8 string. We store UTF-8 code units here for - // decoding into UTF-32. 
- Appender!string buffer; - - - enum contextMsg = "While scanning a " ~ name; - while(reader_.peekByte() == '%') - { - reader_.forward(); - char[2] nextByte = [reader_.peekByte(), reader_.peekByte(1)]; - - enforce(nextByte[0].isHexDigit && nextByte[1].isHexDigit, - new ScannerException(contextMsg, startMark, - expected("URI escape sequence of 2 hexadecimal " ~ - "numbers", nextByte), reader_.mark)); - - buffer ~= nextByte[].to!ubyte(16); - - reader_.forward(2); - } - try - { - foreach (dchar chr; buffer.data) - { - reader_.sliceBuilder.write(chr); - } - } - catch (UnicodeException) - { - throw new ScannerException(contextMsg, startMark, - "Invalid UTF-8 data encoded in URI escape sequence", - reader_.mark); - } - } - - - /// Scan a line break, if any. - /// - /// Transforms: - /// '\r\n' : '\n' - /// '\r' : '\n' - /// '\n' : '\n' - /// '\u0085' : '\n' - /// '\u2028' : '\u2028' - /// '\u2029 : '\u2029' - /// no break : '\0' - dchar scanLineBreak() @safe - { - // Fast path for ASCII line breaks. - const b = reader_.peekByte(); - if(b < 0x80) - { - if(b == '\n' || b == '\r') - { - if(reader_.prefix(2) == "\r\n") { reader_.forward(2); } - else { reader_.forward(); } - return '\n'; - } - return '\0'; - } - - const c = reader_.peek(); - if(c == '\x85') - { - reader_.forward(); - return '\n'; - } - if(c == '\u2028' || c == '\u2029') - { - reader_.forward(); - return c; - } - return '\0'; - } -} diff --git a/source/dyaml/serializer.d b/source/dyaml/serializer.d deleted file mode 100644 index cbaef63..0000000 --- a/source/dyaml/serializer.d +++ /dev/null @@ -1,322 +0,0 @@ - -// Copyright Ferdinand Majerech 2011. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -/** - * YAML serializer. 
- * Code based on PyYAML: http://www.pyyaml.org - */ -module dyaml.serializer; - - -import std.array; -import std.format; -import std.typecons; - -import dyaml.emitter; -import dyaml.event; -import dyaml.exception; -import dyaml.node; -import dyaml.resolver; -import dyaml.tagdirective; -import dyaml.token; - - -package: - -///Serializes represented YAML nodes, generating events which are then emitted by Emitter. -struct Serializer -{ - private: - ///Resolver used to determine which tags are automaticaly resolvable. - Resolver resolver_; - - ///Do all document starts have to be specified explicitly? - Flag!"explicitStart" explicitStart_; - ///Do all document ends have to be specified explicitly? - Flag!"explicitEnd" explicitEnd_; - ///YAML version string. - string YAMLVersion_; - - ///Tag directives to emit. - TagDirective[] tagDirectives_; - - //TODO Use something with more deterministic memory usage. - ///Nodes with assigned anchors. - string[Node] anchors_; - ///Nodes with assigned anchors that are already serialized. - bool[Node] serializedNodes_; - ///ID of the last anchor generated. - uint lastAnchorID_ = 0; - - public: - /** - * Construct a Serializer. - * - * Params: - * resolver = Resolver used to determine which tags are automaticaly resolvable. - * explicitStart = Do all document starts have to be specified explicitly? - * explicitEnd = Do all document ends have to be specified explicitly? - * YAMLVersion = YAML version string. - * tagDirectives = Tag directives to emit. - */ - this(Resolver resolver, - const Flag!"explicitStart" explicitStart, - const Flag!"explicitEnd" explicitEnd, string YAMLVersion, - TagDirective[] tagDirectives) @safe - { - resolver_ = resolver; - explicitStart_ = explicitStart; - explicitEnd_ = explicitEnd; - YAMLVersion_ = YAMLVersion; - tagDirectives_ = tagDirectives; - } - - ///Begin the stream. 
- void startStream(EmitterT)(ref EmitterT emitter) @safe - { - emitter.emit(streamStartEvent(Mark(), Mark())); - } - - ///End the stream. - void endStream(EmitterT)(ref EmitterT emitter) @safe - { - emitter.emit(streamEndEvent(Mark(), Mark())); - } - - ///Serialize a node, emitting it in the process. - void serialize(EmitterT)(ref EmitterT emitter, ref Node node) @safe - { - emitter.emit(documentStartEvent(Mark(), Mark(), explicitStart_, - YAMLVersion_, tagDirectives_)); - anchorNode(node); - serializeNode(emitter, node); - emitter.emit(documentEndEvent(Mark(), Mark(), explicitEnd_)); - serializedNodes_.destroy(); - anchors_.destroy(); - string[Node] emptyAnchors; - anchors_ = emptyAnchors; - lastAnchorID_ = 0; - } - - private: - /** - * Determine if it's a good idea to add an anchor to a node. - * - * Used to prevent associating every single repeating scalar with an - * anchor/alias - only nodes long enough can use anchors. - * - * Params: node = Node to check for anchorability. - * - * Returns: True if the node is anchorable, false otherwise. - */ - static bool anchorable(ref Node node) @safe - { - if(node.nodeID == NodeID.scalar) - { - return (node.type == NodeType.string) ? node.as!string.length > 64 : - (node.type == NodeType.binary) ? 
node.as!(ubyte[]).length > 64 : - false; - } - return node.length > 2; - } - - @safe unittest - { - import std.string : representation; - auto shortString = "not much"; - auto longString = "A fairly long string that would be a good idea to add an anchor to"; - auto node1 = Node(shortString); - auto node2 = Node(shortString.representation.dup); - auto node3 = Node(longString); - auto node4 = Node(longString.representation.dup); - auto node5 = Node([node1]); - auto node6 = Node([node1, node2, node3, node4]); - assert(!anchorable(node1)); - assert(!anchorable(node2)); - assert(anchorable(node3)); - assert(anchorable(node4)); - assert(!anchorable(node5)); - assert(anchorable(node6)); - } - - ///Add an anchor to the node if it's anchorable and not anchored yet. - void anchorNode(ref Node node) @safe - { - if(!anchorable(node)){return;} - - if((node in anchors_) !is null) - { - if(anchors_[node] is null) - { - anchors_[node] = generateAnchor(); - } - return; - } - - anchors_.remove(node); - final switch (node.nodeID) - { - case NodeID.mapping: - foreach(ref Node key, ref Node value; node) - { - anchorNode(key); - anchorNode(value); - } - break; - case NodeID.sequence: - foreach(ref Node item; node) - { - anchorNode(item); - } - break; - case NodeID.invalid: - assert(0); - case NodeID.scalar: - } - } - - ///Generate and return a new anchor. - string generateAnchor() @safe - { - ++lastAnchorID_; - auto appender = appender!string(); - formattedWrite(appender, "id%03d", lastAnchorID_); - return appender.data; - } - - ///Serialize a node and all its subnodes. - void serializeNode(EmitterT)(ref EmitterT emitter, ref Node node) @safe - { - //If the node has an anchor, emit an anchor (as aliasEvent) on the - //first occurrence, save it in serializedNodes_, and emit an alias - //if it reappears. 
- string aliased; - if(anchorable(node) && (node in anchors_) !is null) - { - aliased = anchors_[node]; - if((node in serializedNodes_) !is null) - { - emitter.emit(aliasEvent(Mark(), Mark(), aliased)); - return; - } - serializedNodes_[node] = true; - } - final switch (node.nodeID) - { - case NodeID.mapping: - const defaultTag = resolver_.defaultMappingTag; - const implicit = node.tag_ == defaultTag; - emitter.emit(mappingStartEvent(Mark(), Mark(), aliased, node.tag_, - implicit, node.collectionStyle)); - foreach(ref Node key, ref Node value; node) - { - serializeNode(emitter, key); - serializeNode(emitter, value); - } - emitter.emit(mappingEndEvent(Mark(), Mark())); - return; - case NodeID.sequence: - const defaultTag = resolver_.defaultSequenceTag; - const implicit = node.tag_ == defaultTag; - emitter.emit(sequenceStartEvent(Mark(), Mark(), aliased, node.tag_, - implicit, node.collectionStyle)); - foreach(ref Node item; node) - { - serializeNode(emitter, item); - } - emitter.emit(sequenceEndEvent(Mark(), Mark())); - return; - case NodeID.scalar: - assert(node.type == NodeType.string, "Scalar node type must be string before serialized"); - auto value = node.as!string; - const detectedTag = resolver_.resolve(NodeID.scalar, null, value, true); - const bool isDetected = node.tag_ == detectedTag; - - emitter.emit(scalarEvent(Mark(), Mark(), aliased, node.tag_, - isDetected, value.idup, node.scalarStyle)); - return; - case NodeID.invalid: - assert(0); - } - } -} - -// Issue #244 -@safe unittest -{ - import dyaml.dumper : dumper; - auto node = Node([ - Node.Pair( - Node(""), - Node([ - Node([ - Node.Pair( - Node("d"), - Node([ - Node([ - Node.Pair( - Node("c"), - Node("") - ), - Node.Pair( - Node("b"), - Node("") - ), - Node.Pair( - Node(""), - Node("") - ) - ]) - ]) - ), - ]), - Node([ - Node.Pair( - Node("d"), - Node([ - Node(""), - Node(""), - Node([ - Node.Pair( - Node("c"), - Node("") - ), - Node.Pair( - Node("b"), - Node("") - ), - Node.Pair( - Node(""), - 
Node("") - ) - ]) - ]) - ), - Node.Pair( - Node("z"), - Node("") - ), - Node.Pair( - Node(""), - Node("") - ) - ]), - Node("") - ]) - ), - Node.Pair( - Node("g"), - Node("") - ), - Node.Pair( - Node("h"), - Node("") - ), - ]); - - auto stream = appender!string(); - dumper().dump(stream, node); -} diff --git a/source/dyaml/stdsumtype.d b/source/dyaml/stdsumtype.d deleted file mode 100644 index aa5a2ec..0000000 --- a/source/dyaml/stdsumtype.d +++ /dev/null @@ -1,2643 +0,0 @@ -/++ - This module was copied from Phobos at commit 87c6e7e35 (2022-07-06). - This is necessary to include https://github.com/dlang/phobos/pull/8501 - which is a fix needed for DIP1000 compatibility. A couple minor changes - where also required to deal with `package(std)` imports. - -[SumType] is a generic discriminated union implementation that uses -design-by-introspection to generate safe and efficient code. Its features -include: - -* [Pattern matching.][match] -* Support for self-referential types. -* Full attribute correctness (`pure`, `@safe`, `@nogc`, and `nothrow` are - inferred whenever possible). -* A type-safe and memory-safe API compatible with DIP 1000 (`scope`). -* No dependency on runtime type information (`TypeInfo`). -* Compatibility with BetterC. - -License: Boost License 1.0 -Authors: Paul Backus -Source: $(PHOBOSSRC std/sumtype.d) -+/ -module dyaml.stdsumtype; - -/// $(DIVID basic-usage,$(H3 Basic usage)) -version (D_BetterC) {} else -@safe unittest -{ - import std.math : isClose; - - struct Fahrenheit { double degrees; } - struct Celsius { double degrees; } - struct Kelvin { double degrees; } - - alias Temperature = SumType!(Fahrenheit, Celsius, Kelvin); - - // Construct from any of the member types. - Temperature t1 = Fahrenheit(98.6); - Temperature t2 = Celsius(100); - Temperature t3 = Kelvin(273); - - // Use pattern matching to access the value. 
- Fahrenheit toFahrenheit(Temperature t) - { - return Fahrenheit( - t.match!( - (Fahrenheit f) => f.degrees, - (Celsius c) => c.degrees * 9.0/5 + 32, - (Kelvin k) => k.degrees * 9.0/5 - 459.4 - ) - ); - } - - assert(toFahrenheit(t1).degrees.isClose(98.6)); - assert(toFahrenheit(t2).degrees.isClose(212)); - assert(toFahrenheit(t3).degrees.isClose(32)); - - // Use ref to modify the value in place. - void freeze(ref Temperature t) - { - t.match!( - (ref Fahrenheit f) => f.degrees = 32, - (ref Celsius c) => c.degrees = 0, - (ref Kelvin k) => k.degrees = 273 - ); - } - - freeze(t1); - assert(toFahrenheit(t1).degrees.isClose(32)); - - // Use a catch-all handler to give a default result. - bool isFahrenheit(Temperature t) - { - return t.match!( - (Fahrenheit f) => true, - _ => false - ); - } - - assert(isFahrenheit(t1)); - assert(!isFahrenheit(t2)); - assert(!isFahrenheit(t3)); -} - -/** $(DIVID introspection-based-matching, $(H3 Introspection-based matching)) - * - * In the `length` and `horiz` functions below, the handlers for `match` do not - * specify the types of their arguments. Instead, matching is done based on how - * the argument is used in the body of the handler: any type with `x` and `y` - * properties will be matched by the `rect` handlers, and any type with `r` and - * `theta` properties will be matched by the `polar` handlers. 
- */ -version (D_BetterC) {} else -@safe unittest -{ - import std.math : isClose; - import std.math : cos; - import std.math : PI; - import std.math : sqrt; - - struct Rectangular { double x, y; } - struct Polar { double r, theta; } - alias Vector = SumType!(Rectangular, Polar); - - double length(Vector v) - { - return v.match!( - rect => sqrt(rect.x^^2 + rect.y^^2), - polar => polar.r - ); - } - - double horiz(Vector v) - { - return v.match!( - rect => rect.x, - polar => polar.r * cos(polar.theta) - ); - } - - Vector u = Rectangular(1, 1); - Vector v = Polar(1, PI/4); - - assert(length(u).isClose(sqrt(2.0))); - assert(length(v).isClose(1)); - assert(horiz(u).isClose(1)); - assert(horiz(v).isClose(sqrt(0.5))); -} - -/** $(DIVID arithmetic-expression-evaluator, $(H3 Arithmetic expression evaluator)) - * - * This example makes use of the special placeholder type `This` to define a - * [recursive data type](https://en.wikipedia.org/wiki/Recursive_data_type): an - * [abstract syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) for - * representing simple arithmetic expressions. - */ -version (D_BetterC) {} else -@system unittest -{ - import std.functional : partial; - import std.traits : EnumMembers; - import std.typecons : Tuple; - - enum Op : string - { - Plus = "+", - Minus = "-", - Times = "*", - Div = "/" - } - - // An expression is either - // - a number, - // - a variable, or - // - a binary operation combining two sub-expressions. - alias Expr = SumType!( - double, - string, - Tuple!(Op, "op", This*, "lhs", This*, "rhs") - ); - - // Shorthand for Tuple!(Op, "op", Expr*, "lhs", Expr*, "rhs"), - // the Tuple type above with Expr substituted for This. 
- alias BinOp = Expr.Types[2]; - - // Factory function for number expressions - Expr* num(double value) - { - return new Expr(value); - } - - // Factory function for variable expressions - Expr* var(string name) - { - return new Expr(name); - } - - // Factory function for binary operation expressions - Expr* binOp(Op op, Expr* lhs, Expr* rhs) - { - return new Expr(BinOp(op, lhs, rhs)); - } - - // Convenience wrappers for creating BinOp expressions - alias sum = partial!(binOp, Op.Plus); - alias diff = partial!(binOp, Op.Minus); - alias prod = partial!(binOp, Op.Times); - alias quot = partial!(binOp, Op.Div); - - // Evaluate expr, looking up variables in env - double eval(Expr expr, double[string] env) - { - return expr.match!( - (double num) => num, - (string var) => env[var], - (BinOp bop) - { - double lhs = eval(*bop.lhs, env); - double rhs = eval(*bop.rhs, env); - final switch (bop.op) - { - static foreach (op; EnumMembers!Op) - { - case op: - return mixin("lhs" ~ op ~ "rhs"); - } - } - } - ); - } - - // Return a "pretty-printed" representation of expr - string pprint(Expr expr) - { - import std.format : format; - - return expr.match!( - (double num) => "%g".format(num), - (string var) => var, - (BinOp bop) => "(%s %s %s)".format( - pprint(*bop.lhs), - cast(string) bop.op, - pprint(*bop.rhs) - ) - ); - } - - Expr* myExpr = sum(var("a"), prod(num(2), var("b"))); - double[string] myEnv = ["a":3, "b":4, "c":7]; - - assert(eval(*myExpr, myEnv) == 11); - assert(pprint(*myExpr) == "(a + (2 * b))"); -} - -import std.format : FormatSpec, singleSpec; -import std.meta : AliasSeq, Filter, IndexOf = staticIndexOf, Map = staticMap; -import std.meta : NoDuplicates; -import std.meta : anySatisfy, allSatisfy; -import std.traits : hasElaborateCopyConstructor, hasElaborateDestructor; -import std.traits : isAssignable, isCopyable, isStaticArray; -import std.traits : ConstOf, ImmutableOf, InoutOf, TemplateArgsOf; - -// FIXME: std.sumtype : `std.traits : DeducedParameterType` and 
`std.conv : toCtString` -// are `package(std)` but trivial, hence copied below -import std.traits : CommonType, /*DeducatedParameterType*/ Unqual; -private template DeducedParameterType(T) -{ - static if (is(T == U*, U) || is(T == U[], U)) - alias DeducedParameterType = Unqual!T; - else - alias DeducedParameterType = T; -} - -/// Compatibility with < v2.095.0 -private struct __InoutWorkaroundStruct{} -private @property T rvalueOf(T)(inout __InoutWorkaroundStruct = __InoutWorkaroundStruct.init); -private @property ref T lvalueOf(T)(inout __InoutWorkaroundStruct = __InoutWorkaroundStruct.init); -private enum isRvalueAssignable(Lhs, Rhs = Lhs) = __traits(compiles, { lvalueOf!Lhs = rvalueOf!Rhs; }); - -import std.typecons : ReplaceTypeUnless; -import std.typecons : Flag; -//import std.conv : toCtString; -private enum toCtString(ulong n) = n.stringof[0 .. $ - "LU".length]; - -/// Placeholder used to refer to the enclosing [SumType]. -struct This {} - -// True if a variable of type T can appear on the lhs of an assignment -private enum isAssignableTo(T) = - isAssignable!T || (!isCopyable!T && isRvalueAssignable!T); - -// toHash is required by the language spec to be nothrow and @safe -private enum isHashable(T) = __traits(compiles, - () nothrow @safe { hashOf(T.init); } -); - -private enum hasPostblit(T) = __traits(hasPostblit, T); - -private enum isInout(T) = is(T == inout); - -/** - * A [tagged union](https://en.wikipedia.org/wiki/Tagged_union) that can hold a - * single value from any of a specified set of types. - * - * The value in a `SumType` can be operated on using [pattern matching][match]. - * - * To avoid ambiguity, duplicate types are not allowed (but see the - * ["basic usage" example](#basic-usage) for a workaround). - * - * The special type `This` can be used as a placeholder to create - * self-referential types, just like with `Algebraic`. See the - * ["Arithmetic expression evaluator" example](#arithmetic-expression-evaluator) for - * usage. 
- * - * A `SumType` is initialized by default to hold the `.init` value of its - * first member type, just like a regular union. The version identifier - * `SumTypeNoDefaultCtor` can be used to disable this behavior. - * - * See_Also: $(REF Algebraic, std,variant) - */ -struct SumType(Types...) -if (is(NoDuplicates!Types == Types) && Types.length > 0) -{ - /// The types a `SumType` can hold. - alias Types = AliasSeq!( - ReplaceTypeUnless!(isSumTypeInstance, This, typeof(this), TemplateArgsOf!SumType) - ); - -private: - - enum bool canHoldTag(T) = Types.length <= T.max; - alias unsignedInts = AliasSeq!(ubyte, ushort, uint, ulong); - - alias Tag = Filter!(canHoldTag, unsignedInts)[0]; - - union Storage - { - // Workaround for https://issues.dlang.org/show_bug.cgi?id=20068 - template memberName(T) - if (IndexOf!(T, Types) >= 0) - { - enum tid = IndexOf!(T, Types); - mixin("enum memberName = `values_", toCtString!tid, "`;"); - } - - static foreach (T; Types) - { - mixin("T ", memberName!T, ";"); - } - } - - Storage storage; - Tag tag; - - /* Accesses the value stored in a SumType. - * - * This method is memory-safe, provided that: - * - * 1. A SumType's tag is always accurate. - * 2. A SumType cannot be assigned to in @safe code if that assignment - * could cause unsafe aliasing. - * - * All code that accesses a SumType's tag or storage directly, including - * @safe code in this module, must be manually checked to ensure that it - * does not violate either of the above requirements. 
- */ - @trusted - ref inout(T) get(T)() inout - if (IndexOf!(T, Types) >= 0) - { - enum tid = IndexOf!(T, Types); - assert(tag == tid, - "This `" ~ SumType.stringof ~ - "` does not contain a(n) `" ~ T.stringof ~ "`" - ); - return __traits(getMember, storage, Storage.memberName!T); - } - -public: - - // Workaround for https://issues.dlang.org/show_bug.cgi?id=21399 - version (StdDdoc) - { - // Dummy type to stand in for loop variable - private struct T; - - /// Constructs a `SumType` holding a specific value. - this(T value); - - /// ditto - this(const(T) value) const; - - /// ditto - this(immutable(T) value) immutable; - - /// ditto - this(Value)(Value value) inout - if (is(Value == DeducedParameterType!(inout(T)))); - } - - static foreach (tid, T; Types) - { - /// Constructs a `SumType` holding a specific value. - this(T value) - { - import core.lifetime : forward; - - static if (isCopyable!T) - { - // Workaround for https://issues.dlang.org/show_bug.cgi?id=21542 - if (__ctfe) - __traits(getMember, storage, Storage.memberName!T) = value; - else - __traits(getMember, storage, Storage.memberName!T) = forward!value; - } - else - { - __traits(getMember, storage, Storage.memberName!T) = forward!value; - } - - tag = tid; - } - - // DUB: Those traits compile work around bugs in < v2.098 - static if (!__traits(compiles, { T c = const(T).init; })) - { - static if (isCopyable!(const(T))) - { - static if (IndexOf!(const(T), Map!(ConstOf, Types)) == tid) - { - /// ditto - this(const(T) value) const - { - __traits(getMember, storage, Storage.memberName!T) = value; - tag = tid; - } - } - } - else - { - @disable this(const(T) value) const; - } - } - - static if (!__traits(compiles, { T c = immutable(T).init; })) - { - static if (isCopyable!(immutable(T))) - { - static if (IndexOf!(immutable(T), Map!(ImmutableOf, Types)) == tid) - { - /// ditto - this(immutable(T) value) immutable - { - __traits(getMember, storage, Storage.memberName!T) = value; - tag = tid; - } - } - } - else - { 
- @disable this(immutable(T) value) immutable; - } - } - - static if (isCopyable!(inout(T))) - { - static if (IndexOf!(inout(T), Map!(InoutOf, Types)) == tid) - { - /// ditto - this(Value)(Value value) inout - if (is(Value == DeducedParameterType!(inout(T)))) - { - __traits(getMember, storage, Storage.memberName!T) = value; - tag = tid; - } - } - } - else - { - @disable this(Value)(Value value) inout - if (is(Value == DeducedParameterType!(inout(T)))); - } - } - - static if (anySatisfy!(hasElaborateCopyConstructor, Types)) - { - static if - ( - allSatisfy!(isCopyable, Map!(InoutOf, Types)) - && !anySatisfy!(hasPostblit, Map!(InoutOf, Types)) - && allSatisfy!(isInout, Map!(InoutOf, Types)) - ) - { - /// Constructs a `SumType` that's a copy of another `SumType`. - this(ref inout(SumType) other) inout - { - storage = other.match!((ref value) { - alias OtherTypes = Map!(InoutOf, Types); - enum tid = IndexOf!(typeof(value), OtherTypes); - alias T = Types[tid]; - - mixin("inout(Storage) newStorage = { ", - Storage.memberName!T, ": value", - " };"); - - return newStorage; - }); - - tag = other.tag; - } - } - else - { - static if (allSatisfy!(isCopyable, Types)) - { - /// ditto - this(ref SumType other) - { - storage = other.match!((ref value) { - alias T = typeof(value); - - mixin("Storage newStorage = { ", - Storage.memberName!T, ": value", - " };"); - - return newStorage; - }); - - tag = other.tag; - } - } - else - { - @disable this(ref SumType other); - } - - static if (allSatisfy!(isCopyable, Map!(ConstOf, Types))) - { - /// ditto - this(ref const(SumType) other) const - { - storage = other.match!((ref value) { - alias OtherTypes = Map!(ConstOf, Types); - enum tid = IndexOf!(typeof(value), OtherTypes); - alias T = Types[tid]; - - mixin("const(Storage) newStorage = { ", - Storage.memberName!T, ": value", - " };"); - - return newStorage; - }); - - tag = other.tag; - } - } - else - { - @disable this(ref const(SumType) other) const; - } - - static if 
(allSatisfy!(isCopyable, Map!(ImmutableOf, Types))) - { - /// ditto - this(ref immutable(SumType) other) immutable - { - storage = other.match!((ref value) { - alias OtherTypes = Map!(ImmutableOf, Types); - enum tid = IndexOf!(typeof(value), OtherTypes); - alias T = Types[tid]; - - mixin("immutable(Storage) newStorage = { ", - Storage.memberName!T, ": value", - " };"); - - return newStorage; - }); - - tag = other.tag; - } - } - else - { - @disable this(ref immutable(SumType) other) immutable; - } - } - } - - version (SumTypeNoDefaultCtor) - { - @disable this(); - } - - // Workaround for https://issues.dlang.org/show_bug.cgi?id=21399 - version (StdDdoc) - { - // Dummy type to stand in for loop variable - private struct T; - - /** - * Assigns a value to a `SumType`. - * - * If any of the `SumType`'s members other than the one being assigned - * to contain pointers or references, it is possible for the assignment - * to cause memory corruption (see the - * ["Memory corruption" example](#memory-corruption) below for an - * illustration of how). Therefore, such assignments are considered - * `@system`. - * - * An individual assignment can be `@trusted` if the caller can - * guarantee that there are no outstanding references to any `SumType` - * members that contain pointers or references at the time the - * assignment occurs. - * - * Examples: - * - * $(DIVID memory-corruption, $(H3 Memory corruption)) - * - * This example shows how assignment to a `SumType` can be used to - * cause memory corruption in `@system` code. In `@safe` code, the - * assignment `s = 123` would not be allowed. - * - * --- - * SumType!(int*, int) s = new int; - * s.tryMatch!( - * (ref int* p) { - * s = 123; // overwrites `p` - * return *p; // undefined behavior - * } - * ); - * --- - */ - ref SumType opAssign(T rhs); - } - - static foreach (tid, T; Types) - { - static if (isAssignableTo!T) - { - /** - * Assigns a value to a `SumType`. 
- * - * If any of the `SumType`'s members other than the one being assigned - * to contain pointers or references, it is possible for the assignment - * to cause memory corruption (see the - * ["Memory corruption" example](#memory-corruption) below for an - * illustration of how). Therefore, such assignments are considered - * `@system`. - * - * An individual assignment can be `@trusted` if the caller can - * guarantee that there are no outstanding references to any `SumType` - * members that contain pointers or references at the time the - * assignment occurs. - * - * Examples: - * - * $(DIVID memory-corruption, $(H3 Memory corruption)) - * - * This example shows how assignment to a `SumType` can be used to - * cause memory corruption in `@system` code. In `@safe` code, the - * assignment `s = 123` would not be allowed. - * - * --- - * SumType!(int*, int) s = new int; - * s.tryMatch!( - * (ref int* p) { - * s = 123; // overwrites `p` - * return *p; // undefined behavior - * } - * ); - * --- - */ - ref SumType opAssign(T rhs) - { - import core.lifetime : forward; - import std.traits : hasIndirections, hasNested; - import std.meta : AliasSeq, Or = templateOr; - - alias OtherTypes = - AliasSeq!(Types[0 .. tid], Types[tid + 1 .. $]); - enum unsafeToOverwrite = - anySatisfy!(Or!(hasIndirections, hasNested), OtherTypes); - - static if (unsafeToOverwrite) - { - cast(void) () @system {}(); - } - - this.match!destroyIfOwner; - - static if (isCopyable!T) - { - // Workaround for https://issues.dlang.org/show_bug.cgi?id=21542 - mixin("Storage newStorage = { ", - Storage.memberName!T, ": __ctfe ? rhs : forward!rhs", - " };"); - } - else - { - mixin("Storage newStorage = { ", - Storage.memberName!T, ": forward!rhs", - " };"); - } - - storage = newStorage; - tag = tid; - - return this; - } - } - } - - static if (allSatisfy!(isAssignableTo, Types)) - { - static if (allSatisfy!(isCopyable, Types)) - { - /** - * Copies the value from another `SumType` into this one. 
- * - * See the value-assignment overload for details on `@safe`ty. - * - * Copy assignment is `@disable`d if any of `Types` is non-copyable. - */ - ref SumType opAssign(ref SumType rhs) - { - rhs.match!((ref value) { this = value; }); - return this; - } - } - else - { - @disable ref SumType opAssign(ref SumType rhs); - } - - /** - * Moves the value from another `SumType` into this one. - * - * See the value-assignment overload for details on `@safe`ty. - */ - ref SumType opAssign(SumType rhs) - { - import core.lifetime : move; - - rhs.match!((ref value) { - static if (isCopyable!(typeof(value))) - { - // Workaround for https://issues.dlang.org/show_bug.cgi?id=21542 - this = __ctfe ? value : move(value); - } - else - { - this = move(value); - } - }); - return this; - } - } - - /** - * Compares two `SumType`s for equality. - * - * Two `SumType`s are equal if they are the same kind of `SumType`, they - * contain values of the same type, and those values are equal. - */ - bool opEquals(this This, Rhs)(auto ref Rhs rhs) - if (!is(CommonType!(This, Rhs) == void)) - { - static if (is(This == Rhs)) - { - return AliasSeq!(this, rhs).match!((ref value, ref rhsValue) { - static if (is(typeof(value) == typeof(rhsValue))) - { - return value == rhsValue; - } - else - { - return false; - } - }); - } - else - { - alias CommonSumType = CommonType!(This, Rhs); - return cast(CommonSumType) this == cast(CommonSumType) rhs; - } - } - - // Workaround for https://issues.dlang.org/show_bug.cgi?id=19407 - static if (__traits(compiles, anySatisfy!(hasElaborateDestructor, Types))) - { - // If possible, include the destructor only when it's needed - private enum includeDtor = anySatisfy!(hasElaborateDestructor, Types); - } - else - { - // If we can't tell, always include it, even when it does nothing - private enum includeDtor = true; - } - - static if (includeDtor) - { - /// Calls the destructor of the `SumType`'s current value. 
- ~this() - { - this.match!destroyIfOwner; - } - } - - invariant - { - this.match!((ref value) { - static if (is(typeof(value) == class)) - { - if (value !is null) - { - assert(value); - } - } - else static if (is(typeof(value) == struct)) - { - assert(&value); - } - }); - } - - // Workaround for https://issues.dlang.org/show_bug.cgi?id=21400 - version (StdDdoc) - { - /** - * Returns a string representation of the `SumType`'s current value. - * - * Not available when compiled with `-betterC`. - */ - string toString(this This)(); - - /** - * Handles formatted writing of the `SumType`'s current value. - * - * Not available when compiled with `-betterC`. - * - * Params: - * sink = Output range to write to. - * fmt = Format specifier to use. - * - * See_Also: $(REF formatValue, std,format) - */ - void toString(this This, Sink, Char)(ref Sink sink, const ref FormatSpec!Char fmt); - } - - version (D_BetterC) {} else - /** - * Returns a string representation of the `SumType`'s current value. - * - * Not available when compiled with `-betterC`. - */ - string toString(this This)() - { - import std.conv : to; - - return this.match!(to!string); - } - - version (D_BetterC) {} else - /** - * Handles formatted writing of the `SumType`'s current value. - * - * Not available when compiled with `-betterC`. - * - * Params: - * sink = Output range to write to. - * fmt = Format specifier to use. - * - * See_Also: $(REF formatValue, std,format) - */ - void toString(this This, Sink, Char)(ref Sink sink, const ref FormatSpec!Char fmt) - { - import std.format : formatValue; - - this.match!((ref value) { - formatValue(sink, value, fmt); - }); - } - - static if (allSatisfy!(isHashable, Map!(ConstOf, Types))) - { - // Workaround for https://issues.dlang.org/show_bug.cgi?id=21400 - version (StdDdoc) - { - /** - * Returns the hash of the `SumType`'s current value. - * - * Not available when compiled with `-betterC`. 
- */ - size_t toHash() const; - } - - // Workaround for https://issues.dlang.org/show_bug.cgi?id=20095 - version (D_BetterC) {} else - /** - * Returns the hash of the `SumType`'s current value. - * - * Not available when compiled with `-betterC`. - */ - size_t toHash() const - { - return this.match!hashOf; - } - } -} - -// Construction -@safe unittest -{ - alias MySum = SumType!(int, float); - - MySum x = MySum(42); - MySum y = MySum(3.14); -} - -// Assignment -@safe unittest -{ - alias MySum = SumType!(int, float); - - MySum x = MySum(42); - x = 3.14; -} - -// Self assignment -@safe unittest -{ - alias MySum = SumType!(int, float); - - MySum x = MySum(42); - MySum y = MySum(3.14); - y = x; -} - -// Equality -@safe unittest -{ - alias MySum = SumType!(int, float); - - assert(MySum(123) == MySum(123)); - assert(MySum(123) != MySum(456)); - assert(MySum(123) != MySum(123.0)); - assert(MySum(123) != MySum(456.0)); - -} - -// Equality of differently-qualified SumTypes -// Disabled in BetterC due to use of dynamic arrays -version (D_BetterC) {} else -@safe unittest -{ - alias SumA = SumType!(int, float); - alias SumB = SumType!(const(int[]), int[]); - alias SumC = SumType!(int[], const(int[])); - - int[] ma = [1, 2, 3]; - const(int[]) ca = [1, 2, 3]; - - assert(const(SumA)(123) == SumA(123)); - assert(const(SumB)(ma[]) == SumB(ca[])); - assert(const(SumC)(ma[]) == SumC(ca[])); -} - -// Imported types -@safe unittest -{ - import std.typecons : Tuple; - - alias MySum = SumType!(Tuple!(int, int)); -} - -// const and immutable types -@safe unittest -{ - alias MySum = SumType!(const(int[]), immutable(float[])); -} - -// Recursive types -@safe unittest -{ - alias MySum = SumType!(This*); - assert(is(MySum.Types[0] == MySum*)); -} - -// Allowed types -@safe unittest -{ - import std.meta : AliasSeq; - - alias MySum = SumType!(int, float, This*); - - assert(is(MySum.Types == AliasSeq!(int, float, MySum*))); -} - -// Types with destructors and postblits -@system unittest -{ - int 
copies; - - static struct Test - { - bool initialized = false; - int* copiesPtr; - - this(this) { (*copiesPtr)++; } - ~this() { if (initialized) (*copiesPtr)--; } - } - - alias MySum = SumType!(int, Test); - - Test t = Test(true, &copies); - - { - MySum x = t; - assert(copies == 1); - } - assert(copies == 0); - - { - MySum x = 456; - assert(copies == 0); - } - assert(copies == 0); - - { - MySum x = t; - assert(copies == 1); - x = 456; - assert(copies == 0); - } - - { - MySum x = 456; - assert(copies == 0); - x = t; - assert(copies == 1); - } - - { - MySum x = t; - MySum y = x; - assert(copies == 2); - } - - { - MySum x = t; - MySum y; - y = x; - assert(copies == 2); - } -} - -// Doesn't destroy reference types -// Disabled in BetterC due to use of classes -version (D_BetterC) {} else -@system unittest -{ - bool destroyed; - - class C - { - ~this() - { - destroyed = true; - } - } - - struct S - { - ~this() {} - } - - alias MySum = SumType!(S, C); - - C c = new C(); - { - MySum x = c; - destroyed = false; - } - assert(!destroyed); - - { - MySum x = c; - destroyed = false; - x = S(); - assert(!destroyed); - } -} - -// Types with @disable this() -@safe unittest -{ - static struct NoInit - { - @disable this(); - } - - alias MySum = SumType!(NoInit, int); - - assert(!__traits(compiles, MySum())); - auto _ = MySum(42); -} - -// const SumTypes -version (D_BetterC) {} else // not @nogc, https://issues.dlang.org/show_bug.cgi?id=22117 -@safe unittest -{ - auto _ = const(SumType!(int[]))([1, 2, 3]); -} - -// Equality of const SumTypes -@safe unittest -{ - alias MySum = SumType!int; - - auto _ = const(MySum)(123) == const(MySum)(456); -} - -// Compares reference types using value equality -@safe unittest -{ - import std.array : staticArray; - - static struct Field {} - static struct Struct { Field[] fields; } - alias MySum = SumType!Struct; - - static arr1 = staticArray([Field()]); - static arr2 = staticArray([Field()]); - - auto a = MySum(Struct(arr1[])); - auto b = 
MySum(Struct(arr2[])); - - assert(a == b); -} - -// toString -// Disabled in BetterC due to use of std.conv.text -version (D_BetterC) {} else -@safe unittest -{ - import std.conv : text; - - static struct Int { int i; } - static struct Double { double d; } - alias Sum = SumType!(Int, Double); - - assert(Sum(Int(42)).text == Int(42).text, Sum(Int(42)).text); - assert(Sum(Double(33.3)).text == Double(33.3).text, Sum(Double(33.3)).text); - assert((const(Sum)(Int(42))).text == (const(Int)(42)).text, (const(Sum)(Int(42))).text); -} - -// string formatting -// Disabled in BetterC due to use of std.format.format -version (D_BetterC) {} else -@safe unittest -{ - import std.format : format; - - SumType!int x = 123; - - assert(format!"%s"(x) == format!"%s"(123)); - assert(format!"%x"(x) == format!"%x"(123)); -} - -// string formatting of qualified SumTypes -// Disabled in BetterC due to use of std.format.format and dynamic arrays -version (D_BetterC) {} else -@safe unittest -{ - import std.format : format; - - int[] a = [1, 2, 3]; - const(SumType!(int[])) x = a; - - assert(format!"%(%d, %)"(x) == format!"%(%s, %)"(a)); -} - -// Github issue #16 -// Disabled in BetterC due to use of dynamic arrays -version (D_BetterC) {} else -@safe unittest -{ - alias Node = SumType!(This[], string); - - // override inference of @system attribute for cyclic functions - assert((() @trusted => - Node([Node([Node("x")])]) - == - Node([Node([Node("x")])]) - )()); -} - -// Github issue #16 with const -// Disabled in BetterC due to use of dynamic arrays -version (D_BetterC) {} else -@safe unittest -{ - alias Node = SumType!(const(This)[], string); - - // override inference of @system attribute for cyclic functions - assert((() @trusted => - Node([Node([Node("x")])]) - == - Node([Node([Node("x")])]) - )()); -} - -// Stale pointers -// Disabled in BetterC due to use of dynamic arrays -version (D_BetterC) {} else -@system unittest -{ - alias MySum = SumType!(ubyte, void*[2]); - - MySum x = [null, 
cast(void*) 0x12345678]; - void** p = &x.get!(void*[2])[1]; - x = ubyte(123); - - assert(*p != cast(void*) 0x12345678); -} - -// Exception-safe assignment -// Disabled in BetterC due to use of exceptions -version (D_BetterC) {} else -@safe unittest -{ - static struct A - { - int value = 123; - } - - static struct B - { - int value = 456; - this(this) { throw new Exception("oops"); } - } - - alias MySum = SumType!(A, B); - - MySum x; - try - { - x = B(); - } - catch (Exception e) {} - - assert( - (x.tag == 0 && x.get!A.value == 123) || - (x.tag == 1 && x.get!B.value == 456) - ); -} - -// Types with @disable this(this) -@safe unittest -{ - import core.lifetime : move; - - static struct NoCopy - { - @disable this(this); - } - - alias MySum = SumType!NoCopy; - - NoCopy lval = NoCopy(); - - MySum x = NoCopy(); - MySum y = NoCopy(); - - - assert(!__traits(compiles, SumType!NoCopy(lval))); - - y = NoCopy(); - y = move(x); - assert(!__traits(compiles, y = lval)); - assert(!__traits(compiles, y = x)); - - bool b = x == y; -} - -// Github issue #22 -// Disabled in BetterC due to use of std.typecons.Nullable -version (D_BetterC) {} else -@safe unittest -{ - import std.typecons; - - static struct A - { - SumType!(Nullable!int) a = Nullable!int.init; - } -} - -// Static arrays of structs with postblits -// Disabled in BetterC due to use of dynamic arrays -version (D_BetterC) {} else -@safe unittest -{ - static struct S - { - int n; - this(this) { n++; } - } - - SumType!(S[1]) x = [S(0)]; - SumType!(S[1]) y = x; - - auto xval = x.get!(S[1])[0].n; - auto yval = y.get!(S[1])[0].n; - - assert(xval != yval); -} - -// Replacement does not happen inside SumType -// Disabled in BetterC due to use of associative arrays -version (D_BetterC) {} else -@safe unittest -{ - import std.typecons : Tuple, ReplaceTypeUnless; - alias A = Tuple!(This*,SumType!(This*))[SumType!(This*,string)[This]]; - alias TR = ReplaceTypeUnless!(isSumTypeInstance, This, int, A); - static assert(is(TR == 
Tuple!(int*,SumType!(This*))[SumType!(This*, string)[int]])); -} - -// Supports nested self-referential SumTypes -@safe unittest -{ - import std.typecons : Tuple, Flag; - alias Nat = SumType!(Flag!"0", Tuple!(This*)); - alias Inner = SumType!Nat; - alias Outer = SumType!(Nat*, Tuple!(This*, This*)); -} - -// Self-referential SumTypes inside Algebraic -// Disabled in BetterC due to use of std.variant.Algebraic -version (D_BetterC) {} else -@safe unittest -{ - import std.variant : Algebraic; - - alias T = Algebraic!(SumType!(This*)); - - assert(is(T.AllowedTypes[0].Types[0] == T.AllowedTypes[0]*)); -} - -// Doesn't call @system postblits in @safe code -@safe unittest -{ - static struct SystemCopy { @system this(this) {} } - SystemCopy original; - - assert(!__traits(compiles, () @safe - { - SumType!SystemCopy copy = original; - })); - - assert(!__traits(compiles, () @safe - { - SumType!SystemCopy copy; copy = original; - })); -} - -// Doesn't overwrite pointers in @safe code -@safe unittest -{ - alias MySum = SumType!(int*, int); - - MySum x; - - assert(!__traits(compiles, () @safe - { - x = 123; - })); - - assert(!__traits(compiles, () @safe - { - x = MySum(123); - })); -} - -// Types with invariants -// Disabled in BetterC due to use of exceptions -version (D_BetterC) {} else -version (D_Invariants) -@system unittest -{ - import std.exception : assertThrown; - import core.exception : AssertError; - - struct S - { - int i; - invariant { assert(i >= 0); } - } - - class C - { - int i; - invariant { assert(i >= 0); } - } - - SumType!S x; - x.match!((ref v) { v.i = -1; }); - assertThrown!AssertError(assert(&x)); - - SumType!C y = new C(); - y.match!((ref v) { v.i = -1; }); - assertThrown!AssertError(assert(&y)); -} - -// Calls value postblit on self-assignment -@safe unittest -{ - static struct S - { - int n; - this(this) { n++; } - } - - SumType!S x = S(); - SumType!S y; - y = x; - - auto xval = x.get!S.n; - auto yval = y.get!S.n; - - assert(xval != yval); -} - -// 
Github issue #29 -@safe unittest -{ - alias A = SumType!string; - - @safe A createA(string arg) - { - return A(arg); - } - - @safe void test() - { - A a = createA(""); - } -} - -// SumTypes as associative array keys -// Disabled in BetterC due to use of associative arrays -version (D_BetterC) {} else -@safe unittest -{ - int[SumType!(int, string)] aa; -} - -// toString with non-copyable types -// Disabled in BetterC due to use of std.conv.to (in toString) -version (D_BetterC) {} else -@safe unittest -{ - struct NoCopy - { - @disable this(this); - } - - SumType!NoCopy x; - - auto _ = x.toString(); -} - -// Can use the result of assignment -@safe unittest -{ - alias MySum = SumType!(int, float); - - MySum a = MySum(123); - MySum b = MySum(3.14); - - assert((a = b) == b); - assert((a = MySum(123)) == MySum(123)); - assert((a = 3.14) == MySum(3.14)); - assert(((a = b) = MySum(123)) == MySum(123)); -} - -// Types with copy constructors -@safe unittest -{ - static struct S - { - int n; - - this(ref return scope inout S other) inout - { - n = other.n + 1; - } - } - - SumType!S x = S(); - SumType!S y = x; - - auto xval = x.get!S.n; - auto yval = y.get!S.n; - - assert(xval != yval); -} - -// Copyable by generated copy constructors -@safe unittest -{ - static struct Inner - { - ref this(ref inout Inner other) {} - } - - static struct Outer - { - SumType!Inner inner; - } - - Outer x; - Outer y = x; -} - -// Types with qualified copy constructors -@safe unittest -{ - static struct ConstCopy - { - int n; - this(inout int n) inout { this.n = n; } - this(ref const typeof(this) other) const { this.n = other.n; } - } - - static struct ImmutableCopy - { - int n; - this(inout int n) inout { this.n = n; } - this(ref immutable typeof(this) other) immutable { this.n = other.n; } - } - - const SumType!ConstCopy x = const(ConstCopy)(1); - immutable SumType!ImmutableCopy y = immutable(ImmutableCopy)(1); -} - -// Types with disabled opEquals -@safe unittest -{ - static struct S - { - 
@disable bool opEquals(const S rhs) const; - } - - auto _ = SumType!S(S()); -} - -// Types with non-const opEquals -@safe unittest -{ - static struct S - { - int i; - bool opEquals(S rhs) { return i == rhs.i; } - } - - auto _ = SumType!S(S(123)); -} - -// Incomparability of different SumTypes -@safe unittest -{ - SumType!(int, string) x = 123; - SumType!(string, int) y = 123; - - assert(!__traits(compiles, x != y)); -} - -// Self-reference in return/parameter type of function pointer member -// Disabled in BetterC due to use of delegates -version (D_BetterC) {} else -@safe unittest -{ - alias T = SumType!(int, This delegate(This)); -} - -// Construction and assignment from implicitly-convertible lvalue -@safe unittest -{ - alias MySum = SumType!bool; - - const(bool) b = true; - - MySum x = b; - MySum y; y = b; -} - -// @safe assignment to the only pointer type in a SumType -@safe unittest -{ - SumType!(string, int) sm = 123; - sm = "this should be @safe"; -} - -// Immutable member type with copy constructor -// https://issues.dlang.org/show_bug.cgi?id=22572 -@safe unittest -{ - static struct CopyConstruct - { - this(ref inout CopyConstruct other) inout {} - } - - static immutable struct Value - { - CopyConstruct c; - } - - SumType!Value s; -} - -// Construction of inout-qualified SumTypes -// https://issues.dlang.org/show_bug.cgi?id=22901 -@safe unittest -{ - static inout(SumType!(int[])) example(inout(int[]) arr) - { - return inout(SumType!(int[]))(arr); - } -} - -// Assignment of struct with overloaded opAssign in CTFE -// https://issues.dlang.org/show_bug.cgi?id=23182 -@safe unittest -{ - static struct HasOpAssign - { - void opAssign(HasOpAssign rhs) {} - } - - static SumType!HasOpAssign test() - { - SumType!HasOpAssign s; - // Test both overloads - s = HasOpAssign(); - s = SumType!HasOpAssign(); - return s; - } - - // Force CTFE - enum result = test(); -} - -/// True if `T` is an instance of the `SumType` template, otherwise false. 
-private enum bool isSumTypeInstance(T) = is(T == SumType!Args, Args...); - -@safe unittest -{ - static struct Wrapper - { - SumType!int s; - alias s this; - } - - assert(isSumTypeInstance!(SumType!int)); - assert(!isSumTypeInstance!Wrapper); -} - -/// True if `T` is a [SumType] or implicitly converts to one, otherwise false. -enum bool isSumType(T) = is(T : SumType!Args, Args...); - -/// -@safe unittest -{ - static struct ConvertsToSumType - { - SumType!int payload; - alias payload this; - } - - static struct ContainsSumType - { - SumType!int payload; - } - - assert(isSumType!(SumType!int)); - assert(isSumType!ConvertsToSumType); - assert(!isSumType!ContainsSumType); -} - -/** - * Calls a type-appropriate function with the value held in a [SumType]. - * - * For each possible type the [SumType] can hold, the given handlers are - * checked, in order, to see whether they accept a single argument of that type. - * The first one that does is chosen as the match for that type. (Note that the - * first match may not always be the most exact match. - * See ["Avoiding unintentional matches"](#avoiding-unintentional-matches) for - * one common pitfall.) - * - * Every type must have a matching handler, and every handler must match at - * least one type. This is enforced at compile time. - * - * Handlers may be functions, delegates, or objects with `opCall` overloads. If - * a function with more than one overload is given as a handler, all of the - * overloads are considered as potential matches. - * - * Templated handlers are also accepted, and will match any type for which they - * can be [implicitly instantiated](https://dlang.org/glossary.html#ifti). See - * ["Introspection-based matching"](#introspection-based-matching) for an - * example of templated handler usage. - * - * If multiple [SumType]s are passed to match, their values are passed to the - * handlers as separate arguments, and matching is done for each possible - * combination of value types. 
See ["Multiple dispatch"](#multiple-dispatch) for - * an example. - * - * Returns: - * The value returned from the handler that matches the currently-held type. - * - * See_Also: $(REF visit, std,variant) - */ -template match(handlers...) -{ - import std.typecons : Yes; - - /** - * The actual `match` function. - * - * Params: - * args = One or more [SumType] objects. - */ - auto ref match(SumTypes...)(auto ref SumTypes args) - if (allSatisfy!(isSumType, SumTypes) && args.length > 0) - { - return matchImpl!(Yes.exhaustive, handlers)(args); - } -} - -/** $(DIVID avoiding-unintentional-matches, $(H3 Avoiding unintentional matches)) - * - * Sometimes, implicit conversions may cause a handler to match more types than - * intended. The example below shows two solutions to this problem. - */ -@safe unittest -{ - alias Number = SumType!(double, int); - - Number x; - - // Problem: because int implicitly converts to double, the double - // handler is used for both types, and the int handler never matches. - assert(!__traits(compiles, - x.match!( - (double d) => "got double", - (int n) => "got int" - ) - )); - - // Solution 1: put the handler for the "more specialized" type (in this - // case, int) before the handler for the type it converts to. - assert(__traits(compiles, - x.match!( - (int n) => "got int", - (double d) => "got double" - ) - )); - - // Solution 2: use a template that only accepts the exact type it's - // supposed to match, instead of any type that implicitly converts to it. - alias exactly(T, alias fun) = function (arg) - { - static assert(is(typeof(arg) == T)); - return fun(arg); - }; - - // Now, even if we put the double handler first, it will only be used for - // doubles, not ints. 
- assert(__traits(compiles, - x.match!( - exactly!(double, d => "got double"), - exactly!(int, n => "got int") - ) - )); -} - -/** $(DIVID multiple-dispatch, $(H3 Multiple dispatch)) - * - * Pattern matching can be performed on multiple `SumType`s at once by passing - * handlers with multiple arguments. This usually leads to more concise code - * than using nested calls to `match`, as show below. - */ -@safe unittest -{ - struct Point2D { double x, y; } - struct Point3D { double x, y, z; } - - alias Point = SumType!(Point2D, Point3D); - - version (none) - { - // This function works, but the code is ugly and repetitive. - // It uses three separate calls to match! - @safe pure nothrow @nogc - bool sameDimensions(Point p1, Point p2) - { - return p1.match!( - (Point2D _) => p2.match!( - (Point2D _) => true, - _ => false - ), - (Point3D _) => p2.match!( - (Point3D _) => true, - _ => false - ) - ); - } - } - - // This version is much nicer. - @safe pure nothrow @nogc - bool sameDimensions(Point p1, Point p2) - { - alias doMatch = match!( - (Point2D _1, Point2D _2) => true, - (Point3D _1, Point3D _2) => true, - (_1, _2) => false - ); - - return doMatch(p1, p2); - } - - Point a = Point2D(1, 2); - Point b = Point2D(3, 4); - Point c = Point3D(5, 6, 7); - Point d = Point3D(8, 9, 0); - - assert( sameDimensions(a, b)); - assert( sameDimensions(c, d)); - assert(!sameDimensions(a, c)); - assert(!sameDimensions(d, b)); -} - -/** - * Attempts to call a type-appropriate function with the value held in a - * [SumType], and throws on failure. - * - * Matches are chosen using the same rules as [match], but are not required to - * be exhaustive—in other words, a type (or combination of types) is allowed to - * have no matching handler. If a type without a handler is encountered at - * runtime, a [MatchException] is thrown. - * - * Not available when compiled with `-betterC`. 
- * - * Returns: - * The value returned from the handler that matches the currently-held type, - * if a handler was given for that type. - * - * Throws: - * [MatchException], if the currently-held type has no matching handler. - * - * See_Also: $(REF tryVisit, std,variant) - */ -version (D_Exceptions) -template tryMatch(handlers...) -{ - import std.typecons : No; - - /** - * The actual `tryMatch` function. - * - * Params: - * args = One or more [SumType] objects. - */ - auto ref tryMatch(SumTypes...)(auto ref SumTypes args) - if (allSatisfy!(isSumType, SumTypes) && args.length > 0) - { - return matchImpl!(No.exhaustive, handlers)(args); - } -} - -/** - * Thrown by [tryMatch] when an unhandled type is encountered. - * - * Not available when compiled with `-betterC`. - */ -version (D_Exceptions) -class MatchException : Exception -{ - /// - pure @safe @nogc nothrow - this(string msg, string file = __FILE__, size_t line = __LINE__) - { - super(msg, file, line); - } -} - -/** - * True if `handler` is a potential match for `Ts`, otherwise false. - * - * See the documentation for [match] for a full explanation of how matches are - * chosen. - */ -template canMatch(alias handler, Ts...) 
-if (Ts.length > 0) -{ - enum canMatch = is(typeof((ref Ts args) => handler(args))); -} - -/// -@safe unittest -{ - alias handleInt = (int i) => "got an int"; - - assert( canMatch!(handleInt, int)); - assert(!canMatch!(handleInt, string)); -} - -// Includes all overloads of the given handler -@safe unittest -{ - static struct OverloadSet - { - static void fun(int n) {} - static void fun(double d) {} - } - - assert(canMatch!(OverloadSet.fun, int)); - assert(canMatch!(OverloadSet.fun, double)); -} - -// Like aliasSeqOf!(iota(n)), but works in BetterC -private template Iota(size_t n) -{ - static if (n == 0) - { - alias Iota = AliasSeq!(); - } - else - { - alias Iota = AliasSeq!(Iota!(n - 1), n - 1); - } -} - -@safe unittest -{ - assert(is(Iota!0 == AliasSeq!())); - assert(Iota!1 == AliasSeq!(0)); - assert(Iota!3 == AliasSeq!(0, 1, 2)); -} - -/* The number that the dim-th argument's tag is multiplied by when - * converting TagTuples to and from case indices ("caseIds"). - * - * Named by analogy to the stride that the dim-th index into a - * multidimensional static array is multiplied by to calculate the - * offset of a specific element. - */ -private size_t stride(size_t dim, lengths...)() -{ - import core.checkedint : mulu; - - size_t result = 1; - bool overflow = false; - - static foreach (i; 0 .. dim) - { - result = mulu(result, lengths[i], overflow); - } - - /* The largest number matchImpl uses, numCases, is calculated with - * stride!(SumTypes.length), so as long as this overflow check - * passes, we don't need to check for overflow anywhere else. - */ - assert(!overflow, "Integer overflow"); - return result; -} - -private template matchImpl(Flag!"exhaustive" exhaustive, handlers...) 
-{ - auto ref matchImpl(SumTypes...)(auto ref SumTypes args) - if (allSatisfy!(isSumType, SumTypes) && args.length > 0) - { - alias stride(size_t i) = .stride!(i, Map!(typeCount, SumTypes)); - alias TagTuple = .TagTuple!(SumTypes); - - /* - * A list of arguments to be passed to a handler needed for the case - * labeled with `caseId`. - */ - template handlerArgs(size_t caseId) - { - enum tags = TagTuple.fromCaseId(caseId); - enum argsFrom(size_t i : tags.length) = ""; - enum argsFrom(size_t i) = "args[" ~ toCtString!i ~ "].get!(SumTypes[" ~ toCtString!i ~ "]" ~ - ".Types[" ~ toCtString!(tags[i]) ~ "])(), " ~ argsFrom!(i + 1); - enum handlerArgs = argsFrom!0; - } - - /* An AliasSeq of the types of the member values in the argument list - * returned by `handlerArgs!caseId`. - * - * Note that these are the actual (that is, qualified) types of the - * member values, which may not be the same as the types listed in - * the arguments' `.Types` properties. - */ - template valueTypes(size_t caseId) - { - enum tags = TagTuple.fromCaseId(caseId); - - template getType(size_t i) - { - enum tid = tags[i]; - alias T = SumTypes[i].Types[tid]; - alias getType = typeof(args[i].get!T()); - } - - alias valueTypes = Map!(getType, Iota!(tags.length)); - } - - /* The total number of cases is - * - * Π SumTypes[i].Types.length for 0 ≤ i < SumTypes.length - * - * Or, equivalently, - * - * ubyte[SumTypes[0].Types.length]...[SumTypes[$-1].Types.length].sizeof - * - * Conveniently, this is equal to stride!(SumTypes.length), so we can - * use that function to compute it. - */ - enum numCases = stride!(SumTypes.length); - - /* Guaranteed to never be a valid handler index, since - * handlers.length <= size_t.max. - */ - enum noMatch = size_t.max; - - // An array that maps caseIds to handler indices ("hids"). 
- enum matches = () - { - size_t[numCases] matches; - - // Workaround for https://issues.dlang.org/show_bug.cgi?id=19561 - foreach (ref match; matches) - { - match = noMatch; - } - - static foreach (caseId; 0 .. numCases) - { - static foreach (hid, handler; handlers) - { - static if (canMatch!(handler, valueTypes!caseId)) - { - if (matches[caseId] == noMatch) - { - matches[caseId] = hid; - } - } - } - } - - return matches; - }(); - - import std.algorithm.searching : canFind; - - // Check for unreachable handlers - static foreach (hid, handler; handlers) - { - static assert(matches[].canFind(hid), - "`handlers[" ~ toCtString!hid ~ "]` " ~ - "of type `" ~ ( __traits(isTemplate, handler) - ? "template" - : typeof(handler).stringof - ) ~ "` " ~ - "never matches" - ); - } - - // Workaround for https://issues.dlang.org/show_bug.cgi?id=19993 - enum handlerName(size_t hid) = "handler" ~ toCtString!hid; - - static foreach (size_t hid, handler; handlers) - { - mixin("alias ", handlerName!hid, " = handler;"); - } - - immutable argsId = TagTuple(args).toCaseId; - - final switch (argsId) - { - static foreach (caseId; 0 .. numCases) - { - case caseId: - static if (matches[caseId] != noMatch) - { - return mixin(handlerName!(matches[caseId]), "(", handlerArgs!caseId, ")"); - } - else - { - static if (exhaustive) - { - static assert(false, - "No matching handler for types `" ~ valueTypes!caseId.stringof ~ "`"); - } - else - { - throw new MatchException( - "No matching handler for types `" ~ valueTypes!caseId.stringof ~ "`"); - } - } - } - } - - assert(false, "unreachable"); - } -} - -private enum typeCount(SumType) = SumType.Types.length; - -/* A TagTuple represents a single possible set of tags that `args` - * could have at runtime. - * - * Because D does not allow a struct to be the controlling expression - * of a switch statement, we cannot dispatch on the TagTuple directly. 
- * Instead, we must map each TagTuple to a unique integer and generate - * a case label for each of those integers. - * - * This mapping is implemented in `fromCaseId` and `toCaseId`. It uses - * the same technique that's used to map index tuples to memory offsets - * in a multidimensional static array. - * - * For example, when `args` consists of two SumTypes with two member - * types each, the TagTuples corresponding to each case label are: - * - * case 0: TagTuple([0, 0]) - * case 1: TagTuple([1, 0]) - * case 2: TagTuple([0, 1]) - * case 3: TagTuple([1, 1]) - * - * When there is only one argument, the caseId is equal to that - * argument's tag. - */ -private struct TagTuple(SumTypes...) -{ - size_t[SumTypes.length] tags; - alias tags this; - - alias stride(size_t i) = .stride!(i, Map!(typeCount, SumTypes)); - - invariant - { - static foreach (i; 0 .. tags.length) - { - assert(tags[i] < SumTypes[i].Types.length, "Invalid tag"); - } - } - - this(ref const(SumTypes) args) - { - static foreach (i; 0 .. tags.length) - { - tags[i] = args[i].tag; - } - } - - static TagTuple fromCaseId(size_t caseId) - { - TagTuple result; - - // Most-significant to least-significant - static foreach_reverse (i; 0 .. result.length) - { - result[i] = caseId / stride!i; - caseId %= stride!i; - } - - return result; - } - - size_t toCaseId() - { - size_t result; - - static foreach (i; 0 .. 
tags.length) - { - result += tags[i] * stride!i; - } - - return result; - } -} - -// Matching -@safe unittest -{ - alias MySum = SumType!(int, float); - - MySum x = MySum(42); - MySum y = MySum(3.14); - - assert(x.match!((int v) => true, (float v) => false)); - assert(y.match!((int v) => false, (float v) => true)); -} - -// Missing handlers -@safe unittest -{ - alias MySum = SumType!(int, float); - - MySum x = MySum(42); - - assert(!__traits(compiles, x.match!((int x) => true))); - assert(!__traits(compiles, x.match!())); -} - -// Handlers with qualified parameters -// Disabled in BetterC due to use of dynamic arrays -version (D_BetterC) {} else -@safe unittest -{ - alias MySum = SumType!(int[], float[]); - - MySum x = MySum([1, 2, 3]); - MySum y = MySum([1.0, 2.0, 3.0]); - - assert(x.match!((const(int[]) v) => true, (const(float[]) v) => false)); - assert(y.match!((const(int[]) v) => false, (const(float[]) v) => true)); -} - -// Handlers for qualified types -// Disabled in BetterC due to use of dynamic arrays -version (D_BetterC) {} else -@safe unittest -{ - alias MySum = SumType!(immutable(int[]), immutable(float[])); - - MySum x = MySum([1, 2, 3]); - - assert(x.match!((immutable(int[]) v) => true, (immutable(float[]) v) => false)); - assert(x.match!((const(int[]) v) => true, (const(float[]) v) => false)); - // Tail-qualified parameters - assert(x.match!((immutable(int)[] v) => true, (immutable(float)[] v) => false)); - assert(x.match!((const(int)[] v) => true, (const(float)[] v) => false)); - // Generic parameters - assert(x.match!((immutable v) => true)); - assert(x.match!((const v) => true)); - // Unqualified parameters - assert(!__traits(compiles, - x.match!((int[] v) => true, (float[] v) => false) - )); -} - -// Delegate handlers -// Disabled in BetterC due to use of closures -version (D_BetterC) {} else -@safe unittest -{ - alias MySum = SumType!(int, float); - - int answer = 42; - MySum x = MySum(42); - MySum y = MySum(3.14); - - assert(x.match!((int v) => 
v == answer, (float v) => v == answer)); - assert(!y.match!((int v) => v == answer, (float v) => v == answer)); -} - -version (unittest) -{ - version (D_BetterC) - { - // std.math.isClose depends on core.runtime.math, so use a - // libc-based version for testing with -betterC - @safe pure @nogc nothrow - private bool isClose(double lhs, double rhs) - { - import core.stdc.math : fabs; - - return fabs(lhs - rhs) < 1e-5; - } - } - else - { - import std.math : isClose; - } -} - -// Generic handler -@safe unittest -{ - alias MySum = SumType!(int, float); - - MySum x = MySum(42); - MySum y = MySum(3.14); - - assert(x.match!(v => v*2) == 84); - assert(y.match!(v => v*2).isClose(6.28)); -} - -// Fallback to generic handler -// Disabled in BetterC due to use of std.conv.to -version (D_BetterC) {} else -@safe unittest -{ - import std.conv : to; - - alias MySum = SumType!(int, float, string); - - MySum x = MySum(42); - MySum y = MySum("42"); - - assert(x.match!((string v) => v.to!int, v => v*2) == 84); - assert(y.match!((string v) => v.to!int, v => v*2) == 42); -} - -// Multiple non-overlapping generic handlers -@safe unittest -{ - import std.array : staticArray; - - alias MySum = SumType!(int, float, int[], char[]); - - static ints = staticArray([1, 2, 3]); - static chars = staticArray(['a', 'b', 'c']); - - MySum x = MySum(42); - MySum y = MySum(3.14); - MySum z = MySum(ints[]); - MySum w = MySum(chars[]); - - assert(x.match!(v => v*2, v => v.length) == 84); - assert(y.match!(v => v*2, v => v.length).isClose(6.28)); - assert(w.match!(v => v*2, v => v.length) == 3); - assert(z.match!(v => v*2, v => v.length) == 3); -} - -// Structural matching -@safe unittest -{ - static struct S1 { int x; } - static struct S2 { int y; } - alias MySum = SumType!(S1, S2); - - MySum a = MySum(S1(0)); - MySum b = MySum(S2(0)); - - assert(a.match!(s1 => s1.x + 1, s2 => s2.y - 1) == 1); - assert(b.match!(s1 => s1.x + 1, s2 => s2.y - 1) == -1); -} - -// Separate opCall handlers -@safe unittest -{ - 
static struct IntHandler - { - bool opCall(int arg) - { - return true; - } - } - - static struct FloatHandler - { - bool opCall(float arg) - { - return false; - } - } - - alias MySum = SumType!(int, float); - - MySum x = MySum(42); - MySum y = MySum(3.14); - - assert(x.match!(IntHandler.init, FloatHandler.init)); - assert(!y.match!(IntHandler.init, FloatHandler.init)); -} - -// Compound opCall handler -@safe unittest -{ - static struct CompoundHandler - { - bool opCall(int arg) - { - return true; - } - - bool opCall(float arg) - { - return false; - } - } - - alias MySum = SumType!(int, float); - - MySum x = MySum(42); - MySum y = MySum(3.14); - - assert(x.match!(CompoundHandler.init)); - assert(!y.match!(CompoundHandler.init)); -} - -// Ordered matching -@safe unittest -{ - alias MySum = SumType!(int, float); - - MySum x = MySum(42); - - assert(x.match!((int v) => true, v => false)); -} - -// Non-exhaustive matching -version (D_Exceptions) -@system unittest -{ - import std.exception : assertThrown, assertNotThrown; - - alias MySum = SumType!(int, float); - - MySum x = MySum(42); - MySum y = MySum(3.14); - - assertNotThrown!MatchException(x.tryMatch!((int n) => true)); - assertThrown!MatchException(y.tryMatch!((int n) => true)); -} - -// Non-exhaustive matching in @safe code -version (D_Exceptions) -@safe unittest -{ - SumType!(int, float) x; - - auto _ = x.tryMatch!( - (int n) => n + 1, - ); -} - -// Handlers with ref parameters -@safe unittest -{ - alias Value = SumType!(long, double); - - auto value = Value(3.14); - - value.match!( - (long) {}, - (ref double d) { d *= 2; } - ); - - assert(value.get!double.isClose(6.28)); -} - -// Unreachable handlers -@safe unittest -{ - alias MySum = SumType!(int, string); - - MySum s; - - assert(!__traits(compiles, - s.match!( - (int _) => 0, - (string _) => 1, - (double _) => 2 - ) - )); - - assert(!__traits(compiles, - s.match!( - _ => 0, - (int _) => 1 - ) - )); -} - -// Unsafe handlers -@system unittest -{ - SumType!int x; 
- alias unsafeHandler = (int x) @system { return; }; - - assert(!__traits(compiles, () @safe - { - x.match!unsafeHandler; - })); - - auto test() @system - { - return x.match!unsafeHandler; - } -} - -// Overloaded handlers -@safe unittest -{ - static struct OverloadSet - { - static string fun(int i) { return "int"; } - static string fun(double d) { return "double"; } - } - - alias MySum = SumType!(int, double); - - MySum a = 42; - MySum b = 3.14; - - assert(a.match!(OverloadSet.fun) == "int"); - assert(b.match!(OverloadSet.fun) == "double"); -} - -// Overload sets that include SumType arguments -@safe unittest -{ - alias Inner = SumType!(int, double); - alias Outer = SumType!(Inner, string); - - static struct OverloadSet - { - @safe: - static string fun(int i) { return "int"; } - static string fun(double d) { return "double"; } - static string fun(string s) { return "string"; } - static string fun(Inner i) { return i.match!fun; } - static string fun(Outer o) { return o.match!fun; } - } - - Outer a = Inner(42); - Outer b = Inner(3.14); - Outer c = "foo"; - - assert(OverloadSet.fun(a) == "int"); - assert(OverloadSet.fun(b) == "double"); - assert(OverloadSet.fun(c) == "string"); -} - -// Overload sets with ref arguments -@safe unittest -{ - static struct OverloadSet - { - static void fun(ref int i) { i = 42; } - static void fun(ref double d) { d = 3.14; } - } - - alias MySum = SumType!(int, double); - - MySum x = 0; - MySum y = 0.0; - - x.match!(OverloadSet.fun); - y.match!(OverloadSet.fun); - - assert(x.match!((value) => is(typeof(value) == int) && value == 42)); - assert(y.match!((value) => is(typeof(value) == double) && value == 3.14)); -} - -// Overload sets with templates -@safe unittest -{ - import std.traits : isNumeric; - - static struct OverloadSet - { - static string fun(string arg) - { - return "string"; - } - - static string fun(T)(T arg) - if (isNumeric!T) - { - return "numeric"; - } - } - - alias MySum = SumType!(int, string); - - MySum x = 123; - MySum y 
= "hello"; - - assert(x.match!(OverloadSet.fun) == "numeric"); - assert(y.match!(OverloadSet.fun) == "string"); -} - -// Github issue #24 -@safe unittest -{ - void test() @nogc - { - int acc = 0; - SumType!int(1).match!((int x) => acc += x); - } -} - -// Github issue #31 -@safe unittest -{ - void test() @nogc - { - int acc = 0; - - SumType!(int, string)(1).match!( - (int x) => acc += x, - (string _) => 0, - ); - } -} - -// Types that `alias this` a SumType -@safe unittest -{ - static struct A {} - static struct B {} - static struct D { SumType!(A, B) value; alias value this; } - - auto _ = D().match!(_ => true); -} - -// Multiple dispatch -@safe unittest -{ - alias MySum = SumType!(int, string); - - static int fun(MySum x, MySum y) - { - import std.meta : Args = AliasSeq; - - return Args!(x, y).match!( - (int xv, int yv) => 0, - (string xv, int yv) => 1, - (int xv, string yv) => 2, - (string xv, string yv) => 3 - ); - } - - assert(fun(MySum(0), MySum(0)) == 0); - assert(fun(MySum(""), MySum(0)) == 1); - assert(fun(MySum(0), MySum("")) == 2); - assert(fun(MySum(""), MySum("")) == 3); -} - -// inout SumTypes -@safe unittest -{ - inout(int[]) fun(inout(SumType!(int[])) x) - { - return x.match!((inout(int[]) a) => a); - } -} - -private void destroyIfOwner(T)(ref T value) -{ - static if (hasElaborateDestructor!T) - { - destroy(value); - } -} diff --git a/source/dyaml/style.d b/source/dyaml/style.d deleted file mode 100644 index 319592c..0000000 --- a/source/dyaml/style.d +++ /dev/null @@ -1,37 +0,0 @@ - -// Copyright Ferdinand Majerech 2011. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -///YAML node formatting styles. -module dyaml.style; - - -///Scalar styles. 
-enum ScalarStyle : ubyte -{ - /// Invalid (uninitialized) style - invalid = 0, - /// `|` (Literal block style) - literal, - /// `>` (Folded block style) - folded, - /// Plain scalar - plain, - /// Single quoted scalar - singleQuoted, - /// Double quoted scalar - doubleQuoted -} - -///Collection styles. -enum CollectionStyle : ubyte -{ - /// Invalid (uninitialized) style - invalid = 0, - /// Block style. - block, - /// Flow style. - flow -} diff --git a/source/dyaml/tagdirective.d b/source/dyaml/tagdirective.d deleted file mode 100644 index 54687fe..0000000 --- a/source/dyaml/tagdirective.d +++ /dev/null @@ -1,15 +0,0 @@ - -// Copyright Ferdinand Majerech 2011. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -///Tag directives. -module dyaml.tagdirective; - -///Single tag directive. handle is the shortcut, prefix is the prefix that replaces it. -struct TagDirective -{ - string handle; - string prefix; -} diff --git a/source/dyaml/token.d b/source/dyaml/token.d deleted file mode 100644 index 5400a3f..0000000 --- a/source/dyaml/token.d +++ /dev/null @@ -1,172 +0,0 @@ - -// Copyright Ferdinand Majerech 2011-2014. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -/// YAML tokens. -/// Code based on PyYAML: http://www.pyyaml.org -module dyaml.token; - - -import std.conv; - -import dyaml.encoding; -import dyaml.exception; -import dyaml.reader; -import dyaml.style; - - -package: - -/// Token types. 
-enum TokenID : ubyte -{ - // Invalid (uninitialized) token - invalid = 0, - directive, - documentStart, - documentEnd, - streamStart, - streamEnd, - blockSequenceStart, - blockMappingStart, - blockEnd, - flowSequenceStart, - flowMappingStart, - flowSequenceEnd, - flowMappingEnd, - key, - value, - blockEntry, - flowEntry, - alias_, - anchor, - tag, - scalar -} - -/// Specifies the type of a tag directive token. -enum DirectiveType : ubyte -{ - // YAML version directive. - yaml, - // Tag directive. - tag, - // Any other directive is "reserved" for future YAML versions. - reserved -} - -/// Token produced by scanner. -/// -/// 32 bytes on 64-bit. -struct Token -{ - @disable int opCmp(ref Token); - - // 16B - /// Value of the token, if any. - /// - /// Values are char[] instead of string, as Parser may still change them in a few - /// cases. Parser casts values to strings when producing Events. - char[] value; - // 4B - /// Start position of the token in file/stream. - Mark startMark; - // 4B - /// End position of the token in file/stream. - Mark endMark; - // 1B - /// Token type. - TokenID id; - // 1B - /// Style of scalar token, if this is a scalar token. - ScalarStyle style; - // 1B - /// Encoding, if this is a stream start token. - Encoding encoding; - // 1B - /// Type of directive for directiveToken. - DirectiveType directive; - // 4B - /// Used to split value into 2 substrings for tokens that need 2 values (tagToken) - uint valueDivider; - - /// Get string representation of the token ID. - @property string idString() @safe pure const {return id.to!string;} -} - -/// Construct a directive token. -/// -/// Params: start = Start position of the token. -/// end = End position of the token. -/// value = Value of the token. -/// directive = Directive type (YAML or TAG in YAML 1.1). 
-/// nameEnd = Position of the end of the name -Token directiveToken(const Mark start, const Mark end, char[] value, - DirectiveType directive, const uint nameEnd) @safe pure nothrow @nogc -{ - return Token(value, start, end, TokenID.directive, ScalarStyle.init, Encoding.init, - directive, nameEnd); -} - -/// Construct a simple (no value) token with specified type. -/// -/// Params: id = Type of the token. -/// start = Start position of the token. -/// end = End position of the token. -Token simpleToken(TokenID id)(const Mark start, const Mark end) -{ - return Token(null, start, end, id); -} - -/// Construct a stream start token. -/// -/// Params: start = Start position of the token. -/// end = End position of the token. -/// encoding = Encoding of the stream. -Token streamStartToken(const Mark start, const Mark end, const Encoding encoding) @safe pure nothrow @nogc -{ - return Token(null, start, end, TokenID.streamStart, ScalarStyle.invalid, encoding); -} - -/// Aliases for construction of simple token types. -alias streamEndToken = simpleToken!(TokenID.streamEnd); -alias blockSequenceStartToken = simpleToken!(TokenID.blockSequenceStart); -alias blockMappingStartToken = simpleToken!(TokenID.blockMappingStart); -alias blockEndToken = simpleToken!(TokenID.blockEnd); -alias keyToken = simpleToken!(TokenID.key); -alias valueToken = simpleToken!(TokenID.value); -alias blockEntryToken = simpleToken!(TokenID.blockEntry); -alias flowEntryToken = simpleToken!(TokenID.flowEntry); - -/// Construct a simple token with value with specified type. -/// -/// Params: id = Type of the token. -/// start = Start position of the token. -/// end = End position of the token. -/// value = Value of the token. -/// valueDivider = A hack for TagToken to store 2 values in value; the first -/// value goes up to valueDivider, the second after it. 
-Token simpleValueToken(TokenID id)(const Mark start, const Mark end, char[] value, - const uint valueDivider = uint.max) -{ - return Token(value, start, end, id, ScalarStyle.invalid, Encoding.init, - DirectiveType.init, valueDivider); -} - -/// Alias for construction of tag token. -alias tagToken = simpleValueToken!(TokenID.tag); -alias aliasToken = simpleValueToken!(TokenID.alias_); -alias anchorToken = simpleValueToken!(TokenID.anchor); - -/// Construct a scalar token. -/// -/// Params: start = Start position of the token. -/// end = End position of the token. -/// value = Value of the token. -/// style = Style of the token. -Token scalarToken(const Mark start, const Mark end, char[] value, const ScalarStyle style) @safe pure nothrow @nogc -{ - return Token(value, start, end, TokenID.scalar, style); -} diff --git a/source/tinyendian.d b/source/tinyendian.d deleted file mode 100644 index 731b048..0000000 --- a/source/tinyendian.d +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright Ferdinand Majerech 2014. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -/// A minimal library providing functionality for changing the endianness of data. -module tinyendian; - -import std.system : Endian, endian; - -/// Unicode UTF encodings. -enum UTFEncoding : ubyte -{ - UTF_8, - UTF_16, - UTF_32 -} -/// -@safe unittest -{ - const ints = [314, -101]; - int[2] intsSwapBuffer = ints; - swapByteOrder(intsSwapBuffer[]); - swapByteOrder(intsSwapBuffer[]); - assert(ints == intsSwapBuffer, "Lost information when swapping byte order"); - - const floats = [3.14f, 10.1f]; - float[2] floatsSwapBuffer = floats; - swapByteOrder(floatsSwapBuffer[]); - swapByteOrder(floatsSwapBuffer[]); - assert(floats == floatsSwapBuffer, "Lost information when swapping byte order"); -} - -/** Swap byte order of items in an array in place. - * - * Params: - * - * T = Item type. 
Must be either 2 or 4 bytes long. - * array = Buffer with values to fix byte order of. - */ -void swapByteOrder(T)(T[] array) @trusted @nogc pure nothrow -if (T.sizeof == 2 || T.sizeof == 4) -{ - // Swap the byte order of all read characters. - foreach (ref item; array) - { - static if (T.sizeof == 2) - { - import std.algorithm.mutation : swap; - swap(*cast(ubyte*)&item, *(cast(ubyte*)&item + 1)); - } - else static if (T.sizeof == 4) - { - import core.bitop : bswap; - const swapped = bswap(*cast(uint*)&item); - item = *cast(const(T)*)&swapped; - } - else static assert(false, "Unsupported T: " ~ T.stringof); - } -} - -/// See fixUTFByteOrder. -struct FixUTFByteOrderResult -{ - ubyte[] array; - UTFEncoding encoding; - Endian endian; - uint bytesStripped = 0; -} - -/** Convert byte order of an array encoded in UTF(8/16/32) to system endianness in place. - * - * Uses the UTF byte-order-mark (BOM) to determine UTF encoding. If there is no BOM - * at the beginning of array, UTF-8 is assumed (this is compatible with ASCII). The - * BOM, if any, will be removed from the buffer. - * - * If the encoding is determined to be UTF-16 or UTF-32 and there aren't enough bytes - * for the last code unit (i.e. if array.length is odd for UTF-16 or not divisible by - * 4 for UTF-32), the extra bytes (1 for UTF-16, 1-3 for UTF-32) are stripped. - * - * Note that this function does $(B not) check if the array is a valid UTF string. It - * only works with the BOM and 1,2 or 4-byte items. - * - * Params: - * - * array = The array with UTF-data. - * - * Returns: - * - * A struct with the following members: - * - * $(D ubyte[] array) A slice of the input array containing data in correct - * byte order, without BOM and in case of UTF-16/UTF-32, - * without stripped bytes, if any. - * $(D UTFEncoding encoding) Encoding of the result (UTF-8, UTF-16 or UTF-32) - * $(D std.system.Endian endian) Endianness of the original array. 
- * $(D uint bytesStripped) Number of bytes stripped from a UTF-16/UTF-32 array, if - * any. This is non-zero only if array.length was not - * divisible by 2 or 4 for UTF-16 and UTF-32, respectively. - * - * Complexity: (BIGOH array.length) - */ -auto fixUTFByteOrder(ubyte[] array) @safe @nogc pure nothrow -{ - // Enumerates UTF BOMs, matching indices to byteOrderMarks/bomEndian. - enum BOM: ubyte - { - UTF_8 = 0, - UTF_16_LE = 1, - UTF_16_BE = 2, - UTF_32_LE = 3, - UTF_32_BE = 4, - None = ubyte.max - } - - // These 2 are from std.stream - static immutable ubyte[][5] byteOrderMarks = [ [0xEF, 0xBB, 0xBF], - [0xFF, 0xFE], - [0xFE, 0xFF], - [0xFF, 0xFE, 0x00, 0x00], - [0x00, 0x00, 0xFE, 0xFF] ]; - static immutable Endian[5] bomEndian = [ endian, - Endian.littleEndian, - Endian.bigEndian, - Endian.littleEndian, - Endian.bigEndian ]; - - // Documented in function ddoc. - - FixUTFByteOrderResult result; - - // Detect BOM, if any, in the bytes we've read. -1 means no BOM. - // Need the last match: First 2 bytes of UTF-32LE BOM match the UTF-16LE BOM. If we - // used the first match, UTF-16LE would be detected when we have a UTF-32LE BOM. - import std.algorithm.searching : startsWith; - BOM bomId = BOM.None; - foreach (i, bom; byteOrderMarks) - if (array.startsWith(bom)) - bomId = cast(BOM)i; - - result.endian = (bomId != BOM.None) ? bomEndian[bomId] : Endian.init; - - // Start of UTF data (after BOM, if any) - size_t start = 0; - // If we've read more than just the BOM, put the rest into the array. 
- with(BOM) final switch(bomId) - { - case None: result.encoding = UTFEncoding.UTF_8; break; - case UTF_8: - start = 3; - result.encoding = UTFEncoding.UTF_8; - break; - case UTF_16_LE, UTF_16_BE: - result.bytesStripped = array.length % 2; - start = 2; - result.encoding = UTFEncoding.UTF_16; - break; - case UTF_32_LE, UTF_32_BE: - result.bytesStripped = array.length % 4; - start = 4; - result.encoding = UTFEncoding.UTF_32; - break; - } - - // If there's a BOM, we need to move data back to ensure it starts at array[0] - if (start != 0) - { - array = array[start .. $ - result.bytesStripped]; - } - - // We enforce above that array.length is divisible by 2/4 for UTF-16/32 - if (endian != result.endian) - { - if (result.encoding == UTFEncoding.UTF_16) - swapByteOrder(cast(wchar[])array); - else if (result.encoding == UTFEncoding.UTF_32) - swapByteOrder(cast(dchar[])array); - } - - result.array = array; - return result; -} -/// -@safe unittest -{ - { - ubyte[] s = [0xEF, 0xBB, 0xBF, 'a']; - FixUTFByteOrderResult r = fixUTFByteOrder(s); - assert(r.encoding == UTFEncoding.UTF_8); - assert(r.array.length == 1); - assert(r.array == ['a']); - assert(r.endian == Endian.littleEndian); - } - - { - ubyte[] s = ['a']; - FixUTFByteOrderResult r = fixUTFByteOrder(s); - assert(r.encoding == UTFEncoding.UTF_8); - assert(r.array.length == 1); - assert(r.array == ['a']); - assert(r.endian == Endian.bigEndian); - } - - { - // strip 'a' b/c not complete unit - ubyte[] s = [0xFE, 0xFF, 'a']; - FixUTFByteOrderResult r = fixUTFByteOrder(s); - assert(r.encoding == UTFEncoding.UTF_16); - assert(r.array.length == 0); - assert(r.endian == Endian.bigEndian); - } - -}