diff --git a/build-files.txt b/build-files.txt index d6b2445..ed0aaa7 100644 --- a/build-files.txt +++ b/build-files.txt @@ -1,34 +1,26 @@ source/app.d +source/configy/Attributes.d +source/configy/Exceptions.d +source/configy/Read.d +source/configy/Utils.d source/dub/commandline.d -source/dub/dependency.d -source/dub/dependencyresolver.d -source/dub/description.d -source/dub/dub.d -source/dub/init.d -source/dub/packagemanager.d -source/dub/packagesupplier.d -source/dub/packagesuppliers/package.d -source/dub/packagesuppliers/fallback.d -source/dub/packagesuppliers/filesystem.d -source/dub/packagesuppliers/packagesupplier.d -source/dub/packagesuppliers/maven.d -source/dub/packagesuppliers/registry.d -source/dub/package_.d -source/dub/platform.d -source/dub/project.d -source/dub/semver.d source/dub/compilers/buildsettings.d source/dub/compilers/compiler.d source/dub/compilers/dmd.d source/dub/compilers/gdc.d source/dub/compilers/ldc.d source/dub/compilers/utils.d +source/dub/dependency.d +source/dub/dependencyresolver.d +source/dub/description.d +source/dub/dub.d source/dub/generators/build.d source/dub/generators/cmake.d source/dub/generators/generator.d source/dub/generators/sublimetext.d source/dub/generators/targetdescription.d source/dub/generators/visuald.d +source/dub/init.d source/dub/internal/git.d source/dub/internal/libInputVisitor.d source/dub/internal/sdlang/ast.d @@ -48,8 +40,45 @@ source/dub/internal/vibecompat/data/utils.d source/dub/internal/vibecompat/inet/path.d source/dub/internal/vibecompat/inet/url.d +source/dub/package_.d +source/dub/packagemanager.d +source/dub/packagesupplier.d +source/dub/packagesuppliers/fallback.d +source/dub/packagesuppliers/filesystem.d +source/dub/packagesuppliers/maven.d +source/dub/packagesuppliers/package.d +source/dub/packagesuppliers/packagesupplier.d +source/dub/packagesuppliers/registry.d +source/dub/platform.d +source/dub/project.d source/dub/recipe/io.d source/dub/recipe/json.d source/dub/recipe/packagerecipe.d source/dub/recipe/selection.d source/dub/recipe/sdl.d +source/dub/semver.d +source/dub/version_.d +source/dyaml/composer.d +source/dyaml/constructor.d +source/dyaml/dumper.d +source/dyaml/emitter.d +source/dyaml/encoding.d +source/dyaml/escapes.d +source/dyaml/event.d +source/dyaml/exception.d +source/dyaml/linebreak.d +source/dyaml/loader.d +source/dyaml/node.d +source/dyaml/package.d +source/dyaml/parser.d +source/dyaml/queue.d +source/dyaml/reader.d +source/dyaml/representer.d +source/dyaml/resolver.d +source/dyaml/scanner.d +source/dyaml/serializer.d +source/dyaml/stdsumtype.d +source/dyaml/style.d +source/dyaml/tagdirective.d +source/dyaml/token.d +source/tinyendian.d diff --git a/scripts/ci/travis.sh b/scripts/ci/travis.sh index 067d848..bbca31f 100755 --- a/scripts/ci/travis.sh +++ b/scripts/ci/travis.sh @@ -9,7 +9,7 @@ export DMD="$(command -v $DMD)" if [ "$FRONTEND" \> 2.087.z ]; then - ./build.d -preview=dip1000 -w -g -debug + ./build.d -preview=dip1000 -preview=in -w -g -debug fi function clean() { diff --git a/source/configy/Attributes.d b/source/configy/Attributes.d new file mode 100644 index 0000000..a3bcbe0 --- /dev/null +++ b/source/configy/Attributes.d @@ -0,0 +1,283 @@ +/******************************************************************************* + + Define UDAs that can be applied to a configuration struct + + This module is stand alone (a leaf module) to allow importing the UDAs + without importing the whole configuration parsing code. + + Copyright: + Copyright (c) 2019-2022 BOSAGORA Foundation + All rights reserved. + + License: + MIT License. See LICENSE for details. + +*******************************************************************************/ + +module configy.Attributes; + +import std.traits; + +/******************************************************************************* + + An optional parameter with an initial value of `T.init` + + The config parser automatically recognize non-default initializer, + so that the following: + ``` + public struct Config + { + public string greeting = "Welcome home"; + } + ``` + Will not error out if `greeting` is not defined in the config file. + However, this relies on the initializer of the field (`greeting`) being + different from the type initializer (`string.init` is `null`). + In some cases, the default value is also the desired initializer, e.g.: + ``` + public struct Config + { + /// Maximum number of connections. 0 means unlimited. + public uint connections_limit = 0; + } + ``` + In this case, one can add `@Optional` to the field to inform the parser. + +*******************************************************************************/ + +public struct Optional {} + +/******************************************************************************* + + Inform the config filler that this sequence is to be read as a mapping + + On some occasions, one might want to read a mapping as an array. + One reason to do so may be to provide a better experience to the user, + e.g. having to type: + ``` + interfaces: + eth0: + ip: "192.168.0.1" + private: true + wlan0: + ip: "1.2.3.4" + ``` + Instead of the slightly more verbose: + ``` + interfaces: + - name: eth0 + ip: "192.168.0.1" + private: true + - name: wlan0 + ip: "1.2.3.4" + ``` + + The former would require to be expressed as an associative arrays. + However, one major drawback of associative arrays is that they can't have + an initializer, which makes them cumbersome to use in the context of the + config filler. To remediate this issue, one may use `@Key("name")` + on a field (here, `interfaces`) so that the mapping is flattened + to an array. If `name` is `null`, the key will be discarded. + +*******************************************************************************/ + +public struct Key +{ + /// + public string name; +} + +/******************************************************************************* + + Look up the provided name in the YAML node, instead of the field name. + + By default, the config filler will look up the field name of a mapping in + the YAML node. If this is not desired, an explicit `Name` attribute can + be given. This is especially useful for names which are keyword. + + ``` + public struct Config + { + public @Name("delete") bool remove; + } + ``` + +*******************************************************************************/ + +public struct Name +{ + /// + public string name; +} + +/******************************************************************************* + + A field which carries informations about whether it was set or not + + Some configurations may need to know which fields were set explicitly while + keeping defaults. An example of this is a `struct` where at least one field + needs to be set, such as the following: + ``` + public struct ProtoDuration + { + public @Optional long weeks; + public @Optional long days; + public @Optional long hours; + public @Optional long minutes; + public long seconds = 42; + public @Optional long msecs; + public @Optional long usecs; + public @Optional long hnsecs; + public @Optional long nsecs; + } + ``` + In this case, it would be impossible to know if any field was explicitly + provided. Hence, the struct should be written as: + ``` + public struct ProtoDuration + { + public SetInfo!long weeks; + public SetInfo!long days; + public SetInfo!long hours; + public SetInfo!long minutes; + public SetInfo!long seconds = 42; + public SetInfo!long msecs; + public SetInfo!long usecs; + public SetInfo!long hnsecs; + public SetInfo!long nsecs; + } + ``` + Note that `SetInfo` implies `Optional`, and supports default values. + +*******************************************************************************/ + +public struct SetInfo (T) +{ + /*************************************************************************** + + Allow initialization as a field + + This sets the field as having been set, so that: + ``` + struct Config { SetInfo!Duration timeout; } + + Config myConf = { timeout: 10.minutes } + ``` + Will behave as if set explicitly. If this behavior is not wanted, + pass `false` as second argument: + ``` + Config myConf = { timeout: SetInfo!Duration(10.minutes, false) } + ``` + + ***************************************************************************/ + + public this (T initVal, bool isSet = true) @safe pure nothrow @nogc + { + this.value = initVal; + this.set = isSet; + } + + /// Underlying data + public T value; + + /// + alias value this; + + /// Whether this field was set or not + public bool set; +} + +/******************************************************************************* + + Provides a means to convert a field from a `Node` to a complex type + + When filling the config, it might be useful to store types which are + not only simple `string` and integer, such as `URL`, `BigInt`, or any other + library type not directly under the user's control. + + To allow reading those values from the config file, a `Converter` may + be used. The converter will tell the `ConfigFiller` how to convert from + `Node` to the desired type `T`. + + If the type is under the user's control, one can also add a constructor + accepting a single string, or define the `fromString` method, both of which + are tried if no `Converter` is found. + + For types not under the user's control, there might be different ways + to parse the same type within the same struct, or neither the ctor nor + the `fromString` method may be defined under that name. + The exmaple below uses `parse` in place of `fromString`, for example. + + ``` + /// Complex structure representing the age of a person based on its birthday + public struct Age + { + /// + public uint birth_year; + /// + public uint birth_month; + /// + public uint birth_day; + + /// Note that this will be picked up automatically if named `fromString` + /// but this struct might be a library type. + public static Age parse (string value) { /+ Magic +/ } + } + + public struct Person + { + /// + @Converter!Age((Node value) => Age.parse(value.as!string)) + public Age age; + } + ``` + + Note that some fields may also be of multiple YAML types, such as DUB's + `dependencies`, which is either a simple string (`"vibe-d": "~>1.0 "`), + or an in its complex form (`"vibe-d": { "version": "~>1.0" }`). + For those use cases, a `Converter` is the best approach. + + To avoid repeating the field type, a convenience function is provided: + ``` + public struct Age + { + public uint birth_year; + public uint birth_month; + public uint birth_day; + public static Age parse (string value) { /+ Magic +/ } + } + + public struct Person + { + /// Here `converter` will deduct the type from the delegate argument, + /// and return an instance of `Converter`. Mind the case. + @converter((Node value) => Age.parse(value.as!string)) + public Age age; + } + ``` + +*******************************************************************************/ + +public struct Converter (T) +{ + import dyaml.node; + + /// + public alias ConverterFunc = T function (Node input); + + /// + public ConverterFunc converter; +} + +/// Ditto +public auto converter (FT) (FT func) +{ + static assert(isFunctionPointer!FT, + "Error: Argument to `converter` should be a function pointer, not: " + ~ FT.stringof); + + alias RType = ReturnType!FT; + static assert(!is(RType == void), + "Error: Converter needs to be of the return type of the field, not `void`"); + return Converter!RType(func); +} diff --git a/source/configy/Exceptions.d b/source/configy/Exceptions.d new file mode 100644 index 0000000..402ee69 --- /dev/null +++ b/source/configy/Exceptions.d @@ -0,0 +1,360 @@ +/******************************************************************************* + + Definitions for Exceptions used by the config module. + + Copyright: + Copyright (c) 2019-2022 BOSAGORA Foundation + All rights reserved. + + License: + MIT License. See LICENSE for details. + +*******************************************************************************/ + +module configy.Exceptions; + +import configy.Utils; + +import dyaml.exception; +import dyaml.node; + +import std.algorithm : map; +import std.format; + +/******************************************************************************* + + Base exception type thrown by the config parser + + Whenever dealing with Exceptions thrown by the config parser, catching + this type will allow to optionally format with colors: + ``` + try + { + auto conf = parseConfigFile!Config(cmdln); + // ... + } + catch (ConfigException exc) + { + writeln("Parsing the config file failed:"); + writelfln(isOutputATTY() ? "%S" : "%s", exc); + } + ``` + +*******************************************************************************/ + +public abstract class ConfigException : Exception +{ + /// Position at which the error happened + public Mark yamlPosition; + + /// The path at which the key resides + public string path; + + /// If non-empty, the key under 'path' which triggered the error + /// If empty, the key should be considered part of 'path' + public string key; + + /// Constructor + public this (string path, string key, Mark position, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + super(null, file, line); + this.path = path; + this.key = key; + this.yamlPosition = position; + } + + /// Ditto + public this (string path, Mark position, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + this(path, null, position, file, line); + } + + /*************************************************************************** + + Overrides `Throwable.toString` and its sink overload + + It is quite likely that errors from this module may be printed directly + to the end user, who might not have technical knowledge. + + This format the error in a nicer format (e.g. with colors), + and will additionally provide a stack-trace if the `ConfigFillerDebug` + `debug` version was provided. + + Format_chars: + The default format char ("%s") will print a regular message. + If an uppercase 's' is used ("%S"), colors will be used. + + Params: + sink = The sink to send the piece-meal string to + spec = See https://dlang.org/phobos/std_format_spec.html + + ***************************************************************************/ + + public override string toString () scope + { + // Need to be overriden otherwise the overload is shadowed + return super.toString(); + } + + /// Ditto + public override void toString (scope void delegate(in char[]) sink) const scope + @trusted + { + // This breaks the type system, as it blindly trusts a delegate + // However, the type system lacks a way to sanely build an utility + // which accepts a delegate with different qualifiers, so this is the + // less evil approach. + this.toString(cast(SinkType) sink, FormatSpec!char("%s")); + } + + /// Ditto + public void toString (scope SinkType sink, in FormatSpec!char spec) + const scope @safe + { + import core.internal.string : unsignedToTempString; + + const useColors = spec.spec == 'S'; + char[20] buffer = void; + + if (useColors) sink(Yellow); + sink(this.yamlPosition.name); + if (useColors) sink(Reset); + + sink("("); + if (useColors) sink(Cyan); + sink(unsignedToTempString(this.yamlPosition.line, buffer)); + if (useColors) sink(Reset); + sink(":"); + if (useColors) sink(Cyan); + sink(unsignedToTempString(this.yamlPosition.column, buffer)); + if (useColors) sink(Reset); + sink("): "); + + if (this.path.length || this.key.length) + { + if (useColors) sink(Yellow); + sink(this.path); + if (this.path.length && this.key.length) + sink("."); + sink(this.key); + if (useColors) sink(Reset); + sink(": "); + } + + this.formatMessage(sink, spec); + + debug (ConfigFillerDebug) + { + sink("\n\tError originated from: "); + sink(this.file); + sink("("); + sink(unsignedToTempString(line, buffer)); + sink(")"); + + if (!this.info) + return; + + () @trusted nothrow + { + try + { + sink("\n----------------"); + foreach (t; info) + { + sink("\n"); sink(t); + } + } + // ignore more errors + catch (Throwable) {} + }(); + } + } + + /// Hook called by `toString` to simplify coloring + protected abstract void formatMessage ( + scope SinkType sink, in FormatSpec!char spec) + const scope @safe; +} + +/// A configuration exception that is only a single message +package final class ConfigExceptionImpl : ConfigException +{ + public this (string msg, Mark position, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + this(msg, null, null, position, file, line); + } + + public this (string msg, string path, string key, Mark position, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + super(path, key, position, file, line); + this.msg = msg; + } + + protected override void formatMessage ( + scope SinkType sink, in FormatSpec!char spec) + const scope @safe + { + sink(this.msg); + } +} + +/// Exception thrown when the type of the YAML node does not match the D type +package final class TypeConfigException : ConfigException +{ + /// The actual (in the YAML document) type of the node + public string actual; + + /// The expected (as specified in the D type) type + public string expected; + + /// Constructor + public this (Node node, string expected, string path, string key = null, + string file = __FILE__, size_t line = __LINE__) + @safe nothrow + { + this(node.nodeTypeString(), expected, path, key, node.startMark(), + file, line); + } + + /// Ditto + public this (string actual, string expected, string path, string key, + Mark position, string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + super(path, key, position, file, line); + this.actual = actual; + this.expected = expected; + } + + /// Format the message with or without colors + protected override void formatMessage ( + scope SinkType sink, in FormatSpec!char spec) + const scope @safe + { + const useColors = spec.spec == 'S'; + + const fmt = "Expected to be of type %s, but is a %s"; + + if (useColors) + formattedWrite(sink, fmt, this.expected.paint(Green), this.actual.paint(Red)); + else + formattedWrite(sink, fmt, this.expected, this.actual); + } +} + +/// Similar to a `TypeConfigException`, but specific to `Duration` +package final class DurationTypeConfigException : ConfigException +{ + /// The list of valid field (a manifest constant, but we want to avoid the dependency) + public immutable string[] DurationSuffixes; + + /// Actual type of the node + public string actual; + + /// Constructor + public this (Node node, string path, immutable string[] DurationSuffixes, + string file = __FILE__, size_t line = __LINE__) + @safe nothrow + { + super(path, null, node.startMark(), file, line); + this.actual = node.nodeTypeString(); + this.DurationSuffixes = DurationSuffixes; + } + + /// Format the message with or without colors + protected override void formatMessage ( + scope SinkType sink, in FormatSpec!char spec) + const scope @safe + { + const useColors = spec.spec == 'S'; + + const fmt = "Field is of type %s, but expected a mapping with at least one of: %-(%s, %)"; + if (useColors) + formattedWrite(sink, fmt, this.actual.paint(Red), + this.DurationSuffixes.map!(s => s[1 .. $].paint(Green))); + else + formattedWrite(sink, fmt, this.actual, this.DurationSuffixes.map!(s => s[1 .. $])); + } +} + +/// Exception thrown when an unknown key is found in strict mode +public class UnknownKeyConfigException : ConfigException +{ + /// The list of valid field names + public immutable string[] fieldNames; + + /// Constructor + public this (string path, string key, immutable string[] fieldNames, + Mark position, string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + super(path, key, position, file, line); + this.fieldNames = fieldNames; + } + + /// Format the message with or without colors + protected override void formatMessage ( + scope SinkType sink, in FormatSpec!char spec) + const scope @safe + { + const useColors = spec.spec == 'S'; + const fmt = "Key is not a valid member of this section. There are %s valid keys: %-(%s, %)"; + + if (useColors) + formattedWrite(sink, fmt, this.fieldNames.length.paint(Yellow), + this.fieldNames.map!(f => f.paint(Green))); + else + formattedWrite(sink, fmt, this.fieldNames.length, this.fieldNames); + } +} + +/// Exception thrown when a required key is missing +public class MissingKeyException : ConfigException +{ + /// Constructor + public this (string path, string key, Mark position, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + super(path, key, position, file, line); + } + + /// Format the message with or without colors + protected override void formatMessage ( + scope SinkType sink, in FormatSpec!char spec) + const scope @safe + { + sink("Required key was not found in configuration or command line arguments"); + } +} + +/// Wrap an user-thrown Exception that happened in a Converter/ctor/fromString +public class ConstructionException : ConfigException +{ + /// Constructor + public this (Exception next, string path, Mark position, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + super(path, position, file, line); + this.next = next; + } + + /// Format the message with or without colors + protected override void formatMessage ( + scope SinkType sink, in FormatSpec!char spec) + const scope @trusted + { + // Here we break the type system too, calling a `@system` function, + // but since it takes no parameter, why would it not be `@safe` ? + sink(this.next.message()); + } +} diff --git a/source/configy/Read.d b/source/configy/Read.d new file mode 100644 index 0000000..a874160 --- /dev/null +++ b/source/configy/Read.d @@ -0,0 +1,1151 @@ +/******************************************************************************* + + Utilities to fill a struct representing the configuration with the content + of a YAML document. + + The main function of this module is `parseConfig`. Convenience functions + `parseConfigString` and `parseConfigFile` are also available. + + The type parameter to those three functions must be a struct and is used + to drive the processing of the YAML node. When an error is encountered, + an `Exception` will be thrown, with a descriptive message. + The rules by which the struct is filled are designed to be + as intuitive as possible, and are described below. + + Optional_Fields: + One of the major convenience offered by this utility is its handling + of optional fields. A field is detected as optional if it has + an initializer that is different from its type `init` value, + for example `string field = "Something";` is an optional field, + but `int count = 0;` is not. + To mark a field as optional even with its default value, + use the `Optional` UDA: `@Optional int count = 0;`. + + Converter: + Because config structs may contain complex types such as + a Phobos type, a user-defined `Amount`, or Vibe.d's `URL`, + one may need to apply a converter to a struct's field. + Converters are functions that take a YAML `Node` as argument + and return a type that is implicitly convertible to the field type + (usually just the field type). They offer the most power to users, + as they can inspect the YAML structure, but should be used as a last resort. + + Composite_Types: + Processing starts from a `struct` at the top level, and recurse into + every fields individually. If a field is itself a struct, + the filler will attempt the following, in order: + - If the field has no value and is not optional, an Exception will + be thrown with an error message detailing where the issue happened. + - If the field has no value and is optional, the default value will + be used. + - If the field has a value, the filler will first check for a converter + and use it if present. + - If the type has a `static` method named `fromString` whose sole argument + is a `string`, it will be used. + - If the type has a constructor whose sole argument is a `string`, + it will be used; + - Finally, the filler will attempt to deserialize all struct members + one by one and pass them to the default constructor, if there is any. + - If none of the above succeeded, a `static assert` will trigger. + + Alias_this: + If a `struct` contains an `alias this`, the field that is aliased will be + ignored, instead the config parser will parse nested fields as if they + were part of the enclosing structure. This allow to re-use a single `struct` + in multiple place without having to resort to a `mixin template`. + Having an initializer will make all fields in the aliased struct optional. + The aliased field cannot have attributes other than `@Optional`, + which will then apply to all fields it exposes. + + Duration_parsing: + If the config field is of type `core.time.Duration`, special parsing rules + will apply. There are two possible forms in which a Duration field may + be expressed. In the first form, the YAML node should be a mapping, + and it will be checked for fields matching the supported units + in `core.time`: `weeks`, `days`, `hours`, `minutes`, `seconds`, `msecs`, + `usecs`, `hnsecs`, `nsecs`. Strict parsing option will be respected. + The values of the fields will then be added together, so the following + YAML usages are equivalent: + --- + // sleepFor: + // hours: 8 + // minutes: 30 + --- + and: + --- + // sleepFor: + // minutes: 510 + --- + Provided that the definition of the field is: + --- + public Duration sleepFor; + --- + + In the second form, the field should have a suffix composed of an + underscore ('_'), followed by a unit name as defined in `core.time`. + This can be either the field name directly, or a name override. + The latter is recommended to avoid confusion when using the field in code. + In this form, the YAML node is expected to be a scalar. + So the previous example, using this form, would be expressed as: + --- + sleepFor_minutes: 510 + --- + and the field definition should be one of those two: + --- + public @Name("sleepFor_minutes") Duration sleepFor; /// Prefer this + public Duration sleepFor_minutes; /// This works too + --- + + Those forms are mutually exclusive, so a field with a unit suffix + will error out if a mapping is used. This prevents surprises and ensures + that the error message, if any, is consistent accross user input. + + To disable or change this behavior, one may use a `Converter` instead. + + Strict_Parsing: + When strict parsing is enabled, the config filler will also validate + that the YAML nodes do not contains entry which are not present in the + mapping (struct) being processed. + This can be useful to catch typos or outdated configuration options. + + Post_Validation: + Some configuration will require validation accross multiple sections. + For example, two sections may be mutually exclusive as a whole, + or may have fields which are mutually exclusive with another section's + field(s). This kind of dependence is hard to account for declaratively, + and does not affect parsing. For this reason, the preferred way to + handle those cases is to define a `validate` member method on the + affected config struct(s), which will be called once + parsing for that mapping is completed. + If an error is detected, this method should throw an Exception. + + Enabled_or_disabled_field: + While most complex logic validation should be handled post-parsing, + some section may be optional by default, but if provided, will have + required fields. To support this use case, if a field with the name + `enabled` is present in a struct, the parser will first process it. + If it is `false`, the parser will not attempt to process the struct + further, and the other fields will have their default value. + Likewise, if a field named `disabled` exists, the struct will not + be processed if it is set to `true`. + + Copyright: + Copyright (c) 2019-2022 BOSAGORA Foundation + All rights reserved. + + License: + MIT License. See LICENSE for details. + +*******************************************************************************/ + +module configy.Read; + +public import configy.Attributes; +public import configy.Exceptions : ConfigException; +import configy.Exceptions; +import configy.Utils; + +import dyaml.exception; +import dyaml.node; +import dyaml.loader; + +import std.algorithm; +import std.conv; +import std.datetime; +import std.format; +import std.getopt; +import std.meta; +import std.range; +import std.stdio; +import std.traits; +import std.typecons : Nullable, nullable, tuple; + +static import core.time; + +/// Command-line arguments +public struct CLIArgs +{ + /// Path to the config file + public string config_path = "config.yaml"; + + /// Overrides for config options + public string[][string] overrides; + + /// Helper to add items to `overrides` + public void overridesHandler (string, string value) + { + import std.string; + const idx = value.indexOf('='); + if (idx < 0) return; + string k = value[0 .. idx], v = value[idx + 1 .. $]; + if (auto val = k in this.overrides) + (*val) ~= v; + else + this.overrides[k] = [ v ]; + } + + /*************************************************************************** + + Parses the base command line arguments + + This can be composed with the program argument. + For example, consider a program which wants to expose a `--version` + switch, the definition could look like this: + --- + public struct ProgramCLIArgs + { + public CLIArgs base; // This struct + + public alias base this; // For convenience + + public bool version_; // Program-specific part + } + --- + Then, an application-specific configuration routine would be: + --- + public GetoptResult parse (ref ProgramCLIArgs clargs, ref string[] args) + { + auto r = clargs.base.parse(args); + if (r.helpWanted) return r; + return getopt( + args, + "version", "Print the application version, &clargs.version_"); + } + --- + + Params: + args = The command line args to parse (parsed options will be removed) + passThrough = Whether to enable `config.passThrough` and + `config.keepEndOfOptions`. `true` by default, to allow + composability. If your program doesn't have other + arguments, pass `false`. + + Returns: + The result of calling `getopt` + + ***************************************************************************/ + + public GetoptResult parse (ref string[] args, bool passThrough = true) + { + return getopt( + args, + // `caseInsensistive` is the default, but we need something + // with the same type for the ternary + passThrough ? config.keepEndOfOptions : config.caseInsensitive, + // Also the default, same reasoning + passThrough ? config.passThrough : config.noPassThrough, + "config|c", + "Path to the config file. Defaults to: " ~ this.config_path, + &this.config_path, + + "override|O", + "Override a config file value\n" ~ + "Example: -O foo.bar=true -o dns=1.1.1.1 -o dns=2.2.2.2\n" ~ + "Array values are additive, other items are set to the last override", + &this.overridesHandler, + ); + } +} + +/******************************************************************************* + + Attempt to read and process the config file at `path`, print any error + + This 'simple' overload of the more detailed `parseConfigFile` will attempt + to read the file at `path`, and return a `Nullable` instance of it. + If an error happens, either because the file isn't readable or + the configuration has an issue, a message will be printed to `stderr`, + with colors if the output is a TTY, and a `null` instance will be returned. + + The calling code can hence just read a config file via: + ``` + int main () + { + auto configN = parseConfigFileSimple!Config("config.yaml"); + if (configN.isNull()) return 1; // Error path + auto config = configN.get(); + // Rest of the program ... + } + ``` + An overload accepting `CLIArgs args` also exists. + + Params: + path = Path of the file to read from + args = Command line arguments on which `parse` has been called + strict = Whether the parsing should reject unknown keys in the + document, warn, or ignore them (default: `StrictMode.Error`) + + Returns: + An initialized `Config` instance if reading/parsing was successful; + a `null` instance otherwise. + +*******************************************************************************/ + +public Nullable!T parseConfigFileSimple (T) (string path, StrictMode strict = StrictMode.Error) +{ + return parseConfigFileSimple!(T)(CLIArgs(path), strict); +} + + +/// Ditto +public Nullable!T parseConfigFileSimple (T) (in CLIArgs args, StrictMode strict = StrictMode.Error) +{ + try + { + Node root = Loader.fromFile(args.config_path).load(); + return nullable(parseConfig!T(args, root, strict)); + } + catch (ConfigException exc) + { + exc.printException(); + return typeof(return).init; + } + catch (Exception exc) + { + // Other Exception type may be thrown by D-YAML, + // they won't include rich information. + stderr.writeln(exc.message()); + return typeof(return).init; + } +} + +/******************************************************************************* + + Print an Exception, potentially with colors on + + Trusted because of `stderr` usage. + +*******************************************************************************/ + +private void printException (scope ConfigException exc) @trusted +{ + version (Posix) + { + import core.sys.posix.unistd : isatty; + const colors = isatty(stderr.fileno); + } + else + const colors = false; + + if (colors) + stderr.writefln("%S", exc); + else + stderr.writefln("%s", exc.message()); +} + +/******************************************************************************* + + Parses the config file or string and returns a `Config` instance. + + Params: + cmdln = command-line arguments (containing the path to the config) + path = When parsing a string, the path corresponding to it + strict = Whether the parsing should reject unknown keys in the + document, warn, or ignore them (default: `StrictMode.Error`) + + Throws: + `Exception` if parsing the config file failed. + + Returns: + `Config` instance + +*******************************************************************************/ + +public T parseConfigFile (T) (in CLIArgs cmdln, StrictMode strict = StrictMode.Error) +{ + Node root = Loader.fromFile(cmdln.config_path).load(); + return parseConfig!T(cmdln, root, strict); +} + +/// ditto +public T parseConfigString (T) (string data, string path, StrictMode strict = StrictMode.Error) +{ + CLIArgs cmdln = { config_path: path }; + auto loader = Loader.fromString(data); + loader.name = path; + Node root = loader.load(); + return parseConfig!T(cmdln, root, strict); +} + +/******************************************************************************* + + Process the content of the YAML document described by `node` into an + instance of the struct `T`. + + See the module description for a complete overview of this function. + + Params: + T = Type of the config struct to fill + cmdln = Command line arguments + node = The root node matching `T` + strict = Action to take when encountering unknown keys in the document + initPath = Unused + + Returns: + An instance of `T` filled with the content of `node` + + Throws: + If the content of `node` cannot satisfy the requirements set by `T`, + or if `node` contain extra fields and `strict` is `true`. + +*******************************************************************************/ + +public T parseConfig (T) ( + in CLIArgs cmdln, Node node, StrictMode strict = StrictMode.Error, string initPath = null) +{ + static assert(is(T == struct), "`" ~ __FUNCTION__ ~ + "` should only be called with a `struct` type as argument, not: `" ~ + fullyQualifiedName!T ~ "`"); + + final switch (node.nodeID) + { + case NodeID.mapping: + dbgWrite("Parsing config '%s', strict: %s, initPath: %s", + fullyQualifiedName!T, + strict == StrictMode.Warn ? + strict.paint(Yellow) : strict.paintIf(!!strict, Green, Red), + initPath.length ? initPath : "(none)"); + return node.parseMapping!T(initPath, T.init, const(Context)(cmdln, strict), null); + case NodeID.sequence: + case NodeID.scalar: + case NodeID.invalid: + throw new TypeConfigException(node, "mapping (object)", "document root"); + } +} + +/******************************************************************************* + + The behavior to have when encountering a field in YAML not present + in the config definition. + +*******************************************************************************/ + +public enum StrictMode +{ + /// Issue an error by throwing an `UnknownKeyConfigException` + Error = 0, + /// Write a message to `stderr`, but continue processing the file + Warn = 1, + /// Be silent and do nothing + Ignore = 2, +} + +/// Used to pass around configuration +private struct Context +{ + /// + private CLIArgs cmdln; + + /// + private StrictMode strict; +} + +/// Helper template for `staticMap` used for strict mode +private enum FieldRefToName (alias FR) = FR.Name; + +/// Returns: An alias sequence of field names, taking UDAs (`@Name` et al) into account +private alias FieldsName (T) = staticMap!(FieldRefToName, FieldRefTuple!T); + +/// Parse a single mapping, recurse as needed +private T parseMapping (T) + (Node node, string path, auto ref T defaultValue, in Context ctx, in Node[string] fieldDefaults) +{ + static assert(is(T == struct), "`parseMapping` called with wrong type (should be a `struct`)"); + assert(node.nodeID == NodeID.mapping, "Internal error: parseMapping shouldn't have been called"); + + dbgWrite("%s: `parseMapping` called for '%s' (node entries: %s)", + T.stringof.paint(Cyan), path.paint(Cyan), + node.length.paintIf(!!node.length, Green, Red)); + + static foreach (FR; FieldRefTuple!T) + { + static if (FR.Name != FR.FieldName && hasMember!(T, FR.Name) && + !is(typeof(mixin("T.", FR.Name)) == function)) + static assert (FieldRef!(T, FR.Name).Name != FR.Name, + "Field `" ~ FR.FieldName ~ "` `@Name` attribute shadows field `" ~ + FR.Name ~ "` in `" ~ T.stringof ~ "`: Add a `@Name` attribute to `" ~ + FR.Name ~ "` or change that of `" ~ FR.FieldName ~ "`"); + } + + if (ctx.strict != StrictMode.Ignore) + { + /// First, check that all the sections found in the mapping are present in the type + /// If not, the user might have made a typo. + immutable string[] fieldNames = [ FieldsName!T ]; + foreach (const ref Node key, const ref Node value; node) + { + if (!fieldNames.canFind(key.as!string)) + { + if (ctx.strict == StrictMode.Warn) + { + scope exc = new UnknownKeyConfigException( + path, key.as!string, fieldNames, key.startMark()); + exc.printException(); + } + else + throw new UnknownKeyConfigException( + path, key.as!string, fieldNames, key.startMark()); + } + } + } + + const enabledState = node.isMappingEnabled!T(defaultValue); + + if (enabledState.field != EnabledState.Field.None) + dbgWrite("%s: Mapping is enabled: %s", T.stringof.paint(Cyan), (!!enabledState).paintBool()); + + auto convertField (alias FR) () + { + static if (FR.Name != FR.FieldName) + dbgWrite("Field name `%s` will use YAML field `%s`", + FR.FieldName.paint(Yellow), FR.Name.paint(Green)); + // Using exact type here matters: we could get a qualified type + // (e.g. `immutable(string)`) if the field is qualified, + // which causes problems. + FR.Type default_ = __traits(getMember, defaultValue, FR.FieldName); + + // If this struct is disabled, do not attempt to parse anything besides + // the `enabled` / `disabled` field. + if (!enabledState) + { + // Even this is too noisy + version (none) + dbgWrite("%s: %s field of disabled struct, default: %s", + path.paint(Cyan), "Ignoring".paint(Yellow), default_); + + static if (FR.Name == "enabled") + return false; + else static if (FR.Name == "disabled") + return true; + else + return default_; + } + + if (auto ptr = FR.FieldName in fieldDefaults) + { + dbgWrite("Found %s (%s.%s) in `fieldDefaults`", + FR.Name.paint(Cyan), path.paint(Cyan), FR.FieldName.paint(Cyan)); + + if (ctx.strict && FR.FieldName in node) + throw new ConfigExceptionImpl("'Key' field is specified twice", path, FR.FieldName, node.startMark()); + return (*ptr).parseField!(FR)(path.addPath(FR.FieldName), default_, ctx) + .dbgWriteRet("Using value '%s' from fieldDefaults for field '%s'", + FR.FieldName.paint(Cyan)); + } + + if (auto ptr = FR.Name in node) + { + dbgWrite("%s: YAML field is %s in node%s", + FR.Name.paint(Cyan), "present".paint(Green), + (FR.Name == FR.FieldName ? "" : " (note that field name is overriden)").paint(Yellow)); + return (*ptr).parseField!(FR)(path.addPath(FR.Name), default_, ctx) + .dbgWriteRet("Using value '%s' from YAML document for field '%s'", + FR.FieldName.paint(Cyan)); + } + + dbgWrite("%s: Field is %s from node%s", + FR.Name.paint(Cyan), "missing".paint(Red), + (FR.Name == FR.FieldName ? "" : " (note that field name is overriden)").paint(Yellow)); + + // A field is considered optional if it has an initializer that is different + // from its default value, or if it has the `Optional` UDA. + // In that case, just return this value. + static if (FR.Optional) + return FR.Default + .dbgWriteRet("Using default value '%s' for optional field '%s'", FR.FieldName.paint(Cyan)); + + // The field is not present, but it could be because it is an optional section. + // For example, the section could be defined as: + // --- + // struct RequestLimit { size_t reqs = 100; } + // struct Config { RequestLimit limits; } + // --- + // In this case we need to recurse into `RequestLimit` to check if any + // of its field is required. + else static if (mightBeOptional!FR) + { + const npath = path.addPath(FR.Name); + string[string] aa; + return Node(aa).parseMapping!(FR.Type)(npath, FR.Default, ctx, null); + } + else + throw new MissingKeyException(path, FR.Name, node.startMark()); + } + + auto convert (string FName, bool forceOptional = false) () + { + alias FR = FieldRef!(T, FName, forceOptional); + static if (__traits(getAliasThis, T).length == 1 && + __traits(getAliasThis, T)[0] == FName) + { + static assert(FR.Name == FR.FieldName, + "Field `" ~ fullyQualifiedName!(FR.Ref) ~ + "` is the target of an `alias this` and cannot have a `@Name` attribute"); + static assert(!hasConverter!(FR.Ref), + "Field `" ~ fullyQualifiedName!(FR.Ref) ~ + "` is the target of an `alias this` and cannot have a `@Converter` attribute"); + + alias convertMaybe(string FName) = convert!(FName, FR.Optional); + return FR.Type(staticMap!(convertMaybe, FieldNameTuple!(FR.Type))); + } + else + return convertField!(FR)(); + } + + debug (ConfigFillerDebug) + { + indent++; + scope (exit) indent--; + } + + T doValidation (T result) + { + static if (is(typeof(result.validate()))) + { + if (enabledState) + { + dbgWrite("%s: Calling `%s` method", + T.stringof.paint(Cyan), "validate()".paint(Green)); + result.validate(); + } + else + { + dbgWrite("%s: Ignoring `%s` method on disabled mapping", + T.stringof.paint(Cyan), "validate()".paint(Green)); + } + } + else if (enabledState) + dbgWrite("%s: No `%s` method found", + T.stringof.paint(Cyan), "validate()".paint(Yellow)); + + return result; + } + + // This might trigger things like "`this` is not accessible". + // In this case, the user most likely needs to provide a converter. + return doValidation(T(staticMap!(convert, FieldNameTuple!T))); +} + +/******************************************************************************* + + Parse a field, trying to match up the compile-time expectation with + the run time value of the Node (`nodeID`). + + This is the central point which does "type conversion", from the YAML node + to the field type. Whenever adding support for a new type, things should + happen here. + + Because a `struct` can be filled from either a mapping or a scalar, + this function will first try the converter / fromString / string ctor + methods before defaulting to fieldwise construction. + + Note that optional fields are checked before recursion happens, + so this method does not do this check. + +*******************************************************************************/ + +private FR.Type parseField (alias FR) + (Node node, string path, auto ref FR.Type defaultValue, in Context ctx) +{ + if (node.nodeID == NodeID.invalid) + throw new TypeConfigException(node, "valid", path); + + // If we reached this, it means the field is set, so just recurse + // to peel the type + static if (is(FR.Type : SetInfo!FT, FT)) + return FR.Type( + parseField!(FieldRef!(FR.Type, "value"))(node, path, defaultValue, ctx), + true); + + else static if (hasConverter!(FR.Ref)) + return wrapException(node.viaConverter!(FR), path, node.startMark()); + + else static if (hasFromString!(FR.Type)) + return wrapException(FR.Type.fromString(node.as!string), path, node.startMark()); + + else static if (hasStringCtor!(FR.Type)) + return wrapException(FR.Type(node.as!string), path, node.startMark()); + + else static if (is(immutable(FR.Type) == immutable(core.time.Duration))) + return parseDuration!(FR)(node, path, defaultValue, ctx); + + else static if (is(FR.Type == struct)) + { + if (node.nodeID != NodeID.mapping) + throw new TypeConfigException(node, "mapping (object)", path); + return node.parseMapping!(FR.Type)(path, defaultValue, ctx, null); + } + + // Handle string early as they match the sequence rule too + else static if (isSomeString!(FR.Type)) + // Use `string` type explicitly because `Variant` thinks + // `immutable(char)[]` (aka `string`) and `immutable(char[])` + // (aka `immutable(string)`) are not compatible. + return node.parseScalar!(string)(path); + // Enum too, as their base type might be an array (including strings) + else static if (is(FR.Type == enum)) + return node.parseScalar!(FR.Type)(path); + + else static if (is(FR.Type : E[K], E, K)) + { + if (node.nodeID != NodeID.mapping) + throw new TypeConfigException(node, "mapping (associative array)", path); + + static struct AAFieldRef + { + /// + private enum Ref = E.init; + /// + private alias Type = E; + } + + // Note: As of June 2022 (DMD v2.100.0), associative arrays cannot + // have initializers, hence their UX for config is less optimal. + return node.mapping().map!( + (Node.Pair pair) { + return tuple( + pair.key.get!K, + pair.value.parseField!(AAFieldRef)( + format("%s[%s]", path, pair.key.as!string), E.init, ctx)); + }).assocArray(); + + } + else static if (is(FR.Type : E[], E)) + { + static if (hasUDA!(FR.Ref, Key)) + { + if (node.nodeID != NodeID.mapping) + throw new TypeConfigException(node, "mapping (object)", path); + + static assert(getUDAs!(FR.Ref, Key).length == 1, + "`" ~ fullyQualifiedName!(FR.Ref) ~ + "` field shouldn't have more than one `Key` attribute"); + static assert(is(E == struct), + "Field `" ~ fullyQualifiedName!(FR.Ref) ~ + "` has a `Key` attribute, but is a sequence of `" ~ + fullyQualifiedName!E ~ "`, not a sequence of `struct`"); + + string key = getUDAs!(FR.Ref, Key)[0].name; + return node.mapping().map!( + (Node.Pair pair) { + if (pair.value.nodeID != NodeID.mapping) + throw new TypeConfigException( + "sequence of " ~ pair.value.nodeTypeString(), + "sequence of mapping (array of objects)", + path, null, node.startMark()); + + return pair.value.parseMapping!E( + path.addPath(pair.key.as!string), + E.init, ctx, key.length ? [ key: pair.key ] : null); + }).array(); + } + else + { + if (node.nodeID != NodeID.sequence) + throw new TypeConfigException(node, "sequence (array)", path); + + // Only those two fields are used by `parseField` + static struct ArrayFieldRef + { + /// + private enum Ref = E.init; + /// + private alias Type = E; + } + + // We pass `E.init` as default value as it is not going to be used: + // Either there is something in the YAML document, and that will be + // converted, or `sequence` will not iterate. + return node.sequence.enumerate.map!( + kv => kv.value.parseField!(ArrayFieldRef)( + format("%s[%s]", path, kv.index), E.init, ctx)) + .array(); + } + } + else + { + static assert (!is(FR.Type == union), + "`union` are not supported. Use a converter instead"); + return node.parseScalar!(FR.Type)(path); + } +} + +/// Parse a node as a scalar +private T parseScalar (T) (Node node, string path) +{ + if (node.nodeID != NodeID.scalar) + throw new TypeConfigException(node, "scalar (value)", path); + + static if (is(T == enum)) + return node.as!string.to!(T); + else + return node.as!(T); +} + +/******************************************************************************* + + Write a potentially throwing user-provided expression in ConfigException + + The user-provided hooks may throw (e.g. `fromString / the constructor), + and the error may or may not be clear. We can't do anything about a bad + message but we can wrap the thrown exception in a `ConfigException` + to provide the location in the yaml file where the error happened. + + Params: + exp = The expression that may throw + path = Path within the config file of the field + position = Position of the node in the YAML file + file = Call site file (otherwise the message would point to this function) + line = Call site line (see `file` reasoning) + + Returns: + The result of `exp` evaluation. + +*******************************************************************************/ + +private T wrapException (T) (lazy T exp, string path, Mark position, + string file = __FILE__, size_t line = __LINE__) +{ + try + return exp; + catch (Exception exc) + throw new ConstructionException(exc, path, position, file, line); +} + +/******************************************************************************* + + Parse a `core.time : Duration` from the YAML + +*******************************************************************************/ + +private core.time.Duration parseDuration (alias FR) + (Node node, string path, in core.time.Duration defaultValue, in Context ctx) +{ + // Try second form first as it convey the developer's intent explicitly + static foreach (Suffix; DurationSuffixes) + { + static if (FR.Name.endsWith(Suffix)) + { + // Since we don't have flow control at CT, we have to rely on `is()` + // check to see if variables have been defined... Ugly but it works. + // We would get "Warning: Statement is not reachable" otherwise. + enum hasMatch = true; + + if (node.nodeID != NodeID.scalar) + throw new TypeConfigException(node, "integer value (scalar)", path); + + return core.time.dur!(Suffix[1 .. $])(node.as!long); + } + } + // First form, sum all possible fields + static if (!is(typeof(hasMatch))) + { + if (node.nodeID != NodeID.mapping) + throw new DurationTypeConfigException(node, path, DurationSuffixes); + auto result = node.parseMapping!DurationPseudoMapping( + path, DurationPseudoMapping.init, ctx, null); + bool hasOneSet; + FOREACH: foreach (field; result.tupleof) + if ((hasOneSet = field.set) == true) + break FOREACH; + + if (!hasOneSet) + { + static if (FR.Optional) + return defaultValue; + else + throw new ConfigExceptionImpl("Expected one of the field's values to be set", + path, null, node.startMark()); + } + + return result.opCast!Duration(); + } +} + +/// Supported suffix names +private immutable DurationSuffixes = [ + "_weeks", "_days", "_hours", "_minutes", "_seconds", + "_msecs", "_usecs", "_hnsecs", "_nsecs", +]; + +/// Allows us to reuse parseMapping and strict parsing +private struct DurationPseudoMapping +{ + public SetInfo!long weeks; + public SetInfo!long days; + public SetInfo!long hours; + public SetInfo!long minutes; + public SetInfo!long seconds; + public SetInfo!long msecs; + public SetInfo!long usecs; + public SetInfo!long hnsecs; + public SetInfo!long nsecs; + + /// Allow conversion to a `Duration` + public Duration opCast (T : Duration) () const scope @safe pure nothrow @nogc + { + return core.time.weeks(this.weeks) + core.time.days(this.days) + + core.time.hours(this.hours) + core.time.minutes(this.minutes) + + core.time.seconds(this.seconds) + core.time.msecs(this.msecs) + + core.time.usecs(this.usecs) + core.time.hnsecs(this.hnsecs) + + core.time.nsecs(this.nsecs); + } +} + +/// Evaluates to `true` if we should recurse into the struct via `parseMapping` +private enum mightBeOptional (alias FR) = is(FR.Type == struct) && + !is(immutable(FR.Type) == immutable(core.time.Duration)) && + !hasConverter!(FR.Ref) && !hasFromString!(FR.Type) && !hasStringCtor!(FR.Type); + +/// Convenience template to check for the presence of converter(s) +private enum hasConverter (alias Field) = hasUDA!(Field, Converter); + +/// Provided a field reference `FR` which is known to have at least one converter, +/// perform basic checks and return the value after applying the converter. +private auto viaConverter (alias FR) (Node node) +{ + enum Converters = getUDAs!(FR.Ref, Converter); + static assert (Converters.length, + "Internal error: `viaConverter` called on field `" ~ + FR.FieldName ~ "` with no converter"); + + static assert(Converters.length == 1, + "Field `" ~ FR.FieldName ~ "` cannot have more than one `Converter`"); + return Converters[0].converter(node); +} + +/******************************************************************************* + + A reference to a field in a `struct` + + The compiler sometimes rejects passing fields by `alias`, or complains about + missing `this` (meaning it tries to evaluate the value). Sometimes, it also + discards the UDAs. + + To prevent this from happening, we always pass around a `FieldRef`, + which wraps the parent struct type (`T`) and the name of the field (`name`). + + To avoid any issue, eponymous usage is also avoided, hence the reference + needs to be accessed using `Ref`. A convenience `Type` alias is provided, + as well as `Default`. + +*******************************************************************************/ + +private template FieldRef (alias T, string name, bool forceOptional = false) +{ + // Renamed imports as the names exposed by this template clash + // with what we import. + import configy.Attributes : CAName = Name, CAOptional = Optional; + + /// The reference to the field + public alias Ref = __traits(getMember, T, name); + + /// Type of the field + public alias Type = typeof(Ref); + + /// The name of the field in the struct itself + public alias FieldName = name; + + /// The name used in the configuration field (taking `@Name` into account) + static if (hasUDA!(Ref, CAName)) + { + static assert (getUDAs!(Ref, CAName).length == 1, + "Field `" ~ fullyQualifiedName!(Ref) ~ + "` cannot have more than one `Name` attribute"); + + public immutable Name = getUDAs!(Ref, CAName)[0].name; + } + else + public immutable Name = FieldName; + + /// Default value of the field (may or may not be `Type.init`) + public enum Default = __traits(getMember, T.init, name); + + /// Evaluates to `true` if this field is to be considered optional + /// (does not need to be present in the YAML document) + public enum Optional = forceOptional || + hasUDA!(Ref, CAOptional) || + is(immutable(Type) == immutable(bool)) || + is(Type : SetInfo!FT, FT) || + (Default != Type.init); +} + +/// Get a tuple of `FieldRef` from a `struct` +private template FieldRefTuple (T) +{ + static assert(is(T == struct), + "Argument " ~ T.stringof ~ " to `FieldRefTuple` should be a `struct`"); + + /// + static if (__traits(getAliasThis, T).length == 0) + public alias FieldRefTuple = staticMap!(Pred, FieldNameTuple!T); + else + { + /// Tuple of strings of aliased fields + /// As of DMD v2.100.0, only a single alias this is supported in D. + private immutable AliasedFieldNames = __traits(getAliasThis, T); + static assert(AliasedFieldNames.length == 1, "Multiple `alias this` are not supported"); + + // Ignore alias to functions (if it's a property we can't do anything) + static if (isSomeFunction!(__traits(getMember, T, AliasedFieldNames))) + public alias FieldRefTuple = staticMap!(Pred, FieldNameTuple!T); + else + { + /// "Base" field names minus aliased ones + private immutable BaseFields = Erase!(AliasedFieldNames, FieldNameTuple!T); + static assert(BaseFields.length == FieldNameTuple!(T).length - 1); + + public alias FieldRefTuple = AliasSeq!( + staticMap!(Pred, BaseFields), + FieldRefTuple!(typeof(__traits(getMember, T, AliasedFieldNames)))); + } + } + + private alias Pred (string name) = FieldRef!(T, name); +} + +/// Helper predicate +private template NameIs (string searching) +{ + enum bool Pred (alias FR) = (searching == FR.Name); +} + +/// Returns whether or not the field has a `enabled` / `disabled` field, +/// and its value. If it does not, returns `true`. +private EnabledState isMappingEnabled (M) (Node node, auto ref M default_) +{ + import std.meta : Filter; + + alias EMT = Filter!(NameIs!("enabled").Pred, FieldRefTuple!M); + alias DMT = Filter!(NameIs!("disabled").Pred, FieldRefTuple!M); + + static if (EMT.length) + { + static assert (DMT.length == 0, + "`enabled` field `" ~ EMT[0].FieldName ~ + "` conflicts with `disabled` field `" ~ DMT[0].FieldName ~ "`"); + + if (auto ptr = "enabled" in node) + return EnabledState(EnabledState.Field.Enabled, (*ptr).as!bool); + return EnabledState(EnabledState.Field.Enabled, __traits(getMember, default_, EMT[0].FieldName)); + } + else static if (DMT.length) + { + if (auto ptr = "disabled" in node) + return EnabledState(EnabledState.Field.Disabled, (*ptr).as!bool); + return EnabledState(EnabledState.Field.Disabled, __traits(getMember, default_, DMT[0].FieldName)); + } + else + { + return EnabledState(EnabledState.Field.None); + } +} + +/// Retun value of `isMappingEnabled` +private struct EnabledState +{ + /// Used to determine which field controls a mapping enabled state + private enum Field + { + /// No such field, the mapping is considered enabled + None, + /// The field is named 'enabled' + Enabled, + /// The field is named 'disabled' + Disabled, + } + + /// Check if the mapping is considered enabled + public bool opCast () const scope @safe pure @nogc nothrow + { + return this.field == Field.None || + (this.field == Field.Enabled && this.fieldValue) || + (this.field == Field.Disabled && !this.fieldValue); + } + + /// Type of field found + private Field field; + + /// Value of the field, interpretation depends on `field` + private bool fieldValue; +} + +unittest +{ + static struct Config1 + { + int integer2 = 42; + @Name("notStr2") + @(42) string str2; + } + + static struct Config2 + { + Config1 c1dup = { 42, "Hello World" }; + string message = "Something"; + } + + static struct Config3 + { + Config1 c1; + int integer; + string str; + Config2 c2 = { c1dup: { integer2: 69 } }; + } + + static assert(is(FieldRef!(Config3, "c2").Type == Config2)); + static assert(FieldRef!(Config3, "c2").Default != Config2.init); + static assert(FieldRef!(Config2, "message").Default == Config2.init.message); + alias NFR1 = FieldRef!(Config3, "c2"); + alias NFR2 = FieldRef!(NFR1.Ref, "c1dup"); + alias NFR3 = FieldRef!(NFR2.Ref, "integer2"); + alias NFR4 = FieldRef!(NFR2.Ref, "str2"); + static assert(hasUDA!(NFR4.Ref, int)); + + static assert(FieldRefTuple!(Config3)[1].Name == "integer"); + static assert(FieldRefTuple!(FieldRefTuple!(Config3)[0].Type)[1].Name == "notStr2"); +} + +/// Evaluates to `true` if `T` is a `struct` with a default ctor +private enum hasFieldwiseCtor (T) = (is(T == struct) && is(typeof(() => T(T.init.tupleof)))); + +/// Evaluates to `true` if `T` has a static method that accepts a `string` and returns a `T` +private enum hasFromString (T) = is(typeof(T.fromString(string.init)) : T); + +/// Evaluates to `true` if `T` is a `struct` which accepts a single string as argument +private enum hasStringCtor (T) = (is(T == struct) && is(typeof(T.__ctor)) && + Parameters!(T.__ctor).length == 1 && + is(typeof(() => T(string.init)))); + +unittest +{ + static struct Simple + { + int value; + string otherValue; + } + + static assert( hasFieldwiseCtor!Simple); + static assert(!hasStringCtor!Simple); + + static struct PubKey + { + ubyte[] data; + + this (string hex) @safe pure nothrow @nogc{} + } + + static assert(!hasFieldwiseCtor!PubKey); + static assert( hasStringCtor!PubKey); + + static assert(!hasFieldwiseCtor!string); + static assert(!hasFieldwiseCtor!int); + static assert(!hasStringCtor!string); + static assert(!hasStringCtor!int); +} + +/// Convenience function to extend a YAML path +private string addPath (string opath, string newPart) +{ + return opath.length ? format("%s.%s", opath, newPart) : newPart; +} diff --git a/source/configy/Test.d b/source/configy/Test.d new file mode 100644 index 0000000..38227fe --- /dev/null +++ b/source/configy/Test.d @@ -0,0 +1,626 @@ +/******************************************************************************* + Contains all the tests for this library. + + Copyright: + Copyright (c) 2019-2022 BOSAGORA Foundation + All rights reserved. + + License: + MIT License. See LICENSE for details. + +*******************************************************************************/ + +module configy.Test; + +import configy.Attributes; +import configy.Exceptions; +import configy.Read; +import configy.Utils; + +import dyaml.node; + +import std.format; + +import core.time; + +/// Basic usage tests +unittest +{ + static struct Address + { + string address; + string city; + bool accessible; + } + + static struct Nested + { + Address address; + } + + static struct Config + { + bool enabled = true; + + string name = "Jessie"; + int age = 42; + double ratio = 24.42; + + Address address = { address: "Yeoksam-dong", city: "Seoul", accessible: true }; + + Nested nested = { address: { address: "Gangnam-gu", city: "Also Seoul", accessible: false } }; + } + + auto c1 = parseConfigString!Config("enabled: false", "/dev/null"); + assert(c1.name == "Jessie"); + assert(c1.age == 42); + assert(c1.ratio == 24.42); + + assert(c1.address.address == "Yeoksam-dong"); + assert(c1.address.city == "Seoul"); + assert(c1.address.accessible); + + assert(c1.nested.address.address == "Gangnam-gu"); + assert(c1.nested.address.city == "Also Seoul"); + assert(!c1.nested.address.accessible); +} + +// Tests for SetInfo +unittest +{ + static struct Address + { + string address; + string city; + bool accessible; + } + + static struct Config + { + SetInfo!int value; + SetInfo!int answer = 42; + SetInfo!string name = SetInfo!string("Lorene", false); + + SetInfo!Address address; + } + + auto c1 = parseConfigString!Config("value: 24", "/dev/null"); + assert(c1.value == 24); + assert(c1.value.set); + + assert(c1.answer.set); + assert(c1.answer == 42); + + assert(!c1.name.set); + assert(c1.name == "Lorene"); + + assert(!c1.address.set); + + auto c2 = parseConfigString!Config(` +name: Lorene +address: + address: Somewhere + city: Over the rainbow +`, "/dev/null"); + + assert(!c2.value.set); + assert(c2.name == "Lorene"); + assert(c2.name.set); + assert(c2.address.set); + assert(c2.address.address == "Somewhere"); + assert(c2.address.city == "Over the rainbow"); +} + +unittest +{ + static struct Nested { core.time.Duration timeout; } + static struct Config { Nested node; } + try + { + auto result = parseConfigString!Config("node:\n timeout:", "/dev/null"); + assert(0); + } + catch (Exception exc) + { + assert(exc.toString() == "/dev/null(1:10): node.timeout: Field is of type scalar, " ~ + "but expected a mapping with at least one of: weeks, days, hours, minutes, " ~ + "seconds, msecs, usecs, hnsecs, nsecs"); + } +} + +unittest +{ + static struct Config { string required; } + try + auto result = parseConfigString!Config("value: 24", "/dev/null"); + catch (ConfigException e) + { + assert(format("%s", e) == + "/dev/null(0:0): value: Key is not a valid member of this section. There are 1 valid keys: required"); + assert(format("%S", e) == + format("%s/dev/null%s(%s0%s:%s0%s): %svalue%s: Key is not a valid member of this section. " ~ + "There are %s1%s valid keys: %srequired%s", Yellow, Reset, Cyan, Reset, Cyan, Reset, + Yellow, Reset, Yellow, Reset, Green, Reset)); + } +} + +// Test for various type errors +unittest +{ + static struct Mapping + { + string value; + } + + static struct Config + { + @Optional Mapping map; + @Optional Mapping[] array; + int scalar; + } + + try + { + auto result = parseConfigString!Config("map: Hello World", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(0:5): map: Expected to be of type mapping (object), but is a scalar"); + } + + try + { + auto result = parseConfigString!Config("map:\n - Hello\n - World", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(1:2): map: Expected to be of type mapping (object), but is a sequence"); + } + + try + { + auto result = parseConfigString!Config("scalar:\n - Hello\n - World", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(1:2): scalar: Expected to be of type scalar (value), but is a sequence"); + } + + try + { + auto result = parseConfigString!Config("scalar:\n hello:\n World", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(1:2): scalar: Expected to be of type scalar (value), but is a mapping"); + } +} + +// Test for strict mode +unittest +{ + static struct Config + { + string value; + } + + try + { + auto result = parseConfigString!Config("valeu: This is a typo", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(0:0): valeu: Key is not a valid member of this section. There are 1 valid keys: value"); + } +} + +// Test for required key +unittest +{ + static struct Nested + { + string required; + string optional = "Default"; + } + + static struct Config + { + Nested inner; + } + + try + { + auto result = parseConfigString!Config("inner:\n optional: Not the default value", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(1:2): inner.required: Required key was not found in configuration or command line arguments"); + } +} + +// Testing 'validate()' on nested structures +unittest +{ + __gshared int validateCalls0 = 0; + __gshared int validateCalls1 = 1; + __gshared int validateCalls2 = 2; + + static struct SecondLayer + { + string value = "default"; + + public void validate () const + { + validateCalls2++; + } + } + + static struct FirstLayer + { + bool enabled = true; + SecondLayer ltwo; + + public void validate () const + { + validateCalls1++; + } + } + + static struct Config + { + FirstLayer lone; + + public void validate () const + { + validateCalls0++; + } + } + + auto r1 = parseConfigString!Config("lone:\n ltwo:\n value: Something\n", "/dev/null"); + + assert(r1.lone.ltwo.value == "Something"); + // `validateCalls` are given different value to avoid false-positive + // if they are set to 0 / mixed up + assert(validateCalls0 == 1); + assert(validateCalls1 == 2); + assert(validateCalls2 == 3); + + auto r2 = parseConfigString!Config("lone:\n enabled: false\n", "/dev/null"); + assert(validateCalls0 == 2); // + 1 + assert(validateCalls1 == 2); // Other are disabled + assert(validateCalls2 == 3); +} + +// Test the throwing ctor / fromString +unittest +{ + static struct ThrowingFromString + { + public static ThrowingFromString fromString (scope const(char)[] value) + @safe pure + { + throw new Exception("Some meaningful error message"); + } + + public int value; + } + + static struct ThrowingCtor + { + public this (scope const(char)[] value) + @safe pure + { + throw new Exception("Something went wrong... Obviously"); + } + + public int value; + } + + static struct InnerConfig + { + public int value; + @Optional ThrowingCtor ctor; + @Optional ThrowingFromString fromString; + + @Converter!int( + (Node value) { + // We have to trick DMD a bit so that it infers an `int` return + // type but doesn't emit a "Statement is not reachable" warning + if (value is Node.init || value !is Node.init ) + throw new Exception("You shall not pass"); + return 42; + }) + @Optional int converter; + } + + static struct Config + { + public InnerConfig config; + } + + try + { + auto result = parseConfigString!Config("config:\n value: 42\n ctor: 42", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(2:8): config.ctor: Something went wrong... Obviously"); + } + + try + { + auto result = parseConfigString!Config("config:\n value: 42\n fromString: 42", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(2:14): config.fromString: Some meaningful error message"); + } + + try + { + auto result = parseConfigString!Config("config:\n value: 42\n converter: 42", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(2:13): config.converter: You shall not pass"); + } + + // We also need to test with arrays, to ensure they are correctly called + static struct InnerArrayConfig + { + @Optional int value; + @Optional ThrowingCtor ctor; + @Optional ThrowingFromString fromString; + } + + static struct ArrayConfig + { + public InnerArrayConfig[] configs; + } + + try + { + auto result = parseConfigString!ArrayConfig("configs:\n - ctor: something", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(1:10): configs[0].ctor: Something went wrong... Obviously"); + } + + try + { + auto result = parseConfigString!ArrayConfig( + "configs:\n - value: 42\n - fromString: something", "/dev/null"); + assert(0); + } + catch (ConfigException exc) + { + assert(exc.toString() == "/dev/null(2:16): configs[1].fromString: Some meaningful error message"); + } +} + +// Test duplicate fields detection +unittest +{ + static struct Config + { + @Name("shadow") int value; + @Name("value") int shadow; + } + + auto result = parseConfigString!Config("shadow: 42\nvalue: 84\n", "/dev/null"); + assert(result.value == 42); + assert(result.shadow == 84); + + static struct BadConfig + { + int value; + @Name("value") int something; + } + + // Cannot test the error message, so this is as good as it gets + static assert(!is(typeof(() { + auto r = parseConfigString!BadConfig("shadow: 42\nvalue: 84\n", "/dev/null"); + }))); +} + +// Test a renamed `enabled` / `disabled` +unittest +{ + static struct ConfigA + { + @Name("enabled") bool shouldIStay; + int value; + } + + static struct ConfigB + { + @Name("disabled") bool orShouldIGo; + int value; + } + + { + auto c = parseConfigString!ConfigA("enabled: true\nvalue: 42", "/dev/null"); + assert(c.shouldIStay == true); + assert(c.value == 42); + } + + { + auto c = parseConfigString!ConfigB("disabled: false\nvalue: 42", "/dev/null"); + assert(c.orShouldIGo == false); + assert(c.value == 42); + } +} + +// Test for 'mightBeOptional' & missing key +unittest +{ + static struct RequestLimit { size_t reqs = 100; } + static struct Nested { @Name("jay") int value; } + static struct Config { @Name("chris") Nested value; RequestLimit limits; } + + auto r = parseConfigString!Config("chris:\n jay: 42", "/dev/null"); + assert(r.limits.reqs == 100); + + try + { + auto _ = parseConfigString!Config("limits:\n reqs: 42", "/dev/null"); + } + catch (ConfigException exc) + { + assert(exc.toString() == "(0:0): chris.jay: Required key was not found in configuration or command line arguments"); + } +} + +// Support for associative arrays +unittest +{ + static struct Nested + { + int[string] answers; + } + + static struct Parent + { + Nested[string] questions; + string[int] names; + } + + auto c = parseConfigString!Parent( +`names: + 42: "Forty two" + 97: "Quatre vingt dix sept" +questions: + first: + answers: + # Need to use quotes here otherwise it gets interpreted as + # true / false, perhaps a dyaml issue ? + 'yes': 42 + 'no': 24 + second: + answers: + maybe: 69 + whynot: 20 +`, "/dev/null"); + + assert(c.names == [42: "Forty two", 97: "Quatre vingt dix sept"]); + assert(c.questions.length == 2); + assert(c.questions["first"] == Nested(["yes": 42, "no": 24])); + assert(c.questions["second"] == Nested(["maybe": 69, "whynot": 20])); +} + +unittest +{ + static struct FlattenMe + { + int value; + string name; + } + + static struct Config + { + FlattenMe flat = FlattenMe(24, "Four twenty"); + alias flat this; + + FlattenMe not_flat; + } + + auto c = parseConfigString!Config( + "value: 42\nname: John\nnot_flat:\n value: 69\n name: Henry", + "/dev/null"); + assert(c.flat.value == 42); + assert(c.flat.name == "John"); + assert(c.not_flat.value == 69); + assert(c.not_flat.name == "Henry"); + + auto c2 = parseConfigString!Config( + "not_flat:\n value: 69\n name: Henry", "/dev/null"); + assert(c2.flat.value == 24); + assert(c2.flat.name == "Four twenty"); + + static struct OptConfig + { + @Optional FlattenMe flat; + alias flat this; + + int value; + } + auto c3 = parseConfigString!OptConfig("value: 69\n", "/dev/null"); + assert(c3.value == 69); +} + +unittest +{ + static struct Config + { + @Name("names") + string[] names_; + + size_t names () const scope @safe pure nothrow @nogc + { + return this.names_.length; + } + } + + auto c = parseConfigString!Config("names:\n - John\n - Luca\n", "/dev/null"); + assert(c.names_ == [ "John", "Luca" ]); + assert(c.names == 2); +} + +// Make sure unions don't compile +unittest +{ + static union MyUnion + { + string value; + int number; + } + + static struct Config + { + MyUnion hello; + } + + static assert(!is(typeof(parseConfigString!Config("hello: world\n", "/dev/null")))); + static assert(!is(typeof(parseConfigString!MyUnion("hello: world\n", "/dev/null")))); +} + +// Test the `@Key` attribute +unittest +{ + static struct Interface + { + string name; + string static_ip; + } + + static struct Config + { + string profile; + + @Key("name") + immutable(Interface)[] ifaces = [ + Interface("lo", "127.0.0.1"), + ]; + } + + auto c = parseConfigString!Config(`profile: default +ifaces: + eth0: + static_ip: "192.168.1.42" + lo: + static_ip: "127.0.0.42" +`, "/dev/null"); + assert(c.ifaces.length == 2); + assert(c.ifaces == [ Interface("eth0", "192.168.1.42"), Interface("lo", "127.0.0.42")]); +} diff --git a/source/configy/Utils.d b/source/configy/Utils.d new file mode 100644 index 0000000..f2ce79d --- /dev/null +++ b/source/configy/Utils.d @@ -0,0 +1,124 @@ +/******************************************************************************* + + Utilities used internally by the config parser. + + Compile this library with `-debug=ConfigFillerDebug` to get verbose output. + This can be achieved with `debugVersions` in dub, or by depending on the + `debug` configuration provided by `dub.json`. + + Copyright: + Copyright (c) 2019-2022 BOSAGORA Foundation + All rights reserved. + + License: + MIT License. See LICENSE for details. + +*******************************************************************************/ + +module configy.Utils; + +import std.format; + +/// Type of sink used by the `toString` +package alias SinkType = void delegate (in char[]) @safe; + +/******************************************************************************* + + Debugging utility for config filler + + Since this module does a lot of meta-programming, some things can easily + go wrong. For example, a condition being false might happen because it is + genuinely false or because the condition is buggy. + + To make figuring out if a config is properly parsed or not, a little utility + (config-dumper) exists, which will provide a verbose output of what the + config filler does. To do this, `config-dumper` is compiled with + the below `debug` version. + +*******************************************************************************/ + +debug (ConfigFillerDebug) +{ + /// A thin wrapper around `stderr.writefln` with indentation + package void dbgWrite (Args...) (string fmt, Args args) + { + import std.stdio; + stderr.write(IndentChars[0 .. indent >= IndentChars.length ? $ : indent]); + stderr.writefln(fmt, args); + } + + /// Log a value that is to be returned + /// The value will be the first argument and painted yellow + package T dbgWriteRet (T, Args...) (auto ref T return_, string fmt, Args args) + { + dbgWrite(fmt, return_.paint(Yellow), args); + return return_; + } + + /// The current indentation + package size_t indent; + + /// Helper for indentation (who needs more than 16 levels of indent?) + private immutable IndentChars = "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"; +} +else +{ + /// No-op + package void dbgWrite (Args...) (string fmt, lazy Args args) {} + + /// Ditto + package T dbgWriteRet (T, Args...) (auto ref T return_, string fmt, lazy Args args) + { + return return_; + } +} + +/// Thin wrapper to simplify colorization +package struct Colored (T) +{ + /// Color used + private string color; + + /// Value to print + private T value; + + /// Hook for `formattedWrite` + public void toString (scope SinkType sink) + { + static if (is(typeof(T.init.length) : size_t)) + if (this.value.length == 0) return; + + formattedWrite(sink, "%s%s%s", this.color, this.value, Reset); + } +} + +/// Ditto +package Colored!T paint (T) (T arg, string color) +{ + return Colored!T(color, arg); +} + +/// Paint `arg` in color `ifTrue` if `cond` evaluates to `true`, use color `ifFalse` otherwise +package Colored!T paintIf (T) (T arg, bool cond, string ifTrue, string ifFalse) +{ + return Colored!T(cond ? ifTrue : ifFalse, arg); +} + +/// Paint a boolean in green if `true`, red otherwise, unless `reverse` is set to `true`, +/// in which case the colors are swapped +package Colored!bool paintBool (bool value, bool reverse = false) +{ + return value.paintIf(reverse ^ value, Green, Red); +} + +/// Reset the foreground color used +package immutable Reset = "\u001b[0m"; +/// Set the foreground color to red, used for `false`, missing, errors, etc... +package immutable Red = "\u001b[31m"; +/// Set the foreground color to red, used for warnings and other things +/// that should draw attention but do not pose an immediate issue +package immutable Yellow = "\u001b[33m"; +/// Set the foreground color to green, used for `true`, present, etc... +package immutable Green = "\u001b[32m"; +/// Set the foreground color to green, used field names / path +package immutable Cyan = "\u001b[36m"; diff --git a/source/dyaml/composer.d b/source/dyaml/composer.d new file mode 100644 index 0000000..c000b02 --- /dev/null +++ b/source/dyaml/composer.d @@ -0,0 +1,375 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * Composes nodes from YAML events provided by parser. + * Code based on PyYAML: http://www.pyyaml.org + */ +module dyaml.composer; + +import core.memory; + +import std.algorithm; +import std.array; +import std.conv; +import std.exception; +import std.range; +import std.typecons; + +import dyaml.constructor; +import dyaml.event; +import dyaml.exception; +import dyaml.node; +import dyaml.parser; +import dyaml.resolver; + + +package: +/** + * Exception thrown at composer errors. + * + * See_Also: MarkedYAMLException + */ +class ComposerException : MarkedYAMLException +{ + mixin MarkedExceptionCtors; +} + +///Composes YAML documents from events provided by a Parser. +struct Composer +{ + private: + ///Parser providing YAML events. + Parser parser_; + ///Resolver resolving tags (data types). + Resolver resolver_; + ///Nodes associated with anchors. Used by YAML aliases. + Node[string] anchors_; + + ///Used to reduce allocations when creating pair arrays. + /// + ///We need one appender for each nesting level that involves + ///a pair array, as the inner levels are processed as a + ///part of the outer levels. Used as a stack. + Appender!(Node.Pair[])[] pairAppenders_; + ///Used to reduce allocations when creating node arrays. + /// + ///We need one appender for each nesting level that involves + ///a node array, as the inner levels are processed as a + ///part of the outer levels. Used as a stack. + Appender!(Node[])[] nodeAppenders_; + + public: + /** + * Construct a composer. + * + * Params: parser = Parser to provide YAML events. + * resolver = Resolver to resolve tags (data types). + */ + this(Parser parser, Resolver resolver) @safe + { + parser_ = parser; + resolver_ = resolver; + } + + /** + * Determine if there are any nodes left. + * + * Must be called before loading as it handles the stream start event. + */ + bool checkNode() @safe + { + // If next event is stream start, skip it + parser_.skipOver!"a.id == b"(EventID.streamStart); + + //True if there are more documents available. + return parser_.front.id != EventID.streamEnd; + } + + ///Get a YAML document as a node (the root of the document). + Node getNode() @safe + { + //Get the root node of the next document. + assert(parser_.front.id != EventID.streamEnd, + "Trying to get a node from Composer when there is no node to " ~ + "get. use checkNode() to determine if there is a node."); + + return composeDocument(); + } + + private: + + void skipExpected(const EventID id) @safe + { + const foundExpected = parser_.skipOver!"a.id == b"(id); + assert(foundExpected, text("Expected ", id, " not found.")); + } + ///Ensure that appenders for specified nesting levels exist. + /// + ///Params: pairAppenderLevel = Current level in the pair appender stack. + /// nodeAppenderLevel = Current level the node appender stack. + void ensureAppendersExist(const uint pairAppenderLevel, const uint nodeAppenderLevel) + @safe + { + while(pairAppenders_.length <= pairAppenderLevel) + { + pairAppenders_ ~= appender!(Node.Pair[])(); + } + while(nodeAppenders_.length <= nodeAppenderLevel) + { + nodeAppenders_ ~= appender!(Node[])(); + } + } + + ///Compose a YAML document and return its root node. + Node composeDocument() @safe + { + skipExpected(EventID.documentStart); + + //Compose the root node. + Node node = composeNode(0, 0); + + skipExpected(EventID.documentEnd); + + anchors_.destroy(); + return node; + } + + /// Compose a node. + /// + /// Params: pairAppenderLevel = Current level of the pair appender stack. + /// nodeAppenderLevel = Current level of the node appender stack. + Node composeNode(const uint pairAppenderLevel, const uint nodeAppenderLevel) @safe + { + if(parser_.front.id == EventID.alias_) + { + const event = parser_.front; + parser_.popFront(); + const anchor = event.anchor; + enforce((anchor in anchors_) !is null, + new ComposerException("Found undefined alias: " ~ anchor, + event.startMark)); + + //If the node referenced by the anchor is uninitialized, + //it's not finished, i.e. we're currently composing it + //and trying to use it recursively here. + enforce(anchors_[anchor] != Node(), + new ComposerException("Found recursive alias: " ~ anchor, + event.startMark)); + + return anchors_[anchor]; + } + + const event = parser_.front; + const anchor = event.anchor; + if((anchor !is null) && (anchor in anchors_) !is null) + { + throw new ComposerException("Found duplicate anchor: " ~ anchor, + event.startMark); + } + + Node result; + //Associate the anchor, if any, with an uninitialized node. + //used to detect duplicate and recursive anchors. + if(anchor !is null) + { + anchors_[anchor] = Node(); + } + + switch (parser_.front.id) + { + case EventID.scalar: + result = composeScalarNode(); + break; + case EventID.sequenceStart: + result = composeSequenceNode(pairAppenderLevel, nodeAppenderLevel); + break; + case EventID.mappingStart: + result = composeMappingNode(pairAppenderLevel, nodeAppenderLevel); + break; + default: assert(false, "This code should never be reached"); + } + + if(anchor !is null) + { + anchors_[anchor] = result; + } + return result; + } + + ///Compose a scalar node. + Node composeScalarNode() @safe + { + const event = parser_.front; + parser_.popFront(); + const tag = resolver_.resolve(NodeID.scalar, event.tag, event.value, + event.implicit); + + Node node = constructNode(event.startMark, event.endMark, tag, + event.value); + node.scalarStyle = event.scalarStyle; + + return node; + } + + /// Compose a sequence node. + /// + /// Params: pairAppenderLevel = Current level of the pair appender stack. + /// nodeAppenderLevel = Current level of the node appender stack. + Node composeSequenceNode(const uint pairAppenderLevel, const uint nodeAppenderLevel) + @safe + { + ensureAppendersExist(pairAppenderLevel, nodeAppenderLevel); + auto nodeAppender = &(nodeAppenders_[nodeAppenderLevel]); + + const startEvent = parser_.front; + parser_.popFront(); + const tag = resolver_.resolve(NodeID.sequence, startEvent.tag, null, + startEvent.implicit); + + while(parser_.front.id != EventID.sequenceEnd) + { + nodeAppender.put(composeNode(pairAppenderLevel, nodeAppenderLevel + 1)); + } + + Node node = constructNode(startEvent.startMark, parser_.front.endMark, + tag, nodeAppender.data.dup); + node.collectionStyle = startEvent.collectionStyle; + parser_.popFront(); + nodeAppender.clear(); + + return node; + } + + /** + * Flatten a node, merging it with nodes referenced through YAMLMerge data type. + * + * Node must be a mapping or a sequence of mappings. + * + * Params: root = Node to flatten. + * startMark = Start position of the node. + * endMark = End position of the node. + * pairAppenderLevel = Current level of the pair appender stack. + * nodeAppenderLevel = Current level of the node appender stack. + * + * Returns: Flattened mapping as pairs. + */ + Node.Pair[] flatten(ref Node root, const Mark startMark, const Mark endMark, + const uint pairAppenderLevel, const uint nodeAppenderLevel) @safe + { + void error(Node node) + { + //this is Composer, but the code is related to Constructor. + throw new ConstructorException("While constructing a mapping, " ~ + "expected a mapping or a list of " ~ + "mappings for merging, but found: " ~ + text(node.type) ~ + " NOTE: line/column shows topmost parent " ~ + "to which the content is being merged", + startMark, endMark); + } + + ensureAppendersExist(pairAppenderLevel, nodeAppenderLevel); + auto pairAppender = &(pairAppenders_[pairAppenderLevel]); + + final switch (root.nodeID) + { + case NodeID.mapping: + Node[] toMerge; + toMerge.reserve(root.length); + foreach (ref Node key, ref Node value; root) + { + if(key.type == NodeType.merge) + { + toMerge ~= value; + } + else + { + auto temp = Node.Pair(key, value); + pairAppender.put(temp); + } + } + foreach (node; toMerge) + { + pairAppender.put(flatten(node, startMark, endMark, + pairAppenderLevel + 1, nodeAppenderLevel)); + } + break; + case NodeID.sequence: + foreach (ref Node node; root) + { + if (node.nodeID != NodeID.mapping) + { + error(node); + } + pairAppender.put(flatten(node, startMark, endMark, + pairAppenderLevel + 1, nodeAppenderLevel)); + } + break; + case NodeID.scalar: + case NodeID.invalid: + error(root); + break; + } + + auto flattened = pairAppender.data.dup; + pairAppender.clear(); + + return flattened; + } + + /// Compose a mapping node. + /// + /// Params: pairAppenderLevel = Current level of the pair appender stack. + /// nodeAppenderLevel = Current level of the node appender stack. + Node composeMappingNode(const uint pairAppenderLevel, const uint nodeAppenderLevel) + @safe + { + ensureAppendersExist(pairAppenderLevel, nodeAppenderLevel); + const startEvent = parser_.front; + parser_.popFront(); + const tag = resolver_.resolve(NodeID.mapping, startEvent.tag, null, + startEvent.implicit); + auto pairAppender = &(pairAppenders_[pairAppenderLevel]); + + Tuple!(Node, Mark)[] toMerge; + while(parser_.front.id != EventID.mappingEnd) + { + auto pair = Node.Pair(composeNode(pairAppenderLevel + 1, nodeAppenderLevel), + composeNode(pairAppenderLevel + 1, nodeAppenderLevel)); + + //Need to flatten and merge the node referred by YAMLMerge. + if(pair.key.type == NodeType.merge) + { + toMerge ~= tuple(pair.value, cast(Mark)parser_.front.endMark); + } + //Not YAMLMerge, just add the pair. + else + { + pairAppender.put(pair); + } + } + foreach(node; toMerge) + { + merge(*pairAppender, flatten(node[0], startEvent.startMark, node[1], + pairAppenderLevel + 1, nodeAppenderLevel)); + } + auto numUnique = pairAppender.data.dup + .sort!((x,y) => x.key > y.key) + .uniq!((x,y) => x.key == y.key) + .walkLength; + enforce(numUnique == pairAppender.data.length, + new ComposerException("Duplicate key found in mapping", parser_.front.startMark)); + + Node node = constructNode(startEvent.startMark, parser_.front.endMark, + tag, pairAppender.data.dup); + node.collectionStyle = startEvent.collectionStyle; + parser_.popFront(); + + pairAppender.clear(); + return node; + } +} diff --git a/source/dyaml/constructor.d b/source/dyaml/constructor.d new file mode 100644 index 0000000..4cd1546 --- /dev/null +++ b/source/dyaml/constructor.d @@ -0,0 +1,611 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * Class that processes YAML mappings, sequences and scalars into nodes. + * This can be used to add custom data types. A tutorial can be found + * $(LINK2 https://dlang-community.github.io/D-YAML/, here). + */ +module dyaml.constructor; + + +import std.array; +import std.algorithm; +import std.base64; +import std.container; +import std.conv; +import std.datetime; +import std.exception; +import std.regex; +import std.string; +import std.typecons; +import std.utf; + +import dyaml.node; +import dyaml.exception; +import dyaml.style; + +package: + +// Exception thrown at constructor errors. +class ConstructorException : YAMLException +{ + /// Construct a ConstructorException. + /// + /// Params: msg = Error message. + /// start = Start position of the error context. + /// end = End position of the error context. + this(string msg, Mark start, Mark end, string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow + { + super(msg ~ "\nstart: " ~ start.toString() ~ "\nend: " ~ end.toString(), + file, line); + } +} + +/** Constructs YAML values. + * + * Each YAML scalar, sequence or mapping has a tag specifying its data type. + * Constructor uses user-specifyable functions to create a node of desired + * data type from a scalar, sequence or mapping. + * + * + * Each of these functions is associated with a tag, and can process either + * a scalar, a sequence, or a mapping. The constructor passes each value to + * the function with corresponding tag, which then returns the resulting value + * that can be stored in a node. + * + * If a tag is detected with no known constructor function, it is considered an error. + */ +/* + * Construct a node. + * + * Params: start = Start position of the node. + * end = End position of the node. + * tag = Tag (data type) of the node. + * value = Value to construct node from (string, nodes or pairs). + * style = Style of the node (scalar or collection style). + * + * Returns: Constructed node. + */ +Node constructNode(T)(const Mark start, const Mark end, const string tag, + T value) @safe + if((is(T : string) || is(T == Node[]) || is(T == Node.Pair[]))) +{ + Node newNode; + try + { + switch(tag) + { + case "tag:yaml.org,2002:null": + newNode = Node(YAMLNull(), tag); + break; + case "tag:yaml.org,2002:bool": + static if(is(T == string)) + { + newNode = Node(constructBool(value), tag); + break; + } + else throw new Exception("Only scalars can be bools"); + case "tag:yaml.org,2002:int": + static if(is(T == string)) + { + newNode = Node(constructLong(value), tag); + break; + } + else throw new Exception("Only scalars can be ints"); + case "tag:yaml.org,2002:float": + static if(is(T == string)) + { + newNode = Node(constructReal(value), tag); + break; + } + else throw new Exception("Only scalars can be floats"); + case "tag:yaml.org,2002:binary": + static if(is(T == string)) + { + newNode = Node(constructBinary(value), tag); + break; + } + else throw new Exception("Only scalars can be binary data"); + case "tag:yaml.org,2002:timestamp": + static if(is(T == string)) + { + newNode = Node(constructTimestamp(value), tag); + break; + } + else throw new Exception("Only scalars can be timestamps"); + case "tag:yaml.org,2002:str": + static if(is(T == string)) + { + newNode = Node(constructString(value), tag); + break; + } + else throw new Exception("Only scalars can be strings"); + case "tag:yaml.org,2002:value": + static if(is(T == string)) + { + newNode = Node(constructString(value), tag); + break; + } + else throw new Exception("Only scalars can be values"); + case "tag:yaml.org,2002:omap": + static if(is(T == Node[])) + { + newNode = Node(constructOrderedMap(value), tag); + break; + } + else throw new Exception("Only sequences can be ordered maps"); + case "tag:yaml.org,2002:pairs": + static if(is(T == Node[])) + { + newNode = Node(constructPairs(value), tag); + break; + } + else throw new Exception("Only sequences can be pairs"); + case "tag:yaml.org,2002:set": + static if(is(T == Node.Pair[])) + { + newNode = Node(constructSet(value), tag); + break; + } + else throw new Exception("Only mappings can be sets"); + case "tag:yaml.org,2002:seq": + static if(is(T == Node[])) + { + newNode = Node(constructSequence(value), tag); + break; + } + else throw new Exception("Only sequences can be sequences"); + case "tag:yaml.org,2002:map": + static if(is(T == Node.Pair[])) + { + newNode = Node(constructMap(value), tag); + break; + } + else throw new Exception("Only mappings can be maps"); + case "tag:yaml.org,2002:merge": + newNode = Node(YAMLMerge(), tag); + break; + default: + newNode = Node(value, tag); + break; + } + } + catch(Exception e) + { + throw new ConstructorException("Error constructing " ~ typeid(T).toString() + ~ ":\n" ~ e.msg, start, end); + } + + newNode.startMark_ = start; + + return newNode; +} + +private: +// Construct a boolean _node. +bool constructBool(const string str) @safe +{ + string value = str.toLower(); + if(value.among!("yes", "true", "on")){return true;} + if(value.among!("no", "false", "off")){return false;} + throw new Exception("Unable to parse boolean value: " ~ value); +} + +// Construct an integer (long) _node. +long constructLong(const string str) @safe +{ + string value = str.replace("_", ""); + const char c = value[0]; + const long sign = c != '-' ? 1 : -1; + if(c == '-' || c == '+') + { + value = value[1 .. $]; + } + + enforce(value != "", new Exception("Unable to parse float value: " ~ value)); + + long result; + try + { + //Zero. + if(value == "0") {result = cast(long)0;} + //Binary. + else if(value.startsWith("0b")){result = sign * to!int(value[2 .. $], 2);} + //Hexadecimal. + else if(value.startsWith("0x")){result = sign * to!int(value[2 .. $], 16);} + //Octal. + else if(value[0] == '0') {result = sign * to!int(value, 8);} + //Sexagesimal. + else if(value.canFind(":")) + { + long val; + long base = 1; + foreach_reverse(digit; value.split(":")) + { + val += to!long(digit) * base; + base *= 60; + } + result = sign * val; + } + //Decimal. + else{result = sign * to!long(value);} + } + catch(ConvException e) + { + throw new Exception("Unable to parse integer value: " ~ value); + } + + return result; +} +@safe unittest +{ + string canonical = "685230"; + string decimal = "+685_230"; + string octal = "02472256"; + string hexadecimal = "0x_0A_74_AE"; + string binary = "0b1010_0111_0100_1010_1110"; + string sexagesimal = "190:20:30"; + + assert(685230 == constructLong(canonical)); + assert(685230 == constructLong(decimal)); + assert(685230 == constructLong(octal)); + assert(685230 == constructLong(hexadecimal)); + assert(685230 == constructLong(binary)); + assert(685230 == constructLong(sexagesimal)); +} + +// Construct a floating point (real) _node. +real constructReal(const string str) @safe +{ + string value = str.replace("_", "").toLower(); + const char c = value[0]; + const real sign = c != '-' ? 1.0 : -1.0; + if(c == '-' || c == '+') + { + value = value[1 .. $]; + } + + enforce(value != "" && value != "nan" && value != "inf" && value != "-inf", + new Exception("Unable to parse float value: " ~ value)); + + real result; + try + { + //Infinity. + if (value == ".inf"){result = sign * real.infinity;} + //Not a Number. + else if(value == ".nan"){result = real.nan;} + //Sexagesimal. + else if(value.canFind(":")) + { + real val = 0.0; + real base = 1.0; + foreach_reverse(digit; value.split(":")) + { + val += to!real(digit) * base; + base *= 60.0; + } + result = sign * val; + } + //Plain floating point. + else{result = sign * to!real(value);} + } + catch(ConvException e) + { + throw new Exception("Unable to parse float value: \"" ~ value ~ "\""); + } + + return result; +} +@safe unittest +{ + bool eq(real a, real b, real epsilon = 0.2) @safe + { + return a >= (b - epsilon) && a <= (b + epsilon); + } + + string canonical = "6.8523015e+5"; + string exponential = "685.230_15e+03"; + string fixed = "685_230.15"; + string sexagesimal = "190:20:30.15"; + string negativeInf = "-.inf"; + string NaN = ".NaN"; + + assert(eq(685230.15, constructReal(canonical))); + assert(eq(685230.15, constructReal(exponential))); + assert(eq(685230.15, constructReal(fixed))); + assert(eq(685230.15, constructReal(sexagesimal))); + assert(eq(-real.infinity, constructReal(negativeInf))); + assert(to!string(constructReal(NaN)) == "nan"); +} + +// Construct a binary (base64) _node. +ubyte[] constructBinary(const string value) @safe +{ + import std.ascii : newline; + import std.array : array; + + // For an unknown reason, this must be nested to work (compiler bug?). + try + { + return Base64.decode(value.representation.filter!(c => !newline.canFind(c)).array); + } + catch(Base64Exception e) + { + throw new Exception("Unable to decode base64 value: " ~ e.msg); + } +} + +@safe unittest +{ + auto test = "The Answer: 42".representation; + char[] buffer; + buffer.length = 256; + string input = Base64.encode(test, buffer).idup; + const value = constructBinary(input); + assert(value == test); + assert(value == [84, 104, 101, 32, 65, 110, 115, 119, 101, 114, 58, 32, 52, 50]); +} + +// Construct a timestamp (SysTime) _node. +SysTime constructTimestamp(const string str) @safe +{ + string value = str; + + auto YMDRegexp = regex("^([0-9][0-9][0-9][0-9])-([0-9][0-9]?)-([0-9][0-9]?)"); + auto HMSRegexp = regex("^[Tt \t]+([0-9][0-9]?):([0-9][0-9]):([0-9][0-9])(\\.[0-9]*)?"); + auto TZRegexp = regex("^[ \t]*Z|([-+][0-9][0-9]?)(:[0-9][0-9])?"); + + try + { + // First, get year, month and day. + auto matches = match(value, YMDRegexp); + + enforce(!matches.empty, + new Exception("Unable to parse timestamp value: " ~ value)); + + auto captures = matches.front.captures; + const year = to!int(captures[1]); + const month = to!int(captures[2]); + const day = to!int(captures[3]); + + // If available, get hour, minute, second and fraction, if present. + value = matches.front.post; + matches = match(value, HMSRegexp); + if(matches.empty) + { + return SysTime(DateTime(year, month, day), UTC()); + } + + captures = matches.front.captures; + const hour = to!int(captures[1]); + const minute = to!int(captures[2]); + const second = to!int(captures[3]); + const hectonanosecond = cast(int)(to!real("0" ~ captures[4]) * 10_000_000); + + // If available, get timezone. + value = matches.front.post; + matches = match(value, TZRegexp); + if(matches.empty || matches.front.captures[0] == "Z") + { + // No timezone. + return SysTime(DateTime(year, month, day, hour, minute, second), + hectonanosecond.dur!"hnsecs", UTC()); + } + + // We have a timezone, so parse it. + captures = matches.front.captures; + int sign = 1; + int tzHours; + if(!captures[1].empty) + { + if(captures[1][0] == '-') {sign = -1;} + tzHours = to!int(captures[1][1 .. $]); + } + const tzMinutes = (!captures[2].empty) ? to!int(captures[2][1 .. $]) : 0; + const tzOffset = dur!"minutes"(sign * (60 * tzHours + tzMinutes)); + + return SysTime(DateTime(year, month, day, hour, minute, second), + hectonanosecond.dur!"hnsecs", + new immutable SimpleTimeZone(tzOffset)); + } + catch(ConvException e) + { + throw new Exception("Unable to parse timestamp value " ~ value ~ " : " ~ e.msg); + } + catch(DateTimeException e) + { + throw new Exception("Invalid timestamp value " ~ value ~ " : " ~ e.msg); + } + + assert(false, "This code should never be reached"); +} +@safe unittest +{ + string timestamp(string value) + { + return constructTimestamp(value).toISOString(); + } + + string canonical = "2001-12-15T02:59:43.1Z"; + string iso8601 = "2001-12-14t21:59:43.10-05:00"; + string spaceSeparated = "2001-12-14 21:59:43.10 -5"; + string noTZ = "2001-12-15 2:59:43.10"; + string noFraction = "2001-12-15 2:59:43"; + string ymd = "2002-12-14"; + + assert(timestamp(canonical) == "20011215T025943.1Z"); + //avoiding float conversion errors + assert(timestamp(iso8601) == "20011214T215943.0999999-05:00" || + timestamp(iso8601) == "20011214T215943.1-05:00"); + assert(timestamp(spaceSeparated) == "20011214T215943.0999999-05:00" || + timestamp(spaceSeparated) == "20011214T215943.1-05:00"); + assert(timestamp(noTZ) == "20011215T025943.0999999Z" || + timestamp(noTZ) == "20011215T025943.1Z"); + assert(timestamp(noFraction) == "20011215T025943Z"); + assert(timestamp(ymd) == "20021214T000000Z"); +} + +// Construct a string _node. +string constructString(const string str) @safe +{ + return str; +} + +// Convert a sequence of single-element mappings into a sequence of pairs. +Node.Pair[] getPairs(string type, const Node[] nodes) @safe +{ + Node.Pair[] pairs; + pairs.reserve(nodes.length); + foreach(node; nodes) + { + enforce(node.nodeID == NodeID.mapping && node.length == 1, + new Exception("While constructing " ~ type ~ + ", expected a mapping with single element")); + + pairs ~= node.as!(Node.Pair[]); + } + + return pairs; +} + +// Construct an ordered map (ordered sequence of key:value pairs without duplicates) _node. +Node.Pair[] constructOrderedMap(const Node[] nodes) @safe +{ + auto pairs = getPairs("ordered map", nodes); + + //Detect duplicates. + //TODO this should be replaced by something with deterministic memory allocation. + auto keys = new RedBlackTree!Node(); + foreach(ref pair; pairs) + { + enforce(!(pair.key in keys), + new Exception("Duplicate entry in an ordered map: " + ~ pair.key.debugString())); + keys.insert(pair.key); + } + return pairs; +} +@safe unittest +{ + Node[] alternateTypes(uint length) @safe + { + Node[] pairs; + foreach(long i; 0 .. length) + { + auto pair = (i % 2) ? Node.Pair(i.to!string, i) : Node.Pair(i, i.to!string); + pairs ~= Node([pair]); + } + return pairs; + } + + Node[] sameType(uint length) @safe + { + Node[] pairs; + foreach(long i; 0 .. length) + { + auto pair = Node.Pair(i.to!string, i); + pairs ~= Node([pair]); + } + return pairs; + } + + assertThrown(constructOrderedMap(alternateTypes(8) ~ alternateTypes(2))); + assertNotThrown(constructOrderedMap(alternateTypes(8))); + assertThrown(constructOrderedMap(sameType(64) ~ sameType(16))); + assertThrown(constructOrderedMap(alternateTypes(64) ~ alternateTypes(16))); + assertNotThrown(constructOrderedMap(sameType(64))); + assertNotThrown(constructOrderedMap(alternateTypes(64))); +} + +// Construct a pairs (ordered sequence of key: value pairs allowing duplicates) _node. +Node.Pair[] constructPairs(const Node[] nodes) @safe +{ + return getPairs("pairs", nodes); +} + +// Construct a set _node. +Node[] constructSet(const Node.Pair[] pairs) @safe +{ + // In future, the map here should be replaced with something with deterministic + // memory allocation if possible. + // Detect duplicates. + ubyte[Node] map; + Node[] nodes; + nodes.reserve(pairs.length); + foreach(pair; pairs) + { + enforce((pair.key in map) is null, new Exception("Duplicate entry in a set")); + map[pair.key] = 0; + nodes ~= pair.key; + } + + return nodes; +} +@safe unittest +{ + Node.Pair[] set(uint length) @safe + { + Node.Pair[] pairs; + foreach(long i; 0 .. length) + { + pairs ~= Node.Pair(i.to!string, YAMLNull()); + } + + return pairs; + } + + auto DuplicatesShort = set(8) ~ set(2); + auto noDuplicatesShort = set(8); + auto DuplicatesLong = set(64) ~ set(4); + auto noDuplicatesLong = set(64); + + bool eq(Node.Pair[] a, Node[] b) + { + if(a.length != b.length){return false;} + foreach(i; 0 .. a.length) + { + if(a[i].key != b[i]) + { + return false; + } + } + return true; + } + + auto nodeDuplicatesShort = DuplicatesShort.dup; + auto nodeNoDuplicatesShort = noDuplicatesShort.dup; + auto nodeDuplicatesLong = DuplicatesLong.dup; + auto nodeNoDuplicatesLong = noDuplicatesLong.dup; + + assertThrown(constructSet(nodeDuplicatesShort)); + assertNotThrown(constructSet(nodeNoDuplicatesShort)); + assertThrown(constructSet(nodeDuplicatesLong)); + assertNotThrown(constructSet(nodeNoDuplicatesLong)); +} + +// Construct a sequence (array) _node. +Node[] constructSequence(Node[] nodes) @safe +{ + return nodes; +} + +// Construct an unordered map (unordered set of key:value _pairs without duplicates) _node. +Node.Pair[] constructMap(Node.Pair[] pairs) @safe +{ + //Detect duplicates. + //TODO this should be replaced by something with deterministic memory allocation. + auto keys = new RedBlackTree!Node(); + foreach(ref pair; pairs) + { + enforce(!(pair.key in keys), + new Exception("Duplicate entry in a map: " ~ pair.key.debugString())); + keys.insert(pair.key); + } + return pairs; +} diff --git a/source/dyaml/dumper.d b/source/dyaml/dumper.d new file mode 100644 index 0000000..03d3620 --- /dev/null +++ b/source/dyaml/dumper.d @@ -0,0 +1,298 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * YAML dumper. + * + * Code based on $(LINK2 http://www.pyyaml.org, PyYAML). + */ +module dyaml.dumper; + +import std.array; +import std.range.primitives; +import std.typecons; + +import dyaml.emitter; +import dyaml.event; +import dyaml.exception; +import dyaml.linebreak; +import dyaml.node; +import dyaml.representer; +import dyaml.resolver; +import dyaml.serializer; +import dyaml.style; +import dyaml.tagdirective; + + +/** + * Dumps YAML documents to files or streams. + * + * User specified Representer and/or Resolver can be used to support new + * tags / data types. + * + * Setters are provided to affect output details (style, etc.). + */ +auto dumper() +{ + auto dumper = Dumper(); + dumper.resolver = Resolver.withDefaultResolvers; + return dumper; +} + +struct Dumper +{ + private: + //Indentation width. + int indent_ = 2; + //Tag directives to use. + TagDirective[] tags_; + public: + //Resolver to resolve tags. + Resolver resolver; + //Write scalars in canonical form? + bool canonical; + //Preferred text width. + uint textWidth = 80; + //Line break to use. Unix by default. + LineBreak lineBreak = LineBreak.unix; + //YAML version string. Default is 1.1. + string YAMLVersion = "1.1"; + //Always explicitly write document start? Default is no explicit start. + bool explicitStart = false; + //Always explicitly write document end? Default is no explicit end. + bool explicitEnd = false; + + //Name of the output file or stream, used in error messages. + string name = "<unknown>"; + + // Default style for scalar nodes. If style is $(D ScalarStyle.invalid), the _style is chosen automatically. + ScalarStyle defaultScalarStyle = ScalarStyle.invalid; + // Default style for collection nodes. If style is $(D CollectionStyle.invalid), the _style is chosen automatically. + CollectionStyle defaultCollectionStyle = CollectionStyle.invalid; + + @disable bool opEquals(ref Dumper); + @disable int opCmp(ref Dumper); + + ///Set indentation width. 2 by default. Must not be zero. + @property void indent(uint indent) pure @safe nothrow + in + { + assert(indent != 0, "Can't use zero YAML indent width"); + } + do + { + indent_ = indent; + } + + /** + * Specify tag directives. + * + * A tag directive specifies a shorthand notation for specifying _tags. + * Each tag directive associates a handle with a prefix. This allows for + * compact tag notation. + * + * Each handle specified MUST start and end with a '!' character + * (a single character "!" handle is allowed as well). + * + * Only alphanumeric characters, '-', and '__' may be used in handles. + * + * Each prefix MUST not be empty. + * + * The "!!" handle is used for default YAML _tags with prefix + * "tag:yaml.org,2002:". This can be overridden. + * + * Params: tags = Tag directives (keys are handles, values are prefixes). + */ + @property void tagDirectives(string[string] tags) pure @safe + { + TagDirective[] t; + foreach(handle, prefix; tags) + { + assert(handle.length >= 1 && handle[0] == '!' && handle[$ - 1] == '!', + "A tag handle is empty or does not start and end with a " ~ + "'!' character : " ~ handle); + assert(prefix.length >= 1, "A tag prefix is empty"); + t ~= TagDirective(handle, prefix); + } + tags_ = t; + } + /// + @safe unittest + { + auto dumper = dumper(); + string[string] directives; + directives["!short!"] = "tag:long.org,2011:"; + //This will emit tags starting with "tag:long.org,2011" + //with a "!short!" prefix instead. + dumper.tagDirectives(directives); + dumper.dump(new Appender!string(), Node("foo")); + } + + /** + * Dump one or more YAML _documents to the file/stream. + * + * Note that while you can call dump() multiple times on the same + * dumper, you will end up writing multiple YAML "files" to the same + * file/stream. + * + * Params: documents = Documents to _dump (root nodes of the _documents). + * + * Throws: YAMLException on error (e.g. invalid nodes, + * unable to write to file/stream). + */ + void dump(CharacterType = char, Range)(Range range, Node[] documents ...) + if (isOutputRange!(Range, CharacterType) && + isOutputRange!(Range, char) || isOutputRange!(Range, wchar) || isOutputRange!(Range, dchar)) + { + try + { + auto emitter = new Emitter!(Range, CharacterType)(range, canonical, indent_, textWidth, lineBreak); + auto serializer = Serializer(resolver, explicitStart ? Yes.explicitStart : No.explicitStart, + explicitEnd ? Yes.explicitEnd : No.explicitEnd, YAMLVersion, tags_); + serializer.startStream(emitter); + foreach(ref document; documents) + { + auto data = representData(document, defaultScalarStyle, defaultCollectionStyle); + serializer.serialize(emitter, data); + } + serializer.endStream(emitter); + } + catch(YAMLException e) + { + throw new YAMLException("Unable to dump YAML to stream " + ~ name ~ " : " ~ e.msg, e.file, e.line); + } + } +} +///Write to a file +@safe unittest +{ + auto node = Node([1, 2, 3, 4, 5]); + dumper().dump(new Appender!string(), node); +} +///Write multiple YAML documents to a file +@safe unittest +{ + auto node1 = Node([1, 2, 3, 4, 5]); + auto node2 = Node("This document contains only one string"); + dumper().dump(new Appender!string(), node1, node2); + //Or with an array: + dumper().dump(new Appender!string(), [node1, node2]); +} +///Write to memory +@safe unittest +{ + auto stream = new Appender!string(); + auto node = Node([1, 2, 3, 4, 5]); + dumper().dump(stream, node); +} +///Use a custom resolver to support custom data types and/or implicit tags +@safe unittest +{ + import std.regex : regex; + auto node = Node([1, 2, 3, 4, 5]); + auto dumper = dumper(); + dumper.resolver.addImplicitResolver("!tag", regex("A.*"), "A"); + dumper.dump(new Appender!string(), node); +} +/// Set default scalar style +@safe unittest +{ + auto stream = new Appender!string(); + auto node = Node("Hello world!"); + auto dumper = dumper(); + dumper.defaultScalarStyle = ScalarStyle.singleQuoted; + dumper.dump(stream, node); +} +/// Set default collection style +@safe unittest +{ + auto stream = new Appender!string(); + auto node = Node(["Hello", "world!"]); + auto dumper = dumper(); + dumper.defaultCollectionStyle = CollectionStyle.flow; + dumper.dump(stream, node); +} +// Make sure the styles are actually used +@safe unittest +{ + auto stream = new Appender!string(); + auto node = Node([Node("Hello world!"), Node(["Hello", "world!"])]); + auto dumper = dumper(); + dumper.defaultScalarStyle = ScalarStyle.singleQuoted; + dumper.defaultCollectionStyle = CollectionStyle.flow; + dumper.explicitEnd = false; + dumper.explicitStart = false; + dumper.YAMLVersion = null; + dumper.dump(stream, node); + assert(stream.data == "['Hello world!', ['Hello', 'world!']]\n"); +} +// Explicit document start/end markers +@safe unittest +{ + auto stream = new Appender!string(); + auto node = Node([1, 2, 3, 4, 5]); + auto dumper = dumper(); + dumper.explicitEnd = true; + dumper.explicitStart = true; + dumper.YAMLVersion = null; + dumper.dump(stream, node); + //Skip version string + assert(stream.data[0..3] == "---"); + //account for newline at end + assert(stream.data[$-4..$-1] == "..."); +} +@safe unittest +{ + auto stream = new Appender!string(); + auto node = Node([Node("Te, st2")]); + auto dumper = dumper(); + dumper.explicitStart = true; + dumper.explicitEnd = false; + dumper.YAMLVersion = null; + dumper.dump(stream, node); + assert(stream.data == "--- ['Te, st2']\n"); +} +// No explicit document start/end markers +@safe unittest +{ + auto stream = new Appender!string(); + auto node = Node([1, 2, 3, 4, 5]); + auto dumper = dumper(); + dumper.explicitEnd = false; + dumper.explicitStart = false; + dumper.YAMLVersion = null; + dumper.dump(stream, node); + //Skip version string + assert(stream.data[0..3] != "---"); + //account for newline at end + assert(stream.data[$-4..$-1] != "..."); +} +// Windows, macOS line breaks +@safe unittest +{ + auto node = Node(0); + { + auto stream = new Appender!string(); + auto dumper = dumper(); + dumper.explicitEnd = true; + dumper.explicitStart = true; + dumper.YAMLVersion = null; + dumper.lineBreak = LineBreak.windows; + dumper.dump(stream, node); + assert(stream.data == "--- 0\r\n...\r\n"); + } + { + auto stream = new Appender!string(); + auto dumper = dumper(); + dumper.explicitEnd = true; + dumper.explicitStart = true; + dumper.YAMLVersion = null; + dumper.lineBreak = LineBreak.macintosh; + dumper.dump(stream, node); + assert(stream.data == "--- 0\r...\r"); + } +} diff --git a/source/dyaml/emitter.d b/source/dyaml/emitter.d new file mode 100644 index 0000000..d8e016c --- /dev/null +++ b/source/dyaml/emitter.d @@ -0,0 +1,1689 @@ +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * YAML emitter. + * Code based on PyYAML: http://www.pyyaml.org + */ +module dyaml.emitter; + + +import std.algorithm; +import std.array; +import std.ascii; +import std.conv; +import std.encoding; +import std.exception; +import std.format; +import std.range; +import std.string; +import std.system; +import std.typecons; +import std.utf; + +import dyaml.encoding; +import dyaml.escapes; +import dyaml.event; +import dyaml.exception; +import dyaml.linebreak; +import dyaml.queue; +import dyaml.style; +import dyaml.tagdirective; + + +package: + +//Stores results of analysis of a scalar, determining e.g. what scalar style to use. +struct ScalarAnalysis +{ + //Scalar itself. + string scalar; + + enum AnalysisFlags + { + empty = 1<<0, + multiline = 1<<1, + allowFlowPlain = 1<<2, + allowBlockPlain = 1<<3, + allowSingleQuoted = 1<<4, + allowDoubleQuoted = 1<<5, + allowBlock = 1<<6, + isNull = 1<<7 + } + + ///Analysis results. + BitFlags!AnalysisFlags flags; +} + +private alias isNewLine = among!('\n', '\u0085', '\u2028', '\u2029'); + +private alias isSpecialChar = among!('#', ',', '[', ']', '{', '}', '&', '*', '!', '|', '>', '\\', '\'', '"', '%', '@', '`'); + +private alias isFlowIndicator = among!(',', '?', '[', ']', '{', '}'); + +private alias isSpace = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029', ' ', '\t'); + +//Emits YAML events into a file/stream. +struct Emitter(Range, CharType) if (isOutputRange!(Range, CharType)) +{ + private: + ///Default tag handle shortcuts and replacements. + static TagDirective[] defaultTagDirectives_ = + [TagDirective("!", "!"), TagDirective("!!", "tag:yaml.org,2002:")]; + + ///Stream to write to. + Range stream_; + + /// Type used for upcoming emitter steps + alias EmitterFunction = void function(scope typeof(this)*) @safe; + + ///Stack of states. + Appender!(EmitterFunction[]) states_; + + ///Current state. + EmitterFunction state_; + + ///Event queue. + Queue!Event events_; + ///Event we're currently emitting. + Event event_; + + ///Stack of previous indentation levels. + Appender!(int[]) indents_; + ///Current indentation level. + int indent_ = -1; + + ///Level of nesting in flow context. If 0, we're in block context. + uint flowLevel_ = 0; + + /// Describes context (where we are in the document). + enum Context + { + /// Root node of a document. + root, + /// Sequence. + sequence, + /// Mapping. + mappingNoSimpleKey, + /// Mapping, in a simple key. + mappingSimpleKey, + } + /// Current context. + Context context_; + + ///Characteristics of the last emitted character: + + ///Line. + uint line_ = 0; + ///Column. + uint column_ = 0; + ///Whitespace character? + bool whitespace_ = true; + ///indentation space, '-', '?', or ':'? + bool indentation_ = true; + + ///Does the document require an explicit document indicator? + bool openEnded_; + + ///Formatting details. + + ///Canonical scalar format? + bool canonical_; + ///Best indentation width. + uint bestIndent_ = 2; + ///Best text width. + uint bestWidth_ = 80; + ///Best line break character/s. + LineBreak bestLineBreak_; + + ///Tag directive handle - prefix pairs. + TagDirective[] tagDirectives_; + + ///Anchor/alias to process. + string preparedAnchor_ = null; + ///Tag to process. + string preparedTag_ = null; + + ///Analysis result of the current scalar. + ScalarAnalysis analysis_; + ///Style of the current scalar. + ScalarStyle style_ = ScalarStyle.invalid; + + public: + @disable int opCmp(ref Emitter); + @disable bool opEquals(ref Emitter); + + /** + * Construct an emitter. + * + * Params: stream = Output range to write to. + * canonical = Write scalars in canonical form? + * indent = Indentation width. + * lineBreak = Line break character/s. + */ + this(Range stream, const bool canonical, const int indent, const int width, + const LineBreak lineBreak) @safe + { + states_.reserve(32); + indents_.reserve(32); + stream_ = stream; + canonical_ = canonical; + nextExpected!"expectStreamStart"(); + + if(indent > 1 && indent < 10){bestIndent_ = indent;} + if(width > bestIndent_ * 2) {bestWidth_ = width;} + bestLineBreak_ = lineBreak; + + analysis_.flags.isNull = true; + } + + ///Emit an event. + void emit(Event event) @safe + { + events_.push(event); + while(!needMoreEvents()) + { + event_ = events_.pop(); + callNext(); + event_.destroy(); + } + } + + private: + ///Pop and return the newest state in states_. + EmitterFunction popState() @safe + in(states_.data.length > 0, + "Emitter: Need to pop a state but there are no states left") + { + const result = states_.data[$-1]; + states_.shrinkTo(states_.data.length - 1); + return result; + } + + void pushState(string D)() @safe + { + states_ ~= mixin("function(typeof(this)* self) { self."~D~"(); }"); + } + + ///Pop and return the newest indent in indents_. + int popIndent() @safe + in(indents_.data.length > 0, + "Emitter: Need to pop an indent level but there" ~ + " are no indent levels left") + { + const result = indents_.data[$-1]; + indents_.shrinkTo(indents_.data.length - 1); + return result; + } + + ///Write a string to the file/stream. + void writeString(const scope char[] str) @safe + { + static if(is(CharType == char)) + { + copy(str, stream_); + } + static if(is(CharType == wchar)) + { + const buffer = to!wstring(str); + copy(buffer, stream_); + } + static if(is(CharType == dchar)) + { + const buffer = to!dstring(str); + copy(buffer, stream_); + } + } + + ///In some cases, we wait for a few next events before emitting. + bool needMoreEvents() @safe nothrow + { + if(events_.length == 0){return true;} + + const event = events_.peek(); + if(event.id == EventID.documentStart){return needEvents(1);} + if(event.id == EventID.sequenceStart){return needEvents(2);} + if(event.id == EventID.mappingStart) {return needEvents(3);} + + return false; + } + + ///Determines if we need specified number of more events. + bool needEvents(in uint count) @safe nothrow + { + int level; + + foreach(const event; events_.range) + { + if(event.id.among!(EventID.documentStart, EventID.sequenceStart, EventID.mappingStart)) {++level;} + else if(event.id.among!(EventID.documentEnd, EventID.sequenceEnd, EventID.mappingEnd)) {--level;} + else if(event.id == EventID.streamStart){level = -1;} + + if(level < 0) + { + return false; + } + } + + return events_.length < (count + 1); + } + + ///Increase indentation level. + void increaseIndent(const Flag!"flow" flow = No.flow, const bool indentless = false) @safe + { + indents_ ~= indent_; + if(indent_ == -1) + { + indent_ = flow ? bestIndent_ : 0; + } + else if(!indentless) + { + indent_ += bestIndent_; + } + } + + ///Determines if the type of current event is as specified. Throws if no event. + bool eventTypeIs(in EventID id) const pure @safe + in(!event_.isNull, "Expected an event, but no event is available.") + { + return event_.id == id; + } + + + //States. + + + //Stream handlers. + + ///Handle start of a file/stream. + void expectStreamStart() @safe + in(eventTypeIs(EventID.streamStart), + "Expected streamStart, but got " ~ event_.idString) + { + + writeStreamStart(); + nextExpected!"expectDocumentStart!(Yes.first)"(); + } + + ///Expect nothing, throwing if we still have something. + void expectNothing() @safe + { + assert(0, "Expected nothing, but got " ~ event_.idString); + } + + //Document handlers. + + ///Handle start of a document. + void expectDocumentStart(Flag!"first" first)() @safe + in(eventTypeIs(EventID.documentStart) || eventTypeIs(EventID.streamEnd), + "Expected documentStart or streamEnd, but got " ~ event_.idString) + { + + if(event_.id == EventID.documentStart) + { + const YAMLVersion = event_.value; + auto tagDirectives = event_.tagDirectives; + if(openEnded_ && (YAMLVersion !is null || tagDirectives !is null)) + { + writeIndicator("...", Yes.needWhitespace); + writeIndent(); + } + + if(YAMLVersion !is null) + { + writeVersionDirective(prepareVersion(YAMLVersion)); + } + + if(tagDirectives !is null) + { + tagDirectives_ = tagDirectives; + sort!"icmp(a.handle, b.handle) < 0"(tagDirectives_); + + foreach(ref pair; tagDirectives_) + { + writeTagDirective(prepareTagHandle(pair.handle), + prepareTagPrefix(pair.prefix)); + } + } + + bool eq(ref TagDirective a, ref TagDirective b){return a.handle == b.handle;} + //Add any default tag directives that have not been overriden. + foreach(ref def; defaultTagDirectives_) + { + if(!std.algorithm.canFind!eq(tagDirectives_, def)) + { + tagDirectives_ ~= def; + } + } + + const implicit = first && !event_.explicitDocument && !canonical_ && + YAMLVersion is null && tagDirectives is null && + !checkEmptyDocument(); + if(!implicit) + { + writeIndent(); + writeIndicator("---", Yes.needWhitespace); + if(canonical_){writeIndent();} + } + nextExpected!"expectRootNode"(); + } + else if(event_.id == EventID.streamEnd) + { + if(openEnded_) + { + writeIndicator("...", Yes.needWhitespace); + writeIndent(); + } + writeStreamEnd(); + nextExpected!"expectNothing"(); + } + } + + ///Handle end of a document. + void expectDocumentEnd() @safe + in(eventTypeIs(EventID.documentEnd), + "Expected DocumentEnd, but got " ~ event_.idString) + { + + writeIndent(); + if(event_.explicitDocument) + { + writeIndicator("...", Yes.needWhitespace); + writeIndent(); + } + nextExpected!"expectDocumentStart!(No.first)"(); + } + + ///Handle the root node of a document. + void expectRootNode() @safe + { + pushState!"expectDocumentEnd"(); + expectNode(Context.root); + } + + ///Handle a mapping node. + // + //Params: simpleKey = Are we in a simple key? + void expectMappingNode(const bool simpleKey = false) @safe + { + expectNode(simpleKey ? Context.mappingSimpleKey : Context.mappingNoSimpleKey); + } + + ///Handle a sequence node. + void expectSequenceNode() @safe + { + expectNode(Context.sequence); + } + + ///Handle a new node. Context specifies where in the document we are. + void expectNode(const Context context) @safe + { + context_ = context; + + const flowCollection = event_.collectionStyle == CollectionStyle.flow; + + switch(event_.id) + { + case EventID.alias_: expectAlias(); break; + case EventID.scalar: + processAnchor("&"); + processTag(); + expectScalar(); + break; + case EventID.sequenceStart: + processAnchor("&"); + processTag(); + if(flowLevel_ > 0 || canonical_ || flowCollection || checkEmptySequence()) + { + expectFlowSequence(); + } + else + { + expectBlockSequence(); + } + break; + case EventID.mappingStart: + processAnchor("&"); + processTag(); + if(flowLevel_ > 0 || canonical_ || flowCollection || checkEmptyMapping()) + { + expectFlowMapping(); + } + else + { + expectBlockMapping(); + } + break; + default: + assert(0, "Expected alias_, scalar, sequenceStart or " ~ + "mappingStart, but got: " ~ event_.idString); + } + } + ///Handle an alias. + void expectAlias() @safe + in(event_.anchor != "", "Anchor is not specified for alias") + { + processAnchor("*"); + nextExpected(popState()); + } + + ///Handle a scalar. + void expectScalar() @safe + { + increaseIndent(Yes.flow); + processScalar(); + indent_ = popIndent(); + nextExpected(popState()); + } + + //Flow sequence handlers. + + ///Handle a flow sequence. + void expectFlowSequence() @safe + { + writeIndicator("[", Yes.needWhitespace, Yes.whitespace); + ++flowLevel_; + increaseIndent(Yes.flow); + nextExpected!"expectFlowSequenceItem!(Yes.first)"(); + } + + ///Handle a flow sequence item. + void expectFlowSequenceItem(Flag!"first" first)() @safe + { + if(event_.id == EventID.sequenceEnd) + { + indent_ = popIndent(); + --flowLevel_; + static if(!first) if(canonical_) + { + writeIndicator(",", No.needWhitespace); + writeIndent(); + } + writeIndicator("]", No.needWhitespace); + nextExpected(popState()); + return; + } + static if(!first){writeIndicator(",", No.needWhitespace);} + if(canonical_ || column_ > bestWidth_){writeIndent();} + pushState!"expectFlowSequenceItem!(No.first)"(); + expectSequenceNode(); + } + + //Flow mapping handlers. + + ///Handle a flow mapping. + void expectFlowMapping() @safe + { + writeIndicator("{", Yes.needWhitespace, Yes.whitespace); + ++flowLevel_; + increaseIndent(Yes.flow); + nextExpected!"expectFlowMappingKey!(Yes.first)"(); + } + + ///Handle a key in a flow mapping. + void expectFlowMappingKey(Flag!"first" first)() @safe + { + if(event_.id == EventID.mappingEnd) + { + indent_ = popIndent(); + --flowLevel_; + static if (!first) if(canonical_) + { + writeIndicator(",", No.needWhitespace); + writeIndent(); + } + writeIndicator("}", No.needWhitespace); + nextExpected(popState()); + return; + } + + static if(!first){writeIndicator(",", No.needWhitespace);} + if(canonical_ || column_ > bestWidth_){writeIndent();} + if(!canonical_ && checkSimpleKey()) + { + pushState!"expectFlowMappingSimpleValue"(); + expectMappingNode(true); + return; + } + + writeIndicator("?", Yes.needWhitespace); + pushState!"expectFlowMappingValue"(); + expectMappingNode(); + } + + ///Handle a simple value in a flow mapping. + void expectFlowMappingSimpleValue() @safe + { + writeIndicator(":", No.needWhitespace); + pushState!"expectFlowMappingKey!(No.first)"(); + expectMappingNode(); + } + + ///Handle a complex value in a flow mapping. + void expectFlowMappingValue() @safe + { + if(canonical_ || column_ > bestWidth_){writeIndent();} + writeIndicator(":", Yes.needWhitespace); + pushState!"expectFlowMappingKey!(No.first)"(); + expectMappingNode(); + } + + //Block sequence handlers. + + ///Handle a block sequence. + void expectBlockSequence() @safe + { + const indentless = (context_ == Context.mappingNoSimpleKey || + context_ == Context.mappingSimpleKey) && !indentation_; + increaseIndent(No.flow, indentless); + nextExpected!"expectBlockSequenceItem!(Yes.first)"(); + } + + ///Handle a block sequence item. + void expectBlockSequenceItem(Flag!"first" first)() @safe + { + static if(!first) if(event_.id == EventID.sequenceEnd) + { + indent_ = popIndent(); + nextExpected(popState()); + return; + } + + writeIndent(); + writeIndicator("-", Yes.needWhitespace, No.whitespace, Yes.indentation); + pushState!"expectBlockSequenceItem!(No.first)"(); + expectSequenceNode(); + } + + //Block mapping handlers. + + ///Handle a block mapping. + void expectBlockMapping() @safe + { + increaseIndent(No.flow); + nextExpected!"expectBlockMappingKey!(Yes.first)"(); + } + + ///Handle a key in a block mapping. + void expectBlockMappingKey(Flag!"first" first)() @safe + { + static if(!first) if(event_.id == EventID.mappingEnd) + { + indent_ = popIndent(); + nextExpected(popState()); + return; + } + + writeIndent(); + if(checkSimpleKey()) + { + pushState!"expectBlockMappingSimpleValue"(); + expectMappingNode(true); + return; + } + + writeIndicator("?", Yes.needWhitespace, No.whitespace, Yes.indentation); + pushState!"expectBlockMappingValue"(); + expectMappingNode(); + } + + ///Handle a simple value in a block mapping. + void expectBlockMappingSimpleValue() @safe + { + writeIndicator(":", No.needWhitespace); + pushState!"expectBlockMappingKey!(No.first)"(); + expectMappingNode(); + } + + ///Handle a complex value in a block mapping. + void expectBlockMappingValue() @safe + { + writeIndent(); + writeIndicator(":", Yes.needWhitespace, No.whitespace, Yes.indentation); + pushState!"expectBlockMappingKey!(No.first)"(); + expectMappingNode(); + } + + //Checkers. + + ///Check if an empty sequence is next. + bool checkEmptySequence() const @safe pure nothrow + { + return event_.id == EventID.sequenceStart && events_.length > 0 + && events_.peek().id == EventID.sequenceEnd; + } + + ///Check if an empty mapping is next. + bool checkEmptyMapping() const @safe pure nothrow + { + return event_.id == EventID.mappingStart && events_.length > 0 + && events_.peek().id == EventID.mappingEnd; + } + + ///Check if an empty document is next. + bool checkEmptyDocument() const @safe pure nothrow + { + if(event_.id != EventID.documentStart || events_.length == 0) + { + return false; + } + + const event = events_.peek(); + const emptyScalar = event.id == EventID.scalar && (event.anchor is null) && + (event.tag is null) && event.implicit && event.value == ""; + return emptyScalar; + } + + ///Check if a simple key is next. + bool checkSimpleKey() @safe + { + uint length; + const id = event_.id; + const scalar = id == EventID.scalar; + const collectionStart = id == EventID.mappingStart || + id == EventID.sequenceStart; + + if((id == EventID.alias_ || scalar || collectionStart) + && (event_.anchor !is null)) + { + if(preparedAnchor_ is null) + { + preparedAnchor_ = prepareAnchor(event_.anchor); + } + length += preparedAnchor_.length; + } + + if((scalar || collectionStart) && (event_.tag !is null)) + { + if(preparedTag_ is null){preparedTag_ = prepareTag(event_.tag);} + length += preparedTag_.length; + } + + if(scalar) + { + if(analysis_.flags.isNull){analysis_ = analyzeScalar(event_.value);} + length += analysis_.scalar.length; + } + + if(length >= 128){return false;} + + return id == EventID.alias_ || + (scalar && !analysis_.flags.empty && !analysis_.flags.multiline) || + checkEmptySequence() || + checkEmptyMapping(); + } + + ///Process and write a scalar. + void processScalar() @safe + { + if(analysis_.flags.isNull){analysis_ = analyzeScalar(event_.value);} + if(style_ == ScalarStyle.invalid) + { + style_ = chooseScalarStyle(); + } + + //if(analysis_.flags.multiline && (context_ != Context.mappingSimpleKey) && + // ([ScalarStyle.invalid, ScalarStyle.plain, ScalarStyle.singleQuoted, ScalarStyle.doubleQuoted) + // .canFind(style_)) + //{ + // writeIndent(); + //} + auto writer = ScalarWriter!(Range, CharType)(&this, analysis_.scalar, + context_ != Context.mappingSimpleKey); + final switch(style_) + { + case ScalarStyle.invalid: assert(false); + case ScalarStyle.doubleQuoted: writer.writeDoubleQuoted(); break; + case ScalarStyle.singleQuoted: writer.writeSingleQuoted(); break; + case ScalarStyle.folded: writer.writeFolded(); break; + case ScalarStyle.literal: writer.writeLiteral(); break; + case ScalarStyle.plain: writer.writePlain(); break; + } + analysis_.flags.isNull = true; + style_ = ScalarStyle.invalid; + } + + ///Process and write an anchor/alias. + void processAnchor(const string indicator) @safe + { + if(event_.anchor is null) + { + preparedAnchor_ = null; + return; + } + if(preparedAnchor_ is null) + { + preparedAnchor_ = prepareAnchor(event_.anchor); + } + if(preparedAnchor_ !is null && preparedAnchor_ != "") + { + writeIndicator(indicator, Yes.needWhitespace); + writeString(preparedAnchor_); + } + preparedAnchor_ = null; + } + + ///Process and write a tag. + void processTag() @safe + { + string tag = event_.tag; + + if(event_.id == EventID.scalar) + { + if(style_ == ScalarStyle.invalid){style_ = chooseScalarStyle();} + if((!canonical_ || (tag is null)) && + ((tag == "tag:yaml.org,2002:str") || (style_ == ScalarStyle.plain ? event_.implicit : !event_.implicit && (tag is null)))) + { + preparedTag_ = null; + return; + } + if(event_.implicit && (tag is null)) + { + tag = "!"; + preparedTag_ = null; + } + } + else if((!canonical_ || (tag is null)) && event_.implicit) + { + preparedTag_ = null; + return; + } + + assert(tag != "", "Tag is not specified"); + if(preparedTag_ is null){preparedTag_ = prepareTag(tag);} + if(preparedTag_ !is null && preparedTag_ != "") + { + writeIndicator(preparedTag_, Yes.needWhitespace); + } + preparedTag_ = null; + } + + ///Determine style to write the current scalar in. + ScalarStyle chooseScalarStyle() @safe + { + if(analysis_.flags.isNull){analysis_ = analyzeScalar(event_.value);} + + const style = event_.scalarStyle; + const invalidOrPlain = style == ScalarStyle.invalid || style == ScalarStyle.plain; + const block = style == ScalarStyle.literal || style == ScalarStyle.folded; + const singleQuoted = style == ScalarStyle.singleQuoted; + const doubleQuoted = style == ScalarStyle.doubleQuoted; + + const allowPlain = flowLevel_ > 0 ? analysis_.flags.allowFlowPlain + : analysis_.flags.allowBlockPlain; + //simple empty or multiline scalars can't be written in plain style + const simpleNonPlain = (context_ == Context.mappingSimpleKey) && + (analysis_.flags.empty || analysis_.flags.multiline); + + if(doubleQuoted || canonical_) + { + return ScalarStyle.doubleQuoted; + } + + if(invalidOrPlain && event_.implicit && !simpleNonPlain && allowPlain) + { + return ScalarStyle.plain; + } + + if(block && flowLevel_ == 0 && context_ != Context.mappingSimpleKey && + analysis_.flags.allowBlock) + { + return style; + } + + if((invalidOrPlain || singleQuoted) && + analysis_.flags.allowSingleQuoted && + !(context_ == Context.mappingSimpleKey && analysis_.flags.multiline)) + { + return ScalarStyle.singleQuoted; + } + + return ScalarStyle.doubleQuoted; + } + + ///Prepare YAML version string for output. + static string prepareVersion(const string YAMLVersion) @safe + in(YAMLVersion.split(".")[0] == "1", + "Unsupported YAML version: " ~ YAMLVersion) + { + return YAMLVersion; + } + + ///Encode an Unicode character for tag directive and write it to writer. + static void encodeChar(Writer)(ref Writer writer, in dchar c) @safe + { + char[4] data; + const bytes = encode(data, c); + //For each byte add string in format %AB , where AB are hex digits of the byte. + foreach(const char b; data[0 .. bytes]) + { + formattedWrite(writer, "%%%02X", cast(ubyte)b); + } + } + + ///Prepare tag directive handle for output. + static string prepareTagHandle(const string handle) @safe + in(handle != "", "Tag handle must not be empty") + in(handle.drop(1).dropBack(1).all!(c => isAlphaNum(c) || c.among!('-', '_')), + "Tag handle contains invalid characters") + { + return handle; + } + + ///Prepare tag directive prefix for output. + static string prepareTagPrefix(const string prefix) @safe + in(prefix != "", "Tag prefix must not be empty") + { + auto appender = appender!string(); + const int offset = prefix[0] == '!'; + size_t start, end; + + foreach(const size_t i, const dchar c; prefix) + { + const size_t idx = i + offset; + if(isAlphaNum(c) || c.among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '_', '.', '!', '~', '*', '\\', '\'', '(', ')', '[', ']', '%')) + { + end = idx + 1; + continue; + } + + if(start < idx){appender.put(prefix[start .. idx]);} + start = end = idx + 1; + + encodeChar(appender, c); + } + + end = min(end, prefix.length); + if(start < end){appender.put(prefix[start .. end]);} + return appender.data; + } + + ///Prepare tag for output. + string prepareTag(in string tag) @safe + in(tag != "", "Tag must not be empty") + { + + string tagString = tag; + if (tagString == "!") return "!"; + string handle; + string suffix = tagString; + + //Sort lexicographically by prefix. + sort!"icmp(a.prefix, b.prefix) < 0"(tagDirectives_); + foreach(ref pair; tagDirectives_) + { + auto prefix = pair.prefix; + if(tagString.startsWith(prefix) && + (prefix != "!" || prefix.length < tagString.length)) + { + handle = pair.handle; + suffix = tagString[prefix.length .. $]; + } + } + + auto appender = appender!string(); + appender.put(handle !is null && handle != "" ? handle : "!<"); + size_t start, end; + foreach(const dchar c; suffix) + { + if(isAlphaNum(c) || c.among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '_', '.', '~', '*', '\\', '\'', '(', ')', '[', ']') || + (c == '!' && handle != "!")) + { + ++end; + continue; + } + if(start < end){appender.put(suffix[start .. end]);} + start = end = end + 1; + + encodeChar(appender, c); + } + + if(start < end){appender.put(suffix[start .. end]);} + if(handle is null || handle == ""){appender.put(">");} + + return appender.data; + } + + ///Prepare anchor for output. + static string prepareAnchor(const string anchor) @safe + in(anchor != "", "Anchor must not be empty") + in(anchor.all!(c => isAlphaNum(c) || c.among!('-', '_')), "Anchor contains invalid characters") + { + return anchor; + } + + ///Analyze specifed scalar and return the analysis result. + static ScalarAnalysis analyzeScalar(string scalar) @safe + { + ScalarAnalysis analysis; + analysis.flags.isNull = false; + analysis.scalar = scalar; + + //Empty scalar is a special case. + if(scalar is null || scalar == "") + { + with(ScalarAnalysis.AnalysisFlags) + analysis.flags = + empty | + allowBlockPlain | + allowSingleQuoted | + allowDoubleQuoted; + return analysis; + } + + //Indicators and special characters (All false by default). + bool blockIndicators, flowIndicators, lineBreaks, specialCharacters; + + //Important whitespace combinations (All false by default). + bool leadingSpace, leadingBreak, trailingSpace, trailingBreak, + breakSpace, spaceBreak; + + //Check document indicators. + if(scalar.startsWith("---", "...")) + { + blockIndicators = flowIndicators = true; + } + + //First character or preceded by a whitespace. + bool preceededByWhitespace = true; + + //Last character or followed by a whitespace. + bool followedByWhitespace = scalar.length == 1 || + scalar[1].among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + + //The previous character is a space/break (false by default). + bool previousSpace, previousBreak; + + foreach(const size_t index, const dchar c; scalar) + { + //Check for indicators. + if(index == 0) + { + //Leading indicators are special characters. + if(c.isSpecialChar) + { + flowIndicators = blockIndicators = true; + } + if(':' == c || '?' == c) + { + flowIndicators = true; + if(followedByWhitespace){blockIndicators = true;} + } + if(c == '-' && followedByWhitespace) + { + flowIndicators = blockIndicators = true; + } + } + else + { + //Some indicators cannot appear within a scalar as well. + if(c.isFlowIndicator){flowIndicators = true;} + if(c == ':') + { + flowIndicators = true; + if(followedByWhitespace){blockIndicators = true;} + } + if(c == '#' && preceededByWhitespace) + { + flowIndicators = blockIndicators = true; + } + } + + //Check for line breaks, special, and unicode characters. + if(c.isNewLine){lineBreaks = true;} + if(!(c == '\n' || (c >= '\x20' && c <= '\x7E')) && + !((c == '\u0085' || (c >= '\xA0' && c <= '\uD7FF') || + (c >= '\uE000' && c <= '\uFFFD')) && c != '\uFEFF')) + { + specialCharacters = true; + } + + //Detect important whitespace combinations. + if(c == ' ') + { + if(index == 0){leadingSpace = true;} + if(index == scalar.length - 1){trailingSpace = true;} + if(previousBreak){breakSpace = true;} + previousSpace = true; + previousBreak = false; + } + else if(c.isNewLine) + { + if(index == 0){leadingBreak = true;} + if(index == scalar.length - 1){trailingBreak = true;} + if(previousSpace){spaceBreak = true;} + previousSpace = false; + previousBreak = true; + } + else + { + previousSpace = previousBreak = false; + } + + //Prepare for the next character. + preceededByWhitespace = c.isSpace != 0; + followedByWhitespace = index + 2 >= scalar.length || + scalar[index + 2].isSpace; + } + + with(ScalarAnalysis.AnalysisFlags) + { + //Let's decide what styles are allowed. + analysis.flags |= allowFlowPlain | allowBlockPlain | allowSingleQuoted | + allowDoubleQuoted | allowBlock; + + //Leading and trailing whitespaces are bad for plain scalars. + if(leadingSpace || leadingBreak || trailingSpace || trailingBreak) + { + analysis.flags &= ~(allowFlowPlain | allowBlockPlain); + } + + //We do not permit trailing spaces for block scalars. + if(trailingSpace) + { + analysis.flags &= ~allowBlock; + } + + //Spaces at the beginning of a new line are only acceptable for block + //scalars. + if(breakSpace) + { + analysis.flags &= ~(allowFlowPlain | allowBlockPlain | allowSingleQuoted); + } + + //Spaces followed by breaks, as well as special character are only + //allowed for double quoted scalars. + if(spaceBreak || specialCharacters) + { + analysis.flags &= ~(allowFlowPlain | allowBlockPlain | allowSingleQuoted | allowBlock); + } + + //Although the plain scalar writer supports breaks, we never emit + //multiline plain scalars. + if(lineBreaks) + { + analysis.flags &= ~(allowFlowPlain | allowBlockPlain); + analysis.flags |= multiline; + } + + //Flow indicators are forbidden for flow plain scalars. + if(flowIndicators) + { + analysis.flags &= ~allowFlowPlain; + } + + //Block indicators are forbidden for block plain scalars. + if(blockIndicators) + { + analysis.flags &= ~allowBlockPlain; + } + } + return analysis; + } + + @safe unittest + { + with(analyzeScalar("").flags) + { + // workaround for empty being std.range.primitives.empty here + alias empty = ScalarAnalysis.AnalysisFlags.empty; + assert(empty && allowBlockPlain && allowSingleQuoted && allowDoubleQuoted); + } + with(analyzeScalar("a").flags) + { + assert(allowFlowPlain && allowBlockPlain && allowSingleQuoted && allowDoubleQuoted && allowBlock); + } + with(analyzeScalar(" ").flags) + { + assert(allowSingleQuoted && allowDoubleQuoted); + } + with(analyzeScalar(" a").flags) + { + assert(allowSingleQuoted && allowDoubleQuoted); + } + with(analyzeScalar("a ").flags) + { + assert(allowSingleQuoted && allowDoubleQuoted); + } + with(analyzeScalar("\na").flags) + { + assert(allowSingleQuoted && allowDoubleQuoted); + } + with(analyzeScalar("a\n").flags) + { + assert(allowSingleQuoted && allowDoubleQuoted); + } + with(analyzeScalar("\n").flags) + { + assert(multiline && allowSingleQuoted && allowDoubleQuoted && allowBlock); + } + with(analyzeScalar(" \n").flags) + { + assert(multiline && allowDoubleQuoted); + } + with(analyzeScalar("\n a").flags) + { + assert(multiline && allowDoubleQuoted && allowBlock); + } + } + + //Writers. + + ///Start the YAML stream (write the unicode byte order mark). + void writeStreamStart() @safe + { + //Write BOM (except for UTF-8) + static if(is(CharType == wchar) || is(CharType == dchar)) + { + stream_.put(cast(CharType)'\uFEFF'); + } + } + + ///End the YAML stream. + void writeStreamEnd() @safe {} + + ///Write an indicator (e.g. ":", "[", ">", etc.). + void writeIndicator(const scope char[] indicator, + const Flag!"needWhitespace" needWhitespace, + const Flag!"whitespace" whitespace = No.whitespace, + const Flag!"indentation" indentation = No.indentation) @safe + { + const bool prefixSpace = !whitespace_ && needWhitespace; + whitespace_ = whitespace; + indentation_ = indentation_ && indentation; + openEnded_ = false; + column_ += indicator.length; + if(prefixSpace) + { + ++column_; + writeString(" "); + } + writeString(indicator); + } + + ///Write indentation. + void writeIndent() @safe + { + const indent = indent_ == -1 ? 0 : indent_; + + if(!indentation_ || column_ > indent || (column_ == indent && !whitespace_)) + { + writeLineBreak(); + } + if(column_ < indent) + { + whitespace_ = true; + + //Used to avoid allocation of arbitrary length strings. + static immutable spaces = " "; + size_t numSpaces = indent - column_; + column_ = indent; + while(numSpaces >= spaces.length) + { + writeString(spaces); + numSpaces -= spaces.length; + } + writeString(spaces[0 .. numSpaces]); + } + } + + ///Start new line. + void writeLineBreak(const scope char[] data = null) @safe + { + whitespace_ = indentation_ = true; + ++line_; + column_ = 0; + writeString(data is null ? lineBreak(bestLineBreak_) : data); + } + + ///Write a YAML version directive. + void writeVersionDirective(const string versionText) @safe + { + writeString("%YAML "); + writeString(versionText); + writeLineBreak(); + } + + ///Write a tag directive. + void writeTagDirective(const string handle, const string prefix) @safe + { + writeString("%TAG "); + writeString(handle); + writeString(" "); + writeString(prefix); + writeLineBreak(); + } + void nextExpected(string D)() @safe + { + state_ = mixin("function(typeof(this)* self) { self."~D~"(); }"); + } + void nextExpected(EmitterFunction f) @safe + { + state_ = f; + } + void callNext() @safe + { + state_(&this); + } +} + + +private: + +///RAII struct used to write out scalar values. +struct ScalarWriter(Range, CharType) +{ + invariant() + { + assert(emitter_.bestIndent_ > 0 && emitter_.bestIndent_ < 10, + "Emitter bestIndent must be 1 to 9 for one-character indent hint"); + } + + private: + @disable int opCmp(ref Emitter!(Range, CharType)); + @disable bool opEquals(ref Emitter!(Range, CharType)); + + ///Used as "null" UTF-32 character. + static immutable dcharNone = dchar.max; + + ///Emitter used to emit the scalar. + Emitter!(Range, CharType)* emitter_; + + ///UTF-8 encoded text of the scalar to write. + string text_; + + ///Can we split the scalar into multiple lines? + bool split_; + ///Are we currently going over spaces in the text? + bool spaces_; + ///Are we currently going over line breaks in the text? + bool breaks_; + + ///Start and end byte of the text range we're currently working with. + size_t startByte_, endByte_; + ///End byte of the text range including the currently processed character. + size_t nextEndByte_; + ///Start and end character of the text range we're currently working with. + long startChar_, endChar_; + + public: + ///Construct a ScalarWriter using emitter to output text. + this(Emitter!(Range, CharType)* emitter, string text, const bool split = true) @safe nothrow + { + emitter_ = emitter; + text_ = text; + split_ = split; + } + + ///Write text as single quoted scalar. + void writeSingleQuoted() @safe + { + emitter_.writeIndicator("\'", Yes.needWhitespace); + spaces_ = breaks_ = false; + resetTextPosition(); + + do + { + const dchar c = nextChar(); + if(spaces_) + { + if(c != ' ' && tooWide() && split_ && + startByte_ != 0 && endByte_ != text_.length) + { + writeIndent(Flag!"ResetSpace".no); + updateRangeStart(); + } + else if(c != ' ') + { + writeCurrentRange(Flag!"UpdateColumn".yes); + } + } + else if(breaks_) + { + if(!c.isNewLine) + { + writeStartLineBreak(); + writeLineBreaks(); + emitter_.writeIndent(); + } + } + else if((c == dcharNone || c == '\'' || c == ' ' || c.isNewLine) + && startChar_ < endChar_) + { + writeCurrentRange(Flag!"UpdateColumn".yes); + } + if(c == '\'') + { + emitter_.column_ += 2; + emitter_.writeString("\'\'"); + startByte_ = endByte_ + 1; + startChar_ = endChar_ + 1; + } + updateBreaks(c, Flag!"UpdateSpaces".yes); + }while(endByte_ < text_.length); + + emitter_.writeIndicator("\'", No.needWhitespace); + } + + ///Write text as double quoted scalar. + void writeDoubleQuoted() @safe + { + resetTextPosition(); + emitter_.writeIndicator("\"", Yes.needWhitespace); + do + { + const dchar c = nextChar(); + //handle special characters + if(c == dcharNone || c.among!('\"', '\\', '\u0085', '\u2028', '\u2029', '\uFEFF') || + !((c >= '\x20' && c <= '\x7E') || + ((c >= '\xA0' && c <= '\uD7FF') || (c >= '\uE000' && c <= '\uFFFD')))) + { + if(startChar_ < endChar_) + { + writeCurrentRange(Flag!"UpdateColumn".yes); + } + if(c != dcharNone) + { + auto appender = appender!string(); + if(const dchar es = toEscape(c)) + { + appender.put('\\'); + appender.put(es); + } + else + { + //Write an escaped Unicode character. + const format = c <= 255 ? "\\x%02X": + c <= 65535 ? "\\u%04X": "\\U%08X"; + formattedWrite(appender, format, cast(uint)c); + } + + emitter_.column_ += appender.data.length; + emitter_.writeString(appender.data); + startChar_ = endChar_ + 1; + startByte_ = nextEndByte_; + } + } + if((endByte_ > 0 && endByte_ < text_.length - strideBack(text_, text_.length)) + && (c == ' ' || startChar_ >= endChar_) + && (emitter_.column_ + endChar_ - startChar_ > emitter_.bestWidth_) + && split_) + { + //text_[2:1] is ok in Python but not in D, so we have to use min() + emitter_.writeString(text_[min(startByte_, endByte_) .. endByte_]); + emitter_.writeString("\\"); + emitter_.column_ += startChar_ - endChar_ + 1; + startChar_ = max(startChar_, endChar_); + startByte_ = max(startByte_, endByte_); + + writeIndent(Flag!"ResetSpace".yes); + if(charAtStart() == ' ') + { + emitter_.writeString("\\"); + ++emitter_.column_; + } + } + }while(endByte_ < text_.length); + emitter_.writeIndicator("\"", No.needWhitespace); + } + + ///Write text as folded block scalar. + void writeFolded() @safe + { + initBlock('>'); + bool leadingSpace = true; + spaces_ = false; + breaks_ = true; + resetTextPosition(); + + do + { + const dchar c = nextChar(); + if(breaks_) + { + if(!c.isNewLine) + { + if(!leadingSpace && c != dcharNone && c != ' ') + { + writeStartLineBreak(); + } + leadingSpace = (c == ' '); + writeLineBreaks(); + if(c != dcharNone){emitter_.writeIndent();} + } + } + else if(spaces_) + { + if(c != ' ' && tooWide()) + { + writeIndent(Flag!"ResetSpace".no); + updateRangeStart(); + } + else if(c != ' ') + { + writeCurrentRange(Flag!"UpdateColumn".yes); + } + } + else if(c == dcharNone || c.isNewLine || c == ' ') + { + writeCurrentRange(Flag!"UpdateColumn".yes); + if(c == dcharNone){emitter_.writeLineBreak();} + } + updateBreaks(c, Flag!"UpdateSpaces".yes); + }while(endByte_ < text_.length); + } + + ///Write text as literal block scalar. + void writeLiteral() @safe + { + initBlock('|'); + breaks_ = true; + resetTextPosition(); + + do + { + const dchar c = nextChar(); + if(breaks_) + { + if(!c.isNewLine) + { + writeLineBreaks(); + if(c != dcharNone){emitter_.writeIndent();} + } + } + else if(c == dcharNone || c.isNewLine) + { + writeCurrentRange(Flag!"UpdateColumn".no); + if(c == dcharNone){emitter_.writeLineBreak();} + } + updateBreaks(c, Flag!"UpdateSpaces".no); + }while(endByte_ < text_.length); + } + + ///Write text as plain scalar. + void writePlain() @safe + { + if(emitter_.context_ == Emitter!(Range, CharType).Context.root){emitter_.openEnded_ = true;} + if(text_ == ""){return;} + if(!emitter_.whitespace_) + { + ++emitter_.column_; + emitter_.writeString(" "); + } + emitter_.whitespace_ = emitter_.indentation_ = false; + spaces_ = breaks_ = false; + resetTextPosition(); + + do + { + const dchar c = nextChar(); + if(spaces_) + { + if(c != ' ' && tooWide() && split_) + { + writeIndent(Flag!"ResetSpace".yes); + updateRangeStart(); + } + else if(c != ' ') + { + writeCurrentRange(Flag!"UpdateColumn".yes); + } + } + else if(breaks_) + { + if(!c.isNewLine) + { + writeStartLineBreak(); + writeLineBreaks(); + writeIndent(Flag!"ResetSpace".yes); + } + } + else if(c == dcharNone || c.isNewLine || c == ' ') + { + writeCurrentRange(Flag!"UpdateColumn".yes); + } + updateBreaks(c, Flag!"UpdateSpaces".yes); + }while(endByte_ < text_.length); + } + + private: + ///Get next character and move end of the text range to it. + @property dchar nextChar() pure @safe + { + ++endChar_; + endByte_ = nextEndByte_; + if(endByte_ >= text_.length){return dcharNone;} + const c = text_[nextEndByte_]; + //c is ascii, no need to decode. + if(c < 0x80) + { + ++nextEndByte_; + return c; + } + return decode(text_, nextEndByte_); + } + + ///Get character at start of the text range. + @property dchar charAtStart() const pure @safe + { + size_t idx = startByte_; + return decode(text_, idx); + } + + ///Is the current line too wide? + @property bool tooWide() const pure @safe nothrow + { + return startChar_ + 1 == endChar_ && + emitter_.column_ > emitter_.bestWidth_; + } + + ///Determine hints (indicators) for block scalar. + size_t determineBlockHints(char[] hints, uint bestIndent) const pure @safe + { + size_t hintsIdx; + if(text_.length == 0) + return hintsIdx; + + dchar lastChar(const string str, ref size_t end) + { + size_t idx = end = end - strideBack(str, end); + return decode(text_, idx); + } + + size_t end = text_.length; + const last = lastChar(text_, end); + const secondLast = end > 0 ? lastChar(text_, end) : 0; + + if(text_[0].isNewLine || text_[0] == ' ') + { + hints[hintsIdx++] = cast(char)('0' + bestIndent); + } + if(!last.isNewLine) + { + hints[hintsIdx++] = '-'; + } + else if(std.utf.count(text_) == 1 || secondLast.isNewLine) + { + hints[hintsIdx++] = '+'; + } + return hintsIdx; + } + + ///Initialize for block scalar writing with specified indicator. + void initBlock(const char indicator) @safe + { + char[4] hints; + hints[0] = indicator; + const hintsLength = 1 + determineBlockHints(hints[1 .. $], emitter_.bestIndent_); + emitter_.writeIndicator(hints[0 .. hintsLength], Yes.needWhitespace); + if(hints.length > 0 && hints[$ - 1] == '+') + { + emitter_.openEnded_ = true; + } + emitter_.writeLineBreak(); + } + + ///Write out the current text range. + void writeCurrentRange(const Flag!"UpdateColumn" updateColumn) @safe + { + emitter_.writeString(text_[startByte_ .. endByte_]); + if(updateColumn){emitter_.column_ += endChar_ - startChar_;} + updateRangeStart(); + } + + ///Write line breaks in the text range. + void writeLineBreaks() @safe + { + foreach(const dchar br; text_[startByte_ .. endByte_]) + { + if(br == '\n'){emitter_.writeLineBreak();} + else + { + char[4] brString; + const bytes = encode(brString, br); + emitter_.writeLineBreak(brString[0 .. bytes]); + } + } + updateRangeStart(); + } + + ///Write line break if start of the text range is a newline. + void writeStartLineBreak() @safe + { + if(charAtStart == '\n'){emitter_.writeLineBreak();} + } + + ///Write indentation, optionally resetting whitespace/indentation flags. + void writeIndent(const Flag!"ResetSpace" resetSpace) @safe + { + emitter_.writeIndent(); + if(resetSpace) + { + emitter_.whitespace_ = emitter_.indentation_ = false; + } + } + + ///Move start of text range to its end. + void updateRangeStart() pure @safe nothrow + { + startByte_ = endByte_; + startChar_ = endChar_; + } + + ///Update the line breaks_ flag, optionally updating the spaces_ flag. + void updateBreaks(in dchar c, const Flag!"UpdateSpaces" updateSpaces) pure @safe + { + if(c == dcharNone){return;} + breaks_ = (c.isNewLine != 0); + if(updateSpaces){spaces_ = c == ' ';} + } + + ///Move to the beginning of text. + void resetTextPosition() pure @safe nothrow + { + startByte_ = endByte_ = nextEndByte_ = 0; + startChar_ = endChar_ = -1; + } +} diff --git a/source/dyaml/encoding.d b/source/dyaml/encoding.d new file mode 100644 index 0000000..50c10b9 --- /dev/null +++ b/source/dyaml/encoding.d @@ -0,0 +1,11 @@ +// Copyright Ferdinand Majerech 2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module dyaml.encoding; + + +import tinyendian; + +alias Encoding = tinyendian.UTFEncoding; diff --git a/source/dyaml/escapes.d b/source/dyaml/escapes.d new file mode 100644 index 0000000..32080a2 --- /dev/null +++ b/source/dyaml/escapes.d @@ -0,0 +1,92 @@ + + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module dyaml.escapes; + +package: + +import std.meta : AliasSeq; +alias escapes = AliasSeq!('0', 'a', 'b', 't', '\t', 'n', 'v', 'f', 'r', 'e', ' ', + '\"', '\\', 'N', '_', 'L', 'P'); + +/// YAML hex codes specifying the length of the hex number. +alias escapeHexCodeList = AliasSeq!('x', 'u', 'U'); + +/// Convert a YAML escape to a dchar. +dchar fromEscape(dchar escape) @safe pure nothrow @nogc +{ + switch(escape) + { + case '0': return '\0'; + case 'a': return '\x07'; + case 'b': return '\x08'; + case 't': return '\x09'; + case '\t': return '\x09'; + case 'n': return '\x0A'; + case 'v': return '\x0B'; + case 'f': return '\x0C'; + case 'r': return '\x0D'; + case 'e': return '\x1B'; + case ' ': return '\x20'; + case '\"': return '\"'; + case '\\': return '\\'; + case 'N': return '\x85'; //'\u0085'; + case '_': return '\xA0'; + case 'L': return '\u2028'; + case 'P': return '\u2029'; + default: assert(false, "No such YAML escape"); + } +} + +/** + * Convert a dchar to a YAML escape. + * + * Params: + * value = The possibly escapable character. + * + * Returns: + * If the character passed as parameter can be escaped, returns the matching + * escape, otherwise returns a null character. + */ +dchar toEscape(dchar value) @safe pure nothrow @nogc +{ + switch(value) + { + case '\0': return '0'; + case '\x07': return 'a'; + case '\x08': return 'b'; + case '\x09': return 't'; + case '\x0A': return 'n'; + case '\x0B': return 'v'; + case '\x0C': return 'f'; + case '\x0D': return 'r'; + case '\x1B': return 'e'; + case '\"': return '\"'; + case '\\': return '\\'; + case '\xA0': return '_'; + case '\x85': return 'N'; + case '\u2028': return 'L'; + case '\u2029': return 'P'; + default: return 0; + } +} + +/// Get the length of a hexadecimal number determined by its hex code. +/// +/// Need a function as associative arrays don't work with @nogc. +/// (And this may be even faster with a function.) +uint escapeHexLength(dchar hexCode) @safe pure nothrow @nogc +{ + switch(hexCode) + { + case 'x': return 2; + case 'u': return 4; + case 'U': return 8; + default: assert(false, "No such YAML hex code"); + } +} + diff --git a/source/dyaml/event.d b/source/dyaml/event.d new file mode 100644 index 0000000..f4a747f --- /dev/null +++ b/source/dyaml/event.d @@ -0,0 +1,243 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * YAML events. + * Code based on PyYAML: http://www.pyyaml.org + */ +module dyaml.event; + +import std.array; +import std.conv; + +import dyaml.exception; +import dyaml.reader; +import dyaml.tagdirective; +import dyaml.style; + + +package: +///Event types. +enum EventID : ubyte +{ + invalid = 0, /// Invalid (uninitialized) event. + streamStart, /// Stream start + streamEnd, /// Stream end + documentStart, /// Document start + documentEnd, /// Document end + alias_, /// Alias + scalar, /// Scalar + sequenceStart, /// Sequence start + sequenceEnd, /// Sequence end + mappingStart, /// Mapping start + mappingEnd /// Mapping end +} + +/** + * YAML event produced by parser. + * + * 48 bytes on 64bit. + */ +struct Event +{ + @disable int opCmp(ref Event); + + ///Value of the event, if any. + string value; + ///Start position of the event in file/stream. + Mark startMark; + ///End position of the event in file/stream. + Mark endMark; + union + { + struct + { + ///Anchor of the event, if any. + string _anchor; + ///Tag of the event, if any. + string _tag; + } + ///Tag directives, if this is a DocumentStart. + //TagDirectives tagDirectives; + TagDirective[] _tagDirectives; + } + ///Event type. + EventID id = EventID.invalid; + ///Style of scalar event, if this is a scalar event. + ScalarStyle scalarStyle = ScalarStyle.invalid; + union + { + ///Should the tag be implicitly resolved? + bool implicit; + /** + * Is this document event explicit? + * + * Used if this is a DocumentStart or DocumentEnd. + */ + bool explicitDocument; + } + ///Collection style, if this is a SequenceStart or MappingStart. + CollectionStyle collectionStyle = CollectionStyle.invalid; + + ///Is this a null (uninitialized) event? + @property bool isNull() const pure @safe nothrow {return id == EventID.invalid;} + + ///Get string representation of the token ID. + @property string idString() const @safe {return to!string(id);} + + auto ref anchor() inout @trusted pure { + assert(id != EventID.documentStart, "DocumentStart events cannot have anchors."); + return _anchor; + } + + auto ref tag() inout @trusted pure { + assert(id != EventID.documentStart, "DocumentStart events cannot have tags."); + return _tag; + } + + auto ref tagDirectives() inout @trusted pure { + assert(id == EventID.documentStart, "Only DocumentStart events have tag directives."); + return _tagDirectives; + } +} + +/** + * Construct a simple event. + * + * Params: start = Start position of the event in the file/stream. + * end = End position of the event in the file/stream. + * anchor = Anchor, if this is an alias event. + */ +Event event(EventID id)(const Mark start, const Mark end, const string anchor = null) + @safe + in(!(id == EventID.alias_ && anchor == ""), "Missing anchor for alias event") +{ + Event result; + result.startMark = start; + result.endMark = end; + result.anchor = anchor; + result.id = id; + return result; +} + +/** + * Construct a collection (mapping or sequence) start event. + * + * Params: start = Start position of the event in the file/stream. + * end = End position of the event in the file/stream. + * anchor = Anchor of the sequence, if any. + * tag = Tag of the sequence, if specified. + * implicit = Should the tag be implicitly resolved? + * style = Style to use when outputting document. + */ +Event collectionStartEvent(EventID id) + (const Mark start, const Mark end, const string anchor, const string tag, + const bool implicit, const CollectionStyle style) pure @safe nothrow +{ + static assert(id == EventID.sequenceStart || id == EventID.sequenceEnd || + id == EventID.mappingStart || id == EventID.mappingEnd); + Event result; + result.startMark = start; + result.endMark = end; + result.anchor = anchor; + result.tag = tag; + result.id = id; + result.implicit = implicit; + result.collectionStyle = style; + return result; +} + +/** + * Construct a stream start event. + * + * Params: start = Start position of the event in the file/stream. + * end = End position of the event in the file/stream. + */ +Event streamStartEvent(const Mark start, const Mark end) + pure @safe nothrow +{ + Event result; + result.startMark = start; + result.endMark = end; + result.id = EventID.streamStart; + return result; +} + +///Aliases for simple events. +alias streamEndEvent = event!(EventID.streamEnd); +alias aliasEvent = event!(EventID.alias_); +alias sequenceEndEvent = event!(EventID.sequenceEnd); +alias mappingEndEvent = event!(EventID.mappingEnd); + +///Aliases for collection start events. +alias sequenceStartEvent = collectionStartEvent!(EventID.sequenceStart); +alias mappingStartEvent = collectionStartEvent!(EventID.mappingStart); + +/** + * Construct a document start event. + * + * Params: start = Start position of the event in the file/stream. + * end = End position of the event in the file/stream. + * explicit = Is this an explicit document start? + * YAMLVersion = YAML version string of the document. + * tagDirectives = Tag directives of the document. + */ +Event documentStartEvent(const Mark start, const Mark end, const bool explicit, string YAMLVersion, + TagDirective[] tagDirectives) pure @safe nothrow +{ + Event result; + result.value = YAMLVersion; + result.startMark = start; + result.endMark = end; + result.id = EventID.documentStart; + result.explicitDocument = explicit; + result.tagDirectives = tagDirectives; + return result; +} + +/** + * Construct a document end event. + * + * Params: start = Start position of the event in the file/stream. + * end = End position of the event in the file/stream. + * explicit = Is this an explicit document end? + */ +Event documentEndEvent(const Mark start, const Mark end, const bool explicit) pure @safe nothrow +{ + Event result; + result.startMark = start; + result.endMark = end; + result.id = EventID.documentEnd; + result.explicitDocument = explicit; + return result; +} + +/// Construct a scalar event. +/// +/// Params: start = Start position of the event in the file/stream. +/// end = End position of the event in the file/stream. +/// anchor = Anchor of the scalar, if any. +/// tag = Tag of the scalar, if specified. +/// implicit = Should the tag be implicitly resolved? +/// value = String value of the scalar. +/// style = Scalar style. +Event scalarEvent(const Mark start, const Mark end, const string anchor, const string tag, + const bool implicit, const string value, + const ScalarStyle style = ScalarStyle.invalid) @safe pure nothrow @nogc +{ + Event result; + result.value = value; + result.startMark = start; + result.endMark = end; + + result.anchor = anchor; + result.tag = tag; + + result.id = EventID.scalar; + result.scalarStyle = style; + result.implicit = implicit; + return result; +} diff --git a/source/dyaml/exception.d b/source/dyaml/exception.d new file mode 100644 index 0000000..145e9c3 --- /dev/null +++ b/source/dyaml/exception.d @@ -0,0 +1,171 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +///Exceptions thrown by D:YAML and _exception related code. +module dyaml.exception; + + +import std.algorithm; +import std.array; +import std.string; +import std.conv; + + +/// Base class for all exceptions thrown by D:YAML. +class YAMLException : Exception +{ + /// Construct a YAMLException with specified message and position where it was thrown. + public this(string msg, string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow @nogc + { + super(msg, file, line); + } +} + +/// Position in a YAML stream, used for error messages. +struct Mark +{ + package: + /// File name. + string name_; + /// Line number. + ushort line_; + /// Column number. + ushort column_; + + public: + /// Construct a Mark with specified line and column in the file. + this(string name, const uint line, const uint column) @safe pure nothrow @nogc + { + name_ = name; + line_ = cast(ushort)min(ushort.max, line); + // This *will* overflow on extremely wide files but saves CPU time + // (mark ctor takes ~5% of time) + column_ = cast(ushort)column; + } + + /// Get a file name. + @property string name() @safe pure nothrow @nogc const + { + return name_; + } + + /// Get a line number. + @property ushort line() @safe pure nothrow @nogc const + { + return line_; + } + + /// Get a column number. + @property ushort column() @safe pure nothrow @nogc const + { + return column_; + } + + /// Duplicate a mark + Mark dup () const scope @safe pure nothrow + { + return Mark(this.name_.idup, this.line_, this.column_); + } + + /// Get a string representation of the mark. + string toString() const scope @safe pure nothrow + { + // Line/column numbers start at zero internally, make them start at 1. + static string clamped(ushort v) @safe pure nothrow + { + return text(v + 1, v == ushort.max ? " or higher" : ""); + } + return "file " ~ name_ ~ ",line " ~ clamped(line_) ~ ",column " ~ clamped(column_); + } +} + +// Base class of YAML exceptions with marked positions of the problem. +abstract class MarkedYAMLException : YAMLException +{ + /// Position of the error. + Mark mark; + + // Construct a MarkedYAMLException with specified context and problem. + this(string context, scope const Mark contextMark, + string problem, scope const Mark problemMark, + string file = __FILE__, size_t line = __LINE__) @safe pure nothrow + { + const msg = context ~ '\n' ~ + (contextMark != problemMark ? contextMark.toString() ~ '\n' : "") ~ + problem ~ '\n' ~ problemMark.toString() ~ '\n'; + super(msg, file, line); + mark = problemMark.dup; + } + + // Construct a MarkedYAMLException with specified problem. + this(string problem, scope const Mark problemMark, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow + { + super(problem ~ '\n' ~ problemMark.toString(), file, line); + mark = problemMark.dup; + } + + /// Construct a MarkedYAMLException from a struct storing constructor parameters. + this(ref const(MarkedYAMLExceptionData) data) @safe pure nothrow + { + with(data) this(context, contextMark, problem, problemMark); + } +} + +package: +// A struct storing parameters to the MarkedYAMLException constructor. +struct MarkedYAMLExceptionData +{ + // Context of the error. + string context; + // Position of the context in a YAML buffer. + Mark contextMark; + // The error itself. + string problem; + // Position if the error. + Mark problemMark; +} + +// Constructors of YAML exceptions are mostly the same, so we use a mixin. +// +// See_Also: YAMLException +template ExceptionCtors() +{ + public this(string msg, string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow + { + super(msg, file, line); + } +} + +// Constructors of marked YAML exceptions are mostly the same, so we use a mixin. +// +// See_Also: MarkedYAMLException +template MarkedExceptionCtors() +{ + public: + this(string context, const Mark contextMark, string problem, + const Mark problemMark, string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow + { + super(context, contextMark, problem, problemMark, + file, line); + } + + this(string problem, const Mark problemMark, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow + { + super(problem, problemMark, file, line); + } + + this(ref const(MarkedYAMLExceptionData) data) @safe pure nothrow + { + super(data); + } +} diff --git a/source/dyaml/linebreak.d b/source/dyaml/linebreak.d new file mode 100644 index 0000000..1f0f661 --- /dev/null +++ b/source/dyaml/linebreak.d @@ -0,0 +1,32 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module dyaml.linebreak; + + +///Enumerates platform specific line breaks. +enum LineBreak +{ + ///Unix line break ("\n"). + unix, + ///Windows line break ("\r\n"). + windows, + ///Macintosh line break ("\r"). + macintosh +} + +package: + +//Get line break string for specified line break. +string lineBreak(in LineBreak b) pure @safe nothrow +{ + final switch(b) + { + case LineBreak.unix: return "\n"; + case LineBreak.windows: return "\r\n"; + case LineBreak.macintosh: return "\r"; + } +} diff --git a/source/dyaml/loader.d b/source/dyaml/loader.d new file mode 100644 index 0000000..09c19db --- /dev/null +++ b/source/dyaml/loader.d @@ -0,0 +1,411 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/// Class used to load YAML documents. +module dyaml.loader; + + +import std.exception; +import std.file; +import std.stdio : File; +import std.string; + +import dyaml.composer; +import dyaml.constructor; +import dyaml.event; +import dyaml.exception; +import dyaml.node; +import dyaml.parser; +import dyaml.reader; +import dyaml.resolver; +import dyaml.scanner; +import dyaml.token; + + +/** Loads YAML documents from files or char[]. + * + * User specified Constructor and/or Resolver can be used to support new + * tags / data types. + */ +struct Loader +{ + private: + // Processes character data to YAML tokens. + Scanner scanner_; + // Processes tokens to YAML events. + Parser parser_; + // Resolves tags (data types). + Resolver resolver_; + // Name of the input file or stream, used in error messages. + string name_ = "<unknown>"; + // Are we done loading? + bool done_; + // Last node read from stream + Node currentNode; + // Has the range interface been initialized yet? + bool rangeInitialized; + + public: + @disable this(); + @disable int opCmp(ref Loader); + @disable bool opEquals(ref Loader); + + /** Construct a Loader to load YAML from a file. + * + * Params: filename = Name of the file to load from. + * file = Already-opened file to load from. + * + * Throws: YAMLException if the file could not be opened or read. + */ + static Loader fromFile(string filename) @trusted + { + try + { + auto loader = Loader(std.file.read(filename), filename); + return loader; + } + catch(FileException e) + { + throw new YAMLException("Unable to open file %s for YAML loading: %s" + .format(filename, e.msg), e.file, e.line); + } + } + /// ditto + static Loader fromFile(File file) @system + { + auto loader = Loader(file.byChunk(4096).join, file.name); + return loader; + } + + /** Construct a Loader to load YAML from a string. + * + * Params: data = String to load YAML from. The char[] version $(B will) + * overwrite its input during parsing as D:YAML reuses memory. + * + * Returns: Loader loading YAML from given string. + * + * Throws: + * + * YAMLException if data could not be read (e.g. a decoding error) + */ + static Loader fromString(char[] data) @safe + { + return Loader(cast(ubyte[])data); + } + /// Ditto + static Loader fromString(string data) @safe + { + return fromString(data.dup); + } + /// Load a char[]. + @safe unittest + { + assert(Loader.fromString("42".dup).load().as!int == 42); + } + /// Load a string. + @safe unittest + { + assert(Loader.fromString("42").load().as!int == 42); + } + + /** Construct a Loader to load YAML from a buffer. + * + * Params: yamlData = Buffer with YAML data to load. This may be e.g. a file + * loaded to memory or a string with YAML data. Note that + * buffer $(B will) be overwritten, as D:YAML minimizes + * memory allocations by reusing the input _buffer. + * $(B Must not be deleted or modified by the user as long + * as nodes loaded by this Loader are in use!) - Nodes may + * refer to data in this buffer. + * + * Note that D:YAML looks for byte-order-marks YAML files encoded in + * UTF-16/UTF-32 (and sometimes UTF-8) use to specify the encoding and + * endianness, so it should be enough to load an entire file to a buffer and + * pass it to D:YAML, regardless of Unicode encoding. + * + * Throws: YAMLException if yamlData contains data illegal in YAML. + */ + static Loader fromBuffer(ubyte[] yamlData) @safe + { + return Loader(yamlData); + } + /// Ditto + static Loader fromBuffer(void[] yamlData) @system + { + return Loader(yamlData); + } + /// Ditto + private this(void[] yamlData, string name = "<unknown>") @system + { + this(cast(ubyte[])yamlData, name); + } + /// Ditto + private this(ubyte[] yamlData, string name = "<unknown>") @safe + { + resolver_ = Resolver.withDefaultResolvers; + name_ = name; + try + { + auto reader_ = new Reader(yamlData, name); + scanner_ = Scanner(reader_); + parser_ = new Parser(scanner_); + } + catch(YAMLException e) + { + throw new YAMLException("Unable to open %s for YAML loading: %s" + .format(name_, e.msg), e.file, e.line); + } + } + + + /// Set stream _name. Used in debugging messages. + void name(string name) pure @safe nothrow @nogc + { + name_ = name; + scanner_.name = name; + } + + /// Specify custom Resolver to use. + auto ref resolver() pure @safe nothrow @nogc + { + return resolver_; + } + + /** Load single YAML document. + * + * If none or more than one YAML document is found, this throws a YAMLException. + * + * This can only be called once; this is enforced by contract. + * + * Returns: Root node of the document. + * + * Throws: YAMLException if there wasn't exactly one document + * or on a YAML parsing error. + */ + Node load() @safe + { + enforce!YAMLException(!empty, "Zero documents in stream"); + auto output = front; + popFront(); + enforce!YAMLException(empty, "More than one document in stream"); + return output; + } + + /** Implements the empty range primitive. + * + * If there's no more documents left in the stream, this will be true. + * + * Returns: `true` if no more documents left, `false` otherwise. + */ + bool empty() @safe + { + // currentNode and done_ are both invalid until popFront is called once + if (!rangeInitialized) + { + popFront(); + } + return done_; + } + /** Implements the popFront range primitive. + * + * Reads the next document from the stream, if possible. + */ + void popFront() @safe + { + // Composer initialization is done here in case the constructor is + // modified, which is a pretty common case. + static Composer composer; + if (!rangeInitialized) + { + composer = Composer(parser_, resolver_); + rangeInitialized = true; + } + assert(!done_, "Loader.popFront called on empty range"); + if (composer.checkNode()) + { + currentNode = composer.getNode(); + } + else + { + done_ = true; + } + } + /** Implements the front range primitive. + * + * Returns: the current document as a Node. + */ + Node front() @safe + { + // currentNode and done_ are both invalid until popFront is called once + if (!rangeInitialized) + { + popFront(); + } + return currentNode; + } + + // Scan all tokens, throwing them away. Used for benchmarking. + void scanBench() @safe + { + try + { + while(!scanner_.empty) + { + scanner_.popFront(); + } + } + catch(YAMLException e) + { + throw new YAMLException("Unable to scan YAML from stream " ~ + name_ ~ " : " ~ e.msg, e.file, e.line); + } + } + + + // Parse and return all events. Used for debugging. + auto parse() @safe + { + return parser_; + } +} +/// Load single YAML document from a file: +@safe unittest +{ + write("example.yaml", "Hello world!"); + auto rootNode = Loader.fromFile("example.yaml").load(); + assert(rootNode == "Hello world!"); +} +/// Load single YAML document from an already-opened file: +@system unittest +{ + // Open a temporary file + auto file = File.tmpfile; + // Write valid YAML + file.write("Hello world!"); + // Return to the beginning + file.seek(0); + // Load document + auto rootNode = Loader.fromFile(file).load(); + assert(rootNode == "Hello world!"); +} +/// Load all YAML documents from a file: +@safe unittest +{ + import std.array : array; + import std.file : write; + write("example.yaml", + "---\n"~ + "Hello world!\n"~ + "...\n"~ + "---\n"~ + "Hello world 2!\n"~ + "...\n" + ); + auto nodes = Loader.fromFile("example.yaml").array; + assert(nodes.length == 2); +} +/// Iterate over YAML documents in a file, lazily loading them: +@safe unittest +{ + import std.file : write; + write("example.yaml", + "---\n"~ + "Hello world!\n"~ + "...\n"~ + "---\n"~ + "Hello world 2!\n"~ + "...\n" + ); + auto loader = Loader.fromFile("example.yaml"); + + foreach(ref node; loader) + { + //Do something + } +} +/// Load YAML from a string: +@safe unittest +{ + string yaml_input = ("red: '#ff0000'\n" ~ + "green: '#00ff00'\n" ~ + "blue: '#0000ff'"); + + auto colors = Loader.fromString(yaml_input).load(); + + foreach(string color, string value; colors) + { + // Do something with the color and its value... + } +} + +/// Load a file into a buffer in memory and then load YAML from that buffer: +@safe unittest +{ + import std.file : read, write; + import std.stdio : writeln; + // Create a yaml document + write("example.yaml", + "---\n"~ + "Hello world!\n"~ + "...\n"~ + "---\n"~ + "Hello world 2!\n"~ + "...\n" + ); + try + { + string buffer = readText("example.yaml"); + auto yamlNode = Loader.fromString(buffer); + + // Read data from yamlNode here... + } + catch(FileException e) + { + writeln("Failed to read file 'example.yaml'"); + } +} +/// Use a custom resolver to support custom data types and/or implicit tags: +@safe unittest +{ + import std.file : write; + // Create a yaml document + write("example.yaml", + "---\n"~ + "Hello world!\n"~ + "...\n" + ); + + auto loader = Loader.fromFile("example.yaml"); + + // Add resolver expressions here... + // loader.resolver.addImplicitResolver(...); + + auto rootNode = loader.load(); +} + +//Issue #258 - https://github.com/dlang-community/D-YAML/issues/258 +@safe unittest +{ + auto yaml = "{\n\"root\": {\n\t\"key\": \"value\"\n }\n}"; + auto doc = Loader.fromString(yaml).load(); + assert(doc.isValid); +} + +@safe unittest +{ + import std.exception : collectException; + + auto yaml = q"EOS + value: invalid: string +EOS"; + auto filename = "invalid.yml"; + auto loader = Loader.fromString(yaml); + loader.name = filename; + + Node unused; + auto e = loader.load().collectException!ScannerException(unused); + assert(e.mark.name == filename); +} diff --git a/source/dyaml/node.d b/source/dyaml/node.d new file mode 100644 index 0000000..4c3c5eb --- /dev/null +++ b/source/dyaml/node.d @@ -0,0 +1,2638 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/// Node of a YAML document. Used to read YAML data once it's loaded, +/// and to prepare data to emit. +module dyaml.node; + + +import std.algorithm; +import std.array; +import std.conv; +import std.datetime; +import std.exception; +import std.format; +import std.math; +import std.meta : AliasSeq; +import std.range; +import std.string; +import std.traits; +import std.typecons; + +// FIXME: Switch back to upstream's when v2.101 is the oldest +// supported version (recommended: after v2.111 release). +import dyaml.stdsumtype; + +import dyaml.event; +import dyaml.exception; +import dyaml.style; + +/// Exception thrown at node related errors. +class NodeException : MarkedYAMLException +{ + package: + // Construct a NodeException. + // + // Params: msg = Error message. + // start = Start position of the node. + this(string msg, const scope Mark start, + string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow + { + super(msg, start, file, line); + } +} + +// Node kinds. +enum NodeID : ubyte +{ + scalar, + sequence, + mapping, + invalid +} + +/// Null YAML type. Used in nodes with _null values. +struct YAMLNull +{ + /// Used for string conversion. + string toString() const pure @safe nothrow {return "null";} +} + +/// Invalid YAML type, used internally by SumType +private struct YAMLInvalid {} + +// Merge YAML type, used to support "tag:yaml.org,2002:merge". +package struct YAMLMerge{} + +// Key-value pair of YAML nodes, used in mappings. +private struct Pair +{ + public: + /// Key node. + Node key; + /// Value node. + Node value; + + /// Construct a Pair from two values. Will be converted to Nodes if needed. + this(K, V)(K key, V value) + { + static if(is(Unqual!K == Node)){this.key = key;} + else {this.key = Node(key);} + static if(is(Unqual!V == Node)){this.value = value;} + else {this.value = Node(value);} + } + + /// Equality test with another Pair. + bool opEquals(const ref Pair rhs) const scope @safe + { + return key == rhs.key && value == rhs.value; + } + + // Comparison with another Pair. + int opCmp(const scope ref Pair rhs) const scope @safe + { + const keyCmp = key.opCmp(rhs.key); + return keyCmp != 0 ? keyCmp + : value.opCmp(rhs.value); + } + + /// + public void toString (scope void delegate(scope const(char)[]) @safe sink) + const scope @safe + { + // formattedWrite does not accept `scope` parameters + () @trusted { + formattedWrite(sink, "%s: %s", this.key, this.value); + }(); + } +} + +enum NodeType +{ + null_, + merge, + boolean, + integer, + decimal, + binary, + timestamp, + string, + mapping, + sequence, + invalid +} + +/** YAML node. + * + * This is a pseudo-dynamic type that can store any YAML value, including a + * sequence or mapping of nodes. You can get data from a Node directly or + * iterate over it if it's a collection. + */ +struct Node +{ + public: + alias Pair = .Pair; + + package: + // YAML value type. + alias Value = SumType!( + YAMLInvalid, YAMLNull, YAMLMerge, + bool, long, real, ubyte[], SysTime, string, + Node.Pair[], Node[]); + + // Can Value hold this type naturally? + enum allowed(T) = isIntegral!T || + isFloatingPoint!T || + isSomeString!T || + is(typeof({ Value i = T.init; })); + + // Stored value. + Value value_; + // Start position of the node. + Mark startMark_; + + // Tag of the node. + string tag_; + // Node scalar style. Used to remember style this node was loaded with. + ScalarStyle scalarStyle = ScalarStyle.invalid; + // Node collection style. Used to remember style this node was loaded with. + CollectionStyle collectionStyle = CollectionStyle.invalid; + + public: + /** Construct a Node from a value. + * + * Any type except for Node can be stored in a Node, but default YAML + * types (integers, floats, strings, timestamps, etc.) will be stored + * more efficiently. To create a node representing a null value, + * construct it from YAMLNull. + * + * If value is a node, its value will be copied directly. The tag and + * other information attached to the original node will be discarded. + * + * If value is an array of nodes or pairs, it is stored directly. + * Otherwise, every value in the array is converted to a node, and + * those nodes are stored. + * + * Note that to emit any non-default types you store + * in a node, you need a Representer to represent them in YAML - + * otherwise emitting will fail. + * + * Params: value = Value to store in the node. + * tag = Overrides tag of the node when emitted, regardless + * of tag determined by Representer. Representer uses + * this to determine YAML data type when a D data type + * maps to multiple different YAML data types. Tag must + * be in full form, e.g. "tag:yaml.org,2002:int", not + * a shortcut, like "!!int". + */ + this(T)(T value, const string tag = null) @safe + if (allowed!T || isArray!T || isAssociativeArray!T || is(Unqual!T == Node) || castableToNode!T) + { + tag_ = tag; + + //Unlike with assignment, we're just copying the value. + static if (is(Unqual!T == Node)) + { + setValue(value.value_); + } + else static if(isSomeString!T) + { + setValue(value.to!string); + } + else static if(is(Unqual!T == bool)) + { + setValue(cast(bool)value); + } + else static if(isIntegral!T) + { + setValue(cast(long)value); + } + else static if(isFloatingPoint!T) + { + setValue(cast(real)value); + } + else static if (isArray!T) + { + alias ElementT = Unqual!(ElementType!T); + // Construction from raw node or pair array. + static if(is(ElementT == Node) || is(ElementT == Node.Pair)) + { + setValue(value); + } + // Need to handle byte buffers separately. + else static if(is(ElementT == byte) || is(ElementT == ubyte)) + { + setValue(cast(ubyte[]) value); + } + else + { + Node[] nodes; + foreach(ref v; value) + { + nodes ~= Node(v); + } + setValue(nodes); + } + } + else static if (isAssociativeArray!T) + { + Node.Pair[] pairs; + foreach(k, ref v; value) + { + pairs ~= Pair(k, v); + } + setValue(pairs); + } + // User defined type. + else + { + setValue(value); + } + } + /// Construct a scalar node + @safe unittest + { + // Integer + { + auto node = Node(5); + } + // String + { + auto node = Node("Hello world!"); + } + // Floating point + { + auto node = Node(5.0f); + } + // Boolean + { + auto node = Node(true); + } + // Time + { + auto node = Node(SysTime(DateTime(2005, 6, 15, 20, 0, 0), UTC())); + } + // Integer, dumped as a string + { + auto node = Node(5, "tag:yaml.org,2002:str"); + } + } + /// Construct a sequence node + @safe unittest + { + // Will be emitted as a sequence (default for arrays) + { + auto seq = Node([1, 2, 3, 4, 5]); + } + // Will be emitted as a set (overridden tag) + { + auto set = Node([1, 2, 3, 4, 5], "tag:yaml.org,2002:set"); + } + // Can also store arrays of arrays + { + auto node = Node([[1,2], [3,4]]); + } + } + /// Construct a mapping node + @safe unittest + { + // Will be emitted as an unordered mapping (default for mappings) + auto map = Node([1 : "a", 2 : "b"]); + // Will be emitted as an ordered map (overridden tag) + auto omap = Node([1 : "a", 2 : "b"], "tag:yaml.org,2002:omap"); + // Will be emitted as pairs (overridden tag) + auto pairs = Node([1 : "a", 2 : "b"], "tag:yaml.org,2002:pairs"); + } + @safe unittest + { + { + auto node = Node(42); + assert(node.nodeID == NodeID.scalar); + assert(node.as!int == 42 && node.as!float == 42.0f && node.as!string == "42"); + } + + { + auto node = Node("string"); + assert(node.as!string == "string"); + } + } + @safe unittest + { + with(Node([1, 2, 3])) + { + assert(nodeID == NodeID.sequence); + assert(length == 3); + assert(opIndex(2).as!int == 3); + } + + } + @safe unittest + { + int[string] aa; + aa["1"] = 1; + aa["2"] = 2; + with(Node(aa)) + { + assert(nodeID == NodeID.mapping); + assert(length == 2); + assert(opIndex("2").as!int == 2); + } + } + @safe unittest + { + auto node = Node(Node(4, "tag:yaml.org,2002:str")); + assert(node == 4); + assert(node.tag_ == ""); + } + + /** Construct a node from arrays of _keys and _values. + * + * Constructs a mapping node with key-value pairs from + * _keys and _values, keeping their order. Useful when order + * is important (ordered maps, pairs). + * + * + * keys and values must have equal length. + * + * + * If _keys and/or _values are nodes, they are stored directly/ + * Otherwise they are converted to nodes and then stored. + * + * Params: keys = Keys of the mapping, from first to last pair. + * values = Values of the mapping, from first to last pair. + * tag = Overrides tag of the node when emitted, regardless + * of tag determined by Representer. Representer uses + * this to determine YAML data type when a D data type + * maps to multiple different YAML data types. + * This is used to differentiate between YAML unordered + * mappings ("!!map"), ordered mappings ("!!omap"), and + * pairs ("!!pairs") which are all internally + * represented as an array of node pairs. Tag must be + * in full form, e.g. "tag:yaml.org,2002:omap", not a + * shortcut, like "!!omap". + * + */ + this(K, V)(K[] keys, V[] values, const string tag = null) + if(!(isSomeString!(K[]) || isSomeString!(V[]))) + in(keys.length == values.length, + "Lengths of keys and values arrays to construct " ~ + "a YAML node from don't match") + { + tag_ = tag; + + Node.Pair[] pairs; + foreach(i; 0 .. keys.length){pairs ~= Pair(keys[i], values[i]);} + setValue(pairs); + } + /// + @safe unittest + { + // Will be emitted as an unordered mapping (default for mappings) + auto map = Node([1, 2], ["a", "b"]); + // Will be emitted as an ordered map (overridden tag) + auto omap = Node([1, 2], ["a", "b"], "tag:yaml.org,2002:omap"); + // Will be emitted as pairs (overriden tag) + auto pairs = Node([1, 2], ["a", "b"], "tag:yaml.org,2002:pairs"); + } + @safe unittest + { + with(Node(["1", "2"], [1, 2])) + { + assert(nodeID == NodeID.mapping); + assert(length == 2); + assert(opIndex("2").as!int == 2); + } + + } + + /// Is this node valid (initialized)? + @property bool isValid() const scope @safe pure nothrow @nogc + { + return value_.match!((const YAMLInvalid _) => false, _ => true); + } + + /// Return tag of the node. + @property string tag() const return scope @safe pure nothrow @nogc + { + return tag_; + } + + /// Return the start position of the node. + @property Mark startMark() const return scope @safe pure nothrow @nogc + { + return startMark_; + } + + /** Equality test. + * + * If T is Node, recursively compares all subnodes. + * This might be quite expensive if testing entire documents. + * + * If T is not Node, gets a value of type T from the node and tests + * equality with that. + * + * To test equality with a null YAML value, use YAMLNull. + * + * Params: rhs = Variable to test equality with. + * + * Returns: true if equal, false otherwise. + */ + bool opEquals(const scope Node rhs) const scope @safe + { + return opCmp(rhs) == 0; + } + bool opEquals(T)(const scope auto ref T rhs) const @safe + { + try + { + auto stored = get!(T, No.stringConversion); + // NaNs aren't normally equal to each other, but we'll pretend they are. + static if(isFloatingPoint!T) + { + return rhs == stored || (isNaN(rhs) && isNaN(stored)); + } + else + { + return rhs == stored; + } + } + catch(NodeException e) + { + return false; + } + } + /// + @safe unittest + { + auto node = Node(42); + + assert(node == 42); + assert(node != "42"); + assert(node != "43"); + + auto node2 = Node(YAMLNull()); + assert(node2 == YAMLNull()); + + const node3 = Node(42); + assert(node3 == 42); + } + + /// Shortcut for get(). + alias as = get; + + /** Get the value of the node as specified type. + * + * If the specifed type does not match type in the node, + * conversion is attempted. The stringConversion template + * parameter can be used to disable conversion from non-string + * types to strings. + * + * Numeric values are range checked, throwing if out of range of + * requested type. + * + * Timestamps are stored as std.datetime.SysTime. + * Binary values are decoded and stored as ubyte[]. + * + * To get a null value, use get!YAMLNull . This is to + * prevent getting null values for types such as strings or classes. + * + * $(BR)$(B Mapping default values:) + * + * $(PBR + * The '=' key can be used to denote the default value of a mapping. + * This can be used when a node is scalar in early versions of a program, + * but is replaced by a mapping later. Even if the node is a mapping, the + * get method can be used as if it was a scalar if it has a default value. + * This way, new YAML files where the node is a mapping can still be read + * by old versions of the program, which expect the node to be a scalar. + * ) + * + * Returns: Value of the node as specified type. + * + * Throws: NodeException if unable to convert to specified type, or if + * the value is out of range of requested type. + */ + inout(T) get(T, Flag!"stringConversion" stringConversion = Yes.stringConversion)() inout @safe return scope + { + static assert (allowed!(Unqual!T) || + hasNodeConstructor!(inout(Unqual!T)) || + (!hasIndirections!(Unqual!T) && hasNodeConstructor!(Unqual!T))); + + static if(!allowed!(Unqual!T)) + { + static if (hasSimpleNodeConstructor!(Unqual!T) || hasSimpleNodeConstructor!(inout(Unqual!T))) + { + alias params = AliasSeq!(this); + } + else static if (hasExpandedNodeConstructor!(Unqual!T) || hasExpandedNodeConstructor!(inout(Unqual!T))) + { + alias params = AliasSeq!(this, tag_); + } + else + { + static assert(0, "Unknown Node constructor?"); + } + + static if (is(T == class)) + { + return new inout T(params); + } + else static if (is(T == struct)) + { + return T(params); + } + else + { + static assert(0, "Unhandled user type"); + } + } else { + static if (canBeType!T) + if (isType!(Unqual!T)) { return getValue!T; } + + // If we're getting from a mapping and we're not getting Node.Pair[], + // we're getting the default value. + if(nodeID == NodeID.mapping){return this["="].get!( T, stringConversion);} + + static if(isSomeString!T) + { + static if(!stringConversion) + { + enforce(type == NodeType.string, new NodeException( + "Node stores unexpected type: " ~ text(type) ~ + ". Expected: " ~ typeid(T).toString(), startMark_)); + return to!T(getValue!string); + } + else + { + // Try to convert to string. + try + { + return coerceValue!T().dup; + } + catch (MatchException e) + { + throw new NodeException("Unable to convert node value to string", startMark_); + } + } + } + else static if(isFloatingPoint!T) + { + final switch (type) + { + case NodeType.integer: + return to!T(getValue!long); + case NodeType.decimal: + return to!T(getValue!real); + case NodeType.binary: + case NodeType.string: + case NodeType.boolean: + case NodeType.null_: + case NodeType.merge: + case NodeType.invalid: + case NodeType.timestamp: + case NodeType.mapping: + case NodeType.sequence: + throw new NodeException("Node stores unexpected type: " ~ text(type) ~ + ". Expected: " ~ typeid(T).toString, startMark_); + } + } + else static if(isIntegral!T) + { + enforce(type == NodeType.integer, new NodeException("Node stores unexpected type: " ~ text(type) ~ + ". Expected: " ~ typeid(T).toString, startMark_)); + immutable temp = getValue!long; + enforce(temp >= T.min && temp <= T.max, + new NodeException("Integer value of type " ~ typeid(T).toString() ~ + " out of range. Value: " ~ to!string(temp), startMark_)); + return temp.to!T; + } + else throw new NodeException("Node stores unexpected type: " ~ text(type) ~ + ". Expected: " ~ typeid(T).toString, startMark_); + } + } + /// ditto + T get(T)() const + if (hasIndirections!(Unqual!T) && hasNodeConstructor!(Unqual!T) && (!hasNodeConstructor!(inout(Unqual!T)))) + { + static if (hasSimpleNodeConstructor!T) + { + alias params = AliasSeq!(this); + } + else static if (hasExpandedNodeConstructor!T) + { + alias params = AliasSeq!(this, tag_); + } + else + { + static assert(0, "Unknown Node constructor?"); + } + static if (is(T == class)) + { + return new T(params); + } + else static if (is(T == struct)) + { + return T(params); + } + else + { + static assert(0, "Unhandled user type"); + } + } + /// Automatic type conversion + @safe unittest + { + auto node = Node(42); + + assert(node.get!int == 42); + assert(node.get!string == "42"); + assert(node.get!double == 42.0); + } + /// Scalar node to struct and vice versa + @safe unittest + { + import dyaml.dumper : dumper; + import dyaml.loader : Loader; + static struct MyStruct + { + int x, y, z; + + this(int x, int y, int z) @safe + { + this.x = x; + this.y = y; + this.z = z; + } + + this(scope const Node node) @safe + { + // `std.array.split` is not marked as taking a `scope` range, + // but we don't escape a reference. + scope parts = () @trusted { return node.as!string().split(":"); }(); + x = parts[0].to!int; + y = parts[1].to!int; + z = parts[2].to!int; + } + + Node opCast(T: Node)() @safe + { + //Using custom scalar format, x:y:z. + auto scalar = format("%s:%s:%s", x, y, z); + //Representing as a scalar, with custom tag to specify this data type. + return Node(scalar, "!mystruct.tag"); + } + } + + auto appender = new Appender!string; + + // Dump struct to yaml document + dumper().dump(appender, Node(MyStruct(1,2,3))); + + // Read yaml document back as a MyStruct + auto loader = Loader.fromString(appender.data); + Node node = loader.load(); + assert(node.as!MyStruct == MyStruct(1,2,3)); + } + /// Sequence node to struct and vice versa + @safe unittest + { + import dyaml.dumper : dumper; + import dyaml.loader : Loader; + static struct MyStruct + { + int x, y, z; + + this(int x, int y, int z) @safe + { + this.x = x; + this.y = y; + this.z = z; + } + + this(Node node) @safe + { + x = node[0].as!int; + y = node[1].as!int; + z = node[2].as!int; + } + + Node opCast(T: Node)() + { + return Node([x, y, z], "!mystruct.tag"); + } + } + + auto appender = new Appender!string; + + // Dump struct to yaml document + dumper().dump(appender, Node(MyStruct(1,2,3))); + + // Read yaml document back as a MyStruct + auto loader = Loader.fromString(appender.data); + Node node = loader.load(); + assert(node.as!MyStruct == MyStruct(1,2,3)); + } + /// Mapping node to struct and vice versa + @safe unittest + { + import dyaml.dumper : dumper; + import dyaml.loader : Loader; + static struct MyStruct + { + int x, y, z; + + Node opCast(T: Node)() + { + auto pairs = [Node.Pair("x", x), + Node.Pair("y", y), + Node.Pair("z", z)]; + return Node(pairs, "!mystruct.tag"); + } + + this(int x, int y, int z) + { + this.x = x; + this.y = y; + this.z = z; + } + + this(Node node) @safe + { + x = node["x"].as!int; + y = node["y"].as!int; + z = node["z"].as!int; + } + } + + auto appender = new Appender!string; + + // Dump struct to yaml document + dumper().dump(appender, Node(MyStruct(1,2,3))); + + // Read yaml document back as a MyStruct + auto loader = Loader.fromString(appender.data); + Node node = loader.load(); + assert(node.as!MyStruct == MyStruct(1,2,3)); + } + /// Classes can be used too + @system unittest { + import dyaml.dumper : dumper; + import dyaml.loader : Loader; + + static class MyClass + { + int x, y, z; + + this(int x, int y, int z) + { + this.x = x; + this.y = y; + this.z = z; + } + + this(scope const Node node) @safe inout + { + // `std.array.split` is not marked as taking a `scope` range, + // but we don't escape a reference. + scope parts = () @trusted { return node.as!string().split(":"); }(); + x = parts[0].to!int; + y = parts[1].to!int; + z = parts[2].to!int; + } + + ///Useful for Node.as!string. + override string toString() + { + return format("MyClass(%s, %s, %s)", x, y, z); + } + + Node opCast(T: Node)() @safe + { + //Using custom scalar format, x:y:z. + auto scalar = format("%s:%s:%s", x, y, z); + //Representing as a scalar, with custom tag to specify this data type. + return Node(scalar, "!myclass.tag"); + } + override bool opEquals(Object o) + { + if (auto other = cast(MyClass)o) + { + return (other.x == x) && (other.y == y) && (other.z == z); + } + return false; + } + } + auto appender = new Appender!string; + + // Dump class to yaml document + dumper().dump(appender, Node(new MyClass(1,2,3))); + + // Read yaml document back as a MyClass + auto loader = Loader.fromString(appender.data); + Node node = loader.load(); + assert(node.as!MyClass == new MyClass(1,2,3)); + } + // Make sure custom tags and styles are kept. + @safe unittest + { + static struct MyStruct + { + Node opCast(T: Node)() + { + auto node = Node("hi", "!mystruct.tag"); + node.setStyle(ScalarStyle.doubleQuoted); + return node; + } + } + + auto node = Node(MyStruct.init); + assert(node.tag == "!mystruct.tag"); + assert(node.scalarStyle == ScalarStyle.doubleQuoted); + } + // ditto, but for collection style + @safe unittest + { + static struct MyStruct + { + Node opCast(T: Node)() + { + auto node = Node(["hi"], "!mystruct.tag"); + node.setStyle(CollectionStyle.flow); + return node; + } + } + + auto node = Node(MyStruct.init); + assert(node.tag == "!mystruct.tag"); + assert(node.collectionStyle == CollectionStyle.flow); + } + @safe unittest + { + assertThrown!NodeException(Node("42").get!int); + assertThrown!NodeException(Node("42").get!double); + assertThrown!NodeException(Node(long.max).get!ushort); + Node(YAMLNull()).get!YAMLNull; + } + @safe unittest + { + const node = Node(42); + assert(node.get!int == 42); + assert(node.get!string == "42"); + assert(node.get!double == 42.0); + + immutable node2 = Node(42); + assert(node2.get!int == 42); + assert(node2.get!(const int) == 42); + assert(node2.get!(immutable int) == 42); + assert(node2.get!string == "42"); + assert(node2.get!(const string) == "42"); + assert(node2.get!(immutable string) == "42"); + assert(node2.get!double == 42.0); + assert(node2.get!(const double) == 42.0); + assert(node2.get!(immutable double) == 42.0); + } + + /** If this is a collection, return its _length. + * + * Otherwise, throw NodeException. + * + * Returns: Number of elements in a sequence or key-value pairs in a mapping. + * + * Throws: NodeException if this is not a sequence nor a mapping. + */ + @property size_t length() const @safe + { + final switch(nodeID) + { + case NodeID.sequence: + return getValue!(Node[]).length; + case NodeID.mapping: + return getValue!(Pair[]).length; + case NodeID.scalar: + case NodeID.invalid: + throw new NodeException("Trying to get length of a " ~ nodeTypeString ~ " node", + startMark_); + } + } + @safe unittest + { + auto node = Node([1,2,3]); + assert(node.length == 3); + const cNode = Node([1,2,3]); + assert(cNode.length == 3); + immutable iNode = Node([1,2,3]); + assert(iNode.length == 3); + } + + /** Get the element at specified index. + * + * If the node is a sequence, index must be integral. + * + * + * If the node is a mapping, return the value corresponding to the first + * key equal to index. containsKey() can be used to determine if a mapping + * has a specific key. + * + * To get element at a null index, use YAMLNull for index. + * + * Params: index = Index to use. + * + * Returns: Value corresponding to the index. + * + * Throws: NodeException if the index could not be found, + * non-integral index is used with a sequence or the node is + * not a collection. + */ + ref inout(Node) opIndex(T)(T index) inout return scope @safe + { + final switch (nodeID) + { + case NodeID.sequence: + checkSequenceIndex(index); + static if(isIntegral!T) + { + return getValue!(Node[])[index]; + } + else + { + assert(false, "Only integers may index sequence nodes"); + } + case NodeID.mapping: + auto idx = findPair(index); + if(idx >= 0) + { + return getValue!(Pair[])[idx].value; + } + + string msg = "Mapping index not found" ~ (isSomeString!T ? ": " ~ to!string(index) : ""); + throw new NodeException(msg, startMark_); + case NodeID.scalar: + case NodeID.invalid: + throw new NodeException("Trying to index a " ~ nodeTypeString ~ " node", startMark_); + } + } + /// + @safe unittest + { + Node narray = Node([11, 12, 13, 14]); + Node nmap = Node(["11", "12", "13", "14"], [11, 12, 13, 14]); + + assert(narray[0].as!int == 11); + assert(null !is collectException(narray[42])); + assert(nmap["11"].as!int == 11); + assert(nmap["14"].as!int == 14); + } + @safe unittest + { + Node narray = Node([11, 12, 13, 14]); + Node nmap = Node(["11", "12", "13", "14"], [11, 12, 13, 14]); + + assert(narray[0].as!int == 11); + assert(null !is collectException(narray[42])); + assert(nmap["11"].as!int == 11); + assert(nmap["14"].as!int == 14); + assert(null !is collectException(nmap["42"])); + + narray.add(YAMLNull()); + nmap.add(YAMLNull(), "Nothing"); + assert(narray[4].as!YAMLNull == YAMLNull()); + assert(nmap[YAMLNull()].as!string == "Nothing"); + + assertThrown!NodeException(nmap[11]); + assertThrown!NodeException(nmap[14]); + } + + /** Determine if a collection contains specified value. + * + * If the node is a sequence, check if it contains the specified value. + * If it's a mapping, check if it has a value that matches specified value. + * + * Params: rhs = Item to look for. Use YAMLNull to check for a null value. + * + * Returns: true if rhs was found, false otherwise. + * + * Throws: NodeException if the node is not a collection. + */ + bool contains(T)(T rhs) const + { + return contains_!(T, No.key, "contains")(rhs); + } + @safe unittest + { + auto mNode = Node(["1", "2", "3"]); + assert(mNode.contains("2")); + const cNode = Node(["1", "2", "3"]); + assert(cNode.contains("2")); + immutable iNode = Node(["1", "2", "3"]); + assert(iNode.contains("2")); + } + + + /** Determine if a mapping contains specified key. + * + * Params: rhs = Key to look for. Use YAMLNull to check for a null key. + * + * Returns: true if rhs was found, false otherwise. + * + * Throws: NodeException if the node is not a mapping. + */ + bool containsKey(T)(T rhs) const + { + return contains_!(T, Yes.key, "containsKey")(rhs); + } + + // Unittest for contains() and containsKey(). + @safe unittest + { + auto seq = Node([1, 2, 3, 4, 5]); + assert(seq.contains(3)); + assert(seq.contains(5)); + assert(!seq.contains("5")); + assert(!seq.contains(6)); + assert(!seq.contains(float.nan)); + assertThrown!NodeException(seq.containsKey(5)); + + auto seq2 = Node(["1", "2"]); + assert(seq2.contains("1")); + assert(!seq2.contains(1)); + + auto map = Node(["1", "2", "3", "4"], [1, 2, 3, 4]); + assert(map.contains(1)); + assert(!map.contains("1")); + assert(!map.contains(5)); + assert(!map.contains(float.nan)); + assert(map.containsKey("1")); + assert(map.containsKey("4")); + assert(!map.containsKey(1)); + assert(!map.containsKey("5")); + + assert(!seq.contains(YAMLNull())); + assert(!map.contains(YAMLNull())); + assert(!map.containsKey(YAMLNull())); + seq.add(YAMLNull()); + map.add("Nothing", YAMLNull()); + assert(seq.contains(YAMLNull())); + assert(map.contains(YAMLNull())); + assert(!map.containsKey(YAMLNull())); + map.add(YAMLNull(), "Nothing"); + assert(map.containsKey(YAMLNull())); + + auto map2 = Node([1, 2, 3, 4], [1, 2, 3, 4]); + assert(!map2.contains("1")); + assert(map2.contains(1)); + assert(!map2.containsKey("1")); + assert(map2.containsKey(1)); + + // scalar + assertThrown!NodeException(Node(1).contains(4)); + assertThrown!NodeException(Node(1).containsKey(4)); + + auto mapNan = Node([1.0, 2, double.nan], [1, double.nan, 5]); + + assert(mapNan.contains(double.nan)); + assert(mapNan.containsKey(double.nan)); + } + + /// Assignment (shallow copy) by value. + void opAssign()(auto ref Node rhs) + { + assumeWontThrow(setValue(rhs.value_)); + startMark_ = rhs.startMark_; + tag_ = rhs.tag_; + scalarStyle = rhs.scalarStyle; + collectionStyle = rhs.collectionStyle; + } + // Unittest for opAssign(). + @safe unittest + { + auto seq = Node([1, 2, 3, 4, 5]); + auto assigned = seq; + assert(seq == assigned, + "Node.opAssign() doesn't produce an equivalent copy"); + } + + /** Set element at specified index in a collection. + * + * This method can only be called on collection nodes. + * + * If the node is a sequence, index must be integral. + * + * If the node is a mapping, sets the _value corresponding to the first + * key matching index (including conversion, so e.g. "42" matches 42). + * + * If the node is a mapping and no key matches index, a new key-value + * pair is added to the mapping. In sequences the index must be in + * range. This ensures behavior siilar to D arrays and associative + * arrays. + * + * To set element at a null index, use YAMLNull for index. + * + * Params: + * value = Value to assign. + * index = Index of the value to set. + * + * Throws: NodeException if the node is not a collection, index is out + * of range or if a non-integral index is used on a sequence node. + */ + void opIndexAssign(K, V)(V value, K index) + { + final switch (nodeID) + { + case NodeID.sequence: + checkSequenceIndex(index); + static if(isIntegral!K || is(Unqual!K == bool)) + { + auto nodes = getValue!(Node[]); + static if(is(Unqual!V == Node)){nodes[index] = value;} + else {nodes[index] = Node(value);} + setValue(nodes); + return; + } + assert(false, "Only integers may index sequence nodes"); + case NodeID.mapping: + const idx = findPair(index); + if(idx < 0){add(index, value);} + else + { + auto pairs = as!(Node.Pair[])(); + static if(is(Unqual!V == Node)){pairs[idx].value = value;} + else {pairs[idx].value = Node(value);} + setValue(pairs); + } + return; + case NodeID.scalar: + case NodeID.invalid: + throw new NodeException("Trying to index a " ~ nodeTypeString ~ " node", startMark_); + } + } + @safe unittest + { + with(Node([1, 2, 3, 4, 3])) + { + opIndexAssign(42, 3); + assert(length == 5); + assert(opIndex(3).as!int == 42); + + opIndexAssign(YAMLNull(), 0); + assert(opIndex(0) == YAMLNull()); + } + with(Node(["1", "2", "3"], [4, 5, 6])) + { + opIndexAssign(42, "3"); + opIndexAssign(123, 456); + assert(length == 4); + assert(opIndex("3").as!int == 42); + assert(opIndex(456).as!int == 123); + + opIndexAssign(43, 3); + //3 and "3" should be different + assert(length == 5); + assert(opIndex("3").as!int == 42); + assert(opIndex(3).as!int == 43); + + opIndexAssign(YAMLNull(), "2"); + assert(opIndex("2") == YAMLNull()); + } + } + + /** Return a range object iterating over a sequence, getting each + * element as T. + * + * If T is Node, simply iterate over the nodes in the sequence. + * Otherwise, convert each node to T during iteration. + * + * Throws: NodeException if the node is not a sequence or an element + * could not be converted to specified type. + */ + template sequence(T = Node) + { + struct Range(N) + { + N subnodes; + size_t position; + + this(N nodes) + { + subnodes = nodes; + position = 0; + } + + /* Input range functionality. */ + bool empty() const @property { return position >= subnodes.length; } + + void popFront() + { + enforce(!empty, "Attempted to popFront an empty sequence"); + position++; + } + + T front() const @property + { + enforce(!empty, "Attempted to take the front of an empty sequence"); + static if (is(Unqual!T == Node)) + return subnodes[position]; + else + return subnodes[position].as!T; + } + + /* Forward range functionality. */ + Range save() { return this; } + + /* Bidirectional range functionality. */ + void popBack() + { + enforce(!empty, "Attempted to popBack an empty sequence"); + subnodes = subnodes[0 .. $ - 1]; + } + + T back() + { + enforce(!empty, "Attempted to take the back of an empty sequence"); + static if (is(Unqual!T == Node)) + return subnodes[$ - 1]; + else + return subnodes[$ - 1].as!T; + } + + /* Random-access range functionality. */ + size_t length() const @property { return subnodes.length; } + T opIndex(size_t index) + { + static if (is(Unqual!T == Node)) + return subnodes[index]; + else + return subnodes[index].as!T; + } + + static assert(isInputRange!Range); + static assert(isForwardRange!Range); + static assert(isBidirectionalRange!Range); + static assert(isRandomAccessRange!Range); + } + auto sequence() + { + enforce(nodeID == NodeID.sequence, + new NodeException("Trying to 'sequence'-iterate over a " ~ nodeTypeString ~ " node", + startMark_)); + return Range!(Node[])(get!(Node[])); + } + auto sequence() const + { + enforce(nodeID == NodeID.sequence, + new NodeException("Trying to 'sequence'-iterate over a " ~ nodeTypeString ~ " node", + startMark_)); + return Range!(const(Node)[])(get!(Node[])); + } + } + @safe unittest + { + Node n1 = Node([1, 2, 3, 4]); + int[int] array; + Node n2 = Node(array); + const n3 = Node([1, 2, 3, 4]); + + auto r = n1.sequence!int.map!(x => x * 10); + assert(r.equal([10, 20, 30, 40])); + + assertThrown(n2.sequence); + + auto r2 = n3.sequence!int.map!(x => x * 10); + assert(r2.equal([10, 20, 30, 40])); + } + + /** Return a range object iterating over mapping's pairs. + * + * Throws: NodeException if the node is not a mapping. + * + */ + template mapping() + { + struct Range(T) + { + T pairs; + size_t position; + + this(T pairs) @safe + { + this.pairs = pairs; + position = 0; + } + + /* Input range functionality. */ + bool empty() @safe { return position >= pairs.length; } + + void popFront() @safe + { + enforce(!empty, "Attempted to popFront an empty mapping"); + position++; + } + + auto front() @safe + { + enforce(!empty, "Attempted to take the front of an empty mapping"); + return pairs[position]; + } + + /* Forward range functionality. */ + Range save() @safe { return this; } + + /* Bidirectional range functionality. */ + void popBack() @safe + { + enforce(!empty, "Attempted to popBack an empty mapping"); + pairs = pairs[0 .. $ - 1]; + } + + auto back() @safe + { + enforce(!empty, "Attempted to take the back of an empty mapping"); + return pairs[$ - 1]; + } + + /* Random-access range functionality. */ + size_t length() const @property @safe { return pairs.length; } + auto opIndex(size_t index) @safe { return pairs[index]; } + + static assert(isInputRange!Range); + static assert(isForwardRange!Range); + static assert(isBidirectionalRange!Range); + static assert(isRandomAccessRange!Range); + } + + auto mapping() + { + enforce(nodeID == NodeID.mapping, + new NodeException("Trying to 'mapping'-iterate over a " + ~ nodeTypeString ~ " node", startMark_)); + return Range!(Node.Pair[])(get!(Node.Pair[])); + } + auto mapping() const + { + enforce(nodeID == NodeID.mapping, + new NodeException("Trying to 'mapping'-iterate over a " + ~ nodeTypeString ~ " node", startMark_)); + return Range!(const(Node.Pair)[])(get!(Node.Pair[])); + } + } + @safe unittest + { + int[int] array; + Node n = Node(array); + n[1] = "foo"; + n[2] = "bar"; + n[3] = "baz"; + + string[int] test; + foreach (pair; n.mapping) + test[pair.key.as!int] = pair.value.as!string.idup; + + assert(test[1] == "foo"); + assert(test[2] == "bar"); + assert(test[3] == "baz"); + + int[int] constArray = [1: 2, 3: 4]; + const x = Node(constArray); + foreach (pair; x.mapping) + assert(pair.value == constArray[pair.key.as!int]); + } + + /** Return a range object iterating over mapping's keys. + * + * If K is Node, simply iterate over the keys in the mapping. + * Otherwise, convert each key to T during iteration. + * + * Throws: NodeException if the nodes is not a mapping or an element + * could not be converted to specified type. + */ + auto mappingKeys(K = Node)() const + { + enforce(nodeID == NodeID.mapping, + new NodeException("Trying to 'mappingKeys'-iterate over a " + ~ nodeTypeString ~ " node", startMark_)); + static if (is(Unqual!K == Node)) + return mapping.map!(pair => pair.key); + else + return mapping.map!(pair => pair.key.as!K); + } + @safe unittest + { + int[int] array; + Node m1 = Node(array); + m1["foo"] = 2; + m1["bar"] = 3; + + assert(m1.mappingKeys.equal(["foo", "bar"]) || m1.mappingKeys.equal(["bar", "foo"])); + + const cm1 = Node(["foo": 2, "bar": 3]); + + assert(cm1.mappingKeys.equal(["foo", "bar"]) || cm1.mappingKeys.equal(["bar", "foo"])); + } + + /** Return a range object iterating over mapping's values. + * + * If V is Node, simply iterate over the values in the mapping. + * Otherwise, convert each key to V during iteration. + * + * Throws: NodeException if the nodes is not a mapping or an element + * could not be converted to specified type. + */ + auto mappingValues(V = Node)() const + { + enforce(nodeID == NodeID.mapping, + new NodeException("Trying to 'mappingValues'-iterate over a " + ~ nodeTypeString ~ " node", startMark_)); + static if (is(Unqual!V == Node)) + return mapping.map!(pair => pair.value); + else + return mapping.map!(pair => pair.value.as!V); + } + @safe unittest + { + int[int] array; + Node m1 = Node(array); + m1["foo"] = 2; + m1["bar"] = 3; + + assert(m1.mappingValues.equal([2, 3]) || m1.mappingValues.equal([3, 2])); + + const cm1 = Node(["foo": 2, "bar": 3]); + + assert(cm1.mappingValues.equal([2, 3]) || cm1.mappingValues.equal([3, 2])); + } + + + /** Foreach over a sequence, getting each element as T. + * + * If T is Node, simply iterate over the nodes in the sequence. + * Otherwise, convert each node to T during iteration. + * + * Throws: NodeException if the node is not a sequence or an + * element could not be converted to specified type. + */ + int opApply(D)(D dg) if (isDelegate!D && (Parameters!D.length == 1)) + { + enforce(nodeID == NodeID.sequence, + new NodeException("Trying to sequence-foreach over a " ~ nodeTypeString ~ " node", + startMark_)); + + int result; + foreach(ref node; get!(Node[])) + { + static if(is(Unqual!(Parameters!D[0]) == Node)) + { + result = dg(node); + } + else + { + Parameters!D[0] temp = node.as!(Parameters!D[0]); + result = dg(temp); + } + if(result){break;} + } + return result; + } + /// ditto + int opApply(D)(D dg) const if (isDelegate!D && (Parameters!D.length == 1)) + { + enforce(nodeID == NodeID.sequence, + new NodeException("Trying to sequence-foreach over a " ~ nodeTypeString ~ " node", + startMark_)); + + int result; + foreach(ref node; get!(Node[])) + { + static if(is(Unqual!(Parameters!D[0]) == Node)) + { + result = dg(node); + } + else + { + Parameters!D[0] temp = node.as!(Parameters!D[0]); + result = dg(temp); + } + if(result){break;} + } + return result; + } + @safe unittest + { + Node n1 = Node(11); + Node n2 = Node(12); + Node n3 = Node(13); + Node n4 = Node(14); + Node narray = Node([n1, n2, n3, n4]); + const cNArray = narray; + + int[] array, array2, array3; + foreach(int value; narray) + { + array ~= value; + } + foreach(Node node; narray) + { + array2 ~= node.as!int; + } + foreach (const Node node; cNArray) + { + array3 ~= node.as!int; + } + assert(array == [11, 12, 13, 14]); + assert(array2 == [11, 12, 13, 14]); + assert(array3 == [11, 12, 13, 14]); + } + @safe unittest + { + string[] testStrs = ["1", "2", "3"]; + auto node1 = Node(testStrs); + int i = 0; + foreach (string elem; node1) + { + assert(elem == testStrs[i]); + i++; + } + const node2 = Node(testStrs); + i = 0; + foreach (string elem; node2) + { + assert(elem == testStrs[i]); + i++; + } + immutable node3 = Node(testStrs); + i = 0; + foreach (string elem; node3) + { + assert(elem == testStrs[i]); + i++; + } + } + @safe unittest + { + auto node = Node(["a":1, "b":2, "c":3]); + const cNode = node; + assertThrown({foreach (Node n; node) {}}()); + assertThrown({foreach (const Node n; cNode) {}}()); + } + + /** Foreach over a mapping, getting each key/value as K/V. + * + * If the K and/or V is Node, simply iterate over the nodes in the mapping. + * Otherwise, convert each key/value to T during iteration. + * + * Throws: NodeException if the node is not a mapping or an + * element could not be converted to specified type. + */ + int opApply(DG)(DG dg) if (isDelegate!DG && (Parameters!DG.length == 2)) + { + alias K = Parameters!DG[0]; + alias V = Parameters!DG[1]; + enforce(nodeID == NodeID.mapping, + new NodeException("Trying to mapping-foreach over a " ~ nodeTypeString ~ " node", + startMark_)); + + int result; + foreach(ref pair; get!(Node.Pair[])) + { + static if(is(Unqual!K == Node) && is(Unqual!V == Node)) + { + result = dg(pair.key, pair.value); + } + else static if(is(Unqual!K == Node)) + { + V tempValue = pair.value.as!V; + result = dg(pair.key, tempValue); + } + else static if(is(Unqual!V == Node)) + { + K tempKey = pair.key.as!K; + result = dg(tempKey, pair.value); + } + else + { + K tempKey = pair.key.as!K; + V tempValue = pair.value.as!V; + result = dg(tempKey, tempValue); + } + + if(result){break;} + } + return result; + } + /// ditto + int opApply(DG)(DG dg) const if (isDelegate!DG && (Parameters!DG.length == 2)) + { + alias K = Parameters!DG[0]; + alias V = Parameters!DG[1]; + enforce(nodeID == NodeID.mapping, + new NodeException("Trying to mapping-foreach over a " ~ nodeTypeString ~ " node", + startMark_)); + + int result; + foreach(ref pair; get!(Node.Pair[])) + { + static if(is(Unqual!K == Node) && is(Unqual!V == Node)) + { + result = dg(pair.key, pair.value); + } + else static if(is(Unqual!K == Node)) + { + V tempValue = pair.value.as!V; + result = dg(pair.key, tempValue); + } + else static if(is(Unqual!V == Node)) + { + K tempKey = pair.key.as!K; + result = dg(tempKey, pair.value); + } + else + { + K tempKey = pair.key.as!K; + V tempValue = pair.value.as!V; + result = dg(tempKey, tempValue); + } + + if(result){break;} + } + return result; + } + @safe unittest + { + Node n1 = Node(cast(long)11); + Node n2 = Node(cast(long)12); + Node n3 = Node(cast(long)13); + Node n4 = Node(cast(long)14); + + Node k1 = Node("11"); + Node k2 = Node("12"); + Node k3 = Node("13"); + Node k4 = Node("14"); + + Node nmap1 = Node([Pair(k1, n1), + Pair(k2, n2), + Pair(k3, n3), + Pair(k4, n4)]); + + int[string] expected = ["11" : 11, + "12" : 12, + "13" : 13, + "14" : 14]; + int[string] array; + foreach(string key, int value; nmap1) + { + array[key] = value; + } + assert(array == expected); + + Node nmap2 = Node([Pair(k1, Node(cast(long)5)), + Pair(k2, Node(true)), + Pair(k3, Node(cast(real)1.0)), + Pair(k4, Node("yarly"))]); + + foreach(scope string key, scope Node value; nmap2) + { + switch(key) + { + case "11": assert(value.as!int == 5 ); break; + case "12": assert(value.as!bool == true ); break; + case "13": assert(value.as!float == 1.0 ); break; + case "14": assert(value.as!string == "yarly"); break; + default: assert(false); + } + } + const nmap3 = nmap2; + + foreach(const Node key, const Node value; nmap3) + { + switch(key.as!string) + { + case "11": assert(value.as!int == 5 ); break; + case "12": assert(value.as!bool == true ); break; + case "13": assert(value.as!float == 1.0 ); break; + case "14": assert(value.as!string == "yarly"); break; + default: assert(false); + } + } + } + @safe unittest + { + string[int] testStrs = [0: "1", 1: "2", 2: "3"]; + auto node1 = Node(testStrs); + foreach (const int i, string elem; node1) + { + assert(elem == testStrs[i]); + } + const node2 = Node(testStrs); + foreach (const int i, string elem; node2) + { + assert(elem == testStrs[i]); + } + immutable node3 = Node(testStrs); + foreach (const int i, string elem; node3) + { + assert(elem == testStrs[i]); + } + } + @safe unittest + { + auto node = Node(["a", "b", "c"]); + const cNode = node; + assertThrown({foreach (Node a, Node b; node) {}}()); + assertThrown({foreach (const Node a, const Node b; cNode) {}}()); + } + + /** Add an element to a sequence. + * + * This method can only be called on sequence nodes. + * + * If value is a node, it is copied to the sequence directly. Otherwise + * value is converted to a node and then stored in the sequence. + * + * $(P When emitting, all values in the sequence will be emitted. When + * using the !!set tag, the user needs to ensure that all elements in + * the sequence are unique, otherwise $(B invalid) YAML code will be + * emitted.) + * + * Params: value = Value to _add to the sequence. + */ + void add(T)(T value) + { + if (!isValid) + { + setValue(Node[].init); + } + enforce(nodeID == NodeID.sequence, + new NodeException("Trying to add an element to a " ~ nodeTypeString ~ " node", startMark_)); + + auto nodes = get!(Node[])(); + static if(is(Unqual!T == Node)){nodes ~= value;} + else {nodes ~= Node(value);} + setValue(nodes); + } + @safe unittest + { + with(Node([1, 2, 3, 4])) + { + add(5.0f); + assert(opIndex(4).as!float == 5.0f); + } + with(Node()) + { + add(5.0f); + assert(opIndex(0).as!float == 5.0f); + } + with(Node(5.0f)) + { + assertThrown!NodeException(add(5.0f)); + } + with(Node([5.0f : true])) + { + assertThrown!NodeException(add(5.0f)); + } + } + + /** Add a key-value pair to a mapping. + * + * This method can only be called on mapping nodes. + * + * If key and/or value is a node, it is copied to the mapping directly. + * Otherwise it is converted to a node and then stored in the mapping. + * + * $(P It is possible for the same key to be present more than once in a + * mapping. When emitting, all key-value pairs will be emitted. + * This is useful with the "!!pairs" tag, but will result in + * $(B invalid) YAML with "!!map" and "!!omap" tags.) + * + * Params: key = Key to _add. + * value = Value to _add. + */ + void add(K, V)(K key, V value) + { + if (!isValid) + { + setValue(Node.Pair[].init); + } + enforce(nodeID == NodeID.mapping, + new NodeException("Trying to add a key-value pair to a " ~ + nodeTypeString ~ " node", + startMark_)); + + auto pairs = get!(Node.Pair[])(); + pairs ~= Pair(key, value); + setValue(pairs); + } + @safe unittest + { + with(Node([1, 2], [3, 4])) + { + add(5, "6"); + assert(opIndex(5).as!string == "6"); + } + with(Node()) + { + add(5, "6"); + assert(opIndex(5).as!string == "6"); + } + with(Node(5.0f)) + { + assertThrown!NodeException(add(5, "6")); + } + with(Node([5.0f])) + { + assertThrown!NodeException(add(5, "6")); + } + } + + /** Determine whether a key is in a mapping, and access its value. + * + * This method can only be called on mapping nodes. + * + * Params: key = Key to search for. + * + * Returns: A pointer to the value (as a Node) corresponding to key, + * or null if not found. + * + * Note: Any modification to the node can invalidate the returned + * pointer. + * + * See_Also: contains + */ + inout(Node*) opBinaryRight(string op, K)(K key) inout + if (op == "in") + { + enforce(nodeID == NodeID.mapping, new NodeException("Trying to use 'in' on a " ~ + nodeTypeString ~ " node", startMark_)); + + auto idx = findPair(key); + if(idx < 0) + { + return null; + } + else + { + return &(get!(Node.Pair[])[idx].value); + } + } + @safe unittest + { + auto mapping = Node(["foo", "baz"], ["bar", "qux"]); + assert("bad" !in mapping && ("bad" in mapping) is null); + Node* foo = "foo" in mapping; + assert(foo !is null); + assert(*foo == Node("bar")); + assert(foo.get!string == "bar"); + *foo = Node("newfoo"); + assert(mapping["foo"] == Node("newfoo")); + } + @safe unittest + { + auto mNode = Node(["a": 2]); + assert("a" in mNode); + const cNode = Node(["a": 2]); + assert("a" in cNode); + immutable iNode = Node(["a": 2]); + assert("a" in iNode); + } + + /** Remove first (if any) occurence of a value in a collection. + * + * This method can only be called on collection nodes. + * + * If the node is a sequence, the first node matching value is removed. + * If the node is a mapping, the first key-value pair where _value + * matches specified value is removed. + * + * Params: rhs = Value to _remove. + * + * Throws: NodeException if the node is not a collection. + */ + void remove(T)(T rhs) + { + remove_!(T, No.key, "remove")(rhs); + } + @safe unittest + { + with(Node([1, 2, 3, 4, 3])) + { + remove(3); + assert(length == 4); + assert(opIndex(2).as!int == 4); + assert(opIndex(3).as!int == 3); + + add(YAMLNull()); + assert(length == 5); + remove(YAMLNull()); + assert(length == 4); + } + with(Node(["1", "2", "3"], [4, 5, 6])) + { + remove(4); + assert(length == 2); + add("nullkey", YAMLNull()); + assert(length == 3); + remove(YAMLNull()); + assert(length == 2); + } + } + + /** Remove element at the specified index of a collection. + * + * This method can only be called on collection nodes. + * + * If the node is a sequence, index must be integral. + * + * If the node is a mapping, remove the first key-value pair where + * key matches index. + * + * If the node is a mapping and no key matches index, nothing is removed + * and no exception is thrown. This ensures behavior siilar to D arrays + * and associative arrays. + * + * Params: index = Index to remove at. + * + * Throws: NodeException if the node is not a collection, index is out + * of range or if a non-integral index is used on a sequence node. + */ + void removeAt(T)(T index) + { + remove_!(T, Yes.key, "removeAt")(index); + } + @safe unittest + { + with(Node([1, 2, 3, 4, 3])) + { + removeAt(3); + assertThrown!NodeException(removeAt("3")); + assert(length == 4); + assert(opIndex(3).as!int == 3); + } + with(Node(["1", "2", "3"], [4, 5, 6])) + { + // no integer 2 key, so don't remove anything + removeAt(2); + assert(length == 3); + removeAt("2"); + assert(length == 2); + add(YAMLNull(), "nullval"); + assert(length == 3); + removeAt(YAMLNull()); + assert(length == 2); + } + } + + /// Compare with another _node. + int opCmp(const scope ref Node rhs) const scope @safe + { + const bool hasNullTag = this.tag_ is null; + // Only one of them is null: we can order nodes + if ((hasNullTag) ^ (rhs.tag is null)) + return hasNullTag ? -1 : 1; + // Either both `null` or both have a value + if (!hasNullTag) + if (int result = std.algorithm.comparison.cmp(tag_, rhs.tag_)) + return result; + + static int cmp(T1, T2)(T1 a, T2 b) + { + return a > b ? 1 : + a < b ? -1 : + 0; + } + + // Compare validity: if both valid, we have to compare further. + if (!this.isValid()) + return rhs.isValid() ? -1 : 0; + if (!rhs.isValid()) + return 1; + if (const typeCmp = cmp(type, rhs.type)) + return typeCmp; + + static int compareCollections(T)(const scope ref Node lhs, const scope ref Node rhs) + { + const c1 = lhs.getValue!T; + const c2 = rhs.getValue!T; + if(c1 is c2){return 0;} + if(c1.length != c2.length) + { + return cmp(c1.length, c2.length); + } + // Equal lengths, compare items. + foreach(i; 0 .. c1.length) + { + const itemCmp = c1[i].opCmp(c2[i]); + if(itemCmp != 0){return itemCmp;} + } + return 0; + } + + final switch(type) + { + case NodeType.string: + return std.algorithm.cmp(getValue!string, + rhs.getValue!string); + case NodeType.integer: + return cmp(getValue!long, rhs.getValue!long); + case NodeType.boolean: + const b1 = getValue!bool; + const b2 = rhs.getValue!bool; + return b1 ? b2 ? 0 : 1 + : b2 ? -1 : 0; + case NodeType.binary: + const b1 = getValue!(ubyte[]); + const b2 = rhs.getValue!(ubyte[]); + return std.algorithm.cmp(b1, b2); + case NodeType.null_: + return 0; + case NodeType.decimal: + const r1 = getValue!real; + const r2 = rhs.getValue!real; + if(isNaN(r1)) + { + return isNaN(r2) ? 0 : -1; + } + if(isNaN(r2)) + { + return 1; + } + // Fuzzy equality. + if(r1 <= r2 + real.epsilon && r1 >= r2 - real.epsilon) + { + return 0; + } + return cmp(r1, r2); + case NodeType.timestamp: + const t1 = getValue!SysTime; + const t2 = rhs.getValue!SysTime; + return cmp(t1, t2); + case NodeType.mapping: + return compareCollections!(Pair[])(this, rhs); + case NodeType.sequence: + return compareCollections!(Node[])(this, rhs); + case NodeType.merge: + assert(false, "Cannot compare merge nodes"); + case NodeType.invalid: + assert(false, "Cannot compare invalid nodes"); + } + } + + // Ensure opCmp is symmetric for collections + @safe unittest + { + auto node1 = Node( + [ + Node("New York Yankees", "tag:yaml.org,2002:str"), + Node("Atlanta Braves", "tag:yaml.org,2002:str") + ], "tag:yaml.org,2002:seq" + ); + auto node2 = Node( + [ + Node("Detroit Tigers", "tag:yaml.org,2002:str"), + Node("Chicago cubs", "tag:yaml.org,2002:str") + ], "tag:yaml.org,2002:seq" + ); + assert(node1 > node2); + assert(node2 < node1); + } + + // Compute hash of the node. + hash_t toHash() nothrow const @trusted + { + const valueHash = value_.match!(v => hashOf(v)); + + return tag_ is null ? valueHash : tag_.hashOf(valueHash); + } + @safe unittest + { + assert(Node(42).toHash() != Node(41).toHash()); + assert(Node(42).toHash() != Node(42, "some-tag").toHash()); + } + + /// Get type of the node value. + @property NodeType type() const scope @safe pure nothrow @nogc + { + return this.value_.match!( + (const bool _) => NodeType.boolean, + (const long _) => NodeType.integer, + (const Node[] _) => NodeType.sequence, + (const ubyte[] _) => NodeType.binary, + (const string _) => NodeType.string, + (const Node.Pair[] _) => NodeType.mapping, + (const SysTime _) => NodeType.timestamp, + (const YAMLNull _) => NodeType.null_, + (const YAMLMerge _) => NodeType.merge, + (const real _) => NodeType.decimal, + (const YAMLInvalid _) => NodeType.invalid, + ); + } + + /// Get the kind of node this is. + @property NodeID nodeID() const scope @safe pure nothrow @nogc + { + final switch (type) + { + case NodeType.sequence: + return NodeID.sequence; + case NodeType.mapping: + return NodeID.mapping; + case NodeType.boolean: + case NodeType.integer: + case NodeType.binary: + case NodeType.string: + case NodeType.timestamp: + case NodeType.null_: + case NodeType.merge: + case NodeType.decimal: + return NodeID.scalar; + case NodeType.invalid: + return NodeID.invalid; + } + } + package: + + // Get a string representation of the node tree. Used for debugging. + // + // Params: level = Level of the node in the tree. + // + // Returns: String representing the node tree. + @property string debugString(uint level = 0) const scope @safe + { + string indent; + foreach(i; 0 .. level){indent ~= " ";} + + final switch (nodeID) + { + case NodeID.invalid: + return indent ~ "invalid"; + case NodeID.sequence: + string result = indent ~ "sequence:\n"; + foreach(ref node; get!(Node[])) + { + result ~= node.debugString(level + 1); + } + return result; + case NodeID.mapping: + string result = indent ~ "mapping:\n"; + foreach(ref pair; get!(Node.Pair[])) + { + result ~= indent ~ " pair\n"; + result ~= pair.key.debugString(level + 2); + result ~= pair.value.debugString(level + 2); + } + return result; + case NodeID.scalar: + return indent ~ "scalar(" ~ + (convertsTo!string ? get!string : text(type)) ~ ")\n"; + } + } + + + public: + @property string nodeTypeString() const scope @safe pure nothrow @nogc + { + final switch (nodeID) + { + case NodeID.mapping: + return "mapping"; + case NodeID.sequence: + return "sequence"; + case NodeID.scalar: + return "scalar"; + case NodeID.invalid: + return "invalid"; + } + } + + // Determine if the value can be converted to specified type. + @property bool convertsTo(T)() const + { + if(isType!T){return true;} + + // Every type allowed in Value should be convertible to string. + static if(isSomeString!T) {return true;} + else static if(isFloatingPoint!T){return type.among!(NodeType.integer, NodeType.decimal);} + else static if(isIntegral!T) {return type == NodeType.integer;} + else static if(is(Unqual!T==bool)){return type == NodeType.boolean;} + else {return false;} + } + /** + * Sets the style of this node when dumped. + * + * Params: style = Any valid style. + */ + void setStyle(CollectionStyle style) @safe + { + enforce(!isValid || (nodeID.among(NodeID.mapping, NodeID.sequence)), new NodeException( + "Cannot set collection style for non-collection nodes", startMark_)); + collectionStyle = style; + } + /// Ditto + void setStyle(ScalarStyle style) @safe + { + enforce(!isValid || (nodeID == NodeID.scalar), new NodeException( + "Cannot set scalar style for non-scalar nodes", startMark_)); + scalarStyle = style; + } + /// + @safe unittest + { + import dyaml.dumper; + auto stream = new Appender!string(); + auto node = Node([1, 2, 3, 4, 5]); + node.setStyle(CollectionStyle.block); + + auto dumper = dumper(); + dumper.dump(stream, node); + } + /// + @safe unittest + { + import dyaml.dumper; + auto stream = new Appender!string(); + auto node = Node(4); + node.setStyle(ScalarStyle.literal); + + auto dumper = dumper(); + dumper.dump(stream, node); + } + @safe unittest + { + assertThrown!NodeException(Node(4).setStyle(CollectionStyle.block)); + assertThrown!NodeException(Node([4]).setStyle(ScalarStyle.literal)); + } + @safe unittest + { + import dyaml.dumper; + { + auto stream = new Appender!string(); + auto node = Node([1, 2, 3, 4, 5]); + node.setStyle(CollectionStyle.block); + auto dumper = dumper(); + dumper.explicitEnd = false; + dumper.explicitStart = false; + dumper.YAMLVersion = null; + dumper.dump(stream, node); + + //Block style should start with a hyphen. + assert(stream.data[0] == '-'); + } + { + auto stream = new Appender!string(); + auto node = Node([1, 2, 3, 4, 5]); + node.setStyle(CollectionStyle.flow); + auto dumper = dumper(); + dumper.explicitEnd = false; + dumper.explicitStart = false; + dumper.YAMLVersion = null; + dumper.dump(stream, node); + + //Flow style should start with a bracket. + assert(stream.data[0] == '['); + } + { + auto stream = new Appender!string(); + auto node = Node(1); + node.setStyle(ScalarStyle.singleQuoted); + auto dumper = dumper(); + dumper.explicitEnd = false; + dumper.explicitStart = false; + dumper.YAMLVersion = null; + dumper.dump(stream, node); + + assert(stream.data == "!!int '1'\n"); + } + { + auto stream = new Appender!string(); + auto node = Node(1); + node.setStyle(ScalarStyle.doubleQuoted); + auto dumper = dumper(); + dumper.explicitEnd = false; + dumper.explicitStart = false; + dumper.YAMLVersion = null; + dumper.dump(stream, node); + + assert(stream.data == "!!int \"1\"\n"); + } + } + + private: + // Determine if the value stored by the node is of specified type. + // + // This only works for default YAML types, not for user defined types. + @property bool isType(T)() const + { + return value_.match!( + (const T _) => true, + _ => false, + ); + } + + /// Check at compile time if a type is stored natively + enum canBeType (T) = is(typeof({ value_.match!((const T _) => true, _ => false); })); + + + // Implementation of contains() and containsKey(). + bool contains_(T, Flag!"key" key, string func)(T rhs) const + { + final switch (nodeID) + { + case NodeID.mapping: + return findPair!(T, key)(rhs) >= 0; + case NodeID.sequence: + static if(!key) + { + foreach(ref node; getValue!(Node[])) + { + if(node == rhs){return true;} + } + return false; + } + else + { + throw new NodeException("Trying to use " ~ func ~ "() on a " ~ nodeTypeString ~ " node", + startMark_); + } + case NodeID.scalar: + case NodeID.invalid: + throw new NodeException("Trying to use " ~ func ~ "() on a " ~ nodeTypeString ~ " node", + startMark_); + } + + } + + // Implementation of remove() and removeAt() + void remove_(T, Flag!"key" key, string func)(T rhs) + { + static void removeElem(E, I)(ref Node node, I index) + { + auto elems = node.getValue!(E[]); + moveAll(elems[cast(size_t)index + 1 .. $], elems[cast(size_t)index .. $ - 1]); + elems.length = elems.length - 1; + node.setValue(elems); + } + + final switch (nodeID) + { + case NodeID.mapping: + const index = findPair!(T, key)(rhs); + if(index >= 0){removeElem!Pair(this, index);} + break; + case NodeID.sequence: + static long getIndex(ref Node node, ref T rhs) + { + foreach(idx, ref elem; node.get!(Node[])) + { + if(elem.convertsTo!T && elem.as!(T, No.stringConversion) == rhs) + { + return idx; + } + } + return -1; + } + + const index = select!key(rhs, getIndex(this, rhs)); + + // This throws if the index is not integral. + checkSequenceIndex(index); + + static if(isIntegral!(typeof(index))){removeElem!Node(this, index); break; } + else {assert(false, "Non-integral sequence index");} + case NodeID.scalar: + case NodeID.invalid: + throw new NodeException("Trying to " ~ func ~ "() from a " ~ nodeTypeString ~ " node", + startMark_); + } + } + + // Get index of pair with key (or value, if key is false) matching index. + // Cannot be inferred @safe due to https://issues.dlang.org/show_bug.cgi?id=16528 + sizediff_t findPair(T, Flag!"key" key = Yes.key)(const scope ref T index) + const scope @safe + { + const pairs = getValue!(Pair[])(); + const(Node)* node; + foreach(idx, ref const(Pair) pair; pairs) + { + static if(key){node = &pair.key;} + else {node = &pair.value;} + + + const bool typeMatch = (isFloatingPoint!T && (node.type.among!(NodeType.integer, NodeType.decimal))) || + (isIntegral!T && node.type == NodeType.integer) || + (is(Unqual!T==bool) && node.type == NodeType.boolean) || + (isSomeString!T && node.type == NodeType.string) || + (node.isType!T); + if(typeMatch && *node == index) + { + return idx; + } + } + return -1; + } + + // Check if index is integral and in range. + void checkSequenceIndex(T)(T index) const scope @safe + { + assert(nodeID == NodeID.sequence, + "checkSequenceIndex() called on a " ~ nodeTypeString ~ " node"); + + static if(!isIntegral!T) + { + throw new NodeException("Indexing a sequence with a non-integral type.", startMark_); + } + else + { + enforce(index >= 0 && index < getValue!(Node[]).length, + new NodeException("Sequence index out of range: " ~ to!string(index), + startMark_)); + } + } + // Safe wrapper for getting a value out of the variant. + inout(T) getValue(T)() @safe return scope inout + { + alias RType = typeof(return); + return value_.tryMatch!((RType r) => r); + } + // Safe wrapper for coercing a value out of the variant. + inout(T) coerceValue(T)() @trusted scope return inout + { + alias RType = typeof(return); + static if (is(typeof({ RType rt = T.init; T t = RType.init; }))) + alias TType = T; + else // `inout` matters (indirection) + alias TType = RType; + + // `inout(Node[]).to!string` apparently is not safe: + // struct SumTypeBug { + // import std.conv; + // Node[] data; + // + // string bug () inout @safe + // { + // return this.data.to!string; + // } + // } + // Doesn't compile with DMD v2.100.0 + return this.value_.tryMatch!( + (inout bool v) @safe => v.to!TType, + (inout long v) @safe => v.to!TType, + (inout Node[] v) @trusted => v.to!TType, + (inout ubyte[] v) @safe => v.to!TType, + (inout string v) @safe => v.to!TType, + (inout Node.Pair[] v) @trusted => v.to!TType, + (inout SysTime v) @trusted => v.to!TType, + (inout real v) @safe => v.to!TType, + (inout YAMLNull v) @safe => null.to!TType, + ); + } + // Safe wrapper for setting a value for the variant. + void setValue(T)(T value) @trusted + { + static if (allowed!T) + { + value_ = value; + } + else + { + auto tmpNode = cast(Node)value; + tag_ = tmpNode.tag; + scalarStyle = tmpNode.scalarStyle; + collectionStyle = tmpNode.collectionStyle; + value_ = tmpNode.value_; + } + } + + /// + public void toString (DGT) (scope DGT sink) + const scope @safe + { + this.value_.match!( + (const bool v) => formattedWrite(sink, v ? "true" : "false"), + (const long v) => formattedWrite(sink, "%s", v), + (const Node[] v) => formattedWrite(sink, "[%(%s, %)]", v), + (const ubyte[] v) => formattedWrite(sink, "%s", v), + (const string v) => formattedWrite(sink, `"%s"`, v), + (const Node.Pair[] v) => formattedWrite(sink, "{%(%s, %)}", v), + (const SysTime v) => formattedWrite(sink, "%s", v), + (const YAMLNull v) => formattedWrite(sink, "%s", v), + (const YAMLMerge v) => formattedWrite(sink, "%s", v), + (const real v) => formattedWrite(sink, "%s", v), + (const YAMLInvalid v) => formattedWrite(sink, "%s", v), + ); + } +} + +package: +// Merge pairs into an array of pairs based on merge rules in the YAML spec. +// +// Any new pair will only be added if there is not already a pair +// with the same key. +// +// Params: pairs = Appender managing the array of pairs to merge into. +// toMerge = Pairs to merge. +void merge(ref Appender!(Node.Pair[]) pairs, Node.Pair[] toMerge) @safe +{ + bool eq(ref Node.Pair a, ref Node.Pair b) @safe + { + return a.key == b.key; + } + + foreach(ref pair; toMerge) if(!canFind!eq(pairs.data, pair)) + { + pairs.put(pair); + } +} + +enum hasNodeConstructor(T) = hasSimpleNodeConstructor!T || hasExpandedNodeConstructor!T; +template hasSimpleNodeConstructor(T) +{ + static if (is(T == struct)) + { + enum hasSimpleNodeConstructor = is(typeof(T(Node.init))); + } + else static if (is(T == class)) + { + enum hasSimpleNodeConstructor = is(typeof(new T(Node.init))); + } + else enum hasSimpleNodeConstructor = false; +} +template hasExpandedNodeConstructor(T) +{ + static if (is(T == struct)) + { + enum hasExpandedNodeConstructor = is(typeof(T(Node.init, ""))); + } + else static if (is(T == class)) + { + enum hasExpandedNodeConstructor = is(typeof(new T(Node.init, ""))); + } + else enum hasExpandedNodeConstructor = false; +} +enum castableToNode(T) = (is(T == struct) || is(T == class)) && is(typeof(T.opCast!Node()) : Node); + +@safe unittest +{ + import dyaml : Loader, Node; + + static struct Foo + { + string[] bars; + + this(const Node node) + { + foreach(value; node["bars"].sequence) + { + bars ~= value.as!string.idup; + } + } + } + + Loader.fromString(`{ bars: ["a", "b"] }`) + .load + .as!(Foo); +} +@safe unittest +{ + import dyaml : Loader, Node; + import std : split, to; + + static class MyClass + { + int x, y, z; + + this(Node node) + { + auto parts = node.as!string().split(":"); + x = parts[0].to!int; + y = parts[1].to!int; + z = parts[2].to!int; + } + } + + auto loader = Loader.fromString(`"1:2:3"`); + Node node = loader.load(); + auto mc = node.get!MyClass; +} +@safe unittest +{ + import dyaml : Loader, Node; + import std : split, to; + + static class MyClass + { + int x, y, z; + + this(Node node) + { + auto parts = node.as!string().split(":"); + x = parts[0].to!int; + y = parts[1].to!int; + z = parts[2].to!int; + } + } + + auto loader = Loader.fromString(`"1:2:3"`); + const node = loader.load(); + auto mc = node.get!MyClass; +} diff --git a/source/dyaml/package.d b/source/dyaml/package.d new file mode 100644 index 0000000..e61b716 --- /dev/null +++ b/source/dyaml/package.d @@ -0,0 +1,15 @@ +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module dyaml; + +public import dyaml.dumper; +public import dyaml.encoding; +public import dyaml.exception; +public import dyaml.linebreak; +public import dyaml.loader; +public import dyaml.resolver; +public import dyaml.style; +public import dyaml.node; diff --git a/source/dyaml/parser.d b/source/dyaml/parser.d new file mode 100644 index 0000000..befdfa4 --- /dev/null +++ b/source/dyaml/parser.d @@ -0,0 +1,958 @@ + +// Copyright Ferdinand Majerech 2011-2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * YAML parser. + * Code based on PyYAML: http://www.pyyaml.org + */ +module dyaml.parser; + + +import std.algorithm; +import std.array; +import std.conv; +import std.exception; +import std.typecons; + +import dyaml.event; +import dyaml.exception; +import dyaml.scanner; +import dyaml.style; +import dyaml.token; +import dyaml.tagdirective; + + +/** + * The following YAML grammar is LL(1) and is parsed by a recursive descent + * parser. + * + * stream ::= STREAM-START implicit_document? explicit_document* STREAM-END + * implicit_document ::= block_node DOCUMENT-END* + * explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* + * block_node_or_indentless_sequence ::= + * ALIAS + * | properties (block_content | indentless_block_sequence)? + * | block_content + * | indentless_block_sequence + * block_node ::= ALIAS + * | properties block_content? + * | block_content + * flow_node ::= ALIAS + * | properties flow_content? + * | flow_content + * properties ::= TAG ANCHOR? | ANCHOR TAG? + * block_content ::= block_collection | flow_collection | SCALAR + * flow_content ::= flow_collection | SCALAR + * block_collection ::= block_sequence | block_mapping + * flow_collection ::= flow_sequence | flow_mapping + * block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END + * indentless_sequence ::= (BLOCK-ENTRY block_node?)+ + * block_mapping ::= BLOCK-MAPPING_START + * ((KEY block_node_or_indentless_sequence?)? + * (VALUE block_node_or_indentless_sequence?)?)* + * BLOCK-END + * flow_sequence ::= FLOW-SEQUENCE-START + * (flow_sequence_entry FLOW-ENTRY)* + * flow_sequence_entry? + * FLOW-SEQUENCE-END + * flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + * flow_mapping ::= FLOW-MAPPING-START + * (flow_mapping_entry FLOW-ENTRY)* + * flow_mapping_entry? + * FLOW-MAPPING-END + * flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + * + * FIRST sets: + * + * stream: { STREAM-START } + * explicit_document: { DIRECTIVE DOCUMENT-START } + * implicit_document: FIRST(block_node) + * block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } + * flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } + * block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } + * flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } + * block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } + * flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } + * block_sequence: { BLOCK-SEQUENCE-START } + * block_mapping: { BLOCK-MAPPING-START } + * block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } + * indentless_sequence: { ENTRY } + * flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } + * flow_sequence: { FLOW-SEQUENCE-START } + * flow_mapping: { FLOW-MAPPING-START } + * flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } + * flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } + */ + + +/** + * Marked exception thrown at parser errors. + * + * See_Also: MarkedYAMLException + */ +class ParserException : MarkedYAMLException +{ + mixin MarkedExceptionCtors; +} + +package: +/// Generates events from tokens provided by a Scanner. +/// +/// While Parser receives tokens with non-const character slices, the events it +/// produces are immutable strings, which are usually the same slices, cast to string. +/// Parser is the last layer of D:YAML that may possibly do any modifications to these +/// slices. +final class Parser +{ + private: + ///Default tag handle shortcuts and replacements. + static TagDirective[] defaultTagDirectives_ = + [TagDirective("!", "!"), TagDirective("!!", "tag:yaml.org,2002:")]; + + ///Scanner providing YAML tokens. + Scanner scanner_; + + ///Event produced by the most recent state. + Event currentEvent_; + + ///YAML version string. + string YAMLVersion_ = null; + ///Tag handle shortcuts and replacements. + TagDirective[] tagDirectives_; + + ///Stack of states. + Appender!(Event delegate() @safe[]) states_; + ///Stack of marks used to keep track of extents of e.g. YAML collections. + Appender!(Mark[]) marks_; + + ///Current state. + Event delegate() @safe state_; + + public: + ///Construct a Parser using specified Scanner. + this(Scanner scanner) @safe + { + state_ = &parseStreamStart; + scanner_ = scanner; + states_.reserve(32); + marks_.reserve(32); + } + + /** + * Check if any events are left. May have side effects in some cases. + */ + bool empty() @safe + { + ensureState(); + return currentEvent_.isNull; + } + + /** + * Return the current event. + * + * Must not be called if there are no events left. + */ + Event front() @safe + { + ensureState(); + assert(!currentEvent_.isNull, "No event left to peek"); + return currentEvent_; + } + + /** + * Skip to the next event. + * + * Must not be called if there are no events left. + */ + void popFront() @safe + { + currentEvent_.id = EventID.invalid; + ensureState(); + } + + private: + /// If current event is invalid, load the next valid one if possible. + void ensureState() @safe + { + if(currentEvent_.isNull && state_ !is null) + { + currentEvent_ = state_(); + } + } + ///Pop and return the newest state in states_. + Event delegate() @safe popState() @safe + { + enforce(states_.data.length > 0, + new YAMLException("Parser: Need to pop state but no states left to pop")); + const result = states_.data.back; + states_.shrinkTo(states_.data.length - 1); + return result; + } + + ///Pop and return the newest mark in marks_. + Mark popMark() @safe + { + enforce(marks_.data.length > 0, + new YAMLException("Parser: Need to pop mark but no marks left to pop")); + const result = marks_.data.back; + marks_.shrinkTo(marks_.data.length - 1); + return result; + } + + /// Push a state on the stack + void pushState(Event delegate() @safe state) @safe + { + states_ ~= state; + } + /// Push a mark on the stack + void pushMark(Mark mark) @safe + { + marks_ ~= mark; + } + + /** + * stream ::= STREAM-START implicit_document? explicit_document* STREAM-END + * implicit_document ::= block_node DOCUMENT-END* + * explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* + */ + + ///Parse stream start. + Event parseStreamStart() @safe + { + const token = scanner_.front; + scanner_.popFront(); + state_ = &parseImplicitDocumentStart; + return streamStartEvent(token.startMark, token.endMark); + } + + /// Parse implicit document start, unless explicit detected: if so, parse explicit. + Event parseImplicitDocumentStart() @safe + { + // Parse an implicit document. + if(!scanner_.front.id.among!(TokenID.directive, TokenID.documentStart, + TokenID.streamEnd)) + { + tagDirectives_ = defaultTagDirectives_; + const token = scanner_.front; + + pushState(&parseDocumentEnd); + state_ = &parseBlockNode; + + return documentStartEvent(token.startMark, token.endMark, false, null, null); + } + return parseDocumentStart(); + } + + ///Parse explicit document start. + Event parseDocumentStart() @safe + { + //Parse any extra document end indicators. + while(scanner_.front.id == TokenID.documentEnd) + { + scanner_.popFront(); + } + + //Parse an explicit document. + if(scanner_.front.id != TokenID.streamEnd) + { + const startMark = scanner_.front.startMark; + + auto tagDirectives = processDirectives(); + enforce(scanner_.front.id == TokenID.documentStart, + new ParserException("Expected document start but found " ~ + scanner_.front.idString, + scanner_.front.startMark)); + + const endMark = scanner_.front.endMark; + scanner_.popFront(); + pushState(&parseDocumentEnd); + state_ = &parseDocumentContent; + return documentStartEvent(startMark, endMark, true, YAMLVersion_, tagDirectives); + } + else + { + //Parse the end of the stream. + const token = scanner_.front; + scanner_.popFront(); + assert(states_.data.length == 0); + assert(marks_.data.length == 0); + state_ = null; + return streamEndEvent(token.startMark, token.endMark); + } + } + + ///Parse document end (explicit or implicit). + Event parseDocumentEnd() @safe + { + Mark startMark = scanner_.front.startMark; + const bool explicit = scanner_.front.id == TokenID.documentEnd; + Mark endMark = startMark; + if (explicit) + { + endMark = scanner_.front.endMark; + scanner_.popFront(); + } + + state_ = &parseDocumentStart; + + return documentEndEvent(startMark, endMark, explicit); + } + + ///Parse document content. + Event parseDocumentContent() @safe + { + if(scanner_.front.id.among!(TokenID.directive, TokenID.documentStart, + TokenID.documentEnd, TokenID.streamEnd)) + { + state_ = popState(); + return processEmptyScalar(scanner_.front.startMark); + } + return parseBlockNode(); + } + + /// Process directives at the beginning of a document. + TagDirective[] processDirectives() @safe + { + // Destroy version and tag handles from previous document. + YAMLVersion_ = null; + tagDirectives_.length = 0; + + // Process directives. + while(scanner_.front.id == TokenID.directive) + { + const token = scanner_.front; + scanner_.popFront(); + string value = token.value.idup; + if(token.directive == DirectiveType.yaml) + { + enforce(YAMLVersion_ is null, + new ParserException("Duplicate YAML directive", token.startMark)); + const minor = value.split(".")[0]; + enforce(minor == "1", + new ParserException("Incompatible document (version 1.x is required)", + token.startMark)); + YAMLVersion_ = value; + } + else if(token.directive == DirectiveType.tag) + { + auto handle = value[0 .. token.valueDivider]; + + foreach(ref pair; tagDirectives_) + { + // handle + const h = pair.handle; + enforce(h != handle, new ParserException("Duplicate tag handle: " ~ handle, + token.startMark)); + } + tagDirectives_ ~= + TagDirective(handle, value[token.valueDivider .. $]); + } + // Any other directive type is ignored (only YAML and TAG are in YAML + // 1.1/1.2, any other directives are "reserved") + } + + TagDirective[] value = tagDirectives_; + + //Add any default tag handles that haven't been overridden. + foreach(ref defaultPair; defaultTagDirectives_) + { + bool found; + foreach(ref pair; tagDirectives_) if(defaultPair.handle == pair.handle) + { + found = true; + break; + } + if(!found) {tagDirectives_ ~= defaultPair; } + } + + return value; + } + + /** + * block_node_or_indentless_sequence ::= ALIAS + * | properties (block_content | indentless_block_sequence)? + * | block_content + * | indentless_block_sequence + * block_node ::= ALIAS + * | properties block_content? + * | block_content + * flow_node ::= ALIAS + * | properties flow_content? + * | flow_content + * properties ::= TAG ANCHOR? | ANCHOR TAG? + * block_content ::= block_collection | flow_collection | SCALAR + * flow_content ::= flow_collection | SCALAR + * block_collection ::= block_sequence | block_mapping + * flow_collection ::= flow_sequence | flow_mapping + */ + + ///Parse a node. + Event parseNode(const Flag!"block" block, + const Flag!"indentlessSequence" indentlessSequence = No.indentlessSequence) + @trusted + { + if(scanner_.front.id == TokenID.alias_) + { + const token = scanner_.front; + scanner_.popFront(); + state_ = popState(); + return aliasEvent(token.startMark, token.endMark, + cast(string)token.value); + } + + string anchor; + string tag; + Mark startMark, endMark, tagMark; + bool invalidMarks = true; + // The index in the tag string where tag handle ends and tag suffix starts. + uint tagHandleEnd; + + //Get anchor/tag if detected. Return false otherwise. + bool get(const TokenID id, const Flag!"first" first, ref string target) @safe + { + if(scanner_.front.id != id){return false;} + invalidMarks = false; + const token = scanner_.front; + scanner_.popFront(); + if(first){startMark = token.startMark;} + if(id == TokenID.tag) + { + tagMark = token.startMark; + tagHandleEnd = token.valueDivider; + } + endMark = token.endMark; + target = token.value.idup; + return true; + } + + //Anchor and/or tag can be in any order. + if(get(TokenID.anchor, Yes.first, anchor)){get(TokenID.tag, No.first, tag);} + else if(get(TokenID.tag, Yes.first, tag)) {get(TokenID.anchor, No.first, anchor);} + + if(tag !is null){tag = processTag(tag, tagHandleEnd, startMark, tagMark);} + + if(invalidMarks) + { + startMark = endMark = scanner_.front.startMark; + } + + bool implicit = (tag is null || tag == "!"); + + if(indentlessSequence && scanner_.front.id == TokenID.blockEntry) + { + state_ = &parseIndentlessSequenceEntry; + return sequenceStartEvent + (startMark, scanner_.front.endMark, anchor, + tag, implicit, CollectionStyle.block); + } + + if(scanner_.front.id == TokenID.scalar) + { + auto token = scanner_.front; + scanner_.popFront(); + auto value = token.style == ScalarStyle.doubleQuoted + ? handleDoubleQuotedScalarEscapes(token.value) + : cast(string)token.value; + + implicit = (token.style == ScalarStyle.plain && tag is null) || tag == "!"; + state_ = popState(); + return scalarEvent(startMark, token.endMark, anchor, tag, + implicit, value, token.style); + } + + if(scanner_.front.id == TokenID.flowSequenceStart) + { + endMark = scanner_.front.endMark; + state_ = &parseFlowSequenceEntry!(Yes.first); + return sequenceStartEvent(startMark, endMark, anchor, tag, + implicit, CollectionStyle.flow); + } + + if(scanner_.front.id == TokenID.flowMappingStart) + { + endMark = scanner_.front.endMark; + state_ = &parseFlowMappingKey!(Yes.first); + return mappingStartEvent(startMark, endMark, anchor, tag, + implicit, CollectionStyle.flow); + } + + if(block && scanner_.front.id == TokenID.blockSequenceStart) + { + endMark = scanner_.front.endMark; + state_ = &parseBlockSequenceEntry!(Yes.first); + return sequenceStartEvent(startMark, endMark, anchor, tag, + implicit, CollectionStyle.block); + } + + if(block && scanner_.front.id == TokenID.blockMappingStart) + { + endMark = scanner_.front.endMark; + state_ = &parseBlockMappingKey!(Yes.first); + return mappingStartEvent(startMark, endMark, anchor, tag, + implicit, CollectionStyle.block); + } + + if(anchor !is null || tag !is null) + { + state_ = popState(); + + //PyYAML uses a tuple(implicit, false) for the second last arg here, + //but the second bool is never used after that - so we don't use it. + + //Empty scalars are allowed even if a tag or an anchor is specified. + return scalarEvent(startMark, endMark, anchor, tag, + implicit , ""); + } + + const token = scanner_.front; + throw new ParserException("While parsing a " ~ (block ? "block" : "flow") ~ " node", + startMark, "expected node content, but found: " + ~ token.idString, token.startMark); + } + + /// Handle escape sequences in a double quoted scalar. + /// + /// Moved here from scanner as it can't always be done in-place with slices. + string handleDoubleQuotedScalarEscapes(const(char)[] tokenValue) const @safe + { + string notInPlace; + bool inEscape; + auto appender = appender!(string)(); + for(const(char)[] oldValue = tokenValue; !oldValue.empty();) + { + const dchar c = oldValue.front(); + oldValue.popFront(); + + if(!inEscape) + { + if(c != '\\') + { + if(notInPlace is null) { appender.put(c); } + else { notInPlace ~= c; } + continue; + } + // Escape sequence starts with a '\' + inEscape = true; + continue; + } + + import dyaml.escapes; + scope(exit) { inEscape = false; } + + // 'Normal' escape sequence. + if(c.among!(escapes)) + { + if(notInPlace is null) + { + // \L and \C can't be handled in place as the expand into + // many-byte unicode chars + if(c != 'L' && c != 'P') + { + appender.put(dyaml.escapes.fromEscape(c)); + continue; + } + // Need to duplicate as we won't fit into + // token.value - which is what appender uses + notInPlace = appender.data.dup; + notInPlace ~= dyaml.escapes.fromEscape(c); + continue; + } + notInPlace ~= dyaml.escapes.fromEscape(c); + continue; + } + + // Unicode char written in hexadecimal in an escape sequence. + if(c.among!(escapeHexCodeList)) + { + // Scanner has already checked that the hex string is valid. + + const hexLength = dyaml.escapes.escapeHexLength(c); + // Any hex digits are 1-byte so this works. + const(char)[] hex = oldValue[0 .. hexLength]; + oldValue = oldValue[hexLength .. $]; + import std.ascii : isHexDigit; + assert(!hex.canFind!(d => !d.isHexDigit), + "Scanner must ensure the hex string is valid"); + + const decoded = cast(dchar)parse!int(hex, 16u); + if(notInPlace is null) { appender.put(decoded); } + else { notInPlace ~= decoded; } + continue; + } + + assert(false, "Scanner must handle unsupported escapes"); + } + + return notInPlace is null ? appender.data : notInPlace; + } + + /** + * Process a tag string retrieved from a tag token. + * + * Params: tag = Tag before processing. + * handleEnd = Index in tag where tag handle ends and tag suffix + * starts. + * startMark = Position of the node the tag belongs to. + * tagMark = Position of the tag. + */ + string processTag(const string tag, const uint handleEnd, + const Mark startMark, const Mark tagMark) + const @safe + { + const handle = tag[0 .. handleEnd]; + const suffix = tag[handleEnd .. $]; + + if(handle.length > 0) + { + string replacement; + foreach(ref pair; tagDirectives_) + { + if(pair.handle == handle) + { + replacement = pair.prefix; + break; + } + } + //handle must be in tagDirectives_ + enforce(replacement !is null, + new ParserException("While parsing a node", startMark, + "found undefined tag handle: " ~ handle, tagMark)); + return replacement ~ suffix; + } + return suffix; + } + + ///Wrappers to parse nodes. + Event parseBlockNode() @safe {return parseNode(Yes.block);} + Event parseFlowNode() @safe {return parseNode(No.block);} + Event parseBlockNodeOrIndentlessSequence() @safe {return parseNode(Yes.block, Yes.indentlessSequence);} + + ///block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END + + ///Parse an entry of a block sequence. If first is true, this is the first entry. + Event parseBlockSequenceEntry(Flag!"first" first)() @safe + { + static if(first) + { + pushMark(scanner_.front.startMark); + scanner_.popFront(); + } + + if(scanner_.front.id == TokenID.blockEntry) + { + const token = scanner_.front; + scanner_.popFront(); + if(!scanner_.front.id.among!(TokenID.blockEntry, TokenID.blockEnd)) + { + pushState(&parseBlockSequenceEntry!(No.first)); + return parseBlockNode(); + } + + state_ = &parseBlockSequenceEntry!(No.first); + return processEmptyScalar(token.endMark); + } + + if(scanner_.front.id != TokenID.blockEnd) + { + const token = scanner_.front; + throw new ParserException("While parsing a block collection", marks_.data.back, + "expected block end, but found " ~ token.idString, + token.startMark); + } + + state_ = popState(); + popMark(); + const token = scanner_.front; + scanner_.popFront(); + return sequenceEndEvent(token.startMark, token.endMark); + } + + ///indentless_sequence ::= (BLOCK-ENTRY block_node?)+ + + ///Parse an entry of an indentless sequence. + Event parseIndentlessSequenceEntry() @safe + { + if(scanner_.front.id == TokenID.blockEntry) + { + const token = scanner_.front; + scanner_.popFront(); + + if(!scanner_.front.id.among!(TokenID.blockEntry, TokenID.key, + TokenID.value, TokenID.blockEnd)) + { + pushState(&parseIndentlessSequenceEntry); + return parseBlockNode(); + } + + state_ = &parseIndentlessSequenceEntry; + return processEmptyScalar(token.endMark); + } + + state_ = popState(); + const token = scanner_.front; + return sequenceEndEvent(token.startMark, token.endMark); + } + + /** + * block_mapping ::= BLOCK-MAPPING_START + * ((KEY block_node_or_indentless_sequence?)? + * (VALUE block_node_or_indentless_sequence?)?)* + * BLOCK-END + */ + + ///Parse a key in a block mapping. If first is true, this is the first key. + Event parseBlockMappingKey(Flag!"first" first)() @safe + { + static if(first) + { + pushMark(scanner_.front.startMark); + scanner_.popFront(); + } + + if(scanner_.front.id == TokenID.key) + { + const token = scanner_.front; + scanner_.popFront(); + + if(!scanner_.front.id.among!(TokenID.key, TokenID.value, TokenID.blockEnd)) + { + pushState(&parseBlockMappingValue); + return parseBlockNodeOrIndentlessSequence(); + } + + state_ = &parseBlockMappingValue; + return processEmptyScalar(token.endMark); + } + + if(scanner_.front.id != TokenID.blockEnd) + { + const token = scanner_.front; + throw new ParserException("While parsing a block mapping", marks_.data.back, + "expected block end, but found: " ~ token.idString, + token.startMark); + } + + state_ = popState(); + popMark(); + const token = scanner_.front; + scanner_.popFront(); + return mappingEndEvent(token.startMark, token.endMark); + } + + ///Parse a value in a block mapping. + Event parseBlockMappingValue() @safe + { + if(scanner_.front.id == TokenID.value) + { + const token = scanner_.front; + scanner_.popFront(); + + if(!scanner_.front.id.among!(TokenID.key, TokenID.value, TokenID.blockEnd)) + { + pushState(&parseBlockMappingKey!(No.first)); + return parseBlockNodeOrIndentlessSequence(); + } + + state_ = &parseBlockMappingKey!(No.first); + return processEmptyScalar(token.endMark); + } + + state_= &parseBlockMappingKey!(No.first); + return processEmptyScalar(scanner_.front.startMark); + } + + /** + * flow_sequence ::= FLOW-SEQUENCE-START + * (flow_sequence_entry FLOW-ENTRY)* + * flow_sequence_entry? + * FLOW-SEQUENCE-END + * flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + * + * Note that while production rules for both flow_sequence_entry and + * flow_mapping_entry are equal, their interpretations are different. + * For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` + * generate an inline mapping (set syntax). + */ + + ///Parse an entry in a flow sequence. If first is true, this is the first entry. + Event parseFlowSequenceEntry(Flag!"first" first)() @safe + { + static if(first) + { + pushMark(scanner_.front.startMark); + scanner_.popFront(); + } + + if(scanner_.front.id != TokenID.flowSequenceEnd) + { + static if(!first) + { + if(scanner_.front.id == TokenID.flowEntry) + { + scanner_.popFront(); + } + else + { + const token = scanner_.front; + throw new ParserException("While parsing a flow sequence", marks_.data.back, + "expected ',' or ']', but got: " ~ + token.idString, token.startMark); + } + } + + if(scanner_.front.id == TokenID.key) + { + const token = scanner_.front; + state_ = &parseFlowSequenceEntryMappingKey; + return mappingStartEvent(token.startMark, token.endMark, + null, null, true, CollectionStyle.flow); + } + else if(scanner_.front.id != TokenID.flowSequenceEnd) + { + pushState(&parseFlowSequenceEntry!(No.first)); + return parseFlowNode(); + } + } + + const token = scanner_.front; + scanner_.popFront(); + state_ = popState(); + popMark(); + return sequenceEndEvent(token.startMark, token.endMark); + } + + ///Parse a key in flow context. + Event parseFlowKey(Event delegate() @safe nextState) @safe + { + const token = scanner_.front; + scanner_.popFront(); + + if(!scanner_.front.id.among!(TokenID.value, TokenID.flowEntry, + TokenID.flowSequenceEnd)) + { + pushState(nextState); + return parseFlowNode(); + } + + state_ = nextState; + return processEmptyScalar(token.endMark); + } + + ///Parse a mapping key in an entry in a flow sequence. + Event parseFlowSequenceEntryMappingKey() @safe + { + return parseFlowKey(&parseFlowSequenceEntryMappingValue); + } + + ///Parse a mapping value in a flow context. + Event parseFlowValue(TokenID checkId, Event delegate() @safe nextState) + @safe + { + if(scanner_.front.id == TokenID.value) + { + const token = scanner_.front; + scanner_.popFront(); + if(!scanner_.front.id.among(TokenID.flowEntry, checkId)) + { + pushState(nextState); + return parseFlowNode(); + } + + state_ = nextState; + return processEmptyScalar(token.endMark); + } + + state_ = nextState; + return processEmptyScalar(scanner_.front.startMark); + } + + ///Parse a mapping value in an entry in a flow sequence. + Event parseFlowSequenceEntryMappingValue() @safe + { + return parseFlowValue(TokenID.flowSequenceEnd, + &parseFlowSequenceEntryMappingEnd); + } + + ///Parse end of a mapping in a flow sequence entry. + Event parseFlowSequenceEntryMappingEnd() @safe + { + state_ = &parseFlowSequenceEntry!(No.first); + const token = scanner_.front; + return mappingEndEvent(token.startMark, token.startMark); + } + + /** + * flow_mapping ::= FLOW-MAPPING-START + * (flow_mapping_entry FLOW-ENTRY)* + * flow_mapping_entry? + * FLOW-MAPPING-END + * flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + */ + + ///Parse a key in a flow mapping. + Event parseFlowMappingKey(Flag!"first" first)() @safe + { + static if(first) + { + pushMark(scanner_.front.startMark); + scanner_.popFront(); + } + + if(scanner_.front.id != TokenID.flowMappingEnd) + { + static if(!first) + { + if(scanner_.front.id == TokenID.flowEntry) + { + scanner_.popFront(); + } + else + { + const token = scanner_.front; + throw new ParserException("While parsing a flow mapping", marks_.data.back, + "expected ',' or '}', but got: " ~ + token.idString, token.startMark); + } + } + + if(scanner_.front.id == TokenID.key) + { + return parseFlowKey(&parseFlowMappingValue); + } + + if(scanner_.front.id != TokenID.flowMappingEnd) + { + pushState(&parseFlowMappingEmptyValue); + return parseFlowNode(); + } + } + + const token = scanner_.front; + scanner_.popFront(); + state_ = popState(); + popMark(); + return mappingEndEvent(token.startMark, token.endMark); + } + + ///Parse a value in a flow mapping. + Event parseFlowMappingValue() @safe + { + return parseFlowValue(TokenID.flowMappingEnd, &parseFlowMappingKey!(No.first)); + } + + ///Parse an empty value in a flow mapping. + Event parseFlowMappingEmptyValue() @safe + { + state_ = &parseFlowMappingKey!(No.first); + return processEmptyScalar(scanner_.front.startMark); + } + + ///Return an empty scalar. + Event processEmptyScalar(const Mark mark) @safe pure nothrow const @nogc + { + return scalarEvent(mark, mark, null, null, true, ""); + } +} diff --git a/source/dyaml/queue.d b/source/dyaml/queue.d new file mode 100644 index 0000000..57b0d34 --- /dev/null +++ b/source/dyaml/queue.d @@ -0,0 +1,272 @@ + +// Copyright Ferdinand Majerech 2011-2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module dyaml.queue; + + +import std.traits : hasMember, hasIndirections; + +package: + +/// Simple queue implemented as a singly linked list with a tail pointer. +/// +/// Needed in some D:YAML code that needs a queue-like structure without too much +/// reallocation that goes with an array. +/// +/// Allocations are non-GC and are damped by a free-list based on the nodes +/// that are removed. Note that elements lifetime must be managed +/// outside. +struct Queue(T) +if (!hasMember!(T, "__xdtor")) +{ + +private: + + // Linked list node containing one element and pointer to the next node. + struct Node + { + T payload_; + Node* next_; + } + + // Start of the linked list - first element added in time (end of the queue). + Node* first_; + // Last element of the linked list - last element added in time (start of the queue). + Node* last_; + // free-list + Node* stock; + + // Length of the queue. + size_t length_; + + // allocate a new node or recycle one from the stock. + Node* makeNewNode(T thePayload, Node* theNext = null) @trusted nothrow @nogc + { + import std.experimental.allocator : make; + import std.experimental.allocator.mallocator : Mallocator; + + Node* result; + if (stock !is null) + { + result = stock; + stock = result.next_; + result.payload_ = thePayload; + result.next_ = theNext; + } + else + { + result = Mallocator.instance.make!(Node)(thePayload, theNext); + // GC can dispose T managed member if it thinks they are no used... + static if (hasIndirections!T) + { + import core.memory : GC; + GC.addRange(result, Node.sizeof); + } + } + return result; + } + + // free the stock of available free nodes. + void freeStock() @trusted @nogc nothrow + { + import std.experimental.allocator.mallocator : Mallocator; + + while (stock !is null) + { + Node* toFree = stock; + stock = stock.next_; + static if (hasIndirections!T) + { + import core.memory : GC; + GC.removeRange(toFree); + } + Mallocator.instance.deallocate((cast(ubyte*) toFree)[0 .. Node.sizeof]); + } + } + +public: + + @disable void opAssign(ref Queue); + @disable bool opEquals(ref Queue); + @disable int opCmp(ref Queue); + + this(this) @safe nothrow @nogc + { + auto node = first_; + first_ = null; + last_ = null; + while (node !is null) + { + Node* newLast = makeNewNode(node.payload_); + if (last_ !is null) + last_.next_ = newLast; + if (first_ is null) + first_ = newLast; + last_ = newLast; + node = node.next_; + } + } + + ~this() @safe nothrow @nogc + { + freeStock(); + stock = first_; + freeStock(); + } + + /// Returns a forward range iterating over this queue. + auto range() @safe pure nothrow @nogc + { + static struct Result + { + private Node* cursor; + + void popFront() @safe pure nothrow @nogc + { + cursor = cursor.next_; + } + ref T front() @safe pure nothrow @nogc + in(cursor !is null) + { + return cursor.payload_; + } + bool empty() @safe pure nothrow @nogc const + { + return cursor is null; + } + } + return Result(first_); + } + + /// Push a new item to the queue. + void push(T item) @nogc @safe nothrow + { + Node* newLast = makeNewNode(item); + if (last_ !is null) + last_.next_ = newLast; + if (first_ is null) + first_ = newLast; + last_ = newLast; + ++length_; + } + + /// Insert a new item putting it to specified index in the linked list. + void insert(T item, const size_t idx) @safe nothrow + in + { + assert(idx <= length_); + } + do + { + if (idx == 0) + { + first_ = makeNewNode(item, first_); + ++length_; + } + // Adding before last added element, so we can just push. + else if (idx == length_) + { + push(item); + } + else + { + // Get the element before one we're inserting. + Node* current = first_; + foreach (i; 1 .. idx) + current = current.next_; + + assert(current); + // Insert a new node after current, and put current.next_ behind it. + current.next_ = makeNewNode(item, current.next_); + ++length_; + } + } + + /// Returns: The next element in the queue and remove it. + T pop() @safe nothrow + in + { + assert(!empty, "Trying to pop an element from an empty queue"); + } + do + { + T result = peek(); + + Node* oldStock = stock; + Node* old = first_; + first_ = first_.next_; + + // start the stock from the popped element + stock = old; + old.next_ = null; + // add the existing "old" stock to the new first stock element + if (oldStock !is null) + stock.next_ = oldStock; + + if (--length_ == 0) + { + assert(first_ is null); + last_ = null; + } + + return result; + } + + /// Returns: The next element in the queue. + ref inout(T) peek() @safe pure nothrow inout @nogc + in + { + assert(!empty, "Trying to peek at an element in an empty queue"); + } + do + { + return first_.payload_; + } + + /// Returns: true of the queue empty, false otherwise. + bool empty() @safe pure nothrow const @nogc + { + return first_ is null; + } + + /// Returns: The number of elements in the queue. + size_t length() @safe pure nothrow const @nogc + { + return length_; + } +} + +@safe nothrow unittest +{ + auto queue = Queue!int(); + assert(queue.empty); + foreach (i; 0 .. 65) + { + queue.push(5); + assert(queue.pop() == 5); + assert(queue.empty); + assert(queue.length_ == 0); + } + + int[] array = [1, -1, 2, -2, 3, -3, 4, -4, 5, -5]; + foreach (i; array) + { + queue.push(i); + } + + array = 42 ~ array[0 .. 3] ~ 42 ~ array[3 .. $] ~ 42; + queue.insert(42, 3); + queue.insert(42, 0); + queue.insert(42, queue.length); + + int[] array2; + while (!queue.empty) + { + array2 ~= queue.pop(); + } + + assert(array == array2); +} diff --git a/source/dyaml/reader.d b/source/dyaml/reader.d new file mode 100644 index 0000000..ae44c80 --- /dev/null +++ b/source/dyaml/reader.d @@ -0,0 +1,909 @@ + +// Copyright Ferdinand Majerech 2011-2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +module dyaml.reader; + + +import core.stdc.stdlib; +import core.stdc.string; +import core.thread; + +import std.algorithm; +import std.array; +import std.conv; +import std.exception; +import std.range; +import std.string; +import std.system; +import std.typecons; +import std.utf; + +import tinyendian; + +import dyaml.encoding; +import dyaml.exception; + +alias isBreak = among!('\n', '\u0085', '\u2028', '\u2029'); + +package: + + +///Exception thrown at Reader errors. +class ReaderException : YAMLException +{ + this(string msg, string file = __FILE__, size_t line = __LINE__) + @safe pure nothrow + { + super("Reader error: " ~ msg, file, line); + } +} + +/// Provides an API to read characters from a UTF-8 buffer and build slices into that +/// buffer to avoid allocations (see SliceBuilder). +final class Reader +{ + private: + // Buffer of currently loaded characters. + char[] buffer_; + + // Current position within buffer. Only data after this position can be read. + size_t bufferOffset_; + + // Index of the current character in the buffer. + size_t charIndex_; + // Number of characters (code points) in buffer_. + size_t characterCount_; + + // File name + string name_; + // Current line in file. + uint line_; + // Current column in file. + uint column_; + + // Original Unicode encoding of the data. + Encoding encoding_; + + version(unittest) + { + // Endianness of the input before it was converted (for testing) + Endian endian_; + } + + // The number of consecutive ASCII characters starting at bufferOffset_. + // + // Used to minimize UTF-8 decoding. + size_t upcomingASCII_; + + // Index to buffer_ where the last decoded character starts. + size_t lastDecodedBufferOffset_; + // Offset, relative to charIndex_, of the last decoded character, + // in code points, not chars. + size_t lastDecodedCharOffset_; + + public: + /// Construct a Reader. + /// + /// Params: buffer = Buffer with YAML data. This may be e.g. the entire + /// contents of a file or a string. $(B will) be modified by + /// the Reader and other parts of D:YAML (D:YAML tries to + /// reuse the buffer to minimize memory allocations) + /// name = File name if the buffer is the contents of a file or + /// `"<unknown>"` if the buffer is the contents of a string. + /// + /// Throws: ReaderException on a UTF decoding error or if there are + /// nonprintable Unicode characters illegal in YAML. + this(ubyte[] buffer, string name = "<unknown>") @safe pure + { + name_ = name; + auto endianResult = fixUTFByteOrder(buffer); + if(endianResult.bytesStripped > 0) + { + throw new ReaderException("Size of UTF-16 or UTF-32 input not aligned " ~ + "to 2 or 4 bytes, respectively"); + } + + version(unittest) { endian_ = endianResult.endian; } + encoding_ = endianResult.encoding; + + auto utf8Result = toUTF8(endianResult.array, endianResult.encoding); + const msg = utf8Result.errorMessage; + if(msg !is null) + { + throw new ReaderException("Error when converting to UTF-8: " ~ msg); + } + + buffer_ = utf8Result.utf8; + + characterCount_ = utf8Result.characterCount; + // Check that all characters in buffer are printable. + enforce(isPrintableValidUTF8(buffer_), + new ReaderException("Special unicode characters are not allowed")); + + this.sliceBuilder = SliceBuilder(this); + checkASCII(); + } + + /// Get character at specified index relative to current position. + /// + /// Params: index = Index of the character to get relative to current position + /// in the buffer. Can point outside of the buffer; In that + /// case, '\0' will be returned. + /// + /// Returns: Character at specified position or '\0' if outside of the buffer. + /// + // XXX removed; search for 'risky' to find why. + // Throws: ReaderException if trying to read past the end of the buffer. + dchar peek(const size_t index) @safe pure + { + if(index < upcomingASCII_) { return buffer_[bufferOffset_ + index]; } + if(characterCount_ <= charIndex_ + index) + { + // XXX This is risky; revert this if bugs are introduced. We rely on + // the assumption that Reader only uses peek() to detect end of buffer. + // The test suite passes. + // Revert this case here and in other peek() versions if this causes + // errors. + // throw new ReaderException("Trying to read past the end of the buffer"); + return '\0'; + } + + // Optimized path for Scanner code that peeks chars in linear order to + // determine the length of some sequence. + if(index == lastDecodedCharOffset_) + { + ++lastDecodedCharOffset_; + const char b = buffer_[lastDecodedBufferOffset_]; + // ASCII + if(b < 0x80) + { + ++lastDecodedBufferOffset_; + return b; + } + return decode(buffer_, lastDecodedBufferOffset_); + } + + // 'Slow' path where we decode everything up to the requested character. + const asciiToTake = min(upcomingASCII_, index); + lastDecodedCharOffset_ = asciiToTake; + lastDecodedBufferOffset_ = bufferOffset_ + asciiToTake; + dchar d; + while(lastDecodedCharOffset_ <= index) + { + d = decodeNext(); + } + + return d; + } + + /// Optimized version of peek() for the case where peek index is 0. + dchar peek() @safe pure + { + if(upcomingASCII_ > 0) { return buffer_[bufferOffset_]; } + if(characterCount_ <= charIndex_) { return '\0'; } + + lastDecodedCharOffset_ = 0; + lastDecodedBufferOffset_ = bufferOffset_; + return decodeNext(); + } + + /// Get byte at specified index relative to current position. + /// + /// Params: index = Index of the byte to get relative to current position + /// in the buffer. Can point outside of the buffer; In that + /// case, '\0' will be returned. + /// + /// Returns: Byte at specified position or '\0' if outside of the buffer. + char peekByte(const size_t index) @safe pure nothrow @nogc + { + return characterCount_ > (charIndex_ + index) ? buffer_[bufferOffset_ + index] : '\0'; + } + + /// Optimized version of peekByte() for the case where peek byte index is 0. + char peekByte() @safe pure nothrow @nogc + { + return characterCount_ > charIndex_ ? buffer_[bufferOffset_] : '\0'; + } + + + /// Get specified number of characters starting at current position. + /// + /// Note: This gets only a "view" into the internal buffer, which will be + /// invalidated after other Reader calls. Use SliceBuilder to build slices + /// for permanent use. + /// + /// Params: length = Number of characters (code points, not bytes) to get. May + /// reach past the end of the buffer; in that case the returned + /// slice will be shorter. + /// + /// Returns: Characters starting at current position or an empty slice if out of bounds. + char[] prefix(const size_t length) @safe pure + { + return slice(length); + } + + /// Get specified number of bytes, not code points, starting at current position. + /// + /// Note: This gets only a "view" into the internal buffer, which will be + /// invalidated after other Reader calls. Use SliceBuilder to build slices + /// for permanent use. + /// + /// Params: length = Number bytes (not code points) to get. May NOT reach past + /// the end of the buffer; should be used with peek() to avoid + /// this. + /// + /// Returns: Bytes starting at current position. + char[] prefixBytes(const size_t length) @safe pure nothrow @nogc + in(length == 0 || bufferOffset_ + length <= buffer_.length, "prefixBytes out of bounds") + { + return buffer_[bufferOffset_ .. bufferOffset_ + length]; + } + + /// Get a slice view of the internal buffer, starting at the current position. + /// + /// Note: This gets only a "view" into the internal buffer, + /// which get invalidated after other Reader calls. + /// + /// Params: end = End of the slice relative to current position. May reach past + /// the end of the buffer; in that case the returned slice will + /// be shorter. + /// + /// Returns: Slice into the internal buffer or an empty slice if out of bounds. + char[] slice(const size_t end) @safe pure + { + // Fast path in case the caller has already peek()ed all the way to end. + if(end == lastDecodedCharOffset_) + { + return buffer_[bufferOffset_ .. lastDecodedBufferOffset_]; + } + + const asciiToTake = min(upcomingASCII_, end, buffer_.length); + lastDecodedCharOffset_ = asciiToTake; + lastDecodedBufferOffset_ = bufferOffset_ + asciiToTake; + + // 'Slow' path - decode everything up to end. + while(lastDecodedCharOffset_ < end && + lastDecodedBufferOffset_ < buffer_.length) + { + decodeNext(); + } + + return buffer_[bufferOffset_ .. lastDecodedBufferOffset_]; + } + + /// Get the next character, moving buffer position beyond it. + /// + /// Returns: Next character. + /// + /// Throws: ReaderException if trying to read past the end of the buffer + /// or if invalid data is read. + dchar get() @safe pure + { + const result = peek(); + forward(); + return result; + } + + /// Get specified number of characters, moving buffer position beyond them. + /// + /// Params: length = Number or characters (code points, not bytes) to get. + /// + /// Returns: Characters starting at current position. + char[] get(const size_t length) @safe pure + { + auto result = slice(length); + forward(length); + return result; + } + + /// Move current position forward. + /// + /// Params: length = Number of characters to move position forward. + void forward(size_t length) @safe pure + { + while(length > 0) + { + auto asciiToTake = min(upcomingASCII_, length); + charIndex_ += asciiToTake; + length -= asciiToTake; + upcomingASCII_ -= asciiToTake; + + for(; asciiToTake > 0; --asciiToTake) + { + const c = buffer_[bufferOffset_++]; + // c is ASCII, do we only need to check for ASCII line breaks. + if(c == '\n' || (c == '\r' && buffer_[bufferOffset_] != '\n')) + { + ++line_; + column_ = 0; + continue; + } + ++column_; + } + + // If we have used up all upcoming ASCII chars, the next char is + // non-ASCII even after this returns, so upcomingASCII_ doesn't need to + // be updated - it's zero. + if(length == 0) { break; } + + assert(upcomingASCII_ == 0, + "Running unicode handling code but we haven't run out of ASCII chars"); + assert(bufferOffset_ < buffer_.length, + "Attempted to decode past the end of YAML buffer"); + assert(buffer_[bufferOffset_] >= 0x80, + "ASCII must be handled by preceding code"); + + ++charIndex_; + const c = decode(buffer_, bufferOffset_); + + // New line. (can compare with '\n' without decoding since it's ASCII) + if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n')) + { + ++line_; + column_ = 0; + } + else if(c != '\uFEFF') { ++column_; } + --length; + checkASCII(); + } + + lastDecodedBufferOffset_ = bufferOffset_; + lastDecodedCharOffset_ = 0; + } + + /// Move current position forward by one character. + void forward() @safe pure + { + ++charIndex_; + lastDecodedBufferOffset_ = bufferOffset_; + lastDecodedCharOffset_ = 0; + + // ASCII + if(upcomingASCII_ > 0) + { + --upcomingASCII_; + const c = buffer_[bufferOffset_++]; + + if(c == '\n' || (c == '\r' && buffer_[bufferOffset_] != '\n')) + { + ++line_; + column_ = 0; + return; + } + ++column_; + return; + } + + // UTF-8 + assert(bufferOffset_ < buffer_.length, + "Attempted to decode past the end of YAML buffer"); + assert(buffer_[bufferOffset_] >= 0x80, + "ASCII must be handled by preceding code"); + + const c = decode(buffer_, bufferOffset_); + + // New line. (can compare with '\n' without decoding since it's ASCII) + if(c.isBreak || (c == '\r' && buffer_[bufferOffset_] != '\n')) + { + ++line_; + column_ = 0; + } + else if(c != '\uFEFF') { ++column_; } + + checkASCII(); + } + + /// Used to build slices of read data in Reader; to avoid allocations. + SliceBuilder sliceBuilder; + + /// Get a string describing current buffer position, used for error messages. + Mark mark() const pure nothrow @nogc @safe { return Mark(name_, line_, column_); } + + /// Get file name. + string name() const @safe pure nothrow @nogc { return name_; } + + /// Set file name. + void name(string name) pure @safe nothrow @nogc { name_ = name; } + + /// Get current line number. + uint line() const @safe pure nothrow @nogc { return line_; } + + /// Get current column number. + uint column() const @safe pure nothrow @nogc { return column_; } + + /// Get index of the current character in the buffer. + size_t charIndex() const @safe pure nothrow @nogc { return charIndex_; } + + /// Get encoding of the input buffer. + Encoding encoding() const @safe pure nothrow @nogc { return encoding_; } + +private: + // Update upcomingASCII_ (should be called forward()ing over a UTF-8 sequence) + void checkASCII() @safe pure nothrow @nogc + { + upcomingASCII_ = countASCII(buffer_[bufferOffset_ .. $]); + } + + // Decode the next character relative to + // lastDecodedCharOffset_/lastDecodedBufferOffset_ and update them. + // + // Does not advance the buffer position. Used in peek() and slice(). + dchar decodeNext() @safe pure + { + assert(lastDecodedBufferOffset_ < buffer_.length, + "Attempted to decode past the end of YAML buffer"); + const char b = buffer_[lastDecodedBufferOffset_]; + ++lastDecodedCharOffset_; + // ASCII + if(b < 0x80) + { + ++lastDecodedBufferOffset_; + return b; + } + + return decode(buffer_, lastDecodedBufferOffset_); + } +} + +/// Used to build slices of already read data in Reader buffer, avoiding allocations. +/// +/// Usually these slices point to unchanged Reader data, but sometimes the data is +/// changed due to how YAML interprets certain characters/strings. +/// +/// See begin() documentation. +struct SliceBuilder +{ +private: + // No copying by the user. + @disable this(this); + @disable void opAssign(ref SliceBuilder); + + // Reader this builder works in. + Reader reader_; + + // Start of the slice om reader_.buffer_ (size_t.max while no slice being build) + size_t start_ = size_t.max; + // End of the slice om reader_.buffer_ (size_t.max while no slice being build) + size_t end_ = size_t.max; + + // Stack of slice ends to revert to (see Transaction) + // + // Very few levels as we don't want arbitrarily nested transactions. + size_t[4] endStack_; + // The number of elements currently in endStack_. + size_t endStackUsed_; + + @safe const pure nothrow @nogc invariant() + { + if(!inProgress) { return; } + assert(end_ <= reader_.bufferOffset_, "Slice ends after buffer position"); + assert(start_ <= end_, "Slice start after slice end"); + } + + // Is a slice currently being built? + bool inProgress() @safe const pure nothrow @nogc + in(start_ == size_t.max ? end_ == size_t.max : end_ != size_t.max, "start_/end_ are not consistent") + { + return start_ != size_t.max; + } + +public: + /// Begin building a slice. + /// + /// Only one slice can be built at any given time; before beginning a new slice, + /// finish the previous one (if any). + /// + /// The slice starts at the current position in the Reader buffer. It can only be + /// extended up to the current position in the buffer; Reader methods get() and + /// forward() move the position. E.g. it is valid to extend a slice by write()-ing + /// a string just returned by get() - but not one returned by prefix() unless the + /// position has changed since the prefix() call. + void begin() @safe pure nothrow @nogc + in(!inProgress, "Beginning a slice while another slice is being built") + in(endStackUsed_ == 0, "Slice stack not empty at slice begin") + { + + start_ = reader_.bufferOffset_; + end_ = reader_.bufferOffset_; + } + + /// Finish building a slice and return it. + /// + /// Any Transactions on the slice must be committed or destroyed before the slice + /// is finished. + /// + /// Returns a string; once a slice is finished it is definitive that its contents + /// will not be changed. + char[] finish() @safe pure nothrow @nogc + in(inProgress, "finish called without begin") + in(endStackUsed_ == 0, "Finishing a slice with running transactions.") + { + + auto result = reader_.buffer_[start_ .. end_]; + start_ = end_ = size_t.max; + return result; + } + + /// Write a string to the slice being built. + /// + /// Data can only be written up to the current position in the Reader buffer. + /// + /// If str is a string returned by a Reader method, and str starts right after the + /// end of the slice being built, the slice is extended (trivial operation). + /// + /// See_Also: begin + void write(scope char[] str) @safe pure nothrow @nogc + { + assert(inProgress, "write called without begin"); + assert(end_ <= reader_.bufferOffset_, + "AT START: Slice ends after buffer position"); + + // Nothing? Already done. + if (str.length == 0) { return; } + // If str starts at the end of the slice (is a string returned by a Reader + // method), just extend the slice to contain str. + if(&str[0] == &reader_.buffer_[end_]) + { + end_ += str.length; + } + // Even if str does not start at the end of the slice, it still may be returned + // by a Reader method and point to buffer. So we need to memmove. + else + { + copy(str, reader_.buffer_[end_..end_ + str.length * char.sizeof]); + end_ += str.length; + } + } + + /// Write a character to the slice being built. + /// + /// Data can only be written up to the current position in the Reader buffer. + /// + /// See_Also: begin + void write(dchar c) @safe pure + in(inProgress, "write called without begin") + { + if(c < 0x80) + { + reader_.buffer_[end_++] = cast(char)c; + return; + } + + // We need to encode a non-ASCII dchar into UTF-8 + char[4] encodeBuf; + const bytes = encode(encodeBuf, c); + reader_.buffer_[end_ .. end_ + bytes] = encodeBuf[0 .. bytes]; + end_ += bytes; + } + + /// Insert a character to a specified position in the slice. + /// + /// Enlarges the slice by 1 char. Note that the slice can only extend up to the + /// current position in the Reader buffer. + /// + /// Params: + /// + /// c = The character to insert. + /// position = Position to insert the character at in code units, not code points. + /// Must be less than slice length(); a previously returned length() + /// can be used. + void insert(const dchar c, const size_t position) @safe pure + in(inProgress, "insert called without begin") + in(start_ + position <= end_, "Trying to insert after the end of the slice") + { + + const point = start_ + position; + const movedLength = end_ - point; + + // Encode c into UTF-8 + char[4] encodeBuf; + if(c < 0x80) { encodeBuf[0] = cast(char)c; } + const size_t bytes = c < 0x80 ? 1 : encode(encodeBuf, c); + + if(movedLength > 0) + { + copy(reader_.buffer_[point..point + movedLength * char.sizeof], + reader_.buffer_[point + bytes..point + bytes + movedLength * char.sizeof]); + } + reader_.buffer_[point .. point + bytes] = encodeBuf[0 .. bytes]; + end_ += bytes; + } + + /// Get the current length of the slice. + size_t length() @safe const pure nothrow @nogc + { + return end_ - start_; + } + + /// A slice building transaction. + /// + /// Can be used to save and revert back to slice state. + struct Transaction + { + private: + // The slice builder affected by the transaction. + SliceBuilder* builder_; + // Index of the return point of the transaction in StringBuilder.endStack_. + size_t stackLevel_; + // True after commit() has been called. + bool committed_; + + public: + /// Begins a transaction on a SliceBuilder object. + /// + /// The transaction must end $(B after) any transactions created within the + /// transaction but $(B before) the slice is finish()-ed. A transaction can be + /// ended either by commit()-ing or reverting through the destructor. + /// + /// Saves the current state of a slice. + this(SliceBuilder* builder) @safe pure nothrow @nogc + { + builder_ = builder; + stackLevel_ = builder_.endStackUsed_; + builder_.push(); + } + + /// Commit changes to the slice. + /// + /// Ends the transaction - can only be called once, and removes the possibility + /// to revert slice state. + /// + /// Does nothing for a default-initialized transaction (the transaction has not + /// been started yet). + void commit() @safe pure nothrow @nogc + in(!committed_, "Can't commit a transaction more than once") + { + + if(builder_ is null) { return; } + assert(builder_.endStackUsed_ == stackLevel_ + 1, + "Parent transactions don't fully contain child transactions"); + builder_.apply(); + committed_ = true; + } + + /// Destroy the transaction and revert it if it hasn't been committed yet. + void end() @safe pure nothrow @nogc + in(builder_ && builder_.endStackUsed_ == stackLevel_ + 1, "Parent transactions don't fully contain child transactions") + { + builder_.pop(); + builder_ = null; + } + + } + +private: + // Push the current end of the slice so we can revert to it if needed. + // + // Used by Transaction. + void push() @safe pure nothrow @nogc + in(inProgress, "push called without begin") + in(endStackUsed_ < endStack_.length, "Slice stack overflow") + { + endStack_[endStackUsed_++] = end_; + } + + // Pop the current end of endStack_ and set the end of the slice to the popped + // value, reverting changes since the old end was pushed. + // + // Used by Transaction. + void pop() @safe pure nothrow @nogc + in(inProgress, "pop called without begin") + in(endStackUsed_ > 0, "Trying to pop an empty slice stack") + { + end_ = endStack_[--endStackUsed_]; + } + + // Pop the current end of endStack_, but keep the current end of the slice, applying + // changes made since pushing the old end. + // + // Used by Transaction. + void apply() @safe pure nothrow @nogc + in(inProgress, "apply called without begin") + in(endStackUsed_ > 0, "Trying to apply an empty slice stack") + { + --endStackUsed_; + } +} + + +private: + +// Convert a UTF-8/16/32 buffer to UTF-8, in-place if possible. +// +// Params: +// +// input = Buffer with UTF-8/16/32 data to decode. May be overwritten by the +// conversion, in which case the result will be a slice of this buffer. +// encoding = Encoding of input. +// +// Returns: +// +// A struct with the following members: +// +// $(D string errorMessage) In case of an error, the error message is stored here. If +// there was no error, errorMessage is NULL. Always check +// this first. +// $(D char[] utf8) input converted to UTF-8. May be a slice of input. +// $(D size_t characterCount) Number of characters (code points) in input. +auto toUTF8(ubyte[] input, const UTFEncoding encoding) @safe pure nothrow +{ + // Documented in function ddoc. + struct Result + { + string errorMessage; + char[] utf8; + size_t characterCount; + } + + Result result; + + // Encode input_ into UTF-8 if it's encoded as UTF-16 or UTF-32. + // + // Params: + // + // buffer = The input buffer to encode. + // result = A Result struct to put encoded result and any error messages to. + // + // On error, result.errorMessage will be set. + static void encode(C)(C[] input, ref Result result) @safe pure + { + // We can do UTF-32->UTF-8 in place because all UTF-8 sequences are 4 or + // less bytes. + static if(is(C == dchar)) + { + char[4] encodeBuf; + auto utf8 = cast(char[])input; + auto length = 0; + foreach(dchar c; input) + { + ++result.characterCount; + // ASCII + if(c < 0x80) + { + utf8[length++] = cast(char)c; + continue; + } + + std.utf.encode(encodeBuf, c); + const bytes = codeLength!char(c); + utf8[length .. length + bytes] = encodeBuf[0 .. bytes]; + length += bytes; + } + result.utf8 = utf8[0 .. length]; + } + // Unfortunately we can't do UTF-16 in place so we just use std.conv.to + else + { + result.characterCount = std.utf.count(input); + result.utf8 = input.to!(char[]); + } + } + + try final switch(encoding) + { + case UTFEncoding.UTF_8: + result.utf8 = cast(char[])input; + result.utf8.validate(); + result.characterCount = std.utf.count(result.utf8); + break; + case UTFEncoding.UTF_16: + assert(input.length % 2 == 0, "UTF-16 buffer size must be even"); + encode(cast(wchar[])input, result); + break; + case UTFEncoding.UTF_32: + assert(input.length % 4 == 0, "UTF-32 buffer size must be a multiple of 4"); + encode(cast(dchar[])input, result); + break; + } + catch(ConvException e) { result.errorMessage = e.msg; } + catch(UTFException e) { result.errorMessage = e.msg; } + catch(Exception e) + { + assert(false, "Unexpected exception in encode(): " ~ e.msg); + } + + return result; +} + +/// Determine if all characters (code points, not bytes) in a string are printable. +bool isPrintableValidUTF8(const char[] chars) @safe pure +{ + import std.uni : isControl, isWhite; + foreach (dchar chr; chars) + { + if (!chr.isValidDchar || (chr.isControl && !chr.isWhite)) + { + return false; + } + } + return true; +} + +/// Counts the number of ASCII characters in buffer until the first UTF-8 sequence. +/// +/// Used to determine how many characters we can process without decoding. +size_t countASCII(const(char)[] buffer) @safe pure nothrow @nogc +{ + return buffer.byCodeUnit.until!(x => x > 0x7F).walkLength; +} +// Unittests. + +void testEndian(R)() +{ + void endian_test(ubyte[] data, Encoding encoding_expected, Endian endian_expected) + { + auto reader = new R(data); + assert(reader.encoding == encoding_expected); + assert(reader.endian_ == endian_expected); + } + ubyte[] little_endian_utf_16 = [0xFF, 0xFE, 0x7A, 0x00]; + ubyte[] big_endian_utf_16 = [0xFE, 0xFF, 0x00, 0x7A]; + endian_test(little_endian_utf_16, Encoding.UTF_16, Endian.littleEndian); + endian_test(big_endian_utf_16, Encoding.UTF_16, Endian.bigEndian); +} + +void testPeekPrefixForward(R)() +{ + import std.encoding; + ubyte[] data = bomTable[BOM.utf8].sequence ~ cast(ubyte[])"data"; + auto reader = new R(data); + assert(reader.peek() == 'd'); + assert(reader.peek(1) == 'a'); + assert(reader.peek(2) == 't'); + assert(reader.peek(3) == 'a'); + assert(reader.peek(4) == '\0'); + assert(reader.prefix(4) == "data"); + // assert(reader.prefix(6) == "data\0"); + reader.forward(2); + assert(reader.peek(1) == 'a'); + // assert(collectException(reader.peek(3))); +} + +void testUTF(R)() +{ + import std.encoding; + dchar[] data = cast(dchar[])"data"; + void utf_test(T)(T[] data, BOM bom) + { + ubyte[] bytes = bomTable[bom].sequence ~ + (cast(ubyte[])data)[0 .. data.length * T.sizeof]; + auto reader = new R(bytes); + assert(reader.peek() == 'd'); + assert(reader.peek(1) == 'a'); + assert(reader.peek(2) == 't'); + assert(reader.peek(3) == 'a'); + } + utf_test!char(to!(char[])(data), BOM.utf8); + utf_test!wchar(to!(wchar[])(data), endian == Endian.bigEndian ? BOM.utf16be : BOM.utf16le); + utf_test(data, endian == Endian.bigEndian ? BOM.utf32be : BOM.utf32le); +} + +void test1Byte(R)() +{ + ubyte[] data = [97]; + + auto reader = new R(data); + assert(reader.peek() == 'a'); + assert(reader.peek(1) == '\0'); + // assert(collectException(reader.peek(2))); +} + +@system unittest +{ + testEndian!Reader(); + testPeekPrefixForward!Reader(); + testUTF!Reader(); + test1Byte!Reader(); +} +//Issue 257 - https://github.com/dlang-community/D-YAML/issues/257 +@safe unittest +{ + import dyaml.loader : Loader; + auto yaml = "hello "; + auto root = Loader.fromString(yaml).load(); + + assert(root.isValid); +} diff --git a/source/dyaml/representer.d b/source/dyaml/representer.d new file mode 100644 index 0000000..f903b60 --- /dev/null +++ b/source/dyaml/representer.d @@ -0,0 +1,517 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * YAML node _representer. Prepares YAML nodes for output. A tutorial can be + * found $(LINK2 ../tutorials/custom_types.html, here). + * + * Code based on $(LINK2 http://www.pyyaml.org, PyYAML). + */ +module dyaml.representer; + + +import std.algorithm; +import std.array; +import std.base64; +import std.container; +import std.conv; +import std.datetime; +import std.exception; +import std.format; +import std.math; +import std.typecons; +import std.string; + +import dyaml.exception; +import dyaml.node; +import dyaml.serializer; +import dyaml.style; + +package: +///Exception thrown on Representer errors. +class RepresenterException : YAMLException +{ + mixin ExceptionCtors; +} + +/** + * Represents YAML nodes as scalar, sequence and mapping nodes ready for output. + */ +Node representData(const Node data, ScalarStyle defaultScalarStyle, CollectionStyle defaultCollectionStyle) @safe +{ + Node result; + final switch(data.type) + { + case NodeType.null_: + result = representNull(); + break; + case NodeType.merge: + break; + case NodeType.boolean: + result = representBool(data); + break; + case NodeType.integer: + result = representLong(data); + break; + case NodeType.decimal: + result = representReal(data); + break; + case NodeType.binary: + result = representBytes(data); + break; + case NodeType.timestamp: + result = representSysTime(data); + break; + case NodeType.string: + result = representString(data); + break; + case NodeType.mapping: + result = representPairs(data, defaultScalarStyle, defaultCollectionStyle); + break; + case NodeType.sequence: + result = representNodes(data, defaultScalarStyle, defaultCollectionStyle); + break; + case NodeType.invalid: + assert(0); + } + + final switch (result.nodeID) + { + case NodeID.scalar: + if (result.scalarStyle == ScalarStyle.invalid) + { + result.scalarStyle = defaultScalarStyle; + } + break; + case NodeID.sequence, NodeID.mapping: + if (defaultCollectionStyle != CollectionStyle.invalid) + { + result.collectionStyle = defaultCollectionStyle; + } + break; + case NodeID.invalid: + break; + } + + + //Override tag if specified. + if(data.tag_ !is null){result.tag_ = data.tag_;} + + //Remember style if this was loaded before. + if(data.scalarStyle != ScalarStyle.invalid) + { + result.scalarStyle = data.scalarStyle; + } + if(data.collectionStyle != CollectionStyle.invalid) + { + result.collectionStyle = data.collectionStyle; + } + return result; +} + +@safe unittest +{ + // We don't emit yaml merge nodes. + assert(representData(Node(YAMLMerge()), ScalarStyle.invalid, CollectionStyle.invalid) == Node.init); +} + +@safe unittest +{ + assert(representData(Node(YAMLNull()), ScalarStyle.invalid, CollectionStyle.invalid) == Node("null", "tag:yaml.org,2002:null")); +} + +@safe unittest +{ + assert(representData(Node(cast(string)null), ScalarStyle.invalid, CollectionStyle.invalid) == Node("", "tag:yaml.org,2002:str")); + assert(representData(Node("Hello world!"), ScalarStyle.invalid, CollectionStyle.invalid) == Node("Hello world!", "tag:yaml.org,2002:str")); +} + +@safe unittest +{ + assert(representData(Node(64), ScalarStyle.invalid, CollectionStyle.invalid) == Node("64", "tag:yaml.org,2002:int")); +} + +@safe unittest +{ + assert(representData(Node(true), ScalarStyle.invalid, CollectionStyle.invalid) == Node("true", "tag:yaml.org,2002:bool")); + assert(representData(Node(false), ScalarStyle.invalid, CollectionStyle.invalid) == Node("false", "tag:yaml.org,2002:bool")); +} + +@safe unittest +{ + // Float comparison is pretty unreliable... + auto result = representData(Node(1.0), ScalarStyle.invalid, CollectionStyle.invalid); + assert(isClose(result.as!string.to!real, 1.0)); + assert(result.tag == "tag:yaml.org,2002:float"); + + assert(representData(Node(real.nan), ScalarStyle.invalid, CollectionStyle.invalid) == Node(".nan", "tag:yaml.org,2002:float")); + assert(representData(Node(real.infinity), ScalarStyle.invalid, CollectionStyle.invalid) == Node(".inf", "tag:yaml.org,2002:float")); + assert(representData(Node(-real.infinity), ScalarStyle.invalid, CollectionStyle.invalid) == Node("-.inf", "tag:yaml.org,2002:float")); +} + +@safe unittest +{ + assert(representData(Node(SysTime(DateTime(2000, 3, 14, 12, 34, 56), UTC())), ScalarStyle.invalid, CollectionStyle.invalid) == Node("2000-03-14T12:34:56Z", "tag:yaml.org,2002:timestamp")); +} + +@safe unittest +{ + assert(representData(Node(Node[].init, "tag:yaml.org,2002:set"), ScalarStyle.invalid, CollectionStyle.invalid) == Node(Node.Pair[].init, "tag:yaml.org,2002:set")); + assert(representData(Node(Node[].init, "tag:yaml.org,2002:seq"), ScalarStyle.invalid, CollectionStyle.invalid) == Node(Node[].init, "tag:yaml.org,2002:seq")); + { + auto nodes = [ + Node("a"), + Node("b"), + Node("c"), + ]; + assert(representData(Node(nodes, "tag:yaml.org,2002:set"), ScalarStyle.invalid, CollectionStyle.invalid) == + Node([ + Node.Pair( + Node("a", "tag:yaml.org,2002:str"), + Node("null", "tag:yaml.org,2002:null") + ), + Node.Pair( + Node("b", "tag:yaml.org,2002:str"), + Node("null", "tag:yaml.org,2002:null") + ), + Node.Pair( + Node("c", "tag:yaml.org,2002:str"), + Node("null", "tag:yaml.org,2002:null") + ) + ], "tag:yaml.org,2002:set")); + } + { + auto nodes = [ + Node("a"), + Node("b"), + Node("c"), + ]; + assert(representData(Node(nodes, "tag:yaml.org,2002:seq"), ScalarStyle.invalid, CollectionStyle.invalid) == + Node([ + Node("a", "tag:yaml.org,2002:str"), + Node("b", "tag:yaml.org,2002:str"), + Node("c", "tag:yaml.org,2002:str") + ], "tag:yaml.org,2002:seq")); + } +} + +@safe unittest +{ + assert(representData(Node(Node.Pair[].init, "tag:yaml.org,2002:omap"), ScalarStyle.invalid, CollectionStyle.invalid) == Node(Node[].init, "tag:yaml.org,2002:omap")); + assert(representData(Node(Node.Pair[].init, "tag:yaml.org,2002:pairs"), ScalarStyle.invalid, CollectionStyle.invalid) == Node(Node[].init, "tag:yaml.org,2002:pairs")); + assert(representData(Node(Node.Pair[].init, "tag:yaml.org,2002:map"), ScalarStyle.invalid, CollectionStyle.invalid) == Node(Node.Pair[].init, "tag:yaml.org,2002:map")); + { + auto nodes = [ + Node.Pair("a", "b"), + Node.Pair("a", "c") + ]; + assertThrown(representData(Node(nodes, "tag:yaml.org,2002:omap"), ScalarStyle.invalid, CollectionStyle.invalid)); + } + // Yeah, this gets ugly really fast. + { + auto nodes = [ + Node.Pair("a", "b"), + Node.Pair("a", "c") + ]; + assert(representData(Node(nodes, "tag:yaml.org,2002:pairs"), ScalarStyle.invalid, CollectionStyle.invalid) == + Node([ + Node( + [Node.Pair( + Node("a", "tag:yaml.org,2002:str"), + Node("b", "tag:yaml.org,2002:str") + )], + "tag:yaml.org,2002:map"), + Node( + [Node.Pair( + Node("a", "tag:yaml.org,2002:str"), + Node("c", "tag:yaml.org,2002:str") + )], + "tag:yaml.org,2002:map"), + ], "tag:yaml.org,2002:pairs")); + } + { + auto nodes = [ + Node.Pair("a", "b"), + Node.Pair("a", "c") + ]; + assertThrown(representData(Node(nodes, "tag:yaml.org,2002:map"), ScalarStyle.invalid, CollectionStyle.invalid)); + } + { + auto nodes = [ + Node.Pair("a", "b"), + Node.Pair("c", "d") + ]; + assert(representData(Node(nodes, "tag:yaml.org,2002:omap"), ScalarStyle.invalid, CollectionStyle.invalid) == + Node([ + Node([ + Node.Pair( + Node("a", "tag:yaml.org,2002:str"), + Node("b", "tag:yaml.org,2002:str") + ) + ], "tag:yaml.org,2002:map"), + Node([ + Node.Pair( + Node("c", "tag:yaml.org,2002:str"), + Node("d", "tag:yaml.org,2002:str") + ) + ], "tag:yaml.org,2002:map" + )], "tag:yaml.org,2002:omap")); + } + { + auto nodes = [ + Node.Pair("a", "b"), + Node.Pair("c", "d") + ]; + assert(representData(Node(nodes, "tag:yaml.org,2002:map"), ScalarStyle.invalid, CollectionStyle.invalid) == + Node([ + Node.Pair( + Node("a", "tag:yaml.org,2002:str"), + Node("b", "tag:yaml.org,2002:str") + ), + Node.Pair( + Node("c", "tag:yaml.org,2002:str"), + Node("d", "tag:yaml.org,2002:str") + ), + ], "tag:yaml.org,2002:map")); + } +} + +private: + +//Represent a _null _node as a _null YAML value. +Node representNull() @safe +{ + return Node("null", "tag:yaml.org,2002:null"); +} + +//Represent a string _node as a string scalar. +Node representString(const Node node) @safe +{ + string value = node.as!string; + return Node(value, "tag:yaml.org,2002:str"); +} + +//Represent a bytes _node as a binary scalar. +Node representBytes(const Node node) @safe +{ + const ubyte[] value = node.as!(ubyte[]); + if(value is null){return Node("null", "tag:yaml.org,2002:null");} + + auto newNode = Node(Base64.encode(value).idup, "tag:yaml.org,2002:binary"); + newNode.scalarStyle = ScalarStyle.literal; + return newNode; +} + +//Represent a bool _node as a bool scalar. +Node representBool(const Node node) @safe +{ + return Node(node.as!bool ? "true" : "false", "tag:yaml.org,2002:bool"); +} + +//Represent a long _node as an integer scalar. +Node representLong(const Node node) @safe +{ + return Node(node.as!long.to!string, "tag:yaml.org,2002:int"); +} + +//Represent a real _node as a floating point scalar. +Node representReal(const Node node) @safe +{ + real f = node.as!real; + string value = isNaN(f) ? ".nan": + f == real.infinity ? ".inf": + f == -1.0 * real.infinity ? "-.inf": + {auto a = appender!string(); + formattedWrite(a, "%12f", f); + return a.data.strip();}(); + + return Node(value, "tag:yaml.org,2002:float"); +} + +//Represent a SysTime _node as a timestamp. +Node representSysTime(const Node node) @safe +{ + return Node(node.as!SysTime.toISOExtString(), "tag:yaml.org,2002:timestamp"); +} + +//Represent a sequence _node as sequence/set. +Node representNodes(const Node node, ScalarStyle defaultScalarStyle, CollectionStyle defaultCollectionStyle) @safe +{ + auto nodes = node.as!(Node[]); + if(node.tag_ == "tag:yaml.org,2002:set") + { + //YAML sets are mapping with null values. + Node.Pair[] pairs; + pairs.length = nodes.length; + + foreach(idx, key; nodes) + { + pairs[idx] = Node.Pair(key, Node("null", "tag:yaml.org,2002:null")); + } + Node.Pair[] value; + value.length = pairs.length; + + auto bestStyle = CollectionStyle.flow; + foreach(idx, pair; pairs) + { + value[idx] = Node.Pair(representData(pair.key, defaultScalarStyle, defaultCollectionStyle), representData(pair.value, defaultScalarStyle, defaultCollectionStyle)); + if(value[idx].shouldUseBlockStyle) + { + bestStyle = CollectionStyle.block; + } + } + + auto newNode = Node(value, node.tag_); + newNode.collectionStyle = bestStyle; + return newNode; + } + else + { + Node[] value; + value.length = nodes.length; + + auto bestStyle = CollectionStyle.flow; + foreach(idx, item; nodes) + { + value[idx] = representData(item, defaultScalarStyle, defaultCollectionStyle); + const isScalar = value[idx].nodeID == NodeID.scalar; + const s = value[idx].scalarStyle; + if(!isScalar || (s != ScalarStyle.invalid && s != ScalarStyle.plain)) + { + bestStyle = CollectionStyle.block; + } + } + + auto newNode = Node(value, "tag:yaml.org,2002:seq"); + newNode.collectionStyle = bestStyle; + return newNode; + } +} + +bool shouldUseBlockStyle(const Node value) @safe +{ + const isScalar = value.nodeID == NodeID.scalar; + const s = value.scalarStyle; + return (!isScalar || (s != ScalarStyle.invalid && s != ScalarStyle.plain)); +} +bool shouldUseBlockStyle(const Node.Pair value) @safe +{ + const keyScalar = value.key.nodeID == NodeID.scalar; + const valScalar = value.value.nodeID == NodeID.scalar; + const keyStyle = value.key.scalarStyle; + const valStyle = value.value.scalarStyle; + if(!keyScalar || + (keyStyle != ScalarStyle.invalid && keyStyle != ScalarStyle.plain)) + { + return true; + } + if(!valScalar || + (valStyle != ScalarStyle.invalid && valStyle != ScalarStyle.plain)) + { + return true; + } + return false; +} + +//Represent a mapping _node as map/ordered map/pairs. +Node representPairs(const Node node, ScalarStyle defaultScalarStyle, CollectionStyle defaultCollectionStyle) @safe +{ + auto pairs = node.as!(Node.Pair[]); + + bool hasDuplicates(const Node.Pair[] pairs) @safe + { + //TODO this should be replaced by something with deterministic memory allocation. + auto keys = redBlackTree!Node(); + foreach(pair; pairs) + { + if(pair.key in keys){return true;} + keys.insert(pair.key); + } + return false; + } + + Node[] mapToSequence(const Node.Pair[] pairs) @safe + { + Node[] nodes; + nodes.length = pairs.length; + foreach(idx, pair; pairs) + { + Node.Pair value; + + auto bestStyle = value.shouldUseBlockStyle ? CollectionStyle.block : CollectionStyle.flow; + value = Node.Pair(representData(pair.key, defaultScalarStyle, defaultCollectionStyle), representData(pair.value, defaultScalarStyle, defaultCollectionStyle)); + + auto newNode = Node([value], "tag:yaml.org,2002:map"); + newNode.collectionStyle = bestStyle; + nodes[idx] = newNode; + } + return nodes; + } + + if(node.tag_ == "tag:yaml.org,2002:omap") + { + enforce(!hasDuplicates(pairs), + new RepresenterException("Duplicate entry in an ordered map")); + auto sequence = mapToSequence(pairs); + Node[] value; + value.length = sequence.length; + + auto bestStyle = CollectionStyle.flow; + foreach(idx, item; sequence) + { + value[idx] = representData(item, defaultScalarStyle, defaultCollectionStyle); + if(value[idx].shouldUseBlockStyle) + { + bestStyle = CollectionStyle.block; + } + } + + auto newNode = Node(value, node.tag_); + newNode.collectionStyle = bestStyle; + return newNode; + } + else if(node.tag_ == "tag:yaml.org,2002:pairs") + { + auto sequence = mapToSequence(pairs); + Node[] value; + value.length = sequence.length; + + auto bestStyle = CollectionStyle.flow; + foreach(idx, item; sequence) + { + value[idx] = representData(item, defaultScalarStyle, defaultCollectionStyle); + if(value[idx].shouldUseBlockStyle) + { + bestStyle = CollectionStyle.block; + } + } + + auto newNode = Node(value, node.tag_); + newNode.collectionStyle = bestStyle; + return newNode; + } + else + { + enforce(!hasDuplicates(pairs), + new RepresenterException("Duplicate entry in an unordered map")); + Node.Pair[] value; + value.length = pairs.length; + + auto bestStyle = CollectionStyle.flow; + foreach(idx, pair; pairs) + { + value[idx] = Node.Pair(representData(pair.key, defaultScalarStyle, defaultCollectionStyle), representData(pair.value, defaultScalarStyle, defaultCollectionStyle)); + if(value[idx].shouldUseBlockStyle) + { + bestStyle = CollectionStyle.block; + } + } + + auto newNode = Node(value, "tag:yaml.org,2002:map"); + newNode.collectionStyle = bestStyle; + return newNode; + } +} diff --git a/source/dyaml/resolver.d b/source/dyaml/resolver.d new file mode 100644 index 0000000..16d8419 --- /dev/null +++ b/source/dyaml/resolver.d @@ -0,0 +1,260 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * Implements a class that resolves YAML tags. This can be used to implicitly + * resolve tags for custom data types, removing the need to explicitly + * specify tags in YAML. A tutorial can be found + * $(LINK2 ../tutorials/custom_types.html, here). + * + * Code based on $(LINK2 http://www.pyyaml.org, PyYAML). + */ +module dyaml.resolver; + + +import std.conv; +import std.regex; +import std.typecons; +import std.utf; + +import dyaml.node; +import dyaml.exception; + + +/// Type of `regexes` +private alias RegexType = Tuple!(string, "tag", const Regex!char, "regexp", string, "chars"); + +private immutable RegexType[] regexes = [ + RegexType("tag:yaml.org,2002:bool", + regex(r"^(?:yes|Yes|YES|no|No|NO|true|True|TRUE" ~ + "|false|False|FALSE|on|On|ON|off|Off|OFF)$"), + "yYnNtTfFoO"), + RegexType("tag:yaml.org,2002:float", + regex(r"^(?:[-+]?([0-9][0-9_]*)\\.[0-9_]*" ~ + "(?:[eE][-+][0-9]+)?|[-+]?(?:[0-9][0-9_]" ~ + "*)?\\.[0-9_]+(?:[eE][-+][0-9]+)?|[-+]?" ~ + "[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]" ~ + "*|[-+]?\\.(?:inf|Inf|INF)|\\." ~ + "(?:nan|NaN|NAN))$"), + "-+0123456789."), + RegexType("tag:yaml.org,2002:int", + regex(r"^(?:[-+]?0b[0-1_]+" ~ + "|[-+]?0[0-7_]+" ~ + "|[-+]?(?:0|[1-9][0-9_]*)" ~ + "|[-+]?0x[0-9a-fA-F_]+" ~ + "|[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$"), + "-+0123456789"), + RegexType("tag:yaml.org,2002:merge", regex(r"^<<$"), "<"), + RegexType("tag:yaml.org,2002:null", + regex(r"^$|^(?:~|null|Null|NULL)$"), "~nN\0"), + RegexType("tag:yaml.org,2002:timestamp", + regex(r"^[0-9][0-9][0-9][0-9]-[0-9][0-9]-" ~ + "[0-9][0-9]|[0-9][0-9][0-9][0-9]-[0-9]" ~ + "[0-9]?-[0-9][0-9]?[Tt]|[ \t]+[0-9]" ~ + "[0-9]?:[0-9][0-9]:[0-9][0-9]" ~ + "(?:\\.[0-9]*)?(?:[ \t]*Z|[-+][0-9]" ~ + "[0-9]?(?::[0-9][0-9])?)?$"), + "0123456789"), + RegexType("tag:yaml.org,2002:value", regex(r"^=$"), "="), + + //The following resolver is only for documentation purposes. It cannot work + //because plain scalars cannot start with '!', '&', or '*'. + RegexType("tag:yaml.org,2002:yaml", regex(r"^(?:!|&|\*)$"), "!&*"), +]; + +/** + * Resolves YAML tags (data types). + * + * Can be used to implicitly resolve custom data types of scalar values. + */ +struct Resolver +{ + private: + // Default tag to use for scalars. + string defaultScalarTag_ = "tag:yaml.org,2002:str"; + // Default tag to use for sequences. + string defaultSequenceTag_ = "tag:yaml.org,2002:seq"; + // Default tag to use for mappings. + string defaultMappingTag_ = "tag:yaml.org,2002:map"; + + /* + * Arrays of scalar resolver tuples indexed by starting character of a scalar. + * + * Each tuple stores regular expression the scalar must match, + * and tag to assign to it if it matches. + */ + Tuple!(string, const Regex!char)[][dchar] yamlImplicitResolvers_; + + package: + static auto withDefaultResolvers() @safe + { + Resolver resolver; + foreach(pair; regexes) + { + resolver.addImplicitResolver(pair.tag, pair.regexp, pair.chars); + } + return resolver; + } + + public: + @disable bool opEquals(ref Resolver); + @disable int opCmp(ref Resolver); + + /** + * Add an implicit scalar resolver. + * + * If a scalar matches regexp and starts with any character in first, + * its _tag is set to tag. If it matches more than one resolver _regexp + * resolvers added _first override ones added later. Default resolvers + * override any user specified resolvers, but they can be disabled in + * Resolver constructor. + * + * If a scalar is not resolved to anything, it is assigned the default + * YAML _tag for strings. + * + * Params: tag = Tag to resolve to. + * regexp = Regular expression the scalar must match to have this _tag. + * first = String of possible starting characters of the scalar. + * + */ + void addImplicitResolver(string tag, const Regex!char regexp, string first) + pure @safe + { + foreach(const dchar c; first) + { + if((c in yamlImplicitResolvers_) is null) + { + yamlImplicitResolvers_[c] = []; + } + yamlImplicitResolvers_[c] ~= tuple(tag, regexp); + } + } + /// Resolve scalars starting with 'A' to !_tag + @safe unittest + { + import std.file : write; + import std.regex : regex; + import dyaml.loader : Loader; + import dyaml.resolver : Resolver; + + write("example.yaml", "A"); + + auto loader = Loader.fromFile("example.yaml"); + loader.resolver.addImplicitResolver("!tag", regex("A.*"), "A"); + + auto node = loader.load(); + assert(node.tag == "!tag"); + } + + package: + /** + * Resolve tag of a node. + * + * Params: kind = Type of the node. + * tag = Explicit tag of the node, if any. + * value = Value of the node, if any. + * implicit = Should the node be implicitly resolved? + * + * If the tag is already specified and not non-specific, that tag will + * be returned. + * + * Returns: Resolved tag. + */ + string resolve(const NodeID kind, const string tag, scope string value, + const bool implicit) @safe + { + import std.array : empty, front; + if((tag !is null) && (tag != "!")) + { + return tag; + } + + final switch (kind) + { + case NodeID.scalar: + if(!implicit) + { + return defaultScalarTag_; + } + + //Get the first char of the value. + const dchar first = value.empty ? '\0' : value.front; + + auto resolvers = (first in yamlImplicitResolvers_) is null ? + [] : yamlImplicitResolvers_[first]; + + //If regexp matches, return tag. + foreach(resolver; resolvers) + { + // source/dyaml/resolver.d(192,35): Error: scope variable `__tmpfordtorXXX` + // assigned to non-scope parameter `this` calling + // `std.regex.RegexMatch!string.RegexMatch.~this` + bool isEmpty = () @trusted { + return match(value, resolver[1]).empty; + }(); + if(!isEmpty) + { + return resolver[0]; + } + } + return defaultScalarTag_; + case NodeID.sequence: + return defaultSequenceTag_; + case NodeID.mapping: + return defaultMappingTag_; + case NodeID.invalid: + assert(false, "Cannot resolve an invalid node"); + } + } + @safe unittest + { + auto resolver = Resolver.withDefaultResolvers; + + bool tagMatch(string tag, string[] values) @safe + { + const string expected = tag; + foreach(value; values) + { + const string resolved = resolver.resolve(NodeID.scalar, null, value, true); + if(expected != resolved) + { + return false; + } + } + return true; + } + + assert(tagMatch("tag:yaml.org,2002:bool", + ["yes", "NO", "True", "on"])); + assert(tagMatch("tag:yaml.org,2002:float", + ["6.8523015e+5", "685.230_15e+03", "685_230.15", + "190:20:30.15", "-.inf", ".NaN"])); + assert(tagMatch("tag:yaml.org,2002:int", + ["685230", "+685_230", "02472256", "0x_0A_74_AE", + "0b1010_0111_0100_1010_1110", "190:20:30"])); + assert(tagMatch("tag:yaml.org,2002:merge", ["<<"])); + assert(tagMatch("tag:yaml.org,2002:null", ["~", "null", ""])); + assert(tagMatch("tag:yaml.org,2002:str", + ["abcd", "9a8b", "9.1adsf"])); + assert(tagMatch("tag:yaml.org,2002:timestamp", + ["2001-12-15T02:59:43.1Z", + "2001-12-14t21:59:43.10-05:00", + "2001-12-14 21:59:43.10 -5", + "2001-12-15 2:59:43.10", + "2002-12-14"])); + assert(tagMatch("tag:yaml.org,2002:value", ["="])); + assert(tagMatch("tag:yaml.org,2002:yaml", ["!", "&", "*"])); + } + + ///Returns: Default scalar tag. + @property string defaultScalarTag() const pure @safe nothrow {return defaultScalarTag_;} + + ///Returns: Default sequence tag. + @property string defaultSequenceTag() const pure @safe nothrow {return defaultSequenceTag_;} + + ///Returns: Default mapping tag. + @property string defaultMappingTag() const pure @safe nothrow {return defaultMappingTag_;} +} diff --git a/source/dyaml/scanner.d b/source/dyaml/scanner.d new file mode 100644 index 0000000..3f0f394 --- /dev/null +++ b/source/dyaml/scanner.d @@ -0,0 +1,1794 @@ + +// Copyright Ferdinand Majerech 2011-2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/// YAML scanner. +/// Code based on PyYAML: http://www.pyyaml.org +module dyaml.scanner; + + +import core.stdc.string; + +import std.algorithm; +import std.array; +import std.conv; +import std.ascii : isAlphaNum, isDigit, isHexDigit; +import std.exception; +import std.string; +import std.typecons; +import std.traits : Unqual; +import std.utf; + +import dyaml.escapes; +import dyaml.exception; +import dyaml.queue; +import dyaml.reader; +import dyaml.style; +import dyaml.token; + +package: +/// Scanner produces tokens of the following types: +/// STREAM-START +/// STREAM-END +/// DIRECTIVE(name, value) +/// DOCUMENT-START +/// DOCUMENT-END +/// BLOCK-SEQUENCE-START +/// BLOCK-MAPPING-START +/// BLOCK-END +/// FLOW-SEQUENCE-START +/// FLOW-MAPPING-START +/// FLOW-SEQUENCE-END +/// FLOW-MAPPING-END +/// BLOCK-ENTRY +/// FLOW-ENTRY +/// KEY +/// VALUE +/// ALIAS(value) +/// ANCHOR(value) +/// TAG(value) +/// SCALAR(value, plain, style) + +alias isBreak = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isBreakOrSpace = among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isWhiteSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isNonLinebreakWhitespace = among!(' ', '\t'); + +alias isNonScalarStartCharacter = among!('-', '?', ':', ',', '[', ']', '{', '}', + '#', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`', ' ', '\t', '\0', '\n', + '\r', '\u0085', '\u2028', '\u2029'); + +alias isURIChar = among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', + '_', '.', '!', '~', '*', '\'', '(', ')', '[', ']', '%'); + +alias isNSChar = among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isBChar = among!('\n', '\r', '\u0085', '\u2028', '\u2029'); + +alias isFlowScalarBreakSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029', '\'', '"', '\\'); + +/// Marked exception thrown at scanner errors. +/// +/// See_Also: MarkedYAMLException +class ScannerException : MarkedYAMLException +{ + mixin MarkedExceptionCtors; +} + +/// Generates tokens from data provided by a Reader. +struct Scanner +{ + private: + /// A simple key is a key that is not denoted by the '?' indicator. + /// For example: + /// --- + /// block simple key: value + /// ? not a simple key: + /// : { flow simple key: value } + /// We emit the KEY token before all keys, so when we find a potential simple + /// key, we try to locate the corresponding ':' indicator. Simple keys should be + /// limited to a single line and 1024 characters. + /// + /// 16 bytes on 64-bit. + static struct SimpleKey + { + /// Character index in reader where the key starts. + uint charIndex = uint.max; + /// Index of the key token from start (first token scanned being 0). + uint tokenIndex; + /// Line the key starts at. + uint line; + /// Column the key starts at. + ushort column; + /// Is this required to be a simple key? + bool required; + /// Is this struct "null" (invalid)?. + bool isNull; + } + + /// Block chomping types. + enum Chomping + { + /// Strip all trailing line breaks. '-' indicator. + strip, + /// Line break of the last line is preserved, others discarded. Default. + clip, + /// All trailing line breaks are preserved. '+' indicator. + keep + } + + /// Reader used to read from a file/stream. + Reader reader_; + /// Are we done scanning? + bool done_; + + /// Level of nesting in flow context. If 0, we're in block context. + uint flowLevel_; + /// Current indentation level. + int indent_ = -1; + /// Past indentation levels. Used as a stack. + Appender!(int[]) indents_; + + /// Processed tokens not yet emitted. Used as a queue. + Queue!Token tokens_; + + /// Number of tokens emitted through the getToken method. + uint tokensTaken_; + + /// Can a simple key start at the current position? A simple key may start: + /// - at the beginning of the line, not counting indentation spaces + /// (in block context), + /// - after '{', '[', ',' (in the flow context), + /// - after '?', ':', '-' (in the block context). + /// In the block context, this flag also signifies if a block collection + /// may start at the current position. + bool allowSimpleKey_ = true; + + /// Possible simple keys indexed by flow levels. + SimpleKey[] possibleSimpleKeys_; + + public: + /// Construct a Scanner using specified Reader. + this(Reader reader) @safe nothrow + { + // Return the next token, but do not delete it from the queue + reader_ = reader; + fetchStreamStart(); + } + + /// Advance to the next token + void popFront() @safe + { + ++tokensTaken_; + tokens_.pop(); + } + + /// Return the current token + const(Token) front() @safe + { + enforce(!empty, "No token left to peek"); + return tokens_.peek(); + } + + /// Return whether there are any more tokens left. + bool empty() @safe + { + while (needMoreTokens()) + { + fetchToken(); + } + return tokens_.empty; + } + + /// Set file name. + void name(string name) @safe pure nothrow @nogc + { + reader_.name = name; + } + + private: + /// Most scanning error messages have the same format; so build them with this + /// function. + string expected(T)(string expected, T found) + { + return text("expected ", expected, ", but found ", found); + } + + /// Determine whether or not we need to fetch more tokens before peeking/getting a token. + bool needMoreTokens() @safe pure + { + if(done_) { return false; } + if(tokens_.empty) { return true; } + + /// The current token may be a potential simple key, so we need to look further. + stalePossibleSimpleKeys(); + return nextPossibleSimpleKey() == tokensTaken_; + } + + /// Fetch at token, adding it to tokens_. + void fetchToken() @safe + { + // Eat whitespaces and comments until we reach the next token. + scanToNextToken(); + + // Remove obsolete possible simple keys. + stalePossibleSimpleKeys(); + + // Compare current indentation and column. It may add some tokens + // and decrease the current indentation level. + unwindIndent(reader_.column); + + // Get the next character. + const dchar c = reader_.peekByte(); + + // Fetch the token. + if(c == '\0') { return fetchStreamEnd(); } + if(checkDirective()) { return fetchDirective(); } + if(checkDocumentStart()) { return fetchDocumentStart(); } + if(checkDocumentEnd()) { return fetchDocumentEnd(); } + // Order of the following checks is NOT significant. + switch(c) + { + case '[': return fetchFlowSequenceStart(); + case '{': return fetchFlowMappingStart(); + case ']': return fetchFlowSequenceEnd(); + case '}': return fetchFlowMappingEnd(); + case ',': return fetchFlowEntry(); + case '!': return fetchTag(); + case '\'': return fetchSingle(); + case '\"': return fetchDouble(); + case '*': return fetchAlias(); + case '&': return fetchAnchor(); + case '?': if(checkKey()) { return fetchKey(); } goto default; + case ':': if(checkValue()) { return fetchValue(); } goto default; + case '-': if(checkBlockEntry()) { return fetchBlockEntry(); } goto default; + case '|': if(flowLevel_ == 0) { return fetchLiteral(); } break; + case '>': if(flowLevel_ == 0) { return fetchFolded(); } break; + default: if(checkPlain()) { return fetchPlain(); } + } + + throw new ScannerException("While scanning for the next token, found character " ~ + "\'%s\', index %s that cannot start any token" + .format(c, to!int(c)), reader_.mark); + } + + + /// Return the token number of the nearest possible simple key. + uint nextPossibleSimpleKey() @safe pure nothrow @nogc + { + uint minTokenNumber = uint.max; + foreach(k, ref simpleKey; possibleSimpleKeys_) + { + if(simpleKey.isNull) { continue; } + minTokenNumber = min(minTokenNumber, simpleKey.tokenIndex); + } + return minTokenNumber; + } + + /// Remove entries that are no longer possible simple keys. + /// + /// According to the YAML specification, simple keys + /// - should be limited to a single line, + /// - should be no longer than 1024 characters. + /// Disabling this will allow simple keys of any length and + /// height (may cause problems if indentation is broken though). + void stalePossibleSimpleKeys() @safe pure + { + foreach(level, ref key; possibleSimpleKeys_) + { + if(key.isNull) { continue; } + if(key.line != reader_.line || reader_.charIndex - key.charIndex > 1024) + { + enforce(!key.required, + new ScannerException("While scanning a simple key", + Mark(reader_.name, key.line, key.column), + "could not find expected ':'", reader_.mark)); + key.isNull = true; + } + } + } + + /// Check if the next token starts a possible simple key and if so, save its position. + /// + /// This function is called for ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. + void savePossibleSimpleKey() @safe pure + { + // Check if a simple key is required at the current position. + const required = (flowLevel_ == 0 && indent_ == reader_.column); + assert(allowSimpleKey_ || !required, "A simple key is required only if it is " ~ + "the first token in the current line. Therefore it is always allowed."); + + if(!allowSimpleKey_) { return; } + + // The next token might be a simple key, so save its number and position. + removePossibleSimpleKey(); + const tokenCount = tokensTaken_ + cast(uint)tokens_.length; + + const line = reader_.line; + const column = reader_.column; + const key = SimpleKey(cast(uint)reader_.charIndex, tokenCount, line, + cast(ushort)min(column, ushort.max), required); + + if(possibleSimpleKeys_.length <= flowLevel_) + { + const oldLength = possibleSimpleKeys_.length; + possibleSimpleKeys_.length = flowLevel_ + 1; + //No need to initialize the last element, it's already done in the next line. + possibleSimpleKeys_[oldLength .. flowLevel_] = SimpleKey.init; + } + possibleSimpleKeys_[flowLevel_] = key; + } + + /// Remove the saved possible key position at the current flow level. + void removePossibleSimpleKey() @safe pure + { + if(possibleSimpleKeys_.length <= flowLevel_) { return; } + + if(!possibleSimpleKeys_[flowLevel_].isNull) + { + const key = possibleSimpleKeys_[flowLevel_]; + enforce(!key.required, + new ScannerException("While scanning a simple key", + Mark(reader_.name, key.line, key.column), + "could not find expected ':'", reader_.mark)); + possibleSimpleKeys_[flowLevel_].isNull = true; + } + } + + /// Decrease indentation, removing entries in indents_. + /// + /// Params: column = Current column in the file/stream. + void unwindIndent(const int column) @safe + { + if(flowLevel_ > 0) + { + // In flow context, tokens should respect indentation. + // The condition should be `indent >= column` according to the spec. + // But this condition will prohibit intuitively correct + // constructions such as + // key : { + // } + + // In the flow context, indentation is ignored. We make the scanner less + // restrictive than what the specification requires. + // if(pedantic_ && flowLevel_ > 0 && indent_ > column) + // { + // throw new ScannerException("Invalid intendation or unclosed '[' or '{'", + // reader_.mark) + // } + return; + } + + // In block context, we may need to issue the BLOCK-END tokens. + while(indent_ > column) + { + indent_ = indents_.data.back; + assert(indents_.data.length); + indents_.shrinkTo(indents_.data.length - 1); + tokens_.push(blockEndToken(reader_.mark, reader_.mark)); + } + } + + /// Increase indentation if needed. + /// + /// Params: column = Current column in the file/stream. + /// + /// Returns: true if the indentation was increased, false otherwise. + bool addIndent(int column) @safe + { + if(indent_ >= column){return false;} + indents_ ~= indent_; + indent_ = column; + return true; + } + + + /// Add STREAM-START token. + void fetchStreamStart() @safe nothrow + { + tokens_.push(streamStartToken(reader_.mark, reader_.mark, reader_.encoding)); + } + + ///Add STREAM-END token. + void fetchStreamEnd() @safe + { + //Set intendation to -1 . + unwindIndent(-1); + removePossibleSimpleKey(); + allowSimpleKey_ = false; + possibleSimpleKeys_.destroy; + + tokens_.push(streamEndToken(reader_.mark, reader_.mark)); + done_ = true; + } + + /// Add DIRECTIVE token. + void fetchDirective() @safe + { + // Set intendation to -1 . + unwindIndent(-1); + // Reset simple keys. + removePossibleSimpleKey(); + allowSimpleKey_ = false; + + auto directive = scanDirective(); + tokens_.push(directive); + } + + /// Add DOCUMENT-START or DOCUMENT-END token. + void fetchDocumentIndicator(TokenID id)() + if(id == TokenID.documentStart || id == TokenID.documentEnd) + { + // Set indentation to -1 . + unwindIndent(-1); + // Reset simple keys. Note that there can't be a block collection after '---'. + removePossibleSimpleKey(); + allowSimpleKey_ = false; + + Mark startMark = reader_.mark; + reader_.forward(3); + tokens_.push(simpleToken!id(startMark, reader_.mark)); + } + + /// Aliases to add DOCUMENT-START or DOCUMENT-END token. + alias fetchDocumentStart = fetchDocumentIndicator!(TokenID.documentStart); + alias fetchDocumentEnd = fetchDocumentIndicator!(TokenID.documentEnd); + + /// Add FLOW-SEQUENCE-START or FLOW-MAPPING-START token. + void fetchFlowCollectionStart(TokenID id)() @safe + { + // '[' and '{' may start a simple key. + savePossibleSimpleKey(); + // Simple keys are allowed after '[' and '{'. + allowSimpleKey_ = true; + ++flowLevel_; + + Mark startMark = reader_.mark; + reader_.forward(); + tokens_.push(simpleToken!id(startMark, reader_.mark)); + } + + /// Aliases to add FLOW-SEQUENCE-START or FLOW-MAPPING-START token. + alias fetchFlowSequenceStart = fetchFlowCollectionStart!(TokenID.flowSequenceStart); + alias fetchFlowMappingStart = fetchFlowCollectionStart!(TokenID.flowMappingStart); + + /// Add FLOW-SEQUENCE-START or FLOW-MAPPING-START token. + void fetchFlowCollectionEnd(TokenID id)() + { + // Reset possible simple key on the current level. + removePossibleSimpleKey(); + // No simple keys after ']' and '}'. + allowSimpleKey_ = false; + --flowLevel_; + + Mark startMark = reader_.mark; + reader_.forward(); + tokens_.push(simpleToken!id(startMark, reader_.mark)); + } + + /// Aliases to add FLOW-SEQUENCE-START or FLOW-MAPPING-START token/ + alias fetchFlowSequenceEnd = fetchFlowCollectionEnd!(TokenID.flowSequenceEnd); + alias fetchFlowMappingEnd = fetchFlowCollectionEnd!(TokenID.flowMappingEnd); + + /// Add FLOW-ENTRY token; + void fetchFlowEntry() @safe + { + // Reset possible simple key on the current level. + removePossibleSimpleKey(); + // Simple keys are allowed after ','. + allowSimpleKey_ = true; + + Mark startMark = reader_.mark; + reader_.forward(); + tokens_.push(flowEntryToken(startMark, reader_.mark)); + } + + /// Additional checks used in block context in fetchBlockEntry and fetchKey. + /// + /// Params: type = String representing the token type we might need to add. + /// id = Token type we might need to add. + void blockChecks(string type, TokenID id)() + { + enum context = type ~ " keys are not allowed here"; + // Are we allowed to start a key (not neccesarily a simple one)? + enforce(allowSimpleKey_, new ScannerException(context, reader_.mark)); + + if(addIndent(reader_.column)) + { + tokens_.push(simpleToken!id(reader_.mark, reader_.mark)); + } + } + + /// Add BLOCK-ENTRY token. Might add BLOCK-SEQUENCE-START in the process. + void fetchBlockEntry() @safe + { + if(flowLevel_ == 0) { blockChecks!("Sequence", TokenID.blockSequenceStart)(); } + + // It's an error for the block entry to occur in the flow context, + // but we let the parser detect this. + + // Reset possible simple key on the current level. + removePossibleSimpleKey(); + // Simple keys are allowed after '-'. + allowSimpleKey_ = true; + + Mark startMark = reader_.mark; + reader_.forward(); + tokens_.push(blockEntryToken(startMark, reader_.mark)); + } + + /// Add KEY token. Might add BLOCK-MAPPING-START in the process. + void fetchKey() @safe + { + if(flowLevel_ == 0) { blockChecks!("Mapping", TokenID.blockMappingStart)(); } + + // Reset possible simple key on the current level. + removePossibleSimpleKey(); + // Simple keys are allowed after '?' in the block context. + allowSimpleKey_ = (flowLevel_ == 0); + + Mark startMark = reader_.mark; + reader_.forward(); + tokens_.push(keyToken(startMark, reader_.mark)); + } + + /// Add VALUE token. Might add KEY and/or BLOCK-MAPPING-START in the process. + void fetchValue() @safe + { + //Do we determine a simple key? + if(possibleSimpleKeys_.length > flowLevel_ && + !possibleSimpleKeys_[flowLevel_].isNull) + { + const key = possibleSimpleKeys_[flowLevel_]; + possibleSimpleKeys_[flowLevel_].isNull = true; + Mark keyMark = Mark(reader_.name, key.line, key.column); + const idx = key.tokenIndex - tokensTaken_; + + assert(idx >= 0); + + // Add KEY. + // Manually inserting since tokens are immutable (need linked list). + tokens_.insert(keyToken(keyMark, keyMark), idx); + + // If this key starts a new block mapping, we need to add BLOCK-MAPPING-START. + if(flowLevel_ == 0 && addIndent(key.column)) + { + tokens_.insert(blockMappingStartToken(keyMark, keyMark), idx); + } + + // There cannot be two simple keys in a row. + allowSimpleKey_ = false; + } + // Part of a complex key + else + { + // We can start a complex value if and only if we can start a simple key. + enforce(flowLevel_ > 0 || allowSimpleKey_, + new ScannerException("Mapping values are not allowed here", reader_.mark)); + + // If this value starts a new block mapping, we need to add + // BLOCK-MAPPING-START. It'll be detected as an error later by the parser. + if(flowLevel_ == 0 && addIndent(reader_.column)) + { + tokens_.push(blockMappingStartToken(reader_.mark, reader_.mark)); + } + + // Reset possible simple key on the current level. + removePossibleSimpleKey(); + // Simple keys are allowed after ':' in the block context. + allowSimpleKey_ = (flowLevel_ == 0); + } + + // Add VALUE. + Mark startMark = reader_.mark; + reader_.forward(); + tokens_.push(valueToken(startMark, reader_.mark)); + } + + /// Add ALIAS or ANCHOR token. + void fetchAnchor_(TokenID id)() @safe + if(id == TokenID.alias_ || id == TokenID.anchor) + { + // ALIAS/ANCHOR could be a simple key. + savePossibleSimpleKey(); + // No simple keys after ALIAS/ANCHOR. + allowSimpleKey_ = false; + + auto anchor = scanAnchor(id); + tokens_.push(anchor); + } + + /// Aliases to add ALIAS or ANCHOR token. + alias fetchAlias = fetchAnchor_!(TokenID.alias_); + alias fetchAnchor = fetchAnchor_!(TokenID.anchor); + + /// Add TAG token. + void fetchTag() @safe + { + //TAG could start a simple key. + savePossibleSimpleKey(); + //No simple keys after TAG. + allowSimpleKey_ = false; + + tokens_.push(scanTag()); + } + + /// Add block SCALAR token. + void fetchBlockScalar(ScalarStyle style)() @safe + if(style == ScalarStyle.literal || style == ScalarStyle.folded) + { + // Reset possible simple key on the current level. + removePossibleSimpleKey(); + // A simple key may follow a block scalar. + allowSimpleKey_ = true; + + auto blockScalar = scanBlockScalar(style); + tokens_.push(blockScalar); + } + + /// Aliases to add literal or folded block scalar. + alias fetchLiteral = fetchBlockScalar!(ScalarStyle.literal); + alias fetchFolded = fetchBlockScalar!(ScalarStyle.folded); + + /// Add quoted flow SCALAR token. + void fetchFlowScalar(ScalarStyle quotes)() + { + // A flow scalar could be a simple key. + savePossibleSimpleKey(); + // No simple keys after flow scalars. + allowSimpleKey_ = false; + + // Scan and add SCALAR. + auto scalar = scanFlowScalar(quotes); + tokens_.push(scalar); + } + + /// Aliases to add single or double quoted block scalar. + alias fetchSingle = fetchFlowScalar!(ScalarStyle.singleQuoted); + alias fetchDouble = fetchFlowScalar!(ScalarStyle.doubleQuoted); + + /// Add plain SCALAR token. + void fetchPlain() @safe + { + // A plain scalar could be a simple key + savePossibleSimpleKey(); + // No simple keys after plain scalars. But note that scanPlain() will + // change this flag if the scan is finished at the beginning of the line. + allowSimpleKey_ = false; + auto plain = scanPlain(); + + // Scan and add SCALAR. May change allowSimpleKey_ + tokens_.push(plain); + } + + pure: + + ///Check if the next token is DIRECTIVE: ^ '%' ... + bool checkDirective() @safe + { + return reader_.peekByte() == '%' && reader_.column == 0; + } + + /// Check if the next token is DOCUMENT-START: ^ '---' (' '|'\n') + bool checkDocumentStart() @safe + { + // Check one char first, then all 3, to prevent reading outside the buffer. + return reader_.column == 0 && + reader_.peekByte() == '-' && + reader_.prefix(3) == "---" && + reader_.peek(3).isWhiteSpace; + } + + /// Check if the next token is DOCUMENT-END: ^ '...' (' '|'\n') + bool checkDocumentEnd() @safe + { + // Check one char first, then all 3, to prevent reading outside the buffer. + return reader_.column == 0 && + reader_.peekByte() == '.' && + reader_.prefix(3) == "..." && + reader_.peek(3).isWhiteSpace; + } + + /// Check if the next token is BLOCK-ENTRY: '-' (' '|'\n') + bool checkBlockEntry() @safe + { + return !!reader_.peek(1).isWhiteSpace; + } + + /// Check if the next token is KEY(flow context): '?' + /// + /// or KEY(block context): '?' (' '|'\n') + bool checkKey() @safe + { + return (flowLevel_ > 0 || reader_.peek(1).isWhiteSpace); + } + + /// Check if the next token is VALUE(flow context): ':' + /// + /// or VALUE(block context): ':' (' '|'\n') + bool checkValue() @safe + { + return flowLevel_ > 0 || reader_.peek(1).isWhiteSpace; + } + + /// Check if the next token is a plain scalar. + /// + /// A plain scalar may start with any non-space character except: + /// '-', '?', ':', ',', '[', ']', '{', '}', + /// '#', '&', '*', '!', '|', '>', '\'', '\"', + /// '%', '@', '`'. + /// + /// It may also start with + /// '-', '?', ':' + /// if it is followed by a non-space character. + /// + /// Note that we limit the last rule to the block context (except the + /// '-' character) because we want the flow context to be space + /// independent. + bool checkPlain() @safe + { + const c = reader_.peek(); + if(!c.isNonScalarStartCharacter) + { + return true; + } + return !reader_.peek(1).isWhiteSpace && + (c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':'))); + } + + /// Move to the next non-space character. + void findNextNonSpace() @safe + { + while(reader_.peekByte() == ' ') { reader_.forward(); } + } + + /// Scan a string of alphanumeric or "-_" characters. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanAlphaNumericToSlice(string name)(const Mark startMark) + { + size_t length; + dchar c = reader_.peek(); + while(c.isAlphaNum || c.among!('-', '_')) { c = reader_.peek(++length); } + + enforce(length > 0, new ScannerException("While scanning " ~ name, + startMark, expected("alphanumeric, '-' or '_'", c), reader_.mark)); + + reader_.sliceBuilder.write(reader_.get(length)); + } + + /// Scan and throw away all characters until next line break. + void scanToNextBreak() @safe + { + while(!reader_.peek().isBreak) { reader_.forward(); } + } + + /// Scan all characters until next line break. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanToNextBreakToSlice() @safe + { + uint length; + while(!reader_.peek(length).isBreak) + { + ++length; + } + reader_.sliceBuilder.write(reader_.get(length)); + } + + + /// Move to next token in the file/stream. + /// + /// We ignore spaces, line breaks and comments. + /// If we find a line break in the block context, we set + /// allowSimpleKey` on. + /// + /// We do not yet support BOM inside the stream as the + /// specification requires. Any such mark will be considered as a part + /// of the document. + void scanToNextToken() @safe + { + // TODO(PyYAML): We need to make tab handling rules more sane. A good rule is: + // Tabs cannot precede tokens + // BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, + // KEY(block), VALUE(block), BLOCK-ENTRY + // So the checking code is + // if <TAB>: + // allowSimpleKey_ = false + // We also need to add the check for `allowSimpleKey_ == true` to + // `unwindIndent` before issuing BLOCK-END. + // Scanners for block, flow, and plain scalars need to be modified. + + for(;;) + { + //All whitespace in flow context is ignored, even whitespace + // not allowed in other contexts + if (flowLevel_ > 0) + { + while(reader_.peekByte().isNonLinebreakWhitespace) { reader_.forward(); } + } + else + { + findNextNonSpace(); + } + if(reader_.peekByte() == '#') { scanToNextBreak(); } + if(scanLineBreak() != '\0') + { + if(flowLevel_ == 0) { allowSimpleKey_ = true; } + } + else + { + break; + } + } + } + + /// Scan directive token. + Token scanDirective() @safe + { + Mark startMark = reader_.mark; + // Skip the '%'. + reader_.forward(); + + // Scan directive name + reader_.sliceBuilder.begin(); + scanDirectiveNameToSlice(startMark); + const name = reader_.sliceBuilder.finish(); + + reader_.sliceBuilder.begin(); + + // Index where tag handle ends and suffix starts in a tag directive value. + uint tagHandleEnd = uint.max; + if(name == "YAML") { scanYAMLDirectiveValueToSlice(startMark); } + else if(name == "TAG") { tagHandleEnd = scanTagDirectiveValueToSlice(startMark); } + char[] value = reader_.sliceBuilder.finish(); + + Mark endMark = reader_.mark; + + DirectiveType directive; + if(name == "YAML") { directive = DirectiveType.yaml; } + else if(name == "TAG") { directive = DirectiveType.tag; } + else + { + directive = DirectiveType.reserved; + scanToNextBreak(); + } + + scanDirectiveIgnoredLine(startMark); + + return directiveToken(startMark, endMark, value, directive, tagHandleEnd); + } + + /// Scan name of a directive token. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanDirectiveNameToSlice(const Mark startMark) @safe + { + // Scan directive name. + scanAlphaNumericToSlice!"a directive"(startMark); + + enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), + new ScannerException("While scanning a directive", startMark, + expected("alphanumeric, '-' or '_'", reader_.peek()), reader_.mark)); + } + + /// Scan value of a YAML directive token. Returns major, minor version separated by '.'. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanYAMLDirectiveValueToSlice(const Mark startMark) @safe + { + findNextNonSpace(); + + scanYAMLDirectiveNumberToSlice(startMark); + + enforce(reader_.peekByte() == '.', + new ScannerException("While scanning a directive", startMark, + expected("digit or '.'", reader_.peek()), reader_.mark)); + // Skip the '.'. + reader_.forward(); + + reader_.sliceBuilder.write('.'); + scanYAMLDirectiveNumberToSlice(startMark); + + enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), + new ScannerException("While scanning a directive", startMark, + expected("digit or '.'", reader_.peek()), reader_.mark)); + } + + /// Scan a number from a YAML directive. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanYAMLDirectiveNumberToSlice(const Mark startMark) @safe + { + enforce(isDigit(reader_.peek()), + new ScannerException("While scanning a directive", startMark, + expected("digit", reader_.peek()), reader_.mark)); + + // Already found the first digit in the enforce(), so set length to 1. + uint length = 1; + while(reader_.peek(length).isDigit) { ++length; } + + reader_.sliceBuilder.write(reader_.get(length)); + } + + /// Scan value of a tag directive. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + /// + /// Returns: Length of tag handle (which is before tag prefix) in scanned data + uint scanTagDirectiveValueToSlice(const Mark startMark) @safe + { + findNextNonSpace(); + const startLength = reader_.sliceBuilder.length; + scanTagDirectiveHandleToSlice(startMark); + const handleLength = cast(uint)(reader_.sliceBuilder.length - startLength); + findNextNonSpace(); + scanTagDirectivePrefixToSlice(startMark); + + return handleLength; + } + + /// Scan handle of a tag directive. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanTagDirectiveHandleToSlice(const Mark startMark) @safe + { + scanTagHandleToSlice!"directive"(startMark); + enforce(reader_.peekByte() == ' ', + new ScannerException("While scanning a directive handle", startMark, + expected("' '", reader_.peek()), reader_.mark)); + } + + /// Scan prefix of a tag directive. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanTagDirectivePrefixToSlice(const Mark startMark) @safe + { + scanTagURIToSlice!"directive"(startMark); + enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), + new ScannerException("While scanning a directive prefix", startMark, + expected("' '", reader_.peek()), reader_.mark)); + } + + /// Scan (and ignore) ignored line after a directive. + void scanDirectiveIgnoredLine(const Mark startMark) @safe + { + findNextNonSpace(); + if(reader_.peekByte() == '#') { scanToNextBreak(); } + enforce(reader_.peek().isBreak, + new ScannerException("While scanning a directive", startMark, + expected("comment or a line break", reader_.peek()), reader_.mark)); + scanLineBreak(); + } + + + /// Scan an alias or an anchor. + /// + /// The specification does not restrict characters for anchors and + /// aliases. This may lead to problems, for instance, the document: + /// [ *alias, value ] + /// can be interpteted in two ways, as + /// [ "value" ] + /// and + /// [ *alias , "value" ] + /// Therefore we restrict aliases to ASCII alphanumeric characters. + Token scanAnchor(const TokenID id) @safe + { + const startMark = reader_.mark; + const dchar i = reader_.get(); + + reader_.sliceBuilder.begin(); + if(i == '*') { scanAlphaNumericToSlice!"an alias"(startMark); } + else { scanAlphaNumericToSlice!"an anchor"(startMark); } + // On error, value is discarded as we return immediately + char[] value = reader_.sliceBuilder.finish(); + + enum anchorCtx = "While scanning an anchor"; + enum aliasCtx = "While scanning an alias"; + enforce(reader_.peek().isWhiteSpace || + reader_.peekByte().among!('?', ':', ',', ']', '}', '%', '@'), + new ScannerException(i == '*' ? aliasCtx : anchorCtx, startMark, + expected("alphanumeric, '-' or '_'", reader_.peek()), reader_.mark)); + + if(id == TokenID.alias_) + { + return aliasToken(startMark, reader_.mark, value); + } + if(id == TokenID.anchor) + { + return anchorToken(startMark, reader_.mark, value); + } + assert(false, "This code should never be reached"); + } + + /// Scan a tag token. + Token scanTag() @safe + { + const startMark = reader_.mark; + dchar c = reader_.peek(1); + + reader_.sliceBuilder.begin(); + scope(failure) { reader_.sliceBuilder.finish(); } + // Index where tag handle ends and tag suffix starts in the tag value + // (slice) we will produce. + uint handleEnd; + + if(c == '<') + { + reader_.forward(2); + + handleEnd = 0; + scanTagURIToSlice!"tag"(startMark); + enforce(reader_.peekByte() == '>', + new ScannerException("While scanning a tag", startMark, + expected("'>'", reader_.peek()), reader_.mark)); + reader_.forward(); + } + else if(c.isWhiteSpace) + { + reader_.forward(); + handleEnd = 0; + reader_.sliceBuilder.write('!'); + } + else + { + uint length = 1; + bool useHandle; + + while(!c.isBreakOrSpace) + { + if(c == '!') + { + useHandle = true; + break; + } + ++length; + c = reader_.peek(length); + } + + if(useHandle) + { + scanTagHandleToSlice!"tag"(startMark); + handleEnd = cast(uint)reader_.sliceBuilder.length; + } + else + { + reader_.forward(); + reader_.sliceBuilder.write('!'); + handleEnd = cast(uint)reader_.sliceBuilder.length; + } + + scanTagURIToSlice!"tag"(startMark); + } + + enforce(reader_.peek().isBreakOrSpace, + new ScannerException("While scanning a tag", startMark, expected("' '", reader_.peek()), + reader_.mark)); + + char[] slice = reader_.sliceBuilder.finish(); + return tagToken(startMark, reader_.mark, slice, handleEnd); + } + + /// Scan a block scalar token with specified style. + Token scanBlockScalar(const ScalarStyle style) @safe + { + const startMark = reader_.mark; + + // Scan the header. + reader_.forward(); + + const indicators = scanBlockScalarIndicators(startMark); + + const chomping = indicators[0]; + const increment = indicators[1]; + scanBlockScalarIgnoredLine(startMark); + + // Determine the indentation level and go to the first non-empty line. + Mark endMark; + uint indent = max(1, indent_ + 1); + + reader_.sliceBuilder.begin(); + alias Transaction = SliceBuilder.Transaction; + // Used to strip the last line breaks written to the slice at the end of the + // scalar, which may be needed based on chomping. + Transaction breaksTransaction = Transaction(&reader_.sliceBuilder); + // Read the first indentation/line breaks before the scalar. + size_t startLen = reader_.sliceBuilder.length; + if(increment == int.min) + { + auto indentation = scanBlockScalarIndentationToSlice(); + endMark = indentation[1]; + indent = max(indent, indentation[0]); + } + else + { + indent += increment - 1; + endMark = scanBlockScalarBreaksToSlice(indent); + } + + // int.max means there's no line break (int.max is outside UTF-32). + dchar lineBreak = cast(dchar)int.max; + + // Scan the inner part of the block scalar. + while(reader_.column == indent && reader_.peekByte() != '\0') + { + breaksTransaction.commit(); + const bool leadingNonSpace = !reader_.peekByte().among!(' ', '\t'); + // This is where the 'interesting' non-whitespace data gets read. + scanToNextBreakToSlice(); + lineBreak = scanLineBreak(); + + + // This transaction serves to rollback data read in the + // scanBlockScalarBreaksToSlice() call. + breaksTransaction = Transaction(&reader_.sliceBuilder); + startLen = reader_.sliceBuilder.length; + // The line breaks should actually be written _after_ the if() block + // below. We work around that by inserting + endMark = scanBlockScalarBreaksToSlice(indent); + + // This will not run during the last iteration (see the if() vs the + // while()), hence breaksTransaction rollback (which happens after this + // loop) will never roll back data written in this if() block. + if(reader_.column == indent && reader_.peekByte() != '\0') + { + // Unfortunately, folding rules are ambiguous. + + // This is the folding according to the specification: + if(style == ScalarStyle.folded && lineBreak == '\n' && + leadingNonSpace && !reader_.peekByte().among!(' ', '\t')) + { + // No breaks were scanned; no need to insert the space in the + // middle of slice. + if(startLen == reader_.sliceBuilder.length) + { + reader_.sliceBuilder.write(' '); + } + } + else + { + // We need to insert in the middle of the slice in case any line + // breaks were scanned. + reader_.sliceBuilder.insert(lineBreak, startLen); + } + + ////this is Clark Evans's interpretation (also in the spec + ////examples): + // + //if(style == ScalarStyle.folded && lineBreak == '\n') + //{ + // if(startLen == endLen) + // { + // if(!" \t"d.canFind(reader_.peekByte())) + // { + // reader_.sliceBuilder.write(' '); + // } + // else + // { + // chunks ~= lineBreak; + // } + // } + //} + //else + //{ + // reader_.sliceBuilder.insertBack(lineBreak, endLen - startLen); + //} + } + else + { + break; + } + } + + // If chompint is Keep, we keep (commit) the last scanned line breaks + // (which are at the end of the scalar). Otherwise re remove them (end the + // transaction). + if(chomping == Chomping.keep) { breaksTransaction.commit(); } + else { breaksTransaction.end(); } + if(chomping != Chomping.strip && lineBreak != int.max) + { + // If chomping is Keep, we keep the line break but the first line break + // that isn't stripped (since chomping isn't Strip in this branch) must + // be inserted _before_ the other line breaks. + if(chomping == Chomping.keep) + { + reader_.sliceBuilder.insert(lineBreak, startLen); + } + // If chomping is not Keep, breaksTransaction was cancelled so we can + // directly write the first line break (as it isn't stripped - chomping + // is not Strip) + else + { + reader_.sliceBuilder.write(lineBreak); + } + } + + char[] slice = reader_.sliceBuilder.finish(); + return scalarToken(startMark, endMark, slice, style); + } + + /// Scan chomping and indentation indicators of a scalar token. + Tuple!(Chomping, int) scanBlockScalarIndicators(const Mark startMark) @safe + { + auto chomping = Chomping.clip; + int increment = int.min; + dchar c = reader_.peek(); + + /// Indicators can be in any order. + if(getChomping(c, chomping)) + { + getIncrement(c, increment, startMark); + } + else + { + const gotIncrement = getIncrement(c, increment, startMark); + if(gotIncrement) { getChomping(c, chomping); } + } + + enforce(c.among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), + new ScannerException("While scanning a block scalar", startMark, + expected("chomping or indentation indicator", c), reader_.mark)); + + return tuple(chomping, increment); + } + + /// Get chomping indicator, if detected. Return false otherwise. + /// + /// Used in scanBlockScalarIndicators. + /// + /// Params: + /// + /// c = The character that may be a chomping indicator. + /// chomping = Write the chomping value here, if detected. + bool getChomping(ref dchar c, ref Chomping chomping) @safe + { + if(!c.among!('+', '-')) { return false; } + chomping = c == '+' ? Chomping.keep : Chomping.strip; + reader_.forward(); + c = reader_.peek(); + return true; + } + + /// Get increment indicator, if detected. Return false otherwise. + /// + /// Used in scanBlockScalarIndicators. + /// + /// Params: + /// + /// c = The character that may be an increment indicator. + /// If an increment indicator is detected, this will be updated to + /// the next character in the Reader. + /// increment = Write the increment value here, if detected. + /// startMark = Mark for error messages. + bool getIncrement(ref dchar c, ref int increment, const Mark startMark) @safe + { + if(!c.isDigit) { return false; } + // Convert a digit to integer. + increment = c - '0'; + assert(increment < 10 && increment >= 0, "Digit has invalid value"); + + enforce(increment > 0, + new ScannerException("While scanning a block scalar", startMark, + expected("indentation indicator in range 1-9", "0"), reader_.mark)); + + reader_.forward(); + c = reader_.peek(); + return true; + } + + /// Scan (and ignore) ignored line in a block scalar. + void scanBlockScalarIgnoredLine(const Mark startMark) @safe + { + findNextNonSpace(); + if(reader_.peekByte()== '#') { scanToNextBreak(); } + + enforce(reader_.peek().isBreak, + new ScannerException("While scanning a block scalar", startMark, + expected("comment or line break", reader_.peek()), reader_.mark)); + + scanLineBreak(); + } + + /// Scan indentation in a block scalar, returning line breaks, max indent and end mark. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + Tuple!(uint, Mark) scanBlockScalarIndentationToSlice() @safe + { + uint maxIndent; + Mark endMark = reader_.mark; + + while(reader_.peek().among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029')) + { + if(reader_.peekByte() != ' ') + { + reader_.sliceBuilder.write(scanLineBreak()); + endMark = reader_.mark; + continue; + } + reader_.forward(); + maxIndent = max(reader_.column, maxIndent); + } + + return tuple(maxIndent, endMark); + } + + /// Scan line breaks at lower or specified indentation in a block scalar. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + Mark scanBlockScalarBreaksToSlice(const uint indent) @safe + { + Mark endMark = reader_.mark; + + for(;;) + { + while(reader_.column < indent && reader_.peekByte() == ' ') { reader_.forward(); } + if(!reader_.peek().among!('\n', '\r', '\u0085', '\u2028', '\u2029')) { break; } + reader_.sliceBuilder.write(scanLineBreak()); + endMark = reader_.mark; + } + + return endMark; + } + + /// Scan a qouted flow scalar token with specified quotes. + Token scanFlowScalar(const ScalarStyle quotes) @safe + { + const startMark = reader_.mark; + const quote = reader_.get(); + + reader_.sliceBuilder.begin(); + + scanFlowScalarNonSpacesToSlice(quotes, startMark); + + while(reader_.peek() != quote) + { + scanFlowScalarSpacesToSlice(startMark); + scanFlowScalarNonSpacesToSlice(quotes, startMark); + } + reader_.forward(); + + auto slice = reader_.sliceBuilder.finish(); + return scalarToken(startMark, reader_.mark, slice, quotes); + } + + /// Scan nonspace characters in a flow scalar. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanFlowScalarNonSpacesToSlice(const ScalarStyle quotes, const Mark startMark) + @safe + { + for(;;) + { + dchar c = reader_.peek(); + + size_t numCodePoints; + while(!reader_.peek(numCodePoints).isFlowScalarBreakSpace) { ++numCodePoints; } + + if (numCodePoints > 0) { reader_.sliceBuilder.write(reader_.get(numCodePoints)); } + + c = reader_.peek(); + if(quotes == ScalarStyle.singleQuoted && c == '\'' && reader_.peek(1) == '\'') + { + reader_.forward(2); + reader_.sliceBuilder.write('\''); + } + else if((quotes == ScalarStyle.doubleQuoted && c == '\'') || + (quotes == ScalarStyle.singleQuoted && c.among!('"', '\\'))) + { + reader_.forward(); + reader_.sliceBuilder.write(c); + } + else if(quotes == ScalarStyle.doubleQuoted && c == '\\') + { + reader_.forward(); + c = reader_.peek(); + if(c.among!(escapes)) + { + reader_.forward(); + // Escaping has been moved to Parser as it can't be done in + // place (in a slice) in case of '\P' and '\L' (very uncommon, + // but we don't want to break the spec) + char[2] escapeSequence = ['\\', cast(char)c]; + reader_.sliceBuilder.write(escapeSequence); + } + else if(c.among!(escapeHexCodeList)) + { + const hexLength = dyaml.escapes.escapeHexLength(c); + reader_.forward(); + + foreach(i; 0 .. hexLength) { + enforce(reader_.peek(i).isHexDigit, + new ScannerException("While scanning a double quoted scalar", startMark, + expected("escape sequence of hexadecimal numbers", + reader_.peek(i)), reader_.mark)); + } + char[] hex = reader_.get(hexLength); + + enforce((hex.length > 0) && (hex.length <= 8), + new ScannerException("While scanning a double quoted scalar", startMark, + "overflow when parsing an escape sequence of " ~ + "hexadecimal numbers.", reader_.mark)); + + char[2] escapeStart = ['\\', cast(char) c]; + reader_.sliceBuilder.write(escapeStart); + reader_.sliceBuilder.write(hex); + + } + else if(c.among!('\n', '\r', '\u0085', '\u2028', '\u2029')) + { + scanLineBreak(); + scanFlowScalarBreaksToSlice(startMark); + } + else + { + throw new ScannerException("While scanning a double quoted scalar", startMark, + text("found unsupported escape character ", c), + reader_.mark); + } + } + else { return; } + } + } + + /// Scan space characters in a flow scalar. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// spaces into that slice. + void scanFlowScalarSpacesToSlice(const Mark startMark) @safe + { + // Increase length as long as we see whitespace. + size_t length; + while(reader_.peekByte(length).among!(' ', '\t')) { ++length; } + auto whitespaces = reader_.prefixBytes(length); + + // Can check the last byte without striding because '\0' is ASCII + const c = reader_.peek(length); + enforce(c != '\0', + new ScannerException("While scanning a quoted scalar", startMark, + "found unexpected end of buffer", reader_.mark)); + + // Spaces not followed by a line break. + if(!c.among!('\n', '\r', '\u0085', '\u2028', '\u2029')) + { + reader_.forward(length); + reader_.sliceBuilder.write(whitespaces); + return; + } + + // There's a line break after the spaces. + reader_.forward(length); + const lineBreak = scanLineBreak(); + + if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); } + + // If we have extra line breaks after the first, scan them into the + // slice. + const bool extraBreaks = scanFlowScalarBreaksToSlice(startMark); + + // No extra breaks, one normal line break. Replace it with a space. + if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); } + } + + /// Scan line breaks in a flow scalar. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// line breaks into that slice. + bool scanFlowScalarBreaksToSlice(const Mark startMark) @safe + { + // True if at least one line break was found. + bool anyBreaks; + for(;;) + { + // Instead of checking indentation, we check for document separators. + const prefix = reader_.prefix(3); + enforce(!(prefix == "---" || prefix == "...") || + !reader_.peek(3).isWhiteSpace, + new ScannerException("While scanning a quoted scalar", startMark, + "found unexpected document separator", reader_.mark)); + + // Skip any whitespaces. + while(reader_.peekByte().among!(' ', '\t')) { reader_.forward(); } + + // Encountered a non-whitespace non-linebreak character, so we're done. + if(!reader_.peek().among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029')) { break; } + + const lineBreak = scanLineBreak(); + anyBreaks = true; + reader_.sliceBuilder.write(lineBreak); + } + return anyBreaks; + } + + /// Scan plain scalar token (no block, no quotes). + Token scanPlain() @safe + { + // We keep track of the allowSimpleKey_ flag here. + // Indentation rules are loosed for the flow context + const startMark = reader_.mark; + Mark endMark = startMark; + const indent = indent_ + 1; + + // We allow zero indentation for scalars, but then we need to check for + // document separators at the beginning of the line. + // if(indent == 0) { indent = 1; } + + reader_.sliceBuilder.begin(); + + alias Transaction = SliceBuilder.Transaction; + Transaction spacesTransaction; + // Stop at a comment. + while(reader_.peekByte() != '#') + { + // Scan the entire plain scalar. + size_t length; + dchar c = reader_.peek(length); + for(;;) + { + const cNext = reader_.peek(length + 1); + if(c.isWhiteSpace || + (flowLevel_ == 0 && c == ':' && cNext.isWhiteSpace) || + (flowLevel_ > 0 && c.among!(',', ':', '?', '[', ']', '{', '}'))) + { + break; + } + ++length; + c = cNext; + } + + // It's not clear what we should do with ':' in the flow context. + enforce(flowLevel_ == 0 || c != ':' || + reader_.peek(length + 1).isWhiteSpace || + reader_.peek(length + 1).among!(',', '[', ']', '{', '}'), + new ScannerException("While scanning a plain scalar", startMark, + "found unexpected ':' . Please check " ~ + "http://pyyaml.org/wiki/YAMLColonInFlowContext for details.", + reader_.mark)); + + if(length == 0) { break; } + + allowSimpleKey_ = false; + + reader_.sliceBuilder.write(reader_.get(length)); + + endMark = reader_.mark; + + spacesTransaction.commit(); + spacesTransaction = Transaction(&reader_.sliceBuilder); + + const startLength = reader_.sliceBuilder.length; + scanPlainSpacesToSlice(); + if(startLength == reader_.sliceBuilder.length || + (flowLevel_ == 0 && reader_.column < indent)) + { + break; + } + } + + spacesTransaction.end(); + char[] slice = reader_.sliceBuilder.finish(); + + return scalarToken(startMark, endMark, slice, ScalarStyle.plain); + } + + /// Scan spaces in a plain scalar. + /// + /// Assumes that the caller is building a slice in Reader, and puts the spaces + /// into that slice. + void scanPlainSpacesToSlice() @safe + { + // The specification is really confusing about tabs in plain scalars. + // We just forbid them completely. Do not use tabs in YAML! + + // Get as many plain spaces as there are. + size_t length; + while(reader_.peekByte(length) == ' ') { ++length; } + char[] whitespaces = reader_.prefixBytes(length); + reader_.forward(length); + + const dchar c = reader_.peek(); + if(!c.isNSChar) + { + // We have spaces, but no newline. + if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); } + return; + } + + // Newline after the spaces (if any) + const lineBreak = scanLineBreak(); + allowSimpleKey_ = true; + + static bool end(Reader reader_) @safe pure + { + const prefix = reader_.prefix(3); + return ("---" == prefix || "..." == prefix) + && reader_.peek(3).among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); + } + + if(end(reader_)) { return; } + + bool extraBreaks; + + alias Transaction = SliceBuilder.Transaction; + auto transaction = Transaction(&reader_.sliceBuilder); + if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); } + while(reader_.peek().isNSChar) + { + if(reader_.peekByte() == ' ') { reader_.forward(); } + else + { + const lBreak = scanLineBreak(); + extraBreaks = true; + reader_.sliceBuilder.write(lBreak); + + if(end(reader_)) { return; } + } + } + transaction.commit(); + + // No line breaks, only a space. + if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); } + } + + /// Scan handle of a tag token. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanTagHandleToSlice(string name)(const Mark startMark) + { + dchar c = reader_.peek(); + enum contextMsg = "While scanning a " ~ name; + enforce(c == '!', + new ScannerException(contextMsg, startMark, expected("'!'", c), reader_.mark)); + + uint length = 1; + c = reader_.peek(length); + if(c != ' ') + { + while(c.isAlphaNum || c.among!('-', '_')) + { + ++length; + c = reader_.peek(length); + } + enforce(c == '!', + new ScannerException(contextMsg, startMark, expected("'!'", c), reader_.mark)); + ++length; + } + + reader_.sliceBuilder.write(reader_.get(length)); + } + + /// Scan URI in a tag token. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanTagURIToSlice(string name)(const Mark startMark) + { + // Note: we do not check if URI is well-formed. + dchar c = reader_.peek(); + const startLen = reader_.sliceBuilder.length; + { + uint length; + while(c.isAlphaNum || c.isURIChar) + { + if(c == '%') + { + auto chars = reader_.get(length); + reader_.sliceBuilder.write(chars); + length = 0; + scanURIEscapesToSlice!name(startMark); + } + else { ++length; } + c = reader_.peek(length); + } + if(length > 0) + { + auto chars = reader_.get(length); + reader_.sliceBuilder.write(chars); + length = 0; + } + } + // OK if we scanned something, error otherwise. + enum contextMsg = "While parsing a " ~ name; + enforce(reader_.sliceBuilder.length > startLen, + new ScannerException(contextMsg, startMark, expected("URI", c), reader_.mark)); + } + + // Not @nogc yet because std.utf.decode is not @nogc + /// Scan URI escape sequences. + /// + /// Assumes that the caller is building a slice in Reader, and puts the scanned + /// characters into that slice. + void scanURIEscapesToSlice(string name)(const Mark startMark) + { + import core.exception : UnicodeException; + // URI escapes encode a UTF-8 string. We store UTF-8 code units here for + // decoding into UTF-32. + Appender!string buffer; + + + enum contextMsg = "While scanning a " ~ name; + while(reader_.peekByte() == '%') + { + reader_.forward(); + char[2] nextByte = [reader_.peekByte(), reader_.peekByte(1)]; + + enforce(nextByte[0].isHexDigit && nextByte[1].isHexDigit, + new ScannerException(contextMsg, startMark, + expected("URI escape sequence of 2 hexadecimal " ~ + "numbers", nextByte), reader_.mark)); + + buffer ~= nextByte[].to!ubyte(16); + + reader_.forward(2); + } + try + { + foreach (dchar chr; buffer.data) + { + reader_.sliceBuilder.write(chr); + } + } + catch (UnicodeException) + { + throw new ScannerException(contextMsg, startMark, + "Invalid UTF-8 data encoded in URI escape sequence", + reader_.mark); + } + } + + + /// Scan a line break, if any. + /// + /// Transforms: + /// '\r\n' : '\n' + /// '\r' : '\n' + /// '\n' : '\n' + /// '\u0085' : '\n' + /// '\u2028' : '\u2028' + /// '\u2029 : '\u2029' + /// no break : '\0' + dchar scanLineBreak() @safe + { + // Fast path for ASCII line breaks. + const b = reader_.peekByte(); + if(b < 0x80) + { + if(b == '\n' || b == '\r') + { + if(reader_.prefix(2) == "\r\n") { reader_.forward(2); } + else { reader_.forward(); } + return '\n'; + } + return '\0'; + } + + const c = reader_.peek(); + if(c == '\x85') + { + reader_.forward(); + return '\n'; + } + if(c == '\u2028' || c == '\u2029') + { + reader_.forward(); + return c; + } + return '\0'; + } +} diff --git a/source/dyaml/serializer.d b/source/dyaml/serializer.d new file mode 100644 index 0000000..cbaef63 --- /dev/null +++ b/source/dyaml/serializer.d @@ -0,0 +1,322 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/** + * YAML serializer. + * Code based on PyYAML: http://www.pyyaml.org + */ +module dyaml.serializer; + + +import std.array; +import std.format; +import std.typecons; + +import dyaml.emitter; +import dyaml.event; +import dyaml.exception; +import dyaml.node; +import dyaml.resolver; +import dyaml.tagdirective; +import dyaml.token; + + +package: + +///Serializes represented YAML nodes, generating events which are then emitted by Emitter. +struct Serializer +{ + private: + ///Resolver used to determine which tags are automaticaly resolvable. + Resolver resolver_; + + ///Do all document starts have to be specified explicitly? + Flag!"explicitStart" explicitStart_; + ///Do all document ends have to be specified explicitly? + Flag!"explicitEnd" explicitEnd_; + ///YAML version string. + string YAMLVersion_; + + ///Tag directives to emit. + TagDirective[] tagDirectives_; + + //TODO Use something with more deterministic memory usage. + ///Nodes with assigned anchors. + string[Node] anchors_; + ///Nodes with assigned anchors that are already serialized. + bool[Node] serializedNodes_; + ///ID of the last anchor generated. + uint lastAnchorID_ = 0; + + public: + /** + * Construct a Serializer. + * + * Params: + * resolver = Resolver used to determine which tags are automaticaly resolvable. + * explicitStart = Do all document starts have to be specified explicitly? + * explicitEnd = Do all document ends have to be specified explicitly? + * YAMLVersion = YAML version string. + * tagDirectives = Tag directives to emit. + */ + this(Resolver resolver, + const Flag!"explicitStart" explicitStart, + const Flag!"explicitEnd" explicitEnd, string YAMLVersion, + TagDirective[] tagDirectives) @safe + { + resolver_ = resolver; + explicitStart_ = explicitStart; + explicitEnd_ = explicitEnd; + YAMLVersion_ = YAMLVersion; + tagDirectives_ = tagDirectives; + } + + ///Begin the stream. + void startStream(EmitterT)(ref EmitterT emitter) @safe + { + emitter.emit(streamStartEvent(Mark(), Mark())); + } + + ///End the stream. + void endStream(EmitterT)(ref EmitterT emitter) @safe + { + emitter.emit(streamEndEvent(Mark(), Mark())); + } + + ///Serialize a node, emitting it in the process. + void serialize(EmitterT)(ref EmitterT emitter, ref Node node) @safe + { + emitter.emit(documentStartEvent(Mark(), Mark(), explicitStart_, + YAMLVersion_, tagDirectives_)); + anchorNode(node); + serializeNode(emitter, node); + emitter.emit(documentEndEvent(Mark(), Mark(), explicitEnd_)); + serializedNodes_.destroy(); + anchors_.destroy(); + string[Node] emptyAnchors; + anchors_ = emptyAnchors; + lastAnchorID_ = 0; + } + + private: + /** + * Determine if it's a good idea to add an anchor to a node. + * + * Used to prevent associating every single repeating scalar with an + * anchor/alias - only nodes long enough can use anchors. + * + * Params: node = Node to check for anchorability. + * + * Returns: True if the node is anchorable, false otherwise. + */ + static bool anchorable(ref Node node) @safe + { + if(node.nodeID == NodeID.scalar) + { + return (node.type == NodeType.string) ? node.as!string.length > 64 : + (node.type == NodeType.binary) ? node.as!(ubyte[]).length > 64 : + false; + } + return node.length > 2; + } + + @safe unittest + { + import std.string : representation; + auto shortString = "not much"; + auto longString = "A fairly long string that would be a good idea to add an anchor to"; + auto node1 = Node(shortString); + auto node2 = Node(shortString.representation.dup); + auto node3 = Node(longString); + auto node4 = Node(longString.representation.dup); + auto node5 = Node([node1]); + auto node6 = Node([node1, node2, node3, node4]); + assert(!anchorable(node1)); + assert(!anchorable(node2)); + assert(anchorable(node3)); + assert(anchorable(node4)); + assert(!anchorable(node5)); + assert(anchorable(node6)); + } + + ///Add an anchor to the node if it's anchorable and not anchored yet. + void anchorNode(ref Node node) @safe + { + if(!anchorable(node)){return;} + + if((node in anchors_) !is null) + { + if(anchors_[node] is null) + { + anchors_[node] = generateAnchor(); + } + return; + } + + anchors_.remove(node); + final switch (node.nodeID) + { + case NodeID.mapping: + foreach(ref Node key, ref Node value; node) + { + anchorNode(key); + anchorNode(value); + } + break; + case NodeID.sequence: + foreach(ref Node item; node) + { + anchorNode(item); + } + break; + case NodeID.invalid: + assert(0); + case NodeID.scalar: + } + } + + ///Generate and return a new anchor. + string generateAnchor() @safe + { + ++lastAnchorID_; + auto appender = appender!string(); + formattedWrite(appender, "id%03d", lastAnchorID_); + return appender.data; + } + + ///Serialize a node and all its subnodes. + void serializeNode(EmitterT)(ref EmitterT emitter, ref Node node) @safe + { + //If the node has an anchor, emit an anchor (as aliasEvent) on the + //first occurrence, save it in serializedNodes_, and emit an alias + //if it reappears. + string aliased; + if(anchorable(node) && (node in anchors_) !is null) + { + aliased = anchors_[node]; + if((node in serializedNodes_) !is null) + { + emitter.emit(aliasEvent(Mark(), Mark(), aliased)); + return; + } + serializedNodes_[node] = true; + } + final switch (node.nodeID) + { + case NodeID.mapping: + const defaultTag = resolver_.defaultMappingTag; + const implicit = node.tag_ == defaultTag; + emitter.emit(mappingStartEvent(Mark(), Mark(), aliased, node.tag_, + implicit, node.collectionStyle)); + foreach(ref Node key, ref Node value; node) + { + serializeNode(emitter, key); + serializeNode(emitter, value); + } + emitter.emit(mappingEndEvent(Mark(), Mark())); + return; + case NodeID.sequence: + const defaultTag = resolver_.defaultSequenceTag; + const implicit = node.tag_ == defaultTag; + emitter.emit(sequenceStartEvent(Mark(), Mark(), aliased, node.tag_, + implicit, node.collectionStyle)); + foreach(ref Node item; node) + { + serializeNode(emitter, item); + } + emitter.emit(sequenceEndEvent(Mark(), Mark())); + return; + case NodeID.scalar: + assert(node.type == NodeType.string, "Scalar node type must be string before serialized"); + auto value = node.as!string; + const detectedTag = resolver_.resolve(NodeID.scalar, null, value, true); + const bool isDetected = node.tag_ == detectedTag; + + emitter.emit(scalarEvent(Mark(), Mark(), aliased, node.tag_, + isDetected, value.idup, node.scalarStyle)); + return; + case NodeID.invalid: + assert(0); + } + } +} + +// Issue #244 +@safe unittest +{ + import dyaml.dumper : dumper; + auto node = Node([ + Node.Pair( + Node(""), + Node([ + Node([ + Node.Pair( + Node("d"), + Node([ + Node([ + Node.Pair( + Node("c"), + Node("") + ), + Node.Pair( + Node("b"), + Node("") + ), + Node.Pair( + Node(""), + Node("") + ) + ]) + ]) + ), + ]), + Node([ + Node.Pair( + Node("d"), + Node([ + Node(""), + Node(""), + Node([ + Node.Pair( + Node("c"), + Node("") + ), + Node.Pair( + Node("b"), + Node("") + ), + Node.Pair( + Node(""), + Node("") + ) + ]) + ]) + ), + Node.Pair( + Node("z"), + Node("") + ), + Node.Pair( + Node(""), + Node("") + ) + ]), + Node("") + ]) + ), + Node.Pair( + Node("g"), + Node("") + ), + Node.Pair( + Node("h"), + Node("") + ), + ]); + + auto stream = appender!string(); + dumper().dump(stream, node); +} diff --git a/source/dyaml/stdsumtype.d b/source/dyaml/stdsumtype.d new file mode 100644 index 0000000..3dac4dd --- /dev/null +++ b/source/dyaml/stdsumtype.d @@ -0,0 +1,2627 @@ +/++ + This module was copied from Phobos at commit 87c6e7e35 (2022-07-06). + This is necessary to include https://github.com/dlang/phobos/pull/8501 + which is a fix needed for DIP1000 compatibility. A couple minor changes + where also required to deal with `package(std)` imports. + +[SumType] is a generic discriminated union implementation that uses +design-by-introspection to generate safe and efficient code. Its features +include: + +* [Pattern matching.][match] +* Support for self-referential types. +* Full attribute correctness (`pure`, `@safe`, `@nogc`, and `nothrow` are + inferred whenever possible). +* A type-safe and memory-safe API compatible with DIP 1000 (`scope`). +* No dependency on runtime type information (`TypeInfo`). +* Compatibility with BetterC. + +License: Boost License 1.0 +Authors: Paul Backus +Source: $(PHOBOSSRC std/sumtype.d) ++/ +module dyaml.stdsumtype; + +/// $(DIVID basic-usage,$(H3 Basic usage)) +version (D_BetterC) {} else +@safe unittest +{ + import std.math.operations : isClose; + + struct Fahrenheit { double degrees; } + struct Celsius { double degrees; } + struct Kelvin { double degrees; } + + alias Temperature = SumType!(Fahrenheit, Celsius, Kelvin); + + // Construct from any of the member types. + Temperature t1 = Fahrenheit(98.6); + Temperature t2 = Celsius(100); + Temperature t3 = Kelvin(273); + + // Use pattern matching to access the value. + Fahrenheit toFahrenheit(Temperature t) + { + return Fahrenheit( + t.match!( + (Fahrenheit f) => f.degrees, + (Celsius c) => c.degrees * 9.0/5 + 32, + (Kelvin k) => k.degrees * 9.0/5 - 459.4 + ) + ); + } + + assert(toFahrenheit(t1).degrees.isClose(98.6)); + assert(toFahrenheit(t2).degrees.isClose(212)); + assert(toFahrenheit(t3).degrees.isClose(32)); + + // Use ref to modify the value in place. + void freeze(ref Temperature t) + { + t.match!( + (ref Fahrenheit f) => f.degrees = 32, + (ref Celsius c) => c.degrees = 0, + (ref Kelvin k) => k.degrees = 273 + ); + } + + freeze(t1); + assert(toFahrenheit(t1).degrees.isClose(32)); + + // Use a catch-all handler to give a default result. + bool isFahrenheit(Temperature t) + { + return t.match!( + (Fahrenheit f) => true, + _ => false + ); + } + + assert(isFahrenheit(t1)); + assert(!isFahrenheit(t2)); + assert(!isFahrenheit(t3)); +} + +/** $(DIVID introspection-based-matching, $(H3 Introspection-based matching)) + * + * In the `length` and `horiz` functions below, the handlers for `match` do not + * specify the types of their arguments. Instead, matching is done based on how + * the argument is used in the body of the handler: any type with `x` and `y` + * properties will be matched by the `rect` handlers, and any type with `r` and + * `theta` properties will be matched by the `polar` handlers. + */ +version (D_BetterC) {} else +@safe unittest +{ + import std.math.operations : isClose; + import std.math.trigonometry : cos; + import std.math.constants : PI; + import std.math.algebraic : sqrt; + + struct Rectangular { double x, y; } + struct Polar { double r, theta; } + alias Vector = SumType!(Rectangular, Polar); + + double length(Vector v) + { + return v.match!( + rect => sqrt(rect.x^^2 + rect.y^^2), + polar => polar.r + ); + } + + double horiz(Vector v) + { + return v.match!( + rect => rect.x, + polar => polar.r * cos(polar.theta) + ); + } + + Vector u = Rectangular(1, 1); + Vector v = Polar(1, PI/4); + + assert(length(u).isClose(sqrt(2.0))); + assert(length(v).isClose(1)); + assert(horiz(u).isClose(1)); + assert(horiz(v).isClose(sqrt(0.5))); +} + +/** $(DIVID arithmetic-expression-evaluator, $(H3 Arithmetic expression evaluator)) + * + * This example makes use of the special placeholder type `This` to define a + * [recursive data type](https://en.wikipedia.org/wiki/Recursive_data_type): an + * [abstract syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) for + * representing simple arithmetic expressions. + */ +version (D_BetterC) {} else +@system unittest +{ + import std.functional : partial; + import std.traits : EnumMembers; + import std.typecons : Tuple; + + enum Op : string + { + Plus = "+", + Minus = "-", + Times = "*", + Div = "/" + } + + // An expression is either + // - a number, + // - a variable, or + // - a binary operation combining two sub-expressions. + alias Expr = SumType!( + double, + string, + Tuple!(Op, "op", This*, "lhs", This*, "rhs") + ); + + // Shorthand for Tuple!(Op, "op", Expr*, "lhs", Expr*, "rhs"), + // the Tuple type above with Expr substituted for This. + alias BinOp = Expr.Types[2]; + + // Factory function for number expressions + Expr* num(double value) + { + return new Expr(value); + } + + // Factory function for variable expressions + Expr* var(string name) + { + return new Expr(name); + } + + // Factory function for binary operation expressions + Expr* binOp(Op op, Expr* lhs, Expr* rhs) + { + return new Expr(BinOp(op, lhs, rhs)); + } + + // Convenience wrappers for creating BinOp expressions + alias sum = partial!(binOp, Op.Plus); + alias diff = partial!(binOp, Op.Minus); + alias prod = partial!(binOp, Op.Times); + alias quot = partial!(binOp, Op.Div); + + // Evaluate expr, looking up variables in env + double eval(Expr expr, double[string] env) + { + return expr.match!( + (double num) => num, + (string var) => env[var], + (BinOp bop) + { + double lhs = eval(*bop.lhs, env); + double rhs = eval(*bop.rhs, env); + final switch (bop.op) + { + static foreach (op; EnumMembers!Op) + { + case op: + return mixin("lhs" ~ op ~ "rhs"); + } + } + } + ); + } + + // Return a "pretty-printed" representation of expr + string pprint(Expr expr) + { + import std.format : format; + + return expr.match!( + (double num) => "%g".format(num), + (string var) => var, + (BinOp bop) => "(%s %s %s)".format( + pprint(*bop.lhs), + cast(string) bop.op, + pprint(*bop.rhs) + ) + ); + } + + Expr* myExpr = sum(var("a"), prod(num(2), var("b"))); + double[string] myEnv = ["a":3, "b":4, "c":7]; + + assert(eval(*myExpr, myEnv) == 11); + assert(pprint(*myExpr) == "(a + (2 * b))"); +} + +import std.format.spec : FormatSpec, singleSpec; +import std.meta : AliasSeq, Filter, IndexOf = staticIndexOf, Map = staticMap; +import std.meta : NoDuplicates; +import std.meta : anySatisfy, allSatisfy; +import std.traits : hasElaborateCopyConstructor, hasElaborateDestructor; +import std.traits : isAssignable, isCopyable, isStaticArray, isRvalueAssignable; +import std.traits : ConstOf, ImmutableOf, InoutOf, TemplateArgsOf; + +// FIXME: std.sumtype : `std.traits : DeducedParameterType` and `std.conv : toCtString` +// are `package(std)` but trivial, hence copied below +import std.traits : CommonType, /*DeducatedParameterType*/ Unqual; +private template DeducedParameterType(T) +{ + static if (is(T == U*, U) || is(T == U[], U)) + alias DeducedParameterType = Unqual!T; + else + alias DeducedParameterType = T; +} + +import std.typecons : ReplaceTypeUnless; +import std.typecons : Flag; +//import std.conv : toCtString; +private enum toCtString(ulong n) = n.stringof[0 .. $ - "LU".length]; + +/// Placeholder used to refer to the enclosing [SumType]. +struct This {} + +// True if a variable of type T can appear on the lhs of an assignment +private enum isAssignableTo(T) = + isAssignable!T || (!isCopyable!T && isRvalueAssignable!T); + +// toHash is required by the language spec to be nothrow and @safe +private enum isHashable(T) = __traits(compiles, + () nothrow @safe { hashOf(T.init); } +); + +private enum hasPostblit(T) = __traits(hasPostblit, T); + +private enum isInout(T) = is(T == inout); + +/** + * A [tagged union](https://en.wikipedia.org/wiki/Tagged_union) that can hold a + * single value from any of a specified set of types. + * + * The value in a `SumType` can be operated on using [pattern matching][match]. + * + * To avoid ambiguity, duplicate types are not allowed (but see the + * ["basic usage" example](#basic-usage) for a workaround). + * + * The special type `This` can be used as a placeholder to create + * self-referential types, just like with `Algebraic`. See the + * ["Arithmetic expression evaluator" example](#arithmetic-expression-evaluator) for + * usage. + * + * A `SumType` is initialized by default to hold the `.init` value of its + * first member type, just like a regular union. The version identifier + * `SumTypeNoDefaultCtor` can be used to disable this behavior. + * + * See_Also: $(REF Algebraic, std,variant) + */ +struct SumType(Types...) +if (is(NoDuplicates!Types == Types) && Types.length > 0) +{ + /// The types a `SumType` can hold. + alias Types = AliasSeq!( + ReplaceTypeUnless!(isSumTypeInstance, This, typeof(this), TemplateArgsOf!SumType) + ); + +private: + + enum bool canHoldTag(T) = Types.length <= T.max; + alias unsignedInts = AliasSeq!(ubyte, ushort, uint, ulong); + + alias Tag = Filter!(canHoldTag, unsignedInts)[0]; + + union Storage + { + // Workaround for https://issues.dlang.org/show_bug.cgi?id=20068 + template memberName(T) + if (IndexOf!(T, Types) >= 0) + { + enum tid = IndexOf!(T, Types); + mixin("enum memberName = `values_", toCtString!tid, "`;"); + } + + static foreach (T; Types) + { + mixin("T ", memberName!T, ";"); + } + } + + Storage storage; + Tag tag; + + /* Accesses the value stored in a SumType. + * + * This method is memory-safe, provided that: + * + * 1. A SumType's tag is always accurate. + * 2. A SumType cannot be assigned to in @safe code if that assignment + * could cause unsafe aliasing. + * + * All code that accesses a SumType's tag or storage directly, including + * @safe code in this module, must be manually checked to ensure that it + * does not violate either of the above requirements. + */ + @trusted + ref inout(T) get(T)() inout + if (IndexOf!(T, Types) >= 0) + { + enum tid = IndexOf!(T, Types); + assert(tag == tid, + "This `" ~ SumType.stringof ~ + "` does not contain a(n) `" ~ T.stringof ~ "`" + ); + return __traits(getMember, storage, Storage.memberName!T); + } + +public: + + // Workaround for https://issues.dlang.org/show_bug.cgi?id=21399 + version (StdDdoc) + { + // Dummy type to stand in for loop variable + private struct T; + + /// Constructs a `SumType` holding a specific value. + this(T value); + + /// ditto + this(const(T) value) const; + + /// ditto + this(immutable(T) value) immutable; + + /// ditto + this(Value)(Value value) inout + if (is(Value == DeducedParameterType!(inout(T)))); + } + + static foreach (tid, T; Types) + { + /// Constructs a `SumType` holding a specific value. + this(T value) + { + import core.lifetime : forward; + + static if (isCopyable!T) + { + // Workaround for https://issues.dlang.org/show_bug.cgi?id=21542 + __traits(getMember, storage, Storage.memberName!T) = __ctfe ? value : forward!value; + } + else + { + __traits(getMember, storage, Storage.memberName!T) = forward!value; + } + + tag = tid; + } + + static if (isCopyable!(const(T))) + { + static if (IndexOf!(const(T), Map!(ConstOf, Types)) == tid) + { + /// ditto + this(const(T) value) const + { + __traits(getMember, storage, Storage.memberName!T) = value; + tag = tid; + } + } + } + else + { + @disable this(const(T) value) const; + } + + static if (isCopyable!(immutable(T))) + { + static if (IndexOf!(immutable(T), Map!(ImmutableOf, Types)) == tid) + { + /// ditto + this(immutable(T) value) immutable + { + __traits(getMember, storage, Storage.memberName!T) = value; + tag = tid; + } + } + } + else + { + @disable this(immutable(T) value) immutable; + } + + static if (isCopyable!(inout(T))) + { + static if (IndexOf!(inout(T), Map!(InoutOf, Types)) == tid) + { + /// ditto + this(Value)(Value value) inout + if (is(Value == DeducedParameterType!(inout(T)))) + { + __traits(getMember, storage, Storage.memberName!T) = value; + tag = tid; + } + } + } + else + { + @disable this(Value)(Value value) inout + if (is(Value == DeducedParameterType!(inout(T)))); + } + } + + static if (anySatisfy!(hasElaborateCopyConstructor, Types)) + { + static if + ( + allSatisfy!(isCopyable, Map!(InoutOf, Types)) + && !anySatisfy!(hasPostblit, Map!(InoutOf, Types)) + && allSatisfy!(isInout, Map!(InoutOf, Types)) + ) + { + /// Constructs a `SumType` that's a copy of another `SumType`. + this(ref inout(SumType) other) inout + { + storage = other.match!((ref value) { + alias OtherTypes = Map!(InoutOf, Types); + enum tid = IndexOf!(typeof(value), OtherTypes); + alias T = Types[tid]; + + mixin("inout(Storage) newStorage = { ", + Storage.memberName!T, ": value", + " };"); + + return newStorage; + }); + + tag = other.tag; + } + } + else + { + static if (allSatisfy!(isCopyable, Types)) + { + /// ditto + this(ref SumType other) + { + storage = other.match!((ref value) { + alias T = typeof(value); + + mixin("Storage newStorage = { ", + Storage.memberName!T, ": value", + " };"); + + return newStorage; + }); + + tag = other.tag; + } + } + else + { + @disable this(ref SumType other); + } + + static if (allSatisfy!(isCopyable, Map!(ConstOf, Types))) + { + /// ditto + this(ref const(SumType) other) const + { + storage = other.match!((ref value) { + alias OtherTypes = Map!(ConstOf, Types); + enum tid = IndexOf!(typeof(value), OtherTypes); + alias T = Types[tid]; + + mixin("const(Storage) newStorage = { ", + Storage.memberName!T, ": value", + " };"); + + return newStorage; + }); + + tag = other.tag; + } + } + else + { + @disable this(ref const(SumType) other) const; + } + + static if (allSatisfy!(isCopyable, Map!(ImmutableOf, Types))) + { + /// ditto + this(ref immutable(SumType) other) immutable + { + storage = other.match!((ref value) { + alias OtherTypes = Map!(ImmutableOf, Types); + enum tid = IndexOf!(typeof(value), OtherTypes); + alias T = Types[tid]; + + mixin("immutable(Storage) newStorage = { ", + Storage.memberName!T, ": value", + " };"); + + return newStorage; + }); + + tag = other.tag; + } + } + else + { + @disable this(ref immutable(SumType) other) immutable; + } + } + } + + version (SumTypeNoDefaultCtor) + { + @disable this(); + } + + // Workaround for https://issues.dlang.org/show_bug.cgi?id=21399 + version (StdDdoc) + { + // Dummy type to stand in for loop variable + private struct T; + + /** + * Assigns a value to a `SumType`. + * + * If any of the `SumType`'s members other than the one being assigned + * to contain pointers or references, it is possible for the assignment + * to cause memory corruption (see the + * ["Memory corruption" example](#memory-corruption) below for an + * illustration of how). Therefore, such assignments are considered + * `@system`. + * + * An individual assignment can be `@trusted` if the caller can + * guarantee that there are no outstanding references to any `SumType` + * members that contain pointers or references at the time the + * assignment occurs. + * + * Examples: + * + * $(DIVID memory-corruption, $(H3 Memory corruption)) + * + * This example shows how assignment to a `SumType` can be used to + * cause memory corruption in `@system` code. In `@safe` code, the + * assignment `s = 123` would not be allowed. + * + * --- + * SumType!(int*, int) s = new int; + * s.tryMatch!( + * (ref int* p) { + * s = 123; // overwrites `p` + * return *p; // undefined behavior + * } + * ); + * --- + */ + ref SumType opAssign(T rhs); + } + + static foreach (tid, T; Types) + { + static if (isAssignableTo!T) + { + /** + * Assigns a value to a `SumType`. + * + * If any of the `SumType`'s members other than the one being assigned + * to contain pointers or references, it is possible for the assignment + * to cause memory corruption (see the + * ["Memory corruption" example](#memory-corruption) below for an + * illustration of how). Therefore, such assignments are considered + * `@system`. + * + * An individual assignment can be `@trusted` if the caller can + * guarantee that there are no outstanding references to any `SumType` + * members that contain pointers or references at the time the + * assignment occurs. + * + * Examples: + * + * $(DIVID memory-corruption, $(H3 Memory corruption)) + * + * This example shows how assignment to a `SumType` can be used to + * cause memory corruption in `@system` code. In `@safe` code, the + * assignment `s = 123` would not be allowed. + * + * --- + * SumType!(int*, int) s = new int; + * s.tryMatch!( + * (ref int* p) { + * s = 123; // overwrites `p` + * return *p; // undefined behavior + * } + * ); + * --- + */ + ref SumType opAssign(T rhs) + { + import core.lifetime : forward; + import std.traits : hasIndirections, hasNested; + import std.meta : AliasSeq, Or = templateOr; + + alias OtherTypes = + AliasSeq!(Types[0 .. tid], Types[tid + 1 .. $]); + enum unsafeToOverwrite = + anySatisfy!(Or!(hasIndirections, hasNested), OtherTypes); + + static if (unsafeToOverwrite) + { + cast(void) () @system {}(); + } + + this.match!destroyIfOwner; + + static if (isCopyable!T) + { + // Workaround for https://issues.dlang.org/show_bug.cgi?id=21542 + mixin("Storage newStorage = { ", + Storage.memberName!T, ": __ctfe ? rhs : forward!rhs", + " };"); + } + else + { + mixin("Storage newStorage = { ", + Storage.memberName!T, ": forward!rhs", + " };"); + } + + storage = newStorage; + tag = tid; + + return this; + } + } + } + + static if (allSatisfy!(isAssignableTo, Types)) + { + static if (allSatisfy!(isCopyable, Types)) + { + /** + * Copies the value from another `SumType` into this one. + * + * See the value-assignment overload for details on `@safe`ty. + * + * Copy assignment is `@disable`d if any of `Types` is non-copyable. + */ + ref SumType opAssign(ref SumType rhs) + { + rhs.match!((ref value) { this = value; }); + return this; + } + } + else + { + @disable ref SumType opAssign(ref SumType rhs); + } + + /** + * Moves the value from another `SumType` into this one. + * + * See the value-assignment overload for details on `@safe`ty. + */ + ref SumType opAssign(SumType rhs) + { + import core.lifetime : move; + + rhs.match!((ref value) { + static if (isCopyable!(typeof(value))) + { + // Workaround for https://issues.dlang.org/show_bug.cgi?id=21542 + this = __ctfe ? value : move(value); + } + else + { + this = move(value); + } + }); + return this; + } + } + + /** + * Compares two `SumType`s for equality. + * + * Two `SumType`s are equal if they are the same kind of `SumType`, they + * contain values of the same type, and those values are equal. + */ + bool opEquals(this This, Rhs)(auto ref Rhs rhs) + if (!is(CommonType!(This, Rhs) == void)) + { + static if (is(This == Rhs)) + { + return AliasSeq!(this, rhs).match!((ref value, ref rhsValue) { + static if (is(typeof(value) == typeof(rhsValue))) + { + return value == rhsValue; + } + else + { + return false; + } + }); + } + else + { + alias CommonSumType = CommonType!(This, Rhs); + return cast(CommonSumType) this == cast(CommonSumType) rhs; + } + } + + // Workaround for https://issues.dlang.org/show_bug.cgi?id=19407 + static if (__traits(compiles, anySatisfy!(hasElaborateDestructor, Types))) + { + // If possible, include the destructor only when it's needed + private enum includeDtor = anySatisfy!(hasElaborateDestructor, Types); + } + else + { + // If we can't tell, always include it, even when it does nothing + private enum includeDtor = true; + } + + static if (includeDtor) + { + /// Calls the destructor of the `SumType`'s current value. + ~this() + { + this.match!destroyIfOwner; + } + } + + invariant + { + this.match!((ref value) { + static if (is(typeof(value) == class)) + { + if (value !is null) + { + assert(value); + } + } + else static if (is(typeof(value) == struct)) + { + assert(&value); + } + }); + } + + // Workaround for https://issues.dlang.org/show_bug.cgi?id=21400 + version (StdDdoc) + { + /** + * Returns a string representation of the `SumType`'s current value. + * + * Not available when compiled with `-betterC`. + */ + string toString(this This)(); + + /** + * Handles formatted writing of the `SumType`'s current value. + * + * Not available when compiled with `-betterC`. + * + * Params: + * sink = Output range to write to. + * fmt = Format specifier to use. + * + * See_Also: $(REF formatValue, std,format) + */ + void toString(this This, Sink, Char)(ref Sink sink, const ref FormatSpec!Char fmt); + } + + version (D_BetterC) {} else + /** + * Returns a string representation of the `SumType`'s current value. + * + * Not available when compiled with `-betterC`. + */ + string toString(this This)() + { + import std.conv : to; + + return this.match!(to!string); + } + + version (D_BetterC) {} else + /** + * Handles formatted writing of the `SumType`'s current value. + * + * Not available when compiled with `-betterC`. + * + * Params: + * sink = Output range to write to. + * fmt = Format specifier to use. + * + * See_Also: $(REF formatValue, std,format) + */ + void toString(this This, Sink, Char)(ref Sink sink, const ref FormatSpec!Char fmt) + { + import std.format.write : formatValue; + + this.match!((ref value) { + formatValue(sink, value, fmt); + }); + } + + static if (allSatisfy!(isHashable, Map!(ConstOf, Types))) + { + // Workaround for https://issues.dlang.org/show_bug.cgi?id=21400 + version (StdDdoc) + { + /** + * Returns the hash of the `SumType`'s current value. + * + * Not available when compiled with `-betterC`. + */ + size_t toHash() const; + } + + // Workaround for https://issues.dlang.org/show_bug.cgi?id=20095 + version (D_BetterC) {} else + /** + * Returns the hash of the `SumType`'s current value. + * + * Not available when compiled with `-betterC`. + */ + size_t toHash() const + { + return this.match!hashOf; + } + } +} + +// Construction +@safe unittest +{ + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + MySum y = MySum(3.14); +} + +// Assignment +@safe unittest +{ + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + x = 3.14; +} + +// Self assignment +@safe unittest +{ + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + MySum y = MySum(3.14); + y = x; +} + +// Equality +@safe unittest +{ + alias MySum = SumType!(int, float); + + assert(MySum(123) == MySum(123)); + assert(MySum(123) != MySum(456)); + assert(MySum(123) != MySum(123.0)); + assert(MySum(123) != MySum(456.0)); + +} + +// Equality of differently-qualified SumTypes +// Disabled in BetterC due to use of dynamic arrays +version (D_BetterC) {} else +@safe unittest +{ + alias SumA = SumType!(int, float); + alias SumB = SumType!(const(int[]), int[]); + alias SumC = SumType!(int[], const(int[])); + + int[] ma = [1, 2, 3]; + const(int[]) ca = [1, 2, 3]; + + assert(const(SumA)(123) == SumA(123)); + assert(const(SumB)(ma[]) == SumB(ca[])); + assert(const(SumC)(ma[]) == SumC(ca[])); +} + +// Imported types +@safe unittest +{ + import std.typecons : Tuple; + + alias MySum = SumType!(Tuple!(int, int)); +} + +// const and immutable types +@safe unittest +{ + alias MySum = SumType!(const(int[]), immutable(float[])); +} + +// Recursive types +@safe unittest +{ + alias MySum = SumType!(This*); + assert(is(MySum.Types[0] == MySum*)); +} + +// Allowed types +@safe unittest +{ + import std.meta : AliasSeq; + + alias MySum = SumType!(int, float, This*); + + assert(is(MySum.Types == AliasSeq!(int, float, MySum*))); +} + +// Types with destructors and postblits +@system unittest +{ + int copies; + + static struct Test + { + bool initialized = false; + int* copiesPtr; + + this(this) { (*copiesPtr)++; } + ~this() { if (initialized) (*copiesPtr)--; } + } + + alias MySum = SumType!(int, Test); + + Test t = Test(true, &copies); + + { + MySum x = t; + assert(copies == 1); + } + assert(copies == 0); + + { + MySum x = 456; + assert(copies == 0); + } + assert(copies == 0); + + { + MySum x = t; + assert(copies == 1); + x = 456; + assert(copies == 0); + } + + { + MySum x = 456; + assert(copies == 0); + x = t; + assert(copies == 1); + } + + { + MySum x = t; + MySum y = x; + assert(copies == 2); + } + + { + MySum x = t; + MySum y; + y = x; + assert(copies == 2); + } +} + +// Doesn't destroy reference types +// Disabled in BetterC due to use of classes +version (D_BetterC) {} else +@system unittest +{ + bool destroyed; + + class C + { + ~this() + { + destroyed = true; + } + } + + struct S + { + ~this() {} + } + + alias MySum = SumType!(S, C); + + C c = new C(); + { + MySum x = c; + destroyed = false; + } + assert(!destroyed); + + { + MySum x = c; + destroyed = false; + x = S(); + assert(!destroyed); + } +} + +// Types with @disable this() +@safe unittest +{ + static struct NoInit + { + @disable this(); + } + + alias MySum = SumType!(NoInit, int); + + assert(!__traits(compiles, MySum())); + auto _ = MySum(42); +} + +// const SumTypes +version (D_BetterC) {} else // not @nogc, https://issues.dlang.org/show_bug.cgi?id=22117 +@safe unittest +{ + auto _ = const(SumType!(int[]))([1, 2, 3]); +} + +// Equality of const SumTypes +@safe unittest +{ + alias MySum = SumType!int; + + auto _ = const(MySum)(123) == const(MySum)(456); +} + +// Compares reference types using value equality +@safe unittest +{ + import std.array : staticArray; + + static struct Field {} + static struct Struct { Field[] fields; } + alias MySum = SumType!Struct; + + static arr1 = staticArray([Field()]); + static arr2 = staticArray([Field()]); + + auto a = MySum(Struct(arr1[])); + auto b = MySum(Struct(arr2[])); + + assert(a == b); +} + +// toString +// Disabled in BetterC due to use of std.conv.text +version (D_BetterC) {} else +@safe unittest +{ + import std.conv : text; + + static struct Int { int i; } + static struct Double { double d; } + alias Sum = SumType!(Int, Double); + + assert(Sum(Int(42)).text == Int(42).text, Sum(Int(42)).text); + assert(Sum(Double(33.3)).text == Double(33.3).text, Sum(Double(33.3)).text); + assert((const(Sum)(Int(42))).text == (const(Int)(42)).text, (const(Sum)(Int(42))).text); +} + +// string formatting +// Disabled in BetterC due to use of std.format.format +version (D_BetterC) {} else +@safe unittest +{ + import std.format : format; + + SumType!int x = 123; + + assert(format!"%s"(x) == format!"%s"(123)); + assert(format!"%x"(x) == format!"%x"(123)); +} + +// string formatting of qualified SumTypes +// Disabled in BetterC due to use of std.format.format and dynamic arrays +version (D_BetterC) {} else +@safe unittest +{ + import std.format : format; + + int[] a = [1, 2, 3]; + const(SumType!(int[])) x = a; + + assert(format!"%(%d, %)"(x) == format!"%(%s, %)"(a)); +} + +// Github issue #16 +// Disabled in BetterC due to use of dynamic arrays +version (D_BetterC) {} else +@safe unittest +{ + alias Node = SumType!(This[], string); + + // override inference of @system attribute for cyclic functions + assert((() @trusted => + Node([Node([Node("x")])]) + == + Node([Node([Node("x")])]) + )()); +} + +// Github issue #16 with const +// Disabled in BetterC due to use of dynamic arrays +version (D_BetterC) {} else +@safe unittest +{ + alias Node = SumType!(const(This)[], string); + + // override inference of @system attribute for cyclic functions + assert((() @trusted => + Node([Node([Node("x")])]) + == + Node([Node([Node("x")])]) + )()); +} + +// Stale pointers +// Disabled in BetterC due to use of dynamic arrays +version (D_BetterC) {} else +@system unittest +{ + alias MySum = SumType!(ubyte, void*[2]); + + MySum x = [null, cast(void*) 0x12345678]; + void** p = &x.get!(void*[2])[1]; + x = ubyte(123); + + assert(*p != cast(void*) 0x12345678); +} + +// Exception-safe assignment +// Disabled in BetterC due to use of exceptions +version (D_BetterC) {} else +@safe unittest +{ + static struct A + { + int value = 123; + } + + static struct B + { + int value = 456; + this(this) { throw new Exception("oops"); } + } + + alias MySum = SumType!(A, B); + + MySum x; + try + { + x = B(); + } + catch (Exception e) {} + + assert( + (x.tag == 0 && x.get!A.value == 123) || + (x.tag == 1 && x.get!B.value == 456) + ); +} + +// Types with @disable this(this) +@safe unittest +{ + import core.lifetime : move; + + static struct NoCopy + { + @disable this(this); + } + + alias MySum = SumType!NoCopy; + + NoCopy lval = NoCopy(); + + MySum x = NoCopy(); + MySum y = NoCopy(); + + + assert(!__traits(compiles, SumType!NoCopy(lval))); + + y = NoCopy(); + y = move(x); + assert(!__traits(compiles, y = lval)); + assert(!__traits(compiles, y = x)); + + bool b = x == y; +} + +// Github issue #22 +// Disabled in BetterC due to use of std.typecons.Nullable +version (D_BetterC) {} else +@safe unittest +{ + import std.typecons; + + static struct A + { + SumType!(Nullable!int) a = Nullable!int.init; + } +} + +// Static arrays of structs with postblits +// Disabled in BetterC due to use of dynamic arrays +version (D_BetterC) {} else +@safe unittest +{ + static struct S + { + int n; + this(this) { n++; } + } + + SumType!(S[1]) x = [S(0)]; + SumType!(S[1]) y = x; + + auto xval = x.get!(S[1])[0].n; + auto yval = y.get!(S[1])[0].n; + + assert(xval != yval); +} + +// Replacement does not happen inside SumType +// Disabled in BetterC due to use of associative arrays +version (D_BetterC) {} else +@safe unittest +{ + import std.typecons : Tuple, ReplaceTypeUnless; + alias A = Tuple!(This*,SumType!(This*))[SumType!(This*,string)[This]]; + alias TR = ReplaceTypeUnless!(isSumTypeInstance, This, int, A); + static assert(is(TR == Tuple!(int*,SumType!(This*))[SumType!(This*, string)[int]])); +} + +// Supports nested self-referential SumTypes +@safe unittest +{ + import std.typecons : Tuple, Flag; + alias Nat = SumType!(Flag!"0", Tuple!(This*)); + alias Inner = SumType!Nat; + alias Outer = SumType!(Nat*, Tuple!(This*, This*)); +} + +// Self-referential SumTypes inside Algebraic +// Disabled in BetterC due to use of std.variant.Algebraic +version (D_BetterC) {} else +@safe unittest +{ + import std.variant : Algebraic; + + alias T = Algebraic!(SumType!(This*)); + + assert(is(T.AllowedTypes[0].Types[0] == T.AllowedTypes[0]*)); +} + +// Doesn't call @system postblits in @safe code +@safe unittest +{ + static struct SystemCopy { @system this(this) {} } + SystemCopy original; + + assert(!__traits(compiles, () @safe + { + SumType!SystemCopy copy = original; + })); + + assert(!__traits(compiles, () @safe + { + SumType!SystemCopy copy; copy = original; + })); +} + +// Doesn't overwrite pointers in @safe code +@safe unittest +{ + alias MySum = SumType!(int*, int); + + MySum x; + + assert(!__traits(compiles, () @safe + { + x = 123; + })); + + assert(!__traits(compiles, () @safe + { + x = MySum(123); + })); +} + +// Types with invariants +// Disabled in BetterC due to use of exceptions +version (D_BetterC) {} else +version (D_Invariants) +@system unittest +{ + import std.exception : assertThrown; + import core.exception : AssertError; + + struct S + { + int i; + invariant { assert(i >= 0); } + } + + class C + { + int i; + invariant { assert(i >= 0); } + } + + SumType!S x; + x.match!((ref v) { v.i = -1; }); + assertThrown!AssertError(assert(&x)); + + SumType!C y = new C(); + y.match!((ref v) { v.i = -1; }); + assertThrown!AssertError(assert(&y)); +} + +// Calls value postblit on self-assignment +@safe unittest +{ + static struct S + { + int n; + this(this) { n++; } + } + + SumType!S x = S(); + SumType!S y; + y = x; + + auto xval = x.get!S.n; + auto yval = y.get!S.n; + + assert(xval != yval); +} + +// Github issue #29 +@safe unittest +{ + alias A = SumType!string; + + @safe A createA(string arg) + { + return A(arg); + } + + @safe void test() + { + A a = createA(""); + } +} + +// SumTypes as associative array keys +// Disabled in BetterC due to use of associative arrays +version (D_BetterC) {} else +@safe unittest +{ + int[SumType!(int, string)] aa; +} + +// toString with non-copyable types +// Disabled in BetterC due to use of std.conv.to (in toString) +version (D_BetterC) {} else +@safe unittest +{ + struct NoCopy + { + @disable this(this); + } + + SumType!NoCopy x; + + auto _ = x.toString(); +} + +// Can use the result of assignment +@safe unittest +{ + alias MySum = SumType!(int, float); + + MySum a = MySum(123); + MySum b = MySum(3.14); + + assert((a = b) == b); + assert((a = MySum(123)) == MySum(123)); + assert((a = 3.14) == MySum(3.14)); + assert(((a = b) = MySum(123)) == MySum(123)); +} + +// Types with copy constructors +@safe unittest +{ + static struct S + { + int n; + + this(ref return scope inout S other) inout + { + n = other.n + 1; + } + } + + SumType!S x = S(); + SumType!S y = x; + + auto xval = x.get!S.n; + auto yval = y.get!S.n; + + assert(xval != yval); +} + +// Copyable by generated copy constructors +@safe unittest +{ + static struct Inner + { + ref this(ref inout Inner other) {} + } + + static struct Outer + { + SumType!Inner inner; + } + + Outer x; + Outer y = x; +} + +// Types with qualified copy constructors +@safe unittest +{ + static struct ConstCopy + { + int n; + this(inout int n) inout { this.n = n; } + this(ref const typeof(this) other) const { this.n = other.n; } + } + + static struct ImmutableCopy + { + int n; + this(inout int n) inout { this.n = n; } + this(ref immutable typeof(this) other) immutable { this.n = other.n; } + } + + const SumType!ConstCopy x = const(ConstCopy)(1); + immutable SumType!ImmutableCopy y = immutable(ImmutableCopy)(1); +} + +// Types with disabled opEquals +@safe unittest +{ + static struct S + { + @disable bool opEquals(const S rhs) const; + } + + auto _ = SumType!S(S()); +} + +// Types with non-const opEquals +@safe unittest +{ + static struct S + { + int i; + bool opEquals(S rhs) { return i == rhs.i; } + } + + auto _ = SumType!S(S(123)); +} + +// Incomparability of different SumTypes +@safe unittest +{ + SumType!(int, string) x = 123; + SumType!(string, int) y = 123; + + assert(!__traits(compiles, x != y)); +} + +// Self-reference in return/parameter type of function pointer member +// Disabled in BetterC due to use of delegates +version (D_BetterC) {} else +@safe unittest +{ + alias T = SumType!(int, This delegate(This)); +} + +// Construction and assignment from implicitly-convertible lvalue +@safe unittest +{ + alias MySum = SumType!bool; + + const(bool) b = true; + + MySum x = b; + MySum y; y = b; +} + +// @safe assignment to the only pointer type in a SumType +@safe unittest +{ + SumType!(string, int) sm = 123; + sm = "this should be @safe"; +} + +// Immutable member type with copy constructor +// https://issues.dlang.org/show_bug.cgi?id=22572 +@safe unittest +{ + static struct CopyConstruct + { + this(ref inout CopyConstruct other) inout {} + } + + static immutable struct Value + { + CopyConstruct c; + } + + SumType!Value s; +} + +// Construction of inout-qualified SumTypes +// https://issues.dlang.org/show_bug.cgi?id=22901 +@safe unittest +{ + static inout(SumType!(int[])) example(inout(int[]) arr) + { + return inout(SumType!(int[]))(arr); + } +} + +// Assignment of struct with overloaded opAssign in CTFE +// https://issues.dlang.org/show_bug.cgi?id=23182 +@safe unittest +{ + static struct HasOpAssign + { + void opAssign(HasOpAssign rhs) {} + } + + static SumType!HasOpAssign test() + { + SumType!HasOpAssign s; + // Test both overloads + s = HasOpAssign(); + s = SumType!HasOpAssign(); + return s; + } + + // Force CTFE + enum result = test(); +} + +/// True if `T` is an instance of the `SumType` template, otherwise false. +private enum bool isSumTypeInstance(T) = is(T == SumType!Args, Args...); + +@safe unittest +{ + static struct Wrapper + { + SumType!int s; + alias s this; + } + + assert(isSumTypeInstance!(SumType!int)); + assert(!isSumTypeInstance!Wrapper); +} + +/// True if `T` is a [SumType] or implicitly converts to one, otherwise false. +enum bool isSumType(T) = is(T : SumType!Args, Args...); + +/// +@safe unittest +{ + static struct ConvertsToSumType + { + SumType!int payload; + alias payload this; + } + + static struct ContainsSumType + { + SumType!int payload; + } + + assert(isSumType!(SumType!int)); + assert(isSumType!ConvertsToSumType); + assert(!isSumType!ContainsSumType); +} + +/** + * Calls a type-appropriate function with the value held in a [SumType]. + * + * For each possible type the [SumType] can hold, the given handlers are + * checked, in order, to see whether they accept a single argument of that type. + * The first one that does is chosen as the match for that type. (Note that the + * first match may not always be the most exact match. + * See ["Avoiding unintentional matches"](#avoiding-unintentional-matches) for + * one common pitfall.) + * + * Every type must have a matching handler, and every handler must match at + * least one type. This is enforced at compile time. + * + * Handlers may be functions, delegates, or objects with `opCall` overloads. If + * a function with more than one overload is given as a handler, all of the + * overloads are considered as potential matches. + * + * Templated handlers are also accepted, and will match any type for which they + * can be [implicitly instantiated](https://dlang.org/glossary.html#ifti). See + * ["Introspection-based matching"](#introspection-based-matching) for an + * example of templated handler usage. + * + * If multiple [SumType]s are passed to match, their values are passed to the + * handlers as separate arguments, and matching is done for each possible + * combination of value types. See ["Multiple dispatch"](#multiple-dispatch) for + * an example. + * + * Returns: + * The value returned from the handler that matches the currently-held type. + * + * See_Also: $(REF visit, std,variant) + */ +template match(handlers...) +{ + import std.typecons : Yes; + + /** + * The actual `match` function. + * + * Params: + * args = One or more [SumType] objects. + */ + auto ref match(SumTypes...)(auto ref SumTypes args) + if (allSatisfy!(isSumType, SumTypes) && args.length > 0) + { + return matchImpl!(Yes.exhaustive, handlers)(args); + } +} + +/** $(DIVID avoiding-unintentional-matches, $(H3 Avoiding unintentional matches)) + * + * Sometimes, implicit conversions may cause a handler to match more types than + * intended. The example below shows two solutions to this problem. + */ +@safe unittest +{ + alias Number = SumType!(double, int); + + Number x; + + // Problem: because int implicitly converts to double, the double + // handler is used for both types, and the int handler never matches. + assert(!__traits(compiles, + x.match!( + (double d) => "got double", + (int n) => "got int" + ) + )); + + // Solution 1: put the handler for the "more specialized" type (in this + // case, int) before the handler for the type it converts to. + assert(__traits(compiles, + x.match!( + (int n) => "got int", + (double d) => "got double" + ) + )); + + // Solution 2: use a template that only accepts the exact type it's + // supposed to match, instead of any type that implicitly converts to it. + alias exactly(T, alias fun) = function (arg) + { + static assert(is(typeof(arg) == T)); + return fun(arg); + }; + + // Now, even if we put the double handler first, it will only be used for + // doubles, not ints. + assert(__traits(compiles, + x.match!( + exactly!(double, d => "got double"), + exactly!(int, n => "got int") + ) + )); +} + +/** $(DIVID multiple-dispatch, $(H3 Multiple dispatch)) + * + * Pattern matching can be performed on multiple `SumType`s at once by passing + * handlers with multiple arguments. This usually leads to more concise code + * than using nested calls to `match`, as show below. + */ +@safe unittest +{ + struct Point2D { double x, y; } + struct Point3D { double x, y, z; } + + alias Point = SumType!(Point2D, Point3D); + + version (none) + { + // This function works, but the code is ugly and repetitive. + // It uses three separate calls to match! + @safe pure nothrow @nogc + bool sameDimensions(Point p1, Point p2) + { + return p1.match!( + (Point2D _) => p2.match!( + (Point2D _) => true, + _ => false + ), + (Point3D _) => p2.match!( + (Point3D _) => true, + _ => false + ) + ); + } + } + + // This version is much nicer. + @safe pure nothrow @nogc + bool sameDimensions(Point p1, Point p2) + { + alias doMatch = match!( + (Point2D _1, Point2D _2) => true, + (Point3D _1, Point3D _2) => true, + (_1, _2) => false + ); + + return doMatch(p1, p2); + } + + Point a = Point2D(1, 2); + Point b = Point2D(3, 4); + Point c = Point3D(5, 6, 7); + Point d = Point3D(8, 9, 0); + + assert( sameDimensions(a, b)); + assert( sameDimensions(c, d)); + assert(!sameDimensions(a, c)); + assert(!sameDimensions(d, b)); +} + +/** + * Attempts to call a type-appropriate function with the value held in a + * [SumType], and throws on failure. + * + * Matches are chosen using the same rules as [match], but are not required to + * be exhaustive—in other words, a type (or combination of types) is allowed to + * have no matching handler. If a type without a handler is encountered at + * runtime, a [MatchException] is thrown. + * + * Not available when compiled with `-betterC`. + * + * Returns: + * The value returned from the handler that matches the currently-held type, + * if a handler was given for that type. + * + * Throws: + * [MatchException], if the currently-held type has no matching handler. + * + * See_Also: $(REF tryVisit, std,variant) + */ +version (D_Exceptions) +template tryMatch(handlers...) +{ + import std.typecons : No; + + /** + * The actual `tryMatch` function. + * + * Params: + * args = One or more [SumType] objects. + */ + auto ref tryMatch(SumTypes...)(auto ref SumTypes args) + if (allSatisfy!(isSumType, SumTypes) && args.length > 0) + { + return matchImpl!(No.exhaustive, handlers)(args); + } +} + +/** + * Thrown by [tryMatch] when an unhandled type is encountered. + * + * Not available when compiled with `-betterC`. + */ +version (D_Exceptions) +class MatchException : Exception +{ + /// + pure @safe @nogc nothrow + this(string msg, string file = __FILE__, size_t line = __LINE__) + { + super(msg, file, line); + } +} + +/** + * True if `handler` is a potential match for `Ts`, otherwise false. + * + * See the documentation for [match] for a full explanation of how matches are + * chosen. + */ +template canMatch(alias handler, Ts...) +if (Ts.length > 0) +{ + enum canMatch = is(typeof((ref Ts args) => handler(args))); +} + +/// +@safe unittest +{ + alias handleInt = (int i) => "got an int"; + + assert( canMatch!(handleInt, int)); + assert(!canMatch!(handleInt, string)); +} + +// Includes all overloads of the given handler +@safe unittest +{ + static struct OverloadSet + { + static void fun(int n) {} + static void fun(double d) {} + } + + assert(canMatch!(OverloadSet.fun, int)); + assert(canMatch!(OverloadSet.fun, double)); +} + +// Like aliasSeqOf!(iota(n)), but works in BetterC +private template Iota(size_t n) +{ + static if (n == 0) + { + alias Iota = AliasSeq!(); + } + else + { + alias Iota = AliasSeq!(Iota!(n - 1), n - 1); + } +} + +@safe unittest +{ + assert(is(Iota!0 == AliasSeq!())); + assert(Iota!1 == AliasSeq!(0)); + assert(Iota!3 == AliasSeq!(0, 1, 2)); +} + +/* The number that the dim-th argument's tag is multiplied by when + * converting TagTuples to and from case indices ("caseIds"). + * + * Named by analogy to the stride that the dim-th index into a + * multidimensional static array is multiplied by to calculate the + * offset of a specific element. + */ +private size_t stride(size_t dim, lengths...)() +{ + import core.checkedint : mulu; + + size_t result = 1; + bool overflow = false; + + static foreach (i; 0 .. dim) + { + result = mulu(result, lengths[i], overflow); + } + + /* The largest number matchImpl uses, numCases, is calculated with + * stride!(SumTypes.length), so as long as this overflow check + * passes, we don't need to check for overflow anywhere else. + */ + assert(!overflow, "Integer overflow"); + return result; +} + +private template matchImpl(Flag!"exhaustive" exhaustive, handlers...) +{ + auto ref matchImpl(SumTypes...)(auto ref SumTypes args) + if (allSatisfy!(isSumType, SumTypes) && args.length > 0) + { + alias stride(size_t i) = .stride!(i, Map!(typeCount, SumTypes)); + alias TagTuple = .TagTuple!(SumTypes); + + /* + * A list of arguments to be passed to a handler needed for the case + * labeled with `caseId`. + */ + template handlerArgs(size_t caseId) + { + enum tags = TagTuple.fromCaseId(caseId); + enum argsFrom(size_t i : tags.length) = ""; + enum argsFrom(size_t i) = "args[" ~ toCtString!i ~ "].get!(SumTypes[" ~ toCtString!i ~ "]" ~ + ".Types[" ~ toCtString!(tags[i]) ~ "])(), " ~ argsFrom!(i + 1); + enum handlerArgs = argsFrom!0; + } + + /* An AliasSeq of the types of the member values in the argument list + * returned by `handlerArgs!caseId`. + * + * Note that these are the actual (that is, qualified) types of the + * member values, which may not be the same as the types listed in + * the arguments' `.Types` properties. + */ + template valueTypes(size_t caseId) + { + enum tags = TagTuple.fromCaseId(caseId); + + template getType(size_t i) + { + enum tid = tags[i]; + alias T = SumTypes[i].Types[tid]; + alias getType = typeof(args[i].get!T()); + } + + alias valueTypes = Map!(getType, Iota!(tags.length)); + } + + /* The total number of cases is + * + * Π SumTypes[i].Types.length for 0 ≤ i < SumTypes.length + * + * Or, equivalently, + * + * ubyte[SumTypes[0].Types.length]...[SumTypes[$-1].Types.length].sizeof + * + * Conveniently, this is equal to stride!(SumTypes.length), so we can + * use that function to compute it. + */ + enum numCases = stride!(SumTypes.length); + + /* Guaranteed to never be a valid handler index, since + * handlers.length <= size_t.max. + */ + enum noMatch = size_t.max; + + // An array that maps caseIds to handler indices ("hids"). + enum matches = () + { + size_t[numCases] matches; + + // Workaround for https://issues.dlang.org/show_bug.cgi?id=19561 + foreach (ref match; matches) + { + match = noMatch; + } + + static foreach (caseId; 0 .. numCases) + { + static foreach (hid, handler; handlers) + { + static if (canMatch!(handler, valueTypes!caseId)) + { + if (matches[caseId] == noMatch) + { + matches[caseId] = hid; + } + } + } + } + + return matches; + }(); + + import std.algorithm.searching : canFind; + + // Check for unreachable handlers + static foreach (hid, handler; handlers) + { + static assert(matches[].canFind(hid), + "`handlers[" ~ toCtString!hid ~ "]` " ~ + "of type `" ~ ( __traits(isTemplate, handler) + ? "template" + : typeof(handler).stringof + ) ~ "` " ~ + "never matches" + ); + } + + // Workaround for https://issues.dlang.org/show_bug.cgi?id=19993 + enum handlerName(size_t hid) = "handler" ~ toCtString!hid; + + static foreach (size_t hid, handler; handlers) + { + mixin("alias ", handlerName!hid, " = handler;"); + } + + immutable argsId = TagTuple(args).toCaseId; + + final switch (argsId) + { + static foreach (caseId; 0 .. numCases) + { + case caseId: + static if (matches[caseId] != noMatch) + { + return mixin(handlerName!(matches[caseId]), "(", handlerArgs!caseId, ")"); + } + else + { + static if (exhaustive) + { + static assert(false, + "No matching handler for types `" ~ valueTypes!caseId.stringof ~ "`"); + } + else + { + throw new MatchException( + "No matching handler for types `" ~ valueTypes!caseId.stringof ~ "`"); + } + } + } + } + + assert(false, "unreachable"); + } +} + +private enum typeCount(SumType) = SumType.Types.length; + +/* A TagTuple represents a single possible set of tags that `args` + * could have at runtime. + * + * Because D does not allow a struct to be the controlling expression + * of a switch statement, we cannot dispatch on the TagTuple directly. + * Instead, we must map each TagTuple to a unique integer and generate + * a case label for each of those integers. + * + * This mapping is implemented in `fromCaseId` and `toCaseId`. It uses + * the same technique that's used to map index tuples to memory offsets + * in a multidimensional static array. + * + * For example, when `args` consists of two SumTypes with two member + * types each, the TagTuples corresponding to each case label are: + * + * case 0: TagTuple([0, 0]) + * case 1: TagTuple([1, 0]) + * case 2: TagTuple([0, 1]) + * case 3: TagTuple([1, 1]) + * + * When there is only one argument, the caseId is equal to that + * argument's tag. + */ +private struct TagTuple(SumTypes...) +{ + size_t[SumTypes.length] tags; + alias tags this; + + alias stride(size_t i) = .stride!(i, Map!(typeCount, SumTypes)); + + invariant + { + static foreach (i; 0 .. tags.length) + { + assert(tags[i] < SumTypes[i].Types.length, "Invalid tag"); + } + } + + this(ref const(SumTypes) args) + { + static foreach (i; 0 .. tags.length) + { + tags[i] = args[i].tag; + } + } + + static TagTuple fromCaseId(size_t caseId) + { + TagTuple result; + + // Most-significant to least-significant + static foreach_reverse (i; 0 .. result.length) + { + result[i] = caseId / stride!i; + caseId %= stride!i; + } + + return result; + } + + size_t toCaseId() + { + size_t result; + + static foreach (i; 0 .. tags.length) + { + result += tags[i] * stride!i; + } + + return result; + } +} + +// Matching +@safe unittest +{ + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + MySum y = MySum(3.14); + + assert(x.match!((int v) => true, (float v) => false)); + assert(y.match!((int v) => false, (float v) => true)); +} + +// Missing handlers +@safe unittest +{ + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + + assert(!__traits(compiles, x.match!((int x) => true))); + assert(!__traits(compiles, x.match!())); +} + +// Handlers with qualified parameters +// Disabled in BetterC due to use of dynamic arrays +version (D_BetterC) {} else +@safe unittest +{ + alias MySum = SumType!(int[], float[]); + + MySum x = MySum([1, 2, 3]); + MySum y = MySum([1.0, 2.0, 3.0]); + + assert(x.match!((const(int[]) v) => true, (const(float[]) v) => false)); + assert(y.match!((const(int[]) v) => false, (const(float[]) v) => true)); +} + +// Handlers for qualified types +// Disabled in BetterC due to use of dynamic arrays +version (D_BetterC) {} else +@safe unittest +{ + alias MySum = SumType!(immutable(int[]), immutable(float[])); + + MySum x = MySum([1, 2, 3]); + + assert(x.match!((immutable(int[]) v) => true, (immutable(float[]) v) => false)); + assert(x.match!((const(int[]) v) => true, (const(float[]) v) => false)); + // Tail-qualified parameters + assert(x.match!((immutable(int)[] v) => true, (immutable(float)[] v) => false)); + assert(x.match!((const(int)[] v) => true, (const(float)[] v) => false)); + // Generic parameters + assert(x.match!((immutable v) => true)); + assert(x.match!((const v) => true)); + // Unqualified parameters + assert(!__traits(compiles, + x.match!((int[] v) => true, (float[] v) => false) + )); +} + +// Delegate handlers +// Disabled in BetterC due to use of closures +version (D_BetterC) {} else +@safe unittest +{ + alias MySum = SumType!(int, float); + + int answer = 42; + MySum x = MySum(42); + MySum y = MySum(3.14); + + assert(x.match!((int v) => v == answer, (float v) => v == answer)); + assert(!y.match!((int v) => v == answer, (float v) => v == answer)); +} + +version (unittest) +{ + version (D_BetterC) + { + // std.math.isClose depends on core.runtime.math, so use a + // libc-based version for testing with -betterC + @safe pure @nogc nothrow + private bool isClose(double lhs, double rhs) + { + import core.stdc.math : fabs; + + return fabs(lhs - rhs) < 1e-5; + } + } + else + { + import std.math.operations : isClose; + } +} + +// Generic handler +@safe unittest +{ + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + MySum y = MySum(3.14); + + assert(x.match!(v => v*2) == 84); + assert(y.match!(v => v*2).isClose(6.28)); +} + +// Fallback to generic handler +// Disabled in BetterC due to use of std.conv.to +version (D_BetterC) {} else +@safe unittest +{ + import std.conv : to; + + alias MySum = SumType!(int, float, string); + + MySum x = MySum(42); + MySum y = MySum("42"); + + assert(x.match!((string v) => v.to!int, v => v*2) == 84); + assert(y.match!((string v) => v.to!int, v => v*2) == 42); +} + +// Multiple non-overlapping generic handlers +@safe unittest +{ + import std.array : staticArray; + + alias MySum = SumType!(int, float, int[], char[]); + + static ints = staticArray([1, 2, 3]); + static chars = staticArray(['a', 'b', 'c']); + + MySum x = MySum(42); + MySum y = MySum(3.14); + MySum z = MySum(ints[]); + MySum w = MySum(chars[]); + + assert(x.match!(v => v*2, v => v.length) == 84); + assert(y.match!(v => v*2, v => v.length).isClose(6.28)); + assert(w.match!(v => v*2, v => v.length) == 3); + assert(z.match!(v => v*2, v => v.length) == 3); +} + +// Structural matching +@safe unittest +{ + static struct S1 { int x; } + static struct S2 { int y; } + alias MySum = SumType!(S1, S2); + + MySum a = MySum(S1(0)); + MySum b = MySum(S2(0)); + + assert(a.match!(s1 => s1.x + 1, s2 => s2.y - 1) == 1); + assert(b.match!(s1 => s1.x + 1, s2 => s2.y - 1) == -1); +} + +// Separate opCall handlers +@safe unittest +{ + static struct IntHandler + { + bool opCall(int arg) + { + return true; + } + } + + static struct FloatHandler + { + bool opCall(float arg) + { + return false; + } + } + + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + MySum y = MySum(3.14); + + assert(x.match!(IntHandler.init, FloatHandler.init)); + assert(!y.match!(IntHandler.init, FloatHandler.init)); +} + +// Compound opCall handler +@safe unittest +{ + static struct CompoundHandler + { + bool opCall(int arg) + { + return true; + } + + bool opCall(float arg) + { + return false; + } + } + + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + MySum y = MySum(3.14); + + assert(x.match!(CompoundHandler.init)); + assert(!y.match!(CompoundHandler.init)); +} + +// Ordered matching +@safe unittest +{ + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + + assert(x.match!((int v) => true, v => false)); +} + +// Non-exhaustive matching +version (D_Exceptions) +@system unittest +{ + import std.exception : assertThrown, assertNotThrown; + + alias MySum = SumType!(int, float); + + MySum x = MySum(42); + MySum y = MySum(3.14); + + assertNotThrown!MatchException(x.tryMatch!((int n) => true)); + assertThrown!MatchException(y.tryMatch!((int n) => true)); +} + +// Non-exhaustive matching in @safe code +version (D_Exceptions) +@safe unittest +{ + SumType!(int, float) x; + + auto _ = x.tryMatch!( + (int n) => n + 1, + ); +} + +// Handlers with ref parameters +@safe unittest +{ + alias Value = SumType!(long, double); + + auto value = Value(3.14); + + value.match!( + (long) {}, + (ref double d) { d *= 2; } + ); + + assert(value.get!double.isClose(6.28)); +} + +// Unreachable handlers +@safe unittest +{ + alias MySum = SumType!(int, string); + + MySum s; + + assert(!__traits(compiles, + s.match!( + (int _) => 0, + (string _) => 1, + (double _) => 2 + ) + )); + + assert(!__traits(compiles, + s.match!( + _ => 0, + (int _) => 1 + ) + )); +} + +// Unsafe handlers +@system unittest +{ + SumType!int x; + alias unsafeHandler = (int x) @system { return; }; + + assert(!__traits(compiles, () @safe + { + x.match!unsafeHandler; + })); + + auto test() @system + { + return x.match!unsafeHandler; + } +} + +// Overloaded handlers +@safe unittest +{ + static struct OverloadSet + { + static string fun(int i) { return "int"; } + static string fun(double d) { return "double"; } + } + + alias MySum = SumType!(int, double); + + MySum a = 42; + MySum b = 3.14; + + assert(a.match!(OverloadSet.fun) == "int"); + assert(b.match!(OverloadSet.fun) == "double"); +} + +// Overload sets that include SumType arguments +@safe unittest +{ + alias Inner = SumType!(int, double); + alias Outer = SumType!(Inner, string); + + static struct OverloadSet + { + @safe: + static string fun(int i) { return "int"; } + static string fun(double d) { return "double"; } + static string fun(string s) { return "string"; } + static string fun(Inner i) { return i.match!fun; } + static string fun(Outer o) { return o.match!fun; } + } + + Outer a = Inner(42); + Outer b = Inner(3.14); + Outer c = "foo"; + + assert(OverloadSet.fun(a) == "int"); + assert(OverloadSet.fun(b) == "double"); + assert(OverloadSet.fun(c) == "string"); +} + +// Overload sets with ref arguments +@safe unittest +{ + static struct OverloadSet + { + static void fun(ref int i) { i = 42; } + static void fun(ref double d) { d = 3.14; } + } + + alias MySum = SumType!(int, double); + + MySum x = 0; + MySum y = 0.0; + + x.match!(OverloadSet.fun); + y.match!(OverloadSet.fun); + + assert(x.match!((value) => is(typeof(value) == int) && value == 42)); + assert(y.match!((value) => is(typeof(value) == double) && value == 3.14)); +} + +// Overload sets with templates +@safe unittest +{ + import std.traits : isNumeric; + + static struct OverloadSet + { + static string fun(string arg) + { + return "string"; + } + + static string fun(T)(T arg) + if (isNumeric!T) + { + return "numeric"; + } + } + + alias MySum = SumType!(int, string); + + MySum x = 123; + MySum y = "hello"; + + assert(x.match!(OverloadSet.fun) == "numeric"); + assert(y.match!(OverloadSet.fun) == "string"); +} + +// Github issue #24 +@safe unittest +{ + void test() @nogc + { + int acc = 0; + SumType!int(1).match!((int x) => acc += x); + } +} + +// Github issue #31 +@safe unittest +{ + void test() @nogc + { + int acc = 0; + + SumType!(int, string)(1).match!( + (int x) => acc += x, + (string _) => 0, + ); + } +} + +// Types that `alias this` a SumType +@safe unittest +{ + static struct A {} + static struct B {} + static struct D { SumType!(A, B) value; alias value this; } + + auto _ = D().match!(_ => true); +} + +// Multiple dispatch +@safe unittest +{ + alias MySum = SumType!(int, string); + + static int fun(MySum x, MySum y) + { + import std.meta : Args = AliasSeq; + + return Args!(x, y).match!( + (int xv, int yv) => 0, + (string xv, int yv) => 1, + (int xv, string yv) => 2, + (string xv, string yv) => 3 + ); + } + + assert(fun(MySum(0), MySum(0)) == 0); + assert(fun(MySum(""), MySum(0)) == 1); + assert(fun(MySum(0), MySum("")) == 2); + assert(fun(MySum(""), MySum("")) == 3); +} + +// inout SumTypes +@safe unittest +{ + inout(int[]) fun(inout(SumType!(int[])) x) + { + return x.match!((inout(int[]) a) => a); + } +} + +private void destroyIfOwner(T)(ref T value) +{ + static if (hasElaborateDestructor!T) + { + destroy(value); + } +} diff --git a/source/dyaml/style.d b/source/dyaml/style.d new file mode 100644 index 0000000..319592c --- /dev/null +++ b/source/dyaml/style.d @@ -0,0 +1,37 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +///YAML node formatting styles. +module dyaml.style; + + +///Scalar styles. +enum ScalarStyle : ubyte +{ + /// Invalid (uninitialized) style + invalid = 0, + /// `|` (Literal block style) + literal, + /// `>` (Folded block style) + folded, + /// Plain scalar + plain, + /// Single quoted scalar + singleQuoted, + /// Double quoted scalar + doubleQuoted +} + +///Collection styles. +enum CollectionStyle : ubyte +{ + /// Invalid (uninitialized) style + invalid = 0, + /// Block style. + block, + /// Flow style. + flow +} diff --git a/source/dyaml/tagdirective.d b/source/dyaml/tagdirective.d new file mode 100644 index 0000000..54687fe --- /dev/null +++ b/source/dyaml/tagdirective.d @@ -0,0 +1,15 @@ + +// Copyright Ferdinand Majerech 2011. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +///Tag directives. +module dyaml.tagdirective; + +///Single tag directive. handle is the shortcut, prefix is the prefix that replaces it. +struct TagDirective +{ + string handle; + string prefix; +} diff --git a/source/dyaml/token.d b/source/dyaml/token.d new file mode 100644 index 0000000..5400a3f --- /dev/null +++ b/source/dyaml/token.d @@ -0,0 +1,172 @@ + +// Copyright Ferdinand Majerech 2011-2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/// YAML tokens. +/// Code based on PyYAML: http://www.pyyaml.org +module dyaml.token; + + +import std.conv; + +import dyaml.encoding; +import dyaml.exception; +import dyaml.reader; +import dyaml.style; + + +package: + +/// Token types. +enum TokenID : ubyte +{ + // Invalid (uninitialized) token + invalid = 0, + directive, + documentStart, + documentEnd, + streamStart, + streamEnd, + blockSequenceStart, + blockMappingStart, + blockEnd, + flowSequenceStart, + flowMappingStart, + flowSequenceEnd, + flowMappingEnd, + key, + value, + blockEntry, + flowEntry, + alias_, + anchor, + tag, + scalar +} + +/// Specifies the type of a tag directive token. +enum DirectiveType : ubyte +{ + // YAML version directive. + yaml, + // Tag directive. + tag, + // Any other directive is "reserved" for future YAML versions. + reserved +} + +/// Token produced by scanner. +/// +/// 32 bytes on 64-bit. +struct Token +{ + @disable int opCmp(ref Token); + + // 16B + /// Value of the token, if any. + /// + /// Values are char[] instead of string, as Parser may still change them in a few + /// cases. Parser casts values to strings when producing Events. + char[] value; + // 4B + /// Start position of the token in file/stream. + Mark startMark; + // 4B + /// End position of the token in file/stream. + Mark endMark; + // 1B + /// Token type. + TokenID id; + // 1B + /// Style of scalar token, if this is a scalar token. + ScalarStyle style; + // 1B + /// Encoding, if this is a stream start token. + Encoding encoding; + // 1B + /// Type of directive for directiveToken. + DirectiveType directive; + // 4B + /// Used to split value into 2 substrings for tokens that need 2 values (tagToken) + uint valueDivider; + + /// Get string representation of the token ID. + @property string idString() @safe pure const {return id.to!string;} +} + +/// Construct a directive token. +/// +/// Params: start = Start position of the token. +/// end = End position of the token. +/// value = Value of the token. +/// directive = Directive type (YAML or TAG in YAML 1.1). +/// nameEnd = Position of the end of the name +Token directiveToken(const Mark start, const Mark end, char[] value, + DirectiveType directive, const uint nameEnd) @safe pure nothrow @nogc +{ + return Token(value, start, end, TokenID.directive, ScalarStyle.init, Encoding.init, + directive, nameEnd); +} + +/// Construct a simple (no value) token with specified type. +/// +/// Params: id = Type of the token. +/// start = Start position of the token. +/// end = End position of the token. +Token simpleToken(TokenID id)(const Mark start, const Mark end) +{ + return Token(null, start, end, id); +} + +/// Construct a stream start token. +/// +/// Params: start = Start position of the token. +/// end = End position of the token. +/// encoding = Encoding of the stream. +Token streamStartToken(const Mark start, const Mark end, const Encoding encoding) @safe pure nothrow @nogc +{ + return Token(null, start, end, TokenID.streamStart, ScalarStyle.invalid, encoding); +} + +/// Aliases for construction of simple token types. +alias streamEndToken = simpleToken!(TokenID.streamEnd); +alias blockSequenceStartToken = simpleToken!(TokenID.blockSequenceStart); +alias blockMappingStartToken = simpleToken!(TokenID.blockMappingStart); +alias blockEndToken = simpleToken!(TokenID.blockEnd); +alias keyToken = simpleToken!(TokenID.key); +alias valueToken = simpleToken!(TokenID.value); +alias blockEntryToken = simpleToken!(TokenID.blockEntry); +alias flowEntryToken = simpleToken!(TokenID.flowEntry); + +/// Construct a simple token with value with specified type. +/// +/// Params: id = Type of the token. +/// start = Start position of the token. +/// end = End position of the token. +/// value = Value of the token. +/// valueDivider = A hack for TagToken to store 2 values in value; the first +/// value goes up to valueDivider, the second after it. +Token simpleValueToken(TokenID id)(const Mark start, const Mark end, char[] value, + const uint valueDivider = uint.max) +{ + return Token(value, start, end, id, ScalarStyle.invalid, Encoding.init, + DirectiveType.init, valueDivider); +} + +/// Alias for construction of tag token. +alias tagToken = simpleValueToken!(TokenID.tag); +alias aliasToken = simpleValueToken!(TokenID.alias_); +alias anchorToken = simpleValueToken!(TokenID.anchor); + +/// Construct a scalar token. +/// +/// Params: start = Start position of the token. +/// end = End position of the token. +/// value = Value of the token. +/// style = Style of the token. +Token scalarToken(const Mark start, const Mark end, char[] value, const ScalarStyle style) @safe pure nothrow @nogc +{ + return Token(value, start, end, TokenID.scalar, style); +} diff --git a/source/tinyendian.d b/source/tinyendian.d new file mode 100644 index 0000000..731b048 --- /dev/null +++ b/source/tinyendian.d @@ -0,0 +1,213 @@ +// Copyright Ferdinand Majerech 2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/// A minimal library providing functionality for changing the endianness of data. +module tinyendian; + +import std.system : Endian, endian; + +/// Unicode UTF encodings. +enum UTFEncoding : ubyte +{ + UTF_8, + UTF_16, + UTF_32 +} +/// +@safe unittest +{ + const ints = [314, -101]; + int[2] intsSwapBuffer = ints; + swapByteOrder(intsSwapBuffer[]); + swapByteOrder(intsSwapBuffer[]); + assert(ints == intsSwapBuffer, "Lost information when swapping byte order"); + + const floats = [3.14f, 10.1f]; + float[2] floatsSwapBuffer = floats; + swapByteOrder(floatsSwapBuffer[]); + swapByteOrder(floatsSwapBuffer[]); + assert(floats == floatsSwapBuffer, "Lost information when swapping byte order"); +} + +/** Swap byte order of items in an array in place. + * + * Params: + * + * T = Item type. Must be either 2 or 4 bytes long. + * array = Buffer with values to fix byte order of. + */ +void swapByteOrder(T)(T[] array) @trusted @nogc pure nothrow +if (T.sizeof == 2 || T.sizeof == 4) +{ + // Swap the byte order of all read characters. + foreach (ref item; array) + { + static if (T.sizeof == 2) + { + import std.algorithm.mutation : swap; + swap(*cast(ubyte*)&item, *(cast(ubyte*)&item + 1)); + } + else static if (T.sizeof == 4) + { + import core.bitop : bswap; + const swapped = bswap(*cast(uint*)&item); + item = *cast(const(T)*)&swapped; + } + else static assert(false, "Unsupported T: " ~ T.stringof); + } +} + +/// See fixUTFByteOrder. +struct FixUTFByteOrderResult +{ + ubyte[] array; + UTFEncoding encoding; + Endian endian; + uint bytesStripped = 0; +} + +/** Convert byte order of an array encoded in UTF(8/16/32) to system endianness in place. + * + * Uses the UTF byte-order-mark (BOM) to determine UTF encoding. If there is no BOM + * at the beginning of array, UTF-8 is assumed (this is compatible with ASCII). The + * BOM, if any, will be removed from the buffer. + * + * If the encoding is determined to be UTF-16 or UTF-32 and there aren't enough bytes + * for the last code unit (i.e. if array.length is odd for UTF-16 or not divisible by + * 4 for UTF-32), the extra bytes (1 for UTF-16, 1-3 for UTF-32) are stripped. + * + * Note that this function does $(B not) check if the array is a valid UTF string. It + * only works with the BOM and 1,2 or 4-byte items. + * + * Params: + * + * array = The array with UTF-data. + * + * Returns: + * + * A struct with the following members: + * + * $(D ubyte[] array) A slice of the input array containing data in correct + * byte order, without BOM and in case of UTF-16/UTF-32, + * without stripped bytes, if any. + * $(D UTFEncoding encoding) Encoding of the result (UTF-8, UTF-16 or UTF-32) + * $(D std.system.Endian endian) Endianness of the original array. + * $(D uint bytesStripped) Number of bytes stripped from a UTF-16/UTF-32 array, if + * any. This is non-zero only if array.length was not + * divisible by 2 or 4 for UTF-16 and UTF-32, respectively. + * + * Complexity: (BIGOH array.length) + */ +auto fixUTFByteOrder(ubyte[] array) @safe @nogc pure nothrow +{ + // Enumerates UTF BOMs, matching indices to byteOrderMarks/bomEndian. + enum BOM: ubyte + { + UTF_8 = 0, + UTF_16_LE = 1, + UTF_16_BE = 2, + UTF_32_LE = 3, + UTF_32_BE = 4, + None = ubyte.max + } + + // These 2 are from std.stream + static immutable ubyte[][5] byteOrderMarks = [ [0xEF, 0xBB, 0xBF], + [0xFF, 0xFE], + [0xFE, 0xFF], + [0xFF, 0xFE, 0x00, 0x00], + [0x00, 0x00, 0xFE, 0xFF] ]; + static immutable Endian[5] bomEndian = [ endian, + Endian.littleEndian, + Endian.bigEndian, + Endian.littleEndian, + Endian.bigEndian ]; + + // Documented in function ddoc. + + FixUTFByteOrderResult result; + + // Detect BOM, if any, in the bytes we've read. -1 means no BOM. + // Need the last match: First 2 bytes of UTF-32LE BOM match the UTF-16LE BOM. If we + // used the first match, UTF-16LE would be detected when we have a UTF-32LE BOM. + import std.algorithm.searching : startsWith; + BOM bomId = BOM.None; + foreach (i, bom; byteOrderMarks) + if (array.startsWith(bom)) + bomId = cast(BOM)i; + + result.endian = (bomId != BOM.None) ? bomEndian[bomId] : Endian.init; + + // Start of UTF data (after BOM, if any) + size_t start = 0; + // If we've read more than just the BOM, put the rest into the array. + with(BOM) final switch(bomId) + { + case None: result.encoding = UTFEncoding.UTF_8; break; + case UTF_8: + start = 3; + result.encoding = UTFEncoding.UTF_8; + break; + case UTF_16_LE, UTF_16_BE: + result.bytesStripped = array.length % 2; + start = 2; + result.encoding = UTFEncoding.UTF_16; + break; + case UTF_32_LE, UTF_32_BE: + result.bytesStripped = array.length % 4; + start = 4; + result.encoding = UTFEncoding.UTF_32; + break; + } + + // If there's a BOM, we need to move data back to ensure it starts at array[0] + if (start != 0) + { + array = array[start .. $ - result.bytesStripped]; + } + + // We enforce above that array.length is divisible by 2/4 for UTF-16/32 + if (endian != result.endian) + { + if (result.encoding == UTFEncoding.UTF_16) + swapByteOrder(cast(wchar[])array); + else if (result.encoding == UTFEncoding.UTF_32) + swapByteOrder(cast(dchar[])array); + } + + result.array = array; + return result; +} +/// +@safe unittest +{ + { + ubyte[] s = [0xEF, 0xBB, 0xBF, 'a']; + FixUTFByteOrderResult r = fixUTFByteOrder(s); + assert(r.encoding == UTFEncoding.UTF_8); + assert(r.array.length == 1); + assert(r.array == ['a']); + assert(r.endian == Endian.littleEndian); + } + + { + ubyte[] s = ['a']; + FixUTFByteOrderResult r = fixUTFByteOrder(s); + assert(r.encoding == UTFEncoding.UTF_8); + assert(r.array.length == 1); + assert(r.array == ['a']); + assert(r.endian == Endian.bigEndian); + } + + { + // strip 'a' b/c not complete unit + ubyte[] s = [0xFE, 0xFF, 'a']; + FixUTFByteOrderResult r = fixUTFByteOrder(s); + assert(r.encoding == UTFEncoding.UTF_16); + assert(r.array.length == 0); + assert(r.endian == Endian.bigEndian); + } + +}