Newer
Older
dub_jkp / source / dub / internal / dyaml / constructor.d
@WebFreak001 WebFreak001 on 4 Feb 2023 19 KB fix typo(s)
  1.  
  2. // Copyright Ferdinand Majerech 2011.
  3. // Distributed under the Boost Software License, Version 1.0.
  4. // (See accompanying file LICENSE_1_0.txt or copy at
  5. // http://www.boost.org/LICENSE_1_0.txt)
  6.  
  7. /**
  8. * Class that processes YAML mappings, sequences and scalars into nodes.
  9. * This can be used to add custom data types. A tutorial can be found
  10. * $(LINK2 https://dlang-community.github.io/D-YAML/, here).
  11. */
  12. module dub.internal.dyaml.constructor;
  13.  
  14.  
  15. import std.array;
  16. import std.algorithm;
  17. import std.base64;
  18. import std.container;
  19. import std.conv;
  20. import std.datetime;
  21. import std.exception;
  22. import std.regex;
  23. import std.string;
  24. import std.typecons;
  25. import std.utf;
  26.  
  27. import dub.internal.dyaml.node;
  28. import dub.internal.dyaml.exception;
  29. import dub.internal.dyaml.style;
  30.  
  31. package:
  32.  
  33. // Exception thrown at constructor errors.
  34. class ConstructorException : YAMLException
  35. {
  36. /// Construct a ConstructorException.
  37. ///
  38. /// Params: msg = Error message.
  39. /// start = Start position of the error context.
  40. /// end = End position of the error context.
  41. this(string msg, Mark start, Mark end, string file = __FILE__, size_t line = __LINE__)
  42. @safe pure nothrow
  43. {
  44. super(msg ~ "\nstart: " ~ start.toString() ~ "\nend: " ~ end.toString(),
  45. file, line);
  46. }
  47. }
  48.  
  49. /** Constructs YAML values.
  50. *
  51. * Each YAML scalar, sequence or mapping has a tag specifying its data type.
  52. * Constructor uses user-specifiable functions to create a node of desired
  53. * data type from a scalar, sequence or mapping.
  54. *
  55. *
  56. * Each of these functions is associated with a tag, and can process either
  57. * a scalar, a sequence, or a mapping. The constructor passes each value to
  58. * the function with corresponding tag, which then returns the resulting value
  59. * that can be stored in a node.
  60. *
 * A tag with no dedicated case in constructNode is not treated as an error:
 * the value is passed to the Node constructor as-is, together with that tag.
  62. */
/*
 * Construct a node from a tag and a raw value.
 *
 * The tag selects one of the private construct* helpers of this module. Tags
 * without a dedicated case reach the default branch, which hands the value to
 * the Node constructor unchanged.
 *
 * Params:  start = Start position of the node.
 *          end   = End position of the node (used here only as error context).
 *          tag   = Tag (data type) of the node.
 *          value = Value to construct node from (string, nodes or pairs).
 *
 * Returns: Constructed node, with its start mark set to start.
 *
 * Throws:  ConstructorException wrapping any Exception raised while
 *          constructing, e.g. when the kind of value does not fit the tag.
 */
Node constructNode(T)(const Mark start, const Mark end, const string tag,
                      T value) @safe
    if((is(T : string) || is(T == Node[]) || is(T == Node.Pair[])))
{
    Node newNode;
    try
    {
        // Every `static if` below is resolved per instantiation: for a T the
        // tag cannot apply to, the whole case compiles down to a bare throw.
        switch(tag)
        {
            // The value is ignored for null nodes.
            case "tag:yaml.org,2002:null":
                newNode = Node(YAMLNull(), tag);
                break;
            case "tag:yaml.org,2002:bool":
                static if(is(T == string))
                {
                    newNode = Node(constructBool(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be bools");
            case "tag:yaml.org,2002:int":
                static if(is(T == string))
                {
                    newNode = Node(constructLong(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be ints");
            case "tag:yaml.org,2002:float":
                static if(is(T == string))
                {
                    newNode = Node(constructReal(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be floats");
            case "tag:yaml.org,2002:binary":
                static if(is(T == string))
                {
                    newNode = Node(constructBinary(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be binary data");
            case "tag:yaml.org,2002:timestamp":
                static if(is(T == string))
                {
                    newNode = Node(constructTimestamp(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be timestamps");
            case "tag:yaml.org,2002:str":
                static if(is(T == string))
                {
                    newNode = Node(constructString(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be strings");
            // "value" scalars are constructed exactly like plain strings.
            case "tag:yaml.org,2002:value":
                static if(is(T == string))
                {
                    newNode = Node(constructString(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be values");
            // Ordered maps and pairs arrive as a sequence of nodes.
            case "tag:yaml.org,2002:omap":
                static if(is(T == Node[]))
                {
                    newNode = Node(constructOrderedMap(value), tag);
                    break;
                }
                else throw new Exception("Only sequences can be ordered maps");
            case "tag:yaml.org,2002:pairs":
                static if(is(T == Node[]))
                {
                    newNode = Node(constructPairs(value), tag);
                    break;
                }
                else throw new Exception("Only sequences can be pairs");
            // Sets arrive as a mapping; constructSet keeps only the keys.
            case "tag:yaml.org,2002:set":
                static if(is(T == Node.Pair[]))
                {
                    newNode = Node(constructSet(value), tag);
                    break;
                }
                else throw new Exception("Only mappings can be sets");
            case "tag:yaml.org,2002:seq":
                static if(is(T == Node[]))
                {
                    newNode = Node(constructSequence(value), tag);
                    break;
                }
                else throw new Exception("Only sequences can be sequences");
            case "tag:yaml.org,2002:map":
                static if(is(T == Node.Pair[]))
                {
                    newNode = Node(constructMap(value), tag);
                    break;
                }
                else throw new Exception("Only mappings can be maps");
            // The value is ignored for merge nodes.
            case "tag:yaml.org,2002:merge":
                newNode = Node(YAMLMerge(), tag);
                break;
            // Any other tag: keep the raw value and attach the tag to it.
            default:
                newNode = Node(value, tag);
                break;
        }
    }
    catch(Exception e)
    {
        // Re-throw with the node position attached; only e.msg is carried over.
        throw new ConstructorException("Error constructing " ~ typeid(T).toString()
                                       ~ ":\n" ~ e.msg, start, end);
    }

    // Only the start mark is recorded on the node.
    newNode.startMark_ = start;

    return newNode;
}
  188.  
  189. private:
  190. // Construct a boolean _node.
  191. bool constructBool(const string str) @safe
  192. {
  193. string value = str.toLower();
  194. if(value.among!("yes", "true", "on")){return true;}
  195. if(value.among!("no", "false", "off")){return false;}
  196. throw new Exception("Unable to parse boolean value: " ~ value);
  197. }
  198.  
  199. // Construct an integer (long) _node.
  200. long constructLong(const string str) @safe
  201. {
  202. string value = str.replace("_", "");
  203. const char c = value[0];
  204. const long sign = c != '-' ? 1 : -1;
  205. if(c == '-' || c == '+')
  206. {
  207. value = value[1 .. $];
  208. }
  209.  
  210. enforce(value != "", new Exception("Unable to parse float value: " ~ value));
  211.  
  212. long result;
  213. try
  214. {
  215. //Zero.
  216. if(value == "0") {result = cast(long)0;}
  217. //Binary.
  218. else if(value.startsWith("0b")){result = sign * to!int(value[2 .. $], 2);}
  219. //Hexadecimal.
  220. else if(value.startsWith("0x")){result = sign * to!int(value[2 .. $], 16);}
  221. //Octal.
  222. else if(value[0] == '0') {result = sign * to!int(value, 8);}
  223. //Sexagesimal.
  224. else if(value.canFind(":"))
  225. {
  226. long val;
  227. long base = 1;
  228. foreach_reverse(digit; value.split(":"))
  229. {
  230. val += to!long(digit) * base;
  231. base *= 60;
  232. }
  233. result = sign * val;
  234. }
  235. //Decimal.
  236. else{result = sign * to!long(value);}
  237. }
  238. catch(ConvException e)
  239. {
  240. throw new Exception("Unable to parse integer value: " ~ value);
  241. }
  242.  
  243. return result;
  244. }
  245. @safe unittest
  246. {
  247. string canonical = "685230";
  248. string decimal = "+685_230";
  249. string octal = "02472256";
  250. string hexadecimal = "0x_0A_74_AE";
  251. string binary = "0b1010_0111_0100_1010_1110";
  252. string sexagesimal = "190:20:30";
  253.  
  254. assert(685230 == constructLong(canonical));
  255. assert(685230 == constructLong(decimal));
  256. assert(685230 == constructLong(octal));
  257. assert(685230 == constructLong(hexadecimal));
  258. assert(685230 == constructLong(binary));
  259. assert(685230 == constructLong(sexagesimal));
  260. }
  261.  
  262. // Construct a floating point (real) _node.
  263. real constructReal(const string str) @safe
  264. {
  265. string value = str.replace("_", "").toLower();
  266. const char c = value[0];
  267. const real sign = c != '-' ? 1.0 : -1.0;
  268. if(c == '-' || c == '+')
  269. {
  270. value = value[1 .. $];
  271. }
  272.  
  273. enforce(value != "" && value != "nan" && value != "inf" && value != "-inf",
  274. new Exception("Unable to parse float value: " ~ value));
  275.  
  276. real result;
  277. try
  278. {
  279. //Infinity.
  280. if (value == ".inf"){result = sign * real.infinity;}
  281. //Not a Number.
  282. else if(value == ".nan"){result = real.nan;}
  283. //Sexagesimal.
  284. else if(value.canFind(":"))
  285. {
  286. real val = 0.0;
  287. real base = 1.0;
  288. foreach_reverse(digit; value.split(":"))
  289. {
  290. val += to!real(digit) * base;
  291. base *= 60.0;
  292. }
  293. result = sign * val;
  294. }
  295. //Plain floating point.
  296. else{result = sign * to!real(value);}
  297. }
  298. catch(ConvException e)
  299. {
  300. throw new Exception("Unable to parse float value: \"" ~ value ~ "\"");
  301. }
  302.  
  303. return result;
  304. }
  305. @safe unittest
  306. {
  307. bool eq(real a, real b, real epsilon = 0.2) @safe
  308. {
  309. return a >= (b - epsilon) && a <= (b + epsilon);
  310. }
  311.  
  312. string canonical = "6.8523015e+5";
  313. string exponential = "685.230_15e+03";
  314. string fixed = "685_230.15";
  315. string sexagesimal = "190:20:30.15";
  316. string negativeInf = "-.inf";
  317. string NaN = ".NaN";
  318.  
  319. assert(eq(685230.15, constructReal(canonical)));
  320. assert(eq(685230.15, constructReal(exponential)));
  321. assert(eq(685230.15, constructReal(fixed)));
  322. assert(eq(685230.15, constructReal(sexagesimal)));
  323. assert(eq(-real.infinity, constructReal(negativeInf)));
  324. assert(to!string(constructReal(NaN)) == "nan");
  325. }
  326.  
  327. // Construct a binary (base64) _node.
  328. ubyte[] constructBinary(const string value) @safe
  329. {
  330. import std.ascii : newline;
  331. import std.array : array;
  332.  
  333. // For an unknown reason, this must be nested to work (compiler bug?).
  334. try
  335. {
  336. return Base64.decode(value.representation.filter!(c => !newline.canFind(c)).array);
  337. }
  338. catch(Base64Exception e)
  339. {
  340. throw new Exception("Unable to decode base64 value: " ~ e.msg);
  341. }
  342. }
  343.  
  344. @safe unittest
  345. {
  346. auto test = "The Answer: 42".representation;
  347. char[] buffer;
  348. buffer.length = 256;
  349. string input = Base64.encode(test, buffer).idup;
  350. const value = constructBinary(input);
  351. assert(value == test);
  352. assert(value == [84, 104, 101, 32, 65, 110, 115, 119, 101, 114, 58, 32, 52, 50]);
  353. }
  354.  
  355. // Construct a timestamp (SysTime) _node.
  356. SysTime constructTimestamp(const string str) @safe
  357. {
  358. string value = str;
  359.  
  360. auto YMDRegexp = regex("^([0-9][0-9][0-9][0-9])-([0-9][0-9]?)-([0-9][0-9]?)");
  361. auto HMSRegexp = regex("^[Tt \t]+([0-9][0-9]?):([0-9][0-9]):([0-9][0-9])(\\.[0-9]*)?");
  362. auto TZRegexp = regex("^[ \t]*Z|([-+][0-9][0-9]?)(:[0-9][0-9])?");
  363.  
  364. try
  365. {
  366. // First, get year, month and day.
  367. auto matches = match(value, YMDRegexp);
  368.  
  369. enforce(!matches.empty,
  370. new Exception("Unable to parse timestamp value: " ~ value));
  371.  
  372. auto captures = matches.front.captures;
  373. const year = to!int(captures[1]);
  374. const month = to!int(captures[2]);
  375. const day = to!int(captures[3]);
  376.  
  377. // If available, get hour, minute, second and fraction, if present.
  378. value = matches.front.post;
  379. matches = match(value, HMSRegexp);
  380. if(matches.empty)
  381. {
  382. return SysTime(DateTime(year, month, day), UTC());
  383. }
  384.  
  385. captures = matches.front.captures;
  386. const hour = to!int(captures[1]);
  387. const minute = to!int(captures[2]);
  388. const second = to!int(captures[3]);
  389. const hectonanosecond = cast(int)(to!real("0" ~ captures[4]) * 10_000_000);
  390.  
  391. // If available, get timezone.
  392. value = matches.front.post;
  393. matches = match(value, TZRegexp);
  394. if(matches.empty || matches.front.captures[0] == "Z")
  395. {
  396. // No timezone.
  397. return SysTime(DateTime(year, month, day, hour, minute, second),
  398. hectonanosecond.dur!"hnsecs", UTC());
  399. }
  400.  
  401. // We have a timezone, so parse it.
  402. captures = matches.front.captures;
  403. int sign = 1;
  404. int tzHours;
  405. if(!captures[1].empty)
  406. {
  407. if(captures[1][0] == '-') {sign = -1;}
  408. tzHours = to!int(captures[1][1 .. $]);
  409. }
  410. const tzMinutes = (!captures[2].empty) ? to!int(captures[2][1 .. $]) : 0;
  411. const tzOffset = dur!"minutes"(sign * (60 * tzHours + tzMinutes));
  412.  
  413. return SysTime(DateTime(year, month, day, hour, minute, second),
  414. hectonanosecond.dur!"hnsecs",
  415. new immutable SimpleTimeZone(tzOffset));
  416. }
  417. catch(ConvException e)
  418. {
  419. throw new Exception("Unable to parse timestamp value " ~ value ~ " : " ~ e.msg);
  420. }
  421. catch(DateTimeException e)
  422. {
  423. throw new Exception("Invalid timestamp value " ~ value ~ " : " ~ e.msg);
  424. }
  425.  
  426. assert(false, "This code should never be reached");
  427. }
  428. @safe unittest
  429. {
  430. string timestamp(string value)
  431. {
  432. return constructTimestamp(value).toISOString();
  433. }
  434.  
  435. string canonical = "2001-12-15T02:59:43.1Z";
  436. string iso8601 = "2001-12-14t21:59:43.10-05:00";
  437. string spaceSeparated = "2001-12-14 21:59:43.10 -5";
  438. string noTZ = "2001-12-15 2:59:43.10";
  439. string noFraction = "2001-12-15 2:59:43";
  440. string ymd = "2002-12-14";
  441.  
  442. assert(timestamp(canonical) == "20011215T025943.1Z");
  443. //avoiding float conversion errors
  444. assert(timestamp(iso8601) == "20011214T215943.0999999-05:00" ||
  445. timestamp(iso8601) == "20011214T215943.1-05:00");
  446. assert(timestamp(spaceSeparated) == "20011214T215943.0999999-05:00" ||
  447. timestamp(spaceSeparated) == "20011214T215943.1-05:00");
  448. assert(timestamp(noTZ) == "20011215T025943.0999999Z" ||
  449. timestamp(noTZ) == "20011215T025943.1Z");
  450. assert(timestamp(noFraction) == "20011215T025943Z");
  451. assert(timestamp(ymd) == "20021214T000000Z");
  452. }
  453.  
  454. // Construct a string _node.
  455. string constructString(const string str) @safe
  456. {
  457. return str;
  458. }
  459.  
  460. // Convert a sequence of single-element mappings into a sequence of pairs.
  461. Node.Pair[] getPairs(string type, const Node[] nodes) @safe
  462. {
  463. Node.Pair[] pairs;
  464. pairs.reserve(nodes.length);
  465. foreach(node; nodes)
  466. {
  467. enforce(node.nodeID == NodeID.mapping && node.length == 1,
  468. new Exception("While constructing " ~ type ~
  469. ", expected a mapping with single element"));
  470.  
  471. pairs ~= node.as!(Node.Pair[]);
  472. }
  473.  
  474. return pairs;
  475. }
  476.  
  477. // Construct an ordered map (ordered sequence of key:value pairs without duplicates) _node.
  478. Node.Pair[] constructOrderedMap(const Node[] nodes) @safe
  479. {
  480. auto pairs = getPairs("ordered map", nodes);
  481.  
  482. //Detect duplicates.
  483. //TODO this should be replaced by something with deterministic memory allocation.
  484. auto keys = new RedBlackTree!Node();
  485. foreach(ref pair; pairs)
  486. {
  487. enforce(!(pair.key in keys),
  488. new Exception("Duplicate entry in an ordered map: "
  489. ~ pair.key.debugString()));
  490. keys.insert(pair.key);
  491. }
  492. return pairs;
  493. }
  494. @safe unittest
  495. {
  496. Node[] alternateTypes(uint length) @safe
  497. {
  498. Node[] pairs;
  499. foreach(long i; 0 .. length)
  500. {
  501. auto pair = (i % 2) ? Node.Pair(i.to!string, i) : Node.Pair(i, i.to!string);
  502. pairs ~= Node([pair]);
  503. }
  504. return pairs;
  505. }
  506.  
  507. Node[] sameType(uint length) @safe
  508. {
  509. Node[] pairs;
  510. foreach(long i; 0 .. length)
  511. {
  512. auto pair = Node.Pair(i.to!string, i);
  513. pairs ~= Node([pair]);
  514. }
  515. return pairs;
  516. }
  517.  
  518. assertThrown(constructOrderedMap(alternateTypes(8) ~ alternateTypes(2)));
  519. assertNotThrown(constructOrderedMap(alternateTypes(8)));
  520. assertThrown(constructOrderedMap(sameType(64) ~ sameType(16)));
  521. assertThrown(constructOrderedMap(alternateTypes(64) ~ alternateTypes(16)));
  522. assertNotThrown(constructOrderedMap(sameType(64)));
  523. assertNotThrown(constructOrderedMap(alternateTypes(64)));
  524. }
  525.  
  526. // Construct a pairs (ordered sequence of key: value pairs allowing duplicates) _node.
  527. Node.Pair[] constructPairs(const Node[] nodes) @safe
  528. {
  529. return getPairs("pairs", nodes);
  530. }
  531.  
  532. // Construct a set _node.
  533. Node[] constructSet(const Node.Pair[] pairs) @safe
  534. {
  535. // In future, the map here should be replaced with something with deterministic
  536. // memory allocation if possible.
  537. // Detect duplicates.
  538. ubyte[Node] map;
  539. Node[] nodes;
  540. nodes.reserve(pairs.length);
  541. foreach(pair; pairs)
  542. {
  543. enforce((pair.key in map) is null, new Exception("Duplicate entry in a set"));
  544. map[pair.key] = 0;
  545. nodes ~= pair.key;
  546. }
  547.  
  548. return nodes;
  549. }
  550. @safe unittest
  551. {
  552. Node.Pair[] set(uint length) @safe
  553. {
  554. Node.Pair[] pairs;
  555. foreach(long i; 0 .. length)
  556. {
  557. pairs ~= Node.Pair(i.to!string, YAMLNull());
  558. }
  559.  
  560. return pairs;
  561. }
  562.  
  563. auto DuplicatesShort = set(8) ~ set(2);
  564. auto noDuplicatesShort = set(8);
  565. auto DuplicatesLong = set(64) ~ set(4);
  566. auto noDuplicatesLong = set(64);
  567.  
  568. bool eq(Node.Pair[] a, Node[] b)
  569. {
  570. if(a.length != b.length){return false;}
  571. foreach(i; 0 .. a.length)
  572. {
  573. if(a[i].key != b[i])
  574. {
  575. return false;
  576. }
  577. }
  578. return true;
  579. }
  580.  
  581. auto nodeDuplicatesShort = DuplicatesShort.dup;
  582. auto nodeNoDuplicatesShort = noDuplicatesShort.dup;
  583. auto nodeDuplicatesLong = DuplicatesLong.dup;
  584. auto nodeNoDuplicatesLong = noDuplicatesLong.dup;
  585.  
  586. assertThrown(constructSet(nodeDuplicatesShort));
  587. assertNotThrown(constructSet(nodeNoDuplicatesShort));
  588. assertThrown(constructSet(nodeDuplicatesLong));
  589. assertNotThrown(constructSet(nodeNoDuplicatesLong));
  590. }
  591.  
  592. // Construct a sequence (array) _node.
  593. Node[] constructSequence(Node[] nodes) @safe
  594. {
  595. return nodes;
  596. }
  597.  
  598. // Construct an unordered map (unordered set of key:value _pairs without duplicates) _node.
  599. Node.Pair[] constructMap(Node.Pair[] pairs) @safe
  600. {
  601. //Detect duplicates.
  602. //TODO this should be replaced by something with deterministic memory allocation.
  603. auto keys = new RedBlackTree!Node();
  604. foreach(ref pair; pairs)
  605. {
  606. enforce(!(pair.key in keys),
  607. new Exception("Duplicate entry in a map: " ~ pair.key.debugString()));
  608. keys.insert(pair.key);
  609. }
  610. return pairs;
  611. }