url-state-machine.js 31 KB


  1. "use strict";
  2. const tr46 = require("tr46");
  3. const infra = require("./infra");
  4. const { utf8DecodeWithoutBOM } = require("./encoding");
  5. const { percentDecodeString, utf8PercentEncodeCodePoint, utf8PercentEncodeString, isC0ControlPercentEncode,
  6. isFragmentPercentEncode, isQueryPercentEncode, isSpecialQueryPercentEncode, isPathPercentEncode,
  7. isUserinfoPercentEncode } = require("./percent-encoding");
  // Shorthand used throughout the parser: the numeric code point of the first symbol in `char`.
  function p(char) {
    const codePoint = char.codePointAt(0);
    return codePoint;
  }
  // Special schemes mapped to their default port (`null` for "file", which has none).
  // The table is built on a null prototype so that looking up an arbitrary scheme string
  // (for example "constructor") can never resolve to a member inherited from Object.prototype.
  // It is frozen because nothing in this file is expected to mutate it.
  const specialSchemes = Object.freeze(Object.assign(Object.create(null), {
    ftp: 21,
    file: null,
    http: 80,
    https: 443,
    ws: 80,
    wss: 443
  }));
  // Unique sentinel returned by the parsing helpers to signal failure.
  const failure = Symbol("failure");
  // Counts the code points in `str` (string iteration walks code points, not UTF-16 code units).
  function countSymbols(str) {
    let total = 0;
    // eslint-disable-next-line no-unused-vars
    for (const symbol of str) {
      ++total;
    }
    return total;
  }
  // Returns the entry of the code point array `input` at `idx` as a string,
  // or undefined when that entry is not a number (e.g. past the end of the array).
  function at(input, idx) {
    const codePoint = input[idx];
    if (isNaN(codePoint)) {
      return undefined;
    }
    return String.fromCodePoint(codePoint);
  }
  // True when `buffer` is "." or its percent-encoded form "%2e" (any letter case).
  function isSingleDot(buffer) {
    if (buffer === ".") {
      return true;
    }
    return buffer.toLowerCase() === "%2e";
  }
  // True when `buffer` is ".." with either dot optionally written as "%2e" (any letter case).
  function isDoubleDot(buffer) {
    switch (buffer.toLowerCase()) {
      case "..":
      case "%2e.":
      case ".%2e":
      case "%2e%2e":
        return true;
      default:
        return false;
    }
  }
  // True when the two code points form a Windows drive letter: an ASCII letter followed by ":" or "|".
  function isWindowsDriveLetterCodePoints(cp1, cp2) {
    const colon = p(":");
    const pipe = p("|");
    return infra.isASCIIAlpha(cp1) && (cp2 === colon || cp2 === pipe);
  }
  // True when `string` is exactly two code units: an ASCII letter followed by ":" or "|".
  function isWindowsDriveLetterString(string) {
    if (string.length !== 2) {
      return false;
    }
    const separator = string[1];
    return infra.isASCIIAlpha(string.codePointAt(0)) && (separator === ":" || separator === "|");
  }
  // True when `string` is exactly two code units: an ASCII letter followed by ":".
  // Non-string values yield false instead of throwing: the "file slash" state passes
  // `base.path[0]`, which is undefined whenever the base path has no segments.
  function isNormalizedWindowsDriveLetterString(string) {
    if (typeof string !== "string" || string.length !== 2) {
      return false;
    }
    return infra.isASCIIAlpha(string.codePointAt(0)) && string[1] === ":";
  }
  // True when `string` contains NUL, tab, LF, CR, space or one of # / : < > ? @ [ \ ] ^ |
  function containsForbiddenHostCodePoint(string) {
    const firstForbidden = string.search(/\u0000|\u0009|\u000A|\u000D|\u0020|#|\/|:|<|>|\?|@|\[|\\|\]|\^|\|/u);
    return firstForbidden !== -1;
  }
  // True when `string` contains a forbidden host code point, a C0 control (U+0000-U+001F), "%" or U+007F.
  function containsForbiddenDomainCodePoint(string) {
    if (containsForbiddenHostCodePoint(string)) {
      return true;
    }
    return string.search(/[\u0000-\u001F]|%|\u007F/u) !== -1;
  }
  // True when `scheme` is one of the keys of `specialSchemes`.
  // An own-property check is used on purpose: a plain `specialSchemes[scheme] !== undefined`
  // lookup also matches members inherited from Object.prototype, so the (syntactically valid)
  // scheme "constructor" would wrongly be treated as special.
  function isSpecialScheme(scheme) {
    return Object.prototype.hasOwnProperty.call(specialSchemes, scheme);
  }
  // True when the scheme of the URL record `url` is a special scheme.
  function isSpecial(url) {
    const { scheme } = url;
    return isSpecialScheme(scheme);
  }
  // Negation of the special-scheme check for the URL record `url`.
  function isNotSpecial(url) {
    const special = isSpecialScheme(url.scheme);
    return !special;
  }
  // Returns the default port recorded for `scheme` in `specialSchemes`
  // (a number, or null for "file"), or undefined when the scheme is not listed.
  // Only own keys are consulted so that a scheme such as "constructor" does not
  // return a member inherited from Object.prototype.
  function defaultPort(scheme) {
    if (!Object.prototype.hasOwnProperty.call(specialSchemes, scheme)) {
      return undefined;
    }
    return specialSchemes[scheme];
  }
  // Parses one dot-separated part of an IPv4 address.
  // A "0x"/"0X" prefix selects hexadecimal, any other leading "0" (with more characters
  // following) selects octal, otherwise decimal. Returns `failure` for an empty part or a
  // digit that is invalid in the selected radix; a bare prefix such as "0x" parses as 0.
  function parseIPv4Number(input) {
    if (input === "") {
      return failure;
    }

    let radix = 10;
    let digits = input;
    if (digits.length >= 2 && digits[0] === "0") {
      if (digits[1].toLowerCase() === "x") {
        digits = digits.slice(2);
        radix = 16;
      } else {
        digits = digits.slice(1);
        radix = 8;
      }
    }

    if (digits === "") {
      return 0;
    }

    let invalidDigit;
    switch (radix) {
      case 16:
        invalidDigit = /[^0-9A-Fa-f]/u;
        break;
      case 10:
        invalidDigit = /[^0-9]/u;
        break;
      default:
        invalidDigit = /[^0-7]/u;
    }

    return invalidDigit.test(digits) ? failure : parseInt(digits, radix);
  }
  // Parses a dotted IPv4 address string into a single number.
  // One trailing dot is tolerated. Returns `failure` when there are more than four parts,
  // when any part fails to parse, when any part but the last exceeds 255, or when the last
  // part does not fit in the bytes that remain.
  function parseIPv4(input) {
    const segments = input.split(".");
    if (segments.length > 1 && segments[segments.length - 1] === "") {
      segments.pop();
    }
    if (segments.length > 4) {
      return failure;
    }

    const values = [];
    for (const segment of segments) {
      const value = parseIPv4Number(segment);
      if (value === failure) {
        return failure;
      }
      values.push(value);
    }

    const trailing = values[values.length - 1];
    const leading = values.slice(0, -1);
    if (leading.some(value => value > 255)) {
      return failure;
    }
    if (trailing >= 256 ** (5 - values.length)) {
      return failure;
    }

    // The last part fills the low-order bytes; each earlier part occupies one byte from the top down.
    return leading.reduce((sum, value, index) => sum + value * 256 ** (3 - index), trailing);
  }
  // Serializes a numeric IPv4 address as four dot-separated decimal bytes.
  function serializeIPv4(address) {
    const octets = [];
    let remaining = address;
    for (let count = 0; count < 4; ++count) {
      // Peel bytes off the low end, so each new byte goes in front of the ones already collected.
      octets.unshift(String(remaining % 256));
      remaining = Math.floor(remaining / 256);
    }
    return octets.join(".");
  }
  // Parses the text between the brackets of an IPv6 literal into an array of eight 16-bit pieces.
  // Supports a single "::" compression and a trailing dotted-decimal IPv4 part.
  // Returns `failure` on any malformed input.
  function parseIPv6(input) {
    const COLON = p(":");
    const DOT = p(".");
    const pieces = [0, 0, 0, 0, 0, 0, 0, 0];
    const codePoints = Array.from(input, ch => ch.codePointAt(0));
    let pieceIndex = 0;
    let compressAt = null;
    let cursor = 0;

    // A leading ":" is only valid as the first half of "::".
    if (codePoints[cursor] === COLON) {
      if (codePoints[cursor + 1] !== COLON) {
        return failure;
      }
      cursor += 2;
      pieceIndex += 1;
      compressAt = pieceIndex;
    }

    while (cursor < codePoints.length) {
      if (pieceIndex === 8) {
        return failure;
      }

      if (codePoints[cursor] === COLON) {
        // A second compression marker is not allowed.
        if (compressAt !== null) {
          return failure;
        }
        cursor += 1;
        pieceIndex += 1;
        compressAt = pieceIndex;
        continue;
      }

      // Read up to four hex digits for this piece.
      let value = 0;
      let digitCount = 0;
      while (digitCount < 4 && infra.isASCIIHex(codePoints[cursor])) {
        value = value * 0x10 + parseInt(at(codePoints, cursor), 16);
        cursor += 1;
        digitCount += 1;
      }

      if (codePoints[cursor] === DOT) {
        // The digits just read were the start of an embedded IPv4 address: rewind and re-read as decimal.
        if (digitCount === 0) {
          return failure;
        }
        cursor -= digitCount;
        if (pieceIndex > 6) {
          return failure;
        }

        let numbersSeen = 0;
        while (codePoints[cursor] !== undefined) {
          let ipv4Piece = null;
          if (numbersSeen > 0) {
            if (codePoints[cursor] !== DOT || numbersSeen >= 4) {
              return failure;
            }
            cursor += 1;
          }
          if (!infra.isASCIIDigit(codePoints[cursor])) {
            return failure;
          }
          while (infra.isASCIIDigit(codePoints[cursor])) {
            const digit = parseInt(at(codePoints, cursor), 10);
            if (ipv4Piece === null) {
              ipv4Piece = digit;
            } else if (ipv4Piece === 0) {
              // Leading zeros are rejected.
              return failure;
            } else {
              ipv4Piece = ipv4Piece * 10 + digit;
            }
            if (ipv4Piece > 255) {
              return failure;
            }
            cursor += 1;
          }
          // Two IPv4 bytes are packed into each 16-bit piece.
          pieces[pieceIndex] = pieces[pieceIndex] * 0x100 + ipv4Piece;
          numbersSeen += 1;
          if (numbersSeen === 2 || numbersSeen === 4) {
            pieceIndex += 1;
          }
        }
        if (numbersSeen !== 4) {
          return failure;
        }
        break;
      }

      if (codePoints[cursor] === COLON) {
        cursor += 1;
        // A single trailing ":" is invalid.
        if (codePoints[cursor] === undefined) {
          return failure;
        }
      } else if (codePoints[cursor] !== undefined) {
        return failure;
      }
      pieces[pieceIndex] = value;
      pieceIndex += 1;
    }

    if (compressAt === null) {
      return pieceIndex === 8 ? pieces : failure;
    }

    // Move the pieces that followed "::" to the end of the address, leaving zeros in the gap.
    let swaps = pieceIndex - compressAt;
    pieceIndex = 7;
    while (pieceIndex !== 0 && swaps > 0) {
      const source = compressAt + swaps - 1;
      const held = pieces[source];
      pieces[source] = pieces[pieceIndex];
      pieces[pieceIndex] = held;
      pieceIndex -= 1;
      swaps -= 1;
    }
    return pieces;
  }
  // Serializes an array of eight 16-bit pieces as lowercase hex groups separated by ":",
  // replacing the run of zeros chosen by findTheIPv6AddressCompressedPieceIndex with "::".
  function serializeIPv6(address) {
    const compressAt = findTheIPv6AddressCompressedPieceIndex(address);
    let serialized = "";
    let skippingZeros = false;

    for (let index = 0; index <= 7; ++index) {
      const piece = address[index];

      if (skippingZeros) {
        if (piece === 0) {
          continue;
        }
        skippingZeros = false;
      }

      if (index === compressAt) {
        // At the very start both colons are written; elsewhere the previous piece already added one.
        serialized += index === 0 ? "::" : ":";
        skippingZeros = true;
        continue;
      }

      serialized += piece.toString(16);
      if (index !== 7) {
        serialized += ":";
      }
    }

    return serialized;
  }
  // Parses a host string.
  // - "[...]" input is handed to parseIPv6 (missing closing bracket is a failure).
  // - When `isOpaque` is true the input is handed to parseOpaqueHost.
  // - Otherwise the input is percent-decoded, UTF-8 decoded and converted with domainToASCII;
  //   a result that ends in a number is then parsed as IPv4.
  // Returns the parsed host or `failure`.
  function parseHost(input, isOpaque = false) {
    if (input[0] === "[") {
      return input.endsWith("]") ? parseIPv6(input.slice(1, -1)) : failure;
    }

    if (isOpaque) {
      return parseOpaqueHost(input);
    }

    const decoded = utf8DecodeWithoutBOM(percentDecodeString(input));
    const ascii = domainToASCII(decoded);
    if (ascii === failure) {
      return failure;
    }

    return endsInANumber(ascii) ? parseIPv4(ascii) : ascii;
  }
  // True when the last dot-separated label of `input` (ignoring one trailing dot)
  // either parses as an IPv4 number or consists solely of ASCII digits.
  function endsInANumber(input) {
    const labels = input.split(".");
    if (labels[labels.length - 1] === "") {
      if (labels.length === 1) {
        return false;
      }
      labels.pop();
    }

    const lastLabel = labels[labels.length - 1];
    return parseIPv4Number(lastLabel) !== failure || /^[0-9]+$/u.test(lastLabel);
  }
  // Parses the host of a non-special URL: rejects input containing a forbidden host code point,
  // otherwise percent-encodes it using the C0 control percent-encode predicate.
  function parseOpaqueHost(input) {
    const hasForbidden = containsForbiddenHostCodePoint(input);
    return hasForbidden ? failure : utf8PercentEncodeString(input, isC0ControlPercentEncode);
  }
  // Returns the index where the first longest run of two or more consecutive zero pieces
  // starts, or null when `address` has no such run.
  function findTheIPv6AddressCompressedPieceIndex(address) {
    let bestStart = null;
    let bestLength = 1; // a run must be longer than one piece to count
    let runStart = null;
    let runLength = 0;

    for (const [index, piece] of address.entries()) {
      if (piece === 0) {
        if (runStart === null) {
          runStart = index;
        }
        runLength += 1;
        continue;
      }

      // A non-zero piece closes the current run; keep it only if it beats the best so far.
      if (runLength > bestLength) {
        bestStart = runStart;
        bestLength = runLength;
      }
      runStart = null;
      runLength = 0;
    }

    // A run may extend to the end of the address without being closed inside the loop.
    return runLength > bestLength ? runStart : bestStart;
  }
  // Serializes a parsed host: arrays are IPv6 addresses (wrapped in brackets),
  // numbers are IPv4 addresses, anything else is returned unchanged.
  function serializeHost(host) {
    if (host instanceof Array) {
      const inner = serializeIPv6(host);
      return `[${inner}]`;
    }
    return typeof host === "number" ? serializeIPv4(host) : host;
  }
  // Converts `domain` with tr46.toASCII. `beStrict` switches on the hyphen, STD3 and DNS-length
  // checks. Returns `failure` when the conversion yields null, or - in non-strict mode only -
  // when the result is empty or contains a forbidden domain code point.
  function domainToASCII(domain, beStrict = false) {
    const options = {
      checkHyphens: beStrict,
      checkBidi: true,
      checkJoiners: true,
      useSTD3ASCIIRules: beStrict,
      transitionalProcessing: false,
      verifyDNSLength: beStrict,
      ignoreInvalidPunycode: false
    };
    const ascii = tr46.toASCII(domain, options);

    if (ascii === null) {
      return failure;
    }
    if (beStrict) {
      return ascii;
    }
    if (ascii === "" || containsForbiddenDomainCodePoint(ascii)) {
      return failure;
    }
    return ascii;
  }
  // Strips leading and trailing code units <= U+0020 (C0 controls and space) from `string`.
  function trimControlChars(string) {
    // Avoid using regexp because of this V8 bug: https://issues.chromium.org/issues/42204424
    let start = 0;
    let end = string.length;
    while (start < end && string.charCodeAt(start) <= 0x20) {
      ++start;
    }
    while (end > start && string.charCodeAt(end - 1) <= 0x20) {
      --end;
    }
    return string.substring(start, end);
  }
  // Removes every tab, line feed and carriage return from `url`, wherever it occurs.
  function trimTabAndNewline(url) {
    const tabOrNewline = /\u0009|\u000A|\u000D/ug;
    return url.replace(tabOrNewline, "");
  }
  // Removes the last segment of `url.path` in place, except when the path is empty or when
  // a "file" URL's only segment is a normalized Windows drive letter.
  function shortenPath(url) {
    const segments = url.path;
    if (segments.length === 0) {
      return;
    }

    const isLoneDriveLetter = url.scheme === "file" && segments.length === 1 &&
      isNormalizedWindowsDriveLetter(segments[0]);
    if (!isLoneDriveLetter) {
      segments.pop();
    }
  }
  // True when the URL record has a non-empty username or a non-empty password.
  function includesCredentials(url) {
    const hasNoCredentials = url.username === "" && url.password === "";
    return !hasNoCredentials;
  }
  // True when the URL record has a null or empty host, or uses the "file" scheme.
  function cannotHaveAUsernamePasswordPort(url) {
    const { host } = url;
    if (host === null || host === "") {
      return true;
    }
    return url.scheme === "file";
  }
  // True when the URL record's path is a string rather than a list of segments.
  function hasAnOpaquePath(url) {
    const { path } = url;
    return typeof path === "string";
  }
  // True when `string` is an ASCII letter followed by ":" and nothing else.
  function isNormalizedWindowsDriveLetter(string) {
    const driveLetterPattern = /^[A-Za-z]:$/u;
    return driveLetterPattern.test(string);
  }
  // Runs the URL parsing state machine over `input`.
  //   input            - string to parse
  //   base             - base URL record, or a falsy value for none (stored as null)
  //   encodingOverride - encoding label; falls back to "utf-8" when falsy
  //   url              - existing URL record to modify; a fresh one is created when falsy
  //   stateOverride    - name of the state to start in; defaults to "scheme start"
  // The outcome is left on the instance: `url`, `failure` and `parseError`.
  function URLStateMachine(input, base, encodingOverride, url, stateOverride) {
    this.pointer = 0;
    this.input = input;
    this.base = base || null;
    this.encodingOverride = encodingOverride || "utf-8";
    this.stateOverride = stateOverride;
    this.url = url;
    this.failure = false;
    this.parseError = false;

    let text = input;

    // Leading/trailing control characters are only stripped when starting from a fresh record.
    if (!this.url) {
      this.url = {
        scheme: "",
        username: "",
        password: "",
        host: null,
        port: null,
        path: [],
        query: null,
        fragment: null
      };

      const trimmed = trimControlChars(text);
      if (trimmed !== text) {
        this.parseError = true;
      }
      text = trimmed;
    }

    const stripped = trimTabAndNewline(text);
    if (stripped !== text) {
      this.parseError = true;
    }

    this.state = stateOverride || "scheme start";

    this.buffer = "";
    this.atFlag = false;
    this.arrFlag = false;
    this.passwordTokenSeenFlag = false;

    // From here on the input is an array of code points; index `length` stands for end-of-input.
    this.input = Array.from(stripped, ch => ch.codePointAt(0));

    while (this.pointer <= this.input.length) {
      const c = this.input[this.pointer];
      const cStr = isNaN(c) ? undefined : String.fromCodePoint(c);

      // exec state machine
      const outcome = this[`parse ${this.state}`](c, cStr);
      if (outcome === failure) {
        this.failure = true;
        break;
      }
      if (!outcome) {
        break; // terminate algorithm
      }
      ++this.pointer;
    }
  }
  // "scheme start" state: an ASCII letter begins the scheme buffer. Anything else re-processes
  // the same code point in the "no scheme" state, or fails when a state override is active.
  URLStateMachine.prototype["parse scheme start"] = function parseSchemeStart(c, cStr) {
    if (infra.isASCIIAlpha(c)) {
      this.buffer += cStr.toLowerCase();
      this.state = "scheme";
      return true;
    }

    if (this.stateOverride) {
      this.parseError = true;
      return failure;
    }

    this.state = "no scheme";
    --this.pointer;
    return true;
  };
  // "scheme" state: accumulates scheme characters until ":".
  // With a state override the new scheme is only applied when it is compatible with the
  // existing record, and parsing stops. Otherwise the next state is chosen from the scheme.
  // Any other code point restarts in "no scheme" (or fails under a state override).
  URLStateMachine.prototype["parse scheme"] = function parseScheme(c, cStr) {
    if (infra.isASCIIAlphanumeric(c) || c === p("+") || c === p("-") || c === p(".")) {
      this.buffer += cStr.toLowerCase();
      return true;
    }

    if (c !== p(":")) {
      if (this.stateOverride) {
        this.parseError = true;
        return failure;
      }
      // Not a scheme after all: start over from the first code point.
      this.buffer = "";
      this.state = "no scheme";
      this.pointer = -1;
      return true;
    }

    if (this.stateOverride) {
      // Switching between special and non-special schemes is not allowed.
      if (isSpecial(this.url) && !isSpecialScheme(this.buffer)) {
        return false;
      }
      if (!isSpecial(this.url) && isSpecialScheme(this.buffer)) {
        return false;
      }

      const hasCredentialsOrPort = includesCredentials(this.url) || this.url.port !== null;
      if (hasCredentialsOrPort && this.buffer === "file") {
        return false;
      }

      if (this.url.scheme === "file" && this.url.host === "") {
        return false;
      }

      this.url.scheme = this.buffer;
      if (this.url.port === defaultPort(this.url.scheme)) {
        this.url.port = null;
      }
      return false;
    }

    this.url.scheme = this.buffer;
    this.buffer = "";

    const slash = p("/");
    const next = this.input[this.pointer + 1];
    if (this.url.scheme === "file") {
      if (next !== slash || this.input[this.pointer + 2] !== slash) {
        this.parseError = true;
      }
      this.state = "file";
    } else if (isSpecial(this.url)) {
      const sameSchemeAsBase = this.base !== null && this.base.scheme === this.url.scheme;
      this.state = sameSchemeAsBase ? "special relative or authority" : "special authority slashes";
    } else if (next === slash) {
      this.state = "path or authority";
      ++this.pointer;
    } else {
      this.url.path = "";
      this.state = "opaque path";
    }

    return true;
  };
  // "no scheme" state: the input is relative, so a base is required.
  // A base with an opaque path only accepts a fragment-only input ("#..."); otherwise the
  // current code point is re-processed in the "file" or "relative" state.
  URLStateMachine.prototype["parse no scheme"] = function parseNoScheme(c) {
    const { base } = this;
    if (base === null) {
      return failure;
    }

    if (hasAnOpaquePath(base)) {
      if (c !== p("#")) {
        return failure;
      }
      this.url.scheme = base.scheme;
      this.url.path = base.path;
      this.url.query = base.query;
      this.url.fragment = "";
      this.state = "fragment";
      return true;
    }

    this.state = base.scheme === "file" ? "file" : "relative";
    --this.pointer;
    return true;
  };
  // "special relative or authority" state: "//" introduces an authority; anything else is
  // flagged as a parse error and re-processed in the "relative" state.
  URLStateMachine.prototype["parse special relative or authority"] = function parseSpecialRelativeOrAuthority(c) {
    const slash = p("/");
    const hasDoubleSlash = c === slash && this.input[this.pointer + 1] === slash;

    if (hasDoubleSlash) {
      this.state = "special authority ignore slashes";
      ++this.pointer;
      return true;
    }

    this.parseError = true;
    this.state = "relative";
    --this.pointer;
    return true;
  };
  // "path or authority" state: a second "/" starts an authority; any other code point is
  // re-processed in the "path" state.
  URLStateMachine.prototype["parse path or authority"] = function parsePathOrAuthority(c) {
    if (c === p("/")) {
      this.state = "authority";
      return true;
    }

    this.state = "path";
    --this.pointer;
    return true;
  };
  // "relative" state: the URL inherits the base scheme. A leading slash (or backslash for
  // special schemes) moves to "relative slash"; otherwise credentials, host, port, path and
  // query are copied from the base and then adjusted according to the current code point.
  URLStateMachine.prototype["parse relative"] = function parseRelative(c) {
    const { base, url } = this;
    url.scheme = base.scheme;

    if (c === p("/")) {
      this.state = "relative slash";
      return true;
    }

    if (isSpecial(url) && c === p("\\")) {
      this.parseError = true;
      this.state = "relative slash";
      return true;
    }

    url.username = base.username;
    url.password = base.password;
    url.host = base.host;
    url.port = base.port;
    url.path = base.path.slice();
    url.query = base.query;

    if (c === p("?")) {
      url.query = "";
      this.state = "query";
    } else if (c === p("#")) {
      url.fragment = "";
      this.state = "fragment";
    } else if (!isNaN(c)) {
      // A relative path replaces the base's last segment and discards its query.
      url.query = null;
      url.path.pop();
      this.state = "path";
      --this.pointer;
    }

    return true;
  };
  // "relative slash" state: a second slash starts an authority (backslash also counts, with a
  // parse error, for special schemes). Otherwise credentials, host and port are inherited from
  // the base and the code point is re-processed in the "path" state.
  URLStateMachine.prototype["parse relative slash"] = function parseRelativeSlash(c) {
    const isSlash = c === p("/");
    const isBackslash = c === p("\\");

    if (isSpecial(this.url) && (isSlash || isBackslash)) {
      if (isBackslash) {
        this.parseError = true;
      }
      this.state = "special authority ignore slashes";
      return true;
    }

    if (isSlash) {
      this.state = "authority";
      return true;
    }

    const { base, url } = this;
    url.username = base.username;
    url.password = base.password;
    url.host = base.host;
    url.port = base.port;
    this.state = "path";
    --this.pointer;
    return true;
  };
  // "special authority slashes" state: always continues in "special authority ignore slashes".
  // A proper "//" is consumed; anything else is a parse error and is re-processed there.
  URLStateMachine.prototype["parse special authority slashes"] = function parseSpecialAuthoritySlashes(c) {
    const slash = p("/");
    const hasDoubleSlash = c === slash && this.input[this.pointer + 1] === slash;

    this.state = "special authority ignore slashes";
    if (hasDoubleSlash) {
      ++this.pointer;
    } else {
      this.parseError = true;
      --this.pointer;
    }
    return true;
  };
  // "special authority ignore slashes" state: extra slashes and backslashes are skipped with a
  // parse error; the first other code point is re-processed in the "authority" state.
  URLStateMachine.prototype["parse special authority ignore slashes"] = function parseSpecialAuthorityIgnoreSlashes(c) {
    if (c === p("/") || c === p("\\")) {
      this.parseError = true;
      return true;
    }

    this.state = "authority";
    --this.pointer;
    return true;
  };
  // "authority" state: buffers code points until "@" or the end of the authority.
  // - On "@" the buffered text is credentials: everything up to the first ":" is appended
  //   (percent-encoded) to the username, the rest to the password. A repeated "@" means the
  //   previous one was part of the credentials, so it is re-added as "%40".
  // - At the end of the authority (end of input, "/", "?", "#", or "\" for special schemes)
  //   the pointer is rewound to the start of the buffered text so that the "host" state can
  //   re-read it. Credentials followed by an empty host are a failure.
  URLStateMachine.prototype["parse authority"] = function parseAuthority(c, cStr) {
    if (c === p("@")) {
      this.parseError = true;
      if (this.atFlag) {
        this.buffer = `%40${this.buffer}`;
      }
      this.atFlag = true;

      // Walk the buffer by code point. Indexing with codePointAt(i) for i below the code point
      // count would address UTF-16 code units instead: after a non-BMP character it yields a
      // lone surrogate and the loop stops before reaching the end of the buffer.
      for (const symbol of this.buffer) {
        const codePoint = symbol.codePointAt(0);

        if (codePoint === p(":") && !this.passwordTokenSeenFlag) {
          this.passwordTokenSeenFlag = true;
          continue;
        }
        const encodedCodePoints = utf8PercentEncodeCodePoint(codePoint, isUserinfoPercentEncode);
        if (this.passwordTokenSeenFlag) {
          this.url.password += encodedCodePoints;
        } else {
          this.url.username += encodedCodePoints;
        }
      }
      this.buffer = "";
    } else if (isNaN(c) || c === p("/") || c === p("?") || c === p("#") ||
               (isSpecial(this.url) && c === p("\\"))) {
      if (this.atFlag && this.buffer === "") {
        this.parseError = true;
        return failure;
      }
      // `this.input` is an array of code points, so rewind by code points (plus the current one).
      this.pointer -= countSymbols(this.buffer) + 1;
      this.buffer = "";
      this.state = "host";
    } else {
      this.buffer += cStr;
    }

    return true;
  };
  // "host" / "hostname" state (both names share one handler): buffers the host until ":" (outside
  // brackets) or the end of the host, then parses it with parseHost.
  // - With a state override on a "file" URL the code point is re-processed in "file host".
  // - ":" moves on to "port", unless the override is "hostname", in which case parsing stops.
  // - End of input, "/", "?", "#" (or "\" for special schemes) moves on to "path start".
  URLStateMachine.prototype["parse hostname"] =
  URLStateMachine.prototype["parse host"] = function parseHostName(c, cStr) {
    if (this.stateOverride && this.url.scheme === "file") {
      --this.pointer;
      this.state = "file host";
      return true;
    }

    if (c === p(":") && !this.arrFlag) {
      if (this.buffer === "") {
        this.parseError = true;
        return failure;
      }

      if (this.stateOverride === "hostname") {
        return false;
      }

      const parsed = parseHost(this.buffer, isNotSpecial(this.url));
      if (parsed === failure) {
        return failure;
      }

      this.url.host = parsed;
      this.buffer = "";
      this.state = "port";
      return true;
    }

    const endsHost = isNaN(c) || c === p("/") || c === p("?") || c === p("#") ||
      (isSpecial(this.url) && c === p("\\"));
    if (endsHost) {
      --this.pointer;

      if (isSpecial(this.url) && this.buffer === "") {
        this.parseError = true;
        return failure;
      }
      if (this.stateOverride && this.buffer === "" &&
          (includesCredentials(this.url) || this.url.port !== null)) {
        this.parseError = true;
        return false;
      }

      const parsed = parseHost(this.buffer, isNotSpecial(this.url));
      if (parsed === failure) {
        return failure;
      }

      this.url.host = parsed;
      this.buffer = "";
      this.state = "path start";
      if (this.stateOverride) {
        return false;
      }
      return true;
    }

    // Track whether we are inside "[...]" so that colons of an IPv6 literal are not taken as a port separator.
    if (c === p("[")) {
      this.arrFlag = true;
    } else if (c === p("]")) {
      this.arrFlag = false;
    }
    this.buffer += cStr;
    return true;
  };
  // "port" state: buffers ASCII digits. At the end of the port (end of input, "/", "?", "#",
  // "\" for special schemes, or any code point under a state override) the buffered number is
  // stored - as null when it equals the scheme's default port - and parsing continues in
  // "path start". A port above 65535 or an unexpected code point is a failure.
  URLStateMachine.prototype["parse port"] = function parsePort(c, cStr) {
    if (infra.isASCIIDigit(c)) {
      this.buffer += cStr;
      return true;
    }

    const endsPort = isNaN(c) || c === p("/") || c === p("?") || c === p("#") ||
      (isSpecial(this.url) && c === p("\\")) ||
      this.stateOverride;
    if (!endsPort) {
      this.parseError = true;
      return failure;
    }

    if (this.buffer !== "") {
      // The buffer only ever holds ASCII digits, so this is a plain decimal parse.
      const port = parseInt(this.buffer, 10);
      if (port > 2 ** 16 - 1) {
        this.parseError = true;
        return failure;
      }
      this.url.port = port === defaultPort(this.url.scheme) ? null : port;
      this.buffer = "";
    }

    if (this.stateOverride) {
      return false;
    }
    this.state = "path start";
    --this.pointer;
    return true;
  };
  // Code points that may directly follow a Windows drive letter: "/", "\", "?" and "#".
  const fileOtherwiseCodePoints = new Set([p("/"), p("\\"), p("?"), p("#")]);

  // True when the code point array `input`, read from index `pointer`, begins with a Windows
  // drive letter that is either the end of the input or followed by one of the code points above.
  function startsWithWindowsDriveLetter(input, pointer) {
    const remaining = input.length - pointer;
    if (remaining < 2) {
      return false;
    }
    return isWindowsDriveLetterCodePoints(input[pointer], input[pointer + 1]) &&
      (remaining === 2 || fileOtherwiseCodePoints.has(input[pointer + 2]));
  }
  // "file" state: sets the scheme to "file" and the host to empty, then
  // - on "/" or "\" continues in "file slash";
  // - with a "file" base, inherits host, path and query from it and adjusts them for "?", "#"
  //   or a relative path (a leading Windows drive letter discards the inherited path);
  // - otherwise re-processes the code point in the "path" state.
  URLStateMachine.prototype["parse file"] = function parseFile(c) {
    const { url, base } = this;
    url.scheme = "file";
    url.host = "";

    if (c === p("/") || c === p("\\")) {
      if (c === p("\\")) {
        this.parseError = true;
      }
      this.state = "file slash";
      return true;
    }

    if (base === null || base.scheme !== "file") {
      this.state = "path";
      --this.pointer;
      return true;
    }

    url.host = base.host;
    url.path = base.path.slice();
    url.query = base.query;

    if (c === p("?")) {
      url.query = "";
      this.state = "query";
    } else if (c === p("#")) {
      url.fragment = "";
      this.state = "fragment";
    } else if (!isNaN(c)) {
      url.query = null;
      if (startsWithWindowsDriveLetter(this.input, this.pointer)) {
        this.parseError = true;
        url.path = [];
      } else {
        shortenPath(url);
      }
      this.state = "path";
      --this.pointer;
    }

    return true;
  };
  // "file slash" state: a second slash (or backslash, with a parse error) leads to "file host".
  // Otherwise, with a "file" base, the base host is inherited and - unless the remaining input
  // starts with a Windows drive letter - so is the base's normalized drive letter; the code
  // point is then re-processed in the "path" state.
  URLStateMachine.prototype["parse file slash"] = function parseFileSlash(c) {
    if (c === p("/") || c === p("\\")) {
      if (c === p("\\")) {
        this.parseError = true;
      }
      this.state = "file host";
      return true;
    }

    const { base } = this;
    if (base !== null && base.scheme === "file") {
      const baseFirstSegment = base.path[0];
      if (!startsWithWindowsDriveLetter(this.input, this.pointer) &&
          isNormalizedWindowsDriveLetterString(baseFirstSegment)) {
        this.url.path.push(baseFirstSegment);
      }
      this.url.host = base.host;
    }

    this.state = "path";
    --this.pointer;
    return true;
  };
  // "file host" state: buffers the host until end of input, "/", "\", "?" or "#".
  // - A buffered Windows drive letter (without state override) is really the start of the path:
  //   the buffer is kept and handed on to the "path" state, with a parse error.
  // - An empty buffer yields an empty host.
  // - Anything else is parsed with parseHost; "localhost" is stored as the empty host.
  // Under a state override parsing stops as soon as the host has been stored.
  URLStateMachine.prototype["parse file host"] = function parseFileHost(c, cStr) {
    const endsHost = isNaN(c) || c === p("/") || c === p("\\") || c === p("?") || c === p("#");
    if (!endsHost) {
      this.buffer += cStr;
      return true;
    }

    --this.pointer;

    if (!this.stateOverride && isWindowsDriveLetterString(this.buffer)) {
      this.parseError = true;
      this.state = "path";
      return true;
    }

    if (this.buffer === "") {
      this.url.host = "";
      if (this.stateOverride) {
        return false;
      }
      this.state = "path start";
      return true;
    }

    const parsed = parseHost(this.buffer, isNotSpecial(this.url));
    if (parsed === failure) {
      return failure;
    }
    this.url.host = parsed === "localhost" ? "" : parsed;

    if (this.stateOverride) {
      return false;
    }
    this.buffer = "";
    this.state = "path start";
    return true;
  };
  // "path start" state: decides how the path begins.
  // - Special schemes always go to "path", consuming a leading "/" or "\" (the latter with a parse error).
  // - Without a state override, "?" and "#" start the query and fragment directly.
  // - Any other code point goes to "path", consuming a leading "/".
  // - At end of input under a state override, a URL with a null host gets a single empty segment.
  URLStateMachine.prototype["parse path start"] = function parsePathStart(c) {
    const isSlash = c === p("/");

    if (isSpecial(this.url)) {
      const isBackslash = c === p("\\");
      if (isBackslash) {
        this.parseError = true;
      }
      this.state = "path";
      if (!isSlash && !isBackslash) {
        --this.pointer;
      }
      return true;
    }

    if (!this.stateOverride && c === p("?")) {
      this.url.query = "";
      this.state = "query";
      return true;
    }

    if (!this.stateOverride && c === p("#")) {
      this.url.fragment = "";
      this.state = "fragment";
      return true;
    }

    if (c !== undefined) {
      this.state = "path";
      if (!isSlash) {
        --this.pointer;
      }
      return true;
    }

    if (this.stateOverride && this.url.host === null) {
      this.url.path.push("");
    }
    return true;
  };
  // "path" state: buffers one path segment at a time.
  // A segment ends at end of input, "/", "\" (special schemes only, with a parse error), or -
  // without a state override - at "?" / "#". When it ends:
  // - ".." removes the previous segment, "." is dropped; either one adds an empty segment when
  //   it was not followed by a slash;
  // - any other segment is appended (a leading Windows drive letter in a "file" URL is
  //   normalized to use ":").
  // Other code points are percent-encoded into the buffer.
  URLStateMachine.prototype["parse path"] = function parsePath(c) {
    const specialBackslash = isSpecial(this.url) && c === p("\\");
    const endsWithSlash = c === p("/") || specialBackslash;
    const endsSegment = isNaN(c) || endsWithSlash ||
      (!this.stateOverride && (c === p("?") || c === p("#")));

    if (!endsSegment) {
      // TODO: If c is not a URL code point and not "%", parse error.

      if (c === p("%") &&
          (!infra.isASCIIHex(this.input[this.pointer + 1]) ||
           !infra.isASCIIHex(this.input[this.pointer + 2]))) {
        this.parseError = true;
      }

      this.buffer += utf8PercentEncodeCodePoint(c, isPathPercentEncode);
      return true;
    }

    if (specialBackslash) {
      this.parseError = true;
    }

    if (isDoubleDot(this.buffer)) {
      shortenPath(this.url);
      if (!endsWithSlash) {
        this.url.path.push("");
      }
    } else if (isSingleDot(this.buffer)) {
      if (!endsWithSlash) {
        this.url.path.push("");
      }
    } else {
      if (this.url.scheme === "file" && this.url.path.length === 0 && isWindowsDriveLetterString(this.buffer)) {
        this.buffer = `${this.buffer[0]}:`;
      }
      this.url.path.push(this.buffer);
    }
    this.buffer = "";

    if (c === p("?")) {
      this.url.query = "";
      this.state = "query";
    }
    if (c === p("#")) {
      this.url.fragment = "";
      this.state = "fragment";
    }
    return true;
  };
  // "opaque path" state: appends code points to the string path until "?" or "#".
  // A space is written as "%20" when it directly precedes "?" or "#", and verbatim otherwise.
  // Other code points are percent-encoded with the C0 control set.
  URLStateMachine.prototype["parse opaque path"] = function parseOpaquePath(c) {
    if (c === p("?")) {
      this.url.query = "";
      this.state = "query";
      return true;
    }

    if (c === p("#")) {
      this.url.fragment = "";
      this.state = "fragment";
      return true;
    }

    if (c === p(" ")) {
      const following = this.input[this.pointer + 1];
      const beforeQueryOrFragment = following === p("?") || following === p("#");
      this.url.path += beforeQueryOrFragment ? "%20" : " ";
      return true;
    }

    // End of input: nothing to append.
    if (isNaN(c)) {
      return true;
    }

    // TODO: Add: not a URL code point
    if (c !== p("%")) {
      this.parseError = true;
    } else if (!infra.isASCIIHex(this.input[this.pointer + 1]) ||
               !infra.isASCIIHex(this.input[this.pointer + 2])) {
      this.parseError = true;
    }

    this.url.path += utf8PercentEncodeCodePoint(c, isC0ControlPercentEncode);
    return true;
  };
  // "query" state: buffers the query; at end of input (or at "#" when there is no state
  // override) the buffer is percent-encoded and appended to the URL's query.
  // The encoding override is reset to "utf-8" for non-special URLs and for ws / wss.
  URLStateMachine.prototype["parse query"] = function parseQuery(c, cStr) {
    const special = isSpecial(this.url);
    if (!special || this.url.scheme === "ws" || this.url.scheme === "wss") {
      this.encodingOverride = "utf-8";
    }

    const startsFragment = !this.stateOverride && c === p("#");
    if (startsFragment || isNaN(c)) {
      const queryPercentEncodePredicate = special ? isSpecialQueryPercentEncode : isQueryPercentEncode;
      this.url.query += utf8PercentEncodeString(this.buffer, queryPercentEncodePredicate);

      this.buffer = "";

      if (c === p("#")) {
        this.url.fragment = "";
        this.state = "fragment";
      }
      return true;
    }

    // TODO: If c is not a URL code point and not "%", parse error.
    if (c === p("%") &&
        (!infra.isASCIIHex(this.input[this.pointer + 1]) ||
         !infra.isASCIIHex(this.input[this.pointer + 2]))) {
      this.parseError = true;
    }

    this.buffer += cStr;
    return true;
  };
  // "fragment" state: percent-encodes every remaining code point into the URL's fragment.
  // A "%" not followed by two hex digits is flagged as a parse error but still appended.
  URLStateMachine.prototype["parse fragment"] = function parseFragment(c) {
    // End of input: nothing left to append.
    if (isNaN(c)) {
      return true;
    }

    // TODO: If c is not a URL code point and not "%", parse error.
    const isPercent = c === p("%");
    if (isPercent &&
        (!infra.isASCIIHex(this.input[this.pointer + 1]) ||
         !infra.isASCIIHex(this.input[this.pointer + 2]))) {
      this.parseError = true;
    }

    this.url.fragment += utf8PercentEncodeCodePoint(c, isFragmentPercentEncode);
    return true;
  };
  // Serializes a URL record to a string: scheme, optional "//" authority (credentials, host,
  // port), path, optional "?query" and - unless `excludeFragment` is truthy - optional "#fragment".
  function serializeURL(url, excludeFragment) {
    const { host, username, password, port, query, fragment } = url;

    let authority = "";
    if (host !== null) {
      let userinfo = "";
      if (username !== "" || password !== "") {
        userinfo = password !== "" ? `${username}:${password}@` : `${username}@`;
      }
      const portSuffix = port !== null ? `:${port}` : "";
      authority = `//${userinfo}${serializeHost(host)}${portSuffix}`;
    }

    // Without a host, a path starting with an empty segment would serialize as "//..." and
    // be read back as an authority; "/." keeps it a path.
    const needsPathGuard = host === null && !hasAnOpaquePath(url) && url.path.length > 1 && url.path[0] === "";
    const pathGuard = needsPathGuard ? "/." : "";

    let output = `${url.scheme}:${authority}${pathGuard}${serializePath(url)}`;
    if (query !== null) {
      output += `?${query}`;
    }
    if (!excludeFragment && fragment !== null) {
      output += `#${fragment}`;
    }
    return output;
  }
  // Serializes a { scheme, host, port } tuple as "scheme://host" plus ":port" when port is not null.
  function serializeOrigin(tuple) {
    const base = `${tuple.scheme}://`;
    const host = serializeHost(tuple.host);
    const portSuffix = tuple.port !== null ? `:${tuple.port}` : "";
    return `${base}${host}${portSuffix}`;
  }
  // Serializes a URL record's path: an opaque (string) path is returned as is, a segment list
  // is rendered with a "/" in front of every segment.
  function serializePath(url) {
    if (hasAnOpaquePath(url)) {
      return url.path;
    }

    let serialized = "";
    for (const segment of url.path) {
      serialized = `${serialized}/${segment}`;
    }
    return serialized;
  }
  module.exports.serializeURL = serializeURL;
  module.exports.serializePath = serializePath;
  // Returns the serialized origin of a URL record, or the string "null" for an opaque origin.
  module.exports.serializeURLOrigin = function (url) {
    // https://url.spec.whatwg.org/#concept-url-origin
    const { scheme } = url;

    if (scheme === "blob") {
      // The origin of a blob: URL is the origin of the http(s) URL found in its path, if any.
      const pathURL = module.exports.parseURL(serializePath(url));
      if (pathURL === null) {
        return "null";
      }
      if (pathURL.scheme !== "http" && pathURL.scheme !== "https") {
        return "null";
      }
      return module.exports.serializeURLOrigin(pathURL);
    }

    if (scheme === "ftp" || scheme === "http" || scheme === "https" || scheme === "ws" || scheme === "wss") {
      return serializeOrigin({
        scheme,
        host: url.host,
        port: url.port
      });
    }

    if (scheme === "file") {
      // The spec says:
      // > Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin.
      // Browsers tested so far:
      // - Chrome says "file://", but treats file: URLs as cross-origin for most (all?) purposes; see e.g.
      //   https://bugs.chromium.org/p/chromium/issues/detail?id=37586
      // - Firefox says "null", but treats file: URLs as same-origin sometimes based on directory stuff; see
      //   https://developer.mozilla.org/en-US/docs/Archive/Misc_top_level/Same-origin_policy_for_file:_URIs
      return "null";
    }

    // serializing an opaque origin returns "null"
    return "null";
  };
  // Runs the state machine over `input` and returns the resulting URL record, or null on failure.
  // `options` may carry `baseURL`, `encodingOverride`, `url` (a record to modify) and `stateOverride`.
  module.exports.basicURLParse = function (input, options) {
    const settings = options === undefined ? {} : options;

    const usm = new URLStateMachine(
      input,
      settings.baseURL,
      settings.encodingOverride,
      settings.url,
      settings.stateOverride
    );

    return usm.failure ? null : usm.url;
  };
  // Stores `username` on the URL record, percent-encoded with the userinfo set.
  module.exports.setTheUsername = function (url, username) {
    const encoded = utf8PercentEncodeString(username, isUserinfoPercentEncode);
    url.username = encoded;
  };
  // Stores `password` on the URL record, percent-encoded with the userinfo set.
  module.exports.setThePassword = function (url, password) {
    const encoded = utf8PercentEncodeString(password, isUserinfoPercentEncode);
    url.password = encoded;
  };
  module.exports.serializeHost = serializeHost;
  module.exports.cannotHaveAUsernamePasswordPort = cannotHaveAUsernamePasswordPort;
  module.exports.hasAnOpaquePath = hasAnOpaquePath;
  // Converts `integer` to its string form via String().
  module.exports.serializeInteger = function (integer) {
    const text = String(integer);
    return text;
  };
  // Parses `input` into a URL record (or null on failure), forwarding only `baseURL` and
  // `encodingOverride` from `options` to basicURLParse.
  module.exports.parseURL = function (input, options) {
    const settings = options === undefined ? {} : options;
    const { baseURL, encodingOverride } = settings;

    // We don't handle blobs, so this just delegates:
    return module.exports.basicURLParse(input, { baseURL, encodingOverride });
  };