index.js 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  1. "use strict";
  2. const punycode = require("punycode/");
  3. const regexes = require("./lib/regexes.js");
  4. const mappingTable = require("./lib/mappingTable.json");
  5. const { STATUS_MAPPING } = require("./lib/statusMapping.js");
  6. function containsNonASCII(str) {
  7. return /[^\x00-\x7F]/u.test(str);
  8. }
  9. function findStatus(val) {
  10. let start = 0;
  11. let end = mappingTable.length - 1;
  12. while (start <= end) {
  13. const mid = Math.floor((start + end) / 2);
  14. const target = mappingTable[mid];
  15. const min = Array.isArray(target[0]) ? target[0][0] : target[0];
  16. const max = Array.isArray(target[0]) ? target[0][1] : target[0];
  17. if (min <= val && max >= val) {
  18. return target.slice(1);
  19. } else if (min > val) {
  20. end = mid - 1;
  21. } else {
  22. start = mid + 1;
  23. }
  24. }
  25. return null;
  26. }
  27. function mapChars(domainName, { transitionalProcessing }) {
  28. let processed = "";
  29. for (const ch of domainName) {
  30. const [status, mapping] = findStatus(ch.codePointAt(0));
  31. switch (status) {
  32. case STATUS_MAPPING.disallowed:
  33. processed += ch;
  34. break;
  35. case STATUS_MAPPING.ignored:
  36. break;
  37. case STATUS_MAPPING.mapped:
  38. if (transitionalProcessing && ch === "ẞ") {
  39. processed += "ss";
  40. } else {
  41. processed += mapping;
  42. }
  43. break;
  44. case STATUS_MAPPING.deviation:
  45. if (transitionalProcessing) {
  46. processed += mapping;
  47. } else {
  48. processed += ch;
  49. }
  50. break;
  51. case STATUS_MAPPING.valid:
  52. processed += ch;
  53. break;
  54. }
  55. }
  56. return processed;
  57. }
  58. function validateLabel(label, {
  59. checkHyphens,
  60. checkBidi,
  61. checkJoiners,
  62. transitionalProcessing,
  63. useSTD3ASCIIRules,
  64. isBidi
  65. }) {
  66. // "must be satisfied for a non-empty label"
  67. if (label.length === 0) {
  68. return true;
  69. }
  70. // "1. The label must be in Unicode Normalization Form NFC."
  71. if (label.normalize("NFC") !== label) {
  72. return false;
  73. }
  74. const codePoints = Array.from(label);
  75. // "2. If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character in both the
  76. // third and fourth positions."
  77. //
  78. // "3. If CheckHyphens, the label must neither begin nor end with a U+002D HYPHEN-MINUS character."
  79. if (checkHyphens) {
  80. if ((codePoints[2] === "-" && codePoints[3] === "-") ||
  81. (label.startsWith("-") || label.endsWith("-"))) {
  82. return false;
  83. }
  84. }
  85. // "4. If not CheckHyphens, the label must not begin with “xn--”."
  86. if (!checkHyphens) {
  87. if (label.startsWith("xn--")) {
  88. return false;
  89. }
  90. }
  91. // "5. The label must not contain a U+002E ( . ) FULL STOP."
  92. if (label.includes(".")) {
  93. return false;
  94. }
  95. // "6. The label must not begin with a combining mark, that is: General_Category=Mark."
  96. if (regexes.combiningMarks.test(codePoints[0])) {
  97. return false;
  98. }
  99. // "7. Each code point in the label must only have certain Status values according to Section 5"
  100. for (const ch of codePoints) {
  101. const codePoint = ch.codePointAt(0);
  102. const [status] = findStatus(codePoint);
  103. if (transitionalProcessing) {
  104. // "For Transitional Processing (deprecated), each value must be valid."
  105. if (status !== STATUS_MAPPING.valid) {
  106. return false;
  107. }
  108. } else if (status !== STATUS_MAPPING.valid && status !== STATUS_MAPPING.deviation) {
  109. // "For Nontransitional Processing, each value must be either valid or deviation."
  110. return false;
  111. }
  112. // "In addition, if UseSTD3ASCIIRules=true and the code point is an ASCII code point (U+0000..U+007F), then it must
  113. // be a lowercase letter (a-z), a digit (0-9), or a hyphen-minus (U+002D). (Note: This excludes uppercase ASCII
  114. // A-Z which are mapped in UTS #46 and disallowed in IDNA2008.)"
  115. if (useSTD3ASCIIRules && codePoint <= 0x7F) {
  116. if (!/^(?:[a-z]|[0-9]|-)$/u.test(ch)) {
  117. return false;
  118. }
  119. }
  120. }
  121. // "8. If CheckJoiners, the label must satisify the ContextJ rules"
  122. // https://tools.ietf.org/html/rfc5892#appendix-A
  123. if (checkJoiners) {
  124. let last = 0;
  125. for (const [i, ch] of codePoints.entries()) {
  126. if (ch === "\u200C" || ch === "\u200D") {
  127. if (i > 0) {
  128. if (regexes.combiningClassVirama.test(codePoints[i - 1])) {
  129. continue;
  130. }
  131. if (ch === "\u200C") {
  132. // TODO: make this more efficient
  133. const next = codePoints.indexOf("\u200C", i + 1);
  134. const test = next < 0 ? codePoints.slice(last) : codePoints.slice(last, next);
  135. if (regexes.validZWNJ.test(test.join(""))) {
  136. last = i + 1;
  137. continue;
  138. }
  139. }
  140. }
  141. return false;
  142. }
  143. }
  144. }
  145. // "9. If CheckBidi, and if the domain name is a Bidi domain name, then the label must satisfy..."
  146. // https://tools.ietf.org/html/rfc5893#section-2
  147. if (checkBidi && isBidi) {
  148. let rtl;
  149. // 1
  150. if (regexes.bidiS1LTR.test(codePoints[0])) {
  151. rtl = false;
  152. } else if (regexes.bidiS1RTL.test(codePoints[0])) {
  153. rtl = true;
  154. } else {
  155. return false;
  156. }
  157. if (rtl) {
  158. // 2-4
  159. if (!regexes.bidiS2.test(label) ||
  160. !regexes.bidiS3.test(label) ||
  161. (regexes.bidiS4EN.test(label) && regexes.bidiS4AN.test(label))) {
  162. return false;
  163. }
  164. } else if (!regexes.bidiS5.test(label) ||
  165. !regexes.bidiS6.test(label)) { // 5-6
  166. return false;
  167. }
  168. }
  169. return true;
  170. }
  171. function isBidiDomain(labels) {
  172. const domain = labels.map(label => {
  173. if (label.startsWith("xn--")) {
  174. try {
  175. return punycode.decode(label.substring(4));
  176. } catch {
  177. return "";
  178. }
  179. }
  180. return label;
  181. }).join(".");
  182. return regexes.bidiDomain.test(domain);
  183. }
  184. function processing(domainName, options) {
  185. // 1. Map.
  186. let string = mapChars(domainName, options);
  187. // 2. Normalize.
  188. string = string.normalize("NFC");
  189. // 3. Break.
  190. const labels = string.split(".");
  191. const isBidi = isBidiDomain(labels);
  192. // 4. Convert/Validate.
  193. let error = false;
  194. for (const [i, origLabel] of labels.entries()) {
  195. let label = origLabel;
  196. let transitionalProcessingForThisLabel = options.transitionalProcessing;
  197. if (label.startsWith("xn--")) {
  198. if (containsNonASCII(label)) {
  199. error = true;
  200. continue;
  201. }
  202. try {
  203. label = punycode.decode(label.substring(4));
  204. } catch {
  205. if (!options.ignoreInvalidPunycode) {
  206. error = true;
  207. continue;
  208. }
  209. }
  210. labels[i] = label;
  211. if (label === "" || !containsNonASCII(label)) {
  212. error = true;
  213. }
  214. transitionalProcessingForThisLabel = false;
  215. }
  216. // No need to validate if we already know there is an error.
  217. if (error) {
  218. continue;
  219. }
  220. const validation = validateLabel(label, {
  221. ...options,
  222. transitionalProcessing: transitionalProcessingForThisLabel,
  223. isBidi
  224. });
  225. if (!validation) {
  226. error = true;
  227. }
  228. }
  229. return {
  230. string: labels.join("."),
  231. error
  232. };
  233. }
  234. function toASCII(domainName, {
  235. checkHyphens = false,
  236. checkBidi = false,
  237. checkJoiners = false,
  238. useSTD3ASCIIRules = false,
  239. verifyDNSLength = false,
  240. transitionalProcessing = false,
  241. ignoreInvalidPunycode = false
  242. } = {}) {
  243. const result = processing(domainName, {
  244. checkHyphens,
  245. checkBidi,
  246. checkJoiners,
  247. useSTD3ASCIIRules,
  248. transitionalProcessing,
  249. ignoreInvalidPunycode
  250. });
  251. let labels = result.string.split(".");
  252. labels = labels.map(l => {
  253. if (containsNonASCII(l)) {
  254. try {
  255. return `xn--${punycode.encode(l)}`;
  256. } catch {
  257. result.error = true;
  258. }
  259. }
  260. return l;
  261. });
  262. if (verifyDNSLength) {
  263. const total = labels.join(".").length;
  264. if (total > 253 || total === 0) {
  265. result.error = true;
  266. }
  267. for (let i = 0; i < labels.length; ++i) {
  268. if (labels[i].length > 63 || labels[i].length === 0) {
  269. result.error = true;
  270. break;
  271. }
  272. }
  273. }
  274. if (result.error) {
  275. return null;
  276. }
  277. return labels.join(".");
  278. }
  279. function toUnicode(domainName, {
  280. checkHyphens = false,
  281. checkBidi = false,
  282. checkJoiners = false,
  283. useSTD3ASCIIRules = false,
  284. transitionalProcessing = false,
  285. ignoreInvalidPunycode = false
  286. } = {}) {
  287. const result = processing(domainName, {
  288. checkHyphens,
  289. checkBidi,
  290. checkJoiners,
  291. useSTD3ASCIIRules,
  292. transitionalProcessing,
  293. ignoreInvalidPunycode
  294. });
  295. return {
  296. domain: result.string,
  297. error: result.error
  298. };
  299. }
  300. module.exports = {
  301. toASCII,
  302. toUnicode
  303. };