You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
	
	
		
			152 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			TypeScript
		
	
		
		
			
		
	
	
			152 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			TypeScript
		
	
| 
											7 years ago
										 | /* global URL */ | ||
|  | 
 | ||
| 
											4 years ago
										 | import { compact, isEmpty, isNumber, range } from 'lodash'; | ||
|  | import nodeUrl from 'url'; | ||
|  | import LinkifyIt from 'linkify-it'; | ||
| 
											7 years ago
										 | 
 | ||
// Shared linkify-it matcher, created once with the library's default
//   configuration and reused by `findLinks`.
const linkify = LinkifyIt();
|  | 
 | ||
/**
 * Parses `href` with the global `URL` constructor without throwing.
 *
 * @param href - The candidate URL string.
 * @returns The parsed `URL`, or `null` when the constructor rejects the input.
 */
function maybeParseHref(href: string): URL | null {
  try {
    return new URL(href);
  } catch (err) {
    // An unparseable href is an expected outcome here, not an error worth surfacing.
    return null;
  }
}
|  | 
 | ||
/**
 * Reports whether a preview may be generated for `href`.
 *
 * A link qualifies only when it parses as a URL, uses the `https:` protocol,
 * and is not flagged by `isLinkSneaky`.
 *
 * @param href - The link to check.
 * @returns `true` when all three conditions hold, otherwise `false`.
 */
function isLinkSafeToPreview(href: string): boolean {
  const url = maybeParseHref(href);
  return url !== null && url.protocol === 'https:' && !isLinkSneaky(href);
}
|  | 
 | ||
/**
 * Returns the text of each link that linkify-it detects in `text`.
 *
 * When `caretLocation` is a number, a link is omitted if the caret lies within
 * it (`match.index <= caretLocation <= match.lastIndex`). The one exception is a
 * link that ends exactly at the end of the text while the caret is also at the
 * end of the text; that link is still returned.
 * NOTE(review): presumably this avoids previewing a link the user is still
 * editing; confirm against callers.
 *
 * @param text - The text to scan. A falsy value is treated as the empty string.
 * @param caretLocation - Optional caret offset into `text`.
 * @returns The matched link texts, in the order linkify-it reports them.
 */
function findLinks(text: string, caretLocation?: number): Array<string> {
  const haveCaretLocation = isNumber(caretLocation);
  const textLength = text ? text.length : 0;

  // `linkify.match` returns null when nothing matches, hence the fallback.
  const matches = linkify.match(text || '') || [];

  return compact(
    matches.map(match => {
      // Without a caret, every detected link is returned.
      if (!haveCaretLocation) {
        return match.text;
      }

      // The link ends the text and the caret sits at the very end.
      const linkAndCaretAtEnd =
        match.lastIndex === textLength && caretLocation === textLength;
      if (linkAndCaretAtEnd) {
        return match.text;
      }

      // The caret is strictly before or strictly after the link.
      const caretOutsideLink =
        match.index > caretLocation || match.lastIndex < caretLocation;
      if (caretOutsideLink) {
        return match.text;
      }

      // The caret touches the link; `compact` drops this entry.
      return null;
    })
  );
}
|  | 
 | ||
/**
 * Extracts the hostname from `href`.
 *
 * @param href - The link to inspect.
 * @returns `URL#hostname` of the parsed link, or `null` when `href` does not
 *   parse as a URL.
 */
function getDomain(href: string): string | null {
  const url = maybeParseHref(href);
  return url ? url.hostname : null;
}
|  | 
 | ||
// Characters accepted in the path/query/fragment portion of a link.
// See <https://tools.ietf.org/html/rfc3986>.
const VALID_URI_CHARACTERS: ReadonlySet<string> = new Set([
  '%',
  // "gen-delims"
  ':',
  '/',
  '?',
  '#',
  '[',
  ']',
  '@',
  // "sub-delims"
  '!',
  '$',
  '&',
  "'",
  '(',
  ')',
  '*',
  '+',
  ',',
  ';',
  '=',
  // unreserved
  // Spreading the string yields one entry per character: A-Z, then a-z.
  ...String.fromCharCode(...range(65, 91), ...range(97, 123)),
  // The digits '0' through '9'.
  ...range(10).map(String),
  '-',
  '.',
  '_',
  '~',
]);

// Matches a single character in U+0020..U+007F. The `g` flag is required so
//   that `String#replace` strips every occurrence; because a global RegExp
//   keeps `lastIndex` state, prefer `replace` over `test`/`exec` with it.
const ASCII_PATTERN = new RegExp('[\\u0020-\\u007F]', 'g');

// Longest href (by `.length`) that will be inspected: 4096.
const MAX_HREF_LENGTH = 2 ** 12;
|  | 
 | ||
/**
 * Heuristically decides whether a link looks deceptive ("sneaky").
 *
 * A link is sneaky when any of the following holds:
 * - it is longer than `MAX_HREF_LENGTH`;
 * - it does not parse as a URL;
 * - it carries credentials (a username or a password);
 * - its hostname is empty, longer than 2048 characters, contains `%`,
 *   has fewer than two labels, or has an empty label;
 * - its Unicode hostname mixes characters in U+0020..U+007F with characters
 *   outside that range;
 * - the part of the raw href from the first `/` after the start of the host
 *   contains a character that is not in `VALID_URI_CHARACTERS`.
 *
 * @param href - The raw link text to inspect.
 * @returns `true` if the link is considered sneaky, otherwise `false`.
 */
function isLinkSneaky(href: string): boolean {
  // This helps users avoid extremely long links (which could be hiding something
  //   sketchy) and also sidesteps the performance implications of extremely long hrefs.
  if (href.length > MAX_HREF_LENGTH) {
    return true;
  }

  const url = maybeParseHref(href);

  // If we can't parse it, it's sneaky.
  if (!url) {
    return true;
  }

  // Any links which contain auth are considered sneaky. The password is checked
  //   as well because `https://:secret@example.com` has an empty username.
  if (url.username || url.password) {
    return true;
  }

  // If the domain is falsy, something fishy is going on
  if (!url.hostname) {
    return true;
  }

  // To quote [RFC 1034][0]: "the total number of octets that represent a
  //   domain name [...] is limited to 255." To be extra careful, we set a
  //   maximum of 2048. (This also uses the string's `.length` property,
  //   which isn't exactly the same thing as the number of octets.)
  // [0]: https://tools.ietf.org/html/rfc1034
  if (url.hostname.length > 2048) {
    return true;
  }

  // Domains cannot contain encoded characters
  if (url.hostname.includes('%')) {
    return true;
  }

  // There must be at least 2 domain labels, and none of them can be empty.
  const labels = url.hostname.split('.');
  if (labels.length < 2 || labels.some(isEmpty)) {
    return true;
  }

  // This is necessary because `url.hostname` is in punycode form.
  //   `domainToUnicode` is feature-checked and skipped when it is unavailable.
  const unicodeDomain = nodeUrl.domainToUnicode
    ? nodeUrl.domainToUnicode(url.hostname)
    : url.hostname;

  const withoutPeriods = unicodeDomain.replace(/\./g, '');

  // Derive `hasASCII` from the replacement instead of calling `.test()`:
  //   `ASCII_PATTERN` is global, so `.test()` would depend on (and mutate) its
  //   `lastIndex`, whereas a global `replace` always scans from the start.
  const withoutASCII = withoutPeriods.replace(ASCII_PATTERN, '');
  const hasASCII = withoutASCII.length < withoutPeriods.length;

  const isMixed = hasASCII && withoutASCII.length > 0;
  if (isMixed) {
    return true;
  }

  // We can't use `url.pathname` (and so on) because it automatically encodes strings.
  //   For example, it turns `/aquí` into `/aqu%C3%AD`.
  // The search starts a few characters past the protocol so that the slashes
  //   of `//` are not mistaken for the start of the path.
  const startOfPathAndHash = href.indexOf('/', url.protocol.length + 4);
  const pathAndHash = startOfPathAndHash === -1 ? '' : href.slice(startOfPathAndHash);
  return [...pathAndHash].some(character => !VALID_URI_CHARACTERS.has(character));
}
| 
											4 years ago
										 | 
 | ||
/** Public entry points of this module, grouped under a single named export. */
export const LinkPreviews = { isLinkSneaky, getDomain, findLinks, isLinkSafeToPreview };