using linkify-it
SupertigerDev opened this issue · comments
I'm trying to use linkify-it with simple-markdown because the links don't fully work the way I want them to.
I tried doing this:
//import linkify-it
import linkify from 'linkify-it'
const linkifyInstance = linkify();
//... later in the code
// NOTE: this hands back whatever linkifyInstance.match() returns, unchanged.
// simple-markdown rejects that with the "must return a capture starting at
// index 0" error quoted below.
match: function(source) {
return linkifyInstance.match(source)
},
but I get this error:
Error in render: "Error: `match` must return a capture starting at index 0 (the current parse index). Did you forget a ^ at the start of the RegExp?"
So I think there are two things that will make this tricky, but I think both are solvable:
- SimpleMarkdown parse rules need to:
  - return a regular-expression `.exec` style result, i.e. an array that looks like: `["full capture string", "match1", "match2", /* ... */ ]`
  - return a match for the start of the input only, and `null` if the start of the input (at character 0 in the input string) does not match.
- The `text` rule needs to not capture any input that might be part of another rule. By default, the text rule will match all of `hi nertivia.tk`, preventing a linkify rule from being able to match `nertivia.tk`.
We can solve problem 1 by adapting the `Match` array output from linkify-it to simple-markdown's API needs:
linkify: {
order: order++,
match: function(source) {
let linkifyMatches = linkifyInstance.match(source);
// if linkify found nothing, or if the first linkify match is at any index past 0, we have no match yet:
if (
linkifyMatches == null ||
linkifyMatches.length === 0 ||
linkifyMatches[0].index !== 0
) {
return null;
}
// translate linkify-it's match to simple-markdown's match/capture format:
let capture = [
linkifyMatches[0].raw, // the first element in the array must be the raw matched text
// future elements in the result can be anything, so take whatever parts you need here!:
linkifyMatches[0].text,
linkifyMatches[0].url,
];
return capture;
},
parse: function(capture) {
return {
content: {
type: 'text', // mark that our content is the raw text of the link
content: capture[1],
},
url: capture[2],
};
},
html: function(node, output, state) {
return '<a href="' +
SimpleMarkdown.sanitizeText(SimpleMarkdown.sanitizeUrl(node.url)) +
'">' +
output(node.content, state) +
"</a>";
},
},
We can solve problem 2 in one of two ways:
* Modify the default text rule to break after spaces and punctuation, so that any links after a space or punctuation are properly captured
* Or have our `text` rule run `linkify-it` to make sure it doesn't capture anything that could be a link
The first option is faster, the second option is more thorough. Here's what they would look like:
// Option 1: make the default text rule break after spaces and punctuation
text: {
order: order++,
match: function(source) {
// copied and modified from simple-markdown.js
return /^[\s\S][0-9A-Za-z\u00c0-\uffff]*\s?/.exec(source);
},
parse: SimpleMarkdown.defaultRules.text.parse,
html: SimpleMarkdown.defaultRules.text.html,
},
// Option 2:
text: {
order: order++,
match: function(source) {
// Run linkify and then only match text (using the default text rule) from the non-linkified bits.
let linkifyMatches = linkifyInstance.match(source);
// Figure out the index of the next linkify match, or use the end of the source if none were found:
let indexOfNextLink = source.length;
if (linkifyMatches && linkifyMatches.length > 0) {
indexOfNextLink = linkifyMatches[0].index;
}
// Then we can re-run the default text match on the subset of the source before the linkify match:
return SimpleMarkdown.defaultRules.text.match(source.slice(0, indexOfNextLink));
},
parse: SimpleMarkdown.defaultRules.text.parse,
html: SimpleMarkdown.defaultRules.text.html,
},
(oops, there are a couple bugs in the above; i'm cleaning them up now)
Edit: I think I've fixed the basic bugs; see below for putting it all together :)
Putting that together with what you asked in #77 , your custom rules would look something like:
const SimpleMarkdown = require('simple-markdown');
const linkify = require('linkify-it');
const linkifyInstance = linkify();
let order = 0; // order the below rules as declared below rather than by the original defaultRules order:
let rules = {
  // OPTIONAL: copy the paragraph rule with a new `order`
  paragraph: Object.assign({}, SimpleMarkdown.defaultRules.paragraph, {
    order: order++,
  }),

  // Auto-links whatever linkify-it recognises at the very start of the
  // remaining input.
  linkify: {
    order: order++,
    /**
     * @param {string} source - the not-yet-parsed remainder of the input
     * @returns {?Array<string>} `[raw, text, url]` of the first linkify-it
     *   match when it starts at index 0, otherwise `null`
     */
    match: function(source) {
      let linkifyMatches = linkifyInstance.match(source);
      // if linkify found nothing, or if the first linkify match is at any index past 0, we have no match yet:
      if (
        linkifyMatches == null ||
        linkifyMatches.length === 0 ||
        linkifyMatches[0].index !== 0
      ) {
        return null;
      }
      // translate linkify-it's match to simple-markdown's match/capture format:
      let capture = [
        linkifyMatches[0].raw, // the first element in the array must be the raw matched text
        // future elements in the result can be anything, so take whatever parts you need here!:
        linkifyMatches[0].text,
        linkifyMatches[0].url,
      ];
      return capture;
    },
    /**
     * @param {Array<string>} capture - the array returned by `match`
     * @returns {Object} node holding a text child (`capture[1]`) and the url (`capture[2]`)
     */
    parse: function(capture) {
      return {
        content: {
          type: 'text', // mark that our content is the raw text of the link
          content: capture[1],
        },
        url: capture[2],
      };
    },
    /**
     * Renders the node as `<a href="...">...</a>`; the url goes through
     * `sanitizeUrl` and then `sanitizeText` before being placed in the attribute.
     */
    html: function(node, output, state) {
      return '<a href="' +
        SimpleMarkdown.sanitizeText(SimpleMarkdown.sanitizeUrl(node.url)) +
        '">' +
        output(node.content, state) +
        "</a>";
    },
  },

  // copy the bold/strong rule with a new `order`
  strong: Object.assign({}, SimpleMarkdown.defaultRules.strong, {
    order: order++,
  }),

  text: {
    order: order++,
    match: function(source) {
      // modified from simple-markdown.js
      // Matches either a run of whitespace on its own, or one non-whitespace
      // character followed by letter/unicode characters and an optional space.
      // Whitespace must end its own chunk: if a chunk that starts with a space
      // were allowed to keep going, " google" would be swallowed as text and
      // the linkify rule would only ever see ".com ...", so in
      // "google.com google.com google.com" every second link would be missed.
      return /^(?:\s+|\S[0-9A-Za-z\u00c0-\uffff]*\s?)/.exec(source);
    },
    parse: SimpleMarkdown.defaultRules.text.parse,
    html: SimpleMarkdown.defaultRules.text.html,
  },
};
// Build the parser and the HTML outputter from the custom rule set.
let parse = SimpleMarkdown.parserFor(rules);
let output = SimpleMarkdown.outputFor(rules, 'html');
// alternatively, if using react:
// let output = SimpleMarkdown.outputFor(rules, 'react');

/**
 * Parses `source` with the custom rules and renders the resulting tree with
 * the 'html' outputter.
 *
 * @param {string} source - markdown text to convert
 * @param {Object} [state] - parse/output state shared by both passes; a fresh
 *   object is used when the caller omits it
 * @returns {*} whatever the 'html' outputter produces for the parsed tree
 */
let markdownToHtml = function(source, state) {
  // Callers may leave `state` out; without this fallback the `inline`
  // assignment below would throw a TypeError on `undefined`.
  let activeState = state == null ? {} : state;
  // if you don't have a paragraph rule, you probably want to default `state.inline` to true, to
  // indicate to the bold rule that it is parsing inline text:
  if (rules.paragraph == null) activeState.inline = true;
  let parsedContentTree = parse(source, activeState);
  return output(parsedContentTree, activeState);
};
module.exports = markdownToHtml;
Ah, thanks a lot, really appreciate you taking time helping 👌
Happy to! Glad that helped!
Hey, so I recently noticed that with that code, it doesn't check properly :( for example:
This works fine:
google.com
but this doesn't:
hello, google.com is dope!
EDIT: never mind, I did not read what you wrote properly 🤦‍♂️ Thanks a lot again :D
Hey there! I don't know if a new update broke something or what but when typing:
google.com google.com google.com
it seems like only the even ones turn into links 🤔 Any help please?
Thanks