- package parser
- import (
- "bytes"
- "regexp"
- "strconv"
- "github.com/gomarkdown/markdown/ast"
- )
- // Parsing of inline elements
- var (
- urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
- anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)
- // TODO: improve this regexp to catch all possible entities:
- htmlEntityRe = regexp.MustCompile(`&[a-z]{2,5};`)
- )
- // Inline parses text within a block.
- // Each function returns the number of consumed chars.
- func (p *Parser) Inline(currBlock ast.Node, data []byte) {
- // handlers might call us recursively: enforce a maximum depth
- if p.nesting >= p.maxNesting || len(data) == 0 {
- return
- }
- p.nesting++
- beg, end := 0, 0
- n := len(data)
- for end < n {
- handler := p.inlineCallback[data[end]]
- if handler == nil {
- end++
- continue
- }
- consumed, node := handler(p, data, end)
- if consumed == 0 {
- // no action from the callback
- end++
- continue
- }
- // copy inactive chars into the output
- ast.AppendChild(currBlock, newTextNode(data[beg:end]))
- if node != nil {
- ast.AppendChild(currBlock, node)
- }
- beg = end + consumed
- end = beg
- }
- if beg < n {
- if data[end-1] == '\n' {
- end--
- }
- ast.AppendChild(currBlock, newTextNode(data[beg:end]))
- }
- p.nesting--
- }
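- // sketchInlineUsage is an illustrative example only (not part of the original
- // file); it assumes the package's NewWithExtensions constructor and the
- // CommonExtensions flag set defined outside this file. It shows how Inline
- // walks the input and dispatches on single bytes via p.inlineCallback.
- func sketchInlineUsage() ast.Node {
- p := NewWithExtensions(CommonExtensions)
- para := &ast.Paragraph{}
- // '*' is handled by emphasis, '`' by codeSpan; bytes without a handler are
- // accumulated and flushed as Text nodes.
- p.Inline(para, []byte("plain *emphasized* and `code` text"))
- return para // a Paragraph with Text, Emph and Code children
- }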
- // single and double emphasis parsing
- func emphasis(p *Parser, data []byte, offset int) (int, ast.Node) {
- data = data[offset:]
- c := data[0]
- n := len(data)
- if n > 2 && data[1] != c {
- // whitespace cannot follow an opening emphasis;
- // strikethrough only takes two characters '~~'
- if IsSpace(data[1]) {
- return 0, nil
- }
- if p.extensions&SuperSubscript != 0 && c == '~' {
- // potential subscript: no spaces allowed except when escaped; helperEmphasis
- // does not check that for us, so walk the bytes and check.
- ret := skipUntilChar(data[1:], 0, c)
- if ret == 0 {
- return 0, nil
- }
- ret++ // we started with data[1:] above.
- for i := 1; i < ret; i++ {
- if IsSpace(data[i]) && !isEscape(data, i) {
- return 0, nil
- }
- }
- sub := &ast.Subscript{}
- sub.Literal = data[1:ret]
- return ret + 1, sub
- }
- ret, node := helperEmphasis(p, data[1:], c)
- if ret == 0 {
- return 0, nil
- }
- return ret + 1, node
- }
- if n > 3 && data[1] == c && data[2] != c {
- if IsSpace(data[2]) {
- return 0, nil
- }
- ret, node := helperDoubleEmphasis(p, data[2:], c)
- if ret == 0 {
- return 0, nil
- }
- return ret + 2, node
- }
- if n > 4 && data[1] == c && data[2] == c && data[3] != c {
- if c == '~' || IsSpace(data[3]) {
- return 0, nil
- }
- ret, node := helperTripleEmphasis(p, data, 3, c)
- if ret == 0 {
- return 0, nil
- }
- return ret + 3, node
- }
- return 0, nil
- }
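- // sketchEmphasisForms is an illustrative example only; it assumes the package's
- // NewWithExtensions constructor and CommonExtensions defined outside this file.
- // It shows which node types the delimiter runs above map to when the handler is
- // invoked at the opening delimiter.
- func sketchEmphasisForms() []ast.Node {
- p := NewWithExtensions(CommonExtensions)
- _, em := emphasis(p, []byte("*single*"), 0) // -> *ast.Emph
- _, strong := emphasis(p, []byte("**double**"), 0) // -> *ast.Strong
- _, nested := emphasis(p, []byte("***triple***"), 0) // -> *ast.Strong wrapping an *ast.Emph
- _, del := emphasis(p, []byte("~~strike~~"), 0) // -> *ast.Del (two '~' delimiters)
- return []ast.Node{em, strong, nested, del}
- }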
- func codeSpan(p *Parser, data []byte, offset int) (int, ast.Node) {
- data = data[offset:]
- // count the number of backticks in the delimiter
- nb := skipChar(data, 0, '`')
- // find the next delimiter
- i, end := 0, 0
- hasLFBeforeDelimiter := false
- for end = nb; end < len(data) && i < nb; end++ {
- if data[end] == '\n' {
- hasLFBeforeDelimiter = true
- }
- if data[end] == '`' {
- i++
- } else {
- i = 0
- }
- }
- // no matching delimiter?
- if i < nb && end >= len(data) {
- return 0, nil
- }
- // If there are non-space chars after the ending delimiter and before a '\n',
- // flag that this is not a well formed fenced code block.
- hasCharsAfterDelimiter := false
- for j := end; j < len(data); j++ {
- if data[j] == '\n' {
- break
- }
- if !IsSpace(data[j]) {
- hasCharsAfterDelimiter = true
- break
- }
- }
- // trim outside whitespace
- fBegin := nb
- for fBegin < end && data[fBegin] == ' ' {
- fBegin++
- }
- fEnd := end - nb
- for fEnd > fBegin && data[fEnd-1] == ' ' {
- fEnd--
- }
- if fBegin == fEnd {
- return end, nil
- }
- // if delimiter has 3 backticks
- if nb == 3 {
- i := fBegin
- syntaxStart, syntaxLen := syntaxRange(data, &i)
- // If we found a '\n' before the end marker and there are only spaces
- // after the end marker, then this is a code block.
- if hasLFBeforeDelimiter && !hasCharsAfterDelimiter {
- codeblock := &ast.CodeBlock{
- IsFenced: true,
- Info: data[syntaxStart : syntaxStart+syntaxLen],
- }
- codeblock.Literal = data[i:fEnd]
- return end, codeblock
- }
- }
- // render the code span
- code := &ast.Code{}
- code.Literal = data[fBegin:fEnd]
- return end, code
- }
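- // sketchCodeSpan is an illustrative example only; it assumes NewWithExtensions
- // and CommonExtensions from elsewhere in this package. A single-backtick span
- // yields an *ast.Code, while a triple-backtick run with a newline before its
- // closing delimiter (and nothing after it on that line) is promoted to an
- // *ast.CodeBlock.
- func sketchCodeSpan() (ast.Node, ast.Node) {
- p := NewWithExtensions(CommonExtensions)
- _, span := codeSpan(p, []byte("`x := 1`"), 0) // -> *ast.Code{Literal: "x := 1"}
- _, block := codeSpan(p, []byte("```go\nx := 1\n```"), 0) // -> *ast.CodeBlock{IsFenced: true}
- return span, block
- }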
- // newline preceded by two spaces becomes <br>
- func maybeLineBreak(p *Parser, data []byte, offset int) (int, ast.Node) {
- origOffset := offset
- offset = skipChar(data, offset, ' ')
- if offset < len(data) && data[offset] == '\n' {
- if offset-origOffset >= 2 {
- return offset - origOffset + 1, &ast.Hardbreak{}
- }
- return offset - origOffset, nil
- }
- return 0, nil
- }
- // newline without two spaces works when HardLineBreak is enabled
- func lineBreak(p *Parser, data []byte, offset int) (int, ast.Node) {
- if p.extensions&HardLineBreak != 0 {
- return 1, &ast.Hardbreak{}
- }
- return 0, nil
- }
- type linkType int
- const (
- linkNormal linkType = iota
- linkImg
- linkDeferredFootnote
- linkInlineFootnote
- linkCitation
- )
- func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
- if t == linkDeferredFootnote {
- return false
- }
- return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
- }
- func maybeImage(p *Parser, data []byte, offset int) (int, ast.Node) {
- if offset < len(data)-1 && data[offset+1] == '[' {
- return link(p, data, offset)
- }
- return 0, nil
- }
- func maybeInlineFootnoteOrSuper(p *Parser, data []byte, offset int) (int, ast.Node) {
- if offset < len(data)-1 && data[offset+1] == '[' {
- return link(p, data, offset)
- }
- if p.extensions&SuperSubscript != 0 {
- ret := skipUntilChar(data[offset:], 1, '^')
- if ret == 0 {
- return 0, nil
- }
- for i := offset; i < offset+ret; i++ {
- if IsSpace(data[i]) && !isEscape(data, i) {
- return 0, nil
- }
- }
- sup := &ast.Superscript{}
- sup.Literal = data[offset+1 : offset+ret]
- return ret + 1, sup
- }
- return 0, nil
- }
- // '[': parse a link or an image or a footnote or a citation
- func link(p *Parser, data []byte, offset int) (int, ast.Node) {
- // no links allowed inside regular links, footnotes, and deferred footnotes
- if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
- return 0, nil
- }
- var t linkType
- switch {
- // special case: ![^text] == deferred footnote (that follows something with
- // an exclamation point)
- case p.extensions&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
- t = linkDeferredFootnote
- // ![alt] == image
- case offset >= 0 && data[offset] == '!':
- t = linkImg
- offset++
- // [@citation], [@-citation], [@?citation], [@!citation]
- case p.extensions&Mmark != 0 && len(data)-1 > offset && data[offset+1] == '@':
- t = linkCitation
- // [text] == regular link
- // ^[text] == inline footnote
- // [^refId] == deferred footnote
- case p.extensions&Footnotes != 0:
- if offset >= 0 && data[offset] == '^' {
- t = linkInlineFootnote
- offset++
- } else if len(data)-1 > offset && data[offset+1] == '^' {
- t = linkDeferredFootnote
- }
- default:
- t = linkNormal
- }
- data = data[offset:]
- if t == linkCitation {
- return citation(p, data, 0)
- }
- var (
- i = 1
- noteID int
- title, link, linkID, altContent []byte
- textHasNl = false
- )
- if t == linkDeferredFootnote {
- i++
- }
- // look for the matching closing bracket
- for level := 1; level > 0 && i < len(data); i++ {
- switch {
- case data[i] == '\n':
- textHasNl = true
- case data[i-1] == '\\':
- continue
- case data[i] == '[':
- level++
- case data[i] == ']':
- level--
- if level <= 0 {
- i-- // compensate for extra i++ in for loop
- }
- }
- }
- if i >= len(data) {
- return 0, nil
- }
- txtE := i
- i++
- var footnoteNode ast.Node
- // skip any amount of whitespace or newline
- // (this is much more lax than original markdown syntax)
- i = skipSpace(data, i)
- // inline style link
- switch {
- case i < len(data) && data[i] == '(':
- // skip initial whitespace
- i++
- i = skipSpace(data, i)
- linkB := i
- brace := 0
- // look for link end: ' " )
- findlinkend:
- for i < len(data) {
- switch {
- case data[i] == '\\':
- i += 2
- case data[i] == '(':
- brace++
- i++
- case data[i] == ')':
- if brace <= 0 {
- break findlinkend
- }
- brace--
- i++
- case data[i] == '\'' || data[i] == '"':
- break findlinkend
- default:
- i++
- }
- }
- if i >= len(data) {
- return 0, nil
- }
- linkE := i
- // look for title end if present
- titleB, titleE := 0, 0
- if data[i] == '\'' || data[i] == '"' {
- i++
- titleB = i
- titleEndCharFound := false
- findtitleend:
- for i < len(data) {
- switch {
- case data[i] == '\\':
- i++
- case data[i] == data[titleB-1]: // matching title delimiter
- titleEndCharFound = true
- case titleEndCharFound && data[i] == ')':
- break findtitleend
- }
- i++
- }
- if i >= len(data) {
- return 0, nil
- }
- // skip whitespace after title
- titleE = i - 1
- for titleE > titleB && IsSpace(data[titleE]) {
- titleE--
- }
- // check for closing quote presence
- if data[titleE] != '\'' && data[titleE] != '"' {
- titleB, titleE = 0, 0
- linkE = i
- }
- }
- // remove whitespace at the end of the link
- for linkE > linkB && IsSpace(data[linkE-1]) {
- linkE--
- }
- // remove optional angle brackets around the link
- if data[linkB] == '<' {
- linkB++
- }
- if data[linkE-1] == '>' {
- linkE--
- }
- // build escaped link and title
- if linkE > linkB {
- link = data[linkB:linkE]
- }
- if titleE > titleB {
- title = data[titleB:titleE]
- }
- i++
- // reference style link
- case isReferenceStyleLink(data, i, t):
- var id []byte
- altContentConsidered := false
- // look for the id
- i++
- linkB := i
- i = skipUntilChar(data, i, ']')
- if i >= len(data) {
- return 0, nil
- }
- linkE := i
- // find the reference
- if linkB == linkE {
- if textHasNl {
- var b bytes.Buffer
- for j := 1; j < txtE; j++ {
- switch {
- case data[j] != '\n':
- b.WriteByte(data[j])
- case data[j-1] != ' ':
- b.WriteByte(' ')
- }
- }
- id = b.Bytes()
- } else {
- id = data[1:txtE]
- altContentConsidered = true
- }
- } else {
- id = data[linkB:linkE]
- }
- // find the reference with matching id
- lr, ok := p.getRef(string(id))
- if !ok {
- return 0, nil
- }
- // keep link and title from reference
- linkID = id
- link = lr.link
- title = lr.title
- if altContentConsidered {
- altContent = lr.text
- }
- i++
- // shortcut reference style link or reference or inline footnote
- default:
- var id []byte
- // craft the id
- if textHasNl {
- var b bytes.Buffer
- for j := 1; j < txtE; j++ {
- switch {
- case data[j] != '\n':
- b.WriteByte(data[j])
- case data[j-1] != ' ':
- b.WriteByte(' ')
- }
- }
- id = b.Bytes()
- } else {
- if t == linkDeferredFootnote {
- id = data[2:txtE] // get rid of the ^
- } else {
- id = data[1:txtE]
- }
- }
- footnoteNode = &ast.ListItem{}
- if t == linkInlineFootnote {
- // create a new reference
- noteID = len(p.notes) + 1
- var fragment []byte
- if len(id) > 0 {
- if len(id) < 16 {
- fragment = make([]byte, len(id))
- } else {
- fragment = make([]byte, 16)
- }
- copy(fragment, slugify(id))
- } else {
- fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
- }
- ref := &reference{
- noteID: noteID,
- hasBlock: false,
- link: fragment,
- title: id,
- footnote: footnoteNode,
- }
- p.notes = append(p.notes, ref)
- p.refsRecord[string(ref.link)] = struct{}{}
- link = ref.link
- title = ref.title
- } else {
- // find the reference with matching id
- lr, ok := p.getRef(string(id))
- if !ok {
- return 0, nil
- }
- if t == linkDeferredFootnote && !p.isFootnote(lr) {
- lr.noteID = len(p.notes) + 1
- lr.footnote = footnoteNode
- p.notes = append(p.notes, lr)
- p.refsRecord[string(lr.link)] = struct{}{}
- }
- // keep link and title from reference
- link = lr.link
- // if inline footnote, title == footnote contents
- title = lr.title
- noteID = lr.noteID
- if len(lr.text) > 0 {
- altContent = lr.text
- }
- }
- // rewind the whitespace
- i = txtE + 1
- }
- var uLink []byte
- if t == linkNormal || t == linkImg {
- if len(link) > 0 {
- var uLinkBuf bytes.Buffer
- unescapeText(&uLinkBuf, link)
- uLink = uLinkBuf.Bytes()
- }
- // links need something to click on and somewhere to go
- // [](http://bla) is legal in CommonMark, so allow txtE <=1 for linkNormal
- // [bla]() is also legal in CommonMark, so allow empty uLink
- }
- // call the relevant rendering function
- switch t {
- case linkNormal:
- link := &ast.Link{
- Destination: normalizeURI(uLink),
- Title: title,
- DeferredID: linkID,
- }
- if len(altContent) > 0 {
- ast.AppendChild(link, newTextNode(altContent))
- } else {
- // links cannot contain other links, so turn off link parsing
- // temporarily and recurse
- insideLink := p.insideLink
- p.insideLink = true
- p.Inline(link, data[1:txtE])
- p.insideLink = insideLink
- }
- return i, link
- case linkImg:
- image := &ast.Image{
- Destination: uLink,
- Title: title,
- }
- ast.AppendChild(image, newTextNode(data[1:txtE]))
- return i + 1, image
- case linkInlineFootnote, linkDeferredFootnote:
- link := &ast.Link{
- Destination: link,
- Title: title,
- NoteID: noteID,
- Footnote: footnoteNode,
- }
- if t == linkDeferredFootnote {
- link.DeferredID = data[2:txtE]
- }
- if t == linkInlineFootnote {
- i++
- }
- return i, link
- default:
- return 0, nil
- }
- }
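- // sketchInlineLink is an illustrative example only; it assumes NewWithExtensions
- // and CommonExtensions from elsewhere in this package. Only the inline form is
- // exercised: reference-style links and footnotes also need their definitions to
- // have been collected by the block pass (see p.getRef and p.notes).
- func sketchInlineLink() ast.Node {
- p := NewWithExtensions(CommonExtensions)
- // [text](/uri "title") -> *ast.Link{Destination: "/uri", Title: "title"} with a Text child
- // ![alt](/img.png) -> *ast.Image (dispatched through maybeImage on '!')
- _, node := link(p, []byte(`[text](/uri "title")`), 0)
- return node
- }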
- func (p *Parser) inlineHTMLComment(data []byte) int {
- if len(data) < 5 {
- return 0
- }
- if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
- return 0
- }
- i := 5
- // scan for an end-of-comment marker, across lines if necessary
- for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
- i++
- }
- // no end-of-comment marker
- if i >= len(data) {
- return 0
- }
- return i + 1
- }
- func stripMailto(link []byte) []byte {
- if bytes.HasPrefix(link, []byte("mailto://")) {
- return link[9:]
- } else if bytes.HasPrefix(link, []byte("mailto:")) {
- return link[7:]
- } else {
- return link
- }
- }
- // autolinkType specifies a kind of autolink that gets detected.
- type autolinkType int
- // These are the possible flag values for the autolink renderer.
- const (
- notAutolink autolinkType = iota
- normalAutolink
- emailAutolink
- )
- // '<' when tags or autolinks are allowed
- func leftAngle(p *Parser, data []byte, offset int) (int, ast.Node) {
- data = data[offset:]
- if p.extensions&Mmark != 0 {
- id, consumed := IsCallout(data)
- if consumed > 0 {
- node := &ast.Callout{}
- node.ID = id
- return consumed, node
- }
- }
- altype, end := tagLength(data)
- if size := p.inlineHTMLComment(data); size > 0 {
- end = size
- }
- if end <= 2 {
- return end, nil
- }
- if altype == notAutolink {
- htmlTag := &ast.HTMLSpan{}
- htmlTag.Literal = data[:end]
- return end, htmlTag
- }
- var uLink bytes.Buffer
- unescapeText(&uLink, data[1:end+1-2])
- if uLink.Len() <= 0 {
- return end, nil
- }
- link := uLink.Bytes()
- node := &ast.Link{
- Destination: link,
- }
- if altype == emailAutolink {
- node.Destination = append([]byte("mailto:"), link...)
- }
- ast.AppendChild(node, newTextNode(stripMailto(link)))
- return end, node
- }
- // '\\' backslash escape
- var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~^$")
- func escape(p *Parser, data []byte, offset int) (int, ast.Node) {
- data = data[offset:]
- if len(data) <= 1 {
- return 2, nil
- }
- if p.extensions&NonBlockingSpace != 0 && data[1] == ' ' {
- return 2, &ast.NonBlockingSpace{}
- }
- if p.extensions&BackslashLineBreak != 0 && data[1] == '\n' {
- return 2, &ast.Hardbreak{}
- }
- if bytes.IndexByte(escapeChars, data[1]) < 0 {
- return 0, nil
- }
- return 2, newTextNode(data[1:2])
- }
- func unescapeText(ob *bytes.Buffer, src []byte) {
- i := 0
- for i < len(src) {
- org := i
- for i < len(src) && src[i] != '\\' {
- i++
- }
- if i > org {
- ob.Write(src[org:i])
- }
- if i+1 >= len(src) {
- break
- }
- ob.WriteByte(src[i+1])
- i += 2
- }
- }
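- // sketchUnescape is an illustrative example only: unescapeText drops the
- // backslash of each two-byte escape sequence and keeps the escaped byte itself.
- func sketchUnescape() []byte {
- var buf bytes.Buffer
- unescapeText(&buf, []byte(`a\*b\_c`))
- return buf.Bytes() // "a*b_c"
- }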
- // '&' escaped when it doesn't belong to an entity
- // valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
- func entity(p *Parser, data []byte, offset int) (int, ast.Node) {
- data = data[offset:]
- end := skipCharN(data, 1, '#', 1)
- end = skipAlnum(data, end)
- if end < len(data) && data[end] == ';' {
- end++ // real entity
- } else {
- return 0, nil // lone '&'
- }
- ent := data[:end]
- // undo &amp; escaping or it will be converted to &amp;amp; by another
- // escaper in the renderer
- if bytes.Equal(ent, []byte("&amp;")) {
- return end, newTextNode([]byte{'&'})
- }
- if len(ent) < 4 {
- return end, newTextNode(ent)
- }
- // if ent consists solely of digits (hex or decimal), convert that Unicode code point to the actual rune
- codepoint := uint64(0)
- var err error
- if ent[2] == 'x' || ent[2] == 'X' { // hexadecimal
- codepoint, err = strconv.ParseUint(string(ent[3:len(ent)-1]), 16, 64)
- } else {
- codepoint, err = strconv.ParseUint(string(ent[2:len(ent)-1]), 10, 64)
- }
- if err == nil { // only return here if the conversion was valid
- return end, newTextNode([]byte(string(rune(codepoint))))
- }
- return end, newTextNode(ent)
- }
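- // sketchEntity is an illustrative example only; it assumes NewWithExtensions and
- // CommonExtensions from elsewhere in this package. Numeric references are decoded
- // to their rune, named entities pass through verbatim, and &amp; is unescaped so
- // the renderer does not double-escape it.
- func sketchEntity() (ast.Node, ast.Node) {
- p := NewWithExtensions(CommonExtensions)
- _, amp := entity(p, []byte("&amp;"), 0) // -> Text node containing "&"
- _, heart := entity(p, []byte("&#x2764;"), 0) // -> Text node containing the rune U+2764
- return amp, heart
- }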
- func linkEndsWithEntity(data []byte, linkEnd int) bool {
- entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
- return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
- }
- // hasPrefixCaseInsensitive is a custom implementation of
- //
- // strings.HasPrefix(strings.ToLower(s), prefix)
- //
- // we rolled our own because strings.ToLower pulls in the full Unicode
- // lowercasing machinery, which is slow. Since this func is only used on
- // ASCII protocol prefixes, we can take shortcuts.
- func hasPrefixCaseInsensitive(s, prefix []byte) bool {
- if len(s) < len(prefix) {
- return false
- }
- delta := byte('a' - 'A')
- for i, b := range prefix {
- if b != s[i] && b != s[i]+delta {
- return false
- }
- }
- return true
- }
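- // sketchPrefixMatch is an illustrative example only: the prefix is expected to
- // be lowercase ASCII, and each byte of s is accepted either as-is or as the
- // uppercase form of the corresponding prefix byte, which is all the protocol
- // prefixes below require.
- func sketchPrefixMatch() (bool, bool) {
- yes := hasPrefixCaseInsensitive([]byte("HTTPS://Example.com"), []byte("https://"))
- no := hasPrefixCaseInsensitive([]byte("gopher://x"), []byte("https://"))
- return yes, no // true, false
- }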
- var protocolPrefixes = [][]byte{
- []byte("http://"),
- []byte("https://"),
- []byte("ftp://"),
- []byte("file://"),
- []byte("mailto:"),
- }
- const shortestPrefix = 6 // len("ftp://"), the shortest of the above
- func maybeAutoLink(p *Parser, data []byte, offset int) (int, ast.Node) {
- // quick check to rule out most false hits
- if p.insideLink || len(data) < offset+shortestPrefix {
- return 0, nil
- }
- for _, prefix := range protocolPrefixes {
- endOfHead := offset + 8 // 8 is the len() of the longest prefix
- if endOfHead > len(data) {
- endOfHead = len(data)
- }
- if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) {
- return autoLink(p, data, offset)
- }
- }
- return 0, nil
- }
- func autoLink(p *Parser, data []byte, offset int) (int, ast.Node) {
- // Now a more expensive check to see if we're not inside an anchor element
- anchorStart := offset
- offsetFromAnchor := 0
- for anchorStart > 0 && data[anchorStart] != '<' {
- anchorStart--
- offsetFromAnchor++
- }
- anchorStr := anchorRe.Find(data[anchorStart:])
- if anchorStr != nil {
- anchorClose := &ast.HTMLSpan{}
- anchorClose.Literal = anchorStr[offsetFromAnchor:]
- return len(anchorStr) - offsetFromAnchor, anchorClose
- }
- // scan backward for a word boundary
- rewind := 0
- for offset-rewind > 0 && rewind <= 7 && IsLetter(data[offset-rewind-1]) {
- rewind++
- }
- if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
- return 0, nil
- }
- origData := data
- data = data[offset-rewind:]
- isSafeURL := p.IsSafeURLOverride
- if isSafeURL == nil {
- isSafeURL = IsSafeURL
- }
- if !isSafeURL(data) {
- return 0, nil
- }
- linkEnd := 0
- for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
- linkEnd++
- }
- // Skip punctuation at the end of the link
- if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
- linkEnd--
- }
- // But don't skip the semicolon if it's part of an escaped entity:
- if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
- linkEnd--
- }
- // See if the link finishes with a punctuation sign that can be closed.
- var copen byte
- switch data[linkEnd-1] {
- case '"':
- copen = '"'
- case '\'':
- copen = '\''
- case ')':
- copen = '('
- case ']':
- copen = '['
- case '}':
- copen = '{'
- default:
- copen = 0
- }
- if copen != 0 {
- bufEnd := offset - rewind + linkEnd - 2
- openDelim := 1
- /* Try to close the final punctuation sign in this same line;
- * if we managed to close it outside of the URL, that means that it's
- * not part of the URL. If it closes inside the URL, that means it
- * is part of the URL.
- *
- * Examples:
- *
- * foo http://www.pokemon.com/Pikachu_(Electric) bar
- * => http://www.pokemon.com/Pikachu_(Electric)
- *
- * foo (http://www.pokemon.com/Pikachu_(Electric)) bar
- * => http://www.pokemon.com/Pikachu_(Electric)
- *
- * foo http://www.pokemon.com/Pikachu_(Electric)) bar
- * => http://www.pokemon.com/Pikachu_(Electric))
- *
- * (foo http://www.pokemon.com/Pikachu_(Electric)) bar
- * => foo http://www.pokemon.com/Pikachu_(Electric)
- */
- for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
- if origData[bufEnd] == data[linkEnd-1] {
- openDelim++
- }
- if origData[bufEnd] == copen {
- openDelim--
- }
- bufEnd--
- }
- if openDelim == 0 {
- linkEnd--
- }
- }
- var uLink bytes.Buffer
- unescapeText(&uLink, data[:linkEnd])
- if uLink.Len() > 0 {
- node := &ast.Link{
- Destination: uLink.Bytes(),
- }
- ast.AppendChild(node, newTextNode(uLink.Bytes()))
- return linkEnd, node
- }
- return linkEnd, nil
- }
- func isEndOfLink(char byte) bool {
- return IsSpace(char) || char == '<'
- }
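- // sketchAutoLink is an illustrative example only; it assumes NewWithExtensions
- // and CommonExtensions from elsewhere in this package. maybeAutoLink fires on a
- // known protocol prefix and autoLink then trims a trailing '.' or ',' so that
- // punctuation stays in the surrounding text.
- func sketchAutoLink() ast.Node {
- p := NewWithExtensions(CommonExtensions)
- _, node := maybeAutoLink(p, []byte("see https://example.com/a, ok"), 4)
- return node // -> *ast.Link{Destination: "https://example.com/a"}
- }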
- // return the length of the given tag, or 0 if it's not valid
- func tagLength(data []byte) (autolink autolinkType, end int) {
- var i, j int
- // a valid tag can't be shorter than 3 chars
- if len(data) < 3 {
- return notAutolink, 0
- }
- // begins with a '<' optionally followed by '/', followed by letter or number
- if data[0] != '<' {
- return notAutolink, 0
- }
- if data[1] == '/' {
- i = 2
- } else {
- i = 1
- }
- if !IsAlnum(data[i]) {
- return notAutolink, 0
- }
- // scheme test
- autolink = notAutolink
- // try to find the beginning of a URI
- for i < len(data) && (IsAlnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
- i++
- }
- if i > 1 && i < len(data) && data[i] == '@' {
- if j = isMailtoAutoLink(data[i:]); j != 0 {
- return emailAutolink, i + j
- }
- }
- if i > 2 && i < len(data) && data[i] == ':' {
- autolink = normalAutolink
- i++
- }
- // complete autolink test: no whitespace or ' or "
- switch {
- case i >= len(data):
- autolink = notAutolink
- case autolink != notAutolink:
- j = i
- for i < len(data) {
- if data[i] == '\\' {
- i += 2
- } else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || IsSpace(data[i]) {
- break
- } else {
- i++
- }
- }
- if i >= len(data) {
- return autolink, 0
- }
- if i > j && data[i] == '>' {
- return autolink, i + 1
- }
- // one of the forbidden chars has been found
- autolink = notAutolink
- }
- // look for the closing '>' of the tag
- idx := bytes.IndexByte(data[i:], '>')
- if idx < 0 {
- return autolink, 0
- }
- return autolink, i + idx + 1
- }
- // look for the address part of a mail autolink and '>'
- // this is less strict than the original markdown e-mail address matching
- func isMailtoAutoLink(data []byte) int {
- nb := 0
- // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
- for i, c := range data {
- if IsAlnum(c) {
- continue
- }
- switch c {
- case '@':
- nb++
- case '-', '.', '_':
- // allowed in the address; no-op, but don't fall through to default
- case '>':
- if nb == 1 {
- return i + 1
- }
- return 0
- default:
- return 0
- }
- }
- return 0
- }
- // look for the next emph char, skipping other constructs
- func helperFindEmphChar(data []byte, c byte) int {
- i := 0
- for i < len(data) {
- for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
- i++
- }
- if i >= len(data) {
- return 0
- }
- // do not count escaped chars
- if i != 0 && data[i-1] == '\\' {
- i++
- continue
- }
- if data[i] == c {
- return i
- }
- if data[i] == '`' {
- // skip a code span
- tmpI := 0
- i++
- for i < len(data) && data[i] != '`' {
- if tmpI == 0 && data[i] == c {
- tmpI = i
- }
- i++
- }
- if i >= len(data) {
- return tmpI
- }
- i++
- } else if data[i] == '[' {
- // skip a link
- tmpI := 0
- i++
- for i < len(data) && data[i] != ']' {
- if tmpI == 0 && data[i] == c {
- tmpI = i
- }
- i++
- }
- i++
- for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
- i++
- }
- if i >= len(data) {
- return tmpI
- }
- if data[i] != '[' && data[i] != '(' { // not a link
- if tmpI > 0 {
- return tmpI
- }
- continue
- }
- cc := data[i]
- i++
- for i < len(data) && data[i] != cc {
- if tmpI == 0 && data[i] == c {
- return i
- }
- i++
- }
- if i >= len(data) {
- return tmpI
- }
- i++
- }
- }
- return 0
- }
- func helperEmphasis(p *Parser, data []byte, c byte) (int, ast.Node) {
- i := 0
- // skip one symbol if coming from emph3
- if len(data) > 1 && data[0] == c && data[1] == c {
- i = 1
- }
- for i < len(data) {
- length := helperFindEmphChar(data[i:], c)
- if length == 0 {
- return 0, nil
- }
- i += length
- if i >= len(data) {
- return 0, nil
- }
- if i+1 < len(data) && data[i+1] == c {
- i++
- continue
- }
- if data[i] == c && !IsSpace(data[i-1]) {
- if p.extensions&NoIntraEmphasis != 0 {
- if !(i+1 == len(data) || IsSpace(data[i+1]) || IsPunctuation(data[i+1])) {
- continue
- }
- }
- emph := &ast.Emph{}
- p.Inline(emph, data[:i])
- return i + 1, emph
- }
- }
- return 0, nil
- }
- func helperDoubleEmphasis(p *Parser, data []byte, c byte) (int, ast.Node) {
- i := 0
- for i < len(data) {
- length := helperFindEmphChar(data[i:], c)
- if length == 0 {
- return 0, nil
- }
- i += length
- if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !IsSpace(data[i-1]) {
- var node ast.Node = &ast.Strong{}
- if c == '~' {
- node = &ast.Del{}
- }
- p.Inline(node, data[:i])
- return i + 2, node
- }
- i++
- }
- return 0, nil
- }
- func helperTripleEmphasis(p *Parser, data []byte, offset int, c byte) (int, ast.Node) {
- i := 0
- origData := data
- data = data[offset:]
- for i < len(data) {
- length := helperFindEmphChar(data[i:], c)
- if length == 0 {
- return 0, nil
- }
- i += length
- // skip symbols preceded by whitespace
- if data[i] != c || IsSpace(data[i-1]) {
- continue
- }
- switch {
- case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
- // triple symbol found
- strong := &ast.Strong{}
- em := &ast.Emph{}
- ast.AppendChild(strong, em)
- p.Inline(em, data[:i])
- return i + 3, strong
- case i+1 < len(data) && data[i+1] == c:
- // double symbol found, hand over to emph1
- length, node := helperEmphasis(p, origData[offset-2:], c)
- if length == 0 {
- return 0, nil
- }
- return length - 2, node
- default:
- // single symbol found, hand over to emph2
- length, node := helperDoubleEmphasis(p, origData[offset-1:], c)
- if length == 0 {
- return 0, nil
- }
- return length - 1, node
- }
- }
- return 0, nil
- }
- // math handles inline math wrapped in '$'
- func math(p *Parser, data []byte, offset int) (int, ast.Node) {
- data = data[offset:]
- // too short, or block math
- if len(data) <= 2 || data[1] == '$' {
- return 0, nil
- }
- // find next '$'
- var end int
- for end = 1; end < len(data) && data[end] != '$'; end++ {
- }
- // no matching '$' found
- if end == len(data) {
- return 0, nil
- }
- // create inline math node
- math := &ast.Math{}
- math.Literal = data[1:end]
- return end + 1, math
- }
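- // sketchMath is an illustrative example only; it assumes NewWithExtensions and
- // CommonExtensions from elsewhere in this package (the handler itself does not
- // consult the parser). A single '$' pair yields an *ast.Math node, while a
- // leading "$$" is rejected here and left for block-level math.
- func sketchMath() ast.Node {
- p := NewWithExtensions(CommonExtensions)
- _, node := math(p, []byte("$x^2 + y^2$"), 0)
- return node // -> *ast.Math{Literal: "x^2 + y^2"}
- }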
- func newTextNode(d []byte) *ast.Text {
- return &ast.Text{Leaf: ast.Leaf{Literal: d}}
- }
- func normalizeURI(s []byte) []byte {
- return s // TODO: implement
- }