inline.go 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323
  1. package parser
  2. import (
  3. "bytes"
  4. "regexp"
  5. "strconv"
  6. "github.com/gomarkdown/markdown/ast"
  7. )
  8. // Parsing of inline elements
var (
	// urlRe is a regexp source fragment (it is only ever concatenated into
	// anchorRe, never compiled on its own). It matches "http://", "https://",
	// "ftp://" or a leading "/" followed by one or more characters from the
	// listed URL character class.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	// anchorRe matches, anchored at the start of the input, a complete
	// `<a href="URL" [title="..."]>URL</a>` element where both URL parts match
	// urlRe. It is compiled once at package initialisation.
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)

	// htmlEntityRe matches '&', two to five lowercase ASCII letters and ';'.
	// TODO: improve this regexp to catch all possible entities:
	htmlEntityRe = regexp.MustCompile(`&[a-z]{2,5};`)
)
  15. // Inline parses text within a block.
  16. // Each function returns the number of consumed chars.
  17. func (p *Parser) Inline(currBlock ast.Node, data []byte) {
  18. // handlers might call us recursively: enforce a maximum depth
  19. if p.nesting >= p.maxNesting || len(data) == 0 {
  20. return
  21. }
  22. p.nesting++
  23. beg, end := 0, 0
  24. n := len(data)
  25. for end < n {
  26. handler := p.inlineCallback[data[end]]
  27. if handler == nil {
  28. end++
  29. continue
  30. }
  31. consumed, node := handler(p, data, end)
  32. if consumed == 0 {
  33. // no action from the callback
  34. end++
  35. continue
  36. }
  37. // copy inactive chars into the output
  38. ast.AppendChild(currBlock, newTextNode(data[beg:end]))
  39. if node != nil {
  40. ast.AppendChild(currBlock, node)
  41. }
  42. beg = end + consumed
  43. end = beg
  44. }
  45. if beg < n {
  46. if data[end-1] == '\n' {
  47. end--
  48. }
  49. ast.AppendChild(currBlock, newTextNode(data[beg:end]))
  50. }
  51. p.nesting--
  52. }
  53. // single and double emphasis parsing
  54. func emphasis(p *Parser, data []byte, offset int) (int, ast.Node) {
  55. data = data[offset:]
  56. c := data[0]
  57. n := len(data)
  58. if n > 2 && data[1] != c {
  59. // whitespace cannot follow an opening emphasis;
  60. // strikethrough only takes two characters '~~'
  61. if IsSpace(data[1]) {
  62. return 0, nil
  63. }
  64. if p.extensions&SuperSubscript != 0 && c == '~' {
  65. // potential subscript, no spaces, except when escaped, helperEmphasis does
  66. // not check that for us, so walk the bytes and check.
  67. ret := skipUntilChar(data[1:], 0, c)
  68. if ret == 0 {
  69. return 0, nil
  70. }
  71. ret++ // we started with data[1:] above.
  72. for i := 1; i < ret; i++ {
  73. if IsSpace(data[i]) && !isEscape(data, i) {
  74. return 0, nil
  75. }
  76. }
  77. sub := &ast.Subscript{}
  78. sub.Literal = data[1:ret]
  79. return ret + 1, sub
  80. }
  81. ret, node := helperEmphasis(p, data[1:], c)
  82. if ret == 0 {
  83. return 0, nil
  84. }
  85. return ret + 1, node
  86. }
  87. if n > 3 && data[1] == c && data[2] != c {
  88. if IsSpace(data[2]) {
  89. return 0, nil
  90. }
  91. ret, node := helperDoubleEmphasis(p, data[2:], c)
  92. if ret == 0 {
  93. return 0, nil
  94. }
  95. return ret + 2, node
  96. }
  97. if n > 4 && data[1] == c && data[2] == c && data[3] != c {
  98. if c == '~' || IsSpace(data[3]) {
  99. return 0, nil
  100. }
  101. ret, node := helperTripleEmphasis(p, data, 3, c)
  102. if ret == 0 {
  103. return 0, nil
  104. }
  105. return ret + 3, node
  106. }
  107. return 0, nil
  108. }
  109. func codeSpan(p *Parser, data []byte, offset int) (int, ast.Node) {
  110. data = data[offset:]
  111. // count the number of backticks in the delimiter
  112. nb := skipChar(data, 0, '`')
  113. // find the next delimiter
  114. i, end := 0, 0
  115. hasLFBeforeDelimiter := false
  116. for end = nb; end < len(data) && i < nb; end++ {
  117. if data[end] == '\n' {
  118. hasLFBeforeDelimiter = true
  119. }
  120. if data[end] == '`' {
  121. i++
  122. } else {
  123. i = 0
  124. }
  125. }
  126. // no matching delimiter?
  127. if i < nb && end >= len(data) {
  128. return 0, nil
  129. }
  130. // If there are non-space chars after the ending delimiter and before a '\n',
  131. // flag that this is not a well formed fenced code block.
  132. hasCharsAfterDelimiter := false
  133. for j := end; j < len(data); j++ {
  134. if data[j] == '\n' {
  135. break
  136. }
  137. if !IsSpace(data[j]) {
  138. hasCharsAfterDelimiter = true
  139. break
  140. }
  141. }
  142. // trim outside whitespace
  143. fBegin := nb
  144. for fBegin < end && data[fBegin] == ' ' {
  145. fBegin++
  146. }
  147. fEnd := end - nb
  148. for fEnd > fBegin && data[fEnd-1] == ' ' {
  149. fEnd--
  150. }
  151. if fBegin == fEnd {
  152. return end, nil
  153. }
  154. // if delimiter has 3 backticks
  155. if nb == 3 {
  156. i := fBegin
  157. syntaxStart, syntaxLen := syntaxRange(data, &i)
  158. // If we found a '\n' before the end marker and there are only spaces
  159. // after the end marker, then this is a code block.
  160. if hasLFBeforeDelimiter && !hasCharsAfterDelimiter {
  161. codeblock := &ast.CodeBlock{
  162. IsFenced: true,
  163. Info: data[syntaxStart : syntaxStart+syntaxLen],
  164. }
  165. codeblock.Literal = data[i:fEnd]
  166. return end, codeblock
  167. }
  168. }
  169. // render the code span
  170. code := &ast.Code{}
  171. code.Literal = data[fBegin:fEnd]
  172. return end, code
  173. }
  174. // newline preceded by two spaces becomes <br>
  175. func maybeLineBreak(p *Parser, data []byte, offset int) (int, ast.Node) {
  176. origOffset := offset
  177. offset = skipChar(data, offset, ' ')
  178. if offset < len(data) && data[offset] == '\n' {
  179. if offset-origOffset >= 2 {
  180. return offset - origOffset + 1, &ast.Hardbreak{}
  181. }
  182. return offset - origOffset, nil
  183. }
  184. return 0, nil
  185. }
  186. // newline without two spaces works when HardLineBreak is enabled
  187. func lineBreak(p *Parser, data []byte, offset int) (int, ast.Node) {
  188. if p.extensions&HardLineBreak != 0 {
  189. return 1, &ast.Hardbreak{}
  190. }
  191. return 0, nil
  192. }
// linkType identifies which bracketed construct the link parser is handling.
type linkType int

// The kinds of construct recognised by link.
const (
	// linkNormal is a regular link: [text]
	linkNormal linkType = iota
	// linkImg is an image: ![alt]
	linkImg
	// linkDeferredFootnote is a footnote reference: [^refId]
	linkDeferredFootnote
	// linkInlineFootnote is a footnote written in place: ^[text]
	linkInlineFootnote
	// linkCitation is a citation: [@citation]
	linkCitation
)
  201. func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
  202. if t == linkDeferredFootnote {
  203. return false
  204. }
  205. return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
  206. }
  207. func maybeImage(p *Parser, data []byte, offset int) (int, ast.Node) {
  208. if offset < len(data)-1 && data[offset+1] == '[' {
  209. return link(p, data, offset)
  210. }
  211. return 0, nil
  212. }
  213. func maybeInlineFootnoteOrSuper(p *Parser, data []byte, offset int) (int, ast.Node) {
  214. if offset < len(data)-1 && data[offset+1] == '[' {
  215. return link(p, data, offset)
  216. }
  217. if p.extensions&SuperSubscript != 0 {
  218. ret := skipUntilChar(data[offset:], 1, '^')
  219. if ret == 0 {
  220. return 0, nil
  221. }
  222. for i := offset; i < offset+ret; i++ {
  223. if IsSpace(data[i]) && !isEscape(data, i) {
  224. return 0, nil
  225. }
  226. }
  227. sup := &ast.Superscript{}
  228. sup.Literal = data[offset+1 : offset+ret]
  229. return ret + 1, sup
  230. }
  231. return 0, nil
  232. }
// '[': parse a link or an image or a footnote or a citation
//
// link inspects the bytes around data[offset] to decide which construct is
// being parsed (see linkType), locates the closing ']' of the bracketed text
// and then handles one of three forms:
//
//   - inline:    [text](destination "title")
//   - reference: [text][id] (an empty id falls back to the text)
//   - shortcut:  [text], [^footnote] or ^[inline footnote]
//
// It returns the number of bytes consumed and the resulting node, or (0, nil)
// when the input is malformed or a referenced id is unknown to p.getRef.
func link(p *Parser, data []byte, offset int) (int, ast.Node) {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0, nil
	}

	var t linkType
	switch {
	// special case: ![^text] == deferred footnote (that follows something with
	// an exclamation point)
	case p.extensions&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
		t = linkDeferredFootnote
	// ![alt] == image
	case offset >= 0 && data[offset] == '!':
		t = linkImg
		offset++
	// [@citation], [@-citation], [@?citation], [@!citation]
	case p.extensions&Mmark != 0 && len(data)-1 > offset && data[offset+1] == '@':
		t = linkCitation
	// [text] == regular link
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	case p.extensions&Footnotes != 0:
		if offset >= 0 && data[offset] == '^' {
			t = linkInlineFootnote
			offset++
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	default:
		t = linkNormal
	}

	// from here on all indices are relative to the opening '['
	data = data[offset:]

	if t == linkCitation {
		return citation(p, data, 0)
	}

	var (
		i                               = 1
		noteID                          int
		title, link, linkID, altContent []byte
		textHasNl                       = false
	)

	// step over the '^' of a deferred footnote
	if t == linkDeferredFootnote {
		i++
	}

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		case data[i-1] == '\\':
			// escaped character: does not affect the nesting level
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	if i >= len(data) {
		return 0, nil
	}

	// txtE is the index of the closing ']' of the bracketed text
	txtE := i
	i++
	var footnoteNode ast.Node

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	i = skipSpace(data, i)

	// inline style link
	switch {
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		i = skipSpace(data, i)

		linkB := i
		brace := 0

		// look for link end: ' " )
	findlinkend:
		for i < len(data) {
			switch {
			case data[i] == '\\':
				i += 2

			case data[i] == '(':
				brace++
				i++

			case data[i] == ')':
				// only an unbalanced ')' terminates the destination
				if brace <= 0 {
					break findlinkend
				}
				brace--
				i++

			case data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i
			titleEndCharFound := false

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					// skip the escaped byte (together with the i++ below)
					i++

				case data[i] == data[titleB-1]: // matching title delimiter
					titleEndCharFound = true

				case titleEndCharFound && data[i] == ')':
					break findtitleend
				}
				i++
			}

			if i >= len(data) {
				return 0, nil
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && IsSpace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence
			if data[titleE] != '\'' && data[titleE] != '"' {
				// no closing quote: everything up to ')' is the destination
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && IsSpace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}

		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		// step past the closing ')'
		i++

	// reference style link
	case isReferenceStyleLink(data, i, t):
		var id []byte
		altContentConsidered := false

		// look for the id
		i++
		linkB := i
		i = skipUntilChar(data, i, ']')

		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			// empty id ("[text][]"): the link text doubles as the id
			if textHasNl {
				// newlines in the text are replaced by a single space unless
				// a space already precedes them
				var b bytes.Buffer

				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
				altContentConsidered = true
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id
		lr, ok := p.getRef(string(id))
		if !ok {
			return 0, nil
		}

		// keep link and title from reference
		linkID = id
		link = lr.link
		title = lr.title
		if altContentConsidered {
			altContent = lr.text
		}
		// step past the closing ']' of the id
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		footnoteNode = &ast.ListItem{}
		if t == linkInlineFootnote {
			// create a new reference
			noteID = len(p.notes) + 1

			// the fragment is the slugified id cut to at most 16 bytes, or
			// "footnote-<n>" when the footnote text is empty
			var fragment []byte
			if len(id) > 0 {
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
			}

			ref := &reference{
				noteID:   noteID,
				hasBlock: false,
				link:     fragment,
				title:    id,
				footnote: footnoteNode,
			}

			p.notes = append(p.notes, ref)
			p.refsRecord[string(ref.link)] = struct{}{}

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.getRef(string(id))
			if !ok {
				return 0, nil
			}

			// first use of this deferred footnote: assign its number and
			// record it in p.notes
			if t == linkDeferredFootnote && !p.isFootnote(lr) {
				lr.noteID = len(p.notes) + 1
				lr.footnote = footnoteNode
				p.notes = append(p.notes, lr)
				p.refsRecord[string(lr.link)] = struct{}{}
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteID = lr.noteID
			if len(lr.text) > 0 {
				altContent = lr.text
			}
		}

		// rewind the whitespace
		i = txtE + 1
	}

	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		// [](http://bla) is legal in CommonMark, so allow txtE <=1 for linkNormal
		// [bla]() is also legal in CommonMark, so allow empty uLink
	}

	// call the relevant rendering function
	switch t {
	case linkNormal:
		link := &ast.Link{
			Destination: normalizeURI(uLink),
			Title:       title,
			DeferredID:  linkID,
		}
		if len(altContent) > 0 {
			ast.AppendChild(link, newTextNode(altContent))
		} else {
			// links cannot contain other links, so turn off link parsing
			// temporarily and recurse
			insideLink := p.insideLink
			p.insideLink = true
			p.Inline(link, data[1:txtE])
			p.insideLink = insideLink
		}
		return i, link

	case linkImg:
		image := &ast.Image{
			Destination: uLink,
			Title:       title,
		}
		ast.AppendChild(image, newTextNode(data[1:txtE]))
		// +1 for the leading '!' that was sliced off data above
		return i + 1, image

	case linkInlineFootnote, linkDeferredFootnote:
		link := &ast.Link{
			Destination: link,
			Title:       title,
			NoteID:      noteID,
			Footnote:    footnoteNode,
		}
		if t == linkDeferredFootnote {
			link.DeferredID = data[2:txtE]
		}
		if t == linkInlineFootnote {
			// +1 for the leading '^' that was sliced off data above
			i++
		}
		return i, link

	default:
		return 0, nil
	}
}
  559. func (p *Parser) inlineHTMLComment(data []byte) int {
  560. if len(data) < 5 {
  561. return 0
  562. }
  563. if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
  564. return 0
  565. }
  566. i := 5
  567. // scan for an end-of-comment marker, across lines if necessary
  568. for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
  569. i++
  570. }
  571. // no end-of-comment marker
  572. if i >= len(data) {
  573. return 0
  574. }
  575. return i + 1
  576. }
  577. func stripMailto(link []byte) []byte {
  578. if bytes.HasPrefix(link, []byte("mailto://")) {
  579. return link[9:]
  580. } else if bytes.HasPrefix(link, []byte("mailto:")) {
  581. return link[7:]
  582. } else {
  583. return link
  584. }
  585. }
// autolinkType specifies a kind of autolink that gets detected.
type autolinkType int

// These are the possible flag values for the autolink renderer.
const (
	// notAutolink marks input that is not an autolink (e.g. a plain tag).
	notAutolink autolinkType = iota
	// normalAutolink marks a "<scheme:...>" style autolink.
	normalAutolink
	// emailAutolink marks a "<user@host>" style autolink.
	emailAutolink
)
  594. // '<' when tags or autolinks are allowed
  595. func leftAngle(p *Parser, data []byte, offset int) (int, ast.Node) {
  596. data = data[offset:]
  597. if p.extensions&Mmark != 0 {
  598. id, consumed := IsCallout(data)
  599. if consumed > 0 {
  600. node := &ast.Callout{}
  601. node.ID = id
  602. return consumed, node
  603. }
  604. }
  605. altype, end := tagLength(data)
  606. if size := p.inlineHTMLComment(data); size > 0 {
  607. end = size
  608. }
  609. if end <= 2 {
  610. return end, nil
  611. }
  612. if altype == notAutolink {
  613. htmlTag := &ast.HTMLSpan{}
  614. htmlTag.Literal = data[:end]
  615. return end, htmlTag
  616. }
  617. var uLink bytes.Buffer
  618. unescapeText(&uLink, data[1:end+1-2])
  619. if uLink.Len() <= 0 {
  620. return end, nil
  621. }
  622. link := uLink.Bytes()
  623. node := &ast.Link{
  624. Destination: link,
  625. }
  626. if altype == emailAutolink {
  627. node.Destination = append([]byte("mailto:"), link...)
  628. }
  629. ast.AppendChild(node, newTextNode(stripMailto(link)))
  630. return end, node
  631. }
  632. // '\\' backslash escape
  633. var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~^$")
  634. func escape(p *Parser, data []byte, offset int) (int, ast.Node) {
  635. data = data[offset:]
  636. if len(data) <= 1 {
  637. return 2, nil
  638. }
  639. if p.extensions&NonBlockingSpace != 0 && data[1] == ' ' {
  640. return 2, &ast.NonBlockingSpace{}
  641. }
  642. if p.extensions&BackslashLineBreak != 0 && data[1] == '\n' {
  643. return 2, &ast.Hardbreak{}
  644. }
  645. if bytes.IndexByte(escapeChars, data[1]) < 0 {
  646. return 0, nil
  647. }
  648. return 2, newTextNode(data[1:2])
  649. }
  650. func unescapeText(ob *bytes.Buffer, src []byte) {
  651. i := 0
  652. for i < len(src) {
  653. org := i
  654. for i < len(src) && src[i] != '\\' {
  655. i++
  656. }
  657. if i > org {
  658. ob.Write(src[org:i])
  659. }
  660. if i+1 >= len(src) {
  661. break
  662. }
  663. ob.WriteByte(src[i+1])
  664. i += 2
  665. }
  666. }
  667. // '&' escaped when it doesn't belong to an entity
  668. // valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
  669. func entity(p *Parser, data []byte, offset int) (int, ast.Node) {
  670. data = data[offset:]
  671. end := skipCharN(data, 1, '#', 1)
  672. end = skipAlnum(data, end)
  673. if end < len(data) && data[end] == ';' {
  674. end++ // real entity
  675. } else {
  676. return 0, nil // lone '&'
  677. }
  678. ent := data[:end]
  679. // undo &amp; escaping or it will be converted to &amp;amp; by another
  680. // escaper in the renderer
  681. if bytes.Equal(ent, []byte("&amp;")) {
  682. return end, newTextNode([]byte{'&'})
  683. }
  684. if len(ent) < 4 {
  685. return end, newTextNode(ent)
  686. }
  687. // if ent consists solely out of numbers (hex or decimal) convert that unicode codepoint to actual rune
  688. codepoint := uint64(0)
  689. var err error
  690. if ent[2] == 'x' || ent[2] == 'X' { // hexadecimal
  691. codepoint, err = strconv.ParseUint(string(ent[3:len(ent)-1]), 16, 64)
  692. } else {
  693. codepoint, err = strconv.ParseUint(string(ent[2:len(ent)-1]), 10, 64)
  694. }
  695. if err == nil { // only if conversion was valid return here.
  696. return end, newTextNode([]byte(string(rune(codepoint))))
  697. }
  698. return end, newTextNode(ent)
  699. }
  700. func linkEndsWithEntity(data []byte, linkEnd int) bool {
  701. entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
  702. return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
  703. }
  704. // hasPrefixCaseInsensitive is a custom implementation of
  705. //
  706. // strings.HasPrefix(strings.ToLower(s), prefix)
  707. //
  708. // we rolled our own because ToLower pulls in a huge machinery of lowercasing
  709. // anything from Unicode and that's very slow. Since this func will only be
  710. // used on ASCII protocol prefixes, we can take shortcuts.
  711. func hasPrefixCaseInsensitive(s, prefix []byte) bool {
  712. if len(s) < len(prefix) {
  713. return false
  714. }
  715. delta := byte('a' - 'A')
  716. for i, b := range prefix {
  717. if b != s[i] && b != s[i]+delta {
  718. return false
  719. }
  720. }
  721. return true
  722. }
// protocolPrefixes lists the lowercase scheme prefixes that maybeAutoLink
// compares (case-insensitively) against the text at the current position.
var protocolPrefixes = [][]byte{
	[]byte("http://"),
	[]byte("https://"),
	[]byte("ftp://"),
	[]byte("file://"),
	[]byte("mailto:"),
}

// shortestPrefix lets maybeAutoLink reject input that is too short to hold
// any of the prefixes.
const shortestPrefix = 6 // len("ftp://"), the shortest of the above
  731. func maybeAutoLink(p *Parser, data []byte, offset int) (int, ast.Node) {
  732. // quick check to rule out most false hits
  733. if p.insideLink || len(data) < offset+shortestPrefix {
  734. return 0, nil
  735. }
  736. for _, prefix := range protocolPrefixes {
  737. endOfHead := offset + 8 // 8 is the len() of the longest prefix
  738. if endOfHead > len(data) {
  739. endOfHead = len(data)
  740. }
  741. if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) {
  742. return autoLink(p, data, offset)
  743. }
  744. }
  745. return 0, nil
  746. }
// autoLink turns a bare URL at data[offset] into a link node.
//
// It first checks whether the position lies inside an
// `<a href="URL">URL</a>` element (see anchorRe); if so the rest of that
// element is returned as an *ast.HTMLSpan. Otherwise the URL is delimited:
// letters directly before offset are included (at most 6), the URL must pass
// the safe-URL check, it ends at the first byte for which isEndOfLink is true,
// and trailing punctuation or an unbalanced closing delimiter is trimmed.
//
// It returns the reported number of consumed bytes and the node, or (0, nil)
// when the text is not accepted as a link.
func autoLink(p *Parser, data []byte, offset int) (int, ast.Node) {
	// Now a more expensive check to see if we're not inside an anchor element
	anchorStart := offset
	offsetFromAnchor := 0
	// walk back to the nearest '<' (or the start of data)
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		// emit the part of the anchor element from offset onwards verbatim
		anchorClose := &ast.HTMLSpan{}
		anchorClose.Literal = anchorStr[offsetFromAnchor:]
		return len(anchorStr) - offsetFromAnchor, anchorClose
	}

	// scan backward for a word boundary
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && IsLetter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0, nil
	}

	// keep the full input for the backward delimiter scan further down
	origData := data
	data = data[offset-rewind:]

	// a parser-specific override replaces the default safety check
	isSafeURL := p.IsSafeURLOverride
	if isSafeURL == nil {
		isSafeURL = IsSafeURL
	}
	if !isSafeURL(data) {
		return 0, nil
	}

	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		// index in origData of the byte just before the closing delimiter
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		// the closing delimiter found its partner: drop it from the link
		if openDelim == 0 {
			linkEnd--
		}
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		// the unescaped URL is both the destination and the visible text
		node := &ast.Link{
			Destination: uLink.Bytes(),
		}
		ast.AppendChild(node, newTextNode(uLink.Bytes()))
		return linkEnd, node
	}

	return linkEnd, nil
}
  852. func isEndOfLink(char byte) bool {
  853. return IsSpace(char) || char == '<'
  854. }
  855. // return the length of the given tag, or 0 is it's not valid
  856. func tagLength(data []byte) (autolink autolinkType, end int) {
  857. var i, j int
  858. // a valid tag can't be shorter than 3 chars
  859. if len(data) < 3 {
  860. return notAutolink, 0
  861. }
  862. // begins with a '<' optionally followed by '/', followed by letter or number
  863. if data[0] != '<' {
  864. return notAutolink, 0
  865. }
  866. if data[1] == '/' {
  867. i = 2
  868. } else {
  869. i = 1
  870. }
  871. if !IsAlnum(data[i]) {
  872. return notAutolink, 0
  873. }
  874. // scheme test
  875. autolink = notAutolink
  876. // try to find the beginning of an URI
  877. for i < len(data) && (IsAlnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
  878. i++
  879. }
  880. if i > 1 && i < len(data) && data[i] == '@' {
  881. if j = isMailtoAutoLink(data[i:]); j != 0 {
  882. return emailAutolink, i + j
  883. }
  884. }
  885. if i > 2 && i < len(data) && data[i] == ':' {
  886. autolink = normalAutolink
  887. i++
  888. }
  889. // complete autolink test: no whitespace or ' or "
  890. switch {
  891. case i >= len(data):
  892. autolink = notAutolink
  893. case autolink != notAutolink:
  894. j = i
  895. for i < len(data) {
  896. if data[i] == '\\' {
  897. i += 2
  898. } else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || IsSpace(data[i]) {
  899. break
  900. } else {
  901. i++
  902. }
  903. }
  904. if i >= len(data) {
  905. return autolink, 0
  906. }
  907. if i > j && data[i] == '>' {
  908. return autolink, i + 1
  909. }
  910. // one of the forbidden chars has been found
  911. autolink = notAutolink
  912. }
  913. i += bytes.IndexByte(data[i:], '>')
  914. if i < 0 {
  915. return autolink, 0
  916. }
  917. return autolink, i + 1
  918. }
  919. // look for the address part of a mail autolink and '>'
  920. // this is less strict than the original markdown e-mail address matching
  921. func isMailtoAutoLink(data []byte) int {
  922. nb := 0
  923. // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
  924. for i, c := range data {
  925. if IsAlnum(c) {
  926. continue
  927. }
  928. switch c {
  929. case '@':
  930. nb++
  931. case '-', '.', '_':
  932. // no-op but not defult
  933. case '>':
  934. if nb == 1 {
  935. return i + 1
  936. }
  937. return 0
  938. default:
  939. return 0
  940. }
  941. }
  942. return 0
  943. }
  944. // look for the next emph char, skipping other constructs
  945. func helperFindEmphChar(data []byte, c byte) int {
  946. i := 0
  947. for i < len(data) {
  948. for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
  949. i++
  950. }
  951. if i >= len(data) {
  952. return 0
  953. }
  954. // do not count escaped chars
  955. if i != 0 && data[i-1] == '\\' {
  956. i++
  957. continue
  958. }
  959. if data[i] == c {
  960. return i
  961. }
  962. if data[i] == '`' {
  963. // skip a code span
  964. tmpI := 0
  965. i++
  966. for i < len(data) && data[i] != '`' {
  967. if tmpI == 0 && data[i] == c {
  968. tmpI = i
  969. }
  970. i++
  971. }
  972. if i >= len(data) {
  973. return tmpI
  974. }
  975. i++
  976. } else if data[i] == '[' {
  977. // skip a link
  978. tmpI := 0
  979. i++
  980. for i < len(data) && data[i] != ']' {
  981. if tmpI == 0 && data[i] == c {
  982. tmpI = i
  983. }
  984. i++
  985. }
  986. i++
  987. for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
  988. i++
  989. }
  990. if i >= len(data) {
  991. return tmpI
  992. }
  993. if data[i] != '[' && data[i] != '(' { // not a link
  994. if tmpI > 0 {
  995. return tmpI
  996. }
  997. continue
  998. }
  999. cc := data[i]
  1000. i++
  1001. for i < len(data) && data[i] != cc {
  1002. if tmpI == 0 && data[i] == c {
  1003. return i
  1004. }
  1005. i++
  1006. }
  1007. if i >= len(data) {
  1008. return tmpI
  1009. }
  1010. i++
  1011. }
  1012. }
  1013. return 0
  1014. }
  1015. func helperEmphasis(p *Parser, data []byte, c byte) (int, ast.Node) {
  1016. i := 0
  1017. // skip one symbol if coming from emph3
  1018. if len(data) > 1 && data[0] == c && data[1] == c {
  1019. i = 1
  1020. }
  1021. for i < len(data) {
  1022. length := helperFindEmphChar(data[i:], c)
  1023. if length == 0 {
  1024. return 0, nil
  1025. }
  1026. i += length
  1027. if i >= len(data) {
  1028. return 0, nil
  1029. }
  1030. if i+1 < len(data) && data[i+1] == c {
  1031. i++
  1032. continue
  1033. }
  1034. if data[i] == c && !IsSpace(data[i-1]) {
  1035. if p.extensions&NoIntraEmphasis != 0 {
  1036. if !(i+1 == len(data) || IsSpace(data[i+1]) || IsPunctuation(data[i+1])) {
  1037. continue
  1038. }
  1039. }
  1040. emph := &ast.Emph{}
  1041. p.Inline(emph, data[:i])
  1042. return i + 1, emph
  1043. }
  1044. }
  1045. return 0, nil
  1046. }
  1047. func helperDoubleEmphasis(p *Parser, data []byte, c byte) (int, ast.Node) {
  1048. i := 0
  1049. for i < len(data) {
  1050. length := helperFindEmphChar(data[i:], c)
  1051. if length == 0 {
  1052. return 0, nil
  1053. }
  1054. i += length
  1055. if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !IsSpace(data[i-1]) {
  1056. var node ast.Node = &ast.Strong{}
  1057. if c == '~' {
  1058. node = &ast.Del{}
  1059. }
  1060. p.Inline(node, data[:i])
  1061. return i + 2, node
  1062. }
  1063. i++
  1064. }
  1065. return 0, nil
  1066. }
  1067. func helperTripleEmphasis(p *Parser, data []byte, offset int, c byte) (int, ast.Node) {
  1068. i := 0
  1069. origData := data
  1070. data = data[offset:]
  1071. for i < len(data) {
  1072. length := helperFindEmphChar(data[i:], c)
  1073. if length == 0 {
  1074. return 0, nil
  1075. }
  1076. i += length
  1077. // skip whitespace preceded symbols
  1078. if data[i] != c || IsSpace(data[i-1]) {
  1079. continue
  1080. }
  1081. switch {
  1082. case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
  1083. // triple symbol found
  1084. strong := &ast.Strong{}
  1085. em := &ast.Emph{}
  1086. ast.AppendChild(strong, em)
  1087. p.Inline(em, data[:i])
  1088. return i + 3, strong
  1089. case i+1 < len(data) && data[i+1] == c:
  1090. // double symbol found, hand over to emph1
  1091. length, node := helperEmphasis(p, origData[offset-2:], c)
  1092. if length == 0 {
  1093. return 0, nil
  1094. }
  1095. return length - 2, node
  1096. default:
  1097. // single symbol found, hand over to emph2
  1098. length, node := helperDoubleEmphasis(p, origData[offset-1:], c)
  1099. if length == 0 {
  1100. return 0, nil
  1101. }
  1102. return length - 1, node
  1103. }
  1104. }
  1105. return 0, nil
  1106. }
  1107. // math handle inline math wrapped with '$'
  1108. func math(p *Parser, data []byte, offset int) (int, ast.Node) {
  1109. data = data[offset:]
  1110. // too short, or block math
  1111. if len(data) <= 2 || data[1] == '$' {
  1112. return 0, nil
  1113. }
  1114. // find next '$'
  1115. var end int
  1116. for end = 1; end < len(data) && data[end] != '$'; end++ {
  1117. }
  1118. // $ not match
  1119. if end == len(data) {
  1120. return 0, nil
  1121. }
  1122. // create inline math node
  1123. math := &ast.Math{}
  1124. math.Literal = data[1:end]
  1125. return end + 1, math
  1126. }
  1127. func newTextNode(d []byte) *ast.Text {
  1128. return &ast.Text{Leaf: ast.Leaf{Literal: d}}
  1129. }
  1130. func normalizeURI(s []byte) []byte {
  1131. return s // TODO: implement
  1132. }