node.go 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. package blackfriday
  2. import (
  3. "bytes"
  4. "fmt"
  5. )
  6. // NodeType specifies a type of a single node of a syntax tree. Usually one
  7. // node (and its type) corresponds to a single markdown feature, e.g. emphasis
  8. // or code block.
  9. type NodeType int
  10. // Constants for identifying different types of nodes. See NodeType.
  11. const (
  12. Document NodeType = iota
  13. BlockQuote
  14. List
  15. Item
  16. Paragraph
  17. Heading
  18. HorizontalRule
  19. Emph
  20. Strong
  21. Del
  22. Link
  23. Image
  24. Text
  25. HTMLBlock
  26. CodeBlock
  27. Softbreak
  28. Hardbreak
  29. Code
  30. HTMLSpan
  31. Table
  32. TableCell
  33. TableHead
  34. TableBody
  35. TableRow
  36. )
  37. var nodeTypeNames = []string{
  38. Document: "Document",
  39. BlockQuote: "BlockQuote",
  40. List: "List",
  41. Item: "Item",
  42. Paragraph: "Paragraph",
  43. Heading: "Heading",
  44. HorizontalRule: "HorizontalRule",
  45. Emph: "Emph",
  46. Strong: "Strong",
  47. Del: "Del",
  48. Link: "Link",
  49. Image: "Image",
  50. Text: "Text",
  51. HTMLBlock: "HTMLBlock",
  52. CodeBlock: "CodeBlock",
  53. Softbreak: "Softbreak",
  54. Hardbreak: "Hardbreak",
  55. Code: "Code",
  56. HTMLSpan: "HTMLSpan",
  57. Table: "Table",
  58. TableCell: "TableCell",
  59. TableHead: "TableHead",
  60. TableBody: "TableBody",
  61. TableRow: "TableRow",
  62. }
  63. func (t NodeType) String() string {
  64. return nodeTypeNames[t]
  65. }
  66. // ListData contains fields relevant to a List and Item node type.
  67. type ListData struct {
  68. ListFlags ListType
  69. Tight bool // Skip <p>s around list item data if true
  70. BulletChar byte // '*', '+' or '-' in bullet lists
  71. Delimiter byte // '.' or ')' after the number in ordered lists
  72. RefLink []byte // If not nil, turns this list item into a footnote item and triggers different rendering
  73. IsFootnotesList bool // This is a list of footnotes
  74. }
  75. // LinkData contains fields relevant to a Link node type.
  76. type LinkData struct {
  77. Destination []byte // Destination is what goes into a href
  78. Title []byte // Title is the tooltip thing that goes in a title attribute
  79. NoteID int // NoteID contains a serial number of a footnote, zero if it's not a footnote
  80. Footnote *Node // If it's a footnote, this is a direct link to the footnote Node. Otherwise nil.
  81. }
  82. // CodeBlockData contains fields relevant to a CodeBlock node type.
  83. type CodeBlockData struct {
  84. IsFenced bool // Specifies whether it's a fenced code block or an indented one
  85. Info []byte // This holds the info string
  86. FenceChar byte
  87. FenceLength int
  88. FenceOffset int
  89. }
  90. // TableCellData contains fields relevant to a TableCell node type.
  91. type TableCellData struct {
  92. IsHeader bool // This tells if it's under the header row
  93. Align CellAlignFlags // This holds the value for align attribute
  94. }
  95. // HeadingData contains fields relevant to a Heading node type.
  96. type HeadingData struct {
  97. Level int // This holds the heading level number
  98. HeadingID string // This might hold heading ID, if present
  99. IsTitleblock bool // Specifies whether it's a title block
  100. }
  101. // Node is a single element in the abstract syntax tree of the parsed document.
  102. // It holds connections to the structurally neighboring nodes and, for certain
  103. // types of nodes, additional information that might be needed when rendering.
  104. type Node struct {
  105. Type NodeType // Determines the type of the node
  106. Parent *Node // Points to the parent
  107. FirstChild *Node // Points to the first child, if any
  108. LastChild *Node // Points to the last child, if any
  109. Prev *Node // Previous sibling; nil if it's the first child
  110. Next *Node // Next sibling; nil if it's the last child
  111. Literal []byte // Text contents of the leaf nodes
  112. HeadingData // Populated if Type is Heading
  113. ListData // Populated if Type is List
  114. CodeBlockData // Populated if Type is CodeBlock
  115. LinkData // Populated if Type is Link
  116. TableCellData // Populated if Type is TableCell
  117. content []byte // Markdown content of the block nodes
  118. open bool // Specifies an open block node that has not been finished to process yet
  119. }
  120. // NewNode allocates a node of a specified type.
  121. func NewNode(typ NodeType) *Node {
  122. return &Node{
  123. Type: typ,
  124. open: true,
  125. }
  126. }
  127. func (n *Node) String() string {
  128. ellipsis := ""
  129. snippet := n.Literal
  130. if len(snippet) > 16 {
  131. snippet = snippet[:16]
  132. ellipsis = "..."
  133. }
  134. return fmt.Sprintf("%s: '%s%s'", n.Type, snippet, ellipsis)
  135. }
  136. // Unlink removes node 'n' from the tree.
  137. // It panics if the node is nil.
  138. func (n *Node) Unlink() {
  139. if n.Prev != nil {
  140. n.Prev.Next = n.Next
  141. } else if n.Parent != nil {
  142. n.Parent.FirstChild = n.Next
  143. }
  144. if n.Next != nil {
  145. n.Next.Prev = n.Prev
  146. } else if n.Parent != nil {
  147. n.Parent.LastChild = n.Prev
  148. }
  149. n.Parent = nil
  150. n.Next = nil
  151. n.Prev = nil
  152. }
  153. // AppendChild adds a node 'child' as a child of 'n'.
  154. // It panics if either node is nil.
  155. func (n *Node) AppendChild(child *Node) {
  156. child.Unlink()
  157. child.Parent = n
  158. if n.LastChild != nil {
  159. n.LastChild.Next = child
  160. child.Prev = n.LastChild
  161. n.LastChild = child
  162. } else {
  163. n.FirstChild = child
  164. n.LastChild = child
  165. }
  166. }
  167. // InsertBefore inserts 'sibling' immediately before 'n'.
  168. // It panics if either node is nil.
  169. func (n *Node) InsertBefore(sibling *Node) {
  170. sibling.Unlink()
  171. sibling.Prev = n.Prev
  172. if sibling.Prev != nil {
  173. sibling.Prev.Next = sibling
  174. }
  175. sibling.Next = n
  176. n.Prev = sibling
  177. sibling.Parent = n.Parent
  178. if sibling.Prev == nil {
  179. sibling.Parent.FirstChild = sibling
  180. }
  181. }
  182. // IsContainer returns true if 'n' can contain children.
  183. func (n *Node) IsContainer() bool {
  184. switch n.Type {
  185. case Document:
  186. fallthrough
  187. case BlockQuote:
  188. fallthrough
  189. case List:
  190. fallthrough
  191. case Item:
  192. fallthrough
  193. case Paragraph:
  194. fallthrough
  195. case Heading:
  196. fallthrough
  197. case Emph:
  198. fallthrough
  199. case Strong:
  200. fallthrough
  201. case Del:
  202. fallthrough
  203. case Link:
  204. fallthrough
  205. case Image:
  206. fallthrough
  207. case Table:
  208. fallthrough
  209. case TableHead:
  210. fallthrough
  211. case TableBody:
  212. fallthrough
  213. case TableRow:
  214. fallthrough
  215. case TableCell:
  216. return true
  217. default:
  218. return false
  219. }
  220. }
  221. // IsLeaf returns true if 'n' is a leaf node.
  222. func (n *Node) IsLeaf() bool {
  223. return !n.IsContainer()
  224. }
  225. func (n *Node) canContain(t NodeType) bool {
  226. if n.Type == List {
  227. return t == Item
  228. }
  229. if n.Type == Document || n.Type == BlockQuote || n.Type == Item {
  230. return t != Item
  231. }
  232. if n.Type == Table {
  233. return t == TableHead || t == TableBody
  234. }
  235. if n.Type == TableHead || n.Type == TableBody {
  236. return t == TableRow
  237. }
  238. if n.Type == TableRow {
  239. return t == TableCell
  240. }
  241. return false
  242. }
  243. // WalkStatus allows NodeVisitor to have some control over the tree traversal.
  244. // It is returned from NodeVisitor and different values allow Node.Walk to
  245. // decide which node to go to next.
  246. type WalkStatus int
  247. const (
  248. // GoToNext is the default traversal of every node.
  249. GoToNext WalkStatus = iota
  250. // SkipChildren tells walker to skip all children of current node.
  251. SkipChildren
  252. // Terminate tells walker to terminate the traversal.
  253. Terminate
  254. )
  255. // NodeVisitor is a callback to be called when traversing the syntax tree.
  256. // Called twice for every node: once with entering=true when the branch is
  257. // first visited, then with entering=false after all the children are done.
  258. type NodeVisitor func(node *Node, entering bool) WalkStatus
  259. // Walk is a convenience method that instantiates a walker and starts a
  260. // traversal of subtree rooted at n.
  261. func (n *Node) Walk(visitor NodeVisitor) {
  262. w := newNodeWalker(n)
  263. for w.current != nil {
  264. status := visitor(w.current, w.entering)
  265. switch status {
  266. case GoToNext:
  267. w.next()
  268. case SkipChildren:
  269. w.entering = false
  270. w.next()
  271. case Terminate:
  272. return
  273. }
  274. }
  275. }
  276. type nodeWalker struct {
  277. current *Node
  278. root *Node
  279. entering bool
  280. }
  281. func newNodeWalker(root *Node) *nodeWalker {
  282. return &nodeWalker{
  283. current: root,
  284. root: root,
  285. entering: true,
  286. }
  287. }
  288. func (nw *nodeWalker) next() {
  289. if (!nw.current.IsContainer() || !nw.entering) && nw.current == nw.root {
  290. nw.current = nil
  291. return
  292. }
  293. if nw.entering && nw.current.IsContainer() {
  294. if nw.current.FirstChild != nil {
  295. nw.current = nw.current.FirstChild
  296. nw.entering = true
  297. } else {
  298. nw.entering = false
  299. }
  300. } else if nw.current.Next == nil {
  301. nw.current = nw.current.Parent
  302. nw.entering = false
  303. } else {
  304. nw.current = nw.current.Next
  305. nw.entering = true
  306. }
  307. }
  308. func dump(ast *Node) {
  309. fmt.Println(dumpString(ast))
  310. }
  311. func dumpR(ast *Node, depth int) string {
  312. if ast == nil {
  313. return ""
  314. }
  315. indent := bytes.Repeat([]byte("\t"), depth)
  316. content := ast.Literal
  317. if content == nil {
  318. content = ast.content
  319. }
  320. result := fmt.Sprintf("%s%s(%q)\n", indent, ast.Type, content)
  321. for n := ast.FirstChild; n != nil; n = n.Next {
  322. result += dumpR(n, depth+1)
  323. }
  324. return result
  325. }
  326. func dumpString(ast *Node) string {
  327. return dumpR(ast, 0)
  328. }