smartypants.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. package html
  2. import (
  3. "bytes"
  4. "io"
  5. "github.com/gomarkdown/markdown/parser"
  6. )
  7. // SmartyPants rendering
  8. var (
  9. isSpace = parser.IsSpace
  10. isAlnum = parser.IsAlnum
  11. isPunctuation = parser.IsPunctuation
  12. )
  13. // SPRenderer is a struct containing state of a Smartypants renderer.
  14. type SPRenderer struct {
  15. inSingleQuote bool
  16. inDoubleQuote bool
  17. callbacks [256]smartCallback
  18. }
  19. func wordBoundary(c byte) bool {
  20. return c == 0 || isSpace(c) || isPunctuation(c)
  21. }
  22. func tolower(c byte) byte {
  23. if c >= 'A' && c <= 'Z' {
  24. return c - 'A' + 'a'
  25. }
  26. return c
  27. }
  28. func isdigit(c byte) bool {
  29. return c >= '0' && c <= '9'
  30. }
  31. func smartQuoteHelper(out *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool, addNBSP bool) bool {
  32. // edge of the buffer is likely to be a tag that we don't get to see,
  33. // so we treat it like text sometimes
  34. // enumerate all sixteen possibilities for (previousChar, nextChar)
  35. // each can be one of {0, space, punct, other}
  36. switch {
  37. case previousChar == 0 && nextChar == 0:
  38. // context is not any help here, so toggle
  39. *isOpen = !*isOpen
  40. case isSpace(previousChar) && nextChar == 0:
  41. // [ "] might be [ "<code>foo...]
  42. *isOpen = true
  43. case isPunctuation(previousChar) && nextChar == 0:
  44. // [!"] hmm... could be [Run!"] or [("<code>...]
  45. *isOpen = false
  46. case /* isnormal(previousChar) && */ nextChar == 0:
  47. // [a"] is probably a close
  48. *isOpen = false
  49. case previousChar == 0 && isSpace(nextChar):
  50. // [" ] might be [...foo</code>" ]
  51. *isOpen = false
  52. case isSpace(previousChar) && isSpace(nextChar):
  53. // [ " ] context is not any help here, so toggle
  54. *isOpen = !*isOpen
  55. case isPunctuation(previousChar) && isSpace(nextChar):
  56. // [!" ] is probably a close
  57. *isOpen = false
  58. case /* isnormal(previousChar) && */ isSpace(nextChar):
  59. // [a" ] this is one of the easy cases
  60. *isOpen = false
  61. case previousChar == 0 && isPunctuation(nextChar):
  62. // ["!] hmm... could be ["$1.95] or [</code>"!...]
  63. *isOpen = false
  64. case isSpace(previousChar) && isPunctuation(nextChar):
  65. // [ "!] looks more like [ "$1.95]
  66. *isOpen = true
  67. case isPunctuation(previousChar) && isPunctuation(nextChar):
  68. // [!"!] context is not any help here, so toggle
  69. *isOpen = !*isOpen
  70. case /* isnormal(previousChar) && */ isPunctuation(nextChar):
  71. // [a"!] is probably a close
  72. *isOpen = false
  73. case previousChar == 0 /* && isnormal(nextChar) */ :
  74. // ["a] is probably an open
  75. *isOpen = true
  76. case isSpace(previousChar) /* && isnormal(nextChar) */ :
  77. // [ "a] this is one of the easy cases
  78. *isOpen = true
  79. case isPunctuation(previousChar) /* && isnormal(nextChar) */ :
  80. // [!"a] is probably an open
  81. *isOpen = true
  82. default:
  83. // [a'b] maybe a contraction?
  84. *isOpen = false
  85. }
  86. // Note that with the limited lookahead, this non-breaking
  87. // space will also be appended to single double quotes.
  88. if addNBSP && !*isOpen {
  89. out.WriteString("&nbsp;")
  90. }
  91. out.WriteByte('&')
  92. if *isOpen {
  93. out.WriteByte('l')
  94. } else {
  95. out.WriteByte('r')
  96. }
  97. out.WriteByte(quote)
  98. out.WriteString("quo;")
  99. if addNBSP && *isOpen {
  100. out.WriteString("&nbsp;")
  101. }
  102. return true
  103. }
  104. func (r *SPRenderer) smartSingleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
  105. if len(text) >= 2 {
  106. t1 := tolower(text[1])
  107. if t1 == '\'' {
  108. nextChar := byte(0)
  109. if len(text) >= 3 {
  110. nextChar = text[2]
  111. }
  112. if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote, false) {
  113. return 1
  114. }
  115. }
  116. if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) {
  117. out.WriteString("&rsquo;")
  118. return 0
  119. }
  120. if len(text) >= 3 {
  121. t2 := tolower(text[2])
  122. if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) &&
  123. (len(text) < 4 || wordBoundary(text[3])) {
  124. out.WriteString("&rsquo;")
  125. return 0
  126. }
  127. }
  128. }
  129. nextChar := byte(0)
  130. if len(text) > 1 {
  131. nextChar = text[1]
  132. }
  133. if smartQuoteHelper(out, previousChar, nextChar, 's', &r.inSingleQuote, false) {
  134. return 0
  135. }
  136. out.WriteByte(text[0])
  137. return 0
  138. }
  139. func (r *SPRenderer) smartParens(out *bytes.Buffer, previousChar byte, text []byte) int {
  140. if len(text) >= 3 {
  141. t1 := tolower(text[1])
  142. t2 := tolower(text[2])
  143. if t1 == 'c' && t2 == ')' {
  144. out.WriteString("&copy;")
  145. return 2
  146. }
  147. if t1 == 'r' && t2 == ')' {
  148. out.WriteString("&reg;")
  149. return 2
  150. }
  151. if len(text) >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')' {
  152. out.WriteString("&trade;")
  153. return 3
  154. }
  155. }
  156. out.WriteByte(text[0])
  157. return 0
  158. }
  159. func (r *SPRenderer) smartDash(out *bytes.Buffer, previousChar byte, text []byte) int {
  160. if len(text) >= 2 {
  161. if text[1] == '-' {
  162. out.WriteString("&mdash;")
  163. return 1
  164. }
  165. if wordBoundary(previousChar) && wordBoundary(text[1]) {
  166. out.WriteString("&ndash;")
  167. return 0
  168. }
  169. }
  170. out.WriteByte(text[0])
  171. return 0
  172. }
  173. func (r *SPRenderer) smartDashLatex(out *bytes.Buffer, previousChar byte, text []byte) int {
  174. if len(text) >= 3 && text[1] == '-' && text[2] == '-' {
  175. out.WriteString("&mdash;")
  176. return 2
  177. }
  178. if len(text) >= 2 && text[1] == '-' {
  179. out.WriteString("&ndash;")
  180. return 1
  181. }
  182. out.WriteByte(text[0])
  183. return 0
  184. }
  185. func (r *SPRenderer) smartAmpVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte, addNBSP bool) int {
  186. if bytes.HasPrefix(text, []byte("&quot;")) {
  187. nextChar := byte(0)
  188. if len(text) >= 7 {
  189. nextChar = text[6]
  190. }
  191. if smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote, addNBSP) {
  192. return 5
  193. }
  194. }
  195. if bytes.HasPrefix(text, []byte("&#0;")) {
  196. return 3
  197. }
  198. out.WriteByte('&')
  199. return 0
  200. }
  201. func (r *SPRenderer) smartAmp(angledQuotes, addNBSP bool) func(*bytes.Buffer, byte, []byte) int {
  202. var quote byte = 'd'
  203. if angledQuotes {
  204. quote = 'a'
  205. }
  206. return func(out *bytes.Buffer, previousChar byte, text []byte) int {
  207. return r.smartAmpVariant(out, previousChar, text, quote, addNBSP)
  208. }
  209. }
  210. func (r *SPRenderer) smartPeriod(out *bytes.Buffer, previousChar byte, text []byte) int {
  211. if len(text) >= 3 && text[1] == '.' && text[2] == '.' {
  212. out.WriteString("&hellip;")
  213. return 2
  214. }
  215. if len(text) >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.' {
  216. out.WriteString("&hellip;")
  217. return 4
  218. }
  219. out.WriteByte(text[0])
  220. return 0
  221. }
  222. func (r *SPRenderer) smartBacktick(out *bytes.Buffer, previousChar byte, text []byte) int {
  223. if len(text) >= 2 && text[1] == '`' {
  224. nextChar := byte(0)
  225. if len(text) >= 3 {
  226. nextChar = text[2]
  227. }
  228. if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote, false) {
  229. return 1
  230. }
  231. }
  232. out.WriteByte(text[0])
  233. return 0
  234. }
  235. func (r *SPRenderer) smartNumberGeneric(out *bytes.Buffer, previousChar byte, text []byte) int {
  236. if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
  237. // is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b
  238. // note: check for regular slash (/) or fraction slash (⁄, 0x2044, or 0xe2 81 84 in utf-8)
  239. // and avoid changing dates like 1/23/2005 into fractions.
  240. numEnd := 0
  241. for len(text) > numEnd && isdigit(text[numEnd]) {
  242. numEnd++
  243. }
  244. if numEnd == 0 {
  245. out.WriteByte(text[0])
  246. return 0
  247. }
  248. denStart := numEnd + 1
  249. if len(text) > numEnd+3 && text[numEnd] == 0xe2 && text[numEnd+1] == 0x81 && text[numEnd+2] == 0x84 {
  250. denStart = numEnd + 3
  251. } else if len(text) < numEnd+2 || text[numEnd] != '/' {
  252. out.WriteByte(text[0])
  253. return 0
  254. }
  255. denEnd := denStart
  256. for len(text) > denEnd && isdigit(text[denEnd]) {
  257. denEnd++
  258. }
  259. if denEnd == denStart {
  260. out.WriteByte(text[0])
  261. return 0
  262. }
  263. if len(text) == denEnd || wordBoundary(text[denEnd]) && text[denEnd] != '/' {
  264. out.WriteString("<sup>")
  265. out.Write(text[:numEnd])
  266. out.WriteString("</sup>&frasl;<sub>")
  267. out.Write(text[denStart:denEnd])
  268. out.WriteString("</sub>")
  269. return denEnd - 1
  270. }
  271. }
  272. out.WriteByte(text[0])
  273. return 0
  274. }
  275. func (r *SPRenderer) smartNumber(out *bytes.Buffer, previousChar byte, text []byte) int {
  276. if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
  277. if text[0] == '1' && text[1] == '/' && text[2] == '2' {
  278. if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' {
  279. out.WriteString("&frac12;")
  280. return 2
  281. }
  282. }
  283. if text[0] == '1' && text[1] == '/' && text[2] == '4' {
  284. if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') {
  285. out.WriteString("&frac14;")
  286. return 2
  287. }
  288. }
  289. if text[0] == '3' && text[1] == '/' && text[2] == '4' {
  290. if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') {
  291. out.WriteString("&frac34;")
  292. return 2
  293. }
  294. }
  295. }
  296. out.WriteByte(text[0])
  297. return 0
  298. }
  299. func (r *SPRenderer) smartDoubleQuoteVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
  300. nextChar := byte(0)
  301. if len(text) > 1 {
  302. nextChar = text[1]
  303. }
  304. if !smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote, false) {
  305. out.WriteString("&quot;")
  306. }
  307. return 0
  308. }
  309. func (r *SPRenderer) smartDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
  310. return r.smartDoubleQuoteVariant(out, previousChar, text, 'd')
  311. }
  312. func (r *SPRenderer) smartAngledDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
  313. return r.smartDoubleQuoteVariant(out, previousChar, text, 'a')
  314. }
  315. func (r *SPRenderer) smartLeftAngle(out *bytes.Buffer, previousChar byte, text []byte) int {
  316. i := 0
  317. for i < len(text) && text[i] != '>' {
  318. i++
  319. }
  320. out.Write(text[:i+1])
  321. return i
  322. }
  323. type smartCallback func(out *bytes.Buffer, previousChar byte, text []byte) int
  324. // NewSmartypantsRenderer constructs a Smartypants renderer object.
  325. func NewSmartypantsRenderer(flags Flags) *SPRenderer {
  326. var (
  327. r SPRenderer
  328. smartAmpAngled = r.smartAmp(true, false)
  329. smartAmpAngledNBSP = r.smartAmp(true, true)
  330. smartAmpRegular = r.smartAmp(false, false)
  331. smartAmpRegularNBSP = r.smartAmp(false, true)
  332. addNBSP = flags&SmartypantsQuotesNBSP != 0
  333. )
  334. if flags&SmartypantsAngledQuotes == 0 {
  335. r.callbacks['"'] = r.smartDoubleQuote
  336. if !addNBSP {
  337. r.callbacks['&'] = smartAmpRegular
  338. } else {
  339. r.callbacks['&'] = smartAmpRegularNBSP
  340. }
  341. } else {
  342. r.callbacks['"'] = r.smartAngledDoubleQuote
  343. if !addNBSP {
  344. r.callbacks['&'] = smartAmpAngled
  345. } else {
  346. r.callbacks['&'] = smartAmpAngledNBSP
  347. }
  348. }
  349. r.callbacks['\''] = r.smartSingleQuote
  350. r.callbacks['('] = r.smartParens
  351. if flags&SmartypantsDashes != 0 {
  352. if flags&SmartypantsLatexDashes == 0 {
  353. r.callbacks['-'] = r.smartDash
  354. } else {
  355. r.callbacks['-'] = r.smartDashLatex
  356. }
  357. }
  358. r.callbacks['.'] = r.smartPeriod
  359. if flags&SmartypantsFractions == 0 {
  360. r.callbacks['1'] = r.smartNumber
  361. r.callbacks['3'] = r.smartNumber
  362. } else {
  363. for ch := '1'; ch <= '9'; ch++ {
  364. r.callbacks[ch] = r.smartNumberGeneric
  365. }
  366. }
  367. r.callbacks['<'] = r.smartLeftAngle
  368. r.callbacks['`'] = r.smartBacktick
  369. return &r
  370. }
  371. // Process is the entry point of the Smartypants renderer.
  372. func (r *SPRenderer) Process(w io.Writer, text []byte) {
  373. mark := 0
  374. for i := 0; i < len(text); i++ {
  375. if action := r.callbacks[text[i]]; action != nil {
  376. if i > mark {
  377. w.Write(text[mark:i])
  378. }
  379. previousChar := byte(0)
  380. if i > 0 {
  381. previousChar = text[i-1]
  382. }
  383. var tmp bytes.Buffer
  384. i += action(&tmp, previousChar, text[i:])
  385. w.Write(tmp.Bytes())
  386. mark = i + 1
  387. }
  388. }
  389. if mark < len(text) {
  390. w.Write(text[mark:])
  391. }
  392. }