smartypants.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  1. //
  2. // Blackfriday Markdown Processor
  3. // Available at http://github.com/russross/blackfriday
  4. //
  5. // Copyright © 2011 Russ Ross <russ@russross.com>.
  6. // Distributed under the Simplified BSD License.
  7. // See README.md for details.
  8. //
  9. //
  10. //
  11. // SmartyPants rendering
  12. //
  13. //
  14. package blackfriday
  15. import (
  16. "bytes"
  17. "io"
  18. )
  19. // SPRenderer is a struct containing state of a Smartypants renderer.
  20. type SPRenderer struct {
  21. inSingleQuote bool
  22. inDoubleQuote bool
  23. callbacks [256]smartCallback
  24. }
  25. func wordBoundary(c byte) bool {
  26. return c == 0 || isspace(c) || ispunct(c)
  27. }
  // tolower maps the ASCII letters 'A'..'Z' to 'a'..'z' and returns every other
  // byte unchanged.
  func tolower(c byte) byte {
  	if 'A' <= c && c <= 'Z' {
  		return c + ('a' - 'A')
  	}
  	return c
  }
  // isdigit reports whether c is one of the ASCII digits '0'..'9'.
  func isdigit(c byte) bool {
  	// Byte subtraction wraps around, so anything below '0' becomes a large
  	// value and fails the comparison.
  	return c-'0' < 10
  }
  37. func smartQuoteHelper(out *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool, addNBSP bool) bool {
  38. // edge of the buffer is likely to be a tag that we don't get to see,
  39. // so we treat it like text sometimes
  40. // enumerate all sixteen possibilities for (previousChar, nextChar)
  41. // each can be one of {0, space, punct, other}
  42. switch {
  43. case previousChar == 0 && nextChar == 0:
  44. // context is not any help here, so toggle
  45. *isOpen = !*isOpen
  46. case isspace(previousChar) && nextChar == 0:
  47. // [ "] might be [ "<code>foo...]
  48. *isOpen = true
  49. case ispunct(previousChar) && nextChar == 0:
  50. // [!"] hmm... could be [Run!"] or [("<code>...]
  51. *isOpen = false
  52. case /* isnormal(previousChar) && */ nextChar == 0:
  53. // [a"] is probably a close
  54. *isOpen = false
  55. case previousChar == 0 && isspace(nextChar):
  56. // [" ] might be [...foo</code>" ]
  57. *isOpen = false
  58. case isspace(previousChar) && isspace(nextChar):
  59. // [ " ] context is not any help here, so toggle
  60. *isOpen = !*isOpen
  61. case ispunct(previousChar) && isspace(nextChar):
  62. // [!" ] is probably a close
  63. *isOpen = false
  64. case /* isnormal(previousChar) && */ isspace(nextChar):
  65. // [a" ] this is one of the easy cases
  66. *isOpen = false
  67. case previousChar == 0 && ispunct(nextChar):
  68. // ["!] hmm... could be ["$1.95] or [</code>"!...]
  69. *isOpen = false
  70. case isspace(previousChar) && ispunct(nextChar):
  71. // [ "!] looks more like [ "$1.95]
  72. *isOpen = true
  73. case ispunct(previousChar) && ispunct(nextChar):
  74. // [!"!] context is not any help here, so toggle
  75. *isOpen = !*isOpen
  76. case /* isnormal(previousChar) && */ ispunct(nextChar):
  77. // [a"!] is probably a close
  78. *isOpen = false
  79. case previousChar == 0 /* && isnormal(nextChar) */ :
  80. // ["a] is probably an open
  81. *isOpen = true
  82. case isspace(previousChar) /* && isnormal(nextChar) */ :
  83. // [ "a] this is one of the easy cases
  84. *isOpen = true
  85. case ispunct(previousChar) /* && isnormal(nextChar) */ :
  86. // [!"a] is probably an open
  87. *isOpen = true
  88. default:
  89. // [a'b] maybe a contraction?
  90. *isOpen = false
  91. }
  92. // Note that with the limited lookahead, this non-breaking
  93. // space will also be appended to single double quotes.
  94. if addNBSP && !*isOpen {
  95. out.WriteString("&nbsp;")
  96. }
  97. out.WriteByte('&')
  98. if *isOpen {
  99. out.WriteByte('l')
  100. } else {
  101. out.WriteByte('r')
  102. }
  103. out.WriteByte(quote)
  104. out.WriteString("quo;")
  105. if addNBSP && *isOpen {
  106. out.WriteString("&nbsp;")
  107. }
  108. return true
  109. }
  110. func (r *SPRenderer) smartSingleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
  111. if len(text) >= 2 {
  112. t1 := tolower(text[1])
  113. if t1 == '\'' {
  114. nextChar := byte(0)
  115. if len(text) >= 3 {
  116. nextChar = text[2]
  117. }
  118. if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote, false) {
  119. return 1
  120. }
  121. }
  122. if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) {
  123. out.WriteString("&rsquo;")
  124. return 0
  125. }
  126. if len(text) >= 3 {
  127. t2 := tolower(text[2])
  128. if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) &&
  129. (len(text) < 4 || wordBoundary(text[3])) {
  130. out.WriteString("&rsquo;")
  131. return 0
  132. }
  133. }
  134. }
  135. nextChar := byte(0)
  136. if len(text) > 1 {
  137. nextChar = text[1]
  138. }
  139. if smartQuoteHelper(out, previousChar, nextChar, 's', &r.inSingleQuote, false) {
  140. return 0
  141. }
  142. out.WriteByte(text[0])
  143. return 0
  144. }
  145. func (r *SPRenderer) smartParens(out *bytes.Buffer, previousChar byte, text []byte) int {
  146. if len(text) >= 3 {
  147. t1 := tolower(text[1])
  148. t2 := tolower(text[2])
  149. if t1 == 'c' && t2 == ')' {
  150. out.WriteString("&copy;")
  151. return 2
  152. }
  153. if t1 == 'r' && t2 == ')' {
  154. out.WriteString("&reg;")
  155. return 2
  156. }
  157. if len(text) >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')' {
  158. out.WriteString("&trade;")
  159. return 3
  160. }
  161. }
  162. out.WriteByte(text[0])
  163. return 0
  164. }
  165. func (r *SPRenderer) smartDash(out *bytes.Buffer, previousChar byte, text []byte) int {
  166. if len(text) >= 2 {
  167. if text[1] == '-' {
  168. out.WriteString("&mdash;")
  169. return 1
  170. }
  171. if wordBoundary(previousChar) && wordBoundary(text[1]) {
  172. out.WriteString("&ndash;")
  173. return 0
  174. }
  175. }
  176. out.WriteByte(text[0])
  177. return 0
  178. }
  179. func (r *SPRenderer) smartDashLatex(out *bytes.Buffer, previousChar byte, text []byte) int {
  180. if len(text) >= 3 && text[1] == '-' && text[2] == '-' {
  181. out.WriteString("&mdash;")
  182. return 2
  183. }
  184. if len(text) >= 2 && text[1] == '-' {
  185. out.WriteString("&ndash;")
  186. return 1
  187. }
  188. out.WriteByte(text[0])
  189. return 0
  190. }
  191. func (r *SPRenderer) smartAmpVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte, addNBSP bool) int {
  192. if bytes.HasPrefix(text, []byte("&quot;")) {
  193. nextChar := byte(0)
  194. if len(text) >= 7 {
  195. nextChar = text[6]
  196. }
  197. if smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote, addNBSP) {
  198. return 5
  199. }
  200. }
  201. if bytes.HasPrefix(text, []byte("&#0;")) {
  202. return 3
  203. }
  204. out.WriteByte('&')
  205. return 0
  206. }
  207. func (r *SPRenderer) smartAmp(angledQuotes, addNBSP bool) func(*bytes.Buffer, byte, []byte) int {
  208. var quote byte = 'd'
  209. if angledQuotes {
  210. quote = 'a'
  211. }
  212. return func(out *bytes.Buffer, previousChar byte, text []byte) int {
  213. return r.smartAmpVariant(out, previousChar, text, quote, addNBSP)
  214. }
  215. }
  216. func (r *SPRenderer) smartPeriod(out *bytes.Buffer, previousChar byte, text []byte) int {
  217. if len(text) >= 3 && text[1] == '.' && text[2] == '.' {
  218. out.WriteString("&hellip;")
  219. return 2
  220. }
  221. if len(text) >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.' {
  222. out.WriteString("&hellip;")
  223. return 4
  224. }
  225. out.WriteByte(text[0])
  226. return 0
  227. }
  228. func (r *SPRenderer) smartBacktick(out *bytes.Buffer, previousChar byte, text []byte) int {
  229. if len(text) >= 2 && text[1] == '`' {
  230. nextChar := byte(0)
  231. if len(text) >= 3 {
  232. nextChar = text[2]
  233. }
  234. if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote, false) {
  235. return 1
  236. }
  237. }
  238. out.WriteByte(text[0])
  239. return 0
  240. }
  241. func (r *SPRenderer) smartNumberGeneric(out *bytes.Buffer, previousChar byte, text []byte) int {
  242. if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
  243. // is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b
  244. // note: check for regular slash (/) or fraction slash (⁄, 0x2044, or 0xe2 81 84 in utf-8)
  245. // and avoid changing dates like 1/23/2005 into fractions.
  246. numEnd := 0
  247. for len(text) > numEnd && isdigit(text[numEnd]) {
  248. numEnd++
  249. }
  250. if numEnd == 0 {
  251. out.WriteByte(text[0])
  252. return 0
  253. }
  254. denStart := numEnd + 1
  255. if len(text) > numEnd+3 && text[numEnd] == 0xe2 && text[numEnd+1] == 0x81 && text[numEnd+2] == 0x84 {
  256. denStart = numEnd + 3
  257. } else if len(text) < numEnd+2 || text[numEnd] != '/' {
  258. out.WriteByte(text[0])
  259. return 0
  260. }
  261. denEnd := denStart
  262. for len(text) > denEnd && isdigit(text[denEnd]) {
  263. denEnd++
  264. }
  265. if denEnd == denStart {
  266. out.WriteByte(text[0])
  267. return 0
  268. }
  269. if len(text) == denEnd || wordBoundary(text[denEnd]) && text[denEnd] != '/' {
  270. out.WriteString("<sup>")
  271. out.Write(text[:numEnd])
  272. out.WriteString("</sup>&frasl;<sub>")
  273. out.Write(text[denStart:denEnd])
  274. out.WriteString("</sub>")
  275. return denEnd - 1
  276. }
  277. }
  278. out.WriteByte(text[0])
  279. return 0
  280. }
  281. func (r *SPRenderer) smartNumber(out *bytes.Buffer, previousChar byte, text []byte) int {
  282. if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
  283. if text[0] == '1' && text[1] == '/' && text[2] == '2' {
  284. if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' {
  285. out.WriteString("&frac12;")
  286. return 2
  287. }
  288. }
  289. if text[0] == '1' && text[1] == '/' && text[2] == '4' {
  290. if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') {
  291. out.WriteString("&frac14;")
  292. return 2
  293. }
  294. }
  295. if text[0] == '3' && text[1] == '/' && text[2] == '4' {
  296. if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') {
  297. out.WriteString("&frac34;")
  298. return 2
  299. }
  300. }
  301. }
  302. out.WriteByte(text[0])
  303. return 0
  304. }
  305. func (r *SPRenderer) smartDoubleQuoteVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
  306. nextChar := byte(0)
  307. if len(text) > 1 {
  308. nextChar = text[1]
  309. }
  310. if !smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote, false) {
  311. out.WriteString("&quot;")
  312. }
  313. return 0
  314. }
  315. func (r *SPRenderer) smartDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
  316. return r.smartDoubleQuoteVariant(out, previousChar, text, 'd')
  317. }
  318. func (r *SPRenderer) smartAngledDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
  319. return r.smartDoubleQuoteVariant(out, previousChar, text, 'a')
  320. }
  321. func (r *SPRenderer) smartLeftAngle(out *bytes.Buffer, previousChar byte, text []byte) int {
  322. i := 0
  323. for i < len(text) && text[i] != '>' {
  324. i++
  325. }
  326. out.Write(text[:i+1])
  327. return i
  328. }
  // smartCallback is the handler signature stored in SPRenderer.callbacks.
  //
  // Process calls a handler with a scratch buffer to write the replacement
  // into, the byte that precedes the trigger byte (0 at the very start of the
  // input), and the input from the trigger byte onward. The result is the number
  // of bytes consumed beyond the trigger byte.
  type smartCallback func(
  	out *bytes.Buffer,
  	previousChar byte,
  	text []byte,
  ) int
  330. // NewSmartypantsRenderer constructs a Smartypants renderer object.
  331. func NewSmartypantsRenderer(flags HTMLFlags) *SPRenderer {
  332. var (
  333. r SPRenderer
  334. smartAmpAngled = r.smartAmp(true, false)
  335. smartAmpAngledNBSP = r.smartAmp(true, true)
  336. smartAmpRegular = r.smartAmp(false, false)
  337. smartAmpRegularNBSP = r.smartAmp(false, true)
  338. addNBSP = flags&SmartypantsQuotesNBSP != 0
  339. )
  340. if flags&SmartypantsAngledQuotes == 0 {
  341. r.callbacks['"'] = r.smartDoubleQuote
  342. if !addNBSP {
  343. r.callbacks['&'] = smartAmpRegular
  344. } else {
  345. r.callbacks['&'] = smartAmpRegularNBSP
  346. }
  347. } else {
  348. r.callbacks['"'] = r.smartAngledDoubleQuote
  349. if !addNBSP {
  350. r.callbacks['&'] = smartAmpAngled
  351. } else {
  352. r.callbacks['&'] = smartAmpAngledNBSP
  353. }
  354. }
  355. r.callbacks['\''] = r.smartSingleQuote
  356. r.callbacks['('] = r.smartParens
  357. if flags&SmartypantsDashes != 0 {
  358. if flags&SmartypantsLatexDashes == 0 {
  359. r.callbacks['-'] = r.smartDash
  360. } else {
  361. r.callbacks['-'] = r.smartDashLatex
  362. }
  363. }
  364. r.callbacks['.'] = r.smartPeriod
  365. if flags&SmartypantsFractions == 0 {
  366. r.callbacks['1'] = r.smartNumber
  367. r.callbacks['3'] = r.smartNumber
  368. } else {
  369. for ch := '1'; ch <= '9'; ch++ {
  370. r.callbacks[ch] = r.smartNumberGeneric
  371. }
  372. }
  373. r.callbacks['<'] = r.smartLeftAngle
  374. r.callbacks['`'] = r.smartBacktick
  375. return &r
  376. }
  377. // Process is the entry point of the Smartypants renderer.
  378. func (r *SPRenderer) Process(w io.Writer, text []byte) {
  379. mark := 0
  380. for i := 0; i < len(text); i++ {
  381. if action := r.callbacks[text[i]]; action != nil {
  382. if i > mark {
  383. w.Write(text[mark:i])
  384. }
  385. previousChar := byte(0)
  386. if i > 0 {
  387. previousChar = text[i-1]
  388. }
  389. var tmp bytes.Buffer
  390. i += action(&tmp, previousChar, text[i:])
  391. w.Write(tmp.Bytes())
  392. mark = i + 1
  393. }
  394. }
  395. if mark < len(text) {
  396. w.Write(text[mark:])
  397. }
  398. }