policy.go 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990
  1. // Copyright (c) 2014, David Kitchen <david@buro9.com>
  2. //
  3. // All rights reserved.
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are met:
  7. //
  8. // * Redistributions of source code must retain the above copyright notice, this
  9. // list of conditions and the following disclaimer.
  10. //
  11. // * Redistributions in binary form must reproduce the above copyright notice,
  12. // this list of conditions and the following disclaimer in the documentation
  13. // and/or other materials provided with the distribution.
  14. //
  15. // * Neither the name of the organisation (Microcosm) nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  20. // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21. // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22. // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  23. // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24. // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  25. // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  26. // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  27. // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. package bluemonday
  30. //TODO sgutzwiller create map of styles to default handlers
  31. //TODO sgutzwiller create handlers for various attributes
  32. import (
  33. "net/url"
  34. "regexp"
  35. "strings"
  36. "github.com/microcosm-cc/bluemonday/css"
  37. )
  38. // Policy encapsulates the allowlist of HTML elements and attributes that will
  39. // be applied to the sanitised HTML.
  40. //
  41. // You should use bluemonday.NewPolicy() to create a blank policy as the
  42. // unexported fields contain maps that need to be initialized.
  43. type Policy struct {
  44. // Declares whether the maps have been initialized, used as a cheap check to
  45. // ensure that those using Policy{} directly won't cause nil pointer
  46. // exceptions
  47. initialized bool
  48. // If true then we add spaces when stripping tags, specifically the closing
  49. // tag is replaced by a space character.
  50. addSpaces bool
  51. // When true, add rel="nofollow" to HTML a, area, and link tags
  52. requireNoFollow bool
  53. // When true, add rel="nofollow" to HTML a, area, and link tags
  54. // Will add for href="http://foo"
  55. // Will skip for href="/foo" or href="foo"
  56. requireNoFollowFullyQualifiedLinks bool
  57. // When true, add rel="noreferrer" to HTML a, area, and link tags
  58. requireNoReferrer bool
  59. // When true, add rel="noreferrer" to HTML a, area, and link tags
  60. // Will add for href="http://foo"
  61. // Will skip for href="/foo" or href="foo"
  62. requireNoReferrerFullyQualifiedLinks bool
  63. // When true, add crossorigin="anonymous" to HTML audio, img, link, script, and video tags
  64. requireCrossOriginAnonymous bool
  65. // When true, add and filter sandbox attribute on iframe tags
  66. requireSandboxOnIFrame map[string]bool
  67. // When true add target="_blank" to fully qualified links
  68. // Will add for href="http://foo"
  69. // Will skip for href="/foo" or href="foo"
  70. addTargetBlankToFullyQualifiedLinks bool
  71. // When true, URLs must be parseable by "net/url" url.Parse()
  72. requireParseableURLs bool
  73. // When true, u, _ := url.Parse("url"); !u.IsAbs() is permitted
  74. allowRelativeURLs bool
  75. // When true, allow data attributes.
  76. allowDataAttributes bool
  77. // When true, allow comments.
  78. allowComments bool
  79. // map[htmlElementName]map[htmlAttributeName][]attrPolicy
  80. elsAndAttrs map[string]map[string][]attrPolicy
  81. // elsMatchingAndAttrs stores regex based element matches along with attributes
  82. elsMatchingAndAttrs map[*regexp.Regexp]map[string][]attrPolicy
  83. // map[htmlAttributeName][]attrPolicy
  84. globalAttrs map[string][]attrPolicy
  85. // map[htmlElementName]map[cssPropertyName][]stylePolicy
  86. elsAndStyles map[string]map[string][]stylePolicy
  87. // map[regex]map[cssPropertyName][]stylePolicy
  88. elsMatchingAndStyles map[*regexp.Regexp]map[string][]stylePolicy
  89. // map[cssPropertyName][]stylePolicy
  90. globalStyles map[string][]stylePolicy
  91. // If urlPolicy is nil, all URLs with matching schema are allowed.
  92. // Otherwise, only the URLs with matching schema and urlPolicy(url)
  93. // returning true are allowed.
  94. allowURLSchemes map[string][]urlPolicy
  95. // These regexps are used to match allowed URL schemes, for example
  96. // if one would want to allow all URL schemes, they would add `.+`.
  97. // However pay attention as this can lead to XSS being rendered thus
  98. // defeating the purpose of using a HTML sanitizer.
  99. // The regexps are only considered if a schema was not explicitly
  100. // handled by `AllowURLSchemes` or `AllowURLSchemeWithCustomPolicy`.
  101. allowURLSchemeRegexps []*regexp.Regexp
  102. // If srcRewriter is not nil, it is used to rewrite the src attribute
  103. // of tags that download resources, such as <img> and <script>.
  104. // It requires that the URL is parsable by "net/url" url.Parse().
  105. srcRewriter urlRewriter
  106. // If an element has had all attributes removed as a result of a policy
  107. // being applied, then the element would be removed from the output.
  108. //
  109. // However some elements are valid and have strong layout meaning without
  110. // any attributes, i.e. <table>. To prevent those being removed we maintain
  111. // a list of elements that are allowed to have no attributes and that will
  112. // be maintained in the output HTML.
  113. setOfElementsAllowedWithoutAttrs map[string]struct{}
  114. // If an element has had all attributes removed as a result of a policy
  115. // being applied, then the element would be removed from the output.
  116. //
  117. // However some elements are valid and have strong layout meaning without
  118. // any attributes, i.e. <table>.
  119. //
  120. // In this case, any element matching a regular expression will be accepted without
  121. // attributes added.
  122. setOfElementsMatchingAllowedWithoutAttrs []*regexp.Regexp
  123. setOfElementsToSkipContent map[string]struct{}
  124. // Permits fundamentally unsafe elements.
  125. //
  126. // If false (default) then elements such as `style` and `script` will not be
  127. // permitted even if declared in a policy. These elements when combined with
  128. // untrusted input cannot be safely handled by bluemonday at this point in
  129. // time.
  130. //
  131. // If true then `style` and `script` would be permitted by bluemonday if a
  132. // policy declares them. However this is not recommended under any circumstance
  133. // and can lead to XSS being rendered thus defeating the purpose of using a
  134. // HTML sanitizer.
  135. allowUnsafe bool
  136. }
  137. type attrPolicy struct {
  138. // optional pattern to match, when not nil the regexp needs to match
  139. // otherwise the attribute is removed
  140. regexp *regexp.Regexp
  141. }
  142. type stylePolicy struct {
  143. // handler to validate
  144. handler func(string) bool
  145. // optional pattern to match, when not nil the regexp needs to match
  146. // otherwise the property is removed
  147. regexp *regexp.Regexp
  148. // optional list of allowed property values, for properties which
  149. // have a defined list of allowed values; property will be removed
  150. // if the value is not allowed
  151. enum []string
  152. }
  153. type attrPolicyBuilder struct {
  154. p *Policy
  155. attrNames []string
  156. regexp *regexp.Regexp
  157. allowEmpty bool
  158. }
  159. type stylePolicyBuilder struct {
  160. p *Policy
  161. propertyNames []string
  162. regexp *regexp.Regexp
  163. enum []string
  164. handler func(string) bool
  165. }
  166. type urlPolicy func(url *url.URL) (allowUrl bool)
  167. type urlRewriter func(*url.URL)
  168. type SandboxValue int64
  169. const (
  170. SandboxAllowDownloads SandboxValue = iota
  171. SandboxAllowDownloadsWithoutUserActivation
  172. SandboxAllowForms
  173. SandboxAllowModals
  174. SandboxAllowOrientationLock
  175. SandboxAllowPointerLock
  176. SandboxAllowPopups
  177. SandboxAllowPopupsToEscapeSandbox
  178. SandboxAllowPresentation
  179. SandboxAllowSameOrigin
  180. SandboxAllowScripts
  181. SandboxAllowStorageAccessByUserActivation
  182. SandboxAllowTopNavigation
  183. SandboxAllowTopNavigationByUserActivation
  184. )
  185. // init initializes the maps if this has not been done already
  186. func (p *Policy) init() {
  187. if !p.initialized {
  188. p.elsAndAttrs = make(map[string]map[string][]attrPolicy)
  189. p.elsMatchingAndAttrs = make(map[*regexp.Regexp]map[string][]attrPolicy)
  190. p.globalAttrs = make(map[string][]attrPolicy)
  191. p.elsAndStyles = make(map[string]map[string][]stylePolicy)
  192. p.elsMatchingAndStyles = make(map[*regexp.Regexp]map[string][]stylePolicy)
  193. p.globalStyles = make(map[string][]stylePolicy)
  194. p.allowURLSchemes = make(map[string][]urlPolicy)
  195. p.allowURLSchemeRegexps = make([]*regexp.Regexp, 0)
  196. p.setOfElementsAllowedWithoutAttrs = make(map[string]struct{})
  197. p.setOfElementsToSkipContent = make(map[string]struct{})
  198. p.initialized = true
  199. }
  200. }
  201. // NewPolicy returns a blank policy with nothing allowed or permitted. This
  202. // is the recommended way to start building a policy and you should now use
  203. // AllowAttrs() and/or AllowElements() to construct the allowlist of HTML
  204. // elements and attributes.
  205. func NewPolicy() *Policy {
  206. p := Policy{}
  207. p.addDefaultElementsWithoutAttrs()
  208. p.addDefaultSkipElementContent()
  209. return &p
  210. }
  211. // AllowAttrs takes a range of HTML attribute names and returns an
  212. // attribute policy builder that allows you to specify the pattern and scope of
  213. // the allowed attribute.
  214. //
  215. // The attribute policy is only added to the core policy when either Globally()
  216. // or OnElements(...) are called.
  217. func (p *Policy) AllowAttrs(attrNames ...string) *attrPolicyBuilder {
  218. p.init()
  219. abp := attrPolicyBuilder{
  220. p: p,
  221. allowEmpty: false,
  222. }
  223. for _, attrName := range attrNames {
  224. abp.attrNames = append(abp.attrNames, strings.ToLower(attrName))
  225. }
  226. return &abp
  227. }
  228. // AllowDataAttributes permits all data attributes. We can't specify the name
  229. // of each attribute exactly as they are customized.
  230. //
  231. // NOTE: These values are not sanitized and applications that evaluate or process
  232. // them without checking and verification of the input may be at risk if this option
  233. // is enabled. This is a 'caveat emptor' option and the person enabling this option
  234. // needs to fully understand the potential impact with regards to whatever application
  235. // will be consuming the sanitized HTML afterwards, i.e. if you know you put a link in a
  236. // data attribute and use that to automatically load some new window then you're giving
  237. // the author of a HTML fragment the means to open a malicious destination automatically.
  238. // Use with care!
  239. func (p *Policy) AllowDataAttributes() {
  240. p.allowDataAttributes = true
  241. }
  242. // AllowComments allows comments.
  243. //
  244. // Please note that only one type of comment will be allowed by this, this is the
  245. // the standard HTML comment <!-- --> which includes the use of that to permit
  246. // conditionals as per https://docs.microsoft.com/en-us/previous-versions/windows/internet-explorer/ie-developer/compatibility/ms537512(v=vs.85)?redirectedfrom=MSDN
  247. //
  248. // What is not permitted are CDATA XML comments, as the x/net/html package we depend
  249. // on does not handle this fully and we are not choosing to take on that work:
  250. // https://pkg.go.dev/golang.org/x/net/html#Tokenizer.AllowCDATA . If the x/net/html
  251. // package changes this then these will be considered, otherwise if you AllowComments
  252. // but provide a CDATA comment, then as per the documentation in x/net/html this will
  253. // be treated as a plain HTML comment.
  254. func (p *Policy) AllowComments() {
  255. p.allowComments = true
  256. }
  257. // AllowNoAttrs says that attributes on element are optional.
  258. //
  259. // The attribute policy is only added to the core policy when OnElements(...)
  260. // are called.
  261. func (p *Policy) AllowNoAttrs() *attrPolicyBuilder {
  262. p.init()
  263. abp := attrPolicyBuilder{
  264. p: p,
  265. allowEmpty: true,
  266. }
  267. return &abp
  268. }
  269. // AllowNoAttrs says that attributes on element are optional.
  270. //
  271. // The attribute policy is only added to the core policy when OnElements(...)
  272. // are called.
  273. func (abp *attrPolicyBuilder) AllowNoAttrs() *attrPolicyBuilder {
  274. abp.allowEmpty = true
  275. return abp
  276. }
  277. // Matching allows a regular expression to be applied to a nascent attribute
  278. // policy, and returns the attribute policy.
  279. func (abp *attrPolicyBuilder) Matching(regex *regexp.Regexp) *attrPolicyBuilder {
  280. abp.regexp = regex
  281. return abp
  282. }
  283. // OnElements will bind an attribute policy to a given range of HTML elements
  284. // and return the updated policy
  285. func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy {
  286. for _, element := range elements {
  287. element = strings.ToLower(element)
  288. for _, attr := range abp.attrNames {
  289. if _, ok := abp.p.elsAndAttrs[element]; !ok {
  290. abp.p.elsAndAttrs[element] = make(map[string][]attrPolicy)
  291. }
  292. ap := attrPolicy{}
  293. if abp.regexp != nil {
  294. ap.regexp = abp.regexp
  295. }
  296. abp.p.elsAndAttrs[element][attr] = append(abp.p.elsAndAttrs[element][attr], ap)
  297. }
  298. if abp.allowEmpty {
  299. abp.p.setOfElementsAllowedWithoutAttrs[element] = struct{}{}
  300. if _, ok := abp.p.elsAndAttrs[element]; !ok {
  301. abp.p.elsAndAttrs[element] = make(map[string][]attrPolicy)
  302. }
  303. }
  304. }
  305. return abp.p
  306. }
  307. // OnElementsMatching will bind an attribute policy to all elements matching a given regex
  308. // and return the updated policy
  309. func (abp *attrPolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
  310. for _, attr := range abp.attrNames {
  311. if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
  312. abp.p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
  313. }
  314. ap := attrPolicy{}
  315. if abp.regexp != nil {
  316. ap.regexp = abp.regexp
  317. }
  318. abp.p.elsMatchingAndAttrs[regex][attr] = append(abp.p.elsMatchingAndAttrs[regex][attr], ap)
  319. }
  320. if abp.allowEmpty {
  321. abp.p.setOfElementsMatchingAllowedWithoutAttrs = append(abp.p.setOfElementsMatchingAllowedWithoutAttrs, regex)
  322. if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
  323. abp.p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
  324. }
  325. }
  326. return abp.p
  327. }
  328. // Globally will bind an attribute policy to all HTML elements and return the
  329. // updated policy
  330. func (abp *attrPolicyBuilder) Globally() *Policy {
  331. for _, attr := range abp.attrNames {
  332. if _, ok := abp.p.globalAttrs[attr]; !ok {
  333. abp.p.globalAttrs[attr] = []attrPolicy{}
  334. }
  335. ap := attrPolicy{}
  336. if abp.regexp != nil {
  337. ap.regexp = abp.regexp
  338. }
  339. abp.p.globalAttrs[attr] = append(abp.p.globalAttrs[attr], ap)
  340. }
  341. return abp.p
  342. }
  343. // AllowStyles takes a range of CSS property names and returns a
  344. // style policy builder that allows you to specify the pattern and scope of
  345. // the allowed property.
  346. //
  347. // The style policy is only added to the core policy when either Globally()
  348. // or OnElements(...) are called.
  349. func (p *Policy) AllowStyles(propertyNames ...string) *stylePolicyBuilder {
  350. p.init()
  351. abp := stylePolicyBuilder{
  352. p: p,
  353. }
  354. for _, propertyName := range propertyNames {
  355. abp.propertyNames = append(abp.propertyNames, strings.ToLower(propertyName))
  356. }
  357. return &abp
  358. }
  359. // Matching allows a regular expression to be applied to a nascent style
  360. // policy, and returns the style policy.
  361. func (spb *stylePolicyBuilder) Matching(regex *regexp.Regexp) *stylePolicyBuilder {
  362. spb.regexp = regex
  363. return spb
  364. }
  365. // MatchingEnum allows a list of allowed values to be applied to a nascent style
  366. // policy, and returns the style policy.
  367. func (spb *stylePolicyBuilder) MatchingEnum(enum ...string) *stylePolicyBuilder {
  368. spb.enum = enum
  369. return spb
  370. }
  371. // MatchingHandler allows a handler to be applied to a nascent style
  372. // policy, and returns the style policy.
  373. func (spb *stylePolicyBuilder) MatchingHandler(handler func(string) bool) *stylePolicyBuilder {
  374. spb.handler = handler
  375. return spb
  376. }
  377. // OnElements will bind a style policy to a given range of HTML elements
  378. // and return the updated policy
  379. func (spb *stylePolicyBuilder) OnElements(elements ...string) *Policy {
  380. for _, element := range elements {
  381. element = strings.ToLower(element)
  382. for _, attr := range spb.propertyNames {
  383. if _, ok := spb.p.elsAndStyles[element]; !ok {
  384. spb.p.elsAndStyles[element] = make(map[string][]stylePolicy)
  385. }
  386. sp := stylePolicy{}
  387. if spb.handler != nil {
  388. sp.handler = spb.handler
  389. } else if len(spb.enum) > 0 {
  390. sp.enum = spb.enum
  391. } else if spb.regexp != nil {
  392. sp.regexp = spb.regexp
  393. } else {
  394. sp.handler = css.GetDefaultHandler(attr)
  395. }
  396. spb.p.elsAndStyles[element][attr] = append(spb.p.elsAndStyles[element][attr], sp)
  397. }
  398. }
  399. return spb.p
  400. }
  401. // OnElementsMatching will bind a style policy to any HTML elements matching the pattern
  402. // and return the updated policy
  403. func (spb *stylePolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
  404. for _, attr := range spb.propertyNames {
  405. if _, ok := spb.p.elsMatchingAndStyles[regex]; !ok {
  406. spb.p.elsMatchingAndStyles[regex] = make(map[string][]stylePolicy)
  407. }
  408. sp := stylePolicy{}
  409. if spb.handler != nil {
  410. sp.handler = spb.handler
  411. } else if len(spb.enum) > 0 {
  412. sp.enum = spb.enum
  413. } else if spb.regexp != nil {
  414. sp.regexp = spb.regexp
  415. } else {
  416. sp.handler = css.GetDefaultHandler(attr)
  417. }
  418. spb.p.elsMatchingAndStyles[regex][attr] = append(spb.p.elsMatchingAndStyles[regex][attr], sp)
  419. }
  420. return spb.p
  421. }
  422. // Globally will bind a style policy to all HTML elements and return the
  423. // updated policy
  424. func (spb *stylePolicyBuilder) Globally() *Policy {
  425. for _, attr := range spb.propertyNames {
  426. if _, ok := spb.p.globalStyles[attr]; !ok {
  427. spb.p.globalStyles[attr] = []stylePolicy{}
  428. }
  429. // Use only one strategy for validating styles, fallback to default
  430. sp := stylePolicy{}
  431. if spb.handler != nil {
  432. sp.handler = spb.handler
  433. } else if len(spb.enum) > 0 {
  434. sp.enum = spb.enum
  435. } else if spb.regexp != nil {
  436. sp.regexp = spb.regexp
  437. } else {
  438. sp.handler = css.GetDefaultHandler(attr)
  439. }
  440. spb.p.globalStyles[attr] = append(spb.p.globalStyles[attr], sp)
  441. }
  442. return spb.p
  443. }
  444. // AllowElements will append HTML elements to the allowlist without applying an
  445. // attribute policy to those elements (the elements are permitted
  446. // sans-attributes)
  447. func (p *Policy) AllowElements(names ...string) *Policy {
  448. p.init()
  449. for _, element := range names {
  450. element = strings.ToLower(element)
  451. if _, ok := p.elsAndAttrs[element]; !ok {
  452. p.elsAndAttrs[element] = make(map[string][]attrPolicy)
  453. }
  454. }
  455. return p
  456. }
  457. // AllowElementsMatching will append HTML elements to the allowlist if they
  458. // match a regexp.
  459. func (p *Policy) AllowElementsMatching(regex *regexp.Regexp) *Policy {
  460. p.init()
  461. if _, ok := p.elsMatchingAndAttrs[regex]; !ok {
  462. p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
  463. }
  464. return p
  465. }
  466. // AllowURLSchemesMatching will append URL schemes to the allowlist if they
  467. // match a regexp.
  468. func (p *Policy) AllowURLSchemesMatching(r *regexp.Regexp) *Policy {
  469. p.allowURLSchemeRegexps = append(p.allowURLSchemeRegexps, r)
  470. return p
  471. }
  472. // RewriteSrc will rewrite the src attribute of a resource downloading tag
  473. // (e.g. <img>, <script>, <iframe>) using the provided function.
  474. //
  475. // Typically the use case here is that if the content that we're sanitizing
  476. // is untrusted then the content that is inlined is also untrusted.
  477. // To prevent serving this content on the same domain as the content appears
  478. // on it is good practise to proxy the content through an additional domain
  479. // name as this will force the web client to consider the inline content as
  480. // third party to the main content, thus providing browser isolation around
  481. // the inline content.
  482. //
  483. // An example of this is a web mail provider like fastmail.com , when an
  484. // email (user generated content) is displayed, the email text is shown on
  485. // fastmail.com but the inline attachments and content are rendered from
  486. // fastmailusercontent.com . This proxying of the external content on a
  487. // domain that is different to the content domain forces the browser domain
  488. // security model to kick in. Note that this only applies to differences
  489. // below the suffix (as per the publix suffix list).
  490. //
  491. // This is a good practise to adopt as it prevents the content from being
  492. // able to set cookies on the main domain and thus prevents the content on
  493. // the main domain from being able to read those cookies.
  494. func (p *Policy) RewriteSrc(fn urlRewriter) *Policy {
  495. p.srcRewriter = fn
  496. return p
  497. }
  498. // RequireNoFollowOnLinks will result in all a, area, link tags having a
  499. // rel="nofollow"added to them if one does not already exist
  500. //
  501. // Note: This requires p.RequireParseableURLs(true) and will enable it.
  502. func (p *Policy) RequireNoFollowOnLinks(require bool) *Policy {
  503. p.requireNoFollow = require
  504. p.requireParseableURLs = true
  505. return p
  506. }
  507. // RequireNoFollowOnFullyQualifiedLinks will result in all a, area, and link
  508. // tags that point to a non-local destination (i.e. starts with a protocol and
  509. // has a host) having a rel="nofollow" added to them if one does not already
  510. // exist
  511. //
  512. // Note: This requires p.RequireParseableURLs(true) and will enable it.
  513. func (p *Policy) RequireNoFollowOnFullyQualifiedLinks(require bool) *Policy {
  514. p.requireNoFollowFullyQualifiedLinks = require
  515. p.requireParseableURLs = true
  516. return p
  517. }
  518. // RequireNoReferrerOnLinks will result in all a, area, and link tags having a
  519. // rel="noreferrrer" added to them if one does not already exist
  520. //
  521. // Note: This requires p.RequireParseableURLs(true) and will enable it.
  522. func (p *Policy) RequireNoReferrerOnLinks(require bool) *Policy {
  523. p.requireNoReferrer = require
  524. p.requireParseableURLs = true
  525. return p
  526. }
  527. // RequireNoReferrerOnFullyQualifiedLinks will result in all a, area, and link
  528. // tags that point to a non-local destination (i.e. starts with a protocol and
  529. // has a host) having a rel="noreferrer" added to them if one does not already
  530. // exist
  531. //
  532. // Note: This requires p.RequireParseableURLs(true) and will enable it.
  533. func (p *Policy) RequireNoReferrerOnFullyQualifiedLinks(require bool) *Policy {
  534. p.requireNoReferrerFullyQualifiedLinks = require
  535. p.requireParseableURLs = true
  536. return p
  537. }
  538. // RequireCrossOriginAnonymous will result in all audio, img, link, script, and
  539. // video tags having a crossorigin="anonymous" added to them if one does not
  540. // already exist
  541. func (p *Policy) RequireCrossOriginAnonymous(require bool) *Policy {
  542. p.requireCrossOriginAnonymous = require
  543. return p
  544. }
  545. // AddTargetBlankToFullyQualifiedLinks will result in all a, area and link tags
  546. // that point to a non-local destination (i.e. starts with a protocol and has a
  547. // host) having a target="_blank" added to them if one does not already exist
  548. //
  549. // Note: This requires p.RequireParseableURLs(true) and will enable it.
  550. func (p *Policy) AddTargetBlankToFullyQualifiedLinks(require bool) *Policy {
  551. p.addTargetBlankToFullyQualifiedLinks = require
  552. p.requireParseableURLs = true
  553. return p
  554. }
  555. // RequireParseableURLs will result in all URLs requiring that they be parseable
  556. // by "net/url" url.Parse()
  557. // This applies to:
  558. // - a.href
  559. // - area.href
  560. // - blockquote.cite
  561. // - img.src
  562. // - link.href
  563. // - script.src
  564. func (p *Policy) RequireParseableURLs(require bool) *Policy {
  565. p.requireParseableURLs = require
  566. return p
  567. }
  568. // AllowRelativeURLs enables RequireParseableURLs and then permits URLs that
  569. // are parseable, have no schema information and url.IsAbs() returns false
  570. // This permits local URLs
  571. func (p *Policy) AllowRelativeURLs(require bool) *Policy {
  572. p.RequireParseableURLs(true)
  573. p.allowRelativeURLs = require
  574. return p
  575. }
  576. // AllowURLSchemes will append URL schemes to the allowlist
  577. // Example: p.AllowURLSchemes("mailto", "http", "https")
  578. func (p *Policy) AllowURLSchemes(schemes ...string) *Policy {
  579. p.init()
  580. p.RequireParseableURLs(true)
  581. for _, scheme := range schemes {
  582. scheme = strings.ToLower(scheme)
  583. // Allow all URLs with matching scheme.
  584. p.allowURLSchemes[scheme] = nil
  585. }
  586. return p
  587. }
  588. // AllowURLSchemeWithCustomPolicy will append URL schemes with
  589. // a custom URL policy to the allowlist.
  590. // Only the URLs with matching schema and urlPolicy(url)
  591. // returning true will be allowed.
  592. func (p *Policy) AllowURLSchemeWithCustomPolicy(
  593. scheme string,
  594. urlPolicy func(url *url.URL) (allowUrl bool),
  595. ) *Policy {
  596. p.init()
  597. p.RequireParseableURLs(true)
  598. scheme = strings.ToLower(scheme)
  599. p.allowURLSchemes[scheme] = append(p.allowURLSchemes[scheme], urlPolicy)
  600. return p
  601. }
  602. // RequireSandboxOnIFrame will result in all iframe tags having a sandbox="" tag
  603. // Any sandbox values not specified here will be filtered from the generated HTML
  604. func (p *Policy) RequireSandboxOnIFrame(vals ...SandboxValue) {
  605. p.requireSandboxOnIFrame = make(map[string]bool)
  606. for _, val := range vals {
  607. switch SandboxValue(val) {
  608. case SandboxAllowDownloads:
  609. p.requireSandboxOnIFrame["allow-downloads"] = true
  610. case SandboxAllowDownloadsWithoutUserActivation:
  611. p.requireSandboxOnIFrame["allow-downloads-without-user-activation"] = true
  612. case SandboxAllowForms:
  613. p.requireSandboxOnIFrame["allow-forms"] = true
  614. case SandboxAllowModals:
  615. p.requireSandboxOnIFrame["allow-modals"] = true
  616. case SandboxAllowOrientationLock:
  617. p.requireSandboxOnIFrame["allow-orientation-lock"] = true
  618. case SandboxAllowPointerLock:
  619. p.requireSandboxOnIFrame["allow-pointer-lock"] = true
  620. case SandboxAllowPopups:
  621. p.requireSandboxOnIFrame["allow-popups"] = true
  622. case SandboxAllowPopupsToEscapeSandbox:
  623. p.requireSandboxOnIFrame["allow-popups-to-escape-sandbox"] = true
  624. case SandboxAllowPresentation:
  625. p.requireSandboxOnIFrame["allow-presentation"] = true
  626. case SandboxAllowSameOrigin:
  627. p.requireSandboxOnIFrame["allow-same-origin"] = true
  628. case SandboxAllowScripts:
  629. p.requireSandboxOnIFrame["allow-scripts"] = true
  630. case SandboxAllowStorageAccessByUserActivation:
  631. p.requireSandboxOnIFrame["allow-storage-access-by-user-activation"] = true
  632. case SandboxAllowTopNavigation:
  633. p.requireSandboxOnIFrame["allow-top-navigation"] = true
  634. case SandboxAllowTopNavigationByUserActivation:
  635. p.requireSandboxOnIFrame["allow-top-navigation-by-user-activation"] = true
  636. }
  637. }
  638. }
  639. // AddSpaceWhenStrippingTag states whether to add a single space " " when
  640. // removing tags that are not allowed by the policy.
  641. //
  642. // This is useful if you expect to strip tags in dense markup and may lose the
  643. // value of whitespace.
  644. //
  645. // For example: "<p>Hello</p><p>World</p>"" would be sanitized to "HelloWorld"
  646. // with the default value of false, but you may wish to sanitize this to
  647. // " Hello World " by setting AddSpaceWhenStrippingTag to true as this would
  648. // retain the intent of the text.
  649. func (p *Policy) AddSpaceWhenStrippingTag(allow bool) *Policy {
  650. p.addSpaces = allow
  651. return p
  652. }
  653. // SkipElementsContent adds the HTML elements whose tags is needed to be removed
  654. // with its content.
  655. func (p *Policy) SkipElementsContent(names ...string) *Policy {
  656. p.init()
  657. for _, element := range names {
  658. element = strings.ToLower(element)
  659. if _, ok := p.setOfElementsToSkipContent[element]; !ok {
  660. p.setOfElementsToSkipContent[element] = struct{}{}
  661. }
  662. }
  663. return p
  664. }
  665. // AllowElementsContent marks the HTML elements whose content should be
  666. // retained after removing the tag.
  667. func (p *Policy) AllowElementsContent(names ...string) *Policy {
  668. p.init()
  669. for _, element := range names {
  670. delete(p.setOfElementsToSkipContent, strings.ToLower(element))
  671. }
  672. return p
  673. }
  674. // AllowUnsafe permits fundamentally unsafe elements.
  675. //
  676. // If false (default) then elements such as `style` and `script` will not be
  677. // permitted even if declared in a policy. These elements when combined with
  678. // untrusted input cannot be safely handled by bluemonday at this point in
  679. // time.
  680. //
  681. // If true then `style` and `script` would be permitted by bluemonday if a
  682. // policy declares them. However this is not recommended under any circumstance
  683. // and can lead to XSS being rendered thus defeating the purpose of using a
  684. // HTML sanitizer.
  685. func (p *Policy) AllowUnsafe(allowUnsafe bool) *Policy {
  686. p.init()
  687. p.allowUnsafe = allowUnsafe
  688. return p
  689. }
  690. // addDefaultElementsWithoutAttrs adds the HTML elements that we know are valid
  691. // without any attributes to an internal map.
  692. // i.e. we know that <table> is valid, but <bdo> isn't valid as the "dir" attr
  693. // is mandatory
  694. func (p *Policy) addDefaultElementsWithoutAttrs() {
  695. p.init()
  696. p.setOfElementsAllowedWithoutAttrs["abbr"] = struct{}{}
  697. p.setOfElementsAllowedWithoutAttrs["acronym"] = struct{}{}
  698. p.setOfElementsAllowedWithoutAttrs["address"] = struct{}{}
  699. p.setOfElementsAllowedWithoutAttrs["article"] = struct{}{}
  700. p.setOfElementsAllowedWithoutAttrs["aside"] = struct{}{}
  701. p.setOfElementsAllowedWithoutAttrs["audio"] = struct{}{}
  702. p.setOfElementsAllowedWithoutAttrs["b"] = struct{}{}
  703. p.setOfElementsAllowedWithoutAttrs["bdi"] = struct{}{}
  704. p.setOfElementsAllowedWithoutAttrs["blockquote"] = struct{}{}
  705. p.setOfElementsAllowedWithoutAttrs["body"] = struct{}{}
  706. p.setOfElementsAllowedWithoutAttrs["br"] = struct{}{}
  707. p.setOfElementsAllowedWithoutAttrs["button"] = struct{}{}
  708. p.setOfElementsAllowedWithoutAttrs["canvas"] = struct{}{}
  709. p.setOfElementsAllowedWithoutAttrs["caption"] = struct{}{}
  710. p.setOfElementsAllowedWithoutAttrs["center"] = struct{}{}
  711. p.setOfElementsAllowedWithoutAttrs["cite"] = struct{}{}
  712. p.setOfElementsAllowedWithoutAttrs["code"] = struct{}{}
  713. p.setOfElementsAllowedWithoutAttrs["col"] = struct{}{}
  714. p.setOfElementsAllowedWithoutAttrs["colgroup"] = struct{}{}
  715. p.setOfElementsAllowedWithoutAttrs["datalist"] = struct{}{}
  716. p.setOfElementsAllowedWithoutAttrs["dd"] = struct{}{}
  717. p.setOfElementsAllowedWithoutAttrs["del"] = struct{}{}
  718. p.setOfElementsAllowedWithoutAttrs["details"] = struct{}{}
  719. p.setOfElementsAllowedWithoutAttrs["dfn"] = struct{}{}
  720. p.setOfElementsAllowedWithoutAttrs["div"] = struct{}{}
  721. p.setOfElementsAllowedWithoutAttrs["dl"] = struct{}{}
  722. p.setOfElementsAllowedWithoutAttrs["dt"] = struct{}{}
  723. p.setOfElementsAllowedWithoutAttrs["em"] = struct{}{}
  724. p.setOfElementsAllowedWithoutAttrs["fieldset"] = struct{}{}
  725. p.setOfElementsAllowedWithoutAttrs["figcaption"] = struct{}{}
  726. p.setOfElementsAllowedWithoutAttrs["figure"] = struct{}{}
  727. p.setOfElementsAllowedWithoutAttrs["footer"] = struct{}{}
  728. p.setOfElementsAllowedWithoutAttrs["h1"] = struct{}{}
  729. p.setOfElementsAllowedWithoutAttrs["h2"] = struct{}{}
  730. p.setOfElementsAllowedWithoutAttrs["h3"] = struct{}{}
  731. p.setOfElementsAllowedWithoutAttrs["h4"] = struct{}{}
  732. p.setOfElementsAllowedWithoutAttrs["h5"] = struct{}{}
  733. p.setOfElementsAllowedWithoutAttrs["h6"] = struct{}{}
  734. p.setOfElementsAllowedWithoutAttrs["head"] = struct{}{}
  735. p.setOfElementsAllowedWithoutAttrs["header"] = struct{}{}
  736. p.setOfElementsAllowedWithoutAttrs["hgroup"] = struct{}{}
  737. p.setOfElementsAllowedWithoutAttrs["hr"] = struct{}{}
  738. p.setOfElementsAllowedWithoutAttrs["html"] = struct{}{}
  739. p.setOfElementsAllowedWithoutAttrs["i"] = struct{}{}
  740. p.setOfElementsAllowedWithoutAttrs["ins"] = struct{}{}
  741. p.setOfElementsAllowedWithoutAttrs["kbd"] = struct{}{}
  742. p.setOfElementsAllowedWithoutAttrs["li"] = struct{}{}
  743. p.setOfElementsAllowedWithoutAttrs["mark"] = struct{}{}
  744. p.setOfElementsAllowedWithoutAttrs["marquee"] = struct{}{}
  745. p.setOfElementsAllowedWithoutAttrs["nav"] = struct{}{}
  746. p.setOfElementsAllowedWithoutAttrs["ol"] = struct{}{}
  747. p.setOfElementsAllowedWithoutAttrs["optgroup"] = struct{}{}
  748. p.setOfElementsAllowedWithoutAttrs["option"] = struct{}{}
  749. p.setOfElementsAllowedWithoutAttrs["p"] = struct{}{}
  750. p.setOfElementsAllowedWithoutAttrs["picture"] = struct{}{}
  751. p.setOfElementsAllowedWithoutAttrs["pre"] = struct{}{}
  752. p.setOfElementsAllowedWithoutAttrs["q"] = struct{}{}
  753. p.setOfElementsAllowedWithoutAttrs["rp"] = struct{}{}
  754. p.setOfElementsAllowedWithoutAttrs["rt"] = struct{}{}
  755. p.setOfElementsAllowedWithoutAttrs["ruby"] = struct{}{}
  756. p.setOfElementsAllowedWithoutAttrs["s"] = struct{}{}
  757. p.setOfElementsAllowedWithoutAttrs["samp"] = struct{}{}
  758. p.setOfElementsAllowedWithoutAttrs["script"] = struct{}{}
  759. p.setOfElementsAllowedWithoutAttrs["section"] = struct{}{}
  760. p.setOfElementsAllowedWithoutAttrs["select"] = struct{}{}
  761. p.setOfElementsAllowedWithoutAttrs["small"] = struct{}{}
  762. p.setOfElementsAllowedWithoutAttrs["span"] = struct{}{}
  763. p.setOfElementsAllowedWithoutAttrs["strike"] = struct{}{}
  764. p.setOfElementsAllowedWithoutAttrs["strong"] = struct{}{}
  765. p.setOfElementsAllowedWithoutAttrs["style"] = struct{}{}
  766. p.setOfElementsAllowedWithoutAttrs["sub"] = struct{}{}
  767. p.setOfElementsAllowedWithoutAttrs["summary"] = struct{}{}
  768. p.setOfElementsAllowedWithoutAttrs["sup"] = struct{}{}
  769. p.setOfElementsAllowedWithoutAttrs["svg"] = struct{}{}
  770. p.setOfElementsAllowedWithoutAttrs["table"] = struct{}{}
  771. p.setOfElementsAllowedWithoutAttrs["tbody"] = struct{}{}
  772. p.setOfElementsAllowedWithoutAttrs["td"] = struct{}{}
  773. p.setOfElementsAllowedWithoutAttrs["textarea"] = struct{}{}
  774. p.setOfElementsAllowedWithoutAttrs["tfoot"] = struct{}{}
  775. p.setOfElementsAllowedWithoutAttrs["th"] = struct{}{}
  776. p.setOfElementsAllowedWithoutAttrs["thead"] = struct{}{}
  777. p.setOfElementsAllowedWithoutAttrs["title"] = struct{}{}
  778. p.setOfElementsAllowedWithoutAttrs["time"] = struct{}{}
  779. p.setOfElementsAllowedWithoutAttrs["tr"] = struct{}{}
  780. p.setOfElementsAllowedWithoutAttrs["tt"] = struct{}{}
  781. p.setOfElementsAllowedWithoutAttrs["u"] = struct{}{}
  782. p.setOfElementsAllowedWithoutAttrs["ul"] = struct{}{}
  783. p.setOfElementsAllowedWithoutAttrs["var"] = struct{}{}
  784. p.setOfElementsAllowedWithoutAttrs["video"] = struct{}{}
  785. p.setOfElementsAllowedWithoutAttrs["wbr"] = struct{}{}
  786. }
  787. // addDefaultSkipElementContent adds the HTML elements that we should skip
  788. // rendering the character content of, if the element itself is not allowed.
  789. // This is all character data that the end user would not normally see.
  790. // i.e. if we exclude a <script> tag then we shouldn't render the JavaScript or
  791. // anything else until we encounter the closing </script> tag.
  792. func (p *Policy) addDefaultSkipElementContent() {
  793. p.init()
  794. p.setOfElementsToSkipContent["frame"] = struct{}{}
  795. p.setOfElementsToSkipContent["frameset"] = struct{}{}
  796. p.setOfElementsToSkipContent["iframe"] = struct{}{}
  797. p.setOfElementsToSkipContent["noembed"] = struct{}{}
  798. p.setOfElementsToSkipContent["noframes"] = struct{}{}
  799. p.setOfElementsToSkipContent["noscript"] = struct{}{}
  800. p.setOfElementsToSkipContent["nostyle"] = struct{}{}
  801. p.setOfElementsToSkipContent["object"] = struct{}{}
  802. p.setOfElementsToSkipContent["script"] = struct{}{}
  803. p.setOfElementsToSkipContent["style"] = struct{}{}
  804. p.setOfElementsToSkipContent["title"] = struct{}{}
  805. }