xml.go 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. // Package xml minifies XML 1.0 following the specification at http://www.w3.org/TR/xml/.
  2. package xml
  3. import (
  4. "io"
  5. "github.com/tdewolff/minify/v2"
  6. "github.com/tdewolff/parse/v2"
  7. "github.com/tdewolff/parse/v2/xml"
  8. )
  9. var (
  10. isBytes = []byte("=")
  11. spaceBytes = []byte(" ")
  12. voidBytes = []byte("/>")
  13. )
  14. ////////////////////////////////////////////////////////////////
  15. // Minifier is an XML minifier.
  16. type Minifier struct {
  17. KeepWhitespace bool
  18. }
  19. // Minify minifies XML data, it reads from r and writes to w.
  20. func Minify(m *minify.M, w io.Writer, r io.Reader, params map[string]string) error {
  21. return (&Minifier{}).Minify(m, w, r, params)
  22. }
  23. // Minify minifies XML data, it reads from r and writes to w.
  24. func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
  25. omitSpace := true // on true the next text token must not start with a space
  26. attrByteBuffer := make([]byte, 0, 64)
  27. z := parse.NewInput(r)
  28. defer z.Restore()
  29. l := xml.NewLexer(z)
  30. tb := NewTokenBuffer(l)
  31. for {
  32. t := *tb.Shift()
  33. if t.TokenType == xml.CDATAToken {
  34. if len(t.Text) == 0 {
  35. continue
  36. }
  37. if text, useText := xml.EscapeCDATAVal(&attrByteBuffer, t.Text); useText {
  38. t.TokenType = xml.TextToken
  39. t.Data = text
  40. }
  41. }
  42. switch t.TokenType {
  43. case xml.ErrorToken:
  44. if _, err := w.Write(nil); err != nil {
  45. return err
  46. }
  47. if l.Err() == io.EOF {
  48. return nil
  49. }
  50. return l.Err()
  51. case xml.DOCTYPEToken:
  52. w.Write(t.Data)
  53. case xml.CDATAToken:
  54. w.Write(t.Data)
  55. if len(t.Text) > 0 && parse.IsWhitespace(t.Text[len(t.Text)-1]) {
  56. omitSpace = true
  57. }
  58. case xml.TextToken:
  59. t.Data = parse.ReplaceMultipleWhitespaceAndEntities(t.Data, EntitiesMap, TextRevEntitiesMap)
  60. // whitespace removal; trim left
  61. if omitSpace && parse.IsWhitespace(t.Data[0]) {
  62. t.Data = t.Data[1:]
  63. }
  64. // whitespace removal; trim right
  65. omitSpace = false
  66. if len(t.Data) == 0 {
  67. omitSpace = true
  68. } else if parse.IsWhitespace(t.Data[len(t.Data)-1]) {
  69. omitSpace = true
  70. i := 0
  71. for {
  72. next := tb.Peek(i)
  73. // trim if EOF, text token with whitespace begin or block token
  74. if next.TokenType == xml.ErrorToken {
  75. t.Data = t.Data[:len(t.Data)-1]
  76. omitSpace = false
  77. break
  78. } else if next.TokenType == xml.TextToken {
  79. // this only happens when a comment, doctype, cdata startpi tag was in between
  80. // remove if the text token starts with a whitespace
  81. if len(next.Data) > 0 && parse.IsWhitespace(next.Data[0]) {
  82. t.Data = t.Data[:len(t.Data)-1]
  83. omitSpace = false
  84. }
  85. break
  86. } else if next.TokenType == xml.CDATAToken {
  87. if len(next.Text) > 0 && parse.IsWhitespace(next.Text[0]) {
  88. t.Data = t.Data[:len(t.Data)-1]
  89. omitSpace = false
  90. }
  91. break
  92. } else if next.TokenType == xml.StartTagToken || next.TokenType == xml.EndTagToken {
  93. if !o.KeepWhitespace {
  94. t.Data = t.Data[:len(t.Data)-1]
  95. omitSpace = false
  96. }
  97. break
  98. }
  99. i++
  100. }
  101. }
  102. w.Write(t.Data)
  103. case xml.StartTagToken:
  104. if o.KeepWhitespace {
  105. omitSpace = false
  106. }
  107. w.Write(t.Data)
  108. case xml.StartTagPIToken:
  109. w.Write(t.Data)
  110. case xml.AttributeToken:
  111. w.Write(spaceBytes)
  112. w.Write(t.Text)
  113. w.Write(isBytes)
  114. if len(t.AttrVal) < 2 || t.AttrVal[0] != '"' || t.AttrVal[len(t.AttrVal)-1] != '"' {
  115. w.Write(t.AttrVal)
  116. } else {
  117. val := t.AttrVal[1 : len(t.AttrVal)-1]
  118. val = parse.ReplaceEntities(val, EntitiesMap, nil)
  119. val = xml.EscapeAttrVal(&attrByteBuffer, val) // prefer single or double quotes depending on what occurs more often in value
  120. w.Write(val)
  121. }
  122. case xml.StartTagCloseToken:
  123. next := tb.Peek(0)
  124. skipExtra := false
  125. if next.TokenType == xml.TextToken && parse.IsAllWhitespace(next.Data) {
  126. next = tb.Peek(1)
  127. skipExtra = true
  128. }
  129. if next.TokenType == xml.EndTagToken {
  130. // collapse empty tags to single void tag
  131. tb.Shift()
  132. if skipExtra {
  133. tb.Shift()
  134. }
  135. w.Write(voidBytes)
  136. } else {
  137. w.Write(t.Data)
  138. }
  139. case xml.StartTagCloseVoidToken:
  140. w.Write(t.Data)
  141. case xml.StartTagClosePIToken:
  142. w.Write(t.Data)
  143. case xml.EndTagToken:
  144. if o.KeepWhitespace {
  145. omitSpace = false
  146. }
  147. if len(t.Data) > 3+len(t.Text) {
  148. t.Data[2+len(t.Text)] = '>'
  149. t.Data = t.Data[:3+len(t.Text)]
  150. }
  151. w.Write(t.Data)
  152. }
  153. }
  154. }