123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167 |
- // Package xml minifies XML1.0 following the specifications at http://www.w3.org/TR/xml/.
- package xml
- import (
- "io"
- "github.com/tdewolff/minify/v2"
- "github.com/tdewolff/parse/v2"
- "github.com/tdewolff/parse/v2/xml"
- )
- var (
- isBytes = []byte("=")
- spaceBytes = []byte(" ")
- voidBytes = []byte("/>")
- )
- ////////////////////////////////////////////////////////////////
- // Minifier is an XML minifier.
- type Minifier struct {
- KeepWhitespace bool
- }
- // Minify minifies XML data, it reads from r and writes to w.
- func Minify(m *minify.M, w io.Writer, r io.Reader, params map[string]string) error {
- return (&Minifier{}).Minify(m, w, r, params)
- }
- // Minify minifies XML data, it reads from r and writes to w.
- func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
- omitSpace := true // on true the next text token must not start with a space
- attrByteBuffer := make([]byte, 0, 64)
- z := parse.NewInput(r)
- defer z.Restore()
- l := xml.NewLexer(z)
- tb := NewTokenBuffer(l)
- for {
- t := *tb.Shift()
- if t.TokenType == xml.CDATAToken {
- if len(t.Text) == 0 {
- continue
- }
- if text, useText := xml.EscapeCDATAVal(&attrByteBuffer, t.Text); useText {
- t.TokenType = xml.TextToken
- t.Data = text
- }
- }
- switch t.TokenType {
- case xml.ErrorToken:
- if _, err := w.Write(nil); err != nil {
- return err
- }
- if l.Err() == io.EOF {
- return nil
- }
- return l.Err()
- case xml.DOCTYPEToken:
- w.Write(t.Data)
- case xml.CDATAToken:
- w.Write(t.Data)
- if len(t.Text) > 0 && parse.IsWhitespace(t.Text[len(t.Text)-1]) {
- omitSpace = true
- }
- case xml.TextToken:
- t.Data = parse.ReplaceMultipleWhitespaceAndEntities(t.Data, EntitiesMap, TextRevEntitiesMap)
- // whitespace removal; trim left
- if omitSpace && parse.IsWhitespace(t.Data[0]) {
- t.Data = t.Data[1:]
- }
- // whitespace removal; trim right
- omitSpace = false
- if len(t.Data) == 0 {
- omitSpace = true
- } else if parse.IsWhitespace(t.Data[len(t.Data)-1]) {
- omitSpace = true
- i := 0
- for {
- next := tb.Peek(i)
- // trim if EOF, text token with whitespace begin or block token
- if next.TokenType == xml.ErrorToken {
- t.Data = t.Data[:len(t.Data)-1]
- omitSpace = false
- break
- } else if next.TokenType == xml.TextToken {
- // this only happens when a comment, doctype, cdata startpi tag was in between
- // remove if the text token starts with a whitespace
- if len(next.Data) > 0 && parse.IsWhitespace(next.Data[0]) {
- t.Data = t.Data[:len(t.Data)-1]
- omitSpace = false
- }
- break
- } else if next.TokenType == xml.CDATAToken {
- if len(next.Text) > 0 && parse.IsWhitespace(next.Text[0]) {
- t.Data = t.Data[:len(t.Data)-1]
- omitSpace = false
- }
- break
- } else if next.TokenType == xml.StartTagToken || next.TokenType == xml.EndTagToken {
- if !o.KeepWhitespace {
- t.Data = t.Data[:len(t.Data)-1]
- omitSpace = false
- }
- break
- }
- i++
- }
- }
- w.Write(t.Data)
- case xml.StartTagToken:
- if o.KeepWhitespace {
- omitSpace = false
- }
- w.Write(t.Data)
- case xml.StartTagPIToken:
- w.Write(t.Data)
- case xml.AttributeToken:
- w.Write(spaceBytes)
- w.Write(t.Text)
- w.Write(isBytes)
- if len(t.AttrVal) < 2 || t.AttrVal[0] != '"' || t.AttrVal[len(t.AttrVal)-1] != '"' {
- w.Write(t.AttrVal)
- } else {
- val := t.AttrVal[1 : len(t.AttrVal)-1]
- val = parse.ReplaceEntities(val, EntitiesMap, nil)
- val = xml.EscapeAttrVal(&attrByteBuffer, val) // prefer single or double quotes depending on what occurs more often in value
- w.Write(val)
- }
- case xml.StartTagCloseToken:
- next := tb.Peek(0)
- skipExtra := false
- if next.TokenType == xml.TextToken && parse.IsAllWhitespace(next.Data) {
- next = tb.Peek(1)
- skipExtra = true
- }
- if next.TokenType == xml.EndTagToken {
- // collapse empty tags to single void tag
- tb.Shift()
- if skipExtra {
- tb.Shift()
- }
- w.Write(voidBytes)
- } else {
- w.Write(t.Data)
- }
- case xml.StartTagCloseVoidToken:
- w.Write(t.Data)
- case xml.StartTagClosePIToken:
- w.Write(t.Data)
- case xml.EndTagToken:
- if o.KeepWhitespace {
- omitSpace = false
- }
- if len(t.Data) > 3+len(t.Text) {
- t.Data[2+len(t.Text)] = '>'
- t.Data = t.Data[:3+len(t.Text)]
- }
- w.Write(t.Data)
- }
- }
- }
|