// Package html minifies HTML5 following the specifications at http://www.w3.org/TR/html5/syntax.html. package html import ( "bytes" "io" "github.com/tdewolff/minify/v2" "github.com/tdewolff/parse/v2" "github.com/tdewolff/parse/v2/buffer" "github.com/tdewolff/parse/v2/html" ) var ( gtBytes = []byte(">") isBytes = []byte("=") spaceBytes = []byte(" ") doctypeBytes = []byte("") jsMimeBytes = []byte("application/javascript") cssMimeBytes = []byte("text/css") htmlMimeBytes = []byte("text/html") svgMimeBytes = []byte("image/svg+xml") formMimeBytes = []byte("application/x-www-form-urlencoded") mathMimeBytes = []byte("application/mathml+xml") dataSchemeBytes = []byte("data:") jsSchemeBytes = []byte("javascript:") httpBytes = []byte("http") radioBytes = []byte("radio") onBytes = []byte("on") textBytes = []byte("text") noneBytes = []byte("none") submitBytes = []byte("submit") allBytes = []byte("all") rectBytes = []byte("rect") dataBytes = []byte("data") getBytes = []byte("get") autoBytes = []byte("auto") oneBytes = []byte("one") inlineParams = map[string]string{"inline": "1"} ) //////////////////////////////////////////////////////////////// // Minifier is an HTML minifier. type Minifier struct { KeepComments bool KeepConditionalComments bool KeepDefaultAttrVals bool KeepDocumentTags bool KeepEndTags bool KeepQuotes bool KeepWhitespace bool } // Minify minifies HTML data, it reads from r and writes to w. func Minify(m *minify.M, w io.Writer, r io.Reader, params map[string]string) error { return (&Minifier{}).Minify(m, w, r, params) } // Minify minifies HTML data, it reads from r and writes to w. func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]string) error { var rawTagHash Hash var rawTagMediatype []byte omitSpace := true // if true the next leading space is omitted inPre := false attrMinifyBuffer := buffer.NewWriter(make([]byte, 0, 64)) attrByteBuffer := make([]byte, 0, 64) z := parse.NewInput(r) defer z.Restore() l := html.NewLexer(z) tb := NewTokenBuffer(z, l) for { t := *tb.Shift() switch t.TokenType { case html.ErrorToken: if _, err := w.Write(nil); err != nil { return err } if l.Err() == io.EOF { return nil } return l.Err() case html.DoctypeToken: w.Write(doctypeBytes) case html.CommentToken: if o.KeepComments { w.Write(t.Data) } else if o.KeepConditionalComments && 6 < len(t.Text) && (bytes.HasPrefix(t.Text, []byte("[if ")) || bytes.HasSuffix(t.Text, []byte("[endif]")) || bytes.HasSuffix(t.Text, []byte("[endif]--"))) { // [if ...] is always 7 or more characters, [endif] is only encountered for downlevel-revealed // see https://msdn.microsoft.com/en-us/library/ms537512(v=vs.85).aspx#syntax if bytes.HasPrefix(t.Data, []byte("")) { // downlevel-hidden begin := bytes.IndexByte(t.Data, '>') + 1 end := len(t.Data) - len("") w.Write(t.Data[:begin]) if err := o.Minify(m, w, buffer.NewReader(t.Data[begin:end]), nil); err != nil { return minify.UpdateErrorPosition(err, z, t.Offset) } w.Write(t.Data[end:]) } else { w.Write(t.Data) // downlevel-revealed or short downlevel-hidden } } else if 1 < len(t.Text) && t.Text[0] == '#' { // SSI tags w.Write(t.Data) } case html.SvgToken: if err := m.MinifyMimetype(svgMimeBytes, w, buffer.NewReader(t.Data), nil); err != nil { if err != minify.ErrNotExist { return minify.UpdateErrorPosition(err, z, t.Offset) } w.Write(t.Data) } case html.MathToken: if err := m.MinifyMimetype(mathMimeBytes, w, buffer.NewReader(t.Data), nil); err != nil { if err != minify.ErrNotExist { return minify.UpdateErrorPosition(err, z, t.Offset) } w.Write(t.Data) } case html.TextToken: // CSS and JS minifiers for inline code if rawTagHash != 0 { if rawTagHash == Style || rawTagHash == Script || rawTagHash == Iframe { var mimetype []byte var params map[string]string if rawTagHash == Iframe { mimetype = htmlMimeBytes } else if 0 < len(rawTagMediatype) { mimetype, params = parse.Mediatype(rawTagMediatype) } else if rawTagHash == Script { mimetype = jsMimeBytes } else if rawTagHash == Style { mimetype = cssMimeBytes } if err := m.MinifyMimetype(mimetype, w, buffer.NewReader(t.Data), params); err != nil { if err != minify.ErrNotExist { return minify.UpdateErrorPosition(err, z, t.Offset) } w.Write(t.Data) } } else { w.Write(t.Data) } } else if inPre { w.Write(t.Data) } else { t.Data = parse.ReplaceMultipleWhitespaceAndEntities(t.Data, EntitiesMap, TextRevEntitiesMap) // whitespace removal; trim left if omitSpace && parse.IsWhitespace(t.Data[0]) { t.Data = t.Data[1:] } // whitespace removal; trim right omitSpace = false if len(t.Data) == 0 { omitSpace = true } else if parse.IsWhitespace(t.Data[len(t.Data)-1]) { omitSpace = true i := 0 for { next := tb.Peek(i) // trim if EOF, text token with leading whitespace or block token if next.TokenType == html.ErrorToken { t.Data = t.Data[:len(t.Data)-1] omitSpace = false break } else if next.TokenType == html.TextToken && !parse.IsAllWhitespace(next.Data) { // stop looking when text encountered break } else if next.TokenType == html.StartTagToken || next.TokenType == html.EndTagToken { if o.KeepWhitespace { break } // remove when followed by a block tag if next.Traits&blockTag != 0 { t.Data = t.Data[:len(t.Data)-1] omitSpace = false break } else if next.TokenType == html.StartTagToken { break } } i++ } } w.Write(t.Data) } case html.StartTagToken, html.EndTagToken: rawTagHash = 0 hasAttributes := false if t.TokenType == html.StartTagToken { if next := tb.Peek(0); next.TokenType == html.AttributeToken { hasAttributes = true } if t.Traits&rawTag != 0 { // ignore empty script and style tags if !hasAttributes && (t.Hash == Script || t.Hash == Style) { if next := tb.Peek(1); next.TokenType == html.EndTagToken { tb.Shift() tb.Shift() break } } rawTagHash = t.Hash rawTagMediatype = nil // do not minify content of